From 48c75bd0c2b96bf3b9fad2bbfb8a403e82db26d5 Mon Sep 17 00:00:00 2001 From: Xianming Lei <31424839+leixm@users.noreply.github.com> Date: Sun, 12 Jan 2025 22:44:07 +0800 Subject: [PATCH 1/3] [CELEBORN-1577][Phase2] QuotaManager should support interrupt shuffle. --- .../apache/celeborn/common/CelebornConf.scala | 96 ++- .../common/quota/ResourceConsumption.scala | 21 + .../quota/{Quota.scala => StorageQuota.scala} | 6 +- docs/configuration/quota.md | 16 +- docs/migration.md | 8 + .../clustermeta/AbstractMetaManager.java | 20 +- .../master/clustermeta/IMetadataHandler.java | 1 - .../clustermeta/SingleMasterMetaManager.java | 12 +- .../clustermeta/ha/HAMasterMetaManager.java | 3 - .../master/clustermeta/ha/MetaHandler.java | 1 - .../service/deploy/master/Master.scala | 90 +-- .../service/deploy/master/MasterSource.scala | 2 + .../deploy/master/quota/QuotaManager.scala | 349 ++++++++-- .../deploy/master/quota/QuotaStatus.scala | 32 + .../clustermeta/DefaultMetaSystemSuiteJ.java | 4 - .../ha/RatisMasterStatusSystemSuiteJ.java | 4 - .../test/resources/dynamicConfig-quota-2.yaml | 35 + .../test/resources/dynamicConfig-quota-3.yaml | 37 + .../test/resources/dynamicConfig-quota.yaml | 17 +- .../master/quota/QuotaManagerSuite.scala | 655 +++++++++++++++++- .../common/service/config/DynamicConfig.java | 91 ++- .../worker/storage/StorageManager.scala | 19 +- 22 files changed, 1268 insertions(+), 251 deletions(-) rename common/src/main/scala/org/apache/celeborn/common/quota/{Quota.scala => StorageQuota.scala} (90%) create mode 100644 master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaStatus.scala create mode 100644 master/src/test/resources/dynamicConfig-quota-2.yaml create mode 100644 master/src/test/resources/dynamicConfig-quota-3.yaml diff --git a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala index 80d31d74750..db47e7ac0fd 100644 --- 
a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala +++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala @@ -5411,40 +5411,40 @@ object CelebornConf extends Logging { .stringConf .createWithDefault("org.apache.celeborn.server.common.container.DefaultContainerInfoProvider") - val QUOTA_DISK_BYTES_WRITTEN: ConfigEntry[Long] = + val QUOTA_TENANT_DISK_BYTES_WRITTEN: ConfigEntry[Long] = buildConf("celeborn.quota.tenant.diskBytesWritten") .categories("quota") .dynamic - .doc("Quota dynamic configuration for written disk bytes.") + .doc("Tenant level quota dynamic configuration for written disk bytes.") .version("0.5.0") - .longConf + .bytesConf(ByteUnit.BYTE) .createWithDefault(Long.MaxValue) - val QUOTA_DISK_FILE_COUNT: ConfigEntry[Long] = + val QUOTA_TENANT_DISK_FILE_COUNT: ConfigEntry[Long] = buildConf("celeborn.quota.tenant.diskFileCount") .categories("quota") .dynamic - .doc("Quota dynamic configuration for written disk file count.") + .doc("Tenant level quota dynamic configuration for written disk file count.") .version("0.5.0") .longConf .createWithDefault(Long.MaxValue) - val QUOTA_HDFS_BYTES_WRITTEN: ConfigEntry[Long] = + val QUOTA_TENANT_HDFS_BYTES_WRITTEN: ConfigEntry[Long] = buildConf("celeborn.quota.tenant.hdfsBytesWritten") .categories("quota") .dynamic - .doc("Quota dynamic configuration for written hdfs bytes.") + .doc("Tenant level quota dynamic configuration for written hdfs bytes.") .version("0.5.0") - .longConf + .bytesConf(ByteUnit.BYTE) .createWithDefault(Long.MaxValue) - val QUOTA_HDFS_FILE_COUNT: ConfigEntry[Long] = + val QUOTA_TENANT_HDFS_FILE_COUNT: ConfigEntry[Long] = buildConf("celeborn.quota.tenant.hdfsFileCount") .categories("quota") .dynamic - .doc("Quota dynamic configuration for written hdfs file count.") + .doc("Tenant level quota dynamic configuration for written hdfs file count.") .version("0.5.0") .longConf 
.createWithDefault(Long.MaxValue) val QUOTA_INTERRUPT_SHUFFLE_ENABLED: ConfigEntry[Boolean] = @@ -6051,4 +6051,76 @@ object CelebornConf extends Logging { .doubleConf .checkValue(v => v > 0.0 && v <= 1.0, "Should be in (0.0, 1.0].") .createWithDefault(1) + + val QUOTA_CLUSTER_DISK_BYTES_WRITTEN: ConfigEntry[Long] = + buildConf("celeborn.quota.cluster.diskBytesWritten") + .categories("quota") + .dynamic + .doc("Cluster level quota dynamic configuration for written disk bytes.") + .version("0.6.0") + .bytesConf(ByteUnit.BYTE) + .createWithDefault(Long.MaxValue) + + val QUOTA_CLUSTER_DISK_FILE_COUNT: ConfigEntry[Long] = + buildConf("celeborn.quota.cluster.diskFileCount") + .categories("quota") + .dynamic + .doc("Cluster level quota dynamic configuration for written disk file count.") + .version("0.6.0") + .longConf + .createWithDefault(Long.MaxValue) + + val QUOTA_CLUSTER_HDFS_BYTES_WRITTEN: ConfigEntry[Long] = + buildConf("celeborn.quota.cluster.hdfsBytesWritten") + .categories("quota") + .dynamic + .doc("Cluster level quota dynamic configuration for written hdfs bytes.") + .version("0.6.0") + .bytesConf(ByteUnit.BYTE) + .createWithDefault(Long.MaxValue) + + val QUOTA_CLUSTER_HDFS_FILE_COUNT: ConfigEntry[Long] = + buildConf("celeborn.quota.cluster.hdfsFileCount") + .categories("quota") + .dynamic + .doc("Cluster level quota dynamic configuration for written hdfs file count.") + .version("0.6.0") + .longConf + .createWithDefault(Long.MaxValue) + + val QUOTA_USER_DISK_BYTES_WRITTEN: ConfigEntry[Long] = + buildConf("celeborn.quota.user.diskBytesWritten") + .categories("quota") + .dynamic + .doc("User level quota dynamic configuration for written disk bytes.") + .version("0.6.0") + .bytesConf(ByteUnit.BYTE) + .createWithDefault(Long.MaxValue) + + val QUOTA_USER_DISK_FILE_COUNT: ConfigEntry[Long] = + buildConf("celeborn.quota.user.diskFileCount") + .categories("quota") + .dynamic + .doc("User level quota dynamic configuration for written 
disk file count.") + .version("0.6.0") + .longConf + .createWithDefault(Long.MaxValue) + + val QUOTA_USER_HDFS_BYTES_WRITTEN: ConfigEntry[Long] = + buildConf("celeborn.quota.user.hdfsBytesWritten") + .categories("quota") + .dynamic + .doc("User level quota dynamic configuration for written hdfs bytes.") + .version("0.6.0") + .bytesConf(ByteUnit.BYTE) + .createWithDefault(Long.MaxValue) + + val QUOTA_USER_HDFS_FILE_COUNT: ConfigEntry[Long] = + buildConf("celeborn.quota.user.hdfsFileCount") + .categories("quota") + .dynamic + .doc("User level quota dynamic configuration for written hdfs file count.") + .version("0.6.0") + .longConf + .createWithDefault(Long.MaxValue) } diff --git a/common/src/main/scala/org/apache/celeborn/common/quota/ResourceConsumption.scala b/common/src/main/scala/org/apache/celeborn/common/quota/ResourceConsumption.scala index 10d1114b985..d454bb894c2 100644 --- a/common/src/main/scala/org/apache/celeborn/common/quota/ResourceConsumption.scala +++ b/common/src/main/scala/org/apache/celeborn/common/quota/ResourceConsumption.scala @@ -30,6 +30,12 @@ case class ResourceConsumption( hdfsFileCount: Long, var subResourceConsumptions: util.Map[String, ResourceConsumption] = null) { + def withSubResourceConsumptions( + resourceConsumptions: util.Map[String, ResourceConsumption]): ResourceConsumption = { + subResourceConsumptions = resourceConsumptions + this + } + def add(other: ResourceConsumption): ResourceConsumption = { ResourceConsumption( diskBytesWritten + other.diskBytesWritten, @@ -38,6 +44,14 @@ case class ResourceConsumption( hdfsFileCount + other.hdfsFileCount) } + def subtract(other: ResourceConsumption): ResourceConsumption = { + ResourceConsumption( + diskBytesWritten - other.diskBytesWritten, + diskFileCount - other.diskFileCount, + hdfsBytesWritten - other.hdfsBytesWritten, + hdfsFileCount - other.hdfsFileCount) + } + def addSubResourceConsumptions(otherSubResourceConsumptions: Map[ String, 
ResourceConsumption]): Map[String, ResourceConsumption] = { @@ -77,4 +91,11 @@ case class ResourceConsumption( s" hdfsFileCount: $hdfsFileCount," + s" subResourceConsumptions: $subResourceConsumptionString)" } + + def simpleString: String = { + s"ResourceConsumption(diskBytesWritten: ${Utils.bytesToString(diskBytesWritten)}," + + s" diskFileCount: $diskFileCount," + + s" hdfsBytesWritten: ${Utils.bytesToString(hdfsBytesWritten)}," + + s" hdfsFileCount: $hdfsFileCount)" + } } diff --git a/common/src/main/scala/org/apache/celeborn/common/quota/Quota.scala b/common/src/main/scala/org/apache/celeborn/common/quota/StorageQuota.scala similarity index 90% rename from common/src/main/scala/org/apache/celeborn/common/quota/Quota.scala rename to common/src/main/scala/org/apache/celeborn/common/quota/StorageQuota.scala index 8a845225821..1a7a8e52abf 100644 --- a/common/src/main/scala/org/apache/celeborn/common/quota/Quota.scala +++ b/common/src/main/scala/org/apache/celeborn/common/quota/StorageQuota.scala @@ -20,7 +20,7 @@ package org.apache.celeborn.common.quota import org.apache.celeborn.common.internal.Logging import org.apache.celeborn.common.util.Utils -case class Quota( +case class StorageQuota( diskBytesWritten: Long, diskFileCount: Long, hdfsBytesWritten: Long, @@ -34,3 +34,7 @@ case class Quota( s"]" } } + +object StorageQuota { + val DEFAULT_QUOTA = StorageQuota(Long.MaxValue, Long.MaxValue, Long.MaxValue, Long.MaxValue) +} diff --git a/docs/configuration/quota.md b/docs/configuration/quota.md index b1fa4f9403e..a5ff696e136 100644 --- a/docs/configuration/quota.md +++ b/docs/configuration/quota.md @@ -19,10 +19,18 @@ license: | | Key | Default | isDynamic | Description | Since | Deprecated | | --- | ------- | --------- | ----------- | ----- | ---------- | +| celeborn.quota.cluster.diskBytesWritten | 9223372036854775807b | true | Cluster level quota dynamic configuration for written disk bytes. 
| 0.6.0 | | +| celeborn.quota.cluster.diskFileCount | 9223372036854775807 | true | Cluster level quota dynamic configuration for written disk file count. | 0.6.0 | | +| celeborn.quota.cluster.hdfsBytesWritten | 9223372036854775807b | true | Cluster level quota dynamic configuration for written hdfs bytes. | 0.6.0 | | +| celeborn.quota.cluster.hdfsFileCount | 9223372036854775807 | true | Cluster level quota dynamic configuration for written hdfs file count. | 0.6.0 | | | celeborn.quota.enabled | true | false | When Master side sets to true, the master will enable to check the quota via QuotaManager. When Client side sets to true, LifecycleManager will request Master side to check whether the current user has enough quota before registration of shuffle. Fallback to the default shuffle service when Master side checks that there is no enough quota for current user. | 0.2.0 | | | celeborn.quota.interruptShuffle.enabled | false | false | Whether to enable interrupt shuffle when quota exceeds. | 0.6.0 | | -| celeborn.quota.tenant.diskBytesWritten | 9223372036854775807 | true | Quota dynamic configuration for written disk bytes. | 0.5.0 | | -| celeborn.quota.tenant.diskFileCount | 9223372036854775807 | true | Quota dynamic configuration for written disk file count. | 0.5.0 | | -| celeborn.quota.tenant.hdfsBytesWritten | 9223372036854775807 | true | Quota dynamic configuration for written hdfs bytes. | 0.5.0 | | -| celeborn.quota.tenant.hdfsFileCount | 9223372036854775807 | true | Quota dynamic configuration for written hdfs file count. | 0.5.0 | | +| celeborn.quota.tenant.diskBytesWritten | 9223372036854775807b | true | Tenant level quota dynamic configuration for written disk bytes. | 0.5.0 | | +| celeborn.quota.tenant.diskFileCount | 9223372036854775807 | true | Tenant level quota dynamic configuration for written disk file count. 
| 0.5.0 | | +| celeborn.quota.tenant.hdfsBytesWritten | 9223372036854775807b | true | Tenant level quota dynamic configuration for written hdfs bytes. | 0.5.0 | | +| celeborn.quota.tenant.hdfsFileCount | 9223372036854775807 | true | Tenant level quota dynamic configuration for written hdfs file count. | 0.5.0 | | +| celeborn.quota.user.diskBytesWritten | 9223372036854775807b | true | User level quota dynamic configuration for written disk bytes. | 0.6.0 | | +| celeborn.quota.user.diskFileCount | 9223372036854775807 | true | User level quota dynamic configuration for written disk file count. | 0.6.0 | | +| celeborn.quota.user.hdfsBytesWritten | 9223372036854775807b | true | User level quota dynamic configuration for written hdfs bytes. | 0.6.0 | | +| celeborn.quota.user.hdfsFileCount | 9223372036854775807 | true | User level quota dynamic configuration for written hdfs file count. | 0.6.0 | | diff --git a/docs/migration.md b/docs/migration.md index 800655e5e04..d53495280c8 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -23,6 +23,14 @@ license: | # Upgrading from 0.5 to 0.6 +- Since 0.6.0, Celeborn changed the user level quota configuration from `celeborn.quota.tenant.diskBytesWritten` to `celeborn.quota.user.diskBytesWritten`; `celeborn.quota.tenant.diskBytesWritten` now configures the tenant level quota. Please use `celeborn.quota.user.diskBytesWritten` if you want to set user level quota. + +- Since 0.6.0, Celeborn changed the user level quota configuration from `celeborn.quota.tenant.diskFileCount` to `celeborn.quota.user.diskFileCount`; `celeborn.quota.tenant.diskFileCount` now configures the tenant level quota. Please use `celeborn.quota.user.diskFileCount` if you want to set user level quota. 
+ +- Since 0.6.0, Celeborn changed the user level quota configuration from `celeborn.quota.tenant.hdfsBytesWritten` to `celeborn.quota.user.hdfsBytesWritten`; `celeborn.quota.tenant.hdfsBytesWritten` now configures the tenant level quota. Please use `celeborn.quota.user.hdfsBytesWritten` if you want to set user level quota. + +- Since 0.6.0, Celeborn changed the user level quota configuration from `celeborn.quota.tenant.hdfsFileCount` to `celeborn.quota.user.hdfsFileCount`; `celeborn.quota.tenant.hdfsFileCount` now configures the tenant level quota. Please use `celeborn.quota.user.hdfsFileCount` if you want to set user level quota. 
+ - Since 0.6.0, Celeborn changed the default value of `celeborn.master.slot.assign.extraSlots` from `2` to `100`, which means Celeborn will involve more workers in offering slots. - Since 0.6.0, Celeborn deprecate `celeborn.worker.congestionControl.low.watermark`. Please use `celeborn.worker.congestionControl.diskBuffer.low.watermark` instead. diff --git a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/AbstractMetaManager.java b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/AbstractMetaManager.java index cf37796bf42..2f0ad008386 100644 --- a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/AbstractMetaManager.java +++ b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/AbstractMetaManager.java @@ -250,13 +250,11 @@ public void updateWorkerHeartbeatMeta( int fetchPort, int replicatePort, Map disks, - Map userResourceConsumption, long time, WorkerStatus workerStatus, boolean highWorkload) { WorkerInfo worker = - new WorkerInfo( - host, rpcPort, pushPort, fetchPort, replicatePort, -1, disks, userResourceConsumption); + new WorkerInfo(host, rpcPort, pushPort, fetchPort, replicatePort, -1, disks, null); AtomicLong availableSlots = new AtomicLong(); LOG.debug("update worker {}:{} heartbeat {}", host, rpcPort, disks); synchronized (workersMap) { @@ -264,7 +262,6 @@ public void updateWorkerHeartbeatMeta( workerInfo.ifPresent( info -> { info.updateThenGetDiskInfos(disks, Option.apply(estimatedPartitionSize)); - info.updateThenGetUserResourceConsumption(userResourceConsumption); availableSlots.set(info.totalAvailableSlots()); info.lastHeartbeat_$eq(time); info.setWorkerStatus(workerStatus); @@ -613,4 +610,19 @@ private void addShuffleFallbackCounts(Map fallbackCounts) { fallbackPolicy, (k, v) -> v == null ? 
fallbackCounts.get(k) : v + fallbackCounts.get(k)); } } + + public void updateWorkerResourceConsumptions( + String host, + int rpcPort, + int pushPort, + int fetchPort, + int replicatePort, + Map resourceConsumptions) { + WorkerInfo worker = + new WorkerInfo(host, rpcPort, pushPort, fetchPort, replicatePort, -1, null, null); + synchronized (workersMap) { + Optional workerInfo = Optional.ofNullable(workersMap.get(worker.toUniqueId())); + workerInfo.ifPresent(info -> info.updateThenGetUserResourceConsumption(resourceConsumptions)); + } + } } diff --git a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/IMetadataHandler.java b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/IMetadataHandler.java index c2513563466..52fc783b4ea 100644 --- a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/IMetadataHandler.java +++ b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/IMetadataHandler.java @@ -66,7 +66,6 @@ void handleWorkerHeartbeat( int fetchPort, int replicatePort, Map disks, - Map userResourceConsumption, long time, boolean highWorkload, WorkerStatus workerStatus, diff --git a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/SingleMasterMetaManager.java b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/SingleMasterMetaManager.java index 765beb791c1..7a3ce249fdb 100644 --- a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/SingleMasterMetaManager.java +++ b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/SingleMasterMetaManager.java @@ -118,22 +118,12 @@ public void handleWorkerHeartbeat( int fetchPort, int replicatePort, Map disks, - Map userResourceConsumption, long time, boolean highWorkload, WorkerStatus workerStatus, String requestId) { updateWorkerHeartbeatMeta( - host, - rpcPort, - pushPort, - fetchPort, - replicatePort, - disks, - userResourceConsumption, 
- time, - workerStatus, - highWorkload); + host, rpcPort, pushPort, fetchPort, replicatePort, disks, time, workerStatus, highWorkload); } @Override diff --git a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAMasterMetaManager.java b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAMasterMetaManager.java index 1b50b5d7adf..f8a8da925dc 100644 --- a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAMasterMetaManager.java +++ b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAMasterMetaManager.java @@ -272,7 +272,6 @@ public void handleWorkerHeartbeat( int fetchPort, int replicatePort, Map disks, - Map userResourceConsumption, long time, boolean highWorkload, WorkerStatus workerStatus, @@ -290,8 +289,6 @@ public void handleWorkerHeartbeat( .setFetchPort(fetchPort) .setReplicatePort(replicatePort) .putAllDisks(MetaUtil.toPbDiskInfos(disks)) - .putAllUserResourceConsumption( - MetaUtil.toPbUserResourceConsumption(userResourceConsumption)) .setWorkerStatus(MetaUtil.toPbWorkerStatus(workerStatus)) .setTime(time) .setHighWorkload(highWorkload) diff --git a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/MetaHandler.java b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/MetaHandler.java index b1ee9b72eb0..72df5f7b722 100644 --- a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/MetaHandler.java +++ b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/MetaHandler.java @@ -227,7 +227,6 @@ public ResourceResponse handleWriteRequest(ResourceProtos.ResourceRequest reques fetchPort, replicatePort, diskInfos, - userResourceConsumption, request.getWorkerHeartbeatRequest().getTime(), workerStatus, highWorkload); diff --git a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala 
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala index 31f10f9d4c1..3b154dc5c67 100644 --- a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala +++ b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala @@ -189,11 +189,13 @@ private[celeborn] class Master( private val hasHDFSStorage = conf.hasHDFSStorage private val hasS3Storage = conf.hasS3Storage - private val quotaManager = new QuotaManager(conf, configService) + private val quotaManager = new QuotaManager( + statusSystem, + masterSource, + resourceConsumptionSource, + conf, + configService) private val tagsManager = new TagsManager(Option(configService)) - private val masterResourceConsumptionInterval = conf.masterResourceConsumptionInterval - private val userResourceConsumptions = - JavaUtils.newConcurrentHashMap[UserIdentifier, (ResourceConsumption, Long)]() private val slotsAssignMaxWorkers = conf.masterSlotAssignMaxWorkers private val slotsAssignLoadAwareDiskGroupNum = conf.masterSlotAssignLoadAwareDiskGroupNum @@ -538,6 +540,7 @@ private[celeborn] class Master( .asScala.map(PbSerDeUtils.fromPbWorkerInfo).toList.asJava) val workersToRemove = new util.ArrayList[WorkerInfo](pb.getWorkersToRemoveList .asScala.map(PbSerDeUtils.fromPbWorkerInfo).toList.asJava) + executeWithLeaderChecker( context, handleWorkerExclude( @@ -668,11 +671,17 @@ private[celeborn] class Master( fetchPort, replicatePort, disks.map { disk => disk.mountPoint -> disk }.toMap.asJava, - userResourceConsumption, System.currentTimeMillis(), highWorkload, workerStatus, requestId) + statusSystem.updateWorkerResourceConsumptions( + host, + rpcPort, + pushPort, + fetchPort, + replicatePort, + userResourceConsumption) } val expiredShuffleKeys = new util.HashSet[String] @@ -1138,7 +1147,7 @@ private[celeborn] class Master( new util.ArrayList[WorkerInfo]( (statusSystem.shutdownWorkers.asScala ++ statusSystem.decommissionWorkers.asScala).asJava), new 
util.ArrayList(appRelatedShuffles), - CheckQuotaResponse(isAvailable = true, ""))) + quotaManager.checkApplicationQuotaStatus(appId))) } else { context.reply(OneWayMessageResponse) } @@ -1154,78 +1163,11 @@ private[celeborn] class Master( } } - private def handleResourceConsumption(userIdentifier: UserIdentifier): ResourceConsumption = { - val userResourceConsumption = computeUserResourceConsumption(userIdentifier) - gaugeResourceConsumption(userIdentifier) - userResourceConsumption - } - - private def gaugeResourceConsumption( - userIdentifier: UserIdentifier, - applicationId: String = null): Unit = { - val resourceConsumptionLabel = - if (applicationId == null) userIdentifier.toMap - else userIdentifier.toMap + (resourceConsumptionSource.applicationLabel -> applicationId) - resourceConsumptionSource.addGauge( - ResourceConsumptionSource.DISK_FILE_COUNT, - resourceConsumptionLabel) { () => - computeResourceConsumption(userIdentifier, applicationId).diskFileCount - } - resourceConsumptionSource.addGauge( - ResourceConsumptionSource.DISK_BYTES_WRITTEN, - resourceConsumptionLabel) { () => - computeResourceConsumption(userIdentifier, applicationId).diskBytesWritten - } - if (hasHDFSStorage) { - resourceConsumptionSource.addGauge( - ResourceConsumptionSource.HDFS_FILE_COUNT, - resourceConsumptionLabel) { () => - computeResourceConsumption(userIdentifier, applicationId).hdfsFileCount - } - resourceConsumptionSource.addGauge( - ResourceConsumptionSource.HDFS_BYTES_WRITTEN, - resourceConsumptionLabel) { () => - computeResourceConsumption(userIdentifier, applicationId).hdfsBytesWritten - } - } - } - - private def computeResourceConsumption( - userIdentifier: UserIdentifier, - applicationId: String = null): ResourceConsumption = { - val newResourceConsumption = computeUserResourceConsumption(userIdentifier) - if (applicationId == null) { - val current = System.currentTimeMillis() - if (userResourceConsumptions.containsKey(userIdentifier)) { - val 
resourceConsumptionAndUpdateTime = userResourceConsumptions.get(userIdentifier) - if (current - resourceConsumptionAndUpdateTime._2 <= masterResourceConsumptionInterval) { - return resourceConsumptionAndUpdateTime._1 - } - } - userResourceConsumptions.put(userIdentifier, (newResourceConsumption, current)) - newResourceConsumption - } else { - newResourceConsumption.subResourceConsumptions.get(applicationId) - } - } - - // TODO: Support calculate topN app resource consumption. - private def computeUserResourceConsumption( - userIdentifier: UserIdentifier): ResourceConsumption = { - val resourceConsumption = statusSystem.workersMap.values().asScala.flatMap { - workerInfo => workerInfo.userResourceConsumption.asScala.get(userIdentifier) - }.foldRight(ResourceConsumption(0, 0, 0, 0))(_ add _) - resourceConsumption - } - private[master] def handleCheckQuota( userIdentifier: UserIdentifier, context: RpcCallContext): Unit = { - val userResourceConsumption = handleResourceConsumption(userIdentifier) if (conf.quotaEnabled) { - val (isAvailable, reason) = - quotaManager.checkQuotaSpaceAvailable(userIdentifier, userResourceConsumption) - context.reply(CheckQuotaResponse(isAvailable, reason)) + context.reply(quotaManager.checkUserQuotaStatus(userIdentifier)) } else { context.reply(CheckQuotaResponse(true, "")) } diff --git a/master/src/main/scala/org/apache/celeborn/service/deploy/master/MasterSource.scala b/master/src/main/scala/org/apache/celeborn/service/deploy/master/MasterSource.scala index 80bd1e5fb77..d144b96d2a6 100644 --- a/master/src/main/scala/org/apache/celeborn/service/deploy/master/MasterSource.scala +++ b/master/src/main/scala/org/apache/celeborn/service/deploy/master/MasterSource.scala @@ -63,4 +63,6 @@ object MasterSource { // Capacity val DEVICE_CELEBORN_FREE_CAPACITY = "DeviceCelebornFreeBytes" val DEVICE_CELEBORN_TOTAL_CAPACITY = "DeviceCelebornTotalBytes" + + val UPDATE_RESOURCE_CONSUMPTION_TIME = "UpdateResourceConsumptionTime" } diff --git 
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala index 07906b7d3b0..7ae6409951d 100644 --- a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala +++ b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala @@ -16,96 +16,319 @@ */ package org.apache.celeborn.service.deploy.master.quota +import java.util.{Map => JMap} +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ + import org.apache.celeborn.common.CelebornConf import org.apache.celeborn.common.identity.UserIdentifier import org.apache.celeborn.common.internal.Logging -import org.apache.celeborn.common.quota.{Quota, ResourceConsumption} -import org.apache.celeborn.common.util.Utils +import org.apache.celeborn.common.metrics.source.ResourceConsumptionSource +import org.apache.celeborn.common.metrics.source.ResourceConsumptionSource._ +import org.apache.celeborn.common.protocol.message.ControlMessages.CheckQuotaResponse +import org.apache.celeborn.common.quota.{ResourceConsumption, StorageQuota} +import org.apache.celeborn.common.util.{JavaUtils, ThreadUtils, Utils} import org.apache.celeborn.server.common.service.config.ConfigService +import org.apache.celeborn.service.deploy.master.MasterSource +import org.apache.celeborn.service.deploy.master.MasterSource.UPDATE_RESOURCE_CONSUMPTION_TIME +import org.apache.celeborn.service.deploy.master.clustermeta.AbstractMetaManager +import org.apache.celeborn.service.deploy.master.quota.QuotaStatus._ + +class QuotaManager( + statusSystem: AbstractMetaManager, + masterSource: MasterSource, + resourceConsumptionSource: ResourceConsumptionSource, + celebornConf: CelebornConf, + configService: ConfigService) extends Logging { + + val userQuotaStatus: JMap[UserIdentifier, QuotaStatus] = JavaUtils.newConcurrentHashMap() + val tenantQuotaStatus: 
JMap[String, QuotaStatus] = JavaUtils.newConcurrentHashMap() + @volatile + var clusterQuotaStatus: QuotaStatus = new QuotaStatus() + val appQuotaStatus: JMap[String, QuotaStatus] = JavaUtils.newConcurrentHashMap() + val userResourceConsumptionMap: JMap[UserIdentifier, ResourceConsumption] = + JavaUtils.newConcurrentHashMap() + private val quotaChecker = + ThreadUtils.newDaemonSingleThreadScheduledExecutor("master-quota-checker") + quotaChecker.scheduleWithFixedDelay( + new Runnable { + override def run(): Unit = { + try { + updateResourceConsumption() + } catch { + case t: Throwable => logError("Update user resource consumption failed.", t) + } + } + }, + 0L, + celebornConf.masterResourceConsumptionInterval, + TimeUnit.MILLISECONDS) -class QuotaManager(celebornConf: CelebornConf, configService: ConfigService) extends Logging { - private val DEFAULT_QUOTA = Quota( - celebornConf.get(CelebornConf.QUOTA_DISK_BYTES_WRITTEN), - celebornConf.get(CelebornConf.QUOTA_DISK_FILE_COUNT), - celebornConf.get(CelebornConf.QUOTA_HDFS_BYTES_WRITTEN), - celebornConf.get(CelebornConf.QUOTA_HDFS_FILE_COUNT)) - def getQuota(userIdentifier: UserIdentifier): Quota = { - if (configService != null) { - val config = - configService.getTenantUserConfigFromCache(userIdentifier.tenantId, userIdentifier.name) - config.getQuota + def handleAppLost(appId: String): Unit = { + appQuotaStatus.remove(appId) + } + + def checkUserQuotaStatus(userIdentifier: UserIdentifier): CheckQuotaResponse = { + val tenantStatus = tenantQuotaStatus.getOrDefault(userIdentifier.tenantId, QuotaStatus()) + val userStatus = userQuotaStatus.getOrDefault(userIdentifier, QuotaStatus()) + if (userStatus.exceed) { + CheckQuotaResponse(false, userStatus.exceedReason) + } else if (tenantStatus.exceed) { + CheckQuotaResponse(false, tenantStatus.exceedReason) } else { - DEFAULT_QUOTA + CheckQuotaResponse(!clusterQuotaStatus.exceed, clusterQuotaStatus.exceedReason) } } - def checkQuotaSpaceAvailable( - userIdentifier: 
UserIdentifier, - resourceResumption: ResourceConsumption): (Boolean, String) = { - val quota = getQuota(userIdentifier) + def checkApplicationQuotaStatus(applicationId: String): CheckQuotaResponse = { + val status = appQuotaStatus.getOrDefault(applicationId, QuotaStatus()) + CheckQuotaResponse(!status.exceed, status.exceedReason) + } + + def getUserStorageQuota(user: UserIdentifier): StorageQuota = { + Option(configService) + .map(_.getTenantUserConfigFromCache(user.tenantId, user.name).getUserStorageQuota) + .getOrElse(StorageQuota.DEFAULT_QUOTA) + } + + def getTenantStorageQuota(tenantId: String): StorageQuota = { + Option(configService) + .map(_.getTenantConfigFromCache(tenantId).getTenantStorageQuota) + .getOrElse(StorageQuota.DEFAULT_QUOTA) + } + + def getClusterStorageQuota: StorageQuota = { + Option(configService) + .map(_.getSystemConfigFromCache.getClusterStorageQuota) + .getOrElse(StorageQuota.DEFAULT_QUOTA) + } + + private def interruptShuffleEnabled: Boolean = { + Option(configService) + .map(_.getSystemConfigFromCache.interruptShuffleEnabled()) + .getOrElse(celebornConf.quotaInterruptShuffleEnabled) + } + + private def checkUserQuotaSpace( + user: UserIdentifier, + consumption: ResourceConsumption): QuotaStatus = { + val quota = getUserStorageQuota(user) + checkQuotaSpace(s"$USER_EXHAUSTED user: $user. ", consumption, quota) + } + + private def checkTenantQuotaSpace( + tenantId: String, + consumption: ResourceConsumption): QuotaStatus = { + val quota = getTenantStorageQuota(tenantId) + checkQuotaSpace(s"$TENANT_EXHAUSTED tenant: $tenantId. 
", consumption, quota) + } + + private def checkClusterQuotaSpace(consumption: ResourceConsumption): QuotaStatus = { + checkQuotaSpace(CLUSTER_EXHAUSTED, consumption, getClusterStorageQuota) + } + + private def checkQuotaSpace( + reason: String, + consumption: ResourceConsumption, + quota: StorageQuota): QuotaStatus = { val checkResults = Seq( - checkDiskBytesWritten(userIdentifier, resourceResumption.diskBytesWritten, quota), - checkDiskFileCount(userIdentifier, resourceResumption.diskFileCount, quota), - checkHdfsBytesWritten(userIdentifier, resourceResumption.hdfsBytesWritten, quota), - checkHdfsFileCount(userIdentifier, resourceResumption.hdfsFileCount, quota)) + checkQuota( + consumption.diskBytesWritten, + quota.diskBytesWritten, + "DISK_BYTES_WRITTEN", + Utils.bytesToString), + checkQuota( + consumption.diskFileCount, + quota.diskFileCount, + "DISK_FILE_COUNT", + _.toString), + checkQuota( + consumption.hdfsBytesWritten, + quota.hdfsBytesWritten, + "HDFS_BYTES_WRITTEN", + Utils.bytesToString), + checkQuota( + consumption.hdfsFileCount, + quota.hdfsFileCount, + "HDFS_FILE_COUNT", + _.toString)) val exceed = checkResults.foldLeft(false)(_ || _._1) - val reason = checkResults.foldLeft("")(_ + _._2) - (!exceed, reason) + val exceedReason = + if (exceed) { + s"$reason ${checkResults.foldLeft("")(_ + _._2)}" + } else { + "" + } + QuotaStatus(exceed, exceedReason) } - private def checkDiskBytesWritten( - userIdentifier: UserIdentifier, + private def checkQuota( value: Long, - quota: Quota): (Boolean, String) = { - val exceed = (quota.diskBytesWritten > 0 && value >= quota.diskBytesWritten) + quota: Long, + quotaType: String, + format: Long => String): (Boolean, String) = { + val exceed = quota > 0 && value >= quota var reason = "" if (exceed) { - reason = s"User $userIdentifier used diskBytesWritten (${Utils.bytesToString(value)}) " + - s"exceeds quota (${Utils.bytesToString(quota.diskBytesWritten)}). 
" + reason = s"$quotaType(${format(value)}) exceeds quota(${format(quota)}). " logWarning(reason) } (exceed, reason) } - private def checkDiskFileCount( - userIdentifier: UserIdentifier, - value: Long, - quota: Quota): (Boolean, String) = { - val exceed = (quota.diskFileCount > 0 && value >= quota.diskFileCount) - var reason = "" - if (exceed) { - reason = - s"User $userIdentifier used diskFileCount($value) exceeds quota(${quota.diskFileCount}). " - logWarning(reason) + private def checkConsumptionExceeded( + used: ResourceConsumption, + threshold: StorageQuota): Boolean = { + used.diskBytesWritten >= threshold.diskBytesWritten || + used.diskFileCount >= threshold.diskFileCount || + used.hdfsBytesWritten >= threshold.hdfsBytesWritten || + used.hdfsFileCount >= threshold.hdfsFileCount + } + + def updateResourceConsumption(): Unit = { + masterSource.sample(UPDATE_RESOURCE_CONSUMPTION_TIME, this.getClass.getSimpleName, Map.empty) { + val clusterQuota = getClusterStorageQuota + var clusterResourceConsumption = ResourceConsumption(0, 0, 0, 0) + + val tenantResourceConsumption = + statusSystem.availableWorkers.asScala.flatMap { workerInfo => + workerInfo.userResourceConsumption.asScala + }.groupBy(_._1.tenantId).toSeq.map { case (tenantId, tenantConsumptionList) => + var tenantResourceConsumption = ResourceConsumption(0, 0, 0, 0) + val userResourceConsumption = + tenantConsumptionList.groupBy(_._1).map { + case (userIdentifier, userConsumptionList) => + // Step 1: Compute user consumption and set quota status. + val resourceConsumptionList = userConsumptionList.map(_._2).toSeq + val resourceConsumption = computeUserResourceConsumption(resourceConsumptionList) + + // Step 2: Update user resource consumption metrics. 
+ // For extract metrics + userResourceConsumptionMap.put(userIdentifier, resourceConsumption) + registerUserResourceConsumptionMetrics(userIdentifier) + + // Step 3: Expire user level exceeded app except already expired app + clusterResourceConsumption = clusterResourceConsumption.add(resourceConsumption) + tenantResourceConsumption = tenantResourceConsumption.add(resourceConsumption) + val quotaStatus = checkUserQuotaSpace(userIdentifier, resourceConsumption) + userQuotaStatus.put(userIdentifier, quotaStatus) + if (interruptShuffleEnabled && quotaStatus.exceed) { + val subResourceConsumptions = computeSubConsumption(resourceConsumptionList) + // Compute expired size + val (expired, notExpired) = subResourceConsumptions.partition { case (app, _) => + appQuotaStatus.containsKey(app) + } + val userConsumptions = + expired.values.foldLeft(resourceConsumption)(_.subtract(_)) + expireApplication( + userConsumptions, + getUserStorageQuota(userIdentifier), + notExpired.toSeq, + USER_EXHAUSTED) + (Option(subResourceConsumptions), resourceConsumptionList) + } else { + (None, resourceConsumptionList) + } + } + + val quotaStatus = checkTenantQuotaSpace(tenantId, tenantResourceConsumption) + tenantQuotaStatus.put(tenantId, quotaStatus) + // Expire tenant level exceeded app except already expired app + if (interruptShuffleEnabled && quotaStatus.exceed) { + val appConsumptions = userResourceConsumption.map { + case (None, subConsumptionList) => computeSubConsumption(subConsumptionList) + case (Some(subConsumptions), _) => subConsumptions + }.flatMap(_.toSeq).toSeq + + // Compute nonExpired app total usage + val (expired, notExpired) = appConsumptions.partition { case (app, _) => + appQuotaStatus.containsKey(app) + } + tenantResourceConsumption = + expired.map(_._2).foldLeft(tenantResourceConsumption)(_.subtract(_)) + expireApplication( + tenantResourceConsumption, + getTenantStorageQuota(tenantId), + notExpired, + TENANT_EXHAUSTED) + (Option(appConsumptions), 
tenantConsumptionList.map(_._2).toSeq) + } else { + (None, tenantConsumptionList.map(_._2).toSeq) + } + } + + // Expire cluster level exceeded app except already expired app + clusterQuotaStatus = checkClusterQuotaSpace(clusterResourceConsumption) + if (interruptShuffleEnabled && clusterQuotaStatus.exceed) { + val appConsumptions = tenantResourceConsumption.map { + case (None, subConsumptionList) => computeSubConsumption(subConsumptionList) + case (Some(subConsumptions), _) => subConsumptions + }.flatMap(_.toSeq).toSeq + + // Compute nonExpired app total usage + val (expired, notExpired) = appConsumptions.partition { case (app, _) => + appQuotaStatus.containsKey(app) + } + clusterResourceConsumption = + expired.map(_._2).foldLeft(clusterResourceConsumption)(_.subtract(_)) + expireApplication(clusterResourceConsumption, clusterQuota, notExpired, CLUSTER_EXHAUSTED) + } } - (exceed, reason) } - private def checkHdfsBytesWritten( - userIdentifier: UserIdentifier, - value: Long, - quota: Quota): (Boolean, String) = { - val exceed = (quota.hdfsBytesWritten > 0 && value >= quota.hdfsBytesWritten) - var reason = "" - if (exceed) { - reason = s"User $userIdentifier used hdfsBytesWritten(${Utils.bytesToString(value)}) " + - s"exceeds quota(${Utils.bytesToString(quota.hdfsBytesWritten)}). 
" - logWarning(reason) + private def expireApplication( + used: ResourceConsumption, + threshold: StorageQuota, + appMap: Seq[(String, ResourceConsumption)], + expireReason: String): Unit = { + var nonExpired = used + if (checkConsumptionExceeded(used, threshold)) { + val sortedConsumption = + appMap.sortBy(_._2)(Ordering.by((r: ResourceConsumption) => + ( + r.diskBytesWritten, + r.diskFileCount, + r.hdfsBytesWritten, + r.hdfsFileCount)).reverse) + for ((appId, consumption) <- sortedConsumption + if checkConsumptionExceeded(nonExpired, threshold)) { + val reason = s"$expireReason Used: ${consumption.simpleString}, Threshold: $threshold" + appQuotaStatus.put(appId, QuotaStatus(exceed = true, reason)) + nonExpired = nonExpired.subtract(consumption) + } } - (exceed, reason) } - private def checkHdfsFileCount( - userIdentifier: UserIdentifier, - value: Long, - quota: Quota): (Boolean, String) = { - val exceed = (quota.hdfsFileCount > 0 && value >= quota.hdfsFileCount) - var reason = "" - if (exceed) { - reason = - s"User $userIdentifier used hdfsFileCount($value) exceeds quota(${quota.hdfsFileCount}). 
" - logWarning(reason) + private def computeUserResourceConsumption( + consumptions: Seq[ResourceConsumption]): ResourceConsumption = { + consumptions.foldRight(ResourceConsumption(0, 0, 0, 0))(_ add _) + } + + private def computeSubConsumption( + resourceConsumptionList: Seq[ResourceConsumption]): Map[String, ResourceConsumption] = { + resourceConsumptionList.foldRight(Map.empty[String, ResourceConsumption]) { + case (consumption, subConsumption) => + consumption.addSubResourceConsumptions(subConsumption) + } + } + + private def getResourceConsumption(userIdentifier: UserIdentifier): ResourceConsumption = { + userResourceConsumptionMap.getOrDefault(userIdentifier, ResourceConsumption(0, 0, 0, 0)) + } + + private def registerUserResourceConsumptionMetrics(userIdentifier: UserIdentifier): Unit = { + resourceConsumptionSource.addGauge(DISK_FILE_COUNT, userIdentifier.toMap) { () => + getResourceConsumption(userIdentifier).diskBytesWritten + } + resourceConsumptionSource.addGauge(DISK_BYTES_WRITTEN, userIdentifier.toMap) { () => + getResourceConsumption(userIdentifier).diskBytesWritten + } + resourceConsumptionSource.addGauge(HDFS_FILE_COUNT, userIdentifier.toMap) { () => + getResourceConsumption(userIdentifier).hdfsFileCount + } + resourceConsumptionSource.addGauge(HDFS_BYTES_WRITTEN, userIdentifier.toMap) { () => + getResourceConsumption(userIdentifier).hdfsBytesWritten } - (exceed, reason) } } diff --git a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaStatus.scala b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaStatus.scala new file mode 100644 index 00000000000..2d7d2383548 --- /dev/null +++ b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaStatus.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.celeborn.service.deploy.master.quota + +import QuotaStatus._ + +case class QuotaStatus(exceed: Boolean = false, exceedReason: String = NORMAL) + +object QuotaStatus { + val NORMAL: String = "" + val CLUSTER_EXHAUSTED: String = + "Interrupt application caused by the cluster storage usage reach threshold." + val TENANT_EXHAUSTED: String = + "Interrupt application caused by the tenant storage usage reach threshold." + val USER_EXHAUSTED: String = + "Interrupt or reject application caused by the user storage usage reach threshold." 
+} diff --git a/master/src/test/java/org/apache/celeborn/service/deploy/master/clustermeta/DefaultMetaSystemSuiteJ.java b/master/src/test/java/org/apache/celeborn/service/deploy/master/clustermeta/DefaultMetaSystemSuiteJ.java index 78695460b5f..1be17ac0cf8 100644 --- a/master/src/test/java/org/apache/celeborn/service/deploy/master/clustermeta/DefaultMetaSystemSuiteJ.java +++ b/master/src/test/java/org/apache/celeborn/service/deploy/master/clustermeta/DefaultMetaSystemSuiteJ.java @@ -696,7 +696,6 @@ public void testHandleWorkerHeartbeat() { FETCHPORT1, REPLICATEPORT1, new HashMap<>(), - userResourceConsumption1, 1, false, workerStatus, @@ -712,7 +711,6 @@ public void testHandleWorkerHeartbeat() { FETCHPORT2, REPLICATEPORT2, new HashMap<>(), - userResourceConsumption2, 1, false, workerStatus, @@ -728,7 +726,6 @@ public void testHandleWorkerHeartbeat() { FETCHPORT3, REPLICATEPORT3, disks3, - userResourceConsumption3, 1, false, workerStatus, @@ -744,7 +741,6 @@ public void testHandleWorkerHeartbeat() { FETCHPORT3, REPLICATEPORT3, disks3, - userResourceConsumption3, 1, true, workerStatus, diff --git a/master/src/test/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/RatisMasterStatusSystemSuiteJ.java b/master/src/test/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/RatisMasterStatusSystemSuiteJ.java index 0f7f752cb3a..41255cbbb44 100644 --- a/master/src/test/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/RatisMasterStatusSystemSuiteJ.java +++ b/master/src/test/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/RatisMasterStatusSystemSuiteJ.java @@ -1029,7 +1029,6 @@ public void testHandleWorkerHeartbeat() throws InterruptedException { FETCHPORT1, REPLICATEPORT1, new HashMap<>(), - userResourceConsumption1, 1, false, workerStatus, @@ -1055,7 +1054,6 @@ public void testHandleWorkerHeartbeat() throws InterruptedException { FETCHPORT2, REPLICATEPORT2, new HashMap<>(), - userResourceConsumption2, 1, false, workerStatus, @@ 
-1084,7 +1082,6 @@ public void testHandleWorkerHeartbeat() throws InterruptedException { FETCHPORT1, REPLICATEPORT1, disks1, - userResourceConsumption1, 1, false, workerStatus, @@ -1113,7 +1110,6 @@ public void testHandleWorkerHeartbeat() throws InterruptedException { FETCHPORT1, REPLICATEPORT1, disks1, - userResourceConsumption1, 1, true, workerStatus, diff --git a/master/src/test/resources/dynamicConfig-quota-2.yaml b/master/src/test/resources/dynamicConfig-quota-2.yaml new file mode 100644 index 00000000000..a30b7430ea6 --- /dev/null +++ b/master/src/test/resources/dynamicConfig-quota-2.yaml @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +- level: SYSTEM + config: + celeborn.quota.user.diskBytesWritten: 1000G + celeborn.quota.user.diskFileCount: 100 + celeborn.quota.user.hdfsBytesWritten: 1G + celeborn.quota.cluster.diskBytesWritten: 130G + celeborn.quota.interruptShuffle.enabled: true + +- tenantId: tenant_01 + level: TENANT + config: + celeborn.quota.user.diskBytesWritten: 10G + celeborn.quota.user.diskFileCount: 1000 + celeborn.quota.user.hdfsBytesWritten: 10G + users: + - name: Jerry + config: + celeborn.quota.user.diskBytesWritten: 100G + celeborn.quota.user.diskFileCount: 10000 diff --git a/master/src/test/resources/dynamicConfig-quota-3.yaml b/master/src/test/resources/dynamicConfig-quota-3.yaml new file mode 100644 index 00000000000..30711c07a6a --- /dev/null +++ b/master/src/test/resources/dynamicConfig-quota-3.yaml @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +- level: SYSTEM + config: + celeborn.quota.cluster.diskBytesWritten: 300G + celeborn.quota.interruptShuffle.enabled: true + +- tenantId: tenant_01 + level: TENANT + config: + celeborn.quota.tenant.diskBytesWritten: 150G + celeborn.quota.tenant.diskFileCount: 1500 + users: + - name: Jerry + config: + celeborn.quota.user.diskBytesWritten: 100G + celeborn.quota.user.diskFileCount: 10000 + - name: John + config: + celeborn.quota.user.diskBytesWritten: 100G + celeborn.quota.user.diskFileCount: 10000 + + diff --git a/master/src/test/resources/dynamicConfig-quota.yaml b/master/src/test/resources/dynamicConfig-quota.yaml index 156a3f692b4..a829b7c9331 100644 --- a/master/src/test/resources/dynamicConfig-quota.yaml +++ b/master/src/test/resources/dynamicConfig-quota.yaml @@ -16,20 +16,21 @@ # - level: SYSTEM config: - celeborn.quota.tenant.diskBytesWritten: 1G - celeborn.quota.tenant.diskFileCount: 100 - celeborn.quota.tenant.hdfsBytesWritten: 1G + celeborn.quota.user.diskBytesWritten: 1G + celeborn.quota.user.diskFileCount: 100 + celeborn.quota.user.hdfsBytesWritten: 1G + celeborn.quota.interruptShuffle.enabled: true - tenantId: tenant_01 level: TENANT config: - celeborn.quota.tenant.diskBytesWritten: 10G - celeborn.quota.tenant.diskFileCount: 1000 - celeborn.quota.tenant.hdfsBytesWritten: 10G + celeborn.quota.user.diskBytesWritten: 10G + celeborn.quota.user.diskFileCount: 1000 + celeborn.quota.user.hdfsBytesWritten: 10G users: - name: Jerry config: - celeborn.quota.tenant.diskBytesWritten: 100G - celeborn.quota.tenant.diskFileCount: 10000 + celeborn.quota.user.diskBytesWritten: 100G + celeborn.quota.user.diskFileCount: 10000 diff --git a/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala b/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala index 90da1cfd162..d34164f7f24 100644 --- a/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala +++ 
b/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala @@ -17,45 +17,112 @@ package org.apache.celeborn.service.deploy.master.quota -import org.junit.Assert.assertEquals +import java.util + +import scala.collection.JavaConverters.{mapAsJavaMapConverter, mapAsScalaMapConverter} +import scala.util.Random + +import org.junit.Assert.{assertEquals, assertFalse, assertTrue} +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} import org.apache.celeborn.CelebornFunSuite import org.apache.celeborn.common.CelebornConf import org.apache.celeborn.common.identity.UserIdentifier -import org.apache.celeborn.common.quota.{Quota, ResourceConsumption} -import org.apache.celeborn.common.util.Utils -import org.apache.celeborn.server.common.service.config.DynamicConfigServiceFactory +import org.apache.celeborn.common.internal.Logging +import org.apache.celeborn.common.meta.WorkerInfo +import org.apache.celeborn.common.metrics.source.ResourceConsumptionSource +import org.apache.celeborn.common.protocol.message.ControlMessages.CheckQuotaResponse +import org.apache.celeborn.common.quota.{ResourceConsumption, StorageQuota} +import org.apache.celeborn.common.rpc.RpcEnv +import org.apache.celeborn.common.util.{JavaUtils, Utils} +import org.apache.celeborn.server.common.service.config.{ConfigService, DynamicConfigServiceFactory, FsConfigServiceImpl} +import org.apache.celeborn.service.deploy.master.MasterSource +import org.apache.celeborn.service.deploy.master.clustermeta.{AbstractMetaManager, SingleMasterMetaManager} -class QuotaManagerSuite extends CelebornFunSuite { +class QuotaManagerSuite extends CelebornFunSuite + with BeforeAndAfterAll + with BeforeAndAfterEach + with Logging { protected var quotaManager: QuotaManager = _ - override def beforeAll(): Unit = { - super.beforeAll() - DynamicConfigServiceFactory.reset() + private var resourceConsumptionSource: ResourceConsumptionSource = _ + + val worker = new WorkerInfo( + "localhost", + 
10001, + 10002, + 10003, + 10004) + + var statusSystem: AbstractMetaManager = _ + + var rpcEnv: RpcEnv = _ - val conf = new CelebornConf() + val workerToResourceConsumptions = + JavaUtils.newConcurrentHashMap[String, util.Map[UserIdentifier, ResourceConsumption]]() + + val conf = new CelebornConf() + + var configService: ConfigService = _ + + override def beforeAll(): Unit = { conf.set(CelebornConf.DYNAMIC_CONFIG_STORE_BACKEND, "FS") conf.set( CelebornConf.DYNAMIC_CONFIG_STORE_FS_PATH.key, getTestResourceFile("dynamicConfig-quota.yaml").getPath) - quotaManager = new QuotaManager(conf, DynamicConfigServiceFactory.getConfigService(conf)) + resourceConsumptionSource = new ResourceConsumptionSource(conf, "Master") + DynamicConfigServiceFactory.reset() + configService = DynamicConfigServiceFactory.getConfigService(conf) + + rpcEnv = RpcEnv.create( + "test-rpc", + "rpc", + "localhost", + 9001, + conf, + None) + statusSystem = new SingleMasterMetaManager(rpcEnv, conf) + statusSystem.availableWorkers.add(worker) + quotaManager = new QuotaManager( + statusSystem, + new MasterSource(conf), + resourceConsumptionSource, + conf, + configService) + } + + override def afterAll(): Unit = { + rpcEnv.shutdown() } test("test celeborn quota conf") { + configService.refreshCache() assertEquals( - quotaManager.getQuota(UserIdentifier("tenant_01", "Jerry")), - Quota(Utils.byteStringAsBytes("100G"), 10000, Utils.byteStringAsBytes("10G"), Long.MaxValue)) + quotaManager.getUserStorageQuota(UserIdentifier("tenant_01", "Jerry")), + StorageQuota( + Utils.byteStringAsBytes("100G"), + 10000, + Utils.byteStringAsBytes("10G"), + Long.MaxValue)) // Fallback to tenant level assertEquals( - quotaManager.getQuota(UserIdentifier("tenant_01", "name_not_exist")), - Quota(Utils.byteStringAsBytes("10G"), 1000, Utils.byteStringAsBytes("10G"), Long.MaxValue)) + quotaManager.getUserStorageQuota(UserIdentifier("tenant_01", "name_not_exist")), + StorageQuota( + Utils.byteStringAsBytes("10G"), + 1000, + 
Utils.byteStringAsBytes("10G"), + Long.MaxValue)) // Fallback to system level assertEquals( - quotaManager.getQuota(UserIdentifier("tenant_not_exist", "Tom")), - Quota(Utils.byteStringAsBytes("1G"), 100, Utils.byteStringAsBytes("1G"), Long.MaxValue)) + quotaManager.getUserStorageQuota(UserIdentifier("tenant_not_exist", "Tom")), + StorageQuota( + Utils.byteStringAsBytes("1G"), + 100, + Utils.byteStringAsBytes("1G"), + Long.MaxValue)) } - test("test check quota return result") { + test("test check user quota return result") { val user = UserIdentifier("tenant_01", "Jerry") val rc1 = ResourceConsumption(Utils.byteStringAsBytes("10G"), 20, Utils.byteStringAsBytes("1G"), 40) @@ -68,22 +135,556 @@ class QuotaManagerSuite extends CelebornFunSuite { Utils.byteStringAsBytes("30G"), 40) - val res1 = quotaManager.checkQuotaSpaceAvailable(user, rc1) - val res2 = quotaManager.checkQuotaSpaceAvailable(user, rc2) - val res3 = quotaManager.checkQuotaSpaceAvailable(user, rc3) + addUserConsumption(user, rc1) + quotaManager.updateResourceConsumption() + val res1 = checkUserQuota(user) + + addUserConsumption(user, rc2) + quotaManager.updateResourceConsumption() + val res2 = checkUserQuota(user) - val exp1 = (true, "") - val exp2 = ( + addUserConsumption(user, rc3) + quotaManager.updateResourceConsumption() + val res3 = checkUserQuota(user) + + val exp1 = CheckQuotaResponse(true, "") + val exp2 = CheckQuotaResponse( false, - s"User $user used hdfsBytesWritten(30.0 GiB) exceeds quota(10.0 GiB). ") - val exp3 = ( + s"Interrupt or reject application caused by the user storage usage reach threshold. " + + s"user: `tenant_01`.`Jerry`. " + + s"HDFS_BYTES_WRITTEN(30.0 GiB) exceeds quota(10.0 GiB). ") + val exp3 = CheckQuotaResponse( false, - s"User $user used diskBytesWritten (200.0 GiB) exceeds quota (100.0 GiB). " + - s"User $user used diskFileCount(20000) exceeds quota(10000). " + - s"User $user used hdfsBytesWritten(30.0 GiB) exceeds quota(10.0 GiB). 
") + s"Interrupt or reject application caused by the user storage usage reach threshold. " + + s"user: `tenant_01`.`Jerry`. " + + s"DISK_BYTES_WRITTEN(200.0 GiB) exceeds quota(100.0 GiB). " + + s"DISK_FILE_COUNT(20000) exceeds quota(10000). " + + s"HDFS_BYTES_WRITTEN(30.0 GiB) exceeds quota(10.0 GiB). ") assert(res1 == exp1) assert(res2 == exp2) assert(res3 == exp3) + clearUserConsumption() + } + + test("test check application quota return result") { + val user = UserIdentifier("tenant_01", "Jerry") + var rc = + ResourceConsumption( + Utils.byteStringAsBytes("200G"), + 20000, + Utils.byteStringAsBytes("30G"), + 40) + rc.withSubResourceConsumptions( + Map( + "app1" -> ResourceConsumption( + Utils.byteStringAsBytes("150G"), + 15000, + Utils.byteStringAsBytes("25G"), + 20), + "app2" -> ResourceConsumption( + Utils.byteStringAsBytes("50G"), + 5000, + Utils.byteStringAsBytes("5G"), + 20)).asJava) + + addUserConsumption(user, rc) + conf.set("celeborn.quota.cluster.diskBytesWritten", "60gb") + configService.refreshCache() + quotaManager.updateResourceConsumption() + var res1 = checkUserQuota(user) + var res2 = checkApplicationQuota(user, "app1") + var res3 = checkApplicationQuota(user, "app2") + + val succeed = CheckQuotaResponse(true, "") + val failed = CheckQuotaResponse( + false, + s"Interrupt or reject application caused by the user storage usage reach threshold. " + + s"user: `tenant_01`.`Jerry`. " + + s"DISK_BYTES_WRITTEN(200.0 GiB) exceeds quota(100.0 GiB). " + + s"DISK_FILE_COUNT(20000) exceeds quota(10000). " + + s"HDFS_BYTES_WRITTEN(30.0 GiB) exceeds quota(10.0 GiB). ") + assert(res1 == failed) + assert(res2 == CheckQuotaResponse( + false, + "Interrupt or reject application caused by the user storage usage reach threshold. 
" + + "Used: " + + "ResourceConsumption(" + + "diskBytesWritten: 150.0 GiB, " + + "diskFileCount: 15000, " + + "hdfsBytesWritten: 25.0 GiB, " + + "hdfsFileCount: 20), " + + "Threshold: " + + "Quota[" + + "diskBytesWritten=100.0 GiB, " + + "diskFileCount=10000, " + + "hdfsBytesWritten=10.0 GiB, " + + "hdfsFileCount=9223372036854775807]")) + assert(res3 == succeed) + + conf.set("celeborn.quota.cluster.diskBytesWritten", "50gb") + configService.refreshCache() + quotaManager.updateResourceConsumption() + res1 = checkUserQuota(user) + res2 = checkApplicationQuota(user, "app1") + res3 = checkApplicationQuota(user, "app2") + + assert(res1 == failed) + assert(res2 == CheckQuotaResponse( + false, + "Interrupt or reject application caused by the user storage usage reach threshold. " + + "Used: ResourceConsumption(" + + "diskBytesWritten: 150.0 GiB, " + + "diskFileCount: 15000, " + + "hdfsBytesWritten: 25.0 GiB, " + + "hdfsFileCount: 20), " + + "Threshold: Quota[" + + "diskBytesWritten=100.0 GiB, " + + "diskFileCount=10000, " + + "hdfsBytesWritten=10.0 GiB, " + + "hdfsFileCount=9223372036854775807]")) + assert(res3 == CheckQuotaResponse( + false, + "Interrupt application caused by the cluster storage usage reach threshold. 
" + + "Used: ResourceConsumption(" + + "diskBytesWritten: 50.0 GiB, " + + "diskFileCount: 5000, " + + "hdfsBytesWritten: 5.0 GiB, " + + "hdfsFileCount: 20), " + + "Threshold: " + + "Quota[" + + "diskBytesWritten=50.0 GiB, " + + "diskFileCount=9223372036854775807, " + + "hdfsBytesWritten=8.0 EiB, " + + "hdfsFileCount=9223372036854775807]")) + clearUserConsumption() + + rc = + ResourceConsumption( + Utils.byteStringAsBytes("50G"), + 1000, + Utils.byteStringAsBytes("5G"), + 40) + rc.withSubResourceConsumptions( + Map( + "app1" -> ResourceConsumption( + Utils.byteStringAsBytes("40G"), + 500, + Utils.byteStringAsBytes("3G"), + 20), + "app2" -> ResourceConsumption( + Utils.byteStringAsBytes("10G"), + 500, + Utils.byteStringAsBytes("2G"), + 20)).asJava) + + addUserConsumption(user, rc) + conf.set("celeborn.quota.cluster.diskBytesWritten", "20gb") + configService.refreshCache() + quotaManager.updateResourceConsumption() + + res1 = checkUserQuota(user) + res2 = checkApplicationQuota(user, "app1") + res3 = checkApplicationQuota(user, "app2") + + assert(res1 == CheckQuotaResponse( + false, + "Interrupt application caused by the cluster storage usage reach threshold. " + + "DISK_BYTES_WRITTEN(50.0 GiB) exceeds quota(20.0 GiB). ")) + assert(res2 == CheckQuotaResponse( + false, + "Interrupt application caused by the cluster storage usage reach threshold. 
" + + "Used: " + + "ResourceConsumption(" + + "diskBytesWritten: 40.0 GiB, " + + "diskFileCount: 500, " + + "hdfsBytesWritten: 3.0 GiB, " + + "hdfsFileCount: 20), " + + "Threshold: " + + "Quota[diskBytesWritten=20.0 GiB, " + + "diskFileCount=9223372036854775807, " + + "hdfsBytesWritten=8.0 EiB, " + + "hdfsFileCount=9223372036854775807]")) + assert(res3 == CheckQuotaResponse(true, "")) + + clearUserConsumption() + } + + test("test handleResourceConsumption time - case1") { + // 1000 users 100wapplications, all exceeded + conf.set("celeborn.quota.tenant.diskBytesWritten", "1mb") + conf.set("celeborn.quota.cluster.diskBytesWritten", "1mb") + configService.refreshCache() + val MAX = 2L * 1024 * 1024 * 1024 + val MIN = 1L * 1024 * 1024 * 1024 + val random = new Random() + for (i <- 0 until 1000) { + val user = UserIdentifier("default", s"user$i") + val subResourceConsumption = (0 until 1000).map { + index => + val appId = s"$user$i app$index" + val consumption = ResourceConsumption( + MIN + Math.abs(random.nextLong()) % (MAX - MIN), + MIN + Math.abs(random.nextLong()) % (MAX - MIN), + MIN + Math.abs(random.nextLong()) % (MAX - MIN), + MIN + Math.abs(random.nextLong()) % (MAX - MIN)) + (appId, consumption) + }.toMap + val userConsumption = subResourceConsumption.values.foldRight( + ResourceConsumption(0, 0, 0, 0))(_ add _) + userConsumption.subResourceConsumptions = subResourceConsumption.asJava + addUserConsumption(user, userConsumption) + } + + val start = System.currentTimeMillis() + quotaManager.updateResourceConsumption() + val duration = System.currentTimeMillis() - start + print(s"duration=$duration") + + val res = resourceConsumptionSource.getMetrics() + for (i <- 0 until 1000) { + val user = UserIdentifier("default", s"user$i") + assert(res.contains( + s"""metrics_diskFileCount_Value{name="user$i",role="Master",tenantId="default"}""")) + assert(res.contains( + s"""metrics_diskFileCount_Value{name="user$i",role="Master",tenantId="default"}""")) + 
assert(res.contains( + s"""metrics_hdfsFileCount_Value{name="user$i",role="Master",tenantId="default"}""")) + assert(res.contains( + s"""metrics_hdfsBytesWritten_Value{name="user$i",role="Master",tenantId="default"}""")) + assertFalse(quotaManager.checkUserQuotaStatus(user).isAvailable) + (0 until 1000).foreach { + index => + val appId = s"$user$i app$index" + assertFalse(quotaManager.checkApplicationQuotaStatus(appId).isAvailable) + } + } + clearUserConsumption() + } + + test("test handleResourceConsumption time - case2") { + // 1000 users 2000000 applications, all exceeded + conf.set("celeborn.quota.tenant.diskBytesWritten", "1mb") + conf.set("celeborn.quota.cluster.diskBytesWritten", "1mb") + configService.refreshCache() + val MAX = 2L * 1024 * 1024 * 1024 + val MIN = 1L * 1024 * 1024 * 1024 + val random = new Random() + for (i <- 0 until 1000) { + val user = UserIdentifier("default", s"user$i") + val subResourceConsumption = + if (i < 100) { + (0 until 1000).map { + index => + val appId = s"$user$i app$index" + val consumption = ResourceConsumption( + MIN + Math.abs(random.nextLong()) % (MAX - MIN), + MIN + Math.abs(random.nextLong()) % (MAX - MIN), + MIN + Math.abs(random.nextLong()) % (MAX - MIN), + MIN + Math.abs(random.nextLong()) % (MAX - MIN)) + (appId, consumption) + }.toMap + } else { + (0 until 1000).map { + index => + val appId = s"$user$i app$index" + val consumption = ResourceConsumption(0, 0, 0, 0) + (appId, consumption) + }.toMap + } + val userConsumption = subResourceConsumption.values.foldRight( + ResourceConsumption(0, 0, 0, 0))(_ add _) + userConsumption.subResourceConsumptions = subResourceConsumption.asJava + addUserConsumption(user, userConsumption) + } + + val start = System.currentTimeMillis() + quotaManager.updateResourceConsumption() + val duration = System.currentTimeMillis() - start + print(s"duration=$duration") + + val res = resourceConsumptionSource.getMetrics() + for (i <- 0 until 1000) { + val user = UserIdentifier("default", 
s"user$i") + assert(res.contains( + s"""metrics_diskFileCount_Value{name="user$i",role="Master",tenantId="default"}""")) + assert(res.contains( + s"""metrics_diskFileCount_Value{name="user$i",role="Master",tenantId="default"}""")) + assert(res.contains( + s"""metrics_hdfsFileCount_Value{name="user$i",role="Master",tenantId="default"}""")) + assert(res.contains( + s"""metrics_hdfsBytesWritten_Value{name="user$i",role="Master",tenantId="default"}""")) + assertFalse(quotaManager.checkUserQuotaStatus(user).isAvailable) + (0 until 1000).foreach { + index => + val appId = s"$user$i app$index" + if (i < 100) { + assertFalse(quotaManager.checkApplicationQuotaStatus(appId).isAvailable) + } else { + assertTrue(quotaManager.checkApplicationQuotaStatus(appId).isAvailable) + } + } + } + clearUserConsumption() + } + + test("test user level conf") { + val conf1 = new CelebornConf() + conf1.set(CelebornConf.DYNAMIC_CONFIG_STORE_BACKEND, "FS") + conf1.set( + CelebornConf.DYNAMIC_CONFIG_STORE_FS_PATH.key, + getTestResourceFile("dynamicConfig-quota-2.yaml").getPath) + val rpcEnv = RpcEnv.create( + "test-rpc", + "rpc", + "localhost", + 9002, + conf, + None) + val statusSystem1 = new SingleMasterMetaManager(rpcEnv, conf) + statusSystem1.availableWorkers.add(worker) + val quotaManager1 = new QuotaManager( + statusSystem1, + new MasterSource(conf1), + resourceConsumptionSource, + conf1, + new FsConfigServiceImpl(conf1)) + + val user = UserIdentifier("tenant_01", "Jerry") + val user1 = UserIdentifier("tenant_01", "John") + + val rc = + ResourceConsumption( + Utils.byteStringAsBytes("200G"), + 20000, + Utils.byteStringAsBytes("30G"), + 40) + rc.withSubResourceConsumptions( + Map( + "app1" -> ResourceConsumption( + Utils.byteStringAsBytes("150G"), + 15000, + Utils.byteStringAsBytes("25G"), + 20), + "app2" -> ResourceConsumption( + Utils.byteStringAsBytes("50G"), + 5000, + Utils.byteStringAsBytes("5G"), + 20)).asJava) + + val rc1 = + ResourceConsumption( + Utils.byteStringAsBytes("80G"), + 
0, + 0, + 0) + + rc1.withSubResourceConsumptions( + Map( + "app3" -> ResourceConsumption( + Utils.byteStringAsBytes("80G"), + 0, + 0, + 0)).asJava) + + addUserConsumption(user, rc) + addUserConsumption(user1, rc1) + + quotaManager1.updateResourceConsumption() + val res1 = quotaManager1.checkUserQuotaStatus(user) + val res2 = quotaManager1.checkApplicationQuotaStatus("app1") + val res3 = quotaManager1.checkApplicationQuotaStatus("app2") + val res4 = quotaManager1.checkApplicationQuotaStatus("app3") + assert(res1 == CheckQuotaResponse( + false, + s"Interrupt or reject application caused by the user storage usage reach threshold. " + + s"user: `tenant_01`.`Jerry`. " + + s"DISK_BYTES_WRITTEN(200.0 GiB) exceeds quota(100.0 GiB). " + + s"DISK_FILE_COUNT(20000) exceeds quota(10000). " + + s"HDFS_BYTES_WRITTEN(30.0 GiB) exceeds quota(10.0 GiB). ")) + assert(res2 == CheckQuotaResponse( + false, + "Interrupt or reject application caused by the user storage usage reach threshold. " + + "Used: ResourceConsumption(" + + "diskBytesWritten: 150.0 GiB, " + + "diskFileCount: 15000, " + + "hdfsBytesWritten: 25.0 GiB, " + + "hdfsFileCount: 20), " + + "Threshold: " + + "Quota[" + + "diskBytesWritten=100.0 GiB, " + + "diskFileCount=10000, " + + "hdfsBytesWritten=10.0 GiB, " + + "hdfsFileCount=9223372036854775807]")) + assert(res3 == CheckQuotaResponse(true, "")) + assert(res4 == CheckQuotaResponse( + false, + "Interrupt or reject application caused by the user storage usage reach threshold. 
" + + "Used: " + + "ResourceConsumption(" + + "diskBytesWritten: 80.0 GiB, " + + "diskFileCount: 0, " + + "hdfsBytesWritten: 0.0 B, " + + "hdfsFileCount: 0), " + + "Threshold: " + + "Quota[" + + "diskBytesWritten=10.0 GiB, " + + "diskFileCount=1000, " + + "hdfsBytesWritten=10.0 GiB, " + + "hdfsFileCount=9223372036854775807]")) + + clearUserConsumption() + } + + test("test tenant level conf") { + val conf1 = new CelebornConf() + conf1.set(CelebornConf.DYNAMIC_CONFIG_STORE_BACKEND, "FS") + conf1.set( + CelebornConf.DYNAMIC_CONFIG_STORE_FS_PATH.key, + getTestResourceFile("dynamicConfig-quota-3.yaml").getPath) + val rpcEnv = RpcEnv.create( + "test-rpc", + "rpc", + "localhost", + 9003, + conf, + None) + val statusSystem1 = new SingleMasterMetaManager(rpcEnv, conf) + statusSystem1.availableWorkers.add(worker) + val quotaManager1 = new QuotaManager( + statusSystem1, + new MasterSource(conf1), + resourceConsumptionSource, + conf1, + new FsConfigServiceImpl(conf1)) + + val user1 = UserIdentifier("tenant_01", "Jerry") + val user2 = UserIdentifier("tenant_01", "John") + + val rc1 = + ResourceConsumption( + Utils.byteStringAsBytes("230G"), + 0, + 0, + 0) + rc1.withSubResourceConsumptions( + Map( + "app1" -> ResourceConsumption( + Utils.byteStringAsBytes("150G"), + 0, + 0, + 0), + "app2" -> ResourceConsumption( + Utils.byteStringAsBytes("80G"), + 0, + 0, + 0)).asJava) + + val rc2 = + ResourceConsumption( + Utils.byteStringAsBytes("220G"), + 0, + 0, + 0) + + rc2.withSubResourceConsumptions( + Map( + "app3" -> ResourceConsumption( + Utils.byteStringAsBytes("150G"), + 0, + 0, + 0), + "app4" -> ResourceConsumption( + Utils.byteStringAsBytes("70G"), + 0, + 0, + 0)).asJava) + + addUserConsumption(user1, rc1) + addUserConsumption(user2, rc2) + + quotaManager1.updateResourceConsumption() + val res1 = quotaManager1.checkUserQuotaStatus(user1) + val res2 = quotaManager1.checkUserQuotaStatus(user2) + val res3 = quotaManager1.checkApplicationQuotaStatus("app1") + val res4 = 
quotaManager1.checkApplicationQuotaStatus("app2") + val res5 = quotaManager1.checkApplicationQuotaStatus("app3") + val res6 = quotaManager1.checkApplicationQuotaStatus("app4") + assert(res1 == CheckQuotaResponse( + false, + "" + + "Interrupt or reject application caused by the user storage usage reach threshold. " + + "user: `tenant_01`.`Jerry`. DISK_BYTES_WRITTEN(230.0 GiB) exceeds quota(100.0 GiB). ")) + assert(res2 == CheckQuotaResponse( + false, + "Interrupt or reject application caused by the user storage usage reach threshold. " + + "user: `tenant_01`.`John`. DISK_BYTES_WRITTEN(220.0 GiB) exceeds quota(100.0 GiB). ")) + assert(res3 == CheckQuotaResponse( + false, + "Interrupt or reject application caused by the user storage usage reach threshold. " + + "Used: ResourceConsumption(" + + "diskBytesWritten: 150.0 GiB, " + + "diskFileCount: 0, " + + "hdfsBytesWritten: 0.0 B, " + + "hdfsFileCount: 0), " + + "Threshold: Quota[" + + "diskBytesWritten=100.0 GiB, " + + "diskFileCount=10000, " + + "hdfsBytesWritten=8.0 EiB, " + + "hdfsFileCount=9223372036854775807]")) + assert(res4 == CheckQuotaResponse( + false, + "Interrupt application caused by the tenant storage usage reach threshold. " + + "Used: ResourceConsumption(" + + "diskBytesWritten: 80.0 GiB, " + + "diskFileCount: 0, " + + "hdfsBytesWritten: 0.0 B, " + + "hdfsFileCount: 0), " + + "Threshold: Quota[" + + "diskBytesWritten=150.0 GiB, " + + "diskFileCount=1500, " + + "hdfsBytesWritten=8.0 EiB, " + + "hdfsFileCount=9223372036854775807]")) + assert(res5 == CheckQuotaResponse( + false, + "Interrupt or reject application caused by the user storage usage reach threshold. 
" + + "Used: ResourceConsumption(" + + "diskBytesWritten: 150.0 GiB, " + + "diskFileCount: 0, " + + "hdfsBytesWritten: 0.0 B, " + + "hdfsFileCount: 0), " + + "Threshold: Quota[" + + "diskBytesWritten=100.0 GiB, " + + "diskFileCount=10000, " + + "hdfsBytesWritten=8.0 EiB, " + + "hdfsFileCount=9223372036854775807]")) + assert(res6 == CheckQuotaResponse(true, "")) + clearUserConsumption() + } + + def checkUserQuota(userIdentifier: UserIdentifier): CheckQuotaResponse = { + quotaManager.checkUserQuotaStatus(userIdentifier) + } + + def checkApplicationQuota( + userIdentifier: UserIdentifier, + applicationId: String): CheckQuotaResponse = { + quotaManager.checkApplicationQuotaStatus(applicationId) + } + + def addUserConsumption( + userIdentifier: UserIdentifier, + resourceConsumption: ResourceConsumption): Unit = { + worker.userResourceConsumption.put(userIdentifier, resourceConsumption) + workerToResourceConsumptions.put(worker.toUniqueId(), worker.userResourceConsumption) + } + + def clearUserConsumption(): Unit = { + val applicationSet = worker.userResourceConsumption.asScala.values.flatMap { consumption => + Option(consumption.subResourceConsumptions).map(_.asScala.keySet) + }.flatten.toSet + + applicationSet.foreach(quotaManager.handleAppLost) + worker.userResourceConsumption.clear() } } diff --git a/service/src/main/java/org/apache/celeborn/server/common/service/config/DynamicConfig.java b/service/src/main/java/org/apache/celeborn/server/common/service/config/DynamicConfig.java index c09ac082392..daa4b67d460 100644 --- a/service/src/main/java/org/apache/celeborn/server/common/service/config/DynamicConfig.java +++ b/service/src/main/java/org/apache/celeborn/server/common/service/config/DynamicConfig.java @@ -26,7 +26,7 @@ import org.apache.celeborn.common.CelebornConf; import org.apache.celeborn.common.internal.config.ConfigEntry; -import org.apache.celeborn.common.quota.Quota; +import org.apache.celeborn.common.quota.StorageQuota; import 
org.apache.celeborn.common.quota.UserTrafficQuota; import org.apache.celeborn.common.quota.WorkerTrafficQuota; import org.apache.celeborn.common.tags.WorkerTagsMeta; @@ -44,7 +44,6 @@ public abstract class DynamicConfig { private static final Logger LOG = LoggerFactory.getLogger(DynamicConfig.class); protected volatile Map configs = new HashMap<>(); - protected volatile Quota quota = null; protected volatile Map> tags = null; public abstract DynamicConfig getParentLevelConfig(); @@ -94,37 +93,26 @@ public T formatValue( return null; } - public Quota getQuota() { - if (quota == null) { - synchronized (DynamicConfig.class) { - if (quota == null) { - quota = currentQuota(); - } - } - } - return quota; - } - - protected Quota currentQuota() { - return new Quota( + public StorageQuota getTenantStorageQuota() { + return new StorageQuota( getValue( - CelebornConf.QUOTA_DISK_BYTES_WRITTEN().key(), - CelebornConf.QUOTA_DISK_BYTES_WRITTEN(), + CelebornConf.QUOTA_TENANT_DISK_BYTES_WRITTEN().key(), + CelebornConf.QUOTA_TENANT_DISK_BYTES_WRITTEN(), Long.TYPE, ConfigType.BYTES), getValue( - CelebornConf.QUOTA_DISK_FILE_COUNT().key(), - CelebornConf.QUOTA_DISK_FILE_COUNT(), + CelebornConf.QUOTA_TENANT_DISK_FILE_COUNT().key(), + CelebornConf.QUOTA_TENANT_DISK_FILE_COUNT(), Long.TYPE, ConfigType.STRING), getValue( - CelebornConf.QUOTA_HDFS_BYTES_WRITTEN().key(), - CelebornConf.QUOTA_HDFS_BYTES_WRITTEN(), + CelebornConf.QUOTA_TENANT_HDFS_BYTES_WRITTEN().key(), + CelebornConf.QUOTA_TENANT_HDFS_BYTES_WRITTEN(), Long.TYPE, ConfigType.BYTES), getValue( - CelebornConf.QUOTA_HDFS_FILE_COUNT().key(), - CelebornConf.QUOTA_HDFS_FILE_COUNT(), + CelebornConf.QUOTA_TENANT_HDFS_FILE_COUNT().key(), + CelebornConf.QUOTA_TENANT_HDFS_FILE_COUNT(), Long.TYPE, ConfigType.STRING)); } @@ -181,6 +169,62 @@ public WorkerTagsMeta getWorkerTagsMeta() { ConfigType.STRING)); } + public StorageQuota getClusterStorageQuota() { + return new StorageQuota( + getValue( + 
CelebornConf.QUOTA_CLUSTER_DISK_BYTES_WRITTEN().key(), + CelebornConf.QUOTA_CLUSTER_DISK_BYTES_WRITTEN(), + Long.TYPE, + ConfigType.BYTES), + getValue( + CelebornConf.QUOTA_CLUSTER_DISK_FILE_COUNT().key(), + CelebornConf.QUOTA_CLUSTER_DISK_FILE_COUNT(), + Long.TYPE, + ConfigType.STRING), + getValue( + CelebornConf.QUOTA_CLUSTER_HDFS_BYTES_WRITTEN().key(), + CelebornConf.QUOTA_CLUSTER_HDFS_BYTES_WRITTEN(), + Long.TYPE, + ConfigType.BYTES), + getValue( + CelebornConf.QUOTA_CLUSTER_HDFS_FILE_COUNT().key(), + CelebornConf.QUOTA_CLUSTER_HDFS_FILE_COUNT(), + Long.TYPE, + ConfigType.STRING)); + } + + public StorageQuota getUserStorageQuota() { + return new StorageQuota( + getValue( + CelebornConf.QUOTA_USER_DISK_BYTES_WRITTEN().key(), + CelebornConf.QUOTA_USER_DISK_BYTES_WRITTEN(), + Long.TYPE, + ConfigType.BYTES), + getValue( + CelebornConf.QUOTA_USER_DISK_FILE_COUNT().key(), + CelebornConf.QUOTA_USER_DISK_FILE_COUNT(), + Long.TYPE, + ConfigType.STRING), + getValue( + CelebornConf.QUOTA_USER_HDFS_BYTES_WRITTEN().key(), + CelebornConf.QUOTA_USER_HDFS_BYTES_WRITTEN(), + Long.TYPE, + ConfigType.BYTES), + getValue( + CelebornConf.QUOTA_USER_HDFS_FILE_COUNT().key(), + CelebornConf.QUOTA_USER_HDFS_FILE_COUNT(), + Long.TYPE, + ConfigType.STRING)); + } + + public boolean interruptShuffleEnabled() { + return getValue( + CelebornConf.QUOTA_INTERRUPT_SHUFFLE_ENABLED().key(), + CelebornConf.QUOTA_INTERRUPT_SHUFFLE_ENABLED(), + Boolean.TYPE, + ConfigType.BOOLEAN); + } + public Map getConfigs() { return configs; } @@ -212,6 +256,7 @@ public enum ConfigType { BYTES, STRING, TIME_MS, + BOOLEAN } public static T convert(Class clazz, String value) { diff --git a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/StorageManager.scala b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/StorageManager.scala index af32daa0b36..859ea5b8e9d 100644 --- a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/StorageManager.scala +++ 
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/StorageManager.scala @@ -919,18 +919,15 @@ final private[worker] class StorageManager(conf: CelebornConf, workerSource: Abs } // userIdentifier -> List((userIdentifier, (applicationId, fileInfo)))) .groupBy(_._1) - .map { case (userIdentifier, userWithFileInfoList) => + .mapValues { userWithFileInfoList => // collect resource consumed by each user on this worker - val userFileInfos = userWithFileInfoList.map(_._2) - ( - userIdentifier, - resourceConsumption( - userFileInfos.map(_._2), - userFileInfos.groupBy(_._1).map { - case (applicationId, appWithFileInfoList) => - (applicationId, resourceConsumption(appWithFileInfoList.map(_._2))) - }.asJava)) - } + val subResourceConsumption = userWithFileInfoList.map(_._2).groupBy(_._1).map { + case (applicationId, appWithFileInfoList) => + (applicationId, resourceConsumption(appWithFileInfoList.map(_._2))) + } + subResourceConsumption.values.foldLeft(ResourceConsumption(0, 0, 0, 0))(_ add _) + .withSubResourceConsumptions(subResourceConsumption.asJava) + }.toMap } } From 57bd3ed45fed8e427bfdfa48aa98aa9e1dfa4f43 Mon Sep 17 00:00:00 2001 From: Xianming Lei <31424839+leixm@users.noreply.github.com> Date: Sun, 12 Jan 2025 23:07:05 +0800 Subject: [PATCH 2/3] fix --- .../service/deploy/master/Master.scala | 1 + .../deploy/master/quota/QuotaManager.scala | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala index 3b154dc5c67..976218a18cf 100644 --- a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala +++ b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala @@ -1058,6 +1058,7 @@ private[celeborn] class Master( override def run(): Unit = { workersAssignedToApp.remove(appId) statusSystem.handleAppLost(appId, requestId) + 
quotaManager.handleAppLost(appId) logInfo(s"Removed application $appId") if (hasHDFSStorage || hasS3Storage) { checkAndCleanExpiredAppDirsOnDFS(appId) diff --git a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala index 7ae6409951d..4fba9932bb2 100644 --- a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala +++ b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala @@ -20,6 +20,7 @@ import java.util.{Map => JMap} import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ +import scala.collection.mutable import org.apache.celeborn.common.CelebornConf import org.apache.celeborn.common.identity.UserIdentifier @@ -190,6 +191,7 @@ class QuotaManager( masterSource.sample(UPDATE_RESOURCE_CONSUMPTION_TIME, this.getClass.getSimpleName, Map.empty) { val clusterQuota = getClusterStorageQuota var clusterResourceConsumption = ResourceConsumption(0, 0, 0, 0) + val activeUsers = mutable.Set[UserIdentifier]() val tenantResourceConsumption = statusSystem.availableWorkers.asScala.flatMap { workerInfo => @@ -199,6 +201,7 @@ class QuotaManager( val userResourceConsumption = tenantConsumptionList.groupBy(_._1).map { case (userIdentifier, userConsumptionList) => + activeUsers.add(userIdentifier) // Step 1: Compute user consumption and set quota status. 
val resourceConsumptionList = userConsumptionList.map(_._2).toSeq val resourceConsumption = computeUserResourceConsumption(resourceConsumptionList) @@ -258,6 +261,9 @@ class QuotaManager( } } + // Clear expired users/tenant quota status + clearQuotaStatus(activeUsers) + // Expire cluster level exceeded app except already expired app clusterQuotaStatus = checkClusterQuotaSpace(clusterResourceConsumption) if (interruptShuffleEnabled && clusterQuotaStatus.exceed) { @@ -331,4 +337,17 @@ class QuotaManager( getResourceConsumption(userIdentifier).hdfsBytesWritten } } + + private def clearQuotaStatus(activeUsers: mutable.Set[UserIdentifier]): Unit = { + userQuotaStatus + .keySet() + .asScala + .diff(activeUsers) + .foreach(userQuotaStatus.remove) + tenantQuotaStatus + .keySet() + .asScala + .diff(activeUsers.map(_.tenantId).toSet) + .foreach(tenantQuotaStatus.remove) + } } From 1cd059a86428ccf221bca093d36404ac6d585767 Mon Sep 17 00:00:00 2001 From: Xianming Lei <31424839+leixm@users.noreply.github.com> Date: Sun, 12 Jan 2025 23:16:10 +0800 Subject: [PATCH 3/3] fix --- .../service/deploy/master/quota/QuotaManagerSuite.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala b/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala index d34164f7f24..59e5cff5189 100644 --- a/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala +++ b/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala @@ -80,6 +80,7 @@ class QuotaManagerSuite extends CelebornFunSuite "localhost", 9001, conf, + "master", None) statusSystem = new SingleMasterMetaManager(rpcEnv, conf) statusSystem.availableWorkers.add(worker) @@ -341,7 +342,7 @@ class QuotaManagerSuite extends CelebornFunSuite val duration = System.currentTimeMillis() - start print(s"duration=$duration") - val 
res = resourceConsumptionSource.getMetrics() + val res = resourceConsumptionSource.getMetrics for (i <- 0 until 1000) { val user = UserIdentifier("default", s"user$i") assert(res.contains( @@ -403,7 +404,7 @@ class QuotaManagerSuite extends CelebornFunSuite val duration = System.currentTimeMillis() - start print(s"duration=$duration") - val res = resourceConsumptionSource.getMetrics() + val res = resourceConsumptionSource.getMetrics for (i <- 0 until 1000) { val user = UserIdentifier("default", s"user$i") assert(res.contains( @@ -440,6 +441,7 @@ class QuotaManagerSuite extends CelebornFunSuite "localhost", 9002, conf, + "master", None) val statusSystem1 = new SingleMasterMetaManager(rpcEnv, conf) statusSystem1.availableWorkers.add(worker) @@ -548,6 +550,7 @@ class QuotaManagerSuite extends CelebornFunSuite "localhost", 9003, conf, + "master", None) val statusSystem1 = new SingleMasterMetaManager(rpcEnv, conf) statusSystem1.availableWorkers.add(worker) @@ -676,7 +679,7 @@ class QuotaManagerSuite extends CelebornFunSuite userIdentifier: UserIdentifier, resourceConsumption: ResourceConsumption): Unit = { worker.userResourceConsumption.put(userIdentifier, resourceConsumption) - workerToResourceConsumptions.put(worker.toUniqueId(), worker.userResourceConsumption) + workerToResourceConsumptions.put(worker.toUniqueId, worker.userResourceConsumption) } def clearUserConsumption(): Unit = {