From 93e50f0108e64c26fbba5a5d8b7b4b472c5728ba Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 13 Oct 2023 14:43:53 -0400 Subject: [PATCH 01/42] First pass at enforcing execution minutes Handled similarly to storage quotas in backend and frontend, except execution minutes quotas are tracked and enforced monthly. Almost surely is buggy at this stage, and there is no soft/hard cap distinction yet. --- backend/btrixcloud/basecrawls.py | 3 + backend/btrixcloud/crawlconfigs.py | 33 +++++++-- backend/btrixcloud/models.py | 2 + backend/btrixcloud/orgs.py | 30 ++++++++ frontend/src/components/orgs-list.ts | 3 + frontend/src/pages/org/index.ts | 88 ++++++++++++++++++++++- frontend/src/pages/org/workflow-detail.ts | 36 +++++++--- frontend/src/pages/org/workflow-editor.ts | 13 +++- frontend/src/pages/org/workflows-list.ts | 6 +- frontend/src/types/org.ts | 1 + frontend/src/utils/LiteElement.ts | 19 +++++ 11 files changed, 216 insertions(+), 18 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 0260e44c65..4af83eb900 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -127,6 +127,9 @@ async def get_crawl( crawl.config.seeds = None crawl.storageQuotaReached = await self.orgs.storage_quota_reached(crawl.oid) + crawl.executionMinutesQuotaReached = ( + await self.orgs.execution_mins_quota_reached(crawl.oid) + ) return crawl diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index a7386b0abc..0c413a15df 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -172,12 +172,19 @@ async def add_crawl_config( ) run_now = config.runNow - quota_reached = await self.org_ops.storage_quota_reached(org.id) + storage_quota_reached = await self.org_ops.storage_quota_reached(org.id) + exec_mins_quota_reached = await self.org_ops.execution_mins_quota_reached( + org.id + ) - if quota_reached: + if storage_quota_reached: run_now = False 
print(f"Storage quota exceeded for org {org.id}", flush=True) + if exec_mins_quota_reached: + run_now = False + print(f"Execution miutes quota exceeded for org {org.id}", flush=True) + crawl_id = await self.crawl_manager.add_crawl_config( crawlconfig=crawlconfig, storage=org.storage, @@ -189,7 +196,12 @@ async def add_crawl_config( if crawl_id and run_now: await self.add_new_crawl(crawl_id, crawlconfig, user, manual=True) - return result.inserted_id, crawl_id, quota_reached + return ( + result.inserted_id, + crawl_id, + storage_quota_reached, + exec_mins_quota_reached, + ) async def add_new_crawl( self, crawl_id: str, crawlconfig: CrawlConfig, user: User, manual: bool @@ -747,6 +759,11 @@ async def run_now(self, cid: str, org: Organization, user: User): if await self.org_ops.storage_quota_reached(org.id): raise HTTPException(status_code=403, detail="storage_quota_reached") + if await self.org_ops.execution_mins_quota_reached(org.id): + raise HTTPException( + status_code=403, detail="execution_minutes_quota_reached" + ) + try: crawl_id = await self.crawl_manager.create_crawl_job( crawlconfig, userid=str(user.id) @@ -981,12 +998,18 @@ async def add_crawl_config( org: Organization = Depends(org_crawl_dep), user: User = Depends(user_dep), ): - cid, new_job_name, quota_reached = await ops.add_crawl_config(config, org, user) + ( + cid, + new_job_name, + storage_quota_reached, + execution_mins_quota_reached, + ) = await ops.add_crawl_config(config, org, user) return { "added": True, "id": str(cid), "run_now_job": new_job_name, - "storageQuotaReached": quota_reached, + "storageQuotaReached": storage_quota_reached, + "executionMinutesQuotaReached": execution_mins_quota_reached, } @router.patch("/{cid}", dependencies=[Depends(org_crawl_dep)]) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 872bb5bc5c..a11aa239af 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -478,6 +478,7 @@ class CrawlOut(BaseMongoModel): 
cid_rev: Optional[int] storageQuotaReached: Optional[bool] + executionMinutesQuotaReached: Optional[bool] # ============================================================================ @@ -687,6 +688,7 @@ class OrgQuotas(BaseModel): maxConcurrentCrawls: Optional[int] = 0 maxPagesPerCrawl: Optional[int] = 0 storageQuota: Optional[int] = 0 + crawlExecMinutesQuota: Optional[int] = 0 # ============================================================================ diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 31fff79b7c..f030bb7699 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -1,6 +1,7 @@ """ Organization API handling """ +import math import os import time import urllib.parse @@ -315,6 +316,27 @@ async def storage_quota_reached(self, oid: uuid.UUID): return False + async def execution_mins_quota_reached(self, oid: uuid.UUID): + """Return boolean indicating if execution minutes quota is met or exceeded.""" + quota = await self.get_org_execution_mins_quota(oid) + if not quota: + return False + + org = await self.orgs.find_one({"_id": oid}) + org = Organization.from_dict(org) + + yymm = datetime.utcnow().strftime("%Y-%m") + try: + monthly_exec_seconds = org.crawlExecSeconds[yymm] + except KeyError: + monthly_exec_seconds = 0 + monthly_exec_minutes = math.floor(monthly_exec_seconds / 60) + + if monthly_exec_minutes >= quota: + return True + + return False + async def get_org_storage_quota(self, oid): """return max allowed concurrent crawls, if any""" org = await self.orgs.find_one({"_id": oid}) @@ -323,6 +345,14 @@ async def get_org_storage_quota(self, oid): return org.quotas.storageQuota return 0 + async def get_org_execution_mins_quota(self, oid): + """return max allowed execution mins per month, if any""" + org = await self.orgs.find_one({"_id": oid}) + if org: + org = Organization.from_dict(org) + return org.quotas.crawlExecMinutesQuota + return 0 + async def set_origin(self, org: Organization, request: Request): 
"""Get origin from request and store in db for use in event webhooks""" headers = request.headers diff --git a/frontend/src/components/orgs-list.ts b/frontend/src/components/orgs-list.ts index f1bf92ebd4..8d7971e6d9 100644 --- a/frontend/src/components/orgs-list.ts +++ b/frontend/src/components/orgs-list.ts @@ -61,6 +61,9 @@ export class OrgsList extends LiteElement { label = msg("Org Storage Quota (GB)"); value = Math.floor(value / 1e9); break; + case "crawlExecMinutesQuota": + label = msg("Org Monthly Execution Minutes Quota"); + break; default: label = msg("Unlabeled"); } diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index d811076a7d..182d48f735 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -97,6 +97,12 @@ export class Org extends LiteElement { @state() private showStorageQuotaAlert = false; + @state() + private orgExecutionMinutesQuotaReached = false; + + @state() + private showExecutionMinutesQuotaAlert = false; + @state() private openDialogName?: ResourceName; @@ -163,6 +169,7 @@ export class Org extends LiteElement { try { this.org = await this.getOrg(this.orgId); this.checkStorageQuota(); + this.checkExecutionMinutesQuota(); } catch { // TODO handle 404 this.org = null; @@ -242,7 +249,8 @@ export class Org extends LiteElement { } return html` - ${this.renderStorageAlert()} ${this.renderOrgNavBar()} + ${this.renderStorageAlert()} ${this.renderExecutionMinutesAlert()} + ${this.renderOrgNavBar()}
+
+ + (this.showExecutionMinutesQuotaAlert = false)} + > + + ${msg( + "Your org has reached its monthly execution minutes limit" + )}
+ ${msg( + "To run crawls again before the montly limit resets, contact us to upgrade your plan." + )} +
+
+
+ `; + } + private renderOrgNavBar() { return html`
@@ -460,11 +498,14 @@ export class Org extends LiteElement { .authState=${this.authState!} orgId=${this.orgId!} ?orgStorageQuotaReached=${this.orgStorageQuotaReached} + ?orgExecutionMinutesQuotaReached=${this + .orgExecutionMinutesQuotaReached} workflowId=${workflowId} openDialogName=${this.viewStateData?.dialog} ?isEditing=${isEditing} ?isCrawler=${this.isCrawler} @storage-quota-update=${this.onStorageQuotaUpdate} + @execution-mins-update=${this.onExecutionMinutesQuotaUpdate} > `; } @@ -481,6 +522,7 @@ export class Org extends LiteElement { .initialSeeds=${seeds} jobType=${ifDefined(this.params.jobType)} @storage-quota-update=${this.onStorageQuotaUpdate} + @execution-mins-update=${this.onExecutionMinutesQuotaUpdate} @select-new-dialog=${this.onSelectNewDialog} >`; } @@ -489,9 +531,11 @@ export class Org extends LiteElement { .authState=${this.authState!} orgId=${this.orgId!} ?orgStorageQuotaReached=${this.orgStorageQuotaReached} + ?orgExecutionMinutesQuotaReached=${this.orgExecutionMinutesQuotaReached} userId=${this.userInfo!.id} ?isCrawler=${this.isCrawler} @storage-quota-update=${this.onStorageQuotaUpdate} + @execution-mins-update=${this.onExecutionMinutesQuotaUpdate} @select-new-dialog=${this.onSelectNewDialog} >`; } @@ -633,6 +677,15 @@ export class Org extends LiteElement { } } + private async onExecutionMinutesQuotaUpdate(e: CustomEvent) { + e.stopPropagation(); + const { reached } = e.detail; + this.orgExecutionMinutesQuotaReached = reached; + if (reached) { + this.showExecutionMinutesQuotaAlert = true; + } + } + private async onUserRoleChange(e: UserRoleChangeEvent) { const { user, newRole } = e.detail; @@ -744,4 +797,37 @@ export class Org extends LiteElement { this.showStorageQuotaAlert = true; } } + + checkExecutionMinutesQuota() { + if ( + !this.org || + !this.org.quotas.crawlExecMinutesQuota || + this.org.quotas.crawlExecMinutesQuota == 0 + ) { + this.orgExecutionMinutesQuotaReached = false; + return; + } + + const todaysDate = new 
Date().toISOString(); + const todaysYear = todaysDate.slice(2, 4); + const todaysMonth = todaysDate.slice(5, 7); + + const monthlyExecutionSeconds = + this.org.crawlExecSeconds[`${todaysYear}${todaysMonth}`]; + if (monthlyExecutionSeconds) { + const monthlyExecutionMinutes = Math.floor(monthlyExecutionSeconds / 60); + + if (monthlyExecutionMinutes >= this.org.quotas.crawlExecMinutesQuota) { + this.orgExecutionMinutesQuotaReached = true; + } else { + this.orgExecutionMinutesQuotaReached = false; + } + } else { + this.orgExecutionMinutesQuotaReached = false; + } + + if (this.orgExecutionMinutesQuotaReached) { + this.showExecutionMinutesQuotaAlert = true; + } + } } diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index c4ad8c5d20..f1ac6a3116 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -49,6 +49,9 @@ export class WorkflowDetail extends LiteElement { @property({ type: Boolean }) orgStorageQuotaReached = false; + @property({ type: Boolean }) + orgExecutionMinutesQuotaReached = false; + @property({ type: String }) workflowId!: string; @@ -578,14 +581,18 @@ export class WorkflowDetail extends LiteElement { `, () => html` this.runNow()} > @@ -625,7 +632,8 @@ export class WorkflowDetail extends LiteElement { () => html` this.runNow()} > @@ -1023,12 +1031,16 @@ export class WorkflowDetail extends LiteElement { )} this.runNow()} > @@ -1107,13 +1119,17 @@ export class WorkflowDetail extends LiteElement {

this.runNow()} > diff --git a/frontend/src/pages/org/workflow-editor.ts b/frontend/src/pages/org/workflow-editor.ts index 67b19e7738..aa8f56b3e0 100644 --- a/frontend/src/pages/org/workflow-editor.ts +++ b/frontend/src/pages/org/workflow-editor.ts @@ -2160,8 +2160,9 @@ https://archiveweb.page/images/${"logo.svg"}`} const crawlId = data.run_now_job; const storageQuotaReached = data.storageQuotaReached; + const executionMinutesQuotaReached = data.executionMinutesQuotaReached; - if (crawlId && storageQuotaReached) { + if (storageQuotaReached) { this.notify({ title: msg("Workflow saved without starting crawl."), message: msg( @@ -2171,6 +2172,16 @@ https://archiveweb.page/images/${"logo.svg"}`} icon: "exclamation-circle", duration: 12000, }); + } else if (executionMinutesQuotaReached) { + this.notify({ + title: msg("Workflow saved without starting crawl."), + message: msg( + "Could not run crawl with new workflow settings due to execution minutes quota." + ), + variant: "warning", + icon: "exclamation-circle", + duration: 12000, + }); } else { let message = msg("Workflow created."); if (crawlId) { diff --git a/frontend/src/pages/org/workflows-list.ts b/frontend/src/pages/org/workflows-list.ts index d81d212852..a6c6ebcf10 100644 --- a/frontend/src/pages/org/workflows-list.ts +++ b/frontend/src/pages/org/workflows-list.ts @@ -74,6 +74,9 @@ export class WorkflowsList extends LiteElement { @property({ type: Boolean }) orgStorageQuotaReached = false; + @property({ type: Boolean }) + orgExecutionMinutesQuotaReached = false; + @property({ type: String }) userId!: string; @@ -440,7 +443,8 @@ export class WorkflowsList extends LiteElement { () => html` this.runNow(workflow)} > diff --git a/frontend/src/types/org.ts b/frontend/src/types/org.ts index 2ae8619175..d6a3358a6a 100644 --- a/frontend/src/types/org.ts +++ b/frontend/src/types/org.ts @@ -14,6 +14,7 @@ export type OrgData = { slug: string; quotas: Record; bytesStored: number; + crawlExecSeconds: Record; users?: { [id: 
string]: { role: (typeof AccessCode)[UserRole]; diff --git a/frontend/src/utils/LiteElement.ts b/frontend/src/utils/LiteElement.ts index 26adf1db92..e60285de5e 100644 --- a/frontend/src/utils/LiteElement.ts +++ b/frontend/src/utils/LiteElement.ts @@ -139,6 +139,7 @@ export default class LiteElement extends LitElement { if (resp.ok) { const body = await resp.json(); const storageQuotaReached = body.storageQuotaReached; + const executionMinutesQuotaReached = body.executionMinutesQuotaReached; if (typeof storageQuotaReached === "boolean") { this.dispatchEvent( new CustomEvent("storage-quota-update", { @@ -147,6 +148,14 @@ export default class LiteElement extends LitElement { }) ); } + if (typeof executionMinutesQuotaReached === "boolean") { + this.dispatchEvent( + new CustomEvent("execution-minutes-quota-update", { + detail: { reached: executionMinutesQuotaReached }, + bubbles: true, + }) + ); + } return body; } @@ -175,6 +184,16 @@ export default class LiteElement extends LitElement { errorMessage = msg("Storage quota reached"); break; } + if (errorDetail === "execution_minutes_quota_reached") { + this.dispatchEvent( + new CustomEvent("execution-minutes-quota-update", { + detail: { reached: true }, + bubbles: true, + }) + ); + errorMessage = msg("Monthly execution minutes quota reached"); + break; + } } case 404: { errorMessage = msg("Not found"); From d203800d5a1551b5a7daa837fbb03817f7024c9e Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 13 Oct 2023 15:25:01 -0400 Subject: [PATCH 02/42] Inc execution time in org db throughout crawl --- backend/btrixcloud/operator.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index 813d6f196e..324b9df320 100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -699,11 +699,11 @@ async def cancel_crawl( running = cstatus["state"].get("running") if running: - self.inc_exec_time( + 
await self.inc_exec_time( name, status, cancel_time, running.get("startedAt") ) - self.handle_terminated_pod( + await self.handle_terminated_pod( name, role, status, cstatus["state"].get("terminated") ) @@ -779,7 +779,9 @@ async def finalize_response( if not children[POD] and not children[PVC]: # ensure exec time was successfully updated - exec_updated = await self.store_exec_time(crawl_id, oid, status.execTime) + exec_updated = await self.store_exec_time_in_crawl( + crawl_id, oid, status.execTime + ) # keep parent until ttl expired, if any if status.finished: @@ -816,7 +818,7 @@ async def _get_redis(self, redis_url): async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics): """sync crawl state for running crawl""" # check if at least one crawler pod started running - crawler_running, redis_running, done = self.sync_pod_status(pods, status) + crawler_running, redis_running, done = await self.sync_pod_status(pods, status) redis = None try: @@ -902,7 +904,7 @@ async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics): if redis: await redis.close() - def sync_pod_status(self, pods, status): + async def sync_pod_status(self, pods, status): """check status of pods""" crawler_running = False redis_running = False @@ -930,7 +932,7 @@ def sync_pod_status(self, pods, status): ): running = True - self.handle_terminated_pod( + await self.handle_terminated_pod( name, role, status, cstatus["state"].get("terminated") ) @@ -947,7 +949,7 @@ def sync_pod_status(self, pods, status): return crawler_running, redis_running, done - def handle_terminated_pod(self, name, role, status, terminated): + async def handle_terminated_pod(self, name, role, status, terminated): """handle terminated pod state""" if not terminated: return @@ -961,7 +963,9 @@ def handle_terminated_pod(self, name, role, status, terminated): pod_status.isNewExit = pod_status.exitTime != exit_time if pod_status.isNewExit and role == "crawler": - self.inc_exec_time(name, status, 
exit_time, terminated.get("startedAt")) + await self.inc_exec_time( + name, status, exit_time, terminated.get("startedAt") + ) pod_status.exitTime = exit_time # detect reason @@ -1279,8 +1283,8 @@ async def do_crawl_finished_tasks( # finally, delete job await self.delete_crawl_job(crawl_id) - def inc_exec_time(self, name, status, finished_at, started_at): - """increment execTime on pod status""" + async def inc_exec_time(self, name, status, finished_at, started_at): + """increment execTime on pod status and in org""" end_time = ( from_k8s_date(finished_at) if not isinstance(finished_at, datetime) @@ -1288,17 +1292,16 @@ def inc_exec_time(self, name, status, finished_at, started_at): ) start_time = from_k8s_date(started_at) exec_time = int((end_time - start_time).total_seconds()) + await self.org_ops.inc_org_time_stats(oid, exec_time, True) status.execTime += exec_time print(f"{name} exec time: {exec_time}") return exec_time - async def store_exec_time(self, crawl_id, oid, exec_time): - """store execTime in crawl (if not already set), and increment org counter""" + async def store_exec_time_in_crawl(self, crawl_id, oid, exec_time): + """store execTime in crawl (if not already set)""" try: if await self.crawl_ops.store_exec_time(crawl_id, exec_time): - print(f"Exec Time: {exec_time}", flush=True) - await self.org_ops.inc_org_time_stats(oid, exec_time, True) - + print(f"Exec Time stored in crawl: {exec_time}", flush=True) return True # pylint: disable=broad-except except Exception as exc: From 9fc1d2e733cc49b5f1cfbee3e89b3ba6e7129d9a Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 13 Oct 2023 15:29:28 -0400 Subject: [PATCH 03/42] Cancel crawl if execution mins quota is exceeded while running --- backend/btrixcloud/operator.py | 50 ++++++++++++++++++++++------------ backend/btrixcloud/orgs.py | 8 +++--- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index 324b9df320..d2a21f31ca 
100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -347,7 +347,7 @@ async def sync_crawls(self, data: MCSyncData): raise HTTPException(status_code=400, detail="out_of_sync_status") return await self.finalize_response( - crawl_id, uuid.UUID(oid), status, spec, data.children, params + crawl_id, status, spec, data.children, params ) # just in case, finished but not deleted, can only get here if @@ -358,7 +358,7 @@ async def sync_crawls(self, data: MCSyncData): ) asyncio.create_task(self.delete_crawl_job(crawl_id)) return await self.finalize_response( - crawl_id, uuid.UUID(oid), status, spec, data.children, params + crawl_id, status, spec, data.children, params ) try: @@ -396,10 +396,20 @@ async def sync_crawls(self, data: MCSyncData): and await self.org_ops.storage_quota_reached(crawl.oid) ): await self.mark_finished( - crawl.id, crawl.cid, crawl.oid, status, "skipped_quota_reached" + crawl.id, + crawl.cid, + crawl.oid, + status, + "skipped_quota_reached", ) return self._empty_response(status) + # Cancel crawl if execution minutes quota is reached while running + if await self.org_ops.execution_mins_quota_reached(crawl.oid): + await self.cancel_crawl( + crawl.id, uuid.UUID(cid), uuid.UUID(oid), status, data.children[POD] + ) + if status.state in ("starting", "waiting_org_limit"): if not await self.can_start_new(crawl, data, status): return self._empty_response(status) @@ -413,7 +423,12 @@ async def sync_crawls(self, data: MCSyncData): self.sync_resources(status, pod_name, pod, data.children) status = await self.sync_crawl_state( - redis_url, crawl, status, pods, data.related.get(METRICS, {}) + redis_url, + crawl, + status, + pods, + data.related.get(METRICS, {}), + crawl.oid, ) # auto sizing handled here @@ -421,7 +436,7 @@ async def sync_crawls(self, data: MCSyncData): if status.finished: return await self.finalize_response( - crawl_id, uuid.UUID(oid), status, spec, data.children, params + crawl_id, status, spec, data.children, params 
) else: status.scale = crawl.scale @@ -700,11 +715,11 @@ async def cancel_crawl( if running: await self.inc_exec_time( - name, status, cancel_time, running.get("startedAt") + name, oid, status, cancel_time, running.get("startedAt") ) await self.handle_terminated_pod( - name, role, status, cstatus["state"].get("terminated") + name, oid, role, status, cstatus["state"].get("terminated") ) status.canceled = True @@ -751,7 +766,6 @@ def _empty_response(self, status): async def finalize_response( self, crawl_id: str, - oid: uuid.UUID, status: CrawlStatus, spec: dict, children: dict, @@ -780,7 +794,7 @@ async def finalize_response( if not children[POD] and not children[PVC]: # ensure exec time was successfully updated exec_updated = await self.store_exec_time_in_crawl( - crawl_id, oid, status.execTime + crawl_id, status.execTime ) # keep parent until ttl expired, if any @@ -815,10 +829,12 @@ async def _get_redis(self, redis_url): return None - async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics): + async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics, oid): """sync crawl state for running crawl""" # check if at least one crawler pod started running - crawler_running, redis_running, done = await self.sync_pod_status(pods, status) + crawler_running, redis_running, done = await self.sync_pod_status( + pods, status, oid + ) redis = None try: @@ -904,7 +920,7 @@ async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics): if redis: await redis.close() - async def sync_pod_status(self, pods, status): + async def sync_pod_status(self, pods, status, oid): """check status of pods""" crawler_running = False redis_running = False @@ -933,7 +949,7 @@ async def sync_pod_status(self, pods, status): running = True await self.handle_terminated_pod( - name, role, status, cstatus["state"].get("terminated") + name, oid, role, status, cstatus["state"].get("terminated") ) if role == "crawler": @@ -949,7 +965,7 @@ async def 
sync_pod_status(self, pods, status): return crawler_running, redis_running, done - async def handle_terminated_pod(self, name, role, status, terminated): + async def handle_terminated_pod(self, name, oid, role, status, terminated): """handle terminated pod state""" if not terminated: return @@ -964,7 +980,7 @@ async def handle_terminated_pod(self, name, role, status, terminated): pod_status.isNewExit = pod_status.exitTime != exit_time if pod_status.isNewExit and role == "crawler": await self.inc_exec_time( - name, status, exit_time, terminated.get("startedAt") + name, oid, status, exit_time, terminated.get("startedAt") ) pod_status.exitTime = exit_time @@ -1283,7 +1299,7 @@ async def do_crawl_finished_tasks( # finally, delete job await self.delete_crawl_job(crawl_id) - async def inc_exec_time(self, name, status, finished_at, started_at): + async def inc_exec_time(self, name, oid, status, finished_at, started_at): """increment execTime on pod status and in org""" end_time = ( from_k8s_date(finished_at) @@ -1297,7 +1313,7 @@ async def inc_exec_time(self, name, status, finished_at, started_at): print(f"{name} exec time: {exec_time}") return exec_time - async def store_exec_time_in_crawl(self, crawl_id, oid, exec_time): + async def store_exec_time_in_crawl(self, crawl_id, exec_time): """store execTime in crawl (if not already set)""" try: if await self.crawl_ops.store_exec_time(crawl_id, exec_time): diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index f030bb7699..6f4d34ac85 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -302,7 +302,7 @@ async def inc_org_bytes_stored(self, oid: uuid.UUID, size: int, type_="crawl"): return await self.storage_quota_reached(oid) # pylint: disable=invalid-name - async def storage_quota_reached(self, oid: uuid.UUID): + async def storage_quota_reached(self, oid: uuid.UUID) -> bool: """Return boolean indicating if storage quota is met or exceeded.""" quota = await 
self.get_org_storage_quota(oid) if not quota: @@ -316,7 +316,7 @@ async def storage_quota_reached(self, oid: uuid.UUID): return False - async def execution_mins_quota_reached(self, oid: uuid.UUID): + async def execution_mins_quota_reached(self, oid: uuid.UUID) -> bool: """Return boolean indicating if execution minutes quota is met or exceeded.""" quota = await self.get_org_execution_mins_quota(oid) if not quota: @@ -337,7 +337,7 @@ async def execution_mins_quota_reached(self, oid: uuid.UUID): return False - async def get_org_storage_quota(self, oid): + async def get_org_storage_quota(self, oid: uuid.UUID) -> int: """return max allowed concurrent crawls, if any""" org = await self.orgs.find_one({"_id": oid}) if org: @@ -345,7 +345,7 @@ async def get_org_storage_quota(self, oid): return org.quotas.storageQuota return 0 - async def get_org_execution_mins_quota(self, oid): + async def get_org_execution_mins_quota(self, oid: uuid.UUID) -> int: """return max allowed execution mins per month, if any""" org = await self.orgs.find_one({"_id": oid}) if org: From eccf365cb4bc8c370711e60b6549eaa53839b919 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 13 Oct 2023 16:48:48 -0400 Subject: [PATCH 04/42] Frontend fixes --- frontend/src/pages/org/index.ts | 13 ++++++------- frontend/src/pages/org/workflow-detail.ts | 4 ++++ frontend/src/pages/org/workflows-list.ts | 4 ++++ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index 182d48f735..c8c85385fe 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -505,7 +505,7 @@ export class Org extends LiteElement { ?isEditing=${isEditing} ?isCrawler=${this.isCrawler} @storage-quota-update=${this.onStorageQuotaUpdate} - @execution-mins-update=${this.onExecutionMinutesQuotaUpdate} + @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} > `; } @@ -522,7 +522,7 @@ export class Org extends LiteElement { 
.initialSeeds=${seeds} jobType=${ifDefined(this.params.jobType)} @storage-quota-update=${this.onStorageQuotaUpdate} - @execution-mins-update=${this.onExecutionMinutesQuotaUpdate} + @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} @select-new-dialog=${this.onSelectNewDialog} >`; } @@ -535,7 +535,7 @@ export class Org extends LiteElement { userId=${this.userInfo!.id} ?isCrawler=${this.isCrawler} @storage-quota-update=${this.onStorageQuotaUpdate} - @execution-mins-update=${this.onExecutionMinutesQuotaUpdate} + @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} @select-new-dialog=${this.onSelectNewDialog} >`; } @@ -809,14 +809,13 @@ export class Org extends LiteElement { } const todaysDate = new Date().toISOString(); - const todaysYear = todaysDate.slice(2, 4); + const todaysYear = todaysDate.slice(0, 4); const todaysMonth = todaysDate.slice(5, 7); + const monthKey = `${todaysYear}-${todaysMonth}`; - const monthlyExecutionSeconds = - this.org.crawlExecSeconds[`${todaysYear}${todaysMonth}`]; + const monthlyExecutionSeconds = this.org.crawlExecSeconds[monthKey]; if (monthlyExecutionSeconds) { const monthlyExecutionMinutes = Math.floor(monthlyExecutionSeconds / 60); - if (monthlyExecutionMinutes >= this.org.quotas.crawlExecMinutesQuota) { this.orgExecutionMinutesQuotaReached = true; } else { diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index f1ac6a3116..2036d28a82 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -1610,6 +1610,10 @@ export class WorkflowDetail extends LiteElement { if (e.isApiError && e.statusCode === 403) { if (e.details === "storage_quota_reached") { message = msg("Your org does not have enough storage to run crawls."); + } else if (e.details === "execution_minutes_quota_reached") { + message = msg( + "Your org has used all of its execution minutes for this month." 
+ ); } else { message = msg("You do not have permission to run crawls."); } diff --git a/frontend/src/pages/org/workflows-list.ts b/frontend/src/pages/org/workflows-list.ts index a6c6ebcf10..068f208c9b 100644 --- a/frontend/src/pages/org/workflows-list.ts +++ b/frontend/src/pages/org/workflows-list.ts @@ -802,6 +802,10 @@ export class WorkflowsList extends LiteElement { if (e.isApiError && e.statusCode === 403) { if (e.details === "storage_quota_reached") { message = msg("Your org does not have enough storage to run crawls."); + } else if (e.details === "execution_minutes_quota_reached") { + message = msg( + "Your org has used all of its execution minutes for this month." + ); } else { message = msg("You do not have permission to run crawls."); } From cb118ee1be3396171068dfc96b7b29d031f555c7 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 13 Oct 2023 17:44:45 -0400 Subject: [PATCH 05/42] Implement hard cap for execution minutes --- backend/btrixcloud/basecrawls.py | 7 +-- backend/btrixcloud/crawlconfigs.py | 21 ++++++--- backend/btrixcloud/models.py | 2 + backend/btrixcloud/operator.py | 7 ++- backend/btrixcloud/orgs.py | 26 ++++++++--- frontend/src/components/orgs-list.ts | 5 +++ frontend/src/pages/org/index.ts | 54 +++++++++++++++++++++-- frontend/src/pages/org/workflow-detail.ts | 16 +++---- frontend/src/pages/org/workflows-list.ts | 4 +- frontend/src/utils/LiteElement.ts | 10 +++++ 10 files changed, 121 insertions(+), 31 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 4af83eb900..be51ef89c1 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -127,9 +127,10 @@ async def get_crawl( crawl.config.seeds = None crawl.storageQuotaReached = await self.orgs.storage_quota_reached(crawl.oid) - crawl.executionMinutesQuotaReached = ( - await self.orgs.execution_mins_quota_reached(crawl.oid) - ) + ( + crawl.executionMinutesQuotaReached, + crawl.executionMinutesHardCapReached, + ) = 
await self.orgs.execution_mins_quota_reached(crawl.oid) return crawl diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index 0c413a15df..396cdb86d3 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -173,17 +173,18 @@ async def add_crawl_config( run_now = config.runNow storage_quota_reached = await self.org_ops.storage_quota_reached(org.id) - exec_mins_quota_reached = await self.org_ops.execution_mins_quota_reached( - org.id - ) + ( + exec_mins_quota_reached, + exec_mins_hard_cap_reached, + ) = await self.org_ops.execution_mins_quota_reached(org.id) if storage_quota_reached: run_now = False print(f"Storage quota exceeded for org {org.id}", flush=True) - if exec_mins_quota_reached: + if exec_mins_hard_cap_reached: run_now = False - print(f"Execution miutes quota exceeded for org {org.id}", flush=True) + print(f"Monthly execution minute hard cap hit for org {org.id}", flush=True) crawl_id = await self.crawl_manager.add_crawl_config( crawlconfig=crawlconfig, @@ -201,6 +202,7 @@ async def add_crawl_config( crawl_id, storage_quota_reached, exec_mins_quota_reached, + exec_mins_hard_cap_reached, ) async def add_new_crawl( @@ -759,9 +761,12 @@ async def run_now(self, cid: str, org: Organization, user: User): if await self.org_ops.storage_quota_reached(org.id): raise HTTPException(status_code=403, detail="storage_quota_reached") - if await self.org_ops.execution_mins_quota_reached(org.id): + _, exec_mins_hard_cap_reached = await self.org_ops.execution_mins_quota_reached( + org.id + ) + if exec_mins_hard_cap_reached: raise HTTPException( - status_code=403, detail="execution_minutes_quota_reached" + status_code=403, detail="execution_minutes_hard_cap_reached" ) try: @@ -1003,6 +1008,7 @@ async def add_crawl_config( new_job_name, storage_quota_reached, execution_mins_quota_reached, + execution_mins_hard_cap_reached, ) = await ops.add_crawl_config(config, org, user) return { "added": True, @@ -1010,6 
+1016,7 @@ async def add_crawl_config( "run_now_job": new_job_name, "storageQuotaReached": storage_quota_reached, "executionMinutesQuotaReached": execution_mins_quota_reached, + "executionMinutesHardCapReached": execution_mins_hard_cap_reached, } @router.patch("/{cid}", dependencies=[Depends(org_crawl_dep)]) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index a11aa239af..e42ce0e4b6 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -479,6 +479,7 @@ class CrawlOut(BaseMongoModel): storageQuotaReached: Optional[bool] executionMinutesQuotaReached: Optional[bool] + executionMinutesHardCapReached: Optional[bool] # ============================================================================ @@ -689,6 +690,7 @@ class OrgQuotas(BaseModel): maxPagesPerCrawl: Optional[int] = 0 storageQuota: Optional[int] = 0 crawlExecMinutesQuota: Optional[int] = 0 + crawlExecExtraMinutesHardCap: Optional[int] = 0 # ============================================================================ diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index d2a21f31ca..a664a9e078 100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -404,8 +404,11 @@ async def sync_crawls(self, data: MCSyncData): ) return self._empty_response(status) - # Cancel crawl if execution minutes quota is reached while running - if await self.org_ops.execution_mins_quota_reached(crawl.oid): + # Cancel crawl if execution minutes hard cap is reached while running + _, exec_mins_hard_cap_reached = await self.org_ops.execution_mins_quota_reached( + crawl.oid + ) + if exec_mins_hard_cap_reached: await self.cancel_crawl( crawl.id, uuid.UUID(cid), uuid.UUID(oid), status, data.children[POD] ) diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 6f4d34ac85..d19b32eb00 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -8,7 +8,7 @@ import uuid from datetime import datetime -from 
typing import Union, Optional +from typing import Union, Optional, Tuple from pymongo import ReturnDocument from pymongo.errors import AutoReconnect, DuplicateKeyError @@ -316,11 +316,17 @@ async def storage_quota_reached(self, oid: uuid.UUID) -> bool: return False - async def execution_mins_quota_reached(self, oid: uuid.UUID) -> bool: + async def execution_mins_quota_reached(self, oid: uuid.UUID) -> Tuple[bool, bool]: """Return boolean indicating if execution minutes quota is met or exceeded.""" quota = await self.get_org_execution_mins_quota(oid) if not quota: - return False + return False, False + + quota_reached = False + hard_cap_reached = False + + hard_cap_additional_mins = await self.get_org_execution_mins_hard_cap(oid) + hard_cap_quota = quota + hard_cap_additional_mins org = await self.orgs.find_one({"_id": oid}) org = Organization.from_dict(org) @@ -333,9 +339,11 @@ async def execution_mins_quota_reached(self, oid: uuid.UUID) -> bool: monthly_exec_minutes = math.floor(monthly_exec_seconds / 60) if monthly_exec_minutes >= quota: - return True + quota_reached = True + if monthly_exec_minutes >= hard_cap_quota: + hard_cap_reached = True - return False + return quota_reached, hard_cap_reached async def get_org_storage_quota(self, oid: uuid.UUID) -> int: """return max allowed concurrent crawls, if any""" @@ -353,6 +361,14 @@ async def get_org_execution_mins_quota(self, oid: uuid.UUID) -> int: return org.quotas.crawlExecMinutesQuota return 0 + async def get_org_execution_mins_hard_cap(self, oid: uuid.UUID) -> int: + """return additional minutes before exec time hard cap, if any""" + org = await self.orgs.find_one({"_id": oid}) + if org: + org = Organization.from_dict(org) + return org.quotas.crawlExecExtraMinutesHardCap + return 0 + async def set_origin(self, org: Organization, request: Request): """Get origin from request and store in db for use in event webhooks""" headers = request.headers diff --git a/frontend/src/components/orgs-list.ts 
b/frontend/src/components/orgs-list.ts index 8d7971e6d9..1164bada83 100644 --- a/frontend/src/components/orgs-list.ts +++ b/frontend/src/components/orgs-list.ts @@ -64,6 +64,11 @@ export class OrgsList extends LiteElement { case "crawlExecMinutesQuota": label = msg("Org Monthly Execution Minutes Quota"); break; + case "crawlExecExtraMinutesHardCap": + label = msg( + "Additional Minutes Over Monthly Execution Quota Before Hard Cap" + ); + break; default: label = msg("Unlabeled"); } diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index c8c85385fe..3c9e27fb1d 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -103,6 +103,9 @@ export class Org extends LiteElement { @state() private showExecutionMinutesQuotaAlert = false; + @state() + private orgExecutionMinutesHardCapReached = false; + @state() private openDialogName?: ResourceName; @@ -498,14 +501,16 @@ export class Org extends LiteElement { .authState=${this.authState!} orgId=${this.orgId!} ?orgStorageQuotaReached=${this.orgStorageQuotaReached} - ?orgExecutionMinutesQuotaReached=${this - .orgExecutionMinutesQuotaReached} + ?orgExecutionMinutesHardCapReached=${this + .orgExecutionMinutesHardCapReached} workflowId=${workflowId} openDialogName=${this.viewStateData?.dialog} ?isEditing=${isEditing} ?isCrawler=${this.isCrawler} @storage-quota-update=${this.onStorageQuotaUpdate} @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} + @execution-minutes-hard-cap-update=${this + .onExecutionMinutesHardCapUpdate} > `; } @@ -523,6 +528,8 @@ export class Org extends LiteElement { jobType=${ifDefined(this.params.jobType)} @storage-quota-update=${this.onStorageQuotaUpdate} @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} + @execution-minutes-hard-cap-update=${this + .onExecutionMinutesHardCapUpdate} @select-new-dialog=${this.onSelectNewDialog} >`; } @@ -531,11 +538,13 @@ export class Org extends LiteElement { 
.authState=${this.authState!} orgId=${this.orgId!} ?orgStorageQuotaReached=${this.orgStorageQuotaReached} - ?orgExecutionMinutesQuotaReached=${this.orgExecutionMinutesQuotaReached} + ?orgExecutionMinutesHardCapReached=${this + .orgExecutionMinutesHardCapReached} userId=${this.userInfo!.id} ?isCrawler=${this.isCrawler} @storage-quota-update=${this.onStorageQuotaUpdate} @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} + @execution-minutes-hard-cap-update=${this.onExecutionMinutesHardCapUpdate} @select-new-dialog=${this.onSelectNewDialog} >`; } @@ -686,6 +695,12 @@ export class Org extends LiteElement { } } + private async onExecutionMinutesHardCapUpdate(e: CustomEvent) { + e.stopPropagation(); + const { reached } = e.detail; + this.orgExecutionMinutesHardCapReached = reached; + } + private async onUserRoleChange(e: UserRoleChangeEvent) { const { user, newRole } = e.detail; @@ -798,7 +813,7 @@ export class Org extends LiteElement { } } - checkExecutionMinutesQuota() { + checkExecutionMinutesQuota(hardCap = false) { if ( !this.org || !this.org.quotas.crawlExecMinutesQuota || @@ -829,4 +844,35 @@ export class Org extends LiteElement { this.showExecutionMinutesQuotaAlert = true; } } + + checkExecutionMinutesHardCap() { + if ( + !this.org || + !this.org.quotas.crawlExecMinutesQuota || + this.org.quotas.crawlExecMinutesQuota == 0 + ) { + this.orgExecutionMinutesQuotaReached = false; + return; + } + + let quota = this.org.quotas.crawlExecMinutesQuota; + if (this.org.quotas.crawlExecExtraMinutesHardCap) { + quota = quota + this.org.quotas.crawlExecExtraMinutesHardCap; + } + + const todaysDate = new Date().toISOString(); + const monthKey = todaysDate.slice(0, 7); + + const monthlyExecutionSeconds = this.org.crawlExecSeconds[monthKey]; + if (monthlyExecutionSeconds) { + const monthlyExecutionMinutes = Math.floor(monthlyExecutionSeconds / 60); + if (monthlyExecutionMinutes >= quota) { + this.orgExecutionMinutesHardCapReached = true; + } else { + 
this.orgExecutionMinutesHardCapReached = false; + } + } else { + this.orgExecutionMinutesHardCapReached = false; + } + } } diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index 2036d28a82..2f71ce109b 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -50,7 +50,7 @@ export class WorkflowDetail extends LiteElement { orgStorageQuotaReached = false; @property({ type: Boolean }) - orgExecutionMinutesQuotaReached = false; + orgExecutionMinutesHardCapReached = false; @property({ type: String }) workflowId!: string; @@ -585,14 +585,14 @@ export class WorkflowDetail extends LiteElement { "Org Storage Full or Monthly Execution Minutes Reached" )} ?disabled=${!this.orgStorageQuotaReached && - !this.orgExecutionMinutesQuotaReached} + !this.orgExecutionMinutesHardCapReached} > this.runNow()} > @@ -633,7 +633,7 @@ export class WorkflowDetail extends LiteElement { this.runNow()} > @@ -1035,12 +1035,12 @@ export class WorkflowDetail extends LiteElement { "Org Storage Full or Monthly Execution Minutes Reached" )} ?disabled=${!this.orgStorageQuotaReached && - !this.orgExecutionMinutesQuotaReached} + !this.orgExecutionMinutesHardCapReached} > this.runNow()} > @@ -1123,13 +1123,13 @@ export class WorkflowDetail extends LiteElement { "Org Storage Full or Monthly Execution Minutes Reached" )} ?disabled=${!this.orgStorageQuotaReached && - !this.orgExecutionMinutesQuotaReached} + !this.orgExecutionMinutesHardCapReached} > this.runNow()} > diff --git a/frontend/src/pages/org/workflows-list.ts b/frontend/src/pages/org/workflows-list.ts index 068f208c9b..f5276575f2 100644 --- a/frontend/src/pages/org/workflows-list.ts +++ b/frontend/src/pages/org/workflows-list.ts @@ -75,7 +75,7 @@ export class WorkflowsList extends LiteElement { orgStorageQuotaReached = false; @property({ type: Boolean }) - orgExecutionMinutesQuotaReached = false; + orgExecutionMinutesHardCapReached = false; @property({ 
type: String }) userId!: string; @@ -444,7 +444,7 @@ export class WorkflowsList extends LiteElement { this.runNow(workflow)} > diff --git a/frontend/src/utils/LiteElement.ts b/frontend/src/utils/LiteElement.ts index e60285de5e..54f07be1f5 100644 --- a/frontend/src/utils/LiteElement.ts +++ b/frontend/src/utils/LiteElement.ts @@ -184,6 +184,16 @@ export default class LiteElement extends LitElement { errorMessage = msg("Storage quota reached"); break; } + if (errorDetail === "execution_minutes_hard_cap_reached") { + this.dispatchEvent( + new CustomEvent("execution-minutes-hard-cap-update", { + detail: { reached: true }, + bubbles: true, + }) + ); + errorMessage = msg("Monthly execution minutes hard cap reached"); + break; + } if (errorDetail === "execution_minutes_quota_reached") { this.dispatchEvent( new CustomEvent("execution-minutes-quota-update", { From 75904020c16df85326a9513fd694d124571b22d3 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 13 Oct 2023 18:35:45 -0400 Subject: [PATCH 06/42] Update execution minutes alert text --- frontend/src/pages/org/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index 3c9e27fb1d..e6bfa1b90e 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -314,7 +314,7 @@ export class Org extends LiteElement { )}
${msg( - "To run crawls again before the montly limit resets, contact us to upgrade your plan." + "To purchase additional monthly execution minutes, contact us to upgrade your plan." )}
From 4bb38394f796369d04b3d36ebb6a37d6928b40a1 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 13 Oct 2023 18:46:07 -0400 Subject: [PATCH 07/42] Check for execution_minutes_hard_cap_reached --- frontend/src/pages/org/workflow-detail.ts | 2 +- frontend/src/pages/org/workflows-list.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index 2f71ce109b..abc2dd4462 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -1610,7 +1610,7 @@ export class WorkflowDetail extends LiteElement { if (e.isApiError && e.statusCode === 403) { if (e.details === "storage_quota_reached") { message = msg("Your org does not have enough storage to run crawls."); - } else if (e.details === "execution_minutes_quota_reached") { + } else if (e.details === "execution_minutes_hard_cap_reached") { message = msg( "Your org has used all of its execution minutes for this month." ); diff --git a/frontend/src/pages/org/workflows-list.ts b/frontend/src/pages/org/workflows-list.ts index f5276575f2..6a9652deb6 100644 --- a/frontend/src/pages/org/workflows-list.ts +++ b/frontend/src/pages/org/workflows-list.ts @@ -802,7 +802,7 @@ export class WorkflowsList extends LiteElement { if (e.isApiError && e.statusCode === 403) { if (e.details === "storage_quota_reached") { message = msg("Your org does not have enough storage to run crawls."); - } else if (e.details === "execution_minutes_quota_reached") { + } else if (e.details === "execution_minutes_hard_cap_reached") { message = msg( "Your org has used all of its execution minutes for this month." 
); From 16ff01b4e09686228c59031b847bd2d51e5e392a Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 16 Oct 2023 16:02:44 -0400 Subject: [PATCH 08/42] Move hard cap additional minutes to org owner settings --- backend/btrixcloud/models.py | 10 +++- backend/btrixcloud/orgs.py | 24 +++++++- frontend/src/components/orgs-list.ts | 5 -- frontend/src/pages/org/settings.ts | 89 +++++++++++++++++++++++++++- frontend/src/routes.ts | 2 +- frontend/src/types/org.ts | 1 + 6 files changed, 122 insertions(+), 9 deletions(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index e42ce0e4b6..706de61364 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -659,6 +659,13 @@ class RenameOrg(BaseModel): slug: Optional[str] = None +# ============================================================================ +class OrgUpdateExecMinsOverage(BaseModel): + """Update allowed exec mins overage""" + + allowedOverage: int + + # ============================================================================ class DefaultStorage(BaseModel): """Storage reference""" @@ -690,7 +697,6 @@ class OrgQuotas(BaseModel): maxPagesPerCrawl: Optional[int] = 0 storageQuota: Optional[int] = 0 crawlExecMinutesQuota: Optional[int] = 0 - crawlExecExtraMinutesHardCap: Optional[int] = 0 # ============================================================================ @@ -750,6 +756,8 @@ class Organization(BaseMongoModel): quotas: Optional[OrgQuotas] = OrgQuotas() + crawlExecMinutesAllowedOverage: Optional[int] = 0 + webhookUrls: Optional[OrgWebhookUrls] = OrgWebhookUrls() origin: Optional[AnyHttpUrl] = None diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index d19b32eb00..6552832443 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -22,6 +22,7 @@ OrgQuotas, OrgMetrics, OrgWebhookUrls, + OrgUpdateExecMinsOverage, RenameOrg, UpdateRole, RemovePendingInvite, @@ -228,6 +229,15 @@ async def update_quotas(self, org: 
Organization, quotas: OrgQuotas): }, ) + async def update_execution_mins_overage( + self, org: Organization, allowed_overage: int = 0 + ): + """update allowed execution minutes overage""" + return await self.orgs.find_one_and_update( + {"_id": org.id}, + {"$set": {"crawlExecMinutesAllowedOverage": allowed_overage}}, + ) + async def update_event_webhook_urls(self, org: Organization, urls: OrgWebhookUrls): """Update organization event webhook URLs""" return await self.orgs.find_one_and_update( @@ -366,7 +376,7 @@ async def get_org_execution_mins_hard_cap(self, oid: uuid.UUID) -> int: org = await self.orgs.find_one({"_id": oid}) if org: org = Organization.from_dict(org) - return org.quotas.crawlExecExtraMinutesHardCap + return org.crawlExecMinutesAllowedOverage return 0 async def set_origin(self, org: Organization, request: Request): @@ -610,6 +620,18 @@ async def update_quotas( return {"updated": True} + @router.post("/billing", tags=["organizations"]) + async def update_org_billing_settings( + settings: OrgUpdateExecMinsOverage, + org: Organization = Depends(org_owner_dep), + user: User = Depends(user_dep), + ): + await ops.update_execution_mins_overage( + org, settings.crawlExecMinutesAllowedOverage + ) + + return {"updated": True} + @router.post("/event-webhook-urls", tags=["organizations"]) async def update_event_webhook_urls( urls: OrgWebhookUrls, diff --git a/frontend/src/components/orgs-list.ts b/frontend/src/components/orgs-list.ts index 1164bada83..8d7971e6d9 100644 --- a/frontend/src/components/orgs-list.ts +++ b/frontend/src/components/orgs-list.ts @@ -64,11 +64,6 @@ export class OrgsList extends LiteElement { case "crawlExecMinutesQuota": label = msg("Org Monthly Execution Minutes Quota"); break; - case "crawlExecExtraMinutesHardCap": - label = msg( - "Additional Minutes Over Monthly Execution Quota Before Hard Cap" - ); - break; default: label = msg("Unlabeled"); } diff --git a/frontend/src/pages/org/settings.ts b/frontend/src/pages/org/settings.ts index 
e63ebb9264..1a8a6fb121 100644 --- a/frontend/src/pages/org/settings.ts +++ b/frontend/src/pages/org/settings.ts @@ -14,7 +14,7 @@ import type { CurrentUser } from "../../types/user"; import type { APIPaginatedList } from "../../types/api"; import { maxLengthValidator } from "../../utils/form"; -type Tab = "information" | "members"; +type Tab = "information" | "members" | "billing"; type User = { email: string; role: number; @@ -78,6 +78,9 @@ export class OrgSettings extends LiteElement { @property({ type: Boolean }) isSavingOrgName = false; + @property({ type: Boolean }) + isSavingOrgBilling = false; + @state() pendingInvites: Invite[] = []; @@ -94,6 +97,7 @@ export class OrgSettings extends LiteElement { return { information: msg("General"), members: msg("Members"), + billing: msg("Billing"), }; } @@ -142,6 +146,7 @@ export class OrgSettings extends LiteElement { ${this.renderTab("information", "settings")} ${this.renderTab("members", "settings/members")} + ${this.renderTab("billing", "settings/billing")} ${this.renderInformation()}${this.renderMembers()} + ${this.renderBilling()} `; } private renderTab(name: Tab, path: string) { + console.log(`Active panel: ${this.activePanel}`); const isActive = name === this.activePanel; return html` +
+
+
+ +
+
+
+ +
+
+ ${msg( + "Allowed overage minutes beyond monthly execution minutes quota before users are no longer allowed to create new crawls." + )} +
+
+
+ ${msg("Save Changes")} +
+ +
`; + } + private renderUserRole({ role }: User) { if (isAdmin(role)) return msg("Admin"); if (isCrawler(role)) return msg("Crawler"); @@ -552,6 +599,46 @@ export class OrgSettings extends LiteElement { this.isSubmittingInvite = false; } + private async onOrgBillingSubmit(e: SubmitEvent) { + e.preventDefault(); + + const formEl = e.target as HTMLFormElement; + if (!(await this.checkFormValidity(formEl))) return; + + this.isSavingOrgBilling = true; + + const { execMinutesOverage } = serialize(formEl); + + try { + const data = await this.apiFetch( + `/orgs/${this.orgId}/billing`, + this.authState!, + { + method: "POST", + body: JSON.stringify({ + crawlExecMinutesAllowedOverage: execMinutesOverage, + }), + } + ); + + this.notify({ + message: msg(str`Successfully updated org billing settings.`), + variant: "success", + icon: "check2-circle", + }); + } catch (e: any) { + this.notify({ + message: e.isApiError + ? e.message + : msg("Sorry, couldn't update org at this time."), + variant: "danger", + icon: "exclamation-octagon", + }); + } + + this.isSavingOrgBilling = false; + } + private async removeInvite(invite: Invite) { try { await this.apiFetch( diff --git a/frontend/src/routes.ts b/frontend/src/routes.ts index 0baf97ed5b..5eb9474f43 100644 --- a/frontend/src/routes.ts +++ b/frontend/src/routes.ts @@ -17,7 +17,7 @@ export const ROUTES = { "(/items(/:itemType(/:itemId)))", "(/collections(/new)(/view/:collectionId(/:collectionTab))(/edit/:collectionId))", "(/browser-profiles(/profile(/browser/:browserId)(/:browserProfileId)))", - "(/settings(/members))", + "(/settings(/members)(/billing))", ].join(""), users: "/users", usersInvite: "/users/invite", diff --git a/frontend/src/types/org.ts b/frontend/src/types/org.ts index d6a3358a6a..d83e637e72 100644 --- a/frontend/src/types/org.ts +++ b/frontend/src/types/org.ts @@ -15,6 +15,7 @@ export type OrgData = { quotas: Record; bytesStored: number; crawlExecSeconds: Record; + crawlExecMinutesAllowedOverage: number; users?: { [id: 
string]: { role: (typeof AccessCode)[UserRole]; From 4fe3730a38fa539e2ffb684f9665d792f9fb8830 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 16 Oct 2023 16:07:06 -0400 Subject: [PATCH 09/42] Update field in OrgUpdateExecMinsOverage --- backend/btrixcloud/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 706de61364..04993a60df 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -663,7 +663,7 @@ class RenameOrg(BaseModel): class OrgUpdateExecMinsOverage(BaseModel): """Update allowed exec mins overage""" - allowedOverage: int + crawlExecMinutesAllowedOverage: int # ============================================================================ From 14e982104d14b2e2f6735d107f2105b7f3aef318 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 16 Oct 2023 16:09:31 -0400 Subject: [PATCH 10/42] Remove unused user arg --- backend/btrixcloud/orgs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 6552832443..5aef30e1d0 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -624,7 +624,6 @@ async def update_quotas( async def update_org_billing_settings( settings: OrgUpdateExecMinsOverage, org: Organization = Depends(org_owner_dep), - user: User = Depends(user_dep), ): await ops.update_execution_mins_overage( org, settings.crawlExecMinutesAllowedOverage From 3b7855cae1d70cca4e7a9fd2a760a38b46beb082 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 16 Oct 2023 17:08:29 -0400 Subject: [PATCH 11/42] Fix setting overage in org owner settings --- backend/btrixcloud/models.py | 1 + frontend/src/pages/org/index.ts | 5 ++--- frontend/src/pages/org/settings.ts | 1 - frontend/src/routes.ts | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 04993a60df..59cd35fb1d 100644 --- 
a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -729,6 +729,7 @@ class OrgOut(BaseMongoModel): webhookUrls: Optional[OrgWebhookUrls] = OrgWebhookUrls() quotas: Optional[OrgQuotas] = OrgQuotas() + crawlExecMinutesAllowedOverage: int = 0 # ============================================================================ diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index e6bfa1b90e..3bba3e9d06 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -59,6 +59,7 @@ type Params = { collectionTab?: string; itemType?: Crawl["type"]; jobType?: JobType; + settingsTab?: string; new?: ResourceName; }; const defaultTab = "home"; @@ -607,9 +608,7 @@ export class Org extends LiteElement { private renderOrgSettings() { if (!this.userInfo || !this.org) return; - const activePanel = this.orgPath.includes("/members") - ? "members" - : "information"; + const activePanel = this.params.settingsTab || "information"; const isAddingMember = this.params.hasOwnProperty("invite"); return html` Date: Mon, 16 Oct 2023 17:08:42 -0400 Subject: [PATCH 12/42] Display execution time in crawl detail overview --- frontend/src/pages/org/crawl-detail.ts | 10 ++++++++++ frontend/src/types/crawler.ts | 2 ++ 2 files changed, 12 insertions(+) diff --git a/frontend/src/pages/org/crawl-detail.ts b/frontend/src/pages/org/crawl-detail.ts index 2af1fa71b2..bd9b7f7f4c 100644 --- a/frontend/src/pages/org/crawl-detail.ts +++ b/frontend/src/pages/org/crawl-detail.ts @@ -4,6 +4,7 @@ import { when } from "lit/directives/when.js"; import { ifDefined } from "lit/directives/if-defined.js"; import { classMap } from "lit/directives/class-map.js"; import { msg, localized, str } from "@lit/localize"; +import humanizeDuration from "pretty-ms"; import type { PageChangeEvent } from "../../components/pagination"; import { RelativeDuration } from "../../components/relative-duration"; @@ -644,6 +645,15 @@ export class CrawlDetail extends LiteElement { 
`} + + ${this.crawl!.finished + ? html`${humanizeDuration( + this.crawl!.crawlExecSeconds * 1000 + )}` + : html`${msg("Pending")}`} + ${this.crawl!.manual ? msg( diff --git a/frontend/src/types/crawler.ts b/frontend/src/types/crawler.ts index 0a1b28210f..3fa472119e 100644 --- a/frontend/src/types/crawler.ts +++ b/frontend/src/types/crawler.ts @@ -135,6 +135,7 @@ export type Crawl = CrawlConfig & { collectionIds: string[]; collections: { id: string; name: string }[]; type?: "crawl" | "upload" | null; + crawlExecSeconds: number; }; export type Upload = Omit< @@ -146,6 +147,7 @@ export type Upload = Omit< | "stopping" | "firstSeed" | "seedCount" + | "crawlExecSeconds" > & { type: "upload"; }; From 5d862362572a2e5ee3b91baed943536995dde4b1 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 16 Oct 2023 17:24:59 -0400 Subject: [PATCH 13/42] Fix setting crawlExecSeconds in crawl --- backend/btrixcloud/crawls.py | 9 +++++++-- backend/btrixcloud/operator.py | 5 ++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py index 1f6a5e3adc..628aabb9fc 100644 --- a/backend/btrixcloud/crawls.py +++ b/backend/btrixcloud/crawls.py @@ -530,9 +530,14 @@ async def update_running_crawl_stats(self, crawl_id, stats): async def store_exec_time(self, crawl_id, exec_time): """set exec time, only if not already set""" - query = {"_id": crawl_id, "type": "crawl", "execTime": {"$in": [0, None]}} + query = { + "_id": crawl_id, + "type": "crawl", + "crawlExecSeconds": {"$in": [0, None]}, + } return await self.crawls.find_one_and_update( - query, {"$set": {"execTime": exec_time}} + query, + {"$set": {"crawlExecSeconds": exec_time}}, ) async def get_crawl_state(self, crawl_id): diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index a664a9e078..dcace78ac1 100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -1316,11 +1316,10 @@ async def inc_exec_time(self, name, oid, 
status, finished_at, started_at): print(f"{name} exec time: {exec_time}") return exec_time - async def store_exec_time_in_crawl(self, crawl_id, exec_time): + async def store_exec_time_in_crawl(self, crawl_id: str, exec_time: int): """store execTime in crawl (if not already set)""" try: - if await self.crawl_ops.store_exec_time(crawl_id, exec_time): - print(f"Exec Time stored in crawl: {exec_time}", flush=True) + await self.crawl_ops.store_exec_time(crawl_id, exec_time) return True # pylint: disable=broad-except except Exception as exc: From a4176c91bbef87f91507ad0a1b9c88572a4c3882 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 16 Oct 2023 17:42:33 -0400 Subject: [PATCH 14/42] Update docstring --- backend/btrixcloud/operator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index dcace78ac1..3cb19a24c3 100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -1317,7 +1317,7 @@ async def inc_exec_time(self, name, oid, status, finished_at, started_at): return exec_time async def store_exec_time_in_crawl(self, crawl_id: str, exec_time: int): - """store execTime in crawl (if not already set)""" + """store crawlExecSeconds in crawl (if not already set)""" try: await self.crawl_ops.store_exec_time(crawl_id, exec_time) return True From 911bf6004a543b6fbbb04aa1715bb8d5d6f3d507 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 16 Oct 2023 17:49:18 -0400 Subject: [PATCH 15/42] Add backend test for setting org exec time overage --- backend/test/test_org.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/backend/test/test_org.py b/backend/test/test_org.py index fa6ce9cb63..00f5210cb4 100644 --- a/backend/test/test_org.py +++ b/backend/test/test_org.py @@ -435,3 +435,33 @@ def test_get_org_slug_lookup_non_superadmin(crawler_auth_headers): r = requests.get(f"{API_PREFIX}/orgs/slug-lookup", headers=crawler_auth_headers) assert 
r.status_code == 403 assert r.json()["detail"] == "Not Allowed" + + +def test_set_org_execution_time_overage(admin_auth_headers, default_org_id): + NEW_VALUE = 30 + r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers) + assert r.status_code == 200 + data = r.json() + value_before = data.get("crawlExecMinutesAllowedOverage", 0) + assert value_before != NEW_VALUE + + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/billing", + headers=admin_auth_headers, + json={"crawlExecMinutesAllowedOverage": NEW_VALUE}, + ) + assert r.status_code == 200 + + r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers) + assert r.json()["crawlExecMinutesAllowedOverage"] == NEW_VALUE + + +def test_set_org_execution_time_overage_non_superadmin( + crawler_auth_headers, default_org_id +): + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/billing", + headers=crawler_auth_headers, + json={"crawlExecMinutesAllowedOverage": 60}, + ) + assert r.status_code == 403 From 1eb5861be45fb9676e98fe12965b3252a734c6c5 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 16 Oct 2023 23:39:52 -0400 Subject: [PATCH 16/42] Fix and simplify checking quota and hard cap in org --- frontend/src/pages/org/index.ts | 50 +++++++++------------------------ 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index 3bba3e9d06..d47f717453 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -812,7 +812,7 @@ export class Org extends LiteElement { } } - checkExecutionMinutesQuota(hardCap = false) { + checkExecutionMinutesQuota() { if ( !this.org || !this.org.quotas.crawlExecMinutesQuota || @@ -822,56 +822,32 @@ export class Org extends LiteElement { return; } - const todaysDate = new Date().toISOString(); - const todaysYear = todaysDate.slice(0, 4); - const todaysMonth = todaysDate.slice(5, 7); - const monthKey = 
`${todaysYear}-${todaysMonth}`; - + const monthKey = new Date().toISOString().slice(0, 7); const monthlyExecutionSeconds = this.org.crawlExecSeconds[monthKey]; + const quota = this.org.quotas.crawlExecMinutesQuota; + const hardCap = quota + (this.org.crawlExecMinutesAllowedOverage || 0); + if (monthlyExecutionSeconds) { const monthlyExecutionMinutes = Math.floor(monthlyExecutionSeconds / 60); - if (monthlyExecutionMinutes >= this.org.quotas.crawlExecMinutesQuota) { + + if (monthlyExecutionMinutes >= quota) { this.orgExecutionMinutesQuotaReached = true; } else { this.orgExecutionMinutesQuotaReached = false; } - } else { - this.orgExecutionMinutesQuotaReached = false; - } - if (this.orgExecutionMinutesQuotaReached) { - this.showExecutionMinutesQuotaAlert = true; - } - } - - checkExecutionMinutesHardCap() { - if ( - !this.org || - !this.org.quotas.crawlExecMinutesQuota || - this.org.quotas.crawlExecMinutesQuota == 0 - ) { - this.orgExecutionMinutesQuotaReached = false; - return; - } - - let quota = this.org.quotas.crawlExecMinutesQuota; - if (this.org.quotas.crawlExecExtraMinutesHardCap) { - quota = quota + this.org.quotas.crawlExecExtraMinutesHardCap; - } - - const todaysDate = new Date().toISOString(); - const monthKey = todaysDate.slice(0, 7); - - const monthlyExecutionSeconds = this.org.crawlExecSeconds[monthKey]; - if (monthlyExecutionSeconds) { - const monthlyExecutionMinutes = Math.floor(monthlyExecutionSeconds / 60); - if (monthlyExecutionMinutes >= quota) { + if (monthlyExecutionMinutes >= hardCap) { this.orgExecutionMinutesHardCapReached = true; } else { this.orgExecutionMinutesHardCapReached = false; } } else { + this.orgExecutionMinutesQuotaReached = false; this.orgExecutionMinutesHardCapReached = false; } + + if (this.orgExecutionMinutesQuotaReached) { + this.showExecutionMinutesQuotaAlert = true; + } } } From 05b29691afb4cad7315b81fc3ca99e7e0b9be9d4 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Oct 2023 00:45:46 -0400 Subject: [PATCH 17/42] 
Cancel crawl immediately when exec minutes hard cap passed Track runningExecTime in operator CrawlStatus to track execution time for running crawler pods. If the running crawl time + monthly execution time tracked in org exceeds hard cap, cancel crawl. This should cancel the crawl within about 5-10 seconds of hard cap being exceeded (i.e. on next sync), rather than at the end of the crawl or when a crawler pod restarts as before. --- backend/btrixcloud/operator.py | 23 ++++++++++++++++++++--- backend/btrixcloud/orgs.py | 26 ++++++++++++++++---------- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index 3cb19a24c3..ca8156df99 100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -219,6 +219,10 @@ class CrawlStatus(BaseModel): execTime: int = 0 canceled: bool = False + # Track estimated exec time as crawlers are running to cancel + # the crawl quickly if execution minutes hard cap is reached + runningExecTime: int = 0 + # don't include in status, use by metacontroller resync_after: Optional[int] = None @@ -313,7 +317,7 @@ async def sync_profile_browsers(self, data: MCSyncData): return {"status": {}, "children": children} - # pylint: disable=too-many-return-statements + # pylint: disable=too-many-return-statements, invalid-name async def sync_crawls(self, data: MCSyncData): """sync crawls""" @@ -404,15 +408,17 @@ async def sync_crawls(self, data: MCSyncData): ) return self._empty_response(status) - # Cancel crawl if execution minutes hard cap is reached while running _, exec_mins_hard_cap_reached = await self.org_ops.execution_mins_quota_reached( - crawl.oid + crawl.oid, status.runningExecTime ) if exec_mins_hard_cap_reached: await self.cancel_crawl( crawl.id, uuid.UUID(cid), uuid.UUID(oid), status, data.children[POD] ) + # Reset runningExecTime to recalculate below for next sync + status.runningExecTime = 0 + if status.state in ("starting", 
"waiting_org_limit"): if not await self.can_start_new(crawl, data, status): return self._empty_response(status) @@ -955,6 +961,9 @@ async def sync_pod_status(self, pods, status, oid): name, oid, role, status, cstatus["state"].get("terminated") ) + if role == "crawler": + self.increment_running_exec_time(cstatus["state"], status) + if role == "crawler": crawler_running = crawler_running or running done = done and phase == "Succeeded" @@ -968,6 +977,14 @@ async def sync_pod_status(self, pods, status, oid): return crawler_running, redis_running, done + def increment_running_exec_time(self, container_state, status): + """Increment runningExecTime in crawl status for running crawler pod""" + running = container_state.get("running") + if running: + start_time = from_k8s_date(running.get("startedAt")) + running_exec_time = int((datetime.now() - start_time).total_seconds()) + status.runningExecTime += running_exec_time + async def handle_terminated_pod(self, name, oid, role, status, terminated): """handle terminated pod state""" if not terminated: diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 5aef30e1d0..ad4a1e4488 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -326,8 +326,20 @@ async def storage_quota_reached(self, oid: uuid.UUID) -> bool: return False - async def execution_mins_quota_reached(self, oid: uuid.UUID) -> Tuple[bool, bool]: - """Return boolean indicating if execution minutes quota is met or exceeded.""" + async def get_this_month_crawl_exec_seconds(self, oid: uuid.UUID) -> int: + """Return crawlExecSeconds for current month""" + org = await self.orgs.find_one({"_id": oid}) + org = Organization.from_dict(org) + yymm = datetime.utcnow().strftime("%Y-%m") + try: + return org.crawlExecSeconds[yymm] + except KeyError: + return 0 + + async def execution_mins_quota_reached( + self, oid: uuid.UUID, running_exec_seconds: int = 0 + ) -> Tuple[bool, bool]: + """Return bools for if execution minutes quota and hard 
cap are reached.""" quota = await self.get_org_execution_mins_quota(oid) if not quota: return False, False @@ -338,14 +350,8 @@ async def execution_mins_quota_reached(self, oid: uuid.UUID) -> Tuple[bool, bool hard_cap_additional_mins = await self.get_org_execution_mins_hard_cap(oid) hard_cap_quota = quota + hard_cap_additional_mins - org = await self.orgs.find_one({"_id": oid}) - org = Organization.from_dict(org) - - yymm = datetime.utcnow().strftime("%Y-%m") - try: - monthly_exec_seconds = org.crawlExecSeconds[yymm] - except KeyError: - monthly_exec_seconds = 0 + monthly_exec_seconds = await self.get_this_month_crawl_exec_seconds(oid) + monthly_exec_seconds = monthly_exec_seconds + running_exec_seconds monthly_exec_minutes = math.floor(monthly_exec_seconds / 60) if monthly_exec_minutes >= quota: From 2eaed5acdd559fea1ed4cc56c3eef1b4e52d1a2c Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Oct 2023 13:20:41 -0400 Subject: [PATCH 18/42] Stop rather than cancel crawl after exec mins hard cap --- backend/btrixcloud/operator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index ca8156df99..91d876c3ff 100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -408,13 +408,13 @@ async def sync_crawls(self, data: MCSyncData): ) return self._empty_response(status) + # Gracefully stop crawl when execution minutes hard cap is reached to + # ensure that the user still gets their data from the crawl _, exec_mins_hard_cap_reached = await self.org_ops.execution_mins_quota_reached( crawl.oid, status.runningExecTime ) if exec_mins_hard_cap_reached: - await self.cancel_crawl( - crawl.id, uuid.UUID(cid), uuid.UUID(oid), status, data.children[POD] - ) + crawl.stopping = True # Reset runningExecTime to recalculate below for next sync status.runningExecTime = 0 From 5b590ac73bc2d856b25e61a9d73f58ba71ed3390 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 
Oct 2023 13:24:35 -0400 Subject: [PATCH 19/42] Add org settings billing page docs --- docs/user-guide/org-settings.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/user-guide/org-settings.md b/docs/user-guide/org-settings.md index 41f71e8e0c..d5397d9030 100644 --- a/docs/user-guide/org-settings.md +++ b/docs/user-guide/org-settings.md @@ -24,3 +24,7 @@ Sent invites can be invalidated by pressing the trash button in the relevant _Pe `Admin` : Users with the administrator role have full access to the organization, including its settings page. + +## Billing + +This page lets organization admins set an additional number of allowed overage minutes over the monthly execution minutes quota for their organization. If set, this serves as a hard cap after which all running crawls will be stopped. When set at the default of 0, crawls will be stopped as soon as the quota is reached. From f4f1ce87718aba1a398ab73131fb310ee5daa69c Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Oct 2023 14:10:39 -0400 Subject: [PATCH 20/42] Add nightly tests for quota and hard cap enforcement --- .../test_concurrent_crawl_limit.py | 10 +- .../test_execution_minutes_quota.py | 156 ++++++++++++++++++ backend/test_nightly/utils.py | 14 ++ 3 files changed, 171 insertions(+), 9 deletions(-) create mode 100644 backend/test_nightly/test_execution_minutes_quota.py create mode 100644 backend/test_nightly/utils.py diff --git a/backend/test_nightly/test_concurrent_crawl_limit.py b/backend/test_nightly/test_concurrent_crawl_limit.py index 7f6ee3d215..141ec97948 100644 --- a/backend/test_nightly/test_concurrent_crawl_limit.py +++ b/backend/test_nightly/test_concurrent_crawl_limit.py @@ -2,6 +2,7 @@ import time from .conftest import API_PREFIX +from .utils import get_crawl_status crawl_id_a = None crawl_id_b = None @@ -103,12 +104,3 @@ def run_crawl(org_id, headers): data = r.json() return data["run_now_job"] - - -def get_crawl_status(org_id, crawl_id, headers): - r = requests.get( - 
f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/replay.json", - headers=headers, - ) - data = r.json() - return data["state"] diff --git a/backend/test_nightly/test_execution_minutes_quota.py b/backend/test_nightly/test_execution_minutes_quota.py new file mode 100644 index 0000000000..74cb6ebf41 --- /dev/null +++ b/backend/test_nightly/test_execution_minutes_quota.py @@ -0,0 +1,156 @@ +import math +import requests +import time +from datetime import datetime + +from .conftest import API_PREFIX +from .utils import get_crawl_status + + +EXEC_MINS_QUOTA = 1 +EXEC_MINS_ALLOWED_OVERAGE = 10 +EXEC_MINS_HARD_CAP = EXEC_MINS_QUOTA + EXEC_MINS_ALLOWED_OVERAGE + +config_id = None + + +def test_set_execution_mins_quota(org_with_quotas, admin_auth_headers): + r = requests.post( + f"{API_PREFIX}/orgs/{org_with_quotas}/quotas", + headers=admin_auth_headers, + json={"crawlExecMinutesQuota": EXEC_MINS_QUOTA}, + ) + data = r.json() + assert data.get("updated") == True + + +def test_crawl_stopped_when_quota_reached(org_with_quotas, admin_auth_headers): + # Run crawl + global config_id + crawl_id, config_id = run_crawl(org_with_quotas, admin_auth_headers) + time.sleep(1) + + while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in ( + "starting", + "waiting_capacity", + ): + time.sleep(2) + + while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in ( + "running", + "generate-wacz", + "uploading-wacz", + "pending-wait", + ): + time.sleep(2) + + # Ensure that crawl was stopped by quota + assert ( + get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) + == "partial_complete" + ) + + time.sleep(5) + + # Ensure crawl execution seconds went over quota + r = requests.get( + f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id}/replay.json", + headers=admin_auth_headers, + ) + data = r.json() + execution_seconds = data["crawlExecSeconds"] + assert math.floor(execution_seconds / 60) >= EXEC_MINS_QUOTA + + time.sleep(5) + + # Ensure we can't start 
another crawl when over the quota + r = requests.post( + f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run", + headers=admin_auth_headers, + ) + assert r.status_code == 403 + assert r.json()["detail"] == "execution_minutes_hard_cap_reached" + + +def test_crawl_stopped_when_card_cap_reached(org_with_quotas, admin_auth_headers): + # Set allowed overage on org + r = requests.post( + f"{API_PREFIX}/orgs/{org_with_quotas}/billing", + headers=admin_auth_headers, + json={"crawlExecMinutesAllowedOverage": EXEC_MINS_ALLOWED_OVERAGE}, + ) + assert r.status_code == 200 + + time.sleep(10) + + # Run new crawl from config + r = requests.post( + f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + crawl_id = r.json()["started"] + assert crawl_id + + time.sleep(1) + + while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in ( + "starting", + "waiting_capacity", + ): + time.sleep(2) + + while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in ( + "running", + "generate-wacz", + "uploading-wacz", + "pending-wait", + ): + time.sleep(2) + + # Ensure that crawl was stopped when hard cap reached + assert ( + get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) + == "partial_complete" + ) + + time.sleep(5) + + # Ensure crawl execution seconds went over hard cap (stopping takes a while) + r = requests.get( + f"{API_PREFIX}/orgs/{org_with_quotas}", + headers=admin_auth_headers, + ) + data = r.json() + execution_seconds = data["crawlExecSeconds"] + yymm = datetime.utcnow().strftime("%Y-%m") + assert math.floor(execution_seconds[yymm] / 60) >= EXEC_MINS_HARD_CAP + + time.sleep(5) + + # Ensure we can't start another crawl when over the hard cap + r = requests.post( + f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run", + headers=admin_auth_headers, + ) + assert r.status_code == 403 + assert r.json()["detail"] == 
"execution_minutes_hard_cap_reached" + + +def run_crawl(org_id, headers): + crawl_data = { + "runNow": True, + "name": "Execution Mins Quota", + "config": { + "seeds": [{"url": "https://webrecorder.net/"}], + "extraHops": 1, + }, + } + r = requests.post( + f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/", + headers=headers, + json=crawl_data, + ) + data = r.json() + + return data["run_now_job"], data["id"] diff --git a/backend/test_nightly/utils.py b/backend/test_nightly/utils.py new file mode 100644 index 0000000000..3b94a78c1e --- /dev/null +++ b/backend/test_nightly/utils.py @@ -0,0 +1,14 @@ +"""nightly test utils""" + +import requests + +from .conftest import API_PREFIX + + +def get_crawl_status(org_id, crawl_id, headers): + r = requests.get( + f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/replay.json", + headers=headers, + ) + data = r.json() + return data["state"] From c18a25959d1206feb37887a1ad664689de02a2a5 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Oct 2023 16:42:42 -0400 Subject: [PATCH 21/42] Apply copy changes from code review Co-authored-by: Henry Wilkinson --- docs/user-guide/org-settings.md | 2 +- frontend/src/pages/org/settings.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/org-settings.md b/docs/user-guide/org-settings.md index d5397d9030..cb448d281c 100644 --- a/docs/user-guide/org-settings.md +++ b/docs/user-guide/org-settings.md @@ -27,4 +27,4 @@ Sent invites can be invalidated by pressing the trash button in the relevant _Pe ## Billing -This page lets organization admins set an additional number of allowed overage minutes over the monthly execution minutes quota for their organization. If set, this serves as a hard cap after which all running crawls will be stopped. When set at the default of 0, crawls will be stopped as soon as the quota is reached. 
+This page lets organization admins set an additional number of allowed overage minutes when the organization's monthly execution minutes quota has been reached. If set, this serves as a hard cap after which all running crawls will be stopped. When set at the default of 0, crawls will be stopped as soon as the monthly quota is reached. diff --git a/frontend/src/pages/org/settings.ts b/frontend/src/pages/org/settings.ts index a72a6d0b3c..14e6c6f32f 100644 --- a/frontend/src/pages/org/settings.ts +++ b/frontend/src/pages/org/settings.ts @@ -349,7 +349,7 @@ export class OrgSettings extends LiteElement {
${msg( - "Allowed overage minutes beyond monthly execution minutes quota before users are no longer allowed to create new crawls." + "Allowed overage minutes beyond the organization's monthly quota. Once reached, crawl workflows will not run." )}
From 18314cc3789765bddaf11edc9c44dd0dcd155158 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Oct 2023 16:53:40 -0400 Subject: [PATCH 22/42] Add inputmode numeric and minutes suffix to hard cap field --- frontend/src/pages/org/settings.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/frontend/src/pages/org/settings.ts b/frontend/src/pages/org/settings.ts index 14e6c6f32f..d4cbef6fb9 100644 --- a/frontend/src/pages/org/settings.ts +++ b/frontend/src/pages/org/settings.ts @@ -337,11 +337,14 @@ export class OrgSettings extends LiteElement {
+ > + ${msg("minutes")} +
From 8f7b5c6305000b8e673eee60315e54575f763274 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Oct 2023 17:05:31 -0400 Subject: [PATCH 23/42] Fix Org Settings Billing page layout --- frontend/src/pages/org/settings.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/pages/org/settings.ts b/frontend/src/pages/org/settings.ts index d4cbef6fb9..86c4b7825f 100644 --- a/frontend/src/pages/org/settings.ts +++ b/frontend/src/pages/org/settings.ts @@ -356,6 +356,7 @@ export class OrgSettings extends LiteElement { )}
+
Date: Thu, 19 Oct 2023 12:38:40 -0400 Subject: [PATCH 24/42] Change Billing to Limits --- backend/btrixcloud/orgs.py | 4 ++-- docs/user-guide/org-settings.md | 2 +- frontend/src/pages/org/settings.ts | 30 ++++++++++++++---------------- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index ad4a1e4488..6b3e6c4a2d 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -626,8 +626,8 @@ async def update_quotas( return {"updated": True} - @router.post("/billing", tags=["organizations"]) - async def update_org_billing_settings( + @router.post("/limits", tags=["organizations"]) + async def update_org_limits_settings( settings: OrgUpdateExecMinsOverage, org: Organization = Depends(org_owner_dep), ): diff --git a/docs/user-guide/org-settings.md b/docs/user-guide/org-settings.md index cb448d281c..0abb5a8770 100644 --- a/docs/user-guide/org-settings.md +++ b/docs/user-guide/org-settings.md @@ -25,6 +25,6 @@ Sent invites can be invalidated by pressing the trash button in the relevant _Pe `Admin` : Users with the administrator role have full access to the organization, including its settings page. -## Billing +## Limits This page lets organization admins set an additional number of allowed overage minutes when the organization's monthly execution minutes quota has been reached. If set, this serves as a hard cap after which all running crawls will be stopped. When set at the default of 0, crawls will be stopped as soon as the monthly quota is reached. 
diff --git a/frontend/src/pages/org/settings.ts b/frontend/src/pages/org/settings.ts index 86c4b7825f..90872f2f3a 100644 --- a/frontend/src/pages/org/settings.ts +++ b/frontend/src/pages/org/settings.ts @@ -14,7 +14,7 @@ import type { CurrentUser } from "../../types/user"; import type { APIPaginatedList } from "../../types/api"; import { maxLengthValidator } from "../../utils/form"; -type Tab = "information" | "members" | "billing"; +type Tab = "information" | "members" | "limits"; type User = { email: string; role: number; @@ -79,7 +79,7 @@ export class OrgSettings extends LiteElement { isSavingOrgName = false; @property({ type: Boolean }) - isSavingOrgBilling = false; + isSavingOrgLimits = false; @state() pendingInvites: Invite[] = []; @@ -97,7 +97,7 @@ export class OrgSettings extends LiteElement { return { information: msg("General"), members: msg("Members"), - billing: msg("Billing"), + limits: msg("Limits"), }; } @@ -146,7 +146,7 @@ export class OrgSettings extends LiteElement { ${this.renderTab("information", "settings")} ${this.renderTab("members", "settings/members")} - ${this.renderTab("billing", "settings/billing")} + ${this.renderTab("limits", "settings/limits")} ${this.renderInformation()}${this.renderMembers()} - ${this.renderBilling()} + ${this.renderLimits()} `; } @@ -330,9 +328,9 @@ export class OrgSettings extends LiteElement { `; } - private renderBilling() { + private renderLimits() { return html`
-
+
${msg("Save Changes")}
@@ -602,19 +600,19 @@ export class OrgSettings extends LiteElement { this.isSubmittingInvite = false; } - private async onOrgBillingSubmit(e: SubmitEvent) { + private async onOrgLimitsSubmit(e: SubmitEvent) { e.preventDefault(); const formEl = e.target as HTMLFormElement; if (!(await this.checkFormValidity(formEl))) return; - this.isSavingOrgBilling = true; + this.isSavingOrgLimits = true; const { execMinutesOverage } = serialize(formEl); try { const data = await this.apiFetch( - `/orgs/${this.orgId}/billing`, + `/orgs/${this.orgId}/limits`, this.authState!, { method: "POST", @@ -625,7 +623,7 @@ export class OrgSettings extends LiteElement { ); this.notify({ - message: msg(str`Successfully updated org billing settings.`), + message: msg(str`Successfully updated org limits settings.`), variant: "success", icon: "check2-circle", }); @@ -639,7 +637,7 @@ export class OrgSettings extends LiteElement { }); } - this.isSavingOrgBilling = false; + this.isSavingOrgLimits = false; } private async removeInvite(invite: Invite) { From 32066fc01ef25d69a4258d7cf415ac808e3f2bb1 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 19 Oct 2023 12:53:00 -0400 Subject: [PATCH 25/42] Update API endpoint to /limits in nightly test --- backend/test_nightly/test_execution_minutes_quota.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/test_nightly/test_execution_minutes_quota.py b/backend/test_nightly/test_execution_minutes_quota.py index 74cb6ebf41..d6b863e5e4 100644 --- a/backend/test_nightly/test_execution_minutes_quota.py +++ b/backend/test_nightly/test_execution_minutes_quota.py @@ -75,7 +75,7 @@ def test_crawl_stopped_when_quota_reached(org_with_quotas, admin_auth_headers): def test_crawl_stopped_when_card_cap_reached(org_with_quotas, admin_auth_headers): # Set allowed overage on org r = requests.post( - f"{API_PREFIX}/orgs/{org_with_quotas}/billing", + f"{API_PREFIX}/orgs/{org_with_quotas}/limits", headers=admin_auth_headers, 
json={"crawlExecMinutesAllowedOverage": EXEC_MINS_ALLOWED_OVERAGE}, ) From 15d3769bfd85cf9bc250f78fcbe10485e3fab031 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 19 Oct 2023 13:58:57 -0400 Subject: [PATCH 26/42] Change /billing -> /limits in backend tests --- backend/test/test_org.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/test/test_org.py b/backend/test/test_org.py index 00f5210cb4..819263e876 100644 --- a/backend/test/test_org.py +++ b/backend/test/test_org.py @@ -446,7 +446,7 @@ def test_set_org_execution_time_overage(admin_auth_headers, default_org_id): assert value_before != NEW_VALUE r = requests.post( - f"{API_PREFIX}/orgs/{default_org_id}/billing", + f"{API_PREFIX}/orgs/{default_org_id}/limits", headers=admin_auth_headers, json={"crawlExecMinutesAllowedOverage": NEW_VALUE}, ) @@ -460,7 +460,7 @@ def test_set_org_execution_time_overage_non_superadmin( crawler_auth_headers, default_org_id ): r = requests.post( - f"{API_PREFIX}/orgs/{default_org_id}/billing", + f"{API_PREFIX}/orgs/{default_org_id}/limits", headers=crawler_auth_headers, json={"crawlExecMinutesAllowedOverage": 60}, ) From 518f7395bf114953e5493f0a3553b59038e44812 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 23 Oct 2023 21:07:53 -0700 Subject: [PATCH 27/42] add null check --- frontend/src/pages/org/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index d47f717453..28a6fb4f62 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -815,6 +815,7 @@ export class Org extends LiteElement { checkExecutionMinutesQuota() { if ( !this.org || + !this.org.crawlExecSeconds || !this.org.quotas.crawlExecMinutesQuota || this.org.quotas.crawlExecMinutesQuota == 0 ) { From d2689229cff1c836e202a318ced41d22a0db11ab Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 25 Oct 2023 11:25:31 -0700 Subject: [PATCH 28/42] Exec Time Incremental Updates (#1314) 
This adds a slight refactor to #1284 to ensure execution minutes are updated incrementally while a crawl is running. The execution minutes are updated on the org and on the crawl if: - 60 seconds elapsed from last update - any pod has newly exited - crawl has been canceled (compute to current time) - month has changed This should ensure that the execution time quota is accurate within 1 minute, and also that crawls running at the end of the month properly count towards old and new month quotas (a bit of an edge case yes, but bound to happen!) other cleanup: - add background tasks to a set to avoid premature garbage collection (see: https://stackoverflow.com/a/74059981) - use dt_now() consistently instead of datetime.utcnow() to store second-rounded dates --- backend/btrixcloud/crawls.py | 13 +- backend/btrixcloud/operator.py | 285 +++++++++++++++++++++------------ backend/btrixcloud/orgs.py | 5 +- 3 files changed, 188 insertions(+), 115 deletions(-) diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py index 628aabb9fc..20a7c62f0e 100644 --- a/backend/btrixcloud/crawls.py +++ b/backend/btrixcloud/crawls.py @@ -528,16 +528,11 @@ async def update_running_crawl_stats(self, crawl_id, stats): query = {"_id": crawl_id, "type": "crawl", "state": "running"} return await self.crawls.find_one_and_update(query, {"$set": {"stats": stats}}) - async def store_exec_time(self, crawl_id, exec_time): - """set exec time, only if not already set""" - query = { - "_id": crawl_id, - "type": "crawl", - "crawlExecSeconds": {"$in": [0, None]}, - } + async def inc_crawl_exec_time(self, crawl_id, exec_time): + """increment exec time""" return await self.crawls.find_one_and_update( - query, - {"$set": {"crawlExecSeconds": exec_time}}, + {"_id": crawl_id, "type": "crawl"}, + {"$inc": {"crawlExecSeconds": exec_time}}, ) async def get_crawl_state(self, crawl_id): diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index 91d876c3ff..7fa08a4354 
100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -54,6 +54,9 @@ # time in seconds before a crawl is deemed 'waiting' instead of 'starting' STARTING_TIME_SECS = 60 +# how often to update execution time seconds +EXEC_TIME_UPDATE_SECS = 60 + # ============================================================================ class MCBaseRequest(BaseModel): @@ -216,15 +219,19 @@ class CrawlStatus(BaseModel): # DefaultDict[str, Annotated[PodInfo, Field(default_factory=PodInfo)]] # ] restartTime: Optional[str] - execTime: int = 0 canceled: bool = False - # Track estimated exec time as crawlers are running to cancel - # the crawl quickly if execution minutes hard cap is reached - runningExecTime: int = 0 + # Execution Time -- updated on pod exits and at regular interval + crawlExecTime: int = 0 + + # last exec time update + lastUpdatedTime: str = "" + + # any pods exited + anyCrawlPodNewExit: Optional[bool] = Field(default=False, exclude=True) # don't include in status, use by metacontroller - resync_after: Optional[int] = None + resync_after: Optional[int] = Field(default=None, exclude=True) # ============================================================================ @@ -259,6 +266,10 @@ def __init__( self._has_pod_metrics = False self.compute_crawler_resources() + # to avoid background tasks being garbage collected + # see: https://stackoverflow.com/a/74059981 + self.bg_tasks = set() + def compute_crawler_resources(self): """compute memory / cpu resources for crawlers""" # pylint: disable=invalid-name @@ -299,7 +310,7 @@ async def sync_profile_browsers(self, data: MCSyncData): browserid = spec.get("id") if dt_now() >= expire_time: - asyncio.create_task(self.delete_profile_browser(browserid)) + self.run_task(self.delete_profile_browser(browserid)) return {"status": {}, "children": []} params = {} @@ -351,7 +362,12 @@ async def sync_crawls(self, data: MCSyncData): raise HTTPException(status_code=400, detail="out_of_sync_status") return await 
self.finalize_response( - crawl_id, status, spec, data.children, params + crawl_id, + uuid.UUID(oid), + status, + spec, + data.children, + params, ) # just in case, finished but not deleted, can only get here if @@ -360,9 +376,14 @@ async def sync_crawls(self, data: MCSyncData): print( f"warn crawl {crawl_id} finished but not deleted, post-finish taking too long?" ) - asyncio.create_task(self.delete_crawl_job(crawl_id)) + self.run_task(self.delete_crawl_job(crawl_id)) return await self.finalize_response( - crawl_id, status, spec, data.children, params + crawl_id, + uuid.UUID(oid), + status, + spec, + data.children, + params, ) try: @@ -400,25 +421,10 @@ async def sync_crawls(self, data: MCSyncData): and await self.org_ops.storage_quota_reached(crawl.oid) ): await self.mark_finished( - crawl.id, - crawl.cid, - crawl.oid, - status, - "skipped_quota_reached", + crawl.id, crawl.cid, crawl.oid, status, "skipped_quota_reached" ) return self._empty_response(status) - # Gracefully stop crawl when execution minutes hard cap is reached to - # ensure that the user still gets their data from the crawl - _, exec_mins_hard_cap_reached = await self.org_ops.execution_mins_quota_reached( - crawl.oid, status.runningExecTime - ) - if exec_mins_hard_cap_reached: - crawl.stopping = True - - # Reset runningExecTime to recalculate below for next sync - status.runningExecTime = 0 - if status.state in ("starting", "waiting_org_limit"): if not await self.can_start_new(crawl, data, status): return self._empty_response(status) @@ -437,7 +443,6 @@ async def sync_crawls(self, data: MCSyncData): status, pods, data.related.get(METRICS, {}), - crawl.oid, ) # auto sizing handled here @@ -445,10 +450,21 @@ async def sync_crawls(self, data: MCSyncData): if status.finished: return await self.finalize_response( - crawl_id, status, spec, data.children, params + crawl_id, + uuid.UUID(oid), + status, + spec, + data.children, + params, ) + + await self.increment_pod_exec_time( + pods, status, crawl.id, 
crawl.oid, EXEC_TIME_UPDATE_SECS + ) + else: status.scale = crawl.scale + status.lastUpdatedTime = to_k8s_date(dt_now()) children = self._load_redis(params, status, data.children) @@ -471,7 +487,7 @@ async def sync_crawls(self, data: MCSyncData): children.extend(self._load_crawler(params, i, status, data.children)) return { - "status": status.dict(exclude_none=True, exclude={"resync_after": True}), + "status": status.dict(exclude_none=True), "children": children, "resyncAfterSeconds": status.resync_after, } @@ -706,8 +722,6 @@ async def cancel_crawl( await self.mark_for_cancelation(crawl_id) if not status.canceled: - cancel_time = datetime.utcnow() - for name, pod in pods.items(): pstatus = pod["status"] role = pod["metadata"]["labels"]["role"] @@ -720,15 +734,8 @@ async def cancel_crawl( cstatus = pstatus["containerStatuses"][0] - running = cstatus["state"].get("running") - - if running: - await self.inc_exec_time( - name, oid, status, cancel_time, running.get("startedAt") - ) - - await self.handle_terminated_pod( - name, oid, role, status, cstatus["state"].get("terminated") + self.handle_terminated_pod( + name, role, status, cstatus["state"].get("terminated") ) status.canceled = True @@ -761,20 +768,21 @@ async def fail_crawl( print(f"============== POD STATUS: {name} ==============") pprint(pods[name]["status"]) - asyncio.create_task(self.print_pod_logs(pod_names, self.log_failed_crawl_lines)) + self.run_task(self.print_pod_logs(pod_names, self.log_failed_crawl_lines)) return True def _empty_response(self, status): """done response for removing crawl""" return { - "status": status.dict(exclude_none=True, exclude={"resync_after": True}), + "status": status.dict(exclude_none=True), "children": [], } async def finalize_response( self, crawl_id: str, + oid: uuid.UUID, status: CrawlStatus, spec: dict, children: dict, @@ -787,25 +795,19 @@ async def finalize_response( finalized = False - exec_updated = False - pods = children[POD] if redis_pod in pods: # if has other 
pods, keep redis pod until they are removed if len(pods) > 1: new_children = self._load_redis(params, status, children) + await self.increment_pod_exec_time(pods, status, crawl_id, oid) # keep pvs until pods are removed if new_children: new_children.extend(list(children[PVC].values())) if not children[POD] and not children[PVC]: - # ensure exec time was successfully updated - exec_updated = await self.store_exec_time_in_crawl( - crawl_id, status.execTime - ) - # keep parent until ttl expired, if any if status.finished: ttl = spec.get("ttlSecondsAfterFinished", DEFAULT_TTL) @@ -817,9 +819,9 @@ async def finalize_response( finalized = True return { - "status": status.dict(exclude_none=True, exclude={"resync_after": True}), + "status": status.dict(exclude_none=True), "children": new_children, - "finalized": finalized and exec_updated, + "finalized": finalized, } async def _get_redis(self, redis_url): @@ -838,12 +840,10 @@ async def _get_redis(self, redis_url): return None - async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics, oid): + async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics): """sync crawl state for running crawl""" # check if at least one crawler pod started running - crawler_running, redis_running, done = await self.sync_pod_status( - pods, status, oid - ) + crawler_running, redis_running, done = self.sync_pod_status(pods, status) redis = None try: @@ -852,7 +852,9 @@ async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics, oid): await self.add_used_stats(crawl.id, status.podStatus, redis, metrics) - await self.log_crashes(crawl.id, status.podStatus, redis) + # skip if no newly exited pods + if status.anyCrawlPodNewExit: + await self.log_crashes(crawl.id, status.podStatus, redis) if not crawler_running: if self.should_mark_waiting(status.state, crawl.started): @@ -894,7 +896,7 @@ async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics, oid): crawl.id, allowed_from=["starting", 
"waiting_capacity"], ): - asyncio.create_task( + self.run_task( self.event_webhook_ops.create_crawl_started_notification( crawl.id, crawl.oid, scheduled=crawl.scheduled ) @@ -929,11 +931,12 @@ async def sync_crawl_state(self, redis_url, crawl, status, pods, metrics, oid): if redis: await redis.close() - async def sync_pod_status(self, pods, status, oid): + def sync_pod_status(self, pods, status): """check status of pods""" crawler_running = False redis_running = False done = True + try: for name, pod in pods.items(): running = False @@ -957,13 +960,10 @@ async def sync_pod_status(self, pods, status, oid): ): running = True - await self.handle_terminated_pod( - name, oid, role, status, cstatus["state"].get("terminated") + self.handle_terminated_pod( + name, role, status, cstatus["state"].get("terminated") ) - if role == "crawler": - self.increment_running_exec_time(cstatus["state"], status) - if role == "crawler": crawler_running = crawler_running or running done = done and phase == "Succeeded" @@ -977,15 +977,7 @@ async def sync_pod_status(self, pods, status, oid): return crawler_running, redis_running, done - def increment_running_exec_time(self, container_state, status): - """Increment runningExecTime in crawl status for running crawler pod""" - running = container_state.get("running") - if running: - start_time = from_k8s_date(running.get("startedAt")) - running_exec_time = int((datetime.now() - start_time).total_seconds()) - status.runningExecTime += running_exec_time - - async def handle_terminated_pod(self, name, oid, role, status, terminated): + def handle_terminated_pod(self, name, role, status, terminated): """handle terminated pod state""" if not terminated: return @@ -999,10 +991,8 @@ async def handle_terminated_pod(self, name, oid, role, status, terminated): pod_status.isNewExit = pod_status.exitTime != exit_time if pod_status.isNewExit and role == "crawler": - await self.inc_exec_time( - name, oid, status, exit_time, terminated.get("startedAt") - ) 
pod_status.exitTime = exit_time + status.anyCrawlPodNewExit = True # detect reason exit_code = terminated.get("exitCode") @@ -1016,6 +1006,105 @@ async def handle_terminated_pod(self, name, oid, role, status, terminated): pod_status.exitCode = exit_code + async def increment_pod_exec_time( + self, + pods: dict[str, dict], + status: CrawlStatus, + crawl_id: str, + oid: uuid.UUID, + min_duration=0, + ) -> None: + """inc exec time tracking""" + now = dt_now() + + if not status.lastUpdatedTime: + status.lastUpdatedTime = to_k8s_date(now) + return + + update_start_time = from_k8s_date(status.lastUpdatedTime) + + reason = None + update_duration = (now - update_start_time).total_seconds() + + if status.anyCrawlPodNewExit: + reason = "new pod exit" + + elif status.canceled: + reason = "crawl canceled" + + elif now.month != update_start_time.month: + reason = "month change" + + elif update_duration >= min_duration: + reason = "duration reached" if min_duration else "finalizing" + + if not reason: + return + + exec_time = 0 + print( + f"Exec Time Update: {reason}: {now} - {update_start_time} = {update_duration}" + ) + + for name, pod in pods.items(): + pstatus = pod["status"] + role = pod["metadata"]["labels"]["role"] + + if role != "crawler": + continue + + if "containerStatuses" not in pstatus: + continue + + cstate = pstatus["containerStatuses"][0]["state"] + + end_time = None + start_time = None + pod_state = "" + + if "running" in cstate: + pod_state = "running" + state = cstate["running"] + start_time = from_k8s_date(state.get("startedAt")) + if update_start_time and update_start_time > start_time: + start_time = update_start_time + + end_time = now + elif "terminated" in cstate: + pod_state = "terminated" + state = cstate["terminated"] + start_time = from_k8s_date(state.get("startedAt")) + end_time = from_k8s_date(state.get("finishedAt")) + if update_start_time and update_start_time > start_time: + start_time = update_start_time + + # already counted + if 
update_start_time and end_time < update_start_time: + print( + f" - {name}: {pod_state}: skipping already counted, " + + f"{end_time} < {start_time}" + ) + continue + + if end_time and start_time: + duration = int((end_time - start_time).total_seconds()) + print( + f" - {name}: {pod_state}: {end_time} - {start_time} = {duration}" + ) + exec_time += duration + + if exec_time: + await self.crawl_ops.inc_crawl_exec_time(crawl_id, exec_time) + await self.org_ops.inc_org_time_stats(oid, exec_time, True) + status.crawlExecTime += exec_time + + print( + f" Exec Time Total: {status.crawlExecTime}, Incremented By: {exec_time}", + flush=True, + ) + + status.lastUpdatedTime = to_k8s_date(now) + def should_mark_waiting(self, state, started): """Should the crawl be marked as waiting for capacity?""" if state in RUNNING_STATES: @@ -1023,7 +1112,7 @@ def should_mark_waiting(self, state, started): if state == "starting": started = from_k8s_date(started) - return (datetime.utcnow() - started).total_seconds() > STARTING_TIME_SECS + return (dt_now() - started).total_seconds() > STARTING_TIME_SECS return False @@ -1085,7 +1174,7 @@ async def log_crashes(self, crawl_id, pod_status, redis): def get_log_line(self, message, details): """get crawler error line for logging""" err = { - "timestamp": datetime.utcnow().isoformat(), + "timestamp": dt_now().isoformat(), "logLevel": "error", "context": "k8s", "message": message, @@ -1128,7 +1217,7 @@ def is_crawl_stopping(self, crawl, size): return True # check crawl expiry - if crawl.expire_time and datetime.utcnow() > crawl.expire_time: + if crawl.expire_time and dt_now() > crawl.expire_time: print(f"Graceful Stop: Job duration expired at {crawl.expire_time}") return True @@ -1178,6 +1267,16 @@ async def update_crawl_state(self, redis, crawl, status, pods, done): status.stopping = self.is_crawl_stopping(crawl, status.size) + # check exec time quotas and stop if reached limit + if not status.stopping: + ( + _, + exec_mins_hard_cap_reached, + ) = 
await self.org_ops.execution_mins_quota_reached(crawl.oid) + if exec_mins_hard_cap_reached: + status.stopping = True + + # mark crawl as stopping if status.stopping: await redis.set(f"{crawl.id}:stopping", "1") # backwards compatibility with older crawler @@ -1285,7 +1384,7 @@ async def mark_finished( if crawl and state in SUCCESSFUL_STATES: await self.inc_crawl_complete_stats(crawl, finished) - asyncio.create_task( + self.run_task( self.do_crawl_finished_tasks( crawl_id, cid, oid, status.filesAddedSize, state ) @@ -1319,30 +1418,6 @@ async def do_crawl_finished_tasks( # finally, delete job await self.delete_crawl_job(crawl_id) - async def inc_exec_time(self, name, oid, status, finished_at, started_at): - """increment execTime on pod status and in org""" - end_time = ( - from_k8s_date(finished_at) - if not isinstance(finished_at, datetime) - else finished_at - ) - start_time = from_k8s_date(started_at) - exec_time = int((end_time - start_time).total_seconds()) - await self.org_ops.inc_org_time_stats(oid, exec_time, True) - status.execTime += exec_time - print(f"{name} exec time: {exec_time}") - return exec_time - - async def store_exec_time_in_crawl(self, crawl_id: str, exec_time: int): - """store crawlExecSeconds in crawl (if not already set)""" - try: - await self.crawl_ops.store_exec_time(crawl_id, exec_time) - return True - # pylint: disable=broad-except - except Exception as exc: - print(exc, flush=True) - return False - async def inc_crawl_complete_stats(self, crawl, finished): """Increment Crawl Stats""" @@ -1491,6 +1566,12 @@ async def sync_cronjob_crawl(self, data: MCDecoratorSyncData): "attachments": attachments, } + def run_task(self, func): + """add bg tasks to set to avoid premature garbage collection""" + task = asyncio.create_task(func) + self.bg_tasks.add(task) + task.add_done_callback(self.bg_tasks.discard) + # ============================================================================ def init_operator_api( diff --git a/backend/btrixcloud/orgs.py 
b/backend/btrixcloud/orgs.py index 6b3e6c4a2d..71e25b2cd6 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -336,9 +336,7 @@ async def get_this_month_crawl_exec_seconds(self, oid: uuid.UUID) -> int: except KeyError: return 0 - async def execution_mins_quota_reached( - self, oid: uuid.UUID, running_exec_seconds: int = 0 - ) -> Tuple[bool, bool]: + async def execution_mins_quota_reached(self, oid: uuid.UUID) -> Tuple[bool, bool]: """Return bools for if execution minutes quota and hard cap are reached.""" quota = await self.get_org_execution_mins_quota(oid) if not quota: @@ -351,7 +349,6 @@ async def execution_mins_quota_reached( hard_cap_quota = quota + hard_cap_additional_mins monthly_exec_seconds = await self.get_this_month_crawl_exec_seconds(oid) - monthly_exec_seconds = monthly_exec_seconds + running_exec_seconds monthly_exec_minutes = math.floor(monthly_exec_seconds / 60) if monthly_exec_minutes >= quota: From 928f913f658568c71dc156ca825b83eb3b726c51 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 26 Oct 2023 09:10:36 -0700 Subject: [PATCH 29/42] Issue 1261 execution time exceeded simplify (#1318) - Remove user-set overage - Just have a 'single execution minutes' quota, rename hard cap to be the quota (for now) - have backend compute storage and exec minutes quotas, report to frontend (will eventually involve additional variables) --- backend/btrixcloud/basecrawls.py | 7 +- backend/btrixcloud/crawlconfigs.py | 25 ++---- backend/btrixcloud/models.py | 18 ++-- backend/btrixcloud/operator.py | 6 +- backend/btrixcloud/orgs.py | 65 ++++---------- backend/test/test_org.py | 30 ------- .../test_execution_minutes_quota.py | 69 +------------- frontend/src/components/orgs-list.ts | 2 +- frontend/src/pages/org/index.ts | 83 ++--------------- frontend/src/pages/org/settings.ts | 90 +------------------ frontend/src/pages/org/workflow-detail.ts | 18 ++-- frontend/src/pages/org/workflows-list.ts | 6 +- frontend/src/routes.ts | 2 +- 
frontend/src/types/org.ts | 3 +- frontend/src/utils/LiteElement.ts | 10 --- 15 files changed, 60 insertions(+), 374 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 1389bd9b0e..405bf3b5a9 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -136,10 +136,9 @@ async def get_crawl( crawl.config.seeds = None crawl.storageQuotaReached = await self.orgs.storage_quota_reached(crawl.oid) - ( - crawl.executionMinutesQuotaReached, - crawl.executionMinutesHardCapReached, - ) = await self.orgs.execution_mins_quota_reached(crawl.oid) + crawl.execMinutesQuotaReached = await self.orgs.exec_mins_quota_reached( + crawl.oid + ) return crawl diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index 49fea6a74a..13f7aa071f 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -173,18 +173,15 @@ async def add_crawl_config( run_now = config.runNow storage_quota_reached = await self.org_ops.storage_quota_reached(org.id) - ( - exec_mins_quota_reached, - exec_mins_hard_cap_reached, - ) = await self.org_ops.execution_mins_quota_reached(org.id) + exec_mins_quota_reached = await self.org_ops.exec_mins_quota_reached(org.id) if storage_quota_reached: run_now = False print(f"Storage quota exceeded for org {org.id}", flush=True) - if exec_mins_hard_cap_reached: + if exec_mins_quota_reached: run_now = False - print(f"Monthly execution minute hard cap hit for org {org.id}", flush=True) + print(f"Execution minutes quota exceeded for org {org.id}", flush=True) crawl_id = await self.crawl_manager.add_crawl_config( crawlconfig=crawlconfig, @@ -202,7 +199,6 @@ async def add_crawl_config( crawl_id, storage_quota_reached, exec_mins_quota_reached, - exec_mins_hard_cap_reached, ) async def add_new_crawl( @@ -769,13 +765,8 @@ async def run_now(self, cid: str, org: Organization, user: User): if await self.org_ops.storage_quota_reached(org.id): raise 
HTTPException(status_code=403, detail="storage_quota_reached") - _, exec_mins_hard_cap_reached = await self.org_ops.execution_mins_quota_reached( - org.id - ) - if exec_mins_hard_cap_reached: - raise HTTPException( - status_code=403, detail="execution_minutes_hard_cap_reached" - ) + if await self.org_ops.exec_mins_quota_reached(org.id): + raise HTTPException(status_code=403, detail="exec_minutes_quota_reached") try: crawl_id = await self.crawl_manager.create_crawl_job( @@ -1015,16 +1006,14 @@ async def add_crawl_config( cid, new_job_name, storage_quota_reached, - execution_mins_quota_reached, - execution_mins_hard_cap_reached, + exec_mins_quota_reached, ) = await ops.add_crawl_config(config, org, user) return { "added": True, "id": str(cid), "run_now_job": new_job_name, "storageQuotaReached": storage_quota_reached, - "executionMinutesQuotaReached": execution_mins_quota_reached, - "executionMinutesHardCapReached": execution_mins_hard_cap_reached, + "execMinutesQuotaReached": exec_mins_quota_reached, } @router.patch("/{cid}", dependencies=[Depends(org_crawl_dep)]) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 6a779c8b48..f389f0af8c 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -496,8 +496,7 @@ class CrawlOut(BaseMongoModel): cid_rev: Optional[int] storageQuotaReached: Optional[bool] - executionMinutesQuotaReached: Optional[bool] - executionMinutesHardCapReached: Optional[bool] + execMinutesQuotaReached: Optional[bool] # ============================================================================ @@ -677,13 +676,6 @@ class RenameOrg(BaseModel): slug: Optional[str] = None -# ============================================================================ -class OrgUpdateExecMinsOverage(BaseModel): - """Update allowed exec mins overage""" - - crawlExecMinutesAllowedOverage: int - - # ============================================================================ class DefaultStorage(BaseModel): """Storage 
reference""" @@ -714,7 +706,7 @@ class OrgQuotas(BaseModel): maxConcurrentCrawls: Optional[int] = 0 maxPagesPerCrawl: Optional[int] = 0 storageQuota: Optional[int] = 0 - crawlExecMinutesQuota: Optional[int] = 0 + maxCrawlMinutesPerMonth: Optional[int] = 0 # ============================================================================ @@ -747,7 +739,9 @@ class OrgOut(BaseMongoModel): webhookUrls: Optional[OrgWebhookUrls] = OrgWebhookUrls() quotas: Optional[OrgQuotas] = OrgQuotas() - crawlExecMinutesAllowedOverage: int = 0 + + storageQuotaReached: Optional[bool] + execMinutesQuotaReached: Optional[bool] # ============================================================================ @@ -775,8 +769,6 @@ class Organization(BaseMongoModel): quotas: Optional[OrgQuotas] = OrgQuotas() - crawlExecMinutesAllowedOverage: Optional[int] = 0 - webhookUrls: Optional[OrgWebhookUrls] = OrgWebhookUrls() origin: Optional[AnyHttpUrl] = None diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py index 7fa08a4354..7848798864 100644 --- a/backend/btrixcloud/operator.py +++ b/backend/btrixcloud/operator.py @@ -1269,11 +1269,7 @@ async def update_crawl_state(self, redis, crawl, status, pods, done): # check exec time quotas and stop if reached limit if not status.stopping: - ( - _, - exec_mins_hard_cap_reached, - ) = await self.org_ops.execution_mins_quota_reached(crawl.oid) - if exec_mins_hard_cap_reached: + if await self.org_ops.exec_mins_quota_reached(crawl.oid): status.stopping = True # mark crawl as stopping diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 71e25b2cd6..250b413569 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -8,7 +8,7 @@ import uuid from datetime import datetime -from typing import Union, Optional, Tuple +from typing import Union, Optional from pymongo import ReturnDocument from pymongo.errors import AutoReconnect, DuplicateKeyError @@ -22,7 +22,6 @@ OrgQuotas, OrgMetrics, OrgWebhookUrls, - 
OrgUpdateExecMinsOverage, RenameOrg, UpdateRole, RemovePendingInvite, @@ -229,15 +228,6 @@ async def update_quotas(self, org: Organization, quotas: OrgQuotas): }, ) - async def update_execution_mins_overage( - self, org: Organization, allowed_overage: int = 0 - ): - """update allowed execution minutes overage""" - return await self.orgs.find_one_and_update( - {"_id": org.id}, - {"$set": {"crawlExecMinutesAllowedOverage": allowed_overage}}, - ) - async def update_event_webhook_urls(self, org: Organization, urls: OrgWebhookUrls): """Update organization event webhook URLs""" return await self.orgs.find_one_and_update( @@ -336,27 +326,22 @@ async def get_this_month_crawl_exec_seconds(self, oid: uuid.UUID) -> int: except KeyError: return 0 - async def execution_mins_quota_reached(self, oid: uuid.UUID) -> Tuple[bool, bool]: - """Return bools for if execution minutes quota and hard cap are reached.""" - quota = await self.get_org_execution_mins_quota(oid) - if not quota: - return False, False + async def exec_mins_quota_reached(self, oid: uuid.UUID) -> bool: + """Return bools for if execution minutes quota""" + quota = await self.get_org_exec_mins_monthly_quota(oid) quota_reached = False - hard_cap_reached = False - hard_cap_additional_mins = await self.get_org_execution_mins_hard_cap(oid) - hard_cap_quota = quota + hard_cap_additional_mins + if quota: + monthly_exec_seconds = await self.get_this_month_crawl_exec_seconds(oid) + monthly_exec_minutes = math.floor(monthly_exec_seconds / 60) - monthly_exec_seconds = await self.get_this_month_crawl_exec_seconds(oid) - monthly_exec_minutes = math.floor(monthly_exec_seconds / 60) + if monthly_exec_minutes >= quota: + quota_reached = True - if monthly_exec_minutes >= quota: - quota_reached = True - if monthly_exec_minutes >= hard_cap_quota: - hard_cap_reached = True + # add additional quotas here - return quota_reached, hard_cap_reached + return quota_reached async def get_org_storage_quota(self, oid: uuid.UUID) -> int: """return 
max allowed concurrent crawls, if any""" @@ -366,20 +351,12 @@ async def get_org_storage_quota(self, oid: uuid.UUID) -> int: return org.quotas.storageQuota return 0 - async def get_org_execution_mins_quota(self, oid: uuid.UUID) -> int: + async def get_org_exec_mins_monthly_quota(self, oid: uuid.UUID) -> int: """return max allowed execution mins per month, if any""" org = await self.orgs.find_one({"_id": oid}) if org: org = Organization.from_dict(org) - return org.quotas.crawlExecMinutesQuota - return 0 - - async def get_org_execution_mins_hard_cap(self, oid: uuid.UUID) -> int: - """return additional minutes before exec time hard cap, if any""" - org = await self.orgs.find_one({"_id": oid}) - if org: - org = Organization.from_dict(org) - return org.crawlExecMinutesAllowedOverage + return org.quotas.maxCrawlMinutesPerMonth return 0 async def set_origin(self, org: Organization, request: Request): @@ -589,7 +566,10 @@ async def create_org( async def get_org( org: Organization = Depends(org_dep), user: User = Depends(user_dep) ): - return await org.serialize_for_user(user, user_manager) + org_out = await org.serialize_for_user(user, user_manager) + org_out.storageQuotaReached = await ops.storage_quota_reached(org.id) + org_out.execMinutesQuotaReached = await ops.exec_mins_quota_reached(org.id) + return org_out @router.post("/rename", tags=["organizations"]) async def rename_org( @@ -623,17 +603,6 @@ async def update_quotas( return {"updated": True} - @router.post("/limits", tags=["organizations"]) - async def update_org_limits_settings( - settings: OrgUpdateExecMinsOverage, - org: Organization = Depends(org_owner_dep), - ): - await ops.update_execution_mins_overage( - org, settings.crawlExecMinutesAllowedOverage - ) - - return {"updated": True} - @router.post("/event-webhook-urls", tags=["organizations"]) async def update_event_webhook_urls( urls: OrgWebhookUrls, diff --git a/backend/test/test_org.py b/backend/test/test_org.py index 819263e876..fa6ce9cb63 100644 --- 
a/backend/test/test_org.py +++ b/backend/test/test_org.py @@ -435,33 +435,3 @@ def test_get_org_slug_lookup_non_superadmin(crawler_auth_headers): r = requests.get(f"{API_PREFIX}/orgs/slug-lookup", headers=crawler_auth_headers) assert r.status_code == 403 assert r.json()["detail"] == "Not Allowed" - - -def test_set_org_execution_time_overage(admin_auth_headers, default_org_id): - NEW_VALUE = 30 - r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers) - assert r.status_code == 200 - data = r.json() - value_before = data.get("crawlExecMinutesAllowedOverage", 0) - assert value_before != NEW_VALUE - - r = requests.post( - f"{API_PREFIX}/orgs/{default_org_id}/limits", - headers=admin_auth_headers, - json={"crawlExecMinutesAllowedOverage": NEW_VALUE}, - ) - assert r.status_code == 200 - - r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers) - assert r.json()["crawlExecMinutesAllowedOverage"] == NEW_VALUE - - -def test_set_org_execution_time_overage_non_superadmin( - crawler_auth_headers, default_org_id -): - r = requests.post( - f"{API_PREFIX}/orgs/{default_org_id}/limits", - headers=crawler_auth_headers, - json={"crawlExecMinutesAllowedOverage": 60}, - ) - assert r.status_code == 403 diff --git a/backend/test_nightly/test_execution_minutes_quota.py b/backend/test_nightly/test_execution_minutes_quota.py index d6b863e5e4..f6479f3b32 100644 --- a/backend/test_nightly/test_execution_minutes_quota.py +++ b/backend/test_nightly/test_execution_minutes_quota.py @@ -18,7 +18,7 @@ def test_set_execution_mins_quota(org_with_quotas, admin_auth_headers): r = requests.post( f"{API_PREFIX}/orgs/{org_with_quotas}/quotas", headers=admin_auth_headers, - json={"crawlExecMinutesQuota": EXEC_MINS_QUOTA}, + json={"maxCrawlMinutesPerMonth": EXEC_MINS_QUOTA}, ) data = r.json() assert data.get("updated") == True @@ -69,72 +69,7 @@ def test_crawl_stopped_when_quota_reached(org_with_quotas, admin_auth_headers): headers=admin_auth_headers, 
) assert r.status_code == 403 - assert r.json()["detail"] == "execution_minutes_hard_cap_reached" - - -def test_crawl_stopped_when_card_cap_reached(org_with_quotas, admin_auth_headers): - # Set allowed overage on org - r = requests.post( - f"{API_PREFIX}/orgs/{org_with_quotas}/limits", - headers=admin_auth_headers, - json={"crawlExecMinutesAllowedOverage": EXEC_MINS_ALLOWED_OVERAGE}, - ) - assert r.status_code == 200 - - time.sleep(10) - - # Run new crawl from config - r = requests.post( - f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run", - headers=admin_auth_headers, - ) - assert r.status_code == 200 - crawl_id = r.json()["started"] - assert crawl_id - - time.sleep(1) - - while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in ( - "starting", - "waiting_capacity", - ): - time.sleep(2) - - while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in ( - "running", - "generate-wacz", - "uploading-wacz", - "pending-wait", - ): - time.sleep(2) - - # Ensure that crawl was stopped when hard cap reached - assert ( - get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) - == "partial_complete" - ) - - time.sleep(5) - - # Ensure crawl execution seconds went over hard cap (stopping takes a while) - r = requests.get( - f"{API_PREFIX}/orgs/{org_with_quotas}", - headers=admin_auth_headers, - ) - data = r.json() - execution_seconds = data["crawlExecSeconds"] - yymm = datetime.utcnow().strftime("%Y-%m") - assert math.floor(execution_seconds[yymm] / 60) >= EXEC_MINS_HARD_CAP - - time.sleep(5) - - # Ensure we can't start another crawl when over the hard cap - r = requests.post( - f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run", - headers=admin_auth_headers, - ) - assert r.status_code == 403 - assert r.json()["detail"] == "execution_minutes_hard_cap_reached" + assert r.json()["detail"] == "exec_minutes_quota_reached" def run_crawl(org_id, headers): diff --git a/frontend/src/components/orgs-list.ts 
b/frontend/src/components/orgs-list.ts index 8d7971e6d9..42ac5c210f 100644 --- a/frontend/src/components/orgs-list.ts +++ b/frontend/src/components/orgs-list.ts @@ -61,7 +61,7 @@ export class OrgsList extends LiteElement { label = msg("Org Storage Quota (GB)"); value = Math.floor(value / 1e9); break; - case "crawlExecMinutesQuota": + case "maxCrawlMinutesPerMonth": label = msg("Org Monthly Execution Minutes Quota"); break; default: diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index 28a6fb4f62..62572990b8 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -59,7 +59,6 @@ type Params = { collectionTab?: string; itemType?: Crawl["type"]; jobType?: JobType; - settingsTab?: string; new?: ResourceName; }; const defaultTab = "home"; @@ -104,9 +103,6 @@ export class Org extends LiteElement { @state() private showExecutionMinutesQuotaAlert = false; - @state() - private orgExecutionMinutesHardCapReached = false; - @state() private openDialogName?: ResourceName; @@ -502,16 +498,14 @@ export class Org extends LiteElement { .authState=${this.authState!} orgId=${this.orgId!} ?orgStorageQuotaReached=${this.orgStorageQuotaReached} - ?orgExecutionMinutesHardCapReached=${this - .orgExecutionMinutesHardCapReached} + ?orgExecutionMinutesQuotaReached=${this + .orgExecutionMinutesQuotaReached} workflowId=${workflowId} openDialogName=${this.viewStateData?.dialog} ?isEditing=${isEditing} ?isCrawler=${this.isCrawler} @storage-quota-update=${this.onStorageQuotaUpdate} @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} - @execution-minutes-hard-cap-update=${this - .onExecutionMinutesHardCapUpdate} > `; } @@ -529,8 +523,6 @@ export class Org extends LiteElement { jobType=${ifDefined(this.params.jobType)} @storage-quota-update=${this.onStorageQuotaUpdate} @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} - @execution-minutes-hard-cap-update=${this - .onExecutionMinutesHardCapUpdate} 
@select-new-dialog=${this.onSelectNewDialog} >`; } @@ -539,13 +531,11 @@ export class Org extends LiteElement { .authState=${this.authState!} orgId=${this.orgId!} ?orgStorageQuotaReached=${this.orgStorageQuotaReached} - ?orgExecutionMinutesHardCapReached=${this - .orgExecutionMinutesHardCapReached} + ?orgExecutionMinutesQuotaReached=${this.orgExecutionMinutesQuotaReached} userId=${this.userInfo!.id} ?isCrawler=${this.isCrawler} @storage-quota-update=${this.onStorageQuotaUpdate} @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} - @execution-minutes-hard-cap-update=${this.onExecutionMinutesHardCapUpdate} @select-new-dialog=${this.onSelectNewDialog} >`; } @@ -608,7 +598,7 @@ export class Org extends LiteElement { private renderOrgSettings() { if (!this.userInfo || !this.org) return; - const activePanel = this.params.settingsTab || "information"; + const activePanel = "information"; const isAddingMember = this.params.hasOwnProperty("invite"); return html` this.org.quotas.storageQuota) { - this.orgStorageQuotaReached = true; - } else { - this.orgStorageQuotaReached = false; - } - - if (this.orgStorageQuotaReached) { - this.showStorageQuotaAlert = true; - } + this.orgStorageQuotaReached = !!this.org?.storageQuotaReached; + this.showStorageQuotaAlert = this.orgStorageQuotaReached; } checkExecutionMinutesQuota() { - if ( - !this.org || - !this.org.crawlExecSeconds || - !this.org.quotas.crawlExecMinutesQuota || - this.org.quotas.crawlExecMinutesQuota == 0 - ) { - this.orgExecutionMinutesQuotaReached = false; - return; - } - - const monthKey = new Date().toISOString().slice(0, 7); - const monthlyExecutionSeconds = this.org.crawlExecSeconds[monthKey]; - const quota = this.org.quotas.crawlExecMinutesQuota; - const hardCap = quota + (this.org.crawlExecMinutesAllowedOverage || 0); - - if (monthlyExecutionSeconds) { - const monthlyExecutionMinutes = Math.floor(monthlyExecutionSeconds / 60); - - if (monthlyExecutionMinutes >= quota) { - 
this.orgExecutionMinutesQuotaReached = true; - } else { - this.orgExecutionMinutesQuotaReached = false; - } - - if (monthlyExecutionMinutes >= hardCap) { - this.orgExecutionMinutesHardCapReached = true; - } else { - this.orgExecutionMinutesHardCapReached = false; - } - } else { - this.orgExecutionMinutesQuotaReached = false; - this.orgExecutionMinutesHardCapReached = false; - } - - if (this.orgExecutionMinutesQuotaReached) { - this.showExecutionMinutesQuotaAlert = true; - } + this.orgExecutionMinutesQuotaReached = !!this.org?.execMinutesQuotaReached; + this.showExecutionMinutesQuotaAlert = this.orgExecutionMinutesQuotaReached; } } diff --git a/frontend/src/pages/org/settings.ts b/frontend/src/pages/org/settings.ts index 90872f2f3a..e63ebb9264 100644 --- a/frontend/src/pages/org/settings.ts +++ b/frontend/src/pages/org/settings.ts @@ -14,7 +14,7 @@ import type { CurrentUser } from "../../types/user"; import type { APIPaginatedList } from "../../types/api"; import { maxLengthValidator } from "../../utils/form"; -type Tab = "information" | "members" | "limits"; +type Tab = "information" | "members"; type User = { email: string; role: number; @@ -78,9 +78,6 @@ export class OrgSettings extends LiteElement { @property({ type: Boolean }) isSavingOrgName = false; - @property({ type: Boolean }) - isSavingOrgLimits = false; - @state() pendingInvites: Invite[] = []; @@ -97,7 +94,6 @@ export class OrgSettings extends LiteElement { return { information: msg("General"), members: msg("Members"), - limits: msg("Limits"), }; } @@ -146,7 +142,6 @@ export class OrgSettings extends LiteElement { ${this.renderTab("information", "settings")} ${this.renderTab("members", "settings/members")} - ${this.renderTab("limits", "settings/limits")} ${this.renderInformation()}${this.renderMembers()} - ${this.renderLimits()} `; } @@ -328,48 +322,6 @@ export class OrgSettings extends LiteElement { `; } - private renderLimits() { - return html`
- -
-
- - ${msg("minutes")} - -
-
-
- -
-
- ${msg( - "Allowed overage minutes beyond the organization's monthly quota. Once reached, crawl workflows will not run." - )} -
-
-
-
- ${msg("Save Changes")} -
- -
`; - } - private renderUserRole({ role }: User) { if (isAdmin(role)) return msg("Admin"); if (isCrawler(role)) return msg("Crawler"); @@ -600,46 +552,6 @@ export class OrgSettings extends LiteElement { this.isSubmittingInvite = false; } - private async onOrgLimitsSubmit(e: SubmitEvent) { - e.preventDefault(); - - const formEl = e.target as HTMLFormElement; - if (!(await this.checkFormValidity(formEl))) return; - - this.isSavingOrgLimits = true; - - const { execMinutesOverage } = serialize(formEl); - - try { - const data = await this.apiFetch( - `/orgs/${this.orgId}/limits`, - this.authState!, - { - method: "POST", - body: JSON.stringify({ - crawlExecMinutesAllowedOverage: execMinutesOverage, - }), - } - ); - - this.notify({ - message: msg(str`Successfully updated org limits settings.`), - variant: "success", - icon: "check2-circle", - }); - } catch (e: any) { - this.notify({ - message: e.isApiError - ? e.message - : msg("Sorry, couldn't update org at this time."), - variant: "danger", - icon: "exclamation-octagon", - }); - } - - this.isSavingOrgLimits = false; - } - private async removeInvite(invite: Invite) { try { await this.apiFetch( diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index abc2dd4462..d0c2a0236e 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -50,7 +50,7 @@ export class WorkflowDetail extends LiteElement { orgStorageQuotaReached = false; @property({ type: Boolean }) - orgExecutionMinutesHardCapReached = false; + orgExecutionMinutesQuotaReached = false; @property({ type: String }) workflowId!: string; @@ -585,14 +585,14 @@ export class WorkflowDetail extends LiteElement { "Org Storage Full or Monthly Execution Minutes Reached" )} ?disabled=${!this.orgStorageQuotaReached && - !this.orgExecutionMinutesHardCapReached} + !this.orgExecutionMinutesQuotaReached} > this.runNow()} > @@ -633,7 +633,7 @@ export class WorkflowDetail extends LiteElement { 
this.runNow()} > @@ -1035,12 +1035,12 @@ export class WorkflowDetail extends LiteElement { "Org Storage Full or Monthly Execution Minutes Reached" )} ?disabled=${!this.orgStorageQuotaReached && - !this.orgExecutionMinutesHardCapReached} + !this.orgExecutionMinutesQuotaReached} > this.runNow()} > @@ -1123,13 +1123,13 @@ export class WorkflowDetail extends LiteElement { "Org Storage Full or Monthly Execution Minutes Reached" )} ?disabled=${!this.orgStorageQuotaReached && - !this.orgExecutionMinutesHardCapReached} + !this.orgExecutionMinutesQuotaReached} > this.runNow()} > @@ -1610,7 +1610,7 @@ export class WorkflowDetail extends LiteElement { if (e.isApiError && e.statusCode === 403) { if (e.details === "storage_quota_reached") { message = msg("Your org does not have enough storage to run crawls."); - } else if (e.details === "execution_minutes_hard_cap_reached") { + } else if (e.details === "exec_minutes_quota_reached") { message = msg( "Your org has used all of its execution minutes for this month." 
); diff --git a/frontend/src/pages/org/workflows-list.ts b/frontend/src/pages/org/workflows-list.ts index 6a9652deb6..14b3db710e 100644 --- a/frontend/src/pages/org/workflows-list.ts +++ b/frontend/src/pages/org/workflows-list.ts @@ -75,7 +75,7 @@ export class WorkflowsList extends LiteElement { orgStorageQuotaReached = false; @property({ type: Boolean }) - orgExecutionMinutesHardCapReached = false; + orgExecutionMinutesQuotaReached = false; @property({ type: String }) userId!: string; @@ -444,7 +444,7 @@ export class WorkflowsList extends LiteElement { this.runNow(workflow)} > @@ -802,7 +802,7 @@ export class WorkflowsList extends LiteElement { if (e.isApiError && e.statusCode === 403) { if (e.details === "storage_quota_reached") { message = msg("Your org does not have enough storage to run crawls."); - } else if (e.details === "execution_minutes_hard_cap_reached") { + } else if (e.details === "exec_minutes_quota_reached") { message = msg( "Your org has used all of its execution minutes for this month." 
); diff --git a/frontend/src/routes.ts b/frontend/src/routes.ts index 18f7bd4745..0baf97ed5b 100644 --- a/frontend/src/routes.ts +++ b/frontend/src/routes.ts @@ -17,7 +17,7 @@ export const ROUTES = { "(/items(/:itemType(/:itemId)))", "(/collections(/new)(/view/:collectionId(/:collectionTab))(/edit/:collectionId))", "(/browser-profiles(/profile(/browser/:browserId)(/:browserProfileId)))", - "(/settings(/:settingsTab))", + "(/settings(/members))", ].join(""), users: "/users", usersInvite: "/users/invite", diff --git a/frontend/src/types/org.ts b/frontend/src/types/org.ts index 840cfc13b8..586ffb23cc 100644 --- a/frontend/src/types/org.ts +++ b/frontend/src/types/org.ts @@ -22,7 +22,8 @@ export type OrgData = { // Keyed by {4-digit year}-{2-digit month} [key: string]: number; } | null; - crawlExecMinutesAllowedOverage: number; + storageQuotaReached?: boolean; + execMinutesQuotaReached?: boolean; users?: { [id: string]: { role: (typeof AccessCode)[UserRole]; diff --git a/frontend/src/utils/LiteElement.ts b/frontend/src/utils/LiteElement.ts index 54f07be1f5..e60285de5e 100644 --- a/frontend/src/utils/LiteElement.ts +++ b/frontend/src/utils/LiteElement.ts @@ -184,16 +184,6 @@ export default class LiteElement extends LitElement { errorMessage = msg("Storage quota reached"); break; } - if (errorDetail === "execution_minutes_hard_cap_reached") { - this.dispatchEvent( - new CustomEvent("execution-minutes-hard-cap-update", { - detail: { reached: true }, - bubbles: true, - }) - ); - errorMessage = msg("Monthly execution minutes hard cap reached"); - break; - } if (errorDetail === "execution_minutes_quota_reached") { this.dispatchEvent( new CustomEvent("execution-minutes-quota-update", { From 53d4a7707e00024e51be1f26abd6a1e37917759e Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 12:15:26 -0400 Subject: [PATCH 30/42] Rename maxCrawlMinutesPerMonth to maxExecMinutesPerMonth --- backend/btrixcloud/models.py | 2 +- backend/btrixcloud/orgs.py | 2 +- 
backend/test_nightly/test_execution_minutes_quota.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index f389f0af8c..82fd14d9d3 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -706,7 +706,7 @@ class OrgQuotas(BaseModel): maxConcurrentCrawls: Optional[int] = 0 maxPagesPerCrawl: Optional[int] = 0 storageQuota: Optional[int] = 0 - maxCrawlMinutesPerMonth: Optional[int] = 0 + maxExecMinutesPerMonth: Optional[int] = 0 # ============================================================================ diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 250b413569..8e700c173b 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -356,7 +356,7 @@ async def get_org_exec_mins_monthly_quota(self, oid: uuid.UUID) -> int: org = await self.orgs.find_one({"_id": oid}) if org: org = Organization.from_dict(org) - return org.quotas.maxCrawlMinutesPerMonth + return org.quotas.maxExecMinutesPerMonth return 0 async def set_origin(self, org: Organization, request: Request): diff --git a/backend/test_nightly/test_execution_minutes_quota.py b/backend/test_nightly/test_execution_minutes_quota.py index f6479f3b32..fcb1d9902c 100644 --- a/backend/test_nightly/test_execution_minutes_quota.py +++ b/backend/test_nightly/test_execution_minutes_quota.py @@ -18,7 +18,7 @@ def test_set_execution_mins_quota(org_with_quotas, admin_auth_headers): r = requests.post( f"{API_PREFIX}/orgs/{org_with_quotas}/quotas", headers=admin_auth_headers, - json={"maxCrawlMinutesPerMonth": EXEC_MINS_QUOTA}, + json={"maxExecMinutesPerMonth": EXEC_MINS_QUOTA}, ) data = r.json() assert data.get("updated") == True From f984a0203858792d9dc5448f980c15619fb7e578 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 12:23:51 -0400 Subject: [PATCH 31/42] Fix typing for add_crawl_config --- backend/btrixcloud/crawlconfigs.py | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index 13f7aa071f..0102b182ba 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -3,7 +3,7 @@ """ # pylint: disable=too-many-lines -from typing import List, Union, Optional +from typing import List, Union, Optional, Tuple import uuid import asyncio @@ -135,7 +135,7 @@ async def add_crawl_config( config: CrawlConfigIn, org: Organization, user: User, - ): + ) -> Tuple[str, str, bool, bool]: """Add new crawl config""" data = config.dict() data["oid"] = org.id @@ -195,8 +195,8 @@ async def add_crawl_config( await self.add_new_crawl(crawl_id, crawlconfig, user, manual=True) return ( - result.inserted_id, - crawl_id, + result.inserted_id or "", + crawl_id or "", storage_quota_reached, exec_mins_quota_reached, ) From cee0c85882b9d7016d9c8752764ecf6dc5b902e6 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 12:35:51 -0400 Subject: [PATCH 32/42] Fix label for maxExecMinutesPerMonth --- frontend/src/components/orgs-list.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/orgs-list.ts b/frontend/src/components/orgs-list.ts index 42ac5c210f..1c3afa87a0 100644 --- a/frontend/src/components/orgs-list.ts +++ b/frontend/src/components/orgs-list.ts @@ -61,8 +61,8 @@ export class OrgsList extends LiteElement { label = msg("Org Storage Quota (GB)"); value = Math.floor(value / 1e9); break; - case "maxCrawlMinutesPerMonth": - label = msg("Org Monthly Execution Minutes Quota"); + case "maxExecMinutesPerMonth": + label = msg("Max Execution Minutes Per Month"); break; default: label = msg("Unlabeled"); From aabe83aee2f453ffbbdaec7ac542084d241266b3 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 12:48:25 -0400 Subject: [PATCH 33/42] Allow run_now_job to be None --- backend/btrixcloud/crawlconfigs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index fb2a7c2d64..9c3fb166e2 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -137,7 +137,7 @@ async def add_crawl_config( config: CrawlConfigIn, org: Organization, user: User, - ) -> Tuple[str, str, bool, bool]: + ) -> Tuple[str, Optional[str], bool, bool]: """Add new crawl config""" data = config.dict() data["oid"] = org.id @@ -197,8 +197,8 @@ async def add_crawl_config( await self.add_new_crawl(crawl_id, crawlconfig, user, manual=True) return ( - result.inserted_id or "", - crawl_id or "", + result.inserted_id, + crawl_id or None, storage_quota_reached, exec_mins_quota_reached, ) From 7ec514fb2a717de3ac08b33f91c542557c924e8d Mon Sep 17 00:00:00 2001 From: sua yoo Date: Thu, 26 Oct 2023 10:56:31 -0700 Subject: [PATCH 34/42] Show execution/running minutes meter in dashboard (#1305) Co-authored-by: Tessa Walsh --- frontend/src/pages/org/dashboard.ts | 113 ++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/frontend/src/pages/org/dashboard.ts b/frontend/src/pages/org/dashboard.ts index 072bdfd517..1b7f38cbf4 100644 --- a/frontend/src/pages/org/dashboard.ts +++ b/frontend/src/pages/org/dashboard.ts @@ -49,6 +49,7 @@ export class Dashboard extends LiteElement { crawls: "green", uploads: "sky", browserProfiles: "indigo", + runningTime: "blue", }; willUpdate(changedProperties: PropertyValues) { @@ -179,6 +180,7 @@ export class Dashboard extends LiteElement { ${this.renderCard( msg("Crawling"), (metrics) => html` + ${this.renderCrawlingMeter(metrics)}
${this.renderStat({ value: @@ -336,6 +338,117 @@ export class Dashboard extends LiteElement { `; } + private renderCrawlingMeter(metrics: Metrics) { + let quotaSeconds = 0; + if (this.org!.quotas && this.org!.quotas.maxExecMinutesPerMonth) { + quotaSeconds = this.org!.quotas.maxExecMinutesPerMonth * 60; + } + + let usageSeconds = 0; + const now = new Date(); + if (this.org!.crawlExecSeconds) { + const actualUsage = + this.org!.crawlExecSeconds[ + `${now.getFullYear()}-${now.getUTCMonth() + 1}` + ]; + if (actualUsage) { + usageSeconds = actualUsage; + } + } + + const hasQuota = Boolean(quotaSeconds); + const isReached = hasQuota && usageSeconds >= quotaSeconds; + + const renderBar = (value: number, label: string, color: string) => html` + +
+
${label}
+
+ ${humanizeDuration(value * 1000)} | + ${this.renderPercentage(value / quotaSeconds)} +
+
+
+ `; + return html` +
+ ${when( + isReached, + () => html` +
+ + ${msg("Monthly Execution Minutes Quota Reached")} +
+ `, + () => + hasQuota + ? html` + + ${humanizeDuration((quotaSeconds - usageSeconds) * 1000)} + ${msg("Available")} + + + + ` + : "" + )} +
+ ${when( + hasQuota, + () => html` +
+ + ${when(usageSeconds, () => + renderBar( + usageSeconds, + msg("Monthly Execution Time Used"), + isReached ? "warning" : this.colors.runningTime + ) + )} +
+ +
+
${msg("Monthly Execution Time Available")}
+
+ ${humanizeDuration((quotaSeconds - usageSeconds) * 1000)} + | + ${this.renderPercentage( + (quotaSeconds - usageSeconds) / quotaSeconds + )} +
+
+
+
+
+ + ${humanizeDuration(usageSeconds * 1000)} + + + ${humanizeDuration(quotaSeconds * 1000)} + +
+
+ ` + )} + `; + } + private renderCard( title: string, renderContent: (metric: Metrics) => TemplateResult, From edd49607dfcc5813498cf76cff3923646aea5ee9 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 15:06:25 -0400 Subject: [PATCH 35/42] Implement code review changes - Cap display of execution time usage at 100% - Remove extraneous tooltip --- frontend/src/pages/org/dashboard.ts | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/frontend/src/pages/org/dashboard.ts b/frontend/src/pages/org/dashboard.ts index 1b7f38cbf4..75a4030fc8 100644 --- a/frontend/src/pages/org/dashboard.ts +++ b/frontend/src/pages/org/dashboard.ts @@ -359,6 +359,10 @@ export class Dashboard extends LiteElement { const hasQuota = Boolean(quotaSeconds); const isReached = hasQuota && usageSeconds >= quotaSeconds; + if (isReached) { + usageSeconds = quotaSeconds; + } + const renderBar = (value: number, label: string, color: string) => html` ${humanizeDuration((quotaSeconds - usageSeconds) * 1000)} ${msg("Available")} - - ` : "" @@ -410,7 +406,7 @@ export class Dashboard extends LiteElement { () => html`
From af76d47fe3fa9bec13424bfca742c459d019d51a Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 15:13:22 -0400 Subject: [PATCH 36/42] Fix org settings Members tab --- frontend/src/pages/org/index.ts | 3 ++- frontend/src/routes.ts | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index 62572990b8..d6d5717709 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -59,6 +59,7 @@ type Params = { collectionTab?: string; itemType?: Crawl["type"]; jobType?: JobType; + settingsTab?: string; new?: ResourceName; }; const defaultTab = "home"; @@ -598,7 +599,7 @@ export class Org extends LiteElement { private renderOrgSettings() { if (!this.userInfo || !this.org) return; - const activePanel = "information"; + const activePanel = this.params.settingsTab || "information"; const isAddingMember = this.params.hasOwnProperty("invite"); return html` Date: Thu, 26 Oct 2023 15:32:56 -0400 Subject: [PATCH 37/42] Catch new changed exec_minutes_quota_reached detail --- frontend/src/utils/LiteElement.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/utils/LiteElement.ts b/frontend/src/utils/LiteElement.ts index e60285de5e..1cc6535fbb 100644 --- a/frontend/src/utils/LiteElement.ts +++ b/frontend/src/utils/LiteElement.ts @@ -184,7 +184,7 @@ export default class LiteElement extends LitElement { errorMessage = msg("Storage quota reached"); break; } - if (errorDetail === "execution_minutes_quota_reached") { + if (errorDetail === "exec_minutes_quota_reached") { this.dispatchEvent( new CustomEvent("execution-minutes-quota-update", { detail: { reached: true }, From fc7773855074780ee6f2c67578a9ea829248fa9b Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 15:43:58 -0400 Subject: [PATCH 38/42] Disable Run on Save in workflow editor when quotas are hit --- frontend/src/pages/org/index.ts | 2 ++ 
frontend/src/pages/org/workflow-detail.ts | 3 +++ frontend/src/pages/org/workflow-editor.ts | 13 ++++++++++++- frontend/src/pages/org/workflows-new.ts | 9 +++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index d6d5717709..0c8ef24cd2 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -522,6 +522,8 @@ export class Org extends LiteElement { .initialWorkflow=${workflow} .initialSeeds=${seeds} jobType=${ifDefined(this.params.jobType)} + ?orgStorageQuotaReached=${this.orgStorageQuotaReached} + ?orgExecutionMinutesQuotaReached=${this.orgExecutionMinutesQuotaReached} @storage-quota-update=${this.onStorageQuotaUpdate} @execution-minutes-quota-update=${this.onExecutionMinutesQuotaUpdate} @select-new-dialog=${this.onSelectNewDialog} diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index d0c2a0236e..8a78df27ea 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -536,6 +536,9 @@ export class WorkflowDetail extends LiteElement { configId=${this.workflow!.id} orgId=${this.orgId} .authState=${this.authState} + ?orgStorageQuotaReached=${this.orgStorageQuotaReached} + ?orgExecutionMinutesQuotaReached=${this + .orgExecutionMinutesQuotaReached} @reset=${(e: Event) => this.navTo( `${this.orgBasePath}/workflows/crawl/${this.workflow!.id}` diff --git a/frontend/src/pages/org/workflow-editor.ts b/frontend/src/pages/org/workflow-editor.ts index aa8f56b3e0..decc46af4d 100644 --- a/frontend/src/pages/org/workflow-editor.ts +++ b/frontend/src/pages/org/workflow-editor.ts @@ -242,6 +242,12 @@ export class CrawlConfigEditor extends LiteElement { @property({ type: Array }) initialSeeds?: Seed[]; + @property({ type: Boolean }) + orgStorageQuotaReached = false; + + @property({ type: Boolean }) + orgExecutionMinutesQuotaReached = false; + @state() private tagOptions: 
string[] = []; @@ -535,7 +541,10 @@ export class CrawlConfigEditor extends LiteElement { lang: this.initialWorkflow.config.lang, scheduleType: defaultFormState.scheduleType, scheduleFrequency: defaultFormState.scheduleFrequency, - runNow: defaultFormState.runNow, + runNow: + this.orgStorageQuotaReached || this.orgExecutionMinutesQuotaReached + ? false + : defaultFormState.runNow, tags: this.initialWorkflow.tags, autoAddCollections: this.initialWorkflow.autoAddCollections, jobName: this.initialWorkflow.name || defaultFormState.jobName, @@ -864,6 +873,8 @@ export class CrawlConfigEditor extends LiteElement { { this.updateFormState( { diff --git a/frontend/src/pages/org/workflows-new.ts b/frontend/src/pages/org/workflows-new.ts index d32d9315df..8a28aaf3a0 100644 --- a/frontend/src/pages/org/workflows-new.ts +++ b/frontend/src/pages/org/workflows-new.ts @@ -56,6 +56,12 @@ export class WorkflowsNew extends LiteElement { @property({ type: String }) jobType?: JobType; + @property({ type: Boolean }) + orgStorageQuotaReached = false; + + @property({ type: Boolean }) + orgExecutionMinutesQuotaReached = false; + // Use custom property accessor to prevent // overriding default Workflow values @property({ type: Object }) @@ -116,6 +122,9 @@ export class WorkflowsNew extends LiteElement { jobType=${jobType} orgId=${this.orgId} .authState=${this.authState} + ?orgStorageQuotaReached=${this.orgStorageQuotaReached} + ?orgExecutionMinutesQuotaReached=${this + .orgExecutionMinutesQuotaReached} @reset=${async (e: Event) => { await (e.target as LitElement).updateComplete; this.dispatchEvent( From af3351aa04d05a3bc7b3a13ca5a9047effe67ed3 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 16:04:41 -0400 Subject: [PATCH 39/42] Only go to watch tab if crawl actually started --- backend/btrixcloud/crawlconfigs.py | 2 ++ frontend/src/pages/org/workflow-editor.ts | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/btrixcloud/crawlconfigs.py 
b/backend/btrixcloud/crawlconfigs.py index 9c3fb166e2..f10879bc08 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -346,6 +346,8 @@ async def update_crawl_config( "updated": True, "settings_changed": changed, "metadata_changed": metadata_changed, + "storageQuotaReached": await this.org_ops.storage_quota_reached(org.id), + "execMinutesQuotaReached": await this.org_ops.exec_mins_quota_reached(org.id), } if run_now: crawl_id = await self.run_now(cid, org, user) diff --git a/frontend/src/pages/org/workflow-editor.ts b/frontend/src/pages/org/workflow-editor.ts index decc46af4d..661803ab0d 100644 --- a/frontend/src/pages/org/workflow-editor.ts +++ b/frontend/src/pages/org/workflow-editor.ts @@ -2169,9 +2169,9 @@ https://archiveweb.page/images/${"logo.svg"}`} body: JSON.stringify(config), })); - const crawlId = data.run_now_job; + const crawlId = data.run_now_job || data.started || null; const storageQuotaReached = data.storageQuotaReached; - const executionMinutesQuotaReached = data.executionMinutesQuotaReached; + const executionMinutesQuotaReached = data.execMinutesQuotaReached; if (storageQuotaReached) { this.notify({ @@ -2210,7 +2210,9 @@ https://archiveweb.page/images/${"logo.svg"}`} this.navTo( `${this.orgBasePath}/workflows/crawl/${this.configId || data.id}${ - crawlId && !storageQuotaReached ? "#watch" : "" + crawlId && !storageQuotaReached && !executionMinutesQuotaReached + ? 
"#watch" + : "" }` ); } catch (e: any) { From 085ed61b3668a1dd5cbba312335cb4081bcf952b Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 16:06:38 -0400 Subject: [PATCH 40/42] Format with Black --- backend/btrixcloud/crawlconfigs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index f10879bc08..c4af46efac 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -347,7 +347,9 @@ async def update_crawl_config( "settings_changed": changed, "metadata_changed": metadata_changed, "storageQuotaReached": await this.org_ops.storage_quota_reached(org.id), - "execMinutesQuotaReached": await this.org_ops.exec_mins_quota_reached(org.id), + "execMinutesQuotaReached": await this.org_ops.exec_mins_quota_reached( + org.id + ), } if run_now: crawl_id = await self.run_now(cid, org, user) From c5e5363aa63907ebfa0ec0134876c2afcadb3332 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 16:09:54 -0400 Subject: [PATCH 41/42] this -> self --- backend/btrixcloud/crawlconfigs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index c4af46efac..f6a5970355 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -346,8 +346,8 @@ async def update_crawl_config( "updated": True, "settings_changed": changed, "metadata_changed": metadata_changed, - "storageQuotaReached": await this.org_ops.storage_quota_reached(org.id), - "execMinutesQuotaReached": await this.org_ops.exec_mins_quota_reached( + "storageQuotaReached": await self.org_ops.storage_quota_reached(org.id), + "execMinutesQuotaReached": await self.org_ops.exec_mins_quota_reached( org.id ), } From 4483de9277e042732f418371d434b65209ba8c22 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 26 Oct 2023 16:14:43 -0400 Subject: [PATCH 42/42] Remove messages 
made irrelevant by disabling run on save --- frontend/src/pages/org/workflow-editor.ts | 44 ++++++----------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/frontend/src/pages/org/workflow-editor.ts b/frontend/src/pages/org/workflow-editor.ts index 661803ab0d..26aea10bbe 100644 --- a/frontend/src/pages/org/workflow-editor.ts +++ b/frontend/src/pages/org/workflow-editor.ts @@ -2173,41 +2173,19 @@ https://archiveweb.page/images/${"logo.svg"}`} const storageQuotaReached = data.storageQuotaReached; const executionMinutesQuotaReached = data.execMinutesQuotaReached; - if (storageQuotaReached) { - this.notify({ - title: msg("Workflow saved without starting crawl."), - message: msg( - "Could not run crawl with new workflow settings due to storage quota." - ), - variant: "warning", - icon: "exclamation-circle", - duration: 12000, - }); - } else if (executionMinutesQuotaReached) { - this.notify({ - title: msg("Workflow saved without starting crawl."), - message: msg( - "Could not run crawl with new workflow settings due to execution minutes quota." - ), - variant: "warning", - icon: "exclamation-circle", - duration: 12000, - }); - } else { - let message = msg("Workflow created."); - if (crawlId) { - message = msg("Crawl started with new workflow settings."); - } else if (this.configId) { - message = msg("Workflow updated."); - } - - this.notify({ - message, - variant: "success", - icon: "check2-circle", - }); + let message = msg("Workflow created."); + if (crawlId) { + message = msg("Crawl started with new workflow settings."); + } else if (this.configId) { + message = msg("Workflow updated."); } + this.notify({ + message, + variant: "success", + icon: "check2-circle", + }); + this.navTo( `${this.orgBasePath}/workflows/crawl/${this.configId || data.id}${ crawlId && !storageQuotaReached && !executionMinutesQuotaReached