Skip to content

Commit

Permalink
Cancel crawl immediately when exec minutes hard cap passed
Browse files Browse the repository at this point in the history
Track runningExecTime in operator CrawlStatus to track execution
time for running crawler pods. If the running crawl time + monthly
execution time tracked in org exceeds hard cap, cancel crawl.

This should cancel the crawl within about 5-10 seconds of hard cap
being exceeded (i.e. on next sync), rather than at the end of the
crawl or when a crawler pod restarts as before.
  • Loading branch information
tw4l committed Oct 17, 2023
1 parent c43d9b5 commit d5cca67
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 13 deletions.
23 changes: 20 additions & 3 deletions backend/btrixcloud/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,10 @@ class CrawlStatus(BaseModel):
execTime: int = 0
canceled: bool = False

# Track estimated exec time as crawlers are running to cancel
# the crawl quickly if execution minutes hard cap is reached
runningExecTime: int = 0

# don't include in status, use by metacontroller
resync_after: Optional[int] = None

Expand Down Expand Up @@ -309,7 +313,7 @@ async def sync_profile_browsers(self, data: MCSyncData):

return {"status": {}, "children": children}

# pylint: disable=too-many-return-statements
# pylint: disable=too-many-return-statements, invalid-name
async def sync_crawls(self, data: MCSyncData):
"""sync crawls"""

Expand Down Expand Up @@ -400,15 +404,17 @@ async def sync_crawls(self, data: MCSyncData):
)
return self._empty_response(status)

# Cancel crawl if execution minutes hard cap is reached while running
_, exec_mins_hard_cap_reached = await self.org_ops.execution_mins_quota_reached(
crawl.oid
crawl.oid, status.runningExecTime
)
if exec_mins_hard_cap_reached:
await self.cancel_crawl(
crawl.id, uuid.UUID(cid), uuid.UUID(oid), status, data.children[POD]
)

# Reset runningExecTime to recalculate below for next sync
status.runningExecTime = 0

if status.state in ("starting", "waiting_org_limit"):
if not await self.can_start_new(crawl, data, status):
return self._empty_response(status)
Expand Down Expand Up @@ -951,6 +957,9 @@ async def sync_pod_status(self, pods, status, oid):
name, oid, role, status, cstatus["state"].get("terminated")
)

if role == "crawler":
self.increment_running_exec_time(cstatus["state"], status)

if role == "crawler":
crawler_running = crawler_running or running
done = done and phase == "Succeeded"
Expand All @@ -964,6 +973,14 @@ async def sync_pod_status(self, pods, status, oid):

return crawler_running, redis_running, done

def increment_running_exec_time(self, container_state, status):
"""Increment runningExecTime in crawl status for running crawler pod"""
running = container_state.get("running")
if running:
start_time = from_k8s_date(running.get("startedAt"))
running_exec_time = int((datetime.now() - start_time).total_seconds())
status.runningExecTime += running_exec_time

async def handle_terminated_pod(self, name, oid, role, status, terminated):
"""handle terminated pod state"""
if not terminated:
Expand Down
26 changes: 16 additions & 10 deletions backend/btrixcloud/orgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,20 @@ async def storage_quota_reached(self, oid: uuid.UUID) -> bool:

return False

async def execution_mins_quota_reached(self, oid: uuid.UUID) -> Tuple[bool, bool]:
"""Return boolean indicating if execution minutes quota is met or exceeded."""
async def get_this_month_crawl_exec_seconds(self, oid: uuid.UUID) -> int:
"""Return crawlExecSeconds for current month"""
org = await self.orgs.find_one({"_id": oid})
org = Organization.from_dict(org)
yymm = datetime.utcnow().strftime("%Y-%m")
try:
return org.crawlExecSeconds[yymm]
except KeyError:
return 0

async def execution_mins_quota_reached(
self, oid: uuid.UUID, running_exec_seconds: int = 0
) -> Tuple[bool, bool]:
"""Return bools for if execution minutes quota and hard cap are reached."""
quota = await self.get_org_execution_mins_quota(oid)
if not quota:
return False, False
Expand All @@ -337,14 +349,8 @@ async def execution_mins_quota_reached(self, oid: uuid.UUID) -> Tuple[bool, bool
hard_cap_additional_mins = await self.get_org_execution_mins_hard_cap(oid)
hard_cap_quota = quota + hard_cap_additional_mins

org = await self.orgs.find_one({"_id": oid})
org = Organization.from_dict(org)

yymm = datetime.utcnow().strftime("%Y-%m")
try:
monthly_exec_seconds = org.crawlExecSeconds[yymm]
except KeyError:
monthly_exec_seconds = 0
monthly_exec_seconds = await self.get_this_month_crawl_exec_seconds(oid)
monthly_exec_seconds = monthly_exec_seconds + running_exec_seconds
monthly_exec_minutes = math.floor(monthly_exec_seconds / 60)

if monthly_exec_minutes >= quota:
Expand Down

0 comments on commit d5cca67

Please sign in to comment.