Skip to content

Commit

Permalink
A quick script to check if a job took longer than x time to run and u…
Browse files Browse the repository at this point in the history
…pdate python in precommit
  • Loading branch information
EmanElsaban committed Nov 21, 2023
1 parent 425b32f commit 6ede8b6
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 6 deletions.
7 changes: 4 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
default_language_version:
python: python3.6
python: python3.8
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
Expand All @@ -21,6 +21,7 @@ repos:
hooks:
- id: flake8
exclude: ^docs/source/conf.py$
language_version: python3.8
- repo: https://github.com/asottile/reorder_python_imports
rev: v1.9.0
hooks:
Expand All @@ -41,8 +42,8 @@ repos:
language: script
files: ^tests/.*\.py$
- repo: http://github.com/psf/black
rev: 19.10b0
rev: 23.3.0
hooks:
- id: black
language_version: python3.6
language_version: python3.8
args: [--target-version, py36]
6 changes: 3 additions & 3 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ asynctest==0.12.0
cfgv==2.0.1
entrypoints==0.3
flake8==3.7.9
identify==1.4.9
identify==2.4.4
importlib-resources==1.0.2
iniconfig==1.1.1
isort==4.3.18
Expand All @@ -15,7 +15,7 @@ mypy-extensions==0.4.3
nodeenv==1.3.3
packaging==19.2
pluggy==0.13.0
pre-commit==1.21.0
pre-commit==2.9.2
py==1.10.0
pycodestyle==2.5.0
pyflakes==2.1.1
Expand All @@ -26,4 +26,4 @@ pytest-asyncio==0.14.0
requirements-tools==1.2.1
toml==0.10.2
typed-ast==1.4.0
virtualenv==16.7.5
virtualenv==20.0.8
95 changes: 95 additions & 0 deletions tron/bin/check_job_exceeding_time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env python3.8
import logging
import sys

import pytimeparse

from tron.commands import cmd_utils
from tron.commands.client import Client


# Module-level logger registered under the name "check_exceeding_time".
# NOTE(review): nothing in the visible part of this script writes to it.
log = logging.getLogger("check_exceeding_time")


def parse_cli():
    """Parse the command line and return the resulting namespace.

    The parser comes from ``cmd_utils.build_option_parser`` and is extended
    with two options:

    * ``--job``: name of a single job to inspect; stays ``None`` when omitted.
    * ``--time``: integer threshold stored as ``time_limit``.
    """
    option_parser = cmd_utils.build_option_parser()
    option_parser.add_argument(
        "--job",
        help="Check if a particular job exceeded a time to run. If unset checks all jobs",
        default=None,
    )
    # 18000 is the 5 hours promised by the help text, expressed in seconds.
    option_parser.add_argument(
        "--time",
        dest="time_limit",
        type=int,
        default=18000,
        help="This is used to specify the time that if any job exceeds will show. Defaults to 5 hours",
    )
    return option_parser.parse_args()


def check_if_time_exceeded(job_runs, job_expected_runtime, result):
    """Collect the ids of runs that ran longer than ``job_expected_runtime``.

    Args:
        job_runs: iterable of job-run dicts; each is read through its
            ``"state"`` and ``"id"`` keys.
        job_expected_runtime: threshold handed to
            ``is_job_run_exceeding_expected_runtime``.
        result: list that receives the id of every offending run. It is
            mutated in place; the function itself returns ``None``.
    """
    # Runs in these states are ignored and never compared to the threshold.
    ignored_states = {"queued", "scheduled", "cancelled", "skipped"}
    for run in job_runs:
        if run.get("state", "unknown") in ignored_states:
            continue
        if is_job_run_exceeding_expected_runtime(run, job_expected_runtime):
            result.append(run["id"])


def is_job_run_exceeding_expected_runtime(job_run, job_expected_runtime):
    """Tell whether a single run lasted longer than the expected runtime.

    Args:
        job_run: job-run dict; its ``"state"`` and ``"duration"`` keys are read.
        job_expected_runtime: threshold compared against the parsed duration,
            or ``None`` to disable the check.

    Returns:
        ``True`` only when a threshold is given, the run is not in an ignored
        state, and its duration parses to a truthy value greater than the
        threshold. ``False`` in every other case.
    """
    if job_expected_runtime is None:
        return False
    if job_run.get("state", "unknown") in {"queued", "scheduled", "cancelled", "skipped"}:
        return False

    # A missing duration is parsed as the empty string; a falsy parse result
    # (nothing parsed, or zero) can never count as exceeding the threshold.
    parsed_duration = pytimeparse.parse(job_run.get("duration", ""))
    return bool(parsed_duration and parsed_duration > job_expected_runtime)


def check_job_time(job, time_limit, result):
    """Order a job's runs and record those that exceeded ``time_limit``.

    Args:
        job: job dict; its ``"runs"`` list is used (empty when absent).
        time_limit: threshold forwarded to ``check_if_time_exceeded``.
        result: list extended in place with the ids of offending runs.
    """

    def _ordering_key(run):
        ended_at = run["end_time"]
        return (ended_at is None, ended_at, run["run_time"])

    # Descending order: runs without an end time come first, then runs from
    # the latest end time to the earliest (ties broken by run_time).
    ordered_runs = list(job.get("runs", []))
    ordered_runs.sort(key=_ordering_key, reverse=True)

    check_if_time_exceeded(ordered_runs, time_limit, result)


def main():
    """Report job runs that took longer than the ``--time`` threshold.

    Parses the command line, sets up logging and configuration through
    ``cmd_utils``, then inspects either every job (no ``--job`` given) or the
    single named job, and prints the ids of the runs that exceeded the limit.

    Returns:
        ``None`` in all cases.
    """
    args = parse_cli()
    cmd_utils.setup_logging(args)
    cmd_utils.load_config(args)
    client = Client(args.server, args.cluster_name)
    result = []

    if args.job is None:
        for job in client.jobs(include_job_runs=True):
            check_job_time(job=job, time_limit=args.time_limit, result=result)
    else:
        job_url = client.get_url(args.job)
        job = client.job_runs(job_url)
        # check_job_time only accepts job, time_limit and result; the previous
        # call passed client/url_index and omitted time_limit, which raised
        # TypeError whenever --job was supplied.
        check_job_time(job=job, time_limit=args.time_limit, result=result)

    # result is always a list, so test for emptiness rather than for None;
    # otherwise the "all within limit" message could never be printed.
    if not result:
        print("All jobs ran within the time limit")
    else:
        print(f"These are the runs that took longer than {args.time_limit} to run: {result}")
    return


# Script entry point. main() returns None, so when it completes sys.exit()
# ends the process with status 0 whether or not any run exceeded the limit.
if __name__ == "__main__":
    sys.exit(main())
86 changes: 86 additions & 0 deletions tron/bin/get_jobs_exceeding_runtime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env python3.8
import argparse
import logging
import sys
from typing import Optional

import pytimeparse

from tron.commands import cmd_utils
from tron.commands.client import Client


# Module-level logger registered under the name "check_exceeding_time".
log = logging.getLogger("check_exceeding_time")

# NOTE: despite the name, these are the run states that are *excluded* from
# the runtime check: check_if_time_exceeded only examines runs whose state is
# NOT in this set.
STATES_TO_CHECK = {"queued", "scheduled", "cancelled", "skipped"}


def parse_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    The parser comes from ``cmd_utils.build_option_parser`` and is extended
    with two options:

    * ``--job``: name of a single job to inspect; stays ``None`` when omitted.
    * ``--time``: integer threshold stored as ``time_limit``.
    """
    option_parser = cmd_utils.build_option_parser()
    option_parser.add_argument(
        "--job",
        help="Check if a particular job exceeded a time to run. If unset checks all jobs",
        default=None,
    )
    # 18000 is the 5 hours promised by the help text, expressed in seconds.
    option_parser.add_argument(
        "--time",
        dest="time_limit",
        type=int,
        default=18000,
        help="This is used to specify the time that if any job exceeds will show. Defaults to 5 hours",
    )
    return option_parser.parse_args()


def check_if_time_exceeded(job_runs, job_expected_runtime) -> list:
    """Return the ids of the runs that ran longer than ``job_expected_runtime``.

    Args:
        job_runs: iterable of job-run dicts; each is read through its
            ``"state"`` and ``"id"`` keys.
        job_expected_runtime: threshold handed to
            ``is_job_run_exceeding_expected_runtime``.

    Returns:
        A new list of run ids, in the order the runs were supplied. Runs whose
        state is listed in ``STATES_TO_CHECK`` are left out without being
        compared to the threshold.
    """
    return [
        run["id"]
        for run in job_runs
        if run.get("state", "unknown") not in STATES_TO_CHECK
        and is_job_run_exceeding_expected_runtime(run, job_expected_runtime)
    ]


def is_job_run_exceeding_expected_runtime(job_run, job_expected_runtime) -> bool:
    """Tell whether a single run lasted longer than the expected runtime.

    Args:
        job_run: job-run dict; its ``"duration"`` key is parsed with
            ``pytimeparse.parse``.
        job_expected_runtime: threshold compared against the parsed duration,
            or ``None`` to disable the check.

    Returns:
        ``True`` when a threshold is given and the duration parses to a truthy
        value greater than it; ``False`` otherwise. The result is always a
        real ``bool``.
    """
    if job_expected_runtime is None:
        return False

    # Treat a missing or explicitly-None duration as the empty string so the
    # parser is never handed a non-string value.
    duration_seconds = pytimeparse.parse(job_run.get("duration") or "")
    # `a and b` alone would leak None/0 to the caller when nothing was parsed,
    # contradicting the declared bool return type.
    return bool(duration_seconds and duration_seconds > job_expected_runtime)


def check_job_time(job, time_limit) -> list:
    """Return the ids of ``job``'s runs that exceeded ``time_limit``.

    Args:
        job: job dict; its ``"runs"`` list is used (empty when absent).
        time_limit: threshold forwarded to ``check_if_time_exceeded``.
    """
    return check_if_time_exceeded(job.get("runs", []), time_limit)


def main() -> Optional[int]:
    """Report job runs that took longer than the ``--time`` threshold.

    Parses the command line, sets up logging and configuration through
    ``cmd_utils``, then fetches the runs of either every job (no ``--job``
    given) or the single named job, and prints the sorted ids of the runs that
    exceeded the limit.

    Returns:
        ``None`` in all cases.
    """
    args = parse_args()
    cmd_utils.setup_logging(args)
    cmd_utils.load_config(args)
    client = Client(args.server, args.cluster_name)

    if args.job is not None:
        job_names = [args.job]
    else:
        # Lazy on purpose: each name is pulled from the listing right before
        # that job's runs are fetched.
        job_names = (listed["name"] for listed in client.jobs(include_job_runs=True))

    results = []
    for name in job_names:
        job_with_runs = client.job_runs(client.get_url(name))
        results.extend(check_job_time(job=job_with_runs, time_limit=args.time_limit))

    if results:
        print(f"These are the runs that took longer than {args.time_limit} to run: {sorted(results)}")
    else:
        print("All jobs ran within the time limit")
    return None


# Script entry point. main() returns None, so when it completes sys.exit()
# ends the process with status 0 whether or not any run exceeded the limit.
if __name__ == "__main__":
    sys.exit(main())

0 comments on commit 6ede8b6

Please sign in to comment.