From dd4f77a19d524b01d672e87d92ebf71dd30c1e0e Mon Sep 17 00:00:00 2001 From: Alessio Pollero Date: Wed, 22 Nov 2017 17:40:27 +0100 Subject: [PATCH 1/4] Add JobStatus REST web service to easily get the status of a Job. Especially useful to poll the status of a single job, to be able then to retrieve results from Item endpoint --- docs/api.rst | 22 ++++++++++++++++++++++ scrapyd/default_scrapyd.conf | 1 + scrapyd/webservice.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index e8e398eb..ee32dbdf 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -106,6 +106,28 @@ Example response:: {"status": "ok", "prevstate": "running"} +status.json +----------- + + .. versionadded:: 1.2.1 + + View the status of a spider run (aka. job). + + * Supported Request Methods: ``GET`` + * Parameters: + + * ``project`` (string, optional) - the project name + * ``job`` (string, required) - the job id + + Example request:: + + $ curl "http://localhost:6800/status.json?job=6487ec79947edab326d6db28a2d86511e8247444&project=myproject" + + Example response:: + + { "node_name": "node-name", "status": "running" } + + listprojects.json ----------------- diff --git a/scrapyd/default_scrapyd.conf b/scrapyd/default_scrapyd.conf index 4bfdc86e..93667606 100644 --- a/scrapyd/default_scrapyd.conf +++ b/scrapyd/default_scrapyd.conf @@ -19,6 +19,7 @@ webroot = scrapyd.website.Root [services] schedule.json = scrapyd.webservice.Schedule cancel.json = scrapyd.webservice.Cancel +jobstatus.json = scrapyd.webservice.JobStatus addversion.json = scrapyd.webservice.AddVersion listprojects.json = scrapyd.webservice.ListProjects listversions.json = scrapyd.webservice.ListVersions diff --git a/scrapyd/webservice.py b/scrapyd/webservice.py index f12620c9..75043089 100644 --- a/scrapyd/webservice.py +++ b/scrapyd/webservice.py @@ -35,7 +35,6 @@ def render_GET(self, txrequest): return {"node_name": self.root.nodename, "status":"ok", "pending": 
pending, "running": running, "finished": finished} - class Schedule(WsResource): def render_POST(self, txrequest): @@ -112,6 +111,34 @@ def render_GET(self, txrequest): spiders = get_spider_list(project, runner=self.root.runner, version=version) return {"node_name": self.root.nodename, "status": "ok", "spiders": spiders} +class JobStatus(WsResource): + + def render_GET(self, txrequest): + result = {"node_name": self.root.nodename, "status":"unknown"} + args = native_stringify_dict(copy(txrequest.args), keys_only=False) + job = args['job'][0] + project = args.get('project', [None])[0] + spiders = self.root.launcher.processes.values() + queues = self.root.poller.queues + + for s in self.root.launcher.finished: + if (project is None or s.project == project) and (s.job == job): + result["status"] = "finished" + return result + + for s in spiders: + if (project is None or s.project == project) and (s.job == job): + result["status"] = "running" + return result + + for x in queues[qname].list(): + for qname in (queues if project is None else [project]): + if(x["_job"] == job): + result["status"] = "pending" + return result + + return result + class ListJobs(WsResource): def render_GET(self, txrequest): From 222aa0c2d621a60545f8a9bc21225aa1074a059d Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 16 Jul 2024 02:27:05 -0400 Subject: [PATCH 2/4] chore: "status" is reserved for the webservice's status. Use "currstate" to match "prevstate". Rename jobstatus to status, since we don't do "jobcancel" or "jobschedule". 
--- docs/api.rst | 46 +++++++++++++++------------- docs/config.rst | 2 ++ docs/news.rst | 8 +++-- integration_tests/test_webservice.py | 1 + scrapyd/default_scrapyd.conf | 2 +- scrapyd/webservice.py | 35 +++++++++++++-------- 6 files changed, 56 insertions(+), 38 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 2eec2f94..390dcebe 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -111,6 +111,30 @@ Example: When a parameter other than ``setting`` is entered multiple times with ``-d``, only the first value is sent to the spider. +.. _status.json: + +status.json +----------- + +.. versionadded:: 1.5.0 + +Get the status of a job. + +Supported request methods + ``GET`` +Parameters + ``job`` (required) + the job ID + ``project`` + the project name + +Example: + +.. code-block:: shell-session + + $ curl http://localhost:6800/status.json?job=6487ec79947edab326d6db28a2d86511e8247444 + {"node_name": "mynodename", "status": "ok", "currstate": "running"} + .. _cancel.json: cancel.json @@ -138,28 +162,6 @@ Example: .. _listprojects.json: -status.json ------------ - - .. versionadded:: 1.2.1 - - View the status of a spider run (aka. job). - - * Supported Request Methods: ``GET`` - * Parameters: - - * ``project`` (string, optional) - the project name - * ``job`` (string, required) - the job id - - Example request:: - - $ curl "http://localhost:6800/status.json?job=6487ec79947edab326d6db28a2d86511e8247444&project=myproject" - - Example response:: - - { "node_name": "node-name", "status": "running" } - - listprojects.json ----------------- diff --git a/docs/config.rst b/docs/config.rst index 900aeb82..91debf17 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -18,6 +18,8 @@ the :ref:`example `). Environment variables --------------------- +.. 
versionadded:: 1.5.0 + The following environment variables override corresponding options: * ``SCRAPYD_BIND_ADDRESS`` (:ref:`bind_address`) diff --git a/docs/news.rst b/docs/news.rst index 0ffca853..4eba3019 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -13,8 +13,12 @@ Added - Respond to HTTP ``OPTIONS`` method requests. - Add environment variables to override common options. See :doc:`config`. -- Add documentation on how to add webservices (endpoints). See :ref:`config-services`. -- Add documentation on how to create a Docker image. See :ref:`docker`. + +Documentation +^^^^^^^^^^^^^ + +- How to add webservices (endpoints). See :ref:`config-services`. +- How to create Docker images. See :ref:`docker`. Changed ~~~~~~~ diff --git a/integration_tests/test_webservice.py b/integration_tests/test_webservice.py index 101b263e..ea643ee6 100644 --- a/integration_tests/test_webservice.py +++ b/integration_tests/test_webservice.py @@ -20,6 +20,7 @@ def assert_webservice(method, path, expected, **kwargs): ("addversion", "POST"), ("schedule", "POST"), ("cancel", "POST"), + ("status", "GET"), ("listprojects", "GET"), ("listversions", "GET"), ("listspiders", "GET"), diff --git a/scrapyd/default_scrapyd.conf b/scrapyd/default_scrapyd.conf index cf71e80e..622ec111 100644 --- a/scrapyd/default_scrapyd.conf +++ b/scrapyd/default_scrapyd.conf @@ -36,7 +36,7 @@ dbs_dir = dbs [services] schedule.json = scrapyd.webservice.Schedule cancel.json = scrapyd.webservice.Cancel -jobstatus.json = scrapyd.webservice.JobStatus +status.json = scrapyd.webservice.Status addversion.json = scrapyd.webservice.AddVersion listprojects.json = scrapyd.webservice.ListProjects listversions.json = scrapyd.webservice.ListVersions diff --git a/scrapyd/webservice.py b/scrapyd/webservice.py index a5fa3258..da1b08f8 100644 --- a/scrapyd/webservice.py +++ b/scrapyd/webservice.py @@ -73,6 +73,7 @@ def render_GET(self, txrequest): "finished": finished, } + class Schedule(WsResource): def render_POST(self, 
txrequest): @@ -158,30 +159,32 @@ def render_GET(self, txrequest): return {"node_name": self.root.nodename, "status": "ok", "spiders": spiders} -class JobStatus(WsResource): +class Status(WsResource): def render_GET(self, txrequest): - result = {"node_name": self.root.nodename, "status":"unknown"} args = native_stringify_dict(copy(txrequest.args), keys_only=False) - job = args['job'][0] + job = _get_required_param(args, 'job')[0] project = args.get('project', [None])[0] + spiders = self.root.launcher.processes.values() queues = self.root.poller.queues + result = {"node_name": self.root.nodename, "status": "ok", "currstate": "unknown"} + for s in self.root.launcher.finished: - if (project is None or s.project == project) and (s.job == job): - result["status"] = "finished" + if (project is None or s.project == project) and s.job == job: + result["currstate"] = "finished" return result for s in spiders: - if (project is None or s.project == project) and (s.job == job): - result["status"] = "running" + if (project is None or s.project == project) and s.job == job: + result["currstate"] = "running" return result - for x in queues[qname].list(): - for qname in (queues if project is None else [project]): - if(x["_job"] == job): - result["status"] = "pending" + for qname in (queues if project is None else [project]): + for x in queues[qname].list(): + if x["_job"] == job: + result["currstate"] = "pending" return result return result @@ -192,8 +195,10 @@ class ListJobs(WsResource): def render_GET(self, txrequest): args = native_stringify_dict(copy(txrequest.args), keys_only=False) project = args.get('project', [None])[0] + spiders = self.root.launcher.processes.values() queues = self.root.poller.queues + pending = [ {"project": qname, "spider": x["name"], "id": x["_job"]} for qname in (queues if project is None else [project]) @@ -206,7 +211,9 @@ def render_GET(self, txrequest): "id": s.job, "pid": s.pid, "start_time": str(s.start_time), - } for s in spiders if project is 
None or s.project == project + } + for s in spiders + if project is None or s.project == project ] finished = [ { @@ -217,9 +224,11 @@ def render_GET(self, txrequest): "end_time": str(s.end_time), "log_url": job_log_url(s), "items_url": job_items_url(s), - } for s in self.root.launcher.finished + } + for s in self.root.launcher.finished if project is None or s.project == project ] + return {"node_name": self.root.nodename, "status": "ok", "pending": pending, "running": running, "finished": finished} From 384c0547289d8783da6bbc3b97c4c5aa76dcfcf6 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 16 Jul 2024 02:48:38 -0400 Subject: [PATCH 3/4] test: Add status.json integration test --- integration_tests/test_webservice.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/integration_tests/test_webservice.py b/integration_tests/test_webservice.py index d71c3e49..e95a23c5 100644 --- a/integration_tests/test_webservice.py +++ b/integration_tests/test_webservice.py @@ -60,6 +60,15 @@ def test_schedule(): ) +def test_status_nonexistent(): + assert_webservice( + "get", + "/status.json", + {"status": "ok", "currstate": "unknown"}, + params={"job": "sample"}, + ) + + def test_cancel_nonexistent(): assert_webservice( "post", From 94985e75b03e0da5792959eceac7384aaff6282d Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 16 Jul 2024 02:51:24 -0400 Subject: [PATCH 4/4] docs: Add changelog entry --- docs/news.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/news.rst b/docs/news.rst index 83c242a3..044611a1 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -11,6 +11,7 @@ Unreleased Added ~~~~~ +- Add a :ref:`status.json` webservice, to get the status of a job. - Respond to HTTP ``OPTIONS`` method requests. - Add environment variables to override common options. See :doc:`config`.