From ff132601de4f894cab2bdf4eab5a8b8f8397001f Mon Sep 17 00:00:00 2001 From: Stephen Herbein Date: Thu, 27 Feb 2020 22:14:35 -0800 Subject: [PATCH 1/8] gitignore: add ignore rule for sphinx build directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..e35d8850 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +_build From 99d1b3034ad084f0236419cc292b3e07a36666e8 Mon Sep 17 00:00:00 2001 From: Stephen Herbein Date: Thu, 27 Feb 2020 22:28:54 -0800 Subject: [PATCH 2/8] validate: enforce python3 usage --- Makefile | 2 +- validate.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b708c60d..748cb87b 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ check: $(SCHEMA_DIRS) spelling ./indexcheck spec_*.rst $(SCHEMA_DIRS): - python ./validate.py --schema=$@/schema.json $@/*.yaml + python3 ./validate.py --schema=$@/schema.json $@/*.yaml spelling: @$(SPHINXBUILD) -W -b spelling "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/validate.py b/validate.py index bb6f6ba1..1e3e5aac 100755 --- a/validate.py +++ b/validate.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Usage: validate.py --schema=jobspec.json data.json [data.json ...] # Usage: cat data.json | validate.py --schema=jobspec.json From 914ad819522bddca6e179d467762f037828cd2e6 Mon Sep 17 00:00:00 2001 From: Stephen Herbein Date: Thu, 27 Feb 2020 16:34:45 -0800 Subject: [PATCH 3/8] rfc25: fix non-canonical copy-paste typo --- spec_25.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec_25.rst b/spec_25.rst index a7d68c73..1bb9573e 100644 --- a/spec_25.rst +++ b/spec_25.rst @@ -58,7 +58,7 @@ This RFC describes the version 1 form of "jobspec", a domain specific language based on YAML [#f1]_. The version 1 of jobspec SHALL consist of a single YAML document representing a reusable request to run exactly one program. 
Hereafter, "jobspec" refers to the version 1 -form, and "non-canonical jobspec" refers to the non-canonical form. +form, and "canonical jobspec" refers to the canonical form. Jobspec Language Definition From 85c0ea0abe59260f7a6116115942bae171c09dba Mon Sep 17 00:00:00 2001 From: Stephen Herbein Date: Thu, 27 Feb 2020 22:40:12 -0800 Subject: [PATCH 4/8] rfc14: add missing `per_resource` count to schema.json --- data/spec_14/schema.json | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/data/spec_14/schema.json b/data/spec_14/schema.json index e8c462c9..3a0561cc 100644 --- a/data/spec_14/schema.json +++ b/data/spec_14/schema.json @@ -128,7 +128,17 @@ "type": "object", "properties": { "per_slot": { "type": "integer", "minimum" : 1 }, - "total": { "type": "integer", "minimum" : 1 } + "total": { "type": "integer", "minimum" : 1 }, + "per_resource": { "type": "object", + "required": ["type", "count"], + "properties": { + "type": {"type": "string"}, + "count": { + "type": "integer", + "minimum" : 1 + } + } + } } }, "distribution": { "type": "string" }, From 68b293840fe41f3b20abd51d261b00515bc3f9ce Mon Sep 17 00:00:00 2001 From: Stephen Herbein Date: Thu, 27 Feb 2020 16:32:37 -0800 Subject: [PATCH 5/8] rfc31: copy over rfc25 (jobspec v1) as starting point for v2 Also copy most use cases from spec_25's data. The last example (2.5) comes from spec_14. 
--- data/spec_31/example1.yaml | 22 +++ data/spec_31/schema.json | 147 ++++++++++++++++ data/spec_31/use_case_1.1.yaml | 22 +++ data/spec_31/use_case_2.1.yaml | 22 +++ data/spec_31/use_case_2.2.yaml | 19 +++ data/spec_31/use_case_2.3.yaml | 21 +++ data/spec_31/use_case_2.4.yaml | 24 +++ data/spec_31/use_case_2.5.yaml | 30 ++++ spec_31.rst | 295 +++++++++++++++++++++++++++++++++ 9 files changed, 602 insertions(+) create mode 100644 data/spec_31/example1.yaml create mode 100644 data/spec_31/schema.json create mode 100644 data/spec_31/use_case_1.1.yaml create mode 100644 data/spec_31/use_case_2.1.yaml create mode 100644 data/spec_31/use_case_2.2.yaml create mode 100644 data/spec_31/use_case_2.3.yaml create mode 100644 data/spec_31/use_case_2.4.yaml create mode 100644 data/spec_31/use_case_2.5.yaml create mode 100644 spec_31.rst diff --git a/data/spec_31/example1.yaml b/data/spec_31/example1.yaml new file mode 100644 index 00000000..dfef75d5 --- /dev/null +++ b/data/spec_31/example1.yaml @@ -0,0 +1,22 @@ +version: 1 +resources: + - type: node + count: 4 + with: + - type: slot + count: 1 + label: default + with: + - type: core + count: 2 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. 
+ cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/schema.json b/data/spec_31/schema.json new file mode 100644 index 00000000..e8c462c9 --- /dev/null +++ b/data/spec_31/schema.json @@ -0,0 +1,147 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://github.com/flux-framework/rfc/tree/master/data/spec_14/schema.json", + "title": "canonical-jobspec", + + "description": "Flux canonical jobspec", + + "definitions": { + "complex_range": { + "description": "a complex range of numbers", + "type": "object", + "properties":{ + "min": { "type": "integer", "minimum" : 1 }, + "max": { "type": "integer", "minimum" : 1 }, + "operator": { "type": "string", "enum": ["+", "*", "^"] }, + "operand": { "type": "integer", "minimum" : 1 } + }, + "required": ["min"], + "dependencies": { + "max": { "required": ["operator", "operand"] }, + "operator": { "required": ["max", "operand"] }, + "operand": { "required": ["max", "operator"] } + }, + "additionalProperties": false + }, + "resource_vertex_base": { + "description": "base schema for slot/other resource vertex", + "type": "object", + "required": ["type", "count"], + "properties": { + "type": { "type": "string" }, + "count": { + "oneOf": [ + { "type": "integer", "minimum" : 1 }, + { "$ref": "#/definitions/complex_range" } + ] + }, + "exclusive": { "type": "boolean" }, + "with": { + "type": "array", + "items": { "$ref": "#/definitions/resource_vertex" } + }, + "id": { "type": "string" }, + "unit": { "type": "string" }, + "label": { "type": "string" } + }, + "additionalProperties": false + }, + "resource_vertex_slot": { + "description": "special slot resource type - label assigns to task slot", + "allOf": [ + { "$ref": "#/definitions/resource_vertex_base" }, + { + "properties": { + "type": { "enum": ["slot"] } + }, + "required": ["label"] + } + ] + }, + "resource_vertex_other": { + "description": "other (non-slot) resource type", + "allOf": [ + { "$ref": 
"#/definitions/resource_vertex_base" }, + { + "properties": { + "type": { "not": { "enum": ["slot"] } } + } + } + ] + }, + "resource_vertex": { + "oneOf":[ + { "$ref": "#/definitions/resource_vertex_slot" }, + { "$ref": "#/definitions/resource_vertex_other" } + ] + } + }, + + "type": "object", + "required": ["version", "resources", "attributes", "tasks"], + "properties": { + "version": { + "description": "the jobspec version", + "type": "integer" + }, + "resources": { + "description": "requested resources", + "type": "array", + "minItems": 1, + "items": { "$ref": "#/definitions/resource_vertex" } + }, + "attributes": { + "description": "system and user attributes", + "type": ["object", "null"], + "properties": { + "system": { + "type": "object", + "properties": { + "duration": { "type": "number", "minimum": 0 }, + "cwd": { "type": "string" }, + "environment": { "type": "object" }, + "dependencies" : { + "$ref": "file:data/spec_26/schema.json" + } + } + }, + "user": { + "type": "object" + } + }, + "additionalProperties": false + }, + "tasks": { + "description": "task configuration", + "type": "array", + "items": { + "type": "object", + "required": ["command", "slot", "count" ], + "properties": { + "command": { + "type": "array", + "minItems": 1, + "items": { "type": "string" } + }, + "slot": { "type": "string" }, + "count": { + "type": "object", + "properties": { + "per_slot": { "type": "integer", "minimum" : 1 }, + "total": { "type": "integer", "minimum" : 1 } + } + }, + "distribution": { "type": "string" }, + "attributes": { + "type": "object", + "properties": { + "environment": { "type" : "object"} + }, + "additionalProperties": { "type": "string" } + } + }, + "additionalProperties": false + } + } + } +} diff --git a/data/spec_31/use_case_1.1.yaml b/data/spec_31/use_case_1.1.yaml new file mode 100644 index 00000000..8b3c177f --- /dev/null +++ b/data/spec_31/use_case_1.1.yaml @@ -0,0 +1,22 @@ +version: 1 +resources: + - type: node + count: 4 + with: + - type: slot 
+ count: 1 + label: default + with: + - type: core + count: 1 +tasks: + - command: [ "flux", "start" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.1.yaml b/data/spec_31/use_case_2.1.yaml new file mode 100644 index 00000000..40707c6d --- /dev/null +++ b/data/spec_31/use_case_2.1.yaml @@ -0,0 +1,22 @@ +version: 1 +resources: + - type: node + count: 4 + with: + - type: slot + count: 1 + label: myslot + with: + - type: core + count: 1 +tasks: + - command: [ "hostname" ] + slot: myslot + count: + total: 5 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.2.yaml b/data/spec_31/use_case_2.2.yaml new file mode 100644 index 00000000..aa8d56f1 --- /dev/null +++ b/data/spec_31/use_case_2.2.yaml @@ -0,0 +1,19 @@ +version: 1 +resources: + - type: slot + label: default + count: 10 + with: + - type: core + count: 2 +tasks: + - command: [ "myapp" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.3.yaml b/data/spec_31/use_case_2.3.yaml new file mode 100644 index 00000000..ec1d3bc7 --- /dev/null +++ b/data/spec_31/use_case_2.3.yaml @@ -0,0 +1,21 @@ +version: 1 +resources: + - type: slot + count: 10 + label: default + with: + - type: core + count: 2 + - type: gpu + count: 1 +tasks: + - command: [ "myapp" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. 
+ cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.4.yaml b/data/spec_31/use_case_2.4.yaml new file mode 100644 index 00000000..30f5aa2a --- /dev/null +++ b/data/spec_31/use_case_2.4.yaml @@ -0,0 +1,24 @@ +version: 1 +resources: + - type: node + count: 4 + with: + - type: slot + count: 4 + label: default + with: + - type: core + count: 1 + - type: gpu + count: 1 +tasks: + - command: [ "myapp" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.5.yaml b/data/spec_31/use_case_2.5.yaml new file mode 100644 index 00000000..6072aebb --- /dev/null +++ b/data/spec_31/use_case_2.5.yaml @@ -0,0 +1,30 @@ +version: 999 +resources: + - type: slot + count: 1 + label: default + with: + - type: node + count: 1 +tasks: + - command: [ "flux", "start" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + dependencies: + - type: in + scope: user + scheme: fluid + value: hungry-hippo-white-elephant + - type: in + scope: user + scheme: string + value: foo + - type: out + scope: user + scheme: string + value: bar diff --git a/spec_31.rst b/spec_31.rst new file mode 100644 index 00000000..5f455500 --- /dev/null +++ b/spec_31.rst @@ -0,0 +1,295 @@ + +25/Job Specification Version 1 +============================== + +A domain specific language based on YAML is defined to express the resource +requirements and other attributes of one or more programs submitted to a Flux +instance for execution. This RFC describes the version 1 of jobspec, which +represents a request to run exactly one program. This version is a simplified +version of the canonical jobspec format described in +:doc:`RFC 14 `. 
+ +- Name: github.com/flux-framework/rfc/spec_25.rst + +- Editor: Stephen Herbein + +- State: raw + + +Language +-------- + +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", +"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to +be interpreted as described in `RFC 2119 `__. + + +Related Standards +----------------- + +- :doc:`4/Flux Resource Model ` + +- :doc:`8/Flux Task and Program Execution Services ` + +- :doc:`14/Canonical Job Specification ` + +- :doc:`20/Resource Set Specification Version 1 ` + + +Goals +----- + +- Express the resource requirements of a program to the scheduler. + +- Allow resource requirements to be expressed simply in terms of Nodes, CPUs, + and GPUs. + +- Express program attributes such as arguments, run time, and + task layout, to be considered by the program execution service (RFC 12) + + +Overview +-------- + +This RFC describes the version 1 form of "jobspec", a domain specific language +based on YAML [#f1]_. The version 1 of jobspec SHALL consist of +a single YAML document representing a reusable request to run +exactly one program. Hereafter, "jobspec" refers to the version 1 +form, and "canonical jobspec" refers to the canonical form. + + +Jobspec Language Definition +--------------------------- + +A jobspec V1 YAML document SHALL consist of a dictionary +defining the resources, tasks and other attributes of a single +program. The dictionary MUST contain the keys ``resources``, ``tasks``, +``attributes``, and ``version``. + +Each of the listed jobspec keys SHALL meet the form and requirements +listed in detail in the sections below. For reference, a ruleset for +compliant jobspec V1 is provided in the **Schema** section below. + + +Resources +~~~~~~~~~ + +The value of the ``resources`` key SHALL be a strict list which MUST define either +``node`` or ``slot`` as the first and only resource. Each list element SHALL represent a +**resource vertex** (described below). 
+ +A resource vertex SHALL contain only the following keys: + +- type + +- count + +- unit + +- with + +- label + +The definitions of ``unit``, ``with``, and ``label`` SHALL match +those found in RFC14. The others are redefined and simplified to mean the +following: + +**type** + The ``type`` key for a resource SHALL indicate the type of resource to be + matched. In V1, only four resource types are valid: [``node``, ``slot``, ``core``, + and ``gpu``]. ``slot`` types are described in the **Reserved Resource Types** section + below. + +**count** + The ``count`` key SHALL indicate the desired number of + resources matching the current vertex. The ``count`` SHALL be a single integer + value representing a fixed count + + +V1-Specific Resource Graph Restrictions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In V1, the ``resources`` list MUST contain exactly one element, which MUST be +either ``node`` or ``slot``. Additionally, the resource graph MUST contain the +``core`` type. + +In V1, there are also restrictions on which resources can have ``out`` edges to +other resources. Specifically, a ``node`` can have an out edge to a ``slot``, and a +``slot`` can have an ``out`` edge to a ``core``. If a ``slot`` has an ``out`` edge to a +``core``, it can also, optionally, have an ``out`` edge to a ``gpu`` as +well. Therefore, the complete enumeration of valid resource graphs in V1 is: + +- ``slot>core`` + +- ``node>slot>core`` + +- ``slot>(core,gpu)`` + +- ``node>slot>(core,gpu)`` + + +Tasks +~~~~~ + +The value of the ``tasks`` key SHALL be a strict list which MUST define exactly +one task. The list element SHALL be a dictionary representing a task to run as +part of the program. A task descriptor SHALL contain the following keys, whose +definitions SHALL match those provided in RFC14: + +- command + +- slot + +- count + + - per_slot + + - total + + +Attributes +~~~~~~~~~~ + +The ``attributes`` key SHALL be a dictionary of +dictionaries. 
The ``attributes`` dictionary MUST contain ``system`` key and MAY +contain the ``user`` key. Common ``system`` keys are listed below, and their +definitions can be found in RFC14. Values MAY have any valid YAML type. + +- user + +- system + + - duration + + - environment + + - cwd + +Most system attributes are optional, but the ``duration`` attribute is required in +jobspec V1. + + +Example Jobspec +~~~~~~~~~~~~~~~ + +Under the description above, the following is an example of a fully compliant +version 1 jobspec. The example below declares a request for 4 "nodes" +each of which with 1 task slot consisting of 2 cores each, for a total +of 4 task slots. A single copy of the command ``app`` will be run on each +task slot for a total of 4 tasks. + +.. literalinclude:: data/spec_25/example1.yaml + :language: yaml + + +Basic Use Cases +--------------- + +To implement basic resource manager functionality, the following use +cases SHALL be supported by the jobspec: + + +Section 1: Node-level Requests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following "node-level" requests are all requests to start an instance, +i.e. run a single copy of ``flux start`` per allocated node. Many of these +requests are similar to existing resource manager batch job submission or +allocation requests, i.e. equivalent to ``oarsub``, ``qsub``, and ``salloc``. + +Use Case 1.1 + Request nodes outside of a slot + +Specific Example + Request 4 nodes, each with 1 slot + +Existing Equivalents + +-----------------------------------+-----------------------------------+ + | Slurm | ``salloc -N4`` | + +-----------------------------------+-----------------------------------+ + | PBS | ``qsub -l nodes=4`` | + +-----------------------------------+-----------------------------------+ + +Jobspec YAML + .. 
literalinclude:: data/spec_25/use_case_1.1.yaml + :language: yaml + + +Section 2: General Requests +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following use cases are more general and include more complex slot placement +and task counts. + +Use Case 2.1 + Run N tasks across M nodes, unequal distribution + +Specific Example + Run 5 copies of ``hostname`` across 4 nodes, + default distribution + +Existing Equivalents + +-----------------------------------+-----------------------------------+ + | Slurm | ``srun -n5 -N4 hostname`` | + +-----------------------------------+-----------------------------------+ + +Jobspec YAML + .. literalinclude:: data/spec_25/use_case_2.1.yaml + :language: yaml + +Use Case 2.2 + Run N tasks, Require M cores per task + +Specific Example + Run 10 copies of ``myapp``, require 2 cores per copy, + for a total of 20 cores + +Existing Equivalents + +-----------------------------------+-----------------------------------+ + | Slurm | ``srun -n10 -c 2 myapp`` | + +-----------------------------------+-----------------------------------+ + +Jobspec YAML + .. literalinclude:: data/spec_25/use_case_2.2.yaml + :language: yaml + +Use Case 2.3 + Run N tasks, Require M cores and J gpus per task + +Specific Example + Run 10 copies of ``myapp``, require 2 cores and 1 gpu per copy, + for a total of 20 cores and 10 gpus + +Jobspec YAML + .. literalinclude:: data/spec_25/use_case_2.3.yaml + :language: yaml + +Use Case 2.4 + Run N tasks across M nodes, each task with 1 core and 1 gpu + +Specific Example + Run 16 copies of ``myapp`` across 4 nodes, each copy with + 1 core and 1 gpu + +Existing Equivalents + +-----------------------------------+-------------------------------------------+ + | Slurm | ``srun -n16 -N4 --gpus-per-task=1 myapp`` | + +-----------------------------------+-------------------------------------------+ + +Jobspec YAML + .. 
literalinclude:: data/spec_25/use_case_2.4.yaml + :language: yaml + + +Schema +~~~~~~ + +A jobspec conforming to version 1 of the language definition SHALL +adhere to the following ruleset, described using JSON Schema [#f2]_. + +.. literalinclude:: data/spec_25/schema.json + :language: json + +.. [#f1] `YAML Ain’t Markup Language (YAML) Version 1.1 `__, O. Ben-Kiki, C. Evans, B. Ingerson, 2004. + +.. [#f2] `JSON Schema: A Media Type for Describing JSON Documents `__; H. Andrews; 2018 From 8680493b7ef5729b6f9c558b730e090060342298 Mon Sep 17 00:00:00 2001 From: Stephen Herbein Date: Thu, 27 Feb 2020 17:21:58 -0800 Subject: [PATCH 6/8] rfc31: update jobspec v1 specification into v2 mainly designed to handle the flexibility provided by the new `flux run` and `flux submit` interfaces - relaxes restrictions on the ordering of resources, especially in regards to the slot (i.e., a node can be inside or outside a slot). - adds exclusive flag - adds per_resource count key for late-binding of tasks to resources - adds job dependencies Update examples to meet Jobspec V2 specification, including adding a new use case example to highlight the capabilities of V2. --- Makefile | 2 +- README.md | 1 + data/spec_31/example1.yaml | 2 +- data/spec_31/schema.json | 28 +++-- data/spec_31/use_case_1.1.yaml | 2 +- data/spec_31/use_case_1.2.yaml | 22 ++++ data/spec_31/use_case_1.3.yaml | 24 ++++ data/spec_31/use_case_2.1.yaml | 2 +- data/spec_31/use_case_2.2.yaml | 2 +- data/spec_31/use_case_2.3.yaml | 2 +- data/spec_31/use_case_2.4.yaml | 2 +- data/spec_31/use_case_2.5.yaml | 2 +- index.rst | 8 ++ spec_31.rst | 198 +++++++++++++++++++++------------ 14 files changed, 212 insertions(+), 85 deletions(-) create mode 100644 data/spec_31/use_case_1.2.yaml create mode 100644 data/spec_31/use_case_1.3.yaml diff --git a/Makefile b/Makefile index 748cb87b..19668cb7 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ SOURCEDIR = . 
BUILDDIR = _build # YAML Validation on these directories -SCHEMA_DIRS=data/spec_26 data/spec_14 +SCHEMA_DIRS=data/spec_31 data/spec_26 data/spec_14 # Put it first so that "make" without argument is like "make help". help: diff --git a/README.md b/README.md index bc6b3a02..3c10304c 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ Table of Contents - [27/Flux Resource Allocation Protocol Version 1](spec_27.rst) - [29/Hostlist Format](spec_29.rst) - [30/Job Urgency](spec_30.rst) +- [31/Job Specification Version 2](spec_31.rst) Build Instructions ------------------ diff --git a/data/spec_31/example1.yaml b/data/spec_31/example1.yaml index dfef75d5..f654856d 100644 --- a/data/spec_31/example1.yaml +++ b/data/spec_31/example1.yaml @@ -1,4 +1,4 @@ -version: 1 +version: 2 resources: - type: node count: 4 diff --git a/data/spec_31/schema.json b/data/spec_31/schema.json index e8c462c9..464392a7 100644 --- a/data/spec_31/schema.json +++ b/data/spec_31/schema.json @@ -1,9 +1,9 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://github.com/flux-framework/rfc/tree/master/data/spec_14/schema.json", - "title": "canonical-jobspec", + "$id": "http://github.com/flux-framework/rfc/tree/master/data/spec_31/schema.json", + "title": "jobspec-02", - "description": "Flux canonical jobspec", + "description": "Flux jobspec version 2", "definitions": { "complex_range": { @@ -38,7 +38,9 @@ "exclusive": { "type": "boolean" }, "with": { "type": "array", - "items": { "$ref": "#/definitions/resource_vertex" } + "items": { "$ref": "#/definitions/resource_vertex" }, + "minItems": 1, + "maxItems": 2 }, "id": { "type": "string" }, "unit": { "type": "string" }, @@ -64,7 +66,7 @@ { "$ref": "#/definitions/resource_vertex_base" }, { "properties": { - "type": { "not": { "enum": ["slot"] } } + "type": { "enum": ["node", "gpu", "core"] } } } ] @@ -82,12 +84,14 @@ "properties": { "version": { "description": "the jobspec version", - "type": "integer" + "type": "integer", + 
"enum": [2] }, "resources": { "description": "requested resources", "type": "array", "minItems": 1, + "maxItems": 1, "items": { "$ref": "#/definitions/resource_vertex" } }, "attributes": { @@ -128,7 +132,17 @@ "type": "object", "properties": { "per_slot": { "type": "integer", "minimum" : 1 }, - "total": { "type": "integer", "minimum" : 1 } + "total": { "type": "integer", "minimum" : 1 }, + "per_resource": { "type": "object", + "required": ["type", "count"], + "properties": { + "type": {"type": "string"}, + "count": { + "type": "integer", + "minimum" : 1 + } + } + } } }, "distribution": { "type": "string" }, diff --git a/data/spec_31/use_case_1.1.yaml b/data/spec_31/use_case_1.1.yaml index 8b3c177f..8858a07b 100644 --- a/data/spec_31/use_case_1.1.yaml +++ b/data/spec_31/use_case_1.1.yaml @@ -1,4 +1,4 @@ -version: 1 +version: 2 resources: - type: node count: 4 diff --git a/data/spec_31/use_case_1.2.yaml b/data/spec_31/use_case_1.2.yaml new file mode 100644 index 00000000..4c630e66 --- /dev/null +++ b/data/spec_31/use_case_1.2.yaml @@ -0,0 +1,22 @@ +version: 2 +resources: + - type: slot + count: 4 + label: default + with: + - type: node + count: 1 + with: + - type: core + count: 1 +tasks: + - command: [ "flux", "start" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_1.3.yaml b/data/spec_31/use_case_1.3.yaml new file mode 100644 index 00000000..1320b420 --- /dev/null +++ b/data/spec_31/use_case_1.3.yaml @@ -0,0 +1,24 @@ +version: 2 +resources: + - type: slot + count: 1 + label: default + with: + - type: node + count: {min: 1} + with: + - type: core + count: 120 +tasks: + - command: [ "flux", "start" ] + slot: default + count: + per_resource: + type: node + count: 1 +attributes: + system: + duration: 3600. 
+ cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.1.yaml b/data/spec_31/use_case_2.1.yaml index 40707c6d..366ce317 100644 --- a/data/spec_31/use_case_2.1.yaml +++ b/data/spec_31/use_case_2.1.yaml @@ -1,4 +1,4 @@ -version: 1 +version: 2 resources: - type: node count: 4 diff --git a/data/spec_31/use_case_2.2.yaml b/data/spec_31/use_case_2.2.yaml index aa8d56f1..4d3ebb7a 100644 --- a/data/spec_31/use_case_2.2.yaml +++ b/data/spec_31/use_case_2.2.yaml @@ -1,4 +1,4 @@ -version: 1 +version: 2 resources: - type: slot label: default diff --git a/data/spec_31/use_case_2.3.yaml b/data/spec_31/use_case_2.3.yaml index ec1d3bc7..75293e8a 100644 --- a/data/spec_31/use_case_2.3.yaml +++ b/data/spec_31/use_case_2.3.yaml @@ -1,4 +1,4 @@ -version: 1 +version: 2 resources: - type: slot count: 10 diff --git a/data/spec_31/use_case_2.4.yaml b/data/spec_31/use_case_2.4.yaml index 30f5aa2a..43992e45 100644 --- a/data/spec_31/use_case_2.4.yaml +++ b/data/spec_31/use_case_2.4.yaml @@ -1,4 +1,4 @@ -version: 1 +version: 2 resources: - type: node count: 4 diff --git a/data/spec_31/use_case_2.5.yaml b/data/spec_31/use_case_2.5.yaml index 6072aebb..c19d56fb 100644 --- a/data/spec_31/use_case_2.5.yaml +++ b/data/spec_31/use_case_2.5.yaml @@ -1,4 +1,4 @@ -version: 999 +version: 2 resources: - type: slot count: 1 diff --git a/index.rst b/index.rst index 9381edcd..ae0ffeb8 100644 --- a/index.rst +++ b/index.rst @@ -209,6 +209,13 @@ This specification describes the Flux implementation of the Hostlist Format This specification describes the Flux job urgency parameter. +:doc:`31/Job Specification Version 2 ` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Version 2 of the domain specific job specification language canonically defined +in RFC14. + + .. Each file must appear in a toctree .. toctree:: :hidden: @@ -241,3 +248,4 @@ This specification describes the Flux job urgency parameter. 
spec_27 spec_29 spec_30 + spec_31 diff --git a/spec_31.rst b/spec_31.rst index 5f455500..7e85fb33 100644 --- a/spec_31.rst +++ b/spec_31.rst @@ -1,15 +1,16 @@ -25/Job Specification Version 1 +31/Job Specification Version 2 ============================== A domain specific language based on YAML is defined to express the resource requirements and other attributes of one or more programs submitted to a Flux -instance for execution. This RFC describes the version 1 of jobspec, which +instance for execution. This RFC describes the version 2 of jobspec, which represents a request to run exactly one program. This version is a simplified version of the canonical jobspec format described in -:doc:`RFC 14 `. +:doc:`RFC 14 `. This version is an extended version of the V1 jobspec +format described in :doc:`RFC 25 `. -- Name: github.com/flux-framework/rfc/spec_25.rst +- Name: github.com/flux-framework/rfc/spec_31.rst - Editor: Stephen Herbein @@ -35,48 +36,57 @@ Related Standards - :doc:`20/Resource Set Specification Version 1 ` +- :doc:`25/Job Specification Version 1 ` + +- :doc:`26/Job Dependency Specification ` Goals ----- - Express the resource requirements of a program to the scheduler. -- Allow resource requirements to be expressed simply in terms of Nodes, CPUs, - and GPUs. +- Allow resource requirements to be expressed in terms of an expanded set of + resources versus those allowed in Jobspec V1. + + - Support the forms of jobspec produced by the `flux run` command. - Express program attributes such as arguments, run time, and task layout, to be considered by the program execution service (RFC 12) +- Express dependencies relative to other programs executing within + the same Flux instance. Overview -------- -This RFC describes the version 1 form of "jobspec", a domain specific language -based on YAML [#f1]_. The version 1 of jobspec SHALL consist of +This RFC describes the version 2 form of "jobspec", a domain specific language +based on YAML [#f1]_. 
The version 2 of jobspec SHALL consist of a single YAML document representing a reusable request to run -exactly one program. Hereafter, "jobspec" refers to the version 1 +exactly one program. Hereafter, "jobspec" refers to the version 2 form, and "canonical jobspec" refers to the canonical form. Jobspec Language Definition --------------------------- -A jobspec V1 YAML document SHALL consist of a dictionary +A jobspec V2 YAML document SHALL consist of a dictionary defining the resources, tasks and other attributes of a single program. The dictionary MUST contain the keys ``resources``, ``tasks``, ``attributes``, and ``version``. Each of the listed jobspec keys SHALL meet the form and requirements listed in detail in the sections below. For reference, a ruleset for -compliant jobspec V1 is provided in the **Schema** section below. +compliant jobspec V2 is provided in the **Schema** section below. Resources ~~~~~~~~~ -The value of the ``resources`` key SHALL be a strict list which MUST define either -``node`` or ``slot`` as the first and only resource. Each list element SHALL represent a -**resource vertex** (described below). +The value of the ``resources`` key SHALL be a strict list which MUST contain +exactly one resource. The list element SHALL represent a **resource vertex** +(described below). These keys and definitions are similar to those in Jobspec +V1. V2 adds the `exclusive` key and uses the canonical jobspec definition for +`count`, rather than the simplified definition used in V1. A resource vertex SHALL contain only the following keys: @@ -90,43 +100,38 @@ A resource vertex SHALL contain only the following keys: - label -The definitions of ``unit``, ``with``, and ``label`` SHALL match -those found in RFC14. The others are redefined and simplified to mean the -following: +- exclusive + +The definitions of ``count``, ``unit``, ``with``, ``label``, and ``exclusive`` +SHALL match those found in RFC14. 
The others are redefined and simplified to +mean the following: **type** The ``type`` key for a resource SHALL indicate the type of resource to be - matched. In V1, only four resource types are valid: [``node``, ``slot``, ``core``, - and ``gpu``]. ``slot`` types are described in the **Reserved Resource Types** section - below. + matched. In V2, only eight resource types are valid: [``node``, ``socket``, + ``core``, ``hardware-thread``, ``gpu``, ``memory``, ``storage``, and + ``slot``]. ``slot`` types are described in the :ref:`rfc14-reserved-resource-types`. -**count** - The ``count`` key SHALL indicate the desired number of - resources matching the current vertex. The ``count`` SHALL be a single integer - value representing a fixed count -V1-Specific Resource Graph Restrictions +V2-Specific Resource Graph Restrictions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In V1, the ``resources`` list MUST contain exactly one element, which MUST be -either ``node`` or ``slot``. Additionally, the resource graph MUST contain the -``core`` type. - -In V1, there are also restrictions on which resources can have ``out`` edges to -other resources. Specifically, a ``node`` can have an out edge to a ``slot``, and a -``slot`` can have an ``out`` edge to a ``core``. If a ``slot`` has an ``out`` edge to a -``core``, it can also, optionally, have an ``out`` edge to a ``gpu`` as -well. Therefore, the complete enumeration of valid resource graphs in V1 is: +In V2, the ``resources`` list MUST contain exactly one element. Additionally, +the resource graph MUST contain the ``slot`` type and either the ``core`` or the +``hardware-thread`` type. -- ``slot>core`` - -- ``node>slot>core`` - -- ``slot>(core,gpu)`` - -- ``node>slot>(core,gpu)`` +In V2, there are restrictions on the order in which resources can appear. The +*primary* compute resources MUST only appear in the following order (with +intermediate resources being optional): +``node->socket->core->hardware-thread``. 
The ``slot`` resource MUST appear +somewhere within that resource chain and have at least one *primary* compute +resource as a child. *Auxiliary* resources (i.e., ``memory``, ``storage``, +``gpu``) can be siblings or children of any *primary* compute resource. For +example: ``(node->(socket->(core,gpu), memory), storage)``. +In V2, an ``exclusive`` key with a value of ``false`` SHALL NOT be included in a +``slot`` or any of its children. Tasks ~~~~~ @@ -144,16 +149,26 @@ definitions SHALL match those provided in RFC14: - per_slot + - per_resource + - total +- attributes + +- distribution + +These keys are the same as those in Jobspec V1 except for the addition of +``per_resource``, which enables the late-binding of tasks to resources (i.e., the +number of tasks is determined after the resource request is allocated by the +scheduler). Attributes ~~~~~~~~~~ -The ``attributes`` key SHALL be a dictionary of -dictionaries. The ``attributes`` dictionary MUST contain ``system`` key and MAY -contain the ``user`` key. Common ``system`` keys are listed below, and their -definitions can be found in RFC14. Values MAY have any valid YAML type. +The ``attributes`` key SHALL be a dictionary of dictionaries. The ``attributes`` +dictionary MUST contain the ``system`` key and MAY contain the ``user`` key. Common +``system`` keys are listed below, and their definitions can be found in +RFC14. Values MAY have any valid YAML type. - user @@ -165,20 +180,21 @@ definitions can be found in RFC14. Values MAY have any valid YAML type. - cwd -Most system attributes are optional, but the ``duration`` attribute is required in -jobspec V1. + - dependencies +Most system attributes are optional, but the ``duration`` attribute is required +in jobspec V2. Example Jobspec ~~~~~~~~~~~~~~~ Under the description above, the following is an example of a fully compliant -version 1 jobspec. The example below declares a request for 4 "nodes" +version 2 jobspec.
The example below declares a request for 4 "nodes" each of which with 1 task slot consisting of 2 cores each, for a total of 4 task slots. A single copy of the command ``app`` will be run on each task slot for a total of 4 tasks. -.. literalinclude:: data/spec_25/example1.yaml +.. literalinclude:: data/spec_31/example1.yaml :language: yaml @@ -204,23 +220,53 @@ Specific Example Request 4 nodes, each with 1 slot Existing Equivalents - +-----------------------------------+-----------------------------------+ - | Slurm | ``salloc -N4`` | - +-----------------------------------+-----------------------------------+ - | PBS | ``qsub -l nodes=4`` | - +-----------------------------------+-----------------------------------+ + +--------+-----------------------+ + | Slurm | ``salloc -N4`` | + +--------+-----------------------+ + | PBS | ``qsub -l nodes=4`` | + +--------+-----------------------+ Jobspec YAML - .. literalinclude:: data/spec_25/use_case_1.1.yaml + .. literalinclude:: data/spec_31/use_case_1.1.yaml :language: yaml +Use Case 1.2 + Request nodes inside of a slot + +Specific Example + Request 4 slots, each with 1 node + +Existing Equivalents + +---------+---------------------+ + | Slurm | ``salloc -N4`` | + +---------+---------------------+ + | PBS | ``qsub -l nodes=4`` | + +---------+---------------------+ + +Jobspec YAML + .. literalinclude:: data/spec_31/use_case_1.2.yaml + :language: yaml + + +Use Case 1.3 + Request a fixed number of cores with no constraint on nodes + +Specific Example + Request 120 cores, one broker per node + +Jobspec YAML + .. literalinclude:: data/spec_31/use_case_1.3.yaml + :language: yaml + + Section 2: General Requests ~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following use cases are more general and include more complex slot placement and task counts. 
+ Use Case 2.1 Run N tasks across M nodes, unequal distribution @@ -229,12 +275,12 @@ Specific Example default distribution Existing Equivalents - +-----------------------------------+-----------------------------------+ - | Slurm | ``srun -n5 -N4 hostname`` | - +-----------------------------------+-----------------------------------+ + +--------+----------------------------+ + | Slurm | ``srun -n5 -N4 hostname`` | + +--------+----------------------------+ Jobspec YAML - .. literalinclude:: data/spec_25/use_case_2.1.yaml + .. literalinclude:: data/spec_31/use_case_2.1.yaml :language: yaml Use Case 2.2 @@ -245,12 +291,12 @@ Specific Example for a total of 20 cores Existing Equivalents - +-----------------------------------+-----------------------------------+ - | Slurm | ``srun -n10 -c 2 myapp`` | - +-----------------------------------+-----------------------------------+ + +--------+---------------------------+ + | Slurm | ``srun -n10 -c 2 myapp`` | + +--------+---------------------------+ Jobspec YAML - .. literalinclude:: data/spec_25/use_case_2.2.yaml + .. literalinclude:: data/spec_31/use_case_2.2.yaml :language: yaml Use Case 2.3 @@ -261,7 +307,7 @@ Specific Example for a total of 20 cores and 10 gpus Jobspec YAML - .. literalinclude:: data/spec_25/use_case_2.3.yaml + .. literalinclude:: data/spec_31/use_case_2.3.yaml :language: yaml Use Case 2.4 @@ -272,22 +318,34 @@ Specific Example 1 core and 1 gpu Existing Equivalents - +-----------------------------------+-------------------------------------------+ - | Slurm | ``srun -n16 -N4 --gpus-per-task=1 myapp`` | - +-----------------------------------+-------------------------------------------+ + +-------+-------------------------------------------+ + | Slurm | ``srun -n16 -N4 --gpus-per-task=1 myapp`` | + +-------+-------------------------------------------+ Jobspec YAML - .. literalinclude:: data/spec_25/use_case_2.4.yaml + .. 
literalinclude:: data/spec_31/use_case_2.4.yaml :language: yaml +Use Case 2.5 + Specify dependencies + +Specific Example + Depend on two previously submitted jobs. The first job’s + Flux ID (fluid) is known (``hungry-hippo-white-elephant``). The second job’s + fluid is not known but its ``out`` dependency (``foo``) is known. Also provide an + ``out`` dependency (``bar``) that other jobs can depend on. + +Jobspec YAML + .. literalinclude:: data/spec_31/use_case_2.5.yaml + :language: yaml Schema ~~~~~~ -A jobspec conforming to version 1 of the language definition SHALL +A jobspec conforming to version 2 of the language definition SHALL adhere to the following ruleset, described using JSON Schema [#f2]_. -.. literalinclude:: data/spec_25/schema.json +.. literalinclude:: data/spec_31/schema.json :language: json .. [#f1] `YAML Ain’t Markup Language (YAML) Version 1.1 `__, O. Ben-Kiki, C. Evans, B. Ingerson, 2004. From 20fc45ddedf3b3a6daa12d5626c2e4b648440518 Mon Sep 17 00:00:00 2001 From: Stephen Herbein Date: Thu, 27 Feb 2020 20:46:24 -0800 Subject: [PATCH 7/8] rfc14,25,31: switch use-case X.Y headers to rst titles improves the separation between use-cases by making the use-case headers distinct from the headers for "existing examples" and "jobspec YAML" --- spec_14.rst | 45 ++++++++++++++++++++++++++++++--------------- spec_25.rst | 15 ++++++++++----- spec_31.rst | 33 ++++++++++++++++++++------------- 3 files changed, 60 insertions(+), 33 deletions(-) diff --git a/spec_14.rst b/spec_14.rst index 7dc994ae..1b6e30cd 100644 --- a/spec_14.rst +++ b/spec_14.rst @@ -409,7 +409,8 @@ requests are similar to existing resource manager batch job submission or allocation requests, i.e. equivalent to ``oarsub``, ``qsub``, and ``salloc``. 
Use Case 1.1 - Request Single Resource with Count +^^^^^^^^^^^^ +Request Single Resource with Count Specific Example Request 4 nodes @@ -426,7 +427,8 @@ Jobspec YAML :language: yaml Use Case 1.2 - Request a range of a type of resource +^^^^^^^^^^^^ +Request a range of a type of resource Specific Example Request between 3 and 30 nodes @@ -441,7 +443,8 @@ Jobspec YAML :language: yaml Use Case 1.3 - Request M nodes with a minimum number of sockets per node +^^^^^^^^^^^^ +Request M nodes with a minimum number of sockets per node and cores per socket Specific Example @@ -462,7 +465,8 @@ Jobspec YAML :language: yaml Use Case 1.4 - Exclusively allocate nodes, while constraining cores and +^^^^^^^^^^^^ +Exclusively allocate nodes, while constraining cores and sockets. Specific Example @@ -474,7 +478,8 @@ Jobspec YAML :language: yaml Use Case 1.5 - Complex example from OAR +^^^^^^^^^^^^ +Complex example from OAR Specific Example ask for 1 core on 2 nodes on the same cluster with 4096 GB of memory @@ -494,7 +499,8 @@ Jobspec YAML :language: yaml Use Case 1.6 - Request resources across multiple clusters +^^^^^^^^^^^^ +Request resources across multiple clusters Specific Example Ask for 30 cores on 2 clusters (total = 60 cores), with 1 flux broker launched per node @@ -504,7 +510,8 @@ Jobspec YAML :language: yaml Use Case 1.7 - Request N cores across M switches +^^^^^^^^^^^^ +Request N cores across M switches Specific Example Request 3 cores across 3 switches, with 1 flux broker launched per node @@ -525,7 +532,8 @@ The following use cases are more general and include more complex slot placement and task counts. 
Use Case 2.1 - Run N tasks across M nodes +^^^^^^^^^^^^ +Run N tasks across M nodes Specific Example Run ``hostname`` 20 times on 4 nodes, 5 per node @@ -543,7 +551,8 @@ Jobspec YAML :language: yaml Use Case 2.2 - Run N tasks across M nodes, unequal distribution +^^^^^^^^^^^^ +Run N tasks across M nodes, unequal distribution Specific Example Run 5 copies of ``hostname`` across 4 nodes, @@ -559,7 +568,8 @@ Jobspec YAML :language: yaml Use Case 2.3 - Run N tasks, Require M cores per task +^^^^^^^^^^^^ +Run N tasks, Require M cores per task Specific Example Run 10 copies of ``myapp``, require 2 cores per copy, @@ -575,7 +585,8 @@ Jobspec YAML :language: yaml Use Case 2.4 - Run different binaries with differing resource +^^^^^^^^^^^^ +Run different binaries with differing resource requirements as single program Specific Example @@ -590,7 +601,8 @@ Jobspec YAML :language: yaml Use Case 2.5 - Run command requesting minimum amount of RAM per core +^^^^^^^^^^^^ +Run command requesting minimum amount of RAM per core Specific Example Run 10 copies of ``app`` across 10 cores with at least 2GB per core @@ -605,7 +617,8 @@ Jobspec YAML :language: yaml Use Case 2.6 - Run N copies of a command with minimum amount of RAM per node +^^^^^^^^^^^^ +Run N copies of a command with minimum amount of RAM per node Specific Example Run 10 copies of ``app`` across 2 nodes with at least 4GB per node @@ -622,7 +635,8 @@ Jobspec YAML :language: yaml Use Case 2.7 - Override the global environment +^^^^^^^^^^^^ +Override the global environment Specific Example Run two different tasks, one with the global environment and one with an @@ -633,7 +647,8 @@ Jobspec YAML :language: yaml Use Case 2.8 - Specify dependencies +^^^^^^^^^^^^ +Specify dependencies Specific Example Depend on two previously submitted jobs. 
The first job’s diff --git a/spec_25.rst b/spec_25.rst index 1bb9573e..bfd24143 100644 --- a/spec_25.rst +++ b/spec_25.rst @@ -201,7 +201,8 @@ requests are similar to existing resource manager batch job submission or allocation requests, i.e. equivalent to ``oarsub``, ``qsub``, and ``salloc``. Use Case 1.1 - Request nodes outside of a slot +^^^^^^^^^^^^ +Request nodes outside of a slot Specific Example Request 4 nodes, each with 1 slot @@ -225,7 +226,8 @@ The following use cases are more general and include more complex slot placement and task counts. Use Case 2.1 - Run N tasks across M nodes, unequal distribution +^^^^^^^^^^^^ +Run N tasks across M nodes, unequal distribution Specific Example Run 5 copies of ``hostname`` across 4 nodes, @@ -241,7 +243,8 @@ Jobspec YAML :language: yaml Use Case 2.2 - Run N tasks, Require M cores per task +^^^^^^^^^^^^ +Run N tasks, Require M cores per task Specific Example Run 10 copies of ``myapp``, require 2 cores per copy, @@ -257,7 +260,8 @@ Jobspec YAML :language: yaml Use Case 2.3 - Run N tasks, Require M cores and J gpus per task +^^^^^^^^^^^^ +Run N tasks, Require M cores and J gpus per task Specific Example Run 10 copies of ``myapp``, require 2 cores and 1 gpu per copy, @@ -268,7 +272,8 @@ Jobspec YAML :language: yaml Use Case 2.4 - Run N tasks across M nodes, each task with 1 core and 1 gpu +^^^^^^^^^^^^ +Run N tasks across M nodes, each task with 1 core and 1 gpu Specific Example Run 16 copies of ``myapp`` across 4 nodes, each copy with diff --git a/spec_31.rst b/spec_31.rst index 7e85fb33..00aaf8b9 100644 --- a/spec_31.rst +++ b/spec_31.rst @@ -78,7 +78,6 @@ Each of the listed jobspec keys SHALL meet the form and requirements listed in detail in the sections below. For reference, a ruleset for compliant jobspec V2 is provided in the **Schema** section below. - Resources ~~~~~~~~~ @@ -214,7 +213,8 @@ requests are similar to existing resource manager batch job submission or allocation requests, i.e. 
equivalent to ``oarsub``, ``qsub``, and ``salloc``. Use Case 1.1 - Request nodes outside of a slot +^^^^^^^^^^^^ +Request nodes outside of a slot Specific Example Request 4 nodes, each with 1 slot @@ -232,7 +232,8 @@ Jobspec YAML Use Case 1.2 - Request nodes inside of a slot +^^^^^^^^^^^^ +Request nodes inside of a slot Specific Example Request 4 slots, each with 1 node @@ -250,7 +251,8 @@ Jobspec YAML Use Case 1.3 - Request a fixed number of cores with no constraint on nodes +^^^^^^^^^^^^ +Request a fixed number of cores with no constraint on nodes Specific Example Request 120 cores, one broker per node @@ -268,7 +270,8 @@ and task counts. Use Case 2.1 - Run N tasks across M nodes, unequal distribution +^^^^^^^^^^^^ +Run N tasks across M nodes, unequal distribution Specific Example Run 5 copies of ``hostname`` across 4 nodes, @@ -283,8 +286,9 @@ Jobspec YAML .. literalinclude:: data/spec_31/use_case_2.1.yaml :language: yaml -Use Case 2.2 - Run N tasks, Require M cores per task +Use Case 2.2 +^^^^^^^^^^^^ +Run N tasks, Require M cores per task Specific Example Run 10 copies of ``myapp``, require 2 cores per copy, @@ -299,8 +303,9 @@ Jobspec YAML .. literalinclude:: data/spec_31/use_case_2.2.yaml :language: yaml -Use Case 2.3 - Run N tasks, Require M cores and J gpus per task +Use Case 2.3 +^^^^^^^^^^^^ +Run N tasks, Require M cores and J gpus per task Specific Example Run 10 copies of ``myapp``, require 2 cores and 1 gpu per copy, @@ -310,8 +315,9 @@ Jobspec YAML .. literalinclude:: data/spec_31/use_case_2.3.yaml :language: yaml -Use Case 2.4 - Run N tasks across M nodes, each task with 1 core and 1 gpu +Use Case 2.4 +^^^^^^^^^^^^ +Run N tasks across M nodes, each task with 1 core and 1 gpu Specific Example Run 16 copies of ``myapp`` across 4 nodes, each copy with @@ -326,8 +332,9 @@ Jobspec YAML ..
literalinclude:: data/spec_31/use_case_2.4.yaml :language: yaml -Use Case 2.5 - Specify dependencies +Use Case 2.5 +^^^^^^^^^^^^ +Specify dependencies Specific Example Depend on two previously submitted jobs. The first job’s From def0a386438445ea5886863acdfb40fe0289f6d1 Mon Sep 17 00:00:00 2001 From: Stephen Herbein Date: Tue, 23 Feb 2021 15:19:32 -0800 Subject: [PATCH 8/8] RFC14,25: fix reference to the "Reserved Resource Types" Problem: RFC25 references a section that only exists in the canonical jobspec specification (RFC14). Solution: make an explicit label for the section and reference it from RFC25 with `:ref:`. --- spec_14.rst | 2 ++ spec_25.rst | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/spec_14.rst b/spec_14.rst index 1b6e30cd..51f32aa0 100644 --- a/spec_14.rst +++ b/spec_14.rst @@ -220,6 +220,8 @@ following keys matching resource identifiers. +.. _rfc14-reserved-resource-types: + Reserved Resource Types ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/spec_25.rst b/spec_25.rst index bfd24143..ea4851ab 100644 --- a/spec_25.rst +++ b/spec_25.rst @@ -100,8 +100,7 @@ following: **type** The ``type`` key for a resource SHALL indicate the type of resource to be matched. In V1, only four resource types are valid: [``node``, ``slot``, ``core``, - and ``gpu``]. ``slot`` types are described in the **Reserved Resource Types** section - below. + and ``gpu``]. ``slot`` types are described in the :ref:`rfc14-reserved-resource-types`. **count** The ``count`` key SHALL indicate the desired number of