diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..e35d8850
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+_build
diff --git a/Makefile b/Makefile
index b708c60d..19668cb7 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ SOURCEDIR = .
 BUILDDIR = _build
 
 # YAML Validation on these directories
-SCHEMA_DIRS=data/spec_26 data/spec_14
+SCHEMA_DIRS=data/spec_31 data/spec_26 data/spec_14
 
 # Put it first so that "make" without argument is like "make help".
 help:
@@ -26,7 +26,7 @@ check: $(SCHEMA_DIRS) spelling
	./indexcheck spec_*.rst
 
 $(SCHEMA_DIRS):
-	python ./validate.py --schema=$@/schema.json $@/*.yaml
+	python3 ./validate.py --schema=$@/schema.json $@/*.yaml
 
 spelling:
	@$(SPHINXBUILD) -W -b spelling "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/README.md b/README.md
index bc6b3a02..3c10304c 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ Table of Contents
 - [27/Flux Resource Allocation Protocol Version 1](spec_27.rst)
 - [29/Hostlist Format](spec_29.rst)
 - [30/Job Urgency](spec_30.rst)
+- [31/Job Specification Version 2](spec_31.rst)
 
 Build Instructions
 ------------------
diff --git a/data/spec_14/schema.json b/data/spec_14/schema.json
index e8c462c9..3a0561cc 100644
--- a/data/spec_14/schema.json
+++ b/data/spec_14/schema.json
@@ -128,7 +128,17 @@
         "type": "object",
         "properties": {
           "per_slot": { "type": "integer", "minimum" : 1 },
-          "total": { "type": "integer", "minimum" : 1 }
+          "total": { "type": "integer", "minimum" : 1 },
+          "per_resource": { "type": "object",
+            "required": ["type", "count"],
+            "properties": {
+              "type": {"type": "string"},
+              "count": {
+                "type": "integer",
+                "minimum" : 1
+              }
+            }
+          }
         }
       },
       "distribution": { "type": "string" },
diff --git a/data/spec_31/example1.yaml b/data/spec_31/example1.yaml
new file mode 100644
index 00000000..f654856d
--- /dev/null
+++ b/data/spec_31/example1.yaml
@@ -0,0 +1,22 @@
+version: 2
+resources:
+  - type: node
+    count: 4
+    with:
+      - type: slot
+        count: 1
+        label: default
+        with:
+          - type: core
+            count: 2
+tasks:
+  - command: [ "app" ]
+    slot: default
+    count:
+      per_slot: 1
+attributes:
+  system:
+    duration: 3600.
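+    # duration is given in floating-point seconds (see RFC 14); 3600. is one hour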
+ cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/schema.json b/data/spec_31/schema.json new file mode 100644 index 00000000..464392a7 --- /dev/null +++ b/data/spec_31/schema.json @@ -0,0 +1,161 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://github.com/flux-framework/rfc/tree/master/data/spec_31/schema.json", + "title": "jobspec-02", + + "description": "Flux jobspec version 2", + + "definitions": { + "complex_range": { + "description": "a complex range of numbers", + "type": "object", + "properties":{ + "min": { "type": "integer", "minimum" : 1 }, + "max": { "type": "integer", "minimum" : 1 }, + "operator": { "type": "string", "enum": ["+", "*", "^"] }, + "operand": { "type": "integer", "minimum" : 1 } + }, + "required": ["min"], + "dependencies": { + "max": { "required": ["operator", "operand"] }, + "operator": { "required": ["max", "operand"] }, + "operand": { "required": ["max", "operator"] } + }, + "additionalProperties": false + }, + "resource_vertex_base": { + "description": "base schema for slot/other resource vertex", + "type": "object", + "required": ["type", "count"], + "properties": { + "type": { "type": "string" }, + "count": { + "oneOf": [ + { "type": "integer", "minimum" : 1 }, + { "$ref": "#/definitions/complex_range" } + ] + }, + "exclusive": { "type": "boolean" }, + "with": { + "type": "array", + "items": { "$ref": "#/definitions/resource_vertex" }, + "minItems": 1, + "maxItems": 2 + }, + "id": { "type": "string" }, + "unit": { "type": "string" }, + "label": { "type": "string" } + }, + "additionalProperties": false + }, + "resource_vertex_slot": { + "description": "special slot resource type - label assigns to task slot", + "allOf": [ + { "$ref": "#/definitions/resource_vertex_base" }, + { + "properties": { + "type": { "enum": ["slot"] } + }, + "required": ["label"] + } + ] + }, + "resource_vertex_other": { + "description": "other (non-slot) resource type", + "allOf": [ + { "$ref": "#/definitions/resource_vertex_base" }, + { + "properties": { + "type": { "enum": ["node", "gpu", "core"] } + } + } + ] + }, + "resource_vertex": { + "oneOf":[ + { "$ref": "#/definitions/resource_vertex_slot" }, + { "$ref": "#/definitions/resource_vertex_other" } + ] + } + }, + + "type": "object", + "required": ["version", "resources", "attributes", "tasks"], + "properties": { + "version": { + "description": "the jobspec version", + "type": "integer", + "enum": [2] + }, + "resources": { + "description": "requested resources", + "type": "array", + "minItems": 1, + "maxItems": 1, + "items": { "$ref": "#/definitions/resource_vertex" } + }, + "attributes": { + "description": "system and user attributes", + "type": ["object", "null"], + "properties": { + "system": { + "type": "object", + "properties": { + "duration": { "type": "number", "minimum": 0 }, + "cwd": { "type": "string" }, + "environment": { "type": "object" }, + "dependencies" : { + "$ref": "file:data/spec_26/schema.json" + } + } + }, + "user": { + "type": "object" + } + }, + "additionalProperties": false + }, + "tasks": { + "description": "task configuration", + "type": "array", + "items": { + "type": "object", + "required": ["command", "slot", "count" ], + "properties": { + "command": { + "type": "array", + "minItems": 1, + "items": { "type": "string" } + }, + "slot": { "type": "string" }, + "count": { + "type": "object", + "properties": { + "per_slot": { "type": "integer", "minimum" : 1 }, + "total": { "type": "integer", "minimum" : 1 }, + "per_resource": { "type": 
"object", + "required": ["type", "count"], + "properties": { + "type": {"type": "string"}, + "count": { + "type": "integer", + "mininum" : 1 + } + } + } + } + }, + "distribution": { "type": "string" }, + "attributes": { + "type": "object", + "properties": { + "environment": { "type" : "object"} + }, + "additionalProperties": { "type": "string" } + } + }, + "additionalProperties": false + } + } + } +} diff --git a/data/spec_31/use_case_1.1.yaml b/data/spec_31/use_case_1.1.yaml new file mode 100644 index 00000000..8858a07b --- /dev/null +++ b/data/spec_31/use_case_1.1.yaml @@ -0,0 +1,22 @@ +version: 2 +resources: + - type: node + count: 4 + with: + - type: slot + count: 1 + label: default + with: + - type: core + count: 1 +tasks: + - command: [ "flux", "start" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_1.2.yaml b/data/spec_31/use_case_1.2.yaml new file mode 100644 index 00000000..4c630e66 --- /dev/null +++ b/data/spec_31/use_case_1.2.yaml @@ -0,0 +1,22 @@ +version: 2 +resources: + - type: slot + count: 4 + label: default + with: + - type: node + count: 1 + with: + - type: core + count: 1 +tasks: + - command: [ "flux", "start" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_1.3.yaml b/data/spec_31/use_case_1.3.yaml new file mode 100644 index 00000000..1320b420 --- /dev/null +++ b/data/spec_31/use_case_1.3.yaml @@ -0,0 +1,24 @@ +version: 2 +resources: + - type: slot + count: 1 + label: default + with: + - type: node + count: {min: 1} + with: + - type: core + count: 120 +tasks: + - command: [ "flux", "start" ] + slot: default + count: + per_resource: + type: node + count: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.1.yaml b/data/spec_31/use_case_2.1.yaml new file mode 100644 index 00000000..366ce317 --- /dev/null +++ b/data/spec_31/use_case_2.1.yaml @@ -0,0 +1,22 @@ +version: 2 +resources: + - type: node + count: 4 + with: + - type: slot + count: 1 + label: myslot + with: + - type: core + count: 1 +tasks: + - command: [ "hostname" ] + slot: myslot + count: + total: 5 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.2.yaml b/data/spec_31/use_case_2.2.yaml new file mode 100644 index 00000000..4d3ebb7a --- /dev/null +++ b/data/spec_31/use_case_2.2.yaml @@ -0,0 +1,19 @@ +version: 2 +resources: + - type: slot + label: default + count: 10 + with: + - type: core + count: 2 +tasks: + - command: [ "myapp" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.3.yaml b/data/spec_31/use_case_2.3.yaml new file mode 100644 index 00000000..75293e8a --- /dev/null +++ b/data/spec_31/use_case_2.3.yaml @@ -0,0 +1,21 @@ +version: 2 +resources: + - type: slot + count: 10 + label: default + with: + - type: core + count: 2 + - type: gpu + count: 1 +tasks: + - command: [ "myapp" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. 
+ cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.4.yaml b/data/spec_31/use_case_2.4.yaml new file mode 100644 index 00000000..43992e45 --- /dev/null +++ b/data/spec_31/use_case_2.4.yaml @@ -0,0 +1,24 @@ +version: 2 +resources: + - type: node + count: 4 + with: + - type: slot + count: 4 + label: default + with: + - type: core + count: 1 + - type: gpu + count: 1 +tasks: + - command: [ "myapp" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + environment: + HOME: "/home/flux" diff --git a/data/spec_31/use_case_2.5.yaml b/data/spec_31/use_case_2.5.yaml new file mode 100644 index 00000000..c19d56fb --- /dev/null +++ b/data/spec_31/use_case_2.5.yaml @@ -0,0 +1,30 @@ +version: 2 +resources: + - type: slot + count: 1 + label: default + with: + - type: node + count: 1 +tasks: + - command: [ "flux", "start" ] + slot: default + count: + per_slot: 1 +attributes: + system: + duration: 3600. + cwd: "/home/flux" + dependencies: + - type: in + scope: user + scheme: fluid + value: hungry-hippo-white-elephant + - type: in + scope: user + scheme: string + value: foo + - type: out + scope: user + scheme: string + value: bar diff --git a/index.rst b/index.rst index 9381edcd..ae0ffeb8 100644 --- a/index.rst +++ b/index.rst @@ -209,6 +209,13 @@ This specification describes the Flux implementation of the Hostlist Format This specification describes the Flux job urgency parameter. +:doc:`31/Job Specification Version 2 ` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Version 2 of the domain specific job specification language canonically defined +in RFC14. + + .. Each file must appear in a toctree .. toctree:: :hidden: @@ -241,3 +248,4 @@ This specification describes the Flux job urgency parameter. spec_27 spec_29 spec_30 + spec_31 diff --git a/spec_14.rst b/spec_14.rst index 7dc994ae..51f32aa0 100644 --- a/spec_14.rst +++ b/spec_14.rst @@ -220,6 +220,8 @@ following keys matching resource identifiers. +.. _rfc14-reserved-resource-types: + Reserved Resource Types ^^^^^^^^^^^^^^^^^^^^^^^ @@ -409,7 +411,8 @@ requests are similar to existing resource manager batch job submission or allocation requests, i.e. equivalent to ``oarsub``, ``qsub``, and ``salloc``. Use Case 1.1 - Request Single Resource with Count +^^^^^^^^^^^^ +Request Single Resource with Count Specific Example Request 4 nodes @@ -426,7 +429,8 @@ Jobspec YAML :language: yaml Use Case 1.2 - Request a range of a type of resource +^^^^^^^^^^^^ +Request a range of a type of resource Specific Example Request between 3 and 30 nodes @@ -441,7 +445,8 @@ Jobspec YAML :language: yaml Use Case 1.3 - Request M nodes with a minimum number of sockets per node +^^^^^^^^^^^^ +Request M nodes with a minimum number of sockets per node and cores per socket Specific Example @@ -462,7 +467,8 @@ Jobspec YAML :language: yaml Use Case 1.4 - Exclusively allocate nodes, while constraining cores and +^^^^^^^^^^^^ +Exclusively allocate nodes, while constraining cores and sockets. 
 
 Specific Example
@@ -474,7 +480,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 1.5
-   Complex example from OAR
+^^^^^^^^^^^^
+Complex example from OAR
 
 Specific Example
    ask for 1 core on 2 nodes on the same cluster with 4096 GB of memory
@@ -494,7 +501,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 1.6
-   Request resources across multiple clusters
+^^^^^^^^^^^^
+Request resources across multiple clusters
 
 Specific Example
    Ask for 30 cores on 2 clusters (total = 60 cores), with 1 flux broker launched per node
@@ -504,7 +512,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 1.7
-   Request N cores across M switches
+^^^^^^^^^^^^
+Request N cores across M switches
 
 Specific Example
    Request 3 cores across 3 switches, with 1 flux broker launched per node
@@ -525,7 +534,8 @@ The following use cases are more general and include more complex slot
 placement and task counts.
 
 Use Case 2.1
-   Run N tasks across M nodes
+^^^^^^^^^^^^
+Run N tasks across M nodes
 
 Specific Example
    Run ``hostname`` 20 times on 4 nodes, 5 per node
@@ -543,7 +553,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.2
-   Run N tasks across M nodes, unequal distribution
+^^^^^^^^^^^^
+Run N tasks across M nodes, unequal distribution
 
 Specific Example
    Run 5 copies of ``hostname`` across 4 nodes,
@@ -559,7 +570,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.3
-   Run N tasks, Require M cores per task
+^^^^^^^^^^^^
+Run N tasks, Require M cores per task
 
 Specific Example
    Run 10 copies of ``myapp``, require 2 cores per copy,
@@ -575,7 +587,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.4
-   Run different binaries with differing resource
+^^^^^^^^^^^^
+Run different binaries with differing resource
 requirements as single program
 
 Specific Example
@@ -590,7 +603,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.5
-   Run command requesting minimum amount of RAM per core
+^^^^^^^^^^^^
+Run command requesting minimum amount of RAM per core
 
 Specific Example
    Run 10 copies of ``app`` across 10 cores with at least 2GB per core
@@ -605,7 +619,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.6
-   Run N copies of a command with minimum amount of RAM per node
+^^^^^^^^^^^^
+Run N copies of a command with minimum amount of RAM per node
 
 Specific Example
    Run 10 copies of ``app`` across 2 nodes with at least 4GB per node
@@ -622,7 +637,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.7
-   Override the global environment
+^^^^^^^^^^^^
+Override the global environment
 
 Specific Example
    Run two different tasks, one with the global environment and one with an
@@ -633,7 +649,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.8
-   Specify dependencies
+^^^^^^^^^^^^
+Specify dependencies
 
 Specific Example
    Depend on two previously submitted jobs. The first job’s
diff --git a/spec_25.rst b/spec_25.rst
index a7d68c73..ea4851ab 100644
--- a/spec_25.rst
+++ b/spec_25.rst
@@ -58,7 +58,7 @@ This RFC describes the version 1 form of "jobspec", a domain specific
 language based on YAML [#f1]_. The version 1 of jobspec SHALL consist
 of a single YAML document representing a reusable request to run
 exactly one program. Hereafter, "jobspec" refers to the version 1
-form, and "non-canonical jobspec" refers to the non-canonical form.
+form, and "canonical jobspec" refers to the canonical form.
 
 
 Jobspec Language Definition
@@ -100,8 +100,7 @@ following:
 **type**
    The ``type`` key for a resource SHALL indicate the type of resource to be
    matched. In V1, only four resource types are valid: [``node``, ``slot``, ``core``,
-   and ``gpu``]. ``slot`` types are described in the **Reserved Resource Types** section
-   below.
+   and ``gpu``]. ``slot`` types are described in the :ref:`rfc14-reserved-resource-types` section of RFC 14.
 
 **count**
    The ``count`` key SHALL indicate the desired number of
@@ -201,7 +200,8 @@ requests are similar to existing resource manager batch job submission
 or allocation requests, i.e. equivalent to ``oarsub``, ``qsub``, and
 ``salloc``.
 
 Use Case 1.1
-   Request nodes outside of a slot
+^^^^^^^^^^^^
+Request nodes outside of a slot
 
 Specific Example
    Request 4 nodes, each with 1 slot
@@ -225,7 +225,8 @@ The following use cases are more general and include more complex slot
 placement and task counts.
 
 Use Case 2.1
-   Run N tasks across M nodes, unequal distribution
+^^^^^^^^^^^^
+Run N tasks across M nodes, unequal distribution
 
 Specific Example
    Run 5 copies of ``hostname`` across 4 nodes,
@@ -241,7 +242,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.2
-   Run N tasks, Require M cores per task
+^^^^^^^^^^^^
+Run N tasks, Require M cores per task
 
 Specific Example
    Run 10 copies of ``myapp``, require 2 cores per copy,
@@ -257,7 +259,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.3
-   Run N tasks, Require M cores and J gpus per task
+^^^^^^^^^^^^
+Run N tasks, Require M cores and J gpus per task
 
 Specific Example
    Run 10 copies of ``myapp``, require 2 cores and 1 gpu per copy,
@@ -268,7 +271,8 @@ Jobspec YAML
    :language: yaml
 
 Use Case 2.4
-   Run N tasks across M nodes, each task with 1 core and 1 gpu
+^^^^^^^^^^^^
+Run N tasks across M nodes, each task with 1 core and 1 gpu
 
 Specific Example
    Run 16 copies of ``myapp`` across 4 nodes, each copy with
diff --git a/spec_31.rst b/spec_31.rst
new file mode 100644
index 00000000..00aaf8b9
--- /dev/null
+++ b/spec_31.rst
@@ -0,0 +1,360 @@
+
+31/Job Specification Version 2
+==============================
+
+A domain specific language based on YAML is defined to express the resource
+requirements and other attributes of one or more programs submitted to a Flux
+instance for execution. This RFC describes version 2 of jobspec, which
+represents a request to run exactly one program. Version 2 is a simplified
+form of the canonical jobspec format described in :doc:`RFC 14 <spec_14>` and
+an extension of the V1 jobspec format described in :doc:`RFC 25 <spec_25>`.
+
+- Name: github.com/flux-framework/rfc/spec_31.rst
+
+- Editor: Stephen Herbein
+
+- State: raw
+
+
+Language
+--------
+
+The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
+"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to
+be interpreted as described in `RFC 2119 <https://tools.ietf.org/html/rfc2119>`__.
+
+
+Related Standards
+-----------------
+
+- :doc:`4/Flux Resource Model <spec_4>`
+
+- :doc:`8/Flux Task and Program Execution Services <spec_8>`
+
+- :doc:`14/Canonical Job Specification <spec_14>`
+
+- :doc:`20/Resource Set Specification Version 1 <spec_20>`
+
+- :doc:`25/Job Specification Version 1 <spec_25>`
+
+- :doc:`26/Job Dependency Specification <spec_26>`
+
+Goals
+-----
+
+- Express the resource requirements of a program to the scheduler.
+
+- Allow resource requirements to be expressed in terms of an expanded set of
+  resources versus those allowed in Jobspec V1.
+
+  - Support the forms of jobspec produced by the ``flux run`` command.
+
+- Express program attributes such as arguments, run time, and
+  task layout, to be considered by the program execution service (RFC 12).
+
+- Express dependencies relative to other programs executing within
+  the same Flux instance.
+
+Overview
+--------
+
+This RFC describes the version 2 form of "jobspec", a domain specific language
+based on YAML [#f1]_. Version 2 of jobspec SHALL consist of
+a single YAML document representing a reusable request to run
+exactly one program. Hereafter, "jobspec" refers to the version 2
+form, and "canonical jobspec" refers to the canonical form.
+
+
+Jobspec Language Definition
+---------------------------
+
+A jobspec V2 YAML document SHALL consist of a dictionary
+defining the resources, tasks and other attributes of a single
+program. The dictionary MUST contain the keys ``resources``, ``tasks``,
+``attributes``, and ``version``.
+
+Each of the listed jobspec keys SHALL meet the form and requirements
+listed in detail in the sections below. For reference, a ruleset for
+compliant jobspec V2 is provided in the **Schema** section below.
+
+Resources
+~~~~~~~~~
+
+The value of the ``resources`` key SHALL be a strict list which MUST contain
+exactly one resource. The list element SHALL represent a **resource vertex**
+(described below). The resource vertex keys and definitions are similar to
+those in Jobspec V1. V2 adds the ``exclusive`` key and uses the canonical
+jobspec definition for ``count``, rather than the simplified definition used
+in V1.
+
+A resource vertex SHALL contain only the following keys:
+
+- type
+
+- count
+
+- unit
+
+- with
+
+- label
+
+- exclusive
+
+The definitions of ``count``, ``unit``, ``with``, ``label``, and ``exclusive``
+SHALL match those found in RFC14. The remaining key, ``type``, is redefined
+and simplified to mean the following:
+
+**type**
+   The ``type`` key for a resource SHALL indicate the type of resource to be
+   matched. In V2, only eight resource types are valid: [``node``, ``socket``,
+   ``core``, ``hardware-thread``, ``gpu``, ``memory``, ``storage``, and
+   ``slot``]. ``slot`` types are described in the
+   :ref:`rfc14-reserved-resource-types` section of RFC 14.
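+
+Under the canonical ``count`` definition from RFC 14, a resource vertex MAY
+request a range of resources rather than a fixed number. The following
+fragment is an illustration only (it is not one of the use cases below); per
+the RFC 14 definition it asks for 4, 8, 16, 32, or 64 nodes:
+
+.. code-block:: yaml
+
+   - type: node
+     count:
+       min: 4
+       max: 64
+       operator: '*'
+       operand: 2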
+
+
+V2-Specific Resource Graph Restrictions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In V2, the ``resources`` list MUST contain exactly one element. Additionally,
+the resource graph MUST contain the ``slot`` type and either the ``core`` or the
+``hardware-thread`` type.
+
+In V2, there are restrictions on the order in which resources can appear. The
+*primary* compute resources MUST only appear in the following order (with
+intermediate resources being optional):
+``node->socket->core->hardware-thread``. The ``slot`` resource MUST appear
+somewhere within that resource chain and have at least one *primary* compute
+resource as a child. *Auxiliary* resources (i.e., ``memory``, ``storage``,
+``gpu``) can be siblings or children of any *primary* compute resource. For
+example: ``(node->(socket->(core,gpu), memory), storage)``.
+
+In V2, an ``exclusive`` key with a value of ``false`` SHALL NOT be included in a
+``slot`` or any of its children.
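+
+As an illustration only (not one of the use cases below), the following
+``resources`` section satisfies these rules, realizing the
+``(node->(socket->(core,gpu), memory), storage)`` shape above with the task
+slot placed between ``node`` and ``socket``:
+
+.. code-block:: yaml
+
+   resources:
+     - type: node
+       count: 2
+       with:
+         - type: slot
+           count: 1
+           label: default
+           with:
+             - type: socket
+               count: 2
+               with:
+                 - type: core
+                   count: 4
+                 - type: gpu
+                   count: 1
+             - type: memory
+               count: 16
+               unit: GB
+         - type: storage
+           count: 100
+           unit: GB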
+
+Tasks
+~~~~~
+
+The value of the ``tasks`` key SHALL be a strict list which MUST define exactly
+one task. The list element SHALL be a dictionary representing a task to run as
+part of the program. A task descriptor SHALL contain the following keys, whose
+definitions SHALL match those provided in RFC14:
+
+- command
+
+- slot
+
+- count
+
+  - per_slot
+
+  - per_resource
+
+  - total
+
+- attributes
+
+- distribution
+
+These keys are the same as those in Jobspec V1 except for the addition of
+``per_resource``, which enables the late binding of tasks to resources (i.e.,
+the number of tasks is determined after the resource request is allocated by
+the scheduler).
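+
+As an illustration, the following ``count`` fragment requests one task per
+allocated ``node``, however many nodes the scheduler selects (use case 1.3
+below shows this pattern in a complete jobspec):
+
+.. code-block:: yaml
+
+   count:
+     per_resource:
+       type: node
+       count: 1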
+
+Attributes
+~~~~~~~~~~
+
+The ``attributes`` key SHALL be a dictionary of dictionaries. The ``attributes``
+dictionary MUST contain the ``system`` key and MAY contain the ``user`` key. Common
+``system`` keys are listed below, and their definitions can be found in
+RFC14. Values MAY have any valid YAML type.
+
+- user
+
+- system
+
+  - duration
+
+  - environment
+
+  - cwd
+
+  - dependencies
+
+Most system attributes are optional, but the ``duration`` attribute is required
+in jobspec V2.
+
+Example Jobspec
+~~~~~~~~~~~~~~~
+
+Under the description above, the following is an example of a fully compliant
+version 2 jobspec. The example below declares a request for 4 "nodes",
+each with 1 task slot consisting of 2 cores, for a total
+of 4 task slots. A single copy of the command ``app`` will be run on each
+task slot for a total of 4 tasks.
+
+.. literalinclude:: data/spec_31/example1.yaml
+   :language: yaml
+
+
+Basic Use Cases
+---------------
+
+To implement basic resource manager functionality, the following use
+cases SHALL be supported by the jobspec:
+
+
+Section 1: Node-level Requests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following "node-level" requests are all requests to start an instance,
+i.e. run a single copy of ``flux start`` per allocated node. Many of these
+requests are similar to existing resource manager batch job submission or
+allocation requests, i.e. equivalent to ``oarsub``, ``qsub``, and ``salloc``.
+
+Use Case 1.1
+^^^^^^^^^^^^
+Request nodes outside of a slot
+
+Specific Example
+   Request 4 nodes, each with 1 slot
+
+Existing Equivalents
+   +--------+-----------------------+
+   | Slurm  | ``salloc -N4``        |
+   +--------+-----------------------+
+   | PBS    | ``qsub -l nodes=4``   |
+   +--------+-----------------------+
+
+Jobspec YAML
+   .. literalinclude:: data/spec_31/use_case_1.1.yaml
+      :language: yaml
+
+
+Use Case 1.2
+^^^^^^^^^^^^
+Request nodes inside of a slot
+
+Specific Example
+   Request 4 slots, each with 1 node
+
+Existing Equivalents
+   +---------+---------------------+
+   | Slurm   | ``salloc -N4``      |
+   +---------+---------------------+
+   | PBS     | ``qsub -l nodes=4`` |
+   +---------+---------------------+
+
+Jobspec YAML
+   .. literalinclude:: data/spec_31/use_case_1.2.yaml
+      :language: yaml
+
+
+Use Case 1.3
+^^^^^^^^^^^^
+Request a fixed number of cores with no constraint on nodes
+
+Specific Example
+   Request 120 cores, one broker per node
+
+Jobspec YAML
+   .. literalinclude:: data/spec_31/use_case_1.3.yaml
+      :language: yaml
+
+
+Section 2: General Requests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following use cases are more general and include more complex slot placement
+and task counts.
+
+
+Use Case 2.1
+^^^^^^^^^^^^
+Run N tasks across M nodes, unequal distribution
+
+Specific Example
+   Run 5 copies of ``hostname`` across 4 nodes,
+   default distribution
+
+Existing Equivalents
+   +--------+----------------------------+
+   | Slurm  | ``srun -n5 -N4 hostname``  |
+   +--------+----------------------------+
+
+Jobspec YAML
+   .. literalinclude:: data/spec_31/use_case_2.1.yaml
+      :language: yaml
+
+Use Case 2.2
+^^^^^^^^^^^^
+Run N tasks, Require M cores per task
+
+Specific Example
+   Run 10 copies of ``myapp``, require 2 cores per copy,
+   for a total of 20 cores
+
+Existing Equivalents
+   +--------+---------------------------+
+   | Slurm  | ``srun -n10 -c 2 myapp``  |
+   +--------+---------------------------+
+
+Jobspec YAML
+   .. literalinclude:: data/spec_31/use_case_2.2.yaml
+      :language: yaml
+
+Use Case 2.3
+^^^^^^^^^^^^
+Run N tasks, Require M cores and J gpus per task
+
+Specific Example
+   Run 10 copies of ``myapp``, require 2 cores and 1 gpu per copy,
+   for a total of 20 cores and 10 gpus
+
+Jobspec YAML
+   .. literalinclude:: data/spec_31/use_case_2.3.yaml
+      :language: yaml
+
+Use Case 2.4
+^^^^^^^^^^^^
+Run N tasks across M nodes, each task with 1 core and 1 gpu
+
+Specific Example
+   Run 16 copies of ``myapp`` across 4 nodes, each copy with
+   1 core and 1 gpu
+
+Existing Equivalents
+   +-------+-------------------------------------------+
+   | Slurm | ``srun -n16 -N4 --gpus-per-task=1 myapp`` |
+   +-------+-------------------------------------------+
+
+Jobspec YAML
+   .. literalinclude:: data/spec_31/use_case_2.4.yaml
+      :language: yaml
+
+Use Case 2.5
+^^^^^^^^^^^^
+Specify dependencies
+
+Specific Example
+   Depend on two previously submitted jobs. The first job’s
+   Flux ID (fluid) is known (``hungry-hippo-white-elephant``). The second job’s
+   fluid is not known but its ``out`` dependency (``foo``) is known. Also provide an
+   ``out`` dependency (``bar``) that other jobs can depend on.
+
+Jobspec YAML
+   .. literalinclude:: data/spec_31/use_case_2.5.yaml
+      :language: yaml
+
+Schema
+------
+
+A jobspec conforming to version 2 of the language definition SHALL
+adhere to the following ruleset, described using JSON Schema [#f2]_.
+
+.. literalinclude:: data/spec_31/schema.json
+   :language: json
+
+.. [#f1] `YAML Ain’t Markup Language (YAML) Version 1.1 <http://yaml.org/spec/1.1/current.html>`__, O. Ben-Kiki, C. Evans, B. Ingerson, 2004.
+
+.. [#f2] `JSON Schema: A Media Type for Describing JSON Documents <https://tools.ietf.org/html/draft-handrews-json-schema-01>`__; H. Andrews; 2018
diff --git a/validate.py b/validate.py
index bb6f6ba1..1e3e5aac 100755
--- a/validate.py
+++ b/validate.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # Usage: validate.py --schema=jobspec.json data.json [data.json ...]
 # Usage: cat data.json | validate.py --schema=jobspec.json
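
The validation run by ``make check`` (via ``validate.py``) is ordinary JSON
Schema validation of each YAML document in the schema directories. A minimal
sketch of an equivalent check follows; it is an illustration only, not a
reproduction of ``validate.py``, and it assumes the third-party ``jsonschema``
and ``PyYAML`` packages are installed:

    # check_jobspec.py (hypothetical name): a minimal sketch of the check
    # that validate.py performs on each data/spec_31/*.yaml file.
    import json
    import sys

    import yaml
    from jsonschema import validate
    from jsonschema.exceptions import ValidationError


    def check(schema_path, yaml_path):
        # Load the JSON Schema ruleset and the YAML jobspec document.
        with open(schema_path) as f:
            schema = json.load(f)
        with open(yaml_path) as f:
            jobspec = yaml.safe_load(f)
        try:
            # Note: the spec_31 schema refers to the spec_26 schema through a
            # relative "file:" URI, so jobspecs that use "dependencies" may
            # need a ref resolver; the plain call covers the other examples.
            validate(instance=jobspec, schema=schema)
        except ValidationError as e:
            print(f"{yaml_path}: invalid: {e.message}", file=sys.stderr)
            return 1
        print(f"{yaml_path}: valid")
        return 0


    if __name__ == "__main__":
        sys.exit(check("data/spec_31/schema.json", "data/spec_31/example1.yaml"))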