-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathChap_API_Proc_Mgmt.tex
1721 lines (1365 loc) · 72 KB
/
Chap_API_Proc_Mgmt.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Chapter: Process Management
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{Process Management}
\label{chap:api_proc_mgmt}
This chapter defines functionality that processes can use to create and manage processes. The management features presented in this chapter include aborting processes, connecting and disconnecting processes, and determining the relative locality of local processes.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Process Creation}
\label{chap:api_proc_mgmt:spawn}
The \refapi{PMIx_Spawn} commands spawn new processes and/or applications in the \ac{PMIx} universe. This may include requests to extend the existing resource allocation or obtain a new one, depending upon provided and supported attributes.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{\code{PMIx_Spawn}}
\declareapi{PMIx_Spawn}
%%%%
\summary
Spawn a new job.
%%%%
\format
\copySignature{PMIx_Spawn}{1.0}{
pmix_status_t \\
PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, \\
\hspace*{11\sigspace}const pmix_app_t apps[], size_t napps, \\
\hspace*{11\sigspace}char nspace[])
}
\begin{arglist}
\argin{job_info}{Array of info structures (array of handles)}
\argin{ninfo}{Number of elements in the \refarg{job_info} array (integer)}
\argin{apps}{Array of \refstruct{pmix_app_t} structures (array of handles)}
\argin{napps}{Number of elements in the \refarg{apps} array (integer)}
\argout{nspace}{Namespace of the new job (string)}
\end{arglist}
\returnstart
\begin{constantdesc}
\item \refconst{PMIX_ERR_JOB_ALLOC_FAILED} The job request could not be executed due to failure to obtain the specified allocation.
\item \refconst{PMIX_ERR_JOB_APP_NOT_EXECUTABLE} The specified application executable either could not be found, or lacks execution privileges.
\item \refconst{PMIX_ERR_JOB_NO_EXE_SPECIFIED} The job request did not specify an executable.
\item \refconst{PMIX_ERR_JOB_FAILED_TO_MAP} The launcher was unable to map the processes for the specified job request.
\item \refconst{PMIX_ERR_JOB_FAILED_TO_LAUNCH} One or more processes in the job request failed to launch.
\item \refconst{PMIX_ERR_JOB_EXE_NOT_FOUND} Specified executable not found.
\item \refconst{PMIX_ERR_JOB_INSUFFICIENT_RESOURCES} Insufficient resources to spawn job.
\item \refconst{PMIX_ERR_JOB_SYS_OP_FAILED} System library operation failed.
\item \refconst{PMIX_ERR_JOB_WDIR_NOT_FOUND} Specified working directory not found.
\end{constantdesc}
\returnend
\reqattrstart
\ac{PMIx} libraries are not required to directly support any attributes for this function. However, any provided attributes must be passed to the host environment for processing.
Host environments are required to support the following attributes when present in either the \refarg{job_info} or the \textit{info} array of an element of the \refarg{apps} array:
\pasteAttributeItem{PMIX_WDIR}
\pasteAttributeItem{PMIX_SET_SESSION_CWD}
\pasteAttributeItem{PMIX_PREFIX}
\pasteAttributeItem{PMIX_HOST}
\pasteAttributeItem{PMIX_HOSTFILE}
\reqattrend
\optattrstart
The following attributes are optional for host environments that support this operation:
\pasteAttributeItem{PMIX_ADD_HOSTFILE}
\pasteAttributeItem{PMIX_ADD_HOST}
\pasteAttributeItem{PMIX_PRELOAD_BIN}
\pasteAttributeItem{PMIX_PRELOAD_FILES}
\pasteAttributeItem{PMIX_PERSONALITY}
\pasteAttributeItem{PMIX_DISPLAY_MAP}
\pasteAttributeItem{PMIX_PPR}
\pasteAttributeItem{PMIX_MAPBY}
\pasteAttributeItem{PMIX_RANKBY}
\pasteAttributeItem{PMIX_BINDTO}
\pasteAttributeItem{PMIX_STDIN_TGT}
\pasteAttributeItem{PMIX_TAG_OUTPUT}
\pasteAttributeItem{PMIX_TIMESTAMP_OUTPUT}
\pasteAttributeItem{PMIX_MERGE_STDERR_STDOUT}
\pasteAttributeItem{PMIX_OUTPUT_TO_FILE}
\pasteAttributeItem{PMIX_INDEX_ARGV}
\pasteAttributeItem{PMIX_CPUS_PER_PROC}
\pasteAttributeItem{PMIX_NO_PROCS_ON_HEAD}
\pasteAttributeItem{PMIX_NO_OVERSUBSCRIBE}
\pasteAttributeItem{PMIX_REPORT_BINDINGS}
\pasteAttributeItem{PMIX_CPU_LIST}
\pasteAttributeItem{PMIX_JOB_RECOVERABLE}
\pasteAttributeItem{PMIX_JOB_CONTINUOUS}
\pasteAttributeItem{PMIX_MAX_RESTARTS}
\pasteAttributeItem{PMIX_SET_ENVAR}
\pasteAttributeItem{PMIX_UNSET_ENVAR}
\pasteAttributeItem{PMIX_ADD_ENVAR}
\pasteAttributeItem{PMIX_PREPEND_ENVAR}
\pasteAttributeItem{PMIX_APPEND_ENVAR}
\pasteAttributeItem{PMIX_FIRST_ENVAR}
\pasteAttributeItem{PMIX_ALLOC_QUEUE}
\pasteAttributeItem{PMIX_ALLOC_TIME}
\pasteAttributeItem{PMIX_ALLOC_NUM_NODES}
\pasteAttributeItem{PMIX_ALLOC_NODE_LIST}
\pasteAttributeItem{PMIX_ALLOC_NUM_CPUS}
\pasteAttributeItem{PMIX_ALLOC_NUM_CPU_LIST}
\pasteAttributeItem{PMIX_ALLOC_CPU_LIST}
\pasteAttributeItem{PMIX_ALLOC_MEM_SIZE}
\pasteAttributeItem{PMIX_ALLOC_BANDWIDTH}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_QOS}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_TYPE}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_PLANE}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_ENDPTS}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_ENDPTS_NODE}
\pasteAttributeItem{PMIX_COSPAWN_APP}
\pasteAttributeItem{PMIX_SPAWN_TOOL}
\pasteAttributeItem{PMIX_EVENT_SILENT_TERMINATION}
\pasteAttributeItem{PMIX_ENVARS_HARVESTED}
\pasteAttributeItem{PMIX_JOB_TIMEOUT}
\pasteAttributeItem{PMIX_SPAWN_TIMEOUT}
\pasteAttributeItem{PMIX_NOTIFY_COMPLETION}
\pasteAttributeItem{PMIX_NOTIFY_PROC_TERMINATION}
\pasteAttributeItem{PMIX_NOTIFY_PROC_ABNORMAL_TERMINATION}
\pasteAttributeItem{PMIX_LOG_COMPLETION}
\pasteAttributeItem{PMIX_LOG_PROC_TERMINATION}
\pasteAttributeItem{PMIX_LOG_PROC_ABNORMAL_TERMINATION}
\pasteAttributeItem{PMIX_LOG_JOB_EVENTS}
\optattrend
%%%%
\descr
Spawn a new job.
The assigned namespace of the spawned applications is returned in the \refarg{nspace} parameter.
A \code{NULL} value in that location indicates that the caller does not wish to have the namespace returned.
The \refarg{nspace} array must be at least of size one more than \refconst{PMIX_MAX_NSLEN}.
By default, the spawned processes will be \ac{PMIx} ``connected'' to the parent process upon successful launch (see Section~\ref{chap:api_proc_mgmt:connect}
for details).
Both the parent process and members of the child job will receive notification of errors from processes in their combined assemblage.
\advicermstart
It is recommended that an implementation will cause the parent process to be given a copy of the new job's job-level
information so it can query job-level info without incurring any communication penalties. Similarly, the newly
spawned child processes should receive a copy of the parent process's job-level info due to the high likelihood
that the child will make subsequent queries about its parent.
\advicermend
\adviceuserstart
Behavior of individual resource managers may differ, but it is expected that failure of any application process to start will result in termination/cleanup of all processes in the newly spawned job and return of an error code to the caller.
\adviceuserend
\adviceimplstart
Tools may utilize \refapi{PMIx_Spawn} to start intermediate launchers as described in Section \ref{chap:api_tools:indirect}. For times where the tool is not attached to a \ac{PMIx} server, internal support for fork/exec of the specified applications would allow the tool to maintain a single code path for both the connected and disconnected cases. Inclusion of such support is recommended, but not required.
\adviceimplend
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{\code{PMIx_Spawn_nb}}
\declareapi{PMIx_Spawn_nb}
%%%%
\summary
Nonblocking version of the \refapi{PMIx_Spawn} routine.
%%%%
\format
\copySignature{PMIx_Spawn_nb}{1.0}{
pmix_status_t \\
PMIx_Spawn_nb(const pmix_info_t job_info[], size_t ninfo, \\
\hspace*{14\sigspace}const pmix_app_t apps[], size_t napps, \\
\hspace*{14\sigspace}pmix_spawn_cbfunc_t cbfunc, void *cbdata)
}
\begin{arglist}
\argin{job_info}{Array of info structures (array of handles)}
\argin{ninfo}{Number of elements in the \refarg{job_info} array (integer)}
\argin{apps}{Array of \refstruct{pmix_app_t} structures (array of handles)}
\argin{napps}{Number of elements in the \refarg{apps} array (integer)}
\argin{cbfunc}{Callback function \refapi{pmix_spawn_cbfunc_t} (function reference)}
\argin{cbdata}{Data to be passed to the callback function (memory reference)}
\end{arglist}
\returnsimplenb
If executed, the status returned in the provided callback function will be one of the following constants:
\begin{itemize}
\item \refconst{PMIX_SUCCESS} The operation was successfully completed.
\item \refconst{PMIX_ERR_JOB_ALLOC_FAILED} The job request could not be executed due to failure to obtain the specified allocation.
\item \refconst{PMIX_ERR_JOB_APP_NOT_EXECUTABLE} The specified application executable either could not be found, or lacks execution privileges.
\item \refconst{PMIX_ERR_JOB_NO_EXE_SPECIFIED} The job request did not specify an executable.
\item \refconst{PMIX_ERR_JOB_FAILED_TO_MAP} The launcher was unable to map the processes for the specified job request.
\item \refconst{PMIX_ERR_JOB_FAILED_TO_LAUNCH} One or more processes in the job request failed to launch.
\item \refconst{PMIX_ERR_JOB_EXE_NOT_FOUND} Specified executable not found.
\item \refconst{PMIX_ERR_JOB_INSUFFICIENT_RESOURCES} Insufficient resources to spawn job.
\item \refconst{PMIX_ERR_JOB_SYS_OP_FAILED} System library operation failed.
\item \refconst{PMIX_ERR_JOB_WDIR_NOT_FOUND} Specified working directory not found.
\item a non-zero \ac{PMIx} error constant indicating a reason for the request's failure.
\end{itemize}
\reqattrstart
\ac{PMIx} libraries are not required to directly support any attributes for this function. However, any provided attributes must be passed to the host \ac{SMS} daemon for processing.
Host environments are required to support the following attributes when present in either the \refarg{job_info} or the \textit{info} array of an element of the \refarg{apps} array:
\pasteAttributeItem{PMIX_WDIR}
\pasteAttributeItem{PMIX_SET_SESSION_CWD}
\pasteAttributeItem{PMIX_PREFIX}
\pasteAttributeItem{PMIX_HOST}
\pasteAttributeItem{PMIX_HOSTFILE}
\reqattrend
\optattrstart
The following attributes are optional for host environments that support this operation:
\pasteAttributeItem{PMIX_ADD_HOSTFILE}
\pasteAttributeItem{PMIX_ADD_HOST}
\pasteAttributeItem{PMIX_PRELOAD_BIN}
\pasteAttributeItem{PMIX_PRELOAD_FILES}
\pasteAttributeItem{PMIX_PERSONALITY}
\pasteAttributeItem{PMIX_DISPLAY_MAP}
\pasteAttributeItem{PMIX_PPR}
\pasteAttributeItem{PMIX_MAPBY}
\pasteAttributeItem{PMIX_RANKBY}
\pasteAttributeItem{PMIX_BINDTO}
\pasteAttributeItem{PMIX_STDIN_TGT}
\pasteAttributeItem{PMIX_TAG_OUTPUT}
\pasteAttributeItem{PMIX_TIMESTAMP_OUTPUT}
\pasteAttributeItem{PMIX_MERGE_STDERR_STDOUT}
\pasteAttributeItem{PMIX_OUTPUT_TO_FILE}
\pasteAttributeItem{PMIX_INDEX_ARGV}
\pasteAttributeItem{PMIX_CPUS_PER_PROC}
\pasteAttributeItem{PMIX_NO_PROCS_ON_HEAD}
\pasteAttributeItem{PMIX_NO_OVERSUBSCRIBE}
\pasteAttributeItem{PMIX_REPORT_BINDINGS}
\pasteAttributeItem{PMIX_CPU_LIST}
\pasteAttributeItem{PMIX_JOB_RECOVERABLE}
\pasteAttributeItem{PMIX_JOB_CONTINUOUS}
\pasteAttributeItem{PMIX_MAX_RESTARTS}
\pasteAttributeItem{PMIX_SET_ENVAR}
\pasteAttributeItem{PMIX_UNSET_ENVAR}
\pasteAttributeItem{PMIX_ADD_ENVAR}
\pasteAttributeItem{PMIX_PREPEND_ENVAR}
\pasteAttributeItem{PMIX_APPEND_ENVAR}
\pasteAttributeItem{PMIX_FIRST_ENVAR}
\pasteAttributeItem{PMIX_ALLOC_QUEUE}
\pasteAttributeItem{PMIX_ALLOC_TIME}
\pasteAttributeItem{PMIX_ALLOC_NUM_NODES}
\pasteAttributeItem{PMIX_ALLOC_NODE_LIST}
\pasteAttributeItem{PMIX_ALLOC_NUM_CPUS}
\pasteAttributeItem{PMIX_ALLOC_NUM_CPU_LIST}
\pasteAttributeItem{PMIX_ALLOC_CPU_LIST}
\pasteAttributeItem{PMIX_ALLOC_MEM_SIZE}
\pasteAttributeItem{PMIX_ALLOC_BANDWIDTH}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_QOS}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_TYPE}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_PLANE}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_ENDPTS}
\pasteAttributeItem{PMIX_ALLOC_FABRIC_ENDPTS_NODE}
\pasteAttributeItem{PMIX_COSPAWN_APP}
\pasteAttributeItem{PMIX_SPAWN_TOOL}
\pasteAttributeItem{PMIX_EVENT_SILENT_TERMINATION}
\pasteAttributeItem{PMIX_ENVARS_HARVESTED}
\pasteAttributeItem{PMIX_JOB_TIMEOUT}
\pasteAttributeItem{PMIX_SPAWN_TIMEOUT}
\pasteAttributeItem{PMIX_NOTIFY_COMPLETION}
\pasteAttributeItem{PMIX_NOTIFY_PROC_TERMINATION}
\pasteAttributeItem{PMIX_NOTIFY_PROC_ABNORMAL_TERMINATION}
\pasteAttributeItem{PMIX_LOG_COMPLETION}
\pasteAttributeItem{PMIX_LOG_PROC_TERMINATION}
\pasteAttributeItem{PMIX_LOG_PROC_ABNORMAL_TERMINATION}
\pasteAttributeItem{PMIX_LOG_JOB_EVENTS}
\optattrend
%%%%
\descr
Nonblocking version of the \refapi{PMIx_Spawn} routine. The provided callback function will be executed upon successful start of \emph{all} specified application processes, or upon failure to launch any of them.
\adviceuserstart
Behavior of individual resource managers may differ, but it is expected that failure of any application process to start will result in termination/cleanup of all processes in the newly spawned job and return of an error code to the caller.
\adviceuserend
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Spawn Callback Function}
\declareapi{pmix_spawn_cbfunc_t}
%%%%
\summary
The \refapi{pmix_spawn_cbfunc_t} is used on the PMIx client side by \refapi{PMIx_Spawn_nb} and on the PMIx server side by \refapi{pmix_server_spawn_fn_t}.
\copySignature{pmix_spawn_cbfunc_t}{1.0}{
typedef void (*pmix_spawn_cbfunc_t) \\
\hspace*{4\sigspace}(pmix_status_t status, \\
\hspace*{5\sigspace}pmix_nspace_t nspace, void *cbdata);
}
\begin{arglist}
\argin{status}{Status associated with the operation (handle)}
\argin{nspace}{Namespace string (\refstruct{pmix_nspace_t})}
\argin{cbdata}{Callback data passed to original API call (memory reference)}
\end{arglist}
%%%%
\descr
The callback will be executed upon launch of the specified applications in \refapi{PMIx_Spawn_nb}, or upon failure to launch any of them.
The \refarg{status} of the callback will indicate whether or not the spawn succeeded.
The \refarg{nspace} of the spawned processes will be returned, along with any provided callback data.
Note that the returned \refarg{nspace} value will not be protected upon return from the callback function, so the receiver must copy it if it needs to be retained.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Spawn-specific constants}
\label{api:struct:constants:spawn}
In addition to the generic error constants, the following spawn-specific error constants may be returned by the spawn \acp{API}:
\begin{constantdesc}
%
\declareconstitemvalue{PMIX_ERR_JOB_ALLOC_FAILED}{-188}
The job request could not be executed due to failure to obtain the specified allocation.
%
\declareconstitemvalue{PMIX_ERR_JOB_APP_NOT_EXECUTABLE}{-177}
The specified application executable either could not be found, or lacks execution privileges.
%
\declareconstitemvalue{PMIX_ERR_JOB_NO_EXE_SPECIFIED}{-178}
The job request did not specify an executable.
%
\declareconstitemvalue{PMIX_ERR_JOB_FAILED_TO_MAP}{-179}
The launcher was unable to map the processes for the specified job request.
%
\declareconstitemvalue{PMIX_ERR_JOB_FAILED_TO_LAUNCH}{-181}
One or more processes in the job request failed to launch.
%
\declareconstitemvalueProvisional{PMIX_ERR_JOB_EXE_NOT_FOUND}{-190}
Specified executable not found.
%
\declareconstitemvalueProvisional{PMIX_ERR_JOB_INSUFFICIENT_RESOURCES}{-234}
Insufficient resources to spawn job.
%
\declareconstitemvalueProvisional{PMIX_ERR_JOB_SYS_OP_FAILED}{-235}
System library operation failed.
%
\declareconstitemvalueProvisional{PMIX_ERR_JOB_WDIR_NOT_FOUND}{-233}
Specified working directory not found.
%
\end{constantdesc}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Spawn attributes}
\label{api:struct:attributes:spawn}
Attributes used to describe \refapi{PMIx_Spawn} behavior - they are values passed to the \refapi{PMIx_Spawn} \ac{API} and therefore are not accessed using the \refapi{PMIx_Get} \acp{API} when used in that context. However, some of the attributes defined in this section can be provided by the host environment for other purposes - e.g., the host might provide the \refattr{PMIX_MAPBY} attribute in the job-related information so that an application can use \refapi{PMIx_Get} to discover the mapping used for determining process locations. Multi-use attributes and their respective access reference rank are denoted below.
%
\declareAttribute{PMIX_PERSONALITY}{"pmix.pers"}{char*}{
Name of personality corresponding to programming model used by application - supported values depend upon \ac{PMIx} implementation.
}
%
\declareAttribute{PMIX_HOST}{"pmix.host"}{char*}{
Comma-delimited list of hosts to use for spawned processes.
}
%
\declareAttribute{PMIX_HOSTFILE}{"pmix.hostfile"}{char*}{
Hostfile to use for spawned processes.
The format of this file is determined by the host environment, therefore a file may not be portable across different host environments.
}
%
\declareAttribute{PMIX_ADD_HOST}{"pmix.addhost"}{char*}{
Comma-delimited list of hosts to add to the allocation.
}
%
\declareAttribute{PMIX_ADD_HOSTFILE}{"pmix.addhostfile"}{char*}{
Hostfile containing hosts to add to existing allocation.
The format of this file is determined by the host environment, therefore a file may not be portable across different host environments.
}
%
\declareAttribute{PMIX_PREFIX}{"pmix.prefix"}{char*}{
Prefix to use for starting spawned processes - i.e., the directory where the executables can be found.
}
%
\declareAttribute{PMIX_WDIR}{"pmix.wdir"}{char*}{
Working directory for spawned processes.
}
%
\declareAttribute{PMIX_DISPLAY_MAP}{"pmix.dispmap"}{bool}{
Display process mapping upon spawn. The format of the displayed map is specific to the host environment providing it.
}
%
\declareAttribute{PMIX_PPR}{"pmix.ppr"}{char*}{
Number of processes to spawn on each identified resource.
}
%
\declareAttribute{PMIX_MAPBY}{"pmix.mapby"}{char*}{
Process mapping policy - when accessed using \refapi{PMIx_Get}, use the \refconst{PMIX_RANK_WILDCARD} value for the rank to discover the mapping policy used for the provided namespace. Supported values are launcher specific.
}
%
\declareAttribute{PMIX_RANKBY}{"pmix.rankby"}{char*}{
Process ranking policy - when accessed using \refapi{PMIx_Get}, use the \refconst{PMIX_RANK_WILDCARD} value for the rank to discover the ranking algorithm used for the provided namespace. Supported values are launcher specific.
}
%
\declareAttribute{PMIX_BINDTO}{"pmix.bindto"}{char*}{
Process binding policy - when accessed using \refapi{PMIx_Get}, use the \refconst{PMIX_RANK_WILDCARD} value for the rank to discover the binding policy used for the provided namespace. Supported values are launcher specific.
}
%
\declareAttribute{PMIX_PRELOAD_BIN}{"pmix.preloadbin"}{bool}{
Preload executables onto nodes prior to executing launch procedure.
}
%
\declareAttribute{PMIX_PRELOAD_FILES}{"pmix.preloadfiles"}{char*}{
Comma-delimited list of files to pre-position on nodes prior to executing launch procedure.
}
%
\declareAttribute{PMIX_STDIN_TGT}{"pmix.stdin"}{uint32_t}{
Spawned process rank that is to receive any forwarded \code{stdin}.
}
%
\declareAttribute{PMIX_SET_SESSION_CWD}{"pmix.ssncwd"}{bool}{
Set the current working directory to the session working directory assigned by the \ac{RM} - can be assigned to the entire job (by including attribute in the \refarg{job_info} array) or on a per-application basis in the \refarg{info} array for each \refstruct{pmix_app_t}.
}
%
\declareAttribute{PMIX_TAG_OUTPUT}{"pmix.tagout"}{bool}{
Tag \code{stdout}/\code{stderr} with the identity of the source process - can be assigned to the entire job (by including attribute in the \refarg{job_info} array) or on a per-application basis in the \refarg{info} array for each \refstruct{pmix_app_t}. The format of how the text is tagged is implementation dependent.
}
%
\declareAttribute{PMIX_TIMESTAMP_OUTPUT}{"pmix.tsout"}{bool}{
Timestamp output - can be assigned to the entire job (by including attribute in the \refarg{job_info} array) or on a per-application basis in the \refarg{info} array for each \refstruct{pmix_app_t}. The format of how the text is tagged is implementation dependent.
}
%
\declareAttribute{PMIX_MERGE_STDERR_STDOUT}{"pmix.mergeerrout"}{bool}{
Merge \code{stdout} and \code{stderr} streams - can be assigned to the entire job (by including attribute in the \refarg{job_info} array) or on a per-application basis in the \refarg{info} array for each \refstruct{pmix_app_t}.
}
%
\declareAttribute{PMIX_OUTPUT_TO_FILE}{"pmix.outfile"}{char*}{
Direct output (both stdout and stderr) into files of form \code{"<filename>.rank"} - can be assigned to the entire job (by including attribute in the \refarg{job_info} array) or on a per-application basis in the \refarg{info} array for each \refstruct{pmix_app_t}.
}
%
\declareAttribute{PMIX_OUTPUT_TO_DIRECTORY}{"pmix.outdir"}{char*}{
Direct output into files of form \code{"<directory>/\allowbreak <jobid>/\allowbreak rank.<rank>/\allowbreak stdout[err]"} - can be assigned to the entire job (by including attribute in the \refarg{job_info} array) or on a per-application basis in the \refarg{info} array for each \refstruct{pmix_app_t}.
}
%
\declareAttribute{PMIX_INDEX_ARGV}{"pmix.indxargv"}{bool}{
If set to true, will use the given name of the executable (\code{argv[0]}) as a base name and each rank will be invoked with \code{argv[0]} set to the base name with the string "-<\emph{rank}>" appended to it, where \emph{rank} is the \ac{PMIx} rank of the process being invoked (e.g. a.out-0, a.out-1, etc.). The executable invoked will remain the same for all processes, only the value of \code{argv[0]} will be different for each process.
}
%
\declareAttribute{PMIX_CPUS_PER_PROC}{"pmix.cpuperproc"}{uint32_t}{
Number of \acp{PU} to assign to each rank - when accessed using \refapi{PMIx_Get}, use the \refconst{PMIX_RANK_WILDCARD} value for the rank to discover the \acp{PU}/process assigned to the provided namespace.
}
%
\declareAttribute{PMIX_NO_PROCS_ON_HEAD}{"pmix.nolocal"}{bool}{
Do not place processes on the head node.
}
%
\declareAttribute{PMIX_NO_OVERSUBSCRIBE}{"pmix.noover"}{bool}{
Do not oversubscribe the nodes - i.e., do not place more processes than allocated slots on a node.
}
%
\declareAttribute{PMIX_REPORT_BINDINGS}{"pmix.repbind"}{bool}{
Report bindings of the individual processes. How and where this information is reported is host environment dependent as well as dependent on whether the processes are created through a launching tool or by a direct call to \refapi{PMIx_Spawn}.
}
%
\declareAttribute{PMIX_CPU_LIST}{"pmix.cpulist"}{char*}{
List of \acp{PU} to use for this job - when accessed using \refapi{PMIx_Get}, use the \refconst{PMIX_RANK_WILDCARD} value for the rank to discover the \ac{PU} list used for the provided namespace.
}
%
\declareAttribute{PMIX_JOB_RECOVERABLE}{"pmix.recover"}{bool}{
Application supports recoverable operations.
}
%
\declareAttribute{PMIX_JOB_CONTINUOUS}{"pmix.continuous"}{bool}{
Application is continuous; all failed processes should be immediately restarted.
}
%
\declareAttribute{PMIX_MAX_RESTARTS}{"pmix.maxrestarts"}{uint32_t}{
Maximum number of times to restart a process - when accessed using \refapi{PMIx_Get}, use the \refconst{PMIX_RANK_WILDCARD} value for the rank to discover the max restarts for the provided namespace.
}
%
\declareAttribute{PMIX_SPAWN_TOOL}{"pmix.spwn.tool"}{bool}{
Indicate that the job being spawned is a tool. The repercussions of setting this attribute vary based on the underlying host environment. For example, some host environments may not perform CPU binding on a process marked as a tool.
}
%
\declareAttribute{PMIX_TIMEOUT_STACKTRACES}{"pmix.tim.stack"}{bool}{
Include process stacktraces in timeout report from a job.
}
%
\declareAttribute{PMIX_TIMEOUT_REPORT_STATE}{"pmix.tim.state"}{bool}{
Report process states in timeout report from a job.
}
%
\declareAttribute{PMIX_NOTIFY_JOB_EVENTS}{"pmix.note.jev"}{bool}{
Requests that the launcher generate the
\refconst{PMIX_EVENT_JOB_START}, \refconst{PMIX_LAUNCH_COMPLETE}, and
\refconst{PMIX_EVENT_JOB_END} events. Each event is to include at least the
namespace of the corresponding job and a \refattr{PMIX_EVENT_TIMESTAMP}
indicating the time the event occurred. Note that the requester must either
register for these individual events, or capture and process them by
registering a default event handler instead of individual handlers and then
processing the events based on the returned status code.
Another common method is to register one event handler for all job-related
events, with a separate handler for non-job events - see
\refapi{PMIx_Register_event_handler} for details.
}
%
\declareAttribute{PMIX_NOTIFY_COMPLETION}{"pmix.notecomp"}{bool}{
Requests that the launcher generate the \refconst{PMIX_EVENT_JOB_END} event
for normal or abnormal termination of the spawned job. The event shall include
the returned status code (\refattr{PMIX_JOB_TERM_STATUS}) for the
corresponding job; the identity (\refattr{PMIX_PROCID}) and exit status
(\refattr{PMIX_EXIT_CODE}) of the first failed process, if applicable; and a
\refattr{PMIX_EVENT_TIMESTAMP} indicating the time the termination occurred.
Note that the requester must register for the event or capture and process it
within a default event handler.
}
%
\declareAttribute{PMIX_NOTIFY_PROC_TERMINATION}{"pmix.noteproc"}{bool}{
Requests that the launcher generate the \refconst{PMIX_EVENT_PROC_TERMINATED}
event whenever a process either normally or abnormally terminates.
}
%
\declareAttribute{PMIX_NOTIFY_PROC_ABNORMAL_TERMINATION}{"pmix.noteabproc"}{bool}{
Requests that the launcher generate the \refconst{PMIX_EVENT_PROC_TERMINATED}
event only when a process abnormally terminates.
}
%
\declareAttribute{PMIX_LOG_PROC_TERMINATION}{"pmix.logproc"}{bool}{
Requests that the launcher log the \refconst{PMIX_EVENT_PROC_TERMINATED} event
whenever a process either normally or abnormally terminates.
}
%
\declareAttribute{PMIX_LOG_PROC_ABNORMAL_TERMINATION}{"pmix.logabproc"}{bool}{
Requests that the launcher log the \refconst{PMIX_EVENT_PROC_TERMINATED} event
only when a process abnormally terminates.
}
%
\declareAttribute{PMIX_LOG_JOB_EVENTS}{"pmix.log.jev"}{bool}{
Requests that the launcher log the \refconst{PMIX_EVENT_JOB_START},
\refconst{PMIX_LAUNCH_COMPLETE}, and \refconst{PMIX_EVENT_JOB_END} events using
\refapi{PMIx_Log}, subject to the logging attributes of Section
\ref{api:struct:attributes:log}.
}
%
\declareAttribute{PMIX_LOG_COMPLETION}{"pmix.logcomp"}{bool}{
Requests that the launcher log the \refconst{PMIX_EVENT_JOB_END} event
for normal or abnormal termination of the spawned job using
\refapi{PMIx_Log}, subject to the logging attributes of Section
\ref{api:struct:attributes:log}. The event shall include
the returned status code (\refattr{PMIX_JOB_TERM_STATUS}) for the
corresponding job; the identity (\refattr{PMIX_PROCID}) and exit status
(\refattr{PMIX_EXIT_CODE}) of the first failed process, if applicable; and a
\refattr{PMIX_EVENT_TIMESTAMP} indicating the time the termination occurred.
}
%
\declareAttribute{PMIX_EVENT_SILENT_TERMINATION}{"pmix.evsilentterm"}{bool}{
Do not generate a \refconst{PMIX_EVENT_JOB_END} event when this job normally terminates.
}
%
\declareAttributeProvisional{PMIX_ENVARS_HARVESTED}{"pmix.evar.hvstd"}{bool}{
Environmental parameters have been harvested by the spawn requestor - the server
does not need to harvest them.
}
%
\declareAttributeProvisional{PMIX_JOB_TIMEOUT}{"pmix.job.time"}{int}{
Time in seconds before the spawned job should time out and be terminated (0 => infinite), defined as the total runtime of the job (equivalent to the walltime limit of typical batch schedulers).
}
%
\declareAttributeProvisional{PMIX_SPAWN_TIMEOUT}{"pmix.sp.time"}{int}{
Time in seconds before spawn operation should time out (0 => infinite).
Logically equivalent to passing the \refattr{PMIX_TIMEOUT} attribute to the
\refapi{PMIx_Spawn} \ac{API}, it is provided as a separate attribute to distinguish
it from the \refattr{PMIX_JOB_TIMEOUT} attribute.
}
\vspace{\baselineskip}
Attributes used to adjust remote environment variables prior to spawning the specified application processes.
%
\declareAttribute{PMIX_SET_ENVAR}{"pmix.envar.set"}{pmix_envar_t*}{
Set the envar to the given value, overwriting any pre-existing one
}
%
\declareAttribute{PMIX_UNSET_ENVAR}{"pmix.envar.unset"}{char*}{
Unset the environment variable specified in the string.
}
%
\declareAttribute{PMIX_ADD_ENVAR}{"pmix.envar.add"}{pmix_envar_t*}{
Add the environment variable, but do not overwrite any pre-existing one
}
%
\declareAttribute{PMIX_PREPEND_ENVAR}{"pmix.envar.prepnd"}{pmix_envar_t*}{
Prepend the given value to the specified environment variable using the given separator character, creating the variable if it does not already exist.
}
%
\declareAttribute{PMIX_APPEND_ENVAR}{"pmix.envar.appnd"}{pmix_envar_t*}{
Append the given value to the specified environment variable using the given separator character, creating the variable if it does not already exist.
}
%
\declareAttribute{PMIX_FIRST_ENVAR}{"pmix.envar.first"}{pmix_envar_t*}{
Ensure the given value appears first in the specified envar using the separator character, creating the envar if it does not already exist
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Application Structure}
\declarestruct{pmix_app_t}
The \refstruct{pmix_app_t} structure describes the application context for the \refapi{PMIx_Spawn} and \refapi{PMIx_Spawn_nb} operations.
\copySignature{pmix_app_t}{1.0}{
typedef struct pmix_app \{ \\
\hspace*{4\sigspace}/** Executable */ \\
\hspace*{4\sigspace}char *cmd; \\
\hspace*{4\sigspace}/** Argument set, NULL terminated */ \\
\hspace*{4\sigspace}char **argv; \\
\hspace*{4\sigspace}/** Environment set, NULL terminated */ \\
\hspace*{4\sigspace}char **env; \\
\hspace*{4\sigspace}/** Current working directory */ \\
\hspace*{4\sigspace}char *cwd; \\
\hspace*{4\sigspace}/** Maximum processes with this profile */ \\
\hspace*{4\sigspace}int maxprocs; \\
\hspace*{4\sigspace}/** Array of info keys describing this application*/ \\
\hspace*{4\sigspace}pmix_info_t *info; \\
\hspace*{4\sigspace}/** Number of info keys in 'info' array */ \\
\hspace*{4\sigspace}size_t ninfo; \\
\} pmix_app_t;
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{App structure support macros}
The following macros are provided to support the \refstruct{pmix_app_t} structure.
%%%%
\littleheader{Static initializer for the app structure}
\declaremacroProvisional{PMIX_APP_STATIC_INIT}
Provide a static initializer for the \refstruct{pmix_app_t} fields.
\versionMarker{5.0}
\cspecificstart
\begin{codepar}
PMIX_APP_STATIC_INIT
\end{codepar}
\cspecificend
%%%%%%%%%%%
\littleheader{Initialize the app structure}
\declaremacro{PMIX_APP_CONSTRUCT}
Initialize the \refstruct{pmix_app_t} fields
\copySignature{PMIX_APP_CONSTRUCT}{1.0}{
PMIX_APP_CONSTRUCT(m)
}
\begin{arglist}
\argin{m}{Pointer to the structure to be initialized (pointer to \refstruct{pmix_app_t})}
\end{arglist}
%%%%%%%%%%%
\littleheader{Destruct the app structure}
\declaremacro{PMIX_APP_DESTRUCT}
Destruct the \refstruct{pmix_app_t} fields
\copySignature{PMIX_APP_DESTRUCT}{1.0}{
PMIX_APP_DESTRUCT(m)
}
\begin{arglist}
\argin{m}{Pointer to the structure to be destructed (pointer to \refstruct{pmix_app_t})}
\end{arglist}
%%%%%%%%%%%
\littleheader{Create an app array}
\declaremacro{PMIX_APP_CREATE}
Allocate and initialize an array of \refstruct{pmix_app_t} structures
\copySignature{PMIX_APP_CREATE}{1.0}{
PMIX_APP_CREATE(m, n)
}
\begin{arglist}
\arginout{m}{Address where the pointer to the array of \refstruct{pmix_app_t} structures shall be stored (handle)}
\argin{n}{Number of structures to be allocated (\code{size_t})}
\end{arglist}
%%%%%%%%%%%
\littleheader{Free an app structure}
\declaremacro{PMIX_APP_RELEASE}
Release a \refstruct{pmix_app_t} structure
\copySignature{PMIX_APP_RELEASE}{4.0}{
PMIX_APP_RELEASE(m)
}
\begin{arglist}
\argin{m}{Pointer to a \refstruct{pmix_app_t} structure (handle)}
\end{arglist}
%%%%%%%%%%%
\littleheader{Free an app array}
\declaremacro{PMIX_APP_FREE}
Release an array of \refstruct{pmix_app_t} structures
\copySignature{PMIX_APP_FREE}{1.0}{
PMIX_APP_FREE(m, n)
}
\begin{arglist}
\argin{m}{Pointer to the array of \refstruct{pmix_app_t} structures (handle)}
\argin{n}{Number of structures in the array (\code{size_t})}
\end{arglist}
%%%%%%%%%%%
\littleheader{Create the info array of application directives}
\declaremacro{PMIX_APP_INFO_CREATE}
Create an array of \refstruct{pmix_info_t} structures for passing application-level directives, updating the \refarg{ninfo} field of the \refstruct{pmix_app_t} structure.
\copySignature{PMIX_APP_INFO_CREATE}{2.2}{
PMIX_APP_INFO_CREATE(m, n)
}
\begin{arglist}
\argin{m}{Pointer to the \refstruct{pmix_app_t} structure (handle)}
\argin{n}{Number of directives to be allocated (\code{size_t})}
\end{arglist}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Abort}
\label{chap:api_proc_mgmt:abort}
\ac{PMIx} provides a dedicated \ac{API} by which an application can request that specified processes be aborted by the system.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{\code{PMIx_Abort}}
\declareapi{PMIx_Abort}
%%%%
\summary
Abort the specified processes
%%%%
\format
\copySignature{PMIx_Abort}{1.0}{
pmix_status_t \\
PMIx_Abort(int status, const char msg[], \\
\hspace*{11\sigspace}pmix_proc_t procs[], size_t nprocs)
}
\begin{arglist}
\argin{status}{Error code to return to invoking environment (integer)}
\argin{msg}{String message to be returned to user (string)}
\argin{procs}{Array of \refstruct{pmix_proc_t} structures (array of handles)}
\argin{nprocs}{Number of elements in the \refarg{procs} array (integer)}
\end{arglist}
A successful return indicates that the requested processes are in a terminated state. Note that the function shall not return in this situation if the caller's own process was included in the request.
\returnstart
\begin{itemize}
\item \refconst{PMIX_ERR_PARAM_VALUE_NOT_SUPPORTED} if the \ac{PMIx} implementation and host environment support this \ac{API}, but the request includes processes that the host environment cannot abort - e.g., if the request is to abort subsets of processes from a namespace, or processes outside of the caller's own namespace, and the host environment does not permit such operations. In this case, none of the specified processes will be terminated.
\end{itemize}
\returnend
%%%%
\descr
Request that the host resource manager abort the provided array of procs. If the design of the host resource manager allows, the provided message should be associated with any record it prints or logs of the operation.
If the processes were launched by an application designed to launch the processes and which exists for the lifetime of the processes, then this application should terminate with the return code provided if the system allows.
A \code{NULL} for the \refarg{procs} array indicates that all processes in the caller's namespace are to be aborted, including itself - this is the equivalent of passing a \refstruct{pmix_proc_t} array element containing the caller's namespace and a rank value of \refconst{PMIX_RANK_WILDCARD}. While it is permitted for a caller to request abort of processes from namespaces other than its own, not all environments will support such requests.
Passing a \code{NULL} \refarg{msg} parameter is allowed.
The function shall not return until the host environment has carried out the operation on the specified processes. If the caller is included in the array of targets, then the function will not return unless the host is unable to execute the operation.
\adviceuserstart
The response to this request is somewhat dependent on the specific \ac{RM} and its configuration (e.g., some resource managers will not abort the application if the provided status is zero unless specifically configured to do so, some cannot abort subsets of processes in an application, and some may not permit termination of processes outside of the caller's own namespace), and thus lies outside the control of \ac{PMIx} itself.
However, the \ac{PMIx} client library shall inform the \ac{RM} of the request that the specified \refarg{procs} be aborted, regardless of the value of the provided status.
Note that race conditions caused by multiple processes calling \refapi{PMIx_Abort} are left to the server implementation to resolve with regard to which status is returned and what messages (if any) are printed.
\adviceuserend
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Connecting and Disconnecting Processes}
\label{chap:api_proc_mgmt:connect}
This section defines functions to connect and disconnect processes in two or more separate \ac{PMIx} namespaces.
The \ac{PMIx} definition of \textit{connected} solely implies that the host environment should treat the failure of any process in the assemblage as a reportable event, taking action on the assemblage as if it were a single application.
The call requests that \ac{PMIx}, together with the \ac{RM}, should treat connected processes as a single assemblage for the purposes of event notification and responses to abnormal process termination.
For example, if the environment defaults (in the absence of any application directives) to terminating an application upon failure of any process in that application, then the environment should terminate all processes in the connected assemblage upon failure of any member.
The host environment may choose to assign a new namespace to the connected assemblage and/or assign new ranks for its members for its own internal tracking purposes. For implementations which use this approach, it is up to the implementation whether such namespaces are exposed to users or clients (e.g., in response to an appropriate call to \refapi{PMIx_Query_info_nb}). The host environment is required to generate a \refconst{PMIX_ERR_PROC_TERM_WO_SYNC} event should any process in the assemblage terminate or call \refapi{PMIx_Finalize} without first \textit{disconnecting} from the assemblage. If the job including the process is terminated as a result of that action, then the host environment is required to also generate the \refconst{PMIX_ERR_JOB_TERM_WO_SYNC} event for all jobs that were terminated as a result.
\adviceuserstart
Attempting to \textit{connect} processes solely within the same namespace is essentially a \textit{no-op} operation. While not explicitly prohibited, users are advised that a \ac{PMIx} implementation or host environment may return an error in such cases.
\adviceuserend
\advicermstart
The \textit{connect} operation does not require the exchange of job-level information nor the inclusion of information posted by participating processes via \refapi{PMIx_Put}. Indeed, the callback function utilized in \refapi{pmix_server_connect_fn_t} cannot pass information back into the \ac{PMIx} server library. However, host environments are advised that collecting such information at the participating daemons represents an optimization opportunity as participating processes are likely to request such information after the connect operation completes.
\advicermend
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{\code{PMIx_Connect}}
\declareapi{PMIx_Connect}
%%%%
\summary
Connect namespaces.
%%%%
\format
\copySignature{PMIx_Connect}{1.0}{
pmix_status_t \\
PMIx_Connect(const pmix_proc_t procs[], size_t nprocs, \\
\hspace*{13\sigspace}const pmix_info_t info[], size_t ninfo)
}
\begin{arglist}
\argin{procs}{Array of proc structures (array of handles)}
\argin{nprocs}{Number of elements in the \refarg{procs} array (integer)}
\argin{info}{Array of info structures (array of handles)}
\argin{ninfo}{Number of elements in the \refarg{info} array (integer)}
\end{arglist}
\returnsimple
\reqattrstart
\ac{PMIx} libraries are not required to directly support any attributes for this function. However, any provided attributes must be passed to the host \ac{SMS} daemon for processing.
\reqattrend
\optattrstart
The following attributes are optional for \ac{PMIx} implementations:
\pasteAttributeItem{PMIX_ALL_CLONES_PARTICIPATE}
The following attributes are optional for host environments that support this operation:
\pasteAttributeItem{PMIX_TIMEOUT}
\optattrend
%%%%
\descr
Record the processes specified by the \refarg{procs} array as \textit{connected}.
The \ac{PMIx} definition of \textit{connected} solely implies that the host environment should treat the failure of any process in the assemblage as a reportable event, taking action on the assemblage as if it were a single application.
The function will return once all processes identified in \refarg{procs} have called either \refapi{PMIx_Connect} or its non-blocking version, \textit{and} the host environment has completed any supporting operations required to meet the terms of the \ac{PMIx} definition of \textit{connected} processes.
A process can only engage in one connect operation involving the identical \refarg{procs} array at a time.
However, a process can be simultaneously engaged in multiple connect operations, each involving a different \refarg{procs} array.
As in the case of the \refapi{PMIx_Fence} operation, the \refarg{info} array can be used to pass user-level directives regarding timeout constraints and other options available from the host \ac{RM}.
Each provided \refstruct{pmix_proc_t} struct can pass \refconst{PMIX_RANK_WILDCARD} to indicate that all processes in the given namespace are participating.
The ordering of the entries in the \refarg{procs} has no significance. However, all processes engaged in a given
\refapi{PMIx_Connect}
operation must use the same method to identify processes. Callers which describe
the target set of processes using \refconst{PMIX_RANK_WILDCARD} will not be matched with
callers which list the individual processes of a namespace explicitly.
\adviceimplstart
\refapi{PMIx_Connect} and its non-blocking form are both \emph{collective} operations. Accordingly, the \ac{PMIx} server library is required to aggregate participation by local clients, passing the request to the host environment once all local participants have executed the \ac{API}.
\adviceimplend
\advicermstart
The host will receive a single call for each collective operation. It is the responsibility of the host to identify the nodes containing participating processes, execute the collective across all participating nodes, and notify the local \ac{PMIx} server library upon completion of the global collective.
\advicermend
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{\code{PMIx_Connect_nb}}
\declareapi{PMIx_Connect_nb}
%%%%
\summary
Nonblocking version of the \refapi{PMIx_Connect} routine.
%%%%
\format
\copySignature{PMIx_Connect_nb}{1.0}{
pmix_status_t \\
PMIx_Connect_nb(const pmix_proc_t procs[], size_t nprocs, \\
\hspace*{16\sigspace}const pmix_info_t info[], size_t ninfo, \\
\hspace*{16\sigspace}pmix_op_cbfunc_t cbfunc, void *cbdata)
}
\begin{arglist}
\argin{procs}{Array of proc structures (array of handles)}
\argin{nprocs}{Number of elements in the \refarg{procs} array (integer)}
\argin{info}{Array of info structures (array of handles)}
\argin{ninfo}{Number of elements in the \refarg{info} array (integer)}
\argin{cbfunc}{Callback function \refapi{pmix_op_cbfunc_t} (function reference)}
\argin{cbdata}{Data to be passed to the callback function (memory reference)}
\end{arglist}
\returnsimplenb
\returnstart
\begin{itemize}
\item \refconst{PMIX_OPERATION_SUCCEEDED}, indicating that the request was immediately processed and returned \textit{success} - the \refarg{cbfunc} will \textit{not} be called
\end{itemize}
\returnend
\reqattrstart
\ac{PMIx} libraries are not required to directly support any attributes for this function. However, any provided attributes must be passed to the host \ac{SMS} daemon for processing.
\reqattrend
\optattrstart
The following attributes are optional for \ac{PMIx} implementations:
\pasteAttributeItem{PMIX_ALL_CLONES_PARTICIPATE}
The following attributes are optional for host environments that support this operation:
\pasteAttributeItem{PMIX_TIMEOUT}
\optattrend
%%%%
\descr
Nonblocking version of \refapi{PMIx_Connect}. The callback function is called once all processes identified in \refarg{procs} have called either \refapi{PMIx_Connect} or its non-blocking version, \textit{and} the host environment has completed any supporting operations required to meet the terms of the \ac{PMIx} definition of \textit{connected} processes. See the description of \refapi{PMIx_Connect} for more information.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{\code{PMIx_Disconnect}}
\declareapi{PMIx_Disconnect}
%%%%
\summary
Disconnect a previously connected set of processes.
%%%%
\format
\copySignature{PMIx_Disconnect}{1.0}{
pmix_status_t \\
PMIx_Disconnect(const pmix_proc_t procs[], size_t nprocs, \\
\hspace*{16\sigspace}const pmix_info_t info[], size_t ninfo)
}
\begin{arglist}
\argin{procs}{Array of proc structures (array of handles)}
\argin{nprocs}{Number of elements in the \refarg{procs} array (integer)}
\argin{info}{Array of info structures (array of handles)}
\argin{ninfo}{Number of elements in the \refarg{info} array (integer)}
\end{arglist}
\returnstart
\begin{itemize}
\item the \refconst{PMIX_ERR_INVALID_OPERATION} error indicating that the specified set of \refarg{procs} was not previously \textit{connected} via a call to \refapi{PMIx_Connect} or its non-blocking form.
\end{itemize}
\returnend
\reqattrstart
\ac{PMIx} libraries are not required to directly support any attributes for this function. However, any provided attributes must be passed to the host \ac{SMS} daemon for processing.
\reqattrend
\optattrstart
The following attributes are optional for \ac{PMIx} implementations:
\pasteAttributeItem{PMIX_ALL_CLONES_PARTICIPATE}
The following attributes are optional for host environments that support this operation:
\pasteAttributeItem{PMIX_TIMEOUT}
\optattrend
%%%%
\descr
Disconnect a previously connected set of processes. The function will return once all processes identified in \refarg{procs} have called either \refapi{PMIx_Disconnect} or its non-blocking version, \textit{and} the host environment has completed any required supporting operations.