#!/bin/bash
#COBALT -n 1
#COBALT -t 60
#COBALT -q debug-cache-quad
#COBALT -A datascience
#COBALT --jobname pytorch-deephyper
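# Submit with: qsub qlaunch.sh -- Cobalt reads the #COBALT directives above
# (1 node for 60 minutes in Theta's debug-cache-quad queue, charged to the
# datascience project).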
module load miniconda-3/2019-11
# All of these environment variables are set again within *_run.py, which also sets:
#   os.environ['MKLDNN_VERBOSE'] = str(1)
#   os.environ['MKL_VERBOSE'] = str(1)
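# Thread and affinity settings for a Theta KNL node (64 cores, 4 hardware
# threads per core):
#   KMP_HW_SUBSET=1s,64c,2t     -> expose 1 socket, 64 cores, 2 threads per core
#   KMP_AFFINITY=...compact,1,0 -> pin each OpenMP thread to a single HW thread,
#                                  filling cores before hyperthreads
#   KMP_BLOCKTIME=0             -> threads sleep immediately after parallel regions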
export OMP_NUM_THREADS=64
export MKL_NUM_THREADS=$OMP_NUM_THREADS
export KMP_HW_SUBSET=1s,${OMP_NUM_THREADS}c,2t
export KMP_AFFINITY=granularity=fine,compact,1,0
export KMP_BLOCKTIME=0
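# Record the job's environment and a copy of this script for reproducibility.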
env > "${COBALT_JOBID}.env"
cp "$0" "${COBALT_JOBID}.sh"
#export MKLDNN_VERBOSE=1
#export MKL_VERBOSE=1
#aprun -n 1 -N 1 -d 64 -j 2 python -m deephyper.search.hps.ambs --problem problem.py --run model_run.py --n-jobs=1
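# The commented-out line above would let aprun handle placement (-d 64 CPUs
# per PE, -j 2 HW threads per core); the live line below passes --cc none to
# disable aprun's CPU binding and defer to the KMP_* settings above.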
cd /projects/datascience/parton/deephyper/deephyper_pytorch_layers
# From the DeepHyper --n-jobs help string: "number of cores to use for the
# 'learner', if n_jobs=-1 then it will use all cores available."
aprun -n 1 -N 1 --cc none python -m deephyper.search.hps.ambs --problem problem.py --run model_run.py --n-jobs=1 --evaluator=subprocess
# Recall: the -n vs. -N convention is the OPPOSITE of Slurm's. From the Cray
# aprun documentation:
#   [-n | --pes width]
#   [-N | --pes-per-node pes_per_node]
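# Illustration: aprun -n 8 -N 4 launches 8 PEs with 4 per node (2 nodes),
# while the rough Slurm equivalent is srun -n 8 -N 2 (8 tasks on 2 nodes).
# Save the search results under the job ID.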
mv results.csv "${COBALT_JOBID}.csv"