From 8b36181839a9184d1a1c1db4a05f56468cc9c187 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Thu, 2 Mar 2023 14:37:46 -0600 Subject: [PATCH 01/15] CryoSPARC installation and Slurm cluster config --- playbooks/cccluster.yml | 7 +- .../files/slurm_cluster_info.json.j2 | 11 ++++ .../files/slurm_cluster_script.sh.j2 | 14 ++++ .../scripts/01-setup_data_disk.sh.j2 | 10 +++ .../scripts/02-download_cryosparc.sh.j2 | 15 +++++ .../scripts/03-install_cryosparc.sh.j2 | 66 +++++++++++++++++++ .../scripts/04-import_slurm_cluster.sh.j2 | 17 +++++ .../roles/cyclecloud_cluster/tasks/main.yml | 53 +++++++++++++-- .../templates/azhop-slurm.txt.j2 | 28 +++++++- .../roles/cyclecloud_cluster/vars/main.yml | 3 +- 10 files changed, 211 insertions(+), 13 deletions(-) create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json.j2 create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh.j2 create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/01-setup_data_disk.sh.j2 create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/02-download_cryosparc.sh.j2 create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2 diff --git a/playbooks/cccluster.yml b/playbooks/cccluster.yml index c20ff268a..40b4ba3ed 100644 --- a/playbooks/cccluster.yml +++ b/playbooks/cccluster.yml @@ -94,9 +94,9 @@ include_vars: file: '{{lookup_img_file}}' - - include_role: + - include_role: name: cyclecloud_cluster - apply: + apply: become: true vars: cc_region: '{{location}}' @@ -109,7 +109,7 @@ cc_domain: '{{domain_name}}' cc_queue_manager: '{{ queue_manager | default("openpbs") }}' influxdb_database_name: "telegraf" - telegraf_influxdb_urls: + telegraf_influxdb_urls: - "http://grafana:8086" cc_slurm_version: '{{slurm.slurm_version | default("20.11.9")}}-1' slurm_uid: 11100 @@ -120,6 +120,7 @@ enroot_scratch_dir: '/mnt/resource' cvmfs_eessi_enabled: '{{cvmfs_eessi.enabled | default(false)}}' cc_enable_remote_winviz: '{{enable_remote_winviz | default(false)}}' + cryosparc_enabled: '{{applications.cryosparc.enabled | default(false)}}' # Generate the node array core lookup file for ondemand - will be only run if the marker file for ondemand exists - import_tasks: nodearray_lookup.yml diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json.j2 new file mode 100644 index 000000000..cf1401848 --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json.j2 @@ -0,0 +1,11 @@ +{ + "name": "{{ applications.cryosparc.target_queue }}", + "worker_bin_path": "/apps/cryosparc_worker/bin/cryosparcw", + "cache_path": "/mnt/resource", + "send_cmd_tpl": "{%raw%}{{ command }}{%endraw%}", + "qsub_cmd_tpl": "sbatch {%raw%}{{ script_path_abs }}{%endraw%}", + "qstat_cmd_tpl": "squeue -j {%raw%}{{ cluster_job_id }}{%endraw%}", + "qdel_cmd_tpl": "scancel {%raw%}{{ cluster_job_id }}{%endraw%}", + "qinfo_cmd_tpl": "sinfo", + "transfer_cmd_tpl": "scp {%raw%}{{ src_path }}{%endraw%} loginnode:{%raw%}{{ dest_path }}{%endraw%}" +} diff --git 
a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh.j2 new file mode 100644 index 000000000..5de718737 --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh.j2 @@ -0,0 +1,14 @@ +#jinja2: trim_blocks:False +#!/bin/bash +#SBATCH --partition={{ applications.cryosparc.target_queue }} +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node={%raw%}{{ num_cpu }}{%endraw%} +#SBATCH --cpus-per-task=1 +#SBATCH --threads-per-core=1 +#SBATCH --gres=gpu:{%raw%}{{ num_gpu }}{%endraw%} +#SBATCH --mem={%raw%}{{ (ram_gb*1000)|int }}{%endraw%}MB +#SBATCH --job-name cryosparc_{%raw%}{{ project_uid }}_{{ job_uid }}{%endraw%} +#SBATCH --output={%raw%}{{ job_log_path_abs }}{%endraw%} +#SBATCH --error={%raw%}{{ job_log_path_abs }}{%endraw%} + +{%raw%}{{ run_cmd }}{%endraw%} diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/01-setup_data_disk.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/01-setup_data_disk.sh.j2 new file mode 100644 index 000000000..18628f273 --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/01-setup_data_disk.sh.j2 @@ -0,0 +1,10 @@ +#!/bin/bash + +parted /dev/sdb mktable gpt +parted /dev/sdb mkpart primary ext4 0% 100% +mkfs.ext4 /dev/sdb1 +DEV_UUID=$(blkid -s UUID -o value /dev/sdb1) +printf 'UUID=%s /cryosparc_data ext4 defaults 0 0\n' $DEV_UUID >> /etc/fstab +mkdir /cryosparc_data +mount -a +chown {{ applications.cryosparc.admin_user }}: /cryosparc_data diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/02-download_cryosparc.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/02-download_cryosparc.sh.j2 new file mode 100644 index 000000000..60a1d38ab --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/02-download_cryosparc.sh.j2 @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +mkdir -p /sched/apps/cryosparc +cd /sched/apps/cryosparc + +if [ -s /sched/apps/cryosparc/cryosparc_master.tar.gz ] && [ -s /sched/apps/cryosparc/cryosparc_worker.tar.gz ]; then + echo "CryoSPARC archives already downloaded" +else + echo "Downloading CryoSPARC master" + curl -L https://get.cryosparc.com/download/master-latest/{{ applications.cryosparc.license_id }} -o cryosparc_master.tar.gz + curl -L https://get.cryosparc.com/download/worker-latest/{{ applications.cryosparc.license_id }} -o cryosparc_worker.tar.gz +fi + +chown -R {{ applications.cryosparc.admin_user }}: /sched/apps/cryosparc diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 new file mode 100644 index 000000000..bf76e34aa --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 @@ -0,0 +1,66 @@ +#!/bin/bash +set -e + +if [ $(hostname) == {{ applications.cryosparc.master_hostname }} ]; then + + # Install CryoSPARC master as admin user on master node + sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF + cd /cryosparc_data + tar xzf /sched/apps/cryosparc/cryosparc_master.tar.gz + cd cryosparc_master + ./install.sh --license {{ applications.cryosparc.license_id }} \ + --hostname $(hostname -f) \ + 
--dbpath /cryosparc_data/cryosparc_database \ + --port 39000 \ + --yes + + # The service log target directory must be created manually + # otherwise the systemd service will fail to start (likely a bug) + mkdir -p /cryosparc_data/cryosparc_master/run +EOF + + # Install CryoSPARC systemd service + eval $(/cryosparc_data/cryosparc_master/bin/cryosparcm env) + cd /cryosparc_data/cryosparc_master/systemd + env "CRYOSPARC_ROOT_DIR=$CRYOSPARC_ROOT_DIR" ./install_services.sh + + systemctl enable cryosparc-supervisor.service + systemctl start cryosparc-supervisor.service + + # Create admin user in CryoSPARC + sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF + eval $(/cryosparc_data/cryosparc_master/bin/cryosparcm env) + cryosparcm createuser --email {{ applications.cryosparc.admin_user }}@azhop.com \ + --username {{ applications.cryosparc.admin_user }} \ + --firstname Admin \ + --lastname User \ + --password {{ applications.cryosparc.admin_pwd }} +EOF + +else + + # Create /apps directory if it doesn't exist + if [ ! -d /apps ]; then + mkdir -p /apps + chmod 777 /apps + fi + + cd /apps + tar xzf /sched/apps/cryosparc/cryosparc_worker.tar.gz + chown -R {{ applications.cryosparc.admin_user }}: /apps/cryosparc* + + # Get CUDA library path + LIBCUDART_PATH=$(sudo find /usr/local -name libcudart.so) + CUDA_PATH=$(echo $LIBCUDART_PATH | cut -d'/' -f-4) + export CUDA_PATH + + # Install CryoSPARC worker as admin user on worker node + sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF + cd /apps/cryosparc_worker + ./install.sh --license {{ applications.cryosparc.license_id }} \ + --cudapath ${CUDA_PATH} \ + --yes +EOF + +fi + diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2 new file mode 100644 index 000000000..ffe96c124 --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2 @@ -0,0 +1,17 @@ +#!/bin/bash +set -e + +if [ $(hostname) == {{ applications.cryosparc.master_hostname }} ]; then + + ADMIN_HOME_DIR=$(eval echo "~{{ applications.cryosparc.admin_user }}") + export TARGET_DIR=${ADMIN_HOME_DIR}/cryosparc_cluster_{{ applications.cryosparc.target_queue }} + mkdir -p ${TARGET_DIR} + cp $CYCLECLOUD_SPEC_PATH/files/* ${TARGET_DIR} + chown -R adminuser: ${TARGET_DIR} + + # Import Slurm cluster + sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF + cd ${TARGET_DIR} + /cryosparc_data/cryosparc_master/bin/cryosparcm cluster connect +EOF +fi diff --git a/playbooks/roles/cyclecloud_cluster/tasks/main.yml b/playbooks/roles/cyclecloud_cluster/tasks/main.yml index e0413f0e5..a25553f13 100644 --- a/playbooks/roles/cyclecloud_cluster/tasks/main.yml +++ b/playbooks/roles/cyclecloud_cluster/tasks/main.yml @@ -27,38 +27,38 @@ dest: '{{common_project_root}}/specs/default/cluster-init/scripts/1-mountnfs.sh' mode: 0777 -- name: Add lustre script +- name: Add lustre script template: src: '{{role_path}}/projects/common/cluster-init/scripts/2-mountlustre.sh.j2' dest: '{{common_project_root}}/specs/default/cluster-init/scripts/2-mountlustre.sh' mode: 0777 when: ( lustre.create | default(false) ) -- name: Add Linux joindomain script +- name: Add Linux joindomain script template: src: '{{role_path}}/projects/common/cluster-init/scripts/3-joindomain.sh.j2' dest: '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.sh' mode: 0777 -- 
name: Add Windows joindomain script +- name: Add Windows joindomain script template: src: '{{role_path}}/projects/common/cluster-init/scripts/3-joindomain.bat.j2' dest: '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.bat' mode: 0777 -- name: Add default script +- name: Add default script template: src: '{{role_path}}/projects/common/cluster-init/scripts/5-default.sh.j2' dest: '{{common_project_root}}/specs/default/cluster-init/scripts/5-default.sh' mode: 0777 -- name: Add telegraf configuration file +- name: Add telegraf configuration file template: src: '{{role_path}}/projects/common/cluster-init/files/telegraf.conf.j2' dest: '{{common_project_root}}/specs/default/cluster-init/files/telegraf.conf' mode: 0600 -- name: Add nhc configuration file +- name: Add nhc configuration file template: src: '{{role_path}}/projects/common/cluster-init/files/nhc/nhc_common.conf.j2' dest: '{{common_project_root}}/specs/default/cluster-init/files/nhc/nhc_common.conf' @@ -117,6 +117,46 @@ command: '/usr/local/bin/cyclecloud start_cluster pbs1' when: cc_queue_manager == "openpbs" +- name: CryoSPARC CycleCloud project + block: + - name: Create cryosparc project + command: '/usr/local/bin/cyclecloud project init cryosparc' + args: + chdir: '{{project_root}}' + creates: '{{cryosparc_project_root}}/project.ini' + + - name: Create setup_data_disk.sh, download_cryosparc.sh, install_cryosparc.sh + template: + src: '{{role_path}}/projects/cryosparc/cluster-init/scripts/{{ item }}.j2' + dest: '{{cryosparc_project_root}}/specs/default/cluster-init/scripts/{{ item }}' + mode: 0777 + with_items: + - 01-setup_data_disk.sh + - 02-download_cryosparc.sh + - 03-install_cryosparc.sh + + - name: Create import_slurm_cluster.sh + template: + src: '{{role_path}}/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2' + dest: '{{cryosparc_project_root}}/specs/default/cluster-init/scripts/04-import_slurm_cluster.sh' + mode: 0777 + when: cc_queue_manager == "slurm" + + - name: Create CryoSPARC Slurm cluster definition files + template: + src: '{{role_path}}/projects/cryosparc/cluster-init/files/slurm_{{ item }}.j2' + dest: '{{cryosparc_project_root}}/specs/default/cluster-init/files/{{ item }}' + with_items: + - cluster_info.json + - cluster_script.sh + when: cc_queue_manager == "slurm" + + - name: Upload cryosparc CycleCloud project + command: '/usr/local/bin/cyclecloud project upload' + args: + chdir: '{{cryosparc_project_root}}' + when: cryosparc_enabled == true + - name: SLURM template and optional Enroot project block: - name: Add azhop-Slurm template @@ -138,6 +178,7 @@ args: chdir: '{{project_root}}' creates: '{{enroot_project_root}}/project.ini' + - name: Create install_pyxis.sh template: src: '{{role_path}}/projects/enroot/cluster-init/scripts/1-install_pyxis.sh.j2' diff --git a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 index b046d5f9e..47dca927a 100644 --- a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 +++ b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 @@ -5,7 +5,7 @@ [cluster azhop-slurm] FormLayout = selectionpanel -Category = Azure HPC OnDemand Platform +Category = Azure HPC OnDemand Platform Autoscale = true @@ -74,7 +74,7 @@ echo "cloud-init done" >> /tmp/cloud-init.txt cyclecloud.cluster.autoscale.idle_time_before_jobs = {{autoscale.idle_timeout}} {% endif %} - [[[volume boot]]] + [[[volume boot]]] StorageAccountType = StandardSSD_LRS [[[cluster-init 
cyclecloud/slurm:default:{{cyclecloud_slurm_release}}]]] @@ -99,7 +99,7 @@ echo "cloud-init done" >> /tmp/cloud-init.txt {% for queue in cc_queues %} [[nodearray {{ queue.name }}]] Extends = nodearraybase - MachineType = {{ queue.vm_size }} + MachineType = {{ queue.vm_size }} MaxCoreCount = {{ queue.max_core_count }} {% if queue.EnableAcceleratedNetworking is defined %} EnableAcceleratedNetworking = {{ queue.EnableAcceleratedNetworking }} @@ -137,4 +137,26 @@ echo "cloud-init done" >> /tmp/cloud-init.txt {% if enroot_enabled is defined and enroot_enabled %} [[[cluster-init enroot:default:1.0.0]]] {% endif %} + {% if cryosparc_enabled and queue.name == applications.cryosparc.target_queue %} + [[[cluster-init cryosparc:default:1.0.0]]] + {% endif %} {% endfor %} + +{% if cryosparc_enabled %} + [[node {{ applications.cryosparc.master_hostname }}]] + MachineType = {{applications.cryosparc.master_size}} + EnableAcceleratedNetworking = true + ImageName = azhpc:azhop-compute:centos-7_9:latest + + [[[volume boot]]] + size = 64 + StorageAccountType = StandardSSD_LRS + + [[[volume cryosparc]]] + size = {{applications.cryosparc.master_data_disk_size}} + StorageAccountType = {{applications.cryosparc.master_data_disk_type}} + Persistent = true + + [[[cluster-init cyclecloud/slurm:login:{{cyclecloud_slurm_release}}]]] + [[[cluster-init cryosparc:default:1.0.0]]] +{% endif %} diff --git a/playbooks/roles/cyclecloud_cluster/vars/main.yml b/playbooks/roles/cyclecloud_cluster/vars/main.yml index cf6b8fd7d..d34e887e3 100644 --- a/playbooks/roles/cyclecloud_cluster/vars/main.yml +++ b/playbooks/roles/cyclecloud_cluster/vars/main.yml @@ -2,6 +2,7 @@ project_root: /root/projects common_project_root: '{{project_root}}/common' openpbs_project_root: '{{project_root}}/openpbs' enroot_project_root: '{{project_root}}/enroot' +cryosparc_project_root: '{{project_root}}/cryosparc' cc_queue_manager: cyclecloud_slurm_release: 2.7.0 slurm_version: '{{cc_slurm_version}}' @@ -12,4 +13,4 @@ slurm_gid: 11100 munge_uid: 11101 munge_gid: 11101 cvmfs_eessi_enabled: false -cc_enable_remote_winviz: false \ No newline at end of file +cc_enable_remote_winviz: false From ff7d040bfc57e329df3298eba438b092c7d20724 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Thu, 2 Mar 2023 15:18:56 -0600 Subject: [PATCH 02/15] Added cryosparc app entry in config.yml template --- config.tpl.yml | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/config.tpl.yml b/config.tpl.yml index eed7754c8..a4a164750 100644 --- a/config.tpl.yml +++ b/config.tpl.yml @@ -32,7 +32,7 @@ mounts: export: '{{anf_home_path}}' # Specify an existing NFS export directory, when using the ANF built in use '{{anf_home_path}}' options: "rw,hard,rsize=262144,wsize=262144,vers=3,tcp,_netdev" # Specify the mount options. Default to rw,hard,rsize=262144,wsize=262144,vers=3,tcp,_netdev # mount1: -# mountpoint: /mount1 +# mountpoint: /mount1 # server: a.b.c.d # Specify an existing NFS server name or IP # export: myexport1 # Specify an existing NFS export name # options: my_options # Specify the mount options.
@@ -48,7 +48,7 @@ network: vnet: name: hpcvnet # Optional - default to hpcvnet id: # If a vnet id is set then no network will be created and the provided vnet will be used - address_space: "10.0.0.0/23" + address_space: "10.0.0.0/23" # Special VNET Tags # tags: # key1: value1 @@ -56,7 +56,7 @@ network: subnets: # all subnets are optionals # name values can be used to rename the default to specific names, address_prefixes to change the IP ranges to be used # All values below are the default values - frontend: + frontend: name: frontend address_prefixes: "10.0.0.0/29" create: true # create the subnet if true. default to true when not specified, default to false if using an existing VNET when not specified @@ -111,7 +111,7 @@ network: # asg-deployer: asg-deployer # asg-guacamole: asg-guacamole # asg-mariadb-client: asg-mariadb-client - + # peering: # This list is optional, and can be used to create VNet Peerings in the same subscription. # - vnet_name: #"VNET Name to Peer to" # vnet_resource_group: #"Resource Group of the VNET to peer to" @@ -235,9 +235,9 @@ database: # Admin user of the database for which the password will be retrieved from the azhop keyvault user: sqladmin # FQDN of the managed instance - fqdn: + fqdn: # IP of the managed private endpoint if the FQDN is not registered in a private DNS - ip: + ip: # Create a Bastion in the bastion subnet when defined bastion: @@ -380,7 +380,7 @@ queues: ColocateNodes: false # Specific idle time in seconds before shutting down VMs, make sure it's lower than autoscale.idle_timeout idle_timeout: 300 - # Set the max number of vm's in a VMSS; requires additional limit raise through support ticket for >100; + # Set the max number of vm's in a VMSS; requires additional limit raise through support ticket for >100; # 100 is default value; lower numbers will improve scaling for single node jobs or jobs with small number of nodes MaxScaleSetSize: 100 - name: hc44rs @@ -459,3 +459,13 @@ applications: enabled: false bc_paraview: enabled: false + cryosparc: + enabled: true + license_id: + admin_user: adminuser + admin_pwd: + master_size: Standard_D8s_v5 + master_data_disk_size: 512 + master_data_disk_type: Premium_LRS + master_hostname: cryosparc-master + target_queue: nc24v3 From 690913b1580818a07f8763459eed57f4f98cbd19 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Fri, 3 Mar 2023 10:34:45 -0600 Subject: [PATCH 03/15] Created NSG rule for cluster applications --- deploy/purebicep/azhop.bicep | 60 ++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/deploy/purebicep/azhop.bicep b/deploy/purebicep/azhop.bicep index 601733783..8b5d28ed5 100644 --- a/deploy/purebicep/azhop.bicep +++ b/deploy/purebicep/azhop.bicep @@ -56,7 +56,7 @@ var createDatabase = (config.queue_manager == 'slurm' && config.slurm.accounting var lustreOssCount = deployLustre ? 
azhopConfig.lustre.oss_count : 0 -var ossVmConfig = [for oss in range(0, lustreOssCount) : { +var ossVmConfig = [for oss in range(0, lustreOssCount) : { key: 'lustre-oss-${oss}' value: { identity: { @@ -387,6 +387,8 @@ var config = { MariaDB: ['3306', '33060'] Guacamole: ['8080'] WinRM: ['5985', '5986'] + // Applications: CryoSPARC + Applications: ['39000'] } nsg_rules: { @@ -394,7 +396,7 @@ var config = { // // INBOUND RULES // - + // AD communication AllowAdServerTcpIn : ['220', 'Inbound', 'Allow', 'Tcp', 'DomainControlerTcp', 'asg', 'asg-ad', 'asg', 'asg-ad-client'] AllowAdServerUdpIn : ['230', 'Inbound', 'Allow', 'Udp', 'DomainControlerUdp', 'asg', 'asg-ad', 'asg', 'asg-ad-client'] @@ -406,17 +408,17 @@ var config = { AllowAdClientComputeUdpIn : ['290', 'Inbound', 'Allow', 'Udp', 'DomainControlerUdp', 'subnet', 'compute', 'asg', 'asg-ad'] AllowAdServerNetappTcpIn : ['300', 'Inbound', 'Allow', 'Tcp', 'DomainControlerTcp', 'subnet', 'netapp', 'asg', 'asg-ad'] AllowAdServerNetappUdpIn : ['310', 'Inbound', 'Allow', 'Udp', 'DomainControlerUdp', 'subnet', 'netapp', 'asg', 'asg-ad'] - + // SSH internal rules AllowSshFromJumpboxIn : ['320', 'Inbound', 'Allow', 'Tcp', 'Ssh', 'asg', 'asg-jumpbox', 'asg', 'asg-ssh'] AllowSshFromComputeIn : ['330', 'Inbound', 'Allow', 'Tcp', 'Ssh', 'subnet', 'compute', 'asg', 'asg-ssh'] // Only in a deployer VM scenario - AllowSshFromDeployerIn : ['340', 'Inbound', 'Allow', 'Tcp', 'Ssh', 'asg', 'asg-deployer', 'asg', 'asg-ssh'] + AllowSshFromDeployerIn : ['340', 'Inbound', 'Allow', 'Tcp', 'Ssh', 'asg', 'asg-deployer', 'asg', 'asg-ssh'] // Only in a deployer VM scenario AllowDeployerToPackerSshIn : ['350', 'Inbound', 'Allow', 'Tcp', 'Ssh', 'asg', 'asg-deployer', 'subnet', 'admin'] AllowSshToComputeIn : ['360', 'Inbound', 'Allow', 'Tcp', 'Ssh', 'asg', 'asg-ssh', 'subnet', 'compute'] AllowSshComputeComputeIn : ['365', 'Inbound', 'Allow', 'Tcp', 'Ssh', 'subnet', 'compute', 'subnet', 'compute'] - + // PBS AllowPbsIn : ['369', 'Inbound', 'Allow', '*', 'Pbs', 'asg', 'asg-pbs', 'asg', 'asg-pbs-client'] AllowPbsClientIn : ['370', 'Inbound', 'Allow', '*', 'Pbs', 'asg', 'asg-pbs-client', 'asg', 'asg-pbs'] @@ -424,44 +426,47 @@ var config = { AllowComputePbsClientIn : ['390', 'Inbound', 'Allow', '*', 'Pbs', 'subnet', 'compute', 'asg', 'asg-pbs-client'] AllowComputePbsIn : ['400', 'Inbound', 'Allow', '*', 'Pbs', 'subnet', 'compute', 'asg', 'asg-pbs'] AllowComputeComputePbsIn : ['401', 'Inbound', 'Allow', '*', 'Pbs', 'subnet', 'compute', 'subnet', 'compute'] - + // SLURM AllowComputeSlurmIn : ['405', 'Inbound', 'Allow', '*', 'Slurmd', 'asg', 'asg-ondemand', 'subnet', 'compute'] - + // CycleCloud AllowCycleWebIn : ['440', 'Inbound', 'Allow', 'Tcp', 'Web', 'asg', 'asg-ondemand', 'asg', 'asg-cyclecloud'] AllowCycleClientIn : ['450', 'Inbound', 'Allow', 'Tcp', 'CycleCloud', 'asg', 'asg-cyclecloud-client', 'asg', 'asg-cyclecloud'] AllowCycleClientComputeIn : ['460', 'Inbound', 'Allow', 'Tcp', 'CycleCloud', 'subnet', 'compute', 'asg', 'asg-cyclecloud'] AllowCycleServerIn : ['465', 'Inbound', 'Allow', 'Tcp', 'CycleCloud', 'asg', 'asg-cyclecloud', 'asg', 'asg-cyclecloud-client'] - + // OnDemand NoVNC AllowComputeNoVncIn : ['470', 'Inbound', 'Allow', 'Tcp', 'NoVnc', 'subnet', 'compute', 'asg', 'asg-ondemand'] AllowNoVncComputeIn : ['480', 'Inbound', 'Allow', 'Tcp', 'NoVnc', 'asg', 'asg-ondemand', 'subnet', 'compute'] - + // Telegraf / Grafana AllowTelegrafIn : ['490', 'Inbound', 'Allow', 'Tcp', 'Telegraf', 'asg', 'asg-telegraf', 'asg', 'asg-grafana'] AllowComputeTelegrafIn : ['500', 
'Inbound', 'Allow', 'Tcp', 'Telegraf', 'subnet', 'compute', 'asg', 'asg-grafana'] AllowGrafanaIn : ['510', 'Inbound', 'Allow', 'Tcp', 'Grafana', 'asg', 'asg-ondemand', 'asg', 'asg-grafana'] - + // Admin and Deployment AllowWinRMIn : ['520', 'Inbound', 'Allow', 'Tcp', 'WinRM', 'asg', 'asg-jumpbox', 'asg', 'asg-rdp'] AllowRdpIn : ['550', 'Inbound', 'Allow', 'Tcp', 'Rdp', 'asg', 'asg-jumpbox', 'asg', 'asg-rdp'] AllowWebDeployerIn : ['595', 'Inbound', 'Allow', 'Tcp', 'Web', 'asg', 'asg-deployer', 'asg', 'asg-ondemand'] - + // Guacamole AllowGuacamoleRdpIn : ['610', 'Inbound', 'Allow', 'Tcp', 'Rdp', 'asg', 'asg-guacamole', 'subnet', 'compute'] - + // MariaDB AllowMariaDBIn : ['700', 'Inbound', 'Allow', 'Tcp', 'MariaDB', 'asg', 'asg-mariadb-client', 'subnet', 'admin'] + // Cluster applications + AllowApplicationsIn : ['710', 'Inbound', 'Allow', 'All', 'Applications', 'asg', 'asg-ondemand', 'subnet', 'compute'] + // Deny all remaining traffic DenyVnetInbound : ['3100', 'Inbound', 'Deny', '*', 'All', 'tag', 'VirtualNetwork', 'tag', 'VirtualNetwork'] - - + + // // Outbound // - + // AD communication AllowAdClientTcpOut : ['200', 'Outbound', 'Allow', 'Tcp', 'DomainControlerTcp', 'asg', 'asg-ad-client', 'asg', 'asg-ad'] AllowAdClientUdpOut : ['210', 'Outbound', 'Allow', 'Udp', 'DomainControlerUdp', 'asg', 'asg-ad-client', 'asg', 'asg-ad'] @@ -473,13 +478,13 @@ var config = { AllowAdServerComputeUdpOut : ['270', 'Outbound', 'Allow', 'Udp', 'DomainControlerUdp', 'asg', 'asg-ad', 'subnet', 'compute'] AllowAdServerNetappTcpOut : ['280', 'Outbound', 'Allow', 'Tcp', 'DomainControlerTcp', 'asg', 'asg-ad', 'subnet', 'netapp'] AllowAdServerNetappUdpOut : ['290', 'Outbound', 'Allow', 'Udp', 'DomainControlerUdp', 'asg', 'asg-ad', 'subnet', 'netapp'] - + // CycleCloud AllowCycleServerOut : ['300', 'Outbound', 'Allow', 'Tcp', 'CycleCloud', 'asg', 'asg-cyclecloud', 'asg', 'asg-cyclecloud-client'] AllowCycleClientOut : ['310', 'Outbound', 'Allow', 'Tcp', 'CycleCloud', 'asg', 'asg-cyclecloud-client', 'asg', 'asg-cyclecloud'] AllowComputeCycleClientIn : ['320', 'Outbound', 'Allow', 'Tcp', 'CycleCloud', 'subnet', 'compute', 'asg', 'asg-cyclecloud'] AllowCycleWebOut : ['330', 'Outbound', 'Allow', 'Tcp', 'Web', 'asg', 'asg-ondemand', 'asg', 'asg-cyclecloud'] - + // PBS AllowPbsOut : ['340', 'Outbound', 'Allow', '*', 'Pbs', 'asg', 'asg-pbs', 'asg', 'asg-pbs-client'] AllowPbsClientOut : ['350', 'Outbound', 'Allow', '*', 'Pbs', 'asg', 'asg-pbs-client', 'asg', 'asg-pbs'] @@ -487,19 +492,19 @@ var config = { AllowPbsClientComputeOut : ['370', 'Outbound', 'Allow', '*', 'Pbs', 'subnet', 'compute', 'asg', 'asg-pbs'] AllowComputePbsClientOut : ['380', 'Outbound', 'Allow', '*', 'Pbs', 'subnet', 'compute', 'asg', 'asg-pbs-client'] AllowComputeComputePbsOut : ['381', 'Outbound', 'Allow', '*', 'Pbs', 'subnet', 'compute', 'subnet', 'compute'] - + // SLURM AllowSlurmComputeOut : ['385', 'Outbound', 'Allow', '*', 'Slurmd', 'asg', 'asg-ondemand', 'subnet', 'compute'] - + // NFS AllowNfsOut : ['440', 'Outbound', 'Allow', '*', 'Nfs', 'asg', 'asg-nfs-client', 'subnet', 'netapp'] AllowNfsComputeOut : ['450', 'Outbound', 'Allow', '*', 'Nfs', 'subnet', 'compute', 'subnet', 'netapp'] - + // Telegraf / Grafana AllowTelegrafOut : ['460', 'Outbound', 'Allow', 'Tcp', 'Telegraf', 'asg', 'asg-telegraf', 'asg', 'asg-grafana'] AllowComputeTelegrafOut : ['470', 'Outbound', 'Allow', 'Tcp', 'Telegraf', 'subnet', 'compute', 'asg', 'asg-grafana'] AllowGrafanaOut : ['480', 'Outbound', 'Allow', 'Tcp', 'Grafana', 'asg', 'asg-ondemand', 'asg', 'asg-grafana'] - 
+ // SSH internal rules AllowSshFromJumpboxOut : ['490', 'Outbound', 'Allow', 'Tcp', 'Ssh', 'asg', 'asg-jumpbox', 'asg', 'asg-ssh'] AllowSshComputeOut : ['500', 'Outbound', 'Allow', 'Tcp', 'Ssh', 'asg', 'asg-ssh', 'subnet', 'compute'] @@ -507,23 +512,26 @@ var config = { AllowSshDeployerPackerOut : ['520', 'Outbound', 'Allow', 'Tcp', 'Ssh', 'asg', 'asg-deployer', 'subnet', 'admin'] AllowSshFromComputeOut : ['530', 'Outbound', 'Allow', 'Tcp', 'Ssh', 'subnet', 'compute', 'asg', 'asg-ssh'] AllowSshComputeComputeOut : ['540', 'Outbound', 'Allow', 'Tcp', 'Ssh', 'subnet', 'compute', 'subnet', 'compute'] - + // OnDemand NoVNC AllowComputeNoVncOut : ['550', 'Outbound', 'Allow', 'Tcp', 'NoVnc', 'subnet', 'compute', 'asg', 'asg-ondemand'] AllowNoVncComputeOut : ['560', 'Outbound', 'Allow', 'Tcp', 'NoVnc', 'asg', 'asg-ondemand', 'subnet', 'compute'] - + // Admin and Deployment AllowRdpOut : ['570', 'Outbound', 'Allow', 'Tcp', 'Rdp', 'asg', 'asg-jumpbox', 'asg', 'asg-rdp'] AllowWinRMOut : ['580', 'Outbound', 'Allow', 'Tcp', 'WinRM', 'asg', 'asg-jumpbox', 'asg', 'asg-rdp'] AllowDnsOut : ['590', 'Outbound', 'Allow', '*', 'Dns', 'tag', 'VirtualNetwork', 'tag', 'VirtualNetwork'] AllowWebDeployerOut : ['595', 'Outbound', 'Allow', 'Tcp', 'Web', 'asg', 'asg-deployer', 'asg', 'asg-ondemand'] - + // Guacamole AllowGuacamoleRdpOut : ['610', 'Outbound', 'Allow', 'Tcp', 'Rdp', 'asg', 'asg-guacamole', 'subnet', 'compute'] - + // MariaDB AllowMariaDBOut : ['700', 'Outbound', 'Allow', 'Tcp', 'MariaDB', 'asg', 'asg-mariadb-client', 'subnet', 'admin'] - + + // Cluster applications + AllowApplicationsOut : ['710', 'Outbound', 'Allow', 'All', 'Applications', 'asg', 'asg-ondemand', 'subnet', 'compute'] + // Deny all remaining traffic and allow Internet access AllowInternetOutBound : ['3000', 'Outbound', 'Allow', 'Tcp', 'All', 'tag', 'VirtualNetwork', 'tag', 'Internet'] DenyVnetOutbound : ['3100', 'Outbound', 'Deny', '*', 'All', 'tag', 'VirtualNetwork', 'tag', 'VirtualNetwork'] From 548ada8487570448434b45e6b00ce08642dec879 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Tue, 7 Mar 2023 11:49:12 -0600 Subject: [PATCH 04/15] Support multiple queues import in CryoSPARC --- config.tpl.yml | 11 ++++++---- .../files/slurm_cluster_info.json | 11 ++++++++++ .../files/slurm_cluster_info.json.j2 | 11 ---------- .../files/slurm_cluster_script.sh | 13 ++++++++++++ .../files/slurm_cluster_script.sh.j2 | 14 ------------- .../scripts/04-import_slurm_cluster.sh.j2 | 21 ++++++++++--------- 6 files changed, 42 insertions(+), 39 deletions(-) create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json delete mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json.j2 create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh delete mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh.j2 diff --git a/config.tpl.yml b/config.tpl.yml index a4a164750..ad15cf2be 100644 --- a/config.tpl.yml +++ b/config.tpl.yml @@ -460,12 +460,15 @@ applications: bc_paraview: enabled: false cryosparc: - enabled: true + enabled: false license_id: admin_user: adminuser - admin_pwd: master_size: Standard_D8s_v5 - master_data_disk_size: 512 + master_data_disk_size: 256 master_data_disk_type: Premium_LRS master_hostname: cryosparc-master - target_queue: nc24v3 + master_image: azhpc:azhop-compute:centos-7_9:latest + target_queues: + - nc24v3 + - 
hb120v3 + - hc44rs diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json new file mode 100644 index 000000000..fd450c82d --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json @@ -0,0 +1,11 @@ +{ + "name": "PARTITION", + "worker_bin_path": "/anfhome/apps/cryosparc_worker/bin/cryosparcw", + "cache_path": "/mnt/resource", + "send_cmd_tpl": "{{ command }}", + "qsub_cmd_tpl": "sbatch {{ script_path_abs }}", + "qstat_cmd_tpl": "squeue -j {{ cluster_job_id }}", + "qdel_cmd_tpl": "scancel {{ cluster_job_id }}", + "qinfo_cmd_tpl": "sinfo", + "transfer_cmd_tpl": "scp {{ src_path }} loginnode:{{ dest_path }}" +} diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json.j2 deleted file mode 100644 index cf1401848..000000000 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json.j2 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "name": "{{ applications.cryosparc.target_queue }}", - "worker_bin_path": "/apps/cryosparc_worker/bin/cryosparcw", - "cache_path": "/mnt/resource", - "send_cmd_tpl": "{%raw%}{{ command }}{%endraw%}", - "qsub_cmd_tpl": "sbatch {%raw%}{{ script_path_abs }}{%endraw%}", - "qstat_cmd_tpl": "squeue -j {%raw%}{{ cluster_job_id }}{%endraw%}", - "qdel_cmd_tpl": "scancel {%raw%}{{ cluster_job_id }}{%endraw%}", - "qinfo_cmd_tpl": "sinfo", - "transfer_cmd_tpl": "scp {%raw%}{{ src_path }}{%endraw%} loginnode:{%raw%}{{ dest_path }}{%endraw%}" -} diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh new file mode 100644 index 000000000..af537ceab --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh @@ -0,0 +1,13 @@ +#!/bin/bash +#SBATCH --partition=PARTITION +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node={{ num_cpu }} +#SBATCH --cpus-per-task=1 +#SBATCH --threads-per-core=1 +#SBATCH --gres=gpu:{{ num_gpu }} +#SBATCH --mem={{ (ram_gb*1000)|int }}MB +#SBATCH --job-name cryosparc_{{ project_uid }}_{{ job_uid }} +#SBATCH --output={{ job_log_path_abs }} +#SBATCH --error={{ job_log_path_abs }} + +{{ run_cmd }} diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh.j2 deleted file mode 100644 index 5de718737..000000000 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_script.sh.j2 +++ /dev/null @@ -1,14 +0,0 @@ -#jinja2: trim_blocks:False -#!/bin/bash -#SBATCH --partition={{ applications.cryosparc.target_queue }} -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node={%raw%}{{ num_cpu }}{%endraw%} -#SBATCH --cpus-per-task=1 -#SBATCH --threads-per-core=1 -#SBATCH --gres=gpu:{%raw%}{{ num_gpu }}{%endraw%} -#SBATCH --mem={%raw%}{{ (ram_gb*1000)|int }}{%endraw%}MB -#SBATCH --job-name cryosparc_{%raw%}{{ project_uid }}_{{ job_uid }}{%endraw%} -#SBATCH --output={%raw%}{{ job_log_path_abs }}{%endraw%} -#SBATCH --error={%raw%}{{ job_log_path_abs }}{%endraw%} - -{%raw%}{{ run_cmd }}{%endraw%} diff --git 
a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2 index ffe96c124..3fa11bb9f 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2 +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/04-import_slurm_cluster.sh.j2 @@ -2,16 +2,17 @@ set -e if [ $(hostname) == {{ applications.cryosparc.master_hostname }} ]; then + {% for partition in applications.cryosparc.target_queues %} + export TARGET_DIR=/cryosparc_data/cryosparc_cluster_{{ partition }} + mkdir -p ${TARGET_DIR} + cp $CYCLECLOUD_SPEC_PATH/files/* ${TARGET_DIR} + sed -i 's/PARTITION/{{ partition }}/g' ${TARGET_DIR}/* + chown -R adminuser: ${TARGET_DIR} - ADMIN_HOME_DIR=$(eval echo "~{{ applications.cryosparc.admin_user }}") - export TARGET_DIR=${ADMIN_HOME_DIR}/cryosparc_cluster_{{ applications.cryosparc.target_queue }} - mkdir -p ${TARGET_DIR} - cp $CYCLECLOUD_SPEC_PATH/files/* ${TARGET_DIR} - chown -R adminuser: ${TARGET_DIR} - - # Import Slurm cluster - sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF - cd ${TARGET_DIR} - /cryosparc_data/cryosparc_master/bin/cryosparcm cluster connect + # Import Slurm cluster + sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF + cd ${TARGET_DIR} + /cryosparc_data/cryosparc_master/bin/cryosparcm cluster connect EOF + {% endfor %} fi From ef19445a836744c5f8fdc4a19f538d20a0568b09 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Tue, 7 Mar 2023 11:51:04 -0600 Subject: [PATCH 05/15] Create apps directory in shared home path --- playbooks/linux.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/playbooks/linux.yml b/playbooks/linux.yml index 10737e5fe..7e8085c1e 100644 --- a/playbooks/linux.yml +++ b/playbooks/linux.yml @@ -19,7 +19,7 @@ line: 'AllowTcpForwarding yes' - name: restart sshd service: - name: sshd + name: sshd state: restarted - name: Join AD domain and mount anfhome @@ -64,3 +64,10 @@ state: directory mode: '0755' run_once : true + + - name: Create {{homedir_mountpoint}}/apps directory + file: + path: '{{homedir_mountpoint}}/apps' + state: directory + mode: '0755' + run_once : true From 3f2bc7c14650800d635af1aa9ad218ee7c9824ad Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Tue, 7 Mar 2023 11:52:50 -0600 Subject: [PATCH 06/15] Download CryoSPARC source archives in shared apps dir --- .../scripts/02-download_cryosparc.sh.j2 | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/02-download_cryosparc.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/02-download_cryosparc.sh.j2 index 60a1d38ab..928085b6c 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/02-download_cryosparc.sh.j2 +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/02-download_cryosparc.sh.j2 @@ -1,15 +1,19 @@ #!/bin/bash set -e -mkdir -p /sched/apps/cryosparc -cd /sched/apps/cryosparc - -if [ -s /sched/apps/cryosparc/cryosparc_master.tar.gz ] && [ -s /sched/apps/cryosparc/cryosparc_worker.tar.gz ]; then - echo "CryoSPARC archives already downloaded" -else - echo "Downloading CryoSPARC master" - curl -L https://get.cryosparc.com/download/master-latest/{{ applications.cryosparc.license_id }} -o cryosparc_master.tar.gz -
curl -L https://get.cryosparc.com/download/worker-latest/{{ applications.cryosparc.license_id }} -o cryosparc_worker.tar.gz -fi - -chown -R {{ applications.cryosparc.admin_user }}: /sched/apps/cryosparc +INSTALL_DIR=/anfhome/apps/cryosparc +SOURCES_DIR=${INSTALL_DIR}/sources + +mkdir -p ${SOURCES_DIR} +cd ${SOURCES_DIR} + +for COMPONENT in master worker; do + if [ -s ${SOURCES_DIR}/cryosparc_${COMPONENT}.tar.gz ]; then + echo "cryosparc_${COMPONENT}.tar.gz already downloaded" + else + echo "Downloading cryosparc_${COMPONENT}.tar.gz" + curl -L https://get.cryosparc.com/download/${COMPONENT}-latest/{{ applications.cryosparc.license_id }} -o cryosparc_${COMPONENT}.tar.gz + fi +done + +chown -R {{ applications.cryosparc.admin_user }}: ${INSTALL_DIR} From 04d50889a18637e9823f7164ffa0b9cf2d0d7751 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Tue, 7 Mar 2023 13:59:54 -0600 Subject: [PATCH 07/15] Install CryoSPARC worker in shared volume from master --- .../scripts/03-install_cryosparc.sh.j2 | 100 +++++++++--------- .../templates/azhop-slurm.txt.j2 | 3 - 2 files changed, 49 insertions(+), 54 deletions(-) diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 index bf76e34aa..3ddb6ed0a 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 @@ -1,66 +1,64 @@ #!/bin/bash set -e -if [ $(hostname) == {{ applications.cryosparc.master_hostname }} ]; then +export INSTALL_DIR=/anfhome/apps/cryosparc +SOURCES_DIR=${INSTALL_DIR}/sources - # Install CryoSPARC master as admin user on master node - sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF - cd /cryosparc_data - tar xzf /sched/apps/cryosparc/cryosparc_master.tar.gz - cd cryosparc_master - ./install.sh --license {{ applications.cryosparc.license_id }} \ - --hostname $(hostname -f) \ - --dbpath /cryosparc_data/cryosparc_database \ - --port 39000 \ - --yes +################################################### +# Install CryoSPARC master locally on master node # +################################################### - # The service log target directory must be created manually - # otherwise the systemd service will fail to start (likely a bug) - mkdir -p /cryosparc_data/cryosparc_master/run +# CryoSPARC master must be installed as admin user +sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF +cd /cryosparc_data +tar xzf ${SOURCES_DIR}/cryosparc_master.tar.gz +cd cryosparc_master +./install.sh --license {{ applications.cryosparc.license_id }} \ + --hostname $(hostname -f) \ + --dbpath /cryosparc_data/cryosparc_database \ + --port 39000 \ + --yes + +# The service log target directory must be created manually +# otherwise the systemd service will fail to start (likely a bug) +mkdir -p /cryosparc_data/cryosparc_master/run EOF - # Install CryoSPARC systemd service - eval $(/cryosparc_data/cryosparc_master/bin/cryosparcm env) - cd /cryosparc_data/cryosparc_master/systemd - env "CRYOSPARC_ROOT_DIR=$CRYOSPARC_ROOT_DIR" ./install_services.sh +# Install CryoSPARC systemd service +eval $(/cryosparc_data/cryosparc_master/bin/cryosparcm env) +cd /cryosparc_data/cryosparc_master/systemd +env "CRYOSPARC_ROOT_DIR=$CRYOSPARC_ROOT_DIR" ./install_services.sh - systemctl enable cryosparc-supervisor.service - systemctl start 
cryosparc-supervisor.service +systemctl enable cryosparc-supervisor.service +systemctl start cryosparc-supervisor.service - # Create admin user in CryoSPARC - sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF - eval $(/cryosparc_data/cryosparc_master/bin/cryosparcm env) - cryosparcm createuser --email {{ applications.cryosparc.admin_user }}@azhop.com \ - --username {{ applications.cryosparc.admin_user }} \ - --firstname Admin \ - --lastname User \ - --password {{ applications.cryosparc.admin_pwd }} +# Create admin user in CryoSPARC +sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF +eval $(/cryosparc_data/cryosparc_master/bin/cryosparcm env) +cryosparcm createuser --email {{ applications.cryosparc.admin_user }}@azhop.com \ + --username {{ applications.cryosparc.admin_user }} \ + --firstname Admin \ + --lastname User \ + --password {{ applications.cryosparc.admin_pwd }} EOF -else - - # Create /apps directory if it doesn't exist - if [ ! -d /apps ]; then - mkdir -p /apps - chmod 777 /apps - fi +############################################################# +# Install CryoSPARC worker on shared applications directory # +############################################################# - cd /apps - tar xzf /sched/apps/cryosparc/cryosparc_worker.tar.gz - chown -R {{ applications.cryosparc.admin_user }}: /apps/cryosparc* +cd ${INSTALL_DIR} +tar xzf ${SOURCES_DIR}/cryosparc_worker.tar.gz +chown -R {{ applications.cryosparc.admin_user }}: ./cryosparc_worker* - # Get CUDA library path - LIBCUDART_PATH=$(sudo find /usr/local -name libcudart.so) - CUDA_PATH=$(echo $LIBCUDART_PATH | cut -d'/' -f-4) - export CUDA_PATH +# Get CUDA library path +LIBCUDART_PATH=$(sudo find /usr/local -name libcudart.so) +CUDA_PATH=$(echo $LIBCUDART_PATH | cut -d'/' -f-4) +export CUDA_PATH - # Install CryoSPARC worker as admin user on worker node - sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF - cd /apps/cryosparc_worker - ./install.sh --license {{ applications.cryosparc.license_id }} \ - --cudapath ${CUDA_PATH} \ - --yes +# CryoSPARC must be installed as admin user +sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF +cd /apps/cryosparc_worker +./install.sh --license {{ applications.cryosparc.license_id }} \ + --cudapath ${CUDA_PATH} \ + --yes EOF - -fi - diff --git a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 index 47dca927a..f2c461361 100644 --- a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 +++ b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 @@ -137,9 +137,6 @@ echo "cloud-init done" >> /tmp/cloud-init.txt {% if enroot_enabled is defined and enroot_enabled %} [[[cluster-init enroot:default:1.0.0]]] {% endif %} - {% if cryosparc_enabled and queue.name == applications.cryosparc.target_queue %} - [[[cluster-init cryosparc:default:1.0.0]]] - {% endif %} {% endfor %} {% if cryosparc_enabled %} From a221edbdb83a8b9d07c5abe725ebd7a9157f857a Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Tue, 7 Mar 2023 14:04:23 -0600 Subject: [PATCH 08/15] Use CryoSPARC admin password from KV --- .../cluster-init/scripts/03-install_cryosparc.sh.j2 | 2 +- playbooks/roles/cyclecloud_cluster/tasks/main.yml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 
b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 index 3ddb6ed0a..41d388957 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 @@ -39,7 +39,7 @@ cryosparcm createuser --email {{ applications.cryosparc.admin_user }}@azhop.com --username {{ applications.cryosparc.admin_user }} \ --firstname Admin \ --lastname User \ - --password {{ applications.cryosparc.admin_pwd }} + --password {{ cryosparc_admin_pwd }} EOF ############################################################# diff --git a/playbooks/roles/cyclecloud_cluster/tasks/main.yml b/playbooks/roles/cyclecloud_cluster/tasks/main.yml index a25553f13..e71a966cb 100644 --- a/playbooks/roles/cyclecloud_cluster/tasks/main.yml +++ b/playbooks/roles/cyclecloud_cluster/tasks/main.yml @@ -119,6 +119,13 @@ - name: CryoSPARC CycleCloud project block: + - name: Read CryoSPARC admin password from KV + command: az keyvault secret show --vault-name {{key_vault}} -n {{database_user}}-password --query "value" -o tsv + delegate_to: localhost + connection: local + register: cryosparc_admin_pwd + become: false + - name: Create cryosparc project command: '/usr/local/bin/cyclecloud project init cryosparc' args: From c22fae3da3b41af385d6d687f3bf8cadc6856deb Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Wed, 8 Mar 2023 10:23:49 -0600 Subject: [PATCH 09/15] Corrected typos --- .../cluster-init/scripts/03-install_cryosparc.sh.j2 | 6 +++--- playbooks/roles/cyclecloud_cluster/tasks/main.yml | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 index 41d388957..499f59343 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 @@ -39,7 +39,7 @@ cryosparcm createuser --email {{ applications.cryosparc.admin_user }}@azhop.com --username {{ applications.cryosparc.admin_user }} \ --firstname Admin \ --lastname User \ - --password {{ cryosparc_admin_pwd }} + --password {{ cryosparc_admin_pwd.stdout }} EOF ############################################################# @@ -48,7 +48,7 @@ EOF cd ${INSTALL_DIR} tar xzf ${SOURCES_DIR}/cryosparc_worker.tar.gz -chown -R {{ applications.cryosparc.admin_user }}: ./cryosparc_worker* +chown -R {{ applications.cryosparc.admin_user }}: ./cryosparc*_worker # Get CUDA library path LIBCUDART_PATH=$(sudo find /usr/local -name libcudart.so) @@ -57,7 +57,7 @@ export CUDA_PATH # CryoSPARC must be installed as admin user sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF -cd /apps/cryosparc_worker +cd ${INSTALL_DIR}/cryosparc_worker ./install.sh --license {{ applications.cryosparc.license_id }} \ --cudapath ${CUDA_PATH} \ --yes diff --git a/playbooks/roles/cyclecloud_cluster/tasks/main.yml b/playbooks/roles/cyclecloud_cluster/tasks/main.yml index e71a966cb..ac45d2868 100644 --- a/playbooks/roles/cyclecloud_cluster/tasks/main.yml +++ b/playbooks/roles/cyclecloud_cluster/tasks/main.yml @@ -21,7 +21,7 @@ src: '{{role_path}}/projects/common/cluster-init' dest: '{{common_project_root}}/specs/default/' -- name: Copy mountnfs file. 
+- name: Copy mountnfs file template: src: '{{role_path}}/projects/common/cluster-init/scripts/1-mountnfs.sh.j2' dest: '{{common_project_root}}/specs/default/cluster-init/scripts/1-mountnfs.sh' @@ -149,9 +149,9 @@ mode: 0777 when: cc_queue_manager == "slurm" - - name: Create CryoSPARC Slurm cluster definition files - template: - src: '{{role_path}}/projects/cryosparc/cluster-init/files/slurm_{{ item }}.j2' + - name: Copy CryoSPARC Slurm cluster definition files + copy: + src: '{{role_path}}/projects/cryosparc/cluster-init/files/slurm_{{ item }}' dest: '{{cryosparc_project_root}}/specs/default/cluster-init/files/{{ item }}' with_items: - cluster_info.json From 6221ff1f5890e9b7fbb6596954981cbc990812f6 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Thu, 9 Mar 2023 14:06:01 -0600 Subject: [PATCH 10/15] Add CryoSPARC XFCE apps manu entry --- .../cluster-init/files/cryosparc.desktop | 8 ++++++ .../cluster-init/files/cryosparc_16.png | Bin 0 -> 5677 bytes .../scripts/01-setup_data_disk.sh.j2 | 4 +++ .../scripts/02-download_cryosparc.sh.j2 | 3 ++ .../scripts/03-install_cryosparc.sh.j2 | 3 ++ .../scripts/04-import_slurm_cluster.sh.j2 | 27 +++++++++--------- .../05-install_app_menu_shortcut.sh.j2 | 10 +++++++ .../roles/cyclecloud_cluster/tasks/main.yml | 13 +++++++-- .../templates/azhop-slurm.txt.j2 | 3 ++ 9 files changed, 56 insertions(+), 15 deletions(-) create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/cryosparc.desktop create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/cryosparc_16.png create mode 100644 playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/05-install_app_menu_shortcut.sh.j2 diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/cryosparc.desktop b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/cryosparc.desktop new file mode 100644 index 000000000..c95721942 --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/cryosparc.desktop @@ -0,0 +1,8 @@ +[Desktop Entry] +Type=Link +Version=1.0 +Name=CryoSPARC +Icon=/usr/share/icons/hicolor/16x16/apps/cryosparc.png +URL=http://cryosparc-master:39000/ +Name[en_US.UTF-8]=CryoSPARC +Categories=Education diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/cryosparc_16.png b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/cryosparc_16.png new file mode 100644 index 0000000000000000000000000000000000000000..5f4ed1066eff34bd1bba12f723a4f1787d65d485 GIT binary patch literal 5677 zcmeHLcT^Ku7Y`sskf!2VP(nlvu9My+(n~1Pq9EexBr|~sAu)vntcV2!tni5nBCCQG z3rbZHr062Bf~YIHA}FX>P*HFN{3byW&$s7%$8&c7GiPQp^WMF`d++bvdy_Ybe)HXp z4JR9-P$*-Lhf4tRZKggA^pLC7+fN`0rSmM78!Qh1m1v1nED%P*XnBkTM#Czh0EJTB zy&tqV`Z;z?f0#x7C>v|D+VW-f#dC{lv$R{-1CqvVk0QL230DKMcS`P_T>DAhYl@rs z1{Z!do!?XBWU|LducPqz_5G{T2L@ZWmDgP;okPeO6;R-wR60oX9jqQ}k=*m!_U6@K z@sg`xp;);o+MQ%Z)DzKDmCh9{+As8eD*&wW4|f4`x@NYV8P^g;FIiJ*BWfFD+PIvu z?KgNk+h*ED12GD^*LG+bI-iam8&ZBUKX>YAZRv#E+O)VMrbixfpKeO4O{nj@&t4p- zv?w-`Ubxy6;~rc8+n9m0&Sf7H#GSVg-;BL#(p2(y%Dq{dA2j zs>98cN^n|jdaNF0^vN#CAR9Njk>Lxaf1uP-gB?F6LiMS_8(hx zEx=)+!Ipkb`F3Yl*Z5;@KFjx>cS*Efo=mJ-f&Gt-ZP6jyoYXmwx!!ZqL(V~hKRi3; zFeucTjI)n3yy6>>LQ+coX6#D&SSs7d9w2t+=&szt}&Y12B_ z+aJ0anj^*xeYl7a~I#sIMmy$V4bzkRq)7Qm87Fk-d2F!=q z4!vIMm+ps8BxvQWYIC-7Ty-kkUJ%GpI7+YW58acuv&*|Af8*Sh z`et{UyAEAEO;#3&Y3uHQYwp=x04 z0kX4|7K@p0pZaprfuN(ZvQY+Qw=Xqh2WcsK{UP7B)=O2dH_2c9bcIQLY2ncLdTmpm 
zRzIF}^4@Im8F!@WwKfQyo&7k@&R?n?sr8K2JJ=pq94xDr23BDfn9ogMKu6{S30oMR z7D>yp&sl^h2*o`{dnq=yxUK6W%FDM^9g9!O&ojwekM7fId)9X731-G1-u7I=>((Nx zS1;Kw|I~6iSi#MY(DtL&dQ|aww8Gi%FH~$p>D}0tQ0dLy@yqqgvQ_u5)Z8w`HoTeF z+xaB$%}kwDnAes~#!0Gpm7uAS`D)zeU`{|nL*jJ3CENRUbe*jgXC#>z*^wbA;BBQf z@64TYseb;>?6JP%Hd`z>xXQZgY(-2`e#p@?x7Z(y;$uWJ>J)nqCh$j(wy}0{Glsd# ztUZsrZAl#eqQ>Xa;v{Ky~kzuHQzw&_;tPH zzj7=Ia-*KS;*00sulNfE9XbAZN^7gxM0~*elA2PT%!>mnbDAHRhDS&bM$R)Op-?6Z zgh&?-_VH%%#UdOCiFq(iC6XXr8ilg6S4lvA1T07M;4onn8}q8P41*RzY)lZ%29>K;0 z`}m=q#Znke!BKDoz*QxTCSm3oqV1%RfED24_89_sVq?PPatRBMS1Oe_B^f7{hT(}! zCKFE};YlO_kpN^dQF2fPM9HSBA%-wqU>RR3l*onRD6|?AHU@*tqrdnklKA)x z!$-+Jvw-k{SAh~d5l6s_MEH>!GP!Fs0`l3Rztxa&ksmXB04x(LqwROVG$xKL#z_NG3n0n@f+4qQxGN;NiE@Hky?HolIr$1Og&JCiAHPg$(fkCV>e95J3PVB}`%x zp&@S|KFduk6@kcd3PoTTjF&`(X>6zoXF2+D*ccLy@O8v55|j%N1*8UqQIJ?E`#QxH zir_#wsOFPMqcccMDxF9s(;0NykSPt~LRcz8a#4*+B;d#-jk8(MS^p$xGfqZt~isX%ze(9_Ukq)<~#Xtbs*SRj8W1Q{3&LmEF3 ztf47>I2aWMBmLuZ!hRVS{!KDa=rjrir1Aln3KIYdh}1Zf$Y1~jh{PZY1OkXkr+$kr z6AR=@PzpPSAv_{nAqlGC3XT1|RJPyBE5l*+Iv|t*1mw_AMxY?{VetQcct~jwX(S{( zkV!`p6ebV>Bw>*YnMS2DU<#8)9Lkzu^8ShN)EO}xo*iD@5Wg&@9sYlodq`kJ2SsQZ z8bdlW(&+KSo&Galh_Ce%@7f&@7#`jr z^=wS%l~&0X0yen|&!;-&-o zE!{>|(#$oxU7NSO^MU=Tx-X50g9=NB>gbG$X{##7dY8+S9mKM{CM(P83HK^+T-CPS zeG}}SZ+&;E=#_&qa1ocC9`C%PA7vDuRS=qc3ih9IuloMFp4p5=Hpc32QCt$JAQ-tjs6Ue_uXT(Kh2Q0?YjIvgI%-T^>A+oQ~zZe`}lb+JfNKj6k z+&AkH3y3h$Ut=0+FU`btvZh|ym44{wy<_SWAxW>()4O_(K1eiwaJm>Qqb!T8sB3k} yYie_ATReHNTpYT*Xl}gj!yRvfQ)D`2Z> /tmp/cloud-init.txt {% if enroot_enabled is defined and enroot_enabled %} [[[cluster-init enroot:default:1.0.0]]] {% endif %} + {% if queue.name.startswith('viz') %} + [[[cluster-init cryosparc:default:1.0.0]]] + {% endif %} {% endfor %} {% if cryosparc_enabled %} From 78b9b7df76ee3c0122f662089bb854d801a7008a Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Thu, 9 Mar 2023 14:07:06 -0600 Subject: [PATCH 11/15] Corrected typo --- playbooks/roles/cyclecloud_cluster/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playbooks/roles/cyclecloud_cluster/tasks/main.yml b/playbooks/roles/cyclecloud_cluster/tasks/main.yml index 46b4c46c8..ade391776 100644 --- a/playbooks/roles/cyclecloud_cluster/tasks/main.yml +++ b/playbooks/roles/cyclecloud_cluster/tasks/main.yml @@ -159,7 +159,7 @@ - cluster_script.sh when: cc_queue_manager == "slurm" - - name: Copy CryoSPARC cd /usrapp shortcut files + - name: Copy CryoSPARC app shortcut files copy: src: '{{role_path}}/projects/cryosparc/cluster-init/files/{{ item }}' dest: '{{cryosparc_project_root}}/specs/default/cluster-init/files/{{ item }}' From 8f221209616ec9c4b55da1f62d87ba46722678b9 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Thu, 9 Mar 2023 14:17:54 -0600 Subject: [PATCH 12/15] Removed hardcoded image value --- config.tpl.yml | 6 +++--- .../roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/config.tpl.yml b/config.tpl.yml index ad15cf2be..e410a512e 100644 --- a/config.tpl.yml +++ b/config.tpl.yml @@ -463,11 +463,11 @@ applications: enabled: false license_id: admin_user: adminuser - master_size: Standard_D8s_v5 + master_vm_size: Standard_D8s_v5 + master_vm_image: azhpc:azhop-compute:centos-7_9:latest + master_hostname: cryosparc-master master_data_disk_size: 256 master_data_disk_type: Premium_LRS - master_hostname: cryosparc-master - master_image: azhpc:azhop-compute:centos-7_9:latest target_queues: - nc24v3 - hb120v3 diff --git a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 
b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 index f4719b04d..3af64213c 100644 --- a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 +++ b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 @@ -144,9 +144,9 @@ echo "cloud-init done" >> /tmp/cloud-init.txt {% if cryosparc_enabled %} [[node {{ applications.cryosparc.master_hostname }}]] - MachineType = {{applications.cryosparc.master_size}} + MachineType = {{applications.cryosparc.master_vm_size}} EnableAcceleratedNetworking = true - ImageName = azhpc:azhop-compute:centos-7_9:latest + ImageName = {{applications.cryosparc.master_vm_image}} [[[volume boot]]] size = 64 From 069abfd11ad0c9f188974faa340083702090754c Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Fri, 10 Mar 2023 14:04:18 -0600 Subject: [PATCH 13/15] Resolved merge conflict --- .../roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 | 3 --- 1 file changed, 3 deletions(-) diff --git a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 index 9b543edfe..18513b593 100644 --- a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 +++ b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 @@ -135,13 +135,10 @@ echo "cloud-init done" >> /tmp/cloud-init.txt cyclecloud.cluster.autoscale.idle_time_before_jobs = {{queue.idle_timeout}} {% endif %} [[[cluster-init enroot:default:1.0.0]]] -<<<<<<< HEAD {% endif %} {% if queue.name.startswith('viz') %} [[[cluster-init cryosparc:default:1.0.0]]] {% endif %} -======= ->>>>>>> main {% endfor %} {% if cryosparc_enabled %} From 570789af5362052392cbd54ca0a53bd2eaaf4b94 Mon Sep 17 00:00:00 2001 From: Jerry Morey Date: Tue, 12 Dec 2023 11:07:05 -0500 Subject: [PATCH 14/15] Update azhop-slurm.txt.j2 (#1783) line 138 has an extra `{% endif % }` that causes failure during configuration --- playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 | 1 - 1 file changed, 1 deletion(-) diff --git a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 index 18513b593..a61794ce7 100644 --- a/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 +++ b/playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2 @@ -135,7 +135,6 @@ echo "cloud-init done" >> /tmp/cloud-init.txt cyclecloud.cluster.autoscale.idle_time_before_jobs = {{queue.idle_timeout}} {% endif %} [[[cluster-init enroot:default:1.0.0]]] - {% endif %} {% if queue.name.startswith('viz') %} [[[cluster-init cryosparc:default:1.0.0]]] {% endif %} From 8911d400d397e37f8305430fc30603bb760c9026 Mon Sep 17 00:00:00 2001 From: Jerry Morey Date: Fri, 22 Mar 2024 12:28:25 -0400 Subject: [PATCH 15/15] Update slurm_cluster_info.json (#1889) * Update slurm_cluster_info.json updated `worker_bin_path` to match install path * Update 03-install_cryosparc.sh.j2 added section to connect worker nodes to the master --- .../cluster-init/files/slurm_cluster_info.json | 2 +- .../scripts/03-install_cryosparc.sh.j2 | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json index fd450c82d..212147251 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json +++ 
b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/files/slurm_cluster_info.json @@ -1,6 +1,6 @@ { "name": "PARTITION", - "worker_bin_path": "/anfhome/apps/cryosparc_worker/bin/cryosparcw", + "worker_bin_path": "/anfhome/apps/cryosparc/cryosparc_worker/bin/cryosparcw", "cache_path": "/mnt/resource", "send_cmd_tpl": "{{ command }}", "qsub_cmd_tpl": "sbatch {{ script_path_abs }}", diff --git a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 index eeffd9a64..f49947846 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 +++ b/playbooks/roles/cyclecloud_cluster/projects/cryosparc/cluster-init/scripts/03-install_cryosparc.sh.j2 @@ -1,17 +1,26 @@ #!/bin/bash set -e -# Run only on CryoSPARC master node -[ $(hostname) != {{ applications.cryosparc.master_hostname }} ] && exit 0 - export INSTALL_DIR=/anfhome/apps/cryosparc SOURCES_DIR=${INSTALL_DIR}/sources +# connect worker nodes to the master +if [[ $(hostname) != {{ applications.cryosparc.master_hostname }} ]]; then + sudo chmod 777 /mnt + sudo su -c "$INSTALL_DIR/cryosparc_worker/bin/cryosparcw connect --worker $(hostname) \ + --master {{ applications.cryosparc.master_hostname }} \ + --port 39000 --ssdpath /mnt/" {{ applications.cryosparc.admin_user }} +fi + +# Run only on CryoSPARC master node +[ $(hostname) != {{ applications.cryosparc.master_hostname }} ] && exit 0 + ################################################### # Install CryoSPARC master locally on master node # ################################################### # CryoSPARC master must be installed as admin user +sudo chown {{ applications.cryosparc.admin_user }} /cryosparc_data sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF cd /cryosparc_data tar xzf ${SOURCES_DIR}/cryosparc_master.tar.gz @@ -62,6 +71,5 @@ export CUDA_PATH sudo -i -u {{ applications.cryosparc.admin_user }} bash << EOF cd ${INSTALL_DIR}/cryosparc_worker ./install.sh --license {{ applications.cryosparc.license_id }} \ - --cudapath ${CUDA_PATH} \ --yes EOF
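
How the pieces above fit together (a minimal sketch, not part of the patches themselves): patch 04/15 registers one CryoSPARC cluster lane per entry in applications.cryosparc.target_queues by copying cluster_info.json and cluster_script.sh into /cryosparc_data/cryosparc_cluster_<partition>, substituting PARTITION, and running cryosparcm cluster connect from that directory, while patch 15/15 attaches each GPU node to the master with cryosparcw connect. Assuming the default adminuser account, the nc24v3 partition and port 39000 used in the templates above, re-registering and checking a lane by hand from the master node would look roughly like this (the get_scheduler_targets() call is an assumption about the stock cryosparcm cli interface, not something these patches configure):

sudo -i -u adminuser
cd /cryosparc_data/cryosparc_cluster_nc24v3                                      # directory created by 04-import_slurm_cluster.sh
cat cluster_info.json cluster_script.sh                                          # lane name, worker_bin_path and sbatch/squeue/scancel templates rendered for nc24v3
/cryosparc_data/cryosparc_master/bin/cryosparcm cluster connect                  # (re)registers the lane defined in the current directory
/cryosparc_data/cryosparc_master/bin/cryosparcm cli "get_scheduler_targets()"    # assumed CLI call: lists the lanes now visible to the scheduler

The web UI opened by the NSG "Applications" rule is then reachable at http://cryosparc-master:39000/, the same URL the cryosparc.desktop shortcut points to.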