参考:🔗
我们已将数据和代码存放在我公司的FTP服务器上,请单击此处下载到本地,然后再将其上传到服务器上。
依次单击“系统管理”->“文件管理”->“文档上传”。
将文件(注意,文件应该是解压后的)上传到服务器。
JSON文件如下:
{
"jobName": "tensorflow-mmmg_1",
"image": "sitonholy/scm:16.04-10.0-7.4-3.6-4.1.0-pmt",
"authFile": "",
"dataDir": "",
"outputDir": "",
"codeDir": "",
"retryCount": 0,
"taskRoles": [
{
"name": "ps",
"taskNumber": 1,
"cpuNumber": 1,
"memoryMB": 4096,
"shmMB": 1024,
"gpuNumber": 0,
"storageGB": 1,
"minFailedTaskCount": 1,
"minSucceededTaskCount": null,
"command": "cd /root/data/distributeTensorflowExample/ && CUDA_VISIBLE_DEVICES='' python distribute.py --ps_hosts=$PAI_HOST_IP_ps_0:$PAI_PORT_LIST_ps_0_ps --worker_hosts=$PAI_HOST_IP_worker1_0:$PAI_PORT_LIST_worker1_0_worker1,$PAI_HOST_IP_worker2_0:$PAI_PORT_LIST_worker2_0_worker2 --job_name=ps --task_index=0",
"portList": [
{
"label": "ps",
"beginAt": 0,
"portNumber": 1
}
]
},
{
"name": "worker1",
"taskNumber": 1,
"cpuNumber": 1,
"memoryMB": 4096,
"shmMB": 1024,
"gpuNumber": 1,
"storageGB": 1,
"minFailedTaskCount": 1,
"minSucceededTaskCount": null,
"command": "cd /root/data/distributeTensorflowExample/ && CUDA_VISIBLE_DEVICES=0 python distribute.py --ps_hosts=$PAI_HOST_IP_ps_0:$PAI_PORT_LIST_ps_0_ps --worker_hosts=$PAI_HOST_IP_worker1_0:$PAI_PORT_LIST_worker1_0_worker1,$PAI_HOST_IP_worker2_0:$PAI_PORT_LIST_worker2_0_worker2 --job_name=worker --task_index=0",
"portList": [
{
"label": "worker1",
"beginAt": 0,
"portNumber": 1
}
]
},
{
"name": "worker2",
"taskNumber": 1,
"cpuNumber": 1,
"memoryMB": 4096,
"shmMB": 1024,
"gpuNumber": 1,
"storageGB": 1,
"minFailedTaskCount": 1,
"minSucceededTaskCount": null,
"command": "cd /root/data/distributeTensorflowExample/ && CUDA_VISIBLE_DEVICES=0 python distribute.py --ps_hosts=$PAI_HOST_IP_ps_0:$PAI_PORT_LIST_ps_0_ps --worker_hosts=$PAI_HOST_IP_worker1_0:$PAI_PORT_LIST_worker1_0_worker1,$PAI_HOST_IP_worker2_0:$PAI_PORT_LIST_worker2_0_worker2 --job_name=worker --task_index=1",
"portList": [
{
"label": "worker2",
"beginAt": 0,
"portNumber": 1
}
]
}
],
"jobEnvs": {},
"extras": {
"virtualGroup": "total"
},
"gpuType": "TITANX"
}