Skip to content

Commit

Permalink
add distributed ut in CI
Browse files Browse the repository at this point in the history
  • Loading branch information
zxd1997066 committed Jan 22, 2025
1 parent 65db5bc commit a1eaa8e
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/ut_result_check.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
ut_suite="${1:-op_regression}" # op_regression / op_extended / op_ut / torch_xpu

if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_regression_dev1' || "${ut_suite}" == 'op_extended' ]]; then
if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_regression_dev1' || "${ut_suite}" == 'op_extended' || "${ut_suite}" == 'torch_xpu_distributed' ]]; then
grep "^FAILED" "${ut_suite}"_test.log | awk '{print $2}' > ./"${ut_suite}"_failed.log
grep "PASSED" "${ut_suite}"_test.log | awk '{print $1}' > ./"${ut_suite}"_passed.log
num_failed=$(wc -l < "./${ut_suite}_failed.log")
Expand Down
15 changes: 14 additions & 1 deletion .github/workflows/_linux_ut.yml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,19 @@ jobs:
test_cmd="${test_cmd} test_xpu.py"
fi
eval $test_cmd 2>${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test_error.log | tee ${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test.log
- name: Run Torch XPU Distributed UT
  # Runs run_distributed.py from torch-xpu-ops/test/xpu and captures stdout /
  # stderr under ut_log/torch_xpu_distributed. Only executed when the
  # requested suite list mentions xpu_distributed.
  #
  # NOTE(review): the logs are written to ut_log/torch_xpu_distributed, while
  # the caller passes `ut: xpu_distributed` and the results-check step does
  # `cd ut_log/${ut_suite}` for each entry of inputs.ut. Confirm that the
  # check step (partly outside this view) maps xpu_distributed to
  # torch_xpu_distributed; otherwise that `cd` will not find the directory.
  if: contains(inputs.ut, 'xpu_distributed')
  run: |
    source .github/scripts/env.sh ${{ inputs.pytorch }}
    source activate xpu_op_${ZE_AFFINITY_MASK}

    # Temporarily set Yama ptrace_scope to 0 for the test run (presumably
    # required by the distributed tests -- confirm). The original value is
    # kept in a shell variable and restored from an EXIT trap so that:
    #   * the restore does not depend on the current directory (the previous
    #     relative-path backup file was looked up after `cd` and not found);
    #   * the runner setting is put back even if a later command aborts.
    ptrace_scope_file=/proc/sys/kernel/yama/ptrace_scope
    ptrace_scope_orig="$(sudo cat "${ptrace_scope_file}")"
    trap 'echo "${ptrace_scope_orig}" | sudo tee "${ptrace_scope_file}" > /dev/null' EXIT
    echo "0" | sudo tee "${ptrace_scope_file}"

    pip install pytest

    log_dir="${{ github.workspace }}/ut_log/torch_xpu_distributed"
    mkdir -p "${log_dir}"

    cd "${{ github.workspace }}"
    cd ../pytorch/third_party/torch-xpu-ops/test/xpu
    # stderr goes to its own file; stdout is mirrored to the console and the
    # test log. Without `pipefail` the pipeline status is tee's, so a failing
    # or timed-out test run does not fail this step by itself -- the log is
    # evaluated by the later results-check step.
    # NOTE(review): this assumes no workflow-level default shell enables
    # pipefail -- confirm.
    timeout 10000 python run_distributed.py \
      2>"${log_dir}/torch_xpu_distributed_test_error.log" \
      | tee "${log_dir}/torch_xpu_distributed_test.log"
- name: UT Test Results Check
shell: bash
run: |
Expand All @@ -249,7 +262,7 @@ jobs:
set -xe
for ut_suite in $(echo ${{ inputs.ut }} |sed 's/,/ /g')
do
contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu" $ut_suite
contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu,xpu_distributed" $ut_suite
$contains_status
cd ${{ github.workspace }}/ut_log/${ut_suite}
cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./
Expand Down
12 changes: 12 additions & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,18 @@ jobs:
ut: op_regression,op_regression_dev1,op_extended,op_ut
runner: linux.idc.xpu

preci-ut-distributed:
  # Distributed XPU unit tests: calls the shared Linux UT workflow with the
  # xpu_distributed suite on the pvc_e2e runner, using the torch commit id
  # published by the preci-linux-build job.
  name: preci-linux
  needs: preci-linux-build
  # Don't run on forked repos and draft PRs
  if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
  uses: ./.github/workflows/_linux_ut.yml
  secrets: inherit
  with:
    runner: pvc_e2e
    ut: xpu_distributed
    pytorch: ${{ needs.preci-linux-build.outputs.torch_commit_id }}

Inductor-XPU-E2E-CI-Tests:
name: preci-linux / e2e_test
needs: preci-linux-build
Expand Down

0 comments on commit a1eaa8e

Please sign in to comment.