Commit 7c9ac96
Merge branch 'main' into penghuic/pytest_by_script
PenghuiCheng authored Jan 10, 2025
2 parents b7a9117 + a868a2e commit 7c9ac96
Showing 39 changed files with 3,697 additions and 62 deletions.
2 changes: 2 additions & 0 deletions .github/scripts/apply_torch_pr.py
@@ -13,6 +13,8 @@
"https://github.com/pytorch/pytorch/pull/126516",
# Modify the tolerance level in TIMM benchmark
"https://github.com/pytorch/pytorch/pull/143739",
# Fix build error caused by incorrect namespace change by #144014
"https://github.com/pytorch/pytorch/pull/144450",
]
)
parser.add_argument('--extra-pr-list', '-e', nargs='+',default=[])
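For reference, a minimal invocation sketch of this script (hypothetical: the PR URL below is a placeholder, and any other arguments the script may require are omitted):

```bash
# Hypothetical usage; --extra-pr-list/-e takes one or more extra PR URLs to apply.
# The URL here is a placeholder, not part of this change.
python .github/scripts/apply_torch_pr.py \
  --extra-pr-list https://github.com/pytorch/pytorch/pull/000000
```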
114 changes: 114 additions & 0 deletions .github/scripts/install_xpu.bat
@@ -0,0 +1,114 @@
@echo on
REM Description: Install Intel Support Packages on Windows
REM BKM reference: https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html

set XPU_INSTALL_MODE=%~1
if "%XPU_INSTALL_MODE%"=="" goto xpu_bundle_install_start
if "%XPU_INSTALL_MODE%"=="bundle" goto xpu_bundle_install_start
if "%XPU_INSTALL_MODE%"=="driver" goto xpu_driver_install_start
if "%XPU_INSTALL_MODE%"=="all" goto xpu_driver_install_start

:arg_error

echo Illegal XPU installation mode. The value can be "bundle"/"driver"/"all"
echo If the value is omitted, the default "bundle" mode will be used
exit /b 1

:xpu_driver_install_start
:: TODO Need more testing for driver installation
set XPU_DRIVER_LINK=https://downloadmirror.intel.com/830975/gfx_win_101.5972.exe
curl -o xpu_driver.exe --retry 3 --retry-all-errors -k %XPU_DRIVER_LINK%
echo "XPU Driver installing..."
start /wait "Intel XPU Driver Installer" "xpu_driver.exe"
if errorlevel 1 exit /b 1
del xpu_driver.exe
if "%XPU_INSTALL_MODE%"=="driver" goto xpu_install_end

:xpu_bundle_install_start

set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-for-pytorch-gpu-dev_p_0.5.3.37_offline.exe
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.intel-for-pytorch-gpu-dev.product
set XPU_BUNDLE_VERSION=0.5.3+31
set XPU_BUNDLE_INSTALLED=0
set XPU_BUNDLE_UNINSTALL=0
set XPU_EXTRA_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-pti-dev_p_0.9.0.37_offline.exe
set XPU_EXTRA_PRODUCT_NAME=intel.oneapi.win.intel-pti-dev.product
set XPU_EXTRA_VERSION=0.9.0+36
set XPU_EXTRA_INSTALLED=0
set XPU_EXTRA_UNINSTALL=0

if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.0] (
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/efc86abd-cb77-452e-a03f-a741895b8ece/intel-deep-learning-essentials-2025.0.0.336_offline.exe
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product
set XPU_BUNDLE_VERSION=2025.0.0+335
set XPU_BUNDLE_INSTALLED=0
set XPU_BUNDLE_UNINSTALL=0
set XPU_EXTRA_URL=NULL
set XPU_EXTRA_PRODUCT_NAME=intel.oneapi.win.compiler.product
set XPU_EXTRA_VERSION=2025.0.1+1226
set XPU_EXTRA_INSTALLED=0
set XPU_EXTRA_UNINSTALL=0
)

:: Check if XPU bundle is target version or already installed
if exist "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" goto xpu_bundle_ver_check
goto xpu_bundle_install

:xpu_bundle_ver_check

"%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --list-products > xpu_bundle_installed_ver.log

for /f "tokens=1,2" %%a in (xpu_bundle_installed_ver.log) do (
if "%%a"=="%XPU_BUNDLE_PRODUCT_NAME%" (
echo %%a Installed Version: %%b
set XPU_BUNDLE_INSTALLED=1
if not "%XPU_BUNDLE_VERSION%"=="%%b" (
start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
set XPU_BUNDLE_UNINSTALL=1
)
)
if "%%a"=="%XPU_EXTRA_PRODUCT_NAME%" (
echo %%a Installed Version: %%b
set XPU_EXTRA_INSTALLED=1
if not "%XPU_EXTRA_VERSION%"=="%%b" (
start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
set XPU_EXTRA_UNINSTALL=1
)
)
if not "%%b" == "Version" if not [%%b]==[] if not "%%a"=="%XPU_BUNDLE_PRODUCT_NAME%" if not "%%a"=="%XPU_EXTRA_PRODUCT_NAME%" (
echo "Uninstalling...."
start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
)
)
if errorlevel 1 exit /b 1
if exist xpu_bundle_installed_ver.log del xpu_bundle_installed_ver.log
if exist uninstall_bundle rmdir /s /q uninstall_bundle
if "%XPU_BUNDLE_INSTALLED%"=="0" goto xpu_bundle_install
if "%XPU_BUNDLE_UNINSTALL%"=="1" goto xpu_bundle_install

:xpu_extra_check

if "%XPU_EXTRA_URL%"=="NULL" goto xpu_install_end
if "%XPU_EXTRA_INSTALLED%"=="0" goto xpu_extra_install
if "%XPU_EXTRA_UNINSTALL%"=="1" goto xpu_extra_install
goto xpu_install_end

:xpu_bundle_install

curl -o xpu_bundle.exe --retry 3 --retry-all-errors -k %XPU_BUNDLE_URL%
echo "XPU Bundle installing..."
start /wait "Intel Pytorch Bundle Installer" "xpu_bundle.exe" --action=install --eula=accept --silent --log-dir install_bundle
if errorlevel 1 exit /b 1
del xpu_bundle.exe
goto xpu_extra_check

:xpu_extra_install

curl -o xpu_extra.exe --retry 3 --retry-all-errors -k %XPU_EXTRA_URL%
echo "Intel XPU EXTRA installing..."
start /wait "Intel XPU EXTRA Installer" "xpu_extra.exe" --action=install --eula=accept --silent --log-dir install_bundle
if errorlevel 1 exit /b 1
del xpu_extra.exe

:xpu_install_end
74 changes: 74 additions & 0 deletions .github/scripts/ut_result_check.sh
@@ -0,0 +1,74 @@
ut_suite=${1:-op_regression} # op_regression / op_regression_dev1 / op_extended / op_ut / torch_xpu

if [[ ${ut_suite} == 'op_regression' || ${ut_suite} == 'op_regression_dev1' || ${ut_suite} == 'op_extended' ]]; then
grep "^FAILED" ${ut_suite}_test.log | awk '{print $2}' > ./${ut_suite}_failed.log
grep "PASSED" ${ut_suite}_test.log | awk '{print $1}' > ./${ut_suite}_passed.log
num_failed=$(cat ./${ut_suite}_failed.log | wc -l)
num_passed=$(cat ./${ut_suite}_passed.log | wc -l)
echo -e "========================================================================="
echo -e "Show Failed cases in ${ut_suite}"
echo -e "========================================================================="
cat ./${ut_suite}_failed.log
if [[ $num_failed -gt 0 ]] || [[ $num_passed -le 0 ]]; then
echo -e "[ERROR] UT ${ut_suite} test Fail"
exit 1
else
echo -e "[PASS] UT ${ut_suite} test Pass"
fi
fi
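As an aside, a sketch of the `pytest -v` output these greps assume (test ids are made up): `FAILED` summary lines carry the test id in the second field, while verbose `PASSED` lines carry it in the first, which is why the two `awk` calls differ.

```bash
# Hypothetical log excerpt the extraction above is keyed on (test ids are illustrative):
#   FAILED test_foo.py::TestFooXPU::test_bar - AssertionError
#   test_foo.py::TestFooXPU::test_baz PASSED
grep "^FAILED" op_regression_test.log | awk '{print $2}'   # -> test_foo.py::TestFooXPU::test_bar
grep "PASSED" op_regression_test.log | awk '{print $1}'    # -> test_foo.py::TestFooXPU::test_baz
```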
if [[ ${ut_suite} == 'op_ut' ]]; then
grep "^FAILED" op_ut_with_skip_test.log | awk '{print $2}' > ./${ut_suite}_with_skip_test_failed.log
grep "^FAILED" op_ut_with_only_test.log | awk '{print $2}' > ./${ut_suite}_with_only_test_failed.log
num_failed_with_skip=$(cat ./${ut_suite}_with_skip_test_failed.log | wc -l)
num_failed_with_only=$(cat ./${ut_suite}_with_only_test_failed.log | wc -l)
echo -e "========================================================================="
echo -e "Show Failed cases in ${ut_suite} with skip"
echo -e "========================================================================="
cat ./${ut_suite}_with_skip_test_failed.log
echo -e "========================================================================="
echo -e "Show Failed cases in ${ut_suite} with only"
echo -e "========================================================================="
cat ./${ut_suite}_with_only_test_failed.log
let num_failed=num_failed_with_skip+num_failed_with_only
grep "PASSED" op_ut_with_skip_test.log | awk '{print $1}' > ./${ut_suite}_with_skip_test_passed.log
grep "PASSED" op_ut_with_only_test.log | awk '{print $1}' > ./${ut_suite}_with_only_test_passed.log
num_passed_with_skip=$(cat ./${ut_suite}_with_skip_test_passed.log | wc -l)
num_passed_with_only=$(cat ./${ut_suite}_with_only_test_passed.log | wc -l)
let num_passed=num_passed_with_skip+num_passed_with_only
if [[ $num_failed -gt 0 ]] || [[ $num_passed -le 0 ]]; then
echo -e "[ERROR] UT ${ut_suite} test Fail"
exit 1
else
echo -e "[PASS] UT ${ut_suite} test Pass"
fi
fi
if [[ ${ut_suite} == 'torch_xpu' ]]; then
echo "Pytorch XPU binary UT checking"
cd ../../pytorch
TEST_REPORTS_DIR=$(pwd)/test/test-reports
for xpu_case in build/bin/*{xpu,sycl}*; do
if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then
case_name=$(basename "$xpu_case")
cd ../ut_log/torch_xpu
grep -E "FAILED" binary_ut_${ut_suite}_${case_name}_test.log | awk '{print $2}' > ./binary_ut_${ut_suite}_${case_name}_failed.log
echo $(cat ./binary_ut_${ut_suite}_${case_name}_failed.log | wc -l) | tee -a ./binary_ut_${ut_suite}_failed_summary.log
grep -E "PASSED|Pass" binary_ut_${ut_suite}_${case_name}_test.log | awk '{print $2}' > ./binary_ut_${ut_suite}_${case_name}_passed.log
echo $(cat ./binary_ut_${ut_suite}_${case_name}_passed.log | wc -l) | tee -a ./binary_ut_${ut_suite}_passed_summary.log
cd -
fi
done
echo -e "========================================================================="
echo -e "Show Failed cases in ${ut_suite}"
echo -e "========================================================================="
cd ../ut_log/torch_xpu
cat ./binary_ut_${ut_suite}_*_failed.log
num_failed_binary_ut=$(awk '{sum += $1};END {print sum}' binary_ut_${ut_suite}_failed_summary.log)
num_passed_binary_ut=$(awk '{sum += $1};END {print sum}' binary_ut_${ut_suite}_passed_summary.log)
let num_failed=num_failed_binary_ut
if [[ $num_failed -gt 0 ]] || [[ $num_passed_binary_ut -le 0 ]]; then
echo -e "[ERROR] UT ${ut_suite} test Fail"
exit 1
else
echo -e "[PASS] UT ${ut_suite} test Pass"
fi
fi
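To run this check by hand, the pattern used by the workflow later in this commit can be reproduced locally (assuming the suite's `*_test.log` files already exist under `ut_log/<suite>`; the suite name here is just an example):

```bash
# Copy the script next to the logs it expects, then run it for one suite.
cp .github/scripts/ut_result_check.sh ut_log/op_extended/
cd ut_log/op_extended && bash ut_result_check.sh op_extended
```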
12 changes: 11 additions & 1 deletion .github/workflows/_linux_transformers.yml
@@ -46,6 +46,8 @@ jobs:
Torch-XPU-Transformers-Tests:
runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
env:
HF_HOME: ${{ github.workspace }}/.hf_home
HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
python: ${{ inputs.python != '' && inputs.python || '3.10' }}
@@ -115,7 +117,7 @@
cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/transformers/tests_log/device_IDs.txt
echo "xpu-smi output:"
xpu-smi discovery -y --json --dump -1
- name: Sanitry check installed packages
- name: Sanity check installed packages
run: |
source activate huggingface_transformers_test
# These checks are to exit earlier if for any reason Transformers
@@ -124,6 +126,9 @@
pip show torchaudio | grep Version | grep xpu
pip show torchvision | grep Version | grep xpu
python -c 'import torch; exit(not torch.xpu.is_available())'
- name: Clean HF home directory and cache
run: |
rm -rf ${{ env.HF_HOME }}
- name: Run -k backbone tests
env:
TEST_CASE: 'tests_backbone'
@@ -212,6 +217,11 @@
FAILED_CASES=$(echo $FAILED_CASES | sed 's/^,//')
echo "Failed cases: [$(echo $FAILED_CASES | sed 's/,/, /g')]"
test -z "$FAILED_CASES"
- name: Clean HF home directory and cache
if: ${{ always() }}
run: |
du -sh ${{ env.HF_HOME }} || true
rm -rf ${{ env.HF_HOME }}
- name: Print results table
if: ${{ ! cancelled() }}
run: |
51 changes: 43 additions & 8 deletions .github/workflows/_linux_ut.yml
@@ -138,21 +138,23 @@ jobs:
python pytorch/torch/utils/collect_env.py
rm -rf /tmp/torchinductor_*
rm -rf ~/.triton/cache
- name: Run XPU OP Examples
- name: Run XPU OP Regression
if: contains(inputs.ut, 'op_regression') || github.event_name == 'schedule'
run: |
cd ${{ github.workspace }}
mkdir -p ut_log/op_regression
xpu-smi discovery
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ${{ github.workspace }}
cd test/regressions
pip install pytest
timeout 8000 pytest -v
timeout 8000 pytest -v 2>${{ github.workspace }}/ut_log/op_regression/op_regression_test_error.log | tee ${{ github.workspace }}/ut_log/op_regression/op_regression_test.log
- name: Run XPU OP Regressions test on device 1
if: contains(inputs.ut, 'op_regression_dev1') || github.event_name == 'schedule'
run: |
cd ${{ github.workspace }}
mkdir -p ut_log/op_regression_dev1
xpu-smi discovery
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
@@ -161,35 +163,41 @@
cd ${{ github.workspace }}
cd test/regressions
pip install pytest
timeout 8000 pytest -v test_operation_on_device_1.py
timeout 8000 pytest -v test_operation_on_device_1.py 2>${{ github.workspace }}/ut_log/op_regression_dev1/op_regression_dev1_test_error.log | tee ${{ github.workspace }}/ut_log/op_regression_dev1/op_regression_dev1_test.log
export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK_OLD}
- name: Run XPU OP Extended UT
if: contains(inputs.ut, 'op_extended') || github.event_name == 'schedule'
run: |
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_TEST_WITH_SLOW=1
cd ${{ github.workspace }}
mkdir -p ut_log/op_extended
cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
timeout 10000 python run_test_with_skip.py
timeout 10000 python run_test_with_skip.py 2>${{ github.workspace }}/ut_log/op_extended/op_extended_test_error.log | tee ${{ github.workspace }}/ut_log/op_extended/op_extended_test.log
- name: Run XPU OP UT
if: contains(inputs.ut, 'op_ut') || github.event_name == 'schedule'
run: |
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_ENABLE_XPU_FALLBACK=1
export PYTORCH_TEST_WITH_SLOW=1
cd ${{ github.workspace }}
mkdir -p ut_log/op_ut
cd ../pytorch/third_party/torch-xpu-ops/test/xpu
timeout 10000 python run_test_with_skip.py
timeout 10000 python run_test_with_skip.py 2>${{ github.workspace }}/ut_log/op_ut/op_ut_with_skip_test_error.log | tee ${{ github.workspace }}/ut_log/op_ut/op_ut_with_skip_test.log
# Cases run with an on-demand whitelist, since some suites are too
# slow to go through all operators on CPU. So add cases on-demand
# when the XPU implementation is done.
# test_foreach, test_decomp
timeout 10000 python run_test_with_only.py
timeout 10000 python run_test_with_only.py 2>${{ github.workspace }}/ut_log/op_ut/op_ut_with_only_test_error.log | tee ${{ github.workspace }}/ut_log/op_ut/op_ut_with_only_test.log
- name: Run Torch XPU UT
if: contains(inputs.ut, 'torch_xpu') || github.event_name == 'schedule'
run: |
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ${{ github.workspace }}
mkdir -p ut_log/torch_xpu
cd ../pytorch
TEST_REPORTS_DIR=$(pwd)/test/test-reports
rm -rf "$TEST_REPORTS_DIR" && mkdir -p "$TEST_REPORTS_DIR"
@@ -198,7 +206,7 @@
if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then
case_name=$(basename "$xpu_case")
echo "Testing ${case_name} ..."
"$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml
"$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml 2>${{ github.workspace }}/ut_log/torch_xpu/binary_ut_torch_xpu_${case_name}_test_error.log | tee ${{ github.workspace }}/ut_log/torch_xpu/binary_ut_torch_xpu_${case_name}_test.log
fi
done
# Run Pytorch XPU python UT
@@ -219,4 +227,31 @@
if [ -f "test/test_xpu.py" ]; then
test_cmd="${test_cmd} test_xpu.py"
fi
eval $test_cmd
eval $test_cmd 2>${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test_error.log | tee ${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test.log
- name: UT Test Results Check
shell: bash
run: |
function contains() {
contains_status="echo 'Start $2 ...'"
{
[[ $1 =~ (^|,)$2($|,) ]]
} || {
echo "[Warning] $2 is not suppotted type! Skipped!"
contains_status="continue"
}
}
set -xe
for ut_suite in $(echo ${{ inputs.ut }} |sed 's/,/ /g')
do
contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu" $ut_suite
$contains_status
cd ${{ github.workspace }}/ut_log/${ut_suite}
cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./
bash ut_result_check.sh ${ut_suite}
done
- name: Upload Inductor XPU UT Log
if: always()
uses: actions/upload-artifact@v4
with:
name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}
path: ${{ github.workspace }}/ut_log
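For readers unfamiliar with the `contains` pattern in the "UT Test Results Check" step above, here is a small stand-alone sketch of the same logic (suite names are illustrative): for a recognized suite, `contains_status` becomes an `echo` command; for anything else it becomes `continue`, so expanding it on the next line either announces the suite or skips that loop iteration.

```bash
#!/usr/bin/env bash
# Minimal reproduction of the helper used in the workflow step above (illustrative only).
function contains() {
  contains_status="echo 'Start $2 ...'"
  {
    [[ $1 =~ (^|,)$2($|,) ]]
  } || {
    echo "[Warning] $2 is not a supported type! Skipped!"
    contains_status="continue"
  }
}

for ut_suite in op_ut some_unknown_suite; do
  contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu" "$ut_suite"
  $contains_status   # announces the suite, or skips this iteration entirely
  echo "would check results for ${ut_suite}"
done
```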