-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ci: add test for Huggingface Accelerate
Signed-off-by: Dmitry Rogozhkin <[email protected]>
- Loading branch information
Showing
1 changed file
with
213 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
# CI workflow for the Huggingface Accelerate test suite. | ||
name: Linux Accelerate Test | ||
|
||
on: | ||
# Pull requests against main that modify any of the listed paths. | ||
pull_request: | ||
paths: | ||
- '.github/scripts/spec.py' | ||
- '.github/workflows/_linux_accelerate.yml' | ||
branches: | ||
- main | ||
# Manual trigger; each input below carries a default value. | ||
workflow_dispatch: | ||
inputs: | ||
pytorch: | ||
description: Pytorch branch/commit | ||
type: string | ||
required: false | ||
default: 'nightly' | ||
python: | ||
description: Python version | ||
type: string | ||
required: false | ||
default: '3.10' | ||
runner: | ||
description: Runner label | ||
type: string | ||
required: true | ||
default: 'linux.idc.xpu' | ||
nightly_whl: | ||
description: Pytorch nightly wheel version | ||
type: string | ||
required: false | ||
default: '' | ||
accelerate: | ||
description: Accelerate version | ||
type: string | ||
required: false | ||
default: 'v1.2.1' | ||
|
||
# Grant the GITHUB_TOKEN read-only access to all scopes. | ||
permissions: read-all | ||
|
||
jobs: | ||
Torch-XPU-Accelerate-Tests: | ||
# Use the default runner label when the input is empty | ||
# (e.g. on pull_request events, where no inputs exist). | ||
runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }} | ||
env: | ||
# Versions under test; the defaults apply when the inputs are empty. | ||
accelerate: ${{ inputs.accelerate != '' && inputs.accelerate || 'v1.2.1' }} | ||
python: ${{ inputs.python != '' && inputs.python || '3.10' }} | ||
pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }} | ||
# Names and locations shared by the steps below. | ||
CONDA_ENV_NAME: 'huggingface_accelerate_test' | ||
WORK_DIR: 'accelerate' | ||
PARSE_JUNIT: ${{ github.workspace }}/torch-xpu-ops/.github/scripts/parse-junitxml.py | ||
# Runtime settings exported to every step (quoted: env values are strings). | ||
NEOReadDebugKeys: '0' | ||
DisableScratchPages: '0' | ||
PYTORCH_DEBUG_XPU_FALLBACK: '1' | ||
ZE_AFFINITY_MASK: '0' | ||
steps: | ||
# Check out the repository that triggered the workflow into torch-xpu-ops/. | ||
- name: Checkout torch-xpu-ops | ||
uses: actions/checkout@v4 | ||
with: | ||
path: torch-xpu-ops | ||
# Check out huggingface/accelerate at the ref from env.accelerate into accelerate/. | ||
- name: Checkout Accelerate | ||
uses: actions/checkout@v4 | ||
with: | ||
path: accelerate | ||
repository: huggingface/accelerate | ||
ref: ${{ env.accelerate }} | ||
#- name: Prepare OS environment | ||
# run: | | ||
# sudo apt-get update | ||
# sudo apt-get install -y \ | ||
# espeak-ng \ | ||
# git-lfs \ | ||
# pkg-config \ | ||
# libavcodec-dev \ | ||
# libavdevice-dev \ | ||
# libavfilter-dev \ | ||
# libavformat-dev \ | ||
# libavutil-dev \ | ||
# libswresample-dev \ | ||
# libswscale-dev | ||
# git lfs install | ||
# Recreate the conda environment $CONDA_ENV_NAME with the Python version from | ||
# env.python, then install junitparser into it. | ||
# If `conda remove` fails, the environment directory is deleted directly from | ||
# the envs/ folder located relative to $CONDA_EXE. | ||
# NOTE(review): junitparser is presumably required by the $PARSE_JUNIT script | ||
# used in the "Print result tables" step - confirm. | ||
- name: Prepare Conda ENV | ||
run: | | ||
which conda && conda clean -ay | ||
conda remove --all -y -n $CONDA_ENV_NAME || rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME | ||
conda create -y -n $CONDA_ENV_NAME python=${{ env.python }} | ||
source activate $CONDA_ENV_NAME | ||
pip install junitparser | ||
# Install torch, torchvision and torchaudio from the PyTorch XPU nightly index. | ||
# When the nightly_whl input is non-empty, torch is pinned to that version. | ||
- name: Prepare Stock XPU Pytorch | ||
env: | ||
# Pass the input through the environment instead of expanding it inside the | ||
# script text, so its value is never parsed as shell code. | ||
NIGHTLY_WHL: ${{ inputs.nightly_whl }} | ||
run: | | ||
source activate $CONDA_ENV_NAME | ||
index_url="https://download.pytorch.org/whl/nightly/xpu" | ||
if [ -z "$NIGHTLY_WHL" ]; then | ||
pip install torch torchvision torchaudio --pre --index-url "$index_url" | ||
else | ||
pip install "torch==$NIGHTLY_WHL" torchvision torchaudio --pre --index-url "$index_url" | ||
fi | ||
# Install the checked-out Accelerate sources in editable mode (first plain, then | ||
# with the "testing" extra), reset the tests_log directory, remove any previous | ||
# reports directory, and copy spec.py from the torch-xpu-ops checkout into | ||
# $WORK_DIR. | ||
# The reports directory is only removed here; it is created again by the test | ||
# step before pytest runs. | ||
- name: Prepare Accelerate | ||
run: | | ||
source activate $CONDA_ENV_NAME | ||
cd $WORK_DIR | ||
pip install -e . | ||
pip install -e ".[testing]" | ||
rm -rf tests_log && mkdir -p tests_log | ||
rm -rf reports | ||
cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./ | ||
# Print diagnostic information about the environment and save copies under | ||
# $WORK_DIR/tests_log: the pip package list, the lspci output filtered with | ||
# `-d ::0380`, and the PCI device IDs of the DRM render nodes. | ||
# The xpu-smi discovery dump is printed to the job log only. | ||
- name: Report installed versions | ||
run: | | ||
source activate $CONDA_ENV_NAME | ||
echo "pip installed packages:" | ||
pip list | tee ${{ github.workspace }}/$WORK_DIR/tests_log/pip_list.txt | ||
echo "lspci gpu devices:" | ||
lspci -d ::0380 | tee ${{ github.workspace }}/$WORK_DIR/tests_log/lspci_0380.txt | ||
echo "GPU render nodes:" | ||
cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/$WORK_DIR/tests_log/device_IDs.txt | ||
echo "xpu-smi output:" | ||
xpu-smi discovery -y --json --dump -1 | ||
# Guard step: each `grep xpu` exits non-zero when the installed package version | ||
# does not contain "xpu", and the final python call exits with status 1 when | ||
# torch.xpu.is_available() is false. | ||
- name: Sanity check installed packages | ||
run: | | ||
source activate $CONDA_ENV_NAME | ||
# These checks are to exit earlier if for any reason torch | ||
# packages were reinstalled back to CUDA versions (not expected). | ||
pip show torch | grep Version | grep xpu | ||
pip show torchaudio | grep Version | grep xpu | ||
pip show torchvision | grep Version | grep xpu | ||
python -c 'import torch; exit(not torch.xpu.is_available())' | ||
# Run the Accelerate test suite (everything under tests/ except the excluded | ||
# cases) with pytest and write a JUnit report to reports/accelerate.xml. | ||
# The exact command line is echoed to the step summary before it is executed. | ||
- name: Run Accelerate tests | ||
env: | ||
# NOTE(review): TEST_CASE is not referenced by the script below; kept in case | ||
# an external script reads it - confirm and drop if unused. | ||
TEST_CASE: 'tests_backbone' | ||
run: | | ||
source activate $CONDA_ENV_NAME | ||
cd $WORK_DIR && rm -rf reports && mkdir -p reports | ||
# Excluding tests due to: | ||
# * tests/test_examples.py::FeatureExamplesTests::test_profiler fails on | ||
# Kineto profiler initialization for XPU device: PTI_ERROR_INTERNAL | ||
# * tests/test_cli.py::ModelEstimatorTester::test_gated for failures due | ||
# to not root caused environment configuration issue | ||
pattern="not test_profiler and not test_gated" | ||
cmd=(python3 -m pytest -rsf --junitxml=reports/accelerate.xml -k "$pattern" tests/) | ||
{ | ||
echo "### Running" | ||
echo "\`\`\`" | ||
# @Q prints every argument shell-quoted so the command can be copy-pasted. | ||
echo "${cmd[@]@Q}" | ||
echo "\`\`\`" | ||
} >> $GITHUB_STEP_SUMMARY | ||
"${cmd[@]}" | ||
# Append three sections to the step summary, each produced by running the | ||
# $PARSE_JUNIT script on reports/accelerate.xml: overall stats, errors and | ||
# failures, and skipped tests. | ||
# The `! cancelled()` condition lets this step run after a failed test step. | ||
- name: Print result tables | ||
if: ${{ ! cancelled() }} | ||
run: | | ||
source activate $CONDA_ENV_NAME | ||
cd $WORK_DIR | ||
{ | ||
echo "### Results" | ||
python3 $PARSE_JUNIT reports/accelerate.xml --stats | ||
echo "### Failed" | ||
python3 $PARSE_JUNIT reports/accelerate.xml --errors --failed | ||
echo "### Skipped" | ||
python3 $PARSE_JUNIT reports/accelerate.xml --skipped | ||
} >> $GITHUB_STEP_SUMMARY | ||
# Append a two-column table of environment annotations to the step summary: | ||
# OS and kernel versions, driver package versions, Python package versions, | ||
# GPU render node IDs, torch XPU details and key environment variables. | ||
# The `! cancelled()` condition lets this step run after a failed test step. | ||
- name: Print annotations | ||
if: ${{ ! cancelled() }} | ||
run: | | ||
source activate $CONDA_ENV_NAME | ||
{ | ||
echo "### Annotations" | ||
echo "| | |" | ||
echo "| --- | --- |" | ||
echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |" | ||
echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |" | ||
echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |" | ||
packages=" \ | ||
level-zero \ | ||
libigc1 \ | ||
libigc2 \ | ||
libze1 \ | ||
libze-intel-gpu1 \ | ||
intel-i915-dkms \ | ||
intel-level-zero-gpu \ | ||
intel-opencl-icd" | ||
for package in $packages; do | ||
# Query the exact package name: a substring match on `dpkg -l` output lets | ||
# e.g. "level-zero" pick up the "intel-level-zero-gpu" row instead. | ||
# Only fully installed packages (status "ii") are reported; others stay empty. | ||
package_version=$(dpkg-query -W -f='${db:Status-Abbrev} ${Version}\n' "$package" 2>/dev/null | awk '$1 == "ii" { print $2; exit }' || true) | ||
echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" | ||
done | ||
# Each Python package is listed once so the table has no duplicate rows. | ||
packages="accelerate \ | ||
numpy \ | ||
torch \ | ||
torchaudio \ | ||
torchvision" | ||
for package in $packages; do | ||
# A failing import leaves the version cell empty instead of failing the step. | ||
package_version=$(python -c "import $package; print($package.__version__)" || true) | ||
echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" | ||
done | ||
# printing annotations for GPU cards | ||
var="[$(cat /sys/class/drm/render*/device/vendor || true)]" | ||
echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed 's/ /,/g') |" | ||
var="[$(cat /sys/class/drm/render*/device/device || true)]" | ||
echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed 's/ /,/g') |" | ||
var=$(python -c "import torch; print(torch.version.xpu)" || true) | ||
echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |" | ||
var=$(python -c "import torch; print(torch.xpu.device_count())" || true) | ||
echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |" | ||
# printing annotations with key environment variables | ||
echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |" | ||
echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |" | ||
# NOTE(review): PYTORCH_ENABLE_XPU_FALLBACK is not set in this job's env block, | ||
# so this cell is empty unless the runner environment provides it - confirm. | ||
echo "| jobs.$GITHUB_JOB.env.PYTORCH_ENABLE_XPU_FALLBACK | $PYTORCH_ENABLE_XPU_FALLBACK |" | ||
echo "| jobs.$GITHUB_JOB.env.PYTORCH_DEBUG_XPU_FALLBACK | $PYTORCH_DEBUG_XPU_FALLBACK |" | ||
} >> $GITHUB_STEP_SUMMARY | ||
# Publish the reports and tests_log directories as a build artifact. The artifact | ||
# name ends with the pull request number, or the commit SHA when there is none. | ||
# The `! cancelled()` condition lets the upload run after a failed test step. | ||
- name: Upload Test log | ||
uses: actions/upload-artifact@v4 | ||
if: ${{ ! cancelled() }} | ||
with: | ||
name: Torch-XPU-Accelerate-Log-${{ github.event.pull_request.number || github.sha }} | ||
path: | | ||
${{ github.workspace }}/accelerate/reports | ||
${{ github.workspace }}/accelerate/tests_log |