forked from triton-lang/triton
-
Notifications
You must be signed in to change notification settings - Fork 0
399 lines (348 loc) · 14.5 KB
/
integration-tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
name: Integration Tests
on:
workflow_dispatch:
pull_request:
branches: [main]
merge_group:
branches: [main]
types: [checks_requested]
concurrency:
group: ${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
env:
TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
jobs:
Runner-Preparation:
runs-on: ubuntu-latest
outputs:
matrix-required: ${{ steps.set-matrix.outputs.matrix-required }}
matrix-optional: ${{ steps.set-matrix.outputs.matrix-optional }}
steps:
- name: Prepare runner matrix
id: set-matrix
run: |
if [ x"${{ github.repository }}" == x"openai/triton" ]; then
echo '::set-output name=matrix-required::[["self-hosted", "A100"], ["self-hosted", "H100"]]'
echo '::set-output name=matrix-optional::[["self-hosted", "gfx908"], ["self-hosted", "arc770"]]'
else
echo '::set-output name=matrix-required::["ubuntu-latest"]'
echo '::set-output name=matrix-optional::["ubuntu-latest"]'
fi
Integration-Tests-Nvidia:
needs: Runner-Preparation
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-required)}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: 'true'
- name: Set CUDA ENV
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
run: |
echo "BACKEND=CUDA" >> "${GITHUB_ENV}"
echo "ENABLE_TMA=0" >> "${GITHUB_ENV}"
echo "ENABLE_MMA_V3=0" >> "${GITHUB_ENV}"
echo "TRITON_DISABLE_LINE_INFO=1" >> "${GITHUB_ENV}"
- name: Clear cache
run: |
rm -rf ~/.triton
- name: Update PATH
run: |
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
- name: Check pre-commit
run: |
python3 -m pip install --upgrade pre-commit
python3 -m pre_commit run --all-files --verbose
- name: Install Triton
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
python3 -m pip install ninja
python3 -m pip install --no-build-isolation -vvv '.[tests]'
python3 -m pip install pytest-xdist
- name: Run lit tests
if: ${{ env.BACKEND == 'CUDA'}}
run: |
python3 -m pip install lit
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}"
- name: Enable MMAV3 and TMA
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'H100')}}
run: |
echo "ENABLE_TMA=1" >> "${GITHUB_ENV}"
echo "ENABLE_MMA_V3=1" >> "${GITHUB_ENV}"
- name: Run python tests on CUDA with ENABLE_TMA=1 and ENABLE_MMA_V3=1
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1' && env.ENABLE_MMA_V3 == '1'}}
run: |
cd python/test/unit
python3 -m pytest -n 8 --ignore=runtime --ignore=operators --ignore=language/test_line_info.py
# run runtime tests serially to avoid race condition with cache handling.
python3 -m pytest runtime/
# run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py
- name: Run python tests on CUDA with ENABLE_TMA=0 and ENABLE_MMA_V3=0
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0' && env.ENABLE_MMA_V3 == '0'}}
run: |
cd python/test/unit
python3 -m pytest -n 8 --ignore=runtime --ignore=hopper --ignore=operators --ignore=language/test_line_info.py
# run runtime tests serially to avoid race condition with cache handling.
python3 -m pytest runtime/
# run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py
- name: Clear cache
run: |
rm -rf ~/.triton
- name: Run interpreter tests
env:
# TRITON_INTERPRET: "1"
CUA_VISIBLE_DEVICES: ""
run: |
cd python/test/unit
python3 -m pytest -vs operators/test_flash_attention.py
- name: Run partial tests on CUDA with ENABLE_TMA=1 and ENABLE_MMA_V3=1
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1' && env.ENABLE_MMA_V3 == '1'}}
run: |
cd python/test/unit
python3 -m pytest -n 8 operators
- name: Run partial tests on CUDA with ENABLE_TMA=0 and ENABLE_MMA_V3=0
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0' && env.ENABLE_MMA_V3 == '0'}}
run: |
cd python/test/unit
python3 -m pytest -n 8 operators
- name: Create artifacts archive
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
run: |
cd ~/.triton
tar -czf artifacts.tar.gz cache
- name: Upload artifacts archive
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
uses: actions/upload-artifact@v2
with:
name: artifacts ${{ matrix.runner[1] }}
path: ~/.triton/artifacts.tar.gz
- name: Run CXX unittests
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
ctest
- name: Regression tests
if: ${{ contains(matrix.runner, 'A100') }}
run: |
python3 -m pip install pytest-rerunfailures
cd python/test/regression
sudo nvidia-smi -i 0 -pm 1
sudo nvidia-smi -i 0 --lock-gpu-clocks=1280,1280
python3 -m pytest -vs . --reruns 10
sudo nvidia-smi -i 0 -rgc
Integration-Tests-Third-Party:
needs: Runner-Preparation
if: false
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-optional)}}
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set ROCM ENV
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'gfx908')}}
run: |
echo "BACKEND=ROCM" >> "${GITHUB_ENV}"
- name: Set XPU ENV
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'arc770')}}
run: |
echo "BACKEND=XPU" >> "${GITHUB_ENV}"
- name: Clear cache
run: |
rm -rf ~/.triton
- name: Update PATH
run: |
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
- name: Check pre-commit
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] != 'arc770') }}
run: |
python3 -m pip install --upgrade pre-commit
python3 -m pre_commit run --all-files --verbose
- name: Check pre-commit arc770
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] == 'arc770') }}
run: |
source ${HOME}/triton_vars.sh
source ${HOME}/miniconda3/bin/activate
conda activate triton-xpu-ci
python3 -m pip install --upgrade pre-commit
python3 -m pre_commit run --all-files
- name: Install Triton on ROCM
if: ${{ env.BACKEND == 'ROCM'}}
run: |
git submodule update --init --recursive
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
python3 -m pip install torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2
export TRITON_CODEGEN_AMD_HIP_BACKEND=1
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Install Triton on XPU
if: ${{ env.BACKEND == 'XPU'}}
run: |
source ${HOME}/triton_vars.sh
source ${HOME}/miniconda3/bin/activate
conda activate triton-xpu-ci
git submodule update --init --recursive
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
export TRITON_CODEGEN_INTEL_XPU_BACKEND=1
python3 -m pip uninstall -y triton
python3 setup.py build
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Run python tests on ROCM
if: ${{ env.BACKEND == 'ROCM'}}
run: |
cd python/test/unit/language
python3 -m pytest --capture=tee-sys -rfs --verbose "test_core.py"
- name: Run python tests on XPU
if: ${{ env.BACKEND == 'XPU'}}
run: |
source ${HOME}/triton_vars.sh
source ${HOME}/miniconda3/bin/activate
conda activate triton-xpu-ci
cd python/test/backend/third_party_backends
python3 -m pytest --capture=tee-sys -rfs --verbose --backend xpu
Compare-artifacts:
needs: Integration-Tests-Nvidia
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Install gh CLI
run: |
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-key 23F3D4EA75716059
echo "deb [arch=$(dpkg --print-architecture)] https://cli.github.com/packages focal main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
sudo apt update
sudo apt install gh
- name: Save PR number to a file
env:
PR_NUMBER: ${{ github.event.number }}
run: |
echo $PR_NUMBER > pr_number
- name: Upload PR number to artifacts
uses: actions/upload-artifact@v3
with:
name: pr_number
path: pr_number
- name: Download latest main artifacts
env:
ARTIFACT_NAME: artifacts A100
ARTIFACT_JOB_NAME: Integration-Tests-Nvidia
MAX_NUM_ACTIONS_PAGES: 30
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
OWNER_REPO="${{ github.repository }}"
echo "OWNER_REPO: $OWNER_REPO"
PR_NUMBERS=($(gh api --method GET repos/$OWNER_REPO/pulls -f state=closed | jq -r ".[] | select(.merged_at != null) | .number"))
# Not all PRs go through integration tests
success=0
for PR_NUMBER in "${PR_NUMBERS[@]}"
do
echo "Last merged PR number: $PR_NUMBER"
BRANCH_NAME=$(gh api repos/$OWNER_REPO/pulls/$PR_NUMBER --jq '.head.ref')
echo "BRANCH_NAME: $BRANCH_NAME"
USER_ID=$(gh api repos/$OWNER_REPO/pulls/$PR_NUMBER --jq '.user.id')
echo "USER_ID: $USER_ID"
run_id_found=false
page=1
while true; do
if [ "$page" -gt $MAX_NUM_ACTIONS_PAGES ]; then
break
fi
run_id=$(gh api --method GET "repos/$OWNER_REPO/actions/runs?page=$page&per_page=100" | jq --arg branch_name "$BRANCH_NAME" --arg run_name "Integration Tests" --arg user_id "$USER_ID" '.workflow_runs[] | select(.head_branch == $branch_name and .name == $run_name and .actor.id == ($user_id | tonumber))' | jq '.id' | head -1)
if [ "$run_id" != "" ]; then
echo "First run ID on branch $BRANCH_NAME is: $run_id"
WORKFLOW_RUN_ID=$run_id
run_id_found=true
break
fi
((page++))
done
if ! $run_id_found; then
echo "No run_id found for PR ${PR_NUMBER}, moving to the next PR."
continue
fi
echo "WORKFLOW_RUN_ID: $WORKFLOW_RUN_ID"
ARTIFACT_URL=$(gh api repos/$OWNER_REPO/actions/runs/$WORKFLOW_RUN_ID/artifacts | jq --arg artifact_name "$ARTIFACT_NAME" '.artifacts[] | select(.name == $artifact_name).archive_download_url' --raw-output)
echo "ARTIFACT_URL: $ARTIFACT_URL"
if [ -n "$ARTIFACT_URL" ]; then
echo "Downloading artifact: $ARTIFACT_URL"
curl --location --remote-header-name -H "Authorization: token $GH_TOKEN" -o reference.zip "$ARTIFACT_URL"
# Print the size of the downloaded artifact
echo "Artifact size (stat): $(stat --printf="%s bytes" reference.zip)"
echo "Artifact size (du): $(du -sh reference.zip)"
unzip reference.zip
tar -xzf artifacts.tar.gz
rm reference.zip
rm artifacts.tar.gz
mv cache reference
success=1
break
fi
done
if [ $success -eq 0 ]; then
echo "No artifact found with the name: $ARTIFACT_NAME"
exit 1
fi
- name: Download current job artifacts
uses: actions/download-artifact@v2
with:
name: artifacts A100
- name: Unzip current job artifacts
run: |
# Print the size of the downloaded artifact
echo "Artifact size (stat): $(stat --printf="%s bytes" artifacts.tar.gz)"
echo "Artifact size (du): $(du -sh artifacts.tar.gz)"
tar -xzf artifacts.tar.gz
rm artifacts.tar.gz
mv cache current
- name: Compare artifacts
run: |
set +e
python3 python/test/tools/compare_files.py --path1 reference --path2 current
exit_code=$?
set -e
echo $exit_code
if [ $exit_code -eq 0 ]; then
echo "Artifacts are identical"
echo "COMPARISON_RESULT=true" >> $GITHUB_ENV
elif [ $exit_code -eq 1 ]; then
echo "Artifacts are different"
echo "COMPARISON_RESULT=false" >> $GITHUB_ENV
else
echo "Error while comparing artifacts"
echo "COMPARISON_RESULT=error" >> $GITHUB_ENV
fi
- name: Check comparison result and write to file
run: |
if [ "${{ env.COMPARISON_RESULT }}" = "true" ]; then
echo "SUCCESS" > comparison_result
else
echo "FAILED" > comparison_result
fi
- name: Upload comparison result to artifacts
uses: actions/upload-artifact@v3
with:
name: comparison_result
path: comparison_result
- name: Upload results as artifact
uses: actions/upload-artifact@v2
with:
name: kernels-reference-check
path: kernels_reference_check.txt