[IIIF Upload]: smallform_0072 #280
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggering events and token permissions.
name: Export and validate PAGE XML from Transkribus (by document)

on:
  # Runs when an issue is opened or its body is edited ...
  issues:
    types: [opened, edited]
  # ... and when a comment on an issue is created or edited.
  issue_comment:
    types: [created, edited]
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# NOTE(review): `write-all` grants the GITHUB_TOKEN every write scope for all
# jobs that do not declare their own `permissions`. Consider narrowing this
# once it is confirmed which scopes the notebook and the auto-commit step need.
permissions: write-all
jobs:
  # Gate job: computes whether the export pipeline may run for this event.
  # CONTINUE is 'true' only when the issue carries the
  # `task:transkribus-export` label AND the triggering actor is listed in the
  # WHITELIST variable (a JSON array, parsed with fromJson).
  evaluate-label:
    name: evaluate-label
    runs-on: ubuntu-latest
    env:
      TASKS: ${{ join(github.event.issue.labels.*.name,', ') }}
      # NOTE(review): this is a substring match on the joined label string, so
      # a label such as `task:transkribus-export-foo` would also match.
      CONTINUE: ${{ contains(join(github.event.issue.labels.*.name,', '),'task:transkribus-export') && contains(fromJson(vars.WHITELIST),github.actor) && 'true' || 'false'}}
    outputs:
      continue: ${{ env.CONTINUE }}
    steps:
      - run: echo "$CONTINUE"

  # Main pipeline: parse the issue form, run the export/validation notebook,
  # transform the validated PAGE XML to TEI via XSLT and commit the result.
  run:
    name: run
    runs-on: ubuntu-latest
    needs: evaluate-label
    if: needs.evaluate-label.outputs.continue == 'true'
    # if: always() && ${{ contains(fromJson(vars.WHITELIST),github.actor) }} && ${{ contains(github.event.issue.labels.*.name,'task:transkribus-export') }}
    outputs:
      schematron_is_valid: ${{ steps.schematron-result.outputs.schematron_is_valid }}
      schematron_svrl: ${{ steps.schematron-svrl.outputs.schematron_svrl }}
      issue_reply_file: ${{ steps.issue-reply.outputs.issue_reply_file }}
      issue_reply_file2: ${{ steps.issue-reply-post-transform.outputs.issue_reply_file2 }}
      debug_param0: ${{ steps.debug_param0.outputs.debug_param0 }}
      debug_param: ${{ steps.debug_param.outputs.debug_param }}
    env:
      TASKS: ${{ join(github.event.issue.labels.*.name,', ') }}
    steps:
      # Print the issue labels. The value is read from the environment instead
      # of being interpolated into the script, so label text is never parsed
      # as shell code.
      - run: echo "$TASKS"
      # github.head_ref is only populated for pull request events; for the
      # issue events used here it is empty and checkout falls back to its
      # default ref.
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref }}
      - uses: stefanbuck/github-issue-parser@v3
        id: issue-parser
        with:
          template-path: .github/ISSUE_TEMPLATE/2-transkribus-export.yml
      # Show the parsed issue form and copy it into the workspace for the
      # notebook (see ISSUE below).
      - run: cat ${HOME}/issue-parser-result.json
      - run: cp ${HOME}/issue-parser-result.json ./issue-parser-result.json
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      # Other references left here by the author:
      #   fzimmermann89/run-notebook@357b84ea1b0605afb5ed2da0548ab80c9556cd1a
      #   fzimmermann89/run-notebook@v3
      #   yaananth/run-notebook@v2
      - uses: dse-as/run-notebook@v3-locked
        id: ipynb
        env:
          RUNNER: ${{ toJson(runner) }}
          # NOTE(review): this hands every repository secret and the complete
          # github context (which includes the token) to the notebook.
          # Confirm the notebook needs all of them.
          SECRETS: ${{ toJson(secrets) }}
          GITHUB: ${{ toJson(github) }}
          ISSUE: ./issue-parser-result.json
        with:
          notebook: "./transkribus-export/transkribus-export.ipynb"
          params: "./transkribus-export/default-params.json"
          isReport: False
          poll: True
      - run: ls -l
      # output for auto reply
      - name: Read result file
        id: schematron-result
        run: |
          {
            echo 'schematron_is_valid<<DELIM'
            cat ./validation_is_valid.txt
            echo
            echo DELIM
          } >> "$GITHUB_OUTPUT"
      # not sure we need this:
      - name: Read svrl file
        id: schematron-svrl
        run: |
          {
            echo 'schematron_svrl<<DELIM'
            cat ./validation_output-svrl.xml
            echo
            echo DELIM
          } >> "$GITHUB_OUTPUT"
      - name: Read issue reply file
        id: issue-reply
        run: |
          {
            echo 'issue_reply_file<<DELIM'
            cat ./issue-reply.txt
            echo
            echo DELIM
          } >> "$GITHUB_OUTPUT"
      # Raw value(s) of the `debug` field from the parsed issue form.
      - name: Debug param (as-is)
        id: debug_param0
        run: echo "debug_param0=$(jq -r '.debug[]' ./issue-parser-result.json)" >> "$GITHUB_OUTPUT"
      # Same flag normalised to 'true'/'false' for the XSLT `debug` parameter.
      - name: Debug param (boolean)
        id: debug_param
        run: echo "debug_param=${{ steps.debug_param0.outputs.debug_param0 == 'Enable' && 'true' || 'false'}}" >> "$GITHUB_OUTPUT"
      # Publish the key values on the run's summary page.
      - run: |
          {
            echo "### Workflow variables"
            echo "| Variable | Description |Value |"
            echo "| ------------------- | ------------------------ | ------------------ |"
            echo "| schematron_is_valid | Document is valid? | ${{ steps.schematron-result.outputs.schematron_is_valid }} |"
            echo "| debug0 | XSLT-Debugging enabled? | ${{ steps.debug_param0.outputs.debug_param0 == 'Enable' && 'Enable' || 'Disable' }} |"
            echo "| debug | XSLT-Debugging (bool)? | ${{ steps.debug_param.outputs.debug_param }} |"
          } >> "$GITHUB_STEP_SUMMARY"
      # - run: echo "${{ steps.schematron-result.outputs.schematron_is_valid }}"
      # Everything guarded by schematron_is_valid == 'True' below is the
      # transformation path that only runs for valid documents.
      - run: ls -l
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      - run: mkdir -p ./data/0-transkribus-PAGE ./data/1-raw-TEI ./data/2-base-TEI
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      - run: tree -L 1
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      # NOTE(review): the action reference below looks mangled (an
      # e-mail-obfuscation artefact); restore the real `owner/repo@ref`.
      - name: PAGE XML to raw TEI (XSLT)
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
        uses: pdaengeli/[email protected]
        with:
          args: -s:validation_input.xml -xsl:transkribus-export/page-xml-to-raw-tei.xsl debug=${{ steps.debug_param.outputs.debug_param }}
      - run: ls -l
      - run: chmod +x transkribus-export/string-to-tags.sh
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      - run: transkribus-export/string-to-tags.sh ./data/1-raw-TEI/*_[0-9]*_raw.xml
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      - run: ls -l
      - run: cat ./data/1-raw-TEI/*_[0-9]*_raw.xml
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      # at this stage we got a raw xml file in the root directory of the checked out repo (the working directory)
      # next we can convert this to dseas-tei (using the -o flag or creating an xsl:result-document)
      # NOTE(review): same mangled action reference as the XSLT step above.
      - name: raw TEI to dseas TEI (XSLT)
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
        uses: pdaengeli/[email protected]
        with:
          args: -s:data/1-raw-TEI -xsl:transkribus-export/raw-to-dseas-tei.xsl -o:data/2-base-TEI
      # Drop the `_raw` marker from the names of the transformed files.
      - run: for f in data/2-base-TEI/*_raw.xml; do mv -i -- "$f" "${f//_raw/}"; done
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      - run: tree -L 2 data
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      - run: cat data/2-base-TEI/*_[0-9]*.xml
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      # Stage a copy of ./data under _generated/, the only path that is committed.
      - run: mkdir -p _generated && cp -r data/* _generated/
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      # Create a multiline string to be used by the git-auto-commit Action.
      # The bare `echo` guarantees a newline before the closing delimiter even
      # when commit-message.txt does not end with one; otherwise `EOF` would be
      # glued to the last line and the output file would be malformed.
      - name: Set commit message
        id: commit_message_step
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
        run: |
          {
            echo 'commit_message<<EOF'
            cat commit-message.txt
            echo
            echo 'EOF'
          } >> "$GITHUB_OUTPUT"
          rm -rf commit-message.txt
      - uses: stefanzweifel/git-auto-commit-action@v5
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
        with:
          commit_message: ${{ steps.commit_message_step.outputs.commit_message }}
          file_pattern: '_generated/**'
          skip_fetch: true
      - name: Read issue reply file (post transform)
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
        id: issue-reply-post-transform
        run: |
          {
            echo 'issue_reply_file2<<DELIM'
            cat ./issue-reply-post-transform.txt
            echo
            echo DELIM
          } >> "$GITHUB_OUTPUT"
      # exclude _generated from artifact
      - run: rm -r _generated
        if: steps.schematron-result.outputs.schematron_is_valid == 'True'
      # for easier debugging:
      - uses: actions/upload-pages-artifact@v3
        with:
          name: output
          path: .
        env:
          RUNNER: ${{ toJson(runner) }}

  # adding a reply to the issue thread
  add-comment:
    needs: run
    runs-on: ubuntu-latest
    # outputs:
    #   issue_reply: ${{ steps.prepare-comment.outputs.issue_reply }}
    env:
      TASKS: ${{ join(github.event.issue.labels.*.name,', ') }}
      ISSUE_MD: ${{ needs.run.outputs.issue_reply_file }}
      ISSUE_MD2: ${{ needs.run.outputs.issue_reply_file2 }}
    permissions:
      issues: write
    steps:
      - run: ls -l
      # Exactly one of the two "Prepare comment" steps is expected to run,
      # depending on the validation verdict reported by the `run` job.
      - name: Prepare comment (success)
        id: prepare-comment-success
        if: needs.run.outputs.schematron_is_valid == 'True'
        run: |
          {
            echo 'issue_reply<<DELIM'
            echo '# Validation successful: :green_circle:'
            echo ${{ needs.run.outputs.schematron_is_valid }}
            echo
            echo '# Transformation result: '
            echo "$ISSUE_MD2"
            echo
            echo "Details and artifact ${{ needs.run.outputs.debug_param0 == 'Enable' && '(with debug files) ' || ''}}available at: https://github.com/dse-as/workflow_IIIF-ATR-TEI/actions/runs/${{ github.run_id }}"
            echo
            echo DELIM
          } >> "$GITHUB_OUTPUT"
      - name: Prepare comment (failure)
        id: prepare-comment-failure
        if: needs.run.outputs.schematron_is_valid == 'False'
        run: |
          {
            echo 'issue_reply<<DELIM'
            echo '# Validation not successful :stop_sign:'
            echo "$ISSUE_MD"
            echo
            echo DELIM
          } >> "$GITHUB_OUTPUT"
      # BODY concatenates both prepared replies; the one whose step was
      # skipped contributes an empty string.
      - name: Add comment
        # run: gh issue comment "$NUMBER" --body-file ./ipynb.txt
        run: gh issue comment "$NUMBER" --body "$BODY"
        env:
          OUTPUT1: ${{needs.run.outputs.schematron_is_valid}}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GH_REPO: ${{ github.repository }}
          NUMBER: ${{ github.event.issue.number }}
          BODY: ${{ steps.prepare-comment-success.outputs.issue_reply }} ${{ steps.prepare-comment-failure.outputs.issue_reply }}
      # also add comment for success case (TBD after transformation)

  # adding a reply to the issue thread in case of non-authorization
  # NOTE(review): this job has no label check, so it closes ANY issue that a
  # non-whitelisted actor opens, edits or comments on. Confirm that is intended.
  add-comment-unauthorized:
    runs-on: ubuntu-latest
    permissions:
      issues: write
    steps:
      # Manual dispatches carry no issue, so there is nothing to close and
      # `gh issue close ""` would only fail.
      - name: Add comment
        if: ${{ github.event_name != 'workflow_dispatch' && ! contains(fromJson(vars.WHITELIST),github.actor) }}
        run: gh issue close "$NUMBER" -c "$BODY"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GH_REPO: ${{ github.repository }}
          NUMBER: ${{ github.event.issue.number }}
          BODY: "**Initiator of the action (${{ github.actor }}) is not whitelisted.**\n\n **Aborting.**\n\n\n [Edit `WHITELIST`](https://github.com/organizations/dse-as/settings/variables/actions/WHITELIST)"