diff --git a/.drone.yml b/.drone.yml deleted file mode 100644 index 3e05dc31..00000000 --- a/.drone.yml +++ /dev/null @@ -1,100 +0,0 @@ ---- -kind: pipeline -type: docker -name: release-latest - -platform: - os: linux - arch: amd64 - -trigger: - branch: - - master - event: - exclude: - - pull_request - - tag - - promote - - rollback - -steps: -- name: publish-docker-wopi-latest - pull: always - image: plugins/docker - settings: - repo: cs3org/wopiserver - tags: latest - dockerfile: wopiserver.Dockerfile - username: - from_secret: dockerhub_username - password: - from_secret: dockerhub_password - build_args: - - VERSION=${DRONE_SEMVER_SHORT}-g${DRONE_COMMIT:0:7} - custom_dns: - - 128.142.17.5 - - 128.142.16.5 - ---- -kind: pipeline -type: docker -name: release - -platform: - os: linux - arch: amd64 - -trigger: - event: - include: - - tag - -steps: -- name: publish-docker-wopi-tag - pull: always - image: plugins/docker - settings: - repo: cs3org/wopiserver - tags: ${DRONE_TAG} - dockerfile: wopiserver.Dockerfile - username: - from_secret: dockerhub_username - password: - from_secret: dockerhub_password - build_args: - - VERSION=${DRONE_TAG} - custom_dns: - - 128.142.17.5 - - 128.142.16.5 - ---- -kind: pipeline -type: docker -name: release-xrootd - -platform: - os: linux - arch: amd64 - -trigger: - event: - include: - - tag - -steps: -- name: publish-docker-wopi-tag - pull: always - image: plugins/docker - settings: - repo: cs3org/wopiserver - tags: ${DRONE_TAG}-xrootd - dockerfile: wopiserver-xrootd.Dockerfile - username: - from_secret: dockerhub_username - password: - from_secret: dockerhub_password - build_args: - - VERSION=${DRONE_TAG} - custom_dns: - - 128.142.17.5 - - 128.142.16.5 diff --git a/.github/workflows/python-app.yml b/.github/workflows/ci-tests.yml similarity index 75% rename from .github/workflows/python-app.yml rename to .github/workflows/ci-tests.yml index 004674b5..71c2b5c8 100644 --- a/.github/workflows/python-app.yml +++ 
b/.github/workflows/ci-tests.yml @@ -1,36 +1,38 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Python application - +name: Linting and unit tests on: push: - branches: [ master ] + branches: [ "master" ] pull_request: - branches: [ master ] + branches: [ "master" ] jobs: build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.10 - uses: actions/setup-python@v2 + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v3 with: - python-version: "3.10" + python-version: "3.12" + - name: Install dependencies run: | python -m pip install --upgrade pip pip install flake8 pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide, we further relax this - flake8 . --count --exit-zero --max-complexity=15 --max-line-length=130 --statistics + flake8 . 
--count --exit-zero --max-complexity=30 --max-line-length=130 --statistics + - name: Test with pytest run: | pytest diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..2a79f727 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,41 @@ +name: CodeQL + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: "44 11 * * 0" + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ python ] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + queries: +security-and-quality + + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..c9ad20f7 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,78 @@ +name: Releases +on: + push: + tags: + - "*" + workflow_dispatch: + +jobs: +# The following is a clone of cs3org/reva/.github/workflows/docker.yml because reusable actions do not (yet) support lists as input types: +# see https://github.com/community/community/discussions/11692 + release: + runs-on: ${{ fromJSON('["ubuntu-latest", "self-hosted"]')[github.repository == 'cs3org/wopiserver'] }} + strategy: + fail-fast: false + matrix: + include: + - file: wopiserver.Dockerfile + tags: ${{ vars.DOCKERHUB_ORGANIZATION }}/wopiserver:${{ github.ref_name }}-amd64 + platform: linux/amd64 + image: python:3.11-alpine + push: ${{ github.event_name != 'workflow_dispatch' }} + - file: wopiserver.Dockerfile + tags: ${{ vars.DOCKERHUB_ORGANIZATION }}/wopiserver:${{ github.ref_name 
}}-arm64 + platform: linux/arm64 + image: python:3.10-slim-buster + push: ${{ github.event_name != 'workflow_dispatch' }} + - file: wopiserver-xrootd.Dockerfile + tags: ${{ vars.DOCKERHUB_ORGANIZATION }}/wopiserver:${{ github.ref_name }}-xrootd + platform: linux/amd64 + push: ${{ github.event_name != 'workflow_dispatch' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up QEMU + if: matrix.platform != '' + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Login to Docker Hub + if: matrix.push + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Build ${{ matrix.push && 'and push' || '' }} ${{ matrix.tags }} Docker image + uses: docker/build-push-action@v3 + with: + context: . + file: ${{ matrix.file }} + tags: ${{ matrix.tags }} + push: ${{ matrix.push }} + build-args: | + VERSION=${{ github.ref_name }} + BASEIMAGE=${{ matrix.image }} + platforms: ${{ matrix.platform }} + manifest: + runs-on: ${{ fromJSON('["ubuntu-latest", "self-hosted"]')[github.repository == 'cs3org/wopiserver'] }} + needs: release + if: github.event_name != 'workflow_dispatch' + strategy: + fail-fast: false + matrix: + manifest: + - ${{ vars.DOCKERHUB_ORGANIZATION }}/wopiserver:${{ github.ref_name }} + - ${{ vars.DOCKERHUB_ORGANIZATION }}/wopiserver:latest + steps: + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Create manifest + run: | + docker manifest create ${{ matrix.manifest }} \ + --amend ${{ vars.DOCKERHUB_ORGANIZATION }}/wopiserver:${{ github.ref_name }}-amd64 \ + --amend ${{ vars.DOCKERHUB_ORGANIZATION }}/wopiserver:${{ github.ref_name }}-arm64 + - name: Push manifest + run: docker manifest push ${{ matrix.manifest }} diff --git a/.gitignore b/.gitignore index 43c6e740..f84713a0 100644 --- 
a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *pyc *rpm +.cache .mypy_cache +.pytest_cache diff --git a/CHANGELOG.md b/CHANGELOG.md index f7c11db0..25ef559e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,105 @@ ## Changelog for the WOPI server +### Fri May 24 2024 - v10.5.0 +- Added timeout settings for GRPC and HTTP connections (#149) +- Fixed handling of trailing slashes (#151) +- Moved docker image to 3.12.3-alpine (#147) + +### Tue May 14 2024 - v10.4.0 +- Added support for Microsoft compliance domains +- Fixed opening of markdown files created on Windows platforms +- Improved lock handling on write and xattr operations (#137) +- Improved logs + +### Fri Jan 19 2024 - v10.3.0 +- Implemented support for X-Trace-Id header (#64) +- Fixed SaveAs logic for non-authenticated (anonymous) users +- Improved handling of HTTP requests +- Improved memory efficiency by streaming files' content (#136, #141) +- Fixed 0-byte uploads (#142) + +### Mon Oct 23 2023 - v10.2.0 +- Implemented cache for xattrs in the cs3 storage (#128) +- Implemented advisory locking via xattrs for cs3 storages that + do not support native locking (#129) +- Improved handling of default values, in order to clean up + the default config file +- Fixed the PostMessageOrigin property in CheckFileInfo when + using the same wopiserver with multiple cloud storages +- Fixed xroot build +- Fixed failed precondition error handling in unlock + +### Wed Jul 5 2023 - v10.1.0 +- Fixed handling of filenames with non latin-1 characters (#127) +- Improved logging and adjusted log levels (#123) +- Switched from CentOS Stream 8 to AlmaLinux 8 for the + xroot-flavoured docker image + +### Wed May 31 2023 - v10.0.0 +- Added CloseUrl and other properties to CheckFileInfo +- Introduced health check of the configured storage interface + to ease deployment validation (#122) +- Inverted default for wopilockstrictcheck +- Fixed Preview mode +- Removed legacy logic for discovery of app endpoints (#119): + this is now 
only implemented by Reva's app providers, and + legacy ownCloud/CERNBox UIs are not supported any longer +- Removed support to forcefully evict valid locks, introduced + to compensate for a Microsoft Word issue +- Converted all responses to JSON format (#120) +- Cleaned up obsoleted scripts + +### Fri Mar 10 2023 - v9.5.0 +- Introduced concept of user type, given on `/wopi/iop/open`, + to better serve federated vs regular users with respect to + folder URLs and SaveAs operations +- Redefined `conflictpath` option as `homepath` (the former is + still supported for backwards compatibility): when defined, + a SaveAs operation falls back to the user's `homepath` when + it can't work on the original folder +- Fixed PutUserInfo to use the user's username as xattr key +- Added arm64-based builds + +### Tue Jan 31 2023 - v9.4.0 +- Introduced support to forcefully evict valid locks + to compensate for Microsoft Online mishandling of collaborative + sessions. This workaround will stay until a proper fix + is implemented following Microsoft CSPP team's advice +- Improved logging, in particular around lock eviction +- Bridged apps: moved plugin loading apps out of the deprecated + discovery module, and fixed some minor bugs +- CI: moved release builds to GitHub actions + +### Thu Nov 24 2022 - v9.3.0 +- Introduced heuristic to log which sessions are allowed + to open a collaborative session and which ones are + prevented by the application +- Introduced support for app-aware locks in EOS (#94) +- Disabled SaveAs action when user is not owner +- Improved error coverage in case of transient errors + in bridged apps and in PutFile operations +- Moved from LGTM to CodeQL workflow on GitHub (#100) +- Introduced support for PutUserInfo +- Added support for the Microsoft "business" flow (#105) + +### Mon Oct 17 2022 - v9.2.0 +- Added option to use file or stream handler for logging (#91) +- Introduced configurable hostURLs for CheckFileInfo (#93) +- Fixed duplicate log entries (#92) +- 
CodiMD: added support for direct storage access via + the ownCloud file picker (#95) +- Fixed check for external locks +- Further fixes to improve coverage of the WOPI validator tests + +### Wed Oct 5 2022 - v9.1.0 +- Introduced support for PREVIEW mode (#82) +- Improved UnlockAndRelock logic (#85, #87) +- Switched to python-alpine docker image (#88) +- Introduced further branding options in CheckFileInfo +- Further improvements in the bridged apps logic +- Added more logging and a new endpoint to monitor + conflicted sessions + ### Thu Sep 1 2022 - v9.0.0 - Refactored and strengthened save workflow for bridged applications, and simplified lock metadata (#80) @@ -8,7 +108,7 @@ - Refactored PutFile logic when handling conflict files (#78) - Improved support for Spaces in Reva (#79) - Implemented save workflow for Etherpad documents (#81) - Fixed direct download in case of errors +- Fixed direct download in case of errors - Updated dependencies and documentation ### Thu Jun 16 2022 - v8.3.0 diff --git a/Makefile b/Makefile index 590aa17a..8bf09cdd 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -FILES_TO_RPM = src mon tools wopiserver.conf wopiserver.service wopiserver.logrotate +FILES_TO_RPM = src tools wopiserver.conf wopiserver.service wopiserver.logrotate SPECFILE = $(shell find . 
-type f -name *.spec) VERSREL = $(shell git describe | sed 's/^v//') VERSION = $(shell echo ${VERSREL} | cut -d\- -f 1) diff --git a/README.md b/README.md index 323c6d93..0f9147ff 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,17 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) - [![Gitter chat](https://badges.gitter.im/cs3org/wopiserver.svg)](https://gitter.im/cs3org/wopiserver) [![Build Status](https://drone.cernbox.cern.ch/api/badges/cs3org/wopiserver/status.svg)](https://drone.cernbox.cern.ch/cs3org/wopiserver) + [![Gitter chat](https://badges.gitter.im/cs3org/wopiserver.svg)](https://gitter.im/cs3org/wopiserver) + [![Build Status](https://github.com/cs3org/wopiserver/actions/workflows/release.yml/badge.svg)](https://github.com/cs3org/wopiserver/actions) [![codecov](https://codecov.io/gh/cs3org/wopiserver/branch/master/graph/badge.svg)](https://codecov.io/gh/cs3org/wopiserver) ======== # WOPI Server -This service is part of the ScienceMesh Interoperability Platform (IOP) and implements a vendor-neutral application gateway compatible with the Web-application Open Platform Interface ([WOPI](https://docs.microsoft.com/en-us/microsoft-365/cloud-storage-partner-program/online)) specifications. +This service is part of the ScienceMesh Interoperability Platform ([IOP](https://developer.sciencemesh.io)) and implements a vendor-neutral application gateway compatible with the Web-application Open Platform Interface ([WOPI](https://docs.microsoft.com/en-us/microsoft-365/cloud-storage-partner-program/online)) specifications. It enables ScienceMesh EFSS storages to integrate Office Online platforms including Microsoft Office Online and Collabora Online. In addition it implements a [bridge](src/bridge/readme.md) module with dedicated extensions to support apps like CodiMD and Etherpad. Author: Giuseppe Lo Presti (@glpatcern)
-Contributors: +Contributors (oldest contributions first): - Michael DSilva (@madsi1m) - Lovisa Lugnegaard (@LovisaLugnegard) - Samuel Alfageme (@SamuAlfageme) @@ -20,6 +21,12 @@ Contributors: - Gianmaria Del Monte (@gmgigi96) - Klaas Freitag (@dragotin) - Jörn Friedrich Dreyer (@butonic) +- Michael Barz (@micbar) +- Robert Kaussow (@xoxys) +- Javier Ferrer (@javfg) +- Vasco Guita (@vascoguita) +- Thomas Mueller (@deepdiver1975) +- Andre Duffeck (@aduffeck) Initial revision: December 2016
First production version for CERNBox: September 2017 (presented at [oCCon17](https://occon17.owncloud.org) - [slides](https://www.slideshare.net/giuseppelopresti/collaborative-editing-and-more-in-cernbox))
@@ -37,7 +44,7 @@ Integration in the CS3 Organisation: April 2020 ## Compatibility -This WOPI server implements the required APIs to ensure full compatibility with Collabora Online and Microsoft Office. For the latter, however, the OneNote application uses newer WOPI APIs and is currently not supported. +This WOPI server implements the required APIs to ensure full compatibility with Microsoft Office (as provided via the CSPP Terms), Collabora Online, and ONLYOFFICE. ## Unit testing @@ -51,21 +58,25 @@ To run the tests, either run `pytest` if available in your system, or execute th 1. Run all tests: `python3 test/test_storageiface.py [-v]` 2. Run only one test: `python3 test/test_storageiface.py [-v] TestStorage.` -### Test against a Reva endpoint: +### Test against a Reva CS3 endpoint: 1. Clone reva (https://github.com/cs3org/reva) -2. Run Reva according to (ie up until step 4 in the instructions). -3. Run the tests: `WOPI_STORAGE=cs3 python3 test/test_storageiface.py` +2. Run Reva according to (ie up until step 4 in the instructions) +4. Configure `test/wopiserver-test.conf` such that the wopiserver can talk to your Reva instance: use [this example](docker/etc/wopiserver.cs3.conf) for a skeleton configuration +5. Run the tests: `WOPI_STORAGE=cs3 python3 test/test_storageiface.py` +3. For a production deployment, configure your `wopiserver.conf` following the example above, and make sure the `iopsecret` file contains the same secret as configured in the [Reva appprovider](https://developer.sciencemesh.io/docs/technical-documentation/iop/iop-optional-configs/collabora-wopi-server/wopiserver) ### Test against an Eos endpoint: 1. Make sure your Eos instance is configured to accept connections from WOPI as a privileged gateway -2. Configure `wopiserver-test.conf` according to your Eos setup. The provided defaults are valid at CERN. +2. Configure `test/wopiserver-test.conf` according to your Eos setup (the provided defaults are valid at CERN) 3. 
Run the tests: `WOPI_STORAGE=xroot python3 test/test_storageiface.py` +4. For a production deployment (CERN only), configure your `wopiserver.conf` according to the Puppet infrastructure ### Test using the Microsoft WOPI validator test suite -This is work in progress. Refer to [these notes](test/wopi-validator.md). +Refer to [these notes](test/wopi-validator.md). Microsoft also provides a graphical version of the test suite +as part of their Office 365 offer, which is also supported via the Reva open-in-app workflow. ## Run the WOPI server locally for development purposes @@ -74,15 +85,16 @@ This is work in progress. Refer to [these notes](test/wopi-validator.md). 2. Add log file directory: `sudo mkdir /var/log/wopi/ && sudo chmod a+rwx /var/log/wopi` 3. Create the folder for the wopi config: `sudo mkdir /etc/wopi/ && sudo chmod a+rwx /etc/wopi` 4. Create recoveryfolder: `sudo mkdir /var/spool/wopirecovery && sudo chmod a+rwx /var/spool/wopirecovery` -5. Create the files `iopsecret` and `wopiscret` in the folder `/etc/wopi/`, create random strings for the secrets -6. Copy the provided `wopiserver.conf` to `/etc/wopi/wopiserver.defaults.conf` +5. Create the files `iopsecret` and `wopisecret` in the folder `/etc/wopi/`, create random strings for the secrets +6. Copy the provided [wopiserver.conf](./wopiserver.conf) to `/etc/wopi/wopiserver.defaults.conf` 7. Create a config file `/etc/wopi/wopiserver.conf`: start from `docker/etc/wopiserver.conf` for a minimal configuration and add from the defaults file as needed 8. From the WOPI server folder run: `python3 src/wopiserver.py` ### Test the open-in-app workflow on the local WOPI server Once the WOPI server runs on top of local storage, the `tools/wopiopen.py` script can be used -to test the open-in-app workflow. For that, assuming you have e.g. CodiMD deployed in your (docker-compose) cluster: +to test the open-in-app workflow. +For that, assuming you have e.g. CodiMD deployed in your cluster: 1. 
Create a `test.md` file in your local storage folder, e.g. `/var/wopi_local_storage` 2. From the WOPI server folder, execute `tools/wopiopen.py -a CodiMD -i "internal_CodiMD_URL" -u "user_visible_CodiMD_URL" -k CodiMD_API_Key test.md` diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..fd854407 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,15 @@ +# Security Policy + +## Supported Versions + +By default, only the latest tagged version is supported. + +In case of major issues upgrading to the latest tag, a backport +to a previous release from the same major version can be considered. + +## Reporting a Vulnerability + +Please open a standard issue and mention `Vulnerability:` in the title. + +Depending on the severity, it will be reviewed as part of the +next development cycle. diff --git a/cernbox-wopi-server.spec b/cernbox-wopi-server.spec index 07da8bff..03ceb5cd 100644 --- a/cernbox-wopi-server.spec +++ b/cernbox-wopi-server.spec @@ -55,7 +55,6 @@ install -m 644 src/cs3iface.py %buildroot/%_python_lib/cs3iface.py install -m 644 wopiserver.service %buildroot/usr/lib/systemd/system/wopiserver.service install -m 644 wopiserver.conf %buildroot/etc/wopi/wopiserver.defaults.conf install -m 644 wopiserver.logrotate %buildroot/etc/logrotate.d/cernbox-wopi-server -install -m 755 mon/wopi_grafana_feeder.py %buildroot/usr/bin/wopi_grafana_feeder.py install -m 755 tools/wopicheckfile.py %buildroot/usr/bin/wopicheckfile.py install -m 755 tools/wopilistopenfiles.sh %buildroot/usr/bin/wopilistopenfiles.sh install -m 755 tools/wopiopen.py %buildroot/usr/bin/wopiopen.py diff --git a/docker/README b/docker/README deleted file mode 100644 index b040a4e1..00000000 --- a/docker/README +++ /dev/null @@ -1,18 +0,0 @@ -========== -WOPISERVER - -Build with: - `make rpm` - `cd docker; mv ../cernbox-wopi-server* .` - `docker-compose -f wopiserver.yaml build` - -Run with: - `docker-compose -f wopiserver.yaml up -d` - -Inspect the logs - `docker-compose -f wopiserver.yaml 
logs -f` - -Specs: - - listening on port 8880/HTTP (internal port is also 8880) - - volumes for paths `/var/log/wopi`, `/etc/wopi`, and `/var/wopi_local_storage` - diff --git a/docker/buildimage_wopiserver.sh b/docker/buildimage_wopiserver.sh deleted file mode 100755 index 57cdf75c..00000000 --- a/docker/buildimage_wopiserver.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh -# -# buildimage.sh -# -# This script can be used to generate a docker image of the WOPI server. -# Prior to run it, you need to collect here a valid wopiserver.conf and -# an iopsecret file that contains a shared secret used to strengthen the -# open REST endpoint, as they give access to any file of the underlying -# storage: the secret is only to be used between the client of the -# /wopi/iop/open endpoint and the WOPI server. -# -# If you want the WOPI server to run in secure mode, you need to generate -# a certificate/key with the hostname of the node that will be running -# the generated docker image, and copy them into the generated image. - -pushd .. -make rpm -make clean -popd -mv ../cernbox-wopi*rpm . 
- -sudo docker build -t your-personal-repo-area/cloudstor-wopi-server --pull=true --no-cache --force-rm wopiserver.Dockerfile && \ -sudo docker push your-personal-repo-area/cloudstor-wopi-server diff --git a/docker/entrypoint b/docker/entrypoint deleted file mode 100755 index 982311de..00000000 --- a/docker/entrypoint +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh - -echo "${WOPISECRET:-$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 12 | head -n 1)}" > /etc/wopi/wopisecret -#cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 12 | head -n 1 > /etc/wopi/wopisecret - -echo "${IOPSECRET:-password}" > /etc/wopi/iopsecret - -/usr/bin/curl -o /etc/wopi/wopiserver.conf ${CONFIGURATION}/wopi/wopiserver.conf - -/usr/bin/curl -o /etc/eos.keytab ${CONFIGURATION}/eos/eos.keytab - -groupadd -g 48 apache -useradd -u 48 -g 48 -m apache - -chown apache:apache /etc/eos.keytab -chmod 400 /etc/eos.keytab -chown -R apache:apache /etc/wopi - -#exec /usr/bin/wopiserver.py - -exec sudo -u apache python /usr/bin/wopiserver.py diff --git a/docker/etc/wopiserver.conf b/docker/etc/wopiserver.conf index a4f3b91e..9c317c3a 100644 --- a/docker/etc/wopiserver.conf +++ b/docker/etc/wopiserver.conf @@ -1,18 +1,16 @@ # # wopiserver.conf - basic working configuration for a docker image # +# This is OK for test/development, NOT for production [general] storagetype = local port = 8880 -nonofficetypes = .md .zmd .txt wopiurl = http://localhost -downloadurl = http://localhost -tokenvalidity = 86400 -wopilockexpiration = 1800 # Logging level. Debug enables the Flask debug mode as well. # Valid values are: Debug, Info, Warning, Error. 
loglevel = Debug +loghandler = file [security] usehttps = no diff --git a/docker/etc/wopiserver.cs3.conf b/docker/etc/wopiserver.cs3.conf new file mode 100644 index 00000000..586e5fea --- /dev/null +++ b/docker/etc/wopiserver.cs3.conf @@ -0,0 +1,34 @@ +# An example wopiserver.conf skeleton to work with CS3 APIs and Reva + +[general] +storagetype = cs3 +port = 8880 +wopiurl = https://your.wopi.org:8880 +loglevel = Debug +loghandler = stream +detectexternalmodifications = False +#hostediturl = https://your.revad.org/external?app=&fileId=! +#hostviewurl = https://your.revad.org/external?app=&fileId=!&viewmode=VIEW_MODE_PREVIEW + +#codimdurl = https://your.codimd.org:443 +#codimdinturl = https://your.internal.codimd.org:443 +nonofficetypes = .md .zmd .txt + +[bridge] +sslverify = True + +[io] +recoverypath = /var/spool/wopirecovery + +[security] +usehttps = yes +wopicert = your.cert.pem +wopikey = your.key.pem + +[cs3] +revagateway = your.revad.org:19000 +authtokenvalidity = 3600 +sslverify = True +grpctimeout = 10 +httptimeout = 10 +lockasattr = True diff --git a/docker/etc/xrootd.repo b/docker/etc/xrootd.repo new file mode 100644 index 00000000..cb1c6857 --- /dev/null +++ b/docker/etc/xrootd.repo @@ -0,0 +1,6 @@ +[xrootd] +name=xroot upstream for CentOS/Alma 8 +baseurl=https://xrootd.web.cern.ch/sw/repos/stable/slc/8/x86_64 +enabled=1 +gpgcheck=0 +priority=5 diff --git a/mon/wopi_grafana_feeder.py2 b/mon/wopi_grafana_feeder.py2 deleted file mode 100755 index 2fbd1f05..00000000 --- a/mon/wopi_grafana_feeder.py2 +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/python -''' -wopi_grafana_feeder.py - -A daemon pushing CERNBox WOPI monitoring data to Grafana. -TODO: make it a collectd plugin. 
References: -https://collectd.org/documentation/manpages/collectd-python.5.shtml -https://blog.dbrgn.ch/2017/3/10/write-a-collectd-python-plugin/ -https://github.com/dbrgn/collectd-python-plugins - -author: Giuseppe.LoPresti@cern.ch -CERN/IT-ST -''' - -import fileinput -import socket -import time -import pickle -import struct -import datetime -import getopt -import sys - -CARBON_TCPPORT = 2004 -carbonHost = '' -verbose = False -prefix = 'cernbox.wopi.' + socket.gethostname().split('.')[0] -epoch = datetime.datetime(1970, 1, 1) - - -def usage(exitCode): - '''prints usage''' - print 'Usage : cat | ' + sys.argv[0] + ' [-h|--help] -g|--grafanahost ' - sys.exit(exitCode) - -def send_metric(data): - '''send data to grafana using the pickle protocol''' - payload = pickle.dumps(data, protocol=2) - header = struct.pack("!L", len(payload)) - message = header + payload - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect((carbonHost, CARBON_TCPPORT)) - sock.sendall(message) - sock.close() - -def get_wopi_metrics(data): - '''Parse WOPI usage metrics''' - for line in data: - if data.isfirstline(): - logdate = line.split('T')[0].split('-') # keeps the date until 'T', splits - timestamp = (datetime.datetime(int(logdate[0]), int(logdate[1]), int(logdate[2]), 1, 0, 0) - epoch).total_seconds() + time.altzone - errors = 0 - users = {} - openfiles = {} - openfiles['docx'] = {} - openfiles['xlsx'] = {} - openfiles['pptx'] = {} - openfiles['odt'] = {} - openfiles['ods'] = {} - openfiles['odp'] = {} - openfiles['md'] = {} - openfiles['zmd'] = {} - openfiles['txt'] = {} - wrfiles = {} - wrfiles['docx'] = {} - wrfiles['xlsx'] = {} - wrfiles['pptx'] = {} - wrfiles['odt'] = {} - wrfiles['ods'] = {} - wrfiles['odp'] = {} - wrfiles['md'] = {} - wrfiles['zmd'] = {} - wrfiles['txt'] = {} - collab = 0 - try: - if ' ERROR ' in line: - errors += 1 - # all opened files - elif 'CheckFileInfo' in line: - # count of unique users - l = line.split() - u = l[4].split('=')[1] - if u in 
users.keys(): - users[u] += 1 - else: - users[u] = 1 - # count of open files per type: look for the file extension - fname = line[line.find('filename=')+10:line.rfind('fileid=')-2] - fext = fname[fname.rfind('.')+1:] - if fext not in openfiles: - openfiles[fext] = {} - if fname in openfiles[fext]: - openfiles[fext][fname] += 1 - else: - openfiles[fext][fname] = 1 - # files opened for write - elif 'successfully written' in line: - # count of written files - fname = line[line.find('filename=')+10:line.rfind('token=')-2] - fext = fname[fname.rfind('.')+1:] - if fname in wrfiles[fext]: - wrfiles[fext][fname] += 1 - else: - wrfiles[fext][fname] = 1 - # collaborative editing sessions - elif 'Collaborative editing detected' in line: - collab += 1 - # we could extract the filename and the users list for further statistics - except Exception: - if verbose: - print 'Error occurred at line: %s' % line - raise - - if 'timestamp' not in locals(): - # the file was empty, nothing to do - return - # prepare data for grafana - output = [] - output.append(( prefix + '.errors', (int(timestamp), errors) )) - output.append(( prefix + '.users', (int(timestamp), len(users)) )) - # get the top user by sorting the users dict by values instead of by keys - if len(users) > 0: - top = sorted(users.iteritems(), key=lambda (k, v): (v, k))[-1][1] - output.append(( prefix + '.topuser', (int(timestamp), int(top)) )) - for fext in openfiles: - output.append(( prefix + '.openfiles.' + fext, (int(timestamp), len(openfiles[fext])) )) - for fext in wrfiles: - output.append(( prefix + '.writtenfiles.' 
+ fext, (int(timestamp), len(wrfiles[fext])) )) - output.append(( prefix + '.collab', (int(timestamp), collab) )) - # send and print all collected data - send_metric(output) - if verbose: - print output - - -# first parse options -try: - options, args = getopt.getopt(sys.argv[1:], 'hvg:', ['help', 'verbose', 'grafanahost']) -except Exception, e: - print e - usage(1) -for f, v in options: - if f == '-h' or f == '--help': - usage(0) - elif f == '-v' or f == '--verbose': - verbose = True - elif f == '-g' or f == '--grafanahost': - carbonHost = v - else: - print "unknown option : " + f - usage(1) -if carbonHost == '': - print 'grafanahost option is mandatory' - usage(1) -# now parse input and collect statistics -try: - get_wopi_metrics(fileinput.input('-')) -except Exception, e: - print 'Error with collecting metrics:', e - if verbose: - raise - diff --git a/mon/wopi_max_concurrency.py2 b/mon/wopi_max_concurrency.py2 deleted file mode 100755 index 1f74199e..00000000 --- a/mon/wopi_max_concurrency.py2 +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/python -''' -wopi_max_concurrency.py - -A daemon pushing CERNBox WOPI monitoring data to Grafana. -TODO: make it a collectd plugin. References: -https://collectd.org/documentation/manpages/collectd-python.5.shtml -https://blog.dbrgn.ch/2017/3/10/write-a-collectd-python-plugin/ -https://github.com/dbrgn/collectd-python-plugins - -author: Giuseppe.LoPresti@cern.ch -CERN/IT-ST -''' - -import fileinput -import socket -import time -import pickle -import struct -import datetime -import getopt -import sys - -CARBON_TCPPORT = 2004 -carbonHost = '' -verbose = False -prefix = 'cernbox.wopi.' 
+ socket.gethostname().split('.')[0] -epoch = datetime.datetime(1970, 1, 1) - - -def usage(exitCode): - '''prints usage''' - print 'Usage : cat | ' + sys.argv[0] + ' [-h|--help] -g|--grafanahost ' - sys.exit(exitCode) - -def send_metric(data): - '''send data to grafana using the pickle protocol''' - payload = pickle.dumps(data, protocol=2) - header = struct.pack("!L", len(payload)) - message = header + payload - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect((carbonHost, CARBON_TCPPORT)) - sock.sendall(message) - sock.close() - -def get_wopi_metrics(data): - '''Parse WOPI usage metrics''' - for line in data: - if data.isfirstline(): - logdate = line.split('T')[0].split('-') # keeps the date until 'T', splits - timestamp = (datetime.datetime(int(logdate[0]), int(logdate[1]), int(logdate[2]), 1, 0, 0) - epoch).total_seconds() + time.altzone - maxconc = 0 - tokens = set() - try: - if 'msg="Lock"' in line and 'INFO' in line and 'result' not in line: - # +1 for this acc. token - l = line.split() - tok = l[-1].split('=')[1] - tokens.add(tok) - if len(tokens) > maxconc: - maxconc += 1 - if 'msg="Unlock"' in line and 'INFO' in line: - # -1 for this acc. 
token - l = line.split() - tok = l[-1].split('=')[1] - try: - tokens.remove(tok) - except KeyError: - pass - except Exception: - if verbose: - print 'Error occurred at line: %s' % line - raise - - if 'tok' not in locals(): - # the file was empty, nothing to do - return - # prepare data for grafana - output = [] - output.append(( prefix + '.maxconc', (int(timestamp), maxconc) )) - send_metric(output) - if verbose: - print output - - -# first parse options -try: - options, args = getopt.getopt(sys.argv[1:], 'hvg:', ['help', 'verbose', 'grafanahost']) -except Exception, e: - print e - usage(1) -for f, v in options: - if f == '-h' or f == '--help': - usage(0) - elif f == '-v' or f == '--verbose': - verbose = True - elif f == '-g' or f == '--grafanahost': - carbonHost = v - else: - print "unknown option : " + f - usage(1) -if carbonHost == '': - print 'grafanahost option is mandatory' - usage(1) -# now parse input and collect statistics -try: - get_wopi_metrics(fileinput.input('-')) -except Exception, e: - print 'Error with collecting metrics:', e - if verbose: - raise - diff --git a/requirements.txt b/requirements.txt index 471d18c7..899cc369 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,9 @@ PyJWT requests more_itertools prometheus-flask-exporter -cs3apis>=0.1.dev95 +cs3apis>=0.1.dev101 waitress +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability +setuptools>=70.0.0 # not directly required, pinned by Snyk to avoid a vulnerability +werkzeug>=3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability +zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/src/bridge/__init__.py b/src/bridge/__init__.py index 2f5c48f5..26877d43 100644 --- a/src/bridge/__init__.py +++ b/src/bridge/__init__.py @@ -6,7 +6,6 @@ import sys import time -import socket import traceback import threading import atexit @@ -25,8 +24,8 @@ # The supported plugins integrated with the WOPI Bridge extensions 
BRIDGE_EXT_PLUGINS = {'md': 'codimd', 'txt': 'codimd', 'zmd': 'codimd', 'epd': 'etherpad', 'zep': 'etherpad'} -# The header that bridged apps are expected to send to the save endpoint -BRIDGED_APP_HEADER = 'X-EFSS-Bridged-App' +# A header that bridged apps MUST send to the save endpoint to identify themselves +BRIDGED_APPNAME_HEADER = 'X-Efss-Bridged-App' # a standard message to be displayed by the app when some content might be lost: this would only # appear in case of uncaught exceptions or bugs handling the webhook callbacks @@ -73,13 +72,28 @@ def init(cls, config, log, secret): cls.hashsecret = secret cls.log = wopic.log = log wopic.sslverify = cls.sslverify + # now look for and load plugins for supported apps if configured + for app in BRIDGE_EXT_PLUGINS.values(): + url = config.get('general', f'{app}url', fallback=None) + if url: + inturl = config.get('general', f'{app}inturl', fallback=None) + try: + with open(f'/var/run/secrets/{app}_apikey', encoding='utf-8') as f: + apikey = f.readline().strip('\n') + except FileNotFoundError: + apikey = None + cls.loadplugin(app, url, inturl, apikey) @classmethod def loadplugin(cls, appname, appurl, appinturl, apikey): '''Load plugin for the given appname, if supported by the bridge service''' p = appname.lower() if p in cls.plugins: - # already initialized + # already initialized, check that the app URL matches: the current model does not support multiple app backends + if appurl != cls.plugins[p].appexturl: + cls.log.warning('msg="Attempt to use plugin with another appurl" client="%s" app="%s" appurl="%s"' % + (flask.request.remote_addr, appname, appurl)) + raise KeyError(appname) return if not issupported(appname): raise ValueError(appname) @@ -88,15 +102,12 @@ def loadplugin(cls, appname, appurl, appinturl, apikey): cls.plugins[p].log = cls.log cls.plugins[p].sslverify = cls.sslverify cls.plugins[p].disablezip = cls.disablezip - addrinfo = socket.getaddrinfo(urlparse.urlparse(appinturl).netloc.split(':')[0], None, 
proto=socket.IPPROTO_TCP) - cls.plugins[p].remoteaddrs = list({addr[-1][0] for addr in addrinfo}) cls.plugins[p].appname = appname cls.plugins[p].init(appurl, appinturl, apikey) - cls.log.info('msg="Imported plugin for application" app="%s" plugin="%s" authorizedfrom="%s"' % - (p, cls.plugins[p], cls.plugins[p].remoteaddrs)) + cls.log.info(f'msg="Imported plugin for application" app="{p}" plugin="{cls.plugins[p]}"') except Exception as e: - cls.log.info('msg="Failed to initialize plugin" app="%s" URL="%s" exception="%s"' % - (p, appinturl, e)) + cls.log.warning('msg="Failed to initialize plugin" app="%s" URL="%s" exception="%s"' % + (p, appinturl, e)) cls.plugins.pop(p, None) # regardless which step failed, this will remove the failed plugin raise ValueError(appname) @@ -117,19 +128,12 @@ def isextsupported(fileext): return fileext.lower() in set(BRIDGE_EXT_PLUGINS.keys()) -def _getappnamebyaddr(remoteaddr): - '''Return the appname of a (supported) app given its remote IP address''' - for p in WB.plugins.values(): - if remoteaddr in p.remoteaddrs: - return p.appname - raise ValueError - - def _validateappname(appname): '''Return the plugin's appname if one of the registered plugins matches (case-insensitive) the given appname''' for p in WB.plugins.values(): - if appname.lower() == p.appname.lower(): + if appname.lower() in p.appname.lower(): return p.appname + WB.log.debug(f'msg="BridgeSave: unknown application" appname="{appname}" plugins="{WB.plugins.values()}"') raise ValueError @@ -142,25 +146,36 @@ def _gendocid(wopisrc): # The Bridge endpoints start here ############################################################################################################# -def appopen(wopisrc, acctok, appname): +def appopen(wopisrc, acctok, appmd, viewmode, revatok=None): '''Open a doc by contacting the provided WOPISrc with the given access_token. 
Returns a (app-url, params{}) pair if successful, raises a FailedOpen exception otherwise''' wopisrc = urlparse.unquote_plus(wopisrc) if not isinstance(acctok, str): # TODO when using the wopiopen.py tool, the access token has to be decoded, to be clarified acctok = acctok.decode() + + # (re)load plugin and validate URLs + appname, appurl, appinturl, apikey = appmd + try: + WB.loadplugin(appname, appurl, appinturl, apikey) + appname = _validateappname(appname) + app = WB.plugins[appname] + WB.log.debug(f'msg="BridgeOpen: processing supported app" appname="{appname}" plugin="{app}"') + except ValueError: + WB.log.warning('msg="BridgeOpen: appname not supported or missing plugin" appname="%s" token="%s"' % + (appname, acctok[-20:])) + raise FailedOpen(f'Failed to load WOPI bridge plugin for {appname}', http.client.INTERNAL_SERVER_ERROR) + except KeyError: + WB.log.error('msg="BridgeOpen: app already configured" appname="%s" appurl="%s" token="%s"' % + (appname, appurl, acctok[-20:])) + raise FailedOpen(f'Bridged app {appname} already configured with a different appurl', http.client.NOT_IMPLEMENTED) + # WOPI GetFileInfo res = wopic.request(wopisrc, acctok, 'GET') if res.status_code != http.client.OK: WB.log.warning('msg="BridgeOpen: unable to fetch file WOPI metadata" response="%d"' % res.status_code) raise FailedOpen('Invalid WOPI context', http.client.NOT_FOUND) filemd = res.json() - app = WB.plugins.get(appname.lower()) - if not app: - WB.log.warning('msg="Open: appname not supported or missing plugin" filename="%s" appname="%s" token="%s"' % - (filemd['BaseFileName'], appname, acctok[-20:])) - raise FailedOpen('File type not supported', http.client.BAD_REQUEST) - WB.log.debug('msg="Processing open in supported app" appname="%s" plugin="%s"' % (appname, app)) try: # use the 'UserCanWrite' attribute to decide whether the file is to be opened in read-only mode @@ -168,14 +183,14 @@ def appopen(wopisrc, acctok, appname): try: # was it already being worked on? 
wopilock = wopic.getlock(wopisrc, acctok) - WB.log.info('msg="Lock already held" lock="%s" token="%s"' % (wopilock, acctok[-20:])) + WB.log.info(f'msg="Lock already held" lock="{wopilock}" token="{acctok[-20:]}"') # add this token to the list, if not already in if acctok[-20:] not in wopilock['tocl']: wopilock = wopic.refreshlock(wopisrc, acctok, wopilock) except wopic.InvalidLock as e: if str(e) != str(int(http.client.NOT_FOUND)): # lock is invalid/corrupted: force read-only mode - WB.log.info('msg="Invalid lock, forcing read-only mode" error="%s" token="%s"' % (e, acctok[-20:])) + WB.log.info(f'msg="Invalid lock, forcing read-only mode" error="{e}" token="{acctok[-20:]}"') filemd['UserCanWrite'] = False # otherwise, this is the first user opening the file; in both cases, fetch it @@ -207,13 +222,31 @@ def appopen(wopisrc, acctok, appname): try: del WB.saveresponses[wopisrc] except KeyError: + # nothing found, that's fine pass else: # user has no write privileges, just fetch the document and push it to the app on a random docid wopilock = app.loadfromstorage(filemd, wopisrc, acctok, None) - redirurl = app.getredirecturl(filemd['UserCanWrite'], wopisrc, acctok, wopilock['doc'][1:], - urlparse.quote_plus(filemd['UserFriendlyName'])) + # extract the path from the given folder URL: TODO this works with Reva master, not with Reva edge! 
+ filepath = "" + if 'BreadcrumbFolderUrl' in filemd: + try: + filepath = urlparse.urlparse(filemd['BreadcrumbFolderUrl']).path + if filepath.find('/s/') == 0: + filepath = filepath[3:] + '/' # top of public link, no leading / + elif filepath.find('/files/public/show/') == 0: + filepath = filepath[19:] + '/' # subfolder of public link, no leading / + elif filepath.find('/files/spaces/') == 0: + filepath = filepath[13:] + '/' # direct path to resource with leading / + else: + # other folderurl strctures are not supported for the time being + filepath = "" + except (ValueError, IndexError) as e: + WB.log.warning('msg="Failed to parse folderUrl, ignoring" url="%s" error="%s" token="%s"' % + (filemd['BreadcrumbFolderUrl'], e, acctok[-20:])) + redirurl = app.getredirecturl(viewmode, wopisrc, acctok, wopilock['doc'][1:], filepath + filemd['BaseFileName'], + filemd['UserFriendlyName'], revatok) except app.AppFailure as e: # this can be raised by loadfromstorage or getredirecturl usermsg = str(e) if str(e) else 'Unable to load the app, please try again later or contact support' @@ -232,23 +265,18 @@ def appsave(docid): isclose = flask.request.args.get('close') == 'true' # ensure a save request comes from known/registered applications: - # this is done via a specific header, falling back to reverse IP resolution - # (note that the latter fails with apps deployed in k8s clusters) - # both functions raise ValueError if not found - if BRIDGED_APP_HEADER in flask.request.headers: - appname = _validateappname(flask.request.headers[BRIDGED_APP_HEADER]) - else: - appname = _getappnamebyaddr(flask.request.remote_addr) + # this is done via a specific header + appname = _validateappname(flask.request.headers[BRIDGED_APPNAME_HEADER]) WB.log.info('msg="BridgeSave: requested action" isclose="%s" docid="%s" app="%s" wopisrc="%s" token="%s"' % (isclose, docid, appname, wopisrc, acctok[-20:])) except KeyError as e: WB.log.error('msg="BridgeSave: missing metadata" address="%s" headers="%s" 
args="%s" error="%s"' % (flask.request.remote_addr, flask.request.headers, flask.request.args, e)) - return wopic.jsonify('Missing metadata, could not save. %s' % RECOVER_MSG), http.client.BAD_REQUEST - except ValueError as e: - WB.log.error('msg="BridgeSave: unknown application" address="%s" headers="%s" args="%s"' % - (flask.request.remote_addr, flask.request.headers, flask.request.args)) - return wopic.jsonify('Unknown application, could not save. %s' % RECOVER_MSG), http.client.UNAUTHORIZED + return wopic.jsonify(f'Missing metadata, could not save. {RECOVER_MSG}'), http.client.BAD_REQUEST + except ValueError: + WB.log.error('msg="BridgeSave: unknown application" address="%s" appheader="%s" args="%s"' % + (flask.request.remote_addr, flask.request.headers.get(BRIDGED_APPNAME_HEADER), flask.request.args)) + return wopic.jsonify(f'Unknown application, could not save. {RECOVER_MSG}'), http.client.BAD_REQUEST # decide whether to notify the save thread donotify = isclose or wopisrc not in WB.openfiles or WB.openfiles[wopisrc]['lastsave'] < time.time() - WB.saveinterval @@ -258,7 +286,7 @@ def appsave(docid): WB.openfiles[wopisrc]['tosave'] = True WB.openfiles[wopisrc]['toclose'][acctok[-20:]] = isclose else: - WB.log.info('msg="Save: repopulating missing metadata" wopisrc="%s" token="%s"' % (wopisrc, acctok[-20:])) + WB.log.info(f'msg="Save: repopulating missing metadata" wopisrc="{wopisrc}" token="{acctok[-20:]}"') WB.openfiles[wopisrc] = { 'acctok': acctok, 'tosave': True, 'lastsave': int(time.time() - WB.saveinterval), @@ -270,6 +298,7 @@ def appsave(docid): try: del WB.saveresponses[wopisrc] except KeyError: + # nothing found, that's fine pass if donotify: # note that the save thread stays locked until we release the context, after return! 
@@ -277,10 +306,14 @@ def appsave(docid): # return latest known state for this document if wopisrc in WB.saveresponses: resp = WB.saveresponses[wopisrc] - WB.log.info('msg="BridgeSave: returned response" response="%s" token="%s"' % (resp, acctok[-20:])) + if resp[1] == http.client.INTERNAL_SERVER_ERROR: + logf = WB.log.error + else: + logf = WB.log.info + logf(f'msg="BridgeSave: returned response" response="{resp}" token="{acctok[-20:]}"') del WB.saveresponses[wopisrc] return resp - WB.log.info('msg="BridgeSave: enqueued action" immediate="%s" token="%s"' % (donotify, acctok[-20:])) + WB.log.info(f'msg="BridgeSave: enqueued action" immediate="{donotify}" token="{acctok[-20:]}"') return '{}', http.client.ACCEPTED @@ -291,7 +324,7 @@ def applist(): WB.log.warning('msg="BridgeList: unauthorized access attempt, missing authorization token" ' 'client="%s"' % flask.request.remote_addr) return 'Client not authorized', http.client.UNAUTHORIZED - WB.log.info('msg="BridgeList: returning list of open files" client="%s"' % flask.request.remote_addr) + WB.log.info(f'msg="BridgeList: returning list of open files" client="{flask.request.remote_addr}"') return flask.Response(json.dumps(WB.openfiles), mimetype='application/json') @@ -338,37 +371,62 @@ def savedirty(self, openfile, wopisrc): appname = openfile['app'].lower() try: wopilock = wopic.getlock(wopisrc, openfile['acctok']) - except wopic.InvalidLock: + except wopic.InvalidLock as ile1: + if str(ile1) == str(http.client.UNAUTHORIZED): + # this token has expired, nothing we can do any longer: by experience this happens on left-over + # browser sessions, and the file was fully saved. 
Therefore just clean up by using some 'fake' metadata + WB.log.warning('msg="SaveThread: discarding file as token has expired" token="%s" docid="%s"' % + (openfile['acctok'][-20:], openfile['docid'])) + openfile['lastsave'] = int(time.time()) + openfile['tosave'] = False + openfile['toclose'] = {'invalid-lock': True} + return None + WB.log.info('msg="SaveThread: attempting to relock file" token="%s" docid="%s"' % (openfile['acctok'][-20:], openfile['docid'])) try: wopilock = WB.saveresponses[wopisrc] = wopic.relock( wopisrc, openfile['acctok'], openfile['docid'], _intersection(openfile['toclose'])) - except wopic.InvalidLock as ile: + except wopic.InvalidLock as ile2: # even this attempt failed, give up - WB.saveresponses[wopisrc] = wopic.jsonify(str(ile)), http.client.INTERNAL_SERVER_ERROR + WB.saveresponses[wopisrc] = wopic.jsonify(str(ile2)), http.client.INTERNAL_SERVER_ERROR # attempt to save to local storage to help for later recovery: this is a feature of the core wopiserver content, rc = WB.plugins[appname].savetostorage(wopisrc, openfile['acctok'], False, {'doc': openfile['docid']}, onlyfetch=True) if rc == http.client.OK: - utils.storeForRecovery(content, 'unknown', wopisrc[wopisrc.rfind('/') + 1:], - openfile['acctok'][-20:], ile) + utils.storeForRecovery('unknown', wopisrc[wopisrc.rfind('/') + 1:], + openfile['acctok'][-20:], ile2, content) else: WB.log.error('msg="SaveThread: failed to fetch file for recovery to local storage" ' + 'token="%s" docid="%s" app="%s" response="%s"' % (openfile['acctok'][-20:], openfile['docid'], appname, rc)) - # set some 'fake' metadata, will be automatically cleaned up later + # as above set some 'fake' metadata, will be automatically cleaned up later openfile['lastsave'] = int(time.time()) openfile['tosave'] = False openfile['toclose'] = {'invalid-lock': True} return None - WB.log.info('msg="SaveThread: saving file" token="%s" docid="%s"' % - (openfile['acctok'][-20:], openfile['docid'])) - WB.saveresponses[wopisrc] = 
WB.plugins[appname].savetostorage( - wopisrc, openfile['acctok'], _intersection(openfile['toclose']), wopilock) + # now save and log + WB.saveresponses[wopisrc] = WB.plugins[appname].savetostorage(wopisrc, openfile['acctok'], + _intersection(openfile['toclose']), wopilock) openfile['lastsave'] = int(time.time()) - openfile['tosave'] = False + if WB.saveresponses[wopisrc][1] == http.client.FAILED_DEPENDENCY: + # this is hopefully transient, yet we need to try until we get the file back to storage: + # the updated lastsave time ensures next retry will happen after the saveinterval time + if 'still-dirty' not in openfile['toclose']: + # add a special key that will prevent close/unlock and refresh lock. If the refresh fails, + # the whole process will be retried at next round + openfile['toclose']['still-dirty'] = False + wopilock = wopic.refreshlock(wopisrc, openfile['acctok'], wopilock, toclose=openfile['toclose']) + WB.log.warning('msg="SaveThread: failed to save, will retry" token="%s" docid="%s" lasterror="%s" tocl="%s"' % + (openfile['acctok'][-20:], openfile['docid'], WB.saveresponses[wopisrc], wopilock['tocl'])) + else: + openfile['tosave'] = False + if 'still-dirty' in openfile['toclose']: # remove the special key above if present + openfile['toclose'].pop('still-dirty') + wopilock = wopic.refreshlock(wopisrc, openfile['acctok'], wopilock, toclose=openfile['toclose']) + WB.log.info('msg="SaveThread: file saved successfully" token="%s" docid="%s" tocl="%s"' % + (openfile['acctok'][-20:], openfile['docid'], wopilock['tocl'])) return wopilock def closewhenidle(self, openfile, wopisrc, wopilock): @@ -384,7 +442,7 @@ def closewhenidle(self, openfile, wopisrc, wopilock): (openfile['lastsave'], openfile['toclose'])) except wopic.InvalidLock: # lock is gone, just cleanup our metadata - WB.log.warning('msg="SaveThread: cleaning up metadata, detected missed close event" url="%s"' % wopisrc) + WB.log.warning(f'msg="SaveThread: cleaning up metadata, detected missed close 
event" url="{wopisrc}"') del WB.openfiles[wopisrc] return wopilock @@ -397,11 +455,12 @@ def cleanup(self, openfile, wopisrc, wopilock): except wopic.InvalidLock: # nothing to do here, this document may have been closed by another wopibridge if openfile['lastsave'] < time.time() - WB.unlockinterval: - # yet cleanup only after the unlockinterval time, cf. the InvalidLock handling in savedirty() - WB.log.info('msg="SaveThread: cleaning up metadata, file already unlocked" url="%s"' % wopisrc) + # yet clean up only after the unlockinterval time, cf. the InvalidLock handling in savedirty() + WB.log.info(f'msg="SaveThread: cleaning up metadata, file already unlocked" url="{wopisrc}"') try: del WB.openfiles[wopisrc] except KeyError: + # ignore potential races on this item pass return @@ -425,7 +484,7 @@ def cleanup(self, openfile, wopisrc, wopilock): try: wopic.refreshlock(wopisrc, openfile['acctok'], wopilock, toclose=openfile['toclose']) except wopic.InvalidLock: - WB.log.warning('msg="SaveThread: failed to refresh lock, will try again later" url="%s"' % wopisrc) + WB.log.warning(f'msg="SaveThread: failed to refresh lock, will retry" url="{wopisrc}"') @atexit.register diff --git a/src/bridge/codimd.py b/src/bridge/codimd.py index 8b57c803..f6f95009 100644 --- a/src/bridge/codimd.py +++ b/src/bridge/codimd.py @@ -16,7 +16,7 @@ import http.client import requests import bridge.wopiclient as wopic - +import core.wopiutils as utils TOOLARGE = 'File is too large to be edited in CodiMD. Please reduce its size with a regular text editor and try again.' 
@@ -26,7 +26,6 @@ # initialized by the main class or by the init method appurl = None appexturl = None -apikey = None log = None sslverify = None disablezip = None @@ -40,37 +39,37 @@ def init(_appurl, _appinturl, _apikey): '''Initialize global vars from the environment''' global appurl global appexturl - global apikey appexturl = _appurl appurl = _appinturl - apikey = _apikey try: # CodiMD integrates Prometheus metrics, let's probe if they exist - res = requests.head(appurl + '/metrics/codimd', verify=sslverify) + res = requests.head(appurl + '/metrics/codimd', verify=sslverify, timeout=10) if res.status_code != http.client.OK: - log.error('msg="The provided URL does not seem to be a CodiMD instance" appurl="%s"' % appurl) + log.error(f'msg="The provided URL does not seem to be a CodiMD instance" appurl="{appurl}"') raise AppFailure - log.info('msg="Successfully connected to CodiMD" appurl="%s"' % appurl) - except requests.exceptions.ConnectionError as e: - log.error('msg="Exception raised attempting to connect to CodiMD" exception="%s"' % e) + log.info(f'msg="Successfully connected to CodiMD" appurl="{appurl}"') + except requests.exceptions.RequestException as e: + log.error(f'msg="Exception raised attempting to connect to CodiMD" exception="{e}"') raise AppFailure -def getredirecturl(isreadwrite, wopisrc, acctok, docid, displayname): +def getredirecturl(viewmode, wopisrc, acctok, docid, filename, displayname, revatok): '''Return a valid URL to the app for the given WOPI context''' - if isreadwrite: - return '%s/%s?wopiSrc=%s&accessToken=%s&displayName=%s' % \ - (appexturl, docid, urlparse.quote_plus(wopisrc), acctok, displayname) - - # read-only mode: first check if we have a CodiMD redirection - res = requests.head(appurl + '/' + docid, - verify=sslverify) - if res.status_code == http.client.FOUND: - return '%s/s/%s' % (appexturl, urlparse.urlsplit(res.next.url).path.split('/')[-1]) - # we used to redirect to publish mode or normal view to quickly jump in slide 
mode depending on the content, - # but this was based on a bad side effect - here it would require to add: - # ('/publish' if not _isslides(content) else '') before the '?' - return '%s/%s/publish' % (appexturl, docid) + if viewmode in (utils.ViewMode.READ_WRITE, utils.ViewMode.PREVIEW): + mode = 'view' if viewmode == utils.ViewMode.PREVIEW else 'both' + params = { + 'wopiSrc': wopisrc, + 'accessToken': acctok, + 'disableEmbedding': ('%s' % (os.path.splitext(filename)[1] != '.zmd')).lower(), + 'displayName': displayname, + 'path': os.path.dirname(filename), + } + if revatok: + params['revaToken'] = revatok + return f'{appexturl}/{docid}?{mode}&{urlparse.urlencode(params)}' + + # read-only mode: use the publish view of CodiMD + return f'{appexturl}/{docid}/publish' # Cloud storage to CodiMD @@ -78,57 +77,60 @@ def getredirecturl(isreadwrite, wopisrc, acctok, docid, displayname): def _unzipattachments(inputbuf): '''Unzip the given input buffer uploading the content to CodiMD and return the contained .md file''' - inputzip = zipfile.ZipFile(io.BytesIO(inputbuf), compression=zipfile.ZIP_STORED) mddoc = None - for zipinfo in inputzip.infolist(): - fname = zipinfo.filename - log.debug('msg="Extracting attachment" name="%s"' % fname) - if os.path.splitext(fname)[1] == '.md': - mddoc = inputzip.read(zipinfo) - else: - # first check if the file already exists in CodiMD: - res = requests.head(appurl + '/uploads/' + fname, verify=sslverify) - if res.status_code == http.client.OK and int(res.headers['Content-Length']) == zipinfo.file_size: - # yes (assume that hashed filename AND size matching is a good enough content match!) 
- log.debug('msg="Skipped existing attachment" filename="%s"' % fname) - continue - # check for collision - if res.status_code == http.client.OK: - log.warning('msg="Attachment collision detected" filename="%s"' % fname) - # append a random letter to the filename - name, ext = os.path.splitext(fname) - fname = name + '_' + chr(randint(65, 65+26)) + ext - # and replace its reference in the document (this creates a copy of the doc, not very efficient) - mddoc = mddoc.replace(bytes(zipinfo.filename), bytes(fname)) - # OK, let's upload - log.debug('msg="Pushing attachment" filename="%s"' % fname) - res = requests.post(appurl + '/uploadimage', params={'generateFilename': 'false'}, - files={'image': (fname, inputzip.read(zipinfo))}, verify=sslverify) - if res.status_code != http.client.OK: - log.error('msg="Failed to push included file" filename="%s" httpcode="%d"' % (fname, res.status_code)) - if mddoc: - # for backwards compatibility, drop the hardcoded reverse proxy paths if found in the document - mddoc = mddoc.replace(b'/byoa/codimd/', b'/') - return mddoc - - -#def _isslides(doc): -# '''Heuristically look for signatures of slides in the header of a md document''' -# return doc[:9].decode() == '---\ntitle' or doc[:8].decode() == '---\ntype' or doc[:16].decode() == '---\nslideOptions' + try: + inputzip = zipfile.ZipFile(io.BytesIO(inputbuf), compression=zipfile.ZIP_STORED) + for zipinfo in inputzip.infolist(): + fname = zipinfo.filename + log.debug(f'msg="Extracting attachment" name="{fname}"') + if os.path.splitext(fname)[1] == '.md': + mddoc = inputzip.read(zipinfo) + else: + # first check if the file already exists in CodiMD: + res = requests.head(appurl + '/uploads/' + fname, verify=sslverify, timeout=10) + if res.status_code == http.client.OK and int(res.headers['Content-Length']) == zipinfo.file_size: + # yes (assume that hashed filename AND size matching is a good enough content match!) 
+ log.debug(f'msg="Skipped existing attachment" filename="{fname}"') + continue + # check for collision + if res.status_code == http.client.OK: + log.warning(f'msg="Attachment collision detected" filename="{fname}"') + # append a random letter to the filename + name, ext = os.path.splitext(fname) + fname = name + '_' + chr(randint(65, 65+26)) + ext + # and replace its reference in the document (this creates a copy of the doc, not very efficient) + mddoc = mddoc.replace(bytes(zipinfo.filename), bytes(fname)) + # OK, let's upload + log.debug(f'msg="Pushing attachment" filename="{fname}"') + res = requests.post(appurl + '/uploadimage', params={'generateFilename': 'false'}, + files={'image': (fname, inputzip.read(zipinfo))}, verify=sslverify, timeout=10) + if res.status_code != http.client.OK: + log.error('msg="Failed to push included file" filename="%s" httpcode="%d"' % (fname, res.status_code)) + if mddoc: + # for backwards compatibility, drop the hardcoded reverse proxy paths if found in the document + mddoc = mddoc.replace(b'/byoa/codimd/', b'/') + return mddoc + except zipfile.BadZipFile as e: + log.warn(f'msg="File is not in a valid zip format" exception="{e}"') + raise AppFailure('The file is not in the expected zipped format') from e + except requests.exceptions.RequestException as e: + log.error(f'msg="Exception raised attempting to connect to CodiMD" exception="{e}"') + raise AppFailure('Failed to connect to CodiMD') from e def _fetchfromcodimd(wopilock, acctok): '''Fetch a given document from from CodiMD, raise AppFailure in case of errors''' try: - res = requests.get(appurl + ('/' if wopilock['doc'][0] != '/' else '') + wopilock['doc'] + '/download', verify=sslverify) + res = requests.get(appurl + ('/' if wopilock['doc'][0] != '/' else '') + wopilock['doc'] + '/download', + verify=sslverify, timeout=10) if res.status_code != http.client.OK: log.error('msg="Unable to fetch document from CodiMD" token="%s" response="%d: %s"' % - (acctok[-20:], 
res.status_code, res.content.decode())) + (acctok[-20:], res.status_code, res.content.decode()[:50])) raise AppFailure return res.content - except requests.exceptions.ConnectionError as e: - log.error('msg="Exception raised attempting to connect to CodiMD" exception="%s"' % e) - raise AppFailure + except requests.exceptions.RequestException as e: + log.error(f'msg="Exception raised attempting to connect to CodiMD" exception="{e}"') + raise AppFailure('Failed to connect to CodiMD') from e def loadfromstorage(filemd, wopisrc, acctok, docid): @@ -141,10 +143,14 @@ def loadfromstorage(filemd, wopisrc, acctok, docid): wasbundle = os.path.splitext(filemd['BaseFileName'])[1] == '.zmd' # if it's a bundled file, unzip it and push the attachments in the appropriate folder - if wasbundle: + if wasbundle and mdfile: mddoc = _unzipattachments(mdfile) else: mddoc = mdfile + # if the file was created on Windows, convert \r\n to \n for CodiMD to correctly edit it + if mddoc.find(b'\r\n') >= 0: + mddoc = mddoc.replace(b'\r\n', b'\n') + try: if not docid: # read-only case: push the doc to a newly generated note with a random docid @@ -152,55 +158,60 @@ def loadfromstorage(filemd, wopisrc, acctok, docid): allow_redirects=False, params={'mode': 'locked'}, headers={'Content-Type': 'text/markdown'}, - verify=sslverify) + verify=sslverify, + timeout=10) if res.status_code == http.client.REQUEST_ENTITY_TOO_LARGE: - log.error('msg="File is too large to be edited in CodiMD" token="%s"') + log.error(f'msg="File is too large to be edited in CodiMD" token="{acctok[-20:]}"') raise AppFailure(TOOLARGE) if res.status_code != http.client.FOUND: log.error('msg="Unable to push read-only document to CodiMD" token="%s" response="%d"' % (acctok[-20:], res.status_code)) raise AppFailure - docid = urlparse.urlsplit(res.next.url).path.split('/')[-1] - log.info('msg="Pushed read-only document to CodiMD" docid="%s" token="%s"' % (docid, acctok[-20:])) + docid = 
urlparse.urlsplit(res.headers['location']).path.split('/')[-1] + log.info(f'msg="Pushed read-only document to CodiMD" docid="{docid}" token="{acctok[-20:]}"') + else: # reserve the given docid in CodiMD via a HEAD request res = requests.head(appurl + '/' + docid, allow_redirects=False, - verify=sslverify) + verify=sslverify, + timeout=10) if res.status_code not in (http.client.OK, http.client.FOUND): log.error('msg="Unable to reserve note hash in CodiMD" token="%s" response="%d"' % (acctok[-20:], res.status_code)) raise AppFailure + # check if the target docid is real or is a redirect if res.status_code == http.client.FOUND: - newdocid = urlparse.urlsplit(res.next.url).path.split('/')[-1] + newdocid = urlparse.urlsplit(res.headers['location']).path.split('/')[-1] log.info('msg="Document got aliased in CodiMD" olddocid="%s" docid="%s" token="%s"' % (docid, newdocid, acctok[-20:])) docid = newdocid - else: - log.debug('msg="Got note hash from CodiMD" docid="%s"' % docid) + # push the document to CodiMD with the update API res = requests.put(appurl + '/api/notes/' + docid, json={'content': mddoc.decode()}, - verify=sslverify) + verify=sslverify, + timeout=10) if res.status_code == http.client.FORBIDDEN: # the file got unlocked because of no activity, yet some user is there: let it go - log.warning('msg="Document was being edited in CodiMD, redirecting user" token"%s"' % acctok[-20:]) + log.warning(f'msg="Document was being edited in CodiMD, redirecting user" token="{acctok[-20:]}"') elif res.status_code == http.client.REQUEST_ENTITY_TOO_LARGE: - log.error('msg="File is too large to be edited in CodiMD" token="%s"') + log.error(f'msg="File is too large to be edited in CodiMD" docid="{docid}" token="{acctok[-20:]}"') raise AppFailure(TOOLARGE) elif res.status_code != http.client.OK: - log.error('msg="Unable to push document to CodiMD" token="%s" response="%d"' % - (acctok[-20:], res.status_code)) + log.error('msg="Unable to push document to CodiMD" docid="%s" token="%s" 
response="%d"' % + (docid, acctok[-20:], res.status_code)) raise AppFailure - log.info('msg="Pushed document to CodiMD" docid="%s" token="%s"' % (docid, acctok[-20:])) - except requests.exceptions.ConnectionError as e: - log.error('msg="Exception raised attempting to connect to CodiMD" exception="%s"' % e) - raise AppFailure + + log.info(f'msg="Pushed document to CodiMD" docid="{docid}" token="{acctok[-20:]}"') + except requests.exceptions.RequestException as e: + log.error(f'msg="Exception raised attempting to connect to CodiMD" exception="{e}"') + raise AppFailure from e except UnicodeDecodeError as e: - log.warning('msg="Invalid UTF-8 content found in file" exception="%s"' % e) + log.warning(f'msg="Invalid UTF-8 content found in file" exception="{e}"') raise AppFailure('File contains an invalid UTF-8 character, was it corrupted? ' + - 'Please fix it in a regular editor before opening it in CodiMD.') + 'Please fix it in a regular editor before opening it in CodiMD.') from e # generate and return a WOPI lock structure for this document return wopic.generatelock(docid, filemd, mddoc, acctok, False) @@ -213,11 +224,13 @@ def _getattachments(mddoc, docfilename, forcezip=False): zip_buffer = io.BytesIO() response = None for attachment in upload_re.findall(mddoc): - log.debug('msg="Fetching attachment" url="%s"' % attachment) - res = requests.get(appurl + attachment, verify=sslverify) - if res.status_code != http.client.OK: - log.error('msg="Failed to fetch included file, skipping" path="%s" response="%d"' % ( - attachment, res.status_code)) + log.debug(f'msg="Fetching attachment" url="{attachment}"') + try: + res = requests.get(appurl + attachment, verify=sslverify, timeout=10) + if res.status_code != http.client.OK: + raise ValueError(res.status_code) + except (requests.exceptions.RequestException, ValueError) as e: + log.error(f'msg="Failed to fetch included file, skipping" path="{attachment}" type="{type(e)}" error="{e}"') # also notify the user response = 
wopic.jsonify('Failed to include a referenced picture in the saved file'), http.client.NOT_FOUND continue @@ -236,7 +249,7 @@ def savetostorage(wopisrc, acctok, isclose, wopilock, onlyfetch=False): '''Copy document from CodiMD back to storage''' # get document from CodiMD try: - log.info('msg="Fetching file from CodiMD" isclose="%s" url="%s" token="%s"' % + log.info('msg="Fetching file from CodiMD" isclose="%s" appurl="%s" token="%s"' % (isclose, appurl + wopilock['doc'], acctok[-20:])) mddoc = _fetchfromcodimd(wopilock, acctok) if onlyfetch: @@ -254,8 +267,7 @@ def savetostorage(wopisrc, acctok, isclose, wopilock, onlyfetch=False): wasbundle = os.path.splitext(wopilock['fn'])[1] == '.zmd' bundlefile = attresponse = None if not disablezip or wasbundle: # in disablezip mode, preserve existing .zmd files but don't create new ones - bundlefile, attresponse = _getattachments(mddoc.decode(), wopilock['fn'].replace('.zmd', '.md'), - (wasbundle and not isclose)) + bundlefile, attresponse = _getattachments(mddoc.decode(), wopilock['fn'].replace('.zmd', '.md'), wasbundle) # WOPI PutFile for the file or the bundle if it already existed if (wasbundle ^ (not bundlefile)) or not isclose: diff --git a/src/bridge/etherpad.py b/src/bridge/etherpad.py index b2a8a51f..a792ea91 100644 --- a/src/bridge/etherpad.py +++ b/src/bridge/etherpad.py @@ -13,7 +13,7 @@ import urllib.parse as urlparse import requests import bridge.wopiclient as wopic - +import core.wopiutils as utils # initialized by the main class or by the init method appurl = None @@ -40,54 +40,56 @@ def init(_appurl, _appinturl, _apikey): # create a general group to attach all pads; can raise AppFailure groupid = _apicall('createGroupIfNotExistsFor', {'groupMapper': 1}) groupid = groupid['data']['groupID'] - log.info('msg="Got Etherpad global groupid" groupid="%s"' % groupid) + log.info(f'msg="Got Etherpad global groupid" groupid="{groupid}"') def _apicall(method, params, data=None, acctok=None, raiseonnonzerocode=True): 
'''Generic method to call the Etherpad REST API''' params['apikey'] = apikey try: - res = requests.post(appurl + '/api/1/' + method, params=params, data=data, verify=sslverify) + res = requests.post(appurl + '/api/1/' + method, params=params, data=data, verify=sslverify, timeout=10) if res.status_code != http.client.OK: log.error('msg="Failed to call Etherpad" method="%s" token="%s" response="%d: %s"' % (method, acctok[-20:] if acctok else 'N/A', res.status_code, res.content.decode())) - raise AppFailure - except requests.exceptions.ConnectionError as e: - log.error('msg="Exception raised attempting to connect to Etherpad" method="%s" exception="%s"' % (method, e)) - raise AppFailure + raise AppFailure('Failed to connect to Etherpad') + except requests.exceptions.RequestException as e: + log.error(f'msg="Exception raised attempting to connect to Etherpad" method="{method}" exception="{e}"') + raise AppFailure('Failed to connect to Etherpad') from e res = res.json() if res['code'] != 0 and raiseonnonzerocode: log.error('msg="Error response from Etherpad" method="%s" token="%s" response="%s"' % (method, acctok[-20:] if acctok else 'N/A', res['message'])) - raise AppFailure + raise AppFailure('Error response from Etherpad') log.debug('msg="Called Etherpad API" method="%s" token="%s" result="%s"' % (method, acctok[-20:] if acctok else 'N/A', res)) return res -def getredirecturl(isreadwrite, wopisrc, acctok, docid, displayname): +def getredirecturl(viewmode, wopisrc, acctok, docid, _filename, displayname, _revatok): '''Return a valid URL to the app for the given WOPI context''' + if viewmode in (utils.ViewMode.READ_ONLY, utils.ViewMode.VIEW_ONLY): + # for read-only mode generate a read-only link + res = _apicall('getReadOnlyID', {'padID': docid}, acctok=acctok) + return appexturl + f"/p/{res['data']['readOnlyID']}?userName={urlparse.quote_plus(displayname)}" + # pass to Etherpad the required metadata for the save webhook try: res = requests.post(appurl + 
'/setEFSSMetadata', - params={'padID': docid, 'wopiSrc': urlparse.quote_plus(wopisrc), 'accessToken': acctok, - 'apikey': apikey}, - verify=sslverify) + params={'padID': docid, 'wopiSrc': urlparse.quote_plus(wopisrc), + 'accessToken': acctok, 'apikey': apikey}, + verify=sslverify, + timeout=10) if res.status_code != http.client.OK or res.json()['code'] != 0: log.error('msg="Failed to call Etherpad" method="setEFSSMetadata" token="%s" response="%d: %s"' % (acctok[-20:], res.status_code, res.content.decode().replace('"', "'"))) - raise AppFailure - log.debug('msg="Called Etherpad" method="setEFSSMetadata" token="%s"' % acctok[-20:]) - except requests.exceptions.ConnectionError as e: - log.error('msg="Exception raised attempting to connect to Etherpad" method="setEFSSMetadata" exception="%s"' % e) - raise AppFailure + raise AppFailure('Error response from Etherpad') + log.debug(f'msg="Called Etherpad" method="setEFSSMetadata" token="{acctok[-20:]}"') + except requests.exceptions.RequestException as e: + log.error(f'msg="Exception raised attempting to connect to Etherpad" method="setEFSSMetadata" exception="{e}"') + raise AppFailure('Failed to connect to Etherpad') from e - if not isreadwrite: - # for read-only mode generate a read-only link - res = _apicall('getReadOnlyID', {'padID': docid}, acctok=acctok) - return appexturl + '/p/%s?userName=%s' % (res['data']['readOnlyID'], displayname) - # return the URL to the pad - return appexturl + '/p/%s?userName=%s' % (docid, displayname) + # return the URL to the pad for editing (a PREVIEW viewmode is not supported) + return appexturl + f'/p/{docid}?userName={urlparse.quote_plus(displayname)}' # Cloud storage to Etherpad @@ -105,7 +107,7 @@ def loadfromstorage(filemd, wopisrc, acctok, docid): try: if not docid: docid = ''.join([choice(ascii_lowercase) for _ in range(20)]) - log.debug('msg="Generated random padID for read-only document" padid="%s" token="%s"' % (docid, acctok[-20:])) + log.debug(f'msg="Generated random padID 
for read-only document" padid="{docid}" token="{acctok[-20:]}"') # first drop previous pad if it exists _apicall('deletePad', {'padID': docid}, acctok=acctok, raiseonnonzerocode=False) # create pad with the given docid as name @@ -115,15 +117,16 @@ def loadfromstorage(filemd, wopisrc, acctok, docid): res = requests.post(appurl + '/p/' + docid + '/import', files={'file': (docid + '.etherpad', epfile, 'application/json')}, params={'apikey': apikey}, - verify=sslverify) + verify=sslverify, + timeout=10) if res.status_code != http.client.OK: - log.error('msg="Unable to push document to Etherpad" token="%s" padid="%s" response="%d: %s" content="%s"' % - (acctok[-20:], docid, res.status_code, res.content.decode(), epfile.decode())) - raise AppFailure - log.info('msg="Pushed document to Etherpad" padid="%s" token="%s"' % (docid, acctok[-20:])) - except requests.exceptions.ConnectionError as e: - log.error('msg="Exception raised attempting to connect to Etherpad" method="import" exception="%s"' % e) - raise AppFailure + log.error('msg="Unable to push document to Etherpad" token="%s" padid="%s" response="%d: %s"' % + (acctok[-20:], docid, res.status_code, res.content.decode())) + raise AppFailure('Error response from Etherpad') + log.info(f'msg="Pushed document to Etherpad" padid="{docid}" token="{acctok[-20:]}"') + except requests.exceptions.RequestException as e: + log.error(f'msg="Exception raised attempting to connect to Etherpad" method="import" exception="{e}"') + raise AppFailure('Failed to connect to Etherpad') from e # generate and return a WOPI lock structure for this document return wopic.generatelock(docid, filemd, epfile, acctok, False) @@ -136,14 +139,15 @@ def _fetchfrometherpad(wopilock, acctok): try: # this operation does not use the API (and it is NOT protected by the API key!), so we use a plain GET res = requests.get(appurl + '/p' + wopilock['doc'] + '/export/etherpad', - verify=sslverify) + verify=sslverify, + timeout=10) if res.status_code != 
http.client.OK: log.error('msg="Unable to fetch document from Etherpad" token="%s" response="%d: %s"' % - (acctok[-20:], res.status_code, res.content.decode())) + (acctok[-20:], res.status_code, res.content.decode()[:50])) raise AppFailure return res.content - except requests.exceptions.ConnectionError as e: - log.error('msg="Exception raised attempting to connect to Etherpad" exception="%s"' % e) + except requests.exceptions.RequestException as e: + log.error(f'msg="Exception raised attempting to connect to Etherpad" exception="{e}"') raise AppFailure diff --git a/src/bridge/readme.md b/src/bridge/readme.md index 15733148..4f1ede77 100644 --- a/src/bridge/readme.md +++ b/src/bridge/readme.md @@ -19,10 +19,9 @@ The module implements a stateless server, as all context information is stored i ### CodiMD specifics * Support for readonly (publish or slide) mode vs. read/write mode -* Transparent handling of uploads (i.e. pictures): - * If a note has no pictures, it is handled as a `.md` text file - * Once a picture is included, on close the save to WOPI is executed as a zipped bundle, with a `.zmd` extension, and the previous `.md` file is removed; similarly if all pictures are removed and the file is saved back as `.md` - * Files ending as `.zmd` are equally treated as zipped bundles and expanded to CodiMD +* Inclusion of pictures supported according to file extension: + * For plain `.md` files, directly incorporating pictures is disabled, but a cloud file-picker is enabled to allow incorporating links to external pictures + * If a file is created as `.zmd` (for _zipped markdown_), it is possible to include pictures and the save to WOPI is executed as a zipped bundle, including all pictures (if any) in the bundle. 
On load, `.zmd` files are transparently expanded to CodiMD #### Required CodiMD APIs * `/new` push a new file to a random `` @@ -36,5 +35,7 @@ The module implements a stateless server, as all context information is stored i ### Etherpad specifics + +This is still work in progress as the etherpad plugin is incomplete. * Support for readonly and read/write files * Automatic save via dedicated `ep_sciencemesh` plugin diff --git a/src/bridge/wopiclient.py b/src/bridge/wopiclient.py index 2f773514..d418958a 100644 --- a/src/bridge/wopiclient.py +++ b/src/bridge/wopiclient.py @@ -38,12 +38,12 @@ def request(wopisrc, acctok, method, contents=None, headers=None): log.debug('msg="Calling WOPI" url="%s" headers="%s" acctok="%s" ssl="%s"' % (wopiurl, headers, acctok[-20:], sslverify)) if method == 'GET': - return requests.get('%s?access_token=%s' % (wopiurl, acctok), verify=sslverify) + return requests.get(f'{wopiurl}?access_token={acctok}', verify=sslverify, timeout=10) if method == 'POST': - return requests.post('%s?access_token=%s' % (wopiurl, acctok), verify=sslverify, - headers=headers, data=contents) - except (requests.exceptions.ConnectionError, IOError) as e: - log.error('msg="Unable to contact WOPI" wopiurl="%s" acctok="%s" response="%s"' % (wopiurl, acctok, e)) + return requests.post(f'{wopiurl}?access_token={acctok}', verify=sslverify, + headers=headers, data=contents, timeout=10) + except (requests.exceptions.RequestException, IOError) as e: + log.error(f'msg="Unable to contact WOPI" wopiurl="{wopiurl}" acctok="{acctok}" response="{e}"') res = Response() res.status_code = http.client.INTERNAL_SERVER_ERROR return res @@ -74,7 +74,7 @@ def checkfornochanges(content, wopilock, acctokforlog): h = hashlib.sha1() h.update(content) if h.hexdigest() == wopilock['dig']: - log.info('msg="File unchanged, skipping save" token="%s"' % acctokforlog[-20:]) + log.info(f'msg="File unchanged, skipping save" token="{acctokforlog[-20:]}"') return True return False @@ -85,16 +85,16 @@ 
def getlock(wopisrc, acctok): res = request(wopisrc, acctok, 'POST', headers={'X-Wopi-Override': 'GET_LOCK'}) if res.status_code != http.client.OK: # lock got lost or any other error - raise InvalidLock(res.content.decode()) + raise InvalidLock(res.status_code) # the lock is expected to be a JSON dict, see generatelock() return json.loads(res.headers['X-WOPI-Lock']) except (ValueError, KeyError, json.decoder.JSONDecodeError) as e: - log.warning('msg="Missing or malformed WOPI lock" exception="%s" error="%s"' % (type(e), e)) - raise InvalidLock(e) + log.warning(f'msg="Missing or malformed WOPI lock" exception="{type(e)}: {e}"') + raise InvalidLock(e) from e -def _getheadersforrefreshlock(acctok, wopilock, digest, toclose): - '''Helper function for refreshlock to generate the old and new lock structures''' +def _getheadersforrelock(acctok, wopilock, digest, toclose): + '''Helper function for relock to generate the old and new lock structures''' newlock = json.loads(json.dumps(wopilock)) # this is a hack for a deep copy if toclose: # we got the full 'toclose' dict, push it as is @@ -105,7 +105,7 @@ def _getheadersforrefreshlock(acctok, wopilock, digest, toclose): if digest and wopilock['dig'] != digest: newlock['dig'] = digest return { - 'X-Wopi-Override': 'REFRESH_LOCK', + 'X-Wopi-Override': 'LOCK', 'X-WOPI-OldLock': json.dumps(wopilock), 'X-WOPI-Lock': json.dumps(newlock) }, newlock @@ -113,19 +113,19 @@ def _getheadersforrefreshlock(acctok, wopilock, digest, toclose): def refreshlock(wopisrc, acctok, wopilock, digest=None, toclose=None): '''Refresh an existing WOPI lock. 
Returns the new lock if successful, None otherwise''' - h, newlock = _getheadersforrefreshlock(acctok, wopilock, digest, toclose) + h, newlock = _getheadersforrelock(acctok, wopilock, digest, toclose) res = request(wopisrc, acctok, 'POST', headers=h) if res.status_code == http.client.OK: return newlock if res.status_code == http.client.CONFLICT: # we have a race condition, another thread has updated the lock before us - log.warning('msg="Got conflict in refreshing lock, retrying" url="%s"' % wopisrc) + log.warning(f'msg="Got conflict in refreshing lock, retrying" url="{wopisrc}"') try: currlock = json.loads(res.headers['X-WOPI-Lock']) except json.decoder.JSONDecodeError as e: log.error('msg="Got unresolvable conflict in RefreshLock" url="%s" previouslock="%s" error="%s"' % (wopisrc, res.headers.get('X-WOPI-Lock'), e)) - raise InvalidLock('Found existing malformed lock on refreshlock') + raise InvalidLock('Found existing malformed lock on refreshlock') from e if toclose: # merge toclose token lists for t in currlock['tocl']: @@ -133,7 +133,7 @@ def refreshlock(wopisrc, acctok, wopilock, digest=None, toclose=None): if digest: wopilock['dig'] = currlock['dig'] # retry with the newly got lock - h, newlock = _getheadersforrefreshlock(acctok, wopilock, digest, toclose) + h, newlock = _getheadersforrelock(acctok, wopilock, digest, toclose) res = request(wopisrc, acctok, 'POST', headers=h) if res.status_code == http.client.OK: return newlock @@ -152,7 +152,7 @@ def refreshdigestandlock(wopisrc, acctok, wopilock, content): dig = h.hexdigest() try: wopilock = refreshlock(wopisrc, acctok, wopilock, digest=dig) - log.info('msg="Save completed" filename="%s" dig="%s" token="%s"' % (wopilock['fn'], dig, acctok[-20:])) + log.info(f"msg=\"Save completed\" filename=\"{wopilock['fn']}\" dig=\"{dig}\" token=\"{acctok[-20:]}\"") return jsonify('File saved successfully'), http.client.OK except InvalidLock: return jsonify('File saved, but failed to refresh lock'), 
http.client.INTERNAL_SERVER_ERROR @@ -163,8 +163,8 @@ def relock(wopisrc, acctok, docid, isclose): # first get again the file metadata res = request(wopisrc, acctok, 'GET') if res.status_code != http.client.OK: - log.warning('msg="Session expired or file renamed when attempting to relock it" response="%d" token="%s"' % - (res.status_code, acctok[-20:])) + log.warning('msg="Session expired or file renamed when attempting to relock it" response="%d" docid="%s" token="%s"' % + (res.status_code, docid, acctok[-20:])) raise InvalidLock('Session expired, please refresh this page') filemd = res.json() @@ -192,13 +192,18 @@ def relock(wopisrc, acctok, docid, isclose): def handleputfile(wopicall, wopisrc, res): '''Deal with conflicts or errors following a PutFile/PutRelative request''' if res.status_code == http.client.CONFLICT: + # this is typically a user issue, return 500 and stop further editing log.warning('msg="Conflict when calling WOPI %s" url="%s" reason="%s"' % (wopicall, wopisrc, res.headers.get('X-WOPI-LockFailureReason'))) return jsonify('Error saving the file. 
%s' % res.headers.get('X-WOPI-LockFailureReason')), http.client.INTERNAL_SERVER_ERROR + if res.status_code == http.client.INTERNAL_SERVER_ERROR: + # hopefully this is transient and the server has kept a local copy for later recovery + log.error(f'msg="Calling WOPI {wopicall} failed, will retry" url="{wopisrc}" response="{res.status_code}"') + return jsonify('Error saving the file, will try again'), http.client.FAILED_DEPENDENCY if res.status_code != http.client.OK: - # hopefully the server has kept a local copy for later recovery - log.error('msg="Calling WOPI %s failed" url="%s" response="%s"' % (wopicall, wopisrc, res.status_code)) + # any other error is considered also fatal + log.error(f'msg="Calling WOPI {wopicall} failed" url="{wopisrc}" response="{res.status_code}"') return jsonify('Error saving the file, please contact support'), http.client.INTERNAL_SERVER_ERROR return None @@ -230,7 +235,7 @@ def saveas(wopisrc, acctok, wopilock, targetname, content): log.warning('msg="Failed to delete the previous file" token="%s" response="%d"' % (acctok[-20:], res.status_code)) else: - log.info('msg="Previous file unlocked and removed successfully" token="%s"' % acctok[-20:]) + log.info(f'msg="Previous file unlocked and removed successfully" token="{acctok[-20:]}"') - log.info('msg="Final save completed" filename="%s" token="%s"' % (newname, acctok[-20:])) + log.info(f'msg="Final save completed" filename="{newname}" token="{acctok[-20:]}"') return jsonify('File saved successfully'), http.client.OK diff --git a/src/core/commoniface.py b/src/core/commoniface.py index 86fc8158..b2b6d571 100644 --- a/src/core/commoniface.py +++ b/src/core/commoniface.py @@ -17,7 +17,8 @@ ENOENT_MSG = 'No such file or directory' # standard error thrown when attempting to overwrite a file/xattr in O_EXCL mode -EXCL_ERROR = 'File exists and islock flag requested' +# or when a lock operation cannot be performed because of failed preconditions +EXCL_ERROR = 'File/xattr exists but EXCL mode 
requested, lock mismatch or lock expired' # standard error thrown when attempting an operation without the required access rights ACCESS_ERROR = 'Operation not permitted' @@ -57,11 +58,11 @@ def genrevalock(appname, value): { "lock_id": value, "type": 2, # LOCK_TYPE_WRITE - "app_name": appname if appname else "wopi", + "app_name": appname, "user": {}, "expiration": { "seconds": int(time.time()) - + config.getint("general", "wopilockexpiration") + + config.getint('general', 'wopilockexpiration') }, } ).encode() @@ -72,24 +73,18 @@ def retrieverevalock(rawlock): '''Restores the JSON payload from a base64-encoded Reva lock''' try: return json.loads(urlsafe_b64decode(rawlock + '==').decode()) - except (B64Error, json.JSONDecodeError) as e: + except (B64Error, json.JSONDecodeError, UnicodeDecodeError) as e: raise IOError("Unable to parse existing lock: " + str(e)) def encodeinode(endpoint, inode): - '''Encodes a given endpoint and inode to be used as a safe WOPISrc: endpoint is assumed to already be URL safe''' - return endpoint + '-' + urlsafe_b64encode(inode.encode()).decode() - - -def validatelock(filepath, appname, oldlock, op, log): - '''Common logic for validating locks in the xrootd and local storage interfaces. - Duplicates some logic implemented in Reva for the cs3 storage interface''' - if not oldlock: - log.warning('msg="Failed to %s" filepath="%s" appname="%s" reason="%s"' % - (op, filepath, appname, 'File was not locked or lock had expired')) - raise IOError('File was not locked or lock had expired') - if oldlock['app_name'] != 'wopi' and appname != 'wopi' and oldlock['app_name'] and appname \ - and oldlock['app_name'] != appname: - log.warning('msg="Failed to %s" filepath="%s" appname="%s" reason="%s"' % - (op, filepath, appname, 'File is locked by %s' % oldlock['app_name'])) - raise IOError('File is locked by %s' % oldlock['app_name']) + '''Encodes a given endpoint and inode to be used as a safe WOPISrc: endpoint is assumed to already be URL safe. 
+ Note that the separator is chosen to be `!` (similar to how the web frontend is implemented) to allow the inverse + operation, assuming that `endpoint` does not contain any `!` characters.''' + return endpoint + '!' + urlsafe_b64encode(inode.encode()).decode() + + +def decodeinode(inode): + '''Decodes an inode obtained from encodeinode()''' + e, f = inode.split('!') + return e, urlsafe_b64decode(f.encode()).decode() diff --git a/src/core/cs3iface.py b/src/core/cs3iface.py index 57d0b27b..0860b7ee 100644 --- a/src/core/cs3iface.py +++ b/src/core/cs3iface.py @@ -14,13 +14,17 @@ import cs3.storage.provider.v1beta1.resources_pb2 as cs3spr import cs3.storage.provider.v1beta1.provider_api_pb2 as cs3sp -import cs3.gateway.v1beta1.gateway_api_pb2_grpc as cs3gw_grpc +import cs3.auth.registry.v1beta1.registry_api_pb2 as cs3auth import cs3.gateway.v1beta1.gateway_api_pb2 as cs3gw +import cs3.gateway.v1beta1.gateway_api_pb2_grpc as cs3gw_grpc import cs3.rpc.v1beta1.code_pb2 as cs3code import cs3.types.v1beta1.types_pb2 as types import core.commoniface as common +# key used if the `lockasattr` option is true, in order to store the lock payload without ensuring any lock semantic +LOCK_ATTR_KEY = 'wopi.advlock' + # module-wide state ctx = {} # "map" to store some module context: cf. 
init() log = None @@ -32,22 +36,40 @@ def init(inconfig, inlog): log = inlog ctx['chunksize'] = inconfig.getint('io', 'chunksize') ctx['ssl_verify'] = inconfig.getboolean('cs3', 'sslverify', fallback=True) - ctx['authtokenvalidity'] = inconfig.getint('cs3', 'authtokenvalidity') ctx['lockexpiration'] = inconfig.getint('general', 'wopilockexpiration') - if inconfig.has_option('cs3', 'revagateway'): - revagateway = inconfig.get('cs3', 'revagateway') - else: - # legacy entry, to be dropped at next major release - revagateway = inconfig.get('cs3', 'revahost') - # prepare the gRPC connection - ch = grpc.insecure_channel(revagateway) + ctx['lockasattr'] = inconfig.getboolean('cs3', 'lockasattr', fallback=False) + ctx['locknotimpl'] = False + ctx['revagateway'] = inconfig.get('cs3', 'revagateway') + ctx['xattrcache'] = {} # this is a map cs3ref -> arbitrary_metadata as returned by Stat() + ctx['grpc_timeout'] = inconfig.getint('cs3', "grpctimeout", fallback=10) + ctx['http_timeout'] = inconfig.getint('cs3', "httptimeout", fallback=10) + # prepare the gRPC channel and validate that the revagateway gRPC server is ready + try: + ch = grpc.insecure_channel(ctx['revagateway']) + grpc.channel_ready_future(ch).result(timeout=ctx['grpc_timeout']) + except grpc.FutureTimeoutError as e: + log.error('msg="Failed to connect to Reva via GRPC" error="%s"' % e) + raise IOError(e) from e ctx['cs3gw'] = cs3gw_grpc.GatewayAPIStub(ch) -def getuseridfromcreds(token, _wopiuser): +def healthcheck(): + '''Probes the storage and returns a status message. 
For cs3 storage, we execute a call to ListAuthProviders''' + try: + res = ctx['cs3gw'].ListAuthProviders(request=cs3auth.ListAuthProvidersRequest()) + log.debug('msg="Executed ListAuthProviders as health check" endpoint="%s" result="%s"' % + (ctx['revagateway'], res.status)) + return 'OK' + except grpc.RpcError as e: + log.error('msg="Health check: failed to call ListAuthProviders" endpoint="%s" error="%s"' % + (ctx['revagateway'], e)) + return str(e) + + +def getuseridfromcreds(token, wopiuser): '''Maps a Reva token and wopiuser to the credentials to be used to access the storage. - For the CS3 API case, this is just the token''' - return token + For the CS3 API case this is the token, and wopiuser is expected to be `username!userid_as_returned_by_stat`''' + return token, wopiuser.split('@')[0] + '!' + wopiuser def _getcs3reference(endpoint, fileref): @@ -60,7 +82,10 @@ def _getcs3reference(endpoint, fileref): if len(parts) == 2: space_id = parts[1] - if fileref.find('/') > 0: + if fileref.find('/') == 0: + # assume we have an absolute path (works in Reva master, not in edge) + ref = cs3spr.Reference(path=fileref) + elif fileref.find('/') > 0: # assume we have a relative path in the form `/`, # also works if we get `//` ref = cs3spr.Reference(resource_id=cs3spr.ResourceId(storage_id=endpoint, space_id=space_id, @@ -72,11 +97,16 @@ def _getcs3reference(endpoint, fileref): return ref +def _hashedref(endpoint, fileref): + '''Returns an hashable key for the given endpoint and file reference''' + return str(endpoint) + str(fileref) + + def authenticate_for_test(userid, userpwd): '''Use basic authentication against Reva for testing purposes''' authReq = cs3gw.AuthenticateRequest(type='basic', client_id=userid, client_secret=userpwd) authRes = ctx['cs3gw'].Authenticate(authReq) - log.debug('msg="Authenticated user" res="%s"' % authRes) + log.debug(f'msg="Authenticated user" userid="{authRes.user.id}"') if authRes.status.code != cs3code.CODE_OK: raise IOError('Failed to 
authenticate as user ' + userid + ': ' + authRes.status.message) return authRes.token @@ -90,25 +120,35 @@ def stat(endpoint, fileref, userid, versioninv=1): ref = _getcs3reference(endpoint, fileref) statInfo = ctx['cs3gw'].Stat(request=cs3sp.StatRequest(ref=ref), metadata=[('x-access-token', userid)]) tend = time.time() - if statInfo.status.code != cs3code.CODE_OK: - log.info('msg="Failed stat" fileref="%s" trace="%s" reason="%s"' % - (fileref, statInfo.status.trace, statInfo.status.message.replace('"', "'"))) - raise IOError(common.ENOENT_MSG if statInfo.status.code == cs3code.CODE_NOT_FOUND else statInfo.status.message) + if statInfo.status.code == cs3code.CODE_NOT_FOUND: + log.info(f'msg="File not found" endpoint="{endpoint}" fileref="{fileref}" trace="{statInfo.status.trace}"') + raise IOError(common.ENOENT_MSG) + if statInfo.status.code != cs3code.CODE_OK: + log.error('msg="Failed stat" endpoint="%s" fileref="%s" trace="%s" reason="%s"' % + (endpoint, fileref, statInfo.status.trace, statInfo.status.message.replace('"', "'"))) + raise IOError(statInfo.status.message) if statInfo.info.type == cs3spr.RESOURCE_TYPE_CONTAINER: - log.info('msg="Invoked stat" fileref="%s" trace="%s" result="ISDIR"' % (fileref, statInfo.status.trace)) + log.info('msg="Invoked stat" endpoint="%s" fileref="%s" trace="%s" result="ISDIR"' % + (endpoint, fileref, statInfo.status.trace)) raise IOError('Is a directory') - if statInfo.info.type not in (cs3spr.RESOURCE_TYPE_FILE, cs3spr.RESOURCE_TYPE_SYMLINK): - log.warning('msg="Invoked stat" fileref="%s" unexpectedtype="%d"' % (fileref, statInfo.info.type)) + log.warning('msg="Invoked stat" endpoint="%s" fileref="%s" unexpectedtype="%d"' % + (endpoint, fileref, statInfo.info.type)) raise IOError('Unexpected type %d' % statInfo.info.type) inode = common.encodeinode(statInfo.info.id.storage_id, statInfo.info.id.opaque_id) - # here we build an hybrid path that can be used to reference the file, as the path is actually just the basename - # 
(and eventually the CS3 APIs should be updated to reflect that): note that as per specs the parent_id MUST be available - filepath = statInfo.info.parent_id.opaque_id + '/' + os.path.basename(statInfo.info.path) + if statInfo.info.path[0] == '/': + # we got an absolute path from Reva, use it + filepath = statInfo.info.path + else: + # we got a relative path (actually, just the basename): build an hybrid path that can be used to reference + # the file, using the parent_id that per specs MUST be available + filepath = statInfo.info.parent_id.opaque_id + '/' + os.path.basename(statInfo.info.path) log.info('msg="Invoked stat" fileref="%s" trace="%s" inode="%s" filepath="%s" elapsedTimems="%.1f"' % (fileref, statInfo.status.trace, inode, filepath, (tend-tstart)*1000)) + # cache the xattrs map prior to returning; note we're never cleaning this cache and let it grow indefinitely + ctx['xattrcache'][_hashedref(endpoint, fileref)] = statInfo.info.arbitrary_metadata.metadata return { 'inode': inode, 'filepath': filepath, @@ -124,88 +164,154 @@ def statx(endpoint, fileref, userid, versioninv=1): return stat(endpoint, fileref, userid, versioninv) -def setxattr(endpoint, filepath, userid, key, value, lockid): +def setxattr(endpoint, filepath, userid, key, value, lockmd): '''Set the extended attribute to using the given userid as access token''' - reference = _getcs3reference(endpoint, filepath) + ref = _getcs3reference(endpoint, filepath) md = cs3spr.ArbitraryMetadata() md.metadata.update({key: str(value)}) # pylint: disable=no-member - req = cs3sp.SetArbitraryMetadataRequest(ref=reference, arbitrary_metadata=md, lock_id=lockid) + try: + ctx['xattrcache'][_hashedref(endpoint, filepath)][key] = str(value) + except KeyError: + # we did not have this file in the cache, ignore + pass + lockid = None + if lockmd: + _, lockid = lockmd + req = cs3sp.SetArbitraryMetadataRequest(ref=ref, arbitrary_metadata=md, lock_id=lockid) res = ctx['cs3gw'].SetArbitraryMetadata(request=req, 
metadata=[('x-access-token', userid)]) + if res.status.code in [cs3code.CODE_FAILED_PRECONDITION, cs3code.CODE_ABORTED]: + # CS3 storages may refuse to set an xattr in case of lock mismatch: this is an overprotection, + # as the lock should concern the file's content, not its metadata, however we need to handle that + log.info('msg="Failed precondition on setxattr" filepath="%s" key="%s" trace="%s" reason="%s"' % + (filepath, key, res.status.trace, res.status.message.replace('"', "'"))) + raise IOError(common.EXCL_ERROR) if res.status.code != cs3code.CODE_OK: log.error('msg="Failed to setxattr" filepath="%s" key="%s" trace="%s" code="%s" reason="%s"' % (filepath, key, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) raise IOError(res.status.message) - log.debug('msg="Invoked setxattr" result="%s"' % res) + log.debug(f'msg="Invoked setxattr" result="{res}"') def getxattr(endpoint, filepath, userid, key): '''Get the extended attribute using the given userid as access token''' - tstart = time.time() - reference = _getcs3reference(endpoint, filepath) - statInfo = ctx['cs3gw'].Stat(request=cs3sp.StatRequest(ref=reference), metadata=[('x-access-token', userid)]) - tend = time.time() - if statInfo.status.code == cs3code.CODE_NOT_FOUND: - log.debug('msg="Invoked stat for getxattr on missing file" filepath="%s"' % filepath) - return None - if statInfo.status.code != cs3code.CODE_OK: - log.error('msg="Failed to stat" filepath="%s" trace="%s" key="%s" reason="%s"' % - (filepath, statInfo.status.trace, key, statInfo.status.message.replace('"', "'"))) - raise IOError(statInfo.status.message) + ref = _getcs3reference(endpoint, filepath) + statInfo = None + href = _hashedref(endpoint, filepath) + if href not in ctx['xattrcache']: + # cache miss, go for Stat and refresh cache + tstart = time.time() + statInfo = ctx['cs3gw'].Stat(request=cs3sp.StatRequest(ref=ref), metadata=[('x-access-token', userid)]) + tend = time.time() + if statInfo.status.code == 
cs3code.CODE_NOT_FOUND: + log.debug(f'msg="Invoked stat for getxattr on missing file" filepath="{filepath}"') + return None + if statInfo.status.code != cs3code.CODE_OK: + log.error('msg="Failed to stat" filepath="%s" userid="%s" trace="%s" key="%s" reason="%s"' % + (filepath, userid[-20:], statInfo.status.trace, key, statInfo.status.message.replace('"', "'"))) + raise IOError(statInfo.status.message) + log.debug(f'msg="Invoked stat for getxattr" filepath="{filepath}" elapsedTimems="{(tend - tstart) * 1000:.1f}"') + ctx['xattrcache'][href] = statInfo.info.arbitrary_metadata.metadata try: - xattrvalue = statInfo.info.arbitrary_metadata.metadata[key] + xattrvalue = ctx['xattrcache'][href][key] if xattrvalue == '': raise KeyError - log.debug('msg="Invoked stat for getxattr" filepath="%s" elapsedTimems="%.1f"' % (filepath, (tend - tstart) * 1000)) + if not statInfo: + log.debug(f'msg="Returning cached attr on getxattr" filepath="{filepath}" key="{key}"') return xattrvalue except KeyError: - log.warning('msg="Empty value or key not found in getxattr" filepath="%s" key="%s" trace="%s" metadata="%s"' % - (filepath, key, statInfo.status.trace, statInfo.info.arbitrary_metadata.metadata)) + log.info('msg="Empty value or key not found in getxattr" filepath="%s" key="%s" trace="%s" metadata="%s"' % + (filepath, key, statInfo.status.trace if statInfo else 'N/A', ctx['xattrcache'][href])) return None -def rmxattr(endpoint, filepath, userid, key, lockid): +def rmxattr(endpoint, filepath, userid, key, lockmd): '''Remove the extended attribute using the given userid as access token''' - reference = _getcs3reference(endpoint, filepath) - req = cs3sp.UnsetArbitraryMetadataRequest(ref=reference, arbitrary_metadata_keys=[key], lock_id=lockid) + ref = _getcs3reference(endpoint, filepath) + lockid = None + if lockmd: + _, lockid = lockmd + req = cs3sp.UnsetArbitraryMetadataRequest(ref=ref, arbitrary_metadata_keys=[key], lock_id=lockid) res = 
ctx['cs3gw'].UnsetArbitraryMetadata(request=req, metadata=[('x-access-token', userid)]) + if res.status.code in [cs3code.CODE_FAILED_PRECONDITION, cs3code.CODE_ABORTED]: + log.info('msg="Failed precondition on rmxattr" filepath="%s" key="%s" trace="%s" reason="%s"' % + (filepath, key, res.status.trace, res.status.message.replace('"', "'"))) + raise IOError(common.EXCL_ERROR) if res.status.code != cs3code.CODE_OK: log.error('msg="Failed to rmxattr" filepath="%s" trace="%s" key="%s" reason="%s"' % (filepath, key, res.status.trace, res.status.message.replace('"', "'"))) raise IOError(res.status.message) - log.debug('msg="Invoked rmxattr" result="%s"' % res.status) + try: + del ctx['xattrcache'][_hashedref(endpoint, filepath)][key] + except KeyError: + # we did not have this file in the cache, ignore + pass + log.debug(f'msg="Invoked rmxattr" result="{res.status}"') def setlock(endpoint, filepath, userid, appname, value): '''Set a lock to filepath with the given value metadata and appname as holder''' + if ctx['lockasattr'] and ctx['locknotimpl']: + log.debug(f'msg="Using xattrs to execute setlock" filepath="{filepath}" value="{value}"') + try: + currvalue = getxattr(endpoint, filepath, userid, LOCK_ATTR_KEY) + log.info('msg="Invoked setlock on an already locked entity" filepath="%s" appname="%s" previouslock="%s"' % + (filepath, appname, currvalue)) + raise IOError(common.EXCL_ERROR) + except KeyError: + expiration = int(time.time() + ctx['lockexpiration']) + setxattr(endpoint, filepath, userid, LOCK_ATTR_KEY, f'{appname}!{value}!{expiration}', None) + return + reference = _getcs3reference(endpoint, filepath) lock = cs3spr.Lock(type=cs3spr.LOCK_TYPE_WRITE, app_name=appname, lock_id=value, expiration={'seconds': int(time.time() + ctx['lockexpiration'])}) req = cs3sp.SetLockRequest(ref=reference, lock=lock) res = ctx['cs3gw'].SetLock(request=req, metadata=[('x-access-token', userid)]) - if res.status.code == cs3code.CODE_FAILED_PRECONDITION: + if res.status.code in 
[cs3code.CODE_FAILED_PRECONDITION, cs3code.CODE_ABORTED]: log.info('msg="Invoked setlock on an already locked entity" filepath="%s" appname="%s" trace="%s" reason="%s"' % (filepath, appname, res.status.trace, res.status.message.replace('"', "'"))) raise IOError(common.EXCL_ERROR) + if res.status.code == cs3code.CODE_UNIMPLEMENTED and ctx['lockasattr']: + ctx['locknotimpl'] = True + setlock(endpoint, filepath, userid, appname, value) + return if res.status.code != cs3code.CODE_OK: log.error('msg="Failed to setlock" filepath="%s" appname="%s" value="%s" trace="%s" code="%s" reason="%s"' % (filepath, appname, value, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) raise IOError(res.status.message) - log.debug('msg="Invoked setlock" filepath="%s" value="%s" result="%s"' % (filepath, value, res.status)) + log.debug(f'msg="Invoked setlock" filepath="{filepath}" value="{value}" result="{res.status}"') def getlock(endpoint, filepath, userid): '''Get the lock metadata for the given filepath''' + if ctx['lockasattr'] and ctx['locknotimpl']: + log.debug(f'msg="Using xattrs to execute getlock" filepath="{filepath}"') + try: + currvalue = getxattr(endpoint, filepath, userid, LOCK_ATTR_KEY) + return { + 'lock_id': currvalue.split('!')[1], + 'type': 2, # LOCK_TYPE_WRITE, though this is advisory! 
+ 'app_name': currvalue.split('!')[0], + 'user': {}, + 'expiration': int(currvalue.split('!')[2]) + } + except KeyError: + return None + reference = _getcs3reference(endpoint, filepath) req = cs3sp.GetLockRequest(ref=reference) res = ctx['cs3gw'].GetLock(request=req, metadata=[('x-access-token', userid)]) if res.status.code == cs3code.CODE_NOT_FOUND: - log.debug('msg="Invoked getlock on unlocked or missing file" filepath="%s"' % filepath) + log.debug(f'msg="Invoked getlock on unlocked or missing file" filepath="{filepath}"') return None + if res.status.code == cs3code.CODE_UNIMPLEMENTED and ctx['lockasattr']: + ctx['locknotimpl'] = True + return getlock(endpoint, filepath, userid) if res.status.code != cs3code.CODE_OK: log.error('msg="Failed to getlock" filepath="%s" trace="%s" code="%s" reason="%s"' % (filepath, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) raise IOError(res.status.message) - log.debug('msg="Invoked getlock" filepath="%s" result="%s"' % (filepath, res.lock)) + log.debug(f'msg="Invoked getlock" filepath="{filepath}" result="{res.lock}"') # rebuild a dict corresponding to the internal JSON structure used by Reva, cf. 
commoniface.py return { 'lock_id': res.lock.lock_id, @@ -222,35 +328,78 @@ def getlock(endpoint, filepath, userid): } -def refreshlock(endpoint, filepath, userid, appname, value): +def refreshlock(endpoint, filepath, userid, appname, value, oldvalue=None): '''Refresh the lock metadata for the given filepath''' + if ctx['lockasattr'] and ctx['locknotimpl']: + log.debug(f'msg="Using xattrs to execute setlock" filepath="{filepath}" value="{value}"') + try: + currvalue = getxattr(endpoint, filepath, userid, LOCK_ATTR_KEY) + if currvalue.split('!')[0] == appname and (not oldvalue or currvalue.split('!')[1] == oldvalue): + raise KeyError + log.info('msg="Failed precondition on refreshlock" filepath="%s" appname="%s" previouslock="%s"' % + (filepath, appname, currvalue)) + raise IOError(common.EXCL_ERROR) + except KeyError: + expiration = int(time.time() + ctx['lockexpiration']) + setxattr(endpoint, filepath, userid, LOCK_ATTR_KEY, f'{appname}!{value}!{expiration}', None) + return + reference = _getcs3reference(endpoint, filepath) lock = cs3spr.Lock(type=cs3spr.LOCK_TYPE_WRITE, app_name=appname, lock_id=value, expiration={'seconds': int(time.time() + ctx['lockexpiration'])}) - req = cs3sp.RefreshLockRequest(ref=reference, lock=lock) + req = cs3sp.RefreshLockRequest(ref=reference, lock=lock, existing_lock_id=oldvalue) res = ctx['cs3gw'].RefreshLock(request=req, metadata=[('x-access-token', userid)]) + if res.status.code in [cs3code.CODE_FAILED_PRECONDITION, cs3code.CODE_ABORTED]: + log.info('msg="Failed precondition on refreshlock" filepath="%s" appname="%s" trace="%s" reason="%s"' % + (filepath, appname, res.status.trace, res.status.message.replace('"', "'"))) + raise IOError(common.EXCL_ERROR) + if res.status.code == cs3code.CODE_UNIMPLEMENTED and ctx['lockasattr']: + ctx['locknotimpl'] = True + refreshlock(endpoint, filepath, userid, appname, value, oldvalue) + return if res.status.code != cs3code.CODE_OK: log.warning('msg="Failed to refreshlock" filepath="%s" 
appname="%s" value="%s" trace="%s" code="%s" reason="%s"' % (filepath, appname, value, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) raise IOError(res.status.message) - log.debug('msg="Invoked refreshlock" filepath="%s" value="%s" result="%s"' % (filepath, value, res.status)) + log.debug(f'msg="Invoked refreshlock" filepath="{filepath}" value="{value}" result="{res.status}"') def unlock(endpoint, filepath, userid, appname, value): '''Remove the lock for the given filepath''' + if ctx['lockasattr'] and ctx['locknotimpl']: + log.debug(f'msg="Using xattrs to execute unlock" filepath="{filepath}" value="{value}"') + try: + currvalue = getxattr(endpoint, filepath, userid, LOCK_ATTR_KEY) + if currvalue.split('!')[0] == appname and currvalue.split('!')[1] == value: + raise KeyError + log.info('msg="Failed precondition on unlock" filepath="%s" appname="%s" previouslock="%s"' % + (filepath, appname, currvalue)) + raise IOError(common.EXCL_ERROR) + except KeyError: + rmxattr(endpoint, filepath, userid, LOCK_ATTR_KEY, None) + return + reference = _getcs3reference(endpoint, filepath) lock = cs3spr.Lock(type=cs3spr.LOCK_TYPE_WRITE, app_name=appname, lock_id=value) req = cs3sp.UnlockRequest(ref=reference, lock=lock) res = ctx['cs3gw'].Unlock(request=req, metadata=[('x-access-token', userid)]) + if res.status.code in [cs3code.CODE_FAILED_PRECONDITION, cs3code.CODE_ABORTED]: + log.info('msg="Failed precondition on unlock" filepath="%s" appname="%s" trace="%s" reason="%s"' % + (filepath, appname, res.status.trace, res.status.message.replace('"', "'"))) + raise IOError(common.EXCL_ERROR) + if res.status.code == cs3code.CODE_UNIMPLEMENTED and ctx['lockasattr']: + ctx['locknotimpl'] = True + unlock(endpoint, filepath, userid, appname, value) + return if res.status.code != cs3code.CODE_OK: log.error('msg="Failed to unlock" filepath="%s" trace="%s" code="%s" reason="%s"' % (filepath, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) 
raise IOError(res.status.message) - log.debug('msg="Invoked unlock" filepath="%s" value="%s" result="%s"' % (filepath, value, res.status)) + log.debug(f'msg="Invoked unlock" filepath="{filepath}" value="{value}" result="{res.status}"') def readfile(endpoint, filepath, userid, lockid): - '''Read a file using the given userid as access token. Note that the function is a generator, managed by Flask.''' + '''Read a file using the given userid as access token. Note that the function is a generator, managed by the app server.''' tstart = time.time() reference = _getcs3reference(endpoint, filepath) @@ -258,97 +407,126 @@ def readfile(endpoint, filepath, userid, lockid): req = cs3sp.InitiateFileDownloadRequest(ref=reference, lock_id=lockid) res = ctx['cs3gw'].InitiateFileDownload(request=req, metadata=[('x-access-token', userid)]) if res.status.code == cs3code.CODE_NOT_FOUND: - log.info('msg="File not found on read" filepath="%s"' % filepath) + log.info(f'msg="File not found on read" filepath="{filepath}"') yield IOError(common.ENOENT_MSG) elif res.status.code != cs3code.CODE_OK: log.error('msg="Failed to initiateFileDownload on read" filepath="%s" trace="%s" code="%s" reason="%s"' % (filepath, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) yield IOError(res.status.message) + tend = time.time() log.debug('msg="readfile: InitiateFileDownloadRes returned" trace="%s" protocols="%s"' % (res.status.trace, res.protocols)) # Download try: - protocol = [p for p in res.protocols if p.protocol == "simple" or p.protocol == "spaces"][0] + protocol = [p for p in res.protocols if p.protocol in ["simple", "spaces"]][0] headers = { - 'x-access-token': userid, - 'x-reva-transfer': protocol.token # needed if the downloads pass through the data gateway in reva + 'X-Access-Token': userid, + 'X-Reva-Transfer': protocol.token } - fileget = requests.get(url=protocol.download_endpoint, headers=headers, verify=ctx['ssl_verify']) + fileget = 
requests.get(url=protocol.download_endpoint, headers=headers, + verify=ctx['ssl_verify'], timeout=ctx['http_timeout'], + stream=True) except requests.exceptions.RequestException as e: - log.error('msg="Exception when downloading file from Reva" reason="%s"' % e) + log.error(f'msg="Exception when downloading file from Reva" reason="{e}"') yield IOError(e) - tend = time.time() - data = fileget.content + data = fileget.iter_content(ctx['chunksize']) if fileget.status_code != http.client.OK: log.error('msg="Error downloading file from Reva" code="%d" reason="%s"' % (fileget.status_code, fileget.reason.replace('"', "'"))) yield IOError(fileget.reason) else: - log.info('msg="File open for read" filepath="%s" elapsedTimems="%.1f"' % (filepath, (tend - tstart) * 1000)) - for i in range(0, len(data), ctx['chunksize']): - yield data[i:i + ctx['chunksize']] + log.info(f'msg="File open for read" filepath="{filepath}" elapsedTimems="{(tend - tstart) * 1000:.1f}"') + for chunk in data: + yield chunk -def writefile(endpoint, filepath, userid, content, lockid, islock=False): +def writefile(endpoint, filepath, userid, content, size, lockmd, islock=False): '''Write a file using the given userid as access token. The entire content is written and any pre-existing file is deleted (or moved to the previous version if supported). The islock flag is currently not supported. 
The backend should at least support writing the file with O_CREAT|O_EXCL flags to prevent races.''' + tstart = time.time() if islock: log.warning('msg="Lock (no-overwrite) flag not supported, going for standard upload"') - tstart = time.time() + if lockmd: + appname, lockid = lockmd + else: + appname = lockid = '' # prepare endpoint - if isinstance(content, str): - content = bytes(content, 'UTF-8') - size = str(len(content)) + if size == -1: + if isinstance(content, str): + content = bytes(content, 'UTF-8') + size = len(content) reference = _getcs3reference(endpoint, filepath) - metadata = types.Opaque(map={"Upload-Length": types.OpaqueEntry(decoder="plain", value=str.encode(size))}) - req = cs3sp.InitiateFileUploadRequest(ref=reference, lock_id=lockid, opaque=metadata) + req = cs3sp.InitiateFileUploadRequest(ref=reference, lock_id=lockid, opaque=types.Opaque( + map={'Upload-Length': types.OpaqueEntry(decoder='plain', value=str.encode(str(size)))})) res = ctx['cs3gw'].InitiateFileUpload(request=req, metadata=[('x-access-token', userid)]) + if res.status.code == cs3code.CODE_FAILED_PRECONDITION: + log.info('msg="Lock mismatch uploading file" filepath="%s" reason="%s"' % + (filepath, res.status.message.replace('"', "'"))) + raise IOError(common.EXCL_ERROR) if res.status.code != cs3code.CODE_OK: log.error('msg="Failed to initiateFileUpload on write" filepath="%s" trace="%s" code="%s" reason="%s"' % (filepath, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) raise IOError(res.status.message) + tend = time.time() log.debug('msg="writefile: InitiateFileUploadRes returned" trace="%s" protocols="%s"' % (res.status.trace, res.protocols)) # Upload try: - protocol = [p for p in res.protocols if p.protocol == "simple" or p.protocol == "spaces"][0] + protocol = [p for p in res.protocols if p.protocol in ["simple", "spaces"]][0] headers = { - 'x-access-token': userid, - 'Upload-Length': size, - 'x-reva-transfer': protocol.token # needed if the uploads 
pass through the data gateway in reva + 'X-Access-Token': userid, + 'Upload-Length': str(size), + 'X-Reva-Transfer': protocol.token, + 'X-Lock-Id': lockid, + 'X-Lock-Holder': appname, } - putres = requests.put(url=protocol.upload_endpoint, data=content, headers=headers, verify=ctx['ssl_verify']) + putres = requests.put(url=protocol.upload_endpoint, data=content, headers=headers, + verify=ctx['ssl_verify'], timeout=ctx['http_timeout']) except requests.exceptions.RequestException as e: - log.error('msg="Exception when uploading file to Reva" reason="%s"' % e) - raise IOError(e) - tend = time.time() + log.error(f'msg="Exception when uploading file to Reva" reason="{e}"') + raise IOError(e) from e + if putres.status_code == http.client.CONFLICT: + log.info(f'msg="Got conflict on PUT, file is locked" reason="{putres.reason}" filepath="{filepath}"') + raise IOError(common.EXCL_ERROR) if putres.status_code == http.client.UNAUTHORIZED: - log.warning('msg="Access denied uploading file to Reva" reason="%s"' % putres.reason) + log.warning(f'msg="Access denied uploading file to Reva" reason="{putres.reason}" filepath="{filepath}"') raise IOError(common.ACCESS_ERROR) if putres.status_code != http.client.OK: + if size == 0: # 0-byte file uploads may have been finalized after InitiateFileUploadRequest, let's assume it's OK + # TODO this use-case is to be reimplemented with a call to `TouchFile`. 
+ log.info('msg="0-byte file written successfully" filepath="%s" elapsedTimems="%.1f" islock="%s"' % + (filepath, (tend - tstart) * 1000, islock)) + return + log.error('msg="Error uploading file to Reva" code="%d" reason="%s"' % (putres.status_code, putres.reason)) raise IOError(putres.reason) log.info('msg="File written successfully" filepath="%s" elapsedTimems="%.1f" islock="%s"' % (filepath, (tend - tstart) * 1000, islock)) -def renamefile(endpoint, filepath, newfilepath, userid, lockid): +def renamefile(endpoint, filepath, newfilepath, userid, lockmd): '''Rename a file from origfilepath to newfilepath using the given userid as access token.''' reference = _getcs3reference(endpoint, filepath) newfileref = _getcs3reference(endpoint, newfilepath) - + lockid = None + if lockmd: + _, lockid = lockmd req = cs3sp.MoveRequest(source=reference, destination=newfileref, lock_id=lockid) res = ctx['cs3gw'].Move(request=req, metadata=[('x-access-token', userid)]) + if res.status.code in [cs3code.CODE_FAILED_PRECONDITION, cs3code.CODE_ABORTED]: + log.info('msg="Failed precondition on rename" filepath="%s" trace="%s" reason="%s"' % + (filepath, res.status.trace, res.status.message.replace('"', "'"))) + raise IOError(common.EXCL_ERROR) if res.status.code != cs3code.CODE_OK: log.error('msg="Failed to rename file" filepath="%s" trace="%s" code="%s" reason="%s"' % (filepath, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) raise IOError(res.status.message) - log.debug('msg="Invoked renamefile" result="%s"' % res) + log.debug(f'msg="Invoked renamefile" result="{res}"') def removefile(endpoint, filepath, userid, _force=False): @@ -358,10 +536,10 @@ def removefile(endpoint, filepath, userid, _force=False): req = cs3sp.DeleteRequest(ref=reference) res = ctx['cs3gw'].Delete(request=req, metadata=[('x-access-token', userid)]) if res.status.code != cs3code.CODE_OK: - if str(res) == common.ENOENT_MSG: - log.info('msg="Invoked removefile on non-existing file" 
filepath="%s"' % filepath) - else: - log.error('msg="Failed to remove file" filepath="%s" trace="%s" code="%s" reason="%s"' % - (filepath, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) + if 'path not found' in str(res): + log.info(f'msg="Invoked removefile on non-existing file" filepath="{filepath}"') + raise IOError(common.ENOENT_MSG) + log.error('msg="Failed to remove file" filepath="%s" trace="%s" code="%s" reason="%s"' % + (filepath, res.status.trace, res.status.code, res.status.message.replace('"', "'"))) raise IOError(res.status.message) - log.debug('msg="Invoked removefile" result="%s"' % res) + log.debug(f'msg="Invoked removefile" result="{res}"') diff --git a/src/core/discovery.py b/src/core/discovery.py deleted file mode 100644 index db583120..00000000 --- a/src/core/discovery.py +++ /dev/null @@ -1,119 +0,0 @@ -''' -discovery.py - -Helper code for the WOPI discovery phase, as well as for integrating the apps -supported by the bridge functionality. -This code is deprecated and is only used in conjunction with the xroot storage interface: -when the WOPI server is interfaced to Reva via the cs3 storage interface this code is disabled. - -Main author: Giuseppe.LoPresti@cern.ch, CERN/IT-ST -''' - -from xml.etree import ElementTree as ET -import http.client -import requests -import bridge - -# convenience references to global entities -config = None -codetypes = None -log = None - -# map of all registered apps' endpoints -endpoints = {} - - -def registerapp(appname, appurl, appinturl, apikey=None): - '''Registers the given app in the internal endpoints list - For the time being, this is highly customized to keep backwards-compatibility. 
To be reviewed''' - if not appinturl: - appinturl = appurl - try: - discReq = requests.get(appurl + '/hosting/discovery', verify=False) - except requests.exceptions.ConnectionError as e: - log.error('msg="Failed to probe application" appurl="%s" response="%s"' % (appurl, e)) - return - - if discReq.status_code == http.client.OK: - discXml = ET.fromstring(discReq.content) - # extract urlsrc from first node inside - urlsrc = discXml.find('net-zone/app')[0].attrib['urlsrc'] - if urlsrc.find('loleaflet') > 0: - # this is Collabora - for t in codetypes: - endpoints[t] = {} - endpoints[t]['view'] = urlsrc + 'permission=readonly' - endpoints[t]['edit'] = urlsrc + 'permission=edit' - endpoints[t]['new'] = urlsrc + 'permission=edit' # noqa: E221 - log.info('msg="Collabora Online endpoints successfully configured" count="%d" CODEURL="%s"' % - (len(codetypes), endpoints['.odt']['edit'])) - return - - # else this must be Microsoft Office Online - endpoints['.docx'] = {} - endpoints['.docx']['view'] = appurl + '/wv/wordviewerframe.aspx?edit=0' - endpoints['.docx']['edit'] = appurl + '/we/wordeditorframe.aspx?edit=1' - endpoints['.docx']['new'] = appurl + '/we/wordeditorframe.aspx?new=1' # noqa: E221 - endpoints['.xlsx'] = {} - endpoints['.xlsx']['view'] = appurl + '/x/_layouts/xlviewerinternal.aspx?edit=0' - endpoints['.xlsx']['edit'] = appurl + '/x/_layouts/xlviewerinternal.aspx?edit=1' - endpoints['.xlsx']['new'] = appurl + '/x/_layouts/xlviewerinternal.aspx?edit=1&new=1' # noqa: E221 - endpoints['.pptx'] = {} - endpoints['.pptx']['view'] = appurl + '/p/PowerPointFrame.aspx?PowerPointView=ReadingView' - endpoints['.pptx']['edit'] = appurl + '/p/PowerPointFrame.aspx?PowerPointView=EditView' - endpoints['.pptx']['new'] = appurl + '/p/PowerPointFrame.aspx?PowerPointView=EditView&New=1' # noqa: E221 - log.info('msg="Microsoft Office Online endpoints successfully configured" OfficeURL="%s"' % - endpoints['.docx']['edit']) - return - - if discReq.status_code == 
http.client.NOT_FOUND: - # try and scrape the app homepage to see if a bridge-supported app is found - try: - discReq = requests.get(appurl, verify=False).content.decode() - if discReq.find('CodiMD') > 0: - bridge.WB.loadplugin(appname, appurl, appinturl, apikey) - endpoints['.md'] = {} - endpoints['.md']['view'] = endpoints['.md']['edit'] = appurl - endpoints['.zmd'] = {} - endpoints['.zmd']['view'] = endpoints['.zmd']['edit'] = appurl - endpoints['.txt'] = {} - endpoints['.txt']['view'] = endpoints['.txt']['edit'] = appurl - log.info('msg="CodiMD endpoints successfully configured" CodiMDURL="%s"' % appurl) - return - - if discReq.find('Etherpad') > 0: - bridge.WB.loadplugin(appname, appurl, appinturl, apikey) - endpoints['.epd'] = {} - endpoints['.epd']['view'] = endpoints['.epd']['edit'] = appurl - log.info('msg="Etherpad endpoints successfully configured" EtherpadURL="%s"' % appurl) - return - except ValueError: - # bridge plugin could not be initialized - pass - except requests.exceptions.ConnectionError: - pass - - # in all other cases, log failure - log.error('msg="Attempted to register a non WOPI-compatible app" appurl="%s"' % appurl) - - -def initappsregistry(): - '''Initializes the CERNBox Office-like Apps Registry''' - oos = config.get('general', 'oosurl', fallback=None) - if oos: - registerapp('MSOffice', oos, oos) - code = config.get('general', 'codeurl', fallback=None) - if code: - registerapp('Collabora', code, code) - codimd = config.get('general', 'codimdurl', fallback=None) - codimdint = config.get('general', 'codimdinturl', fallback=None) - if codimd: - with open('/var/run/secrets/codimd_apikey') as f: - apikey = f.readline().strip('\n') - registerapp('CodiMD', codimd, codimdint, apikey) - etherpad = config.get('general', 'etherpadurl', fallback=None) - etherpadint = config.get('general', 'etherpadinturl', fallback=None) - if etherpad: - with open('/var/run/secrets/etherpad_apikey') as f: - apikey = f.readline().strip('\n') - 
registerapp('Etherpad', etherpad, etherpadint, apikey) diff --git a/src/core/localiface.py b/src/core/localiface.py index 724e9966..4c05fb05 100644 --- a/src/core/localiface.py +++ b/src/core/localiface.py @@ -22,9 +22,6 @@ log = None homepath = None -# a conventional value used by _checklock() -LOCK = '__LOCK__' - class Flock: '''A simple class to lock/unlock when entering/leaving a runtime context @@ -67,13 +64,30 @@ def init(inconfig, inlog): if not S_ISDIR(mode): raise IOError('Not a directory') except IOError as e: - raise IOError('Could not stat storagehomepath folder %s: %s' % (homepath, e)) + raise IOError(f'Could not stat storagehomepath folder {homepath}: {e}') from e + # all right but inform the user + log.warning('msg="Use this local storage interface for test/development purposes only, not for production"') + + +def healthcheck(): + '''Probes the storage and returns a status message. For local storage, we just stat the root''' + try: + stat(None, '/', None) + return 'Warning' # to please CodeQL but never reached + except IOError as e: + if str(e) == 'Is a directory': + # that's expected, yet we return warning as this is a test/dev storage interface + log.debug('msg="Executed health check against storage root"') + return 'Warning' + # any other error is a failure + log.error('msg="Health check failed against storage root" error="%s"' % e) + return str(e) def getuseridfromcreds(_token, _wopiuser): '''Maps a Reva token and wopiuser to the credentials to be used to access the storage. 
For the localfs case, this is trivially hardcoded''' - return '0:0' + return '0:0', 'root!0:0' def stat(_endpoint, filepath, _userid): @@ -96,7 +110,7 @@ def stat(_endpoint, filepath, _userid): 'etag': str(statInfo.st_mtime), } except (FileNotFoundError, PermissionError) as e: - raise IOError(e) + raise IOError(e) from e def statx(endpoint, filepath, userid, versioninv=1): @@ -105,25 +119,32 @@ def statx(endpoint, filepath, userid, versioninv=1): return stat(endpoint, filepath, userid) -def _checklock(op, endpoint, filepath, userid, lockid): - '''Verify if the given lockid matches the existing one on the given filepath, if any''' - if lockid == LOCK: - # this is a special value to skip the check, used by the lock operations themselves - return - lock = getlock(endpoint, filepath, userid) - if lock and lock['lock_id'] != lockid: - log.warning('msg="%s: file was locked" filepath="%s" holder="%s"' % (op, filepath, lock['app_name'])) - raise IOError('File was locked') +def _validatelock(filepath, currlock, lockmd, op, log): + '''Common logic for validating locks: duplicates some logic + natively implemented by EOS and Reva on the other storage interfaces''' + appname = value = None + if lockmd: + appname, value = lockmd + try: + if not currlock: + raise IOError(common.EXCL_ERROR) + if appname and currlock['app_name'] != appname: + raise IOError(common.EXCL_ERROR + f", file is locked by {currlock['app_name']}") + if value != currlock['lock_id']: + raise IOError(common.EXCL_ERROR) + except IOError as e: + log.warning('msg="Failed to %s" filepath="%s" appname="%s" lockid="%s" currlock="%s" reason="%s"' % + (op, filepath, appname, value, currlock, e)) + raise -def setxattr(endpoint, filepath, userid, key, value, lockid): +def setxattr(endpoint, filepath, userid, key, value, lockmd): '''Set the extended attribute to on behalf of the given userid''' - _checklock('setxattr', endpoint, filepath, userid, lockid) try: os.setxattr(_getfilepath(filepath), 'user.' 
+ key, str(value).encode()) except OSError as e: - log.error('msg="Failed to setxattr" filepath="%s" key="%s" exception="%s"' % (filepath, key, e)) - raise IOError(e) + log.error(f'msg="Failed to setxattr" filepath="{filepath}" key="{key}" exception="{e}"') + raise IOError(e) from e def getxattr(_endpoint, filepath, _userid, key): @@ -131,34 +152,34 @@ def getxattr(_endpoint, filepath, _userid, key): try: return os.getxattr(_getfilepath(filepath), 'user.' + key).decode('UTF-8') except OSError as e: - log.warn('msg="Failed to getxattr or missing key" filepath="%s" key="%s" exception="%s"' % (filepath, key, e)) + log.warning(f'msg="Failed to getxattr or missing key" filepath="{filepath}" key="{key}" exception="{e}"') return None -def rmxattr(endpoint, filepath, userid, key, lockid): +def rmxattr(endpoint, filepath, userid, key, lockmd): '''Remove the extended attribute on behalf of the given userid''' - _checklock('rmxattr', endpoint, filepath, userid, lockid) try: os.removexattr(_getfilepath(filepath), 'user.' 
+ key) except OSError as e: - log.error('msg="Failed to rmxattr" filepath="%s" key="%s" exception="%s"' % (filepath, key, e)) - raise IOError(e) + log.error(f'msg="Failed to rmxattr" filepath="{filepath}" key="{key}" exception="{e}"') + raise IOError(e) from e def setlock(endpoint, filepath, userid, appname, value): '''Set the lock as an xattr on behalf of the given userid''' - log.debug('msg="Invoked setlock" filepath="%s" value="%s"' % (filepath, value)) + log.debug(f'msg="Invoked setlock" filepath="{filepath}" value="{value}"') with open(_getfilepath(filepath)) as fd: fl = Flock(fd) # ensures atomicity of the following operations try: with fl: if not getlock(endpoint, filepath, userid): - setxattr(endpoint, filepath, '0:0', common.LOCKKEY, common.genrevalock(appname, value), LOCK) + log.debug(f'msg="setlock: invoking setxattr" filepath="{filepath}" value="{value}"') + setxattr(endpoint, filepath, '0:0', common.LOCKKEY, common.genrevalock(appname, value), None) else: raise IOError(common.EXCL_ERROR) except BlockingIOError as e: - log.error('msg="File already flocked" filepath="%s" exception="%s"' % (filepath, e)) - raise IOError(common.EXCL_ERROR) + log.error(f'msg="File already flocked" filepath="{filepath}" exception="{e}"') + raise IOError(common.EXCL_ERROR) from e def getlock(endpoint, filepath, _userid): @@ -167,62 +188,72 @@ def getlock(endpoint, filepath, _userid): if rawl: lock = common.retrieverevalock(rawl) if lock['expiration']['seconds'] > time.time(): - log.debug('msg="Invoked getlock" filepath="%s"' % filepath) + log.debug(f'msg="Invoked getlock" filepath="{filepath}"') return lock # otherwise, the lock had expired: drop it and return None - log.debug('msg="getlock: removed stale lock" filepath="%s"' % filepath) - rmxattr(endpoint, filepath, '0:0', common.LOCKKEY, LOCK) + log.debug(f'msg="getlock: removed stale lock" filepath="{filepath}"') + rmxattr(endpoint, filepath, '0:0', common.LOCKKEY, None) return None -def refreshlock(endpoint, filepath, 
userid, appname, value): +def refreshlock(endpoint, filepath, userid, appname, value, oldvalue=None): '''Refresh the lock value as an xattr on behalf of the given userid''' - common.validatelock(filepath, appname, getlock(endpoint, filepath, userid), 'refreshlock', log) + currlock = getlock(endpoint, filepath, userid) + if not oldvalue and currlock: + # this is a pure refresh operation + oldvalue = currlock['lock_id'] + _validatelock(filepath, currlock, (appname, oldvalue), 'refreshlock', log) # this is non-atomic, but if we get here the lock was already held - log.debug('msg="Invoked refreshlock" filepath="%s" value="%s"' % (filepath, value)) - setxattr(endpoint, filepath, '0:0', common.LOCKKEY, common.genrevalock(appname, value), LOCK) + log.debug(f'msg="Invoked refreshlock" filepath="{filepath}" value="{value}"') + setxattr(endpoint, filepath, '0:0', common.LOCKKEY, common.genrevalock(appname, value), None) def unlock(endpoint, filepath, userid, appname, value): '''Remove the lock as an xattr on behalf of the given userid''' - common.validatelock(filepath, appname, getlock(endpoint, filepath, userid), 'unlock', log) - log.debug('msg="Invoked unlock" filepath="%s" value="%s"' % (filepath, value)) - rmxattr(endpoint, filepath, '0:0', common.LOCKKEY, LOCK) + _validatelock(filepath, getlock(endpoint, filepath, userid), (appname, value), 'unlock', log) + log.debug(f'msg="Invoked unlock" filepath="{filepath}" value="{value}"') + rmxattr(endpoint, filepath, '0:0', common.LOCKKEY, None) def readfile(_endpoint, filepath, _userid, _lockid): - '''Read a file on behalf of the given userid. Note that the function is a generator, managed by Flask.''' - log.debug('msg="Invoking readFile" filepath="%s"' % filepath) + '''Read a file on behalf of the given userid. 
Note that the function is a generator, managed by the app server.''' + log.debug(f'msg="Invoking readFile" filepath="{filepath}"') try: tstart = time.time() chunksize = config.getint('io', 'chunksize') with open(_getfilepath(filepath), mode='rb', buffering=chunksize) as f: tend = time.time() - log.info('msg="File open for read" filepath="%s" elapsedTimems="%.1f"' % (filepath, (tend - tstart) * 1000)) - # the actual read is buffered and managed by the Flask server + log.info(f'msg="File open for read" filepath="{filepath}" elapsedTimems="{(tend - tstart) * 1000:.1f}"') + # the actual read is buffered and managed by the app server for chunk in iter(lambda: f.read(chunksize), b''): yield chunk except FileNotFoundError: # log this case as info to keep the logs cleaner - log.info('msg="File not found on read" filepath="%s"' % filepath) + log.info(f'msg="File not found on read" filepath="{filepath}"') # as this is a generator, we yield the error string instead of the file's contents yield IOError('No such file or directory') except OSError as e: - # general case, issue a warning - log.error('msg="Error opening the file for read" filepath="%s" error="%s"' % (filepath, e)) + log.error(f'msg="Error opening the file for read" filepath="{filepath}" error="{e}"') yield IOError(e) -def writefile(endpoint, filepath, userid, content, lockid, islock=False): +def writefile(endpoint, filepath, userid, content, size, lockmd, islock=False): '''Write a file via xroot on behalf of the given userid. The entire content is written and any pre-existing file is deleted (or moved to the previous version if supported). 
With islock=True, the file is opened with O_CREAT|O_EXCL.''' - if isinstance(content, str): - content = bytes(content, 'UTF-8') - size = len(content) - _checklock('writefile', endpoint, filepath, userid, lockid) + stream = True + if size == -1: + if isinstance(content, str): + content = bytes(content, 'UTF-8') + size = len(content) + stream = False + if lockmd: + _validatelock(filepath, getlock(endpoint, filepath, userid), lockmd, 'writefile', log) + elif getlock(endpoint, filepath, userid): + raise IOError(common.EXCL_ERROR) log.debug('msg="Invoking writeFile" filepath="%s" size="%d"' % (filepath, size)) tstart = time.time() + written = 0 if islock: warnings.simplefilter("ignore", ResourceWarning) try: @@ -231,36 +262,51 @@ def writefile(endpoint, filepath, userid, content, lockid, islock=False): # so we resort to the os-level open(), with some caveats fd = os.open(_getfilepath(filepath), os.O_CREAT | os.O_EXCL) f = os.fdopen(fd, mode='wb') + tend = time.time() written = f.write(content) # os.write(fd, ...) raises EBADF? os.close(fd) # f.close() raises EBADF! 
while this works # as f goes out of scope here, we'd get a false ResourceWarning, which is ignored by the above filter - except FileExistsError: - log.info('msg="File exists on write but islock flag requested" filepath="%s"' % filepath) - raise IOError(common.EXCL_ERROR) + except FileExistsError as e: + log.info(f'msg="File exists on write but islock flag requested" filepath="{filepath}"') + raise IOError(common.EXCL_ERROR) from e except OSError as e: - log.warning('msg="Error writing file in O_EXCL mode" filepath="%s" error="%s"' % (filepath, e)) - raise IOError(e) + log.warning(f'msg="Error writing file in O_EXCL mode" filepath="{filepath}" error="{e}"') + raise IOError(e) from e else: try: with open(_getfilepath(filepath), mode='wb') as f: - written = f.write(content) + tend = time.time() + if stream: + chunksize = config.getint('io', 'chunksize') + o = 0 + while True: + chunk = content.read(chunksize) + if len(chunk) == 0: + break + f.seek(o) + written += f.write(chunk) + o += len(chunk) + else: + written = f.write(content) except OSError as e: - log.error('msg="Error writing file" filepath="%s" error="%s"' % (filepath, e)) - raise IOError(e) - tend = time.time() + log.error(f'msg="Error writing file" filepath="{filepath}" error="{e}"') + raise IOError(e) from e if written != size: raise IOError('Written %d bytes but content is %d bytes' % (written, size)) log.info('msg="File written successfully" filepath="%s" elapsedTimems="%.1f" islock="%s"' % (filepath, (tend - tstart) * 1000, islock)) -def renamefile(endpoint, origfilepath, newfilepath, userid, lockid): +def renamefile(endpoint, origfilepath, newfilepath, userid, lockmd): '''Rename a file from origfilepath to newfilepath on behalf of the given userid.''' - _checklock('renamefile', endpoint, origfilepath, userid, lockid) + currlock = getlock(endpoint, origfilepath, userid) + if currlock: + # enforce lock only if previously set + _validatelock(origfilepath, currlock, lockmd, 'renamefile', log) try: 
os.rename(_getfilepath(origfilepath), _getfilepath(newfilepath)) except OSError as e: - raise IOError(e) + raise IOError(e) from e def removefile(_endpoint, filepath, _userid, force=False): @@ -269,4 +315,4 @@ def removefile(_endpoint, filepath, _userid, force=False): try: os.remove(_getfilepath(filepath)) except OSError as e: - raise IOError(e) + raise IOError(e) from e diff --git a/src/core/readme.md b/src/core/readme.md index 6750e74e..14ac1e33 100644 --- a/src/core/readme.md +++ b/src/core/readme.md @@ -1,15 +1,12 @@ -## WOPI server - core module - -This module includes the core WOPI protocol implementation, along with the discovery logic -in the `discovery.py` module. The latter has already been implemented in Reva's WOPI appprovider driver, -therefore this implementation will eventually be removed. - -To access the storage, three interfaces are provided: - -* `xrootiface.py` to interface to an EOS storage via the xrootd protocol. Though the code is generic enough to enable support for any xrootd-based storage, it does include EOS-specific calls. - -* `cs3iface.py` to interface to storage providers via [CS3 APIs](https://github.com/cs3org/cs3apis). - -* `localiface.py` to interface to a local filesystem. Note that this interface is provided for testing purposes only, and it is supported on Linux and WSL for Windows, not on native Windows nor on native MacOS systems as they lack support for extended attributes in Python. - -The `/test` folder contains a unit test suite for the storage interfaces. +## WOPI server - core module + +This module includes the core WOPI protocol implementation. +To access the storage, three interfaces are provided: + +* `xrootiface.py` to interface to an EOS storage via the xrootd protocol. Though the code is generic enough to enable support for any xrootd-based storage, it does include EOS-specific calls. + +* `cs3iface.py` to interface to storage providers via [CS3 APIs](https://github.com/cs3org/cs3apis). 
+ +* `localiface.py` to interface to a local filesystem. Note that this interface is provided for testing purposes only, and it is supported on Linux and WSL for Windows, not on native Windows nor on native MacOS systems as they lack support for extended attributes in Python. + +The `/test` folder contains a unit test suite for the storage interfaces. diff --git a/src/core/wopi.py b/src/core/wopi.py index ba34f1fb..9f5ba2ec 100644 --- a/src/core/wopi.py +++ b/src/core/wopi.py @@ -14,6 +14,7 @@ from datetime import datetime from urllib.parse import unquote_plus as url_unquote from urllib.parse import quote_plus as url_quote +from urllib.parse import urlparse from more_itertools import peekable import flask import core.wopiutils as utils @@ -32,79 +33,108 @@ def checkFileInfo(fileid, acctok): '''Implements the CheckFileInfo WOPI call''' try: acctok['viewmode'] = utils.ViewMode(acctok['viewmode']) + acctok['usertype'] = utils.UserType(acctok['usertype']) statInfo = st.statx(acctok['endpoint'], acctok['filename'], acctok['userid']) # populate metadata for this file fmd = {} fmd['BaseFileName'] = fmd['BreadcrumbDocName'] = os.path.basename(acctok['filename']) + if acctok['viewmode'] in (utils.ViewMode.VIEW_ONLY, utils.ViewMode.READ_ONLY): + fmd['BreadcrumbDocName'] += ' (read only)' + fmd['FileExtension'] = os.path.splitext(acctok['filename'])[1] wopiSrc = 'WOPISrc=%s&access_token=%s' % (utils.generateWopiSrc(fileid, acctok['appname'] == srv.proxiedappname), flask.request.args['access_token']) - fmd['HostViewUrl'] = '%s%s%s' % (acctok['appviewurl'], '&' if '?' in acctok['appviewurl'] else '?', wopiSrc) - fmd['HostEditUrl'] = '%s%s%s' % (acctok['appediturl'], '&' if '?' 
in acctok['appediturl'] else '?', wopiSrc) + hosteurl = srv.config.get('general', 'hostediturl', fallback=None) + if hosteurl: + fmd['HostEditUrl'] = utils.generateUrlFromTemplate(hosteurl, acctok) + # for the PostMessage origin, use the folderurl if given and not empty, else the editurl + pmhost = urlparse(acctok['folderurl'] if len(acctok['folderurl']) > 1 else fmd['HostEditUrl']) + fmd['PostMessageOrigin'] = pmhost.scheme + '://' + pmhost.netloc + fmd['EditModePostMessage'] = fmd['EditNotificationPostMessage'] = True + else: + fmd['HostEditUrl'] = f"{acctok['appediturl']}{'&' if '?' in acctok['appediturl'] else '?'}{wopiSrc}" + hostvurl = srv.config.get('general', 'hostviewurl', fallback=None) + if hostvurl: + fmd['HostViewUrl'] = utils.generateUrlFromTemplate(hostvurl, acctok) + else: + fmd['HostViewUrl'] = f"{acctok['appviewurl']}{'&' if '?' in acctok['appviewurl'] else '?'}{wopiSrc}" + fsurl = srv.config.get('general', 'filesharingurl', fallback=None) + if fsurl: + fmd['FileSharingUrl'] = utils.generateUrlFromTemplate(fsurl, acctok) + fmd['FileSharingPostMessage'] = True + try: + fmd['PrivacyUrl'] = srv.config.get('general', 'privacyurl') + except configparser.NoOptionError: + # ignore, this property is optional + pass furl = acctok['folderurl'] - fmd['BreadcrumbFolderUrl'] = furl if furl != '/' else srv.wopiurl # the WOPI URL is a placeholder - if acctok['username'] == '': + if furl != '/': + fmd['CloseUrl'] = fmd['BreadcrumbFolderUrl'] = furl + '?scrollTo=' + fmd['BaseFileName'] + if acctok['username'] == '' or acctok['usertype'] == utils.UserType.ANONYMOUS: fmd['IsAnonymousUser'] = True fmd['UserFriendlyName'] = 'Guest ' + utils.randomString(3) - if '?path' in furl and furl[-1] != '/' and furl[-1] != '=': - # this is a subfolder of a public share, show it - fmd['BreadcrumbFolderName'] = furl[furl.find('?path'):].split('/')[-1] - else: - # this is the top level public share, which is anonymous - fmd['BreadcrumbFolderName'] = 'Public share' + 
fmd['BreadcrumbFolderName'] = 'Public share' else: + fmd['IsAnonymousUser'] = False fmd['UserFriendlyName'] = acctok['username'] - fmd['BreadcrumbFolderName'] = 'Back to ' + os.path.dirname(acctok['filename']) - if furl == '/': # if no target folder URL was given, override the above and completely hide it - fmd['BreadcrumbFolderName'] = '' - if acctok['viewmode'] in (utils.ViewMode.READ_ONLY, utils.ViewMode.READ_WRITE) \ - and srv.config.get('general', 'downloadurl', fallback=None): + fmd['BreadcrumbFolderName'] = 'ScienceMesh share' if acctok['usertype'] == utils.UserType.OCM else 'Parent folder' + if acctok['viewmode'] != utils.ViewMode.VIEW_ONLY and srv.config.get('general', 'downloadurl', fallback=None): fmd['DownloadUrl'] = fmd['FileUrl'] = '%s?access_token=%s' % \ - (srv.config.get('general', 'downloadurl'), flask.request.args['access_token']) + (srv.config.get('general', 'downloadurl'), flask.request.args['access_token']) + if srv.config.get('general', 'businessflow', fallback='True').upper() == 'TRUE': + # according to Microsoft, this must be enabled for all users + fmd['LicenseCheckForEditIsEnabled'] = True fmd['BreadcrumbBrandName'] = srv.config.get('general', 'brandingname', fallback=None) fmd['BreadcrumbBrandUrl'] = srv.config.get('general', 'brandingurl', fallback=None) - fmd['FileSharingUrl'] = srv.config.get('general', 'filesharingurl', fallback=None) - if fmd['FileSharingUrl']: - fmd['FileSharingUrl'] = fmd['FileSharingUrl'].replace('', url_quote(acctok['filename'])).replace('', fileid) fmd['OwnerId'] = statInfo['ownerid'] - fmd['UserId'] = acctok['wopiuser'] # typically same as OwnerId; different when accessing shared documents + fmd['UserId'] = acctok['wopiuser'].split('!')[-1] # typically same as OwnerId; different when accessing shared documents fmd['Size'] = statInfo['size'] - # note that in ownCloud the version is generated as: `'V' + etag + checksum` - fmd['Version'] = 'v%s' % statInfo['etag'] + fmd['LastModifiedTime'] = 
str(datetime.fromtimestamp(int(statInfo['mtime']))) + '.000' + # note that in ownCloud 10 the version is generated as: `'V' + etag + checksum` + fmd['Version'] = f"v{statInfo['etag']}" fmd['SupportsExtendedLockLength'] = fmd['SupportsGetLock'] = True fmd['SupportsUpdate'] = fmd['UserCanWrite'] = fmd['SupportsLocks'] = \ - fmd['SupportsDeleteFile'] = acctok['viewmode'] == utils.ViewMode.READ_WRITE - fmd['UserCanNotWriteRelative'] = acctok['viewmode'] != utils.ViewMode.READ_WRITE - fmd['SupportsRename'] = fmd['UserCanRename'] = enablerename and (acctok['viewmode'] == utils.ViewMode.READ_WRITE) - fmd['SupportsContainers'] = False # TODO this is all to be implemented - fmd['SupportsUserInfo'] = False # TODO https://docs.microsoft.com/en-us/openspecs/office_protocols/ms-wopi/371e25ae-e45b-47ab-aec3-9111e962919d + fmd['SupportsDeleteFile'] = acctok['viewmode'] in (utils.ViewMode.READ_WRITE, utils.ViewMode.PREVIEW) + fmd['ReadOnly'] = not fmd['SupportsUpdate'] + fmd['RestrictedWebViewOnly'] = acctok['viewmode'] == utils.ViewMode.VIEW_ONLY + # SaveAs functionality is disabled for anonymous and federated users, as they have no personal space where to save + # as an alternate location and we cannot assume that saving to the same folder is allowed (e.g. single-file shares). + # Instead, regular (authenticated) users are offered a SaveAs (unless in view-only mode), where the operation + # is executed to the user's home if no access is given to the same folder where the file is. + fmd['UserCanNotWriteRelative'] = acctok['viewmode'] == utils.ViewMode.VIEW_ONLY or \ + acctok['usertype'] != utils.UserType.REGULAR + fmd['SupportsRename'] = fmd['UserCanRename'] = enablerename and \ + acctok['viewmode'] in (utils.ViewMode.READ_WRITE, utils.ViewMode.PREVIEW) + fmd['SupportsUserInfo'] = True + uinfo = st.getxattr(acctok['endpoint'], acctok['filename'], acctok['userid'], + utils.USERINFOKEY + '.' 
+ acctok['wopiuser'].split('!')[0]) + if uinfo: + fmd['UserInfo'] = uinfo + if srv.config.get('general', 'earlyfeatures', fallback='False').upper() == 'TRUE': + fmd['AllowEarlyFeatures'] = True + fmd['ComplianceDomainPrefix'] = srv.config.get('general', 'compliancedomain', fallback='euc') # populate app-specific metadata - if acctok['appname'].find('Microsoft') > 0: - # the following is to enable the 'Edit in Word/Excel/PowerPoint' (desktop) action (probably broken) - try: - fmd['ClientUrl'] = srv.config.get('general', 'webdavurl') + '/' + acctok['filename'] - except configparser.NoOptionError: - # if no WebDAV URL is provided, ignore this setting - pass + # the following is to enable the 'Edit in Word/Excel/PowerPoint' (desktop) action (probably broken) + try: + fmd['ClientUrl'] = srv.config.get('general', 'webdavurl') + '/' + acctok['filename'] + except configparser.NoOptionError: + # if no WebDAV URL is provided, ignore this setting + pass # extensions for Collabora Online - fmd['EnableOwnerTermination'] = True - fmd['DisableExport'] = fmd['DisableCopy'] = fmd['DisablePrint'] = acctok['viewmode'] == utils.ViewMode.VIEW_ONLY - # fmd['LastModifiedTime'] = datetime.fromtimestamp(int(statInfo['mtime'])).isoformat() # this currently breaks + if acctok['appname'] == 'Collabora': + fmd['EnableOwnerTermination'] = True + fmd['DisableExport'] = fmd['DisableCopy'] = fmd['DisablePrint'] = acctok['viewmode'] == utils.ViewMode.VIEW_ONLY res = flask.Response(json.dumps(fmd), mimetype='application/json') - # amend sensitive metadata for the logs + # redact sensitive metadata for the logs fmd['HostViewUrl'] = fmd['HostEditUrl'] = fmd['DownloadUrl'] = fmd['FileUrl'] = \ fmd['BreadcrumbBrandUrl'] = fmd['FileSharingUrl'] = '_redacted_' - log.info('msg="File metadata response" token="%s" metadata="%s"' % - (flask.request.args['access_token'][-20:], fmd)) + log.info(f"msg=\"File metadata response\" token=\"{flask.request.args['access_token'][-20:]}\" metadata=\"{fmd}\"") return res 
except IOError as e: - log.info('msg="Requested file not found" filename="%s" token="%s" error="%s"' % + log.info('msg="Requested file not found" filename="%s" token="%s" details="%s"' % (acctok['filename'], flask.request.args['access_token'][-20:], e)) - return 'File not found', http.client.NOT_FOUND - except KeyError as e: - log.warning('msg="Invalid access token or request argument" error="%s" request="%s"' % (e, flask.request.__dict__)) - return 'Invalid request', http.client.UNAUTHORIZED + return utils.createJsonResponse({'message': 'File not found'}, http.client.NOT_FOUND) def getFile(_fileid, acctok): @@ -116,18 +146,18 @@ def getFile(_fileid, acctok): f = peekable(st.readfile(acctok['endpoint'], acctok['filename'], acctok['userid'], None)) firstchunk = f.peek() if isinstance(firstchunk, IOError): - log.error('msg="GetFile: download failed" filename="%s" token="%s" error="%s"' % - (acctok['filename'], flask.request.args['access_token'][-20:], firstchunk)) - return 'Failed to fetch file from storage', http.client.INTERNAL_SERVER_ERROR + log.error('msg="GetFile: download failed" endpoint="%s" filename="%s" token="%s" error="%s"' % + (acctok['endpoint'], acctok['filename'], flask.request.args['access_token'][-20:], firstchunk)) + return utils.createJsonResponse({'message': 'Failed to fetch file from storage'}, http.client.INTERNAL_SERVER_ERROR) # stat the file to get the current version statInfo = st.statx(acctok['endpoint'], acctok['filename'], acctok['userid']) # stream file from storage to client resp = flask.Response(f, mimetype='application/octet-stream') resp.status_code = http.client.OK - resp.headers['Content-Disposition'] = 'attachment; filename="%s"' % os.path.basename(acctok['filename']) + resp.headers['Content-Disposition'] = f"attachment; filename*=UTF-8''{url_quote(os.path.basename(acctok['filename']))}" resp.headers['X-Frame-Options'] = 'sameorigin' resp.headers['X-XSS-Protection'] = '1; mode=block' - resp.headers['X-WOPI-ItemVersion'] = 'v%s' % 
statInfo['etag'] + resp.headers['X-WOPI-ItemVersion'] = f"v{statInfo['etag']}" return resp except StopIteration: # File is empty, still return OK (strictly speaking, we should return 204 NO_CONTENT) @@ -136,7 +166,7 @@ def getFile(_fileid, acctok): # File is readable but statx failed? log.error('msg="GetFile: failed to stat after read, possible race" filename="%s" token="%s" error="%s"' % (acctok['filename'], flask.request.args['access_token'][-20:], e)) - return 'Failed to access file', http.client.INTERNAL_SERVER_ERROR + return utils.createJsonResponse({'message': 'Failed to access file'}, http.client.INTERNAL_SERVER_ERROR) # @@ -150,6 +180,7 @@ def setLock(fileid, reqheaders, acctok): validateTarget = reqheaders.get('X-WOPI-Validate-Target') retrievedLock, lockHolder = utils.retrieveWopiLock(fileid, op, lock, acctok) fn = acctok['filename'] + savetime = None try: # validate that the underlying file is still there (it might have been moved/deleted) @@ -158,34 +189,41 @@ def setLock(fileid, reqheaders, acctok): log.warning('msg="Error with target file" lockop="%s" filename="%s" token="%s" error="%s"' % (op.title(), fn, flask.request.args['access_token'][-20:], e)) if common.ENOENT_MSG in str(e): - return 'File not found', http.client.NOT_FOUND - return IO_ERROR, http.client.INTERNAL_SERVER_ERROR - - # perform the required checks for the validity of the new lock - if op == 'REFRESH_LOCK' and not retrievedLock: - if validateTarget: - # this is an extension of the API: a REFRESH_LOCK without previous lock but with a Validate-Target header - # is allowed provided that the target file was last saved by WOPI and not overwritten by external actions - # (cf. 
PutFile logic) - savetime = st.getxattr(acctok['endpoint'], fn, acctok['userid'], utils.LASTSAVETIMEKEY) - if savetime and (not savetime.isdigit() or int(savetime) < int(statInfo['mtime'])): - savetime = None - else: + return utils.createJsonResponse({'message': 'File not found'}, http.client.NOT_FOUND) + return utils.createJsonResponse({'message': IO_ERROR}, http.client.INTERNAL_SERVER_ERROR) + + if retrievedLock or op == 'REFRESH_LOCK': + # useful for later checks + savetime = st.getxattr(acctok['endpoint'], fn, acctok['userid'], utils.LASTSAVETIMEKEY) + if savetime and (not savetime.isdigit() or int(savetime) < int(statInfo['mtime'])): + # we had stale information, discard + log.warning('msg="Detected external modification" filename="%s" savetime="%s" mtime="%s" token="%s"' % + (fn, savetime, statInfo['mtime'], flask.request.args['access_token'][-20:])) savetime = None - if not savetime: - return utils.makeConflictResponse(op, acctok['userid'], None, lock, oldLock, fn, - 'The file was not locked' + ' and got modified' if validateTarget else '') - # now create an "external" lock if required + # perform the required checks for the validity of the new lock + if op == 'REFRESH_LOCK' and not retrievedLock and (not validateTarget or not savetime): + # validateTarget is an extension of the API: a REFRESH_LOCK without previous lock but with a Validate-Target header + # is allowed, provided that the target file was last saved by WOPI (i.e. savetime is valid) and not overwritten + # by other external actions (cf. 
PutFile logic) + return utils.makeConflictResponse(op, acctok['userid'], None, lock, oldLock, fn, + 'The file was not locked' + (' and got modified' if validateTarget else ''), + savetime=savetime) + + # now check for and create an "external" lock if required if srv.config.get('general', 'detectexternallocks', fallback='True').upper() == 'TRUE' and \ os.path.splitext(fn)[1] in srv.codetypes: try: + if retrievedLock == utils.EXTERNALLOCK: + return utils.makeConflictResponse(op, acctok['userid'], retrievedLock, lock, oldLock, + fn, 'The file is locked by ' + lockHolder, savetime=savetime) + # create a LibreOffice-compatible lock file for interoperability purposes, making sure to # not overwrite any existing or being created lock lockcontent = ',Collaborative Online Editor,%s,%s,WOPIServer;' % \ (srv.wopiurl, time.strftime('%d.%m.%Y %H:%M', time.localtime(time.time()))) st.writefile(acctok['endpoint'], utils.getLibreOfficeLockName(fn), acctok['userid'], - lockcontent, None, islock=True) + lockcontent, -1, None, islock=True) except IOError as e: if common.EXCL_ERROR in str(e): # retrieve the LibreOffice-compatible lock just found @@ -193,7 +231,7 @@ def setLock(fileid, reqheaders, acctok): retrievedlolock = next(st.readfile(acctok['endpoint'], utils.getLibreOfficeLockName(fn), acctok['userid'], None)) if isinstance(retrievedlolock, IOError): - raise retrievedlolock + raise retrievedlolock from e retrievedlolock = retrievedlolock.decode() # check that the lock is not stale if datetime.strptime(retrievedlolock.split(',')[3], '%d.%m.%Y %H:%M').timestamp() + \ @@ -207,7 +245,8 @@ def setLock(fileid, reqheaders, acctok): log.warning('msg="Valid LibreOffice lock found, denying WOPI lock" lockop="%s" filename="%s" holder="%s"' % (op.title(), fn, lockholder if lockholder else retrievedlolock)) reason = 'File locked by ' + ((lockholder + ' via LibreOffice') if lockholder else 'a LibreOffice user') - return utils.makeConflictResponse(op, acctok['userid'], 'External App', lock, 
oldLock, fn, reason) + return utils.makeConflictResponse(op, acctok['userid'], 'External App', lock, oldLock, + fn, reason, savetime=savetime) # else it's our previous lock or it had expired: all right, move on else: # any other error is logged but not raised as this is optimistically not blocking WOPI operations @@ -220,29 +259,17 @@ def setLock(fileid, reqheaders, acctok): # LOCK or REFRESH_LOCK: atomically set the lock to the given one, including the expiration time, # and return conflict response if the file was already locked st.setlock(acctok['endpoint'], fn, acctok['userid'], acctok['appname'], utils.encodeLock(lock)) - log.info('msg="Successfully locked" lockop="%s" filename="%s" token="%s" lock="%s"' % - (op.title(), fn, flask.request.args['access_token'][-20:], lock)) - # on first lock, set an xattr with the current time for later conflicts checking + # on first lock, set in addition an xattr with the current time for later conflicts checking if required try: - st.setxattr(acctok['endpoint'], fn, acctok['userid'], utils.LASTSAVETIMEKEY, - int(time.time()), utils.encodeLock(lock)) + st.setxattr(acctok['endpoint'], fn, acctok['userid'], utils.LASTSAVETIMEKEY, int(time.time()), + (acctok['appname'], utils.encodeLock(lock))) except IOError as e: # not fatal, but will generate a conflict file later on, so log a warning log.warning('msg="Unable to set lastwritetime xattr" lockop="%s" user="%s" filename="%s" token="%s" reason="%s"' % (op.title(), acctok['userid'][-20:], fn, flask.request.args['access_token'][-20:], e)) - # also, keep track of files that have been opened for write: this is for statistical purposes only - # (cf. the GetLock WOPI call and the /wopi/cbox/open/list action) - if fn not in srv.openfiles: - srv.openfiles[fn] = (time.asctime(), set([acctok['username']])) - else: - # the file was already opened but without lock: this happens on new files (cf. 
editnew action), just log - log.info('msg="First lock for new file" lockop="%s" user="%s" filename="%s" token="%s"' % - (op.title(), acctok['userid'][-20:], fn, flask.request.args['access_token'][-20:])) - resp = flask.Response() - resp.status_code = http.client.OK - resp.headers['X-WOPI-ItemVersion'] = 'v%s' % statInfo['etag'] - return resp + + return utils.makeLockSuccessResponse(op, acctok, lock, oldLock, f"v{statInfo['etag']}") except IOError as e: if common.EXCL_ERROR in str(e): @@ -250,31 +277,30 @@ def setLock(fileid, reqheaders, acctok): # get the lock that was set if not retrievedLock: retrievedLock, lockHolder = utils.retrieveWopiLock(fileid, op, lock, acctok) - if retrievedLock and not utils.compareWopiLocks(retrievedLock, (oldLock if oldLock else lock)): - # lock mismatch, the WOPI client is supposed to acknowledge the existing lock - # or deny write access to the file + # validate against either the given lock (RefreshLock case) or the given old lock (UnlockAndRelock case); + # in the context of the EXCL_ERROR case, retrievedLock may be None only if the storage is holding a user lock + if not retrievedLock or not utils.compareWopiLocks(retrievedLock, (oldLock if oldLock else lock)): + # lock mismatch, the WOPI client is supposed to acknowledge the existing lock to start a collab session, + # or deny access to the file in edit mode otherwise return utils.makeConflictResponse(op, acctok['userid'], retrievedLock, lock, oldLock, fn, 'The file is locked by %s' % - (lockHolder if lockHolder != 'wopi' else 'another online editor')) - # else it's our own lock, refresh it and return + (lockHolder if lockHolder else 'another editor'), + savetime=savetime) + + # else it's our own lock, refresh it (rechecking the oldLock if necessary, for atomicity) and return try: st.refreshlock(acctok['endpoint'], fn, acctok['userid'], acctok['appname'], - utils.encodeLock(lock)) - log.info('msg="Successfully refreshed" lockop="%s" filename="%s" token="%s" lock="%s"' % - 
(op.title(), fn, flask.request.args['access_token'][-20:], lock)) - # else we don't need to refresh it again - resp = flask.Response() - resp.status_code = http.client.OK - resp.headers['X-WOPI-ItemVersion'] = 'v%s' % statInfo['etag'] - return resp + utils.encodeLock(lock), utils.encodeLock(oldLock)) + return utils.makeLockSuccessResponse(op, acctok, lock, oldLock, f"v{statInfo['etag']}") except IOError as rle: # this is unexpected now log.error('msg="Failed to refresh lock" lockop="%s" filename="%s" token="%s" lock="%s" error="%s"' % (op.title(), fn, flask.request.args['access_token'][-20:], lock, rle)) + # any other error is raised log.error('msg="Unable to store WOPI lock" lockop="%s" filename="%s" token="%s" lock="%s" error="%s"' % (op.title(), fn, flask.request.args['access_token'][-20:], lock, e)) - return IO_ERROR, http.client.INTERNAL_SERVER_ERROR + return utils.createJsonResponse({'message': IO_ERROR}, http.client.INTERNAL_SERVER_ERROR) def getLock(fileid, _reqheaders_unused, acctok): @@ -283,23 +309,6 @@ def getLock(fileid, _reqheaders_unused, acctok): lock, _ = utils.retrieveWopiLock(fileid, 'GETLOCK', '', acctok) resp.status_code = http.client.OK if lock else http.client.NOT_FOUND resp.headers['X-WOPI-Lock'] = lock if lock else '' - # for statistical purposes, check whether a lock exists and update internal bookkeeping - if lock and lock != 'External': - try: - # the file was already opened for write, check whether this is a new user - if not acctok['username'] in srv.openfiles[acctok['filename']][1]: - # yes it's a new user - srv.openfiles[acctok['filename']][1].add(acctok['username']) - if len(srv.openfiles[acctok['filename']][1]) > 1: - # for later monitoring, explicitly log that this file is being edited by at least two users - log.info('msg="Collaborative editing detected" filename="%s" token="%s" users="%s"' % - (acctok['filename'], flask.request.args['access_token'][-20:], - list(srv.openfiles[acctok['filename']][1]))) - except KeyError: - # 
existing lock but missing srv.openfiles[acctok['filename']] ? - log.warning('msg="Repopulating missing metadata" filename="%s" token="%s" friendlyname="%s"' % - (acctok['filename'], flask.request.args['access_token'][-20:], acctok['username'])) - srv.openfiles[acctok['filename']] = (time.asctime(), set([acctok['username']])) return resp @@ -309,7 +318,7 @@ def unlock(fileid, reqheaders, acctok): retrievedLock, _ = utils.retrieveWopiLock(fileid, 'UNLOCK', lock, acctok) if not utils.compareWopiLocks(retrievedLock, lock): return utils.makeConflictResponse('UNLOCK', acctok['userid'], retrievedLock, lock, 'NA', - acctok['filename'], 'Lock mismatch') + acctok['filename'], 'Lock mismatch unlocking file') # OK, the lock matches, remove it try: # validate that the underlying file is still there @@ -317,8 +326,8 @@ def unlock(fileid, reqheaders, acctok): st.unlock(acctok['endpoint'], acctok['filename'], acctok['userid'], acctok['appname'], utils.encodeLock(lock)) except IOError as e: if common.ENOENT_MSG in str(e): - return 'File not found', http.client.NOT_FOUND - return IO_ERROR, http.client.INTERNAL_SERVER_ERROR + return utils.createJsonResponse({'message': 'File not found'}, http.client.NOT_FOUND) + return utils.createJsonResponse({'message': IO_ERROR}, http.client.INTERNAL_SERVER_ERROR) if srv.config.get('general', 'detectexternallocks', fallback='True').upper() == 'TRUE': # and os.path.splitext(acctok['filename'])[1] in srv.codetypes: @@ -330,15 +339,22 @@ def unlock(fileid, reqheaders, acctok): # ignore, it's not worth to report anything here pass - # and update our internal list of opened files + # and update our internal lists of opened files and conflicted sessions try: del srv.openfiles[acctok['filename']] + session = flask.request.headers.get('X-WOPI-SessionId') + if session in srv.conflictsessions['pending']: + s = srv.conflictsessions['pending'].pop(session) + srv.conflictsessions['resolved'][session] = { + 'user': s['user'], + 'restime': int(time.time() - 
int(s['time'])) + } except KeyError: # already removed? pass resp = flask.Response() resp.status_code = http.client.OK - resp.headers['X-WOPI-ItemVersion'] = 'v%s' % statInfo['etag'] + resp.headers['X-WOPI-ItemVersion'] = f"v{statInfo['etag']}" return resp @@ -349,18 +365,20 @@ def putRelative(fileid, reqheaders, acctok): overwriteTarget = str(reqheaders.get('X-WOPI-OverwriteRelativeTarget')).upper() == 'TRUE' log.info('msg="PutRelative" user="%s" filename="%s" fileid="%s" suggTarget="%s" relTarget="%s" ' 'overwrite="%r" wopitimestamp="%s" token="%s"' % - (acctok['userid'], acctok['filename'], fileid, suggTarget, relTarget, + (acctok['userid'][-20:], acctok['filename'], fileid, suggTarget, relTarget, overwriteTarget, reqheaders.get('X-WOPI-TimeStamp'), flask.request.args['access_token'][-20:])) - # either one xor the other must be present; note we can't use `^` as we have a mix of str and NoneType + # either one xor the other MUST be present; note we can't use `^` as we have a mix of str and NoneType if (suggTarget and relTarget) or (not suggTarget and not relTarget): - return '', http.client.NOT_IMPLEMENTED + return utils.createJsonResponse({'message': 'Conflicting headers given'}, http.client.BAD_REQUEST) + else: + targetName = os.path.dirname(acctok['filename']) if suggTarget: # the suggested target is a UTF7-encoded (!) 
filename that can be changed to avoid collisions suggTarget = suggTarget.encode().decode('utf-7') if suggTarget[0] == '.': # we just have the extension here - targetName = os.path.splitext(acctok['filename'])[0] + suggTarget + targetName += os.path.basename(os.path.splitext(acctok['filename'])[0]) + suggTarget else: - targetName = os.path.dirname(acctok['filename']) + os.path.sep + suggTarget + targetName += os.path.sep + suggTarget # check for existence of the target file and adjust until a non-existing one is obtained while True: try: @@ -373,13 +391,13 @@ def putRelative(fileid, reqheaders, acctok): # OK, the targetName is good to go break # we got another error with this file, fail - log.warning('msg="PutRelative" user="%s" filename="%s" token="%s" suggTarget="%s" error="%s"' % - (acctok['userid'][-20:], targetName, flask.request.args['access_token'][-20:], - suggTarget, str(e))) - return '', http.client.BAD_REQUEST + log.error('msg="Error in PutRelative" user="%s" filename="%s" token="%s" suggTarget="%s" error="%s"' % + (acctok['userid'][-20:], targetName, flask.request.args['access_token'][-20:], + suggTarget, str(e))) + return utils.createJsonResponse({'message': 'Error with the given target'}, http.client.INTERNAL_SERVER_ERROR) else: # the relative target is a UTF7-encoded filename to be respected, and that may overwrite an existing file - relTarget = os.path.dirname(acctok['filename']) + os.path.sep + relTarget.encode().decode('utf-7') # make full path + relTarget = targetName + os.path.sep + relTarget.encode().decode('utf-7') # make full path try: # check for file existence statInfo = st.statx(acctok['endpoint'], relTarget, acctok['userid']) @@ -387,41 +405,79 @@ def putRelative(fileid, reqheaders, acctok): retrievedTargetLock, _ = utils.retrieveWopiLock(fileid, 'PUT_RELATIVE', None, acctok, overridefn=relTarget) # deny if lock is valid or if overwriteTarget is False if not overwriteTarget or retrievedTargetLock: - return 
utils.makeConflictResponse('PUT_RELATIVE', acctok['userid'], retrievedTargetLock, 'NA', 'NA', relTarget, { + respmd = { 'message': 'Target file already exists', # specs (the WOPI validator) require these to be populated with valid values 'Name': os.path.basename(relTarget), 'Url': utils.generateWopiSrc(statInfo['inode'], acctok['appname'] == srv.proxiedappname), - }) + } + return utils.makeConflictResponse('PUT_RELATIVE', acctok['userid'], retrievedTargetLock, 'NA', 'NA', + relTarget, respmd) except IOError: + # optimistically assume we're clear pass - # else we can use the relative target targetName = relTarget + # either way, we now have a targetName to save the file: attempt to do so try: utils.storeWopiFile(acctok, None, utils.LASTSAVETIMEKEY, targetName) except IOError as e: - utils.storeForRecovery(flask.request.get_data(), acctok['username'], targetName, - flask.request.args['access_token'][-20:], e) - return IO_ERROR, http.client.INTERNAL_SERVER_ERROR + if str(e) != common.ACCESS_ERROR: + return utils.createJsonResponse({'message': IO_ERROR}, http.client.INTERNAL_SERVER_ERROR) + raisenoaccess = True + # make an attempt in the user's home if possible: that would be allowed for regular (authenticated) users + # when the target is a single file r/w share + if utils.UserType(acctok['usertype']) == utils.UserType.REGULAR: + targetName = srv.homepath.replace('user_initial', acctok['wopiuser'][0]). 
\ + replace('username', acctok['wopiuser'].split('!')[0]) \ + + os.path.sep + os.path.basename(targetName) # noqa: E131 + log.info('msg="PutRelative: set homepath as destination" user="%s" filename="%s" target="%s" token="%s"' % + (acctok['userid'][-20:], acctok['filename'], targetName, flask.request.args['access_token'][-20:])) + try: + utils.storeWopiFile(acctok, None, utils.LASTSAVETIMEKEY, targetName) + raisenoaccess = False + except IOError: + # at this point give up and return error + pass + if raisenoaccess: + # UNAUTHORIZED may seem better but the WOPI validator tests explicitly expect NOT_IMPLEMENTED + return utils.createJsonResponse({'message': 'Unauthorized to perform PutRelative'}, http.client.NOT_IMPLEMENTED) + # generate an access token for the new file log.info('msg="PutRelative: generating new access token" user="%s" filename="%s" ' 'mode="ViewMode.READ_WRITE" friendlyname="%s"' % (acctok['userid'][-20:], targetName, acctok['username'])) - inode, newacctok = utils.generateAccessToken(acctok['userid'], targetName, utils.ViewMode.READ_WRITE, - (acctok['username'], acctok['wopiuser']), - acctok['folderurl'], acctok['endpoint'], - (acctok['appname'], acctok['appediturl'], acctok['appviewurl'])) + inode, newacctok, _ = utils.generateAccessToken(acctok['userid'], targetName, utils.ViewMode.READ_WRITE, + (acctok['username'], acctok['wopiuser'], utils.UserType(acctok['usertype'])), + acctok['folderurl'], acctok['endpoint'], + (acctok['appname'], acctok['appediturl'], acctok['appviewurl']), + acctok.get('trace', 'N/A')) # prepare and send the response as JSON - putrelmd = {} - putrelmd['Name'] = os.path.basename(targetName) - newwopisrc = '%s&access_token=%s' % (utils.generateWopiSrc(inode, acctok['appname'] == srv.proxiedappname), newacctok) - putrelmd['Url'] = url_unquote(newwopisrc).replace('&access_token', '?access_token') - putrelmd['HostEditUrl'] = '%s%s%s' % (acctok['appediturl'], '&' if '?' 
in acctok['appediturl'] else '?', newwopisrc) - putrelmd['HostViewUrl'] = '%s%s%s' % (acctok['appviewurl'], '&' if '?' in acctok['appediturl'] else '?', newwopisrc) + _, newfileid = common.decodeinode(inode) + mdforhosturls = { + 'appname': acctok['appname'], + 'filename': targetName, + 'endpoint': acctok['endpoint'], + 'fileid': newfileid, + } + newwopisrc = f"{utils.generateWopiSrc(inode, acctok['appname'] == srv.proxiedappname)}&access_token={newacctok}" + putrelmd = { + 'Name': os.path.basename(targetName), + 'Url': url_unquote(newwopisrc).replace('&access_token', '?access_token'), + } + hosteurl = srv.config.get('general', 'hostediturl', fallback=None) + if hosteurl: + putrelmd['HostEditUrl'] = utils.generateUrlFromTemplate(hosteurl, mdforhosturls) + else: + putrelmd['HostEditUrl'] = f"{acctok['appediturl']}{'&' if '?' in acctok['appediturl'] else '?'}{newwopisrc}" + hostvurl = srv.config.get('general', 'hostviewurl', fallback=None) + if hostvurl: + putrelmd['HostViewUrl'] = utils.generateUrlFromTemplate(hostvurl, mdforhosturls) + else: + putrelmd['HostViewUrl'] = f"{acctok['appviewurl']}{'&' if '?' 
in acctok['appviewurl'] else '?'}{newwopisrc}" resp = flask.Response(json.dumps(putrelmd), mimetype='application/json') putrelmd['Url'] = putrelmd['HostEditUrl'] = putrelmd['HostViewUrl'] = '_redacted_' - log.info('msg="PutRelative response" token="%s" metadata="%s"' % (newacctok[-20:], putrelmd)) + log.info(f'msg="PutRelative response" token="{newacctok[-20:]}" metadata="{putrelmd}"') return resp @@ -430,16 +486,16 @@ def deleteFile(fileid, _reqheaders_unused, acctok): retrievedLock, _ = utils.retrieveWopiLock(fileid, 'DELETE', '', acctok) if retrievedLock is not None: # file is locked and cannot be deleted - return utils.makeConflictResponse('DELETE', acctok['userid'], retrievedLock, 'NA', 'NA', acctok['filename'], - 'Cannot delete a locked file') + return utils.makeConflictResponse('DELETE', acctok['userid'], retrievedLock, 'NA', 'NA', + acctok['filename'], 'Cannot delete a locked file') try: st.removefile(acctok['endpoint'], acctok['filename'], acctok['userid']) - return 'OK', http.client.OK + return utils.createJsonResponse({'message': 'OK'}, http.client.OK) except IOError as e: if common.ENOENT_MSG in str(e): - return 'File not found', http.client.NOT_FOUND - log.info('msg="DeleteFile" token="%s" error="%s"' % (flask.request.args['access_token'][-20:], e)) - return IO_ERROR, http.client.INTERNAL_SERVER_ERROR + return utils.createJsonResponse({'message': 'File not found'}, http.client.NOT_FOUND) + log.error(f"msg=\"DeleteFile\" token=\"{flask.request.args['access_token'][-20:]}\" error=\"{e}\"") + return utils.createJsonResponse({'message': IO_ERROR}, http.client.INTERNAL_SERVER_ERROR) def renameFile(fileid, reqheaders, acctok): @@ -451,31 +507,42 @@ def renameFile(fileid, reqheaders, acctok): except KeyError as e: log.warning('msg="Missing argument" client="%s" requestedUrl="%s" error="%s" token="%s"' % (flask.request.remote_addr, flask.request.base_url, e, flask.request.args.get('access_token')[-20:])) - return 'Missing argument', http.client.BAD_REQUEST - 
lock = reqheaders.get('X-WOPI-Lock') + return utils.createJsonResponse({'message': 'Missing argument'}, http.client.BAD_REQUEST) + lock = reqheaders.get('X-WOPI-Lock') # may not be specified retrievedLock, _ = utils.retrieveWopiLock(fileid, 'RENAMEFILE', lock, acctok) if retrievedLock is not None and not utils.compareWopiLocks(retrievedLock, lock): - return utils.makeConflictResponse('RENAMEFILE', acctok['userid'], retrievedLock, lock, 'NA', acctok['filename']) + return utils.makeConflictResponse('RENAMEFILE', acctok['userid'], retrievedLock, lock, 'NA', + acctok['filename'], 'Lock mismatch renaming file') try: # the destination name comes without base path and typically without extension targetName = os.path.dirname(acctok['filename']) + os.path.sep + targetName \ + os.path.splitext(acctok['filename'])[1] if targetName.find('.') < 0 else '' log.info('msg="RenameFile" user="%s" filename="%s" token="%s" targetname="%s"' % (acctok['userid'][-20:], acctok['filename'], flask.request.args['access_token'][-20:], targetName)) - st.renamefile(acctok['endpoint'], acctok['filename'], targetName, acctok['userid'], utils.encodeLock(retrievedLock)) - # also rename the lock if applicable + + # try to rename and pass the lock if present. Note that WOPI specs do not require files to be locked + # on rename operations, but the backend may still fail as renames may be implemented as copy + delete, + # which may require to pass a lock. 
+ lockmd = (acctok['appname'], utils.encodeLock(retrievedLock)) if retrievedLock else None + st.renamefile(acctok['endpoint'], acctok['filename'], targetName, acctok['userid'], lockmd) + # also rename the LO lock if applicable if os.path.splitext(acctok['filename'])[1] in srv.codetypes: st.renamefile(acctok['endpoint'], utils.getLibreOfficeLockName(acctok['filename']), utils.getLibreOfficeLockName(targetName), acctok['userid'], None) # send the response as JSON return flask.Response(json.dumps(renamemd), mimetype='application/json') except IOError as e: - if common.ENOENT_MSG in str(e): - return 'File not found', http.client.NOT_FOUND - log.info('msg="RenameFile" token="%s" error="%s"' % (flask.request.args['access_token'][-20:], e)) + log.warn(f"msg=\"RenameFile\" token=\"{flask.request.args['access_token'][-20:]}\" error=\"{e}\"") resp = flask.Response() - resp.headers['X-WOPI-InvalidFileNameError'] = 'Failed to rename: %s' % e - resp.status_code = http.client.BAD_REQUEST + if common.ENOENT_MSG in str(e): + resp.headers['X-WOPI-InvalidFileNameError'] = 'File not found' + resp.status_code = http.client.NOT_FOUND + elif common.EXCL_ERROR in str(e): + resp.headers['X-WOPI-InvalidFileNameError'] = 'Cannot rename/move unlocked file' + resp.status_code = http.client.NOT_IMPLEMENTED + else: + resp.headers['X-WOPI-InvalidFileNameError'] = f'Failed to rename: {e}' + resp.status_code = http.client.INTERNAL_SERVER_ERROR return resp @@ -490,21 +557,17 @@ def _createNewFile(fileid, acctok): raise IOError log.warning('msg="PutFile" error="File exists but no WOPI lock provided" filename="%s" token="%s"' % (acctok['filename'], flask.request.args['access_token'][-20:])) - return 'File exists', http.client.CONFLICT + return utils.createJsonResponse({'message': 'File exists'}, http.client.CONFLICT) except IOError: # indeed the file did not exist, so we write it for the first time try: utils.storeWopiFile(acctok, None, utils.LASTSAVETIMEKEY) log.info('msg="File stored successfully" 
action="editnew" user="%s" filename="%s" token="%s"' % (acctok['userid'][-20:], acctok['filename'], flask.request.args['access_token'][-20:])) - # and we keep track of it as an open file with timestamp = Epoch, despite not having any lock yet. - # XXX this is to work around an issue with concurrent editing of newly created files (cf. iopOpen) - srv.openfiles[acctok['filename']] = ('0', set([acctok['username']])) - return 'OK', http.client.OK + return utils.createJsonResponse({'message': 'OK'}, http.client.OK) except IOError as e: - utils.storeForRecovery(flask.request.get_data(), acctok['username'], acctok['filename'], - flask.request.args['access_token'][-20:], e) - return IO_ERROR, http.client.INTERNAL_SERVER_ERROR + utils.storeForRecovery(acctok['wopiuser'], acctok['filename'], flask.request.args['access_token'][-20:], e) + return utils.createJsonResponse({'message': IO_ERROR}, http.client.INTERNAL_SERVER_ERROR) def putFile(fileid, acctok): @@ -516,42 +579,69 @@ def putFile(fileid, acctok): lock = flask.request.headers['X-WOPI-Lock'] retrievedLock, lockHolder = utils.retrieveWopiLock(fileid, 'PUTFILE', lock, acctok) if retrievedLock is None: - return utils.makeConflictResponse('PUTFILE', acctok['userid'], retrievedLock, lock, 'NA', acctok['filename'], - 'Cannot overwrite unlocked file') + return utils.makeConflictResponse('PUTFILE', acctok['userid'], retrievedLock, lock, 'NA', + acctok['filename'], 'Cannot overwrite unlocked file') + if retrievedLock == utils.EXTERNALLOCK: + # this should not happen and we must fail, yet we save the file as conflict for the user to recover it + log.error('msg="Detected external lock, forcing conflict" user="%s" filename="%s" tocken="%s"' % + (acctok['userid'][-20:], acctok['filename'], flask.request.args['access_token'][-20:])) + return utils.storeAfterConflict(acctok, retrievedLock, lock, f'Cannot overwrite file edited by {lockHolder}') if not utils.compareWopiLocks(retrievedLock, lock): - log.warning('msg="Forcing conflict 
based on external lock" user="%s" filename="%s" token="%s"' % - (acctok['userid'][-20:], acctok['filename'], flask.request.args['access_token'][-20:])) - return utils.storeAfterConflict(acctok, retrievedLock, lock, 'Cannot overwrite file locked by %s' % - (lockHolder if lockHolder != 'wopi' else 'another application')) - # OK, we can save the file now + # the save operation is to be refused, but we should get a subsequent PutFile call with the correct lock, given that + # the current lock is from WOPI; yet we keep the file in the recovery area in case the error turned out to be real + utils.storeForRecovery(acctok['wopiuser'], acctok['filename'], flask.request.args['access_token'][-20:], + 'Mismatched lock on PutFile') + return utils.makeConflictResponse('PUTFILE', acctok['userid'], retrievedLock, lock, 'NA', + acctok['filename'], f'Cannot overwrite file locked by {lockHolder}') + + # OK, we can save the file: check the destination file against conflicts if required log.info('msg="PutFile" user="%s" filename="%s" fileid="%s" action="edit" token="%s"' % (acctok['userid'][-20:], acctok['filename'], fileid, flask.request.args['access_token'][-20:])) try: - # check now the destination file against conflicts - savetime = st.getxattr(acctok['endpoint'], acctok['filename'], acctok['userid'], utils.LASTSAVETIMEKEY) - mtime = None - mtime = st.stat(acctok['endpoint'], acctok['filename'], acctok['userid'])['mtime'] - if savetime and savetime.isdigit() and int(savetime) >= int(mtime): - # Go for overwriting the file. Note that the entire check+write operation should be atomic, - # but the previous checks still give the opportunity of a race condition. We just live with it. - # Also, note we can't get a time resolution better than one second! - # Anyhow, the EFSS should support versioning for such cases. 
- utils.storeWopiFile(acctok, retrievedLock, utils.LASTSAVETIMEKEY) - log.info('msg="File stored successfully" action="edit" user="%s" filename="%s" token="%s"' % - (acctok['userid'][-20:], acctok['filename'], flask.request.args['access_token'][-20:])) - statInfo = st.statx(acctok['endpoint'], acctok['filename'], acctok['userid'], versioninv=1) - resp = flask.Response() - resp.status_code = http.client.OK - resp.headers['X-WOPI-ItemVersion'] = 'v%s' % statInfo['etag'] - return resp + if srv.config.get('general', 'detectexternalmodifications', fallback='True').upper() == 'TRUE': + # check now the destination file against conflicts if required + savetime = st.getxattr(acctok['endpoint'], acctok['filename'], acctok['userid'], utils.LASTSAVETIMEKEY) + mtime = None + mtime = st.stat(acctok['endpoint'], acctok['filename'], acctok['userid'])['mtime'] + if not savetime or not savetime.isdigit() or int(savetime) < int(mtime): + # no xattr was there or we got our xattr but mtime is more recent: someone may have updated the file from + # a different source (e.g. FUSE or SMB mount), therefore force conflict and return failure to the application + log.warning('msg="Detected external modification, forcing conflict" user="%s" filename="%s" ' + 'savetime="%s" mtime="%s" token="%s"' % + (acctok['userid'][-20:], acctok['filename'], savetime, mtime, + flask.request.args['access_token'][-20:])) + return utils.storeAfterConflict(acctok, utils.EXTERNALLOCK, lock, + 'The file being edited got moved or overwritten') + + # Go for overwriting the file. Note that the entire check+write operation should be atomic, + # but the previous checks still give the opportunity of a race condition. We just live with it. + # Also, note we can't get a time resolution better than one second! + # Anyhow, the EFSS should support versioning for such cases. 
+ utils.storeWopiFile(acctok, retrievedLock, utils.LASTSAVETIMEKEY) + statInfo = st.statx(acctok['endpoint'], acctok['filename'], acctok['userid'], versioninv=1) + log.info('msg="File stored successfully" action="edit" user="%s" filename="%s" version="%s" token="%s"' % + (acctok['userid'][-20:], acctok['filename'], statInfo['etag'], flask.request.args['access_token'][-20:])) + resp = flask.Response() + resp.status_code = http.client.OK + resp.headers['X-WOPI-ItemVersion'] = f"v{statInfo['etag']}" + return resp + + except IOError as e: + utils.storeForRecovery(acctok['wopiuser'], acctok['filename'], flask.request.args['access_token'][-20:], e) + return utils.createJsonResponse({'message': IO_ERROR}, http.client.INTERNAL_SERVER_ERROR) + +def putUserInfo(fileid, reqbody, acctok): + '''Implements the PutUserInfo WOPI call''' + try: + lockmd = st.getlock(acctok['endpoint'], acctok['filename'], acctok['userid']) + lockmd = (acctok['appname'], utils.encodeLock(lockmd)) if lockmd else None + st.setxattr(acctok['endpoint'], acctok['filename'], acctok['userid'], + utils.USERINFOKEY + '.' + acctok['wopiuser'].split('!')[0], reqbody.decode(), lockmd) + log.info('msg="PutUserInfo" user="%s" filename="%s" fileid="%s" token="%s"' % + (acctok['userid'][-20:], acctok['filename'], fileid, flask.request.args['access_token'][-20:])) + return utils.createJsonResponse({'message': 'OK'}, http.client.OK) except IOError as e: - utils.storeForRecovery(flask.request.get_data(), acctok['username'], acctok['filename'], - flask.request.args['access_token'][-20:], e) - return IO_ERROR, http.client.INTERNAL_SERVER_ERROR - - # no xattr was there or we got our xattr but mtime is more recent: someone may have updated the file - # from a different source (e.g. 
FUSE or SMB mount), therefore force conflict and return failure to the application - log.warning('msg="Forcing conflict based on save time" user="%s" filename="%s" savetime="%s" lastmtime="%s" token="%s"' % - (acctok['userid'][-20:], acctok['filename'], savetime, mtime, flask.request.args['access_token'][-20:])) - return utils.storeAfterConflict(acctok, 'External', lock, 'The file being edited got moved or overwritten') + log.error('msg="PutUserInfo failed" filename="%s" error="%s" token="%s"' % + (acctok['filename'], e, flask.request.args['access_token'][-20:])) + return utils.createJsonResponse({'message': IO_ERROR}, http.client.INTERNAL_SERVER_ERROR) diff --git a/src/core/wopiutils.py b/src/core/wopiutils.py index dc6a33ab..0c191363 100644 --- a/src/core/wopiutils.py +++ b/src/core/wopiutils.py @@ -27,12 +27,20 @@ # this is the xattr key used for conflicts resolution on the remote storage LASTSAVETIMEKEY = 'iop.wopi.lastwritetime' +# this is the xattr key used to store user info data from WOPI apps +USERINFOKEY = 'iop.wopi.userinfo' + +# header used by reverse proxies such as traefik to pass the real remote IP address +REALIPHEADER = 'X-Real-IP' + +# conventional string representing an external, non-WOPI lock +EXTERNALLOCK = 'External' + # convenience references to global entities st = None srv = None log = None WOPIVER = None -endpoints = {} class ViewMode(Enum): @@ -43,8 +51,25 @@ class ViewMode(Enum): VIEW_ONLY = "VIEW_MODE_VIEW_ONLY" # The file can be downloaded READ_ONLY = "VIEW_MODE_READ_ONLY" - # The file can be downloaded and updated + # The file can be downloaded and updated, and the app should be shown in edit mode READ_WRITE = "VIEW_MODE_READ_WRITE" + # The file can be downloaded and updated, and the app should be shown in preview mode + PREVIEW = "VIEW_MODE_PREVIEW" + + +class UserType(Enum): + '''App user types as given by + https://github.com/cs3org/reva/blob/master/pkg/app/provider/wopi/wopi.go + ''' + INVALID = "invalid" + # regular user, logged 
in the local ID provider + REGULAR = "regular" + # federated/external user, logged in the local ID provider but with no home space + FEDERATED = "federated" + # OCM user, logged in a remote ID provider + OCM = "ocm" + # anonymous user, accessing a public link + ANONYMOUS = "anonymous" class JsonLogger: @@ -69,15 +94,17 @@ def facade(*args, **kwargs): m = f[f.rfind('/') + 1:] try: # as we use a `key="value" ...` format in all logs, we only have args[0] - payload = 'module="%s" %s ' % (m, args[0]) + payload = f'module="{m}" {args[0]} ' # now convert the payload to a dictionary assuming no `="` nor `" ` is present inside any key or value! # the added trailing space matches the `" ` split, so we remove the last element of that list payload = dict([tuple(kv.split('="')) for kv in payload.split('" ')[:-1]]) # then convert dict -> json -> str + strip `{` and `}` payload = str(json.dumps(payload))[1:-1] except Exception: # pylint: disable=broad-except - # if the above assumptions do not hold, just json-escape the original log - payload = '"module": "%s", "payload": "%s"' % (m, json.dumps(args[0])) + # if the above assumptions do not hold, just json-escape the original log and add debug info + exc_type, exc_obj, tb = sys.exc_info() + payload = f'"module": "{m}", "payload": "{json.dumps(args[0])}", "' + \ + f'"loggerex": "{exc_type}: {exc_obj} at L{tb.tb_lineno}"' args = (payload,) # pass-through facade return getattr(self.logger, name)(*args, **kwargs) @@ -88,7 +115,8 @@ def logGeneralExceptionAndReturn(ex, req): '''Convenience function to log a stack trace and return HTTP 500''' ex_type, ex_value, ex_traceback = sys.exc_info() log.critical('msg="Unexpected exception caught" exception="%s" type="%s" traceback="%s" client="%s" requestedUrl="%s"' % - (ex, ex_type, traceback.format_exception(ex_type, ex_value, ex_traceback), req.remote_addr, req.url)) + (ex, ex_type, traceback.format_exception(ex_type, ex_value, ex_traceback), + flask.request.headers.get(REALIPHEADER, 
flask.request.remote_addr), req.url)) return 'Internal error, please contact support', http.client.INTERNAL_SERVER_ERROR @@ -98,11 +126,12 @@ def validateAndLogHeaders(op): # validate the access token try: acctok = jwt.decode(flask.request.args['access_token'], srv.wopisecret, algorithms=['HS256']) - if acctok['exp'] < time.time(): + if acctok['exp'] < time.time() or 'cs3org:wopiserver' not in acctok['iss']: raise jwt.exceptions.ExpiredSignatureError - except (jwt.exceptions.DecodeError, jwt.exceptions.ExpiredSignatureError) as e: - log.info('msg="Expired or malformed token" client="%s" requestedUrl="%s" error="%s" token="%s"' % - (flask.request.remote_addr, flask.request.base_url, str(type(e)) + ': ' + str(e), flask.request.args['access_token'])) + except (jwt.exceptions.DecodeError, jwt.exceptions.ExpiredSignatureError, KeyError) as e: + log.info('msg="Expired or malformed token" client="%s" requestedUrl="%s" details="%s" token="%s"' % + (flask.request.headers.get(REALIPHEADER, flask.request.remote_addr), flask.request.base_url, + str(type(e)) + ': ' + str(e), flask.request.args.get('access_token'))) return 'Invalid access token', http.client.UNAUTHORIZED # validate the WOPI timestamp: this is typically not present, but if it is we must check its expiration @@ -115,33 +144,49 @@ def validateAndLogHeaders(op): # timestamps older than 20 minutes must be considered expired raise ValueError except ValueError: - log.warning('msg="%s: invalid X-WOPI-Timestamp" user="%s" filename="%s" request="%s"' % - (op, acctok['userid'][-20:], acctok['filename'], flask.request.__dict__)) + log.warning('msg="%s: invalid X-WOPI-Timestamp" user="%s" token="%s" client="%s"' % + (op, acctok['userid'][-20:], flask.request.args['access_token'][-20:], + flask.request.headers.get(REALIPHEADER, flask.request.remote_addr))) # UNAUTHORIZED would seem more appropriate here, but the ProofKeys part of the MS test suite explicitly requires this return 'Invalid or expired X-WOPI-Timestamp header', 
http.client.INTERNAL_SERVER_ERROR # log all relevant headers to help debugging - log.debug('msg="%s: client context" user="%s" filename="%s" token="%s" client="%s" deviceId="%s" reqId="%s" sessionId="%s" ' - 'app="%s" appEndpoint="%s" correlationId="%s" wopits="%s"' % - (op.title(), acctok['userid'][-20:], acctok['filename'], - flask.request.args['access_token'][-20:], flask.request.remote_addr, + session = flask.request.headers.get('X-WOPI-SessionId') + log.debug('msg="%s: client context" trace="%s" user="%s" filename="%s" token="%s" client="%s" deviceId="%s" reqId="%s" ' + 'sessionId="%s" app="%s" appEndpoint="%s" correlationId="%s" wopits="%s"' % + (op.title(), acctok.get('trace', 'N/A'), acctok['userid'][-20:], acctok['filename'], + flask.request.args['access_token'][-20:], flask.request.headers.get(REALIPHEADER, flask.request.remote_addr), flask.request.headers.get('X-WOPI-DeviceId'), flask.request.headers.get('X-Request-Id'), - flask.request.headers.get('X-WOPI-SessionId'), flask.request.headers.get('X-WOPI-RequestingApplication'), + session, flask.request.headers.get('X-WOPI-RequestingApplication'), flask.request.headers.get('X-WOPI-AppEndpoint'), flask.request.headers.get('X-WOPI-CorrelationId'), wopits)) + + # update bookkeeping of pending sessions + if op.title() == 'Checkfileinfo' and session in srv.conflictsessions['pending'] and \ + int(srv.conflictsessions['pending'][session]['time']) < time.time() - 30: + # a previously conflicted session is still around executing Checkfileinfo after some time, assume it got resolved + _resolveSession(session, acctok['filename']) return acctok, None def generateWopiSrc(fileid, proxy=False): '''Returns a URL-encoded WOPISrc for the given fileid, proxied if required.''' if not proxy or not srv.wopiproxy: - return url_quote_plus('%s/wopi/files/%s' % (srv.wopiurl, fileid)).replace('-', '%2D') + return url_quote_plus(f'{srv.wopiurl}/wopi/files/{fileid}').replace('-', '%2D') # proxy the WOPI request through an external 
WOPI proxy service, but only if it was not already proxied - if len(fileid) < 50: # heuristically, proxied fileids are (much) longer than that - log.debug('msg="Generating proxied fileid" fileid="%s" proxy="%s"' % (fileid, srv.wopiproxy)) + if len(fileid) < 90: # heuristically, proxied fileids are (much) longer than that + log.debug(f'msg="Generating proxied fileid" fileid="{fileid}" proxy="{srv.wopiproxy}"') fileid = jwt.encode({'u': srv.wopiurl + '/wopi/files/', 'f': fileid}, srv.wopiproxykey, algorithm='HS256') else: - log.debug('msg="Proxied fileid already created" fileid="%s" proxy="%s"' % (fileid, srv.wopiproxy)) - return url_quote_plus('%s/wopi/files/%s' % (srv.wopiproxy, fileid)).replace('-', '%2D') + log.debug(f'msg="Proxied fileid already created" fileid="{fileid}" proxy="{srv.wopiproxy}"') + return url_quote_plus(f'{srv.wopiproxy}/wopi/files/{fileid}').replace('-', '%2D') + + +def generateUrlFromTemplate(url, acctok): + '''One-liner to parse an URL template and return it with actualised placeholders''' + return url.replace('', acctok['filename']). \ + replace('', acctok['endpoint']). \ + replace('', acctok['fileid']). 
\ + replace('', acctok['appname']) def getLibreOfficeLockName(filename): @@ -166,49 +211,44 @@ def randomString(size): return ''.join([choice(ascii_lowercase) for _ in range(size)]) -def generateAccessToken(userid, fileid, viewmode, user, folderurl, endpoint, app): +def generateAccessToken(userid, fileid, viewmode, user, folderurl, endpoint, app, trace): '''Generates an access token for a given file and a given user, and returns a tuple with the file's inode and the URL-encoded access token.''' appname, appediturl, appviewurl = app - username, wopiuser = user + friendlyname, wopiuser, usertype = user # wopiuser has the form `username!userid_in_stat_format` log.debug('msg="Generating token" userid="%s" fileid="%s" endpoint="%s" app="%s"' % (userid[-20:], fileid, endpoint, appname)) try: - # stat the file to check for existence and get a version-invariant inode and modification time: - # the inode serves as fileid (and must not change across save operations), the mtime is used for version information. 
+ # stat the file to check for existence and get a version-invariant inode: + # the inode serves as fileid (and must not change across save operations) statinfo = st.statx(endpoint, fileid, userid) except IOError as e: - log.info('msg="Requested file not found or not a file" fileid="%s" error="%s"' % (fileid, e)) + log.info(f'msg="Requested file not found or not a file" fileid="{fileid}" error="{e}"') raise - exptime = int(time.time()) + srv.tokenvalidity + exptime = int(time.time()) + srv.config.getint('general', 'tokenvalidity') fext = os.path.splitext(statinfo['filepath'])[1].lower() - if not appediturl: - # deprecated: for backwards compatibility, work out the URLs from the discovered app endpoints - try: - appediturl = endpoints[fext]['edit'] - appviewurl = endpoints[fext]['view'] - except KeyError: - log.critical('msg="No app URLs registered for the given file type" fileext="%s" mimetypescount="%d"' % - (fext, len(endpoints) if endpoints else 0)) - raise IOError if srv.config.get('general', 'disablemswriteodf', fallback='False').upper() == 'TRUE' and \ - fext in srv.codetypes and appname != 'Collabora' and appname != '' and viewmode == ViewMode.READ_WRITE: - # we're opening an ODF file and the app is not Collabora (the last check is needed because the legacy endpoint - # does not set appname when the app is not proxied, so we optimistically assume it's Collabora and let it go) - log.info('msg="Forcing read-only access to ODF file" filename="%s"' % statinfo['filepath']) + fext[1:3] in ('od', 'ot') and appname != 'Collabora' and viewmode == ViewMode.READ_WRITE: + # we're opening an ODF (`.o[d|t]?`) file and the app is not Collabora + log.info(f"msg=\"Forcing read-only access to ODF file\" filename=\"{statinfo['filepath']}\"") viewmode = ViewMode.READ_ONLY - acctok = jwt.encode({'userid': userid, 'wopiuser': wopiuser, 'filename': statinfo['filepath'], 'username': username, - 'viewmode': viewmode.value, 'folderurl': folderurl, 'endpoint': endpoint, - 'appname': 
appname, 'appediturl': appediturl, 'appviewurl': appviewurl, - 'exp': exptime, 'iss': 'cs3org:wopiserver:%s' % WOPIVER}, # standard claims - srv.wopisecret, algorithm='HS256') - log.info('msg="Access token generated" userid="%s" wopiuser="%s" mode="%s" endpoint="%s" filename="%s" inode="%s" ' - 'mtime="%s" folderurl="%s" appname="%s" expiration="%d" token="%s"' % - (userid[-20:], wopiuser if wopiuser != userid else username, viewmode, endpoint, - statinfo['filepath'], statinfo['inode'], statinfo['mtime'], - folderurl, appname, exptime, acctok[-20:])) - # return the inode == fileid, the filepath and the access token - return statinfo['inode'], acctok + if viewmode == ViewMode.PREVIEW and statinfo['size'] == 0: + # override preview mode when a new file is being created + viewmode = ViewMode.READ_WRITE + tokmd = { + 'userid': userid, 'wopiuser': wopiuser, 'usertype': usertype.value, 'filename': statinfo['filepath'], 'fileid': fileid, + 'username': friendlyname, 'viewmode': viewmode.value, 'folderurl': folderurl, 'endpoint': endpoint, + 'appname': appname, 'appediturl': appediturl, 'appviewurl': appviewurl, 'trace': trace, + 'exp': exptime, 'iss': f'cs3org:wopiserver:{WOPIVER}' # standard claims + } + acctok = jwt.encode(tokmd, srv.wopisecret, algorithm='HS256') + if 'MS 365' in appname: + srv.allusers.add(userid) + log.info('msg="Access token generated" trace="%s" userid="%s" wopiuser="%s" friendlyname="%s" usertype="%s" mode="%s" ' + 'endpoint="%s" filename="%s" inode="%s" mtime="%s" folderurl="%s" appname="%s" expiration="%d" token="%s"' % + (trace, userid[-20:], wopiuser, friendlyname, usertype, viewmode, endpoint, statinfo['filepath'], + statinfo['inode'], statinfo['mtime'], folderurl, appname, exptime, acctok[-20:])) + return statinfo['inode'], acctok, viewmode def encodeLock(lock): @@ -241,7 +281,7 @@ def retrieveWopiLock(fileid, operation, lockforlog, acctok, overridefn=None): mslockstat = st.stat(acctok['endpoint'], 
getMicrosoftOfficeLockName(acctok['filename']), acctok['userid']) log.info('msg="Found existing MS Office lock" lockop="%s" user="%s" filename="%s" token="%s" lockmtime="%ld"' % (operation.title(), acctok['userid'][-20:], acctok['filename'], encacctok, mslockstat['mtime'])) - return 'External', 'Microsoft Office for Desktop' + return EXTERNALLOCK, 'Microsoft Office for Desktop' except IOError: pass try: @@ -258,7 +298,7 @@ def retrieveWopiLock(fileid, operation, lockforlog, acctok, overridefn=None): 'lockmtime="%ld" holder="%s"' % (operation.title(), acctok['userid'][-20:], acctok['filename'], encacctok, lolockstat['mtime'], lolockholder)) - return 'External', 'LibreOffice for Desktop' + return EXTERNALLOCK, 'LibreOffice for Desktop' except (IOError, StopIteration): pass @@ -286,7 +326,7 @@ def retrieveWopiLock(fileid, operation, lockforlog, acctok, overridefn=None): except IOError as e: log.info('msg="Found non-compatible or unreadable lock" lockop="%s" user="%s" filename="%s" token="%s" error="%s"' % (operation.title(), acctok['userid'][-20:], acctok['filename'], encacctok, e)) - return 'External', 'Another app or user' + return EXTERNALLOCK, 'Another app or user' log.info('msg="Retrieved lock" lockop="%s" user="%s" filename="%s" fileid="%s" lock="%s" ' 'retrievedlock="%s" expTime="%s" token="%s"' % @@ -297,14 +337,14 @@ def retrieveWopiLock(fileid, operation, lockforlog, acctok, overridefn=None): def compareWopiLocks(lock1, lock2): '''Compares two locks and returns True if they represent the same WOPI lock. - Officially, the comparison must be based on the locks' string representations, but because of - a bug in Word Online, currently the internal format of the WOPI locks is looked at, based - on heuristics. Note that this format is subject to change and is not documented!''' + Officially, the comparison must be based on the locks' string representations. 
But because of + a bug in early versions of Word Online, the internal format of the WOPI locks may be looked at, + based on heuristics. Note that this format is subject to change and is not documented!''' if lock1 == lock2: - log.debug('msg="compareLocks" lock1="%s" lock2="%s" result="True"' % (lock1, lock2)) + log.debug(f'msg="compareLocks" lock1="{lock1}" lock2="{lock2}" result="True"') return True - if srv.config.get('general', 'wopilockstrictcheck', fallback='False').upper() == 'TRUE': - log.debug('msg="compareLocks" lock1="%s" lock2="%s" strict="True" result="False"' % (lock1, lock2)) + if srv.config.get('general', 'wopilockstrictcheck', fallback='True').upper() == 'TRUE': + log.debug(f'msg="compareLocks" lock1="{lock1}" lock2="{lock2}" strict="True" result="False"') return False # before giving up, attempt to parse the lock as a JSON dictionary if allowed by the config @@ -316,47 +356,110 @@ def compareWopiLocks(lock1, lock2): log.debug('msg="compareLocks" lock1="%s" lock2="%s" strict="False" result="%r"' % (lock1, lock2, l1['S'] == l2['S'])) return l1['S'] == l2['S'] # used by Word - log.debug('msg="compareLocks" lock1="%s" lock2="%s" strict="False" result="False"' % (lock1, lock2)) - return False except (TypeError, ValueError): # lock2 is not a JSON dictionary if 'S' in l1: log.debug('msg="compareLocks" lock1="%s" lock2="%s" strict="False" result="%r"' % (lock1, lock2, l1['S'] == lock2)) - return l1['S'] == lock2 # also used by Word (BUG!) 
+ return l1['S'] == lock2 # also used by Word except (TypeError, ValueError): # lock1 is not a JSON dictionary: log the lock values and fail the comparison - log.debug('msg="compareLocks" lock1="%s" lock2="%s" strict="False" result="False"' % (lock1, lock2)) - return False + pass + log.debug(f'msg="compareLocks" lock1="{lock1}" lock2="{lock2}" strict="False" result="False"') + return False -def makeConflictResponse(operation, user, retrievedlock, lock, oldlock, filename, reason=None): +def makeConflictResponse(operation, user, retrievedlock, lock, oldlock, filename, reason, savetime=None): '''Generates and logs an HTTP 409 response in case of locks conflict''' resp = flask.Response(mimetype='application/json') resp.headers['X-WOPI-Lock'] = retrievedlock if retrievedlock else '' resp.status_code = http.client.CONFLICT - if reason: - # this is either a simple message or a dictionary: in all cases we want a dictionary to be JSON-ified - if isinstance(reason, str): - reason = {'message': reason} - resp.headers['X-WOPI-LockFailureReason'] = reason['message'] - resp.data = json.dumps(reason) + if isinstance(reason, str): + # transform the given message in a dict to be JSON-ified + reason = {'message': reason} + resp.headers['X-WOPI-LockFailureReason'] = reason['message'] + resp.data = json.dumps(reason) + + session = flask.request.headers.get('X-WOPI-SessionId') + if session and retrievedlock != EXTERNALLOCK and \ + session not in srv.conflictsessions['pending'] and session not in srv.conflictsessions['resolved']: + srv.conflictsessions['pending'][session] = { + 'user': user, + 'time': int(time.time()), + 'heldby': retrievedlock, + 'type': os.path.splitext(filename)[1], + } + if savetime: + fileage = f'{time.time() - int(savetime):1.1f}' + else: + fileage = 'NA' log.warning('msg="Returning conflict" lockop="%s" user="%s" filename="%s" token="%s" sessionId="%s" lock="%s" ' - 'oldlock="%s" retrievedlock="%s" reason="%s"' % - (operation.title(), user, filename, 
flask.request.args['access_token'][-20:], - flask.request.headers.get('X-WOPI-SessionId'), lock, oldlock, retrievedlock, + 'oldlock="%s" retrievedlock="%s" fileage="%s" reason="%s"' % + (('UnlockAndRelock' if oldlock and oldlock != 'NA' and operation != 'PUTFILE' else operation.title()), + user, filename, flask.request.args['access_token'][-20:], session, lock, oldlock, retrievedlock, fileage, (reason['message'] if reason else 'NA'))) return resp +def _resolveSession(session, filename): + '''Mark a session as resolved and account the given filename in the openfiles map. + This is only used for bookkeeping, no functionality is associated to those maps''' + if session in srv.conflictsessions['pending']: + s = srv.conflictsessions['pending'].pop(session) + srv.conflictsessions['resolved'][session] = { + 'user': s['user'], + 'restime': int(time.time() - int(s['time'])), + 'type': s['type'], + } + # keep some accounting of the open files + if filename not in srv.openfiles: + srv.openfiles[filename] = (time.asctime(), set()) + if session not in srv.openfiles[filename][1]: + srv.openfiles[filename][1].add(session) + + +def makeLockSuccessResponse(operation, acctok, lock, oldlock, version): + '''Generates and logs an HTTP 200 response with appropriate headers for Lock/RefreshLock operations''' + session = flask.request.headers.get('X-WOPI-SessionId') + if not session: + session = acctok['wopiuser'].split('!')[0] + _resolveSession(session, acctok['filename']) + + log.info('msg="Successfully locked" lockop="%s" filename="%s" token="%s" sessionId="%s" ' + 'lock="%s" oldlock="%s" version="%s"' % + (('UnlockAndRelock' if oldlock else operation.title()), acctok['filename'], + flask.request.args['access_token'][-20:], session, lock, oldlock, version)) + resp = flask.Response() + resp.status_code = http.client.OK + resp.headers['X-WOPI-ItemVersion'] = version + return resp + + def storeWopiFile(acctok, retrievedlock, xakey, targetname=''): '''Saves a file from an HTTP request to 
the given target filename (defaulting to the access token's one), and stores the save time as an xattr. Throws IOError in case of any failure''' if not targetname: targetname = acctok['filename'] - st.writefile(acctok['endpoint'], targetname, acctok['userid'], flask.request.get_data(), encodeLock(retrievedlock)) - # save the current time for later conflict checking: this is never older than the mtime of the file - st.setxattr(acctok['endpoint'], targetname, acctok['userid'], xakey, int(time.time()), encodeLock(retrievedlock)) + session = flask.request.headers.get('X-WOPI-SessionId') + if not session: + session = acctok['wopiuser'].split('!')[0] + _resolveSession(session, targetname) + + writeerror = None + try: + st.writefile(acctok['endpoint'], targetname, acctok['userid'], + flask.request.stream, flask.request.content_length, + (acctok['appname'], encodeLock(retrievedlock))) + except IOError as e: + if str(e) == common.ACCESS_ERROR: + raise + # something went wrong on write: we still want to setxattr but report this error to the caller + writeerror = e + # in all cases save the current time for later conflict checking: this is never older than the mtime of the file + st.setxattr(acctok['endpoint'], targetname, acctok['userid'], xakey, int(time.time()), + (acctok['appname'], encodeLock(retrievedlock))) + if writeerror: + raise writeerror def storeAfterConflict(acctok, retrievedlock, lock, reason): @@ -365,7 +468,7 @@ def storeAfterConflict(acctok, retrievedlock, lock, reason): next to the original one, or to the user's home, or to the recovery path.''' newname, ext = os.path.splitext(acctok['filename']) # typical EFSS formats are like '_conflict--