diff --git a/.github/actions/ansible/Dockerfile b/.github/actions/ansible/Dockerfile deleted file mode 100644 index ad9558dbc7..0000000000 --- a/.github/actions/ansible/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM alpine:3.19.0 -LABEL app=kg-prototypes - -ENV ANSIBLE_HOST_KEY_CHECKING=False - -RUN apk add ansible python3-dev gcc openssh-client - -COPY entrypoint.sh ./entrypoint.sh - -ENTRYPOINT [ "/entrypoint.sh" ] diff --git a/.github/actions/ansible/action.yml b/.github/actions/ansible/action.yml deleted file mode 100644 index 1dc81d1b02..0000000000 --- a/.github/actions/ansible/action.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Ansible Runner -description: Runs Ansible Playbooks using Github Actions - -inputs: - workspace_dir: - description: Root directory of the Ansible workspace. - required: true - playbook_file_path: - description: Playook path relative to the workspace directory. - required: true - inventory_file_path: - description: Inventory path relative to the workspace directory - required: true - ssh_key: - description: SSH key for remote Ansible connections - required: false - vault_password: - description: Password used for Ansible vault encrypted files - required: false - options: - description: Additional options for Ansible playbook - required: false - -runs: - using: docker - image: Dockerfile - args: - - ${{ inputs.workspace_dir }} - - ${{ inputs.playbook_file_path }} - - ${{ inputs.inventory_file_path }} - - ${{ inputs.ssh_key }} - - ${{ inputs.vault_password }} - - ${{ inputs.options }} diff --git a/.github/actions/ansible/entrypoint.sh b/.github/actions/ansible/entrypoint.sh deleted file mode 100755 index f3da85a0ed..0000000000 --- a/.github/actions/ansible/entrypoint.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/sh - -set -e - -export WORKSPACE_DIR= -if [ ! -z "$INPUT_WORKSPACE_DIR" ] -then - WORKSPACE_DIR="${INPUT_WORKSPACE_DIR}" -else - echo "No working space directory specified." -fi - -export PLAYBOOK_FILE_PATH= -if [ ! -z "$INPUT_PLAYBOOK_FILE_PATH" ] -then - PLAYBOOK_FILE_PATH="${INPUT_PLAYBOOK_FILE_PATH}" -else - echo "No playbook specified." -fi - -export INVENTORY_FILE_PATH= -if [ ! -z "$INPUT_INVENTORY_FILE_PATH" ] -then - INVENTORY_FILE_PATH="-i ${INPUT_INVENTORY_FILE_PATH}" -else - echo "No inventory specified." -fi - -export SSH_KEY= -if [ ! -z "$INPUT_SSH_KEY" ] -then - mkdir ~/.ssh - echo "$INPUT_SSH_KEY" > ~/.ssh/ansible - chmod 0600 ~/.ssh/ansible - tilde=~ - SSH_KEY_PATH="${tilde}/.ssh/ansible" - SSH_KEY="--key-file ${SSH_KEY_PATH}" -else - echo "No SSH key specified." -fi - -export VAULT_PASSWORD= -if [ ! -z "$INPUT_VAULT_PASSWORD" ] -then - echo "$INPUT_VAULT_PASSWORD" > ~/.vault_secrets_pw - tilde=~ - VAULT_PASSWORD_PATH="${tilde}/.vault_secrets_pw" - VAULT_PASSWORD="--vault-password-file ${VAULT_PASSWORD_PATH}" -else - echo "No vault password specified." -fi - -export OPTIONS= -if [ ! -z "$INPUT_OPTIONS" ] -then - OPTIONS=$(echo "${INPUT_OPTIONS}" | tr "\n" " ") -fi - -cd ${WORKSPACE_DIR} -echo "Running command..." 
-echo ansible-playbook ${PLAYBOOK_FILE_PATH} ${INVENTORY_FILE_PATH} ${SSH_KEY} ${VAULT_PASSWORD} ${OPTIONS} -ansible-playbook ${PLAYBOOK_FILE_PATH} ${INVENTORY_FILE_PATH} ${SSH_KEY} ${VAULT_PASSWORD} ${OPTIONS} diff --git a/.github/actions/cloud-sql-backup/action.yml b/.github/actions/cloud-sql-backup/action.yml deleted file mode 100644 index 9199899ced..0000000000 --- a/.github/actions/cloud-sql-backup/action.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: Cloud SQL backup -description: Backup a Google Cloud SQL instance - -inputs: - gcp_credentials: - description: Google Cloud Platform credentials - required: true - cloud_sql_instance_name: - description: Google Cloud SQL instance name - required: true - backup_description: - description: Backup description - required: false - default: "" - -outputs: - backup_id: - description: Google Cloud SQL backup ID - value: ${{ steps.fetch-backup-id.outputs.backup_id }} - -runs: - using: composite - steps: - - id: auth - uses: google-github-actions/auth@v0 - with: - credentials_json: "${{ inputs.gcp_credentials }}" - - - uses: google-github-actions/setup-gcloud@v0 - - - run: | - gcloud sql backups create \ - --instance="${{ inputs.cloud_sql_instance_name }}" \ - --description="${{ inputs.backup_description }}" - shell: bash - - - id: fetch-backup-id - run: | - echo ::set-output name=backup_id::$( \ - gcloud sql backups list \ - --instance="${{ inputs.cloud_sql_instance_name }}" \ - --format=json --limit 1 | jq -r ".[0].selfLink" ) - shell: bash diff --git a/.github/actions/cloud-sql-restore/action.yml b/.github/actions/cloud-sql-restore/action.yml deleted file mode 100644 index 7d78d1d270..0000000000 --- a/.github/actions/cloud-sql-restore/action.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: Cloud SQL restore -description: Restore a Google Cloud SQL instance backup - -inputs: - gcp_credentials: - description: Google Cloud Platform credentials - required: true - cloud_sql_backup_id: - description: Google Cloud SQL backup ID to restore - required: true - cloud_sql_restore_instance: - description: Google Cloud SQL instance name to restore backup into - required: true - -runs: - using: composite - steps: - - id: auth - uses: google-github-actions/auth@v0 - with: - credentials_json: "${{ inputs.gcp_credentials }}" - - - uses: google-github-actions/setup-gcloud@v0 - - - run: | - gcloud sql backups restore \ - --restore-instance="${{ inputs.cloud_sql_restore_instance }}" \ - ${{ inputs.cloud_sql_backup_id }} - shell: bash diff --git a/.github/actions/docker-publish.yml b/.github/actions/docker-publish.yml deleted file mode 100644 index 580dd94672..0000000000 --- a/.github/actions/docker-publish.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: 'Docker build and publish' -description: 'Builds and publishes a Docker image into a Docker registry.' - -inputs: - registry: - description: 'Server address of the Docker registry' - default: lifelike.azurecr.io - required: false - image: - description: 'Docker image' - required: true - username: - description: 'Username to log in to a Docker registry' - default: ${{ secrets.AZURE_CR_USERNAME }} - required: false - password: - description: 'Password or PAT to log in to a Docker registry' - default: ${{ secrets.AZURE_CR_PASSWORD }} - required: false - dockerfile: - description: 'Dockerfile to use for building the image' - required: false - default: './Dockerfile' - context: - description: 'Docker build context' - required: false - default: '.' 
- push: - description: 'Whether to push the image to the registry' - required: false - default: 'true' - -runs: - using: 'composite' - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Docker meta - id: docker_meta - uses: crazy-max/ghaction-docker-meta@v1 - with: - images: ${{ inputs.image }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login to DockerHub - if: github.event_name != 'pull_request' && inputs.username != '' && inputs.password != '' - uses: docker/login-action@v1 - with: - username: ${{ inputs.username }} - password: ${{ inputs.password }} - - - name: Build and push - uses: docker/build-push-action@v2 - with: - file: ${{ inputs.dockerfile }} - context: ${{ inputs.context }} - tags: ${{ steps.docker_meta.outputs.tags }} - labels: ${{ steps.docker_meta.outputs.labels }} - push: ${{ inputs.push }} - cache-from: type=gha - cache-to: type=gha,mode=max diff --git a/.github/labeler.yml b/.github/labeler.yml index debee6d781..c19e1e9fb8 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -37,6 +37,3 @@ docker: workflow: - .github/* - .github/**/* - -infrastructure: - - deployment diff --git a/.github/workflows/annotate-code-style.yml b/.github/workflows/annotate-code-style.yml index ada29e894f..7420fd73e2 100644 --- a/.github/workflows/annotate-code-style.yml +++ b/.github/workflows/annotate-code-style.yml @@ -49,7 +49,8 @@ jobs: steps: - uses: actions/checkout@v3 with: - token: ${{ secrets.ACTION_TOKEN }} + # Set ACTION_TOKEN to re-run this action after autofix + token: ${{ secrets.ACTION_TOKEN || secrets.GITHUB_TOKEN }} - name: Annotate code style uses: wearerequired/lint-action@v2 @@ -71,15 +72,8 @@ jobs: - name: Inform how to run autofix if: failure() && (github.event_name == 'pull_request') - uses: actions/github-script@v6 - with: - script: | - github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: "Code style issues has been found - [AUTOFIX](${{ env.AUTOFIX_URL }})" - }) + run: | + echo "Code style issues has been found - [AUTOFIX]($AUTOFIX_URL)" | tee $GITHUB_STEP_SUMMARY - name: Client linting autofix if: github.event.inputs.autoFix && github.event.inputs.tslintAutoFix diff --git a/.github/workflows/backup-tests.yml b/.github/workflows/backup-tests.yml deleted file mode 100644 index 18e7b9c516..0000000000 --- a/.github/workflows/backup-tests.yml +++ /dev/null @@ -1,284 +0,0 @@ -name: Database backups tests - -on: - workflow_dispatch: - inputs: - force: - description: Force backup tests to run even for backups already tested - type: boolean - default: false - required: false - schedule: - # Run once a week on sunday at 00:00 - - cron: "0 1 * * 0" - -concurrency: - group: ${{ github.workflow }} - cancel-in-progress: false - -jobs: - get-sql-instances: - name: List Cloud SQL instances - runs-on: ubuntu-latest - outputs: - instances: ${{ steps.instances.outputs.json }} - steps: - - uses: actions/checkout@v3 - - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v0 - id: auth - with: - credentials_json: "${{ secrets.GCE_SA_KEY }}" - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v0 - - - name: Get all Cloud SQL instances - id: instances - run: | - INSTANCES=$( \ - gcloud sql instances list \ - --format="value(name.list())" \ - --filter="labels.test-backups=true" \ - | jq -R -s -c 'split("\n") | map(select(.!=""))') - echo ::set-output 
name=json::$INSTANCES - - restore-and-test: - needs: get-sql-instances - runs-on: ubuntu-latest - continue-on-error: true - strategy: - matrix: - instance: ${{ fromJSON(needs.get-sql-instances.outputs.instances) }} - type: ["AUTOMATED", "ON_DEMAND"] - steps: - - uses: actions/checkout@v3 - - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v0 - id: auth - with: - credentials_json: "${{ secrets.GCE_SA_KEY }}" - - - name: "Set up Cloud SDK" - uses: google-github-actions/setup-gcloud@v0 - with: - install_components: beta - - - name: Get latest ${{ matrix.type }} backup ID - id: get-backup - run: | - BACKUP=$( \ - gcloud sql backups list \ - --instance="${{ matrix.instance }}" \ - --filter="status=SUCCESSFUL AND type=${{ matrix.type }}" \ - --format="value(id, startTime)" --sort-by="-startTime" \ - | head -n 1) - echo ::set-output name=id::$(echo $BACKUP | cut -d' ' -f1) - echo ::set-output name=date::$(echo $BACKUP | cut -d' ' -f2) - - - name: Retrieve previous results artifact, if any - id: download-artifact - uses: dawidd6/action-download-artifact@v2 - with: - name: backup-test-results-${{ steps.get-backup.outputs.id }} - search_artifacts: true - if_no_artifact_found: ignore - - - name: Check if backup exists and hasn't been tested before - uses: actions/github-script@v6 - with: - script: | - if (!'${{ steps.get-backup.outputs.id }}') { - core.warning('Skipping. No backup found of type ${{ matrix.type }} for Cloud SQL instance: {{ matrix.instance }}') - } else if (!core.getInput('force') && (await (await glob.create('results.txt')).glob()).length > 0) { - core.notice('Skipping. Backup ${{ steps.get-backup.outputs.id }} has already been tested.') - } else { - core.exportVariable('PROCEED', 'true') - } - - - name: Get instance details - if: ${{ env.PROCEED == 'true' }} - id: instance-details - run: | - INSTANCE=$( \ - gcloud sql instances describe \ - --format="value(databaseVersion, settings.tier, region, project)" \ - ${{ matrix.instance }}) - echo ::set-output name=version::$(echo $INSTANCE | cut -d' ' -f1) - echo ::set-output name=tier::$(echo $INSTANCE | cut -d' ' -f2) - echo ::set-output name=region::$(echo $INSTANCE | cut -d' ' -f3) - echo ::set-output name=project::$(echo $INSTANCE | cut -d' ' -f4) - - - name: Create temporary Cloud SQL instance - if: ${{ env.PROCEED == 'true' }} - id: create-temp-instance - run: | - TEMP_INSTANCE_NAME=$(echo "temp-${{ matrix.instance }}-${{ matrix.type }}-${{ github.run_id }}" | sed 's/_/-/g' | awk '{print tolower($0)}') - gcloud beta sql instances create "$TEMP_INSTANCE_NAME" \ - --database-version=${{ steps.instance-details.outputs.version }} \ - --tier=${{ steps.instance-details.outputs.tier }} \ - --region=${{ steps.instance-details.outputs.region }} \ - --labels="temp-instance=true,workflow-run=${{ github.run_id }}" - echo ::set-output name=temp_instance_name::$TEMP_INSTANCE_NAME - - - name: Restore backup into temporary instance - if: ${{ env.PROCEED == 'true' }} - run: | - gcloud sql backups restore --quiet \ - --restore-instance=${{ steps.create-temp-instance.outputs.temp_instance_name }} \ - --backup-instance=${{ matrix.instance }} \ - ${{ steps.get-backup.outputs.id }} - - - name: Reset postgres password - if: ${{ env.PROCEED == 'true' }} - run: | - gcloud sql users set-password \ - --instance=${{ steps.create-temp-instance.outputs.temp_instance_name }} \ - --password=password postgres - - - uses: mattes/gce-cloudsql-proxy-action@v1 - if: ${{ env.PROCEED == 'true' }} - with: - creds: ${{ secrets.GCE_SA_KEY }} - 
instance: ${{ steps.instance-details.outputs.project }}:${{ steps.instance-details.outputs.region }}:${{ steps.create-temp-instance.outputs.temp_instance_name }} - - - name: Setup PostgreSQL client - if: ${{ env.PROCEED == 'true' }} - run: | - sudo apt-get update - sudo apt-get install --yes --no-install-recommends postgresql-client - - - name: Query temporary instance - if: ${{ env.PROCEED == 'true' }} - env: - PGPASSWORD: password - PGHOST: localhost - run: | - cat < queries.sql - -- ------------------------------------------------------------ - -- Count stats - -- ------------------------------------------------------------ - WITH tbl AS - (SELECT table_schema,TABLE_NAME - FROM information_schema.tables - WHERE TABLE_NAME not like 'pg_%' - AND table_schema in ('public')) - SELECT TABLE_NAME, (xpath('/row/c/text()', query_to_xml(format('select count(*) as c from %I.%I', table_schema, TABLE_NAME), FALSE, TRUE, '')))[1]::text::int AS rows - FROM tbl - ORDER BY rows DESC; - -- ------------------------------------------------------------ - -- Alembic - -- ------------------------------------------------------------ - SELECT version_num as current_alembic_version FROM alembic_version; - -- ------------------------------------------------------------ - -- Date stats - -- ------------------------------------------------------------ - CREATE OR REPLACE FUNCTION get_min_max_modified_date() - RETURNS TABLE (table_name text, - max_modified_daye timestamp, - min_modified_daye timestamp) - LANGUAGE plpgsql - AS \$\$ - DECLARE - r record; - BEGIN - FOR r IN - SELECT i.table_name, i.table_schema - FROM information_schema.tables i - WHERE i.table_name in ( - SELECT c.table_name - FROM information_schema.columns c - WHERE c.table_schema = 'public' - AND c.column_name = 'modified_date' - GROUP BY c.table_name - ) - LOOP - execute format ( - 'SELECT min(modified_date) FROM %I.%I', - r.table_schema, r.table_name - ) INTO min_modified_daye; - execute format ( - 'SELECT max(modified_date) FROM %I.%I', - r.table_schema, r.table_name - ) INTO max_modified_daye; - table_name := r.table_name; - RETURN next; - END LOOP; - END - \$\$; - SELECT * FROM get_min_max_modified_date(); - EOF - - # Query the restored database - psql -U postgres -d postgres -f queries.sql > results.txt - - # Export results to variable - echo "RESULTS<> $GITHUB_ENV - echo $(cat results.txt) >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - results=$(> $GITHUB_STEP_SUMMARY - ## Backup details - - - Cloud SQL instance name: **${{ matrix.instance }}** - - Backup date: **${{ steps.get-backup.outputs.date }}** - - Backup type: **${{ matrix.type }}** - - Backup ID: **${{ steps.get-backup.outputs.id }}** - - Workflow run ID: [**${{ github.run_id }}**](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) - - ## Results - \`\`\` - $results - \`\`\` - EOF - - - name: Upload results artifact - if: ${{ env.PROCEED == 'true' }} - uses: actions/upload-artifact@v3 - with: - name: backup-test-results-${{ steps.get-backup.outputs.id }} - path: results.txt - retention-days: 90 - - - name: Delete temporary instance - if: ${{ env.PROCEED == 'true' }} - run: | - gcloud sql instances delete --quiet \ - ${{ steps.create-temp-instance.outputs.temp_instance_name }} - - - name: Notify results by email - if: ${{ env.PROCEED == 'true' }} - uses: licenseware/send-email-notification@v1 - with: - api-key: "***REMOVED***" - # api-key: ${{ secrets.SENDGRID_API_KEY }} - from-email: no-reply@lifelike.bio - to-email: darede@biosustain.dtu.dk - # 
to-email: ${{ secrets.BACKUP_TEST_NOTIFY_EMAILS }} - subject: "Lifelike DB backup test passed for ${{ matrix.instance }} - ${{ matrix.type }}" - markdown-body: | - # Lifelike PostgreSQL backup test results - - ## Test result: ✅ PASSED - - ## Details - - - Cloud SQL instance name: **${{ matrix.instance }}** - - Backup date: **${{ steps.get-backup.outputs.date }}** - - Backup type: **${{ matrix.type }}** - - Backup ID: **${{ steps.get-backup.outputs.id }}** - - Workflow run ID: [**${{ github.run_id }}**](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) - - ## Database queries - - ``` - $(cat results.txt) - ``` - - --- - - See the workflow run [at GitHub](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}). diff --git a/.github/workflows/browserstack.yml b/.github/workflows/browserstack.yml index f7719bf92a..584381005b 100644 --- a/.github/workflows/browserstack.yml +++ b/.github/workflows/browserstack.yml @@ -1,4 +1,4 @@ -name: 'BrowserStack Test' +name: "BrowserStack Test" on: workflow_dispatch: inputs: @@ -9,31 +9,30 @@ on: jobs: browserstack: - name: 'BrowserStack Tests' + name: "BrowserStack Tests" runs-on: ubuntu-latest steps: - - - name: 'BrowserStack Env Setup' # Invokes the setup-env action + - name: "BrowserStack Env Setup" # Invokes the setup-env action uses: browserstack/github-actions/setup-env@master with: - username: ${{ secrets.BROWSERSTACK_USERNAME }} + username: ${{ secrets.BROWSERSTACK_USERNAME }} access-key: ${{ secrets.BROWSERSTACK_ACCESS_KEY }} project-name: lifelike - - name: 'Checkout the repository' + - name: "Checkout the repository" uses: actions/checkout@v2 - - name: 'Install BrowserStack Cypress CLI' + - name: "Install BrowserStack Cypress CLI" run: npm install -g browserstack-cypress-cli - + - name: Run BrowserStack tests working-directory: ./tests/cypress run: browserstack-cypress run - - name: 'Running test on BrowserStack' # Invokes the actual test script that would run on BrowserStack browsers - run: node index.js # See sample test script above + - name: "Running test on BrowserStack" # Invokes the actual test script that would run on BrowserStack browsers + run: node index.js # See sample test script above - - name: 'BrowserStackLocal Stop' # Terminating the BrowserStackLocal tunnel connection + - name: "BrowserStackLocal Stop" # Terminating the BrowserStackLocal tunnel connection uses: browserstack/github-actions/setup-local@master with: - local-testing: stop \ No newline at end of file + local-testing: stop diff --git a/.github/workflows/call_webhook.yml b/.github/workflows/call_webhook.yml new file mode 100644 index 0000000000..dbc9125e2c --- /dev/null +++ b/.github/workflows/call_webhook.yml @@ -0,0 +1,41 @@ +name: Trigger Target Workflow + +on: + workflow_dispatch: + secrets: + WEBHOOK_TOKEN: + required: true + push: + branches: + - master + +jobs: + trigger: + runs-on: ubuntu-latest + + steps: + - name: Trigger Webhook + run: | + # Set the required variables + repo_owner="biosustain" + repo_name="lifelike-infrastructure" + load=$( + cat<> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 659c4fde62..7d2fa1ede2 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -13,12 +13,12 @@ name: "CodeQL" on: push: - branches: [ main ] + branches: [main] pull_request: # The branches below must be a subset of the branches above - branches: [ main ] + branches: [main] schedule: - - cron: 
'15 12 * * 1' + - cron: "15 12 * * 1" jobs: analyze: @@ -32,39 +32,39 @@ jobs: strategy: fail-fast: false matrix: - language: [ 'python', 'javascript' ] + language: ["python", "javascript"] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] # Learn more about CodeQL language support at https://git.io/codeql-language-support steps: - - name: Checkout repository - uses: actions/checkout@v2 + - name: Checkout repository + uses: actions/checkout@v2 - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 - # ℹī¸ Command-line programs to run using the OS shell. - # 📚 https://git.io/JvXDl + # ℹī¸ Command-line programs to run using the OS shell. 
+ # 📚 https://git.io/JvXDl - # ✏ī¸ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language + # ✏ī¸ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language - #- run: | - # make bootstrap - # make release + #- run: | + # make bootstrap + # make release - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/deployment-az-public.yml b/.github/workflows/deployment-az-public.yml deleted file mode 100644 index 0347495991..0000000000 --- a/.github/workflows/deployment-az-public.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Azure Public Deployment - -on: - workflow_dispatch: - push: - tags: [public] - branches: [public/**] - -jobs: - call-deployment-gcp: - uses: ./.github/workflows/deployment-az.yml - with: - environment_name: public - client_config: production - cloud_sql_instance_name: lifelike-public - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - CONTAINER_REGISTRY_USERNAME: ${{ secrets.AZURE_CR_USERNAME }} - CONTAINER_REGISTRY_PASSWORD: ${{ secrets.AZURE_CR_PASSWORD }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git a/.github/workflows/deployment-az-staging.yml b/.github/workflows/deployment-az-staging.yml deleted file mode 100644 index d7e2c2525e..0000000000 --- a/.github/workflows/deployment-az-staging.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Azure Staging Deployment - -on: - workflow_dispatch: - push: - tags: [staging-az] - branches: [staging-az/**] - -jobs: - call-deployment-az: - uses: ./.github/workflows/deployment-az.yml - with: - environment_name: staging-az - client_config: production - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - CONTAINER_REGISTRY_USERNAME: ${{ secrets.AZURE_CR_USERNAME }} - CONTAINER_REGISTRY_PASSWORD: ${{ secrets.AZURE_CR_PASSWORD }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git a/.github/workflows/deployment-az.yml b/.github/workflows/deployment-az.yml deleted file mode 100644 index f2cb12a9bd..0000000000 --- a/.github/workflows/deployment-az.yml +++ /dev/null @@ -1,111 +0,0 @@ -name: Lifelike Azure deployment - -on: - workflow_call: - inputs: - environment_name: - description: Environment name (prod, staging, qa or demo) - required: true - type: string - client_config: - description: Client Runtime configuration preset - required: true - default: production - type: string - container_registry: - description: Container registry name - required: false - default: lifelike.azurecr.io - type: string - secrets: - CONTAINER_REGISTRY_USERNAME: - required: true - CONTAINER_REGISTRY_PASSWORD: - required: true - VAULT_PASSWORD: - required: true - SSH_KEY: - required: true - INFRA_PAT: - required: true - -jobs: - # ------------------------------------------------------------- - # JOB: Build Docker images - # ------------------------------------------------------------- - build: - name: Build - runs-on: ubuntu-latest - strategy: - matrix: - include: - - image: kg-webserver - path: ./client - build_extra_args: --build-arg ANGULAR_CONFIG=${{ inputs.client_config }} --build-arg CLIENT_VERSION=${{ github.sha }} - - image: kg-appserver - path: ./appserver - - image: kg-cache-service - path: 
./cache-invalidator - - image: kg-statistical-enrichment - path: ./statistical-enrichment - - image: filebeat - path: ./filebeat - - image: metricbeat - path: ./metricbeat - - steps: - - uses: actions/checkout@v3 - - name: Build and push ${{ matrix.image }} image - uses: whoan/docker-build-with-cache-action@v5 - with: - context: ${{ matrix.path }} - image_name: ${{ matrix.image }} - image_tag: ${{ github.sha }},${{ inputs.environment_name }},latest - build_extra_args: ${{ matrix.build_extra_args }} - registry: ${{ inputs.container_registry }} - username: ${{ secrets.CONTAINER_REGISTRY_USERNAME }} - password: ${{ secrets.CONTAINER_REGISTRY_PASSWORD }} - dockerfile: ./Dockerfile - - # ------------------------------------------------------------- - # JOB: Deploy to AZ environment using Ansible playbook - # pointed by the Git submodule: /deployment - # ------------------------------------------------------------- - deploy: - name: Deploy - needs: - - build - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v3 - with: - token: ${{ secrets.INFRA_PAT }} - submodules: recursive - - - name: Set git metadata - id: git-meta - run: | - echo ::set-output name=commit_timestamp::$(git log -1 --format=%cI) - echo ::set-output name=build_number::$(git rev-list --count HEAD) - echo ::set-output name=build_version::$(echo "${GITHUB_REF#refs/*/}") - - - name: Run Ansible deployment action - uses: ./.github/actions/ansible - with: - workspace_dir: deployment/ansible - playbook_file_path: playbooks/deploy-azure.yml - inventory_file_path: inventories/hosts-az.yml - vault_password: ${{ secrets.VAULT_PASSWORD }} - ssh_key: ${{ secrets.SSH_KEY }} - options: | - --extra-vars environment_name=${{ inputs.environment_name }} - --extra-vars client_config=${{ inputs.client_config }} - --extra-vars docker_img_hash=${{ github.sha }} - --extra-vars git_timestamp=${{ steps.git-meta.outputs.commit_timestamp }} - --extra-vars app_build_number=${{ steps.git-meta.outputs.build_number }} - --extra-vars app_version=${{ steps.git-meta.outputs.build_version }} - --extra-vars github_run_id=${{ github.run_id }} - --user ansible - --verbose diff --git a/.github/workflows/deployment-contabo-ucsd.yml b/.github/workflows/deployment-contabo-ucsd.yml deleted file mode 100644 index 2251acc3ee..0000000000 --- a/.github/workflows/deployment-contabo-ucsd.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Contabo UCSD Deployment - -on: - workflow_dispatch: - push: - tags: [ucsd] - branches: [ucsd/**] - -jobs: - call-deployment-gcp: - uses: ./.github/workflows/deployment-contabo.yml - with: - environment_name: ucsd - client_config: production - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - CONTAINER_REGISTRY_USERNAME: ${{ secrets.AZURE_CR_USERNAME }} - CONTAINER_REGISTRY_PASSWORD: ${{ secrets.AZURE_CR_PASSWORD }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git a/.github/workflows/deployment-contabo.yml b/.github/workflows/deployment-contabo.yml deleted file mode 100644 index 041064b423..0000000000 --- a/.github/workflows/deployment-contabo.yml +++ /dev/null @@ -1,113 +0,0 @@ -name: Lifelike Contabo deployment - -on: - workflow_call: - inputs: - environment_name: - description: Environment name - required: true - type: string - client_config: - description: Client Runtime configuration preset - required: true - default: production - type: string - container_registry: - description: Container registry name - required: false - default: lifelike.azurecr.io - type: 
string - secrets: - CONTAINER_REGISTRY_USERNAME: - required: true - CONTAINER_REGISTRY_PASSWORD: - required: true - VAULT_PASSWORD: - required: true - SSH_KEY: - required: true - GCP_CREDENTIALS: - required: true - INFRA_PAT: - required: true - -jobs: - # ------------------------------------------------------------- - # JOB: Build Docker images - # ------------------------------------------------------------- - build: - name: Build - runs-on: ubuntu-latest - strategy: - matrix: - include: - - image: kg-webserver - path: ./client - build_extra_args: --build-arg ANGULAR_CONFIG=${{ inputs.client_config }} --build-arg CLIENT_VERSION=${{ github.sha }} - - image: kg-appserver - path: ./appserver - - image: kg-cache-service - path: ./cache-invalidator - - image: kg-statistical-enrichment - path: ./statistical-enrichment - - image: filebeat - path: ./filebeat - - image: metricbeat - path: ./metricbeat - - steps: - - uses: actions/checkout@v3 - - name: Build and push ${{ matrix.image }} image - uses: whoan/docker-build-with-cache-action@v5 - with: - context: ${{ matrix.path }} - image_name: ${{ matrix.image }} - image_tag: ${{ github.sha }},${{ inputs.environment_name }},latest - build_extra_args: ${{ matrix.build_extra_args }} - registry: ${{ inputs.container_registry }} - username: ${{ secrets.CONTAINER_REGISTRY_USERNAME }} - password: ${{ secrets.CONTAINER_REGISTRY_PASSWORD }} - dockerfile: ./Dockerfile - - # ------------------------------------------------------------- - # JOB: Deploy to Contabo environment using Ansible playbook - # pointed by the Git submodule: /deployment - # ------------------------------------------------------------- - deploy: - name: Deploy - needs: - - build - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v3 - with: - token: ${{ secrets.INFRA_PAT }} - submodules: recursive - - - name: Set git metadata - id: git-meta - run: | - echo ::set-output name=commit_timestamp::$(git log -1 --format=%cI) - echo ::set-output name=build_number::$(git rev-list --count HEAD) - echo ::set-output name=build_version::$(echo "${GITHUB_REF#refs/*/}") - - - name: Run Ansible deployment action - uses: ./.github/actions/ansible - with: - workspace_dir: deployment/ansible - playbook_file_path: playbooks/deploy-contabo.yml - inventory_file_path: inventories/hosts-contabo.yml - vault_password: ${{ secrets.VAULT_PASSWORD }} - ssh_key: ${{ secrets.SSH_KEY }} - options: | - --extra-vars environment_name=${{ inputs.environment_name }} - --extra-vars client_config=${{ inputs.client_config }} - --extra-vars docker_img_hash=${{ github.sha }} - --extra-vars git_timestamp=${{ steps.git-meta.outputs.commit_timestamp }} - --extra-vars app_build_number=${{ steps.git-meta.outputs.build_number }} - --extra-vars app_version=${{ steps.git-meta.outputs.build_version }} - --extra-vars github_run_id=${{ github.run_id }} - --user ansible - --verbose diff --git a/.github/workflows/deployment-gcp-demo.yml b/.github/workflows/deployment-gcp-demo.yml deleted file mode 100644 index b673c5c93d..0000000000 --- a/.github/workflows/deployment-gcp-demo.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: GCP Demo Deployment - -on: - workflow_dispatch: - push: - tags: [demo] - -jobs: - call-deployment-gcp: - uses: ./.github/workflows/deployment-gcp.yml - with: - environment_name: demo - client_config: demo - cloud_sql_instance_name: kg-demo - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - CONTAINER_REGISTRY_USERNAME: ${{ 
secrets.AZURE_CR_USERNAME }} - CONTAINER_REGISTRY_PASSWORD: ${{ secrets.AZURE_CR_PASSWORD }} - GCP_CREDENTIALS: ${{ secrets.GCE_SA_KEY }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git a/.github/workflows/deployment-gcp-prod.yml b/.github/workflows/deployment-gcp-prod.yml deleted file mode 100644 index 4d86fab703..0000000000 --- a/.github/workflows/deployment-gcp-prod.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: GCP Prod Deployment - -on: - workflow_dispatch: - release: - types: [released] - push: - tags: [prod] - -jobs: - call-deployment-gcp: - uses: ./.github/workflows/deployment-gcp.yml - with: - environment_name: prod - client_config: production - cloud_sql_instance_name: lifelike-prod - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - CONTAINER_REGISTRY_USERNAME: ${{ secrets.AZURE_CR_USERNAME }} - CONTAINER_REGISTRY_PASSWORD: ${{ secrets.AZURE_CR_PASSWORD }} - GCP_CREDENTIALS: ${{ secrets.GCE_SA_KEY }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git a/.github/workflows/deployment-gcp-public.yml b/.github/workflows/deployment-gcp-public.yml deleted file mode 100644 index cfaf35aa95..0000000000 --- a/.github/workflows/deployment-gcp-public.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: GCP Public Deployment - -on: - workflow_dispatch: - push: - tags: [public] - branches: [public/**] - -jobs: - call-deployment-gcp: - uses: ./.github/workflows/deployment-gcp.yml - with: - environment_name: public - client_config: production - cloud_sql_instance_name: lifelike-public - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - CONTAINER_REGISTRY_USERNAME: ${{ secrets.AZURE_CR_USERNAME }} - CONTAINER_REGISTRY_PASSWORD: ${{ secrets.AZURE_CR_PASSWORD }} - GCP_CREDENTIALS: ${{ secrets.GCE_SA_KEY }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git a/.github/workflows/deployment-gcp-qa.yml b/.github/workflows/deployment-gcp-qa.yml deleted file mode 100644 index af46d79f4e..0000000000 --- a/.github/workflows/deployment-gcp-qa.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: GCP QA Deployment - -on: - workflow_dispatch: - push: - branches: [master] - tags: [qa] - -jobs: - call-deployment-gcp: - uses: ./.github/workflows/deployment-gcp.yml - with: - environment_name: qa - client_config: qa - cloud_sql_instance_name: lifelike-qa - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - CONTAINER_REGISTRY_USERNAME: ${{ secrets.AZURE_CR_USERNAME }} - CONTAINER_REGISTRY_PASSWORD: ${{ secrets.AZURE_CR_PASSWORD }} - GCP_CREDENTIALS: ${{ secrets.GCE_SA_KEY }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git a/.github/workflows/deployment-gcp-staging.yml b/.github/workflows/deployment-gcp-staging.yml deleted file mode 100644 index f90e923c57..0000000000 --- a/.github/workflows/deployment-gcp-staging.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: GCP Staging Deployment - -on: - workflow_dispatch: - push: - tags: [staging] - branches: [staging/**] - -jobs: - call-deployment-gcp: - uses: ./.github/workflows/deployment-gcp.yml - with: - environment_name: staging - client_config: staging - cloud_sql_instance_name: kg-staging - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - CONTAINER_REGISTRY_USERNAME: ${{ secrets.AZURE_CR_USERNAME }} - CONTAINER_REGISTRY_PASSWORD: ${{ secrets.AZURE_CR_PASSWORD }} - GCP_CREDENTIALS: ${{ secrets.GCE_SA_KEY }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git 
a/.github/workflows/deployment-gcp.yml b/.github/workflows/deployment-gcp.yml deleted file mode 100644 index a5ab0c6ce9..0000000000 --- a/.github/workflows/deployment-gcp.yml +++ /dev/null @@ -1,159 +0,0 @@ -name: Lifelike GCP deployment - -on: - workflow_call: - inputs: - environment_name: - description: Environment nme (prod, staging, qa or demo) - required: true - type: string - client_config: - description: Client Runtime configuration preset - required: true - default: production - type: string - container_registry: - description: Container registry name - required: false - default: lifelike.azurecr.io - type: string - cloud_sql_instance_name: - description: Google Cloud SQL instance name - required: true - type: string - cloud_sql_force_backup: - description: Backup before deploying even if no migrations need to be run - required: false - default: false - type: boolean - secrets: - CONTAINER_REGISTRY_USERNAME: - required: true - CONTAINER_REGISTRY_PASSWORD: - required: true - VAULT_PASSWORD: - required: true - SSH_KEY: - required: true - GCP_CREDENTIALS: - required: true - INFRA_PAT: - required: true - -jobs: - # ------------------------------------------------------------- - # JOB: Build Docker images - # ------------------------------------------------------------- - build: - name: Build - runs-on: ubuntu-latest - strategy: - matrix: - include: - - image: kg-webserver - path: ./client - build_extra_args: --build-arg ANGULAR_CONFIG=${{ inputs.client_config }} --build-arg CLIENT_VERSION=${{ github.sha }} - - image: kg-appserver - path: ./appserver - - image: kg-cache-service - path: ./cache-invalidator - - image: kg-statistical-enrichment - path: ./statistical-enrichment - - image: filebeat - path: ./filebeat - - image: metricbeat - path: ./metricbeat - - steps: - - uses: actions/checkout@v3 - - name: Build and push ${{ matrix.image }} image - uses: whoan/docker-build-with-cache-action@v5 - with: - context: ${{ matrix.path }} - image_name: ${{ matrix.image }} - image_tag: ${{ github.sha }},${{ inputs.environment_name }},latest - build_extra_args: ${{ matrix.build_extra_args }} - registry: ${{ inputs.container_registry }} - username: ${{ secrets.CONTAINER_REGISTRY_USERNAME }} - password: ${{ secrets.CONTAINER_REGISTRY_PASSWORD }} - dockerfile: ./Dockerfile - - # --------------------------------------------- - # JOB: Backup DB - # --------------------------------------------- - cloud-sql-backup: - name: Backup Cloud SQL instance - needs: build - outputs: - backup_id: ${{ steps.backup.outputs.backup_id }} - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Create a new Clod SQL instance backup - id: backup - uses: ./.github/actions/cloud-sql-backup - with: - gcp_credentials: "${{ secrets.GCP_CREDENTIALS }}" - cloud_sql_instance_name: "${{ inputs.cloud_sql_instance_name }}" - backup_description: "Automated backup from GitHub workflow. 
Run ID: ${{ github.run_id }}" - - # ------------------------------------------------------------- - # JOB: Deploy to GCP environment using Ansible playbook - # pointed by the Git submodule: /deployment - # ------------------------------------------------------------- - deploy: - name: Deploy - needs: - - build - - cloud-sql-backup - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v3 - with: - token: ${{ secrets.INFRA_PAT }} - submodules: recursive - - - name: Set git metadata - id: git-meta - run: | - echo ::set-output name=commit_timestamp::$(git log -1 --format=%cI) - echo ::set-output name=build_number::$(git rev-list --count HEAD) - echo ::set-output name=build_version::$(echo "${GITHUB_REF#refs/*/}") - - - name: Authenticate to GCP - id: auth - uses: google-github-actions/auth@v0 - with: - credentials_json: "${{ secrets.GCP_CREDENTIALS }}" - - uses: google-github-actions/setup-gcloud@v0 - - - name: Get Cloud SQL instance private IP address - id: database-host - run: | - echo ::set-output name=ip_address::$( \ - gcloud sql instances describe --format=json \ - ${{ inputs.cloud_sql_instance_name }} \ - | jq -r '.ipAddresses[] | select(.type == "PRIVATE").ipAddress') - - - name: Run Ansible deployment action - uses: ./.github/actions/ansible - with: - workspace_dir: deployment/ansible - playbook_file_path: playbooks/deploy-gcloud.yml - inventory_file_path: inventories/hosts.yml - vault_password: ${{ secrets.VAULT_PASSWORD }} - ssh_key: ${{ secrets.SSH_KEY }} - options: | - --extra-vars environment_name=${{ inputs.environment_name }} - --extra-vars client_config=${{ inputs.client_config }} - --extra-vars docker_img_hash=${{ github.sha }} - --extra-vars git_timestamp=${{ steps.git-meta.outputs.commit_timestamp }} - --extra-vars app_build_number=${{ steps.git-meta.outputs.build_number }} - --extra-vars app_version=${{ steps.git-meta.outputs.build_version }} - --extra-vars github_run_id=${{ github.run_id }} - --extra-vars postgres_host=${{ steps.database-host.outputs.ip_address }} - --user ansible - -vvvv diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml deleted file mode 100644 index ac173f1076..0000000000 --- a/.github/workflows/docker.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: Docker images - -on: - push: - branches: - - main - tags: - - v* - pull_request: - branches: - - main -env: - REGISTRY: ghcr.io - REPOSITORY: ${{ github.repository }} - -jobs: - publish-images: - name: Publish ${{ matrix.name }} Docker image - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - include: - - name: appserver - path: appserver - - name: frontend - path: client - # - name: elasticsearch - # path: docker - # file: docker/elasticsearch.Dockerfile - # - name: Grap data migrator - # path: graph-db - # image: lifelike-graph-db - - permissions: - contents: read - packages: write - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - - name: Log in to the Container registry - uses: docker/login-action@v1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v3 - with: - images: ${{ env.REGISTRY }}/${{ env.REPOSITORY }}-${{ matrix.name }} - tags: | - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor - - - name: Build and push ${{ env.REGISTRY }}/${{ env.REPOSITORY }}-${{ matrix.name }} image - uses: 
docker/build-push-action@v2 - with: - context: ${{ matrix.path }} - file: ${{ matrix.file }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - push: true diff --git a/.github/workflows/graphdb-migrate.yml b/.github/workflows/graphdb-migrate.yml deleted file mode 100644 index de9d842f48..0000000000 --- a/.github/workflows/graphdb-migrate.yml +++ /dev/null @@ -1,84 +0,0 @@ -name: Grpah DB Migrate - -on: - workflow_dispatch: - inputs: - neo4j_host: - description: Neo4j target host - type: string - required: true - neo4j_username: - description: Neo4j username - type: string - required: true - default: neo4j - neo4j_password: - description: Neo4j password - type: string - required: true - log_level: - description: Log level - type: choice - required: true - default: info - options: - - debug - - info - - warn - - error - release_lock: - description: Release any present lock before executing - type: boolean - required: true - default: true - datafiles_prefix: - description: Prefix of the datafiles in remote storage (stage/prod) - type: choice - required: true - default: stage - options: - - stage - - prod - chunk_size: - description: Liquibase transaction chunk size - type: string - required: true - default: '2000' - changelog: - description: Path to the changelog file - type: string - required: true - default: 'lifelike-graph/changelog-master.xml' - migrator_image_tag: - description: Tag of the graphdb-migrator image to use - type: string - required: true - default: latest - -jobs: - migrate: - name: Migrate Neo4j DB - runs-on: self-hosted - timeout-minutes: 360 - container: - image: ghcr.io/sbrg/lifelike-graphdb-migrator:${{ github.event.inputs.migrator_image_tag }} - options: --user root - env: - NEO4J_HOST: ${{ github.event.inputs.neo4j_host }} - NEO4J_USERNAME: ${{ github.event.inputs.neo4j_username }} - NEO4J_PASSWORD: ${{ github.event.inputs.neo4j_password }} - LOG_LEVEL: ${{ github.event.inputs.log_level }} - CHANGELOG_FILE: ${{ github.event.inputs.changelog }} - DATAFILES_PREFIX: ${{ github.event.inputs.datafiles_prefix }} - AZURE_ACCOUNT_STORAGE_KEY: ${{ secrets.AZURE_ACCOUNT_STORAGE_KEY }} - AZURE_ACCOUNT_STORAGE_NAME: ${{ secrets.AZURE_ACCOUNT_STORAGE_NAME }} - CHUNK_SIZE: ${{ github.event.inputs.chunk_size }} - steps: - - uses: actions/checkout@v2 - - name: Copy changelog files - run: cp -rp graph-db/changelog/* /liquibase/changelog/ - - name: Release previous lock - if: ${{ github.event.inputs.release_lock }} - run: /docker-entrypoint.sh releaseLocks - - name: Run Liquibase migrations - run: /docker-entrypoint.sh update diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index b100f06f55..52e359569f 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -21,28 +21,23 @@ jobs: - uses: actions/checkout@v3 - - name: Build appserver image - uses: whoan/docker-build-with-cache-action@v5 - with: - username: ${{ secrets.AZURE_CR_USERNAME }} - password: ${{ secrets.AZURE_CR_PASSWORD }} - registry: lifelike.azurecr.io - image_name: kg-appserver - image_tag: ${{ github.sha }} - context: ./appserver - dockerfile: ./Dockerfile - - name: Create environment variable to pass in GitHub hash for Docker Compose run: echo "GITHUB_HASH=${{ github.sha }}" >> .env + - name: Build Docker image + id: build + run: | + ${{ env.DOCKER_COMPOSE_CMD }} build --build-arg DEV=True appserver + echo "image_hash=$(docker images | awk '{print $3}' | awk 'NR==2')" | tee $GITHUB_OUTPUT + - name: 
Migration Integrity Check - run: ${{ env.DOCKER_COMPOSE_CMD }} run --no-deps appserver bin/migration-integrity-check + run: docker run ${{ steps.build.outputs.image_hash }} bin/migration-integrity-check - name: Linting and code style run: |- - ${{ env.DOCKER_COMPOSE_CMD }} run --no-deps appserver flake8 . - ${{ env.DOCKER_COMPOSE_CMD }} run --no-deps appserver mypy . - ${{ env.DOCKER_COMPOSE_CMD }} run --no-deps appserver pycodestyle . + docker run ${{ steps.build.outputs.image_hash }} flake8 . + docker run ${{ steps.build.outputs.image_hash }} mypy . + docker run ${{ steps.build.outputs.image_hash }} pycodestyle . - name: Bring up containers and wait for webserver to be ready timeout-minutes: 10 diff --git a/.github/workflows/sonar.yml b/.github/workflows/sonar.yml deleted file mode 100644 index e0bf8c0361..0000000000 --- a/.github/workflows/sonar.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: Sonarqube code analysis - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] - workflow_dispatch: - inputs: - application: - required: false - type: choice - default: "" - description: Specific application to analyze - options: - - "" - - Appserver - - Frontend - -jobs: - sonarqube: - name: Quality of ${{ matrix.name }} - strategy: - fail-fast: false - matrix: - include: - - name: appserver - path: appserver - extraArgs: > - -Dsonar.tests=tests/ - -Dsonar.exclusions=tests/** - - name: frontend - path: client - - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v2 - - # Check if code was changed in project path - - name: Check code changes - uses: marceloprado/has-changed-path@v1 - if: github.event_name != 'workflow_dispatch' - id: changed - with: - paths: ${{ matrix.path }} - - # Scan if code was changed or if workflow was triggered manually - - name: SonarCloud Scan - uses: SonarSource/sonarcloud-github-action@master - if: steps.changed.outputs.changed == 'true' || github.event.inputs.application == matrix.name || github.event.inputs.application == '' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - SONAR_ORGANIZATION: lifelike - with: - projectBaseDir: ${{ matrix.path }} - args: > - -Dsonar.organization=${{ env.SONAR_ORGANIZATION }} - -Dsonar.projectKey=${{ env.SONAR_ORGANIZATION }}-${{ matrix.name }} - -Dsonar.projectName=${{ matrix.name }} - ${{ matrix.extraArgs }} diff --git a/.github/workflows/traefik-az.yml b/.github/workflows/traefik-az.yml deleted file mode 100644 index 2eca1579eb..0000000000 --- a/.github/workflows/traefik-az.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Azure Traefik deployment - -on: - workflow_dispatch: - push: - tags: [traefik-az] - branches: [traefik-az/**] - -jobs: - call-deployment-gcp: - uses: ./.github/workflows/traefik.yml - with: - playbook_path: playbooks/proxy-azure.yml - inventory_path: inventories/hosts-az.yml - secrets: - VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT }} - SSH_KEY: ${{ secrets.ANSIBLE_PRIVATE_SSH_KEY }} - INFRA_PAT: ${{ secrets.INFRA_PAT }} diff --git a/.github/workflows/traefik.yml b/.github/workflows/traefik.yml deleted file mode 100644 index e5c6367820..0000000000 --- a/.github/workflows/traefik.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Lifelike Traefik deployment - -on: - workflow_call: - inputs: - playbook_path: - description: File path to Ansible playbook - required: true - type: string - inventory_path: - description: File path to Ansible inventory - required: true - type: string - secrets: - VAULT_PASSWORD: - required: true - 
SSH_KEY: - required: true - INFRA_PAT: - required: true - -jobs: - deploy: - name: Deploy - needs: - - build - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v3 - with: - token: ${{ secrets.INFRA_PAT }} - submodules: recursive - - - name: Run Ansible deployment action - uses: ./.github/actions/ansible - with: - workspace_dir: deployment/ansible - playbook_file_path: ${{ inputs.playbook_path }} - inventory_file_path: ${{ inputs.inventory_path }} - vault_password: ${{ secrets.VAULT_PASSWORD }} - ssh_key: ${{ secrets.SSH_KEY }} - options: | - --user ansible - --verbose diff --git a/.gitignore b/.gitignore index 62a9d48bb5..1147a0ce50 100755 --- a/.gitignore +++ b/.gitignore @@ -88,4 +88,4 @@ arangodb/data/* arangodb/apps/* # Arango seed data -arangodb/bin/seed_data \ No newline at end of file +arangodb/bin/seed_data diff --git a/.gitmodules b/.gitmodules index 98bab4898e..e69de29bb2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,13 +0,0 @@ -[submodule "lifelike-oss"] - path = lifelike-oss - url = https://github.com/SBRG/lifelike -[submodule "keycloak"] - path = keycloak - url = https://github.com/SBRG/lifelike-keycloak -[submodule "website"] - path = website - url = https://github.com/SBRG/lifelike-website -[submodule "deployment"] - path = deployment - url = git@github.com:SBRG/lifelike-infra.git - branch = ansible diff --git a/.prettierignore b/.prettierignore new file mode 100755 index 0000000000..5e74be90bf --- /dev/null +++ b/.prettierignore @@ -0,0 +1,98 @@ +helm/lifelike/templates +**/*.lock + +# -------------- +# Gitignore file +# -------------- + +# liinting +/**/.husky + +# Python +__pycache__/ +*.pyc +env/ +.cache/ +.python_history +.ipython/ +.mypy_cache/ + +# compiled output +dist/ +/**/*.pyc + +# System Files +.DS_Store +Thumbs.db +.bash_history +.local/ +.run + +# IDE - VSCode +.vscode/ + +# ignore db files +neo4j/data/* +neo4j/import/* +neo4j/logs/* +neo4j/plugins/* + +postgres/ +!postgres/.gitkeep + +# ignore node_modules +client/node_modules +esdata/ +node_modules/ + +# ignore copy +deployment/kg-prototypes +# terraform files +terraform.tfstate.* +.terraform/ +# The terraform Gcloud service account +terraform-gcloud.json + +# .env Files (to protect secrets) +*.env +*.env.* + +# IDE - IntelliJ +.idea/ +/*.iml +.run + +#virtual environments +.venv/ +venv/ +outputs/ + +# LMDB files +appserver/neo4japp/services/annotations/**/*.mdb +appserver/neo4japp/services/annotations/**/*.mdb.old +appserver/neo4japp/services/annotations/datasets/**/*.csv +appserver/neo4japp/services/annotations/datasets/**/*.tsv + +# profiler results +appserver/profilers/results/ + +# nlp pdfs +appserver/neo4japp/services/annotations/nlp_data_output/pdfs/*.pdf +appserver/neo4japp/services/annotations/nlp_data_output/annotations/*.json +appserver/neo4japp/services/annotations/nlp_data_output/*.txt + +# Backups +*.backup + +# gcloud service accounts +*ansible_service_account* + +# ansible secrets +.vault_secrets_pw + +# Arango volumes +arangodb/data/* +arangodb/apps/* + +# Arango seed data +arangodb/bin/seed_data diff --git a/.prettierrc b/.prettierrc index 19f2eb490a..e2c9ad1e39 100644 --- a/.prettierrc +++ b/.prettierrc @@ -1,6 +1,6 @@ singleQuote: true jsxSingleQuote: true overrides: - - files: ["*.yml", "*.yaml"] - options: - singleQuote: false + - files: ['*.yml', '*.yaml'] + options: + singleQuote: false diff --git a/README.md b/README.md index f5abb35137..2640590ab8 100644 --- a/README.md +++ b/README.md @@ -55,15 +55,15 @@ Visualizations are a powerful way to help 
you to understand the relationships be
 Lifelike currently provides the following built-in visualization types:
-- Maps
-- Enrichment tables
-- Sankey diagrams
-- Pathway Browser
+- Maps
+- Enrichment tables
+- Sankey diagrams
+- Pathway Browser
 ### Other features
-- Multi-user collaborative workbench
-- Powerful search engine
+- Multi-user collaborative workbench
+- Powerful search engine
 ## Common development operations
@@ -111,20 +111,20 @@ Lifelike is a distributed system comprised of the following components:
 ### Core services
-- **[Appserver](appserver)**. Backend API service, written in Python using the the Flask framework.
-- **[Client](client)**. Frontend Single Page Application, written in Typescript using the Angular framework.
-- **[Statistical enrichment](statistical-enrichment)**. Statistics generation microservice, written in Python using the the Flask framework.
-- **[Cache invalidator](cache-invalidator)**. Recurrent task runner for bulk large computations and cache data management, written in Python.
-- **[Graph data migrator](graph-db)**. Utility service for migrating and versioning knowledge graph database, using the Liquibase database migration tool.
+- **[Appserver](appserver)**. Backend API service, written in Python using the Flask framework.
+- **[Client](client)**. Frontend Single Page Application, written in Typescript using the Angular framework.
+- **[Statistical enrichment](statistical-enrichment)**. Statistics generation microservice, written in Python using the Flask framework.
+- **[Cache invalidator](cache-invalidator)**. Recurrent task runner for bulk large computations and cache data management, written in Python.
+- **[Graph data migrator](graph-db)**. Utility service for migrating and versioning knowledge graph database, using the Liquibase database migration tool.
 ### Backing services
-- **PostgreSQL** as a RDBMS.
-- **Neo4j** as a graph database.
-- **Elasticsearch** as a full-text search engine.
-- **Redis** as a key-value cache store.
-- **PDFParser** as a document parsing library.
-- **Sendgrid** as an email messaging service.
+- **PostgreSQL** as an RDBMS.
+- **ArangoDB** as a graph database.
+- **Elasticsearch** as a full-text search engine.
+- **Redis** as a key-value cache store.
+- **PDFParser** as a document parsing library.
+- **Sendgrid** as an email messaging service.
## License diff --git a/appserver/Dockerfile b/appserver/Dockerfile index 66b2431483..d2e572feb9 100644 --- a/appserver/Dockerfile +++ b/appserver/Dockerfile @@ -1,133 +1,44 @@ -# ======================================== -# Base image -# ======================================== -FROM python:3.10-slim as base - -ENV LANG C.UTF-8 -ENV LC_ALL C.UTF-8 -ENV PYTHONDONTWRITEBYTECODE 1 -ENV PYTHONFAULTHANDLER 1 +FROM fedora:33 as base +LABEL app=kg-prototypes +# Install dependencies +RUN dnf install htop postgresql graphviz python-pip python3-devel vim net-tools which -y \ + && dnf groupinstall 'Development Tools' -y \ + && dnf clean packages RUN pip install pipenv +ENV N4J_USER n4j +ENV N4J_HOME /home/$N4J_USER +ENV UID 1000 +ENV GID 1000 -# ======================================== -# Build dependencies stage -# ======================================== -FROM base as build-deps - -# Install build dependencies -RUN apt-get update \ - && apt-get install -y liblmdb-dev python3-dev libxml2-dev libxslt-dev build-essential \ - && apt-get clean - -# Copy Pipfiles -COPY Pipfile Pipfile.lock ./ - -# Install Python dependencies -ARG DEV -RUN PIPENV_VENV_IN_PROJECT=1 pipenv install --deploy $(if [ "$DEV" ]; then echo --dev; fi) - - -# ======================================== -# Runtime stage -# ======================================== -FROM base -LABEL org.opencontainers.image.source https://github.com/SBRG/lifelike - -# Install runtime system dependencies -RUN apt-get update \ - && apt-get install -y libmagic-dev graphviz libgraphviz-dev curl \ - && apt-get clean - -# Copy Python virtual environment -COPY --from=build-deps /.venv /.venv -ENV PATH="/.venv/bin:$PATH" - -# Set user and workdir -WORKDIR /app -RUN useradd -m -d /app app -USER app - -# Copy application code -COPY --chown=app . . - -# Set to 1 to automatically apply any pending DB migrations at startup -ENV MIGRATE_DB= - -# Create an initial admin user -ENV INITIAL_ADMIN_EMAIL= - -# LMDB database volume -ENV LMDB_DATA_DIR=/lmdb -VOLUME /lmdb - -# LMDB download cloud storage -# ENV AZURE_ACCOUNT_STORAGE_NAME= -# ENV AZURE_ACCOUNT_STORAGE_KEY= - -# JWT Authendication -ENV JWT_SECRET=secret - -# Base URL of this app, reachable by external services -ENV APPSERVER_URL=http://localhost:5000 - -# Base URL of the frontend app, for link generation -ENV FRONTEND_URL=http://localhost:4242 - -# PostgreSQL configuration -ENV POSTGRES_HOST=postgres -ENV POSTGRES_PORT=5432 -ENV POSTGRES_USER=postgres -ENV POSTGRES_PASSWORD=postgres -ENV POSTGRES_DB=postgres - -# Neo4j configuration -ENV NEO4J_HOST=neo4j -ENV NEO4J_PORT=7687 -ENV NEO4J_AUTH=neo4j/password -ENV NEO4J_DATABASE=neo4j -ENV NEO4J_SCHEME=bolt - -# Elasticsearch configuration -ENV ELASTICSEARCH_URL=http://elasticsearch:9200 -ENV ELASTICSEARCH_FILE_INDEX=file - -# Statistical enrichment service -ENV STATISTICAL_ENRICHMENT_URL=http://statistical-enrichment:5000 - -# PDFParser service -ENV PDFPARSER_URL=http://pdfparser:7600 +# User and group creation +RUN groupadd -g $GID $N4J_USER && \ + useradd -u $UID -g $GID -G wheel --create-home --home-dir $N4J_HOME --shell /bin/bash $N4J_USER -# NLP Processing service -ENV NLP_URL=https://nlp-api.lifelike.bio/v1/predict -ENV NLP_SECRET=secret +WORKDIR $N4J_HOME -# Mailserver configuration -ENV FROM_EMAIL=lifelike@example.com +# Copy Pipfiles and install dependencies FIRST to better apply Docker layer cache +COPY --chown=1000:1000 Pipfile . +COPY --chown=1000:1000 Pipfile.lock . 
+RUN pipenv install --dev --deploy --system -# Sendgrid integration -ENV SENDGRID_API_KEY= +# ...then copy everything else +COPY --chown=1000:1000 . . -# Optional Sentry logging configuration -ENV SENTRY_DSN= +# TODO: We should consider breaking this apart into dev and prod +# builds, so we don't build unnecessary packages -# Optional Elastic APM configuration. -# To enable, at least ELASTIC_APM_SERVER_URL must be set -# Other available variables: https://www.elastic.co/guide/en/apm/agent/python/master/configuration.html -ENV ELASTIC_APM_SERVER_URL= -ENV ELASTIC_APM_SERVICE_NAME=appserver +# Don't lose stdin, stdout and stderr output due to buffering +ENV PYTHONUNBUFFERED 1 +ENV PYTHONPATH $N4J_HOME -# Flask env (development, testing, production) -ENV FLASK_ENV=production +# Set Python3 as the default when running "python" +RUN echo 'alias python=python3' >> ~/.bashrc && source ~/.bashrc -# Listen port -ENV PORT=5000 -EXPOSE $PORT +USER $N4J_USER -# Health check by requesting system info to /meta endpoint -HEALTHCHECK --start-period=30s \ - CMD curl -f localhost:$PORT/meta || exit 1 +# Setup flask application environment vars +ENV MAX_ALLOWED_LOGIN_FAILURES 6 -RUN chmod +x bin/docker-entrypoint.sh -ENTRYPOINT ["bin/docker-entrypoint.sh"] +CMD [ "bin/startup.sh" ] diff --git a/appserver/bin/docker-entrypoint.sh b/appserver/bin/docker-entrypoint.sh deleted file mode 100755 index feb2d11530..0000000000 --- a/appserver/bin/docker-entrypoint.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -## This scripts serves as the Docker entrypoint. -## If any command is specified, it runs it as is, -## else, it runs the appserver Flask application. - -set -e - -if [ "$1" ]; then - ## A command is specified, then run it. - exec "$@" -else - if [ "$MIGRATE_DB" ]; then - ## If $MIGRATE_DB is set, wait for PostgreSQL and run any required migrations. - while ! curl $POSTGRES_HOST:$POSTGRES_PORT 2>&1 | grep '52'; do - echo "Waiting for PostgreSQL to be available in $POSTGRES_HOST:$POSTGRES_PORT" - sleep 5; - done - echo "PostreSQL is ready. Executing DB migrations now" - flask db upgrade --x-arg data_migrate="True" - echo "Finished executing DB migrations" - fi - - ## Create initial user if $INITIAL_ADMIN_EMAIL is set. - if [ "$INITIAL_ADMIN_EMAIL" ]; then - echo "Trying to create initial admin user: $INITIAL_ADMIN_EMAIL" - flask create-user "Admin" "$INITIAL_ADMIN_EMAIL" > /dev/null 2>&1 || true - flask set-role "$INITIAL_ADMIN_EMAIL" "admin" > /dev/null 2>&1 || true - fi - - ## Run the Flask appserver app, using the built-in development Flask server, - ## or gunicron WSGI server if the $FLASK_ENV is not set to development. 
- if [ "${FLASK_ENV}" = "development" ]; then - flask run -h 0.0.0.0 -p ${PORT:-5000} - else - gunicorn \ - -b 0.0.0.0:${PORT:-5000} \ - --workers=${GUNICORN_WORKERS:-9} \ - --threads=${GUNICORN_THREADS:-10} \ - --timeout=${GUNICORN_TIMEOUT:-300} \ - --max-requests=${GUNICORN_MAX_REQUESTS:-120} \ - app:app - fi -fi diff --git a/appserver/bin/startup.sh b/appserver/bin/startup.sh index 87179382d4..53c67775f1 100755 --- a/appserver/bin/startup.sh +++ b/appserver/bin/startup.sh @@ -8,7 +8,7 @@ if [ "${FLASK_ENV}" = "development" ] && [ "${FLASK_APP_CONFIG}" = "Development" __dir__="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # wait for postgres timeout 300 ${__dir__}/wait-for-postgres - # wait for neo4j + # wait for arangodb timeout 300 ${__dir__}/wait-for-arango #wait for elastic timeout 300 ${__dir__}/wait-for-elastic diff --git a/appserver/bin/wait-for-neo4j b/appserver/bin/wait-for-neo4j deleted file mode 100755 index c85c5339b8..0000000000 --- a/appserver/bin/wait-for-neo4j +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -echo "Waiting for Neo4J" - -NEO4J_STATUS="000" - -until [ "$NEO4J_STATUS" = "200" ] -do - NEO4J_STATUS=`curl -s -o /dev/null -I -w "%{http_code}" http://${NEO4J_HOST}:7474` - echo "Status of Neo4J: $NEO4J_STATUS" - sleep 2 -done - -# Run command | https://docs.docker.com/compose/startup-order/ ->&2 echo "Neo4j started - executing command" -exec $@ diff --git a/appserver/config.py b/appserver/config.py index 3f01a758f0..c094e85bb1 100644 --- a/appserver/config.py +++ b/appserver/config.py @@ -29,12 +29,6 @@ class Base: POSTGRES_PASSWORD = os.environ.get('POSTGRES_PASSWORD') POSTGRES_DB = os.environ.get('POSTGRES_DB') - NEO4J_HOST = os.environ.get('NEO4J_HOST', '0.0.0.0') - NEO4J_SCHEME = os.environ.get('NEO4J_SCHEME', 'bolt') - NEO4J_AUTH = os.environ.get('NEO4J_AUTH', 'neo4j/password') - NEO4J_PORT = os.environ.get('NEO4J_PORT', '7687') - NEO4J_DATABASE = os.environ.get('NEO4J_DATABASE') - ARANGO_HOST = os.environ.get('ARANGO_HOST', 'http://localhost:8529') ARANGO_USERNAME = os.environ.get('ARANGO_USERNAME', 'root') ARANGO_PASSWORD = os.environ.get('ARANGO_PASSWORD', 'password') diff --git a/appserver/neo4japp/blueprints/reports.py b/appserver/neo4japp/blueprints/reports.py index 3529036c0f..c9b55d444e 100644 --- a/appserver/neo4japp/blueprints/reports.py +++ b/appserver/neo4japp/blueprints/reports.py @@ -20,7 +20,36 @@ class CopyrightInfringementReportView(MethodView): @use_args(CopyrightInfringementRequestSchema) def post(self, params: dict): - with db.session.begin_nested(): + # Try to send an email to the user and currator + send_email_exception = None + try: + message = Mail( + from_email=MESSAGE_SENDER_IDENTITY, + to_emails=params['email'], + subject=COPYRIGHT_REPORT_CONFIRMATION_EMAIL_TITLE, + html_content=COPYRIGHT_REPORT_CONFIRMATION_EMAIL_CONTENT.format( + url=params['url'], + description=params['description'], + name=params['name'], + company=params['company'], + address=params['address'], + country=params['country'], + city=params['city'], + province=params['province'], + zip=params['zip'], + phone=params['phone'], + fax=params['fax'], + email=params['email'], + ), + ) + message.add_bcc(bcc_email=LIFELIKE_EMAIL_ACCOUNT) + get_send_grid_service().send(message) + except Exception as e: + # If the email fails to send, store the exception to raise later + # after the report is saved to the database + send_email_exception = e + + try: copyright_infringement_report = CopyrightInfringementRequest( url=params['url'], description=params['description'], @@ -41,38 
+70,16 @@ def post(self, params: dict): signature=params['signature'], ) db.session.add(copyright_infringement_report) - - message = Mail( - from_email=MESSAGE_SENDER_IDENTITY, - to_emails=params['email'], - subject=COPYRIGHT_REPORT_CONFIRMATION_EMAIL_TITLE, - html_content=COPYRIGHT_REPORT_CONFIRMATION_EMAIL_CONTENT.format( - url=params['url'], - description=params['description'], - name=params['name'], - company=params['company'], - address=params['address'], - country=params['country'], - city=params['city'], - province=params['province'], - zip=params['zip'], - phone=params['phone'], - fax=params['fax'], - email=params['email'], - ), - ) - message.add_bcc(bcc_email=LIFELIKE_EMAIL_ACCOUNT) - try: - get_send_grid_service().send(message) - except Exception as e: - with db.session.begin_nested(): - # If for some reason we cannot send a confirmation email, delete the row we just - # created and re-raise the error. - db.session.delete(copyright_infringement_report) - # rollback in case of error? + db.session.commit() + except Exception: + db.session.rollback() raise - - return jsonify(dict(result=copyright_infringement_report.to_dict())) + else: + return jsonify(dict(result=copyright_infringement_report.to_dict())) + finally: + # If the email failed to send, raise the exception before returning + if send_email_exception: + raise send_email_exception copyright_infringement_report_view = CopyrightInfringementReportView.as_view( diff --git a/appserver/neo4japp/blueprints/user.py b/appserver/neo4japp/blueprints/user.py index e33c85c59e..e0d7be30c2 100644 --- a/appserver/neo4japp/blueprints/user.py +++ b/appserver/neo4japp/blueprints/user.py @@ -7,6 +7,7 @@ from webargs.flaskparser import use_args from neo4japp.database import db +from neo4japp.exceptions import NotAuthorized from neo4japp.models import Projects, Files from neo4japp.schemas.filesystem import ( PublishSchema, @@ -48,6 +49,9 @@ def get(self, user_hash_id: str): @use_args(PublishSchema, locations=['json', 'form', 'files', 'mixed_form_json']) def post(self, params: dict, user_hash_id: str): + if g.current_user.has_role('admin') is False: + raise NotAuthorized() + file = Publish.create_uncommited_publication( user_hash_id, creator=g.current_user, **params ) diff --git a/appserver/neo4japp/constants.py b/appserver/neo4japp/constants.py index 1952651df5..9b1ae5a969 100644 --- a/appserver/neo4japp/constants.py +++ b/appserver/neo4japp/constants.py @@ -451,7 +451,7 @@ def is_db_name(s: str): RESET_PASSWORD_ALPHABET = RESET_PASSWORD_SYMBOLS + string.ascii_letters + string.digits # Start email constants -LIFELIKE_EMAIL_ACCOUNT = 'lifelike.science@gmail.com' +LIFELIKE_EMAIL_ACCOUNT = 'lifelike@biosustain.dtu.dk' MESSAGE_SENDER_IDENTITY = 'lifelike-account-service@lifelike.bio' MAILING_API_KEY = LocalProxy(lambda: config.get('SEND_GRID_EMAIL_API_KEY')) RESET_PASSWORD_EMAIL_TITLE = 'Lifelike: Account password reset' diff --git a/appserver/neo4japp/data_transfer_objects/search.py b/appserver/neo4japp/data_transfer_objects/search.py index f5ee549bdc..018b767158 100644 --- a/appserver/neo4japp/data_transfer_objects/search.py +++ b/appserver/neo4japp/data_transfer_objects/search.py @@ -8,7 +8,7 @@ @attr.s(frozen=True) class FTSQueryRecord(CamelDictMixin): - """Single record from a full text query in Neo4j""" + """Single record from a full text query in ArangoDB""" node: GraphNode = attr.ib() @@ -36,7 +36,7 @@ class FTSReferenceRecord(FTSQueryRecord): @attr.s(frozen=True) class FTSResult(CamelDictMixin): - """Paginated results for a full text search 
query in Neo4j""" + """Paginated results for a full text search query in ArangoDB""" query: str = attr.ib() nodes: List[FTSQueryRecord] = attr.ib() diff --git a/appserver/neo4japp/services/annotations/README.md b/appserver/neo4japp/services/annotations/README.md index 71ab2d16d1..be769a411b 100644 --- a/appserver/neo4japp/services/annotations/README.md +++ b/appserver/neo4japp/services/annotations/README.md @@ -146,9 +146,9 @@ A global inclusion/exclusion is **both** a local and a global. Local inclusions/exclusions are stored in Postgres, as well as global exclusions. The locals are saved in the `files` table, while the globals are in `global_list`. -The global inclusions are stored in Neo4j under the label `:GlobalInclusion` if they do not map to any existing nodes, otherwise use that existing node and create a new synonym relationship with it. +The global inclusions are stored in ArangoDB under the label `:GlobalInclusion` if they do not map to any existing nodes, otherwise use that existing node and create a new synonym relationship with it. -Because we do not curate before they're added to Neo4j, a user can potentially create a bad global inclusion. To correctly delete, we need the property `original_entity_types` so we don't accidentally delete the wrong thing. See https://sbrgsoftware.atlassian.net/browse/LL-3625 for more information. +Because we do not curate before they're added to ArangoDB, a user can potentially create a bad global inclusion. To correctly delete, we need the property `original_entity_types` so we don't accidentally delete the wrong thing. See https://sbrgsoftware.atlassian.net/browse/LL-3625 for more information. ## NLP Service diff --git a/cache-invalidator/Dockerfile b/cache-invalidator/Dockerfile index 33f146b5ba..1c52bb3e62 100644 --- a/cache-invalidator/Dockerfile +++ b/cache-invalidator/Dockerfile @@ -1,7 +1,7 @@ # ======================================== # Base image # ======================================== -FROM python:3.10-slim as base +FROM python:3.11-slim as base ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 @@ -42,12 +42,12 @@ USER app # Copy application code COPY --chown=app main.py ./ -# Neo4j configuration -ENV NEO4J_HOST=neo4j -ENV NEO4J_PORT=7687 -ENV NEO4J_AUTH=neo4j/password -ENV NEO4J_SCHEME=bolt -ENV NEO4J_DATABASE=neo4j +# ArangoDB configuration +ENV ARANGODB_HOST=arangodb +ENV ARANGODB_PORT=7687 +ENV ARANGODB_AUTH=arangodb/password +ENV ARANGODB_SCHEME=bolt +ENV ARANGODB_DATABASE=arangodb # Redis cache configuration ENV REDIS_HOST=redis diff --git a/cache-invalidator/Pipfile.lock b/cache-invalidator/Pipfile.lock index e95b37a95e..12972e9c28 100644 --- a/cache-invalidator/Pipfile.lock +++ b/cache-invalidator/Pipfile.lock @@ -18,42 +18,123 @@ "default": { "certifi": { "hashes": [ - "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", - "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" + "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", + "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" ], "markers": "python_version >= '3.6'", - "version": "==2022.12.7" + "version": "==2024.2.2" }, "charset-normalizer": { "hashes": [ - "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845", - "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f" - ], - "markers": "python_version >= '3.6'", - "version": "==2.1.1" + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + 
"sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + 
"sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" }, "idna": { "hashes": [ - "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", - 
"sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" + "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", + "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f" ], "markers": "python_version >= '3.5'", - "version": "==3.4" - }, - "neo4j": { - "hashes": [ - "sha256:b6c49fbd60426e268ed4afbd414766444fe70aee1ac0376a0c871d75526b8251" - ], - "index": "pypi", - "version": "==4.2.1" + "version": "==3.6" }, "pyjwt": { "hashes": [ - "sha256:69285c7e31fc44f68a1feb309e948e0df53259d579295e6cfe2b1792329f05fd", - "sha256:d83c3d892a77bbb74d3e1a2cfa90afaadb60945205d1095d9221f04466f64c14" + "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de", + "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320" ], "markers": "python_version >= '3.7'", - "version": "==2.6.0" + "version": "==2.8.0" }, "python-arango": { "hashes": [ @@ -61,54 +142,49 @@ "sha256:bb7cdd55776f5cf613e4bb9f0f0aba70b60408b964ec7c13b37f69b215acbbe8" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==7.5.2" }, - "pytz": { - "hashes": [ - "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", - "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" - ], - "version": "==2022.1" - }, "redis": { "hashes": [ "sha256:2ef11f489003f151777c064c5dbc6653dfb9f3eade159bcadc524619fddc2242", "sha256:6d65e84bc58091140081ee9d9c187aab0480097750fac44239307a3bdf0b1251" ], "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==3.5.2" }, "requests": { "hashes": [ - "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", - "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" + "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", + "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" ], - "markers": "python_version >= '3.7' and python_version < '4'", - "version": "==2.28.1" + "markers": "python_version >= '3.7'", + "version": "==2.31.0" }, "requests-toolbelt": { "hashes": [ - "sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7", - "sha256:62e09f7ff5ccbda92772a29f394a49c3ad6cb181d568b1337626b2abb628a63d" + "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", + "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==0.10.1" + "version": "==1.0.0" }, "setuptools": { "hashes": [ - "sha256:57f6f22bde4e042978bcd50176fdb381d7c21a9efa4041202288d3737a0c6a54", - "sha256:a7620757bf984b58deaf32fc8a4577a9bbc0850cf92c20e1ce41c38c19e5fb75" + "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e", + "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c" ], - "markers": "python_version >= '3.7'", - "version": "==65.6.3" + "markers": "python_version >= '3.8'", + "version": "==69.2.0" }, "urllib3": { "hashes": [ - "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc", - "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8" + "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", + "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==1.26.13" + "markers": "python_version >= '3.8'", + 
"version": "==2.2.1" } }, "develop": { @@ -139,14 +215,16 @@ "sha256:edf7237137a1a9330046dbb14796963d734dd740a98d5e144a3eb1d267f5f9ee" ], "index": "pypi", + "markers": "python_version >= '3.6'", "version": "==0.942" }, "mypy-extensions": { "hashes": [ - "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", - "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" + "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", + "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" ], - "version": "==0.4.3" + "markers": "python_version >= '3.5'", + "version": "==1.0.0" }, "pycodestyle": { "hashes": [ @@ -154,6 +232,7 @@ "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef" ], "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.7.0" }, "tomli": { @@ -166,11 +245,11 @@ }, "typing-extensions": { "hashes": [ - "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42", - "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2" + "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475", + "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb" ], - "markers": "python_version >= '3.6'", - "version": "==4.1.1" + "markers": "python_version >= '3.8'", + "version": "==4.10.0" } } } diff --git a/client/Dockerfile b/client/Dockerfile index 268192f7ce..b15603ee86 100644 --- a/client/Dockerfile +++ b/client/Dockerfile @@ -1,5 +1,20 @@ ARG NODE_IMAGE_TAG=node:14 +# ======================================== +# Landing page +# ======================================== +FROM $NODE_IMAGE_TAG as landing-build +WORKDIR /app + +# Install dependencies +COPY landing/package.json landing/yarn.lock ./ +RUN yarn install + +# Build landing page +COPY landing ./ +RUN yarn build + + # ================================================================== # Angular app dependencies by default used for local development # ================================================================== @@ -11,23 +26,28 @@ COPY package.json yarn.lock ./ ARG YARN_INSTALL_OPTS RUN yarn install ${YARN_INSTALL_OPTS} -ENV ENVIRONMENT_CONFIG development - +# build time arguments for Angular environment +ARG ANGULAR_CONFIG=development +ARG CLIENT_VERSION=undefined +# default enviroment presets +ENV ENVIRONMENT_CONFIG $ANGULAR_CONFIG # ======================================== # Angular app bundle build # ======================================== FROM angular-deps as angular-build +# build time arguments for Angular environment +ARG ANGULAR_CONFIG=production +ARG CLIENT_VERSION=undefined + # Copy the code and build the app bundle COPY src ./src COPY tslint ./tslint COPY e2e ./e2e COPY *.json browserslist ./ - -ARG ANGULAR_CONFIG=production -ENV NODE_OPTIONS=--max-old-space-size=4096 -RUN yarn build --configuration=$ANGULAR_CONFIG --output-path=dist +RUN sed -i "s/__VERSION__/${CLIENT_VERSION}/" src/environments/environment.ts +RUN yarn build --configuration=$ANGULAR_CONFIG --aot --output-path=dist # When targeting this image stage, run angulat dev server EXPOSE 4200 @@ -39,27 +59,43 @@ CMD yarn dev-start # ======================================== # Runtime stage - NGINX # ======================================== -FROM nginx:1.21 -LABEL org.opencontainers.image.source https://github.com/SBRG/lifelike - +FROM nginx:1.25.1 +LABEL app=kg-prototypes WORKDIR /usr/share/nginx/html -# Copy built assets -COPY --from=angular-build /app/dist 
./ +# URL to proxy requests to /api +ENV APPSERVER_UPSTREAM http://appserver:5000 -# Copy nginx configuraiton template -COPY nginx.conf /etc/nginx/templates/default.conf.template +# Whether to run the app in prod mode +ENV PRODUCTION_MODE true + +# Whether we are running with valid KEGG license +ENV KEGG_ENABLED false -# appserver URL to proxy /api requests -ENV APPSERVER_URL http://appserver:5000 +# Whether to run the app with oauth login +ENV OAUTH_ENABLED false + +# OAuth issuer discovert URL +ENV OAUTH_ISSUER "" + +# Client ID of the OAuth application +ENV OAUTH_CLIENT_ID "" # List of space delimited list of non-stantdard MIME types # which are known to benefit from gzip compression (text based content) ENV GZIP_EXTRA_TYPES text/tsv vnd.lifelike.document/bioc vnd.lifelike.document/enrichment-table vnd.lifelike.document/graph vnd.lifelike.document/map -# Runtime environment configuration preset -ENV ENVIRONMENT_CONFIG production +# build time argument for Angular environment +ARG ANGULAR_CONFIG=production + +# default enviroment presets +ENV ENVIRONMENT_CONFIG $ANGULAR_CONFIG + +# Copy nginx configuraiton template +COPY nginx.conf /etc/nginx/templates/default.conf.template + +# Copy built assets +COPY --from=landing-build /app/dist ./ +COPY --from=angular-build /app/dist ./ -# Listen port -ENV PORT 80 -EXPOSE $PORT +EXPOSE 80 diff --git a/client/landing/src/index.html b/client/landing/src/index.html index 468d424815..2664fc7ce3 100644 --- a/client/landing/src/index.html +++ b/client/landing/src/index.html @@ -45,12 +45,13 @@

From Big Data
to Big Picture

picture understanding, augmenting our intelligence in solving complex problems. - + diff --git a/client/src/app/app-routing.module.ts b/client/src/app/app-routing.module.ts index 0ae44202f8..3e43ffe2af 100644 --- a/client/src/app/app-routing.module.ts +++ b/client/src/app/app-routing.module.ts @@ -4,7 +4,6 @@ import { Router, RouterModule, Routes } from '@angular/router'; import { Store } from '@ngrx/store'; import { AdminPanelComponent } from 'app/admin/components/admin-panel.component'; -import { UserFileImportComponent } from 'app/user-file-import/components/user-file-import.component'; import { VisualizationComponent } from 'app/visualization/containers/visualization/visualization.component'; import { GraphSearchComponent } from 'app/search/components/graph-search.component'; import { ObjectBrowserComponent } from 'app/file-browser/components/object-browser.component'; @@ -25,9 +24,8 @@ import { CommunityBrowserComponent } from 'app/file-browser/components/community import { BrowserComponent } from 'app/file-browser/components/browser/browser.component'; import { ContentSearchComponent } from 'app/search/components/content-search.component'; import { ObjectNavigatorComponent } from 'app/file-navigator/components/object-navigator.component'; -import { ShortestPathComponent } from 'app/shortest-path/containers/shortest-path.component'; -import {EnrichmentTableViewerComponent} from 'app/enrichment/components/table/enrichment-table-viewer.component'; -import {EnrichmentVisualisationViewerComponent} from 'app/enrichment/components/visualisation/enrichment-visualisation-viewer.component'; +import { EnrichmentTableViewerComponent } from 'app/enrichment/components/table/enrichment-table-viewer.component'; +import { EnrichmentVisualisationViewerComponent } from 'app/enrichment/components/visualisation/enrichment-visualisation-viewer.component'; import { BiocViewComponent } from 'app/bioc-viewer/components/bioc-view.component'; import { ObjectViewerComponent } from 'app/file-browser/components/object-viewer.component'; import { SankeyViewComponent } from 'app/sankey/components/sankey-view.component'; @@ -113,11 +111,6 @@ const routes: Routes = [ fontAwesomeIcon: 'search', }, }, - { - path: 'pathway-browser-prototype', - canActivate: [AuthGuard], - component: ShortestPathComponent, - }, { path: 'projects/:project_name/enrichment-table/:file_id', canActivate: [], @@ -172,15 +165,6 @@ const routes: Routes = [ fontAwesomeIcon: 'fas fa-chart-network', }, }, - { - path: 'upload', - component: UserFileImportComponent, - canActivate: [AuthGuard], - data: { - title: 'Knowledge Graph Upload', - fontAwesomeIcon: 'fas fa-chart-network', - }, - }, ], }, { diff --git a/client/src/app/app.component.html b/client/src/app/app.component.html index b92e740d4f..6328577354 100644 --- a/client/src/app/app.component.html +++ b/client/src/app/app.component.html @@ -105,9 +105,6 @@ [appAutoCloseTooltipOutOfView]="tooltipRef"> --> - - - Publish diff --git a/client/src/app/file-browser/components/published-browser/published-browser.component.ts b/client/src/app/file-browser/components/published-browser/published-browser.component.ts index bb7cbe0349..b68c26555c 100644 --- a/client/src/app/file-browser/components/published-browser/published-browser.component.ts +++ b/client/src/app/file-browser/components/published-browser/published-browser.component.ts @@ -49,6 +49,8 @@ export class PublishedBrowserComponent implements OnInit, OnDestroy { ); private loadTaskSubscription: Subscription; + readonly disablePublishMessage = + 
'You do not have permission to publish files. Please contact an administrator if you need this feature.'; constructor( private readonly filesystemService: FilesystemService, diff --git a/client/src/app/sankey/components/entity-details/node-details.component.html b/client/src/app/sankey/components/entity-details/node-details.component.html index bd2780277c..20a49aafbc 100644 --- a/client/src/app/sankey/components/entity-details/node-details.component.html +++ b/client/src/app/sankey/components/entity-details/node-details.component.html @@ -69,7 +69,7 @@ - +
  • {{ label }}
  • diff --git a/client/src/app/search/components/graph-search.component.ts b/client/src/app/search/components/graph-search.component.ts index a710d77e8e..67900606ce 100644 --- a/client/src/app/search/components/graph-search.component.ts +++ b/client/src/app/search/components/graph-search.component.ts @@ -79,7 +79,7 @@ export class GraphSearchComponent implements OnInit, OnDestroy, ModuleAwareCompo map( (legend) => // Keys of the result dict are all lowercase, need to change the first character - // to uppercase to match Neo4j labels + // to uppercase to match ArangoDB labels new Map(Object.entries(legend).map(([label, { color }]) => [label, color])) ), startWith(new Map()) diff --git a/client/src/app/shortest-path/components/route-builder.component.html b/client/src/app/shortest-path/components/route-builder.component.html deleted file mode 100644 index ab0280681e..0000000000 --- a/client/src/app/shortest-path/components/route-builder.component.html +++ /dev/null @@ -1,47 +0,0 @@ -
    - -
    -
    -
    - -
    -
    - -
    -
    -
    -
    -
    - -
    -

    Loading pre-defined queries...

    -
    -
    -
    - - -
    -

    Could not load pre-defined shortest path queries!

    -
    -
    - - -
    -
    -
    -
    diff --git a/client/src/app/shortest-path/components/route-builder.component.scss b/client/src/app/shortest-path/components/route-builder.component.scss deleted file mode 100644 index 357cdccf59..0000000000 --- a/client/src/app/shortest-path/components/route-builder.component.scss +++ /dev/null @@ -1,27 +0,0 @@ -.graph-type-btn-container { - height: 40px; - width: 40px; -} - -.route-builder-container-open { - position: absolute; - width: 400px; - right: 0; - z-index: 2; - transition: right .25s; -} - -.route-builder-container-closed { - position: absolute; - width: 400px; - right: -400px; - z-index: 2; - transition: right .25s; -} - -.toggle-route-builder-button { - height: 60px; - top: 28px; - right: 100%; - position: absolute; -} diff --git a/client/src/app/shortest-path/components/route-builder.component.spec.ts b/client/src/app/shortest-path/components/route-builder.component.spec.ts deleted file mode 100644 index a7884c15cd..0000000000 --- a/client/src/app/shortest-path/components/route-builder.component.spec.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { ComponentFixture, TestBed } from '@angular/core/testing'; - -import { configureTestSuite } from 'ng-bullet'; - -import { RootStoreModule } from 'app/root-store'; -import { SharedModule } from 'app/shared/shared.module'; - -import { RouteBuilderComponent } from './route-builder.component'; - -describe('RouteBuilderComponent', () => { - let component: RouteBuilderComponent; - let fixture: ComponentFixture; - - configureTestSuite(() => { - TestBed.configureTestingModule({ - imports: [ - RootStoreModule, - SharedModule - ], - declarations: [ RouteBuilderComponent ] - }); - }); - - beforeEach(() => { - fixture = TestBed.createComponent(RouteBuilderComponent); - component = fixture.componentInstance; - fixture.detectChanges(); - }); - - it('should create', () => { - expect(component).toBeTruthy(); - }); -}); diff --git a/client/src/app/shortest-path/components/route-builder.component.ts b/client/src/app/shortest-path/components/route-builder.component.ts deleted file mode 100644 index 22c02806a9..0000000000 --- a/client/src/app/shortest-path/components/route-builder.component.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { Component, EventEmitter, OnDestroy, OnInit, Output } from '@angular/core'; - -import { combineLatest, Subscription } from 'rxjs'; - -import { BackgroundTask } from 'app/shared/rxjs/background-task'; - -import { ShortestPathService } from '../services/shortest-path.service'; -import { DisplayType } from './route-display.component'; - -@Component({ - selector: 'app-route-builder', - templateUrl: './route-builder.component.html', - styleUrls: ['./route-builder.component.scss'] -}) -export class RouteBuilderComponent implements OnInit, OnDestroy { - @Output() loadNewQuery: EventEmitter; - @Output() changeDisplayType: EventEmitter; - - loadShortestPathQueries: BackgroundTask<[], any>; - queriesLoadedsub: Subscription; - - routeBuilderContainerClass: string; - - routeBuilderOpen: boolean; - - queries: string[][]; - - constructor( - public shortestPathService: ShortestPathService, - ) { - this.queries = []; - - this.routeBuilderContainerClass = 'route-builder-container-open'; - this.routeBuilderOpen = true; - - this.loadNewQuery = new EventEmitter(); - this.changeDisplayType = new EventEmitter(); - - this.loadShortestPathQueries = new BackgroundTask(() => { - return combineLatest( - this.shortestPathService.getShortestPathQueryList(), - ); - }); - this.queriesLoadedsub = this.loadShortestPathQueries.results$.subscribe(({ - result: 
[shortestPathQueries], - value: [], - }) => { - this.queries = Object.keys(shortestPathQueries).map(key => [key, shortestPathQueries[key]]); - }); - } - - ngOnInit() { - this.loadNewQuery.emit(0); - this.changeDisplayType.emit('NETWORK'); - this.loadShortestPathQueries.update([]); - } - - ngOnDestroy() { - this.queriesLoadedsub.unsubscribe(); - } - - toggleRouteBuilderOpen() { - this.routeBuilderOpen = !this.routeBuilderOpen; - this.routeBuilderContainerClass = this.routeBuilderOpen ? 'route-builder-container-open' : 'route-builder-container-closed'; - } - - requestQueryLoadFromParent(event: any) { - this.loadNewQuery.emit(Number.parseInt(event.target.value, 10)); - } - - requestChangeDisplayTypeFromParent(type: string) { - this.changeDisplayType.emit(type); - } -} diff --git a/client/src/app/shortest-path/components/route-display.component.html b/client/src/app/shortest-path/components/route-display.component.html deleted file mode 100644 index f0ba2043ec..0000000000 --- a/client/src/app/shortest-path/components/route-display.component.html +++ /dev/null @@ -1,18 +0,0 @@ -
    -
    -
    - -
    -
    - -
    -
    -
    diff --git a/client/src/app/shortest-path/components/route-display.component.scss b/client/src/app/shortest-path/components/route-display.component.scss deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/client/src/app/shortest-path/components/route-display.component.spec.ts b/client/src/app/shortest-path/components/route-display.component.spec.ts deleted file mode 100644 index e04e8f259d..0000000000 --- a/client/src/app/shortest-path/components/route-display.component.spec.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { ComponentFixture, TestBed } from '@angular/core/testing'; - -import { configureTestSuite } from 'ng-bullet'; -import { MockComponents } from 'ng-mocks'; - -import { PlotlySankeyDiagramComponent } from 'app/shared/components/plotly-sankey-diagram/plotly-sankey-diagram.component'; -import { VisJsNetworkComponent } from 'app/shared/components/vis-js-network/vis-js-network.component'; - -import { RouteDisplayComponent } from './route-display.component'; - -describe('RouteDisplayComponent', () => { - let component: RouteDisplayComponent; - let fixture: ComponentFixture; - - configureTestSuite(() => { - TestBed.configureTestingModule({ - declarations: [ - RouteDisplayComponent, - MockComponents( - PlotlySankeyDiagramComponent, - VisJsNetworkComponent - ) - ] - }); - }); - - beforeEach(() => { - fixture = TestBed.createComponent(RouteDisplayComponent); - component = fixture.componentInstance; - fixture.detectChanges(); - }); - - it('should create', () => { - expect(component).toBeTruthy(); - }); -}); diff --git a/client/src/app/shortest-path/components/route-display.component.ts b/client/src/app/shortest-path/components/route-display.component.ts deleted file mode 100644 index 43a68748ee..0000000000 --- a/client/src/app/shortest-path/components/route-display.component.ts +++ /dev/null @@ -1,191 +0,0 @@ -import { Component, Input } from '@angular/core'; - -import { isNil } from 'lodash-es'; -import { Options } from 'vis-network'; - -import { GraphData } from 'app/interfaces/vis-js.interface'; - -export enum DisplayType { - NETWORK = 'network', - SANKEY = 'sankey' -} - -@Component({ - selector: 'app-route-display', - templateUrl: './route-display.component.html', - styleUrls: ['./route-display.component.scss'] -}) -export class RouteDisplayComponent { - @Input() set displayType(displayType: DisplayType) { - this.currentDisplay = DisplayType[displayType]; - } - @Input() set graphData(graphData: GraphData) { - // Update vis js data - this.networkData = graphData; - - // Update sankey data - this.generateSankeyData(graphData.nodes, graphData.edges); - - // Update legend - this.setupLegend(graphData.nodes); - } - - currentDisplay: string; - - networkConfig: Options; - networkData: GraphData; - - sankeyConfig: any; - sankeyData: any; - - legend: Map; - - constructor() { - this.initVisJsSettings(); - this.initPlotlySankeySettings(); - this.legend = new Map(); - } - - initVisJsSettings() { - // Init vis js network settings - this.networkConfig = { - interaction: { - hover: true, - multiselect: true, - selectConnectedEdges: false, - }, - physics: { - enabled: true, - solver: 'barnesHut', - }, - edges: { - font: { - size: 12, - }, - length: 250, - widthConstraint: { - maximum: 90, - }, - }, - nodes: { - scaling: { - min: 25, - max: 50, - label: { - enabled: true, - min: 12, - max: 72, - maxVisible: 72, - drawThreshold: 5, - }, - }, - shape: 'box', - widthConstraint: { - maximum: 180, - }, - }, - }; - - this.networkData = { - nodes: [], - edges: [], - }; - } - - 
generateSankeyData(nodes: any[], edges: any[]) { - const source = []; - const target = []; - const value = []; - const label = []; - const color = []; - - const nodeIdentityMap = new Map(); - nodes.forEach((node, i) => { - nodeIdentityMap.set(node.id, i); - label.push(node.label); - color.push(node.color.border); - }); - - const seenEdges = new Map(); - edges.forEach(edge => { - const sankeyEdge = [nodeIdentityMap.get(edge.from), nodeIdentityMap.get(edge.to)]; - if (seenEdges.has(sankeyEdge.toString())) { - value[seenEdges.get(sankeyEdge.toString())] += 1; - } else { - source.push(sankeyEdge[0]); - target.push(sankeyEdge[1]); - value.push(1); - seenEdges.set(sankeyEdge.toString(), value.length - 1); - } - }); - - this.sankeyData = { - type: 'sankey', - orientation: 'h', - node: { - arrangement: 'snap', - pad: 15, - thickness: 20, - line: { - color: 'black', - width: 0.5 - }, - label, - color, - }, - - link: { - source, - target, - value - } - }; - } - - initPlotlySankeySettings() { - // Init plotly sankey settings - this.sankeyData = { - type: 'sankey', - orientation: 'h', - node: { - arrangement: 'snap', - pad: 15, - thickness: 20, - line: { - color: 'black', - width: 0.5 - }, - label: [], - color: [], - }, - - link: { - source: [], - target: [], - value: [] - } - }; - - this.sankeyConfig = { - font: { - size: 10 - } - }; - } - - /** - * Given a list of input nodes, generates a Map object representing a node legend. Keys are the label of the nodes, and values are a list - * of colors representing the border and background of the node. - * @param nodes list of node objects - */ - setupLegend(nodes: any) { - nodes.forEach((node) => { - if (!isNil(node.databaseLabel)) { - if (!this.legend.has(node.databaseLabel)) { - this.legend.set(node.databaseLabel, [node.color.border, node.color.background]); - } - } - }); - } - -} diff --git a/client/src/app/shortest-path/containers/route-search.component.html b/client/src/app/shortest-path/containers/route-search.component.html deleted file mode 100644 index 542f90ab00..0000000000 --- a/client/src/app/shortest-path/containers/route-search.component.html +++ /dev/null @@ -1 +0,0 @@ -

    route-search works!

    diff --git a/client/src/app/shortest-path/containers/route-search.component.scss b/client/src/app/shortest-path/containers/route-search.component.scss deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/client/src/app/shortest-path/containers/route-search.component.spec.ts b/client/src/app/shortest-path/containers/route-search.component.spec.ts deleted file mode 100644 index 7a5354fab2..0000000000 --- a/client/src/app/shortest-path/containers/route-search.component.spec.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { ComponentFixture, TestBed } from '@angular/core/testing'; - -import { configureTestSuite } from 'ng-bullet'; - -import { RootStoreModule } from 'app/root-store'; - -import { RouteSearchComponent } from './route-search.component'; - -describe('RouteSearchComponent', () => { - let component: RouteSearchComponent; - let fixture: ComponentFixture; - - configureTestSuite(() => { - TestBed.configureTestingModule({ - imports: [ - RootStoreModule - ], - declarations: [ RouteSearchComponent ] - }); - }); - - beforeEach(() => { - fixture = TestBed.createComponent(RouteSearchComponent); - component = fixture.componentInstance; - fixture.detectChanges(); - }); - - it('should create', () => { - expect(component).toBeTruthy(); - }); -}); diff --git a/client/src/app/shortest-path/containers/route-search.component.ts b/client/src/app/shortest-path/containers/route-search.component.ts deleted file mode 100644 index 10d8fa3313..0000000000 --- a/client/src/app/shortest-path/containers/route-search.component.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { Component, OnInit } from '@angular/core'; - -@Component({ - selector: 'app-route-search', - templateUrl: './route-search.component.html', - styleUrls: ['./route-search.component.scss'] -}) -export class RouteSearchComponent implements OnInit { - - constructor() { } - - ngOnInit() { - } - -} diff --git a/client/src/app/shortest-path/containers/shortest-path.component.html b/client/src/app/shortest-path/containers/shortest-path.component.html deleted file mode 100644 index 58514a26c1..0000000000 --- a/client/src/app/shortest-path/containers/shortest-path.component.html +++ /dev/null @@ -1,20 +0,0 @@ -
    - -
    - -
    - - - -
    - -
    -
    -
    diff --git a/client/src/app/shortest-path/containers/shortest-path.component.scss b/client/src/app/shortest-path/containers/shortest-path.component.scss deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/client/src/app/shortest-path/containers/shortest-path.component.spec.ts b/client/src/app/shortest-path/containers/shortest-path.component.spec.ts deleted file mode 100644 index 01988d548c..0000000000 --- a/client/src/app/shortest-path/containers/shortest-path.component.spec.ts +++ /dev/null @@ -1,42 +0,0 @@ -import { ComponentFixture, TestBed } from '@angular/core/testing'; - -import { configureTestSuite } from 'ng-bullet'; -import { MockComponents } from 'ng-mocks'; - -import { RootStoreModule } from 'app/root-store'; -import { SharedModule } from 'app/shared/shared.module'; - -import { RouteBuilderComponent } from '../components/route-builder.component'; -import { RouteDisplayComponent } from '../components/route-display.component'; -import { ShortestPathComponent } from './shortest-path.component'; - -describe('ShortestPathComponent', () => { - let component: ShortestPathComponent; - let fixture: ComponentFixture; - - configureTestSuite(() => { - TestBed.configureTestingModule({ - imports: [ - RootStoreModule, - SharedModule, - ], - declarations: [ - ShortestPathComponent, - MockComponents( - RouteBuilderComponent, - RouteDisplayComponent, - ), - ] - }); - }); - - beforeEach(() => { - fixture = TestBed.createComponent(ShortestPathComponent); - component = fixture.componentInstance; - fixture.detectChanges(); - }); - - it('should create', () => { - expect(component).toBeTruthy(); - }); -}); diff --git a/client/src/app/shortest-path/containers/shortest-path.component.ts b/client/src/app/shortest-path/containers/shortest-path.component.ts deleted file mode 100644 index 9fa38e6a88..0000000000 --- a/client/src/app/shortest-path/containers/shortest-path.component.ts +++ /dev/null @@ -1,57 +0,0 @@ -import { Component, OnDestroy } from '@angular/core'; - -import { combineLatest, Subscription } from 'rxjs'; - -import { GraphData } from 'app/interfaces/vis-js.interface'; -import { BackgroundTask } from 'app/shared/rxjs/background-task'; - -import { ShortestPathService } from '../services/shortest-path.service'; - - -@Component({ - selector: 'app-shortest-path', - templateUrl: './shortest-path.component.html', - styleUrls: ['./shortest-path.component.scss'] -}) -export class ShortestPathComponent implements OnDestroy { - - loadTask: BackgroundTask<[], any>; - shortestPathLoadedSub: Subscription; - - loadedQuery: number; - displayType: string; - graphData: GraphData; - - constructor( - public shortestPathService: ShortestPathService, - ) { - this.loadTask = new BackgroundTask(() => { - return combineLatest( - this.shortestPathService.getShortestPathQueryResult(this.loadedQuery), - ); - }); - this.shortestPathLoadedSub = this.loadTask.results$.subscribe(({ - result: [shortestPathResult], - value: [], - }) => { - this.graphData = { - nodes: shortestPathResult.nodes, - edges: shortestPathResult.edges, - }; - }); - } - - ngOnDestroy() { - this.shortestPathLoadedSub.unsubscribe(); - } - - changeDisplayType(type: string) { - this.displayType = type; - } - - loadNewQuery(query: number) { - this.graphData = null; - this.loadedQuery = query; - this.loadTask.update([]); - } -} diff --git a/client/src/app/shortest-path/services/shortest-path.service.ts b/client/src/app/shortest-path/services/shortest-path.service.ts deleted file mode 100644 index 84a4ba0337..0000000000 --- 
a/client/src/app/shortest-path/services/shortest-path.service.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { HttpClient } from '@angular/common/http'; -import { Injectable } from '@angular/core'; - -import { Observable } from 'rxjs'; -import { map } from 'rxjs/operators'; - -import { AuthenticationService } from 'app/auth/services/authentication.service'; -import { AbstractService } from 'app/shared/services/abstract-service'; - -@Injectable({ - providedIn: 'root' -}) -export class ShortestPathService extends AbstractService { - readonly kgAPI = '/api/knowledge-graph'; - - constructor(auth: AuthenticationService, http: HttpClient) { - super(auth, http); - } - - getShortestPathQueryResult(queryId: number): Observable { - return this.http.get<{result: any}>( - `${this.kgAPI}/shortest-path-query/${queryId}`, { - ...this.getHttpOptions(true), - } - ).pipe( - map((resp: any) => resp.result), - ); - } - - getShortestPathQueryList(): Observable { - return this.http.get<{result: Map}>( - `${this.kgAPI}/shortest-path-query-list`, { - ...this.getHttpOptions(true), - } - ).pipe( - map((resp: any) => resp.result), - ); - } -} diff --git a/client/src/app/shortest-path/shortest-path.module.ts b/client/src/app/shortest-path/shortest-path.module.ts deleted file mode 100644 index 13801d1634..0000000000 --- a/client/src/app/shortest-path/shortest-path.module.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { NgModule } from '@angular/core'; -import { CommonModule } from '@angular/common'; - -import { SharedModule } from 'app/shared/shared.module'; - -import { ShortestPathComponent } from './containers/shortest-path.component'; -import { RouteSearchComponent } from './containers/route-search.component'; -import { RouteBuilderComponent } from './components/route-builder.component'; -import { RouteDisplayComponent } from './components/route-display.component'; - -const components = [ - ShortestPathComponent, - RouteSearchComponent, - RouteBuilderComponent, - RouteDisplayComponent, -]; - -@NgModule({ - declarations: [...components], - imports: [ - CommonModule, - SharedModule, - ] -}) -export class ShortestPathModule { } diff --git a/client/src/environments/development.css b/client/src/environments/development.css index 8b13789179..e69de29bb2 100644 --- a/client/src/environments/development.css +++ b/client/src/environments/development.css @@ -1 +0,0 @@ - diff --git a/client/src/environments/production.css b/client/src/environments/production.css index 8b13789179..e69de29bb2 100644 --- a/client/src/environments/production.css +++ b/client/src/environments/production.css @@ -1 +0,0 @@ - diff --git a/deployment b/deployment deleted file mode 160000 index 0f1943080f..0000000000 --- a/deployment +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0f1943080f4dd462a64535248e2f63ede809faa2 diff --git a/docker-compose.ci-test.yml b/docker-compose.ci-test.yml index dec5adbd8c..2a85d2aae5 100644 --- a/docker-compose.ci-test.yml +++ b/docker-compose.ci-test.yml @@ -2,11 +2,11 @@ version: "3.8" x-appserver: &appserver + build: + context: appserver + dockerfile: Dockerfile image: lifelike.azurecr.io/kg-appserver:${GITHUB_HASH} environment: - - NEO4J_HOST=neo4j - - NEO4J_AUTH=neo4j/password - - NEO4J_PORT=7687 - FLASK_DEBUG=1 - FLASK_ENV=development - FLASK_APP=app @@ -33,7 +33,6 @@ x-appserver: &appserver depends_on: - postgres - elasticsearch - - neo4j - redis - arangodb @@ -52,13 +51,6 @@ services: environment: - POSTGRES_PASSWORD=postgres - neo4j: - image: neo4j:4.4-community - restart: on-failure - environment: - - 
NEO4J_AUTH=neo4j/password - - NEO4JLABS_PLUGINS=["apoc", "n10s"] - elasticsearch: image: ghcr.io/sbrg/lifelike-elasticsearch:latest restart: on-failure diff --git a/docker-compose.override.yml b/docker-compose.override.yml index ca4ec289d8..2df05883fb 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -2,8 +2,6 @@ version: "3.8" x-appserver: &appserver build: ./appserver - env_file: - - azure-secrets.env environment: # Log Services - FORMAT_AS_JSON=false diff --git a/docker/Makefile b/docker/Makefile index 1ca4de303e..44cc20abe2 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -3,7 +3,7 @@ DOCKER_REGISTRY:=us.gcr.io/able-goods-221820 # Base Docker Compose command -------------------------------------------------- # docker-compose.yml -> Base services containers -# docker-compose.services.yml -> Third party services (PostgreSQL, Neo4j, Elasticsearch, Redis) +# docker-compose.services.yml -> Third party services (PostgreSQL, ArangoDB, Elasticsearch, Redis) # docker-compose.dev.yml -> Override base services for local development COMPOSE_COMMAND=docker-compose -p lifelike \ -f ${DOCKER_DIR}/docker-compose.yml \ diff --git a/docker/README.md b/docker/README.md index 0296ad16ba..daa9263be4 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,7 +4,7 @@ Docker is an easy way to get started with Lifelike. ## Prerequisites -- [Docker](https://www.docker.com/get-started) +- [Docker](https://www.docker.com/get-started) ## Run locally @@ -19,7 +19,7 @@ make up ```text Building and running containers... -This may take a while if running for the first time. +This may take a while if running for the first time. [+] Running 13/13 â ŋ Network lifelike Created @@ -50,7 +50,7 @@ The stack definition is divided into three Docker Compose files: ```tree ├── docker-compose.yml --> Base core services ├── docker-compose.dev.yml --> Overrides base core services for local development and debugging. -└── docker-compose.services.yml --> Adds third party services (PostgreSQL, Neo4j, Elasticsearch, Redis) +└── docker-compose.services.yml --> Adds third party services (PostgreSQL, ArangoDB, Elasticsearch, Redis) ``` You may combine them as you need and/or add your own `docker-compose.override.yml` to override any configuration. (this file will be ignored by Git) diff --git a/docker/diagram.svg b/docker/diagram.svg index e9b292e909..e3ddfdb2bf 100644 --- a/docker/diagram.svg +++ b/docker/diagram.svg @@ -3,7 +3,7 @@ entity appserver-->cache-invalidatorelasticsearchfrontendneo4jarangodbpdfparserpostgresredisdepends ondepends ondepends ondepends ondepends ondepends ondepends ondepends ondepends ondepends on \ No newline at end of file +--> diff --git a/docker/docker-compose.services.yml b/docker/docker-compose.services.yml index 1b18dcbfbf..ceae580e7c 100644 --- a/docker/docker-compose.services.yml +++ b/docker/docker-compose.services.yml @@ -5,7 +5,7 @@ version: "3.8" services: # PostgreSQL database postgres: - image: postgres:13 + image: postgres:11 restart: unless-stopped container_name: postgres environment: @@ -18,45 +18,52 @@ services: test: ["CMD", "pg_isready", "-U", "postgres", "-q"] start_period: 10s - ## Neo4j graph database - neo4j: - image: neo4j:4.4-community - restart: unless-stopped - container_name: neo4j + ## ArangoDB graph database + arangodb: + build: + # Note that this Dockerfile simply adds a user directory and installs the helper scripts into it, and then executes the default arango + # entrypoint as normal. 
+ context: ../arangodb + dockerfile: Dockerfile + container_name: arangodb environment: - - NEO4J_AUTH=neo4j/password - - NEO4JLABS_PLUGINS=["apoc"] + ARANGO_ROOT_PASSWORD: password + LIFELIKE_DB_NAME: lifelike ports: - - 7687:7687 - - 7474:7474 + - 8529:8529 volumes: - - neo4j:/var/lib/neo4j/data - healthcheck: - test: ["CMD", "wget", "localhost:7474", "-O", "-"] - start_period: 1m + - ../arangodb/bin:/home/dbuser/bin # the /home/dbuser directory is setup in the Dockerfile + - ../arangodb/data:/var/lib/arangodb3 + - ../arangodb/apps:/var/lib/arangodb3-apps ## Ekasticsearch search engine elasticsearch: - image: ghcr.io/sbrg/lifelike-elasticsearch:7.16.3 - restart: unless-stopped container_name: elasticsearch + build: + dockerfile: Dockerfile + context: ../elasticsearch environment: - discovery.type=single-node + - http.max_content_length=200mb #allow 200mb of content to be sent for indexing - bootstrap.memory_lock=true - ports: - - 9200 + - xpack.graph.enabled=false + - xpack.watcher.enabled=false + - xpack.license.self_generated.type=basic volumes: - - elasticsearch:/usr/share/elasticsearch/data + - ../elasticsearch:/usr/share/elasticsearch/data ulimits: memlock: soft: -1 hard: -1 + ports: + - "9200:9200" + - "9300:9300" healthcheck: test: ["CMD", "curl", "-f", "localhost:9200/_cluster/health"] start_period: 1m redis: - image: redis:alpine + image: redis:6-alpine restart: unless-stopped container_name: redis ports: @@ -68,34 +75,43 @@ services: environment: - POSTGRES_HOST=postgres - POSTGRES_PASSWORD=postgres - - NEO4J_HOST=neo4j - - NEO4J_AUTH=neo4j/password + # Arango + - ARANGO_HOST=http://arangodb:8529 + - ARANGO_USERNAME=root + - ARANGO_PASSWORD=password + - ARANGO_DB_NAME=lifelike - ELASTICSEARCH_URL=http://elasticsearch:9200 - REDIS_HOST=redis depends_on: - postgres - - neo4j + - arangodb - elasticsearch statistical-enrichment: environment: - - NEO4J_HOST=neo4j - - NEO4J_AUTH=neo4j/password + # Arango + - ARANGO_HOST=http://arangodb:8529 + - ARANGO_USERNAME=root + - ARANGO_PASSWORD=password + - ARANGO_DB_NAME=lifelike - REDIS_HOST=redis depends_on: - - neo4j + - arangodb - redis cache-invalidator: environment: - - NEO4J_HOST=neo4j - - NEO4J_AUTH=neo4j/password + # Arango + - ARANGO_HOST=http://arangodb:8529 + - ARANGO_USERNAME=root + - ARANGO_PASSWORD=password + - ARANGO_DB_NAME=lifelike - REDIS_HOST=redis depends_on: - - neo4j + - arangodb - redis volumes: postgres: - neo4j: + arangodb: elasticsearch: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 556013ab1b..48dd0de11d 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -3,7 +3,9 @@ version: "3.8" services: ## Frontend web server frontend: - image: ghcr.io/sbrg/lifelike-frontend:${FRONTEND_IMAGE_TAG:-latest} + build: + context: ../client + dockerfile: Dockerfile restart: unless-stopped container_name: frontend environment: @@ -15,7 +17,9 @@ services: ## Backend application server appserver: - image: ghcr.io/sbrg/lifelike-appserver:${APPSERVER_IMAGE_TAG:-latest} + build: + context: ../appserver + dockerfile: Dockerfile restart: unless-stopped container_name: appserver ports: @@ -43,7 +47,9 @@ services: ## Statistical enrichment service statistical-enrichment: - image: ghcr.io/sbrg/lifelike-statistical-enrichment:${STATISTICAL_ENRICHMENT_IMAGE_TAG:-latest} + build: + context: ../statistical-enrichment + dockerfile: Dockerfile restart: unless-stopped container_name: statistical-enrichment ports: @@ -51,7 +57,9 @@ services: # Cache invalidator service cache-invalidator: - image: 
ghcr.io/sbrg/lifelike-cache-invalidator:${CACHE_INVALIDATOR_IMAGE_TAG:-latest} + build: + context: ../cache-invalidator + dockerfile: Dockerfile restart: unless-stopped container_name: cache-invalidator diff --git a/docs/wiki/Architecture overview.drawio.png b/docs/wiki/Architecture overview.drawio.png new file mode 100644 index 0000000000..21b24aa01b Binary files /dev/null and b/docs/wiki/Architecture overview.drawio.png differ diff --git a/docs/wiki/README.md b/docs/wiki/README.md new file mode 100644 index 0000000000..70fb17a96c --- /dev/null +++ b/docs/wiki/README.md @@ -0,0 +1,159 @@ +# Architecture overview + +![Architecture overview diagram](Architecture overview.drawio.png) +NOTE: The diagram was created with [draw.io](https://draw.io); this is the editable version (a copy of the diagram has been embedded into the file). + +# Service functional groups + +## Authentication + +Authentication-related code lives in the front-end and the appserver. Additionally, in the publish environment Keycloak is used as the OAuth2 provider. + +## Annotation + +Annotation relies on the front-end, appserver, pdfparser, Redis, Postgres and the graph database. +Only two types of files are annotatable: PDFs and enrichment tables. + +For PDF files, the annotation pipeline works as follows: + +1. Front-end sends a request to appserver to annotate the file. +2. Appserver sends a request to pdfparser to extract words and their locations. +3. Load core annotation terms into LMDB. +4. Load annotation inclusions from the graph database. +5. Load annotation exclusions from Postgres. +6. Search for matches between words and annotation terms. +7. Matches are structured into a JSON annotation structure. +8. Saving the JSON annotation structure to Postgres is scheduled on the Redis queue. + 1. A worker process reads the annotation structure from the Redis queue and saves it to Postgres. +9. Appserver returns a response (to the request from step 1) with the annotation structure. +10. Front-end renders the annotations. + +For an enrichment table, the process is similar: + +1. Front-end sends a request to appserver to annotate the enrichment table. +2. The enrichment table is stringified to text. +3. Appserver sends a request to pdfparser to extract words and their locations from the text. +4. Load core annotation terms into LMDB. +5. Load annotation inclusions from the graph database. +6. Load annotation exclusions from Postgres. +7. Search for matches between words and annotation terms. +8. Matches are structured into a JSON annotation structure. +9. Saving the JSON annotation structure to Postgres is scheduled on the Redis queue. + 1. A worker process reads the annotation structure from the Redis queue and saves it to Postgres. +10. The annotation structure is translated into a JSON table structure containing XML snippets for the annotations. +11. Appserver returns a response (to the request from step 1) with the JSON table structure containing the XML snippets. +12. Front-end overwrites the enrichment table with the XML snippets. +13. Front-end renders the annotations. +14. Front-end sends a request to appserver to save the enrichment table. +15. Appserver schedules saving the enrichment table to Postgres. + 1. A worker process reads the enrichment table from the Redis queue and saves it to Postgres. + +## Search/Indexing + +Search and indexing rely on the front-end, appserver, Postgres and Elasticsearch. + +In appserver, upon each modification to a file: + +1. A Redis queue task is scheduled to reindex the file. + 1. A worker process reads the file content from the Redis queue. + 2. The file content is parsed into text. + 3. 
The text is sent to Elasticsearch for indexing. + +For search, the request is simply sent to Elasticsearch and the results are returned to the front-end. + +## Enrichment (table) + +The enrichment table relies on the front-end, appserver, Postgres and the graph database. + +1. Front-end sends a request to appserver to enrich a gene list. +2. Appserver sends a request to the graph database to get related gene information. +3. Appserver returns a response (to the request from step 1) with the gene information. +4. Front-end composes the request from step 1 and the gene information into an enrichment file. +5. Front-end sends a request to appserver to save the enrichment file. +6. Front-end sends an annotation request. + +## Statistical enrichment + +Statistical enrichment relies on the front-end, appserver, Redis, the graph database and the statistical enrichment container. + +1. Front-end sends a request to appserver to perform statistical enrichment. +2. Appserver forwards the request to the statistical enrichment container. +3. Statistical enrichment checks whether any part of the input data, or the whole request, is in the Redis cache. +4. On a cache hit, statistical enrichment returns results from the cache. + Otherwise, the missing input data is queried from the graph database and the statistical enrichment is performed (caching both intermediate steps and the final results). +5. Statistical enrichment returns results to appserver. +6. Appserver returns results to the front-end. +7. Front-end renders the results. + +# Services in detail + +## Front-end + +[Source code](https://github.com/SBRG/kg-prototypes/tree/master/client) +[Source code welcome page](https://github.com/SBRG/lifelike-website) + +Frontend (TypeScript, Angular, Bootstrap) + +## Appserver + +[Source code](https://github.com/SBRG/kg-prototypes/tree/master/appserver) +Only endpoint for the frontend / core logic + +Runs Postgres migrations on start + +Converts text entities to annotations + +Parses file contents for Elasticsearch + updates Elasticsearch indexes + +## PDF parser + +[Source code](https://github.com/SBRG/pdfparse) +[Source code pdfbox2](https://github.com/SBRG/pdfbox2) + +Extracts text blocks from PDF/text + +## Elasticsearch + +[Source code](https://github.com/SBRG/kg-prototypes/tree/master/elasticsearch) + +## Graph database + +[Source code neo4j](https://github.com/SBRG/kg-prototypes/tree/master/neo4j) +[Source code arango](https://github.com/SBRG/kg-prototypes/tree/master/arango) + +Graph database for knowledge and annotation exclusions (inclusions are in Postgres) + +## Keycloak + +[Source code](https://github.com/SBRG/lifelike-keycloak) + +## Cache-invalidator + +[Source code](https://github.com/SBRG/kg-prototypes/tree/master/cache-invalidator) +Cron-like service that recalculates common and expensive cache values in a timely manner. + +## Redis + +Used for caching and as a task queue. + +## Postgres + +Main database containing users, files, annotations, etc. 
+ +## Elasticsearch + +Hold logs, metrics, indexed file contents + +## Logstash + +[Source code](https://github.com/SBRG/kg-prototypes/tree/master/logstash) + +## Metricbeat + +[Source code](https://github.com/SBRG/kg-prototypes/tree/master/metricbeat) +save docker metrics to elastic + +## Filebeat + +[Source code](https://github.com/SBRG/kg-prototypes/tree/master/filebeat) +save docker logs to elastic diff --git a/docs/wiki/binderhub/cluster-issuer-prod.yaml b/docs/wiki/binderhub/cluster-issuer-prod.yaml index ee2a1306ac..a902e81e9e 100644 --- a/docs/wiki/binderhub/cluster-issuer-prod.yaml +++ b/docs/wiki/binderhub/cluster-issuer-prod.yaml @@ -7,7 +7,7 @@ spec: # The ACME server URL server: https://acme-v02.api.letsencrypt.org/directory # Email address used for ACME registration - email: e4sanchez@eng.ucsd.edu + email: # Name of a secret used to store the ACME account private key privateKeySecretRef: name: letsencrypt-prod diff --git a/docs/wiki/binderhub/cluster-issuer-stg.yaml b/docs/wiki/binderhub/cluster-issuer-stg.yaml index b17ef68316..9f7fbac7c1 100644 --- a/docs/wiki/binderhub/cluster-issuer-stg.yaml +++ b/docs/wiki/binderhub/cluster-issuer-stg.yaml @@ -7,7 +7,7 @@ spec: # The ACME server URL server: https://acme-staging-v02.api.letsencrypt.org/directory # Email address used for ACME registration - email: e4sanchez@eng.ucsd.edu + email: # Name of a secret used to store the ACME account private key privateKeySecretRef: name: letsencrypt-staging diff --git a/docs/wiki/binderhub/config-prod.yaml b/docs/wiki/binderhub/config-prod.yaml index 600efb7c29..c52802059f 100644 --- a/docs/wiki/binderhub/config-prod.yaml +++ b/docs/wiki/binderhub/config-prod.yaml @@ -1,90 +1,136 @@ config: BinderHub: auth_enabled: true - hub_url: https://jupyter-demo.lifelike.bio + cors_allow_origin: "*" + hub_url: https://jupyter.lifelike.bio + image_prefix: lifelikebinderhub.azurecr.io/binderhub/notebooks- use_registry: true - image_prefix: gcr.io/able-goods-221820/binderhub-demo GitHubRepoProvider: access_token: # Get this value from our existing Binderhub release (you can find it in the helm chart) banned_specs: - ^(?!SBRG/|\.).* -service: - type: ClusterIP - -jupyterhub: - proxy: - service: - type: ClusterIP - https: - enabled: true - type: letsencrypt ingress: - enabled: true - hosts: - - jupyter-demo.lifelike.bio annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod kubernetes.io/ingress.class: nginx kubernetes.io/tls-acme: "true" - cert-manager.io/cluster-issuer: letsencrypt-prod - tls: - - secretName: jupyter-demo-lifelike-bio-tls - hosts: - - jupyter-demo.lifelike.bio + enabled: true + hosts: + - binder.lifelike.bio + https: + enabled: true + type: letsencrypt + pathType: Prefix + tls: + - hosts: + - binder.lifelike.bio + secretName: binder-lifelike-bio-tls + +imageCleaner: + # Turning this off as it does not seem to work on AKS with this version of DinD + enabled: false + +jupyterhub: cull: - # Since we're using authenticated users, don't cull them. We would set this to true if we were using temp users. 
+ concurrency: 10 + enabled: true + every: 600 + maxAge: 86400 + removeNamedServers: false + timeout: 1200 users: false - timeout: 1800 - removeNamedServers: true hub: allowNamedServers: true - namedServerLimitPerUser: 5 - shutdownOnLogout: true - redirectToServer: false config: BinderSpawner: auth_enabled: true - JupyterHub: - authenticator_class: google - Authenticator: - allowed_users: - - e4sanchez@@eng.ucsd.edu # Ethan - # You can add more users here + cors_allow_origin: "*" GoogleOAuthenticator: + admin_users: + - e4sanchez@ucsd.edu + allow_existing_users: true client_id: # Get this value from the Google Cloud Console client_secret: # Get this value from the Google Cloud Console - oauth_callback_url: https://jupyter-demo.lifelike.bio/hub/oauth_callback + hosted_domain: + - ucsd.edu + - biosustain.dtu.dk login_service: Google + oauth_callback_url: https://jupyter.lifelike.bio/hub/oauth_callback + JupyterHub: + admin_access: true + authenticator_class: google + consecutiveFailureLimit: 5 + namedServerLimitPerUser: 5 services: binder: + admin: true + apiToken: null oauth_client_id: service-binderhub oauth_no_confirm: true - oauth_redirect_uri: "https://binder-demo.lifelike.bio/oauth_callback" - loadRoles: - user: - scopes: - - self - - "access:services" - singleuser: - # to make notebook servers aware of hub - cmd: jupyterhub-singleuser - + oauth_redirect_uri: https://binder.lifelike.bio/oauth_callback ingress: - enabled: true - https: - enabled: true - type: letsencrypt - hosts: - - binder-demo.lifelike.bio annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod kubernetes.io/ingress.class: nginx kubernetes.io/tls-acme: "true" - cert-manager.io/cluster-issuer: letsencrypt-prod + enabled: true + hosts: + - jupyter.lifelike.bio + pathType: Prefix tls: - - secretName: binder-demo-lifelike-bio-tls - hosts: - - binder-demo.lifelike.bio + - hosts: + - jupyter.lifelike.bio + secretName: jupyter-lifelike-bio-tls + proxy: + https: + enabled: true + type: letsencrypt + rbac: + enabled: true + scheduling: + userScheduler: + # Turning this off per: https://discourse.jupyter.org/t/singleuser-pods-stuck-in-pending/6349/11 + enabled: false + singleuser: + cmd: jupyterhub-singleuser + cpu: + guarantee: 0.1 + limit: 1 + defaultUrl: /lab + extraEnv: + # Set any required environment variables here, for example: + ARANGO_PASSWORD: you-real-password + image: + # Very important this is present, the singleuser pods need credentials to pull the Jupyterhub image + pullSecrets: [bindercred] + memory: + guarantee: 256M + limit: 40G + startTimeout: 300 + storage: + capacity: 10Gi + dynamic: + pvcNameTemplate: "claim-{username}{servername}" + storageAccessModes: + - ReadWriteOnce + storageClass: standard + volumeNameTemplate: "volume-{username}{servername}" + extraLabels: {} + extraVolumeMounts: [] + extraVolumes: [] + homeMountPath: /home/jovyan + static: + pvcName: null + subPath: "{username}" + type: none + uid: 1000 + +# Docker is not a supported runtime on AKS, so we must use DinD +dind: + enabled: true registry: - password: # You can find this defined in the existing deployment - url: https://gcr.io + url: # Get this from the desired Azure Container Registry + username: # Get this from the desired Azure Container Registry + password: # Get this from the desired Azure Container Registry diff --git a/docs/wiki/binderhub/config-stg.yaml b/docs/wiki/binderhub/config-stg.yaml index ade6611806..8ede42a160 100644 --- a/docs/wiki/binderhub/config-stg.yaml +++ b/docs/wiki/binderhub/config-stg.yaml @@ -1,90 
+1,136 @@ config: BinderHub: auth_enabled: true - hub_url: https://jupyter-demo.lifelike.bio + cors_allow_origin: "*" + hub_url: https://jupyter.lifelike.bio + image_prefix: lifelikebinderhub.azurecr.io/binderhub/notebooks- use_registry: true - image_prefix: gcr.io/able-goods-221820/binderhub-demo GitHubRepoProvider: access_token: # Get this value from our existing Binderhub release (you can find it in the helm chart) banned_specs: - ^(?!SBRG/|\.).* -service: - type: ClusterIP - -jupyterhub: - proxy: - service: - type: ClusterIP - https: - enabled: true - type: letsencrypt ingress: - enabled: true - hosts: - - jupyter-demo.lifelike.bio annotations: + cert-manager.io/cluster-issuer: letsencrypt-stg kubernetes.io/ingress.class: nginx kubernetes.io/tls-acme: "true" - cert-manager.io/cluster-issuer: letsencrypt-staging - tls: - - secretName: jupyter-demo-lifelike-bio-tls - hosts: - - jupyter-demo.lifelike.bio + enabled: true + hosts: + - binder.lifelike.bio + https: + enabled: true + type: letsencrypt + pathType: Prefix + tls: + - hosts: + - binder.lifelike.bio + secretName: binder-lifelike-bio-tls + +imageCleaner: + # Turning this off as it does not seem to work on AKS with this version of DinD + enabled: false + +jupyterhub: cull: - # Since we're using authenticated users, don't cull them. We would set this to true if we were using temp users. + concurrency: 10 + enabled: true + every: 600 + maxAge: 86400 + removeNamedServers: false + timeout: 1200 users: false - timeout: 1800 - removeNamedServers: true hub: allowNamedServers: true - namedServerLimitPerUser: 5 - shutdownOnLogout: true - redirectToServer: false config: BinderSpawner: auth_enabled: true - JupyterHub: - authenticator_class: google - Authenticator: - allowed_users: - - e4sanchez@@eng.ucsd.edu # Ethan - # You can add more users here + cors_allow_origin: "*" GoogleOAuthenticator: + admin_users: + - e4sanchez@ucsd.edu + allow_existing_users: true client_id: # Get this value from the Google Cloud Console client_secret: # Get this value from the Google Cloud Console - oauth_callback_url: https://jupyter-demo.lifelike.bio/hub/oauth_callback + hosted_domain: + - ucsd.edu + - biosustain.dtu.dk login_service: Google + oauth_callback_url: https://jupyter.lifelike.bio/hub/oauth_callback + JupyterHub: + admin_access: true + authenticator_class: google + consecutiveFailureLimit: 5 + namedServerLimitPerUser: 5 services: binder: + admin: true + apiToken: null oauth_client_id: service-binderhub oauth_no_confirm: true - oauth_redirect_uri: "https://binder-demo.lifelike.bio/oauth_callback" - loadRoles: - user: - scopes: - - self - - "access:services" - singleuser: - # to make notebook servers aware of hub - cmd: jupyterhub-singleuser - + oauth_redirect_uri: https://binder.lifelike.bio/oauth_callback ingress: - enabled: true - https: - enabled: true - type: letsencrypt - hosts: - - binder-demo.lifelike.bio annotations: + cert-manager.io/cluster-issuer: letsencrypt-stg kubernetes.io/ingress.class: nginx kubernetes.io/tls-acme: "true" - cert-manager.io/cluster-issuer: letsencrypt-staging + enabled: true + hosts: + - jupyter.lifelike.bio + pathType: Prefix tls: - - secretName: binder-demo-lifelike-bio-tls - hosts: - - binder-demo.lifelike.bio + - hosts: + - jupyter.lifelike.bio + secretName: jupyter-lifelike-bio-tls + proxy: + https: + enabled: true + type: letsencrypt + rbac: + enabled: true + scheduling: + userScheduler: + # Turning this off per: https://discourse.jupyter.org/t/singleuser-pods-stuck-in-pending/6349/11 + enabled: false + singleuser: 
+ cmd: jupyterhub-singleuser + cpu: + guarantee: 0.1 + limit: 1 + defaultUrl: /lab + extraEnv: + # Set any required environment variables here, for example: + ARANGO_PASSWORD: you-real-password + image: + # Very important this is present, the singleuser pods need credentials to pull the Jupyterhub image + pullSecrets: [bindercred] + memory: + guarantee: 256M + limit: 40G + startTimeout: 300 + storage: + capacity: 10Gi + dynamic: + pvcNameTemplate: "claim-{username}{servername}" + storageAccessModes: + - ReadWriteOnce + storageClass: standard + volumeNameTemplate: "volume-{username}{servername}" + extraLabels: {} + extraVolumeMounts: [] + extraVolumes: [] + homeMountPath: /home/jovyan + static: + pvcName: null + subPath: "{username}" + type: none + uid: 1000 + +# Docker is not a supported runtime on AKS, so we must use DinD +dind: + enabled: true registry: - password: # You can find this defined in the existing deployment - url: https://gcr.io + url: # Get this from the desired Azure Container Registry + username: # Get this from the desired Azure Container Registry + password: # Get this from the desired Azure Container Registry diff --git a/docs/wiki/binderhub/ingress-nginx.yaml b/docs/wiki/binderhub/ingress-nginx.yaml index c5fcd5d3b2..367c4d88c5 100644 --- a/docs/wiki/binderhub/ingress-nginx.yaml +++ b/docs/wiki/binderhub/ingress-nginx.yaml @@ -2,6 +2,13 @@ ## Ref: https://github.com/kubernetes/ingress-nginx/blob/main/charts/ingress-nginx/values.yaml controller: + # Set any nginx configurations here + config: { "proxy-body-size": "15m", "proxy-read-timeout": "15m", "proxy-send-timeout": "15m" } + service: + annotations: + # It is critical that this annotation is present, otherwise the ingress pods may fail to start! + service.beta.kubernetes.io/azure-load-balancer-health-probe-request-path: "/healthz" + # -- Used by cloud providers to connect the resulting `LoadBalancer` to a pre-existing static IP according to https://kubernetes.io/docs/concepts/services-networking/service/#loadbalancer - loadBalancerIP: + loadBalancerIP: diff --git a/docs/wiki/binderhub/setup-binderhub.md b/docs/wiki/binderhub/setup-binderhub.md index 36fa9e178c..98ccc6735c 100644 --- a/docs/wiki/binderhub/setup-binderhub.md +++ b/docs/wiki/binderhub/setup-binderhub.md @@ -12,28 +12,32 @@ - [Install cert-manager](#install-cert-manager) - [Create a Temporary Certificate Issuer](#create-a-temporary-certificate-issuer) - [Add and Install Ingress NGINX](#add-and-install-ingress-nginx) - - [Optional: Add a Config Map for the NGINX Proxy](#optional-add-a-config-map-for-the-nginx-proxy) +- [Create Container Registry Login Secret](#create-container-registry-login-secret) - [Install Binderhub](#install-binderhub) - [Verify SSL Certs are Created](#verify-ssl-certs-are-created) - [Additional Configurations](#additional-configurations) ## Introduction -This guide will walk you through the process of creating a brand-new Binderhub cluster on Google Cloud. The example configuration files can also be used for other cloud services. +This guide will walk you through the process of creating a brand new Binderhub cluster on Microsoft Azure. -If you do not have a Google Cloud account or project, you can create one by following the instructions [here](https://cloud.google.com/resource-manager/docs/creating-managing-projects). +If you do not have a Azure account or project, you can create one [here](https://azure.microsoft.com/en-us/free). 
-The guide also primarily uses the Google Cloud Console Terminal, so you do not need to worry about navigating around the GUI to the various resources we create. +This guide primarily uses the web browser Azure Portal to create and manage resources, but you may also use the Azure CLI instead. -If you want to install the Google Cloud CLI on your machine, consult this [guide](https://cloud.google.com/sdk/docs/install). +If you want to install the Azure CLI on your machine, consult this [guide](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli). + +You will also need to install the Kubernetes CLI in order to create some resources on the cluster. You can find instructions on how to install the CLI [here](https://kubernetes.io/docs/tasks/tools/). Finally, we will be using [Helm](https://helm.sh/) to create several Kubernetes resources. Please install Helm locally if you wish to follow this guide on a local terminal. +You may also consider installing [Lens](https://k8slens.dev/), an IDE specifically for connecting to Kubernetes clusters and viewing/managing their resources. This is not required to follow this guide, but it can be an invaluable tool for debugging any issues that may arise. + ## Caveats These instructions assume you will be using the configuration files provided alongside the guide. This will enable a few additional features not included in a vanilla BinderHub deployment. -Most notably, user authentication will be turned on, with Google as the OAuth provider. If you do not require any authentication (highly discouraged), you can use the config-with-no-auth.yaml file instead of the recommended config-stg.yaml and config-prod.yaml. +Most notably, user authentication will be turned on, with Google as the OAuth provider. Also, do note the `GithubRepoProvider` property under the top-level `config` mapping. This is currently configured to ONLY ALLOW Github repositories from within the SBRG organization to be used with the BinderHub deployment. @@ -41,24 +45,17 @@ Finally, recognize that some of the values in the BinderHub configuration may ne ## Create the Cluster -First, create the cluster we will install Binderhub on to: +First, we need to create the cluster we will install Binderhub onto. Please follow the guide [here](https://learn.microsoft.com/en-us/azure/aks/learn/quick-kubernetes-deploy-portal?tabs=azure-cli) for general instructions on how to do so. -```bash -gcloud container clusters create \ - --machine-type n1-standard-2 \ - --num-nodes 2 \ - --zone us-central1 \ - --cluster-version latest \ - -``` +**_However_**, please use the following settings for your cluster, instead of the defaults (an equivalent Azure CLI sketch follows the list below): -Next, set the admin role binding for your Google Cloud account. This will ensure you're able to make changes to the cluster we've just created: - -```bash -kubectl create clusterrolebinding cluster-admin-binding \ - --clusterrole=cluster-admin \ - --user= -``` +- **Kubernetes version**: 1.26.10 + - This is a confirmed working version of AKS with the version of Binderhub we will be using. +- **Authentication and Authorization**: Local accounts with Kubernetes RBAC +- **Network Policy**: None + - Any other setting may cause issues with the Nginx load balancer. Use another setting at your own risk! +- **Network type (plugin)**: Kubenet + - This will likely be the default, but ensure that it is set to the correct value. Other values have not been validated with the existing Binderhub configuration! 
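If you prefer to create the cluster with the Azure CLI rather than the Portal, a roughly equivalent command is sketched below. This is a minimal, unverified sketch: the resource group and cluster names are placeholders, and the flags should be checked against your installed `az` version before use.

```bash
# Sketch only: resource group and cluster names are placeholders; adjust node count to your needs.
az aks create \
  --resource-group <your-resource-group> \
  --name <your-cluster-name> \
  --kubernetes-version 1.26.10 \
  --network-plugin kubenet \
  --node-count 2 \
  --generate-ssh-keys
```

The important parts are the Kubernetes version and the kubenet network plugin, which mirror the settings listed above; authentication mode and network policy are left at their defaults here, which should correspond to "Local accounts with Kubernetes RBAC" and "None".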
### Create a Namespace for the Binderhub Resources @@ -76,17 +73,15 @@ Before we install Binderhub, we will first install a load balancer to handle req ### Reserve a Static IP Address -The load balancer will need a static IP so we can be certain the address won't change. You can reserve a static IP on Google Cloud with the following command: +The load balancer will need a static IP so we can be certain the address won't change. You can reserve a static IP on Azure by following [these](https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/create-public-ip-portal?tabs=option-1-create-public-ip-standard) instructions. -```bash -gcloud compute addresses create --region us-central1 -``` - -You can name the IP address whatever you want, but use something descriptive like "lifelike-binderhub-proxy". If you'd like a more in-depth explanation of how to reserve a static IP, please follow the [official guide](https://cloud.google.com/compute/docs/ip-addresses/reserve-static-external-ip-address). +You can name the IP address whatever you want, but use something descriptive like "lifelike-binderhub-proxy". ### Configure DNS with the Static IP -Registering a domain name is beyond the scope of this guide, but you will need a domain to use for an authenticated Binderhub server. If you are using Google Cloud, DNS zones can be configured under: Networking > Network Services > Cloud DNS. +Registering a domain name is beyond the scope of this guide, but you will need a domain to use for an authenticated Binderhub server. + +Note that for the Lifelike project we are currently using Google Cloud for our DNS nameservers. DNS zones in Google Cloud can be configured under: Networking > Network Services > Cloud DNS. If you are using a different cloud provider, DNS management will likely be found under a networking resource. Very minimally, you will need to create two new zone records (most likely A records) for both the BinderHub server and the Jupyterhub server. For example, for the Lifelike project we have the "lifelike.bio" DNS zone, with A records for "binder.lifelike.bio" and "jupyter.lifelike.bio" pointing at the IP address "35.188.33.138". This means that the domains "binder.lifelike.bio" and "jupyter.lifelike.bio" refer to the IP address "35.188.33.138", which itself identifies the load balancer server. @@ -136,7 +131,7 @@ helm install ingress-nginx/ingress-nginx --namesp This will take a few moments. Note that you can name your ingress-nginx installation anything you want, but consider a descriptive name like "binderhub-ingress-nginx". Also, be sure to apply a custom configuration with the `-f` flag as in the example above. Minimally, your config file should include a definition for the load balancer IP. -See the "ingress-nginx.yaml" example file included in the same directory as this guide. +See the "ingress-nginx.yaml" example file included in the same directory as this guide. Please pay careful attention to the comments in this file! Omission of any properties within the config may lead to undesired behavior from the ingress. 
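As a concrete illustration of the two steps above (reserving the static IP and feeding it to the ingress controller), the Azure CLI path might look roughly like the following. This is a sketch, not a verified command sequence: the resource group and release names are placeholders, and the returned address must be copied into the `loadBalancerIP` field of your ingress-nginx.yaml.

```bash
# Sketch only: resource group and release names are placeholders.

# Reserve a static public IP (the Portal steps linked above achieve the same thing).
az network public-ip create \
  --resource-group <your-resource-group> \
  --name lifelike-binderhub-proxy \
  --sku Standard \
  --allocation-method Static

# Read the allocated address back; put this value into loadBalancerIP in ingress-nginx.yaml.
az network public-ip show \
  --resource-group <your-resource-group> \
  --name lifelike-binderhub-proxy \
  --query ipAddress --output tsv

# Install the ingress controller with the customized values file.
helm install binderhub-ingress-nginx ingress-nginx/ingress-nginx \
  --namespace binderhub \
  -f ingress-nginx.yaml
```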
To check on the status of the ingress-nginx controller, you can run the following command: @@ -148,24 +143,26 @@ You should see something like the following: ```bash NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -binderhub-ingress-nginx-controller LoadBalancer 10.4.7.240 35.188.33.138 80:32402/TCP,443:32177/TCP 9d +binderhub-ingress-nginx-controller LoadBalancer 10.4.7.240 80:32402/TCP,443:32177/TCP 9d ``` If the `EXTERNAL-IP` column is empty, give the load balancer a few more moments to initialize. If it remains empty, you may have forgotten to specify a load balancer IP, or the IP may be unavailable. It should be the IP address we reserved earlier. -### Optional: Add a Config Map for the NGINX Proxy +## Create Container Registry Login Secret + +There is one more step before we install Binderhub. We have to create a Kubernetes secret resource for our cluster pods to pull/push container images from our container registry. Note that this is not normally required for Binderhub deployments, but seems to be a side effect of running Binderhub using Docker-in-Docker, which is required on Azure Kubernetes. -It's likely that you will want to specify some non-default configurations for the nginx proxy. To do this, you simply need to add a [configmap](https://kubernetes.io/docs/concepts/configuration/configmap/) resource: +To create the secret, run the following command: ```bash -kubectl apply -f your-nginx-configmap.yaml +kubectl create secret docker-registry --docker-server= --docker-username= --docker-password= -n binderhub ``` -An example has been included in the same directory as this guide. +Note the credentials we provide to the command. These can be found in the configuration for your container registry. Also note that in our example config files for Binderhub `secret-name` is expected to be "bindercred". ## Install Binderhub -Now that we have installed the NGINX controller, we can install the binderhub helm chart. First, let's make sure we have access to it. Add the helm repo if you don't have it already: +Now that we have installed the NGINX controller and created a secret credential for our container registry, we can install the binderhub helm chart. First, let's make sure we have access to it. Add the helm repo if you don't have it already: ```bash helm repo add jupyterhub https://jupyterhub.github.io/helm-chart @@ -180,12 +177,14 @@ helm repo update Finally, let's install Binderhub. Note that we specify a few special flags in the install command: `--version` and `-f`. ```bash -helm install jupyterhub/binderhub --version=1.0.0-0.dev.git.3128.h52ffd88 --namespace=binderhub -f config-stg.yaml +helm install jupyterhub/binderhub --version=0.2.0-n886.h4169712 --namespace=binderhub -f config-stg.yaml ``` It may take a few moments for this command to complete. -You can find a list of Binderhub releases [here](https://hub.jupyter.org/helm-chart/#development-releases-binderhub). Simply copy the version you want to install, e.g. "1.0.0-0.dev.git.3128.h52ffd88", which is the version we use in the example command. +You can find a list of Binderhub releases [here](https://hub.jupyter.org/helm-chart/#development-releases-binderhub). Simply copy the version you want to install, e.g. "0.2.0-n886.h4169712", which is the version we use in the example command. + +It is **crucial** that you use this version of Binderhub! Any other version is untested and may not work with the current configuration files! `-f` lets us specify configuration values via a yaml file. 
In the example, we use a file named "config-stg.yaml". See the example file of the same name in the same folder as this guide. @@ -214,7 +213,7 @@ Notice the file "cluster-issuer-prod.yaml". See the file of the same name in the Then, upgrade our BinderHub deployment to use this new cluster issuer: ```bash -helm upgrade lifelike-binderhub jupyterhub/binderhub --version=1.0.0-0.dev.git.3128.h52ffd88 --namespace=lifelike-binderhub -f config-prod.yaml +helm upgrade lifelike-binderhub jupyterhub/binderhub --version=0.2.0-n886.h4169712 --namespace=lifelike-binderhub -f config-prod.yaml ``` Congratulations! Your BinderHub deployment is complete! Verify the production certificates are indeed working by returning to your BinderHub in a web browser. Also, try creating a notebook with your favorite Github repository. If you are eventually redirected to the JupyterHub page, you've successfully deployed BinderHub! diff --git a/docs/wiki/development/general-introduction.md b/docs/wiki/development/general-introduction.md index 919c4609ec..d64d279805 100644 --- a/docs/wiki/development/general-introduction.md +++ b/docs/wiki/development/general-introduction.md @@ -101,7 +101,7 @@ For `$container_name`, you have a choice of: docker compose exec pgdatabase psql -U postgres -h pgdatabase -d postgres ``` -### Neo4j +### ArangoDB ```sh docker compose exec database cypher-shell -u neo4j diff --git a/docs/wiki/elk/introduction.md b/docs/wiki/elk/introduction.md index 05f64707f0..901a30c5c2 100644 --- a/docs/wiki/elk/introduction.md +++ b/docs/wiki/elk/introduction.md @@ -7,8 +7,6 @@ - To seed LMDB data to Kibana: ```bash -./deployment/kibana.sh -a None -# or docker compose exec appserver python neo4japp/services/annotations/index_annotations.py -a ``` diff --git a/docs/wiki/keycloak-deployment/config.yml b/docs/wiki/keycloak-deployment/config.yml index ddf4811786..c35296e5c6 100644 --- a/docs/wiki/keycloak-deployment/config.yml +++ b/docs/wiki/keycloak-deployment/config.yml @@ -1,7 +1,7 @@ affinity: {} args: [] auth: - adminPassword: ***REMOVED*** + adminPassword: "***REMOVED***" adminUser: admin existingSecret: "" passwordSecretKey: "" diff --git a/filebeat/Dockerfile b/filebeat/Dockerfile index 620fa44ab1..57f0b14800 100644 --- a/filebeat/Dockerfile +++ b/filebeat/Dockerfile @@ -1,8 +1,7 @@ FROM docker.elastic.co/beats/filebeat:7.11.1 LABEL app=kg-prototypes -ARG CONFIG_FILE=filebeat.yml -COPY config/${CONFIG_FILE} /usr/share/filebeat/filebeat.yml +COPY config/filebeat.yml /usr/share/filebeat/filebeat.yml USER root RUN chown root:filebeat /usr/share/filebeat/filebeat.yml USER filebeat diff --git a/filebeat/config/filebeat.yml b/filebeat/config/filebeat.yml index 8ed9b8330d..6f79273a13 100644 --- a/filebeat/config/filebeat.yml +++ b/filebeat/config/filebeat.yml @@ -4,7 +4,7 @@ filebeat.autodiscover: templates: - condition: contains: - docker.container.image: kg-webserver + docker.container.image: ${WEBSERVER_IMAGE:kg-webserver} config: - type: docker containers.ids: @@ -23,7 +23,7 @@ filebeat.autodiscover: templates: - condition: contains: - docker.container.image: kg-appserver + docker.container.image: ${APPSERVER_IMAGE:kg-appserver} config: - type: container paths: @@ -37,4 +37,5 @@ processors: host: "unix:///var/run/docker.sock" output.logstash: - hosts: ["logstash:5044"] + hosts: + - ${LOGSTASH_OUTPUT_HOST:logstash:5044} diff --git a/graph-db/.gitignore b/graph-db/.gitignore index 11b80917c1..6d500b20d9 100644 --- a/graph-db/.gitignore +++ b/graph-db/.gitignore @@ -1,2 +1,142 @@ docker-compose.override.yml 
tmp/ +.DS_Store + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# PyCharm +.idea/ + +.vscode/settings.json +.vscode/launch.json + +# shaded POM file +dependency-reduced-pom.xml diff --git a/graph-db/README.md b/graph-db/README.md index e69de29bb2..46cc693008 100644 --- a/graph-db/README.md +++ b/graph-db/README.md @@ -0,0 +1,5 @@ +## graph-db + +`extraction`: Python code that extracts data from various sources; e.g KEGG, BioCyc, etc + +`migration`: Liquibase migration for the graph-db diff --git a/graph-db/docker-compose.yml b/graph-db/docker-compose.yml index 04adee3783..ca11a7fb66 100644 --- a/graph-db/docker-compose.yml +++ b/graph-db/docker-compose.yml @@ -1,25 +1,27 @@ version: "3.8" +# Hasn't been migrated to Arango - not working + services: - neo4j: - image: neo4j:4.4-community + arangodb: + image: arangodb:4.4-community restart: on-failure environment: - - NEO4J_AUTH=neo4j/password - - NEO4JLABS_PLUGINS=["apoc"] - - NEO4J_dbms.unmanaged_extension_classes=n10s.endpoint=/rdf # for the neosemantics-4.3.0.0.jar + - ARANGODB_AUTH=arangodb/password + - ARANGODBLABS_PLUGINS=["apoc"] + - ARANGODB_dbms.unmanaged_extension_classes=n10s.endpoint=/rdf # for the neosemantics-4.3.0.0.jar ports: - 7687:7687 - 7474:7474 volumes: - - neo4j:/var/lib/neo4j/data + - arangodb:/var/lib/arangodb/data migrator: build: context: migrator environment: - - NEO4J_HOST=neo4j - - NEO4J_PASSWORD=password + - ARANGODB_HOST=arangodb + - ARANGODB_PASSWORD=password - CHANGELOG_FILE=lifelike-graph/changelog-master.xml - DATAFILES_PREFIX=stage - LOG_LEVEL=debug @@ -29,9 +31,9 @@ services: - ./changelog:/liquibase/changelog - ./tmp:/tmp depends_on: - - neo4j + - arangodb links: - - 
neo4j + - arangodb volumes: - neo4j: {} + arangodb: {} diff --git a/graph-db/docs/reactome/Reactome GDS.ipynb b/graph-db/docs/reactome/Reactome GDS.ipynb index 71a35cdb79..e0ced77a15 100644 --- a/graph-db/docs/reactome/Reactome GDS.ipynb +++ b/graph-db/docs/reactome/Reactome GDS.ipynb @@ -92,9 +92,9 @@ "source": [ "load_dotenv() # These params are set in .env file\n", "\n", - "URI = os.getenv(\"NEO4J_URI\")\n", - "AUTH = (os.getenv(\"NEO4J_USER\"), os.getenv(\"NEO4J_PASSWORD\"))\n", - "DATABASE = os.getenv(\"NEO4J_DATABASE\")\n", + "URI = os.getenv(\"ARANGODB_URI\")\n", + "AUTH = (os.getenv(\"ARANGODB_USER\"), os.getenv(\"ARANGODB_PASSWORD\"))\n", + "DATABASE = os.getenv(\"ARANGODB_DATABASE\")\n", "\n", "driver = GraphDatabase.driver(URI, auth=AUTH)\n", "with driver as session:\n", diff --git a/graph-db/extractor/Pipfile.lock b/graph-db/extractor/Pipfile.lock index a3a04f09d3..da00e845e9 100644 --- a/graph-db/extractor/Pipfile.lock +++ b/graph-db/extractor/Pipfile.lock @@ -263,9 +263,7 @@ "version": "==0.7.1" }, "neo4j": { - "hashes": [ - "sha256:06b12daa8fa6bf133abc8c4f35451748a2ee6dc207c52790304a8e12c997ce65" - ], + "hashes": ["sha256:06b12daa8fa6bf133abc8c4f35451748a2ee6dc207c52790304a8e12c997ce65"], "index": "pypi", "version": "==4.4.5" }, diff --git a/graph-db/extractor/docs/biocyc/spec.md b/graph-db/extractor/docs/biocyc/spec.md index 2d957a937f..60debcf44c 100644 --- a/graph-db/extractor/docs/biocyc/spec.md +++ b/graph-db/extractor/docs/biocyc/spec.md @@ -1,226 +1,230 @@ # BioCyc Spec for Knowledge Graph -The BioCyc database collection is an assortment of organism specific Pathway/ Genome Databases (PGDBs). They provide -reference to genome and metabolic pathway information for thousands of organisms. -Download site: https://brg-files.ai.sri.com/public/dist/. Search for the specific organism data for download. + +The BioCyc database collection is an assortment of organism specific Pathway/ Genome Databases (PGDBs). They provide +reference to genome and metabolic pathway information for thousands of organisms. +Download site: https://brg-files.ai.sri.com/public/dist/. Search for the specific organism data for download. Currently we loaded the following databases: -- EcoCyc: Escherichia coli K-12 substr. MG1655 (tax_id 511145) -http://brg-files.ai.sri.com/public/dist/ecoli.tar.gz -- HumanCyc: Homo sapiens (tax_id 9606) -http://brg-files.ai.sri.com/public/dist/human.tar.gz +- EcoCyc: Escherichia coli K-12 substr. MG1655 (tax_id 511145) + http://brg-files.ai.sri.com/public/dist/ecoli.tar.gz + +- HumanCyc: Homo sapiens (tax_id 9606) + http://brg-files.ai.sri.com/public/dist/human.tar.gz -- YeastCyc: Saccharomyces cerevisiae S288C (tax_id 559292) -http://brg-files.ai.sri.com/public/dist/yeastcyc.tar.gz +- YeastCyc: Saccharomyces cerevisiae S288C (tax_id 559292) + http://brg-files.ai.sri.com/public/dist/yeastcyc.tar.gz -- PseudomonasCyc: for Pseudomonas putida KT2440 (tax_id 160488) -http://brg-files.ai.sri.com/public/dist/pput160488cyc.tar.gz +- PseudomonasCyc: for Pseudomonas putida KT2440 (tax_id 160488) + http://brg-files.ai.sri.com/public/dist/pput160488cyc.tar.gz -- MetaCyc: MetaCyc is a curated database of experimentally elucidated metabolic pathways from all domains of life. -MetaCyc contains pathways involved in both primary and secondary metabolism, as well as associated metabolites, reactions, -enzymes, and genes. 
The goal of MetaCyc is to catalog the universe of metabolism by storing a representative sample of each experimentally elucidated pathway -https://brg-files.ai.sri.com/public/dist/meta.tar.gz +- MetaCyc: MetaCyc is a curated database of experimentally elucidated metabolic pathways from all domains of life. + MetaCyc contains pathways involved in both primary and secondary metabolism, as well as associated metabolites, reactions, + enzymes, and genes. The goal of MetaCyc is to catalog the universe of metabolism by storing a representative sample of each experimentally elucidated pathway + https://brg-files.ai.sri.com/public/dist/meta.tar.gz -In Lifelike graph database, each node in BioCyc was labeled as db_BioCyc, and nodes for each biocyc database was -labeled as additional database name, such as db_EcoCyc, db_HumanCyc. Therefore for any EcoCyc, there are 3 labels: +In Lifelike graph database, each node in BioCyc was labeled as db_BioCyc, and nodes for each biocyc database was +labeled as additional database name, such as db_EcoCyc, db_HumanCyc. Therefore for any EcoCyc, there are 3 labels: db_BioCyc, db_EcoCyc and the entity name (e.g. Gene, Protein) -### Graph database schema for BioCyc +### Graph database schema for BioCyc + ![](biocyc_graphmodel.png) -#### Node labels and attributes: +#### Node labels and attributes: + id has the same value for biocyc_id, and displayName and pathways attributes were added post-loading for annotation -| Node Label | Attribute | Source | -|:-----|:-----------|:-| -| BioCycClass | biocyc_id | UNIQUE-ID | -| BioCycClass | data_source |"BioCyc" | -| BioCycClass | displayName | =coalesce(name, biocyc_id) | -| BioCycClass | id | =biocyc_id | -| BioCycClass | name | COMMON-NAME | -| BioCycClass | synonyms | SYNONYMS | -| Compound | abbrev_name | ABBREV-NAME | -| Compound | biocyc_id | UNIQUE-ID | -| Compound | data_source | "BioCyc" | -| Compound | displayName | =coalesce(name, biocyc_id) | -| Compound | id | =biocyc_id | -| Compound | inchi_key | INCHI-KEY
    remove leading 'InChIKey='| -| Compound | name | COMMON-NAME
    strip HTML tags and remove leading 'a ' and 'an ' | -| Compound | inchi | INCHI | -| Compound | smiles | SMILES | -| DNABindingSite | abs_center_pos | ABS-CENTER-POS | -| DNABindingSite | biocyc_id | UNIQUE-ID | -| DNABindingSite | data_source |"BioCyc" | -| DNABindingSite | description | | -| DNABindingSite | displayName | =protein displayName + ' BS' of proteins linked to the DNABindingSite through Regulation

    else

    =coalesce(name, biocyc_id) -| DNABindingSite | id | =biocyc_id | -| DNABindingSite | site_length | SITE-LENGTH | -| EnzReaction | biocyc_id | UNIQUE-ID | -| EnzReaction | data_source |"BioCyc" | -| EnzReaction | description | | -| EnzReaction | displayName | = name + gene(s) that encodes the enzyme| -| EnzReaction | id | =biocyc_id | -| EnzReaction | name | COMMON-NAME | -| EnzReaction | synonyms | SYNONYMS | -| Gene | accession | ACCESSION-1 | -| Gene | biocyc_id | UNIQUE-ID | -| Gene | data_source |"BioCyc" | -| Gene | description | | -| Gene | displayName | =coalesce(name, biocyc_id) | -| Gene | id | =biocyc_id | -| Gene | left_end_position | LEFT-END-POSITION | -| Gene | name | COMMON-NAME | -| Gene | pathways | =set_gene_property_for_enrichment()| -| Gene | right_end_position | RIGHT-END-POSITION | -| Gene | strand | TRANSCRIPTION-DIRECTION | -| Gene | synonyms | SYNONYMS | -| Pathway | biocyc_id | UNIQUE-ID | -| Pathway | data_source |"BioCyc" | -| Pathway | displayName | =coalesce(name, biocyc_id) | -| Pathway | id | =biocyc_id | -| Pathway | name | COMMON-NAME | -| Pathway | synonyms | SYNONYMS | -| Promoter | biocyc_id | UNIQUE-ID | -| Promoter | data_source |"BioCyc" | -| Promoter | description | | -| Promoter | displayName | =coalesce(name, biocyc_id) | -| Promoter | id | =biocyc_id | -| Promoter | name | COMMON-NAME | -| Promoter | pos_1 | ABSOLUTE-PLUS-1-POS | -| Promoter | strand | TRANSCRIPTION-DIRECTION | -| Promoter | synonyms | SYNONYMS | -| Protein | abbrev_name | ABBREV-NAME | -| Protein | biocyc_id | UNIQUE-ID | -| Protein | data_source |"BioCyc" | -| Protein | description | | -| Protein | displayName | =coalesce(name, biocyc_id) | -| Protein | id | =biocyc_id | -| Protein | molecular_weight_kd | MOLECULAR-WEIGHT-KD | -| Protein | name | COMMON-NAM | -| Protein | pi | PI | -| Protein | synonyms | SYNONYMS | -| Protein | location | LOCATIONS | -| Protein | GO | GO-TERMS | -| RNA | abbrev_name | ABBREV-NAME | -| RNA | biocyc_id | UNIQUE-ID | -| RNA | data_source |"BioCyc" | -| RNA | description | | -| RNA | displayName | =coalesce(name, biocyc_id) | -| RNA | id | =biocyc_id | -| RNA | location | LOCATIONS | -| RNA | name | COMMON-NAME -| Reaction | biocyc_id | UNIQUE-ID | -| Reaction | data_source |"BioCyc" | -| Reaction | description | | -| Reaction | displayName | =coalesce(ec_number, name, biocyc_id) | -| Reaction | ec_number | EC-NUMBER | -| Reaction | id | =biocyc_id | -| Reaction | name | COMMON-NAME | -| Reaction | other_name | SYSTEMATIC-NAME | -| Reaction | direction | REACTION-DIRECTION | -| Reaction | location | RXN-LOCATIONS | -| Reaction | synonyms | SYNONYMS | -| Regulation | biocyc_id | UNIQUE-ID | -| Regulation | data_source |"BioCyc" | -| Regulation | displayName | ? 
| -| Regulation | id | =biocyc_id | -| Regulation | mechanism | MECHANISM | -| Regulation | mode | MODE | -| Regulation | type | | -| Terminator | biocyc_id | UNIQUE-ID | -| Terminator | data_source |"BioCyc" | -| Terminator | displayName | =coalesce(name, biocyc_id) | -| Terminator | description | | -| Terminator | id | =biocyc_id | -| Terminator | left_end_position | LEFT-END-POSITION | -| Terminator | right_end_position | RIGHT-END-POSITION | -| TranscriptionUnit | biocyc_id | UNIQUE-ID | -| TranscriptionUnit | data_source |"BioCyc" | -| TranscriptionUnit | description | | -| TranscriptionUnit | displayName | | -| TranscriptionUnit | id | =biocyc_id | -| TranscriptionUnit | name | COMMON-NAME | +| Node Label | Attribute | Source | +| :---------------- | :------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------- | +| BioCycClass | biocyc_id | UNIQUE-ID | +| BioCycClass | data_source | "BioCyc" | +| BioCycClass | displayName | =coalesce(name, biocyc_id) | +| BioCycClass | id | =biocyc_id | +| BioCycClass | name | COMMON-NAME | +| BioCycClass | synonyms | SYNONYMS | +| Compound | abbrev_name | ABBREV-NAME | +| Compound | biocyc_id | UNIQUE-ID | +| Compound | data_source | "BioCyc" | +| Compound | displayName | =coalesce(name, biocyc_id) | +| Compound | id | =biocyc_id | +| Compound | inchi_key | INCHI-KEY
    remove leading 'InChIKey=' | +| Compound | name | COMMON-NAME
    strip HTML tags and remove leading 'a ' and 'an ' | +| Compound | inchi | INCHI | +| Compound | smiles | SMILES | +| DNABindingSite | abs_center_pos | ABS-CENTER-POS | +| DNABindingSite | biocyc_id | UNIQUE-ID | +| DNABindingSite | data_source | "BioCyc" | +| DNABindingSite | description | | +| DNABindingSite | displayName | =protein displayName + ' BS' of proteins linked to the DNABindingSite through Regulation

    else

    =coalesce(name, biocyc_id) | +| DNABindingSite | id | =biocyc_id | +| DNABindingSite | site_length | SITE-LENGTH | +| EnzReaction | biocyc_id | UNIQUE-ID | +| EnzReaction | data_source | "BioCyc" | +| EnzReaction | description | | +| EnzReaction | displayName | = name + gene(s) that encodes the enzyme | +| EnzReaction | id | =biocyc_id | +| EnzReaction | name | COMMON-NAME | +| EnzReaction | synonyms | SYNONYMS | +| Gene | accession | ACCESSION-1 | +| Gene | biocyc_id | UNIQUE-ID | +| Gene | data_source | "BioCyc" | +| Gene | description | | +| Gene | displayName | =coalesce(name, biocyc_id) | +| Gene | id | =biocyc_id | +| Gene | left_end_position | LEFT-END-POSITION | +| Gene | name | COMMON-NAME | +| Gene | pathways | =set_gene_property_for_enrichment() | +| Gene | right_end_position | RIGHT-END-POSITION | +| Gene | strand | TRANSCRIPTION-DIRECTION | +| Gene | synonyms | SYNONYMS | +| Pathway | biocyc_id | UNIQUE-ID | +| Pathway | data_source | "BioCyc" | +| Pathway | displayName | =coalesce(name, biocyc_id) | +| Pathway | id | =biocyc_id | +| Pathway | name | COMMON-NAME | +| Pathway | synonyms | SYNONYMS | +| Promoter | biocyc_id | UNIQUE-ID | +| Promoter | data_source | "BioCyc" | +| Promoter | description | | +| Promoter | displayName | =coalesce(name, biocyc_id) | +| Promoter | id | =biocyc_id | +| Promoter | name | COMMON-NAME | +| Promoter | pos_1 | ABSOLUTE-PLUS-1-POS | +| Promoter | strand | TRANSCRIPTION-DIRECTION | +| Promoter | synonyms | SYNONYMS | +| Protein | abbrev_name | ABBREV-NAME | +| Protein | biocyc_id | UNIQUE-ID | +| Protein | data_source | "BioCyc" | +| Protein | description | | +| Protein | displayName | =coalesce(name, biocyc_id) | +| Protein | id | =biocyc_id | +| Protein | molecular_weight_kd | MOLECULAR-WEIGHT-KD | +| Protein | name | COMMON-NAM | +| Protein | pi | PI | +| Protein | synonyms | SYNONYMS | +| Protein | location | LOCATIONS | +| Protein | GO | GO-TERMS | +| RNA | abbrev_name | ABBREV-NAME | +| RNA | biocyc_id | UNIQUE-ID | +| RNA | data_source | "BioCyc" | +| RNA | description | | +| RNA | displayName | =coalesce(name, biocyc_id) | +| RNA | id | =biocyc_id | +| RNA | location | LOCATIONS | +| RNA | name | COMMON-NAME | +| Reaction | biocyc_id | UNIQUE-ID | +| Reaction | data_source | "BioCyc" | +| Reaction | description | | +| Reaction | displayName | =coalesce(ec_number, name, biocyc_id) | +| Reaction | ec_number | EC-NUMBER | +| Reaction | id | =biocyc_id | +| Reaction | name | COMMON-NAME | +| Reaction | other_name | SYSTEMATIC-NAME | +| Reaction | direction | REACTION-DIRECTION | +| Reaction | location | RXN-LOCATIONS | +| Reaction | synonyms | SYNONYMS | +| Regulation | biocyc_id | UNIQUE-ID | +| Regulation | data_source | "BioCyc" | +| Regulation | displayName | ? 
| +| Regulation | id | =biocyc_id | +| Regulation | mechanism | MECHANISM | +| Regulation | mode | MODE | +| Regulation | type | | +| Terminator | biocyc_id | UNIQUE-ID | +| Terminator | data_source | "BioCyc" | +| Terminator | displayName | =coalesce(name, biocyc_id) | +| Terminator | description | | +| Terminator | id | =biocyc_id | +| Terminator | left_end_position | LEFT-END-POSITION | +| Terminator | right_end_position | RIGHT-END-POSITION | +| TranscriptionUnit | biocyc_id | UNIQUE-ID | +| TranscriptionUnit | data_source | "BioCyc" | +| TranscriptionUnit | description | | +| TranscriptionUnit | displayName | | +| TranscriptionUnit | id | =biocyc_id | +| TranscriptionUnit | name | COMMON-NAME | #### Node outgoing relationships -| StartNode | Relationship | EndNode | Cardinality | -|-----:|-----------:|-----:|-----------:| -| Class | CHEBI_LINK | Chemical | 1 | -| Class | COMPONENT_OF | Protein | + | -| Class | CONSUMED_BY | Reaction | + | -| Class | HAS_SYNONYM | Synonym | + | -| Class | REGULATES | Regulation | + | -| Class | TYPE_OF | Class | + | -| Compound | CHEBI_LINK | Chemical | 1 | -| Compound | COMPONENT_OF | Protein | + | -| Compound | CONSUMED_BY | Reaction | + | -| Compound | HAS_SYNONYM | Synonym | + | -| Compound | REGULATES | Regulation | + | -| Compound | TYPE_OF | Class | + | -| DNABindingSite | ELEMENT_OF | TranscriptionUnit | 1 | -| EnzReaction | CATALYZES | Reaction | 1 | -| EnzReaction | HAS_SYNONYM | Synonym | + | -| Gene | ELEMENT_OF | TranscriptionUnit | 1 | -| Gene | ENCODES | Protein | 1 | -| Gene | ENCODES | RNA | 1 | -| Gene | HAS_SYNONYM | Synonym | + | -| Gene | IS | db_NCBI Gene | 1 | -| Pathway | HAS_SYNONYM | Synonym | + | -| Pathway | IN_PATHWAY | Pathway | 1 | -| Pathway | TYPE_OF | Class | + | -| Promoter | ELEMENT_OF | TranscriptionUnit | 1 | -| Promoter | HAS_SYNONYM | Synonym | + | -| Protein | CATALYZES | EnzReaction | + | -| Protein | COMPONENT_OF | Protein | + | -| Protein | CONSUMED_BY | Reaction | + | -| Protein | GO_LINK | db_GO | + | -| Protein | HAS_SYNONYM | Synonym | + | -| Protein | MODIFIED_TO | Protein | + | -| Protein | REGULATES | Regulation | + | -| Protein | TYPE_OF | Class | + | -| RNA | COMPONENT_OF | Protein | + | -| RNA | CONSUMED_BY | Reaction | + | -| RNA | HAS_SYNONYM | Synonym | + | -| RNA | MODIFIED_TO | RNA | + | -| RNA | REGULATES | Regulation | + | -| RNA | TYPE_OF | Class | + | -| Reaction | ENZYME_LINK | EC_Number | + | -| Reaction | HAS_SYNONYM | Synonym | + | -| Reaction | IN_PATHWAY | Pathway | + | -| Reaction | PRODUCES | Class | + | -| Reaction | PRODUCES | Compound | + | -| Reaction | PRODUCES | Protein | + | -| Reaction | TYPE_OF | Class | + | -| Regulation | BINDS | DNABindingSite | 1 | -| Regulation | REGULATES | EnzReaction | 1 | -| Regulation | REGULATES | Gene | 1 | -| Regulation | REGULATES | Promoter | 1 | -| Regulation | REGULATES | Protein | 1 | -| Regulation | REGULATES | Reaction | 1 | -| Regulation | REGULATES | Terminator | 1 | -| Regulation | REGULATES | TranscriptionUnit | 1 | -| Regulation | TYPE_OF | Class | + | -| Terminator | ELEMENT_OF | TranscriptionUnit | 1 | -| TranscriptionUnit | HAS_SYNONYM | Synonym | + | - +| StartNode | Relationship | EndNode | Cardinality | +| ----------------: | -----------: | ----------------: | ----------: | +| Class | CHEBI_LINK | Chemical | 1 | +| Class | COMPONENT_OF | Protein | + | +| Class | CONSUMED_BY | Reaction | + | +| Class | HAS_SYNONYM | Synonym | + | +| Class | REGULATES | Regulation | + | +| Class | TYPE_OF | Class | + | +| Compound | CHEBI_LINK | 
Chemical | 1 | +| Compound | COMPONENT_OF | Protein | + | +| Compound | CONSUMED_BY | Reaction | + | +| Compound | HAS_SYNONYM | Synonym | + | +| Compound | REGULATES | Regulation | + | +| Compound | TYPE_OF | Class | + | +| DNABindingSite | ELEMENT_OF | TranscriptionUnit | 1 | +| EnzReaction | CATALYZES | Reaction | 1 | +| EnzReaction | HAS_SYNONYM | Synonym | + | +| Gene | ELEMENT_OF | TranscriptionUnit | 1 | +| Gene | ENCODES | Protein | 1 | +| Gene | ENCODES | RNA | 1 | +| Gene | HAS_SYNONYM | Synonym | + | +| Gene | IS | db_NCBI Gene | 1 | +| Pathway | HAS_SYNONYM | Synonym | + | +| Pathway | IN_PATHWAY | Pathway | 1 | +| Pathway | TYPE_OF | Class | + | +| Promoter | ELEMENT_OF | TranscriptionUnit | 1 | +| Promoter | HAS_SYNONYM | Synonym | + | +| Protein | CATALYZES | EnzReaction | + | +| Protein | COMPONENT_OF | Protein | + | +| Protein | CONSUMED_BY | Reaction | + | +| Protein | GO_LINK | db_GO | + | +| Protein | HAS_SYNONYM | Synonym | + | +| Protein | MODIFIED_TO | Protein | + | +| Protein | REGULATES | Regulation | + | +| Protein | TYPE_OF | Class | + | +| RNA | COMPONENT_OF | Protein | + | +| RNA | CONSUMED_BY | Reaction | + | +| RNA | HAS_SYNONYM | Synonym | + | +| RNA | MODIFIED_TO | RNA | + | +| RNA | REGULATES | Regulation | + | +| RNA | TYPE_OF | Class | + | +| Reaction | ENZYME_LINK | EC_Number | + | +| Reaction | HAS_SYNONYM | Synonym | + | +| Reaction | IN_PATHWAY | Pathway | + | +| Reaction | PRODUCES | Class | + | +| Reaction | PRODUCES | Compound | + | +| Reaction | PRODUCES | Protein | + | +| Reaction | TYPE_OF | Class | + | +| Regulation | BINDS | DNABindingSite | 1 | +| Regulation | REGULATES | EnzReaction | 1 | +| Regulation | REGULATES | Gene | 1 | +| Regulation | REGULATES | Promoter | 1 | +| Regulation | REGULATES | Protein | 1 | +| Regulation | REGULATES | Reaction | 1 | +| Regulation | REGULATES | Terminator | 1 | +| Regulation | REGULATES | TranscriptionUnit | 1 | +| Regulation | TYPE_OF | Class | + | +| Terminator | ELEMENT_OF | TranscriptionUnit | 1 | +| TranscriptionUnit | HAS_SYNONYM | Synonym | + | ### Scripts to run after biocyc data updated + 1. add protein synonyms for uniprot (see biocyc_protein_synonyms_for_uniprot.md) 2. set displayname and description 3. set data sources + ``` match(n:db_BioCyc) set n.data_source='BioCyc' ``` -### Set PseudomonasCyc genes as Master genes for Lifelike searching and annotations +### Set PseudomonasCyc genes as Master genes for Lifelike searching and annotations + Since Pseudomonas putida KT2440 (taxID 160488) genes do not have NCBI gene links for now, we need to set Pseudomoas genes as Master gene. -Once NCBI integrated those genes, the steps can be removed -- Follow the steps described in file 'Label_biocyc_genes_as_master.md' to update neo4j -- Generate LMDB annotation file if PseudomonasCyc updated, and handle to Binh. +Once NCBI integrated those genes, the steps can be removed + +- Follow the steps described in file 'Label_biocyc_genes_as_master.md' to update neo4j +- Generate LMDB annotation file if PseudomonasCyc updated, and handle to Binh. + ``` src/biocyc/LMDB_annotation.generate_pseudomonas_genelist_for_LMDB ``` - - - - diff --git a/graph-db/extractor/docs/kegg/kegg_spec.md b/graph-db/extractor/docs/kegg/kegg_spec.md index 9fee066ba2..5fcb682a1c 100644 --- a/graph-db/extractor/docs/kegg/kegg_spec.md +++ b/graph-db/extractor/docs/kegg/kegg_spec.md @@ -5,64 +5,69 @@ broadly categorized into systems information, genomic information, chemical information, and health information. 
The database names and the corresponding subdirectory names on this FTP site are as follows: -#### Systems information - - KEGG PATHWAY: pathway, xml; KEGG pathway maps - - KEGG BRITE: brite; BRITE functional hierarchies - - KEGG MODULE: module; KEGG modules - #### Genomic information - - KEGG ORTHOLOGY (KO) genes KO functional orthologs - - KEGG GENOME genes KEGG organisms (complete genomes) - - KEGG GENES genes Gene catalogs of KEGG organisms - - KEGG SSDB GENES sequence similarity - #### Chemical information - - KEGG COMPOUND ligand Metabolites and other small molecules - - KEGG GLYCAN ligand Glycans - - KEGG REACTION ligand Biochemical reactions - - KEGG RCLASS ligand Reaction class - - KEGG ENZYME ligand Enzyme nomenclature - #### Health information - - KEGG NETWORK medicus Disease-related network elements - - KEGG VARIANT medicus Human gene variants - - KEGG DISEASE medicus Human diseases - - KEGG DRUG medicus Drugs - - KEGG DGROUP medicus Drug groups - - KEGG ENVIRON medicus Crude drugs and health-related substances +#### Systems information + +- KEGG PATHWAY: pathway, xml; KEGG pathway maps +- KEGG BRITE: brite; BRITE functional hierarchies +- KEGG MODULE: module; KEGG modules + +#### Genomic information + +- KEGG ORTHOLOGY (KO) genes KO functional orthologs +- KEGG GENOME genes KEGG organisms (complete genomes) +- KEGG GENES genes Gene catalogs of KEGG organisms +- KEGG SSDB GENES sequence similarity + +#### Chemical information + +- KEGG COMPOUND ligand Metabolites and other small molecules +- KEGG GLYCAN ligand Glycans +- KEGG REACTION ligand Biochemical reactions +- KEGG RCLASS ligand Reaction class +- KEGG ENZYME ligand Enzyme nomenclature + +#### Health information + +- KEGG NETWORK medicus Disease-related network elements +- KEGG VARIANT medicus Human gene variants +- KEGG DISEASE medicus Human diseases +- KEGG DRUG medicus Drugs +- KEGG DGROUP medicus Drug groups +- KEGG ENVIRON medicus Crude drugs and health-related substances ## KEGG Pathway Maps #### Pathway Types + - metabolic - non-metabolic -#### Reference pathway map: +#### Reference pathway map: + - linked to KO entries (K numbers) - linked to Enzyme entries (EC numbers) - linked to Reaction entries (R numbers) #### Organism-specific map: + - linked to GENES entries (gene ids) ## Download Data -KEGG database download is not free. Check Vincenzo for license information. + +KEGG database download is not free. Check Vincenzo for license information. url: ftp://ftp.kegg.net + #### Files needed for current KG: -- genes/ko/ko (in ko.tar.gz) -- genes/genes_ncbi-geneid.list (from genes/links/genes_ncbi-geneid.list.gz) -- pathway/pathway.list -- pathway/links/pathway_ko.list (links.tar.gz) -- pathway/links/pathway_genome.list (links.tar.gz) + +- genes/ko/ko (in ko.tar.gz) +- genes/genes_ncbi-geneid.list (from genes/links/genes_ncbi-geneid.list.gz) +- pathway/pathway.list +- pathway/links/pathway_ko.list (links.tar.gz) +- pathway/links/pathway_genome.list (links.tar.gz) ## KEGG KG Schema + The current kegg kg was used for annotation only. KEGG data has rich information about reactions, enzymes and pathways and their assocations with diseases and drugs. Additional information could be loaded in the future. 
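As a rough sketch of how two of the files listed above could be turned into loadable rows (an illustration only, not the project's actual KEGG parser; the file paths, the two-column tab-separated layout, and the `path:`/`ko:` prefixes are assumptions about the downloaded release):

```python
# Illustration only: derive Pathway node rows and KO -> Pathway relationship rows
# from two of the KEGG files listed above. Column layouts and prefixes are assumed.
import pandas as pd

# pathway/pathway.list: assumed two columns, e.g. "map00010<TAB>Glycolysis / Gluconeogenesis"
pathways = pd.read_csv('pathway/pathway.list', sep='\t', header=None,
                       names=['id', 'name'], dtype=str)
pathways['data_source'] = 'KEGG'
pathways.to_csv('kegg_pathway.tsv', sep='\t', index=False)

# pathway/links/pathway_ko.list: assumed pairs such as "path:ko00010<TAB>ko:K00001"
pathway2ko = pd.read_csv('pathway/links/pathway_ko.list', sep='\t', header=None,
                         names=['pathway_id', 'ko_id'], dtype=str)
# strip the database prefixes so the ids line up with the node ids written above
pathway2ko['pathway_id'] = pathway2ko['pathway_id'].str.split(':', n=1).str[-1]
pathway2ko['ko_id'] = pathway2ko['ko_id'].str.split(':', n=1).str[-1]
pathway2ko.to_csv('kegg_ko2pathway.tsv', sep='\t', index=False)
```

The TSV outputs mirror the tab-separated files the other extractor parsers write, so they could in principle be loaded with the same node and relationship queries; names and prefixes would need to be adjusted to whatever the licensed KEGG download actually contains.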
![](KEGG_kg_nodes.png) - - - - - - - - - diff --git a/graph-db/extractor/docs/literature/literature_spec.md b/graph-db/extractor/docs/literature/literature_spec.md index 17baeb5618..158c3d6e23 100644 --- a/graph-db/extractor/docs/literature/literature_spec.md +++ b/graph-db/extractor/docs/literature/literature_spec.md @@ -1,9 +1,10 @@ # Zenodo Literature Data Set -- paper: https://zenodo.org/record/3459420#.XfAH6JNKiBu -- download the files to folder $BASE_DATA_DIR/download/literature/. Make sure to get the latest version (Sept 24, 2019). +- paper: https://zenodo.org/record/3459420#.XfAH6JNKiBu +- download the files to folder $BASE_DATA_DIR/download/literature/. Make sure to get the latest version (Sept 24, 2019). + +Download all the part-i-_.txt.gz and part-ii-_-with-themes.txt.gz files -Download all the part-i-*.txt.gz and part-ii-*-with-themes.txt.gz files ```bash > cd graph-db/extraction/data/download/literature > ./download.sh @@ -14,23 +15,24 @@ Download all the part-i-*.txt.gz and part-ii-*-with-themes.txt.gz files ![](img/LiteratureGraphDiagram_new.png) The relationship 'PREDICTS' changed to 'INDICATES' -## Old parser link: +## Old parser link: + https://github.com/SBRG/graphdb_prototypes/blob/master/src/loader/literature/literature_data_parser.py The parser may need to update to reflect the new schema. + 1. The old gene, chemical and disease only have one column for the id's. Since Literature data will be in separate domain in the new schema. -Those files will need the following properties: [id, synonyms, id_source], where id need to remove the prefix (MESH, CHEBI etc) so that -it would be easier to create the 'mapped_to' links. id', and the prefix can be stored as 'id_source'. -2. Since the literature sentence would use different names (as entry_text in INDICATES), -you may need to set the name field later (after mapped to reference Gene, Chemical or Disease). I would suggest to put all the 'entry_text' into -'synonyms' property, and separate the terms using |. -3. After literature data loaded (all nodes for db_Literature), use cypher to create the MAPPED_TO relationships, then based on the -mapped entity, set literature entity name. If a literature entity could be mapped, choose the first term in the synonyms as its name. + Those files will need the following properties: [id, synonyms, id_source], where id need to remove the prefix (MESH, CHEBI etc) so that + it would be easier to create the 'mapped_to' links. id', and the prefix can be stored as 'id_source'. +2. Since the literature sentence would use different names (as entry_text in INDICATES), + you may need to set the name field later (after mapped to reference Gene, Chemical or Disease). I would suggest to put all the 'entry_text' into + 'synonyms' property, and separate the terms using |. +3. After literature data loaded (all nodes for db_Literature), use cypher to create the MAPPED_TO relationships, then based on the + mapped entity, set literature entity name. If a literature entity could be mapped, choose the first term in the synonyms as its name. 4. Make sure to check the current database properties for Association and Snippet and INDICATES, and put the data into the correct output file. -The current parser output files were for the old data schema. -5. If you load data into a brand-new database, make sure to create constraints first (for snippet id, association id, LiteratureEntity id). 
-Otherwise, the merge and relationships creation will be very very slow + The current parser output files were for the old data schema. +5. If you load data into a brand-new database, make sure to create constraints first (for snippet id, association id, LiteratureEntity id). + Otherwise, the merge and relationships creation will be very very slow 6. snippet_id = pmid + sentence_num (refectoring query: SET n.id = p.pmid + '-' + n.sentence_num) -7. association_id = entry1_id + entry2_id + type (refectoring query: SET a.id = n1.id + '-' + n2.id + '-' + a.type) +7. association_id = entry1_id + entry2_id + type (refectoring query: SET a.id = n1.id + '-' + n2.id + '-' + a.type) 8. Don't forget to use eid instead of id - diff --git a/graph-db/extractor/docs/mesh/mesh_rdf_data_model.md b/graph-db/extractor/docs/mesh/mesh_rdf_data_model.md index de7130bf9a..3bdedce350 100644 --- a/graph-db/extractor/docs/mesh/mesh_rdf_data_model.md +++ b/graph-db/extractor/docs/mesh/mesh_rdf_data_model.md @@ -1,53 +1,66 @@ # MeSH Data Model (RDF imported database, not Lifelike) -### Descriptors -https://hhs.github.io/meshrdf/descriptors -MeSH headings, used to index citations in the NLM MEDLINE database and to describe the subjects for NLM Catalog + +### Descriptors + +https://hhs.github.io/meshrdf/descriptors +MeSH headings, used to index citations in the NLM MEDLINE database and to describe the subjects for NLM Catalog records. Subclss of Descriptors: -- TopicalDescriptor -- PublicationType -- CheckTag -- GeographicalDescriptor -### Qualifiers +- TopicalDescriptor +- PublicationType +- CheckTag +- GeographicalDescriptor + +### Qualifiers + A MeSH subheading, used to give additional context to a Descriptor. -MeSH has rules governing which Qualifiers can be used with a given Descriptor, +MeSH has rules governing which Qualifiers can be used with a given Descriptor, as well as which Qualifers cannot be used with a given Descriptor (pairs) ### Descriptor-Qualifier Pairs + https://hhs.github.io/meshrdf/descriptor-qualifier-pairs -- Qualifiers comprise a set of 83 terms used to add more specificity to descriptors. -- Each Descriptor has a set of Allowable Qualifiers -- Example: - - Descriptor (D015242, Ofloxacin) and one of its Allowable Qualifiers (Q000008, administration & dosage) - -### Supplementary Concept Records (SCR) + +- Qualifiers comprise a set of 83 terms used to add more specificity to descriptors. +- Each Descriptor has a set of Allowable Qualifiers +- Example: + - Descriptor (D015242, Ofloxacin) and one of its Allowable Qualifiers (Q000008, administration & dosage) + +### Supplementary Concept Records (SCR) + https://hhs.github.io/meshrdf/scrs There are four sub-classes: SCR_Chemical SCR_Disease SCR_Organism SCR_Protocol - + ### Concepts -https://hhs.github.io/meshrdf/concepts + +https://hhs.github.io/meshrdf/concepts A MeSH Concept represents a unit of meaning, all assigned ‘M’ identifiers. -- Each MeSH record consists of one or more Concepts -- Each Concept consists in one or more synonymous terms -- Collections of concepts are placed into MeSH Descriptor for topic search and retrieval + +- Each MeSH record consists of one or more Concepts +- Each Concept consists in one or more synonymous terms +- Collections of concepts are placed into MeSH Descriptor for topic search and retrieval ### Terms -https://hhs.github.io/meshrdf/terms + +https://hhs.github.io/meshrdf/terms A term is a human readable name used by a MeSH Concept. 
Within a MeSH concept, terms are strictly synonymous ### Tree Numbers + https://hhs.github.io/meshrdf/tree-numbers -MeSH descriptors are organized into 16 categories, each of which is further divided into sub-categories to assist in more specific classification of a descriptor. -- These hierarchical categories and subcategories are identified by MeSH tree numbers. -- A given MeSH descriptor often has more than one tree number assigned to it, meaning that it fits into the category hierarchies in multiple places. +MeSH descriptors are organized into 16 categories, each of which is further divided into sub-categories to assist in more specific classification of a descriptor. + +- These hierarchical categories and subcategories are identified by MeSH tree numbers. +- A given MeSH descriptor often has more than one tree number assigned to it, meaning that it fits into the category hierarchies in multiple places. Example: B03, B03.660, B03.660.075, B03.660.075.090.344 ## Mesh Neo4j Graph Model (RDF imported graph) -![](img/mesh_rdf_model.png) \ No newline at end of file + +![](img/mesh_rdf_model.png) diff --git a/graph-db/extractor/docs/ncbi/taxonomy_spec.md b/graph-db/extractor/docs/ncbi/taxonomy_spec.md index af864e5b58..4249adfd27 100644 --- a/graph-db/extractor/docs/ncbi/taxonomy_spec.md +++ b/graph-db/extractor/docs/ncbi/taxonomy_spec.md @@ -1,40 +1,45 @@ # NCBI Taxonomy -The NCBI Taxonomy database (http://www.ncbi.nlm.nih.gov/taxonomy) is the standard nomenclature and classification repository -for the International Nucleotide Sequence Database Collaboration (INSDC), comprising the GenBank, ENA (EMBL) and DDBJ databases. +The NCBI Taxonomy database (http://www.ncbi.nlm.nih.gov/taxonomy) is the standard nomenclature and classification repository +for the International Nucleotide Sequence Database Collaboration (INSDC), comprising the GenBank, ENA (EMBL) and DDBJ databases. It includes organism names and taxonomic lineages for each of the sequences represented in the INSDC's nucleotide and protein -sequence databases (reference: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3245000/). +sequence databases (reference: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3245000/). #### Top lavel taxonomy: -- Archaea -- Bacteria -- Eukaryota -- Viruses -- Other: Other artificial sequences -- Unclassified -The organism classification should include the first 4 catagories, and remove taxnomy with name "environmental samples". +- Archaea +- Bacteria +- Eukaryota +- Viruses +- Other: Other artificial sequences +- Unclassified + +The organism classification should include the first 4 catagories, and remove taxnomy with name "environmental samples". NCBI taxonomy download link: https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/ -#### NCBI Taxonomy Nodes in KG: -- Labels: [db_NCBI, Taxonomy] -- Node Properties: - - id: tax_id - - name: scientific name - - rank: In biological classification, taxonomic rank is the relative level of a group of organisms (a taxon) in a taxonomic hierarchy. - Examples of taxonomic ranks are species, genus, family, order, class, phylum, kingdom, domain, etc. - - species_id: species tax_id, only for organism with rank as species or below species - - data_source: NCBI Taxonomy - +#### NCBI Taxonomy Nodes in KG: + +- Labels: [db_NCBI, Taxonomy] +- Node Properties: + - id: tax_id + - name: scientific name + - rank: In biological classification, taxonomic rank is the relative level of a group of organisms (a taxon) in a taxonomic hierarchy. 
+ Examples of taxonomic ranks are species, genus, family, order, class, phylum, kingdom, domain, etc. + - species_id: species tax_id, only for organism with rank as species or below species + - data_source: NCBI Taxonomy + #### Taxonomy parser improvement -- The current parser could be rewritten using pandas. -- Currently the data were parsed and written into files. The parser then used the output file to load data. The output -files could be eliminated and neo4j data loading could be done using the parsed data directly + +- The current parser could be rewritten using pandas. +- Currently the data were parsed and written into files. The parser then used the output file to load data. The output + files could be eliminated and neo4j data loading could be done using the parsed data directly #### Additional synonyms for strains (no rank, children nodes under species) (LL-1802) -combined all synonyms from species as string, and added as synonym for children nodes. This could help children nodes search -using a species synonym which is not part of strain synonym. Not sure if we need to update this for each taxnomy updates. + +combined all synonyms from species as string, and added as synonym for children nodes. This could help children nodes search +using a species synonym which is not part of strain synonym. Not sure if we need to update this for each taxnomy updates. + ``` match (n:Taxonomy) where n.rank='species' and (()-[:HAS_PARENT]->(n)) with n match (n)-[:HAS_SYNONYM]->(s) with n, collect(s.name) as syns @@ -45,37 +50,21 @@ using a species synonym which is not part of strain synonym. Not sure if we need with t, synonym, [s in syns where not synonym contains s] as syns where size(syns)>0 with t, apoc.text.join([synonym] + syns, '|') as syn where size(syn) < 9000 merge (s:Synonym {name:syn}) - merge (t)-[:HAS_SYNONYM {type:'combined terms'}]->(s) + merge (t)-[:HAS_SYNONYM {type:'combined terms'}]->(s) ``` #### LMDB Annotation file + In the LMDB file, set the species' tax_id as the default strain id so that any searching for the species will map to the default strain -| Strain tax_id | strain_name | species_id | species_name | -|:------ |: ---------- |: --------- |: ----------- | -| 367830 | Staphylococcus aureus subsp. aureus USA300 | 46170 | Staphylococcus aureus subsp. aureus | -| 367830 | Staphylococcus aureus subsp. aureus USA300 | 1280 | Staphylococcus aureus | -| 511145 | Escherichia coli str. K-12 substr. MG1655 | 83333 | Escherichia coli K-12 | -| 511145 | Escherichia coli str. K-12 substr. MG1655 | 562 | Escherichia coli | -| 272563 | Clostridioides difficile 630 | 1496 | Clostridioides difficile | -| 208964 | Pseudomonas aeruginosa PAO1 | 287 | Pseudomonas aeruginosa | -| 559292 | Saccharomyces cerevisiae S288C | 4932 | Saccharomyces cerevisiae | +| Strain tax_id | strain_name | species_id | species_name | +| :------------ | :----------------------------------------- | :--------- | :---------------------------------- | +| 367830 | Staphylococcus aureus subsp. aureus USA300 | 46170 | Staphylococcus aureus subsp. aureus | +| 367830 | Staphylococcus aureus subsp. aureus USA300 | 1280 | Staphylococcus aureus | +| 511145 | Escherichia coli str. K-12 substr. MG1655 | 83333 | Escherichia coli K-12 | +| 511145 | Escherichia coli str. K-12 substr. 
MG1655 | 562 | Escherichia coli | +| 272563 | Clostridioides difficile 630 | 1496 | Clostridioides difficile | +| 208964 | Pseudomonas aeruginosa PAO1 | 287 | Pseudomonas aeruginosa | +| 559292 | Saccharomyces cerevisiae S288C | 4932 | Saccharomyces cerevisiae | See taxonomy_LMDB_annotation.py to generate LMDB annotation file - - - - - - - - - - - - - - - - - diff --git a/graph-db/extractor/docs/other/scripts for LMDB annotation file.md b/graph-db/extractor/docs/other/scripts for LMDB annotation file.md index 7f13448719..bc9610de9d 100644 --- a/graph-db/extractor/docs/other/scripts for LMDB annotation file.md +++ b/graph-db/extractor/docs/other/scripts for LMDB annotation file.md @@ -1,25 +1,22 @@ # Scripts for generating LMDB annotation files (LL-3389) ### Generate LMDB annotation list for the following entities -- Gene: NCBI genes and Psueodmonas genes -- Protein: Uniprot protein -- Chemical: ChEBI chemicals -- Compound: Biocyc Compound -- Taxnomy: NCBI taxonomy -- Disease: MeSH -- Food: MeSH -- Anatomy: MeSH -- Phenomena: MeSH +- Gene: NCBI genes and Psueodmonas genes +- Protein: Uniprot protein +- Chemical: ChEBI chemicals +- Compound: Biocyc Compound +- Taxnomy: NCBI taxonomy +- Disease: MeSH +- Food: MeSH +- Anatomy: MeSH +- Phenomena: MeSH ### Python files -- ncbi/gene_LMDB_annotation.py -- ncbi/taxonomy_LMDB_annotation.py -- chebi/chebi_LMDB_annotation.py -- mesh/mesh_LMDB_annotation.py (Disease, Food, Anatomy, Phenomena) -- uniprot/uniprot_LMDB_annotation.py (Protein) -- biocyc/biocyc_LMDB_annotaiton.py (Compound, Gene) - - - +- ncbi/gene_LMDB_annotation.py +- ncbi/taxonomy_LMDB_annotation.py +- chebi/chebi_LMDB_annotation.py +- mesh/mesh_LMDB_annotation.py (Disease, Food, Anatomy, Phenomena) +- uniprot/uniprot_LMDB_annotation.py (Protein) +- biocyc/biocyc_LMDB_annotaiton.py (Compound, Gene) diff --git a/graph-db/extractor/src/biocyc/base_data_file_parser.py b/graph-db/extractor/src/biocyc/base_data_file_parser.py index 93771b86bf..1419035717 100644 --- a/graph-db/extractor/src/biocyc/base_data_file_parser.py +++ b/graph-db/extractor/src/biocyc/base_data_file_parser.py @@ -16,8 +16,18 @@ class BaseDataFileParser(BaseParser): """ Base parser for Biocyc .dat files. """ - def __init__(self, base_data_dir: str, biocyc_dbname, tar_file, datafile_name, entity_name, attr_names:dict, rel_names:dict, - db_link_sources: dict=None): + + def __init__( + self, + base_data_dir: str, + biocyc_dbname, + tar_file, + datafile_name, + entity_name, + attr_names: dict, + rel_names: dict, + db_link_sources: dict = None, + ): """ :param base_data_dir: the data file base directory, that is the parent folder for 'download' :param biocyc_dbname: biocyc database name, eg. 
DB_ECOCYC, DB_HUMANCYC @@ -32,7 +42,7 @@ def __init__(self, base_data_dir: str, biocyc_dbname, tar_file, datafile_name, e self.input_zip = os.path.join(self.download_dir, tar_file) self.db_output_dir = os.path.join(self.output_dir, biocyc_dbname.lower()) self.datafile = datafile_name - self.node_labels = [NODE_BIOCYC, 'db_' + biocyc_dbname, entity_name] + self.node_labels = [NODE_BIOCYC, 'db_' + biocyc_dbname, entity_name] self.entity_name = entity_name self.attr_name_map = attr_names self.rel_name_map = rel_names @@ -41,10 +51,10 @@ def __init__(self, base_data_dir: str, biocyc_dbname, tar_file, datafile_name, e self.version = '' self.logger = logging.getLogger(__name__) - def create_synonym_rels(self)->bool: + def create_synonym_rels(self) -> bool: return False - def get_db_version(self, tar:TarFile): + def get_db_version(self, tar: TarFile): """ find the latest version of data in the tar file. Sometimes a tar file has multiple version data. :param tar: @@ -65,7 +75,10 @@ def parse_data_file(self): self.version = self.get_db_version(tar) self.logger.info(f'Database file version: "{self.version}"') for tarinfo in tar: - if tarinfo.name.endswith('/'+ self.datafile) and self.version in tarinfo.name: + if ( + tarinfo.name.endswith('/' + self.datafile) + and self.version in tarinfo.name + ): self.logger.info('Parse ' + tarinfo.name) utf8reader = codecs.getreader('ISO-8859-1') f = utf8reader(tar.extractfile(tarinfo.name)) @@ -74,7 +87,9 @@ def parse_data_file(self): prev_line_is_comment = False for line in f: line = biocyc_utils.cleanhtml(line) - node, prev_line_is_comment = self.parse_line(line, node, nodes, prev_line_is_comment) + node, prev_line_is_comment = self.parse_line( + line, node, nodes, prev_line_is_comment + ) return nodes def parse_line(self, line, node, nodes, prev_line_is_comment): @@ -85,7 +100,12 @@ def parse_line(self, line, node, nodes, prev_line_is_comment): # add data source property node.add_attribute(PROP_DATA_SOURCE, DB_BIOCYC, "str") - if node and PROP_COMMENT in self.attr_name_map and prev_line_is_comment and line.startswith('/'): + if ( + node + and PROP_COMMENT in self.attr_name_map + and prev_line_is_comment + and line.startswith('/') + ): line = line[1:].strip() node.add_attribute(PROP_COMMENT, line, 'str') elif node: @@ -97,7 +117,9 @@ def parse_line(self, line, node, nodes, prev_line_is_comment): else: prev_line_is_comment = True if attr in self.attr_name_map: - prop_name, data_type = biocyc_utils.get_property_name_type(attr, self.attr_name_map) + prop_name, data_type = biocyc_utils.get_property_name_type( + attr, self.attr_name_map + ) node.add_attribute(prop_name, val, data_type) if attr == UNIQUE_ID: node.add_attribute(PROP_ID, val, data_type) @@ -109,7 +131,11 @@ def parse_line(self, line, node, nodes, prev_line_is_comment): db_name = tokens[0].lstrip('(') reference_id = tokens[1].strip(')').strip('"') add_prefix = tokens[1] - self.add_dblink(node, db_name, reference_id, ) + self.add_dblink( + node, + db_name, + reference_id, + ) else: rel_type = self.rel_name_map.get(attr) node.add_edge_type(rel_type, val) @@ -117,20 +143,26 @@ def parse_line(self, line, node, nodes, prev_line_is_comment): self.logger.error('line:', line) return node, prev_line_is_comment - def add_dblink(self, node:NodeData, db_name, reference_id): + def add_dblink(self, node: NodeData, db_name, reference_id): link_node = NodeData(NODE_DBLINK, PROP_REF_ID) if reference_id.startswith(db_name): - reference_id = reference_id[len(db_name)+1:] # remove db prefix + reference_id = 
reference_id[len(db_name) + 1 :] # remove db prefix link_node.update_attribute(PROP_REF_ID, reference_id) link_node.update_attribute(PROP_DB_NAME, db_name) node.add_edge(node, link_node, REL_DBLINKS) def create_indexes(self, database: Database): - database.create_index(self.entity_name, PROP_ID, f"index_{self.entity_name.lower}_id") - database.create_index(self.entity_name, PROP_BIOCYC_ID, f"index_{self.entity_name.lower}_biocycid") - database.create_index(self.entity_name, PROP_NAME, f"index_{self.entity_name.lower}_name") + database.create_index( + self.entity_name, PROP_ID, f"index_{self.entity_name.lower}_id" + ) + database.create_index( + self.entity_name, PROP_BIOCYC_ID, f"index_{self.entity_name.lower}_biocycid" + ) + database.create_index( + self.entity_name, PROP_NAME, f"index_{self.entity_name.lower}_name" + ) - def update_nodes_in_graphdb(self, nodes:[], database:Database, etl_load_id: str): + def update_nodes_in_graphdb(self, nodes: [], database: Database, etl_load_id: str): """ Load or update nodes in KG. This can also be called for initial loading. :param nodes: list of nodes @@ -145,10 +177,17 @@ def update_nodes_in_graphdb(self, nodes:[], database:Database, etl_load_id: str) for node in nodes: rows.append(node.to_dict()) attrs = self.attrs + [PROP_ID, PROP_DATA_SOURCE] - query = get_update_nodes_query(NODE_BIOCYC, PROP_BIOCYC_ID, attrs, self.node_labels, etl_load_id=etl_load_id, return_node_count=True) + query = get_update_nodes_query( + NODE_BIOCYC, + PROP_BIOCYC_ID, + attrs, + self.node_labels, + etl_load_id=etl_load_id, + return_node_count=True, + ) return database.load_data_from_rows(query, rows, return_node_count=True) - def add_edges_to_graphdb(self, nodes:[], database:Database, etl_load_id): + def add_edges_to_graphdb(self, nodes: [], database: Database, etl_load_id): no_of_created_nodes = 0 no_of_updated_nodes = 0 no_of_created_relations = 0 @@ -161,7 +200,7 @@ def add_edges_to_graphdb(self, nodes:[], database:Database, etl_load_id): id = node.get_attribute(PROP_BIOCYC_ID) synonyms = node.get_synonym_set() for syn in synonyms: - synonym_list.append({PROP_BIOCYC_ID:id, PROP_NAME: syn}) + synonym_list.append({PROP_BIOCYC_ID: id, PROP_NAME: syn}) for edge in node.edges: from_id = edge.source.get_attribute(edge.source.id_attr) to_id = edge.dest.get_attribute(edge.dest.id_attr) @@ -173,7 +212,9 @@ def add_edges_to_graphdb(self, nodes:[], database:Database, etl_load_id): if db_name in self.db_link_sources: if db_name not in db_link_dict: db_link_dict[db_name] = [] - db_link_dict[db_name].append({'from_id': from_id, 'to_id': to_id}) + db_link_dict[db_name].append( + {'from_id': from_id, 'to_id': to_id} + ) else: if rel not in entity_rel_dict: entity_rel_dict[rel] = [] @@ -181,48 +222,95 @@ def add_edges_to_graphdb(self, nodes:[], database:Database, etl_load_id): if synonym_list: self.logger.info('Add synonyms') - query = get_create_synonym_relationships_query(NODE_BIOCYC, PROP_BIOCYC_ID, PROP_BIOCYC_ID, PROP_NAME, [], etl_load_id=etl_load_id, return_node_count=True) + query = get_create_synonym_relationships_query( + NODE_BIOCYC, + PROP_BIOCYC_ID, + PROP_BIOCYC_ID, + PROP_NAME, + [], + etl_load_id=etl_load_id, + return_node_count=True, + ) self.logger.debug(query) - node_count, result_counters = database.load_data_from_rows(query, synonym_list, return_node_count=True) + node_count, result_counters = database.load_data_from_rows( + query, synonym_list, return_node_count=True + ) no_of_created_nodes += result_counters.nodes_created - no_of_updated_nodes += (node_count - 
result_counters.nodes_created) + no_of_updated_nodes += node_count - result_counters.nodes_created for rel in entity_rel_dict.keys(): self.logger.info('Add relationship ' + rel) - query = get_create_relationships_query(NODE_BIOCYC, PROP_BIOCYC_ID, 'from_id', - NODE_BIOCYC, PROP_BIOCYC_ID, 'to_id', rel, etl_load_id=etl_load_id, return_node_count=True) + query = get_create_relationships_query( + NODE_BIOCYC, + PROP_BIOCYC_ID, + 'from_id', + NODE_BIOCYC, + PROP_BIOCYC_ID, + 'to_id', + rel, + etl_load_id=etl_load_id, + return_node_count=True, + ) self.logger.debug(query) - node_count, result_counters = database.load_data_from_rows(query, entity_rel_dict[rel], return_node_count=True) + node_count, result_counters = database.load_data_from_rows( + query, entity_rel_dict[rel], return_node_count=True + ) no_of_created_relations += result_counters.relationships_created - no_of_updated_relations += (node_count - result_counters.relationships_created) + no_of_updated_relations += ( + node_count - result_counters.relationships_created + ) - _no_of_created_relations, _no_of_updated_relations = self.add_dblinks_to_graphdb(db_link_dict, database, etl_load_id) + ( + _no_of_created_relations, + _no_of_updated_relations, + ) = self.add_dblinks_to_graphdb(db_link_dict, database, etl_load_id) no_of_created_relations += _no_of_created_relations no_of_updated_relations += _no_of_updated_relations - return no_of_created_nodes, no_of_updated_nodes, no_of_created_relations, no_of_updated_relations + return ( + no_of_created_nodes, + no_of_updated_nodes, + no_of_created_relations, + no_of_updated_relations, + ) - def add_dblinks_to_graphdb(self, db_link_dict:dict, database:Database, etl_load_id): + def add_dblinks_to_graphdb( + self, db_link_dict: dict, database: Database, etl_load_id + ): no_of_created_relations = 0 no_of_updated_relations = 0 for db_name in db_link_dict.keys(): - self.logger.info('Add DB Link relationship to ' + db_name ) + self.logger.info('Add DB Link relationship to ' + db_name) dest_label = 'db_' + db_name rel = db_name.upper() + '_LINK' - query = get_create_relationships_query(NODE_BIOCYC, PROP_BIOCYC_ID, 'from_id', - dest_label, PROP_ID, 'to_id', rel, etl_load_id=etl_load_id, return_node_count=True) + query = get_create_relationships_query( + NODE_BIOCYC, + PROP_BIOCYC_ID, + 'from_id', + dest_label, + PROP_ID, + 'to_id', + rel, + etl_load_id=etl_load_id, + return_node_count=True, + ) self.logger.debug(query) - node_count, result_counters = database.load_data_from_rows(query, db_link_dict[db_name], return_node_count=True) + node_count, result_counters = database.load_data_from_rows( + query, db_link_dict[db_name], return_node_count=True + ) no_of_created_relations += result_counters.relationships_created - no_of_updated_relations += (node_count - result_counters.relationships_created) + no_of_updated_relations += ( + node_count - result_counters.relationships_created + ) return no_of_created_relations, no_of_updated_relations - def write_entity_data_files(self, nodes:[]): + def write_entity_data_files(self, nodes: []): os.makedirs(self.db_output_dir, 0o777, True) self.logger.info(f'Writing {self.entity_name} files') - with open(os.path.join(self.db_output_dir, self.entity_name.lower() + '.tsv'), 'w') as f: + with open( + os.path.join(self.db_output_dir, self.entity_name.lower() + '.tsv'), 'w' + ) as f: attrs = [PROP_ID] + [PROP_DATA_SOURCE] + self.attrs f.write('\t'.join(attrs) + '\n') f.writelines(NodeData.get_entity_data_rows(nodes, attrs)) - diff --git 
a/graph-db/extractor/src/biocyc/data_sources.json b/graph-db/extractor/src/biocyc/data_sources.json index c42fab9f74..76ee770860 100644 --- a/graph-db/extractor/src/biocyc/data_sources.json +++ b/graph-db/extractor/src/biocyc/data_sources.json @@ -1,22 +1,22 @@ [ - { - "name": "PseudomonasCyc", - "file": "pput160488cyc.tar.gz" - }, - { - "name": "YeastCyc", - "file": "yeast.tar.gz" - }, - { - "name": "EcoCyc", - "file": "ecoli.tar.gz" - }, - { - "name": "HumanCyc", - "file": "human.tar.gz" - }, - { - "name": "MetaCyc", - "file": "meta.tar.gz" - } -] \ No newline at end of file + { + "name": "PseudomonasCyc", + "file": "pput160488cyc.tar.gz" + }, + { + "name": "YeastCyc", + "file": "yeast.tar.gz" + }, + { + "name": "EcoCyc", + "file": "ecoli.tar.gz" + }, + { + "name": "HumanCyc", + "file": "human.tar.gz" + }, + { + "name": "MetaCyc", + "file": "meta.tar.gz" + } +] diff --git a/graph-db/extractor/src/common/liquibase_utils.py b/graph-db/extractor/src/common/liquibase_utils.py index e8ca87bd64..97b73a198a 100644 --- a/graph-db/extractor/src/common/liquibase_utils.py +++ b/graph-db/extractor/src/common/liquibase_utils.py @@ -31,12 +31,16 @@ def get_changelog_template(): class ChangeLog: def __init__(self, author: str, change_id_prefix: str): if not change_id_prefix: - raise ValueError('The argument change_id_prefix must not be null or empty string') + raise ValueError( + 'The argument change_id_prefix must not be null or empty string' + ) try: int(change_id_prefix.split('-')[1]) except Exception: - raise ValueError('The argument change_id_prefix must be the JIRA card number; e.g LL-1234') + raise ValueError( + 'The argument change_id_prefix must be the JIRA card number; e.g LL-1234' + ) self.author = author self.id_prefix = change_id_prefix self.file_prefix = f'jira-{change_id_prefix}-' @@ -72,15 +76,26 @@ def create_changelog_str(self): template = get_template(sql_template) # liquibase doesn't like the `<` character self.cypher = self.cypher.replace('<', '<') - return template.render(change_id=self.id, author=self.author, change_comment=self.comment, cypher_query=self.cypher) + return template.render( + change_id=self.id, + author=self.author, + change_comment=self.comment, + cypher_query=self.cypher, + ) class CustomChangeSet(ChangeSet): - def __init__(self, id, author, comment, cypher, - filename:str, - handler="edu.ucsd.sbrg.FileQueryHandler", - filetype='TSV', - startrow=1): + def __init__( + self, + id, + author, + comment, + cypher, + filename: str, + handler="edu.ucsd.sbrg.FileQueryHandler", + filetype='TSV', + startrow=1, + ): ChangeSet.__init__(self, id, author, comment, cypher) self.handler = handler self.filename = filename.replace('.tsv', '.zip') @@ -89,9 +104,17 @@ def __init__(self, id, author, comment, cypher, def create_changelog_str(self): template = get_template(custom_template) - return template.render(change_id=self.id, change_comment=self.comment, author=self.author, - handler_class=self.handler, cypher_query=self.cypher, data_file=self.filename, - start_at=self.start_at, file_type=self.filetype, params=CUSTOM_PARAMS) + return template.render( + change_id=self.id, + change_comment=self.comment, + author=self.author, + handler_class=self.handler, + cypher_query=self.cypher, + data_file=self.filename, + start_at=self.start_at, + file_type=self.filetype, + params=CUSTOM_PARAMS, + ) def generate_sql_changelog_file(id, author, comment, cypher, outfile): @@ -104,7 +127,9 @@ def generate_sql_changelog_file(id, author, comment, cypher, outfile): if __name__ == '__main__': cypher 
= 'match(n:Gene)-[r]-(:Gene) where r.score < 0.4 delete r;' comment = 'Remove ecocyc-plus string relationships with 0.4 threshold. After the update, create ecocyc-plus-10012021.dump file' - outfile = os.path.join('../../../migration/liquibase/ecocyc-plus/ecocyc-plus changelog-0010.xml') - generate_sql_changelog_file('LL-3702 cut string rels with threshold', 'robin cai', - comment, - cypher, outfile) + outfile = os.path.join( + '../../../migration/liquibase/ecocyc-plus/ecocyc-plus changelog-0010.xml' + ) + generate_sql_changelog_file( + 'LL-3702 cut string rels with threshold', 'robin cai', comment, cypher, outfile + ) diff --git a/graph-db/extractor/src/ncbi/ncbi_taxonomy_liquibase.py b/graph-db/extractor/src/ncbi/ncbi_taxonomy_liquibase.py index 72d628be25..e30d0aa70e 100644 --- a/graph-db/extractor/src/ncbi/ncbi_taxonomy_liquibase.py +++ b/graph-db/extractor/src/ncbi/ncbi_taxonomy_liquibase.py @@ -9,6 +9,7 @@ # reference to this directory directory = os.path.realpath(os.path.dirname(__file__)) + class NcbiTaxonomyChangeLog(ChangeLog): def __init__(self, author: str, change_id_prefix: str): super().__init__(author, change_id_prefix) @@ -28,8 +29,12 @@ def load_ncbi_taxonomy_nodes(self): if self.id_prefix: id = f'{self.id_prefix} {id}' comment = 'Load NCBI taxonomy nodes' - query = get_create_update_nodes_query(NODE_TAXONOMY, PROP_ID, NODE_ATTRS, [NODE_NCBI], datasource='NCBI Taxonomy') - changeset = CustomChangeSet(id, self.author, comment, query, f'{self.file_prefix}{NCBI_TAXONOMY_FILE}') + query = get_create_update_nodes_query( + NODE_TAXONOMY, PROP_ID, NODE_ATTRS, [NODE_NCBI], datasource='NCBI Taxonomy' + ) + changeset = CustomChangeSet( + id, self.author, comment, query, f'{self.file_prefix}{NCBI_TAXONOMY_FILE}' + ) self.change_sets.append(changeset) def load_ncbi_taxonomy_synonym_rels(self): @@ -37,8 +42,16 @@ def load_ncbi_taxonomy_synonym_rels(self): if self.id_prefix: id = f'{self.id_prefix} {id}' comment = 'Load NCBI gene taxonomy relationship' - query = get_create_synonym_relationships_query(NODE_TAXONOMY, PROP_ID, PROP_ID, PROP_NAME, [PROP_TYPE]) - changeset = CustomChangeSet(id, self.author, comment, query, f'{self.file_prefix}{NCBI_TAXONOMY_SYNONYM_FILE}') + query = get_create_synonym_relationships_query( + NODE_TAXONOMY, PROP_ID, PROP_ID, PROP_NAME, [PROP_TYPE] + ) + changeset = CustomChangeSet( + id, + self.author, + comment, + query, + f'{self.file_prefix}{NCBI_TAXONOMY_SYNONYM_FILE}', + ) self.change_sets.append(changeset) def load_ncbi_taxonomy_parent_rels(self): @@ -50,7 +63,9 @@ def load_ncbi_taxonomy_parent_rels(self): CALL apoc.periodic.iterate( 'MATCH (n:Taxonomy), (m:Taxonomy) WHERE m.prop = n.parent_id RETURN n, m', 'MERGE (n)-[:HAS_PARENT]->(m)', {batchSize:5000}) - """.replace('prop', PROP_ID) + """.replace( + 'prop', PROP_ID + ) changeset = ChangeSet(id, self.author, comment, query) self.change_sets.append(changeset) @@ -65,10 +80,28 @@ def set_species_id(self): def create_indexes(self): queries = [] - queries.append(get_create_constraint_query(NODE_TAXONOMY, PROP_ID, 'constraint_taxonomy_id') + ';') - queries.append(get_create_constraint_query(NODE_SYNONYM, PROP_NAME, 'constraint_synonym_name') + ';') - queries.append(get_create_index_query(NODE_TAXONOMY, PROP_NAME, 'index_taxonomy_name') + ';') - queries.append(get_create_index_query(NODE_TAXONOMY, 'species_id', 'index_taxonomy_speciesid') + ';') + queries.append( + get_create_constraint_query( + NODE_TAXONOMY, PROP_ID, 'constraint_taxonomy_id' + ) + + ';' + ) + queries.append( + get_create_constraint_query( 
+ NODE_SYNONYM, PROP_NAME, 'constraint_synonym_name' + ) + + ';' + ) + queries.append( + get_create_index_query(NODE_TAXONOMY, PROP_NAME, 'index_taxonomy_name') + + ';' + ) + queries.append( + get_create_index_query( + NODE_TAXONOMY, 'species_id', 'index_taxonomy_speciesid' + ) + + ';' + ) return queries def add_index_change_set(self): diff --git a/graph-db/migrator/Dockerfile b/graph-db/migrator/Dockerfile index 2f15ee818e..0348bc17b9 100644 --- a/graph-db/migrator/Dockerfile +++ b/graph-db/migrator/Dockerfile @@ -20,23 +20,23 @@ RUN mvn -B clean package -DskipTests FROM liquibase/liquibase:$LIQUIBASE_IMAGE_TAG LABEL org.opencontainers.image.source https://github.com/SBRG/lifelike -ARG LIQUIBASE_NEO4J_VERSION=4.7.1.3 -ARG NEO4J_JDBC_VERSION=4.0.5 +ARG LIQUIBASE_ARANGODB_VERSION=4.7.1.3 +ARG ARANGODB_JDBC_VERSION=4.0.5 -# Install liquibase-neo4j extension -RUN lpm update && lpm add --global liquibase-neo4j@$LIQUIBASE_NEO4J_VERSION +# Install liquibase-arangodb extension +RUN lpm update && lpm add --global liquibase-arangodb@$LIQUIBASE_ARANGODB_VERSION -# Download Neo4j JDBC driver -ADD --chown=liquibase:liquibase https://github.com/neo4j-contrib/neo4j-jdbc/releases/download/${NEO4J_JDBC_VERSION}/neo4j-jdbc-driver-${NEO4J_JDBC_VERSION}.jar lib/ +# Download ArangoDB JDBC driver +ADD --chown=liquibase:liquibase https://github.com/arangodb-contrib/arangodb-jdbc/releases/download/${ARANGODB_JDBC_VERSION}/arangodb-jdbc-driver-${ARANGODB_JDBC_VERSION}.jar lib/ # Copy JAR from build stage COPY --from=build --chown=liquibase:liquibase /target/graphdb-migrator-*.jar ./lib -# Neo4j database configuration -ENV NEO4J_HOST= -ENV NEO4J_USERNAME=neo4j -ENV NEO4J_PASSWORD=neo4j -ENV NEO4J_DATABASE=neo4j +# ArangoDB database configuration +ENV ARANGODB_HOST= +ENV ARANGODB_USERNAME=arangodb +ENV ARANGODB_PASSWORD=arangodb +ENV ARANGODB_DATABASE=arangodb # Master changelog file. e.g. 
lifelike-graph/changelog-master.xml ENV CHANGELOG_FILE= diff --git a/graph-db/migrator/README.md b/graph-db/migrator/README.md index 2e019af219..b9aca9040f 100644 --- a/graph-db/migrator/README.md +++ b/graph-db/migrator/README.md @@ -1,6 +1,8 @@ +# Legacy - not used anymore + # Lifelike Graph DB migrator -Liquibase based migrator for Neo4j +Liquibase based migrator for ArangoDB ## Run with Docker @@ -9,8 +11,8 @@ To run migrations from Lifelike Docker registry latest image, you can run the fo ```bash # Update using bundled changelog files docker run --rm \ - --env NEO4J_HOST=neo4j:7687 \ - --env NEO4J_PASSWORD=password \ + --env ARANGODB_HOST=neo4j:7687 \ + --env ARANGODB_PASSWORD=password \ --env AZURE_ACCOUNT_STORAGE_NAME=lifelike \ --env AZURE_ACCOUNT_STORAGE_KEY= \ --env CHANGELOG_DIR=lifelike-graph \ @@ -28,10 +30,10 @@ docket run --rm \ | Variable | Default | Comment | | -------------------------- | -------------- | ----------------------------------- | | CHANGELOG_DIR | lifelike-graph | Master changelog directory path | -| NEO4J_HOST | | Host and port of Neo4j instance | -| NEO4J_USERNAME | neo4j | Neo4j authentication username | -| NEO4J_PASSWORD | password | Neo4j authentication password | -| NEO4J_DATABASE | neo4j | Neo4j target database | +| ARANGODB_HOST | | Host and port of ArangoDB instance | +| ARANGODB_USERNAME | neo4j | ArangoDB authentication username | +| ARANGODB_PASSWORD | password | ArangoDB authentication password | +| ARANGODB_DATABASE | neo4j | ArangoDB target database | | STORAGE_TYPE | azure | Remote storage type to get TSV data | | AZURE_ACCOUNT_STORAGE_NAME | | Azure Storage Account Name | | AZURE_ACCOUNT_STORAGE_KEY | | Azure Storage Key | @@ -39,8 +41,8 @@ docket run --rm \ ### Docker build arguments -| Argument | Default value | Comment | -| ----------------------- | ------------- | ------- | -| LIQUIBASE_IMAGE_TAG | 4.6 | Tag of the [Liquibase Docker image](https://hub.docker.com/r/liquibase/liquibase/tags) to use | -| LIQUIBASE_NEO4J_VERSION | 4.6.2 | [Liquibase-Neo4j plugin](https://github.com/liquibase/liquibase-neo4j) version to install | -| NEO4J_JDBC_VERSION | 4.0.5 | Neo4j JDBC driver version | +| Argument | Default value | Comment | +| -------------------------- | ------------- | --------------------------------------------------------------------------------------------- | +| LIQUIBASE_IMAGE_TAG | 4.6 | Tag of the [Liquibase Docker image](https://hub.docker.com/r/liquibase/liquibase/tags) to use | +| LIQUIBASE_ARANGODB_VERSION | 4.6.2 | [Liquibase-ArangoDB plugin](https://github.com/liquibase/liquibase-neo4j) version to install | +| ARANGODB_JDBC_VERSION | 4.0.5 | ArangoDB JDBC driver version | diff --git a/graph-db/migrator/docker-entrypoint.sh b/graph-db/migrator/docker-entrypoint.sh index 01c4e0149c..1b7b5095ab 100755 --- a/graph-db/migrator/docker-entrypoint.sh +++ b/graph-db/migrator/docker-entrypoint.sh @@ -13,12 +13,12 @@ else liquibase "$@" else ## Validate envrioment variables - if [ -z "$NEO4J_HOST" ]; then - echo "NEO4J_HOST environment variable is not set. Please set it to the hostname or IP address of the Neo4j server." + if [ -z "$ARANGODB_HOST" ]; then + echo "ARANGODB_HOST environment variable is not set. Please set it to the hostname or IP address of the ArangoDB server." 
exit 1 - elif [[ "$NEO4J_HOST" != *":"* ]]; then + elif [[ "$ARANGODB_HOST" != *":"* ]]; then ## If no port is specified, use the default one - NEO4J_HOST="$NEO4J_HOST:7687" + ARANGODB_HOST="$ARANGODB_HOST:7687" fi if [ "$STORAGE_TYPE" != "azure" ]; then echo "STORAGE_TYPE environment is set to an invalid valie. `azure` is currently only supported." @@ -33,21 +33,21 @@ else exit 1 fi - ## Wait until Neo4j is available - /wait-for-it.sh "$NEO4J_HOST" --timeout=600 -- echo "Neo4j is up" + ## Wait until ArangoDB is available + /wait-for-it.sh "$ARANGODB_HOST" --timeout=600 -- echo "ArangoDB is up" ## Include standard defaultsFile liquibase \ - --url="jdbc:neo4j:bolt://$NEO4J_HOST?database=${NEO4J_DATABASE:-neo4j}" \ - --username="$NEO4J_USERNAME" \ - --password="$NEO4J_PASSWORD" \ + --url="jdbc:neo4j:bolt://$ARANGODB_HOST?database=${ARANGODB_DATABASE:-neo4j}" \ + --username="$ARANGODB_USERNAME" \ + --password="$ARANGODB_PASSWORD" \ --changelog-file="$CHANGELOG_FILE" \ --log-level="$LOG_LEVEL" \ --defaults-file=/liquibase/liquibase.docker.properties \ "$@" \ - -Dneo4jHost="bolt://$NEO4J_HOST" \ - -Dneo4jCredentials="$NEO4J_USERNAME,$NEO4J_PASSWORD" \ - -Dneo4jDatabase="${NEO4J_DATABASE:-neo4j}" \ + -Dneo4jHost="bolt://$ARANGODB_HOST" \ + -Dneo4jCredentials="$ARANGODB_USERNAME,$ARANGODB_PASSWORD" \ + -Dneo4jDatabase="${ARANGODB_DATABASE:-neo4j}" \ -DazureStorageName="$AZURE_ACCOUNT_STORAGE_NAME" \ -DazureStorageKey="$AZURE_ACCOUNT_STORAGE_KEY" \ -DlocalSaveFileDir=/tmp \ diff --git a/helm/lifelike/Chart.lock b/helm/lifelike/Chart.lock index d89a3eebfc..b93530d606 100644 --- a/helm/lifelike/Chart.lock +++ b/helm/lifelike/Chart.lock @@ -2,9 +2,9 @@ dependencies: - name: postgresql repository: https://charts.bitnami.com/bitnami version: 11.0.4 -- name: neo4j - repository: https://neo4j-contrib.github.io/neo4j-helm - version: 4.4.3 +- name: kube-arangodb + repository: https://github.com/arangodb/kube-arangodb + version: 1.2.39 - name: elasticsearch repository: https://helm.elastic.co version: 7.16.3 diff --git a/helm/lifelike/Chart.yaml b/helm/lifelike/Chart.yaml index ee409bf66f..e69ad794cc 100644 --- a/helm/lifelike/Chart.yaml +++ b/helm/lifelike/Chart.yaml @@ -25,10 +25,10 @@ dependencies: repository: https://charts.bitnami.com/bitnami version: 11.0.4 condition: postgresql.enabled - - name: neo4j + - name: kube-arangodb version: 4.4.3 - repository: https://neo4j-contrib.github.io/neo4j-helm - condition: neo4j.enabled + repository: https://github.com/arangodb/kube-arangodb + condition: 1.2.39 - name: elasticsearch version: 7.16.3 repository: https://helm.elastic.co diff --git a/helm/lifelike/README.md b/helm/lifelike/README.md index 25051e94bc..041ba87563 100644 --- a/helm/lifelike/README.md +++ b/helm/lifelike/README.md @@ -17,47 +17,48 @@ helm install lifelike lifelike/lifelike Kubernetes: `>=1.20.0-0` -| Repository | Name | Version | -|------------|------|---------| -| https://charts.bitnami.com/bitnami | postgresql | 11.0.4 | -| https://charts.bitnami.com/bitnami | redis | 16.2.0 | -| https://helm.elastic.co | elasticsearch | 7.16.3 | -| https://neo4j-contrib.github.io/neo4j-helm | neo4j | 4.4.3 | +| Repository | Name | Version | +| ----------------------------------------- | ------------- | ------- | +| https://charts.bitnami.com/bitnami | postgresql | 11.0.4 | +| https://charts.bitnami.com/bitnami | redis | 16.2.0 | +| https://helm.elastic.co | elasticsearch | 7.16.3 | +| https://github.com/arangodb/kube-arangodb | kube-arangodb | 1.2.39 | ## Values -| Key | Type | Default | Description | 
-|-----|------|---------|-------------| -| ingress | object | `{"annotations":{},"className":"","enabled":false,"hostname":"lifelike.local","tls":[]}` | --------------------------------------------------------------------------- | -| api | object | `{"autoScaling":{"enabled":false,"maxReplicas":4,"minReplicas":2,"targetCPUUtilizationPercentage":80,"targetMemoryUtilizationPercentage":80},"dbWaiter":{"image":{"imagePullPolicy":"IfNotPresent","repository":"willwill/wait-for-it","tag":"latest"},"timeoutSeconds":30},"extraEnv":{"INITIAL_ADMIN_EMAIL":"admin@example.com"},"extraVolumeMounts":[],"extraVolumes":[],"image":{"repository":"ghcr.io/sbrg/lifelike-appserver","tag":""},"livenessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/meta","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"lmdb":{"loadEnabled":false},"podSecurityContext":{"runAsUser":0},"readinessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/meta","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"replicaCount":1,"resources":{"requests":{"ephemeral-storage":"8Gi"}},"secret":"secret","service":{"port":5000,"type":"ClusterIP"},"strategyType":"RollingUpdate"}` | ---------------------------------------------------------------------------- | -| api.extraEnv | object | `{"INITIAL_ADMIN_EMAIL":"admin@example.com"}` | Extra environment variables to pass to the appserver | -| api.lmdb.loadEnabled | bool | `false` | Load LMDB data from storage when initializing | -| api.replicaCount | int | `1` | Number of replicas running the appserver | -| api.autoScaling.enabled | bool | `false` | If enabled, value at api.replicaCount will be ignored | -| api.strategyType | string | `"RollingUpdate"` | if using some PV that does not support readWriteMany, set this to 'Recreate' | -| api.resources | object | `{"requests":{"ephemeral-storage":"8Gi"}}` | Optional resources requests and limits | -| frontend | object | `{"autoScaling":{"enabled":false,"maxReplicas":5,"minReplicas":2,"targetCPUUtilizationPercentage":80,"targetMemoryUtilizationPercentage":80},"image":{"repository":"ghcr.io/sbrg/lifelike-frontend","tag":""},"livenessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"readinessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"replicaCount":1,"resources":{},"service":{"port":80,"type":"ClusterIP"}}` | ---------------------------------------------------------------------------- | -| statisticalEnrichment | object | `{"image":{"repository":"ghcr.io/sbrg/lifelike-statistical-enrichment","tag":""},"livenessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/healthz","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"readinessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/healthz","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"replicaCount":1,"resources":{},"service":{"port":5000,"type":"ClusterIP"}}` | ---------------------------------------------------------------------------- | -| pdfparser | object | 
`{"autoScaling":{"enabled":false,"maxReplicas":4,"minReplicas":2,"targetCPUUtilizationPercentage":80,"targetMemoryUtilizationPercentage":80},"image":{"repository":"ghcr.io/sbrg/lifelike-pdfparser","tag":"latest"},"livenessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"readinessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"replicaCount":1,"resources":{},"service":{"port":7600,"type":"ClusterIP"}}` | ---------------------------------------------------------------------------- | -| postgresqlExternal | object | `{"database":"postgres","existingSecret":"","host":"postgres.local","password":"password","port":5432,"user":"postgres"}` | ---------------------------------------------------------------------------- | -| neo4jExternal.host | string | `"neo4j.local"` | | -| neo4jExternal.port | int | `7687` | | -| neo4jExternal.user | string | `"neo4j"` | | -| neo4jExternal.password | string | `"password"` | | -| neo4jExternal.database | string | `"neo4j"` | | -| redisExternal.host | string | `"redis.local"` | | -| redisExternal.port | int | `6379` | | -| redisExternal.password | string | `""` | | -| elasticsearchExternal.host | string | `"elasticsearch.local"` | | -| elasticsearchExternal.port | int | `9200` | | -| elasticsearchExternal.user | string | `""` | | -| elasticsearchExternal.password | string | `""` | | -| elasticsearchExternal.ssl | bool | `false` | | -| postgresql | object | `{"auth":{"database":"database","postgresPassword":"password"},"enabled":true}` | ---------------------------------------------------------------------------- | -| postgresql.enabled | bool | `true` | Set to false to disable automatic deployment of PostgreSQL | -| neo4j | object | `{"core":{"numberOfServers":1,"persistentVolume":{"size":"100Gi"},"standalone":true},"enabled":true,"imageTag":"4.4.3-community","neo4jPassword":"password"}` | ---------------------------------------------------------------------------- | -| elasticsearch | object | `{"enabled":true,"esConfig":{"elasticsearch.yml":"node.store.allow_mmap: false\n"},"fullnameOverride":"elasticsearch","image":"ghcr.io/sbrg/lifelike-elasticsearch","imageTag":"7.16.3","volumeClaimTemplate":{"resources":{"requests":{"storage":"30Gi"}}}}` | ---------------------------------------------------------------------------- | -| redis | object | `{"auth":{"password":"password"},"commonConfiguration":"# Disable persistence to disk\nsave \"\"\n# Disable AOF https://redis.io/topics/persistence#append-only-file\nappendonly no","enabled":true,"master":{"extraFlags":["--maxmemory-policy allkeys-lru"],"persistence":{"enabled":false}},"replica":{"extraFlags":["--maxmemory-policy allkeys-lru"],"persistence":{"enabled":false}}}` | ---------------------------------------------------------------------------- | - ----------------------------------------------- +| Key | Type | Default | Description | +| ------------------------------ | ------ | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | +| ingress | object | `{"annotations":{},"className":"","enabled":false,"hostname":"lifelike.local","tls":[]}` | --------------------------------------------------------------------------- | +| api | object | `{"autoScaling":{"enabled":false,"maxReplicas":4,"minReplicas":2,"targetCPUUtilizationPercentage":80,"targetMemoryUtilizationPercentage":80},"dbWaiter":{"image":{"imagePullPolicy":"IfNotPresent","repository":"willwill/wait-for-it","tag":"latest"},"timeoutSeconds":30},"extraEnv":{"INITIAL_ADMIN_EMAIL":"admin@example.com"},"extraVolumeMounts":[],"extraVolumes":[],"image":{"repository":"ghcr.io/sbrg/lifelike-appserver","tag":""},"livenessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/meta","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"lmdb":{"loadEnabled":false},"podSecurityContext":{"runAsUser":0},"readinessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/meta","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"replicaCount":1,"resources":{"requests":{"ephemeral-storage":"8Gi"}},"secret":"secret","service":{"port":5000,"type":"ClusterIP"},"strategyType":"RollingUpdate"}` | ---------------------------------------------------------------------------- | +| api.extraEnv | object | `{"INITIAL_ADMIN_EMAIL":"admin@example.com"}` | Extra environment variables to pass to the appserver | +| api.lmdb.loadEnabled | bool | `false` | Load LMDB data from storage when initializing | +| api.replicaCount | int | `1` | Number of replicas running the appserver | +| api.autoScaling.enabled | bool | `false` | If enabled, value at api.replicaCount will be ignored | +| api.strategyType | string | `"RollingUpdate"` | if using some PV that does not support readWriteMany, set this to 'Recreate' | +| api.resources | object | `{"requests":{"ephemeral-storage":"8Gi"}}` | Optional resources requests and limits | +| frontend | object | 
`{"autoScaling":{"enabled":false,"maxReplicas":5,"minReplicas":2,"targetCPUUtilizationPercentage":80,"targetMemoryUtilizationPercentage":80},"image":{"repository":"ghcr.io/sbrg/lifelike-frontend","tag":""},"livenessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"readinessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"replicaCount":1,"resources":{},"service":{"port":80,"type":"ClusterIP"}}` | ---------------------------------------------------------------------------- | +| statisticalEnrichment | object | `{"image":{"repository":"ghcr.io/sbrg/lifelike-statistical-enrichment","tag":""},"livenessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/healthz","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"readinessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/healthz","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"replicaCount":1,"resources":{},"service":{"port":5000,"type":"ClusterIP"}}` | ---------------------------------------------------------------------------- | +| pdfparser | object | `{"autoScaling":{"enabled":false,"maxReplicas":4,"minReplicas":2,"targetCPUUtilizationPercentage":80,"targetMemoryUtilizationPercentage":80},"image":{"repository":"ghcr.io/sbrg/lifelike-pdfparser","tag":"latest"},"livenessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"readinessProbe":{"enabled":true,"failureThreshold":20,"initialDelaySeconds":20,"path":"/","periodSeconds":10,"successThreshold":1,"timeoutSeconds":10},"replicaCount":1,"resources":{},"service":{"port":7600,"type":"ClusterIP"}}` | ---------------------------------------------------------------------------- | +| postgresqlExternal | object | `{"database":"postgres","existingSecret":"","host":"postgres.local","password":"password","port":5432,"user":"postgres"}` | ---------------------------------------------------------------------------- | +| arangodbExternal.host | string | `"arangodb.local"` | | +| arangodbExternal.port | int | `7687` | | +| arangodbExternal.user | string | `"arangodb"` | | +| arangodbExternal.password | string | `"password"` | | +| arangodbExternal.database | string | `"arangodb"` | | +| redisExternal.host | string | `"redis.local"` | | +| redisExternal.port | int | `6379` | | +| redisExternal.password | string | `""` | | +| elasticsearchExternal.host | string | `"elasticsearch.local"` | | +| elasticsearchExternal.port | int | `9200` | | +| elasticsearchExternal.user | string | `""` | | +| elasticsearchExternal.password | string | `""` | | +| elasticsearchExternal.ssl | bool | `false` | | +| postgresql | object | `{"auth":{"database":"database","postgresPassword":"password"},"enabled":true}` | ---------------------------------------------------------------------------- | +| postgresql.enabled | bool | `true` | Set to false to disable automatic deployment of PostgreSQL | +| arangodb | object | `{"core":{"numberOfServers":1,"persistentVolume":{"size":"100Gi"},"standalone":true},"enabled":true,"imageTag":"4.4.3-community","arangodbPassword":"password"}` | ---------------------------------------------------------------------------- | +| elasticsearch | object | `{"enabled":true,"esConfig":{"elasticsearch.yml":"node.store.allow_mmap: 
false\n"},"fullnameOverride":"elasticsearch","image":"ghcr.io/sbrg/lifelike-elasticsearch","imageTag":"7.16.3","volumeClaimTemplate":{"resources":{"requests":{"storage":"30Gi"}}}}` | ---------------------------------------------------------------------------- | +| redis | object | `{"auth":{"password":"password"},"commonConfiguration":"# Disable persistence to disk\nsave \"\"\n# Disable AOF https://redis.io/topics/persistence#append-only-file\nappendonly no","enabled":true,"master":{"extraFlags":["--maxmemory-policy allkeys-lru"],"persistence":{"enabled":false}},"replica":{"extraFlags":["--maxmemory-policy allkeys-lru"],"persistence":{"enabled":false}}}` | ---------------------------------------------------------------------------- | + +--- + Autogenerated from chart metadata using [helm-docs v1.7.0](https://github.com/norwoodj/helm-docs/releases/v1.7.0) diff --git a/helm/lifelike/examples/external-db.yaml b/helm/lifelike/examples/external-db.yaml index 69fbebdb47..082e943463 100644 --- a/helm/lifelike/examples/external-db.yaml +++ b/helm/lifelike/examples/external-db.yaml @@ -1,7 +1,6 @@ ## This is an example values files for installing Lifelike Helm chart ## connecting to external existing database services -## instead of installing the embedded subcharts (postgres, neo4j and elasticsearch) - +## instead of installing the embedded subcharts (postgres, arangodb and elasticsearch) # Disable the embedded PostgreSQL subchart postgresql: @@ -14,14 +13,14 @@ postgresqlExternal: database: postgres password: password -# Disable embedded Neo4j subchart -neo4j: +# Disable embedded ArangoDB subchart +arangodb: enabled: false -# Configure external Neo4j database -neo4jExternal: - host: neo4j.local +# Configure external ArangoDB database +arangodbExternal: + host: arangodb.local port: 7687 - user: neo4j + user: arangodb password: password # Disable embedded Elasticsearch subchart diff --git a/helm/lifelike/templates/_helpers.tpl b/helm/lifelike/templates/_helpers.tpl index 7823c85cb0..09defdf5ea 100644 --- a/helm/lifelike/templates/_helpers.tpl +++ b/helm/lifelike/templates/_helpers.tpl @@ -163,18 +163,18 @@ Set postgres secretKey {{/* ---------------------------------------------------------------------- */}} -{{/* Neo4J */}} +{{/* ArangoDB */}} {{/* ---------------------------------------------------------------------- */}} {{/* Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). */}} -{{- define "lifelike.neo4j.fullname" -}} -{{- if .Values.neo4j.fullnameOverride -}} -{{- .Values.neo4j.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- define "lifelike.arangodb.fullname" -}} +{{- if .Values.arangodb.fullnameOverride -}} +{{- .Values.arangodb.fullnameOverride | trunc 63 | trimSuffix "-" -}} {{- else -}} -{{- $name := default "neo4j" .Values.neo4j.nameOverride -}} +{{- $name := default "arangodb" .Values.arangodb.nameOverride -}} {{- if contains $name .Release.Name -}} {{- .Release.Name | trunc 63 | trimSuffix "-" -}} {{- else -}} @@ -184,57 +184,57 @@ We truncate at 63 chars because some Kubernetes name fields are limited to this {{- end -}} {{/* -Return the Neo4j hostname +Return the ArangoDB hostname */}} -{{- define "lifelike.neo4jHost" -}} -{{- if .Values.neo4j.enabled }} - {{- printf "%s" (include "lifelike.neo4j.fullname" .) -}} +{{- define "lifelike.arangodbHost" -}} +{{- if .Values.arangodb.enabled }} + {{- printf "%s" (include "lifelike.arangodb.fullname" .) 
-}} {{- else -}} - {{- printf "%s" .Values.neo4jExternal.host -}} + {{- printf "%s" .Values.arangodbExternal.host -}} {{- end -}} {{- end -}} {{/* -Return the Neo4j port +Return the ArangoDB port */}} -{{- define "lifelike.neo4jPort" -}} -{{- if .Values.neo4j.enabled }} +{{- define "lifelike.arangodbPort" -}} +{{- if .Values.arangodb.enabled }} {{- printf "7687" -}} {{- else -}} - {{- .Values.neo4jExternal.port -}} + {{- .Values.arangodbExternal.port -}} {{- end -}} {{- end -}} {{/* -Return the Neo4j user +Return the ArangoDB user */}} -{{- define "lifelike.neo4jUser" -}} -{{- if .Values.neo4j.enabled }} - {{- printf "neo4j" -}} +{{- define "lifelike.arangodbUser" -}} +{{- if .Values.arangodb.enabled }} + {{- printf "arangodb" -}} {{- else -}} - {{- printf "%s" .Values.neo4jExternal.user -}} + {{- printf "%s" .Values.arangodbExternal.user -}} {{- end -}} {{- end -}} {{/* -Return the Neo4j password +Return the ArangoDB password */}} -{{- define "lifelike.neo4jPassword" -}} -{{- if .Values.neo4j.enabled }} - {{- printf "%s" .Values.neo4j.neo4jPassword -}} +{{- define "lifelike.arangodbPassword" -}} +{{- if .Values.arangodb.enabled }} + {{- printf "%s" .Values.arangodb.arangodbPassword -}} {{- else -}} - {{- printf "%s" .Values.neo4jExternal.password -}} + {{- printf "%s" .Values.arangodbExternal.password -}} {{- end -}} {{- end -}} {{/* -Return the Neo4j database name +Return the ArangoDB database name */}} -{{- define "lifelike.neo4jDatabase" -}} -{{- if .Values.neo4j.enabled }} - {{- default "neo4j" .Values.neo4j.defaultDatabase -}} +{{- define "lifelike.arangodbDatabase" -}} +{{- if .Values.arangodb.enabled }} + {{- default "arangodb" .Values.arangodb.defaultDatabase -}} {{- else -}} - {{- default "neo4j" .Values.neo4jExternal.database -}} + {{- default "arangodb" .Values.arangodbExternal.database -}} {{- end -}} {{- end -}} @@ -282,7 +282,7 @@ Return the Elasticsearch port {{- end -}} {{/* -Return the Neo4j user +Return the ArangoDB user */}} {{- define "lifelike.elasticsearchUser" -}} {{- if .Values.elasticsearch.enabled }} @@ -472,17 +472,17 @@ PostgreSQL environment variables helper {{/* -Neo4j environment variables helper +ArangoDB environment variables helper */}} -{{- define "lifelike.neo4jEnv" -}} -- name: NEO4J_HOST - value: {{ template "lifelike.neo4jHost" . }} -- name: NEO4J_PORT - value: {{ include "lifelike.neo4jPort" . | quote }} -- name: NEO4J_AUTH - value: {{ template "lifelike.neo4jUser" . }}/{{ template "lifelike.neo4jPassword" . }} -- name: NEO4J_DB - value: {{ template "lifelike.neo4jDatabase" . }} +{{- define "lifelike.arangodbEnv" -}} +- name: ARANGODB_HOST + value: {{ template "lifelike.arangodbHost" . }} +- name: ARANGODB_PORT + value: {{ include "lifelike.arangodbPort" . | quote }} +- name: ARANGODB_AUTH + value: {{ template "lifelike.arangodbUser" . }}/{{ template "lifelike.arangodbPassword" . }} +- name: ARANGODB_DB + value: {{ template "lifelike.arangodbDatabase" . }} {{- end -}} diff --git a/helm/lifelike/templates/api/deployment.yaml b/helm/lifelike/templates/api/deployment.yaml index 3b2f5dc0cd..38b09da121 100644 --- a/helm/lifelike/templates/api/deployment.yaml +++ b/helm/lifelike/templates/api/deployment.yaml @@ -47,14 +47,14 @@ spec: - --port={{ template "lifelike.postgresqlPort" . 
}} - --timeout={{ default 30 .Values.api.dbWaiter.timeoutSeconds }} {{- end }} - {{- if .Values.neo4j.enabled }} - - name: wait-for-neo4j + {{- if .Values.arangodb.enabled }} + - name: wait-for-arangodb image: {{ .Values.api.dbWaiter.image.repository }}:{{ .Values.api.dbWaiter.image.tag }} imagePullPolicy: {{ default "IfNotPresent" .Values.api.dbWaiter.image.imagePullPolicy }} command: - /wait-for-it.sh - - --host={{ template "lifelike.neo4jHost" . }} - - --port={{ template "lifelike.neo4jPort" . }} + - --host={{ template "lifelike.arangodbHost" . }} + - --port={{ template "lifelike.arangodbPort" . }} - --timeout={{ default 30 .Values.api.dbWaiter.timeoutSeconds }} {{- end }} - name: migrate-db diff --git a/helm/lifelike/values.schema.json b/helm/lifelike/values.schema.json index 9fc39fb453..f160f50409 100644 --- a/helm/lifelike/values.schema.json +++ b/helm/lifelike/values.schema.json @@ -1,723 +1,723 @@ { - "$schema": "http://json-schema.org/schema#", - "type": "object", - "properties": { - "api": { - "type": "object", - "properties": { - "autoScaling": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "maxReplicas": { - "type": "integer" - }, - "minReplicas": { - "type": "integer" - }, - "targetCPUUtilizationPercentage": { - "type": "integer" - }, - "targetMemoryUtilizationPercentage": { - "type": "integer" - } - } - }, - "dbWaiter": { - "type": "object", - "properties": { - "image": { - "type": "object", - "properties": { - "imagePullPolicy": { - "type": "string" - }, - "repository": { - "type": "string" - }, - "tag": { - "type": "string" - } - } - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "extraEnv": { - "type": "object", - "properties": { - "INITIAL_ADMIN_EMAIL": { - "type": "string" - } - } - }, - "extraVolumeMounts": { - "type": "array" - }, - "extraVolumes": { - "type": "array" - }, - "image": { - "type": "object", - "properties": { - "repository": { - "type": "string" - }, - "tag": { - "type": "string" - } - } - }, - "livenessProbe": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "failureThreshold": { - "type": "integer" - }, - "initialDelaySeconds": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "periodSeconds": { - "type": "integer" - }, - "successThreshold": { - "type": "integer" - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "lmdb": { - "type": "object", - "properties": { - "loadEnabled": { - "type": "boolean" - } - } - }, - "podSecurityContext": { - "type": "object", - "properties": { - "runAsUser": { - "type": "integer" - } - } - }, - "readinessProbe": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "failureThreshold": { - "type": "integer" - }, - "initialDelaySeconds": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "periodSeconds": { - "type": "integer" - }, - "successThreshold": { - "type": "integer" - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "replicaCount": { - "type": "integer" - }, - "resources": { - "type": "object", - "properties": { - "requests": { - "type": "object", - "properties": { - "ephemeral-storage": { - "type": "string" - } - } - } - } - }, - "secret": { - "type": "string" - }, - "service": { - "type": "object", - "properties": { - "port": { - "type": "integer" - }, - "type": { - "type": "string" - } - } - }, - "strategyType": { - "type": "string" - } + "$schema": "http://json-schema.org/schema#", + "type": "object", + "properties": { + "api": { + "type": "object", + 
"properties": { + "autoScaling": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "maxReplicas": { + "type": "integer" + }, + "minReplicas": { + "type": "integer" + }, + "targetCPUUtilizationPercentage": { + "type": "integer" + }, + "targetMemoryUtilizationPercentage": { + "type": "integer" } + } }, - "elasticsearch": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "esConfig": { - "type": "object", - "properties": { - "elasticsearch.yml": { - "type": "string" - } - } - }, - "fullnameOverride": { - "type": "string" - }, - "image": { - "type": "string" - }, - "imageTag": { - "type": "string" - }, - "volumeClaimTemplate": { - "type": "object", - "properties": { - "resources": { - "type": "object", - "properties": { - "requests": { - "type": "object", - "properties": { - "storage": { - "type": "string" - } - } - } - } - } - } + "dbWaiter": { + "type": "object", + "properties": { + "image": { + "type": "object", + "properties": { + "imagePullPolicy": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "tag": { + "type": "string" } + } + }, + "timeoutSeconds": { + "type": "integer" } + } }, - "elasticsearchExternal": { - "type": "object", - "properties": { - "host": { - "type": "string" - }, - "password": { - "type": "string" - }, - "port": { - "type": "integer" - }, - "ssl": { - "type": "boolean" - }, - "user": { - "type": "string" - } + "extraEnv": { + "type": "object", + "properties": { + "INITIAL_ADMIN_EMAIL": { + "type": "string" } + } }, - "frontend": { - "type": "object", - "properties": { - "autoScaling": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "maxReplicas": { - "type": "integer" - }, - "minReplicas": { - "type": "integer" - }, - "targetCPUUtilizationPercentage": { - "type": "integer" - }, - "targetMemoryUtilizationPercentage": { - "type": "integer" - } - } - }, - "image": { - "type": "object", - "properties": { - "repository": { - "type": "string" - }, - "tag": { - "type": "string" - } - } - }, - "livenessProbe": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "failureThreshold": { - "type": "integer" - }, - "initialDelaySeconds": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "periodSeconds": { - "type": "integer" - }, - "successThreshold": { - "type": "integer" - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "readinessProbe": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "failureThreshold": { - "type": "integer" - }, - "initialDelaySeconds": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "periodSeconds": { - "type": "integer" - }, - "successThreshold": { - "type": "integer" - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "replicaCount": { - "type": "integer" - }, - "resources": { - "type": "object" - }, - "service": { - "type": "object", - "properties": { - "port": { - "type": "integer" - }, - "type": { - "type": "string" - } - } - } + "extraVolumeMounts": { + "type": "array" + }, + "extraVolumes": { + "type": "array" + }, + "image": { + "type": "object", + "properties": { + "repository": { + "type": "string" + }, + "tag": { + "type": "string" } + } }, - "ingress": { - "type": "object", - "properties": { - "annotations": { - "type": "object" - }, - "className": { - "type": "string" - }, - "enabled": { - "type": "boolean" - }, - "hostname": { - "type": "string" - }, - "tls": { - "type": "array" - } + "livenessProbe": { + "type": 
"object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "initialDelaySeconds": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "periodSeconds": { + "type": "integer" + }, + "successThreshold": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" } + } }, - "neo4j": { - "type": "object", - "properties": { - "core": { - "type": "object", - "properties": { - "numberOfServers": { - "type": "integer" - }, - "persistentVolume": { - "type": "object", - "properties": { - "size": { - "type": "string" - } - } - }, - "standalone": { - "type": "boolean" - } - } - }, - "enabled": { - "type": "boolean" - }, - "imageTag": { - "type": "string" - }, - "neo4jPassword": { - "type": "string" - } + "lmdb": { + "type": "object", + "properties": { + "loadEnabled": { + "type": "boolean" } + } }, - "neo4jExternal": { - "type": "object", - "properties": { - "database": { - "type": "string" - }, - "host": { - "type": "string" - }, - "password": { - "type": "string" - }, - "port": { - "type": "integer" - }, - "user": { - "type": "string" - } + "podSecurityContext": { + "type": "object", + "properties": { + "runAsUser": { + "type": "integer" } + } }, - "pdfparser": { - "type": "object", - "properties": { - "autoScaling": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "maxReplicas": { - "type": "integer" - }, - "minReplicas": { - "type": "integer" - }, - "targetCPUUtilizationPercentage": { - "type": "integer" - }, - "targetMemoryUtilizationPercentage": { - "type": "integer" - } - } - }, - "image": { - "type": "object", - "properties": { - "repository": { - "type": "string" - }, - "tag": { - "type": "string" - } - } - }, - "livenessProbe": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "failureThreshold": { - "type": "integer" - }, - "initialDelaySeconds": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "periodSeconds": { - "type": "integer" - }, - "successThreshold": { - "type": "integer" - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "readinessProbe": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "failureThreshold": { - "type": "integer" - }, - "initialDelaySeconds": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "periodSeconds": { - "type": "integer" - }, - "successThreshold": { - "type": "integer" - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "replicaCount": { - "type": "integer" - }, - "resources": { - "type": "object" - }, - "service": { - "type": "object", - "properties": { - "port": { - "type": "integer" - }, - "type": { - "type": "string" - } - } + "readinessProbe": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "initialDelaySeconds": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "periodSeconds": { + "type": "integer" + }, + "successThreshold": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" + } + } + }, + "replicaCount": { + "type": "integer" + }, + "resources": { + "type": "object", + "properties": { + "requests": { + "type": "object", + "properties": { + "ephemeral-storage": { + "type": "string" } + } + } + } + }, + "secret": { + "type": "string" + }, + "service": { + "type": "object", + "properties": { + "port": { + "type": "integer" + }, + "type": { + "type": "string" } + } + }, + "strategyType": { + "type": "string" + } 
+ } + }, + "elasticsearch": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "esConfig": { + "type": "object", + "properties": { + "elasticsearch.yml": { + "type": "string" + } + } + }, + "fullnameOverride": { + "type": "string" + }, + "image": { + "type": "string" + }, + "imageTag": { + "type": "string" }, - "postgresql": { - "type": "object", - "properties": { - "auth": { - "type": "object", - "properties": { - "database": { - "type": "string" - }, - "postgresPassword": { - "type": "string" - } + "volumeClaimTemplate": { + "type": "object", + "properties": { + "resources": { + "type": "object", + "properties": { + "requests": { + "type": "object", + "properties": { + "storage": { + "type": "string" } - }, - "enabled": { - "type": "boolean" + } } + } } + } + } + } + }, + "elasticsearchExternal": { + "type": "object", + "properties": { + "host": { + "type": "string" }, - "postgresqlExternal": { - "type": "object", - "properties": { - "database": { - "type": "string" - }, - "existingSecret": { - "type": "string" - }, - "host": { - "type": "string" - }, - "password": { - "type": "string" - }, - "port": { - "type": "integer" - }, - "user": { - "type": "string" + "password": { + "type": "string" + }, + "port": { + "type": "integer" + }, + "ssl": { + "type": "boolean" + }, + "user": { + "type": "string" + } + } + }, + "frontend": { + "type": "object", + "properties": { + "autoScaling": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "maxReplicas": { + "type": "integer" + }, + "minReplicas": { + "type": "integer" + }, + "targetCPUUtilizationPercentage": { + "type": "integer" + }, + "targetMemoryUtilizationPercentage": { + "type": "integer" + } + } + }, + "image": { + "type": "object", + "properties": { + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + }, + "livenessProbe": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "initialDelaySeconds": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "periodSeconds": { + "type": "integer" + }, + "successThreshold": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" + } + } + }, + "readinessProbe": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "initialDelaySeconds": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "periodSeconds": { + "type": "integer" + }, + "successThreshold": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" + } + } + }, + "replicaCount": { + "type": "integer" + }, + "resources": { + "type": "object" + }, + "service": { + "type": "object", + "properties": { + "port": { + "type": "integer" + }, + "type": { + "type": "string" + } + } + } + } + }, + "ingress": { + "type": "object", + "properties": { + "annotations": { + "type": "object" + }, + "className": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "hostname": { + "type": "string" + }, + "tls": { + "type": "array" + } + } + }, + "arangodb": { + "type": "object", + "properties": { + "core": { + "type": "object", + "properties": { + "numberOfServers": { + "type": "integer" + }, + "persistentVolume": { + "type": "object", + "properties": { + "size": { + "type": "string" } + } + }, + "standalone": { + "type": "boolean" } + } }, - "redis": { - "type": "object", - "properties": { - "auth": { - "type": "object", - "properties": { - 
"password": { - "type": "string" - } - } - }, - "commonConfiguration": { - "type": "string" - }, + "enabled": { + "type": "boolean" + }, + "imageTag": { + "type": "string" + }, + "arangodbPassword": { + "type": "string" + } + } + }, + "arangodbExternal": { + "type": "object", + "properties": { + "database": { + "type": "string" + }, + "host": { + "type": "string" + }, + "password": { + "type": "string" + }, + "port": { + "type": "integer" + }, + "user": { + "type": "string" + } + } + }, + "pdfparser": { + "type": "object", + "properties": { + "autoScaling": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "maxReplicas": { + "type": "integer" + }, + "minReplicas": { + "type": "integer" + }, + "targetCPUUtilizationPercentage": { + "type": "integer" + }, + "targetMemoryUtilizationPercentage": { + "type": "integer" + } + } + }, + "image": { + "type": "object", + "properties": { + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + }, + "livenessProbe": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "initialDelaySeconds": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "periodSeconds": { + "type": "integer" + }, + "successThreshold": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" + } + } + }, + "readinessProbe": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "initialDelaySeconds": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "periodSeconds": { + "type": "integer" + }, + "successThreshold": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" + } + } + }, + "replicaCount": { + "type": "integer" + }, + "resources": { + "type": "object" + }, + "service": { + "type": "object", + "properties": { + "port": { + "type": "integer" + }, + "type": { + "type": "string" + } + } + } + } + }, + "postgresql": { + "type": "object", + "properties": { + "auth": { + "type": "object", + "properties": { + "database": { + "type": "string" + }, + "postgresPassword": { + "type": "string" + } + } + }, + "enabled": { + "type": "boolean" + } + } + }, + "postgresqlExternal": { + "type": "object", + "properties": { + "database": { + "type": "string" + }, + "existingSecret": { + "type": "string" + }, + "host": { + "type": "string" + }, + "password": { + "type": "string" + }, + "port": { + "type": "integer" + }, + "user": { + "type": "string" + } + } + }, + "redis": { + "type": "object", + "properties": { + "auth": { + "type": "object", + "properties": { + "password": { + "type": "string" + } + } + }, + "commonConfiguration": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "master": { + "type": "object", + "properties": { + "extraFlags": { + "type": "array", + "items": { + "type": "string" + } + }, + "persistence": { + "type": "object", + "properties": { "enabled": { - "type": "boolean" - }, - "master": { - "type": "object", - "properties": { - "extraFlags": { - "type": "array", - "items": { - "type": "string" - } - }, - "persistence": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - } - } - } - } - }, - "replica": { - "type": "object", - "properties": { - "extraFlags": { - "type": "array", - "items": { - "type": "string" - } - }, - "persistence": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - } - } - } - } + "type": "boolean" } + } } + } }, - 
"redisExternal": { - "type": "object", - "properties": { - "host": { - "type": "string" - }, - "password": { - "type": "string" - }, - "port": { - "type": "integer" + "replica": { + "type": "object", + "properties": { + "extraFlags": { + "type": "array", + "items": { + "type": "string" + } + }, + "persistence": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" } + } } + } + } + } + }, + "redisExternal": { + "type": "object", + "properties": { + "host": { + "type": "string" }, - "statisticalEnrichment": { - "type": "object", - "properties": { - "image": { - "type": "object", - "properties": { - "repository": { - "type": "string" - }, - "tag": { - "type": "string" - } - } - }, - "livenessProbe": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "failureThreshold": { - "type": "integer" - }, - "initialDelaySeconds": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "periodSeconds": { - "type": "integer" - }, - "successThreshold": { - "type": "integer" - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "readinessProbe": { - "type": "object", - "properties": { - "enabled": { - "type": "boolean" - }, - "failureThreshold": { - "type": "integer" - }, - "initialDelaySeconds": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "periodSeconds": { - "type": "integer" - }, - "successThreshold": { - "type": "integer" - }, - "timeoutSeconds": { - "type": "integer" - } - } - }, - "replicaCount": { - "type": "integer" - }, - "resources": { - "type": "object" - }, - "service": { - "type": "object", - "properties": { - "port": { - "type": "integer" - }, - "type": { - "type": "string" - } - } - } + "password": { + "type": "string" + }, + "port": { + "type": "integer" + } + } + }, + "statisticalEnrichment": { + "type": "object", + "properties": { + "image": { + "type": "object", + "properties": { + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + }, + "livenessProbe": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "initialDelaySeconds": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "periodSeconds": { + "type": "integer" + }, + "successThreshold": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" + } + } + }, + "readinessProbe": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "initialDelaySeconds": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "periodSeconds": { + "type": "integer" + }, + "successThreshold": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" + } + } + }, + "replicaCount": { + "type": "integer" + }, + "resources": { + "type": "object" + }, + "service": { + "type": "object", + "properties": { + "port": { + "type": "integer" + }, + "type": { + "type": "string" } + } } + } } + } } diff --git a/helm/lifelike/values.yaml b/helm/lifelike/values.yaml index 915f62b3f6..cb6cfbb1fa 100644 --- a/helm/lifelike/values.yaml +++ b/helm/lifelike/values.yaml @@ -11,7 +11,8 @@ ingress: className: "" ## -- Ingress annotations - annotations: {} + annotations: + {} # cert-manager.io/cluster-issuer: letsencrypt-prod # nginx.ingress.kubernetes.io/proxy-body-size: "0" @@ -21,7 +22,6 @@ ingress: # hosts: # - lifelike.local - # ------------------------------------------------------------------------------ # API (appserver) # 
------------------------------------------------------------------------------ @@ -136,7 +136,8 @@ frontend: targetMemoryUtilizationPercentage: 80 ## -- Optional resources requests and limits - resources: {} + resources: + {} # requests: # cpu: 500m # memory: 2000Mi @@ -180,7 +181,8 @@ statisticalEnrichment: replicaCount: 1 ## -- Optional resources requests and limits - resources: {} + resources: + {} # requests: # cpu: 500m # memory: 2000Mi @@ -207,12 +209,11 @@ statisticalEnrichment: periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 - + service: type: ClusterIP port: 5000 - # ------------------------------------------------------------------------------ # PDF Parser # ------------------------------------------------------------------------------ @@ -234,7 +235,8 @@ pdfparser: targetCPUUtilizationPercentage: 80 targetMemoryUtilizationPercentage: 80 - resources: {} + resources: + {} # requests: # cpu: 500m # memory: 2000Mi @@ -266,7 +268,6 @@ pdfparser: type: ClusterIP port: 7600 - # ------------------------------------------------------------------------------ # External services # ------------------------------------------------------------------------------ @@ -282,14 +283,13 @@ postgresqlExternal: ## -- Read password from an existing secret, must contain key "postgres-password" existingSecret: "" -## -- External Neo4j connection details, ignored if neo4j.enabled is set to true -neo4jExternal: - host: neo4j.local +## -- External ArangoDB connection details, ignored if arangodb.enabled is set to true +arangodbExternal: + host: arangodb.local port: 7687 - user: neo4j + user: arangodb password: password - database: neo4j - + database: arangodb ## -- External Redis connection details, ignored if redis.enabled is set to true redisExternal: @@ -297,7 +297,6 @@ redisExternal: port: 6379 password: "" - ## -- External Elasticsearch connection details, ignored if elasticsearch.enabled is set to true elasticsearchExternal: host: elasticsearch.local @@ -306,14 +305,12 @@ elasticsearchExternal: password: "" ssl: false - # ---------------------------------------------------------------------------- # # # # Subcharts # # # # ---------------------------------------------------------------------------- # - # ------------------------------------------------------------------------------ # PostgreSQL database # ------------------------------------------------------------------------------ @@ -328,19 +325,19 @@ postgresql: database: database # ------------------------------------------------------------------------------ -# Neo4j database +# ArangoDB database # ------------------------------------------------------------------------------ -## -- Embedded Neo4j subchart deployment -## -- See all available values: https://github.com/neo4j-contrib/neo4j-helm/blob/master/values.yaml -neo4j: - ## -- Set to false to disable automatic deployment of Neo4j +## -- Embedded ArangoDB subchart deployment +## -- See all available values: https://github.com/arangodb-contrib/arangodb-helm/blob/master/values.yaml +arangodb: + ## -- Set to false to disable automatic deployment of ArangoDB enabled: true imageTag: 4.4.3-community - ## -- Neo4j password - neo4jPassword: password + ## -- ArangoDB password + arangodbPassword: password core: ## -- Whether to deploy a standalone server or a replicated cluster @@ -351,7 +348,6 @@ neo4j: ## -- Size of the persistent volume for each server size: 100Gi - # ------------------------------------------------------------------------------ # Elasticsearch # 
------------------------------------------------------------------------------ @@ -379,7 +375,6 @@ elasticsearch: requests: storage: 30Gi - # ------------------------------------------------------------------------------ # Redis (cache) # ------------------------------------------------------------------------------ diff --git a/keycloak b/keycloak deleted file mode 160000 index 738310c786..0000000000 --- a/keycloak +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 738310c786fafb5b1f2f15c3c313e8cb535fe287 diff --git a/lifelike-oss b/lifelike-oss deleted file mode 160000 index 034c727d7c..0000000000 --- a/lifelike-oss +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 034c727d7c5cafbfdcc022fb0ffdadc727b2fdca diff --git a/logstash/.dockerignore b/logstash/.dockerignore new file mode 100644 index 0000000000..1d1fe94df4 --- /dev/null +++ b/logstash/.dockerignore @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file diff --git a/makefile b/makefile index d0adea1d44..a6d04e8ddf 100644 --- a/makefile +++ b/makefile @@ -1,19 +1,6 @@ APPSERVER_PATH=./appserver -ANSIBLE_PATH=./deployment/ansible LMDB_PATH = $(APPSERVER_PATH)/neo4japp/services/annotations/lmdb -# Fetches the password to unlock Ansible vault files -ansible-secrets: - az storage blob download --account-name lifelike --container-name lifelike-secrets --name .vault_secrets_pw --file $(ANSIBLE_PATH)/.vault_secrets_pw --auth-mode login - -# Fetches the credentials (env file) for Azure services -azure-secrets: - az storage blob download --account-name lifelike --container-name lifelike-secrets --name azure-secrets.env --file ./azure-secrets.env --auth-mode login - -# Log into azure container registry -container-login: - az acr login --name lifelike - # Fetches the LMDB files needed to run the application lmdb: docker compose up -d appserver @@ -22,7 +9,7 @@ lmdb: # Sets up everything you need to run the application # Mostly used for first time dev environment setup -init: ansible-secrets azure-secrets container-login githooks docker-build lmdb +init: githooks docker-build lmdb # Sets up commit hooks for linting githooks: @@ -32,11 +19,11 @@ docker-build: docker compose build # Runs enough containers for the application to function -docker-run: azure-secrets container-login lmdb +docker-run: lmdb docker compose up -d # Runs additional containers such as Kibana/Logstash/Filebeat -docker-run-all: azure-secrets container-login lmdb +docker-run-all: lmdb docker compose -f docker-compose.yml -f docker-compose.override.yml -f docker-compose.middleware.yml up -d docker-stop: diff --git a/statistical-enrichment/Dockerfile b/statistical-enrichment/Dockerfile index a9e34baaf2..0bbc5c830d 100644 --- a/statistical-enrichment/Dockerfile +++ b/statistical-enrichment/Dockerfile @@ -1,7 +1,7 @@ # ======================================== # Base image # ======================================== -FROM python:3.10-slim as base +FROM python:3.8-slim as base ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 @@ -45,11 +45,11 @@ USER app # Copy application code COPY --chown=app . . 
-# Neo4j configuration -ENV NEO4J_HOST=neo4j -ENV NEO4J_PORT=7687 -ENV NEO4J_AUTH=neo4j/password -ENV NEO4J_SCHEME=bolt +# ArangoDB configuration +ENV ARANGODB_HOST='' +ENV ARANGODB_PORT='' +ENV ARANGODB_AUTH='' +ENV ARANGODB_SCHEME='' # Redis cache configuration ENV REDIS_HOST=redis diff --git a/statistical-enrichment/Pipfile.lock b/statistical-enrichment/Pipfile.lock index ec732d59cd..ea8f8149ae 100644 --- a/statistical-enrichment/Pipfile.lock +++ b/statistical-enrichment/Pipfile.lock @@ -364,9 +364,7 @@ "version": "==2.1.0" }, "elastic-apm": { - "extras": [ - "flask" - ], + "extras": ["flask"], "hashes": [ "sha256:5d5cbe0c04d9c00303422f1f4e5bb5b4f0a5cc4dddd53f399db9a16fa8289367", "sha256:c78937e44ece380e6fc98b7292cdf28eaf68917d723cef585efaa083168b3b84" @@ -1884,9 +1882,7 @@ "version": "==0.770" }, "mypy-extensions": { - "hashes": [ - "sha256:c8b707883a96efe9b4bb3aaf0dcc07e7e217d7d8368eec4db4049ee9e142f4fd" - ], + "hashes": ["sha256:c8b707883a96efe9b4bb3aaf0dcc07e7e217d7d8368eec4db4049ee9e142f4fd"], "markers": "python_version >= '2.7'", "version": "==0.4.4" }, diff --git a/tests/cypress/browserstack.json b/tests/cypress/browserstack.json index 63cfda5015..ecfddafeaa 100644 --- a/tests/cypress/browserstack.json +++ b/tests/cypress/browserstack.json @@ -7,23 +7,17 @@ { "os": "OS X Big Sur", "browser": "chrome", - "versions": [ - "latest" - ] + "versions": ["latest"] }, { "os": "Windows 10", "browser": "chrome", - "versions": [ - "latest" - ] + "versions": ["latest"] }, { "os": "OS X Big Sur", "browser": "firefox", - "versions": [ - "latest" - ] + "versions": ["latest"] } ], "run_settings": { @@ -36,8 +30,6 @@ "project_name": "Lifelike App", "build_name": "DEMO environment", "parallels": "1", - "system_env_vars": [ - "PERCY_TOKEN" - ] + "system_env_vars": ["PERCY_TOKEN"] } } diff --git a/tests/cypress/cypress/integration/projects.spec.js b/tests/cypress/cypress/integration/projects.spec.js index de0251e03c..1bc8490f37 100644 --- a/tests/cypress/cypress/integration/projects.spec.js +++ b/tests/cypress/cypress/integration/projects.spec.js @@ -1,43 +1,43 @@ /// -const projectName = Cypress.env("existing_project_name") || "CAG-Center"; +const projectName = Cypress.env('existing_project_name') || 'CAG-Center'; -describe("Projects", () => { +describe('Projects', () => { beforeEach(() => { - cy.loginByPassword(Cypress.env("auth_email"), Cypress.env("auth_password")); + cy.loginByPassword(Cypress.env('auth_email'), Cypress.env('auth_password')); }); - it("shows project list", () => { - cy.clickMenuItem("Workbench"); - cy.getActiveTab().contains("h1", "File Browser"); + it('shows project list', () => { + cy.clickMenuItem('Workbench'); + cy.getActiveTab().contains('h1', 'File Browser'); // Should have a list of projects - cy.get("app-browser-project-list .tile a").should("have.length.above", 10); + cy.get('app-browser-project-list .tile a').should('have.length.above', 10); cy.screenshot(); cy.percySnapshot(); }); - it("shows project files", () => { + it('shows project files', () => { cy.visit(`/projects/${projectName}`); // Should have a list of files - cy.get("app-object-list tr").should("have.length.above", 10); + cy.get('app-object-list tr').should('have.length.above', 10); cy.screenshot(); cy.percySnapshot(); }); - it("shows project entity cloud", () => { + it('shows project entity cloud', () => { cy.visit(`/projects/${projectName}`); // Should have a list of files - cy.get("[ngbtooltip='Entity Cloud'").should("be.visible").click(); - cy.contains("button", "For entire project").click(); + 
cy.get("[ngbtooltip='Entity Cloud'").should('be.visible').click(); + cy.contains('button', 'For entire project').click(); // There should be more that 10 entities in the word cluud - cy.get("app-word-cloud svg text", { timeout: 60000 }) - .should("have.length.above", 10) + cy.get('app-word-cloud svg text', { timeout: 60000 }) + .should('have.length.above', 10) .wait(2000); cy.screenshot(); diff --git a/tests/cypress/cypress/integration/tabs.spec.js b/tests/cypress/cypress/integration/tabs.spec.js index 1c542c70b5..64fcff3d4c 100644 --- a/tests/cypress/cypress/integration/tabs.spec.js +++ b/tests/cypress/cypress/integration/tabs.spec.js @@ -1,25 +1,25 @@ -describe("Navigation bugs", function () { +describe('Navigation bugs', function () { beforeEach(() => { - cy.loginByPassword(Cypress.env("auth_email"), Cypress.env("auth_password")); + cy.loginByPassword(Cypress.env('auth_email'), Cypress.env('auth_password')); }); - it("Displays tabs content after returning to workbench", function () { - const fbLabel = "File Browser"; - const kgLabel = "Knowledge Graph Statistics"; + it('Displays tabs content after returning to workbench', function () { + const fbLabel = 'File Browser'; + const kgLabel = 'Knowledge Graph Statistics'; cy.log('Click on "File Browser" tab 5 times'); Cypress._.times(5, () => cy.clickMenuItem(fbLabel)); - cy.getActiveTab().contains("h1", fbLabel); + cy.getActiveTab().contains('h1', fbLabel); cy.screenshot(); cy.log(`Navigating away to ${kgLabel} section`); cy.clickMenuItem(kgLabel); - cy.contains(".module-title", kgLabel).should("be.visible"); + cy.contains('.module-title', kgLabel).should('be.visible'); cy.screenshot(); - cy.log("Navigate back to the Workbench tab"); - cy.clickMenuItem("Workbench"); - cy.getActiveTab().contains("h1", fbLabel); + cy.log('Navigate back to the Workbench tab'); + cy.clickMenuItem('Workbench'); + cy.getActiveTab().contains('h1', fbLabel); cy.screenshot(); }); }); diff --git a/tests/cypress/cypress/support/commands.js b/tests/cypress/cypress/support/commands.js index 1a7ee33c9e..06d79adfb2 100644 --- a/tests/cypress/cypress/support/commands.js +++ b/tests/cypress/cypress/support/commands.js @@ -11,31 +11,29 @@ // // -- This is a parent command -- // Cypress.Commands.add('login', (email, password) => { ... 
}) -Cypress.Commands.add("loginByPassword", (email, password) => { +Cypress.Commands.add('loginByPassword', (email, password) => { cy.log(`Logging in as ${email}`); - cy.request("POST", Cypress.env("auth_login_url"), { email, password }).then( - ({ body }) => { - cy.debug(`Login response: ${JSON.stringify(body)}`); - const { accessToken, refreshToken, user } = body; - const auth = { loggedIn: true, targetUrl: "/", user }; - window.localStorage.setItem("auth", JSON.stringify(auth)); - window.localStorage.setItem("authId", user.id); - window.localStorage.setItem("expires_at", accessToken.exp); - window.localStorage.setItem("access_jwt", accessToken.token); - window.localStorage.setItem("refresh_jwt", refreshToken.token); - } - ); + cy.request('POST', Cypress.env('auth_login_url'), { email, password }).then(({ body }) => { + cy.debug(`Login response: ${JSON.stringify(body)}`); + const { accessToken, refreshToken, user } = body; + const auth = { loggedIn: true, targetUrl: '/', user }; + window.localStorage.setItem('auth', JSON.stringify(auth)); + window.localStorage.setItem('authId', user.id); + window.localStorage.setItem('expires_at', accessToken.exp); + window.localStorage.setItem('access_jwt', accessToken.token); + window.localStorage.setItem('refresh_jwt', refreshToken.token); + }); // Navigate to workbench - return cy.visit("/workspaces/local"); + return cy.visit('/workspaces/local'); }); -Cypress.Commands.add("clickMenuItem", (item) => { +Cypress.Commands.add('clickMenuItem', (item) => { return cy.get(`[ngbtooltip="${item}`).click(); }); -Cypress.Commands.add("getActiveTab", () => { - return cy.get('[ng-reflect-active="true"]').should("be.visible"); +Cypress.Commands.add('getActiveTab', () => { + return cy.get('[ng-reflect-active="true"]').should('be.visible'); }); // // diff --git a/tests/cypress/cypress/support/index.js b/tests/cypress/cypress/support/index.js index 60136d2333..0c15558213 100644 --- a/tests/cypress/cypress/support/index.js +++ b/tests/cypress/cypress/support/index.js @@ -13,10 +13,10 @@ // https://on.cypress.io/configuration // *********************************************************** -import "@percy/cypress"; +import '@percy/cypress'; // Import commands.js using ES2015 syntax: -import "./commands"; +import './commands'; // Alternatively you can use CommonJS syntax: // require('./commands') diff --git a/website b/website deleted file mode 160000 index 81ad65ebf3..0000000000 --- a/website +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 81ad65ebf32776cbcfff37f40fc10c540fc7074f
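For context on the chart values documented above: after this change, pointing the deployment at an existing ArangoDB instead of the bundled subchart goes through the arangodbExternal block, mirroring the examples/external-db.yaml file updated in this diff. A minimal install sketch follows, assuming the chart is checked out under helm/lifelike; the release name, hostname, and password are placeholders for illustration, not values taken from this change.

    # Sketch only: disable the embedded ArangoDB subchart and point the chart at an external server
    helm install lifelike ./helm/lifelike \
      -f ./helm/lifelike/examples/external-db.yaml \
      --set arangodb.enabled=false \
      --set arangodbExternal.host=arangodb.example.com \
      --set arangodbExternal.password=changeme

The same arangodbExternal values are consumed by the lifelike.arangodbEnv helper in templates/_helpers.tpl, which renders them into the ARANGODB_HOST, ARANGODB_PORT, ARANGODB_AUTH, and ARANGODB_DB environment variables read by the appserver.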