feat: add backup routine for SPAR postgres DB #1556

Merged · 21 commits · Oct 11, 2024
13 changes: 8 additions & 5 deletions .github/workflows/.deploy.yml
@@ -114,24 +114,26 @@ jobs:
     timeout-minutes: 10
     strategy:
       matrix:
-        name: [backend, frontend, oracle-api]
+        name: [backend, backup, frontend, oracle-api]
         include:
           - name: backend
             file: backend/openshift.deploy.yml
             overwrite: true
             parameters:
               -p AWS_COGNITO_ISSUER_URI=https://cognito-idp.ca-central-1.amazonaws.com/${{ vars.VITE_USER_POOLS_ID }}
             verification_path: "health"
+          - name: backup
+            file: common/openshift.backup.yml
+            parameters:
+              -p PG_DB_IMAGE=postgis/postgis:15-master
+            post_rollout: oc create job --from=cronjob/${{ github.event.repository.name }}-${{ inputs.target }}-database-backup ${{ github.event.repository.name }}-${{ inputs.target }}-database-backup-$(date +%Y%m%d%H%M%S)
           - name: frontend
             file: frontend/openshift.deploy.yml
             overwrite: true
             parameters:
               -p FAM_MODDED_ZONE=${{ needs.init.outputs.fam-modded-zone }}
               -p VITE_SPAR_BUILD_VERSION=snapshot-${{ inputs.target || github.event.number }}
               -p VITE_USER_POOLS_ID=${{ vars.VITE_USER_POOLS_ID }}
           - name: oracle-api
             file: oracle-api/openshift.deploy.yml
             overwrite: true
             parameters:
               -p AWS_COGNITO_ISSUER_URI=https://cognito-idp.ca-central-1.amazonaws.com/${{ vars.VITE_USER_POOLS_ID }}
               ${{ github.event_name == 'pull_request' && '-p CPU_LIMIT=100m' || '' }}
@@ -147,13 +149,14 @@ jobs:
           oc_namespace: ${{ vars.OC_NAMESPACE }}
           oc_server: ${{ vars.OC_SERVER }}
           oc_token: ${{ secrets.OC_TOKEN }}
-          overwrite: ${{ matrix.overwrite }}
+          overwrite: true
           parameters:
             -p TAG=${{ inputs.tag }}
             -p ZONE=${{ inputs.target }}
             ${{ github.event_name == 'pull_request' && '-p MIN_REPLICAS=1' || '' }}
             ${{ github.event_name == 'pull_request' && '-p MAX_REPLICAS=1' || '' }}
             ${{ matrix.parameters }}
+          post_rollout: ${{ matrix.post_rollout || '' }}
           verification_path: ${{ matrix.verification_path }}
           verification_retry_attempts: 5
           verification_retry_seconds: 20
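The new `post_rollout` hook instantiates the backup CronJob right after each deploy, so a fresh dump exists without waiting for the nightly schedule. The same run can be kicked off by hand; a minimal sketch, assuming an illustrative zone `pr-123` and the repository name `nr-spar`:

```bash
# Trigger a one-off run of the backup CronJob, mirroring the post_rollout hook.
# ZONE and NAME are illustrative; substitute the real deployment zone.
ZONE=pr-123
NAME=nr-spar
JOB="${NAME}-${ZONE}-database-backup-$(date +%Y%m%d%H%M%S)"

oc create job --from=cronjob/"${NAME}-${ZONE}-database-backup" "${JOB}"

# Wait for the job to finish, then inspect its output.
oc wait --for=condition=complete "job/${JOB}" --timeout=10m
oc logs "job/${JOB}"
```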
239 changes: 239 additions & 0 deletions common/openshift.backup.yml
@@ -0,0 +1,239 @@
apiVersion: template.openshift.io/v1
kind: Template
labels:
  app: ${NAME}-${ZONE}
  app.kubernetes.io/part-of: ${NAME}-${ZONE}
parameters:
  - name: NAME
    description: Product name
    value: nr-spar
  - name: COMPONENT
    description: Component name
    value: database-backup
  - name: ZONE
    description: Deployment zone, e.g. pr-### or prod
    required: true
  - name: RESTORE_DIR
    description: Directory to be used for restoring the backup
    value: /tmp/restore
  - name: REGISTRY
    description: Container registry to import from (internal is image-registry.openshift-image-registry.svc:5000)
    value: ghcr.io
  - name: BACKUP_DIR
    description: "The name of the root backup directory"
    required: true
    value: /tmp/backup
  - name: NUM_BACKUPS
    description: The number of backup files to be retained
    required: false
    value: "5"
  - name: "JOB_SERVICE_ACCOUNT"
    description: "Name of the service account to execute the job as"
    value: "default"
    required: true
  - name: "SUCCESS_JOBS_HISTORY_LIMIT"
    description: "The number of successful jobs that will be retained"
    value: "5"
    required: true
  - name: "FAILED_JOBS_HISTORY_LIMIT"
    description: "The number of failed jobs that will be retained"
    value: "2"
    required: true
  - name: "JOB_BACKOFF_LIMIT"
    description: "The number of attempts to try for a successful job outcome"
    value: "0"
  - name: PVC_SIZE
    description: Volume space available for data, e.g. 512Mi, 2Gi.
    value: 256Mi
  - name: PG_DB_IMAGE
    description: PostgreSQL image (namespace/name:tag) to be used for backup
    required: true
  - name: TAG
    description: Dummy param, for convenience
  - name: MIN_REPLICAS
    description: Dummy param, for convenience
  - name: MAX_REPLICAS
    description: Dummy param, for convenience
objects:
  - kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: ${NAME}-${ZONE}-${COMPONENT}
      labels:
        app: ${NAME}-${ZONE}
    spec:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: "${PVC_SIZE}"
      storageClassName: netapp-file-standard
  - kind: CronJob
    apiVersion: "batch/v1"
    metadata:
      name: ${NAME}-${ZONE}-${COMPONENT}
      labels:
        app: ${NAME}-${ZONE}
        cronjob: ${NAME}-${ZONE}
    spec:
      schedule: "0 0 * * *"
      concurrencyPolicy: "Replace"
      successfulJobsHistoryLimit: ${{SUCCESS_JOBS_HISTORY_LIMIT}}
      failedJobsHistoryLimit: ${{FAILED_JOBS_HISTORY_LIMIT}}
      jobTemplate:
        metadata:
          labels:
            app: ${NAME}-${ZONE}
            cronjob: ${NAME}-${ZONE}
        spec:
          backoffLimit: ${{JOB_BACKOFF_LIMIT}}
          template:
            metadata:
              labels:
                app: ${NAME}-${ZONE}
                cronjob: ${NAME}-${ZONE}
            spec:
              containers:
                - name: ${NAME}-${ZONE}-${COMPONENT}
                  image: ${REGISTRY}/${PG_DB_IMAGE}
                  command: ["/bin/sh", "-c"]
                  args:
                    - |
                      pg_dump \
                        -U ${POSTGRESQL_USER} \
                        -h ${NAME}-${ZONE}-database \
                        -d ${POSTGRESQL_DATABASE} \
                        --data-only \
                        --schema=nr-spar \
                        --inserts \
                        --no-comments \
                        --on-conflict-do-nothing \
                        --no-sync \
                        --exclude-table=nr-spar.cone_collection_method_list \
                        --exclude-table=nr-spar.gametic_methodology_list \
                        --exclude-table=nr-spar.genetic_class_list \
                        --exclude-table=nr-spar.genetic_worth_list \
                        --exclude-table=nr-spar.method_of_payment_list \
                        --exclude-table=nr-spar.seedlot_source_list \
                        --exclude-table=nr-spar.seedlot_status_list \
                        --exclude-table=nr-spar.etl_execution_log_hist \
                        --exclude-table=nr-spar.etl_execution_map \
                        --exclude-table=nr-spar.etl_execution_schedule \
                        --file=${BACKUP_DIR}/backup_$(date +%Y-%m-%d).sql \
                      &&
                      find "${BACKUP_DIR}" -type f -mtime +$NUM_BACKUPS -exec rm -f {} \; &&
                      cp -r ${BACKUP_DIR}/backup_$(date +%Y-%m-%d).sql ${RESTORE_DIR}/W0__restore.sql
                  volumeMounts:
                    - mountPath: "${BACKUP_DIR}"
                      name: ${NAME}-${ZONE}-${COMPONENT}
                    - mountPath: "${RESTORE_DIR}"
                      name: ${NAME}-${ZONE}-${COMPONENT}
                  env:
                    - name: RESTORE_DIR
                      value: "${RESTORE_DIR}"
                    - name: BACKUP_DIR
                      value: "${BACKUP_DIR}"
                    - name: NUM_BACKUPS
                      value: "${NUM_BACKUPS}"
                    - name: POSTGRESQL_DATABASE
                      valueFrom:
                        secretKeyRef:
                          name: ${NAME}-${ZONE}-database
                          key: database-name
                    - name: POSTGRESQL_USER
                      valueFrom:
                        secretKeyRef:
                          name: ${NAME}-${ZONE}-database
                          key: database-user
                    - name: POSTGRESQL_PASSWORD
                      valueFrom:
                        secretKeyRef:
                          name: ${NAME}-${ZONE}-database
                          key: database-password
                    - name: PGPASSWORD
                      valueFrom:
                        secretKeyRef:
                          name: ${NAME}-${ZONE}-database
                          key: database-password
              volumes:
                - name: ${NAME}-${ZONE}-${COMPONENT}
                  persistentVolumeClaim:
                    claimName: ${NAME}-${ZONE}-${COMPONENT}
              restartPolicy: "Never"
              terminationGracePeriodSeconds: 30
              activeDeadlineSeconds: 1600
              dnsPolicy: "ClusterFirst"
              serviceAccountName: "${JOB_SERVICE_ACCOUNT}"
              serviceAccount: "${JOB_SERVICE_ACCOUNT}"
  - kind: CronJob
    apiVersion: "batch/v1"
    metadata:
      name: ${NAME}-${ZONE}-${COMPONENT}-restore
      labels:
        app: ${NAME}-${ZONE}
        cronjob: ${NAME}-${ZONE}
    spec:
      schedule: "0 0 31 2 *"
      concurrencyPolicy: "Replace"
      successfulJobsHistoryLimit: ${{SUCCESS_JOBS_HISTORY_LIMIT}}
      failedJobsHistoryLimit: ${{FAILED_JOBS_HISTORY_LIMIT}}
      jobTemplate:
        metadata:
          labels:
            app: ${NAME}-${ZONE}
            cronjob: ${NAME}-${ZONE}
        spec:
          backoffLimit: ${{JOB_BACKOFF_LIMIT}}
          template:
            metadata:
              labels:
                app: ${NAME}-${ZONE}
                cronjob: ${NAME}-${ZONE}
            spec:
              containers:
                - name: ${NAME}-${ZONE}-${COMPONENT}-restore
                  image: ${REGISTRY}/${PG_DB_IMAGE}
                  command: ["/bin/sh", "-c"]
                  args:
                    - |
                      find ${RESTORE_DIR} -type f -name "*.sql" -print0 | sort -zV |
                      while IFS= read -r -d '' sql_file; do
                        echo "Running SQL file: $sql_file"
                        psql -h ${TARGET_HOST} -U ${POSTGRESQL_USER} -d ${POSTGRESQL_DATABASE} -f $sql_file
                      done
                  volumeMounts:
                    - mountPath: "${RESTORE_DIR}"
                      name: ${NAME}-${ZONE}-${COMPONENT}
                  env:
                    - name: RESTORE_DIR
                      value: "${RESTORE_DIR}"
                    - name: POSTGRESQL_DATABASE
                      valueFrom:
                        secretKeyRef:
                          name: ${NAME}-${ZONE}-database
                          key: database-name
                    - name: POSTGRESQL_USER
                      valueFrom:
                        secretKeyRef:
                          name: ${NAME}-${ZONE}-database
                          key: database-user
                    - name: POSTGRESQL_PASSWORD
                      valueFrom:
                        secretKeyRef:
                          name: ${NAME}-${ZONE}-database
                          key: database-password
                    - name: PGPASSWORD
                      valueFrom:
                        secretKeyRef:
                          name: ${NAME}-${ZONE}-database
                          key: database-password
              volumes:
                - name: ${NAME}-${ZONE}-${COMPONENT}
                  persistentVolumeClaim:
                    claimName: ${NAME}-${ZONE}-${COMPONENT}
              restartPolicy: "Never"
              terminationGracePeriodSeconds: 30
              activeDeadlineSeconds: 1600
              dnsPolicy: "ClusterFirst"
              serviceAccountName: "${JOB_SERVICE_ACCOUNT}"
              serviceAccount: "${JOB_SERVICE_ACCOUNT}"
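The backup CronJob above runs nightly at midnight: it takes a plain-SQL, data-only dump of the `nr-spar` schema (static code-list and ETL bookkeeping tables excluded), prunes dump files older than `NUM_BACKUPS` days, and stages the newest dump as `${RESTORE_DIR}/W0__restore.sql` for the restore job. A dump copied off the PVC can also be replayed by hand; a rough sketch, assuming an illustrative zone `pr-123`, a local file `backup_2024-10-11.sql`, and credentials read from the same `*-database` secret the jobs use:

```bash
# Forward the zone's database service to a local port (runs in the background).
oc port-forward service/nr-spar-pr-123-database 15432:5432 &

# Pull connection details from the same secret the CronJobs reference.
DB_NAME=$(oc get secret nr-spar-pr-123-database -o jsonpath='{.data.database-name}' | base64 -d)
DB_USER=$(oc get secret nr-spar-pr-123-database -o jsonpath='{.data.database-user}' | base64 -d)
export PGPASSWORD=$(oc get secret nr-spar-pr-123-database -o jsonpath='{.data.database-password}' | base64 -d)

# Replay the dump; it contains only INSERTs with ON CONFLICT DO NOTHING,
# so rows that already exist are left untouched.
psql -h localhost -p 15432 -U "$DB_USER" -d "$DB_NAME" -f backup_2024-10-11.sql
```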
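The restore CronJob is scheduled for `0 0 31 2 *` (February 31st), a date that never occurs, so it never fires on its own and is effectively manual-only; when instantiated, it replays every `*.sql` file under `${RESTORE_DIR}` in version-sorted order. Note that `TARGET_HOST` is read by the restore script but not defined in the template, so it has to be injected into the pod spec first. A hedged sketch of a manual restore, with illustrative names and assuming the installed `oc` supports `set env` on CronJobs:

```bash
# TARGET_HOST is not set in the template; point it at the zone database first
# (assumes `oc set env` can patch CronJob pod specs in the installed client).
oc set env cronjob/nr-spar-pr-123-database-backup-restore \
  TARGET_HOST=nr-spar-pr-123-database

# Instantiate the restore CronJob as a one-off job and follow its output.
RESTORE_JOB="nr-spar-pr-123-database-backup-restore-$(date +%Y%m%d%H%M%S)"
oc create job --from=cronjob/nr-spar-pr-123-database-backup-restore "${RESTORE_JOB}"
oc logs -f "job/${RESTORE_JOB}"
```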