Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New pg_upgrade script #18

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions tasks/postgres-pgupgrades.yml
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
---
- set_fact:
postgres_dbname: "{{ postgres_pgupgrade.dbname }}"
postgres_pgbouncer_uri: "{{ postgres_pgupgrade.pgbouncer_uri|default(None) }}"
postgres_pgbouncer_databases: "{{ postgres_pgupgrade.databases|default([]) }}"
postgres_cluster_name: "{{ postgres_pgupgrade.cluster_name }}"
postgres_port: "{{ postgres_pgupgrade.port }}"
postgres_old_cluster_version: "{{ postgres_pgupgrade.old_cluster_version }}"
postgres_old_cluster_name: "{{ postgres_pgupgrade.old_cluster_name }}"
postgres_new_cluster_version: "{{ postgres_pgupgrade.new_cluster_version }}"
postgres_new_cluster_name: "{{ postgres_pgupgrade.new_cluster_name }}"
postgres_standby_server: "{{ postgres_pgupgrade.standby_server }}"
postgres_standby_old_cluster_name: "{{ postgres_pgupgrade.standby_old_cluster_name|default(postgres_pgupgrade.old_cluster_name) }}"
postgres_standby_new_cluster_name: "{{ postgres_pgupgrade.standby_new_cluster_name|default(postgres_pgupgrade.new_cluster_name) }}"

- name: Find matching new cluster
set_fact:
postgres_new_cluster: "{{ postgres_clusters|selectattr('name','equalto',postgres_new_cluster_name)|selectattr('version','equalto', postgres_new_cluster_version|float)|list|first }}"
postgres_new_cluster: "{{ postgres_clusters|selectattr('name','equalto',postgres_cluster_name)|selectattr('version','equalto', postgres_new_cluster_version|float)|list|first }}"

- name: Extract database port of new matching cluster
set_fact:
postgres_new_cluster_port: "{{ (postgres_new_cluster|default({'port': 5432}))['port'] }}"

- name: Upload pg_upgrade.sh script
template: src=pg_upgrade.sh.j2 dest=/var/lib/postgresql/pg_upgrade_{{ postgres_old_cluster_version }}_{{ postgres_new_cluster_version }}_{{ postgres_dbname }} owner=postgres group=postgres mode=0700
template: src=pg_upgrade.sh.j2 dest=/var/lib/postgresql/pg_upgrade_{{ postgres_old_cluster_version }}_{{ postgres_new_cluster_version }}_{{ postgres_cluster_name }} owner=postgres group=postgres mode=0700
no_log: True
231 changes: 80 additions & 151 deletions templates/pg_upgrade.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -7,169 +7,98 @@
# - Paul Bonaud
# Description:
# This helper tries to strictly follow the official documentation of PostgreSQL
# from the pg_upgrade page: https://www.postgresql.org/docs/9.5/static/pgupgrade.html
# from the pg_upgrade page: https://www.postgresql.org/docs/current/static/pgupgrade.html
# Usage:
# WARNING: Please read both the documentation and this script if you want to run it
# 1. edit this file and remove the "DRY_RUN=1" line
# 2. execute this script as the postgres admin user
# 1. run this script on the postgresql server with a user able to use sudo as root and postgres users
# 2. edit this file and remove the "exit 1" line to really upgrade the cluster

{% if postgres_pgbouncer_uri is defined and postgres_pgbouncer_uri %}
PGBOUNCER_PG_URI={{ postgres_pgbouncer_uri }}
{% endif %}
DATABASE_NAME={{ postgres_dbname }}
DATABASE_PORT={{ postgres_new_cluster_port }}
# Output every line executed and stop on the first error
set -xe
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be problematic to fail on pipes too?

Suggested change
set -xe
set -xeo pipefail

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, these options were passed on the bash command line (the first line of the file).
Could you change that line from #!/bin/bash -ex to #!/usr/bin/env bash, please? (for better cross-platform compatibility)


# Enforce English messages
export LC_ALL=C

OLD_CLUSTER_VERSION={{ postgres_old_cluster_version }}
OLD_CLUSTER_NAME={{ postgres_old_cluster_name }}
OLD_CLUSTER_DATADIR=/var/lib/postgresql/$OLD_CLUSTER_VERSION/$OLD_CLUSTER_NAME
NEW_CLUSTER_VERSION={{ postgres_new_cluster_version }}
NEW_CLUSTER_NAME={{ postgres_new_cluster_name }}
NEW_CLUSTER_DATADIR=/var/lib/postgresql/$NEW_CLUSTER_VERSION/$NEW_CLUSTER_NAME
CLUSTER_PORT={{ postgres_port }}
CLUSTER_NAME={{ postgres_cluster_name }}

STANDBY_SERVER={{ postgres_standby_server }}
STANDBY_OLD_CLUSTER_NAME={{ postgres_standby_old_cluster_name }}
STANDBY_OLD_CLUSTER_DATADIR=/var/lib/postgresql/$OLD_CLUSTER_VERSION/$STANDBY_OLD_CLUSTER_NAME
STANDBY_NEW_CLUSTER_NAME={{ postgres_standby_new_cluster_name }}
STANDBY_NEW_CLUSTER_DATADIR=/var/lib/postgresql/$NEW_CLUSTER_VERSION/$STANDBY_NEW_CLUSTER_NAME
CLUSTER_OLD_VERSION={{ postgres_old_cluster_version }}
CLUSTER_NEW_VERSION={{ postgres_new_cluster_version }}

DRY_RUN=1
{% if postgres_pgbouncer_uri is defined and postgres_pgbouncer_uri %}
PGBOUNCER_URI={{ postgres_pgbouncer_uri }}
# Space-separated list: the `for ... in $PGBOUNCER_DATABASES` loops in this
# script rely on shell word splitting, which splits on whitespace only. Joining
# with ', ' would leave a trailing comma glued to every name but the last.
PGBOUNCER_DATABASES="{{ postgres_pgbouncer_databases | join(' ') }}"
{% endif %}

if [ "$(whoami)" != "postgres" ]; then
echo "ERROR: you must run this script as the 'postgres' admin local user"
exit 1
fi
# Test the current user is allowed to run commands through sudo on this host
sudo whoami && echo $(hostname)" OK"

{% if postgres_pgbouncer_uri is defined and postgres_pgbouncer_uri %}
# Test the configuration is correcly deployed on pgbouncer
psql $PGBOUNCER_PG_URI --command 'show databases;' | grep "^ $DATABASE_NAME "
# Test connection to pgbouncer is OK
psql $PGBOUNCER_PG_URI --command 'show pools;'
# Test connection to pgbouncer is OK and see PGBOUNCER_DATABASE
POOLS=$(psql -P pager "$PGBOUNCER_URI" --command 'SHOW POOLS;')

for PGBOUNCER_DATABASE in $PGBOUNCER_DATABASES; do
if [[ ! $POOLS =~ "$PGBOUNCER_DATABASE" ]]
then
echo "$PGBOUNCER_DATABASE not found!"
exit 1
fi
done
{% endif %}

if [ ! -f /usr/lib/postgresql/$NEW_CLUSTER_VERSION/bin/pg_upgrade ]; then
echo "ERROR: /usr/lib/postgresql/$NEW_CLUSTER_VERSION/bin/pg_upgrade is not available on this machine."
exit 1
fi
# We need ssh access to the standby server
ssh $STANDBY_SERVER "ls -A $STANDBY_OLD_CLUSTER_DATADIR 2>&1>/dev/null"
ssh $STANDBY_SERVER "ls -A $STANDBY_NEW_CLUSTER_DATADIR 2>&1>/dev/null"
if [ ! -d $OLD_CLUSTER_DATADIR ] || [ ! -d $NEW_CLUSTER_DATADIR ]; then
echo "ERROR: Make sure both old cluster and new cluster data dirs exists"
echo "ERROR: on both primary and standby servers ($STANDBY_SERVER)."
echo "ERROR: old_cluster datadir: $OLD_CLUSTER_DATADIR"
echo "ERROR: new_cluster datadir: $NEW_CLUSTER_DATADIR"
#exit 1
fi

echo "WARNING: You are about to:"
# Test old cluster exists and version matches
sudo pg_ctlcluster "$CLUSTER_OLD_VERSION" "$CLUSTER_NAME" status
# psql is run directly as postgres (no intermediate `sh -c`) so that
# $CLUSTER_PORT is expanded by this shell and the SQL reaches psql as one argument.
sudo -u postgres psql --tuples-only --no-align --port="$CLUSTER_PORT" --command='SHOW SERVER_VERSION;' | grep "^$CLUSTER_OLD_VERSION\."

# Test new version is installed
sudo "/usr/lib/postgresql/$CLUSTER_NEW_VERSION/bin/pg_upgrade" --version
# Test new cluster exists and is stopped
sudo pg_ctlcluster "$CLUSTER_NEW_VERSION" "$CLUSTER_NAME" status | grep 'no server running'
# Test the port of the new cluster is the same as the old one
grep 'port =' "/etc/postgresql/$CLUSTER_NEW_VERSION/$CLUSTER_NAME/postgresql.conf" | grep "$CLUSTER_PORT"

# Run pg_upgrade in hard-link mode from the old cluster to the new one.
# Arguments: any extra pg_upgrade options (e.g. --check), appended to the command.
# The working directory is changed to /var/lib/postgresql before pg_upgrade starts.
run_pg_upgrade() {
  cd /var/lib/postgresql
  sudo -u postgres "/usr/lib/postgresql/$CLUSTER_NEW_VERSION/bin/pg_upgrade" --link --jobs=8 \
    -b "/usr/lib/postgresql/$CLUSTER_OLD_VERSION/bin" \
    -B "/usr/lib/postgresql/$CLUSTER_NEW_VERSION/bin" \
    -d "/var/lib/postgresql/$CLUSTER_OLD_VERSION/$CLUSTER_NAME" \
    -D "/var/lib/postgresql/$CLUSTER_NEW_VERSION/$CLUSTER_NAME" \
    -o " -c config_file=/etc/postgresql/$CLUSTER_OLD_VERSION/$CLUSTER_NAME/postgresql.conf" \
    -O " -c config_file=/etc/postgresql/$CLUSTER_NEW_VERSION/$CLUSTER_NAME/postgresql.conf" \
    "$@"
}
# Kept as a variable holding the function name so that `$PG_UPGRADE_COMMAND [options]`
# keeps working: a plain string containing `&&`, line continuations and quotes
# is not re-parsed as shell syntax when expanded.
PG_UPGRADE_COMMAND=run_pg_upgrade

# Test to upgrade the data
$PG_UPGRADE_COMMAND --check

# Force CHECKPOINT so the CHECKPOINT on shutdown will be faster
sudo -u postgres psql --port="$CLUSTER_PORT" --command='CHECKPOINT;'

# Safety stop: remove the next line to perform the full upgrade
exit 1;

{% if postgres_pgbouncer_uri is defined and postgres_pgbouncer_uri %}
echo "WARNING: * PAUSE connections to $DATABASE_NAME database."
for PGBOUNCER_DATABASE in $PGBOUNCER_DATABASES; do
# Pause the databases;
psql "$PGBOUNCER_URI" --command "PAUSE $PGBOUNCER_DATABASE;"
done
{% endif %}
echo "WARNING: * STOP the primary database in $OLD_CLUSTER_VERSION version (and standbys)"
echo "WARNING: * dry-run the pg_upgrade from $OLD_CLUSTER_VERSION to $NEW_CLUSTER_VERSION"
echo "WARNING: * run the pg_upgrade in place with hardlinks"
echo "WARNING: * START the primary database in $NEW_CLUSTER_VERSION version (and standbys)"

# Shut down the cluster. BEWARE: ensure no other databases are used in this cluster.
sudo pg_ctlcluster "$CLUSTER_OLD_VERSION" "$CLUSTER_NAME" stop -m fast

# Really upgrade the data
# PG_UPGRADE_COMMAND holds a shell command line; eval makes the shell parse any
# `&&`, line continuations and quotes it contains instead of passing them to the
# first command as literal words. The value is built from the template's own
# variables, not from user input.
eval "$PG_UPGRADE_COMMAND"

# Start the cluster
sudo pg_ctlcluster "$CLUSTER_NEW_VERSION" "$CLUSTER_NAME" start

# Test new cluster is accepting connections
# psql is run directly as postgres (no intermediate `sh -c`) so that
# $CLUSTER_PORT is expanded by this shell and the SQL reaches psql as one argument.
sudo -u postgres psql --port="$CLUSTER_PORT" --command='SELECT NOW();'

{% if postgres_pgbouncer_uri is defined and postgres_pgbouncer_uri %}
echo "WARNING: * RESUME connections to $DATABASE_NAME."
for PGBOUNCER_DATABASE in $PGBOUNCER_DATABASES; do
# Resume the connections
psql "$PGBOUNCER_URI" --command "RESUME $PGBOUNCER_DATABASE;"
done
{% endif %}
echo "================"
read -p "Are you sure? (y/n)" -n 1 -r
echo "================"
if [[ -z $DRY_RUN ]] && [[ $REPLY =~ ^[Yy]$ ]]; then
{% if postgres_pgbouncer_uri is defined and postgres_pgbouncer_uri %}
# Pause the databases;
psql $PGBOUNCER_PG_URI --command "PAUSE $DATABASE_NAME;"
{% endif %}

# Stop the $OLD_CLUSTER_VERSION server
pg_ctlcluster $OLD_CLUSTER_VERSION $OLD_CLUSTER_NAME stop -m fast

# Get latest checkpoint location of primary
echo "INFO: Primary PG controldata with latest checkpoint location"
/usr/lib/postgresql/$OLD_CLUSTER_VERSION/bin/pg_controldata $OLD_CLUSTER_DATADIR | head -n8

# Wait for standbys to catch up latest checkpoint
# TODO: how long to wait?
echo "Waiting for 9 seconds..."
sleep 9
echo "INFO: Standby PG controldata with latest checkpoint location (after shutdown)"
ssh $STANDBY_SERVER pg_ctlcluster $OLD_CLUSTER_VERSION $STANDBY_OLD_CLUSTER_NAME stop -m fast
ssh $STANDBY_SERVER /usr/lib/postgresql/$OLD_CLUSTER_VERSION/bin/pg_controldata $STANDBY_OLD_CLUSTER_DATADIR | head -n8

echo "WARNING: 'Latest Checkpoint location' values should match on primary and on standby"
echo "================"
read -p "Do they match? PLEASE ANSWER (y/n)" -n 1 -r
echo "================"

if [[ $REPLY =~ ^[Yy]$ ]]; then
# Test to upgrade the data
time /usr/lib/postgresql/$NEW_CLUSTER_VERSION/bin/pg_upgrade --check --link \
-b /usr/lib/postgresql/$OLD_CLUSTER_VERSION/bin \
-B /usr/lib/postgresql/$NEW_CLUSTER_VERSION/bin \
-d $OLD_CLUSTER_DATADIR \
-D $NEW_CLUSTER_DATADIR \
-o " -c config_file=/etc/postgresql/$OLD_CLUSTER_VERSION/$OLD_CLUSTER_NAME/postgresql.conf" \
-O " -c config_file=/etc/postgresql/$NEW_CLUSTER_VERSION/$NEW_CLUSTER_NAME/postgresql.conf"

# Really upgrade the data
time /usr/lib/postgresql/$NEW_CLUSTER_VERSION/bin/pg_upgrade --link \
-b /usr/lib/postgresql/$OLD_CLUSTER_VERSION/bin \
-B /usr/lib/postgresql/$NEW_CLUSTER_VERSION/bin \
-d $OLD_CLUSTER_DATADIR \
-D $NEW_CLUSTER_DATADIR \
-o " -c config_file=/etc/postgresql/$OLD_CLUSTER_VERSION/$OLD_CLUSTER_NAME/postgresql.conf" \
-O " -c config_file=/etc/postgresql/$NEW_CLUSTER_VERSION/$NEW_CLUSTER_NAME/postgresql.conf"

## Upgrade standby server

### Making sure new datadir is empty
if [ -z "$(ssh $STANDBY_SERVER ls -A $STANDBY_NEW_CLUSTER_DATADIR)" ]; then
echo "INFO: New cluster datadir is empty on standby server. Good."
else
echo "WARNING: New cluster datadir is NOT empty on standby server. Deleting the content of $STANDBY_NEW_CLUSTER_DATADIR on $STANDBY_SERVER now..."
ssh $STANDBY_SERVER rm -rf $STANDBY_NEW_CLUSTER_DATADIR
fi

### Save configuration files
TMPDIR=$(ssh $STANDBY_SERVER mktemp -d -t pg_upgrade_XXXX)
ssh $STANDBY_SERVER mv $STANDBY_OLD_CLUSTER_DATADIR/recovery.conf $TMPDIR

### Upgrade (via rsync)
rsync --archive --delete --hard-links --size-only --no-inc-recursive $OLD_CLUSTER_DATADIR $NEW_CLUSTER_DATADIR $STANDBY_SERVER:$(dirname $STANDBY_NEW_CLUSTER_DATADIR)

### Restore saved configured files
ssh $STANDBY_SERVER mv $TMPDIR/recovery.conf $STANDBY_NEW_CLUSTER_DATADIR/
ssh $STANDBY_SERVER rm -rf $TMPDIR

# Start the $NEW_CLUSTER_VERSION primary server
pg_ctlcluster $NEW_CLUSTER_VERSION $NEW_CLUSTER_NAME start

# Test local primary is accepting connections
psql --port=$DATABASE_PORT --dbname=$DATABASE_NAME --command="SELECT NOW();"

{% if postgres_pgbouncer_uri is defined and postgres_pgbouncer_uri %}
# Resume the connexions
psql $PGBOUNCER_PG_URI --command "RESUME $DATABASE_NAME;"
{% endif %}

# Start the $NEW_CLUSTER_VERSION standby server
ssh $STANDBY_SERVER pg_ctlcluster $NEW_CLUSTER_VERSION $STANDBY_NEW_CLUSTER_NAME start

echo "DONE. Upgrade of PG cluster from $OLD_CLUSTER_VERSION/$OLD_CLUSTER_NAME to $NEW_CLUSTER_VESION/$NEW_CLUSTER_NAME is finished! Well done!"
else
# Start the $OLD_CLUSTER_VERSION primary server
pg_ctlcluster $OLD_CLUSTER_VERSION $OLD_CLUSTER_NAME start
# Start the $OLD_CLUSTER_VERSION standby server
ssh $STANDBY_SERVER pg_ctlcluster $OLD_CLUSTER_VERSION $OLD_CLUSTER_NAME start

# Test local primary is accepting connections
psql --dbname=$DATABASE_NAME --command="SELECT NOW();"

{% if postgres_pgbouncer_uri is defined and postgres_pgbouncer_uri %}
# Resume the connexions
psql $PGBOUNCER_PG_URI --command "RESUME $DATABASE_NAME;"
{% endif %}
fi
else
echo "INFO: You didn't want to continue or was in 'dry-run' mode"
fi

# Force generate minimal optimizer statistics rapidly
sudo -u postgres /usr/lib/postgresql/$CLUSTER_NEW_VERSION/bin/vacuumdb --port=$CLUSTER_PORT --jobs=8 --all --analyze-in-stages