From 21bd4f22c36a6b5861e316e0df02fc1dbec71f87 Mon Sep 17 00:00:00 2001 From: Tom Wiesing Date: Fri, 10 Jul 2020 11:50:33 +0200 Subject: [PATCH] Add a new backup script This commit adds a backup script to backup all instances regularly. Fixes #11. --- README.md | 27 ++++++++++++++++ distillery/.env.sample | 3 ++ distillery/backup.sh | 62 ++++++++++++++++++++++++++++++++++++ distillery/lib/10_config.sh | 11 +++++++ distillery/lib/20_sql.sh | 11 ++++++- distillery/system_install.sh | 2 ++ misc/dump_graphdb.sh | 16 ---------- 7 files changed, 115 insertions(+), 17 deletions(-) create mode 100644 distillery/backup.sh delete mode 100644 misc/dump_graphdb.sh diff --git a/README.md b/README.md index d0c5617..3828bdc 100644 --- a/README.md +++ b/README.md @@ -204,6 +204,33 @@ To list all instances, the following command can be used: sudo bash /distillery/ls.sh ``` +## Backups -- 'backup.sh' + +This project comes with a backup script. +To make a backup, run: + +```bash +sudo bash /distillery/backup.sh +``` + +Backups are stored in the `backups/final` directory. +They contain: +- a filesystem backup of all instances +- a complete backup of the SQL database +- N-Quads dumps of all the GraphDB repositories +- a backup of the config file + +Each backup is packaged as a single `.tar.gz` archive. +By default, backups older than thirty days are removed whenever a new backup is made. + +This script does not automatically provision a cronjob. +For example, the following cron job runs a backup every Saturday at 9:00 am: + +``` +MAILTO="some-admin-email@example.com" +0 9 * * 6 /bin/bash /distillery/backup.sh +``` + ## License This project and associated files in this repository are licensed as follows: diff --git a/distillery/.env.sample b/distillery/.env.sample index cff7479..a28014b 100644 --- a/distillery/.env.sample +++ b/distillery/.env.sample @@ -16,6 +16,9 @@ SELF_REDIRECT= # This email address can be configured here. CERTBOT_EMAIL= +# The maximum age (in days) for backups to be kept. 
+# Backups older than this will be removed when a new backup is made. +MAX_BACKUP_AGE=30 # Each Drupal instance requires a corresponding system user, database users and databases. diff --git a/distillery/backup.sh b/distillery/backup.sh new file mode 100644 index 0000000..4c53323 --- /dev/null +++ b/distillery/backup.sh @@ -0,0 +1,62 @@ +#!/bin/bash +set -e + +# resolve the script directory and load the shared library 'lib/lib.sh'. +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cd "$DIR" +source "$DIR/lib/lib.sh" + + +log_info " => Starting backup process. This might take a while. " +wait_for_sql + +BACKUP_SLUG="$(date +%Y%m%dT%H%M%S)-$(randompw)" +BACKUP_INSTANCE_DIR="$DEPLOY_BACKUP_INPROGRESS_DIR/$BACKUP_SLUG" +BACKUP_FINAL_FILE="$DEPLOY_BACKUP_FINAL_DIR/$BACKUP_SLUG.tar.gz" + +BACKUP_SQL_FILE="$BACKUP_INSTANCE_DIR/backup.sql" + +BACKUP_TRIPLESTORE_DIR="$BACKUP_INSTANCE_DIR/triplestore" +BACKUP_TRIPLESTORE_SYSTEM="$BACKUP_TRIPLESTORE_DIR/system.nq" + +BACKUP_FILESYSTEM_DIR="$BACKUP_INSTANCE_DIR/instances" + +# create the backup directories +log_info " => Making '$BACKUP_INSTANCE_DIR'" +mkdir -p "$BACKUP_INSTANCE_DIR" +mkdir -p "$DEPLOY_BACKUP_FINAL_DIR" + +# backup the configuration +cp "$CONFIG_FILE" "$BACKUP_INSTANCE_DIR/.env" + +# Back up the SQL database +log_info " => Backing up the SQL database" +dockerized_mysqldump --all-databases > "$BACKUP_SQL_FILE" + +# Backup the triplestore system +log_info " => Backing up Triplestore System" +mkdir -p "$BACKUP_TRIPLESTORE_DIR" +curl -X GET -H "Accept:application/n-quads" "http://localhost:7200/repositories/SYSTEM/statements?infer=false" > "$BACKUP_TRIPLESTORE_SYSTEM" + +# backup individual repos +for REPO in `grep -oP '(?<=#repositoryID> ")[^"]+' $BACKUP_TRIPLESTORE_SYSTEM`; do + log_info " => Backing up Triplestore Repository '$REPO'" + curl -X GET -H "Accept:application/n-quads" "http://localhost:7200/repositories/$REPO/statements?infer=false" > "$BACKUP_TRIPLESTORE_DIR/repo_$REPO.nq" +done + +# backup the 
filesystem +log_info " => Backing up instance filesystem" +cp -rpT "$DEPLOY_INSTANCES_DIR" "$BACKUP_FILESYSTEM_DIR" + +# Package the backup into a .tar.gz +log_info " => Packaging '$BACKUP_FINAL_FILE'" +pushd "$BACKUP_INSTANCE_DIR" > /dev/null +tar --totals --checkpoint=10000 -zcf "$BACKUP_FINAL_FILE" . +popd > /dev/null + +# Clean up the unpacked backup +log_info " => Cleaning up '$BACKUP_INSTANCE_DIR'" +rm -rf "$BACKUP_INSTANCE_DIR" + +log_info " => Removing backups older than $MAX_BACKUP_AGE days" +find "$DEPLOY_BACKUP_FINAL_DIR" -type f -mtime "+$MAX_BACKUP_AGE" -print -exec rm -f {} \; \ No newline at end of file diff --git a/distillery/lib/10_config.sh b/distillery/lib/10_config.sh index 4cd02bd..85f4d9e 100644 --- a/distillery/lib/10_config.sh +++ b/distillery/lib/10_config.sh @@ -159,6 +159,13 @@ if ! is_valid_number "$PASSWORD_LENGTH"; then exit 1; fi +# The 'MAX_BACKUP_AGE' variable must be a valid number. +if ! is_valid_number "$MAX_BACKUP_AGE"; then + log_error "Variable 'MAX_BACKUP_AGE' is missing or not a valid number. "; + log_info "Please verify that it is set correctly in '.env'. "; + exit 1; +fi + # The 'CERTBOT_EMAIL' variable should either be empty or a valid email if [ -n "$SELF_REDIRECT" ]; then if ! is_valid_https_url "$SELF_REDIRECT"; then @@ -178,5 +185,9 @@ DEPLOY_TRIPLESTORE_DIR="$DEPLOY_ROOT/core/triplestore" DEPLOY_SQL_DIR="$DEPLOY_ROOT/core/sql" DEPLOY_INSTANCES_DIR="$DEPLOY_ROOT/instances" +DEPLOY_BACKUP_DIR="$DEPLOY_ROOT/backups" +DEPLOY_BACKUP_INPROGRESS_DIR="$DEPLOY_BACKUP_DIR/inprogress" +DEPLOY_BACKUP_FINAL_DIR="$DEPLOY_BACKUP_DIR/final" + log_ok "Read and validated configuration file. 
" \ No newline at end of file diff --git a/distillery/lib/20_sql.sh b/distillery/lib/20_sql.sh index 086092e..30c1a8b 100644 --- a/distillery/lib/20_sql.sh +++ b/distillery/lib/20_sql.sh @@ -41,7 +41,7 @@ function dockerized_mysql() { return $retval } -# 'dockerized_mysql' runs an sql command in the sql docker container interactively +# 'dockerized_mysql_interactive' runs an sql command in the sql docker container interactively function dockerized_mysql_interactive() { pushd "$DEPLOY_SQL_DIR" > /dev/null docker exec -ti `docker-compose ps -q sql` mysql "$@" @@ -50,6 +50,15 @@ function dockerized_mysql_interactive() { return $retval } +# 'dockerized_mysqldump' runs a mysqldump command +function dockerized_mysqldump() { + pushd "$DEPLOY_SQL_DIR" > /dev/null + docker exec -i `docker-compose ps -q sql` mysqldump "$@" + retval=$? + popd > /dev/null + return $retval +} + ### ### Bookkeeping sql ### diff --git a/distillery/system_install.sh b/distillery/system_install.sh index 46cd020..d361a5f 100755 --- a/distillery/system_install.sh +++ b/distillery/system_install.sh @@ -51,6 +51,8 @@ mkdir -p "$DEPLOY_WEB_DIR" mkdir -p "$DEPLOY_SELF_DIR" mkdir -p "$DEPLOY_TRIPLESTORE_DIR" mkdir -p "$DEPLOY_SQL_DIR" +mkdir -p "$DEPLOY_BACKUP_INPROGRESS_DIR" +mkdir -p "$DEPLOY_BACKUP_FINAL_DIR" log_info "=> Creating 'distillery' network" docker network create distillery || true diff --git a/misc/dump_graphdb.sh b/misc/dump_graphdb.sh deleted file mode 100644 index e242d8a..0000000 --- a/misc/dump_graphdb.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -DATE=`date +%Y%m%dT%H%M%S` -mkdir $DATE - -mkdir $DATE/graphdb - -curl -X GET -H "Accept:application/n-quads" "http://localhost:7200/repositories/SYSTEM/statements?infer=false" > "$DATE/graphdb/SYSTEM.nq" - -for REPO in `grep -oP '(?<=#repositoryID> ")[^"]+' $DATE/graphdb/SYSTEM.nq`; do - echo "dumping $REPO ..." 
- curl -X GET -H "Accept:application/n-quads" "http://localhost:7200/repositories/$REPO/statements?infer=false" > "$DATE/graphdb/${REPO}.nq" -done - -tar cfz "$DATE.tgz" "$DATE/" -rm -r "$DATE" \ No newline at end of file