From f64e9e6b782abf2af0498b600787ccb1320c4df6 Mon Sep 17 00:00:00 2001 From: Albert Fougy Date: Wed, 27 Mar 2019 21:10:40 -0400 Subject: [PATCH 1/3] improved on script with aria2c --- get_latest_dumps.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/get_latest_dumps.sh b/get_latest_dumps.sh index 3044d68..93b478d 100755 --- a/get_latest_dumps.sh +++ b/get_latest_dumps.sh @@ -14,7 +14,12 @@ TEST="" echo "" > $D_TMP for f in `wget -c --user-agent="$USER_AGENT" --header="$ACCEPT" -qO- $D_URL_LIST | grep -Eio "$D_PATTERN" | sort | uniq | tail -n 4` ; do - echo $D_URL_DIR$f >> $D_TMP + echo $D_URL_DIR$f >> $D_TMP done -wget -c --user-agent="$USER_AGENT" --header="$ACCEPT" --no-clobber --input-file=$D_TMP $TEST --progress=bar +IFS=' +' + +for f in $(cat $D_TMP); do + aria2c -c "$f" +done \ No newline at end of file From e5002c0052aecb3ec157b6c49837da58f0e4728e Mon Sep 17 00:00:00 2001 From: Albert Fougy Date: Fri, 29 Mar 2019 17:12:09 -0400 Subject: [PATCH 2/3] refactored with conditional --- get_latest_dumps.sh | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/get_latest_dumps.sh b/get_latest_dumps.sh index 93b478d..daf7503 100755 --- a/get_latest_dumps.sh +++ b/get_latest_dumps.sh @@ -1,4 +1,4 @@ -#/bin/bash +#!/bin/bash #set -xv USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22" @@ -8,18 +8,25 @@ D_URL_DIR="http://discogs-data.s3-us-west-2.amazonaws.com/data/"$(date +"%Y")"/" D_TMP=/tmp/discogs.urls D_PATTERN="discogs_[0-9]{8}_(artists|labels|masters|releases).xml.gz" + TEST="" [[ "$1" == '--test' ]] && TEST='--spider -S' echo "" > $D_TMP -for f in `wget -c --user-agent="$USER_AGENT" --header="$ACCEPT" -qO- $D_URL_LIST | grep -Eio "$D_PATTERN" | sort | uniq | tail -n 4` ; do +for f in $(wget -c --user-agent="$USER_AGENT" --header="$ACCEPT" \ + -qO- $D_URL_LIST | grep -Eio "$D_PATTERN" | sort | uniq | tail -n 4) ; do 
echo $D_URL_DIR$f >> $D_TMP done -IFS=' -' - -for f in $(cat $D_TMP); do - aria2c -c "$f" -done \ No newline at end of file +if ! type "aria2c" > /dev/null; then + wget -c --user-agent="$USER_AGENT" --header="$ACCEPT" --no-clobber \ + --input-file=$D_TMP $TEST --show-progress --progress=bar +else + IFS=' + ' + + for f in $(cat $D_TMP); do + aria2c -c "$f" + done +fi \ No newline at end of file From d1b84f880a15733725e7ccec4bdaccf66331cd70 Mon Sep 17 00:00:00 2001 From: Albert Fougy Date: Fri, 29 Mar 2019 17:16:39 -0400 Subject: [PATCH 3/3] unnecessary space --- get_latest_dumps.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/get_latest_dumps.sh b/get_latest_dumps.sh index daf7503..6c2e09c 100755 --- a/get_latest_dumps.sh +++ b/get_latest_dumps.sh @@ -8,7 +8,6 @@ D_URL_DIR="http://discogs-data.s3-us-west-2.amazonaws.com/data/"$(date +"%Y")"/" D_TMP=/tmp/discogs.urls D_PATTERN="discogs_[0-9]{8}_(artists|labels|masters|releases).xml.gz" - TEST="" [[ "$1" == '--test' ]] && TEST='--spider -S'