forked from blockchain-etl/ethereum-etl-postgres
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ethereum_bigquery_to_gcs.sh
79 lines (65 loc) · 3.5 KB
/
ethereum_bigquery_to_gcs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#set -o xtrace
usage() { echo "Usage: $0 <output_bucket>" 1>&2; exit 1; }
output_bucket=$1
if [ -z "${output_bucket}" ]; then
usage
fi
start_date=$2
end_date=$3
filter_date=false
if [ -n "${start_date}" ] && [ -n "${end_date}" ]; then
filter_date=true
fi
# The logs and contracts tables contain columns with type ARRAY<STRING>.
# BigQuery can't export it to CSV so we need to flatten it.
export_temp_dataset="export_temp_dataset"
export_temp_logs_table="flattened_logs"
export_temp_contracts_table="flattened_contracts"
bq rm -r -f ${export_temp_dataset}
bq mk ${export_temp_dataset}
# Use awk to trim comments in sql files.
flatten_crypto_ethereum_logs_sql=$(cat ./flatten_crypto_ethereum_logs.sql | awk -F '--' '{print $1}'| tr '\n' ' ')
flatten_crypto_ethereum_contracts_sql=$(cat ./flatten_crypto_ethereum_contracts.sql | awk -F '--' '{print $1}' | tr '\n' ' ')
if [ "${filter_date}" = "true" ]; then
flatten_crypto_ethereum_logs_sql="${flatten_crypto_ethereum_logs_sql} where date(block_timestamp) >= '${start_date}' and date(block_timestamp) <= '${end_date}'"
flatten_crypto_ethereum_contracts_sql="${flatten_crypto_ethereum_contracts_sql} where date(block_timestamp) >= '${start_date}' and date(block_timestamp) <= '${end_date}'"
fi
echo "Executing query ${flatten_crypto_ethereum_logs_sql}"
bq --location=US query --destination_table ${export_temp_dataset}.${export_temp_logs_table} --use_legacy_sql=false "${flatten_crypto_ethereum_logs_sql}"
echo "Executing query ${flatten_crypto_ethereum_contracts_sql}"
bq --location=US query --destination_table ${export_temp_dataset}.${export_temp_contracts_table} --use_legacy_sql=false "${flatten_crypto_ethereum_contracts_sql}"
declare -a tables=(
"bigquery-public-data:crypto_ethereum.blocks"
"bigquery-public-data:crypto_ethereum.transactions"
"bigquery-public-data:crypto_ethereum.token_transfers"
"bigquery-public-data:crypto_ethereum.traces"
"bigquery-public-data:crypto_ethereum.tokens"
"${export_temp_dataset}.${export_temp_logs_table}"
"${export_temp_dataset}.${export_temp_contracts_table}"
)
for table in "${tables[@]}"
do
echo "Exporting BigQuery table ${table}"
if [ "${filter_date}" = "true" ]; then
query="select * from \`${table//:/.}\`"
timestamp_column="block_timestamp"
if [ "${table}" = "bigquery-public-data:crypto_ethereum.blocks" ]; then
timestamp_column="timestamp"
fi
query="${query} where date(${timestamp_column}) >= '${start_date}' and date(${timestamp_column}) <= '${end_date}'"
fitered_table_name="${table//[.:-]/_}_fitered"
echo "Executing query ${query}"
bq --location=US query --destination_table "${export_temp_dataset}.${fitered_table_name}" --use_legacy_sql=false "${query}"
output_folder=${fitered_table_name}
bash bigquery_to_gcs.sh "${export_temp_dataset}.${fitered_table_name}" ${output_bucket} ${output_folder}
gsutil -m mv gs://${output_bucket}/${output_folder}/* gs://${output_bucket}/${table}/
else
output_folder=${table}
bash bigquery_to_gcs.sh ${table} ${output_bucket} ${output_folder}
fi
done
# Rename output folder for flattened tables
gsutil -m mv gs://${output_bucket}/${export_temp_dataset}.${export_temp_logs_table}/* gs://${output_bucket}/bigquery-public-data:crypto_ethereum.logs/
gsutil -m mv gs://${output_bucket}/${export_temp_dataset}.${export_temp_contracts_table}/* gs://${output_bucket}/bigquery-public-data:crypto_ethereum.contracts/
# Cleanup
bq rm -r -f ${export_temp_dataset}