Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

upgrade bcdata to support catalogue api updates and caching sources to file #602

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ghcr.io/osgeo/gdal:ubuntu-small-3.8.3
FROM ghcr.io/osgeo/gdal:ubuntu-small-3.10.0

RUN apt-get update && apt-get --assume-yes upgrade \
&& apt-get -qq install -y --no-install-recommends postgresql-common \
Expand All @@ -12,13 +12,22 @@ RUN apt-get update && apt-get --assume-yes upgrade \
&& apt-get -qq install -y --no-install-recommends zip \
&& apt-get -qq install -y --no-install-recommends unzip \
&& apt-get -qq install -y --no-install-recommends parallel \
&& apt-get -qq install -y --no-install-recommends python3-dev \
&& apt-get -qq install -y --no-install-recommends python3-pip \
&& apt-get -qq install -y --no-install-recommends python3-dev \
&& apt-get -qq install -y --no-install-recommends python3-venv \
&& apt-get -qq install -y --no-install-recommends python3-psycopg2 \
&& pip3 install --upgrade numpy \
&& pip3 install bcdata==0.10.4 \
&& pip3 install scikit-image \
&& curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
&& rm -rf /var/lib/apt/lists/*

RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
&& unzip awscliv2.zip \
&& ./aws/install \
&& rm -rf /var/lib/apt/lists/*
&& ./aws/install

WORKDIR /home/bcfishpass

# Create a dedicated Python virtual environment under /opt/venv and install
# the Python dependencies (numpy, bcdata, scikit-image) into it, one pip
# invocation per package.
RUN python3 -m venv /opt/venv \
    && /opt/venv/bin/python -m pip install -U pip \
    && /opt/venv/bin/python -m pip install --no-cache-dir --upgrade numpy \
    && /opt/venv/bin/python -m pip install --no-cache-dir bcdata \
    && /opt/venv/bin/python -m pip install --no-cache-dir scikit-image

# Put the venv's bin directory first on PATH so executables installed in the
# venv are found before any system-wide ones.
ENV PATH="/opt/venv/bin:$PATH"
94 changes: 94 additions & 0 deletions jobs/bcgw_sources.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
[
{
"source": "whse_fish.fiss_fish_obsrvtn_pnt_sp",
"schedule": "W"
},
{
"source": "whse_fish.fiss_obstacles_pnt_sp",
"schedule": "W"
},
{
"source": "whse_fish.pscis_assessment_svw",
"schedule": "W"
},
{
"source": "whse_fish.pscis_design_proposal_svw",
"schedule": "W"
},
{
"source": "whse_fish.pscis_habitat_confirmation_svw",
"schedule": "W"
},
{
"source": "whse_fish.pscis_remediation_svw",
"schedule": "W"
},
{
"source": "whse_admin_boundaries.clab_indian_reserves",
"schedule": "M"
},
{
"source": "whse_admin_boundaries.clab_national_parks",
"schedule": "M"
},
{
"source": "whse_basemapping.gba_local_reg_greenspaces_sp",
"schedule": "M"
},
{
"source": "whse_basemapping.gba_railway_structure_lines_sp",
"schedule": "M"
},
{
"source": "whse_basemapping.gba_railway_tracks_sp",
"schedule": "M"
},
{
"source": "whse_basemapping.gba_transmission_lines_sp",
"schedule": "M"
},
{
"source": "whse_basemapping.gns_geographical_names_sp",
"schedule": "M"
},
{
"source": "whse_environmental_monitoring.envcan_hydrometric_stn_sp",
"schedule": "M"
},
{
"source": "whse_fish.fiss_stream_sample_sites_sp",
"schedule": "M"
},
{
"source": "whse_forest_tenure.ften_range_poly_svw",
"schedule": "M"
},
{
"source": "whse_imagery_and_base_maps.mot_road_structure_sp",
"schedule": "M"
},
{
"source": "whse_legal_admin_boundaries.abms_municipalities_sp",
"schedule": "M"
},
{
"source": "whse_mineral_tenure.og_petrlm_dev_rds_pre06_pub_sp",
"schedule": "M"
},
{
"source": "whse_tantalis.ta_conservancy_areas_svw",
"schedule": "M"
},
{
"source": "whse_tantalis.ta_park_ecores_pa_svw",
"schedule": "M"
},
{
"source": "whse_forest_tenure.ften_road_section_lines_svw",
"schedule": "M"
},
{
"source": "whse_mineral_tenure.og_road_segment_permit_sp",
"schedule": "M"
}
]
30 changes: 15 additions & 15 deletions jobs/load_monthly
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,21 @@ PSQL="psql $DATABASE_URL -v ON_ERROR_STOP=1"

# bcdata loads
# ----
bcdata bc2pg -r whse_admin_boundaries.clab_indian_reserves
bcdata bc2pg -r whse_admin_boundaries.clab_national_parks
bcdata bc2pg -r whse_basemapping.gba_local_reg_greenspaces_sp
bcdata bc2pg -r whse_basemapping.gba_railway_structure_lines_sp
bcdata bc2pg -r whse_basemapping.gba_railway_tracks_sp
bcdata bc2pg -r whse_basemapping.gba_transmission_lines_sp
bcdata bc2pg -r whse_basemapping.gns_geographical_names_sp
bcdata bc2pg -r whse_environmental_monitoring.envcan_hydrometric_stn_sp
bcdata bc2pg -r whse_fish.fiss_stream_sample_sites_sp
bcdata bc2pg -r whse_forest_tenure.ften_range_poly_svw
bcdata bc2pg -r whse_imagery_and_base_maps.mot_road_structure_sp
bcdata bc2pg -r whse_legal_admin_boundaries.abms_municipalities_sp
bcdata bc2pg -r whse_mineral_tenure.og_petrlm_dev_rds_pre06_pub_sp
bcdata bc2pg -r whse_tantalis.ta_conservancy_areas_svw
bcdata bc2pg -r whse_tantalis.ta_park_ecores_pa_svw
# Load every source whose "schedule" is "M" (monthly) in bcgw_sources.json
# from its cached parquet file on object storage into the database table of
# the same name.
#
# jq performs the schedule filtering and emits one source name per line, so
# only a single jq process is needed; `IFS= read -r` keeps each name intact.
jq -r '.[] | select(.schedule == "M") | .source' bcgw_sources.json |
  while IFS= read -r source; do
    echo "Loading $source from cache"

    # OGR_TRUNCATE=YES together with -append reloads into the existing table
    # definition instead of dropping and recreating it.
    ogr2ogr -f PostgreSQL \
      "PG:$DATABASE_URL" \
      --config OGR_TRUNCATE=YES \
      -append \
      -nln "$source" \
      "/vsis3/bchamp/bcdata/$source.parquet" \
      "$source"
  done

# load DRA from bchamp bucket (with public portion of source transport_line schema)
$PSQL -c "drop table if exists bcdata.transport_line" # in case of any failed loads
Expand Down
27 changes: 17 additions & 10 deletions jobs/load_weekly
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,35 @@ PSQL="psql $DATABASE_URL -v ON_ERROR_STOP=1"
# weekly bcfishobs processing is scheduled via workflow bcfishobs repository

# bcdata loads
bcdata bc2pg -r whse_fish.fiss_fish_obsrvtn_pnt_sp --query "POINT_TYPE_CODE = 'Observation'"
bcdata bc2pg -r whse_fish.fiss_obstacles_pnt_sp
bcdata bc2pg -r whse_fish.pscis_assessment_svw
bcdata bc2pg -r whse_fish.pscis_design_proposal_svw
bcdata bc2pg -r whse_fish.pscis_habitat_confirmation_svw
bcdata bc2pg -r whse_fish.pscis_remediation_svw
bcdata bc2pg -r whse_forest_tenure.ften_road_section_lines_svw
bcdata bc2pg -r whse_mineral_tenure.og_road_segment_permit_sp
# Load every source whose "schedule" is "W" (weekly) in bcgw_sources.json
# from its cached parquet file on object storage into the database table of
# the same name.
#
# jq performs the schedule filtering and emits one source name per line, so
# only a single jq process is needed; `IFS= read -r` keeps each name intact.
#
# NOTE(review): the bc2pg command this loop replaces loaded
# whse_fish.fiss_fish_obsrvtn_pnt_sp with
#   --query "POINT_TYPE_CODE = 'Observation'"
# and no equivalent filter is applied here - confirm whether the cached file
# is pre-filtered or whether a -where clause is needed for that source.
jq -r '.[] | select(.schedule == "W") | .source' bcgw_sources.json |
  while IFS= read -r source; do
    echo "Loading $source from cache"

    # OGR_TRUNCATE=YES together with -append reloads into the existing table
    # definition instead of dropping and recreating it.
    ogr2ogr -f PostgreSQL \
      "PG:$DATABASE_URL" \
      --config OGR_TRUNCATE=YES \
      -append \
      -nln "$source" \
      "/vsis3/bchamp/bcdata/$source.parquet" \
      "$source"
  done

# cabd
$PSQL -c "truncate cabd.dams"
# Append CABD dams into cabd.dams, requesting only features with
# province_territory_code = bc and use_analysis = true from the CABD API.
cabd_dams_url="https://cabd-web.azurewebsites.net/cabd-api/features/dams?filter=province_territory_code:eq:bc&filter=use_analysis:eq:true"
ogr2ogr \
  -f PostgreSQL \
  "PG:$DATABASE_URL" \
  -append \
  --config OGR_TRUNCATE=YES \
  -nln cabd.dams \
  "$cabd_dams_url" \
  OGRGeoJSON

$PSQL -c "truncate cabd.waterfalls"
# Append CABD waterfalls into cabd.waterfalls, requesting only features with
# province_territory_code = bc and use_analysis = true from the CABD API.
cabd_waterfalls_url="https://cabd-web.azurewebsites.net/cabd-api/features/waterfalls?filter=province_territory_code:eq:bc&filter=use_analysis:eq:true"
ogr2ogr \
  -f PostgreSQL \
  "PG:$DATABASE_URL" \
  -append \
  --config OGR_TRUNCATE=YES \
  -nln cabd.waterfalls \
  "$cabd_waterfalls_url" \
  OGRGeoJSON
17 changes: 17 additions & 0 deletions jobs/replicate_bcgw
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
set -euxo pipefail

# Replicate BC Geographic Warehouse sources to object storage.
#
# Usage: replicate_bcgw <schedule>
#   <schedule>  frequency code (D/M/W/Q/A); every entry of bcgw_sources.json
#               whose "schedule" equals this code is dumped.
#
# Each matching source is streamed from `bcdata dump` into ogr2ogr, which
# writes it to /vsis3/bchamp/bcdata/<source>.parquet.

# Fail early with a usage message when the schedule argument is missing or
# empty, rather than hitting an unbound-variable / `[` syntax error later.
schedule="${1:?usage: replicate_bcgw <schedule: D/M/W/Q/A>}"

# dump sources matching frequency (D/M/W/Q/A) to object storage
# jq filters on the schedule itself and emits one source name per line, so
# no per-entry jq calls are needed; `IFS= read -r` keeps each name intact.
jq -r --arg schedule "$schedule" \
  '.[] | select(.schedule == $schedule) | .source' bcgw_sources.json |
  while IFS= read -r source; do

    echo "Replicating $source to object storage"

    bcdata dump "$source" --promote-to-multi |
      ogr2ogr -f Parquet \
        "/vsis3/bchamp/bcdata/$source.parquet" \
        /vsistdin/
  done
Loading