Skip to content

Commit

Permalink
Daily process doc + new script
Browse files Browse the repository at this point in the history
  • Loading branch information
CharlesNepote committed Dec 4, 2024
1 parent e470cbe commit 2edef01
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 0 deletions.
18 changes: 18 additions & 0 deletions scripts/mirabelle/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,21 @@ Restart Datasette and check if you're running the last version: https://mirabell

Restart Datasette and check if you're running the last plugin version: https://mirabelle.openfoodfacts.org/-/plugins


## Daily process

Here are the cron jobs launched every day:
```bash
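# BEWARE: in a crontab, an unescaped '%' ends the command (the rest becomes its stdin), so it must be written '\%', as in the $(date ...) calls below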
# Update the table of false-positive data quality issues (i.e. products that can't be fixed, for various reasons)
# Can be launched at any time
0 4 * * * bash /home/off/mirabelle/update_not_fixable.sh >> /home/off/mirabelle/update_not_fixable.$(date +'\%Y-\%m').log
# Database daily update. Should be launched after the Open Food Facts export has completed successfully
30 4 * * * bash /home/off/mirabelle/products_daily_update.sh >> /home/off/mirabelle/products_daily_update.$(date +'\%Y-\%m').log
# Launch data quality daily email; must be launched after the database update
0 8 * * * bash /home/off/mirabelle/distri-qual.sh --normal >> /home/off/mirabelle/distri-qual.$(date +'\%Y-\%m').log 2>&1
```

68 changes: 68 additions & 0 deletions scripts/mirabelle/update_not_fixable.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash
#
# Rebuilds dq-issues-non-fixable.db (table `non_fixable`) from the products
# that carry the `producer_data_issue` property in the folksonomy API.

# Every file name used below is relative to this directory, so stop right
# away if it is not reachable instead of working in the wrong place.
cd /home/off/mirabelle || {
  echo "$(date +'%Y-%m-%dT%H:%M:%S') - Error: cannot cd to /home/off/mirabelle"
  exit 1
}

# ---- Setup
# The here-document below is only a block comment: it is fed to the no-op
# command ':' and its delimiter is quoted so that nothing inside it is ever
# expanded or executed.
# NOTE(review): these notes mention dq-issues.db / false_positives while the
# script works on dq-issues-non-fixable.db / non_fixable -- confirm which
# names are current.
: <<'comments'
Target DB (currently we only use id and code fields).
sqlite3 dq-issues.db < <( cat <<EOF
CREATE TABLE IF NOT EXISTS false_positives (
id INTEGER PRIMARY KEY,
code TEXT NOT NULL /*,
property TEXT,
value TEXT */
);
EOF
id|code |property |value
1 |0023673947553 |producer_data_issue |Fat typo, 13 is not possible
2 |1575583805368 |producer_data_issue |Fat can't be 0 g
comments

# Write one log line on stdout: "<timestamp> - <message>".
# The timestamp comes from `date` and is formatted as YYYY-mm-ddTHH:MM:SS.
#   $1 - message to log
log() {
  local stamp
  stamp=$(date +'%Y-%m-%dT%H:%M:%S')
  printf '%s - %s\n' "${stamp}" "$1"
}

log "Getting non fixable..."

# TODO: get other types of non-fixable products:
# * barcode_conflict:yes
# * barcode_clash:
# * ingredient_list:multiple:yes ?
# * nutrition_facts:multiple:yes ?
# * data_quality:product_opener_issue ?
# * wrong_barcode:yes ?
# * data_quality
#
# The URL is quoted because '?' is a glob character for the shell.
# --fail makes curl return a non-zero status on HTTP errors (4xx/5xx), so
# that an error page is reported as a curl error instead of being parsed.
issues=$(curl --silent --fail "https://api.folksonomy.openfoodfacts.org/products?k=producer_data_issue")
response=$?
# Exit if an error occurred
if [[ ${response} -ne 0 ]]; then
  log "Curl error: ${response}"
  exit 1
fi
# Keep one product code per line, each one written as a single-quoted SQL
# string literal (embedded single quotes are doubled). The codes come from an
# external API and are later pasted into an INSERT statement, so they must
# not be able to break out of the literal. Entries without a product are
# skipped. The JSON is passed quoted to avoid word splitting and globbing.
issues=$(printf '%s\n' "${issues}" \
  | jq --raw-output --arg q "'" \
    '.[].product | select(. != null) | tostring | $q + (split($q) | join($q + $q)) + $q')
#jq --raw-output '.[] | [.product, .v] | @csv'
# Exit if the API has returned nothing (or if jq could not parse the answer)
if [[ "${issues}" = "" ]]; then
  log "Found no issues. Process error? Keeping previous DB and exit..."
  exit 1
fi

# Back up the current DB, then replace it with a new empty file
log "Delete previous DB and create the new one..."
# Overwrite the previous backup in a single step, so that it is never deleted
# before the new one exists. On a first run there is no DB yet: nothing to
# back up, just carry on.
if [[ -e dq-issues-non-fixable.db ]]; then
  cp -f -- dq-issues-non-fixable.db dq-issues-non-fixable.db.bak || { log "Error: $?"; exit 1; }
fi
# rm + touch creates a fresh file rather than truncating the existing one in
# place; -f keeps rm quiet when the file does not exist.
rm -f -- dq-issues-non-fixable.db || { log "Error: $?"; exit 1; }
touch dq-issues-non-fixable.db || { log "Error: $?"; exit 1; }

# Update false positive
# Feed the SQL below to sqlite3. ${issues} holds one value per line; the sed
# program wraps every line in parentheses and joins the lines with ", " to
# build the VALUES list (it relies on GNU sed printing the pattern space when
# N reaches the end of the input). The final SELECT prints the resulting
# table on stdout.
sqlite3 dq-issues-non-fixable.db <<EOF
CREATE TABLE IF NOT EXISTS non_fixable (id INTEGER PRIMARY KEY, code TEXT NOT NULL);
-- Insert
INSERT OR IGNORE INTO non_fixable (code) VALUES $(echo "${issues}" | sed 's/.*/(&)/;:l;N;s/\n\(.*\)$/, (\1)/;tl');
SELECT * FROM non_fixable;
EOF
# Save the status immediately: any later command, including a [[ ]] test,
# overwrites $? (which is why the status must not be read inside the message).
sqlite_status=$?
if [[ ${sqlite_status} -ne 0 ]]; then
  log "Error: ${sqlite_status}. Recovering backup."
  mv -f dq-issues-non-fixable.db.bak dq-issues-non-fixable.db
  exit 1
fi

log "End of script. Normal exit with error code 0"
exit 0

0 comments on commit 2edef01

Please sign in to comment.