From 1e1ca99ddf001e7aa5a2a217e0c4c78c4c5b9f85 Mon Sep 17 00:00:00 2001
From: Charles Nepote
Date: Wed, 4 Dec 2024 15:07:00 +0100
Subject: [PATCH] Major update

---
 scripts/mirabelle/empty2null.sh              |  51 +++++
 scripts/mirabelle/products.schema.2024-01-18 | 210 +++++++++++++++++++
 scripts/mirabelle/products_daily_update.sh   | 156 +++++++++++---
 3 files changed, 384 insertions(+), 33 deletions(-)
 create mode 100755 scripts/mirabelle/empty2null.sh
 create mode 100644 scripts/mirabelle/products.schema.2024-01-18

diff --git a/scripts/mirabelle/empty2null.sh b/scripts/mirabelle/empty2null.sh
new file mode 100755
index 00000000..51249bbb
--- /dev/null
+++ b/scripts/mirabelle/empty2null.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# usage: ./empty2null.sh mydb.db
+
+teefile=tmp.txt
+
+function empty2null() {
+  for t in $(
+sqlite3 "$db" "SELECT name FROM sqlite_master
+WHERE type=='table' ORDER BY name" | tee ${teefile} )
+  do
+    nc=0
+    onec=''
+    mulc=''
+    where='WHERE'
+    cma="UPDATE OR ABORT \"${t}\" SET
+   "
+    for c in $(
+printf "SELECT '\"'||name||'\"' FROM pragma_table_info('%s')
+WHERE type == 'INTEGER' or type == 'FLOAT' ORDER BY cid ASC;\\n" "$t" \
+  | sqlite3 "$db" | tee -a ${teefile} )
+    do
+      onec+=$(printf "%s %s = NULL" "$cma" "$c")
+      mulc+=$(printf "%s %s = IIF(%s=='',NULL,%s)" "$cma" "$c" "$c" "$c")
+      cma='
+,'
+      test $nc -gt 0 && where+=' OR'
+      where+=" $c==''"
+      nc=$(( nc + 1 ))
+    done
+    if [ $nc -gt 0 ]
+    then
+      if [ $nc -gt 1 ]
+      then
+        printf '%s\n %s;\n' "$mulc" "${where}"
+      else
+        printf '%s\n %s;\n' "$onec" "${where}"
+      fi
+    fi
+  done
+}
+
+db=$1
+
+echo "$(date +'%Y-%m-%dT%H:%M:%S') - Building query"
+sql=$(empty2null)
+echo "$sql"
+
+echo "$(date +'%Y-%m-%dT%H:%M:%S') - Converting..."
+sqlite3 "$db" "$sql"
+echo "$(date +'%Y-%m-%dT%H:%M:%S') - ... converting empty to NULL ended"
diff --git a/scripts/mirabelle/products.schema.2024-01-18 b/scripts/mirabelle/products.schema.2024-01-18
new file mode 100644
index 00000000..47162d1f
--- /dev/null
+++ b/scripts/mirabelle/products.schema.2024-01-18
@@ -0,0 +1,210 @@
+/* head -n 1 en.openfoodfacts.org.products.csv | /usr/local/bin/vd -f tsv */;
+/* head -n 1 en.openfoodfacts.org.products.csv | tr "\t" "\n" | wc -l */;
+CREATE TABLE IF NOT EXISTS "all" (
+    [code] TEXT,
+    [url] TEXT,
+    [creator] TEXT,
+    [created_t] INTEGER,
+    [created_datetime] TEXT,
+    [last_modified_t] INTEGER,
+    [last_modified_datetime] TEXT,
+    [last_modified_by] TEXT,
+    [last_updated_t] INTEGER,
+    [last_updated_datetime] TEXT,
+    [product_name] TEXT,
+    [abbreviated_product_name] TEXT,
+    [generic_name] TEXT,
+    [quantity] TEXT,
+    [packaging] TEXT,
+    [packaging_tags] TEXT,
+    [packaging_en] TEXT,
+    [packaging_text] TEXT,
+    [brands] TEXT,
+    [brands_tags] TEXT,
+    [categories] TEXT,
+    [categories_tags] TEXT,
+    [categories_en] TEXT,
+    [origins] TEXT,
+    [origins_tags] TEXT,
+    [origins_en] TEXT,
+    [manufacturing_places] TEXT,
+    [manufacturing_places_tags] TEXT,
+    [labels] TEXT,
+    [labels_tags] TEXT,
+    [labels_en] TEXT,
+    [emb_codes] TEXT,
+    [emb_codes_tags] TEXT,
+    [first_packaging_code_geo] TEXT,
+    [cities] TEXT,
+    [cities_tags] TEXT,
+    [purchase_places] TEXT,
+    [stores] TEXT,
+    [countries] TEXT,
+    [countries_tags] TEXT,
+    [countries_en] TEXT,
+    [ingredients_text] TEXT,
+    [ingredients_tags] TEXT,
+    [ingredients_analysis_tags] TEXT,
+    [allergens] TEXT,
+    [allergens_en] TEXT,
+    [traces] TEXT,
+    [traces_tags] TEXT,
+    [traces_en] TEXT,
+    [serving_size] TEXT,
+    [serving_quantity] FLOAT,
+    [no_nutrition_data] INTEGER,
+    [additives_n] INTEGER,
+    [additives] TEXT,
+    [additives_tags] TEXT,
+    [additives_en] TEXT,
+    [nutriscore_score] INTEGER,
+    [nutriscore_grade] TEXT,
+    [nova_group] INTEGER,
+    [pnns_groups_1] TEXT,
+    [pnns_groups_2] TEXT,
+    [food_groups] TEXT,
+    [food_groups_tags] TEXT,
+    [food_groups_en] TEXT,
+    [states] TEXT,
+    [states_tags] TEXT,
+    [states_en] TEXT,
+    [brand_owner] TEXT,
+    [ecoscore_score] FLOAT,
+    [ecoscore_grade] TEXT,
+    [nutrient_levels_tags] TEXT,
+    [product_quantity] TEXT,
+    [owner] TEXT,
+    [data_quality_errors_tags] TEXT,
+    [unique_scans_n] INTEGER,
+    [popularity_tags] TEXT,
+    [completeness] FLOAT,
+    [last_image_t] INTEGER,
+    [last_image_datetime] TEXT,
+    [main_category] TEXT,
+    [main_category_en] TEXT,
+    [image_url] TEXT,
+    [image_small_url] TEXT,
+    [image_ingredients_url] TEXT,
+    [image_ingredients_small_url] TEXT,
+    [image_nutrition_url] TEXT,
+    [image_nutrition_small_url] TEXT,
+    [energy-kj_100g] FLOAT,
+    [energy-kcal_100g] FLOAT,
+    [energy_100g] FLOAT,
+    [energy-from-fat_100g] FLOAT,
+    [fat_100g] FLOAT,
+    [saturated-fat_100g] FLOAT,
+    [butyric-acid_100g] FLOAT,
+    [caproic-acid_100g] FLOAT,
+    [caprylic-acid_100g] FLOAT,
+    [capric-acid_100g] FLOAT,
+    [lauric-acid_100g] FLOAT,
+    [myristic-acid_100g] FLOAT,
+    [palmitic-acid_100g] FLOAT,
+    [stearic-acid_100g] FLOAT,
+    [arachidic-acid_100g] FLOAT,
+    [behenic-acid_100g] FLOAT,
+    [lignoceric-acid_100g] FLOAT,
+    [cerotic-acid_100g] FLOAT,
+    [montanic-acid_100g] FLOAT,
+    [melissic-acid_100g] FLOAT,
+    [unsaturated-fat_100g] FLOAT,
+    [monounsaturated-fat_100g] FLOAT,
+    [omega-9-fat_100g] FLOAT,
+    [polyunsaturated-fat_100g] FLOAT,
+    [omega-3-fat_100g] FLOAT,
+    [omega-6-fat_100g] FLOAT,
+    [alpha-linolenic-acid_100g] FLOAT,
+    [eicosapentaenoic-acid_100g] FLOAT,
+    [docosahexaenoic-acid_100g] FLOAT,
+    [linoleic-acid_100g] FLOAT,
+    [arachidonic-acid_100g] FLOAT,
+    [gamma-linolenic-acid_100g] FLOAT,
+    [dihomo-gamma-linolenic-acid_100g] FLOAT,
+    [oleic-acid_100g] FLOAT,
+    [elaidic-acid_100g] FLOAT,
+    [gondoic-acid_100g] FLOAT,
+    [mead-acid_100g] FLOAT,
+    [erucic-acid_100g] FLOAT,
+    [nervonic-acid_100g] FLOAT,
+    [trans-fat_100g] FLOAT,
+    [cholesterol_100g] FLOAT,
+    [carbohydrates_100g] FLOAT,
+    [sugars_100g] FLOAT,
+    [added-sugars_100g] FLOAT,
+    [sucrose_100g] FLOAT,
+    [glucose_100g] FLOAT,
+    [fructose_100g] FLOAT,
+    [lactose_100g] FLOAT,
+    [maltose_100g] FLOAT,
+    [maltodextrins_100g] FLOAT,
+    [starch_100g] FLOAT,
+    [polyols_100g] FLOAT,
+    [erythritol_100g] FLOAT,
+    [fiber_100g] FLOAT,
+    [soluble-fiber_100g] FLOAT,
+    [insoluble-fiber_100g] FLOAT,
+    [proteins_100g] FLOAT,
+    [casein_100g] FLOAT,
+    [serum-proteins_100g] FLOAT,
+    [nucleotides_100g] FLOAT,
+    [salt_100g] FLOAT,
+    [added-salt_100g] FLOAT,
+    [sodium_100g] FLOAT,
+    [alcohol_100g] FLOAT,
+    [vitamin-a_100g] FLOAT,
+    [beta-carotene_100g] FLOAT,
+    [vitamin-d_100g] FLOAT,
+    [vitamin-e_100g] FLOAT,
+    [vitamin-k_100g] FLOAT,
+    [vitamin-c_100g] FLOAT,
+    [vitamin-b1_100g] FLOAT,
+    [vitamin-b2_100g] FLOAT,
+    [vitamin-pp_100g] FLOAT,
+    [vitamin-b6_100g] FLOAT,
+    [vitamin-b9_100g] FLOAT,
+    [folates_100g] FLOAT,
+    [vitamin-b12_100g] FLOAT,
+    [biotin_100g] FLOAT,
+    [pantothenic-acid_100g] FLOAT,
+    [silica_100g] FLOAT,
+    [bicarbonate_100g] FLOAT,
+    [potassium_100g] FLOAT,
+    [chloride_100g] FLOAT,
+    [calcium_100g] FLOAT,
+    [phosphorus_100g] FLOAT,
+    [iron_100g] FLOAT,
+    [magnesium_100g] FLOAT,
+    [zinc_100g] FLOAT,
+    [copper_100g] FLOAT,
+    [manganese_100g] FLOAT,
+    [fluoride_100g] FLOAT,
+    [selenium_100g] FLOAT,
+    [chromium_100g] FLOAT,
+    [molybdenum_100g] FLOAT,
+    [iodine_100g] FLOAT,
+    [caffeine_100g] FLOAT,
+    [taurine_100g] FLOAT,
+    [ph_100g] FLOAT,
+    [fruits-vegetables-nuts_100g] FLOAT,
+    [fruits-vegetables-nuts-dried_100g] FLOAT,
+    [fruits-vegetables-nuts-estimate_100g] FLOAT,
+    [fruits-vegetables-nuts-estimate-from-ingredients_100g] FLOAT,
+    [collagen-meat-protein-ratio_100g] FLOAT,
+    [cocoa_100g] FLOAT,
+    [chlorophyl_100g] FLOAT,
+    [carbon-footprint_100g] FLOAT,
+    [carbon-footprint-from-meat-or-fish_100g] FLOAT,
+    [nutrition-score-fr_100g] FLOAT,
+    [nutrition-score-uk_100g] FLOAT,
+    [glycemic-index_100g] FLOAT,
+    [water-hardness_100g] FLOAT,
+    [choline_100g] FLOAT,
+    [phylloquinone_100g] FLOAT,
+    [beta-glucan_100g] FLOAT,
+    [inositol_100g] FLOAT,
+    [carnitine_100g] FLOAT,
+    [sulphate_100g] FLOAT,
+    [nitrate_100g] FLOAT,
+    [acidity_100g] FLOAT
+);
diff --git a/scripts/mirabelle/products_daily_update.sh b/scripts/mirabelle/products_daily_update.sh
index 277f7b83..5c85a8e9 100644
--- a/scripts/mirabelle/products_daily_update.sh
+++ b/scripts/mirabelle/products_daily_update.sh
@@ -1,13 +1,17 @@
 #!/bin/bash
 
 path=/home/off/mirabelle
-cd $path
+cd $path || { echo "$(date +'%Y-%m-%dT%H:%M:%S') - Can't cd ${path}"; exit 1; }
 mode=""
 file=""
+stats=""
+TODAY=$(date "+%Y-%m-%d")
+#csv="en.openfoodfacts.org.products.csv"
+url="https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv"
 
-# Read commandline arguments. Try -h to get usage.
-usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; }
-while getopts ":hf:i" arg; do
+# Read commandline arguments. Try -h to get usage.
+usage() { echo "$0 usage:" && grep " .)\ #" "$0"; exit 0; }
+while getopts ":hf:in" arg; do
   case $arg in
     f) # Specify a filename.
       file=${OPTARG}
@@ -16,6 +20,9 @@
     i) # Specify interactive mode.
       mode="i"
       ;;
+    n) # Don't run stats.
+      stats="s"
+      ;;
     h | *) # Display help.
       usage
       exit 0
@@ -24,7 +31,6 @@
 done
 
 
-TODAY=`date "+%Y-%m-%d"`
 export PATH="/usr/local/bin:$PATH"
 
 [[ $mode == "i" ]] && read -p "Press [Enter] key to begin the script in interactive mode."
@@ -37,72 +43,154 @@
 echo "$(date +'%Y-%m-%dT%H:%M:%S') - Old CSV weights $old_csv bytes for $old_csv_lines lines"
 
 
 # Choose CSV depending on the command line -f argument.
-if [[ ${file} == "" ]]; then
-  if [[ `date -r en.openfoodfacts.org.products.csv "+%Y-%m-%d"` == ${TODAY} ]]; then
+if [[ "${file}" == "" ]]; then
+  # Don't download the file again if it has already been done today.
+  if [[ $(date -r en.openfoodfacts.org.products.csv "+%Y-%m-%d") == "${TODAY}" ]]; then
     echo "$(date +'%Y-%m-%dT%H:%M:%S') - CSV file has already been downloaded today. Copying it..."
     cp en.openfoodfacts.org.products.csv newdata.csv
-  else
-    [[ $mode == "i" ]] && read -p "Press [Enter] key to download CSV..."
+  else # Else download the file
+    [[ "$mode" == "i" ]] && read -p "Press [Enter] key to download CSV..."
     echo "$(date +'%Y-%m-%dT%H:%M:%S') - Downloading CSV..."
-    wget -c https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv -O newdata.csv
-    [[ $? -ne 0 ]] && { echo "Download failed, error $?"; exit 1; }
-    printf "ls\n$(ls -la newdata.csv)\n\n"
+
+    # Get the date of the file to be downloaded. Retry up to 60 times,
+    # 10 minutes apart, until its Last-Modified date is today's date.
+    export_date=$(curl -sIL "${url}" | grep -ioP '.*Last-Modified: \K(.*)$' | tr -d '\r')
+    # We don't use the .gz file because it is generated dozens of minutes later.
+    counter=1
+    # Trying to download the file. Attention: sometimes the curl command does not return anything.
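+    # For illustration, the header line parsed above looks like this
+    # (assumed example value, not captured from the real server):
+    #   Last-Modified: Wed, 04 Dec 2024 09:15:12 GMT
+    # grep -ioP keeps everything after "Last-Modified: ", tr strips the
+    # trailing \r, and date -d reduces it to YYYY-MM-DD (2024-12-04)
+    # for the string comparison against ${TODAY} in the loop below.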
+    while [[ ${export_date} == "" || "$(date -d "${export_date}" +%Y-%m-%d)" < "${TODAY}" ]]; do
+      # Exit after 60 tries.
+      [[ "${counter}" -gt 60 ]] && { echo "$(date +'%Y-%m-%dT%H:%M:%S') - Retried ${counter} times, but en.openfoodfacts.org.products.csv is from ${export_date} and not ${TODAY}. Exiting..." | tee >(cat >&2); exit 1; }
+      ((counter++))
+      echo "$(date +'%Y-%m-%dT%H:%M:%S') - CSV export is from ${export_date} and not today (${TODAY}). Retry #${counter} in 10 minutes..."
+      sleep 10m
+      export_date=$(curl -sIL "${url}" | grep -ioP '.*Last-Modified: \K(.*)$' | tr -d '\r')
+    done
+
+    echo "$(date +'%Y-%m-%dT%H:%M:%S') - Remote CSV is from ${export_date}. Downloading CSV..."
+    # wget options considered (kept here as comments; inline "\ #comment"
+    # continuations are not valid shell, so they must not appear in the command):
+    #   -O newdata.csv                    name of the output file
+    #   --quiet                           turn off wget's output
+    #   --server-response                 print the headers sent by HTTP servers and responses sent by FTP servers
+    #   --continue                        compression does not work with --continue or --start-pos, they will be disabled
+    #   --header="accept-encoding: gzip"  alternative to --compression=gzip
+    wget -O newdata.csv \
+      --quiet \
+      --compression=gzip \
+      https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv
+    echo -e "ls -la newdata.csv\n$(ls -la newdata.csv)\n"
+    # Compare local and remote size to be sure download is ok.
+    #source_size=$(wget --spider "${url}" 2>&1 | awk '/Length/ {print $2}')
+    #source_size=$(curl -sIL "${url}" | grep -ioP '.*content-length: \K(.*)$' | tr -d '\r')
+    #downloaded_size=$(wc -c < "newdata.csv")
+    #[[ ${source_size} -ne ${downloaded_size} ]] && { echo "Source size (${source_size}) is different from downloaded file size (${downloaded_size})..."; exit 1; } \
+    #  || echo "Source size (${source_size}) is equal to downloaded file size (${downloaded_size})..."
   fi
 else
   [[ $mode == "i" ]] && read -p "Press [Enter] key to use ${file} as source of the new DB."
   # Use -f filename as source file. Exit if the file can't be copied.
   echo "$(date +'%Y-%m-%dT%H:%M:%S') - Use -f filename (${file}) as source file."
-  cp ${file} newdata.csv || { echo "$(date +'%Y-%m-%dT%H:%M:%S') - cp error"; exit 1; }
+  cp "${file}" newdata.csv || { echo "$(date +'%Y-%m-%dT%H:%M:%S') - cp error"; exit 1; }
 fi
+
+# TODO: verify that the number of fields is not different from the current database.
+# nb_of_fields=$(head -n 1 newdata.csv | tr "\t" "\n" | wc -l)
+
 # Display information on source CSV
 [[ $mode == "i" ]] && read -p "Press [Enter] key to compare CSV from today and yesterday..."
 new_csv=$(wc -c newdata.csv | awk '{print $1}')
 new_csv_lines=$(wc -l newdata.csv | awk '{print $1}')
 echo "$(date +'%Y-%m-%dT%H:%M:%S') - New CSV weights $new_csv bytes for $new_csv_lines lines"
+echo "$(date +'%Y-%m-%dT%H:%M:%S') - Old CSV had $old_csv_lines lines"
 
 # Create a temporary DB if today's CSV is bigger than yesterday
-if [[ "$new_csv" -ge "$old_csv" ]]; then
-  [[ $mode == "i" ]] && read -p "Press [Enter] key to create new db..."
+# TODO: better test of the new CSV (find the newest product?)
+if (( new_csv_lines > old_csv_lines - 5000 )); then
+  [[ "$mode" == "i" ]] && read -p "Press [Enter] key to create new db..."
   mv -f en.openfoodfacts.org.products.csv en.openfoodfacts.org.products.csv.bak
   mv newdata.csv en.openfoodfacts.org.products.csv
   echo "$(date +'%Y-%m-%dT%H:%M:%S') - Creating new DB..."
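+  # A minimal sketch of the import step (assumed commands, for illustration
+  # only; the actual invocation and options may differ):
+  #   sqlite3 products_new.db < products.schema.2024-01-18
+  #   sqlite3 products_new.db ".mode tabs" ".import --skip 1 en.openfoodfacts.org.products.csv all"
+  #   ./empty2null.sh products_new.db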
   # Create DB and import CSV data
-  time sqlite3 products_new.db <
 exit
@@ -111,13 +199,15 @@
 TODAY_DB=$(sqlite3 products_new.db "select count(code) from [all];")
 
 
 # Backup the old DB and replace it by the new one
-echo "$(date +'%Y-%m-%dT%H:%M:%S') - Backup the old DB and replace it by the new one"
+echo "$(date +'%Y-%m-%dT%H:%M:%S') - Backup the old DB and replace it by the new one"
 mv products.db previous.db
 mv products_new.db products.db
+printf "\n\n"
 
 # Launch script to build data quality stats.
-time $path/data-quality.sh
+[[ ${stats} != "s" ]] && echo "$(date +'%Y-%m-%dT%H:%M:%S') - Build data quality stats" || echo "$(date +'%Y-%m-%dT%H:%M:%S') - Don't build data quality stats"
+[[ ${stats} != "s" ]] && $path/data-quality.sh off-stats.db
 
 # Restart mirabelle server
@@ -135,4 +225,4 @@
 sudo $path/clear_cache.sh "products"
 
 # curl ........;
-echo "$(date +'%Y-%m-%dT%H:%M:%S') - END of script"
+echo -e "$(date +'%Y-%m-%dT%H:%M:%S') - END of script\n"
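
# Example invocations of products_daily_update.sh (sketch; flags per the
# getopts block above, the local filename is hypothetical):
#   ./products_daily_update.sh              # normal daily run
#   ./products_daily_update.sh -n           # skip the data-quality stats
#   ./products_daily_update.sh -f dump.csv  # build from a local CSV instead of downloading
#   ./products_daily_update.sh -i           # pause at each step (interactive mode)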