From 0273b92aee5e01f7ae7608cc061c352862044a64 Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Tue, 31 Dec 2024 12:57:42 +0100 Subject: [PATCH 1/2] refactor(Price tags): matching script: try to match on price only --- .../match_price_tags_with_existing_prices.py | 6 +++++ open_prices/proofs/utils.py | 27 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/open_prices/proofs/management/commands/match_price_tags_with_existing_prices.py b/open_prices/proofs/management/commands/match_price_tags_with_existing_prices.py index 72b4dfa0..7aa01ee2 100644 --- a/open_prices/proofs/management/commands/match_price_tags_with_existing_prices.py +++ b/open_prices/proofs/management/commands/match_price_tags_with_existing_prices.py @@ -5,6 +5,7 @@ from open_prices.proofs.models import PriceTag, Proof from open_prices.proofs.utils import ( match_category_price_tag_with_category_price, + match_price_tag_with_price, match_product_price_tag_with_product_price, ) @@ -72,6 +73,11 @@ def handle(self, *args, **options) -> None: # type: ignore price_tag.status = 1 price_tag.save() break + elif match_price_tag_with_price(price_tag, price): + price_tag.price_id = price.id + price_tag.status = 1 + price_tag.save() + break self.stdout.write("=== Stats after...") stats() diff --git a/open_prices/proofs/utils.py b/open_prices/proofs/utils.py index fcf1536b..076a238a 100644 --- a/open_prices/proofs/utils.py +++ b/open_prices/proofs/utils.py @@ -254,6 +254,9 @@ def cleanup_price_tag_prediction_barcode(barcode: str) -> str: def match_product_price_tag_with_product_price( price_tag: PriceTag, price: Price ) -> bool: + """ + Match on barcode and price. + """ price_tag_prediction_data = price_tag.predictions.first().data price_tag_prediction_barcode = price_tag_prediction_data.get("barcode") price_tag_prediction_barcode = cleanup_price_tag_prediction_barcode( @@ -270,6 +273,9 @@ def match_product_price_tag_with_product_price( def match_category_price_tag_with_category_price( price_tag: PriceTag, price: Price ) -> bool: + """ + Match on product (category_tag) and price. + """ price_tag_prediction_data = price_tag.predictions.first().data price_tag_prediction_product = price_tag_prediction_data.get("product") price_tag_prediction_price = price_tag_prediction_data.get("price") @@ -278,3 +284,24 @@ def match_category_price_tag_with_category_price( and (price.product_code == price_tag_prediction_product) and match_decimal_with_float(price.price, price_tag_prediction_price) ) + + +def match_price_tag_with_price(price_tag: PriceTag, price: Price) -> bool: + """ + Match only on price. + We make sure this price is unique. + """ + price_tag_prediction_data = price_tag.predictions.first().data + price_tag_prediction_price = price_tag_prediction_data.get("price") + proof = price_tag.proof + proof_prices = list(proof.prices.values_list("price", flat=True)) + proof_price_tag_prices = [ + price_tag.predictions.first().data.get("price") + for price_tag in proof.price_tags.all() + ] + return ( + # (price_tag_prediction_data["product"] == "other") + match_decimal_with_float(price.price, price_tag_prediction_price) + and proof_prices.count(price.price) == 1 + and proof_price_tag_prices.count(price_tag_prediction_price) == 1 + ) From fc0fa2951408efe94b3df66ee0449613be6a3e24 Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Tue, 31 Dec 2024 13:16:55 +0100 Subject: [PATCH 2/2] cleanup --- .../match_price_tags_with_existing_prices.py | 7 +++++-- open_prices/proofs/utils.py | 12 +++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/open_prices/proofs/management/commands/match_price_tags_with_existing_prices.py b/open_prices/proofs/management/commands/match_price_tags_with_existing_prices.py index 7aa01ee2..34be3901 100644 --- a/open_prices/proofs/management/commands/match_price_tags_with_existing_prices.py +++ b/open_prices/proofs/management/commands/match_price_tags_with_existing_prices.py @@ -39,9 +39,10 @@ def handle(self, *args, **options) -> None: # type: ignore stats() self.stdout.write("=== Running matching script...") - for proof in Proof.objects.has_type_price_tag().prefetch_related( + proof_qs = Proof.objects.has_type_price_tag().prefetch_related( "prices", "price_tags", "price_tags__predictions" - ): + ) + for index, proof in enumerate(proof_qs): if proof.price_tags.count() == 0: continue elif proof.prices.count() == 0: @@ -78,6 +79,8 @@ def handle(self, *args, **options) -> None: # type: ignore price_tag.status = 1 price_tag.save() break + if index % 500 == 0: + self.stdout.write(f"Processed {index} proofs") self.stdout.write("=== Stats after...") stats() diff --git a/open_prices/proofs/utils.py b/open_prices/proofs/utils.py index 076a238a..549ff852 100644 --- a/open_prices/proofs/utils.py +++ b/open_prices/proofs/utils.py @@ -289,18 +289,20 @@ def match_category_price_tag_with_category_price( def match_price_tag_with_price(price_tag: PriceTag, price: Price) -> bool: """ Match only on price. - We make sure this price is unique. + We make sure this price is unique in the proof to avoid errors. """ price_tag_prediction_data = price_tag.predictions.first().data price_tag_prediction_price = price_tag_prediction_data.get("price") - proof = price_tag.proof - proof_prices = list(proof.prices.values_list("price", flat=True)) + proof_prices = list( + Price.objects.filter(proof_id=price_tag.proof_id).values_list( + "price", flat=True + ) + ) proof_price_tag_prices = [ price_tag.predictions.first().data.get("price") - for price_tag in proof.price_tags.all() + for price_tag in PriceTag.objects.filter(proof_id=price_tag.proof_id) ] return ( - # (price_tag_prediction_data["product"] == "other") match_decimal_with_float(price.price, price_tag_prediction_price) and proof_prices.count(price.price) == 1 and proof_price_tag_prices.count(price_tag_prediction_price) == 1