From cae2d75121413e897e32f3ac8f03c2b41ef845ff Mon Sep 17 00:00:00 2001 From: Emirhan Akdeniz <73674035+kagermanov27@users.noreply.github.com> Date: Tue, 20 Jun 2023 17:33:41 +0300 Subject: [PATCH] Fix an issue born from recent change to values - Fix an issue born from recent change to values (some descriptions are parsed as service options in SerpApi traditional results) - Bump up the version --- google-local-results-ai-parser.gemspec | 2 +- lib/google-local-results-ai-parser.rb | 44 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/google-local-results-ai-parser.gemspec b/google-local-results-ai-parser.gemspec index 07e5932..fc6661f 100644 --- a/google-local-results-ai-parser.gemspec +++ b/google-local-results-ai-parser.gemspec @@ -1,6 +1,6 @@ Gem::Specification.new do |spec| spec.name = "google-local-results-ai-parser" - spec.version = "0.1.4" + spec.version = "0.1.5" spec.summary = "A gem to be used with serpapi/bert-base-local-results model to predict different parts of Google Local Listings." spec.description = "A gem to be used with serpapi/bert-base-local-results model to predict different parts of Google Local Listings. This gem uses BERT model at https://huggingface.co/serpapi/bert-base-local-results in the background. For serving private servers, head to https://github.com/serpapi/google-local-results-ai-server to get more information." 
spec.homepage = "https://github.com/serpapi/google-local-results-ai-parser"
diff --git a/lib/google-local-results-ai-parser.rb b/lib/google-local-results-ai-parser.rb
index a2c674b..e5d09ab 100644
--- a/lib/google-local-results-ai-parser.rb
+++ b/lib/google-local-results-ai-parser.rb
@@ -98,6 +98,7 @@ def sort_results(results, extracted_text, unsplit_text, iteration, doc)
     results, label_order, duplicates = button_text_as_hours_confusion(results, label_order, duplicates)
     results, label_order, duplicates = button_text_as_address_confusion(results, label_order, duplicates)
     results, label_order, duplicates = button_text_as_service_options_confusion(results, label_order, duplicates)
+    results, label_order, duplicates = service_options_as_description_or_type_confusion(results, label_order, duplicates)
 
     # General clashes
     line_result = check_if_on_different_lines(results, duplicates, unsplit_text)
@@ -437,6 +438,53 @@ def description_as_hours_confusion(results, label_order, duplicates)
     return results, label_order, duplicates
   end
 
+  # On-site services, Online appointments
+  # Fixes `On-site services`, `Online appointments`
+  #
+  # Relabels known service-option strings whose top prediction is another label
+  # (e.g. `description` or `type`) as `service options`.
+  #
+  # results     - Array of hashes; reads `:input` and the label/score hashes in
+  #               `[:result][0]`, whose first element is treated as the top prediction.
+  # label_order - Array indexed by the same positions as `results`.
+  # duplicates  - Array of arrays of `results` indices (presumably entries that
+  #               share a label - confirm against the caller).
+  #
+  # Returns `results`, `label_order`, `duplicates`, all mutated in place.
+  def service_options_as_description_or_type_confusion(results, label_order, duplicates)
+    known_errors = ["On-site services", "On-site services not available", "Online appointments", "Online appointments not available"]
+
+    # Entries already predicted as `service options` (or with no predictions) are left untouched.
+    caught_results_indices = results.each_index.select do |index|
+      top_prediction = results[index].dig(:result, 0, 0)
+      known_errors.include?(results[index][:input]) && top_prediction && top_prediction["label"] != "service options"
+    end
+    return results, label_order, duplicates if caught_results_indices.empty?
+
+    # Zero out the `type` or `description`, and put it to last position
+    caught_results_indices.each do |caught_index|
+      predictions = results[caught_index][:result][0]
+      old_result_hash = predictions.shift
+      # Drop any existing `service options` entry; it may be absent, so no index lookup.
+      predictions.reject! { |hash| hash["label"] == "service options" }
+      predictions.unshift({"label" => "service options", "score" => 1.0})
+      old_result_hash["score"] = 0.0
+      predictions << old_result_hash
+    end
+
+    # Rearranging `label_order`
+    caught_results_indices.each { |caught_index| label_order[caught_index] = "service_options" }
+
+    # Rearranging duplicates: take the relabelled entries out of every group and
+    # drop a group once that leaves it with fewer than two members.
+    duplicates.reject! do |duplicate|
+      # `reject!` returns nil when nothing was removed, so untouched groups are kept as they are.
+      shrunk = duplicate.reject! { |index| caught_results_indices.include?(index) }
+      shrunk && duplicate.size < 2
+    end
+
+    return results, label_order, duplicates
+  end
+
   # Takeaway ⋅ Dine-in ...
   # Fixes `Takeaway`
   def service_options_as_type_confusion(results, label_order, duplicates)