From 26f9fe59e5c479d90a709ff14f9e42aab1a7adf6 Mon Sep 17 00:00:00 2001 From: mrupp Date: Wed, 29 Nov 2023 10:16:38 +0100 Subject: [PATCH] Fix issue 'too many SQL variables' While querying detailed info for entry_ids, the sqlite3 error 'sqlite3.OperationalError: too many SQL variables' could occur. - Fixed by splitting the query into batches - Added a test case to test this issue. --- cpe_search.py | 33 ++++++++++++++++++++------------- test.py | 9 +++++++++ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/cpe_search.py b/cpe_search.py index caca2a2..a33467c 100755 --- a/cpe_search.py +++ b/cpe_search.py @@ -2,6 +2,7 @@ import argparse from collections import Counter +from itertools import chain import math import os import pprint @@ -429,19 +430,25 @@ def words_in_line(words, line): all_cpe_entry_ids.append(eid) # iterate over all retrieved CPE infos and find best matching CPEs for queries - if not all_cpe_entry_ids: - iterator = [] - - param_in_str = ('?,' * len(all_cpe_entry_ids))[:-1] - if keep_data_in_memory: - db_query = 'SELECT cpe, term_frequencies, abs_term_frequency FROM cpe_entries WHERE entry_id IN (%s)' % param_in_str - cpe_infos = db_cursor.execute(db_query, all_cpe_entry_ids).fetchall() - relevant_cpe_infos = cpe_infos - iterator = relevant_cpe_infos - else: - db_query = 'SELECT cpe, term_frequencies, abs_term_frequency FROM cpe_entries WHERE entry_id IN (%s)' % param_in_str - db_cursor.execute(db_query, all_cpe_entry_ids) - iterator = db_cursor + iterator = [] + max_results_per_query = 250000 + remaining = len(all_cpe_entry_ids) + while remaining > 0: + if remaining > max_results_per_query: + count_params_in_str = max_results_per_query + else: + count_params_in_str = remaining + param_in_str = ('?,' * count_params_in_str)[:-1] + if keep_data_in_memory: + db_query = 'SELECT cpe, term_frequencies, abs_term_frequency FROM cpe_entries WHERE entry_id IN (%s)' % param_in_str + cpe_infos = db_cursor.execute(db_query, 
all_cpe_entry_ids[remaining-count_params_in_str:remaining]).fetchall() + relevant_cpe_infos = cpe_infos + iterator = chain(iterator, relevant_cpe_infos) + else: + db_query = 'SELECT cpe, term_frequencies, abs_term_frequency FROM cpe_entries WHERE entry_id IN (%s)' % param_in_str + db_cursor.execute(db_query, all_cpe_entry_ids[remaining-count_params_in_str:remaining]) + iterator = chain(iterator, db_cursor) + remaining -= max_results_per_query for cpe_info in iterator: cpe, cpe_tf, cpe_abs = cpe_info diff --git a/test.py b/test.py index ab03be2..98cdab0 100644 --- a/test.py +++ b/test.py @@ -86,5 +86,14 @@ def test_search_electron_1317(self): self.assertEqual(result[query][0][0], test_best_match_cpe) self.assertEqual(str(result[query][0][1]), test_best_match_score) + def test_search_blackice_agent_for_server_30(self): + self.maxDiff = None + query = 'BlackIce Agent for Server 3.0' + test_best_match_cpe = 'cpe:2.3:a:iss:blackice_agent_for_server:3.0:*:*:*:*:*:*:*' + test_best_match_score = '0.9128709291752767' + result = search_cpes(queries=[query]) + self.assertEqual(result[query][0][0], test_best_match_cpe) + self.assertEqual(str(result[query][0][1]), test_best_match_score) + if __name__ == '__main__': unittest.main()