diff --git a/assets/rules_prevalence.pickle b/assets/rules_prevalence.pickle
deleted file mode 100644
index 7a29e925d..000000000
Binary files a/assets/rules_prevalence.pickle and /dev/null differ
diff --git a/capa/render/default.py b/capa/render/default.py
index 259b7ff7a..ac2c2eef5 100644
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -6,8 +6,9 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 
-import pickle
+import json
 import collections
+from pathlib import Path
 
 import tabulate
 
@@ -84,12 +85,16 @@ def render_capabilities(doc: rd.ResultDocument, ostream: StringIO):
     +-------------------------------------------------------+-------------------------------------------------+
     """
 
-    def load_rules_prevalence(filename: str) -> dict:
-        with open(filename, "rb") as pickle_file:
-            return pickle.load(pickle_file)
+    def load_rules_prevalence(file: Path) -> dict:
+        try:
+            return json.load(file.open("r"))
+        except FileNotFoundError:
+            raise FileNotFoundError(f"File '{file}' not found.")
+        except Exception as e:
+            raise RuntimeError(f"An error occurred while loading '{file}': {e}")
 
     subrule_matches = find_subrule_matches(doc)
-    rules_prevalence = load_rules_prevalence("./assets/rules_prevalence.pickle")
+    rules_prevalence = load_rules_prevalence(Path("./assets/rules_prevalence.json"))
 
     # seperate rules based on their prevalence
     common = []
@@ -101,13 +106,13 @@ def load_rules_prevalence(filename: str) -> dict:
         count = len(rule.matches)
         matches = f"({count} matches)" if count > 1 else ""
 
-        rule_prevalence = float(rules_prevalence.get(rule.meta.name + "\n", 0))
+        rule_prevalence = float(rules_prevalence.get(rule.meta.name, 0))
         if rule_prevalence < 0:
             raise ValueError("Match probability cannot be negative")
 
         prevalences = [rutils.bold("rare"), rutils.bold("common"), "unknown"]
-        if rule_prevalence >= 0.05 or rule_prevalence == 0:
+        if rule_prevalence == 0 or rule_prevalence >= 0.05:
             prevalence = prevalences[2] if rule_prevalence == 0 else prevalences[1]
             common.append((rule.meta.namespace, rule.meta.name, matches, prevalence))
         else:
diff --git a/try.py b/try.py
deleted file mode 100644
index c4e6aadc3..000000000
--- a/try.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import os , subprocess, pickle, tqdm
-import pandas as pd
-
-def get_files_with_extensions(directory, extensions):
-    files = []
-    for filename in os.listdir(directory):
-        if os.path.isfile(os.path.join(directory, filename)):
-            _, ext = os.path.splitext(filename)
-            if ext.lower() in extensions:
-                files.append(os.path.join(directory, filename))
-    return files
-
-extensions = ['.exe_', '.dll_', '.sys_', '.elf_', '.raw32', '.raw64', '.cs_', '.aspx_', '.py_']
-directory = r"C:\Users\HP\Documents\GitHub\capa\tests\data"
-
-all_paths = get_files_with_extensions(directory, extensions)
-print("Total number of files to be processed ", len(all_paths))
-
-pickle_path = "./all_rules_entropy.pickle"
-write_path = "./rules_entropy.txt"
-pbar = tqdm.tqdm
-entropy = {}
-
-for file_path in pbar(all_paths):
-    cmd = ["capa", file_path]
-
-    with open(write_path, 'w') as file:
-        file.write('')
-
-    try:
-        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-    except subprocess.CalledProcessError as e:
-        print("Error running capa on " + file_path + " : " + str(e))
-
-    with open(write_path, "r") as f:
-        for line in f.readlines():
-            line.strip()
-            entropy[line] = entropy.get(line, 0) + 1
-
-    with open(pickle_path, 'wb') as pickle_file:
-        pickle.dump(entropy, pickle_file)
-
-with open(pickle_path, 'wb') as pickle_file:
-    pickle.dump(entropy, pickle_file)
-
-def save_dict_to_excel(data_dict, excel_file_path):
-    # Convert the dictionary to a pandas DataFrame
-    df = pd.DataFrame(data_dict.items())
-
-    # Save the DataFrame to an Excel file
-    df.to_excel(excel_file_path, index=False, header=["Rule", "Number of Matches"])
-
-excel_file_path = "./entropy.xlsx"
-save_dict_to_excel(entropy, excel_file_path)
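Note for reviewers: below is a minimal, self-contained sketch of the data format the new `load_rules_prevalence` helper appears to assume and of the prevalence classification applied in `render_capabilities`. Only the rule-name-to-float mapping, the missing-key default of 0, and the 0.05 cutoff are taken from the diff above; the rule names, frequency values, and the local `rules_prevalence.json` path used here are hypothetical placeholders.

```python
import json
from pathlib import Path

# Hypothetical sample of what assets/rules_prevalence.json might contain:
# a mapping of rule name (rule.meta.name, with no trailing newline) to a
# float match frequency. Names and values are made up for illustration.
sample = {
    "create process": 0.31,         # >= 0.05 -> rendered as bold "common"
    "hash data with CRC32": 0.004,  # below 0.05 -> rendered as bold "rare"
}

path = Path("rules_prevalence.json")  # placeholder path, not the real asset
path.write_text(json.dumps(sample, indent=2))

# Mirror of the classification in render_capabilities(): a missing key
# defaults to 0 ("unknown"), >= 0.05 is "common", anything else is "rare".
rules_prevalence = json.loads(path.read_text())
for name in ("create process", "hash data with CRC32", "unlisted rule"):
    prevalence = float(rules_prevalence.get(name, 0))
    if prevalence == 0:
        label = "unknown"
    elif prevalence >= 0.05:
        label = "common"
    else:
        label = "rare"
    print(f"{name}: {label}")
```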