From e8ea98552c973f9ac126e728d465de3723a9701f Mon Sep 17 00:00:00 2001
From: 18alantom <2.alan.tom@gmail.com>
Date: Tue, 23 Jan 2024 17:20:16 +0530
Subject: [PATCH] fix: add safety filter for untarring

---
 bench/app.py            |  9 ++++++++-
 bench/utils/__init__.py | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/bench/app.py b/bench/app.py
index 322afa54a..54de17582 100755
--- a/bench/app.py
+++ b/bench/app.py
@@ -24,6 +24,7 @@
 from bench.utils import (
 	UNSET_ARG,
 	fetch_details_from_tag,
+	get_app_cache_extract_filter,
 	get_available_folder_name,
 	get_bench_cache_path,
 	is_bench_directory,
@@ -343,7 +344,13 @@ def get_cached(self) -> bool:
 
 		click.secho(f"Getting {self.app_name} from cache", fg="yellow")
 		with tarfile.open(cache_path, mode) as tar:
-			tar.extractall(app_path.parent)
+			try:
+				tar.extractall(app_path.parent, filter=get_app_cache_extract_filter())
+			except Exception:
+				# Discard whatever was partially extracted and fall back to a
+				# regular fetch; the directory may not exist yet if the
+				# extraction failed before anything was written.
+				shutil.rmtree(app_path, ignore_errors=True)
+				return False
 
 		return True
 
diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py
index bd07ec4b1..8c5b0a714 100644
--- a/bench/utils/__init__.py
+++ b/bench/utils/__init__.py
@@ -9,7 +9,8 @@
 from glob import glob
 from pathlib import Path
 from shlex import split
-from typing import List, Optional, Tuple
+from tarfile import data_filter, AbsoluteLinkError, TarInfo
+from typing import Callable, List, Optional, Tuple
 
 # imports - third party imports
 import click
@@ -569,3 +570,45 @@ def get_cmd_from_sysargv():
 			break
 
 	return cmd_from_ctx
+
+
+def get_app_cache_extract_filter(
+	count_threshold: int = 10_000,
+	size_threshold: int = 1_000_000_000,
+) -> Callable[[TarInfo, str], Optional[TarInfo]]:
+	"""Build a stateful `tarfile` extraction filter for cached app archives.
+
+	The returned callable is meant to be passed as `filter=` to
+	`TarFile.extractall`. It keeps running totals across calls, so a fresh
+	filter must be created for every archive.
+
+	Args:
+		count_threshold: maximum number of archive members allowed.
+		size_threshold: maximum cumulative member size allowed, in bytes.
+
+	Returns:
+		A filter that delegates to `tarfile.data_filter`, skips members
+		rejected as absolute links, and raises `Exception` once either
+		threshold is exceeded.
+	"""
+	state = {"count": 0, "size": 0}
+
+	def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]:
+		# Limits are checked before delegating so that an oversized archive
+		# aborts the extraction as early as possible.
+		state["count"] += 1
+		state["size"] += member.size
+
+		if state["count"] > count_threshold:
+			raise Exception(f"Number of entries exceeds threshold ({state['count']})")
+
+		if state["size"] > size_threshold:
+			raise Exception(f"Extracted size exceeds threshold ({state['size']})")
+
+		try:
+			return data_filter(member, dest_path)
+		except AbsoluteLinkError:
+			# Links created by `frappe` after extraction
+			return None
+
+	return filter_function