Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: Map Extracted Files to Artifact Definitions in image_export.py #4949

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
de1f7ce
Map Extracted Files to Artifact Definitions in image_export.py
sa3eed3ed Dec 30, 2024
a545f8b
remove os-dependent path operations in artifacts_trie
sa3eed3ed Dec 31, 2024
2a7e31b
nits
sa3eed3ed Dec 31, 2024
96a3bd1
handle cases where path segments are sanitized at time when output is…
sa3eed3ed Jan 2, 2025
ddce55d
Update image_export_tool.py
joachimmetz Jan 19, 2025
03578d8
Update image_export_tool.py
joachimmetz Jan 19, 2025
e191125
remove typos in artifacts_trie.py
sa3eed3ed Jan 19, 2025
ff8b7a4
Update image_export_tool.py
joachimmetz Jan 20, 2025
335a5c8
Update image_export_tool.py
joachimmetz Jan 20, 2025
f4ca547
Update artifact_filters.py
joachimmetz Jan 20, 2025
c7ceabb
fixing registery typo
sa3eed3ed Jan 20, 2025
2d9b665
adapt tests and minor linter complain after changes in review commits
sa3eed3ed Jan 20, 2025
fcac56f
Update image_export_tool.py
joachimmetz Jan 21, 2025
42727b7
Update artifact_filters.py
joachimmetz Jan 21, 2025
6c02826
Update artifact_filters.py
joachimmetz Jan 22, 2025
6f5b2c3
Update artifact_filters.py
joachimmetz Jan 22, 2025
7afc410
Update artifacts_trie.py
joachimmetz Jan 22, 2025
d7ee20d
Update artifacts_trie.py
joachimmetz Jan 22, 2025
ecf5d40
Update artifacts_trie.py
joachimmetz Jan 22, 2025
01e9e98
Update artifacts_trie.py
joachimmetz Jan 22, 2025
5d77c1b
applying reviewer comments
sa3eed3ed Jan 22, 2025
456fe83
Update image_export_tool.py
joachimmetz Jan 23, 2025
a6ecb92
Update image_export_tool.py
joachimmetz Jan 23, 2025
67e99c2
Update artifacts_trie.py
joachimmetz Jan 24, 2025
4d055ec
Update artifacts_trie.py
joachimmetz Jan 24, 2025
106c5af
Update engine.py
joachimmetz Jan 24, 2025
e368b16
Update engine.py
joachimmetz Jan 24, 2025
60539fa
Update path_helper.py
joachimmetz Jan 24, 2025
9838064
Update file_entry.py
joachimmetz Jan 24, 2025
e81fc01
Update image_export_tool.py
joachimmetz Jan 24, 2025
9787837
fix for comments
sa3eed3ed Jan 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 41 additions & 25 deletions plaso/cli/image_export_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,6 @@ class ImageExportTool(storage_media_tool.StorageMediaTool):

_COPY_BUFFER_SIZE = 32768

_DIRTY_CHARACTERS = frozenset([
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
os.path.sep, '!', '$', '%', '&', '*', '+', ':', ';', '<', '>',
'?', '@', '|', '~', '\x7f'])

_HASHES_FILENAME = 'hashes.json'

_READ_BUFFER_SIZE = 4096
Expand Down Expand Up @@ -84,10 +76,12 @@ def __init__(self, input_reader=None, output_writer=None):
self._abort = False
self._artifact_definitions_path = None
self._artifact_filters = None
self._artifacts_paths_map = collections.defaultdict(list)
self._artifacts_registry = None
self._custom_artifacts_path = None
self._destination_path = None
self._digests = {}
self._enable_artifacts_map = False
self._filter_collection = file_entry_filters.FileEntryFilterCollection()
self._filter_file = None
self._no_hashes = False
Expand Down Expand Up @@ -151,11 +145,7 @@ def _CreateSanitizedDestination(
path = getattr(file_system_path_spec, 'location', None)
path_segments = file_system.SplitPath(path)

# Sanitize each path segment.
for index, path_segment in enumerate(path_segments):
path_segments[index] = ''.join([
character if character not in self._DIRTY_CHARACTERS else '_'
for character in path_segment])
path_segments = path_helper.PathHelper.SanitizePathSegments(path_segments)

target_filename = path_segments.pop()

Expand Down Expand Up @@ -213,17 +203,9 @@ def _ExtractDataStream(

target_directory, target_filename = self._CreateSanitizedDestination(
file_entry, file_entry.path_spec, data_stream_name, destination_path)

# If does not exist, append path separator to have consistent behaviour.
if not destination_path.endswith(os.path.sep):
destination_path = destination_path + os.path.sep

# TODO: refactor
path = None

path = path_helper.PathHelper.GetRelativePath(
target_directory, target_filename, destination_path)
target_path = os.path.join(target_directory, target_filename)
if target_path.startswith(destination_path):
path = target_path[len(destination_path):]

self._paths_by_hash[digest].append(path)

Expand All @@ -247,6 +229,13 @@ def _ExtractDataStream(
f'exists.'))
return

# Generate a map between artifacts and extracted paths.
if self._enable_artifacts_map:
for artifact_name in self._filter_collection.GetMatchingArtifacts(
path, os.sep):
path_list = self._artifacts_paths_map.setdefault(artifact_name, [])
path_list.append(path)

try:
self._WriteFileEntry(file_entry, data_stream_name, target_path)
except (IOError, dfvfs_errors.BackEndError) as exception:
Expand Down Expand Up @@ -348,13 +337,19 @@ def _Extract(

try:
extraction_engine.BuildCollectionFilters(
environment_variables, user_accounts,
environment_variables,
user_accounts,
artifact_filter_names=artifact_filters,
filter_file_path=filter_file)
filter_file_path=filter_file,
enable_artifacts_map=self._enable_artifacts_map)
except errors.InvalidFilter as exception:
raise errors.BadConfigOption(
f'Unable to build collection filters with error: {exception!s}')

if self._enable_artifacts_map:
artifacts_trie = extraction_engine.GetArtifactsTrie()
self._filter_collection.SetArtifactsTrie(artifacts_trie)

excluded_find_specs = extraction_engine.GetCollectionExcludedFindSpecs()
included_find_specs = extraction_engine.GetCollectionIncludedFindSpecs()

Expand Down Expand Up @@ -654,6 +649,12 @@ def ParseArguments(self, arguments):

self.AddFilterOptions(argument_parser)

argument_parser.add_argument(
'--enable_artifacts_map', dest='enable_artifacts_map',
action='store_true', default=False, help=(
'Output a JSON file mapping extracted files/directories to '
'artifact definitions.'))

argument_parser.add_argument(
'-w', '--write', action='store', dest='path', type=str,
metavar='PATH', default='export', help=(
Expand Down Expand Up @@ -785,6 +786,9 @@ def ParseOptions(self, options):

self._EnforceProcessMemoryLimit(self._process_memory_limit)

self._enable_artifacts_map = getattr(
options, 'enable_artifacts_map', False)

def PrintFilterCollection(self):
"""Prints the filter collection."""
self._filter_collection.Print(self._output_writer)
Expand All @@ -799,6 +803,12 @@ def ProcessSource(self):
"""
try:
self.ScanSource(self._source_path)
if self._source_type not in self._SOURCE_TYPES_TO_PREPROCESS:
source_types = ', '.join(self._SOURCE_TYPES_TO_PREPROCESS)
self._output_writer.Write((
f'Input must be one of the source types: "{source_types:s}"; '
f'the type: "{self._source_type!s}" is not supported.\n'))
return
except dfvfs_errors.UserAbort as exception:
raise errors.UserAbort(exception)

Expand All @@ -823,5 +833,11 @@ def ProcessSource(self):
json_data.append({'sha256': sha256, 'paths': paths})
json.dump(json_data, file_object)

if self._enable_artifacts_map:
artifacts_map_file = os.path.join(
self._destination_path, 'artifacts_map.json')
with open(artifacts_map_file, 'w', encoding='utf-8') as file_object:
json.dump(self._artifacts_paths_map, file_object)

self._output_writer.Write('Export completed.\n')
self._output_writer.Write('\n')
Loading