Skip to content

Commit

Permalink
Refactor BytecodeTrie to its own module
Browse files Browse the repository at this point in the history
  • Loading branch information
agostbiro committed Jan 22, 2025
1 parent 0a99d3b commit 7bcb871
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 81 deletions.
83 changes: 83 additions & 0 deletions crates/edr_solidity/src/bytecode_trie.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use std::{collections::HashMap, sync::Arc};

use crate::build_model::ContractMetadata;

/// The outcome of a successful [`BytecodeTrie::search`].
///
/// Derives `Debug` and `Clone` so results can be logged, used in assertion
/// messages and copied cheaply (both payloads are a reference-counted pointer
/// or a plain borrow).
#[derive(Debug, Clone)]
pub enum TrieSearch<'a> {
    /// The searched code ends exactly on a node that stores a contract.
    ExactHit(Arc<ContractMetadata>),
    /// No exact match was found; this is the deepest node reached before the
    /// searched code diverged from the trie.
    LongestPrefixNode(&'a BytecodeTrie),
}

/// A trie of contracts, keyed byte by byte on each contract's normalized code.
///
/// What makes it special is that every node also stores the contracts that
/// pass through it and a depth value, in addition to its children.
#[derive(Debug, Clone)]
pub struct BytecodeTrie {
/// Contracts whose normalized code reaches this node and continues past it.
/// `add` pushes one entry per call, so a contract ending exactly at this
/// node is stored in `match_` and not here.
pub descendants: Vec<Arc<ContractMetadata>>,
/// The contract whose normalized code ends exactly at this node, if any.
/// When several contracts with identical code are added, the last one wins.
pub match_: Option<Arc<ContractMetadata>>,
/// The depth value given to `new`. Child nodes created by `add` receive the
/// zero-based index of the byte that leads into them.
pub depth: Option<u32>,
/// Child nodes, keyed by the next byte of the normalized code.
child_nodes: HashMap<u8, Box<BytecodeTrie>>,
}

impl BytecodeTrie {
    /// Creates an empty node: no children, no descendants and no exact match.
    ///
    /// `depth` is stored as-is. [`BytecodeTrie::add`] passes the zero-based
    /// byte index for the child nodes it creates.
    pub fn new(depth: Option<u32>) -> Self {
        Self {
            child_nodes: HashMap::new(),
            descendants: Vec::new(),
            match_: None,
            depth,
        }
    }

    /// Inserts `bytecode` into the trie, keyed by its normalized code.
    ///
    /// Every node on the path, starting at this node and stopping just before
    /// the node where the code ends, records the contract in `descendants`.
    /// The node where the code ends stores it in `match_`.
    pub fn add(&mut self, bytecode: Arc<ContractMetadata>) {
        let mut cursor = self;

        for (index, byte) in bytecode.normalized_code.iter().enumerate() {
            cursor.descendants.push(Arc::clone(&bytecode));

            // NOTE(review): `index as u32` wraps for code longer than
            // `u32::MAX` bytes; presumably unreachable for contract bytecode,
            // kept as-is so the stored depth values do not change.
            let child = cursor
                .child_nodes
                .entry(*byte)
                .or_insert_with(|| Box::new(Self::new(Some(index as u32))));

            cursor = child;
        }

        // If multiple contracts with the exact same bytecode are added we keep
        // the last of them. Note that this includes the metadata hash, so the
        // chances of this happening are pretty remote, except in super
        // artificial cases that we have in our test suite.
        //
        // The `Arc` is moved here rather than cloned: it is no longer needed.
        cursor.match_ = Some(bytecode);
    }

    /// Searches for `code`, starting at byte offset `current_code_byte`.
    ///
    /// Returns:
    /// - `Some(TrieSearch::ExactHit(..))` if the remaining code ends on a node
    ///   that stores a contract.
    /// - `Some(TrieSearch::LongestPrefixNode(..))` if the remaining code
    ///   diverges from the trie; the node is the deepest one reached.
    /// - `None` if the remaining code is entirely covered by the trie but the
    ///   final node stores no contract, or if `current_code_byte` is past the
    ///   end of `code`.
    pub fn search(&self, code: &[u8], current_code_byte: u32) -> Option<TrieSearch<'_>> {
        // Bounds-check in `usize` rather than truncating `code.len()` to
        // `u32`, which gave a wrong answer for slices of 4 GiB or more.
        // `get` yields `None` when the offset is past the end of `code`.
        let remaining = code.get((current_code_byte as usize)..)?;

        let mut cursor = self;

        for byte in remaining {
            match cursor.child_nodes.get(byte) {
                Some(child) => cursor = child,
                None => return Some(TrieSearch::LongestPrefixNode(cursor)),
            }
        }

        cursor
            .match_
            .as_ref()
            .map(|bytecode| TrieSearch::ExactHit(Arc::clone(bytecode)))
    }
}
85 changes: 4 additions & 81 deletions crates/edr_solidity/src/contracts_identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,87 +10,10 @@ use std::{borrow::Cow, collections::HashMap, sync::Arc};
use edr_eth::Address;
use edr_evm::interpreter::OpCode;

use crate::build_model::ContractMetadata;

/// The outcome of a successful [`BytecodeTrie::search`].
///
/// Derives `Debug` and `Clone` so results can be logged, used in assertion
/// messages and copied cheaply (both payloads are a reference-counted pointer
/// or a plain borrow).
#[derive(Debug, Clone)]
enum TrieSearch<'a> {
    /// The searched code ends exactly on a node that stores a contract.
    ExactHit(Arc<ContractMetadata>),
    /// No exact match was found; this is the deepest node reached before the
    /// searched code diverged from the trie.
    LongestPrefixNode(&'a BytecodeTrie),
}

/// A trie of contracts, keyed byte by byte on each contract's normalized code.
///
/// What makes it special is that every node also stores the contracts that
/// pass through it and a depth value, in addition to its children.
#[derive(Debug, Clone)]
struct BytecodeTrie {
/// Child nodes, keyed by the next byte of the normalized code.
child_nodes: HashMap<u8, Box<BytecodeTrie>>,
/// Contracts whose normalized code reaches this node and continues past it.
/// `add` pushes one entry per call, so a contract ending exactly at this
/// node is stored in `match_` and not here.
descendants: Vec<Arc<ContractMetadata>>,
/// The contract whose normalized code ends exactly at this node, if any.
/// When several contracts with identical code are added, the last one wins.
match_: Option<Arc<ContractMetadata>>,
/// The depth value given to `new`. Child nodes created by `add` receive the
/// zero-based index of the byte that leads into them.
depth: Option<u32>,
}

impl BytecodeTrie {
    /// Creates an empty node: no children, no descendants and no exact match.
    ///
    /// `depth` is stored as-is. [`BytecodeTrie::add`] passes the zero-based
    /// byte index for the child nodes it creates.
    fn new(depth: Option<u32>) -> Self {
        Self {
            child_nodes: HashMap::new(),
            descendants: Vec::new(),
            match_: None,
            depth,
        }
    }

    /// Inserts `bytecode` into the trie, keyed by its normalized code.
    ///
    /// Every node on the path, starting at this node and stopping just before
    /// the node where the code ends, records the contract in `descendants`.
    /// The node where the code ends stores it in `match_`.
    fn add(&mut self, bytecode: Arc<ContractMetadata>) {
        let mut cursor = self;

        for (index, byte) in bytecode.normalized_code.iter().enumerate() {
            cursor.descendants.push(Arc::clone(&bytecode));

            // NOTE(review): `index as u32` wraps for code longer than
            // `u32::MAX` bytes; presumably unreachable for contract bytecode,
            // kept as-is so the stored depth values do not change.
            let child = cursor
                .child_nodes
                .entry(*byte)
                .or_insert_with(|| Box::new(Self::new(Some(index as u32))));

            cursor = child;
        }

        // If multiple contracts with the exact same bytecode are added we keep
        // the last of them. Note that this includes the metadata hash, so the
        // chances of this happening are pretty remote, except in super
        // artificial cases that we have in our test suite.
        //
        // The `Arc` is moved here rather than cloned: it is no longer needed.
        cursor.match_ = Some(bytecode);
    }

    /// Searches for `code`, starting at byte offset `current_code_byte`.
    ///
    /// Returns:
    /// - `Some(TrieSearch::ExactHit(..))` if the remaining code ends on a node
    ///   that stores a contract.
    /// - `Some(TrieSearch::LongestPrefixNode(..))` if the remaining code
    ///   diverges from the trie; the node is the deepest one reached.
    /// - `None` if the remaining code is entirely covered by the trie but the
    ///   final node stores no contract, or if `current_code_byte` is past the
    ///   end of `code`.
    fn search(&self, code: &[u8], current_code_byte: u32) -> Option<TrieSearch<'_>> {
        // Bounds-check in `usize` rather than truncating `code.len()` to
        // `u32`, which gave a wrong answer for slices of 4 GiB or more.
        // `get` yields `None` when the offset is past the end of `code`.
        let remaining = code.get((current_code_byte as usize)..)?;

        let mut cursor = self;

        for byte in remaining {
            match cursor.child_nodes.get(byte) {
                Some(child) => cursor = child,
                None => return Some(TrieSearch::LongestPrefixNode(cursor)),
            }
        }

        cursor
            .match_
            .as_ref()
            .map(|bytecode| TrieSearch::ExactHit(Arc::clone(bytecode)))
    }
}
use crate::{
build_model::ContractMetadata,
bytecode_trie::{BytecodeTrie, TrieSearch},
};

/// Returns true if the `last_byte` is placed right when the metadata starts or
/// after it.
Expand Down
1 change: 1 addition & 0 deletions crates/edr_solidity/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub mod nested_tracer;
pub mod solidity_stack_trace;
pub mod solidity_tracer;

mod bytecode_trie;
mod error_inferrer;
mod mapped_inline_internal_functions_heuristics;
mod return_data;
Expand Down

0 comments on commit 7bcb871

Please sign in to comment.