Skip to content

Commit

Permalink
Merge pull request #8 from FuzzingLabs/feat/decompiler_output
Browse files Browse the repository at this point in the history
Improve decompiler output
  • Loading branch information
Rog3rSm1th authored May 3, 2024
2 parents cf0e38e + 76b7871 commit 9ff7b1b
Show file tree
Hide file tree
Showing 8 changed files with 185 additions and 55 deletions.
Binary file modified doc/images/decompiler-output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ edition = "2021"
cairo-lang-sierra = "~2.6.3"
colored = "2.1.0"
graphviz-rust = "0.9.0"
hex = "0.4.3"
lazy_static = "1.4.0"
num-bigint = "0.4.4"
regex = "1.10.4"

[dev-dependencies]
Expand Down
52 changes: 43 additions & 9 deletions lib/src/decompiler/decompiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::decompiler::cfg::BasicBlock;
use crate::decompiler::cfg::EdgeType;
use crate::decompiler::function::Function;
use crate::decompiler::function::SierraStatement;
use crate::decompiler::libfuncs_patterns::IS_ZERO_REGEX;
use crate::sierra_program::SierraProgram;

/// A struct that represents a decompiler for a Sierra program
Expand Down Expand Up @@ -61,7 +62,15 @@ impl<'a> Decompiler<'a> {
let functions = self.decompile_functions();

// Format the output string
format!("{}\n\n{}\n\n{}", types, libfuncs, functions)
let mut output = String::new();
if self.verbose {
output.push_str(&types);
output.push_str("\n\n");
output.push_str(&libfuncs);
output.push_str("\n\n");
}
output.push_str(&functions);
output
}

/// Decompiles the type declarations
Expand Down Expand Up @@ -468,23 +477,17 @@ impl<'a> Decompiler<'a> {
let mut decompiled_basic_block = String::new();
let indentation = "\t".repeat(self.indentation as usize);

// Define the bold brace
let bold_brace_open = "{".blue().bold();

// Append each statement to the string block
for statement in &block.statements {
// If condition
if let Some(conditional_branch) = statement.as_conditional_branch() {
if block.edges.len() == 2 {
let function_name = &conditional_branch.function;
let function_arguments = conditional_branch.parameters.join(", ");
decompiled_basic_block += &format!(
"{}if ({}({}) == 0) {}{}\n",
indentation,
decompiled_basic_block += &self.format_if_statement(
function_name,
function_arguments,
bold_brace_open,
"\t".repeat(self.indentation as usize + 1) // Adjust for nested content indentation
self.indentation as usize,
);
}
}
Expand All @@ -506,6 +509,37 @@ impl<'a> Decompiler<'a> {
decompiled_basic_block
}

/// Builds the opening line of a decompiled `if` statement
///
/// In non-verbose mode, when `function_name` matches `IS_ZERO_REGEX`, the condition is
/// reduced to `<argument> == 0`; otherwise the full call `<function_name>(<arguments>) == 0`
/// is kept. The opening brace is followed by one extra level of indentation and a newline.
fn format_if_statement(
    &self,
    function_name: &str,
    function_arguments: String,
    indentation: usize,
) -> String {
    let outer_indent = "\t".repeat(indentation);
    // Indentation of the nested content, emitted right after the opening brace
    let inner_indent = "\t".repeat(indentation + 1);
    let opening_brace = "{".blue().bold();

    // Left-hand side of the `== 0` comparison
    let condition = if IS_ZERO_REGEX.is_match(function_name) && !self.verbose {
        // The is-zero libfunc call is replaced by its (trimmed) argument
        function_arguments.trim().to_string()
    } else {
        format!("{}({})", function_name, function_arguments)
    };

    format!(
        "{}if ({} == 0) {}{}\n",
        outer_indent, condition, opening_brace, inner_indent
    )
}

/// Generates a control flow graph representation (CFG) in DOT format
pub fn generate_cfg(&mut self) -> String {
let mut dot = String::from("digraph {\n");
Expand Down
90 changes: 67 additions & 23 deletions lib/src/decompiler/function.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use colored::*;
use lazy_static::lazy_static;
use regex::Regex;
use num_bigint::BigInt;

use cairo_lang_sierra::program::BranchTarget;
use cairo_lang_sierra::program::GenFunction;
Expand All @@ -9,30 +8,14 @@ use cairo_lang_sierra::program::StatementIdx;

use crate::decompiler::cfg::ControlFlowGraph;
use crate::decompiler::cfg::SierraConditionalBranch;
use crate::decompiler::libfuncs_patterns::{
ADDITION_REGEX, CONST_REGEXES, DROP_REGEX, DUP_REGEX, FUNCTION_CALL_REGEX,
MULTIPLICATION_REGEX, STORE_TEMP_REGEX, SUBSTRACTION_REGEX, VARIABLE_ASSIGNMENT_REGEX,
};
use crate::decompiler::utils::decode_hex_bigint;
use crate::extract_parameters;
use crate::parse_element_name;

lazy_static! {
/// Those libfuncs id patterns are blacklisted from the regular decompiler output (not the verbose)
/// to make it more readable
///
/// We use lazy_static for performances issues
// Variable drop
static ref DROP_REGEX: Regex = Regex::new(r"drop(<.*>)?").unwrap();
// Store temporary variable
static ref STORE_TEMP_REGEX: Regex = Regex::new(r"store_temp(<.*>)?").unwrap();

/// These are libfuncs id patterns whose representation in the decompiler output can be improved
// User defined function call
static ref FUNCTION_CALL_REGEX: Regex = Regex::new(r"function_call<(.*)>").unwrap();
// Arithmetic operations
static ref ADDITION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_add").unwrap();
static ref SUBSTRACTION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_sub").unwrap();
static ref MULTIPLICATION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_mul").unwrap();
}

/// A struct representing a statement
#[derive(Debug, Clone)]
pub struct SierraStatement {
Expand Down Expand Up @@ -113,6 +96,7 @@ impl SierraStatement {
&assigned_variables_str,
&libfunc_id_str,
&parameters,
&verbose,
))
}
}
Expand Down Expand Up @@ -147,6 +131,7 @@ impl SierraStatement {
assigned_variables_str: &str,
libfunc_id_str: &str,
parameters: &[String],
verbose: &bool,
) -> String {
// Join parameters for general use
let parameters_str = parameters.join(", ");
Expand All @@ -168,6 +153,65 @@ impl SierraStatement {
}
}

if *verbose {
// If verbose is true, return the invocation as is
if assigned_variables_str.is_empty() {
return format!("{}({})", libfunc_id_str.blue(), parameters_str);
} else {
return format!(
"{} = {}({})",
assigned_variables_str,
libfunc_id_str.blue(),
parameters_str
);
}
}

// Handling variables duplications
// In the Sierra IR it is represented like : v1, v2 = dup<felt252>(v1)
// But we can represent it as a variable assignment such as : v2 = v1
if DUP_REGEX.is_match(libfunc_id_str) {
if let Some((first_var, second_var)) = assigned_variables_str.split_once(", ") {
return format!("{} = {}", second_var, first_var);
}
}

// Handling variables assignments
if VARIABLE_ASSIGNMENT_REGEX
.iter()
.any(|regex| regex.is_match(libfunc_id_str))
{
if let Some(old_var) = parameters.first().cloned() {
let assigned_variable = assigned_variables_str.to_string();
return format!("{} = {}", assigned_variable, old_var);
}
}

// Handling const declarations
for regex in CONST_REGEXES.iter() {
if let Some(captures) = regex.captures(libfunc_id_str) {
if let Some(const_value) = captures.name("const") {
// Convert string to a BigInt in order to decode it
let const_value_str = const_value.as_str();
let const_value_bigint =
BigInt::parse_bytes(const_value_str.as_bytes(), 10).unwrap();

// If the const integer can be decoded to a valid string, use the string as a comment
if let Some(decoded_string) = decode_hex_bigint(&const_value_bigint) {
let string_comment = format!(r#"// "{}""#, decoded_string).green();
return format!(
"{} = {} {}",
assigned_variables_str, const_value_str, string_comment
);
}
// If the string can not be decoded as a valid string
else {
return format!("{} = {}", assigned_variables_str, const_value_str);
}
}
}
}

// Handling arithmetic operations
let operator = if ADDITION_REGEX.is_match(libfunc_id_str) {
"+"
Expand Down
44 changes: 44 additions & 0 deletions lib/src/decompiler/libfuncs_patterns.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
use lazy_static::lazy_static;
use regex::Regex;

lazy_static! {
// Libfunc id patterns used by the decompiler.
// We use lazy_static for performance reasons: each regex is compiled once, on first use.
// NOTE: none of these patterns is anchored, so each one matches anywhere inside a libfunc id.

// Those libfuncs id patterns are blacklisted from the regular decompiler output
// (not the verbose one) to make it more readable

/// Variable drop: `drop`, optionally followed by a `<...>` generic argument
pub static ref DROP_REGEX: Regex = Regex::new(r"drop(<.*>)?").unwrap();

/// Store temporary variable: `store_temp`, optionally followed by a `<...>` generic argument
pub static ref STORE_TEMP_REGEX: Regex = Regex::new(r"store_temp(<.*>)?").unwrap();

// These are libfuncs id patterns whose representation in the decompiler output can be improved

/// User defined function call; capture group 1 holds the text between the angle brackets
pub static ref FUNCTION_CALL_REGEX: Regex = Regex::new(r"function_call<(.*)>").unwrap();

// Arithmetic operations: `felt`/`u` prefix, bit width, optional `_overflowing`, then the operation suffix
/// Addition (`..._add`)
pub static ref ADDITION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_add").unwrap();
/// Subtraction (`..._sub`)
pub static ref SUBSTRACTION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_sub").unwrap();
/// Multiplication (`..._mul`)
pub static ref MULTIPLICATION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_mul").unwrap();

/// Variable duplication: `dup`, optionally followed by a `<...>` generic argument
pub static ref DUP_REGEX: Regex = Regex::new(r"dup(<.*>)?").unwrap();

/// Variable renaming: `rename<...>` and `store_temp<...>`, both with a non-empty generic argument
pub static ref VARIABLE_ASSIGNMENT_REGEX: Vec<Regex> = vec![
Regex::new(r"rename<.+>").unwrap(),
Regex::new(r"store_temp<.+>").unwrap()
];

/// Check if an integer is 0: `felt`/`u` prefix, bit width, then `_is_zero`
pub static ref IS_ZERO_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)_is_zero").unwrap();

/// Consts declarations; every pattern exposes the (optionally negative) decimal value
/// through the named capture group `const`
pub static ref CONST_REGEXES: Vec<Regex> = vec![
Regex::new(r"const_as_immediate<Const<.+, (?P<const>-?[0-9]+)>>").unwrap(),
Regex::new(r"storage_base_address_const<(?P<const>-?[0-9]+)>").unwrap(),
Regex::new(r"(felt|u)_?(8|16|32|64|128|252)_const<(?P<const>-?[0-9]+)>").unwrap(),
];
}
2 changes: 2 additions & 0 deletions lib/src/decompiler/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
pub mod cfg;
pub mod decompiler;
pub mod function;
pub mod libfuncs_patterns;
pub mod macros;
pub mod utils;
20 changes: 20 additions & 0 deletions lib/src/decompiler/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
use num_bigint::BigInt;
use std::str;

/// Convert an integer to its string value or hex value
///
/// The integer is rendered as lowercase hexadecimal and that text is decoded into bytes:
/// - if the bytes are valid UTF-8, the decoded string is returned;
/// - otherwise the `0x`-prefixed hexadecimal representation is returned.
///
/// Returns `None` when `hex::decode` rejects the hexadecimal text (for example when it has
/// an odd number of digits).
///
/// Used to decode consts
pub fn decode_hex_bigint(bigint: &BigInt) -> Option<String> {
    // Convert the BigInt to a hexadecimal string
    let hex_string = format!("{:x}", bigint);

    // Decode the hexadecimal string to a byte vector
    // (`hex::decode` only borrows its input, so the string does not need to be cloned)
    let bytes = hex::decode(&hex_string).ok()?;

    // Convert the byte vector to a string, falling back to the hex value on invalid UTF-8
    Some(match str::from_utf8(&bytes) {
        Ok(text) => text.to_string(),
        Err(_) => format!("0x{hex_string}"),
    })
}
30 changes: 7 additions & 23 deletions lib/tests/test_decompiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,13 @@ fn test_decompiler_output() {
let use_color = false;
let decompiler_output = decompiler.decompile(use_color);

let expected_output = r#"type felt252
type Const<felt252, 1>
type NonZero<felt252>
libfunc disable_ap_tracking
libfunc dup<felt252>
libfunc felt252_is_zero
libfunc branch_align
libfunc drop<felt252>
libfunc store_temp<felt252>
libfunc drop<NonZero<felt252>>
libfunc felt252_add
libfunc const_as_immediate<Const<felt252, 1>>
libfunc felt252_sub
libfunc function_call<user@examples::fib::fib>
// Function 1
let expected_output = r#"// Function 1
func examples::fib::fib (v0: felt252, v1: felt252, v2: felt252) -> (felt252) {
v2, v3 = dup<felt252>(v2)
if (felt252_is_zero(v3) == 0) {
v1, v5 = dup<felt252>(v1)
v3 = v2
if (v3 == 0) {
v5 = v1
v6 = v0 + v5
v7 = const_as_immediate<Const<felt252, 1>>()
v7 = 1
v8 = v2 - v7
v9 = user@examples::fib::fib(v1, v6, v8)
return (v9)
Expand Down Expand Up @@ -93,9 +77,9 @@ func examples::fib::fib (v0: felt252, v1: felt252, v2: felt252) -> (felt252) {
branch_align()
drop<NonZero<felt252>>(v4)
v1, v5 = dup<felt252>(v1)
v6 = v0 + v5
v6 = felt252_add(v0, v5)
v7 = const_as_immediate<Const<felt252, 1>>()
v8 = v2 - v7
v8 = felt252_sub(v2, v7)
v1 = store_temp<felt252>(v1)
v6 = store_temp<felt252>(v6)
v8 = store_temp<felt252>(v8)
Expand Down

0 comments on commit 9ff7b1b

Please sign in to comment.