Skip to content
This repository has been archived by the owner on Sep 9, 2024. It is now read-only.

fix: circular constructor dependencies #240

Merged
merged 16 commits into from
Feb 26, 2023
78 changes: 52 additions & 26 deletions huff_codegen/src/irgen/statements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub fn statement_gen(
label_indices: &mut LabelIndices,
table_instances: &mut Jumps,
utilized_tables: &mut Vec<TableDefinition>,
circular_codesize_invocations: &mut CircularCodeSizeIndices,
starting_offset: usize,
) -> Result<Vec<(usize, Bytes)>, CodegenError> {
let mut bytes = vec![];
Expand Down Expand Up @@ -102,6 +103,7 @@ pub fn statement_gen(
*offset,
mis,
false,
Some(circular_codesize_invocations),
) {
Ok(r) => r,
Err(e) => {
Expand Down Expand Up @@ -180,33 +182,57 @@ pub fn statement_gen(
})
};

let res: BytecodeRes = match Codegen::macro_to_bytecode(
ir_macro.clone(),
contract,
scope,
*offset,
mis,
ir_macro.name.eq("CONSTRUCTOR"),
) {
Ok(r) => r,
Err(e) => {
tracing::error!(
target: "codegen",
"FAILED TO RECURSE INTO MACRO \"{}\"",
ir_macro.name
);
return Err(e)
}
};

let size = format_even_bytes(format!(
"{:02x}",
(res.bytes.iter().map(|(_, b)| b.0.len()).sum::<usize>() / 2)
));
let push_bytes = format!("{:02x}{size}", 95 + size.len() / 2);
// Get the name of the macro being passed to __codesize
let codesize_arg = bf.args[0].name.as_ref().unwrap();
let is_previous_parent = scope.iter().any(|def| def.name == *codesize_arg);

// Special case:
// If the macro provided to __codesize is the current macro, recursing into it
// would create a circular reference. In that case we store a placeholder
// inside the bytecode and fill it in later, once we have adequate
// information about the macro's eventual size.
// The same applies when the codesize arg is any of the previous (parent)
// macros in scope, since recursing into one of them would also be circular.
if is_previous_parent || macro_def.name.eq(codesize_arg) {
tracing::debug!(target: "codegen", "CIRCULAR CODESIZE INVOCATION DETECTED INJECTING PLACEHOLDER | macro: {}", ir_macro.name);

// Save the invocation for later
circular_codesize_invocations.insert((codesize_arg.to_string(), *offset));

// Progress offset by placeholder size
*offset += 2;
bytes.push((starting_offset, Bytes("cccc".to_string())));
} else {
// We will still need to recurse to get accurate values
let res: BytecodeRes = match Codegen::macro_to_bytecode(
ir_macro.clone(),
contract,
scope,
*offset,
mis,
ir_macro.name.eq("CONSTRUCTOR"),
Some(circular_codesize_invocations),
) {
Ok(r) => r,
Err(e) => {
tracing::error!(
target: "codegen",
"FAILED TO RECURSE INTO MACRO \"{}\"",
ir_macro.name
);
return Err(e)
}
};

let size = format_even_bytes(format!(
"{:02x}",
(res.bytes.iter().map(|(_, b)| b.0.len()).sum::<usize>() / 2)
));
let push_bytes = format!("{:02x}{size}", 95 + size.len() / 2);

*offset += push_bytes.len() / 2;
bytes.push((starting_offset, Bytes(push_bytes)));
*offset += push_bytes.len() / 2;
bytes.push((starting_offset, Bytes(push_bytes)));
}
}
BuiltinFunctionKind::Tablesize => {
let ir_table = if let Some(t) =
Expand Down
88 changes: 88 additions & 0 deletions huff_codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ impl Codegen {
0,
&mut Vec::default(),
false,
None,
)?;

tracing::debug!(target: "codegen", "Generated main bytecode. Appending table bytecode...");
Expand Down Expand Up @@ -101,6 +102,7 @@ impl Codegen {
0,
&mut Vec::default(),
false,
None,
)?;

// Check if the constructor performs its own code generation
Expand Down Expand Up @@ -277,6 +279,7 @@ impl Codegen {
mut offset: usize,
mis: &mut Vec<(usize, MacroInvocation)>,
recursing_constructor: bool,
circular_codesize_invocations: Option<&mut CircularCodeSizeIndices>,
) -> Result<BytecodeRes, CodegenError> {
// Get intermediate bytecode representation of the macro definition
let mut bytes: Vec<(usize, Bytes)> = Vec::default();
Expand All @@ -287,6 +290,8 @@ impl Codegen {
let mut label_indices = LabelIndices::new();
let mut table_instances = Jumps::new();
let mut utilized_tables: Vec<TableDefinition> = Vec::new();
let mut ccsi = CircularCodeSizeIndices::new();
let circular_codesize_invocations = circular_codesize_invocations.unwrap_or(&mut ccsi);

// Loop through all intermediate bytecode representations generated from the AST
for (_ir_bytes_index, ir_byte) in ir_bytes.into_iter().enumerate() {
Expand Down Expand Up @@ -319,6 +324,7 @@ impl Codegen {
&mut label_indices,
&mut table_instances,
&mut utilized_tables,
circular_codesize_invocations,
starting_offset,
)?;
bytes.append(&mut push_bytes);
Expand Down Expand Up @@ -367,6 +373,15 @@ impl Codegen {
// Fill JUMPDEST placeholders
let (bytes, unmatched_jumps) = Codegen::fill_unmatched(bytes, &jump_table, &label_indices)?;

// Fill in circular codesize invocations
// TODO: work out how to increase the offset by the correct amount here when the
// filled-in push is longer than 2 bytes
let bytes = Codegen::fill_circular_codesize_invocations(
bytes,
circular_codesize_invocations,
&macro_def.name,
)?;

Ok(BytecodeRes { bytes, label_indices, unmatched_jumps, table_instances, utilized_tables })
}

Expand Down Expand Up @@ -437,6 +452,78 @@ impl Codegen {
Ok((bytes, unmatched_jumps))
}

/// Helper associated function that resolves circular codesize placeholders.
///
/// ## Overview
///
/// Intended to run once the rest of the macro's bytecode has been generated. Every
/// entry of `bytes` whose starting index is recorded in `circular_codesize_invocations`
/// under `macro_name` is overwritten with a push of the macro's size in bytes.
///
/// Sizes up to 255 fit the two byte placeholder exactly. A larger size needs one
/// extra byte per replacement, so in that case:
/// - the pushed size is increased by one byte for every recorded invocation (every
///   entry of the set is counted), and
/// - the index of each entry that is not a placeholder is moved forward by the number
///   of bytes inserted ahead of it.
///
/// If nothing has been recorded, `bytes` is handed back untouched.
///
/// On success, returns the bytecode entries with the placeholders filled in.
/// The return type allows for a CodegenError, but no path here produces one.
pub fn fill_circular_codesize_invocations(
    bytes: Vec<(usize, Bytes)>,
    circular_codesize_invocations: &mut CircularCodeSizeIndices,
    macro_name: &str,
) -> Result<Vec<(usize, Bytes)>, CodegenError> {
    // Without any recorded invocation there is nothing to patch
    let invocation_count = circular_codesize_invocations.len();
    if invocation_count == 0 {
        return Ok(bytes);
    }

    tracing::debug!(target: "codegen", "Circular Codesize Invocation: Bytes before expansion: {:#?}", bytes);

    // Entries hold hex text, so two characters make up one byte of code
    let hex_chars: usize = bytes.iter().map(|(_, chunk)| chunk.0.len()).sum();
    let macro_len = hex_chars / 2;

    // Past 255 the size no longer fits a single byte, so every filled placeholder
    // grows the code by one byte. The next threshold (65536) is beyond the codesize
    // limit and is not handled.
    let growth_per_fill = if macro_len > 255 { 1 } else { 0 };
    let final_len = macro_len + growth_per_fill * invocation_count;

    // 95 + n is the opcode that pushes an n byte immediate
    let size_hex = format_even_bytes(format!("{final_len:02x}"));
    let push_hex = format!("{:02x}{size_hex}", 95 + size_hex.len() / 2);

    // The lookup key is built once; only its index component changes per entry
    let mut lookup_key = (macro_name.to_string(), 0);
    // Bytes inserted so far by placeholders that had to grow
    let mut inserted = 0;
    let mut patched = Vec::default();

    for (index, chunk) in bytes {
        lookup_key.1 = index;

        // Not a recorded invocation: keep the entry, shifted past any inserted bytes
        if circular_codesize_invocations.get(&lookup_key).is_none() {
            patched.push((index + inserted, chunk));
            continue;
        }

        // A recorded invocation is expected to still carry the placeholder text;
        // this is only reported, the entry is overwritten either way
        if chunk.0 != "cccc" {
            tracing::error!(
                target: "codegen",
                "CIRCULAR CODESIZE PLACEHOLDER NOT FOUND"
            );
        }

        // Swap the placeholder for the size push; its own index is left as recorded
        patched.push((index, Bytes(push_hex.to_string())));
        inserted += growth_per_fill;
    }

    Ok(patched)
}

/// Helper associated function to append functions to the end of the bytecode.
///
/// ## Overview
Expand Down Expand Up @@ -469,6 +556,7 @@ impl Codegen {
*offset + 1,
mis,
false,
None,
)?;

for j in res.unmatched_jumps.iter_mut() {
Expand Down
Loading