From 40c7b7e56b77392de78bed4a55b87e4f34aa075e Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sun, 22 Jan 2023 23:04:38 +0000 Subject: [PATCH 01/16] wip: circular codesize dependencies --- huff_codegen/src/irgen/statements.rs | 94 +++++++++------ huff_codegen/src/lib.rs | 165 ++++++++++++++++++++++++++- huff_utils/src/ast.rs | 4 +- huff_utils/src/bytecode.rs | 5 +- 4 files changed, 223 insertions(+), 45 deletions(-) diff --git a/huff_codegen/src/irgen/statements.rs b/huff_codegen/src/irgen/statements.rs index cd45449f..fdb83bb0 100644 --- a/huff_codegen/src/irgen/statements.rs +++ b/huff_codegen/src/irgen/statements.rs @@ -15,6 +15,7 @@ pub fn statement_gen( label_indices: &mut LabelIndices, table_instances: &mut Jumps, utilized_tables: &mut Vec, + circular_codesize_invocations: &mut CircularCodeSizeIndices, starting_offset: usize, ) -> Result, CodegenError> { let mut bytes = vec![]; @@ -36,7 +37,7 @@ pub fn statement_gen( kind: CodegenErrorKind::InvalidMacroInvocation(mi.macro_name.clone()), span: mi.span.clone(), token: None, - }) + }); }; tracing::info!(target: "codegen", "FOUND INNER MACRO: {}", ir_macro.name); @@ -48,7 +49,7 @@ pub fn statement_gen( kind: CodegenErrorKind::TestInvocation(ir_macro.name.clone()), span: ir_macro.span, token: None, - }) + }); } // If invoked macro is a function (outlined), insert a jump to the function's code and a @@ -110,7 +111,7 @@ pub fn statement_gen( "FAILED TO RECURSE INTO MACRO \"{}\"", ir_macro.name ); - return Err(e) + return Err(e); } }; @@ -177,36 +178,55 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }) - }; - - let res: BytecodeRes = match Codegen::macro_to_bytecode( - ir_macro.clone(), - contract, - scope, - *offset, - mis, - ir_macro.name.eq("CONSTRUCTOR"), - ) { - Ok(r) => r, - Err(e) => { - tracing::error!( - target: "codegen", - "FAILED TO RECURSE INTO MACRO \"{}\"", - ir_macro.name - ); - return Err(e) - } + }); }; - let size = 
format_even_bytes(format!( - "{:02x}", - (res.bytes.iter().map(|(_, b)| b.0.len()).sum::() / 2) - )); - let push_bytes = format!("{:02x}{size}", 95 + size.len() / 2); + // Special case: + // If the macro provided to __codesize is the current macro, we need to avoid a circular reference + // If this is the case we will store a place holder inside the bytecode and fill it in later when + // we have adequate information about the macros eventual size. + // + // We + // TODO: remove this unwrap / clone + if bf.args[0].name.clone().unwrap() == ir_macro.name { + tracing::debug!(target: "codegen", "CIRCULAR CODESIZE INVOCATION DETECTED INJECTING PLACEHOLDER | macro: {}", ir_macro.name); + + // Save the invocation for later + circular_codesize_invocations.insert(*offset); + + // Progress offset by placeholder size + *offset += 2; + bytes.push((starting_offset, Bytes(format!("cccc")))); + } else { + // We will still need to recurse to get accurate values + let res: BytecodeRes = match Codegen::macro_to_bytecode( + ir_macro.clone(), + contract, + scope, + *offset, + mis, + ir_macro.name.eq("CONSTRUCTOR"), + ) { + Ok(r) => r, + Err(e) => { + tracing::error!( + target: "codegen", + "FAILED TO RECURSE INTO MACRO \"{}\"", + ir_macro.name + ); + return Err(e); + } + }; + + let size = format_even_bytes(format!( + "{:02x}", + (res.bytes.iter().map(|(_, b)| b.0.len()).sum::() / 2) + )); + let push_bytes = format!("{:02x}{size}", 95 + size.len() / 2); - *offset += push_bytes.len() / 2; - bytes.push((starting_offset, Bytes(push_bytes))); + *offset += push_bytes.len() / 2; + bytes.push((starting_offset, Bytes(push_bytes))); + } } BuiltinFunctionKind::Tablesize => { let ir_table = if let Some(t) = @@ -225,7 +245,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }) + }); }; let size = bytes32_to_string(&ir_table.size, false); @@ -265,7 +285,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }) + }); } } BuiltinFunctionKind::FunctionSignature => { @@ 
-324,7 +344,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }) + }); } } BuiltinFunctionKind::EventHash => { @@ -374,7 +394,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }) + }); } } BuiltinFunctionKind::Error => { @@ -391,7 +411,7 @@ pub fn statement_gen( )), span: bf.span.clone(), token: None, - }) + }); } if let Some(error) = contract @@ -417,7 +437,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }) + }); } } BuiltinFunctionKind::RightPad => { @@ -550,7 +570,7 @@ pub fn statement_gen( kind: CodegenErrorKind::InvalidMacroStatement, span: s.span.clone(), token: None, - }) + }); } } diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index b12463eb..795d3928 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -148,7 +148,7 @@ impl Codegen { .collect::>(), ), token: None, - }) + }); } tracing::info!(target: "codegen", "GENERATING JUMPTABLE BYTECODE"); @@ -287,6 +287,8 @@ impl Codegen { let mut label_indices = LabelIndices::new(); let mut table_instances = Jumps::new(); let mut utilized_tables: Vec = Vec::new(); + let mut circular_codesize_invocations: CircularCodeSizeIndices = + CircularCodeSizeIndices::new(); // Loop through all intermediate bytecode representations generated from the AST for (_ir_bytes_index, ir_byte) in ir_bytes.into_iter().enumerate() { @@ -306,7 +308,7 @@ impl Codegen { // if we have a codesize call for the constructor here, from within the // constructor, we skip if recursing_constructor { - continue + continue; } let mut push_bytes = statement_gen( &s, @@ -319,6 +321,7 @@ impl Codegen { &mut label_indices, &mut table_instances, &mut utilized_tables, + &mut circular_codesize_invocations, starting_offset, )?; bytes.append(&mut push_bytes); @@ -367,9 +370,161 @@ impl Codegen { // Fill JUMPDEST placeholders let (bytes, unmatched_jumps) = Codegen::fill_unmatched(bytes, &jump_table, &label_indices)?; + // Fill in circular codesize invocations + // let bytes = 
+ // Workout how to increase the offset the correct amount within here if it is longer than 2 bytes + let bytes = Codegen::fill_circular_codesize_invocations( + macro_def, + contract, + scope, + &mut offset, + mis, + recursing_constructor, + bytes, + circular_codesize_invocations, + )?; + Ok(BytecodeRes { bytes, label_indices, unmatched_jumps, table_instances, utilized_tables }) } + // TODO: move this lower + pub fn fill_circular_codesize_invocations( + macro_def: MacroDefinition, + contract: &Contract, + scope: &mut Vec, + offset: &mut usize, + mis: &mut Vec<(usize, MacroInvocation)>, + recursing_constructor: bool, + bytes: Vec<(usize, Bytes)>, + circular_codesize_invocations: CircularCodeSizeIndices, + ) -> Result<Vec<(usize, Bytes)>, CodegenError> { + // ) -> Result<(), CodegenError> { + // Get the length of the macro + let offsets_num = circular_codesize_invocations.len(); + if offsets_num == 0 { + return Ok(bytes); + // return Ok(bytes); + } + + // TODO: MANUALLY COPIED FROM THE statement rs file - refactor + let ir_macro = if let Some(m) = contract.find_macro_by_name(&macro_def.name) { + m + } else { + tracing::error!( + target: "codegen", + "MISSING MACRO PASSED TO __codesize \"{}\"", + &macro_def.name + ); + return Err(CodegenError { + kind: CodegenErrorKind::MissingMacroDefinition(macro_def.name /* yuck */), + span: macro_def.span.clone(), + token: None, + }); + }; + + let length = { + let res: BytecodeRes = match Codegen::macro_to_bytecode( + ir_macro.clone(), + contract, + scope, + 0, + mis, + ir_macro.name.eq("CONSTRUCTOR"), + ) { + Ok(r) => r, + Err(e) => { + tracing::error!( + target: "codegen", + "FAILED TO RECURSE INTO MACRO \"{}\"", + ir_macro.name + ); + return Err(e); + } + }; + res.bytes.iter().map(|(_, b)| b.0.len()).sum::<usize>() / 2 + }; + + // Fill in the circular codesize invocations depending on the length increase by injecting them + tracing::debug!( + target: "codegen", + "FILLING IN CIRCULAR CODESIZE INVOCATIONS: length without before fill - {:?}", + length + ); + + let 
real_size = length + (1 + offsets_num); + let real_size_formatted = format_even_bytes(format!("{}", real_size)); + tracing::debug!( + target: "codegen", + "REAL SIZE - {:?}", + real_size + ); + let push_bytes = format!("{:02x}{real_size_formatted}", 95 + real_size_formatted.len() / 2); + + tracing::debug!( + target: "codegen", + "FILLING IN CIRCULAR CODESIZE INVOCATIONS: length without after fill - {:?}", + real_size + ); + + // Replace the "xxxx" placeholder with the jump value + tracing::debug!( + target: "codegen", + "FILLING IN CIRCULAR CODESIZE INVOCATIONS: before - {:#?}", + bytes + ); + + // let bytecode_str = bytes.clone().into_iter().map(|(_, b)| b.0).collect::(); + let bytes = + bytes.into_iter().fold(Vec::default(), |mut acc, (code_index, mut formatted_bytes)| { + // Check if a jump table exists at `code_index` (starting offset of `b`) + if let Some(index) = circular_codesize_invocations.get(&code_index) { + // Get the bytes before & after the placeholder + let before = &formatted_bytes.0[0..code_index]; + let after = &formatted_bytes.0[code_index + 4..]; + + // Check if a jump dest placeholder is present + if !&formatted_bytes.0[code_index..code_index + 4].eq("cccc") { + tracing::error!( + target: "codegen", + "JUMP DESTINATION PLACEHOLDER NOT FOUND FOR JUMPLABEL {}", + push_bytes + ); + } + + // Replace the "xxxx" placeholder with the jump value + formatted_bytes = Bytes(format!("{before}{push_bytes}{after}")); + } + + acc.push((code_index, formatted_bytes)); + acc + }); + + Ok((bytes)) + // // Increase the offset by the new increase + // for offset in circular_codesize_invocations.offsets { + // // Format the jump index as a 2 byte hex number + + // let bytecode_str = bytes.clone().into_iter().map(|(_, b)| b.0).collect::(); + // // Get the bytes before & after the placeholder + // let before = &bytes[0..offset + 2]; + // let after = &bytes[offset + 4..]; + + // // Check if a jump dest placeholder is present + // if !&bytecode_str[offset + 2..offset + 
6].eq("cccc") { + // tracing::error!( + // target: "codegen", + // "CIRCULAR CODESIZE INVOCATION PLACEHOLDER NOT FOUND AT OFFSET {}", + // offset + // ); + // } + + // // bytes = (offset, Bytes(format!("{before}{push_bytes}{after}"))); + // } + + // // replace each instance with the real size + // Ok(bytes) + } + /// Helper associated function to fill unmatched jump dests. /// /// ## Overview @@ -608,7 +763,7 @@ impl Codegen { kind: CodegenErrorKind::InvalidDynArgIndex, span: AstSpan(vec![Span { start: 0, end: 0, file: None }]), token: None, - }) + }); } // Constructor size optimizations @@ -683,7 +838,7 @@ impl Codegen { })), }]), token: None, - }) + }); } } if let Err(e) = fs::write(file_path, serialized_artifact) { @@ -701,7 +856,7 @@ impl Codegen { })), }]), token: None, - }) + }); } Ok(()) } diff --git a/huff_utils/src/ast.rs b/huff_utils/src/ast.rs index 92396b0b..e861ba75 100644 --- a/huff_utils/src/ast.rs +++ b/huff_utils/src/ast.rs @@ -205,7 +205,7 @@ impl Contract { let mut i = 0; loop { if i >= statements.len() { - break + break; } match &statements[i].clone().ty { StatementType::Constant(const_name) => { @@ -264,7 +264,7 @@ impl Contract { } } StatementType::BuiltinFunctionCall(bfc) => { - tracing::debug!(target: "ast", "Deriving Storage Pointrs: Found builtin function {:?}", bfc.kind); + tracing::debug!(target: "ast", "Deriving Storage Pointers: Found builtin function {:?}", bfc.kind); for a in &bfc.args { if let Some(name) = &a.name { match self diff --git a/huff_utils/src/bytecode.rs b/huff_utils/src/bytecode.rs index f063752e..bccd2cf6 100644 --- a/huff_utils/src/bytecode.rs +++ b/huff_utils/src/bytecode.rs @@ -4,7 +4,7 @@ use crate::prelude::{AstSpan, Statement, TableDefinition}; use std::{ - collections::BTreeMap, + collections::{BTreeMap, BTreeSet}, fmt::{self, Display}, }; @@ -115,5 +115,8 @@ pub type Jumps = Vec; /// Type to map `Jump` labels to their bytecode indices pub type LabelIndices = BTreeMap; +/// Typw to map circular_codesize labels 
to their bytecode indices +pub type CircularCodeSizeIndices = BTreeSet; + /// Type for a map of bytecode indexes to `Jumps`. Represents a Jump Table. pub type JumpTable = BTreeMap; From 1dcec4077bca480a35144a3ee0db84c1f3291803 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sun, 22 Jan 2023 23:05:20 +0000 Subject: [PATCH 02/16] clippy --- huff_codegen/src/irgen/statements.rs | 2 +- huff_codegen/src/lib.rs | 10 +++++----- huff_utils/src/error.rs | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/huff_codegen/src/irgen/statements.rs b/huff_codegen/src/irgen/statements.rs index fdb83bb0..f6b505ef 100644 --- a/huff_codegen/src/irgen/statements.rs +++ b/huff_codegen/src/irgen/statements.rs @@ -196,7 +196,7 @@ pub fn statement_gen( // Progress offset by placeholder size *offset += 2; - bytes.push((starting_offset, Bytes(format!("cccc")))); + bytes.push((starting_offset, Bytes("cccc".to_string()))); } else { // We will still need to recurse to get accurate values let res: BytecodeRes = match Codegen::macro_to_bytecode( diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 795d3928..6bb00194 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -392,9 +392,9 @@ impl Codegen { macro_def: MacroDefinition, contract: &Contract, scope: &mut Vec, - offset: &mut usize, + _offset: &mut usize, mis: &mut Vec<(usize, MacroInvocation)>, - recursing_constructor: bool, + _recursing_constructor: bool, bytes: Vec<(usize, Bytes)>, circular_codesize_invocations: CircularCodeSizeIndices, ) -> Result, CodegenError> { @@ -452,7 +452,7 @@ impl Codegen { ); let real_size = length + (1 + offsets_num); - let real_size_formatted = format_even_bytes(format!("{}", real_size)); + let real_size_formatted = format_even_bytes(format!("{real_size}")); tracing::debug!( target: "codegen", "REAL SIZE - {:?}", @@ -477,7 +477,7 @@ impl Codegen { let bytes = bytes.into_iter().fold(Vec::default(), |mut acc, (code_index, 
mut formatted_bytes)| { // Check if a jump table exists at `code_index` (starting offset of `b`) - if let Some(index) = circular_codesize_invocations.get(&code_index) { + if let Some(_index) = circular_codesize_invocations.get(&code_index) { // Get the bytes before & after the placeholder let before = &formatted_bytes.0[0..code_index]; let after = &formatted_bytes.0[code_index + 4..]; @@ -499,7 +499,7 @@ impl Codegen { acc }); - Ok((bytes)) + Ok(bytes) // // Increase the offset by the new increase // for offset in circular_codesize_invocations.offsets { // // Format the jump index as a 2 byte hex number diff --git a/huff_utils/src/error.rs b/huff_utils/src/error.rs index 4e71186d..0efd4cac 100644 --- a/huff_utils/src/error.rs +++ b/huff_utils/src/error.rs @@ -157,6 +157,8 @@ pub enum CodegenErrorKind { AbiGenerationFailure, /// Unmatched Jump UnmatchedJumpLabel, + /// Unhandled circular codesize invocation + // UnhandledCircularCodesizeInvocation(String), /// An IO Error IOError(String), /// ArgCall has an unknown type From a2393cff9350b1cbbd82ad28413e9abc5a8ff930 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sun, 22 Jan 2023 23:17:09 +0000 Subject: [PATCH 03/16] fix: typo --- huff_codegen/src/irgen/statements.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/huff_codegen/src/irgen/statements.rs b/huff_codegen/src/irgen/statements.rs index f6b505ef..b67b588f 100644 --- a/huff_codegen/src/irgen/statements.rs +++ b/huff_codegen/src/irgen/statements.rs @@ -188,7 +188,7 @@ pub fn statement_gen( // // We // TODO: remove this unwrap / clone - if bf.args[0].name.clone().unwrap() == ir_macro.name { + if bf.args[0].name.clone().unwrap() == macro_def.name { tracing::debug!(target: "codegen", "CIRCULAR CODESIZE INVOCATION DETECTED INJECTING PLACEHOLDER | macro: {}", ir_macro.name); // Save the invocation for later From c03832bdb6311611c3ca04b44eae33ea80b2cfc4 Mon Sep 17 00:00:00 2001 From: Cheethas 
<47148561+cheethas@users.noreply.github.com> Date: Sun, 22 Jan 2023 23:24:47 +0000 Subject: [PATCH 04/16] fix: miscalculated jumps --- huff_codegen/src/lib.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 6bb00194..60c69902 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -451,7 +451,8 @@ impl Codegen { length ); - let real_size = length + (1 + offsets_num); + // TODO: this needs to be smarter if the macro is massive + let real_size = length + (2 * offsets_num); let real_size_formatted = format_even_bytes(format!("{real_size}")); tracing::debug!( target: "codegen", @@ -466,28 +467,27 @@ impl Codegen { real_size ); - // Replace the "xxxx" placeholder with the jump value + // Replace the "cccc" placeholder with the calculated codesize value tracing::debug!( target: "codegen", "FILLING IN CIRCULAR CODESIZE INVOCATIONS: before - {:#?}", bytes ); - // let bytecode_str = bytes.clone().into_iter().map(|(_, b)| b.0).collect::(); let bytes = bytes.into_iter().fold(Vec::default(), |mut acc, (code_index, mut formatted_bytes)| { // Check if a jump table exists at `code_index` (starting offset of `b`) if let Some(_index) = circular_codesize_invocations.get(&code_index) { // Get the bytes before & after the placeholder - let before = &formatted_bytes.0[0..code_index]; - let after = &formatted_bytes.0[code_index + 4..]; + let before = &formatted_bytes.0[0..0]; + let after = &formatted_bytes.0[0 + 4..]; // Check if a jump dest placeholder is present - if !&formatted_bytes.0[code_index..code_index + 4].eq("cccc") { + if !&formatted_bytes.0[code_index..0 + 4].eq("cccc") { + // TODO: fix up legacy error message tracing::error!( target: "codegen", - "JUMP DESTINATION PLACEHOLDER NOT FOUND FOR JUMPLABEL {}", - push_bytes + "CIRCULAR CODESIZE PLACEHOLDER NOT FOUND" ); } From 98052f7e8c3de2e6c6a58d554089edb97828675f Mon Sep 17 00:00:00 2001 From: Cheethas 
<47148561+cheethas@users.noreply.github.com> Date: Sun, 22 Jan 2023 23:39:38 +0000 Subject: [PATCH 05/16] clippy fmt --- huff_codegen/src/irgen/statements.rs | 5 +++-- huff_codegen/src/lib.rs | 26 ++++++++++++++------------ huff_utils/src/ast.rs | 2 +- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/huff_codegen/src/irgen/statements.rs b/huff_codegen/src/irgen/statements.rs index b67b588f..317184a6 100644 --- a/huff_codegen/src/irgen/statements.rs +++ b/huff_codegen/src/irgen/statements.rs @@ -182,8 +182,9 @@ pub fn statement_gen( }; // Special case: - // If the macro provided to __codesize is the current macro, we need to avoid a circular reference - // If this is the case we will store a place holder inside the bytecode and fill it in later when + // If the macro provided to __codesize is the current macro, we need to avoid a + // circular reference If this is the case we will store a + // place holder inside the bytecode and fill it in later when // we have adequate information about the macros eventual size. 
// // We diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 60c69902..8d09757f 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -148,7 +148,7 @@ impl Codegen { .collect::>(), ), token: None, - }); + }) } tracing::info!(target: "codegen", "GENERATING JUMPTABLE BYTECODE"); @@ -308,7 +308,7 @@ impl Codegen { // if we have a codesize call for the constructor here, from within the // constructor, we skip if recursing_constructor { - continue; + continue } let mut push_bytes = statement_gen( &s, @@ -372,7 +372,8 @@ impl Codegen { // Fill in circular codesize invocations // let bytes = - // Workout how to increase the offset the correct amount within here if it is longer than 2 bytes + // Workout how to increase the offset the correct amount within here if it is longer than 2 + // bytes let bytes = Codegen::fill_circular_codesize_invocations( macro_def, contract, @@ -402,7 +403,7 @@ impl Codegen { // Get the length of the macro let offsets_num = circular_codesize_invocations.len(); if offsets_num == 0 { - return Ok(bytes); + return Ok(bytes) // return Ok(bytes); } @@ -419,7 +420,7 @@ impl Codegen { kind: CodegenErrorKind::MissingMacroDefinition(macro_def.name /* yuck */), span: macro_def.span.clone(), token: None, - }); + }) }; let length = { @@ -438,13 +439,14 @@ impl Codegen { "FAILED TO RECURSE INTO MACRO \"{}\"", ir_macro.name ); - return Err(e); + return Err(e) } }; res.bytes.iter().map(|(_, b)| b.0.len()).sum::() / 2 }; - // Fill in the circular codesize invocations depending on the length increase by injecting them + // Fill in the circular codesize invocations depending on the length increase by injecting + // them tracing::debug!( target: "codegen", "FILLING IN CIRCULAR CODESIZE INVOCATIONS: length without before fill - {:?}", @@ -480,10 +482,10 @@ impl Codegen { if let Some(_index) = circular_codesize_invocations.get(&code_index) { // Get the bytes before & after the placeholder let before = &formatted_bytes.0[0..0]; - let 
after = &formatted_bytes.0[0 + 4..]; + let after = &formatted_bytes.0[4..]; // Check if a jump dest placeholder is present - if !&formatted_bytes.0[code_index..0 + 4].eq("cccc") { + if !&formatted_bytes.0[code_index..4].eq("cccc") { // TODO: fix up legacy error message tracing::error!( target: "codegen", @@ -763,7 +765,7 @@ impl Codegen { kind: CodegenErrorKind::InvalidDynArgIndex, span: AstSpan(vec![Span { start: 0, end: 0, file: None }]), token: None, - }); + }) } // Constructor size optimizations @@ -838,7 +840,7 @@ impl Codegen { })), }]), token: None, - }); + }) } } if let Err(e) = fs::write(file_path, serialized_artifact) { @@ -856,7 +858,7 @@ impl Codegen { })), }]), token: None, - }); + }) } Ok(()) } diff --git a/huff_utils/src/ast.rs b/huff_utils/src/ast.rs index e861ba75..de8f0744 100644 --- a/huff_utils/src/ast.rs +++ b/huff_utils/src/ast.rs @@ -205,7 +205,7 @@ impl Contract { let mut i = 0; loop { if i >= statements.len() { - break; + break } match &statements[i].clone().ty { StatementType::Constant(const_name) => { From 75f3ee6cfb7b5d8e18257b11add8a87e2cf0a5a7 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sun, 22 Jan 2023 23:59:48 +0000 Subject: [PATCH 06/16] fix: format as hex --- huff_codegen/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 8d09757f..394fbc6b 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -455,7 +455,7 @@ impl Codegen { // TODO: this needs to be smarter if the macro is massive let real_size = length + (2 * offsets_num); - let real_size_formatted = format_even_bytes(format!("{real_size}")); + let real_size_formatted = format_even_bytes(format!("{real_size:02x}")); tracing::debug!( target: "codegen", "REAL SIZE - {:?}", From ea277560dc92e4b90cd7b70e8abbf95bbc65c913 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Mon, 23 Jan 2023 00:02:48 +0000 
Subject: [PATCH 07/16] fmt --- huff_codegen/src/irgen/statements.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/huff_codegen/src/irgen/statements.rs b/huff_codegen/src/irgen/statements.rs index 317184a6..b442e1d3 100644 --- a/huff_codegen/src/irgen/statements.rs +++ b/huff_codegen/src/irgen/statements.rs @@ -37,7 +37,7 @@ pub fn statement_gen( kind: CodegenErrorKind::InvalidMacroInvocation(mi.macro_name.clone()), span: mi.span.clone(), token: None, - }); + }) }; tracing::info!(target: "codegen", "FOUND INNER MACRO: {}", ir_macro.name); @@ -49,7 +49,7 @@ pub fn statement_gen( kind: CodegenErrorKind::TestInvocation(ir_macro.name.clone()), span: ir_macro.span, token: None, - }); + }) } // If invoked macro is a function (outlined), insert a jump to the function's code and a @@ -111,7 +111,7 @@ pub fn statement_gen( "FAILED TO RECURSE INTO MACRO \"{}\"", ir_macro.name ); - return Err(e); + return Err(e) } }; @@ -178,7 +178,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }); + }) }; // Special case: @@ -215,7 +215,7 @@ pub fn statement_gen( "FAILED TO RECURSE INTO MACRO \"{}\"", ir_macro.name ); - return Err(e); + return Err(e) } }; @@ -246,7 +246,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }); + }) }; let size = bytes32_to_string(&ir_table.size, false); @@ -286,7 +286,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }); + }) } } BuiltinFunctionKind::FunctionSignature => { @@ -345,7 +345,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }); + }) } } BuiltinFunctionKind::EventHash => { @@ -395,7 +395,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }); + }) } } BuiltinFunctionKind::Error => { @@ -412,7 +412,7 @@ pub fn statement_gen( )), span: bf.span.clone(), token: None, - }); + }) } if let Some(error) = contract @@ -438,7 +438,7 @@ pub fn statement_gen( ), span: bf.span.clone(), token: None, - }); + }) } } 
BuiltinFunctionKind::RightPad => { @@ -571,7 +571,7 @@ pub fn statement_gen( kind: CodegenErrorKind::InvalidMacroStatement, span: s.span.clone(), token: None, - }); + }) } } From f3e9efe3f7af337ceb9069c8088c27d07180586d Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Mon, 23 Jan 2023 00:11:27 +0000 Subject: [PATCH 08/16] clean --- huff_codegen/src/lib.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 394fbc6b..793a58f5 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -502,29 +502,6 @@ impl Codegen { }); Ok(bytes) - // // Increase the offset by the new increase - // for offset in circular_codesize_invocations.offsets { - // // Format the jump index as a 2 byte hex number - - // let bytecode_str = bytes.clone().into_iter().map(|(_, b)| b.0).collect::(); - // // Get the bytes before & after the placeholder - // let before = &bytes[0..offset + 2]; - // let after = &bytes[offset + 4..]; - - // // Check if a jump dest placeholder is present - // if !&bytecode_str[offset + 2..offset + 6].eq("cccc") { - // tracing::error!( - // target: "codegen", - // "CIRCULAR CODESIZE INVOCATION PLACEHOLDER NOT FOUND AT OFFSET {}", - // offset - // ); - // } - - // // bytes = (offset, Bytes(format!("{before}{push_bytes}{after}"))); - // } - - // // replace each instance with the real size - // Ok(bytes) } /// Helper associated function to fill unmatched jump dests. 
From 367cc76d1abe9cf2c17f722c90dc243365dd6c72 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Mon, 23 Jan 2023 00:14:34 +0000 Subject: [PATCH 09/16] doc --- huff_codegen/src/lib.rs | 149 +++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 69 deletions(-) diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 793a58f5..9e3f3218 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -388,7 +388,86 @@ impl Codegen { Ok(BytecodeRes { bytes, label_indices, unmatched_jumps, table_instances, utilized_tables }) } - // TODO: move this lower + /// Helper associated function to fill unmatched jump dests. + /// + /// ## Overview + /// + /// Iterates over the vec of generated bytes. At each index, check if a jump is tracked. + /// If one is, find the index of label and inplace the formatted location. + /// If there is no label matching the jump, we append the jump to a list of unmatched jumps, + /// updating the jump's bytecode index. + /// + /// On success, returns a tuple of generated bytes and unmatched jumps. + /// On failure, returns a CodegenError. + #[allow(clippy::type_complexity)] + pub fn fill_unmatched( + bytes: Vec<(usize, Bytes)>, + jump_table: &JumpTable, + label_indices: &LabelIndices, + ) -> Result<(Vec<(usize, Bytes)>, Vec), CodegenError> { + let mut unmatched_jumps = Jumps::default(); + let bytes = + bytes.into_iter().fold(Vec::default(), |mut acc, (code_index, mut formatted_bytes)| { + // Check if a jump table exists at `code_index` (starting offset of `b`) + if let Some(jt) = jump_table.get(&code_index) { + // Loop through jumps inside of the found JumpTable + for jump in jt { + // Check if the jump label has been defined. 
If not, add `jump` to the + // unmatched jumps and define its `bytecode_index` + // at `code_index` + if let Some(jump_index) = label_indices.get(jump.label.as_str()) { + // Format the jump index as a 2 byte hex number + let jump_value = format!("{jump_index:04x}"); + + // Get the bytes before & after the placeholder + let before = &formatted_bytes.0[0..jump.bytecode_index + 2]; + let after = &formatted_bytes.0[jump.bytecode_index + 6..]; + + // Check if a jump dest placeholder is present + if !&formatted_bytes.0[jump.bytecode_index + 2..jump.bytecode_index + 6] + .eq("xxxx") + { + tracing::error!( + target: "codegen", + "JUMP DESTINATION PLACEHOLDER NOT FOUND FOR JUMPLABEL {}", + jump.label + ); + } + + // Replace the "xxxx" placeholder with the jump value + formatted_bytes = Bytes(format!("{before}{jump_value}{after}")); + } else { + // The jump did not have a corresponding label index. Add it to the + // unmatched jumps vec. + unmatched_jumps.push(Jump { + label: jump.label.clone(), + bytecode_index: code_index, + span: jump.span.clone(), + }); + } + } + } + + acc.push((code_index, formatted_bytes)); + acc + }); + + Ok((bytes, unmatched_jumps)) + } + + /// Helper associated function to fill circular codesize invocations. + /// + /// ## Overview + /// + /// This function should run after all other code generation has been completed. + /// If there are placeholders for circular codesize invocations, this function will + /// fill them in with the correct offset. + /// + /// If there are multiple invocations of the same macro, the function will take into + /// account the total number of invocations and increase its offset accordingly. + /// + /// On success, returns a tuple of generated bytes. + /// On failure, returns a CodegenError. 
pub fn fill_circular_codesize_invocations( macro_def: MacroDefinition, contract: &Contract, @@ -404,7 +483,6 @@ impl Codegen { let offsets_num = circular_codesize_invocations.len(); if offsets_num == 0 { return Ok(bytes) - // return Ok(bytes); } // TODO: MANUALLY COPIED FROM THE statement rs file - refactor @@ -504,73 +582,6 @@ impl Codegen { Ok(bytes) } - /// Helper associated function to fill unmatched jump dests. - /// - /// ## Overview - /// - /// Iterates over the vec of generated bytes. At each index, check if a jump is tracked. - /// If one is, find the index of label and inplace the formatted location. - /// If there is no label matching the jump, we append the jump to a list of unmatched jumps, - /// updating the jump's bytecode index. - /// - /// On success, returns a tuple of generated bytes and unmatched jumps. - /// On failure, returns a CodegenError. - #[allow(clippy::type_complexity)] - pub fn fill_unmatched( - bytes: Vec<(usize, Bytes)>, - jump_table: &JumpTable, - label_indices: &LabelIndices, - ) -> Result<(Vec<(usize, Bytes)>, Vec), CodegenError> { - let mut unmatched_jumps = Jumps::default(); - let bytes = - bytes.into_iter().fold(Vec::default(), |mut acc, (code_index, mut formatted_bytes)| { - // Check if a jump table exists at `code_index` (starting offset of `b`) - if let Some(jt) = jump_table.get(&code_index) { - // Loop through jumps inside of the found JumpTable - for jump in jt { - // Check if the jump label has been defined. 
If not, add `jump` to the - // unmatched jumps and define its `bytecode_index` - // at `code_index` - if let Some(jump_index) = label_indices.get(jump.label.as_str()) { - // Format the jump index as a 2 byte hex number - let jump_value = format!("{jump_index:04x}"); - - // Get the bytes before & after the placeholder - let before = &formatted_bytes.0[0..jump.bytecode_index + 2]; - let after = &formatted_bytes.0[jump.bytecode_index + 6..]; - - // Check if a jump dest placeholder is present - if !&formatted_bytes.0[jump.bytecode_index + 2..jump.bytecode_index + 6] - .eq("xxxx") - { - tracing::error!( - target: "codegen", - "JUMP DESTINATION PLACEHOLDER NOT FOUND FOR JUMPLABEL {}", - jump.label - ); - } - - // Replace the "xxxx" placeholder with the jump value - formatted_bytes = Bytes(format!("{before}{jump_value}{after}")); - } else { - // The jump did not have a corresponding label index. Add it to the - // unmatched jumps vec. - unmatched_jumps.push(Jump { - label: jump.label.clone(), - bytecode_index: code_index, - span: jump.span.clone(), - }); - } - } - } - - acc.push((code_index, formatted_bytes)); - acc - }); - - Ok((bytes, unmatched_jumps)) - } - /// Helper associated function to append functions to the end of the bytecode. 
/// /// ## Overview From f13c97b8e38631f9a1ca1645d4b8670e5934866c Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Mon, 23 Jan 2023 00:15:36 +0000 Subject: [PATCH 10/16] :broom: --- huff_codegen/src/lib.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 9e3f3218..4102527c 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -378,9 +378,7 @@ impl Codegen { macro_def, contract, scope, - &mut offset, mis, - recursing_constructor, bytes, circular_codesize_invocations, )?; @@ -472,9 +470,7 @@ impl Codegen { macro_def: MacroDefinition, contract: &Contract, scope: &mut Vec, - _offset: &mut usize, mis: &mut Vec<(usize, MacroInvocation)>, - _recursing_constructor: bool, bytes: Vec<(usize, Bytes)>, circular_codesize_invocations: CircularCodeSizeIndices, ) -> Result, CodegenError> { From 98358d892f239e1065ca6869066c3954b4e3a65c Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sat, 28 Jan 2023 00:11:15 +0000 Subject: [PATCH 11/16] fix: handle codesize invocations at the byte boundary --- huff_codegen/src/lib.rs | 123 +++++-------------- huff_core/tests/test_circular_constructor.rs | 99 +++++++++++++++ 2 files changed, 129 insertions(+), 93 deletions(-) create mode 100644 huff_core/tests/test_circular_constructor.rs diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 4102527c..26c71fc8 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -371,17 +371,10 @@ impl Codegen { let (bytes, unmatched_jumps) = Codegen::fill_unmatched(bytes, &jump_table, &label_indices)?; // Fill in circular codesize invocations - // let bytes = // Workout how to increase the offset the correct amount within here if it is longer than 2 // bytes - let bytes = Codegen::fill_circular_codesize_invocations( - macro_def, - contract, - scope, - mis, - bytes, - circular_codesize_invocations, - )?; + let bytes = +
Codegen::fill_circular_codesize_invocations(bytes, circular_codesize_invocations)?; Ok(BytecodeRes { bytes, label_indices, unmatched_jumps, table_instances, utilized_tables }) } @@ -467,113 +460,57 @@ impl Codegen { /// On success, returns a tuple of generated bytes. /// On failure, returns a CodegenError. pub fn fill_circular_codesize_invocations( - macro_def: MacroDefinition, - contract: &Contract, - scope: &mut Vec, - mis: &mut Vec<(usize, MacroInvocation)>, bytes: Vec<(usize, Bytes)>, circular_codesize_invocations: CircularCodeSizeIndices, ) -> Result, CodegenError> { - // ) -> Result<(), CodegenError> { // Get the length of the macro - let offsets_num = circular_codesize_invocations.len(); - if offsets_num == 0 { + let num_invocations = circular_codesize_invocations.len(); + if num_invocations == 0 { return Ok(bytes) } - // TODO: MANUALLY COPIED FROM THE statement rs file - refactor - let ir_macro = if let Some(m) = contract.find_macro_by_name(&macro_def.name) { - m - } else { - tracing::error!( - target: "codegen", - "MISSING MACRO PASSED TO __codesize \"{}\"", - &macro_def.name - ); - return Err(CodegenError { - kind: CodegenErrorKind::MissingMacroDefinition(macro_def.name /* yuck */), - span: macro_def.span.clone(), - token: None, - }) - }; - - let length = { - let res: BytecodeRes = match Codegen::macro_to_bytecode( - ir_macro.clone(), - contract, - scope, - 0, - mis, - ir_macro.name.eq("CONSTRUCTOR"), - ) { - Ok(r) => r, - Err(e) => { - tracing::error!( - target: "codegen", - "FAILED TO RECURSE INTO MACRO \"{}\"", - ir_macro.name - ); - return Err(e) - } - }; - res.bytes.iter().map(|(_, b)| b.0.len()).sum::() / 2 - }; - - // Fill in the circular codesize invocations depending on the length increase by injecting - // them - tracing::debug!( - target: "codegen", - "FILLING IN CIRCULAR CODESIZE INVOCATIONS: length without before fill - {:?}", - length - ); - - // TODO: this needs to be smarter if the macro is massive - let real_size = length + (2 * offsets_num); -
let real_size_formatted = format_even_bytes(format!("{real_size:02x}")); - tracing::debug!( - target: "codegen", - "REAL SIZE - {:?}", - real_size - ); - let push_bytes = format!("{:02x}{real_size_formatted}", 95 + real_size_formatted.len() / 2); + tracing::debug!(target: "codegen", "Circular Codesize Invocation: Bytes before expansion: {:#?}", bytes); + let length: usize = bytes.iter().map(|(_, b)| b.0.len()).sum::() / 2; - tracing::debug!( - target: "codegen", - "FILLING IN CIRCULAR CODESIZE INVOCATIONS: length without after fill - {:?}", - real_size - ); + // If there are more than 256 opcodes in a macro, we need 2 bytes to represent it + // The next threshold is 65536 opcodes which is past the codesize limit + let mut offset_increase = 0; + if length > 255 { + offset_increase = 1; + } + // Codesize will increase by 1 byte for every codesize that exists + let extended_length = length + (offset_increase * num_invocations); - // Replace the "cccc" placeholder with the calculated codesize value - tracing::debug!( - target: "codegen", - "FILLING IN CIRCULAR CODESIZE INVOCATIONS: before - {:#?}", - bytes - ); + let push_size = format_even_bytes(format!("{extended_length:02x}")); + let push_bytes = format!("{:02x}{push_size}", 95 + push_size.len() / 2); - let bytes = - bytes.into_iter().fold(Vec::default(), |mut acc, (code_index, mut formatted_bytes)| { + // Track the number of bytes added if there is an offset increase with codesize + let mut running_increase = 0; + let bytes = bytes.into_iter().fold( + Vec::default(), + |mut acc, (mut code_index, mut formatted_bytes)| { // Check if a jump table exists at `code_index` (starting offset of `b`) if let Some(_index) = circular_codesize_invocations.get(&code_index) { - // Get the bytes before & after the placeholder - let before = &formatted_bytes.0[0..0]; - let after = &formatted_bytes.0[4..]; - // Check if a jump dest placeholder is present - if !&formatted_bytes.0[code_index..4].eq("cccc") { - // TODO: fix up legacy 
error message + if !&formatted_bytes.0.eq("cccc") { tracing::error!( target: "codegen", "CIRCULAR CODESIZE PLACEHOLDER NOT FOUND" ); } - // Replace the "xxxx" placeholder with the jump value - formatted_bytes = Bytes(format!("{before}{push_bytes}{after}")); + // Replace the "cccc" placeholder with the jump value + formatted_bytes = Bytes(format!("{push_bytes}")); + running_increase += offset_increase; + } else { + // Increase the code index by the number of bytes added past the placeholder + code_index += running_increase; } acc.push((code_index, formatted_bytes)); acc - }); + }, + ); Ok(bytes) } diff --git a/huff_core/tests/test_circular_constructor.rs b/huff_core/tests/test_circular_constructor.rs new file mode 100644 index 00000000..5aeb2612 --- /dev/null +++ b/huff_core/tests/test_circular_constructor.rs @@ -0,0 +1,99 @@ +use huff_codegen::*; +use huff_lexer::*; +use huff_parser::*; +use huff_utils::prelude::*; + +#[test] +fn test_circular_large_constructors() { + let source = r#" + #define macro CONSTRUCTOR() = { + __codesize(CONSTRUCTOR) + FILLER_MACRO() + } + + // 254 program counters where the codesize should push us over the word limit + #define macro FILLER_MACRO() = { + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc + } + + 
#define macro MAIN() = { + 0x00 + } + "#; + + let full_source = FullFileSource { source, file: None, spans: vec![] }; + let lexer = Lexer::new(full_source); + let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let mut parser = Parser::new(tokens, Some("".to_string())); + let mut contract = parser.parse().unwrap(); + contract.derive_storage_pointers(); + + // Create constructor bytecode + match Codegen::generate_constructor_bytecode(&contract, None) { + Ok(mb) => assert_eq!("60ff58585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858".to_string(), mb), + Err(_) => panic!("moose"), + } +} + +#[test] +fn test_circular_constructor_at_word_boundry() { + let source = r#" + #define macro CONSTRUCTOR() = { + __codesize(CONSTRUCTOR) + FILLER_MACRO() + __codesize(CONSTRUCTOR) + } + + // 254 program counters where the codesize should push us over the word limit + #define macro FILLER_MACRO() = { + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc 
pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc pc + pc pc pc pc pc pc pc pc pc pc pc pc pc + } + + #define macro MAIN() = { + 0x00 + } + "#; + + let full_source = FullFileSource { source, file: None, spans: vec![] }; + let lexer = Lexer::new(full_source); + let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let mut parser = Parser::new(tokens, Some("".to_string())); + let mut contract = parser.parse().unwrap(); + contract.derive_storage_pointers(); + + // Create constructor bytecode + match Codegen::generate_constructor_bytecode(&contract, None) { + Ok(mb) => assert_eq!("61010358585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858610103".to_string(), mb), + Err(_) => panic!("moose"), + } +} From eb489fbd364bdff0b9a6f3c80a75006634e438c8 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sat, 28 Jan 2023 00:14:53 +0000 Subject: [PATCH 12/16] clippy --- huff_codegen/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 26c71fc8..1cc638d9 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -500,7 +500,7 @@ impl Codegen { } // Replace the "cccc" placeholder with the jump value - formatted_bytes = Bytes(format!("{push_bytes}")); + formatted_bytes = Bytes(push_bytes.to_string()); running_increase += offset_increase; } else { // Increase the code index by the number of bytes added past the placeholder From 
d19388f8dff21b076c4fa3bb3d146d6c6c823269 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sat, 28 Jan 2023 00:20:52 +0000 Subject: [PATCH 13/16] rm: unused error --- huff_utils/src/error.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/huff_utils/src/error.rs b/huff_utils/src/error.rs index 0efd4cac..4e71186d 100644 --- a/huff_utils/src/error.rs +++ b/huff_utils/src/error.rs @@ -157,8 +157,6 @@ pub enum CodegenErrorKind { AbiGenerationFailure, /// Unmatched Jump UnmatchedJumpLabel, - /// Unhandled circular codesize invocation - // UnhandledCircularCodesizeInvocation(String), /// An IO Error IOError(String), /// ArgCall has an unknown type From 4fa304394b7cca78c2bbedb896093ad4d1de4251 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sat, 28 Jan 2023 00:27:24 +0000 Subject: [PATCH 14/16] clean: use ref rather than clone --- huff_codegen/src/irgen/statements.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/huff_codegen/src/irgen/statements.rs b/huff_codegen/src/irgen/statements.rs index b442e1d3..789fcab7 100644 --- a/huff_codegen/src/irgen/statements.rs +++ b/huff_codegen/src/irgen/statements.rs @@ -186,10 +186,7 @@ pub fn statement_gen( // circular reference If this is the case we will store a // place holder inside the bytecode and fill it in later when // we have adequate information about the macros eventual size. 
- // - // We - // TODO: remove this unwrap / clone - if bf.args[0].name.clone().unwrap() == macro_def.name { + if bf.args[0].name.as_ref().unwrap() == &macro_def.name { tracing::debug!(target: "codegen", "CIRCULAR CODESIZE INVOCATION DETECTED INJECTING PLACEHOLDER | macro: {}", ir_macro.name); // Save the invocation for later From e85189db0fa2ee98fa32de4c8bc4a16605f356ff Mon Sep 17 00:00:00 2001 From: Andreas Bigger Date: Sun, 26 Feb 2023 10:45:01 -0700 Subject: [PATCH 15/16] fix nested circular macro invocations --- huff_codegen/src/irgen/statements.rs | 12 +++- huff_codegen/src/lib.rs | 24 +++++-- huff_core/tests/test_circular_constructor.rs | 72 ++++++++++++++++++++ huff_tests/src/runner.rs | 1 + huff_utils/src/ast.rs | 9 ++- huff_utils/src/bytecode.rs | 2 +- huff_utils/tests/files.rs | 2 +- 7 files changed, 109 insertions(+), 13 deletions(-) diff --git a/huff_codegen/src/irgen/statements.rs b/huff_codegen/src/irgen/statements.rs index 789fcab7..b2b05107 100644 --- a/huff_codegen/src/irgen/statements.rs +++ b/huff_codegen/src/irgen/statements.rs @@ -103,6 +103,7 @@ pub fn statement_gen( *offset, mis, false, + Some(circular_codesize_invocations), ) { Ok(r) => r, Err(e) => { @@ -181,16 +182,22 @@ pub fn statement_gen( }) }; + // Get the name of the macro being passed to __codesize + let codesize_arg = bf.args[0].name.as_ref().unwrap(); + let is_previous_parent = scope.iter().any(|def| def.name == *codesize_arg); + // Special case: // If the macro provided to __codesize is the current macro, we need to avoid a // circular reference If this is the case we will store a // place holder inside the bytecode and fill it in later when // we have adequate information about the macros eventual size.
- if bf.args[0].name.as_ref().unwrap() == &macro_def.name { + // We also need to avoid if the codesize arg is any of the previous macros to + // avoid a circular reference + if is_previous_parent || macro_def.name.eq(codesize_arg) { tracing::debug!(target: "codegen", "CIRCULAR CODESIZE INVOCATION DETECTED INJECTING PLACEHOLDER | macro: {}", ir_macro.name); // Save the invocation for later - circular_codesize_invocations.insert(*offset); + circular_codesize_invocations.insert((codesize_arg.to_string(), *offset)); // Progress offset by placeholder size *offset += 2; @@ -204,6 +211,7 @@ pub fn statement_gen( *offset, mis, ir_macro.name.eq("CONSTRUCTOR"), + Some(circular_codesize_invocations), ) { Ok(r) => r, Err(e) => { diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 1cc638d9..3c77f813 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -73,6 +73,7 @@ impl Codegen { 0, &mut Vec::default(), false, + None, )?; tracing::debug!(target: "codegen", "Generated main bytecode.
Appending table bytecode..."); @@ -101,6 +102,7 @@ impl Codegen { 0, &mut Vec::default(), false, + None, )?; // Check if the constructor performs its own code generation @@ -277,6 +279,7 @@ impl Codegen { mut offset: usize, mis: &mut Vec<(usize, MacroInvocation)>, recursing_constructor: bool, + circular_codesize_invocations: Option<&mut CircularCodeSizeIndices>, ) -> Result { // Get intermediate bytecode representation of the macro definition let mut bytes: Vec<(usize, Bytes)> = Vec::default(); @@ -287,8 +290,8 @@ impl Codegen { let mut label_indices = LabelIndices::new(); let mut table_instances = Jumps::new(); let mut utilized_tables: Vec = Vec::new(); - let mut circular_codesize_invocations: CircularCodeSizeIndices = - CircularCodeSizeIndices::new(); + let mut ccsi = CircularCodeSizeIndices::new(); + let circular_codesize_invocations = circular_codesize_invocations.unwrap_or(&mut ccsi); // Loop through all intermediate bytecode representations generated from the AST for (_ir_bytes_index, ir_byte) in ir_bytes.into_iter().enumerate() { @@ -321,7 +324,7 @@ impl Codegen { &mut label_indices, &mut table_instances, &mut utilized_tables, - &mut circular_codesize_invocations, + circular_codesize_invocations, starting_offset, )?; bytes.append(&mut push_bytes); @@ -373,8 +376,11 @@ impl Codegen { // Fill in circular codesize invocations // Workout how to increase the offset the correct amount within here if it is longer than 2 // bytes - let bytes = - Codegen::fill_circular_codesize_invocations(bytes, circular_codesize_invocations)?; + let bytes = Codegen::fill_circular_codesize_invocations( + bytes, + circular_codesize_invocations, + &macro_def.name, + )?; Ok(BytecodeRes { bytes, label_indices, unmatched_jumps, table_instances, utilized_tables }) } @@ -461,7 +467,8 @@ impl Codegen { /// On success, returns a tuple of generated bytes. /// On failure, returns a CodegenError.
pub fn fill_circular_codesize_invocations( bytes: Vec<(usize, Bytes)>, - circular_codesize_invocations: CircularCodeSizeIndices, + circular_codesize_invocations: &mut CircularCodeSizeIndices, + macro_name: &str, ) -> Result, CodegenError> { // Get the length of the macro let num_invocations = circular_codesize_invocations.len(); @@ -490,7 +497,9 @@ impl Codegen { Vec::default(), |mut acc, (mut code_index, mut formatted_bytes)| { // Check if a jump table exists at `code_index` (starting offset of `b`) - if let Some(_index) = circular_codesize_invocations.get(&code_index) { + if let Some((_, _index)) = + circular_codesize_invocations.get(&(macro_name.to_string(), code_index)) + { // Check if a jump dest placeholder is present if !&formatted_bytes.0.eq("cccc") { tracing::error!( @@ -547,6 +556,7 @@ impl Codegen { *offset + 1, mis, false, + None, )?; for j in res.unmatched_jumps.iter_mut() { diff --git a/huff_core/tests/test_circular_constructor.rs b/huff_core/tests/test_circular_constructor.rs index 5aeb2612..ebc512f3 100644 --- a/huff_core/tests/test_circular_constructor.rs +++ b/huff_core/tests/test_circular_constructor.rs @@ -97,3 +97,75 @@ fn test_circular_constructor_at_word_boundry() { Err(_) => panic!("moose"), } } + +#[test] +fn test_double_circular_constructor_multiple_macro_invocations() { + let source = r#" + #define macro CONSTRUCTOR() = { + __codesize(CONSTRUCTOR) + CODESIZE_CONSTRUCTOR() + __codesize(CONSTRUCTOR) + } + + // Tries to get the codesize of the constructor macro + #define macro CODESIZE_CONSTRUCTOR() = { + pc __codesize(CONSTRUCTOR) + } + + #define macro MAIN() = { + 0x00 + } + "#; + + let full_source = FullFileSource { source, file: None, spans: vec![] }; + let lexer = Lexer::new(full_source); + let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let mut parser = Parser::new(tokens, Some("".to_string())); + let mut contract = parser.parse().unwrap(); + contract.derive_storage_pointers(); + + // Create constructor bytecode + 
match Codegen::generate_constructor_bytecode(&contract, None) { + Ok(mb) => assert_eq!("60075860076007".to_string(), mb), + Err(_) => panic!("moose"), + } +} + +#[test] +fn test_double_circular_constructor_nested_macro_invocations() { + let source = r#" + #define macro CONSTRUCTOR() = { + __codesize(CONSTRUCTOR) + CODESIZED_CODESIZE() + __codesize(CONSTRUCTOR) + } + + #define macro CODESIZED_CODESIZE() = { + CODESIZE_CONSTRUCTOR() + pc __codesize(CODESIZE_CONSTRUCTOR) + } + + #define macro CODESIZE_CONSTRUCTOR() = { + pc __codesize(CONSTRUCTOR) + } + + #define macro MAIN() = { + 0x00 + } + "#; + + let full_source = FullFileSource { source, file: None, spans: vec![] }; + let lexer = Lexer::new(full_source); + let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let mut parser = Parser::new(tokens, Some("".to_string())); + let mut contract = parser.parse().unwrap(); + contract.derive_storage_pointers(); + + // Create constructor bytecode + match Codegen::generate_constructor_bytecode(&contract, None) { + Ok(mb) => assert_eq!("600a58600a586003600a".to_string(), mb), + Err(_) => panic!("moose"), + } +} + +// TODO: there could be some arg invocations that mess with codesize circulation diff --git a/huff_tests/src/runner.rs b/huff_tests/src/runner.rs index 95b40cf5..f4d50650 100644 --- a/huff_tests/src/runner.rs +++ b/huff_tests/src/runner.rs @@ -169,6 +169,7 @@ impl TestRunner { 0, &mut Vec::default(), false, + None, ) { // Generate table bytecode for compiled test macro Ok(res) => match Codegen::gen_table_bytecode(res) { diff --git a/huff_utils/src/ast.rs b/huff_utils/src/ast.rs index de8f0744..97ccdf13 100644 --- a/huff_utils/src/ast.rs +++ b/huff_utils/src/ast.rs @@ -255,7 +255,12 @@ impl Contract { self.recurse_ast_constants(md, storage_pointers, last_p, true); } } else { - self.recurse_ast_constants(md, storage_pointers, last_p, false); + self.recurse_ast_constants( + md, + storage_pointers, + last_p, + checking_constructor, + ); } } None => { @@ 
-289,7 +294,7 @@ impl Contract { md, storage_pointers, last_p, - false, + checking_constructor, ); } } diff --git a/huff_utils/src/bytecode.rs b/huff_utils/src/bytecode.rs index bccd2cf6..bd520a2b 100644 --- a/huff_utils/src/bytecode.rs +++ b/huff_utils/src/bytecode.rs @@ -116,7 +116,7 @@ pub type Jumps = Vec; pub type LabelIndices = BTreeMap; /// Typw to map circular_codesize labels to their bytecode indices -pub type CircularCodeSizeIndices = BTreeSet; +pub type CircularCodeSizeIndices = BTreeSet<(String, usize)>; /// Type for a map of bytecode indexes to `Jumps`. Represents a Jump Table. pub type JumpTable = BTreeMap; diff --git a/huff_utils/tests/files.rs b/huff_utils/tests/files.rs index 948c00f0..6dda8be5 100644 --- a/huff_utils/tests/files.rs +++ b/huff_utils/tests/files.rs @@ -8,7 +8,7 @@ fn test_generate_remappings() { let subscriber_builder = tracing_subscriber::fmt(); let env_filter = EnvFilter::from_default_env().add_directive(tracing::Level::DEBUG.into()); if let Err(e) = subscriber_builder.with_env_filter(env_filter).try_init() { - println!("Failed to initialize tracing!\nError: {e:?}") + eprintln!("Failed to initialize tracing!\nError: {e:?}") } let remapper = files::Remapper::new("../"); From e5234a99dfc6d4fb040fa93e7eb526383beab937 Mon Sep 17 00:00:00 2001 From: Cheethas <47148561+cheethas@users.noreply.github.com> Date: Sun, 26 Feb 2023 18:07:39 +0000 Subject: [PATCH 16/16] fix: rebase --- huff_core/tests/test_circular_constructor.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/huff_core/tests/test_circular_constructor.rs b/huff_core/tests/test_circular_constructor.rs index ebc512f3..6a7bf630 100644 --- a/huff_core/tests/test_circular_constructor.rs +++ b/huff_core/tests/test_circular_constructor.rs @@ -45,7 +45,7 @@ fn test_circular_large_constructors() { // Create constructor bytecode match Codegen::generate_constructor_bytecode(&contract, None) { - Ok(mb) => 
assert_eq!("60ff58585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858".to_string(), mb), + Ok((mb, _)) => assert_eq!("60ff58585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858".to_string(), mb), Err(_) => panic!("moose"), } } @@ -93,7 +93,7 @@ fn test_circular_constructor_at_word_boundry() { // Create constructor bytecode match Codegen::generate_constructor_bytecode(&contract, None) { - Ok(mb) => assert_eq!("61010358585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858610103".to_string(), mb), + Ok((mb, _)) => 
assert_eq!("61010358585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858585858610103".to_string(), mb), Err(_) => panic!("moose"), } } @@ -126,7 +126,7 @@ fn test_double_circular_constructor_multiple_macro_invocations() { // Create constructor bytecode match Codegen::generate_constructor_bytecode(&contract, None) { - Ok(mb) => assert_eq!("60075860076007".to_string(), mb), + Ok((mb, _)) => assert_eq!("60075860076007".to_string(), mb), Err(_) => panic!("moose"), } } @@ -163,7 +163,7 @@ fn test_double_circular_constructor_nested_macro_invocations() { // Create constructor bytecode match Codegen::generate_constructor_bytecode(&contract, None) { - Ok(mb) => assert_eq!("600a58600a586003600a".to_string(), mb), + Ok((mb, _)) => assert_eq!("600a58600a586003600a".to_string(), mb), Err(_) => panic!("moose"), } }