From 2d5f07d12eaeaaad05c9db36ebaf7f82e994cc4e Mon Sep 17 00:00:00 2001 From: Sean Billig Date: Thu, 20 Jun 2024 14:50:14 -0700 Subject: [PATCH 1/7] New pest-based parser --- Cargo.toml | 10 +- crates/codegen/Cargo.toml | 2 +- crates/codegen/src/critical_edge.rs | 60 +- crates/codegen/src/domtree.rs | 30 +- crates/codegen/src/loop_analysis.rs | 34 +- crates/codegen/src/optim/adce.rs | 49 +- crates/codegen/src/optim/gvn.rs | 80 +-- crates/codegen/src/optim/insn_simplify.rs | 44 +- crates/codegen/src/optim/licm.rs | 18 +- crates/codegen/src/optim/sccp.rs | 26 +- crates/codegen/src/post_domtree.rs | 30 +- crates/filecheck/Cargo.toml | 8 +- .../fixtures/adce/all_dests_remove.sntn | 8 +- .../fixtures/adce/basic_empty_block.sntn | 7 +- .../fixtures/adce/infinite_loop.sntn | 5 +- .../fixtures/adce/loop_body_removed.sntn | 5 +- .../filecheck/fixtures/adce/no_dead_code.sntn | 3 +- .../filecheck/fixtures/adce/rewrite_dest.sntn | 9 +- .../filecheck/fixtures/adce/simple_dce.sntn | 5 +- .../fixtures/adce/whole_loop_removed.sntn | 5 +- .../filecheck/fixtures/gvn/branch_simple.sntn | 6 +- .../filecheck/fixtures/gvn/commutativity.sntn | 3 +- .../fixtures/gvn/fold_predicted_value.sntn | 5 +- .../gvn/llvm_cyclic_phi_handling.sntn | 5 +- .../fixtures/gvn/llvm_phi-edge-handling.sntn | 5 +- .../gvn/llvm_phi-of-ops-move-block.sntn | 14 +- ...hi-of-ops-simplification-dependencies.sntn | 14 +- ...lified-to-existing-value-then-changes.sntn | 10 +- .../gvn/llvm_todo-pr35074-phi-of-ops.sntn | 3 +- ...m_todo-pr37121-seens-this-value-a-lot.sntn | 3 +- .../gvn/llvm_todo-pr42422-phi-of-ops.sntn | 9 +- crates/filecheck/fixtures/gvn/no_branch.sntn | 3 +- .../filecheck/fixtures/gvn/not_dominated.sntn | 3 +- .../fixtures/gvn/predicted_branch.sntn | 5 +- .../fixtures/gvn/redundancy_by_predicate.sntn | 9 +- .../fixtures/gvn/unreachable_edge.sntn | 11 +- .../fixtures/gvn/value_phi_loop.sntn | 3 +- .../fixtures/gvn/value_phi_many_preds.sntn | 3 +- .../fixtures/gvn/value_phi_recursive.sntn | 6 +- .../fixtures/gvn/value_phi_remote_block.sntn | 3 +- .../gvn/value_phi_resolution_must_fail.sntn | 3 +- .../fixtures/gvn/value_phi_same_arg.sntn | 3 +- .../fixtures/gvn/value_phi_simple.sntn | 3 +- .../filecheck/fixtures/insn_simplify/and.sntn | 36 +- .../filecheck/fixtures/insn_simplify/cmp.sntn | 36 +- .../filecheck/fixtures/insn_simplify/neg.sntn | 7 +- .../filecheck/fixtures/insn_simplify/not.sntn | 33 +- .../filecheck/fixtures/insn_simplify/or.sntn | 66 +- .../filecheck/fixtures/insn_simplify/phi.sntn | 5 +- .../fixtures/insn_simplify/simple.sntn | 5 +- .../filecheck/fixtures/insn_simplify/xor.sntn | 37 +- crates/filecheck/fixtures/licm/basic.sntn | 3 +- .../filecheck/fixtures/licm/header_entry.sntn | 3 +- .../fixtures/licm/multiple_preheader.sntn | 7 +- .../filecheck/fixtures/licm/nested_loops.sntn | 43 +- .../filecheck/fixtures/sccp/complex_loop.sntn | 5 +- .../filecheck/fixtures/sccp/const_branch.sntn | 6 +- .../filecheck/fixtures/sccp/const_loop.sntn | 5 +- .../filecheck/fixtures/sccp/non_folding.sntn | 7 +- .../filecheck/fixtures/sccp/simple_sccp.sntn | 5 +- crates/filecheck/fixtures/sccp/unary.sntn | 5 +- .../filecheck/fixtures/sccp/unreachable.sntn | 5 +- crates/filecheck/src/lib.rs | 37 +- crates/interpreter/Cargo.toml | 4 +- crates/interpreter/src/state.rs | 69 ++- crates/ir/Cargo.toml | 4 +- crates/ir/src/builder/func_builder.rs | 328 +++++----- crates/ir/src/builder/mod.rs | 38 +- crates/ir/src/builder/module_builder.rs | 19 +- crates/ir/src/builder/ssa.rs | 86 ++- crates/ir/src/dfg.rs | 2 +- crates/ir/src/func_cursor.rs | 229 ++++--- crates/ir/src/function.rs | 25 +- crates/ir/src/global_variable.rs | 2 +- crates/ir/src/graphviz/mod.rs | 11 +- crates/ir/src/insn.rs | 89 ++- crates/ir/src/ir_writer.rs | 18 +- crates/ir/src/linkage.rs | 18 +- crates/ir/src/module.rs | 2 - crates/ir/src/types.rs | 5 +- crates/ir/src/value.rs | 2 +- crates/parser/src/parser.rs | 256 ++++---- crates/parser2/Cargo.toml | 33 + crates/parser2/src/ast.rs | 580 ++++++++++++++++++ crates/parser2/src/lib.rs | 230 +++++++ crates/parser2/src/sonatina.pest | 88 +++ crates/parser2/src/syntax.rs | 250 ++++++++ .../parser2/test_files/syntax/func/empty.snap | 10 + .../parser2/test_files/syntax/func/empty.sntn | 1 + .../test_files/syntax/func/simple.snap | 79 +++ .../test_files/syntax/func/simple.sntn | 9 + .../test_files/syntax/module/simple.ast.snap | 564 +++++++++++++++++ .../test_files/syntax/module/simple.ir.snap | 58 ++ .../test_files/syntax/module/simple.snap | 360 +++++++++++ .../test_files/syntax/module/simple.sntn | 47 ++ .../test_files/syntax/stmts/bin_op.snap | 20 + .../test_files/syntax/stmts/bin_op.sntn | 18 + .../parser2/test_files/syntax/stmts/cast.snap | 49 ++ .../parser2/test_files/syntax/stmts/cast.sntn | 4 + .../test_files/syntax/stmts/control_flow.snap | 73 +++ .../test_files/syntax/stmts/control_flow.sntn | 8 + .../test_files/syntax/stmts/stmts.snap | 103 ++++ .../test_files/syntax/stmts/stmts.sntn | 8 + .../test_files/syntax/stmts/unary_op.snap | 27 + .../test_files/syntax/stmts/unary_op.sntn | 2 + crates/parser2/tests/syntax.rs | 153 +++++ crates/triple/src/lib.rs | 24 +- 107 files changed, 3992 insertions(+), 939 deletions(-) create mode 100644 crates/parser2/Cargo.toml create mode 100644 crates/parser2/src/ast.rs create mode 100644 crates/parser2/src/lib.rs create mode 100644 crates/parser2/src/sonatina.pest create mode 100644 crates/parser2/src/syntax.rs create mode 100644 crates/parser2/test_files/syntax/func/empty.snap create mode 100644 crates/parser2/test_files/syntax/func/empty.sntn create mode 100644 crates/parser2/test_files/syntax/func/simple.snap create mode 100644 crates/parser2/test_files/syntax/func/simple.sntn create mode 100644 crates/parser2/test_files/syntax/module/simple.ast.snap create mode 100644 crates/parser2/test_files/syntax/module/simple.ir.snap create mode 100644 crates/parser2/test_files/syntax/module/simple.snap create mode 100644 crates/parser2/test_files/syntax/module/simple.sntn create mode 100644 crates/parser2/test_files/syntax/stmts/bin_op.snap create mode 100644 crates/parser2/test_files/syntax/stmts/bin_op.sntn create mode 100644 crates/parser2/test_files/syntax/stmts/cast.snap create mode 100644 crates/parser2/test_files/syntax/stmts/cast.sntn create mode 100644 crates/parser2/test_files/syntax/stmts/control_flow.snap create mode 100644 crates/parser2/test_files/syntax/stmts/control_flow.sntn create mode 100644 crates/parser2/test_files/syntax/stmts/stmts.snap create mode 100644 crates/parser2/test_files/syntax/stmts/stmts.sntn create mode 100644 crates/parser2/test_files/syntax/stmts/unary_op.snap create mode 100644 crates/parser2/test_files/syntax/stmts/unary_op.sntn create mode 100644 crates/parser2/tests/syntax.rs diff --git a/Cargo.toml b/Cargo.toml index 2a339e4a..bc21ff68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,11 @@ [workspace] resolver = "2" -members = ["crates/ir", "crates/codegen", "crates/object", "crates/parser", "crates/filecheck", "crates/triple", "crates/interpreter"] +members = [ + "crates/ir", + "crates/codegen", + "crates/object", + "crates/parser2", + "crates/filecheck", + "crates/triple", + "crates/interpreter", +] diff --git a/crates/codegen/Cargo.toml b/crates/codegen/Cargo.toml index 759e0517..a2e86d34 100644 --- a/crates/codegen/Cargo.toml +++ b/crates/codegen/Cargo.toml @@ -17,6 +17,6 @@ keywords = ["compiler", "evm", "wasm", "smart-contract"] [dependencies] cranelift-entity = "0.104" smallvec = "1.7.0" -fxhash = "0.2.1" +rustc-hash = "1.1.0" sonatina-ir = { path = "../ir", version = "0.0.3-alpha" } sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } diff --git a/crates/codegen/src/critical_edge.rs b/crates/codegen/src/critical_edge.rs index 484813e0..83b87442 100644 --- a/crates/codegen/src/critical_edge.rs +++ b/crates/codegen/src/critical_edge.rs @@ -1,7 +1,7 @@ -use sonatina_ir::ControlFlowGraph; +use sonatina_ir::{func_cursor::FuncCursor, ControlFlowGraph}; use sonatina_ir::{ - func_cursor::{CursorLocation, FuncCursor, InsnInserter}, + func_cursor::{CursorLocation, InsnInserter}, insn::InsnData, Block, Function, Insn, }; @@ -65,10 +65,10 @@ impl CriticalEdgeSplitter { // critical edge. let inserted_dest = func.dfg.make_block(); let jump = func.dfg.make_insn(InsnData::jump(original_dest)); - let mut cursor = InsnInserter::new(func, CursorLocation::BlockTop(original_dest)); - cursor.append_block(inserted_dest); - cursor.set_loc(CursorLocation::BlockTop(inserted_dest)); - cursor.append_insn(jump); + let mut cursor = InsnInserter::at_location(CursorLocation::BlockTop(original_dest)); + cursor.append_block(func, inserted_dest); + cursor.set_location(CursorLocation::BlockTop(inserted_dest)); + cursor.append_insn(func, jump); // Rewrite branch destination to the new block. func.dfg @@ -123,8 +123,7 @@ mod tests { #[test] fn critical_edge_basic() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -141,9 +140,8 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - - let mut module = test_module_builder.build(); + let mut module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &mut module.funcs[func_ref]; let mut cfg = ControlFlowGraph::default(); cfg.compute(func); @@ -151,7 +149,7 @@ mod tests { assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: br 1.i32 block3 block1; @@ -164,6 +162,7 @@ mod tests { block3: jump block2; +} " ); @@ -175,8 +174,7 @@ mod tests { #[test] #[allow(clippy::many_single_char_names)] fn critical_edge_to_same_block() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -201,9 +199,8 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - - let mut module = test_module_builder.build(); + let mut module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &mut module.funcs[func_ref]; let mut cfg = ControlFlowGraph::default(); cfg.compute(func); @@ -211,7 +208,7 @@ mod tests { assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: br 1.i8 block5 block1; @@ -233,6 +230,7 @@ mod tests { block6: jump block3; +} " ); @@ -243,8 +241,7 @@ mod tests { #[test] fn critical_edge_phi() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -255,7 +252,7 @@ mod tests { builder.jump(b); builder.switch_to_block(b); - let phi_value = builder.phi(&[(v1, a)]); + let phi_value = builder.phi(Type::I8, &[(v1, a)]); let v2 = builder.add(phi_value, v1); builder.append_phi_arg(phi_value, v2, b); builder.br(phi_value, c, b); @@ -264,9 +261,8 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - - let mut module = test_module_builder.build(); + let mut module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &mut module.funcs[func_ref]; let mut cfg = ControlFlowGraph::default(); cfg.compute(func); @@ -274,7 +270,7 @@ mod tests { assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: jump block1; @@ -289,6 +285,7 @@ mod tests { block3: jump block1; +} " ); @@ -299,8 +296,7 @@ mod tests { #[test] fn critical_edge_br_table() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -328,9 +324,8 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - - let mut module = test_module_builder.build(); + let mut module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &mut module.funcs[func_ref]; let mut cfg = ControlFlowGraph::default(); cfg.compute(func); @@ -338,9 +333,9 @@ mod tests { assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: - br -1.i1 block5 block6; + br 1.i1 block5 block6; block1: br_table 0.i32 block2 (1.i32 block3) (2.i32 block7); @@ -363,6 +358,7 @@ mod tests { block7: jump block4; +} " ); diff --git a/crates/codegen/src/domtree.rs b/crates/codegen/src/domtree.rs index 9b23c32e..80b041b0 100644 --- a/crates/codegen/src/domtree.rs +++ b/crates/codegen/src/domtree.rs @@ -1,6 +1,6 @@ //! This module contains dominantor tree related structs. //! -//! The algorithm is based on Keith D. Cooper., Timothy J. Harvey., and Ken Kennedy.: A Simple, Fast Dominance Algorithm: +//! The algorithm is based on Keith D. Cooper., Timothy J. Harvey., and Ken Kennedy.: A Simple, Fast Dominance Algorithm: //! use std::collections::BTreeSet; @@ -230,8 +230,7 @@ mod tests { #[test] fn dom_tree_if_else() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let entry_block = builder.append_block(); let then_block = builder.append_block(); @@ -252,9 +251,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let (dom_tree, df) = calc_dom(func); @@ -271,8 +270,7 @@ mod tests { #[test] fn unreachable_edge() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -297,9 +295,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let (dom_tree, df) = calc_dom(func); @@ -319,8 +317,7 @@ mod tests { #[test] fn dom_tree_complex() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -377,9 +374,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let (dom_tree, df) = calc_dom(func); @@ -412,8 +409,7 @@ mod tests { #[test] fn dom_tree_br_table() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -445,9 +441,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let (dom_tree, df) = calc_dom(func); diff --git a/crates/codegen/src/loop_analysis.rs b/crates/codegen/src/loop_analysis.rs index 973764a6..6d21b6b7 100644 --- a/crates/codegen/src/loop_analysis.rs +++ b/crates/codegen/src/loop_analysis.rs @@ -1,5 +1,5 @@ use cranelift_entity::{entity_impl, packed_option::PackedOption, PrimaryMap, SecondaryMap}; -use fxhash::FxHashMap; +use rustc_hash::FxHashMap; use smallvec::SmallVec; use crate::domtree::DomTree; @@ -261,8 +261,7 @@ mod tests { #[test] fn simple_loop() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let b0 = builder.append_block(); let b1 = builder.append_block(); @@ -274,7 +273,7 @@ mod tests { builder.jump(b1); builder.switch_to_block(b1); - let v1 = builder.phi(&[(v0, b0)]); + let v1 = builder.phi(Type::I32, &[(v0, b0)]); let c0 = builder.make_imm_value(10i32); let v2 = builder.eq(v1, c0); builder.br(v2, b3, b2); @@ -289,9 +288,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let lpt = compute_loop(func); @@ -307,8 +306,7 @@ mod tests { #[test] fn continue_loop() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let b0 = builder.append_block(); let b1 = builder.append_block(); @@ -323,7 +321,7 @@ mod tests { builder.jump(b1); builder.switch_to_block(b1); - let v1 = builder.phi(&[(v0, b0)]); + let v1 = builder.phi(Type::I32, &[(v0, b0)]); let c0 = builder.make_imm_value(10i32); let v2 = builder.eq(v1, c0); builder.br(v2, b5, b2); @@ -352,9 +350,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let lpt = compute_loop(func); @@ -374,8 +372,7 @@ mod tests { #[test] fn single_block_loop() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[Type::I1], Type::Void); + let mut builder = test_func_builder(&[Type::I1], Type::Void); let b0 = builder.append_block(); let b1 = builder.append_block(); let b2 = builder.append_block(); @@ -392,9 +389,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let lpt = compute_loop(func); @@ -408,8 +405,7 @@ mod tests { #[test] fn nested_loop() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[Type::I1], Type::Void); + let mut builder = test_func_builder(&[Type::I1], Type::Void); let b0 = builder.append_block(); let b1 = builder.append_block(); @@ -463,9 +459,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let lpt = compute_loop(func); diff --git a/crates/codegen/src/optim/adce.rs b/crates/codegen/src/optim/adce.rs index c6bf583b..f98eb76b 100644 --- a/crates/codegen/src/optim/adce.rs +++ b/crates/codegen/src/optim/adce.rs @@ -133,27 +133,27 @@ impl AdceSolver { return false; }; - let mut inserter = InsnInserter::new(func, CursorLocation::BlockTop(entry)); + let mut inserter = InsnInserter::at_location(CursorLocation::BlockTop(entry)); loop { match inserter.loc() { CursorLocation::At(insn) => { if self.does_insn_live(insn) { - inserter.proceed(); + inserter.proceed(func); } else { - inserter.remove_insn() + inserter.remove_insn(func) } } CursorLocation::BlockTop(block) => { if self.does_block_live(block) { - inserter.proceed() + inserter.proceed(func) } else { - inserter.remove_block() + inserter.remove_block(func) } } CursorLocation::BlockBottom(_) => { - inserter.proceed(); + inserter.proceed(func); } CursorLocation::NoWhere => break, @@ -161,11 +161,11 @@ impl AdceSolver { } // Modify branch insns to remove unreachable edges. - inserter.set_to_entry(); + inserter.set_to_entry(func); let mut br_insn_modified = false; - while let Some(block) = inserter.block() { - br_insn_modified |= self.modify_branch(&mut inserter, block); - inserter.proceed_block(); + while let Some(block) = inserter.block(func) { + br_insn_modified |= self.modify_branch(func, &mut inserter, block); + inserter.proceed_block(func); } br_insn_modified @@ -183,19 +183,19 @@ impl AdceSolver { } /// Returns `true` if branch insn is modified. - fn modify_branch(&self, inserter: &mut InsnInserter, block: Block) -> bool { - let last_insn = match inserter.func().layout.last_insn_of(block) { + fn modify_branch( + &self, + func: &mut Function, + inserter: &mut InsnInserter, + block: Block, + ) -> bool { + let last_insn = match func.layout.last_insn_of(block) { Some(insn) => insn, None => return false, }; - inserter.set_loc(CursorLocation::At(last_insn)); + inserter.set_location(CursorLocation::At(last_insn)); - let dests: Vec<_> = inserter - .func() - .dfg - .analyze_branch(last_insn) - .iter_dests() - .collect(); + let dests: Vec<_> = func.dfg.analyze_branch(last_insn).iter_dests().collect(); let mut changed = false; for dest in dests { @@ -206,14 +206,11 @@ impl AdceSolver { match self.living_post_dom(dest) { // If the destination is dead but its post dominator is living, then change the // destination to the post dominator. - Some(postdom) => inserter - .func_mut() - .dfg - .rewrite_branch_dest(last_insn, dest, postdom), + Some(postdom) => func.dfg.rewrite_branch_dest(last_insn, dest, postdom), // If the block doesn't have post dominator, then remove the dest. None => { - inserter.func_mut().dfg.remove_branch_dest(last_insn, dest); + func.dfg.remove_branch_dest(last_insn, dest); } } @@ -221,13 +218,13 @@ impl AdceSolver { } // Turn branch insn to `jump` if all dests is the same. - let branch_info = inserter.func().dfg.analyze_branch(last_insn); + let branch_info = func.dfg.analyze_branch(last_insn); if branch_info.dests_num() > 1 { let mut branch_dests = branch_info.iter_dests(); let first_dest = branch_dests.next().unwrap(); if branch_dests.all(|dest| dest == first_dest) { changed = true; - inserter.replace(InsnData::jump(first_dest)); + inserter.replace(func, InsnData::jump(first_dest)); } } diff --git a/crates/codegen/src/optim/gvn.rs b/crates/codegen/src/optim/gvn.rs index 7b128a27..de476f81 100644 --- a/crates/codegen/src/optim/gvn.rs +++ b/crates/codegen/src/optim/gvn.rs @@ -10,7 +10,7 @@ use std::collections::BTreeSet; use cranelift_entity::{entity_impl, packed_option::PackedOption, PrimaryMap, SecondaryMap}; -use fxhash::{FxHashMap, FxHashSet}; +use rustc_hash::{FxHashMap, FxHashSet}; use crate::domtree::{DomTree, DominatorTreeTraversable}; @@ -504,7 +504,7 @@ impl GvnSolver { value: Value, edge: Edge, ) -> Value { - let mut rep_value = self.leader(value); + let mut rep_value = self.leader(func.dfg.resolve_alias(value)); if let Some(inferred_value) = self.infer_value_impl(edge, rep_value) { rep_value = inferred_value; @@ -641,7 +641,7 @@ impl GvnSolver { let edges = &self.blocks[block].in_edges; let mut phi_args = Vec::with_capacity(values.len()); - for (&value, &from) in (values).iter().zip(blocks.iter()) { + for (&value, &from) in values.iter().zip(blocks.iter()) { let edge = self.find_edge(edges, from, block); // Ignore an argument from an unreachable block. if !self.edge_data(edge).reachable { @@ -1403,11 +1403,11 @@ impl<'a> RedundantCodeRemover<'a> { self.avail_set[idom].clone() }; - let mut inserter = InsnInserter::new(func, CursorLocation::BlockTop(block)); + let mut inserter = InsnInserter::at_location(CursorLocation::BlockTop(block)); loop { match inserter.loc() { CursorLocation::BlockTop(_) => { - inserter.proceed(); + inserter.proceed(func); } CursorLocation::BlockBottom(..) | CursorLocation::NoWhere => { @@ -1415,23 +1415,23 @@ impl<'a> RedundantCodeRemover<'a> { } CursorLocation::At(insn) => { - let block = inserter.block().unwrap(); - if let Some(insn_result) = inserter.func().dfg.insn_result(insn) { + let block = inserter.block(func).unwrap(); + if let Some(insn_result) = func.dfg.insn_result(insn) { let class = self.solver.value_class(insn_result); // Use representative value if the class is in avail set. if let Some(value) = avails.get(&class) { - inserter.func_mut().dfg.change_to_alias(insn_result, *value); - inserter.remove_insn(); + func.dfg.change_to_alias(insn_result, *value); + inserter.remove_insn(func); continue; } // Try rewrite phi insn to reflect edge's reachability. - self.rewrite_phi(inserter.func_mut(), insn, block); + self.rewrite_phi(func, insn, block); avails.insert(class, insn_result); } - inserter.proceed(); + inserter.proceed(func); } } } @@ -1442,11 +1442,11 @@ impl<'a> RedundantCodeRemover<'a> { /// Resolve value phis in the block. fn resolve_value_phi_in_block(&mut self, func: &mut Function, block: Block) { - let mut inserter = InsnInserter::new(func, CursorLocation::BlockTop(block)); + let mut inserter = InsnInserter::at_location(CursorLocation::BlockTop(block)); loop { match inserter.loc() { CursorLocation::BlockTop(_) => { - inserter.proceed(); + inserter.proceed(func); } CursorLocation::BlockBottom(..) | CursorLocation::NoWhere => { @@ -1454,24 +1454,29 @@ impl<'a> RedundantCodeRemover<'a> { } CursorLocation::At(insn) => { - let block = inserter.block().unwrap(); - if let Some(insn_result) = inserter.func().dfg.insn_result(insn) { + let block = inserter.block(func).unwrap(); + if let Some(insn_result) = func.dfg.insn_result(insn) { // If value phi exists for the `insn_result` and its resolution succeeds, // then use resolved phi value and remove insn. let class = self.solver.value_class(insn_result); if let Some(value_phi) = &self.solver.classes[class].value_phi { - let ty = inserter.func().dfg.value_ty(insn_result); + let ty = func.dfg.value_ty(insn_result); if self.is_value_phi_resolvable(value_phi, block) { - let value = - self.resolve_value_phi(&mut inserter, value_phi, ty, block); - inserter.func_mut().dfg.change_to_alias(insn_result, value); - inserter.remove_insn(); + let value = self.resolve_value_phi( + func, + &mut inserter, + value_phi, + ty, + block, + ); + func.dfg.change_to_alias(insn_result, value); + inserter.remove_insn(func); continue; } } } - inserter.proceed(); + inserter.proceed(func); } } } @@ -1505,6 +1510,7 @@ impl<'a> RedundantCodeRemover<'a> { /// the inserted phi insn. fn resolve_value_phi( &mut self, + func: &mut Function, inserter: &mut InsnInserter, value_phi: &ValuePhi, ty: Type, @@ -1529,18 +1535,19 @@ impl<'a> RedundantCodeRemover<'a> { // Resolve phi value's arguments and append them to the newly `InsnData::Phi`. let mut phi = InsnData::phi(ty); for (value_phi, phi_block) in &phi_insn.args { - let resolved = self.resolve_value_phi(inserter, value_phi, ty, *phi_block); + let resolved = + self.resolve_value_phi(func, inserter, value_phi, ty, *phi_block); phi.append_phi_arg(resolved, *phi_block); } // Insert new phi insn to top of the phi_insn block. - inserter.set_loc(CursorLocation::BlockTop(phi_insn.block)); - let insn = inserter.insert_insn_data(phi); - let result = inserter.make_result(insn).unwrap(); - inserter.attach_result(insn, result); + inserter.set_location(CursorLocation::BlockTop(phi_insn.block)); + let insn = inserter.insert_insn_data(func, phi); + let result = inserter.make_result(func, insn).unwrap(); + inserter.attach_result(func, insn, result); // Restore the inserter loc. - inserter.set_loc(current_inserter_loc); + inserter.set_location(current_inserter_loc); // Store resolved value phis. self.resolved_value_phis.insert(value_phi.clone(), result); @@ -1554,32 +1561,29 @@ impl<'a> RedundantCodeRemover<'a> { /// Remove unreachable edges and blocks. fn remove_unreachable_edges(&self, func: &mut Function) { let entry_block = func.layout.entry_block().unwrap(); - let mut inserter = InsnInserter::new(func, CursorLocation::BlockTop(entry_block)); + let mut inserter = InsnInserter::at_location(CursorLocation::BlockTop(entry_block)); loop { match inserter.loc() { CursorLocation::BlockTop(block) => { if !self.solver.blocks[block].reachable { - inserter.remove_block(); + inserter.remove_block(func); } else { - inserter.proceed(); + inserter.proceed(func); } } - CursorLocation::BlockBottom(..) => inserter.proceed(), + CursorLocation::BlockBottom(..) => inserter.proceed(func), CursorLocation::At(insn) => { - if inserter.func().dfg.is_branch(insn) { - let block = inserter.block().unwrap(); + if func.dfg.is_branch(insn) { + let block = inserter.block(func).unwrap(); for &out_edge in self.solver.unreachable_out_edges(block) { let edge_data = self.solver.edge_data(out_edge); - inserter - .func_mut() - .dfg - .remove_branch_dest(insn, edge_data.to); + func.dfg.remove_branch_dest(insn, edge_data.to); } } - inserter.proceed(); + inserter.proceed(func); } CursorLocation::NoWhere => break, diff --git a/crates/codegen/src/optim/insn_simplify.rs b/crates/codegen/src/optim/insn_simplify.rs index 006170c8..48a46354 100644 --- a/crates/codegen/src/optim/insn_simplify.rs +++ b/crates/codegen/src/optim/insn_simplify.rs @@ -26,39 +26,41 @@ impl InsnSimplifySolver { Some(entry) => entry, None => return, }; - let mut inserter = InsnInserter::new(func, CursorLocation::BlockTop(entry)); + let mut inserter = InsnInserter::at_location(CursorLocation::BlockTop(entry)); while inserter.loc() != CursorLocation::NoWhere { let insn = match inserter.insn() { Some(insn) => insn, None => { - inserter.proceed(); + inserter.proceed(func); continue; } }; - self.simplify(&mut inserter, insn); + self.simplify(func, &mut inserter, insn); } while let Some(insn) = self.worklist.pop_front() { - if !inserter.func().layout.is_insn_inserted(insn) { + if !func.layout.is_insn_inserted(insn) { continue; } - inserter.set_loc(CursorLocation::At(insn)); - self.simplify(&mut inserter, insn); + inserter.set_location(CursorLocation::At(insn)); + self.simplify(func, &mut inserter, insn); } } - pub fn simplify(&mut self, inserter: &mut InsnInserter, insn: Insn) { - match simplify_insn(&mut inserter.func_mut().dfg, insn) { - Some(SimplifyResult::Value(val)) => self.replace_insn_with_value(inserter, insn, val), + pub fn simplify(&mut self, func: &mut Function, inserter: &mut InsnInserter, insn: Insn) { + match simplify_insn(&mut func.dfg, insn) { + Some(SimplifyResult::Value(val)) => { + self.replace_insn_with_value(func, inserter, insn, val) + } Some(SimplifyResult::Insn(data)) => { - self.replace_insn_with_data(inserter, insn, data); + self.replace_insn_with_data(func, inserter, insn, data); } - None => inserter.proceed(), + None => inserter.proceed(func), } } @@ -68,34 +70,34 @@ impl InsnSimplifySolver { pub fn replace_insn_with_value( &mut self, + func: &mut Function, inserter: &mut InsnInserter, insn: Insn, value: Value, ) { - if let Some(insn_result) = inserter.func().dfg.insn_result(insn) { - self.worklist - .extend(inserter.func().dfg.users(insn_result).copied()); + if let Some(insn_result) = func.dfg.insn_result(insn) { + self.worklist.extend(func.dfg.users(insn_result).copied()); self.worklist.push_back(insn); - inserter.func_mut().dfg.change_to_alias(insn_result, value); + func.dfg.change_to_alias(insn_result, value); }; - inserter.remove_insn(); + inserter.remove_insn(func); } pub fn replace_insn_with_data( &mut self, + func: &mut Function, inserter: &mut InsnInserter, insn: Insn, data: InsnData, ) { - if let Some(res) = inserter.func().dfg.insn_result(insn) { - self.worklist - .extend(inserter.func().dfg.users(res).copied()); + if let Some(res) = func.dfg.insn_result(insn) { + self.worklist.extend(func.dfg.users(res).copied()); self.worklist.push_back(insn); } - inserter.replace(data); - inserter.proceed(); + inserter.replace(func, data); + inserter.proceed(func); } } diff --git a/crates/codegen/src/optim/licm.rs b/crates/codegen/src/optim/licm.rs index c21ee067..d69723a7 100644 --- a/crates/codegen/src/optim/licm.rs +++ b/crates/codegen/src/optim/licm.rs @@ -1,5 +1,5 @@ // TODO: Add control flow hoisting. -use fxhash::{FxHashMap, FxHashSet}; +use rustc_hash::{FxHashMap, FxHashSet}; use crate::loop_analysis::{Loop, LoopTree}; @@ -112,12 +112,12 @@ impl LicmSolver { // Create preheader and insert it before the loop header. let new_preheader = func.dfg.make_block(); - let mut inserter = InsnInserter::new(func, CursorLocation::BlockTop(lp_header)); - inserter.insert_block_before(new_preheader); + let mut inserter = InsnInserter::at_location(CursorLocation::BlockTop(lp_header)); + inserter.insert_block_before(func, new_preheader); // Insert jump insn of which destination is the loop header. - inserter.set_loc(CursorLocation::BlockTop(new_preheader)); - inserter.insert_insn_data(InsnData::jump(lp_header)); + inserter.set_location(CursorLocation::BlockTop(new_preheader)); + inserter.insert_insn_data(func, InsnData::jump(lp_header)); cfg.add_edge(new_preheader, lp_header); // Rewrite branch destination of original preheaders and modify cfg. @@ -183,10 +183,10 @@ impl LicmSolver { None => { // Insert new phi insn to the preheader. let mut inserter = - InsnInserter::new(func, CursorLocation::BlockTop(new_preheader)); - let new_phi_insn = inserter.insert_insn_data(phi_insn_data.clone()); - let result = inserter.make_result(new_phi_insn).unwrap(); - inserter.attach_result(new_phi_insn, result); + InsnInserter::at_location(CursorLocation::BlockTop(new_preheader)); + let new_phi_insn = inserter.insert_insn_data(func, phi_insn_data.clone()); + let result = inserter.make_result(func, new_phi_insn).unwrap(); + inserter.attach_result(func, new_phi_insn, result); // Add phi_insn_data to `inserted_phis` for reusing. inserted_phis.insert(phi_insn_data, result); diff --git a/crates/codegen/src/optim/sccp.rs b/crates/codegen/src/optim/sccp.rs index 75c72f55..f6c47161 100644 --- a/crates/codegen/src/optim/sccp.rs +++ b/crates/codegen/src/optim/sccp.rs @@ -1,7 +1,7 @@ //! This module contains a solver for Sparse Conditional Constant Propagation. //! -//! The algorithm is based on Mark N. Wegman., Frank Kcnncth Zadeck.: Constant propagation with conditional branches: -//! ACM Transactions on Programming Languages and Systems Volume 13 Issue 2 April 1991 pp 181–210: +//! The algorithm is based on Mark N. Wegman., Frank Kcnncth Zadeck.: Constant propagation with conditional branches: +//! ACM Transactions on Programming Languages and Systems Volume 13 Issue 2 April 1991 pp 181–210: //! use std::{collections::BTreeSet, ops}; @@ -140,6 +140,8 @@ impl SccpSolver { for (i, from) in func.dfg.phi_blocks(insn).iter().enumerate() { if self.is_reachable(func, *from, block) { let phi_arg = func.dfg.insn_arg(insn, i); + let phi_arg = func.dfg.resolve_alias(phi_arg); + let v_cell = self.lattice[phi_arg]; eval_result = eval_result.join(v_cell); } @@ -312,30 +314,30 @@ impl SccpSolver { /// Remove unreachable edges and blocks. fn remove_unreachable_edges(&self, func: &mut Function) { let entry_block = func.layout.entry_block().unwrap(); - let mut inserter = InsnInserter::new(func, CursorLocation::BlockTop(entry_block)); + let mut inserter = InsnInserter::at_location(CursorLocation::BlockTop(entry_block)); loop { match inserter.loc() { CursorLocation::BlockTop(block) => { if !self.reachable_blocks.contains(&block) { - inserter.remove_block(); + inserter.remove_block(func); } else { - inserter.proceed(); + inserter.proceed(func); } } - CursorLocation::BlockBottom(..) => inserter.proceed(), + CursorLocation::BlockBottom(..) => inserter.proceed(func), CursorLocation::At(insn) => { - if inserter.func().dfg.is_branch(insn) { - let branch_info = inserter.func().dfg.analyze_branch(insn); + if func.dfg.is_branch(insn) { + let branch_info = func.dfg.analyze_branch(insn); for dest in branch_info.iter_dests().collect::>() { if !self.is_reachable_edge(insn, dest) { - inserter.func_mut().dfg.remove_branch_dest(insn, dest); + func.dfg.remove_branch_dest(insn, dest); } } } - inserter.proceed(); + inserter.proceed(func); } CursorLocation::NoWhere => break, @@ -368,7 +370,7 @@ impl SccpSolver { match self.lattice[insn_result].to_imm() { Some(imm) => { - InsnInserter::new(func, CursorLocation::At(insn)).remove_insn(); + InsnInserter::at_location(CursorLocation::At(insn)).remove_insn(func); let new_value = func.dfg.make_imm_value(imm); func.dfg.change_to_alias(insn_result, new_value); } @@ -394,7 +396,7 @@ impl SccpSolver { let phi_value = func.dfg.insn_result(insn).unwrap(); func.dfg .change_to_alias(phi_value, func.dfg.insn_arg(insn, 0)); - InsnInserter::new(func, CursorLocation::At(insn)).remove_insn(); + InsnInserter::at_location(CursorLocation::At(insn)).remove_insn(func); } } diff --git a/crates/codegen/src/post_domtree.rs b/crates/codegen/src/post_domtree.rs index 9ca59322..3a4191bf 100644 --- a/crates/codegen/src/post_domtree.rs +++ b/crates/codegen/src/post_domtree.rs @@ -170,8 +170,7 @@ mod tests { #[test] fn pd_if_else() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[Type::I64], Type::Void); + let mut builder = test_func_builder(&[Type::I64], Type::Void); let entry_block = builder.append_block(); let then_block = builder.append_block(); @@ -192,14 +191,14 @@ mod tests { builder.jump(merge_block); builder.switch_to_block(merge_block); - let v3 = builder.phi(&[(v1, then_block), (v2, else_block)]); + let v3 = builder.phi(Type::I64, &[(v1, then_block), (v2, else_block)]); builder.add(v3, arg0); builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let (post_dom_tree, pdf) = calc_dom(func); @@ -216,17 +215,16 @@ mod tests { #[test] fn infinite_loop() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); builder.switch_to_block(a); builder.jump(a); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let (post_dom_tree, pdf) = calc_dom(func); @@ -236,8 +234,7 @@ mod tests { #[test] fn test_multiple_return() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -262,9 +259,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let (post_dom_tree, pdf) = calc_dom(func); @@ -283,8 +280,7 @@ mod tests { #[test] fn pd_complex() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let a = builder.append_block(); let b = builder.append_block(); @@ -321,9 +317,9 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; let (post_dom_tree, pdf) = calc_dom(func); diff --git a/crates/filecheck/Cargo.toml b/crates/filecheck/Cargo.toml index 57e722db..d2e398d4 100644 --- a/crates/filecheck/Cargo.toml +++ b/crates/filecheck/Cargo.toml @@ -9,9 +9,9 @@ publish = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -filecheck = "0.5.0" -sonatina-ir = {path = "../ir"} -sonatina-codegen = {path = "../codegen"} -sonatina-parser = {path = "../parser"} +filecheck = "0.5.0" # { path = "/Users/sean/src/filecheck" } +sonatina-ir = { path = "../ir" } +sonatina-codegen = { path = "../codegen" } +sonatina-parser2 = { path = "../parser2" } termcolor = "1.1.2" walkdir = "2" diff --git a/crates/filecheck/fixtures/adce/all_dests_remove.sntn b/crates/filecheck/fixtures/adce/all_dests_remove.sntn index a748ce51..42dfedf4 100644 --- a/crates/filecheck/fixtures/adce/all_dests_remove.sntn +++ b/crates/filecheck/fixtures/adce/all_dests_remove.sntn @@ -6,7 +6,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return v0; -func public %all_dests_removed() -> i8: +func public %all_dests_removed() -> i8 { block0: v0.i8 = or 1.i8 0.i8; v1.i8 = add v0 1.i8; @@ -26,6 +26,7 @@ func public %all_dests_removed() -> i8: block3: v7.i8 = phi (v3 block1) (v4 block2); return v0; +} # check: block0: # nextln: v1.i32 = add v0 v0; @@ -33,10 +34,10 @@ func public %all_dests_removed() -> i8: # nextln: # nextln: block3: # nextln: return v1; -func public %all_dests_removed2(v0.i32) -> i8: +func public %all_dests_removed2(v0.i32) -> i8 { block0: v1.i32 = add v0 v0; - br_table v0 undef (v1 block1) (2.i32 block2); + br_table v0 (v1 block1) (2.i32 block2); block1: v3.i8 = add v0 -10.i8; @@ -51,3 +52,4 @@ func public %all_dests_removed2(v0.i32) -> i8: block3: v7.i8 = phi (v3 block1) (v4 block2); return v1; +} diff --git a/crates/filecheck/fixtures/adce/basic_empty_block.sntn b/crates/filecheck/fixtures/adce/basic_empty_block.sntn index 9510e9b9..a3d29b8d 100644 --- a/crates/filecheck/fixtures/adce/basic_empty_block.sntn +++ b/crates/filecheck/fixtures/adce/basic_empty_block.sntn @@ -1,6 +1,6 @@ target = "evm-ethereum-london" - -# sameln: func public %basic_empty_block() -> i8: + +# sameln: func public %basic_empty_block() -> i8 { # nextln: block0: # nextln: v0.i8 = or 1.i8 0.i8; # nextln: br v0 block2 block3; @@ -10,7 +10,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return v0; -func public %basic_empty_block() -> i8: +func public %basic_empty_block() -> i8 { block0: v0.i8 = or 1.i8 0.i8; br v0 block1 block3; @@ -24,3 +24,4 @@ func public %basic_empty_block() -> i8: block3: return v0; +} diff --git a/crates/filecheck/fixtures/adce/infinite_loop.sntn b/crates/filecheck/fixtures/adce/infinite_loop.sntn index ac3826a7..bb164cb4 100644 --- a/crates/filecheck/fixtures/adce/infinite_loop.sntn +++ b/crates/filecheck/fixtures/adce/infinite_loop.sntn @@ -2,15 +2,16 @@ target = "evm-ethereum-london" -# sameln: func public %infinite_loop() -> i8: +# sameln: func public %infinite_loop() -> i8 { # nextln: block0: # nextln: jump block1 # nextln: # nextln: block1: # nextln: jump block0 -func public %infinite_loop() -> i8: +func public %infinite_loop() -> i8 { block0: jump block1; block1: jump block0; +} diff --git a/crates/filecheck/fixtures/adce/loop_body_removed.sntn b/crates/filecheck/fixtures/adce/loop_body_removed.sntn index ffcb7a4c..88b646a5 100644 --- a/crates/filecheck/fixtures/adce/loop_body_removed.sntn +++ b/crates/filecheck/fixtures/adce/loop_body_removed.sntn @@ -1,13 +1,13 @@ target = "evm-ethereum-london" -# sameln: func public %loop_body_removed() -> i8: +# sameln: func public %loop_body_removed() -> i8 { # nextln: block0: # nextln: v0.i8 = or 1.i8 0.i8; # nextln: br v0 block0 block3; # nextln: # nextln: block3: # nextln: return v0; -func public %loop_body_removed() -> i8: +func public %loop_body_removed() -> i8 { block0: v0.i8 = or 1.i8 0.i8; br v0 block1 block3; @@ -23,3 +23,4 @@ func public %loop_body_removed() -> i8: block3: return v0; +} diff --git a/crates/filecheck/fixtures/adce/no_dead_code.sntn b/crates/filecheck/fixtures/adce/no_dead_code.sntn index 56b76b27..4a241df5 100644 --- a/crates/filecheck/fixtures/adce/no_dead_code.sntn +++ b/crates/filecheck/fixtures/adce/no_dead_code.sntn @@ -21,7 +21,7 @@ declare external %external_add(i8, i8) -> i8; # nextln: v6.i8 = phi (v3 block1) (v5 block2); # nextln: v7.i8 = call %external_add v6 1.i8; # nextln: return v7; -func public %no_dead_code() -> i8: +func public %no_dead_code() -> i8 { block0: v0.i1 = sgt 1.i8 2.i8; v1.i8 = sext v0; @@ -41,3 +41,4 @@ func public %no_dead_code() -> i8: v6.i8 = phi (v3 block1) (v5 block2); v7.i8 = call %external_add v6 1.i8; return v7; +} diff --git a/crates/filecheck/fixtures/adce/rewrite_dest.sntn b/crates/filecheck/fixtures/adce/rewrite_dest.sntn index e099bd65..a5d5bcf2 100644 --- a/crates/filecheck/fixtures/adce/rewrite_dest.sntn +++ b/crates/filecheck/fixtures/adce/rewrite_dest.sntn @@ -6,7 +6,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block2: # nextln: return v1; -func public %rewrite_dest(v0.i1) -> i32: +func public %rewrite_dest(v0.i1) -> i32 { block0: v1.i32 = sext v0; br v0 block1 block2; @@ -18,6 +18,7 @@ func public %rewrite_dest(v0.i1) -> i32: block2: return v1; +} # check: block0: # nextln: v1.i32 = add v0 1.i32; @@ -25,7 +26,7 @@ func public %rewrite_dest(v0.i1) -> i32: # nextln: # nextln: block3: # nextln: return v1; -func public %rewrite_dest2(v0.i32) -> i32: +func public %rewrite_dest2(v0.i32) -> i32 { block0: v1.i32 = add v0 1.i32; v2.i32 = add v0 2.i32; @@ -40,6 +41,7 @@ func public %rewrite_dest2(v0.i32) -> i32: block3: return v1; +} # check: block0: # nextln: v1.i32 = add v0 1.i32; @@ -51,7 +53,7 @@ func public %rewrite_dest2(v0.i32) -> i32: # nextln: # nextln: block3: # nextln: return v1; -func public %rewrite_dest3(v0.i32) -> i32: +func public %rewrite_dest3(v0.i32) -> i32 { block0: v1.i32 = add v0 1.i32; v2.i32 = add v0 2.i32; @@ -66,3 +68,4 @@ func public %rewrite_dest3(v0.i32) -> i32: block3: return v1; +} diff --git a/crates/filecheck/fixtures/adce/simple_dce.sntn b/crates/filecheck/fixtures/adce/simple_dce.sntn index d68051e0..2a0dd0cf 100644 --- a/crates/filecheck/fixtures/adce/simple_dce.sntn +++ b/crates/filecheck/fixtures/adce/simple_dce.sntn @@ -1,10 +1,11 @@ target = "evm-ethereum-london" -# sameln: func public %simple(v0.i8) -> i8: +# sameln: func public %simple(v0.i8) -> i8 { # nextln: block0: # nextln: return 2.i8; -func public %simple(v0.i8) -> i8: +func public %simple(v0.i8) -> i8 { block0: v1.i8 = sub v0 1.i8; v2.i8 = udiv v1 v0; return 2.i8; +} diff --git a/crates/filecheck/fixtures/adce/whole_loop_removed.sntn b/crates/filecheck/fixtures/adce/whole_loop_removed.sntn index 5864c23c..2467af5e 100644 --- a/crates/filecheck/fixtures/adce/whole_loop_removed.sntn +++ b/crates/filecheck/fixtures/adce/whole_loop_removed.sntn @@ -1,9 +1,9 @@ target = "evm-ethereum-london" -# sameln: func public %whole_loop_removed() -> i8: +# sameln: func public %whole_loop_removed() -> i8 { # nextln: block3: # nextln: return 1.i8; -func public %whole_loop_removed() -> i8: +func public %whole_loop_removed() -> i8 { block0: v0.i1 = or 1.i8 0.i8; v1.i8 = sext v0; @@ -22,3 +22,4 @@ func public %whole_loop_removed() -> i8: block3: return 1.i8; +} diff --git a/crates/filecheck/fixtures/gvn/branch_simple.sntn b/crates/filecheck/fixtures/gvn/branch_simple.sntn index 4db4f434..979cd17b 100644 --- a/crates/filecheck/fixtures/gvn/branch_simple.sntn +++ b/crates/filecheck/fixtures/gvn/branch_simple.sntn @@ -12,7 +12,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return v3; -func public %simple_branch(v0.i1, v1.i8, v2.i8) -> i8: +func public %simple_branch(v0.i1, v1.i8, v2.i8) -> i8 { block0: v3.i8 = add v1 v2; br v0 block1 block2; @@ -28,6 +28,7 @@ func public %simple_branch(v0.i1, v1.i8, v2.i8) -> i8: block3: v6.i8 = phi (v4 block1) (v5 block2); return v6; +} # check: block1: # nextln: v2.i8 = add v1 v1; @@ -35,7 +36,7 @@ func public %simple_branch(v0.i1, v1.i8, v2.i8) -> i8: # check: block2: # nextln: v3.i8 = add v1 v1; # nextln: return v3; -func public %simple_branch2(v0.i1, v1.i8) -> i8: +func public %simple_branch2(v0.i1, v1.i8) -> i8 { block0: br v0 block1 block2; @@ -47,3 +48,4 @@ func public %simple_branch2(v0.i1, v1.i8) -> i8: v3.i8 = add v1 v1; v4.i8 = add v1 v1; return v4; +} diff --git a/crates/filecheck/fixtures/gvn/commutativity.sntn b/crates/filecheck/fixtures/gvn/commutativity.sntn index d54dcdd7..7145c469 100644 --- a/crates/filecheck/fixtures/gvn/commutativity.sntn +++ b/crates/filecheck/fixtures/gvn/commutativity.sntn @@ -8,7 +8,7 @@ target = "evm-ethereum-london" # nextln: v20.i8 = sub v0 v1; # nextln: v22.i8 = sub v1 v0; # nextln: return; -func public %commutativity(v0.i8, v1.i8) -> void: +func public %commutativity(v0.i8, v1.i8) -> void { block1: v10.i8 = add v0 v1; v11.i8 = add v0 v1; @@ -20,3 +20,4 @@ func public %commutativity(v0.i8, v1.i8) -> void: v21.i8 = sub v0 v1; v22.i8 = sub v1 v0; return; +} diff --git a/crates/filecheck/fixtures/gvn/fold_predicted_value.sntn b/crates/filecheck/fixtures/gvn/fold_predicted_value.sntn index 406cf860..e06f0501 100644 --- a/crates/filecheck/fixtures/gvn/fold_predicted_value.sntn +++ b/crates/filecheck/fixtures/gvn/fold_predicted_value.sntn @@ -1,10 +1,10 @@ target = "evm-ethereum-london" # check: block1: -# nextln: return -1.i1; +# nextln: return 1.i1; # check: block2: # nextln: return 0.i1; -func public %fold_with_predicted_value(v0.i1) -> i8: +func public %fold_with_predicted_value(v0.i1) -> i8 { block0: br v0 block1 block2; @@ -15,3 +15,4 @@ func public %fold_with_predicted_value(v0.i1) -> i8: block2: v2.i8 = or v0 v0; return v2; +} diff --git a/crates/filecheck/fixtures/gvn/llvm_cyclic_phi_handling.sntn b/crates/filecheck/fixtures/gvn/llvm_cyclic_phi_handling.sntn index f0107e76..8f35463f 100644 --- a/crates/filecheck/fixtures/gvn/llvm_cyclic_phi_handling.sntn +++ b/crates/filecheck/fixtures/gvn/llvm_cyclic_phi_handling.sntn @@ -1,5 +1,5 @@ #! This test case is translated from -#! +#! target = "evm-ethereum-london" @@ -19,7 +19,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return; -func public %llvm_foo(v0.i32, v1.i32) -> void: +func public %llvm_foo(v0.i32, v1.i32) -> void { block0: jump block1; @@ -35,3 +35,4 @@ func public %llvm_foo(v0.i32, v1.i32) -> void: block3: return; +} diff --git a/crates/filecheck/fixtures/gvn/llvm_phi-edge-handling.sntn b/crates/filecheck/fixtures/gvn/llvm_phi-edge-handling.sntn index 3aa206ff..779ac0c1 100644 --- a/crates/filecheck/fixtures/gvn/llvm_phi-edge-handling.sntn +++ b/crates/filecheck/fixtures/gvn/llvm_phi-edge-handling.sntn @@ -20,7 +20,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block5: # nextln: jump block3; -func public %llvm_hoge(v0.i1) -> i16: +func public %llvm_hoge(v0.i1) -> i16 { block0: br v0 block1 block2; @@ -38,4 +38,5 @@ func public %llvm_hoge(v0.i1) -> i16: return v1; block5: - br -1.i1 block3 block4; + br 1.i1 block3 block4; +} diff --git a/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-move-block.sntn b/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-move-block.sntn index 0640e544..1dd812e4 100644 --- a/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-move-block.sntn +++ b/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-move-block.sntn @@ -22,15 +22,15 @@ target = "evm-ethereum-london" # nextln: jump block3; # nextln: # nextln: block4: -# nextln: $(var1=$VALUE).i1 = phi (v2 block1) (-1.i1 block2); -# nextln: $(var2=$VALUE).i1 = phi (0.i1 block1) (-1.i1 block2); +# nextln: $(var1=$VALUE).i1 = phi (v2 block1) (1.i1 block2); +# nextln: $(var2=$VALUE).i1 = phi (0.i1 block1) (1.i1 block2); # nextln: v5.i32 = phi (0.i32 block1) (-1.i32 block2); # nextln: v8.i32 = add v1 -1.i32; # nextln: br $var1 block1 block5; # nextln: # nextln: block5: # nextln: return; -func public %llvm_test(v0.*i32, v100.*i64) -> void: +func public %llvm_test(v0.*i32, v100.*i64) -> void { block0: jump block1; @@ -57,6 +57,7 @@ func public %llvm_test(v0.*i32, v100.*i64) -> void: block5: return; +} # regex: VALUE=\bv\d+\b # check: block0: @@ -77,15 +78,15 @@ func public %llvm_test(v0.*i32, v100.*i64) -> void: # nextln: jump block5; # nextln: # nextln: block5: -# nextln: $(var1=$VALUE).i1 = phi (v2 block1) (-1.i1 block3) (-1.i1 block4); -# nextln: $(var2=$VALUE).i1 = phi (0.i1 block1) (-1.i1 block3) (-1.i1 block4); +# nextln: $(var1=$VALUE).i1 = phi (v2 block1) (1.i1 block3) (1.i1 block4); +# nextln: $(var2=$VALUE).i1 = phi (0.i1 block1) (1.i1 block3) (1.i1 block4); # nextln: v3.i32 = phi (0.i32 block1) (-1.i32 block3) (-1.i32 block4); # nextln: v6.i32 = add v1 -1.i32; # nextln: br $var1 block1 block6; # nextln: # nextln: block6: # nextln: return; -func public %llvm_test2(v0.i1) -> void: +func public %llvm_test2(v0.i1) -> void { block0: jump block1; @@ -112,3 +113,4 @@ func public %llvm_test2(v0.i1) -> void: block6: return; +} diff --git a/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-simplification-dependencies.sntn b/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-simplification-dependencies.sntn index c7b86952..1d493ebc 100644 --- a/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-simplification-dependencies.sntn +++ b/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-simplification-dependencies.sntn @@ -31,7 +31,7 @@ target = "evm-ethereum-london" # nextln: v5.i32 = add v1 1.i32; # nextln: store @memory v10 v5; # nextln: jump block4; -func public %llvm_test1(v0.i1, v10.*i32) -> void: +func public %llvm_test1(v0.i1, v10.*i32) -> void { block0: jump block1; @@ -58,14 +58,14 @@ func public %llvm_test1(v0.i1, v10.*i32) -> void: v5.i32 = add v1 1.i32; store @memory v10 v5; jump block4; - +} # regex: VALUE=\bv\d+\b # check: block0: # nextln: jump block1; # nextln: # nextln: block1: -# nextln: v15.i64 = phi (-1.i64 block0) (v3 block5); +# nextln: $(var1=$VALUE).i64 = phi (-1.i64 block0) (v3 block5); # nextln: v3.i64 = phi (v7 block5) (0.i64 block0); # nextln: br v0 block2 block4; # nextln: @@ -79,7 +79,7 @@ func public %llvm_test1(v0.i1, v10.*i32) -> void: # nextln: return; # nextln: # nextln: block4: -# nextln: store @memory v1 v15; +# nextln: store @memory v1 $var1; # nextln: jump block5; # nextln: # nextln: block5: @@ -89,7 +89,7 @@ func public %llvm_test1(v0.i1, v10.*i32) -> void: # nextln: # nextln: block6: # nextln: return; -func public %llvm_test2(v0.i1, v1.*i64, v2.i64) -> void: +func public %llvm_test2(v0.i1, v1.*i64, v2.i64) -> void { block0: jump block1; @@ -118,6 +118,7 @@ func public %llvm_test2(v0.i1, v1.*i64, v2.i64) -> void: block6: return; +} # check: block0: # nextln: jump block1; @@ -142,7 +143,7 @@ func public %llvm_test2(v0.i1, v1.*i64, v2.i64) -> void: # nextln: # nextln: block5: # nextln: return; -func public %llvm_pr49873_cmp_simplification_dependency(v0.*i32, v1.i1) -> void: +func public %llvm_pr49873_cmp_simplification_dependency(v0.*i32, v1.i1) -> void { block0: jump block1; @@ -167,3 +168,4 @@ func public %llvm_pr49873_cmp_simplification_dependency(v0.*i32, v1.i1) -> void: block5: return; +} diff --git a/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-simplified-to-existing-value-then-changes.sntn b/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-simplified-to-existing-value-then-changes.sntn index d4fd303f..5432bd1c 100644 --- a/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-simplified-to-existing-value-then-changes.sntn +++ b/crates/filecheck/fixtures/gvn/llvm_phi-of-ops-simplified-to-existing-value-then-changes.sntn @@ -8,7 +8,7 @@ target = "evm-ethereum-london" # nextln: jump block1; # nextln: # nextln: block1: -# nextln: $(var1=$VALUE).i1 = phi (0.i1 block0) (-1.i1 block7); +# nextln: $(var1=$VALUE).i1 = phi (0.i1 block0) (1.i1 block7); # nextln: v1.i32 = phi (-2022207984.i32 block0) (0.i32 block7); # nextln: br v0 block3 block2; # nextln: @@ -24,13 +24,13 @@ target = "evm-ethereum-london" # nextln: jump block7; # nextln: # nextln: block7: -# nextln: $(var3=$VALUE).i1 = phi ($var2 block3) (-1.i1 block6); +# nextln: $(var3=$VALUE).i1 = phi ($var2 block3) (1.i1 block6); # nextln: v4.i32 = phi (v2 block3) (0.i32 block6); # nextln: br $var3 block8 block1; # nextln: # nextln: block8: # nextln: return; -func public %llvm_pr36501(v0.i1) -> void: +func public %llvm_pr36501(v0.i1) -> void { block0: jump block1; @@ -56,6 +56,7 @@ func public %llvm_pr36501(v0.i1) -> void: block8: return; +} # regex: VALUE=\bv\d+\b # check: block0 @@ -88,7 +89,7 @@ func public %llvm_pr36501(v0.i1) -> void: # nextln: v9.i32 = phi (v5 block11) (1.i32 block18); # nextln: store @memory v31 $var; # nextln: return; -func public %llvm_PR42557(v0.i32, v1.i1, v2.i1, v31.*i32) -> void: +func public %llvm_PR42557(v0.i32, v1.i1, v2.i1, v31.*i32) -> void { block0: jump block1; @@ -119,3 +120,4 @@ func public %llvm_PR42557(v0.i32, v1.i1, v2.i1, v31.*i32) -> void: v10.i1 = eq v9 0.i32; store @memory v31 v10; return; +} diff --git a/crates/filecheck/fixtures/gvn/llvm_todo-pr35074-phi-of-ops.sntn b/crates/filecheck/fixtures/gvn/llvm_todo-pr35074-phi-of-ops.sntn index d7aea3d0..e6baa474 100644 --- a/crates/filecheck/fixtures/gvn/llvm_todo-pr35074-phi-of-ops.sntn +++ b/crates/filecheck/fixtures/gvn/llvm_todo-pr35074-phi-of-ops.sntn @@ -30,7 +30,7 @@ target = "evm-ethereum-london" # nextln: v6.i32 = add v2 1.i32; # nextln: store @memory v0 v6; # nextln: jump block4; -func public %llvm_crash1_pr35074(v0.*i32, v1.i1) -> void: +func public %llvm_crash1_pr35074(v0.*i32, v1.i1) -> void { block0: jump block1; @@ -57,3 +57,4 @@ func public %llvm_crash1_pr35074(v0.*i32, v1.i1) -> void: v6.i32 = add v2 1.i32; store @memory v0 v6; jump block4; +} diff --git a/crates/filecheck/fixtures/gvn/llvm_todo-pr37121-seens-this-value-a-lot.sntn b/crates/filecheck/fixtures/gvn/llvm_todo-pr37121-seens-this-value-a-lot.sntn index 5cb3eaf5..4cf06f83 100644 --- a/crates/filecheck/fixtures/gvn/llvm_todo-pr37121-seens-this-value-a-lot.sntn +++ b/crates/filecheck/fixtures/gvn/llvm_todo-pr37121-seens-this-value-a-lot.sntn @@ -13,7 +13,7 @@ target = "evm-ethereum-london" # nextln: jump block1; # nextln: # nextln: -func public %llvm_foo(v100.i8, v101.*i8) -> void: +func public %llvm_foo(v100.i8, v101.*i8) -> void { block0: v0.i8 = load @memory v101; jump block1; @@ -27,3 +27,4 @@ func public %llvm_foo(v100.i8, v101.*i8) -> void: v3.i8 = phi (v2 block1); store @memory v101 v3; return; +} diff --git a/crates/filecheck/fixtures/gvn/llvm_todo-pr42422-phi-of-ops.sntn b/crates/filecheck/fixtures/gvn/llvm_todo-pr42422-phi-of-ops.sntn index 06a4bcf3..3ca9eb3e 100644 --- a/crates/filecheck/fixtures/gvn/llvm_todo-pr42422-phi-of-ops.sntn +++ b/crates/filecheck/fixtures/gvn/llvm_todo-pr42422-phi-of-ops.sntn @@ -56,7 +56,7 @@ target = "evm-ethereum-london" # nextln: jump block16; # nextln: # nextln: block16: -# nextln: $(var1=$VALUE).i1 = phi (0.i1 block5) (0.i1 block14) (-1.i1 block15); +# nextln: $(var1=$VALUE).i1 = phi (0.i1 block5) (0.i1 block14) (1.i1 block15); # nextln: v8.i32 = phi (0.i32 block15) (v7 block14) (1.i32 block5); # nextln: br $var1 block17 block19; # nextln: @@ -68,13 +68,13 @@ target = "evm-ethereum-london" # nextln: jump block19; # nextln: # nextln: block19: -# nextln: $(var2=$VALUE).i1 = phi ($var1 block16) (-1.i1 block18); +# nextln: $(var2=$VALUE).i1 = phi ($var1 block16) (1.i1 block18); # nextln: v11.i32 = phi (v8 block16) (0.i32 block18); # nextln: br $var2 block1 block20; # nextln: # nextln: block20: # nextln: return; -func public %llvm_d(v0.i1, v1.i1) -> void: +func public %llvm_d(v0.i1, v1.i1) -> void { block0: jump block1; @@ -114,7 +114,7 @@ func public %llvm_d(v0.i1, v1.i1) -> void: jump block9; block11: - br -1.i1 block12 block13; + br 1.i1 block12 block13; block12: jump block14; @@ -148,3 +148,4 @@ func public %llvm_d(v0.i1, v1.i1) -> void: block20: return; +} diff --git a/crates/filecheck/fixtures/gvn/no_branch.sntn b/crates/filecheck/fixtures/gvn/no_branch.sntn index 99bf648f..c5d20f39 100644 --- a/crates/filecheck/fixtures/gvn/no_branch.sntn +++ b/crates/filecheck/fixtures/gvn/no_branch.sntn @@ -3,8 +3,9 @@ target = "evm-ethereum-london" # check: block0: # nextln: v1.i8 = add v0 10.i8; # nextln: return v1; -func public %no_branch(v0.i8) -> i8: +func public %no_branch(v0.i8) -> i8 { block0: v1.i8 = add v0 10.i8; v2.i8 = add v0 10.i8; return v2; +} diff --git a/crates/filecheck/fixtures/gvn/not_dominated.sntn b/crates/filecheck/fixtures/gvn/not_dominated.sntn index 6b3e69e0..c766731a 100644 --- a/crates/filecheck/fixtures/gvn/not_dominated.sntn +++ b/crates/filecheck/fixtures/gvn/not_dominated.sntn @@ -6,7 +6,7 @@ target = "evm-ethereum-london" # check: block2: # nextln: v3.i8 = add v1 v1; # nextln: return v3; -func public %not_dominated_by_leader(v0.i1, v1.i8) -> i8: +func public %not_dominated_by_leader(v0.i1, v1.i8) -> i8 { block0: br v0 block1 block2; @@ -17,3 +17,4 @@ func public %not_dominated_by_leader(v0.i1, v1.i8) -> i8: block2: v3.i8 = add v1 v1; return v3; +} diff --git a/crates/filecheck/fixtures/gvn/predicted_branch.sntn b/crates/filecheck/fixtures/gvn/predicted_branch.sntn index ab835ec2..817e8e0a 100644 --- a/crates/filecheck/fixtures/gvn/predicted_branch.sntn +++ b/crates/filecheck/fixtures/gvn/predicted_branch.sntn @@ -7,8 +7,8 @@ target = "evm-ethereum-london" # nextln: return 0.i1; # nextln: # nextln: block4: -# nextln: return -1.i1; -func public %unreachable_edge(v0.i1) -> i1: +# nextln: return 1.i1; +func public %unreachable_edge(v0.i1) -> i1 { block0: br v0 block1 block2; @@ -26,3 +26,4 @@ func public %unreachable_edge(v0.i1) -> i1: block4: v4.i1 = or v0 v0; return v4; +} diff --git a/crates/filecheck/fixtures/gvn/redundancy_by_predicate.sntn b/crates/filecheck/fixtures/gvn/redundancy_by_predicate.sntn index 8d326fc3..f1842dfc 100644 --- a/crates/filecheck/fixtures/gvn/redundancy_by_predicate.sntn +++ b/crates/filecheck/fixtures/gvn/redundancy_by_predicate.sntn @@ -4,7 +4,7 @@ target = "evm-ethereum-london" # nextln: jump block3; # check: block3: # nextln: return v2; -func public %redundancy_by_predicate(v0.i32, v1.i32) -> i32: +func public %redundancy_by_predicate(v0.i32, v1.i32) -> i32 { block0: v2.i32 = add v0 1.i32; v3.i1 = eq v0 v1; @@ -20,12 +20,13 @@ func public %redundancy_by_predicate(v0.i32, v1.i32) -> i32: block3: v6.i32 = phi (v4 block1) (v2 block2); return v6; +} # check: block1: # nextln: jump block3; # check: block3: # nextln: return v2; -func public %redundancy_by_predicate1(v0.i32, v1.i32) -> i32: +func public %redundancy_by_predicate1(v0.i32, v1.i32) -> i32 { block0: v2.i32 = add v0 1.i32; v3.i1 = eq v1 v0; @@ -43,12 +44,13 @@ func public %redundancy_by_predicate1(v0.i32, v1.i32) -> i32: block3: v7.i32 = phi (v6 block1) (v2 block2); return v7; +} # check: block2: # nextln: jump block3; # check: block3: # nextln: return v2; -func public %redundancy_by_predicate2(v0.i32, v1.i32) -> i32: +func public %redundancy_by_predicate2(v0.i32, v1.i32) -> i32 { block0: v2.i32 = add v0 1.i32; v3.i1 = ne v1 v0; @@ -66,3 +68,4 @@ func public %redundancy_by_predicate2(v0.i32, v1.i32) -> i32: block3: v7.i32 = phi (v2 block1) (v6 block2); return v7; +} diff --git a/crates/filecheck/fixtures/gvn/unreachable_edge.sntn b/crates/filecheck/fixtures/gvn/unreachable_edge.sntn index 2942def3..f265aee8 100644 --- a/crates/filecheck/fixtures/gvn/unreachable_edge.sntn +++ b/crates/filecheck/fixtures/gvn/unreachable_edge.sntn @@ -6,8 +6,8 @@ target = "evm-ethereum-london" # nextln: return 0.i1; # nextln: # nextln: block4: -# nextln: return -1.i1; -func public %unreachable_edge(v0.i1) -> i1: +# nextln: return 1.i1; +func public %unreachable_edge(v0.i1) -> i1 { block0: br v0 block1 block2; @@ -25,6 +25,7 @@ func public %unreachable_edge(v0.i1) -> i1: block4: v4.i1 = or v0 v0; return v4; +} # check: block0: # nextln: jump block1; @@ -34,7 +35,7 @@ func public %unreachable_edge(v0.i1) -> i1: # nextln: # nextln: block3: # nextln: return v1; -func public %unreachable_edge2(v0.i8) -> i8: +func public %unreachable_edge2(v0.i8) -> i8 { block0: br 1.i1 block1 block2; @@ -49,6 +50,7 @@ func public %unreachable_edge2(v0.i8) -> i8: block3: v3.i8 = phi (v1 block1) (v2 block2); return v3; +} # check: block0: # nextln: jump block1; @@ -65,7 +67,7 @@ func public %unreachable_edge2(v0.i8) -> i8: # nextln: block4: # nextln: v5.i8 = phi (v1 block1) (v4 block3); # nextln: return v5; -func public %unreachable_edge3(v0.i8) -> i8: +func public %unreachable_edge3(v0.i8) -> i8 { block0: br 1.i1 block1 block2; @@ -85,3 +87,4 @@ func public %unreachable_edge3(v0.i8) -> i8: block4: v5.i8 = phi (v1 block1) (v3 block2) (v4 block3); return v5; +} diff --git a/crates/filecheck/fixtures/gvn/value_phi_loop.sntn b/crates/filecheck/fixtures/gvn/value_phi_loop.sntn index b8db4f1d..f4925896 100644 --- a/crates/filecheck/fixtures/gvn/value_phi_loop.sntn +++ b/crates/filecheck/fixtures/gvn/value_phi_loop.sntn @@ -27,7 +27,7 @@ target = "evm-ethereum-london" # nextln: $(var2=$VALUE).i32 = phi (v10 block3) (v42 block4); # nextln: v50.i32 = phi ($var1 block3) (v40 block4); # nextln: return $var2; -func public %value_phi_loop(v100.i32, v101.i32, v102.i1, v103.i1, v104.i32, v105.i32) -> i32: +func public %value_phi_loop(v100.i32, v101.i32, v102.i1, v103.i1, v104.i32, v105.i32) -> i32 { block0: v1.i32 = add v100 v101; jump block1; @@ -55,3 +55,4 @@ func public %value_phi_loop(v100.i32, v101.i32, v102.i1, v103.i1, v104.i32, v105 v50.i32 = phi (v11 block3) (v40 block4); v51.i32 = sub v50 v101; return v51; +} diff --git a/crates/filecheck/fixtures/gvn/value_phi_many_preds.sntn b/crates/filecheck/fixtures/gvn/value_phi_many_preds.sntn index ca3746dc..897199bd 100644 --- a/crates/filecheck/fixtures/gvn/value_phi_many_preds.sntn +++ b/crates/filecheck/fixtures/gvn/value_phi_many_preds.sntn @@ -17,7 +17,7 @@ target = "evm-ethereum-london" # nextln: $(var=$VALUE).i8 = phi (v31 block3) (v41 block4) (v51 block5); # nextln: v60.i8 = phi (3.i8 block3) (4.i8 block4) (5.i8 block5); # nextln: return $var; -func public %many_preds(v0.i1, v1.i1, v2.i8) -> i8: +func public %many_preds(v0.i1, v1.i1, v2.i8) -> i8 { block0: br v0 block1 block2; @@ -46,3 +46,4 @@ func public %many_preds(v0.i1, v1.i1, v2.i8) -> i8: v60.i8 = phi (v30 block3) (v40 block4) (v50 block5); v61.i8 = add v2 v60; return v61; +} diff --git a/crates/filecheck/fixtures/gvn/value_phi_recursive.sntn b/crates/filecheck/fixtures/gvn/value_phi_recursive.sntn index 1e4c1b49..82f5419b 100644 --- a/crates/filecheck/fixtures/gvn/value_phi_recursive.sntn +++ b/crates/filecheck/fixtures/gvn/value_phi_recursive.sntn @@ -8,7 +8,7 @@ target = "evm-ethereum-london" # nextln: $(var2=$VALUE).i8 = phi (v5 block5) ($var1 block6); # nextln: v7.i8 = phi (v6 block6) (5.i8 block5); # check: return $var2; -func public %value_phi_recursive(v0.i1, v1.i1, v2.i8) -> i8: +func public %value_phi_recursive(v0.i1, v1.i1, v2.i8) -> i8 { block0: br v0 block1 block2; @@ -38,6 +38,7 @@ func public %value_phi_recursive(v0.i1, v1.i1, v2.i8) -> i8: v7.i8 = phi (v6 block6) (5.i8 block5); v8.i8 = add v7 v2; return v8; +} # regex: VALUE=\bv\d+\b # check: block0: @@ -76,7 +77,7 @@ func public %value_phi_recursive(v0.i1, v1.i1, v2.i8) -> i8: # nextln: $(var3=$VALUE).i8 = phi (v52 block5) ($var2 block6); # nextln: v70.i8 = phi ($var1 block6) (v50 block5); # nextln: return $var3; -func public %value_phi_recursive2(v0.i1, v1.i1, v2.i8, v3.i8) -> i8: +func public %value_phi_recursive2(v0.i1, v1.i1, v2.i8, v3.i8) -> i8 { block0: br v0 block1 block2; @@ -112,3 +113,4 @@ func public %value_phi_recursive2(v0.i1, v1.i1, v2.i8, v3.i8) -> i8: v70.i8 = phi (v61 block6) (v50 block5); v72.i8 = add v70 v3; return v72; +} diff --git a/crates/filecheck/fixtures/gvn/value_phi_remote_block.sntn b/crates/filecheck/fixtures/gvn/value_phi_remote_block.sntn index 291a486d..8e70578a 100644 --- a/crates/filecheck/fixtures/gvn/value_phi_remote_block.sntn +++ b/crates/filecheck/fixtures/gvn/value_phi_remote_block.sntn @@ -22,7 +22,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block5: # nextln: return $var; -func public %phi_value_remote_block(v0.i1, v1.i1, v2.i32) -> i32: +func public %phi_value_remote_block(v0.i1, v1.i1, v2.i32) -> i32 { block0: br v0 block1 block2; @@ -45,3 +45,4 @@ func public %phi_value_remote_block(v0.i1, v1.i1, v2.i32) -> i32: block5: v7.i32 = add v5 v2; return v7; +} diff --git a/crates/filecheck/fixtures/gvn/value_phi_resolution_must_fail.sntn b/crates/filecheck/fixtures/gvn/value_phi_resolution_must_fail.sntn index 875ea769..e13f84a4 100644 --- a/crates/filecheck/fixtures/gvn/value_phi_resolution_must_fail.sntn +++ b/crates/filecheck/fixtures/gvn/value_phi_resolution_must_fail.sntn @@ -4,7 +4,7 @@ target = "evm-ethereum-london" # nextln: v5.i64 = phi (v2 block1) (v1 block2); # nextln: v6.i64 = add v5 v5; # nextln: return v6; -func public %value_phi_resolution_must_fail(v0.i1, v1.i64, v2.i64) -> i64: +func public %value_phi_resolution_must_fail(v0.i1, v1.i64, v2.i64) -> i64 { block0: br v0 block1 block2; @@ -20,3 +20,4 @@ func public %value_phi_resolution_must_fail(v0.i1, v1.i64, v2.i64) -> i64: v5.i64 = phi (v2 block1) (v1 block2); v6.i64 = add v5 v5; return v6; +} diff --git a/crates/filecheck/fixtures/gvn/value_phi_same_arg.sntn b/crates/filecheck/fixtures/gvn/value_phi_same_arg.sntn index 5cc7a31e..6756eb6c 100644 --- a/crates/filecheck/fixtures/gvn/value_phi_same_arg.sntn +++ b/crates/filecheck/fixtures/gvn/value_phi_same_arg.sntn @@ -5,7 +5,7 @@ target = "evm-ethereum-london" # nextln: $(var=$VALUE).i64 = phi (v3 block1) (v4 block2); # nextln: v5.i64 = phi (v1 block1) (v2 block2); # nextln: return $var; -func public %value_phi_same_arg(v0.i1, v1.i64, v2.i64) -> i64: +func public %value_phi_same_arg(v0.i1, v1.i64, v2.i64) -> i64 { block0: br v0 block1 block2; @@ -21,3 +21,4 @@ func public %value_phi_same_arg(v0.i1, v1.i64, v2.i64) -> i64: v5.i64 = phi (v1 block1) (v2 block2); v6.i64 = add v5 v5; return v6; +} diff --git a/crates/filecheck/fixtures/gvn/value_phi_simple.sntn b/crates/filecheck/fixtures/gvn/value_phi_simple.sntn index f2ff6e10..97b91496 100644 --- a/crates/filecheck/fixtures/gvn/value_phi_simple.sntn +++ b/crates/filecheck/fixtures/gvn/value_phi_simple.sntn @@ -16,7 +16,7 @@ target = "evm-ethereum-london" # nextln: $(var=$VALUE).i32 = phi (v3 block1) (v4 block2); # nextln: v5.i32 = phi (v1 block1) (v2 block2); # nextln: return $var; -func public %value_phi_simple(v0.i1, v1.i32, v2.i32) -> i32: +func public %value_phi_simple(v0.i1, v1.i32, v2.i32) -> i32 { block0: br v0 block1 block2; @@ -32,3 +32,4 @@ func public %value_phi_simple(v0.i1, v1.i32, v2.i32) -> i32: v5.i32 = phi (v1 block1) (v2 block2); v6.i32 = add v5 1.i32; return v6; +} diff --git a/crates/filecheck/fixtures/insn_simplify/and.sntn b/crates/filecheck/fixtures/insn_simplify/and.sntn index eb89f22a..c662bdc2 100644 --- a/crates/filecheck/fixtures/insn_simplify/and.sntn +++ b/crates/filecheck/fixtures/insn_simplify/and.sntn @@ -2,102 +2,114 @@ target = "evm-ethereum-london" # v0 & v0 => v0 # check: return v0; -func public %and0(v0.i8) -> i8: +func public %and0(v0.i8) -> i8 { block0: v1.i8 = and v0 v0; return v1; +} # v0 & 0 => 0 # check: return 0.i8; -func public %and1(v0.i8) -> i8: +func public %and1(v0.i8) -> i8 { block0: v1.i8 = and 0.i8 v0; return v1; +} # v0 & -1 => v0 # check: return v0; -func public %and2(v0.i8) -> i8: +func public %and2(v0.i8) -> i8 { block0: v1.i8 = and -1.i8 v0; return v1; +} # v0 & !v0 => 0 # check: return 0.i8; -func public %and3(v0.i8) -> i8: +func public %and3(v0.i8) -> i8 { block0: v1.i8 = not v0; v2.i8 = and v0 v1; return v2; +} # v0 & (v0 | _) => v0 # check: return v0; -func public %and4(v0.i8, v1.i8) -> i8: +func public %and4(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v0 v1; v3.i8 = and v2 v0; return v3; +} # v0 & (v1 | v0) => v0 # check: return v0; -func public %and5(v0.i8, v1.i8) -> i8: +func public %and5(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v1 v0; v3.i8 = and v0 v2; return v3; +} # (v0 | v1) & (v0 | !v1) => v0 # check: return v0; -func public %and6(v0.i8, v1.i8) -> i8: +func public %and6(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = or v0 v1; v4.i8 = or v0 v2; v5.i8 = and v3 v4; return v5; +} # (v0 | v1) & (!v1 | v0) => v0 # check: return v0; -func public %and7(v0.i8, v1.i8) -> i8: +func public %and7(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = or v0 v1; v4.i8 = or v2 v0; v5.i8 = and v3 v4; return v5; +} # (v0 | v1) & (!v0 | v1) => b # check: return v1; -func public %and9(v0.i8, v1.i8) -> i8: +func public %and9(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = or v0 v1; v4.i8 = or v2 v1; v5.i8 = and v3 v4; return v5; +} # (v0 | v1) & (v1 | !v0) => b # check: return v1; -func public %and10(v0.i8, v1.i8) -> i8: +func public %and10(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = or v0 v1; v4.i8 = or v1 v2; v5.i8 = and v3 v4; return v5; +} # v0 & (-v0) => v0 where v0: is_power_of_two. # check: return 8.i8; -func public %and11() -> i8: +func public %and11() -> i8 { block0: v0.i8 = neg 8.i8; v1.i8 = and 8.i8 v0; return v1; +} # v0 & (v0 - 1) => 0 where v0: is_power_of_two. # check: return 0.i8; -func public %and12() -> i8: +func public %and12() -> i8 { block0: v0.i8 = sub 8.i8 1.i8; v1.i8 = and 8.i8 v0; return v1; +} diff --git a/crates/filecheck/fixtures/insn_simplify/cmp.sntn b/crates/filecheck/fixtures/insn_simplify/cmp.sntn index 993a3e62..7868dc6d 100644 --- a/crates/filecheck/fixtures/insn_simplify/cmp.sntn +++ b/crates/filecheck/fixtures/insn_simplify/cmp.sntn @@ -1,71 +1,81 @@ target = "evm-ethereum-london" # a == a => 1 -# check: return -1.i1; -func public %eq0(v0.i64) -> i1: +# check: return 1.i1; +func public %eq0(v0.i64) -> i1 { block0: v1.i1 = eq v0 v0; return v1; +} # a != a => 0 # check: return 0.i1; -func public %eq1(v0.i64) -> i1: +func public %eq1(v0.i64) -> i1 { block0: v1.i1 = ne v0 v0; return v1; +} # a < a => 0 # check: return 0.i1; -func public %lt(v0.i64) -> i1: +func public %lt(v0.i64) -> i1 { block0: v1.i1 = lt v0 v0; return v1; +} # a > b => b < a # check: v2.i1 = lt v1 v0; -func public %gt(v0.i64, v1.i64) -> i1: +func public %gt(v0.i64, v1.i64) -> i1 { block0: v2.i1 = gt v0 v1; return v2; +} # a < a => 0 # check: return 0.i1; -func public %slt(v0.i64) -> i1: +func public %slt(v0.i64) -> i1 { block0: v1.i1 = slt v0 v0; return v1; +} # a > b => b < a # check: v2.i1 = slt v1 v0; -func public %sgt(v0.i64, v1.i64) -> i1: +func public %sgt(v0.i64, v1.i64) -> i1 { block0: v2.i1 = sgt v0 v1; return v2; +} # a <= a => 1 -# check: return -1.i1; -func public %le(v0.i64) -> i1: +# check: return 1.i1; +func public %le(v0.i64) -> i1 { block0: v1.i1 = le v0 v0; return v1; +} # a >= b => b <= a # check: v2.i1 = le v1 v0; -func public %ge(v0.i64, v1.i64) -> i1: +func public %ge(v0.i64, v1.i64) -> i1 { block0: v2.i1 = ge v0 v1; return v2; +} # a <= a => 1 -# check: return -1.i1; -func public %sle(v0.i64) -> i1: +# check: return 1.i1; +func public %sle(v0.i64) -> i1 { block0: v1.i1 = sle v0 v0; return v1; +} # a >= b => b <= a # check: v2.i1 = sle v1 v0; -func public %sge(v0.i64, v1.i64) -> i1: +func public %sge(v0.i64, v1.i64) -> i1 { block0: v2.i1 = sge v0 v1; return v2; +} diff --git a/crates/filecheck/fixtures/insn_simplify/neg.sntn b/crates/filecheck/fixtures/insn_simplify/neg.sntn index c9262583..19c6b97b 100644 --- a/crates/filecheck/fixtures/insn_simplify/neg.sntn +++ b/crates/filecheck/fixtures/insn_simplify/neg.sntn @@ -2,17 +2,18 @@ target = "evm-ethereum-london" # -(-v0) => v0 # check: return v0; -func public %neg0(v0.i8) -> i8: +func public %neg0(v0.i8) -> i8 { block0: v1.i8 = neg v0; v2.i8 = neg v1; return v2; - +} # -(!v0) => v0 # check: v2.i16 = add v0 1.i16; -func public %neg1(v0.i16) -> i16: +func public %neg1(v0.i16) -> i16 { block0: v1.i8 = not v0; v2.i8 = neg v1; return v2; +} diff --git a/crates/filecheck/fixtures/insn_simplify/not.sntn b/crates/filecheck/fixtures/insn_simplify/not.sntn index fefdf603..8977f325 100644 --- a/crates/filecheck/fixtures/insn_simplify/not.sntn +++ b/crates/filecheck/fixtures/insn_simplify/not.sntn @@ -2,88 +2,99 @@ target = "evm-ethereum-london" # !(!v0) => v0 # check: return v0; -func public %not0(v0.i8) -> i8: +func public %not0(v0.i8) -> i8 { block0: v1.i8 = not v0; v2.i8 = not v1; return v2; +} # !(-v0) => v0 - 1 # check: v2.i8 = sub v0 1.i8; -func public %not1(v0.i8) -> i8: +func public %not1(v0.i8) -> i8 { block0: v1.i8 = neg v0; v2.i8 = not v1; return v2; +} # !(-v0) => v0 - 1 # check: v2.i8 = sub v0 1.i8; -func public %not2(v0.i8) -> i8: +func public %not2(v0.i8) -> i8 { block0: v1.i8 = neg v0; v2.i8 = not v1; return v2; +} # !(a == b) => a != b; # check: v3.i1 = ne v0 v1; -func public %not3(v0.i8, v1.i8) -> i8: +func public %not3(v0.i8, v1.i8) -> i8 { block0: v2.i1 = eq v0 v1; v3.i1 = not v2; return v3; +} # !(a != b) => a == b; # check: v3.i1 = eq v0 v1; -func public %not4(v0.i8, v1.i8) -> i8: +func public %not4(v0.i8, v1.i8) -> i8 { block0: v2.i1 = ne v0 v1; v3.i1 = not v2; return v3; +} # !(a < b) => (b <= a); # check: v3.i1 = le v1 v0; -func public %not5(v0.i8, v1.i8) -> i8: +func public %not5(v0.i8, v1.i8) -> i8 { block0: v2.i1 = lt v0 v1; v3.i1 = not v2; return v3; +} # !(a <= b) => (b < a); # check: v3.i1 = lt v1 v0; -func public %not6(v0.i8, v1.i8) -> i8: +func public %not6(v0.i8, v1.i8) -> i8 { block0: v2.i1 = le v0 v1; v3.i1 = not v2; return v3; +} # !(a < b) => (b <= a); # check: v3.i1 = sle v1 v0; -func public %not7(v0.i8, v1.i8) -> i8: +func public %not7(v0.i8, v1.i8) -> i8 { block0: v2.i1 = slt v0 v1; v3.i1 = not v2; return v3; +} # !(a <= b) => (b < a); # check: v3.i1 = slt v1 v0; -func public %not8(v0.i8, v1.i8) -> i8: +func public %not8(v0.i8, v1.i8) -> i8 { block0: v2.i1 = sle v0 v1; v3.i1 = not v2; return v3; +} # !(a > b) => !(b < a) => a <= b # check: v3.i1 = le v0 v1; -func public %not9(v0.i8, v1.i8) -> i8: +func public %not9(v0.i8, v1.i8) -> i8 { block0: v2.i1 = gt v0 v1; v3.i1 = not v2; return v3; +} # !(a >= b) => !(b <= a) => a < b # check: v3.i1 = lt v0 v1; -func public %not10(v0.i8, v1.i8) -> i8: +func public %not10(v0.i8, v1.i8) -> i8 { block0: v2.i1 = ge v0 v1; v3.i1 = not v2; return v3; +} diff --git a/crates/filecheck/fixtures/insn_simplify/or.sntn b/crates/filecheck/fixtures/insn_simplify/or.sntn index a54918af..e1c98772 100644 --- a/crates/filecheck/fixtures/insn_simplify/or.sntn +++ b/crates/filecheck/fixtures/insn_simplify/or.sntn @@ -2,168 +2,186 @@ target = "evm-ethereum-london" # v0 | -1 => -1 # check: return -1.i8; -func public %or0(v0.i8) -> i8: +func public %or0(v0.i8) -> i8 { block0: v1.i8 = or v0 -1.i8; return v1; +} # v0 | v0 => v0 # check: return v0; -func public %or1(v0.i8) -> i8: +func public %or1(v0.i8) -> i8 { block0: v1.i8 = or v0 v0; return v1; +} # v0 | 0 => v0 # check: return v0; -func public %or2(v0.i8) -> i8: +func public %or2(v0.i8) -> i8 { block0: v1.i8 = or v0 0.i8; return v1; +} # v0 | !v0 => -1 # check: return -1.i8; -func public %or3(v0.i8) -> i8: +func public %or3(v0.i8) -> i8 { block0: v1.i8 = not v0; v2.i8 = or v0 v1; return v2; +} # v0 | (v0 & v1) => v0 # check: return v0; -func public %or4(v0.i8, v1.i8) -> i8: +func public %or4(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v0 v1; v3.i8 = or v0 v2; return v3; +} # v0 | (v1 & v0) => v0 # check: return v0; -func public %or5(v0.i8, v1.i8) -> i8: +func public %or5(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v1 v0; v3.i8 = or v0 v2; return v3; +} # v0 | !(v0 & v1) => -1 # check: return -1.i8; -func public %or6(v0.i8, v1.i8) -> i8: +func public %or6(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v0 v1; v3.i8 = not v2; v4.i8 = or v0 v3; return v4; +} # v0 | !(v1 & v0) => -1 # check: return -1.i8; -func public %or7(v0.i8, v1.i8) -> i8: +func public %or7(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v1 v0; v3.i8 = not v2; v4.i8 = or v0 v3; return v4; +} # (v0 ^ v1) | (v0 & !v1) => (v0 ^ v1) # check: return v2; -func public %or8(v0.i8, v1.i8) -> i8: +func public %or8(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = not v1; v4.i8 = and v0 v3; v5.i8 = or v2 v4; return v5; +} # (v0 ^ 1) | (!v1 & v0) => (v0 ^ v1) # check: return v2; -func public %or9(v0.i8, v1.i8) -> i8: +func public %or9(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = not v1; v4.i8 = and v3 v0; v5.i8 = or v2 v4; return v5; +} # (v0 ^ v1) | (!v0 & v1) => (v0 ^ v1) # check: return v2; -func public %or10(v0.i8, v1.i8) -> i8: +func public %or10(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = not v0; v4.i8 = and v3 v1; v5.i8 = or v2 v4; return v5; +} # (v0 ^ v1) | (v1 & !v0) => (v0 ^ v1) # check: return v2; -func public %or11(v0.i8, v1.i8) -> i8: +func public %or11(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = not v0; v4.i8 = and v1 v3; v5.i8 = or v2 v4; return v5; +} # (!v0 ^ v1) | (v0 & v1) => (!v0 ^ v1) # check: return v3; -func public %or12(v0.i8, v1.i8) -> i8: +func public %or12(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = xor v2 v1; v4.i8 = and v0 v1; v5.i8 = or v3 v4; return v5; +} # (!v0 ^ v1) | (v1 & v0) => (!v0 ^ v1) # check: return v3; -func public %or13(v0.i8, v1.i8) -> i8: +func public %or13(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = xor v2 v1; v4.i8 = and v1 v0; v5.i8 = or v3 v4; return v5; +} # (v0 ^ !v1) | (v0 & v1) => (v0 ^ !v1) # check: return v3; -func public %or14(v0.i8, v1.i8) -> i8: +func public %or14(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = xor v0 v2; v4.i8 = and v0 v1; v5.i8 = or v3 v4; return v5; +} # (v0 ^ !v1) | (v1 & v0) => (v0 ^ !v1) # check: return v3; -func public %or15(v0.i8, v1.i8) -> i8: +func public %or15(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = xor v0 v2; v4.i8 = and v1 v0; v5.i8 = or v3 v4; return v5; +} # (v0 | v1) | (v0 ^ v1) => (v0 | v1) # check: return v2; -func public %or16(v0.i8, v1.i8) -> i8: +func public %or16(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v0 v1; v3.i8 = xor v0 v1; v4.i8 = or v2 v3; return v4; +} # (v0 | v1) | (v1 ^ v0) => (v0 | v1) # check: return v2; -func public %or17(v0.i8, v1.i8) -> i8: +func public %or17(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v0 v1; v3.i8 = xor v1 v0; v4.i8 = or v2 v3; return v4; +} # (!v0 & v1) | !(v0 | v1) => !v0 # check: return v2; -func public %or18(v0.i8, v1.i8) -> i8: +func public %or18(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = and v2 v1; @@ -171,10 +189,11 @@ func public %or18(v0.i8, v1.i8) -> i8: v5.i8 = not v4; v6.i8 = or v3 v5; return v6; +} # (!v0 & v1) | !(v1 | v0) => !v0 # check: return v2; -func public %or19(v0.i8, v1.i8) -> i8: +func public %or19(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = and v2 v1; @@ -182,10 +201,11 @@ func public %or19(v0.i8, v1.i8) -> i8: v5.i8 = not v4; v6.i8 = or v3 v5; return v6; +} # (v0 & !v1) | !(v0 | v1) => !v1 # check: return v2; -func public %or20(v0.i8, v1.i8) -> i8: +func public %or20(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = and v0 v2; @@ -193,10 +213,11 @@ func public %or20(v0.i8, v1.i8) -> i8: v5.i8 = not v4; v6.i8 = or v3 v5; return v6; +} # (v0 & !v1) | !(v1 | v0) => !v1 # check: return v2; -func public %or21(v0.i8, v1.i8) -> i8: +func public %or21(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = and v0 v2; @@ -204,3 +225,4 @@ func public %or21(v0.i8, v1.i8) -> i8: v5.i8 = not v4; v6.i8 = or v3 v5; return v6; +} diff --git a/crates/filecheck/fixtures/insn_simplify/phi.sntn b/crates/filecheck/fixtures/insn_simplify/phi.sntn index 72d304be..80082f8f 100644 --- a/crates/filecheck/fixtures/insn_simplify/phi.sntn +++ b/crates/filecheck/fixtures/insn_simplify/phi.sntn @@ -1,6 +1,6 @@ target = "evm-ethereum-london" -# sameln: func public %phi(v0.i8) -> i8: +# sameln: func public %phi(v0.i8) -> i8 { # nextln: block0: # nextln: jump block1; # nextln: @@ -9,7 +9,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block2: # nextln: return v0; -func public %phi(v0.i8) -> i8: +func public %phi(v0.i8) -> i8 { block0: v1.i8 = mul v0 0.i8; jump block1; @@ -23,3 +23,4 @@ func public %phi(v0.i8) -> i8: block2: return v4; +} diff --git a/crates/filecheck/fixtures/insn_simplify/simple.sntn b/crates/filecheck/fixtures/insn_simplify/simple.sntn index 77fe124c..95b78e25 100644 --- a/crates/filecheck/fixtures/insn_simplify/simple.sntn +++ b/crates/filecheck/fixtures/insn_simplify/simple.sntn @@ -1,13 +1,14 @@ target = "evm-ethereum-london" -# sameln: func public %simple(v0.i8) -> i8: +# sameln: func public %simple(v0.i8) -> i8 { # nextln: block0: # nextln: v4.i8 = neg v0; # nextln: return v4; -func public %simple(v0.i8) -> i8: +func public %simple(v0.i8) -> i8 { block0: v1.i8 = mul 0.i8 v0; v2.i8 = add v0 v1; v3.i8 = sub v2 v0; v4.i8 = sub v1 v0; return v4; +} diff --git a/crates/filecheck/fixtures/insn_simplify/xor.sntn b/crates/filecheck/fixtures/insn_simplify/xor.sntn index d0b920f9..3877260c 100644 --- a/crates/filecheck/fixtures/insn_simplify/xor.sntn +++ b/crates/filecheck/fixtures/insn_simplify/xor.sntn @@ -2,109 +2,120 @@ target = "evm-ethereum-london" # v0 ^ 0 => v0 # check: return v0; -func public %xor0(v0.i8) -> i8: +func public %xor0(v0.i8) -> i8 { block0: v1.i8 = xor v0 0.i8; return v1; +} # v0 ^ v0 => 0 # check: return 0.i8; -func public %xor1(v0.i8) -> i8: +func public %xor1(v0.i8) -> i8 { block0: v1.i8 = xor v0 v0; return v1; +} # v0 ^ !v0 => -1 # check: return -1.i8; -func public %xor2(v0.i8) -> i8: +func public %xor2(v0.i8) -> i8 { block0: v1.i8 = not v0; v2.i8 = xor v0 v1; return v2; +} # v0 ^ (v0 ^ v1) => v1 # check: return v1; -func public %xor3(v0.i8, v1.i8) -> i8: +func public %xor3(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = xor v0 v2; return v3; +} # v0 ^ (v1 ^ v0) => v1 # check: return v1; -func public %xor4(v0.i8, v1.i8) -> i8: +func public %xor4(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v1 v0; v3.i8 = xor v0 v2; return v3; +} # (v0 | v1) ^ (!v0 & v1) => v0 # check: return v0; -func public %xor5(v0.i8, v1.i8) -> i8: +func public %xor5(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v0 v1; v3.i8 = not v0; v4.i8 = and v3 v1; v5.i8 = xor v2 v4; return v5; +} # (v0 | v1) ^ (v1 & !v0) => v0 # check: return v0; -func public %xor6(v0.i8, v1.i8) -> i8: +func public %xor6(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v0 v1; v3.i8 = not v0; v4.i8 = and v1 v3; v5.i8 = xor v2 v4; return v5; +} # (v0 | v1) ^ (v0 & !v1) => v1 # check: return v1; -func public %xor7(v0.i8, v1.i8) -> i8: +func public %xor7(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v0 v1; v3.i8 = not v1; v4.i8 = and v0 v3; v5.i8 = xor v2 v4; return v5; +} # (v0 & v1) ^ (!v0 | v1) => !v0 # check: return v3; -func public %xor8(v0.i8, v1.i8) -> i8: +func public %xor8(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v0 v1; v3.i8 = not v0; v4.i8 = or v3 v1; v5.i8 = xor v2 v4; return v5; +} # (v0 & v1) ^ (v1 | !v0) => !v0 # check: return v3; -func public %xor9(v0.i8, v1.i8) -> i8: +func public %xor9(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v0 v1; v3.i8 = not v0; v4.i8 = or v1 v3; v5.i8 = xor v2 v4; return v5; - +} # (v0 & v1) ^ (v0 | !v1) => !v1 # check: return v3; -func public %xor10(v0.i8, v1.i8) -> i8: +func public %xor10(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v0 v1; v3.i8 = not v1; v4.i8 = or v0 v3; v5.i8 = xor v2 v4; return v5; +} # (v0 & v1) ^ (!v1 | v0) => !v1 # check: return v3; -func public %xor11(v0.i8, v1.i8) -> i8: +func public %xor11(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v0 v1; v3.i8 = not v1; v4.i8 = or v3 v0; v5.i8 = xor v2 v4; return v5; +} diff --git a/crates/filecheck/fixtures/licm/basic.sntn b/crates/filecheck/fixtures/licm/basic.sntn index 8055262d..83cae52d 100644 --- a/crates/filecheck/fixtures/licm/basic.sntn +++ b/crates/filecheck/fixtures/licm/basic.sntn @@ -15,7 +15,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return; -func public %basic(v0.i32, v1.i32) -> void: +func public %basic(v0.i32, v1.i32) -> void { block0: jump block1; @@ -31,3 +31,4 @@ func public %basic(v0.i32, v1.i32) -> void: block3: return; +} diff --git a/crates/filecheck/fixtures/licm/header_entry.sntn b/crates/filecheck/fixtures/licm/header_entry.sntn index 5e9bfadd..402b4e13 100644 --- a/crates/filecheck/fixtures/licm/header_entry.sntn +++ b/crates/filecheck/fixtures/licm/header_entry.sntn @@ -13,7 +13,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block2: # nextln: return v3; -func public %header_entry(v0.i32, v1.i1) -> i32: +func public %header_entry(v0.i32, v1.i1) -> i32 { block0: v2.i32 = add v0 1.i32; jump block1; @@ -24,3 +24,4 @@ func public %header_entry(v0.i32, v1.i1) -> i32: block2: return v3; +} diff --git a/crates/filecheck/fixtures/licm/multiple_preheader.sntn b/crates/filecheck/fixtures/licm/multiple_preheader.sntn index 77ab8f84..e0221395 100644 --- a/crates/filecheck/fixtures/licm/multiple_preheader.sntn +++ b/crates/filecheck/fixtures/licm/multiple_preheader.sntn @@ -28,7 +28,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block5: # nextln: return; -func public %multiple_preheader(v0.i1, v1.i32) -> void: +func public %multiple_preheader(v0.i1, v1.i32) -> void { block0: v2.i32 = add v1 1.i32; v3.i32 = sub v1 1.i32; @@ -52,7 +52,7 @@ func public %multiple_preheader(v0.i1, v1.i32) -> void: block5: return; - +} # regex: VALUE=\bv\d+\b # check: block0: # nextln: v2.i32 = add v1 1.i32; @@ -83,7 +83,7 @@ func public %multiple_preheader(v0.i1, v1.i32) -> void: # nextln: # nextln: block5: # nextln: return; -func public %multiple_preheader2(v0.i1, v1.i32) -> void: +func public %multiple_preheader2(v0.i1, v1.i32) -> void { block0: v2.i32 = add v1 1.i32; v3.i32 = sub v1 1.i32; @@ -109,3 +109,4 @@ func public %multiple_preheader2(v0.i1, v1.i32) -> void: block5: return; +} diff --git a/crates/filecheck/fixtures/licm/nested_loops.sntn b/crates/filecheck/fixtures/licm/nested_loops.sntn index 0a22facd..5bf227e6 100644 --- a/crates/filecheck/fixtures/licm/nested_loops.sntn +++ b/crates/filecheck/fixtures/licm/nested_loops.sntn @@ -1,32 +1,32 @@ target = "evm-ethereum-london" # check: block0: -# nextln: v5.i32 = add v0 v1; -# nextln: jump block1; +# nextln: v5.i32 = add v0 v1; +# nextln: jump block1; # nextln: -# nextln: block1: -# nextln: v2.i32 = phi (0.i32 block0) (v9 block5); -# nextln: v4.i32 = add v0 v2; -# nextln: v6.i32 = mul v4 v5; -# nextln: jump block2; +# nextln: block1: +# nextln: v2.i32 = phi (0.i32 block0) (v9 block5); +# nextln: v4.i32 = add v0 v2; +# nextln: v6.i32 = mul v4 v5; +# nextln: jump block2; # nextln: -# nextln: block2: -# nextln: v3.i32 = phi (0.i32 block1) (v7 block3); -# nextln: jump block3; +# nextln: block2: +# nextln: v3.i32 = phi (0.i32 block1) (v7 block3); +# nextln: jump block3; # nextln: -# nextln: block3: -# nextln: v7.i32 = sub v6 v3; -# nextln: v8.i1 = slt v7 10.i32; -# nextln: br v8 block4 block2; +# nextln: block3: +# nextln: v7.i32 = sub v6 v3; +# nextln: v8.i1 = slt v7 10.i32; +# nextln: br v8 block4 block2; # nextln: -# nextln: block4: -# nextln: v9.i32 = add v2 1.i32; -# nextln: v10.i1 = slt v9 100.i32; -# nextln: br v10 block1 block5; +# nextln: block4: +# nextln: v9.i32 = add v2 1.i32; +# nextln: v10.i1 = slt v9 100.i32; +# nextln: br v10 block1 block5; # nextln: -# nextln: block5: -# nextln: return; -func public %nested_loops(v0.i32, v1.i32) -> void: +# nextln: block5: +# nextln: return; +func public %nested_loops(v0.i32, v1.i32) -> void { block0: jump block1; @@ -53,3 +53,4 @@ func public %nested_loops(v0.i32, v1.i32) -> void: block5: return; +} diff --git a/crates/filecheck/fixtures/sccp/complex_loop.sntn b/crates/filecheck/fixtures/sccp/complex_loop.sntn index a418759b..8e9f1b5d 100644 --- a/crates/filecheck/fixtures/sccp/complex_loop.sntn +++ b/crates/filecheck/fixtures/sccp/complex_loop.sntn @@ -1,6 +1,6 @@ target = "evm-ethereum-london" -# sameln: func public %complex_loop() -> i8: +# sameln: func public %complex_loop() -> i8 { # nextln: block1: # nextln: jump block2; # nextln: @@ -21,7 +21,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block7: # nextln: jump block2; -func public %complex_loop() -> i8: +func public %complex_loop() -> i8 { block1: v0.i8 = add 1.i8 0.i8; v1.i8 = add v0 0.i8; @@ -53,3 +53,4 @@ func public %complex_loop() -> i8: v9.i8 = phi (v0 block5) (v4 block6); v10.i8 = phi (v7 block5) (v8 block6); jump block2; +} diff --git a/crates/filecheck/fixtures/sccp/const_branch.sntn b/crates/filecheck/fixtures/sccp/const_branch.sntn index 4ac98b7c..f23e8775 100644 --- a/crates/filecheck/fixtures/sccp/const_branch.sntn +++ b/crates/filecheck/fixtures/sccp/const_branch.sntn @@ -10,7 +10,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return 12.i8; -func public %const_branch() -> i8: +func public %const_branch() -> i8 { block0: v0.i1 = sgt -1.i8 0.i8; br v0 block1 block2; @@ -23,13 +23,14 @@ func public %const_branch() -> i8: block3: return 12.i8; +} # check: block0: # nextln: jump block2; # nextln: # nextln: block2: # nextln: return 2.i32; -func public %const_branch2() -> i32: +func public %const_branch2() -> i32 { block0: br_table 1.i32 block1 (1.i32 block2) (2.i32 block3); @@ -41,3 +42,4 @@ func public %const_branch2() -> i32: block3: return 3.i32; +} diff --git a/crates/filecheck/fixtures/sccp/const_loop.sntn b/crates/filecheck/fixtures/sccp/const_loop.sntn index 3e8fdac3..ce33b8ac 100644 --- a/crates/filecheck/fixtures/sccp/const_loop.sntn +++ b/crates/filecheck/fixtures/sccp/const_loop.sntn @@ -2,7 +2,7 @@ target = "evm-ethereum-london" -# sameln: func public %const_loop() -> i8: +# sameln: func public %const_loop() -> i8 { # nextln: block1: # nextln: jump block2; # nextln: @@ -11,7 +11,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return 11.i8; -func public %const_loop() -> i8: +func public %const_loop() -> i8 { block1: jump block2; @@ -23,3 +23,4 @@ func public %const_loop() -> i8: block3: return v2; +} diff --git a/crates/filecheck/fixtures/sccp/non_folding.sntn b/crates/filecheck/fixtures/sccp/non_folding.sntn index d434e23e..598571a3 100644 --- a/crates/filecheck/fixtures/sccp/non_folding.sntn +++ b/crates/filecheck/fixtures/sccp/non_folding.sntn @@ -2,17 +2,18 @@ target = "evm-ethereum-london" -# sameln: func public %non_folding(v0.*i64) -> void: +# sameln: func public %non_folding(v0.*i64) -> void { # nextln: block1: # nextln: v1.i64 = load @storage v0; # nextln: v2.i64 = add 1.i64 v0; # nextln: v3.i64 = add -1.i64 v1; # nextln: v4.*i64 = alloca i64; # nextln: v5.*i64 = add v4 8.i256; -func public %non_folding(v0.*i64) -> void: +func public %non_folding(v0.*i64) -> void { block1: v1.i64 = load @storage v0; v2.i64 = add 1.i64 v0; v3.i64 = add -1.i64 v1; v4.*i64 = alloca i64; - v5.*i64 = add v4 8.i256; \ No newline at end of file + v5.*i64 = add v4 8.i256; +} diff --git a/crates/filecheck/fixtures/sccp/simple_sccp.sntn b/crates/filecheck/fixtures/sccp/simple_sccp.sntn index ba5fbd65..ebafccf9 100644 --- a/crates/filecheck/fixtures/sccp/simple_sccp.sntn +++ b/crates/filecheck/fixtures/sccp/simple_sccp.sntn @@ -2,12 +2,13 @@ target = "evm-ethereum-london" -# sameln: func public %simple_fold() -> i8: +# sameln: func public %simple_fold() -> i8 { # nextln: block0: # nextln: return 30.i8; -func public %simple_fold() -> i8: +func public %simple_fold() -> i8 { block0: v0.i8 = add 10.i8 20.i8; v1.i8 = xor -1.i8 -1.i8; v2.i8 = add v0 v1; return v2; +} diff --git a/crates/filecheck/fixtures/sccp/unary.sntn b/crates/filecheck/fixtures/sccp/unary.sntn index 4fcda9ca..c50028de 100644 --- a/crates/filecheck/fixtures/sccp/unary.sntn +++ b/crates/filecheck/fixtures/sccp/unary.sntn @@ -2,12 +2,13 @@ target = "evm-ethereum-london" -# sameln: func public %unary() -> i8: +# sameln: func public %unary() -> i8 { # nextln: block0: # nextln: return 1.i8; -func public %unary() -> i8: +func public %unary() -> i8 { block0: v0.i8 = neg -10.i8; v1.i8 = not -10.i8; v2.i8 = sub v0 v1; return v2; +} diff --git a/crates/filecheck/fixtures/sccp/unreachable.sntn b/crates/filecheck/fixtures/sccp/unreachable.sntn index fae04e9b..223a7fb3 100644 --- a/crates/filecheck/fixtures/sccp/unreachable.sntn +++ b/crates/filecheck/fixtures/sccp/unreachable.sntn @@ -2,7 +2,7 @@ target = "evm-ethereum-london" -# sameln: func public %unreachable_blocks() -> i8: +# sameln: func public %unreachable_blocks() -> i8 { # nextln: block0: # nextln: jump block1; # nextln: @@ -11,7 +11,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return 3.i8; -func public %unreachable_blocks() -> i8: +func public %unreachable_blocks() -> i8 { block0: v0.i8 = add 1.i8 2.i8; br v0 block1 block2; @@ -27,3 +27,4 @@ func public %unreachable_blocks() -> i8: block4: return 10.i8; +} diff --git a/crates/filecheck/src/lib.rs b/crates/filecheck/src/lib.rs index 711e25d1..29aac49c 100644 --- a/crates/filecheck/src/lib.rs +++ b/crates/filecheck/src/lib.rs @@ -13,10 +13,7 @@ use std::{ use sonatina_ir::{ir_writer::FuncWriter, module::FuncRef, Function}; -use sonatina_parser::{ - parser::{ParsedModule, Parser}, - ErrorKind, -}; +use sonatina_parser2::{parse_module, ParsedModule}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use walkdir::WalkDir; @@ -168,32 +165,26 @@ impl<'a> FileChecker<'a> { fn parse_file(&self) -> Result { let input = fs::read_to_string(self.file_path).unwrap(); - let parser = Parser::default(); - match parser.parse(&input) { + + match parse_module(&input) { Ok(module) => Ok(module), - Err(err) => match err.kind { - ErrorKind::InvalidToken(msg) => Err(format!( - "failed to parse file: invalid token: {}. line: {}", - msg, err.line - )), - - ErrorKind::SyntaxError(msg) => Err(format!( - "failed to parse file: invalid syntax: {}. line: {}", - msg, err.line - )), - - ErrorKind::SemanticError(msg) => Err(format!( - "failed to parse file: invalid semantics: {}. line: {}", - msg, err.line - )), - }, + Err(errs) => { + let mut v = vec![]; + for e in errs { + e.print(&mut v, self.file_path.to_str().unwrap(), &input) + .unwrap() + } + Err(String::from_utf8(v).unwrap()) + } } } fn build_checker(&self, directives: &[String]) -> filecheck::Checker { let mut builder = filecheck::CheckerBuilder::new(); for d in directives { - builder.directive(d).unwrap(); + if !builder.directive(d).unwrap() && d.contains("nextln") { + panic!("not a directive: `{}`", d); + } } builder.finish() } diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml index 8101af68..62254c3f 100644 --- a/crates/interpreter/Cargo.toml +++ b/crates/interpreter/Cargo.toml @@ -14,9 +14,9 @@ keywords = ["compiler", "evm", "wasm", "smart-contract"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -byteorder = { version = "1.4.3", default-features = false } +byteorder = "1.5.0" cranelift-entity = "0.104" sonatina-ir = { path = "../ir", version = "0.0.3-alpha" } [dev-dependencies] -sonatina-parser = { path = "../parser", version = "0.0.3-alpha" } +sonatina-parser2 = { path = "../parser2" } diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 71f03998..4541b640 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -270,13 +270,22 @@ impl State { #[cfg(test)] mod test { - use sonatina_parser::parser::Parser; - use super::*; + fn parse_module(input: &str) -> Module { + match sonatina_parser2::parse_module(input) { + Ok(pm) => pm.module, + Err(errs) => { + for err in errs { + eprintln!("{}", err.print_to_string("[test]", input)); + } + panic!("parsing failed"); + } + } + } + fn parse_module_make_state(input: &str) -> State { - let parser = Parser::default(); - let module = parser.parse(input).unwrap().module; + let module = parse_module(input); let func_ref = module.iter_functions().next().unwrap(); State::new(module, func_ref, &[]) @@ -287,11 +296,12 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i32: + func private %test() -> i32 { block0: v1.i32 = not 0.i32; v2.i32 = neg v1; return v2; + } "; let state = parse_module_make_state(input); @@ -306,13 +316,14 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i16: + func private %test() -> i16 { block0: v0.i16 = add 3.i16 4.i16; v1.i16 = sub v0 1.i16; v2.i16 = udiv v1 2.i16; v3.i8 = sdiv v2 65535.i16; return v3; + } "; let state = parse_module_make_state(input); @@ -327,10 +338,11 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i16: + func private %test() -> i16 { block0: v0.i16 = sext -128.i8; return v0; + } "; let state = parse_module_make_state(input); @@ -345,10 +357,11 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i16: + func private %test() -> i16 { block0: v0.i16 = zext -128.i8; return v0; + } "; let state = parse_module_make_state(input); @@ -363,12 +376,13 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i32: + func private %test() -> i32 { block0: v0.*i32 = alloca i32; store @memory v0 1.i32; v1.*i32 = load @memory v0; return v1; + } "; let state = parse_module_make_state(input); @@ -383,19 +397,20 @@ mod test { let input = " target = \"evm-ethereum-london\" - func public %test_callee(v0.i8) -> i8: + func public %test_callee(v0.i8) -> i8 { block0: v1.i8 = mul v0 1.i8; - return v1; + return v1; + } - func public %test() -> i8: + func public %test() -> i8 { block0: v0.i8 = call %test_callee 0.i8; return v0; + } "; - let parser = Parser::default(); - let module = parser.parse(input).unwrap().module; + let module = parse_module(input); let func_ref = module.iter_functions().nth(1).unwrap(); let state = State::new(module, func_ref, &[]); @@ -410,13 +425,14 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i1: + func private %test() -> i1 { block0: jump block2; block1: return 1.i1; block2: return 0.i1; + } "; let state = parse_module_make_state(input); @@ -431,13 +447,14 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i8: + func private %test() -> i8 { block0: br 1.i1 block1 block2; block1: return 1.i8; block2: return 2.i8; + } "; let state = parse_module_make_state(input); @@ -452,15 +469,16 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i64: + func private %test() -> i64 { block0: - br_table 1.i64 undef (0.i64 block1) (1.i64 block2); + br_table 1.i64 (0.i64 block1) (1.i64 block2); block1: return 1.i64; block2: return 2.i64; block3: return 3.i64; + } "; let state = parse_module_make_state(input); @@ -475,7 +493,7 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> i8: + func private %test() -> i8 { block0: jump block1; block1: @@ -483,6 +501,7 @@ mod test { block2: v0.i8 = phi (1.i8 block0) (-1.i8 block1); return v0; + } "; let state = parse_module_make_state(input); @@ -499,11 +518,12 @@ mod test { type %s1 = {i32, i64, i1}; - func private %test() -> *i1: + func private %test() -> *i1 { block0: v0.*%s1 = alloca %s1; v1.*i1 = gep v0 2.i8; return v1; + } "; let state = parse_module_make_state(input); @@ -520,9 +540,10 @@ mod test { type %s1 = {i32, i64, i1}; - func private %test() -> void: + func private %test() -> void { block0: return; + } "; let state = parse_module_make_state(input); @@ -538,11 +559,12 @@ mod test { let input = " target = \"evm-ethereum-london\" - func private %test() -> *i1: + func private %test() -> *i1 { block0: v0.*[*i32; 3] = alloca [*i32; 3]; v1.*i32 = gep v0 2.i8; return v1; + } "; let state = parse_module_make_state(input); @@ -559,11 +581,12 @@ mod test { type %s1 = {i32, [i16; 3], [i8; 2]}; - func private %test() -> *i1: + func private %test() -> *i1 { block0: v0.*%s1 = alloca %s1; v1.*i1 = gep v0 2.i8 1.i8; return v1; + } "; let state = parse_module_make_state(input); diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml index 2b719884..677b6ff4 100644 --- a/crates/ir/Cargo.toml +++ b/crates/ir/Cargo.toml @@ -17,8 +17,10 @@ keywords = ["compiler", "evm", "wasm", "smart-contract"] primitive-types = { version = "0.12", default-features = false } cranelift-entity = "0.104" smallvec = "1.7.0" -fxhash = "0.2.1" +rustc-hash = "1.1.0" dyn-clone = "1.0.4" sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } indexmap = "2.0.0" dot2 = { git = "https://github.com/sanpii/dot2.rs.git" } +bimap = "0.6.3" +smol_str = "0.2.2" diff --git a/crates/ir/src/builder/func_builder.rs b/crates/ir/src/builder/func_builder.rs index 42a8c393..66ec6d11 100644 --- a/crates/ir/src/builder/func_builder.rs +++ b/crates/ir/src/builder/func_builder.rs @@ -1,10 +1,10 @@ use smallvec::SmallVec; use crate::{ - func_cursor::{CursorLocation, FuncCursor, InsnInserter}, + func_cursor::{CursorLocation, FuncCursor}, insn::{BinaryOp, CastOp, DataLocationKind, InsnData, UnaryOp}, module::FuncRef, - Block, Function, GlobalVariable, Immediate, Type, Value, + Block, Function, GlobalVariable, Immediate, Type, Value, ValueData, }; use super::{ @@ -12,35 +12,19 @@ use super::{ ModuleBuilder, }; -pub struct FunctionBuilder<'a> { - module_builder: &'a mut ModuleBuilder, +pub struct FunctionBuilder { + pub module_builder: ModuleBuilder, + pub func: Function, func_ref: FuncRef, - loc: CursorLocation, + pub cursor: C, ssa_builder: SsaBuilder, -} - -macro_rules! impl_unary_insn { - ($name:ident, $code:path) => { - pub fn $name(&mut self, lhs: Value) -> Value { - let insn_data = InsnData::Unary { - code: $code, - args: [lhs], - }; - - self.insert_insn(insn_data).unwrap() - } - }; + undefined: Vec, } macro_rules! impl_binary_insn { ($name:ident, $code:path) => { pub fn $name(&mut self, lhs: Value, rhs: Value) -> Value { - let insn_data = InsnData::Binary { - code: $code, - args: [lhs, rhs], - }; - - self.insert_insn(insn_data).unwrap() + self.binary_op($code, lhs, rhs) } }; } @@ -48,47 +32,109 @@ macro_rules! impl_binary_insn { macro_rules! impl_cast_insn { ($name:ident, $code:path) => { pub fn $name(&mut self, lhs: Value, ty: Type) -> Value { - let insn_data = InsnData::Cast { - code: $code, - args: [lhs], - ty, - }; - - self.insert_insn(insn_data).unwrap() + self.cast_op($code, lhs, ty) } }; } -impl<'a> FunctionBuilder<'a> { - pub fn new(module_builder: &'a mut ModuleBuilder, func_ref: FuncRef) -> Self { +impl FunctionBuilder +where + C: FuncCursor, +{ + pub fn new(module_builder: ModuleBuilder, func_ref: FuncRef, cursor: C) -> Self { + let func = module_builder.funcs[func_ref].clone(); Self { module_builder, + func, func_ref, - loc: CursorLocation::NoWhere, + cursor, ssa_builder: SsaBuilder::new(), + undefined: Default::default(), } } + pub fn finish(self) -> ModuleBuilder { + if cfg!(debug_assertions) { + for block in self.func.layout.iter_block() { + debug_assert!(self.is_sealed(block), "all blocks must be sealed"); + } + } + + let Self { + mut module_builder, + func, + func_ref, + undefined, + .. + } = self; + + debug_assert!(undefined.is_empty()); // xxx + + module_builder.funcs[func_ref] = func; + module_builder + } + + pub fn append_parameter(&mut self, ty: Type) -> Value { + let idx = self.func.arg_values.len(); + + let value_data = self.func.dfg.make_arg_value(ty, idx); + let value = self.func.dfg.make_value(value_data); + self.func.arg_values.push(value); + value + } + pub fn append_block(&mut self) -> Block { - let block = self.cursor().make_block(); - self.cursor().append_block(block); + let block = self.cursor.make_block(&mut self.func); + self.cursor.append_block(&mut self.func, block); block } pub fn switch_to_block(&mut self, block: Block) { - self.loc = CursorLocation::BlockBottom(block); + self.cursor.set_location(CursorLocation::BlockBottom(block)); + } + + pub fn name_value(&mut self, value: Value, name: &str) { + if let Some(v) = self.func.value_names.get_by_right(name) { + if let Some(pos) = self.undefined.iter().position(|u| u == v) { + self.func.dfg.change_to_alias(*v, value); + // self.func.dfg.values[*v] = ValueData::Alias { alias: value }; + self.undefined.remove(pos); + } else { + panic!("value names must be unique"); + } + } + self.func.value_names.insert(value, name.into()); + } + + pub fn get_named_value(&mut self, name: &str) -> Value { + if let Some(v) = self.func.value_names.get_by_right(name).copied() { + v + } else { + let v = self.func.dfg.make_value(ValueData::Immediate { + imm: Immediate::I128(424242), + ty: Type::I128, + }); + + self.undefined.push(v); + self.name_value(v, name); + v + } } pub fn make_imm_value(&mut self, imm: Imm) -> Value where Imm: Into, { - self.func_mut().dfg.make_imm_value(imm) + self.func.dfg.make_imm_value(imm) } /// Return pointer value to the global variable. pub fn make_global_value(&mut self, gv: GlobalVariable) -> Value { - self.func_mut().dfg.make_global_value(gv) + self.func.dfg.make_global_value(gv) + } + + pub fn ptr_type(&mut self, ty: Type) -> Type { + self.module_builder.ptr_type(ty) } pub fn declare_array_type(&mut self, elem: Type, len: usize) -> Type { @@ -100,8 +146,29 @@ impl<'a> FunctionBuilder<'a> { .declare_struct_type(name, fields, packed) } - impl_unary_insn!(not, UnaryOp::Not); - impl_unary_insn!(neg, UnaryOp::Neg); + pub fn unary_op(&mut self, op: UnaryOp, lhs: Value) -> Value { + let insn_data = InsnData::Unary { + code: op, + args: [lhs], + }; + self.insert_insn(insn_data).unwrap() + } + + pub fn not(&mut self, lhs: Value) -> Value { + self.unary_op(UnaryOp::Not, lhs) + } + + pub fn neg(&mut self, lhs: Value) -> Value { + self.unary_op(UnaryOp::Neg, lhs) + } + + pub fn binary_op(&mut self, op: BinaryOp, lhs: Value, rhs: Value) -> Value { + let insn_data = InsnData::Binary { + code: op, + args: [lhs, rhs], + }; + self.insert_insn(insn_data).unwrap() + } impl_binary_insn!(add, BinaryOp::Add); impl_binary_insn!(sub, BinaryOp::Sub); @@ -121,58 +188,51 @@ impl<'a> FunctionBuilder<'a> { impl_binary_insn!(and, BinaryOp::And); impl_binary_insn!(or, BinaryOp::Or); + pub fn cast_op(&mut self, op: CastOp, value: Value, ty: Type) -> Value { + let insn_data = InsnData::Cast { + code: op, + args: [value], + ty, + }; + self.insert_insn(insn_data).unwrap() + } + impl_cast_insn!(sext, CastOp::Sext); impl_cast_insn!(zext, CastOp::Zext); impl_cast_insn!(trunc, CastOp::Trunc); impl_cast_insn!(bitcast, CastOp::BitCast); - /// Build memory load instruction. - pub fn memory_load(&mut self, addr: Value) -> Value { - let insn_data = InsnData::Load { - args: [addr], - loc: DataLocationKind::Memory, - }; + pub fn load(&mut self, loc: DataLocationKind, addr: Value) -> Value { + let insn_data = InsnData::Load { args: [addr], loc }; self.insert_insn(insn_data).unwrap() } - /// Build memory store instruction. - pub fn memory_store(&mut self, addr: Value, data: Value) { + pub fn store(&mut self, loc: DataLocationKind, addr: Value, data: Value) { let insn_data = InsnData::Store { args: [addr, data], - loc: DataLocationKind::Memory, + loc, }; self.insert_insn(insn_data); } - /// Build storage load instruction. - pub fn storage_load(&mut self, addr: Value) -> Value { - let insn_data = InsnData::Load { - args: [addr], - loc: DataLocationKind::Storage, - }; - self.insert_insn(insn_data).unwrap() + /// Build memory load instruction. + pub fn memory_load(&mut self, addr: Value) -> Value { + self.load(DataLocationKind::Memory, addr) } - pub fn call(&mut self, func: FuncRef, args: &[Value]) -> Value { - let sig = self.module_builder.funcs[func].sig.clone(); - let ret_ty = sig.ret_ty(); - self.func_mut().callees.insert(func, sig); + /// Build memory store instruction. + pub fn memory_store(&mut self, addr: Value, data: Value) { + self.store(DataLocationKind::Memory, addr, data) + } - let insn_data = InsnData::Call { - func, - args: args.into(), - ret_ty, - }; - self.insert_insn(insn_data).unwrap() + /// Build storage load instruction. + pub fn storage_load(&mut self, addr: Value) -> Value { + self.load(DataLocationKind::Storage, addr) } /// Build storage store instruction. pub fn storage_store(&mut self, addr: Value, data: Value) { - let insn_data = InsnData::Store { - args: [addr, data], - loc: DataLocationKind::Storage, - }; - self.insert_insn(insn_data); + self.store(DataLocationKind::Storage, addr, data) } /// Build alloca instruction. @@ -185,7 +245,7 @@ impl<'a> FunctionBuilder<'a> { debug_assert!(!self.ssa_builder.is_sealed(dest)); let insn_data = InsnData::Jump { dests: [dest] }; - let pred = self.cursor().block(); + let pred = self.cursor.block(&self.func); self.ssa_builder.append_pred(dest, pred.unwrap()); self.insert_insn(insn_data); } @@ -214,7 +274,7 @@ impl<'a> FunctionBuilder<'a> { default, table: blocks, }; - let block = self.cursor().block().unwrap(); + let block = self.cursor.block(&self.func).unwrap(); if let Some(default) = default { self.ssa_builder.append_pred(default, block); @@ -234,12 +294,23 @@ impl<'a> FunctionBuilder<'a> { dests: [then, else_], }; - let block = self.cursor().block().unwrap(); + let block = self.cursor.block(&self.func).unwrap(); self.ssa_builder.append_pred(then, block); self.ssa_builder.append_pred(else_, block); self.insert_insn(insn_data); } + pub fn call(&mut self, func: FuncRef, args: &[Value]) -> Option { + let sig = self.module_builder.get_sig(func).clone(); + let insn_data = InsnData::Call { + func, + args: args.into(), + ret_ty: sig.ret_ty(), + }; + self.func.callees.insert(func, sig); + self.insert_insn(insn_data) + } + pub fn ret(&mut self, args: Option) { let insn_data = InsnData::Return { args }; self.insert_insn(insn_data); @@ -250,8 +321,7 @@ impl<'a> FunctionBuilder<'a> { self.insert_insn(insn_data) } - pub fn phi(&mut self, args: &[(Value, Block)]) -> Value { - let ty = self.func().dfg.value_ty(args[0].0); + pub fn phi(&mut self, ty: Type, args: &[(Value, Block)]) -> Value { let insn_data = InsnData::Phi { values: args.iter().map(|(val, _)| *val).collect(), blocks: args.iter().map(|(_, block)| *block).collect(), @@ -262,15 +332,15 @@ impl<'a> FunctionBuilder<'a> { pub fn append_phi_arg(&mut self, phi_value: Value, value: Value, block: Block) { let insn = self - .func() + .func .dfg .value_insn(phi_value) .expect("value must be the result of phi function"); debug_assert!( - self.func().dfg.is_phi(insn), + self.func.dfg.is_phi(insn), "value must be the result of phi function" ); - self.func_mut().dfg.append_phi_arg(insn, value, block); + self.func.dfg.append_phi_arg(insn, value, block); } pub fn declare_var(&mut self, ty: Type) -> Variable { @@ -278,51 +348,36 @@ impl<'a> FunctionBuilder<'a> { } pub fn use_var(&mut self, var: Variable) -> Value { - let block = self.cursor().block().unwrap(); - self.ssa_builder - .use_var(&mut self.module_builder.funcs[self.func_ref], var, block) + let block = self.cursor.block(&self.func).unwrap(); + self.ssa_builder.use_var(&mut self.func, var, block) } pub fn def_var(&mut self, var: Variable, value: Value) { - debug_assert_eq!( - self.module_builder.funcs[self.func_ref].dfg.value_ty(value), - self.ssa_builder.var_ty(var) - ); + debug_assert_eq!(self.func.dfg.value_ty(value), self.ssa_builder.var_ty(var)); - let block = self.cursor().block().unwrap(); + let block = self.cursor.block(&self.func).unwrap(); self.ssa_builder.def_var(var, value, block); } pub fn seal_block(&mut self) { - let block = self.cursor().block().unwrap(); - self.ssa_builder - .seal_block(&mut self.module_builder.funcs[self.func_ref], block); + let block = self.cursor.block(&self.func).unwrap(); + self.ssa_builder.seal_block(&mut self.func, block); } pub fn seal_all(&mut self) { - self.ssa_builder - .seal_all(&mut self.module_builder.funcs[self.func_ref]); + self.ssa_builder.seal_all(&mut self.func); } pub fn is_sealed(&self, block: Block) -> bool { self.ssa_builder.is_sealed(block) } - pub fn finish(self) -> FuncRef { - if cfg!(debug_assertions) { - for block in self.func().layout.iter_block() { - debug_assert!(self.is_sealed(block), "all blocks must be sealed"); - } - } - self.func_ref - } - pub fn type_of(&self, value: Value) -> Type { - self.func().dfg.value_ty(value) + self.func.dfg.value_ty(value) } pub fn args(&self) -> &[Value] { - &self.func().arg_values + &self.func.arg_values } pub fn address_type(&self) -> Type { @@ -337,28 +392,19 @@ impl<'a> FunctionBuilder<'a> { self.module_builder.ctx.isa.type_provider().gas_type() } - fn cursor(&mut self) -> InsnInserter { - InsnInserter::new(&mut self.module_builder.funcs[self.func_ref], self.loc) - } + // fn cursor(&mut self) -> InsnInserter { + // InsnInserter::new(&mut self.func, self.loc) + // } fn insert_insn(&mut self, insn_data: InsnData) -> Option { - let mut cursor = self.cursor(); - let insn = cursor.insert_insn_data(insn_data); - let result = cursor.make_result(insn); + let insn = self.cursor.insert_insn_data(&mut self.func, insn_data); + let result = self.cursor.make_result(&mut self.func, insn); if let Some(result) = result { - cursor.attach_result(insn, result); + self.cursor.attach_result(&mut self.func, insn, result); } - self.loc = CursorLocation::At(insn); + self.cursor.set_location(CursorLocation::At(insn)); result } - - fn func(&self) -> &Function { - &self.module_builder.funcs[self.func_ref] - } - - fn func_mut(&mut self) -> &mut Function { - &mut self.module_builder.funcs[self.func_ref] - } } #[cfg(test)] @@ -367,8 +413,7 @@ mod tests { #[test] fn entry_block() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let b0 = builder.append_block(); builder.switch_to_block(b0); @@ -379,25 +424,25 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); assert_eq!( dump_func(&module.funcs[func_ref]), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: v2.i8 = add 1.i8 2.i8; v3.i8 = sub v2 1.i8; return; +} " ); } #[test] fn entry_block_with_args() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[Type::I32, Type::I64], Type::Void); + let mut builder = test_func_builder(&[Type::I32, Type::I64], Type::Void); let entry_block = builder.append_block(); builder.switch_to_block(entry_block); @@ -409,25 +454,25 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); assert_eq!( dump_func(&module.funcs[func_ref]), - "func public %test_func(v0.i32, v1.i64) -> void: + "func public %test_func(v0.i32, v1.i64) -> void { block0: v2.i64 = sext v0; v3.i64 = mul v2 v1; return; +} " ); } #[test] fn entry_block_with_return() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::I32); + let mut builder = test_func_builder(&[], Type::I32); let entry_block = builder.append_block(); @@ -435,23 +480,23 @@ mod tests { let v0 = builder.make_imm_value(1i32); builder.ret(Some(v0)); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); assert_eq!( dump_func(&module.funcs[func_ref]), - "func public %test_func() -> i32: + "func public %test_func() -> i32 { block0: return 1.i32; +} " ); } #[test] fn then_else_merge_block() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[Type::I64], Type::Void); + let mut builder = test_func_builder(&[Type::I64], Type::Void); let entry_block = builder.append_block(); let then_block = builder.append_block(); @@ -472,17 +517,17 @@ mod tests { builder.jump(merge_block); builder.switch_to_block(merge_block); - let v3 = builder.phi(&[(v1, then_block), (v2, else_block)]); + let v3 = builder.phi(Type::I64, &[(v1, then_block), (v2, else_block)]); builder.add(v3, arg0); builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); assert_eq!( dump_func(&module.funcs[func_ref]), - "func public %test_func(v0.i64) -> void: + "func public %test_func(v0.i64) -> void { block0: br v0 block1 block2; @@ -497,6 +542,7 @@ mod tests { v4.i64 = add v3 v0; return; +} " ); } diff --git a/crates/ir/src/builder/mod.rs b/crates/ir/src/builder/mod.rs index 37c46436..76a9c95a 100644 --- a/crates/ir/src/builder/mod.rs +++ b/crates/ir/src/builder/mod.rs @@ -13,9 +13,10 @@ pub mod test_util { use sonatina_triple::TargetTriple; use crate::{ + func_cursor::InsnInserter, ir_writer::FuncWriter, isa::{IsaBuilder, TargetIsa}, - module::{FuncRef, Module, ModuleCtx}, + module::ModuleCtx, Function, Linkage, Signature, Type, }; @@ -24,40 +25,17 @@ pub mod test_util { IsaBuilder::new(triple).build() } - pub struct TestModuleBuilder { - module_builder: ModuleBuilder, - func_ref: Option, - } + pub fn test_func_builder(args: &[Type], ret_ty: Type) -> FunctionBuilder { + let ctx = ModuleCtx::new(build_test_isa()); + let mut mb = ModuleBuilder::new(ctx); - impl TestModuleBuilder { - pub fn new() -> Self { - Self::default() - } - - pub fn func_builder(&mut self, args: &[Type], ret_ty: Type) -> FunctionBuilder { - let sig = Signature::new("test_func", Linkage::Public, args, ret_ty); - let func_ref = self.module_builder.declare_function(sig); - self.func_ref = Some(func_ref); - self.module_builder.func_builder(func_ref) - } - - pub fn build(self) -> Module { - self.module_builder.build() - } + let sig = Signature::new("test_func", Linkage::Public, args, ret_ty); + let func_ref = mb.declare_function(sig); + mb.build_function(func_ref) } pub fn dump_func(func: &Function) -> String { let mut writer = FuncWriter::new(func); writer.dump_string().unwrap() } - - impl Default for TestModuleBuilder { - fn default() -> Self { - let ctx = ModuleCtx::new(build_test_isa()); - Self { - module_builder: ModuleBuilder::new(ctx), - func_ref: None, - } - } - } } diff --git a/crates/ir/src/builder/module_builder.rs b/crates/ir/src/builder/module_builder.rs index c3c84a48..c6db2dd3 100644 --- a/crates/ir/src/builder/module_builder.rs +++ b/crates/ir/src/builder/module_builder.rs @@ -1,7 +1,8 @@ use cranelift_entity::PrimaryMap; -use fxhash::FxHashMap; +use rustc_hash::FxHashMap; use crate::{ + func_cursor::{CursorLocation, FuncCursor}, module::{FuncRef, ModuleCtx}, Function, GlobalVariable, GlobalVariableData, Module, Signature, Type, }; @@ -43,10 +44,6 @@ impl ModuleBuilder { &self.funcs[func].sig } - pub fn get_func(&self, func: &str) -> Option { - self.declared_funcs.get(func).copied() - } - pub fn make_global(&self, global: GlobalVariableData) -> GlobalVariable { self.ctx.with_gv_store_mut(|s| s.make_gv(global)) } @@ -64,6 +61,10 @@ impl ModuleBuilder { self.ctx.with_ty_store_mut(|s| s.make_array(elem, len)) } + pub fn ptr_type(&mut self, ty: Type) -> Type { + self.ctx.with_ty_store_mut(|s| s.make_ptr(ty)) + } + pub fn get_func_ref(&self, name: &str) -> Option { self.declared_funcs.get(name).copied() } @@ -72,8 +73,12 @@ impl ModuleBuilder { &self.funcs[func].sig } - pub fn func_builder(&mut self, func: FuncRef) -> FunctionBuilder { - FunctionBuilder::new(self, func) + pub fn build_function(self, func: FuncRef) -> FunctionBuilder + where + C: FuncCursor, + { + let cursor = C::at_location(CursorLocation::NoWhere); + FunctionBuilder::new(self, func, cursor) } pub fn build(self) -> Module { diff --git a/crates/ir/src/builder/ssa.rs b/crates/ir/src/builder/ssa.rs index 6f95e4da..7cdc4159 100644 --- a/crates/ir/src/builder/ssa.rs +++ b/crates/ir/src/builder/ssa.rs @@ -131,7 +131,7 @@ impl SsaBuilder { func.dfg.change_to_alias(phi_value, first); self.trivial_phis.insert(phi); - InsnInserter::new(func, CursorLocation::At(phi)).remove_insn(); + InsnInserter::at_location(CursorLocation::At(phi)).remove_insn(func); for i in 0..func.dfg.users_num(phi_value) { let user = func.dfg.user(phi_value, i); @@ -148,11 +148,11 @@ impl SsaBuilder { blocks: SmallVec::new(), ty, }; - let mut cursor = InsnInserter::new(func, CursorLocation::BlockTop(block)); - let insn = cursor.prepend_insn_data(insn_data); - let value = cursor.make_result(insn); + let mut cursor = InsnInserter::at_location(CursorLocation::BlockTop(block)); + let insn = cursor.prepend_insn_data(func, insn_data); + let value = cursor.make_result(func, insn); if let Some(value) = value { - cursor.attach_result(insn, value); + cursor.attach_result(func, insn, value); } (insn, value.unwrap()) } @@ -213,8 +213,7 @@ mod tests { #[test] fn use_var_local() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let var = builder.declare_var(Type::I32); @@ -226,26 +225,26 @@ mod tests { builder.add(v1, v0); builder.ret(None); builder.seal_block(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: v1.i32 = add 1.i32 1.i32; return; +} " ); } #[test] fn use_var_global_if() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let var = builder.declare_var(Type::I32); @@ -275,14 +274,14 @@ mod tests { builder.use_var(var); builder.ret(None); builder.seal_block(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: br 1.i32 block2 block1; @@ -296,14 +295,14 @@ mod tests { v3.i32 = phi (2.i32 block1) (3.i32 block2); return; +} " ); } #[test] fn use_var_global_many_preds() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let var0 = builder.declare_var(Type::I32); let var1 = builder.declare_var(Type::I32); @@ -356,14 +355,14 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: br 0.i32 block1 block2; @@ -391,14 +390,14 @@ mod tests { v7.i32 = add v5 v6; return; +} " ) } #[test] fn use_var_global_loop() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let var = builder.declare_var(Type::I32); @@ -430,14 +429,14 @@ mod tests { builder.add(val, val); builder.ret(None); builder.seal_block(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: jump block1; @@ -452,14 +451,14 @@ mod tests { v3.i32 = add v4 v4; return; +} " ); } #[test] fn use_var_global_complex() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let var = builder.declare_var(Type::I32); @@ -506,14 +505,14 @@ mod tests { builder.add(val, val); builder.ret(None); builder.seal_block(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: jump block1; @@ -538,14 +537,14 @@ mod tests { v3.i32 = add v4 v4; return; +} " ); } #[test] fn use_var_global_complex_seal_all() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let var = builder.declare_var(Type::I32); @@ -587,14 +586,14 @@ mod tests { builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; assert_eq!( dump_func(func), - "func public %test_func() -> void: + "func public %test_func() -> void { block0: jump block1; @@ -619,14 +618,14 @@ mod tests { v3.i32 = add v4 v4; return; +} " ); } #[test] fn br_table() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[Type::I32], Type::I32); + let mut builder = test_func_builder(&[Type::I32], Type::I32); let var = builder.declare_var(Type::I32); let b0 = builder.append_block(); @@ -666,14 +665,14 @@ mod tests { builder.ret(ret.into()); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let func = &module.funcs[func_ref]; assert_eq!( dump_func(func), - "func public %test_func(v0.i32) -> i32: + "func public %test_func(v0.i32) -> i32 { block0: br_table v0 block4 (1.i32 block1) (2.i32 block2) (3.i32 block3); @@ -690,6 +689,7 @@ mod tests { v5.i32 = phi (0.i32 block0) (1.i32 block1) (2.i32 block2) (3.i32 block3); return v5; +} " ) } @@ -697,8 +697,7 @@ mod tests { #[test] #[should_panic] fn undef_use() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let var = builder.declare_var(Type::I32); let b1 = builder.append_block(); @@ -710,8 +709,7 @@ mod tests { #[test] #[should_panic] fn unreachable_use() { - let mut test_module_builder = TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[], Type::Void); + let mut builder = test_func_builder(&[], Type::Void); let var = builder.declare_var(Type::I32); let b1 = builder.append_block(); diff --git a/crates/ir/src/dfg.rs b/crates/ir/src/dfg.rs index ce337cc5..05fbd8b3 100644 --- a/crates/ir/src/dfg.rs +++ b/crates/ir/src/dfg.rs @@ -2,7 +2,7 @@ use std::collections::BTreeSet; use cranelift_entity::{entity_impl, packed_option::PackedOption, PrimaryMap, SecondaryMap}; -use fxhash::FxHashMap; +use rustc_hash::FxHashMap; use crate::{global_variable::ConstantValue, module::ModuleCtx, GlobalVariable}; diff --git a/crates/ir/src/func_cursor.rs b/crates/ir/src/func_cursor.rs index 86cdab9c..808052f2 100644 --- a/crates/ir/src/func_cursor.rs +++ b/crates/ir/src/func_cursor.rs @@ -1,91 +1,122 @@ use super::{Block, Function, Insn, InsnData, Value}; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] pub enum CursorLocation { At(Insn), BlockTop(Block), BlockBottom(Block), + #[default] NoWhere, } pub trait FuncCursor { - fn set_loc(&mut self, loc: CursorLocation); - fn func(&self) -> &Function; - fn func_mut(&mut self) -> &mut Function; + fn at_location(loc: CursorLocation) -> Self; + fn set_location(&mut self, loc: CursorLocation); fn loc(&self) -> CursorLocation; - fn set_to_entry(&mut self) { - let loc = if let Some(entry) = self.func().layout.entry_block() { + fn set_to_entry(&mut self, func: &Function) { + let loc = if let Some(entry) = func.layout.entry_block() { CursorLocation::BlockTop(entry) } else { CursorLocation::NoWhere }; - self.set_loc(loc); + self.set_location(loc); } - fn insert_insn(&mut self, insn: Insn) { + fn insert_insn(&mut self, func: &mut Function, insn: Insn) { match self.loc() { - CursorLocation::At(at) => self.func_mut().layout.insert_insn_after(insn, at), - CursorLocation::BlockTop(block) => self.func_mut().layout.prepend_insn(insn, block), - CursorLocation::BlockBottom(block) => self.func_mut().layout.append_insn(insn, block), + CursorLocation::At(at) => func.layout.insert_insn_after(insn, at), + CursorLocation::BlockTop(block) => func.layout.prepend_insn(insn, block), + CursorLocation::BlockBottom(block) => func.layout.append_insn(insn, block), CursorLocation::NoWhere => panic!("cursor loc points to `NoWhere`"), } } - fn append_insn(&mut self, insn: Insn) { - let current_block = self.expect_block(); - self.func_mut().layout.append_insn(insn, current_block); + fn append_insn(&mut self, func: &mut Function, insn: Insn) { + let current_block = self.expect_block(func); + func.layout.append_insn(insn, current_block); } - fn prepend_insn(&mut self, insn: Insn) { - let current_block = self.expect_block(); - self.func_mut().layout.prepend_insn(insn, current_block); + fn prepend_insn(&mut self, func: &mut Function, insn: Insn) { + let current_block = self.expect_block(func); + func.layout.prepend_insn(insn, current_block); } - fn replace(&mut self, insn_data: InsnData) { + fn insert_insn_data(&mut self, func: &mut Function, data: InsnData) -> Insn { + let insn = func.dfg.make_insn(data); + self.insert_insn(func, insn); + insn + } + + fn append_insn_data(&mut self, func: &mut Function, data: InsnData) -> Insn { + let insn = func.dfg.make_insn(data); + self.append_insn(func, insn); + insn + } + + fn prepend_insn_data(&mut self, func: &mut Function, data: InsnData) -> Insn { + let insn = func.dfg.make_insn(data); + self.prepend_insn(func, insn); + insn + } + + fn replace(&mut self, func: &mut Function, insn_data: InsnData) { let insn = self.expect_insn(); - self.func_mut().dfg.replace_insn(insn, insn_data); + func.dfg.replace_insn(insn, insn_data); } - fn remove_insn(&mut self) { + fn remove_insn(&mut self, func: &mut Function) { let insn = self.expect_insn(); - let next_loc = self.next_loc(); + let next_loc = self.next_loc(func); - for idx in 0..self.func().dfg.insn_args_num(insn) { - let arg = self.func().dfg.insn_arg(insn, idx); - self.func_mut().dfg.remove_user(arg, insn); + for idx in 0..func.dfg.insn_args_num(insn) { + let arg = func.dfg.insn_arg(insn, idx); + func.dfg.remove_user(arg, insn); } - self.func_mut().layout.remove_insn(insn); + func.layout.remove_insn(insn); - self.set_loc(next_loc); + self.set_location(next_loc); } - fn remove_block(&mut self) { + fn make_result(&mut self, func: &mut Function, insn: Insn) -> Option { + let value_data = func.dfg.make_result(insn)?; + Some(func.dfg.make_value(value_data)) + } + + fn attach_result(&mut self, func: &mut Function, insn: Insn, value: Value) { + func.dfg.attach_result(insn, value) + } + + fn make_block(&mut self, func: &mut Function) -> Block { + func.dfg.make_block() + } + + fn remove_block(&mut self, func: &mut Function) { let block = match self.loc() { - CursorLocation::At(insn) => self.func_mut().layout.insn_block(insn), + CursorLocation::At(insn) => func.layout.insn_block(insn), CursorLocation::BlockTop(block) | CursorLocation::BlockBottom(block) => block, CursorLocation::NoWhere => panic!("cursor loc points `NoWhere`"), }; // Store next block of the current block for later use. - let next_block = self.func().layout.next_block_of(block); + let next_block = func.layout.next_block_of(block); // Remove all insns in the current block. - if let Some(first_insn) = self.func().layout.first_insn_of(block) { - self.set_loc(CursorLocation::At(first_insn)); + if let Some(first_insn) = func.layout.first_insn_of(block) { + self.set_location(CursorLocation::At(first_insn)); while matches!(self.loc(), CursorLocation::At(..)) { - self.remove_insn(); + self.remove_insn(func); } } // Remove current block. - self.func_mut().layout.remove_block(block); + func.layout.remove_block(block); // Set cursor location to next block if exists. if let Some(next_block) = next_block { - self.set_loc(CursorLocation::BlockTop(next_block)) + self.set_location(CursorLocation::BlockTop(next_block)) } else { - self.set_loc(CursorLocation::NoWhere) + self.set_location(CursorLocation::NoWhere) } } @@ -102,51 +133,49 @@ pub trait FuncCursor { .expect("current cursor location doesn't point to insn") } - fn block(&self) -> Option { + fn block(&self, func: &Function) -> Option { match self.loc() { - CursorLocation::At(insn) => Some(self.func().layout.insn_block(insn)), + CursorLocation::At(insn) => Some(func.layout.insn_block(insn)), CursorLocation::BlockTop(block) | CursorLocation::BlockBottom(block) => Some(block), CursorLocation::NoWhere => None, } } - fn expect_block(&self) -> Block { - self.block().expect("cursor loc points to `NoWhere`") + fn expect_block(&self, func: &Function) -> Block { + self.block(func).expect("cursor loc points to `NoWhere`") } - fn insert_block(&mut self, block: Block) { - if let Some(current) = self.block() { - self.func_mut().layout.insert_block_after(block, current) + fn insert_block(&mut self, func: &mut Function, block: Block) { + if let Some(current) = self.block(func) { + func.layout.insert_block_after(block, current) } else { panic!("cursor loc points `NoWhere`") } } - fn insert_block_before(&mut self, block: Block) { - if let Some(current) = self.block() { - self.func_mut().layout.insert_block_before(block, current) + fn insert_block_before(&mut self, func: &mut Function, block: Block) { + if let Some(current) = self.block(func) { + func.layout.insert_block_before(block, current) } else { panic!("cursor loc points `NoWhere`") } } - fn append_block(&mut self, block: Block) { - self.func_mut().layout.append_block(block); + fn append_block(&mut self, func: &mut Function, block: Block) { + func.layout.append_block(block); } - fn next_loc(&self) -> CursorLocation { + fn next_loc(&self, func: &Function) -> CursorLocation { match self.loc() { - CursorLocation::At(insn) => self.func().layout.next_insn_of(insn).map_or_else( - || CursorLocation::BlockBottom(self.func().layout.insn_block(insn)), + CursorLocation::At(insn) => func.layout.next_insn_of(insn).map_or_else( + || CursorLocation::BlockBottom(func.layout.insn_block(insn)), CursorLocation::At, ), - CursorLocation::BlockTop(block) => self - .func() + CursorLocation::BlockTop(block) => func .layout .first_insn_of(block) .map_or_else(|| CursorLocation::BlockBottom(block), CursorLocation::At), - CursorLocation::BlockBottom(block) => self - .func() + CursorLocation::BlockBottom(block) => func .layout .next_block_of(block) .map_or(CursorLocation::NoWhere, |next_block| { @@ -156,21 +185,19 @@ pub trait FuncCursor { } } - fn prev_loc(&self) -> CursorLocation { + fn prev_loc(&self, func: &Function) -> CursorLocation { match self.loc() { - CursorLocation::At(insn) => self.func().layout.prev_insn_of(insn).map_or_else( - || CursorLocation::BlockTop(self.func().layout.insn_block(insn)), + CursorLocation::At(insn) => func.layout.prev_insn_of(insn).map_or_else( + || CursorLocation::BlockTop(func.layout.insn_block(insn)), CursorLocation::At, ), - CursorLocation::BlockTop(block) => self - .func() + CursorLocation::BlockTop(block) => func .layout .prev_block_of(block) .map_or(CursorLocation::NoWhere, |prev_block| { CursorLocation::BlockBottom(prev_block) }), - CursorLocation::BlockBottom(block) => self - .func() + CursorLocation::BlockBottom(block) => func .layout .last_insn_of(block) .map_or_else(|| CursorLocation::BlockTop(block), CursorLocation::At), @@ -178,91 +205,49 @@ pub trait FuncCursor { } } - fn proceed(&mut self) { - self.set_loc(self.next_loc()); + fn proceed(&mut self, func: &Function) { + self.set_location(self.next_loc(func)); } - fn proceed_block(&mut self) { - let loc = if let Some(block) = self.next_block() { + fn proceed_block(&mut self, func: &mut Function) { + let loc = if let Some(block) = self.next_block(func) { CursorLocation::BlockTop(block) } else { CursorLocation::NoWhere }; - self.set_loc(loc) + self.set_location(loc) } - fn back(&mut self) { - self.set_loc(self.prev_loc()); + fn back(&mut self, func: &Function) { + self.set_location(self.prev_loc(func)); } - fn next_block(&self) -> Option { - let block = self.block()?; - self.func().layout.next_block_of(block) + fn next_block(&self, func: &Function) -> Option { + let block = self.block(func)?; + func.layout.next_block_of(block) } - fn prev_block(&self) -> Option { - let block = self.block()?; - self.func().layout.prev_block_of(block) + fn prev_block(&self, func: &Function) -> Option { + let block = self.block(func)?; + func.layout.prev_block_of(block) } } #[derive(Debug)] -pub struct InsnInserter<'a> { - func: &'a mut Function, +pub struct InsnInserter { loc: CursorLocation, } -impl<'a> FuncCursor for InsnInserter<'a> { - fn set_loc(&mut self, loc: CursorLocation) { - self.loc = loc; - } - - fn func(&self) -> &Function { - self.func +impl FuncCursor for InsnInserter { + fn at_location(loc: CursorLocation) -> Self { + Self { loc } } - fn func_mut(&mut self) -> &mut Function { - self.func + fn set_location(&mut self, loc: CursorLocation) { + self.loc = loc; } fn loc(&self) -> CursorLocation { self.loc } } - -impl<'a> InsnInserter<'a> { - pub fn new(func: &'a mut Function, loc: CursorLocation) -> Self { - Self { func, loc } - } - - pub fn insert_insn_data(&mut self, data: InsnData) -> Insn { - let insn = self.func.dfg.make_insn(data); - self.insert_insn(insn); - insn - } - - pub fn append_insn_data(&mut self, data: InsnData) -> Insn { - let insn = self.func.dfg.make_insn(data); - self.append_insn(insn); - insn - } - - pub fn prepend_insn_data(&mut self, data: InsnData) -> Insn { - let insn = self.func.dfg.make_insn(data); - self.prepend_insn(insn); - insn - } - - pub fn make_result(&mut self, insn: Insn) -> Option { - let value_data = self.func.dfg.make_result(insn)?; - Some(self.func.dfg.make_value(value_data)) - } - - pub fn attach_result(&mut self, insn: Insn, value: Value) { - self.func.dfg.attach_result(insn, value) - } - - pub fn make_block(&mut self) -> Block { - self.func.dfg.make_block() - } -} diff --git a/crates/ir/src/function.rs b/crates/ir/src/function.rs index df0bbf77..ce9615e3 100644 --- a/crates/ir/src/function.rs +++ b/crates/ir/src/function.rs @@ -1,22 +1,27 @@ -use std::fmt::{self, Write}; - -use fxhash::FxHashMap; -use smallvec::SmallVec; - +use super::{module::FuncRef, DataFlowGraph, Layout, Type, Value}; use crate::{module::ModuleCtx, types::DisplayType, Linkage}; +use rustc_hash::{FxHashMap, FxHasher}; +use smallvec::SmallVec; +use smol_str::SmolStr; +use std::{ + fmt::{self, Write}, + hash::BuildHasherDefault, +}; -use super::{module::FuncRef, DataFlowGraph, Layout, Type, Value}; +type Bimap = bimap::BiHashMap>; #[derive(Debug, Clone)] pub struct Function { /// Signature of the function. pub sig: Signature, pub arg_values: smallvec::SmallVec<[Value; 8]>, - pub dfg: DataFlowGraph, pub layout: Layout, - /// Stores signatures of all functions that called by the function. + // xxx move + pub value_names: Bimap, + + /// Stores signatures of all functions that are called by the function. pub callees: FxHashMap, } @@ -38,12 +43,13 @@ impl Function { arg_values, dfg, layout: Layout::default(), + value_names: Bimap::default(), callees: FxHashMap::default(), } } } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Default)] pub struct Signature { /// Name of the function. name: String, @@ -72,6 +78,7 @@ impl Signature { self.linkage } + // xxx remove pub fn append_arg(&mut self, arg: Type) { self.args.push(arg); } diff --git a/crates/ir/src/global_variable.rs b/crates/ir/src/global_variable.rs index 6743c335..249cdc8d 100644 --- a/crates/ir/src/global_variable.rs +++ b/crates/ir/src/global_variable.rs @@ -1,7 +1,7 @@ use std::fmt; use cranelift_entity::PrimaryMap; -use fxhash::FxHashMap; +use rustc_hash::FxHashMap; use crate::{Immediate, Linkage, Type}; diff --git a/crates/ir/src/graphviz/mod.rs b/crates/ir/src/graphviz/mod.rs index 14605d78..1c57b293 100644 --- a/crates/ir/src/graphviz/mod.rs +++ b/crates/ir/src/graphviz/mod.rs @@ -19,14 +19,13 @@ pub fn render_to(func: &Function, output: &mut W) -> io::Result<() #[cfg(test)] mod test { - use crate::{builder, Type}; + use crate::{builder::test_util::test_func_builder, Type}; use super::*; #[test] fn test_dump_ir() { - let mut test_module_builder = builder::test_util::TestModuleBuilder::new(); - let mut builder = test_module_builder.func_builder(&[Type::I64], Type::Void); + let mut builder = test_func_builder(&[Type::I64], Type::Void); let entry_block = builder.append_block(); let then_block = builder.append_block(); @@ -47,13 +46,13 @@ mod test { builder.jump(merge_block); builder.switch_to_block(merge_block); - let v3 = builder.phi(&[(v1, then_block), (v2, else_block)]); + let v3 = builder.phi(Type::I64, &[(v1, then_block), (v2, else_block)]); builder.add(v3, arg0); builder.ret(None); builder.seal_all(); - let func_ref = builder.finish(); - let module = test_module_builder.build(); + let module = builder.finish().build(); + let func_ref = module.iter_functions().next().unwrap(); let mut text = vec![]; render_to(&module.funcs[func_ref], &mut text).unwrap(); diff --git a/crates/ir/src/insn.rs b/crates/ir/src/insn.rs index eb89e9cc..d6872567 100644 --- a/crates/ir/src/insn.rs +++ b/crates/ir/src/insn.rs @@ -1,7 +1,7 @@ //! This module contains Sonatine IR instructions definitions. // TODO: Add type checker for instruction arguments. -use std::fmt; +use std::{fmt, str::FromStr}; use smallvec::SmallVec; @@ -134,17 +134,30 @@ pub enum DataLocationKind { Storage, } +impl DataLocationKind { + pub(super) fn as_str(self) -> &'static str { + match self { + Self::Memory => "@memory", + Self::Storage => "@storage", + } + } +} + +impl FromStr for DataLocationKind { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "@memory" => Ok(Self::Memory), + "@storage" => Ok(Self::Storage), + _ => Err(()), + } + } +} + impl fmt::Display for DataLocationKind { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use DataLocationKind::*; - write!( - f, - "{}", - match self { - Memory => "mem", - Storage => "store", - } - ) + f.write_str(self.as_str()) } } @@ -538,6 +551,18 @@ impl UnaryOp { } } +impl FromStr for UnaryOp { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "not" => Ok(Self::Not), + "neg" => Ok(Self::Neg), + _ => Err(()), + } + } +} + impl fmt::Display for UnaryOp { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(self.as_str()) @@ -629,6 +654,34 @@ impl fmt::Display for BinaryOp { } } +impl FromStr for BinaryOp { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "add" => Ok(Self::Add), + "sub" => Ok(Self::Sub), + "mul" => Ok(Self::Mul), + "udiv" => Ok(Self::Udiv), + "sdiv" => Ok(Self::Sdiv), + "lt" => Ok(Self::Lt), + "gt" => Ok(Self::Gt), + "slt" => Ok(Self::Slt), + "sgt" => Ok(Self::Sgt), + "le" => Ok(Self::Le), + "ge" => Ok(Self::Ge), + "sle" => Ok(Self::Sle), + "sge" => Ok(Self::Sge), + "eq" => Ok(Self::Eq), + "ne" => Ok(Self::Ne), + "and" => Ok(Self::And), + "or" => Ok(Self::Or), + "xor" => Ok(Self::Xor), + _ => Err(()), + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum CastOp { Sext, @@ -643,7 +696,7 @@ impl CastOp { Self::Sext => "sext", Self::Zext => "zext", Self::Trunc => "trunc", - Self::BitCast => "BitCast", + Self::BitCast => "bitcast", } } } @@ -654,6 +707,20 @@ impl fmt::Display for CastOp { } } +impl FromStr for CastOp { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "sext" => Ok(Self::Sext), + "zext" => Ok(Self::Zext), + "trunc" => Ok(Self::Trunc), + "bitcast" => Ok(Self::BitCast), + _ => Err(()), + } + } +} + #[derive(Clone, Copy)] pub enum BranchInfo<'a> { NotBranch, diff --git a/crates/ir/src/ir_writer.rs b/crates/ir/src/ir_writer.rs index 99cc7bc4..1886c05c 100644 --- a/crates/ir/src/ir_writer.rs +++ b/crates/ir/src/ir_writer.rs @@ -47,6 +47,12 @@ impl<'a> ModuleWriter<'a> { Ok(()) } + + pub fn dump_string(&mut self) -> io::Result { + let mut s = Vec::new(); + self.write(&mut s)?; + unsafe { Ok(String::from_utf8_unchecked(s)) } + } } pub struct FuncWriter<'a> { @@ -60,6 +66,8 @@ impl<'a> FuncWriter<'a> { } pub fn write(&mut self, mut w: impl io::Write) -> io::Result<()> { + // TODO: extern declarations aren't printed correctly + w.write_fmt(format_args!( "func {} %{}(", self.func.sig.linkage(), @@ -73,13 +81,17 @@ impl<'a> FuncWriter<'a> { write!(w, ") -> ")?; self.func.sig.ret_ty().ir_write(self.ctx(), &mut w)?; - self.enter(&mut w)?; + writeln!(w, " {{")?; + self.level += 1; + for block in self.func.layout.iter_block() { self.write_block_with_insn(block, &mut w)?; self.newline(&mut w)?; self.newline(&mut w)?; } - self.leave(); + + self.level -= 1; + writeln!(w, "}}")?; Ok(()) } @@ -167,6 +179,8 @@ impl IrWrite for Value { writer .ctx() .with_gv_store(|s| write!(w, "%{}", s.gv_data(gv).symbol)) + } else if let Some(name) = writer.func.value_names.get_by_left(&value) { + write!(w, "{name}") } else { write!(w, "v{}", value.0) } diff --git a/crates/ir/src/linkage.rs b/crates/ir/src/linkage.rs index 50a2f239..5f89ae01 100644 --- a/crates/ir/src/linkage.rs +++ b/crates/ir/src/linkage.rs @@ -1,11 +1,12 @@ -use std::fmt; +use std::{fmt, str::FromStr}; -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] /// Linkage of symbols. pub enum Linkage { /// The symbol is defined in the module, and can be used from the outside of the module. Public, + #[default] /// The symbol is defined in the module, and can NOT be called from another module. Private, @@ -22,3 +23,16 @@ impl fmt::Display for Linkage { } } } + +impl FromStr for Linkage { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "public" => Ok(Self::Public), + "private" => Ok(Self::Private), + "external" => Ok(Self::External), + _ => Err(()), + } + } +} diff --git a/crates/ir/src/module.rs b/crates/ir/src/module.rs index 4813086f..e5d270ac 100644 --- a/crates/ir/src/module.rs +++ b/crates/ir/src/module.rs @@ -42,9 +42,7 @@ impl Module { #[derive(Debug, Clone)] pub struct ModuleCtx { pub isa: TargetIsa, - // TODO: Consider using `RwLock` instead of `Mutex`. type_store: Arc>, - // TODO: Consider using `RwLock` instead of `Mutex`. gv_store: Arc>, } diff --git a/crates/ir/src/types.rs b/crates/ir/src/types.rs index 5bc3186c..5dae308b 100644 --- a/crates/ir/src/types.rs +++ b/crates/ir/src/types.rs @@ -2,8 +2,8 @@ use std::{cmp, fmt}; use cranelift_entity::PrimaryMap; -use fxhash::FxHashMap; use indexmap::IndexMap; +use rustc_hash::FxHashMap; use crate::DataFlowGraph; @@ -121,7 +121,7 @@ impl TypeStore { } /// Sonatina IR types definition. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] pub enum Type { I1, I8, @@ -131,6 +131,7 @@ pub enum Type { I128, I256, Compound(CompoundType), + #[default] Void, } diff --git a/crates/ir/src/value.rs b/crates/ir/src/value.rs index 5f086974..36004235 100644 --- a/crates/ir/src/value.rs +++ b/crates/ir/src/value.rs @@ -370,7 +370,7 @@ impl fmt::Display for Immediate { match self { Self::I1(v) => { if *v { - write!(f, "-1") + write!(f, "1") } else { write!(f, "0") } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index da33e30f..6365ee63 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -213,21 +213,21 @@ impl<'a, 'b> FuncParser<'a, 'b> { // Use `Void` for dummy return type. let sig = Signature::new(fn_name, linkage, &[], Type::Void); let mut func = Function::new(&self.module_builder.ctx, sig); - let mut inserter = InsnInserter::new(&mut func); + let mut inserter = InsnInserter::at_location(CursorLocation::NoWhere); if let Some(value) = eat_token!(self.lexer, Token::Value(..))? { let value = Value(value.id()); - inserter.def_value(value, self.lexer.line())?; + inserter.def_value(&mut func, value, self.lexer.line())?; expect_token!(self.lexer, Token::Dot, "dot")?; let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - inserter.append_arg_value(value, ty); + inserter.append_arg_value(&mut func, value, ty); while eat_token!(self.lexer, Token::Comma)?.is_some() { let value = Value(expect_token!(self.lexer, Token::Value(..), "value")?.id()); - inserter.def_value(value, self.lexer.line())?; + inserter.def_value(&mut func, value, self.lexer.line())?; expect_token!(self.lexer, Token::Dot, "dot")?; let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - inserter.append_arg_value(value, ty); + inserter.append_arg_value(&mut func, value, ty); } } expect_token!(self.lexer, Token::RParen, ")")?; @@ -235,11 +235,11 @@ impl<'a, 'b> FuncParser<'a, 'b> { // Parse return type. expect_token!(self.lexer, Token::RArrow, "->")?; let ret_ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - inserter.func.sig.set_ret_ty(ret_ty); + func.sig.set_ret_ty(ret_ty); expect_token!(self.lexer, Token::Colon, ":")?; let signature_line = self.lexer.line(); - self.parse_body(&mut inserter)?; + self.parse_body(&mut func, &mut inserter)?; let func_ref = match self.module_builder.get_func_ref(func.sig.name()) { Some(declared) if self.module_builder.sig(declared) == &func.sig => declared, @@ -258,19 +258,24 @@ impl<'a, 'b> FuncParser<'a, 'b> { Ok(Some(ParsedFunction { func_ref, comments })) } - fn parse_body(&mut self, inserter: &mut InsnInserter) -> Result<()> { + fn parse_body(&mut self, func: &mut Function, inserter: &mut InsnInserter) -> Result<()> { while let Some(id) = eat_token!(self.lexer, Token::Block(..))? { expect_token!(self.lexer, Token::Colon, ":")?; - self.parse_block_body(inserter, Block(id.id()))?; + self.parse_block_body(func, inserter, Block(id.id()))?; } Ok(()) } - fn parse_block_body(&mut self, inserter: &mut InsnInserter, block: Block) -> Result<()> { - inserter.def_block(block, self.lexer.line(), BlockData::default())?; - inserter.append_block(block); - inserter.set_loc(CursorLocation::BlockTop(block)); + fn parse_block_body( + &mut self, + func: &mut Function, + inserter: &mut InsnInserter, + block: Block, + ) -> Result<()> { + inserter.def_block(func, block, self.lexer.line(), BlockData::default())?; + inserter.append_block(func, block); + inserter.set_location(CursorLocation::BlockTop(block)); loop { if let Some(value) = eat_token!(self.lexer, Token::Value(..))? { @@ -278,14 +283,14 @@ impl<'a, 'b> FuncParser<'a, 'b> { let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; expect_token!(self.lexer, Token::Eq, "=")?; let opcode = expect_token!(self.lexer, Token::OpCode(..), "opcode")?.opcode(); - let insn = opcode.make_insn(self, inserter, Some(ty))?; + let insn = opcode.make_insn(self, func, inserter, Some(ty))?; let value = Value(value.id()); - inserter.def_value(value, self.lexer.line())?; - let result = inserter.func.dfg.make_result(insn).unwrap(); - inserter.func.dfg.values[value] = result; - inserter.func.dfg.attach_result(insn, value); + inserter.def_value(func, value, self.lexer.line())?; + let result = func.dfg.make_result(insn).unwrap(); + func.dfg.values[value] = result; + func.dfg.attach_result(insn, value); } else if let Some(opcode) = eat_token!(self.lexer, Token::OpCode(..))? { - opcode.opcode().make_insn(self, inserter, None)?; + opcode.opcode().make_insn(self, func, inserter, None)?; } else { break; } @@ -296,6 +301,7 @@ impl<'a, 'b> FuncParser<'a, 'b> { fn expect_insn_arg( &mut self, + func: &mut Function, inserter: &mut InsnInserter, idx: usize, undefs: &mut Vec, @@ -307,20 +313,19 @@ impl<'a, 'b> FuncParser<'a, 'b> { } Ok(value) } else if let Some(ident) = eat_token!(self.lexer, Token::Ident(..))? { - let gv = inserter - .func() + let gv = func .dfg .ctx .with_gv_store(|s| s.gv_by_symbol(ident.string())) .unwrap(); - Ok(inserter.func_mut().dfg.make_global_value(gv)) + Ok(func.dfg.make_global_value(gv)) } else { let number = expect_token!(self.lexer, Token::Integer(..), "immediate or value")?.string(); expect_token!(self.lexer, Token::Dot, "type annotation for immediate")?; let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; let imm = build_imm_data(number, &ty, self.lexer.line())?; - Ok(inserter.def_imm(imm)) + Ok(inserter.def_imm(func, imm)) } } @@ -452,8 +457,8 @@ fn expect_constant(ctx: &ModuleCtx, lexer: &mut Lexer, ty: Type) -> Result { - func: &'a mut Function, +#[derive(Default)] +pub struct InsnInserter { loc: CursorLocation, defined_values: HashSet, defined_blocks: HashSet, @@ -461,19 +466,8 @@ struct InsnInserter<'a> { undefs: HashSet<(Insn, usize)>, } -impl<'a> InsnInserter<'a> { - fn new(func: &'a mut Function) -> Self { - Self { - func, - loc: CursorLocation::NoWhere, - defined_values: HashSet::new(), - defined_blocks: HashSet::new(), - defined_imms: HashSet::new(), - undefs: HashSet::new(), - } - } - - fn def_value(&mut self, value: Value, line: u32) -> Result<()> { +impl InsnInserter { + pub fn def_value(&mut self, func: &mut Function, value: Value, line: u32) -> Result<()> { if self.defined_values.contains(&value) { return Err(Error::new( ErrorKind::SemanticError(format!("v{} is already defined", value.0)), @@ -482,14 +476,14 @@ impl<'a> InsnInserter<'a> { } self.defined_values.insert(value); - let value_len = self.func.dfg.values.len(); + let value_len = func.dfg.values.len(); let value_id = value.0 as usize; if value_len <= value_id { - self.func.dfg.values.reserve(value_id); + func.dfg.values.reserve(value_id); for _ in 0..(value_id - value_len + 1) { // Make dummy value. - self.func.dfg.values.push(ValueData::Arg { + func.dfg.values.push(ValueData::Arg { ty: Type::I8, idx: usize::MAX, }); @@ -497,11 +491,11 @@ impl<'a> InsnInserter<'a> { } if self.defined_imms.contains(&value) { - let imm_data = self.func.dfg.value_data(value).clone(); - let new_imm_value = self.func.dfg.make_value(imm_data); + let imm_data = func.dfg.value_data(value).clone(); + let new_imm_value = func.dfg.make_value(imm_data); let mut must_replace = vec![]; - for &user in self.func.dfg.users(value) { - for (idx, &arg) in self.func.dfg.insn_args(user).iter().enumerate() { + for &user in func.dfg.users(value) { + for (idx, &arg) in func.dfg.insn_args(user).iter().enumerate() { if arg == value && !self.undefs.contains(&(user, idx)) { must_replace.push((user, idx)); } @@ -509,11 +503,11 @@ impl<'a> InsnInserter<'a> { } for (insn, idx) in must_replace { - self.func.dfg.replace_insn_arg(insn, new_imm_value, idx); + func.dfg.replace_insn_arg(insn, new_imm_value, idx); } - let imm = self.func.dfg.value_imm(new_imm_value).unwrap(); - self.func.dfg.immediates.insert(imm, new_imm_value); + let imm = func.dfg.value_imm(new_imm_value).unwrap(); + func.dfg.immediates.insert(imm, new_imm_value); self.defined_imms.remove(&value); self.defined_imms.insert(new_imm_value); } @@ -521,13 +515,19 @@ impl<'a> InsnInserter<'a> { Ok(()) } - fn def_imm(&mut self, imm: Immediate) -> Value { - let value = self.func.dfg.make_imm_value(imm); + fn def_imm(&mut self, func: &mut Function, imm: Immediate) -> Value { + let value = func.dfg.make_imm_value(imm); self.defined_imms.insert(value); value } - fn def_block(&mut self, block: Block, line: u32, block_data: BlockData) -> Result<()> { + pub fn def_block( + &mut self, + func: &mut Function, + block: Block, + line: u32, + block_data: BlockData, + ) -> Result<()> { if self.defined_blocks.contains(&block) { return Err(Error::new( ErrorKind::SemanticError(format!("block{} is already defined", block.0)), @@ -537,48 +537,47 @@ impl<'a> InsnInserter<'a> { self.defined_blocks.insert(block); let block_id = block.0 as usize; - let block_len = self.func.dfg.blocks.len(); + let block_len = func.dfg.blocks.len(); if block_len <= block_id { - self.func.dfg.blocks.reserve(block_id); + func.dfg.blocks.reserve(block_id); for _ in 0..(block_id - block_len + 1) { // Make dummy block. - self.func.dfg.blocks.push(BlockData::default()); + func.dfg.blocks.push(BlockData::default()); } } - self.func.dfg.blocks[block] = block_data; + func.dfg.blocks[block] = block_data; Ok(()) } - fn insert_insn_data(&mut self, insn_data: InsnData) -> Insn { - let insn = self.func.dfg.make_insn(insn_data); - self.insert_insn(insn); - self.set_loc(CursorLocation::At(insn)); + fn insert_insn_data(&mut self, func: &mut Function, insn_data: InsnData) -> Insn { + let insn = func.dfg.make_insn(insn_data); + self.insert_insn(func, insn); + self.set_location(CursorLocation::At(insn)); insn } - fn append_arg_value(&mut self, value: Value, ty: Type) { - let idx = self.func.arg_values.len(); + fn append_arg_value(&mut self, func: &mut Function, value: Value, ty: Type) { + let idx = func.arg_values.len(); - let value_data = self.func.dfg.make_arg_value(ty, idx); - self.func.sig.append_arg(ty); - self.func.dfg.values[value] = value_data; - self.func.arg_values.push(value); + let value_data = func.dfg.make_arg_value(ty, idx); + func.sig.append_arg(ty); + func.dfg.values[value] = value_data; + func.arg_values.push(value); } } -impl<'a> FuncCursor for InsnInserter<'a> { - fn set_loc(&mut self, loc: CursorLocation) { - self.loc = loc; - } - - fn func(&self) -> &Function { - self.func +impl FuncCursor for InsnInserter { + fn at_location(loc: CursorLocation) -> Self { + Self { + loc, + ..Default::default() + } } - fn func_mut(&mut self) -> &mut Function { - self.func + fn set_location(&mut self, loc: CursorLocation) { + self.loc = loc; } fn loc(&self) -> CursorLocation { @@ -587,8 +586,8 @@ impl<'a> FuncCursor for InsnInserter<'a> { } macro_rules! make_unary { - ($parser:ident, $inserter:ident, $code:path, $undefs:expr) => {{ - let lhs = $parser.expect_insn_arg($inserter, 0, $undefs)?; + ($parser:ident, $func:ident, $inserter:ident, $code:path, $undefs:expr) => {{ + let lhs = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; expect_token!($parser.lexer, Token::SemiColon, ";")?; InsnData::Unary { code: $code, @@ -598,9 +597,9 @@ macro_rules! make_unary { } macro_rules! make_binary { - ($parser:ident, $inserter:ident, $code:path, $undefs:expr) => {{ - let lhs = $parser.expect_insn_arg($inserter, 0, $undefs)?; - let rhs = $parser.expect_insn_arg($inserter, 1, $undefs)?; + ($parser:ident, $func:ident, $inserter:ident, $code:path, $undefs:expr) => {{ + let lhs = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; + let rhs = $parser.expect_insn_arg($func, $inserter, 1, $undefs)?; expect_token!($parser.lexer, Token::SemiColon, ";")?; InsnData::Binary { code: $code, @@ -610,8 +609,8 @@ macro_rules! make_binary { } macro_rules! make_cast { - ($parser:ident, $inserter:ident, $cast_to:expr, $code:path, $undefs:expr) => {{ - let arg = $parser.expect_insn_arg($inserter, 0, $undefs)?; + ($parser:ident, $func:ident, $inserter:ident, $cast_to:expr, $code:path, $undefs:expr) => {{ + let arg = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; expect_token!($parser.lexer, Token::SemiColon, ";")?; InsnData::Cast { code: $code, @@ -634,35 +633,51 @@ impl Code { fn make_insn( self, parser: &mut FuncParser, + func: &mut Function, inserter: &mut InsnInserter, ret_ty: Option, ) -> Result { let mut undefs = vec![]; let insn_data = match self { - Self::Not => make_unary!(parser, inserter, UnaryOp::Not, &mut undefs), - Self::Neg => make_unary!(parser, inserter, UnaryOp::Neg, &mut undefs), - Self::Add => make_binary!(parser, inserter, BinaryOp::Add, &mut undefs), - Self::Sub => make_binary!(parser, inserter, BinaryOp::Sub, &mut undefs), - Self::Mul => make_binary!(parser, inserter, BinaryOp::Mul, &mut undefs), - Self::Udiv => make_binary!(parser, inserter, BinaryOp::Udiv, &mut undefs), - Self::Sdiv => make_binary!(parser, inserter, BinaryOp::Sdiv, &mut undefs), - Self::Lt => make_binary!(parser, inserter, BinaryOp::Lt, &mut undefs), - Self::Gt => make_binary!(parser, inserter, BinaryOp::Gt, &mut undefs), - Self::Slt => make_binary!(parser, inserter, BinaryOp::Slt, &mut undefs), - Self::Sgt => make_binary!(parser, inserter, BinaryOp::Sgt, &mut undefs), - Self::Le => make_binary!(parser, inserter, BinaryOp::Le, &mut undefs), - Self::Ge => make_binary!(parser, inserter, BinaryOp::Ge, &mut undefs), - Self::Sle => make_binary!(parser, inserter, BinaryOp::Sle, &mut undefs), - Self::Sge => make_binary!(parser, inserter, BinaryOp::Sge, &mut undefs), - Self::Eq => make_binary!(parser, inserter, BinaryOp::Eq, &mut undefs), - Self::Ne => make_binary!(parser, inserter, BinaryOp::Ne, &mut undefs), - Self::And => make_binary!(parser, inserter, BinaryOp::And, &mut undefs), - Self::Or => make_binary!(parser, inserter, BinaryOp::Or, &mut undefs), - Self::Xor => make_binary!(parser, inserter, BinaryOp::Xor, &mut undefs), - Self::Sext => make_cast!(parser, inserter, ret_ty.unwrap(), CastOp::Sext, &mut undefs), - Self::Zext => make_cast!(parser, inserter, ret_ty.unwrap(), CastOp::Zext, &mut undefs), + Self::Not => make_unary!(parser, func, inserter, UnaryOp::Not, &mut undefs), + Self::Neg => make_unary!(parser, func, inserter, UnaryOp::Neg, &mut undefs), + Self::Add => make_binary!(parser, func, inserter, BinaryOp::Add, &mut undefs), + Self::Sub => make_binary!(parser, func, inserter, BinaryOp::Sub, &mut undefs), + Self::Mul => make_binary!(parser, func, inserter, BinaryOp::Mul, &mut undefs), + Self::Udiv => make_binary!(parser, func, inserter, BinaryOp::Udiv, &mut undefs), + Self::Sdiv => make_binary!(parser, func, inserter, BinaryOp::Sdiv, &mut undefs), + Self::Lt => make_binary!(parser, func, inserter, BinaryOp::Lt, &mut undefs), + Self::Gt => make_binary!(parser, func, inserter, BinaryOp::Gt, &mut undefs), + Self::Slt => make_binary!(parser, func, inserter, BinaryOp::Slt, &mut undefs), + Self::Sgt => make_binary!(parser, func, inserter, BinaryOp::Sgt, &mut undefs), + Self::Le => make_binary!(parser, func, inserter, BinaryOp::Le, &mut undefs), + Self::Ge => make_binary!(parser, func, inserter, BinaryOp::Ge, &mut undefs), + Self::Sle => make_binary!(parser, func, inserter, BinaryOp::Sle, &mut undefs), + Self::Sge => make_binary!(parser, func, inserter, BinaryOp::Sge, &mut undefs), + Self::Eq => make_binary!(parser, func, inserter, BinaryOp::Eq, &mut undefs), + Self::Ne => make_binary!(parser, func, inserter, BinaryOp::Ne, &mut undefs), + Self::And => make_binary!(parser, func, inserter, BinaryOp::And, &mut undefs), + Self::Or => make_binary!(parser, func, inserter, BinaryOp::Or, &mut undefs), + Self::Xor => make_binary!(parser, func, inserter, BinaryOp::Xor, &mut undefs), + Self::Sext => make_cast!( + parser, + func, + inserter, + ret_ty.unwrap(), + CastOp::Sext, + &mut undefs + ), + Self::Zext => make_cast!( + parser, + func, + inserter, + ret_ty.unwrap(), + CastOp::Zext, + &mut undefs + ), Self::BitCast => make_cast!( parser, + func, inserter, ret_ty.unwrap(), CastOp::BitCast, @@ -670,6 +685,7 @@ impl Code { ), Self::Trunc => make_cast!( parser, + func, inserter, ret_ty.unwrap(), CastOp::Trunc, @@ -678,14 +694,14 @@ impl Code { Self::Load => { let loc = parser.expect_data_loc_kind()?; - let arg = parser.expect_insn_arg(inserter, 0, &mut undefs)?; + let arg = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; expect_token!(parser.lexer, Token::SemiColon, ";")?; InsnData::Load { args: [arg], loc } } Self::Store => { let loc = parser.expect_data_loc_kind()?; - let lhs = parser.expect_insn_arg(inserter, 0, &mut undefs)?; - let rhs = parser.expect_insn_arg(inserter, 1, &mut undefs)?; + let lhs = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; + let rhs = parser.expect_insn_arg(func, inserter, 1, &mut undefs)?; expect_token!(parser.lexer, Token::SemiColon, ";")?; InsnData::Store { args: [lhs, rhs], @@ -699,12 +715,12 @@ impl Code { let mut args = smallvec![]; let mut idx = 0; while eat_token!(parser.lexer, Token::SemiColon)?.is_none() { - let arg = parser.expect_insn_arg(inserter, idx, &mut undefs)?; + let arg = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; args.push(arg); idx += 1; } - let func = parser + let callee = parser .module_builder .get_func_ref(func_name) .ok_or_else(|| { @@ -713,16 +729,20 @@ impl Code { parser.lexer.line(), ) })?; - let sig = parser.module_builder.get_sig(func).clone(); + let sig = parser.module_builder.get_sig(callee).clone(); let ret_ty = sig.ret_ty(); - inserter.func_mut().callees.insert(func, sig); - InsnData::Call { func, args, ret_ty } + func.callees.insert(callee, sig); + InsnData::Call { + func: callee, + args, + ret_ty, + } } Self::Jump => make_jump!(parser), Self::Br => { - let cond = parser.expect_insn_arg(inserter, 0, &mut undefs)?; + let cond = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; let then = parser.expect_block()?; let else_ = parser.expect_block()?; expect_token!(parser.lexer, Token::SemiColon, ";")?; @@ -734,7 +754,7 @@ impl Code { Self::BrTable => { let mut arg_idx = 0; let mut args = smallvec![]; - let cond = parser.expect_insn_arg(inserter, arg_idx, &mut undefs)?; + let cond = parser.expect_insn_arg(func, inserter, arg_idx, &mut undefs)?; args.push(cond); arg_idx += 1; @@ -746,7 +766,7 @@ impl Code { let mut table = smallvec![]; while eat_token!(parser.lexer, Token::LParen)?.is_some() { - let value = parser.expect_insn_arg(inserter, arg_idx, &mut undefs)?; + let value = parser.expect_insn_arg(func, inserter, arg_idx, &mut undefs)?; args.push(value); let block = parser.expect_block()?; table.push(block); @@ -765,7 +785,7 @@ impl Code { let mut args = smallvec![]; let mut idx = 0; while eat_token!(parser.lexer, Token::SemiColon)?.is_none() { - let arg = parser.expect_insn_arg(inserter, idx, &mut undefs)?; + let arg = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; args.push(arg); idx += 1; } @@ -783,7 +803,7 @@ impl Code { if eat_token!(parser.lexer, Token::SemiColon)?.is_some() { InsnData::Return { args: None } } else { - let value = parser.expect_insn_arg(inserter, 0, &mut undefs)?; + let value = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; expect_token!(parser.lexer, Token::SemiColon, ";")?; InsnData::Return { args: Some(value) } } @@ -794,7 +814,7 @@ impl Code { let mut blocks = smallvec![]; let mut idx = 0; while eat_token!(parser.lexer, Token::LParen)?.is_some() { - let value = parser.expect_insn_arg(inserter, idx, &mut undefs)?; + let value = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; values.push(value); let block = parser.expect_block()?; blocks.push(block); @@ -810,7 +830,7 @@ impl Code { } }; - let insn = inserter.insert_insn_data(insn_data); + let insn = inserter.insert_insn_data(func, insn_data); for undef in undefs { inserter.undefs.insert((insn, undef)); } diff --git a/crates/parser2/Cargo.toml b/crates/parser2/Cargo.toml new file mode 100644 index 00000000..ef642cc9 --- /dev/null +++ b/crates/parser2/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "sonatina-parser2" +version = "0.0.3-alpha" +edition = "2021" +authors = ["Sonatina Developers"] +license = "Apache-2.0" +readme = "../../README.md" +homepage = "https://github.com/fe-lang/sonatina/tree/main/crates/parser" +repository = "https://github.com/fe-lang/sonatina" +description = "Parser for sonatina-ir text format" +categories = ["compilers", "parser", "wasm"] +keywords = ["compiler", "evm", "wasm", "smart-contract"] + +[dependencies] +ir = { package = "sonatina-ir", path = "../ir", version = "0.0.3-alpha" } +sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } +smallvec = "1.7.0" +cranelift-entity = "0.104" +pest = "2.7.10" +pest_derive = "2.7.10" +pest-ast = "0.3.4" +from-pest = "0.3.2" +smol_str = "0.2.2" +hex = "0.4.3" +num-traits = { version = "0.2.19", default-features = false } +either = { version = "1.12.0", default-features = false } +annotate-snippets = "0.11.4" + +[dev-dependencies] +dir-test = { git = "https://github.com/sbillig/dir-test", rev = "c4115dd" } +insta = { version = "1.38.0" } +indenter = "0.3.3" +ariadne = { version = "0.4.1", features = ["auto-color"] } diff --git a/crates/parser2/src/ast.rs b/crates/parser2/src/ast.rs new file mode 100644 index 00000000..5797afda --- /dev/null +++ b/crates/parser2/src/ast.rs @@ -0,0 +1,580 @@ +use super::syntax::Node; +use crate::syntax::{FromSyntax, Parser, Rule}; +use annotate_snippets::{Level, Renderer, Snippet}; +use either::Either; +use hex::FromHex; +pub use ir::{ + insn::{BinaryOp, CastOp, UnaryOp}, + DataLocationKind, Immediate, Linkage, +}; +use ir::{I256, U256}; +use pest::Parser as _; +use smol_str::SmolStr; +pub use sonatina_triple::{InvalidTriple, TargetTriple}; +use std::{io, ops::Range, str::FromStr}; + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum Error { + NumberOutOfBounds(Range), + InvalidTarget(InvalidTriple, Range), + SyntaxError(pest::error::Error), +} + +pub fn parse(input: &str) -> Result> { + pest::set_error_detail(true); // xxx + + match Parser::parse(Rule::module, input) { + Err(err) => Err(vec![Error::SyntaxError(err)]), + Ok(mut pairs) => { + let pair = pairs.next().unwrap(); + debug_assert_eq!(pair.as_rule(), Rule::module); + let mut node = Node::new(pair); + + let module = Module::from_syntax(&mut node); + + if node.errors.is_empty() { + Ok(module) + } else { + Err(node.errors) + } + } + } +} + +#[derive(Debug)] +pub struct Module { + pub target: Option, + pub declared_functions: Vec, + pub functions: Vec, + pub comments: Vec, +} + +impl FromSyntax for Module { + fn from_syntax(node: &mut Node) -> Self { + let target = match node + .get_opt(Rule::target_triple) + .map(|p| TargetTriple::parse(p.as_str())) + { + Some(Ok(t)) => Some(t), + Some(Err(e)) => { + node.error(Error::InvalidTarget(e, node.span.clone())); + None + } + None => None, + }; + + let module_comments = node.map_while(|p| { + if p.as_rule() == Rule::COMMENT && p.as_str().starts_with("#!") { + Either::Right(p.as_str().into()) + } else { + Either::Left(p) + } + }); + + let mut declared_functions = vec![]; + let mut functions = vec![]; + loop { + let comments = node.map_while(|p| { + if p.as_rule() == Rule::COMMENT { + Either::Right(p.as_str().to_string()) + } else { + Either::Left(p) + } + }); + + if let Some(func) = node.single_opt(Rule::function_declaration) { + declared_functions.push(func); + } else { + match node.single_opt::(Rule::function) { + Some(mut func) => { + func.comments = comments; + functions.push(func); + } + None => break, + } + } + } + Module { + target, + declared_functions, + functions, + comments: module_comments, + } + } +} + +#[derive(Debug)] +pub struct Func { + pub signature: FuncSignature, + pub blocks: Vec, + pub comments: Vec, +} + +impl FromSyntax for Func { + fn from_syntax(node: &mut Node) -> Self { + Func { + signature: node.single(Rule::function_signature), + blocks: node.multi(Rule::block), + comments: vec![], + } + } +} + +#[derive(Debug)] +pub struct FuncSignature { + pub linkage: Linkage, + pub name: FunctionName, + pub params: Vec, + pub ret_type: Option, +} + +impl FromSyntax for FuncSignature { + fn from_syntax(node: &mut Node) -> Self { + let linkage = node + .parse_str_opt(Rule::function_linkage) + .unwrap_or(Linkage::Private); + + FuncSignature { + linkage, + name: node.single(Rule::function_identifier), + params: node.descend_into(Rule::function_params, |n| n.multi(Rule::value_declaration)), + ret_type: node.descend_into_opt(Rule::function_ret_type, |n| n.single(Rule::type_name)), + } + } +} + +#[derive(Debug)] +pub struct FuncDeclaration { + pub linkage: Linkage, + pub name: FunctionName, + pub params: Vec, + pub ret_type: Option, +} + +impl FromSyntax for FuncDeclaration { + fn from_syntax(node: &mut Node) -> Self { + let linkage = node + .parse_str_opt(Rule::function_linkage) + .unwrap_or(Linkage::Private); + + FuncDeclaration { + linkage, + name: node.single(Rule::function_identifier), + params: node.descend_into(Rule::function_param_type_list, |n| n.multi(Rule::type_name)), + ret_type: node.descend_into_opt(Rule::function_ret_type, |n| n.single(Rule::type_name)), + } + } +} + +#[derive(Debug)] +pub struct Block { + pub id: BlockId, + pub stmts: Vec, +} + +impl FromSyntax for Block { + fn from_syntax(node: &mut Node) -> Self { + Self { + id: node.single(Rule::block_ident), + stmts: node.multi(Rule::stmt), + } + } +} + +#[derive(Debug)] +pub struct BlockId(pub Option); + +impl FromSyntax for BlockId { + fn from_syntax(node: &mut Node) -> Self { + node.descend(); + debug_assert_eq!(node.rule, Rule::block_number); + BlockId(node.txt.parse().ok()) + } +} + +#[derive(Debug)] +pub struct Stmt { + pub kind: StmtKind, + // pub comments: Vec, +} + +impl FromSyntax for Stmt { + fn from_syntax(node: &mut Node) -> Self { + node.descend(); + let kind = match node.rule { + Rule::define_stmt => StmtKind::Define( + node.single(Rule::value_declaration), + node.single(Rule::expr), + ), + Rule::store_stmt => StmtKind::Store( + node.parse_str(Rule::location), + node.single(Rule::value), + node.single(Rule::value), + ), + Rule::return_stmt => StmtKind::Return(node.single_opt(Rule::value)), + Rule::jump_stmt => StmtKind::Jump(node.single(Rule::block_ident)), + Rule::br_stmt => StmtKind::Branch( + node.single(Rule::value), + node.single(Rule::block_ident), + node.single(Rule::block_ident), + ), + Rule::br_table_stmt => StmtKind::BranchTable( + node.single(Rule::value), + node.single_opt(Rule::block_ident), + node.multi(Rule::br_table_case), + ), + _ => unreachable!(), + }; + Stmt { kind } + } +} + +#[derive(Debug)] +pub enum StmtKind { + Define(ValueDeclaration, Expr), + Store(DataLocationKind, Value, Value), + Return(Option), + Jump(BlockId), + Branch(Value, BlockId, BlockId), + BranchTable(Value, Option, Vec<(Value, BlockId)>), + Call(Call), +} + +impl FromSyntax for (Value, BlockId) { + fn from_syntax(node: &mut Node) -> Self { + (node.single(Rule::value), node.single(Rule::block_ident)) + } +} + +#[derive(Debug)] +pub enum Type { + Int(IntType), + Ptr(Box), + Array(Box, usize), + Void, + Error, +} + +impl FromSyntax for Type { + fn from_syntax(node: &mut Node) -> Self { + node.descend(); + match node.rule { + Rule::primitive_type => Type::Int(IntType::from_str(node.txt).unwrap()), + Rule::ptr_type => Type::Ptr(Box::new(node.single(Rule::type_name))), + Rule::array_type => { + let Ok(size) = usize::from_str(node.get(Rule::array_size).as_str()) else { + node.error(Error::NumberOutOfBounds(node.span.clone())); + return Type::Error; + }; + Type::Array(Box::new(node.single(Rule::type_name)), size) + } + Rule::void_type => Type::Void, + _ => unreachable!(), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum IntType { + I1, + I8, + I16, + I32, + I64, + I128, + I256, +} + +impl From for ir::Type { + fn from(value: IntType) -> Self { + match value { + IntType::I1 => ir::Type::I1, + IntType::I8 => ir::Type::I8, + IntType::I16 => ir::Type::I16, + IntType::I32 => ir::Type::I32, + IntType::I64 => ir::Type::I64, + IntType::I128 => ir::Type::I128, + IntType::I256 => ir::Type::I256, + } + } +} + +#[derive(Debug)] +pub enum Expr { + Binary(BinaryOp, Value, Value), + Unary(UnaryOp, Value), + Cast(CastOp, Value), + Load(DataLocationKind, Value), + Alloca(Type), + Call(Call), + Gep(Vec), + Phi(Vec<(Value, BlockId)>), +} + +impl FromSyntax for Expr { + fn from_syntax(node: &mut Node) -> Self { + node.descend(); + match node.rule { + Rule::bin_expr => Expr::Binary( + node.parse_str(Rule::bin_op), + node.single(Rule::value), + node.single(Rule::value), + ), + Rule::una_expr => Expr::Unary(node.parse_str(Rule::una_op), node.single(Rule::value)), + Rule::alloca_expr => Expr::Alloca(node.single(Rule::type_name)), + Rule::call_expr => Expr::Call(Call( + node.single(Rule::function_identifier), + node.multi(Rule::value), + )), + Rule::cast_expr => Expr::Cast(node.parse_str(Rule::cast_op), node.single(Rule::value)), + + Rule::gep_expr => Expr::Gep(node.multi(Rule::value)), + Rule::load_expr => Expr::Load(node.parse_str(Rule::location), node.single(Rule::value)), + Rule::phi_expr => Expr::Phi(node.multi(Rule::phi_value)), + _ => unreachable!(), + } + } +} + +#[derive(Debug)] +pub struct Call(pub FunctionName, pub Vec); + +/// Doesn't include `%` prefix. +#[derive(Debug)] +pub struct FunctionName(pub SmolStr); + +impl FromSyntax for FunctionName { + fn from_syntax(node: &mut Node) -> Self { + FunctionName(node.parse_str(Rule::function_name)) + } +} + +#[derive(Debug)] +pub struct ValueName(pub SmolStr); + +impl FromSyntax for ValueName { + fn from_syntax(node: &mut Node) -> Self { + Self(node.txt.into()) + } +} + +#[derive(Debug)] +pub struct ValueDeclaration(pub ValueName, pub Type); + +impl FromSyntax for ValueDeclaration { + fn from_syntax(node: &mut Node) -> Self { + ValueDeclaration(node.single(Rule::value_name), node.single(Rule::type_name)) + } +} + +#[derive(Debug)] +pub enum Value { + Immediate(Immediate), + Named(ValueName), + Error, +} + +impl FromSyntax for Value { + fn from_syntax(node: &mut Node) -> Self { + node.descend(); + match node.rule { + Rule::value_name => Value::Named(ValueName(node.txt.into())), + Rule::imm_number => { + let ty: IntType = node.parse_str(Rule::primitive_type); + node.descend(); + let mut txt = node.txt; + match node.rule { + Rule::decimal => match ty { + IntType::I1 => imm_or_err(node, || { + let b = match u8::from_str(txt).ok()? { + 0 => false, + 1 => true, + _ => return None, + }; + Some(Immediate::I1(b)) + }), + IntType::I8 => imm_or_err(node, || Some(Immediate::I8(txt.parse().ok()?))), + IntType::I16 => { + imm_or_err(node, || Some(Immediate::I16(txt.parse().ok()?))) + } + IntType::I32 => { + imm_or_err(node, || Some(Immediate::I32(txt.parse().ok()?))) + } + IntType::I64 => { + imm_or_err(node, || Some(Immediate::I64(txt.parse().ok()?))) + } + IntType::I128 => { + imm_or_err(node, || Some(Immediate::I128(txt.parse().ok()?))) + } + IntType::I256 => { + let s = txt.strip_prefix('-'); + let is_negative = s.is_some(); + txt = s.unwrap_or(txt); + + imm_or_err(node, || { + let mut i256 = U256::from_dec_str(txt).ok()?.into(); + if is_negative { + i256 = I256::zero().overflowing_sub(i256).0; + } + Some(Immediate::I256(i256)) + }) + } + }, + + Rule::hex => match ty { + IntType::I1 => { + node.error(Error::NumberOutOfBounds(node.span.clone())); + Value::Error + } + IntType::I8 => imm_or_err(node, || { + Some(Immediate::I8(i8::from_be_bytes(hex_bytes(txt)?))) + }), + IntType::I16 => imm_or_err(node, || { + Some(Immediate::I16(i16::from_be_bytes(hex_bytes(txt)?))) + }), + IntType::I32 => imm_or_err(node, || { + Some(Immediate::I32(i32::from_be_bytes(hex_bytes(txt)?))) + }), + IntType::I64 => imm_or_err(node, || { + Some(Immediate::I64(i64::from_be_bytes(hex_bytes(txt)?))) + }), + IntType::I128 => imm_or_err(node, || { + Some(Immediate::I128(i128::from_be_bytes(hex_bytes(txt)?))) + }), + IntType::I256 => { + let s = txt.strip_prefix('-'); + let is_negative = s.is_some(); + txt = s.unwrap_or(txt); + + imm_or_err(node, || { + let mut i256 = U256::from_big_endian(&hex_bytes::<32>(txt)?).into(); + if is_negative { + i256 = I256::zero().overflowing_sub(i256).0; + } + Some(Immediate::I256(i256)) + }) + } + }, + _ => unreachable!(), + } + } + _ => unreachable!(), + } + } +} + +impl FromStr for IntType { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "i1" => Ok(Self::I1), + "i8" => Ok(Self::I8), + "i16" => Ok(Self::I16), + "i32" => Ok(Self::I32), + "i64" => Ok(Self::I64), + "i128" => Ok(Self::I128), + "i256" => Ok(Self::I256), + _ => Err(()), + } + } +} + +impl Error { + pub fn span(&self) -> Range { + match self { + Error::NumberOutOfBounds(span) => span.clone(), + Error::InvalidTarget(_, span) => span.clone(), + Error::SyntaxError(err) => match err.location { + pest::error::InputLocation::Pos(p) => p..p, + pest::error::InputLocation::Span((s, e)) => s..e, + }, + } + } + + pub fn print(&self, mut w: impl io::Write, path: &str, content: &str) -> io::Result<()> { + let label = match self { + Error::NumberOutOfBounds(_) => "number out of bounds".into(), + Error::InvalidTarget(err, _) => err.to_string(), + Error::SyntaxError(err) => err.to_string(), + }; + let snippet = Level::Error.title("parse error").snippet( + Snippet::source(content) + .line_start(0) + .origin(path) + .fold(true) + .annotation(Level::Error.span(self.span()).label(&label)), + ); + let rend = Renderer::styled(); + let disp = rend.render(snippet); + write!(w, "{}", disp) + } + + pub fn print_to_string(&self, path: &str, content: &str) -> String { + let mut v = vec![]; + self.print(&mut v, path, content).unwrap(); + String::from_utf8(v).unwrap() + } +} + +fn imm_or_err(node: &mut Node, f: F) -> Value +where + F: Fn() -> Option, +{ + let Some(imm) = f() else { + let span = node.span.clone(); + node.error(Error::NumberOutOfBounds(span)); + return Value::Error; + }; + Value::Immediate(imm) +} + +fn hex_bytes(mut s: &str) -> Option<[u8; N]> { + s = s.strip_prefix("0x").unwrap(); + let bytes = Vec::::from_hex(s).unwrap(); + + if bytes.len() > N { + return None; + } + + let mut out = [0; N]; + out[N - bytes.len()..].copy_from_slice(&bytes); + Some(out) +} + +// xxx remove +// pub fn parse_immediate( +// val: &str, +// loc: Range, +// ) -> Result> { +// let mut chunks = val.split('.'); +// let num = chunks.next().unwrap(); +// let t = chunks.next().unwrap(); + +// let imm = match t { +// "i1" => Immediate::I1(parse_num(num, loc)?), +// "i8" => Immediate::I8(parse_num(num, loc)?), +// "i16" => Immediate::I16(parse_num(num, loc)?), +// "i32" => Immediate::I32(parse_num(num, loc)?), +// "i64" => Immediate::I64(parse_num(num, loc)?), +// "i128" => Immediate::I128(parse_num(num, loc)?), +// "i256" => todo!(), +// _ => { +// unreachable!() +// } +// }; +// Ok(Value::Immediate(imm)) +// } + +// pub fn parse_num( +// s: &str, +// loc: Range, +// ) -> Result> +// where +// T: FromStr, +// { +// T::from_str(s).map_err(|_| ParseError::User { +// error: Error::NumberOutOfBounds(loc), +// }) +// } diff --git a/crates/parser2/src/lib.rs b/crates/parser2/src/lib.rs new file mode 100644 index 00000000..a9d87228 --- /dev/null +++ b/crates/parser2/src/lib.rs @@ -0,0 +1,230 @@ +use ast::{Error, ValueDeclaration}; +use cranelift_entity::SecondaryMap; +use ir::{ + self, + builder::{FunctionBuilder, ModuleBuilder}, + func_cursor::{CursorLocation, FuncCursor, InsnInserter}, + isa::IsaBuilder, + module::{FuncRef, ModuleCtx}, + Module, Signature, +}; + +pub mod ast; +pub mod syntax; + +pub fn parse_module(input: &str) -> Result> { + let ast = ast::parse(input)?; + + let isa = IsaBuilder::new(ast.target.unwrap()).build(); // xxx + let ctx = ModuleCtx::new(isa); + let mut builder = ModuleBuilder::new(ctx); + + for func in ast.declared_functions { + let params = func + .params + .iter() + .map(|t| build_type(&mut builder, t)) + .collect::>(); + let ret_ty = func + .ret_type + .as_ref() + .map(|t| build_type(&mut builder, t)) + .unwrap_or(ir::Type::Void); + + let sig = Signature::new(&func.name.0, func.linkage, ¶ms, ret_ty); + builder.declare_function(sig); + } + + for func in ast.functions.iter() { + let sig = &func.signature; + let args = sig + .params + .iter() + .map(|decl| build_type(&mut builder, &decl.1)) + .collect::>(); + + let ret_ty = sig + .ret_type + .as_ref() + .map(|t| build_type(&mut builder, t)) + .unwrap_or(ir::Type::Void); + let sig = Signature::new(&sig.name.0, sig.linkage, &args, ret_ty); + + builder.declare_function(sig); + } + + let mut func_comments = SecondaryMap::default(); + + for func in ast.functions { + let id = builder.get_func_ref(&func.signature.name.0).unwrap(); + let mut fb = builder.build_function(id); + build_func(&mut fb, &func); + fb.seal_all(); + builder = fb.finish(); + + func_comments[id] = func.comments; + } + + let module = builder.build(); + Ok(ParsedModule { + module, + module_comments: ast.comments, + func_comments, + }) +} + +pub struct ParsedModule { + pub module: Module, + pub module_comments: Vec, + pub func_comments: SecondaryMap>, +} + +fn build_func(builder: &mut FunctionBuilder, func: &ast::Func) { + for (i, ValueDeclaration(name, _ty)) in func.signature.params.iter().enumerate() { + builder.name_value(builder.func.arg_values[i], &name.0); + } + + // "forward declare" all block ids + if let Some(max_block_id) = func.blocks.iter().map(|b| b.id.0.unwrap()).max() { + while builder.func.dfg.blocks.len() <= max_block_id as usize { + builder.cursor.make_block(&mut builder.func); + } + } + + for block in &func.blocks { + let block_id = ir::Block(block.id.0.unwrap()); + builder.cursor.append_block(&mut builder.func, block_id); + builder + .cursor + .set_location(CursorLocation::BlockTop(block_id)); + + for stmt in &block.stmts { + match &stmt.kind { + ast::StmtKind::Define(ValueDeclaration(val, ty), expr) => { + let ty = build_type(&mut builder.module_builder, ty); + + let result_val = match expr { + ast::Expr::Binary(op, lhs, rhs) => { + let lhs = build_value(builder, lhs); + let rhs = build_value(builder, rhs); + builder.binary_op(*op, lhs, rhs) + } + ast::Expr::Unary(op, val) => { + let val = build_value(builder, val); + builder.unary_op(*op, val) + } + ast::Expr::Cast(op, val) => { + let val = build_value(builder, val); + builder.cast_op(*op, val, ty) + } + ast::Expr::Load(location, addr) => { + let addr = build_value(builder, addr); + match location { + ir::DataLocationKind::Memory => builder.memory_load(addr), + ir::DataLocationKind::Storage => builder.storage_load(addr), + } + } + ast::Expr::Alloca(ty) => { + let ty = build_type(&mut builder.module_builder, ty); + builder.alloca(ty) + } + ast::Expr::Call(ast::Call(name, args)) => { + let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); + let args = args + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.call(func_ref, &args).unwrap() + } + ast::Expr::Gep(vals) => { + let vals = vals + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.gep(&vals).unwrap() + } + ast::Expr::Phi(vals) => { + let args = vals + .iter() + .map(|(val, block)| { + // xxx declare block + let b = ir::Block(block.0.unwrap()); + let v = build_value(builder, val); + (v, b) + }) + .collect::>(); + builder.phi(ty, &args) + } + }; + builder.name_value(result_val, &val.0) + } + ast::StmtKind::Store(loc, addr, val) => { + let addr = build_value(builder, addr); + let val = build_value(builder, val); + + match loc { + ir::DataLocationKind::Memory => builder.memory_store(addr, val), + ir::DataLocationKind::Storage => builder.storage_store(addr, val), + } + } + ast::StmtKind::Return(val) => { + let val = val.as_ref().map(|v| build_value(builder, v)); + builder.ret(val); + } + ast::StmtKind::Jump(block_id) => { + let block_id = ir::Block(block_id.0.unwrap()); + builder.jump(block_id); + } + ast::StmtKind::Branch(cond, true_block, false_block) => { + let cond = build_value(builder, cond); + let true_block = ir::Block(true_block.0.unwrap()); + let false_block = ir::Block(false_block.0.unwrap()); + builder.br(cond, true_block, false_block); + } + ast::StmtKind::BranchTable(index, default_block, table) => { + let index = build_value(builder, index); + let default_block = default_block.as_ref().map(|b| ir::Block(b.0.unwrap())); + let table = table + .iter() + .map(|(val, block)| { + (build_value(builder, val), ir::Block(block.0.unwrap())) + }) + .collect::>(); + builder.br_table(index, default_block, &table); + } + ast::StmtKind::Call(ast::Call(name, args)) => { + let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); + let args = args + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.call(func_ref, &args).unwrap(); + } + } + } + } +} + +fn build_value(builder: &mut FunctionBuilder, val: &ast::Value) -> ir::Value { + match val { + ast::Value::Immediate(imm) => builder.make_imm_value(*imm), + ast::Value::Named(v) => builder.get_named_value(&v.0), + ast::Value::Error => unreachable!(), + } +} + +fn build_type(builder: &mut ModuleBuilder, t: &ast::Type) -> ir::Type { + match t { + ast::Type::Int(i) => (*i).into(), + ast::Type::Ptr(t) => { + let t = build_type(builder, t); + builder.ptr_type(t) + } + ast::Type::Array(t, n) => { + let elem = build_type(builder, t); + builder.declare_array_type(elem, *n) + } + ast::Type::Void => ir::Type::Void, + ast::Type::Error => todo!(), + } +} diff --git a/crates/parser2/src/sonatina.pest b/crates/parser2/src/sonatina.pest new file mode 100644 index 00000000..5aa980df --- /dev/null +++ b/crates/parser2/src/sonatina.pest @@ -0,0 +1,88 @@ +module = { SOI ~ NEWLINE* ~ target_specifier ~ (NEWLINE+ ~ function_declaration)* ~ (NEWLINE+ ~ function)* ~ NEWLINE* ~ EOI } + +WHITESPACE = _{ " " | "\t" } +COMMENT = { "#" ~ (!NEWLINE ~ ANY)* } + +target_specifier = _{ "target" ~ "=" ~ "\"" ~ target_triple ~ "\"" } +target_triple = @{ ASCII_ALPHA* ~ "-" ~ ASCII_ALPHA* ~ "-" ~ ASCII_ALPHA* } + +ident_start_char = { ASCII_ALPHA | "_" } +ident_body_char = { ASCII_ALPHANUMERIC | "_" } + +function = { function_signature ~ function_body } +_functions = _{ (NEWLINE* ~ function ~ NEWLINE*)* } +function_signature = { "func" ~ function_linkage? ~ function_identifier ~ function_params ~ function_ret_type? } +function_ret_type = { "->" ~ type_name } +function_linkage = { "public" | "private" | "external" } +function_name = @{ ident_start_char ~ ident_body_char* } +function_identifier = ${ "%" ~ function_name } +function_params = { "(" ~ (value_declaration ~ ",")* ~ value_declaration? ~ ")" } +function_body = _{ "{" ~ (NEWLINE+ ~ block?)* ~ "}" } +block = { block_ident ~ ":" ~ (NEWLINE+ ~ stmt)* } +_stmts = _{ (stmt ~ NEWLINE+)* } + +function_declaration = { "declare" ~ function_linkage? ~ function_identifier ~ function_param_type_list ~ function_ret_type? ~ ";" } +function_param_type_list = { "(" ~ (type_name ~ ",")* ~ type_name? ~ ")" } + +block_ident = ${ "block" ~ block_number } +block_number = { ASCII_DIGIT+ } +value_name = ${ "v" ~ ASCII_DIGIT+ } + +type_name = { primitive_type | ptr_type | array_type | void_type } +primitive_type = { "i8" | "i16" | "i32" | "i64" | "i128" | "i256" | "i1" } +ptr_type = ${ "*" ~ type_name } +array_type = !{ "[" ~ type_name ~ ";" ~ array_size ~ "]" } +array_size = { ASCII_DIGIT+ } +void_type = { "void" } + +value_declaration = ${ value_name ~ "." ~ type_name } + +// Stmts +stmt = { (define_stmt | store_stmt | return_stmt | jump_stmt | br_stmt | br_table_stmt) ~ ";" } +store_stmt = { "store" ~ location ~ value ~ value } +location = { "@memory" | "@storage" } +return_stmt = { "return" ~ value? } +jump_stmt = { "jump" ~ block_ident } +br_stmt = { "br" ~ value ~ block_ident ~ block_ident } +br_table_stmt = { "br_table" ~ value ~ block_ident? ~ ("(" ~ br_table_case ~ ")")+ } +br_table_case = { value ~ block_ident } + +define_stmt = { value_declaration ~ "=" ~ expr } +expr = { bin_expr | una_expr | alloca_expr | call_expr | cast_expr | gep_expr | load_expr | phi_expr } +bin_expr = { bin_op ~ value ~ value } +bin_op = { + "add" + | "sub" + | "mul" + | "udiv" + | "sdiv" + | "lt" + | "gt" + | "slt" + | "sgt" + | "le" + | "ge" + | "sle" + | "sge" + | "eq" + | "ne" + | "and" + | "or" + | "xor" +} +una_expr = { una_op ~ value } +una_op = { "not" | "neg" } +value = { value_name | imm_number } +imm_number = ${ number ~ "." ~ primitive_type } +number = _{ hex | decimal } +decimal = @{ "-"? ~ ASCII_DIGIT+ } +hex = @{ "0x" ~ ASCII_HEX_DIGIT+ } + +alloca_expr = { "alloca" ~ type_name } +call_expr = { "call" ~ function_identifier ~ value* } +load_expr = { "load" ~ location ~ value } +gep_expr = { "gep" ~ value{2, } } +cast_expr = { cast_op ~ value } +cast_op = { "sext" | "zext" | "bitcast" | "trunc" } +phi_expr = { "phi" ~ phi_value+ } +phi_value = { "(" ~ value ~ block_ident ~ ")" } diff --git a/crates/parser2/src/syntax.rs b/crates/parser2/src/syntax.rs new file mode 100644 index 00000000..8b61c98f --- /dev/null +++ b/crates/parser2/src/syntax.rs @@ -0,0 +1,250 @@ +use std::{ops::Range, str::FromStr}; + +use either::Either; +use pest::iterators::Pair; + +#[derive(pest_derive::Parser)] +#[grammar = "sonatina.pest"] +pub struct Parser; + +pub trait FromSyntax { + fn from_syntax(node: &mut Node) -> Self; +} + +pub struct Node<'i, E> { + pub rule: Rule, + pub txt: &'i str, + pub span: Range, + pairs: Vec>>, + pub errors: Vec, + child: Option>, +} + +impl<'i, E> Node<'i, E> { + pub fn new(pair: Pair<'i, Rule>) -> Self { + let mut n = Self::default(); + n.set_pair(pair); + n + } + + fn set_pair(&mut self, pair: Pair<'i, Rule>) { + self.rule = pair.as_rule(); + self.txt = pair.as_str(); + let s = pair.as_span(); + self.span = s.start()..s.end(); + self.pairs.clear(); + self.pairs.extend(pair.into_inner().map(Some)); + debug_assert!(self.errors.is_empty()); + } + + fn reset(&mut self, pair: Pair<'i, Rule>, with_errors: F) + where + F: FnMut(E), + { + self.clear(with_errors); + self.set_pair(pair); + } + + fn clear(&mut self, with_errors: F) + where + F: FnMut(E), + { + self.errors.drain(..).for_each(with_errors); + self.pairs.clear(); + } + + fn with_child(&mut self, pair: Pair<'i, Rule>, f: F) -> T + where + F: FnOnce(&mut Self) -> T, + { + let mut child = self.child.take().unwrap_or_default(); + child.set_pair(pair); + let r = f(&mut child); + + child.clear(|err| self.errors.push(err)); + self.child = Some(child); + r + } + + pub fn error(&mut self, err: E) { + self.errors.push(err); + } + + pub fn is_empty(&self) -> bool { + self.pairs.is_empty() + || (self.pairs.len() == 1 && self.pairs[0].as_ref().unwrap().as_rule() == Rule::EOI) + } + + pub fn descend(&mut self) { + debug_assert_eq!(self.pairs.len(), 1); + let p = self.pairs.remove(0).unwrap(); + self.set_pair(p); + } + + pub fn descend_into(&mut self, rule: Rule, f: F) -> T + where + F: FnOnce(&mut Self) -> T, + { + self.descend_into_opt(rule, f).unwrap() + } + + pub fn descend_into_opt(&mut self, rule: Rule, f: F) -> Option + where + F: FnOnce(&mut Self) -> T, + { + let p = self.get_opt(rule)?; + Some(self.with_child(p, f)) + } + + pub fn single>(&mut self, rule: Rule) -> T { + self.single_opt(rule).unwrap() + } + + pub fn single_opt>(&mut self, rule: Rule) -> Option { + let p = self.get_opt(rule)?; + Some(self.with_child(p, T::from_syntax)) + } + + pub fn multi>(&mut self, rule: Rule) -> Vec { + let mut child = self.child.take().unwrap_or_default(); + let mut errors = vec![]; + + // `take` the pairs that match the `rule`, and convert them to T + let r = self + .pairs + .iter_mut() + .filter_map(|p| { + if p.as_ref().unwrap().as_rule() == rule { + let p = p.take().unwrap(); + child.reset(p, |err| errors.push(err)); + Some(T::from_syntax(&mut child)) + } else { + None + } + }) + .collect(); + + // remove the pairs that were taken + self.pairs.retain(|p| p.is_some()); + + self.errors.append(&mut errors); + child.clear(|e| self.errors.push(e)); + self.child = Some(child); + r + } + + pub fn get(&mut self, rule: Rule) -> Pair<'i, Rule> { + let r = self.get_opt(rule); + debug_assert!( + r.is_some(), + "Failed to get {rule:?} inside {:?}, with pairs: {:?}", + self.rule, + self.pairs + ); + r.unwrap() + } + + pub fn get_opt(&mut self, rule: Rule) -> Option> { + let pos = self + .pairs + .iter() + .position(|p| p.as_ref().unwrap().as_rule() == rule)?; + Some(self.pairs.remove(pos).unwrap()) + } + + pub fn parse_str(&mut self, rule: Rule) -> T + where + T: FromStr, + T::Err: std::fmt::Debug, + { + self.parse_str_opt(rule).unwrap() + } + + pub fn parse_str_opt(&mut self, rule: Rule) -> Option + where + T: FromStr, + T::Err: std::fmt::Debug, + { + self.get_opt(rule).map(|p| p.as_str().parse().unwrap()) + } + + pub fn map_while(&mut self, mut f: F) -> Vec + where + F: FnMut(Pair<'i, Rule>) -> Either, T>, + { + let mut out = vec![]; + for p in self.pairs.iter_mut() { + match f(p.take().unwrap()) { + Either::Left(pp) => { + *p = Some(pp); + break; + } + Either::Right(r) => { + out.push(r); + } + } + } + self.pairs.retain(|p| p.is_some()); + out + } +} + +impl<'i, E> std::default::Default for Node<'i, E> { + fn default() -> Self { + Self { + rule: Rule::EOI, + txt: Default::default(), + span: Default::default(), + pairs: vec![], + errors: vec![], + child: None, + } + } +} + +// #[cfg(test)] +// mod tests { +// use super::*; + +// #[test] +// fn test_with_module_comment() { +// let input = " +// #! Module comment 1 +// #! Module comment 2 + +// target = \"evm-ethereum-london\" + +// # f1 start 1 +// # f1 start 2 +// func private %f1() -> i32 { +// block0: +// return 311.i32; +// } + +// # f2 start 1 +// # f2 start 2 +// func public %f2() -> i32 { +// block0: +// return 311.i32; +// }"; + +// let parsed_module = parse_module2(input).unwrap(); +// let module_comments = parsed_module.module_comments; +// assert_eq!(module_comments[0], "#! Module comment 1"); +// assert_eq!(module_comments[1], "#! Module comment 2"); + +// let module = parsed_module.module; +// let mut funcs = module.iter_functions(); +// let func1 = funcs.next().unwrap(); +// let func1_comment = &parsed_module.func_comments[func1]; +// assert_eq!(func1_comment.len(), 2); +// assert_eq!(func1_comment[0], "# f1 start 1"); +// assert_eq!(func1_comment[1], "# f1 start 2"); + +// let func2 = funcs.next().unwrap(); +// let func2_comment = &parsed_module.func_comments[func2]; +// assert_eq!(func2_comment.len(), 2); +// assert_eq!(func2_comment[0], "# f2 start 1"); +// assert_eq!(func2_comment[1], "# f2 start 2"); +// } +// } diff --git a/crates/parser2/test_files/syntax/func/empty.snap b/crates/parser2/test_files/syntax/func/empty.snap new file mode 100644 index 00000000..f54d3f77 --- /dev/null +++ b/crates/parser2/test_files/syntax/func/empty.snap @@ -0,0 +1,10 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: s +input_file: crates/parser2/test_files/syntax/func/empty.sntn +--- +function "func %foo() {}" + function_signature "func %foo() " + function_identifier "%foo" + function_name "foo" + function_params "()" diff --git a/crates/parser2/test_files/syntax/func/empty.sntn b/crates/parser2/test_files/syntax/func/empty.sntn new file mode 100644 index 00000000..1ccc8e60 --- /dev/null +++ b/crates/parser2/test_files/syntax/func/empty.sntn @@ -0,0 +1 @@ +func %foo() {} \ No newline at end of file diff --git a/crates/parser2/test_files/syntax/func/simple.snap b/crates/parser2/test_files/syntax/func/simple.snap new file mode 100644 index 00000000..89742760 --- /dev/null +++ b/crates/parser2/test_files/syntax/func/simple.snap @@ -0,0 +1,79 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: s +input_file: crates/parser2/test_files/syntax/func/simple.sntn +--- +function "func %foo() -> i32 { +block0: + jump block1; +block1: + v0.i32 = phi (v1 block2) (100.i32 block0); + v1.i32 = add 1.i32 1.i32; +block2: + jump block1; +}" + function_signature "func %foo() -> i32" + function_identifier "%foo" + function_name "foo" + function_params "()" + function_ret_type "-> i32" + type_name "i32" + primitive_type "i32" + block "block0: + jump block1;" + block_ident "block0" + block_number "0" + stmt "jump block1;" + jump_stmt "jump block1" + block_ident "block1" + block_number "1" + block "block1: + v0.i32 = phi (v1 block2) (100.i32 block0); + v1.i32 = add 1.i32 1.i32;" + block_ident "block1" + block_number "1" + stmt "v0.i32 = phi (v1 block2) (100.i32 block0);" + define_stmt "v0.i32 = phi (v1 block2) (100.i32 block0)" + value_declaration "v0.i32" + value_name "v0" + type_name "i32" + primitive_type "i32" + expr "phi (v1 block2) (100.i32 block0)" + phi_expr "phi (v1 block2) (100.i32 block0)" + phi_value "(v1 block2)" + value "v1" + value_name "v1" + block_ident "block2" + block_number "2" + phi_value "(100.i32 block0)" + value "100.i32" + imm_number "100.i32" + decimal "100" + primitive_type "i32" + block_ident "block0" + block_number "0" + stmt "v1.i32 = add 1.i32 1.i32;" + define_stmt "v1.i32 = add 1.i32 1.i32" + value_declaration "v1.i32" + value_name "v1" + type_name "i32" + primitive_type "i32" + expr "add 1.i32 1.i32" + bin_expr "add 1.i32 1.i32" + bin_op "add" + value "1.i32" + imm_number "1.i32" + decimal "1" + primitive_type "i32" + value "1.i32" + imm_number "1.i32" + decimal "1" + primitive_type "i32" + block "block2: + jump block1;" + block_ident "block2" + block_number "2" + stmt "jump block1;" + jump_stmt "jump block1" + block_ident "block1" + block_number "1" diff --git a/crates/parser2/test_files/syntax/func/simple.sntn b/crates/parser2/test_files/syntax/func/simple.sntn new file mode 100644 index 00000000..3957eaf8 --- /dev/null +++ b/crates/parser2/test_files/syntax/func/simple.sntn @@ -0,0 +1,9 @@ +func %foo() -> i32 { +block0: + jump block1; +block1: + v0.i32 = phi (v1 block2) (100.i32 block0); + v1.i32 = add 1.i32 1.i32; +block2: + jump block1; +} diff --git a/crates/parser2/test_files/syntax/module/simple.ast.snap b/crates/parser2/test_files/syntax/module/simple.ast.snap new file mode 100644 index 00000000..1eb9ee1d --- /dev/null +++ b/crates/parser2/test_files/syntax/module/simple.ast.snap @@ -0,0 +1,564 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: "format!(\"{:#?}\", module)" +input_file: crates/parser2/test_files/syntax/module/simple.sntn +--- +Module { + target: Some( + TargetTriple { + architecture: Evm, + chain: Ethereum, + version: EvmVersion( + London, + ), + }, + ), + declared_functions: [ + FuncDeclaration { + linkage: External, + name: FunctionName( + "add_i8", + ), + params: [ + Int( + I8, + ), + Int( + I8, + ), + ], + ret_type: Some( + Int( + I8, + ), + ), + }, + ], + functions: [ + Func { + signature: FuncSignature { + linkage: Public, + name: FunctionName( + "main", + ), + params: [], + ret_type: None, + }, + blocks: [ + Block { + id: BlockId( + Some( + 0, + ), + ), + stmts: [ + Stmt { + kind: Define( + ValueDeclaration( + ValueName( + "v0", + ), + Int( + I8, + ), + ), + Call( + Call( + FunctionName( + "foo", + ), + [ + Immediate( + I8( + 100, + ), + ), + ], + ), + ), + ), + }, + Stmt { + kind: Return( + None, + ), + }, + ], + }, + ], + comments: [], + }, + Func { + signature: FuncSignature { + linkage: Private, + name: FunctionName( + "foo", + ), + params: [ + ValueDeclaration( + ValueName( + "v0", + ), + Int( + I8, + ), + ), + ], + ret_type: Some( + Int( + I8, + ), + ), + }, + blocks: [ + Block { + id: BlockId( + Some( + 0, + ), + ), + stmts: [ + Stmt { + kind: Define( + ValueDeclaration( + ValueName( + "v1", + ), + Int( + I8, + ), + ), + Binary( + Mul, + Named( + ValueName( + "v0", + ), + ), + Immediate( + I8( + 2, + ), + ), + ), + ), + }, + Stmt { + kind: Define( + ValueDeclaration( + ValueName( + "v2", + ), + Int( + I8, + ), + ), + Call( + Call( + FunctionName( + "add_i8", + ), + [ + Named( + ValueName( + "v0", + ), + ), + Named( + ValueName( + "v1", + ), + ), + ], + ), + ), + ), + }, + Stmt { + kind: Return( + Some( + Named( + ValueName( + "v2", + ), + ), + ), + ), + }, + ], + }, + ], + comments: [ + "# multiplies arg by 2", + ], + }, + Func { + signature: FuncSignature { + linkage: Private, + name: FunctionName( + "types", + ), + params: [ + ValueDeclaration( + ValueName( + "v0", + ), + Ptr( + Int( + I8, + ), + ), + ), + ValueDeclaration( + ValueName( + "v1", + ), + Array( + Int( + I8, + ), + 2, + ), + ), + ValueDeclaration( + ValueName( + "v2", + ), + Array( + Ptr( + Int( + I8, + ), + ), + 2, + ), + ), + ValueDeclaration( + ValueName( + "v3", + ), + Array( + Array( + Int( + I8, + ), + 2, + ), + 2, + ), + ), + ], + ret_type: None, + }, + blocks: [ + Block { + id: BlockId( + Some( + 0, + ), + ), + stmts: [ + Stmt { + kind: Return( + None, + ), + }, + ], + }, + ], + comments: [], + }, + Func { + signature: FuncSignature { + linkage: Private, + name: FunctionName( + "table", + ), + params: [ + ValueDeclaration( + ValueName( + "v0", + ), + Int( + I8, + ), + ), + ], + ret_type: None, + }, + blocks: [ + Block { + id: BlockId( + Some( + 0, + ), + ), + stmts: [ + Stmt { + kind: BranchTable( + Named( + ValueName( + "v0", + ), + ), + Some( + BlockId( + Some( + 0, + ), + ), + ), + [ + ( + Immediate( + I8( + 1, + ), + ), + BlockId( + Some( + 1, + ), + ), + ), + ( + Immediate( + I8( + 2, + ), + ), + BlockId( + Some( + 2, + ), + ), + ), + ], + ), + }, + ], + }, + Block { + id: BlockId( + Some( + 1, + ), + ), + stmts: [ + Stmt { + kind: Return( + Some( + Immediate( + I8( + 1, + ), + ), + ), + ), + }, + ], + }, + Block { + id: BlockId( + Some( + 2, + ), + ), + stmts: [ + Stmt { + kind: Return( + Some( + Immediate( + I8( + 2, + ), + ), + ), + ), + }, + ], + }, + ], + comments: [], + }, + Func { + signature: FuncSignature { + linkage: Private, + name: FunctionName( + "flow", + ), + params: [ + ValueDeclaration( + ValueName( + "v0", + ), + Int( + I64, + ), + ), + ], + ret_type: Some( + Int( + I64, + ), + ), + }, + blocks: [ + Block { + id: BlockId( + Some( + 0, + ), + ), + stmts: [ + Stmt { + kind: Jump( + BlockId( + Some( + 1, + ), + ), + ), + }, + ], + }, + Block { + id: BlockId( + Some( + 1, + ), + ), + stmts: [ + Stmt { + kind: Define( + ValueDeclaration( + ValueName( + "v1", + ), + Int( + I64, + ), + ), + Phi( + [ + ( + Named( + ValueName( + "v0", + ), + ), + BlockId( + Some( + 0, + ), + ), + ), + ( + Immediate( + I64( + 100, + ), + ), + BlockId( + Some( + 2, + ), + ), + ), + ], + ), + ), + }, + Stmt { + kind: Define( + ValueDeclaration( + ValueName( + "v2", + ), + Int( + I1, + ), + ), + Binary( + Gt, + Named( + ValueName( + "v1", + ), + ), + Immediate( + I64( + 10, + ), + ), + ), + ), + }, + Stmt { + kind: Branch( + Named( + ValueName( + "v2", + ), + ), + BlockId( + Some( + 2, + ), + ), + BlockId( + Some( + 3, + ), + ), + ), + }, + ], + }, + Block { + id: BlockId( + Some( + 2, + ), + ), + stmts: [ + Stmt { + kind: Jump( + BlockId( + Some( + 1, + ), + ), + ), + }, + ], + }, + Block { + id: BlockId( + Some( + 3, + ), + ), + stmts: [ + Stmt { + kind: Return( + Some( + Named( + ValueName( + "v1", + ), + ), + ), + ), + }, + ], + }, + ], + comments: [], + }, + ], + comments: [ + "#! this is a module", + "#! with two functions", + ], +} diff --git a/crates/parser2/test_files/syntax/module/simple.ir.snap b/crates/parser2/test_files/syntax/module/simple.ir.snap new file mode 100644 index 00000000..3e068a4d --- /dev/null +++ b/crates/parser2/test_files/syntax/module/simple.ir.snap @@ -0,0 +1,58 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: w.dump_string().unwrap() +input_file: crates/parser2/test_files/syntax/module/simple.sntn +--- +target = evm-ethereum-london +func external %add_i8(v0.i8, v1.i8) -> i8 { +} + +func public %main() -> void { + block0: + v0.i8 = call %foo 100.i8; + return; + +} + +func private %foo(v0.i8) -> i8 { + block0: + v1.i8 = mul v0 2.i8; + v2.i8 = call %add_i8 v0 v1; + return v2; + +} + +func private %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) -> void { + block0: + return; + +} + +func private %table(v0.i8) -> void { + block0: + br_table v0 block0 (1.i8 block1) (2.i8 block2); + + block1: + return 1.i8; + + block2: + return 2.i8; + +} + +func private %flow(v0.i64) -> i64 { + block0: + jump block1; + + block1: + v1.i64 = phi (v0 block0) (100.i64 block2); + v2.i1 = gt v1 10.i64; + br v2 block2 block3; + + block2: + jump block1; + + block3: + return v1; + +} diff --git a/crates/parser2/test_files/syntax/module/simple.snap b/crates/parser2/test_files/syntax/module/simple.snap new file mode 100644 index 00000000..c417d3d7 --- /dev/null +++ b/crates/parser2/test_files/syntax/module/simple.snap @@ -0,0 +1,360 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: s +input_file: crates/parser2/test_files/syntax/module/simple.sntn +--- +module "target = "evm-ethereum-london" + +#! this is a module +#! with two functions + +declare external %add_i8(i8, i8) -> i8; + +func public %main() { + block0: + v0.i8 = call %foo 100.i8; + return; +} + +# multiplies arg by 2 +func private %foo(v0.i8) -> i8 { +block0: +v1.i8 = mul v0 2.i8; +v2.i8 = call %add_i8 v0 v1; +return v2; +} + +func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) { + block0: + return; +} + +func %table(v0.i8) { + block0: + br_table v0 block0 (1.i8 block1) (2.i8 block2); + block1: + return 1.i8; + block2: + return 2.i8; +} + +func %flow(v0.i64) -> i64 { + block0: + jump block1; + block1: + v1.i64 = phi (v0 block0) (100.i64 block2); + v2.i1 = gt v1 10.i64; + br v2 block2 block3; + block2: + jump block1; + block3: + return v1; +} +" + target_triple "evm-ethereum-london" + COMMENT "#! this is a module" + COMMENT "#! with two functions" + function_declaration "declare external %add_i8(i8, i8) -> i8;" + function_linkage "external" + function_identifier "%add_i8" + function_name "add_i8" + function_param_type_list "(i8, i8)" + type_name "i8" + primitive_type "i8" + type_name "i8" + primitive_type "i8" + function_ret_type "-> i8" + type_name "i8" + primitive_type "i8" + function "func public %main() { + block0: + v0.i8 = call %foo 100.i8; + return; + }" + function_signature "func public %main() " + function_linkage "public" + function_identifier "%main" + function_name "main" + function_params "()" + block "block0: + v0.i8 = call %foo 100.i8; + return;" + block_ident "block0" + block_number "0" + stmt "v0.i8 = call %foo 100.i8;" + define_stmt "v0.i8 = call %foo 100.i8" + value_declaration "v0.i8" + value_name "v0" + type_name "i8" + primitive_type "i8" + expr "call %foo 100.i8" + call_expr "call %foo 100.i8" + function_identifier "%foo" + function_name "foo" + value "100.i8" + imm_number "100.i8" + decimal "100" + primitive_type "i8" + stmt "return;" + return_stmt "return" + COMMENT "# multiplies arg by 2" + function "func private %foo(v0.i8) -> i8 { + block0: + v1.i8 = mul v0 2.i8; + v2.i8 = call %add_i8 v0 v1; + return v2; + }" + function_signature "func private %foo(v0.i8) -> i8" + function_linkage "private" + function_identifier "%foo" + function_name "foo" + function_params "(v0.i8)" + value_declaration "v0.i8" + value_name "v0" + type_name "i8" + primitive_type "i8" + function_ret_type "-> i8" + type_name "i8" + primitive_type "i8" + block "block0: + v1.i8 = mul v0 2.i8; + v2.i8 = call %add_i8 v0 v1; + return v2;" + block_ident "block0" + block_number "0" + stmt "v1.i8 = mul v0 2.i8;" + define_stmt "v1.i8 = mul v0 2.i8" + value_declaration "v1.i8" + value_name "v1" + type_name "i8" + primitive_type "i8" + expr "mul v0 2.i8" + bin_expr "mul v0 2.i8" + bin_op "mul" + value "v0" + value_name "v0" + value "2.i8" + imm_number "2.i8" + decimal "2" + primitive_type "i8" + stmt "v2.i8 = call %add_i8 v0 v1;" + define_stmt "v2.i8 = call %add_i8 v0 v1" + value_declaration "v2.i8" + value_name "v2" + type_name "i8" + primitive_type "i8" + expr "call %add_i8 v0 v1" + call_expr "call %add_i8 v0 v1" + function_identifier "%add_i8" + function_name "add_i8" + value "v0" + value_name "v0" + value "v1" + value_name "v1" + stmt "return v2;" + return_stmt "return v2" + value "v2" + value_name "v2" + function "func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) { + block0: + return; + }" + function_signature "func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) " + function_identifier "%types" + function_name "types" + function_params "(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2])" + value_declaration "v0.*i8" + value_name "v0" + type_name "*i8" + ptr_type "*i8" + type_name "i8" + primitive_type "i8" + value_declaration "v1.[i8; 2]" + value_name "v1" + type_name "[i8; 2]" + array_type "[i8; 2]" + type_name "i8" + primitive_type "i8" + array_size "2" + value_declaration "v2.[*i8; 2]" + value_name "v2" + type_name "[*i8; 2]" + array_type "[*i8; 2]" + type_name "*i8" + ptr_type "*i8" + type_name "i8" + primitive_type "i8" + array_size "2" + value_declaration "v3.[[i8; 2]; 2]" + value_name "v3" + type_name "[[i8; 2]; 2]" + array_type "[[i8; 2]; 2]" + type_name "[i8; 2]" + array_type "[i8; 2]" + type_name "i8" + primitive_type "i8" + array_size "2" + array_size "2" + block "block0: + return;" + block_ident "block0" + block_number "0" + stmt "return;" + return_stmt "return" + function "func %table(v0.i8) { + block0: + br_table v0 block0 (1.i8 block1) (2.i8 block2); + block1: + return 1.i8; + block2: + return 2.i8; + }" + function_signature "func %table(v0.i8) " + function_identifier "%table" + function_name "table" + function_params "(v0.i8)" + value_declaration "v0.i8" + value_name "v0" + type_name "i8" + primitive_type "i8" + block "block0: + br_table v0 block0 (1.i8 block1) (2.i8 block2);" + block_ident "block0" + block_number "0" + stmt "br_table v0 block0 (1.i8 block1) (2.i8 block2);" + br_table_stmt "br_table v0 block0 (1.i8 block1) (2.i8 block2)" + value "v0" + value_name "v0" + block_ident "block0" + block_number "0" + br_table_case "1.i8 block1" + value "1.i8" + imm_number "1.i8" + decimal "1" + primitive_type "i8" + block_ident "block1" + block_number "1" + br_table_case "2.i8 block2" + value "2.i8" + imm_number "2.i8" + decimal "2" + primitive_type "i8" + block_ident "block2" + block_number "2" + block "block1: + return 1.i8;" + block_ident "block1" + block_number "1" + stmt "return 1.i8;" + return_stmt "return 1.i8" + value "1.i8" + imm_number "1.i8" + decimal "1" + primitive_type "i8" + block "block2: + return 2.i8;" + block_ident "block2" + block_number "2" + stmt "return 2.i8;" + return_stmt "return 2.i8" + value "2.i8" + imm_number "2.i8" + decimal "2" + primitive_type "i8" + function "func %flow(v0.i64) -> i64 { + block0: + jump block1; + block1: + v1.i64 = phi (v0 block0) (100.i64 block2); + v2.i1 = gt v1 10.i64; + br v2 block2 block3; + block2: + jump block1; + block3: + return v1; + }" + function_signature "func %flow(v0.i64) -> i64" + function_identifier "%flow" + function_name "flow" + function_params "(v0.i64)" + value_declaration "v0.i64" + value_name "v0" + type_name "i64" + primitive_type "i64" + function_ret_type "-> i64" + type_name "i64" + primitive_type "i64" + block "block0: + jump block1;" + block_ident "block0" + block_number "0" + stmt "jump block1;" + jump_stmt "jump block1" + block_ident "block1" + block_number "1" + block "block1: + v1.i64 = phi (v0 block0) (100.i64 block2); + v2.i1 = gt v1 10.i64; + br v2 block2 block3;" + block_ident "block1" + block_number "1" + stmt "v1.i64 = phi (v0 block0) (100.i64 block2);" + define_stmt "v1.i64 = phi (v0 block0) (100.i64 block2)" + value_declaration "v1.i64" + value_name "v1" + type_name "i64" + primitive_type "i64" + expr "phi (v0 block0) (100.i64 block2)" + phi_expr "phi (v0 block0) (100.i64 block2)" + phi_value "(v0 block0)" + value "v0" + value_name "v0" + block_ident "block0" + block_number "0" + phi_value "(100.i64 block2)" + value "100.i64" + imm_number "100.i64" + decimal "100" + primitive_type "i64" + block_ident "block2" + block_number "2" + stmt "v2.i1 = gt v1 10.i64;" + define_stmt "v2.i1 = gt v1 10.i64" + value_declaration "v2.i1" + value_name "v2" + type_name "i1" + primitive_type "i1" + expr "gt v1 10.i64" + bin_expr "gt v1 10.i64" + bin_op "gt" + value "v1" + value_name "v1" + value "10.i64" + imm_number "10.i64" + decimal "10" + primitive_type "i64" + stmt "br v2 block2 block3;" + br_stmt "br v2 block2 block3" + value "v2" + value_name "v2" + block_ident "block2" + block_number "2" + block_ident "block3" + block_number "3" + block "block2: + jump block1;" + block_ident "block2" + block_number "2" + stmt "jump block1;" + jump_stmt "jump block1" + block_ident "block1" + block_number "1" + block "block3: + return v1;" + block_ident "block3" + block_number "3" + stmt "return v1;" + return_stmt "return v1" + value "v1" + value_name "v1" + EOI "" diff --git a/crates/parser2/test_files/syntax/module/simple.sntn b/crates/parser2/test_files/syntax/module/simple.sntn new file mode 100644 index 00000000..8a4898c5 --- /dev/null +++ b/crates/parser2/test_files/syntax/module/simple.sntn @@ -0,0 +1,47 @@ +target = "evm-ethereum-london" + +#! this is a module +#! with two functions + +declare external %add_i8(i8, i8) -> i8; + +func public %main() { + block0: + v0.i8 = call %foo 100.i8; + return; +} + +# multiplies arg by 2 +func private %foo(v0.i8) -> i8 { +block0: +v1.i8 = mul v0 2.i8; +v2.i8 = call %add_i8 v0 v1; +return v2; +} + +func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) { + block0: + return; +} + +func %table(v0.i8) { + block0: + br_table v0 block0 (1.i8 block1) (2.i8 block2); + block1: + return 1.i8; + block2: + return 2.i8; +} + +func %flow(v0.i64) -> i64 { + block0: + jump block1; + block1: + v1.i64 = phi (v0 block0) (100.i64 block2); + v2.i1 = gt v1 10.i64; + br v2 block2 block3; + block2: + jump block1; + block3: + return v1; +} diff --git a/crates/parser2/test_files/syntax/stmts/bin_op.snap b/crates/parser2/test_files/syntax/stmts/bin_op.snap new file mode 100644 index 00000000..5b3043d4 --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/bin_op.snap @@ -0,0 +1,20 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: s +input_file: crates/parser2/test_files/syntax/stmts/bin_op.sntn +--- +stmt "v1.i64 = add 0x10.i64 v0;" + define_stmt "v1.i64 = add 0x10.i64 v0" + value_declaration "v1.i64" + value_name "v1" + type_name "i64" + primitive_type "i64" + expr "add 0x10.i64 v0" + bin_expr "add 0x10.i64 v0" + bin_op "add" + value "0x10.i64" + imm_number "0x10.i64" + hex "0x10" + primitive_type "i64" + value "v0" + value_name "v0" diff --git a/crates/parser2/test_files/syntax/stmts/bin_op.sntn b/crates/parser2/test_files/syntax/stmts/bin_op.sntn new file mode 100644 index 00000000..e3d7e1b1 --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/bin_op.sntn @@ -0,0 +1,18 @@ +v1.i64 = add 0x10.i64 v0; +v1.i64 = sub 0b10.i64 v0; +v1.i64 = mul -10.i64 v0; +v1.i64 = udiv 01.i64 v0; +v1.i64 = sdiv 1.i64 v0; +v1.i64 = lt 1.i64 v0; +v1.i64 = gt 1.i64 v0; +v1.i64 = slt 1.i64 v0; +v1.i64 = sgt 1.i64 v0; +v1.i64 = le 1.i64 v0; +v1.i64 = ge 1.i64 v0; +v1.i64 = sle 1.i64 v0; +v1.i64 = sge 1.i64 v0; +v1.i64 = eq 1.i64 v0; +v1.i64 = ne 1.i64 v0; +v1.i64 = and 1.i64 v0; +v1.i64 = or 1.i64 v0; +v1.i64 = xor 1.i64 v0; diff --git a/crates/parser2/test_files/syntax/stmts/cast.snap b/crates/parser2/test_files/syntax/stmts/cast.snap new file mode 100644 index 00000000..b01dcb87 --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/cast.snap @@ -0,0 +1,49 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: s +input_file: crates/parser2/test_files/syntax/stmts/cast.sntn +--- +stmt "v0.i32 = sext v1;" + define_stmt "v0.i32 = sext v1" + value_declaration "v0.i32" + value_name "v0" + type_name "i32" + primitive_type "i32" + expr "sext v1" + cast_expr "sext v1" + cast_op "sext" + value "v1" + value_name "v1" +stmt "v0.i32 = zext v1;" + define_stmt "v0.i32 = zext v1" + value_declaration "v0.i32" + value_name "v0" + type_name "i32" + primitive_type "i32" + expr "zext v1" + cast_expr "zext v1" + cast_op "zext" + value "v1" + value_name "v1" +stmt "v0.i32 = bitcast v1;" + define_stmt "v0.i32 = bitcast v1" + value_declaration "v0.i32" + value_name "v0" + type_name "i32" + primitive_type "i32" + expr "bitcast v1" + cast_expr "bitcast v1" + cast_op "bitcast" + value "v1" + value_name "v1" +stmt "v0.i32 = trunc v1;" + define_stmt "v0.i32 = trunc v1" + value_declaration "v0.i32" + value_name "v0" + type_name "i32" + primitive_type "i32" + expr "trunc v1" + cast_expr "trunc v1" + cast_op "trunc" + value "v1" + value_name "v1" diff --git a/crates/parser2/test_files/syntax/stmts/cast.sntn b/crates/parser2/test_files/syntax/stmts/cast.sntn new file mode 100644 index 00000000..d50a3fe5 --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/cast.sntn @@ -0,0 +1,4 @@ +v0.i32 = sext v1; +v0.i32 = zext v1; +v0.i32 = bitcast v1; +v0.i32 = trunc v1; diff --git a/crates/parser2/test_files/syntax/stmts/control_flow.snap b/crates/parser2/test_files/syntax/stmts/control_flow.snap new file mode 100644 index 00000000..95d2d0c1 --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/control_flow.snap @@ -0,0 +1,73 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: s +input_file: crates/parser2/test_files/syntax/stmts/control_flow.sntn +--- +stmt "jump block0;" + jump_stmt "jump block0" + block_ident "block0" + block_number "0" +stmt "jump block100;" + jump_stmt "jump block100" + block_ident "block100" + block_number "100" +stmt "return 100.i32;" + return_stmt "return 100.i32" + value "100.i32" + imm_number "100.i32" + decimal "100" + primitive_type "i32" +stmt "return v0;" + return_stmt "return v0" + value "v0" + value_name "v0" +stmt "return;" + return_stmt "return" +stmt "br v0 block1 block2;" + br_stmt "br v0 block1 block2" + value "v0" + value_name "v0" + block_ident "block1" + block_number "1" + block_ident "block2" + block_number "2" +stmt "br_table v0 block1 (1.i32 block2) (2.i32 block3);" + br_table_stmt "br_table v0 block1 (1.i32 block2) (2.i32 block3)" + value "v0" + value_name "v0" + block_ident "block1" + block_number "1" + br_table_case "1.i32 block2" + value "1.i32" + imm_number "1.i32" + decimal "1" + primitive_type "i32" + block_ident "block2" + block_number "2" + br_table_case "2.i32 block3" + value "2.i32" + imm_number "2.i32" + decimal "2" + primitive_type "i32" + block_ident "block3" + block_number "3" +stmt "br_table 1.i8 (1.i8 block2) (2.i8 block3);" + br_table_stmt "br_table 1.i8 (1.i8 block2) (2.i8 block3)" + value "1.i8" + imm_number "1.i8" + decimal "1" + primitive_type "i8" + br_table_case "1.i8 block2" + value "1.i8" + imm_number "1.i8" + decimal "1" + primitive_type "i8" + block_ident "block2" + block_number "2" + br_table_case "2.i8 block3" + value "2.i8" + imm_number "2.i8" + decimal "2" + primitive_type "i8" + block_ident "block3" + block_number "3" diff --git a/crates/parser2/test_files/syntax/stmts/control_flow.sntn b/crates/parser2/test_files/syntax/stmts/control_flow.sntn new file mode 100644 index 00000000..15f7539f --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/control_flow.sntn @@ -0,0 +1,8 @@ +jump block0; +jump block100; +return 100.i32; +return v0; +return; +br v0 block1 block2; +br_table v0 block1 (1.i32 block2) (2.i32 block3); +br_table 1.i8 (1.i8 block2) (2.i8 block3); diff --git a/crates/parser2/test_files/syntax/stmts/stmts.snap b/crates/parser2/test_files/syntax/stmts/stmts.snap new file mode 100644 index 00000000..2ad27b81 --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/stmts.snap @@ -0,0 +1,103 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: s +input_file: crates/parser2/test_files/syntax/stmts/stmts.sntn +--- +stmt "store @memory v1 v2;" + store_stmt "store @memory v1 v2" + location "@memory" + value "v1" + value_name "v1" + value "v2" + value_name "v2" +stmt "store @storage 0.i32 1000.i64;" + store_stmt "store @storage 0.i32 1000.i64" + location "@storage" + value "0.i32" + imm_number "0.i32" + decimal "0" + primitive_type "i32" + value "1000.i64" + imm_number "1000.i64" + decimal "1000" + primitive_type "i64" +stmt "v1.i64 = load @storage v0;" + define_stmt "v1.i64 = load @storage v0" + value_declaration "v1.i64" + value_name "v1" + type_name "i64" + primitive_type "i64" + expr "load @storage v0" + load_expr "load @storage v0" + location "@storage" + value "v0" + value_name "v0" +stmt "v1.i64 = load @memory 0.i32;" + define_stmt "v1.i64 = load @memory 0.i32" + value_declaration "v1.i64" + value_name "v1" + type_name "i64" + primitive_type "i64" + expr "load @memory 0.i32" + load_expr "load @memory 0.i32" + location "@memory" + value "0.i32" + imm_number "0.i32" + decimal "0" + primitive_type "i32" +stmt "v4.i64 = call %foo v0 0.i8 v1 v2 v3;" + define_stmt "v4.i64 = call %foo v0 0.i8 v1 v2 v3" + value_declaration "v4.i64" + value_name "v4" + type_name "i64" + primitive_type "i64" + expr "call %foo v0 0.i8 v1 v2 v3" + call_expr "call %foo v0 0.i8 v1 v2 v3" + function_identifier "%foo" + function_name "foo" + value "v0" + value_name "v0" + value "0.i8" + imm_number "0.i8" + decimal "0" + primitive_type "i8" + value "v1" + value_name "v1" + value "v2" + value_name "v2" + value "v3" + value_name "v3" +stmt "v4.*i64 = alloca i64;" + define_stmt "v4.*i64 = alloca i64" + value_declaration "v4.*i64" + value_name "v4" + type_name "*i64" + ptr_type "*i64" + type_name "i64" + primitive_type "i64" + expr "alloca i64" + alloca_expr "alloca i64" + type_name "i64" + primitive_type "i64" +stmt "v0.i64 = gep v0 0.i8 1.i8 1.i8;" + define_stmt "v0.i64 = gep v0 0.i8 1.i8 1.i8" + value_declaration "v0.i64" + value_name "v0" + type_name "i64" + primitive_type "i64" + expr "gep v0 0.i8 1.i8 1.i8" + gep_expr "gep v0 0.i8 1.i8 1.i8" + value "v0" + value_name "v0" + value "0.i8" + imm_number "0.i8" + decimal "0" + primitive_type "i8" + value "1.i8" + imm_number "1.i8" + decimal "1" + primitive_type "i8" + value "1.i8" + imm_number "1.i8" + decimal "1" + primitive_type "i8" diff --git a/crates/parser2/test_files/syntax/stmts/stmts.sntn b/crates/parser2/test_files/syntax/stmts/stmts.sntn new file mode 100644 index 00000000..21db8d42 --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/stmts.sntn @@ -0,0 +1,8 @@ +store @memory v1 v2; +store @storage 0.i32 1000.i64; +v1.i64 = load @storage v0; +v1.i64 = load @memory 0.i32; +v4.i64 = call %foo v0 0.i8 v1 v2 v3; +v4.*i64 = alloca i64; +v0.i64 = gep v0 0.i8 1.i8 1.i8; +v1.i8 = phi v0 (0.i8 block1) (1.i8 block2) (v5 block3) (v32 block4); diff --git a/crates/parser2/test_files/syntax/stmts/unary_op.snap b/crates/parser2/test_files/syntax/stmts/unary_op.snap new file mode 100644 index 00000000..cc3c0b2d --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/unary_op.snap @@ -0,0 +1,27 @@ +--- +source: crates/parser2/tests/syntax.rs +expression: s +input_file: crates/parser2/test_files/syntax/stmts/unary_op.sntn +--- +stmt "v1.i1 = not v0;" + define_stmt "v1.i1 = not v0" + value_declaration "v1.i1" + value_name "v1" + type_name "i1" + primitive_type "i1" + expr "not v0" + una_expr "not v0" + una_op "not" + value "v0" + value_name "v0" +stmt "v3.i8 = neg v2;" + define_stmt "v3.i8 = neg v2" + value_declaration "v3.i8" + value_name "v3" + type_name "i8" + primitive_type "i8" + expr "neg v2" + una_expr "neg v2" + una_op "neg" + value "v2" + value_name "v2" diff --git a/crates/parser2/test_files/syntax/stmts/unary_op.sntn b/crates/parser2/test_files/syntax/stmts/unary_op.sntn new file mode 100644 index 00000000..3bbbcfdc --- /dev/null +++ b/crates/parser2/test_files/syntax/stmts/unary_op.sntn @@ -0,0 +1,2 @@ +v1.i1 = not v0; +v3.i8 = neg v2; diff --git a/crates/parser2/tests/syntax.rs b/crates/parser2/tests/syntax.rs new file mode 100644 index 00000000..5af88b2d --- /dev/null +++ b/crates/parser2/tests/syntax.rs @@ -0,0 +1,153 @@ +use ariadne::{Label, Report, ReportKind, Source}; +use dir_test::{dir_test, Fixture}; +use indenter::indented; +use ir::ir_writer::ModuleWriter; +use pest::{error::InputLocation, iterators::Pairs, Parser as _}; +use sonatina_parser2::{ + ast, parse_module, + syntax::{Parser, Rule}, +}; + +use std::{ + fmt::{self, Write}, + ops::Range, +}; + +#[dir_test( + dir: "$CARGO_MANIFEST_DIR/test_files/syntax/stmts", + glob: "*.sntn" +)] +fn test_stmts(fixture: Fixture<&str>) { + test_rule(Rule::_stmts, fixture) +} + +#[dir_test( + dir: "$CARGO_MANIFEST_DIR/test_files/syntax/func", + glob: "*.sntn" +)] +fn test_func(fixture: Fixture<&str>) { + test_rule(Rule::_functions, fixture) +} + +#[dir_test( + dir: "$CARGO_MANIFEST_DIR/test_files/syntax/module", + glob: "*.sntn" +)] +fn test_module(fixture: Fixture<&str>) { + test_rule(Rule::module, fixture) +} + +#[dir_test( + dir: "$CARGO_MANIFEST_DIR/test_files/syntax/module", + glob: "*.sntn" +)] +fn test_module_ast(fixture: Fixture<&str>) { + let module = ast::parse(fixture.content()).unwrap(); + snap_test!(format!("{:#?}", module), fixture.path(), Some("ast")); +} + +#[dir_test( + dir: "$CARGO_MANIFEST_DIR/test_files/syntax/module", + glob: "*.sntn" +)] +fn test_module_ir(fixture: Fixture<&str>) { + let module = parse_module(fixture.content()).unwrap(); + let mut w = ModuleWriter::new(&module.module); + snap_test!(w.dump_string().unwrap(), fixture.path(), Some("ir")); +} + +fn test_rule(rule: Rule, fixture: Fixture<&str>) { + match Parser::parse(rule, fixture.content()) { + Ok(r) => { + let s = format!("{:?}", PairsWrapper(r)); + snap_test!(s, fixture.path()); + } + Err(err) => { + report_error(err, &fixture); + panic!(); + } + } +} + +fn location_range(loc: InputLocation) -> Range { + match loc { + InputLocation::Pos(pos) => pos..pos, + InputLocation::Span((s, e)) => s..e, + } +} + +fn report_error(err: pest::error::Error, fixture: &Fixture<&str>) { + let mut s = Vec::new(); + + Report::build(ReportKind::Error, fixture.path(), 12) + .with_code(3) + .with_message("parse error".to_string()) + .with_label( + Label::new((fixture.path(), location_range(err.location))) + .with_message(format!("{}", err.variant.message())), + ) + .finish() + .write_for_stdout((fixture.path(), Source::from(fixture.content())), &mut s) + .unwrap(); + + eprintln!("{}", std::str::from_utf8(&s).unwrap()); +} + +struct PairsWrapper<'i>(Pairs<'i, Rule>); + +impl<'i> fmt::Debug for PairsWrapper<'i> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for pair in self.0.clone() { + let rule = pair.as_rule(); + writeln!(f, "{rule:?} \"{}\"", pair.as_str(),)?; + let pairs = pair.into_inner(); + if pairs.len() > 0 { + write!(indented(f).with_str(" "), "{:?}", &PairsWrapper(pairs))?; + } + } + Ok(()) + } +} + +// xxx copied from fe test-utils +#[doc(hidden)] +pub use insta as _insta; +/// A macro to assert that a value matches a snapshot. +/// If the snapshot does not exist, it will be created in the same directory as +/// the test file. +#[macro_export] +macro_rules! snap_test { + ($value:expr, $fixture_path: expr) => { + snap_test!($value, $fixture_path, None) + }; + + ($value:expr, $fixture_path: expr, $suffix: expr) => { + let mut settings = insta::Settings::new(); + let fixture_path = ::std::path::Path::new($fixture_path); + let fixture_dir = fixture_path.parent().unwrap(); + let fixture_name = fixture_path.file_stem().unwrap().to_str().unwrap(); + + settings.set_snapshot_path(fixture_dir); + settings.set_input_file($fixture_path); + settings.set_prepend_module_to_snapshot(false); + let suffix: Option<&str> = $suffix; + let name = if let Some(suffix) = suffix { + format!("{fixture_name}.{suffix}") + } else { + fixture_name.into() + }; + settings.bind(|| { + insta::_macro_support::assert_snapshot( + name.into(), + &$value, + env!("CARGO_MANIFEST_DIR"), + fixture_name, + module_path!(), + file!(), + line!(), + stringify!($value), + ) + .unwrap() + }) + }; +} diff --git a/crates/triple/src/lib.rs b/crates/triple/src/lib.rs index 1bb7c6f9..5e0d940d 100644 --- a/crates/triple/src/lib.rs +++ b/crates/triple/src/lib.rs @@ -20,18 +20,28 @@ impl TargetTriple { pub fn parse(s: &str) -> Result { let mut triple = s.split('-'); - let arch = Architecture::parse(triple.next().ok_or(InvalidTriple::InvalidFormat(s))?)?; - let chain = Chain::parse(triple.next().ok_or(InvalidTriple::InvalidFormat(s))?)?; + let arch = Architecture::parse( + triple + .next() + .ok_or_else(|| InvalidTriple::InvalidFormat(s.to_string()))?, + )?; + let chain = Chain::parse( + triple + .next() + .ok_or_else(|| InvalidTriple::InvalidFormat(s.to_string()))?, + )?; let version = Version::parse( arch, chain, - triple.next().ok_or(InvalidTriple::InvalidFormat(s))?, + triple + .next() + .ok_or_else(|| InvalidTriple::InvalidFormat(s.to_string()))?, )?; if triple.next().is_none() { Ok(Self::new(arch, chain, version)) } else { - Err(InvalidTriple::InvalidFormat(s)) + Err(InvalidTriple::InvalidFormat(s.to_string())) } } } @@ -127,10 +137,10 @@ pub enum EvmVersion { Istanbul, London, } -#[derive(Debug, Clone, Copy, Error)] -pub enum InvalidTriple<'a> { +#[derive(Debug, Clone, Error)] +pub enum InvalidTriple { #[error("the format of triple must be `architecture-chain-version: but got `{0}`")] - InvalidFormat(&'a str), + InvalidFormat(String), #[error("given architecture is not supported")] ArchitectureNotSupported, From ef5f8ca4c16c3b43486f74b24d97b7180a68f64c Mon Sep 17 00:00:00 2001 From: Sean Billig Date: Thu, 20 Jun 2024 20:37:12 -0700 Subject: [PATCH 2/7] Parser support for struct declarations --- crates/ir/src/builder/module_builder.rs | 4 + crates/ir/src/ir_writer.rs | 4 +- crates/parser2/src/ast.rs | 128 ++++++++++++------ crates/parser2/src/lib.rs | 14 ++ crates/parser2/src/sonatina.pest | 24 ++-- .../test_files/syntax/module/simple.ast.snap | 56 ++++++++ .../test_files/syntax/module/simple.ir.snap | 4 +- .../test_files/syntax/module/simple.snap | 46 ++++++- .../test_files/syntax/module/simple.sntn | 5 +- 9 files changed, 230 insertions(+), 55 deletions(-) diff --git a/crates/ir/src/builder/module_builder.rs b/crates/ir/src/builder/module_builder.rs index c6db2dd3..c5e2668d 100644 --- a/crates/ir/src/builder/module_builder.rs +++ b/crates/ir/src/builder/module_builder.rs @@ -57,6 +57,10 @@ impl ModuleBuilder { .with_ty_store_mut(|s| s.make_struct(name, fields, packed)) } + pub fn get_struct_type(&self, name: &str) -> Option { + self.ctx.with_ty_store(|s| s.struct_type_by_name(name)) + } + pub fn declare_array_type(&mut self, elem: Type, len: usize) -> Type { self.ctx.with_ty_store_mut(|s| s.make_array(elem, len)) } diff --git a/crates/ir/src/ir_writer.rs b/crates/ir/src/ir_writer.rs index 1886c05c..c7bc5acb 100644 --- a/crates/ir/src/ir_writer.rs +++ b/crates/ir/src/ir_writer.rs @@ -261,9 +261,9 @@ impl StructData { } if self.packed { - write!(w, "}}>;") + writeln!(w, "}}>;") } else { - write!(w, "}};") + writeln!(w, "}};") } } } diff --git a/crates/parser2/src/ast.rs b/crates/parser2/src/ast.rs index 5797afda..1892f0b0 100644 --- a/crates/parser2/src/ast.rs +++ b/crates/parser2/src/ast.rs @@ -46,23 +46,14 @@ pub fn parse(input: &str) -> Result> { pub struct Module { pub target: Option, pub declared_functions: Vec, + pub struct_types: Vec, pub functions: Vec, pub comments: Vec, } impl FromSyntax for Module { fn from_syntax(node: &mut Node) -> Self { - let target = match node - .get_opt(Rule::target_triple) - .map(|p| TargetTriple::parse(p.as_str())) - { - Some(Ok(t)) => Some(t), - Some(Err(e)) => { - node.error(Error::InvalidTarget(e, node.span.clone())); - None - } - None => None, - }; + let target = node.single_opt(Rule::target_triple).flatten(); let module_comments = node.map_while(|p| { if p.as_rule() == Rule::COMMENT && p.as_str().starts_with("#!") { @@ -72,8 +63,10 @@ impl FromSyntax for Module { } }); + let mut struct_types = vec![]; let mut declared_functions = vec![]; let mut functions = vec![]; + loop { let comments = node.map_while(|p| { if p.as_rule() == Rule::COMMENT { @@ -83,7 +76,9 @@ impl FromSyntax for Module { } }); - if let Some(func) = node.single_opt(Rule::function_declaration) { + if let Some(struct_) = node.single_opt(Rule::struct_declaration) { + struct_types.push(struct_); + } else if let Some(func) = node.single_opt(Rule::function_declaration) { declared_functions.push(func); } else { match node.single_opt::(Rule::function) { @@ -98,12 +93,88 @@ impl FromSyntax for Module { Module { target, declared_functions, + struct_types, functions, comments: module_comments, } } } +impl FromSyntax for Option { + fn from_syntax(node: &mut Node) -> Self { + match TargetTriple::parse(node.txt) { + Ok(t) => Some(t), + Err(e) => { + node.error(Error::InvalidTarget(e, node.span.clone())); + None + } + } + } +} + +impl FromSyntax for SmolStr { + fn from_syntax(node: &mut Node) -> Self { + node.txt.into() + } +} + +#[derive(Debug)] +pub struct FuncDeclaration { + pub linkage: Linkage, + pub name: FunctionName, + pub params: Vec, + pub ret_type: Option, +} + +impl FromSyntax for FuncDeclaration { + fn from_syntax(node: &mut Node) -> Self { + let linkage = node + .parse_str_opt(Rule::function_linkage) + .unwrap_or(Linkage::Private); + + FuncDeclaration { + linkage, + name: node.single(Rule::function_identifier), + params: node.descend_into(Rule::function_param_type_list, |n| n.multi(Rule::type_name)), + ret_type: node.descend_into_opt(Rule::function_ret_type, |n| n.single(Rule::type_name)), + } + } +} + +#[derive(Debug)] +pub struct Struct { + pub name: StructName, + pub fields: Vec, + pub packed: bool, +} + +impl FromSyntax for Struct { + fn from_syntax(node: &mut Node) -> Self { + let name = node.single(Rule::struct_identifier); + node.descend(); + let (fields, packed) = match node.rule { + Rule::normal_field_list => (node.multi(Rule::type_name), false), + Rule::packed_field_list => (node.multi(Rule::type_name), true), + _ => unreachable!(), + }; + + Self { + name, + fields, + packed, + } + } +} + +#[derive(Debug)] +pub struct StructName(pub SmolStr); + +impl FromSyntax for StructName { + fn from_syntax(node: &mut Node) -> Self { + Self(node.single(Rule::struct_name)) + } +} + #[derive(Debug)] pub struct Func { pub signature: FuncSignature, @@ -144,26 +215,13 @@ impl FromSyntax for FuncSignature { } } +/// Doesn't include `%` prefix. #[derive(Debug)] -pub struct FuncDeclaration { - pub linkage: Linkage, - pub name: FunctionName, - pub params: Vec, - pub ret_type: Option, -} +pub struct FunctionName(pub SmolStr); -impl FromSyntax for FuncDeclaration { +impl FromSyntax for FunctionName { fn from_syntax(node: &mut Node) -> Self { - let linkage = node - .parse_str_opt(Rule::function_linkage) - .unwrap_or(Linkage::Private); - - FuncDeclaration { - linkage, - name: node.single(Rule::function_identifier), - params: node.descend_into(Rule::function_param_type_list, |n| n.multi(Rule::type_name)), - ret_type: node.descend_into_opt(Rule::function_ret_type, |n| n.single(Rule::type_name)), - } + FunctionName(node.parse_str(Rule::function_name)) } } @@ -252,6 +310,7 @@ pub enum Type { Int(IntType), Ptr(Box), Array(Box, usize), + Struct(SmolStr), Void, Error, } @@ -270,6 +329,7 @@ impl FromSyntax for Type { Type::Array(Box::new(node.single(Rule::type_name)), size) } Rule::void_type => Type::Void, + Rule::struct_identifier => Type::Struct(node.parse_str(Rule::struct_name)), _ => unreachable!(), } } @@ -340,16 +400,6 @@ impl FromSyntax for Expr { #[derive(Debug)] pub struct Call(pub FunctionName, pub Vec); -/// Doesn't include `%` prefix. -#[derive(Debug)] -pub struct FunctionName(pub SmolStr); - -impl FromSyntax for FunctionName { - fn from_syntax(node: &mut Node) -> Self { - FunctionName(node.parse_str(Rule::function_name)) - } -} - #[derive(Debug)] pub struct ValueName(pub SmolStr); diff --git a/crates/parser2/src/lib.rs b/crates/parser2/src/lib.rs index a9d87228..2246f8f6 100644 --- a/crates/parser2/src/lib.rs +++ b/crates/parser2/src/lib.rs @@ -19,6 +19,15 @@ pub fn parse_module(input: &str) -> Result> { let ctx = ModuleCtx::new(isa); let mut builder = ModuleBuilder::new(ctx); + for st in ast.struct_types { + let fields = st + .fields + .iter() + .map(|t| build_type(&mut builder, t)) + .collect::>(); + builder.declare_struct_type(&st.name.0, &fields, false); + } + for func in ast.declared_functions { let params = func .params @@ -225,6 +234,11 @@ fn build_type(builder: &mut ModuleBuilder, t: &ast::Type) -> ir::Type { builder.declare_array_type(elem, *n) } ast::Type::Void => ir::Type::Void, + ast::Type::Struct(name) => builder.get_struct_type(name).unwrap_or_else(|| { + // xxx error on undeclared struct + eprintln!("struct type not found: {name}"); + ir::Type::Void + }), ast::Type::Error => todo!(), } } diff --git a/crates/parser2/src/sonatina.pest b/crates/parser2/src/sonatina.pest index 5aa980df..94833a15 100644 --- a/crates/parser2/src/sonatina.pest +++ b/crates/parser2/src/sonatina.pest @@ -1,34 +1,42 @@ -module = { SOI ~ NEWLINE* ~ target_specifier ~ (NEWLINE+ ~ function_declaration)* ~ (NEWLINE+ ~ function)* ~ NEWLINE* ~ EOI } +module = { SOI ~ NEWLINE* ~ target_specifier ~ (NEWLINE+ ~ declaration)* ~ (NEWLINE+ ~ function)* ~ NEWLINE* ~ EOI } WHITESPACE = _{ " " | "\t" } COMMENT = { "#" ~ (!NEWLINE ~ ANY)* } +ident_start_char = { ASCII_ALPHA | "_" } +ident_body_char = { ASCII_ALPHANUMERIC | "_" } + target_specifier = _{ "target" ~ "=" ~ "\"" ~ target_triple ~ "\"" } target_triple = @{ ASCII_ALPHA* ~ "-" ~ ASCII_ALPHA* ~ "-" ~ ASCII_ALPHA* } -ident_start_char = { ASCII_ALPHA | "_" } -ident_body_char = { ASCII_ALPHANUMERIC | "_" } +declaration = _{ function_declaration | struct_declaration } +function_declaration = { "declare" ~ function_linkage? ~ function_identifier ~ function_param_type_list ~ function_ret_type? ~ ";" } +function_param_type_list = { "(" ~ (type_name ~ ",")* ~ type_name? ~ ")" } +struct_declaration = { "type" ~ struct_identifier ~ "=" ~ struct_fields ~ ";" } +struct_identifier = ${ "%" ~ struct_name } +struct_fields = _{ normal_field_list | packed_field_list } +normal_field_list = { "{" ~ type_list ~ "}" } +packed_field_list = { "<{" ~ type_list ~ "}>" } +type_list = _{ (type_name ~ ",")* ~ type_name? } +struct_name = @{ ident_start_char ~ ident_body_char* } function = { function_signature ~ function_body } _functions = _{ (NEWLINE* ~ function ~ NEWLINE*)* } function_signature = { "func" ~ function_linkage? ~ function_identifier ~ function_params ~ function_ret_type? } function_ret_type = { "->" ~ type_name } function_linkage = { "public" | "private" | "external" } -function_name = @{ ident_start_char ~ ident_body_char* } function_identifier = ${ "%" ~ function_name } +function_name = @{ ident_start_char ~ ident_body_char* } function_params = { "(" ~ (value_declaration ~ ",")* ~ value_declaration? ~ ")" } function_body = _{ "{" ~ (NEWLINE+ ~ block?)* ~ "}" } block = { block_ident ~ ":" ~ (NEWLINE+ ~ stmt)* } _stmts = _{ (stmt ~ NEWLINE+)* } -function_declaration = { "declare" ~ function_linkage? ~ function_identifier ~ function_param_type_list ~ function_ret_type? ~ ";" } -function_param_type_list = { "(" ~ (type_name ~ ",")* ~ type_name? ~ ")" } - block_ident = ${ "block" ~ block_number } block_number = { ASCII_DIGIT+ } value_name = ${ "v" ~ ASCII_DIGIT+ } -type_name = { primitive_type | ptr_type | array_type | void_type } +type_name = { primitive_type | ptr_type | array_type | void_type | struct_identifier } primitive_type = { "i8" | "i16" | "i32" | "i64" | "i128" | "i256" | "i1" } ptr_type = ${ "*" ~ type_name } array_type = !{ "[" ~ type_name ~ ";" ~ array_size ~ "]" } diff --git a/crates/parser2/test_files/syntax/module/simple.ast.snap b/crates/parser2/test_files/syntax/module/simple.ast.snap index 1eb9ee1d..38f631a5 100644 --- a/crates/parser2/test_files/syntax/module/simple.ast.snap +++ b/crates/parser2/test_files/syntax/module/simple.ast.snap @@ -34,6 +34,44 @@ Module { ), }, ], + struct_types: [ + Struct { + name: StructName( + "foo", + ), + fields: [ + Int( + I8, + ), + Int( + I16, + ), + Ptr( + Int( + I64, + ), + ), + ], + packed: false, + }, + Struct { + name: StructName( + "bar", + ), + fields: [ + Int( + I8, + ), + Array( + Int( + I8, + ), + 31, + ), + ], + packed: true, + }, + ], functions: [ Func { signature: FuncSignature { @@ -247,6 +285,24 @@ Module { 2, ), ), + ValueDeclaration( + ValueName( + "v4", + ), + Struct( + "foo", + ), + ), + ValueDeclaration( + ValueName( + "v5", + ), + Ptr( + Struct( + "foo", + ), + ), + ), ], ret_type: None, }, diff --git a/crates/parser2/test_files/syntax/module/simple.ir.snap b/crates/parser2/test_files/syntax/module/simple.ir.snap index 3e068a4d..efed5d25 100644 --- a/crates/parser2/test_files/syntax/module/simple.ir.snap +++ b/crates/parser2/test_files/syntax/module/simple.ir.snap @@ -4,6 +4,8 @@ expression: w.dump_string().unwrap() input_file: crates/parser2/test_files/syntax/module/simple.sntn --- target = evm-ethereum-london +type %foo = {i8, i16, *i64}; +type %bar = {i8, [i8; 31]}; func external %add_i8(v0.i8, v1.i8) -> i8 { } @@ -22,7 +24,7 @@ func private %foo(v0.i8) -> i8 { } -func private %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) -> void { +func private %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2], v4.%foo, v5.*%foo) -> void { block0: return; diff --git a/crates/parser2/test_files/syntax/module/simple.snap b/crates/parser2/test_files/syntax/module/simple.snap index c417d3d7..d13c2f6f 100644 --- a/crates/parser2/test_files/syntax/module/simple.snap +++ b/crates/parser2/test_files/syntax/module/simple.snap @@ -10,6 +10,9 @@ module "target = "evm-ethereum-london" declare external %add_i8(i8, i8) -> i8; +type %foo = { i8, i16, *i64 }; +type %bar = <{ i8, [i8; 31] }>; + func public %main() { block0: v0.i8 = call %foo 100.i8; @@ -24,7 +27,7 @@ v2.i8 = call %add_i8 v0 v1; return v2; } -func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) { +func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2], v4.%foo, v5.*%foo) { block0: return; } @@ -66,6 +69,29 @@ func %flow(v0.i64) -> i64 { function_ret_type "-> i8" type_name "i8" primitive_type "i8" + struct_declaration "type %foo = { i8, i16, *i64 };" + struct_identifier "%foo" + struct_name "foo" + normal_field_list "{ i8, i16, *i64 }" + type_name "i8" + primitive_type "i8" + type_name "i16" + primitive_type "i16" + type_name "*i64" + ptr_type "*i64" + type_name "i64" + primitive_type "i64" + struct_declaration "type %bar = <{ i8, [i8; 31] }>;" + struct_identifier "%bar" + struct_name "bar" + packed_field_list "<{ i8, [i8; 31] }>" + type_name "i8" + primitive_type "i8" + type_name "[i8; 31]" + array_type "[i8; 31]" + type_name "i8" + primitive_type "i8" + array_size "31" function "func public %main() { block0: v0.i8 = call %foo 100.i8; @@ -155,14 +181,14 @@ func %flow(v0.i64) -> i64 { return_stmt "return v2" value "v2" value_name "v2" - function "func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) { + function "func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2], v4.%foo, v5.*%foo) { block0: return; }" - function_signature "func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) " + function_signature "func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2], v4.%foo, v5.*%foo) " function_identifier "%types" function_name "types" - function_params "(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2])" + function_params "(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2], v4.%foo, v5.*%foo)" value_declaration "v0.*i8" value_name "v0" type_name "*i8" @@ -195,6 +221,18 @@ func %flow(v0.i64) -> i64 { primitive_type "i8" array_size "2" array_size "2" + value_declaration "v4.%foo" + value_name "v4" + type_name "%foo" + struct_identifier "%foo" + struct_name "foo" + value_declaration "v5.*%foo" + value_name "v5" + type_name "*%foo" + ptr_type "*%foo" + type_name "%foo" + struct_identifier "%foo" + struct_name "foo" block "block0: return;" block_ident "block0" diff --git a/crates/parser2/test_files/syntax/module/simple.sntn b/crates/parser2/test_files/syntax/module/simple.sntn index 8a4898c5..a075b522 100644 --- a/crates/parser2/test_files/syntax/module/simple.sntn +++ b/crates/parser2/test_files/syntax/module/simple.sntn @@ -5,6 +5,9 @@ target = "evm-ethereum-london" declare external %add_i8(i8, i8) -> i8; +type %foo = { i8, i16, *i64 }; +type %bar = <{ i8, [i8; 31] }>; + func public %main() { block0: v0.i8 = call %foo 100.i8; @@ -19,7 +22,7 @@ v2.i8 = call %add_i8 v0 v1; return v2; } -func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2]) { +func %types(v0.*i8, v1.[i8; 2], v2.[*i8; 2], v3.[[i8; 2]; 2], v4.%foo, v5.*%foo) { block0: return; } From 4a652e6e4f34f78ad9a35175a8499f74105dd9c9 Mon Sep 17 00:00:00 2001 From: Sean Billig Date: Thu, 20 Jun 2024 20:37:36 -0700 Subject: [PATCH 3/7] Fix parsing of immediates greater than signed int max --- crates/parser2/src/ast.rs | 77 +++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/crates/parser2/src/ast.rs b/crates/parser2/src/ast.rs index 1892f0b0..e1ee2216 100644 --- a/crates/parser2/src/ast.rs +++ b/crates/parser2/src/ast.rs @@ -425,6 +425,32 @@ pub enum Value { Error, } +macro_rules! parse_dec { + ($node:ident, $imm:expr, $ity:ty, $uty:ty) => { + if let Ok(v) = $node + .txt + .parse::<$ity>() + .or_else(|_| $node.txt.parse::<$uty>().map(|v| v as $ity)) + { + Value::Immediate($imm(v)) + } else { + let span = $node.span.clone(); + $node.error(Error::NumberOutOfBounds(span)); + Value::Error + } + }; +} + +macro_rules! parse_hex { + ($node:ident, $imm:expr, $ity:ty) => { + if let Some(bytes) = hex_bytes($node.txt) { + Value::Immediate($imm(<$ity>::from_be_bytes(bytes))) + } else { + Value::Error + } + }; +} + impl FromSyntax for Value { fn from_syntax(node: &mut Node) -> Self { node.descend(); @@ -444,19 +470,12 @@ impl FromSyntax for Value { }; Some(Immediate::I1(b)) }), - IntType::I8 => imm_or_err(node, || Some(Immediate::I8(txt.parse().ok()?))), - IntType::I16 => { - imm_or_err(node, || Some(Immediate::I16(txt.parse().ok()?))) - } - IntType::I32 => { - imm_or_err(node, || Some(Immediate::I32(txt.parse().ok()?))) - } - IntType::I64 => { - imm_or_err(node, || Some(Immediate::I64(txt.parse().ok()?))) - } - IntType::I128 => { - imm_or_err(node, || Some(Immediate::I128(txt.parse().ok()?))) - } + IntType::I8 => parse_dec!(node, Immediate::I8, i8, u8), + IntType::I16 => parse_dec!(node, Immediate::I16, i16, u16), + IntType::I32 => parse_dec!(node, Immediate::I32, i32, u32), + IntType::I64 => parse_dec!(node, Immediate::I64, i64, u64), + IntType::I128 => parse_dec!(node, Immediate::I128, i128, u128), + IntType::I256 => { let s = txt.strip_prefix('-'); let is_negative = s.is_some(); @@ -477,33 +496,27 @@ impl FromSyntax for Value { node.error(Error::NumberOutOfBounds(node.span.clone())); Value::Error } - IntType::I8 => imm_or_err(node, || { - Some(Immediate::I8(i8::from_be_bytes(hex_bytes(txt)?))) - }), - IntType::I16 => imm_or_err(node, || { - Some(Immediate::I16(i16::from_be_bytes(hex_bytes(txt)?))) - }), - IntType::I32 => imm_or_err(node, || { - Some(Immediate::I32(i32::from_be_bytes(hex_bytes(txt)?))) - }), - IntType::I64 => imm_or_err(node, || { - Some(Immediate::I64(i64::from_be_bytes(hex_bytes(txt)?))) - }), - IntType::I128 => imm_or_err(node, || { - Some(Immediate::I128(i128::from_be_bytes(hex_bytes(txt)?))) - }), + IntType::I8 => parse_hex!(node, Immediate::I8, i8), + IntType::I16 => parse_hex!(node, Immediate::I16, i16), + IntType::I32 => parse_hex!(node, Immediate::I32, i32), + IntType::I64 => parse_hex!(node, Immediate::I64, i64), + IntType::I128 => parse_hex!(node, Immediate::I128, i128), IntType::I256 => { let s = txt.strip_prefix('-'); let is_negative = s.is_some(); txt = s.unwrap_or(txt); - imm_or_err(node, || { - let mut i256 = U256::from_big_endian(&hex_bytes::<32>(txt)?).into(); + if let Some(bytes) = hex_bytes::<32>(txt) { + let mut i256 = U256::from_big_endian(&bytes).into(); if is_negative { i256 = I256::zero().overflowing_sub(i256).0; } - Some(Immediate::I256(i256)) - }) + Value::Immediate(Immediate::I256(i256)) + } else { + let span = node.span.clone(); + node.error(Error::NumberOutOfBounds(span)); + Value::Error + } } }, _ => unreachable!(), From 33ab215dc868b21464fe2ec74e8ef804ff2070ff Mon Sep 17 00:00:00 2001 From: Sean Billig Date: Thu, 20 Jun 2024 21:14:53 -0700 Subject: [PATCH 4/7] Replace old parser with new --- Cargo.toml | 2 +- crates/filecheck/Cargo.toml | 2 +- crates/filecheck/src/lib.rs | 2 +- crates/interpreter/Cargo.toml | 2 +- crates/interpreter/src/state.rs | 2 +- crates/parser/Cargo.toml | 18 +- crates/{parser2 => parser}/src/ast.rs | 10 +- crates/parser/src/lexer.rs | 793 ------------ crates/parser/src/lib.rs | 263 +++- crates/parser/src/parser.rs | 1138 ----------------- crates/{parser2 => parser}/src/sonatina.pest | 0 crates/{parser2 => parser}/src/syntax.rs | 0 .../test_files/syntax/func/empty.snap | 0 .../test_files/syntax/func/empty.sntn | 0 .../test_files/syntax/func/simple.snap | 0 .../test_files/syntax/func/simple.sntn | 0 .../test_files/syntax/module/simple.ast.snap | 0 .../test_files/syntax/module/simple.ir.snap | 0 .../test_files/syntax/module/simple.snap | 0 .../test_files/syntax/module/simple.sntn | 0 .../test_files/syntax/stmts/bin_op.snap | 0 .../test_files/syntax/stmts/bin_op.sntn | 0 .../test_files/syntax/stmts/cast.snap | 0 .../test_files/syntax/stmts/cast.sntn | 0 .../test_files/syntax/stmts/control_flow.snap | 0 .../test_files/syntax/stmts/control_flow.sntn | 0 .../test_files/syntax/stmts/stmts.snap | 0 .../test_files/syntax/stmts/stmts.sntn | 0 .../test_files/syntax/stmts/unary_op.snap | 0 .../test_files/syntax/stmts/unary_op.sntn | 0 crates/{parser2 => parser}/tests/syntax.rs | 35 +- crates/parser2/Cargo.toml | 33 - crates/parser2/src/lib.rs | 244 ---- 33 files changed, 273 insertions(+), 2271 deletions(-) rename crates/{parser2 => parser}/src/ast.rs (98%) delete mode 100644 crates/parser/src/lexer.rs delete mode 100644 crates/parser/src/parser.rs rename crates/{parser2 => parser}/src/sonatina.pest (100%) rename crates/{parser2 => parser}/src/syntax.rs (100%) rename crates/{parser2 => parser}/test_files/syntax/func/empty.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/func/empty.sntn (100%) rename crates/{parser2 => parser}/test_files/syntax/func/simple.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/func/simple.sntn (100%) rename crates/{parser2 => parser}/test_files/syntax/module/simple.ast.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/module/simple.ir.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/module/simple.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/module/simple.sntn (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/bin_op.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/bin_op.sntn (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/cast.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/cast.sntn (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/control_flow.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/control_flow.sntn (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/stmts.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/stmts.sntn (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/unary_op.snap (100%) rename crates/{parser2 => parser}/test_files/syntax/stmts/unary_op.sntn (100%) rename crates/{parser2 => parser}/tests/syntax.rs (80%) delete mode 100644 crates/parser2/Cargo.toml delete mode 100644 crates/parser2/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index bc21ff68..cf02c73c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ members = [ "crates/ir", "crates/codegen", "crates/object", - "crates/parser2", + "crates/parser", "crates/filecheck", "crates/triple", "crates/interpreter", diff --git a/crates/filecheck/Cargo.toml b/crates/filecheck/Cargo.toml index d2e398d4..b4c751b9 100644 --- a/crates/filecheck/Cargo.toml +++ b/crates/filecheck/Cargo.toml @@ -12,6 +12,6 @@ publish = false filecheck = "0.5.0" # { path = "/Users/sean/src/filecheck" } sonatina-ir = { path = "../ir" } sonatina-codegen = { path = "../codegen" } -sonatina-parser2 = { path = "../parser2" } +sonatina-parser = { path = "../parser" } termcolor = "1.1.2" walkdir = "2" diff --git a/crates/filecheck/src/lib.rs b/crates/filecheck/src/lib.rs index 29aac49c..c18a31ee 100644 --- a/crates/filecheck/src/lib.rs +++ b/crates/filecheck/src/lib.rs @@ -13,7 +13,7 @@ use std::{ use sonatina_ir::{ir_writer::FuncWriter, module::FuncRef, Function}; -use sonatina_parser2::{parse_module, ParsedModule}; +use sonatina_parser::{parse_module, ParsedModule}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use walkdir::WalkDir; diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml index 62254c3f..f679d9ab 100644 --- a/crates/interpreter/Cargo.toml +++ b/crates/interpreter/Cargo.toml @@ -19,4 +19,4 @@ cranelift-entity = "0.104" sonatina-ir = { path = "../ir", version = "0.0.3-alpha" } [dev-dependencies] -sonatina-parser2 = { path = "../parser2" } +sonatina-parser = { path = "../parser" } diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 4541b640..63d36034 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -273,7 +273,7 @@ mod test { use super::*; fn parse_module(input: &str) -> Module { - match sonatina_parser2::parse_module(input) { + match sonatina_parser::parse_module(input) { Ok(pm) => pm.module, Err(errs) => { for err in errs { diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index 73758117..fa9954c1 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -11,10 +11,22 @@ description = "Parser for sonatina-ir text format" categories = ["compilers", "parser", "wasm"] keywords = ["compiler", "evm", "wasm", "smart-contract"] -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -sonatina-ir = { path = "../ir", version = "0.0.3-alpha" } +ir = { package = "sonatina-ir", path = "../ir", version = "0.0.3-alpha" } sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } smallvec = "1.7.0" cranelift-entity = "0.104" +pest = "2.7.10" +pest_derive = "2.7.10" +pest-ast = "0.3.4" +from-pest = "0.3.2" +smol_str = "0.2.2" +hex = "0.4.3" +num-traits = { version = "0.2.19", default-features = false } +either = { version = "1.12.0", default-features = false } +annotate-snippets = "0.11.4" + +[dev-dependencies] +dir-test = { git = "https://github.com/sbillig/dir-test", rev = "c4115dd" } +insta = { version = "1.38.0" } +indenter = "0.3.3" diff --git a/crates/parser2/src/ast.rs b/crates/parser/src/ast.rs similarity index 98% rename from crates/parser2/src/ast.rs rename to crates/parser/src/ast.rs index e1ee2216..27d71956 100644 --- a/crates/parser2/src/ast.rs +++ b/crates/parser/src/ast.rs @@ -1,4 +1,4 @@ -use super::syntax::Node; +use super::{syntax::Node, Error}; use crate::syntax::{FromSyntax, Parser, Rule}; use annotate_snippets::{Level, Renderer, Snippet}; use either::Either; @@ -13,14 +13,6 @@ use smol_str::SmolStr; pub use sonatina_triple::{InvalidTriple, TargetTriple}; use std::{io, ops::Range, str::FromStr}; -#[derive(Debug)] -#[allow(clippy::large_enum_variant)] -pub enum Error { - NumberOutOfBounds(Range), - InvalidTarget(InvalidTriple, Range), - SyntaxError(pest::error::Error), -} - pub fn parse(input: &str) -> Result> { pest::set_error_detail(true); // xxx diff --git a/crates/parser/src/lexer.rs b/crates/parser/src/lexer.rs deleted file mode 100644 index e2f09d82..00000000 --- a/crates/parser/src/lexer.rs +++ /dev/null @@ -1,793 +0,0 @@ -use std::fmt; - -use sonatina_ir::{insn::DataLocationKind, Linkage, Type}; - -use super::{Error, ErrorKind, Result}; - -pub(super) struct Lexer<'a> { - input: &'a [u8], - peek: Option>>, - cur: usize, - line: u32, -} - -macro_rules! try_eat_variant { - ( - $self:ident, - ($first_name:expr, $first_code:expr), - $(($name:expr, $code:expr),)* - ) => { - if $self.eat_string_if($first_name).is_some() { - Some($first_code) - } $(else if $self.eat_string_if($name).is_some() { - Some($code) - })* else { - None - } - } -} - -impl<'a> Lexer<'a> { - pub(super) fn new(input: &'a str) -> Self { - debug_assert!(input.is_ascii()); - - Self { - input: input.as_bytes(), - peek: None, - cur: 0, - line: 1, - } - } - - pub(super) fn next_token(&mut self) -> Result>>> { - self.peek_token()?; - Ok(self.peek.take()) - } - - pub(super) fn peek_token(&mut self) -> Result>>> { - if self.peek.is_some() { - return Ok(self.peek.as_ref()); - } - - while let Some(c) = self.eat_char_if(|c| c.is_whitespace() || c.is_ascii_control()) { - if c == '\n' { - self.line += 1; - } - } - - if self.peek_char().is_none() { - return Ok(None); - } - - let token = if self.eat_char_if(|c| c == ':').is_some() { - Token::Colon - } else if self.eat_char_if(|c| c == ';').is_some() { - Token::SemiColon - } else if self.eat_char_if(|c| c == ',').is_some() { - Token::Comma - } else if self.eat_char_if(|c| c == '(').is_some() { - Token::LParen - } else if self.eat_char_if(|c| c == ')').is_some() { - Token::RParen - } else if self.eat_char_if(|c| c == '[').is_some() { - Token::LBracket - } else if self.eat_char_if(|c| c == ']').is_some() { - Token::RBracket - } else if self.eat_char_if(|c| c == '{').is_some() { - Token::LBrace - } else if self.eat_char_if(|c| c == '}').is_some() { - Token::RBrace - } else if self.eat_char_if(|c| c == '<').is_some() { - Token::LAngleBracket - } else if self.eat_char_if(|c| c == '>').is_some() { - Token::RAngleBracket - } else if self.eat_char_if(|c| c == '=').is_some() { - Token::Eq - } else if self.eat_char_if(|c| c == '.').is_some() { - Token::Dot - } else if self.eat_char_if(|c| c == '*').is_some() { - Token::Star - } else if self.eat_char_if(|c| c == '@').is_some() { - let loc = if self.eat_string_if(b"memory").is_some() { - DataLocationKind::Memory - } else if self.eat_string_if(b"storage").is_some() { - DataLocationKind::Storage - } else { - return Err(self.invalid_token()); - }; - Token::DataLocationKind(loc) - } else if self.eat_char_if(|c| c == '#').is_some() { - let is_module = self.eat_char_if(|c| c == '!').is_some(); - let start = self.cur; - while self.eat_char_if(|c| c != '\n').is_some() {} - let end = self.cur; - let comment = self.str_slice(start, end); - if is_module { - Token::ModuleComment(comment) - } else { - Token::FuncComment(comment) - } - } else if self.eat_char_if(|c| c == '%').is_some() { - if let Some(ident) = self.try_eat_ident() { - Token::Ident(ident) - } else { - return Err(self.invalid_token()); - } - } else if self.eat_char_if(|c| c == '"').is_some() { - self.eat_string_lit()? - } else if self.eat_string_if(b"target").is_some() { - Token::Target - } else if self.eat_string_if(b"func").is_some() { - Token::Func - } else if self.eat_string_if(b"gv").is_some() { - Token::Gv - } else if self.eat_string_if(b"const").is_some() { - Token::Const - } else if self.eat_string_if(b"declare").is_some() { - Token::Declare - } else if self.eat_string_if(b"public").is_some() { - Token::Linkage(Linkage::Public) - } else if self.eat_string_if(b"private").is_some() { - Token::Linkage(Linkage::Private) - } else if self.eat_string_if(b"external").is_some() { - Token::Linkage(Linkage::External) - } else if self.eat_string_if(b"undef").is_some() { - Token::Undef - } else if self.eat_string_if(b"type").is_some() { - Token::Type - } else if self.eat_string_if(b"->").is_some() { - Token::RArrow - } else if let Some(code) = self.try_eat_opcode() { - Token::OpCode(code) - } else if let Some(ty) = self.try_eat_base_ty() { - Token::BaseTy(ty) - } else if self.eat_string_if(b"block").is_some() { - if let Some(id) = self.try_eat_id() { - Token::Block(id) - } else { - return Err(self.invalid_token()); - } - } else if self.eat_string_if(b"v").is_some() { - if let Some(id) = self.try_eat_id() { - Token::Value(id) - } else { - return Err(self.invalid_token()); - } - } else if let Some(integer) = self.try_eat_integer() { - Token::Integer(integer) - } else { - return Err(self.invalid_token()); - }; - - self.peek = Some(WithLoc { - item: token, - line: self.line, - }); - Ok(self.peek.as_ref()) - } - - pub(super) fn line(&mut self) -> u32 { - self.line - } - - fn eat_char_if(&mut self, f: impl FnOnce(char) -> bool) -> Option { - match self.peek_char() { - Some(peek) if f(peek) => { - self.next_char(); - Some(peek) - } - _ => None, - } - } - - fn eat_string_if(&mut self, s: &[u8]) -> Option<&'a str> { - let start = self.cur; - let mut cur = self.cur; - for i in s { - if *i == self.input[cur] { - cur += 1; - } else { - return None; - } - } - - self.cur = cur; - Some(self.str_slice(start, cur)) - } - - fn eat_string_lit(&mut self) -> Result> { - let start = self.cur; - let mut cur = self.cur; - loop { - match self.input.get(cur) { - Some(c) => { - if *c == b'"' { - self.cur = cur + 1; - break; - } else { - cur += 1; - } - } - None => { - return Err(Error::new( - ErrorKind::SyntaxError("missing closing `\"`".into()), - self.line, - )) - } - } - } - - Ok(Token::String(self.str_slice(start, cur))) - } - - fn try_eat_opcode(&mut self) -> Option { - try_eat_variant! { - self, - (b"gep", Code::Gep), - (b"not", Code::Not), - (b"neg", Code::Neg), - (b"add", Code::Add), - (b"sub", Code::Sub), - (b"mul", Code::Mul), - (b"udiv", Code::Udiv), - (b"sdiv", Code::Sdiv), - (b"lt", Code::Lt), - (b"gt", Code::Gt), - (b"slt", Code::Slt), - (b"sgt", Code::Sgt), - (b"le", Code::Le), - (b"ge", Code::Ge), - (b"sle", Code::Sle), - (b"sge", Code::Sge), - (b"eq", Code::Eq), - (b"ne", Code::Ne), - (b"and", Code::And), - (b"or", Code::Or), - (b"xor", Code::Xor), - (b"sext", Code::Sext), - (b"zext", Code::Zext), - (b"bitcast", Code::BitCast), - (b"trunc", Code::Trunc), - (b"load", Code::Load), - (b"store", Code::Store), - (b"call", Code::Call), - (b"jump", Code::Jump), - (b"br_table", Code::BrTable), - (b"br", Code::Br), - (b"alloca", Code::Alloca), - (b"return", Code::Return), - (b"phi", Code::Phi), - } - } - - fn try_eat_base_ty(&mut self) -> Option { - try_eat_variant! { - self, - (b"i8", Type::I8), - (b"i16", Type::I16), - (b"i32", Type::I32), - (b"i64", Type::I64), - (b"i128", Type::I128), - (b"i256", Type::I256), - (b"i1", Type::I1), - (b"void", Type::Void), - } - } - - fn try_eat_id(&mut self) -> Option { - let start = self.cur; - while self.eat_char_if(|c| c.is_ascii_digit()).is_some() {} - let end = self.cur; - self.str_slice(start, end).parse().ok() - } - - fn try_eat_ident(&mut self) -> Option<&'a str> { - let start = self.cur; - while self - .eat_char_if(|c| c.is_alphanumeric() || c == '_') - .is_some() - {} - let end = self.cur; - if start == end { - None - } else { - Some(self.str_slice(start, end)) - } - } - - fn try_eat_integer(&mut self) -> Option<&'a str> { - let start = self.cur; - self.eat_char_if(|c| c == '-'); - while self.eat_char_if(|c| c.is_ascii_digit()).is_some() {} - let end = self.cur; - - if start == end { - None - } else { - Some(self.str_slice(start, end)) - } - } - - fn next_char(&mut self) -> Option { - let next = self.peek_char(); - self.cur += 1; - next - } - - fn peek_char(&mut self) -> Option { - self.input.get(self.cur).map(|peek| *peek as char) - } - - fn str_slice(&self, start: usize, end: usize) -> &'a str { - unsafe { std::str::from_utf8_unchecked(&self.input[start..end]) } - } - - fn invalid_token(&mut self) -> Error { - let start = self.cur; - while self - .eat_char_if(|c| !c.is_whitespace() && !c.is_ascii_control()) - .is_some() - {} - let end = self.cur; - let invalid_token = self.str_slice(start, end); - Error::new( - ErrorKind::InvalidToken(invalid_token.to_string()), - self.line, - ) - } -} - -#[derive(Debug, Clone)] -pub(super) struct WithLoc { - pub(super) item: T, - pub(super) line: u32, -} - -#[derive(Debug, Clone)] -pub(super) enum Token<'a> { - Func, - Gv, - Const, - Declare, - Linkage(Linkage), - RArrow, - Colon, - SemiColon, - Comma, - LParen, - RParen, - LBracket, - RBracket, - LBrace, - RBrace, - LAngleBracket, - RAngleBracket, - Eq, - Dot, - Star, - Undef, - Type, - Target, - ModuleComment(&'a str), - FuncComment(&'a str), - Block(u32), - Value(u32), - Ident(&'a str), - String(&'a str), - DataLocationKind(DataLocationKind), - OpCode(Code), - BaseTy(Type), - Integer(&'a str), -} - -impl<'a> Token<'a> { - pub(super) fn id(&self) -> u32 { - match self { - Self::Block(id) | Self::Value(id) => *id, - _ => unreachable!(), - } - } - - pub(super) fn string(&self) -> &'a str { - match self { - Self::ModuleComment(s) - | Self::FuncComment(s) - | Self::Ident(s) - | Self::Integer(s) - | Self::String(s) => s, - _ => unreachable!(), - } - } - - pub(super) fn opcode(&self) -> Code { - if let Self::OpCode(code) = self { - *code - } else { - unreachable!() - } - } - - pub(super) fn ty(&self) -> Type { - if let Self::BaseTy(ty) = self { - *ty - } else { - unreachable!() - } - } -} - -impl<'a> fmt::Display for Token<'a> { - fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::Func => write!(w, "func"), - Self::Gv => write!(w, "gv"), - Self::Const => write!(w, "const"), - Self::Declare => write!(w, "declare"), - Self::Linkage(linkage) => write!(w, "{}", linkage), - Self::RArrow => write!(w, "=>"), - Self::Colon => write!(w, ":"), - Self::SemiColon => write!(w, ";"), - Self::Comma => write!(w, ","), - Self::LParen => write!(w, "("), - Self::RParen => write!(w, ")"), - Self::LBrace => write!(w, "{{"), - Self::RBrace => write!(w, "}}"), - Self::LBracket => write!(w, "["), - Self::RBracket => write!(w, "]"), - Self::LAngleBracket => write!(w, "<"), - Self::RAngleBracket => write!(w, ">"), - Self::Eq => write!(w, "="), - Self::DataLocationKind(loc) => { - write!(w, "@")?; - - match loc { - DataLocationKind::Memory => write!(w, "memory"), - DataLocationKind::Storage => write!(w, "storage"), - } - } - Self::Dot => write!(w, "."), - Self::Star => write!(w, "*"), - Self::Undef => write!(w, "undef"), - Self::Type => write!(w, "type"), - Self::Target => write!(w, "target"), - Self::String(s) => write!(w, "{}", s), - Self::ModuleComment(comment) => write!(w, "#!{}", comment), - Self::FuncComment(comment) => write!(w, "#{}", comment), - Self::Block(id) => write!(w, "block{}", id), - Self::Value(id) => write!(w, "v{}", id), - Self::Ident(ident) => write!(w, "%{}", ident), - Self::OpCode(code) => write!(w, "{}", code), - Self::BaseTy(_) => write!(w, "type"), - Self::Integer(num) => w.write_str(num), - } - } -} - -#[derive(Debug, Clone, Copy)] -pub(super) enum Code { - // Unary ops. - Not, - Neg, - - // Binary ops. - Add, - Sub, - Mul, - Udiv, - Sdiv, - Lt, - Gt, - Slt, - Sgt, - Le, - Ge, - Sle, - Sge, - Eq, - Ne, - And, - Or, - Xor, - - // Cast ops. - Sext, - Zext, - Trunc, - BitCast, - - Load, - Store, - - // Function Call ops. - Call, - - // Jump ops. - Jump, - - // Branch ops. - Br, - BrTable, - - Gep, - - Alloca, - - Return, - - Phi, -} - -impl fmt::Display for Code { - fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { - use Code::*; - - let s = match self { - Not => "not", - Neg => "neg", - Add => "add", - Sub => "sub", - Mul => "mul", - Udiv => "udiv", - Sdiv => "sdiv", - BitCast => "bitcast", - Lt => "lt", - Gt => "gt", - Slt => "slt", - Sgt => "sgt", - Le => "le", - Ge => "ge", - Sle => "sle", - Sge => "sge", - Eq => "eq", - Ne => "ne", - And => "and", - Or => "or", - Xor => "xor", - Sext => "sext", - Zext => "zext", - Trunc => "trunc", - Load => "load", - Store => "store", - Call => "call", - Jump => "jump", - Gep => "gep", - Alloca => "alloca", - Br => "br", - BrTable => "br_table", - Return => "return", - Phi => "phi", - }; - - w.write_str(s) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn lexer_with_return() { - let input = "func private %test_func() -> i32, i64: - block0: - return 311.i32 -120.i64;"; - let mut lexer = Lexer::new(input); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Func - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Linkage(Linkage::Private), - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Ident("test_func") - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::LParen - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::RParen - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::RArrow - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I32) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Comma - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Colon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Block(0) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Colon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::OpCode(Code::Return) - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Integer("311") - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Dot - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I32) - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Integer("-120") - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Dot - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::SemiColon - )); - - assert!(lexer.next_token().unwrap().is_none()); - } - - #[test] - fn lexer_with_arg() { - let input = "func public %test_func(i32, i64): - block0: - v2.i64 = sext v0; - v3.i64 = mul v2 v1; - return; -"; - let mut lexer = Lexer::new(input); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Func - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Linkage(Linkage::Public) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Ident("test_func") - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::LParen - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I32) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Comma - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::RParen - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Colon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Block(0) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Colon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(2) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Dot - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Eq - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::OpCode(Code::Sext) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(0) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::SemiColon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(3) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Dot - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Eq - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::OpCode(Code::Mul) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(2) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(1) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::SemiColon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::OpCode(Code::Return) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::SemiColon - )); - - assert!(lexer.next_token().unwrap().is_none()); - } -} diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 2edd94fc..ba13ba2d 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -1,27 +1,256 @@ -//! This crate provides a parser for sonatina-IR text format. -//! The text format is mainly used for debugging and testing. +use std::ops::Range; -pub mod parser; +use ast::ValueDeclaration; +use cranelift_entity::SecondaryMap; +use ir::{ + self, + builder::{FunctionBuilder, ModuleBuilder}, + func_cursor::{CursorLocation, FuncCursor, InsnInserter}, + isa::IsaBuilder, + module::{FuncRef, ModuleCtx}, + Module, Signature, +}; +use sonatina_triple::InvalidTriple; +use syntax::Rule; -mod lexer; - -pub type Result = std::result::Result; +pub mod ast; +pub mod syntax; #[derive(Debug)] -pub struct Error { - pub kind: ErrorKind, - pub line: u32, +#[allow(clippy::large_enum_variant)] +pub enum Error { + NumberOutOfBounds(Range), + InvalidTarget(InvalidTriple, Range), + SyntaxError(pest::error::Error), } -impl Error { - pub fn new(kind: ErrorKind, line: u32) -> Self { - Self { kind, line } +pub fn parse_module(input: &str) -> Result> { + let ast = ast::parse(input)?; + + let isa = IsaBuilder::new(ast.target.unwrap()).build(); // xxx + let ctx = ModuleCtx::new(isa); + let mut builder = ModuleBuilder::new(ctx); + + for st in ast.struct_types { + let fields = st + .fields + .iter() + .map(|t| build_type(&mut builder, t)) + .collect::>(); + builder.declare_struct_type(&st.name.0, &fields, false); + } + + for func in ast.declared_functions { + let params = func + .params + .iter() + .map(|t| build_type(&mut builder, t)) + .collect::>(); + let ret_ty = func + .ret_type + .as_ref() + .map(|t| build_type(&mut builder, t)) + .unwrap_or(ir::Type::Void); + + let sig = Signature::new(&func.name.0, func.linkage, ¶ms, ret_ty); + builder.declare_function(sig); + } + + for func in ast.functions.iter() { + let sig = &func.signature; + let args = sig + .params + .iter() + .map(|decl| build_type(&mut builder, &decl.1)) + .collect::>(); + + let ret_ty = sig + .ret_type + .as_ref() + .map(|t| build_type(&mut builder, t)) + .unwrap_or(ir::Type::Void); + let sig = Signature::new(&sig.name.0, sig.linkage, &args, ret_ty); + + builder.declare_function(sig); + } + + let mut func_comments = SecondaryMap::default(); + + for func in ast.functions { + let id = builder.get_func_ref(&func.signature.name.0).unwrap(); + let mut fb = builder.build_function(id); + build_func(&mut fb, &func); + fb.seal_all(); + builder = fb.finish(); + + func_comments[id] = func.comments; } + + let module = builder.build(); + Ok(ParsedModule { + module, + module_comments: ast.comments, + func_comments, + }) } -#[derive(Debug)] -pub enum ErrorKind { - InvalidToken(String), - SyntaxError(String), - SemanticError(String), +pub struct ParsedModule { + pub module: Module, + pub module_comments: Vec, + pub func_comments: SecondaryMap>, +} + +fn build_func(builder: &mut FunctionBuilder, func: &ast::Func) { + for (i, ValueDeclaration(name, _ty)) in func.signature.params.iter().enumerate() { + builder.name_value(builder.func.arg_values[i], &name.0); + } + + // "forward declare" all block ids + if let Some(max_block_id) = func.blocks.iter().map(|b| b.id.0.unwrap()).max() { + while builder.func.dfg.blocks.len() <= max_block_id as usize { + builder.cursor.make_block(&mut builder.func); + } + } + + for block in &func.blocks { + let block_id = ir::Block(block.id.0.unwrap()); + builder.cursor.append_block(&mut builder.func, block_id); + builder + .cursor + .set_location(CursorLocation::BlockTop(block_id)); + + for stmt in &block.stmts { + match &stmt.kind { + ast::StmtKind::Define(ValueDeclaration(val, ty), expr) => { + let ty = build_type(&mut builder.module_builder, ty); + + let result_val = match expr { + ast::Expr::Binary(op, lhs, rhs) => { + let lhs = build_value(builder, lhs); + let rhs = build_value(builder, rhs); + builder.binary_op(*op, lhs, rhs) + } + ast::Expr::Unary(op, val) => { + let val = build_value(builder, val); + builder.unary_op(*op, val) + } + ast::Expr::Cast(op, val) => { + let val = build_value(builder, val); + builder.cast_op(*op, val, ty) + } + ast::Expr::Load(location, addr) => { + let addr = build_value(builder, addr); + match location { + ir::DataLocationKind::Memory => builder.memory_load(addr), + ir::DataLocationKind::Storage => builder.storage_load(addr), + } + } + ast::Expr::Alloca(ty) => { + let ty = build_type(&mut builder.module_builder, ty); + builder.alloca(ty) + } + ast::Expr::Call(ast::Call(name, args)) => { + let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); + let args = args + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.call(func_ref, &args).unwrap() + } + ast::Expr::Gep(vals) => { + let vals = vals + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.gep(&vals).unwrap() + } + ast::Expr::Phi(vals) => { + let args = vals + .iter() + .map(|(val, block)| { + // xxx declare block + let b = ir::Block(block.0.unwrap()); + let v = build_value(builder, val); + (v, b) + }) + .collect::>(); + builder.phi(ty, &args) + } + }; + builder.name_value(result_val, &val.0) + } + ast::StmtKind::Store(loc, addr, val) => { + let addr = build_value(builder, addr); + let val = build_value(builder, val); + + match loc { + ir::DataLocationKind::Memory => builder.memory_store(addr, val), + ir::DataLocationKind::Storage => builder.storage_store(addr, val), + } + } + ast::StmtKind::Return(val) => { + let val = val.as_ref().map(|v| build_value(builder, v)); + builder.ret(val); + } + ast::StmtKind::Jump(block_id) => { + let block_id = ir::Block(block_id.0.unwrap()); + builder.jump(block_id); + } + ast::StmtKind::Branch(cond, true_block, false_block) => { + let cond = build_value(builder, cond); + let true_block = ir::Block(true_block.0.unwrap()); + let false_block = ir::Block(false_block.0.unwrap()); + builder.br(cond, true_block, false_block); + } + ast::StmtKind::BranchTable(index, default_block, table) => { + let index = build_value(builder, index); + let default_block = default_block.as_ref().map(|b| ir::Block(b.0.unwrap())); + let table = table + .iter() + .map(|(val, block)| { + (build_value(builder, val), ir::Block(block.0.unwrap())) + }) + .collect::>(); + builder.br_table(index, default_block, &table); + } + ast::StmtKind::Call(ast::Call(name, args)) => { + let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); + let args = args + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.call(func_ref, &args).unwrap(); + } + } + } + } +} + +fn build_value(builder: &mut FunctionBuilder, val: &ast::Value) -> ir::Value { + match val { + ast::Value::Immediate(imm) => builder.make_imm_value(*imm), + ast::Value::Named(v) => builder.get_named_value(&v.0), + ast::Value::Error => unreachable!(), + } +} + +fn build_type(builder: &mut ModuleBuilder, t: &ast::Type) -> ir::Type { + match t { + ast::Type::Int(i) => (*i).into(), + ast::Type::Ptr(t) => { + let t = build_type(builder, t); + builder.ptr_type(t) + } + ast::Type::Array(t, n) => { + let elem = build_type(builder, t); + builder.declare_array_type(elem, *n) + } + ast::Type::Void => ir::Type::Void, + ast::Type::Struct(name) => builder.get_struct_type(name).unwrap_or_else(|| { + // xxx error on undeclared struct + eprintln!("struct type not found: {name}"); + ir::Type::Void + }), + ast::Type::Error => todo!(), + } } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs deleted file mode 100644 index 6365ee63..00000000 --- a/crates/parser/src/parser.rs +++ /dev/null @@ -1,1138 +0,0 @@ -// TODO: Refactor and refactor and refactor!!! -use std::collections::HashSet; - -use cranelift_entity::SecondaryMap; -use smallvec::smallvec; - -use sonatina_ir::{ - builder::ModuleBuilder, - func_cursor::{CursorLocation, FuncCursor}, - global_variable::ConstantValue, - insn::{BinaryOp, CastOp, DataLocationKind, UnaryOp}, - isa::IsaBuilder, - module::{FuncRef, ModuleCtx}, - Block, BlockData, Function, GlobalVariableData, Immediate, Insn, InsnData, Linkage, Module, - Signature, Type, Value, ValueData, I256, U256, -}; -use sonatina_triple::TargetTriple; - -use super::{ - lexer::{Code, Lexer, Token, WithLoc}, - Error, ErrorKind, Result, -}; - -#[derive(Default)] -pub struct Parser {} - -macro_rules! eat_token { - ($lexer:expr, $token:pat) => { - if matches!($lexer.peek_token()?, Some(WithLoc { item: $token, .. })) { - Ok(Some($lexer.next_token()?.unwrap().item)) - } else { - Ok(None) - } - }; -} - -macro_rules! expect_token { - ($lexer:expr, $token:pat, $expected:expr) => { - if let Some(tok) = eat_token!($lexer, $token)? { - Ok(tok) - } else { - let (tok, line) = match $lexer.next_token()? { - Some(tok) => ((tok.item.to_string(), tok.line)), - None => (("EOF".to_string(), $lexer.line())), - }; - Err(Error::new( - ErrorKind::SyntaxError(format!("expected `{}`, but got `{}`", $expected, tok)), - line, - )) - } - }; -} - -impl Parser { - pub fn parse(self, input: &str) -> Result { - let mut lexer = Lexer::new(input); - - // Parse comments. - let mut module_comments = Vec::new(); - while let Some(WithLoc { - item: Token::ModuleComment(comment), - .. - }) = lexer.peek_token()? - { - module_comments.push(comment.to_string()); - lexer.next_token()?; - } - - // Parse target triple. - let triple = self.parse_target_triple(&mut lexer)?; - let isa = IsaBuilder::new(triple).build(); - let ctx = ModuleCtx::new(isa); - - let mut module_builder = ModuleBuilder::new(ctx); - - // Parse declared struct types. - while eat_token!(lexer, Token::Type)?.is_some() { - let name = expect_token!(lexer, Token::Ident(_), "type name")?.string(); - expect_token!(lexer, Token::Eq, "=")?; - let packed = eat_token!(lexer, Token::LAngleBracket)?.is_some(); - expect_token!(lexer, Token::LBrace, "{")?; - - let mut fields = vec![]; - if eat_token!(lexer, Token::RBrace)?.is_none() { - loop { - let ty = expect_ty(&module_builder.ctx, &mut lexer)?; - fields.push(ty); - if eat_token!(lexer, Token::RBrace)?.is_some() { - break; - } - expect_token!(lexer, Token::Comma, ",")?; - } - } - if packed { - expect_token!(lexer, Token::RAngleBracket, ">")?; - } - expect_token!(lexer, Token::SemiColon, ";")?; - - module_builder.declare_struct_type(name, &fields, packed); - } - - // Parse global variables. - while eat_token!(lexer, Token::Gv)?.is_some() { - let linkage = expect_linkage(&mut lexer)?; - let is_const = eat_token!(lexer, Token::Const)?.is_some(); - let symbol = expect_token!(lexer, Token::Ident(_), "global variable name")?.string(); - expect_token!(lexer, Token::Colon, ":")?; - let ty = expect_ty(&module_builder.ctx, &mut lexer)?; - - let init = eat_token!(lexer, Token::Eq)? - .map(|_| { - let init = expect_constant(&module_builder.ctx, &mut lexer, ty)?; - Ok(init) - }) - .transpose()?; - - expect_token!(lexer, Token::SemiColon, ";")?; - let gv_data = GlobalVariableData::new(symbol.to_string(), ty, linkage, is_const, init); - module_builder.make_global(gv_data); - } - - // Parse declared functions. - while eat_token!(lexer, Token::Declare)?.is_some() { - let sig = self.parse_declared_func_sig(&module_builder.ctx, &mut lexer)?; - expect_token!(lexer, Token::SemiColon, ";")?; - module_builder.declare_function(sig); - } - - // Parse functions. - let mut func_comments = SecondaryMap::default(); - while let Some(parsed_func) = FuncParser::new(&mut lexer, &mut module_builder).parse()? { - let func_ref = parsed_func.func_ref; - func_comments[func_ref] = parsed_func.comments; - } - - Ok(ParsedModule { - module: module_builder.build(), - module_comments, - func_comments, - }) - } - - fn parse_target_triple(&self, lexer: &mut Lexer) -> Result { - expect_token!(lexer, Token::Target, "target")?; - expect_token!(lexer, Token::Eq, "=")?; - let triple = expect_token!(lexer, Token::String(..), "target triple")?.string(); - - TargetTriple::parse(triple) - .map_err(|e| Error::new(ErrorKind::SemanticError(format!("{}", e)), lexer.line())) - } - - fn parse_declared_func_sig(&self, ctx: &ModuleCtx, lexer: &mut Lexer) -> Result { - let linkage = expect_linkage(lexer)?; - let name = expect_token!(lexer, Token::Ident(..), "func name")?.string(); - - // Parse argument types. - expect_token!(lexer, Token::LParen, "(")?; - let mut args = vec![]; - if eat_token!(lexer, Token::RParen)?.is_none() { - let ty = expect_ty(ctx, lexer)?; - args.push(ty); - while eat_token!(lexer, Token::RParen)?.is_none() { - expect_token!(lexer, Token::Comma, ",")?; - let ty = expect_ty(ctx, lexer)?; - args.push(ty); - } - } - - // Parse return type. - expect_token!(lexer, Token::RArrow, "->")?; - let ret_ty = expect_ty(ctx, lexer)?; - - Ok(Signature::new(name, linkage, &args, ret_ty)) - } -} - -pub struct ParsedModule { - pub module: Module, - pub module_comments: Vec, - pub func_comments: SecondaryMap>, -} - -struct ParsedFunction { - func_ref: FuncRef, - comments: Vec, -} - -struct FuncParser<'a, 'b> { - lexer: &'b mut Lexer<'a>, - module_builder: &'b mut ModuleBuilder, -} - -impl<'a, 'b> FuncParser<'a, 'b> { - fn new(lexer: &'b mut Lexer<'a>, module_builder: &'b mut ModuleBuilder) -> Self { - Self { - lexer, - module_builder, - } - } - - fn parse(&mut self) -> Result> { - if self.lexer.peek_token()?.is_none() { - return Ok(None); - } - - let comments = self.parse_comment()?; - expect_token!(self.lexer, Token::Func, "func")?; - let linkage = expect_linkage(self.lexer)?; - - let fn_name = expect_token!(self.lexer, Token::Ident(..), "func name")?.string(); - - expect_token!(self.lexer, Token::LParen, "(")?; - // Use `Void` for dummy return type. - let sig = Signature::new(fn_name, linkage, &[], Type::Void); - let mut func = Function::new(&self.module_builder.ctx, sig); - let mut inserter = InsnInserter::at_location(CursorLocation::NoWhere); - - if let Some(value) = eat_token!(self.lexer, Token::Value(..))? { - let value = Value(value.id()); - inserter.def_value(&mut func, value, self.lexer.line())?; - expect_token!(self.lexer, Token::Dot, "dot")?; - let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - inserter.append_arg_value(&mut func, value, ty); - - while eat_token!(self.lexer, Token::Comma)?.is_some() { - let value = Value(expect_token!(self.lexer, Token::Value(..), "value")?.id()); - inserter.def_value(&mut func, value, self.lexer.line())?; - expect_token!(self.lexer, Token::Dot, "dot")?; - let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - inserter.append_arg_value(&mut func, value, ty); - } - } - expect_token!(self.lexer, Token::RParen, ")")?; - - // Parse return type. - expect_token!(self.lexer, Token::RArrow, "->")?; - let ret_ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - func.sig.set_ret_ty(ret_ty); - expect_token!(self.lexer, Token::Colon, ":")?; - - let signature_line = self.lexer.line(); - self.parse_body(&mut func, &mut inserter)?; - - let func_ref = match self.module_builder.get_func_ref(func.sig.name()) { - Some(declared) if self.module_builder.sig(declared) == &func.sig => declared, - Some(_) => { - return Err(Error::new( - ErrorKind::SemanticError( - "signature mismatch with the corresponding declared function".to_string(), - ), - signature_line, - )) - } - None => self.module_builder.declare_function(func.sig.clone()), - }; - - std::mem::swap(&mut self.module_builder.funcs[func_ref], &mut func); - Ok(Some(ParsedFunction { func_ref, comments })) - } - - fn parse_body(&mut self, func: &mut Function, inserter: &mut InsnInserter) -> Result<()> { - while let Some(id) = eat_token!(self.lexer, Token::Block(..))? { - expect_token!(self.lexer, Token::Colon, ":")?; - self.parse_block_body(func, inserter, Block(id.id()))?; - } - - Ok(()) - } - - fn parse_block_body( - &mut self, - func: &mut Function, - inserter: &mut InsnInserter, - block: Block, - ) -> Result<()> { - inserter.def_block(func, block, self.lexer.line(), BlockData::default())?; - inserter.append_block(func, block); - inserter.set_location(CursorLocation::BlockTop(block)); - - loop { - if let Some(value) = eat_token!(self.lexer, Token::Value(..))? { - expect_token!(self.lexer, Token::Dot, ".")?; - let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - expect_token!(self.lexer, Token::Eq, "=")?; - let opcode = expect_token!(self.lexer, Token::OpCode(..), "opcode")?.opcode(); - let insn = opcode.make_insn(self, func, inserter, Some(ty))?; - let value = Value(value.id()); - inserter.def_value(func, value, self.lexer.line())?; - let result = func.dfg.make_result(insn).unwrap(); - func.dfg.values[value] = result; - func.dfg.attach_result(insn, value); - } else if let Some(opcode) = eat_token!(self.lexer, Token::OpCode(..))? { - opcode.opcode().make_insn(self, func, inserter, None)?; - } else { - break; - } - } - - Ok(()) - } - - fn expect_insn_arg( - &mut self, - func: &mut Function, - inserter: &mut InsnInserter, - idx: usize, - undefs: &mut Vec, - ) -> Result { - if let Some(value) = eat_token!(self.lexer, Token::Value(..))? { - let value = Value(value.id()); - if !inserter.defined_values.contains(&value) { - undefs.push(idx); - } - Ok(value) - } else if let Some(ident) = eat_token!(self.lexer, Token::Ident(..))? { - let gv = func - .dfg - .ctx - .with_gv_store(|s| s.gv_by_symbol(ident.string())) - .unwrap(); - Ok(func.dfg.make_global_value(gv)) - } else { - let number = - expect_token!(self.lexer, Token::Integer(..), "immediate or value")?.string(); - expect_token!(self.lexer, Token::Dot, "type annotation for immediate")?; - let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - let imm = build_imm_data(number, &ty, self.lexer.line())?; - Ok(inserter.def_imm(func, imm)) - } - } - - fn expect_block(&mut self) -> Result { - let id = expect_token!(self.lexer, Token::Block(..), "block")?.id(); - Ok(Block(id)) - } - - fn expect_data_loc_kind(&mut self) -> Result { - let token = expect_token!(self.lexer, Token::DataLocationKind(..), "data location")?; - - match token { - Token::DataLocationKind(loc) => Ok(loc), - _ => unreachable!(), - } - } - - fn parse_comment(&mut self) -> Result> { - let mut comments = Vec::new(); - while let Some(line) = eat_token!(self.lexer, Token::FuncComment(..))? { - comments.push(line.string().to_string()); - } - Ok(comments) - } -} - -fn expect_ty(ctx: &ModuleCtx, lexer: &mut Lexer) -> Result { - if let Some(ty) = eat_token!(lexer, Token::BaseTy(..))?.map(|tok| tok.ty()) { - return Ok(ty); - }; - - if eat_token!(lexer, Token::LBracket)?.is_some() { - // Try parse array element type. - let elem_ty = expect_ty(ctx, lexer)?; - expect_token!(lexer, Token::SemiColon, ";")?; - // Try parse array length. - let len = expect_token!(lexer, Token::Integer(..), " or value")? - .string() - .parse() - .map_err(|err| Error::new(ErrorKind::SyntaxError(format!("{}", err)), lexer.line()))?; - expect_token!(lexer, Token::RBracket, "]")?; - Ok(ctx.with_ty_store_mut(|s| s.make_array(elem_ty, len))) - } else if eat_token!(lexer, Token::Star)?.is_some() { - // Try parse ptr base type. - let elem_ty = expect_ty(ctx, lexer)?; - Ok(ctx.with_ty_store_mut(|s| s.make_ptr(elem_ty))) - } else if let Some(tok) = eat_token!(lexer, Token::Ident(..))? { - let name = tok.string(); - ctx.with_ty_store(|s| s.struct_type_by_name(name)) - .ok_or_else(|| { - Error::new( - ErrorKind::SemanticError(format!("type `{name}` is not declared")), - lexer.line(), - ) - }) - } else { - Err(Error::new( - ErrorKind::SyntaxError("invalid type".into()), - lexer.line(), - )) - } -} - -fn expect_linkage(lexer: &mut Lexer) -> Result { - let token = expect_token!(lexer, Token::Linkage { .. }, "linkage")?; - match token { - Token::Linkage(linkage) => Ok(linkage), - _ => unreachable!(), - } -} - -fn expect_constant(ctx: &ModuleCtx, lexer: &mut Lexer, ty: Type) -> Result { - if let Some(number) = eat_token!(lexer, Token::Integer(..))? { - if !ty.is_integral() { - return Err(Error::new( - ErrorKind::SemanticError("expected integral type".to_string()), - lexer.line(), - )); - } - - let data = build_imm_data(number.string(), &ty, lexer.line())?; - Ok(ConstantValue::Immediate(data)) - } else if eat_token!(lexer, Token::LBracket)?.is_some() { - let (elem_ty, mut len) = ctx.with_ty_store(|s| s.array_def(ty)).ok_or_else(|| { - Error::new( - ErrorKind::SemanticError("expcted array type".into()), - lexer.line(), - ) - })?; - - let mut data = Vec::with_capacity(len); - while len > 0 { - let elem = expect_constant(ctx, lexer, elem_ty)?; - data.push(elem); - if len > 1 { - expect_token!(lexer, Token::Comma, ",")?; - } - len -= 1; - } - - expect_token!(lexer, Token::RBracket, "]")?; - Ok(ConstantValue::Array(data)) - } else if eat_token!(lexer, Token::LBrace)?.is_some() { - let fields = ctx - .with_ty_store(|s| s.struct_def(ty).map(|def| def.fields.clone())) - .ok_or_else(|| { - Error::new( - ErrorKind::SemanticError("expected struct type".into()), - lexer.line(), - ) - })?; - - let mut data = Vec::with_capacity(fields.len()); - let field_len = fields.len(); - for (i, field_ty) in fields.into_iter().enumerate() { - let field = expect_constant(ctx, lexer, field_ty)?; - data.push(field); - if i < field_len - 1 { - expect_token!(lexer, Token::Comma, ",")?; - } - } - expect_token!(lexer, Token::RBrace, "}")?; - Ok(ConstantValue::Struct(data)) - } else { - Err(Error::new( - ErrorKind::SyntaxError("invalid constant".into()), - lexer.line(), - )) - } -} - -#[derive(Default)] -pub struct InsnInserter { - loc: CursorLocation, - defined_values: HashSet, - defined_blocks: HashSet, - defined_imms: HashSet, - undefs: HashSet<(Insn, usize)>, -} - -impl InsnInserter { - pub fn def_value(&mut self, func: &mut Function, value: Value, line: u32) -> Result<()> { - if self.defined_values.contains(&value) { - return Err(Error::new( - ErrorKind::SemanticError(format!("v{} is already defined", value.0)), - line, - )); - } - self.defined_values.insert(value); - - let value_len = func.dfg.values.len(); - let value_id = value.0 as usize; - - if value_len <= value_id { - func.dfg.values.reserve(value_id); - for _ in 0..(value_id - value_len + 1) { - // Make dummy value. - func.dfg.values.push(ValueData::Arg { - ty: Type::I8, - idx: usize::MAX, - }); - } - } - - if self.defined_imms.contains(&value) { - let imm_data = func.dfg.value_data(value).clone(); - let new_imm_value = func.dfg.make_value(imm_data); - let mut must_replace = vec![]; - for &user in func.dfg.users(value) { - for (idx, &arg) in func.dfg.insn_args(user).iter().enumerate() { - if arg == value && !self.undefs.contains(&(user, idx)) { - must_replace.push((user, idx)); - } - } - } - - for (insn, idx) in must_replace { - func.dfg.replace_insn_arg(insn, new_imm_value, idx); - } - - let imm = func.dfg.value_imm(new_imm_value).unwrap(); - func.dfg.immediates.insert(imm, new_imm_value); - self.defined_imms.remove(&value); - self.defined_imms.insert(new_imm_value); - } - - Ok(()) - } - - fn def_imm(&mut self, func: &mut Function, imm: Immediate) -> Value { - let value = func.dfg.make_imm_value(imm); - self.defined_imms.insert(value); - value - } - - pub fn def_block( - &mut self, - func: &mut Function, - block: Block, - line: u32, - block_data: BlockData, - ) -> Result<()> { - if self.defined_blocks.contains(&block) { - return Err(Error::new( - ErrorKind::SemanticError(format!("block{} is already defined", block.0)), - line, - )); - } - self.defined_blocks.insert(block); - - let block_id = block.0 as usize; - let block_len = func.dfg.blocks.len(); - - if block_len <= block_id { - func.dfg.blocks.reserve(block_id); - for _ in 0..(block_id - block_len + 1) { - // Make dummy block. - func.dfg.blocks.push(BlockData::default()); - } - } - - func.dfg.blocks[block] = block_data; - Ok(()) - } - - fn insert_insn_data(&mut self, func: &mut Function, insn_data: InsnData) -> Insn { - let insn = func.dfg.make_insn(insn_data); - self.insert_insn(func, insn); - self.set_location(CursorLocation::At(insn)); - insn - } - - fn append_arg_value(&mut self, func: &mut Function, value: Value, ty: Type) { - let idx = func.arg_values.len(); - - let value_data = func.dfg.make_arg_value(ty, idx); - func.sig.append_arg(ty); - func.dfg.values[value] = value_data; - func.arg_values.push(value); - } -} - -impl FuncCursor for InsnInserter { - fn at_location(loc: CursorLocation) -> Self { - Self { - loc, - ..Default::default() - } - } - - fn set_location(&mut self, loc: CursorLocation) { - self.loc = loc; - } - - fn loc(&self) -> CursorLocation { - self.loc - } -} - -macro_rules! make_unary { - ($parser:ident, $func:ident, $inserter:ident, $code:path, $undefs:expr) => {{ - let lhs = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; - expect_token!($parser.lexer, Token::SemiColon, ";")?; - InsnData::Unary { - code: $code, - args: [lhs], - } - }}; -} - -macro_rules! make_binary { - ($parser:ident, $func:ident, $inserter:ident, $code:path, $undefs:expr) => {{ - let lhs = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; - let rhs = $parser.expect_insn_arg($func, $inserter, 1, $undefs)?; - expect_token!($parser.lexer, Token::SemiColon, ";")?; - InsnData::Binary { - code: $code, - args: [lhs, rhs], - } - }}; -} - -macro_rules! make_cast { - ($parser:ident, $func:ident, $inserter:ident, $cast_to:expr, $code:path, $undefs:expr) => {{ - let arg = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; - expect_token!($parser.lexer, Token::SemiColon, ";")?; - InsnData::Cast { - code: $code, - args: [arg], - ty: $cast_to, - } - }}; -} - -macro_rules! make_jump { - ($parser:ident) => {{ - let dest = $parser.expect_block()?; - expect_token!($parser.lexer, Token::SemiColon, ";")?; - InsnData::Jump { dests: [dest] } - }}; -} - -impl Code { - /// Read args and create insn data. - fn make_insn( - self, - parser: &mut FuncParser, - func: &mut Function, - inserter: &mut InsnInserter, - ret_ty: Option, - ) -> Result { - let mut undefs = vec![]; - let insn_data = match self { - Self::Not => make_unary!(parser, func, inserter, UnaryOp::Not, &mut undefs), - Self::Neg => make_unary!(parser, func, inserter, UnaryOp::Neg, &mut undefs), - Self::Add => make_binary!(parser, func, inserter, BinaryOp::Add, &mut undefs), - Self::Sub => make_binary!(parser, func, inserter, BinaryOp::Sub, &mut undefs), - Self::Mul => make_binary!(parser, func, inserter, BinaryOp::Mul, &mut undefs), - Self::Udiv => make_binary!(parser, func, inserter, BinaryOp::Udiv, &mut undefs), - Self::Sdiv => make_binary!(parser, func, inserter, BinaryOp::Sdiv, &mut undefs), - Self::Lt => make_binary!(parser, func, inserter, BinaryOp::Lt, &mut undefs), - Self::Gt => make_binary!(parser, func, inserter, BinaryOp::Gt, &mut undefs), - Self::Slt => make_binary!(parser, func, inserter, BinaryOp::Slt, &mut undefs), - Self::Sgt => make_binary!(parser, func, inserter, BinaryOp::Sgt, &mut undefs), - Self::Le => make_binary!(parser, func, inserter, BinaryOp::Le, &mut undefs), - Self::Ge => make_binary!(parser, func, inserter, BinaryOp::Ge, &mut undefs), - Self::Sle => make_binary!(parser, func, inserter, BinaryOp::Sle, &mut undefs), - Self::Sge => make_binary!(parser, func, inserter, BinaryOp::Sge, &mut undefs), - Self::Eq => make_binary!(parser, func, inserter, BinaryOp::Eq, &mut undefs), - Self::Ne => make_binary!(parser, func, inserter, BinaryOp::Ne, &mut undefs), - Self::And => make_binary!(parser, func, inserter, BinaryOp::And, &mut undefs), - Self::Or => make_binary!(parser, func, inserter, BinaryOp::Or, &mut undefs), - Self::Xor => make_binary!(parser, func, inserter, BinaryOp::Xor, &mut undefs), - Self::Sext => make_cast!( - parser, - func, - inserter, - ret_ty.unwrap(), - CastOp::Sext, - &mut undefs - ), - Self::Zext => make_cast!( - parser, - func, - inserter, - ret_ty.unwrap(), - CastOp::Zext, - &mut undefs - ), - Self::BitCast => make_cast!( - parser, - func, - inserter, - ret_ty.unwrap(), - CastOp::BitCast, - &mut undefs - ), - Self::Trunc => make_cast!( - parser, - func, - inserter, - ret_ty.unwrap(), - CastOp::Trunc, - &mut undefs - ), - - Self::Load => { - let loc = parser.expect_data_loc_kind()?; - let arg = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Load { args: [arg], loc } - } - Self::Store => { - let loc = parser.expect_data_loc_kind()?; - let lhs = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; - let rhs = parser.expect_insn_arg(func, inserter, 1, &mut undefs)?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Store { - args: [lhs, rhs], - loc, - } - } - - Self::Call => { - let func_name = - expect_token!(parser.lexer, Token::Ident(..), "func name")?.string(); - let mut args = smallvec![]; - let mut idx = 0; - while eat_token!(parser.lexer, Token::SemiColon)?.is_none() { - let arg = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; - args.push(arg); - idx += 1; - } - - let callee = parser - .module_builder - .get_func_ref(func_name) - .ok_or_else(|| { - Error::new( - ErrorKind::SemanticError(format!("%{} is not declared", func_name)), - parser.lexer.line(), - ) - })?; - let sig = parser.module_builder.get_sig(callee).clone(); - let ret_ty = sig.ret_ty(); - func.callees.insert(callee, sig); - InsnData::Call { - func: callee, - args, - ret_ty, - } - } - - Self::Jump => make_jump!(parser), - - Self::Br => { - let cond = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; - let then = parser.expect_block()?; - let else_ = parser.expect_block()?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Branch { - args: [cond], - dests: [then, else_], - } - } - Self::BrTable => { - let mut arg_idx = 0; - let mut args = smallvec![]; - let cond = parser.expect_insn_arg(func, inserter, arg_idx, &mut undefs)?; - args.push(cond); - arg_idx += 1; - - let default = if eat_token!(parser.lexer, Token::Undef)?.is_some() { - None - } else { - Some(parser.expect_block()?) - }; - - let mut table = smallvec![]; - while eat_token!(parser.lexer, Token::LParen)?.is_some() { - let value = parser.expect_insn_arg(func, inserter, arg_idx, &mut undefs)?; - args.push(value); - let block = parser.expect_block()?; - table.push(block); - expect_token!(parser.lexer, Token::RParen, ")")?; - arg_idx += 1; - } - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::BrTable { - args, - default, - table, - } - } - - Self::Gep => { - let mut args = smallvec![]; - let mut idx = 0; - while eat_token!(parser.lexer, Token::SemiColon)?.is_none() { - let arg = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; - args.push(arg); - idx += 1; - } - - InsnData::Gep { args } - } - - Self::Alloca => { - let ty = expect_ty(&parser.module_builder.ctx, parser.lexer)?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Alloca { ty } - } - - Self::Return => { - if eat_token!(parser.lexer, Token::SemiColon)?.is_some() { - InsnData::Return { args: None } - } else { - let value = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Return { args: Some(value) } - } - } - - Self::Phi => { - let mut values = smallvec![]; - let mut blocks = smallvec![]; - let mut idx = 0; - while eat_token!(parser.lexer, Token::LParen)?.is_some() { - let value = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; - values.push(value); - let block = parser.expect_block()?; - blocks.push(block); - expect_token!(parser.lexer, Token::RParen, ")")?; - idx += 1; - } - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Phi { - values, - blocks, - ty: ret_ty.unwrap(), - } - } - }; - - let insn = inserter.insert_insn_data(func, insn_data); - for undef in undefs { - inserter.undefs.insert((insn, undef)); - } - - Ok(insn) - } -} - -fn build_imm_data(number: &str, ty: &Type, line: u32) -> Result { - match ty { - Type::I1 => number - .parse::() - .map(|val| Immediate::I1(val != 0)) - .map_err(|err| parse_imm_error(err, line)), - - Type::I8 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i8)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I16 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i16)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I32 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i32)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I64 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i64)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I128 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i128)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I256 => { - let number = number.to_string(); - let is_negative = number.as_bytes()[0] as char == '-'; - let number = if is_negative { &number[1..] } else { &number }; - let mut i256: I256 = U256::from_str_radix(number, 10) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line))?; - - if is_negative { - i256 = I256::zero().overflowing_sub(i256).0; - } - - Ok(Immediate::I256(i256)) - } - - _ => Err(Error::new( - ErrorKind::SemanticError("can't use non integral types for immediates".into()), - line, - )), - } -} - -fn parse_imm_error(err: impl std::fmt::Display, line: u32) -> Error { - Error::new( - ErrorKind::SemanticError(format!("failed to parse immediate: {}", err)), - line, - ) -} - -#[cfg(test)] -mod tests { - use super::*; - - use sonatina_ir::ir_writer::FuncWriter; - - fn test_func_parser(input: &str) -> bool { - let mut lexer = Lexer::new(input); - let triple = TargetTriple::parse("evm-ethereum-london").unwrap(); - let isa = IsaBuilder::new(triple).build(); - let mut module_builder = ModuleBuilder::new(ModuleCtx::new(isa)); - let parsed_func = FuncParser::new(&mut lexer, &mut module_builder) - .parse() - .unwrap() - .unwrap(); - let module = module_builder.build(); - let mut writer = FuncWriter::new(&module.funcs[parsed_func.func_ref]); - - input.trim() == writer.dump_string().unwrap().trim() - } - - #[test] - fn parser_with_return() { - assert!(test_func_parser( - "func private %test_func() -> i32: - block0: - return 311.i32;" - )); - } - - #[test] - fn test_with_arg() { - assert!(test_func_parser( - "func public %test_func(v0.i32, v1.i64) -> void: - block0: - v2.i64 = sext v0; - v3.i64 = mul v2 v1; - return; -" - )); - } - - #[test] - fn parser_with_non_continuous_value() { - assert!(test_func_parser( - "func private %test_func() -> i32: - block64: - jump block1; - - block1: - return 311.i32;" - )); - } - - #[test] - fn test_gep() { - assert!(test_func_parser( - "func public %test(v0.*i32, v1.*[*i64; 10]) -> *i32: - block0: - v2.*i32 = gep v0 10.i32; - v3.**i64 = gep v1 10.i32; - return v1;" - )); - } - - #[test] - fn parser_with_phi() { - assert!(test_func_parser( - "func private %test_func() -> void: - block0: - jump block1; - - block1: - v4.i32 = phi (1.i32 block0) (v5 block5); - br 1.i32 block6 block2; - - block2: - br 1.i32 block4 block3; - - block3: - jump block5; - - block4: - jump block5; - - block5: - v5.i32 = phi (2.i32 block3) (v4 block4); - jump block1; - - block6: - v3.i32 = add v4 v4; - return; - " - )); - } - - #[test] - fn parser_with_immediate() { - assert!(test_func_parser( - "func private %test_func() -> i8: - block64: - v0.i8 = add -1.i8 127.i8; - v1.i8 = add v0 3.i8; - jump block1; - - block1: - v2.i16 = zext -128.i8; - return v1;" - )); - } - - #[test] - fn test_with_module_comment() { - let input = " - #! Module comment 1 - #! Module comment 2 - - target = \"evm-ethereum-london\" - - # f1 start 1 - # f1 start 2 - func private %f1() -> i32: - block0: - return 311.i32; - - # f2 start 1 - # f2 start 2 - func public %f2() -> i32: - block0: - return 311.i32; - "; - - let parser = Parser::default(); - let parsed_module = parser.parse(input).unwrap(); - let module_comments = parsed_module.module_comments; - assert_eq!(module_comments[0], " Module comment 1"); - assert_eq!(module_comments[1], " Module comment 2"); - - let module = parsed_module.module; - let mut funcs = module.iter_functions(); - let func1 = funcs.next().unwrap(); - let func1_comment = &parsed_module.func_comments[func1]; - assert_eq!(func1_comment[0], " f1 start 1"); - assert_eq!(func1_comment[1], " f1 start 2"); - - let func2 = funcs.next().unwrap(); - let func2_comment = &parsed_module.func_comments[func2]; - assert_eq!(func2_comment[0], " f2 start 1"); - assert_eq!(func2_comment[1], " f2 start 2"); - } - - #[test] - fn test_with_struct_type() { - let input = " - target = \"evm-ethereum-london\" - - type %s1 = {i32, i64}; - type %s2_packed = <{i32, i64, *%s1}>; - - func public %test(v0.*%s1, v1.*%s2_packed) -> i32: - block0: - return 311.i32; - "; - - let parser = Parser::default(); - let module = parser.parse(input).unwrap().module; - - module.ctx.with_ty_store(|s| { - let ty = s.struct_type_by_name("s1").unwrap(); - let def = s.struct_def(ty).unwrap(); - assert_eq!(def.fields.len(), 2); - assert_eq!(def.fields[0], Type::I32); - assert_eq!(def.fields[1], Type::I64); - assert!(!def.packed); - }); - - let s1_ptr_ty = module.ctx.with_ty_store_mut(|s| { - let ty = s.struct_type_by_name("s1").unwrap(); - s.make_ptr(ty) - }); - module.ctx.with_ty_store(|s| { - let ty = s.struct_type_by_name("s2_packed").unwrap(); - let def = s.struct_def(ty).unwrap(); - assert_eq!(def.fields.len(), 3); - assert_eq!(def.fields[0], Type::I32); - assert_eq!(def.fields[1], Type::I64); - assert_eq!(def.fields[2], s1_ptr_ty); - assert!(def.packed); - }); - } - - #[test] - fn test_with_gv() { - let input = " - target = \"evm-ethereum-london\" - - gv public const %CONST_PUBLIC: i32 = 1; - gv external %GLOBAL_EXTERNAL: i32; - - func public %test() -> i32: - block0: - v2.i32 = add %CONST_PUBLIC %GLOBAL_EXTERNAL; - return v2; - "; - - let parser = Parser::default(); - let module = parser.parse(input).unwrap().module; - - module.ctx.with_gv_store(|s| { - let symbol = "CONST_PUBLIC"; - let gv = s.gv_by_symbol(symbol).unwrap(); - let data = s.gv_data(gv); - assert_eq!(data.symbol, symbol); - assert_eq!(data.ty, Type::I32); - assert_eq!(data.linkage, Linkage::Public); - assert!(data.is_const); - assert_eq!(data.data, Some(ConstantValue::make_imm(1i32))); - }); - - module.ctx.with_gv_store(|s| { - let symbol = "GLOBAL_EXTERNAL"; - let gv = s.gv_by_symbol(symbol).unwrap(); - let data = s.gv_data(gv); - assert_eq!(data.symbol, symbol); - assert_eq!(data.ty, Type::I32); - assert_eq!(data.linkage, Linkage::External); - assert!(!data.is_const); - assert_eq!(data.data, None) - }); - } -} diff --git a/crates/parser2/src/sonatina.pest b/crates/parser/src/sonatina.pest similarity index 100% rename from crates/parser2/src/sonatina.pest rename to crates/parser/src/sonatina.pest diff --git a/crates/parser2/src/syntax.rs b/crates/parser/src/syntax.rs similarity index 100% rename from crates/parser2/src/syntax.rs rename to crates/parser/src/syntax.rs diff --git a/crates/parser2/test_files/syntax/func/empty.snap b/crates/parser/test_files/syntax/func/empty.snap similarity index 100% rename from crates/parser2/test_files/syntax/func/empty.snap rename to crates/parser/test_files/syntax/func/empty.snap diff --git a/crates/parser2/test_files/syntax/func/empty.sntn b/crates/parser/test_files/syntax/func/empty.sntn similarity index 100% rename from crates/parser2/test_files/syntax/func/empty.sntn rename to crates/parser/test_files/syntax/func/empty.sntn diff --git a/crates/parser2/test_files/syntax/func/simple.snap b/crates/parser/test_files/syntax/func/simple.snap similarity index 100% rename from crates/parser2/test_files/syntax/func/simple.snap rename to crates/parser/test_files/syntax/func/simple.snap diff --git a/crates/parser2/test_files/syntax/func/simple.sntn b/crates/parser/test_files/syntax/func/simple.sntn similarity index 100% rename from crates/parser2/test_files/syntax/func/simple.sntn rename to crates/parser/test_files/syntax/func/simple.sntn diff --git a/crates/parser2/test_files/syntax/module/simple.ast.snap b/crates/parser/test_files/syntax/module/simple.ast.snap similarity index 100% rename from crates/parser2/test_files/syntax/module/simple.ast.snap rename to crates/parser/test_files/syntax/module/simple.ast.snap diff --git a/crates/parser2/test_files/syntax/module/simple.ir.snap b/crates/parser/test_files/syntax/module/simple.ir.snap similarity index 100% rename from crates/parser2/test_files/syntax/module/simple.ir.snap rename to crates/parser/test_files/syntax/module/simple.ir.snap diff --git a/crates/parser2/test_files/syntax/module/simple.snap b/crates/parser/test_files/syntax/module/simple.snap similarity index 100% rename from crates/parser2/test_files/syntax/module/simple.snap rename to crates/parser/test_files/syntax/module/simple.snap diff --git a/crates/parser2/test_files/syntax/module/simple.sntn b/crates/parser/test_files/syntax/module/simple.sntn similarity index 100% rename from crates/parser2/test_files/syntax/module/simple.sntn rename to crates/parser/test_files/syntax/module/simple.sntn diff --git a/crates/parser2/test_files/syntax/stmts/bin_op.snap b/crates/parser/test_files/syntax/stmts/bin_op.snap similarity index 100% rename from crates/parser2/test_files/syntax/stmts/bin_op.snap rename to crates/parser/test_files/syntax/stmts/bin_op.snap diff --git a/crates/parser2/test_files/syntax/stmts/bin_op.sntn b/crates/parser/test_files/syntax/stmts/bin_op.sntn similarity index 100% rename from crates/parser2/test_files/syntax/stmts/bin_op.sntn rename to crates/parser/test_files/syntax/stmts/bin_op.sntn diff --git a/crates/parser2/test_files/syntax/stmts/cast.snap b/crates/parser/test_files/syntax/stmts/cast.snap similarity index 100% rename from crates/parser2/test_files/syntax/stmts/cast.snap rename to crates/parser/test_files/syntax/stmts/cast.snap diff --git a/crates/parser2/test_files/syntax/stmts/cast.sntn b/crates/parser/test_files/syntax/stmts/cast.sntn similarity index 100% rename from crates/parser2/test_files/syntax/stmts/cast.sntn rename to crates/parser/test_files/syntax/stmts/cast.sntn diff --git a/crates/parser2/test_files/syntax/stmts/control_flow.snap b/crates/parser/test_files/syntax/stmts/control_flow.snap similarity index 100% rename from crates/parser2/test_files/syntax/stmts/control_flow.snap rename to crates/parser/test_files/syntax/stmts/control_flow.snap diff --git a/crates/parser2/test_files/syntax/stmts/control_flow.sntn b/crates/parser/test_files/syntax/stmts/control_flow.sntn similarity index 100% rename from crates/parser2/test_files/syntax/stmts/control_flow.sntn rename to crates/parser/test_files/syntax/stmts/control_flow.sntn diff --git a/crates/parser2/test_files/syntax/stmts/stmts.snap b/crates/parser/test_files/syntax/stmts/stmts.snap similarity index 100% rename from crates/parser2/test_files/syntax/stmts/stmts.snap rename to crates/parser/test_files/syntax/stmts/stmts.snap diff --git a/crates/parser2/test_files/syntax/stmts/stmts.sntn b/crates/parser/test_files/syntax/stmts/stmts.sntn similarity index 100% rename from crates/parser2/test_files/syntax/stmts/stmts.sntn rename to crates/parser/test_files/syntax/stmts/stmts.sntn diff --git a/crates/parser2/test_files/syntax/stmts/unary_op.snap b/crates/parser/test_files/syntax/stmts/unary_op.snap similarity index 100% rename from crates/parser2/test_files/syntax/stmts/unary_op.snap rename to crates/parser/test_files/syntax/stmts/unary_op.snap diff --git a/crates/parser2/test_files/syntax/stmts/unary_op.sntn b/crates/parser/test_files/syntax/stmts/unary_op.sntn similarity index 100% rename from crates/parser2/test_files/syntax/stmts/unary_op.sntn rename to crates/parser/test_files/syntax/stmts/unary_op.sntn diff --git a/crates/parser2/tests/syntax.rs b/crates/parser/tests/syntax.rs similarity index 80% rename from crates/parser2/tests/syntax.rs rename to crates/parser/tests/syntax.rs index 5af88b2d..d1d6f3b9 100644 --- a/crates/parser2/tests/syntax.rs +++ b/crates/parser/tests/syntax.rs @@ -1,17 +1,13 @@ -use ariadne::{Label, Report, ReportKind, Source}; use dir_test::{dir_test, Fixture}; use indenter::indented; use ir::ir_writer::ModuleWriter; -use pest::{error::InputLocation, iterators::Pairs, Parser as _}; -use sonatina_parser2::{ +use pest::{iterators::Pairs, Parser as _}; +use sonatina_parser::{ ast, parse_module, syntax::{Parser, Rule}, + Error, }; - -use std::{ - fmt::{self, Write}, - ops::Range, -}; +use std::fmt::{self, Write}; #[dir_test( dir: "$CARGO_MANIFEST_DIR/test_files/syntax/stmts", @@ -69,28 +65,9 @@ fn test_rule(rule: Rule, fixture: Fixture<&str>) { } } -fn location_range(loc: InputLocation) -> Range { - match loc { - InputLocation::Pos(pos) => pos..pos, - InputLocation::Span((s, e)) => s..e, - } -} - fn report_error(err: pest::error::Error, fixture: &Fixture<&str>) { - let mut s = Vec::new(); - - Report::build(ReportKind::Error, fixture.path(), 12) - .with_code(3) - .with_message("parse error".to_string()) - .with_label( - Label::new((fixture.path(), location_range(err.location))) - .with_message(format!("{}", err.variant.message())), - ) - .finish() - .write_for_stdout((fixture.path(), Source::from(fixture.content())), &mut s) - .unwrap(); - - eprintln!("{}", std::str::from_utf8(&s).unwrap()); + let s = Error::SyntaxError(err).print_to_string(fixture.path(), fixture.content()); + eprintln!("{s}"); } struct PairsWrapper<'i>(Pairs<'i, Rule>); diff --git a/crates/parser2/Cargo.toml b/crates/parser2/Cargo.toml deleted file mode 100644 index ef642cc9..00000000 --- a/crates/parser2/Cargo.toml +++ /dev/null @@ -1,33 +0,0 @@ -[package] -name = "sonatina-parser2" -version = "0.0.3-alpha" -edition = "2021" -authors = ["Sonatina Developers"] -license = "Apache-2.0" -readme = "../../README.md" -homepage = "https://github.com/fe-lang/sonatina/tree/main/crates/parser" -repository = "https://github.com/fe-lang/sonatina" -description = "Parser for sonatina-ir text format" -categories = ["compilers", "parser", "wasm"] -keywords = ["compiler", "evm", "wasm", "smart-contract"] - -[dependencies] -ir = { package = "sonatina-ir", path = "../ir", version = "0.0.3-alpha" } -sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } -smallvec = "1.7.0" -cranelift-entity = "0.104" -pest = "2.7.10" -pest_derive = "2.7.10" -pest-ast = "0.3.4" -from-pest = "0.3.2" -smol_str = "0.2.2" -hex = "0.4.3" -num-traits = { version = "0.2.19", default-features = false } -either = { version = "1.12.0", default-features = false } -annotate-snippets = "0.11.4" - -[dev-dependencies] -dir-test = { git = "https://github.com/sbillig/dir-test", rev = "c4115dd" } -insta = { version = "1.38.0" } -indenter = "0.3.3" -ariadne = { version = "0.4.1", features = ["auto-color"] } diff --git a/crates/parser2/src/lib.rs b/crates/parser2/src/lib.rs deleted file mode 100644 index 2246f8f6..00000000 --- a/crates/parser2/src/lib.rs +++ /dev/null @@ -1,244 +0,0 @@ -use ast::{Error, ValueDeclaration}; -use cranelift_entity::SecondaryMap; -use ir::{ - self, - builder::{FunctionBuilder, ModuleBuilder}, - func_cursor::{CursorLocation, FuncCursor, InsnInserter}, - isa::IsaBuilder, - module::{FuncRef, ModuleCtx}, - Module, Signature, -}; - -pub mod ast; -pub mod syntax; - -pub fn parse_module(input: &str) -> Result> { - let ast = ast::parse(input)?; - - let isa = IsaBuilder::new(ast.target.unwrap()).build(); // xxx - let ctx = ModuleCtx::new(isa); - let mut builder = ModuleBuilder::new(ctx); - - for st in ast.struct_types { - let fields = st - .fields - .iter() - .map(|t| build_type(&mut builder, t)) - .collect::>(); - builder.declare_struct_type(&st.name.0, &fields, false); - } - - for func in ast.declared_functions { - let params = func - .params - .iter() - .map(|t| build_type(&mut builder, t)) - .collect::>(); - let ret_ty = func - .ret_type - .as_ref() - .map(|t| build_type(&mut builder, t)) - .unwrap_or(ir::Type::Void); - - let sig = Signature::new(&func.name.0, func.linkage, ¶ms, ret_ty); - builder.declare_function(sig); - } - - for func in ast.functions.iter() { - let sig = &func.signature; - let args = sig - .params - .iter() - .map(|decl| build_type(&mut builder, &decl.1)) - .collect::>(); - - let ret_ty = sig - .ret_type - .as_ref() - .map(|t| build_type(&mut builder, t)) - .unwrap_or(ir::Type::Void); - let sig = Signature::new(&sig.name.0, sig.linkage, &args, ret_ty); - - builder.declare_function(sig); - } - - let mut func_comments = SecondaryMap::default(); - - for func in ast.functions { - let id = builder.get_func_ref(&func.signature.name.0).unwrap(); - let mut fb = builder.build_function(id); - build_func(&mut fb, &func); - fb.seal_all(); - builder = fb.finish(); - - func_comments[id] = func.comments; - } - - let module = builder.build(); - Ok(ParsedModule { - module, - module_comments: ast.comments, - func_comments, - }) -} - -pub struct ParsedModule { - pub module: Module, - pub module_comments: Vec, - pub func_comments: SecondaryMap>, -} - -fn build_func(builder: &mut FunctionBuilder, func: &ast::Func) { - for (i, ValueDeclaration(name, _ty)) in func.signature.params.iter().enumerate() { - builder.name_value(builder.func.arg_values[i], &name.0); - } - - // "forward declare" all block ids - if let Some(max_block_id) = func.blocks.iter().map(|b| b.id.0.unwrap()).max() { - while builder.func.dfg.blocks.len() <= max_block_id as usize { - builder.cursor.make_block(&mut builder.func); - } - } - - for block in &func.blocks { - let block_id = ir::Block(block.id.0.unwrap()); - builder.cursor.append_block(&mut builder.func, block_id); - builder - .cursor - .set_location(CursorLocation::BlockTop(block_id)); - - for stmt in &block.stmts { - match &stmt.kind { - ast::StmtKind::Define(ValueDeclaration(val, ty), expr) => { - let ty = build_type(&mut builder.module_builder, ty); - - let result_val = match expr { - ast::Expr::Binary(op, lhs, rhs) => { - let lhs = build_value(builder, lhs); - let rhs = build_value(builder, rhs); - builder.binary_op(*op, lhs, rhs) - } - ast::Expr::Unary(op, val) => { - let val = build_value(builder, val); - builder.unary_op(*op, val) - } - ast::Expr::Cast(op, val) => { - let val = build_value(builder, val); - builder.cast_op(*op, val, ty) - } - ast::Expr::Load(location, addr) => { - let addr = build_value(builder, addr); - match location { - ir::DataLocationKind::Memory => builder.memory_load(addr), - ir::DataLocationKind::Storage => builder.storage_load(addr), - } - } - ast::Expr::Alloca(ty) => { - let ty = build_type(&mut builder.module_builder, ty); - builder.alloca(ty) - } - ast::Expr::Call(ast::Call(name, args)) => { - let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); - let args = args - .iter() - .map(|val| build_value(builder, val)) - .collect::>(); - builder.call(func_ref, &args).unwrap() - } - ast::Expr::Gep(vals) => { - let vals = vals - .iter() - .map(|val| build_value(builder, val)) - .collect::>(); - builder.gep(&vals).unwrap() - } - ast::Expr::Phi(vals) => { - let args = vals - .iter() - .map(|(val, block)| { - // xxx declare block - let b = ir::Block(block.0.unwrap()); - let v = build_value(builder, val); - (v, b) - }) - .collect::>(); - builder.phi(ty, &args) - } - }; - builder.name_value(result_val, &val.0) - } - ast::StmtKind::Store(loc, addr, val) => { - let addr = build_value(builder, addr); - let val = build_value(builder, val); - - match loc { - ir::DataLocationKind::Memory => builder.memory_store(addr, val), - ir::DataLocationKind::Storage => builder.storage_store(addr, val), - } - } - ast::StmtKind::Return(val) => { - let val = val.as_ref().map(|v| build_value(builder, v)); - builder.ret(val); - } - ast::StmtKind::Jump(block_id) => { - let block_id = ir::Block(block_id.0.unwrap()); - builder.jump(block_id); - } - ast::StmtKind::Branch(cond, true_block, false_block) => { - let cond = build_value(builder, cond); - let true_block = ir::Block(true_block.0.unwrap()); - let false_block = ir::Block(false_block.0.unwrap()); - builder.br(cond, true_block, false_block); - } - ast::StmtKind::BranchTable(index, default_block, table) => { - let index = build_value(builder, index); - let default_block = default_block.as_ref().map(|b| ir::Block(b.0.unwrap())); - let table = table - .iter() - .map(|(val, block)| { - (build_value(builder, val), ir::Block(block.0.unwrap())) - }) - .collect::>(); - builder.br_table(index, default_block, &table); - } - ast::StmtKind::Call(ast::Call(name, args)) => { - let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); - let args = args - .iter() - .map(|val| build_value(builder, val)) - .collect::>(); - builder.call(func_ref, &args).unwrap(); - } - } - } - } -} - -fn build_value(builder: &mut FunctionBuilder, val: &ast::Value) -> ir::Value { - match val { - ast::Value::Immediate(imm) => builder.make_imm_value(*imm), - ast::Value::Named(v) => builder.get_named_value(&v.0), - ast::Value::Error => unreachable!(), - } -} - -fn build_type(builder: &mut ModuleBuilder, t: &ast::Type) -> ir::Type { - match t { - ast::Type::Int(i) => (*i).into(), - ast::Type::Ptr(t) => { - let t = build_type(builder, t); - builder.ptr_type(t) - } - ast::Type::Array(t, n) => { - let elem = build_type(builder, t); - builder.declare_array_type(elem, *n) - } - ast::Type::Void => ir::Type::Void, - ast::Type::Struct(name) => builder.get_struct_type(name).unwrap_or_else(|| { - // xxx error on undeclared struct - eprintln!("struct type not found: {name}"); - ir::Type::Void - }), - ast::Type::Error => todo!(), - } -} From 13b1a3c7423e5bf6cc5e7f9dc8c9942e4bf9e10b Mon Sep 17 00:00:00 2001 From: Sean Billig Date: Sat, 22 Jun 2024 22:23:29 -0700 Subject: [PATCH 5/7] Basic semantic error handling; cleanup --- crates/codegen/Cargo.toml | 2 +- crates/codegen/src/critical_edge.rs | 12 +- crates/filecheck/Cargo.toml | 2 +- crates/filecheck/src/lib.rs | 8 +- crates/interpreter/src/state.rs | 2 +- crates/ir/Cargo.toml | 2 +- crates/ir/src/builder/func_builder.rs | 43 +- crates/ir/src/builder/mod.rs | 9 +- crates/ir/src/builder/ssa.rs | 21 +- crates/ir/src/function.rs | 19 +- crates/ir/src/ir_writer.rs | 47 +- crates/parser/Cargo.toml | 4 +- crates/parser/build.rs | 4 + crates/parser/src/ast.rs | 199 ++-- crates/parser/src/error.rs | 81 ++ crates/parser/src/lib.rs | 424 ++++--- crates/parser/src/syntax.rs | 95 +- .../parser/test_files/errors/bad_target.snap | 11 + .../parser/test_files/errors/bad_target.sntn | 1 + .../test_files/errors/duplicate_val.snap | 11 + .../test_files/errors/duplicate_val.sntn | 8 + crates/parser/test_files/errors/numbers.snap | 21 + crates/parser/test_files/errors/numbers.sntn | 9 + .../parser/test_files/errors/parse_error.snap | 16 + .../parser/test_files/errors/parse_error.sntn | 6 + .../parser/test_files/errors/undefined.snap | 26 + .../parser/test_files/errors/undefined.sntn | 13 + .../test_files/syntax/module/simple.ast.snap | 1002 ++++++++++++----- crates/parser/tests/common/mod.rs | 40 + crates/parser/tests/errors.rs | 26 + crates/parser/tests/syntax.rs | 48 +- 31 files changed, 1485 insertions(+), 727 deletions(-) create mode 100644 crates/parser/build.rs create mode 100644 crates/parser/src/error.rs create mode 100644 crates/parser/test_files/errors/bad_target.snap create mode 100644 crates/parser/test_files/errors/bad_target.sntn create mode 100644 crates/parser/test_files/errors/duplicate_val.snap create mode 100644 crates/parser/test_files/errors/duplicate_val.sntn create mode 100644 crates/parser/test_files/errors/numbers.snap create mode 100644 crates/parser/test_files/errors/numbers.sntn create mode 100644 crates/parser/test_files/errors/parse_error.snap create mode 100644 crates/parser/test_files/errors/parse_error.sntn create mode 100644 crates/parser/test_files/errors/undefined.snap create mode 100644 crates/parser/test_files/errors/undefined.sntn create mode 100644 crates/parser/tests/common/mod.rs create mode 100644 crates/parser/tests/errors.rs diff --git a/crates/codegen/Cargo.toml b/crates/codegen/Cargo.toml index a2e86d34..e9dea30f 100644 --- a/crates/codegen/Cargo.toml +++ b/crates/codegen/Cargo.toml @@ -17,6 +17,6 @@ keywords = ["compiler", "evm", "wasm", "smart-contract"] [dependencies] cranelift-entity = "0.104" smallvec = "1.7.0" -rustc-hash = "1.1.0" +rustc-hash = "2.0.0" sonatina-ir = { path = "../ir", version = "0.0.3-alpha" } sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } diff --git a/crates/codegen/src/critical_edge.rs b/crates/codegen/src/critical_edge.rs index 83b87442..eacbb511 100644 --- a/crates/codegen/src/critical_edge.rs +++ b/crates/codegen/src/critical_edge.rs @@ -148,7 +148,7 @@ mod tests { CriticalEdgeSplitter::new().run(func, &mut cfg); assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: br 1.i32 block3 block1; @@ -166,6 +166,7 @@ mod tests { " ); + let func = &mut module.funcs[func_ref]; let mut cfg_split = ControlFlowGraph::default(); cfg_split.compute(func); assert_eq!(cfg, cfg_split); @@ -207,7 +208,7 @@ mod tests { CriticalEdgeSplitter::new().run(func, &mut cfg); assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: br 1.i8 block5 block1; @@ -234,6 +235,7 @@ mod tests { " ); + let func = &mut module.funcs[func_ref]; let mut cfg_split = ControlFlowGraph::default(); cfg_split.compute(func); assert_eq!(cfg, cfg_split); @@ -269,7 +271,7 @@ mod tests { CriticalEdgeSplitter::new().run(func, &mut cfg); assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: jump block1; @@ -289,6 +291,7 @@ mod tests { " ); + let func = &mut module.funcs[func_ref]; let mut cfg_split = ControlFlowGraph::default(); cfg_split.compute(func); assert_eq!(cfg, cfg_split); @@ -332,7 +335,7 @@ mod tests { CriticalEdgeSplitter::new().run(func, &mut cfg); assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: br 1.i1 block5 block6; @@ -362,6 +365,7 @@ mod tests { " ); + let func = &mut module.funcs[func_ref]; let mut cfg_split = ControlFlowGraph::default(); cfg_split.compute(func); assert_eq!(cfg, cfg_split); diff --git a/crates/filecheck/Cargo.toml b/crates/filecheck/Cargo.toml index b4c751b9..8c71ac6b 100644 --- a/crates/filecheck/Cargo.toml +++ b/crates/filecheck/Cargo.toml @@ -9,7 +9,7 @@ publish = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -filecheck = "0.5.0" # { path = "/Users/sean/src/filecheck" } +filecheck = "0.5.0" sonatina-ir = { path = "../ir" } sonatina-codegen = { path = "../codegen" } sonatina-parser = { path = "../parser" } diff --git a/crates/filecheck/src/lib.rs b/crates/filecheck/src/lib.rs index c18a31ee..d9e20f60 100644 --- a/crates/filecheck/src/lib.rs +++ b/crates/filecheck/src/lib.rs @@ -145,10 +145,12 @@ impl<'a> FileChecker<'a> { func_ref: FuncRef, ) -> FileCheckResult { let func = &mut parsed_module.module.funcs[func_ref]; - let comments = &parsed_module.func_comments[func_ref]; + let comments = &parsed_module.debug.func_comments[func_ref]; self.transformer.transform(func); - let func_ir = FuncWriter::new(func).dump_string().unwrap(); + let func_ir = FuncWriter::new(func_ref, func, Some(&parsed_module.debug)) + .dump_string() + .unwrap(); let checker = self.build_checker(comments); @@ -171,7 +173,7 @@ impl<'a> FileChecker<'a> { Err(errs) => { let mut v = vec![]; for e in errs { - e.print(&mut v, self.file_path.to_str().unwrap(), &input) + e.print(&mut v, self.file_path.to_str().unwrap(), &input, true) .unwrap() } Err(String::from_utf8(v).unwrap()) diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 63d36034..1a662665 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -277,7 +277,7 @@ mod test { Ok(pm) => pm.module, Err(errs) => { for err in errs { - eprintln!("{}", err.print_to_string("[test]", input)); + eprintln!("{}", err.print_to_string("[test]", input, true)); } panic!("parsing failed"); } diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml index 677b6ff4..2911073c 100644 --- a/crates/ir/Cargo.toml +++ b/crates/ir/Cargo.toml @@ -17,7 +17,7 @@ keywords = ["compiler", "evm", "wasm", "smart-contract"] primitive-types = { version = "0.12", default-features = false } cranelift-entity = "0.104" smallvec = "1.7.0" -rustc-hash = "1.1.0" +rustc-hash = "2.0.0" dyn-clone = "1.0.4" sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } indexmap = "2.0.0" diff --git a/crates/ir/src/builder/func_builder.rs b/crates/ir/src/builder/func_builder.rs index 66ec6d11..a40e6626 100644 --- a/crates/ir/src/builder/func_builder.rs +++ b/crates/ir/src/builder/func_builder.rs @@ -4,7 +4,7 @@ use crate::{ func_cursor::{CursorLocation, FuncCursor}, insn::{BinaryOp, CastOp, DataLocationKind, InsnData, UnaryOp}, module::FuncRef, - Block, Function, GlobalVariable, Immediate, Type, Value, ValueData, + Block, Function, GlobalVariable, Immediate, Type, Value, }; use super::{ @@ -18,7 +18,6 @@ pub struct FunctionBuilder { func_ref: FuncRef, pub cursor: C, ssa_builder: SsaBuilder, - undefined: Vec, } macro_rules! impl_binary_insn { @@ -49,7 +48,6 @@ where func_ref, cursor, ssa_builder: SsaBuilder::new(), - undefined: Default::default(), } } @@ -64,12 +62,9 @@ where mut module_builder, func, func_ref, - undefined, .. } = self; - debug_assert!(undefined.is_empty()); // xxx - module_builder.funcs[func_ref] = func; module_builder } @@ -93,34 +88,6 @@ where self.cursor.set_location(CursorLocation::BlockBottom(block)); } - pub fn name_value(&mut self, value: Value, name: &str) { - if let Some(v) = self.func.value_names.get_by_right(name) { - if let Some(pos) = self.undefined.iter().position(|u| u == v) { - self.func.dfg.change_to_alias(*v, value); - // self.func.dfg.values[*v] = ValueData::Alias { alias: value }; - self.undefined.remove(pos); - } else { - panic!("value names must be unique"); - } - } - self.func.value_names.insert(value, name.into()); - } - - pub fn get_named_value(&mut self, name: &str) -> Value { - if let Some(v) = self.func.value_names.get_by_right(name).copied() { - v - } else { - let v = self.func.dfg.make_value(ValueData::Immediate { - imm: Immediate::I128(424242), - ty: Type::I128, - }); - - self.undefined.push(v); - self.name_value(v, name); - v - } - } - pub fn make_imm_value(&mut self, imm: Imm) -> Value where Imm: Into, @@ -428,7 +395,7 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); assert_eq!( - dump_func(&module.funcs[func_ref]), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: v2.i8 = add 1.i8 2.i8; @@ -458,7 +425,7 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); assert_eq!( - dump_func(&module.funcs[func_ref]), + dump_func(&module, func_ref), "func public %test_func(v0.i32, v1.i64) -> void { block0: v2.i64 = sext v0; @@ -484,7 +451,7 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); assert_eq!( - dump_func(&module.funcs[func_ref]), + dump_func(&module, func_ref), "func public %test_func() -> i32 { block0: return 1.i32; @@ -526,7 +493,7 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); assert_eq!( - dump_func(&module.funcs[func_ref]), + dump_func(&module, func_ref), "func public %test_func(v0.i64) -> void { block0: br v0 block1 block2; diff --git a/crates/ir/src/builder/mod.rs b/crates/ir/src/builder/mod.rs index 76a9c95a..3b30ce02 100644 --- a/crates/ir/src/builder/mod.rs +++ b/crates/ir/src/builder/mod.rs @@ -16,8 +16,8 @@ pub mod test_util { func_cursor::InsnInserter, ir_writer::FuncWriter, isa::{IsaBuilder, TargetIsa}, - module::ModuleCtx, - Function, Linkage, Signature, Type, + module::{FuncRef, ModuleCtx}, + Linkage, Module, Signature, Type, }; pub fn build_test_isa() -> TargetIsa { @@ -34,8 +34,9 @@ pub mod test_util { mb.build_function(func_ref) } - pub fn dump_func(func: &Function) -> String { - let mut writer = FuncWriter::new(func); + pub fn dump_func(module: &Module, func_ref: FuncRef) -> String { + let func = &module.funcs[func_ref]; + let mut writer = FuncWriter::new(func_ref, func, None); writer.dump_string().unwrap() } } diff --git a/crates/ir/src/builder/ssa.rs b/crates/ir/src/builder/ssa.rs index 7cdc4159..90ab0e99 100644 --- a/crates/ir/src/builder/ssa.rs +++ b/crates/ir/src/builder/ssa.rs @@ -228,10 +228,9 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); - let func = &module.funcs[func_ref]; assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: v1.i32 = add 1.i32 1.i32; @@ -277,10 +276,9 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); - let func = &module.funcs[func_ref]; assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: br 1.i32 block2 block1; @@ -358,10 +356,9 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); - let func = &module.funcs[func_ref]; assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: br 0.i32 block1 block2; @@ -432,10 +429,9 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); - let func = &module.funcs[func_ref]; assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: jump block1; @@ -508,10 +504,9 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); - let func = &module.funcs[func_ref]; assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: jump block1; @@ -589,10 +584,9 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); - let func = &module.funcs[func_ref]; assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func() -> void { block0: jump block1; @@ -668,10 +662,9 @@ mod tests { let module = builder.finish().build(); let func_ref = module.iter_functions().next().unwrap(); - let func = &module.funcs[func_ref]; assert_eq!( - dump_func(func), + dump_func(&module, func_ref), "func public %test_func(v0.i32) -> i32 { block0: br_table v0 block4 (1.i32 block1) (2.i32 block2) (3.i32 block3); diff --git a/crates/ir/src/function.rs b/crates/ir/src/function.rs index ce9615e3..90b15660 100644 --- a/crates/ir/src/function.rs +++ b/crates/ir/src/function.rs @@ -1,14 +1,8 @@ use super::{module::FuncRef, DataFlowGraph, Layout, Type, Value}; use crate::{module::ModuleCtx, types::DisplayType, Linkage}; -use rustc_hash::{FxHashMap, FxHasher}; +use rustc_hash::FxHashMap; use smallvec::SmallVec; -use smol_str::SmolStr; -use std::{ - fmt::{self, Write}, - hash::BuildHasherDefault, -}; - -type Bimap = bimap::BiHashMap>; +use std::fmt::{self, Write}; #[derive(Debug, Clone)] pub struct Function { @@ -18,9 +12,6 @@ pub struct Function { pub dfg: DataFlowGraph, pub layout: Layout, - // xxx move - pub value_names: Bimap, - /// Stores signatures of all functions that are called by the function. pub callees: FxHashMap, } @@ -43,7 +34,6 @@ impl Function { arg_values, dfg, layout: Layout::default(), - value_names: Bimap::default(), callees: FxHashMap::default(), } } @@ -78,11 +68,6 @@ impl Signature { self.linkage } - // xxx remove - pub fn append_arg(&mut self, arg: Type) { - self.args.push(arg); - } - pub fn args(&self) -> &[Type] { &self.args } diff --git a/crates/ir/src/ir_writer.rs b/crates/ir/src/ir_writer.rs index c7bc5acb..663e2e7f 100644 --- a/crates/ir/src/ir_writer.rs +++ b/crates/ir/src/ir_writer.rs @@ -1,20 +1,40 @@ use std::io; use crate::{ - module::ModuleCtx, + module::{FuncRef, ModuleCtx}, types::{CompoundType, CompoundTypeData, StructData}, DataLocationKind, GlobalVariableData, Module, }; use super::{Block, Function, Insn, InsnData, Type, Value}; +pub trait DebugProvider { + fn value_name(&self, _func: FuncRef, _value: Value) -> Option<&str> { + None + } +} +impl DebugProvider for () {} + pub struct ModuleWriter<'a> { module: &'a Module, + debug: Option<&'a dyn DebugProvider>, } +impl<'a> ModuleWriter<'a> {} + impl<'a> ModuleWriter<'a> { pub fn new(module: &'a Module) -> Self { - Self { module } + Self { + module, + debug: None, + } + } + + pub fn with_debug_provider(module: &'a Module, debug: &'a dyn DebugProvider) -> Self { + Self { + module, + debug: Some(debug), + } } pub fn write(&mut self, mut w: impl io::Write) -> io::Result<()> { @@ -40,7 +60,7 @@ impl<'a> ModuleWriter<'a> { for func_ref in self.module.funcs.keys() { let func = &self.module.funcs[func_ref]; - let mut func_writer = FuncWriter::new(func); + let mut func_writer = FuncWriter::new(func_ref, func, self.debug); func_writer.write(&mut w)?; writeln!(w)?; } @@ -56,13 +76,24 @@ impl<'a> ModuleWriter<'a> { } pub struct FuncWriter<'a> { + func_ref: FuncRef, func: &'a Function, level: u8, + debug: Option<&'a dyn DebugProvider>, } impl<'a> FuncWriter<'a> { - pub fn new(func: &'a Function) -> Self { - Self { func, level: 0 } + pub fn new( + func_ref: FuncRef, + func: &'a Function, + debug: Option<&'a dyn DebugProvider>, + ) -> Self { + Self { + func_ref, + func, + level: 0, + debug, + } } pub fn write(&mut self, mut w: impl io::Write) -> io::Result<()> { @@ -106,6 +137,10 @@ impl<'a> FuncWriter<'a> { unsafe { Ok(String::from_utf8_unchecked(s)) } } + pub fn value_name(&self, value: Value) -> Option<&str> { + self.debug.and_then(|d| d.value_name(self.func_ref, value)) + } + fn write_block_with_insn(&mut self, block: Block, mut w: impl io::Write) -> io::Result<()> { self.indent(&mut w)?; block.write(self, &mut w)?; @@ -179,7 +214,7 @@ impl IrWrite for Value { writer .ctx() .with_gv_store(|s| write!(w, "%{}", s.gv_data(gv).symbol)) - } else if let Some(name) = writer.func.value_names.get_by_left(&value) { + } else if let Some(name) = writer.value_name(value) { write!(w, "{name}") } else { write!(w, "v{}", value.0) diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index fa9954c1..b7868a39 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -25,8 +25,10 @@ hex = "0.4.3" num-traits = { version = "0.2.19", default-features = false } either = { version = "1.12.0", default-features = false } annotate-snippets = "0.11.4" +rustc-hash = "2.0.0" +bimap = "0.6.3" [dev-dependencies] -dir-test = { git = "https://github.com/sbillig/dir-test", rev = "c4115dd" } +dir-test = "0.3" insta = { version = "1.38.0" } indenter = "0.3.3" diff --git a/crates/parser/build.rs b/crates/parser/build.rs new file mode 100644 index 00000000..8e048f92 --- /dev/null +++ b/crates/parser/build.rs @@ -0,0 +1,4 @@ +fn main() { + #[cfg(test)] + println!("cargo:rerun-if-changed=./test_files"); +} diff --git a/crates/parser/src/ast.rs b/crates/parser/src/ast.rs index 27d71956..389bc401 100644 --- a/crates/parser/src/ast.rs +++ b/crates/parser/src/ast.rs @@ -1,6 +1,8 @@ use super::{syntax::Node, Error}; -use crate::syntax::{FromSyntax, Parser, Rule}; -use annotate_snippets::{Level, Renderer, Snippet}; +use crate::{ + syntax::{FromSyntax, Parser, Rule, Spanned}, + Span, +}; use either::Either; use hex::FromHex; pub use ir::{ @@ -11,11 +13,9 @@ use ir::{I256, U256}; use pest::Parser as _; use smol_str::SmolStr; pub use sonatina_triple::{InvalidTriple, TargetTriple}; -use std::{io, ops::Range, str::FromStr}; +use std::str::FromStr; pub fn parse(input: &str) -> Result> { - pest::set_error_detail(true); // xxx - match Parser::parse(Rule::module, input) { Err(err) => Err(vec![Error::SyntaxError(err)]), Ok(mut pairs) => { @@ -45,7 +45,7 @@ pub struct Module { impl FromSyntax for Module { fn from_syntax(node: &mut Node) -> Self { - let target = node.single_opt(Rule::target_triple).flatten(); + let target = node.single(Rule::target_triple); let module_comments = node.map_while(|p| { if p.as_rule() == Rule::COMMENT && p.as_str().starts_with("#!") { @@ -97,7 +97,7 @@ impl FromSyntax for Option { match TargetTriple::parse(node.txt) { Ok(t) => Some(t), Err(e) => { - node.error(Error::InvalidTarget(e, node.span.clone())); + node.error(Error::InvalidTarget(e, node.span)); None } } @@ -223,6 +223,11 @@ pub struct Block { pub stmts: Vec, } +impl Block { + pub fn id(&self) -> u32 { + self.id.id.unwrap() + } +} impl FromSyntax for Block { fn from_syntax(node: &mut Node) -> Self { Self { @@ -233,20 +238,27 @@ impl FromSyntax for Block { } #[derive(Debug)] -pub struct BlockId(pub Option); +pub struct BlockId { + pub id: Option, + pub span: Span, +} impl FromSyntax for BlockId { fn from_syntax(node: &mut Node) -> Self { + let span = node.span; node.descend(); debug_assert_eq!(node.rule, Rule::block_number); - BlockId(node.txt.parse().ok()) + let id = node.txt.parse().ok(); + if id.is_none() { + node.error(Error::NumberOutOfBounds(node.span)); + } + BlockId { id, span } } } #[derive(Debug)] pub struct Stmt { pub kind: StmtKind, - // pub comments: Vec, } impl FromSyntax for Stmt { @@ -298,7 +310,13 @@ impl FromSyntax for (Value, BlockId) { } #[derive(Debug)] -pub enum Type { +pub struct Type { + pub kind: TypeKind, + pub span: Span, +} + +#[derive(Debug)] +pub enum TypeKind { Int(IntType), Ptr(Box), Array(Box, usize), @@ -310,19 +328,26 @@ pub enum Type { impl FromSyntax for Type { fn from_syntax(node: &mut Node) -> Self { node.descend(); - match node.rule { - Rule::primitive_type => Type::Int(IntType::from_str(node.txt).unwrap()), - Rule::ptr_type => Type::Ptr(Box::new(node.single(Rule::type_name))), + let kind = match node.rule { + Rule::primitive_type => TypeKind::Int(IntType::from_str(node.txt).unwrap()), + Rule::ptr_type => TypeKind::Ptr(Box::new(node.single(Rule::type_name))), Rule::array_type => { let Ok(size) = usize::from_str(node.get(Rule::array_size).as_str()) else { - node.error(Error::NumberOutOfBounds(node.span.clone())); - return Type::Error; + node.error(Error::NumberOutOfBounds(node.span)); + return Type { + kind: TypeKind::Error, + span: node.span, + }; }; - Type::Array(Box::new(node.single(Rule::type_name)), size) + TypeKind::Array(Box::new(node.single(Rule::type_name)), size) } - Rule::void_type => Type::Void, - Rule::struct_identifier => Type::Struct(node.parse_str(Rule::struct_name)), + Rule::void_type => TypeKind::Void, + Rule::struct_identifier => TypeKind::Struct(node.parse_str(Rule::struct_name)), _ => unreachable!(), + }; + Type { + kind, + span: node.span, } } } @@ -390,14 +415,20 @@ impl FromSyntax for Expr { } #[derive(Debug)] -pub struct Call(pub FunctionName, pub Vec); +pub struct Call(pub Spanned, pub Vec); #[derive(Debug)] -pub struct ValueName(pub SmolStr); +pub struct ValueName { + pub string: SmolStr, + pub span: Span, +} impl FromSyntax for ValueName { fn from_syntax(node: &mut Node) -> Self { - Self(node.txt.into()) + Self { + string: node.txt.into(), + span: node.span, + } } } @@ -411,7 +442,13 @@ impl FromSyntax for ValueDeclaration { } #[derive(Debug)] -pub enum Value { +pub struct Value { + pub kind: ValueKind, + pub span: Span, +} + +#[derive(Debug)] +pub enum ValueKind { Immediate(Immediate), Named(ValueName), Error, @@ -419,16 +456,16 @@ pub enum Value { macro_rules! parse_dec { ($node:ident, $imm:expr, $ity:ty, $uty:ty) => { - if let Ok(v) = $node + match $node .txt .parse::<$ity>() .or_else(|_| $node.txt.parse::<$uty>().map(|v| v as $ity)) { - Value::Immediate($imm(v)) - } else { - let span = $node.span.clone(); - $node.error(Error::NumberOutOfBounds(span)); - Value::Error + Ok(v) => ValueKind::Immediate($imm(v)), + Err(_) => { + $node.error(Error::NumberOutOfBounds($node.span)); + ValueKind::Error + } } }; } @@ -436,9 +473,9 @@ macro_rules! parse_dec { macro_rules! parse_hex { ($node:ident, $imm:expr, $ity:ty) => { if let Some(bytes) = hex_bytes($node.txt) { - Value::Immediate($imm(<$ity>::from_be_bytes(bytes))) + ValueKind::Immediate($imm(<$ity>::from_be_bytes(bytes))) } else { - Value::Error + ValueKind::Error } }; } @@ -446,8 +483,8 @@ macro_rules! parse_hex { impl FromSyntax for Value { fn from_syntax(node: &mut Node) -> Self { node.descend(); - match node.rule { - Rule::value_name => Value::Named(ValueName(node.txt.into())), + let kind = match node.rule { + Rule::value_name => ValueKind::Named(ValueName::from_syntax(node)), Rule::imm_number => { let ty: IntType = node.parse_str(Rule::primitive_type); node.descend(); @@ -485,8 +522,8 @@ impl FromSyntax for Value { Rule::hex => match ty { IntType::I1 => { - node.error(Error::NumberOutOfBounds(node.span.clone())); - Value::Error + node.error(Error::NumberOutOfBounds(node.span)); + ValueKind::Error } IntType::I8 => parse_hex!(node, Immediate::I8, i8), IntType::I16 => parse_hex!(node, Immediate::I16, i16), @@ -503,11 +540,10 @@ impl FromSyntax for Value { if is_negative { i256 = I256::zero().overflowing_sub(i256).0; } - Value::Immediate(Immediate::I256(i256)) + ValueKind::Immediate(Immediate::I256(i256)) } else { - let span = node.span.clone(); - node.error(Error::NumberOutOfBounds(span)); - Value::Error + node.error(Error::NumberOutOfBounds(node.span)); + ValueKind::Error } } }, @@ -515,6 +551,10 @@ impl FromSyntax for Value { } } _ => unreachable!(), + }; + Value { + kind, + span: node.span, } } } @@ -536,53 +576,16 @@ impl FromStr for IntType { } } -impl Error { - pub fn span(&self) -> Range { - match self { - Error::NumberOutOfBounds(span) => span.clone(), - Error::InvalidTarget(_, span) => span.clone(), - Error::SyntaxError(err) => match err.location { - pest::error::InputLocation::Pos(p) => p..p, - pest::error::InputLocation::Span((s, e)) => s..e, - }, - } - } - - pub fn print(&self, mut w: impl io::Write, path: &str, content: &str) -> io::Result<()> { - let label = match self { - Error::NumberOutOfBounds(_) => "number out of bounds".into(), - Error::InvalidTarget(err, _) => err.to_string(), - Error::SyntaxError(err) => err.to_string(), - }; - let snippet = Level::Error.title("parse error").snippet( - Snippet::source(content) - .line_start(0) - .origin(path) - .fold(true) - .annotation(Level::Error.span(self.span()).label(&label)), - ); - let rend = Renderer::styled(); - let disp = rend.render(snippet); - write!(w, "{}", disp) - } - - pub fn print_to_string(&self, path: &str, content: &str) -> String { - let mut v = vec![]; - self.print(&mut v, path, content).unwrap(); - String::from_utf8(v).unwrap() - } -} - -fn imm_or_err(node: &mut Node, f: F) -> Value +fn imm_or_err(node: &mut Node, f: F) -> ValueKind where F: Fn() -> Option, { let Some(imm) = f() else { - let span = node.span.clone(); + let span = node.span; node.error(Error::NumberOutOfBounds(span)); - return Value::Error; + return ValueKind::Error; }; - Value::Immediate(imm) + ValueKind::Immediate(imm) } fn hex_bytes(mut s: &str) -> Option<[u8; N]> { @@ -597,39 +600,3 @@ fn hex_bytes(mut s: &str) -> Option<[u8; N]> { out[N - bytes.len()..].copy_from_slice(&bytes); Some(out) } - -// xxx remove -// pub fn parse_immediate( -// val: &str, -// loc: Range, -// ) -> Result> { -// let mut chunks = val.split('.'); -// let num = chunks.next().unwrap(); -// let t = chunks.next().unwrap(); - -// let imm = match t { -// "i1" => Immediate::I1(parse_num(num, loc)?), -// "i8" => Immediate::I8(parse_num(num, loc)?), -// "i16" => Immediate::I16(parse_num(num, loc)?), -// "i32" => Immediate::I32(parse_num(num, loc)?), -// "i64" => Immediate::I64(parse_num(num, loc)?), -// "i128" => Immediate::I128(parse_num(num, loc)?), -// "i256" => todo!(), -// _ => { -// unreachable!() -// } -// }; -// Ok(Value::Immediate(imm)) -// } - -// pub fn parse_num( -// s: &str, -// loc: Range, -// ) -> Result> -// where -// T: FromStr, -// { -// T::from_str(s).map_err(|_| ParseError::User { -// error: Error::NumberOutOfBounds(loc), -// }) -// } diff --git a/crates/parser/src/error.rs b/crates/parser/src/error.rs new file mode 100644 index 00000000..0d81a124 --- /dev/null +++ b/crates/parser/src/error.rs @@ -0,0 +1,81 @@ +use std::io; + +use crate::{syntax::Rule, Span}; +use annotate_snippets::{Level, Renderer, Snippet}; +use smol_str::SmolStr; +use sonatina_triple::InvalidTriple; + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum Error { + NumberOutOfBounds(Span), + InvalidTarget(InvalidTriple, Span), + SyntaxError(pest::error::Error), + Undefined(UndefinedKind, Span), + DuplicateValueName(SmolStr, Span), +} + +#[derive(Debug)] +pub enum UndefinedKind { + Block(ir::Block), + Func(SmolStr), + Type(SmolStr), + Value(SmolStr), +} + +impl Error { + pub fn span(&self) -> Span { + match self { + Error::NumberOutOfBounds(span) => *span, + Error::InvalidTarget(_, span) => *span, + Error::Undefined(_, span) => *span, + + Error::DuplicateValueName(_, span) => *span, + Error::SyntaxError(err) => match err.location { + pest::error::InputLocation::Pos(p) => Span(p as u32, p as u32), + pest::error::InputLocation::Span((s, e)) => Span(s as u32, e as u32), + }, + } + } + + pub fn print( + &self, + mut w: impl io::Write, + path: &str, + content: &str, + colors: bool, + ) -> io::Result<()> { + let label = match self { + Error::NumberOutOfBounds(_) => "number out of bounds".into(), + Error::InvalidTarget(err, _) => err.to_string(), + Error::SyntaxError(err) => err.to_string(), + Error::Undefined(kind, _) => match kind { + UndefinedKind::Block(id) => format!("undefined block: `block{}`", id.0), + UndefinedKind::Func(name) => format!("undefined function: `%{name}`"), + UndefinedKind::Type(name) => format!("undefined type: `%{name}`"), + UndefinedKind::Value(name) => format!("undefined value: `{name}`"), + }, + Error::DuplicateValueName(name, _) => format!("value name `{name}` is already defined"), + }; + let snippet = Level::Error.title("parse error").snippet( + Snippet::source(content) + .line_start(0) + .origin(path) + .fold(true) + .annotation(Level::Error.span(self.span().as_range()).label(&label)), + ); + let rend = if colors { + Renderer::styled() + } else { + Renderer::plain() + }; + let disp = rend.render(snippet); + write!(w, "{}", disp) + } + + pub fn print_to_string(&self, path: &str, content: &str, colors: bool) -> String { + let mut v = vec![]; + self.print(&mut v, path, content, colors).unwrap(); + String::from_utf8(v).unwrap() + } +} diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index ba13ba2d..2ca93875 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -1,41 +1,45 @@ -use std::ops::Range; - use ast::ValueDeclaration; use cranelift_entity::SecondaryMap; use ir::{ self, builder::{FunctionBuilder, ModuleBuilder}, func_cursor::{CursorLocation, FuncCursor, InsnInserter}, + ir_writer::DebugProvider, isa::IsaBuilder, module::{FuncRef, ModuleCtx}, Module, Signature, }; -use sonatina_triple::InvalidTriple; -use syntax::Rule; +use rustc_hash::{FxHashMap, FxHashSet, FxHasher}; +use smol_str::SmolStr; +use std::hash::BuildHasherDefault; +use syntax::Spanned; pub mod ast; +mod error; pub mod syntax; +pub use error::{Error, UndefinedKind}; +pub use syntax::Span; + +type Bimap = bimap::BiHashMap>; -#[derive(Debug)] -#[allow(clippy::large_enum_variant)] -pub enum Error { - NumberOutOfBounds(Range), - InvalidTarget(InvalidTriple, Range), - SyntaxError(pest::error::Error), +pub struct ParsedModule { + pub module: Module, + pub debug: DebugInfo, } pub fn parse_module(input: &str) -> Result> { let ast = ast::parse(input)?; - let isa = IsaBuilder::new(ast.target.unwrap()).build(); // xxx - let ctx = ModuleCtx::new(isa); - let mut builder = ModuleBuilder::new(ctx); + let isa = IsaBuilder::new(ast.target.unwrap()).build(); + let mut builder = ModuleBuilder::new(ModuleCtx::new(isa)); + + let mut ctx = BuildCtx::default(); for st in ast.struct_types { let fields = st .fields .iter() - .map(|t| build_type(&mut builder, t)) + .map(|t| ctx.type_(&mut builder, t)) .collect::>(); builder.declare_struct_type(&st.name.0, &fields, false); } @@ -44,12 +48,12 @@ pub fn parse_module(input: &str) -> Result> { let params = func .params .iter() - .map(|t| build_type(&mut builder, t)) + .map(|t| ctx.type_(&mut builder, t)) .collect::>(); let ret_ty = func .ret_type .as_ref() - .map(|t| build_type(&mut builder, t)) + .map(|t| ctx.type_(&mut builder, t)) .unwrap_or(ir::Type::Void); let sig = Signature::new(&func.name.0, func.linkage, ¶ms, ret_ty); @@ -61,13 +65,13 @@ pub fn parse_module(input: &str) -> Result> { let args = sig .params .iter() - .map(|decl| build_type(&mut builder, &decl.1)) + .map(|decl| ctx.type_(&mut builder, &decl.1)) .collect::>(); let ret_ty = sig .ret_type .as_ref() - .map(|t| build_type(&mut builder, t)) + .map(|t| ctx.type_(&mut builder, t)) .unwrap_or(ir::Type::Void); let sig = Signature::new(&sig.name.0, sig.linkage, &args, ret_ty); @@ -78,179 +82,269 @@ pub fn parse_module(input: &str) -> Result> { for func in ast.functions { let id = builder.get_func_ref(&func.signature.name.0).unwrap(); - let mut fb = builder.build_function(id); - build_func(&mut fb, &func); - fb.seal_all(); - builder = fb.finish(); + builder = ctx.build_func(builder.build_function(id), id, &func); func_comments[id] = func.comments; } - let module = builder.build(); - Ok(ParsedModule { - module, - module_comments: ast.comments, - func_comments, - }) + if ctx.errors.is_empty() { + let module = builder.build(); + Ok(ParsedModule { + module, + debug: DebugInfo { + module_comments: ast.comments, + func_comments, + value_names: ctx.value_names, + }, + }) + } else { + Err(ctx.errors) + } } -pub struct ParsedModule { - pub module: Module, +pub struct DebugInfo { pub module_comments: Vec, pub func_comments: SecondaryMap>, + pub value_names: FxHashMap>, } -fn build_func(builder: &mut FunctionBuilder, func: &ast::Func) { - for (i, ValueDeclaration(name, _ty)) in func.signature.params.iter().enumerate() { - builder.name_value(builder.func.arg_values[i], &name.0); +impl DebugProvider for DebugInfo { + fn value_name(&self, func: FuncRef, value: ir::Value) -> Option<&str> { + let names = self.value_names.get(&func)?; + names.get_by_left(&value).map(|s| s.as_str()) } +} + +#[derive(Default)] +struct BuildCtx { + errors: Vec, + blocks: FxHashSet, + undefined: FxHashMap, + value_names: FxHashMap>, + func_value_names: Bimap, +} + +impl BuildCtx { + fn build_func( + &mut self, + mut fb: FunctionBuilder, + func_ref: FuncRef, + func: &ast::Func, + ) -> ModuleBuilder { + self.blocks.clear(); + self.undefined.clear(); - // "forward declare" all block ids - if let Some(max_block_id) = func.blocks.iter().map(|b| b.id.0.unwrap()).max() { - while builder.func.dfg.blocks.len() <= max_block_id as usize { - builder.cursor.make_block(&mut builder.func); + for (i, ValueDeclaration(name, _ty)) in func.signature.params.iter().enumerate() { + let value = fb.func.arg_values[i]; + self.name_value(&mut fb.func, value, name); } - } - for block in &func.blocks { - let block_id = ir::Block(block.id.0.unwrap()); - builder.cursor.append_block(&mut builder.func, block_id); - builder - .cursor - .set_location(CursorLocation::BlockTop(block_id)); - - for stmt in &block.stmts { - match &stmt.kind { - ast::StmtKind::Define(ValueDeclaration(val, ty), expr) => { - let ty = build_type(&mut builder.module_builder, ty); - - let result_val = match expr { - ast::Expr::Binary(op, lhs, rhs) => { - let lhs = build_value(builder, lhs); - let rhs = build_value(builder, rhs); - builder.binary_op(*op, lhs, rhs) - } - ast::Expr::Unary(op, val) => { - let val = build_value(builder, val); - builder.unary_op(*op, val) - } - ast::Expr::Cast(op, val) => { - let val = build_value(builder, val); - builder.cast_op(*op, val, ty) - } - ast::Expr::Load(location, addr) => { - let addr = build_value(builder, addr); - match location { - ir::DataLocationKind::Memory => builder.memory_load(addr), - ir::DataLocationKind::Storage => builder.storage_load(addr), + // collect all defined block ids + self.blocks + .extend(func.blocks.iter().map(|b| ir::Block(b.id()))); + if let Some(max) = self.blocks.iter().max() { + while fb.func.dfg.blocks.len() <= max.0 as usize { + fb.cursor.make_block(&mut fb.func); + } + } + + for block in &func.blocks { + let block_id = ir::Block(block.id()); + fb.cursor.append_block(&mut fb.func, block_id); + fb.cursor.set_location(CursorLocation::BlockTop(block_id)); + + for stmt in &block.stmts { + match &stmt.kind { + ast::StmtKind::Define(ValueDeclaration(val, ty), expr) => { + let ty = self.type_(&mut fb.module_builder, ty); + + let result_val = match expr { + ast::Expr::Binary(op, lhs, rhs) => { + let lhs = self.value(&mut fb, lhs); + let rhs = self.value(&mut fb, rhs); + fb.binary_op(*op, lhs, rhs) } + ast::Expr::Unary(op, val) => { + let val = self.value(&mut fb, val); + fb.unary_op(*op, val) + } + ast::Expr::Cast(op, val) => { + let val = self.value(&mut fb, val); + fb.cast_op(*op, val, ty) + } + ast::Expr::Load(location, addr) => { + let addr = self.value(&mut fb, addr); + match location { + ir::DataLocationKind::Memory => fb.memory_load(addr), + ir::DataLocationKind::Storage => fb.storage_load(addr), + } + } + ast::Expr::Alloca(ty) => { + let ty = self.type_(&mut fb.module_builder, ty); + fb.alloca(ty) + } + ast::Expr::Call(ast::Call(name, args)) => { + let func = self.func_ref(&mut fb.module_builder, name); + + let args = args + .iter() + .map(|val| self.value(&mut fb, val)) + .collect::>(); + fb.call(func, &args).unwrap() + } + ast::Expr::Gep(vals) => { + let vals = vals + .iter() + .map(|val| self.value(&mut fb, val)) + .collect::>(); + fb.gep(&vals).unwrap() + } + ast::Expr::Phi(vals) => { + let args = vals + .iter() + .map(|(val, block)| { + let b = self.block(block); + let v = self.value(&mut fb, val); + (v, b) + }) + .collect::>(); + fb.phi(ty, &args) + } + }; + self.name_value(&mut fb.func, result_val, val) + } + ast::StmtKind::Store(loc, addr, val) => { + let addr = self.value(&mut fb, addr); + let val = self.value(&mut fb, val); + + match loc { + ir::DataLocationKind::Memory => fb.memory_store(addr, val), + ir::DataLocationKind::Storage => fb.storage_store(addr, val), } - ast::Expr::Alloca(ty) => { - let ty = build_type(&mut builder.module_builder, ty); - builder.alloca(ty) - } - ast::Expr::Call(ast::Call(name, args)) => { - let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); - let args = args - .iter() - .map(|val| build_value(builder, val)) - .collect::>(); - builder.call(func_ref, &args).unwrap() - } - ast::Expr::Gep(vals) => { - let vals = vals - .iter() - .map(|val| build_value(builder, val)) - .collect::>(); - builder.gep(&vals).unwrap() - } - ast::Expr::Phi(vals) => { - let args = vals - .iter() - .map(|(val, block)| { - // xxx declare block - let b = ir::Block(block.0.unwrap()); - let v = build_value(builder, val); - (v, b) - }) - .collect::>(); - builder.phi(ty, &args) - } - }; - builder.name_value(result_val, &val.0) - } - ast::StmtKind::Store(loc, addr, val) => { - let addr = build_value(builder, addr); - let val = build_value(builder, val); + } + ast::StmtKind::Return(val) => { + let val = val.as_ref().map(|v| self.value(&mut fb, v)); + fb.ret(val); + } + ast::StmtKind::Jump(block_id) => { + let block_id = self.block(block_id); + fb.jump(block_id); + } + ast::StmtKind::Branch(cond, true_block, false_block) => { + let cond = self.value(&mut fb, cond); + let true_block = self.block(true_block); + let false_block = self.block(false_block); + fb.br(cond, true_block, false_block); + } + ast::StmtKind::BranchTable(index, default_block, table) => { + let index = self.value(&mut fb, index); + let default_block = default_block.as_ref().map(|b| self.block(b)); - match loc { - ir::DataLocationKind::Memory => builder.memory_store(addr, val), - ir::DataLocationKind::Storage => builder.storage_store(addr, val), + let table = table + .iter() + .map(|(val, block)| { + let block = self.block(block); + (self.value(&mut fb, val), block) + }) + .collect::>(); + fb.br_table(index, default_block, &table); + } + ast::StmtKind::Call(ast::Call(name, args)) => { + let func_ref = self.func_ref(&mut fb.module_builder, name); + + let args = args + .iter() + .map(|val| self.value(&mut fb, val)) + .collect::>(); + fb.call(func_ref, &args).unwrap(); } } - ast::StmtKind::Return(val) => { - let val = val.as_ref().map(|v| build_value(builder, v)); - builder.ret(val); - } - ast::StmtKind::Jump(block_id) => { - let block_id = ir::Block(block_id.0.unwrap()); - builder.jump(block_id); - } - ast::StmtKind::Branch(cond, true_block, false_block) => { - let cond = build_value(builder, cond); - let true_block = ir::Block(true_block.0.unwrap()); - let false_block = ir::Block(false_block.0.unwrap()); - builder.br(cond, true_block, false_block); - } - ast::StmtKind::BranchTable(index, default_block, table) => { - let index = build_value(builder, index); - let default_block = default_block.as_ref().map(|b| ir::Block(b.0.unwrap())); - let table = table - .iter() - .map(|(val, block)| { - (build_value(builder, val), ir::Block(block.0.unwrap())) - }) - .collect::>(); - builder.br_table(index, default_block, &table); - } - ast::StmtKind::Call(ast::Call(name, args)) => { - let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); - let args = args - .iter() - .map(|val| build_value(builder, val)) - .collect::>(); - builder.call(func_ref, &args).unwrap(); - } } } + + for (val, span) in self.undefined.drain() { + let name = self.func_value_names.get_by_left(&val).unwrap(); + self.errors + .push(Error::Undefined(UndefinedKind::Value(name.clone()), span)); + } + let names = std::mem::take(&mut self.func_value_names); + self.value_names.insert(func_ref, names); + fb.seal_all(); + fb.finish() + } + + fn func_ref(&mut self, mb: &mut ModuleBuilder, name: &Spanned) -> FuncRef { + mb.get_func_ref(&name.inner.0).unwrap_or_else(|| { + self.errors.push(Error::Undefined( + UndefinedKind::Func(name.inner.0.clone()), + name.span, + )); + FuncRef::from_u32(0) + }) + } + + fn block(&mut self, b: &ast::BlockId) -> ir::Block { + let block = ir::Block(b.id.unwrap()); + if !self.blocks.contains(&block) { + self.errors + .push(Error::Undefined(UndefinedKind::Block(block), b.span)); + } + block + } + + pub fn name_value(&mut self, func: &mut ir::Function, value: ir::Value, name: &ast::ValueName) { + if let Some(v) = self.func_value_names.get_by_right(&name.string) { + if self.undefined.remove(v).is_some() { + func.dfg.change_to_alias(*v, value); + } else { + self.errors + .push(Error::DuplicateValueName(name.string.clone(), name.span)); + } + } + self.func_value_names.insert(value, name.string.clone()); } -} -fn build_value(builder: &mut FunctionBuilder, val: &ast::Value) -> ir::Value { - match val { - ast::Value::Immediate(imm) => builder.make_imm_value(*imm), - ast::Value::Named(v) => builder.get_named_value(&v.0), - ast::Value::Error => unreachable!(), + pub fn get_named_value(&mut self, func: &mut ir::Function, name: &ast::ValueName) -> ir::Value { + if let Some(v) = self.func_value_names.get_by_right(&name.string).copied() { + v + } else { + let v = func.dfg.make_value(ir::ValueData::Immediate { + imm: ir::Immediate::I128(424242), + ty: ir::Type::I128, + }); + + self.undefined.insert(v, name.span); + self.name_value(func, v, name); + v + } } -} -fn build_type(builder: &mut ModuleBuilder, t: &ast::Type) -> ir::Type { - match t { - ast::Type::Int(i) => (*i).into(), - ast::Type::Ptr(t) => { - let t = build_type(builder, t); - builder.ptr_type(t) + fn value(&mut self, fb: &mut FunctionBuilder, val: &ast::Value) -> ir::Value { + match &val.kind { + ast::ValueKind::Immediate(imm) => fb.make_imm_value(*imm), + ast::ValueKind::Named(v) => self.get_named_value(&mut fb.func, v), + ast::ValueKind::Error => unreachable!(), } - ast::Type::Array(t, n) => { - let elem = build_type(builder, t); - builder.declare_array_type(elem, *n) + } + + fn type_(&mut self, mb: &mut ModuleBuilder, t: &ast::Type) -> ir::Type { + match &t.kind { + ast::TypeKind::Int(i) => (*i).into(), + ast::TypeKind::Ptr(t) => { + let t = self.type_(mb, t); + mb.ptr_type(t) + } + ast::TypeKind::Array(t, n) => { + let elem = self.type_(mb, t); + mb.declare_array_type(elem, *n) + } + ast::TypeKind::Void => ir::Type::Void, + ast::TypeKind::Struct(name) => mb.get_struct_type(name).unwrap_or_else(|| { + self.errors + .push(Error::Undefined(UndefinedKind::Type(name.clone()), t.span)); + ir::Type::Void + }), + ast::TypeKind::Error => unreachable!(), } - ast::Type::Void => ir::Type::Void, - ast::Type::Struct(name) => builder.get_struct_type(name).unwrap_or_else(|| { - // xxx error on undeclared struct - eprintln!("struct type not found: {name}"); - ir::Type::Void - }), - ast::Type::Error => todo!(), } } diff --git a/crates/parser/src/syntax.rs b/crates/parser/src/syntax.rs index 8b61c98f..6b64515f 100644 --- a/crates/parser/src/syntax.rs +++ b/crates/parser/src/syntax.rs @@ -7,6 +7,50 @@ use pest::iterators::Pair; #[grammar = "sonatina.pest"] pub struct Parser; +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +pub struct Span(pub u32, pub u32); + +impl Span { + pub fn from_range(r: Range) -> Self { + Self(r.start as u32, r.end as u32) + } + + pub fn as_range(&self) -> Range { + self.0 as usize..self.1 as usize + } +} + +#[derive(Debug, Clone)] +pub struct Spanned { + pub span: Span, + pub inner: T, +} + +impl AsRef for Spanned { + fn as_ref(&self) -> &T { + &self.inner + } +} + +impl AsMut for Spanned { + fn as_mut(&mut self) -> &mut T { + &mut self.inner + } +} + +impl FromSyntax for Spanned +where + T: FromSyntax, +{ + fn from_syntax(node: &mut Node) -> Self { + let inner = T::from_syntax(node); + Self { + span: node.span, + inner, + } + } +} + pub trait FromSyntax { fn from_syntax(node: &mut Node) -> Self; } @@ -14,7 +58,7 @@ pub trait FromSyntax { pub struct Node<'i, E> { pub rule: Rule, pub txt: &'i str, - pub span: Range, + pub span: Span, pairs: Vec>>, pub errors: Vec, child: Option>, @@ -31,7 +75,7 @@ impl<'i, E> Node<'i, E> { self.rule = pair.as_rule(); self.txt = pair.as_str(); let s = pair.as_span(); - self.span = s.start()..s.end(); + self.span = Span::from_range(s.start()..s.end()); self.pairs.clear(); self.pairs.extend(pair.into_inner().map(Some)); debug_assert!(self.errors.is_empty()); @@ -201,50 +245,3 @@ impl<'i, E> std::default::Default for Node<'i, E> { } } } - -// #[cfg(test)] -// mod tests { -// use super::*; - -// #[test] -// fn test_with_module_comment() { -// let input = " -// #! Module comment 1 -// #! Module comment 2 - -// target = \"evm-ethereum-london\" - -// # f1 start 1 -// # f1 start 2 -// func private %f1() -> i32 { -// block0: -// return 311.i32; -// } - -// # f2 start 1 -// # f2 start 2 -// func public %f2() -> i32 { -// block0: -// return 311.i32; -// }"; - -// let parsed_module = parse_module2(input).unwrap(); -// let module_comments = parsed_module.module_comments; -// assert_eq!(module_comments[0], "#! Module comment 1"); -// assert_eq!(module_comments[1], "#! Module comment 2"); - -// let module = parsed_module.module; -// let mut funcs = module.iter_functions(); -// let func1 = funcs.next().unwrap(); -// let func1_comment = &parsed_module.func_comments[func1]; -// assert_eq!(func1_comment.len(), 2); -// assert_eq!(func1_comment[0], "# f1 start 1"); -// assert_eq!(func1_comment[1], "# f1 start 2"); - -// let func2 = funcs.next().unwrap(); -// let func2_comment = &parsed_module.func_comments[func2]; -// assert_eq!(func2_comment.len(), 2); -// assert_eq!(func2_comment[0], "# f2 start 1"); -// assert_eq!(func2_comment[1], "# f2 start 2"); -// } -// } diff --git a/crates/parser/test_files/errors/bad_target.snap b/crates/parser/test_files/errors/bad_target.snap new file mode 100644 index 00000000..eabec865 --- /dev/null +++ b/crates/parser/test_files/errors/bad_target.snap @@ -0,0 +1,11 @@ +--- +source: crates/parser/tests/errors.rs +expression: s +input_file: crates/parser/test_files/errors/bad_target.sntn +--- +error: parse error +--> bad_target.sntn:0:11 + | +0 | target = "ewasm-ethereum-foo" + | ^^^^^^^^^^^^^^^^^^ given architecture is not supported + | diff --git a/crates/parser/test_files/errors/bad_target.sntn b/crates/parser/test_files/errors/bad_target.sntn new file mode 100644 index 00000000..33a5de25 --- /dev/null +++ b/crates/parser/test_files/errors/bad_target.sntn @@ -0,0 +1 @@ +target = "ewasm-ethereum-foo" diff --git a/crates/parser/test_files/errors/duplicate_val.snap b/crates/parser/test_files/errors/duplicate_val.snap new file mode 100644 index 00000000..ca9e35fe --- /dev/null +++ b/crates/parser/test_files/errors/duplicate_val.snap @@ -0,0 +1,11 @@ +--- +source: crates/parser/tests/errors.rs +expression: s +input_file: crates/parser/test_files/errors/duplicate_val.sntn +--- +error: parse error + --> duplicate_val.sntn:5:9 + | +5 | v0.i8 = add 2.i8 3.i8; + | ^^ value name `v0` is already defined + | diff --git a/crates/parser/test_files/errors/duplicate_val.sntn b/crates/parser/test_files/errors/duplicate_val.sntn new file mode 100644 index 00000000..d89d0029 --- /dev/null +++ b/crates/parser/test_files/errors/duplicate_val.sntn @@ -0,0 +1,8 @@ +target = "evm-ethereum-london" + +func public %main() { + block0: + v0.i8 = add 0.i8 1.i8; + v0.i8 = add 2.i8 3.i8; + return v0; +} diff --git a/crates/parser/test_files/errors/numbers.snap b/crates/parser/test_files/errors/numbers.snap new file mode 100644 index 00000000..4163beb5 --- /dev/null +++ b/crates/parser/test_files/errors/numbers.snap @@ -0,0 +1,21 @@ +--- +source: crates/parser/tests/errors.rs +expression: s +input_file: crates/parser/test_files/errors/numbers.sntn +--- +error: parse error + --> numbers.sntn:4:27 + | +4 | v0.i8 = call %foo 1000.i8; + | ^^^^ number out of bounds + |error: parse error + --> numbers.sntn:6:28 + | +6 | v2.i16 = add 1.i16 -50000.i16; + | ^^^^^^ number out of bounds + |error: parse error + --> numbers.sntn:7:19 + | +7 | jump block203948029830482; + | ^^^^^^^^^^^^^^^ number out of bounds + | diff --git a/crates/parser/test_files/errors/numbers.sntn b/crates/parser/test_files/errors/numbers.sntn new file mode 100644 index 00000000..a825efa8 --- /dev/null +++ b/crates/parser/test_files/errors/numbers.sntn @@ -0,0 +1,9 @@ +target = "evm-ethereum-london" + +func public %main() { + block0: + v0.i8 = call %foo 1000.i8; + v129830918203.i8 = add v0 v0; + v2.i16 = add 1.i16 -50000.i16; + jump block203948029830482; +} diff --git a/crates/parser/test_files/errors/parse_error.snap b/crates/parser/test_files/errors/parse_error.snap new file mode 100644 index 00000000..49ed4dc9 --- /dev/null +++ b/crates/parser/test_files/errors/parse_error.snap @@ -0,0 +1,16 @@ +--- +source: crates/parser/tests/errors.rs +expression: s +input_file: crates/parser/test_files/errors/parse_error.sntn +--- +error: parse error + --> parse_error.sntn:3:5 + | +3 | v0.i8 = call %foo 100.i8; + | ^ --> 4:5 + | +4 | v0.i8 = call %foo 100.i8; + | ^--- + | + = expected COMMENT or block_ident + | diff --git a/crates/parser/test_files/errors/parse_error.sntn b/crates/parser/test_files/errors/parse_error.sntn new file mode 100644 index 00000000..a7d0991a --- /dev/null +++ b/crates/parser/test_files/errors/parse_error.sntn @@ -0,0 +1,6 @@ +target = "evm-ethereum-london" + +func %f() { + v0.i8 = call %foo 100.i8; + v1 +} diff --git a/crates/parser/test_files/errors/undefined.snap b/crates/parser/test_files/errors/undefined.snap new file mode 100644 index 00000000..343d20f9 --- /dev/null +++ b/crates/parser/test_files/errors/undefined.snap @@ -0,0 +1,26 @@ +--- +source: crates/parser/tests/errors.rs +expression: s +input_file: crates/parser/test_files/errors/undefined.sntn +--- +error: parse error + --> undefined.sntn:2:25 + | +2 | type %foo = { i8, i16, *%s1 }; + | ^^^ undefined type: `%s1` + |error: parse error + --> undefined.sntn:6:22 + | +6 | v0.i8 = call %foo 100.i8; + | ^^^^ undefined function: `%foo` + |error: parse error + --> undefined.sntn:9:14 + | +9 | jump block2; + | ^^^^^^ undefined block: `block2` + |error: parse error + --> undefined.sntn:7:24 + | +7 | v2.i8 = add v0 v1; + | ^^ undefined value: `v1` + | diff --git a/crates/parser/test_files/errors/undefined.sntn b/crates/parser/test_files/errors/undefined.sntn new file mode 100644 index 00000000..1a0769ec --- /dev/null +++ b/crates/parser/test_files/errors/undefined.sntn @@ -0,0 +1,13 @@ +target = "evm-ethereum-london" + +type %foo = { i8, i16, *%s1 }; + +func public %main() { + block0: + v0.i8 = call %foo 100.i8; + v2.i8 = add v0 v1; + v3.i8 = add v1 v1; + jump block2; + block1: + return; +} diff --git a/crates/parser/test_files/syntax/module/simple.ast.snap b/crates/parser/test_files/syntax/module/simple.ast.snap index 38f631a5..59b6c67d 100644 --- a/crates/parser/test_files/syntax/module/simple.ast.snap +++ b/crates/parser/test_files/syntax/module/simple.ast.snap @@ -1,7 +1,7 @@ --- -source: crates/parser2/tests/syntax.rs +source: crates/parser/tests/syntax.rs expression: "format!(\"{:#?}\", module)" -input_file: crates/parser2/test_files/syntax/module/simple.sntn +input_file: crates/parser/test_files/syntax/module/simple.sntn --- Module { target: Some( @@ -20,17 +20,35 @@ Module { "add_i8", ), params: [ - Int( - I8, - ), - Int( - I8, - ), + Type { + kind: Int( + I8, + ), + span: Span( + 100, + 102, + ), + }, + Type { + kind: Int( + I8, + ), + span: Span( + 104, + 106, + ), + }, ], ret_type: Some( - Int( - I8, - ), + Type { + kind: Int( + I8, + ), + span: Span( + 111, + 113, + ), + }, ), }, ], @@ -40,17 +58,41 @@ Module { "foo", ), fields: [ - Int( - I8, - ), - Int( - I16, - ), - Ptr( - Int( - I64, + Type { + kind: Int( + I8, ), - ), + span: Span( + 130, + 132, + ), + }, + Type { + kind: Int( + I16, + ), + span: Span( + 134, + 137, + ), + }, + Type { + kind: Ptr( + Type { + kind: Int( + I64, + ), + span: Span( + 140, + 143, + ), + }, + ), + span: Span( + 139, + 143, + ), + }, ], packed: false, }, @@ -59,15 +101,33 @@ Module { "bar", ), fields: [ - Int( - I8, - ), - Array( - Int( + Type { + kind: Int( I8, ), - 31, - ), + span: Span( + 162, + 164, + ), + }, + Type { + kind: Array( + Type { + kind: Int( + I8, + ), + span: Span( + 167, + 169, + ), + }, + 31, + ), + span: Span( + 166, + 174, + ), + }, ], packed: true, }, @@ -84,33 +144,59 @@ Module { }, blocks: [ Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 0, ), - ), + span: Span( + 206, + 212, + ), + }, stmts: [ Stmt { kind: Define( ValueDeclaration( - ValueName( - "v0", - ), - Int( - I8, - ), + ValueName { + string: "v0", + span: Span( + 222, + 224, + ), + }, + Type { + kind: Int( + I8, + ), + span: Span( + 225, + 227, + ), + }, ), Call( Call( - FunctionName( - "foo", - ), + Spanned { + span: Span( + 235, + 239, + ), + inner: FunctionName( + "foo", + ), + }, [ - Immediate( - I8( - 100, + Value { + kind: Immediate( + I8( + 100, + ), ), - ), + span: Span( + 240, + 243, + ), + }, ], ), ), @@ -134,79 +220,161 @@ Module { ), params: [ ValueDeclaration( - ValueName( - "v0", - ), - Int( - I8, - ), + ValueName { + string: "v0", + span: Span( + 307, + 309, + ), + }, + Type { + kind: Int( + I8, + ), + span: Span( + 310, + 312, + ), + }, ), ], ret_type: Some( - Int( - I8, - ), + Type { + kind: Int( + I8, + ), + span: Span( + 317, + 319, + ), + }, ), }, blocks: [ Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 0, ), - ), + span: Span( + 322, + 328, + ), + }, stmts: [ Stmt { kind: Define( ValueDeclaration( - ValueName( - "v1", - ), - Int( - I8, - ), + ValueName { + string: "v1", + span: Span( + 330, + 332, + ), + }, + Type { + kind: Int( + I8, + ), + span: Span( + 333, + 335, + ), + }, ), Binary( Mul, - Named( - ValueName( - "v0", + Value { + kind: Named( + ValueName { + string: "v0", + span: Span( + 342, + 344, + ), + }, ), - ), - Immediate( - I8( - 2, + span: Span( + 342, + 344, ), - ), + }, + Value { + kind: Immediate( + I8( + 2, + ), + ), + span: Span( + 345, + 346, + ), + }, ), ), }, Stmt { kind: Define( ValueDeclaration( - ValueName( - "v2", - ), - Int( - I8, - ), + ValueName { + string: "v2", + span: Span( + 351, + 353, + ), + }, + Type { + kind: Int( + I8, + ), + span: Span( + 354, + 356, + ), + }, ), Call( Call( - FunctionName( - "add_i8", - ), + Spanned { + span: Span( + 364, + 371, + ), + inner: FunctionName( + "add_i8", + ), + }, [ - Named( - ValueName( - "v0", + Value { + kind: Named( + ValueName { + string: "v0", + span: Span( + 372, + 374, + ), + }, ), - ), - Named( - ValueName( - "v1", + span: Span( + 372, + 374, ), - ), + }, + Value { + kind: Named( + ValueName { + string: "v1", + span: Span( + 375, + 377, + ), + }, + ), + span: Span( + 375, + 377, + ), + }, ], ), ), @@ -215,11 +383,21 @@ Module { Stmt { kind: Return( Some( - Named( - ValueName( - "v2", + Value { + kind: Named( + ValueName { + string: "v2", + span: Span( + 386, + 388, + ), + }, ), - ), + span: Span( + 386, + 388, + ), + }, ), ), }, @@ -238,81 +416,187 @@ Module { ), params: [ ValueDeclaration( - ValueName( - "v0", - ), - Ptr( - Int( - I8, + ValueName { + string: "v0", + span: Span( + 405, + 407, ), - ), + }, + Type { + kind: Ptr( + Type { + kind: Int( + I8, + ), + span: Span( + 409, + 411, + ), + }, + ), + span: Span( + 408, + 411, + ), + }, ), ValueDeclaration( - ValueName( - "v1", - ), - Array( - Int( - I8, + ValueName { + string: "v1", + span: Span( + 413, + 415, ), - 2, - ), + }, + Type { + kind: Array( + Type { + kind: Int( + I8, + ), + span: Span( + 417, + 419, + ), + }, + 2, + ), + span: Span( + 416, + 423, + ), + }, ), ValueDeclaration( - ValueName( - "v2", - ), - Array( - Ptr( - Int( - I8, - ), + ValueName { + string: "v2", + span: Span( + 425, + 427, ), - 2, - ), + }, + Type { + kind: Array( + Type { + kind: Ptr( + Type { + kind: Int( + I8, + ), + span: Span( + 430, + 432, + ), + }, + ), + span: Span( + 429, + 432, + ), + }, + 2, + ), + span: Span( + 428, + 436, + ), + }, ), ValueDeclaration( - ValueName( - "v3", - ), - Array( - Array( - Int( - I8, - ), + ValueName { + string: "v3", + span: Span( + 438, + 440, + ), + }, + Type { + kind: Array( + Type { + kind: Array( + Type { + kind: Int( + I8, + ), + span: Span( + 443, + 445, + ), + }, + 2, + ), + span: Span( + 442, + 449, + ), + }, 2, ), - 2, - ), + span: Span( + 441, + 453, + ), + }, ), ValueDeclaration( - ValueName( - "v4", - ), - Struct( - "foo", - ), + ValueName { + string: "v4", + span: Span( + 455, + 457, + ), + }, + Type { + kind: Struct( + "foo", + ), + span: Span( + 458, + 462, + ), + }, ), ValueDeclaration( - ValueName( - "v5", - ), - Ptr( - Struct( - "foo", + ValueName { + string: "v5", + span: Span( + 464, + 466, ), - ), + }, + Type { + kind: Ptr( + Type { + kind: Struct( + "foo", + ), + span: Span( + 468, + 472, + ), + }, + ), + span: Span( + 467, + 472, + ), + }, ), ], ret_type: None, }, blocks: [ Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 0, ), - ), + span: Span( + 480, + 486, + ), + }, stmts: [ Stmt { kind: Return( @@ -332,62 +616,110 @@ Module { ), params: [ ValueDeclaration( - ValueName( - "v0", - ), - Int( - I8, - ), + ValueName { + string: "v0", + span: Span( + 519, + 521, + ), + }, + Type { + kind: Int( + I8, + ), + span: Span( + 522, + 524, + ), + }, ), ], ret_type: None, }, blocks: [ Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 0, ), - ), + span: Span( + 532, + 538, + ), + }, stmts: [ Stmt { kind: BranchTable( - Named( - ValueName( - "v0", + Value { + kind: Named( + ValueName { + string: "v0", + span: Span( + 557, + 559, + ), + }, ), - ), + span: Span( + 557, + 559, + ), + }, Some( - BlockId( - Some( + BlockId { + id: Some( 0, ), - ), + span: Span( + 560, + 566, + ), + }, ), [ ( - Immediate( - I8( - 1, + Value { + kind: Immediate( + I8( + 1, + ), ), - ), - BlockId( - Some( + span: Span( + 568, + 569, + ), + }, + BlockId { + id: Some( 1, ), - ), + span: Span( + 573, + 579, + ), + }, ), ( - Immediate( - I8( - 2, + Value { + kind: Immediate( + I8( + 2, + ), ), - ), - BlockId( - Some( + span: Span( + 582, + 583, + ), + }, + BlockId { + id: Some( 2, ), - ), + span: Span( + 587, + 593, + ), + }, ), ], ), @@ -395,40 +727,60 @@ Module { ], }, Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 1, ), - ), + span: Span( + 600, + 606, + ), + }, stmts: [ Stmt { kind: Return( Some( - Immediate( - I8( - 1, + Value { + kind: Immediate( + I8( + 1, + ), ), - ), + span: Span( + 623, + 624, + ), + }, ), ), }, ], }, Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 2, ), - ), + span: Span( + 633, + 639, + ), + }, stmts: [ Stmt { kind: Return( Some( - Immediate( - I8( - 2, + Value { + kind: Immediate( + I8( + 2, + ), ), - ), + span: Span( + 656, + 657, + ), + }, ), ), }, @@ -445,81 +797,143 @@ Module { ), params: [ ValueDeclaration( - ValueName( - "v0", - ), - Int( - I64, - ), + ValueName { + string: "v0", + span: Span( + 676, + 678, + ), + }, + Type { + kind: Int( + I64, + ), + span: Span( + 679, + 682, + ), + }, ), ], ret_type: Some( - Int( - I64, - ), + Type { + kind: Int( + I64, + ), + span: Span( + 687, + 690, + ), + }, ), }, blocks: [ Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 0, ), - ), + span: Span( + 697, + 703, + ), + }, stmts: [ Stmt { kind: Jump( - BlockId( - Some( + BlockId { + id: Some( 1, ), - ), + span: Span( + 718, + 724, + ), + }, ), }, ], }, Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 1, ), - ), + span: Span( + 730, + 736, + ), + }, stmts: [ Stmt { kind: Define( ValueDeclaration( - ValueName( - "v1", - ), - Int( - I64, - ), + ValueName { + string: "v1", + span: Span( + 746, + 748, + ), + }, + Type { + kind: Int( + I64, + ), + span: Span( + 749, + 752, + ), + }, ), Phi( [ ( - Named( - ValueName( - "v0", + Value { + kind: Named( + ValueName { + string: "v0", + span: Span( + 760, + 762, + ), + }, ), - ), - BlockId( - Some( + span: Span( + 760, + 762, + ), + }, + BlockId { + id: Some( 0, ), - ), + span: Span( + 763, + 769, + ), + }, ), ( - Immediate( - I64( - 100, + Value { + kind: Immediate( + I64( + 100, + ), ), - ), - BlockId( - Some( + span: Span( + 772, + 775, + ), + }, + BlockId { + id: Some( 2, ), - ), + span: Span( + 780, + 786, + ), + }, ), ], ), @@ -528,82 +942,148 @@ Module { Stmt { kind: Define( ValueDeclaration( - ValueName( - "v2", - ), - Int( - I1, - ), + ValueName { + string: "v2", + span: Span( + 797, + 799, + ), + }, + Type { + kind: Int( + I1, + ), + span: Span( + 800, + 802, + ), + }, ), Binary( Gt, - Named( - ValueName( - "v1", + Value { + kind: Named( + ValueName { + string: "v1", + span: Span( + 808, + 810, + ), + }, ), - ), - Immediate( - I64( - 10, + span: Span( + 808, + 810, ), - ), + }, + Value { + kind: Immediate( + I64( + 10, + ), + ), + span: Span( + 811, + 813, + ), + }, ), ), }, Stmt { kind: Branch( - Named( - ValueName( - "v2", + Value { + kind: Named( + ValueName { + string: "v2", + span: Span( + 830, + 832, + ), + }, ), - ), - BlockId( - Some( + span: Span( + 830, + 832, + ), + }, + BlockId { + id: Some( 2, ), - ), - BlockId( - Some( + span: Span( + 833, + 839, + ), + }, + BlockId { + id: Some( 3, ), - ), + span: Span( + 840, + 846, + ), + }, ), }, ], }, Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 2, ), - ), + span: Span( + 852, + 858, + ), + }, stmts: [ Stmt { kind: Jump( - BlockId( - Some( + BlockId { + id: Some( 1, ), - ), + span: Span( + 873, + 879, + ), + }, ), }, ], }, Block { - id: BlockId( - Some( + id: BlockId { + id: Some( 3, ), - ), + span: Span( + 885, + 891, + ), + }, stmts: [ Stmt { kind: Return( Some( - Named( - ValueName( - "v1", + Value { + kind: Named( + ValueName { + string: "v1", + span: Span( + 908, + 910, + ), + }, ), - ), + span: Span( + 908, + 910, + ), + }, ), ), }, diff --git a/crates/parser/tests/common/mod.rs b/crates/parser/tests/common/mod.rs new file mode 100644 index 00000000..71f72834 --- /dev/null +++ b/crates/parser/tests/common/mod.rs @@ -0,0 +1,40 @@ +// copied from fe test-utils +/// A macro to assert that a value matches a snapshot. +/// If the snapshot does not exist, it will be created in the same directory as +/// the test file. +#[macro_export] +macro_rules! snap_test { + ($value:expr, $fixture_path: expr) => { + snap_test!($value, $fixture_path, None) + }; + + ($value:expr, $fixture_path: expr, $suffix: expr) => { + let mut settings = insta::Settings::new(); + let fixture_path = ::std::path::Path::new($fixture_path); + let fixture_dir = fixture_path.parent().unwrap(); + let fixture_name = fixture_path.file_stem().unwrap().to_str().unwrap(); + + settings.set_snapshot_path(fixture_dir); + settings.set_input_file($fixture_path); + settings.set_prepend_module_to_snapshot(false); + let suffix: Option<&str> = $suffix; + let name = if let Some(suffix) = suffix { + format!("{fixture_name}.{suffix}") + } else { + fixture_name.into() + }; + settings.bind(|| { + insta::_macro_support::assert_snapshot( + name.into(), + &$value, + env!("CARGO_MANIFEST_DIR"), + fixture_name, + module_path!(), + file!(), + line!(), + stringify!($value), + ) + .unwrap() + }) + }; +} diff --git a/crates/parser/tests/errors.rs b/crates/parser/tests/errors.rs new file mode 100644 index 00000000..29ecc378 --- /dev/null +++ b/crates/parser/tests/errors.rs @@ -0,0 +1,26 @@ +use std::path::Path; + +use dir_test::{dir_test, Fixture}; +use sonatina_parser::parse_module; +mod common; + +#[dir_test( + dir: "$CARGO_MANIFEST_DIR/test_files/errors/", + glob: "*.sntn" +)] +fn test_errors(fixture: Fixture<&str>) { + let Err(errs) = parse_module(fixture.content()) else { + panic!("expected parse_module to fail with errors"); + }; + let path = Path::new(fixture.path()) + .file_name() + .unwrap() + .to_string_lossy(); + + let mut v = vec![]; + for err in errs { + err.print(&mut v, &path, fixture.content(), false).unwrap(); + } + let s = String::from_utf8(v).unwrap(); + snap_test!(s, fixture.path()); +} diff --git a/crates/parser/tests/syntax.rs b/crates/parser/tests/syntax.rs index d1d6f3b9..332b1f75 100644 --- a/crates/parser/tests/syntax.rs +++ b/crates/parser/tests/syntax.rs @@ -8,6 +8,7 @@ use sonatina_parser::{ Error, }; use std::fmt::{self, Write}; +mod common; #[dir_test( dir: "$CARGO_MANIFEST_DIR/test_files/syntax/stmts", @@ -48,7 +49,7 @@ fn test_module_ast(fixture: Fixture<&str>) { )] fn test_module_ir(fixture: Fixture<&str>) { let module = parse_module(fixture.content()).unwrap(); - let mut w = ModuleWriter::new(&module.module); + let mut w = ModuleWriter::with_debug_provider(&module.module, &module.debug); snap_test!(w.dump_string().unwrap(), fixture.path(), Some("ir")); } @@ -66,7 +67,7 @@ fn test_rule(rule: Rule, fixture: Fixture<&str>) { } fn report_error(err: pest::error::Error, fixture: &Fixture<&str>) { - let s = Error::SyntaxError(err).print_to_string(fixture.path(), fixture.content()); + let s = Error::SyntaxError(err).print_to_string(fixture.path(), fixture.content(), true); eprintln!("{s}"); } @@ -85,46 +86,3 @@ impl<'i> fmt::Debug for PairsWrapper<'i> { Ok(()) } } - -// xxx copied from fe test-utils -#[doc(hidden)] -pub use insta as _insta; -/// A macro to assert that a value matches a snapshot. -/// If the snapshot does not exist, it will be created in the same directory as -/// the test file. -#[macro_export] -macro_rules! snap_test { - ($value:expr, $fixture_path: expr) => { - snap_test!($value, $fixture_path, None) - }; - - ($value:expr, $fixture_path: expr, $suffix: expr) => { - let mut settings = insta::Settings::new(); - let fixture_path = ::std::path::Path::new($fixture_path); - let fixture_dir = fixture_path.parent().unwrap(); - let fixture_name = fixture_path.file_stem().unwrap().to_str().unwrap(); - - settings.set_snapshot_path(fixture_dir); - settings.set_input_file($fixture_path); - settings.set_prepend_module_to_snapshot(false); - let suffix: Option<&str> = $suffix; - let name = if let Some(suffix) = suffix { - format!("{fixture_name}.{suffix}") - } else { - fixture_name.into() - }; - settings.bind(|| { - insta::_macro_support::assert_snapshot( - name.into(), - &$value, - env!("CARGO_MANIFEST_DIR"), - fixture_name, - module_path!(), - file!(), - line!(), - stringify!($value), - ) - .unwrap() - }) - }; -} From e7cb9bf8570fecc56e62f58b0092c27c3f1835e9 Mon Sep 17 00:00:00 2001 From: Sean Billig Date: Sun, 23 Jun 2024 10:32:05 -0700 Subject: [PATCH 6/7] Hide spans from ast debug output to fix insta tests on windows --- .github/workflows/ci.yml | 2 +- crates/parser/Cargo.toml | 1 + crates/parser/src/ast.rs | 16 +- crates/parser/src/syntax.rs | 7 +- .../syntax/module/newlines.ast.snap | 84 ++++ .../test_files/syntax/module/newlines.ir.snap | 12 + .../test_files/syntax/module/newlines.snap | 48 ++ .../test_files/syntax/module/newlines.sntn | 6 + .../test_files/syntax/module/simple.ast.snap | 470 ++++-------------- 9 files changed, 262 insertions(+), 384 deletions(-) create mode 100644 crates/parser/test_files/syntax/module/newlines.ast.snap create mode 100644 crates/parser/test_files/syntax/module/newlines.ir.snap create mode 100644 crates/parser/test_files/syntax/module/newlines.snap create mode 100644 crates/parser/test_files/syntax/module/newlines.sntn diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1c358f0d..fbe6011e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,7 +66,7 @@ jobs: toolchain: ${{ matrix.rust }} - name: Test - run: cargo test --workspace --all-targets + run: cargo test --workspace --all-targets --no-fail-fast - name: Filecheck run: cargo run -p sonatina-filecheck diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index b7868a39..9d6555cd 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -27,6 +27,7 @@ either = { version = "1.12.0", default-features = false } annotate-snippets = "0.11.4" rustc-hash = "2.0.0" bimap = "0.6.3" +derive_more = { version = "=1.0.0-beta.6", default-features = false, features = ["debug"] } [dev-dependencies] dir-test = "0.3" diff --git a/crates/parser/src/ast.rs b/crates/parser/src/ast.rs index 389bc401..a1b1e567 100644 --- a/crates/parser/src/ast.rs +++ b/crates/parser/src/ast.rs @@ -15,6 +15,10 @@ use smol_str::SmolStr; pub use sonatina_triple::{InvalidTriple, TargetTriple}; use std::str::FromStr; +// `Span`s aren't printed in the Debug output because the pest +// code locations differ on windows vs *nix, which breaks the ast tests. +use derive_more::Debug as Dbg; + pub fn parse(input: &str) -> Result> { match Parser::parse(Rule::module, input) { Err(err) => Err(vec![Error::SyntaxError(err)]), @@ -237,9 +241,10 @@ impl FromSyntax for Block { } } -#[derive(Debug)] +#[derive(Dbg)] pub struct BlockId { pub id: Option, + #[debug(skip)] pub span: Span, } @@ -309,9 +314,10 @@ impl FromSyntax for (Value, BlockId) { } } -#[derive(Debug)] +#[derive(Dbg)] pub struct Type { pub kind: TypeKind, + #[debug(skip)] pub span: Span, } @@ -417,9 +423,10 @@ impl FromSyntax for Expr { #[derive(Debug)] pub struct Call(pub Spanned, pub Vec); -#[derive(Debug)] +#[derive(Dbg)] pub struct ValueName { pub string: SmolStr, + #[debug(skip)] pub span: Span, } @@ -441,9 +448,10 @@ impl FromSyntax for ValueDeclaration { } } -#[derive(Debug)] +#[derive(Dbg)] pub struct Value { pub kind: ValueKind, + #[debug(skip)] pub span: Span, } diff --git a/crates/parser/src/syntax.rs b/crates/parser/src/syntax.rs index 6b64515f..849ce767 100644 --- a/crates/parser/src/syntax.rs +++ b/crates/parser/src/syntax.rs @@ -1,7 +1,7 @@ -use std::{ops::Range, str::FromStr}; - +use derive_more::Debug as Dbg; use either::Either; use pest::iterators::Pair; +use std::{ops::Range, str::FromStr}; #[derive(pest_derive::Parser)] #[grammar = "sonatina.pest"] @@ -20,8 +20,9 @@ impl Span { } } -#[derive(Debug, Clone)] +#[derive(Dbg, Clone)] pub struct Spanned { + #[debug(skip)] pub span: Span, pub inner: T, } diff --git a/crates/parser/test_files/syntax/module/newlines.ast.snap b/crates/parser/test_files/syntax/module/newlines.ast.snap new file mode 100644 index 00000000..6dc6c92e --- /dev/null +++ b/crates/parser/test_files/syntax/module/newlines.ast.snap @@ -0,0 +1,84 @@ +--- +source: crates/parser/tests/syntax.rs +expression: "format!(\"{:#?}\", module)" +input_file: crates/parser/test_files/syntax/module/newlines.sntn +--- +Module { + target: Some( + TargetTriple { + architecture: Evm, + chain: Ethereum, + version: EvmVersion( + London, + ), + }, + ), + declared_functions: [], + struct_types: [], + functions: [ + Func { + signature: FuncSignature { + linkage: Public, + name: FunctionName( + "main", + ), + params: [], + ret_type: None, + }, + blocks: [ + Block { + id: BlockId { + id: Some( + 0, + ), + .. + }, + stmts: [ + Stmt { + kind: Define( + ValueDeclaration( + ValueName { + string: "v0", + .. + }, + Type { + kind: Int( + I8, + ), + .. + }, + ), + Binary( + Add, + Value { + kind: Immediate( + I8( + 1, + ), + ), + .. + }, + Value { + kind: Immediate( + I8( + 2, + ), + ), + .. + }, + ), + ), + }, + Stmt { + kind: Return( + None, + ), + }, + ], + }, + ], + comments: [], + }, + ], + comments: [], +} diff --git a/crates/parser/test_files/syntax/module/newlines.ir.snap b/crates/parser/test_files/syntax/module/newlines.ir.snap new file mode 100644 index 00000000..1bae15fa --- /dev/null +++ b/crates/parser/test_files/syntax/module/newlines.ir.snap @@ -0,0 +1,12 @@ +--- +source: crates/parser/tests/syntax.rs +expression: w.dump_string().unwrap() +input_file: crates/parser/test_files/syntax/module/newlines.sntn +--- +target = evm-ethereum-london +func public %main() -> void { + block0: + v0.i8 = add 1.i8 2.i8; + return; + +} diff --git a/crates/parser/test_files/syntax/module/newlines.snap b/crates/parser/test_files/syntax/module/newlines.snap new file mode 100644 index 00000000..1d8f64b5 --- /dev/null +++ b/crates/parser/test_files/syntax/module/newlines.snap @@ -0,0 +1,48 @@ +--- +source: crates/parser/tests/syntax.rs +expression: s +input_file: crates/parser/test_files/syntax/module/newlines.sntn +--- +module "target = "evm-ethereum-london" +func public %main() { + block0: + v0.i8 = add 1.i8 2.i8; + return; +} +" + target_triple "evm-ethereum-london" + function "func public %main() { + block0: + v0.i8 = add 1.i8 2.i8; + return; + }" + function_signature "func public %main() " + function_linkage "public" + function_identifier "%main" + function_name "main" + function_params "()" + block "block0: + v0.i8 = add 1.i8 2.i8; + return;" + block_ident "block0" + block_number "0" + stmt "v0.i8 = add 1.i8 2.i8;" + define_stmt "v0.i8 = add 1.i8 2.i8" + value_declaration "v0.i8" + value_name "v0" + type_name "i8" + primitive_type "i8" + expr "add 1.i8 2.i8" + bin_expr "add 1.i8 2.i8" + bin_op "add" + value "1.i8" + imm_number "1.i8" + decimal "1" + primitive_type "i8" + value "2.i8" + imm_number "2.i8" + decimal "2" + primitive_type "i8" + stmt "return;" + return_stmt "return" + EOI "" diff --git a/crates/parser/test_files/syntax/module/newlines.sntn b/crates/parser/test_files/syntax/module/newlines.sntn new file mode 100644 index 00000000..9037546f --- /dev/null +++ b/crates/parser/test_files/syntax/module/newlines.sntn @@ -0,0 +1,6 @@ +target = "evm-ethereum-london" +func public %main() { + block0: + v0.i8 = add 1.i8 2.i8; + return; +} diff --git a/crates/parser/test_files/syntax/module/simple.ast.snap b/crates/parser/test_files/syntax/module/simple.ast.snap index 59b6c67d..ac3d1b01 100644 --- a/crates/parser/test_files/syntax/module/simple.ast.snap +++ b/crates/parser/test_files/syntax/module/simple.ast.snap @@ -24,19 +24,13 @@ Module { kind: Int( I8, ), - span: Span( - 100, - 102, - ), + .. }, Type { kind: Int( I8, ), - span: Span( - 104, - 106, - ), + .. }, ], ret_type: Some( @@ -44,10 +38,7 @@ Module { kind: Int( I8, ), - span: Span( - 111, - 113, - ), + .. }, ), }, @@ -62,19 +53,13 @@ Module { kind: Int( I8, ), - span: Span( - 130, - 132, - ), + .. }, Type { kind: Int( I16, ), - span: Span( - 134, - 137, - ), + .. }, Type { kind: Ptr( @@ -82,16 +67,10 @@ Module { kind: Int( I64, ), - span: Span( - 140, - 143, - ), + .. }, ), - span: Span( - 139, - 143, - ), + .. }, ], packed: false, @@ -105,10 +84,7 @@ Module { kind: Int( I8, ), - span: Span( - 162, - 164, - ), + .. }, Type { kind: Array( @@ -116,17 +92,11 @@ Module { kind: Int( I8, ), - span: Span( - 167, - 169, - ), + .. }, 31, ), - span: Span( - 166, - 174, - ), + .. }, ], packed: true, @@ -148,10 +118,7 @@ Module { id: Some( 0, ), - span: Span( - 206, - 212, - ), + .. }, stmts: [ Stmt { @@ -159,31 +126,22 @@ Module { ValueDeclaration( ValueName { string: "v0", - span: Span( - 222, - 224, - ), + .. }, Type { kind: Int( I8, ), - span: Span( - 225, - 227, - ), + .. }, ), Call( Call( Spanned { - span: Span( - 235, - 239, - ), inner: FunctionName( "foo", ), + .. }, [ Value { @@ -192,10 +150,7 @@ Module { 100, ), ), - span: Span( - 240, - 243, - ), + .. }, ], ), @@ -222,19 +177,13 @@ Module { ValueDeclaration( ValueName { string: "v0", - span: Span( - 307, - 309, - ), + .. }, Type { kind: Int( I8, ), - span: Span( - 310, - 312, - ), + .. }, ), ], @@ -243,10 +192,7 @@ Module { kind: Int( I8, ), - span: Span( - 317, - 319, - ), + .. }, ), }, @@ -256,10 +202,7 @@ Module { id: Some( 0, ), - span: Span( - 322, - 328, - ), + .. }, stmts: [ Stmt { @@ -267,19 +210,13 @@ Module { ValueDeclaration( ValueName { string: "v1", - span: Span( - 330, - 332, - ), + .. }, Type { kind: Int( I8, ), - span: Span( - 333, - 335, - ), + .. }, ), Binary( @@ -288,16 +225,10 @@ Module { kind: Named( ValueName { string: "v0", - span: Span( - 342, - 344, - ), + .. }, ), - span: Span( - 342, - 344, - ), + .. }, Value { kind: Immediate( @@ -305,10 +236,7 @@ Module { 2, ), ), - span: Span( - 345, - 346, - ), + .. }, ), ), @@ -318,62 +246,41 @@ Module { ValueDeclaration( ValueName { string: "v2", - span: Span( - 351, - 353, - ), + .. }, Type { kind: Int( I8, ), - span: Span( - 354, - 356, - ), + .. }, ), Call( Call( Spanned { - span: Span( - 364, - 371, - ), inner: FunctionName( "add_i8", ), + .. }, [ Value { kind: Named( ValueName { string: "v0", - span: Span( - 372, - 374, - ), + .. }, ), - span: Span( - 372, - 374, - ), + .. }, Value { kind: Named( ValueName { string: "v1", - span: Span( - 375, - 377, - ), + .. }, ), - span: Span( - 375, - 377, - ), + .. }, ], ), @@ -387,16 +294,10 @@ Module { kind: Named( ValueName { string: "v2", - span: Span( - 386, - 388, - ), + .. }, ), - span: Span( - 386, - 388, - ), + .. }, ), ), @@ -418,10 +319,7 @@ Module { ValueDeclaration( ValueName { string: "v0", - span: Span( - 405, - 407, - ), + .. }, Type { kind: Ptr( @@ -429,25 +327,16 @@ Module { kind: Int( I8, ), - span: Span( - 409, - 411, - ), + .. }, ), - span: Span( - 408, - 411, - ), + .. }, ), ValueDeclaration( ValueName { string: "v1", - span: Span( - 413, - 415, - ), + .. }, Type { kind: Array( @@ -455,26 +344,17 @@ Module { kind: Int( I8, ), - span: Span( - 417, - 419, - ), + .. }, 2, ), - span: Span( - 416, - 423, - ), + .. }, ), ValueDeclaration( ValueName { string: "v2", - span: Span( - 425, - 427, - ), + .. }, Type { kind: Array( @@ -484,32 +364,20 @@ Module { kind: Int( I8, ), - span: Span( - 430, - 432, - ), + .. }, ), - span: Span( - 429, - 432, - ), + .. }, 2, ), - span: Span( - 428, - 436, - ), + .. }, ), ValueDeclaration( ValueName { string: "v3", - span: Span( - 438, - 440, - ), + .. }, Type { kind: Array( @@ -519,51 +387,33 @@ Module { kind: Int( I8, ), - span: Span( - 443, - 445, - ), + .. }, 2, ), - span: Span( - 442, - 449, - ), + .. }, 2, ), - span: Span( - 441, - 453, - ), + .. }, ), ValueDeclaration( ValueName { string: "v4", - span: Span( - 455, - 457, - ), + .. }, Type { kind: Struct( "foo", ), - span: Span( - 458, - 462, - ), + .. }, ), ValueDeclaration( ValueName { string: "v5", - span: Span( - 464, - 466, - ), + .. }, Type { kind: Ptr( @@ -571,16 +421,10 @@ Module { kind: Struct( "foo", ), - span: Span( - 468, - 472, - ), + .. }, ), - span: Span( - 467, - 472, - ), + .. }, ), ], @@ -592,10 +436,7 @@ Module { id: Some( 0, ), - span: Span( - 480, - 486, - ), + .. }, stmts: [ Stmt { @@ -618,19 +459,13 @@ Module { ValueDeclaration( ValueName { string: "v0", - span: Span( - 519, - 521, - ), + .. }, Type { kind: Int( I8, ), - span: Span( - 522, - 524, - ), + .. }, ), ], @@ -642,10 +477,7 @@ Module { id: Some( 0, ), - span: Span( - 532, - 538, - ), + .. }, stmts: [ Stmt { @@ -654,26 +486,17 @@ Module { kind: Named( ValueName { string: "v0", - span: Span( - 557, - 559, - ), + .. }, ), - span: Span( - 557, - 559, - ), + .. }, Some( BlockId { id: Some( 0, ), - span: Span( - 560, - 566, - ), + .. }, ), [ @@ -684,19 +507,13 @@ Module { 1, ), ), - span: Span( - 568, - 569, - ), + .. }, BlockId { id: Some( 1, ), - span: Span( - 573, - 579, - ), + .. }, ), ( @@ -706,19 +523,13 @@ Module { 2, ), ), - span: Span( - 582, - 583, - ), + .. }, BlockId { id: Some( 2, ), - span: Span( - 587, - 593, - ), + .. }, ), ], @@ -731,10 +542,7 @@ Module { id: Some( 1, ), - span: Span( - 600, - 606, - ), + .. }, stmts: [ Stmt { @@ -746,10 +554,7 @@ Module { 1, ), ), - span: Span( - 623, - 624, - ), + .. }, ), ), @@ -761,10 +566,7 @@ Module { id: Some( 2, ), - span: Span( - 633, - 639, - ), + .. }, stmts: [ Stmt { @@ -776,10 +578,7 @@ Module { 2, ), ), - span: Span( - 656, - 657, - ), + .. }, ), ), @@ -799,19 +598,13 @@ Module { ValueDeclaration( ValueName { string: "v0", - span: Span( - 676, - 678, - ), + .. }, Type { kind: Int( I64, ), - span: Span( - 679, - 682, - ), + .. }, ), ], @@ -820,10 +613,7 @@ Module { kind: Int( I64, ), - span: Span( - 687, - 690, - ), + .. }, ), }, @@ -833,10 +623,7 @@ Module { id: Some( 0, ), - span: Span( - 697, - 703, - ), + .. }, stmts: [ Stmt { @@ -845,10 +632,7 @@ Module { id: Some( 1, ), - span: Span( - 718, - 724, - ), + .. }, ), }, @@ -859,10 +643,7 @@ Module { id: Some( 1, ), - span: Span( - 730, - 736, - ), + .. }, stmts: [ Stmt { @@ -870,19 +651,13 @@ Module { ValueDeclaration( ValueName { string: "v1", - span: Span( - 746, - 748, - ), + .. }, Type { kind: Int( I64, ), - span: Span( - 749, - 752, - ), + .. }, ), Phi( @@ -892,25 +667,16 @@ Module { kind: Named( ValueName { string: "v0", - span: Span( - 760, - 762, - ), + .. }, ), - span: Span( - 760, - 762, - ), + .. }, BlockId { id: Some( 0, ), - span: Span( - 763, - 769, - ), + .. }, ), ( @@ -920,19 +686,13 @@ Module { 100, ), ), - span: Span( - 772, - 775, - ), + .. }, BlockId { id: Some( 2, ), - span: Span( - 780, - 786, - ), + .. }, ), ], @@ -944,19 +704,13 @@ Module { ValueDeclaration( ValueName { string: "v2", - span: Span( - 797, - 799, - ), + .. }, Type { kind: Int( I1, ), - span: Span( - 800, - 802, - ), + .. }, ), Binary( @@ -965,16 +719,10 @@ Module { kind: Named( ValueName { string: "v1", - span: Span( - 808, - 810, - ), + .. }, ), - span: Span( - 808, - 810, - ), + .. }, Value { kind: Immediate( @@ -982,10 +730,7 @@ Module { 10, ), ), - span: Span( - 811, - 813, - ), + .. }, ), ), @@ -996,34 +741,22 @@ Module { kind: Named( ValueName { string: "v2", - span: Span( - 830, - 832, - ), + .. }, ), - span: Span( - 830, - 832, - ), + .. }, BlockId { id: Some( 2, ), - span: Span( - 833, - 839, - ), + .. }, BlockId { id: Some( 3, ), - span: Span( - 840, - 846, - ), + .. }, ), }, @@ -1034,10 +767,7 @@ Module { id: Some( 2, ), - span: Span( - 852, - 858, - ), + .. }, stmts: [ Stmt { @@ -1046,10 +776,7 @@ Module { id: Some( 1, ), - span: Span( - 873, - 879, - ), + .. }, ), }, @@ -1060,10 +787,7 @@ Module { id: Some( 3, ), - span: Span( - 885, - 891, - ), + .. }, stmts: [ Stmt { @@ -1073,16 +797,10 @@ Module { kind: Named( ValueName { string: "v1", - span: Span( - 908, - 910, - ), + .. }, ), - span: Span( - 908, - 910, - ), + .. }, ), ), From f5d4b7c6fda8ae20110a5f6eeb34f447c9f78af8 Mon Sep 17 00:00:00 2001 From: Sean Billig Date: Sun, 30 Jun 2024 10:06:44 -0700 Subject: [PATCH 7/7] Cleanup unused deps and whitespace --- .github/workflows/ci.yml | 29 ++++++++++-- .../filecheck/fixtures/insn_simplify/or.sntn | 44 +++++++++---------- crates/filecheck/fixtures/licm/basic.sntn | 2 +- crates/ir/Cargo.toml | 2 - crates/ir/src/builder/func_builder.rs | 4 -- crates/parser/Cargo.toml | 4 -- 6 files changed, 48 insertions(+), 37 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fbe6011e..76b56cdd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,10 +14,11 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable with: toolchain: stable + - uses: Swatinem/rust-cache@v2 - run: cargo doc --no-deps fmt: @@ -25,11 +26,12 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable with: toolchain: nightly components: rustfmt + - uses: Swatinem/rust-cache@v2 - run: cargo fmt --all -- --check clippy: @@ -37,12 +39,29 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable with: toolchain: stable + - uses: Swatinem/rust-cache@v2 - run: cargo clippy --workspace --all-features --all-targets -- -D clippy::all + unused_deps: + name: Unused dependencies + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + toolchain: nightly + - name: Install cargo-udeps + uses: baptiste0928/cargo-install@v3 + with: + crate: cargo-udeps + + - run: cargo +nightly udeps + test: name: Test defaults: @@ -60,11 +79,13 @@ jobs: - nightly steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable with: toolchain: ${{ matrix.rust }} + - uses: Swatinem/rust-cache@v2 + - name: Test run: cargo test --workspace --all-targets --no-fail-fast diff --git a/crates/filecheck/fixtures/insn_simplify/or.sntn b/crates/filecheck/fixtures/insn_simplify/or.sntn index e1c98772..e3e8a1a4 100644 --- a/crates/filecheck/fixtures/insn_simplify/or.sntn +++ b/crates/filecheck/fixtures/insn_simplify/or.sntn @@ -2,7 +2,7 @@ target = "evm-ethereum-london" # v0 | -1 => -1 # check: return -1.i8; -func public %or0(v0.i8) -> i8 { +func public %or0(v0.i8) -> i8 { block0: v1.i8 = or v0 -1.i8; return v1; @@ -10,7 +10,7 @@ func public %or0(v0.i8) -> i8 { # v0 | v0 => v0 # check: return v0; -func public %or1(v0.i8) -> i8 { +func public %or1(v0.i8) -> i8 { block0: v1.i8 = or v0 v0; return v1; @@ -18,7 +18,7 @@ func public %or1(v0.i8) -> i8 { # v0 | 0 => v0 # check: return v0; -func public %or2(v0.i8) -> i8 { +func public %or2(v0.i8) -> i8 { block0: v1.i8 = or v0 0.i8; return v1; @@ -26,7 +26,7 @@ func public %or2(v0.i8) -> i8 { # v0 | !v0 => -1 # check: return -1.i8; -func public %or3(v0.i8) -> i8 { +func public %or3(v0.i8) -> i8 { block0: v1.i8 = not v0; v2.i8 = or v0 v1; @@ -35,7 +35,7 @@ func public %or3(v0.i8) -> i8 { # v0 | (v0 & v1) => v0 # check: return v0; -func public %or4(v0.i8, v1.i8) -> i8 { +func public %or4(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v0 v1; v3.i8 = or v0 v2; @@ -44,7 +44,7 @@ func public %or4(v0.i8, v1.i8) -> i8 { # v0 | (v1 & v0) => v0 # check: return v0; -func public %or5(v0.i8, v1.i8) -> i8 { +func public %or5(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v1 v0; v3.i8 = or v0 v2; @@ -53,7 +53,7 @@ func public %or5(v0.i8, v1.i8) -> i8 { # v0 | !(v0 & v1) => -1 # check: return -1.i8; -func public %or6(v0.i8, v1.i8) -> i8 { +func public %or6(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v0 v1; v3.i8 = not v2; @@ -63,7 +63,7 @@ func public %or6(v0.i8, v1.i8) -> i8 { # v0 | !(v1 & v0) => -1 # check: return -1.i8; -func public %or7(v0.i8, v1.i8) -> i8 { +func public %or7(v0.i8, v1.i8) -> i8 { block0: v2.i8 = and v1 v0; v3.i8 = not v2; @@ -73,7 +73,7 @@ func public %or7(v0.i8, v1.i8) -> i8 { # (v0 ^ v1) | (v0 & !v1) => (v0 ^ v1) # check: return v2; -func public %or8(v0.i8, v1.i8) -> i8 { +func public %or8(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = not v1; @@ -84,7 +84,7 @@ func public %or8(v0.i8, v1.i8) -> i8 { # (v0 ^ 1) | (!v1 & v0) => (v0 ^ v1) # check: return v2; -func public %or9(v0.i8, v1.i8) -> i8 { +func public %or9(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = not v1; @@ -95,7 +95,7 @@ func public %or9(v0.i8, v1.i8) -> i8 { # (v0 ^ v1) | (!v0 & v1) => (v0 ^ v1) # check: return v2; -func public %or10(v0.i8, v1.i8) -> i8 { +func public %or10(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = not v0; @@ -106,7 +106,7 @@ func public %or10(v0.i8, v1.i8) -> i8 { # (v0 ^ v1) | (v1 & !v0) => (v0 ^ v1) # check: return v2; -func public %or11(v0.i8, v1.i8) -> i8 { +func public %or11(v0.i8, v1.i8) -> i8 { block0: v2.i8 = xor v0 v1; v3.i8 = not v0; @@ -117,7 +117,7 @@ func public %or11(v0.i8, v1.i8) -> i8 { # (!v0 ^ v1) | (v0 & v1) => (!v0 ^ v1) # check: return v3; -func public %or12(v0.i8, v1.i8) -> i8 { +func public %or12(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = xor v2 v1; @@ -128,7 +128,7 @@ func public %or12(v0.i8, v1.i8) -> i8 { # (!v0 ^ v1) | (v1 & v0) => (!v0 ^ v1) # check: return v3; -func public %or13(v0.i8, v1.i8) -> i8 { +func public %or13(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = xor v2 v1; @@ -139,7 +139,7 @@ func public %or13(v0.i8, v1.i8) -> i8 { # (v0 ^ !v1) | (v0 & v1) => (v0 ^ !v1) # check: return v3; -func public %or14(v0.i8, v1.i8) -> i8 { +func public %or14(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = xor v0 v2; @@ -150,7 +150,7 @@ func public %or14(v0.i8, v1.i8) -> i8 { # (v0 ^ !v1) | (v1 & v0) => (v0 ^ !v1) # check: return v3; -func public %or15(v0.i8, v1.i8) -> i8 { +func public %or15(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = xor v0 v2; @@ -161,7 +161,7 @@ func public %or15(v0.i8, v1.i8) -> i8 { # (v0 | v1) | (v0 ^ v1) => (v0 | v1) # check: return v2; -func public %or16(v0.i8, v1.i8) -> i8 { +func public %or16(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v0 v1; v3.i8 = xor v0 v1; @@ -171,7 +171,7 @@ func public %or16(v0.i8, v1.i8) -> i8 { # (v0 | v1) | (v1 ^ v0) => (v0 | v1) # check: return v2; -func public %or17(v0.i8, v1.i8) -> i8 { +func public %or17(v0.i8, v1.i8) -> i8 { block0: v2.i8 = or v0 v1; v3.i8 = xor v1 v0; @@ -181,7 +181,7 @@ func public %or17(v0.i8, v1.i8) -> i8 { # (!v0 & v1) | !(v0 | v1) => !v0 # check: return v2; -func public %or18(v0.i8, v1.i8) -> i8 { +func public %or18(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = and v2 v1; @@ -193,7 +193,7 @@ func public %or18(v0.i8, v1.i8) -> i8 { # (!v0 & v1) | !(v1 | v0) => !v0 # check: return v2; -func public %or19(v0.i8, v1.i8) -> i8 { +func public %or19(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v0; v3.i8 = and v2 v1; @@ -205,7 +205,7 @@ func public %or19(v0.i8, v1.i8) -> i8 { # (v0 & !v1) | !(v0 | v1) => !v1 # check: return v2; -func public %or20(v0.i8, v1.i8) -> i8 { +func public %or20(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = and v0 v2; @@ -217,7 +217,7 @@ func public %or20(v0.i8, v1.i8) -> i8 { # (v0 & !v1) | !(v1 | v0) => !v1 # check: return v2; -func public %or21(v0.i8, v1.i8) -> i8 { +func public %or21(v0.i8, v1.i8) -> i8 { block0: v2.i8 = not v1; v3.i8 = and v0 v2; diff --git a/crates/filecheck/fixtures/licm/basic.sntn b/crates/filecheck/fixtures/licm/basic.sntn index 83cae52d..17f2d091 100644 --- a/crates/filecheck/fixtures/licm/basic.sntn +++ b/crates/filecheck/fixtures/licm/basic.sntn @@ -15,7 +15,7 @@ target = "evm-ethereum-london" # nextln: # nextln: block3: # nextln: return; -func public %basic(v0.i32, v1.i32) -> void { +func public %basic(v0.i32, v1.i32) -> void { block0: jump block1; diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml index 2911073c..98f5d373 100644 --- a/crates/ir/Cargo.toml +++ b/crates/ir/Cargo.toml @@ -22,5 +22,3 @@ dyn-clone = "1.0.4" sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } indexmap = "2.0.0" dot2 = { git = "https://github.com/sanpii/dot2.rs.git" } -bimap = "0.6.3" -smol_str = "0.2.2" diff --git a/crates/ir/src/builder/func_builder.rs b/crates/ir/src/builder/func_builder.rs index a40e6626..6c01b897 100644 --- a/crates/ir/src/builder/func_builder.rs +++ b/crates/ir/src/builder/func_builder.rs @@ -359,10 +359,6 @@ where self.module_builder.ctx.isa.type_provider().gas_type() } - // fn cursor(&mut self) -> InsnInserter { - // InsnInserter::new(&mut self.func, self.loc) - // } - fn insert_insn(&mut self, insn_data: InsnData) -> Option { let insn = self.cursor.insert_insn_data(&mut self.func, insn_data); let result = self.cursor.make_result(&mut self.func, insn); diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index 9d6555cd..9444e354 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -14,15 +14,11 @@ keywords = ["compiler", "evm", "wasm", "smart-contract"] [dependencies] ir = { package = "sonatina-ir", path = "../ir", version = "0.0.3-alpha" } sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } -smallvec = "1.7.0" cranelift-entity = "0.104" pest = "2.7.10" pest_derive = "2.7.10" -pest-ast = "0.3.4" -from-pest = "0.3.2" smol_str = "0.2.2" hex = "0.4.3" -num-traits = { version = "0.2.19", default-features = false } either = { version = "1.12.0", default-features = false } annotate-snippets = "0.11.4" rustc-hash = "2.0.0"