From ea638a69e05ab27d4ab444f1c88d7c52d8aaeb0f Mon Sep 17 00:00:00 2001 From: Aki Date: Tue, 5 Sep 2023 23:56:17 +0200 Subject: [PATCH 01/14] Implement interpreter --- Cargo.toml | 2 +- crates/interpreter/Cargo.toml | 11 ++ crates/interpreter/src/frame.rs | 176 +++++++++++++++++++++++ crates/interpreter/src/lib.rs | 8 ++ crates/interpreter/src/pc.rs | 37 +++++ crates/interpreter/src/state.rs | 246 ++++++++++++++++++++++++++++++++ crates/interpreter/src/value.rs | 65 +++++++++ 7 files changed, 544 insertions(+), 1 deletion(-) create mode 100644 crates/interpreter/Cargo.toml create mode 100644 crates/interpreter/src/frame.rs create mode 100644 crates/interpreter/src/lib.rs create mode 100644 crates/interpreter/src/pc.rs create mode 100644 crates/interpreter/src/state.rs create mode 100644 crates/interpreter/src/value.rs diff --git a/Cargo.toml b/Cargo.toml index 492fefe7..fadbfecb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["crates/ir", "crates/codegen", "crates/object", "crates/parser", "crates/filecheck", "crates/triple"] +members = ["crates/ir", "crates/codegen", "crates/object", "crates/parser", "crates/filecheck", "crates/triple", "crates/interpreter"] diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml new file mode 100644 index 00000000..f74e990a --- /dev/null +++ b/crates/interpreter/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "interpreter" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +cranelift-entity = "0.99.1" +sonatina-codegen = { path = "../codegen" } +sonatina-ir = { path = "../ir" } diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs new file mode 100644 index 00000000..ece18682 --- /dev/null +++ b/crates/interpreter/src/frame.rs @@ -0,0 +1,176 @@ +use std::mem; + +use cranelift_entity::SecondaryMap; + +use sonatina_ir::{ + module::ModuleCtx, + types::{CompoundType, CompoundTypeData}, + DataFlowGraph, Function, Insn, Type, Value, ValueData, I256, +}; + +use crate::{Literal, ProgramCounter}; + +pub struct Frame { + pub ret_addr: ProgramCounter, + local_values: SecondaryMap, // 256-bit register + alloca_region: Vec, // big endian +} + +impl Frame { + pub fn new(func: &Function, ret_addr: ProgramCounter, args: Vec) -> Self { + let mut local_values = SecondaryMap::new(); + for (v, literal_value) in func.arg_values.iter().zip(args.into_iter()) { + local_values[*v] = literal_value + } + let alloca_region = Vec::new(); + + Self { + ret_addr, + local_values, + alloca_region, + } + } + + pub fn load(&mut self, /*ctx: Context,*/ v: Value, dfg: &DataFlowGraph) -> Literal { + if !self.is_assigned(v) { + let v = match dfg.value_data(v) { + ValueData::Insn { insn, .. } => { + let result_v = dfg.insn_result(*insn).unwrap(); + if self.is_assigned(result_v) { + return self.local_values[result_v]; + } + result_v + } + _ => v, + }; + let i256 = dfg.value_imm(v).unwrap().as_i256(); + self.local_values[v] = Literal(i256); + } + self.local_values[v] + } + + pub fn map(&mut self, literal: Literal, insn: Insn, dfg: &DataFlowGraph) { + let v = dfg.insn_result(insn).unwrap(); + debug_assert!(!self.is_assigned(v)); + self.local_values[v] = literal + } + + pub fn alloca(&mut self, ctx: &ModuleCtx, ty: Type, insn: Insn, dfg: &DataFlowGraph) { + let v = dfg.insn_result(insn).unwrap(); + debug_assert!(!self.is_assigned(v)); + + let addr = self.alloca_region.len(); + + let size_of_data = byte_size_of_ty(ctx, ty); + for _ in 0..size_of_data { + self.alloca_region.push(0u8); + } + self.local_values[v] = Literal::from_usize(addr); + } + + pub fn gep(&mut self, ctx: &ModuleCtx, args: &[Value], dfg: &DataFlowGraph) -> Literal { + let ptr_v = args[0]; + let ptr = self.load(ptr_v, dfg); + let base_addr = ptr.as_usize(); + let ptr_ty = dfg.value_ty(ptr_v); + debug_assert!(ctx.with_ty_store(|s| s.is_ptr(ptr_ty))); + + let pointee_ty = ctx.with_ty_store(|s| s.deref(ptr_ty)).unwrap(); + debug_assert!(!pointee_ty.is_integral() && !ctx.with_ty_store(|s| s.is_ptr(ptr_ty))); + + let mut cmpd_ty = to_cmpd_ty(pointee_ty); + let mut offset = 0usize; + for arg in &args[1..] { + let index = self.load(*arg, dfg).as_usize(); + + ctx.with_ty_store(|s| match s.resolve_compound(cmpd_ty.unwrap()) { + CompoundTypeData::Array { elem, .. } => { + offset += index * byte_size_of_ty(ctx, *elem); + cmpd_ty = to_cmpd_ty(*elem); + } + CompoundTypeData::Struct(data) => { + for ty in &data.fields[..index] { + offset += byte_size_of_ty(ctx, *ty); + } + cmpd_ty = to_cmpd_ty(data.fields[index]); + } + _ => unreachable!(), + }) + } + Literal::from_usize(base_addr + offset) + } + + pub fn ldr(&mut self, ctx: &ModuleCtx, ptr: Value, insn: Insn, dfg: &DataFlowGraph) { + let addr = self.load(ptr, dfg).as_usize(); + debug_assert!(self.is_alloca(addr)); + + let ty = dfg.insn_result_ty(insn).unwrap(); + let size = byte_size_of_ty(ctx, ty); + let mut literal_b = Vec::new(); + for b in &self.alloca_region[addr..addr + size] { + literal_b.push(*b) + } + let Some(data) = Literal::deserialize(ctx, ty, literal_b) else { + return; + }; + self.map(data, insn, dfg); + } + + pub fn str(&mut self, ctx: &ModuleCtx, ptr: Value, v: Value, dfg: &DataFlowGraph) { + let addr = self.load(ptr, dfg).as_usize(); + let data = self.load(v, dfg); + let data_ty = dfg.value_ty(v); + let data_b = data.serialize(ctx, data_ty); + for (i, b) in data_b.into_iter().enumerate() { + self.alloca_region[addr + i] = b; + } + } + + pub fn eq(&mut self, lhs: Value, rhs: Value, dfg: &DataFlowGraph) -> bool { + self.load(lhs, dfg) == self.load(rhs, dfg) + } + + fn is_assigned(&self, v: Value) -> bool { + for local_v in self.local_values.keys() { + if v == local_v { + return true; + } + } + false + } + + fn is_alloca(&self, addr: usize) -> bool { + addr < self.alloca_region.len() + } +} + +pub fn byte_size_of_ty(ctx: &ModuleCtx, ty: Type) -> usize { + match ty { + Type::I1 => mem::size_of::(), + Type::I8 => mem::size_of::(), + Type::I16 => mem::size_of::(), + Type::I32 => mem::size_of::(), + Type::I64 => mem::size_of::(), + Type::I128 => mem::size_of::(), + Type::I256 => mem::size_of::(), + Type::Compound(ty) => { + use CompoundTypeData::*; + ctx.with_ty_store(|s| match s.resolve_compound(ty) { + Array { len, elem } => len * byte_size_of_ty(ctx, *elem), + Ptr(_) => mem::size_of::(), + Struct(data) => data + .fields + .iter() + .fold(0usize, |acc, ty| acc + byte_size_of_ty(ctx, *ty)), + }) + } + Type::Void => mem::size_of::<()>(), + } +} + +fn to_cmpd_ty(ty: Type) -> Option { + match ty { + Type::Compound(ty) => Some(ty), + _ => None, + } +} diff --git a/crates/interpreter/src/lib.rs b/crates/interpreter/src/lib.rs new file mode 100644 index 00000000..a9f197b6 --- /dev/null +++ b/crates/interpreter/src/lib.rs @@ -0,0 +1,8 @@ +pub mod frame; +pub mod pc; +pub mod state; +pub mod value; + +pub use frame::Frame; +pub use pc::ProgramCounter; +pub use value::Literal; diff --git a/crates/interpreter/src/pc.rs b/crates/interpreter/src/pc.rs new file mode 100644 index 00000000..e095f2ae --- /dev/null +++ b/crates/interpreter/src/pc.rs @@ -0,0 +1,37 @@ +use sonatina_ir::{module::FuncRef, Block, Insn, Layout}; + +#[derive(Clone, Copy)] +pub struct ProgramCounter { + pub func_ref: FuncRef, + pub insn: Insn, +} + +impl ProgramCounter { + pub fn new(entry_func: FuncRef, layout: &Layout) -> Self { + let entry = layout.entry_block().unwrap(); + let insn = layout.first_insn_of(entry).unwrap(); + + Self { + func_ref: entry_func, + insn, + } + } + + pub fn call(&mut self, callee_ref: FuncRef, callee_layout: &Layout) { + *self = ProgramCounter::new(callee_ref, &callee_layout) + } + + pub fn next_insn(&mut self, layout: &Layout) { + self.insn = layout.next_insn_of(self.insn).unwrap(); + } + + pub fn branch_to(&mut self, block: Block, layout: &Layout) { + self.insn = layout.first_insn_of(block).unwrap(); + } + + pub fn resume_frame_at(&mut self, ret_addr: Self) { + let ProgramCounter { func_ref, insn } = ret_addr; + self.func_ref = func_ref; + self.insn = insn; + } +} diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs new file mode 100644 index 00000000..95399810 --- /dev/null +++ b/crates/interpreter/src/state.rs @@ -0,0 +1,246 @@ +use std::ops::{Add, BitAnd, BitOr, BitXor, Mul, Neg, Not, Sub}; + +use sonatina_ir::{ + insn::{BinaryOp, CastOp, UnaryOp}, + module::FuncRef, + Block, DataLocationKind, Immediate, InsnData, Module, +}; + +use crate::{Frame, Literal, ProgramCounter}; + +struct State { + module: Module, + frames: Vec, + pc: ProgramCounter, + prev_block: Option, +} + +impl State { + // the cpu + pub fn new(module: Module, entry_func: FuncRef) -> Self { + let func = &module.funcs[entry_func]; + let pc = ProgramCounter::new(entry_func, &func.layout); + let entry_frame = Frame::new(func, pc, vec![]); + let frames = vec![entry_frame]; + + Self { + module, + frames, + pc, + prev_block: None, + } + } + + pub fn repl(mut self) -> Option { + loop { + if let Some(arg) = self.step() { + return arg; + } + } + } + + pub fn step(&mut self) -> Option> { + let frame = self.frames.last_mut().unwrap(); + let insn = self.pc.insn; + let ctx = &self.module.ctx; + let func = &self.module.funcs[self.pc.func_ref]; + + let dfg = &func.dfg; + let layout = &func.layout; + + let insn_data = dfg.insn_data(insn); + + use InsnData::*; + match insn_data { + Unary { code, args } => { + let arg = frame.load(args[0], dfg).0; + use UnaryOp::*; + let result = Literal(match code { + Not => arg.not(), + Neg => arg.neg(), + }); + + frame.map(result, insn, dfg); + + self.pc.next_insn(layout); + None + } + Binary { code, args } => { + let lhs: Immediate = frame.load(args[0], dfg).0.into(); + let rhs: Immediate = frame.load(args[1], dfg).0.into(); + use BinaryOp::*; + let result = Literal( + match code { + Add => lhs.add(rhs), + Sub => lhs.sub(rhs), + Mul => lhs.mul(rhs), + Udiv => lhs.udiv(rhs), + Sdiv => lhs.sdiv(rhs), + Lt => lhs.lt(rhs), + Gt => lhs.gt(rhs), + Slt => lhs.slt(rhs), + Sgt => lhs.sgt(rhs), + Le => lhs.le(rhs), + Ge => lhs.ge(rhs), + Sle => lhs.sle(rhs), + Sge => lhs.sge(rhs), + Eq => lhs.imm_eq(rhs), + Ne => lhs.imm_ne(rhs), + And => lhs.bitand(rhs), + Or => lhs.bitor(rhs), + Xor => lhs.bitxor(rhs), + } + .as_i256(), + ); + + frame.map(result, insn, dfg); + + self.pc.next_insn(layout); + None + } + Cast { code, args, ty } => { + let arg: Immediate = frame.load(args[0], dfg).0.into(); + use CastOp::*; + let result = Literal( + match code { + Sext => arg.sext(*ty), + Zext => arg.zext(*ty), + Trunc => arg.trunc(*ty), + BitCast => arg, + } + .as_i256(), + ); + + frame.map(result, insn, dfg); + + self.pc.next_insn(layout); + None + } + Load { args, loc } => { + use DataLocationKind::*; + match loc { + Memory => { + frame.ldr(ctx, args[0], insn, dfg); + } + Storage => todo!(), + } + + self.pc.next_insn(layout); + None + } + Store { args, loc } => { + use DataLocationKind::*; + match loc { + Memory => { + frame.str(ctx, args[0], args[1], dfg); + } + Storage => todo!(), + } + + self.pc.next_insn(layout); + None + } + Call { func, args, .. } => { + let mut literal_args = Vec::with_capacity(args.len()); + for arg in args { + let arg = frame.load(*arg, dfg); + literal_args.push(arg.clone()) + } + + // Function prologue + + let ret_addr = self.pc; + + let callee = &self.module.funcs[*func]; + let new_frame = Frame::new(callee, ret_addr, literal_args); + self.frames.push(new_frame); + + self.pc.call(*func, &callee.layout); + None + } + Jump { dests, .. } => { + let block = layout.insn_block(insn); + self.prev_block = Some(block); + + self.pc.branch_to(dests[0], layout); + None + } + Branch { args, dests } => { + let arg = frame.load(args[0], dfg).0; + let idx = arg.not().to_u256().as_usize(); + + let block = layout.insn_block(insn); + self.prev_block = Some(block); + self.pc.branch_to(dests[idx], layout); + None + } + BrTable { + args, + default, + table, + } => { + let block = layout.insn_block(insn); + self.prev_block = Some(block); + + let cond = args[0]; + for (idx, arg) in args[1..].iter().enumerate() { + if frame.eq(cond, *arg, dfg) { + self.pc.branch_to(table[idx], layout); + return None; + } + } + if let Some(block) = *default { + self.pc.branch_to(block, layout); + } + None + } + Alloca { ty } => { + frame.alloca(ctx, *ty, insn, dfg); + + self.pc.next_insn(layout); + None + } + Return { args } => { + let arg = args.map(|arg| frame.load(arg, dfg).clone()); + + let frame = self.frames.pop().unwrap(); // pop returning frame + match self.frames.last_mut() { + Some(caller_frame) => { + // Function epilogue + + self.pc.resume_frame_at(frame.ret_addr); + + let caller = &self.module.funcs[self.pc.func_ref]; + if let Some(lit) = arg { + caller_frame.map(lit, self.pc.insn, &caller.dfg); + } + + self.pc.next_insn(&caller.layout); + None + } + None => return Some(arg), + } + } + Gep { args } => { + let ptr = frame.gep(ctx, &args, dfg); + + frame.map(ptr, insn, dfg); + + self.pc.next_insn(layout); + None + } + Phi { values, blocks, .. } => { + let _block = layout.insn_block(insn); + let prev_block = self.prev_block.unwrap(); + for (v, block) in values.iter().zip(blocks.iter()) { + if prev_block == *block { + let lit = frame.load(*v, dfg).clone(); + frame.map(lit, insn, dfg); + break; + } + } + None + } + } + } +} diff --git a/crates/interpreter/src/value.rs b/crates/interpreter/src/value.rs new file mode 100644 index 00000000..60fccb9f --- /dev/null +++ b/crates/interpreter/src/value.rs @@ -0,0 +1,65 @@ +use sonatina_ir::{module::ModuleCtx, Type, I256, U256}; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Literal(pub I256); + +impl Default for Literal { + fn default() -> Self { + Literal(I256::zero()) + } +} + +impl Literal { + pub fn from_usize(addr: usize) -> Self { + Self(I256::from_u256(U256::from(addr))) + } + + pub fn as_usize(&self) -> usize { + self.0.to_u256().as_usize() + } + + pub fn deserialize(ctx: &ModuleCtx, ty: Type, b: Vec) -> Option { + Some(Self(match ty { + Type::I1 => (b[0] & 0b1).into(), + Type::I8 => i8::from_be_bytes(b.try_into().unwrap()).into(), + Type::I16 => i16::from_be_bytes(b.try_into().unwrap()).into(), + Type::I32 => i32::from_be_bytes(b.try_into().unwrap()).into(), + Type::I64 => i64::from_be_bytes(b.try_into().unwrap()).into(), + Type::I128 => i128::from_be_bytes(b.try_into().unwrap()).into(), + Type::I256 => I256::from_u256(U256::from_big_endian(&b)), + Type::Compound(ty) => { + debug_assert!(ctx.with_ty_store(|s| s.resolve_compound(ty).is_ptr())); + debug_assert!(b.len() == std::mem::size_of::()); + U256::from(usize::from_be_bytes(b.try_into().unwrap())).into() + } + Type::Void => return None, + })) + } + + pub fn serialize(&self, ctx: &ModuleCtx, ty: Type) -> Vec { + match ty { + Type::I1 => self.i256().trunc_to_i8().to_be_bytes().to_vec(), + Type::I8 => self.i256().trunc_to_i8().to_be_bytes().to_vec(), + Type::I16 => self.i256().trunc_to_i16().to_be_bytes().to_vec(), + Type::I32 => self.i256().trunc_to_i32().to_be_bytes().to_vec(), + Type::I64 => self.i256().trunc_to_i64().to_be_bytes().to_vec(), + Type::I128 => self.i256().trunc_to_i128().to_be_bytes().to_vec(), + Type::I256 => { + let mut b = [0u8; 32]; + self.i256().to_u256().to_big_endian(&mut b); + b.to_vec() + } + Type::Compound(ty) => { + debug_assert!(ctx.with_ty_store(|s| s.resolve_compound(ty).is_ptr())); + let mut b = [0u8; 32]; + self.i256().to_u256().to_big_endian(&mut b); + b[32 - std::mem::size_of::()..].to_vec() + } + Type::Void => Vec::new(), + } + } + + pub fn i256(&self) -> I256 { + self.0 + } +} From 6b1fed084b49e80d1601011c26dbe8eeb5b59d23 Mon Sep 17 00:00:00 2001 From: Aki Date: Fri, 8 Sep 2023 16:30:03 +0200 Subject: [PATCH 02/14] Debug interpreter --- crates/interpreter/Cargo.toml | 16 +- crates/interpreter/src/frame.rs | 80 ++--- crates/interpreter/src/lib.rs | 2 +- crates/interpreter/src/pc.rs | 4 +- crates/interpreter/src/state.rs | 407 +++++++++++++++++++++--- crates/interpreter/src/value.rs | 31 +- crates/ir/src/bigint.rs | 2 + crates/ir/src/builder/func_builder.rs | 9 + crates/ir/src/builder/module_builder.rs | 4 + crates/ir/src/value.rs | 2 +- 10 files changed, 444 insertions(+), 113 deletions(-) diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml index f74e990a..e0b0496c 100644 --- a/crates/interpreter/Cargo.toml +++ b/crates/interpreter/Cargo.toml @@ -1,11 +1,21 @@ [package] name = "interpreter" -version = "0.1.0" +version = "0.0.3-alpha" edition = "2021" +authors = ["Sonatina Developers"] +license = "Apache-2.0" +readme = "../../README.md" +homepage = "https://github.com/fe-lang/sonatina/tree/main/crates/interpreter" +repository = "https://github.com/fe-lang/sonatina" +description = "Interpreter of sonatina intermediate representation" +categories = ["compilers", "wasm"] +keywords = ["compiler", "evm", "wasm", "smart-contract"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] cranelift-entity = "0.99.1" -sonatina-codegen = { path = "../codegen" } -sonatina-ir = { path = "../ir" } +sonatina-ir = { path = "../ir", version = "0.0.3-alpha" } + +[dev-dependencies] +sonatina-parser = { path = "../parser", version = "0.0.3-alpha" } diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index ece18682..8c06e2fe 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -8,19 +8,19 @@ use sonatina_ir::{ DataFlowGraph, Function, Insn, Type, Value, ValueData, I256, }; -use crate::{Literal, ProgramCounter}; +use crate::{value::EvalValue, ProgramCounter}; pub struct Frame { pub ret_addr: ProgramCounter, - local_values: SecondaryMap, // 256-bit register - alloca_region: Vec, // big endian + local_values: SecondaryMap, // 256-bit register + alloca_region: Vec, // big endian } impl Frame { - pub fn new(func: &Function, ret_addr: ProgramCounter, args: Vec) -> Self { + pub fn new(func: &Function, ret_addr: ProgramCounter, args: Vec) -> Self { let mut local_values = SecondaryMap::new(); for (v, literal_value) in func.arg_values.iter().zip(args.into_iter()) { - local_values[*v] = literal_value + local_values[*v] = EvalValue::from_i256(literal_value) } let alloca_region = Vec::new(); @@ -31,28 +31,28 @@ impl Frame { } } - pub fn load(&mut self, /*ctx: Context,*/ v: Value, dfg: &DataFlowGraph) -> Literal { + pub fn load(&mut self, /*ctx: Context,*/ v: Value, dfg: &DataFlowGraph) -> I256 { if !self.is_assigned(v) { let v = match dfg.value_data(v) { ValueData::Insn { insn, .. } => { let result_v = dfg.insn_result(*insn).unwrap(); if self.is_assigned(result_v) { - return self.local_values[result_v]; + return self.local_values[result_v].i256(); } result_v } _ => v, }; let i256 = dfg.value_imm(v).unwrap().as_i256(); - self.local_values[v] = Literal(i256); + self.local_values[v] = EvalValue::from_i256(i256); } - self.local_values[v] + self.local_values[v].i256() } - pub fn map(&mut self, literal: Literal, insn: Insn, dfg: &DataFlowGraph) { + pub fn map(&mut self, literal: I256, insn: Insn, dfg: &DataFlowGraph) { let v = dfg.insn_result(insn).unwrap(); debug_assert!(!self.is_assigned(v)); - self.local_values[v] = literal + self.local_values[v] = EvalValue::from_i256(literal) } pub fn alloca(&mut self, ctx: &ModuleCtx, ty: Type, insn: Insn, dfg: &DataFlowGraph) { @@ -62,31 +62,33 @@ impl Frame { let addr = self.alloca_region.len(); let size_of_data = byte_size_of_ty(ctx, ty); + for _ in 0..size_of_data { self.alloca_region.push(0u8); } - self.local_values[v] = Literal::from_usize(addr); + self.local_values[v] = EvalValue::from_usize(addr); } - pub fn gep(&mut self, ctx: &ModuleCtx, args: &[Value], dfg: &DataFlowGraph) -> Literal { + pub fn gep(&mut self, ctx: &ModuleCtx, args: &[Value], dfg: &DataFlowGraph) -> I256 { let ptr_v = args[0]; let ptr = self.load(ptr_v, dfg); - let base_addr = ptr.as_usize(); + let base_addr = ptr.to_u256().as_usize(); let ptr_ty = dfg.value_ty(ptr_v); debug_assert!(ctx.with_ty_store(|s| s.is_ptr(ptr_ty))); let pointee_ty = ctx.with_ty_store(|s| s.deref(ptr_ty)).unwrap(); - debug_assert!(!pointee_ty.is_integral() && !ctx.with_ty_store(|s| s.is_ptr(ptr_ty))); - + debug_assert!(!pointee_ty.is_integral() && !ctx.with_ty_store(|s| s.is_ptr(pointee_ty))); let mut cmpd_ty = to_cmpd_ty(pointee_ty); + let mut offset = 0usize; - for arg in &args[1..] { - let index = self.load(*arg, dfg).as_usize(); - ctx.with_ty_store(|s| match s.resolve_compound(cmpd_ty.unwrap()) { + for arg in &args[1..] { + let index = self.load(*arg, dfg).to_u256().as_usize(); + let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty.unwrap()).clone()); + match cmpd_ty_data { CompoundTypeData::Array { elem, .. } => { - offset += index * byte_size_of_ty(ctx, *elem); - cmpd_ty = to_cmpd_ty(*elem); + offset += index * byte_size_of_ty(ctx, elem); + cmpd_ty = to_cmpd_ty(elem); } CompoundTypeData::Struct(data) => { for ty in &data.fields[..index] { @@ -95,13 +97,13 @@ impl Frame { cmpd_ty = to_cmpd_ty(data.fields[index]); } _ => unreachable!(), - }) + } } - Literal::from_usize(base_addr + offset) + (base_addr + offset).into() } pub fn ldr(&mut self, ctx: &ModuleCtx, ptr: Value, insn: Insn, dfg: &DataFlowGraph) { - let addr = self.load(ptr, dfg).as_usize(); + let addr = self.load(ptr, dfg).to_u256().as_usize(); debug_assert!(self.is_alloca(addr)); let ty = dfg.insn_result_ty(insn).unwrap(); @@ -110,17 +112,17 @@ impl Frame { for b in &self.alloca_region[addr..addr + size] { literal_b.push(*b) } - let Some(data) = Literal::deserialize(ctx, ty, literal_b) else { + let Some(data) = EvalValue::deserialize(ctx, ty, literal_b) else { return; }; - self.map(data, insn, dfg); + self.map(data.i256(), insn, dfg); } pub fn str(&mut self, ctx: &ModuleCtx, ptr: Value, v: Value, dfg: &DataFlowGraph) { - let addr = self.load(ptr, dfg).as_usize(); + let addr = self.load(ptr, dfg).to_u256().as_usize(); let data = self.load(v, dfg); let data_ty = dfg.value_ty(v); - let data_b = data.serialize(ctx, data_ty); + let data_b = EvalValue::from_i256(data).serialize(ctx, data_ty); for (i, b) in data_b.into_iter().enumerate() { self.alloca_region[addr + i] = b; } @@ -130,10 +132,10 @@ impl Frame { self.load(lhs, dfg) == self.load(rhs, dfg) } - fn is_assigned(&self, v: Value) -> bool { - for local_v in self.local_values.keys() { + pub fn is_assigned(&self, v: Value) -> bool { + for (local_v, local) in self.local_values.iter() { if v == local_v { - return true; + return matches!(local, EvalValue::Literal(_)); } } false @@ -153,16 +155,16 @@ pub fn byte_size_of_ty(ctx: &ModuleCtx, ty: Type) -> usize { Type::I64 => mem::size_of::(), Type::I128 => mem::size_of::(), Type::I256 => mem::size_of::(), - Type::Compound(ty) => { + Type::Compound(cmpd_ty) => { use CompoundTypeData::*; - ctx.with_ty_store(|s| match s.resolve_compound(ty) { - Array { len, elem } => len * byte_size_of_ty(ctx, *elem), + let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty).clone()); + match cmpd_ty_data { + Array { len, elem } => len * byte_size_of_ty(ctx, elem), Ptr(_) => mem::size_of::(), - Struct(data) => data - .fields - .iter() - .fold(0usize, |acc, ty| acc + byte_size_of_ty(ctx, *ty)), - }) + Struct(data) => data.fields.iter().fold(0usize, |acc, field_ty| { + acc + byte_size_of_ty(ctx, *field_ty) + }), + } } Type::Void => mem::size_of::<()>(), } diff --git a/crates/interpreter/src/lib.rs b/crates/interpreter/src/lib.rs index a9f197b6..9748e254 100644 --- a/crates/interpreter/src/lib.rs +++ b/crates/interpreter/src/lib.rs @@ -5,4 +5,4 @@ pub mod value; pub use frame::Frame; pub use pc::ProgramCounter; -pub use value::Literal; +pub use state::State; diff --git a/crates/interpreter/src/pc.rs b/crates/interpreter/src/pc.rs index e095f2ae..b4d0206e 100644 --- a/crates/interpreter/src/pc.rs +++ b/crates/interpreter/src/pc.rs @@ -1,6 +1,6 @@ use sonatina_ir::{module::FuncRef, Block, Insn, Layout}; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub struct ProgramCounter { pub func_ref: FuncRef, pub insn: Insn, @@ -18,7 +18,7 @@ impl ProgramCounter { } pub fn call(&mut self, callee_ref: FuncRef, callee_layout: &Layout) { - *self = ProgramCounter::new(callee_ref, &callee_layout) + *self = ProgramCounter::new(callee_ref, callee_layout) } pub fn next_insn(&mut self, layout: &Layout) { diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 95399810..1d7fa11f 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -3,12 +3,12 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Mul, Neg, Not, Sub}; use sonatina_ir::{ insn::{BinaryOp, CastOp, UnaryOp}, module::FuncRef, - Block, DataLocationKind, Immediate, InsnData, Module, + Block, DataLocationKind, Immediate, InsnData, Module, I256, }; -use crate::{Frame, Literal, ProgramCounter}; +use crate::{Frame, ProgramCounter}; -struct State { +pub struct State { module: Module, frames: Vec, pc: ProgramCounter, @@ -16,7 +16,6 @@ struct State { } impl State { - // the cpu pub fn new(module: Module, entry_func: FuncRef) -> Self { let func = &module.funcs[entry_func]; let pc = ProgramCounter::new(entry_func, &func.layout); @@ -31,7 +30,7 @@ impl State { } } - pub fn repl(mut self) -> Option { + pub fn repl(mut self) -> Option { loop { if let Some(arg) = self.step() { return arg; @@ -39,7 +38,7 @@ impl State { } } - pub fn step(&mut self) -> Option> { + pub fn step(&mut self) -> Option> { let frame = self.frames.last_mut().unwrap(); let insn = self.pc.insn; let ctx = &self.module.ctx; @@ -53,12 +52,12 @@ impl State { use InsnData::*; match insn_data { Unary { code, args } => { - let arg = frame.load(args[0], dfg).0; + let arg = frame.load(args[0], dfg); use UnaryOp::*; - let result = Literal(match code { + let result = match code { Not => arg.not(), Neg => arg.neg(), - }); + }; frame.map(result, insn, dfg); @@ -66,50 +65,43 @@ impl State { None } Binary { code, args } => { - let lhs: Immediate = frame.load(args[0], dfg).0.into(); - let rhs: Immediate = frame.load(args[1], dfg).0.into(); + let lhs: Immediate = frame.load(args[0], dfg).into(); + let rhs: Immediate = frame.load(args[1], dfg).into(); use BinaryOp::*; - let result = Literal( - match code { - Add => lhs.add(rhs), - Sub => lhs.sub(rhs), - Mul => lhs.mul(rhs), - Udiv => lhs.udiv(rhs), - Sdiv => lhs.sdiv(rhs), - Lt => lhs.lt(rhs), - Gt => lhs.gt(rhs), - Slt => lhs.slt(rhs), - Sgt => lhs.sgt(rhs), - Le => lhs.le(rhs), - Ge => lhs.ge(rhs), - Sle => lhs.sle(rhs), - Sge => lhs.sge(rhs), - Eq => lhs.imm_eq(rhs), - Ne => lhs.imm_ne(rhs), - And => lhs.bitand(rhs), - Or => lhs.bitor(rhs), - Xor => lhs.bitxor(rhs), - } - .as_i256(), - ); + let result = match code { + Add => lhs.add(rhs), + Sub => lhs.sub(rhs), + Mul => lhs.mul(rhs), + Udiv => lhs.udiv(rhs), + Sdiv => lhs.sdiv(rhs), + Lt => lhs.lt(rhs), + Gt => lhs.gt(rhs), + Slt => lhs.slt(rhs), + Sgt => lhs.sgt(rhs), + Le => lhs.le(rhs), + Ge => lhs.ge(rhs), + Sle => lhs.sle(rhs), + Sge => lhs.sge(rhs), + Eq => lhs.imm_eq(rhs), + Ne => lhs.imm_ne(rhs), + And => lhs.bitand(rhs), + Or => lhs.bitor(rhs), + Xor => lhs.bitxor(rhs), + } + .as_i256(); frame.map(result, insn, dfg); self.pc.next_insn(layout); None } - Cast { code, args, ty } => { - let arg: Immediate = frame.load(args[0], dfg).0.into(); + Cast { code, args, .. } => { + let arg = frame.load(args[0], dfg); use CastOp::*; - let result = Literal( - match code { - Sext => arg.sext(*ty), - Zext => arg.zext(*ty), - Trunc => arg.trunc(*ty), - BitCast => arg, - } - .as_i256(), - ); + let result = match code { + Zext => arg.neg(), + Sext | Trunc | BitCast => arg, + }; frame.map(result, insn, dfg); @@ -144,7 +136,7 @@ impl State { let mut literal_args = Vec::with_capacity(args.len()); for arg in args { let arg = frame.load(*arg, dfg); - literal_args.push(arg.clone()) + literal_args.push(arg) } // Function prologue @@ -166,7 +158,7 @@ impl State { None } Branch { args, dests } => { - let arg = frame.load(args[0], dfg).0; + let arg = frame.load(args[0], dfg); let idx = arg.not().to_u256().as_usize(); let block = layout.insn_block(insn); @@ -201,7 +193,7 @@ impl State { None } Return { args } => { - let arg = args.map(|arg| frame.load(arg, dfg).clone()); + let arg = args.map(|arg| frame.load(arg, dfg)); let frame = self.frames.pop().unwrap(); // pop returning frame match self.frames.last_mut() { @@ -218,11 +210,11 @@ impl State { self.pc.next_insn(&caller.layout); None } - None => return Some(arg), + None => Some(arg), } } Gep { args } => { - let ptr = frame.gep(ctx, &args, dfg); + let ptr = frame.gep(ctx, args, dfg); frame.map(ptr, insn, dfg); @@ -230,17 +222,328 @@ impl State { None } Phi { values, blocks, .. } => { - let _block = layout.insn_block(insn); let prev_block = self.prev_block.unwrap(); for (v, block) in values.iter().zip(blocks.iter()) { if prev_block == *block { - let lit = frame.load(*v, dfg).clone(); + let lit = frame.load(*v, dfg); frame.map(lit, insn, dfg); break; } } + self.pc.next_insn(layout); None } } } } + +#[cfg(test)] +mod test { + use sonatina_ir::I256; + use sonatina_parser::parser::Parser; + + use super::*; + + fn parse_module_make_state(input: &str) -> State { + let parser = Parser::default(); + let module = parser.parse(input).unwrap().module; + let func_ref = module.iter_functions().next().unwrap(); + + State::new(module, func_ref) + } + + #[test] + fn unary() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i32: + block0: + v1.i32 = not 0.i32; + v2.i32 = neg v1; + return v2; + "; + + let state = parse_module_make_state(input); + + let result = state.repl(); + + assert_eq!(result.unwrap(), I256::all_one().neg()); + } + + #[test] + fn binary_arithmetic() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i16: + block0: + v0.i16 = add 3.i16 4.i16; + v1.i16 = sub v0 1.i16; + v2.i16 = udiv v1 2.i16; + v3.i8 = sdiv v2 65535.i16; + return v3; + "; + + let state = parse_module_make_state(input); + + let result = state.repl(); + + assert_eq!(result.unwrap(), (-3).into()); + } + + #[test] + fn cast_sext() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i16: + block0: + v0.i16 = sext -128.i8; + return v0; + "; + + let state = parse_module_make_state(input); + + let result = state.repl(); + + const NEG_128: i16 = i16::from_be_bytes([0xff, 0x80]); + + assert_eq!(result.unwrap(), NEG_128.into()); + } + + #[test] + fn cast_zext() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i16: + block0: + v0.i16 = zext -128.i8; + return v0; + "; + + let state = parse_module_make_state(input); + + let elem_ptr = state.repl(); + + let result = i16::from_be_bytes([0x0, 0x80]); + + assert_eq!(elem_ptr.unwrap(), result.into()); + } + + #[test] + fn load_store() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i32: + block0: + v0.*i32 = alloca i32; + store @memory v0 1.i32; + v1.*i32 = load @memory v0; + return v1; + "; + + let state = parse_module_make_state(input); + + let data = state.repl(); + + assert_eq!(data.unwrap(), 1.into()); + } + + #[test] + fn call() { + let input = " + target = \"evm-ethereum-london\" + + func public %test_callee(v0.i8) -> i8: + block0: + v1.i8 = mul v0 1.i8; + return v1; + + func public %test() -> i8: + block0: + v0.i8 = call %test_callee 0.i8; + return v0; + "; + + let parser = Parser::default(); + let module = parser.parse(input).unwrap().module; + let func_ref = module.iter_functions().nth(1).unwrap(); + + let state = State::new(module, func_ref); + + let data = state.repl(); + + assert_eq!(data.unwrap(), 0.into()); + } + + #[test] + fn jump() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i1: + block0: + jump block2; + block1: + return 1.i1; + block2: + return 0.i1; + "; + + let state = parse_module_make_state(input); + + let boolean = state.repl().unwrap(); + + assert_eq!(boolean, I256::zero()) + } + + #[test] + fn branch() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i8: + block0: + br 1.i1 block1 block2; + block1: + return 1.i8; + block2: + return 2.i8; + "; + + let state = parse_module_make_state(input); + + let result = state.repl().unwrap(); + + assert_eq!(result, 1.into()); + } + + #[test] + fn br_table() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i64: + block0: + br_table 1.i64 undef (0.i64 block1) (1.i64 block2); + block1: + return 1.i64; + block2: + return 2.i64; + block3: + return 3.i64; + "; + + let state = parse_module_make_state(input); + + let result = state.repl().unwrap(); + + assert_eq!(result, 2.into()); + } + + #[test] + fn phi() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> i8: + block0: + jump block1; + block1: + jump block2; + block2: + v0.i8 = phi (1.i8 block0) (-1.i8 block1); + return v0; + "; + + let state = parse_module_make_state(input); + + let result = state.repl().unwrap(); + + assert_eq!(result, (-1).into()); + } + + #[test] + fn gep() { + let input = " + target = \"evm-ethereum-london\" + + type %s1 = {i32, i64, i1}; + + func private %test() -> *i1: + block0: + v0.*%s1 = alloca %s1; + v1.*i1 = gep v0 2.i8; + return v1; + "; + + let state = parse_module_make_state(input); + + let elem_ptr = state.repl(); + + assert_eq!(elem_ptr.unwrap(), 12.into()); + } + + #[test] + fn ret_void() { + let input = " + target = \"evm-ethereum-london\" + + type %s1 = {i32, i64, i1}; + + func private %test() -> void: + block0: + return; + "; + + let state = parse_module_make_state(input); + + let arg = state.repl(); + + assert!(arg.is_none()); + } + + #[cfg(target_arch = "aarch64")] + #[test] + fn gep_ptr_ty() { + let input = " + target = \"evm-ethereum-london\" + + func private %test() -> *i1: + block0: + v0.*[*i32; 3] = alloca [*i32; 3]; + v1.*i32 = gep v0 2.i8; + return v1; + "; + + let state = parse_module_make_state(input); + + let elem_ptr = state.repl(); + + assert_eq!(elem_ptr.unwrap(), 16.into()); + } + + #[test] + fn gep_nested_aggr_ty() { + let input = " + target = \"evm-ethereum-london\" + + type %s1 = {i32, [i16; 3], [i8; 2]}; + + func private %test() -> *i1: + block0: + v0.*%s1 = alloca %s1; + v1.*i1 = gep v0 2.i8 1.i8; + return v1; + "; + + let state = parse_module_make_state(input); + + let elem_ptr = state.repl(); + + assert_eq!(elem_ptr.unwrap(), 11.into()); + } +} diff --git a/crates/interpreter/src/value.rs b/crates/interpreter/src/value.rs index 60fccb9f..a94eee21 100644 --- a/crates/interpreter/src/value.rs +++ b/crates/interpreter/src/value.rs @@ -1,25 +1,30 @@ use sonatina_ir::{module::ModuleCtx, Type, I256, U256}; -#[derive(Clone, Copy, PartialEq, Eq)] -pub struct Literal(pub I256); +#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)] +pub enum EvalValue { + Literal(I256), + #[default] + Undefined, +} -impl Default for Literal { - fn default() -> Self { - Literal(I256::zero()) +impl EvalValue { + pub fn from_i256(i256: I256) -> Self { + Self::Literal(i256) } -} -impl Literal { pub fn from_usize(addr: usize) -> Self { - Self(I256::from_u256(U256::from(addr))) + Self::Literal(addr.into()) } - pub fn as_usize(&self) -> usize { - self.0.to_u256().as_usize() + pub fn i256(&self) -> I256 { + match self { + Self::Literal(i256) => *i256, + _ => panic!(), + } } pub fn deserialize(ctx: &ModuleCtx, ty: Type, b: Vec) -> Option { - Some(Self(match ty { + Some(Self::Literal(match ty { Type::I1 => (b[0] & 0b1).into(), Type::I8 => i8::from_be_bytes(b.try_into().unwrap()).into(), Type::I16 => i16::from_be_bytes(b.try_into().unwrap()).into(), @@ -58,8 +63,4 @@ impl Literal { Type::Void => Vec::new(), } } - - pub fn i256(&self) -> I256 { - self.0 - } } diff --git a/crates/ir/src/bigint.rs b/crates/ir/src/bigint.rs index f6b8f587..112ccbeb 100644 --- a/crates/ir/src/bigint.rs +++ b/crates/ir/src/bigint.rs @@ -80,6 +80,7 @@ impl I256 { } pub fn trunc_to_i8(self) -> i8 { + println!("{:?}", self.to_u256().low_u32() as i8); self.to_u256().low_u32() as i8 } @@ -276,3 +277,4 @@ impl_from!(u16, unsigned); impl_from!(u32, unsigned); impl_from!(u64, unsigned); impl_from!(u128, unsigned); +impl_from!(usize, unsigned); diff --git a/crates/ir/src/builder/func_builder.rs b/crates/ir/src/builder/func_builder.rs index a1510828..015298f5 100644 --- a/crates/ir/src/builder/func_builder.rs +++ b/crates/ir/src/builder/func_builder.rs @@ -91,6 +91,10 @@ impl<'a> FunctionBuilder<'a> { self.func_mut().dfg.make_global_value(gv) } + pub fn declare_array_type(&mut self, elem: Type, len: usize) -> Type { + self.module_builder.declare_array_type(elem, len) + } + pub fn declare_struct_type(&mut self, name: &str, fields: &[Type], packed: bool) -> Type { self.module_builder .declare_struct_type(name, fields, packed) @@ -244,6 +248,11 @@ impl<'a> FunctionBuilder<'a> { self.insert_insn(insn_data); } + pub fn gep(&mut self, args: &[Value]) -> Option { + let insn_data = InsnData::Gep { args: args.into() }; + self.insert_insn(insn_data) + } + pub fn phi(&mut self, args: &[(Value, Block)]) -> Value { let ty = self.func().dfg.value_ty(args[0].0); let insn_data = InsnData::Phi { diff --git a/crates/ir/src/builder/module_builder.rs b/crates/ir/src/builder/module_builder.rs index daa84fcd..c3c84a48 100644 --- a/crates/ir/src/builder/module_builder.rs +++ b/crates/ir/src/builder/module_builder.rs @@ -60,6 +60,10 @@ impl ModuleBuilder { .with_ty_store_mut(|s| s.make_struct(name, fields, packed)) } + pub fn declare_array_type(&mut self, elem: Type, len: usize) -> Type { + self.ctx.with_ty_store_mut(|s| s.make_array(elem, len)) + } + pub fn get_func_ref(&self, name: &str) -> Option { self.declared_funcs.get(name).copied() } diff --git a/crates/ir/src/value.rs b/crates/ir/src/value.rs index 6ab9b67e..5f086974 100644 --- a/crates/ir/src/value.rs +++ b/crates/ir/src/value.rs @@ -251,7 +251,7 @@ impl Immediate { self.as_i256().to_u256().as_usize() } - fn from_i256(val: I256, ty: Type) -> Self { + pub fn from_i256(val: I256, ty: Type) -> Self { match ty { Type::I1 => Self::I1(val.trunc_to_i1()), Type::I8 => Self::I8(val.trunc_to_i8()), From 66d0870b77af129e6f5c28b9c73a3d321019b17b Mon Sep 17 00:00:00 2001 From: Aki <140970520+k0aki@users.noreply.github.com> Date: Sun, 17 Sep 2023 11:12:02 +0200 Subject: [PATCH 03/14] Fix interpreter manifest Co-authored-by: Yoshitomo Nakanishi --- crates/interpreter/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml index e0b0496c..44f32dc2 100644 --- a/crates/interpreter/Cargo.toml +++ b/crates/interpreter/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "interpreter" +name = "sonatina-interpreter" version = "0.0.3-alpha" edition = "2021" authors = ["Sonatina Developers"] From 6dd3fd9bc9c7183f543a472a883d8edb89742d09 Mon Sep 17 00:00:00 2001 From: Aki Date: Sun, 17 Sep 2023 12:37:06 +0200 Subject: [PATCH 04/14] Remove dead code in load frame function --- crates/interpreter/src/frame.rs | 33 ++++++++---------- crates/interpreter/src/state.rs | 18 +++++----- crates/interpreter/src/types.rs | 61 +++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 27 deletions(-) create mode 100644 crates/interpreter/src/types.rs diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index 8c06e2fe..72681769 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -5,7 +5,7 @@ use cranelift_entity::SecondaryMap; use sonatina_ir::{ module::ModuleCtx, types::{CompoundType, CompoundTypeData}, - DataFlowGraph, Function, Insn, Type, Value, ValueData, I256, + DataFlowGraph, Function, Insn, Type, Value, I256, }; use crate::{value::EvalValue, ProgramCounter}; @@ -31,18 +31,15 @@ impl Frame { } } - pub fn load(&mut self, /*ctx: Context,*/ v: Value, dfg: &DataFlowGraph) -> I256 { + pub fn load(&mut self, ctx: &ModuleCtx, v: Value, dfg: &DataFlowGraph) -> I256 { if !self.is_assigned(v) { - let v = match dfg.value_data(v) { - ValueData::Insn { insn, .. } => { - let result_v = dfg.insn_result(*insn).unwrap(); - if self.is_assigned(result_v) { - return self.local_values[result_v].i256(); + if let Some(gv) = dfg.value_gv(v) { + ctx.with_gv_store(|s| { + if !s.is_const(gv) { + todo!() } - result_v - } - _ => v, - }; + }) + } let i256 = dfg.value_imm(v).unwrap().as_i256(); self.local_values[v] = EvalValue::from_i256(i256); } @@ -71,7 +68,7 @@ impl Frame { pub fn gep(&mut self, ctx: &ModuleCtx, args: &[Value], dfg: &DataFlowGraph) -> I256 { let ptr_v = args[0]; - let ptr = self.load(ptr_v, dfg); + let ptr = self.load(ctx, ptr_v, dfg); let base_addr = ptr.to_u256().as_usize(); let ptr_ty = dfg.value_ty(ptr_v); debug_assert!(ctx.with_ty_store(|s| s.is_ptr(ptr_ty))); @@ -83,7 +80,7 @@ impl Frame { let mut offset = 0usize; for arg in &args[1..] { - let index = self.load(*arg, dfg).to_u256().as_usize(); + let index = self.load(ctx, *arg, dfg).to_u256().as_usize(); let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty.unwrap()).clone()); match cmpd_ty_data { CompoundTypeData::Array { elem, .. } => { @@ -103,7 +100,7 @@ impl Frame { } pub fn ldr(&mut self, ctx: &ModuleCtx, ptr: Value, insn: Insn, dfg: &DataFlowGraph) { - let addr = self.load(ptr, dfg).to_u256().as_usize(); + let addr = self.load(ctx, ptr, dfg).to_u256().as_usize(); debug_assert!(self.is_alloca(addr)); let ty = dfg.insn_result_ty(insn).unwrap(); @@ -119,8 +116,8 @@ impl Frame { } pub fn str(&mut self, ctx: &ModuleCtx, ptr: Value, v: Value, dfg: &DataFlowGraph) { - let addr = self.load(ptr, dfg).to_u256().as_usize(); - let data = self.load(v, dfg); + let addr = self.load(ctx, ptr, dfg).to_u256().as_usize(); + let data = self.load(ctx, v, dfg); let data_ty = dfg.value_ty(v); let data_b = EvalValue::from_i256(data).serialize(ctx, data_ty); for (i, b) in data_b.into_iter().enumerate() { @@ -128,8 +125,8 @@ impl Frame { } } - pub fn eq(&mut self, lhs: Value, rhs: Value, dfg: &DataFlowGraph) -> bool { - self.load(lhs, dfg) == self.load(rhs, dfg) + pub fn eq(&mut self, ctx: &ModuleCtx, lhs: Value, rhs: Value, dfg: &DataFlowGraph) -> bool { + self.load(ctx, lhs, dfg) == self.load(ctx, rhs, dfg) } pub fn is_assigned(&self, v: Value) -> bool { diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 1d7fa11f..07995644 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -52,7 +52,7 @@ impl State { use InsnData::*; match insn_data { Unary { code, args } => { - let arg = frame.load(args[0], dfg); + let arg = frame.load(ctx, args[0], dfg); use UnaryOp::*; let result = match code { Not => arg.not(), @@ -65,8 +65,8 @@ impl State { None } Binary { code, args } => { - let lhs: Immediate = frame.load(args[0], dfg).into(); - let rhs: Immediate = frame.load(args[1], dfg).into(); + let lhs: Immediate = frame.load(ctx, args[0], dfg).into(); + let rhs: Immediate = frame.load(ctx, args[1], dfg).into(); use BinaryOp::*; let result = match code { Add => lhs.add(rhs), @@ -96,7 +96,7 @@ impl State { None } Cast { code, args, .. } => { - let arg = frame.load(args[0], dfg); + let arg = frame.load(ctx, args[0], dfg); use CastOp::*; let result = match code { Zext => arg.neg(), @@ -135,7 +135,7 @@ impl State { Call { func, args, .. } => { let mut literal_args = Vec::with_capacity(args.len()); for arg in args { - let arg = frame.load(*arg, dfg); + let arg = frame.load(ctx, *arg, dfg); literal_args.push(arg) } @@ -158,7 +158,7 @@ impl State { None } Branch { args, dests } => { - let arg = frame.load(args[0], dfg); + let arg = frame.load(ctx, args[0], dfg); let idx = arg.not().to_u256().as_usize(); let block = layout.insn_block(insn); @@ -176,7 +176,7 @@ impl State { let cond = args[0]; for (idx, arg) in args[1..].iter().enumerate() { - if frame.eq(cond, *arg, dfg) { + if frame.eq(ctx, cond, *arg, dfg) { self.pc.branch_to(table[idx], layout); return None; } @@ -193,7 +193,7 @@ impl State { None } Return { args } => { - let arg = args.map(|arg| frame.load(arg, dfg)); + let arg = args.map(|arg| frame.load(ctx, arg, dfg)); let frame = self.frames.pop().unwrap(); // pop returning frame match self.frames.last_mut() { @@ -225,7 +225,7 @@ impl State { let prev_block = self.prev_block.unwrap(); for (v, block) in values.iter().zip(blocks.iter()) { if prev_block == *block { - let lit = frame.load(*v, dfg); + let lit = frame.load(ctx, *v, dfg); frame.map(lit, insn, dfg); break; } diff --git a/crates/interpreter/src/types.rs b/crates/interpreter/src/types.rs new file mode 100644 index 00000000..cb96b0e4 --- /dev/null +++ b/crates/interpreter/src/types.rs @@ -0,0 +1,61 @@ +use std::mem; + +use sonatina_ir::{I256, module::ModuleCtx, Type, types::{CompoundTypeData, CompoundType}}; + +pub fn byte_size_of_ty(ctx: &ModuleCtx, ty: Type) -> usize { + match ty { + Type::I1 => mem::size_of::(), + Type::I8 => mem::size_of::(), + Type::I16 => mem::size_of::(), + Type::I32 => mem::size_of::(), + Type::I64 => mem::size_of::(), + Type::I128 => mem::size_of::(), + Type::I256 => mem::size_of::(), + Type::Compound(cmpd_ty) => { + use CompoundTypeData::*; + let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty).clone()); + match cmpd_ty_data { + Array { len, elem } => len * byte_size_of_ty(ctx, elem), + Ptr(_) => mem::size_of::(), + Struct(data) => data.fields.iter().fold(0usize, |acc, field_ty| { + acc + byte_size_of_ty(ctx, *field_ty) + }), + } + } + Type::Void => mem::size_of::<()>(), + } +} + +fn to_cmpd_ty(ty: Type) -> Option { + match ty { + Type::Compound(ty) => Some(ty), + _ => None, + } +} + +pub fn gep(ctx: &ModuleCtx, base_addr: I256, ptr_ty: Type, args: &[I256]) -> I256 { + let pointee_ty = ctx.with_ty_store(|s| s.deref(ptr_ty)).unwrap(); + debug_assert!(!pointee_ty.is_integral() && !ctx.with_ty_store(|s| s.is_ptr(pointee_ty))); + let mut cmpd_ty = to_cmpd_ty(pointee_ty); + + let mut offset = 0usize; + + for arg in &args[1..] { + let index = arg.to_u256().as_usize(); + let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty.unwrap()).clone()); + match cmpd_ty_data { + CompoundTypeData::Array { elem, .. } => { + offset += index * byte_size_of_ty(ctx, elem); + cmpd_ty = to_cmpd_ty(elem); + } + CompoundTypeData::Struct(data) => { + for ty in &data.fields[..index] { + offset += byte_size_of_ty(ctx, *ty); + } + cmpd_ty = to_cmpd_ty(data.fields[index]); + } + _ => unreachable!(), + } + } + (base_addr.to_u256().as_usize() + offset).into() +} \ No newline at end of file From 194586619ba2a226c82f1cd1adbf0ab747bd5c76 Mon Sep 17 00:00:00 2001 From: Aki Date: Sun, 17 Sep 2023 13:59:27 +0200 Subject: [PATCH 05/14] Separate instruction eval and frame mgmt --- crates/interpreter/src/frame.rs | 107 ++++---------------------------- crates/interpreter/src/lib.rs | 1 + crates/interpreter/src/state.rs | 40 ++++++++---- crates/interpreter/src/types.rs | 17 +++-- 4 files changed, 56 insertions(+), 109 deletions(-) diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index 72681769..c3be1517 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -1,14 +1,8 @@ -use std::mem; - use cranelift_entity::SecondaryMap; -use sonatina_ir::{ - module::ModuleCtx, - types::{CompoundType, CompoundTypeData}, - DataFlowGraph, Function, Insn, Type, Value, I256, -}; +use sonatina_ir::{module::ModuleCtx, DataFlowGraph, Function, Type, Value, I256}; -use crate::{value::EvalValue, ProgramCounter}; +use crate::{types, value::EvalValue, ProgramCounter}; pub struct Frame { pub ret_addr: ProgramCounter, @@ -46,65 +40,27 @@ impl Frame { self.local_values[v].i256() } - pub fn map(&mut self, literal: I256, insn: Insn, dfg: &DataFlowGraph) { - let v = dfg.insn_result(insn).unwrap(); + pub fn map(&mut self, literal: I256, v: Value) { debug_assert!(!self.is_assigned(v)); self.local_values[v] = EvalValue::from_i256(literal) } - pub fn alloca(&mut self, ctx: &ModuleCtx, ty: Type, insn: Insn, dfg: &DataFlowGraph) { - let v = dfg.insn_result(insn).unwrap(); + pub fn alloca(&mut self, ctx: &ModuleCtx, ty: Type, v: Value) { debug_assert!(!self.is_assigned(v)); let addr = self.alloca_region.len(); - let size_of_data = byte_size_of_ty(ctx, ty); - - for _ in 0..size_of_data { + for _ in 0..types::byte_size_of_ty(ctx, ty) { self.alloca_region.push(0u8); } self.local_values[v] = EvalValue::from_usize(addr); } - pub fn gep(&mut self, ctx: &ModuleCtx, args: &[Value], dfg: &DataFlowGraph) -> I256 { - let ptr_v = args[0]; - let ptr = self.load(ctx, ptr_v, dfg); - let base_addr = ptr.to_u256().as_usize(); - let ptr_ty = dfg.value_ty(ptr_v); - debug_assert!(ctx.with_ty_store(|s| s.is_ptr(ptr_ty))); - - let pointee_ty = ctx.with_ty_store(|s| s.deref(ptr_ty)).unwrap(); - debug_assert!(!pointee_ty.is_integral() && !ctx.with_ty_store(|s| s.is_ptr(pointee_ty))); - let mut cmpd_ty = to_cmpd_ty(pointee_ty); - - let mut offset = 0usize; - - for arg in &args[1..] { - let index = self.load(ctx, *arg, dfg).to_u256().as_usize(); - let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty.unwrap()).clone()); - match cmpd_ty_data { - CompoundTypeData::Array { elem, .. } => { - offset += index * byte_size_of_ty(ctx, elem); - cmpd_ty = to_cmpd_ty(elem); - } - CompoundTypeData::Struct(data) => { - for ty in &data.fields[..index] { - offset += byte_size_of_ty(ctx, *ty); - } - cmpd_ty = to_cmpd_ty(data.fields[index]); - } - _ => unreachable!(), - } - } - (base_addr + offset).into() - } - - pub fn ldr(&mut self, ctx: &ModuleCtx, ptr: Value, insn: Insn, dfg: &DataFlowGraph) { - let addr = self.load(ctx, ptr, dfg).to_u256().as_usize(); - debug_assert!(self.is_alloca(addr)); + pub fn ldr(&mut self, ctx: &ModuleCtx, addr: I256, v: Value, ty: Type) { + let addr = addr.to_u256().as_usize(); + debug_assert!(addr < self.alloca_region.len()); - let ty = dfg.insn_result_ty(insn).unwrap(); - let size = byte_size_of_ty(ctx, ty); + let size = types::byte_size_of_ty(ctx, ty); let mut literal_b = Vec::new(); for b in &self.alloca_region[addr..addr + size] { literal_b.push(*b) @@ -112,14 +68,12 @@ impl Frame { let Some(data) = EvalValue::deserialize(ctx, ty, literal_b) else { return; }; - self.map(data.i256(), insn, dfg); + self.map(data.i256(), v); } - pub fn str(&mut self, ctx: &ModuleCtx, ptr: Value, v: Value, dfg: &DataFlowGraph) { - let addr = self.load(ctx, ptr, dfg).to_u256().as_usize(); - let data = self.load(ctx, v, dfg); - let data_ty = dfg.value_ty(v); - let data_b = EvalValue::from_i256(data).serialize(ctx, data_ty); + pub fn str(&mut self, ctx: &ModuleCtx, addr: I256, data: I256, ty: Type) { + let addr = addr.to_u256().as_usize(); + let data_b = EvalValue::from_i256(data).serialize(ctx, ty); for (i, b) in data_b.into_iter().enumerate() { self.alloca_region[addr + i] = b; } @@ -137,39 +91,4 @@ impl Frame { } false } - - fn is_alloca(&self, addr: usize) -> bool { - addr < self.alloca_region.len() - } -} - -pub fn byte_size_of_ty(ctx: &ModuleCtx, ty: Type) -> usize { - match ty { - Type::I1 => mem::size_of::(), - Type::I8 => mem::size_of::(), - Type::I16 => mem::size_of::(), - Type::I32 => mem::size_of::(), - Type::I64 => mem::size_of::(), - Type::I128 => mem::size_of::(), - Type::I256 => mem::size_of::(), - Type::Compound(cmpd_ty) => { - use CompoundTypeData::*; - let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty).clone()); - match cmpd_ty_data { - Array { len, elem } => len * byte_size_of_ty(ctx, elem), - Ptr(_) => mem::size_of::(), - Struct(data) => data.fields.iter().fold(0usize, |acc, field_ty| { - acc + byte_size_of_ty(ctx, *field_ty) - }), - } - } - Type::Void => mem::size_of::<()>(), - } -} - -fn to_cmpd_ty(ty: Type) -> Option { - match ty { - Type::Compound(ty) => Some(ty), - _ => None, - } } diff --git a/crates/interpreter/src/lib.rs b/crates/interpreter/src/lib.rs index 9748e254..440ff657 100644 --- a/crates/interpreter/src/lib.rs +++ b/crates/interpreter/src/lib.rs @@ -1,6 +1,7 @@ pub mod frame; pub mod pc; pub mod state; +pub mod types; pub mod value; pub use frame::Frame; diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 07995644..174d87cd 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -6,7 +6,7 @@ use sonatina_ir::{ Block, DataLocationKind, Immediate, InsnData, Module, I256, }; -use crate::{Frame, ProgramCounter}; +use crate::{types, Frame, ProgramCounter}; pub struct State { module: Module, @@ -59,7 +59,8 @@ impl State { Neg => arg.neg(), }; - frame.map(result, insn, dfg); + let v = dfg.insn_result(insn).unwrap(); + frame.map(result, v); self.pc.next_insn(layout); None @@ -90,7 +91,8 @@ impl State { } .as_i256(); - frame.map(result, insn, dfg); + let v = dfg.insn_result(insn).unwrap(); + frame.map(result, v); self.pc.next_insn(layout); None @@ -103,7 +105,8 @@ impl State { Sext | Trunc | BitCast => arg, }; - frame.map(result, insn, dfg); + let v = dfg.insn_result(insn).unwrap(); + frame.map(result, v); self.pc.next_insn(layout); None @@ -112,7 +115,10 @@ impl State { use DataLocationKind::*; match loc { Memory => { - frame.ldr(ctx, args[0], insn, dfg); + let addr = frame.load(ctx, args[0], dfg); + let v = dfg.insn_result(insn).unwrap(); + let ty = dfg.insn_result_ty(insn).unwrap(); + frame.ldr(ctx, addr, v, ty); } Storage => todo!(), } @@ -124,7 +130,10 @@ impl State { use DataLocationKind::*; match loc { Memory => { - frame.str(ctx, args[0], args[1], dfg); + let addr = frame.load(ctx, args[0], dfg); + let data = frame.load(ctx, args[1], dfg); + let ty = dfg.value_ty(args[1]); + frame.str(ctx, addr, data, ty); } Storage => todo!(), } @@ -187,7 +196,8 @@ impl State { None } Alloca { ty } => { - frame.alloca(ctx, *ty, insn, dfg); + let v = dfg.insn_result(insn).unwrap(); + frame.alloca(ctx, *ty, v); self.pc.next_insn(layout); None @@ -204,7 +214,8 @@ impl State { let caller = &self.module.funcs[self.pc.func_ref]; if let Some(lit) = arg { - caller_frame.map(lit, self.pc.insn, &caller.dfg); + let v = caller.dfg.insn_result(self.pc.insn).unwrap(); + caller_frame.map(lit, v); } self.pc.next_insn(&caller.layout); @@ -214,9 +225,15 @@ impl State { } } Gep { args } => { - let ptr = frame.gep(ctx, args, dfg); + let mut arg_literals = args.iter().map(|arg| frame.load(ctx, *arg, dfg)); + let base_addr = arg_literals.next().unwrap(); + let ty = dfg.value_ty(args[0]); + debug_assert!(ctx.with_ty_store(|s| s.is_ptr(ty))); - frame.map(ptr, insn, dfg); + let elem_ptr = types::gep(ctx, base_addr, ty, arg_literals); + + let v = dfg.insn_result(insn).unwrap(); + frame.map(elem_ptr, v); self.pc.next_insn(layout); None @@ -226,7 +243,8 @@ impl State { for (v, block) in values.iter().zip(blocks.iter()) { if prev_block == *block { let lit = frame.load(ctx, *v, dfg); - frame.map(lit, insn, dfg); + let v = dfg.insn_result(insn).unwrap(); + frame.map(lit, v); break; } } diff --git a/crates/interpreter/src/types.rs b/crates/interpreter/src/types.rs index cb96b0e4..93554b03 100644 --- a/crates/interpreter/src/types.rs +++ b/crates/interpreter/src/types.rs @@ -1,6 +1,10 @@ use std::mem; -use sonatina_ir::{I256, module::ModuleCtx, Type, types::{CompoundTypeData, CompoundType}}; +use sonatina_ir::{ + module::ModuleCtx, + types::{CompoundType, CompoundTypeData}, + Type, I256, +}; pub fn byte_size_of_ty(ctx: &ModuleCtx, ty: Type) -> usize { match ty { @@ -33,14 +37,19 @@ fn to_cmpd_ty(ty: Type) -> Option { } } -pub fn gep(ctx: &ModuleCtx, base_addr: I256, ptr_ty: Type, args: &[I256]) -> I256 { +pub fn gep( + ctx: &ModuleCtx, + base_addr: I256, + ptr_ty: Type, + args: impl Iterator, +) -> I256 { let pointee_ty = ctx.with_ty_store(|s| s.deref(ptr_ty)).unwrap(); debug_assert!(!pointee_ty.is_integral() && !ctx.with_ty_store(|s| s.is_ptr(pointee_ty))); let mut cmpd_ty = to_cmpd_ty(pointee_ty); let mut offset = 0usize; - for arg in &args[1..] { + for arg in args { let index = arg.to_u256().as_usize(); let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty.unwrap()).clone()); match cmpd_ty_data { @@ -58,4 +67,4 @@ pub fn gep(ctx: &ModuleCtx, base_addr: I256, ptr_ty: Type, args: &[I256]) -> I25 } } (base_addr.to_u256().as_usize() + offset).into() -} \ No newline at end of file +} From c9d3a3af04cdb87527099ba9843e6dffe025b6ca Mon Sep 17 00:00:00 2001 From: Aki Date: Sun, 17 Sep 2023 14:56:35 +0200 Subject: [PATCH 06/14] Improve frame constructor --- crates/interpreter/src/frame.rs | 12 ++++++++---- crates/interpreter/src/state.rs | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index c3be1517..93b14dbb 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -1,6 +1,6 @@ use cranelift_entity::SecondaryMap; -use sonatina_ir::{module::ModuleCtx, DataFlowGraph, Function, Type, Value, I256}; +use sonatina_ir::{module::ModuleCtx, DataFlowGraph, Type, Value, I256}; use crate::{types, value::EvalValue, ProgramCounter}; @@ -11,10 +11,14 @@ pub struct Frame { } impl Frame { - pub fn new(func: &Function, ret_addr: ProgramCounter, args: Vec) -> Self { + pub fn new( + ret_addr: ProgramCounter, + args: impl Iterator, + arg_literals: impl Iterator, + ) -> Self { let mut local_values = SecondaryMap::new(); - for (v, literal_value) in func.arg_values.iter().zip(args.into_iter()) { - local_values[*v] = EvalValue::from_i256(literal_value) + for (v, literal_value) in args.zip(arg_literals) { + local_values[v] = EvalValue::from_i256(literal_value) } let alloca_region = Vec::new(); diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 174d87cd..9e775fb5 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -1,4 +1,7 @@ -use std::ops::{Add, BitAnd, BitOr, BitXor, Mul, Neg, Not, Sub}; +use std::{ + iter, + ops::{Add, BitAnd, BitOr, BitXor, Mul, Neg, Not, Sub}, +}; use sonatina_ir::{ insn::{BinaryOp, CastOp, UnaryOp}, @@ -19,7 +22,8 @@ impl State { pub fn new(module: Module, entry_func: FuncRef) -> Self { let func = &module.funcs[entry_func]; let pc = ProgramCounter::new(entry_func, &func.layout); - let entry_frame = Frame::new(func, pc, vec![]); + debug_assert!(func.arg_values.is_empty()); + let entry_frame = Frame::new(pc, iter::empty(), iter::empty()); let frames = vec![entry_frame]; Self { @@ -142,18 +146,16 @@ impl State { None } Call { func, args, .. } => { - let mut literal_args = Vec::with_capacity(args.len()); - for arg in args { - let arg = frame.load(ctx, *arg, dfg); - literal_args.push(arg) - } + let arg_literals = args.iter().map(|arg| frame.load(ctx, *arg, dfg)); // Function prologue let ret_addr = self.pc; let callee = &self.module.funcs[*func]; - let new_frame = Frame::new(callee, ret_addr, literal_args); + debug_assert!(callee.arg_values.len() == args.len()); + let new_frame = + Frame::new(ret_addr, callee.arg_values.iter().copied(), arg_literals); self.frames.push(new_frame); self.pc.call(*func, &callee.layout); From 61a9ca7c494ae41a6de0bce129d8822e6ee61885 Mon Sep 17 00:00:00 2001 From: Aki Date: Sun, 17 Sep 2023 14:57:47 +0200 Subject: [PATCH 07/14] Set agnostic name to interpreter usage --- crates/interpreter/src/state.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 9e775fb5..127d2fb1 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -34,7 +34,7 @@ impl State { } } - pub fn repl(mut self) -> Option { + pub fn run(mut self) -> Option { loop { if let Some(arg) = self.step() { return arg; @@ -286,7 +286,7 @@ mod test { let state = parse_module_make_state(input); - let result = state.repl(); + let result = state.run(); assert_eq!(result.unwrap(), I256::all_one().neg()); } @@ -307,7 +307,7 @@ mod test { let state = parse_module_make_state(input); - let result = state.repl(); + let result = state.run(); assert_eq!(result.unwrap(), (-3).into()); } @@ -325,7 +325,7 @@ mod test { let state = parse_module_make_state(input); - let result = state.repl(); + let result = state.run(); const NEG_128: i16 = i16::from_be_bytes([0xff, 0x80]); @@ -345,7 +345,7 @@ mod test { let state = parse_module_make_state(input); - let elem_ptr = state.repl(); + let elem_ptr = state.run(); let result = i16::from_be_bytes([0x0, 0x80]); @@ -367,7 +367,7 @@ mod test { let state = parse_module_make_state(input); - let data = state.repl(); + let data = state.run(); assert_eq!(data.unwrap(), 1.into()); } @@ -394,7 +394,7 @@ mod test { let state = State::new(module, func_ref); - let data = state.repl(); + let data = state.run(); assert_eq!(data.unwrap(), 0.into()); } @@ -415,7 +415,7 @@ mod test { let state = parse_module_make_state(input); - let boolean = state.repl().unwrap(); + let boolean = state.run().unwrap(); assert_eq!(boolean, I256::zero()) } @@ -436,7 +436,7 @@ mod test { let state = parse_module_make_state(input); - let result = state.repl().unwrap(); + let result = state.run().unwrap(); assert_eq!(result, 1.into()); } @@ -459,7 +459,7 @@ mod test { let state = parse_module_make_state(input); - let result = state.repl().unwrap(); + let result = state.run().unwrap(); assert_eq!(result, 2.into()); } @@ -481,7 +481,7 @@ mod test { let state = parse_module_make_state(input); - let result = state.repl().unwrap(); + let result = state.run().unwrap(); assert_eq!(result, (-1).into()); } @@ -502,7 +502,7 @@ mod test { let state = parse_module_make_state(input); - let elem_ptr = state.repl(); + let elem_ptr = state.run(); assert_eq!(elem_ptr.unwrap(), 12.into()); } @@ -521,7 +521,7 @@ mod test { let state = parse_module_make_state(input); - let arg = state.repl(); + let arg = state.run(); assert!(arg.is_none()); } @@ -541,7 +541,7 @@ mod test { let state = parse_module_make_state(input); - let elem_ptr = state.repl(); + let elem_ptr = state.run(); assert_eq!(elem_ptr.unwrap(), 16.into()); } @@ -562,7 +562,7 @@ mod test { let state = parse_module_make_state(input); - let elem_ptr = state.repl(); + let elem_ptr = state.run(); assert_eq!(elem_ptr.unwrap(), 11.into()); } From 72a973bd67b57106923ab43f78704f75000e43c8 Mon Sep 17 00:00:00 2001 From: Aki Date: Sun, 17 Sep 2023 16:41:05 +0200 Subject: [PATCH 08/14] Cast eval result to return type --- crates/interpreter/src/frame.rs | 2 +- crates/interpreter/src/lib.rs | 1 + crates/interpreter/src/state.rs | 66 +++++++++++---------- crates/interpreter/src/value.rs | 102 ++++++++++++++++++++++++++++++-- crates/ir/src/bigint.rs | 1 - 5 files changed, 134 insertions(+), 38 deletions(-) diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index 93b14dbb..e2899b5a 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -2,7 +2,7 @@ use cranelift_entity::SecondaryMap; use sonatina_ir::{module::ModuleCtx, DataFlowGraph, Type, Value, I256}; -use crate::{types, value::EvalValue, ProgramCounter}; +use crate::{types, EvalValue, ProgramCounter}; pub struct Frame { pub ret_addr: ProgramCounter, diff --git a/crates/interpreter/src/lib.rs b/crates/interpreter/src/lib.rs index 440ff657..69190838 100644 --- a/crates/interpreter/src/lib.rs +++ b/crates/interpreter/src/lib.rs @@ -7,3 +7,4 @@ pub mod value; pub use frame::Frame; pub use pc::ProgramCounter; pub use state::State; +pub use value::{EvalResult, EvalValue}; diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 127d2fb1..77190188 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -6,10 +6,10 @@ use std::{ use sonatina_ir::{ insn::{BinaryOp, CastOp, UnaryOp}, module::FuncRef, - Block, DataLocationKind, Immediate, InsnData, Module, I256, + Block, DataLocationKind, Immediate, InsnData, Module, }; -use crate::{types, Frame, ProgramCounter}; +use crate::{types, EvalResult, Frame, ProgramCounter}; pub struct State { module: Module, @@ -34,7 +34,7 @@ impl State { } } - pub fn run(mut self) -> Option { + pub fn run(mut self) -> EvalResult { loop { if let Some(arg) = self.step() { return arg; @@ -42,7 +42,7 @@ impl State { } } - pub fn step(&mut self) -> Option> { + pub fn step(&mut self) -> Option { let frame = self.frames.last_mut().unwrap(); let insn = self.pc.insn; let ctx = &self.module.ctx; @@ -205,9 +205,8 @@ impl State { None } Return { args } => { - let arg = args.map(|arg| frame.load(ctx, arg, dfg)); + let mut frame = self.frames.pop().unwrap(); // pop returning frame - let frame = self.frames.pop().unwrap(); // pop returning frame match self.frames.last_mut() { Some(caller_frame) => { // Function epilogue @@ -215,15 +214,23 @@ impl State { self.pc.resume_frame_at(frame.ret_addr); let caller = &self.module.funcs[self.pc.func_ref]; - if let Some(lit) = arg { + if let Some(arg) = *args { + let arg_literal = frame.load(ctx, arg, dfg); let v = caller.dfg.insn_result(self.pc.insn).unwrap(); - caller_frame.map(lit, v); + caller_frame.map(arg_literal, v); } self.pc.next_insn(&caller.layout); None } - None => Some(arg), + None => { + let Some(arg) = *args else { + return Some(EvalResult::Void); + }; + let arg_literal = frame.load(ctx, arg, dfg); + let ty = dfg.value_ty(arg); + Some(EvalResult::from_i256(ctx, arg_literal, ty)) + } } } Gep { args } => { @@ -259,7 +266,6 @@ impl State { #[cfg(test)] mod test { - use sonatina_ir::I256; use sonatina_parser::parser::Parser; use super::*; @@ -288,7 +294,7 @@ mod test { let result = state.run(); - assert_eq!(result.unwrap(), I256::all_one().neg()); + assert_eq!(result.into_i32(), 1i32); } #[test] @@ -309,7 +315,7 @@ mod test { let result = state.run(); - assert_eq!(result.unwrap(), (-3).into()); + assert_eq!(result.into_i16(), -3i16); } #[test] @@ -327,9 +333,7 @@ mod test { let result = state.run(); - const NEG_128: i16 = i16::from_be_bytes([0xff, 0x80]); - - assert_eq!(result.unwrap(), NEG_128.into()); + assert_eq!(result.into_i16(), -128i16); } #[test] @@ -347,9 +351,7 @@ mod test { let elem_ptr = state.run(); - let result = i16::from_be_bytes([0x0, 0x80]); - - assert_eq!(elem_ptr.unwrap(), result.into()); + assert_eq!(elem_ptr.into_i16(), 128i16); } #[test] @@ -369,7 +371,7 @@ mod test { let data = state.run(); - assert_eq!(data.unwrap(), 1.into()); + assert_eq!(data.into_i32(), 1i32); } #[test] @@ -396,7 +398,7 @@ mod test { let data = state.run(); - assert_eq!(data.unwrap(), 0.into()); + assert_eq!(data.into_i8(), 0i8); } #[test] @@ -415,9 +417,9 @@ mod test { let state = parse_module_make_state(input); - let boolean = state.run().unwrap(); + let boolean = state.run(); - assert_eq!(boolean, I256::zero()) + assert!(!boolean.into_bool()) } #[test] @@ -436,9 +438,9 @@ mod test { let state = parse_module_make_state(input); - let result = state.run().unwrap(); + let result = state.run(); - assert_eq!(result, 1.into()); + assert_eq!(result.into_i8(), 1i8); } #[test] @@ -459,9 +461,9 @@ mod test { let state = parse_module_make_state(input); - let result = state.run().unwrap(); + let result = state.run(); - assert_eq!(result, 2.into()); + assert_eq!(result.into_i64(), 2i64); } #[test] @@ -481,9 +483,9 @@ mod test { let state = parse_module_make_state(input); - let result = state.run().unwrap(); + let result = state.run(); - assert_eq!(result, (-1).into()); + assert_eq!(result.into_i8(), -1i8); } #[test] @@ -504,7 +506,7 @@ mod test { let elem_ptr = state.run(); - assert_eq!(elem_ptr.unwrap(), 12.into()); + assert_eq!(elem_ptr.into_usize(), 12usize); } #[test] @@ -523,7 +525,7 @@ mod test { let arg = state.run(); - assert!(arg.is_none()); + arg.into_void(); } #[cfg(target_arch = "aarch64")] @@ -543,7 +545,7 @@ mod test { let elem_ptr = state.run(); - assert_eq!(elem_ptr.unwrap(), 16.into()); + assert_eq!(elem_ptr.into_usize(), 16usize); } #[test] @@ -564,6 +566,6 @@ mod test { let elem_ptr = state.run(); - assert_eq!(elem_ptr.unwrap(), 11.into()); + assert_eq!(elem_ptr.into_usize(), 11usize); } } diff --git a/crates/interpreter/src/value.rs b/crates/interpreter/src/value.rs index a94eee21..d108845c 100644 --- a/crates/interpreter/src/value.rs +++ b/crates/interpreter/src/value.rs @@ -17,10 +17,10 @@ impl EvalValue { } pub fn i256(&self) -> I256 { - match self { - Self::Literal(i256) => *i256, - _ => panic!(), - } + let Self::Literal(i256) = *self else { + panic!("undefined"); + }; + i256 } pub fn deserialize(ctx: &ModuleCtx, ty: Type, b: Vec) -> Option { @@ -64,3 +64,97 @@ impl EvalValue { } } } + +pub enum EvalResult { + I1(bool), + I8(i8), + I16(i16), + I32(i32), + I64(i64), + I128(i128), + I256(I256), + Void, + Addr(usize), +} + +impl EvalResult { + pub fn from_i256(ctx: &ModuleCtx, i256: I256, ty: Type) -> Self { + use EvalResult::*; + match ty { + Type::I1 => I1(i256.trunc_to_i1()), + Type::I8 => I8(i256.trunc_to_i8()), + Type::I16 => I16(i256.trunc_to_i16()), + Type::I32 => I32(i256.trunc_to_i32()), + Type::I64 => I64(i256.trunc_to_i64()), + Type::I128 => I128(i256.trunc_to_i128()), + Type::I256 => I256(i256), + Type::Compound(_) => { + debug_assert!(ctx.with_ty_store(|s| s.is_ptr(ty))); + Addr(i256.to_u256().as_usize()) + } + _ => unreachable!(), + } + } + + pub fn into_bool(self) -> bool { + let Self::I1(boolean) = self else { + panic!("not a boolean") + }; + boolean + } + + pub fn into_i8(self) -> i8 { + let Self::I8(i8) = self else { + panic!("not an i8") + }; + i8 + } + + pub fn into_i16(self) -> i16 { + let Self::I16(i16) = self else { + panic!("not an i16") + }; + i16 + } + + pub fn into_i32(self) -> i32 { + let Self::I32(i32) = self else { + panic!("not an i32") + }; + i32 + } + + pub fn into_i64(self) -> i64 { + let Self::I64(i64) = self else { + panic!("not an i64") + }; + i64 + } + + pub fn into_i128(self) -> i128 { + let Self::I128(i128) = self else { + panic!("not an i128") + }; + i128 + } + + pub fn into_i256(self) -> I256 { + let Self::I256(i256) = self else { + panic!("not an i256") + }; + i256 + } + + pub fn into_void(self) { + let Self::Void = self else { + panic!("not a void") + }; + } + + pub fn into_usize(self) -> usize { + let Self::Addr(usize) = self else { + panic!("not a memory address") + }; + usize + } +} diff --git a/crates/ir/src/bigint.rs b/crates/ir/src/bigint.rs index 112ccbeb..0e00fb98 100644 --- a/crates/ir/src/bigint.rs +++ b/crates/ir/src/bigint.rs @@ -80,7 +80,6 @@ impl I256 { } pub fn trunc_to_i8(self) -> i8 { - println!("{:?}", self.to_u256().low_u32() as i8); self.to_u256().low_u32() as i8 } From 38cec28c7f8969436c814d4c150e254d29f22830 Mon Sep 17 00:00:00 2001 From: Aki Date: Wed, 20 Sep 2023 12:36:11 +0200 Subject: [PATCH 09/14] Pass args to entry function --- crates/interpreter/src/frame.rs | 29 +++++++++++++---------------- crates/interpreter/src/pc.rs | 15 ++++++++++++++- crates/interpreter/src/state.rs | 28 +++++++++++++++------------- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index e2899b5a..16bb825e 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -1,31 +1,28 @@ -use cranelift_entity::SecondaryMap; +use cranelift_entity::{packed_option::PackedOption, SecondaryMap}; use sonatina_ir::{module::ModuleCtx, DataFlowGraph, Type, Value, I256}; use crate::{types, EvalValue, ProgramCounter}; +#[derive(Default)] pub struct Frame { - pub ret_addr: ProgramCounter, + pub ret_addr: PackedOption, local_values: SecondaryMap, // 256-bit register alloca_region: Vec, // big endian } impl Frame { - pub fn new( - ret_addr: ProgramCounter, - args: impl Iterator, - arg_literals: impl Iterator, - ) -> Self { - let mut local_values = SecondaryMap::new(); - for (v, literal_value) in args.zip(arg_literals) { - local_values[v] = EvalValue::from_i256(literal_value) - } - let alloca_region = Vec::new(); + pub fn new() -> Self { + Self::default() + } + + pub fn set_ret_addr(&mut self, ret_addr: ProgramCounter) { + self.ret_addr = ret_addr.into(); + } - Self { - ret_addr, - local_values, - alloca_region, + pub fn load_args(&mut self, args: &[Value], arg_literals: impl Iterator) { + for (v, literal_value) in args.iter().zip(arg_literals) { + self.local_values[*v] = EvalValue::from_i256(literal_value) } } diff --git a/crates/interpreter/src/pc.rs b/crates/interpreter/src/pc.rs index b4d0206e..57332d14 100644 --- a/crates/interpreter/src/pc.rs +++ b/crates/interpreter/src/pc.rs @@ -1,11 +1,24 @@ +use cranelift_entity::packed_option::ReservedValue; use sonatina_ir::{module::FuncRef, Block, Insn, Layout}; -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct ProgramCounter { pub func_ref: FuncRef, pub insn: Insn, } +impl ReservedValue for ProgramCounter { + fn reserved_value() -> Self { + let func_ref = FuncRef::reserved_value(); + let insn = Insn::reserved_value(); + ProgramCounter { func_ref, insn } + } + + fn is_reserved_value(&self) -> bool { + self.func_ref == FuncRef::reserved_value() && self.insn == Insn::reserved_value() + } +} + impl ProgramCounter { pub fn new(entry_func: FuncRef, layout: &Layout) -> Self { let entry = layout.entry_block().unwrap(); diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 77190188..1a86402d 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -1,12 +1,9 @@ -use std::{ - iter, - ops::{Add, BitAnd, BitOr, BitXor, Mul, Neg, Not, Sub}, -}; +use std::ops::{Add, BitAnd, BitOr, BitXor, Mul, Neg, Not, Sub}; use sonatina_ir::{ insn::{BinaryOp, CastOp, UnaryOp}, module::FuncRef, - Block, DataLocationKind, Immediate, InsnData, Module, + Block, DataLocationKind, Immediate, InsnData, Module, Value, }; use crate::{types, EvalResult, Frame, ProgramCounter}; @@ -19,11 +16,15 @@ pub struct State { } impl State { - pub fn new(module: Module, entry_func: FuncRef) -> Self { + pub fn new(module: Module, entry_func: FuncRef, args: &[Value]) -> Self { let func = &module.funcs[entry_func]; let pc = ProgramCounter::new(entry_func, &func.layout); - debug_assert!(func.arg_values.is_empty()); - let entry_frame = Frame::new(pc, iter::empty(), iter::empty()); + + let mut entry_frame = Frame::new(); + debug_assert!(func.arg_values.len() == args.len()); + for arg in args { + entry_frame.load(&module.ctx, *arg, &func.dfg); + } let frames = vec![entry_frame]; Self { @@ -153,9 +154,10 @@ impl State { let ret_addr = self.pc; let callee = &self.module.funcs[*func]; + let mut new_frame = Frame::new(); debug_assert!(callee.arg_values.len() == args.len()); - let new_frame = - Frame::new(ret_addr, callee.arg_values.iter().copied(), arg_literals); + new_frame.load_args(&callee.arg_values, arg_literals); + new_frame.set_ret_addr(ret_addr); self.frames.push(new_frame); self.pc.call(*func, &callee.layout); @@ -211,7 +213,7 @@ impl State { Some(caller_frame) => { // Function epilogue - self.pc.resume_frame_at(frame.ret_addr); + self.pc.resume_frame_at(frame.ret_addr.unwrap()); let caller = &self.module.funcs[self.pc.func_ref]; if let Some(arg) = *args { @@ -275,7 +277,7 @@ mod test { let module = parser.parse(input).unwrap().module; let func_ref = module.iter_functions().next().unwrap(); - State::new(module, func_ref) + State::new(module, func_ref, &[]) } #[test] @@ -394,7 +396,7 @@ mod test { let module = parser.parse(input).unwrap().module; let func_ref = module.iter_functions().nth(1).unwrap(); - let state = State::new(module, func_ref); + let state = State::new(module, func_ref, &[]); let data = state.run(); From 2e7733257b4b5cf06e1df9050f88955f1e503c1e Mon Sep 17 00:00:00 2001 From: Aki Date: Mon, 25 Sep 2023 11:44:16 +0200 Subject: [PATCH 10/14] Scope instruction args loading --- crates/interpreter/src/frame.rs | 8 ++------ crates/interpreter/src/state.rs | 32 +++++++++++++++++--------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index 16bb825e..76e1cdee 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -26,10 +26,10 @@ impl Frame { } } - pub fn load(&mut self, ctx: &ModuleCtx, v: Value, dfg: &DataFlowGraph) -> I256 { + pub fn load(&mut self, v: Value, dfg: &DataFlowGraph) -> I256 { if !self.is_assigned(v) { if let Some(gv) = dfg.value_gv(v) { - ctx.with_gv_store(|s| { + dfg.ctx.with_gv_store(|s| { if !s.is_const(gv) { todo!() } @@ -80,10 +80,6 @@ impl Frame { } } - pub fn eq(&mut self, ctx: &ModuleCtx, lhs: Value, rhs: Value, dfg: &DataFlowGraph) -> bool { - self.load(ctx, lhs, dfg) == self.load(ctx, rhs, dfg) - } - pub fn is_assigned(&self, v: Value) -> bool { for (local_v, local) in self.local_values.iter() { if v == local_v { diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs index 1a86402d..71f03998 100644 --- a/crates/interpreter/src/state.rs +++ b/crates/interpreter/src/state.rs @@ -23,7 +23,7 @@ impl State { let mut entry_frame = Frame::new(); debug_assert!(func.arg_values.len() == args.len()); for arg in args { - entry_frame.load(&module.ctx, *arg, &func.dfg); + entry_frame.load(*arg, &func.dfg); } let frames = vec![entry_frame]; @@ -57,7 +57,7 @@ impl State { use InsnData::*; match insn_data { Unary { code, args } => { - let arg = frame.load(ctx, args[0], dfg); + let arg = frame.load(args[0], dfg); use UnaryOp::*; let result = match code { Not => arg.not(), @@ -71,8 +71,8 @@ impl State { None } Binary { code, args } => { - let lhs: Immediate = frame.load(ctx, args[0], dfg).into(); - let rhs: Immediate = frame.load(ctx, args[1], dfg).into(); + let lhs: Immediate = frame.load(args[0], dfg).into(); + let rhs: Immediate = frame.load(args[1], dfg).into(); use BinaryOp::*; let result = match code { Add => lhs.add(rhs), @@ -103,7 +103,7 @@ impl State { None } Cast { code, args, .. } => { - let arg = frame.load(ctx, args[0], dfg); + let arg = frame.load(args[0], dfg); use CastOp::*; let result = match code { Zext => arg.neg(), @@ -120,7 +120,7 @@ impl State { use DataLocationKind::*; match loc { Memory => { - let addr = frame.load(ctx, args[0], dfg); + let addr = frame.load(args[0], dfg); let v = dfg.insn_result(insn).unwrap(); let ty = dfg.insn_result_ty(insn).unwrap(); frame.ldr(ctx, addr, v, ty); @@ -135,8 +135,8 @@ impl State { use DataLocationKind::*; match loc { Memory => { - let addr = frame.load(ctx, args[0], dfg); - let data = frame.load(ctx, args[1], dfg); + let addr = frame.load(args[0], dfg); + let data = frame.load(args[1], dfg); let ty = dfg.value_ty(args[1]); frame.str(ctx, addr, data, ty); } @@ -147,7 +147,7 @@ impl State { None } Call { func, args, .. } => { - let arg_literals = args.iter().map(|arg| frame.load(ctx, *arg, dfg)); + let arg_literals = args.iter().map(|arg| frame.load(*arg, dfg)); // Function prologue @@ -171,7 +171,7 @@ impl State { None } Branch { args, dests } => { - let arg = frame.load(ctx, args[0], dfg); + let arg = frame.load(args[0], dfg); let idx = arg.not().to_u256().as_usize(); let block = layout.insn_block(insn); @@ -189,7 +189,9 @@ impl State { let cond = args[0]; for (idx, arg) in args[1..].iter().enumerate() { - if frame.eq(ctx, cond, *arg, dfg) { + let cond = frame.load(cond, dfg); + let arg = frame.load(*arg, dfg); + if cond == arg { self.pc.branch_to(table[idx], layout); return None; } @@ -217,7 +219,7 @@ impl State { let caller = &self.module.funcs[self.pc.func_ref]; if let Some(arg) = *args { - let arg_literal = frame.load(ctx, arg, dfg); + let arg_literal = frame.load(arg, dfg); let v = caller.dfg.insn_result(self.pc.insn).unwrap(); caller_frame.map(arg_literal, v); } @@ -229,14 +231,14 @@ impl State { let Some(arg) = *args else { return Some(EvalResult::Void); }; - let arg_literal = frame.load(ctx, arg, dfg); + let arg_literal = frame.load(arg, dfg); let ty = dfg.value_ty(arg); Some(EvalResult::from_i256(ctx, arg_literal, ty)) } } } Gep { args } => { - let mut arg_literals = args.iter().map(|arg| frame.load(ctx, *arg, dfg)); + let mut arg_literals = args.iter().map(|arg| frame.load(*arg, dfg)); let base_addr = arg_literals.next().unwrap(); let ty = dfg.value_ty(args[0]); debug_assert!(ctx.with_ty_store(|s| s.is_ptr(ty))); @@ -253,7 +255,7 @@ impl State { let prev_block = self.prev_block.unwrap(); for (v, block) in values.iter().zip(blocks.iter()) { if prev_block == *block { - let lit = frame.load(ctx, *v, dfg); + let lit = frame.load(*v, dfg); let v = dfg.insn_result(insn).unwrap(); frame.map(lit, v); break; From 1bf784c0318aab2380c8ab80f198772d528b96e7 Mon Sep 17 00:00:00 2001 From: Aki Date: Mon, 25 Sep 2023 11:46:50 +0200 Subject: [PATCH 11/14] Update cranelift dependency --- crates/interpreter/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml index 44f32dc2..56f07834 100644 --- a/crates/interpreter/Cargo.toml +++ b/crates/interpreter/Cargo.toml @@ -14,7 +14,7 @@ keywords = ["compiler", "evm", "wasm", "smart-contract"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -cranelift-entity = "0.99.1" +cranelift-entity = "0.100" sonatina-ir = { path = "../ir", version = "0.0.3-alpha" } [dev-dependencies] From c20e38d0f17ea2a1e0ce3d55f8c2fce69e5b576a Mon Sep 17 00:00:00 2001 From: Aki <140970520+k0aki@users.noreply.github.com> Date: Sat, 30 Sep 2023 08:20:35 +0200 Subject: [PATCH 12/14] Use existing method of Vec Co-authored-by: Yoshitomo Nakanishi --- crates/interpreter/src/frame.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index 76e1cdee..823ba405 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -51,9 +51,8 @@ impl Frame { let addr = self.alloca_region.len(); - for _ in 0..types::byte_size_of_ty(ctx, ty) { - self.alloca_region.push(0u8); - } + let size = types::byte_size_of_ty(ctx, ty); + self.alloca_region.resize(addr + size, 0); self.local_values[v] = EvalValue::from_usize(addr); } From da3ac24c46164a487ced11b660f44482b2aae39e Mon Sep 17 00:00:00 2001 From: Aki <140970520+k0aki@users.noreply.github.com> Date: Sat, 30 Sep 2023 08:27:36 +0200 Subject: [PATCH 13/14] Avoid heap allocation Co-authored-by: Yoshitomo Nakanishi --- crates/interpreter/Cargo.toml | 1 + crates/interpreter/src/frame.rs | 12 ++++------ crates/interpreter/src/value.rs | 40 ++++++++++++++++++--------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml index 56f07834..fad59ff8 100644 --- a/crates/interpreter/Cargo.toml +++ b/crates/interpreter/Cargo.toml @@ -14,6 +14,7 @@ keywords = ["compiler", "evm", "wasm", "smart-contract"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +byteorder = { version = "1.4.3", default-features = false } cranelift-entity = "0.100" sonatina-ir = { path = "../ir", version = "0.0.3-alpha" } diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index 823ba405..c7ca3153 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -61,10 +61,7 @@ impl Frame { debug_assert!(addr < self.alloca_region.len()); let size = types::byte_size_of_ty(ctx, ty); - let mut literal_b = Vec::new(); - for b in &self.alloca_region[addr..addr + size] { - literal_b.push(*b) - } + let literal_b = &self.alloca_region[addr..addr + size]; let Some(data) = EvalValue::deserialize(ctx, ty, literal_b) else { return; }; @@ -73,10 +70,9 @@ impl Frame { pub fn str(&mut self, ctx: &ModuleCtx, addr: I256, data: I256, ty: Type) { let addr = addr.to_u256().as_usize(); - let data_b = EvalValue::from_i256(data).serialize(ctx, ty); - for (i, b) in data_b.into_iter().enumerate() { - self.alloca_region[addr + i] = b; - } + let size = types::byte_size_of_ty(ctx, ty); + let reg_value = EvalValue::from_i256(data); + reg_value.serialize(ctx, ty, &mut self.alloca_region[addr..size]); } pub fn is_assigned(&self, v: Value) -> bool { diff --git a/crates/interpreter/src/value.rs b/crates/interpreter/src/value.rs index d108845c..4a2efd0d 100644 --- a/crates/interpreter/src/value.rs +++ b/crates/interpreter/src/value.rs @@ -1,3 +1,6 @@ +use std::mem; + +use byteorder::{BigEndian, WriteBytesExt}; use sonatina_ir::{module::ModuleCtx, Type, I256, U256}; #[derive(Clone, Copy, PartialEq, Eq, Debug, Default)] @@ -23,7 +26,7 @@ impl EvalValue { i256 } - pub fn deserialize(ctx: &ModuleCtx, ty: Type, b: Vec) -> Option { + pub fn deserialize(ctx: &ModuleCtx, ty: Type, b: &[u8]) -> Option { Some(Self::Literal(match ty { Type::I1 => (b[0] & 0b1).into(), Type::I8 => i8::from_be_bytes(b.try_into().unwrap()).into(), @@ -31,7 +34,7 @@ impl EvalValue { Type::I32 => i32::from_be_bytes(b.try_into().unwrap()).into(), Type::I64 => i64::from_be_bytes(b.try_into().unwrap()).into(), Type::I128 => i128::from_be_bytes(b.try_into().unwrap()).into(), - Type::I256 => I256::from_u256(U256::from_big_endian(&b)), + Type::I256 => I256::from_u256(U256::from_big_endian(b)), Type::Compound(ty) => { debug_assert!(ctx.with_ty_store(|s| s.resolve_compound(ty).is_ptr())); debug_assert!(b.len() == std::mem::size_of::()); @@ -41,26 +44,27 @@ impl EvalValue { })) } - pub fn serialize(&self, ctx: &ModuleCtx, ty: Type) -> Vec { + pub fn serialize(&self, ctx: &ModuleCtx, ty: Type, mut buff: &mut [u8]) { + macro_rules! write_be_bytes { + ($bytes:expr) => {{ + let data = self.i256().trunc_to_i128(); + buff.write_int128::(data, $bytes).unwrap() + }}; + } + match ty { - Type::I1 => self.i256().trunc_to_i8().to_be_bytes().to_vec(), - Type::I8 => self.i256().trunc_to_i8().to_be_bytes().to_vec(), - Type::I16 => self.i256().trunc_to_i16().to_be_bytes().to_vec(), - Type::I32 => self.i256().trunc_to_i32().to_be_bytes().to_vec(), - Type::I64 => self.i256().trunc_to_i64().to_be_bytes().to_vec(), - Type::I128 => self.i256().trunc_to_i128().to_be_bytes().to_vec(), - Type::I256 => { - let mut b = [0u8; 32]; - self.i256().to_u256().to_big_endian(&mut b); - b.to_vec() - } + Type::I1 => write_be_bytes!(1), + Type::I8 => write_be_bytes!(1), + Type::I16 => write_be_bytes!(2), + Type::I32 => write_be_bytes!(4), + Type::I64 => write_be_bytes!(8), + Type::I128 => write_be_bytes!(16), + Type::I256 => self.i256().to_u256().to_big_endian(buff), Type::Compound(ty) => { debug_assert!(ctx.with_ty_store(|s| s.resolve_compound(ty).is_ptr())); - let mut b = [0u8; 32]; - self.i256().to_u256().to_big_endian(&mut b); - b[32 - std::mem::size_of::()..].to_vec() + write_be_bytes!(mem::size_of::()); } - Type::Void => Vec::new(), + Type::Void => (), } } } From 77132d60f0b9984a3879c1d359b6d156597316b5 Mon Sep 17 00:00:00 2001 From: Aki Date: Sat, 30 Sep 2023 12:25:48 +0200 Subject: [PATCH 14/14] Fix bug in size of type data --- crates/interpreter/src/frame.rs | 6 +++--- crates/interpreter/src/types.rs | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs index c7ca3153..c985ae18 100644 --- a/crates/interpreter/src/frame.rs +++ b/crates/interpreter/src/frame.rs @@ -51,7 +51,7 @@ impl Frame { let addr = self.alloca_region.len(); - let size = types::byte_size_of_ty(ctx, ty); + let size = types::size_of_ty_data(ctx, ty); self.alloca_region.resize(addr + size, 0); self.local_values[v] = EvalValue::from_usize(addr); } @@ -60,7 +60,7 @@ impl Frame { let addr = addr.to_u256().as_usize(); debug_assert!(addr < self.alloca_region.len()); - let size = types::byte_size_of_ty(ctx, ty); + let size = types::size_of_ty_data(ctx, ty); let literal_b = &self.alloca_region[addr..addr + size]; let Some(data) = EvalValue::deserialize(ctx, ty, literal_b) else { return; @@ -70,7 +70,7 @@ impl Frame { pub fn str(&mut self, ctx: &ModuleCtx, addr: I256, data: I256, ty: Type) { let addr = addr.to_u256().as_usize(); - let size = types::byte_size_of_ty(ctx, ty); + let size = types::size_of_ty_data(ctx, ty); let reg_value = EvalValue::from_i256(data); reg_value.serialize(ctx, ty, &mut self.alloca_region[addr..size]); } diff --git a/crates/interpreter/src/types.rs b/crates/interpreter/src/types.rs index 93554b03..45e1ca07 100644 --- a/crates/interpreter/src/types.rs +++ b/crates/interpreter/src/types.rs @@ -6,7 +6,7 @@ use sonatina_ir::{ Type, I256, }; -pub fn byte_size_of_ty(ctx: &ModuleCtx, ty: Type) -> usize { +pub fn size_of_ty_data(ctx: &ModuleCtx, ty: Type) -> usize { match ty { Type::I1 => mem::size_of::(), Type::I8 => mem::size_of::(), @@ -14,15 +14,15 @@ pub fn byte_size_of_ty(ctx: &ModuleCtx, ty: Type) -> usize { Type::I32 => mem::size_of::(), Type::I64 => mem::size_of::(), Type::I128 => mem::size_of::(), - Type::I256 => mem::size_of::(), + Type::I256 => 32, Type::Compound(cmpd_ty) => { use CompoundTypeData::*; let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty).clone()); match cmpd_ty_data { - Array { len, elem } => len * byte_size_of_ty(ctx, elem), + Array { len, elem } => len * size_of_ty_data(ctx, elem), Ptr(_) => mem::size_of::(), Struct(data) => data.fields.iter().fold(0usize, |acc, field_ty| { - acc + byte_size_of_ty(ctx, *field_ty) + acc + size_of_ty_data(ctx, *field_ty) }), } } @@ -54,12 +54,12 @@ pub fn gep( let cmpd_ty_data = ctx.with_ty_store(|s| s.resolve_compound(cmpd_ty.unwrap()).clone()); match cmpd_ty_data { CompoundTypeData::Array { elem, .. } => { - offset += index * byte_size_of_ty(ctx, elem); + offset += index * size_of_ty_data(ctx, elem); cmpd_ty = to_cmpd_ty(elem); } CompoundTypeData::Struct(data) => { for ty in &data.fields[..index] { - offset += byte_size_of_ty(ctx, *ty); + offset += size_of_ty_data(ctx, *ty); } cmpd_ty = to_cmpd_ty(data.fields[index]); }