From c979f42a9a5fc02f7f9c686ea92cce44cddc5fd0 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 29 Sep 2017 17:47:23 +0200 Subject: [PATCH 01/17] Values and types can now be copied and cloned. --- src/types/array_type.rs | 2 +- src/types/float_type.rs | 2 +- src/types/fn_type.rs | 2 +- src/types/int_type.rs | 2 +- src/types/mod.rs | 2 +- src/types/ptr_type.rs | 2 +- src/types/struct_type.rs | 2 +- src/types/vec_type.rs | 2 +- src/types/void_type.rs | 2 +- src/values/array_value.rs | 2 +- src/values/float_value.rs | 2 +- src/values/fn_value.rs | 2 +- src/values/int_value.rs | 2 +- src/values/metadata_value.rs | 2 +- src/values/mod.rs | 2 +- src/values/phi_value.rs | 2 +- src/values/ptr_value.rs | 2 +- src/values/struct_value.rs | 2 +- src/values/vec_value.rs | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/types/array_type.rs b/src/types/array_type.rs index 9c07da9f3d05e..f4dd734252373 100644 --- a/src/types/array_type.rs +++ b/src/types/array_type.rs @@ -8,7 +8,7 @@ use types::traits::AsTypeRef; use types::{Type, BasicType, PointerType, FunctionType}; use values::{BasicValue, ArrayValue, PointerValue, IntValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct ArrayType { array_type: Type, } diff --git a/src/types/float_type.rs b/src/types/float_type.rs index a3e4b997936d2..9e7acabe6605f 100644 --- a/src/types/float_type.rs +++ b/src/types/float_type.rs @@ -9,7 +9,7 @@ use types::traits::AsTypeRef; use types::{Type, PointerType, FunctionType, BasicType, ArrayType, VectorType}; use values::{FloatValue, GenericValue, PointerValue, IntValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct FloatType { float_type: Type, } diff --git a/src/types/fn_type.rs b/src/types/fn_type.rs index 6e9daa189c2db..dffab54f0b9ec 100644 --- a/src/types/fn_type.rs +++ b/src/types/fn_type.rs @@ -10,7 +10,7 @@ use types::traits::AsTypeRef; use types::{Type, BasicTypeEnum}; // use values::FunctionValue; -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone, Copy)] pub struct FunctionType { fn_type: Type, } diff --git a/src/types/int_type.rs b/src/types/int_type.rs index 8d7097d9a4dd0..9b0bdf4ea0964 100644 --- a/src/types/int_type.rs +++ b/src/types/int_type.rs @@ -9,7 +9,7 @@ use types::traits::AsTypeRef; use types::{Type, ArrayType, BasicType, VectorType, PointerType, FunctionType}; use values::{GenericValue, IntValue, PointerValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct IntType { int_type: Type, } diff --git a/src/types/mod.rs b/src/types/mod.rs index 8864a748f2b28..60419c4fb6ac3 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -34,7 +34,7 @@ use values::{IntValue, PointerValue}; // Worth noting that types seem to be singletons. At the very least, primitives are. // Though this is likely only true per thread since LLVM claims to not be very thread-safe. // REVIEW: Maybe move this into its own module? -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone, Copy)] struct Type { type_: LLVMTypeRef, } diff --git a/src/types/ptr_type.rs b/src/types/ptr_type.rs index 30b8eab2183af..59c5980f02cc4 100644 --- a/src/types/ptr_type.rs +++ b/src/types/ptr_type.rs @@ -8,7 +8,7 @@ use types::traits::AsTypeRef; use types::{Type, BasicType, ArrayType, FunctionType, VectorType}; use values::{PointerValue, IntValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct PointerType { ptr_type: Type, } diff --git a/src/types/struct_type.rs b/src/types/struct_type.rs index 22950fa85570c..39f2c040e8881 100644 --- a/src/types/struct_type.rs +++ b/src/types/struct_type.rs @@ -9,7 +9,7 @@ use types::traits::AsTypeRef; use types::{Type, BasicType, BasicTypeEnum, ArrayType, PointerType, FunctionType, VectorType}; use values::{BasicValue, StructValue, PointerValue, IntValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct StructType { struct_type: Type, } diff --git a/src/types/vec_type.rs b/src/types/vec_type.rs index 0cb5e2c1f2f80..15379c42a97b5 100644 --- a/src/types/vec_type.rs +++ b/src/types/vec_type.rs @@ -9,7 +9,7 @@ use values::{BasicValue, PointerValue, VectorValue, IntValue}; // REVIEW: vec_type() is impl for IntType & FloatType. Need to // find out if it is valid for other types too. Maybe PointerType? -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct VectorType { vec_type: Type, } diff --git a/src/types/void_type.rs b/src/types/void_type.rs index 565f5e51adeff..5c6772cd62c92 100644 --- a/src/types/void_type.rs +++ b/src/types/void_type.rs @@ -8,7 +8,7 @@ use values::PointerValue; use std::ffi::CStr; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct VoidType { void_type: Type, } diff --git a/src/values/array_value.rs b/src/values/array_value.rs index d920411926a07..05ea9f06fbee0 100644 --- a/src/values/array_value.rs +++ b/src/values/array_value.rs @@ -8,7 +8,7 @@ use types::ArrayType; use values::traits::AsValueRef; use values::{Value, InstructionValue, MetadataValue}; -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone, Copy)] pub struct ArrayValue { array_value: Value } diff --git a/src/values/float_value.rs b/src/values/float_value.rs index 4820fb788f28a..2fa07f2ea3570 100644 --- a/src/values/float_value.rs +++ b/src/values/float_value.rs @@ -8,7 +8,7 @@ use types::{AsTypeRef, FloatType, IntType}; use values::traits::AsValueRef; use values::{InstructionValue, IntValue, Value, MetadataValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct FloatValue { float_value: Value } diff --git a/src/values/fn_value.rs b/src/values/fn_value.rs index 84a0234d7d747..f17b3d2b17145 100644 --- a/src/values/fn_value.rs +++ b/src/values/fn_value.rs @@ -12,7 +12,7 @@ use types::{BasicTypeEnum, FunctionType}; use values::traits::AsValueRef; use values::{BasicValueEnum, Value, MetadataValue}; -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone, Copy)] pub struct FunctionValue { fn_value: Value, } diff --git a/src/values/int_value.rs b/src/values/int_value.rs index 06584c4b53044..7ed44cf472301 100644 --- a/src/values/int_value.rs +++ b/src/values/int_value.rs @@ -8,7 +8,7 @@ use types::{AsTypeRef, FloatType, PointerType, IntType}; use values::traits::AsValueRef; use values::{FloatValue, InstructionValue, PointerValue, Value, MetadataValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct IntValue { int_value: Value, } diff --git a/src/values/metadata_value.rs b/src/values/metadata_value.rs index a931653b653d3..50a44d39a349b 100644 --- a/src/values/metadata_value.rs +++ b/src/values/metadata_value.rs @@ -11,7 +11,7 @@ use std::slice::from_raw_parts; pub const FIRST_CUSTOM_METADATA_KIND_ID: u32 = 14; // TODO: Varies by version -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone, Copy)] pub struct MetadataValue { metadata_value: Value, } diff --git a/src/values/mod.rs b/src/values/mod.rs index ff29909df6f3c..251b31bdbe32c 100644 --- a/src/values/mod.rs +++ b/src/values/mod.rs @@ -33,7 +33,7 @@ use llvm_sys::prelude::{LLVMValueRef, LLVMTypeRef}; use std::ffi::{CString, CStr}; use std::fmt; -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone, Copy)] struct Value { value: LLVMValueRef, } diff --git a/src/values/phi_value.rs b/src/values/phi_value.rs index 0643da2b8ea13..f961f7dce4745 100644 --- a/src/values/phi_value.rs +++ b/src/values/phi_value.rs @@ -8,7 +8,7 @@ use values::traits::AsValueRef; use values::{BasicValue, BasicValueEnum, InstructionValue, Value}; // REVIEW: Metadata for phi values? -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct PhiValue { phi_value: Value } diff --git a/src/values/ptr_value.rs b/src/values/ptr_value.rs index 6617215631925..40bdf7d83c0cd 100644 --- a/src/values/ptr_value.rs +++ b/src/values/ptr_value.rs @@ -6,7 +6,7 @@ use std::ffi::CStr; use types::{AsTypeRef, IntType, PointerType}; use values::{AsValueRef, InstructionValue, IntValue, Value, MetadataValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct PointerValue { ptr_value: Value, } diff --git a/src/values/struct_value.rs b/src/values/struct_value.rs index 79735faeb895a..1ef14e83d0339 100644 --- a/src/values/struct_value.rs +++ b/src/values/struct_value.rs @@ -6,7 +6,7 @@ use types::StructType; use values::traits::AsValueRef; use values::{InstructionValue, Value, MetadataValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct StructValue { struct_value: Value } diff --git a/src/values/vec_value.rs b/src/values/vec_value.rs index 939ea720c6742..f5d444d06cea3 100644 --- a/src/values/vec_value.rs +++ b/src/values/vec_value.rs @@ -7,7 +7,7 @@ use types::{VectorType}; use values::traits::AsValueRef; use values::{BasicValueEnum, BasicValue, InstructionValue, Value, IntValue, MetadataValue}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct VectorValue { vec_value: Value, } From dbe5a40e78f688654b4d79089709a7224c897e34 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 29 Sep 2017 18:22:32 +0200 Subject: [PATCH 02/17] Documented instruction clone; added instruction copy --- src/values/instruction_value.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/values/instruction_value.rs b/src/values/instruction_value.rs index 5ef8659b60eab..5837ffeaa9d27 100644 --- a/src/values/instruction_value.rs +++ b/src/values/instruction_value.rs @@ -151,7 +151,7 @@ impl InstructionOpcode { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Copy)] pub struct InstructionValue { instruction_value: Value, } @@ -231,6 +231,8 @@ impl InstructionValue { } impl Clone for InstructionValue { + /// Creates a clone of this `InstructionValue`, and returns it. + /// The clone will have no parent, and no name. fn clone(&self) -> Self { let value = unsafe { LLVMInstructionClone(self.as_value_ref()) From 26bb26a6a2516787224ca722c0d1dc8655292939 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 29 Sep 2017 18:23:11 +0200 Subject: [PATCH 03/17] Added tests for copying --- tests/test_types.rs | 9 +++++++++ tests/test_values.rs | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/tests/test_types.rs b/tests/test_types.rs index 9ffb789b1b31c..f0a7d80a01f6f 100644 --- a/tests/test_types.rs +++ b/tests/test_types.rs @@ -221,3 +221,12 @@ fn test_vec_type() { assert_eq!(vec_type.get_size(), 42); } + +#[test] +fn test_type_copies() { + let context = Context::create(); + let i8_type = context.i8_type(); + let i8_type_copy = i8_type; + + assert_eq!(i8_type, i8_type_copy); +} diff --git a/tests/test_values.rs b/tests/test_values.rs index e29f4bdef1d2b..c3f930c9ee539 100644 --- a/tests/test_values.rs +++ b/tests/test_values.rs @@ -683,3 +683,14 @@ fn test_int_from_string() { assert_eq!(i8_val.print_to_string(), &*CString::new("i8 -15").unwrap()); } + +#[test] +fn test_value_copies() { + let context = Context::create(); + let i8_type = context.i8_type(); + + let i8_value = i8_type.const_int(12, false); + let i8_value_copy = i8_value; + + assert_eq!(i8_value, i8_value_copy); +} \ No newline at end of file From ff031c0a9bcdb3e5e243b571b0ec7893571371a1 Mon Sep 17 00:00:00 2001 From: Gregoire Geis Date: Fri, 29 Sep 2017 18:32:27 +0200 Subject: [PATCH 04/17] Added tests for instruction cloning / copying. --- tests/test_values.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/test_values.rs b/tests/test_values.rs index c3f930c9ee539..7f7895a37c7b6 100644 --- a/tests/test_values.rs +++ b/tests/test_values.rs @@ -53,6 +53,17 @@ fn test_instructions() { assert_eq!(ptr.as_instruction().unwrap().get_opcode(), IntToPtr); assert_eq!(free_instruction.get_opcode(), Call); assert_eq!(return_instruction.get_opcode(), Return); + + // test instruction cloning + let instruction_clone = return_instruction.clone(); + + assert_eq!(instruction_clone.get_opcode(), return_instruction.get_opcode()); + assert_ne!(instruction_clone, return_instruction); + + // test copying + let instruction_clone_copy = instruction_clone; + + assert_eq!(instruction_clone, instruction_clone_copy); } #[test] @@ -693,4 +704,4 @@ fn test_value_copies() { let i8_value_copy = i8_value; assert_eq!(i8_value, i8_value_copy); -} \ No newline at end of file +} From 65cd8dc7586a29aa0bb0b69b8e97c88e34ed195b Mon Sep 17 00:00:00 2001 From: Gregoire Geis Date: Fri, 29 Sep 2017 23:13:02 +0200 Subject: [PATCH 05/17] Added Kaleidoscope example; not working yet. --- examples/kaleidoscope.rs | 610 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 610 insertions(+) create mode 100644 examples/kaleidoscope.rs diff --git a/examples/kaleidoscope.rs b/examples/kaleidoscope.rs new file mode 100644 index 0000000000000..77d9b026baa86 --- /dev/null +++ b/examples/kaleidoscope.rs @@ -0,0 +1,610 @@ +extern crate inkwell; + +use std::collections::HashMap; +use std::io; +use std::iter::Peekable; +use std::str::Chars; +use std::ops::DerefMut; + +use inkwell::builder::Builder; +use inkwell::types::FloatType; +use inkwell::values::{BasicValue, FloatValue, FunctionValue}; + +use Token::*; + + +// ====================================================================================== +// LEXER ================================================================================ +// ====================================================================================== + +/// Represents a primitive syntax token. +#[derive(Clone)] +pub enum Token { + EOF, + Comment, + LParen, + RParen, + Comma, + Def, + Extern, + Ident(String), + Number(f64), + Op(char) +} + +/// Defines an error encountered by the `Lexer`. +pub struct LexError { + pub error: &'static str, + pub index: usize +} + +impl LexError { + pub fn new(msg: &'static str) -> LexError { + LexError { error: msg, index: 0 } + } + + pub fn with_index(msg: &'static str, index: usize) -> LexError { + LexError { error: msg, index: index } + } +} + +/// Defines the result of a lexing operation; namely a +/// `Token` on success, or a `LexError` on failure. +pub type LexResult = Result; + +/// Defines a lexer which transforms an input `String` into +/// a `Token` stream. +pub struct Lexer<'a> { + input: &'a str, + chars: Box>>, + pos: usize +} + +impl<'a> Lexer<'a> { + /// Creates a new `Lexer`, given its source `input`. + pub fn new(input: &'a str) -> Lexer<'a> { + Lexer { input: input, chars: Box::new(input.chars().peekable()), pos: 0 } + } + + /// Lexes and returns the next `Token` from the source code. + pub fn lex(&mut self) -> LexResult { + let chars = self.chars.deref_mut(); + let src = self.input; + + let mut pos = self.pos; + + // Skip whitespaces + loop { + { + let ch = chars.peek(); + + if ch.is_none() { + self.pos = pos; + + return Ok(Token::EOF); + } + + if !ch.unwrap().is_whitespace() { + break; + } + } + + { + chars.next(); + pos += 1; + } + } + + let start = pos; + let next = chars.next(); + + if next.is_none() { + return Ok(Token::EOF); + } + + pos += 1; + + // Actually get the next token. + let result = match next.unwrap() { + '(' => Ok(Token::LParen), + ')' => Ok(Token::RParen), + ',' => Ok(Token::Comma), + + '#' => { + // Comment + loop { + let ch = chars.next(); + pos += 1; + + if ch == Some('\n') { + break; + } + } + + Ok(Token::Comment) + }, + + ch @ '.' | ch @ '0' ... '9' => { + // Parse number literal + loop { + let ch = *chars.peek(); + + if ch.is_none() { + return Ok(Token::EOF); + } + + let ch = ch.unwrap(); + + // Parse float. + if ch != '.' && !ch.is_digit(16) { + break; + } + + chars.next(); + pos += 1; + } + + Ok(Token::Number(src[start..pos].parse().unwrap())) + }, + + 'a' ... 'z' | 'A' ... 'Z' | '_' => { + // Parse identifier + loop { + let ch = chars.peek(); + + if ch.is_none() { + break; + } + + let ch = *ch.unwrap(); + + // A word-like identifier only contains underscores and alphanumeric characters. + if ch != '_' && !ch.is_alphanumeric() { + break; + } + + chars.next(); + pos += 1; + } + + match &src[start..pos] { + "def" => Ok(Token::Def), + "extern" => Ok(Token::Extern), + ident => Ok(Token::Ident(ident.to_string())) + } + }, + + op => { + // Parse operator + Ok(Token::Op(op)) + } + }; + + // Update stored position, and return + self.pos = pos; + + result + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Token; + + /// Lexes the next `Token` and returns it. + /// On EOF or failure, `None` will be returned. + fn next(&mut self) -> Option { + match self.lex() { + Ok(EOF) | Err(_) => None, + Ok(token) => Some(token) + } + } +} + + +// ====================================================================================== +// PARSER =============================================================================== +// ====================================================================================== + +/// Defines a primitive expression. +#[derive(Debug)] +pub enum Expr { + Number(f64), + Variable(String), + Binary(char, Box, Box), + Call(String, Vec) +} + +/// Defines the prototype (name and parameters) of a function. +#[derive(Debug)] +pub struct Prototype { + pub name: String, + pub args: Vec +} + +/// Defines a user-defined function. +#[derive(Debug)] +pub struct Function { + pub prototype: Prototype, + pub body: Expr +} + +/// Represents the `Expr` parser. +pub struct Parser { + tokens: Vec, + pos: usize, + prec: HashMap<&'static str, i32> +} + +impl Parser { + /// Creates a new parser, given an input `str` and a `HashMap` binding + /// an operator and its precedence in binary expressions. + pub fn new(input: String, op_precedence: HashMap<&'static str, i32>) -> Parser { + let mut lexer = Lexer::new(input.as_str()); + let tokens = lexer.by_ref().collect(); + + Parser { + tokens: tokens, + prec: op_precedence, + pos: 0 + } + } + + /// Parses the content of the parser. + pub fn parse(&mut self) -> Result { + match self.curr() { + Ident(ref id) if id == "def" => self.parse_def(), + Ident(ref id) if id == "extern" => self.parse_extern(), + _ => self.parse_toplevel_expr() + } + } + + /// Returns the current `Token`. + pub fn curr(&self) -> Token { + self.tokens[self.pos].clone() + } + + /// Returns the current `Token`, or `None` if the end of the input has been reached. + pub fn current(&self) -> Option { + if self.pos >= self.tokens.len() { + None + } else { + Some(self.tokens[self.pos].clone()) + } + } + + /// Advances the position, and returns whether or not a new + /// `Token` is available. + pub fn advance(&mut self) -> bool { + let npos = self.pos + 1; + + self.pos = npos; + + npos < self.tokens.len() + } + + /// Returns a value indicating whether or not the `Parser` + /// has reached the end of the input. + pub fn at_end(&self) -> bool { + self.pos >= self.tokens.len() + } + + /// Returns the precedence of the current `Token`, or 0 if it is not recognized as a binary operator. + fn get_tok_precedence(&self) -> i32 { + if let Some(Token::Ident(id)) = self.current() { + *self.prec.get(id.as_str()).unwrap_or(&100) + } else { + -1 + } + } + + /// Parses the prototype of a function, whether external or user-defined. + fn parse_prototype(&mut self) -> Result { + let id = match self.curr() { + Ident(id) => id, + _ => { return Err("Expected identifier in prototype declaration.") } + }; + + self.advance(); + + match self.curr() { + LParen => (), + _ => { return Err("Expected '(' character in prototype declaration.") } + } + + self.advance(); + + let mut args = vec![]; + + loop { + match self.curr() { + Ident(name) => args.push(name), + _ => { return Err("Expected identifier in parameter declaration.") } + } + + self.advance(); + + match self.curr() { + RParen => break, + Comma => (), + _ => { return Err("Expected ',' or ')' character in prototype declaration.") } + } + } + + Ok(Prototype { name: id, args: args }) + } + + /// Parses a user-defined function. + fn parse_def(&mut self) -> Result { + // Eat 'def' keyword + self.pos += 1; + + // Parse signature of function + let proto = self.parse_prototype(); + + if let Err(err) = proto { + return Err(err); + } + + // Parse body of function + let body = self.parse_expr(); + + if let Err(err) = body { + return Err(err); + } + + // Return new function + Ok(Function { prototype: proto.unwrap(), body: body.unwrap() }) + } + + /// Parses an external function declaration. + fn parse_extern(&mut self) -> Result { + // Eat 'extern' keyword + self.pos += 1; + + // Parse signature of extern function + let proto = self.parse_prototype(); + + if let Err(err) = proto { + return Err(err); + } + + // Return signature of extern function + Ok(Function { prototype: proto.unwrap(), body: Expr::Number(std::f64::NAN) }) + } + + /// Parses any expression. + fn parse_expr(&mut self) -> Result { + match self.parse_primary() { + Ok(left) => self.parse_binary_expr(0, left), + err => err + } + } + + /// Parses a literal number. + fn parse_nb_expr(&mut self) -> Result { + // Simply convert Token::Number to Expr::Number + match self.curr() { + Number(nb) => { + self.advance(); + Ok(Expr::Number(nb)) + }, + _ => Err("Expected number literal.") + } + } + + /// Parses an expression enclosed in parenthesis. + fn parse_paren_expr(&mut self) -> Result { + match self.curr() { + LParen => (), + _ => { return Err("Expected '(' character at start of parenthesized expression.") } + } + + self.advance(); + + let expr = self.parse_expr(); + + if expr.is_err() { + return expr; + } + + match self.curr() { + RParen => (), + _ => { return Err("Expected ')' character at end of parenthesized expression.") } + } + + self.advance(); + + Ok(expr.unwrap()) + } + + /// Parses an expression that starts with an identifier (either a variable or a function call). + fn parse_id_expr(&mut self) -> Result { + let id = match self.curr() { + Ident(id) => id, + _ => { return Err("Expected identifier."); } + }; + + self.advance(); + + match self.curr() { + LParen => { + let mut args = vec![]; + + loop { + self.advance(); + + match self.curr() { + RParen => break, + + _ => { + match self.parse_expr() { + Ok(expr) => args.push(expr), + err => { return err; } + } + + self.advance(); + + match self.curr() { + Comma => (), + _ => { return Err("Expected ',' character in function call."); } + } + } + } + } + + self.advance(); + + Ok(Expr::Call(id, args)) + }, + + _ => Ok(Expr::Variable(id)) + } + } + + /// Parses a binary expression, given its left-hand expression. + fn parse_binary_expr(&mut self, prec: i32, left: Expr) -> Result { + let mut left = left; + + loop { + let curr_prec = self.get_tok_precedence(); + + if curr_prec == -1 || curr_prec < prec { + return Ok(left); + } + + let op = match self.curr() { + Op(op) => op, + _ => { return Err("Invalid operator."); } + }; + + self.advance(); + + let mut right = self.parse_primary(); + + if right.is_err() { + return right; + } + + let next_prec = self.get_tok_precedence(); + + if curr_prec < next_prec { + right = self.parse_binary_expr(curr_prec + 1, right.unwrap()); + + if right.is_err() { + return right; + } + } + + left = Expr::Binary(op, Box::new(left), Box::new(right.unwrap())); + } + } + + /// Parses a primary expression (an identifier, a number or a parenthesized expression). + fn parse_primary(&mut self) -> Result { + match self.curr() { + Ident(_) => self.parse_id_expr(), + Number(_) => self.parse_nb_expr(), + LParen => self.parse_paren_expr(), + _ => Err("Unknown expression.") + } + } + + /// Parses a top-level expression and makes an anonymous function out of it, + /// for easier compilation. + fn parse_toplevel_expr(&mut self) -> Result { + match self.parse_expr() { + Ok(expr) => { + Ok(Function { + prototype: Prototype { name: "anonymous".to_string(), args: vec![] }, + body: expr + }) + }, + + Err(err) => Err(err) + } + } +} + + +// ====================================================================================== +// COMPILER ============================================================================= +// ====================================================================================== + +/// Defines the `Expr` compiler. +pub struct Compiler { + pub variables: HashMap, + pub functions: HashMap, + pub builder: Builder +} + +impl Compiler { + /// Compiles the specified `Expr` into a LLVM `FloatValue`. + pub fn compile(&self, expr: &Expr) -> Result { + match expr { + &Expr::Number(nb) => Ok(FloatType::f64_type().const_float(nb)), + &Expr::Variable(ref name) => { + match self.variables.get(name.as_str()) { + Some(var) => Ok(*var), + None => Err("Could not find a matching variable.") + } + }, + + &Expr::Binary(op, ref left, ref right) => { + let lhs = self.compile(&left)?; + let rhs = self.compile(&right)?; + + match op { + '+' => Ok(self.builder.build_float_add(&lhs, &rhs, "tmp")), + '-' => Ok(self.builder.build_float_sub(&lhs, &rhs, "tmp")), + '*' => Ok(self.builder.build_float_mul(&lhs, &rhs, "tmp")), + _ => Err("Unimplemented operator.") + } + }, + + &Expr::Call(ref name, ref args) => { + match self.functions.get(name.as_str()) { + Some(fun) => { + let args: Vec = args.iter().map(|expr| self.compile(expr).unwrap()).collect(); + let mut argsv: Vec<&BasicValue> = args.iter().by_ref().map(|val| val as &BasicValue).collect(); + + match self.builder.build_call(&fun, argsv.as_slice(), "tmp", false).left() { + Some(value) => Ok(value.into_float_value()), + None => Err("Invalid call produced.") + } + }, + None => Err("Unknown function.") + } + } + } + } +} + + +// ====================================================================================== +// PROGRAM ============================================================================== +// ====================================================================================== + +/// Entry point of the program; acts as a REPL. +pub fn main() { + loop { + println!("> "); + + // Read input from stdin + let mut input = String::new(); + io::stdin().read_line(&mut input).expect("Could not read from standard input."); + + // Build precedence map + let mut prec = HashMap::with_capacity(4); + + prec.insert("<", 10); + prec.insert("+", 20); + prec.insert("-", 20); + prec.insert("*", 40); + + // Parse input + match Parser::new(input, prec).parse() { + Ok(expr) => println!("Expression parsed: {:?}", expr), + Err(err) => println!("Error parsing expression: {}", err) + } + } +} From 60474f32e6319a5c97b9ce9738a076618354cf87 Mon Sep 17 00:00:00 2001 From: Gregoire Geis Date: Sun, 1 Oct 2017 17:15:41 +0200 Subject: [PATCH 06/17] Chapter 6 of kaleidoscope: DONE --- examples/kaleidoscope.rs | 829 ++++++++++++++++++++++++++++++++------- 1 file changed, 690 insertions(+), 139 deletions(-) diff --git a/examples/kaleidoscope.rs b/examples/kaleidoscope.rs index 77d9b026baa86..3f49a7eeff0ed 100644 --- a/examples/kaleidoscope.rs +++ b/examples/kaleidoscope.rs @@ -1,32 +1,48 @@ +#![feature(box_syntax)] +//! This is an example of the [Kaleidoscope tutorial](https://llvm.org/docs/tutorial/) +//! made in Rust, using Inkwell. +//! Currently, all features up to the [4th chapter](https://llvm.org/docs/tutorial/LangImpl04.html) +//! are available. + extern crate inkwell; use std::collections::HashMap; -use std::io; +use std::io::{self, Write}; use std::iter::Peekable; use std::str::Chars; use std::ops::DerefMut; +use inkwell::FloatPredicate; use inkwell::builder::Builder; -use inkwell::types::FloatType; -use inkwell::values::{BasicValue, FloatValue, FunctionValue}; +use inkwell::context::Context; +use inkwell::module::Module; +use inkwell::passes::PassManager; +use inkwell::targets::{InitializationConfig, Target}; +use inkwell::types::BasicType; +use inkwell::values::{BasicValue, FloatValue, FunctionValue, PhiValue}; use Token::*; +const ANONYMOUS_FUNCTION_NAME: &str = "anonymous"; + // ====================================================================================== // LEXER ================================================================================ // ====================================================================================== /// Represents a primitive syntax token. -#[derive(Clone)] +#[derive(Debug, Clone)] pub enum Token { EOF, Comment, LParen, RParen, Comma, - Def, - Extern, + Def, Extern, + If, Then, Else, + For, In, + Unary, Binary, + Ident(String), Number(f64), Op(char) @@ -127,13 +143,10 @@ impl<'a> Lexer<'a> { ch @ '.' | ch @ '0' ... '9' => { // Parse number literal loop { - let ch = *chars.peek(); - - if ch.is_none() { - return Ok(Token::EOF); - } - - let ch = ch.unwrap(); + let ch = match chars.peek() { + Some(ch) => *ch, + None => { return Ok(Token::EOF); } + }; // Parse float. if ch != '.' && !ch.is_digit(16) { @@ -150,13 +163,10 @@ impl<'a> Lexer<'a> { 'a' ... 'z' | 'A' ... 'Z' | '_' => { // Parse identifier loop { - let ch = chars.peek(); - - if ch.is_none() { - break; - } - - let ch = *ch.unwrap(); + let ch = match chars.peek() { + Some(ch) => *ch, + None => { return Ok(Token::EOF); } + }; // A word-like identifier only contains underscores and alphanumeric characters. if ch != '_' && !ch.is_alphanumeric() { @@ -170,6 +180,14 @@ impl<'a> Lexer<'a> { match &src[start..pos] { "def" => Ok(Token::Def), "extern" => Ok(Token::Extern), + "if" => Ok(Token::If), + "then" => Ok(Token::Then), + "else" => Ok(Token::Else), + "for" => Ok(Token::For), + "in" => Ok(Token::In), + "unary" => Ok(Token::Unary), + "binary" => Ok(Token::Binary), + ident => Ok(Token::Ident(ident.to_string())) } }, @@ -210,35 +228,61 @@ impl<'a> Iterator for Lexer<'a> { pub enum Expr { Number(f64), Variable(String), - Binary(char, Box, Box), - Call(String, Vec) + + Binary { + op: char, + left: Box, + right: Box + }, + + Call { + fn_name: String, + args: Vec + }, + + Conditional { + cond: Box, + consequence: Box, + alternative: Box + }, + + For { + var_name: String, + start: Box, + end: Box, + step: Option>, + body: Box + } } /// Defines the prototype (name and parameters) of a function. #[derive(Debug)] pub struct Prototype { pub name: String, - pub args: Vec + pub args: Vec, + pub is_op: bool, + pub prec: usize } /// Defines a user-defined function. #[derive(Debug)] pub struct Function { pub prototype: Prototype, - pub body: Expr + pub body: Expr, + pub is_anon: bool } /// Represents the `Expr` parser. -pub struct Parser { +pub struct Parser<'a> { tokens: Vec, pos: usize, - prec: HashMap<&'static str, i32> + prec: &'a mut HashMap } -impl Parser { +impl<'a> Parser<'a> { /// Creates a new parser, given an input `str` and a `HashMap` binding /// an operator and its precedence in binary expressions. - pub fn new(input: String, op_precedence: HashMap<&'static str, i32>) -> Parser { + pub fn new(input: String, op_precedence: &'a mut HashMap) -> Self { let mut lexer = Lexer::new(input.as_str()); let tokens = lexer.by_ref().collect(); @@ -252,46 +296,52 @@ impl Parser { /// Parses the content of the parser. pub fn parse(&mut self) -> Result { match self.curr() { - Ident(ref id) if id == "def" => self.parse_def(), - Ident(ref id) if id == "extern" => self.parse_extern(), + Def => self.parse_def(), + Extern => self.parse_extern(), _ => self.parse_toplevel_expr() } } - /// Returns the current `Token`. + /// Returns the current `Token`, without performing safety checks beforehand. pub fn curr(&self) -> Token { self.tokens[self.pos].clone() } - /// Returns the current `Token`, or `None` if the end of the input has been reached. - pub fn current(&self) -> Option { + /// Returns the current `Token`, or an error that + /// indicates that the end of the file has been unexpectedly reached if it is the case. + pub fn current(&self) -> Result { if self.pos >= self.tokens.len() { - None + Err("Unexpected end of file.") } else { - Some(self.tokens[self.pos].clone()) + Ok(self.tokens[self.pos].clone()) } } - /// Advances the position, and returns whether or not a new - /// `Token` is available. - pub fn advance(&mut self) -> bool { + /// Advances the position, and returns an empty `Result` whose error + /// indicates that the end of the file has been unexpectedly reached. + /// This allows to use the `self.advance()?;` syntax. + pub fn advance(&mut self) -> Result<(), &'static str> { let npos = self.pos + 1; self.pos = npos; - npos < self.tokens.len() + if npos < self.tokens.len() { + Ok(()) + } else { + Err("Unexpected end of file.") + } } /// Returns a value indicating whether or not the `Parser` /// has reached the end of the input. pub fn at_end(&self) -> bool { - self.pos >= self.tokens.len() + self.pos > self.tokens.len() - 2 } /// Returns the precedence of the current `Token`, or 0 if it is not recognized as a binary operator. fn get_tok_precedence(&self) -> i32 { - if let Some(Token::Ident(id)) = self.current() { - *self.prec.get(id.as_str()).unwrap_or(&100) + if let Ok(Op(op)) = self.current() { + *self.prec.get(&op).unwrap_or(&100) } else { -1 } @@ -299,19 +349,71 @@ impl Parser { /// Parses the prototype of a function, whether external or user-defined. fn parse_prototype(&mut self) -> Result { - let id = match self.curr() { - Ident(id) => id, + let (id, is_operator, precedence) = match self.curr() { + Ident(id) => { + self.advance()?; + + (id, false, 0) + }, + + Binary => { + self.advance()?; + + let op = match self.curr() { + Op(ch) => ch, + _ => { return Err("Expected operator in custom operator declaration."); } + }; + + self.advance()?; + + let mut name = String::from("binary"); + + name.push(op); + + let prec = if let Number(prec) = self.curr() { + self.advance()?; + + prec as usize + } else { + 0 + }; + + self.prec.insert(op, prec as i32); + + (name, true, prec) + }, + + Unary => { + self.advance()?; + + let op = match self.curr() { + Op(ch) => ch, + _ => { return Err("Expected operator in custom operator declaration."); } + }; + + let mut name = String::from("unary"); + + name.push(op); + + self.advance()?; + + (name, true, 0) + }, + _ => { return Err("Expected identifier in prototype declaration.") } }; - self.advance(); - match self.curr() { LParen => (), _ => { return Err("Expected '(' character in prototype declaration.") } } - self.advance(); + self.advance()?; + + if let RParen = self.curr() { + self.advance(); + return Ok(Prototype { name: id, args: vec![], is_op: is_operator, prec: precedence }); + } let mut args = vec![]; @@ -321,16 +423,21 @@ impl Parser { _ => { return Err("Expected identifier in parameter declaration.") } } - self.advance(); + self.advance()?; match self.curr() { - RParen => break, - Comma => (), + RParen => { + self.advance(); + break; + }, + Comma => { + self.advance(); + }, _ => { return Err("Expected ',' or ')' character in prototype declaration.") } } } - Ok(Prototype { name: id, args: args }) + Ok(Prototype { name: id, args: args, is_op: is_operator, prec: precedence }) } /// Parses a user-defined function. @@ -339,21 +446,13 @@ impl Parser { self.pos += 1; // Parse signature of function - let proto = self.parse_prototype(); - - if let Err(err) = proto { - return Err(err); - } + let proto = self.parse_prototype()?; // Parse body of function - let body = self.parse_expr(); - - if let Err(err) = body { - return Err(err); - } + let body = self.parse_expr()?; // Return new function - Ok(Function { prototype: proto.unwrap(), body: body.unwrap() }) + Ok(Function { prototype: proto, body: body, is_anon: false }) } /// Parses an external function declaration. @@ -362,19 +461,15 @@ impl Parser { self.pos += 1; // Parse signature of extern function - let proto = self.parse_prototype(); - - if let Err(err) = proto { - return Err(err); - } + let proto = self.parse_prototype()?; // Return signature of extern function - Ok(Function { prototype: proto.unwrap(), body: Expr::Number(std::f64::NAN) }) + Ok(Function { prototype: proto, body: Expr::Number(std::f64::NAN), is_anon: false }) } /// Parses any expression. fn parse_expr(&mut self) -> Result { - match self.parse_primary() { + match self.parse_unary_expr() { Ok(left) => self.parse_binary_expr(0, left), err => err } @@ -394,27 +489,23 @@ impl Parser { /// Parses an expression enclosed in parenthesis. fn parse_paren_expr(&mut self) -> Result { - match self.curr() { + match self.current()? { LParen => (), _ => { return Err("Expected '(' character at start of parenthesized expression.") } } - self.advance(); - - let expr = self.parse_expr(); + self.advance()?; - if expr.is_err() { - return expr; - } + let expr = self.parse_expr()?; - match self.curr() { + match self.current()? { RParen => (), _ => { return Err("Expected ')' character at end of parenthesized expression.") } } self.advance(); - Ok(expr.unwrap()) + Ok(expr) } /// Parses an expression that starts with an identifier (either a variable or a function call). @@ -424,43 +515,58 @@ impl Parser { _ => { return Err("Expected identifier."); } }; - self.advance(); + if let Err(_) = self.advance() { + return Ok(Expr::Variable(id)); + } match self.curr() { LParen => { + self.advance()?; + + if let RParen = self.curr() { + return Ok(Expr::Call { fn_name: id, args: vec![] }); + } + let mut args = vec![]; loop { - self.advance(); + args.push(self.parse_expr()?); - match self.curr() { + match self.current()? { + Comma => (), RParen => break, - - _ => { - match self.parse_expr() { - Ok(expr) => args.push(expr), - err => { return err; } - } - - self.advance(); - - match self.curr() { - Comma => (), - _ => { return Err("Expected ',' character in function call."); } - } - } + _ => { return Err("Expected ',' character in function call."); } } + + self.advance()?; } self.advance(); - Ok(Expr::Call(id, args)) + Ok(Expr::Call { fn_name: id, args: args }) }, _ => Ok(Expr::Variable(id)) } } + /// Parses an unary expression. + fn parse_unary_expr(&mut self) -> Result { + let op = match self.current()? { + Op(ch) => { + self.advance()?; + ch + }, + _ => { return self.parse_primary(); } + }; + + let mut name = String::from("unary"); + + name.push(op); + + Ok(Expr::Call { fn_name: name, args: vec![ self.parse_unary_expr()? ] }) + } + /// Parses a binary expression, given its left-hand expression. fn parse_binary_expr(&mut self, prec: i32, left: Expr) -> Result { let mut left = left; @@ -468,7 +574,7 @@ impl Parser { loop { let curr_prec = self.get_tok_precedence(); - if curr_prec == -1 || curr_prec < prec { + if curr_prec < prec || self.at_end() { return Ok(left); } @@ -477,26 +583,109 @@ impl Parser { _ => { return Err("Invalid operator."); } }; - self.advance(); - - let mut right = self.parse_primary(); + self.advance()?; - if right.is_err() { - return right; - } + let mut right = self.parse_unary_expr()?; let next_prec = self.get_tok_precedence(); if curr_prec < next_prec { - right = self.parse_binary_expr(curr_prec + 1, right.unwrap()); - - if right.is_err() { - return right; - } + right = self.parse_binary_expr(curr_prec + 1, right)?; } - left = Expr::Binary(op, Box::new(left), Box::new(right.unwrap())); + left = Expr::Binary { + op: op, + left: Box::new(left), + right: Box::new(right) + }; + } + } + + /// Parses a conditional if..then..else expression. + fn parse_conditional_expr(&mut self) -> Result { + // eat 'if' token + self.advance()?; + + let cond = self.parse_expr()?; + + // eat 'then' token + match self.current() { + Ok(Then) => { self.advance()? }, + _ => { return Err("Expected 'then' keyword."); } + } + + let then = self.parse_expr()?; + + // eat 'else' token + match self.current() { + Ok(Else) => { self.advance()? }, + _ => { return Err("Expected 'else' keyword."); } + } + + let otherwise = self.parse_expr()?; + + Ok(Expr::Conditional { + cond: Box::new(cond), + consequence: Box::new(then), + alternative: Box::new(otherwise) + }) + } + + /// Parses a loop for..in.. expression. + fn parse_for_expr(&mut self) -> Result { + // eat 'for' token + self.advance()?; + + let name = match self.curr() { + Ident(n) => n, + _ => { return Err("Expected identifier in for loop."); } + }; + + // eat identifier + self.advance()?; + + // eat '=' token + match self.curr() { + Op('=') => { self.advance()?; }, + _ => { return Err("Expected '=' character in for loop."); } + } + + let start = self.parse_expr()?; + + // eat ',' token + match self.current()? { + Comma => { self.advance()?; }, + _ => { return Err("Expected ',' character in for loop."); } + } + + let end = self.parse_expr()?; + + // parse (optional) step expression + let step = match self.current()? { + Comma => { + self.advance()?; + + Some(self.parse_expr()?) + }, + + _ => None + }; + + // eat 'in' token + match self.current()? { + In => { self.advance()?; }, + _ => { return Err("Expected 'in' keyword in for loop."); } } + + let body = self.parse_expr()?; + + Ok(Expr::For { + var_name: name, + start: Box::new(start), + end: Box::new(end), + step: step.map(Box::new), + body: Box::new(body) + }) } /// Parses a primary expression (an identifier, a number or a parenthesized expression). @@ -505,6 +694,8 @@ impl Parser { Ident(_) => self.parse_id_expr(), Number(_) => self.parse_nb_expr(), LParen => self.parse_paren_expr(), + If => self.parse_conditional_expr(), + For => self.parse_for_expr(), _ => Err("Unknown expression.") } } @@ -515,8 +706,9 @@ impl Parser { match self.parse_expr() { Ok(expr) => { Ok(Function { - prototype: Prototype { name: "anonymous".to_string(), args: vec![] }, - body: expr + prototype: Prototype { name: ANONYMOUS_FUNCTION_NAME.to_string(), args: vec![], is_op: false, prec: 0 }, + body: expr, + is_anon: true }) }, @@ -531,17 +723,32 @@ impl Parser { // ====================================================================================== /// Defines the `Expr` compiler. -pub struct Compiler { - pub variables: HashMap, - pub functions: HashMap, - pub builder: Builder +pub struct Compiler<'a> { + pub context: &'a Context, + pub builder: &'a Builder, + pub fpm: &'a PassManager, + pub module: &'a Module, + pub function: &'a Function, + pub fn_value: FunctionValue, + pub variables: HashMap } -impl Compiler { - /// Compiles the specified `Expr` into a LLVM `FloatValue`. - pub fn compile(&self, expr: &Expr) -> Result { +fn to_float_value(phi: PhiValue) -> FloatValue { + unsafe { std::mem::transmute(phi) } +} + +impl<'a> Compiler<'a> { + /// Gets a defined function given its name. + pub fn get_function(&self, name: &str) -> Option { + self.module.get_function(name) + } + + /// Compiles the specified `Expr` into an LLVM `BasicValue`. + /// Note: This method currently returns a `Box`, since it returns either `PhiValue` or `FloatValue`. + pub fn compile_expr(&mut self, expr: &Expr) -> Result { match expr { - &Expr::Number(nb) => Ok(FloatType::f64_type().const_float(nb)), + &Expr::Number(nb) => Ok(self.context.f64_type().const_float(nb)), + &Expr::Variable(ref name) => { match self.variables.get(name.as_str()) { Some(var) => Ok(*var), @@ -549,23 +756,56 @@ impl Compiler { } }, - &Expr::Binary(op, ref left, ref right) => { - let lhs = self.compile(&left)?; - let rhs = self.compile(&right)?; + &Expr::Binary { op, ref left, ref right } => { + let lhs = self.compile_expr(&left)?; + let rhs = self.compile_expr(&right)?; match op { - '+' => Ok(self.builder.build_float_add(&lhs, &rhs, "tmp")), - '-' => Ok(self.builder.build_float_sub(&lhs, &rhs, "tmp")), - '*' => Ok(self.builder.build_float_mul(&lhs, &rhs, "tmp")), - _ => Err("Unimplemented operator.") + '+' => Ok(self.builder.build_float_add(&lhs, &rhs, "tmpadd")), + '-' => Ok(self.builder.build_float_sub(&lhs, &rhs, "tmpsub")), + '*' => Ok(self.builder.build_float_mul(&lhs, &rhs, "tmpmul")), + '/' => Ok(self.builder.build_float_div(&lhs, &rhs, "tmpdiv")), + + '<' => Ok({ + let cmp = self.builder.build_float_compare(&FloatPredicate::ULT, &lhs, &rhs, "tmpcmp"); + + self.builder.build_unsigned_int_to_float(&cmp, &self.context.f64_type(), "tmpbool") + }), + '>' => Ok({ + let cmp = self.builder.build_float_compare(&FloatPredicate::ULT, &rhs, &lhs, "tmpcmp"); + + self.builder.build_unsigned_int_to_float(&cmp, &self.context.f64_type(), "tmpbool") + }), + + custom => { + let mut name = String::from("binary"); + + name.push(custom); + + match self.get_function(name.as_str()) { + Some(fun) => { + match self.builder.build_call(&fun, &[ &lhs, &rhs ], "tmpbin", false).left() { + Some(value) => Ok(value.into_float_value()), + None => Err("Invalid call produced.") + } + }, + + None => Err("Undefined binary operator.") + } + } } }, - &Expr::Call(ref name, ref args) => { - match self.functions.get(name.as_str()) { + &Expr::Call { ref fn_name, ref args } => { + match self.get_function(fn_name.as_str()) { Some(fun) => { - let args: Vec = args.iter().map(|expr| self.compile(expr).unwrap()).collect(); - let mut argsv: Vec<&BasicValue> = args.iter().by_ref().map(|val| val as &BasicValue).collect(); + let mut compiled_args = Vec::with_capacity(args.len()); + + for arg in args { + compiled_args.push(self.compile_expr(arg)?); + } + + let argsv: Vec<&BasicValue> = compiled_args.iter().by_ref().map(|val| val as &BasicValue).collect(); match self.builder.build_call(&fun, argsv.as_slice(), "tmp", false).left() { Some(value) => Ok(value.into_float_value()), @@ -574,9 +814,183 @@ impl Compiler { }, None => Err("Unknown function.") } + }, + + &Expr::Conditional { ref cond, ref consequence, ref alternative } => { + let parent = self.fn_value; + let zero_const = self.context.f64_type().const_float(0.0); + + // create condition by comparing without 0.0 and returning an int + let cond = self.compile_expr(&cond)?; + let cond = self.builder.build_float_compare(&FloatPredicate::ONE, &cond, &zero_const, "ifcond"); + + // build branch + let then_bb = self.context.append_basic_block(&parent, "then"); + let else_bb = self.context.append_basic_block(&parent, "else"); + let cont_bb = self.context.append_basic_block(&parent, "ifcont"); + + self.builder.build_conditional_branch(&cond, &then_bb, &else_bb); + + // build then block + self.builder.position_at_end(&then_bb); + let then_val = self.compile_expr(&consequence)?; + self.builder.build_unconditional_branch(&cont_bb); + + let then_bb = self.builder.get_insert_block().unwrap(); + + // build else block + self.builder.position_at_end(&else_bb); + let else_val = self.compile_expr(&alternative)?; + self.builder.build_unconditional_branch(&cont_bb); + + let else_bb = self.builder.get_insert_block().unwrap(); + + // emit merge block + self.builder.position_at_end(&cont_bb); + + let phi = self.builder.build_phi(&self.context.f64_type(), "iftmp"); + + phi.add_incoming(&[ + (&then_val, &then_bb), + (&else_val, &else_bb) + ]); + + Ok(to_float_value(phi)) + }, + + &Expr::For { ref var_name, ref start, ref end, ref step, ref body } => { + let parent = self.fn_value; + let zero_const = self.context.f64_type().const_float(0.0); + + let start = self.compile_expr(&start)?; + let preheader_bb = self.builder.get_insert_block().unwrap(); + let loop_bb = self.context.append_basic_block(&parent, "loop"); + + // go from current block to loop block + self.builder.build_unconditional_branch(&loop_bb); + + self.builder.position_at_end(&loop_bb); + + let variable = self.builder.build_phi(&self.context.f64_type(), var_name); + + variable.add_incoming(&[ + (&start, &preheader_bb) + ]); + + let old_val = match self.variables.get(var_name.as_str()) { + Some(val) => Some(*val), + None => None + }; + + self.variables.insert(var_name.to_owned(), to_float_value(variable)); + + self.compile_expr(&body)?; + + let step = match step { + &Some(ref step) => self.compile_expr(&step)?, + &None => self.context.f64_type().const_float(1.0) + }; + + let variable_f = to_float_value(variable); + let next_var = self.builder.build_float_add(&variable_f, &step, "nextvar"); + + let end_cond = self.compile_expr(&end)?; + let end_cond = self.builder.build_float_compare(&FloatPredicate::ONE, &end_cond, &zero_const, "loopcond"); + + let loop_end_bb = self.builder.get_insert_block().unwrap(); + let after_bb = self.context.append_basic_block(&parent, "afterloop"); + + self.builder.build_conditional_branch(&end_cond, &loop_bb, &after_bb); + self.builder.position_at_end(&after_bb); + + variable.add_incoming(&[ + (&next_var, &loop_end_bb) + ]); + + self.variables.remove(var_name); + + if let Some(val) = old_val { + self.variables.insert(var_name.to_owned(), val); + } + + Ok(zero_const) } } } + + /// Compiles the specified `Prototype` into an extern LLVM `FunctionValue`. + fn compile_prototype(&self, proto: &Prototype) -> Result { + let ret_type: &BasicType = &self.context.f64_type(); + let args_types = std::iter::repeat(ret_type).take(proto.args.len()).collect::>(); + let args_types = args_types.as_slice(); + + let fn_type = self.context.f64_type().fn_type(args_types, false); + let fn_val = self.module.add_function(proto.name.as_str(), &fn_type, None); + + // set arguments names + for (i, arg) in fn_val.params().enumerate() { + arg.into_float_value().set_name(proto.args[i].as_str()); + } + + // finally return built prototype + Ok(fn_val) + } + + /// Compiles the specified `Function` into an LLVM `FunctionValue`. + fn compile_fn(&mut self) -> Result { + let proto = &self.function.prototype; + let function = self.compile_prototype(proto)?; + let entry = self.context.append_basic_block(&function, "entry"); + + self.builder.position_at_end(&entry); + + // build variables map + { + // this is in its own scope to borrow as shortly as possible + let variables = &mut self.variables; + + variables.reserve(proto.args.len()); + + for (i, arg) in function.params().enumerate() { + variables.insert(proto.args[i].clone(), arg.into_float_value()); + } + } + + // update fields + self.fn_value = function; + + // compile body + let body = self.compile_expr(&self.function.body)?; + + self.builder.build_return(Some(&body)); + + // return the whole thing after verification and optimization + if function.verify(true) { + self.fpm.run_on_function(&function); + + Ok(function) + } else { + unsafe { + function.delete(); + } + + Err("Invalid generated function.") + } + } + + pub fn compile(context: &'a Context, builder: &'a Builder, pass_manager: &'a PassManager, module: &'a Module, function: &Function) -> Result { + let mut compiler = Compiler { + context: context, + builder: builder, + fpm: pass_manager, + module: module, + function: function, + fn_value: unsafe { std::mem::transmute(0usize) }, + variables: HashMap::new() + }; + + compiler.compile_fn() + } } @@ -584,27 +998,164 @@ impl Compiler { // PROGRAM ============================================================================== // ====================================================================================== + +// macro used to print & flush without printing a new line +macro_rules! print_flush { + ( $( $x:expr ),* ) => { + print!( $($x, )* ); + + std::io::stdout().flush().expect("Could not flush to standard output."); + }; +} + +pub extern fn putchard(x: f64) -> f64 { + print_flush!("{}", x as u8 as char); + x +} + +pub extern fn printd(x: f64) -> f64 { + println!("Fn called"); + println!("{}", x); + x +} + /// Entry point of the program; acts as a REPL. pub fn main() { + // use self::inkwell::support::add_symbol; + + let show_lexed_input = true; + let show_parsed_input = false; + let show_compiled_input = true; + + Target::initialize_native(&InitializationConfig::default()).expect("Failed to initialize native target."); + + // add_symbol("printd", &printd, printd as *const ()); + // add_symbol("putchard", &putchard, putchard as *const ()); + + let context = Context::create(); + let module = context.create_module("repl"); + let builder = context.create_builder(); + + // Create FPM + let fpm = PassManager::create_for_function(&module); + + fpm.add_instruction_combining_pass(); + fpm.add_reassociate_pass(); + fpm.add_gvn_pass(); + fpm.add_cfg_simplification_pass(); + fpm.add_basic_alias_analysis_pass(); + + fpm.initialize(); + + let mut previous_exprs = Vec::new(); + loop { - println!("> "); + print_flush!("> "); // Read input from stdin let mut input = String::new(); io::stdin().read_line(&mut input).expect("Could not read from standard input."); + if input.starts_with("exit") { + break; + } + // Build precedence map let mut prec = HashMap::with_capacity(4); - prec.insert("<", 10); - prec.insert("+", 20); - prec.insert("-", 20); - prec.insert("*", 40); + prec.insert('<', 10); + prec.insert('+', 20); + prec.insert('-', 20); + prec.insert('*', 40); + prec.insert('/', 40); + + // Parse and (optionally) display input + if show_lexed_input { + println!("Attempting to parse lexed input: {:?}", Lexer::new(input.as_str()).collect::>()); + } + + // make module + let module = context.create_module("tmp"); + let mut printd_fn = None; + let mut putchard_fn = None; + + for prev in previous_exprs.iter() { + let fun = Compiler::compile(&context, &builder, &fpm, &module, prev).expect("Cannot re-add previously compiled function."); + + match fun.get_name().to_str().unwrap() { + "printd" => { + println!("Setting printd to {:?}", fun); + printd_fn = Some(fun); + }, + + "putchard" => { + putchard_fn = Some(fun); + }, + + _ => () + } + } + + let (name, is_anonymous) = match Parser::new(input, &mut prec).parse() { + Ok(fun) => { + if show_parsed_input { + println!("Expression parsed: {:?}", fun); + } + + match Compiler::compile(&context, &builder, &fpm, &module, &fun) { + Ok(function) => { + if show_compiled_input { + // Not printing a new line since LLVM automatically + // prefixes the generated string with one + print_flush!("Expression compiled to IR:"); + function.print_to_stderr(); + } + + let fn_name = function.get_name().to_str().unwrap(); + + if fn_name.starts_with(ANONYMOUS_FUNCTION_NAME) { + (fn_name.to_string(), true) + } else { + previous_exprs.push(fun); + + (fn_name.to_string(), false) + } + }, + Err(err) => { + println!("Error compiling function: {}", err); + continue; + } + } + }, + Err(err) => { + println!("Error parsing expression: {}", err); + continue; + } + }; + + if is_anonymous { + let mut ee = module.create_jit_execution_engine(0).unwrap(); + + if let Some(fun) = printd_fn { + println!("Setting global mapping for {:p} {:p} {:p}", &printd, &mut printd, *printd); + ee.add_global_mapping(&fun, unsafe { std::mem::transmute(&mut printd) }); + } + + if let Some(fun) = putchard_fn { + ee.add_global_mapping(&fun, unsafe { std::mem::transmute(&putchard) }); + } + + let addr = match ee.get_function_address(name.as_str()) { + Ok(addr) => addr, + Err(err) => { + println!("Error during execution: {:?}", err); + continue; + } + }; + + let compiled_fn: extern "C" fn() -> f64 = unsafe { std::mem::transmute(addr) }; - // Parse input - match Parser::new(input, prec).parse() { - Ok(expr) => println!("Expression parsed: {:?}", expr), - Err(err) => println!("Error parsing expression: {}", err) + println!("Result: {}", compiled_fn()); } } } From 8190a7909ac39cb33e7406abd85579538d7ec666 Mon Sep 17 00:00:00 2001 From: Gregoire Geis Date: Sun, 1 Oct 2017 17:15:59 +0200 Subject: [PATCH 07/17] Added add_global_mapping function to EE --- src/execution_engine.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/execution_engine.rs b/src/execution_engine.rs index 1072fea325c26..d3556bc702121 100644 --- a/src/execution_engine.rs +++ b/src/execution_engine.rs @@ -1,9 +1,9 @@ use llvm_sys::core::LLVMDisposeMessage; -use llvm_sys::execution_engine::{LLVMGetExecutionEngineTargetData, LLVMExecutionEngineRef, LLVMRunFunction, LLVMRunFunctionAsMain, LLVMDisposeExecutionEngine, LLVMGetFunctionAddress, LLVMAddModule, LLVMFindFunction, LLVMLinkInMCJIT, LLVMLinkInInterpreter, LLVMRemoveModule, LLVMGenericValueRef}; +use llvm_sys::execution_engine::{LLVMGetExecutionEngineTargetData, LLVMExecutionEngineRef, LLVMAddGlobalMapping, LLVMRunFunction, LLVMRunFunctionAsMain, LLVMDisposeExecutionEngine, LLVMGetFunctionAddress, LLVMAddModule, LLVMFindFunction, LLVMLinkInMCJIT, LLVMLinkInInterpreter, LLVMRemoveModule, LLVMGenericValueRef}; use module::Module; use targets::TargetData; -use values::{AsValueRef, FunctionValue, GenericValue}; +use values::{AsValueRef, AnyValue, FunctionValue, GenericValue}; use std::ffi::{CStr, CString}; use std::mem::{forget, uninitialized, zeroed}; @@ -50,6 +50,12 @@ impl ExecutionEngine { } } + pub fn add_global_mapping(&mut self, global: &AnyValue, addr: *const ()) { + unsafe { + LLVMAddGlobalMapping(self.execution_engine, global.as_value_ref(), addr as *mut ::libc::c_void) + } + } + // TODOC: EE must *own* modules and deal out references pub fn add_module(&mut self, module: Module) -> &Module { unsafe { From 20726e2220859ca788015f973f9c04ca68c4dc32 Mon Sep 17 00:00:00 2001 From: Greg Date: Mon, 2 Oct 2017 11:37:35 +0200 Subject: [PATCH 08/17] Removed unnecessary feature 'box_syntax' --- examples/kaleidoscope.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/examples/kaleidoscope.rs b/examples/kaleidoscope.rs index 3f49a7eeff0ed..5a8643b93fa6b 100644 --- a/examples/kaleidoscope.rs +++ b/examples/kaleidoscope.rs @@ -1,4 +1,3 @@ -#![feature(box_syntax)] //! This is an example of the [Kaleidoscope tutorial](https://llvm.org/docs/tutorial/) //! made in Rust, using Inkwell. //! Currently, all features up to the [4th chapter](https://llvm.org/docs/tutorial/LangImpl04.html) @@ -706,7 +705,12 @@ impl<'a> Parser<'a> { match self.parse_expr() { Ok(expr) => { Ok(Function { - prototype: Prototype { name: ANONYMOUS_FUNCTION_NAME.to_string(), args: vec![], is_op: false, prec: 0 }, + prototype: Prototype { + name: ANONYMOUS_FUNCTION_NAME.to_string(), + args: vec![], + is_op: false, + prec: 0 + }, body: expr, is_anon: true }) @@ -1136,14 +1140,17 @@ pub fn main() { if is_anonymous { let mut ee = module.create_jit_execution_engine(0).unwrap(); - if let Some(fun) = printd_fn { - println!("Setting global mapping for {:p} {:p} {:p}", &printd, &mut printd, *printd); - ee.add_global_mapping(&fun, unsafe { std::mem::transmute(&mut printd) }); - } + // 2017-02-10 <6A> I still can't add my own functions with either add_global_mapping or add_symbol. + // However, importing extern functions such as cos(x) or sin(x) works. - if let Some(fun) = putchard_fn { - ee.add_global_mapping(&fun, unsafe { std::mem::transmute(&putchard) }); - } + // if let Some(fun) = printd_fn { + // println!("Setting global mapping for {:p} {:p} {:p}", &printd, &mut printd, *printd); + // ee.add_global_mapping(&fun, unsafe { std::mem::transmute(&mut printd) }); + // } + + // if let Some(fun) = putchard_fn { + // ee.add_global_mapping(&fun, unsafe { std::mem::transmute(&putchard) }); + // } let addr = match ee.get_function_address(name.as_str()) { Ok(addr) => addr, From 90ff2dec17c60b26dd2f5611650b8ec79593c235 Mon Sep 17 00:00:00 2001 From: Greg Date: Mon, 2 Oct 2017 22:57:05 +0200 Subject: [PATCH 09/17] Added mut variables to kaleidoscope --- examples/kaleidoscope.rs | 298 +++++++++++++++++++++++++++------------ 1 file changed, 210 insertions(+), 88 deletions(-) diff --git a/examples/kaleidoscope.rs b/examples/kaleidoscope.rs index 5a8643b93fa6b..7ccf43811b254 100644 --- a/examples/kaleidoscope.rs +++ b/examples/kaleidoscope.rs @@ -5,20 +5,22 @@ extern crate inkwell; +use std::borrow::Borrow; use std::collections::HashMap; use std::io::{self, Write}; use std::iter::Peekable; use std::str::Chars; use std::ops::DerefMut; -use inkwell::FloatPredicate; -use inkwell::builder::Builder; -use inkwell::context::Context; -use inkwell::module::Module; -use inkwell::passes::PassManager; -use inkwell::targets::{InitializationConfig, Target}; -use inkwell::types::BasicType; -use inkwell::values::{BasicValue, FloatValue, FunctionValue, PhiValue}; +use self::inkwell::basic_block::BasicBlock; +use self::inkwell::builder::Builder; +use self::inkwell::context::Context; +use self::inkwell::module::Module; +use self::inkwell::passes::PassManager; +use self::inkwell::targets::{InitializationConfig, Target}; +use self::inkwell::types::BasicType; +use self::inkwell::values::{BasicValue, FloatValue, FunctionValue, PointerValue}; +use self::inkwell::FloatPredicate; use Token::*; @@ -41,6 +43,7 @@ pub enum Token { If, Then, Else, For, In, Unary, Binary, + Var, Ident(String), Number(f64), @@ -82,6 +85,7 @@ impl<'a> Lexer<'a> { } /// Lexes and returns the next `Token` from the source code. + #[allow(unused_variables)] pub fn lex(&mut self) -> LexResult { let chars = self.chars.deref_mut(); let src = self.input; @@ -186,6 +190,7 @@ impl<'a> Lexer<'a> { "in" => Ok(Token::In), "unary" => Ok(Token::Unary), "binary" => Ok(Token::Binary), + "var" => Ok(Token::Var), ident => Ok(Token::Ident(ident.to_string())) } @@ -251,6 +256,11 @@ pub enum Expr { end: Box, step: Option>, body: Box + }, + + VarIn { + variables: Vec<(String, Option)>, + body: Box } } @@ -278,6 +288,7 @@ pub struct Parser<'a> { prec: &'a mut HashMap } +#[allow(unused_must_use)] impl<'a> Parser<'a> { /// Creates a new parser, given an input `str` and a `HashMap` binding /// an operator and its precedence in binary expressions. @@ -687,6 +698,54 @@ impl<'a> Parser<'a> { }) } + /// Parses a var..in expression. + fn parse_var_expr(&mut self) -> Result { + // eat 'var' token + self.advance()?; + + let mut variables = Vec::new(); + + // parse variables + loop { + let name = match self.curr() { + Ident(name) => name, + _ => return Err("Expected identifier in 'var..in' declaration.") + }; + + self.advance()?; + + // read (optional) initializer + let initializer = match self.curr() { + Op('=') => Some({ + self.advance()?; + self.parse_expr()? + }), + + _ => None + }; + + variables.push((name, initializer)); + + match self.curr() { + Op(',') => { + self.advance()?; + }, + In => { + self.advance()?; + break; + } + _ => { + return Err("Expected comma or 'in' keyword in variable declaration.") + } + } + } + + // parse body + let body = self.parse_expr()?; + + Ok(Expr::VarIn { variables: variables, body: Box::new(body) }) + } + /// Parses a primary expression (an identifier, a number or a parenthesized expression). fn parse_primary(&mut self) -> Result { match self.curr() { @@ -695,6 +754,7 @@ impl<'a> Parser<'a> { LParen => self.parse_paren_expr(), If => self.parse_conditional_expr(), For => self.parse_for_expr(), + Var => self.parse_var_expr(), _ => Err("Unknown expression.") } } @@ -734,67 +794,128 @@ pub struct Compiler<'a> { pub module: &'a Module, pub function: &'a Function, pub fn_value: FunctionValue, - pub variables: HashMap -} - -fn to_float_value(phi: PhiValue) -> FloatValue { - unsafe { std::mem::transmute(phi) } + pub variables: HashMap } impl<'a> Compiler<'a> { /// Gets a defined function given its name. - pub fn get_function(&self, name: &str) -> Option { + fn get_function(&self, name: &str) -> Option { self.module.get_function(name) } + /// Cretes a new stack allocation instruction in the entry block of the function. + fn create_entry_block_alloca(&self, name: &str, entry: Option<&BasicBlock>) -> PointerValue { + let builder = self.context.create_builder(); + + // let entry = match entry { + // Some(entry) => entry, + // None => + // }; + let owned_entry = self.fn_value.get_entry_basic_block(); + let entry = owned_entry.as_ref().or(entry).unwrap(); + + match entry.get_first_instruction() { + Some(first_instr) => builder.position_before(&first_instr), + None => builder.position_at_end(entry) + } + + builder.build_stack_allocation(&self.context.f64_type(), name) + } + /// Compiles the specified `Expr` into an LLVM `BasicValue`. /// Note: This method currently returns a `Box`, since it returns either `PhiValue` or `FloatValue`. - pub fn compile_expr(&mut self, expr: &Expr) -> Result { + fn compile_expr(&mut self, expr: &Expr) -> Result { match expr { &Expr::Number(nb) => Ok(self.context.f64_type().const_float(nb)), &Expr::Variable(ref name) => { match self.variables.get(name.as_str()) { - Some(var) => Ok(*var), + Some(var) => Ok(self.builder.build_load(&var, name.as_str()).into_float_value()), None => Err("Could not find a matching variable.") } }, + &Expr::VarIn { ref variables, ref body } => { + let mut old_bindings = Vec::new(); + + for &(ref var_name, ref initializer) in variables { + let var_name = var_name.as_str(); + + let initial_val = match initializer { + &Some(ref init) => self.compile_expr(&init)?, + &None => self.context.f64_type().const_float(0.) + }; + + let alloca = self.create_entry_block_alloca(var_name, None); + + self.builder.build_store(&alloca, &initial_val); + + if let Some(old_binding) = self.variables.remove(var_name) { + old_bindings.push(old_binding); + } + + self.variables.insert(var_name.to_string(), alloca); + } + + let body = self.compile_expr(&body)?; + + for binding in old_bindings { + self.variables.insert(binding.get_name().to_str().unwrap().to_string(), binding); + } + + Ok(body) + }, + &Expr::Binary { op, ref left, ref right } => { - let lhs = self.compile_expr(&left)?; - let rhs = self.compile_expr(&right)?; + if op == '=' { + // handle assignement + let var_name = match left.borrow() { + &Expr::Variable(ref var_name) => var_name, + _ => { + return Err("Expected variable as left-hand operator of assignement."); + } + }; - match op { - '+' => Ok(self.builder.build_float_add(&lhs, &rhs, "tmpadd")), - '-' => Ok(self.builder.build_float_sub(&lhs, &rhs, "tmpsub")), - '*' => Ok(self.builder.build_float_mul(&lhs, &rhs, "tmpmul")), - '/' => Ok(self.builder.build_float_div(&lhs, &rhs, "tmpdiv")), + let var_val = self.compile_expr(&right)?; + let var = self.variables.get(var_name.as_str()).ok_or("")?; + + Ok(unsafe { std::mem::transmute(self.builder.build_store(&var, &var_val)) }) + } else { + let lhs = self.compile_expr(&left)?; + let rhs = self.compile_expr(&right)?; - '<' => Ok({ - let cmp = self.builder.build_float_compare(&FloatPredicate::ULT, &lhs, &rhs, "tmpcmp"); + match op { + '+' => Ok(self.builder.build_float_add(&lhs, &rhs, "tmpadd")), + '-' => Ok(self.builder.build_float_sub(&lhs, &rhs, "tmpsub")), + '*' => Ok(self.builder.build_float_mul(&lhs, &rhs, "tmpmul")), + '/' => Ok(self.builder.build_float_div(&lhs, &rhs, "tmpdiv")), - self.builder.build_unsigned_int_to_float(&cmp, &self.context.f64_type(), "tmpbool") - }), - '>' => Ok({ - let cmp = self.builder.build_float_compare(&FloatPredicate::ULT, &rhs, &lhs, "tmpcmp"); + '<' => Ok({ + let cmp = self.builder.build_float_compare(&FloatPredicate::ULT, &lhs, &rhs, "tmpcmp"); - self.builder.build_unsigned_int_to_float(&cmp, &self.context.f64_type(), "tmpbool") - }), + self.builder.build_unsigned_int_to_float(&cmp, &self.context.f64_type(), "tmpbool") + }), + '>' => Ok({ + let cmp = self.builder.build_float_compare(&FloatPredicate::ULT, &rhs, &lhs, "tmpcmp"); - custom => { - let mut name = String::from("binary"); + self.builder.build_unsigned_int_to_float(&cmp, &self.context.f64_type(), "tmpbool") + }), - name.push(custom); + custom => { + let mut name = String::from("binary"); - match self.get_function(name.as_str()) { - Some(fun) => { - match self.builder.build_call(&fun, &[ &lhs, &rhs ], "tmpbin", false).left() { - Some(value) => Ok(value.into_float_value()), - None => Err("Invalid call produced.") - } - }, + name.push(custom); - None => Err("Undefined binary operator.") + match self.get_function(name.as_str()) { + Some(fun) => { + match self.builder.build_call(&fun, &[ &lhs, &rhs ], "tmpbin", false).left() { + Some(value) => Ok(value.into_float_value()), + None => Err("Invalid call produced.") + } + }, + + None => Err("Undefined binary operator.") + } } } } @@ -859,7 +980,7 @@ impl<'a> Compiler<'a> { (&else_val, &else_bb) ]); - Ok(to_float_value(phi)) + Ok(unsafe { std::mem::transmute(phi) }) }, &Expr::For { ref var_name, ref start, ref end, ref step, ref body } => { @@ -867,26 +988,24 @@ impl<'a> Compiler<'a> { let zero_const = self.context.f64_type().const_float(0.0); let start = self.compile_expr(&start)?; - let preheader_bb = self.builder.get_insert_block().unwrap(); - let loop_bb = self.context.append_basic_block(&parent, "loop"); + let start_alloca = self.create_entry_block_alloca(var_name, None); + + self.builder.build_store(&start_alloca, &start); // go from current block to loop block - self.builder.build_unconditional_branch(&loop_bb); + let loop_bb = self.context.append_basic_block(&parent, "loop"); + self.builder.build_unconditional_branch(&loop_bb); self.builder.position_at_end(&loop_bb); - let variable = self.builder.build_phi(&self.context.f64_type(), var_name); - - variable.add_incoming(&[ - (&start, &preheader_bb) - ]); + let variable = self.create_entry_block_alloca(var_name, None); let old_val = match self.variables.get(var_name.as_str()) { Some(val) => Some(*val), None => None }; - self.variables.insert(var_name.to_owned(), to_float_value(variable)); + self.variables.insert(var_name.to_owned(), variable); self.compile_expr(&body)?; @@ -895,26 +1014,23 @@ impl<'a> Compiler<'a> { &None => self.context.f64_type().const_float(1.0) }; - let variable_f = to_float_value(variable); - let next_var = self.builder.build_float_add(&variable_f, &step, "nextvar"); - let end_cond = self.compile_expr(&end)?; - let end_cond = self.builder.build_float_compare(&FloatPredicate::ONE, &end_cond, &zero_const, "loopcond"); - let loop_end_bb = self.builder.get_insert_block().unwrap(); + let curr_var = self.builder.build_load(&start_alloca, var_name); + let next_var = self.builder.build_float_add(&curr_var.as_float_value(), &step, "nextvar"); + + self.builder.build_store(&unsafe { std::mem::transmute(next_var) }, &start_alloca); + + let end_cond = self.builder.build_float_compare(&FloatPredicate::ONE, &end_cond, &zero_const, "loopcond"); let after_bb = self.context.append_basic_block(&parent, "afterloop"); self.builder.build_conditional_branch(&end_cond, &loop_bb, &after_bb); self.builder.position_at_end(&after_bb); - variable.add_incoming(&[ - (&next_var, &loop_end_bb) - ]); - - self.variables.remove(var_name); - if let Some(val) = old_val { self.variables.insert(var_name.to_owned(), val); + } else { + self.variables.remove(var_name); } Ok(zero_const) @@ -948,20 +1064,20 @@ impl<'a> Compiler<'a> { self.builder.position_at_end(&entry); + // update fn field + self.fn_value = function; + // build variables map - { - // this is in its own scope to borrow as shortly as possible - let variables = &mut self.variables; + self.variables.reserve(proto.args.len()); - variables.reserve(proto.args.len()); + for (i, arg) in function.params().enumerate() { + let arg_name = proto.args[i].as_str(); + let alloca = self.create_entry_block_alloca(arg_name, Some(&entry)); - for (i, arg) in function.params().enumerate() { - variables.insert(proto.args[i].clone(), arg.into_float_value()); - } - } + self.builder.build_store(&alloca, &arg); - // update fields - self.fn_value = function; + self.variables.insert(proto.args[i].clone(), alloca); + } // compile body let body = self.compile_expr(&self.function.body)?; @@ -971,6 +1087,8 @@ impl<'a> Compiler<'a> { // return the whole thing after verification and optimization if function.verify(true) { self.fpm.run_on_function(&function); + + function.verify(true); Ok(function) } else { @@ -1048,6 +1166,9 @@ pub fn main() { fpm.add_gvn_pass(); fpm.add_cfg_simplification_pass(); fpm.add_basic_alias_analysis_pass(); + fpm.add_promote_memory_to_register_pass(); + fpm.add_instruction_combining_pass(); + fpm.add_reassociate_pass(); fpm.initialize(); @@ -1067,6 +1188,7 @@ pub fn main() { // Build precedence map let mut prec = HashMap::with_capacity(4); + prec.insert('=', 2); prec.insert('<', 10); prec.insert('+', 20); prec.insert('-', 20); @@ -1080,24 +1202,24 @@ pub fn main() { // make module let module = context.create_module("tmp"); - let mut printd_fn = None; - let mut putchard_fn = None; + + //let mut printd_fn = None; + //let mut putchard_fn = None; for prev in previous_exprs.iter() { - let fun = Compiler::compile(&context, &builder, &fpm, &module, prev).expect("Cannot re-add previously compiled function."); + Compiler::compile(&context, &builder, &fpm, &module, prev).expect("Cannot re-add previously compiled function."); - match fun.get_name().to_str().unwrap() { - "printd" => { - println!("Setting printd to {:?}", fun); - printd_fn = Some(fun); - }, + // match fun.get_name().to_str().unwrap() { + // "printd" => { + // printd_fn = Some(fun); + // }, - "putchard" => { - putchard_fn = Some(fun); - }, + // "putchard" => { + // putchard_fn = Some(fun); + // }, - _ => () - } + // _ => () + // } } let (name, is_anonymous) = match Parser::new(input, &mut prec).parse() { @@ -1117,7 +1239,7 @@ pub fn main() { let fn_name = function.get_name().to_str().unwrap(); - if fn_name.starts_with(ANONYMOUS_FUNCTION_NAME) { + if fn_name == ANONYMOUS_FUNCTION_NAME { (fn_name.to_string(), true) } else { previous_exprs.push(fun); @@ -1138,7 +1260,7 @@ pub fn main() { }; if is_anonymous { - let mut ee = module.create_jit_execution_engine(0).unwrap(); + let ee = module.create_jit_execution_engine(0).unwrap(); // 2017-02-10 <6A> I still can't add my own functions with either add_global_mapping or add_symbol. // However, importing extern functions such as cos(x) or sin(x) works. From 374dd3f3a8c151ac1c695a408efb2f8c3d4091e2 Mon Sep 17 00:00:00 2001 From: Greg Date: Mon, 2 Oct 2017 23:07:08 +0200 Subject: [PATCH 10/17] Made write_bitcode_to_file compile on Windows --- src/module.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/module.rs b/src/module.rs index fe306411df203..cd52673fc8b58 100644 --- a/src/module.rs +++ b/src/module.rs @@ -1,5 +1,5 @@ use llvm_sys::analysis::{LLVMVerifyModule, LLVMVerifierFailureAction}; -use llvm_sys::bit_writer::{LLVMWriteBitcodeToFile, LLVMWriteBitcodeToMemoryBuffer, LLVMWriteBitcodeToFD}; +use llvm_sys::bit_writer::{LLVMWriteBitcodeToFile, LLVMWriteBitcodeToMemoryBuffer}; use llvm_sys::core::{LLVMAddFunction, LLVMAddGlobal, LLVMDisposeMessage, LLVMDumpModule, LLVMGetNamedFunction, LLVMGetTypeByName, LLVMSetDataLayout, LLVMSetInitializer, LLVMSetTarget, LLVMCloneModule, LLVMDisposeModule, LLVMGetTarget, LLVMGetDataLayout, LLVMModuleCreateWithName, LLVMGetModuleContext, LLVMGetFirstFunction, LLVMGetLastFunction, LLVMSetLinkage, LLVMAddGlobalInAddressSpace, LLVMPrintModuleToString, LLVMGetNamedMetadataNumOperands, LLVMAddNamedMetadataOperand, LLVMGetNamedMetadataOperands, LLVMGetFirstGlobal, LLVMGetLastGlobal, LLVMGetNamedGlobal}; use llvm_sys::execution_engine::{LLVMCreateJITCompilerForModule, LLVMCreateMCJITCompilerForModule}; use llvm_sys::prelude::{LLVMValueRef, LLVMModuleRef}; @@ -8,7 +8,6 @@ use llvm_sys::LLVMLinkage; use std::ffi::{CString, CStr}; use std::fs::File; use std::mem::{forget, uninitialized, zeroed}; -use std::os::unix::io::AsRawFd; use std::path::Path; use std::slice::from_raw_parts; @@ -270,7 +269,11 @@ impl Module { } // See GH issue #6 - fn write_bitcode_to_file(&self, file: &File, should_close: bool, unbuffered: bool) -> bool { + #[cfg(unix)] + pub fn write_bitcode_to_file(&self, file: &File, should_close: bool, unbuffered: bool) -> bool { + use std::os::unix::io::AsRawFd; + use llvm_sys::bit_writer::LLVMWriteBitcodeToFD; + // REVIEW: as_raw_fd docs suggest it only works in *nix // Also, should_close should maybe be hardcoded to true? unsafe { @@ -278,6 +281,12 @@ impl Module { } } + #[cfg(windows)] + #[allow(unused_variables)] + pub fn write_bitcode_to_file(&self, file: &File, should_close: bool, unbuffered: bool) -> bool { + false + } + pub fn write_bitcode_to_memory(&self) -> MemoryBuffer { let memory_buffer = unsafe { LLVMWriteBitcodeToMemoryBuffer(self.module) From 0c87e17416e64c84e21a68a0c99081b2314ef251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Geis?= Date: Tue, 3 Oct 2017 19:58:59 +0200 Subject: [PATCH 11/17] Fixed bugs, improved overall code --- examples/kaleidoscope.rs | 183 ++++++++++++++++++++++++--------------- 1 file changed, 112 insertions(+), 71 deletions(-) diff --git a/examples/kaleidoscope.rs b/examples/kaleidoscope.rs index 7ccf43811b254..f828a90ae8f50 100644 --- a/examples/kaleidoscope.rs +++ b/examples/kaleidoscope.rs @@ -1,7 +1,16 @@ //! This is an example of the [Kaleidoscope tutorial](https://llvm.org/docs/tutorial/) //! made in Rust, using Inkwell. -//! Currently, all features up to the [4th chapter](https://llvm.org/docs/tutorial/LangImpl04.html) +//! Currently, all features up to the [7th chapter](https://llvm.org/docs/tutorial/LangImpl07.html) //! are available. +//! This example is supposed to be ran as a executable, which launches a REPL. +//! The source code is in the following order: +//! - Lexer, +//! - Parser, +//! - Compiler, +//! - Program. +//! +//! Both the `Parser` and the `Compiler` may fail, in which case they would return +//! an error represented by `Result`, for easier error reporting. extern crate inkwell; @@ -34,20 +43,24 @@ const ANONYMOUS_FUNCTION_NAME: &str = "anonymous"; /// Represents a primitive syntax token. #[derive(Debug, Clone)] pub enum Token { - EOF, - Comment, - LParen, - RParen, + Binary, Comma, - Def, Extern, - If, Then, Else, - For, In, - Unary, Binary, - Var, - + Comment, + Def, + Else, + EOF, + Extern, + For, Ident(String), + If, + In, + LParen, Number(f64), - Op(char) + Op(char), + RParen, + Then, + Unary, + Var } /// Defines an error encountered by the `Lexer`. @@ -94,6 +107,9 @@ impl<'a> Lexer<'a> { // Skip whitespaces loop { + // Note: the following lines are in their own scope to + // limit how long 'chars' is borrowed, and in order to allow + // it to be borrowed again in the loop by 'chars.next()'. { let ch = chars.peek(); @@ -108,10 +124,8 @@ impl<'a> Lexer<'a> { } } - { - chars.next(); - pos += 1; - } + chars.next(); + pos += 1; } let start = pos; @@ -230,9 +244,6 @@ impl<'a> Iterator for Lexer<'a> { /// Defines a primitive expression. #[derive(Debug)] pub enum Expr { - Number(f64), - Variable(String), - Binary { op: char, left: Box, @@ -258,6 +269,10 @@ pub enum Expr { body: Box }, + Number(f64), + + Variable(String), + VarIn { variables: Vec<(String, Option)>, body: Box @@ -288,6 +303,8 @@ pub struct Parser<'a> { prec: &'a mut HashMap } +// I'm ignoring the 'must_use' lint in order to call 'self.advance' without checking +// the result when an EOF is acceptable. #[allow(unused_must_use)] impl<'a> Parser<'a> { /// Creates a new parser, given an input `str` and a `HashMap` binding @@ -793,25 +810,29 @@ pub struct Compiler<'a> { pub fpm: &'a PassManager, pub module: &'a Module, pub function: &'a Function, - pub fn_value: FunctionValue, - pub variables: HashMap + + variables: HashMap, + fn_value_opt: Option } impl<'a> Compiler<'a> { /// Gets a defined function given its name. + #[inline] fn get_function(&self, name: &str) -> Option { self.module.get_function(name) } + /// Returns the `FunctionValue` representing the function being compiled. + #[inline] + fn fn_value(&self) -> FunctionValue { + self.fn_value_opt.unwrap() + } + /// Cretes a new stack allocation instruction in the entry block of the function. fn create_entry_block_alloca(&self, name: &str, entry: Option<&BasicBlock>) -> PointerValue { let builder = self.context.create_builder(); - - // let entry = match entry { - // Some(entry) => entry, - // None => - // }; - let owned_entry = self.fn_value.get_entry_basic_block(); + + let owned_entry = self.fn_value().get_entry_basic_block(); let entry = owned_entry.as_ref().or(entry).unwrap(); match entry.get_first_instruction() { @@ -877,9 +898,11 @@ impl<'a> Compiler<'a> { }; let var_val = self.compile_expr(&right)?; - let var = self.variables.get(var_name.as_str()).ok_or("")?; + let var = self.variables.get(var_name.as_str()).ok_or("Undefined variable.")?; + + self.builder.build_store(&var, &var_val); - Ok(unsafe { std::mem::transmute(self.builder.build_store(&var, &var_val)) }) + Ok(var_val) } else { let lhs = self.compile_expr(&left)?; let rhs = self.compile_expr(&right)?; @@ -942,7 +965,7 @@ impl<'a> Compiler<'a> { }, &Expr::Conditional { ref cond, ref consequence, ref alternative } => { - let parent = self.fn_value; + let parent = self.fn_value(); let zero_const = self.context.f64_type().const_float(0.0); // create condition by comparing without 0.0 and returning an int @@ -980,15 +1003,14 @@ impl<'a> Compiler<'a> { (&else_val, &else_bb) ]); - Ok(unsafe { std::mem::transmute(phi) }) + Ok(phi.as_basic_value().into_float_value()) }, &Expr::For { ref var_name, ref start, ref end, ref step, ref body } => { - let parent = self.fn_value; - let zero_const = self.context.f64_type().const_float(0.0); + let parent = self.fn_value(); - let start = self.compile_expr(&start)?; let start_alloca = self.create_entry_block_alloca(var_name, None); + let start = self.compile_expr(&start)?; self.builder.build_store(&start_alloca, &start); @@ -998,42 +1020,40 @@ impl<'a> Compiler<'a> { self.builder.build_unconditional_branch(&loop_bb); self.builder.position_at_end(&loop_bb); - let variable = self.create_entry_block_alloca(var_name, None); + let old_val = self.variables.remove(var_name.as_str()); - let old_val = match self.variables.get(var_name.as_str()) { - Some(val) => Some(*val), - None => None - }; - - self.variables.insert(var_name.to_owned(), variable); + self.variables.insert(var_name.to_owned(), start_alloca); + // emit body self.compile_expr(&body)?; + // emit step let step = match step { &Some(ref step) => self.compile_expr(&step)?, &None => self.context.f64_type().const_float(1.0) }; + // compile end condition let end_cond = self.compile_expr(&end)?; let curr_var = self.builder.build_load(&start_alloca, var_name); let next_var = self.builder.build_float_add(&curr_var.as_float_value(), &step, "nextvar"); - self.builder.build_store(&unsafe { std::mem::transmute(next_var) }, &start_alloca); + self.builder.build_store(&start_alloca, &next_var); - let end_cond = self.builder.build_float_compare(&FloatPredicate::ONE, &end_cond, &zero_const, "loopcond"); + let end_cond = self.builder.build_float_compare(&FloatPredicate::ONE, &end_cond, &self.context.f64_type().const_float(0.0), "loopcond"); let after_bb = self.context.append_basic_block(&parent, "afterloop"); self.builder.build_conditional_branch(&end_cond, &loop_bb, &after_bb); self.builder.position_at_end(&after_bb); + self.variables.remove(var_name); + if let Some(val) = old_val { self.variables.insert(var_name.to_owned(), val); - } else { - self.variables.remove(var_name); } - Ok(zero_const) + Ok(self.context.f64_type().const_float(0.0)) } } } @@ -1065,7 +1085,7 @@ impl<'a> Compiler<'a> { self.builder.position_at_end(&entry); // update fn field - self.fn_value = function; + self.fn_value_opt = Some(function); // build variables map self.variables.reserve(proto.args.len()); @@ -1083,12 +1103,13 @@ impl<'a> Compiler<'a> { let body = self.compile_expr(&self.function.body)?; self.builder.build_return(Some(&body)); + + function.print_to_stderr(); // return the whole thing after verification and optimization if function.verify(true) { self.fpm.run_on_function(&function); - - function.verify(true); + Ok(function) } else { @@ -1100,6 +1121,7 @@ impl<'a> Compiler<'a> { } } + /// Compiles the specified `Function` in the given `Context` and using the specified `Builder`, `PassManager`, and `Module`. pub fn compile(context: &'a Context, builder: &'a Builder, pass_manager: &'a PassManager, module: &'a Module, function: &Function) -> Result { let mut compiler = Compiler { context: context, @@ -1107,7 +1129,7 @@ impl<'a> Compiler<'a> { fpm: pass_manager, module: module, function: function, - fn_value: unsafe { std::mem::transmute(0usize) }, + fn_value_opt: None, variables: HashMap::new() }; @@ -1130,24 +1152,40 @@ macro_rules! print_flush { }; } -pub extern fn putchard(x: f64) -> f64 { - print_flush!("{}", x as u8 as char); - x -} - -pub extern fn printd(x: f64) -> f64 { - println!("Fn called"); - println!("{}", x); - x -} +// Greg <6A>: 2017-10-03 +// The two following functions are supposed to be found by the JIT +// using the 'extern' keyword, but it currently does not work on my machine. +// I tried using add_symbol, add_global_mapping, and simple extern declaration, +// but nothing worked. +// However, extern functions such as cos(x) and sin(x) can be imported without any problem. +// Other lines related to this program can be found further down. + +// pub extern "C" fn putchard(x: f64) -> f64 { +// print_flush!("{}", x as u8 as char); +// x +// } + +// pub extern "C" fn printd(x: f64) -> f64 { +// println!("Fn called"); +// println!("{}", x); +// x +// } /// Entry point of the program; acts as a REPL. pub fn main() { // use self::inkwell::support::add_symbol; - - let show_lexed_input = true; - let show_parsed_input = false; - let show_compiled_input = true; + let mut display_lexer_output = false; + let mut display_parser_output = false; + let mut display_compiler_output = false; + + for arg in std::env::args() { + match arg.as_str() { + "--dl" => display_lexer_output = true, + "--dp" => display_parser_output = true, + "--dc" => display_compiler_output = true, + _ => () + } + } Target::initialize_native(&InitializationConfig::default()).expect("Failed to initialize native target."); @@ -1175,7 +1213,7 @@ pub fn main() { let mut previous_exprs = Vec::new(); loop { - print_flush!("> "); + print_flush!(" > "); // Read input from stdin let mut input = String::new(); @@ -1196,7 +1234,7 @@ pub fn main() { prec.insert('/', 40); // Parse and (optionally) display input - if show_lexed_input { + if display_lexer_output { println!("Attempting to parse lexed input: {:?}", Lexer::new(input.as_str()).collect::>()); } @@ -1206,9 +1244,12 @@ pub fn main() { //let mut printd_fn = None; //let mut putchard_fn = None; + // recompile every previously parsed function into the new module for prev in previous_exprs.iter() { Compiler::compile(&context, &builder, &fpm, &module, prev).expect("Cannot re-add previously compiled function."); + // Not working; see comment above. + // // match fun.get_name().to_str().unwrap() { // "printd" => { // printd_fn = Some(fun); @@ -1224,13 +1265,13 @@ pub fn main() { let (name, is_anonymous) = match Parser::new(input, &mut prec).parse() { Ok(fun) => { - if show_parsed_input { + if display_parser_output { println!("Expression parsed: {:?}", fun); } match Compiler::compile(&context, &builder, &fpm, &module, &fun) { Ok(function) => { - if show_compiled_input { + if display_compiler_output { // Not printing a new line since LLVM automatically // prefixes the generated string with one print_flush!("Expression compiled to IR:"); @@ -1262,8 +1303,7 @@ pub fn main() { if is_anonymous { let ee = module.create_jit_execution_engine(0).unwrap(); - // 2017-02-10 <6A> I still can't add my own functions with either add_global_mapping or add_symbol. - // However, importing extern functions such as cos(x) or sin(x) works. + // Not working ATM; see comment above. // if let Some(fun) = printd_fn { // println!("Setting global mapping for {:p} {:p} {:p}", &printd, &mut printd, *printd); @@ -1284,7 +1324,8 @@ pub fn main() { let compiled_fn: extern "C" fn() -> f64 = unsafe { std::mem::transmute(addr) }; - println!("Result: {}", compiled_fn()); + println!("=> {}", compiled_fn()); + println!(); } } } From 044ab99700e73d0677149e7c0d30a9a11dbea5a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Geis?= Date: Tue, 3 Oct 2017 23:10:21 +0200 Subject: [PATCH 12/17] Removed currently unused add_global_mapping --- src/execution_engine.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/execution_engine.rs b/src/execution_engine.rs index d3556bc702121..a613b8c2f3776 100644 --- a/src/execution_engine.rs +++ b/src/execution_engine.rs @@ -1,9 +1,9 @@ use llvm_sys::core::LLVMDisposeMessage; -use llvm_sys::execution_engine::{LLVMGetExecutionEngineTargetData, LLVMExecutionEngineRef, LLVMAddGlobalMapping, LLVMRunFunction, LLVMRunFunctionAsMain, LLVMDisposeExecutionEngine, LLVMGetFunctionAddress, LLVMAddModule, LLVMFindFunction, LLVMLinkInMCJIT, LLVMLinkInInterpreter, LLVMRemoveModule, LLVMGenericValueRef}; +use llvm_sys::execution_engine::{LLVMGetExecutionEngineTargetData, LLVMExecutionEngineRef, LLVMRunFunction, LLVMRunFunctionAsMain, LLVMDisposeExecutionEngine, LLVMGetFunctionAddress, LLVMAddModule, LLVMFindFunction, LLVMLinkInMCJIT, LLVMLinkInInterpreter, LLVMRemoveModule, LLVMGenericValueRef}; use module::Module; use targets::TargetData; -use values::{AsValueRef, AnyValue, FunctionValue, GenericValue}; +use values::{AsValueRef, FunctionValue, GenericValue}; use std::ffi::{CStr, CString}; use std::mem::{forget, uninitialized, zeroed}; @@ -50,11 +50,11 @@ impl ExecutionEngine { } } - pub fn add_global_mapping(&mut self, global: &AnyValue, addr: *const ()) { - unsafe { - LLVMAddGlobalMapping(self.execution_engine, global.as_value_ref(), addr as *mut ::libc::c_void) - } - } + // pub fn add_global_mapping(&mut self, global: &AnyValue, addr: *const ()) { + // unsafe { + // LLVMAddGlobalMapping(self.execution_engine, global.as_value_ref(), addr as *mut ::libc::c_void) + // } + // } // TODOC: EE must *own* modules and deal out references pub fn add_module(&mut self, module: Module) -> &Module { From 78357e8df9c67212c8a47d8ca65be43d593cc4c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Geis?= Date: Tue, 3 Oct 2017 23:11:17 +0200 Subject: [PATCH 13/17] Removed debug informations --- examples/kaleidoscope.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/kaleidoscope.rs b/examples/kaleidoscope.rs index f828a90ae8f50..c40b2327e9459 100644 --- a/examples/kaleidoscope.rs +++ b/examples/kaleidoscope.rs @@ -1103,14 +1103,11 @@ impl<'a> Compiler<'a> { let body = self.compile_expr(&self.function.body)?; self.builder.build_return(Some(&body)); - - function.print_to_stderr(); // return the whole thing after verification and optimization if function.verify(true) { self.fpm.run_on_function(&function); - Ok(function) } else { unsafe { From 544158943fc7c49837fa75bdd34c063bf68a0ce6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Geis?= Date: Tue, 3 Oct 2017 23:29:27 +0200 Subject: [PATCH 14/17] Improved readability; fixed some parsing bugs --- examples/kaleidoscope.rs | 136 +++++++++++++++++++++++---------------- 1 file changed, 82 insertions(+), 54 deletions(-) diff --git a/examples/kaleidoscope.rs b/examples/kaleidoscope.rs index c40b2327e9459..1939ae8b4ab72 100644 --- a/examples/kaleidoscope.rs +++ b/examples/kaleidoscope.rs @@ -322,21 +322,33 @@ impl<'a> Parser<'a> { /// Parses the content of the parser. pub fn parse(&mut self) -> Result { - match self.curr() { + let result = match self.current()? { Def => self.parse_def(), Extern => self.parse_extern(), _ => self.parse_toplevel_expr() + }; + + match result { + Ok(result) => { + if !self.at_end() { + Err("Unexpected token after parsed expression.") + } else { + Ok(result) + } + }, + + err => err } } /// Returns the current `Token`, without performing safety checks beforehand. - pub fn curr(&self) -> Token { + fn curr(&self) -> Token { self.tokens[self.pos].clone() } /// Returns the current `Token`, or an error that /// indicates that the end of the file has been unexpectedly reached if it is the case. - pub fn current(&self) -> Result { + fn current(&self) -> Result { if self.pos >= self.tokens.len() { Err("Unexpected end of file.") } else { @@ -347,7 +359,7 @@ impl<'a> Parser<'a> { /// Advances the position, and returns an empty `Result` whose error /// indicates that the end of the file has been unexpectedly reached. /// This allows to use the `self.advance()?;` syntax. - pub fn advance(&mut self) -> Result<(), &'static str> { + fn advance(&mut self) -> Result<(), &'static str> { let npos = self.pos + 1; self.pos = npos; @@ -361,8 +373,8 @@ impl<'a> Parser<'a> { /// Returns a value indicating whether or not the `Parser` /// has reached the end of the input. - pub fn at_end(&self) -> bool { - self.pos > self.tokens.len() - 2 + fn at_end(&self) -> bool { + self.pos >= self.tokens.len() } /// Returns the precedence of the current `Token`, or 0 if it is not recognized as a binary operator. @@ -388,7 +400,7 @@ impl<'a> Parser<'a> { let op = match self.curr() { Op(ch) => ch, - _ => { return Err("Expected operator in custom operator declaration."); } + _ => return Err("Expected operator in custom operator declaration.") }; self.advance()?; @@ -415,7 +427,7 @@ impl<'a> Parser<'a> { let op = match self.curr() { Op(ch) => ch, - _ => { return Err("Expected operator in custom operator declaration."); } + _ => return Err("Expected operator in custom operator declaration.") }; let mut name = String::from("unary"); @@ -427,19 +439,25 @@ impl<'a> Parser<'a> { (name, true, 0) }, - _ => { return Err("Expected identifier in prototype declaration.") } + _ => return Err("Expected identifier in prototype declaration.") }; match self.curr() { LParen => (), - _ => { return Err("Expected '(' character in prototype declaration.") } + _ => return Err("Expected '(' character in prototype declaration.") } self.advance()?; if let RParen = self.curr() { self.advance(); - return Ok(Prototype { name: id, args: vec![], is_op: is_operator, prec: precedence }); + + return Ok(Prototype { + name: id, + args: vec![], + is_op: is_operator, + prec: precedence + }); } let mut args = vec![]; @@ -447,7 +465,7 @@ impl<'a> Parser<'a> { loop { match self.curr() { Ident(name) => args.push(name), - _ => { return Err("Expected identifier in parameter declaration.") } + _ => return Err("Expected identifier in parameter declaration.") } self.advance()?; @@ -460,11 +478,16 @@ impl<'a> Parser<'a> { Comma => { self.advance(); }, - _ => { return Err("Expected ',' or ')' character in prototype declaration.") } + _ => return Err("Expected ',' or ')' character in prototype declaration.") } } - Ok(Prototype { name: id, args: args, is_op: is_operator, prec: precedence }) + Ok(Prototype { + name: id, + args: args, + is_op: is_operator, + prec: precedence + }) } /// Parses a user-defined function. @@ -479,7 +502,11 @@ impl<'a> Parser<'a> { let body = self.parse_expr()?; // Return new function - Ok(Function { prototype: proto, body: body, is_anon: false }) + Ok(Function { + prototype: proto, + body: body, + is_anon: false + }) } /// Parses an external function declaration. @@ -491,7 +518,11 @@ impl<'a> Parser<'a> { let proto = self.parse_prototype()?; // Return signature of extern function - Ok(Function { prototype: proto, body: Expr::Number(std::f64::NAN), is_anon: false }) + Ok(Function { + prototype: proto, + body: Expr::Number(std::f64::NAN), + is_anon: false + }) } /// Parses any expression. @@ -518,7 +549,7 @@ impl<'a> Parser<'a> { fn parse_paren_expr(&mut self) -> Result { match self.current()? { LParen => (), - _ => { return Err("Expected '(' character at start of parenthesized expression.") } + _ => return Err("Expected '(' character at start of parenthesized expression.") } self.advance()?; @@ -527,7 +558,7 @@ impl<'a> Parser<'a> { match self.current()? { RParen => (), - _ => { return Err("Expected ')' character at end of parenthesized expression.") } + _ => return Err("Expected ')' character at end of parenthesized expression.") } self.advance(); @@ -539,7 +570,7 @@ impl<'a> Parser<'a> { fn parse_id_expr(&mut self) -> Result { let id = match self.curr() { Ident(id) => id, - _ => { return Err("Expected identifier."); } + _ => return Err("Expected identifier.") }; if let Err(_) = self.advance() { @@ -562,7 +593,7 @@ impl<'a> Parser<'a> { match self.current()? { Comma => (), RParen => break, - _ => { return Err("Expected ',' character in function call."); } + _ => return Err("Expected ',' character in function call.") } self.advance()?; @@ -584,14 +615,17 @@ impl<'a> Parser<'a> { self.advance()?; ch }, - _ => { return self.parse_primary(); } + _ => return self.parse_primary() }; let mut name = String::from("unary"); name.push(op); - Ok(Expr::Call { fn_name: name, args: vec![ self.parse_unary_expr()? ] }) + Ok(Expr::Call { + fn_name: name, + args: vec![ self.parse_unary_expr()? ] + }) } /// Parses a binary expression, given its left-hand expression. @@ -607,7 +641,7 @@ impl<'a> Parser<'a> { let op = match self.curr() { Op(op) => op, - _ => { return Err("Invalid operator."); } + _ => return Err("Invalid operator.") }; self.advance()?; @@ -637,16 +671,16 @@ impl<'a> Parser<'a> { // eat 'then' token match self.current() { - Ok(Then) => { self.advance()? }, - _ => { return Err("Expected 'then' keyword."); } + Ok(Then) => self.advance()?, + _ => return Err("Expected 'then' keyword.") } let then = self.parse_expr()?; // eat 'else' token match self.current() { - Ok(Else) => { self.advance()? }, - _ => { return Err("Expected 'else' keyword."); } + Ok(Else) => self.advance()?, + _ => return Err("Expected 'else' keyword.") } let otherwise = self.parse_expr()?; @@ -665,7 +699,7 @@ impl<'a> Parser<'a> { let name = match self.curr() { Ident(n) => n, - _ => { return Err("Expected identifier in for loop."); } + _ => return Err("Expected identifier in for loop.") }; // eat identifier @@ -673,16 +707,16 @@ impl<'a> Parser<'a> { // eat '=' token match self.curr() { - Op('=') => { self.advance()?; }, - _ => { return Err("Expected '=' character in for loop."); } + Op('=') => self.advance()?, + _ => return Err("Expected '=' character in for loop.") } let start = self.parse_expr()?; // eat ',' token match self.current()? { - Comma => { self.advance()?; }, - _ => { return Err("Expected ',' character in for loop."); } + Comma => self.advance()?, + _ => return Err("Expected ',' character in for loop.") } let end = self.parse_expr()?; @@ -700,8 +734,8 @@ impl<'a> Parser<'a> { // eat 'in' token match self.current()? { - In => { self.advance()?; }, - _ => { return Err("Expected 'in' keyword in for loop."); } + In => self.advance()?, + _ => return Err("Expected 'in' keyword in for loop.") } let body = self.parse_expr()?; @@ -744,7 +778,7 @@ impl<'a> Parser<'a> { variables.push((name, initializer)); match self.curr() { - Op(',') => { + Comma => { self.advance()?; }, In => { @@ -760,7 +794,10 @@ impl<'a> Parser<'a> { // parse body let body = self.parse_expr()?; - Ok(Expr::VarIn { variables: variables, body: Box::new(body) }) + Ok(Expr::VarIn { + variables: variables, + body: Box::new(body) + }) } /// Parses a primary expression (an identifier, a number or a parenthesized expression). @@ -1210,7 +1247,8 @@ pub fn main() { let mut previous_exprs = Vec::new(); loop { - print_flush!(" > "); + println!(); + print_flush!("?> "); // Read input from stdin let mut input = String::new(); @@ -1218,6 +1256,8 @@ pub fn main() { if input.starts_with("exit") { break; + } else if input.chars().all(char::is_whitespace) { + continue; } // Build precedence map @@ -1263,7 +1303,7 @@ pub fn main() { let (name, is_anonymous) = match Parser::new(input, &mut prec).parse() { Ok(fun) => { if display_parser_output { - println!("Expression parsed: {:?}", fun); + println!("-> Expression parsed: {:?}", fun); } match Compiler::compile(&context, &builder, &fpm, &module, &fun) { @@ -1271,7 +1311,7 @@ pub fn main() { if display_compiler_output { // Not printing a new line since LLVM automatically // prefixes the generated string with one - print_flush!("Expression compiled to IR:"); + print_flush!("-> Expression compiled to IR:"); function.print_to_stderr(); } @@ -1286,13 +1326,13 @@ pub fn main() { } }, Err(err) => { - println!("Error compiling function: {}", err); + println!("!> Error compiling function: {}", err); continue; } } }, Err(err) => { - println!("Error parsing expression: {}", err); + println!("!> Error parsing expression: {}", err); continue; } }; @@ -1300,21 +1340,10 @@ pub fn main() { if is_anonymous { let ee = module.create_jit_execution_engine(0).unwrap(); - // Not working ATM; see comment above. - - // if let Some(fun) = printd_fn { - // println!("Setting global mapping for {:p} {:p} {:p}", &printd, &mut printd, *printd); - // ee.add_global_mapping(&fun, unsafe { std::mem::transmute(&mut printd) }); - // } - - // if let Some(fun) = putchard_fn { - // ee.add_global_mapping(&fun, unsafe { std::mem::transmute(&putchard) }); - // } - let addr = match ee.get_function_address(name.as_str()) { Ok(addr) => addr, Err(err) => { - println!("Error during execution: {:?}", err); + println!("!> Error during execution: {:?}", err); continue; } }; @@ -1322,7 +1351,6 @@ pub fn main() { let compiled_fn: extern "C" fn() -> f64 = unsafe { std::mem::transmute(addr) }; println!("=> {}", compiled_fn()); - println!(); } } } From 91b24ecfe552facb6507e961b7fa64bd09678213 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Geis?= Date: Tue, 3 Oct 2017 23:49:21 +0200 Subject: [PATCH 15/17] Updated Cargo.toml for new location of kaleidoscope example --- Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 1097ffde489be..7c74dc2fa54c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,10 @@ enum-methods = "0.0.7" libc = "*" llvm-sys = "37.0.5" # TODO: Configure based on feature toggle. GH#1 +[[example]] +name = "kaleidoscope" +path = "examples/kaleidoscope/main.rs" + [badges] travis-ci = { repository = "TheDan64/inkwell" } codecov = { repository = "TheDan64/inkwell" } From 00f430cb3ac80b433d5fe635f85b679bfa293bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Geis?= Date: Tue, 3 Oct 2017 23:49:41 +0200 Subject: [PATCH 16/17] Moved kaleidoscope to its own directory --- examples/kaleidoscope/README.md | 46 +++++++++++++++++++ .../{kaleidoscope.rs => kaleidoscope/main.rs} | 23 ++++++---- 2 files changed, 59 insertions(+), 10 deletions(-) create mode 100644 examples/kaleidoscope/README.md rename examples/{kaleidoscope.rs => kaleidoscope/main.rs} (98%) diff --git a/examples/kaleidoscope/README.md b/examples/kaleidoscope/README.md new file mode 100644 index 0000000000000..3ad33dcf819df --- /dev/null +++ b/examples/kaleidoscope/README.md @@ -0,0 +1,46 @@ +# Kaleidoscope + +This example shows how one can implement the [Kaleidoscope programming language](https://llvm.org/docs/tutorial/index.html) using Inkwell. +It implements every feature up to the [7th chapter](https://llvm.org/docs/tutorial/LangImpl07.html). + +When running this example (using the `cargo run --example kaleidoscope` command), a prompt will be displayed; for example: + +``` +?> 1 + 1 +=> 2 + +?> var a = 5, b = 10 in a * b +=> 50 + +?> def fib(n) if n < 2 then n else fib(n - 1) + fib(n - 2) + +?> fib(40) +=> 102334155 + +?> +``` + +Additional arguments can be passed to the produced executable: +- `--dc`: **D**isplay **C**ompiler output +- `--dp`: **D**isplay **P**arser output +- `--dl`: **D**isplay **L**exer output + +For example, running with all three switches may lead to the following output: +``` +?> 1 + 2 * 2 +-> Attempting to parse lexed input: +[Number(1), Op('+'), Number(2), Op('*'), Number(2)] + +-> Expression parsed: +Function { prototype: Prototype { name: "anonymous", args: [], is_op: false, prec: 0 }, body: Binary { op: '+', left: Number(1), right: Binary { op: '*', left: Number(2), right: Number(2) } }, is_anon: true } + +-> Expression compiled to IR: +define double @anonymous() { +entry: + ret double 5.000000e+00 +} + +=> 5 +``` + +Finally, the prompt can be exited by entering "exit" or "quit". \ No newline at end of file diff --git a/examples/kaleidoscope.rs b/examples/kaleidoscope/main.rs similarity index 98% rename from examples/kaleidoscope.rs rename to examples/kaleidoscope/main.rs index 1939ae8b4ab72..a86a6e6e621a1 100644 --- a/examples/kaleidoscope.rs +++ b/examples/kaleidoscope/main.rs @@ -1254,7 +1254,7 @@ pub fn main() { let mut input = String::new(); io::stdin().read_line(&mut input).expect("Could not read from standard input."); - if input.starts_with("exit") { + if input.starts_with("exit") || input.starts_with("quit") { break; } else if input.chars().all(char::is_whitespace) { continue; @@ -1272,7 +1272,7 @@ pub fn main() { // Parse and (optionally) display input if display_lexer_output { - println!("Attempting to parse lexed input: {:?}", Lexer::new(input.as_str()).collect::>()); + println!("-> Attempting to parse lexed input: \n{:?}\n", Lexer::new(input.as_str()).collect::>()); } // make module @@ -1302,8 +1302,14 @@ pub fn main() { let (name, is_anonymous) = match Parser::new(input, &mut prec).parse() { Ok(fun) => { + let is_anon = fun.is_anon; + if display_parser_output { - println!("-> Expression parsed: {:?}", fun); + if is_anon { + println!("-> Expression parsed: \n{:?}\n", fun.body); + } else { + println!("-> Function parsed: \n{:?}\n", fun); + } } match Compiler::compile(&context, &builder, &fpm, &module, &fun) { @@ -1315,15 +1321,12 @@ pub fn main() { function.print_to_stderr(); } - let fn_name = function.get_name().to_str().unwrap(); - - if fn_name == ANONYMOUS_FUNCTION_NAME { - (fn_name.to_string(), true) - } else { + if !is_anon { + // only add it now to ensure it is correct previous_exprs.push(fun); - - (fn_name.to_string(), false) } + + (function.get_name().to_str().unwrap().to_string(), is_anon) }, Err(err) => { println!("!> Error compiling function: {}", err); From 92ae1daf08d87727ba20d9b3f4747eeb80425707 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Geis?= Date: Tue, 3 Oct 2017 23:54:43 +0200 Subject: [PATCH 17/17] Fixed code example --- examples/kaleidoscope/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kaleidoscope/README.md b/examples/kaleidoscope/README.md index 3ad33dcf819df..819b1e378bfd9 100644 --- a/examples/kaleidoscope/README.md +++ b/examples/kaleidoscope/README.md @@ -32,7 +32,7 @@ For example, running with all three switches may lead to the following output: [Number(1), Op('+'), Number(2), Op('*'), Number(2)] -> Expression parsed: -Function { prototype: Prototype { name: "anonymous", args: [], is_op: false, prec: 0 }, body: Binary { op: '+', left: Number(1), right: Binary { op: '*', left: Number(2), right: Number(2) } }, is_anon: true } +Binary { op: '+', left: Number(1), right: Binary { op: '*', left: Number(2), right: Number(2) } } -> Expression compiled to IR: define double @anonymous() {