Skip to content

Commit

Permalink
fix(qf): token start index
Browse files Browse the repository at this point in the history
  • Loading branch information
Ph0enixKM committed Jul 29, 2024
1 parent aacbe2d commit 80f4f80
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "heraclitus-compiler"
version = "1.7.0"
version = "1.7.1"
edition = "2021"
description = "Compiler frontend for developing great programming languages"
license = "MIT"
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ let tokens = cc.tokenize()?;

# Change log 🚀

## Version 1.7.1
### Fix:
- Bug fixes for calculating the start index in tokens

## Version 1.7.0
### Feature:
- Tokens now contain information about the index of their first character in the source code
Expand Down
59 changes: 36 additions & 23 deletions src/compiling/lexing/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ pub struct Lexer<'a> {
scoping_mode: ScopingMode,
is_escaped: bool,
position: (usize, usize),
index: usize
index: usize,
token_start_index: usize
}

impl<'a> Lexer<'a> {
Expand All @@ -58,7 +59,8 @@ impl<'a> Lexer<'a> {
scoping_mode: cc.scoping_mode.clone(),
is_escaped: false,
position: (0, 0),
index: 0
index: 0,
token_start_index: 0
}
}

Expand All @@ -73,7 +75,7 @@ impl<'a> Lexer<'a> {
self.lexem.push(Token {
word,
pos: (row, 1),
start: self.reader.get_index(),
start: self.token_start_index,
});
self.position = (0, 0);
String::new()
Expand All @@ -87,7 +89,7 @@ impl<'a> Lexer<'a> {
self.lexem.push(Token {
word,
pos: self.position,
start: self.index
start: self.token_start_index
});
self.position = (0, 0);
String::new()
Expand All @@ -102,7 +104,7 @@ impl<'a> Lexer<'a> {
self.lexem.push(Token {
word,
pos: self.position,
start: self.index
start: self.token_start_index
});
self.position = (0, 0);
String::new()
Expand All @@ -124,9 +126,11 @@ impl<'a> Lexer<'a> {
#[inline]
fn pattern_add_symbol(&mut self, mut word: String, letter: char) -> String {
word = self.add_word(word);
word.push(letter);
if word.is_empty() {
self.token_start_index = self.index;
}
self.word_push(&mut word, letter);
self.position = self.reader.get_position();
self.index = self.reader.get_index();
self.add_word_inclusively(word)
}

Expand All @@ -135,25 +139,34 @@ impl<'a> Lexer<'a> {
#[inline]
fn pattern_begin(&mut self, mut word: String, letter: char) -> String {
word = self.add_word(word);
word.push(letter);
self.word_push(&mut word, letter);
word
}

/// Pattern code for ending current region
/// **]**
#[inline]
fn pattern_end(&mut self, mut word: String, letter: char) -> String {
word.push(letter);
self.word_push(&mut word, letter);
self.add_word_inclusively(word)
}

/// Append `letter` to `word`, recording where the token begins.
///
/// When `word` is empty, the incoming letter is the first character of a
/// new token, so the current reader index is saved as the token's start
/// index in the source code before the letter is appended.
fn word_push(&mut self, word: &mut String, letter: char) {
    let starts_new_token = word.is_empty();
    if starts_new_token {
        self.token_start_index = self.index;
    }
    word.push(letter);
}

/// Tokenize source code
///
/// Run lexer and tokenize code. The result is stored in the lexem attribute
pub fn run(&mut self) -> Result<(), LexerError> {
let mut word = String::new();
let mut is_indenting = false;
while let Some(letter) = self.reader.next() {
self.index = self.reader.get_index();

/****************/
/* Set Position */
Expand Down Expand Up @@ -219,7 +232,7 @@ impl<'a> Lexer<'a> {
RegionReaction::Pass => {
match self.compound.handle_compound(letter, &self.reader, self.is_tokenized_region(&reaction)) {
CompoundReaction::Begin => word = self.pattern_begin(word, letter),
CompoundReaction::Keep => word.push(letter),
CompoundReaction::Keep => self.word_push(&mut word, letter),
CompoundReaction::End => word = self.pattern_end(word, letter),
CompoundReaction::Pass => {
// Handle region scope
Expand All @@ -237,7 +250,7 @@ impl<'a> Lexer<'a> {
PositionInfo::at_pos(self.path.clone(), pos, 0).data(region.name.clone())
))
}
word.push(letter);
self.word_push(&mut word, letter);
}
else {

Expand All @@ -249,7 +262,7 @@ impl<'a> Lexer<'a> {
if let ScopingMode::Indent = self.scoping_mode {
// If we are still in the indent region - proceed
if is_indenting && vec![' ', '\t'].contains(&letter) {
word.push(letter);
self.word_push(&mut word, letter);
}
// If it's the new line - start indent region
if letter == '\n' {
Expand Down Expand Up @@ -290,7 +303,7 @@ impl<'a> Lexer<'a> {
}
// Handle word
else {
word.push(letter);
self.word_push(&mut word, letter);
}
}
}
Expand Down Expand Up @@ -402,15 +415,15 @@ mod test {
let symbols = vec![':'];
let regions = reg![];
let expected = vec![
("if".to_string(), 1, 1),
("condition".to_string(), 1, 4),
(":".to_string(), 1, 13),
("\n ".to_string(), 2, 1),
("if".to_string(), 2, 5),
("subcondition".to_string(), 2, 8),
(":".to_string(), 2, 20),
("\n ".to_string(), 3, 1),
("pass".to_string(), 3, 9)
("if".to_string(), (1, 1), 0),
("condition".to_string(), (1, 4), 3),
(":".to_string(), (1, 13), 12),
("\n ".to_string(), (2, 1), 13),
("if".to_string(), (2, 5), 18),
("subcondition".to_string(), (2, 8), 21),
(":".to_string(), (2, 20), 33),
("\n ".to_string(), (3, 1), 34),
("pass".to_string(), (3, 9), 43)
];
let rules = Rules::new(symbols, vec![], regions);
let mut cc: Compiler = Compiler::new("Testhon", rules);
Expand All @@ -426,7 +439,7 @@ mod test {
let res = lexer.run();
assert!(res.is_ok());
for lex in lexer.lexem {
result.push((lex.word, lex.pos.0, lex.pos.1));
result.push((lex.word, (lex.pos.0, lex.pos.1), lex.start));
}
assert_eq!(expected, result);
}
Expand Down

0 comments on commit 80f4f80

Please sign in to comment.