-
Hi. I'm new to Chevrotain, and I must say it's awesome. However, while trying to build with it, I've encountered a problem that I just can't solve, even after trying to trace the issue inside the Lexer itself.
import { Lexer, createToken } from 'chevrotain';
// ---------------------------------------------------------------------------
// Token definitions.
// `label` is only used for error messages / diagrams; `name` must be unique.
// ---------------------------------------------------------------------------

// Whitespace is matched but discarded (SKIPPED group), so it never appears
// in the token stream.
export const Space = createToken({
  name: 'Whitespace',
  pattern: /\s+/,
  group: Lexer.SKIPPED,
});

// The IF keyword together with its opening parenthesis, e.g. "IF(".
export const IfFunction = createToken({
  name: 'If Function',
  pattern: /IF\(/,
  label: 'IF',
});

// Punctuation.
export const LParen = createToken({
  name: 'Left Parenthesis',
  pattern: /\(/,
  label: '(',
});
export const RParen = createToken({
  name: 'Right Parenthesis',
  pattern: /\)/,
  label: ')',
});
export const Comma = createToken({
  name: 'Comma',
  pattern: /,/,
  label: ',',
});

// Spreadsheet-style cell reference, e.g. "A1", "$AB$12".
export const LocalVar = createToken({
  name: 'Local Var',
  pattern: /\$?[A-Za-z]{1,3}\$?[0-9]{1,3}/,
});

// Cyrillic identifier (underscores allowed), e.g. "ВАР".
export const GlobalVar = createToken({
  name: 'Global Var',
  pattern: /[А-Яа-я_]+/,
});

// Literals.
export const Bool = createToken({
  name: 'Boolean',
  pattern: /TRUE|FALSE/,
  label: '<bool>',
});
export const Float = createToken({
  name: 'Float',
  pattern: /[0-9]+\.?[0-9]*/,
  label: '<f64>',
});

// Operator categories. `Lexer.NA` means these are never matched directly by
// the lexer; they exist so the parser can consume a whole category at once.
export const MulOp = createToken({
  name: 'Multiplicative Operator',
  pattern: Lexer.NA,
});
export const MulSign = createToken({
  name: 'Mul Sign',
  pattern: /\*/,
  label: '*',
  categories: MulOp,
});
export const DivSign = createToken({
  name: 'Div Sign',
  pattern: /\//,
  label: '/',
  categories: MulOp,
});

export const AddOp = createToken({
  name: 'Additive Operator',
  pattern: Lexer.NA,
});
export const AddSign = createToken({
  name: 'Add Sign',
  pattern: /\+/,
  label: '+',
  categories: AddOp,
});
export const SubSign = createToken({
  name: 'Sub Sign',
  pattern: /-/,
  label: '-',
  categories: AddOp,
});

export const CompOp = createToken({
  name: 'Comparison Operator',
  pattern: Lexer.NA,
});
export const EqSign = createToken({
  name: 'Eq Sign',
  pattern: /=/,
  label: '=',
  categories: CompOp,
});
export const GteSign = createToken({
  name: 'GTE Sign',
  pattern: />=/,
  label: '>=',
  categories: CompOp,
});
export const LteSign = createToken({
  name: 'LTE Sign',
  pattern: /<=/,
  label: '<=',
  categories: CompOp,
});
export const GtSign = createToken({
  name: 'GT Sign',
  pattern: />/,
  label: '>',
  categories: CompOp,
});
export const LtSign = createToken({
  name: 'LT Sign',
  pattern: /</,
  label: '<',
  categories: CompOp,
});
export const allTokens = [
Space,
IfFunction,
LParen,
RParen,
Comma,
LocalVar,
GlobalVar,
Bool,
Float,
MulOp,
AddOp,
CompOp,
];
// Single shared Lexer instance; token order in `allTokens` defines
// matching priority.
// - positionTracking 'onlyOffset': tokens carry only offsets (no line/col).
// - ensureOptimizations: fail loudly if a pattern defeats the lexer's
//   first-character optimization.
// - traceInitPerf: log lexer-initialization timing.
const lexer = new Lexer(allTokens, {
  positionTracking: 'onlyOffset',
  ensureOptimizations: true,
  traceInitPerf: true,
});
export const lex = (input: string) => lexer.tokenize(input);
The above lexer fails to match some characters when I run `const result = lex('ВАР-IF(ВАР=0,1,0');`:
// result.errors[0].message === 'unexpected character: ->-<- at offset: 8, skipped 1 characters.'
// result.errors[1].message === 'unexpected character: ->=<- at offset: 20, skipped 1 characters.'
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 2 replies
-
Hey @yohgen, The lexer errors are expected, since |
Beta Was this translation helpful? Give feedback.
-
I wonder if we could / should expand the token categories automatically |
Beta Was this translation helpful? Give feedback.
Hey @yohgen,
The lexer errors are expected, since the `-` and `=` characters cannot be lexed with the given tokens. Even if you assign your token a category, you still need to explicitly add the token type to the lexer. The tokens can later be consumed in the parser via the category (for example: `CONSUME(AddOp)`), but the lexer does not care about categories at all.