Skip to content

Commit

Permalink
fix: more flexible squared property read expression, fixes #643
Browse files Browse the repository at this point in the history
  • Loading branch information
harttle committed Aug 22, 2023
1 parent dc6a301 commit 96f136c
Show file tree
Hide file tree
Showing 12 changed files with 214 additions and 151 deletions.
28 changes: 0 additions & 28 deletions src/parser/match-operator.spec.ts

This file was deleted.

14 changes: 0 additions & 14 deletions src/parser/match-operator.ts

This file was deleted.

93 changes: 67 additions & 26 deletions src/parser/tokenizer.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { LiquidTagToken, HTMLToken, QuotedToken, OutputToken, TagToken, OperatorToken, RangeToken, PropertyAccessToken, NumberToken, IdentifierToken } from '../tokens'
import { Tokenizer } from './tokenizer'
import { defaultOperators } from '../render/operator'
import { createTrie } from '../util/operator-trie'

describe('Tokenizer', function () {
it('should read quoted', () => {
Expand All @@ -15,12 +17,31 @@ describe('Tokenizer', function () {
// eslint-disable-next-line deprecation/deprecation
expect(new Tokenizer('foo bar').readWord()).toHaveProperty('content', 'foo')
})
it('should read number value', () => {
const token: NumberToken = new Tokenizer('2.33.2').readValueOrThrow() as any
it('should read integer number', () => {
const token: NumberToken = new Tokenizer('123').readValueOrThrow() as any
expect(token).toBeInstanceOf(NumberToken)
expect(token.whole.getText()).toBe('2')
expect(token.decimal!.getText()).toBe('33')
expect(token.getText()).toBe('2.33')
expect(token.getText()).toBe('123')
expect(token.number).toBe(123)
})
it('should read negative number', () => {
const token: NumberToken = new Tokenizer('-123').readValueOrThrow() as any
expect(token).toBeInstanceOf(NumberToken)
expect(token.getText()).toBe('-123')
expect(token.number).toBe(-123)
})
it('should read float number', () => {
const token: NumberToken = new Tokenizer('1.23').readValueOrThrow() as any
expect(token).toBeInstanceOf(NumberToken)
expect(token.getText()).toBe('1.23')
expect(token.number).toBe(1.23)
})
it('should treat 1.2.3 as property read', () => {
const token: PropertyAccessToken = new Tokenizer('1.2.3').readValueOrThrow() as any
expect(token).toBeInstanceOf(PropertyAccessToken)
expect(token.props).toHaveLength(3)
expect(token.props[0].getText()).toBe('1')
expect(token.props[1].getText()).toBe('2')
expect(token.props[2].getText()).toBe('3')
})
it('should read quoted value', () => {
const value = new Tokenizer('"foo"a').readValue()
Expand All @@ -33,11 +54,7 @@ describe('Tokenizer', function () {
it('should read quoted property access value', () => {
const value = new Tokenizer('["a prop"]').readValue()
expect(value).toBeInstanceOf(PropertyAccessToken)
expect((value as PropertyAccessToken).variable.getText()).toBe('"a prop"')
})
it('should throw for broken quoted property access', () => {
const tokenizer = new Tokenizer('[5]')
expect(() => tokenizer.readValueOrThrow()).toThrow()
expect((value as QuotedToken).getText()).toBe('["a prop"]')
})
it('should throw for incomplete quoted property access', () => {
const tokenizer = new Tokenizer('["a prop"')
Expand Down Expand Up @@ -277,10 +294,10 @@ describe('Tokenizer', function () {

const pa: PropertyAccessToken = token!.args[0] as any
expect(token!.args[0]).toBeInstanceOf(PropertyAccessToken)
expect((pa.variable as any).content).toBe('arr')
expect(pa.props).toHaveLength(1)
expect(pa.props[0]).toBeInstanceOf(NumberToken)
expect(pa.props[0].getText()).toBe('0')
expect(pa.props).toHaveLength(2)
expect((pa.props[0] as any).content).toBe('arr')
expect(pa.props[1]).toBeInstanceOf(NumberToken)
expect(pa.props[1].getText()).toBe('0')
})
it('should read a filter with obj.foo argument', function () {
const tokenizer = new Tokenizer('| plus: obj.foo')
Expand All @@ -290,10 +307,10 @@ describe('Tokenizer', function () {

const pa: PropertyAccessToken = token!.args[0] as any
expect(token!.args[0]).toBeInstanceOf(PropertyAccessToken)
expect((pa.variable as any).content).toBe('obj')
expect(pa.props).toHaveLength(1)
expect(pa.props[0]).toBeInstanceOf(IdentifierToken)
expect(pa.props[0].getText()).toBe('foo')
expect(pa.props).toHaveLength(2)
expect((pa.props[0] as any).content).toBe('obj')
expect(pa.props[1]).toBeInstanceOf(IdentifierToken)
expect(pa.props[1].getText()).toBe('foo')
})
it('should read a filter with obj["foo"] argument', function () {
const tokenizer = new Tokenizer('| plus: obj["good luck"]')
Expand All @@ -304,8 +321,8 @@ describe('Tokenizer', function () {
const pa: PropertyAccessToken = token!.args[0] as any
expect(token!.args[0]).toBeInstanceOf(PropertyAccessToken)
expect(pa.getText()).toBe('obj["good luck"]')
expect((pa.variable as any).content).toBe('obj')
expect(pa.props[0].getText()).toBe('"good luck"')
expect((pa.props[0] as any).content).toBe('obj')
expect(pa.props[1].getText()).toBe('"good luck"')
})
})
describe('#readFilters()', () => {
Expand Down Expand Up @@ -341,7 +358,7 @@ describe('Tokenizer', function () {
expect(tokens[2].args).toHaveLength(1)
expect(tokens[2].args[0]).toBeInstanceOf(PropertyAccessToken)
expect((tokens[2].args[0] as any).getText()).toBe('foo[a.b["c d"]]')
expect((tokens[2].args[0] as any).props[0].getText()).toBe('a.b["c d"]')
expect((tokens[2].args[0] as any).props[1].getText()).toBe('a.b["c d"]')
})
})
describe('#readExpression()', () => {
Expand All @@ -358,10 +375,10 @@ describe('Tokenizer', function () {
expect(exp).toHaveLength(1)
const pa = exp[0] as PropertyAccessToken
expect(pa).toBeInstanceOf(PropertyAccessToken)
expect((pa.variable as any).content).toEqual('a')
expect(pa.props).toHaveLength(2)
expect(pa.props).toHaveLength(3)
expect((pa.props[0] as any).content).toEqual('a')

const [p1, p2] = pa.props
const [, p1, p2] = pa.props
expect(p1).toBeInstanceOf(IdentifierToken)
expect(p1.getText()).toBe('')
expect(p2).toBeInstanceOf(PropertyAccessToken)
Expand All @@ -373,8 +390,8 @@ describe('Tokenizer', function () {
expect(exp).toHaveLength(1)
const pa = exp[0] as PropertyAccessToken
expect(pa).toBeInstanceOf(PropertyAccessToken)
expect((pa.variable as any).content).toEqual('a')
expect(pa.props).toHaveLength(0)
expect(pa.props).toHaveLength(1)
expect((pa.props[0] as any).content).toEqual('a')
})
it('should read expression `a ==`', () => {
const exp = [...new Tokenizer('a ==').readExpressionTokens()]
Expand Down Expand Up @@ -481,6 +498,30 @@ describe('Tokenizer', function () {
expect(rhs.getText()).toEqual('"\\""')
})
})
describe('#matchTrie()', function () {
  // Build the operator trie once; every case below matches against it.
  const opTrie = createTrie(defaultOperators)
  it('should match contains', () => {
    // 'contains' is 8 characters, so the match ends at index 8
    expect(new Tokenizer('contains').matchTrie(opTrie)).toBe(8)
  })
  it('should match comparison', () => {
    expect(new Tokenizer('>').matchTrie(opTrie)).toBe(1)
    expect(new Tokenizer('>=').matchTrie(opTrie)).toBe(2)
    expect(new Tokenizer('<').matchTrie(opTrie)).toBe(1)
    expect(new Tokenizer('<=').matchTrie(opTrie)).toBe(2)
  })
  it('should match binary logic', () => {
    expect(new Tokenizer('and').matchTrie(opTrie)).toBe(3)
    expect(new Tokenizer('or').matchTrie(opTrie)).toBe(2)
  })
  it('should not match if word does not terminate', () => {
    // word operators require a boundary: 'true1' and 'containsa'
    // continue with identifier characters, so no operator is recognized
    expect(new Tokenizer('true1').matchTrie(opTrie)).toBe(-1)
    expect(new Tokenizer('containsa').matchTrie(opTrie)).toBe(-1)
  })
  it('should match if word boundary found', () => {
    expect(new Tokenizer('>=1').matchTrie(opTrie)).toBe(2)
    expect(new Tokenizer('contains b').matchTrie(opTrie)).toBe(8)
  })
})
describe('#readLiquidTagTokens', () => {
it('should read newline terminated tokens', () => {
const tokenizer = new Tokenizer('echo \'hello\'')
Expand Down
101 changes: 70 additions & 31 deletions src/parser/tokenizer.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import { FilteredValueToken, TagToken, HTMLToken, HashToken, QuotedToken, LiquidTagToken, OutputToken, ValueToken, Token, RangeToken, FilterToken, TopLevelToken, PropertyAccessToken, OperatorToken, LiteralToken, IdentifierToken, NumberToken } from '../tokens'
import { Trie, createTrie, ellipsis, literalValues, TokenizationError, TYPES, QUOTE, BLANK, IDENTIFIER } from '../util'
import { OperatorHandler } from '../render/operator'
import { TrieNode, isQuotedToken, isWordToken, Trie, createTrie, ellipsis, literalValues, TokenizationError, TYPES, QUOTE, BLANK, IDENTIFIER, NUMBER, SIGN } from '../util'

Check failure on line 3 in src/parser/tokenizer.ts

View workflow job for this annotation

GitHub Actions / lint / Lint

'isQuotedToken' is defined but never used

Check failure on line 3 in src/parser/tokenizer.ts

View workflow job for this annotation

GitHub Actions / lint / Lint

'isWordToken' is defined but never used
import { Operators, Expression } from '../render'
import { NormalizedFullOptions, defaultOptions } from '../liquid-options'
import { FilterArg } from './filter-arg'
import { matchOperator } from './match-operator'
import { whiteSpaceCtrl } from './whitespace-ctrl'

export class Tokenizer {
p: number
N: number
private rawBeginAt = -1
private opTrie: Trie
private opTrie: Trie<OperatorHandler>
private literalTrie: Trie<typeof literalValues[keyof typeof literalValues]>

constructor (
public input: string,
Expand All @@ -21,6 +22,7 @@ export class Tokenizer {
this.p = range ? range[0] : 0
this.N = range ? range[1] : input.length
this.opTrie = createTrie(operators)
this.literalTrie = createTrie(literalValues)
}

readExpression () {
Expand All @@ -44,10 +46,22 @@ export class Tokenizer {
}
readOperator (): OperatorToken | undefined {
this.skipBlank()
const end = matchOperator(this.input, this.p, this.opTrie)
const end = this.matchTrie(this.opTrie)
if (end === -1) return
return new OperatorToken(this.input, this.p, (this.p = end), this.file)
}
/**
 * Walk `trie` over the input starting at `this.p` and find the longest
 * token ending on an accepting node.
 *
 * @param trie the trie to match against (operator trie or literal trie)
 * @returns the exclusive end index of the longest match, or -1 when
 *          nothing matches or a required word boundary is missing
 */
matchTrie<T> (trie: Trie<T>) {
  let node: TrieNode<T> = trie
  let i = this.p
  // last accepting node seen along the walk, and the index just past it
  let info
  let end = -1
  // bounds check first so we never index past the tokenizer's range [p, N)
  while (i < this.N && node[this.input[i]]) {
    node = node[this.input[i++]]
    if (node['end']) {
      info = node
      end = i
    }
  }
  if (!info) return -1
  // word-like tokens (e.g. `contains`) must not be followed by an
  // identifier character, otherwise `containsa` would match `contains`
  if (info['needBoundary'] && (this.peekType(end - this.p) & IDENTIFIER)) return -1
  // return the end of the longest *accepted* token, not of the full walk,
  // so a dead-end branch past the last accepting node doesn't overshoot
  return end
}
readFilteredValue (): FilteredValueToken {
const begin = this.p
const initial = this.readExpression()
Expand Down Expand Up @@ -272,8 +286,8 @@ export class Tokenizer {
return this.input.slice(this.p, this.N)
}

advance (i = 1) {
this.p += i
advance (step = 1) {
this.p += step
}

end () {
Expand All @@ -289,43 +303,68 @@ export class Tokenizer {
}

readValue (): ValueToken | undefined {
const value = this.readQuoted() || this.readRange()
if (value) return value

if (this.peek() === '[') {
this.p++
const prop = this.readQuoted()
if (!prop) return
if (this.peek() !== ']') return
this.p++
return new PropertyAccessToken(prop, [], this.p)
}

const variable = this.readIdentifier()
if (!variable.size()) return

let isNumber = variable.isNumber(true)
this.skipBlank()
const begin = this.p
const variable = this.readLiteral() || this.readQuoted() || this.readRange() || this.readNumber()
const props: (QuotedToken | IdentifierToken)[] = []
while (true) {
if (this.peek() === '[') {
isNumber = false
this.p++
const prop = this.readValue() || new IdentifierToken(this.input, this.p, this.p, this.file)
this.readTo(']')
this.assert(this.readTo(']') !== -1, '[ not closed')
props.push(prop)
} else if (this.peek() === '.' && this.peek(1) !== '.') { // skip range syntax
continue
}
if (!variable && !props.length) {
const prop = this.readIdentifier()
if (prop.size()) {
props.push(prop)
continue
}
}
if (this.peek() === '.' && this.peek(1) !== '.') { // skip range syntax
this.p++
const prop = this.readIdentifier()
if (!prop.size()) break
if (!prop.isNumber()) isNumber = false
props.push(prop)
continue
}
break
}
if (!props.length) return variable
return new PropertyAccessToken(variable, props, this.input, begin, this.p)
}

readNumber (): NumberToken | undefined {
this.skipBlank()
let decimalFound = false
let digitFound = false
let n = 0
if (this.peekType() & SIGN) n++
while (this.p + n <= this.N) {
if (this.peekType(n) & NUMBER) {
digitFound = true
n++
} else if (this.peek(n) === '.' && this.peek(n + 1) !== '.') {
if (decimalFound || !digitFound) return
decimalFound = true
n++
} else break
}
if (!props.length && literalValues.hasOwnProperty(variable.content)) {
return new LiteralToken(this.input, variable.begin, variable.end, this.file)
if (digitFound && !(this.peekType(n) & IDENTIFIER)) {
const num = new NumberToken(this.input, this.p, this.p + n, this.file)
this.advance(n)
return num
}
if (isNumber) return new NumberToken(variable, props[0] as IdentifierToken)
return new PropertyAccessToken(variable, props, this.p)
}

/**
 * Try to read a literal value (one of the keywords in `literalValues`)
 * at the current position. Returns undefined when no literal starts here.
 */
readLiteral (): LiteralToken | undefined {
  this.skipBlank()
  const matchEnd = this.matchTrie(this.literalTrie)
  if (matchEnd === -1) return undefined
  const token = new LiteralToken(this.input, this.p, matchEnd, this.file)
  this.p = matchEnd
  return token
}

readRange (): RangeToken | undefined {
Expand Down Expand Up @@ -388,7 +427,7 @@ export class Tokenizer {
}

peekType (n = 0) {
return TYPES[this.input.charCodeAt(this.p + n)]
return this.p + n >= this.N ? 0 : TYPES[this.input.charCodeAt(this.p + n)]
}

peek (n = 0): string {
Expand Down
Loading

0 comments on commit 96f136c

Please sign in to comment.