Skip to content

Commit

Permalink
Fix: start and end of tokens now respect quotes (closes tobymao#1677)
Browse files Browse the repository at this point in the history
  • Loading branch information
tobymao authored and adrianisk committed Jun 21, 2023
1 parent d8edd2c commit 7b6435a
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 15 deletions.
4 changes: 2 additions & 2 deletions sqlglot/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,7 @@ def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
"""
token = token or self._curr or self._prev or Token.string("")
start = token.start
end = token.end
end = token.end + 1
start_context = self.sql[max(start - self.error_message_context, 0) : start]
highlight = self.sql[start:end]
end_context = self.sql[end : end + self.error_message_context]
Expand Down Expand Up @@ -1005,7 +1005,7 @@ def validate_expression(
self.raise_error(error_message)

def _find_sql(self, start: Token, end: Token) -> str:
return self.sql[start.start : end.end]
return self.sql[start.start : end.end + 1]

def _advance(self, times: int = 1) -> None:
self._index += times
Expand Down
33 changes: 20 additions & 13 deletions sqlglot/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ class TokenType(AutoName):


class Token:
__slots__ = ("token_type", "text", "line", "col", "end", "comments")
__slots__ = ("token_type", "text", "line", "col", "start", "end", "comments")

@classmethod
def number(cls, number: int) -> Token:
Expand Down Expand Up @@ -353,22 +353,28 @@ def __init__(
text: str,
line: int = 1,
col: int = 1,
start: int = 0,
end: int = 0,
comments: t.List[str] = [],
) -> None:
"""Token initializer.
Args:
token_type: The TokenType Enum.
text: The text of the token.
line: The line that the token ends on.
col: The column that the token ends on.
start: The start index of the token.
end: The ending index of the token.
"""
self.token_type = token_type
self.text = text
self.line = line
size = len(text)
self.col = col
self.end = end if end else size
self.start = start
self.end = end
self.comments = comments

@property
def start(self) -> int:
"""Returns the start of the token."""
return self.end - len(self.text)

def __repr__(self) -> str:
attributes = ", ".join(f"{k}: {getattr(self, k)}" for k in self.__slots__)
return f"<Token {attributes}>"
Expand Down Expand Up @@ -892,11 +898,12 @@ def _add(self, token_type: TokenType, text: t.Optional[str] = None) -> None:
self.tokens.append(
Token(
token_type,
self._text if text is None else text,
self._line,
self._col,
self._current,
self._comments,
text=self._text if text is None else text,
line=self._line,
col=self._col,
start=self._start,
end=self._current - 1,
comments=self._comments,
)
)
self._comments = []
Expand Down
4 changes: 4 additions & 0 deletions tests/test_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def test_token_line_col(self):
self.assertEqual(tokens[1].line, 1)
self.assertEqual(tokens[1].col, 8)

self.assertEqual(Tokenizer().tokenize("'''abc'")[0].start, 0)
self.assertEqual(Tokenizer().tokenize("'''abc'")[0].end, 6)
self.assertEqual(Tokenizer().tokenize("'abc'")[0].start, 0)

def test_command(self):
tokens = Tokenizer().tokenize("SHOW;")
self.assertEqual(tokens[0].token_type, TokenType.SHOW)
Expand Down

0 comments on commit 7b6435a

Please sign in to comment.