Skip to content

Commit

Permalink
gh-104016: Fixed off by 1 error in f string tokenizer (#104047)
Browse files Browse the repository at this point in the history
Co-authored-by: sunmy2019 <[email protected]>
Co-authored-by: Ken Jin <[email protected]>
Co-authored-by: Pablo Galindo <[email protected]>
  • Loading branch information
4 people authored May 1, 2023
1 parent 2d526cd commit 5078eed
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
16 changes: 16 additions & 0 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,23 @@ def test_fstring_nested_too_deeply(self):
self.assertAllRaise(SyntaxError,
"f-string: expressions nested too deeply",
['f"{1+2:{1+2:{1+1:{1}}}}"'])

# Build the source text of an f-string nested n levels deep.
# n == 0 yields the innermost expression "1+1"; each further level wraps the
# previous result in f"{...}", e.g. n == 1 -> f"{1+1}", n == 2 -> f"{f"{1+1}"}".
def create_nested_fstring(n):
if n == 0:
return "1+1"
# Recurse first, then wrap: one recursive call per nesting level.
prev = create_nested_fstring(n-1)
# In the template, '{{' and '}}' are escaped literal braces and '{prev}' is
# the substitution, so the result is: f"{ + prev + }".
return f'f"{{{prev}}}"'

self.assertAllRaise(SyntaxError,
"too many nested f-strings",
[create_nested_fstring(160)])

# Regression test: a malformed expression followed by many unterminated nested
# f-string openers is passed to assertAllRaise with SyntaxError and the message
# "invalid syntax" (presumably the expected exception type and message pattern;
# assertAllRaise is defined outside this view).
def test_syntax_error_in_nested_fstring(self):
# See gh-104016 for more information on this crash
self.assertAllRaise(SyntaxError,
"invalid syntax",
# Input: 'f"{1 1:' followed by 199 copies of '{f"1:' (none of them closed).
['f"{1 1:' + ('{f"1:' * 199)])

def test_double_braces(self):
self.assertEqual(f'{{', '{')
self.assertEqual(f'a{{', 'a{')
Expand Down
7 changes: 5 additions & 2 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@
#ifdef Py_DEBUG
/* Return a pointer to the tokenizer mode at the current top of the mode
 * stack, i.e. tok->tok_mode_stack[tok->tok_mode_stack_index].
 *
 * This definition sits under #ifdef Py_DEBUG; it asserts that the index is
 * non-negative and below the stack bound before indexing. It only reads
 * tok and does not change the stack index.
 */
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
assert(tok->tok_mode_stack_index >= 0);
/* NOTE(review): this MAXLEVEL check and the MAXFSTRINGLEVEL check below look
 * like the before/after versions of the same assert; with MAXLEVEL 200 and
 * MAXFSTRINGLEVEL 150 the MAXFSTRINGLEVEL check is the stricter one -- confirm
 * which line is meant to remain. */
assert(tok->tok_mode_stack_index < MAXLEVEL);
assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
}
/* Push one level onto the mode stack: pre-increment
 * tok->tok_mode_stack_index and return a pointer to the entry at the new
 * index.
 *
 * This definition sits under #ifdef Py_DEBUG; the asserts check the index
 * before it is incremented. The "+ 1" form checks the index that will be
 * used after the increment, so the returned slot stays below MAXFSTRINGLEVEL
 * (presumably the capacity of tok_mode_stack -- confirm against
 * struct tok_state).
 */
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
assert(tok->tok_mode_stack_index >= 0);
/* NOTE(review): this check tests the index before the increment and against
 * MAXLEVEL; it looks like the pre-change form of the assert below -- confirm
 * which line is meant to remain. */
assert(tok->tok_mode_stack_index < MAXLEVEL);
assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
}
#else
Expand Down Expand Up @@ -2235,6 +2235,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t

p_start = tok->start;
p_end = tok->cur;
if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
return MAKE_TOKEN(syntaxerror(tok, "too many nested f-strings"));
}
tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
the_current_tok->kind = TOK_FSTRING_MODE;
the_current_tok->f_string_quote = quote;
Expand Down
7 changes: 4 additions & 3 deletions Parser/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ extern "C" {

#include "pycore_token.h" /* For token types */

#define MAXINDENT 100 /* Max indentation level */
#define MAXLEVEL 200 /* Max parentheses level */
#define MAXINDENT 100 /* Max indentation level */
#define MAXLEVEL 200 /* Max parentheses level */
#define MAXFSTRINGLEVEL 150 /* Max f-string nesting level */

enum decoding_state {
STATE_INIT,
Expand Down Expand Up @@ -123,7 +124,7 @@ struct tok_state {
enum interactive_underflow_t interactive_underflow;
int report_warnings;
// TODO: Factor this into its own thing
tokenizer_mode tok_mode_stack[MAXLEVEL];
tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL];
int tok_mode_stack_index;
int tok_report_warnings;
#ifdef Py_DEBUG
Expand Down

0 comments on commit 5078eed

Please sign in to comment.