Skip to content

Commit

Permalink
Use ast instead of eval for string extraction
Browse files Browse the repository at this point in the history
This is safer (as we don't actually execute anything),
and allows us to parse f-strings too.

Closes #769 (supersedes it)
Refs #715 (doesn't add an error yet, but doesn't crash on f-strings)
  • Loading branch information
akx committed Oct 31, 2022
1 parent c7d04e8 commit f324539
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 9 deletions.
36 changes: 27 additions & 9 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
:copyright: (c) 2013-2022 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""

import ast
import os
from os.path import relpath
import sys
Expand Down Expand Up @@ -487,14 +487,9 @@ def extract_python(fileobj, keywords, comment_tags, options):
if nested:
funcname = value
elif tok == STRING:
# Unwrap quotes in a safe manner, maintaining the string's
# encoding
# https://sourceforge.net/tracker/?func=detail&atid=355470&
# aid=617979&group_id=5470
code = compile('# coding=%s\n%s' % (str(encoding), value),
'<string>', 'eval', future_flags)
value = eval(code, {'__builtins__': {}}, {})
buf.append(value)
val = _parse_python_string(value, encoding, future_flags)
if val is not None:
buf.append(val)
elif tok == OP and value == ',':
if buf:
messages.append(''.join(buf))
Expand All @@ -516,6 +511,29 @@ def extract_python(fileobj, keywords, comment_tags, options):
funcname = value


def _parse_python_string(value, encoding, future_flags):
# Unwrap quotes in a safe manner, maintaining the string's encoding
# https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470
code = compile(
f'# coding={str(encoding)}\n{value}',
'<string>',
'eval',
ast.PyCF_ONLY_AST | future_flags,
)
if not isinstance(code, ast.Expression):
return None
body = code.body
if isinstance(body, ast.Str):
return body.s
if isinstance(body, ast.JoinedStr): # f-string
if all(isinstance(node, ast.Str) for node in body.values):
return ''.join(node.s for node in body.values)
if all(isinstance(node, ast.Constant) for node in body.values):
return ''.join(str(node.value) for node in body.values)
# TODO: could raise an error or warning when not all nodes are constants
return None


def extract_javascript(fileobj, keywords, comment_tags, options):
"""Extract messages from JavaScript source code.
Expand Down
11 changes: 11 additions & 0 deletions tests/messages/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,3 +528,14 @@ def test_future(self):
messages = list(extract.extract('python', buf,
extract.DEFAULT_KEYWORDS, [], {}))
assert messages[0][1] == u'\xa0'

def test_f_strings(self):
    # f-strings built only from constant parts are extracted; an f-string
    # with an interpolated field yields no message.
    source = BytesIO(br"""
t1 = _('foobar')
t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only
t3 = _(f'spameggs {t1}') # should not be extracted
""")
    extracted = list(
        extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    )
    # Index 1 of each result is the extracted string under comparison.
    found = [entry[1] for entry in extracted]
    assert found == [u'foobar', u'spameggsfeast']

0 comments on commit f324539

Please sign in to comment.