Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ast instead of eval for string extraction #915

Merged
merged 1 commit into from
Nov 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 26 additions & 9 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
:copyright: (c) 2013-2022 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""

import ast
import os
from os.path import relpath
import sys
Expand Down Expand Up @@ -487,14 +487,9 @@ def extract_python(fileobj, keywords, comment_tags, options):
if nested:
funcname = value
elif tok == STRING:
# Unwrap quotes in a safe manner, maintaining the string's
# encoding
# https://sourceforge.net/tracker/?func=detail&atid=355470&
# aid=617979&group_id=5470
code = compile('# coding=%s\n%s' % (str(encoding), value),
'<string>', 'eval', future_flags)
value = eval(code, {'__builtins__': {}}, {})
buf.append(value)
val = _parse_python_string(value, encoding, future_flags)
if val is not None:
buf.append(val)
elif tok == OP and value == ',':
if buf:
messages.append(''.join(buf))
Expand All @@ -516,6 +511,28 @@ def extract_python(fileobj, keywords, comment_tags, options):
funcname = value


def _parse_python_string(value, encoding, future_flags):
# Unwrap quotes in a safe manner, maintaining the string's encoding
# https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470
code = compile(
f'# coding={str(encoding)}\n{value}',
'<string>',
'eval',
ast.PyCF_ONLY_AST | future_flags,
)
if isinstance(code, ast.Expression):
body = code.body
if isinstance(body, ast.Str):
return body.s
if isinstance(body, ast.JoinedStr): # f-string
if all(isinstance(node, ast.Str) for node in body.values):
return ''.join(node.s for node in body.values)
if all(isinstance(node, ast.Constant) for node in body.values):
return ''.join(str(node.value) for node in body.values)
# TODO: we could raise an error or warning when not all nodes are constants
return None


def extract_javascript(fileobj, keywords, comment_tags, options):
"""Extract messages from JavaScript source code.

Expand Down
27 changes: 27 additions & 0 deletions tests/messages/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,3 +528,30 @@ def test_future(self):
messages = list(extract.extract('python', buf,
extract.DEFAULT_KEYWORDS, [], {}))
assert messages[0][1] == u'\xa0'

def test_f_strings(self):
    """f-strings built only from constant parts are extracted; ones with
    replacement fields are not."""
    source = BytesIO(br"""
t1 = _('foobar')
t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only
t2 = _(f'spameggs' 'kerroshampurilainen') # should be extracted (mixing f with no f)
t3 = _(f'''whoa! a ''' # should be extracted (continues on following lines)
f'flying shark'
'... hello'
)
t4 = _(f'spameggs {t1}') # should not be extracted
""")
    extracted = [
        entry[1]
        for entry in extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    ]
    # Exactly four messages, in source order; the interpolated t4 is skipped.
    assert extracted == [
        u'foobar',
        u'spameggsfeast',
        u'spameggskerroshampurilainen',
        u'whoa! a flying shark... hello',
    ]

def test_f_strings_non_utf8(self):
    """Adjacent constant f-strings in a latin-1 encoded source are decoded
    and joined into one message."""
    source = BytesIO(b"""
# -- coding: latin-1 --
t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6')
""")
    extracted = [
        entry[1]
        for entry in extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    ]
    assert extracted == [u'åäöÅÄÖ']