Skip to content

Commit

Permalink
detect file encoding with chardet before parsing the .py file (#103)
Browse files Browse the repository at this point in the history
  • Loading branch information
Florian Maas authored Sep 14, 2022
1 parent d9aef9c commit 4a90ae1
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 2 deletions.
9 changes: 8 additions & 1 deletion deptry/import_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from pathlib import Path
from typing import List, Union

import chardet

from deptry.notebook_import_extractor import NotebookImportExtractor

# AST node classes for conditionals, try/except blocks, and function/class definitions.
# NOTE(review): judging by the name, these are presumably the node types whose bodies
# are searched recursively for import statements — confirm against the usage elsewhere
# in this module.
RECURSION_TYPES = [ast.If, ast.Try, ast.ExceptHandler, ast.FunctionDef, ast.ClassDef]
Expand Down Expand Up @@ -45,7 +47,7 @@ def get_imported_modules_from_str(self, file_str: str) -> List[str]:
return self._get_import_modules_from(import_nodes)

def _get_imported_modules_from_py(self, path_to_py_file: Path) -> List[str]:
# Parse a single .py file and return the result of the import-module extraction.
# NOTE(review): this is a diff view with indentation stripped — the first
# `with open(...)` below appears to be the line this commit removes, and the
# second one its replacement, which opens the file using the encoding reported
# by `_get_file_encoding` for the same path.
with open(path_to_py_file) as f:
with open(path_to_py_file, encoding=self._get_file_encoding(path_to_py_file)) as f:
# Build the AST from the decoded text; the path is passed as ast.parse's filename argument.
root = ast.parse(f.read(), path_to_py_file) # type: ignore
# Collect the import nodes from the tree, then hand them to
# `_get_import_modules_from`, whose result is returned to the caller.
import_nodes = self._get_import_nodes_from(root)
return self._get_import_modules_from(import_nodes)
Expand Down Expand Up @@ -102,3 +104,8 @@ def _filter_exceptions(modules: List[str]):
logging.debug(f"Found module {exception} to be imported, omitting from the list of modules.")
modules = [module for module in modules if not module == exception]
return modules

@staticmethod
def _get_file_encoding(file_name: Union[str, Path]) -> str:
    """Guess the text encoding of a file from its raw bytes.

    The whole file is read in binary mode and handed to ``chardet.detect``;
    the ``"encoding"`` entry of the detection result is returned unchanged.

    Args:
        file_name: Path of the file to inspect.

    Returns:
        The encoding name reported by chardet.
        NOTE(review): chardet may report ``None`` when it cannot make a guess
        (for example on an empty file) — confirm callers tolerate that; the
        built-in ``open`` treats ``encoding=None`` as "use the default".
    """
    # Use a context manager so the handle is closed deterministically, even if
    # read() raises; a bare open(...).read() leaves closing to the garbage
    # collector and triggers ResourceWarning.
    with open(file_name, "rb") as raw_file:
        rawdata = raw_file.read()
    return chardet.detect(rawdata)["encoding"]
14 changes: 13 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ toml = "^0.10.2"
isort = "^5.10.1"
click = "^8.0.0"
importlib-metadata = { version = "*", python = "<=3.7" }
chardet = "^5.0.0"

[tool.poetry.dev-dependencies]
black = "^22.6.0"
Expand Down
50 changes: 50 additions & 0 deletions tests/test_import_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,53 @@ def test_import_parser_ignores_setuptools(tmp_path):
f.write("import setuptools\nimport foo")
imported_modules = ImportParser().get_imported_modules_for_list_of_files(["file.py"])
assert set(imported_modules) == set(["foo"])


def test_import_parser_file_encodings(tmp_path):
    """Check that imports are extracted from .py files saved in different encodings.

    Four files are written inside ``tmp_path`` using utf-8, iso-8859-15, utf-16
    and cp861 respectively; each contains ``import foo`` and must yield exactly
    ``{"foo"}`` when parsed.

    This test previously reused the name ``test_import_parser_ignores_setuptools``,
    which rebinds the module-level name and hides the earlier test of that name
    from pytest, so it carries its own descriptive name.
    """
    with run_within_dir(tmp_path):
        with open("file1.py", "w", encoding="utf-8") as f:
            f.write(
                """#!/usr/bin/python
# -*- encoding: utf-8 -*-
import foo
print('嘉大')
"""
            )
        with open("file2.py", "w", encoding="iso-8859-15") as f:
            f.write(
                """
#!/usr/bin/python
# -*- encoding: iso-8859-15 -*-
import foo
print('Æ Ç')
"""
            )
        with open("file3.py", "w", encoding="utf-16") as f:
            f.write(
                """#!/usr/bin/python
# -*- encoding: utf-16 -*-
import foo
print('嘉大')
"""
            )
        with open("file4.py", "w", encoding="cp861") as f:
            f.write(
                """#!/usr/bin/python
# -*- encoding: cp861 -*-
import foo
print('foo')
"""
            )

        # Every file must parse to the same single import, whatever its encoding.
        # Files are checked in the same order as before, with a fresh parser each time.
        for file_name in ("file1.py", "file2.py", "file3.py", "file4.py"):
            imported_modules = ImportParser().get_imported_modules_from_file(Path(file_name))
            assert set(imported_modules) == {"foo"}, f"unexpected imports for {file_name}"

0 comments on commit 4a90ae1

Please sign in to comment.