diff --git a/deptry/import_parser.py b/deptry/import_parser.py
index 363b8c79..b0969dec 100644
--- a/deptry/import_parser.py
+++ b/deptry/import_parser.py
@@ -3,6 +3,8 @@
 from pathlib import Path
 from typing import List, Union
 
+import chardet
+
 from deptry.notebook_import_extractor import NotebookImportExtractor
 
 RECURSION_TYPES = [ast.If, ast.Try, ast.ExceptHandler, ast.FunctionDef, ast.ClassDef]
@@ -45,7 +47,7 @@ def get_imported_modules_from_str(self, file_str: str) -> List[str]:
         return self._get_import_modules_from(import_nodes)
 
     def _get_imported_modules_from_py(self, path_to_py_file: Path) -> List[str]:
-        with open(path_to_py_file) as f:
+        with open(path_to_py_file, encoding=self._get_file_encoding(path_to_py_file)) as f:
             root = ast.parse(f.read(), path_to_py_file)  # type: ignore
         import_nodes = self._get_import_nodes_from(root)
         return self._get_import_modules_from(import_nodes)
@@ -102,3 +104,14 @@ def _filter_exceptions(modules: List[str]):
                 logging.debug(f"Found module {exception} to be imported, omitting from the list of modules.")
                 modules = [module for module in modules if not module == exception]
         return modules
+
+    @staticmethod
+    def _get_file_encoding(file_name: Union[str, Path]) -> str:
+        """Guess the text encoding of a file from its raw bytes using chardet.
+
+        Falls back to UTF-8, the default encoding of Python source files, when
+        chardet cannot determine an encoding (e.g. for an empty file).
+        """
+        with open(file_name, "rb") as f:
+            rawdata = f.read()
+        return chardet.detect(rawdata)["encoding"] or "utf-8"
diff --git a/poetry.lock b/poetry.lock
index c2b6c919..d4705ad0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -162,6 +162,14 @@ python-versions = "*"
 [package.dependencies]
 pycparser = "*"
 
+[[package]]
+name = "chardet"
+version = "5.0.0"
+description = "Universal encoding detector for Python 3"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
 [[package]]
 name = "charset-normalizer"
 version = "2.1.1"
@@ -1285,7 +1293,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.7,<4.0"
-content-hash = "a015e2e9c0e165dc8fd9082e72ee26bc4fb8bcb28050383a63541f0a65f79972"
+content-hash = "c0f6c6483f7562d5459c47d367fcc0b078dd66e210089eef52330f2dba715887"
 
 [metadata.files]
 anyio = [
@@ -1438,6 +1446,10 @@ cffi = [
     {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"},
     {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"},
 ]
+chardet = [
+    {file = "chardet-5.0.0-py3-none-any.whl", hash = "sha256:d3e64f022d254183001eccc5db4040520c0f23b1a3f33d6413e099eb7f126557"},
+    {file = "chardet-5.0.0.tar.gz", hash = "sha256:0368df2bfd78b5fc20572bb4e9bb7fb53e2c094f60ae9993339e8671d0afb8aa"},
+]
 charset-normalizer = [
     {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"},
     {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"},
diff --git a/pyproject.toml b/pyproject.toml
index 4592861b..7693f287 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ toml = "^0.10.2"
 isort = "^5.10.1"
 click = "^8.0.0"
 importlib-metadata = { version = "*", python = "<=3.7" }
+chardet = "^5.0.0"
 
 [tool.poetry.dev-dependencies]
 black = "^22.6.0"
diff --git a/tests/test_import_parser.py b/tests/test_import_parser.py
index d05ba322..aaf9a8ca 100644
--- a/tests/test_import_parser.py
+++ b/tests/test_import_parser.py
@@ -83,3 +83,53 @@ def test_import_parser_ignores_setuptools(tmp_path):
             f.write("import setuptools\nimport foo")
         imported_modules = ImportParser().get_imported_modules_for_list_of_files(["file.py"])
         assert set(imported_modules) == set(["foo"])
+
+
+def test_import_parser_file_encodings(tmp_path):
+    """Imports are extracted from source files written in several different encodings."""
+    with run_within_dir(tmp_path):
+        with open("file1.py", "w", encoding="utf-8") as f:
+            f.write(
+                """#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+import foo
+print('嘉大')
+"""
+            )
+        with open("file2.py", "w", encoding="iso-8859-15") as f:
+            f.write(
+                """
+#!/usr/bin/python
+# -*- encoding: iso-8859-15 -*-
+import foo
+print('Æ Ç')
+"""
+            )
+        with open("file3.py", "w", encoding="utf-16") as f:
+            f.write(
+                """#!/usr/bin/python
+# -*- encoding: utf-16 -*-
+import foo
+print('嘉大')
+"""
+            )
+        with open("file4.py", "w", encoding="cp861") as f:
+            f.write(
+                """#!/usr/bin/python
+# -*- encoding: cp861 -*-
+import foo
+print('foo')
+"""
+            )
+
+        imported_modules = ImportParser().get_imported_modules_from_file(Path("file1.py"))
+        assert set(imported_modules) == set(["foo"])
+
+        imported_modules = ImportParser().get_imported_modules_from_file(Path("file2.py"))
+        assert set(imported_modules) == set(["foo"])
+
+        imported_modules = ImportParser().get_imported_modules_from_file(Path("file3.py"))
+        assert set(imported_modules) == set(["foo"])
+
+        imported_modules = ImportParser().get_imported_modules_from_file(Path("file4.py"))
+        assert set(imported_modules) == set(["foo"])