diff --git a/README.md b/README.md index 61492de..ed2e29b 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ The main supports the following additional arguments: - `--track_import_stack`: Store the stack trace of imports belonging to the tracked module - `--detect_transitive`: Mark each dependency as either "direct" (imported directly) or "transitive" (inherited from a direct import) - `--full_depth`: Track all dependencies, including transitive dependencies of direct third-party deps +- `--show_optional`: Show whether each dependency is optional or required ## Integrating `import_tracker` into a project diff --git a/import_tracker/__main__.py b/import_tracker/__main__.py index 0de6213..d2ba997 100644 --- a/import_tracker/__main__.py +++ b/import_tracker/__main__.py @@ -78,6 +78,13 @@ def main(): default=False, help="Detect whether each dependency is 'direct' or 'transitive'", ) + parser.add_argument( + "--show_optional", + "-o", + action="store_true", + default=False, + help="Show whether each dependency is optional or required", + ) parser.add_argument( "--log_level", "-l", @@ -109,6 +116,7 @@ def main(): track_import_stack=args.track_import_stack, full_depth=args.full_depth, detect_transitive=args.detect_transitive, + show_optional=args.show_optional, ), indent=args.indent, ) diff --git a/import_tracker/constants.py b/import_tracker/constants.py index 040d895..c3eda14 100644 --- a/import_tracker/constants.py +++ b/import_tracker/constants.py @@ -11,3 +11,8 @@ # Labels for direct vs transitive dependencies TYPE_DIRECT = "direct" TYPE_TRANSITIVE = "transitive" + +# Info section headers +INFO_TYPE = "type" +INFO_STACK = "stack" +INFO_OPTIONAL = "optional" diff --git a/import_tracker/import_tracker.py b/import_tracker/import_tracker.py index dbce0d9..88008ae 100644 --- a/import_tracker/import_tracker.py +++ b/import_tracker/import_tracker.py @@ -4,14 +4,14 @@ """ # Standard from types import ModuleType -from typing import Any, Dict, Iterable, List, Optional, 
Set, Union +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union import dis import importlib import os import sys # Local -from .constants import THIS_PACKAGE, TYPE_DIRECT, TYPE_TRANSITIVE +from . import constants from .log import log ## Public ###################################################################### @@ -24,6 +24,7 @@ def track_module( track_import_stack: bool = False, full_depth: bool = False, detect_transitive: bool = False, + show_optional: bool = False, ) -> Union[Dict[str, List[str]], Dict[str, Dict[str, Any]]]: """Track the dependencies of a single python module @@ -46,6 +47,9 @@ def track_module( library. detect_transitive: bool Detect whether each dependency is 'direct' or 'transitive' + show_optional: bool + Show whether each requirement is optional (behind a try/except) or + not Returns: import_mapping: Union[Dict[str, List[str]], Dict[str, Dict[str, Any]]] @@ -72,8 +76,10 @@ def track_module( for module_to_check in modules_to_check: # Figure out all direct imports from this module - module_imports = _get_imports(module_to_check) - module_import_names = {mod.__name__ for mod in module_imports} + req_imports, opt_imports = _get_imports(module_to_check) + opt_dep_names = {mod.__name__ for mod in opt_imports} + all_imports = req_imports.union(opt_imports) + module_import_names = {mod.__name__ for mod in all_imports} log.debug3( "Full import names for [%s]: %s", module_to_check.__name__, @@ -84,10 +90,14 @@ def track_module( non_std_module_names = _get_non_std_modules(module_import_names) log.debug3("Non std module names: %s", non_std_module_names) non_std_module_imports = [ - mod for mod in module_imports if mod.__name__ in non_std_module_names + mod for mod in all_imports if mod.__name__ in non_std_module_names ] - module_deps_map[module_to_check.__name__] = non_std_module_names + # Set the deps for this module as a mapping from each dep to its + # optional status + module_deps_map[module_to_check.__name__] = { + mod: mod 
in opt_dep_names for mod in non_std_module_names + } log.debug2( "Deps for [%s] -> %s", module_to_check.__name__, @@ -164,11 +174,11 @@ def track_module( } log.debug("Raw output deps map: %s", flattened_deps) - # If not detecting transitive or import stacks, the values are simple lists - # of dependency names - if not detect_transitive and not track_import_stack: + # If not displaying any of the extra info, the values are simple lists of + # dependency names + if not any([detect_transitive, track_import_stack, show_optional]): deps_out = { - mod: list(sorted(deps.keys())) for mod, deps in flattened_deps.items() + mod: list(sorted(deps.keys())) for mod, (deps, _) in flattened_deps.items() } # Otherwise, the values will be dicts with some combination of "type" and @@ -179,22 +189,32 @@ def track_module( # If detecting transitive deps, look through the stacks and mark each dep as # transitive or direct if detect_transitive: - for mod, deps in flattened_deps.items(): + for mod, (deps, _) in flattened_deps.items(): for dep_name, dep_stacks in deps.items(): - deps_out.setdefault(mod, {}).setdefault(dep_name, {})["type"] = ( - TYPE_DIRECT + deps_out.setdefault(mod, {}).setdefault(dep_name, {})[ + constants.INFO_TYPE + ] = ( + constants.TYPE_DIRECT if any(len(dep_stack) == 1 for dep_stack in dep_stacks) - else TYPE_TRANSITIVE + else constants.TYPE_TRANSITIVE ) # If tracking import stacks, move them to the "stack" key in the output if track_import_stack: - for mod, deps in flattened_deps.items(): + for mod, (deps, _) in flattened_deps.items(): for dep_name, dep_stacks in deps.items(): deps_out.setdefault(mod, {}).setdefault(dep_name, {})[ - "stack" + constants.INFO_STACK ] = dep_stacks + # If showing optional, add the optional status of each dependency + if show_optional: + for mod, (deps, optional_mapping) in flattened_deps.items(): + for dep_name, dep_stacks in deps.items(): + deps_out.setdefault(mod, {}).setdefault(dep_name, {})[ + constants.INFO_OPTIONAL + ] = 
optional_mapping.get(dep_name, False) + log.debug("Final output: %s", deps_out) return deps_out @@ -280,7 +300,7 @@ def _is_third_party(mod_name: str) -> bool: mod_name not in sys.modules or _get_import_parent_path(mod_name) not in [_std_lib_dir, _std_dylib_dir] ) - and mod_pkg != THIS_PACKAGE + and mod_pkg != constants.THIS_PACKAGE and mod_pkg not in _known_std_pkgs ) @@ -299,6 +319,21 @@ def _get_value_col(dis_line: str) -> str: return "" +def _get_op_number(dis_line: str) -> Optional[int]: + """Get the opcode number out of the line of `dis` output""" + line_parts = dis_line.split() + if not line_parts: + return None + opcode_idx = min([i for i, val in enumerate(line_parts) if val.isupper()]) + assert opcode_idx > 0, f"Opcode found at the beginning of line! [{dis_line}]" + return int(line_parts[opcode_idx - 1]) + + +def _get_try_end_number(dis_line: str) -> int: + """For a SETUP_FINALLY/SETUP_EXCEPT line, extract the target end line""" + return int(_get_value_col(dis_line).split()[-1]) + + def _figure_out_import( mod: ModuleType, dots: Optional[int], @@ -355,12 +390,13 @@ def _figure_out_import( return sys.modules.get(import_name) -def _get_imports(mod: ModuleType) -> Set[ModuleType]: - """Get the list of import string from a module by parsing the module's - bytecode +def _get_imports(mod: ModuleType) -> Tuple[Set[ModuleType], Set[ModuleType]]: + """Get the sets of required and optional imports for the given module by + parsing its bytecode """ log.debug2("Getting imports for %s", mod.__name__) - all_imports = set() + req_imports = set() + opt_imports = set() # Attempt to disassemble the byte code for this module. 
If the module has no # code, we ignore it since it's most likely a c extension @@ -369,10 +405,10 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: mod_code = loader.get_code(mod.__name__) except (AttributeError, ImportError): log.warning("Couldn't find a loader for %s!", mod.__name__) - return all_imports + return req_imports, opt_imports if mod_code is None: log.debug2("No code object found for %s", mod.__name__) - return all_imports + return req_imports, opt_imports bcode = dis.Bytecode(mod_code) # Parse all bytecode lines @@ -380,10 +416,19 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: current_import_name = None current_import_from = None open_import = False + open_tries = set() log.debug4("Byte Code:") for line in bcode.dis().split("\n"): log.debug4(line) line_val = _get_value_col(line) + + # Check whether this line ends a try + op_num = _get_op_number(line) + if op_num in open_tries: + open_tries.remove(op_num) + log.debug3("Closed try %d. Remaining open tries: %s", op_num, open_tries) + + # Parse the individual ops if "LOAD_CONST" in line: if line_val.isnumeric(): current_dots = int(line_val) @@ -394,6 +439,13 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: open_import = True current_import_from = line_val else: + # If this is a SETUP_FINALLY (try:), increment the number of try + # blocks open + if "SETUP_FINALLY" in line or "SETUP_EXCEPT" in line: + # Get the end target for this try + open_tries.add(_get_try_end_number(line)) + log.debug3("Open tries: %s", open_tries) + # This closes an import, so figure out what the module is that is # being imported! 
if open_import: @@ -402,7 +454,15 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: ) if import_mod is not None: log.debug2("Adding import module [%s]", import_mod.__name__) - all_imports.add(import_mod) + if open_tries: + log.debug( + "Found optional dependency of [%s]: %s", + mod.__name__, + import_mod.__name__, + ) + opt_imports.add(import_mod) + else: + req_imports.add(import_mod) # If this is a STORE_NAME, subsequent "from" statements may use the # same dots and name @@ -422,7 +482,7 @@ def _get_imports(mod: ModuleType) -> Set[ModuleType]: current_import_from, ) - return all_imports + return req_imports, opt_imports def _find_parent_direct_deps( @@ -444,14 +504,16 @@ def _find_parent_direct_deps( for i in range(1, len(mod_name_parts)): parent_mod_name = ".".join(mod_name_parts[:i]) parent_deps = module_deps_map.get(parent_mod_name, {}) - for dep in parent_deps: - if not dep.startswith(mod_base_name) and dep not in mod_deps: + for dep, parent_dep_opt in parent_deps.items(): + currently_optional = mod_deps.get(dep, True) + if not dep.startswith(mod_base_name) and currently_optional: log.debug3( - "Adding direct-dependency of parent mod [%s]: %s", + "Adding direct-dependency of parent mod [%s] to [%s]: %s", parent_mod_name, + mod_name, dep, ) - mod_deps.add(dep) + mod_deps[dep] = currently_optional and parent_dep_opt parent_direct_deps.setdefault(mod_name, {}).setdefault( parent_mod_name, set() ).add(dep) @@ -463,7 +525,7 @@ def _flatten_deps( module_name: str, module_deps_map: Dict[str, List[str]], parent_direct_deps: Dict[str, Dict[str, List[str]]], -) -> Dict[str, List[str]]: +) -> Tuple[Dict[str, List[str]], Dict[str, bool]]: """Flatten the names of all modules that the target module depends on""" # Look through all modules that are directly required by this target module. 
@@ -521,16 +583,46 @@ def _flatten_deps( # Create the flattened dependencies with the source lists for each mod_base_name = module_name.partition(".")[0] flat_base_deps = {} + optional_deps_map = {} for dep, dep_sources in all_deps.items(): if not dep.startswith(mod_base_name): # Truncate the dep_sources entries and trim to avoid duplicates dep_root_mod_name = dep.partition(".")[0] flat_dep_sources = flat_base_deps.setdefault(dep_root_mod_name, []) + opt_dep_values = optional_deps_map.setdefault(dep_root_mod_name, []) for dep_source in dep_sources: log.debug4("Considering dep source list for %s: %s", dep, dep_source) + + # If any link in the dep_source is optional, the whole + # dep_source should be considered optional + is_optional = False + for parent_idx, dep_mod in enumerate(dep_source[1:] + [dep]): + dep_parent = dep_source[parent_idx] + log.debug4( + "Checking whether [%s -> %s] is optional (dep=%s)", + dep_parent, + dep_mod, + dep_root_mod_name, + ) + if module_deps_map.get(dep_parent, {}).get(dep_mod, False): + log.debug4("Found optional link %s -> %s", dep_parent, dep_mod) + is_optional = True + break + opt_dep_values.append( + [ + is_optional, + dep_source, + ] + ) + flat_dep_source = dep_source if dep_root_mod_name in dep_source: flat_dep_source = dep_source[: dep_source.index(dep_root_mod_name)] if flat_dep_source not in flat_dep_sources: flat_dep_sources.append(flat_dep_source) - return flat_base_deps + log.debug3("Optional deps map for [%s]: %s", module_name, optional_deps_map) + optional_deps_map = { + mod: all([opt_val[0] for opt_val in opt_vals]) + for mod, opt_vals in optional_deps_map.items() + } + return flat_base_deps, optional_deps_map diff --git a/import_tracker/setup_tools.py b/import_tracker/setup_tools.py index c641438..d6ebcca 100644 --- a/import_tracker/setup_tools.py +++ b/import_tracker/setup_tools.py @@ -11,7 +11,7 @@ import sys # Local -from .constants import TYPE_DIRECT +from .constants import INFO_OPTIONAL from .import_tracker 
import track_module from .log import log @@ -23,6 +23,7 @@ def parse_requirements( library_name: str, extras_modules: Optional[List[str]] = None, full_depth: bool = True, + keep_optional: Union[bool, Dict[str, List[str]]] = False, **kwargs, ) -> Tuple[List[str], Dict[str, List[str]]]: """This helper uses the lists of required modules and parameters for the @@ -41,6 +42,13 @@ def parse_requirements( Passthrough to track_module. The default here is switched to True so that modules which are both direct and transitive dependencies of the library are correctly allocated. + keep_optional: Union[bool, Dict[str, List[str]]] + Indicate which optional dependencies should be kept when computing + the extras sets. If True, all optional dependencies will be kept. If + False, none will be kept. Otherwise, the argument should be a dict + mapping known optional dependencies of specific modules that should + be kept and all optional dependencies not represented in the dict + will be dropped. **kwargs: Additional keyword arguments to pass through to track_module @@ -74,10 +82,28 @@ def parse_requirements( submodules=True, detect_transitive=True, full_depth=full_depth, + show_optional=True, **kwargs, ) log.debug4("Library Import Mapping:\n%s", library_import_mapping) + # Remove any unwanted optional imports + if keep_optional is not True: + keep_optional = keep_optional or {} + log.debug2("Trimming optional deps (keep: %s)", keep_optional) + library_import_mapping = { + mod_name: { + dep_name: dep_info + for dep_name, dep_info in deps_info.items() + if ( + not dep_info[INFO_OPTIONAL] + or dep_name in keep_optional.get(mod_name, []) + ) + } + for mod_name, deps_info in library_import_mapping.items() + } + log.debug4("Trimmed Import Mapping:\n%s", library_import_mapping) + # If no extras_modules are given, track them all if not extras_modules: extras_modules = list(library_import_mapping.keys()) diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 4d8e07b..f5ddd99 100755 --- 
a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -17,7 +17,7 @@ then procs_arg="-n $procs" else echo "Running tests in serial" - procs_arg="" + procs_arg="--log-cli-level DEBUG4" fi FAIL_THRESH=100.0 diff --git a/test/sample_libs/optional_deps/__init__.py b/test/sample_libs/optional_deps/__init__.py new file mode 100644 index 0000000..01d12b0 --- /dev/null +++ b/test/sample_libs/optional_deps/__init__.py @@ -0,0 +1,14 @@ +""" +This sample library has two dependencies: alog and yaml. The alog dependency is +held as optional in optional_deps.opt and as non-optional in +optional_deps.not_opt. The yaml dependency is held as optional in +optional_deps.opt, but it is imported _directly_ in the root of optional_deps. +The resulting tracking should indicate that yaml is non-optional everywhere +while alog is optional in opt and nowhere else. +""" + +# Third Party +import yaml + +# Local +from . import not_opt, opt diff --git a/test/sample_libs/optional_deps/not_opt.py b/test/sample_libs/optional_deps/not_opt.py new file mode 100644 index 0000000..2d13780 --- /dev/null +++ b/test/sample_libs/optional_deps/not_opt.py @@ -0,0 +1,3 @@ +# Import alog not optionally! 
+# First Party +import alog diff --git a/test/sample_libs/optional_deps/opt.py b/test/sample_libs/optional_deps/opt.py new file mode 100644 index 0000000..58c592b --- /dev/null +++ b/test/sample_libs/optional_deps/opt.py @@ -0,0 +1,27 @@ +# Standard +import sys + +try: + # First Party + import alog + + print("imported alog!") +except ImportError: + print("Can't import alog") +except: + print("Double except, just to be sure!") +finally: + HAVE_ALOG = "alog" in sys.modules + + +try: + # Third Party + import yaml +finally: + HAVE_YAML = "yaml" in sys.modules + + +# Third Party +# Import a non-optional dependency here to ensure that try blocks are closed +# correctly on all versions of python +import google.protobuf diff --git a/test/sample_libs/optional_deps_upstream/__init__.py b/test/sample_libs/optional_deps_upstream/__init__.py new file mode 100644 index 0000000..25ee526 --- /dev/null +++ b/test/sample_libs/optional_deps_upstream/__init__.py @@ -0,0 +1,12 @@ +""" +This sample library includes a "third party" library as optional which itself +includes a different "third party" library as non-optional. The transitive +third party should also be considered optional since the interim link in the +import chain is optional. 
+""" + +try: + # Third Party + import single_extra +except ImportError: + print("nothing to see here!") diff --git a/test/test_import_tracker.py b/test/test_import_tracker.py index 24dbe42..7bf88f6 100644 --- a/test/test_import_tracker.py +++ b/test/test_import_tracker.py @@ -351,6 +351,45 @@ def test_deep_siblings(): } +def test_optional_deps(): + """Make sure that optional deps are correctly tracked when try/except is + used + """ + assert track_module("optional_deps", submodules=True, show_optional=True) == { + "optional_deps.not_opt": { + "yaml": {"optional": False}, + "alog": {"optional": False}, + }, + "optional_deps": { + "yaml": {"optional": False}, + "alog": {"optional": False}, + "google": {"optional": False}, + }, + "optional_deps.opt": { + "yaml": {"optional": False}, + "alog": {"optional": True}, + "google": {"optional": False}, + }, + } + + +def test_updatream_optional_deps(): + """Make sure that a module which holds a third-party dep as optional where + that third-party dep includes _other_ third-party deps as non-optional + should have the transitive deps held as optional due to the optional dep in + the transitive chain. + """ + assert track_module( + "optional_deps_upstream", full_depth=True, show_optional=True + ) == { + "optional_deps_upstream": { + "yaml": {"optional": True}, + "alog": {"optional": True}, + "single_extra": {"optional": True}, + }, + } + + ## Details ##################################################################### @@ -359,7 +398,7 @@ def test_get_imports_no_bytecode(): bytecode to ensure that they doesn't explode! 
""" new_mod = ModuleType("new_mod") - assert _get_imports(new_mod) == set() + assert _get_imports(new_mod) == (set(), set()) assert not _mod_defined_in_init_file(new_mod) diff --git a/test/test_setup_tools.py b/test/test_setup_tools.py index 1081bb1..2be2e61 100644 --- a/test/test_setup_tools.py +++ b/test/test_setup_tools.py @@ -235,3 +235,65 @@ def test_full_depth_direct_and_transitive(): "full_depth_direct_and_transitive.foo": [], "full_depth_direct_and_transitive.bar": ["single_extra"], } + + +def test_setup_tools_keep_optionals(): + """Make sure that the semantics of keep_optionals work as expected for all + valid inputs to keep_optionals + """ + # Without keep_optionals, optional_deps.opt should not depend on alog + requirements, extras_require = parse_requirements( + ["alchemy-logging", "PyYaml"], + "optional_deps", + ["optional_deps.opt", "optional_deps.not_opt"], + ) + assert requirements == ["PyYaml"] + assert extras_require == { + "all": sorted(["alchemy-logging", "PyYaml"]), + "optional_deps.opt": [], + "optional_deps.not_opt": ["alchemy-logging"], + } + + # With keep_optionals=True, optional_deps.opt should depend on alog + requirements, extras_require = parse_requirements( + ["alchemy-logging", "PyYaml"], + "optional_deps", + ["optional_deps.opt", "optional_deps.not_opt"], + keep_optional=True, + ) + assert requirements == sorted(["alchemy-logging", "PyYaml"]) + assert extras_require == { + "all": sorted(["alchemy-logging", "PyYaml"]), + "optional_deps.opt": [], + "optional_deps.not_opt": [], + } + + # With keep_optionals={"optional_deps.opt": ["alog"]}, optional_deps.opt + # should depend on alog + requirements, extras_require = parse_requirements( + ["alchemy-logging", "PyYaml"], + "optional_deps", + ["optional_deps.opt", "optional_deps.not_opt"], + keep_optional={"optional_deps.opt": ["alog"]}, + ) + assert requirements == sorted(["alchemy-logging", "PyYaml"]) + assert extras_require == { + "all": sorted(["alchemy-logging", "PyYaml"]), + 
"optional_deps.opt": [], + "optional_deps.not_opt": [], + } + + # With keep_optionals={"optional_deps.opt": ["something_else"]}, + # optional_deps.opt should not depend on alog + requirements, extras_require = parse_requirements( + ["alchemy-logging", "PyYaml"], + "optional_deps", + ["optional_deps.opt", "optional_deps.not_opt"], + keep_optional={"optional_deps.opt": ["something_else"]}, + ) + assert requirements == sorted(["PyYaml"]) + assert extras_require == { + "all": sorted(["alchemy-logging", "PyYaml"]), + "optional_deps.opt": [], + "optional_deps.not_opt": ["alchemy-logging"], + }