diff --git a/dvc/api/show.py b/dvc/api/show.py index 973812bf20..3323fff6b7 100644 --- a/dvc/api/show.py +++ b/dvc/api/show.py @@ -7,10 +7,6 @@ from dvc.repo import Repo -def _onerror_raise(exception: Exception, *args, **kwargs): - raise exception - - def _postprocess(results): processed: Dict[str, Dict] = {} for rev, rev_data in results.items(): @@ -23,7 +19,6 @@ def _postprocess(results): for file_data in rev_data["data"].values(): for k in file_data["data"]: counts[k] += 1 - for file_name, file_data in rev_data["data"].items(): to_merge = { (k if counts[k] == 1 else f"{file_name}:{k}"): v @@ -138,13 +133,17 @@ def metrics_show( .. _Git revision: https://git-scm.com/docs/revisions """ + from dvc.repo.metrics.show import to_relpath with Repo.open(repo, config=config) as _repo: metrics = _repo.metrics.show( targets=targets, revs=rev if rev is None else [rev], - onerror=_onerror_raise, + on_error="raise", ) + metrics = { + k: to_relpath(_repo.fs, _repo.root_dir, v) for k, v in metrics.items() + } metrics = _postprocess(metrics) @@ -382,6 +381,8 @@ def params_show( https://git-scm.com/docs/revisions """ + from dvc.repo.metrics.show import to_relpath + if isinstance(stages, str): stages = [stages] @@ -389,10 +390,11 @@ def params_show( params = _repo.params.show( revs=rev if rev is None else [rev], targets=targets, - deps=deps, - onerror=_onerror_raise, + deps_only=deps, + on_error="raise", stages=stages, ) + params = {k: to_relpath(_repo.fs, _repo.root_dir, v) for k, v in params.items()} params = _postprocess(params) diff --git a/dvc/commands/metrics.py b/dvc/commands/metrics.py index ed46bef3f6..ba7a0c7da0 100644 --- a/dvc/commands/metrics.py +++ b/dvc/commands/metrics.py @@ -4,7 +4,6 @@ from dvc.cli import completion from dvc.cli.command import CmdBase from dvc.cli.utils import append_doc_link, fix_subparsers -from dvc.exceptions import DvcException from dvc.ui import ui from dvc.utils.serialize import encode_exception @@ -20,17 +19,25 @@ class CmdMetricsBase(CmdBase): class CmdMetricsShow(CmdMetricsBase): def run(self): - try: - metrics = self.repo.metrics.show( - self.args.targets, - all_branches=self.args.all_branches, - all_tags=self.args.all_tags, - all_commits=self.args.all_commits, - recursive=self.args.recursive, + from dvc.repo.metrics.show import to_relpath + from dvc.utils import errored_revisions + + metrics = self.repo.metrics.show( + self.args.targets, + all_branches=self.args.all_branches, + all_tags=self.args.all_tags, + all_commits=self.args.all_commits, + ) + metrics = { + k: to_relpath(self.repo.fs, self.repo.root_dir, v) + for k, v in metrics.items() + } + + if errored := errored_revisions(metrics): + ui.error_write( + "DVC failed to load some metrics for following revisions:" + f" '{', '.join(errored)}'." ) - except DvcException: - logger.exception("") - return 1 if self.args.json: ui.write_json(metrics, default=encode_exception) @@ -52,17 +59,26 @@ def run(self): class CmdMetricsDiff(CmdMetricsBase): def run(self): - try: - diff = self.repo.metrics.diff( - a_rev=self.args.a_rev, - b_rev=self.args.b_rev, - targets=self.args.targets, - recursive=self.args.recursive, - all=self.args.all, + import os + from os.path import relpath + + diff_result = self.repo.metrics.diff( + a_rev=self.args.a_rev, + b_rev=self.args.b_rev, + targets=self.args.targets, + all=self.args.all, + ) + + errored = [rev for rev, err in diff_result.get("errors", {}).items() if err] + if errored: + ui.error_write( + "DVC failed to load some metrics for following revisions:" + f" '{', '.join(errored)}'." ) - except DvcException: - logger.exception("failed to show metrics diff") - return 1 + + start = relpath(os.getcwd(), self.repo.root_dir) + diff = diff_result.get("diff", {}) + diff = {relpath(path, start): result for path, result in diff.items()} if self.args.json: ui.write_json(diff) @@ -184,10 +200,14 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) metrics_diff_parser.add_argument( - "a_rev", nargs="?", help="Old Git commit to compare (defaults to HEAD)" + "a_rev", + nargs="?", + help="Old Git commit to compare (defaults to HEAD)", + default="HEAD", ) metrics_diff_parser.add_argument( "b_rev", + default="workspace", nargs="?", help="New Git commit to compare (defaults to the current workspace)", ) diff --git a/dvc/commands/params.py b/dvc/commands/params.py index 41b66a0a0f..e972925427 100644 --- a/dvc/commands/params.py +++ b/dvc/commands/params.py @@ -4,7 +4,6 @@ from dvc.cli import completion from dvc.cli.command import CmdBase from dvc.cli.utils import append_doc_link, fix_subparsers -from dvc.exceptions import DvcException from dvc.ui import ui logger = logging.getLogger(__name__) @@ -14,17 +13,27 @@ class CmdParamsDiff(CmdBase): UNINITIALIZED = True def run(self): - try: - diff = self.repo.params.diff( - a_rev=self.args.a_rev, - b_rev=self.args.b_rev, - targets=self.args.targets, - all=self.args.all, - deps=self.args.deps, + import os + from os.path import relpath + + diff_result = self.repo.params.diff( + a_rev=self.args.a_rev, + b_rev=self.args.b_rev, + targets=self.args.targets, + all=self.args.all, + deps_only=self.args.deps, + ) + + errored = [rev for rev, err in diff_result.get("errors", {}).items() if err] + if errored: + ui.error_write( + "DVC failed to load some metrics for following revisions:" + f" '{', '.join(errored)}'." ) - except DvcException: - logger.exception("failed to show params diff") - return 1 + + start = relpath(os.getcwd(), self.repo.root_dir) + diff = diff_result.get("diff", {}) + diff = {relpath(path, start): result for path, result in diff.items()} if self.args.json: ui.write_json(diff) @@ -74,10 +83,14 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) params_diff_parser.add_argument( - "a_rev", nargs="?", help="Old Git commit to compare (defaults to HEAD)" + "a_rev", + nargs="?", + default="HEAD", + help="Old Git commit to compare (defaults to HEAD)", ) params_diff_parser.add_argument( "b_rev", + default="workspace", nargs="?", help="New Git commit to compare (defaults to the current workspace)", ) diff --git a/dvc/dependency/__init__.py b/dvc/dependency/__init__.py index 2904da8a8a..ce800be7bf 100644 --- a/dvc/dependency/__init__.py +++ b/dvc/dependency/__init__.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Any, Mapping, Set +from typing import Any, Dict, List, Mapping, Set from dvc.output import ARTIFACT_SCHEMA, DIR_FILES_SCHEMA, Output @@ -58,7 +58,7 @@ def loads_from(stage, s_list, erepo=None, fs_config=None): ] -def _merge_params(s_list): +def _merge_params(s_list) -> Dict[str, List[str]]: d = defaultdict(list) default_file = ParamsDependency.DEFAULT_PARAMS_FILE diff --git a/dvc/dependency/param.py b/dvc/dependency/param.py index 4116902862..d38f6b60c9 100644 --- a/dvc/dependency/param.py +++ b/dvc/dependency/param.py @@ -2,7 +2,7 @@ import os import typing from collections import defaultdict -from typing import Any, Dict +from typing import TYPE_CHECKING, Any, Dict, List, Optional import dpath @@ -12,6 +12,9 @@ from .base import Dependency +if TYPE_CHECKING: + from dvc.fs import FileSystem + logger = logging.getLogger(__name__) @@ -31,6 +34,36 @@ class BadParamFileError(DvcException): pass +def read_param_file( + fs: "FileSystem", + path: str, + key_paths: Optional[List[str]] = None, + flatten: bool = False, +) -> Any: + config = load_path(path, fs) + if not key_paths: + return config + + ret = {} + if flatten: + for key_path in key_paths: + try: + ret[key_path] = dpath.get(config, key_path, separator=".") + except KeyError: + continue + return ret + + from dpath import merge + + for key_path in key_paths: + merge( + ret, + dpath.search(config, key_path, separator="."), + separator=".", + ) + return ret + + class ParamsDependency(Dependency): PARAM_PARAMS = "params" DEFAULT_PARAMS_FILE = "params.yaml" @@ -75,31 +108,19 @@ def read_params( self, flatten: bool = True, **kwargs: typing.Any ) -> Dict[str, typing.Any]: try: - config = self.read_file() + self.validate_filepath() except MissingParamsFile: - config = {} - - if not self.params: - return config - - ret = {} - if flatten: - for param in self.params: - try: - ret[param] = dpath.get(config, param, separator=".") - except KeyError: - continue - return ret + return {} - from dpath import merge - - for param in self.params: - merge( - ret, - dpath.search(config, param, separator="."), - separator=".", + try: + return read_param_file( + self.repo.fs, + self.fs_path, + list(self.params) if self.params else None, + flatten=flatten, ) - return ret + except ParseError as exc: + raise BadParamFileError(f"Unable to read parameters from '{self}'") from exc def workspace_status(self): if not self.exists: @@ -149,13 +170,6 @@ def validate_filepath(self): f"'{self}' is a directory, expected a parameters file" ) - def read_file(self): - self.validate_filepath() - try: - return load_path(self.fs_path, self.repo.fs) - except ParseError as exc: - raise BadParamFileError(f"Unable to read parameters from '{self}'") from exc - def get_hash(self): info = self.read_params() diff --git a/dvc/repo/experiments/executor/base.py b/dvc/repo/experiments/executor/base.py index 0bf02c076a..28128e53d4 100644 --- a/dvc/repo/experiments/executor/base.py +++ b/dvc/repo/experiments/executor/base.py @@ -23,7 +23,6 @@ Union, ) -from funcy import get_in from scmrepo.exceptions import SCMError from dvc.env import DVC_EXP_AUTO_PUSH, DVC_EXP_GIT_REMOTE @@ -644,13 +643,15 @@ def _repro_dvc( # noqa: C901 info.status = TaskStatus.FAILED raise finally: + from dvc.repo.metrics.show import _gather_metrics + post_live_metrics( "done", info.baseline_rev, info.name, # type: ignore[arg-type] "dvc", experiment_rev=dvc.experiments.scm.get_ref(EXEC_BRANCH), - metrics=get_in(dvc.metrics.show(), ["", "data"]), + metrics=_gather_metrics(dvc, on_error="return"), dvc_studio_config=dvc_studio_config, ) diff --git a/dvc/repo/experiments/serialize.py b/dvc/repo/experiments/serialize.py index 6f3cb854a4..b853e6042c 100644 --- a/dvc/repo/experiments/serialize.py +++ b/dvc/repo/experiments/serialize.py @@ -1,15 +1,16 @@ import json from dataclasses import asdict, dataclass, field from datetime import datetime -from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Literal, Optional +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Literal, Optional from dvc.exceptions import DvcException from dvc.repo.metrics.show import _gather_metrics from dvc.repo.params.show import _gather_params -from dvc.utils import onerror_collect, relpath +from dvc.utils import relpath if TYPE_CHECKING: from dvc.repo import Repo + from dvc.repo.metrics.show import FileResult class DeserializeError(DvcException): @@ -29,8 +30,8 @@ class SerializableExp: rev: str timestamp: Optional[datetime] = None - params: Dict[str, Any] = field(default_factory=dict) - metrics: Dict[str, Any] = field(default_factory=dict) + params: Dict[str, "FileResult"] = field(default_factory=dict) + metrics: Dict[str, "FileResult"] = field(default_factory=dict) deps: Dict[str, "ExpDep"] = field(default_factory=dict) outs: Dict[str, "ExpOut"] = field(default_factory=dict) meta: Dict[str, Any] = field(default_factory=dict) @@ -40,7 +41,6 @@ def from_repo( cls, repo: "Repo", rev: Optional[str] = None, - onerror: Optional[Callable] = None, param_deps: bool = False, **kwargs, ) -> "SerializableExp": @@ -51,25 +51,11 @@ def from_repo( """ from dvc.dependency import ParamsDependency, RepoDependency - if not onerror: - onerror = onerror_collect - rev = rev or repo.get_rev() assert rev - # NOTE: _gather_params/_gather_metrics return defaultdict which is not - # supported in dataclasses.asdict() on all python releases - # see https://bugs.python.org/issue35540 - params = dict(_gather_params(repo, deps=param_deps, onerror=onerror)) - params = {k: dict(v) for k, v in params.items()} - metrics = dict( - _gather_metrics( - repo, - targets=None, - rev=rev[:7], - recursive=False, - onerror=onerror_collect, - ) - ) + + params = _gather_params(repo, deps_only=param_deps, on_error="return") + metrics = _gather_metrics(repo, on_error="return") return cls( rev=rev, params=params, @@ -119,11 +105,8 @@ def from_bytes(cls, data: bytes): @property def contains_error(self) -> bool: - return ( - self.params.get("error") - or any(value.get("error") for value in self.params.values()) - or self.metrics.get("error") - or any(value.get("error") for value in self.metrics.values()) + return any(value.get("error") for value in self.params.values()) or any( + value.get("error") for value in self.metrics.values() ) diff --git a/dvc/repo/metrics/diff.py b/dvc/repo/metrics/diff.py index 7719f7957a..157ca5865f 100644 --- a/dvc/repo/metrics/diff.py +++ b/dvc/repo/metrics/diff.py @@ -1,21 +1,61 @@ -from dvc.utils.diff import diff as _diff +from typing import TYPE_CHECKING, Dict, TypedDict, Union + +from funcy import compact + +from dvc.utils.diff import diff as _diff_dict from dvc.utils.diff import format_dict +if TYPE_CHECKING: + from dvc.repo import Repo + + from .show import Result + + +class DiffResult(TypedDict, total=False): + errors: Dict[str, Union[Exception, Dict[str, Exception]]] + diff: Dict[str, Dict[str, Dict]] -def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): - if repo.scm.no_commits: - return {} - with_unchanged = kwargs.pop("all", False) +def _diff( + result: Dict[str, "Result"], + old_rev: str, + new_rev: str, + **kwargs, +) -> DiffResult: + old = result.get(old_rev, {}) + new = result.get(new_rev, {}) - a_rev = a_rev or "HEAD" - b_rev = b_rev or "workspace" + old_data = old.get("data", {}) + new_data = new.get("data", {}) - metrics = repo.metrics.show( - *args, **kwargs, revs=[a_rev, b_rev], hide_workspace=False - ) + res = DiffResult() + errors = res.setdefault("errors", {}) - old = metrics.get(a_rev, {}).get("data", {}) - new = metrics.get(b_rev, {}).get("data", {}) + if old_error := old.get("error"): + errors[old_rev] = old_error + else: + errors[old_rev] = {f: d["error"] for f, d in old_data.items() if "error" in d} + + if new_error := new.get("error"): + errors[new_rev] = new_error + else: + errors[new_rev] = {f: d["error"] for f, d in new_data.items() if "error" in d} + + diff_data = _diff_dict(format_dict(old_data), format_dict(new_data), **kwargs) + res = DiffResult(errors=errors, diff=diff_data) + res["errors"] = compact(res.get("errors", {})) # type: ignore[assignment] + return compact(res) # type: ignore[no-any-return] + + +def diff( + repo: "Repo", + a_rev: str = "HEAD", + b_rev: str = "workspace", + all: bool = False, # noqa: A002 # pylint: disable=redefined-builtin + **kwargs, +) -> DiffResult: + if repo.scm.no_commits: + return {} - return _diff(format_dict(old), format_dict(new), with_unchanged=with_unchanged) + metrics = repo.metrics.show(revs=[a_rev, b_rev], hide_workspace=False, **kwargs) + return _diff(metrics, a_rev, b_rev, with_unchanged=all) diff --git a/dvc/repo/metrics/show.py b/dvc/repo/metrics/show.py index f04556c383..8a30d75587 100644 --- a/dvc/repo/metrics/show.py +++ b/dvc/repo/metrics/show.py @@ -1,36 +1,38 @@ import logging import os -from typing import TYPE_CHECKING, List - +from itertools import chain +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + Iterator, + List, + Optional, + Tuple, + TypedDict, + Union, +) + +from funcy import ldistinct from scmrepo.exceptions import SCMError -from dvc.fs.dvc import DVCFileSystem from dvc.repo import locked -from dvc.repo.collect import collect from dvc.scm import NoSCMError -from dvc.utils import as_posix, error_handler, errored_revisions, onerror_collect +from dvc.utils import as_posix, expand_paths from dvc.utils.collections import ensure_list from dvc.utils.serialize import load_path if TYPE_CHECKING: + from dvc.fs import FileSystem from dvc.output import Output + from dvc.repo import Repo + from dvc.scm import Git, NoSCM logger = logging.getLogger(__name__) -def _is_metric(out: "Output") -> bool: - return bool(out.metric) - - -def _to_fs_paths(metrics: List["Output"]) -> List["str"]: - result = [] - for out in metrics: - if out.metric: - result.append(out.repo.dvcfs.from_os_path(out.fs_path)) - return result - - -def _collect_top_level_metrics(repo): +def _collect_top_level_metrics(repo: "Repo") -> Iterator[str]: top_metrics = repo.index._metrics # pylint: disable=protected-access for dvcfile, metrics in top_metrics.items(): wdir = repo.fs.path.relpath(repo.fs.path.parent(dvcfile), repo.root_dir) @@ -39,14 +41,7 @@ def _collect_top_level_metrics(repo): yield repo.fs.path.normpath(path) -def _collect_metrics(repo, targets, recursive): - metrics, fs_paths = collect( - repo, targets=targets, output_filter=_is_metric, recursive=recursive - ) - return _to_fs_paths(metrics) + list(fs_paths) - - -def _extract_metrics(metrics, path, rev): +def _extract_metrics(metrics, path: str): if isinstance(metrics, (int, float, str)): return metrics @@ -55,74 +50,160 @@ def _extract_metrics(metrics, path, rev): ret = {} for key, val in metrics.items(): - m = _extract_metrics(val, path, rev) + m = _extract_metrics(val, path) if m not in (None, {}): ret[key] = m else: logger.debug( ( - "Could not parse '%s' metric from '%s' at '%s' " + "Could not parse '%s' metric from '%s'" "due to its unsupported type: '%s'" ), key, path, - rev, - type(val).__name__, + type(val), ) return ret -@error_handler -def _read_metric(path, fs, rev, **kwargs): +def _read_metric(fs: "FileSystem", path: str) -> Any: val = load_path(path, fs) - val = _extract_metrics(val, path, rev) + val = _extract_metrics(val, path) return val or {} -def _read_metrics(repo, metrics, rev, onerror=None): - fs = DVCFileSystem(repo=repo) - - relpath = "" - if repo.root_dir != repo.fs.path.getcwd(): - relpath = repo.fs.path.relpath(repo.root_dir, repo.fs.path.getcwd()) - - res = {} +def _read_metrics( + fs: "FileSystem", metrics: Iterable[str] +) -> Iterator[Tuple[str, Union[Exception, Any]]]: for metric in metrics: - rel_metric_path = os.path.join(relpath, *fs.path.parts(metric)) - if not fs.isfile(metric): - if fs.isfile(rel_metric_path): - metric = rel_metric_path - else: - continue - - res[rel_metric_path] = _read_metric(metric, fs, rev, onerror=onerror) + try: + yield metric, _read_metric(fs, metric) + except Exception as exc: # noqa: BLE001 # pylint:disable=broad-exception-caught + logger.debug(exc) + yield metric, exc + + +def metrics_from_target(repo: "Repo", targets: List[str]) -> Iterator["Output"]: + stages = chain.from_iterable(repo.stage.collect(target) for target in targets) + for stage in stages: + yield from stage.metrics + + +def _collect_metrics( + repo: "Repo", + targets: Optional[List[str]] = None, + stages: Optional[List[str]] = None, + outs_only: bool = False, +) -> List[str]: + metrics: List[str] = [] + + if targets: + # target is a repo-relative path + metrics.extend(targets) + + if not targets or outs_only: + outs = metrics_from_target(repo, stages) if stages else repo.index.metrics + relpath = repo.fs.path.relpath + metrics.extend(relpath(out.fs_path, repo.root_dir) for out in outs) + + if not targets and not outs_only and not stages: + # _collect_top_level_metrics returns repo-relative paths + metrics.extend(_collect_top_level_metrics(repo)) + + fs = repo.dvcfs + + # convert to posixpath for DVCFileSystem + paths = (fs.from_os_path(metric) for metric in metrics) + # make paths absolute for DVCFileSystem + repo_paths = (f"{fs.root_marker}{path}" for path in paths) + return ldistinct(expand_paths(fs, repo_paths)) + + +class FileResult(TypedDict, total=False): + data: Any + error: Exception + + +class Result(TypedDict, total=False): + data: Dict[str, FileResult] + error: Exception + + +def to_relpath(fs: "FileSystem", root_dir: str, d: Result) -> Result: + relpath = fs.path.relpath + cwd = fs.path.getcwd() + + start = relpath(cwd, root_dir) + data = d.get("data") + if data is not None: + d["data"] = {relpath(path, start): result for path, result in data.items()} + return d + + +def _gather_metrics( + repo: "Repo", + targets: Optional[List[str]] = None, + outs_only: bool = False, + stages: Optional[List[str]] = None, + on_error: str = "return", +) -> Dict[str, FileResult]: + assert on_error in ("raise", "return", "ignore") + + # `files` is a repo-relative posixpath that can be passed to DVCFileSystem + # It is absolute, i.e. has a root_marker `/` in front which we strip when returning + # the result and convert to appropriate repo-relative os.path. + files = _collect_metrics(repo, targets=targets, stages=stages, outs_only=outs_only) + data = {} + + fs = repo.dvcfs + for fs_path, result in _read_metrics(fs, files): + repo_path = fs_path.lstrip(fs.root_marker) + repo_os_path = os.sep.join(fs.path.parts(repo_path)) + if not isinstance(result, Exception): + data.update({repo_os_path: FileResult(data=result)}) + continue + + if on_error == "raise": + raise result + if on_error == "return": + data.update({repo_os_path: FileResult(error=result)}) + return data + + +def _hide_workspace( + scm: Union["Git", "NoSCM"], res: Dict[str, Result] +) -> Dict[str, Result]: + # Hide workspace params if they are the same as in the active branch + try: + active_branch = scm.active_branch() + except (SCMError, NoSCMError): + # SCMError - detached head + # NoSCMError - no repo case + pass + else: + if res.get("workspace") == res.get(active_branch): + res.pop("workspace", None) return res -def _gather_metrics(repo, targets, rev, recursive, onerror=None): - metrics = _collect_metrics(repo, targets, recursive) - metrics.extend(_collect_top_level_metrics(repo)) - return _read_metrics(repo, metrics, rev, onerror=onerror) - - @locked def show( - repo, - targets=None, - all_branches=False, - all_tags=False, - recursive=False, - revs=None, - all_commits=False, - onerror=None, - hide_workspace=True, -): - if onerror is None: - onerror = onerror_collect - - targets = ensure_list(targets) + repo: "Repo", + targets: Optional[List[str]] = None, + stages: Optional[List[str]] = None, + outs_only: bool = False, + all_branches: bool = False, + all_tags: bool = False, + revs: Optional[List[str]] = None, + all_commits: bool = False, + hide_workspace: bool = True, + on_error: str = "return", +) -> Dict[str, Result]: + assert on_error in ("raise", "return", "ignore") + + targets = [os.path.abspath(target) for target in ensure_list(targets)] targets = [repo.dvcfs.from_os_path(target) for target in targets] res = {} @@ -132,29 +213,23 @@ def show( all_tags=all_tags, all_commits=all_commits, ): - res[rev] = error_handler(_gather_metrics)( - repo, targets, rev, recursive, onerror=onerror - ) - - if hide_workspace: - # Hide workspace metrics if they are the same as in the active branch try: - active_branch = repo.scm.active_branch() - except (SCMError, NoSCMError): - # SCMError - detached head - # NoSCMError - no repo case - pass - else: - if res.get("workspace") == res.get(active_branch): - res.pop("workspace", None) - - errored = errored_revisions(res) - if errored: - from dvc.ui import ui + result = _gather_metrics( + repo, + targets=targets, + stages=stages, + outs_only=outs_only, + on_error=on_error, + ) + res[rev] = Result(data=result) + except Exception as exc: # noqa: BLE001 # pylint:disable=broad-exception-caught + if on_error == "raise": + raise - ui.error_write( - "DVC failed to load some metrics for following revisions:" - f" '{', '.join(errored)}'." - ) + logger.warning("failed to load metrics in revision %r, %s", rev, str(exc)) + if on_error == "return": + res[rev] = Result(error=exc) + if hide_workspace: + _hide_workspace(repo.scm, res) return res diff --git a/dvc/repo/params/diff.py b/dvc/repo/params/diff.py index 2ddb6bd3fe..6a7c9748f1 100644 --- a/dvc/repo/params/diff.py +++ b/dvc/repo/params/diff.py @@ -1,21 +1,21 @@ -from dvc.utils.diff import diff as _diff -from dvc.utils.diff import format_dict +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from dvc.repo import Repo + from dvc.repo.metrics.diff import DiffResult -def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): + +def diff( + repo: "Repo", + a_rev: str = "HEAD", + b_rev: str = "workspace", + all: bool = False, # noqa: A002 # pylint: disable=redefined-builtin + **kwargs, +) -> "DiffResult": if repo.scm.no_commits: return {} - with_unchanged = kwargs.pop("all", False) - - a_rev = a_rev or "HEAD" - b_rev = b_rev or "workspace" - - params = repo.params.show( - *args, **kwargs, revs=[a_rev, b_rev], hide_workspace=False - ) - - old = params.get(a_rev, {}).get("data", {}) - new = params.get(b_rev, {}).get("data", {}) + from dvc.repo.metrics.diff import _diff - return _diff(format_dict(old), format_dict(new), with_unchanged=with_unchanged) + params = repo.params.show(revs=[a_rev, b_rev], hide_workspace=False, **kwargs) + return _diff(params, a_rev, b_rev, with_unchanged=all) diff --git a/dvc/repo/params/show.py b/dvc/repo/params/show.py index 22bab2ae0c..742881c57c 100644 --- a/dvc/repo/params/show.py +++ b/dvc/repo/params/show.py @@ -1,33 +1,24 @@ import logging import os from collections import defaultdict -from copy import copy -from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Tuple +from itertools import chain +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union -from scmrepo.exceptions import SCMError - -from dvc.dependency.param import ParamsDependency +from dvc.dependency.param import ParamsDependency, read_param_file from dvc.repo import locked -from dvc.repo.collect import collect -from dvc.scm import NoSCMError +from dvc.repo.metrics.show import FileResult, Result from dvc.stage import PipelineStage -from dvc.ui import ui -from dvc.utils import as_posix, error_handler, errored_revisions, onerror_collect +from dvc.utils import as_posix, expand_paths from dvc.utils.collections import ensure_list -from dvc.utils.serialize import load_path if TYPE_CHECKING: - from dvc.output import Output + from dvc.fs import FileSystem from dvc.repo import Repo logger = logging.getLogger(__name__) -def _is_params(dep: "Output"): - return isinstance(dep, ParamsDependency) - - -def _collect_top_level_params(repo): +def _collect_top_level_params(repo: "Repo") -> Iterator[str]: top_params = repo.index._params # pylint: disable=protected-access for dvcfile, params in top_params.items(): wdir = repo.fs.path.relpath(repo.fs.path.parent(dvcfile), repo.root_dir) @@ -36,67 +27,58 @@ def _collect_top_level_params(repo): yield repo.fs.path.normpath(path) -def _collect_configs( - repo: "Repo", targets=None, deps=False, stages=None -) -> Tuple[List["Output"], List[str]]: - params, fs_paths = collect( - repo, - targets=targets or [], - deps=True, - output_filter=_is_params, - duplicates=deps or stages is not None, - ) - all_fs_paths = fs_paths + [p.fs_path for p in params] - if not any([deps, targets, stages]): - default_params = repo.fs.path.join( - repo.root_dir, ParamsDependency.DEFAULT_PARAMS_FILE - ) - if default_params not in all_fs_paths and repo.fs.exists(default_params): - fs_paths.append(default_params) - if targets and (deps or stages) and not params: - # A target has been provided but it is not used in the stages - fs_paths = [] - return params, fs_paths +def params_from_target( + repo: "Repo", targets: List[str] +) -> Iterator["ParamsDependency"]: + stages = chain.from_iterable(repo.stage.collect(target) for target in targets) + for stage in stages: + yield from stage.params -@error_handler -def _read_fs_path(fs, fs_path, **kwargs): - return load_path(fs_path, fs) +def _collect_params( + repo: "Repo", + targets: Union[List[str], Dict[str, List[str]], None] = None, + stages: Optional[List[str]] = None, + deps_only: bool = False, + default_file: Optional[str] = None, +) -> Dict[str, List[str]]: + from dvc.dependency import _merge_params + if isinstance(targets, list): + targets = {target: [] for target in targets} -def _read_params( - repo, - params, - params_fs_paths, - deps=False, - onerror: Optional[Callable] = None, - stages: Optional[Iterable[str]] = None, -): - res: Dict[str, Dict] = defaultdict(lambda: defaultdict(dict)) - fs_paths = copy(params_fs_paths) + params: List[Dict[str, List[str]]] = [] - if deps or stages: - for param in params: - if stages and param.stage.addressing not in stages: - continue - params_dict = error_handler(param.read_params)( - onerror=onerror, flatten=False - ) - if params_dict: - name = os.sep.join(repo.fs.path.relparts(param.fs_path)) - res[name]["data"].update(params_dict["data"]) - if name in fs_paths: - fs_paths.remove(name) - else: - fs_paths += [param.fs_path for param in params] - - for fs_path in fs_paths: - from_path = _read_fs_path(repo.fs, fs_path, onerror=onerror) - if from_path: - name = os.sep.join(repo.fs.path.relparts(fs_path)) - res[name] = from_path + if targets: + # target is a repo-relative path + params.extend({file: params} for file, params in targets.items()) - return res + if not targets or stages: + deps = params_from_target(repo, stages) if stages else repo.index.params + relpath = repo.fs.path.relpath + params.extend( + {relpath(dep.fs_path, repo.root_dir): list(dep.params)} for dep in deps + ) + + fs = repo.dvcfs + + if not targets and not deps_only and not stages: + # _collect_top_level_params returns repo-relative paths + params.extend({param: []} for param in _collect_top_level_params(repo)) + if default_file and fs.exists(f"{fs.root_marker}{default_file}"): + params.append({default_file: []}) + + # combine all the param files and the keypaths to track + all_params = _merge_params(params) + + ret = {} + for param, _params in all_params.items(): + # convert to posixpath for DVCFileSystem + path = fs.from_os_path(param) + # make paths absolute for DVCFileSystem + repo_path = f"{fs.root_marker}{path}" + ret.update({file: _params for file in expand_paths(fs, [repo_path])}) + return ret def _collect_vars(repo, params, stages=None) -> Dict: @@ -109,82 +91,116 @@ def _collect_vars(repo, params, stages=None) -> Dict: for file, vars_ in stage.tracked_vars.items(): # `params` file are shown regardless of `tracked` or not # to reduce noise and duplication, they are skipped - if file in params: + + # `file` is relative + abspath = repo.fs.path.abspath(file) + repo_path = repo.dvcfs.from_os_path(abspath) + if repo_path in params: continue - name = os.sep.join(repo.fs.path.parts(file)) - vars_params[name].update(vars_) - return vars_params + vars_params[repo_path].update(vars_) + return dict(vars_params) + + +def _read_params( + fs: "FileSystem", params: Dict[str, List[str]] +) -> Iterator[Tuple[str, Union[Exception, Any]]]: + for file_path, key_paths in params.items(): + try: + yield file_path, read_param_file(fs, file_path, key_paths) + except Exception as exc: # noqa: BLE001 # pylint:disable=broad-exception-caught + logger.debug(exc) + yield file_path, exc + + +def _gather_params( + repo: "Repo", + targets: Union[List[str], Dict[str, List[str]], None] = None, + deps_only: bool = False, + stages: Optional[List[str]] = None, + on_error: str = "return", +): + assert on_error in ("raise", "return", "ignore") + + # `files` is a repo-relative posixpath that can be passed to DVCFileSystem + # It is absolute, i.e. has a root_marker `/` in front which we strip when returning + # the result and convert to appropriate repo-relative os.path. + files_keypaths = _collect_params( + repo, + targets=targets, + stages=stages, + deps_only=deps_only, + default_file=ParamsDependency.DEFAULT_PARAMS_FILE, + ) + + data: Dict[str, FileResult] = {} + + fs = repo.dvcfs + for fs_path, result in _read_params(fs, files_keypaths): + repo_path = fs_path.lstrip(fs.root_marker) + repo_os_path = os.sep.join(fs.path.parts(repo_path)) + if not isinstance(result, Exception): + data.update({repo_os_path: FileResult(data=result)}) + continue + + if on_error == "raise": + raise result + if on_error == "return": + data.update({repo_os_path: FileResult(error=result)}) + + if not (stages or targets): + data.update( + { + path: FileResult(data=result) + for path, result in _collect_vars(repo, data).items() + } + ) + return data @locked def show( - repo, - revs=None, - targets=None, - deps=False, - onerror: Optional[Callable] = None, - stages=None, - hide_workspace=True, -): - if onerror is None: - onerror = onerror_collect + repo: "Repo", + targets: Optional[List[str]] = None, + stages: Optional[List[str]] = None, + deps_only: bool = False, + all_branches: bool = False, + all_tags: bool = False, + revs: Optional[List[str]] = None, + all_commits: bool = False, + hide_workspace: bool = True, + on_error: str = "return", +) -> Dict[str, Result]: + assert on_error in ("raise", "return", "ignore") res = {} targets = ensure_list(targets) targets = [repo.dvcfs.from_os_path(target) for target in targets] - for branch in repo.brancher(revs=revs): - params = error_handler(_gather_params)( - repo=repo, - targets=targets, - deps=deps, - onerror=onerror, - stages=stages, - ) - - if params: - res[branch] = params + for rev in repo.brancher( + revs=revs, + all_branches=all_branches, + all_tags=all_tags, + all_commits=all_commits, + ): + try: + params = _gather_params( + repo=repo, + targets=targets, + stages=stages, + deps_only=deps_only, + on_error=on_error, + ) + res[rev] = Result(data=params) + except Exception as exc: # noqa: BLE001 # pylint:disable=broad-exception-caught + if on_error == "raise": + raise + logger.warning("failed to load params in revision %r, %s", rev, str(exc)) + if on_error == "return": + res[rev] = Result(error=exc) if hide_workspace: - # Hide workspace params if they are the same as in the active branch - try: - active_branch = repo.scm.active_branch() - except (SCMError, NoSCMError): - # SCMError - detached head - # NoSCMError - no repo case - pass - else: - if res.get("workspace") == res.get(active_branch): - res.pop("workspace", None) - - errored = errored_revisions(res) - if errored: - ui.error_write( - "DVC failed to load some parameters for following revisions:" - f" '{', '.join(errored)}'." - ) + from dvc.repo.metrics.show import _hide_workspace + _hide_workspace(repo.scm, res) return res - - -def _gather_params(repo, targets=None, deps=False, onerror=None, stages=None): - param_outs, params_fs_paths = _collect_configs( - repo, targets=targets, deps=deps, stages=stages - ) - params_fs_paths.extend(_collect_top_level_params(repo=repo)) - params = _read_params( - repo, - params=param_outs, - params_fs_paths=params_fs_paths, - deps=deps, - onerror=onerror, - stages=stages, - ) - vars_params = _collect_vars(repo, params, stages=stages) - - # NOTE: only those that are not added as a ParamDependency are - # included so we don't need to recursively merge them yet. - for key, vals in vars_params.items(): - params[key]["data"] = vals - return params diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index 522967b489..f51e6f6360 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -43,6 +43,7 @@ ) if TYPE_CHECKING: + from dvc.dependency import ParamsDependency from dvc.dvcfile import ProjectFile, SingleStageFile from dvc.output import Output from dvc.repo import Repo @@ -205,6 +206,16 @@ def dvcfile(self) -> Union["ProjectFile", "SingleStageFile"]: def dvcfile(self, dvcfile: Union["ProjectFile", "SingleStageFile"]) -> None: self._dvcfile = dvcfile + @property + def params(self) -> List["ParamsDependency"]: + from dvc.dependency import ParamsDependency + + return [dep for dep in self.deps if isinstance(dep, ParamsDependency)] + + @property + def metrics(self) -> List["Output"]: + return [out for out in self.outs if out.metric] + def __repr__(self): return f"Stage: '{self.addressing}'" diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 4022691d49..668f2b8e53 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -6,10 +6,13 @@ import os import re import sys -from typing import Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, Iterable, Iterator, List, Optional, Tuple import colorama +if TYPE_CHECKING: + from dvc.fs import FileSystem + logger = logging.getLogger(__name__) LARGE_DIR_SIZE = 100 @@ -413,3 +416,11 @@ def errored_revisions(rev_data: Dict) -> List: if nested_contains(data, "error"): result.append(revision) return result + + +def expand_paths(fs: "FileSystem", paths: Iterable[str]) -> Iterator[str]: + for path in paths: + if fs.isdir(path): + yield from fs.find(path) + else: + yield path diff --git a/tests/func/api/test_show.py b/tests/func/api/test_show.py index f181b4ff5e..6b0c0e091b 100644 --- a/tests/func/api/test_show.py +++ b/tests/func/api/test_show.py @@ -159,9 +159,7 @@ def test_params_show_targets(params_repo): "bar": 2, "foobar": 3, } - assert api.params_show("params.yaml", stages="stage-1") == { - "foo": 5, - } + assert api.params_show("params.yaml", stages="stage-1") == {"bar": 2, "foo": 5} def test_params_show_deps(params_repo): @@ -181,7 +179,7 @@ def test_params_show_stages(params_repo): stages=["stage-1", "stage-2", "stage-3"] ) - assert api.params_show("params.json", stages="stage-3") == {"foobar": 3} + assert api.params_show("params.json", stages="stage-3") == {"bar": 2, "foobar": 3} assert api.params_show(stages="stage-0") == {} @@ -286,9 +284,7 @@ def test_params_show_untracked_target(params_repo, tmp_dir): assert api.params_show("params_foo.yaml") == {"foo": 1} - assert api.params_show("params_foo.yaml", stages="stage-0") == {} - - assert api.params_show("params_foo.yaml", deps=True) == {} + assert api.params_show("params_foo.yaml", stages="stage-0") == {"foo": 1} def test_metrics_show_no_args(metrics_repo): diff --git a/tests/func/metrics/test_diff.py b/tests/func/metrics/test_diff.py index f1498847c6..79d3ae6ccf 100644 --- a/tests/func/metrics/test_diff.py +++ b/tests/func/metrics/test_diff.py @@ -5,6 +5,7 @@ from dvc.cli import main from dvc.utils import relpath +from dvc.utils.serialize import JSONFileCorruptedError def test_metrics_diff_simple(tmp_dir, scm, dvc, run_copy_metrics): @@ -21,7 +22,7 @@ def _gen(val): expected = {"m.yaml": {"": {"old": 1, "new": 3, "diff": 2}}} - assert dvc.metrics.diff(a_rev="HEAD~2") == expected + assert dvc.metrics.diff(a_rev="HEAD~2") == {"diff": expected} def test_metrics_diff_yaml(tmp_dir, scm, dvc, run_copy_metrics): @@ -47,7 +48,7 @@ def _gen(val): } } - assert dvc.metrics.diff(a_rev="HEAD~2") == expected + assert dvc.metrics.diff(a_rev="HEAD~2") == {"diff": expected} def test_metrics_diff_json(tmp_dir, scm, dvc, run_copy_metrics): @@ -72,7 +73,7 @@ def _gen(val): "a.b.c": {"old": 1, "new": 3, "diff": 2}, } } - assert dvc.metrics.diff(a_rev="HEAD~2") == expected + assert dvc.metrics.diff(a_rev="HEAD~2") == {"diff": expected} def test_metrics_diff_json_unchanged(tmp_dir, scm, dvc, run_copy_metrics): @@ -94,7 +95,7 @@ def _gen(val): assert dvc.metrics.diff(a_rev="HEAD~2") == {} -def test_metrics_diff_broken_json(tmp_dir, scm, dvc, run_copy_metrics): +def test_metrics_diff_broken_json(M, tmp_dir, scm, dvc, run_copy_metrics): metrics = {"a": {"b": {"c": 1, "d": 1, "e": "3"}}} (tmp_dir / "m_temp.json").dump(metrics) run_copy_metrics( @@ -108,11 +109,14 @@ def test_metrics_diff_broken_json(tmp_dir, scm, dvc, run_copy_metrics): (tmp_dir / "m.json").write_text(json.dumps(metrics) + "ma\nlformed\n") assert dvc.metrics.diff() == { - "m.json": { - "a.b.e": {"old": "3", "new": None}, - "a.b.c": {"old": 1, "new": None}, - "a.b.d": {"old": 1, "new": None}, - } + "diff": { + "m.json": { + "a.b.e": {"old": "3", "new": None}, + "a.b.c": {"old": 1, "new": None}, + "a.b.d": {"old": 1, "new": None}, + } + }, + "errors": {"workspace": {"m.json": M.instance_of(JSONFileCorruptedError)}}, } @@ -129,15 +133,17 @@ def test_metrics_diff_new_metric(tmp_dir, scm, dvc, run_copy_metrics): ) assert dvc.metrics.diff() == { - "m.json": { - "a.b.e": {"old": None, "new": "3"}, - "a.b.c": {"old": None, "new": 1}, - "a.b.d": {"old": None, "new": 1}, + "diff": { + "m.json": { + "a.b.e": {"old": None, "new": "3"}, + "a.b.c": {"old": None, "new": 1}, + "a.b.d": {"old": None, "new": 1}, + } } } -def test_metrics_diff_deleted_metric(tmp_dir, scm, dvc, run_copy_metrics): +def test_metrics_diff_deleted_metric(M, tmp_dir, scm, dvc, run_copy_metrics): metrics = {"a": {"b": {"c": 1, "d": 1, "e": "3"}}} (tmp_dir / "m_temp.json").dump(metrics) run_copy_metrics( @@ -151,11 +157,14 @@ def test_metrics_diff_deleted_metric(tmp_dir, scm, dvc, run_copy_metrics): (tmp_dir / "m.json").unlink() assert dvc.metrics.diff() == { - "m.json": { - "a.b.e": {"old": "3", "new": None}, - "a.b.c": {"old": 1, "new": None}, - "a.b.d": {"old": 1, "new": None}, - } + "diff": { + "m.json": { + "a.b.e": {"old": "3", "new": None}, + "a.b.c": {"old": 1, "new": None}, + "a.b.d": {"old": 1, "new": None}, + } + }, + "errors": {"workspace": {"m.json": M.instance_of(FileNotFoundError)}}, } @@ -173,9 +182,11 @@ def test_metrics_diff_with_unchanged(tmp_dir, scm, dvc, run_copy_metrics): tmp_dir.scm_gen("metrics.yaml", "foo: 3\nxyz: 10", commit="3") assert dvc.metrics.diff(a_rev="HEAD~2", all=True) == { - "metrics.yaml": { - "foo": {"old": 1, "new": 3, "diff": 2}, - "xyz": {"old": 10, "new": 10, "diff": 0}, + "diff": { + "metrics.yaml": { + "foo": {"old": 1, "new": 3, "diff": 2}, + "xyz": {"old": 10, "new": 10, "diff": 0}, + } } } @@ -206,7 +217,7 @@ def _gen(val): expected = {"m.yaml": {"": {"old": 3, "new": 4, "diff": 1}}} - assert dvc.metrics.diff() == expected + assert dvc.metrics.diff() == {"diff": expected} def test_metrics_diff_cli(tmp_dir, scm, dvc, run_copy_metrics, caplog, capsys): @@ -242,7 +253,9 @@ def _gen(val): _gen(3) result = dvc.metrics.diff(targets=["some_file.yaml"], a_rev="HEAD~2") - assert result == {"some_file.yaml": {"foo": {"old": 1, "new": 3, "diff": 2}}} + assert result == { + "diff": {"some_file.yaml": {"foo": {"old": 1, "new": 3, "diff": 2}}} + } @pytest.mark.parametrize( @@ -264,7 +277,9 @@ def test_diff_top_level_metrics(tmp_dir, dvc, scm, dvcfile, metrics_file): metrics_file.dump({"foo": 5}) assert dvc.metrics.diff() == { - relpath(directory / metrics_file): {"foo": {"diff": 2, "new": 5, "old": 3}} + "diff": { + relpath(directory / metrics_file): {"foo": {"diff": 2, "new": 5, "old": 3}} + } } diff --git a/tests/func/metrics/test_show.py b/tests/func/metrics/test_show.py index 209e168c61..1ea9252e9f 100644 --- a/tests/func/metrics/test_show.py +++ b/tests/func/metrics/test_show.py @@ -1,13 +1,16 @@ +import json import os +from os.path import join import pytest from funcy import get_in +from dvc.cli import main from dvc.dvcfile import PROJECT_FILE from dvc.exceptions import OverlappingOutputPathsError from dvc.repo import Repo from dvc.utils.fs import remove -from dvc.utils.serialize import JSONFileCorruptedError, YAMLFileCorruptedError +from dvc.utils.serialize import JSONFileCorruptedError def test_show_simple(tmp_dir, dvc, run_copy_metrics): @@ -32,11 +35,13 @@ def test_show_simple_from_subdir(tmp_dir, dvc, run_copy_metrics): expected_path = os.path.join("subdir", "metrics.yaml") assert dvc.metrics.show() == {"": {"data": {expected_path: {"data": 1.1}}}} - expected_path = os.path.join("..", "subdir", "metrics.yaml") + expected_path = os.path.join("subdir", "metrics.yaml") with subdir.chdir(): assert dvc.metrics.show() == {"": {"data": {expected_path: {"data": 1.1}}}} subdir2 = tmp_dir / "subdir2" subdir2.mkdir() + + expected_path = os.path.join("subdir", "metrics.yaml") with subdir2.chdir(): assert dvc.metrics.show() == {"": {"data": {expected_path: {"data": 1.1}}}} @@ -129,7 +134,7 @@ def test_show_subrepo_with_preexisting_tags(tmp_dir, scm): scm.commit("init metrics") scm.tag("v1") - expected_path = os.path.join("subdir", "metrics.yaml") + expected_path = "metrics.yaml" assert dvc.metrics.show(all_tags=True) == { "workspace": {"data": {expected_path: {"data": {"foo": 1}}}}, "v1": {"data": {expected_path: {"data": {"foo": 1}}}}, @@ -201,13 +206,13 @@ def test_show_non_metric_branch(tmp_dir, scm, use_dvc): assert not (tmp_dir / ".dvc").exists() -def test_non_metric_and_recursive_show(tmp_dir, dvc, run_copy_metrics): +def test_non_metric_and_dir_show(tmp_dir, dvc, run_copy_metrics): tmp_dir.gen({"metrics_t.yaml": "foo: 1.1", "metrics": {"metric1.yaml": "bar: 1.2"}}) metric2 = os.fspath(tmp_dir / "metrics" / "metric2.yaml") run_copy_metrics("metrics_t.yaml", metric2, name="copy-metric2", metrics=[metric2]) - assert dvc.metrics.show(targets=["metrics_t.yaml", "metrics"], recursive=True) == { + assert dvc.metrics.show(targets=["metrics_t.yaml", "metrics"]) == { "": { "data": { os.path.join("metrics", "metric1.yaml"): {"data": {"bar": 1.2}}, @@ -244,15 +249,18 @@ def test_show_malformed_metric(tmp_dir, scm, dvc, caplog): ) -def test_metrics_show_no_target(tmp_dir, dvc, capsys): - assert dvc.metrics.show(targets=["metrics.json"]) == {"": {}} +def test_metrics_show_no_target(M, tmp_dir, dvc, capsys): + assert dvc.metrics.show(targets=["metrics.json"]) == { + "": {"data": {"metrics.json": {"error": M.instance_of(FileNotFoundError)}}} + } def test_show_no_metrics_files(tmp_dir, dvc, caplog): - assert dvc.metrics.show() == {"": {}} + assert dvc.metrics.show() == {"": {"data": {}}} @pytest.mark.parametrize("clear_before_run", [True, False]) +@pytest.mark.skip(reason="no longer raising graph errors") def test_metrics_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run): data_dir = tmp_dir / "data" data_dir.mkdir() @@ -286,13 +294,19 @@ def test_metrics_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run): @pytest.mark.parametrize( - "file,error_path", + "file,error_path,err_type", ( - (PROJECT_FILE, ["workspace", "error"]), - ("metrics.yaml", ["workspace", "data", "metrics.yaml", "error"]), + (PROJECT_FILE, ["workspace", "error", "type"], "YAMLSyntaxError"), + ( + "metrics.yaml", + ["workspace", "data", "metrics.yaml", "error", "type"], + "YAMLFileCorruptedError", + ), ), ) -def test_log_errors(tmp_dir, scm, dvc, capsys, run_copy_metrics, file, error_path): +def test_log_errors( + tmp_dir, scm, dvc, capsys, run_copy_metrics, file, error_path, err_type +): tmp_dir.gen("metrics_t.yaml", "m: 1.1") run_copy_metrics( "metrics_t.yaml", @@ -306,11 +320,34 @@ def test_log_errors(tmp_dir, scm, dvc, capsys, run_copy_metrics, file, error_pat with open(file, "a", encoding="utf-8") as fd: fd.write("\nMALFORMED!") - result = dvc.metrics.show(revs=["v1"]) + assert main(["metrics", "show", "--all-tags", "--json"]) == 0 - _, error = capsys.readouterr() + out, error = capsys.readouterr() + result = json.loads(out) - assert isinstance(get_in(result, error_path), YAMLFileCorruptedError) + assert get_in(result, error_path) == err_type assert ( "DVC failed to load some metrics for following revisions: 'workspace'." in error ) + + +def test_cached_metrics(tmp_dir, dvc, scm, remote): + tmp_dir.dvc_gen( + { + "dir": {"metrics.yaml": "foo: 3\nbar: 10"}, + "dir2": {"metrics.yaml": "foo: 42\nbar: 4"}, + } + ) + dvc.push() + dvc.cache.local.clear() + + (tmp_dir / "dvc.yaml").dump({"metrics": ["dir/metrics.yaml", "dir2"]}) + + assert dvc.metrics.show() == { + "": { + "data": { + join("dir", "metrics.yaml"): {"data": {"foo": 3, "bar": 10}}, + join("dir2", "metrics.yaml"): {"data": {"foo": 42, "bar": 4}}, + } + } + } diff --git a/tests/func/params/test_diff.py b/tests/func/params/test_diff.py index 1d0021b0c1..4280558628 100644 --- a/tests/func/params/test_diff.py +++ b/tests/func/params/test_diff.py @@ -27,7 +27,7 @@ def test_diff(tmp_dir, scm, dvc): tmp_dir.scm_gen("params.yaml", "foo: qux", commit="qux") assert dvc.params.diff(a_rev="HEAD~2") == { - "params.yaml": {"foo": {"old": "bar", "new": "qux"}} + "diff": {"params.yaml": {"foo": {"old": "bar", "new": "qux"}}} } @@ -40,17 +40,21 @@ def test_diff_dirty(tmp_dir, scm, dvc): tmp_dir.scm_gen("params.yaml", "foo: baz", commit="baz") tmp_dir.gen("params.yaml", "foo: qux") - assert dvc.params.diff() == {"params.yaml": {"foo": {"old": "baz", "new": "qux"}}} + assert dvc.params.diff() == { + "diff": {"params.yaml": {"foo": {"old": "baz", "new": "qux"}}} + } def test_diff_new(tmp_dir, scm, dvc): tmp_dir.gen("params.yaml", "foo: bar") dvc.run(cmd="echo params.yaml", params=["foo"], name="echo-params") - assert dvc.params.diff() == {"params.yaml": {"foo": {"old": None, "new": "bar"}}} + assert dvc.params.diff() == { + "diff": {"params.yaml": {"foo": {"old": None, "new": "bar"}}} + } -def test_diff_deleted(tmp_dir, scm, dvc): +def test_diff_deleted(M, tmp_dir, scm, dvc): tmp_dir.gen("params.yaml", "foo: bar") dvc.run(cmd="echo params.yaml", params=["foo"], name="echo-params") scm.add(["params.yaml", "Dvcfile"]) @@ -58,7 +62,10 @@ def test_diff_deleted(tmp_dir, scm, dvc): (tmp_dir / "params.yaml").unlink() - assert dvc.params.diff() == {"params.yaml": {"foo": {"old": "bar", "new": None}}} + assert dvc.params.diff() == { + "diff": {"params.yaml": {"foo": {"old": "bar", "new": None}}}, + "errors": {"workspace": {"params.yaml": M.instance_of(FileNotFoundError)}}, + } def test_diff_list(tmp_dir, scm, dvc): @@ -70,8 +77,10 @@ def test_diff_list(tmp_dir, scm, dvc): tmp_dir.gen("params.yaml", "foo:\n- bar\n- baz\n- qux") assert dvc.params.diff() == { - "params.yaml": { - "foo": {"old": "['bar', 'baz']", "new": "['bar', 'baz', 'qux']"} + "diff": { + "params.yaml": { + "foo": {"old": "['bar', 'baz']", "new": "['bar', 'baz', 'qux']"} + } } } @@ -85,7 +94,7 @@ def test_diff_dict(tmp_dir, scm, dvc): tmp_dir.gen("params.yaml", "foo:\n bar: qux") assert dvc.params.diff() == { - "params.yaml": {"foo.bar": {"old": "baz", "new": "qux"}} + "diff": {"params.yaml": {"foo.bar": {"old": "baz", "new": "qux"}}} } @@ -99,9 +108,11 @@ def test_diff_with_unchanged(tmp_dir, scm, dvc): tmp_dir.scm_gen("params.yaml", "foo: qux\nxyz: val", commit="qux") assert dvc.params.diff(a_rev="HEAD~2", all=True) == { - "params.yaml": { - "foo": {"old": "bar", "new": "qux"}, - "xyz": {"old": "val", "new": "val"}, + "diff": { + "params.yaml": { + "foo": {"old": "bar", "new": "qux"}, + "xyz": {"old": "val", "new": "val"}, + } } } @@ -119,7 +130,7 @@ def test_pipeline_tracked_params(tmp_dir, scm, dvc, run_copy): tmp_dir.scm_gen("params.yaml", "foo: qux\nxyz: val", commit="qux") assert dvc.params.diff(a_rev="HEAD~2") == { - "params.yaml": {"foo": {"old": "bar", "new": "qux"}} + "diff": {"params.yaml": {"foo": {"old": "bar", "new": "qux"}}} } @@ -148,9 +159,11 @@ def test_vars_shows_on_params_diff(tmp_dir, scm, dvc): } (tmp_dir / "dvc.yaml").dump(d) assert dvc.params.diff() == { - "test_params.yaml": { - "vars.model1.epoch": {"new": 15, "old": None}, - "vars.model2.epoch": {"new": 35, "old": None}, + "diff": { + "test_params.yaml": { + "vars.model1.epoch": {"new": 15, "old": None}, + "vars.model2.epoch": {"new": 35, "old": None}, + } } } scm.add(["dvc.yaml", "test_params.yaml"]) @@ -159,15 +172,19 @@ def test_vars_shows_on_params_diff(tmp_dir, scm, dvc): param_data["vars"]["model1"]["epoch"] = 20 (tmp_dir / params_file).dump(param_data) assert dvc.params.diff() == { - "test_params.yaml": {"vars.model1.epoch": {"new": 20, "old": 15, "diff": 5}} + "diff": { + "test_params.yaml": {"vars.model1.epoch": {"new": 20, "old": 15, "diff": 5}} + } } data_dir = tmp_dir / "data" data_dir.mkdir() with data_dir.chdir(): assert dvc.params.diff() == { - relpath(params_file): { - "vars.model1.epoch": {"new": 20, "old": 15, "diff": 5} + "diff": { + str(params_file.relative_to(tmp_dir)): { + "vars.model1.epoch": {"new": 20, "old": 15, "diff": 5} + } } } @@ -202,16 +219,18 @@ def test_diff_targeted(tmp_dir, scm, dvc, run_copy): ) assert dvc.params.diff(a_rev="HEAD~2") == { - "params.yaml": {"foo": {"old": "bar", "new": "qux"}}, - "other_params.yaml": {"xyz": {"old": "val", "new": "val3"}}, + "diff": { + "params.yaml": {"foo": {"old": "bar", "new": "qux"}}, + "other_params.yaml": {"xyz": {"old": "val", "new": "val3"}}, + } } assert dvc.params.diff(a_rev="HEAD~2", targets=["params.yaml"]) == { - "params.yaml": {"foo": {"old": "bar", "new": "qux"}} + "diff": {"params.yaml": {"foo": {"old": "bar", "new": "qux"}}} } assert dvc.params.diff(a_rev="HEAD~2", targets=["other_params.yaml"]) == { - "other_params.yaml": {"xyz": {"old": "val", "new": "val3"}} + "diff": {"other_params.yaml": {"xyz": {"old": "val", "new": "val3"}}} } @@ -228,10 +247,12 @@ def test_diff_without_targets_specified(tmp_dir, dvc, scm, file): params_file.dump({"foo": {"bar": "baz"}, "y": "100"}) assert dvc.params.diff() == { - file: { - "foo.bar": {"new": "baz", "old": "bar"}, - "x": {"new": None, "old": "0"}, - "y": {"new": "100", "old": None}, + "diff": { + file: { + "foo.bar": {"new": "baz", "old": "bar"}, + "x": {"new": None, "old": "0"}, + "y": {"new": "100", "old": None}, + } } } @@ -255,7 +276,9 @@ def test_diff_top_level_params(tmp_dir, dvc, scm, dvcfile, params_file): params_file.dump({"foo": 5}) assert dvc.params.diff() == { - relpath(directory / params_file): {"foo": {"diff": 2, "new": 5, "old": 3}} + "diff": { + relpath(directory / params_file): {"foo": {"diff": 2, "new": 5, "old": 3}} + } } diff --git a/tests/func/params/test_show.py b/tests/func/params/test_show.py index dc8d849e0d..1fa60d39c1 100644 --- a/tests/func/params/test_show.py +++ b/tests/func/params/test_show.py @@ -1,15 +1,13 @@ -import operator -from functools import reduce +from os.path import join import pytest from dvc.repo import Repo from dvc.repo.stage import PROJECT_FILE -from dvc.utils.serialize import YAMLFileCorruptedError def test_show_empty(dvc): - assert dvc.params.show() == {} + assert dvc.params.show() == {"": {"data": {}}} def test_show(tmp_dir, dvc): @@ -98,7 +96,7 @@ def test_pipeline_params(tmp_dir, scm, dvc, run_copy): tmp_dir.scm_gen("params.yaml", "foo: baz\nxyz: val\nabc: ignore", commit="baz") tmp_dir.scm_gen("params.yaml", "foo: qux\nxyz: val\nabc: ignore", commit="qux") - assert dvc.params.show(revs=["master"], deps=True) == { + assert dvc.params.show(revs=["master"], deps_only=True) == { "master": {"data": {"params.yaml": {"data": {"foo": "qux", "xyz": "val"}}}} } assert dvc.params.show(revs=["master"]) == { @@ -120,41 +118,6 @@ def test_show_no_repo(tmp_dir): } -@pytest.mark.parametrize( - "file,error_path", - ( - (PROJECT_FILE, ["v1", "error"]), - ("params_other.yaml", ["v1", "data", "params_other.yaml", "error"]), - ), -) -def test_log_errors(tmp_dir, scm, dvc, capsys, file, error_path): - tmp_dir.gen("params_other.yaml", "foo: bar") - dvc.run( - cmd="echo params_other.yaml", - params=["params_other.yaml:foo"], - name="train", - ) - - rename = (tmp_dir / file).read_text() - with open(tmp_dir / file, "a", encoding="utf-8") as fd: - fd.write("\nmalformed!") - - scm.add([PROJECT_FILE, "params_other.yaml"]) - scm.commit("init") - scm.tag("v1") - - (tmp_dir / file).write_text(rename) - - result = dvc.params.show(revs=["v1"]) - - _, error = capsys.readouterr() - - assert isinstance( - reduce(operator.getitem, error_path, result), YAMLFileCorruptedError - ) - assert "DVC failed to load some parameters for following revisions: 'v1'." in error - - @pytest.mark.parametrize("file", ["params.yaml", "other_params.yaml"]) def test_show_without_targets_specified(tmp_dir, dvc, scm, file): params_file = tmp_dir / file @@ -177,7 +140,7 @@ def test_deps_multi_stage(tmp_dir, scm, dvc, run_copy): scm.add(["params.yaml", PROJECT_FILE]) scm.commit("add stage") - assert dvc.params.show(revs=["master"], deps=True) == { + assert dvc.params.show(revs=["master"], deps_only=True) == { "master": {"data": {"params.yaml": {"data": {"foo": "bar", "xyz": "val"}}}} } @@ -190,21 +153,32 @@ def test_deps_with_targets(tmp_dir, scm, dvc, run_copy): scm.add(["params.yaml", PROJECT_FILE]) scm.commit("add stage") - assert dvc.params.show(targets=["params.yaml"], deps=True) == { - "": {"data": {"params.yaml": {"data": {"foo": "bar", "xyz": "val"}}}} + assert dvc.params.show(targets=["params.yaml"], deps_only=True) == { + "": { + "data": { + "params.yaml": {"data": {"abc": "ignore", "foo": "bar", "xyz": "val"}} + } + } } -def test_deps_with_bad_target(tmp_dir, scm, dvc, run_copy): - tmp_dir.gen( +def test_cached_params(tmp_dir, dvc, scm, remote): + tmp_dir.dvc_gen( { - "foo": "foo", - "foobar": "", - "params.yaml": "foo: bar\nxyz: val\nabc: ignore", + "dir": {"params.yaml": "foo: 3\nbar: 10"}, + "dir2": {"params.yaml": "foo: 42\nbar: 4"}, } ) - run_copy("foo", "bar", name="copy-foo-bar", params=["foo"]) - run_copy("foo", "bar1", name="copy-foo-bar-1", params=["xyz"]) - scm.add(["params.yaml", PROJECT_FILE]) - scm.commit("add stage") - assert dvc.params.show(targets=["foobar"], deps=True) == {} + dvc.push() + dvc.cache.local.clear() + + (tmp_dir / "dvc.yaml").dump({"params": ["dir/params.yaml", "dir2"]}) + + assert dvc.params.show() == { + "": { + "data": { + join("dir", "params.yaml"): {"data": {"foo": 3, "bar": 10}}, + join("dir2", "params.yaml"): {"data": {"foo": 42, "bar": 4}}, + } + } + } diff --git a/tests/unit/command/test_metrics.py b/tests/unit/command/test_metrics.py index 57b88f7aef..9dccb63dbd 100644 --- a/tests/unit/command/test_metrics.py +++ b/tests/unit/command/test_metrics.py @@ -11,7 +11,6 @@ def test_metrics_diff(dvc, mocker, capsys): "diff", "HEAD~10", "HEAD~1", - "-R", "--all", "--md", "--targets", @@ -24,7 +23,10 @@ def test_metrics_diff(dvc, mocker, capsys): assert cli_args.func == CmdMetricsDiff cmd = cli_args.func(cli_args) - diff = {"metrics.yaml": {"": {"old": 1, "new": 3}}} + diff = { + "diff": {"metrics.yaml": {"": {"old": 1, "new": 3}}}, + "errors": {"workspace": Exception}, + } metrics_diff = mocker.patch("dvc.repo.metrics.diff.diff", return_value=diff) show_diff_mock = mocker.patch("dvc.compare.show_diff") @@ -35,11 +37,10 @@ def test_metrics_diff(dvc, mocker, capsys): targets=["target1", "target2"], a_rev="HEAD~10", b_rev="HEAD~1", - recursive=True, all=True, ) show_diff_mock.assert_called_once_with( - diff, + diff["diff"], title="Metric", no_path=True, precision=5, @@ -57,7 +58,6 @@ def test_metrics_diff_json(dvc, mocker, capsys): "diff", "HEAD~10", "HEAD~1", - "-R", "--all", "--json", "--targets", @@ -72,7 +72,7 @@ def test_metrics_diff_json(dvc, mocker, capsys): assert cli_args.func == CmdMetricsDiff cmd = cli_args.func(cli_args) - diff = {"metrics.yaml": {"": {"old": 1, "new": 3}}} + diff = {"diff": {"metrics.yaml": {"": {"old": 1, "new": 3}}}} metrics_diff = mocker.patch("dvc.repo.metrics.diff.diff", return_value=diff) show_diff_mock = mocker.patch("dvc.compare.show_diff") @@ -83,11 +83,10 @@ def test_metrics_diff_json(dvc, mocker, capsys): targets=["target1", "target2"], a_rev="HEAD~10", b_rev="HEAD~1", - recursive=True, all=True, ) show_diff_mock.assert_not_called() - assert json.dumps(diff) in out + assert json.dumps(diff["diff"]) in out def test_metrics_show(dvc, mocker): @@ -95,7 +94,6 @@ def test_metrics_show(dvc, mocker): [ "metrics", "show", - "-R", "--all-tags", "--all-branches", "--all-commits", @@ -116,7 +114,6 @@ def test_metrics_show(dvc, mocker): m1.assert_called_once_with( cmd.repo, ["target1", "target2"], - recursive=True, all_tags=True, all_branches=True, all_commits=True, @@ -138,7 +135,6 @@ def test_metrics_show_json(dvc, mocker, capsys): "metrics", "show", "--json", - "-R", "--all-tags", "--all-branches", "--all-commits", @@ -163,7 +159,6 @@ def test_metrics_show_json(dvc, mocker, capsys): metrics_show.assert_called_once_with( cmd.repo, ["target1", "target2"], - recursive=True, all_tags=True, all_branches=True, all_commits=True, diff --git a/tests/unit/command/test_params.py b/tests/unit/command/test_params.py index 31eb2b629b..000c537e87 100644 --- a/tests/unit/command/test_params.py +++ b/tests/unit/command/test_params.py @@ -32,7 +32,7 @@ def test_params_diff(dvc, mocker): b_rev="HEAD~1", targets=["target"], all=True, - deps=True, + deps_only=True, ) show_diff_mock.assert_not_called() @@ -49,11 +49,11 @@ def test_params_diff_from_cli(dvc, mocker): params_diff.assert_called_once_with( cmd.repo, - a_rev=None, - b_rev=None, + a_rev="HEAD", + b_rev="workspace", all=False, targets=None, - deps=False, + deps_only=False, ) show_diff_mock.assert_called_once_with( {}, @@ -61,15 +61,21 @@ def test_params_diff_from_cli(dvc, mocker): markdown=False, no_path=False, show_changes=False, - a_rev=None, - b_rev=None, + a_rev="HEAD", + b_rev="workspace", ) def test_params_diff_show_json(dvc, mocker, capsys): cli_args = parse_args(["params", "diff", "HEAD~10", "HEAD~1", "--json"]) cmd = cli_args.func(cli_args) - mocker.patch("dvc.repo.params.diff.diff", return_value={"params.yaml": {"a": "b"}}) + mocker.patch( + "dvc.repo.params.diff.diff", + return_value={ + "diff": {"params.yaml": {"a": "b"}}, + "errors": {"rev": Exception}, + }, + ) show_diff_mock = mocker.patch("dvc.compare.show_diff") assert cmd.run() == 0