diff --git a/dvc/command/metrics.py b/dvc/command/metrics.py index 91cae9a474..5e97f89266 100644 --- a/dvc/command/metrics.py +++ b/dvc/command/metrics.py @@ -14,11 +14,9 @@ def show_metrics( metrics, all_branches=False, all_tags=False, all_commits=False ): - """ - Args: - metrics (list): Where each element is either a `list` - if an xpath was specified, otherwise a `str` - """ + from flatten_json import flatten + from dvc.utils.diff import format_dict + # When `metrics` contains a `None` key, it means that some files # specified as `targets` in `repo.metrics.show` didn't contain any metrics. missing = metrics.pop(None, None) @@ -28,21 +26,13 @@ def show_metrics( logger.info("{branch}:".format(branch=branch)) for fname, metric in val.items(): - if isinstance(metric, dict): - lines = list(metric.values()) - elif isinstance(metric, list): - lines = metric - else: - lines = metric.splitlines() + if not isinstance(metric, dict): + logger.info("\t{}: {}".format(fname, str(metric))) + continue - if len(lines) > 1: - logger.info("\t{fname}:".format(fname=fname)) - - for line in lines: - logger.info("\t\t{content}".format(content=line)) - - else: - logger.info("\t{}: {}".format(fname, metric)) + logger.info("\t{}:".format(fname)) + for key, value in flatten(format_dict(metric), ".").items(): + logger.info("\t\t{}: {}".format(key, value)) if missing: raise BadMetricError(missing) @@ -53,35 +43,25 @@ def run(self): try: metrics = self.repo.metrics.show( self.args.targets, - typ=self.args.type, - xpath=self.args.xpath, all_branches=self.args.all_branches, all_tags=self.args.all_tags, all_commits=self.args.all_commits, recursive=self.args.recursive, ) - show_metrics( - metrics, - self.args.all_branches, - self.args.all_tags, - self.args.all_commits, - ) - except DvcException: - logger.exception("failed to show metrics") - return 1 - - return 0 - + if self.args.show_json: + import json -class CmdMetricsModify(CmdBase): - def run(self): - try: - self.repo.metrics.modify( - self.args.path, typ=self.args.type, xpath=self.args.xpath - ) + logger.info(json.dumps(metrics)) + else: + show_metrics( + metrics, + self.args.all_branches, + self.args.all_tags, + self.args.all_commits, + ) except DvcException: - logger.exception("failed to modify metric file settings") + logger.exception("failed to show metrics") return 1 return 0 @@ -90,9 +70,7 @@ def run(self): class CmdMetricsAdd(CmdBase): def run(self): try: - self.repo.metrics.add( - self.args.path, self.args.type, self.args.xpath - ) + self.repo.metrics.add(self.args.path) except DvcException: msg = "failed to add metric file '{}'".format(self.args.path) logger.exception(msg) @@ -114,11 +92,14 @@ def run(self): def _show_diff(diff): + from collections import OrderedDict + from dvc.utils.diff import table rows = [] for fname, mdiff in diff.items(): - for metric, change in mdiff.items(): + sorted_mdiff = OrderedDict(sorted(mdiff.items())) + for metric, change in sorted_mdiff.items(): rows.append( [ fname, @@ -138,9 +119,8 @@ def run(self): a_rev=self.args.a_rev, b_rev=self.args.b_rev, targets=self.args.targets, - typ=self.args.type, - xpath=self.args.xpath, recursive=self.args.recursive, + all=self.args.all, ) if self.args.show_json: @@ -185,32 +165,9 @@ def add_parser(subparsers, parent_parser): help=METRICS_ADD_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) - metrics_add_parser.add_argument( - "-t", "--type", help="Type of metrics (json/yaml).", metavar="", - ) - metrics_add_parser.add_argument( - "-x", "--xpath", help="json/yaml path.", 
metavar="", - ) metrics_add_parser.add_argument("path", help="Path to a metric file.") metrics_add_parser.set_defaults(func=CmdMetricsAdd) - METRICS_MODIFY_HELP = "Modify metric default formatting." - metrics_modify_parser = metrics_subparsers.add_parser( - "modify", - parents=[parent_parser], - description=append_doc_link(METRICS_MODIFY_HELP, "metrics/modify"), - help=METRICS_MODIFY_HELP, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - metrics_modify_parser.add_argument( - "-t", "--type", help="Type of metrics (json/yaml).", metavar="", - ) - metrics_modify_parser.add_argument( - "-x", "--xpath", help="json/yaml path.", metavar="", - ) - metrics_modify_parser.add_argument("path", help="Path to a metric file.") - metrics_modify_parser.set_defaults(func=CmdMetricsModify) - METRICS_SHOW_HELP = "Print metrics, with optional formatting." metrics_show_parser = metrics_subparsers.add_parser( "show", @@ -224,19 +181,6 @@ def add_parser(subparsers, parent_parser): nargs="*", help="Metric files or directories (see -R) to show", ) - metrics_show_parser.add_argument( - "-t", - "--type", - help=( - "Type of metrics (json/yaml). " - "It can be detected by the file extension automatically. " - "Unsupported types will be treated as raw." - ), - metavar="", - ) - metrics_show_parser.add_argument( - "-x", "--xpath", help="json/yaml path.", metavar="", - ) metrics_show_parser.add_argument( "-a", "--all-branches", @@ -267,6 +211,12 @@ def add_parser(subparsers, parent_parser): "metric files." ), ) + metrics_show_parser.add_argument( + "--show-json", + action="store_true", + default=False, + help="Show output in JSON format.", + ) metrics_show_parser.set_defaults(func=CmdMetricsShow) METRICS_DIFF_HELP = "Show changes in metrics between commits" @@ -295,19 +245,6 @@ def add_parser(subparsers, parent_parser): ), metavar="", ) - metrics_diff_parser.add_argument( - "-t", - "--type", - help=( - "Type of metrics (json/yaml). " - "It can be detected by the file extension automatically. " - "Unsupported types will be treated as raw." - ), - metavar="", - ) - metrics_diff_parser.add_argument( - "-x", "--xpath", help="json/yaml path.", metavar="", - ) metrics_diff_parser.add_argument( "-R", "--recursive", @@ -318,6 +255,12 @@ def add_parser(subparsers, parent_parser): "metric files." 
         ),
     )
+    metrics_diff_parser.add_argument(
+        "--all",
+        action="store_true",
+        default=False,
+        help="Show unchanged metrics as well.",
+    )
     metrics_diff_parser.add_argument(
         "--show-json",
         action="store_true",
diff --git a/dvc/remote/local.py b/dvc/remote/local.py
index 2e247bf7d6..9bebfe47a5 100644
--- a/dvc/remote/local.py
+++ b/dvc/remote/local.py
@@ -87,6 +87,7 @@ def get(self, md5):
         return self.checksum_to_path_info(md5).url
 
     def exists(self, path_info):
+        assert is_working_tree(self.repo.tree)
         assert path_info.scheme == "local"
         return self.repo.tree.exists(fspath_py35(path_info))
 
diff --git a/dvc/repo/metrics/__init__.py b/dvc/repo/metrics/__init__.py
index 7fce413da3..5e05ae7b2c 100644
--- a/dvc/repo/metrics/__init__.py
+++ b/dvc/repo/metrics/__init__.py
@@ -7,11 +7,6 @@ def add(self, *args, **kwargs):
 
         return add(self.repo, *args, **kwargs)
 
-    def modify(self, *args, **kwargs):
-        from dvc.repo.metrics.modify import modify
-
-        return modify(self.repo, *args, **kwargs)
-
     def show(self, *args, **kwargs):
         from dvc.repo.metrics.show import show
 
diff --git a/dvc/repo/metrics/add.py b/dvc/repo/metrics/add.py
index 45a172dac5..7828c63ef4 100644
--- a/dvc/repo/metrics/add.py
+++ b/dvc/repo/metrics/add.py
@@ -1,7 +1,5 @@
 from dvc.repo.metrics.modify import modify
 
 
-def add(repo, path, typ=None, xpath=None):
-    if not typ:
-        typ = "raw"
-    modify(repo, path, typ, xpath)
+def add(repo, path):
+    modify(repo, path)
diff --git a/dvc/repo/metrics/diff.py b/dvc/repo/metrics/diff.py
index 697afeae25..e3ef2177a8 100644
--- a/dvc/repo/metrics/diff.py
+++ b/dvc/repo/metrics/diff.py
@@ -1,4 +1,4 @@
-import dvc.utils.diff
+from dvc.utils.diff import format_dict, diff as _diff
 
 from dvc.exceptions import NoMetricsError
 
@@ -13,7 +13,11 @@ def _get_metrics(repo, *args, rev=None, **kwargs):
 
 
 def diff(repo, *args, a_rev=None, b_rev=None, **kwargs):
+    with_unchanged = kwargs.pop("all", False)
+
     old = _get_metrics(repo, *args, **kwargs, rev=(a_rev or "HEAD"))
     new = _get_metrics(repo, *args, **kwargs, rev=b_rev)
 
-    return dvc.utils.diff.diff(old, new)
+    return _diff(
+        format_dict(old), format_dict(new), with_unchanged=with_unchanged
+    )
diff --git a/dvc/repo/metrics/modify.py b/dvc/repo/metrics/modify.py
index 05e7716670..d191431ecb 100644
--- a/dvc/repo/metrics/modify.py
+++ b/dvc/repo/metrics/modify.py
@@ -4,8 +4,7 @@
 
 
 @locked
-def modify(repo, path, typ=None, xpath=None, delete=False):
-    supported_types = ["raw", "json", "csv", "tsv", "hcsv", "htsv"]
+def modify(repo, path, delete=False):
     outs = repo.find_outs_by_path(path)
     assert len(outs) == 1
     out = outs[0]
@@ -14,25 +13,6 @@ def modify(repo, path, typ=None, xpath=None, delete=False):
         msg = "output '{}' scheme '{}' is not supported for metrics"
         raise DvcException(msg.format(out.path, out.path_info.scheme))
 
-    if typ is not None:
-        typ = typ.lower().strip()
-        if typ not in ["raw", "json", "csv", "tsv", "hcsv", "htsv"]:
-            msg = (
-                "metric type '{typ}' is not supported, "
-                "must be one of [{types}]"
-            )
-            raise DvcException(
-                msg.format(typ=typ, types=", ".join(supported_types))
-            )
-        if not isinstance(out.metric, dict):
-            out.metric = {}
-        out.metric[out.PARAM_METRIC_TYPE] = typ
-
-    if xpath is not None:
-        if not isinstance(out.metric, dict):
-            out.metric = {}
-        out.metric[out.PARAM_METRIC_XPATH] = xpath
-
     if delete:
         out.metric = None
 
diff --git a/dvc/repo/metrics/show.py b/dvc/repo/metrics/show.py
index fcd71b632c..4ecf23e369 100644
--- a/dvc/repo/metrics/show.py
+++ b/dvc/repo/metrics/show.py
@@ -1,258 +1,79 @@
-import csv
-import errno
-import json
+import yaml
import logging import os -import io - -from jsonpath_ng.ext import parse +from dvc.path_info import PathInfo +from dvc.compat import fspath_py35 from dvc.exceptions import NoMetricsError -from dvc.exceptions import OutputNotFoundError from dvc.repo import locked +from dvc.repo.tree import RepoTree -NO_METRICS_FILE_AT_REFERENCE_WARNING = ( - "Metrics file '{}' does not exist at the reference '{}'." -) logger = logging.getLogger(__name__) -def _read_metric_json(fd, json_path): - parser = parse(json_path) - return {str(x.full_path): x.value for x in parser.find(json.load(fd))} - - -def _get_values(row): - if isinstance(row, dict): - return list(row.values()) - else: - return row - - -def _do_read_metric_xsv(reader, row, col): - if col is not None and row is not None: - return [reader[row][col]] - elif col is not None: - return [r[col] for r in reader] - elif row is not None: - return _get_values(reader[row]) - return [_get_values(r) for r in reader] - - -def _read_metric_hxsv(fd, hxsv_path, delimiter): - indices = hxsv_path.split(",") - row = indices[0] - row = int(row) if row else None - col = indices[1] if len(indices) > 1 and indices[1] else None - reader = list(csv.DictReader(fd, delimiter=delimiter)) - return _do_read_metric_xsv(reader, row, col) - - -def _read_metric_xsv(fd, xsv_path, delimiter): - indices = xsv_path.split(",") - row = indices[0] - row = int(row) if row else None - col = int(indices[1]) if len(indices) > 1 and indices[1] else None - reader = list(csv.reader(fd, delimiter=delimiter)) - return _do_read_metric_xsv(reader, row, col) - - -def _read_typed_metric(typ, xpath, fd): - if typ == "json": - ret = _read_metric_json(fd, xpath) - elif typ == "csv": - ret = _read_metric_xsv(fd, xpath, ",") - elif typ == "tsv": - ret = _read_metric_xsv(fd, xpath, "\t") - elif typ == "hcsv": - ret = _read_metric_hxsv(fd, xpath, ",") - elif typ == "htsv": - ret = _read_metric_hxsv(fd, xpath, "\t") - else: - ret = fd.read().strip() - return ret - - -def _format_csv(content, delimiter): - """Format delimited text to have same column width. - - Args: - content (str): The content of a metric. - delimiter (str): Value separator - - Returns: - str: Formatted content. +def _collect_metrics(repo, targets, recursive): + metrics = set() - Example: + for stage in repo.stages: + for out in stage.outs: + if not out.metric: + continue - >>> content = ( - "value_mse,deviation_mse,data_set\n" - "0.421601,0.173461,train\n" - "0.67528,0.289545,testing\n" - "0.671502,0.297848,validation\n" - ) - >>> _format_csv(content, ",") + metrics.add(out.path_info) - "value_mse deviation_mse data_set\n" - "0.421601 0.173461 train\n" - "0.67528 0.289545 testing\n" - "0.671502 0.297848 validation\n" - """ - reader = csv.reader(io.StringIO(content), delimiter=delimiter) - rows = [row for row in reader] - max_widths = [max(map(len, column)) for column in zip(*rows)] - - lines = [ - " ".join( - "{entry:{width}}".format(entry=entry, width=width + 2) - for entry, width in zip(row, max_widths) - ) - for row in rows - ] - - return "\n".join(lines) - - -def _format_output(content, typ): - """Tabularize the content according to its type. - - Args: - content (str): The content of a metric. - typ (str): The type of metric -- (raw|json|tsv|htsv|csv|hcsv). + if not targets: + return list(metrics) - Returns: - str: Content in a raw or tabular format. 
- """ + target_infos = [PathInfo(os.path.abspath(target)) for target in targets] - if "csv" in typ: - return _format_csv(content, delimiter=",") + def _filter(path_info): + for info in target_infos: + func = path_info.isin_or_eq if recursive else path_info.__eq__ + if func(info): + return True + return False - if "tsv" in typ: - return _format_csv(content, delimiter="\t") + return list(filter(_filter, metrics)) - return content +def _extract_metrics(metrics): + if isinstance(metrics, (int, float)): + return metrics -def _read_metric(fd, typ=None, xpath=None, fname=None, branch=None): - typ = typ.lower().strip() if typ else typ - try: - if xpath: - return _read_typed_metric(typ, xpath.strip(), fd) - else: - return _format_output(fd.read().strip(), typ) - # Json path library has to be replaced or wrapped in - # order to fix this too broad except clause. - except Exception: - logger.exception( - "unable to read metric in '{}' in branch '{}'".format( - fname, branch - ) - ) + if not isinstance(metrics, dict): return None + ret = {} + for key, val in metrics.items(): + m = _extract_metrics(val) + if m: + ret[key] = m -def _collect_metrics(repo, path, recursive, typ, xpath, branch): - """Gather all the metric outputs. - - Args: - path (str): Path to a metric file or a directory. - recursive (bool): If path is a directory, do a recursive search for - metrics on the given path. - typ (str): The type that will be used to interpret the metric file, - one of the followings - (raw|json|tsv|htsv|csv|hcsv). - xpath (str): Path to search for. - branch (str): Branch to look up for metrics. - - Returns: - list(tuple): (output, typ, xpath) - - output: - - typ: - - xpath: - """ - outs = [out for stage in repo.stages for out in stage.outs] - - if path: - try: - outs = repo.find_outs_by_path(path, outs=outs, recursive=recursive) - except OutputNotFoundError: - logger.debug( - "DVC-file not for found for '{}' in branch '{}'".format( - path, branch - ) - ) - return [] - - res = [] - for o in outs: - if not o.metric: - continue - - # NOTE this case assumes that typ has not been provided in CLI call - # and one of the following cases: - # - stage file contains metric type - # - typ will be read from file extension later - if not typ and isinstance(o.metric, dict): - t = o.metric.get(o.PARAM_METRIC_TYPE, typ) - # NOTE user might want to check different xpath, hence xpath first - x = xpath or o.metric.get(o.PARAM_METRIC_XPATH) - else: - t = typ - x = xpath - - res.append((o, t, x)) - - return res - - -def _read_metrics(repo, metrics, branch): - """Read the content of each metric file and format it. + return ret - Args: - metrics (list): List of metric touples - branch (str): Branch to look up for metrics. - Returns: - A dict mapping keys with metrics path name and content. 
- For example: +def _read_metrics(repo, metrics, rev): + tree = RepoTree(repo) - {'metric.csv': ("value_mse deviation_mse data_set\n" - "0.421601 0.173461 train\n" - "0.67528 0.289545 testing\n" - "0.671502 0.297848 validation\n")} - """ res = {} - for out, typ, xpath in metrics: - assert out.scheme == "local" - if not typ: - typ = os.path.splitext(out.fspath.lower())[1].replace(".", "") - if out.use_cache: - open_fun = open - path = repo.cache.local.get(out.checksum) - else: - open_fun = repo.tree.open - path = out.fspath - try: + for metric in metrics: + if not tree.exists(fspath_py35(metric)): + continue - with open_fun(path) as fd: - metric = _read_metric( - fd, typ=typ, xpath=xpath, fname=str(out), branch=branch + with tree.open(fspath_py35(metric), "r") as fobj: + try: + # NOTE this also supports JSON + val = yaml.safe_load(fobj) + except yaml.YAMLError: + logger.debug( + "failed to read '%s' on '%s'", metric, rev, exc_info=True ) - except IOError as e: - if e.errno == errno.ENOENT: - logger.warning( - NO_METRICS_FILE_AT_REFERENCE_WARNING.format( - out.path_info, branch - ) - ) - metric = None - else: - raise - - if not metric: - continue + continue - res[str(out)] = metric + val = _extract_metrics(val) + if val: + res[str(metric)] = val return res @@ -261,8 +82,6 @@ def _read_metrics(repo, metrics, branch): def show( repo, targets=None, - typ=None, - xpath=None, all_branches=False, all_tags=False, recursive=False, @@ -270,34 +89,20 @@ def show( all_commits=False, ): res = {} - found = set() - if not targets: - # Iterate once to call `_collect_metrics` on all the stages - targets = [None] - - for branch in repo.brancher( + for rev in repo.brancher( revs=revs, all_branches=all_branches, all_tags=all_tags, all_commits=all_commits, ): - metrics = {} - - for target in targets: - entries = _collect_metrics( - repo, target, recursive, typ, xpath, branch - ) - metric = _read_metrics(repo, entries, branch) + metrics = _collect_metrics(repo, targets, recursive) + vals = _read_metrics(repo, metrics, rev) - if metric: - found.add(target) - metrics.update(metric) + if vals: + res[rev] = vals - if metrics: - res[branch] = metrics - - if not res and not any(targets): + if not res: raise NoMetricsError() # Hide working tree metrics if they are the same as in the active branch @@ -309,9 +114,4 @@ def show( if res.get("working tree") == res.get(active_branch): res.pop("working tree", None) - missing = set(targets) - found - - if missing: - res[None] = missing - return res diff --git a/dvc/repo/params/diff.py b/dvc/repo/params/diff.py index 0b22e212c0..d40afc1c56 100644 --- a/dvc/repo/params/diff.py +++ b/dvc/repo/params/diff.py @@ -1,4 +1,4 @@ -import dvc.utils.diff +from dvc.utils.diff import format_dict, diff as _diff from .show import NoParamsError @@ -10,25 +10,12 @@ def _get_params(repo, *args, rev=None, **kwargs): return {} -def _format(params): - ret = {} - for key, val in params.items(): - if isinstance(val, dict): - new_val = _format(val) - elif isinstance(val, list): - new_val = str(val) - else: - new_val = val - ret[key] = new_val - return ret - - def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): with_unchanged = kwargs.pop("all", False) old = _get_params(repo, *args, **kwargs, rev=(a_rev or "HEAD")) new = _get_params(repo, *args, **kwargs, rev=b_rev) - return dvc.utils.diff.diff( - _format(old), _format(new), with_unchanged=with_unchanged + return _diff( + format_dict(old), format_dict(new), with_unchanged=with_unchanged ) diff --git a/dvc/repo/tree.py b/dvc/repo/tree.py index 
5554f1c140..9303ea396b 100644
--- a/dvc/repo/tree.py
+++ b/dvc/repo/tree.py
@@ -1,7 +1,7 @@
 import os
 import errno
 
-from dvc.scm.tree import BaseTree
+from dvc.scm.tree import BaseTree, WorkingTree
 from dvc.path_info import PathInfo
 from dvc.exceptions import OutputNotFoundError
 
@@ -32,10 +32,17 @@ def open(self, path, mode="r", encoding="utf-8"):
             raise IOError(errno.EISDIR)
 
         out = outs[0]
-        if not out.changed_cache():
-            return open(out.cache_path, mode=mode, encoding=encoding)
+        # temporary hack to make cache use WorkingTree and not GitTree, because
+        # cache dir doesn't exist in the latter.
+        saved_tree = self.repo.tree
+        self.repo.tree = WorkingTree(self.repo.root_dir)
+        try:
+            if out.changed_cache():
+                raise FileNotFoundError
+        finally:
+            self.repo.tree = saved_tree
 
-        raise FileNotFoundError
+        return open(out.cache_path, mode=mode, encoding=encoding)
 
     def exists(self, path):
         try:
@@ -115,3 +122,20 @@ def isdvc(self, path):
 
     def isexec(self, path):
         return False
+
+
+class RepoTree(BaseTree):
+    def __init__(self, repo):
+        self.repo = repo
+        self.dvctree = DvcTree(repo)
+
+    def open(self, *args, **kwargs):
+        try:
+            return self.dvctree.open(*args, **kwargs)
+        except FileNotFoundError:
+            pass
+
+        return self.repo.tree.open(*args, **kwargs)
+
+    def exists(self, path):
+        return self.repo.tree.exists(path) or self.dvctree.exists(path)
diff --git a/dvc/utils/diff.py b/dvc/utils/diff.py
index 0341e1937c..9b57f623bd 100644
--- a/dvc/utils/diff.py
+++ b/dvc/utils/diff.py
@@ -1,8 +1,6 @@
 import json
 from collections import defaultdict
 
-from flatten_json import flatten
-
 
 def _parse(raw):
     if raw is None or isinstance(raw, (dict, list, int, float)):
@@ -37,7 +35,9 @@ def _flatten(d):
         return defaultdict(lambda: None)
 
     if isinstance(d, dict):
-        return defaultdict(lambda: None, flatten(d, "."))
+        from flatten_json import flatten as fltn
+
+        return defaultdict(lambda: None, fltn(d, "."))
 
     return defaultdict(lambda: "unable to parse")
 
@@ -103,3 +103,16 @@ def table(header, rows):
     t.add_rows([header] + rows)
 
     return t.draw()
+
+
+def format_dict(d):
+    ret = {}
+    for key, val in d.items():
+        if isinstance(val, dict):
+            new_val = format_dict(val)
+        elif isinstance(val, list):
+            new_val = str(val)
+        else:
+            new_val = val
+        ret[key] = new_val
+    return ret
diff --git a/tests/func/metrics/__init__.py b/tests/func/metrics/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/func/metrics/test_diff.py b/tests/func/metrics/test_diff.py
new file mode 100644
index 0000000000..cae538f91e
--- /dev/null
+++ b/tests/func/metrics/test_diff.py
@@ -0,0 +1,135 @@
+import json
+import yaml
+
+
+def test_metrics_diff_simple(tmp_dir, scm, dvc):
+    def _gen(val):
+        tmp_dir.gen({"m.yaml": str(val)})
+        dvc.run(cmd="", metrics=["m.yaml"])
+        dvc.scm.add(["m.yaml.dvc"])
+        dvc.scm.commit(str(val))
+
+    _gen(1)
+    _gen(2)
+    _gen(3)
+
+    expected = {"m.yaml": {"": {"old": 1, "new": 3, "diff": 2}}}
+
+    assert dvc.metrics.diff(a_rev="HEAD~2") == expected
+
+
+def test_metrics_diff_yaml(tmp_dir, scm, dvc):
+    def _gen(val):
+        metrics = {"a": {"b": {"c": val, "d": 1, "e": str(val)}}}
+        tmp_dir.gen({"m.yaml": yaml.dump(metrics)})
+        dvc.run(cmd="", metrics=["m.yaml"])
+        dvc.scm.add(["m.yaml.dvc"])
+        dvc.scm.commit(str(val))
+
+    _gen(1)
+    _gen(2)
+    _gen(3)
+
+    expected = {"m.yaml": {"a.b.c": {"old": 1, "new": 3, "diff": 2}}}
+
+    assert dvc.metrics.diff(a_rev="HEAD~2") == expected
+
+
+def test_metrics_diff_json(tmp_dir, scm, dvc):
+    def _gen(val):
+        metrics = {"a": {"b": {"c": val, "d": 1, "e": str(val)}}}
+
tmp_dir.gen({"m.json": json.dumps(metrics)}) + dvc.run(cmd="", metrics=["m.json"]) + dvc.scm.add(["m.json.dvc"]) + dvc.scm.commit(str(val)) + + _gen(1) + _gen(2) + _gen(3) + + expected = {"m.json": {"a.b.c": {"old": 1, "new": 3, "diff": 2}}} + assert dvc.metrics.diff(a_rev="HEAD~2") == expected + + +def test_metrics_diff_json_unchanged(tmp_dir, scm, dvc): + def _gen(val): + metrics = {"a": {"b": {"c": val, "d": 1, "e": str(val)}}} + tmp_dir.gen({"m.json": json.dumps(metrics)}) + dvc.run(cmd="", metrics=["m.json"]) + dvc.scm.add(["m.json.dvc"]) + dvc.scm.commit(str(val)) + + _gen(1) + _gen(2) + _gen(1) + + assert dvc.metrics.diff(a_rev="HEAD~2") == {} + + +def test_metrics_diff_broken_json(tmp_dir, scm, dvc): + metrics = {"a": {"b": {"c": 1, "d": 1, "e": "3"}}} + tmp_dir.gen({"m.json": json.dumps(metrics)}) + dvc.run(cmd="", metrics_no_cache=["m.json"]) + dvc.scm.add(["m.json.dvc", "m.json"]) + dvc.scm.commit("add metrics") + + (tmp_dir / "m.json").write_text(json.dumps(metrics) + "ma\nlformed\n") + + assert dvc.metrics.diff() == { + "m.json": { + "a.b.c": {"old": 1, "new": None}, + "a.b.d": {"old": 1, "new": None}, + } + } + + +def test_metrics_diff_no_metrics(tmp_dir, scm, dvc): + tmp_dir.scm_gen({"foo": "foo"}, commit="add foo") + assert dvc.metrics.diff(a_rev="HEAD~1") == {} + + +def test_metrics_diff_new_metric(tmp_dir, scm, dvc): + metrics = {"a": {"b": {"c": 1, "d": 1, "e": "3"}}} + tmp_dir.gen({"m.json": json.dumps(metrics)}) + dvc.run(cmd="", metrics_no_cache=["m.json"]) + + assert dvc.metrics.diff() == { + "m.json": { + "a.b.c": {"old": None, "new": 1}, + "a.b.d": {"old": None, "new": 1}, + } + } + + +def test_metrics_diff_deleted_metric(tmp_dir, scm, dvc): + metrics = {"a": {"b": {"c": 1, "d": 1, "e": "3"}}} + tmp_dir.gen({"m.json": json.dumps(metrics)}) + dvc.run(cmd="", metrics_no_cache=["m.json"]) + dvc.scm.add(["m.json.dvc", "m.json"]) + dvc.scm.commit("add metrics") + + (tmp_dir / "m.json").unlink() + + assert dvc.metrics.diff() == { + "m.json": { + "a.b.c": {"old": 1, "new": None}, + "a.b.d": {"old": 1, "new": None}, + } + } + + +def test_metrics_diff_with_unchanged(tmp_dir, scm, dvc): + tmp_dir.gen("metrics.yaml", "foo: 1\nxyz: 10") + dvc.run(metrics_no_cache=["metrics.yaml"]) + scm.add(["metrics.yaml", "metrics.yaml.dvc"]) + scm.commit("1") + + tmp_dir.scm_gen("metrics.yaml", "foo: 2\nxyz: 10", commit="2") + tmp_dir.scm_gen("metrics.yaml", "foo: 3\nxyz: 10", commit="3") + + assert dvc.metrics.diff(a_rev="HEAD~2", all=True) == { + "metrics.yaml": { + "foo": {"old": 1, "new": 3, "diff": 2}, + "xyz": {"old": 10, "new": 10, "diff": 0}, + } + } diff --git a/tests/func/metrics/test_show.py b/tests/func/metrics/test_show.py new file mode 100644 index 0000000000..608aa0522c --- /dev/null +++ b/tests/func/metrics/test_show.py @@ -0,0 +1,50 @@ +import pytest + +from dvc.repo.metrics.show import NoMetricsError + + +def test_show_empty(dvc): + with pytest.raises(NoMetricsError): + dvc.metrics.show() + + +def test_show_simple(tmp_dir, dvc): + tmp_dir.gen("metrics.yaml", "1.1") + dvc.run(metrics=["metrics.yaml"]) + assert dvc.metrics.show() == {"": {"metrics.yaml": 1.1}} + + +def test_show(tmp_dir, dvc): + tmp_dir.gen("metrics.yaml", "foo: 1.1") + dvc.run(metrics=["metrics.yaml"]) + assert dvc.metrics.show() == {"": {"metrics.yaml": {"foo": 1.1}}} + + +def test_show_multiple(tmp_dir, dvc): + tmp_dir.gen("foo", "foo: 1\n") + tmp_dir.gen("baz", "baz: 2\n") + dvc.run(fname="foo.dvc", metrics=["foo"]) + dvc.run(fname="baz.dvc", metrics=["baz"]) + assert dvc.metrics.show() == {"": {"foo": 
{"foo": 1}, "baz": {"baz": 2}}} + + +def test_show_invalid_metric(tmp_dir, dvc): + tmp_dir.gen("metrics.yaml", "foo:\n- bar\n- baz\nxyz: string") + dvc.run(metrics=["metrics.yaml"]) + with pytest.raises(NoMetricsError): + dvc.metrics.show() + + +def test_show_branch(tmp_dir, scm, dvc): + tmp_dir.gen("metrics.yaml", "foo: 1") + dvc.run(metrics_no_cache=["metrics.yaml"]) + scm.add(["metrics.yaml", "metrics.yaml.dvc"]) + scm.commit("init") + + with tmp_dir.branch("branch", new=True): + tmp_dir.scm_gen("metrics.yaml", "foo: 2", commit="branch") + + assert dvc.metrics.show(revs=["branch"]) == { + "working tree": {"metrics.yaml": {"foo": 1}}, + "branch": {"metrics.yaml": {"foo": 2}}, + } diff --git a/tests/func/test_init.py b/tests/func/test_init.py index 39f2e2d949..be135a33c5 100644 --- a/tests/func/test_init.py +++ b/tests/func/test_init.py @@ -69,10 +69,11 @@ def test_init_no_scm_cli(tmp_dir): def test_init_quiet_should_not_display_welcome_screen(tmp_dir, scm, caplog): - ret = main(["init", "--quiet"]) + with caplog.at_level(logging.INFO, logger="dvc"): + ret = main(["init", "--quiet"]) - assert 0 == ret - assert "" == caplog.text + assert 0 == ret + assert "" == caplog.text def test_allow_init_dvc_subdir(tmp_dir, scm, monkeypatch): diff --git a/tests/func/test_metrics.py b/tests/func/test_metrics.py deleted file mode 100644 index f50972fe23..0000000000 --- a/tests/func/test_metrics.py +++ /dev/null @@ -1,1018 +0,0 @@ -# -*- coding: utf-8 -*- - -import json -import logging -import os - -import pytest - -from dvc.exceptions import DvcException -from dvc.exceptions import NoMetricsError -from dvc.main import main -from dvc.repo import Repo as DvcRepo -from dvc.repo.metrics.show import NO_METRICS_FILE_AT_REFERENCE_WARNING -from dvc.utils import relpath -from dvc.scm.base import NoSCMError -from tests.basic_env import TestDvcGit - - -class TestMetricsBase(TestDvcGit): - def setUp(self): - super().setUp() - self.dvc.scm.commit("init") - - branches = ["foo", "bar", "baz"] - - for branch in branches: - self.dvc.scm.repo.create_head(branch) - - for branch in branches: - self.dvc.scm.checkout(branch) - - self.create("metric", branch) - self.create("metric_json", json.dumps({"branch": branch})) - self.create("metric_csv", branch) - self.create("metric_hcsv", "branch\n" + branch) - self.create("metric_tsv", branch) - self.create("metric_htsv", "branch\n" + branch) - - if branch == "foo": - deviation_mse_train = 0.173461 - else: - deviation_mse_train = 0.356245 - - self.create( - "metric_json_ext", - json.dumps( - { - "metrics": [ - { - "dataset": "train", - "deviation_mse": deviation_mse_train, - "value_mse": 0.421601, - }, - { - "dataset": "testing", - "deviation_mse": 0.289545, - "value_mse": 0.297848, - }, - { - "dataset": "validation", - "deviation_mse": 0.67528, - "value_mse": 0.671502, - }, - ] - } - ), - ) - - files = [ - "metric", - "metric_json", - "metric_tsv", - "metric_htsv", - "metric_csv", - "metric_hcsv", - "metric_json_ext", - ] - - self.dvc.run(metrics_no_cache=files, overwrite=True) - - self.dvc.scm.add(files + ["metric.dvc"]) - - self.dvc.scm.commit("metric") - - self.dvc.scm.checkout("master") - - -def test_show_dirty(tmp_dir, scm, dvc): - tmp_dir.gen("metric", "master") - dvc.run(metrics_no_cache=["metric"], overwrite=True) - tmp_dir.scm_add(["metric", "metric.dvc"], commit="add metric") - - tmp_dir.gen("metric", "dirty") - - assert dvc.metrics.show(["metric"]) == {"": {"metric": "dirty"}} - - assert dvc.metrics.show(["metric"], all_branches=True) == { - "working tree": {"metric": 
"dirty"}, - "master": {"metric": "master"}, - } - - assert dvc.metrics.show(["metric"], all_tags=True) == { - "working tree": {"metric": "dirty"} - } - - -class TestMetrics(TestMetricsBase): - def test_show(self): - ret = self.dvc.metrics.show(["metric"], all_branches=True) - self.assertEqual(len(ret), 3) - self.assertEqual(ret["foo"]["metric"], "foo") - self.assertEqual(ret["bar"]["metric"], "bar") - self.assertEqual(ret["baz"]["metric"], "baz") - - ret = self.dvc.metrics.show( - ["metric_json"], typ="json", xpath="branch", all_branches=True - ) - self.assertEqual(len(ret), 3) - self.assertSequenceEqual(ret["foo"]["metric_json"], {"branch": "foo"}) - self.assertSequenceEqual(ret["bar"]["metric_json"], {"branch": "bar"}) - self.assertSequenceEqual(ret["baz"]["metric_json"], {"branch": "baz"}) - - ret = self.dvc.metrics.show( - ["metric_tsv"], typ="tsv", xpath="0,0", all_branches=True - ) - self.assertEqual(len(ret), 3) - self.assertSequenceEqual(ret["foo"]["metric_tsv"], ["foo"]) - self.assertSequenceEqual(ret["bar"]["metric_tsv"], ["bar"]) - self.assertSequenceEqual(ret["baz"]["metric_tsv"], ["baz"]) - - ret = self.dvc.metrics.show( - ["metric_htsv"], typ="htsv", xpath="0,branch", all_branches=True - ) - self.assertEqual(len(ret), 3) - self.assertSequenceEqual(ret["foo"]["metric_htsv"], ["foo"]) - self.assertSequenceEqual(ret["bar"]["metric_htsv"], ["bar"]) - self.assertSequenceEqual(ret["baz"]["metric_htsv"], ["baz"]) - - ret = self.dvc.metrics.show( - ["metric_csv"], typ="csv", xpath="0,0", all_branches=True - ) - self.assertEqual(len(ret), 3) - self.assertSequenceEqual(ret["foo"]["metric_csv"], ["foo"]) - self.assertSequenceEqual(ret["bar"]["metric_csv"], ["bar"]) - self.assertSequenceEqual(ret["baz"]["metric_csv"], ["baz"]) - - ret = self.dvc.metrics.show( - ["metric_hcsv"], typ="hcsv", xpath="0,branch", all_branches=True - ) - self.assertEqual(len(ret), 3) - self.assertSequenceEqual(ret["foo"]["metric_hcsv"], ["foo"]) - self.assertSequenceEqual(ret["bar"]["metric_hcsv"], ["bar"]) - self.assertSequenceEqual(ret["baz"]["metric_hcsv"], ["baz"]) - - ret = self.dvc.metrics.show( - ["metric_json_ext"], - typ="json", - xpath="$.metrics[?(@.deviation_mse<0.30) & (@.value_mse>0.4)]", - all_branches=True, - ) - self.assertEqual(len(ret), 1) - self.assertSequenceEqual( - ret["foo"]["metric_json_ext"], - { - "metrics.[0]": { - "dataset": "train", - "deviation_mse": 0.173461, - "value_mse": 0.421601, - } - }, - ) - self.assertRaises(KeyError, lambda: ret["bar"]) - self.assertRaises(KeyError, lambda: ret["baz"]) - - def test_unknown_type_ignored(self): - ret = self.dvc.metrics.show( - ["metric_hcsv"], typ="unknown", xpath="0,branch", all_branches=True - ) - self.assertEqual(len(ret), 3) - for b in ["foo", "bar", "baz"]: - self.assertEqual(ret[b]["metric_hcsv"].split(), ["branch", b]) - - def test_type_case_normalized(self): - ret = self.dvc.metrics.show( - ["metric_hcsv"], typ=" hCSV ", xpath="0,branch", all_branches=True - ) - self.assertEqual(len(ret), 3) - for b in ["foo", "bar", "baz"]: - self.assertSequenceEqual(ret[b]["metric_hcsv"], [b]) - - def test_xpath_is_empty(self): - ret = self.dvc.metrics.show( - ["metric_json"], typ="json", xpath="", all_branches=True - ) - self.assertEqual(len(ret), 3) - for b in ["foo", "bar", "baz"]: - self.assertEqual(ret[b]["metric_json"], json.dumps({"branch": b})) - - def test_xpath_is_none(self): - ret = self.dvc.metrics.show( - ["metric_json"], typ="json", xpath=None, all_branches=True - ) - self.assertEqual(len(ret), 3) - for b in ["foo", "bar", "baz"]: - 
self.assertEqual(ret[b]["metric_json"], json.dumps({"branch": b})) - - def test_xpath_all_columns(self): - ret = self.dvc.metrics.show( - ["metric_hcsv"], typ="hcsv ", xpath="0,", all_branches=True - ) - self.assertEqual(len(ret), 3) - for b in ["foo", "bar", "baz"]: - self.assertSequenceEqual(ret[b]["metric_hcsv"], [b]) - - def test_xpath_all_rows(self): - ret = self.dvc.metrics.show( - ["metric_csv"], typ="csv", xpath=",0", all_branches=True - ) - self.assertEqual(len(ret), 3) - for b in ["foo", "bar", "baz"]: - self.assertSequenceEqual(ret[b]["metric_csv"], [b]) - - def test_xpath_all(self): - ret = self.dvc.metrics.show( - ["metric_csv"], typ="csv", xpath=",", all_branches=True - ) - self.assertEqual(len(ret), 3) - for b in ["foo", "bar", "baz"]: - self.assertSequenceEqual(ret[b]["metric_csv"], [[b]]) - - def test_xpath_all_with_header(self): - ret = self.dvc.metrics.show( - ["metric_hcsv"], typ="hcsv", xpath=",", all_branches=True - ) - self.assertEqual(len(ret), 3) - for b in ["foo", "bar", "baz"]: - self.assertSequenceEqual(ret[b]["metric_hcsv"], [[b]]) - - def test_formatted_output(self): - # Labels are in Spanish to test UTF-8 - self.create( - "metrics.csv", - ( - "valor_mse,desviación_mse,data_set\n" - "0.421601,0.173461,entrenamiento\n" - "0.67528,0.289545,pruebas\n" - "0.671502,0.297848,validación\n" - ), - ) - - # Contains quoted newlines to test output correctness - self.create( - "metrics.tsv", - ( - "value_mse\tdeviation_mse\tdata_set\n" - "0.421601\t0.173461\ttrain\n" - '0.67528\t0.289545\t"test\\ning"\n' - "0.671502\t0.297848\tvalidation\n" - ), - ) - - self.create( - "metrics.json", - ( - "{\n" - ' "data_set": [\n' - ' "train",\n' - ' "testing",\n' - ' "validation"\n' - " ],\n" - ' "deviation_mse": [\n' - ' "0.173461",\n' - ' "0.289545",\n' - ' "0.297848"\n' - " ],\n" - ' "value_mse": [\n' - ' "0.421601",\n' - ' "0.67528",\n' - ' "0.671502"\n' - " ]\n" - "}" - ), - ) - - self.create( - "metrics.txt", "ROC_AUC: 0.64\nKS: 78.9999999996\nF_SCORE: 77\n" - ) - - self.dvc.run( - fname="testing_metrics_output.dvc", - metrics_no_cache=[ - "metrics.csv", - "metrics.tsv", - "metrics.json", - "metrics.txt", - ], - ) - - self.dvc.metrics.modify("metrics.csv", typ="csv") - self.dvc.metrics.modify("metrics.tsv", typ="tsv") - self.dvc.metrics.modify("metrics.json", typ="json") - - self._caplog.clear() - - with self._caplog.at_level(logging.INFO, logger="dvc"): - ret = main(["metrics", "show"]) - self.assertEqual(ret, 0) - - expected_csv = ( - "\tmetrics.csv:\n" - "\t\tvalor_mse desviación_mse data_set \n" - "\t\t0.421601 0.173461 entrenamiento \n" - "\t\t0.67528 0.289545 pruebas \n" - "\t\t0.671502 0.297848 validación" - ) - - expected_tsv = ( - "\tmetrics.tsv:\n" - "\t\tvalue_mse deviation_mse data_set \n" - "\t\t0.421601 0.173461 train \n" - "\t\t0.67528 0.289545 test\\ning \n" - "\t\t0.671502 0.297848 validation" - ) - - expected_txt = ( - "\tmetrics.txt:\n" - "\t\tROC_AUC: 0.64\n" - "\t\tKS: 78.9999999996\n" - "\t\tF_SCORE: 77" - ) - - expected_json = ( - "\tmetrics.json:\n" - "\t\t{\n" - '\t\t "data_set": [\n' - '\t\t "train",\n' - '\t\t "testing",\n' - '\t\t "validation"\n' - "\t\t ],\n" - '\t\t "deviation_mse": [\n' - '\t\t "0.173461",\n' - '\t\t "0.289545",\n' - '\t\t "0.297848"\n' - "\t\t ],\n" - '\t\t "value_mse": [\n' - '\t\t "0.421601",\n' - '\t\t "0.67528",\n' - '\t\t "0.671502"\n' - "\t\t ]\n" - "\t\t}" - ) - - stdout = "\n".join(record.message for record in self._caplog.records) - - assert expected_tsv in stdout - assert expected_csv in stdout - assert expected_txt in 
stdout - assert expected_json in stdout - - def test_show_all_should_be_current_dir_agnostic(self): - os.chdir(self.DATA_DIR) - - metrics = self.dvc.metrics.show(all_branches=True) - self.assertMetricsHaveRelativePaths(metrics) - - def assertMetricsHaveRelativePaths(self, metrics): - root_relpath = relpath(self.dvc.root_dir) - metric_path = os.path.join(root_relpath, "metric") - metric_json_path = os.path.join(root_relpath, "metric_json") - metric_tsv_path = os.path.join(root_relpath, "metric_tsv") - metric_htsv_path = os.path.join(root_relpath, "metric_htsv") - metric_csv_path = os.path.join(root_relpath, "metric_csv") - metric_hcsv_path = os.path.join(root_relpath, "metric_hcsv") - metric_json_ext_path = os.path.join(root_relpath, "metric_json_ext") - for branch in ["bar", "baz", "foo"]: - self.assertEqual( - set(metrics[branch].keys()), - { - metric_path, - metric_json_path, - metric_tsv_path, - metric_htsv_path, - metric_csv_path, - metric_hcsv_path, - metric_json_ext_path, - }, - ) - - -class TestMetricsRecursive(TestDvcGit): - def setUp(self): - super().setUp() - self.dvc.scm.commit("init") - - self.dvc.scm.checkout("nested", create_new=True) - - os.mkdir("nested") - os.mkdir(os.path.join("nested", "subnested")) - - ret = main( - [ - "run", - "-M", - os.path.join("nested", "metric_nested"), - "echo", - "nested", - ">>", - os.path.join("nested", "metric_nested"), - ] - ) - - self.assertEqual(ret, 0) - - ret = main( - [ - "run", - "-M", - os.path.join("nested", "subnested", "metric_subnested"), - "echo", - "subnested", - ">>", - os.path.join("nested", "subnested", "metric_subnested"), - ] - ) - - self.assertEqual(ret, 0) - - self.dvc.scm.add( - ["nested", "metric_nested.dvc", "metric_subnested.dvc"] - ) - self.dvc.scm.commit("nested metrics") - - self.dvc.scm.checkout("master") - - def test(self): - ret = self.dvc.metrics.show( - ["nested"], all_branches=True, recursive=False - ) - self.assertEqual(len(ret), 1) - - ret = self.dvc.metrics.show( - ["nested"], all_branches=True, recursive=True - ) - self.assertEqual(len(ret), 1) - self.assertEqual( - ret["nested"][ - os.path.join("nested", "subnested", "metric_subnested") - ], - "subnested", - ) - self.assertEqual( - ret["nested"][os.path.join("nested", "metric_nested")], "nested" - ) - - -class TestMetricsReproCLI(TestDvcGit): - def test(self): - stage = self.dvc.run( - metrics_no_cache=["metrics"], - cmd="python {} {} {}".format(self.CODE, self.FOO, "metrics"), - ) - - ret = main(["repro", "-m", stage.path]) - self.assertEqual(ret, 0) - - ret = main(["metrics", "remove", "metrics"]) - self.assertEqual(ret, 0) - - ret = main(["repro", "-f", "-m", stage.path]) - self.assertNotEqual(ret, 0) - - ret = main(["metrics", "add", "metrics"]) - self.assertEqual(ret, 0) - - ret = main(["metrics", "modify", "-t", "CSV", "-x", "0,0", "metrics"]) - self.assertEqual(ret, 0) - - ret = main(["repro", "-f", "-m", stage.path]) - self.assertEqual(ret, 0) - - def test_dir(self): - os.mkdir("metrics_dir") - - with self.assertRaises(DvcException): - self.dvc.run(metrics_no_cache=["metrics_dir"]) - - def test_binary(self): - with open("metrics_bin", "w+") as fd: - fd.write("\0\0\0\0\0\0\0\0") - - with self.assertRaises(DvcException): - self.dvc.run(metrics_no_cache=["metrics_bin"]) - - -class TestMetricsCLI(TestMetricsBase): - def test(self): - # FIXME check output - ret = main(["metrics", "show", "-a", "metric", "-v"]) - self.assertEqual(ret, 0) - - ret = main( - [ - "metrics", - "show", - "-a", - "metric_json", - "-t", - "json", - "-x", - "branch", - ] - ) - 
self.assertEqual(ret, 0) - ret = main( - ["metrics", "show", "-a", "metric_tsv", "-t", "tsv", "-x", "0,0"] - ) - self.assertEqual(ret, 0) - ret = main( - [ - "metrics", - "show", - "-a", - "metric_htsv", - "-t", - "htsv", - "-x", - "0,branch", - ] - ) - self.assertEqual(ret, 0) - - ret = main( - ["metrics", "show", "-a", "metric_csv", "-t", "csv", "-x", "0,0"] - ) - self.assertEqual(ret, 0) - - ret = main( - [ - "metrics", - "show", - "-a", - "metric_hcsv", - "-t", - "hcsv", - "-x", - "0,branch", - ] - ) - self.assertEqual(ret, 0) - - def test_dir(self): - os.mkdir("metrics_dir") - - with self.assertRaises(DvcException): - self.dvc.run(outs_no_cache=["metrics_dir"]) - self.dvc.metrics.add("metrics_dir") - - def test_binary(self): - with open("metrics_bin", "w+") as fd: - fd.write("\0\0\0\0\0\0\0\0") - - with self.assertRaises(DvcException): - self.dvc.run(outs_no_cache=["metrics_bin"]) - self.dvc.metrics.add("metrics_bin") - - def test_non_existing(self): - ret = main(["metrics", "add", "non-existing"]) - self.assertNotEqual(ret, 0) - - ret = main(["metrics", "modify", "non-existing"]) - self.assertNotEqual(ret, 0) - - ret = main(["metrics", "remove", "non-existing"]) - self.assertNotEqual(ret, 0) - - def test_wrong_type_add(self): - with open("metric.unknown", "w+") as fd: - fd.write("unknown") - fd.flush() - - ret = main(["add", "metric.unknown"]) - assert ret == 0 - - self._caplog.clear() - ret = main(["metrics", "add", "metric.unknown", "-t", "unknown"]) - assert ret == 1 - - assert ( - "failed to add metric file 'metric.unknown'" - ) in self._caplog.text - - assert ( - "'unknown' is not supported, must be one of " - "[raw, json, csv, tsv, hcsv, htsv]" - ) in self._caplog.text - - ret = main(["metrics", "add", "metric.unknown", "-t", "raw"]) - assert ret == 0 - - self._caplog.clear() - ret = main(["metrics", "show", "metric.unknown"]) - assert ret == 0 - - assert "\tmetric.unknown: unknown" in self._caplog.text - - def test_wrong_type_modify(self): - with open("metric.unknown", "w+") as fd: - fd.write("unknown") - fd.flush() - - ret = main(["run", "-m", "metric.unknown"]) - assert ret == 0 - - self._caplog.clear() - - ret = main(["metrics", "modify", "metric.unknown", "-t", "unknown"]) - assert ret == 1 - - assert "failed to modify metric file settings" in self._caplog.text - - assert ( - "metric type 'unknown' is not supported, must be one of " - "[raw, json, csv, tsv, hcsv, htsv]" - ) in self._caplog.text - - ret = main(["metrics", "modify", "metric.unknown", "-t", "CSV"]) - assert ret == 0 - - self._caplog.clear() - - ret = main(["metrics", "show", "metric.unknown"]) - assert ret == 0 - - assert "\tmetric.unknown: unknown" in self._caplog.text - - def test_wrong_type_show(self): - with open("metric.unknown", "w+") as fd: - fd.write("unknown") - fd.flush() - - ret = main(["run", "-m", "metric.unknown"]) - assert ret == 0 - - self._caplog.clear() - - ret = main( - ["metrics", "show", "metric.unknown", "-t", "unknown", "-x", "0,0"] - ) - assert ret == 0 - assert "\tmetric.unknown: unknown" in self._caplog.text - - -class TestNoMetrics(TestDvcGit): - def test(self): - with self.assertRaises(NoMetricsError): - self.dvc.metrics.show() - - def test_cli(self): - ret = main(["metrics", "show"]) - self.assertNotEqual(ret, 0) - - -class TestCachedMetrics(TestDvcGit): - def _do_add(self, branch): - self.dvc.scm.checkout(branch) - self.dvc.checkout(force=True) - assert not os.path.exists("metrics.json") - - with open("metrics.json", "w+") as fd: - json.dump({"metrics": branch}, fd) - - stages = 
self.dvc.add("metrics.json") - self.dvc.metrics.add("metrics.json", typ="json", xpath="metrics") - self.assertEqual(len(stages), 1) - stage = stages[0] - self.assertIsNotNone(stage) - - self.dvc.scm.add([".gitignore", "metrics.json.dvc"]) - self.dvc.scm.commit(branch) - - def _do_run(self, branch): - self.dvc.scm.checkout(branch) - self.dvc.checkout(force=True) - - with open("code.py", "w+") as fobj: - fobj.write("import sys\n") - fobj.write("import os\n") - fobj.write("import json\n") - fobj.write( - 'print(json.dumps({{"metrics": "{branch}"}}))\n'.format( - branch=branch - ) - ) - - stage = self.dvc.run( - deps=["code.py"], - metrics=["metrics.json"], - cmd="python code.py metrics.json > metrics.json", - ) - self.assertIsNotNone(stage) - self.assertEqual(stage.relpath, "metrics.json.dvc") - - self.dvc.scm.add(["code.py", ".gitignore", "metrics.json.dvc"]) - self.dvc.scm.commit(branch) - - def _test_metrics(self, func): - self.dvc.scm.commit("init") - - self.dvc.scm.branch("one") - self.dvc.scm.branch("two") - - func("master") - func("one") - func("two") - - # TestDvc currently is based on TestGit, so it is safe to use - # scm.git for now - self.dvc.scm.repo.git.clean("-fd") - - self.dvc = DvcRepo(".") - - res = self.dvc.metrics.show( - ["metrics.json"], all_branches=True, typ="json", xpath="metrics" - ) - - self.assertEqual( - res, - { - "master": {"metrics.json": {"metrics": "master"}}, - "one": {"metrics.json": {"metrics": "one"}}, - "two": {"metrics.json": {"metrics": "two"}}, - }, - ) - - res = self.dvc.metrics.show( - all_branches=True, typ="json", xpath="metrics" - ) - - self.assertEqual( - res, - { - "master": {"metrics.json": {"metrics": "master"}}, - "one": {"metrics.json": {"metrics": "one"}}, - "two": {"metrics.json": {"metrics": "two"}}, - }, - ) - - def test_add(self): - self._test_metrics(self._do_add) - - def test_run(self): - self._test_metrics(self._do_run) - - -class TestMetricsType(TestDvcGit): - branches = ["foo", "bar", "baz"] - files = [ - "metric", - "metric.txt", - "metric.json", - "metric.tsv", - "metric.htsv", - "metric.csv", - "metric.hcsv", - ] - xpaths = [None, None, "branch", "0,0", "0,branch", "0,0", "0,branch"] - - def setUp(self): - super().setUp() - self.dvc.scm.commit("init") - - for branch in self.branches: - self.dvc.scm.checkout(branch, create_new=True) - with open("metric", "w+") as fd: - fd.write(branch) - with open("metric.txt", "w+") as fd: - fd.write(branch) - with open("metric.json", "w+") as fd: - json.dump({"branch": branch}, fd) - with open("metric.csv", "w+") as fd: - fd.write(branch) - with open("metric.hcsv", "w+") as fd: - fd.write("branch\n") - fd.write(branch) - with open("metric.tsv", "w+") as fd: - fd.write(branch) - with open("metric.htsv", "w+") as fd: - fd.write("branch\n") - fd.write(branch) - self.dvc.run(metrics_no_cache=self.files, overwrite=True) - self.dvc.scm.add(self.files + ["metric.dvc"]) - self.dvc.scm.commit("metric") - - self.dvc.scm.checkout("master") - - def test_show(self): - for file_name, xpath in zip(self.files, self.xpaths): - self._do_show(file_name, xpath) - - def _do_show(self, file_name, xpath): - ret = self.dvc.metrics.show( - [file_name], xpath=xpath, all_branches=True - ) - self.assertEqual(len(ret), 3) - for branch in self.branches: - if isinstance(ret[branch][file_name], list): - self.assertSequenceEqual(ret[branch][file_name], [branch]) - elif isinstance(ret[branch][file_name], dict): - self.assertSequenceEqual( - ret[branch][file_name], {"branch": branch} - ) - else: - 
self.assertSequenceEqual(ret[branch][file_name], branch) - - -def test_display_missing_metrics(tmp_dir, scm, dvc, caplog): - scm.branch("branch") - - # Create a metric in master - tmp_dir.gen("metric", "0.5") - assert 0 == main(["run", "-m", "metric"]) - tmp_dir.scm_add("metric.dvc", commit="master commit") - - # Create a metric in branch - scm.checkout("branch") - tmp_dir.gen("metric", "0.5") - assert 0 == main(["run", "-M", "metric"]) - tmp_dir.scm_add("metric.dvc", commit="branch commit") - - os.remove("metric") - assert 0 == main(["metrics", "show", "-a"]) - assert ( - NO_METRICS_FILE_AT_REFERENCE_WARNING.format("metric", "branch") - in caplog.text - ) - - -def test_show_xpath_should_override_stage_xpath(tmp_dir, dvc): - tmp_dir.gen("metric", json.dumps({"m1": 0.1, "m2": 0.2})) - - dvc.run(cmd="", overwrite=True, metrics=["metric"]) - dvc.metrics.modify("metric", typ="json", xpath="m2") - - assert dvc.metrics.show(xpath="m1") == {"": {"metric": {"m1": 0.1}}} - - -def test_show_multiple_outputs(tmp_dir, dvc, caplog): - tmp_dir.gen( - { - "1.json": json.dumps({"AUC": 1}), - "2.json": json.dumps({"AUC": 2}), - "metrics/3.json": json.dumps({"AUC": 3}), - } - ) - - dvc.run(cmd="", overwrite=True, metrics=["1.json"]) - dvc.run(cmd="", overwrite=True, metrics=["2.json"]) - dvc.run(cmd="", overwrite=True, metrics=["metrics/3.json"]) - - with caplog.at_level(logging.INFO, logger="dvc"): - assert 0 == main(["metrics", "show", "1.json", "2.json"]) - assert '1.json: {"AUC": 1}' in caplog.text - assert '2.json: {"AUC": 2}' in caplog.text - - caplog.clear() - - with caplog.at_level(logging.INFO, logger="dvc"): - assert 0 == main(["metrics", "show", "-R", "1.json", "metrics"]) - assert '1.json: {"AUC": 1}' in caplog.text - assert '3.json: {"AUC": 3}' in caplog.text - - caplog.clear() - - with caplog.at_level(logging.INFO, logger="dvc"): - assert 1 == main(["metrics", "show", "1.json", "not-found"]) - assert '1.json: {"AUC": 1}' in caplog.text - assert ( - "the following metrics do not exist, " - "are not metric files or are malformed: 'not-found'" - ) in caplog.text - - -def test_metrics_diff_raw(tmp_dir, scm, dvc): - def _gen(val): - tmp_dir.gen({"metrics": val}) - dvc.run(cmd="", metrics=["metrics"]) - dvc.scm.add(["metrics.dvc"]) - dvc.scm.commit(str(val)) - - _gen("raw 1") - _gen("raw 2") - _gen("raw 3") - - assert dvc.metrics.diff(a_rev="HEAD~2") == { - "metrics": {"": {"old": "raw 1", "new": "raw 3"}} - } - - -def test_metrics_diff_raw_unchanged(tmp_dir, scm, dvc): - def _gen(val): - tmp_dir.gen({"metrics": val}) - dvc.run(cmd="", metrics=["metrics"]) - dvc.scm.add(["metrics.dvc"]) - dvc.scm.commit(str(val)) - - _gen("raw 1") - _gen("raw 2") - _gen("raw 1") - - assert dvc.metrics.diff(a_rev="HEAD~2") == {} - - -@pytest.mark.parametrize("xpath", [True, False]) -def test_metrics_diff_json(tmp_dir, scm, dvc, xpath): - def _gen(val): - metrics = {"a": {"b": {"c": val, "d": 1, "e": str(val)}}} - tmp_dir.gen({"m.json": json.dumps(metrics)}) - dvc.run(cmd="", metrics=["m.json"]) - dvc.metrics.modify("m.json", typ="json") - if xpath: - dvc.metrics.modify("m.json", xpath="a.b.c") - dvc.scm.add(["m.json.dvc"]) - dvc.scm.commit(str(val)) - - _gen(1) - _gen(2) - _gen(3) - - expected = {"m.json": {"a.b.c": {"old": 1, "new": 3, "diff": 2}}} - - if not xpath: - expected["m.json"]["a.b.e"] = {"old": "1", "new": "3"} - - assert expected == dvc.metrics.diff(a_rev="HEAD~2") - - -@pytest.mark.parametrize("xpath", [True, False]) -def test_metrics_diff_json_unchanged(tmp_dir, scm, dvc, xpath): - def _gen(val): - 
metrics = {"a": {"b": {"c": val, "d": 1, "e": str(val)}}} - tmp_dir.gen({"m.json": json.dumps(metrics)}) - dvc.run(cmd="", metrics=["m.json"]) - dvc.metrics.modify("m.json", typ="json") - if xpath: - dvc.metrics.modify("m.json", xpath="a.b.c") - dvc.scm.add(["m.json.dvc"]) - dvc.scm.commit(str(val)) - - _gen(1) - _gen(2) - _gen(1) - - assert dvc.metrics.diff(a_rev="HEAD~2") == {} - - -def test_metrics_diff_broken_json(tmp_dir, scm, dvc): - metrics = {"a": {"b": {"c": 1, "d": 1, "e": "3"}}} - tmp_dir.gen({"m.json": json.dumps(metrics)}) - dvc.run(cmd="", metrics_no_cache=["m.json"]) - dvc.scm.add(["m.json.dvc", "m.json"]) - dvc.scm.commit("add metrics") - - (tmp_dir / "m.json").write_text(json.dumps(metrics) + "ma\nlformed\n") - - assert dvc.metrics.diff() == { - "m.json": { - "a.b.c": {"old": 1, "new": "unable to parse"}, - "a.b.d": {"old": 1, "new": "unable to parse"}, - "a.b.e": {"old": "3", "new": "unable to parse"}, - } - } - - -def test_metrics_diff_no_metrics(tmp_dir, scm, dvc): - tmp_dir.scm_gen({"foo": "foo"}, commit="add foo") - assert dvc.metrics.diff(a_rev="HEAD~1") == {} - - -def test_metrics_diff_new_metric(tmp_dir, scm, dvc): - metrics = {"a": {"b": {"c": 1, "d": 1, "e": "3"}}} - tmp_dir.gen({"m.json": json.dumps(metrics)}) - dvc.run(cmd="", metrics_no_cache=["m.json"]) - - assert dvc.metrics.diff() == { - "m.json": { - "a.b.c": {"old": None, "new": 1}, - "a.b.d": {"old": None, "new": 1}, - "a.b.e": {"old": None, "new": "3"}, - } - } - - -def test_metrics_diff_deleted_metric(tmp_dir, scm, dvc): - metrics = {"a": {"b": {"c": 1, "d": 1, "e": "3"}}} - tmp_dir.gen({"m.json": json.dumps(metrics)}) - dvc.run(cmd="", metrics_no_cache=["m.json"]) - dvc.scm.add(["m.json.dvc", "m.json"]) - dvc.scm.commit("add metrics") - - (tmp_dir / "m.json").unlink() - - assert dvc.metrics.diff() == { - "m.json": { - "a.b.c": {"old": 1, "new": None}, - "a.b.d": {"old": 1, "new": None}, - "a.b.e": {"old": "3", "new": None}, - } - } - - -def test_metrics_without_scm(tmp_dir, dvc): - metrics = {"acc": 0.97, "recall": 0.95} - metrics_name = "metrics.json" - tmp_dir.gen({metrics_name: json.dumps(metrics)}) - dvc.add(metrics_name) - dvc.metrics.add(metrics_name) - with pytest.raises(NoSCMError): - dvc.metrics.diff() diff --git a/tests/unit/command/test_metrics.py b/tests/unit/command/test_metrics.py index d254da4e56..f0ab0821a4 100644 --- a/tests/unit/command/test_metrics.py +++ b/tests/unit/command/test_metrics.py @@ -9,11 +9,8 @@ def test_metrics_diff(dvc, mocker): "diff", "HEAD~10", "HEAD~1", - "-t", - "json", - "-x", - "x.path", "-R", + "--all", "--show-json", "--targets", "target1", @@ -32,9 +29,8 @@ def test_metrics_diff(dvc, mocker): targets=["target1", "target2"], a_rev="HEAD~10", b_rev="HEAD~1", - typ="json", - xpath="x.path", recursive=True, + all=True, ) @@ -90,10 +86,6 @@ def test_metrics_show(dvc, mocker): [ "metrics", "show", - "-t", - "json", - "-x", - "x.path", "-R", "--all-tags", "--all-branches", @@ -112,8 +104,6 @@ def test_metrics_show(dvc, mocker): m.assert_called_once_with( cmd.repo, ["target1", "target2"], - typ="json", - xpath="x.path", recursive=True, all_tags=True, all_branches=True, @@ -128,3 +118,20 @@ def test_metrics_diff_prec(): " Path Metric Value Change\n" "other.json a.b 0.0043 0.0001" ) + + +def test_metrics_diff_sorted(): + assert _show_diff( + { + "metrics.yaml": { + "x.b": {"old": 5, "new": 6, "diff": 1}, + "a.d.e": {"old": 3, "new": 4, "diff": 1}, + "a.b.c": {"old": 1, "new": 2, "diff": 1}, + } + } + ) == ( + " Path Metric Value Change\n" + "metrics.yaml a.b.c 2 1 \n" 
+ "metrics.yaml a.d.e 4 1 \n" + "metrics.yaml x.b 6 1 " + ) diff --git a/tests/unit/repo/test_tree.py b/tests/unit/repo/test_tree.py index 4f4bb163fd..208a0c8ecc 100644 --- a/tests/unit/repo/test_tree.py +++ b/tests/unit/repo/test_tree.py @@ -24,6 +24,26 @@ def test_open(tmp_dir, dvc): assert fobj.read() == "foo" +def test_open_in_history(tmp_dir, scm, dvc): + tmp_dir.gen("foo", "foo") + dvc.add("foo") + dvc.scm.add(["foo.dvc", ".gitignore"]) + dvc.scm.commit("foo") + + tmp_dir.gen("foo", "foofoo") + dvc.add("foo") + dvc.scm.add(["foo.dvc", ".gitignore"]) + dvc.scm.commit("foofoo") + + for rev in dvc.brancher(revs=["HEAD~1"]): + if rev == "working tree": + continue + + tree = DvcTree(dvc) + with tree.open("foo", "r") as fobj: + assert fobj.read() == "foo" + + def test_isdir_isfile(tmp_dir, dvc): tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}})