Skip to content

Commit

Permalink
dvc: introduce dvc params diff
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop committed Apr 9, 2020
1 parent b5a8ad7 commit 124e49c
Show file tree
Hide file tree
Showing 15 changed files with 534 additions and 109 deletions.
2 changes: 2 additions & 0 deletions dvc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
ls,
lock,
metrics,
params,
move,
pipeline,
remote,
Expand Down Expand Up @@ -62,6 +63,7 @@
remote,
cache,
metrics,
params,
install,
root,
ls,
Expand Down
24 changes: 7 additions & 17 deletions dvc/command/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,21 +114,9 @@ def run(self):


def _show_diff(diff):
from texttable import Texttable
from dvc.utils.diff import table

if not diff:
return ""

table = Texttable()

# disable automatic formatting
table.set_cols_dtype(["t", "t", "t", "t"])

# remove borders to make it easier for users to copy stuff
table.set_chars(("", "", "", ""))
table.set_deco(0)

rows = [["Path", "Metric", "Value", "Change"]]
rows = []
for fname, mdiff in diff.items():
for metric, change in mdiff.items():
rows.append(
Expand All @@ -139,8 +127,8 @@ def _show_diff(diff):
change.get("diff", "diff not supported"),
]
)
table.add_rows(rows)
return table.draw()

return table(["Path", "Metric", "Value", "Change"], rows)


class CmdMetricsDiff(CmdBase):
Expand All @@ -160,7 +148,9 @@ def run(self):

logger.info(json.dumps(diff))
else:
logger.info(_show_diff(diff))
table = _show_diff(diff)
if table:
logger.info(table)

except DvcException:
logger.exception("failed to show metrics diff")
Expand Down
90 changes: 90 additions & 0 deletions dvc/command/params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import argparse
import logging

from dvc.command.base import append_doc_link
from dvc.command.base import CmdBase
from dvc.command.base import fix_subparsers
from dvc.exceptions import DvcException


logger = logging.getLogger(__name__)


def _show_diff(diff):
from dvc.utils.diff import table

rows = []
for fname, mdiff in diff.items():
for param, change in mdiff.items():
rows.append([fname, param, change["old"], change["new"]])

return table(["Path", "Param", "Old", "New"], rows)


class CmdParamsDiff(CmdBase):
def run(self):
try:
diff = self.repo.params.diff(
a_rev=self.args.a_rev, b_rev=self.args.b_rev,
)

if self.args.show_json:
import json

logger.info(json.dumps(diff))
else:
table = _show_diff(diff)
if table:
logger.info(table)

except DvcException:
logger.exception("failed to show params diff")
return 1

return 0


def add_parser(subparsers, parent_parser):
PARAMS_HELP = "Commands to display params."

params_parser = subparsers.add_parser(
"params",
parents=[parent_parser],
description=append_doc_link(PARAMS_HELP, "params"),
help=PARAMS_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)

params_subparsers = params_parser.add_subparsers(
dest="cmd",
help="Use `dvc params CMD --help` to display command-specific help.",
)

fix_subparsers(params_subparsers)

PARAMS_DIFF_HELP = (
"Show changes in params between commits in the DVC repository, or "
"between a commit and the workspace."
)
params_diff_parser = params_subparsers.add_parser(
"diff",
parents=[parent_parser],
description=append_doc_link(PARAMS_DIFF_HELP, "params/diff"),
help=PARAMS_DIFF_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
params_diff_parser.add_argument(
"a_rev", nargs="?", help="Old Git commit to compare (defaults to HEAD)"
)
params_diff_parser.add_argument(
"b_rev",
nargs="?",
help=("New Git commit to compare (defaults to the current workspace)"),
)
params_diff_parser.add_argument(
"--show-json",
action="store_true",
default=False,
help="Show output in JSON format.",
)
params_diff_parser.set_defaults(func=CmdParamsDiff)
6 changes: 3 additions & 3 deletions dvc/dependency/param.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def status(self):
return status

status = defaultdict(dict)
info = self._get_info()
info = self.read_params()
for param in self.params:
if param not in info.keys():
st = "deleted"
Expand All @@ -74,7 +74,7 @@ def dumpd(self):
self.PARAM_PARAMS: self.info or self.params,
}

def _get_info(self):
def read_params(self):
if not self.exists:
return {}

Expand All @@ -95,7 +95,7 @@ def _get_info(self):
return ret

def save_info(self):
info = self._get_info()
info = self.read_params()

missing_params = set(self.params) - set(info.keys())
if missing_params:
Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def __init__(self, root_dir=None):
from dvc.cache import Cache
from dvc.data_cloud import DataCloud
from dvc.repo.metrics import Metrics
from dvc.repo.params import Params
from dvc.scm.tree import WorkingTree
from dvc.repo.tag import Tag
from dvc.utils.fs import makedirs
Expand Down Expand Up @@ -102,6 +103,7 @@ def __init__(self, root_dir=None):
self.cloud = DataCloud(self)

self.metrics = Metrics(self)
self.params = Params(self)
self.tag = Tag(self)

self._ignore()
Expand Down
85 changes: 2 additions & 83 deletions dvc/repo/metrics/diff.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,7 @@
import json
from collections import defaultdict

from flatten_json import flatten

import dvc.utils.diff
from dvc.exceptions import NoMetricsError


def _parse(raw):
if raw is None or isinstance(raw, (dict, list, int, float)):
return raw

assert isinstance(raw, str)
try:
return json.loads(raw)
except json.JSONDecodeError:
return raw


def _diff_vals(old, new):
if (
isinstance(new, list)
and isinstance(old, list)
and len(old) == len(new) == 1
):
return _diff_vals(old[0], new[0])

if old == new:
return {}

res = {"old": old, "new": new}
if isinstance(new, (int, float)) and isinstance(old, (int, float)):
res["diff"] = new - old
return res


def _flatten(d):
if not d:
return defaultdict(lambda: None)

if isinstance(d, dict):
return defaultdict(lambda: None, flatten(d, "."))

return defaultdict(lambda: "unable to parse")


def _diff_dicts(old_dict, new_dict):
new = _flatten(new_dict)
old = _flatten(old_dict)

res = defaultdict(dict)

xpaths = set(old.keys())
xpaths.update(set(new.keys()))
for xpath in xpaths:
old_val = old[xpath]
new_val = new[xpath]
val_diff = _diff_vals(old_val, new_val)
if val_diff:
res[xpath] = val_diff
return dict(res)


def _diff(old_raw, new_raw):
old = _parse(old_raw)
new = _parse(new_raw)

if isinstance(new, dict) or isinstance(old, dict):
return _diff_dicts(old, new)

val_diff = _diff_vals(old, new)
if val_diff:
return {"": val_diff}

return {}


def _get_metrics(repo, *args, rev=None, **kwargs):
try:
metrics = repo.metrics.show(
Expand All @@ -89,12 +16,4 @@ def diff(repo, *args, a_rev=None, b_rev=None, **kwargs):
old = _get_metrics(repo, *args, **kwargs, rev=(a_rev or "HEAD"))
new = _get_metrics(repo, *args, **kwargs, rev=b_rev)

paths = set(old.keys())
paths.update(set(new.keys()))

res = defaultdict(dict)
for path in paths:
path_diff = _diff(old.get(path), new.get(path))
if path_diff:
res[path] = path_diff
return dict(res)
return dvc.utils.diff.diff(old, new)
13 changes: 13 additions & 0 deletions dvc/repo/params/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
class Params(object):
def __init__(self, repo):
self.repo = repo

def show(self, *args, **kwargs):
from .show import show

return show(self.repo, *args, **kwargs)

def diff(self, *args, **kwargs):
from .diff import diff

return diff(self.repo, *args, **kwargs)
30 changes: 30 additions & 0 deletions dvc/repo/params/diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import dvc.utils.diff
from .show import NoParamsError


def _get_params(repo, *args, rev=None, **kwargs):
try:
params = repo.params.show(*args, **kwargs, revs=[rev] if rev else None)
return params.get(rev or "", {})
except NoParamsError:
return {}


def _format(params):
ret = {}
for key, val in params.items():
if isinstance(val, dict):
new_val = _format(val)
elif isinstance(val, list):
new_val = str(val)
else:
new_val = val
ret[key] = new_val
return ret


def diff(repo, *args, a_rev=None, b_rev=None, **kwargs):
old = _get_params(repo, *args, **kwargs, rev=(a_rev or "HEAD"))
new = _get_params(repo, *args, **kwargs, rev=b_rev)

return dvc.utils.diff.diff(_format(old), _format(new))
Loading

0 comments on commit 124e49c

Please sign in to comment.