-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
dvc.yaml: make persist/cache bool flags in outs/metrics #3804
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,8 @@ | |
|
||
from dvc import dependency, output | ||
from dvc.output import CHECKSUMS_SCHEMA | ||
from dvc.stage.params import OutputParams, StageParams | ||
from dvc.output.base import BaseOutput | ||
from dvc.stage.params import StageParams | ||
|
||
STAGES = "stages" | ||
SINGLE_STAGE_SCHEMA = { | ||
|
@@ -34,7 +35,24 @@ | |
Optional(StageParams.PARAM_LOCKED): bool, | ||
Optional(StageParams.PARAM_META): object, | ||
Optional(StageParams.PARAM_ALWAYS_CHANGED): bool, | ||
**{Optional(p.value): [str] for p in OutputParams}, | ||
Optional(StageParams.PARAM_OUTS): [ | ||
Any( | ||
str, | ||
{ | ||
str: { | ||
Optional(BaseOutput.PARAM_CACHE): bool, | ||
Optional(BaseOutput.PARAM_PERSIST): bool, | ||
} | ||
}, | ||
) | ||
], | ||
Optional(StageParams.PARAM_OUTS_NO_CACHE): [str], | ||
Optional(StageParams.PARAM_METRICS): [ | ||
Any(str, {str: {Optional(BaseOutput.PARAM_CACHE): bool}}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not supporting persist for metrics because it is the way we currently generate it during the serialization. Plus we don't really have a CLI option for it. But seems like Note that in old dvc-files that(persistent metrics) was supported if user set |
||
], | ||
Optional(StageParams.PARAM_METRICS_NO_CACHE): [str], | ||
Optional(StageParams.PARAM_OUTS_PERSIST): [str], | ||
Optional(StageParams.PARAM_OUTS_PERSIST_NO_CACHE): [str], | ||
} | ||
} | ||
MULTI_STAGE_SCHEMA = {STAGES: SINGLE_PIPELINE_STAGE_SCHEMA} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,14 +24,17 @@ | |
def _get_outs(stage: "PipelineStage"): | ||
outs_bucket = {} | ||
for o in sort_by_path(stage.outs): | ||
bucket_key = ["metrics"] if o.metric else ["outs"] | ||
key = "metrics" if o.metric else "outs" | ||
|
||
if not o.metric and o.persist: | ||
bucket_key += ["persist"] | ||
extra = {} | ||
if o.persist: | ||
extra["persist"] = True | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just matching existing style in this method, so using plain strings. Maybe we should discuss strings vs constants one more time, to see how people feel about it these days. |
||
if not o.use_cache: | ||
bucket_key += ["no_cache"] | ||
key = "_".join(bucket_key) | ||
outs_bucket[key] = outs_bucket.get(key, []) + [o.def_path] | ||
extra["cache"] = False | ||
|
||
value = {o.def_path: extra} if extra else o.def_path | ||
|
||
outs_bucket[key] = outs_bucket.get(key, []) + [value] | ||
return [(key, outs_bucket[key]) for key in sorted(outs_bucket.keys())] | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,3 @@ | ||
from enum import Enum | ||
|
||
|
||
class StageParams: | ||
PARAM_MD5 = "md5" | ||
PARAM_CMD = "cmd" | ||
|
@@ -12,11 +9,9 @@ class StageParams: | |
PARAM_ALWAYS_CHANGED = "always_changed" | ||
PARAM_PARAMS = "params" | ||
|
||
|
||
class OutputParams(Enum): | ||
PERSIST = "outs_persist" | ||
PERSIST_NO_CACHE = "outs_persist_no_cache" | ||
METRICS_NO_CACHE = "metrics_no_cache" | ||
METRICS = "metrics" | ||
NO_CACHE = "outs_no_cache" | ||
OUTS = "outs" | ||
PARAM_OUTS_PERSIST = "outs_persist" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Btw, might be worth renaming it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. may be in this case we can make it a flag within the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @shcheklein It is that already :) But we also keep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep, sounds good to me! never liked these JAVA-style - |
||
PARAM_OUTS_PERSIST_NO_CACHE = "outs_persist_no_cache" | ||
PARAM_METRICS_NO_CACHE = "metrics_no_cache" | ||
PARAM_METRICS = "metrics" | ||
PARAM_OUTS_NO_CACHE = "outs_no_cache" | ||
PARAM_OUTS = "outs" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Being a bit restrictive here and not allowing additional flags (e.g. persist=True) for such fields.