Skip to content

Commit

Permalink
get: handle non-DVC repositories
Browse files: browse the repository at this point in the history
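Allows `dvc get` to fetch files from source repositories that are not DVC repositories, by falling back to a plain file copy from the cloned repository instead of raising `UrlNotDvcRepoError`.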

Fixes iterative#3089
  • Loading branch information
fabiosantoscode committed Jan 10, 2020
1 parent 095464d commit 0dd5647
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 31 deletions.
5 changes: 0 additions & 5 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,6 @@ def __init__(self, ignore_dirname):
)


class UrlNotDvcRepoError(DvcException):
def __init__(self, url):
super().__init__("URL '{}' is not a dvc repository.".format(url))


class GitHookAlreadyExistsError(DvcException):
def __init__(self, hook_name):
super().__init__(
Expand Down
4 changes: 2 additions & 2 deletions dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
repo.close()


def cached_clone(url, rev=None, **_ignored_kwargs):
def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs):
"""Clone an external git repo to a temporary directory.
Returns the path to a local temporary directory with the specified
Expand All @@ -44,7 +44,7 @@ def cached_clone(url, rev=None, **_ignored_kwargs):
"""

new_path = tempfile.mkdtemp("dvc-erepo")
new_path = clone_path or tempfile.mkdtemp("dvc-erepo")

# Copy and adjust existing clean clone
if (url, None, None) in REPO_CACHE:
Expand Down
39 changes: 22 additions & 17 deletions dvc/repo/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
DvcException,
NotDvcRepoError,
OutputNotFoundError,
UrlNotDvcRepoError,
PathMissingError,
)
from dvc.external_repo import external_repo
from dvc.external_repo import cached_clone
from dvc.path_info import PathInfo
from dvc.stage import Stage
from dvc.utils import resolve_output
Expand All @@ -28,8 +27,15 @@ def __init__(self):
)


# Internal sentinel exception: raised inside get() when the requested output
# is not cached by DVC, so the handler falls back to a plain file copy.
class _DoPlainCopy(DvcException):
pass


@staticmethod
def get(url, path, out=None, rev=None):
from dvc.repo import Repo

out = resolve_output(path, out)

if Stage.is_valid_filename(out):
Expand All @@ -43,7 +49,8 @@ def get(url, path, out=None, rev=None):
dpath = os.path.dirname(os.path.abspath(out))
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
try:
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
cached_clone(url, rev=rev, clone_path=tmp_dir)
try:
# Try any links possible to avoid data duplication.
#
# Not using symlink, because we need to remove cache after we are
Expand All @@ -53,26 +60,24 @@ def get(url, path, out=None, rev=None):
#
# Also, we can't use theoretical "move" link type here, because
# the same cache file might be used a few times in a directory.
repo = Repo(tmp_dir)
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
output = repo.find_out_by_relpath(path)
if not output.use_cache:
# Catch this below and go for a plain old fs_copy
raise _DoPlainCopy
_get_cached(repo, output, out)

try:
output = repo.find_out_by_relpath(path)
except OutputNotFoundError:
output = None

if output and output.use_cache:
_get_cached(repo, output, out)
else:
# Either an uncached out with absolute path or a user error
if os.path.isabs(path):
raise FileNotFoundError
except (NotDvcRepoError, OutputNotFoundError, _DoPlainCopy):
# It's an uncached out with absolute path, a non-DVC repo, or a
# user error
if os.path.isabs(path):
raise FileNotFoundError

fs_copy(os.path.join(repo.root_dir, path), out)
fs_copy(os.path.join(tmp_dir, path), out)

except (OutputNotFoundError, FileNotFoundError):
raise PathMissingError(path, url)
except NotDvcRepoError:
raise UrlNotDvcRepoError(url)
finally:
remove(tmp_dir)

Expand Down
10 changes: 3 additions & 7 deletions tests/func/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from dvc.cache import Cache
from dvc.config import Config
from dvc.exceptions import UrlNotDvcRepoError
from dvc.repo.get import GetDVCFileError, PathMissingError
from dvc.repo import Repo
from dvc.system import System
Expand Down Expand Up @@ -87,9 +86,10 @@ def test_get_repo_rev(tmp_dir, erepo_dir):
def test_get_from_non_dvc_repo(tmp_dir, erepo_dir):
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
erepo_dir.scm.commit("remove dvc")
erepo_dir.scm_gen({"some_file": "contents"}, commit="create file")

with pytest.raises(UrlNotDvcRepoError):
Repo.get(fspath(erepo_dir), "some_file.zip")
Repo.get(fspath(erepo_dir), "some_file", "file_imported")
assert (tmp_dir / "file_imported").read_text() == "contents"


def test_get_a_dvc_file(tmp_dir, erepo_dir):
Expand Down Expand Up @@ -164,10 +164,6 @@ def test_get_from_non_dvc_master(tmp_dir, erepo_dir, caplog):
erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True)
erepo_dir.dvc.scm.commit("remove .dvc")

# sanity check
with pytest.raises(UrlNotDvcRepoError):
Repo.get(fspath(erepo_dir), "some_file")

caplog.clear()
dst = "file_imported"
with caplog.at_level(logging.INFO, logger="dvc"):
Expand Down

0 comments on commit 0dd5647

Please sign in to comment.