diff --git a/dvc/exceptions.py b/dvc/exceptions.py index e48014e95a..a551e35db1 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -240,11 +240,6 @@ def __init__(self, ignore_dirname): ) -class UrlNotDvcRepoError(DvcException): - def __init__(self, url): - super().__init__("URL '{}' is not a dvc repository.".format(url)) - - class GitHookAlreadyExistsError(DvcException): def __init__(self, hook_name): super().__init__( diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 9ff2f2a413..8f7cfaef55 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -33,7 +33,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None): repo.close() -def cached_clone(url, rev=None, **_ignored_kwargs): +def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs): """Clone an external git repo to a temporary directory. Returns the path to a local temporary directory with the specified @@ -44,7 +44,7 @@ def cached_clone(url, rev=None, **_ignored_kwargs): """ - new_path = tempfile.mkdtemp("dvc-erepo") + new_path = clone_path or tempfile.mkdtemp("dvc-erepo") # Copy and adjust existing clean clone if (url, None, None) in REPO_CACHE: diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 17607bbfae..58eddfcbe9 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -7,10 +7,9 @@ DvcException, NotDvcRepoError, OutputNotFoundError, - UrlNotDvcRepoError, PathMissingError, ) -from dvc.external_repo import external_repo +from dvc.external_repo import cached_clone from dvc.path_info import PathInfo from dvc.stage import Stage from dvc.utils import resolve_output @@ -28,8 +27,15 @@ def __init__(self): ) +# Dummy exception raised to signal a plain file copy is needed +class _DoPlainCopy(DvcException): + pass + + @staticmethod def get(url, path, out=None, rev=None): + from dvc.repo import Repo + out = resolve_output(path, out) if Stage.is_valid_filename(out): @@ -43,7 +49,8 @@ def get(url, path, out=None, rev=None): dpath = os.path.dirname(os.path.abspath(out)) tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid())) try: - with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo: + cached_clone(url, rev=rev, clone_path=tmp_dir) + try: # Try any links possible to avoid data duplication. # # Not using symlink, because we need to remove cache after we are @@ -53,26 +60,24 @@ def get(url, path, out=None, rev=None): # # Also, we can't use theoretical "move" link type here, because # the same cache file might be used a few times in a directory. + repo = Repo(tmp_dir) repo.cache.local.cache_types = ["reflink", "hardlink", "copy"] + output = repo.find_out_by_relpath(path) + if not output.use_cache: + # Catch this below and go for a plain old fs_copy + raise _DoPlainCopy + _get_cached(repo, output, out) - try: - output = repo.find_out_by_relpath(path) - except OutputNotFoundError: - output = None - - if output and output.use_cache: - _get_cached(repo, output, out) - else: - # Either an uncached out with absolute path or a user error - if os.path.isabs(path): - raise FileNotFoundError + except (NotDvcRepoError, OutputNotFoundError, _DoPlainCopy): + # It's an uncached out with absolute path, a non-DVC repo, or a + # user error + if os.path.isabs(path): + raise FileNotFoundError - fs_copy(os.path.join(repo.root_dir, path), out) + fs_copy(os.path.join(tmp_dir, path), out) except (OutputNotFoundError, FileNotFoundError): raise PathMissingError(path, url) - except NotDvcRepoError: - raise UrlNotDvcRepoError(url) finally: remove(tmp_dir) diff --git a/tests/func/test_get.py b/tests/func/test_get.py index 99fd23840b..f988a5c286 100644 --- a/tests/func/test_get.py +++ b/tests/func/test_get.py @@ -5,7 +5,6 @@ from dvc.cache import Cache from dvc.config import Config -from dvc.exceptions import UrlNotDvcRepoError from dvc.repo.get import GetDVCFileError, PathMissingError from dvc.repo import Repo from dvc.system import System @@ -87,9 +86,10 @@ def test_get_repo_rev(tmp_dir, erepo_dir): def test_get_from_non_dvc_repo(tmp_dir, erepo_dir): erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True) erepo_dir.scm.commit("remove dvc") + erepo_dir.scm_gen({"some_file": "contents"}, commit="create file") - with pytest.raises(UrlNotDvcRepoError): - Repo.get(fspath(erepo_dir), "some_file.zip") + Repo.get(fspath(erepo_dir), "some_file", "file_imported") + assert (tmp_dir / "file_imported").read_text() == "contents" def test_get_a_dvc_file(tmp_dir, erepo_dir): @@ -164,10 +164,6 @@ def test_get_from_non_dvc_master(tmp_dir, erepo_dir, caplog): erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True) erepo_dir.dvc.scm.commit("remove .dvc") - # sanity check - with pytest.raises(UrlNotDvcRepoError): - Repo.get(fspath(erepo_dir), "some_file") - caplog.clear() dst = "file_imported" with caplog.at_level(logging.INFO, logger="dvc"):