Skip to content

Commit

Permalink
dvc: add sanity checks for hard/symlinks
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop committed Jan 22, 2020
1 parent b99c5e4 commit d863c46
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 16 deletions.
20 changes: 9 additions & 11 deletions dvc/command/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ def get_fs_type(path):
@staticmethod
def get_linktype_support_info(repo):
links = {
"reflink": System.reflink,
"hardlink": System.hardlink,
"symlink": System.symlink,
"reflink": (System.reflink, None),
"hardlink": (System.hardlink, System.is_hardlink),
"symlink": (System.symlink, System.is_symlink),
}

fname = "." + str(uuid.uuid4())
Expand All @@ -98,18 +98,16 @@ def get_linktype_support_info(repo):

cache = []

for name, link in links.items():
for name, (link, is_link) in links.items():
try:
link(src, dst)
status = "supported"
if is_link and not is_link(dst):
status = "broken"
os.unlink(dst)
supported = True
except DvcException:
supported = False
cache.append(
"{name} - {supported}".format(
name=name, supported=True if supported else False
)
)
status = "not supported"
cache.append("{name} - {status}".format(name=name, status=status))
os.remove(src)

return ", ".join(cache)
Expand Down
13 changes: 12 additions & 1 deletion dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,13 +392,24 @@ def _link(self, from_info, to_info, link_types):

self._try_links(from_info, to_info, link_types)

def _verify_link(self, path_info, link_type):
if self.cache_type_confirmed:
return

is_link = getattr(self, "is_{}".format(link_type), None)
if is_link and not is_link(path_info):
self.remove(path_info)
raise DvcException("failed to verify {}".format(link_type))

self.cache_type_confirmed = True

@slow_link_guard
def _try_links(self, from_info, to_info, link_types):
while link_types:
link_method = getattr(self, link_types[0])
try:
self._do_link(from_info, to_info, link_method)
self.cache_type_confirmed = True
self._verify_link(to_info, link_types[0])
return

except DvcException as exc:
Expand Down
8 changes: 8 additions & 0 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ def copy(self, from_info, to_info):
def symlink(from_info, to_info):
System.symlink(from_info, to_info)

@staticmethod
def is_symlink(path_info):
return System.is_symlink(path_info)

def hardlink(self, from_info, to_info):
# If there are a lot of empty files (which happens a lot in datasets),
# and the cache type is `hardlink`, we might reach link limits and
Expand All @@ -204,6 +208,10 @@ def hardlink(self, from_info, to_info):

System.hardlink(from_info, to_info)

@staticmethod
def is_hardlink(path_info):
return System.is_hardlink(path_info)

@staticmethod
def reflink(from_info, to_info):
System.reflink(from_info, to_info)
Expand Down
8 changes: 4 additions & 4 deletions tests/func/test_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ def test_info_in_repo(tmp_dir, dvc, caplog):
assert re.search(r"Platform: .*", caplog.text)
assert re.search(r"Binary: (True|False)", caplog.text)
assert re.search(r"Package: .*", caplog.text)
assert re.search(r"(Cache: (.*link - (True|False)(,\s)?){3})", caplog.text)
assert re.search(
r"(Cache: (.*link - (not )?supported(,\s)?){3})", caplog.text
)


@pytest.mark.skipif(psutil is None, reason="No psutil.")
Expand All @@ -37,9 +39,7 @@ def test_info_outside_of_repo(tmp_dir, caplog):
assert re.search(r"Platform: .*", caplog.text)
assert re.search(r"Binary: (True|False)", caplog.text)
assert re.search(r"Package: .*", caplog.text)
assert not re.search(
r"(Cache: (.*link - (True|False)(,\s)?){3})", caplog.text
)
assert not re.search(r"(Cache: (.*link - (not )?(,\s)?){3})", caplog.text)


@pytest.mark.skipif(psutil is None, reason="No psutil.")
Expand Down

0 comments on commit d863c46

Please sign in to comment.