diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 4cfe48d574..26fabe1e48 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -21,7 +21,7 @@ from dvc.scheme import Schemes from dvc.scm.tree import is_working_tree from dvc.system import System -from dvc.utils import copyfile +from dvc.utils.fs import copyfile from dvc.utils import file_md5 from dvc.utils import relpath from dvc.utils import tmp_fname diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 63d51387a5..9a6157ef96 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -105,36 +105,6 @@ def dict_md5(d, exclude=()): return bytes_md5(byts) -def copyfile(src, dest, no_progress_bar=False, name=None): - """Copy file with progress bar""" - from dvc.exceptions import DvcException - from dvc.progress import Tqdm - from dvc.system import System - - src = fspath_py35(src) - dest = fspath_py35(dest) - - name = name if name else os.path.basename(dest) - total = os.stat(src).st_size - - if os.path.isdir(dest): - dest = os.path.join(dest, os.path.basename(src)) - - try: - System.reflink(src, dest) - except DvcException: - with Tqdm( - desc=name, disable=no_progress_bar, total=total, bytes=True - ) as pbar: - with open(src, "rb") as fsrc, open(dest, "wb+") as fdest: - while True: - buf = fsrc.read(LOCAL_CHUNK_SIZE) - if not buf: - break - fdest.write(buf) - pbar.update(len(buf)) - - def _split(list_to_split, chunk_size): return [ list_to_split[i : i + chunk_size] diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 426baa4100..7a3017b7fb 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -18,6 +18,8 @@ logger = logging.getLogger(__name__) +LOCAL_CHUNK_SIZE = 2 ** 20 # 1 MB + def fs_copy(src, dst): if os.path.isdir(src): @@ -169,3 +171,33 @@ def makedirs(path, exist_ok=False, mode=None): os.makedirs(path, exist_ok=exist_ok) finally: os.umask(umask) + + +def copyfile(src, dest, no_progress_bar=False, name=None): + """Copy file with progress bar""" + from dvc.exceptions import DvcException + from dvc.progress import Tqdm + from dvc.system import System + + src = fspath_py35(src) + dest = fspath_py35(dest) + + name = name if name else os.path.basename(dest) + total = os.stat(src).st_size + + if os.path.isdir(dest): + dest = os.path.join(dest, os.path.basename(src)) + + try: + System.reflink(src, dest) + except DvcException: + with Tqdm( + desc=name, disable=no_progress_bar, total=total, bytes=True + ) as pbar: + with open(src, "rb") as fsrc, open(dest, "wb+") as fdest: + while True: + buf = fsrc.read(LOCAL_CHUNK_SIZE) + if not buf: + break + fdest.write(buf) + pbar.update(len(buf)) diff --git a/tests/func/test_fs.py b/tests/func/test_fs.py index be5dcb3ac7..0dbdce8bed 100644 --- a/tests/func/test_fs.py +++ b/tests/func/test_fs.py @@ -3,7 +3,7 @@ import pytest -from dvc.utils.fs import makedirs +from dvc.utils.fs import makedirs, copyfile @pytest.mark.skipif(os.name == "nt", reason="Not supported for Windows.") @@ -18,3 +18,19 @@ def test_makedirs_permissions(tmp_dir): assert stat.S_IMODE(os.stat(test_dir).st_mode) == dir_mode assert stat.S_IMODE(os.stat(intermediate_dir).st_mode) == dir_mode + + +def test_copyfile(tmp_dir): + src = "file1" + dest = "file2" + dest_dir = "testdir" + + tmp_dir.gen(src, "file1contents") + + os.mkdir(dest_dir) + + copyfile(src, dest) + assert (tmp_dir / dest).read_text() == "file1contents" + + copyfile(src, dest_dir) + assert (tmp_dir / dest_dir / src).read_text() == "file1contents" diff --git a/tests/func/test_utils.py b/tests/func/test_utils.py index 07747092c4..996b2ce2f8 100644 --- a/tests/func/test_utils.py +++ b/tests/func/test_utils.py @@ -1,25 +1,8 @@ # encoding: utf-8 -import os from dvc import utils -def test_copyfile(tmp_dir): - src = "file1" - dest = "file2" - dest_dir = "testdir" - - tmp_dir.gen(src, "file1contents") - - os.mkdir(dest_dir) - - utils.copyfile(src, dest) - assert (tmp_dir / dest).read_text() == "file1contents" - - utils.copyfile(src, dest_dir) - assert (tmp_dir / dest_dir / src).read_text() == "file1contents" - - def test_file_md5_crlf(tmp_dir): tmp_dir.gen("cr", b"a\nb\nc") tmp_dir.gen("crlf", b"a\r\nb\r\nc") diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 169482e514..33ab5d342a 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -1,3 +1,4 @@ +import filecmp import os from unittest import TestCase @@ -11,6 +12,7 @@ from dvc.system import System from dvc.utils import relpath from dvc.utils.fs import BasePathNotInCheckedPathException +from dvc.utils.fs import copyfile from dvc.utils.fs import contains_symlink_up_to from dvc.utils.fs import get_inode from dvc.utils.fs import get_mtime_and_size @@ -216,3 +218,29 @@ def test_makedirs(repo_dir): makedirs(path_info) assert os.path.isdir(path_info.fspath) + + +@pytest.mark.parametrize("path", [TestDir.DATA, TestDir.DATA_DIR]) +def test_copyfile(path, repo_dir): + src = repo_dir.FOO + dest = path + src_info = PathInfo(repo_dir.BAR) + dest_info = PathInfo(path) + + copyfile(src, dest) + if os.path.isdir(dest): + assert filecmp.cmp( + src, os.path.join(dest, os.path.basename(src)), shallow=False + ) + else: + assert filecmp.cmp(src, dest, shallow=False) + + copyfile(src_info, dest_info) + if os.path.isdir(dest_info.fspath): + assert filecmp.cmp( + src_info.fspath, + os.path.join(dest_info.fspath, os.path.basename(src_info.fspath)), + shallow=False, + ) + else: + assert filecmp.cmp(src_info.fspath, dest_info.fspath, shallow=False) diff --git a/tests/unit/utils/test_utils.py b/tests/unit/utils/test_utils.py index 18f68fb02d..17870fe0bd 100644 --- a/tests/unit/utils/test_utils.py +++ b/tests/unit/utils/test_utils.py @@ -1,18 +1,15 @@ -import filecmp import re import os import pytest from dvc.path_info import PathInfo -from dvc.utils import copyfile from dvc.utils import file_md5 from dvc.utils import fix_env from dvc.utils import relpath from dvc.utils import to_chunks from dvc.utils import tmp_fname from dvc.utils import walk_files -from tests.basic_env import TestDir @pytest.mark.parametrize( @@ -88,32 +85,6 @@ def test_file_md5(repo_dir): assert file_md5(fname) == file_md5(fname_object) -@pytest.mark.parametrize("path", [TestDir.DATA, TestDir.DATA_DIR]) -def test_copyfile(path, repo_dir): - src = repo_dir.FOO - dest = path - src_info = PathInfo(repo_dir.BAR) - dest_info = PathInfo(path) - - copyfile(src, dest) - if os.path.isdir(dest): - assert filecmp.cmp( - src, os.path.join(dest, os.path.basename(src)), shallow=False - ) - else: - assert filecmp.cmp(src, dest, shallow=False) - - copyfile(src_info, dest_info) - if os.path.isdir(dest_info.fspath): - assert filecmp.cmp( - src_info.fspath, - os.path.join(dest_info.fspath, os.path.basename(src_info.fspath)), - shallow=False, - ) - else: - assert filecmp.cmp(src_info.fspath, dest_info.fspath, shallow=False) - - def test_tmp_fname(): file_path = os.path.join("path", "to", "file") file_path_info = PathInfo(file_path)