From eca5d2dde4f172c8193216d682a48b2915ef02ba Mon Sep 17 00:00:00 2001 From: Aman Sharma Date: Thu, 9 Jan 2020 04:17:43 +0530 Subject: [PATCH 1/3] Move `makedirs` to fs.py --- dvc/analytics.py | 3 ++- dvc/remote/base.py | 4 ++-- dvc/remote/local.py | 4 +--- dvc/repo/__init__.py | 2 +- dvc/utils/__init__.py | 17 ----------------- dvc/utils/fs.py | 17 +++++++++++++++++ tests/dir_helpers.py | 2 +- tests/func/test_fs.py | 20 ++++++++++++++++++++ tests/func/test_get.py | 2 +- tests/func/test_get_url.py | 2 +- tests/func/test_import.py | 2 +- tests/func/test_import_url.py | 2 +- tests/func/test_utils.py | 17 ----------------- tests/unit/utils/test_fs.py | 14 ++++++++++++++ tests/unit/utils/test_utils.py | 14 -------------- 15 files changed, 62 insertions(+), 60 deletions(-) create mode 100644 tests/func/test_fs.py diff --git a/dvc/analytics.py b/dvc/analytics.py index 18aea64288..7c0a693105 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -16,7 +16,8 @@ from dvc.lock import Lock, LockError from dvc.repo import Repo from dvc.scm import SCM -from dvc.utils import env2bool, is_binary, makedirs +from dvc.utils import env2bool, is_binary +from dvc.utils.fs import makedirs logger = logging.getLogger(__name__) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 1c316416b2..36409645eb 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -24,8 +24,8 @@ from dvc.progress import Tqdm from dvc.remote.slow_link_detection import slow_link_guard from dvc.state import StateNoop -from dvc.utils import makedirs, relpath, tmp_fname -from dvc.utils.fs import move +from dvc.utils import relpath, tmp_fname +from dvc.utils.fs import move, makedirs from dvc.utils.http import open_url logger = logging.getLogger(__name__) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 1117ca9939..4cfe48d574 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -23,13 +23,11 @@ from dvc.system import System from dvc.utils import copyfile from dvc.utils import file_md5 -from dvc.utils import makedirs from dvc.utils import relpath from dvc.utils import tmp_fname from dvc.utils import walk_files from dvc.compat import fspath_py35 -from dvc.utils.fs import move -from dvc.utils.fs import remove +from dvc.utils.fs import move, makedirs, remove logger = logging.getLogger(__name__) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index af68180aab..9cef82e11f 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -72,7 +72,7 @@ def __init__(self, root_dir=None): from dvc.repo.metrics import Metrics from dvc.scm.tree import WorkingTree from dvc.repo.tag import Tag - from dvc.utils import makedirs + from dvc.utils.fs import makedirs root_dir = self.find_root(root_dir) diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index cca9ebebef..63d51387a5 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -135,23 +135,6 @@ def copyfile(src, dest, no_progress_bar=False, name=None): pbar.update(len(buf)) -def makedirs(path, exist_ok=False, mode=None): - path = fspath_py35(path) - - if mode is None: - os.makedirs(path, exist_ok=exist_ok) - return - - # utilize umask to set proper permissions since Python 3.7 the `mode` - # `makedirs` argument no longer affects the file permission bits of - # newly-created intermediate-level directories. - umask = os.umask(0o777 - mode) - try: - os.makedirs(path, exist_ok=exist_ok) - finally: - os.umask(umask) - - def _split(list_to_split, chunk_size): return [ list_to_split[i : i + chunk_size] diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 21e0118ef3..426baa4100 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -152,3 +152,20 @@ def normalize_path(path): parent = os.path.join(normalize_path(parent), "") child = normalize_path(child) return child != parent and child.startswith(parent) + + +def makedirs(path, exist_ok=False, mode=None): + path = fspath_py35(path) + + if mode is None: + os.makedirs(path, exist_ok=exist_ok) + return + + # utilize umask to set proper permissions since Python 3.7 the `mode` + # `makedirs` argument no longer affects the file permission bits of + # newly-created intermediate-level directories. + umask = os.umask(0o777 - mode) + try: + os.makedirs(path, exist_ok=exist_ok) + finally: + os.umask(umask) diff --git a/tests/dir_helpers.py b/tests/dir_helpers.py index cd001c0028..c531e4b8cc 100644 --- a/tests/dir_helpers.py +++ b/tests/dir_helpers.py @@ -50,7 +50,7 @@ import pytest from funcy.py3 import lmap, retry -from dvc.utils import makedirs +from dvc.utils.fs import makedirs from dvc.compat import fspath, fspath_py35 diff --git a/tests/func/test_fs.py b/tests/func/test_fs.py new file mode 100644 index 0000000000..be5dcb3ac7 --- /dev/null +++ b/tests/func/test_fs.py @@ -0,0 +1,20 @@ +import os +import stat + +import pytest + +from dvc.utils.fs import makedirs + + +@pytest.mark.skipif(os.name == "nt", reason="Not supported for Windows.") +def test_makedirs_permissions(tmp_dir): + dir_mode = 0o755 + intermediate_dir = "тестовая-директория" + test_dir = os.path.join(intermediate_dir, "data") + + assert not os.path.exists(intermediate_dir) + + makedirs(test_dir, mode=dir_mode) + + assert stat.S_IMODE(os.stat(test_dir).st_mode) == dir_mode + assert stat.S_IMODE(os.stat(intermediate_dir).st_mode) == dir_mode diff --git a/tests/func/test_get.py b/tests/func/test_get.py index 99fd23840b..cb2d050767 100644 --- a/tests/func/test_get.py +++ b/tests/func/test_get.py @@ -9,7 +9,7 @@ from dvc.repo.get import GetDVCFileError, PathMissingError from dvc.repo import Repo from dvc.system import System -from dvc.utils import makedirs +from dvc.utils.fs import makedirs from dvc.compat import fspath from tests.utils import trees_equal diff --git a/tests/func/test_get_url.py b/tests/func/test_get_url.py index 3657dd7137..ae287f9036 100644 --- a/tests/func/test_get_url.py +++ b/tests/func/test_get_url.py @@ -4,7 +4,7 @@ import pytest from dvc.repo import Repo -from dvc.utils import makedirs +from dvc.utils.fs import makedirs def test_get_file(repo_dir): diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 551ae38a87..c3a2c7b000 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -12,7 +12,7 @@ from dvc.exceptions import NoOutputInExternalRepoError from dvc.stage import Stage from dvc.system import System -from dvc.utils import makedirs +from dvc.utils.fs import makedirs from dvc.compat import fspath from tests.utils import trees_equal diff --git a/tests/func/test_import_url.py b/tests/func/test_import_url.py index de212da73c..7d73cff79b 100644 --- a/tests/func/test_import_url.py +++ b/tests/func/test_import_url.py @@ -7,7 +7,7 @@ import dvc from dvc.main import main -from dvc.utils import makedirs +from dvc.utils.fs import makedirs from tests.basic_env import TestDvc from tests.utils import spy diff --git a/tests/func/test_utils.py b/tests/func/test_utils.py index 70ef3dee8e..07747092c4 100644 --- a/tests/func/test_utils.py +++ b/tests/func/test_utils.py @@ -1,8 +1,5 @@ # encoding: utf-8 import os -import stat - -import pytest from dvc import utils @@ -62,17 +59,3 @@ def test_boxify(): ) assert expected == utils.boxify("message") - - -@pytest.mark.skipif(os.name == "nt", reason="Not supported for Windows.") -def test_makedirs_permissions(tmp_dir): - dir_mode = 0o755 - intermediate_dir = "тестовая-директория" - test_dir = os.path.join(intermediate_dir, "data") - - assert not os.path.exists(intermediate_dir) - - utils.makedirs(test_dir, mode=dir_mode) - - assert stat.S_IMODE(os.stat(test_dir).st_mode) == dir_mode - assert stat.S_IMODE(os.stat(intermediate_dir).st_mode) == dir_mode diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 2c11d287cd..169482e514 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -16,6 +16,7 @@ from dvc.utils.fs import get_mtime_and_size from dvc.utils.fs import move from dvc.utils.fs import path_isin, remove +from dvc.utils.fs import makedirs from tests.basic_env import TestDir from tests.utils import spy @@ -202,3 +203,16 @@ def test_path_isin_with_absolute_path(): child = os.path.join(parent, "to", "folder") assert path_isin(child, parent) + + +def test_makedirs(repo_dir): + path = os.path.join(repo_dir.root_dir, "directory") + path_info = PathInfo( + os.path.join(repo_dir.root_dir, "another", "directory") + ) + + makedirs(path) + assert os.path.isdir(path) + + makedirs(path_info) + assert os.path.isdir(path_info.fspath) diff --git a/tests/unit/utils/test_utils.py b/tests/unit/utils/test_utils.py index 15b2c541f4..18f68fb02d 100644 --- a/tests/unit/utils/test_utils.py +++ b/tests/unit/utils/test_utils.py @@ -8,7 +8,6 @@ from dvc.utils import copyfile from dvc.utils import file_md5 from dvc.utils import fix_env -from dvc.utils import makedirs from dvc.utils import relpath from dvc.utils import to_chunks from dvc.utils import tmp_fname @@ -115,19 +114,6 @@ def test_copyfile(path, repo_dir): assert filecmp.cmp(src_info.fspath, dest_info.fspath, shallow=False) -def test_makedirs(repo_dir): - path = os.path.join(repo_dir.root_dir, "directory") - path_info = PathInfo( - os.path.join(repo_dir.root_dir, "another", "directory") - ) - - makedirs(path) - assert os.path.isdir(path) - - makedirs(path_info) - assert os.path.isdir(path_info.fspath) - - def test_tmp_fname(): file_path = os.path.join("path", "to", "file") file_path_info = PathInfo(file_path) From a79816920dddf54f1fb547e690fd03cdeb0bd8a7 Mon Sep 17 00:00:00 2001 From: Aman Sharma Date: Thu, 9 Jan 2020 04:57:53 +0530 Subject: [PATCH 2/3] Move `copyfile` to fs.py --- dvc/remote/local.py | 2 +- dvc/utils/__init__.py | 30 ------------------------------ dvc/utils/fs.py | 32 ++++++++++++++++++++++++++++++++ tests/func/test_fs.py | 18 +++++++++++++++++- tests/func/test_utils.py | 17 ----------------- tests/unit/utils/test_fs.py | 28 ++++++++++++++++++++++++++++ tests/unit/utils/test_utils.py | 29 ----------------------------- 7 files changed, 78 insertions(+), 78 deletions(-) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 4cfe48d574..26fabe1e48 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -21,7 +21,7 @@ from dvc.scheme import Schemes from dvc.scm.tree import is_working_tree from dvc.system import System -from dvc.utils import copyfile +from dvc.utils.fs import copyfile from dvc.utils import file_md5 from dvc.utils import relpath from dvc.utils import tmp_fname diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 63d51387a5..9a6157ef96 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -105,36 +105,6 @@ def dict_md5(d, exclude=()): return bytes_md5(byts) -def copyfile(src, dest, no_progress_bar=False, name=None): - """Copy file with progress bar""" - from dvc.exceptions import DvcException - from dvc.progress import Tqdm - from dvc.system import System - - src = fspath_py35(src) - dest = fspath_py35(dest) - - name = name if name else os.path.basename(dest) - total = os.stat(src).st_size - - if os.path.isdir(dest): - dest = os.path.join(dest, os.path.basename(src)) - - try: - System.reflink(src, dest) - except DvcException: - with Tqdm( - desc=name, disable=no_progress_bar, total=total, bytes=True - ) as pbar: - with open(src, "rb") as fsrc, open(dest, "wb+") as fdest: - while True: - buf = fsrc.read(LOCAL_CHUNK_SIZE) - if not buf: - break - fdest.write(buf) - pbar.update(len(buf)) - - def _split(list_to_split, chunk_size): return [ list_to_split[i : i + chunk_size] diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 426baa4100..7a3017b7fb 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -18,6 +18,8 @@ logger = logging.getLogger(__name__) +LOCAL_CHUNK_SIZE = 2 ** 20 # 1 MB + def fs_copy(src, dst): if os.path.isdir(src): @@ -169,3 +171,33 @@ def makedirs(path, exist_ok=False, mode=None): os.makedirs(path, exist_ok=exist_ok) finally: os.umask(umask) + + +def copyfile(src, dest, no_progress_bar=False, name=None): + """Copy file with progress bar""" + from dvc.exceptions import DvcException + from dvc.progress import Tqdm + from dvc.system import System + + src = fspath_py35(src) + dest = fspath_py35(dest) + + name = name if name else os.path.basename(dest) + total = os.stat(src).st_size + + if os.path.isdir(dest): + dest = os.path.join(dest, os.path.basename(src)) + + try: + System.reflink(src, dest) + except DvcException: + with Tqdm( + desc=name, disable=no_progress_bar, total=total, bytes=True + ) as pbar: + with open(src, "rb") as fsrc, open(dest, "wb+") as fdest: + while True: + buf = fsrc.read(LOCAL_CHUNK_SIZE) + if not buf: + break + fdest.write(buf) + pbar.update(len(buf)) diff --git a/tests/func/test_fs.py b/tests/func/test_fs.py index be5dcb3ac7..0dbdce8bed 100644 --- a/tests/func/test_fs.py +++ b/tests/func/test_fs.py @@ -3,7 +3,7 @@ import pytest -from dvc.utils.fs import makedirs +from dvc.utils.fs import makedirs, copyfile @pytest.mark.skipif(os.name == "nt", reason="Not supported for Windows.") @@ -18,3 +18,19 @@ def test_makedirs_permissions(tmp_dir): assert stat.S_IMODE(os.stat(test_dir).st_mode) == dir_mode assert stat.S_IMODE(os.stat(intermediate_dir).st_mode) == dir_mode + + +def test_copyfile(tmp_dir): + src = "file1" + dest = "file2" + dest_dir = "testdir" + + tmp_dir.gen(src, "file1contents") + + os.mkdir(dest_dir) + + copyfile(src, dest) + assert (tmp_dir / dest).read_text() == "file1contents" + + copyfile(src, dest_dir) + assert (tmp_dir / dest_dir / src).read_text() == "file1contents" diff --git a/tests/func/test_utils.py b/tests/func/test_utils.py index 07747092c4..996b2ce2f8 100644 --- a/tests/func/test_utils.py +++ b/tests/func/test_utils.py @@ -1,25 +1,8 @@ # encoding: utf-8 -import os from dvc import utils -def test_copyfile(tmp_dir): - src = "file1" - dest = "file2" - dest_dir = "testdir" - - tmp_dir.gen(src, "file1contents") - - os.mkdir(dest_dir) - - utils.copyfile(src, dest) - assert (tmp_dir / dest).read_text() == "file1contents" - - utils.copyfile(src, dest_dir) - assert (tmp_dir / dest_dir / src).read_text() == "file1contents" - - def test_file_md5_crlf(tmp_dir): tmp_dir.gen("cr", b"a\nb\nc") tmp_dir.gen("crlf", b"a\r\nb\r\nc") diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 169482e514..33ab5d342a 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -1,3 +1,4 @@ +import filecmp import os from unittest import TestCase @@ -11,6 +12,7 @@ from dvc.system import System from dvc.utils import relpath from dvc.utils.fs import BasePathNotInCheckedPathException +from dvc.utils.fs import copyfile from dvc.utils.fs import contains_symlink_up_to from dvc.utils.fs import get_inode from dvc.utils.fs import get_mtime_and_size @@ -216,3 +218,29 @@ def test_makedirs(repo_dir): makedirs(path_info) assert os.path.isdir(path_info.fspath) + + +@pytest.mark.parametrize("path", [TestDir.DATA, TestDir.DATA_DIR]) +def test_copyfile(path, repo_dir): + src = repo_dir.FOO + dest = path + src_info = PathInfo(repo_dir.BAR) + dest_info = PathInfo(path) + + copyfile(src, dest) + if os.path.isdir(dest): + assert filecmp.cmp( + src, os.path.join(dest, os.path.basename(src)), shallow=False + ) + else: + assert filecmp.cmp(src, dest, shallow=False) + + copyfile(src_info, dest_info) + if os.path.isdir(dest_info.fspath): + assert filecmp.cmp( + src_info.fspath, + os.path.join(dest_info.fspath, os.path.basename(src_info.fspath)), + shallow=False, + ) + else: + assert filecmp.cmp(src_info.fspath, dest_info.fspath, shallow=False) diff --git a/tests/unit/utils/test_utils.py b/tests/unit/utils/test_utils.py index 18f68fb02d..17870fe0bd 100644 --- a/tests/unit/utils/test_utils.py +++ b/tests/unit/utils/test_utils.py @@ -1,18 +1,15 @@ -import filecmp import re import os import pytest from dvc.path_info import PathInfo -from dvc.utils import copyfile from dvc.utils import file_md5 from dvc.utils import fix_env from dvc.utils import relpath from dvc.utils import to_chunks from dvc.utils import tmp_fname from dvc.utils import walk_files -from tests.basic_env import TestDir @pytest.mark.parametrize( @@ -88,32 +85,6 @@ def test_file_md5(repo_dir): assert file_md5(fname) == file_md5(fname_object) -@pytest.mark.parametrize("path", [TestDir.DATA, TestDir.DATA_DIR]) -def test_copyfile(path, repo_dir): - src = repo_dir.FOO - dest = path - src_info = PathInfo(repo_dir.BAR) - dest_info = PathInfo(path) - - copyfile(src, dest) - if os.path.isdir(dest): - assert filecmp.cmp( - src, os.path.join(dest, os.path.basename(src)), shallow=False - ) - else: - assert filecmp.cmp(src, dest, shallow=False) - - copyfile(src_info, dest_info) - if os.path.isdir(dest_info.fspath): - assert filecmp.cmp( - src_info.fspath, - os.path.join(dest_info.fspath, os.path.basename(src_info.fspath)), - shallow=False, - ) - else: - assert filecmp.cmp(src_info.fspath, dest_info.fspath, shallow=False) - - def test_tmp_fname(): file_path = os.path.join("path", "to", "file") file_path_info = PathInfo(file_path) From 74850fe728b3c5ff693c113cd8e91b708cd67acf Mon Sep 17 00:00:00 2001 From: Aman Sharma Date: Thu, 9 Jan 2020 14:55:49 +0530 Subject: [PATCH 3/3] Move `walk_files` to fs.py --- dvc/remote/local.py | 2 +- dvc/utils/__init__.py | 6 ------ dvc/utils/fs.py | 6 ++++++ tests/func/test_checkout.py | 3 ++- tests/unit/remote/test_remote_dir.py | 2 +- tests/unit/utils/test_fs.py | 5 +++++ tests/unit/utils/test_utils.py | 5 ----- 7 files changed, 15 insertions(+), 14 deletions(-) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 26fabe1e48..602e9d875c 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -25,9 +25,9 @@ from dvc.utils import file_md5 from dvc.utils import relpath from dvc.utils import tmp_fname -from dvc.utils import walk_files from dvc.compat import fspath_py35 from dvc.utils.fs import move, makedirs, remove +from dvc.utils.fs import walk_files logger = logging.getLogger(__name__) diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 9a6157ef96..edb77e93bb 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -231,12 +231,6 @@ def to_yaml_string(data): return stream.getvalue() -def walk_files(directory): - for root, _, files in os.walk(fspath(directory)): - for f in files: - yield os.path.join(root, f) - - def colorize(message, color=None): """Returns a message in a specified color.""" if not color: diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 7a3017b7fb..56f3c9975b 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -201,3 +201,9 @@ def copyfile(src, dest, no_progress_bar=False, name=None): break fdest.write(buf) pbar.update(len(buf)) + + +def walk_files(directory): + for root, _, files in os.walk(fspath(directory)): + for f in files: + yield os.path.join(root, f) diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index db02cdc58b..1f49ca4d84 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -18,7 +18,8 @@ from dvc.stage import StageFileBadNameError from dvc.stage import StageFileDoesNotExistError from dvc.system import System -from dvc.utils import relpath, walk_files +from dvc.utils import relpath +from dvc.utils.fs import walk_files from dvc.utils.stage import dump_stage_file from dvc.utils.stage import load_stage_file from tests.basic_env import TestDvc diff --git a/tests/unit/remote/test_remote_dir.py b/tests/unit/remote/test_remote_dir.py index b6d6c337b9..a39ef6981d 100644 --- a/tests/unit/remote/test_remote_dir.py +++ b/tests/unit/remote/test_remote_dir.py @@ -2,7 +2,7 @@ import pytest import os from dvc.remote.s3 import RemoteS3 -from dvc.utils import walk_files +from dvc.utils.fs import walk_files from dvc.path_info import PathInfo from tests.remotes import GCP, S3Mocked diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 33ab5d342a..fbde5fcdf0 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -19,6 +19,7 @@ from dvc.utils.fs import move from dvc.utils.fs import path_isin, remove from dvc.utils.fs import makedirs +from dvc.utils.fs import walk_files from tests.basic_env import TestDir from tests.utils import spy @@ -244,3 +245,7 @@ def test_copyfile(path, repo_dir): ) else: assert filecmp.cmp(src_info.fspath, dest_info.fspath, shallow=False) + + +def test_walk_files(tmp_dir): + assert list(walk_files(".")) == list(walk_files(tmp_dir)) diff --git a/tests/unit/utils/test_utils.py b/tests/unit/utils/test_utils.py index 17870fe0bd..c4849ae307 100644 --- a/tests/unit/utils/test_utils.py +++ b/tests/unit/utils/test_utils.py @@ -9,7 +9,6 @@ from dvc.utils import relpath from dvc.utils import to_chunks from dvc.utils import tmp_fname -from dvc.utils import walk_files @pytest.mark.parametrize( @@ -105,7 +104,3 @@ def test_relpath(): path_info = PathInfo(path) assert relpath(path) == relpath(path_info) - - -def test_walk_files(tmp_dir): - assert list(walk_files(".")) == list(walk_files(tmp_dir))