Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LocalFileSystem fix _strip_protocol for root directory #1477

Merged
merged 34 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
93d92c8
Fix _strip_protocol for windows root directory
fleming79 Dec 22, 2023
f6d44d4
Update tests for windows root and remove_trailing_slash
fleming79 Dec 23, 2023
5b77125
FSMap._key_to_str now always strips trailing separator
fleming79 Dec 23, 2023
ffb2eb8
make_path_posix now uses sep to identify os type
fleming79 Dec 23, 2023
00b742f
LocalFileSystem - switch from using os.name to sep
fleming79 Dec 23, 2023
9d01d9b
local.py - make_path_posix removed temporary nested function _make_p…
fleming79 Dec 23, 2023
e15955d
Fixed LocalFileSystem._parent & added test
fleming79 Dec 24, 2023
c0c7399
make_path_posix - made slightly faster in recursive calls by calling …
fleming79 Dec 24, 2023
7abb0c4
Divide make_path_posix and _parent method into native posix and nt ve…
fleming79 Dec 26, 2023
6e2df0f
FSMap - use new `remove_trailing_slash` to get root
fleming79 Dec 26, 2023
16c2c82
make_path_posix
fleming79 Dec 26, 2023
0b4d069
Merge branch 'master' of https://github.com/fsspec/filesystem_spec
fleming79 Jan 3, 2024
9fa75a9
make_path_posix - use stringify_path
fleming79 Jan 3, 2024
4b25084
Merge branch 'fsspec:master' into master
fleming79 Jan 4, 2024
97ee256
LocalFileSystem._strip_protocol - will now lstrip for 'local' on NT.
fleming79 Jan 4, 2024
a0ee144
_strip_protocol fix for 3.8 string missing removeprefix
Jan 8, 2024
a68e536
Merge branch 'fsspec:master' into master
fleming79 Jan 8, 2024
41147e6
Fix version check
Jan 9, 2024
2e902a3
Merge branch 'fsspec:master' into master
fleming79 Jan 15, 2024
aaefdc5
Drop version specific version of _strip_protocol
Jan 16, 2024
778cd95
remove sep as argument for make_path_posix , _strip_protocol, _parent
Jan 24, 2024
7570de8
fix test_make_path_posix
fleming79 Jan 24, 2024
502413f
Merge branch 'fsspec:master' into master
fleming79 Feb 6, 2024
64433f5
Merge branch 'fsspec:master' into master
fleming79 Feb 9, 2024
fe93b1d
Merge branch 'fsspec:master' into master
fleming79 Feb 23, 2024
ac990da
Merge branch 'fsspec:master' into master
fleming79 Mar 1, 2024
e0de0a6
Fix ruff lint PIE810
Mar 1, 2024
05f073a
Merge branch 'fsspec:master' into master
fleming79 Mar 5, 2024
7799d4e
Merge branch 'fsspec:master' into master
fleming79 Mar 7, 2024
c2352bf
Re-enable jupyter test_simple for windows
fleming79 Mar 8, 2024
006718d
Merge branch 'fsspec:master' into master
fleming79 Mar 8, 2024
ec56ea4
Merge branch 'master' of https://github.com/fleming79/filesystem_spec
fleming79 Mar 8, 2024
03b3106
Revert setting of root in 'FSMap' and added line to test_strip_protoc…
fleming79 Mar 9, 2024
082b544
Merge branch 'fsspec:master' into master
fleming79 Mar 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 65 additions & 57 deletions fsspec/implementations/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import logging
import os
import os.path as osp
import re
import shutil
import stat
import tempfile
Expand All @@ -16,6 +15,12 @@
logger = logging.getLogger("fsspec.local")


def _remove_prefix(text: str, prefix: str):
martindurant marked this conversation as resolved.
Show resolved Hide resolved
if text.startswith(prefix):
return text[len(prefix) :]
return text


class LocalFileSystem(AbstractFileSystem):
"""Interface to files on local storage

Expand Down Expand Up @@ -116,8 +121,8 @@ def lexists(self, path, **kwargs):
return osp.lexists(path)

def cp_file(self, path1, path2, **kwargs):
path1 = self._strip_protocol(path1).rstrip("/")
path2 = self._strip_protocol(path2).rstrip("/")
path1 = self._strip_protocol(path1, remove_trailing_slash=True)
path2 = self._strip_protocol(path2, remove_trailing_slash=True)
if self.auto_mkdir:
self.makedirs(self._parent(path2), exist_ok=True)
if self.isfile(path1):
Expand All @@ -138,8 +143,8 @@ def put_file(self, path1, path2, callback=None, **kwargs):
return self.cp_file(path1, path2, **kwargs)

def mv_file(self, path1, path2, **kwargs):
path1 = self._strip_protocol(path1).rstrip("/")
path2 = self._strip_protocol(path2).rstrip("/")
path1 = self._strip_protocol(path1, remove_trailing_slash=True)
path2 = self._strip_protocol(path2, remove_trailing_slash=True)
shutil.move(path1, path2)

def link(self, src, dst, **kwargs):
Expand All @@ -163,7 +168,7 @@ def rm(self, path, recursive=False, maxdepth=None):
path = [path]

for p in path:
p = self._strip_protocol(p).rstrip("/")
p = self._strip_protocol(p, remove_trailing_slash=True)
if self.isdir(p):
if not recursive:
raise ValueError("Cannot delete directory, set recursive=True")
Expand Down Expand Up @@ -206,24 +211,32 @@ def modified(self, path):

@classmethod
def _parent(cls, path):
path = cls._strip_protocol(path).rstrip("/")
if "/" in path:
return path.rsplit("/", 1)[0]
path = cls._strip_protocol(path, remove_trailing_slash=True)
if os.sep == "/":
# posix native
return path.rsplit("/", 1)[0] or "/"
else:
return cls.root_marker
# NT
path_ = path.rsplit("/", 1)[0]
if len(path_) <= 3:
if path_[1:2] == ":":
# nt root (something like c:/)
return path_[0] + ":/"
# More cases may be required here
return path_

@classmethod
def _strip_protocol(cls, path):
def _strip_protocol(cls, path, remove_trailing_slash=False):
path = stringify_path(path)
if path.startswith("file://"):
path = path[7:]
elif path.startswith("file:"):
path = path[5:]
elif path.startswith("local://"):
path = path[8:]
if path.startswith("file:"):
fleming79 marked this conversation as resolved.
Show resolved Hide resolved
path = _remove_prefix(_remove_prefix(path, "file://"), "file:")
martindurant marked this conversation as resolved.
Show resolved Hide resolved
if os.sep == "\\":
path = path.lstrip("/")
martindurant marked this conversation as resolved.
Show resolved Hide resolved
elif path.startswith("local:"):
path = path[6:]
return make_path_posix(path).rstrip("/") or cls.root_marker
path = _remove_prefix(_remove_prefix(path, "local://"), "local:")
if os.sep == "\\":
path = path.lstrip("/")
return make_path_posix(path, remove_trailing_slash)

def _isfilestore(self):
# Inheriting from DaskFileSystem makes this False (S3, etc. were)
Expand All @@ -236,47 +249,42 @@ def chmod(self, path, mode):
return os.chmod(path, mode)


def make_path_posix(path, sep=os.sep):
"""Make path generic"""
if isinstance(path, (list, set, tuple)):
return type(path)(make_path_posix(p) for p in path)
if "~" in path:
path = osp.expanduser(path)
if sep == "/":
# most common fast case for posix
def make_path_posix(path, remove_trailing_slash=False):
"""Make path generic for current OS"""
if not isinstance(path, str):
if isinstance(path, (list, set, tuple)):
return type(path)(make_path_posix(p, remove_trailing_slash) for p in path)
else:
path = str(stringify_path(path))
if os.sep == "/":
# Native posix
if path.startswith("/"):
return path
if path.startswith("./"):
# most common fast case for posix
return path.rstrip("/") or "/" if remove_trailing_slash else path
elif path.startswith("~"):
return make_path_posix(osp.expanduser(path), remove_trailing_slash)
elif path.startswith("./"):
path = path[2:]
path = f"{os.getcwd()}/{path}"
return path.rstrip("/") or "/" if remove_trailing_slash else path
return f"{os.getcwd()}/{path}"
if (
(sep not in path and "/" not in path)
or (sep == "/" and not path.startswith("/"))
or (sep == "\\" and ":" not in path and not path.startswith("\\\\"))
):
# relative path like "path" or "rel\\path" (win) or rel/path"
if os.sep == "\\":
# abspath made some more '\\' separators
return make_path_posix(osp.abspath(path))
else:
return f"{os.getcwd()}/{path}"
if path.startswith("file://"):
path = path[7:]
if re.match("/[A-Za-z]:", path):
# for windows file URI like "file:///C:/folder/file"
# or "file:///C:\\dir\\file"
path = path[1:].replace("\\", "/").replace("//", "/")
if path.startswith("\\\\"):
# special case for windows UNC/DFS-style paths, do nothing,
# just flip the slashes around (case below does not work!)
return path.replace("\\", "/")
if re.match("[A-Za-z]:", path):
# windows full path like "C:\\local\\path"
return path.lstrip("\\").replace("\\", "/").replace("//", "/")
if path.startswith("\\"):
# windows network path like "\\server\\path"
return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
return path
else:
# NT handling
if len(path) > 1:
if path[1] == ":":
# windows full path like "C:\\local\\path"
if len(path) <= 3:
# nt root (something like c:/)
return path[0] + ":/"
path = path.replace("\\", "/").replace("//", "/")
return path.rstrip("/") if remove_trailing_slash else path
elif path[0] == "~":
return make_path_posix(osp.expanduser(path), remove_trailing_slash)
elif path.startswith(("\\\\", "//")):
# windows UNC/DFS-style paths
path = "//" + path[2:].replace("\\", "/").replace("//", "/")
return path.rstrip("/") if remove_trailing_slash else path
return make_path_posix(osp.abspath(path), remove_trailing_slash)


def trailing_sep(path):
Expand Down
3 changes: 2 additions & 1 deletion fsspec/implementations/tests/test_jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
import pytest

import fsspec
from fsspec.tests.test_utils import WIN

pytest.importorskip("notebook")
requests = pytest.importorskip("requests")


@pytest.fixture()
def jupyter(tmpdir):

tmpdir = str(tmpdir)
os.environ["JUPYTER_TOKEN"] = "blah"
try:
Expand All @@ -38,6 +38,7 @@ def jupyter(tmpdir):
P.terminate()


@pytest.mark.skipif(WIN, reason="Subprocess gets stuck in a loop")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't notice this before. Any idea what's going on?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found the problem - Something to do with the temp dir passed to Jupyterlab in the launch call showing up as an invalid path. Surrounding the path with "" fixed it. The test now passes when I run it on Windows.

cmd = f'jupyter notebook --notebook-dir="{tmpdir}" --no-browser --port=5566'

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line was to be removed according to the thread above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quite right - I made the changes but forgot to push them. It is done now.

def test_simple(jupyter):
url, d = jupyter
fs = fsspec.filesystem("jupyter", url=url)
Expand Down
33 changes: 25 additions & 8 deletions fsspec/implementations/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,33 +472,48 @@ def test_make_path_posix():
drive = cwd[0]
assert make_path_posix("/a/posix/path") == f"{drive}:/a/posix/path"
assert make_path_posix("/posix") == f"{drive}:/posix"
# Windows drive requires trailing slash
assert make_path_posix("C:\\") == "C:/"
assert make_path_posix("C:\\", remove_trailing_slash=True) == "C:/"
else:
assert make_path_posix("/a/posix/path") == "/a/posix/path"
assert make_path_posix("/posix") == "/posix"
assert make_path_posix("relpath") == posixpath.join(make_path_posix(cwd), "relpath")
assert make_path_posix("rel/path") == posixpath.join(
make_path_posix(cwd), "rel/path"
)
# NT style
if WIN:
assert make_path_posix("C:\\path") == "C:/path"
assert make_path_posix("file://C:\\path\\file") == "C:/path/file"
martindurant marked this conversation as resolved.
Show resolved Hide resolved
if WIN:
assert (
make_path_posix(
"\\\\windows-server\\someshare\\path\\more\\path\\dir\\foo.parquet"
"\\\\windows-server\\someshare\\path\\more\\path\\dir\\foo.parquet",
)
== "//windows-server/someshare/path/more/path/dir/foo.parquet"
)
assert (
make_path_posix(
r"\\SERVER\UserHomeFolder$\me\My Documents\project1\data\filen.csv"
"\\\\SERVER\\UserHomeFolder$\\me\\My Documents\\proj\\data\\fname.csv",
)
== "//SERVER/UserHomeFolder$/me/My Documents/project1/data/filen.csv"
== "//SERVER/UserHomeFolder$/me/My Documents/proj/data/fname.csv"
)
assert "/" in make_path_posix("rel\\path")

# Relative
pp = make_path_posix("./path")
assert "./" not in pp and ".\\" not in pp
cd = make_path_posix(cwd)
assert pp == cd + "/path"
# Userpath
userpath = make_path_posix("~/path")
assert userpath.endswith("/path")


def test_parent():
if WIN:
assert LocalFileSystem._parent("C:\\file or folder") == "C:/"
assert LocalFileSystem._parent("C:\\") == "C:/"
else:
assert LocalFileSystem._parent("/file or folder") == "/"
assert LocalFileSystem._parent("/") == "/"


def test_linked_files(tmpdir):
Expand Down Expand Up @@ -638,9 +653,11 @@ def test_strip_protocol_expanduser():
path = "file://~\\foo\\bar" if WIN else "file://~/foo/bar"
stripped = LocalFileSystem._strip_protocol(path)
assert path != stripped
assert "~" not in stripped
assert "file://" not in stripped
assert stripped.startswith(os.path.expanduser("~").replace("\\", "/"))
assert not LocalFileSystem._strip_protocol("./").endswith("/")
path = LocalFileSystem._strip_protocol("./", remove_trailing_slash=True)
assert not path.endswith("/")


def test_strip_protocol_no_authority():
Expand Down
7 changes: 5 additions & 2 deletions fsspec/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ class FSMap(MutableMapping):

def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
self.fs = fs
self.root = fs._strip_protocol(root).rstrip("/")
try:
self.root = fs._strip_protocol(root, remove_trailing_slash=True)
except TypeError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unfortunate; in what case does rstrip fail? In this case, I think we might be explicitly joining on the separator anyway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Windows if the root is something like c:/, rstrip will cause a root of c: which is treated like '.', the current working directory instead of the root.
On posix rstrip will make / change to .

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only place that the special case for LocalFileSystem leaks, and I don't like it :|. I think it may be better to let through strange behaviour if the user thinks they want to may their whole C drive (and gets only cwd files instead).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed this back.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But without .rstrip("/")

self.root = fs._strip_protocol(root).rstrip("/")
self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
if missing_exceptions is None:
missing_exceptions = (
Expand Down Expand Up @@ -138,7 +141,7 @@ def _key_to_str(self, key):
if isinstance(key, list):
key = tuple(key)
key = str(key)
return f"{self._root_key_to_str}{key}"
return f"{self._root_key_to_str}{key}".rstrip("/")

def _str_to_key(self, s):
"""Strip path of to leave key name"""
Expand Down
20 changes: 12 additions & 8 deletions fsspec/tests/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,19 +200,23 @@ def test_fsmap_error_on_protocol_keys():
_ = m[f"memory://{root}/a"]


# on Windows opening a directory will raise PermissionError
# see: https://bugs.python.org/issue43095
@pytest.mark.skipif(
platform.system() == "Windows", reason="raises PermissionError on windows"
)
def test_fsmap_access_with_suffix(tmp_path):
tmp_path.joinpath("b").mkdir()
tmp_path.joinpath("b", "a").write_bytes(b"data")
m = fsspec.get_mapper(f"file://{tmp_path}")

if platform.system() == "Windows":
# on Windows opening a directory will raise PermissionError
# see: https://bugs.python.org/issue43095
missing_exceptions = (
FileNotFoundError,
IsADirectoryError,
NotADirectoryError,
PermissionError,
)
else:
missing_exceptions = None
m = fsspec.get_mapper(f"file://{tmp_path}", missing_exceptions=missing_exceptions)
with pytest.raises(KeyError):
_ = m["b/"]

assert m["b/a/"] == b"data"


Expand Down
Loading