Skip to content

Commit

Permalink
gh-102511: Speed up os.path.splitroot() with native helpers (GH-118089)
Browse files Browse the repository at this point in the history
  • Loading branch information
nineteendo authored Apr 25, 2024
1 parent e38b43c commit 10bb90e
Show file tree
Hide file tree
Showing 8 changed files with 337 additions and 108 deletions.
2 changes: 2 additions & 0 deletions Include/internal/pycore_fileutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,8 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t
extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd);
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */

extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize);

// Macros to protect CRT calls against instant termination when passed an
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
// Usage:
Expand Down
116 changes: 68 additions & 48 deletions Lib/ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,56 +167,76 @@ def splitdrive(p):
return drive, root + tail


def splitroot(p):
"""Split a pathname into drive, root and tail. The drive is defined
exactly as in splitdrive(). On Windows, the root may be a single path
separator or an empty string. The tail contains anything after the root.
For example:
splitroot('//server/share/') == ('//server/share', '/', '')
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
"""
p = os.fspath(p)
if isinstance(p, bytes):
sep = b'\\'
altsep = b'/'
colon = b':'
unc_prefix = b'\\\\?\\UNC\\'
empty = b''
else:
sep = '\\'
altsep = '/'
colon = ':'
unc_prefix = '\\\\?\\UNC\\'
empty = ''
normp = p.replace(altsep, sep)
if normp[:1] == sep:
if normp[1:2] == sep:
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
# Device drives, e.g. \\.\device or \\?\device
start = 8 if normp[:8].upper() == unc_prefix else 2
index = normp.find(sep, start)
if index == -1:
return p, empty, empty
index2 = normp.find(sep, index + 1)
if index2 == -1:
return p, empty, empty
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
try:
from nt import _path_splitroot_ex
except ImportError:
def splitroot(p):
"""Split a pathname into drive, root and tail. The drive is defined
exactly as in splitdrive(). On Windows, the root may be a single path
separator or an empty string. The tail contains anything after the root.
For example:
splitroot('//server/share/') == ('//server/share', '/', '')
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
"""
p = os.fspath(p)
if isinstance(p, bytes):
sep = b'\\'
altsep = b'/'
colon = b':'
unc_prefix = b'\\\\?\\UNC\\'
empty = b''
else:
# Relative path with root, e.g. \Windows
return empty, p[:1], p[1:]
elif normp[1:2] == colon:
if normp[2:3] == sep:
# Absolute drive-letter path, e.g. X:\Windows
return p[:2], p[2:3], p[3:]
sep = '\\'
altsep = '/'
colon = ':'
unc_prefix = '\\\\?\\UNC\\'
empty = ''
normp = p.replace(altsep, sep)
if normp[:1] == sep:
if normp[1:2] == sep:
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
# Device drives, e.g. \\.\device or \\?\device
start = 8 if normp[:8].upper() == unc_prefix else 2
index = normp.find(sep, start)
if index == -1:
return p, empty, empty
index2 = normp.find(sep, index + 1)
if index2 == -1:
return p, empty, empty
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
else:
# Relative path with root, e.g. \Windows
return empty, p[:1], p[1:]
elif normp[1:2] == colon:
if normp[2:3] == sep:
# Absolute drive-letter path, e.g. X:\Windows
return p[:2], p[2:3], p[3:]
else:
# Relative path with drive, e.g. X:Windows
return p[:2], empty, p[2:]
else:
# Relative path with drive, e.g. X:Windows
return p[:2], empty, p[2:]
else:
# Relative path, e.g. Windows
return empty, empty, p
# Relative path, e.g. Windows
return empty, empty, p
else:
def splitroot(p):
"""Split a pathname into drive, root and tail. The drive is defined
exactly as in splitdrive(). On Windows, the root may be a single path
separator or an empty string. The tail contains anything after the root.
For example:
splitroot('//server/share/') == ('//server/share', '/', '')
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
"""
p = os.fspath(p)
if isinstance(p, bytes):
drive, root, tail = _path_splitroot_ex(os.fsdecode(p))
return os.fsencode(drive), os.fsencode(root), os.fsencode(tail)
return _path_splitroot_ex(p)


# Split a path in head (everything up to the last '/') and tail (the
Expand Down
74 changes: 47 additions & 27 deletions Lib/posixpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,33 +134,53 @@ def splitdrive(p):
return p[:0], p


def splitroot(p):
"""Split a pathname into drive, root and tail. On Posix, drive is always
empty; the root may be empty, a single slash, or two slashes. The tail
contains anything after the root. For example:
splitroot('foo/bar') == ('', '', 'foo/bar')
splitroot('/foo/bar') == ('', '/', 'foo/bar')
splitroot('//foo/bar') == ('', '//', 'foo/bar')
splitroot('///foo/bar') == ('', '/', '//foo/bar')
"""
p = os.fspath(p)
if isinstance(p, bytes):
sep = b'/'
empty = b''
else:
sep = '/'
empty = ''
if p[:1] != sep:
# Relative path, e.g.: 'foo'
return empty, empty, p
elif p[1:2] != sep or p[2:3] == sep:
# Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
return empty, sep, p[1:]
else:
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
return empty, p[:2], p[2:]
try:
from posix import _path_splitroot_ex
except ImportError:
def splitroot(p):
"""Split a pathname into drive, root and tail. On Posix, drive is always
empty; the root may be empty, a single slash, or two slashes. The tail
contains anything after the root. For example:
splitroot('foo/bar') == ('', '', 'foo/bar')
splitroot('/foo/bar') == ('', '/', 'foo/bar')
splitroot('//foo/bar') == ('', '//', 'foo/bar')
splitroot('///foo/bar') == ('', '/', '//foo/bar')
"""
p = os.fspath(p)
if isinstance(p, bytes):
sep = b'/'
empty = b''
else:
sep = '/'
empty = ''
if p[:1] != sep:
# Relative path, e.g.: 'foo'
return empty, empty, p
elif p[1:2] != sep or p[2:3] == sep:
# Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
return empty, sep, p[1:]
else:
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
return empty, p[:2], p[2:]
else:
def splitroot(p):
"""Split a pathname into drive, root and tail. On Posix, drive is always
empty; the root may be empty, a single slash, or two slashes. The tail
contains anything after the root. For example:
splitroot('foo/bar') == ('', '', 'foo/bar')
splitroot('/foo/bar') == ('', '/', 'foo/bar')
splitroot('//foo/bar') == ('', '//', 'foo/bar')
splitroot('///foo/bar') == ('', '/', '//foo/bar')
"""
p = os.fspath(p)
if isinstance(p, bytes):
# Optimisation: the drive is always empty
_, root, tail = _path_splitroot_ex(os.fsdecode(p))
return b'', os.fsencode(root), os.fsencode(tail)
return _path_splitroot_ex(p)


# Return the tail (basename) part of a path, same as split(path)[1].
Expand Down
1 change: 1 addition & 0 deletions Lib/test/test_ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ def test_normpath(self):
tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\')
tester("ntpath.normpath('\\\\foo')", '\\\\foo')
tester("ntpath.normpath('\\\\')", '\\\\')
tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\')

def test_realpath_curdir(self):
expected = ntpath.normpath(os.getcwd())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up :func:`os.path.splitroot` with a native implementation.
60 changes: 59 additions & 1 deletion Modules/clinic/posixmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions Modules/posixmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -5467,6 +5467,49 @@ os__path_islink_impl(PyObject *module, PyObject *path)
#endif /* MS_WINDOWS */


/*[clinic input]
os._path_splitroot_ex
path: unicode
[clinic start generated code]*/

static PyObject *
os__path_splitroot_ex_impl(PyObject *module, PyObject *path)
/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/
{
Py_ssize_t len, drvsize, rootsize;
PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL;

wchar_t *buffer = PyUnicode_AsWideCharString(path, &len);
if (!buffer) {
goto exit;
}

_Py_skiproot(buffer, len, &drvsize, &rootsize);
drv = PyUnicode_FromWideChar(buffer, drvsize);
if (drv == NULL) {
goto exit;
}
root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize);
if (root == NULL) {
goto exit;
}
tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize],
len - drvsize - rootsize);
if (tail == NULL) {
goto exit;
}
result = Py_BuildValue("(OOO)", drv, root, tail);
exit:
PyMem_Free(buffer);
Py_XDECREF(drv);
Py_XDECREF(root);
Py_XDECREF(tail);
return result;
}


/*[clinic input]
os._path_normpath
Expand Down Expand Up @@ -16799,6 +16842,7 @@ static PyMethodDef posix_methods[] = {
OS__FINDFIRSTFILE_METHODDEF
OS__GETVOLUMEPATHNAME_METHODDEF
OS__PATH_SPLITROOT_METHODDEF
OS__PATH_SPLITROOT_EX_METHODDEF
OS__PATH_NORMPATH_METHODDEF
OS_GETLOADAVG_METHODDEF
OS_URANDOM_METHODDEF
Expand Down
Loading

0 comments on commit 10bb90e

Please sign in to comment.