From 40d178d86d6c09e797deb170b66ccb399bb96475 Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Thu, 2 Feb 2023 17:10:42 +0100 Subject: [PATCH 01/11] git ignore CLion project files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index af6e02bde2..e3d19e7c2f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ .p4* .DS_Store .AppleDouble - +.idea \ No newline at end of file From 5c2b550a2a1b5f8f0e1bf232bbd269afede8ca07 Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Thu, 26 Jan 2023 17:46:01 +0100 Subject: [PATCH 02/11] Arch: long path handling for Win32 I/O API calls --- pxr/base/arch/fileSystem.cpp | 131 +++++++++++++++++++++++++++-------- pxr/base/arch/fileSystem.h | 21 ++++-- 2 files changed, 116 insertions(+), 36 deletions(-) diff --git a/pxr/base/arch/fileSystem.cpp b/pxr/base/arch/fileSystem.cpp index 50ce636cb3..4d691dac89 100644 --- a/pxr/base/arch/fileSystem.cpp +++ b/pxr/base/arch/fileSystem.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -48,10 +49,17 @@ using std::set; #if defined (ARCH_OS_WINDOWS) namespace { + +const std::string LONG_PATH_PREFIX = "\\\\?\\"; +const std::wstring LONG_PATH_PREFIX_W = L"\\\\?\\"; +const std::string UNC_LONG_PATH_PREFIX = LONG_PATH_PREFIX + "UNC\\"; +const std::wstring UNC_LONG_PATH_PREFIX_W = LONG_PATH_PREFIX_W + L"UNC\\"; + static inline HANDLE _FileToWinHANDLE(FILE *file) { return reinterpret_cast(_get_osfhandle(_fileno(file))); } + } #endif // ARCH_OS_WINDOWS @@ -94,9 +102,11 @@ FILE* ArchOpenFile(char const* fileName, char const* mode) return nullptr; } + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); + // Call CreateFileW. HANDLE hfile = CreateFileW( - ArchWindowsUtf8ToUtf16(fileName).c_str(), + apiPath.c_str(), desiredAccess, shareMode, /* securityAttributes=*/nullptr, @@ -132,10 +142,20 @@ FILE* ArchOpenFile(char const* fileName, char const* mode) #endif } +int ArchUnlinkFile(const char* path) { +#if defined(ARCH_OS_WINDOWS) + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + return _wunlink(apiPath.c_str()); +#else + return unlink(path); +#endif +} + #if defined(ARCH_OS_WINDOWS) int ArchRmDir(const char* path) { - return RemoveDirectoryW(ArchWindowsUtf8ToUtf16(path).c_str()) ? 0 : -1; + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + return RemoveDirectoryW(apiPath.c_str()) ? 0 : -1; } #endif @@ -165,7 +185,8 @@ ArchGetModificationTime(const char* pathname, double* time) { ArchStatType st; #if defined(ARCH_OS_WINDOWS) - if (_wstat64(ArchWindowsUtf8ToUtf16(pathname).c_str(), &st) == 0) + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(pathname)); + if (_wstat64(apiPath.c_str(), &st) == 0) #else if (stat(pathname, &st) == 0) #endif @@ -388,12 +409,20 @@ ArchAbsPath(const string& path) } #if defined(ARCH_OS_WINDOWS) - // @TODO support 32,767 long paths on windows by prepending "\\?\" to the // path - wchar_t buffer[ARCH_PATH_MAX]; - if (GetFullPathNameW(ArchWindowsUtf8ToUtf16(path).c_str(), - ARCH_PATH_MAX, buffer, nullptr)) { - return ArchWindowsUtf16ToUtf8(buffer); + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + std::vector buffer(ARCH_PATH_MAX, 0); + auto requiredBufferSize = GetFullPathNameW(apiPath.c_str(), buffer.size(), buffer.data(), nullptr); + if (requiredBufferSize > buffer.size()) { + buffer.resize(requiredBufferSize, 0); + requiredBufferSize = GetFullPathNameW(apiPath.c_str(), buffer.size(), buffer.data(), nullptr); + } + if (requiredBufferSize > 0) { + std::string result = ArchWindowsUtf16ToUtf8(buffer.data()); + if (result.find(LONG_PATH_PREFIX) == 0) + return result.substr(LONG_PATH_PREFIX.size()); + else + return result; // implicit conversion through std::wstring } else { return path; @@ -418,7 +447,8 @@ ArchGetStatMode(const char *pathname, int *mode) { ArchStatType st; #if defined(ARCH_OS_WINDOWS) - if (__stat64(pathname, &st) == 0) { + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(pathname)); + if (_wstat64(apiPath.c_str(), &st) == 0) { #else if (stat(pathname, &st) == 0) { #endif @@ -496,10 +526,11 @@ ArchGetFileLength(const char* fileName) #elif defined (ARCH_OS_WINDOWS) // Open a handle with 0 as the desired access and full sharing. // This opens the file even if exclusively locked. + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); HANDLE handle = - CreateFileW(ArchWindowsUtf8ToUtf16(fileName).c_str(), 0, + CreateFileW(apiPath.c_str(), 0, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, - nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); if (handle) { const auto result = _GetFileLength(handle); CloseHandle(handle); @@ -626,21 +657,27 @@ MakeUnique( #endif +namespace { + +constexpr const char* TMPDIR_FMT = "%s" ARCH_PATH_SEP "%s.XXXXXX"; + +} // namespace + int ArchMakeTmpFile(const std::string& tmpdir, const std::string& prefix, std::string* pathname) { // Format the template. - std::string sTemplate = - ArchStringPrintf("%s/%s.XXXXXX", tmpdir.c_str(), prefix.c_str()); + std::string sTemplate = ArchStringPrintf(TMPDIR_FMT, tmpdir.c_str(), prefix.c_str()); #if defined(ARCH_OS_WINDOWS) int fd = -1; auto cTemplate = MakeUnique(sTemplate, [&fd](const char* name){ - _wsopen_s(&fd, ArchWindowsUtf8ToUtf16(name).c_str(), - _O_CREAT | _O_EXCL | _O_RDWR | _O_BINARY, - _SH_DENYNO, _S_IREAD | _S_IWRITE); + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(name)); + _wsopen_s(&fd, apiPath.c_str(), + _O_CREAT | _O_EXCL | _O_RDWR | _O_BINARY, + _SH_DENYNO, _S_IREAD | _S_IWRITE); return fd != -1; }); #else @@ -676,13 +713,13 @@ ArchMakeTmpSubdir(const std::string& tmpdir, // Format the template. std::string sTemplate = - ArchStringPrintf("%s/%s.XXXXXX", tmpdir.c_str(), prefix.c_str()); + ArchStringPrintf(TMPDIR_FMT, tmpdir.c_str(), prefix.c_str()); #if defined(ARCH_OS_WINDOWS) retstr = MakeUnique(sTemplate, [](const char* name){ - return CreateDirectoryW( - ArchWindowsUtf8ToUtf16(name).c_str(), NULL) != FALSE; + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(name)); + return (CreateDirectoryW(apiPath.c_str(), NULL) != 0); }); #else // Copy template to a writable buffer. @@ -710,11 +747,15 @@ void Arch_InitTmpDir() { #if defined(ARCH_OS_WINDOWS) - wchar_t tmpPath[MAX_PATH]; + std::vector tmpPath(ARCH_PATH_MAX, 0); // On Windows, let GetTempPath use the standard env vars, not our own. - int sizeOfPath = GetTempPathW(MAX_PATH - 1, tmpPath); - if (sizeOfPath > MAX_PATH || sizeOfPath == 0) { + int sizeOfPath = GetTempPathW(tmpPath.size() - 1, tmpPath.data()); + if (sizeOfPath > tmpPath.size()) { + tmpPath.resize(sizeOfPath, 0); + sizeOfPath = GetTempPathW(tmpPath.size() - 1, tmpPath.data()); + } + if (sizeOfPath > tmpPath.size() || sizeOfPath == 0) { ARCH_ERROR("Call to GetTempPath failed."); _TmpDir = "."; return; @@ -722,7 +763,7 @@ Arch_InitTmpDir() // Strip the trailing slash tmpPath[sizeOfPath-1] = 0; - _TmpDir = _strdup(ArchWindowsUtf16ToUtf8(tmpPath).c_str()); + _TmpDir = _strdup(ArchWindowsUtf16ToUtf8(tmpPath.data()).c_str()); #else const std::string tmpdir = ArchGetEnv("TMPDIR"); if (!tmpdir.empty()) { @@ -1085,9 +1126,9 @@ static int Arch_FileAccessError() int ArchFileAccess(const char* path, int mode) { // Simple existence check is handled specially. - std::wstring wpath{ ArchWindowsUtf8ToUtf16(path) }; + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); if (mode == F_OK) { - return (GetFileAttributesW(wpath.c_str()) != INVALID_FILE_ATTRIBUTES) + return (GetFileAttributesW(apiPath.c_str()) != INVALID_FILE_ATTRIBUTES) ? 0 : Arch_FileAccessError(); } @@ -1097,7 +1138,7 @@ int ArchFileAccess(const char* path, int mode) // Get the SECURITY_DESCRIPTOR size. DWORD length = 0; - if (!GetFileSecurityW(wpath.c_str(), securityInfo, NULL, 0, &length)) { + if (!GetFileSecurityW(apiPath.c_str(), securityInfo, NULL, 0, &length)) { if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { return Arch_FileAccessError(); } @@ -1107,7 +1148,7 @@ int ArchFileAccess(const char* path, int mode) std::unique_ptr buffer(new unsigned char[length]); PSECURITY_DESCRIPTOR security = (PSECURITY_DESCRIPTOR)buffer.get(); if (!GetFileSecurityW( - wpath.c_str(), securityInfo, security, length, &length)) { + apiPath.c_str(), securityInfo, security, length, &length)) { return Arch_FileAccessError(); } @@ -1199,9 +1240,10 @@ typedef struct _REPARSE_DATA_BUFFER { std::string ArchReadLink(const char* path) { + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); HANDLE handle = ::CreateFileW( - ArchWindowsUtf8ToUtf16(path).c_str(), GENERIC_READ, FILE_SHARE_READ, - NULL, OPEN_EXISTING, + apiPath.c_str(), GENERIC_READ, FILE_SHARE_READ, + NULL, OPEN_EXISTING, FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS, NULL); @@ -1365,4 +1407,35 @@ void ArchFileAdvise( #endif } +#if defined(ARCH_OS_WINDOWS) + +ARCH_API +std::wstring ArchHandleLongWindowsPaths(const std::wstring& path) +{ + // Subtracting 12 (8.3) so this function also works for CreateDirectoryW: + // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectoryw + // ARCH_PATH_MAX counts the null terminator as well + if (path.size() >= ARCH_PATH_MAX - 12 - 1) { + std::wstring longPath = path; + + // the \\?\ prefix requires strict backslash separators: + // see https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation + std::replace(std::begin(longPath), std::end(longPath), L'/', L'\\'); + + // prevent duplicate prefixing + if (longPath.find(LONG_PATH_PREFIX_W) != 0) { + // if it still starts with two backslashes, it is a network path + if (longPath[0] == L'\\' && longPath[1] == L'\\') { + return UNC_LONG_PATH_PREFIX_W + longPath.substr(2); + } else { + return LONG_PATH_PREFIX_W + longPath; + } + } + } + + return path; +} + +#endif + PXR_NAMESPACE_CLOSE_SCOPE diff --git a/pxr/base/arch/fileSystem.h b/pxr/base/arch/fileSystem.h index 432d6d7063..702fc80d6b 100644 --- a/pxr/base/arch/fileSystem.h +++ b/pxr/base/arch/fileSystem.h @@ -128,11 +128,10 @@ ArchOpenFile(char const* fileName, char const* mode); # define ArchCloseFile(fd) close(fd) #endif -#if defined(ARCH_OS_WINDOWS) -# define ArchUnlinkFile(path) _unlink(path) -#else -# define ArchUnlinkFile(path) unlink(path) -#endif +/// Deletes a file. +/// +/// Returns 0 on success, or -1 otherwise. +ARCH_API int ArchUnlinkFile(const char* path); #if defined(ARCH_OS_WINDOWS) ARCH_API int ArchFileAccess(const char* path, int mode); @@ -158,6 +157,9 @@ ArchOpenFile(char const* fileName, char const* mode); # define ArchFileIsaTTY(stream) isatty(stream) #endif +/// Delete an empty directory +/// +/// Returns 0 on success, or -1 otherwise. #if defined(ARCH_OS_WINDOWS) ARCH_API int ArchRmDir(const char* path); #else @@ -274,10 +276,10 @@ ARCH_API int ArchMakeTmpFile(const std::string& tmpdir, const std::string& prefix, std::string* pathname = 0); -/// Create a temporary sub-direcrory, in a given temporary directory. +/// Create a temporary sub-directory, in a given temporary directory. /// /// The result returned has the form TMPDIR/prefix.XXXXXX/ where TMPDIR is the -/// given temporary directory and XXXXXX is a unique suffix. Returns the the +/// given temporary directory and XXXXXX is a unique suffix. Returns the /// full path to the subdir in pathname. Returns empty string on failure and /// errno is set. /// @@ -443,6 +445,11 @@ inline std::wstring ArchWindowsUtf8ToUtf16(const std::string &str) return wstr; } +/// Expects an UTF-16 path and prepends the Windows long path prefix if necessary. +/// see https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry +ARCH_API +std::wstring ArchHandleLongWindowsPaths(const std::wstring& path); + #endif ///@} From 2070972c10a040565dfb10825abad3a9a42bdd77 Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Fri, 27 Jan 2023 13:40:31 +0100 Subject: [PATCH 03/11] Arch: add tests for Windows long path handling --- pxr/base/arch/testenv/testFileSystem.cpp | 80 ++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/pxr/base/arch/testenv/testFileSystem.cpp b/pxr/base/arch/testenv/testFileSystem.cpp index bbd790699d..7931914f56 100644 --- a/pxr/base/arch/testenv/testFileSystem.cpp +++ b/pxr/base/arch/testenv/testFileSystem.cpp @@ -14,6 +14,7 @@ #include #include #include +#include PXR_NAMESPACE_USING_DIRECTIVE @@ -63,6 +64,7 @@ _AbsPathFilter(const std::string& path) return path; #endif } + } static bool @@ -73,9 +75,83 @@ TestArchAbsPath() ARCH_AXIOM(_AbsPathFilter(ArchAbsPath("/foo/bar")) == "/foo/bar"); ARCH_AXIOM(_AbsPathFilter(ArchAbsPath("/foo/bar/../baz")) == "/foo/baz"); + ARCH_AXIOM(_AbsPathFilter(ArchAbsPath("/foo/bar/../baz")) == "/foo/baz"); + + return true; +} + +#ifdef ARCH_OS_WINDOWS + +namespace { + +std::string _CreateLongWindowsPath(bool dir, bool dotted) { + std::string p = ArchGetTmpDir(); + for (size_t i = 0; i < 15; ++i) + p += "\\abcdefghijklmnopqrs"; + if (dotted) + p += "\\.\\..\\abcdefghijklmnopqrs"; + if (!dir) + p += "\\foo.bar"; + ARCH_AXIOM(p.size() > ARCH_PATH_MAX); + return p; +} + +std::string _CreatePhysicalLongPathDirectory() { + std::string tmpDir(ArchGetTmpDir()); + std::string prefix(150, 'a'); + tmpDir = ArchMakeTmpSubdir(tmpDir, prefix); + prefix.assign(150, 'b'); + tmpDir = ArchMakeTmpSubdir(tmpDir, prefix); + ARCH_AXIOM(tmpDir.size() > ARCH_PATH_MAX); + return tmpDir; +} + +} // namespace + +static bool TestLongPaths() +{ + const std::string longFilePathDotted = _CreateLongWindowsPath(false, true); + const std::string longFilePath = _CreateLongWindowsPath(false, false); + const std::string longFilePathForwardSlash = [&longFilePath]() { + std::string t = longFilePath; + std::replace(t.begin(), t.end(), '\\', '/'); + return t; + }(); + + { + const std::string actual = ArchNormPath(longFilePathDotted, false); + const std::string expected = longFilePathForwardSlash; + ARCH_AXIOM(actual == expected); + } + { + const std::string actual = ArchAbsPath(longFilePathDotted); + const std::string expected = longFilePath; + ARCH_AXIOM(actual == expected); + } + { + std::string longTmpDir = _CreatePhysicalLongPathDirectory(); + const std::string longTmpFilePath = longTmpDir + '\\' + "foo.bar"; + + FILE *file; + ARCH_AXIOM((file = ArchOpenFile(longTmpFilePath.c_str(), "wb")) != NULL); + std::fprintf(file, "%s", "hello"); + fclose(file); // TODO: fd arg of ArchCloseFile is not symmetrical?? + ARCH_AXIOM(ArchUnlinkFile(longTmpFilePath.c_str()) == 0); + ARCH_AXIOM(ArchRmDir(longTmpDir.c_str()) == 0); + } + + // ArchMakeTmpFile + // ArchFileAccess + // ArchReadLink + // ArchGetModificationTime + // ArchGetStatMode + // ArchGetFileLength + return true; } +#endif + int main() { std::string firstName = ArchMakeTmpFileName("archFS"); @@ -137,5 +213,9 @@ int main() TestArchNormPath(); TestArchAbsPath(); +#ifdef ARCH_OS_WINDOWS + TestLongPaths(); +#endif + return 0; } From bd686e18a887ff6aed8f432d4f8ce81f117f8182 Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Mon, 6 Feb 2023 19:28:26 +0100 Subject: [PATCH 04/11] Arch/Tf: introduce ArchTouchFile and forward from TfTouchFile --- pxr/base/arch/fileSystem.cpp | 47 ++++++++++++++++++++++++++++++++++++ pxr/base/arch/fileSystem.h | 5 ++++ pxr/base/tf/fileUtils.cpp | 40 +----------------------------- 3 files changed, 53 insertions(+), 39 deletions(-) diff --git a/pxr/base/arch/fileSystem.cpp b/pxr/base/arch/fileSystem.cpp index 4d691dac89..1e898da224 100644 --- a/pxr/base/arch/fileSystem.cpp +++ b/pxr/base/arch/fileSystem.cpp @@ -32,13 +32,16 @@ #include #include #include +#include #include #include #else #include #include #include +#include #include +#include #endif PXR_NAMESPACE_OPEN_SCOPE @@ -142,6 +145,50 @@ FILE* ArchOpenFile(char const* fileName, char const* mode) #endif } +bool ArchTouchFile(const std::string& fileName, bool create) { +#if defined(ARCH_OS_WINDOWS) + const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); +#endif + + if (create) { +#if !defined(ARCH_OS_WINDOWS) + // Attempt to create the file so it is readable and writable by user, + // group and other. + int fd = open(fileName.c_str(), + O_WRONLY | O_CREAT | O_NONBLOCK | O_NOCTTY, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); + if (fd == -1) + return false; + close(fd); +#else + HANDLE fileHandle = + ::CreateFileW(apiPath.c_str(), + GENERIC_WRITE, // open for write + 0, // not for sharing + NULL, // default security + OPEN_ALWAYS, // opens existing + FILE_ATTRIBUTE_NORMAL, //normal file + NULL); // no template + + if (fileHandle == INVALID_HANDLE_VALUE) { + return false; + } + + // Close the file + ::CloseHandle(fileHandle); +#endif + } + + // Passing NULL to the 'times' argument sets both the atime and mtime to + // the current time, with millisecond precision. +#if defined(ARCH_OS_WINDOWS) + return _wutime(apiPath.c_str(), /* times */ NULL) == 0; +#else + return utimes(fileName.c_str(), /* times */ NULL) == 0; +#endif + +} + int ArchUnlinkFile(const char* path) { #if defined(ARCH_OS_WINDOWS) const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); diff --git a/pxr/base/arch/fileSystem.h b/pxr/base/arch/fileSystem.h index 702fc80d6b..7cfafd882a 100644 --- a/pxr/base/arch/fileSystem.h +++ b/pxr/base/arch/fileSystem.h @@ -128,6 +128,11 @@ ArchOpenFile(char const* fileName, char const* mode); # define ArchCloseFile(fd) close(fd) #endif +/// Touch a file. +/// +/// Returns true upon success, false otherwise. +ARCH_API bool ArchTouchFile(const std::string& fileName, bool create); + /// Deletes a file. /// /// Returns 0 on success, or -1 otherwise. diff --git a/pxr/base/tf/fileUtils.cpp b/pxr/base/tf/fileUtils.cpp index d4b10e4c2a..79a71bd9cb 100644 --- a/pxr/base/tf/fileUtils.cpp +++ b/pxr/base/tf/fileUtils.cpp @@ -30,14 +30,11 @@ #if !defined(ARCH_OS_WINDOWS) #include #include -#include #include -#include #else #include #include #include -#include #endif using std::set; using std::string; @@ -642,42 +639,7 @@ TfListDir(string const& path, bool recursive) TF_API bool TfTouchFile(string const &fileName, bool create) { - if (create) { -#if !defined(ARCH_OS_WINDOWS) - // Attempt to create the file so it is readable and writable by user, - // group and other. - int fd = open(fileName.c_str(), - O_WRONLY | O_CREAT | O_NONBLOCK | O_NOCTTY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); - if (fd == -1) - return false; - close(fd); -#else - HANDLE fileHandle = - ::CreateFileW(ArchWindowsUtf8ToUtf16(fileName).c_str(), - GENERIC_WRITE, // open for write - 0, // not for sharing - NULL, // default security - OPEN_ALWAYS, // opens existing - FILE_ATTRIBUTE_NORMAL, //normal file - NULL); // no template - - if (fileHandle == INVALID_HANDLE_VALUE) { - return false; - } - - // Close the file - ::CloseHandle(fileHandle); -#endif - } - - // Passing NULL to the 'times' argument sets both the atime and mtime to - // the current time, with millisecond precision. -#if defined(ARCH_OS_WINDOWS) - return _utime(fileName.c_str(), /* times */ NULL) == 0; -#else - return utimes(fileName.c_str(), /* times */ NULL) == 0; -#endif + return ArchTouchFile(fileName, create); } PXR_NAMESPACE_CLOSE_SCOPE From a1a4d276f118c4e4ee47bc06b1b734ba4802f247 Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Mon, 6 Feb 2023 16:55:47 +0100 Subject: [PATCH 05/11] Arch: hide ArchHandleLongWindowsPaths --- pxr/base/arch/fileSystem.cpp | 83 +++++++++++++++++------------------- pxr/base/arch/fileSystem.h | 5 --- 2 files changed, 40 insertions(+), 48 deletions(-) diff --git a/pxr/base/arch/fileSystem.cpp b/pxr/base/arch/fileSystem.cpp index 1e898da224..20edc8a04e 100644 --- a/pxr/base/arch/fileSystem.cpp +++ b/pxr/base/arch/fileSystem.cpp @@ -63,6 +63,34 @@ static inline HANDLE _FileToWinHANDLE(FILE *file) return reinterpret_cast(_get_osfhandle(_fileno(file))); } +/// Expects an UTF-16 path and prepends the Windows long path prefix if necessary. +/// see https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry +std::wstring _ArchHandleLongWindowsPaths(const std::wstring& path) +{ + // Subtracting 12 (8.3) so this function also works for CreateDirectoryW: + // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectoryw + // ARCH_PATH_MAX counts the null terminator as well + if (path.size() >= ARCH_PATH_MAX - 12 - 1) { + std::wstring longPath = path; + + // the \\?\ prefix requires strict backslash separators: + // see https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation + std::replace(std::begin(longPath), std::end(longPath), L'/', L'\\'); + + // prevent duplicate prefixing + if (longPath.find(LONG_PATH_PREFIX_W) != 0) { + // if it still starts with two backslashes, it is a network path + if (longPath[0] == L'\\' && longPath[1] == L'\\') { + return UNC_LONG_PATH_PREFIX_W + longPath.substr(2); + } else { + return LONG_PATH_PREFIX_W + longPath; + } + } + } + + return path; +} + } #endif // ARCH_OS_WINDOWS @@ -105,7 +133,7 @@ FILE* ArchOpenFile(char const* fileName, char const* mode) return nullptr; } - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); // Call CreateFileW. HANDLE hfile = CreateFileW( @@ -147,7 +175,7 @@ FILE* ArchOpenFile(char const* fileName, char const* mode) bool ArchTouchFile(const std::string& fileName, bool create) { #if defined(ARCH_OS_WINDOWS) - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); #endif if (create) { @@ -191,7 +219,7 @@ bool ArchTouchFile(const std::string& fileName, bool create) { int ArchUnlinkFile(const char* path) { #if defined(ARCH_OS_WINDOWS) - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); return _wunlink(apiPath.c_str()); #else return unlink(path); @@ -201,7 +229,7 @@ int ArchUnlinkFile(const char* path) { #if defined(ARCH_OS_WINDOWS) int ArchRmDir(const char* path) { - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); return RemoveDirectoryW(apiPath.c_str()) ? 0 : -1; } #endif @@ -232,7 +260,7 @@ ArchGetModificationTime(const char* pathname, double* time) { ArchStatType st; #if defined(ARCH_OS_WINDOWS) - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(pathname)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(pathname)); if (_wstat64(apiPath.c_str(), &st) == 0) #else if (stat(pathname, &st) == 0) @@ -457,7 +485,7 @@ ArchAbsPath(const string& path) #if defined(ARCH_OS_WINDOWS) // path - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); std::vector buffer(ARCH_PATH_MAX, 0); auto requiredBufferSize = GetFullPathNameW(apiPath.c_str(), buffer.size(), buffer.data(), nullptr); if (requiredBufferSize > buffer.size()) { @@ -494,7 +522,7 @@ ArchGetStatMode(const char *pathname, int *mode) { ArchStatType st; #if defined(ARCH_OS_WINDOWS) - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(pathname)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(pathname)); if (_wstat64(apiPath.c_str(), &st) == 0) { #else if (stat(pathname, &st) == 0) { @@ -573,7 +601,7 @@ ArchGetFileLength(const char* fileName) #elif defined (ARCH_OS_WINDOWS) // Open a handle with 0 as the desired access and full sharing. // This opens the file even if exclusively locked. - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); HANDLE handle = CreateFileW(apiPath.c_str(), 0, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, @@ -721,7 +749,7 @@ ArchMakeTmpFile(const std::string& tmpdir, int fd = -1; auto cTemplate = MakeUnique(sTemplate, [&fd](const char* name){ - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(name)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(name)); _wsopen_s(&fd, apiPath.c_str(), _O_CREAT | _O_EXCL | _O_RDWR | _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE); @@ -765,7 +793,7 @@ ArchMakeTmpSubdir(const std::string& tmpdir, #if defined(ARCH_OS_WINDOWS) retstr = MakeUnique(sTemplate, [](const char* name){ - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(name)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(name)); return (CreateDirectoryW(apiPath.c_str(), NULL) != 0); }); #else @@ -1173,7 +1201,7 @@ static int Arch_FileAccessError() int ArchFileAccess(const char* path, int mode) { // Simple existence check is handled specially. - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); if (mode == F_OK) { return (GetFileAttributesW(apiPath.c_str()) != INVALID_FILE_ATTRIBUTES) ? 0 : Arch_FileAccessError(); @@ -1287,7 +1315,7 @@ typedef struct _REPARSE_DATA_BUFFER { std::string ArchReadLink(const char* path) { - const std::wstring apiPath = ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); HANDLE handle = ::CreateFileW( apiPath.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, @@ -1454,35 +1482,4 @@ void ArchFileAdvise( #endif } -#if defined(ARCH_OS_WINDOWS) - -ARCH_API -std::wstring ArchHandleLongWindowsPaths(const std::wstring& path) -{ - // Subtracting 12 (8.3) so this function also works for CreateDirectoryW: - // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectoryw - // ARCH_PATH_MAX counts the null terminator as well - if (path.size() >= ARCH_PATH_MAX - 12 - 1) { - std::wstring longPath = path; - - // the \\?\ prefix requires strict backslash separators: - // see https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation - std::replace(std::begin(longPath), std::end(longPath), L'/', L'\\'); - - // prevent duplicate prefixing - if (longPath.find(LONG_PATH_PREFIX_W) != 0) { - // if it still starts with two backslashes, it is a network path - if (longPath[0] == L'\\' && longPath[1] == L'\\') { - return UNC_LONG_PATH_PREFIX_W + longPath.substr(2); - } else { - return LONG_PATH_PREFIX_W + longPath; - } - } - } - - return path; -} - -#endif - PXR_NAMESPACE_CLOSE_SCOPE diff --git a/pxr/base/arch/fileSystem.h b/pxr/base/arch/fileSystem.h index 7cfafd882a..b6e3194b76 100644 --- a/pxr/base/arch/fileSystem.h +++ b/pxr/base/arch/fileSystem.h @@ -450,11 +450,6 @@ inline std::wstring ArchWindowsUtf8ToUtf16(const std::string &str) return wstr; } -/// Expects an UTF-16 path and prepends the Windows long path prefix if necessary. -/// see https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry -ARCH_API -std::wstring ArchHandleLongWindowsPaths(const std::wstring& path); - #endif ///@} From 6958231b8a73a647e67cb5d44bb473f1181234d4 Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Mon, 6 Feb 2023 19:29:17 +0100 Subject: [PATCH 06/11] Arch: add commonly used overload for _ArchHandleLongWindowsPaths --- pxr/base/arch/fileSystem.cpp | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/pxr/base/arch/fileSystem.cpp b/pxr/base/arch/fileSystem.cpp index 20edc8a04e..caf4b0c145 100644 --- a/pxr/base/arch/fileSystem.cpp +++ b/pxr/base/arch/fileSystem.cpp @@ -91,6 +91,14 @@ std::wstring _ArchHandleLongWindowsPaths(const std::wstring& path) return path; } +/// convenience wrapper for the above +/// Expects a non-null UTF-8 string pointer. +std::wstring _ArchHandleLongWindowsPaths(const char* path) { + if (path == nullptr) + return {}; + return _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); +} + } #endif // ARCH_OS_WINDOWS @@ -133,7 +141,7 @@ FILE* ArchOpenFile(char const* fileName, char const* mode) return nullptr; } - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(fileName); // Call CreateFileW. HANDLE hfile = CreateFileW( @@ -175,7 +183,7 @@ FILE* ArchOpenFile(char const* fileName, char const* mode) bool ArchTouchFile(const std::string& fileName, bool create) { #if defined(ARCH_OS_WINDOWS) - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(fileName.c_str()); #endif if (create) { @@ -219,7 +227,7 @@ bool ArchTouchFile(const std::string& fileName, bool create) { int ArchUnlinkFile(const char* path) { #if defined(ARCH_OS_WINDOWS) - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(path); return _wunlink(apiPath.c_str()); #else return unlink(path); @@ -229,7 +237,7 @@ int ArchUnlinkFile(const char* path) { #if defined(ARCH_OS_WINDOWS) int ArchRmDir(const char* path) { - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(path); return RemoveDirectoryW(apiPath.c_str()) ? 0 : -1; } #endif @@ -260,7 +268,7 @@ ArchGetModificationTime(const char* pathname, double* time) { ArchStatType st; #if defined(ARCH_OS_WINDOWS) - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(pathname)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(pathname); if (_wstat64(apiPath.c_str(), &st) == 0) #else if (stat(pathname, &st) == 0) @@ -485,7 +493,7 @@ ArchAbsPath(const string& path) #if defined(ARCH_OS_WINDOWS) // path - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(path.c_str()); std::vector buffer(ARCH_PATH_MAX, 0); auto requiredBufferSize = GetFullPathNameW(apiPath.c_str(), buffer.size(), buffer.data(), nullptr); if (requiredBufferSize > buffer.size()) { @@ -522,7 +530,7 @@ ArchGetStatMode(const char *pathname, int *mode) { ArchStatType st; #if defined(ARCH_OS_WINDOWS) - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(pathname)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(pathname); if (_wstat64(apiPath.c_str(), &st) == 0) { #else if (stat(pathname, &st) == 0) { @@ -601,7 +609,7 @@ ArchGetFileLength(const char* fileName) #elif defined (ARCH_OS_WINDOWS) // Open a handle with 0 as the desired access and full sharing. // This opens the file even if exclusively locked. - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(fileName)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(fileName); HANDLE handle = CreateFileW(apiPath.c_str(), 0, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, @@ -749,7 +757,7 @@ ArchMakeTmpFile(const std::string& tmpdir, int fd = -1; auto cTemplate = MakeUnique(sTemplate, [&fd](const char* name){ - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(name)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(name); _wsopen_s(&fd, apiPath.c_str(), _O_CREAT | _O_EXCL | _O_RDWR | _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE); @@ -793,7 +801,7 @@ ArchMakeTmpSubdir(const std::string& tmpdir, #if defined(ARCH_OS_WINDOWS) retstr = MakeUnique(sTemplate, [](const char* name){ - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(name)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(name); return (CreateDirectoryW(apiPath.c_str(), NULL) != 0); }); #else @@ -1201,7 +1209,7 @@ static int Arch_FileAccessError() int ArchFileAccess(const char* path, int mode) { // Simple existence check is handled specially. - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(path); if (mode == F_OK) { return (GetFileAttributesW(apiPath.c_str()) != INVALID_FILE_ATTRIBUTES) ? 0 : Arch_FileAccessError(); @@ -1315,7 +1323,7 @@ typedef struct _REPARSE_DATA_BUFFER { std::string ArchReadLink(const char* path) { - const std::wstring apiPath = _ArchHandleLongWindowsPaths(ArchWindowsUtf8ToUtf16(path)); + const std::wstring apiPath = _ArchHandleLongWindowsPaths(path); HANDLE handle = ::CreateFileW( apiPath.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, From e085dc705f77529221f5d4f7b9965d988965d31f Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Mon, 6 Feb 2023 19:12:47 +0100 Subject: [PATCH 07/11] Arch: additional test coverage --- pxr/base/arch/testenv/testFileSystem.cpp | 57 +++++++++++++++++++----- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/pxr/base/arch/testenv/testFileSystem.cpp b/pxr/base/arch/testenv/testFileSystem.cpp index 7931914f56..518cfd85e0 100644 --- a/pxr/base/arch/testenv/testFileSystem.cpp +++ b/pxr/base/arch/testenv/testFileSystem.cpp @@ -96,16 +96,33 @@ std::string _CreateLongWindowsPath(bool dir, bool dotted) { return p; } +// slightly awkward way of creating and deleting a long path without +// having to introduce recursive deletion etc. std::string _CreatePhysicalLongPathDirectory() { + static const std::string tmpDirPart0("UsdArchTestLongPaths"); + static const std::string tmpDirPart1(150, 'a'); + static const std::string tmpDirPart2(150, 'b'); std::string tmpDir(ArchGetTmpDir()); - std::string prefix(150, 'a'); - tmpDir = ArchMakeTmpSubdir(tmpDir, prefix); - prefix.assign(150, 'b'); - tmpDir = ArchMakeTmpSubdir(tmpDir, prefix); + tmpDir = ArchMakeTmpSubdir(tmpDir, tmpDirPart0); + tmpDir = ArchMakeTmpSubdir(tmpDir, tmpDirPart1); + tmpDir = ArchMakeTmpSubdir(tmpDir, tmpDirPart2); ARCH_AXIOM(tmpDir.size() > ARCH_PATH_MAX); return tmpDir; } +// implying structure from above +void _RemoveLongPathDirectory(const std::string& longTmpDir) { + ARCH_AXIOM(ArchRmDir(longTmpDir.c_str()) == 0); + + std::string::size_type lastSep = longTmpDir.find_last_of('\\'); + ARCH_AXIOM(lastSep != std::string::npos); + ARCH_AXIOM(ArchRmDir(longTmpDir.substr(0, lastSep).c_str()) == 0); + + lastSep = longTmpDir.find_last_of('\\', lastSep-1); + ARCH_AXIOM(lastSep != std::string::npos); + ARCH_AXIOM(ArchRmDir(longTmpDir.substr(0, lastSep).c_str()) == 0); +} + } // namespace static bool TestLongPaths() @@ -135,17 +152,33 @@ static bool TestLongPaths() FILE *file; ARCH_AXIOM((file = ArchOpenFile(longTmpFilePath.c_str(), "wb")) != NULL); std::fprintf(file, "%s", "hello"); - fclose(file); // TODO: fd arg of ArchCloseFile is not symmetrical?? + fclose(file); + + ARCH_AXIOM(ArchFileAccess(longTmpFilePath.c_str(), W_OK) == 0); + ARCH_AXIOM(ArchGetFileLength(longTmpFilePath.c_str()) == 5); + + ARCH_AXIOM(ArchUnlinkFile(longTmpFilePath.c_str()) == 0); + _RemoveLongPathDirectory(longTmpDir); + } + { + std::string longTmpDir = _CreatePhysicalLongPathDirectory(); + const std::string longTmpFilePath = longTmpDir + '\\' + "foo.bar"; + ARCH_AXIOM(ArchTouchFile(longTmpFilePath, true)); ARCH_AXIOM(ArchUnlinkFile(longTmpFilePath.c_str()) == 0); - ARCH_AXIOM(ArchRmDir(longTmpDir.c_str()) == 0); + _RemoveLongPathDirectory(longTmpDir); } + { + std::string longTmpDir = _CreatePhysicalLongPathDirectory(); + + std::string longTmpFilePath; + int tmpFileHandle = ArchMakeTmpFile(longTmpDir, "foo", &longTmpFilePath); + ARCH_AXIOM(tmpFileHandle != -1); + ArchCloseFile(tmpFileHandle); + ARCH_AXIOM(longTmpFilePath.size() == (longTmpDir.size() + 1 + 3 + 7)); - // ArchMakeTmpFile - // ArchFileAccess - // ArchReadLink - // ArchGetModificationTime - // ArchGetStatMode - // ArchGetFileLength + ARCH_AXIOM(ArchUnlinkFile(longTmpFilePath.c_str()) == 0); + _RemoveLongPathDirectory(longTmpDir); + } return true; } From 862b9f245955238b8ce3cf4e6aebda66be75e0cd Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Sun, 2 Apr 2023 20:53:20 +0200 Subject: [PATCH 08/11] Arch: don't leak temp file in ArchPWrite and ArchPRead test --- pxr/base/arch/testenv/testFileSystem.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pxr/base/arch/testenv/testFileSystem.cpp b/pxr/base/arch/testenv/testFileSystem.cpp index 518cfd85e0..fd9bf65756 100644 --- a/pxr/base/arch/testenv/testFileSystem.cpp +++ b/pxr/base/arch/testenv/testFileSystem.cpp @@ -219,7 +219,7 @@ int main() mfm.get()[0] = 'T'; mfm.get()[2] = 's'; ARCH_AXIOM(memcmp("Test", mfm.get(), strlen("Test")) == 0); mfm.reset(); - ArchUnlinkFile(firstName.c_str()); + ARCH_AXIOM(ArchUnlinkFile(firstName.c_str()) == 0); // Test ArchPWrite and ArchPRead. int64_t len = strlen(testContent); @@ -235,6 +235,8 @@ int main() ARCH_AXIOM(ArchPRead(firstFile, buf2.get(), strlen("written in a"), 9/*index of 'written in a'*/) == strlen("written in a")); ARCH_AXIOM(memcmp("written in a", buf2.get(), strlen("written in a")) == 0); + fclose(firstFile); + ARCH_AXIOM(ArchUnlinkFile(firstName.c_str()) == 0); // create and remove a tmp subdir std::string retpath; From 13929e8ea8bce3d8840100fae03961c036e4e4e2 Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Sat, 3 Aug 2024 13:46:27 +0200 Subject: [PATCH 09/11] Arch: move ArchNormPath helpers to top, so we can use it in Windows long path handling --- pxr/base/arch/fileSystem.cpp | 320 +++++++++++++++++------------------ 1 file changed, 160 insertions(+), 160 deletions(-) diff --git a/pxr/base/arch/fileSystem.cpp b/pxr/base/arch/fileSystem.cpp index caf4b0c145..f0123bd146 100644 --- a/pxr/base/arch/fileSystem.cpp +++ b/pxr/base/arch/fileSystem.cpp @@ -50,6 +50,166 @@ using std::pair; using std::string; using std::set; +namespace { // Helpers for ArchNormPath. + +enum TokenType { Dot, DotDot, Elem }; + +typedef pair Token; +typedef pair RToken; + +template +inline pair +_NextToken(Iter i, Iter end) +{ + pair t; + for (t.first = i; + t.first != end && *t.first == '/'; ++t.first) {} + for (t.second = t.first; + t.second != end && *t.second != '/'; ++t.second) {} + return t; +} + +template +inline TokenType +_GetTokenType(pair t) { + size_t len = distance(t.first, t.second); + if (len == 1 && t.first[0] == '.') + return Dot; + if (len == 2 && t.first[0] == '.' && t.first[1] == '.') + return DotDot; + return Elem; +} + +string +_NormPath(string const &inPath) +{ + // We take one pass through the string, transforming it into a normalized + // path in-place. This works since the normalized path never grows, except + // in the trivial case of '' -> '.'. In all other cases, every + // transformation we make either shrinks the string or maintains its size. + // + // We track a current 'write' iterator, indicating the end of the normalized + // path we've built so far and a current token 't', the next slash-delimited + // path element we will process. For example, let's walk through the steps + // we take to normalize the input '/foo/../bar' to produce '/bar'. To + // start, the state looks like the following, with the write iterator past + // any leading slashes, and 't' at the first path token. + // + // /foo/../bar + // w <------ 'write' iterator + // [ ] <------ next token 't' + // + // We look at the token 't' to determine its type: one of DotDot, Dot, or + // Elem. In this case, it's a regular path Elem 'foo' so we simply copy it + // to the 'write' iterator and advance 't' to the next token. Then the + // state looks like: + // + // /foo/../bar + // w + // [ ] + // + // Now 't' is a DotDot token '..', so we remove the last path element in the + // normalized result by scanning backwards from 'w' resetting 'w' to that + // location to effectively remove the element, then advance 't' to the next + // token. Now the state looks like: + // + // /foo/../bar + // w [ ] + // + // The final token is the regular path Elem 'bar' so we copy it and trim the + // string to produce the final result '/bar'. + // + + // This code is fairly optimized for libstdc++'s copy-on-write string. It + // takes a copy of 'inPath' to start (refcount bump) but it avoids doing any + // mutating operation on 'path' until it actually has to. Doing a mutating + // operation (even grabbing a non-const iterator) will pay for the malloc + // and deep copy so we want to avoid that in the common case where the input + // path is already normalized. + + string path(inPath); + + // Find the first path token. + Token t = _NextToken(inPath.begin(), inPath.end()); + + // Allow zero, one, or two leading slashes, per POSIX. Three or more get + // collapsed to one. + const size_t numLeadingSlashes = distance(inPath.begin(), t.first); + size_t writeIdx = numLeadingSlashes >= 3 ? 1 : numLeadingSlashes; + + // Save a reverse iterator at where we start the output, we'll use this when + // scanning backward to handle DotDot tokens. + size_t firstWriteIdx = writeIdx; + + // Now walk through the string, copying tokens, looking for slashes and dots + // to handle. + for (; t.first != inPath.end(); t = _NextToken(t.second, inPath.end())) { + switch (_GetTokenType(t)) { + case Elem: + // Copy the elem. We avoid mutating 'path' if we've made no changes + // to the output yet, which is true if the write head is in the same + // place in the output as it is in the input. + if (inPath.begin() + writeIdx == t.first) { + writeIdx += distance(t.first, t.second); + t.first = t.second; + if (writeIdx != path.size()) + ++writeIdx; + } else { + while (t.first != t.second) + path[writeIdx++] = *t.first++; + if (writeIdx != path.size()) + path[writeIdx++] = '/'; + } + break; + case Dot: + // Do nothing, Dots are simply ignored. + break; + case DotDot: { + // Here we are very likely to be modifying the string, so we use + // non-const iterators and mutate. + string::reverse_iterator + rstart(path.begin() + firstWriteIdx), + rwrite(path.begin() + writeIdx); + // Find the last token of the output by finding the next token in + // reverse. + RToken backToken = _NextToken(rwrite, rstart); + // If there are no more Elems to consume with DotDots and this is a + // relative path, or this token is already a DotDot, then copy it to + // the output. + if ((rstart == path.rend() && backToken.first == rstart) || + _GetTokenType(backToken) == DotDot) { + path[writeIdx++] = '.'; + path[writeIdx++] = '.'; + if (writeIdx != path.size()) + path[writeIdx++] = '/'; + } else if (backToken.first != rstart) { + // Otherwise, consume the last elem by moving writeIdx back to + // before the elem. + writeIdx = distance(path.begin(), backToken.second.base()); + } + } + break; + }; + } + + // Remove a trailing slash if we wrote one. We're careful to use const + // iterators here to avoid incurring a string copy if it's not necessary (in + // the case of libstdc++'s copy-on-write basic_string) + if (writeIdx > firstWriteIdx && path.cbegin()[writeIdx-1] == '/') + --writeIdx; + + // Trim the string to length if necessary. + if (writeIdx != path.size()) + path.erase(writeIdx); + + // If the resulting path is empty, return "." + if (path.empty()) + path.assign("."); + + return path; +} +} // anon + #if defined (ARCH_OS_WINDOWS) namespace { @@ -295,166 +455,6 @@ ArchGetModificationTime(const ArchStatType& st) #endif } -namespace { // Helpers for ArchNormPath. - -enum TokenType { Dot, DotDot, Elem }; - -typedef pair Token; -typedef pair RToken; - -template -inline pair -_NextToken(Iter i, Iter end) -{ - pair t; - for (t.first = i; - t.first != end && *t.first == '/'; ++t.first) {} - for (t.second = t.first; - t.second != end && *t.second != '/'; ++t.second) {} - return t; -} - -template -inline TokenType -_GetTokenType(pair t) { - size_t len = distance(t.first, t.second); - if (len == 1 && t.first[0] == '.') - return Dot; - if (len == 2 && t.first[0] == '.' && t.first[1] == '.') - return DotDot; - return Elem; -} - -string -_NormPath(string const &inPath) -{ - // We take one pass through the string, transforming it into a normalized - // path in-place. This works since the normalized path never grows, except - // in the trivial case of '' -> '.'. In all other cases, every - // transformation we make either shrinks the string or maintains its size. - // - // We track a current 'write' iterator, indicating the end of the normalized - // path we've built so far and a current token 't', the next slash-delimited - // path element we will process. For example, let's walk through the steps - // we take to normalize the input '/foo/../bar' to produce '/bar'. To - // start, the state looks like the following, with the write iterator past - // any leading slashes, and 't' at the first path token. - // - // /foo/../bar - // w <------ 'write' iterator - // [ ] <------ next token 't' - // - // We look at the token 't' to determine its type: one of DotDot, Dot, or - // Elem. In this case, it's a regular path Elem 'foo' so we simply copy it - // to the 'write' iterator and advance 't' to the next token. Then the - // state looks like: - // - // /foo/../bar - // w - // [ ] - // - // Now 't' is a DotDot token '..', so we remove the last path element in the - // normalized result by scanning backwards from 'w' resetting 'w' to that - // location to effectively remove the element, then advance 't' to the next - // token. Now the state looks like: - // - // /foo/../bar - // w [ ] - // - // The final token is the regular path Elem 'bar' so we copy it and trim the - // string to produce the final result '/bar'. - // - - // This code is fairly optimized for libstdc++'s copy-on-write string. It - // takes a copy of 'inPath' to start (refcount bump) but it avoids doing any - // mutating operation on 'path' until it actually has to. Doing a mutating - // operation (even grabbing a non-const iterator) will pay for the malloc - // and deep copy so we want to avoid that in the common case where the input - // path is already normalized. - - string path(inPath); - - // Find the first path token. - Token t = _NextToken(inPath.begin(), inPath.end()); - - // Allow zero, one, or two leading slashes, per POSIX. Three or more get - // collapsed to one. - const size_t numLeadingSlashes = distance(inPath.begin(), t.first); - size_t writeIdx = numLeadingSlashes >= 3 ? 1 : numLeadingSlashes; - - // Save a reverse iterator at where we start the output, we'll use this when - // scanning backward to handle DotDot tokens. - size_t firstWriteIdx = writeIdx; - - // Now walk through the string, copying tokens, looking for slashes and dots - // to handle. - for (; t.first != inPath.end(); t = _NextToken(t.second, inPath.end())) { - switch (_GetTokenType(t)) { - case Elem: - // Copy the elem. We avoid mutating 'path' if we've made no changes - // to the output yet, which is true if the write head is in the same - // place in the output as it is in the input. - if (inPath.begin() + writeIdx == t.first) { - writeIdx += distance(t.first, t.second); - t.first = t.second; - if (writeIdx != path.size()) - ++writeIdx; - } else { - while (t.first != t.second) - path[writeIdx++] = *t.first++; - if (writeIdx != path.size()) - path[writeIdx++] = '/'; - } - break; - case Dot: - // Do nothing, Dots are simply ignored. - break; - case DotDot: { - // Here we are very likely to be modifying the string, so we use - // non-const iterators and mutate. - string::reverse_iterator - rstart(path.begin() + firstWriteIdx), - rwrite(path.begin() + writeIdx); - // Find the last token of the output by finding the next token in - // reverse. - RToken backToken = _NextToken(rwrite, rstart); - // If there are no more Elems to consume with DotDots and this is a - // relative path, or this token is already a DotDot, then copy it to - // the output. - if ((rstart == path.rend() && backToken.first == rstart) || - _GetTokenType(backToken) == DotDot) { - path[writeIdx++] = '.'; - path[writeIdx++] = '.'; - if (writeIdx != path.size()) - path[writeIdx++] = '/'; - } else if (backToken.first != rstart) { - // Otherwise, consume the last elem by moving writeIdx back to - // before the elem. - writeIdx = distance(path.begin(), backToken.second.base()); - } - } - break; - }; - } - - // Remove a trailing slash if we wrote one. We're careful to use const - // iterators here to avoid incurring a string copy if it's not necessary (in - // the case of libstdc++'s copy-on-write basic_string) - if (writeIdx > firstWriteIdx && path.cbegin()[writeIdx-1] == '/') - --writeIdx; - - // Trim the string to length if necessary. - if (writeIdx != path.size()) - path.erase(writeIdx); - - // If the resulting path is empty, return "." - if (path.empty()) - path.assign("."); - - return path; -} -} // anon - #if defined(ARCH_OS_WINDOWS) string ArchNormPath(const string& inPath, bool stripDriveSpecifier) From c9494200ca8e55f1423d8d233bda15a2e3d92759 Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Sat, 3 Aug 2024 13:47:20 +0200 Subject: [PATCH 10/11] Arch: templatize _NormPath to support wide strings --- pxr/base/arch/fileSystem.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pxr/base/arch/fileSystem.cpp b/pxr/base/arch/fileSystem.cpp index f0123bd146..9abea62f55 100644 --- a/pxr/base/arch/fileSystem.cpp +++ b/pxr/base/arch/fileSystem.cpp @@ -48,14 +48,17 @@ PXR_NAMESPACE_OPEN_SCOPE using std::pair; using std::string; +using std::basic_string; using std::set; namespace { // Helpers for ArchNormPath. enum TokenType { Dot, DotDot, Elem }; -typedef pair Token; -typedef pair RToken; +template +using Token = pair::const_iterator, typename basic_string::const_iterator>; +template +using RToken = pair::reverse_iterator, typename basic_string::reverse_iterator>; template inline pair @@ -80,8 +83,9 @@ _GetTokenType(pair t) { return Elem; } -string -_NormPath(string const &inPath) +template +basic_string +_NormPath(basic_string const &inPath) { // We take one pass through the string, transforming it into a normalized // path in-place. This works since the normalized path never grows, except @@ -127,10 +131,10 @@ _NormPath(string const &inPath) // and deep copy so we want to avoid that in the common case where the input // path is already normalized. - string path(inPath); + basic_string path(inPath); // Find the first path token. - Token t = _NextToken(inPath.begin(), inPath.end()); + Token t = _NextToken(inPath.begin(), inPath.end()); // Allow zero, one, or two leading slashes, per POSIX. Three or more get // collapsed to one. @@ -167,12 +171,12 @@ _NormPath(string const &inPath) case DotDot: { // Here we are very likely to be modifying the string, so we use // non-const iterators and mutate. - string::reverse_iterator + typename basic_string::reverse_iterator rstart(path.begin() + firstWriteIdx), rwrite(path.begin() + writeIdx); // Find the last token of the output by finding the next token in // reverse. - RToken backToken = _NextToken(rwrite, rstart); + RToken backToken = _NextToken(rwrite, rstart); // If there are no more Elems to consume with DotDots and this is a // relative path, or this token is already a DotDot, then copy it to // the output. @@ -204,7 +208,7 @@ _NormPath(string const &inPath) // If the resulting path is empty, return "." if (path.empty()) - path.assign("."); + path.assign(1, '.'); return path; } From dfe27db1aacbd194198083d9dfad6aef385801dd Mon Sep 17 00:00:00 2001 From: Simon Haegler Date: Sat, 3 Aug 2024 13:49:15 +0200 Subject: [PATCH 11/11] Arch: normalize the long path to satisfy Win32 long path requirements (e.g. no dotdot and dot) --- pxr/base/arch/fileSystem.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pxr/base/arch/fileSystem.cpp b/pxr/base/arch/fileSystem.cpp index 9abea62f55..2fca5cbee6 100644 --- a/pxr/base/arch/fileSystem.cpp +++ b/pxr/base/arch/fileSystem.cpp @@ -235,7 +235,8 @@ std::wstring _ArchHandleLongWindowsPaths(const std::wstring& path) // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectoryw // ARCH_PATH_MAX counts the null terminator as well if (path.size() >= ARCH_PATH_MAX - 12 - 1) { - std::wstring longPath = path; + // the \\?\ prefix requires removal of any dotdot and dot, need to normalize + std::wstring longPath = _NormPath(path); // the \\?\ prefix requires strict backslash separators: // see https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation