From c0ceebc85e5b911233d296b5282e88ef790a96d7 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Mon, 4 Jan 2021 11:32:05 -0600 Subject: [PATCH 1/8] add extract_dir to init (alternative to suffix) --- pooch/processors.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pooch/processors.py b/pooch/processors.py index 2f8576d0..a8291094 100644 --- a/pooch/processors.py +++ b/pooch/processors.py @@ -40,8 +40,9 @@ class ExtractorProcessor: # pylint: disable=too-few-public-methods # String appended to unpacked archive. To be implemented in subclass suffix = None - def __init__(self, members=None): + def __init__(self, members=None, extract_dir=None): self.members = members + self.extract_dir = extract_dir def __call__(self, fname, action, pooch): """ @@ -68,21 +69,25 @@ def __call__(self, fname, action, pooch): A list of the full path to all files in the extracted archive. """ - if self.suffix is None: + if self.suffix is None and self.extract_dir is None: raise NotImplementedError( - "Derived classes must define the 'suffix' attribute." + "Derived classes must define either a 'suffix' attribute or " + "an 'extract_dir' attribute." ) - extract_dir = fname + self.suffix - if action in ("update", "download") or not os.path.exists(extract_dir): + if self.extract_dir is None: + self.extract_dir = fname + self.suffix + elif self.suffix is not None: + get_logger().warn("Ignoring 'suffix' because 'extract_dir' was provided.") + if action in ("update", "download") or not os.path.exists(self.extract_dir): # Make sure that the folder with the extracted files exists - if not os.path.exists(extract_dir): - os.makedirs(extract_dir) - self._extract_file(fname, extract_dir) + if not os.path.exists(self.extract_dir): + os.makedirs(self.extract_dir) + self._extract_file(fname, self.extract_dir) # Get a list of all file names (including subdirectories) in our folder # of unzipped files. 
fnames = [ os.path.join(path, fname) - for path, _, files in os.walk(extract_dir) + for path, _, files in os.walk(self.extract_dir) for fname in files ] return fnames From bbae7137e246562d4b49ff416c8d3a21460a68a8 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Mon, 4 Jan 2021 11:36:22 -0600 Subject: [PATCH 2/8] handle members nested within subfolders --- pooch/processors.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pooch/processors.py b/pooch/processors.py index a8291094..17450dc8 100644 --- a/pooch/processors.py +++ b/pooch/processors.py @@ -80,8 +80,7 @@ def __call__(self, fname, action, pooch): get_logger().warn("Ignoring 'suffix' because 'extract_dir' was provided.") if action in ("update", "download") or not os.path.exists(self.extract_dir): # Make sure that the folder with the extracted files exists - if not os.path.exists(self.extract_dir): - os.makedirs(self.extract_dir) + os.makedirs(self.extract_dir, exist_ok=True) self._extract_file(fname, self.extract_dir) # Get a list of all file names (including subdirectories) in our folder # of unzipped files. 
@@ -139,6 +138,11 @@ def _extract_file(self, fname, extract_dir): get_logger().info( "Extracting '%s' from '%s' to '%s'", member, fname, extract_dir ) + # make sure the target folder exists for nested members + if len(member.split(os.path.sep)) > 1: + member_dir, _ = member.rsplit(os.path.sep, maxsplit=1) + full_dir_path = os.path.join(extract_dir, member_dir) + os.makedirs(full_dir_path, exist_ok=True) # Extract the data file from within the archive with zip_file.open(member) as data_file: # Save it to our desired file name @@ -185,6 +189,11 @@ def _extract_file(self, fname, extract_dir): get_logger().info( "Extracting '%s' from '%s' to '%s'", member, fname, extract_dir ) + # make sure the target folder exists for nested members + if len(member.split(os.path.sep)) > 1: + member_dir, _ = member.rsplit(os.path.sep, maxsplit=1) + full_dir_path = os.path.join(extract_dir, member_dir) + os.makedirs(full_dir_path, exist_ok=True) # Extract the data file from within the archive # Python 2.7: extractfile doesn't return a context manager data_file = tar_file.extractfile(member) From 50a3525a687e7f90c0063246bc5daee84fa0ac7d Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Mon, 4 Jan 2021 11:46:59 -0600 Subject: [PATCH 3/8] update docstrings --- pooch/processors.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pooch/processors.py b/pooch/processors.py index 17450dc8..e1d87264 100644 --- a/pooch/processors.py +++ b/pooch/processors.py @@ -116,6 +116,13 @@ class Unzip(ExtractorProcessor): # pylint: disable=too-few-public-methods If None, will unpack all files in the zip archive. Otherwise, *members* must be a list of file names to unpack from the archive. Only these files will be unpacked. + extract_dir : str or None + If None, files will be unpacked to the default location (a folder in + the same location as the downloaded zip file, with the suffix + ``.unzip`` added). 
Otherwise, files will be unpacked to + ``extract_dir``, which is interpreted as a *relative path* (relative to + the cache location provided by :func:`pooch.retrieve` or + :meth:`pooch.Pooch.fetch`). """ @@ -168,6 +175,13 @@ class Untar(ExtractorProcessor): # pylint: disable=too-few-public-methods If None, will unpack all files in the archive. Otherwise, *members* must be a list of file names to unpack from the archive. Only these files will be unpacked. + extract_dir : str or None + If None, files will be unpacked to the default location (a folder in + the same location as the downloaded tar file, with the suffix + ``.untar`` added). Otherwise, files will be unpacked to + ``extract_dir``, which is interpreted as a *relative path* (relative to + the cache location provided by :func:`pooch.retrieve` or + :meth:`pooch.Pooch.fetch`). """ suffix = ".untar" From 83a7aee804b7b335ff4becf4f119ab8349959510 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Mon, 4 Jan 2021 12:03:08 -0600 Subject: [PATCH 4/8] add example --- doc/processors.rst | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/doc/processors.rst b/doc/processors.rst index 3c78c84a..6c83dc8a 100644 --- a/doc/processors.rst +++ b/doc/processors.rst @@ -67,7 +67,7 @@ For example, to extract a single file from a zip archive: Load a large zipped sample data as a pandas.DataFrame. 
""" # Extract the file "actual-data-file.txt" from the archive - unpack = Unzip(members=["actual-data-file.txt"]) + unpack = Unzip(members=["actual-data-file.txt"]) # Pass in the processor to unzip the data file fnames = GOODBOY.fetch("zipped-data-file.zip", processor=unpack) # Returns the paths of all extract members (in our case, only one) @@ -77,7 +77,34 @@ For example, to extract a single file from a zip archive: data = pandas.read_csv(fname) return data -Or to extract all files into a folder and return the path to each file: +By default, the :class:`~pooch.Unzip` processor (and similarly the +:class:`~pooch.Untar` processor) will create a new folder in the same location +as the downloaded archive file, and give it the same name as the archive file +with the suffix ``.unzip`` (or ``.untar``) appended. If you want to change the +location of the unpacked files, you can provide a parameter ``extract_dir`` to +the processor to tell it where you want to unpack the files: + +.. code:: python + + from pooch import Untar + + + def fetch_and_unpack_tar_file(): + """ + Unpack a file from a tar archive to a custom subdirectory in the cache. + """ + # Extract a single file from the archive, to a specific location + unpack_to_custom_dir = Untar(members=["actual-data-file.txt"], + extract_dir="custom_folder") + # Pass in the processor to untar the data file + fnames = GOODBOY.fetch("tarred-data-file.tar.gz", processor=unpack_to_custom_dir) + # Returns the paths of all extracted members (in our case, only one) + fname = fnames[0] + return fname + + +To extract all files into a folder and return the path to each file, simply +omit the ``members`` parameter: .. code:: python @@ -85,10 +112,8 @@ Or to extract all files into a folder and return the path to each file: """ Load all files from a zipped archive. 
""" - # Pass in the processor to unzip the data file fnames = GOODBOY.fetch("zipped-archive.zip", processor=Unzip()) - data = [pandas.read_csv(fname) for fname in fnames] - return data + return fnames Use :class:`pooch.Untar` to do the exact same for tar archives (with optional compression). From 841516d246bc444fd5b2e7a297a9403109588f20 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Mon, 4 Jan 2021 12:16:45 -0600 Subject: [PATCH 5/8] add to AUTHORS (and fix a typo in same) --- AUTHORS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index bdd206ab..32fbfe96 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -8,8 +8,9 @@ order by last name) and are considered "The Pooch Developers": * [Mathias Hauser](https://github.com/mathause) - Institute for Atmospheric and Climate Science, ETH Zurich, Zurich, Switzerland (ORCID: [0000-0002-0057-4878](https://orcid.org/0000-0002-0057-4878)) * [Danilo Horta](https://github.com/horta) - EMBL-EBI, UK * [Hugo van Kemenade](https://github.com/hugovk) - Independent (Non-affiliated) (ORCID: [0000-0001-5715-8632](https://www.orcid.org/0000-0001-5715-8632)) -* [Kacper Kowalik](https://github.com/Xarthisius) - National Center for Supercomputing Applications, Univeristy of Illinois at Urbana-Champaign, USA (ORCID: [0000-0003-1709-3744](https://www.orcid.org/0000-0003-1709-3744)) +* [Kacper Kowalik](https://github.com/Xarthisius) - National Center for Supercomputing Applications, University of Illinois at Urbana-Champaign, USA (ORCID: [0000-0003-1709-3744](https://www.orcid.org/0000-0003-1709-3744)) * [John Leeman](https://github.com/jrleeman) +* [Daniel McCloy](https://github.com/drammock) - University of Washington, USA (ORCID: [0000-0002-7572-3241](https://orcid.org/0000-0002-7572-3241)) * [Rémi Rampin](https://github.com/remram44) - New York University, USA (ORCID: [0000-0002-0524-2282](https://www.orcid.org/0000-0002-0524-2282)) * [Daniel Shapero](https://github.com/danshapero) - Polar Science 
Center, University of Washington Applied Physics Lab, USA (ORCID: [0000-0002-3651-0649](https://www.orcid.org/0000-0002-3651-0649)) * [Santiago Soler](https://github.com/santisoler) - CONICET, Argentina; Instituto Geofísico Sismológico Volponi, Universidad Nacional de San Juan, Argentina (ORCID: [0000-0001-9202-5317](https://www.orcid.org/0000-0001-9202-5317)) From 3e57481f85928f7791b63c33aed1dd2f094c1f55 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Mon, 4 Jan 2021 12:14:06 -0600 Subject: [PATCH 6/8] add test --- pooch/processors.py | 5 ++- pooch/tests/test_processors.py | 74 +++++++++++++++++++++++++++++----- 2 files changed, 67 insertions(+), 12 deletions(-) diff --git a/pooch/processors.py b/pooch/processors.py index e1d87264..9f709e88 100644 --- a/pooch/processors.py +++ b/pooch/processors.py @@ -76,8 +76,9 @@ def __call__(self, fname, action, pooch): ) if self.extract_dir is None: self.extract_dir = fname + self.suffix - elif self.suffix is not None: - get_logger().warn("Ignoring 'suffix' because 'extract_dir' was provided.") + else: + archive_dir = fname.rsplit(os.path.sep, maxsplit=1)[0] + self.extract_dir = os.path.join(archive_dir, self.extract_dir) if action in ("update", "download") or not os.path.exists(self.extract_dir): # Make sure that the folder with the extracted files exists os.makedirs(self.extract_dir, exist_ok=True) diff --git a/pooch/tests/test_processors.py b/pooch/tests/test_processors.py index 88164cae..efb8fae9 100644 --- a/pooch/tests/test_processors.py +++ b/pooch/tests/test_processors.py @@ -111,13 +111,20 @@ def test_extractprocessor_fails(): @pytest.mark.parametrize( - "proc_cls,ext", [(Unzip, ".zip"), (Untar, ".tar.gz")], ids=["Unzip", "Untar"] + "proc_cls,ext,_dir", + [ + (Unzip, ".zip", None), + (Untar, ".tar.gz", None), + (Unzip, ".zip", "foo"), + (Untar, ".tar.gz", "foo"), + ], + ids=["Unzip", "Untar", "Unzip_to_custom_dir", "Untar_to_custom_dir"], ) -def test_processors(proc_cls, ext): +def test_processors(proc_cls, ext, 
_dir): "Setup a hook and make sure it's only executed when downloading" - processor = proc_cls(members=["tiny-data.txt"]) + processor = proc_cls(members=["tiny-data.txt"], extract_dir=_dir) suffix = proc_cls.suffix - extract_dir = "tiny-data" + ext + suffix + extract_dir = "tiny-data" + ext + suffix if _dir is None else _dir with TemporaryDirectory() as local_store: path = Path(local_store) true_path = str(path / extract_dir / "tiny-data.txt") @@ -147,15 +154,20 @@ def test_processors(proc_cls, ext): @pytest.mark.parametrize( - "proc_cls,ext,msg", - [(Unzip, ".zip", "Unzipping"), (Untar, ".tar.gz", "Untarring")], - ids=["Unzip", "Untar"], + "proc_cls,ext,_dir,msg", + [ + (Unzip, ".zip", None, "Unzipping"), + (Untar, ".tar.gz", None, "Untarring"), + (Unzip, ".zip", "foo", "Unzipping"), + (Untar, ".tar.gz", "foo", "Untarring"), + ], + ids=["Unzip", "Untar", "Unzip_to_custom_dir", "Untar_to_custom_dir"], ) -def test_processor_multiplefiles(proc_cls, ext, msg): +def test_processor_multiplefiles(proc_cls, ext, _dir, msg): "Setup a processor to unzip/untar a file and return multiple fnames" - processor = proc_cls() + processor = proc_cls(extract_dir=_dir) suffix = proc_cls.suffix - extract_dir = "store" + ext + suffix + extract_dir = "store" + ext + suffix if _dir is None else _dir with TemporaryDirectory() as local_store: path = Path(local_store) true_paths = { @@ -184,3 +196,45 @@ def test_processor_multiplefiles(proc_cls, ext, msg): assert true_paths == set(fnames) for fname in fnames: check_tiny_data(fname) + + +@pytest.mark.parametrize( + "proc_cls,ext,_dir", + [ + (Unzip, ".zip", None), + (Untar, ".tar.gz", None), + (Unzip, ".zip", "foo"), + (Untar, ".tar.gz", "foo"), + ], + ids=["Unzip", "Untar", "Unzip_to_custom_dir", "Untar_to_custom_dir"], +) +def test_processor_nested_file(proc_cls, ext, _dir): + "Setup a processor to unzip/untar a file and return multiple fnames" + processor = proc_cls(members=["store/subdir/tiny-data.txt"], extract_dir=_dir) + suffix = 
proc_cls.suffix + extract_dir = "store" + ext + suffix if _dir is None else _dir + with TemporaryDirectory() as local_store: + path = Path(local_store) + true_path = str(path / extract_dir / "store" / "subdir" / "tiny-data.txt") + # Setup a pooch in a temp dir + pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY) + # Check the logs when downloading and from the processor + with capture_log() as log_file: + fnames = pup.fetch("store" + ext, processor=processor) + logs = log_file.getvalue() + lines = logs.splitlines() + assert len(lines) == 2 + assert lines[0].split()[0] == "Downloading" + assert lines[-1].startswith("Extracting 'store/subdir/tiny-data.txt'") + assert len(fnames) == 1 + fname = fnames[0] + assert true_path == fname + check_tiny_data(fname) + # Check that processor doesn't execute when not downloading + with capture_log() as log_file: + fnames = pup.fetch("store" + ext, processor=processor) + assert log_file.getvalue() == "" + assert len(fnames) == 1 + fname = fnames[0] + assert true_path == fname + check_tiny_data(fname) From ef96a244f9970f4b641821c45fbd3e153a714d65 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Mon, 11 Jan 2021 10:46:56 -0600 Subject: [PATCH 7/8] unify tests; try to fix tests for windows --- pooch/tests/test_processors.py | 144 ++++++++------------------------- 1 file changed, 35 insertions(+), 109 deletions(-) diff --git a/pooch/tests/test_processors.py b/pooch/tests/test_processors.py index efb8fae9..5979119c 100644 --- a/pooch/tests/test_processors.py +++ b/pooch/tests/test_processors.py @@ -110,131 +110,57 @@ def test_extractprocessor_fails(): assert not exception.value.args +@pytest.mark.parametrize("_dir", [None, "foo"], ids=["default_dir", "custom_dir"]) @pytest.mark.parametrize( - "proc_cls,ext,_dir", - [ - (Unzip, ".zip", None), - (Untar, ".tar.gz", None), - (Unzip, ".zip", "foo"), - (Untar, ".tar.gz", "foo"), - ], - ids=["Unzip", "Untar", "Unzip_to_custom_dir", "Untar_to_custom_dir"], -) -def 
test_processors(proc_cls, ext, _dir): - "Setup a hook and make sure it's only executed when downloading" - processor = proc_cls(members=["tiny-data.txt"], extract_dir=_dir) - suffix = proc_cls.suffix - extract_dir = "tiny-data" + ext + suffix if _dir is None else _dir - with TemporaryDirectory() as local_store: - path = Path(local_store) - true_path = str(path / extract_dir / "tiny-data.txt") - # Setup a pooch in a temp dir - pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY) - # Check the logs when downloading and from the processor - with capture_log() as log_file: - fnames = pup.fetch("tiny-data" + ext, processor=processor) - fname = fnames[0] - assert len(fnames) == 1 - logs = log_file.getvalue() - lines = logs.splitlines() - assert len(lines) == 2 - assert lines[0].split()[0] == "Downloading" - assert lines[-1].startswith("Extracting 'tiny-data.txt'") - - assert fname == true_path - check_tiny_data(fname) - # Check that processor doesn't execute when not downloading - with capture_log() as log_file: - fnames = pup.fetch("tiny-data" + ext, processor=processor) - fname = fnames[0] - assert len(fnames) == 1 - assert log_file.getvalue() == "" - assert fname == true_path - check_tiny_data(fname) - - -@pytest.mark.parametrize( - "proc_cls,ext,_dir,msg", - [ - (Unzip, ".zip", None, "Unzipping"), - (Untar, ".tar.gz", None, "Untarring"), - (Unzip, ".zip", "foo", "Unzipping"), - (Untar, ".tar.gz", "foo", "Untarring"), - ], - ids=["Unzip", "Untar", "Unzip_to_custom_dir", "Untar_to_custom_dir"], + "proc_cls,file_ext,msg", + [(Unzip, ".zip", "Unzipping"), (Untar, ".tar.gz", "Untarring")], + ids=["Unzip", "Untar"], ) -def test_processor_multiplefiles(proc_cls, ext, _dir, msg): - "Setup a processor to unzip/untar a file and return multiple fnames" - processor = proc_cls(extract_dir=_dir) - suffix = proc_cls.suffix - extract_dir = "store" + ext + suffix if _dir is None else _dir - with TemporaryDirectory() as local_store: - path = Path(local_store) - true_paths = { - 
str(path / extract_dir / "store" / "tiny-data.txt"), - str(path / extract_dir / "store" / "subdir" / "tiny-data.txt"), - } - # Setup a pooch in a temp dir - pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY) - # Check the logs when downloading and from the processor - with capture_log() as log_file: - fnames = pup.fetch("store" + ext, processor=processor) - logs = log_file.getvalue() - lines = logs.splitlines() - assert len(lines) == 2 - assert lines[0].split()[0] == "Downloading" - assert lines[-1].startswith(f"{msg} contents") - assert len(fnames) == 2 - assert true_paths == set(fnames) - for fname in fnames: - check_tiny_data(fname) - # Check that processor doesn't execute when not downloading - with capture_log() as log_file: - fnames = pup.fetch("store" + ext, processor=processor) - assert log_file.getvalue() == "" - assert len(fnames) == 2 - assert true_paths == set(fnames) - for fname in fnames: - check_tiny_data(fname) - - @pytest.mark.parametrize( - "proc_cls,ext,_dir", + "archive_basename,members", [ - (Unzip, ".zip", None), - (Untar, ".tar.gz", None), - (Unzip, ".zip", "foo"), - (Untar, ".tar.gz", "foo"), + ("tiny-data", ["tiny-data.txt"]), # 1 compressed file + ("store", None), # all files in an archive + ("store", ["store/subdir/tiny-data.txt"]), # 1 file nested in archive ], - ids=["Unzip", "Untar", "Unzip_to_custom_dir", "Untar_to_custom_dir"], + ids=["onefile", "all_files", "nested"], ) -def test_processor_nested_file(proc_cls, ext, _dir): - "Setup a processor to unzip/untar a file and return multiple fnames" - processor = proc_cls(members=["store/subdir/tiny-data.txt"], extract_dir=_dir) - suffix = proc_cls.suffix - extract_dir = "store" + ext + suffix if _dir is None else _dir +def test_processors(_dir, proc_cls, file_ext, msg, archive_basename, members): + "Setup a hook and make sure it's only executed when downloading" + processor = proc_cls(members=members, extract_dir=_dir) + _dir = archive_basename + file_ext + proc_cls.suffix if 
_dir is None else _dir with TemporaryDirectory() as local_store: path = Path(local_store) - true_path = str(path / extract_dir / "store" / "subdir" / "tiny-data.txt") + true_paths = [ + str(path / _dir / "tiny-data.txt"), + str(path / _dir / "store" / "tiny-data.txt"), + str(path / _dir / "store" / "subdir" / "tiny-data.txt"), + ] + if archive_basename == "tiny-data": + true_paths = set(true_paths[:1]) + log_line = "Extracting 'tiny-data.txt'" + elif members is None: + true_paths = set(true_paths[1:]) + log_line = f"{msg} contents" + else: + true_paths = set(true_paths[-1:]) + log_line = "Extracting 'store/subdir/tiny-data.txt'" # Setup a pooch in a temp dir pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY) # Check the logs when downloading and from the processor with capture_log() as log_file: - fnames = pup.fetch("store" + ext, processor=processor) - logs = log_file.getvalue() - lines = logs.splitlines() + fnames = pup.fetch(archive_basename + file_ext, processor=processor) + assert set(fnames) == true_paths + lines = log_file.getvalue().splitlines() assert len(lines) == 2 assert lines[0].split()[0] == "Downloading" - assert lines[-1].startswith("Extracting 'store/subdir/tiny-data.txt'") - assert len(fnames) == 1 - fname = fnames[0] - assert true_path == fname + assert lines[-1].startswith(log_line) + for fname in fnames: check_tiny_data(fname) # Check that processor doesn't execute when not downloading with capture_log() as log_file: - fnames = pup.fetch("store" + ext, processor=processor) + fnames = pup.fetch(archive_basename + file_ext, processor=processor) + assert set(fnames) == true_paths assert log_file.getvalue() == "" - assert len(fnames) == 1 - fname = fnames[0] - assert true_path == fname + for fname in fnames: check_tiny_data(fname) From 861ebee68902ddc92c8b2ea27eecf3a7c3279d64 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Mon, 11 Jan 2021 16:13:19 -0600 Subject: [PATCH 8/8] try again to fix windows tests --- pooch/processors.py | 13 
+++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pooch/processors.py b/pooch/processors.py index 9f709e88..eda436ba 100644 --- a/pooch/processors.py +++ b/pooch/processors.py @@ -13,6 +13,7 @@ import gzip import lzma import shutil +from pathlib import Path from zipfile import ZipFile from tarfile import TarFile @@ -147,9 +148,9 @@ def _extract_file(self, fname, extract_dir): "Extracting '%s' from '%s' to '%s'", member, fname, extract_dir ) # make sure the target folder exists for nested members - if len(member.split(os.path.sep)) > 1: - member_dir, _ = member.rsplit(os.path.sep, maxsplit=1) - full_dir_path = os.path.join(extract_dir, member_dir) + parts = Path(member).parts + if len(parts) > 1: + full_dir_path = os.path.join(extract_dir, *parts[:-1]) os.makedirs(full_dir_path, exist_ok=True) # Extract the data file from within the archive with zip_file.open(member) as data_file: @@ -205,9 +206,9 @@ def _extract_file(self, fname, extract_dir): "Extracting '%s' from '%s' to '%s'", member, fname, extract_dir ) # make sure the target folder exists for nested members - if len(member.split(os.path.sep)) > 1: - member_dir, _ = member.rsplit(os.path.sep, maxsplit=1) - full_dir_path = os.path.join(extract_dir, member_dir) + parts = Path(member).parts + if len(parts) > 1: + full_dir_path = os.path.join(extract_dir, *parts[:-1]) os.makedirs(full_dir_path, exist_ok=True) # Extract the data file from within the archive # Python 2.7: extractfile doesn't return a context manager