diff --git a/src/tufup/client.py b/src/tufup/client.py
index 80d3307..d73cf25 100644
--- a/src/tufup/client.py
+++ b/src/tufup/client.py
@@ -7,19 +7,20 @@
 from typing import Callable, Dict, Iterator, List, Optional, Tuple, Union
 from urllib import parse
 
-import bsdiff4
 import requests
 from requests.auth import AuthBase
 from tuf.api.exceptions import DownloadError, UnsignedMetadataError
 import tuf.ngclient
 
-from tufup.common import TargetMeta
+from tufup.common import Patcher, TargetMeta
 from tufup.utils.platform_specific import install_update
 
 logger = logging.getLogger(__name__)
 
 DEFAULT_EXTRACT_DIR = pathlib.Path(tempfile.gettempdir()) / 'tufup'
 SUFFIX_FAILED = '.failed'
+# do full update if patch-size/full-size > MAX_SIZE_RATIO
+MAX_SIZE_RATIO = 0.8
 
 
 class Client(tuf.ngclient.Updater):
@@ -209,7 +210,9 @@ def check_for_updates(
         # is not available, we must do a full update)
         self.new_targets = new_patches
         no_patches = total_patch_size == 0
-        patch_too_big = total_patch_size > self.new_archive_info.length
+        patch_too_big = (
+            total_patch_size / self.new_archive_info.length > MAX_SIZE_RATIO
+        )
         no_archive = not self.current_archive_local_path.exists()
         if not patch or no_patches or patch_too_big or no_archive or abort_patch:
             # fall back on full update
@@ -251,29 +254,29 @@ def _apply_updates(
         Note this has a side-effect: if self.extract_dir is not specified,
         an extract_dir is created in a platform-specific temporary location.
         """
-        # patch current archive (if we have patches) or use new full archive
-        archive_bytes = None
-        file_path = None
-        target = None
+        # either patch the current archive (if we have patches) or use new full archive
        try:
-            for target, file_path in sorted(self.downloaded_target_files.items()):
-                if target.is_archive:
-                    # just ensure the full archive file is available
-                    assert len(self.downloaded_target_files) == 1, 'too many targets'
-                    assert self.new_archive_local_path.exists(), 'new archive missing'
-                elif target.is_patch:
-                    # create new archive by patching current archive (patches
-                    # must be sorted by increasing version)
-                    if archive_bytes is None:
-                        archive_bytes = self.current_archive_local_path.read_bytes()
-                    archive_bytes = bsdiff4.patch(archive_bytes, file_path.read_bytes())
-            if archive_bytes:
-                # verify the patched archive length and hash
-                self.new_archive_info.verify_length_and_hashes(data=archive_bytes)
-                # write the patched new archive
-                self.new_archive_local_path.write_bytes(archive_bytes)
+            if next(iter(self.downloaded_target_files.keys())).is_archive:
+                # full archive is available
+                if len(self.downloaded_target_files) != 1:
+                    raise ValueError('there should be only one downloaded *archive*')
+                if not self.new_archive_local_path.exists():
+                    raise FileNotFoundError('the new archive file does not exist')
+            else:
+                # reconstruct full archive from patch(es)
+                if not all(
+                    target.is_patch for target in self.downloaded_target_files.keys()
+                ):
+                    raise ValueError('all downloaded targets must be patches')
+                Patcher.patch_and_verify(
+                    src_path=self.current_archive_local_path,
+                    dst_path=self.new_archive_local_path,
+                    patch_targets=self.downloaded_target_files,
+                )
        except Exception as e:
-            if target and file_path and file_path.exists():
+            # rename all failed targets in order to skip them (patches) or retry
+            # them (archive) on the next run
+            for target, file_path in self.downloaded_target_files.items():
                renamed_path = file_path.replace(
                    file_path.with_suffix(file_path.suffix + SUFFIX_FAILED)
                )
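
Note on the client change above: previously the client fell back to a full update only if the patches were larger than the full archive; the new MAX_SIZE_RATIO check falls back earlier, since downloading most of the archive's size in patches rarely pays off. A minimal sketch of the decision heuristic (function name and sizes are illustrative, not tufup API):

```python
MAX_SIZE_RATIO = 0.8  # do full update if patch-size/full-size > MAX_SIZE_RATIO


def should_fall_back_to_full_update(total_patch_size: int, archive_length: int) -> bool:
    # patches only pay off if they are substantially smaller than the archive;
    # a total patch size of zero means no suitable patches were found at all
    return total_patch_size == 0 or total_patch_size / archive_length > MAX_SIZE_RATIO


# e.g. 90 kB of patches for a 100 kB archive: just download the full archive
assert should_fall_back_to_full_update(total_patch_size=90_000, archive_length=100_000)
# e.g. a 300 byte patch for a 100 kB archive: patching is worthwhile
assert not should_fall_back_to_full_update(total_patch_size=300, archive_length=100_000)
```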
diff --git a/src/tufup/common.py b/src/tufup/common.py
index 5620dbe..5ca2b61 100644
--- a/src/tufup/common.py
+++ b/src/tufup/common.py
@@ -1,7 +1,9 @@
+import gzip
+import hashlib
 import logging
 import pathlib
 import re
-from typing import Optional, Union
+from typing import Dict, Optional, Union
 
 import bsdiff4
 from packaging.version import Version, InvalidVersion
@@ -155,27 +157,97 @@ def compose_filename(cls, name: str, version: str, is_archive: bool):
 
 
 class Patcher(object):
+    DEFAULT_HASH_ALGORITHM = 'sha256'
+
+    @staticmethod
+    def _get_tar_size_and_hash(
+        tar_content: Optional[bytes] = None, algorithm: str = DEFAULT_HASH_ALGORITHM
+    ) -> dict:
+        """
+        Determines the size and hash of the specified data.
+
+        Note we could also use tuf.api.metadata.TargetFile for this, but we'll
+        keep this part independent from python-tuf, for clarity and flexibility.
+        """
+        hash_obj = getattr(hashlib, algorithm)()
+        hash_obj.update(tar_content)
+        # hexdigest returns digest as string
+        return dict(
+            tar_size=len(tar_content),
+            tar_hash=hash_obj.hexdigest(),
+            tar_hash_algorithm=algorithm,
+        )
+
     @classmethod
-    def create_patch(
-        cls, src_path: pathlib.Path, dst_path: pathlib.Path
-    ) -> pathlib.Path:
+    def _verify_tar_size_and_hash(cls, tar_content: bytes, expected: dict):
         """
-        Create a binary patch file based on source and destination files.
+        Verifies that size and hash of data match the expected values.
 
-        Patch file path matches destination file path, except for suffix.
+        Raises an exception if this is not the case.
         """
-        # replace suffix twice, in case we have a .tar.gz
-        patch_path = dst_path.with_suffix('').with_suffix(SUFFIX_PATCH)
-        bsdiff4.file_diff(src_path=src_path, dst_path=dst_path, patch_path=patch_path)
-        return patch_path
+        result = cls._get_tar_size_and_hash(
+            tar_content=tar_content, algorithm=expected['tar_hash_algorithm']
+        )
+        for key in ['tar_size', 'tar_hash']:
+            if result[key] != expected[key]:
+                raise Exception(f'verification failed: {key} mismatch')
 
     @classmethod
-    def apply_patch(cls, src_path: pathlib.Path, patch_path: pathlib.Path):
+    def diff_and_hash(
+        cls, src_path: pathlib.Path, dst_path: pathlib.Path, patch_path: pathlib.Path
+    ) -> dict:
         """
-        Apply binary patch file to source file to create destination file.
+        Creates a patch file from the binary difference between source and destination
+        .tar archives. The source and destination files are expected to be
+        gzip-compressed tar archives (.tar.gz).
 
-        Destination file path matches patch file path, except for suffix.
+        Returns a dict with size and hash of the *uncompressed* destination archive.
         """
-        dst_path = patch_path.with_suffix(SUFFIX_ARCHIVE)
-        bsdiff4.file_patch(src_path=src_path, dst_path=dst_path, patch_path=patch_path)
-        return dst_path
+        with gzip.open(src_path, mode='rb') as src_file:
+            with gzip.open(dst_path, mode='rb') as dst_file:
+                dst_tar_content = dst_file.read()
+                patch_path.write_bytes(
+                    bsdiff4.diff(src_bytes=src_file.read(), dst_bytes=dst_tar_content)
+                )
+        return cls._get_tar_size_and_hash(tar_content=dst_tar_content)
+
+    @classmethod
+    def patch_and_verify(
+        cls,
+        src_path: pathlib.Path,
+        dst_path: pathlib.Path,
+        patch_targets: Dict[TargetMeta, pathlib.Path],
+    ) -> None:
+        """
+        Applies one or more binary patch files to a source file in order to
+        reconstruct a destination file.
+
+        Source file and destination file are gzip-compressed tar archives, but the
+        patches are applied to the *uncompressed* tar archives. The reason is that
+        small changes in uncompressed data can cause (very) large differences in
+        gzip compressed data, leading to excessively large patch files (see #69).
+
+        The integrity of the patched .tar archive is verified using expected length
+        and hash (from custom tuf metadata), similar to python-tuf's download
+        verification. If the patched archive fails this check, the destination file
+        is not written.
+        """
+        if not patch_targets:
+            raise ValueError('no patch targets')
+        # decompress .tar data from source .tar.gz file
+        with gzip.open(src_path, mode='rb') as src_file:
+            tar_bytes = src_file.read()
+        # apply cumulative patches (sorted by version, in ascending order)
+        for patch_meta, patch_path in sorted(patch_targets.items()):
+            logger.info(f'applying patch: {patch_meta.name}')
+            tar_bytes = bsdiff4.patch(
+                src_bytes=tar_bytes, patch_bytes=patch_path.read_bytes()
+            )
+        # verify integrity of the final result (raises exception on failure)
+        cls._verify_tar_size_and_hash(
+            tar_content=tar_bytes,
+            expected=patch_meta.custom,  # noqa
+        )
+        # compress .tar data into destination .tar.gz file
+        with gzip.open(dst_path, mode='wb') as dst_file:
+            dst_file.write(tar_bytes)
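
The reworked Patcher above diffs and patches the uncompressed .tar payload instead of the .tar.gz file itself. A self-contained roundtrip sketch of that idea, using only bsdiff4 and gzip (the byte strings are stand-ins for real tar data, not tufup fixtures):

```python
import gzip

import bsdiff4

# stand-ins for the uncompressed .tar payloads of two app versions
old_tar = b'version 1 content of the application archive'
new_tar = b'version 2 content of the application archive, slightly changed'

# repo side (cf. Patcher.diff_and_hash): diff the uncompressed payloads
patch = bsdiff4.diff(src_bytes=old_tar, dst_bytes=new_tar)

# client side (cf. Patcher.patch_and_verify): patch, then re-compress
reconstructed_tar = bsdiff4.patch(src_bytes=old_tar, patch_bytes=patch)
assert reconstructed_tar == new_tar

# the re-compressed .tar.gz need not match the original .tar.gz byte-for-byte,
# which is why size and hash are recorded for the *uncompressed* tar
assert gzip.decompress(gzip.compress(reconstructed_tar)) == new_tar
```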
diff --git a/src/tufup/repo/__init__.py b/src/tufup/repo/__init__.py
index d06fc8e..c1e75c5 100644
--- a/src/tufup/repo/__init__.py
+++ b/src/tufup/repo/__init__.py
@@ -11,6 +11,7 @@
 except AssertionError:
     pass  # assuming we are on the client side...
 import shutil
+import tarfile
 from typing import Any, Dict, Iterable, List, Optional, TypedDict, Union
 
 from securesystemslib.exceptions import CryptoError
@@ -37,7 +38,7 @@
 )
 from tuf.api.serialization.json import JSONSerializer
 
-from tufup.common import Patcher, SUFFIX_ARCHIVE, SUFFIX_PATCH, TargetMeta
+from tufup.common import Patcher, SUFFIX_PATCH, TargetMeta
 from tufup.utils.platform_specific import _patched_resolve
 
 logger = logging.getLogger(__name__)
@@ -79,12 +80,19 @@ def make_gztar_archive(
     dst_dir: Union[pathlib.Path, str],
     app_name: str,
     version: str,
-    **kwargs,  # allowed kwargs are passed on to shutil.make_archive
+    tar_format: int = tarfile.PAX_FORMAT,
 ) -> Optional[TargetMeta]:
-    # remove disallowed kwargs
-    for key in ['base_name', 'root_dir', 'format']:
-        if kwargs.pop(key, None):
-            logger.warning(f'{key} ignored: using default')
+    """
+    Create a gzipped tar archive in the dst_dir, based on content of src_dir.
+
+    The PAX_FORMAT is currently the default tar format [1] used by the tarfile
+    module. For improved portability [2] and reproducibility [3], this can be changed
+    e.g. to USTAR_FORMAT.
+
+    [1]: https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats
+    [2]: https://www.gnu.org/software/tar/manual/html_node/Portability.html#Portability
+    [3]: https://www.gnu.org/software/tar/manual/html_node/Reproducibility.html#Reproducibility
+    """
     # ensure paths
     src_dir = pathlib.Path(src_dir)
     dst_dir = pathlib.Path(dst_dir)
@@ -97,15 +105,11 @@
     if input(f'Found existing archive: {archive_path}.\nOverwrite? [n]/y') != 'y':
         print('Using existing archive.')
         return TargetMeta(archive_path)
-    # make archive
-    base_name = str(dst_dir / archive_filename.replace(SUFFIX_ARCHIVE, ''))
-    archive_path_str = shutil.make_archive(
-        base_name=base_name,  # archive file path, no suffix
-        root_dir=str(src_dir),  # paths in archive will be relative to root_dir
-        format='gztar',
-        **kwargs,
-    )
-    return TargetMeta(target_path=archive_path_str)
+    # make gzipped tar archive
+    with tarfile.open(archive_path, mode='w:gz', format=tar_format) as tar:
+        # filter could be used in future versions to modify the tarinfo objects
+        tar.add(name=src_dir, arcname='.', recursive=True, filter=None)
+    return TargetMeta(target_path=archive_path)
 
 
 class RolesDict(TypedDict):
@@ -383,6 +387,7 @@ def add_or_update_target(
             target_file_path=url_path, local_path=str(local_path)
         )
         if custom:
+            # todo: handle creation of patch metadata here?
             # todo: should we verify that custom is a dict?
             target_file_info.unrecognized_fields['custom'] = custom
         # note we assume self.targets has been initialized
@@ -765,11 +770,18 @@ def add_bundle(
         )
         # create patch, if possible, and register that too
         if latest_archive and not skip_patch:
-            patch_path = Patcher.create_patch(
-                src_path=self.targets_dir / latest_archive.path,
-                dst_path=self.targets_dir / new_archive.path,
+            src_path = self.targets_dir / latest_archive.path
+            dst_path = self.targets_dir / new_archive.path
+            patch_path = dst_path.with_suffix('').with_suffix(SUFFIX_PATCH)
+            # create patch
+            dst_size_and_hash = Patcher.diff_and_hash(
+                src_path=src_path, dst_path=dst_path, patch_path=patch_path
+            )
+            # register patch (size and hash are used by the client to verify the
+            # integrity of the patched archive)
+            self.roles.add_or_update_target(
+                local_path=patch_path, custom=dst_size_and_hash
             )
-            self.roles.add_or_update_target(local_path=patch_path)
 
     def remove_latest_bundle(self):
         """
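
On the tar_format parameter introduced above: PAX_FORMAT is tarfile's current default, while USTAR_FORMAT is the more portable and more easily reproducible choice. The `filter=None` comment hints at future normalization of tarinfo objects; a hedged sketch of what such a filter might look like (paths are hypothetical, and note the gzip layer itself still embeds a timestamp, so this alone does not make the .tar.gz reproducible):

```python
import tarfile


def normalize(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
    # strip owner and timestamp metadata, so identical content
    # yields an identical (uncompressed) tar archive
    tarinfo.uid = tarinfo.gid = 0
    tarinfo.uname = tarinfo.gname = ''
    tarinfo.mtime = 0
    return tarinfo


# hypothetical paths, mirroring the tar.add() call in make_gztar_archive
with tarfile.open(
    'my_app-1.0.tar.gz', mode='w:gz', format=tarfile.USTAR_FORMAT
) as tar:
    tar.add(name='my_bundle_dir', arcname='.', recursive=True, filter=normalize)
```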
diff --git a/tests/data/repository/metadata/1.root.json b/tests/data/repository/metadata/1.root.json
index 03a6d63..32f7456 100755
--- a/tests/data/repository/metadata/1.root.json
+++ b/tests/data/repository/metadata/1.root.json
@@ -2,17 +2,17 @@
   "signatures": [
     {
       "keyid": "b7ad916e4138911155b771d0ede66666e9647e7fb6c85a1904be97dee5653568",
-      "sig": "8582f12a66a923c8069a4385ef594c345ca2bd69741c0ba2691c4cb20e005e7a771f6ca651852d1264d13107d108c5843d3f9b69bcd20500f7108cca6e6c8901"
+      "sig": "0f634a6e5f82af4447accce63c2987350c9c16fe6f8ce391ed504da106be8a127e1d606424c97a27822038cfd35e4daa96da2ec07a4a75bc2610df3bfc95cd0c"
     },
     {
       "keyid": "d4ec748f9476f9f7e1f0a247b917dde4abe8a024de9ba34c7458b41bec8be6b2",
-      "sig": "3f2a6d6cd8232d0ca1f2b75445a7dc9bc4342f72fe88204fac7e7acad48eb6102ff1ba4b1efaf8f8ec32ee11cf68a5f92e34300f66b37e5970e878f77b2e9c0b"
+      "sig": "678256d67bcf6022f75920ff380dc2111e2d68120af834f1769d694665236a2c7fb57ea5731f4050e1562a8b2be870b6594a2203f52182b1b77fa98ae89ed90c"
     }
   ],
   "signed": {
     "_type": "root",
     "consistent_snapshot": false,
-    "expires": "2051-06-25T13:08:41Z",
+    "expires": "2051-06-27T21:21:03Z",
     "keys": {
       "5ef48ab6f5398d2bf17f1f4c4fc0e0440c4aa3734a05ae523561e02e8a99957a": {
         "keytype": "ed25519",
diff --git a/tests/data/repository/metadata/2.root.json b/tests/data/repository/metadata/2.root.json
index 52d548e..c51f327 100755
--- a/tests/data/repository/metadata/2.root.json
+++ b/tests/data/repository/metadata/2.root.json
@@ -1,22 +1,22 @@
 {
   "signatures": [
     {
-      "keyid": "b7ad916e4138911155b771d0ede66666e9647e7fb6c85a1904be97dee5653568",
-      "sig": "740d4c6945050abd3abba7023cb5128a4e344e83ae0f52f9c978b7b3582dd21213e72a66dec6cd4206093c634cb973cf3ec0940103e54e6a81c4424322cf2d01"
+      "keyid": "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003",
+      "sig": "47a42813ae34829c60539dcceba0d4b9a8a9286beaa8d5f07d3de3050d404426c22bc95b271e7c5e7ee529bc3180f009eb31313fb825f76c3ed9ca2c501bd503"
     },
     {
-      "keyid": "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003",
-      "sig": "58ed242676830567413936feec20c80cd79d03fc31bdad38ffd0ef69e40298dfd8fe15edb7a4fd504a01ee5a7cddd3bfbd169ccd9bd2c6067e452aeee3a18102"
+      "keyid": "b7ad916e4138911155b771d0ede66666e9647e7fb6c85a1904be97dee5653568",
+      "sig": "421d85636350a89805abc4561acd3019ecf17246a37e91374a53276b5d56638c83754960c27d038c7d1193bdb33db12faf69b7a19099627c745c569093ee0005"
     },
     {
       "keyid": "d4ec748f9476f9f7e1f0a247b917dde4abe8a024de9ba34c7458b41bec8be6b2",
-      "sig": "7ea041490934e6637998eb22ab367f1d260b3d0cdde144cc5a776dda7a65c27a6061d1b62986851ecbc49ad04c7a428987b323c1c961f65f8e0143c792deb706"
+      "sig": "a65dbf32349f1a57dd1dd6fc058c69a98be467f5ad408179da6e3b67abc6f2361415eb70214588d21079a9d0351500808f8c244b69f40b35a41999294461ca00"
     }
   ],
   "signed": {
     "_type": "root",
     "consistent_snapshot": false,
-    "expires": "2051-06-25T13:08:48Z",
+    "expires": "2051-06-27T21:21:13Z",
     "keys": {
       "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003": {
         "keytype": "ed25519",
"740d4c6945050abd3abba7023cb5128a4e344e83ae0f52f9c978b7b3582dd21213e72a66dec6cd4206093c634cb973cf3ec0940103e54e6a81c4424322cf2d01" + "keyid": "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003", + "sig": "47a42813ae34829c60539dcceba0d4b9a8a9286beaa8d5f07d3de3050d404426c22bc95b271e7c5e7ee529bc3180f009eb31313fb825f76c3ed9ca2c501bd503" }, { - "keyid": "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003", - "sig": "58ed242676830567413936feec20c80cd79d03fc31bdad38ffd0ef69e40298dfd8fe15edb7a4fd504a01ee5a7cddd3bfbd169ccd9bd2c6067e452aeee3a18102" + "keyid": "b7ad916e4138911155b771d0ede66666e9647e7fb6c85a1904be97dee5653568", + "sig": "421d85636350a89805abc4561acd3019ecf17246a37e91374a53276b5d56638c83754960c27d038c7d1193bdb33db12faf69b7a19099627c745c569093ee0005" }, { "keyid": "d4ec748f9476f9f7e1f0a247b917dde4abe8a024de9ba34c7458b41bec8be6b2", - "sig": "7ea041490934e6637998eb22ab367f1d260b3d0cdde144cc5a776dda7a65c27a6061d1b62986851ecbc49ad04c7a428987b323c1c961f65f8e0143c792deb706" + "sig": "a65dbf32349f1a57dd1dd6fc058c69a98be467f5ad408179da6e3b67abc6f2361415eb70214588d21079a9d0351500808f8c244b69f40b35a41999294461ca00" } ], "signed": { "_type": "root", "consistent_snapshot": false, - "expires": "2051-06-25T13:08:48Z", + "expires": "2051-06-27T21:21:13Z", "keys": { "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003": { "keytype": "ed25519", diff --git a/tests/data/repository/metadata/root.json b/tests/data/repository/metadata/root.json index 52d548e..c51f327 100755 --- a/tests/data/repository/metadata/root.json +++ b/tests/data/repository/metadata/root.json @@ -1,22 +1,22 @@ { "signatures": [ { - "keyid": "b7ad916e4138911155b771d0ede66666e9647e7fb6c85a1904be97dee5653568", - "sig": "740d4c6945050abd3abba7023cb5128a4e344e83ae0f52f9c978b7b3582dd21213e72a66dec6cd4206093c634cb973cf3ec0940103e54e6a81c4424322cf2d01" + "keyid": "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003", + "sig": "47a42813ae34829c60539dcceba0d4b9a8a9286beaa8d5f07d3de3050d404426c22bc95b271e7c5e7ee529bc3180f009eb31313fb825f76c3ed9ca2c501bd503" }, { - "keyid": "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003", - "sig": "58ed242676830567413936feec20c80cd79d03fc31bdad38ffd0ef69e40298dfd8fe15edb7a4fd504a01ee5a7cddd3bfbd169ccd9bd2c6067e452aeee3a18102" + "keyid": "b7ad916e4138911155b771d0ede66666e9647e7fb6c85a1904be97dee5653568", + "sig": "421d85636350a89805abc4561acd3019ecf17246a37e91374a53276b5d56638c83754960c27d038c7d1193bdb33db12faf69b7a19099627c745c569093ee0005" }, { "keyid": "d4ec748f9476f9f7e1f0a247b917dde4abe8a024de9ba34c7458b41bec8be6b2", - "sig": "7ea041490934e6637998eb22ab367f1d260b3d0cdde144cc5a776dda7a65c27a6061d1b62986851ecbc49ad04c7a428987b323c1c961f65f8e0143c792deb706" + "sig": "a65dbf32349f1a57dd1dd6fc058c69a98be467f5ad408179da6e3b67abc6f2361415eb70214588d21079a9d0351500808f8c244b69f40b35a41999294461ca00" } ], "signed": { "_type": "root", "consistent_snapshot": false, - "expires": "2051-06-25T13:08:48Z", + "expires": "2051-06-27T21:21:13Z", "keys": { "1bd53d9d6f08f6efba19477880b348906f5f29a67d78cbca8a44aedfad12d003": { "keytype": "ed25519", diff --git a/tests/data/repository/metadata/snapshot.json b/tests/data/repository/metadata/snapshot.json index db4b04a..c05b5ad 100755 --- a/tests/data/repository/metadata/snapshot.json +++ b/tests/data/repository/metadata/snapshot.json @@ -2,12 +2,12 @@ "signatures": [ { "keyid": "5ef48ab6f5398d2bf17f1f4c4fc0e0440c4aa3734a05ae523561e02e8a99957a", - "sig": 
"29c6c8a45e7c0940e51cac1b9052304bb0baec1e1df35885522846ae5abd039c1846c453cd599ccc36e11c4f0a52de6b772d71627886e22dc77822b4404af602" + "sig": "73a146f5e1f12c0a36e88c8d7bf613baa1d528ea0c9480fe0d2ccd74d6da239da04470f68d283738194185cc82289c5f9f1312efea373b51dc8722965ca1fc0b" } ], "signed": { "_type": "snapshot", - "expires": "2051-06-25T13:08:48Z", + "expires": "2051-06-27T21:21:13Z", "meta": { "targets.json": { "version": 6 diff --git a/tests/data/repository/metadata/targets.json b/tests/data/repository/metadata/targets.json index 9b5797f..122d603 100755 --- a/tests/data/repository/metadata/targets.json +++ b/tests/data/repository/metadata/targets.json @@ -2,12 +2,12 @@ "signatures": [ { "keyid": "cd9930c92ac25c02a2f92ae3128b50459b53d7532ef9c0f364e78f388d5808a5", - "sig": "dbcc91a73275a2478489e491b3054328659ce5e7cdeeb7623fe41e745cbe585a0dc874e05d80a291c5658138549051a24d81f3fb61093b6133b5d3e8927e9e01" + "sig": "344b1c779103db5c8462508d7a5e72ef9ae8dea0c5fd303d55cace03a87fd67312ff5ca01fc2e377d7d0dcbbbf3f4dff378f5c9759801590340c0b9e3d23bc07" } ], "signed": { "_type": "targets", - "expires": "2051-06-25T13:08:48Z", + "expires": "2051-06-27T21:21:13Z", "spec_version": "1.0.31", "targets": { "example_app-1.0.tar.gz": { @@ -17,10 +17,15 @@ "length": 101613 }, "example_app-2.0.patch": { + "custom": { + "tar_hash": "855c631eb1a8d756bbad8441b76b5452505d292a162b3d497a60877fee2140b5", + "tar_hash_algorithm": "sha256", + "tar_size": 112640 + }, "hashes": { - "sha256": "f7ee90e00fa69d5832eeee193f9b6bb2d32ff028c413f47fbaf853b3d2add27f" + "sha256": "f2be4504e464bd23c022772c7f3c011e0082295775a24e3fc986bb2504df0f53" }, - "length": 18709 + "length": 318 }, "example_app-2.0.tar.gz": { "custom": { @@ -36,10 +41,15 @@ "length": 101744 }, "example_app-3.0rc0.patch": { + "custom": { + "tar_hash": "3cd260c121d05f4c6ed55b6e87569d3710e539e0a86e6fce98189ddca20c99f5", + "tar_hash_algorithm": "sha256", + "tar_size": 112640 + }, "hashes": { - "sha256": "dcf2ce8ca9fc0ccc0e541fb0fca97a7772374177197a41fa4cf17653ef850956" + "sha256": "01fa6f30ac54fd405dfb5bd6e39f71af572c8e341675f5b2822b35ec341ce6f9" }, - "length": 6458 + "length": 323 }, "example_app-3.0rc0.tar.gz": { "custom": { @@ -55,10 +65,15 @@ "length": 101841 }, "example_app-4.0a0.patch": { + "custom": { + "tar_hash": "3d3efe43388f3bbae910af39232526ef624d1540cfbb69cf0d4c66b7d5dc4b45", + "tar_hash_algorithm": "sha256", + "tar_size": 112640 + }, "hashes": { - "sha256": "0fc6918d6d0757e234fe550e26bca659503166da3d3f493ec6bdbb5281b356ce" + "sha256": "e19ccd94e60d1d817dcc44c481f4fb48f54fa335cc0bd7a7e60377a4f6ccf2ea" }, - "length": 102567 + "length": 100988 }, "example_app-4.0a0.tar.gz": { "custom": { diff --git a/tests/data/repository/metadata/timestamp.json b/tests/data/repository/metadata/timestamp.json index 7c7b0f4..e2ce07a 100755 --- a/tests/data/repository/metadata/timestamp.json +++ b/tests/data/repository/metadata/timestamp.json @@ -2,12 +2,12 @@ "signatures": [ { "keyid": "eddb87d254d513c1404d71e17620ecf5260e1836babdaa55197916c582f37a00", - "sig": "bc22b24fadb6fabbc915cd7c5dd704cd338bad94d268f53e5c5e743fbbb707dfa8e339b5d5a6fefa4360431d610e383c6744d2e7306121d4cd4bd2388ecfef02" + "sig": "4f05f9947e1fd704ffb877fa994e8841eb1a34a2ccf021c2eac5a618eca3c11baad8531d154fa5c8aa187e0bfaa57b521901ef502bd7a3dc601bfa1c408a4106" } ], "signed": { "_type": "timestamp", - "expires": "2051-06-25T13:08:48Z", + "expires": "2051-06-27T21:21:13Z", "meta": { "snapshot.json": { "version": 7 diff --git a/tests/data/repository/targets/example_app-2.0.patch 
diff --git a/tests/data/repository/targets/example_app-2.0.patch b/tests/data/repository/targets/example_app-2.0.patch
index 1a9628a..a029fcd 100644
Binary files a/tests/data/repository/targets/example_app-2.0.patch and b/tests/data/repository/targets/example_app-2.0.patch differ
diff --git a/tests/data/repository/targets/example_app-3.0rc0.patch b/tests/data/repository/targets/example_app-3.0rc0.patch
index 3d0d978..a4b50f1 100644
Binary files a/tests/data/repository/targets/example_app-3.0rc0.patch and b/tests/data/repository/targets/example_app-3.0rc0.patch differ
diff --git a/tests/data/repository/targets/example_app-4.0a0.patch b/tests/data/repository/targets/example_app-4.0a0.patch
index 8874b82..1f968db 100644
Binary files a/tests/data/repository/targets/example_app-4.0a0.patch and b/tests/data/repository/targets/example_app-4.0a0.patch differ
diff --git a/tests/test_client.py b/tests/test_client.py
index d1737a3..c9ad4ac 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -9,10 +9,10 @@
 import packaging.version
 from requests.auth import HTTPBasicAuth
 import tuf.api.exceptions
-from tuf.api.exceptions import LengthOrHashMismatchError
 from tuf.ngclient import TargetFile
 
 from tests import TempDirTestCase, TEST_REPO_DIR
+import tufup.client
 from tufup.client import AuthRequestsFetcher, Client, SUFFIX_FAILED
 from tufup.common import TargetMeta
 
@@ -98,15 +98,19 @@ def test_init(self):
     def test_trusted_target_metas(self):
         client = self.get_refreshed_client()
         self.assertTrue(client.trusted_target_metas)
-        # in the test data, only the archives have custom metadata, as defined in
-        # the repo_workflow_example.py script
+        # The archives have custom metadata from the user, as defined in the
+        # repo_workflow_example.py script. The patches have custom metadata for .tar
+        # integrity check (internal).
         for meta in client.trusted_target_metas:
             with self.subTest(msg=meta):
-                if meta.is_archive and str(meta.version) != '1.0':
-                    self.assertTrue(meta.custom)
-                    self.assertIsInstance(meta.custom, dict)
-                else:
-                    self.assertIsNone(meta.custom)
+                if str(meta.version) != '1.0':
+                    if meta.is_archive:
+                        self.assertTrue(meta.custom)
+                        example_key = 'changes'  # see repo workflow example
+                        self.assertIn(example_key, meta.custom)
+                    else:
+                        # patches must have tar hash information
+                        self.assertIn('tar_hash', meta.custom)
 
     def test_get_targetinfo(self):
         client = self.get_refreshed_client()
@@ -254,9 +258,9 @@ def test__apply_updates(self):
         with self.subTest(msg='patch failure due to mismatch'):
             mock_install = Mock()
             with patch.object(
-                client.new_archive_info,
-                'verify_length_and_hashes',
-                Mock(side_effect=LengthOrHashMismatchError()),
+                tufup.client.Patcher,
+                '_verify_tar_size_and_hash',
+                Mock(side_effect=Exception()),
             ):
                 client._apply_updates(install=mock_install, skip_confirmation=True)
             mock_install.assert_not_called()
diff --git a/tests/test_common.py b/tests/test_common.py
index 90b80c6..e4d99fb 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -1,3 +1,5 @@
+import gzip
+import hashlib
 import logging
 import pathlib
 from typing import Hashable
@@ -155,38 +157,81 @@ def test_compose_filename(self):
 class PatcherTests(TempDirTestCase):
     def setUp(self) -> None:
         super().setUp()
-        # dummy paths
-        self.old_archive_path = self.temp_dir_path / 'my_app-1.0.tar.gz'
-        self.new_archive_path = self.temp_dir_path / 'my_app-2.0.tar.gz'
-        self.new_patch_path = self.temp_dir_path / 'my_app-2.0.patch'
-        # write dummy archive data to files
-        self.old_archive_path.write_bytes(b'old archive data')
-        self.new_archive_data = b'new archive data'
-        self.new_archive_path.write_bytes(self.new_archive_data)
-        # create patch file (see Patcher.create_patch)
-        bsdiff4.file_diff(
-            src_path=self.old_archive_path,
-            dst_path=self.new_archive_path,
-            patch_path=self.new_patch_path,
-        )
-        self.new_patch_data = self.new_patch_path.read_bytes()
-
-    def test_create_patch(self):
-        # remove existing patch file, just to be sure
-        self.new_patch_path.unlink()
+        # define dummy .tar content
+        self.tar_content = {
+            'v-1': b'this is the original content',
+            'v-2': b'this content is somewhat different',
+            'v-3': b'this content has changed again',
+        }
+        # create patch content
+        self.patch_content = dict()
+        for src, dst in [('v-1', 'v-2'), ('v-2', 'v-3')]:
+            self.patch_content[dst] = bsdiff4.diff(
+                src_bytes=self.tar_content[src],
+                dst_bytes=self.tar_content[dst],
+            )
+        # create dummy files
+        self.targz_paths = dict()
+        for key, tar_content in self.tar_content.items():
+            self.targz_paths[key] = self.temp_dir_path / f'{key}.tar.gz'
+            with gzip.open(self.targz_paths[key], mode='wb') as gz_file:
+                gz_file.write(tar_content)
+        self.patch_paths = dict()
+        for key, patch_content in self.patch_content.items():
+            self.patch_paths[key] = self.temp_dir_path / f'{key}.patch'
+            self.patch_paths[key].write_bytes(patch_content)
+        # determine size and hash
+        hash_algorithm = 'sha256'
+        self.tar_fingerprints = dict()
+        for key, tar_content in self.tar_content.items():
+            if key == 'v-1':
+                continue
+            hash_obj = getattr(hashlib, hash_algorithm)()
+            hash_obj.update(tar_content)
+            self.tar_fingerprints[key] = dict(
+                tar_size=len(tar_content),
+                tar_hash=hash_obj.hexdigest(),
+                tar_hash_algorithm=hash_algorithm,
+            )
+
+    def test_diff_and_hash(self):
+        # prepare
+        src = 'v-1'
+        dst = 'v-2'
+        patch_path = self.temp_dir_path / 'test.patch'
         # test
-        new_patch_path = Patcher.create_patch(
-            src_path=self.old_archive_path, dst_path=self.new_archive_path
+        dst_fingerprint = Patcher.diff_and_hash(
+            src_path=self.targz_paths[src],
+            dst_path=self.targz_paths[dst],
+            patch_path=patch_path,
         )
-        self.assertTrue(new_patch_path.exists())
-        self.assertEqual(self.new_patch_data, new_patch_path.read_bytes())
-
-    def test_apply_patch(self):
-        # remove existing "new archive" file, just to be sure
-        self.new_archive_path.unlink()
+        self.assertTrue(patch_path.exists())
+        self.assertEqual(self.patch_content[dst], patch_path.read_bytes())
+        self.assertEqual(self.tar_fingerprints[dst], dst_fingerprint)
+
+    def test_patch_and_verify(self):
+        # prepare
+        src = 'v-1'
+        dst = 'v-3'  # note we're skipping v-2
+        patch_targets = dict()
+        for key, patch_path in self.patch_paths.items():
+            patch_meta = TargetMeta(
+                target_path=patch_path, custom=self.tar_fingerprints[key]
+            )
+            self.assertTrue(patch_meta.is_patch)  # just to be sure
+            patch_targets[patch_meta] = patch_path
+        # verify that we're applying two patches cumulatively
+        self.assertEqual(2, len(patch_targets))
         # test
-        new_archive_path = Patcher.apply_patch(
-            src_path=self.old_archive_path, patch_path=self.new_patch_path
+        dst_path = self.temp_dir_path / 'reconstructed.tar.gz'
+        Patcher.patch_and_verify(
+            src_path=self.targz_paths[src],
+            dst_path=dst_path,
+            patch_targets=patch_targets,
         )
-        self.assertTrue(new_archive_path.exists())
-        self.assertEqual(self.new_archive_data, new_archive_path.read_bytes())
+        self.assertTrue(dst_path.exists())
+        # note that gzip compressed files are not reproducible by default (even when
+        # using identical uncompressed data), so we must compare the uncompressed data
+        with gzip.open(self.targz_paths[dst], mode='rb') as original_tar:
+            with gzip.open(dst_path, mode='rb') as reconstructed_tar:
+                self.assertEqual(original_tar.read(), reconstructed_tar.read())
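
The final assertion in test_patch_and_verify above compares decompressed content because gzip output is not byte-for-byte reproducible: the gzip header stores an mtime, so compressing identical data at different times yields different bytes. A quick illustration:

```python
import gzip

data = b'identical uncompressed data'

# identical input, but different gzip bytes when the header mtime differs
a = gzip.compress(data, mtime=1)
b = gzip.compress(data, mtime=2)
assert a != b
assert gzip.decompress(a) == gzip.decompress(b)  # payloads still match
```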
diff --git a/tests/test_repo.py b/tests/test_repo.py
index f352ee7..c98560a 100644
--- a/tests/test_repo.py
+++ b/tests/test_repo.py
@@ -1,4 +1,5 @@
 import copy
+import tarfile
 from datetime import date, datetime, timedelta
 import json
 import logging
@@ -96,8 +97,7 @@ def test_make_gztar_archive(self):
             dst_dir=self.temp_dir_path,
             app_name=app_name,
             version=version,
-            base_dir='.',  # this kwarg is allowed
-            root_dir='some path',  # this kwarg is removed
+            tar_format=tarfile.USTAR_FORMAT,
         )
         self.assertIsInstance(archive, TargetMeta)
         self.assertEqual(exists, mock_input_no.called)