diff --git a/salt/fileclient.py b/salt/fileclient.py index cb3b210a037c..35c63b2cb11f 100644 --- a/salt/fileclient.py +++ b/salt/fileclient.py @@ -185,12 +185,13 @@ def file_list_emptydirs(self, saltenv=u'base', prefix=u''): ''' raise NotImplementedError - def cache_file(self, path, saltenv=u'base', cachedir=None): + def cache_file(self, path, saltenv=u'base', cachedir=None, source_hash=None): ''' Pull a file down from the file server and store it in the minion file cache ''' - return self.get_url(path, u'', True, saltenv, cachedir=cachedir) + return self.get_url( + path, u'', True, saltenv, cachedir=cachedir, source_hash=source_hash) def cache_files(self, paths, saltenv=u'base', cachedir=None): ''' @@ -470,7 +471,7 @@ def get_dir(self, path, dest=u'', saltenv=u'base', gzip=None, return ret def get_url(self, url, dest, makedirs=False, saltenv=u'base', - no_cache=False, cachedir=None): + no_cache=False, cachedir=None, source_hash=None): ''' Get a single file from a URL. ''' @@ -525,6 +526,18 @@ def get_url(self, url, dest, makedirs=False, saltenv=u'base', return u'' elif not no_cache: dest = self._extrn_path(url, saltenv, cachedir=cachedir) + if source_hash is not None: + try: + source_hash = source_hash.split('=')[-1] + form = salt.utils.files.HASHES_REVMAP[len(source_hash)] + if salt.utils.get_hash(dest, form) == source_hash: + log.debug( + 'Cached copy of %s (%s) matches source_hash %s, ' + 'skipping download', url, dest, source_hash + ) + return dest + except (AttributeError, KeyError, IOError, OSError): + pass destdir = os.path.dirname(dest) if not os.path.isdir(destdir): os.makedirs(destdir) @@ -532,7 +545,9 @@ def get_url(self, url, dest, makedirs=False, saltenv=u'base', if url_data.scheme == u's3': try: def s3_opt(key, default=None): - u'''Get value of s3. from Minion config or from Pillar''' + ''' + Get value of s3. from Minion config or from Pillar + ''' if u's3.' + key in self.opts: return self.opts[u's3.' 
+ key] try: @@ -785,7 +800,7 @@ def get_template( def _extrn_path(self, url, saltenv, cachedir=None): ''' - Return the extn_filepath for a given url + Return the extrn_filepath for a given url ''' url_data = urlparse(url) if salt.utils.platform.is_windows(): diff --git a/salt/modules/archive.py b/salt/modules/archive.py index 70ef0bdeccf4..7d627f7fdbad 100644 --- a/salt/modules/archive.py +++ b/salt/modules/archive.py @@ -60,7 +60,8 @@ def list_(name, strip_components=None, clean=False, verbose=False, - saltenv='base'): + saltenv='base', + source_hash=None): ''' .. versionadded:: 2016.11.0 .. versionchanged:: 2016.11.2 @@ -149,6 +150,14 @@ def list_(name, ``archive``. This is only applicable when ``archive`` is a file from the ``salt://`` fileserver. + source_hash + If ``name`` is an http(s)/ftp URL and the file exists in the minion's + file cache, this option can be passed to keep the minion from + re-downloading the archive if the cached copy matches the specified + hash. + + .. versionadded:: Oxygen + .. _tarfile: https://docs.python.org/2/library/tarfile.html .. 
_xz: http://tukaani.org/xz/ @@ -160,6 +169,7 @@ def list_(name, salt '*' archive.list /path/to/myfile.tar.gz strip_components=1 salt '*' archive.list salt://foo.tar.gz salt '*' archive.list https://domain.tld/myfile.zip + salt '*' archive.list https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15 salt '*' archive.list ftp://10.1.2.3/foo.rar ''' def _list_tar(name, cached, decompress_cmd, failhard=False): @@ -309,7 +319,7 @@ def _list_rar(name, cached): ) return dirs, files, [] - cached = __salt__['cp.cache_file'](name, saltenv) + cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash) if not cached: raise CommandExecutionError('Failed to cache {0}'.format(name)) @@ -1094,7 +1104,7 @@ def unzip(zip_file, return _trim_files(cleaned_files, trim_output) -def is_encrypted(name, clean=False, saltenv='base'): +def is_encrypted(name, clean=False, saltenv='base', source_hash=None): ''' .. versionadded:: 2016.11.0 @@ -1113,6 +1123,18 @@ def is_encrypted(name, clean=False, saltenv='base'): If there is an error listing the archive's contents, the cached file will not be removed, to allow for troubleshooting. + saltenv : base + Specifies the fileserver environment from which to retrieve + ``archive``. This is only applicable when ``archive`` is a file from + the ``salt://`` fileserver. + + source_hash + If ``name`` is an http(s)/ftp URL and the file exists in the minion's + file cache, this option can be passed to keep the minion from + re-downloading the archive if the cached copy matches the specified + hash. + + .. 
versionadded:: Oxygen CLI Examples: @@ -1122,9 +1144,10 @@ def is_encrypted(name, clean=False, saltenv='base'): salt '*' archive.is_encrypted salt://foo.zip salt '*' archive.is_encrypted salt://foo.zip saltenv=dev salt '*' archive.is_encrypted https://domain.tld/myfile.zip clean=True + salt '*' archive.is_encrypted https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15 salt '*' archive.is_encrypted ftp://10.1.2.3/foo.zip ''' - cached = __salt__['cp.cache_file'](name, saltenv) + cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash) if not cached: raise CommandExecutionError('Failed to cache {0}'.format(name)) diff --git a/salt/modules/cp.py b/salt/modules/cp.py index 86634d559c3d..cdbeb4434ef3 100644 --- a/salt/modules/cp.py +++ b/salt/modules/cp.py @@ -352,7 +352,7 @@ def get_dir(path, dest, saltenv='base', template=None, gzip=None, **kwargs): return _client().get_dir(path, dest, saltenv, gzip) -def get_url(path, dest='', saltenv='base', makedirs=False): +def get_url(path, dest='', saltenv='base', makedirs=False, source_hash=None): ''' .. versionchanged:: Oxygen ``dest`` can now be a directory @@ -386,6 +386,13 @@ def get_url(path, dest='', saltenv='base', makedirs=False): Salt fileserver envrionment from which to retrieve the file. Ignored if ``path`` is not a ``salt://`` URL. + source_hash + If ``path`` is an http(s) or ftp URL and the file exists in the + minion's file cache, this option can be passed to keep the minion from + re-downloading the file if the cached copy matches the specified hash. + + .. versionadded:: Oxygen + CLI Example: .. 
code-block:: bash @@ -394,9 +401,11 @@ def get_url(path, dest='', saltenv='base', makedirs=False): salt '*' cp.get_url http://www.slashdot.org /tmp/index.html ''' if isinstance(dest, six.string_types): - result = _client().get_url(path, dest, makedirs, saltenv) + result = _client().get_url( + path, dest, makedirs, saltenv, source_hash=source_hash) else: - result = _client().get_url(path, None, makedirs, saltenv, no_cache=True) + result = _client().get_url( + path, None, makedirs, saltenv, no_cache=True, source_hash=source_hash) if not result: log.error( 'Unable to fetch file {0} from saltenv {1}.'.format( @@ -429,11 +438,18 @@ def get_file_str(path, saltenv='base'): return fn_ -def cache_file(path, saltenv='base'): +def cache_file(path, saltenv='base', source_hash=None): ''' Used to cache a single file on the Minion - Returns the location of the new cached file on the Minion. + Returns the location of the new cached file on the Minion + + source_hash + If ``path`` is an http(s) or ftp URL and the file exists in the + minion's file cache, this option can be passed to keep the minion from + re-downloading the file if the cached copy matches the specified hash. + + .. 
versionadded:: Oxygen CLI Example: @@ -485,7 +501,7 @@ def cache_file(path, saltenv='base'): if senv: saltenv = senv - result = _client().cache_file(path, saltenv) + result = _client().cache_file(path, saltenv, source_hash=source_hash) if not result: log.error( u'Unable to cache file \'%s\' from saltenv \'%s\'.', diff --git a/salt/modules/file.py b/salt/modules/file.py index 944736f74023..7dfd5ced0111 100644 --- a/salt/modules/file.py +++ b/salt/modules/file.py @@ -60,6 +60,7 @@ import salt.utils.templates import salt.utils.url from salt.exceptions import CommandExecutionError, MinionError, SaltInvocationError, get_error_message as _get_error_message +from salt.utils.files import HASHES, HASHES_REVMAP log = logging.getLogger(__name__) @@ -67,16 +68,6 @@ 'makedirs_': 'makedirs' } -HASHES = { - 'sha512': 128, - 'sha384': 96, - 'sha256': 64, - 'sha224': 56, - 'sha1': 40, - 'md5': 32, -} -HASHES_REVMAP = dict([(y, x) for x, y in six.iteritems(HASHES)]) - def __virtual__(): ''' @@ -3767,14 +3758,8 @@ def source_list(source, source_hash, saltenv): ret = (single_src, single_hash) break elif proto.startswith('http') or proto == 'ftp': - try: - if __salt__['cp.cache_file'](single_src): - ret = (single_src, single_hash) - break - except MinionError as exc: - # Error downloading file. Log the caught exception and - # continue on to the next source. 
- log.exception(exc) + ret = (single_src, single_hash) + break elif proto == 'file' and os.path.exists(urlparsed_single_src.path): ret = (single_src, single_hash) break @@ -3794,9 +3779,8 @@ def source_list(source, source_hash, saltenv): ret = (single, source_hash) break elif proto.startswith('http') or proto == 'ftp': - if __salt__['cp.cache_file'](single): - ret = (single, source_hash) - break + ret = (single, source_hash) + break elif single.startswith('/') and os.path.exists(single): ret = (single, source_hash) break @@ -4007,11 +3991,14 @@ def _get_local_file_source_sum(path): else: sfn = cached_dest - # If we didn't have the template or remote file, let's get it - # Similarly when the file has been updated and the cache has to be refreshed + # If we didn't have the template or remote file, or the file has been + # updated and the cache has to be refreshed, download the file. if not sfn or cache_refetch: try: - sfn = __salt__['cp.cache_file'](source, saltenv) + sfn = __salt__['cp.cache_file']( + source, + saltenv, + source_hash=source_sum.get('hsum')) except Exception as exc: # A 404 or other error code may raise an exception, catch it # and return a comment that will fail the calling state. 
@@ -4675,7 +4662,7 @@ def check_file_meta( ''' changes = {} if not source_sum: - source_sum = dict() + source_sum = {} lstats = stats(name, hash_type=source_sum.get('hash_type', None), follow_symlinks=False) if not lstats: changes['newfile'] = name @@ -4683,7 +4670,10 @@ def check_file_meta( if 'hsum' in source_sum: if source_sum['hsum'] != lstats['sum']: if not sfn and source: - sfn = __salt__['cp.cache_file'](source, saltenv) + sfn = __salt__['cp.cache_file']( + source, + saltenv, + source_hash=source_sum['hsum']) if sfn: try: changes['diff'] = get_diff( @@ -4750,7 +4740,9 @@ def get_diff(file1, saltenv='base', show_filenames=True, show_changes=True, - template=False): + template=False, + source_hash_file1=None, + source_hash_file2=None): ''' Return unified diff of two files @@ -4785,6 +4777,22 @@ def get_diff(file1, .. versionadded:: Oxygen + source_hash_file1 + If ``file1`` is an http(s)/ftp URL and the file exists in the minion's + file cache, this option can be passed to keep the minion from + re-downloading the archive if the cached copy matches the specified + hash. + + .. versionadded:: Oxygen + + source_hash_file2 + If ``file2`` is an http(s)/ftp URL and the file exists in the minion's + file cache, this option can be passed to keep the minion from + re-downloading the archive if the cached copy matches the specified + hash. + + .. versionadded:: Oxygen + CLI Examples: .. code-block:: bash @@ -4793,14 +4801,17 @@ def get_diff(file1, salt '*' file.get_diff /tmp/foo.txt /tmp/bar.txt ''' files = (file1, file2) + source_hashes = (source_hash_file1, source_hash_file2) paths = [] errors = [] - for filename in files: + for filename, source_hash in zip(files, source_hashes): try: # Local file paths will just return the same path back when passed # to cp.cache_file. 
- cached_path = __salt__['cp.cache_file'](filename, saltenv) + cached_path = __salt__['cp.cache_file'](filename, + saltenv, + source_hash=source_hash) if cached_path is False: errors.append( u'File {0} not found'.format( diff --git a/salt/states/archive.py b/salt/states/archive.py index c2308cbbd0e1..2a1454f99da4 100644 --- a/salt/states/archive.py +++ b/salt/states/archive.py @@ -64,16 +64,30 @@ def _gen_checksum(path): 'hash_type': __opts__['hash_type']} -def _update_checksum(cached_source): - cached_source_sum = '.'.join((cached_source, 'hash')) - source_sum = _gen_checksum(cached_source) +def _checksum_file_path(path): + relpath = '.'.join((os.path.relpath(path, __opts__['cachedir']), 'hash')) + if re.match(r'..[/\\]', relpath): + # path is a local file + relpath = salt.utils.path.join( + 'local', + os.path.splitdrive(path)[-1].lstrip('/\\'), + ) + return salt.utils.path.join(__opts__['cachedir'], 'archive_hash', relpath) + + +def _update_checksum(path): + checksum_file = _checksum_file_path(path) + checksum_dir = os.path.dirname(checksum_file) + if not os.path.isdir(checksum_dir): + os.makedirs(checksum_dir) + source_sum = _gen_checksum(path) hash_type = source_sum.get('hash_type') hsum = source_sum.get('hsum') if hash_type and hsum: lines = [] try: try: - with salt.utils.files.fopen(cached_source_sum, 'r') as fp_: + with salt.utils.files.fopen(checksum_file, 'r') as fp_: for line in fp_: try: lines.append(line.rstrip('\n').split(':', 1)) @@ -83,7 +97,7 @@ def _update_checksum(cached_source): if exc.errno != errno.ENOENT: raise - with salt.utils.files.fopen(cached_source_sum, 'w') as fp_: + with salt.utils.files.fopen(checksum_file, 'w') as fp_: for line in lines: if line[0] == hash_type: line[1] = hsum @@ -93,16 +107,16 @@ def _update_checksum(cached_source): except (IOError, OSError) as exc: log.warning( 'Failed to update checksum for %s: %s', - cached_source, exc.__str__() + path, exc.__str__(), exc_info=True ) -def _read_cached_checksum(cached_source, 
form=None): +def _read_cached_checksum(path, form=None): if form is None: form = __opts__['hash_type'] - path = '.'.join((cached_source, 'hash')) + checksum_file = _checksum_file_path(path) try: - with salt.utils.files.fopen(path, 'r') as fp_: + with salt.utils.files.fopen(checksum_file, 'r') as fp_: for line in fp_: # Should only be one line in this file but just in case it # isn't, read only a single line to avoid overuse of memory. @@ -117,9 +131,9 @@ def _read_cached_checksum(cached_source, form=None): return {'hash_type': hash_type, 'hsum': hsum} -def _compare_checksum(cached_source, source_sum): +def _compare_checksum(cached, source_sum): cached_sum = _read_cached_checksum( - cached_source, + cached, form=source_sum.get('hash_type', __opts__['hash_type']) ) return source_sum == cached_sum @@ -155,7 +169,6 @@ def extracted(name, user=None, group=None, if_missing=None, - keep=False, trim_output=False, use_cmd_unzip=None, extract_perms=True, @@ -391,6 +404,22 @@ def extracted(name, .. versionadded:: 2016.3.4 + keep_source : True + For ``source`` archives not local to the minion (i.e. from the Salt + fileserver or a remote source such as ``http(s)`` or ``ftp``), Salt + will need to download the archive to the minion cache before they can + be extracted. To remove the downloaded archive after extraction, set + this argument to ``False``. + + .. versionadded:: 2017.7.3 + + keep : True + Same as ``keep_source``. + + .. note:: + If both ``keep_source`` and ``keep`` are used, ``keep`` will be + ignored. + password **For ZIP archives only.** Password used for extraction. @@ -518,13 +547,6 @@ def extracted(name, simply checked for existence and extraction will be skipped if if is present. - keep : False - For ``source`` archives not local to the minion (i.e. from the Salt - fileserver or a remote source such as ``http(s)`` or ``ftp``), Salt - will need to download the archive to the minion cache before they can - be extracted. 
After extraction, these source archives will be removed - unless this argument is set to ``True``. - trim_output : False Useful for archives with many files in them. This can either be set to ``True`` (in which case only the first 100 files extracted will be @@ -626,6 +648,21 @@ def extracted(name, # Remove pub kwargs as they're irrelevant here. kwargs = salt.utils.args.clean_kwargs(**kwargs) + if 'keep_source' in kwargs and 'keep' in kwargs: + ret.setdefault('warnings', []).append( + 'Both \'keep_source\' and \'keep\' were used. Since these both ' + 'do the same thing, \'keep\' was ignored.' + ) + keep_source = bool(kwargs.pop('keep_source')) + kwargs.pop('keep') + elif 'keep_source' in kwargs: + keep_source = bool(kwargs.pop('keep_source')) + elif 'keep' in kwargs: + keep_source = bool(kwargs.pop('keep')) + else: + # Neither was passed, default is True + keep_source = True + if not _path_is_abs(name): ret['comment'] = '{0} is not an absolute path'.format(name) return ret @@ -721,10 +758,10 @@ def extracted(name, urlparsed_source = _urlparse(source_match) source_hash_basename = urlparsed_source.path or urlparsed_source.netloc - source_is_local = urlparsed_source.scheme in ('', 'file') + source_is_local = urlparsed_source.scheme in salt.utils.files.LOCAL_PROTOS if source_is_local: # Get rid of "file://" from start of source_match - source_match = urlparsed_source.path + source_match = os.path.realpath(os.path.expanduser(urlparsed_source.path)) if not os.path.isfile(source_match): ret['comment'] = 'Source file \'{0}\' does not exist'.format(source_match) return ret @@ -858,95 +895,59 @@ def extracted(name, source_sum = {} if source_is_local: - cached_source = source_match - else: - cached_source = os.path.join( - __opts__['cachedir'], - 'files', - __env__, - re.sub(r'[:/\\]', '_', source_hash_basename), - ) - - if os.path.isdir(cached_source): - # Prevent a traceback from attempting to read from a directory path - salt.utils.files.rm_rf(cached_source) - - 
existing_cached_source_sum = _read_cached_checksum(cached_source) - - if source_is_local: - # No need to download archive, it's local to the minion - update_source = False + cached = source_match else: - if not os.path.isfile(cached_source): - # Archive not cached, we need to download it - update_source = True - else: - # Archive is cached, keep=True likely used in prior run. If we need - # to verify the hash, then we *have* to update the source archive - # to know whether or not the hash changed. Hence the below - # statement. bool(source_hash) will be True if source_hash was - # passed, and otherwise False. - update_source = bool(source_hash) - - if update_source: if __opts__['test']: ret['result'] = None ret['comment'] = ( - 'Archive {0} would be downloaded to cache and checked to ' - 'discover if extraction is necessary'.format( + 'Archive {0} would be cached (if necessary) and checked to ' + 'discover if extraction is needed'.format( salt.utils.url.redact_http_basic_auth(source_match) ) ) return ret - # NOTE: This will result in more than one copy of the source archive on - # the minion. The reason this is necessary is because if we are - # tracking the checksum using source_hash_update, we need a location - # where we can place the checksum file alongside the cached source - # file, where it won't be overwritten by caching a file with the same - # name in the same parent dir as the source file. Long term, we should - # come up with a better solution for this. - file_result = __states__['file.managed'](cached_source, - source=source_match, - source_hash=source_hash, - source_hash_name=source_hash_name, - makedirs=True, - skip_verify=skip_verify) - log.debug('file.managed: {0}'.format(file_result)) - - # Prevent a traceback if errors prevented the above state from getting - # off the ground. 
- if isinstance(file_result, list): - try: - ret['comment'] = '\n'.join(file_result) - except TypeError: - ret['comment'] = '\n'.join([str(x) for x in file_result]) + if 'file.cached' not in __states__: + # Shouldn't happen unless there is a traceback keeping + # salt/states/file.py from being processed through the loader. If + # that is the case, we have much more important problems as _all_ + # file states would be unavailable. + ret['comment'] = ( + 'Unable to cache {0}, file.cached state not available'.format( + source_match + ) + ) return ret try: - if not file_result['result']: - log.debug( - 'failed to download %s', - salt.utils.url.redact_http_basic_auth(source_match) - ) - return file_result - except TypeError: - if not file_result: - log.debug( - 'failed to download %s', - salt.utils.url.redact_http_basic_auth(source_match) - ) - return file_result + result = __states__['file.cached'](source_match, + source_hash=source_hash, + source_hash_name=source_hash_name, + skip_verify=skip_verify, + saltenv=__env__) + except Exception as exc: + msg = 'Failed to cache {0}: {1}'.format(source_match, exc.__str__()) + log.exception(msg) + ret['comment'] = msg + return ret + else: + log.debug('file.cached: {0}'.format(result)) - else: - log.debug( - 'Archive %s is already in cache', - salt.utils.url.redact_http_basic_auth(source_match) - ) + if result['result']: + # Get the path of the file in the minion cache + cached = __salt__['cp.is_cached'](source_match) + else: + log.debug( + 'failed to download %s', + salt.utils.url.redact_http_basic_auth(source_match) + ) + return result + + existing_cached_source_sum = _read_cached_checksum(cached) if source_hash and source_hash_update and not skip_verify: # Create local hash sum file if we're going to track sum update - _update_checksum(cached_source) + _update_checksum(cached) if archive_format == 'zip' and not password: log.debug('Checking %s to see if it is password-protected', @@ -955,7 +956,7 @@ def extracted(name, # 
implicitly enabled by setting the "options" argument. try: encrypted_zip = __salt__['archive.is_encrypted']( - cached_source, + cached, clean=False, saltenv=__env__) except CommandExecutionError: @@ -973,7 +974,7 @@ def extracted(name, return ret try: - contents = __salt__['archive.list'](cached_source, + contents = __salt__['archive.list'](cached, archive_format=archive_format, options=list_options, strip_components=strip_components, @@ -1142,7 +1143,7 @@ def extracted(name, if not extraction_needed \ and source_hash_update \ and existing_cached_source_sum is not None \ - and not _compare_checksum(cached_source, existing_cached_source_sum): + and not _compare_checksum(cached, existing_cached_source_sum): extraction_needed = True source_hash_trigger = True else: @@ -1200,13 +1201,13 @@ def extracted(name, __states__['file.directory'](name, user=user, makedirs=True) created_destdir = True - log.debug('Extracting {0} to {1}'.format(cached_source, name)) + log.debug('Extracting {0} to {1}'.format(cached, name)) try: if archive_format == 'zip': if use_cmd_unzip: try: files = __salt__['archive.cmd_unzip']( - cached_source, + cached, name, options=options, trim_output=trim_output, @@ -1216,7 +1217,7 @@ def extracted(name, ret['comment'] = exc.strerror return ret else: - files = __salt__['archive.unzip'](cached_source, + files = __salt__['archive.unzip'](cached, name, options=options, trim_output=trim_output, @@ -1225,7 +1226,7 @@ def extracted(name, **kwargs) elif archive_format == 'rar': try: - files = __salt__['archive.unrar'](cached_source, + files = __salt__['archive.unrar'](cached, name, trim_output=trim_output, **kwargs) @@ -1235,7 +1236,7 @@ def extracted(name, else: if options is None: try: - with closing(tarfile.open(cached_source, 'r')) as tar: + with closing(tarfile.open(cached, 'r')) as tar: tar.extractall(name) files = tar.getnames() if trim_output: @@ -1243,7 +1244,7 @@ def extracted(name, except tarfile.ReadError: if salt.utils.path.which('xz'): if 
__salt__['cmd.retcode']( - ['xz', '-t', cached_source], + ['xz', '-t', cached], python_shell=False, ignore_retcode=True) == 0: # XZ-compressed data @@ -1259,7 +1260,7 @@ def extracted(name, # pipe it to tar for extraction. cmd = 'xz --decompress --stdout {0} | tar xvf -' results = __salt__['cmd.run_all']( - cmd.format(_cmd_quote(cached_source)), + cmd.format(_cmd_quote(cached)), cwd=name, python_shell=True) if results['retcode'] != 0: @@ -1329,7 +1330,7 @@ def extracted(name, tar_cmd.append(tar_shortopts) tar_cmd.extend(tar_longopts) - tar_cmd.extend(['-f', cached_source]) + tar_cmd.extend(['-f', cached]) results = __salt__['cmd.run_all'](tar_cmd, cwd=name, @@ -1500,18 +1501,15 @@ def extracted(name, for item in enforce_failed: ret['comment'] += '\n- {0}'.format(item) - if not source_is_local and not keep: - for path in (cached_source, __salt__['cp.is_cached'](source_match)): - if not path: - continue - log.debug('Cleaning cached source file %s', path) - try: - os.remove(path) - except OSError as exc: - if exc.errno != errno.ENOENT: - log.error( - 'Failed to clean cached source file %s: %s', - cached_source, exc.__str__() - ) + if not source_is_local: + if keep_source: + log.debug('Keeping cached source file %s', cached) + else: + log.debug('Cleaning cached source file %s', cached) + result = __states__['file.not_cached'](source_match, saltenv=__env__) + if not result['result']: + # Don't let failure to delete cached file cause the state + # itself to fail, just drop it in the warnings. 
+ ret.setdefault('warnings', []).append(result['comment']) return ret diff --git a/salt/states/file.py b/salt/states/file.py index 05801ff54425..1d89feb29570 100644 --- a/salt/states/file.py +++ b/salt/states/file.py @@ -299,6 +299,7 @@ def run(): # Import 3rd-party libs from salt.ext import six from salt.ext.six.moves import zip_longest +from salt.ext.six.moves.urllib.parse import urlparse as _urlparse # pylint: disable=no-name-in-module if salt.utils.platform.is_windows(): import pywintypes import win32com.client @@ -1530,6 +1531,7 @@ def managed(name, source=None, source_hash='', source_hash_name=None, + keep_source=True, user=None, group=None, mode=None, @@ -1729,6 +1731,15 @@ def managed(name, .. versionadded:: 2016.3.5 + keep_source : True + Set to ``False`` to discard the cached copy of the source file once the + state completes. This can be useful for larger files to keep them from + taking up space in minion cache. However, keep in mind that discarding + the source file will result in the state needing to re-download the + source file if the state is run again. + + .. 
versionadded:: 2017.7.3 + user The user to own the file, this defaults to the user salt is running as on the minion @@ -2440,8 +2451,9 @@ def managed(name, except Exception as exc: ret['changes'] = {} log.debug(traceback.format_exc()) - if os.path.isfile(tmp_filename): - os.remove(tmp_filename) + salt.utils.files.remove(tmp_filename) + if not keep_source and sfn: + salt.utils.files.remove(sfn) return _error(ret, 'Unable to check_cmd file: {0}'.format(exc)) # file being updated to verify using check_cmd @@ -2459,15 +2471,9 @@ def managed(name, cret = mod_run_check_cmd(check_cmd, tmp_filename, **check_cmd_opts) if isinstance(cret, dict): ret.update(cret) - if os.path.isfile(tmp_filename): - os.remove(tmp_filename) - if sfn and os.path.isfile(sfn): - os.remove(sfn) + salt.utils.files.remove(tmp_filename) return ret - if sfn and os.path.isfile(sfn): - os.remove(sfn) - # Since we generated a new tempfile and we are not returning here # lets change the original sfn to the new tempfile or else we will # get file not found @@ -2516,10 +2522,10 @@ def managed(name, log.debug(traceback.format_exc()) return _error(ret, 'Unable to manage file: {0}'.format(exc)) finally: - if tmp_filename and os.path.isfile(tmp_filename): - os.remove(tmp_filename) - if sfn and os.path.isfile(sfn): - os.remove(sfn) + if tmp_filename: + salt.utils.files.remove(tmp_filename) + if not keep_source and sfn: + salt.utils.files.remove(sfn) _RECURSE_TYPES = ['user', 'group', 'mode', 'ignore_files', 'ignore_dirs'] @@ -3048,6 +3054,7 @@ def directory(name, def recurse(name, source, + keep_source=True, clean=False, require=None, user=None, @@ -3080,6 +3087,15 @@ def recurse(name, located on the master in the directory named spam, and is called eggs, the source string is salt://spam/eggs + keep_source : True + Set to ``False`` to discard the cached copy of the source file once the + state completes. This can be useful for larger files to keep them from + taking up space in minion cache. 
def cached(name,
           source_hash='',
           source_hash_name=None,
           skip_verify=False,
           saltenv='base'):
    '''
    .. versionadded:: 2017.7.3

    Ensures that a file is saved to the minion's cache. This state is primarily
    invoked by other states to ensure that we do not re-download a source file
    if we do not need to.

    name
        The URL of the file to be cached. To cache a file from an environment
        other than ``base``, either use the ``saltenv`` argument or include the
        saltenv in the URL (e.g. ``salt://path/to/file.conf?saltenv=dev``).

        .. note::
            A list of URLs is not supported, this must be a single URL. If a
            local file is passed here, then the state will obviously not try to
            download anything, but it will compare a hash if one is specified.

    source_hash
        See the documentation for this same argument in the
        :py:func:`file.managed <salt.states.file.managed>` state.

        .. note::
            For remote files not originating from the ``salt://`` fileserver,
            such as http(s) or ftp servers, this state will not re-download the
            file if the locally-cached copy matches this hash. This is done to
            prevent unnecessary downloading on repeated runs of this state. To
            update the cached copy of a file, it is necessary to update this
            hash.

    source_hash_name
        See the documentation for this same argument in the
        :py:func:`file.managed <salt.states.file.managed>` state.

    skip_verify
        See the documentation for this same argument in the
        :py:func:`file.managed <salt.states.file.managed>` state.

        .. note::
            Setting this to ``True`` will result in a copy of the file being
            downloaded from a remote (http(s), ftp, etc.) source each time the
            state is run.

    saltenv
        Used to specify the environment from which to download a file from the
        Salt fileserver (i.e. those with ``salt://`` URL).


    This state will in most cases not be useful in SLS files, but it is useful
    when writing a state or remote-execution module that needs to make sure
    that a file at a given URL has been downloaded to the cachedir. One example
    of this is in the :py:func:`archive.extracted
    <salt.states.archive.extracted>` state:

    .. code-block:: python

        result = __states__['file.cached'](source_match,
                                           source_hash=source_hash,
                                           source_hash_name=source_hash_name,
                                           skip_verify=skip_verify,
                                           saltenv=__env__)

    This will return a dictionary containing the state's return data, including
    a ``result`` key which will state whether or not the state was successful.
    Note that this will not catch exceptions, so it is best used within a
    try/except.

    Once this state has been run from within another state or remote-execution
    module, the actual location of the cached file can be obtained using
    :py:func:`cp.is_cached <salt.modules.cp.is_cached>`:

    .. code-block:: python

        cached = __salt__['cp.is_cached'](source_match)

    This function will return the cached path of the file, or an empty string
    if the file is not present in the minion cache.
    '''
    ret = {'changes': {},
           'comment': '',
           'name': name,
           'result': False}

    try:
        parsed = _urlparse(name)
    except Exception:
        ret['comment'] = 'Only URLs or local file paths are valid input'
        return ret

    # This if statement will keep the state from proceeding if a remote source
    # is specified and no source_hash is presented (unless we're skipping hash
    # verification). Without a hash there is no way to tell whether a cached
    # copy of a remote file is current.
    if not skip_verify \
            and not source_hash \
            and parsed.scheme in salt.utils.files.REMOTE_PROTOS:
        ret['comment'] = (
            'Unable to verify upstream hash of source file {0}, please set '
            'source_hash or set skip_verify to True'.format(name)
        )
        return ret

    if source_hash:
        # Get the hash and hash type from the input. This takes care of parsing
        # the hash out of a file containing checksums, if that is how the
        # source_hash was specified.
        try:
            source_sum = __salt__['file.get_source_sum'](
                source=name,
                source_hash=source_hash,
                source_hash_name=source_hash_name,
                saltenv=saltenv)
        except CommandExecutionError as exc:
            ret['comment'] = exc.strerror
            return ret
        else:
            if not source_sum:
                # We shouldn't get here, problems in retrieving the hash in
                # file.get_source_sum should result in a CommandExecutionError
                # being raised, which we catch above. Nevertheless, we should
                # provide useful information in the event that
                # file.get_source_sum regresses.
                ret['comment'] = (
                    'Failed to get source hash from {0}. This may be a bug. '
                    'If this error persists, please report it and set '
                    'skip_verify to True to work around it.'.format(source_hash)
                )
                return ret
    else:
        source_sum = {}

    if parsed.scheme in salt.utils.files.LOCAL_PROTOS:
        # Source is a local file path. Nothing is downloaded; at most we
        # enforce the specified hash against the file on disk.
        full_path = os.path.realpath(os.path.expanduser(parsed.path))
        if os.path.exists(full_path):
            if not skip_verify and source_sum:
                # Enforce the hash
                local_hash = __salt__['file.get_hash'](
                    full_path,
                    source_sum.get('hash_type', __opts__['hash_type']))
                if local_hash == source_sum['hsum']:
                    ret['result'] = True
                    ret['comment'] = (
                        'File {0} is present on the minion and has hash '
                        '{1}'.format(full_path, local_hash)
                    )
                else:
                    ret['comment'] = (
                        'File {0} is present on the minion, but the hash ({1}) '
                        'does not match the specified hash ({2})'.format(
                            full_path, local_hash, source_sum['hsum']
                        )
                    )
                return ret
            else:
                ret['result'] = True
                ret['comment'] = 'File {0} is present on the minion'.format(
                    full_path
                )
                return ret
        else:
            ret['comment'] = 'File {0} is not present on the minion'.format(
                full_path
            )
            return ret

    local_copy = __salt__['cp.is_cached'](name, saltenv=saltenv)

    if local_copy:
        # File is already cached
        pre_hash = __salt__['file.get_hash'](
            local_copy,
            source_sum.get('hash_type', __opts__['hash_type']))

        if not skip_verify and source_sum:
            # Compare the local copy's hash with the hash that was specified
            # via source_hash. If it matches, we can exit early from the state
            # without going any further, because the file is cached with the
            # correct hash.
            if pre_hash == source_sum['hsum']:
                ret['result'] = True
                ret['comment'] = (
                    'File is already cached to {0} with hash {1}'.format(
                        local_copy, pre_hash
                    )
                )
                # Return here so that the early exit described above actually
                # happens. Previously this branch fell through, needlessly
                # re-caching the file and (on a fileserver-side change)
                # leaving the True result set despite a later hash mismatch.
                return ret
    else:
        pre_hash = None

    # Cache the file. Note that this will not actually download the file if
    # either of the following is true:
    #   1. source is a salt:// URL and the fileserver determines that the hash
    #      of the minion's copy matches that of the fileserver.
    #   2. File is remote (http(s), ftp, etc.) and the specified source_hash
    #      matches the cached copy.
    # Remote, non salt:// sources _will_ download if a copy of the file was
    # not already present in the minion cache.
    try:
        local_copy = __salt__['cp.cache_file'](
            name,
            saltenv=saltenv,
            source_hash=source_sum.get('hsum'))
    except Exception as exc:
        ret['comment'] = str(exc)
        return ret

    if not local_copy:
        ret['comment'] = (
            'Failed to cache {0}, check minion log for more '
            'information'.format(name)
        )
        return ret

    post_hash = __salt__['file.get_hash'](
        local_copy,
        source_sum.get('hash_type', __opts__['hash_type']))

    if pre_hash != post_hash:
        ret['changes']['hash'] = {'old': pre_hash, 'new': post_hash}

    # Check the hash, if we're enforcing one. Note that this will be the first
    # hash check if the file was not previously cached, and the 2nd hash check
    # if it was cached but the cached copy's hash did not match the specified
    # source_hash.
    if not skip_verify and source_sum:
        if post_hash == source_sum['hsum']:
            ret['result'] = True
            ret['comment'] = (
                'File is already cached to {0} with hash {1}'.format(
                    local_copy, post_hash
                )
            )
        else:
            ret['comment'] = (
                'File is cached to {0}, but the hash ({1}) does not match '
                'the specified hash ({2})'.format(
                    local_copy, post_hash, source_sum['hsum']
                )
            )
        return ret

    # We're not enforcing a hash, and we already know that the file was
    # successfully cached, so we know the state was successful.
    ret['result'] = True
    ret['comment'] = 'File is cached to {0}'.format(local_copy)
    return ret


def not_cached(name, saltenv='base'):
    '''
    .. versionadded:: 2017.7.3

    Ensures that a file is not present in the minion's cache, deleting it if
    found. This state is primarily invoked by other states to ensure that a
    fresh copy of a source file is fetched on the next run.

    name
        The URL of the file to be removed from cache. To remove a file from an
        environment other than ``base``, either use the ``saltenv`` argument
        or include the saltenv in the URL (e.g.
        ``salt://path/to/file.conf?saltenv=dev``).

        .. note::
            A list of URLs is not supported, this must be a single URL. If a
            local file is passed here, the state will take no action.

    saltenv
        Used to specify the environment from which to download a file from the
        Salt fileserver (i.e. those with ``salt://`` URL).
    '''
    ret = {'changes': {},
           'comment': '',
           'name': name,
           'result': False}

    try:
        parsed = _urlparse(name)
    except Exception:
        ret['comment'] = 'Only URLs or local file paths are valid input'
        return ret
    else:
        if parsed.scheme in salt.utils.files.LOCAL_PROTOS:
            # Local files are not managed by the minion cache, so there is
            # nothing to delete.
            full_path = os.path.realpath(os.path.expanduser(parsed.path))
            ret['result'] = True
            ret['comment'] = (
                'File {0} is a local path, no action taken'.format(
                    full_path
                )
            )
            return ret

    local_copy = __salt__['cp.is_cached'](name, saltenv=saltenv)

    if local_copy:
        try:
            os.remove(local_copy)
        except Exception as exc:
            ret['comment'] = 'Failed to delete {0}: {1}'.format(
                local_copy, str(exc)
            )
        else:
            ret['result'] = True
            ret['changes']['deleted'] = True
            ret['comment'] = '{0} was deleted'.format(local_copy)
    else:
        ret['result'] = True
        ret['comment'] = '{0} is not cached'.format(name)
    return ret
# Map of supported hash algorithm name -> length of its hex digest. Used to
# infer which algorithm produced a bare hexadecimal checksum string.
HASHES = {
    'sha512': 128,
    'sha384': 96,
    'sha256': 64,
    'sha224': 56,
    'sha1': 40,
    'md5': 32,
}
# Reverse lookup: hex digest length -> algorithm name. Digest lengths are
# unique across the supported algorithms, so this mapping loses nothing.
# (dict comprehension over .items() replaces the old
# dict([... six.iteritems(HASHES)]) which built a throwaway list and needed
# the six shim; .items() is equivalent on both py2.7 and py3.)
HASHES_REVMAP = {digest_len: algo for algo, digest_len in HASHES.items()}


def remove(path):
    '''
    Runs os.remove(path) and suppresses the OSError if the file doesn't exist

    path
        The path of the file to remove.
    '''
    try:
        os.remove(path)
    except OSError as exc:
        # A missing file is the desired end state, so ENOENT is not an error.
        # Anything else (permission denied, path is a directory, ...) is
        # re-raised for the caller to handle.
        if exc.errno != errno.ENOENT:
            raise