Merge pull request #43518 from terminalmage/issue38971
Reduce unnecessary file downloading in archive/file states
Mike Place authored Sep 22, 2017
2 parents 530a6bb + 49bb3b0 commit 7e3a6b9
Showing 7 changed files with 588 additions and 175 deletions.
25 changes: 20 additions & 5 deletions salt/fileclient.py
@@ -185,12 +185,13 @@ def file_list_emptydirs(self, saltenv=u'base', prefix=u''):
'''
raise NotImplementedError

def cache_file(self, path, saltenv=u'base', cachedir=None):
def cache_file(self, path, saltenv=u'base', cachedir=None, source_hash=None):
'''
Pull a file down from the file server and store it in the minion
file cache
'''
return self.get_url(path, u'', True, saltenv, cachedir=cachedir)
return self.get_url(
path, u'', True, saltenv, cachedir=cachedir, source_hash=source_hash)

def cache_files(self, paths, saltenv=u'base', cachedir=None):
'''
@@ -470,7 +471,7 @@ def get_dir(self, path, dest=u'', saltenv=u'base', gzip=None,
return ret

def get_url(self, url, dest, makedirs=False, saltenv=u'base',
no_cache=False, cachedir=None):
no_cache=False, cachedir=None, source_hash=None):
'''
Get a single file from a URL.
'''
@@ -525,14 +526,28 @@ def get_url(self, url, dest, makedirs=False, saltenv=u'base',
return u''
elif not no_cache:
dest = self._extrn_path(url, saltenv, cachedir=cachedir)
if source_hash is not None:
try:
source_hash = source_hash.split('=')[-1]
form = salt.utils.files.HASHES_REVMAP[len(source_hash)]
if salt.utils.get_hash(dest, form) == source_hash:
log.debug(
'Cached copy of %s (%s) matches source_hash %s, '
'skipping download', url, dest, source_hash
)
return dest
except (AttributeError, KeyError, IOError, OSError):
pass
destdir = os.path.dirname(dest)
if not os.path.isdir(destdir):
os.makedirs(destdir)

if url_data.scheme == u's3':
try:
def s3_opt(key, default=None):
u'''Get value of s3.<key> from Minion config or from Pillar'''
'''
Get value of s3.<key> from Minion config or from Pillar
'''
if u's3.' + key in self.opts:
return self.opts[u's3.' + key]
try:
@@ -785,7 +800,7 @@ def get_template(

def _extrn_path(self, url, saltenv, cachedir=None):
'''
Return the extn_filepath for a given url
Return the extrn_filepath for a given url
'''
url_data = urlparse(url)
if salt.utils.platform.is_windows():
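The cached-copy check added to get_url() above can be summarized with a standalone sketch. This is a simplified illustration, not Salt's implementation: HASHES_REVMAP below is a local stand-in for the mapping in salt.utils.files, and cached_copy_is_current() is a hypothetical helper name. The idea is the same as in the hunk above: accept either a bare digest or an ``algo=digest`` string, infer the hash type from the digest length, and skip the download when the cached file already matches.

.. code-block:: python

    import hashlib
    import os

    # Hexdigest length -> algorithm, mirroring salt.utils.files.HASHES_REVMAP
    HASHES_REVMAP = {
        32: 'md5',
        40: 'sha1',
        56: 'sha224',
        64: 'sha256',
        96: 'sha384',
        128: 'sha512',
    }

    def cached_copy_is_current(dest, source_hash):
        '''
        Return True if the file at ``dest`` already matches ``source_hash``,
        meaning a re-download can be skipped.
        '''
        if source_hash is None or not os.path.isfile(dest):
            return False
        # Accept both '<digest>' and '<algo>=<digest>' forms
        digest = source_hash.split('=')[-1]
        try:
            algo = HASHES_REVMAP[len(digest)]
        except KeyError:
            # Unrecognized digest length; fall back to downloading
            return False
        hasher = hashlib.new(algo)
        with open(dest, 'rb') as fh:
            for chunk in iter(lambda: fh.read(65536), b''):
                hasher.update(chunk)
        return hasher.hexdigest() == digest

Salt's own version in this commit uses salt.utils.get_hash() for the file hashing and logs a debug message before returning the cached path.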
31 changes: 27 additions & 4 deletions salt/modules/archive.py
@@ -60,7 +60,8 @@ def list_(name,
strip_components=None,
clean=False,
verbose=False,
saltenv='base'):
saltenv='base',
source_hash=None):
'''
.. versionadded:: 2016.11.0
.. versionchanged:: 2016.11.2
@@ -149,6 +150,14 @@ def list_(name,
``archive``. This is only applicable when ``archive`` is a file from
the ``salt://`` fileserver.
source_hash
If ``name`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
.. _tarfile: https://docs.python.org/2/library/tarfile.html
.. _xz: http://tukaani.org/xz/
@@ -160,6 +169,7 @@
salt '*' archive.list /path/to/myfile.tar.gz strip_components=1
salt '*' archive.list salt://foo.tar.gz
salt '*' archive.list https://domain.tld/myfile.zip
salt '*' archive.list https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15
salt '*' archive.list ftp://10.1.2.3/foo.rar
'''
def _list_tar(name, cached, decompress_cmd, failhard=False):
@@ -309,7 +319,7 @@ def _list_rar(name, cached):
)
return dirs, files, []

cached = __salt__['cp.cache_file'](name, saltenv)
cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash)
if not cached:
raise CommandExecutionError('Failed to cache {0}'.format(name))

@@ -1094,7 +1104,7 @@ def unzip(zip_file,
return _trim_files(cleaned_files, trim_output)


def is_encrypted(name, clean=False, saltenv='base'):
def is_encrypted(name, clean=False, saltenv='base', source_hash=None):
'''
.. versionadded:: 2016.11.0
@@ -1113,6 +1123,18 @@ def is_encrypted(name, clean=False, saltenv='base'):
If there is an error listing the archive's contents, the cached
file will not be removed, to allow for troubleshooting.
saltenv : base
Specifies the fileserver environment from which to retrieve
``archive``. This is only applicable when ``archive`` is a file from
the ``salt://`` fileserver.
source_hash
If ``name`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
CLI Examples:
@@ -1122,9 +1144,10 @@ def is_encrypted(name, clean=False, saltenv='base'):
salt '*' archive.is_encrypted salt://foo.zip
salt '*' archive.is_encrypted salt://foo.zip saltenv=dev
salt '*' archive.is_encrypted https://domain.tld/myfile.zip clean=True
salt '*' archive.is_encrypted https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15
salt '*' archive.is_encrypted ftp://10.1.2.3/foo.zip
'''
cached = __salt__['cp.cache_file'](name, saltenv)
cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash)
if not cached:
raise CommandExecutionError('Failed to cache {0}'.format(name))

28 changes: 22 additions & 6 deletions salt/modules/cp.py
@@ -352,7 +352,7 @@ def get_dir(path, dest, saltenv='base', template=None, gzip=None, **kwargs):
return _client().get_dir(path, dest, saltenv, gzip)


def get_url(path, dest='', saltenv='base', makedirs=False):
def get_url(path, dest='', saltenv='base', makedirs=False, source_hash=None):
'''
.. versionchanged:: Oxygen
``dest`` can now be a directory
@@ -386,6 +386,13 @@ def get_url(path, dest='', saltenv='base', makedirs=False):
Salt fileserver environment from which to retrieve the file. Ignored if
``path`` is not a ``salt://`` URL.
source_hash
If ``path`` is an http(s) or ftp URL and the file exists in the
minion's file cache, this option can be passed to keep the minion from
re-downloading the file if the cached copy matches the specified hash.
.. versionadded:: Oxygen
CLI Example:
.. code-block:: bash
@@ -394,9 +401,11 @@
Expand All @@ -394,9 +401,11 @@ def get_url(path, dest='', saltenv='base', makedirs=False):
salt '*' cp.get_url http://www.slashdot.org /tmp/index.html
'''
if isinstance(dest, six.string_types):
result = _client().get_url(path, dest, makedirs, saltenv)
result = _client().get_url(
path, dest, makedirs, saltenv, source_hash=source_hash)
else:
result = _client().get_url(path, None, makedirs, saltenv, no_cache=True)
result = _client().get_url(
path, None, makedirs, saltenv, no_cache=True, source_hash=source_hash)
if not result:
log.error(
'Unable to fetch file {0} from saltenv {1}.'.format(
@@ -429,11 +438,18 @@ def get_file_str(path, saltenv='base'):
return fn_


def cache_file(path, saltenv='base'):
def cache_file(path, saltenv='base', source_hash=None):
'''
Used to cache a single file on the Minion
Returns the location of the new cached file on the Minion.
Returns the location of the new cached file on the Minion
source_hash
If ``name`` is an http(s) or ftp URL and the file exists in the
minion's file cache, this option can be passed to keep the minion from
re-downloading the file if the cached copy matches the specified hash.
.. versionadded:: Oxygen
CLI Example:
@@ -485,7 +501,7 @@ def cache_file(path, saltenv='base'):
if senv:
saltenv = senv

result = _client().cache_file(path, saltenv)
result = _client().cache_file(path, saltenv, source_hash=source_hash)
if not result:
log.error(
u'Unable to cache file \'%s\' from saltenv \'%s\'.',
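As a usage illustration only (not part of this commit), a custom execution module dropped into salt://_modules/ could forward the new ``source_hash`` argument to ``cp.cache_file`` in the same way the file.py changes below do. The module name ``mydeploy`` and the function ``fetch_bundle`` are hypothetical.

.. code-block:: python

    # _modules/mydeploy.py -- hypothetical custom module, shown only to
    # demonstrate the new source_hash keyword argument of cp.cache_file.
    # __salt__ is injected by the Salt loader when the module is run on a minion.
    import logging

    log = logging.getLogger(__name__)

    def fetch_bundle(url, digest, saltenv='base'):
        '''
        Cache ``url`` on the minion. If a cached copy already matches
        ``digest`` (either '<digest>' or '<algo>=<digest>'), the download
        is skipped and the cached path is returned.

        CLI Example:

        .. code-block:: bash

            salt '*' mydeploy.fetch_bundle https://domain.tld/app.tar.gz f1d2d2f924e986ac86fdf7b36c94bcdf32beec15
        '''
        cached = __salt__['cp.cache_file'](url, saltenv, source_hash=digest)
        if not cached:
            log.error('Unable to cache %s from saltenv \'%s\'', url, saltenv)
            return False
        return cached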
69 changes: 40 additions & 29 deletions salt/modules/file.py
@@ -60,23 +60,14 @@
import salt.utils.templates
import salt.utils.url
from salt.exceptions import CommandExecutionError, MinionError, SaltInvocationError, get_error_message as _get_error_message
from salt.utils.files import HASHES, HASHES_REVMAP

log = logging.getLogger(__name__)

__func_alias__ = {
'makedirs_': 'makedirs'
}

HASHES = {
'sha512': 128,
'sha384': 96,
'sha256': 64,
'sha224': 56,
'sha1': 40,
'md5': 32,
}
HASHES_REVMAP = dict([(y, x) for x, y in six.iteritems(HASHES)])


def __virtual__():
'''
@@ -3767,14 +3758,8 @@ def source_list(source, source_hash, saltenv):
ret = (single_src, single_hash)
break
elif proto.startswith('http') or proto == 'ftp':
try:
if __salt__['cp.cache_file'](single_src):
ret = (single_src, single_hash)
break
except MinionError as exc:
# Error downloading file. Log the caught exception and
# continue on to the next source.
log.exception(exc)
ret = (single_src, single_hash)
break
elif proto == 'file' and os.path.exists(urlparsed_single_src.path):
ret = (single_src, single_hash)
break
@@ -3794,9 +3779,8 @@
ret = (single, source_hash)
break
elif proto.startswith('http') or proto == 'ftp':
if __salt__['cp.cache_file'](single):
ret = (single, source_hash)
break
ret = (single, source_hash)
break
elif single.startswith('/') and os.path.exists(single):
ret = (single, source_hash)
break
@@ -4007,11 +3991,14 @@ def _get_local_file_source_sum(path):
else:
sfn = cached_dest

# If we didn't have the template or remote file, let's get it
# Similarly when the file has been updated and the cache has to be refreshed
# If we didn't have the template or remote file, or the file has been
# updated and the cache has to be refreshed, download the file.
if not sfn or cache_refetch:
try:
sfn = __salt__['cp.cache_file'](source, saltenv)
sfn = __salt__['cp.cache_file'](
source,
saltenv,
source_hash=source_sum.get('hsum'))
except Exception as exc:
# A 404 or other error code may raise an exception, catch it
# and return a comment that will fail the calling state.
@@ -4675,15 +4662,18 @@ def check_file_meta(
'''
changes = {}
if not source_sum:
source_sum = dict()
source_sum = {}
lstats = stats(name, hash_type=source_sum.get('hash_type', None), follow_symlinks=False)
if not lstats:
changes['newfile'] = name
return changes
if 'hsum' in source_sum:
if source_sum['hsum'] != lstats['sum']:
if not sfn and source:
sfn = __salt__['cp.cache_file'](source, saltenv)
sfn = __salt__['cp.cache_file'](
source,
saltenv,
source_hash=source_sum['hsum'])
if sfn:
try:
changes['diff'] = get_diff(
@@ -4750,7 +4740,9 @@ def get_diff(file1,
saltenv='base',
show_filenames=True,
show_changes=True,
template=False):
template=False,
source_hash_file1=None,
source_hash_file2=None):
'''
Return unified diff of two files
@@ -4785,6 +4777,22 @@
.. versionadded:: Oxygen
source_hash_file1
If ``file1`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
source_hash_file2
If ``file2`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
CLI Examples:
.. code-block:: bash
@@ -4793,14 +4801,17 @@
Expand All @@ -4793,14 +4801,17 @@ def get_diff(file1,
salt '*' file.get_diff /tmp/foo.txt /tmp/bar.txt
'''
files = (file1, file2)
source_hashes = (source_hash_file1, source_hash_file2)
paths = []
errors = []

for filename in files:
for filename, source_hash in zip(files, source_hashes):
try:
# Local file paths will just return the same path back when passed
# to cp.cache_file.
cached_path = __salt__['cp.cache_file'](filename, saltenv)
cached_path = __salt__['cp.cache_file'](filename,
saltenv,
source_hash=source_hash)
if cached_path is False:
errors.append(
u'File {0} not found'.format(
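Lastly, a hedged sketch of how the two new ``get_diff`` keyword arguments might be called from other minion-side code (where ``__salt__`` is available); the URL, local path, and digest below are placeholders, reusing the example hash from the docstrings above.

.. code-block:: python

    # Hypothetical call from another execution module. Passing
    # source_hash_file1 lets the minion reuse its cached copy of the remote
    # file instead of downloading it again when the hash still matches.
    diff_text = __salt__['file.get_diff'](
        'https://domain.tld/files/app.conf',    # file1: remote source
        '/etc/app/app.conf',                    # file2: local file
        source_hash_file1='f1d2d2f924e986ac86fdf7b36c94bcdf32beec15',
        show_changes=True,
    )
    if diff_text:
        print(diff_text)  # unified diff of the pending changes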