Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce unnecessary file downloading in archive/file states #43518

Merged
merged 6 commits into from
Sep 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions salt/fileclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,13 @@ def file_list_emptydirs(self, saltenv=u'base', prefix=u''):
'''
raise NotImplementedError

def cache_file(self, path, saltenv=u'base', cachedir=None, source_hash=None):
    '''
    Fetch a single file from the fileserver and store it in the minion's
    file cache, returning the local cached path.

    path
        The URL of the file to cache (salt://, http(s)://, ftp://, etc.)

    saltenv : base
        Fileserver environment from which to retrieve the file

    cachedir
        Optional alternate cache directory

    source_hash
        If the file is an http(s)/ftp URL and a cached copy exists whose
        hash matches this value, the download is skipped
    '''
    # Delegate to get_url with an empty destination and makedirs=True,
    # which caches the file under the minion cache directory.
    return self.get_url(
        path,
        dest=u'',
        makedirs=True,
        saltenv=saltenv,
        cachedir=cachedir,
        source_hash=source_hash)

def cache_files(self, paths, saltenv=u'base', cachedir=None):
'''
Expand Down Expand Up @@ -470,7 +471,7 @@ def get_dir(self, path, dest=u'', saltenv=u'base', gzip=None,
return ret

def get_url(self, url, dest, makedirs=False, saltenv=u'base',
no_cache=False, cachedir=None):
no_cache=False, cachedir=None, source_hash=None):
'''
Get a single file from a URL.
'''
Expand Down Expand Up @@ -525,14 +526,28 @@ def get_url(self, url, dest, makedirs=False, saltenv=u'base',
return u''
elif not no_cache:
dest = self._extrn_path(url, saltenv, cachedir=cachedir)
if source_hash is not None:
try:
source_hash = source_hash.split('=')[-1]
form = salt.utils.files.HASHES_REVMAP[len(source_hash)]
if salt.utils.get_hash(dest, form) == source_hash:
log.debug(
'Cached copy of %s (%s) matches source_hash %s, '
'skipping download', url, dest, source_hash
)
return dest
except (AttributeError, KeyError, IOError, OSError):
pass
destdir = os.path.dirname(dest)
if not os.path.isdir(destdir):
os.makedirs(destdir)

if url_data.scheme == u's3':
try:
def s3_opt(key, default=None):
u'''Get value of s3.<key> from Minion config or from Pillar'''
'''
Get value of s3.<key> from Minion config or from Pillar
'''
if u's3.' + key in self.opts:
return self.opts[u's3.' + key]
try:
Expand Down Expand Up @@ -785,7 +800,7 @@ def get_template(

def _extrn_path(self, url, saltenv, cachedir=None):
'''
Return the extn_filepath for a given url
Return the extrn_filepath for a given url
'''
url_data = urlparse(url)
if salt.utils.platform.is_windows():
Expand Down
31 changes: 27 additions & 4 deletions salt/modules/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ def list_(name,
strip_components=None,
clean=False,
verbose=False,
saltenv='base'):
saltenv='base',
source_hash=None):
'''
.. versionadded:: 2016.11.0
.. versionchanged:: 2016.11.2
Expand Down Expand Up @@ -149,6 +150,14 @@ def list_(name,
``archive``. This is only applicable when ``archive`` is a file from
the ``salt://`` fileserver.
source_hash
If ``name`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
.. _tarfile: https://docs.python.org/2/library/tarfile.html
.. _xz: http://tukaani.org/xz/
Expand All @@ -160,6 +169,7 @@ def list_(name,
salt '*' archive.list /path/to/myfile.tar.gz strip_components=1
salt '*' archive.list salt://foo.tar.gz
salt '*' archive.list https://domain.tld/myfile.zip
salt '*' archive.list https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15
salt '*' archive.list ftp://10.1.2.3/foo.rar
'''
def _list_tar(name, cached, decompress_cmd, failhard=False):
Expand Down Expand Up @@ -309,7 +319,7 @@ def _list_rar(name, cached):
)
return dirs, files, []

cached = __salt__['cp.cache_file'](name, saltenv)
cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash)
if not cached:
raise CommandExecutionError('Failed to cache {0}'.format(name))

Expand Down Expand Up @@ -1094,7 +1104,7 @@ def unzip(zip_file,
return _trim_files(cleaned_files, trim_output)


def is_encrypted(name, clean=False, saltenv='base'):
def is_encrypted(name, clean=False, saltenv='base', source_hash=None):
'''
.. versionadded:: 2016.11.0
Expand All @@ -1113,6 +1123,18 @@ def is_encrypted(name, clean=False, saltenv='base'):
If there is an error listing the archive's contents, the cached
file will not be removed, to allow for troubleshooting.
saltenv : base
Specifies the fileserver environment from which to retrieve
``archive``. This is only applicable when ``archive`` is a file from
the ``salt://`` fileserver.
source_hash
If ``name`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
CLI Examples:
Expand All @@ -1122,9 +1144,10 @@ def is_encrypted(name, clean=False, saltenv='base'):
salt '*' archive.is_encrypted salt://foo.zip
salt '*' archive.is_encrypted salt://foo.zip saltenv=dev
salt '*' archive.is_encrypted https://domain.tld/myfile.zip clean=True
salt '*' archive.is_encrypted https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15
salt '*' archive.is_encrypted ftp://10.1.2.3/foo.zip
'''
cached = __salt__['cp.cache_file'](name, saltenv)
cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash)
if not cached:
raise CommandExecutionError('Failed to cache {0}'.format(name))

Expand Down
28 changes: 22 additions & 6 deletions salt/modules/cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def get_dir(path, dest, saltenv='base', template=None, gzip=None, **kwargs):
return _client().get_dir(path, dest, saltenv, gzip)


def get_url(path, dest='', saltenv='base', makedirs=False):
def get_url(path, dest='', saltenv='base', makedirs=False, source_hash=None):
'''
.. versionchanged:: Oxygen
``dest`` can now be a directory
Expand Down Expand Up @@ -386,6 +386,13 @@ def get_url(path, dest='', saltenv='base', makedirs=False):
Salt fileserver environment from which to retrieve the file. Ignored if
``path`` is not a ``salt://`` URL.
source_hash
If ``path`` is an http(s) or ftp URL and the file exists in the
minion's file cache, this option can be passed to keep the minion from
re-downloading the file if the cached copy matches the specified hash.
.. versionadded:: Oxygen
CLI Example:
.. code-block:: bash
Expand All @@ -394,9 +401,11 @@ def get_url(path, dest='', saltenv='base', makedirs=False):
salt '*' cp.get_url http://www.slashdot.org /tmp/index.html
'''
if isinstance(dest, six.string_types):
result = _client().get_url(path, dest, makedirs, saltenv)
result = _client().get_url(
path, dest, makedirs, saltenv, source_hash=source_hash)
else:
result = _client().get_url(path, None, makedirs, saltenv, no_cache=True)
result = _client().get_url(
path, None, makedirs, saltenv, no_cache=True, source_hash=source_hash)
if not result:
log.error(
'Unable to fetch file {0} from saltenv {1}.'.format(
Expand Down Expand Up @@ -429,11 +438,18 @@ def get_file_str(path, saltenv='base'):
return fn_


def cache_file(path, saltenv='base'):
def cache_file(path, saltenv='base', source_hash=None):
'''
Used to cache a single file on the Minion
Returns the location of the new cached file on the Minion.
Returns the location of the new cached file on the Minion
source_hash
If ``name`` is an http(s) or ftp URL and the file exists in the
minion's file cache, this option can be passed to keep the minion from
re-downloading the file if the cached copy matches the specified hash.
.. versionadded:: Oxygen
CLI Example:
Expand Down Expand Up @@ -485,7 +501,7 @@ def cache_file(path, saltenv='base'):
if senv:
saltenv = senv

result = _client().cache_file(path, saltenv)
result = _client().cache_file(path, saltenv, source_hash=source_hash)
if not result:
log.error(
u'Unable to cache file \'%s\' from saltenv \'%s\'.',
Expand Down
69 changes: 40 additions & 29 deletions salt/modules/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,23 +60,14 @@
import salt.utils.templates
import salt.utils.url
from salt.exceptions import CommandExecutionError, MinionError, SaltInvocationError, get_error_message as _get_error_message
from salt.utils.files import HASHES, HASHES_REVMAP

log = logging.getLogger(__name__)

__func_alias__ = {
'makedirs_': 'makedirs'
}

HASHES = {
'sha512': 128,
'sha384': 96,
'sha256': 64,
'sha224': 56,
'sha1': 40,
'md5': 32,
}
HASHES_REVMAP = dict([(y, x) for x, y in six.iteritems(HASHES)])


def __virtual__():
'''
Expand Down Expand Up @@ -3767,14 +3758,8 @@ def source_list(source, source_hash, saltenv):
ret = (single_src, single_hash)
break
elif proto.startswith('http') or proto == 'ftp':
try:
if __salt__['cp.cache_file'](single_src):
ret = (single_src, single_hash)
break
except MinionError as exc:
# Error downloading file. Log the caught exception and
# continue on to the next source.
log.exception(exc)
ret = (single_src, single_hash)
break
elif proto == 'file' and os.path.exists(urlparsed_single_src.path):
ret = (single_src, single_hash)
break
Expand All @@ -3794,9 +3779,8 @@ def source_list(source, source_hash, saltenv):
ret = (single, source_hash)
break
elif proto.startswith('http') or proto == 'ftp':
if __salt__['cp.cache_file'](single):
ret = (single, source_hash)
break
ret = (single, source_hash)
break
elif single.startswith('/') and os.path.exists(single):
ret = (single, source_hash)
break
Expand Down Expand Up @@ -4007,11 +3991,14 @@ def _get_local_file_source_sum(path):
else:
sfn = cached_dest

# If we didn't have the template or remote file, let's get it
# Similarly when the file has been updated and the cache has to be refreshed
# If we didn't have the template or remote file, or the file has been
# updated and the cache has to be refreshed, download the file.
if not sfn or cache_refetch:
try:
sfn = __salt__['cp.cache_file'](source, saltenv)
sfn = __salt__['cp.cache_file'](
source,
saltenv,
source_hash=source_sum.get('hsum'))
except Exception as exc:
# A 404 or other error code may raise an exception, catch it
# and return a comment that will fail the calling state.
Expand Down Expand Up @@ -4675,15 +4662,18 @@ def check_file_meta(
'''
changes = {}
if not source_sum:
source_sum = dict()
source_sum = {}
lstats = stats(name, hash_type=source_sum.get('hash_type', None), follow_symlinks=False)
if not lstats:
changes['newfile'] = name
return changes
if 'hsum' in source_sum:
if source_sum['hsum'] != lstats['sum']:
if not sfn and source:
sfn = __salt__['cp.cache_file'](source, saltenv)
sfn = __salt__['cp.cache_file'](
source,
saltenv,
source_hash=source_sum['hsum'])
if sfn:
try:
changes['diff'] = get_diff(
Expand Down Expand Up @@ -4750,7 +4740,9 @@ def get_diff(file1,
saltenv='base',
show_filenames=True,
show_changes=True,
template=False):
template=False,
source_hash_file1=None,
source_hash_file2=None):
'''
Return unified diff of two files
Expand Down Expand Up @@ -4785,6 +4777,22 @@ def get_diff(file1,
.. versionadded:: Oxygen
source_hash_file1
If ``file1`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
source_hash_file2
If ``file2`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
CLI Examples:
.. code-block:: bash
Expand All @@ -4793,14 +4801,17 @@ def get_diff(file1,
salt '*' file.get_diff /tmp/foo.txt /tmp/bar.txt
'''
files = (file1, file2)
source_hashes = (source_hash_file1, source_hash_file2)
paths = []
errors = []

for filename in files:
for filename, source_hash in zip(files, source_hashes):
try:
# Local file paths will just return the same path back when passed
# to cp.cache_file.
cached_path = __salt__['cp.cache_file'](filename, saltenv)
cached_path = __salt__['cp.cache_file'](filename,
saltenv,
source_hash=source_hash)
if cached_path is False:
errors.append(
u'File {0} not found'.format(
Expand Down
Loading