Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Add Etag support for file.managed web sources #61391

Merged
merged 9 commits into from
Jan 10, 2022
1 change: 1 addition & 0 deletions changelog/61270.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Etag support for file.managed web sources
30 changes: 28 additions & 2 deletions salt/fileclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,13 @@ def file_list_emptydirs(self, saltenv="base", prefix=""):
raise NotImplementedError

def cache_file(
self, path, saltenv="base", cachedir=None, source_hash=None, verify_ssl=True
self,
path,
saltenv="base",
cachedir=None,
source_hash=None,
verify_ssl=True,
use_etag=False,
):
"""
Pull a file down from the file server and store it in the minion
Expand All @@ -185,6 +191,7 @@ def cache_file(
cachedir=cachedir,
source_hash=source_hash,
verify_ssl=verify_ssl,
use_etag=use_etag,
)

def cache_files(self, paths, saltenv="base", cachedir=None):
Expand Down Expand Up @@ -465,6 +472,7 @@ def get_url(
cachedir=None,
source_hash=None,
verify_ssl=True,
use_etag=False,
):
"""
Get a single file from a URL.
Expand Down Expand Up @@ -642,6 +650,7 @@ def swift_opt(key, default):
fixed_url = url

destfp = None
dest_etag = "{}.etag".format(dest)
try:
# Tornado calls streaming_callback on redirect response bodies.
# But we need streaming to support fetching large files (> RAM
Expand Down Expand Up @@ -687,7 +696,10 @@ def on_header(hdr):
# Try to find out what content type encoding is used if
# this is a text file
write_body[1].parse_line(hdr) # pylint: disable=no-member
if "Content-Type" in write_body[1]:
if use_etag and "Etag" in write_body[1]:
with salt.utils.files.fopen(dest_etag, "w") as etagfp:
etag = etagfp.write(write_body[1].get("Etag"))
elif "Content-Type" in write_body[1]:
content_type = write_body[1].get(
"Content-Type"
) # pylint: disable=no-member
Expand Down Expand Up @@ -744,6 +756,14 @@ def on_chunk(chunk):
if write_body[0]:
destfp.write(chunk)

# ETag is only used for refetch. Cached file and previous ETag
# should be present for verification.
header_dict = {}
if use_etag and os.path.exists(dest_etag) and os.path.exists(dest):
with salt.utils.files.fopen(dest_etag, "r") as etagfp:
etag = etagfp.read().replace("\n", "").strip()
header_dict["If-None-Match"] = etag

query = salt.utils.http.query(
fixed_url,
stream=True,
Expand All @@ -753,8 +773,14 @@ def on_chunk(chunk):
password=url_data.password,
opts=self.opts,
verify_ssl=verify_ssl,
header_dict=header_dict,
**get_kwargs
)

# 304 Not Modified is returned when If-None-Match header
# matches server ETag for requested file.
if use_etag and query.get("status") == 304:
return dest
if "handle" not in query:
raise MinionError(
"Error: {} reading {}".format(query["error"], url_data.path)
Expand Down
15 changes: 12 additions & 3 deletions salt/modules/cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def get_file_str(path, saltenv="base"):
return fn_


def cache_file(path, saltenv="base", source_hash=None, verify_ssl=True):
def cache_file(path, saltenv="base", source_hash=None, verify_ssl=True, use_etag=False):
"""
Used to cache a single file on the Minion

Expand All @@ -443,6 +443,15 @@ def cache_file(path, saltenv="base", source_hash=None, verify_ssl=True):

.. versionadded:: 3002

use_etag
If ``True``, remote http/https file sources will attempt to use the
ETag header to determine if the remote file needs to be downloaded.
This provides a lightweight mechanism for promptly refreshing files
changed on a web server without requiring a full hash comparison via
the ``source_hash`` parameter.

.. versionadded:: 3005

CLI Example:

.. code-block:: bash
Expand Down Expand Up @@ -496,9 +505,9 @@ def cache_file(path, saltenv="base", source_hash=None, verify_ssl=True):
saltenv = senv

result = _client().cache_file(
path, saltenv, source_hash=source_hash, verify_ssl=verify_ssl
path, saltenv, source_hash=source_hash, verify_ssl=verify_ssl, use_etag=use_etag
)
if not result:
if not result and not use_etag:
log.error("Unable to cache file '%s' from saltenv '%s'.", path, saltenv)
if path_is_remote:
# Cache was successful, store the result in __context__ to prevent
Expand Down
51 changes: 41 additions & 10 deletions salt/modules/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4462,6 +4462,7 @@ def get_managed(
defaults,
skip_verify=False,
verify_ssl=True,
use_etag=False,
**kwargs
):
"""
Expand Down Expand Up @@ -4518,6 +4519,15 @@ def get_managed(

.. versionadded:: 3002

use_etag
If ``True``, remote http/https file sources will attempt to use the
ETag header to determine if the remote file needs to be downloaded.
This provides a lightweight mechanism for promptly refreshing files
changed on a web server without requiring a full hash comparison via
the ``source_hash`` parameter.

.. versionadded:: 3005

CLI Example:

.. code-block:: bash
Expand Down Expand Up @@ -4589,7 +4599,7 @@ def _get_local_file_source_sum(path):
)
except CommandExecutionError as exc:
return "", {}, exc.strerror
else:
elif not use_etag:
msg = (
"Unable to verify upstream hash of source file {}, "
"please set source_hash or set skip_verify to True".format(
Expand All @@ -4602,15 +4612,17 @@ def _get_local_file_source_sum(path):
# Check if we have the template or remote file cached
cache_refetch = False
cached_dest = __salt__["cp.is_cached"](source, saltenv)
if cached_dest and (source_hash or skip_verify):
if cached_dest and (source_hash or skip_verify or use_etag):
htype = source_sum.get("hash_type", "sha256")
cached_sum = get_hash(cached_dest, form=htype)
if skip_verify:
# prev: if skip_verify or cached_sum == source_sum['hsum']:
# but `cached_sum == source_sum['hsum']` is elliptical as prev if
sfn = cached_dest
source_sum = {"hsum": cached_sum, "hash_type": htype}
elif cached_sum != source_sum.get("hsum", __opts__["hash_type"]):
elif use_etag or cached_sum != source_sum.get(
"hsum", __opts__["hash_type"]
):
cache_refetch = True
else:
sfn = cached_dest
Expand All @@ -4624,6 +4636,7 @@ def _get_local_file_source_sum(path):
saltenv,
source_hash=source_sum.get("hsum"),
verify_ssl=verify_ssl,
use_etag=use_etag,
)
except Exception as exc: # pylint: disable=broad-except
# A 404 or other error code may raise an exception, catch it
Expand Down Expand Up @@ -5812,6 +5825,7 @@ def manage_file(
setype=None,
serange=None,
verify_ssl=True,
use_etag=False,
**kwargs
):
"""
Expand Down Expand Up @@ -5932,6 +5946,15 @@ def manage_file(

.. versionadded:: 3002

use_etag
If ``True``, remote http/https file sources will attempt to use the
ETag header to determine if the remote file needs to be downloaded.
This provides a lightweight mechanism for promptly refreshing files
changed on a web server without requiring a full hash comparison via
the ``source_hash`` parameter.

.. versionadded:: 3005

CLI Example:

.. code-block:: bash
Expand All @@ -5943,6 +5966,12 @@ def manage_file(

"""
name = os.path.expanduser(name)
check_web_source_hash = bool(
source
and urllib.parse.urlparse(source).scheme != "salt"
and not skip_verify
and not use_etag
)
nicholasmhughes marked this conversation as resolved.
Show resolved Hide resolved

if not ret:
ret = {"name": name, "changes": {}, "comment": "", "result": True}
Expand Down Expand Up @@ -5988,13 +6017,15 @@ def manage_file(
or source_sum.get("hsum", __opts__["hash_type"]) != name_sum
):
if not sfn:
sfn = __salt__["cp.cache_file"](source, saltenv, verify_ssl=verify_ssl)
sfn = __salt__["cp.cache_file"](
source, saltenv, verify_ssl=verify_ssl, use_etag=use_etag
)
if not sfn:
return _error(ret, "Source file '{}' not found".format(source))
# If the downloaded file came from a non salt server or local
# source, and we are not skipping checksum verification, then
# verify that it matches the specified checksum.
if not skip_verify and urllib.parse.urlparse(source).scheme != "salt":
if check_web_source_hash:
dl_sum = get_hash(sfn, source_sum["hash_type"])
if dl_sum != source_sum["hsum"]:
ret["comment"] = (
Expand All @@ -6016,9 +6047,9 @@ def manage_file(
ret["changes"]["diff"] = "<show_changes=False>"
else:
try:
ret["changes"]["diff"] = get_diff(
real_name, sfn, show_filenames=False
)
file_diff = get_diff(real_name, sfn, show_filenames=False)
if file_diff:
ret["changes"]["diff"] = file_diff
except CommandExecutionError as exc:
ret["changes"]["diff"] = exc.strerror

Expand Down Expand Up @@ -6091,7 +6122,7 @@ def manage_file(
return _error(ret, "Source file '{}' not found".format(source))
# If the downloaded file came from a non salt server source verify
# that it matches the intended sum value
if not skip_verify and urllib.parse.urlparse(source).scheme != "salt":
if check_web_source_hash:
dl_sum = get_hash(sfn, source_sum["hash_type"])
if dl_sum != source_sum["hsum"]:
ret["comment"] = (
Expand Down Expand Up @@ -6200,7 +6231,7 @@ def _set_mode_and_make_dirs(name, dir_mode, mode, user, group):
return _error(ret, "Source file '{}' not found".format(source))
# If the downloaded file came from a non salt server source verify
# that it matches the intended sum value
if not skip_verify and urllib.parse.urlparse(source).scheme != "salt":
if check_web_source_hash:
dl_sum = get_hash(sfn, source_sum["hash_type"])
if dl_sum != source_sum["hsum"]:
ret["comment"] = (
Expand Down
13 changes: 13 additions & 0 deletions salt/states/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2136,6 +2136,7 @@ def managed(
win_inheritance=True,
win_perms_reset=False,
verify_ssl=True,
use_etag=False,
**kwargs
):
r"""
Expand Down Expand Up @@ -2716,6 +2717,15 @@ def managed(
will not attempt to validate the servers certificate. Default is True.

.. versionadded:: 3002

use_etag
If ``True``, remote http/https file sources will attempt to use the
ETag header to determine if the remote file needs to be downloaded.
This provides a lightweight mechanism for promptly refreshing files
changed on a web server without requiring a full hash comparison via
the ``source_hash`` parameter.

.. versionadded:: 3005
"""
if "env" in kwargs:
# "env" is not supported; Use "saltenv".
Expand Down Expand Up @@ -3082,6 +3092,7 @@ def managed(
defaults,
skip_verify,
verify_ssl=verify_ssl,
use_etag=use_etag,
**kwargs
)
except Exception as exc: # pylint: disable=broad-except
Expand Down Expand Up @@ -3136,6 +3147,7 @@ def managed(
serole=serole,
setype=setype,
serange=serange,
use_etag=use_etag,
**kwargs
)
except Exception as exc: # pylint: disable=broad-except
Expand Down Expand Up @@ -3214,6 +3226,7 @@ def managed(
serole=serole,
setype=setype,
serange=serange,
use_etag=use_etag,
**kwargs
)
except Exception as exc: # pylint: disable=broad-except
Expand Down
Loading