Skip to content

Commit

Permalink
fix(metadata): enhance retry logic for metadata server access in _met…
Browse files Browse the repository at this point in the history
…adata.py (#1545)

* fix(metadata): Use exponential backoff retry logic for GCE metadata server access
  • Loading branch information
mrvarmazyar authored Jul 12, 2024
1 parent 461a3f5 commit 61c2432
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 16 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ A few notes on making changes to ``google-auth-library-python``.
- If you've added a new feature or modified an existing feature, be sure to
add or update any applicable documentation in docstrings and in the
documentation (in ``docs/``). You can re-generate the reference documentation
using ``nox -s docgen``.
using ``nox -s docs``.

- The change must work fully on the following CPython versions:
3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 across macOS, Linux, and Windows.
Expand Down
21 changes: 11 additions & 10 deletions google/auth/compute_engine/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@
from google.auth import environment_vars
from google.auth import exceptions
from google.auth import metrics
from google.auth._exponential_backoff import ExponentialBackoff

_LOGGER = logging.getLogger(__name__)

# Environment variable GCE_METADATA_HOST is originally named
# GCE_METADATA_ROOT. For compatiblity reasons, here it checks
# GCE_METADATA_ROOT. For compatibility reasons, here it checks
# the new variable first; if not set, the system falls back
# to the old variable.
_GCE_METADATA_HOST = os.getenv(environment_vars.GCE_METADATA_HOST, None)
Expand Down Expand Up @@ -119,11 +120,12 @@ def ping(request, timeout=_METADATA_DEFAULT_TIMEOUT, retry_count=3):
# could lead to false negatives in the event that we are on GCE, but
# the metadata resolution was particularly slow. The latter case is
# "unlikely".
retries = 0
headers = _METADATA_HEADERS.copy()
headers[metrics.API_CLIENT_HEADER] = metrics.mds_ping()

while retries < retry_count:
backoff = ExponentialBackoff(total_attempts=retry_count)

for attempt in backoff:
try:
response = request(
url=_METADATA_IP_ROOT, method="GET", headers=headers, timeout=timeout
Expand All @@ -139,11 +141,10 @@ def ping(request, timeout=_METADATA_DEFAULT_TIMEOUT, retry_count=3):
_LOGGER.warning(
"Compute Engine Metadata server unavailable on "
"attempt %s of %s. Reason: %s",
retries + 1,
attempt,
retry_count,
e,
)
retries += 1

return False

Expand Down Expand Up @@ -179,7 +180,7 @@ def get(
Returns:
Union[Mapping, str]: If the metadata server returns JSON, a mapping of
the decoded JSON is return. Otherwise, the response content is
the decoded JSON is returned. Otherwise, the response content is
returned as a string.
Raises:
Expand All @@ -198,8 +199,9 @@ def get(

url = _helpers.update_query(base_url, query_params)

retries = 0
while retries < retry_count:
backoff = ExponentialBackoff(total_attempts=retry_count)

for attempt in backoff:
try:
response = request(url=url, method="GET", headers=headers_to_use)
break
Expand All @@ -208,11 +210,10 @@ def get(
_LOGGER.warning(
"Compute Engine Metadata server unavailable on "
"attempt %s of %s. Reason: %s",
retries + 1,
attempt,
retry_count,
e,
)
retries += 1
else:
raise exceptions.TransportError(
"Failed to retrieve {} from the Google Compute Engine "
Expand Down
15 changes: 10 additions & 5 deletions tests/compute_engine/test__metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,15 @@ def test_ping_success_retry(mock_metrics_header_value):
assert request.call_count == 2


def test_ping_failure_bad_flavor():
@mock.patch("time.sleep", return_value=None)
def test_ping_failure_bad_flavor(mock_sleep):
request = make_request("", headers={_metadata._METADATA_FLAVOR_HEADER: "meep"})

assert not _metadata.ping(request)


def test_ping_failure_connection_failed():
@mock.patch("time.sleep", return_value=None)
def test_ping_failure_connection_failed(mock_sleep):
request = make_request("")
request.side_effect = exceptions.TransportError()

Expand Down Expand Up @@ -194,7 +196,8 @@ def test_get_success_json_content_type_charset():
assert result[key] == value


def test_get_success_retry():
@mock.patch("time.sleep", return_value=None)
def test_get_success_retry(mock_sleep):
key, value = "foo", "bar"

data = json.dumps({key: value})
Expand Down Expand Up @@ -310,7 +313,8 @@ def test_get_success_custom_root_old_variable():
)


def test_get_failure():
@mock.patch("time.sleep", return_value=None)
def test_get_failure(mock_sleep):
request = make_request("Metadata error", status=http_client.NOT_FOUND)

with pytest.raises(exceptions.TransportError) as excinfo:
Expand All @@ -337,7 +341,8 @@ def test_get_return_none_for_not_found_error():
)


def test_get_failure_connection_failed():
@mock.patch("time.sleep", return_value=None)
def test_get_failure_connection_failed(mock_sleep):
request = make_request("")
request.side_effect = exceptions.TransportError()

Expand Down

0 comments on commit 61c2432

Please sign in to comment.