From a8e7f4793ab792cfa5694bbe7c534eeb39dd3360 Mon Sep 17 00:00:00 2001 From: Ilya Figotin Date: Wed, 22 Jul 2020 17:54:06 -0700 Subject: [PATCH 1/3] Use kaggle-web-client for kaggle_secrets --- patches/kaggle_secrets.py | 68 +++--------------------------------- patches/kaggle_web_client.py | 23 ++++++++---- 2 files changed, 21 insertions(+), 70 deletions(-) diff --git a/patches/kaggle_secrets.py b/patches/kaggle_secrets.py index 1ee81f78..802fde20 100644 --- a/patches/kaggle_secrets.py +++ b/patches/kaggle_secrets.py @@ -4,31 +4,12 @@ (ie. BigQuery). """ -import json import os -import socket -import urllib.request from datetime import datetime, timedelta from enum import Enum, unique from typing import Optional, Tuple -from urllib.error import HTTPError, URLError - -_KAGGLE_DEFAULT_URL_BASE = "https://www.kaggle.com" -_KAGGLE_URL_BASE_ENV_VAR_NAME = "KAGGLE_URL_BASE" -_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME = "KAGGLE_USER_SECRETS_TOKEN" -TIMEOUT_SECS = 40 - - -class CredentialError(Exception): - pass - - -class BackendError(Exception): - pass - - -class ValidationError(Exception): - pass +from kaggle_web_client import KaggleWebClient +from kaggle_web_client import (CredentialError, BackendError, ValidationError) class NotFoundError(Exception): pass @@ -56,48 +37,9 @@ def service(self): class UserSecretsClient(): GET_USER_SECRET_ENDPOINT = '/requests/GetUserSecretRequest' GET_USER_SECRET_BY_LABEL_ENDPOINT = '/requests/GetUserSecretByLabelRequest' - BIGQUERY_TARGET_VALUE = 1 def __init__(self): - url_base_override = os.getenv(_KAGGLE_URL_BASE_ENV_VAR_NAME) - self.url_base = url_base_override or _KAGGLE_DEFAULT_URL_BASE - # Follow the OAuth 2.0 Authorization standard (https://tools.ietf.org/html/rfc6750) - self.jwt_token = os.getenv(_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME) - if self.jwt_token is None: - raise CredentialError( - 'A JWT Token is required to use the UserSecretsClient, ' - f'but none found in environment variable {_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME}') - self.headers = {'Content-type': 'application/json'} - - def _make_post_request(self, data: dict, endpoint: str = GET_USER_SECRET_ENDPOINT) -> dict: - # TODO(b/148309982) This code and the code in the constructor should be - # removed and this class should use the new KaggleWebClient class instead. - url = f'{self.url_base}{endpoint}' - request_body = dict(data) - request_body['JWE'] = self.jwt_token - req = urllib.request.Request(url, headers=self.headers, data=bytes( - json.dumps(request_body), encoding="utf-8")) - try: - with urllib.request.urlopen(req, timeout=TIMEOUT_SECS) as response: - response_json = json.loads(response.read()) - if not response_json.get('wasSuccessful') or 'result' not in response_json: - raise BackendError( - f'Unexpected response from the service. Response: {response_json}.') - return response_json['result'] - except (URLError, socket.timeout) as e: - if isinstance( - e, socket.timeout) or isinstance( - e.reason, socket.timeout): - raise ConnectionError( - 'Timeout error trying to communicate with service. Please ensure internet is on.') from e - raise ConnectionError( - 'Connection error trying to communicate with service.') from e - except HTTPError as e: - if e.code == 401 or e.code == 403: - raise CredentialError( - f'Service responded with error code {e.code}.' - ' Please ensure you have access to the resource.') from e - raise BackendError('Unexpected response from the service.') from e + self.web_client = KaggleWebClient() def get_secret(self, label) -> str: """Retrieves a user secret value by its label. @@ -113,7 +55,7 @@ def get_secret(self, label) -> str: request_body = { 'Label': label, } - response_json = self._make_post_request(request_body, self.GET_USER_SECRET_BY_LABEL_ENDPOINT) + response_json = self.web_client.make_post_request(request_body, self.GET_USER_SECRET_BY_LABEL_ENDPOINT) if 'secret' not in response_json: raise BackendError( f'Unexpected response from the service. Response: {response_json}') @@ -174,7 +116,7 @@ def _get_access_token(self, target: GcpTarget) -> Tuple[str, Optional[datetime]] request_body = { 'Target': target.target } - response_json = self._make_post_request(request_body) + response_json = self.web_client.make_post_request(request_body, self.GET_USER_SECRET_ENDPOINT) if 'secret' not in response_json: raise BackendError( f'Unexpected response from the service. Response: {response_json}') diff --git a/patches/kaggle_web_client.py b/patches/kaggle_web_client.py index 46205c2e..bdeaae5d 100644 --- a/patches/kaggle_web_client.py +++ b/patches/kaggle_web_client.py @@ -2,14 +2,23 @@ import os import socket import urllib.request -from datetime import datetime, timedelta -from enum import Enum, unique -from typing import Optional, Tuple from urllib.error import HTTPError, URLError -from kaggle_secrets import (_KAGGLE_DEFAULT_URL_BASE, - _KAGGLE_URL_BASE_ENV_VAR_NAME, - _KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME, - CredentialError, BackendError, ValidationError) + +_KAGGLE_DEFAULT_URL_BASE = "https://www.kaggle.com" +_KAGGLE_URL_BASE_ENV_VAR_NAME = "KAGGLE_URL_BASE" +_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME = "KAGGLE_USER_SECRETS_TOKEN" +TIMEOUT_SECS = 40 + +class CredentialError(Exception): + pass + + +class BackendError(Exception): + pass + + +class ValidationError(Exception): + pass class KaggleWebClient: TIMEOUT_SECS = 600 From 4e076475a33893483bc027b10e527e353f6a2f35 Mon Sep 17 00:00:00 2001 From: Ilya Figotin Date: Wed, 22 Jul 2020 18:28:19 -0700 Subject: [PATCH 2/3] Fix tests --- tests/test_datasets.py | 4 ++-- tests/test_user_secrets.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index e956738c..e6592272 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -6,10 +6,10 @@ from test.support import EnvironmentVarGuard from urllib.parse import urlparse -from kaggle_secrets import (_KAGGLE_URL_BASE_ENV_VAR_NAME, +from kaggle_web_client import (KaggleWebClient, + _KAGGLE_URL_BASE_ENV_VAR_NAME, _KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME, CredentialError, BackendError, ValidationError) -from kaggle_web_client import KaggleWebClient from kaggle_datasets import KaggleDatasets, _KAGGLE_TPU_NAME_ENV_VAR_NAME _TEST_JWT = 'test-secrets-key' diff --git a/tests/test_user_secrets.py b/tests/test_user_secrets.py index 748cb652..12c02105 100644 --- a/tests/test_user_secrets.py +++ b/tests/test_user_secrets.py @@ -10,10 +10,11 @@ from google.auth.exceptions import DefaultCredentialsError from google.cloud import bigquery -from kaggle_secrets import (_KAGGLE_URL_BASE_ENV_VAR_NAME, +from kaggle_secrets import (GcpTarget, UserSecretsClient, + NotFoundError) +from kaggle_web_client import (_KAGGLE_URL_BASE_ENV_VAR_NAME, _KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME, - CredentialError, GcpTarget, UserSecretsClient, - BackendError, NotFoundError, ValidationError) + CredentialError, BackendError, ValidationError) _TEST_JWT = 'test-secrets-key' From 2259a073cae511afcc8a30f1026d5f76ecf2846b Mon Sep 17 00:00:00 2001 From: Ilya Figotin Date: Thu, 23 Jul 2020 09:22:03 -0700 Subject: [PATCH 3/3] Move timeout --- patches/kaggle_datasets.py | 3 ++- patches/kaggle_secrets.py | 5 ++++- patches/kaggle_web_client.py | 8 ++------ tests/test_datasets.py | 2 +- tests/test_user_secrets.py | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/patches/kaggle_datasets.py b/patches/kaggle_datasets.py index 654e025f..f5134673 100644 --- a/patches/kaggle_datasets.py +++ b/patches/kaggle_datasets.py @@ -5,6 +5,7 @@ class KaggleDatasets: GET_GCS_PATH_ENDPOINT = '/requests/CopyDatasetVersionToKnownGcsBucketRequest' + TIMEOUT_SECS = 600 # Integration types for GCS AUTO_ML = 1 @@ -20,5 +21,5 @@ def get_gcs_path(self, dataset_dir: str = None) -> str: 'MountSlug': dataset_dir, 'IntegrationType': integration_type, } - result = self.web_client.make_post_request(data, self.GET_GCS_PATH_ENDPOINT) + result = self.web_client.make_post_request(data, self.GET_GCS_PATH_ENDPOINT, self.TIMEOUT_SECS) return result['destinationBucket'] diff --git a/patches/kaggle_secrets.py b/patches/kaggle_secrets.py index 802fde20..a52896b0 100644 --- a/patches/kaggle_secrets.py +++ b/patches/kaggle_secrets.py @@ -9,7 +9,10 @@ from enum import Enum, unique from typing import Optional, Tuple from kaggle_web_client import KaggleWebClient -from kaggle_web_client import (CredentialError, BackendError, ValidationError) +from kaggle_web_client import (CredentialError, BackendError) + +class ValidationError(Exception): + pass class NotFoundError(Exception): pass diff --git a/patches/kaggle_web_client.py b/patches/kaggle_web_client.py index bdeaae5d..5b6463ed 100644 --- a/patches/kaggle_web_client.py +++ b/patches/kaggle_web_client.py @@ -17,11 +17,7 @@ class BackendError(Exception): pass -class ValidationError(Exception): - pass - class KaggleWebClient: - TIMEOUT_SECS = 600 def __init__(self): url_base_override = os.getenv(_KAGGLE_URL_BASE_ENV_VAR_NAME) @@ -38,14 +34,14 @@ def __init__(self): 'X-Kaggle-Authorization': f'Bearer {self.jwt_token}', } - def make_post_request(self, data: dict, endpoint: str) -> dict: + def make_post_request(self, data: dict, endpoint: str, timeout: int = TIMEOUT_SECS) -> dict: url = f'{self.url_base}{endpoint}' request_body = dict(data) request_body['JWE'] = self.jwt_token req = urllib.request.Request(url, headers=self.headers, data=bytes( json.dumps(request_body), encoding="utf-8")) try: - with urllib.request.urlopen(req, timeout=self.TIMEOUT_SECS) as response: + with urllib.request.urlopen(req, timeout=timeout) as response: response_json = json.loads(response.read()) if not response_json.get('wasSuccessful') or 'result' not in response_json: raise BackendError( diff --git a/tests/test_datasets.py b/tests/test_datasets.py index e6592272..54516318 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -9,7 +9,7 @@ from kaggle_web_client import (KaggleWebClient, _KAGGLE_URL_BASE_ENV_VAR_NAME, _KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME, - CredentialError, BackendError, ValidationError) + CredentialError, BackendError) from kaggle_datasets import KaggleDatasets, _KAGGLE_TPU_NAME_ENV_VAR_NAME _TEST_JWT = 'test-secrets-key' diff --git a/tests/test_user_secrets.py b/tests/test_user_secrets.py index 12c02105..c9c77693 100644 --- a/tests/test_user_secrets.py +++ b/tests/test_user_secrets.py @@ -11,10 +11,10 @@ from google.auth.exceptions import DefaultCredentialsError from google.cloud import bigquery from kaggle_secrets import (GcpTarget, UserSecretsClient, - NotFoundError) + NotFoundError, ValidationError) from kaggle_web_client import (_KAGGLE_URL_BASE_ENV_VAR_NAME, _KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME, - CredentialError, BackendError, ValidationError) + CredentialError, BackendError) _TEST_JWT = 'test-secrets-key'