Skip to content

Commit

Permalink
Glue: Allow for assuming role for Glue
Browse files Browse the repository at this point in the history
  • Loading branch information
Fokko committed Nov 6, 2024
1 parent ef5c6ef commit e712d09
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 19 deletions.
22 changes: 12 additions & 10 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ For the FileIO there are several configuration options available:
| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. |
| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. |
| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. |
| s3.session-name | session | An optional identifier for the assumed role session. |
| s3.role-session-name | session | An optional identifier for the assumed role session. |
| s3.role-arn | arn:aws:... | AWS Role ARN. If provided instead of `s3.access-key-id` and `s3.secret-access-key`, temporary credentials will be fetched by assuming this role. |
| s3.signer | bearer | Configure the signature version of the FileIO. |
| s3.signer.uri | <http://my.signer:8080/s3> | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. |
Expand Down Expand Up @@ -331,16 +331,18 @@ catalog:

<!-- markdown-link-check-disable -->

| Key | Example | Description |
| ---------------------- | ------------------------------------ | ------------------------------------------------------------------------------- |
| glue.id | 111111111111 | Configure the 12-digit ID of the Glue Catalog |
| glue.skip-archive | true | Configure whether to skip the archival of older table versions. Default to true |
| Key | Example | Description |
|------------------------|----------------------------------------| ------------------------------------------------------------------------------- |
| glue.id | 111111111111 | Configure the 12-digit ID of the Glue Catalog |
| glue.skip-archive | true | Configure whether to skip the archival of older table versions. Defaults to true. |
| glue.endpoint | <https://glue.us-east-1.amazonaws.com> | Configure an alternative endpoint of the Glue service for GlueCatalog to access |
| glue.profile-name | default | Configure the static profile used to access the Glue Catalog |
| glue.region | us-east-1 | Set the region of the Glue Catalog |
| glue.access-key-id | admin | Configure the static access key id used to access the Glue Catalog |
| glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog |
| glue.session-token | AQoDYXdzEJr... | Configure the static session token used to access the Glue Catalog |
| glue.profile-name | default | Configure the static profile used to access the Glue Catalog |
| glue.role-session-name | session | An optional identifier for the assumed role session. |
| glue.role-arn | arn:aws:... | AWS Role ARN. If provided instead of `glue.access-key-id` and `glue.secret-access-key`, temporary credentials will be fetched by assuming this role. |
| glue.region | us-east-1 | Set the region of the Glue Catalog |
| glue.access-key-id | admin | Configure the static access key id used to access the Glue Catalog |
| glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog |
| glue.session-token | AQoDYXdzEJr... | Configure the static session token used to access the Glue Catalog |

<!-- markdown-link-check-enable-->

Expand Down
53 changes: 49 additions & 4 deletions pyiceberg/catalog/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
)

import boto3
from botocore.credentials import AssumeRoleCredentialFetcher, Credentials, DeferredRefreshableCredentials
from mypy_boto3_glue.client import GlueClient
from mypy_boto3_glue.type_defs import (
ColumnTypeDef,
Expand Down Expand Up @@ -59,7 +60,14 @@
NoSuchTableError,
TableAlreadyExistsError,
)
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
from pyiceberg.io import (
AWS_ACCESS_KEY_ID,
AWS_REGION,
AWS_ROLE_ARN,
AWS_ROLE_SESSION_NAME,
AWS_SECRET_ACCESS_KEY,
AWS_SESSION_TOKEN,
)
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
from pyiceberg.schema import Schema, SchemaVisitor, visit
from pyiceberg.serializers import FromInputFile
Expand Down Expand Up @@ -127,6 +135,8 @@
GLUE_ACCESS_KEY_ID = "glue.access-key-id"
GLUE_SECRET_ACCESS_KEY = "glue.secret-access-key"
GLUE_SESSION_TOKEN = "glue.session-token"
GLUE_ROLE_ARN = "glue.role-arn"
GLUE_ROLE_SESSION_NAME = "glue.role-session-name"


def _construct_parameters(
Expand Down Expand Up @@ -296,13 +306,48 @@ class GlueCatalog(MetastoreCatalog):
def __init__(self, name: str, **properties: Any):
super().__init__(name, **properties)

credentials = Credentials(
access_key=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
secret_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN),
)

session = boto3.Session(
profile_name=properties.get(GLUE_PROFILE_NAME),
region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION),
aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN),
aws_access_key_id=credentials.access_key,
aws_secret_access_key=credentials.secret_key,
aws_session_token=credentials.token,
)

if role_arn := get_first_property_value(properties, GLUE_ROLE_ARN, AWS_ROLE_ARN):
extra_args = {}
if role_session_name := get_first_property_value(properties, GLUE_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME):
extra_args["RoleSessionName"] = role_session_name

fetcher = AssumeRoleCredentialFetcher(
client_creator=session.client,
source_credentials=credentials,
role_arn=role_arn,
extra_args=extra_args,
)
refreshable_credentials = DeferredRefreshableCredentials(
method="assume-role",
refresh_using=fetcher.fetch_credentials,
)
from botocore.session import Session as BotoSession

botocore_session = BotoSession()
botocore_session._credentials = refreshable_credentials # noqa: SLF001
session = boto3.Session(
profile_name=properties.get(GLUE_PROFILE_NAME),
region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION),
aws_access_key_id=credentials.access_key,
aws_secret_access_key=credentials.secret_key,
aws_session_token=credentials.token,
botocore_session=botocore_session,
)

self.glue: GlueClient = session.client("glue", endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT))

if glue_catalog_id := properties.get(GLUE_ID):
Expand Down
4 changes: 2 additions & 2 deletions pyiceberg/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
AWS_SECRET_ACCESS_KEY = "client.secret-access-key"
AWS_SESSION_TOKEN = "client.session-token"
AWS_ROLE_ARN = "aws.role-arn"
AWS_SESSION_NAME = "aws.session-name"
AWS_ROLE_SESSION_NAME = "aws.role-session-name"
S3_ENDPOINT = "s3.endpoint"
S3_ACCESS_KEY_ID = "s3.access-key-id"
S3_SECRET_ACCESS_KEY = "s3.secret-access-key"
Expand All @@ -73,7 +73,7 @@
S3_SIGNER_ENDPOINT = "s3.signer.endpoint"
S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign"
S3_ROLE_ARN = "s3.role-arn"
S3_SESSION_NAME = "s3.session-name"
S3_ROLE_SESSION_NAME = "s3.role-session-name"
HDFS_HOST = "hdfs.host"
HDFS_PORT = "hdfs.port"
HDFS_USER = "hdfs.user"
Expand Down
6 changes: 3 additions & 3 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@
AWS_ACCESS_KEY_ID,
AWS_REGION,
AWS_ROLE_ARN,
AWS_ROLE_SESSION_NAME,
AWS_SECRET_ACCESS_KEY,
AWS_SESSION_NAME,
AWS_SESSION_TOKEN,
GCS_DEFAULT_LOCATION,
GCS_ENDPOINT,
Expand All @@ -104,8 +104,8 @@
S3_PROXY_URI,
S3_REGION,
S3_ROLE_ARN,
S3_ROLE_SESSION_NAME,
S3_SECRET_ACCESS_KEY,
S3_SESSION_NAME,
S3_SESSION_TOKEN,
FileIO,
InputFile,
Expand Down Expand Up @@ -369,7 +369,7 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste
if role_arn := get_first_property_value(self.properties, S3_ROLE_ARN, AWS_ROLE_ARN):
client_kwargs["role_arn"] = role_arn

if session_name := get_first_property_value(self.properties, S3_SESSION_NAME, AWS_SESSION_NAME):
if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME):
client_kwargs["session_name"] = session_name

return S3FileSystem(**client_kwargs)
Expand Down

0 comments on commit e712d09

Please sign in to comment.