Skip to content

Commit

Permalink
Implement pre-existing session support for dynamodb catalog (apache#104)
Browse files Browse the repository at this point in the history
  • Loading branch information
waifairer authored Jan 18, 2024
1 parent 8f7927b commit d796878
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 4 deletions.
13 changes: 13 additions & 0 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,19 @@ catalog:
table-name: iceberg
```

If you prefer to pass the credentials explicitly to the client instead of relying on environment variables,

```yaml
catalog:
default:
type: dynamodb
table-name: iceberg
aws_access_key_id: <ACCESS_KEY_ID>
aws_secret_access_key: <SECRET_ACCESS_KEY>
aws_session_token: <SESSION_TOKEN>
region_name: <REGION_NAME>
```

# Concurrency

PyIceberg uses multiple threads to parallelize operations. The number of workers can be configured by supplying a `max-workers` entry in the configuration file, or by setting the `PYICEBERG_MAX_WORKERS` environment variable. The default value depends on the system hardware and Python version. See [the Python documentation](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) for more details.
4 changes: 3 additions & 1 deletion mkdocs/docs/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ For the development, Poetry is used for packing and dependency management. You c
pip install poetry
```

If you have an older version of pip and virtualenv you need to update these:
Make sure you're using an up-to-date environment from venv

```bash
pip install --upgrade virtualenv pip
python -m venv ./venv
source ./venv/bin/activate
```

To get started, you can run `make install`, which installs Poetry and all the dependencies of the Iceberg library. This also installs the development dependencies. If you don't want to install the development dependencies, you need to install using `poetry install --no-dev`.
Expand Down
10 changes: 9 additions & 1 deletion pyiceberg/catalog/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,15 @@
class DynamoDbCatalog(Catalog):
def __init__(self, name: str, **properties: str):
super().__init__(name, **properties)
self.dynamodb = boto3.client(DYNAMODB_CLIENT)
session = boto3.Session(
profile_name=properties.get("profile_name"),
region_name=properties.get("region_name"),
botocore_session=properties.get("botocore_session"),
aws_access_key_id=properties.get("aws_access_key_id"),
aws_secret_access_key=properties.get("aws_secret_access_key"),
aws_session_token=properties.get("aws_session_token"),
)
self.dynamodb = session.client(DYNAMODB_CLIENT)
self.dynamodb_table_name = self.properties.get(DYNAMODB_TABLE_NAME, DYNAMODB_TABLE_NAME_DEFAULT)
self._ensure_catalog_table_exists_or_create()

Expand Down
28 changes: 26 additions & 2 deletions tests/catalog/test_dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,20 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import List
from typing import Any, Dict, List

import boto3
import pytest
from moto import mock_dynamodb
from unittest import mock

from pyiceberg.catalog import METADATA_LOCATION, TABLE_TYPE
from pyiceberg.catalog.dynamodb import (
DYNAMODB_COL_CREATED_AT,
DYNAMODB_COL_IDENTIFIER,
DYNAMODB_COL_NAMESPACE,
DYNAMODB_TABLE_NAME_DEFAULT,
ACTIVE,
DynamoDbCatalog,
_add_property_prefix,
)
Expand All @@ -47,12 +49,13 @@ def test_create_dynamodb_catalog_with_table_name(_dynamodb, _bucket_initialize:
DynamoDbCatalog("test_ddb_catalog")
response = _dynamodb.describe_table(TableName=DYNAMODB_TABLE_NAME_DEFAULT)
assert response["Table"]["TableName"] == DYNAMODB_TABLE_NAME_DEFAULT
assert response["Table"]["TableStatus"] == ACTIVE

custom_table_name = "custom_table_name"
DynamoDbCatalog("test_ddb_catalog", **{"table-name": custom_table_name})
response = _dynamodb.describe_table(TableName=custom_table_name)
assert response["Table"]["TableName"] == custom_table_name

assert response["Table"]["TableStatus"] == ACTIVE

@mock_dynamodb
def test_create_table_with_database_location(
Expand Down Expand Up @@ -506,3 +509,24 @@ def test_update_namespace_properties_overlap_update_removal(_bucket_initialize:
test_catalog.update_namespace_properties(database_name, removals, updates)
# should not modify the properties
assert test_catalog.load_namespace_properties(database_name) == test_properties

def test_passing_provided_profile() -> None:
catalog_name = "test_ddb_catalog"
session_props = {
"aws_access_key_id": "abc",
"aws_secret_access_key": "def",
"aws_session_token": "ghi",
"region_name": "eu-central-1",
"botocore_session": None,
"profile_name": None
}
props = {"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"}
props.update(session_props)
with mock.patch('boto3.Session', return_value=mock.Mock()) as mock_session:
mock_client = mock.Mock()
mock_session.return_value.client.return_value = mock_client
mock_client.describe_table.return_value = {'Table': {'TableStatus': 'ACTIVE'}}
test_catalog = DynamoDbCatalog(catalog_name, **props)
assert test_catalog.dynamodb is mock_client
mock_session.assert_called_with(**session_props)
assert test_catalog.dynamodb is mock_session().client()

0 comments on commit d796878

Please sign in to comment.