fix: Adding NotebookVersion Parameter as specified in official AWS Docs #2867

Merged
16 changes: 14 additions & 2 deletions awswrangler/athena/_spark.py
@@ -94,6 +94,7 @@ def create_spark_session(
default_executor_dpu_size: int = 1,
additional_configs: dict[str, Any] | None = None,
spark_properties: dict[str, Any] | None = None,
notebook_version: str | None = None,
idle_timeout: int = 15,
boto3_session: boto3.Session | None = None,
) -> str:
@@ -116,8 +117,11 @@ def create_spark_session(
spark_properties: Dict[str, Any], optional
Contains SparkProperties in the form of key-value pairs. Specifies custom jar files and Spark properties
for use cases like cluster encryption, table formats, and general Spark tuning.
notebook_version: str, optional
The notebook version. This value is supplied automatically for notebook sessions in the Athena console and is not required for programmatic session access.
The only valid notebook version is Athena notebook version 1. If you specify a value for NotebookVersion, you must also specify a value for NotebookId.
idle_timeout : int, optional
The idle timeout in minutes for the session. The default is 15.
boto3_session : boto3.Session(), optional
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
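For reference, a minimal usage sketch of the new parameter, assuming this change is released; the workgroup name below is a hypothetical placeholder for an Athena Spark-enabled workgroup:

import awswrangler as wr

# Start an Athena Spark session, passing the new notebook_version argument.
# "my-spark-workgroup" is a placeholder name used only for illustration.
session_id = wr.athena.create_spark_session(
    workgroup="my-spark-workgroup",
    notebook_version="Athena notebook version 1",  # the only valid value per the AWS docs
    idle_timeout=15,
)
print(session_id)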

@@ -142,10 +146,13 @@ def run_spark_calculation(
engine_configuration["AdditionalConfigs"] = additional_configs
if spark_properties:
engine_configuration["SparkProperties"] = spark_properties
kwargs: Any = {"SessionIdleTimeoutInMinutes": idle_timeout}
if notebook_version:
kwargs["NotebookVersion"] = notebook_version
response = client_athena.start_session(
WorkGroup=workgroup,
EngineConfiguration=engine_configuration,
SessionIdleTimeoutInMinutes=idle_timeout,
**kwargs,
)
_logger.info("Session info:\n%s", response)
session_id: str = response["SessionId"]
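The kwargs approach above only includes NotebookVersion in the StartSession request when a value is supplied. A standalone sketch of the equivalent raw boto3 call, with placeholder workgroup and DPU values:

import boto3

# Build the same conditional kwargs as the wrangler code above.
client_athena = boto3.client("athena")
engine_configuration = {
    "CoordinatorDpuSize": 1,
    "MaxConcurrentDpus": 5,
    "DefaultExecutorDpuSize": 1,
}
notebook_version = "Athena notebook version 1"  # or None to omit the field entirely
kwargs = {"SessionIdleTimeoutInMinutes": 15}
if notebook_version:
    kwargs["NotebookVersion"] = notebook_version
response = client_athena.start_session(
    WorkGroup="my-spark-workgroup",  # placeholder workgroup name
    EngineConfiguration=engine_configuration,
    **kwargs,
)
print(response["SessionId"])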
@@ -166,6 +173,7 @@ def run_spark_calculation(
default_executor_dpu_size: int = 1,
additional_configs: dict[str, Any] | None = None,
spark_properties: dict[str, Any] | None = None,
notebook_version: str | None = None,
idle_timeout: int = 15,
boto3_session: boto3.Session | None = None,
) -> dict[str, Any]:
@@ -192,6 +200,9 @@ def run_spark_calculation(
spark_properties: Dict[str, Any], optional
Contains SparkProperties in the form of key-value pairs. Specifies custom jar files and Spark properties
for use cases like cluster encryption, table formats, and general Spark tuning.
notebook_version: str, optional
The notebook version. This value is supplied automatically for notebook sessions in the Athena console and is not required for programmatic session access.
The only valid notebook version is Athena notebook version 1. If you specify a value for NotebookVersion, you must also specify a value for NotebookId.
idle_timeout : int, optional
The idle timeout in minutes for the session. The default is 15.
boto3_session : boto3.Session(), optional
@@ -221,6 +232,7 @@ def run_spark_calculation(
default_executor_dpu_size=default_executor_dpu_size,
additional_configs=additional_configs,
spark_properties=spark_properties,
notebook_version=notebook_version,
idle_timeout=idle_timeout,
boto3_session=boto3_session,
)
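A hypothetical end-to-end call with the new parameter; the workgroup name and the calculation code are placeholders:

import awswrangler as wr

# Run a trivial calculation in a session created with notebook_version set.
result = wr.athena.run_spark_calculation(
    code="print(spark.version)",
    workgroup="my-spark-workgroup",
    notebook_version="Athena notebook version 1",
)
print(result)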