From 2cdb0c1f39b1eddd2161524369aa220f5c7f3717 Mon Sep 17 00:00:00 2001 From: harrywatts125 <91128789+harrywatts125@users.noreply.github.com> Date: Thu, 24 Feb 2022 13:08:04 +0000 Subject: [PATCH 1/2] Fix for file_path publishing error & experimental parquet - Corrections to publisher.py to address variable naming issue which caused publishing to fail. - Hyperprocess experimental parquet parameter removed as it is now included as a stable feature. - TSC library requirements updated to version 0.17.0 --- hyperleaup/creator.py | 7 ++----- hyperleaup/publisher.py | 8 +++----- requirements.txt | 2 +- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/hyperleaup/creator.py b/hyperleaup/creator.py index 9be0cae..8e333d2 100644 --- a/hyperleaup/creator.py +++ b/hyperleaup/creator.py @@ -135,11 +135,8 @@ def copy_data_into_hyper_file(csv_path: str, name: str, table_def: TableDefiniti def copy_parquet_to_hyper_file(parquet_path: str, name: str, table_def: TableDefinition) -> str: """Helper function that copies data from a Parquet file to a .hyper file.""" hyper_database_path = f"/tmp/hyperleaup/{name}/{name}.hyper" - hyper_process_params = { - "experimental_external_format_parquet": "on" - } - with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, - parameters=hyper_process_params) as hp: + + with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp: with Connection(endpoint=hp.endpoint, database=Path(hyper_database_path), create_mode=CreateMode.CREATE_AND_REPLACE) as connection: diff --git a/hyperleaup/publisher.py b/hyperleaup/publisher.py index 1731b00..eb82cd9 100644 --- a/hyperleaup/publisher.py +++ b/hyperleaup/publisher.py @@ -101,11 +101,9 @@ def publish(self, creation_mode='CreateNew'): # Finally, publish the Hyper File to the Tableau server logging.info(f'Publishing Hyper File located at: "{self.hyper_file_path}"') logging.info(f'Create mode: {create_mode}') - datasource_item = 
TSC.DatasourceItem(project_id=self.project_id, name=self.datasource_name) - logging.info(f'Publishing datasource: \n{datasource_to_string(datasource_item)}') - datasource_item = server.datasources.publish(datasource_item=datasource_item, - file_path=self.hyper_file_path, - mode=create_mode) + datasource_item_id = TSC.DatasourceItem(project_id=self.project_id, name=self.datasource_name) + logging.info(f'Publishing datasource: \n{datasource_to_string(datasource_item_id)}') + datasource_item = server.datasources.publish(datasource_item_id, self.hyper_file_path, create_mode) self.datasource_luid = datasource_item.id logging.info(f'Published datasource to Tableau server. Datasource LUID : {self.datasource_luid}') diff --git a/requirements.txt b/requirements.txt index 20f4841..e96fbf9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -tableauserverclient==0.16.0 +tableauserverclient==0.17.0 pyspark==3.1.2 requests==2.26.0 tableauhyperapi==0.0.13129 From ea06ed6c6ba62d323ee27084eb66bd94d04c5a6d Mon Sep 17 00:00:00 2001 From: harrywatts125 <91128789+harrywatts125@users.noreply.github.com> Date: Thu, 24 Feb 2022 14:12:35 +0000 Subject: [PATCH 2/2] Streamlined create_mode to create/overwrite Removed logic checking if data source already exists and simply set it to create_mode = Overwrite. If data source doesn't exist, it will be created using this setting so the resulting publish logic should be the same as intended. 
--- hyperleaup/publisher.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/hyperleaup/publisher.py b/hyperleaup/publisher.py index eb82cd9..8926f82 100644 --- a/hyperleaup/publisher.py +++ b/hyperleaup/publisher.py @@ -41,7 +41,7 @@ def __init__(self, tableau_server_url: str, self.datasource_luid = None self.hyper_file_path = hyper_file_path - def publish(self, creation_mode='CreateNew'): + def publish(self, creation_mode = 'Overwrite'): """Publishes a Hyper File to a Tableau Server""" # Ensure that the Hyper File exists @@ -75,8 +75,8 @@ def publish(self, creation_mode='CreateNew'): f'on the Tableau server.') # Next, check if the datasource already exists and needs to be overwritten - create_mode = TSC.Server.PublishMode.CreateNew - if creation_mode.upper() == 'CREATENEW': + create_mode = TSC.Server.PublishMode.Overwrite + if creation_mode.upper() == 'OVERWRITE': # Search for the datasource under project name req_options = TSC.RequestOptions() @@ -87,14 +87,11 @@ def publish(self, creation_mode='CreateNew'): TSC.RequestOptions.Operator.Equals, self.datasource_name)) datasources, pagination = server.datasources.get(req_options=req_options) - for datasource in datasources: - # the datasource already exists, overwrite - if datasource.name == self.datasource_name: - logging.info(f'Overwriting existing datasource named "{self.datasource_name}".') - create_mode = TSC.Server.PublishMode.Overwrite - break + elif creation_mode.upper() == 'APPEND': create_mode = TSC.Server.PublishMode.Append + + else: raise ValueError(f'Invalid "creation_mode" : {creation_mode}')