Skip to content

Commit

Permalink
Add gsheet utilities
Browse files Browse the repository at this point in the history
Utilities:
1. Append given tabular data to the given gsheet
   id and worksheet name.
2. Return url for a gsheet given its ID.
3. Adds unit tests for the above utilities.
  • Loading branch information
gargnitingoogle committed Sep 24, 2024
1 parent edb8dc3 commit 4fd500c
Show file tree
Hide file tree
Showing 2 changed files with 237 additions and 0 deletions.
144 changes: 144 additions & 0 deletions perfmetrics/scripts/testing_on_gke/examples/utils/gsheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import tempfile
from typing import Tuple
from google.oauth2 import service_account
from googleapiclient.discovery import build
from .run_tests_common import run_command

_SCOPES = ['https://www.googleapis.com/auth/spreadsheets']


def _get_sheets_service_client(localServiceAccountKeyFile):
  """Builds a Google Sheets v4 API client from a local service-account key.

  Args:
    localServiceAccountKeyFile: Path of a service-account key file on the
      local disk. Credentials are requested with the module-level _SCOPES.

  Returns:
    The client object returned by googleapiclient's build() for the 'sheets'
    service, version 'v4'.
  """
  # from_service_account_info is an alternative way to create the
  # credentials when the key is already in memory; see
  # https://google-auth.readthedocs.io/en/master/reference/google.oauth2.service_account.html
  return build(
      'sheets',
      'v4',
      credentials=service_account.Credentials.from_service_account_file(
          localServiceAccountKeyFile, scopes=_SCOPES
      ),
  )


def download_gcs_object_locally(gcsObjectUri: str) -> str:
  """Downloads the given GCS object into a new local temp file.

  The caller owns the returned file and has to delete it after use to avoid
  leaking disk space. On failure no file is left behind.

  Args:
    gcsObjectUri: URI of the form gs://<bucket-name>/<object-name>.

  Returns:
    Full path of the local file holding the downloaded content.

  Raises:
    ValueError: If gcsObjectUri does not start with 'gs://'.
    RuntimeError: If the copy command reports a non-zero return code.
  """
  if not gcsObjectUri.startswith('gs://'):
    raise ValueError(
        f'Passed input {gcsObjectUri} is not proper. Expected:'
        ' gs://<bucket>/<object-name>'
    )

  # Only reserve a unique path here; delete=False keeps the file on disk
  # after the handle is closed so that the copy command can overwrite it.
  with tempfile.NamedTemporaryFile(mode='w+b', dir='/tmp', delete=False) as fp:
    local_path = fp.name

  # NOTE(review): presumably run_command returns the exit code of the
  # command it runs - confirm against run_tests_common.
  returncode = run_command(f'gcloud storage cp {gcsObjectUri} {local_path}')
  if returncode != 0:
    # Clean up before raising, otherwise the temp file would be leaked.
    os.remove(local_path)
    raise RuntimeError(
        f'failed to copy gcs object {gcsObjectUri} to local-file'
        f' {local_path}: returncode={returncode}. Deleted tempfile'
        f' {local_path}.'
    )
  return local_path


def append_data_to_gsheet(
    serviceAccountKeyFile: str,
    worksheet: str,
    data: dict,
    gsheet_id: str,
    repeat_header: bool = False,
) -> None:
  """Appends the given tabular data after the last used row of a worksheet.

  The number of used rows is taken from column A of the worksheet. If the
  passed header matches the first row of the worksheet, the header is not
  inserted again, unless repeat_header is True.

  Args:
    serviceAccountKeyFile: Path of a service-account key file used to
      authenticate reads/writes on the gsheet. Either a local path or a
      gs://<bucket>/<object> URI; the latter is downloaded to a temp file
      which is deleted again before returning.
    worksheet: Name of the worksheet (tab) to append to, without any trailing
      "!"; the separator is added here when building the cell ranges.
    data: Dictionary of {'header': tuple, 'values': list(tuples)}, to be added
      to the worksheet.
    gsheet_id: Unique ID to identify a gsheet.
    repeat_header: Always add the passed header as a new row in the worksheet.

  Raises:
    ValueError: If serviceAccountKeyFile, worksheet or gsheet_id is empty or
      blank.
    HttpError: For any Google Sheets API call related errors.
  """
  required_args = {
      'serviceAccountKeyFile': serviceAccountKeyFile,
      'worksheet': worksheet,
      'gsheet_id': gsheet_id,
  }
  for arg_name, arg_value in required_args.items():
    if not arg_value or not arg_value.strip():
      # Name the offending argument, not always the key file.
      raise ValueError(
          f"Passed argument {arg_name}='{arg_value}' is not proper"
      )

  def _append_using_local_key_file(localServiceAccountKeyFile: str) -> None:
    """Performs the append with a key file available on the local disk."""
    # Open a read-write gsheet client using the local key file.
    client = _get_sheets_service_client(localServiceAccountKeyFile)

    def _read_from_range(cell_range: str) -> list:
      """Returns a list of list of values for the given range in the worksheet."""
      gsheet_response = (
          client.spreadsheets()
          .values()
          .get(spreadsheetId=gsheet_id, range=f'{worksheet}!{cell_range}')
          .execute()
      )
      # The 'values' key is absent when the requested range is empty.
      return gsheet_response.get('values', [])

    def _write_at_address(cell_address: str, rows: list) -> None:
      """Writes the given rows starting at cell_address in the worksheet."""
      client.spreadsheets().values().update(
          spreadsheetId=gsheet_id,
          valueInputOption='USER_ENTERED',
          body={'majorDimension': 'ROWS', 'values': rows},
          range=f'{worksheet}!{cell_address}',
      ).execute()

    num_rows = len(_read_from_range('A1:A'))
    data_in_first_row = _read_from_range('A1:1')

    # Compare headers cell-by-cell as strings, so that a header passed as a
    # list, or one containing non-string cells, can still match the first
    # row read back from the sheet.
    original_header = (
        tuple(str(cell) for cell in data_in_first_row[0])
        if data_in_first_row
        else ()
    )
    new_header = tuple(data['header'])
    header_needed = (
        repeat_header
        or not original_header
        or original_header != tuple(str(cell) for cell in new_header)
    )

    # Header (if needed) and values go out in a single update, starting right
    # after the last used row.
    rows_to_append = list(data['values'])
    if header_needed:
      rows_to_append.insert(0, new_header)
    if rows_to_append:
      _write_at_address(f'A{num_rows + 1}', rows_to_append)

  if serviceAccountKeyFile.startswith('gs://'):
    localServiceAccountKeyFile = download_gcs_object_locally(
        serviceAccountKeyFile
    )
    try:
      _append_using_local_key_file(localServiceAccountKeyFile)
    finally:
      # Never leave the downloaded credentials on disk, even on API errors.
      os.remove(localServiceAccountKeyFile)
  else:
    _append_using_local_key_file(serviceAccountKeyFile)


def url(gsheet_id: str) -> str:
  """Returns the docs.google.com URL of the gsheet with the given ID."""
  return 'https://docs.google.com/spreadsheets/d/{}'.format(gsheet_id)
93 changes: 93 additions & 0 deletions perfmetrics/scripts/testing_on_gke/examples/utils/gsheet_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""This file defines unit tests for functionalities in gsheet.py"""

# Copyright 2018 The Kubernetes Authors.
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from os import open
from os.path import isfile
import random
from random import choices
import string
import unittest
from gsheet import append_data_to_gsheet, download_gcs_object_as_tempfile, url


class GsheetTest(unittest.TestCase):
  """Tests for the gsheet utilities.

  Apart from test_gsheet_url, these tests call the real helpers with
  hard-coded gsheet IDs and gs:// credential URIs, so they presumably need
  network access and suitable permissions to pass - confirm before running
  them in CI.
  """

  def test_append_data_to_gsheet(self):
    """Appends one random row to every (project, worksheet) combination."""
    _DEFAULT_GSHEET_ID = '1UghIdsyarrV1HVNc6lugFZS1jJRumhdiWnPgoEC8Fe4'

    def _default_service_account_key_file(
        project_id: str, localfile: bool
    ) -> str:
      """Returns the key-file location (local name or gs:// URI) for a project."""
      if localfile:
        if project_id == 'gcs-fuse-test':
          return '20240919-gcs-fuse-test-bc1a2c0aac45.json'
        elif project_id == 'gcs-fuse-test-ml':
          return '20240919-gcs-fuse-test-ml-d6e0247b2cf1.json'
        else:
          raise Exception(f'Unknown project-id: {project_id}')
      else:
        if project_id in ['gcs-fuse-test', 'gcs-fuse-test-ml']:
          return f'gs://gcsfuse-aiml-test-outputs/creds/{project_id}.json'
        else:
          raise Exception(f'Unknown project-id: {project_id}')

    for project_id in ['gcs-fuse-test', 'gcs-fuse-test-ml']:
      for worksheet in ['fio-test', 'dlio-test']:
        # Only the gs:// key files are exercised; add True to this list to
        # also cover the local key files.
        for localkeyfile in [False]:
          serviceAccountKeyFile = _default_service_account_key_file(
              project_id, localkeyfile
          )
          append_data_to_gsheet(
              worksheet=worksheet,
              data={
                  'header': ('Column1', 'Column2'),
                  'values': [(
                      ''.join(random.choices(string.ascii_letters, k=9)),
                      random.random(),
                  )],
              },
              serviceAccountKeyFile=serviceAccountKeyFile,
              gsheet_id=_DEFAULT_GSHEET_ID,
          )

  def test_gsheet_url(self):
    """The URL must embed the gsheet ID and be longer than the ID itself."""
    gsheet_id = ''.join(random.choices(string.ascii_letters, k=20))
    gsheet_url = url(gsheet_id)
    self.assertIn(gsheet_id, gsheet_url)
    self.assertLess(len(gsheet_id), len(gsheet_url))

  # NOTE(review): the two tests below call download_gcs_object_as_tempfile,
  # the name brought in by this file's import line, while the download helper
  # visible in gsheet.py is named download_gcs_object_locally. One of the two
  # names has to be reconciled for this module to import.
  def test_download_gcs_object_as_tempfile(self):
    """A successful download returns the path of an existing local file."""
    gcs_object = 'gs://gcsfuse-aiml-test-outputs/creds/gcs-fuse-test.json'
    localfile = download_gcs_object_as_tempfile(gcs_object)
    try:
      self.assertTrue(localfile)
      self.assertTrue(localfile.strip())
      # Raises if the returned path does not exist.
      os.stat(localfile)
    finally:
      # Remove the downloaded credentials even if an assertion above fails.
      if localfile:
        os.remove(localfile)

  def test_download_gcs_object_as_tempfile_nonexistent(self):
    """Downloading a non-existent object must fail with an exception."""
    gcs_object = 'gs://non/existing/gcs/object'
    # The download helper in gsheet.py raises on a failed copy instead of
    # returning None, so asserting on a None return value could never pass.
    with self.assertRaises(Exception):
      download_gcs_object_as_tempfile(gcs_object)


if __name__ == '__main__':
unittest.main()

0 comments on commit 4fd500c

Please sign in to comment.