From 2b0eb869aacc259355b269ee6d4c707d011f28d1 Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Thu, 14 Dec 2023 08:38:32 -0800 Subject: [PATCH 01/10] add cloudsql cicd workflow --- .../datastore/providers/postgres_test.py | 2 +- retrieval_service/pyproject.toml | 5 + retrieval_service/sql-proxy/Dockerfile | 1 + .../sql-proxy/cloudsql.tests.cloudbuild.yaml | 161 ++++++++++++++++++ 4 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 retrieval_service/sql-proxy/Dockerfile create mode 100644 retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml diff --git a/retrieval_service/datastore/providers/postgres_test.py b/retrieval_service/datastore/providers/postgres_test.py index 7c9af966..f367d7ba 100644 --- a/retrieval_service/datastore/providers/postgres_test.py +++ b/retrieval_service/datastore/providers/postgres_test.py @@ -27,7 +27,7 @@ from .test_data import query_embedding1, query_embedding2, query_embedding3 from .utils import get_env_var -pytestmark = pytest.mark.asyncio(scope="module") +pytestmark = [pytest.mark.asyncio(scope="module"), pytest.mark.postgres] @pytest.fixture(scope="module") diff --git a/retrieval_service/pyproject.toml b/retrieval_service/pyproject.toml index 01141ff6..fe100524 100644 --- a/retrieval_service/pyproject.toml +++ b/retrieval_service/pyproject.toml @@ -8,3 +8,8 @@ warn_unused_configs = true [[tool.mypy.overrides]] module = ["pgvector.asyncpg"] ignore_missing_imports = true + +[tool.pytest.ini_options] +markers = [ + "postgres", +] diff --git a/retrieval_service/sql-proxy/Dockerfile b/retrieval_service/sql-proxy/Dockerfile new file mode 100644 index 00000000..2b263ea5 --- /dev/null +++ b/retrieval_service/sql-proxy/Dockerfile @@ -0,0 +1 @@ +FROM gcr.io/gcp-runtimes/ubuntu_20_0_4 diff --git a/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml b/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml new file mode 100644 index 00000000..327bd019 --- /dev/null +++ b/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml @@ -0,0 +1,161 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +steps: + - id: "docker-build" + name: "gcr.io/cloud-builders/docker" + args: ["build", "-t", "${_IMAGE_NAME}", "retrieval_service/sql-proxy/."] + + - id: "docker-push" + name: "gcr.io/cloud-builders/docker" + args: ["push", "${_IMAGE_NAME}"] + + - id: "docker-layer" + name: "gcr.io/cloud-builders/docker" + entrypoint: /bin/bash + args: + - '-c' + - | + echo "FROM $_IMAGE_NAME + COPY --from=gcr.io/cloud-sql-connectors/cloud-sql-proxy /cloud-sql-proxy /cloudsql/cloud-sql-proxy" > Dockerfile-proxy; + + docker build -f Dockerfile-proxy -t ${_IMAGE_NAME}-proxy . + + - id: Install dependencies + name: python:3.11 + dir: retrieval_service + entrypoint: pip + args: + [ + "install", + "-r", + "requirements.txt", + "-r", + "requirements-test.txt", + "--user", + ] + + - id: Create database + name: ${_IMAGE_NAME}-proxy + dir: retrieval_service/sql-proxy + entrypoint: /bin/bash + secretEnv: + - CLOUDSQLUSER # Use built-in env vars for database connection + - CLOUDSQLPASSWORD + args: + - "-c" + - | + /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; + psql -h ${_DATABASE_HOST} -c "CREATE DATABASE ${_DATABASE_NAME};" || echo "Database '${_DATABASE_NAME}' already exists." + psql -h ${_DATABASE_HOST} -d ${_DATABASE_NAME} -c 'CREATE EXTENSION vector;' || echo "Extension 'vector' already exists." + + - id: Initialize data + name: python:3.11 + dir: retrieval_service + secretEnv: + - CLOUDSQLUSER + - CLOUDSQLPASSWORD + entrypoint: /bin/bash + args: + - "-c" + - | + # Create config + cp example-config.yml config.yml + sed -i "/127.0.0.1/d" config.yml + sed -i "s/postgres/cloudsql-postgres/g" config.yml + sed -i "s/my_database/${_DATABASE_NAME}/g" config.yml + sed -i "s/my-user/$$CLOUDSQLUSER/g" config.yml + sed -i "s/my-password/$$CLOUDSQLPASSWORD/g" config.yml + echo -e '\n project: "'$PROJECT_ID'"' >> config.yml + echo ' region: "'${_CLOUDSQL_REGION}'"' >> config.yml + echo ' instance: "'${_CLOUDSQL_INSTANCE}'"' >> config.yml + # Run script + python run_database_init.py + + - id: Run Alloy DB integration tests + name: python:3.11 + dir: retrieval_service + env: # Set env var expected by tests + - "DB_NAME=${_DATABASE_NAME}" + - "DB_HOST=${_DATABASE_HOST}" + - "DB_PROJECT=$PROJECT_ID" + - "DB_REGION=${_CLOUDSQL_REGION}" + - "DB_INSTANCE=${_CLOUDSQL_INSTANCE}" + secretEnv: + - CLOUDSQLUSER + - CLOUDSQLPASSWORD + entrypoint: /bin/bash + args: + - "-c" + - | + # Set env var expected by tests + export DB_USER=$$CLOUDSQLUSER + export DB_PASS=$$CLOUDSQLPASSWORD + python -m pytest -m "not postgres" + + - id: Run database export + name: python:3.11 + dir: retrieval_service + entrypoint: /bin/bash + args: + - "-c" + - | + # Run script + python run_database_export.py + cd ../data + diff --strip-trailing-cr -Z airport_dataset.csv airport_dataset.csv.new || (echo "airport dataset export fail" && exit 1) + diff --strip-trailing-cr -Z amenity_dataset.csv amenity_dataset.csv.new || (echo "amenity dataset export fail" && exit 1) + diff --strip-trailing-cr -Z flights_dataset.csv flights_dataset.csv.new || (echo "flight dataset export fail" && exit 1) + + - id: Clean exported files + name: python:3.11 + dir: data + entrypoint: /bin/bash + args: + - "-c" + - | + rm airport_dataset.csv.new amenity_dataset.csv.new flights_dataset.csv.new + + + - id: Clean database + name: postgres + entrypoint: /bin/bash + secretEnv: + - CLOUDSQLUSER + - CLOUDSQLPASSWORD + args: + - "-c" + - | + psql -h ${_DATABASE_HOST} -c "DROP DATABASE IF EXISTS ${_DATABASE_NAME};" + +substitutions: + _DATABASE_NAME: test_${SHORT_SHA} + _DATABASE_USER: postgres + _DATABASE_HOST: 127.0.0.1 + _CLOUDSQL_REGION: "us-central1" + _CLOUDSQL_INSTANCE: "my-cloudsql-instance" + _INSTANCE_CONNECTION_NAME: ${PROJECT_ID}:us-central1:myinstance + _DATABASE_PORT: "5432" + _IMAGE_NAME: gcr.io/${PROJECT_ID}/sample-sql-proxy + +availableSecrets: + secretManager: + - versionName: projects/$PROJECT_ID/secrets/cloudsql_pass/versions/latest + env: CLOUDSQLPASSWORD + - versionName: projects/$PROJECT_ID/secrets/cloudsql_user/versions/latest + env: CLOUDSQLUSER + +options: + substitutionOption: 'ALLOW_LOOSE' + dynamic_substitutions: true From 779a172579d05c829213b7943c3df2f051f79ce7 Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Wed, 20 Dec 2023 07:47:14 -0800 Subject: [PATCH 02/10] remove markers for test and only run one test per cloudbuild --- retrieval_service/datastore/providers/postgres_test.py | 2 +- retrieval_service/pyproject.toml | 5 ----- retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/retrieval_service/datastore/providers/postgres_test.py b/retrieval_service/datastore/providers/postgres_test.py index f367d7ba..7c9af966 100644 --- a/retrieval_service/datastore/providers/postgres_test.py +++ b/retrieval_service/datastore/providers/postgres_test.py @@ -27,7 +27,7 @@ from .test_data import query_embedding1, query_embedding2, query_embedding3 from .utils import get_env_var -pytestmark = [pytest.mark.asyncio(scope="module"), pytest.mark.postgres] +pytestmark = pytest.mark.asyncio(scope="module") @pytest.fixture(scope="module") diff --git a/retrieval_service/pyproject.toml b/retrieval_service/pyproject.toml index fe100524..01141ff6 100644 --- a/retrieval_service/pyproject.toml +++ b/retrieval_service/pyproject.toml @@ -8,8 +8,3 @@ warn_unused_configs = true [[tool.mypy.overrides]] module = ["pgvector.asyncpg"] ignore_missing_imports = true - -[tool.pytest.ini_options] -markers = [ - "postgres", -] diff --git a/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml b/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml index 327bd019..b1916559 100644 --- a/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml +++ b/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml @@ -102,7 +102,7 @@ steps: # Set env var expected by tests export DB_USER=$$CLOUDSQLUSER export DB_PASS=$$CLOUDSQLPASSWORD - python -m pytest -m "not postgres" + python -m pytest -m ../datastore/providers/cloudsql_postgres.py - id: Run database export name: python:3.11 From a7f20aaffd829b30f47b0e50909a49c081bbfc32 Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Thu, 21 Dec 2023 10:16:00 -0800 Subject: [PATCH 03/10] move cloudbuild to retrieval_service --- retrieval_service/Dockerfile-cloudsql | 37 ++++++++ .../cloudsql.tests.cloudbuild.yaml | 86 ++++++++----------- retrieval_service/example-config-cloudsql.yml | 10 +++ retrieval_service/sql-proxy/Dockerfile | 1 - 4 files changed, 81 insertions(+), 53 deletions(-) create mode 100644 retrieval_service/Dockerfile-cloudsql rename retrieval_service/{sql-proxy => }/cloudsql.tests.cloudbuild.yaml (64%) create mode 100644 retrieval_service/example-config-cloudsql.yml delete mode 100644 retrieval_service/sql-proxy/Dockerfile diff --git a/retrieval_service/Dockerfile-cloudsql b/retrieval_service/Dockerfile-cloudsql new file mode 100644 index 00000000..e244c3b4 --- /dev/null +++ b/retrieval_service/Dockerfile-cloudsql @@ -0,0 +1,37 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Set python runtime +FROM python:3.11-slim + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive \ + apt-get install --no-install-recommends --assume-yes \ + postgresql-client + +COPY --from=gcr.io/cloud-sql-connectors/cloud-sql-proxy /cloud-sql-proxy /cloudsql/cloud-sql-proxy + +# Copy local code to the container image. +WORKDIR /app + +# Install dependencies. +COPY ./requirements.txt requirements.txt +COPY ./requirements-test.txt requirements-test.txt +RUN pip install --no-cache-dir -r requirements.txt -r requirements-test.txt + +# Copy local code to the container image. +COPY . ./ + +# Set placeholder command +CMD pytest datastore/providers/cloudsql_postgres_test.py diff --git a/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml b/retrieval_service/cloudsql.tests.cloudbuild.yaml similarity index 64% rename from retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml rename to retrieval_service/cloudsql.tests.cloudbuild.yaml index b1916559..4b928761 100644 --- a/retrieval_service/sql-proxy/cloudsql.tests.cloudbuild.yaml +++ b/retrieval_service/cloudsql.tests.cloudbuild.yaml @@ -15,23 +15,14 @@ steps: - id: "docker-build" name: "gcr.io/cloud-builders/docker" - args: ["build", "-t", "${_IMAGE_NAME}", "retrieval_service/sql-proxy/."] + dir: retrieval_service + args: ["build", "-f", "Dockerfile-cloudsql", "-t", "${_IMAGE_NAME}", "."] - id: "docker-push" name: "gcr.io/cloud-builders/docker" + dir: retrieval_service args: ["push", "${_IMAGE_NAME}"] - - id: "docker-layer" - name: "gcr.io/cloud-builders/docker" - entrypoint: /bin/bash - args: - - '-c' - - | - echo "FROM $_IMAGE_NAME - COPY --from=gcr.io/cloud-sql-connectors/cloud-sql-proxy /cloud-sql-proxy /cloudsql/cloud-sql-proxy" > Dockerfile-proxy; - - docker build -f Dockerfile-proxy -t ${_IMAGE_NAME}-proxy . - - id: Install dependencies name: python:3.11 dir: retrieval_service @@ -47,12 +38,12 @@ steps: ] - id: Create database - name: ${_IMAGE_NAME}-proxy - dir: retrieval_service/sql-proxy + name: ${_IMAGE_NAME} + dir: retrieval_service entrypoint: /bin/bash secretEnv: - - CLOUDSQLUSER # Use built-in env vars for database connection - - CLOUDSQLPASSWORD + - PGUSER # Use built-in env vars for database connection + - PGPASSWORD args: - "-c" - | @@ -61,56 +52,56 @@ steps: psql -h ${_DATABASE_HOST} -d ${_DATABASE_NAME} -c 'CREATE EXTENSION vector;' || echo "Extension 'vector' already exists." - id: Initialize data - name: python:3.11 + name: ${_IMAGE_NAME} dir: retrieval_service secretEnv: - - CLOUDSQLUSER - - CLOUDSQLPASSWORD + - PGUSER + - PGPASSWORD entrypoint: /bin/bash args: - "-c" - | + /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; # Create config - cp example-config.yml config.yml - sed -i "/127.0.0.1/d" config.yml - sed -i "s/postgres/cloudsql-postgres/g" config.yml + cp example-config-cloudsql.yml config.yml sed -i "s/my_database/${_DATABASE_NAME}/g" config.yml - sed -i "s/my-user/$$CLOUDSQLUSER/g" config.yml - sed -i "s/my-password/$$CLOUDSQLPASSWORD/g" config.yml - echo -e '\n project: "'$PROJECT_ID'"' >> config.yml - echo ' region: "'${_CLOUDSQL_REGION}'"' >> config.yml - echo ' instance: "'${_CLOUDSQL_INSTANCE}'"' >> config.yml + sed -i "s/my-user/$$PGUSER/g" config.yml + sed -i "s/my-password/$$PGPASSWORD/g" config.yml + sed -i "s/my-project/$PROJECT_ID/g" config.yml + sed -i "s/my-region/${_CLOUDSQL_REGION}/g" config.yml + sed -i "s/my-instance/${_CLOUDSQL_INSTANCE}/g" config.yml # Run script python run_database_init.py - id: Run Alloy DB integration tests - name: python:3.11 + name: ${_IMAGE_NAME} dir: retrieval_service env: # Set env var expected by tests - "DB_NAME=${_DATABASE_NAME}" - - "DB_HOST=${_DATABASE_HOST}" - "DB_PROJECT=$PROJECT_ID" - "DB_REGION=${_CLOUDSQL_REGION}" - "DB_INSTANCE=${_CLOUDSQL_INSTANCE}" secretEnv: - - CLOUDSQLUSER - - CLOUDSQLPASSWORD + - PGUSER + - PGPASSWORD entrypoint: /bin/bash args: - "-c" - | + /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; # Set env var expected by tests - export DB_USER=$$CLOUDSQLUSER - export DB_PASS=$$CLOUDSQLPASSWORD - python -m pytest -m ../datastore/providers/cloudsql_postgres.py + export DB_USER=$$PGUSER + export DB_PASS=$$PGPASSWORD + python -m pytest datastore/providers/cloudsql_postgres_test.py - id: Run database export - name: python:3.11 + name: ${_IMAGE_NAME} dir: retrieval_service entrypoint: /bin/bash args: - "-c" - | + /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; # Run script python run_database_export.py cd ../data @@ -118,25 +109,16 @@ steps: diff --strip-trailing-cr -Z amenity_dataset.csv amenity_dataset.csv.new || (echo "amenity dataset export fail" && exit 1) diff --strip-trailing-cr -Z flights_dataset.csv flights_dataset.csv.new || (echo "flight dataset export fail" && exit 1) - - id: Clean exported files - name: python:3.11 - dir: data - entrypoint: /bin/bash - args: - - "-c" - - | - rm airport_dataset.csv.new amenity_dataset.csv.new flights_dataset.csv.new - - - id: Clean database - name: postgres + name: ${_IMAGE_NAME} entrypoint: /bin/bash secretEnv: - - CLOUDSQLUSER - - CLOUDSQLPASSWORD + - PGUSER + - PGPASSWORD args: - "-c" - | + /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; psql -h ${_DATABASE_HOST} -c "DROP DATABASE IF EXISTS ${_DATABASE_NAME};" substitutions: @@ -145,16 +127,16 @@ substitutions: _DATABASE_HOST: 127.0.0.1 _CLOUDSQL_REGION: "us-central1" _CLOUDSQL_INSTANCE: "my-cloudsql-instance" - _INSTANCE_CONNECTION_NAME: ${PROJECT_ID}:us-central1:myinstance + _INSTANCE_CONNECTION_NAME: ${PROJECT_ID}:${_CLOUDSQL_REGION}:${_CLOUDSQL_INSTANCE} _DATABASE_PORT: "5432" - _IMAGE_NAME: gcr.io/${PROJECT_ID}/sample-sql-proxy + _IMAGE_NAME: gcr.io/${PROJECT_ID}/cloudsql-test-proxy availableSecrets: secretManager: - versionName: projects/$PROJECT_ID/secrets/cloudsql_pass/versions/latest - env: CLOUDSQLPASSWORD + env: PGPASSWORD - versionName: projects/$PROJECT_ID/secrets/cloudsql_user/versions/latest - env: CLOUDSQLUSER + env: PGUSER options: substitutionOption: 'ALLOW_LOOSE' diff --git a/retrieval_service/example-config-cloudsql.yml b/retrieval_service/example-config-cloudsql.yml new file mode 100644 index 00000000..a1ab6c96 --- /dev/null +++ b/retrieval_service/example-config-cloudsql.yml @@ -0,0 +1,10 @@ +host: 0.0.0.0 +datastore: + # Example for Cloud SQL + kind: "cloudsql-postgres" + project: "my-project" + region: "my-region" + instance: "my-instance" + database: "my_database" + user: "my-user" + password: "my-password" diff --git a/retrieval_service/sql-proxy/Dockerfile b/retrieval_service/sql-proxy/Dockerfile deleted file mode 100644 index 2b263ea5..00000000 --- a/retrieval_service/sql-proxy/Dockerfile +++ /dev/null @@ -1 +0,0 @@ -FROM gcr.io/gcp-runtimes/ubuntu_20_0_4 From 394ab84ab2a6f1bdc5db4625f736dc0c4bce680c Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Thu, 4 Jan 2024 06:38:33 -0800 Subject: [PATCH 04/10] update init and export for testing in datastore --- .../cloudsql.tests.cloudbuild.yaml | 25 +------- .../providers/cloudsql_postgres_test.py | 61 ++++++++++++++++++- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/retrieval_service/cloudsql.tests.cloudbuild.yaml b/retrieval_service/cloudsql.tests.cloudbuild.yaml index 4b928761..628281aa 100644 --- a/retrieval_service/cloudsql.tests.cloudbuild.yaml +++ b/retrieval_service/cloudsql.tests.cloudbuild.yaml @@ -51,8 +51,8 @@ steps: psql -h ${_DATABASE_HOST} -c "CREATE DATABASE ${_DATABASE_NAME};" || echo "Database '${_DATABASE_NAME}' already exists." psql -h ${_DATABASE_HOST} -d ${_DATABASE_NAME} -c 'CREATE EXTENSION vector;' || echo "Extension 'vector' already exists." - - id: Initialize data - name: ${_IMAGE_NAME} + - id: Update config + name: python:3.11 dir: retrieval_service secretEnv: - PGUSER @@ -61,7 +61,6 @@ steps: args: - "-c" - | - /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; # Create config cp example-config-cloudsql.yml config.yml sed -i "s/my_database/${_DATABASE_NAME}/g" config.yml @@ -70,11 +69,9 @@ steps: sed -i "s/my-project/$PROJECT_ID/g" config.yml sed -i "s/my-region/${_CLOUDSQL_REGION}/g" config.yml sed -i "s/my-instance/${_CLOUDSQL_INSTANCE}/g" config.yml - # Run script - python run_database_init.py - id: Run Alloy DB integration tests - name: ${_IMAGE_NAME} + name: python:3.11 dir: retrieval_service env: # Set env var expected by tests - "DB_NAME=${_DATABASE_NAME}" @@ -88,27 +85,11 @@ steps: args: - "-c" - | - /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; # Set env var expected by tests export DB_USER=$$PGUSER export DB_PASS=$$PGPASSWORD python -m pytest datastore/providers/cloudsql_postgres_test.py - - id: Run database export - name: ${_IMAGE_NAME} - dir: retrieval_service - entrypoint: /bin/bash - args: - - "-c" - - | - /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; - # Run script - python run_database_export.py - cd ../data - diff --strip-trailing-cr -Z airport_dataset.csv airport_dataset.csv.new || (echo "airport dataset export fail" && exit 1) - diff --strip-trailing-cr -Z amenity_dataset.csv amenity_dataset.csv.new || (echo "amenity dataset export fail" && exit 1) - diff --strip-trailing-cr -Z flights_dataset.csv flights_dataset.csv.new || (echo "flight dataset export fail" && exit 1) - - id: Clean database name: ${_IMAGE_NAME} entrypoint: /bin/bash diff --git a/retrieval_service/datastore/providers/cloudsql_postgres_test.py b/retrieval_service/datastore/providers/cloudsql_postgres_test.py index 87408269..6df4812f 100644 --- a/retrieval_service/datastore/providers/cloudsql_postgres_test.py +++ b/retrieval_service/datastore/providers/cloudsql_postgres_test.py @@ -18,6 +18,7 @@ import pytest import pytest_asyncio +from csv_diff import compare, load_csv # type: ignore import models @@ -78,12 +79,68 @@ async def ds( instance=db_instance, ) ds = await datastore.create(cfg) + + airports_ds_path = "../data/airport_dataset.csv" + amenities_ds_path = "../data/amenity_dataset.csv" + flights_ds_path = "../data/flights_dataset.csv" + airports, amenities, flights = await ds.load_dataset( + airports_ds_path, amenities_ds_path, flights_ds_path + ) + await ds.initialize_data(airports, amenities, flights) + if ds is None: raise TypeError("datastore creation failure") yield ds - print("after yield") await ds.close() - print("closed database") + + +async def test_export_dataset(ds: postgres.Client): + airports, amenities, flights = await ds.export_data() + + airports_ds_path = "../data/airport_dataset.csv" + amenities_ds_path = "../data/amenity_dataset.csv" + flights_ds_path = "../data/flights_dataset.csv" + + airports_new_path = "../data/airport_dataset.csv.new" + amenities_new_path = "../data/amenity_dataset.csv.new" + flights_new_path = "../data/flights_dataset.csv.new" + + await ds.export_dataset( + airports, + amenities, + flights, + airports_new_path, + amenities_new_path, + flights_new_path, + ) + + diff_airports = compare( + load_csv(open(airports_ds_path), "id"), load_csv(open(airports_new_path), "id") + ) + assert diff_airports["added"] == [] + assert diff_airports["removed"] == [] + assert diff_airports["changed"] == [] + assert diff_airports["columns_added"] == [] + assert diff_airports["columns_removed"] == [] + + diff_amenities = compare( + load_csv(open(amenities_ds_path), "id"), + load_csv(open(amenities_new_path), "id"), + ) + assert diff_amenities["added"] == [] + assert diff_amenities["removed"] == [] + assert diff_amenities["changed"] == [] + assert diff_amenities["columns_added"] == [] + assert diff_amenities["columns_removed"] == [] + + diff_flights = compare( + load_csv(open(flights_ds_path), "id"), load_csv(open(flights_new_path), "id") + ) + assert diff_flights["added"] == [] + assert diff_flights["removed"] == [] + assert diff_flights["changed"] == [] + assert diff_flights["columns_added"] == [] + assert diff_flights["columns_removed"] == [] async def test_get_airport_by_id(ds: cloudsql_postgres.Client): From d954819b1a2a11227e5728f492ac4caa4993fe95 Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Thu, 4 Jan 2024 06:40:40 -0800 Subject: [PATCH 05/10] update dockerfile --- retrieval_service/Dockerfile-cloudsql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/retrieval_service/Dockerfile-cloudsql b/retrieval_service/Dockerfile-cloudsql index e244c3b4..f4ddd537 100644 --- a/retrieval_service/Dockerfile-cloudsql +++ b/retrieval_service/Dockerfile-cloudsql @@ -26,8 +26,7 @@ COPY --from=gcr.io/cloud-sql-connectors/cloud-sql-proxy /cloud-sql-proxy /clouds WORKDIR /app # Install dependencies. -COPY ./requirements.txt requirements.txt -COPY ./requirements-test.txt requirements-test.txt +COPY ./requirements.txt ./requirements-test.txt ./ RUN pip install --no-cache-dir -r requirements.txt -r requirements-test.txt # Copy local code to the container image. From 5bcd1b54a1838ba66b5c3c091cedefaf255fddb7 Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Thu, 4 Jan 2024 06:42:35 -0800 Subject: [PATCH 06/10] resolve error --- retrieval_service/datastore/providers/cloudsql_postgres_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retrieval_service/datastore/providers/cloudsql_postgres_test.py b/retrieval_service/datastore/providers/cloudsql_postgres_test.py index 6df4812f..a31e218c 100644 --- a/retrieval_service/datastore/providers/cloudsql_postgres_test.py +++ b/retrieval_service/datastore/providers/cloudsql_postgres_test.py @@ -94,7 +94,7 @@ async def ds( await ds.close() -async def test_export_dataset(ds: postgres.Client): +async def test_export_dataset(ds: cloudsql_postgres.Client): airports, amenities, flights = await ds.export_data() airports_ds_path = "../data/airport_dataset.csv" From 1b5507221b20e4f9f500fd5bdda0fb7dd40d778f Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Fri, 5 Jan 2024 08:22:24 -0800 Subject: [PATCH 07/10] move create db as fixture --- retrieval_service/Dockerfile-cloudsql | 36 ----------------- .../cloudsql.tests.cloudbuild.yaml | 39 ------------------- .../providers/cloudsql_postgres_test.py | 21 ++++++++++ 3 files changed, 21 insertions(+), 75 deletions(-) delete mode 100644 retrieval_service/Dockerfile-cloudsql diff --git a/retrieval_service/Dockerfile-cloudsql b/retrieval_service/Dockerfile-cloudsql deleted file mode 100644 index f4ddd537..00000000 --- a/retrieval_service/Dockerfile-cloudsql +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Set python runtime -FROM python:3.11-slim - -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive \ - apt-get install --no-install-recommends --assume-yes \ - postgresql-client - -COPY --from=gcr.io/cloud-sql-connectors/cloud-sql-proxy /cloud-sql-proxy /cloudsql/cloud-sql-proxy - -# Copy local code to the container image. -WORKDIR /app - -# Install dependencies. -COPY ./requirements.txt ./requirements-test.txt ./ -RUN pip install --no-cache-dir -r requirements.txt -r requirements-test.txt - -# Copy local code to the container image. -COPY . ./ - -# Set placeholder command -CMD pytest datastore/providers/cloudsql_postgres_test.py diff --git a/retrieval_service/cloudsql.tests.cloudbuild.yaml b/retrieval_service/cloudsql.tests.cloudbuild.yaml index 628281aa..d5591e54 100644 --- a/retrieval_service/cloudsql.tests.cloudbuild.yaml +++ b/retrieval_service/cloudsql.tests.cloudbuild.yaml @@ -13,16 +13,6 @@ # limitations under the License. steps: - - id: "docker-build" - name: "gcr.io/cloud-builders/docker" - dir: retrieval_service - args: ["build", "-f", "Dockerfile-cloudsql", "-t", "${_IMAGE_NAME}", "."] - - - id: "docker-push" - name: "gcr.io/cloud-builders/docker" - dir: retrieval_service - args: ["push", "${_IMAGE_NAME}"] - - id: Install dependencies name: python:3.11 dir: retrieval_service @@ -37,20 +27,6 @@ steps: "--user", ] - - id: Create database - name: ${_IMAGE_NAME} - dir: retrieval_service - entrypoint: /bin/bash - secretEnv: - - PGUSER # Use built-in env vars for database connection - - PGPASSWORD - args: - - "-c" - - | - /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; - psql -h ${_DATABASE_HOST} -c "CREATE DATABASE ${_DATABASE_NAME};" || echo "Database '${_DATABASE_NAME}' already exists." - psql -h ${_DATABASE_HOST} -d ${_DATABASE_NAME} -c 'CREATE EXTENSION vector;' || echo "Extension 'vector' already exists." - - id: Update config name: python:3.11 dir: retrieval_service @@ -90,27 +66,12 @@ steps: export DB_PASS=$$PGPASSWORD python -m pytest datastore/providers/cloudsql_postgres_test.py - - id: Clean database - name: ${_IMAGE_NAME} - entrypoint: /bin/bash - secretEnv: - - PGUSER - - PGPASSWORD - args: - - "-c" - - | - /cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2; - psql -h ${_DATABASE_HOST} -c "DROP DATABASE IF EXISTS ${_DATABASE_NAME};" - substitutions: _DATABASE_NAME: test_${SHORT_SHA} _DATABASE_USER: postgres _DATABASE_HOST: 127.0.0.1 _CLOUDSQL_REGION: "us-central1" _CLOUDSQL_INSTANCE: "my-cloudsql-instance" - _INSTANCE_CONNECTION_NAME: ${PROJECT_ID}:${_CLOUDSQL_REGION}:${_CLOUDSQL_INSTANCE} - _DATABASE_PORT: "5432" - _IMAGE_NAME: gcr.io/${PROJECT_ID}/cloudsql-test-proxy availableSecrets: secretManager: diff --git a/retrieval_service/datastore/providers/cloudsql_postgres_test.py b/retrieval_service/datastore/providers/cloudsql_postgres_test.py index a31e218c..6bff8854 100644 --- a/retrieval_service/datastore/providers/cloudsql_postgres_test.py +++ b/retrieval_service/datastore/providers/cloudsql_postgres_test.py @@ -18,6 +18,7 @@ import pytest import pytest_asyncio +import asyncpg from csv_diff import compare, load_csv # type: ignore import models @@ -59,9 +60,27 @@ def db_region() -> str: def db_instance() -> str: return get_env_var("DB_INSTANCE", "instance for cloud sql") +@pytest.fixture(scope="module") +async def create_db(db_user: str, db_name: str) -> AsyncGenerator[None, None]: + try: + conn = await asyncpg.connect(user=db_user, database=db_name) + except asyncpg.InvalidCatalogNameError: + # Database does not exist, create it. + sys_conn = await asyncpg.connect( + database='template1', + user=db_user, + ) + await sys_conn.execute(f'CREATE DATABASE "{db_name}";') + conn = await asyncpg.connect(user=db_user, database=db_name) + await conn.execute("CREATE EXTENSION vector;") + print("created") + await sys_conn.close() + yield + await conn.execute(f'DROP DATABASE IF EXISTS "{db_name}";') @pytest_asyncio.fixture(scope="module") async def ds( + create_db: None, db_user: str, db_pass: str, db_name: str, @@ -69,6 +88,7 @@ async def ds( db_region: str, db_instance: str, ) -> AsyncGenerator[datastore.Client, None]: + t = create_db cfg = cloudsql_postgres.Config( kind="cloudsql-postgres", user=db_user, @@ -78,6 +98,7 @@ async def ds( region=db_region, instance=db_instance, ) + t = create_db ds = await datastore.create(cfg) airports_ds_path = "../data/airport_dataset.csv" From d62a313071b08dc3d221ee7ce3562dbebc8a0648 Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Mon, 8 Jan 2024 00:05:40 -0800 Subject: [PATCH 08/10] fix asyncpg connect --- .../cloudsql.tests.cloudbuild.yaml | 1 - .../providers/cloudsql_postgres_test.py | 58 +++++++++++-------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/retrieval_service/cloudsql.tests.cloudbuild.yaml b/retrieval_service/cloudsql.tests.cloudbuild.yaml index d5591e54..9b50b0a9 100644 --- a/retrieval_service/cloudsql.tests.cloudbuild.yaml +++ b/retrieval_service/cloudsql.tests.cloudbuild.yaml @@ -69,7 +69,6 @@ steps: substitutions: _DATABASE_NAME: test_${SHORT_SHA} _DATABASE_USER: postgres - _DATABASE_HOST: 127.0.0.1 _CLOUDSQL_REGION: "us-central1" _CLOUDSQL_INSTANCE: "my-cloudsql-instance" diff --git a/retrieval_service/datastore/providers/cloudsql_postgres_test.py b/retrieval_service/datastore/providers/cloudsql_postgres_test.py index 6bff8854..cf09c731 100644 --- a/retrieval_service/datastore/providers/cloudsql_postgres_test.py +++ b/retrieval_service/datastore/providers/cloudsql_postgres_test.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio from datetime import datetime from ipaddress import IPv4Address from typing import Any, AsyncGenerator, List +import asyncpg import pytest import pytest_asyncio -import asyncpg from csv_diff import compare, load_csv # type: ignore +from google.cloud.sql.connector import Connector import models @@ -41,11 +43,6 @@ def db_pass() -> str: return get_env_var("DB_PASS", "password for the postgres user") -@pytest.fixture(scope="module") -def db_name() -> str: - return get_env_var("DB_NAME", "name of a postgres database") - - @pytest.fixture(scope="module") def db_project() -> str: return get_env_var("DB_PROJECT", "project id for google cloud") @@ -60,35 +57,48 @@ def db_region() -> str: def db_instance() -> str: return get_env_var("DB_INSTANCE", "instance for cloud sql") + @pytest.fixture(scope="module") -async def create_db(db_user: str, db_name: str) -> AsyncGenerator[None, None]: - try: - conn = await asyncpg.connect(user=db_user, database=db_name) - except asyncpg.InvalidCatalogNameError: - # Database does not exist, create it. - sys_conn = await asyncpg.connect( - database='template1', - user=db_user, - ) - await sys_conn.execute(f'CREATE DATABASE "{db_name}";') - conn = await asyncpg.connect(user=db_user, database=db_name) - await conn.execute("CREATE EXTENSION vector;") - print("created") - await sys_conn.close() - yield +async def create_db( + db_user: str, db_pass: str, db_project: str, db_region: str, db_instance: str +) -> AsyncGenerator[str, None]: + db_name = get_env_var("DB_NAME", "name of a postgres database") + loop = asyncio.get_running_loop() + connector = Connector(loop=loop) + # Database does not exist, create it. + sys_conn: asyncpg.Connection = await connector.connect_async( + f"{db_project}:{db_region}:{db_instance}", + "asyncpg", + user=f"{db_user}", + password=f"{db_pass}", + db="postgres", + ) + await sys_conn.execute(f'DROP DATABASE IF EXISTS "{db_name}";') + await sys_conn.execute(f'CREATE DATABASE "{db_name}";') + await sys_conn.close() + conn: asyncpg.Connection = await connector.connect_async( + f"{db_project}:{db_region}:{db_instance}", + "asyncpg", + user=f"{db_user}", + password=f"{db_pass}", + db=f"{db_name}", + ) + await conn.execute("CREATE EXTENSION IF NOT EXISTS vector;") + yield db_name await conn.execute(f'DROP DATABASE IF EXISTS "{db_name}";') + await conn.close() + @pytest_asyncio.fixture(scope="module") async def ds( - create_db: None, + create_db: AsyncGenerator[str, None], db_user: str, db_pass: str, - db_name: str, db_project: str, db_region: str, db_instance: str, ) -> AsyncGenerator[datastore.Client, None]: - t = create_db + db_name = await create_db.__anext__() cfg = cloudsql_postgres.Config( kind="cloudsql-postgres", user=db_user, From c78aa8103c8ba157f1f65d9c7e8f6278d5cc912b Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Wed, 10 Jan 2024 10:25:52 -0800 Subject: [PATCH 09/10] update db name --- retrieval_service/cloudsql.tests.cloudbuild.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retrieval_service/cloudsql.tests.cloudbuild.yaml b/retrieval_service/cloudsql.tests.cloudbuild.yaml index 9b50b0a9..5a263569 100644 --- a/retrieval_service/cloudsql.tests.cloudbuild.yaml +++ b/retrieval_service/cloudsql.tests.cloudbuild.yaml @@ -46,7 +46,7 @@ steps: sed -i "s/my-region/${_CLOUDSQL_REGION}/g" config.yml sed -i "s/my-instance/${_CLOUDSQL_INSTANCE}/g" config.yml - - id: Run Alloy DB integration tests + - id: Run Cloud SQL DB integration tests name: python:3.11 dir: retrieval_service env: # Set env var expected by tests From 2169f8821814ff5b4c6141ce4e955ef122091be6 Mon Sep 17 00:00:00 2001 From: Yuan Teoh Date: Tue, 16 Jan 2024 06:58:27 -0800 Subject: [PATCH 10/10] update to 2024 --- retrieval_service/cloudsql.tests.cloudbuild.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retrieval_service/cloudsql.tests.cloudbuild.yaml b/retrieval_service/cloudsql.tests.cloudbuild.yaml index 5a263569..e7cb2f2e 100644 --- a/retrieval_service/cloudsql.tests.cloudbuild.yaml +++ b/retrieval_service/cloudsql.tests.cloudbuild.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.