Skip to content

Commit

Permalink
update init and export for testing in datastore
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuan325 committed Jan 4, 2024
1 parent 20b8955 commit 460acb1
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 24 deletions.
25 changes: 3 additions & 22 deletions retrieval_service/cloudsql.tests.cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ steps:
psql -h ${_DATABASE_HOST} -c "CREATE DATABASE ${_DATABASE_NAME};" || echo "Database '${_DATABASE_NAME}' already exists."
psql -h ${_DATABASE_HOST} -d ${_DATABASE_NAME} -c 'CREATE EXTENSION vector;' || echo "Extension 'vector' already exists."
- id: Initialize data
name: ${_IMAGE_NAME}
- id: Update config
name: python:3.11
dir: retrieval_service
secretEnv:
- PGUSER
Expand All @@ -61,7 +61,6 @@ steps:
args:
- "-c"
- |
/cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2;
# Create config
cp example-config-cloudsql.yml config.yml
sed -i "s/my_database/${_DATABASE_NAME}/g" config.yml
Expand All @@ -70,11 +69,9 @@ steps:
sed -i "s/my-project/$PROJECT_ID/g" config.yml
sed -i "s/my-region/${_CLOUDSQL_REGION}/g" config.yml
sed -i "s/my-instance/${_CLOUDSQL_INSTANCE}/g" config.yml
# Run script
python run_database_init.py
- id: Run Alloy DB integration tests
name: ${_IMAGE_NAME}
name: python:3.11
dir: retrieval_service
env: # Set env var expected by tests
- "DB_NAME=${_DATABASE_NAME}"
Expand All @@ -88,27 +85,11 @@ steps:
args:
- "-c"
- |
/cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2;
# Set env var expected by tests
export DB_USER=$$PGUSER
export DB_PASS=$$PGPASSWORD
python -m pytest datastore/providers/cloudsql_postgres_test.py
- id: Run database export
name: ${_IMAGE_NAME}
dir: retrieval_service
entrypoint: /bin/bash
args:
- "-c"
- |
/cloudsql/cloud-sql-proxy --port ${_DATABASE_PORT} ${_INSTANCE_CONNECTION_NAME} & sleep 2;
# Run script
python run_database_export.py
cd ../data
diff --strip-trailing-cr -Z airport_dataset.csv airport_dataset.csv.new || (echo "airport dataset export fail" && exit 1)
diff --strip-trailing-cr -Z amenity_dataset.csv amenity_dataset.csv.new || (echo "amenity dataset export fail" && exit 1)
diff --strip-trailing-cr -Z flights_dataset.csv flights_dataset.csv.new || (echo "flight dataset export fail" && exit 1)
- id: Clean database
name: ${_IMAGE_NAME}
entrypoint: /bin/bash
Expand Down
61 changes: 59 additions & 2 deletions retrieval_service/datastore/providers/cloudsql_postgres_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import pytest
import pytest_asyncio
from csv_diff import compare, load_csv # type: ignore

import models

Expand Down Expand Up @@ -78,12 +79,68 @@ async def ds(
instance=db_instance,
)
ds = await datastore.create(cfg)

airports_ds_path = "../data/airport_dataset.csv"
amenities_ds_path = "../data/amenity_dataset.csv"
flights_ds_path = "../data/flights_dataset.csv"
airports, amenities, flights = await ds.load_dataset(
airports_ds_path, amenities_ds_path, flights_ds_path
)
await ds.initialize_data(airports, amenities, flights)

if ds is None:
raise TypeError("datastore creation failure")
yield ds
print("after yield")
await ds.close()
print("closed database")


async def test_export_dataset(ds: cloudsql_postgres.Client):
    """Check that data exported from the datastore matches the source CSV files.

    The test pulls the airports, amenities and flights out of the datastore
    with ``ds.export_data()``, writes them to ``*.csv.new`` files next to the
    original datasets with ``ds.export_dataset()``, and then compares each
    original/exported pair with ``csv_diff`` using the ``id`` column as the
    row key. Any added, removed or changed row, or any added or removed
    column, fails the test.

    Args:
        ds: Datastore client supplied by the ``ds`` fixture.
    """
    airports, amenities, flights = await ds.export_data()

    # Dataset name -> (original CSV, freshly exported CSV). Paths are relative
    # to the directory the tests are run from.
    dataset_paths = {
        "airports": (
            "../data/airport_dataset.csv",
            "../data/airport_dataset.csv.new",
        ),
        "amenities": (
            "../data/amenity_dataset.csv",
            "../data/amenity_dataset.csv.new",
        ),
        "flights": (
            "../data/flights_dataset.csv",
            "../data/flights_dataset.csv.new",
        ),
    }

    await ds.export_dataset(
        airports,
        amenities,
        flights,
        dataset_paths["airports"][1],
        dataset_paths["amenities"][1],
        dataset_paths["flights"][1],
    )

    # Every category of the csv_diff result must be empty for a clean export.
    diff_fields = ("added", "removed", "changed", "columns_added", "columns_removed")

    for name, (original_path, exported_path) in dataset_paths.items():
        # Context managers close both files even if loading or comparing
        # raises; the original bare open() calls leaked the handles.
        with open(original_path) as original_file, open(
            exported_path
        ) as exported_file:
            diff = compare(
                load_csv(original_file, "id"),
                load_csv(exported_file, "id"),
            )

        for field in diff_fields:
            assert (
                diff[field] == []
            ), f"{name} dataset export mismatch in '{field}': {diff[field]}"


async def test_get_airport_by_id(ds: cloudsql_postgres.Client):
Expand Down

0 comments on commit 460acb1

Please sign in to comment.