Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: explain different connection string formats in the docstring #1132

Merged
merged 9 commits into from
Oct 15, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(
hnsw_index_name: str = "haystack_hnsw_index",
hnsw_ef_search: Optional[int] = None,
keyword_index_name: str = "haystack_keyword_index",
connection_param_kwargs: Optional[Dict[str, Secret]] = None,
):
"""
Creates a new PgvectorDocumentStore instance.
Expand Down Expand Up @@ -129,6 +130,12 @@ def __init__(
`"hnsw"`. You can find more information about this parameter in the
[pgvector documentation](https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw).
:param keyword_index_name: Index name for the Keyword index.
:param connection_param_kwargs: A dictionary of parameters for the PostgreSQL connection.
If you prefer not to use the `connection_string`, you can specify connection parameters here.
kanenorman marked this conversation as resolved.
Show resolved Hide resolved
Common parameters include 'user', 'password', 'host', 'port', and 'dbname'. For a complete list, refer to the
[PostgreSQL documentation](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS).
Use the `Secret.from_env_var()` method to securely load parameters from environment variables.
Note that when any parameters are specified here, the `connection_string` is not used at all: the two are not merged.
"""

self.connection_string = connection_string
Expand All @@ -149,6 +156,7 @@ def __init__(
self._connection = None
self._cursor = None
self._dict_cursor = None
self.connection_param_kwargs = connection_param_kwargs or {}

@property
def cursor(self):
Expand All @@ -172,8 +180,15 @@ def connection(self):
return self._connection

def _create_connection(self):
conn_str = self.connection_string.resolve_value() or ""
connection = connect(conn_str)
# if connection_param_kwargs are provided use them
if self.connection_param_kwargs:
params = {key: value.resolve_value() for key, value in self.connection_param_kwargs.items()}
connection = connect(**params)
# otherwise, use the connection string
else:
conn_str = self.connection_string.resolve_value() or ""
connection = connect(conn_str)

connection.autocommit = True
connection.execute("CREATE EXTENSION IF NOT EXISTS vector")
register_vector(connection) # Note: this must be called before creating the cursors.
Expand Down Expand Up @@ -214,6 +229,7 @@ def to_dict(self) -> Dict[str, Any]:
hnsw_ef_search=self.hnsw_ef_search,
keyword_index_name=self.keyword_index_name,
language=self.language,
connection_param_kwargs={key: value.to_dict() for key, value in self.connection_param_kwargs.items()},
)

@classmethod
Expand All @@ -226,6 +242,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "PgvectorDocumentStore":
:returns:
Deserialized component.
"""
connection_params = data["init_parameters"]["connection_param_kwargs"]
deserialize_secrets_inplace(connection_params, connection_params.keys())

deserialize_secrets_inplace(data["init_parameters"], ["connection_string"])
return default_from_dict(cls, data)

Expand Down
182 changes: 182 additions & 0 deletions integrations/pgvector/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,50 @@ def test_init(monkeypatch):
assert document_store.keyword_index_name == "my_keyword_index"


@pytest.mark.usefixtures("patches_for_unit_tests")
def test_init_with_connection_param_kwargs(monkeypatch):
    """
    Verify that the constructor keeps every argument it receives, including `connection_param_kwargs`.
    """
    # Define the environment variables that the env-var secrets below refer to.
    for env_name, env_value in (
        ("PG_PASSWORD", "postgres_password"),
        ("PG_USER", "postgres_user"),
        ("PG_HOST", "postgres_host"),
        ("PG_PORT", "postgres_port"),
    ):
        monkeypatch.setenv(env_name, env_value)

    def build_connection_secrets():
        # A brand-new dict on every call, so the final comparison is by value rather than by identity.
        return {
            "user": Secret.from_env_var("PG_USER"),
            "host": Secret.from_env_var("PG_HOST"),
            "password": Secret.from_env_var("PG_PASSWORD"),
            "port": Secret.from_env_var("PG_PORT"),
        }

    store = PgvectorDocumentStore(
        table_name="my_table",
        embedding_dimension=512,
        vector_function="l2_distance",
        recreate_table=True,
        search_strategy="hnsw",
        hnsw_recreate_index_if_exists=True,
        hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128},
        hnsw_index_name="my_hnsw_index",
        hnsw_ef_search=50,
        keyword_index_name="my_keyword_index",
        connection_param_kwargs=build_connection_secrets(),
    )

    assert store.table_name == "my_table"
    assert store.embedding_dimension == 512
    assert store.vector_function == "l2_distance"
    assert store.recreate_table
    assert store.search_strategy == "hnsw"
    assert store.hnsw_recreate_index_if_exists
    assert store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128}
    assert store.hnsw_index_name == "my_hnsw_index"
    assert store.hnsw_ef_search == 50
    assert store.keyword_index_name == "my_keyword_index"
    assert store.connection_param_kwargs == build_connection_secrets()


@pytest.mark.usefixtures("patches_for_unit_tests")
def test_to_dict(monkeypatch):
monkeypatch.setenv("PG_CONN_STR", "some_connection_string")
Expand Down Expand Up @@ -103,6 +147,144 @@ def test_to_dict(monkeypatch):
"hnsw_index_name": "my_hnsw_index",
"hnsw_ef_search": 50,
"keyword_index_name": "my_keyword_index",
"connection_param_kwargs": {},
},
}


def test_from_dict(monkeypatch):
    """
    Verify that `from_dict` rebuilds a store from a payload whose `connection_param_kwargs` is empty.
    """
    monkeypatch.setenv("PG_CONN_STR", "some_connection_string")

    init_parameters = {
        "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
        "table_name": "my_table",
        "embedding_dimension": 512,
        "vector_function": "l2_distance",
        "recreate_table": True,
        "search_strategy": "hnsw",
        "hnsw_recreate_index_if_exists": True,
        "language": "english",
        "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128},
        "hnsw_index_name": "my_hnsw_index",
        "hnsw_ef_search": 50,
        "keyword_index_name": "my_keyword_index",
        "connection_param_kwargs": {},
    }
    serialized = {
        "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
        "init_parameters": init_parameters,
    }

    store = PgvectorDocumentStore.from_dict(serialized)

    assert store.table_name == "my_table"
    assert store.embedding_dimension == 512
    assert store.vector_function == "l2_distance"
    assert store.recreate_table
    assert store.search_strategy == "hnsw"
    assert store.hnsw_recreate_index_if_exists
    assert store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128}
    assert store.hnsw_index_name == "my_hnsw_index"
    assert store.hnsw_ef_search == 50
    assert store.keyword_index_name == "my_keyword_index"
    assert store.connection_param_kwargs == {}


def test_from_dict_with_connection_param_kwargs(monkeypatch):
    """
    Verify that `from_dict` turns each serialized entry of `connection_param_kwargs` back into a `Secret`.
    """
    # Define the environment variables that the env-var secrets refer to.
    for env_name, env_value in (
        ("PG_PASSWORD", "postgres_password"),
        ("PG_USER", "postgres_user"),
        ("PG_HOST", "postgres_host"),
        ("PG_PORT", "postgres_port"),
    ):
        monkeypatch.setenv(env_name, env_value)

    # Connection parameter name -> environment variable that backs it.
    param_to_env_var = (
        ("user", "PG_USER"),
        ("host", "PG_HOST"),
        ("password", "PG_PASSWORD"),
        ("port", "PG_PORT"),
    )
    serialized_secrets = {
        param: {"env_vars": [env_var], "strict": True, "type": "env_var"} for param, env_var in param_to_env_var
    }

    serialized = {
        "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
        "init_parameters": {
            "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
            "table_name": "my_table",
            "embedding_dimension": 512,
            "vector_function": "l2_distance",
            "recreate_table": True,
            "search_strategy": "hnsw",
            "hnsw_recreate_index_if_exists": True,
            "language": "english",
            "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128},
            "hnsw_index_name": "my_hnsw_index",
            "hnsw_ef_search": 50,
            "keyword_index_name": "my_keyword_index",
            "connection_param_kwargs": serialized_secrets,
        },
    }

    store = PgvectorDocumentStore.from_dict(serialized)

    assert store.table_name == "my_table"
    assert store.embedding_dimension == 512
    assert store.vector_function == "l2_distance"
    assert store.recreate_table
    assert store.search_strategy == "hnsw"
    assert store.hnsw_recreate_index_if_exists
    assert store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128}
    assert store.hnsw_index_name == "my_hnsw_index"
    assert store.hnsw_ef_search == 50
    assert store.keyword_index_name == "my_keyword_index"
    assert store.connection_param_kwargs == {
        param: Secret.from_env_var(env_var) for param, env_var in param_to_env_var
    }


def test_to_dict_with_connection_param_kwargs(monkeypatch):
    """
    Verify that `to_dict` serializes each `Secret` in `connection_param_kwargs` to its dictionary form.
    """
    # Define the environment variables that the env-var secrets refer to.
    for env_name, env_value in (
        ("PG_PASSWORD", "postgres_password"),
        ("PG_USER", "postgres_user"),
        ("PG_HOST", "postgres_host"),
        ("PG_PORT", "postgres_port"),
    ):
        monkeypatch.setenv(env_name, env_value)

    # Connection parameter name -> environment variable that backs it.
    param_to_env_var = (
        ("user", "PG_USER"),
        ("host", "PG_HOST"),
        ("password", "PG_PASSWORD"),
        ("port", "PG_PORT"),
    )

    store = PgvectorDocumentStore(
        table_name="my_table",
        embedding_dimension=512,
        vector_function="l2_distance",
        recreate_table=True,
        search_strategy="hnsw",
        hnsw_recreate_index_if_exists=True,
        hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128},
        hnsw_index_name="my_hnsw_index",
        hnsw_ef_search=50,
        keyword_index_name="my_keyword_index",
        connection_param_kwargs={param: Secret.from_env_var(env_var) for param, env_var in param_to_env_var},
    )

    expected_init_parameters = {
        "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
        "table_name": "my_table",
        "embedding_dimension": 512,
        "vector_function": "l2_distance",
        "recreate_table": True,
        "search_strategy": "hnsw",
        "hnsw_recreate_index_if_exists": True,
        "language": "english",
        "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128},
        "hnsw_index_name": "my_hnsw_index",
        "hnsw_ef_search": 50,
        "keyword_index_name": "my_keyword_index",
        "connection_param_kwargs": {
            param: {"env_vars": [env_var], "strict": True, "type": "env_var"} for param, env_var in param_to_env_var
        },
    }

    assert store.to_dict() == {
        "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
        "init_parameters": expected_init_parameters,
    }

Expand Down
5 changes: 5 additions & 0 deletions integrations/pgvector/tests/test_retrievers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def test_to_dict(self, mock_store):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down Expand Up @@ -91,6 +92,7 @@ def test_from_dict(self, monkeypatch):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down Expand Up @@ -186,6 +188,7 @@ def test_to_dict(self, mock_store):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down Expand Up @@ -215,6 +218,7 @@ def test_from_dict(self, monkeypatch):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down Expand Up @@ -263,6 +267,7 @@ def test_from_dict_without_filter_policy(self, monkeypatch):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down