From 4deb900ca0baec956477bc22821c2e54a89cbde9 Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Mon, 9 Sep 2024 16:56:36 +0200 Subject: [PATCH] Fix links in GraphVectorStore pydoc --- .../api_reference/scripts/custom_formatter.py | 15 ++-- .../graph_vectorstores/__init__.py | 81 +++++++++---------- 2 files changed, 45 insertions(+), 51 deletions(-) diff --git a/docs/api_reference/scripts/custom_formatter.py b/docs/api_reference/scripts/custom_formatter.py index d8efb5e558da9..3535661682efc 100644 --- a/docs/api_reference/scripts/custom_formatter.py +++ b/docs/api_reference/scripts/custom_formatter.py @@ -17,15 +17,16 @@ def process_toc_h3_elements(html_content: str) -> str: # Process each element for element in toc_h3_elements: - element = element.a.code.span - # Get the text content of the element - content = element.get_text() + if element.a.code: + element = element.a.code.span + # Get the text content of the element + content = element.get_text() - # Apply the regex substitution - modified_content = content.split(".")[-1] + # Apply the regex substitution + modified_content = content.split(".")[-1] - # Update the element's content - element.string = modified_content + # Update the element's content + element.string = modified_content # Return the modified HTML return str(soup) diff --git a/libs/community/langchain_community/graph_vectorstores/__init__.py b/libs/community/langchain_community/graph_vectorstores/__init__.py index 485123a96b0f5..0d3e81df38809 100644 --- a/libs/community/langchain_community/graph_vectorstores/__init__.py +++ b/libs/community/langchain_community/graph_vectorstores/__init__.py @@ -1,6 +1,9 @@ -"""**Graph Vector Store** +""".. title:: Graph Vector Store -Sometimes embedding models don’t capture all the important relationships between +Graph Vector Store +================== + +Sometimes embedding models don't capture all the important relationships between documents. 
Graph Vector Stores are an extension to both vector stores and retrievers that allow documents to be explicitly connected to each other. @@ -13,11 +16,10 @@ For example, a paragraph of text may be linked to URLs based on the anchor tags in it's content and linked from the URL(s) it is published at. -Link extractors can be used to extract links from documents. - -Example: +`Link extractors ` +can be used to extract links from documents. -.. code-block:: python +Example:: graph_vector_store = CassandraGraphVectorStore() link_extractor = HtmlLinkExtractor() @@ -25,13 +27,18 @@ add_links(document, links) graph_vector_store.add_document(document) -*********** -Get started -*********** +.. seealso:: -We chunk the State of the Union text and split it into documents. + - :class:`How to use a graph vector store as a retriever ` + - :class:`How to create links between documents ` + - :class:`How to link Documents on hyperlinks in HTML ` + - :class:`How to link Documents on common keywords (using KeyBERT) ` + - :class:`How to link Documents on common named entities (using GliNER) ` -.. code-block:: python +Get started +----------- + +We chunk the State of the Union text and split it into documents:: from langchain_community.document_loaders import TextLoader from langchain_text_splitters import CharacterTextSplitter @@ -41,14 +48,12 @@ documents = text_splitter.split_documents(raw_documents) Links can be added to documents manually but it's easier to use a -:class:`~langchain_community.graph_vectorstores.extractors.LinkExtractor`. +:class:`~langchain_community.graph_vectorstores.extractors.link_extractor.LinkExtractor`. Several common link extractors are available and you can build your own. 
For this guide, we'll use the -:class:`~langchain_community.graph_vectorstores.extractors.KeybertLinkExtractor` +:class:`~langchain_community.graph_vectorstores.extractors.keybert_link_extractor.KeybertLinkExtractor` which uses the KeyBERT model to tag documents with keywords and uses these keywords to -create links between documents. - -.. code-block:: python +create links between documents:: from langchain_community.graph_vectorstores.extractors import KeybertLinkExtractor from langchain_community.graph_vectorstores.links import add_links @@ -58,15 +63,14 @@ for doc in documents: add_links(doc, extractor.extract_one(doc)) -*********************************************** Create the graph vector store and add documents -*********************************************** +----------------------------------------------- We'll use an Apache Cassandra or Astra DB database as an example. -We create a :class:`~langchain_community.graph_vectorstores.CassandraGraphVectorStore` -from the documents and an :class:`~langchain_openai.OpenAIEmbeddings` model. - -.. code-block:: python +We create a +:class:`~langchain_community.graph_vectorstores.cassandra.CassandraGraphVectorStore` +from the documents and an :class:`~langchain_openai.embeddings.base.OpenAIEmbeddings` +model:: import cassio from langchain_community.graph_vectorstores import CassandraGraphVectorStore @@ -80,45 +84,37 @@ documents=documents, ) -***************** + Similarity search -***************** +----------------- If we don't traverse the graph, a graph vector store behaves like a regular vector store. So all methods available in a vector store are also available in a graph vector store. -The :meth:`~langchain_community.graph_vectorstores.base.GraphVectorStore.similarity_search` +The :meth:`~langchain_core.graph_vectorstores.base.GraphVectorStore.similarity_search` method returns documents similar to a query without considering -the links between documents. - -.. 
code-block:: python +the links between documents:: docs = store.similarity_search( "What did the president say about Ketanji Brown Jackson?" ) -**************** Traversal search -**************** +---------------- -The :meth:`~langchain_community.graph_vectorstores.base.GraphVectorStore.traversal_search` +The :meth:`~langchain_core.graph_vectorstores.base.GraphVectorStore.traversal_search` method returns documents similar to a query considering the links between documents. It first does a similarity search and then traverses the graph to -find linked documents. - -.. code-block:: python +find linked documents:: docs = list( store.traversal_search("What did the president say about Ketanji Brown Jackson?") ) -************* Async methods -************* - -The graph vector store has async versions of the methods prefixed with ``a``. +------------- -.. code-block:: python +The graph vector store has async versions of the methods prefixed with ``a``:: docs = [ doc @@ -127,15 +123,12 @@ ) ] -**************************** Graph vector store retriever -**************************** +---------------------------- The graph vector store can be converted to a retriever. It is similar to the vector store retriever but it also has traversal search methods -such as ``traversal`` and ``mmr_traversal``. - -.. code-block:: python +such as ``traversal`` and ``mmr_traversal``:: retriever = store.as_retriever(search_type="mmr_traversal") docs = retriever.invoke("What did the president say about Ketanji Brown Jackson?")