Skip to content

Commit

Permalink
feat(UI): Faster startup and document listing (#1763)
Browse files Browse the repository at this point in the history
  • Loading branch information
imartinez authored Mar 20, 2024
1 parent 5725181 commit 348df78
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions private_gpt/server/ingest/ingest_service.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
 import logging
 import tempfile
 from pathlib import Path
-from typing import AnyStr, BinaryIO
+from typing import TYPE_CHECKING, AnyStr, BinaryIO

 from injector import inject, singleton
 from llama_index.core.node_parser import SentenceWindowNodeParser
Expand All @@ -17,6 +17,9 @@
 from private_gpt.server.ingest.model import IngestedDoc
 from private_gpt.settings.settings import settings

+if TYPE_CHECKING:
+    from llama_index.core.storage.docstore.types import RefDocInfo
+
 logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -86,17 +89,15 @@ def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[IngestedDoc]:
         return [IngestedDoc.from_document(document) for document in documents]

     def list_ingested(self) -> list[IngestedDoc]:
-        ingested_docs = []
+        ingested_docs: list[IngestedDoc] = []
         try:
             docstore = self.storage_context.docstore
-            ingested_docs_ids: set[str] = set()
+            ref_docs: dict[str, RefDocInfo] | None = docstore.get_all_ref_doc_info()

-            for node in docstore.docs.values():
-                if node.ref_doc_id is not None:
-                    ingested_docs_ids.add(node.ref_doc_id)
+            if not ref_docs:
+                return ingested_docs

-            for doc_id in ingested_docs_ids:
-                ref_doc_info = docstore.get_ref_doc_info(ref_doc_id=doc_id)
+            for doc_id, ref_doc_info in ref_docs.items():
                 doc_metadata = None
                 if ref_doc_info is not None and ref_doc_info.metadata is not None:
                     doc_metadata = IngestedDoc.curate_metadata(ref_doc_info.metadata)
Expand Down

0 comments on commit 348df78

Please sign in to comment.