Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DB load history #529

Merged
merged 9 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pebblo/app/enums/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,13 @@ class ClassifierConstants(Enum):
class ApplicationTypes(Enum):
LOADER = "loader"
RETRIEVAL = "retrieval"


class SQLiteTables(Enum):
    """Names of the SQLite tables referenced by the SQLAlchemy table classes.

    Each member's value is the literal table-name string.
    """

    AI_APP = "aiapp"
    AI_DATALOADER = "aidataloader"
    # NOTE: ``AI_RETRIVAL`` is a misspelling that existing code refers to, so it
    # stays the canonical member (its ``.name`` and the iteration order are
    # unchanged). ``AI_RETRIEVAL`` has the same value, which makes it an Enum
    # alias of the member above; new code should use the correct spelling.
    AI_RETRIVAL = "airetrieval"
    AI_RETRIEVAL = "airetrieval"
    AI_DATASOURCE = "aidatasource"
    AI_DOCUMENT = "aidocument"
    AI_SNIPPETS = "aisnippets"
    AI_USER = "aiuser"
74 changes: 74 additions & 0 deletions pebblo/app/models/db_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,77 @@ class AiSnippet(BaseModel):
topicDetails: Optional[dict] = {}
policyViolations: Optional[List[dict]] = []
# label_feedback: Optional[List[LabelFeedback]] = []


class Summary(BaseModel):
    """Summary section of a report; the type of ``ReportModel.reportSummary``.

    All fields are required. The per-field notes below are inferred from the
    field names only -- confirm against the code that populates this model.
    """

    # Integer counters (presumably: total findings, entity findings, topic findings).
    findings: int
    findingsEntities: int
    findingsTopics: int
    # Integer counters for files and data sources (presumably totals for the report).
    totalFiles: int
    filesWithFindings: int
    dataSources: int
    # Plain strings; ``createdAt`` is a string here, not a datetime (format not visible here).
    owner: str
    createdAt: str


class LoadHistory(BaseModel):
    """A single load-history record.

    All fields are required. NOTE(review): ``ReportModel.loadHistory`` is typed
    as ``Optional[dict]``, not as this model -- confirm where this class is used.
    """

    # String identifiers for the load and its report.
    loadId: str
    reportName: str
    # Integer counters (presumably findings for this load -- verify with the producer).
    findings: int
    filesWithFindings: int
    # Stored as a string; the timestamp format is not visible here.
    generatedOn: str


class DataSource(BaseModel):
    """Data-source entry of a report; the element type of ``ReportModel.dataSources``.

    Every field is required except ``findingsDetails``, which defaults to an
    empty list.
    """

    # String descriptors of the source.
    name: str
    sourcePath: str
    sourceType: str
    # Integer size and snippet counters (units of ``sourceSize`` not visible here).
    sourceSize: int
    totalSnippetCount: int
    displayedSnippetCount: int
    # Untyped lists; the element schema is not declared in this model.
    findingsSummary: list
    findingsDetails: Optional[list] = []
    # snippets: Optional[List[Snippets]]


class TopFindings(BaseModel):
    """Per-file findings entry; the element type of ``ReportModel.topFindings``.

    All fields are required.
    """

    # String descriptors of the file.
    fileName: str
    fileOwner: str
    # Integer size and finding counters (units of ``sourceSize`` not visible here).
    sourceSize: int
    findingsEntities: int
    findingsTopics: int
    findings: int
    # Untyped list; the element schema is not declared in this model.
    authorizedIdentities: list


class LoaderAppListDetails(BaseModel):
    """One application entry; the element type of ``LoaderAppModel.appList``.

    ``name``, ``topics`` and ``entities`` are required; ``owner`` and
    ``loadId`` are optional and default to ``None``.
    """

    name: str
    # Integer counters (presumably topic / entity finding counts -- verify with the producer).
    topics: int
    entities: int
    owner: Optional[str] = None
    loadId: Optional[str] = None


class LoaderAppModel(BaseModel):
    """Aggregate model for loader applications: counters plus detail lists.

    All fields are required.
    """

    # Integer counters.
    applicationsAtRiskCount: int
    findingsCount: int
    documentsWithFindingsCount: int
    dataSourceCount: int
    # Typed list of per-application entries.
    appList: List[LoaderAppListDetails]
    # Untyped lists; the element schema is not declared in this model.
    findings: list
    documentsWithFindings: list
    dataSource: list


class ReportModel(BaseModel):
    """Top-level report model.

    Only ``name`` is required. ``framework`` defaults to a fresh
    ``FrameworkInfo`` instance; every other field defaults to ``None``.
    """

    name: str
    description: Optional[str] = None
    # default_factory builds a new FrameworkInfo for each ReportModel instance.
    framework: Optional[FrameworkInfo] = Field(default_factory=FrameworkInfo)
    reportSummary: Optional[Summary] = None
    # Plain dict rather than the LoadHistory model; its schema is not declared here.
    loadHistory: Optional[dict] = None
    topFindings: Optional[List[TopFindings]] = None
    instanceDetails: Optional[InstanceDetails] = None
    dataSources: Optional[List[DataSource]] = None
    # Version strings; ``clientVersion`` is a dict whose keys are not declared here.
    pebbloServerVersion: Optional[str] = None
    pebbloClientVersion: Optional[str] = None
    clientVersion: Optional[dict] = None
16 changes: 8 additions & 8 deletions pebblo/app/models/sqltables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from pebblo.app.config.config import var_server_config_dict
from pebblo.app.enums.common import StorageTypes
from pebblo.app.enums.enums import CacheDir
from pebblo.app.enums.enums import CacheDir, SQLiteTables
from pebblo.app.utils.utils import get_full_path

Base = declarative_base()
Expand All @@ -12,49 +12,49 @@


class AiAppTable(Base):
    """Declarative table ``aiapp``: an integer primary key plus a JSON column."""

    # Single source of truth for the table name; the hard-coded "aiapp"
    # assignment that preceded this line was immediately overwritten by it.
    __tablename__ = SQLiteTables.AI_APP.value

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Record payload stored in a JSON column.
    data = Column(JSON)


class AiDataLoaderTable(Base):
    """Declarative table ``aidataloader``: an integer primary key plus a JSON column."""

    # Single source of truth for the table name; the hard-coded "aidataloader"
    # assignment that preceded this line was immediately overwritten by it.
    __tablename__ = SQLiteTables.AI_DATALOADER.value

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Record payload stored in a JSON column.
    data = Column(JSON)


class AiRetrievalTable(Base):
    """Declarative table ``airetrieval``: an integer primary key plus a JSON column."""

    # Single source of truth for the table name; the hard-coded "airetrieval"
    # assignment that preceded this line was immediately overwritten by it.
    # (``AI_RETRIVAL`` is the enum member's spelling as declared in SQLiteTables.)
    __tablename__ = SQLiteTables.AI_RETRIVAL.value

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Record payload stored in a JSON column.
    data = Column(JSON)


class AiDataSourceTable(Base):
    """Declarative table ``aidatasource``: an integer primary key plus a JSON column."""

    # Single source of truth for the table name; the hard-coded "aidatasource"
    # assignment that preceded this line was immediately overwritten by it.
    __tablename__ = SQLiteTables.AI_DATASOURCE.value

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Record payload stored in a JSON column.
    data = Column(JSON)


class AiDocumentTable(Base):
    """Declarative table ``aidocument``: an integer primary key plus a JSON column."""

    # Single source of truth for the table name; the hard-coded "aidocument"
    # assignment that preceded this line was immediately overwritten by it.
    __tablename__ = SQLiteTables.AI_DOCUMENT.value

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Record payload stored in a JSON column.
    data = Column(JSON)


class AiSnippetsTable(Base):
    """Declarative table ``aisnippets``: an integer primary key plus a JSON column."""

    # Single source of truth for the table name; the hard-coded "aisnippets"
    # assignment that preceded this line was immediately overwritten by it.
    __tablename__ = SQLiteTables.AI_SNIPPETS.value

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Record payload stored in a JSON column.
    data = Column(JSON)


class AiUser(Base):
    """Declarative table ``aiuser``: an integer primary key plus a JSON column."""

    # Single source of truth for the table name; the hard-coded "aiuser"
    # assignment that preceded this line was immediately overwritten by it.
    __tablename__ = SQLiteTables.AI_USER.value

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Record payload stored in a JSON column.
    data = Column(JSON)
Expand Down
6 changes: 3 additions & 3 deletions pebblo/app/service/discovery/discovery_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def return_response(message, status_code, pebblo_server_version=None):
message=str(message),
)
return PebbloJsonResponse.build(
body=response.dict(exclude_none=True), status_code=status_code
body=response.model_dump(exclude_none=True), status_code=status_code
)

def _fetch_runtime_instance_details(self) -> InstanceDetails:
Expand Down Expand Up @@ -98,7 +98,7 @@ def create_app_obj(
elif app_type == ApplicationTypes.RETRIEVAL.value:
AppModel = AiApp
model_obj = AppModel(**ai_app)
return model_obj.dict()
return model_obj.model_dump()

def _get_app_type_and_class(self):
AppClass = None
Expand Down Expand Up @@ -153,7 +153,7 @@ def _fetch_chain_details(self, app_metadata) -> list[Chain]:

vector_db_details.append(vector_db_obj)
chain_obj = Chain(name=name, model=model, vectorDbs=vector_db_details)
chains.append(chain_obj.dict())
chains.append(chain_obj.model_dump())

logger.debug(f"Application Name [{self.app_name}]")
return chains
Expand Down
38 changes: 29 additions & 9 deletions pebblo/app/service/loader/document/document.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pebblo.app.enums.enums import ApplicationTypes
from pebblo.app.models.db_models import AiDocument
from pebblo.app.models.sqltables import AiDocumentTable
from pebblo.app.service.loader.snippet.snippet import AiSnippetHandler
Expand All @@ -15,7 +16,7 @@ def __init__(self, db, data):
self.snippet_handler = AiSnippetHandler(db, data)

@timeit
def _get_or_create_document(self, doc, data_source):
def _get_or_create_document(self, doc: dict, data_source: dict) -> AiDocumentTable:
logger.debug("Create or update AIDocument")
filter_query = {
"appName": self.app_name,
Expand Down Expand Up @@ -47,13 +48,13 @@ def _get_or_create_document(self, doc, data_source):
"lastIngested": get_current_time(),
}
ai_document_obj = AiDocument(**ai_documents)
ai_document_data = ai_document_obj.dict()
ai_document_data = ai_document_obj.model_dump()

_, doc_obj = self.db.insert_data(AiDocumentTable, ai_document_data)
return doc_obj

@staticmethod
def _update_loader_documents(app_loader_details, document):
def _update_loader_documents(app_loader_details: dict, document: dict) -> dict:
logger.debug("Updating Loader details with document and findings.")

# Updating documents value for AiDataLoader
Expand Down Expand Up @@ -86,7 +87,7 @@ def _update_loader_documents(app_loader_details, document):
return app_loader_details

@staticmethod
def _update_document(document, snippet):
def _update_document(document: dict, snippet: dict) -> dict:
logger.debug("Updating AIDocument with snippet reference.")
existing_topics = document.get("topics")
if not existing_topics:
Expand All @@ -102,28 +103,47 @@ def _update_document(document, snippet):
if entity in existing_entities.keys():
updated_entity = existing_entities[entity]
updated_entity["ref"].append(snippet.get("id"))
updated_entity["count"] += snippet.get("entities", {}).get(
entity, 0
)
existing_entities.update({entity: updated_entity})
else:
existing_entities.update({entity: {"ref": [snippet.get("id")]}})
existing_entities.update(
{
entity: {
"ref": [snippet.get("id")],
"count": snippet.get("entities").get(entity, 0),
}
}
)
if topics:
for topic in topics:
if topic in existing_topics.keys():
updated_topic = existing_topics[topic]
updated_topic["ref"].append(snippet.get("id"))
updated_topic["count"] += snippet.get("topics", {}).get(topic, 0)
existing_topics.update({topic: updated_topic})
else:
existing_topics.update({topic: {"ref": [snippet.get("id")]}})
existing_topics.update(
{
topic: {
"ref": [snippet.get("id")],
"count": snippet.get("topics", {}).get(topic, 0),
}
}
)

document["topics"] = existing_topics
document["entities"] = existing_entities
logger.debug("AIDocument Updated successfully with snippet reference")
return document

@timeit
def create_or_update_document(self, app_loader_details, data_source):
def create_or_update_document(
self, app_loader_details: ApplicationTypes.LOADER.value, data_source: dict
):
logger.debug("Create or update document snippet")
input_doc_list = self.data.get("docs", [])
doc_obj = None
for doc in input_doc_list:
doc_obj = self._get_or_create_document(doc, data_source)
existing_document = doc_obj.data
Expand All @@ -138,6 +158,6 @@ def create_or_update_document(self, app_loader_details, data_source):
app_loader_details, snippet
)

self.db.update_data(doc_obj, doc_obj.data)
self.db.update_data(doc_obj, doc_obj.data)

return app_loader_details
8 changes: 4 additions & 4 deletions pebblo/app/service/loader/loader_doc_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def _create_return_response(message, output=None, status_code=200):
output = []
response = LoaderDocResponseModel(docs=output, message=message)
return PebbloJsonResponse.build(
body=response.dict(exclude_none=True), status_code=status_code
body=response.model_dump(exclude_none=True), status_code=status_code
)

def _pdf_writer(self, file_path, data):
Expand Down Expand Up @@ -82,7 +82,7 @@ def _datetime_decoder(dct):
if isinstance(value, str):
try:
# Attempt to parse the date string
dct[key] = datetime.strptime(value, "%Y-%m-%d %H:%M:%S.%f")
dct[key] = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
except (ValueError, TypeError):
# If parsing fails, silently leave the value unchanged
pass
Expand Down Expand Up @@ -156,7 +156,7 @@ def _update_loader_details(self, app_loader_details):
sourceFiles=loader_source_files,
lastModified=get_current_time(),
)
loader_list.append(new_loader_data.dict())
loader_list.append(new_loader_data.model_dump())
app_loader_details["loaders"] = loader_list

# self.db.update_data(table_obj, app_loader_details)
Expand Down Expand Up @@ -258,7 +258,7 @@ def _get_or_create_data_source(self):
"loader": loader_details.get("loader"),
}
ai_data_source_obj = AiDataSource(**data_source)
ai_data_source = ai_data_source_obj.dict()
ai_data_source = ai_data_source_obj.model_dump()
_, data_source_obj = self.db.insert_data(AiDataSourceTable, ai_data_source)
logger.debug("Data Source has been created successfully.")
return data_source_obj.data
Expand Down
2 changes: 1 addition & 1 deletion pebblo/app/service/loader/snippet/snippet.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def create_snippet(self, doc, data_source, document):
"topicDetails": doc.get("topic_details", {}),
}
ai_snippet_obj = AiSnippet(**snippet_details)
ai_snippet = ai_snippet_obj.dict()
ai_snippet = ai_snippet_obj.model_dump()
status, snippet_obj = self.db.insert_data(AiSnippetsTable, ai_snippet)
logger.debug("AISnippet created successfully.")
return snippet_obj.data
Loading