Skip to content

Commit

Permalink
Confidence score changes for DB (#523)
Browse files Browse the repository at this point in the history
* Confidence score changes for DB

* Modularized methods and added comments

---------

Co-authored-by: dristy.cd <[email protected]>
  • Loading branch information
dristysrivastava and dristy.cd authored Sep 2, 2024
1 parent 7680e56 commit 75d6238
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 92 deletions.
8 changes: 4 additions & 4 deletions pebblo/app/models/db_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,10 @@ class AiDataModel(BaseModel):
data: Optional[Union[list, str]] = None
entityCount: int
entities: dict
entityDetails: Optional[dict] = {}
topicCount: Optional[int] = 0
topics: Optional[dict] = {}

def dict(self, **kwargs):
kwargs["exclude_none"] = True
return super().dict(**kwargs)
topicDetails: Optional[dict] = {}


class RetrievalContext(BaseModel):
Expand Down Expand Up @@ -183,5 +181,7 @@ class AiSnippet(BaseModel):
lastModified: Optional[str] = None
entities: dict
topics: dict
entityDetails: Optional[dict] = {}
topicDetails: Optional[dict] = {}
policyViolations: Optional[List[dict]] = []
# label_feedback: Optional[List[LabelFeedback]] = []
6 changes: 6 additions & 0 deletions pebblo/app/service/loader/loader_doc_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,10 @@ def _get_doc_classification(self, doc):
data=doc.get("doc", None),
entities={},
entityCount=0,
entityDetails={},
topics={},
topicCount=0,
topicDetails={},
)
try:
if doc_info.data:
Expand All @@ -189,8 +191,10 @@ def _get_doc_classification(self, doc):
)
doc_info.topics = topics
doc_info.entities = entities
doc_info.entityDetails = entity_details
doc_info.topicCount = topic_count
doc_info.entityCount = entity_count
doc_info.topicDetails = topic_details
doc_info.data = anonymized_doc
logger.debug("Doc classification finished.")
return doc_info
Expand All @@ -209,6 +213,8 @@ def _update_doc_details(doc, doc_info):
logger.debug("Update doc details with classification result")
doc["entities"] = doc_info.entities
doc["topics"] = doc_info.topics
doc["entity_details"] = doc_info.entityDetails
doc["topic_details"] = doc_info.topicDetails
logger.debug("Input doc updated with classification result")

@timeit
Expand Down
2 changes: 2 additions & 0 deletions pebblo/app/service/loader/snippet/snippet.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def create_snippet(self, doc, data_source, document):
"loaderSourcePath": data_source.get("sourcePath"),
"entities": doc.get("entities", {}),
"topics": doc.get("topics", {}),
"entityDetails": doc.get("entity_details", {}),
"topicDetails": doc.get("topic_details", {}),
}
ai_snippet_obj = AiSnippet(**snippet_details)
ai_snippet = ai_snippet_obj.dict()
Expand Down
238 changes: 150 additions & 88 deletions pebblo/app/service/local_ui/loader_apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,106 +37,139 @@ def __init__(self):
self.loader_document_with_findings_list = []
self.loader_findings_summary_list = []

def _get_snippet_details(self, snippet_ids, owner, label_name):
    """
    Build UI-facing snippet detail records for the given snippet ids.

    Each snippet row is fetched from AiSnippetsTable; when the stored
    ``entityDetails`` / ``topicDetails`` maps contain an entry for
    ``label_name``, that single entry is carried into the result so the UI
    can show per-label confidence details. Ids missing from the DB are
    skipped silently (best-effort listing, not an error).

    :param snippet_ids: iterable of snippet primary keys to look up
    :param owner: file owner attributed to every returned snippet
    :param label_name: entity/topic label whose details should be surfaced
    :return: list of snippet detail dicts
    """
    response = []
    for snippet_id in snippet_ids:
        found, rows = self.db.query(AiSnippetsTable, {"id": snippet_id})
        if not found or not rows:
            # Snippet not present in the DB; skip rather than fail the page.
            continue
        details = rows[0].data

        def _pick(bucket):
            # Return {label_name: value} when the stored detail map has a
            # truthy entry for this label, otherwise an empty dict.
            value = (details.get(bucket) or {}).get(label_name)
            return {label_name: value} if value else {}

        response.append(
            {
                "snippet": details["doc"],
                "sourcePath": details["sourcePath"],
                "topicDetails": _pick("topicDetails"),
                "entityDetails": _pick("entityDetails"),
                "fileOwner": owner,
                "authorizedIdentities": [],
            }
        )
    return response

def get_findings_for_loader_app(self, app_data):
topic_count = 0
entity_count = 0
total_snippet_count = 0
snippets = []
if app_data.get("docEntities"):
for entity, entity_data in app_data.get("docEntities").items():
entity_count += entity_data.get("count")
self.loader_findings += entity_data.get("count")

findings_exists = False
for findings in self.loader_findings_list:
if findings.get("labelName") == entity:
findings_exists = True
findings["findings"] += entity_data["count"]
findings["snippetCount"] += len(entity_data["snippetIds"])
findings["fileCount"] = len(app_data["documents"])
total_snippet_count += findings["snippetCount"]
snippets.extend(
self._get_snippet_details(
entity_data["snippetIds"], app_data["owner"]
)
)
break
if not findings_exists:
logger.debug("finding not exist")
findings = {
"appName": app_data["name"],
"labelName": entity,
"findings": entity_data["count"],
"findingsType": "entities",
"snippetCount": len(entity_data["snippetIds"]),
"fileCount": len(app_data["documents"]),
"snippets": self._get_snippet_details(
entity_data["snippetIds"], app_data["owner"]
),
}
total_snippet_count += findings["snippetCount"]
shallow_copy = findings.copy()
self.loader_findings_list.append(shallow_copy)
del findings["snippets"]
self.loader_findings_summary_list.append(findings)
def _findings_for_app_entities(
self, app_data, snippets, total_snippet_count, entity_count
):
"""
This function finds findings for apps with entities
"""

if app_data.get("docTopics"):
for topic, topic_data in app_data.get("docTopics").items():
topic_count += topic_data.get("count")
self.loader_findings += topic_data.get("count")

findings_exists = False
for findings in self.loader_findings_list:
if findings.get("labelName") == topic:
findings_exists = True
findings["findings"] += topic_data["count"]
findings["snippetCount"] += len(topic_data["snippetIds"])
findings["fileCount"] = len(app_data["documents"])
total_snippet_count += findings["snippetCount"]
snippets.extend(
self._get_snippet_details(
topic_data["snippetIds"], app_data["owner"]
)
for entity, entity_data in app_data.get("docEntities").items():
entity_count += entity_data.get("count")
self.loader_findings += entity_data.get("count")

findings_exists = False
for findings in self.loader_findings_list:
if findings.get("labelName") == entity:
findings_exists = True
findings["findings"] += entity_data["count"]
findings["snippetCount"] += len(entity_data["snippetIds"])
findings["fileCount"] = len(app_data["documents"])
total_snippet_count += findings["snippetCount"]
snippets.extend(
self._get_snippet_details(
entity_data["snippetIds"], app_data["owner"], entity
)
break
if not findings_exists:
findings = {
"appName": app_data["name"],
"labelName": topic,
"findings": topic_data["count"],
"findingsType": "topics",
"snippetCount": len(topic_data["snippetIds"]),
"fileCount": len(app_data["documents"]),
"snippets": self._get_snippet_details(
topic_data["snippetIds"], app_data["owner"]
),
}
)
break
if not findings_exists:
logger.debug("finding not exist")
findings = {
"appName": app_data["name"],
"labelName": entity,
"findings": entity_data["count"],
"findingsType": "entities",
"snippetCount": len(entity_data["snippetIds"]),
"fileCount": len(app_data["documents"]),
"snippets": self._get_snippet_details(
entity_data["snippetIds"], app_data["owner"], entity
),
}
total_snippet_count += findings["snippetCount"]
shallow_copy = findings.copy()
self.loader_findings_list.append(shallow_copy)
del findings["snippets"]
self.loader_findings_summary_list.append(findings)
return entity_count, snippets, total_snippet_count

def _findings_for_app_topics(
self, app_data, snippets, total_snippet_count, topic_count
):
"""
This function finds findings for apps with topics
"""

for topic, topic_data in app_data.get("docTopics").items():
topic_count += topic_data.get("count")
self.loader_findings += topic_data.get("count")

findings_exists = False
for findings in self.loader_findings_list:
if findings.get("labelName") == topic:
findings_exists = True
findings["findings"] += topic_data["count"]
findings["snippetCount"] += len(topic_data["snippetIds"])
findings["fileCount"] = len(app_data["documents"])
total_snippet_count += findings["snippetCount"]
shallow_copy = findings.copy()
self.loader_findings_list.append(shallow_copy)
del findings["snippets"]
self.loader_findings_summary_list.append(findings)
snippets.extend(
self._get_snippet_details(
topic_data["snippetIds"], app_data["owner"], topic
)
)
break
if not findings_exists:
findings = {
"appName": app_data["name"],
"labelName": topic,
"findings": topic_data["count"],
"findingsType": "topics",
"snippetCount": len(topic_data["snippetIds"]),
"fileCount": len(app_data["documents"]),
"snippets": self._get_snippet_details(
topic_data["snippetIds"], app_data["owner"], topic
),
}
total_snippet_count += findings["snippetCount"]
shallow_copy = findings.copy()
self.loader_findings_list.append(shallow_copy)
del findings["snippets"]
self.loader_findings_summary_list.append(findings)
return topic_count, snippets, total_snippet_count

def _update_loader_datasource(
self, app_data, entity_count, topic_count, total_snippet_count
):
"""
This function updates loader datasource details and count
"""

# Data Source Details
status, data_sources = self.db.query(
_, data_sources = self.db.query(
AiDataSourceTable, {"loadId": app_data.get("id")}
)
for data_source in data_sources:
Expand All @@ -158,10 +191,12 @@ def get_findings_for_loader_app(self, app_data):
# Data Source Count
self.loader_data_source = len(self.loader_data_source_list)

# Fetch required data for DocumentWithFindings
status, documents = self.db.query(
AiDocumentTable, {"loadId": app_data.get("id")}
)
def _get_documents_with_findings(self, app_data):
"""
Fetch required data for DocumentWithFindings
"""

_, documents = self.db.query(AiDocumentTable, {"loadId": app_data.get("id")})
loader_document_with_findings = app_data.get("documentsWithFindings")
documents_with_findings_data = []
for document in documents:
Expand All @@ -184,6 +219,33 @@ def get_findings_for_loader_app(self, app_data):
# Documents with findings Count
self.loader_files_findings = len(self.loader_document_with_findings_list)

def get_findings_for_loader_app(self, app_data):
"""
This function calculates findings for loader app
"""

entity_count = 0
topic_count = 0
total_snippet_count = 0
snippets = []
if app_data.get("docEntities"):
entity_count, snippets, total_snippet_count = (
self._findings_for_app_entities(
app_data, snippets, total_snippet_count, entity_count
)
)

if app_data.get("docTopics"):
topic_count, snippets, total_snippet_count = self._findings_for_app_topics(
app_data, snippets, total_snippet_count, topic_count
)

self._update_loader_datasource(
app_data, entity_count, topic_count, total_snippet_count
)

self._get_documents_with_findings(app_data)

app_details = LoaderAppListDetails(
name=app_data.get("name"),
topics=topic_count,
Expand Down Expand Up @@ -216,9 +278,9 @@ def get_all_loader_apps(self):
continue

self.loader_apps_at_risk += 1
loader_app = self.get_findings_for_loader_app(app_data)
all_loader_apps.append(loader_app)
app_processed.append(app_data["name"])
loader_app = self.get_findings_for_loader_app(app_data)
all_loader_apps.append(loader_app)
app_processed.append(app_data["name"])

# TODO: Sort loader apps
# sorted_loader_apps = self._sort_loader_apps(all_loader_apps)
Expand Down

0 comments on commit 75d6238

Please sign in to comment.