From f7bb83b14ac4ae0108906677850ea78e6d807c77 Mon Sep 17 00:00:00 2001 From: Dristy Srivastava Date: Thu, 19 Sep 2024 21:38:00 +0530 Subject: [PATCH] Incorporated review comments --- pebblo/app/service/doc_helper.py | 42 +++++++++---------- .../app/service/loader/loader_doc_service.py | 35 ++++++++++++++-- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/pebblo/app/service/doc_helper.py b/pebblo/app/service/doc_helper.py index 5072121a..6a0c8c82 100644 --- a/pebblo/app/service/doc_helper.py +++ b/pebblo/app/service/doc_helper.py @@ -94,14 +94,14 @@ def _fetch_variables(raw_data: dict): @staticmethod def _update_raw_data( - raw_data: dict, - loader_source_snippets: dict, - total_findings: int, - findings_entities: int, - findings_topics: int, - snippet_count: int, - file_count: int, - data_source_findings: dict, + raw_data, + loader_source_snippets, + total_findings, + findings_entities, + findings_topics, + snippet_count, + file_count, + data_source_findings, ): """ Reassigning raw data @@ -144,7 +144,7 @@ def _create_doc_model(self, doc: dict, doc_info: AiDataModel) -> dict: return doc_model.model_dump() @staticmethod - def _get_top_n_findings(raw_data: dict) -> list: + def _get_top_n_findings(raw_data): """ Return top N findings from all findings """ @@ -228,7 +228,7 @@ def _get_classifier_response(self, doc: dict) -> AiDataModel: logger.error(f"Get Classifier Response Failed, Exception: {e}") return doc_info - def _update_app_details(self, raw_data: dict, ai_app_docs: list): + def _update_app_details(self, raw_data, ai_app_docs): """ Updating ai app details loader source files """ @@ -258,7 +258,7 @@ def _update_app_details(self, raw_data: dict, ai_app_docs: list): self.app_details["report_metadata"] = raw_data @staticmethod - def _create_data_source_findings(data_source_findings: list) -> list: + def _create_data_source_findings(data_source_findings): """ This function returns data source findings with entity/topic details based on label i.e, entity/topic name """ @@ -289,9 +289,7 @@ def _create_data_source_findings(data_source_findings: list) -> list: return data_source_findings @staticmethod - def _get_finding_details( - doc: dict, data_source_findings: dict, entity_type: str, raw_data: dict - ): + def _get_finding_details(doc, data_source_findings, entity_type, raw_data): """ Retrieve finding details from data source """ @@ -362,7 +360,7 @@ def _get_finding_details( else: data_source_findings[label_name]["snippets"] = [] - def _get_data_source_details(self, raw_data: dict) -> list: + def _get_data_source_details(self, raw_data): """ Create data source findings details and data source findings summary """ @@ -407,7 +405,7 @@ def _get_data_source_details(self, raw_data: dict) -> list: return data_source_obj_list @staticmethod - def _create_data_source_findings_summary(data_source_findings: list) -> list: + def _create_data_source_findings_summary(data_source_findings): """ Creating data source findings summary and return it findings summary list """ @@ -432,9 +430,7 @@ def _create_data_source_findings_summary(data_source_findings: list) -> list: return data_source_findings_summary - def _create_report_summary( - self, raw_data: dict, files_with_findings_count: int - ) -> Summary: + def _create_report_summary(self, raw_data, files_with_findings_count): """ Return report summary object """ @@ -451,7 +447,7 @@ def _create_report_summary( ) return report_summary - def _get_load_history(self) -> dict: + def _get_load_history(self): """ Retrieve previous runs details and create load history and return """ @@ -517,7 +513,7 @@ def _get_load_history(self) -> dict: load_history["moreReportsPath"] = more_report_full_path return load_history - def _get_doc_report_metadata(self, doc: dict, raw_data: dict) -> dict: + def _get_doc_report_metadata(self, doc, raw_data): """ Retrieve metadata from the document, update the raw data, and then return the updated raw data. @@ -587,7 +583,7 @@ def _get_doc_report_metadata(self, doc: dict, raw_data: dict) -> dict: ) return raw_data - def _generate_final_report(self, raw_data: dict) -> dict: + def _generate_final_report(self, raw_data): """ Aggregating all input, processing the data, and generating the final report """ @@ -625,7 +621,7 @@ def _generate_final_report(self, raw_data: dict) -> dict: ) return report_dict.model_dump() - def process_docs_and_generate_report(self) -> (dict, dict): + def process_docs_and_generate_report(self): """ Processing the doc and aggregate the report data """ diff --git a/pebblo/app/service/loader/loader_doc_service.py b/pebblo/app/service/loader/loader_doc_service.py index 175a2ac4..76c25f76 100644 --- a/pebblo/app/service/loader/loader_doc_service.py +++ b/pebblo/app/service/loader/loader_doc_service.py @@ -43,6 +43,13 @@ def __init__(self): self.anonymize_snippets = None self.entity_classifier_obj = EntityClassifier() + def _initialize_data(self, data: dict): + self.db = SQLiteClient() + self.data = data + self.app_name = data.get("name") + self._set_classifier_mode(data) + self._set_anonymize_snippets(data) + @staticmethod def _create_return_response(message, output=None, status_code=200): if output is None: @@ -277,12 +284,34 @@ def _get_or_create_data_source(self): logger.debug("Data Source has been created successfully.") return data_source_obj.data + def _set_classifier_mode(self, data: dict): + """ + This function defines the value of the classifier_mode: if it is included in the API request, + it will be used; otherwise, the value will be taken from the config. + """ + if not data.get("classifier_mode"): + self.classifier_mode = config_details.get("classifier", {}).get( + "mode", ClassificationMode.ALL.value + ) + else: + self.classifier_mode = data.get("classifier_mode") + + def _set_anonymize_snippets(self, data: dict): + """ + This function defines the value of the anonymize_snippets: if it is included in the API request, + it will be used; otherwise, the value will be taken from the config. + """ + if not data.get("anonymize_snippets"): + self.anonymize_snippets = config_details.get("classifier", {}).get( + "anonymizeSnippets", False + ) + else: + self.anonymize_snippets = data.get("anonymize_snippets") + @timeit def process_request(self, data): try: - self.db = SQLiteClient() - self.data = data - self.app_name = data.get("name") + self._initialize_data(data) if not self.data.get("classifier_mode"): self.classifier_mode = config_details.get("classifier", {}).get(