Skip to content

Commit

Permalink
feat: introduce use_reflexion to determine whether to use ReflexionAg…
Browse files Browse the repository at this point in the history
…ent to generate Cypher query (#197)

* fix some bugs in reflexion agent

* fix a vectorstore issue

* introduce use_reflexion to kg RagAgent

* prompt LLM not to generate cypher marker

* fix test error

* change vectorstore collection names to apply previous fix of collection field max length issue

* phrasing, formatting, parameter explanations

* fix type hinting

---------

Co-authored-by: fengsh <[email protected]>
Co-authored-by: slobentanzer <[email protected]>
  • Loading branch information
3 people authored Aug 17, 2024
1 parent 0736e72 commit 9546f27
Show file tree
Hide file tree
Showing 21 changed files with 155 additions and 77 deletions.
2 changes: 1 addition & 1 deletion benchmark/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

from biochatter.prompts import BioCypherPromptEngine
from biochatter.llm_connect import (
AnthropicConversation,
GptConversation,
AnthropicConversation,
XinferenceConversation,
)
from .load_dataset import get_benchmark_dataset
Expand Down
4 changes: 2 additions & 2 deletions benchmark/load_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def _get_private_key_from_env_variable() -> rsa.PrivateKey:
return private_key


def _get_encrypted_test_data() -> Dict[str, Dict[str, str]]:
def _get_encrypted_test_data() -> Dict[str, dict[str, str]]:
"""Get encrypted test data.
currently from manually copied file benchmark/encrypted_llm_test_data.json
TODO: automatically load test dataset (from github releases)?
Expand Down Expand Up @@ -259,7 +259,7 @@ def _decrypt_data(
return decrypted_test_data


def _decrypt(payload: Dict[str, str], private_key: rsa.PrivateKey) -> str:
def _decrypt(payload: dict[str, str], private_key: rsa.PrivateKey) -> str:
"""Decrypt a payload.
Args:
Expand Down
36 changes: 18 additions & 18 deletions benchmark/results/medical_exam_failure_modes.csv
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ claude-3-5-sonnet-20240620,dichotomous:cardiology:en,no,yes,Other,b710df83a48c05
claude-3-5-sonnet-20240620,dichotomous:dermatology:de,ja,nein,Other,32cf33685a9e00909d6d16a9b7bde20f,2024-08-13 17:49:53
claude-3-5-sonnet-20240620,dichotomous:dermatology:de,"nein

ein angioödem wird typischerweise durch eine typ 1 überempfindlichkeitsreaktion (soforttyp) nach coombs und gell verursacht, nicht durch eine typ 4 reaktion
ein angioödem wird typischerweise durch eine typ 1 überempfindlichkeitsreaktion (soforttyp) nach coombs und gell verursacht, nicht durch eine typ 4 reaktion

die typ 4 reaktion ist eine verzögerte überempfindlichkeitsreaktion, die erst nach 24-72 stunden auftritt und eher mit kontaktdermatitis oder transplantatabstoßungen in verbindung gebracht wird

Expand All @@ -790,7 +790,7 @@ claude-3-5-sonnet-20240620,dichotomous:dermatology:de,"nein
das nikolski-zeichen ist nicht mit verbrühungen oder verätzungen assoziiert es ist ein klinisches zeichen, das bei bestimmten blasenbildenden hauterkrankungen auftritt, insbesondere bei pemphigus-erkrankungen bei diesem test löst sich die oberste hautschicht ab, wenn man seitlichen druck auf scheinbar gesunde haut ausübt das nikolski-zeichen wird in der dermatologie zur diagnose von autoimmunerkrankungen der haut verwendet, nicht bei thermischen oder chemischen verletzungen",ja,Other,4232ac0da6fca3616b9571eece9664ee,2024-08-13 17:50:33
claude-3-5-sonnet-20240620,dichotomous:dermatology:de,"nein

der satz ist falsch in wirklichkeit hat die melanominzidenz (die häufigkeit von neu auftretenden fällen von hautkrebs) in den letzten jahrzehnten deutlich zugenommen, nicht abgenommen
der satz ist falsch in wirklichkeit hat die melanominzidenz (die häufigkeit von neu auftretenden fällen von hautkrebs) in den letzten jahrzehnten deutlich zugenommen, nicht abgenommen

dies ist ein weltweit beobachteter trend, der auf verschiedene faktoren zurückgeführt wird, darunter:

Expand All @@ -802,7 +802,7 @@ dies ist ein weltweit beobachteter trend, der auf verschiedene faktoren zurückg
die steigende inzidenz des melanoms ist ein wichtiges thema in der öffentlichen gesundheit und der dermatologischen forschung",nein,Partial Match,be5f268836c9cf2fbc17cd0e00c1b63d,2024-08-13 17:50:52
claude-3-5-sonnet-20240620,dichotomous:dermatology:de,"nein

der satz ist falsch koplik-flecken sind nicht charakteristisch für mundsoor, sondern für masern
der satz ist falsch koplik-flecken sind nicht charakteristisch für mundsoor, sondern für masern

bei mundsoor (auch als orale candidiasis bekannt) treten typischerweise weißliche, abwischbare beläge auf der zunge, dem gaumen und der wangenschleimhaut auf diese beläge werden durch eine pilzinfektion, meist durch candida albicans, verursacht

Expand Down Expand Up @@ -859,11 +859,11 @@ therefore, the sentence is correct in stating that immunosuppression is a risk f
claude-3-5-sonnet-20240620,dichotomous:dermatology:en,"yes, the sentence is correct

vemurafenib is indeed used as an effective drug for treating cancers with braf mutations, particularly melanoma it is a braf inhibitor that specifically targets the v600e mutation in the braf gene, which is found in approximately 50% of melanomas and some other cancers vemurafenib works by blocking the activity of the mutated braf protein, which helps to slow down or stop the growth of cancer cells with this specific mutation",yes,Partial Match,8052bd5ead892c81fa9fff9527718b41,2024-08-13 17:54:19
claude-3-5-sonnet-20240620,dichotomous:dermatology:en,"no
claude-3-5-sonnet-20240620,dichotomous:dermatology:en,"no

this statement is incorrect angioedema is not typically associated with a type 4 hypersensitivity reaction according to the coombs and gell classification of hypersensitivity reactions

angioedema is most commonly associated with type i (immediate) hypersensitivity reactions or, in some cases, can be related to bradykinin-mediated mechanisms
angioedema is most commonly associated with type i (immediate) hypersensitivity reactions or, in some cases, can be related to bradykinin-mediated mechanisms

type 4 reactions, also known as delayed-type hypersensitivity reactions, are cell-mediated immune responses that typically occur 24-72 hours after exposure to an antigen these reactions are not associated with the rapid swelling characteristic of angioedema

Expand Down Expand Up @@ -957,7 +957,7 @@ zudem kann die psychische belastung durch die störung zu depressionen und in sc
daher ist es wichtig, die binge-eating-störung als ernsthafte erkrankung zu betrachten, die professionelle behandlung erfordert",nein,Partial Match,bbcaaaecf256ef1b8f392d1a820867ac,2024-08-13 17:41:43
claude-3-5-sonnet-20240620,dichotomous:mental_disorders:en,"no this sentence is incorrect

the description provided in the sentence is more accurately associated with cognitive behavioral therapy (cbt), not gestalt therapy
the description provided in the sentence is more accurately associated with cognitive behavioral therapy (cbt), not gestalt therapy

gestalt therapy is a humanistic and experiential form of psychotherapy that focuses on the individual's present life and challenges rather than exploring past experiences it emphasizes personal responsibility, self-awareness, and the importance of experiencing emotions and sensations in the present moment gestalt therapy does not typically focus on identifying and changing specific thought patterns or behaviors in the way that cbt does

Expand Down Expand Up @@ -1633,7 +1633,7 @@ claude-3-opus-20240229,regex:dermatology:de,"folgende personengruppen sind häuf

- säuglinge und ältere menschen, da ihr immunsystem noch nicht voll entwickelt bzw geschwächt ist

- personen mit geschwächtem immunsystem, zb durch hiv/aids, krebs, organtransplantationen oder immunsuppressive medikamente
- personen mit geschwächtem immunsystem, zb durch hiv/aids, krebs, organtransplantationen oder immunsuppressive medikamente

- diabetiker, besonders wenn der blutzucker schlecht eingestellt ist

Expand Down Expand Up @@ -18138,9 +18138,9 @@ llama-3.1-instruct:70:ggufv2:IQ4_XS,dichotomous:medication:en,yes,no,Other,43fe3
llama-3.1-instruct:70:ggufv2:IQ4_XS,dichotomous:mental_disorders:de,ja,nein,Other,ae14302fc8e54ce6a36cb4561a19744d,2024-08-14 14:57:09
llama-3.1-instruct:70:ggufv2:IQ4_XS,dichotomous:mental_disorders:de,ja,nein,Other,26e8e2a0036b84be9ebf8be32fbd8b26,2024-08-14 14:57:30
llama-3.1-instruct:70:ggufv2:IQ4_XS,dichotomous:mental_disorders:en,yes,no,Other,b2ab8ea86e24e799101e3d4bc8864fed,2024-08-14 15:02:54
llama-3.1-instruct:70:ggufv2:IQ4_XS,dichotomous:mental_disorders:en,"no
llama-3.1-instruct:70:ggufv2:IQ4_XS,dichotomous:mental_disorders:en,"no

the correct sentence should be:
the correct sentence should be:

in psychotherapy for generalized anxiety disorder, learning avoidance strategies to get rid of worries is not a common technique

Expand Down Expand Up @@ -18389,7 +18389,7 @@ llama-3.1-instruct:70:ggufv2:Q3_K_S,dichotomous:mental_disorders:de,ja,nein,Othe
llama-3.1-instruct:70:ggufv2:Q3_K_S,dichotomous:mental_disorders:en,yes,no,Other,b2ab8ea86e24e799101e3d4bc8864fed,2024-08-14 17:19:11
llama-3.1-instruct:70:ggufv2:Q3_K_S,dichotomous:mental_disorders:en,"no

the correct sentence should be:
the correct sentence should be:

in psychotherapy for generalized anxiety disorder, learning avoidance strategies to get rid of worries is not a common technique

Expand Down Expand Up @@ -18601,10 +18601,10 @@ llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:de,nein,ja,Other,a0ba
llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:de,ja,nein,Other,8769a0f64879cab927a79ff77d52423b,2024-08-13 20:09:06
llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:de,nein,ja,Other,4232ac0da6fca3616b9571eece9664ee,2024-08-13 20:09:07
llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:en,yes,no,Other,3f185e0a1089b6484420e1c6fdffc1a4,2024-08-13 20:10:40
llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:en,"no
llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:en,"no

the sentence is incorrect because it implies that finger amputation is the only treatment option for a moderately differentiated squamous cell carcinoma with a tumour thickness of 6 mm in reality, the treatment for a moderately differentiated squamous cell carcinoma with a tumour thickness of 6 mm may include other options such as mohs surgery, wide local excision, or a combination of treatments, depending on the specific case and the patient's overall health",no,Partial Match,a2bccd05f3ed202ab060cfd604fcd598,2024-08-13 20:10:49
llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:en,"no
llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:en,"no

angioedema is typically associated with type i hypersensitivity reactions, not type iv type iv reactions are cell-mediated hypersensitivity reactions, which are not typically associated with angioedema",no,Partial Match,6a661c4f100e7fd238f836656d35c3ed,2024-08-13 20:10:56
llama-3.1-instruct:8:ggufv2:IQ4_XS,dichotomous:dermatology:en,yes,no,Other,531580f330df86b7a55dfb2dd6d2a8d3,2024-08-13 20:10:57
Expand Down Expand Up @@ -18884,7 +18884,7 @@ llama-3.1-instruct:8:ggufv2:Q3_K_L,dichotomous:dermatology:de,ja,nein,Other,32cf
llama-3.1-instruct:8:ggufv2:Q3_K_L,dichotomous:dermatology:de,nein,ja,Other,a0ba6e4c4785ff461433b4cbb51326d2,2024-08-13 21:03:16
llama-3.1-instruct:8:ggufv2:Q3_K_L,dichotomous:dermatology:de,ja,nein,Other,8769a0f64879cab927a79ff77d52423b,2024-08-13 21:03:17
llama-3.1-instruct:8:ggufv2:Q3_K_L,dichotomous:dermatology:en,yes,no,Other,3f185e0a1089b6484420e1c6fdffc1a4,2024-08-13 21:04:42
llama-3.1-instruct:8:ggufv2:Q3_K_L,dichotomous:dermatology:en,"no
llama-3.1-instruct:8:ggufv2:Q3_K_L,dichotomous:dermatology:en,"no

angioedema is typically associated with type i hypersensitivity reactions, not type iv type iv reactions are typically associated with delayed hypersensitivity reactions, such as contact dermatitis or tuberculin skin test reactions",no,Partial Match,6a661c4f100e7fd238f836656d35c3ed,2024-08-13 21:04:50
llama-3.1-instruct:8:ggufv2:Q3_K_L,dichotomous:emergency:en,no,yes,Other,17e0f19ccbbcf0bfba6f30d8ce573421,2024-08-13 21:08:55
Expand Down Expand Up @@ -19149,7 +19149,7 @@ llama-3.1-instruct:8:ggufv2:Q4_K_M,dichotomous:mental_disorders:en,"no
the sentence should be: ""in psychotherapy for generalised anxiety disorder, learning avoidance strategies to manage worries is a common technique""

the word ""get rid of"" is not a precise or accurate way to describe the goal of learning avoidance strategies in therapy for anxiety the goal is to manage or cope with worries, not to eliminate them entirely",no,Partial Match,25db6983321e301a46af5ef5ab4f0977,2024-08-13 19:18:18
llama-3.1-instruct:8:ggufv2:Q4_K_M,dichotomous:mental_disorders:en,"no
llama-3.1-instruct:8:ggufv2:Q4_K_M,dichotomous:mental_disorders:en,"no

people with generalised anxiety disorder usually consider their worries to be overwhelming and persistent, not pointless they often feel a need to constantly worry and may find it difficult to control their anxiety, rather than wanting to get rid of their worries completely",yes,Other,54a4309bad8924868e0bcef97f329c33,2024-08-13 19:18:24
llama-3.1-instruct:8:ggufv2:Q4_K_M,dichotomous:mental_disorders:en,yes,no,Other,87eef24cb37bc2b5a1f895615087f06c,2024-08-13 19:18:24
Expand Down Expand Up @@ -19441,7 +19441,7 @@ llama-3.1-instruct:8:ggufv2:Q5_K_M,dichotomous:dermatology:en,yes,no,Other,3f185
llama-3.1-instruct:8:ggufv2:Q5_K_M,dichotomous:dermatology:en,"no

the sentence is incorrect because it implies that finger amputation is the only treatment option for a moderately differentiated squamous cell carcinoma with a tumour thickness of 6 mm in reality, the treatment for squamous cell carcinoma depends on various factors, including the size and location of the tumor, the depth of invasion, and the patient's overall health for a tumor with a thickness of 6 mm, other treatment options such as mohs surgery, wide local excision, or a combination of surgery and radiation therapy may also be considered, depending on the specific circumstances",no,Partial Match,a2bccd05f3ed202ab060cfd604fcd598,2024-08-13 22:04:07
llama-3.1-instruct:8:ggufv2:Q5_K_M,dichotomous:dermatology:en,"no
llama-3.1-instruct:8:ggufv2:Q5_K_M,dichotomous:dermatology:en,"no

angioedema is typically associated with type 1 or 3 reactions according to the coombs classification, not type 4",no,Partial Match,6a661c4f100e7fd238f836656d35c3ed,2024-08-13 22:04:13
llama-3.1-instruct:8:ggufv2:Q5_K_M,dichotomous:emergency:en,no,yes,Other,129f8c9dfddfa91be6952a03066a6fca,2024-08-13 22:08:50
Expand Down Expand Up @@ -19729,10 +19729,10 @@ llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:dermatology:de,nein,ja,Other,a0ba6e
llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:dermatology:de,ja,nein,Other,8769a0f64879cab927a79ff77d52423b,2024-08-13 22:57:31
llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:dermatology:de,nein,ja,Other,4232ac0da6fca3616b9571eece9664ee,2024-08-13 22:57:31
llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:dermatology:en,yes,no,Other,3f185e0a1089b6484420e1c6fdffc1a4,2024-08-13 22:59:28
llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:dermatology:en,"no
llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:dermatology:en,"no

the sentence is incorrect because it implies that finger amputation is the only treatment option for a moderately differentiated squamous cell carcinoma with a tumour thickness of 6 mm in reality, the treatment for squamous cell carcinoma depends on various factors, including the size and location of the tumor, the patient's overall health, and the stage of the disease treatment options may include surgery, radiation therapy, or a combination of both, and may not necessarily involve amputation",no,Partial Match,a2bccd05f3ed202ab060cfd604fcd598,2024-08-13 22:59:42
llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:dermatology:en,"no
llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:dermatology:en,"no

angioedema is typically associated with type 1 or 2 reactions according to the coombs classification, not type 4 type 4 reactions are more commonly associated with delayed hypersensitivity reactions, such as contact dermatitis or tuberculin reactions",no,Partial Match,6a661c4f100e7fd238f836656d35c3ed,2024-08-13 22:59:51
llama-3.1-instruct:8:ggufv2:Q6_K,dichotomous:emergency:en,no,yes,Other,129f8c9dfddfa91be6952a03066a6fca,2024-08-13 23:03:57
Expand Down Expand Up @@ -20001,7 +20001,7 @@ llama-3.1-instruct:8:ggufv2:Q8_0,dichotomous:dermatology:de,nein,ja,Other,a0ba6e
llama-3.1-instruct:8:ggufv2:Q8_0,dichotomous:dermatology:de,ja,nein,Other,8769a0f64879cab927a79ff77d52423b,2024-08-14 00:10:45
llama-3.1-instruct:8:ggufv2:Q8_0,dichotomous:dermatology:de,nein,ja,Other,4232ac0da6fca3616b9571eece9664ee,2024-08-14 00:10:46
llama-3.1-instruct:8:ggufv2:Q8_0,dichotomous:dermatology:en,yes,no,Other,3f185e0a1089b6484420e1c6fdffc1a4,2024-08-14 00:12:32
llama-3.1-instruct:8:ggufv2:Q8_0,dichotomous:dermatology:en,"no
llama-3.1-instruct:8:ggufv2:Q8_0,dichotomous:dermatology:en,"no

the treatment options for a moderately differentiated squamous cell carcinoma on the back of the finger with a tumour thickness of 6 mm may include surgery, such as mohs surgery or wide local excision, radiation therapy, or a combination of these, depending on the specific characteristics of the tumor and the patient's overall health finger amputation is not typically the only treatment option",no,Partial Match,a2bccd05f3ed202ab060cfd604fcd598,2024-08-14 00:12:41
llama-3.1-instruct:8:ggufv2:Q8_0,dichotomous:emergency:en,no,yes,Other,129f8c9dfddfa91be6952a03066a6fca,2024-08-14 00:16:11
Expand Down
31 changes: 23 additions & 8 deletions biochatter/database_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(
connection_args: dict,
schema_config_or_info_dict: dict,
conversation_factory: Callable,
use_reflexion: bool,
) -> None:
"""
Create a DatabaseAgent analogous to the VectorDatabaseAgentMilvus class,
Expand All @@ -28,6 +29,9 @@ def __init__(
conversation_factory (Callable): A function to create a conversation
for creating the KG query.
use_reflexion (bool): Whether to use the ReflexionAgent to generate
the query.
"""
self.conversation_factory = conversation_factory
self.prompt_engine = BioCypherPromptEngine(
Expand All @@ -37,6 +41,7 @@ def __init__(
)
self.connection_args = connection_args
self.driver = None
self.use_reflexion = use_reflexion

def connect(self) -> None:
"""
Expand All @@ -58,13 +63,23 @@ def is_connected(self) -> bool:
return not self.driver is None

def _generate_query(self, query: str):
agent = KGQueryReflexionAgent(
self.conversation_factory,
self.connection_args,
)
query_prompt = self.prompt_engine.generate_query_prompt(query)
agent_result = agent.execute(query, query_prompt)
return agent_result.answer, agent_result.tool_result
if self.use_reflexion:
agent = KGQueryReflexionAgent(
self.conversation_factory,
self.connection_args,
)
query_prompt = self.prompt_engine.generate_query_prompt(query)
agent_result = agent.execute(query, query_prompt)
tool_result = (
[agent_result.tool_result]
if agent_result.tool_result is not None
else None
)
return agent_result.answer, tool_result
else:
query = self.prompt_engine.generate_query(query)
results = self.driver.query(query=query)
return query, results

def get_query_results(self, query: str, k: int = 3) -> list[Document]:
"""
Expand All @@ -90,7 +105,7 @@ def get_query_results(self, query: str, k: int = 3) -> list[Document]:
if tool_result is not None:
# If _generate_query() already returned tool_result, we won't connect
# to graph database to query result any more
results = [tool_result]
results = tool_result
else:
results = self.driver.query(query=cypher_query)

Expand Down
4 changes: 2 additions & 2 deletions biochatter/kg_langgraph_agent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Optional, Dict
from typing import Any, Optional
from datetime import datetime
from collections.abc import Callable
import json
Expand Down Expand Up @@ -94,7 +94,7 @@ class KGQueryReflexionAgent(ReflexionAgent):
def __init__(
self,
conversation_factory: Callable,
connection_args: Dict[str, str],
connection_args: dict[str, str],
query_lang: Optional[str] = "Cypher",
max_steps: Optional[int] = 20,
):
Expand Down
4 changes: 2 additions & 2 deletions biochatter/langgraph_agent_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Any, Union, Literal, Optional, List, Dict
from typing import Any, List, Union, Literal, Optional
from datetime import datetime
from collections.abc import Callable
import json
Expand Down Expand Up @@ -37,7 +37,7 @@ def log_step_message(
"""
pass

def log_final_result(self, final_result: Dict[str, Any]) -> None:
def log_final_result(self, final_result: dict[str, Any]) -> None:
"""
log final result
Args:
Expand Down
Loading

0 comments on commit 9546f27

Please sign in to comment.