Skip to content

Commit

Permalink
Add adapter to use messages in model inference
Browse files Browse the repository at this point in the history
  • Loading branch information
liamjxu committed Nov 12, 2024
1 parent a18243c commit ca76693
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/helm/benchmark/adaptation/adapter_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

# Adaptation methods
ADAPT_GENERATION: str = "generation"
ADAPT_CHAT: str = "chat"
ADAPT_LANGUAGE_MODELING: str = "language_modeling"
ADAPT_MULTIPLE_CHOICE_JOINT: str = "multiple_choice_joint"
ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT: str = "multiple_choice_joint_chain_of_thought"
Expand Down
4 changes: 4 additions & 0 deletions src/helm/benchmark/adaptation/adapters/adapter_factory.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from helm.benchmark.adaptation.adapter_spec import (
ADAPT_GENERATION,
ADAPT_CHAT,
ADAPT_GENERATION_MULTIMODAL,
ADAPT_LANGUAGE_MODELING,
ADAPT_MULTIPLE_CHOICE_JOINT,
Expand All @@ -13,6 +14,7 @@
from helm.benchmark.adaptation.adapters.adapter import Adapter
from helm.benchmark.adaptation.adapters.binary_ranking_adapter import BinaryRankingAdapter
from helm.benchmark.adaptation.adapters.generation_adapter import GenerationAdapter
from helm.benchmark.adaptation.adapters.chat_adapter import ChatAdapter
from helm.benchmark.adaptation.adapters.language_modeling_adapter import LanguageModelingAdapter
from helm.benchmark.adaptation.adapters.multimodal.generation_multimodal_adapter import GenerationMultimodalAdapter
from helm.benchmark.adaptation.adapters.multimodal.multiple_choice_joint_multimodal_adapter import (
Expand All @@ -38,6 +40,8 @@ def get_adapter(adapter_spec: AdapterSpec, tokenizer_service: TokenizerService)

if method == ADAPT_GENERATION:
adapter = GenerationAdapter(adapter_spec, tokenizer_service)
if method == ADAPT_CHAT:
adapter = ChatAdapter(adapter_spec, tokenizer_service)
elif method == ADAPT_LANGUAGE_MODELING:
adapter = LanguageModelingAdapter(adapter_spec, tokenizer_service)
elif method == ADAPT_MULTIPLE_CHOICE_JOINT:
Expand Down
49 changes: 49 additions & 0 deletions src/helm/benchmark/adaptation/adapters/chat_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import List

from helm.benchmark.adaptation.prompt import Prompt
from helm.benchmark.adaptation.request_state import RequestState
from helm.benchmark.scenarios.scenario import Instance
from helm.common.request import Request
from helm.benchmark.adaptation.adapters.in_context_learning_adapter import InContextLearningAdapter


class ChatAdapter(InContextLearningAdapter):
    """Adapter that sends an instance's pre-built chat conversation directly to the model.

    Unlike `GenerationAdapter`, no prompt string is constructed: the `Request` is
    populated with the `messages` list stored on the instance, so in-context
    training examples are ignored (`num_train_instances` is always 0).

    Each `Instance` in a `Scenario` must carry `extra_data["conversation"]`, a
    message history of the format:
        [
            {"role": "user", "content": <user-content>},
            {"role": "assistant", "content": <assistant-content>},
            {"role": "user", "content": <user-content>},
            ...
        ]
    """

    def generate_requests(
        self, eval_instance: Instance, train_trial_index: int, training_instances: List[Instance]
    ) -> List[RequestState]:
        """Build a single chat `RequestState` for `eval_instance`.

        :param eval_instance: instance whose `extra_data["conversation"]` holds the chat history.
        :param train_trial_index: index of the training trial (recorded on the request state;
            training instances themselves are not used by this adapter).
        :param training_instances: unused; present to satisfy the adapter interface.
        :return: a one-element list with the request for this conversation.
        :raises ValueError: if the instance has no `extra_data` or it lacks a "conversation" entry.
        """
        # Explicit validation instead of `assert`: asserts are stripped under `python -O`,
        # and a missing key would otherwise surface as an opaque KeyError downstream.
        if not eval_instance.extra_data or "conversation" not in eval_instance.extra_data:
            raise ValueError(
                "ChatAdapter requires eval_instance.extra_data['conversation'] to be set "
                f"(instance id: {eval_instance.id})"
            )
        request = Request(
            model=self.adapter_spec.model,
            model_deployment=self.adapter_spec.model_deployment,
            messages=eval_instance.extra_data["conversation"],
            num_completions=self.adapter_spec.num_outputs,
            temperature=self.adapter_spec.temperature,
            max_tokens=self.adapter_spec.max_tokens,
            stop_sequences=self.adapter_spec.stop_sequences,
            random=self.adapter_spec.random,
            image_generation_parameters=self.adapter_spec.image_generation_parameters,
        )
        request_state = RequestState(
            instance=eval_instance,
            reference_index=None,  # not a multiple-choice request
            request_mode=None,
            train_trial_index=train_trial_index,
            output_mapping=None,
            request=request,
            result=None,  # filled in after execution
            num_train_instances=0,  # chat history replaces in-context examples
            prompt_truncated=False,
        )
        return [request_state]
6 changes: 3 additions & 3 deletions src/helm/benchmark/annotation/wildbench_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, auto_client: AutoClient):
with open("src/helm/benchmark/annotation/wildbench/eval_template.score.v2.md") as f:
self._score_template = f.read()
self._pattern = re.compile(
r'"strengths"\s*:\s*"(.*?)"\s*,\s*"weaknesses"\s*:\s*"(.*?)"\s*,\s*"score"\s*:\s*"(.*?)"', re.DOTALL
r'"strengths"\s*:\s*"(.*?)"\s*,\s*"weaknesses"\s*:\s*"(.*?)"\s*,\s*"score"\s*:\s*(".*?"|\d+)', re.DOTALL
)

def annotate(self, request_state: RequestState) -> Any:
Expand All @@ -40,7 +40,7 @@ def annotate(self, request_state: RequestState) -> Any:
model_deployment="openai/gpt-4o-2024-05-13",
prompt=annotator_prompt,
temperature=0.0,
max_tokens=1000,
max_tokens=2000,
)
annotator_response = self._auto_client.make_request(annotator_request)
if not annotator_response.success:
Expand All @@ -53,7 +53,7 @@ def annotate(self, request_state: RequestState) -> Any:

strengths = annotator_response_parts[1].strip()
weaknesses = annotator_response_parts[2].strip()
score_text = annotator_response_parts[3].strip()
score_text = annotator_response_parts[3].strip().strip('"')
try:
score = float(score_text)
except ValueError:
Expand Down
3 changes: 2 additions & 1 deletion src/helm/benchmark/run_specs/lite_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from helm.benchmark.adaptation.adapter_spec import (
ADAPT_GENERATION,
ADAPT_CHAT,
ADAPT_MULTIPLE_CHOICE_JOINT,
ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT,
AdapterSpec,
Expand Down Expand Up @@ -448,7 +449,7 @@ def get_wildbench_spec(subset: str) -> RunSpec:
)

adapter_spec = AdapterSpec(
method=ADAPT_GENERATION, input_prefix="", output_prefix="", max_tokens=1000, num_outputs=1, temperature=0.0
method=ADAPT_CHAT, input_prefix="", output_prefix="", max_tokens=1000, num_outputs=1, temperature=0.0
)
annotator_specs = [AnnotatorSpec(class_name="helm.benchmark.annotation.wildbench_annotator.WildBenchAnnotator")]
metric_specs = [MetricSpec(class_name="helm.benchmark.metrics.wildbench_metrics.WildBenchScoreMetric")]
Expand Down
1 change: 1 addition & 0 deletions src/helm/benchmark/scenarios/wildbench_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def get_instances(self, output_path: str) -> List[Instance]:
model: baseline_outputs[model][idx]["output"][0] for model in REFERENCE_MODELS
},
"history": "\n".join(history_text),
"conversation": row["conversation_input"],
"user_query": user_query,
"checklist": "\n".join(row["checklist"]),
},
Expand Down

0 comments on commit ca76693

Please sign in to comment.