Skip to content

Commit

Permalink
fix ColossalEval (hpcaitech#4992)
Browse files: browse the repository at this point in the history
Co-authored-by: Xu Yuanchen <[email protected]>
  • Loading branch information
2 people authored and flybird11111 committed Nov 9, 2023
1 parent 62eb99f commit fa1cbd3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ def _calculate_label_metrics(self, metric: str, category: str):
sample["output"], ref, all_classes=self.data[category]["inference_kwargs"]["all_classes"]
),
)

score = max(
score,
metric_helper.accuracy_by_options(sample["input"], sample["output"], ref),
)
softmaxs.append(references[i] if score == 1 else -1)
else:
softmaxs.append(np.argmax(np.array(list(sample["softmax_over_choices"].values()))))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,20 @@ def multi_choice_accuracy(prediction, reference, **kwargs):
return score


def accuracy_by_options(question, prediction, reference):
    """Score a prediction that spells out an option's text instead of its letter.

    Options are extracted from ``question`` as every "<capital letter>. <text>"
    run up to the end of its line. The first paragraph of ``prediction`` (the
    text before the first blank line) is then compared with the text of the
    option whose letter equals ``reference``.

    Args:
        question: Prompt text containing the lettered options, one per line.
        prediction: Raw model output.
        reference: Letter of the correct option, e.g. ``"B"``.

    Returns:
        1 if the first paragraph of the prediction equals the text of the
        reference option (ignoring surrounding whitespace), otherwise 0.
    """
    # Strip before splitting so leading newlines do not yield an empty first
    # paragraph, then strip again to drop trailing spaces on that paragraph.
    answer = prediction.strip().split("\n\n")[0].strip()
    if not answer:
        # An empty answer must never match an option, even a blank one.
        return 0

    for option in re.findall(r"[A-Z]\. [^\n]+", question):
        choice, content = option.split(". ", 1)
        # `[^\n]+` keeps trailing spaces and a "\r" from CRLF input, so the
        # option text is normalised the same way as the answer.
        if choice == reference and content.strip() == answer:
            return 1

    return 0


def combined_single_choice_accuracy(prediction, reference, **kwargs):
    """Return the single-choice accuracy score for ``prediction``.

    Forwards every argument unchanged to ``single_choice_accuracy`` and
    returns its result as-is.
    """
    score = single_choice_accuracy(prediction, reference, **kwargs)
    return score

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _load_tokenizer(self, path: str, tokenizer_path: Optional[str], tokenizer_kw
self.logger.warning("pad_token_id is not set for the tokenizer. " "Using eos_token_id as pad_token_id.")
if self.tokenizer.eos_token:
self.tokenizer.pad_token = self.tokenizer.eos_token
elif self.tokenizer.eod_id:
elif hasattr(self.tokenizer, "eod_id"):
# Qwen has an eod token "<|endoftext|>".
self.tokenizer.pad_token_id = self.tokenizer.eod_id

Expand Down

0 comments on commit fa1cbd3

Please sign in to comment.