Add genai task type
Signed-off-by: Kartik Choudhary <[email protected]>
kartik727 authored and imatiach-msft committed Jan 23, 2024
1 parent 6f14f90 commit 99cc0fe
Showing 5 changed files with 145 additions and 2 deletions.
7 changes: 7 additions & 0 deletions responsibleai_text/responsibleai_text/common/constants.py
@@ -18,6 +18,8 @@ class ModelTask(str, Enum):
QUESTION_ANSWERING = 'question_answering'
ENTAILMENT = 'entailment'
SUMMARIZATIONS = 'summarizations'
GENERATIVE_TEXT = 'generative_text'
GENERATIVE_TEXT_CHAT = 'generative_text_chat'
UNKNOWN = 'unknown'


@@ -34,3 +36,8 @@ class QuestionAnsweringFields(object):
QUESTION = "question"
CONTEXT = "context"
ANSWERS = "answers"

class GenerativeTextFields(object):
PROMPT = "prompt"
SYS_PROMPT = "sys_prompt"
RESPONSE = "response"
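For quick reference, a minimal sketch of importing the new constants; the import path follows the file path in this diff, and the values simply restate the additions above.

```python
from responsibleai_text.common.constants import (
    GenerativeTextFields, ModelTask)

# The new task types are plain string-valued enum members, so either the
# member or its string value can be passed wherever a task type is expected.
assert ModelTask.GENERATIVE_TEXT.value == 'generative_text'
assert ModelTask.GENERATIVE_TEXT_CHAT.value == 'generative_text_chat'

# Column-name constants for generative text datasets.
print(GenerativeTextFields.PROMPT,      # 'prompt'
      GenerativeTextFields.SYS_PROMPT,  # 'sys_prompt'
      GenerativeTextFields.RESPONSE)    # 'response'
```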
@@ -83,6 +83,11 @@ def __init__(self, model, dataset, is_multilabel, task_type, classes=None):
self.predictions = self.model.predict(
self.dataset.loc[:, ['context', 'questions']])
self.predictions = np.array(self.predictions)
elif self.task_type == ModelTask.GENERATIVE_TEXT:
# FIXME: Copying from QUESTION_ANSWERING for now
self.predictions = self.model.predict(
self.dataset.loc[:, ['context', 'questions']])
self.predictions = np.array(self.predictions)
else:
raise ValueError("Unknown task type: {}".format(self.task_type))

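As the FIXME notes, the GENERATIVE_TEXT branch above currently reuses the question-answering column names. Below is a minimal, illustrative sketch (not part of this commit) of a model object that branch would accept: predict receives the two-column DataFrame slice and must return one generated string per row.

```python
import numpy as np
import pandas as pd


class EchoGenerativeModel:
    """Toy stand-in for a generative text model (illustrative only)."""

    def predict(self, dataset: pd.DataFrame):
        # One generated string per row, so np.array(...) in the caller
        # yields a 1-D array of predictions.
        return ['Response to: ' + q for q in dataset['questions']]


data = pd.DataFrame({'context': ['Some passage of text.'],
                     'questions': ['What is this passage about?']})
predictions = np.array(
    EchoGenerativeModel().predict(data.loc[:, ['context', 'questions']]))
```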
@@ -131,6 +131,8 @@ def compute(self):
eval_examples.append(question + SEP + context)
self._explanation = [explainer_start(eval_examples),
explainer_end(eval_examples)]
elif self._task_type == ModelTask.GENERATIVE_TEXT:
raise NotImplementedError('Generative text is not supported yet')
else:
raise ValueError("Unknown task type: {}".format(self._task_type))

@@ -100,6 +100,10 @@ def _add_extra_metadata_features(task_type, feature_metadata):
if is_cat_empty:
feature_metadata.categorical_features = []
feature_metadata.categorical_features.append(_QUESTION_TYPE)
if task_type == ModelTask.GENERATIVE_TEXT:
# TODO: Make this configurable
feature_metadata.prompt_col = 'questions'
feature_metadata.context_col = 'context'
return feature_metadata
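A short sketch of what the GENERATIVE_TEXT branch above does to the metadata object: the prompt and context column names are attached as attributes (hard-coded for now, per the TODO). The FeatureMetadata import path is an assumption based on the responsibleai package, and the sketch assumes it runs inside the module above where the private helper is defined.

```python
from responsibleai.feature_metadata import FeatureMetadata

from responsibleai_text.common.constants import ModelTask

# _add_extra_metadata_features is the module-private helper shown above,
# assumed to be in scope here.
metadata = FeatureMetadata()
metadata = _add_extra_metadata_features(ModelTask.GENERATIVE_TEXT, metadata)
print(metadata.prompt_col)   # 'questions'
print(metadata.context_col)  # 'context'
```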


@@ -269,7 +273,9 @@ def _validate_model(self, model: Any, test: pd.DataFrame,
target_column, axis=1)
small_test_data = get_text_columns(small_test_data, text_column)
small_test_data = small_test_data.iloc[0]
-        if task_type != ModelTask.QUESTION_ANSWERING:
+        if task_type not in [
+                ModelTask.QUESTION_ANSWERING,
+                ModelTask.GENERATIVE_TEXT]:
small_test_data = small_test_data.tolist()
# Call the model
try:
@@ -319,7 +325,8 @@ def _validate_rai_insights_input_parameters(
ModelTask.SENTIMENT_ANALYSIS.value,
ModelTask.QUESTION_ANSWERING.value,
ModelTask.ENTAILMENT.value,
-            ModelTask.SUMMARIZATIONS.value
+            ModelTask.SUMMARIZATIONS.value,
+            ModelTask.GENERATIVE_TEXT.value,
]

if task_type not in valid_tasks:
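With GENERATIVE_TEXT now in valid_tasks, an insights object can be built for the new task type. A minimal sketch under stated assumptions: the top-level RAITextInsights export and its (model, test, target_column, task_type) signature are taken from the package's public API rather than this diff, and the column names mirror the hard-coded 'questions'/'context' columns used elsewhere in this commit.

```python
import pandas as pd

from responsibleai_text import ModelTask, RAITextInsights


class PlaceholderModel:
    """Stand-in generative model; returns a canned response per row."""

    def predict(self, dataset):
        return ['placeholder response'] * len(dataset)


test = pd.DataFrame({
    'questions': ['What does the toolbox provide?'],
    'context': ['The Responsible AI Toolbox provides model assessment tools.'],
    'answers': ['Model assessment tools.']})

rai_insights = RAITextInsights(
    model=PlaceholderModel(),
    test=test,
    target_column='answers',
    task_type=ModelTask.GENERATIVE_TEXT)
```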
@@ -514,6 +521,8 @@ def _get_test_text_data(self, is_classification_task):
dataset = self.test.drop(target_column, axis=1)
elif self.task_type == ModelTask.QUESTION_ANSWERING:
dataset = self.test.drop([self.target_column], axis=1)
elif self.task_type == ModelTask.GENERATIVE_TEXT:
dataset = self.test.drop([self.target_column], axis=1)
else:
raise ValueError("Unknown task type: {}".format(self.task_type))
dataset = get_text_columns(dataset, self._text_column)
@@ -592,6 +601,18 @@ def _get_dataset(self):
"Model predict_proba output of unsupported type,") from ex
dashboard_dataset.probability_y = probability_y

# add prompt and (optionally) context to dataset
# for generative text tasks
if self.task_type == ModelTask.GENERATIVE_TEXT:
prompt = self.test[self._feature_metadata.prompt_col]
context = self.test.get(self._feature_metadata.context_col)

dashboard_dataset.prompt = convert_to_list(prompt)
if context is None:
dashboard_dataset.context = None
else:
dashboard_dataset.context = convert_to_list(context)

return dashboard_dataset
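The block above surfaces the prompt column, and the context column when present, on the dashboard dataset as plain lists. A small standalone illustration of that shape, using the hard-coded column names from this commit; convert_to_list is assumed to be the raiutils helper this module already uses, and the dashboard dataset is stubbed with a simple namespace.

```python
from types import SimpleNamespace

import pandas as pd
from raiutils.data_processing import convert_to_list

test = pd.DataFrame({'questions': ['Prompt one', 'Prompt two'],
                     'context': ['Context one', 'Context two']})
dashboard_dataset = SimpleNamespace()

prompt = test['questions']
context = test.get('context')  # None when the column is absent

dashboard_dataset.prompt = convert_to_list(prompt)
dashboard_dataset.context = (None if context is None
                             else convert_to_list(context))
print(dashboard_dataset.prompt)   # ['Prompt one', 'Prompt two']
print(dashboard_dataset.context)  # ['Context one', 'Context two']
```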

def _convert_labels(self, labels, class_names, unique_labels=None):
@@ -811,6 +832,8 @@ compute_question_answering_metrics(
selection_indexes,
question_answering_cache
):
# return self.compute_genai_metrics(selection_indexes, question_answering_cache)
print('compute_question_answering_metrics')
dashboard_dataset = self.get_data().dataset
true_y = dashboard_dataset.true_y
predicted_y = dashboard_dataset.predicted_y
@@ -853,3 +876,93 @@ def compute_question_answering_metrics(
except ValueError:
all_cohort_metrics.append([0, 0, 0, 0, 0, 0])
return all_cohort_metrics

def compute_genai_metrics(
self,
selection_indexes,
question_answering_cache
):
print('compute_genai_metrics')
curr_file_dir = Path(__file__).resolve().parent

dashboard_dataset = self.get_data().dataset
true_y = dashboard_dataset.true_y
predicted_y = dashboard_dataset.predicted_y

eval_model = self.temp_eval_model
questions = self.temp_questions
context = self.temp_context

all_cohort_metrics = []
for cohort_indices in selection_indexes:
print('cohort metrics')
true_y_cohort = [true_y[cohort_index] for cohort_index
in cohort_indices]
predicted_y_cohort = [predicted_y[cohort_index] for cohort_index
in cohort_indices]
questions_cohort = [questions[cohort_index] for cohort_index
in cohort_indices]
context_cohort = [context[cohort_index] for cohort_index
in cohort_indices]
try:
print('exact match')
exact_match = evaluate.load('exact_match')
exact_match_results = exact_match.compute(
predictions=predicted_y_cohort, references=true_y_cohort)

print('coherence')
coherence = evaluate.load(
str(curr_file_dir.joinpath('metrics/coherence.py')))
coherence_results = coherence.compute(
predictions=predicted_y_cohort,
references=questions_cohort,
wrapper_model=eval_model)
# coherence_results = {'scores' : [3.4]}

print('equivalence')
equivalence = evaluate.load(
str(curr_file_dir.joinpath('metrics/equivalence.py')))
equivalence_results = equivalence.compute(
predictions=predicted_y_cohort,
references=questions_cohort,
answers=true_y_cohort,
wrapper_model=eval_model)

print('fluency')
fluency = evaluate.load(
str(curr_file_dir.joinpath('metrics/fluency.py')))
fluency_results = fluency.compute(
predictions=predicted_y_cohort,
references=questions_cohort,
wrapper_model=eval_model)

print('groundedness')
# groundedness = evaluate.load(
# str(curr_file_dir.joinpath('metrics/groundedness.py')))
# groundedness_results = groundedness.compute(
# predictions=predicted_y_cohort,
# references=context_cohort,
# wrapper_model=eval_model)
groundedness_results = {'scores' : [3.4]}

print('relevance')
# relevance = evaluate.load(
# str(curr_file_dir.joinpath('metrics/relevance.py')))
# relevance_results = relevance.compute(
# predictions=predicted_y_cohort,
# references=context_cohort,
# questions=questions_cohort,
# wrapper_model=eval_model)
relevance_results = {'scores' : [3.5]}

all_cohort_metrics.append([
exact_match_results['exact_match'],
np.mean(coherence_results['scores']),
np.mean(equivalence_results['scores']),
np.mean(fluency_results['scores']),
np.mean(groundedness_results['scores']),
np.mean(relevance_results['scores'])])
except ValueError:
all_cohort_metrics.append([0, 0, 0, 0, 0, 0])
print('all done')
return all_cohort_metrics
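compute_genai_metrics mixes a built-in Hugging Face evaluate metric (exact_match) with metric scripts bundled next to this module and loaded by file path. A minimal sketch of that loading pattern outside this class; the metrics/coherence.py path and the wrapper_model keyword are carried over from the code above, not re-verified here.

```python
from pathlib import Path

import evaluate

# Built-in metric from the evaluate hub.
exact_match = evaluate.load('exact_match')
print(exact_match.compute(predictions=['a cat'], references=['a cat']))
# -> {'exact_match': 1.0}

# Locally bundled metric script, loaded by absolute path as above.
curr_file_dir = Path(__file__).resolve().parent
coherence = evaluate.load(str(curr_file_dir / 'metrics' / 'coherence.py'))
# Its compute(predictions=..., references=..., wrapper_model=...) call is
# expected to return a dict with a 'scores' list, matching its use above.
```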
16 changes: 16 additions & 0 deletions responsibleai_text/responsibleai_text/utils/feature_extractors.py
@@ -60,6 +60,19 @@ def extract_features(text_dataset: pd.DataFrame,
feature_names.append(prefix + "maximum_parse_tree_depth")
feature_names.append("question_type")
feature_names.append("context_overlap")
elif task_type == ModelTask.GENERATIVE_TEXT:
# TODO: Add feature names for generative text
start_meta_index += 1
feature_names = []
prefixes = [QuestionAnsweringFields.CONTEXT + "_",
QuestionAnsweringFields.QUESTION + "_"]
for prefix in prefixes:
for feature_name in base_feature_names:
feature_names.append(prefix + feature_name)
feature_names.append(prefix + "average_parse_tree_depth")
feature_names.append(prefix + "maximum_parse_tree_depth")
feature_names.append("question_type")
feature_names.append("context_overlap")
else:
raise ValueError("Unknown task type: {}".format(task_type))
# copy over the metadata column names
@@ -101,6 +114,9 @@
extracted_features, has_dropped_features,
dropped_features, column_names)
results.append(extracted_features)
elif task_type == ModelTask.GENERATIVE_TEXT:
# TODO: Add feature extraction for generative text
pass
else:
raise ValueError("Unknown task type: {}".format(task_type))
return results, feature_names
