From 2899deafeffb4a15098a1daa695124e7b6e4eb90 Mon Sep 17 00:00:00 2001
From: Kartik Choudhary
Date: Sun, 15 Oct 2023 18:53:17 -0400
Subject: [PATCH 1/6] Added info about required packages

---
 ...d-question-answering-model-debugging.ipynb | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb b/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
index 3b663cfc61..4af484b9f1 100644
--- a/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
+++ b/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
@@ -42,6 +42,31 @@
     "The following section examines the code necessary to create datasets and a model. It then generates insights using the `responsibleai` API that can be visually analyzed."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "6174bcad",
+   "metadata": {},
+   "source": [
+    "### Prepare\n",
+    "\n",
+    "To run this notebook, we need to install the following packages:\n",
+    "\n",
+    "```requirements.txt\n",
+    "raiutils\n",
+    "raiwidgets\n",
+    "datasets\n",
+    "transformers\n",
+    "responsibleai_text\n",
+    "torch\n",
+    "```\n",
+    "\n",
+    "Run the following command to download the spaCy pipeline:\n",
+    "\n",
+    "```bash\n",
+    "python -m spacy download en_core_web_sm\n",
+    "```"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "40739025",

From 80b0c3454bb8b34998da9e0bdb71af3c079bf829 Mon Sep 17 00:00:00 2001
From: Kartik Choudhary
Date: Sun, 15 Oct 2023 18:53:55 -0400
Subject: [PATCH 2/6] Update responsibleaidashboard-question-answering-model-debugging.ipynb

---
 ...ponsibleaidashboard-question-answering-model-debugging.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb b/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
index 4af484b9f1..d804c9bed8 100644
--- a/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
+++ b/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
@@ -51,7 +51,7 @@
     "\n",
     "To run this notebook, we need to install the following packages:\n",
     "\n",
-    "```requirements.txt\n",
+    "```\n",
     "raiutils\n",
     "raiwidgets\n",
     "datasets\n",
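The two patches above pin down the notebook's requirements. Before applying the rest of the series, the environment can be sanity-checked with a sketch along these lines — a minimal, illustrative check that is not part of the patches; it assumes each importable module name matches the pip package name listed above:

```python
# Minimal environment check for the packages listed in PATCH 1/6.
# Assumption: importable module names match the pip package names above.
import importlib.util

required = ['raiutils', 'raiwidgets', 'datasets', 'transformers',
            'responsibleai_text', 'torch', 'spacy']
missing = [name for name in required
           if importlib.util.find_spec(name) is None]

if missing:
    print('Missing packages:', ', '.join(missing))
else:
    import spacy
    # spacy.load raises OSError if en_core_web_sm has not been downloaded.
    spacy.load('en_core_web_sm')
    print('Environment looks ready.')
```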
datasets.load_dataset(\"squad\", split=\"train\")\n", "dataset" ] }, @@ -155,17 +146,9 @@ "metadata": {}, "outputs": [], "source": [ - "data = pd.DataFrame({'context': context, 'questions': questions, 'answers': answers})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e6f87e9c", - "metadata": {}, - "outputs": [], - "source": [ - "data" + "data = pd.DataFrame({'context': context, 'questions': questions, 'answers': answers})\n", + "data = data.sample(frac=1.0, random_state=42).reset_index(drop=True)\n", + "data.head()" ] }, { @@ -184,18 +167,42 @@ "outputs": [], "source": [ "# load the question-answering model\n", - "pmodel = pipeline('question-answering')" + "pipeline_model = pipeline('question-answering')\n", + "test_size = 5\n", + "\n", + "train_data = data\n", + "test_data = data[:test_size]" + ] + }, + { + "cell_type": "markdown", + "id": "7cf8327b", + "metadata": {}, + "source": [ + "See an example of the model's predictions" ] }, { "cell_type": "code", "execution_count": null, - "id": "04801887", + "id": "ce087699", "metadata": {}, "outputs": [], "source": [ - "train_data = data\n", - "test_data = data[:5]" + "def get_answer(dataset, idx):\n", + " model_output = pipeline_model(question=dataset['questions'][idx], \n", + " context=dataset['context'][idx])\n", + " pred = model_output['answer']\n", + " return pred\n", + "\n", + "def check_answer(dataset, idx):\n", + " pred = get_answer(dataset, idx)\n", + " print('Question : ', dataset['questions'][idx])\n", + " print('Answer : ', dataset['answers'][idx])\n", + " print('Predicted : ', pred)\n", + " print('Correct : ', pred == dataset['answers'][idx])\n", + "\n", + "check_answer(test_data, 0)\n" ] }, { From 6c19f0fd3187d18631324a699d6ef9ecb24e9916 Mon Sep 17 00:00:00 2001 From: Kartik Choudhary Date: Sun, 15 Oct 2023 19:01:09 -0400 Subject: [PATCH 4/6] Update responsibleaidashboard-question-answering-model-debugging.ipynb --- ...onsibleaidashboard-question-answering-model-debugging.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb b/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb index 2d8de2ffcd..dbcb6b8dc9 100644 --- a/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb +++ b/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb @@ -241,8 +241,7 @@ "metadata": {}, "outputs": [], "source": [ - "rai_insights = RAITextInsights(pmodel, test_data,\n", - " \"answers\",\n", + "rai_insights = RAITextInsights(pipeline_model, test_data, \"answers\",\n", " task_type=ModelTask.QUESTION_ANSWERING)" ] }, From b2e954014ca5f412a89087aa5bc5585bd3bd9c0b Mon Sep 17 00:00:00 2001 From: Kartik Choudhary Date: Tue, 30 Jan 2024 10:49:08 -0500 Subject: [PATCH 5/6] add helper method for genai metrics Signed-off-by: Kartik Choudhary --- .../utils/genai_metrics/metrics.py | 14 +++ .../tests/test_genai_metrics.py | 94 ++++++++----------- 2 files changed, 51 insertions(+), 57 deletions(-) diff --git a/responsibleai_text/responsibleai_text/utils/genai_metrics/metrics.py b/responsibleai_text/responsibleai_text/utils/genai_metrics/metrics.py index e0e4934d76..784c38a7c1 100644 --- a/responsibleai_text/responsibleai_text/utils/genai_metrics/metrics.py +++ b/responsibleai_text/responsibleai_text/utils/genai_metrics/metrics.py @@ -3,6 +3,7 @@ """Compute AI-assisted metrics 
for generative text models.""" +import numpy as np import logging from pathlib import Path @@ -30,3 +31,16 @@ def get_genai_metric(metric_name, **metric_kwargs): metric = evaluate.load( str(curr_file_dir.joinpath(f'scripts/{metric_name}.py'))) return metric.compute(**metric_kwargs) + + +def get_genai_metric_mean(metric_name, **metric_kwargs): + """Get the mean of the metric from the genai library. + + :param metric_name: The name of the metric. + :type metric_name: str + :param metric_kwargs: The keyword arguments to pass to the metric. + :type metric_kwargs: dict + :return: The mean of the metric. + :rtype: float + """ + return np.mean(get_genai_metric(metric_name, **metric_kwargs)['scores']) diff --git a/responsibleai_text/tests/test_genai_metrics.py b/responsibleai_text/tests/test_genai_metrics.py index 5285d6c623..8cf530e5ad 100644 --- a/responsibleai_text/tests/test_genai_metrics.py +++ b/responsibleai_text/tests/test_genai_metrics.py @@ -1,7 +1,8 @@ # Copyright (c) Microsoft Corporation # Licensed under the MIT License. -from responsibleai_text.utils.genai_metrics.metrics import get_genai_metric +from responsibleai_text.utils.genai_metrics.metrics import ( + get_genai_metric, get_genai_metric_mean) PREDICTIONS = ['This is a prediction'] REFERENCES = ['This is a reference'] @@ -15,69 +16,48 @@ def predict(self, inp): class TestGenAIMetrics: - def test_coherence(self): - metric = get_genai_metric('coherence', - predictions=PREDICTIONS, - references=REFERENCES, + def assert_metrics(self, metric_name, + expected, input_len, + **metric_kwargs): + metric = get_genai_metric(metric_name, **metric_kwargs, wrapper_model=DummyModelWrapper()) - assert metric['scores'] == [1] + assert metric['scores'] == [expected] - metric = get_genai_metric('coherence', - predictions=PREDICTIONS * 5, - references=REFERENCES * 5, - wrapper_model=DummyModelWrapper()) - assert metric['scores'] == [1] * 5 + metric_mean = get_genai_metric_mean(metric_name, **metric_kwargs, + wrapper_model=DummyModelWrapper()) + assert metric_mean == expected - def test_equivalence(self): - metric = get_genai_metric('equivalence', - predictions=PREDICTIONS, - references=REFERENCES, - answers=ANSWERS, - wrapper_model=DummyModelWrapper()) - assert metric['scores'] == [1] + kwargs_multi = {k: v * input_len for k, v in metric_kwargs.items()} + metric_multi = get_genai_metric(metric_name, **kwargs_multi, + wrapper_model=DummyModelWrapper()) + assert metric_multi['scores'] == [expected] * input_len - metric = get_genai_metric('equivalence', - predictions=PREDICTIONS * 5, - references=REFERENCES * 5, - answers=ANSWERS * 5, - wrapper_model=DummyModelWrapper()) - assert metric['scores'] == [1] * 5 + metric_mean_multi = get_genai_metric_mean( + metric_name, **kwargs_multi, wrapper_model=DummyModelWrapper()) + assert metric_mean_multi == expected - def test_fluency(self): - metric = get_genai_metric('fluency', - predictions=PREDICTIONS, - references=REFERENCES, - wrapper_model=DummyModelWrapper()) - assert metric['scores'] == [1] + def test_coherence(self): + self.assert_metrics('coherence', 1, 5, + predictions=PREDICTIONS, + references=REFERENCES) - metric = get_genai_metric('fluency', - predictions=PREDICTIONS * 5, - references=REFERENCES * 5, - wrapper_model=DummyModelWrapper()) - assert metric['scores'] == [1] * 5 + def test_equivalence(self): + self.assert_metrics('equivalence', 1, 5, + predictions=PREDICTIONS, + references=REFERENCES, + answers=ANSWERS) - def test_groundedness(self): - metric = get_genai_metric('groundedness', - 
diff --git a/responsibleai_text/tests/test_genai_metrics.py b/responsibleai_text/tests/test_genai_metrics.py
index 5285d6c623..8cf530e5ad 100644
--- a/responsibleai_text/tests/test_genai_metrics.py
+++ b/responsibleai_text/tests/test_genai_metrics.py
@@ -1,7 +1,8 @@
 # Copyright (c) Microsoft Corporation
 # Licensed under the MIT License.
 
-from responsibleai_text.utils.genai_metrics.metrics import get_genai_metric
+from responsibleai_text.utils.genai_metrics.metrics import (
+    get_genai_metric, get_genai_metric_mean)
 
 PREDICTIONS = ['This is a prediction']
 REFERENCES = ['This is a reference']
@@ -15,69 +16,48 @@ def predict(self, inp):
         return ['1'] * len(inp)
 
 
 class TestGenAIMetrics:
-    def test_coherence(self):
-        metric = get_genai_metric('coherence',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
+    def assert_metrics(self, metric_name,
+                       expected, input_len,
+                       **metric_kwargs):
+        metric = get_genai_metric(metric_name, **metric_kwargs,
                                   wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+        assert metric['scores'] == [expected]
 
-        metric = get_genai_metric('coherence',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        metric_mean = get_genai_metric_mean(metric_name, **metric_kwargs,
+                                            wrapper_model=DummyModelWrapper())
+        assert metric_mean == expected
 
-    def test_equivalence(self):
-        metric = get_genai_metric('equivalence',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  answers=ANSWERS,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+        kwargs_multi = {k: v * input_len for k, v in metric_kwargs.items()}
+        metric_multi = get_genai_metric(metric_name, **kwargs_multi,
+                                        wrapper_model=DummyModelWrapper())
+        assert metric_multi['scores'] == [expected] * input_len
 
-        metric = get_genai_metric('equivalence',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  answers=ANSWERS * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        metric_mean_multi = get_genai_metric_mean(
+            metric_name, **kwargs_multi, wrapper_model=DummyModelWrapper())
+        assert metric_mean_multi == expected
 
-    def test_fluency(self):
-        metric = get_genai_metric('fluency',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+    def test_coherence(self):
+        self.assert_metrics('coherence', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
-        metric = get_genai_metric('fluency',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+    def test_equivalence(self):
+        self.assert_metrics('equivalence', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES,
+                            answers=ANSWERS)
 
-    def test_groundedness(self):
-        metric = get_genai_metric('groundedness',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+    def test_fluency(self):
+        self.assert_metrics('fluency', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
-        metric = get_genai_metric('groundedness',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+    def test_groundedness(self):
+        self.assert_metrics('groundedness', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
     def test_relevance(self):
-        metric = get_genai_metric('relevance',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
-
-        metric = get_genai_metric('relevance',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        self.assert_metrics('relevance', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)

From eeefe71cff27f18dec7e05bf05ce5d66a959b610 Mon Sep 17 00:00:00 2001
From: Kartik Choudhary
Date: Tue, 30 Jan 2024 10:50:20 -0500
Subject: [PATCH 6/6] Fix import order in metrics.py

Signed-off-by: Kartik Choudhary
---
 .../responsibleai_text/utils/genai_metrics/metrics.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/responsibleai_text/responsibleai_text/utils/genai_metrics/metrics.py b/responsibleai_text/responsibleai_text/utils/genai_metrics/metrics.py
index 784c38a7c1..214c435c02 100644
--- a/responsibleai_text/responsibleai_text/utils/genai_metrics/metrics.py
+++ b/responsibleai_text/responsibleai_text/utils/genai_metrics/metrics.py
@@ -3,10 +3,11 @@
 """Compute AI-assisted metrics
 for generative text models."""
 
-import numpy as np
 import logging
 from pathlib import Path
 
+import numpy as np
+
 module_logger = logging.getLogger(__name__)
 module_logger.setLevel(logging.INFO)
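As a closing aside on the test refactor in PATCH 5/6: the `assert_metrics` helper could also be expressed as a pytest parametrization. A hypothetical variant for comparison — `PREDICTIONS`, `REFERENCES`, `ANSWERS`, `DummyModelWrapper`, and `get_genai_metric_mean` are the names defined above, and this sketch is not part of the series:

```python
# Hypothetical pytest-parametrized variant of the five metric tests.
import pytest

CASES = [
    ('coherence', {}),
    ('equivalence', {'answers': ANSWERS}),
    ('fluency', {}),
    ('groundedness', {}),
    ('relevance', {}),
]


@pytest.mark.parametrize('metric_name, extra', CASES)
def test_metric_mean(metric_name, extra):
    mean = get_genai_metric_mean(metric_name,
                                 predictions=PREDICTIONS,
                                 references=REFERENCES,
                                 wrapper_model=DummyModelWrapper(),
                                 **extra)
    assert mean == 1
```

Parametrization trades the shared multi-input assertions of `assert_metrics` for per-case test reporting; either shape exercises the same helper.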