From c36c09bdd613d6cd99b2e7bb39061f31fe720919 Mon Sep 17 00:00:00 2001 From: Ilya Matiach Date: Wed, 20 Sep 2023 13:12:23 -0400 Subject: [PATCH] add RAITextInsights openai notebook example and tests (#2345) --- ...leaidashboard-openai-model-debugging.ipynb | 380 ++++++++++++++++++ notebooks/test_notebooks.py | 9 + scripts/e2e-widget.js | 3 +- 3 files changed, 391 insertions(+), 1 deletion(-) create mode 100644 notebooks/responsibleaidashboard/text/responsibleaidashboard-openai-model-debugging.ipynb diff --git a/notebooks/responsibleaidashboard/text/responsibleaidashboard-openai-model-debugging.ipynb b/notebooks/responsibleaidashboard/text/responsibleaidashboard-openai-model-debugging.ipynb new file mode 100644 index 0000000000..4a6ac68186 --- /dev/null +++ b/notebooks/responsibleaidashboard/text/responsibleaidashboard-openai-model-debugging.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9ce93873", + "metadata": {}, + "source": [ + "# Assess predictions on Stanford Question Answering Dataset (SQuAD) with an openai endpoint" + ] + }, + { + "cell_type": "markdown", + "id": "37a9763b", + "metadata": {}, + "source": [ + "This notebook demonstrates the use of the `responsibleai` API to assess an openai endpoint on the SQuAD dataset (see https://huggingface.co/datasets/squad for more information about the dataset). It walks through the API calls necessary to create a widget with model analysis insights, then guides a visual analysis of the openai model." + ] + }, + { + "cell_type": "markdown", + "id": "b2cbaf26", + "metadata": {}, + "source": [ + "* [Launch Responsible AI Toolbox](#Launch-Responsible-AI-Toolbox)\n", + " * [Load Model and Data](#Load-Model-and-Data)\n", + " * [Create Model and Data Insights](#Create-Model-and-Data-Insights)" + ] + }, + { + "cell_type": "markdown", + "id": "f858497f", + "metadata": {}, + "source": [ + "## Launch Responsible AI Toolbox" + ] + }, + { + "cell_type": "markdown", + "id": "2b0053f1", + "metadata": {}, + "source": [ + "The following section examines the code necessary to create datasets and a model. It then generates insights using the `responsibleai` API that can be visually analyzed." + ] + }, + { + "cell_type": "markdown", + "id": "21dfdd4f", + "metadata": {}, + "source": [ + "### Load Model and Data\n", + "*The following section can be skipped. It loads a dataset and trains a model for illustrative purposes.*" + ] + }, + { + "cell_type": "markdown", + "id": "11f154c8", + "metadata": {}, + "source": [ + "First we import all necessary dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75ef9e91", + "metadata": {}, + "outputs": [], + "source": [ + "import datasets\n", + "import pandas as pd\n", + "from transformers import pipeline\n", + "\n", + "from responsibleai_text import RAITextInsights, ModelTask" + ] + }, + { + "cell_type": "markdown", + "id": "0d20782b", + "metadata": {}, + "source": [ + "Next we load the SQuAD dataset from huggingface datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8824a592", + "metadata": {}, + "outputs": [], + "source": [ + "dataset = datasets.load_dataset(\"squad\", split=\"train\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0eef443", + "metadata": {}, + "outputs": [], + "source": [ + "dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46cd83d3", + "metadata": {}, + "outputs": [], + "source": [ + "def replace_error_chars(message):\n", + " message = message.replace('`', '')\n", + " return message" + ] + }, + { + "cell_type": "markdown", + "id": "f1bffd45", + "metadata": {}, + "source": [ + "Reformat the dataset to be a pandas dataframe with three columns: context, questions and answers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88cd5fed", + "metadata": {}, + "outputs": [], + "source": [ + "questions = []\n", + "context = []\n", + "answers = []\n", + "for row in dataset:\n", + " context.append(row['context'])\n", + " questions.append(row['question'])\n", + " answers.append(replace_error_chars(row['answers']['text'][0]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "051d4017", + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.DataFrame({'context': context, 'questions': questions, 'answers': answers})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6f87e9c", + "metadata": {}, + "outputs": [], + "source": [ + "data.iloc[:5]['answers'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91b5b448", + "metadata": {}, + "outputs": [], + "source": [ + "from ml_wrappers.model import OpenaiWrapperModel" + ] + }, + { + "cell_type": "markdown", + "id": "c1b4c807", + "metadata": {}, + "source": [ + "Create an openai endpoint wrapper through ml-wrappers library.\n", + "Please enter the api_base and api_key for your openai endpoint below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4082bbf9", + "metadata": {}, + "outputs": [], + "source": [ + "api_type = \"azure\"\n", + "api_base = \"https://your.openai.azure.com/\"\n", + "api_version = \"2023-03-15-preview\"\n", + "api_key = \"put_your_secret_key_here\"\n", + "\n", + "# Openai Wrapper that calls endpoint with given questions\n", + "openai_model = OpenaiWrapperModel(api_type, api_base, api_version, api_key, engine='gpt-4')" + ] + }, + { + "cell_type": "markdown", + "id": "6a42d4eb-bc8f-4afe-ba22-edf11b17339a", + "metadata": {}, + "source": [ + "Here we modify the template for the request sent to openai by wrapping the OpenaiWrapperModel. Please remove the path below and only return the predictions when running on your openai endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1030b055-8afb-4003-ac7c-f8cff9db9ef7", + "metadata": {}, + "outputs": [], + "source": [ + "# modify the template for the questions passed to openai\n", + "import pandas as pd\n", + "import numpy as np\n", + "from unittest.mock import patch\n", + "\n", + "class template(object):\n", + " def __init__(self, model):\n", + " self.model = model\n", + "\n", + " def predict(self, dataset):\n", + " template = 'Answer the question given the context.'\n", + " for i, (context, question) in enumerate(zip(dataset['context'], dataset['questions'])):\n", + " templated_question = template + '\\n\\ncontext: ' + context + '\\nquestion: ' + question\n", + " if isinstance(dataset, pd.DataFrame):\n", + " dataset.iloc[i]['questions'] = templated_question\n", + " else:\n", + " dataset['questions'] = templated_question\n", + " # NOTE: Remove this patch when calling your openai model\n", + " with patch('ml_wrappers.model.OpenaiWrapperModel.predict') as mock_predict:\n", + " # wrap return value in mock class\n", + " if isinstance(dataset, pd.DataFrame):\n", + " mock_predict.return_value = np.array(data['answers'][dataset.index])\n", + " else:\n", + " mock_predict.return_value = np.array(data['answers'][0])\n", + " context = {}\n", + " # Note: When calling a real openai model just return this line here\n", + " return self.model.predict(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04801887", + "metadata": {}, + "outputs": [], + "source": [ + "train_data = data\n", + "test_data = data[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89e81dee-28a4-4e02-a2bf-cf112297c25d", + "metadata": {}, + "outputs": [], + "source": [ + "pipeline = template(openai_model)\n", + "pipeline.predict(test_data)" + ] + }, + { + "cell_type": "markdown", + "id": "9fa4f8f2", + "metadata": {}, + "source": [ + "### Create Model and Data Insights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2f6c956", + "metadata": {}, + "outputs": [], + "source": [ + "from responsibleai_text import RAITextInsights, ModelTask\n", + "from raiwidgets import ResponsibleAIDashboard" + ] + }, + { + "cell_type": "markdown", + "id": "6a4a3036", + "metadata": {}, + "source": [ + "To use Responsible AI Dashboard, initialize a RAITextInsights object upon which different components can be loaded.\n", + "\n", + "RAITextInsights accepts the model, the test dataset, the classes and the task type as its arguments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc8021d3", + "metadata": {}, + "outputs": [], + "source": [ + "rai_insights = RAITextInsights(pipeline, test_data,\n", + " \"answers\",\n", + " task_type=ModelTask.QUESTION_ANSWERING)" + ] + }, + { + "cell_type": "markdown", + "id": "a54b8c10", + "metadata": {}, + "source": [ + "Add the components of the toolbox for model assessment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "526aca04", + "metadata": {}, + "outputs": [], + "source": [ + "rai_insights.error_analysis.add()\n", + "# Note: explanations are not yet available for openai models\n", + "# rai_insights.explainer.add()" + ] + }, + { + "cell_type": "markdown", + "id": "48ecdf3b", + "metadata": {}, + "source": [ + "Once all the desired components have been loaded, compute insights on the test set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b978e05", + "metadata": {}, + "outputs": [], + "source": [ + "rai_insights.compute()" + ] + }, + { + "cell_type": "markdown", + "id": "ef788805", + "metadata": {}, + "source": [ + "Finally, visualize and explore the model insights. Use the resulting widget or follow the link to view this in a new tab." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b63dbfa2", + "metadata": {}, + "outputs": [], + "source": [ + "ResponsibleAIDashboard(rai_insights)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/test_notebooks.py b/notebooks/test_notebooks.py index 7b3a01ae3f..8fe1150024 100644 --- a/notebooks/test_notebooks.py +++ b/notebooks/test_notebooks.py @@ -353,3 +353,12 @@ def test_responsibleaidashboard_question_answering_model_debugging(): test_values = {} assay_one_notebook(nb_path, nb_name, test_values) + + +@pytest.mark.text_notebooks +def test_responsibleaidashboard_openai_model_debugging(): + nb_path = TEXT + nb_name = ("responsibleaidashboard-openai-model-debugging") + + test_values = {} + assay_one_notebook(nb_path, nb_name, test_values) diff --git a/scripts/e2e-widget.js b/scripts/e2e-widget.js index c2a32ae039..4b09f2a5ea 100644 --- a/scripts/e2e-widget.js +++ b/scripts/e2e-widget.js @@ -33,7 +33,8 @@ const textFileNames = [ "responsibleaidashboard-covid-event-multilabel-text-classification-model-debugging" ]; const ignoredFiles = [ - "responsibleaidashboard-question-answering-model-debugging" + "responsibleaidashboard-question-answering-model-debugging", + "responsibleaidashboard-openai-model-debugging" ]; const fileNames = tabularFileNames .concat(visionFileNames)