From b8c3a38caaba9fa3644831171f30627ee1cd5b22 Mon Sep 17 00:00:00 2001
From: Hannah Stepanek
Date: Mon, 18 Dec 2023 10:20:12 -0800
Subject: [PATCH 01/19] Mark instrumentation points for SDK (#1009)

* Mark instrumentation points for SDK

* Remove duplicated assertion

* Fixup: assert attribute not function

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
---
 newrelic/hooks/external_botocore.py          |  7 +++++
 newrelic/hooks/mlmodel_openai.py             | 31 +++++++++++++------
 .../test_bedrock_chat_completion.py          |  4 +++
 .../test_bedrock_embeddings.py               |  6 +++-
 tests/mlmodel_openai/test_chat_completion.py |  5 +++
 tests/mlmodel_openai/test_embeddings.py      |  5 +++
 6 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index 2a327a84a..5a7c2e56c 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -592,6 +592,12 @@ def _nr_clientcreator__create_api_method_(wrapped, instance, args, kwargs):
     return tracer(wrapped)


+def _nr_clientcreator__create_methods(wrapped, instance, args, kwargs):
+    class_attributes = wrapped(*args, **kwargs)
+    class_attributes["_nr_wrapped"] = True
+    return class_attributes
+
+
 def _bind_make_request_params(operation_model, request_dict, *args, **kwargs):
     return operation_model, request_dict

@@ -622,3 +628,4 @@ def instrument_botocore_endpoint(module):

 def instrument_botocore_client(module):
     wrap_function_wrapper(module, "ClientCreator._create_api_method", _nr_clientcreator__create_api_method_)
+    wrap_function_wrapper(module, "ClientCreator._create_methods", _nr_clientcreator__create_methods)
diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index e200e80f0..40eb4f61c 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -884,21 +884,33 @@ async def wrap_base_client_process_response_async(wrapped, instance, args, kwarg


 def instrument_openai_util(module):
-    wrap_function_wrapper(module, "convert_to_openai_object", wrap_convert_to_openai_object)
+    if hasattr(module, "convert_to_openai_object"):
+        wrap_function_wrapper(module, "convert_to_openai_object", wrap_convert_to_openai_object)
+        # This is to mark where we instrument so the SDK knows not to instrument them
+        # again.
+        setattr(module.convert_to_openai_object, "_nr_wrapped", True)


 def instrument_openai_api_resources_embedding(module):
-    if hasattr(module.Embedding, "create"):
-        wrap_function_wrapper(module, "Embedding.create", wrap_embedding_sync)
-    if hasattr(module.Embedding, "acreate"):
-        wrap_function_wrapper(module, "Embedding.acreate", wrap_embedding_async)
+    if hasattr(module, "Embedding"):
+        if hasattr(module.Embedding, "create"):
+            wrap_function_wrapper(module, "Embedding.create", wrap_embedding_sync)
+        if hasattr(module.Embedding, "acreate"):
+            wrap_function_wrapper(module, "Embedding.acreate", wrap_embedding_async)
+        # This is to mark where we instrument so the SDK knows not to instrument them
+        # again.
+        setattr(module.Embedding, "_nr_wrapped", True)


 def instrument_openai_api_resources_chat_completion(module):
-    if hasattr(module.ChatCompletion, "create"):
-        wrap_function_wrapper(module, "ChatCompletion.create", wrap_chat_completion_sync)
-    if hasattr(module.ChatCompletion, "acreate"):
-        wrap_function_wrapper(module, "ChatCompletion.acreate", wrap_chat_completion_async)
+    if hasattr(module, "ChatCompletion"):
+        if hasattr(module.ChatCompletion, "create"):
+            wrap_function_wrapper(module, "ChatCompletion.create", wrap_chat_completion_sync)
+        if hasattr(module.ChatCompletion, "acreate"):
+            wrap_function_wrapper(module, "ChatCompletion.acreate", wrap_chat_completion_async)
+        # This is to mark where we instrument so the SDK knows not to instrument them
+        # again.
+        setattr(module.ChatCompletion, "_nr_wrapped", True)


 def instrument_openai_resources_chat_completions(module):
@@ -908,7 +920,6 @@
     wrap_function_wrapper(module, "AsyncCompletions.create", wrap_chat_completion_async)


-# OpenAI v1 instrumentation points
 def instrument_openai_resources_embeddings(module):
     if hasattr(module, "Embeddings"):
         if hasattr(module.Embeddings, "create"):
diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py
index 604771c82..efcc7cec0 100644
--- a/tests/external_botocore/test_bedrock_chat_completion.py
+++ b/tests/external_botocore/test_bedrock_chat_completion.py
@@ -287,3 +287,7 @@ def _test():
         exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100)

     _test()
+
+
+def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server):
+    assert bedrock_server._nr_wrapped
diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py
index 7a5740e46..cc442fc15 100644
--- a/tests/external_botocore/test_bedrock_embeddings.py
+++ b/tests/external_botocore/test_bedrock_embeddings.py
@@ -1,4 +1,4 @@
- # Copyright 2010 New Relic, Inc.
+# Copyright 2010 New Relic, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -172,3 +172,7 @@ def _test():
         exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100)

     _test()
+
+
+def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server):
+    assert bedrock_server._nr_wrapped
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index f2c31b262..e141e45e5 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -371,3 +371,8 @@ def test_openai_chat_completion_async_disabled_custom_event_settings(loop):
             model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
         )
     )
+
+
+def test_openai_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility():
+    assert openai.ChatCompletion._nr_wrapped
+    assert openai.util.convert_to_openai_object._nr_wrapped
diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py
index ae2c048fc..65ac33e87 100644
--- a/tests/mlmodel_openai/test_embeddings.py
+++ b/tests/mlmodel_openai/test_embeddings.py
@@ -148,3 +148,8 @@ def test_openai_embedding_async_disabled_custom_insights_events(loop):
     loop.run_until_complete(
         openai.Embedding.acreate(input="This is an embedding test.", model="text-embedding-ada-002")
     )
+
+
+def test_openai_embedding_functions_marked_as_wrapped_for_sdk_compatibility():
+    assert openai.Embedding._nr_wrapped
+    assert openai.util.convert_to_openai_object._nr_wrapped

From 0bb34db25c5c729e374ba95256d8fa20b191475f Mon Sep 17 00:00:00 2001
From: Hannah Stepanek
Date: Wed, 20 Dec 2023 16:14:34 -0800
Subject: [PATCH 02/19] Prefix conversation id with llm (#1012)

* Change conversation_id->llm.conversation_id

* Fixup formatting

---
 newrelic/hooks/external_botocore.py                 |  2 +-
 newrelic/hooks/mlmodel_openai.py                    |  4 ++--
 .../test_bedrock_chat_completion.py                 |  9 ++++-----
 tests/mlmodel_openai/test_chat_completion.py        |  6 +++---
 tests/mlmodel_openai/test_chat_completion_error.py  | 12 ++++++------
 .../mlmodel_openai/test_chat_completion_error_v1.py |  8 ++++----
 tests/mlmodel_openai/test_chat_completion_v1.py     |  6 +++---
 tests/mlmodel_openai/test_get_llm_message_ids.py    | 10 +++++++---
 tests/mlmodel_openai/test_get_llm_message_ids_v1.py |  4 ++--
 9 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index 5a7c2e56c..33ba94e21 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -511,7 +511,7 @@ def handle_chat_completion_event(
     span_id,
 ):
     custom_attrs_dict = transaction._custom_params
-    conversation_id = custom_attrs_dict.get("conversation_id", "")
+    conversation_id = custom_attrs_dict.get("llm.conversation_id", "")

     chat_completion_id = str(uuid.uuid4())

diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 40eb4f61c..07c1d75bb 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -193,7 +193,7 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs):

     # Get conversation ID off of the transaction
     custom_attrs_dict = transaction._custom_params
-    conversation_id = custom_attrs_dict.get("conversation_id", "")
+    conversation_id = custom_attrs_dict.get("llm.conversation_id", "")

     settings = transaction.settings if transaction.settings is not None else global_settings()
     app_name = settings.app_name
@@ -654,7 +654,7 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs):

     # Get conversation ID off of the transaction
     custom_attrs_dict = transaction._custom_params
-    conversation_id = custom_attrs_dict.get("conversation_id", "")
+    conversation_id = custom_attrs_dict.get("llm.conversation_id", "")

     settings = transaction.settings if transaction.settings is not None else global_settings()
     app_name = settings.app_name
diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py
index efcc7cec0..2c4925a43 100644
--- a/tests/external_botocore/test_bedrock_chat_completion.py
+++ b/tests/external_botocore/test_bedrock_chat_completion.py
@@ -23,7 +23,6 @@
     chat_completion_expected_events,
     chat_completion_invalid_access_key_error_events,
     chat_completion_payload_templates,
-    chat_completion_invalid_access_key_error_events,
 )
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import (
@@ -128,7 +127,7 @@ def test_bedrock_chat_completion_in_txn_with_convo_id(set_trace_info, exercise_m
     @background_task(name="test_bedrock_chat_completion_in_txn_with_convo_id")
     def _test():
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         exercise_model(prompt=_test_bedrock_chat_completion_prompt, temperature=0.7, max_tokens=100)

     _test()
@@ -160,7 +159,7 @@ def _test():
 @reset_core_stats_engine()
 @validate_custom_event_count(count=0)
 def test_bedrock_chat_completion_outside_txn(set_trace_info, exercise_model):
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     exercise_model(prompt=_test_bedrock_chat_completion_prompt, temperature=0.7, max_tokens=100)

@@ -237,7 +236,7 @@ def test_bedrock_chat_completion_error_invalid_model(bedrock_server, set_trace_i
     @background_task(name="test_bedrock_chat_completion_error_invalid_model")
     def _test():
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         with pytest.raises(_client_error):
             bedrock_server.invoke_model(
                 body=b"{}",
@@ -283,7 +282,7 @@ def _test():
         with pytest.raises(_client_error):  # not sure where this exception actually comes from
             set_trace_info()
-            add_custom_attribute("conversation_id", "my-awesome-id")
+            add_custom_attribute("llm.conversation_id", "my-awesome-id")
             exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100)

     _test()
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index e141e45e5..76017a22a 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -146,7 +146,7 @@
 @background_task()
 def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     openai.ChatCompletion.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
     )
@@ -272,7 +272,7 @@ def test_openai_chat_completion_sync_in_txn_no_convo_id(set_trace_info):
 @reset_core_stats_engine()
 @validate_custom_event_count(count=0)
 def test_openai_chat_completion_sync_outside_txn():
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     openai.ChatCompletion.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
     )
@@ -335,7 +335,7 @@ def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info
 @background_task()
 def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     loop.run_until_complete(
         openai.ChatCompletion.acreate(
diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py
index fe298c02b..a8d3bdc51 100644
--- a/tests/mlmodel_openai/test_chat_completion_error.py
+++ b/tests/mlmodel_openai/test_chat_completion_error.py
@@ -131,7 +131,7 @@ def test_chat_completion_invalid_request_error_no_model(set_trace_info):

     with pytest.raises(openai.InvalidRequestError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         openai.ChatCompletion.create(
             # no model provided,
             messages=_test_openai_chat_completion_messages,
@@ -215,7 +215,7 @@ def test_chat_completion_invalid_request_error_no_model(set_trace_info):
 def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
     with pytest.raises(openai.InvalidRequestError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         openai.ChatCompletion.create(
             model="does-not-exist",
             messages=({"role": "user", "content": "Model does not exist."},),
@@ -315,7 +315,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
 def test_chat_completion_authentication_error(monkeypatch, set_trace_info):
     with pytest.raises(openai.error.AuthenticationError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         monkeypatch.setattr(openai, "api_key", None)  # openai.api_key = None
         openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
@@ -439,7 +439,7 @@ def test_chat_completion_wrong_api_key_error(monkeypatch, set_trace_info):
 def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_info):
     with pytest.raises(openai.InvalidRequestError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         loop.run_until_complete(
             openai.ChatCompletion.acreate(
                 # no model provided,
@@ -481,7 +481,7 @@ def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_in
 def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_trace_info):
     with pytest.raises(openai.InvalidRequestError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         loop.run_until_complete(
             openai.ChatCompletion.acreate(
                 model="does-not-exist",
@@ -520,7 +520,7 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra
 def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace_info):
     with pytest.raises(openai.error.AuthenticationError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         monkeypatch.setattr(openai, "api_key", None)  # openai.api_key = None
         loop.run_until_complete(
             openai.ChatCompletion.acreate(
diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py
index 70dc58f99..670689c92 100644
--- a/tests/mlmodel_openai/test_chat_completion_error_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py
@@ -127,7 +127,7 @@
 def test_chat_completion_invalid_request_error_no_model(set_trace_info, sync_openai_client):
     with pytest.raises(TypeError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         sync_openai_client.chat.completions.create(
             messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
         )
@@ -160,7 +160,7 @@ def test_chat_completion_invalid_request_error_no_model(set_trace_info, sync_ope
 def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_info, async_openai_client):
     with pytest.raises(TypeError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         loop.run_until_complete(
             async_openai_client.chat.completions.create(
                 messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
@@ -242,7 +242,7 @@ def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_in
 def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, sync_openai_client):
     with pytest.raises(openai.NotFoundError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         sync_openai_client.chat.completions.create(
             model="does-not-exist",
             messages=({"role": "user", "content": "Model does not exist."},),
@@ -281,7 +281,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn
 def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_trace_info, async_openai_client):
     with pytest.raises(openai.NotFoundError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         loop.run_until_complete(
             async_openai_client.chat.completions.create(
                 model="does-not-exist",
diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py
index 4df977a6c..b1b35826c 100644
--- a/tests/mlmodel_openai/test_chat_completion_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_v1.py
@@ -146,7 +146,7 @@
 @background_task()
 def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info, sync_openai_client):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     sync_openai_client.chat.completions.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
     )
@@ -272,7 +272,7 @@ def test_openai_chat_completion_sync_in_txn_no_convo_id(set_trace_info, sync_ope
 @reset_core_stats_engine()
 @validate_custom_event_count(count=0)
 def test_openai_chat_completion_sync_outside_txn(sync_openai_client):
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     sync_openai_client.chat.completions.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
     )
@@ -335,7 +335,7 @@ def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info
 @background_task()
 def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info, async_openai_client):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     loop.run_until_complete(
         async_openai_client.chat.completions.create(
diff --git a/tests/mlmodel_openai/test_get_llm_message_ids.py b/tests/mlmodel_openai/test_get_llm_message_ids.py
index af073f730..8489f4f3d 100644
--- a/tests/mlmodel_openai/test_get_llm_message_ids.py
+++ b/tests/mlmodel_openai/test_get_llm_message_ids.py
@@ -13,10 +13,14 @@
 # limitations under the License.

 import openai
+from testing_support.fixtures import (
+    reset_core_stats_engine,
+    validate_custom_event_count,
+)
+
 from newrelic.api.background_task import background_task
 from newrelic.api.ml_model import get_llm_message_ids, record_llm_feedback_event
 from newrelic.api.transaction import add_custom_attribute, current_transaction
-from testing_support.fixtures import reset_core_stats_engine, validate_custom_event_count

 _test_openai_chat_completion_messages_1 = (
     {"role": "system", "content": "You are a scientist."},
@@ -114,7 +118,7 @@ def test_get_llm_message_ids_outside_transaction():
 @background_task()
 def test_get_llm_message_ids_mulitple_async(loop, set_trace_info):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     async def _run():
         res1 = await openai.ChatCompletion.acreate(
@@ -172,7 +176,7 @@ async def _run():
 @background_task()
 def test_get_llm_message_ids_mulitple_sync(set_trace_info):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     results = openai.ChatCompletion.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages_1, temperature=0.7, max_tokens=100
diff --git a/tests/mlmodel_openai/test_get_llm_message_ids_v1.py b/tests/mlmodel_openai/test_get_llm_message_ids_v1.py
index f85a26c2a..094ddcd5a 100644
--- a/tests/mlmodel_openai/test_get_llm_message_ids_v1.py
+++ b/tests/mlmodel_openai/test_get_llm_message_ids_v1.py
@@ -116,7 +116,7 @@ def test_get_llm_message_ids_outside_transaction():
 @background_task()
 def test_get_llm_message_ids_mulitple_async(loop, set_trace_info, async_openai_client):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     async def _run():
         res1 = await async_openai_client.chat.completions.create(
@@ -174,7 +174,7 @@ async def _run():
 @background_task()
 def test_get_llm_message_ids_mulitple_sync(set_trace_info, sync_openai_client):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     results = sync_openai_client.chat.completions.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages_1, temperature=0.7, max_tokens=100

From dbcbca57a41551c521278f3404e379f4786da4db Mon Sep 17 00:00:00 2001
From: Uma Annamalai
Date: Thu, 21 Dec 2023 10:38:39 -0800
Subject: [PATCH 03/19] Add support for Meta Llama2. (#1010)

* Add support for Llama2.
* Fixup: lint errors

* [Mega-Linter] Apply linters fixes

* Trigger tests

---------

Co-authored-by: Hannah Stepanek
Co-authored-by: hmstepanek
---
 newrelic/hooks/external_botocore.py  |  43 ++++++-
 newrelic/hooks/mlmodel_openai.py     |   2 +-
 .../_mock_external_bedrock_server.py |  21 +++-
 .../_test_bedrock_chat_completion.py | 115 ++++++++++++++++++
 .../test_bedrock_chat_completion.py  |   1 +
 5 files changed, 176 insertions(+), 6 deletions(-)

diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index 33ba94e21..6e3be661b 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -144,7 +144,7 @@ def create_chat_completion_message_event(
             "response.model": request_model,
             "vendor": "bedrock",
             "ingest_source": "Python",
-            "is_response": True
+            "is_response": True,
         }
         transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)

@@ -246,7 +246,7 @@ def extract_bedrock_claude_model(request_body, response_body=None):
     chat_completion_summary_dict = {
         "request.max_tokens": request_body.get("max_tokens_to_sample", ""),
         "request.temperature": request_body.get("temperature", ""),
-        "response.number_of_messages": len(input_message_list)
+        "response.number_of_messages": len(input_message_list),
     }

     if response_body:
@@ -264,6 +264,40 @@ def extract_bedrock_claude_model(request_body, response_body=None):
     return input_message_list, output_message_list, chat_completion_summary_dict


+def extract_bedrock_llama_model(request_body, response_body=None):
+    request_body = json.loads(request_body)
+    if response_body:
+        response_body = json.loads(response_body)
+
+    input_message_list = [{"role": "user", "content": request_body.get("prompt", "")}]
+
+    chat_completion_summary_dict = {
+        "request.max_tokens": request_body.get("max_gen_len", ""),
+        "request.temperature": request_body.get("temperature", ""),
+        "response.number_of_messages": len(input_message_list),
+    }
+
+    if response_body:
+        output_message_list = [{"role": "assistant", "content": response_body.get("generation", "")}]
+        prompt_tokens = response_body.get("prompt_token_count", None)
+        completion_tokens = response_body.get("generation_token_count", None)
+        total_tokens = prompt_tokens + completion_tokens if prompt_tokens and completion_tokens else None
+
+        chat_completion_summary_dict.update(
+            {
+                "response.usage.completion_tokens": completion_tokens,
+                "response.usage.prompt_tokens": prompt_tokens,
+                "response.usage.total_tokens": total_tokens,
+                "response.choices.finish_reason": response_body.get("stop_reason", ""),
+                "response.number_of_messages": len(input_message_list) + len(output_message_list),
+            }
+        )
+    else:
+        output_message_list = []
+
+    return input_message_list, output_message_list, chat_completion_summary_dict
+
+
 def extract_bedrock_cohere_model(request_body, response_body=None):
     request_body = json.loads(request_body)
     if response_body:
@@ -274,7 +308,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
     chat_completion_summary_dict = {
         "request.max_tokens": request_body.get("max_tokens", ""),
         "request.temperature": request_body.get("temperature", ""),
-        "response.number_of_messages": len(input_message_list)
+        "response.number_of_messages": len(input_message_list),
     }

     if response_body:
@@ -300,6 +334,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
     ("ai21.j2", extract_bedrock_ai21_j2_model),
     ("cohere", extract_bedrock_cohere_model),
     ("anthropic.claude", extract_bedrock_claude_model),
+    ("meta.llama2", extract_bedrock_llama_model),
 ]


@@ -368,7 +403,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
         notice_error_attributes = {
             "http.statusCode": error_attributes["http.statusCode"],
             "error.message": error_attributes["error.message"],
-            "error.code": error_attributes["error.code"]
+            "error.code": error_attributes["error.code"],
         }

         if is_embedding:
diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 07c1d75bb..7b0ecbe4f 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -868,7 +868,7 @@ def wrap_base_client_process_response_sync(wrapped, instance, args, kwargs):
     nr_response_headers = getattr(response, "headers")

     return_val = wrapped(*args, **kwargs)
-    # Obtain reponse headers for v1
+    # Obtain response headers for v1
     return_val._nr_response_headers = nr_response_headers
     return return_val

diff --git a/tests/external_botocore/_mock_external_bedrock_server.py b/tests/external_botocore/_mock_external_bedrock_server.py
index da5ff68dd..609e7afa9 100644
--- a/tests/external_botocore/_mock_external_bedrock_server.py
+++ b/tests/external_botocore/_mock_external_bedrock_server.py
@@ -3332,6 +3332,16 @@
             "prompt": "What is 212 degrees Fahrenheit converted to Celsius?",
         },
     ],
+    "meta.llama2-13b-chat-v1::What is 212 degrees Fahrenheit converted to Celsius?": [
+        {"Content-Type": "application/json", "x-amzn-RequestId": "9a64cdb0-3e82-41c7-873a-c12a77e0143a"},
+        200,
+        {
+            "generation": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+            "prompt_token_count": 17,
+            "generation_token_count": 46,
+            "stop_reason": "stop",
+        },
+    ],
     "does-not-exist::": [
         {
             "Content-Type": "application/json",
@@ -3395,6 +3405,15 @@
         403,
         {"message": "The security token included in the request is invalid."},
     ],
+    "meta.llama2-13b-chat-v1::Invalid Token": [
+        {
+            "Content-Type": "application/json",
+            "x-amzn-RequestId": "22476490-a0d6-42db-b5ea-32d0b8a7f751",
+            "x-amzn-ErrorType": "UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/",
+        },
+        403,
+        {"message": "The security token included in the request is invalid."},
+    ],
 }

 MODEL_PATH_RE = re.compile(r"/model/([^/]+)/invoke")
@@ -3454,7 +3473,7 @@ def __init__(self, handler=simple_get, port=None, *args, **kwargs):
 if __name__ == "__main__":
     # Use this to sort dict for easier future incremental updates
     print("RESPONSES = %s" % dict(sorted(RESPONSES.items(), key=lambda i: (i[1][1], i[0]))))
-    
+
     with MockExternalBedrockServer() as server:
         print("MockExternalBedrockServer serving on port %s" % str(server.port))
         while True:
diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py b/tests/external_botocore/_test_bedrock_chat_completion.py
index e3f53fd31..f1d21c73c 100644
--- a/tests/external_botocore/_test_bedrock_chat_completion.py
+++ b/tests/external_botocore/_test_bedrock_chat_completion.py
@@ -3,6 +3,7 @@
     "ai21.j2-mid-v1": '{"prompt": "%s", "temperature": %f, "maxTokens": %d}',
     "anthropic.claude-instant-v1": '{"prompt": "Human: %s Assistant:", "temperature": %f, "max_tokens_to_sample": %d}',
     "cohere.command-text-v14": '{"prompt": "%s", "temperature": %f, "max_tokens": %d}',
+    "meta.llama2-13b-chat-v1": '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}',
 }

 chat_completion_expected_events = {
@@ -263,6 +264,72 @@
         },
     ),
     ],
+    "meta.llama2-13b-chat-v1": [
+        (
+            {"type": "LlmChatCompletionSummary"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "transaction_id": "transaction-id",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "api_key_last_four_digits": "CRET",
+                "duration": None,  # Response time varies each test run
+                "request.model": "meta.llama2-13b-chat-v1",
+                "response.model": "meta.llama2-13b-chat-v1",
+                "response.usage.prompt_tokens": 17,
+                "response.usage.completion_tokens": 46,
+                "response.usage.total_tokens": 63,
+                "request.temperature": 0.7,
+                "request.max_tokens": 100,
+                "response.choices.finish_reason": "stop",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "response.number_of_messages": 2,
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": "What is 212 degrees Fahrenheit converted to Celsius?",
+                "role": "user",
+                "completion_id": None,
+                "sequence": 0,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+                "role": "assistant",
+                "completion_id": None,
+                "sequence": 1,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "is_response": True,
+            },
+        ),
+    ],
 }

 chat_completion_invalid_model_error_events = [
@@ -480,6 +547,49 @@
         },
     ),
     ],
+    "meta.llama2-13b-chat-v1": [
+        (
+            {"type": "LlmChatCompletionSummary"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "transaction_id": "transaction-id",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "request_id": "",
+                "api_key_last_four_digits": "-KEY",
+                "duration": None,  # Response time varies each test run
+                "request.model": "meta.llama2-13b-chat-v1",
+                "response.model": "meta.llama2-13b-chat-v1",
+                "request.temperature": 0.7,
+                "request.max_tokens": 100,
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "response.number_of_messages": 1,
+                "error": True,
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": "Invalid Token",
+                "role": "user",
+                "completion_id": None,
+                "sequence": 0,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+            },
+        ),
+    ],
 }

 chat_completion_expected_client_errors = {
     "ai21.j2-mid-v1": {
         "http.statusCode": 403,
         "error.message": "The security token included in the request is invalid.",
         "error.code": "UnrecognizedClientException",
     },
     "anthropic.claude-instant-v1": {
         "http.statusCode": 403,
         "error.message": "The security token included in the request is invalid.",
         "error.code": "UnrecognizedClientException",
     },
     "cohere.command-text-v14": {
         "http.statusCode": 403,
         "error.message": "The security token included in the request is invalid.",
         "error.code": "UnrecognizedClientException",
     },
+    "meta.llama2-13b-chat-v1": {
+        "http.statusCode": 403,
+        "error.message": "The security token included in the request is invalid.",
+        "error.code": "UnrecognizedClientException",
+    },
 }
diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py
index 2c4925a43..c5c2a4706 100644
--- a/tests/external_botocore/test_bedrock_chat_completion.py
+++ b/tests/external_botocore/test_bedrock_chat_completion.py
@@ -56,6 +56,7 @@ def is_file_payload(request):
         "ai21.j2-mid-v1",
         "anthropic.claude-instant-v1",
         "cohere.command-text-v14",
+        "meta.llama2-13b-chat-v1",
     ],
 )
 def model_id(request):

From 3f8202122a523450e07dc177888a44190f14857f Mon Sep 17 00:00:00 2001
From: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
Date: Wed, 10 Jan 2024 15:17:14 -0800
Subject: [PATCH 04/19] Add bedrock feedback into preview (#1030)

* Add AWS Bedrock testing infrastructure

* Squashed commit of the following:

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* Squashed commit of the following:

commit 182c7a8c8a91e2d0f234f7ed7d4a14a2422c8342
Author: Uma Annamalai
Date:   Fri Oct 13 10:12:55 2023 -0700

    Add request/ response IDs.

commit f6d13f822c22d2039ec32be86b2c54f9dc3de1c9
Author: Uma Annamalai
Date:   Thu Oct 12 13:23:39 2023 -0700

    Test cleanup.

commit d0576631d009e481bd5887a3243aac99b097d823
Author: Uma Annamalai
Date:   Tue Oct 10 10:23:00 2023 -0700

    Remove commented code.

commit dd29433e719482babbe5c724e7330b1f6324abd7
Author: Uma Annamalai
Date:   Tue Oct 10 10:19:01 2023 -0700

    Add openai sync instrumentation.

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* TEMP

* Bedrock titan extraction nearly complete

* Bedrock Testing Infrastructure (#937)

* Add AWS Bedrock testing infrastructure

* Cache Package Version Lookups (#946)
* Cache _get_package_version
* Add Python 2.7 support to get_package_version caching
* [Mega-Linter] Apply linters fixes
* Bump tests
---------
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino

* Fix Redis Generator Methods (#947)
* Fix scan_iter for redis
* Replace generator methods
* Update instance info instrumentation
* Remove mistake from uninstrumented methods
* Add skip condition to asyncio generator tests
* Add skip condition to asyncio generator tests
---------
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Automatic RPM System Updates (#948)
* Checkout old action
* Adding RPM action
* Add dry run
* Incorporating action into workflow
* Wire secret into custom action
* Enable action
* Correct action name
* Fix syntax
* Fix quoting issues
* Drop pre-verification. Does not work on python
* Fix merge artifact

* Remove OpenAI references
---------
Co-authored-by: Uma Annamalai
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Cleaning up titan bedrock implementation

* TEMP

* Tests for bedrock passing
Co-authored-by: Lalleh Rafeei

* Cleaned up titan testing
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek

* Parametrized bedrock testing

* Add support for AI21-J2 models

* Change to dynamic no conversation id events

* Drop all openai refs

* [Mega-Linter] Apply linters fixes

* Adding response_id and response_model

* Apply suggestions from code review

* Remove unused import

* Bedrock Sync Chat Completion Instrumentation (#953)

* Add AWS Bedrock testing infrastructure

* Squashed commit of the following:

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* Squashed commit of the following:

commit 182c7a8c8a91e2d0f234f7ed7d4a14a2422c8342
Author: Uma Annamalai
Date:   Fri Oct 13 10:12:55 2023 -0700

    Add request/ response IDs.

commit f6d13f822c22d2039ec32be86b2c54f9dc3de1c9
Author: Uma Annamalai
Date:   Thu Oct 12 13:23:39 2023 -0700

    Test cleanup.

commit d0576631d009e481bd5887a3243aac99b097d823
Author: Uma Annamalai
Date:   Tue Oct 10 10:23:00 2023 -0700

    Remove commented code.

commit dd29433e719482babbe5c724e7330b1f6324abd7
Author: Uma Annamalai
Date:   Tue Oct 10 10:19:01 2023 -0700

    Add openai sync instrumentation.

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* Cache Package Version Lookups (#946)
* Cache _get_package_version
* Add Python 2.7 support to get_package_version caching
* [Mega-Linter] Apply linters fixes
* Bump tests
---------
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino

* Fix Redis Generator Methods (#947)
* Fix scan_iter for redis
* Replace generator methods
* Update instance info instrumentation
* Remove mistake from uninstrumented methods
* Add skip condition to asyncio generator tests
* Add skip condition to asyncio generator tests
---------
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* TEMP

* Automatic RPM System Updates (#948)
* Checkout old action
* Adding RPM action
* Add dry run
* Incorporating action into workflow
* Wire secret into custom action
* Enable action
* Correct action name
* Fix syntax
* Fix quoting issues
* Drop pre-verification. Does not work on python
* Fix merge artifact

* Bedrock titan extraction nearly complete

* Cleaning up titan bedrock implementation

* TEMP

* Tests for bedrock passing
Co-authored-by: Lalleh Rafeei

* Cleaned up titan testing
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek

* Parametrized bedrock testing

* Add support for AI21-J2 models

* Change to dynamic no conversation id events

* Drop all openai refs

* [Mega-Linter] Apply linters fixes

* Adding response_id and response_model

* Drop python 3.7 tests for Hypercorn (#954)

* Apply suggestions from code review

* Remove unused import

---------
Co-authored-by: Uma Annamalai
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek
Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>

* Initial feedback commit for botocore

* Feature bedrock cohere instrumentation (#955)

* Add AWS Bedrock testing infrastructure

* Squashed commit of the following:

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* Squashed commit of the following:

commit 182c7a8c8a91e2d0f234f7ed7d4a14a2422c8342
Author: Uma Annamalai
Date:   Fri Oct 13 10:12:55 2023 -0700

    Add request/ response IDs.

commit f6d13f822c22d2039ec32be86b2c54f9dc3de1c9
Author: Uma Annamalai
Date:   Thu Oct 12 13:23:39 2023 -0700

    Test cleanup.

commit d0576631d009e481bd5887a3243aac99b097d823
Author: Uma Annamalai
Date:   Tue Oct 10 10:23:00 2023 -0700

    Remove commented code.

commit dd29433e719482babbe5c724e7330b1f6324abd7
Author: Uma Annamalai
Date:   Tue Oct 10 10:19:01 2023 -0700

    Add openai sync instrumentation.

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* TEMP

* Bedrock titan extraction nearly complete

* Cleaning up titan bedrock implementation

* TEMP

* Tests for bedrock passing
Co-authored-by: Lalleh Rafeei

* Cleaned up titan testing
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek

* Parametrized bedrock testing

* Add support for AI21-J2 models

* Change to dynamic no conversation id events

* Add cohere model

* Remove openai instrumentation from this branch

* Remove OpenAI from newrelic/config.py

---------
Co-authored-by: Uma Annamalai
Co-authored-by: Tim Pansino
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek

* Bedrock feedback w/ testing for titan and jurassic models

* AWS Bedrock Embedding Instrumentation (#957)
* AWS Bedrock embedding instrumentation
* Correct symbol name

* Add support for bedrock claude (#960)
Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>

* Fix merge conflicts

* Combine Botocore Tests (#959)
* Initial file migration
* Enable DT on all span tests
* Add pytest skip for older botocore versions
* Fixup: app name merge conflict
---------
Co-authored-by: Hannah Stepanek

* Add to and move feedback tests

* Handle 0.32.0.post1 version in tests (#963)

* Remove response_id dependency in bedrock

* Change API name

* Update moto

* Bedrock Error Tracing (#966)

* Cache Package Version Lookups (#946)
* Cache _get_package_version
* Add Python 2.7 support to get_package_version caching
* [Mega-Linter] Apply linters fixes
* Bump tests
---------
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino

* Fix Redis Generator Methods (#947)
* Fix scan_iter for redis
* Replace generator methods
* Update instance info instrumentation
* Remove mistake from uninstrumented methods
* Add skip condition to asyncio generator tests
* Add skip condition to asyncio generator tests
---------
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Automatic RPM System Updates (#948)
* Checkout old action
* Adding RPM action
* Add dry run
* Incorporating action into workflow
* Wire secret into custom action
* Enable action
* Correct action name
* Fix syntax
* Fix quoting issues
* Drop pre-verification. Does not work on python
* Fix merge artifact

* Drop python 3.7 tests for Hypercorn (#954)

* Fix pyenv installation for devcontainer (#936)
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Remove duplicate kafka import hook (#956)
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Initial bedrock error tracing commit

* Handle 0.32.0.post1 version in tests (#963)

* Add status code to mock bedrock server

* Updating error response recording logic

* Work on bedrock error tracing

* Chat completion error tracing

* Adding embedding error tracing

* Delete comment

* Update moto

---------
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
Co-authored-by: Hannah Stepanek

* Change ids to match other tests

* move message_ids declaration outside for loop

* Add comment to tox.ini

* Drop py27 from memcache testing.

* Drop pypy27 from memcache testing.
* Update flaskrestx testing #1004
* Remove tastypie 0.14.3 testing
* Remove tastypie 0.14.3 testing
* Remove python 3.12 support (for now)
* Remove untouched files from diff list

---------

Co-authored-by: Uma Annamalai
Co-authored-by: Tim Pansino
Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek
Co-authored-by: Hannah Stepanek
---
 newrelic/api/ml_model.py | 9 +-
 newrelic/hooks/external_botocore.py | 9 +-
 .../_test_bedrock_chat_completion.py | 87 +++++++++++++++++++
 .../_test_bedrock_embeddings.py | 18 +++-
 .../test_bedrock_embeddings.py | 2 +-
 tox.ini | 1 +
 6 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/newrelic/api/ml_model.py b/newrelic/api/ml_model.py
index 3d15cf8d3..03408253b 100644
--- a/newrelic/api/ml_model.py
+++ b/newrelic/api/ml_model.py
@@ -40,12 +40,15 @@ def wrap_mlmodel(model, name=None, version=None, feature_names=None, label_names
 
 def get_llm_message_ids(response_id=None):
     transaction = current_transaction()
-    if response_id and transaction:
+    if transaction:
         nr_message_ids = getattr(transaction, "_nr_message_ids", {})
-        message_id_info = nr_message_ids.pop(response_id, ())
+        message_id_info = (
+            nr_message_ids.pop("bedrock_key", ()) if not response_id else nr_message_ids.pop(response_id, ())
+        )
 
         if not message_id_info:
-            warnings.warn("No message ids found for %s" % response_id)
+            response_id_warning = "." if not response_id else " for %s." % response_id
+            warnings.warn("No message ids found%s" % response_id_warning)
             return []
 
         conversation_id, request_id, ids = message_id_info
diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index 6e3be661b..69a2fd936 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -97,6 +97,7 @@ def create_chat_completion_message_event(
     if not transaction:
         return
 
+    message_ids = []
     for index, message in enumerate(input_message_list):
         if response_id:
             id_ = "%s-%d" % (response_id, index)  # Response ID was set, append message index to it.
@@ -128,6 +129,7 @@
             id_ = "%s-%d" % (response_id, index)  # Response ID was set, append message index to it.
         else:
             id_ = str(uuid.uuid4())  # No response IDs, use random UUID
+        message_ids.append(id_)
 
         chat_completion_message_dict = {
             "id": id_,
@@ -147,6 +149,7 @@
             "is_response": True,
         }
         transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)
+    return (conversation_id, request_id, message_ids)
 
 
 def extract_bedrock_titan_text_model(request_body, response_body=None):
@@ -577,7 +580,7 @@ def handle_chat_completion_event(
     transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict)
 
-    create_chat_completion_message_event(
+    message_ids = create_chat_completion_message_event(
         transaction=transaction,
         app_name=settings.app_name,
         input_message_list=input_message_list,
@@ -591,6 +594,10 @@
         response_id=response_id,
     )
 
+    if not hasattr(transaction, "_nr_message_ids"):
+        transaction._nr_message_ids = {}
+    transaction._nr_message_ids["bedrock_key"] = message_ids
+
 
 CUSTOM_TRACE_POINTS = {
     ("sns", "publish"): message_trace("SNS", "Produce", "Topic", extract(("TopicArn", "TargetArn"), "PhoneNumber")),
diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py b/tests/external_botocore/_test_bedrock_chat_completion.py
index f1d21c73c..652027719 100644
--- a/tests/external_botocore/_test_bedrock_chat_completion.py
+++ b/tests/external_botocore/_test_bedrock_chat_completion.py
@@ -1,3 +1,17 @@
+# Copyright 2010 New Relic, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
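The "bedrock_key" cache written by handle_chat_completion_event above is what get_llm_message_ids falls back to when it is called with no response id, since Bedrock does not always expose one. A minimal usage sketch (an assumption, not part of the patch: it must run inside the same transaction that made the Bedrock call, and the returned dict keys mirror the chat_completion_get_llm_message_ids fixture below):

    from newrelic.api.ml_model import get_llm_message_ids

    # With no response_id argument, the ids stored under "bedrock_key" are
    # popped off the current transaction and returned as a list of dicts.
    for info in get_llm_message_ids():
        print(info["conversation_id"], info["request_id"], info["message_id"])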
+ chat_completion_payload_templates = { "amazon.titan-text-express-v1": '{ "inputText": "%s", "textGenerationConfig": {"temperature": %f, "maxTokenCount": %d }}', "ai21.j2-mid-v1": '{"prompt": "%s", "temperature": %f, "maxTokens": %d}', @@ -6,6 +20,79 @@ "meta.llama2-13b-chat-v1": '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}', } +chat_completion_get_llm_message_ids = { + "amazon.titan-text-express-v1": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "03524118-8d77-430f-9e08-63b5c03a40cf", + "message_id": None, # UUID that varies with each run + }, + { + "conversation_id": "my-awesome-id", + "request_id": "03524118-8d77-430f-9e08-63b5c03a40cf", + "message_id": None, # UUID that varies with each run + }, + ] + }, + "ai21.j2-mid-v1": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "c863d9fc-888b-421c-a175-ac5256baec62", + "message_id": "1234-0", + }, + { + "conversation_id": "my-awesome-id", + "request_id": "c863d9fc-888b-421c-a175-ac5256baec62", + "message_id": "1234-1", + }, + ] + }, + "anthropic.claude-instant-v1": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "7b0b37c6-85fb-4664-8f5b-361ca7b1aa18", + "message_id": None, # UUID that varies with each run + }, + { + "conversation_id": "my-awesome-id", + "request_id": "7b0b37c6-85fb-4664-8f5b-361ca7b1aa18", + "message_id": None, # UUID that varies with each run + }, + ] + }, + "cohere.command-text-v14": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "e77422c8-fbbf-4e17-afeb-c758425c9f97", + "message_id": "e77422c8-fbbf-4e17-afeb-c758425c9f97-0", + }, + { + "conversation_id": "my-awesome-id", + "request_id": "e77422c8-fbbf-4e17-afeb-c758425c9f97", + "message_id": "e77422c8-fbbf-4e17-afeb-c758425c9f97-1", + }, + ] + }, + "meta.llama2-13b-chat-v1": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a", + "message_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a-0", + }, + { + "conversation_id": "my-awesome-id", + "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a", + "message_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a-1", + }, + ] + }, +} + chat_completion_expected_events = { "amazon.titan-text-express-v1": [ ( diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py index ec677b426..05c8a390c 100644 --- a/tests/external_botocore/_test_bedrock_embeddings.py +++ b/tests/external_botocore/_test_bedrock_embeddings.py @@ -1,3 +1,17 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
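The embedding payload templates that follow take only the prompt via %-formatting. A small sketch of how a request body is built from one (assuming the Titan embedding entry shown below; the dict is the fixture's own):

    import json

    payload = embedding_payload_templates["amazon.titan-embed-text-v1"] % "This is an embedding test."
    body = json.loads(payload)  # {"inputText": "This is an embedding test."}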
+ embedding_payload_templates = { "amazon.titan-embed-text-v1": '{ "inputText": "%s" }', "amazon.titan-embed-g1-text-02": '{ "inputText": "%s" }', @@ -68,7 +82,7 @@ "request_id": "", "vendor": "bedrock", "ingest_source": "Python", - "error": True + "error": True, }, ), ], @@ -89,7 +103,7 @@ "request_id": "", "vendor": "bedrock", "ingest_source": "Python", - "error": True + "error": True, }, ), ], diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index cc442fc15..9fc016471 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -19,8 +19,8 @@ import pytest from _test_bedrock_embeddings import ( embedding_expected_client_errors, - embedding_expected_events, embedding_expected_error_events, + embedding_expected_events, embedding_payload_templates, ) from conftest import BOTOCORE_VERSION diff --git a/tox.ini b/tox.ini index 969f98548..878ef4ac4 100644 --- a/tox.ini +++ b/tox.ini @@ -207,6 +207,7 @@ deps = component_flask_rest: flask-restful component_flask_rest: jinja2 component_flask_rest: itsdangerous + component_flask_rest-flaskrestxlatest: flask component_flask_rest-flaskrestxlatest: flask-restx component_flask_rest-flaskrestxlatest: flask ; flask-restx only supports Flask v3 after flask-restx v1.3.0 From abf31c5ebc9fe12e0f706715849d95f4ccd1a46b Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 16 Jan 2024 20:59:20 -0800 Subject: [PATCH 05/19] Fix instrumentation for openai 1.8.0 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 878ef4ac4..c58e8d0e3 100644 --- a/tox.ini +++ b/tox.ini @@ -402,7 +402,7 @@ commands = allowlist_externals={toxinidir}/.github/scripts/* install_command= - {toxinidir}/.github/scripts/retry.sh 3 pip install {opts} {packages} + pip install {opts} {packages} extras = agent_streaming: infinite-tracing From ef2cfeb423ccbc8dd17e0af8aaa102f8a7094f42 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Wed, 31 Jan 2024 11:31:41 -0800 Subject: [PATCH 06/19] Add LLM attribute to transactions. (#1050) * Add LLM attr to transactions. * Remove newlines. * Add llm attribute to transaction event default attrs list. * Linting. 
--- newrelic/core/attribute.py | 1 + newrelic/hooks/external_botocore.py | 1 + newrelic/hooks/mlmodel_openai.py | 4 ++++ tests/external_botocore/test_bedrock_chat_completion.py | 2 ++ tests/external_botocore/test_bedrock_embeddings.py | 2 ++ tests/mlmodel_openai/test_chat_completion.py | 3 +++ tests/mlmodel_openai/test_chat_completion_v1.py | 3 +++ tests/mlmodel_openai/test_embeddings.py | 3 +++ tests/mlmodel_openai/test_embeddings_v1.py | 3 +++ 9 files changed, 22 insertions(+) diff --git a/newrelic/core/attribute.py b/newrelic/core/attribute.py index 880597a05..ea2023764 100644 --- a/newrelic/core/attribute.py +++ b/newrelic/core/attribute.py @@ -71,6 +71,7 @@ "host.displayName", "http.statusCode", "http.url", + "llm", "message.queueName", "message.routingKey", "peer.address", diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index 69a2fd936..821af702e 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -351,6 +351,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): return wrapped(*args, **kwargs) transaction.add_ml_model_info("Bedrock", BOTOCORE_VERSION) + transaction._add_agent_attribute("llm", True) # Read and replace request file stream bodies request_body = kwargs["body"] diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index 7b0ecbe4f..0741aaaea 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -35,6 +35,7 @@ def wrap_embedding_sync(wrapped, instance, args, kwargs): # Framework metric also used for entity tagging in the UI transaction.add_ml_model_info("OpenAI", OPENAI_VERSION) + transaction._add_agent_attribute("llm", True) # Obtain attributes to be stored on embedding events regardless of whether we hit an error embedding_id = str(uuid.uuid4()) @@ -181,6 +182,7 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs): # Framework metric also used for entity tagging in the UI transaction.add_ml_model_info("OpenAI", OPENAI_VERSION) + transaction._add_agent_attribute("llm", True) request_message_list = kwargs.get("messages", []) @@ -496,6 +498,7 @@ async def wrap_embedding_async(wrapped, instance, args, kwargs): # Framework metric also used for entity tagging in the UI transaction.add_ml_model_info("OpenAI", OPENAI_VERSION) + transaction._add_agent_attribute("llm", True) # Obtain attributes to be stored on embedding events regardless of whether we hit an error embedding_id = str(uuid.uuid4()) @@ -642,6 +645,7 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs): # Framework metric also used for entity tagging in the UI transaction.add_ml_model_info("OpenAI", OPENAI_VERSION) + transaction._add_agent_attribute("llm", True) request_message_list = kwargs.get("messages", []) diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py index c5c2a4706..08b26b306 100644 --- a/tests/external_botocore/test_bedrock_chat_completion.py +++ b/tests/external_botocore/test_bedrock_chat_completion.py @@ -30,6 +30,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_error_trace_attributes import ( @@ -125,6 +126,7 @@ def test_bedrock_chat_completion_in_txn_with_convo_id(set_trace_info, exercise_m ], background_task=True, ) + 
@validate_attributes("agent", ["llm"]) @background_task(name="test_bedrock_chat_completion_in_txn_with_convo_id") def _test(): set_trace_info() diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index 9fc016471..9dac7d3b5 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -29,6 +29,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_error_trace_attributes import ( @@ -109,6 +110,7 @@ def test_bedrock_embedding(set_trace_info, exercise_model, expected_events): ], background_task=True, ) + @validate_attributes("agent", ["llm"]) @background_task(name="test_bedrock_embedding") def _test(): set_trace_info() diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 76017a22a..5681dbb57 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -17,6 +17,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_transaction_metrics import ( @@ -143,6 +144,7 @@ ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info): set_trace_info() @@ -332,6 +334,7 @@ def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info): set_trace_info() diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index b1b35826c..1084fef3d 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -17,6 +17,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_transaction_metrics import ( @@ -143,6 +144,7 @@ ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info, sync_openai_client): set_trace_info() @@ -332,6 +334,7 @@ def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info, async_openai_client): set_trace_info() diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py index 65ac33e87..287ba3fab 100644 --- a/tests/mlmodel_openai/test_embeddings.py +++ b/tests/mlmodel_openai/test_embeddings.py @@ -17,6 +17,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_transaction_metrics import ( @@ -72,6 +73,7 @@ ], background_task=True, ) 
+@validate_attributes("agent", ["llm"]) @background_task() def test_openai_embedding_sync(set_trace_info): set_trace_info() @@ -114,6 +116,7 @@ def test_openai_embedding_sync_disabled_settings(set_trace_info): ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_embedding_async(loop, set_trace_info): set_trace_info() diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py index 9bf91967a..223cbfbee 100644 --- a/tests/mlmodel_openai/test_embeddings_v1.py +++ b/tests/mlmodel_openai/test_embeddings_v1.py @@ -17,6 +17,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_transaction_metrics import ( @@ -72,6 +73,7 @@ ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_embedding_sync(set_trace_info, sync_openai_client): set_trace_info() @@ -114,6 +116,7 @@ def test_openai_embedding_sync_disabled_settings(set_trace_info, sync_openai_cli ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_embedding_async(loop, set_trace_info, async_openai_client): set_trace_info() From 505008909d313c9e018f39f2665c262e1461a84d Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Wed, 31 Jan 2024 11:31:47 -0800 Subject: [PATCH 07/19] Remove imports for moto on py37. (#1053) --- tests/external_botocore/test_botocore_sqs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/external_botocore/test_botocore_sqs.py b/tests/external_botocore/test_botocore_sqs.py index c9d8585fd..17ed07fd2 100644 --- a/tests/external_botocore/test_botocore_sqs.py +++ b/tests/external_botocore/test_botocore_sqs.py @@ -31,7 +31,6 @@ BOTOCORE_VERSION = get_package_version_tuple("botocore") url = "sqs.us-east-1.amazonaws.com" - if BOTOCORE_VERSION < (1, 29, 0): url = "queue.amazonaws.com" From e2cd6ebc5c9f3c679d282f4b79e4cfadad4816b5 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 30 Jan 2024 12:12:55 -0800 Subject: [PATCH 08/19] Update botocore tests. (#1051) * Update moto decorators in tests. * Remove py27 botocore dependencies. 
* Drop testing for Python 3.7 --- tests/external_botocore/test_boto3_s3.py | 1 - tests/external_botocore/test_boto3_sns.py | 1 - tests/external_botocore/test_botocore_dynamodb.py | 1 - tests/external_botocore/test_botocore_ec2.py | 1 - 4 files changed, 4 deletions(-) diff --git a/tests/external_botocore/test_boto3_s3.py b/tests/external_botocore/test_boto3_s3.py index 1d91c4636..b6347e322 100644 --- a/tests/external_botocore/test_boto3_s3.py +++ b/tests/external_botocore/test_boto3_s3.py @@ -28,7 +28,6 @@ from newrelic.common.package_version_utils import get_package_version_tuple MOTO_VERSION = get_package_version_tuple("moto") - AWS_ACCESS_KEY_ID = "AAAAAAAAAAAACCESSKEY" AWS_SECRET_ACCESS_KEY = "AAAAAASECRETKEY" # nosec AWS_REGION_NAME = "us-west-2" diff --git a/tests/external_botocore/test_boto3_sns.py b/tests/external_botocore/test_boto3_sns.py index a1ffc1331..baa0687bc 100644 --- a/tests/external_botocore/test_boto3_sns.py +++ b/tests/external_botocore/test_boto3_sns.py @@ -30,7 +30,6 @@ from newrelic.common.package_version_utils import get_package_version_tuple MOTO_VERSION = get_package_version_tuple("moto") - AWS_ACCESS_KEY_ID = "AAAAAAAAAAAACCESSKEY" AWS_SECRET_ACCESS_KEY = "AAAAAASECRETKEY" # nosec (This is fine for testing purposes) AWS_REGION_NAME = "us-east-1" diff --git a/tests/external_botocore/test_botocore_dynamodb.py b/tests/external_botocore/test_botocore_dynamodb.py index 539993206..28dbd8ac1 100644 --- a/tests/external_botocore/test_botocore_dynamodb.py +++ b/tests/external_botocore/test_botocore_dynamodb.py @@ -30,7 +30,6 @@ from newrelic.common.package_version_utils import get_package_version_tuple MOTO_VERSION = get_package_version_tuple("moto") - AWS_ACCESS_KEY_ID = "AAAAAAAAAAAACCESSKEY" AWS_SECRET_ACCESS_KEY = "AAAAAASECRETKEY" # nosec (This is fine for testing purposes) AWS_REGION = "us-east-1" diff --git a/tests/external_botocore/test_botocore_ec2.py b/tests/external_botocore/test_botocore_ec2.py index cfbf48e3b..84c7e9e65 100644 --- a/tests/external_botocore/test_botocore_ec2.py +++ b/tests/external_botocore/test_botocore_ec2.py @@ -30,7 +30,6 @@ from newrelic.common.package_version_utils import get_package_version_tuple MOTO_VERSION = get_package_version_tuple("moto") - AWS_ACCESS_KEY_ID = "AAAAAAAAAAAACCESSKEY" AWS_SECRET_ACCESS_KEY = "AAAAAASECRETKEY" # nosec (This is fine for testing purposes) AWS_REGION = "us-east-1" From bcb7dc573ef5b82ee249ab2b32bdb86df48e2960 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 24 Oct 2023 10:08:01 -0700 Subject: [PATCH 09/19] Add support for streaming in openai --- newrelic/config.py | 6 + newrelic/hooks/mlmodel_openai.py | 353 ++++++++++++---- .../_mock_external_openai_server.py | 266 +++++++++++- tests/mlmodel_openai/conftest.py | 94 ++++- tests/mlmodel_openai/test_chat_completion.py | 2 +- .../test_chat_completion_stream.py | 389 ++++++++++++++++++ 6 files changed, 1012 insertions(+), 98 deletions(-) create mode 100644 tests/mlmodel_openai/test_chat_completion_stream.py diff --git a/newrelic/config.py b/newrelic/config.py index 6ea18983c..dd241d193 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2061,6 +2061,12 @@ def _process_module_builtin_defaults(): "newrelic.hooks.mlmodel_openai", "instrument_openai_util", ) + _process_module_definition( + "openai.api_resources.abstract.engine_api_resource", + "newrelic.hooks.mlmodel_openai", + "instrument_openai_api_resources_abstract_engine_api_resource", + ) + _process_module_definition( "openai.resources.chat.completions", "newrelic.hooks.mlmodel_openai", 
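Each _process_module_definition call above maps an importable module path to the hook function that should instrument it the first time the module is imported. A simplified illustration of the mechanism (a hypothetical sketch, not the agent's actual implementation):

    # Hypothetical sketch: register module path -> (hook module, hook function).
    _module_hooks = {}

    def process_module_definition(module_path, hook_module, hook_function):
        _module_hooks[module_path] = (hook_module, hook_function)

    def notify_module_imported(module):
        # Resolve and run the hook lazily, on first import of the target module.
        hook_module, hook_function = _module_hooks[module.__name__]
        hook = getattr(__import__(hook_module, fromlist=[hook_function]), hook_function)
        hook(module)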
diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 0741aaaea..e2f5c61f2 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import sys
 import uuid
 
 import openai
@@ -19,8 +20,9 @@
 from newrelic.api.function_trace import FunctionTrace
 from newrelic.api.time_trace import get_trace_linking_metadata
 from newrelic.api.transaction import current_transaction
-from newrelic.common.object_wrapper import wrap_function_wrapper
+from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper
 from newrelic.common.package_version_utils import get_package_version
+from newrelic.common.signature import bind_args
 from newrelic.core.config import global_settings
 
 OPENAI_VERSION = get_package_version("openai")
@@ -203,91 +205,107 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs):
 
     function_name = wrapped.__name__
 
-    with FunctionTrace(name=function_name, group="Llm/completion/OpenAI") as ft:
-        # Get trace information
-        available_metadata = get_trace_linking_metadata()
-        span_id = available_metadata.get("span.id", "")
-        trace_id = available_metadata.get("trace.id", "")
-
-        try:
-            return_val = wrapped(*args, **kwargs)
-            if kwargs.get("stream", False):
-                return return_val
-        except Exception as exc:
-            if kwargs.get("stream", False):
-                raise
-            if OPENAI_V1:
-                response = getattr(exc, "response", "")
-                response_headers = getattr(response, "headers", "")
-                exc_organization = response_headers.get("openai-organization", "") if response_headers else ""
-                # There appears to be a bug here in openai v1 where despite having code,
-                # param, etc in the error response, they are not populated on the exception
-                # object so grab them from the response body object instead.
-                body = getattr(exc, "body", {}) or {}
-                notice_error_attributes = {
-                    "http.statusCode": getattr(exc, "status_code", "") or "",
-                    "error.message": body.get("message", "") or "",
-                    "error.code": body.get("code", "") or "",
-                    "error.param": body.get("param", "") or "",
-                    "completion_id": completion_id,
-                }
-            else:
-                exc_organization = getattr(exc, "organization", "")
-                notice_error_attributes = {
-                    "http.statusCode": getattr(exc, "http_status", ""),
-                    "error.message": getattr(exc, "_message", ""),
-                    "error.code": getattr(getattr(exc, "error", ""), "code", ""),
-                    "error.param": getattr(exc, "param", ""),
-                    "completion_id": completion_id,
-                }
-            # Override the default message if it is not empty.
-            message = notice_error_attributes.pop("error.message")
-            if message:
-                exc._nr_message = message
-
-            ft.notice_error(
-                attributes=notice_error_attributes,
-            )
-            # Gather attributes to add to chat completion summary event in error context
-            error_chat_completion_dict = {
-                "id": completion_id,
-                "appName": app_name,
-                "conversation_id": conversation_id,
-                "api_key_last_four_digits": api_key_last_four_digits,
-                "span_id": span_id,
-                "trace_id": trace_id,
-                "transaction_id": transaction.guid,
-                "response.number_of_messages": len(request_message_list),
-                "request.model": kwargs.get("model") or kwargs.get("engine") or "",
-                "request.temperature": kwargs.get("temperature", ""),
-                "request.max_tokens": kwargs.get("max_tokens", ""),
-                "vendor": "openAI",
-                "ingest_source": "Python",
-                "response.organization": "" if exc_organization is None else exc_organization,
-                "duration": ft.duration,
-                "error": True,
-            }
-            transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict)
-
-            create_chat_completion_message_event(
-                transaction,
-                app_name,
-                request_message_list,
-                completion_id,
-                span_id,
-                trace_id,
-                "",
-                None,
-                "",
-                conversation_id,
-                None,
-            )
-
-            raise
+    ft = FunctionTrace(name=function_name, group="Llm/completion/OpenAI")
+    ft.__enter__()
+    # Get trace information
+    available_metadata = get_trace_linking_metadata()
+    span_id = available_metadata.get("span.id", "")
+    trace_id = available_metadata.get("trace.id", "")
+    try:
+        return_val = wrapped(*args, **kwargs)
+        return_val._nr_ft = ft
+    except Exception as exc:
+        if OPENAI_V1:
+            response = getattr(exc, "response", "")
+            response_headers = getattr(response, "headers", "")
+            exc_organization = response_headers.get("openai-organization", "") if response_headers else ""
+            # There appears to be a bug here in openai v1 where despite having code,
+            # param, etc in the error response, they are not populated on the exception
+            # object so grab them from the response body object instead.
+            body = getattr(exc, "body", {}) or {}
+            notice_error_attributes = {
+                "http.statusCode": getattr(exc, "status_code", "") or "",
+                "error.message": body.get("message", "") or "",
+                "error.code": body.get("code", "") or "",
+                "error.param": body.get("param", "") or "",
+                "completion_id": completion_id,
+            }
+        else:
+            exc_organization = getattr(exc, "organization", "")
+            notice_error_attributes = {
+                "http.statusCode": getattr(exc, "http_status", ""),
+                "error.message": getattr(exc, "_message", ""),
+                "error.code": getattr(getattr(exc, "error", ""), "code", ""),
+                "error.param": getattr(exc, "param", ""),
+                "completion_id": completion_id,
+            }
+        # Override the default message if it is not empty.
+        message = notice_error_attributes.pop("error.message")
+        if message:
+            exc._nr_message = message
+
+        ft.notice_error(
+            attributes=notice_error_attributes,
+        )
+        # Gather attributes to add to chat completion summary event in error context
+        error_chat_completion_dict = {
+            "id": completion_id,
+            "appName": app_name,
+            "conversation_id": conversation_id,
+            "api_key_last_four_digits": api_key_last_four_digits,
+            "span_id": span_id,
+            "trace_id": trace_id,
+            "transaction_id": transaction.guid,
+            "response.number_of_messages": len(request_message_list),
+            "request.model": kwargs.get("model") or kwargs.get("engine") or "",
+            "request.temperature": kwargs.get("temperature", ""),
+            "request.max_tokens": kwargs.get("max_tokens", ""),
+            "vendor": "openAI",
+            "ingest_source": "Python",
+            "response.organization": "" if exc_organization is None else exc_organization,
+            "duration": ft.duration,
+            "error": True,
+        }
+        transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict)
+
+        create_chat_completion_message_event(
+            transaction,
+            app_name,
+            request_message_list,
+            completion_id,
+            span_id,
+            trace_id,
+            "",
+            None,
+            "",
+            conversation_id,
+            None,
+        )
+
+        ft.__exit__(*sys.exc_info())
+        raise
+
+    stream = kwargs.get("stream", False)
+    # If the response is not a stream generator, we exit the function trace now.
+    if not stream:
+        ft.__exit__(None, None, None)
 
     if not return_val:
         return return_val
 
+    if stream:
+        # The function trace will be exited in the final iteration of the response
+        # generator.
+        setattr(return_val, "_nr_ft", ft)
+        setattr(return_val, "_nr_openai_attrs", getattr(return_val, "_nr_openai_attrs", {}))
+        return_val._nr_openai_attrs["messages"] = kwargs.get("messages", [])
+        return_val._nr_openai_attrs["temperature"] = kwargs.get("temperature", "")
+        return_val._nr_openai_attrs["max_tokens"] = kwargs.get("max_tokens", "")
+        return_val._nr_openai_attrs["request.model"] = kwargs.get("model") or kwargs.get("engine") or ""
+        return_val._nr_openai_attrs["api_key_last_four_digits"] = api_key_last_four_digits
+        return return_val
+
+    # If the response is not a stream generator, record the event data.
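+    # (For streaming calls we already returned above: the still-open function
+    # trace and the kwargs stashed in _nr_openai_attrs are consumed by
+    # GeneratorProxy as the stream is iterated.)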
     # At this point, we have a response so we can grab attributes only available on the response object
     response_headers = getattr(return_val, "_nr_response_headers", {})
     # In v1, response objects are pydantic models so this function call converts the
@@ -895,6 +913,174 @@ def instrument_openai_util(module):
     setattr(module.convert_to_openai_object, "_nr_wrapped", True)
 
 
+class GeneratorProxy(ObjectProxy):
+    def __init__(self, wrapped):
+        super(GeneratorProxy, self).__init__(wrapped)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        transaction = current_transaction()
+        if not transaction:
+            return self.__wrapped__.__next__()
+
+        return_val = None
+        try:
+            return_val = self.__wrapped__.__next__()
+            if return_val:
+                choices = return_val.get("choices", [])
+                self._nr_openai_attrs["response.model"] = return_val.get("model", "")
+                self._nr_openai_attrs["id"] = return_val.get("id", "")
+                self._nr_openai_attrs["response.organization"] = return_val.get("organization", "")
+                if choices:
+                    delta = choices[0].get("delta", {})
+                    if delta:
+                        self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get(
+                            "content", ""
+                        )
+                        self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role")
+                    self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "")
+                self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {})
+        except StopIteration as e:
+            if hasattr(self, "_nr_ft"):
+                openai_attrs = getattr(self, "_nr_openai_attrs", {})
+                self._nr_ft.__exit__(None, None, None)
+
+                # If there are no openai attrs exit early as there's no data to record.
+                if not openai_attrs:
+                    raise
+
+                message_ids = self.record_streaming_chat_completion_events(transaction)
+                # Cache message ids on transaction for retrieval after open ai call completion.
+                if not hasattr(transaction, "_nr_message_ids"):
+                    transaction._nr_message_ids = {}
+                response_id = openai_attrs.get("response_id", None)
+                transaction._nr_message_ids[response_id] = message_ids
+            raise
+        except Exception as e:
+            if hasattr(self, "_nr_ft"):
+                openai_attrs = getattr(self, "_nr_openai_attrs", {})
+                self._nr_ft.__exit__(*sys.exc_info())
+
+                # If there are no openai attrs exit early as there's no data to record.
+                if not openai_attrs:
+                    raise
+
+                self.record_streaming_chat_completion_events(transaction)
+            raise
+        return return_val
+
+    def record_streaming_chat_completion_events(self, transaction):
+        openai_attrs = getattr(self, "_nr_openai_attrs", {})
+
+        # If there are no openai attrs exit early as there's no data to record.
+ if not openai_attrs: + raise + + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") + + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + chat_completion_id = str(uuid.uuid4()) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + organization = response_headers.get("openai-organization", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "request_id": request_id, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + "response.model": openai_attrs.get("response.model", ""), + "response.organization": organization, + # Usage tokens are not supported in streaming for now. + "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), + "response.headers.llmVersion": response_headers.get("openai-version", ""), + "response.headers.ratelimitLimitRequests": check_rate_limit_header( + response_headers, "x-ratelimit-limit-requests", True + ), + "response.headers.ratelimitLimitTokens": check_rate_limit_header( + response_headers, "x-ratelimit-limit-tokens", True + ), + "response.headers.ratelimitResetTokens": check_rate_limit_header( + response_headers, "x-ratelimit-reset-tokens", False + ), + "response.headers.ratelimitResetRequests": check_rate_limit_header( + response_headers, "x-ratelimit-reset-requests", False + ), + "response.headers.ratelimitRemainingTokens": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-tokens", True + ), + "response.headers.ratelimitRemainingRequests": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-requests", True + ), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + } + + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + + def close(self): + return super(GeneratorProxy, self).close() + + +def wrap_engine_api_resource_create(wrapped, instance, args, kwargs): + transaction = current_transaction() + + if not transaction: + return wrapped(*args, **kwargs) + + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) + + return_val = wrapped(*args, **kwargs) + + if stream: + return GeneratorProxy(return_val) + else: + return return_val + + def 
instrument_openai_api_resources_embedding(module): if hasattr(module, "Embedding"): if hasattr(module.Embedding, "create"): @@ -942,3 +1128,8 @@ def instrument_openai_base_client(module): wrap_function_wrapper(module, "SyncAPIClient._process_response", wrap_base_client_process_response_sync) if hasattr(module.AsyncAPIClient, "_process_response"): wrap_function_wrapper(module, "AsyncAPIClient._process_response", wrap_base_client_process_response_async) + + +def instrument_openai_api_resources_abstract_engine_api_resource(module): + if hasattr(module.EngineAPIResource, "create"): + wrap_function_wrapper(module, "EngineAPIResource.create", wrap_engine_api_resource_create) diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index edcfc47f3..a95914329 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -30,6 +30,261 @@ # created by an external call. # 3) This app runs on a separate thread meaning it won't block the test app. +STREAMED_RESPONSES = { + "You are a scientist.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "new-relic-nkmd8b", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "199", + "x-ratelimit-remaining-tokens": "39940", + "x-ratelimit-reset-requests": "7m12s", + "x-ratelimit-reset-tokens": "90ms", + "x-request-id": "49dbbffbd3c3f4612aa48def69059ccd", + }, + 200, + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Fahrenheit"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " is"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " equal"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": 
None, + "choices": [{"index": 0, "delta": {"content": " to"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " "}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "100"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Celsius"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "."}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {}, "logprobs": None, "finish_reason": "stop"}], + }, + ], + ] +} + +RESPONSES_V1 = { + "You are a scientist.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "foobar-jtbczk", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "196", + "x-ratelimit-remaining-tokens": "39880", + "x-ratelimit-reset-requests": "23m5.129s", + "x-ratelimit-reset-tokens": "180ms", + "x-request-id": "5c53c9b80af57a1c9b38568f01dcde7f", + }, + 200, + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Fahrenheit"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": 
"chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " is"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " equal"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " to"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " "}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "100"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Celsius"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "."}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {}, "logprobs": None, "finish_reason": "stop"}], + }, + ], + ] +} RESPONSES_V1 = { "You are a scientist.": [ { @@ -304,7 +559,7 @@ def simple_get(openai_version, extract_shortened_prompt): def _simple_get(self): content_len = int(self.headers.get("content-length")) content = json.loads(self.rfile.read(content_len).decode("utf-8")) - + stream = content.get("stream", False) prompt = extract_shortened_prompt(content) if not prompt: self.send_response(500) @@ -316,6 +571,8 @@ def _simple_get(self): if openai_version < (1, 0): mocked_responses = RESPONSES + if stream: + mocked_responses = STREAMED_RESPONSES else: mocked_responses = RESPONSES_V1 @@ -338,7 +595,12 @@ def _simple_get(self): self.end_headers() # Send response body - self.wfile.write(json.dumps(response).encode("utf-8")) + if stream: + for resp in response: + data = json.dumps(resp).encode("utf-8") + self.wfile.write(b"data: %s\n" % data) + else: + self.wfile.write(json.dumps(response).encode("utf-8")) return return _simple_get diff --git a/tests/mlmodel_openai/conftest.py 
b/tests/mlmodel_openai/conftest.py
index 180bec9cc..daa6b4584 100644
--- a/tests/mlmodel_openai/conftest.py
+++ b/tests/mlmodel_openai/conftest.py
@@ -32,7 +32,8 @@
 )
 
 from newrelic.api.transaction import current_transaction
-from newrelic.common.object_wrapper import wrap_function_wrapper
+from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper
+from newrelic.common.signature import bind_args
 
 _default_settings = {
     "transaction_tracer.explain_threshold": 0.0,
@@ -164,6 +165,11 @@ def openai_server(
         wrap_function_wrapper(
             "openai.api_requestor", "APIRequestor._interpret_response", wrap_openai_api_requestor_interpret_response
         )
+        wrap_function_wrapper(
+            "openai.api_resources.abstract.engine_api_resource",
+            "EngineAPIResource.create",
+            wrap_engine_api_resource_create,
+        )
         yield  # Run tests
     else:
         # Apply function wrappers to record data
@@ -247,20 +253,22 @@ def _wrap_openai_api_requestor_request(wrapped, instance, args, kwargs):
         # Send request
         result = wrapped(*args, **kwargs)
 
-        # Clean up data
-        data = result[0].data
-        headers = result[0]._headers
-        headers = dict(
-            filter(
-                lambda k: k[0].lower() in RECORDED_HEADERS
-                or k[0].lower().startswith("openai")
-                or k[0].lower().startswith("x-ratelimit"),
-                headers.items(),
+        # Append response data to audit log
+        if not kwargs.get("stream", False):
+            # Clean up data
+            data = result[0].data
+            headers = result[0]._headers
+            headers = dict(
+                filter(
+                    lambda k: k[0].lower() in RECORDED_HEADERS
+                    or k[0].lower().startswith("openai")
+                    or k[0].lower().startswith("x-ratelimit"),
+                    headers.items(),
+                )
             )
-        )
-
-        # Log response
-        OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, 200, data  # Append response data to audit log
+            OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, 200, data
+        else:
+            OPENAI_AUDIT_LOG_CONTENTS[prompt] = [None, 200, []]
 
         return result
 
     return _wrap_openai_api_requestor_request
@@ -272,3 +280,62 @@ def bind_request_params(method, url, params=None, *args, **kwargs):
     return params
 
 def bind_request_interpret_response_params(result, stream):
     return result.content.decode("utf-8"), result.status_code, result.headers
+
+
+class GeneratorProxy(ObjectProxy):
+    def __init__(self, wrapped):
+        super(GeneratorProxy, self).__init__(wrapped)
+
+    def __iter__(self):
+        return self
+
+    # Make this Proxy a pass through to our instrumentation's proxy by passing along
+    # get attr and set attr calls to our instrumentation's proxy.
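+    # Without this forwarding, attributes the agent's GeneratorProxy sets
+    # (e.g. _nr_ft and _nr_openai_attrs) would land on this outer test proxy
+    # instead of the instrumented proxy underneath.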
+    def __getattr__(self, attr):
+        return self.__wrapped__.__getattr__(attr)
+
+    def __setattr__(self, attr, value):
+        return self.__wrapped__.__setattr__(attr, value)
+
+    def __next__(self):
+        transaction = current_transaction()
+        if not transaction:
+            return self.__wrapped__.__next__()
+
+        try:
+            return_val = self.__wrapped__.__next__()
+            if return_val:
+                prompt = [k for k in OPENAI_AUDIT_LOG_CONTENTS.keys()][-1]
+                headers = dict(
+                    filter(
+                        lambda k: k[0].lower() in RECORDED_HEADERS
+                        or k[0].lower().startswith("openai")
+                        or k[0].lower().startswith("x-ratelimit"),
+                        return_val._nr_response_headers.items(),
+                    )
+                )
+                OPENAI_AUDIT_LOG_CONTENTS[prompt][0] = headers
+                OPENAI_AUDIT_LOG_CONTENTS[prompt][2].append(return_val.to_dict_recursive())
+            return return_val
+        except Exception as e:
+            raise
+
+    def close(self):
+        return super(GeneratorProxy, self).close()
+
+
+def wrap_engine_api_resource_create(wrapped, instance, args, kwargs):
+    transaction = current_transaction()
+
+    if not transaction:
+        return wrapped(*args, **kwargs)
+
+    bound_args = bind_args(wrapped, args, kwargs)
+    stream = bound_args["params"].get("stream", False)
+
+    return_val = wrapped(*args, **kwargs)
+
+    if stream:
+        return GeneratorProxy(return_val)
+    else:
+        return return_val
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index 5681dbb57..bfbdf2013 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -16,8 +16,8 @@
 from testing_support.fixtures import (
     override_application_settings,
     reset_core_stats_engine,
-    validate_custom_event_count,
     validate_attributes,
+    validate_custom_event_count,
 )
 from testing_support.validators.validate_custom_events import validate_custom_events
 from testing_support.validators.validate_transaction_metrics import (
diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py
new file mode 100644
index 000000000..62e9e0545
--- /dev/null
+++ b/tests/mlmodel_openai/test_chat_completion_stream.py
@@ -0,0 +1,389 @@
+ +import openai +from testing_support.fixtures import ( + override_application_settings, + reset_core_stats_engine, + validate_attributes, + validate_custom_event_count, +) +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_transaction_metrics import ( + validate_transaction_metrics, +) + +from newrelic.api.background_task import background_task +from newrelic.api.transaction import add_custom_attribute + +disabled_custom_insights_settings = {"custom_insights_events.enabled": False} + +_test_openai_chat_completion_messages = ( + {"role": "system", "content": "You are a scientist."}, + {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"}, +) + +chat_completion_recorded_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "transaction_id": "transaction-id", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "is_response": True, + "ingest_source": 
"Python", + }, + ), +] + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events) +# One summary event, one system message, one user message, and one response message from the assistant +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + name="test_chat_completion_stream:test_openai_chat_completion_sync_in_txn_with_convo_id", + custom_metrics=[ + ("Python/ML/OpenAI/%s" % openai.__version__, 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +chat_completion_recorded_events_no_convo_id = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "transaction_id": "transaction-id", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + 
"sequence": 2, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events_no_convo_id) +# One summary event, one system message, one user message, and one response message from the assistant +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_sync_in_txn_no_convo_id", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@background_task() +def test_openai_chat_completion_sync_in_txn_no_convo_id(set_trace_info): + set_trace_info() + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_openai_chat_completion_sync_outside_txn(): + add_custom_attribute("llm.conversation_id", "my-awesome-id") + openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events_no_convo_id) +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_unset", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@background_task() +def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info): + set_trace_info() + + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events) +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_set", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_transaction_metrics( + name="test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_set", + custom_metrics=[ + ("Python/ML/OpenAI/%s" % openai.__version__, 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_openai_chat_completion_async_outside_transaction(loop): + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + 
+@override_application_settings(disabled_custom_insights_settings) +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@validate_transaction_metrics( + name="test_chat_completion_stream:test_openai_chat_completion_async_disabled_custom_event_settings", + custom_metrics=[ + ("Python/ML/OpenAI/%s" % openai.__version__, 1), + ], + background_task=True, +) +@background_task() +def test_openai_chat_completion_async_disabled_custom_event_settings(loop): + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +def test_openai_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(): + assert openai.ChatCompletion._nr_wrapped + assert openai.util.convert_to_openai_object._nr_wrapped From a432b9b61b6aa18a0d0043397577cc9f72605124 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Thu, 8 Feb 2024 09:08:38 -0800 Subject: [PATCH 10/19] Add support for streaming errors --- .../_mock_external_openai_server.py | 31 +- tests/mlmodel_openai/conftest.py | 1 + .../test_chat_completion_stream_error.py | 597 ++++++++++++++++++ 3 files changed, 627 insertions(+), 2 deletions(-) create mode 100644 tests/mlmodel_openai/test_chat_completion_stream_error.py diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index a95914329..d378789b3 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -31,6 +31,33 @@ # 3) This app runs on a separate thread meaning it won't block the test app. STREAMED_RESPONSES = { + "Invalid API key.": [ + {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"}, + 401, + { + "error": { + "message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": None, + "code": "invalid_api_key", + } + }, + ], + "Model does not exist.": [ + { + "Content-Type": "application/json; charset=utf-8", + "x-request-id": "cfdf51fb795362ae578c12a21796262c", + }, + 404, + { + "error": { + "message": "The model `does-not-exist` does not exist", + "type": "invalid_request_error", + "param": None, + "code": "model_not_found", + } + }, + ], "You are a scientist.": [ { "Content-Type": "text/event-stream", @@ -155,7 +182,7 @@ "choices": [{"index": 0, "delta": {}, "logprobs": None, "finish_reason": "stop"}], }, ], - ] + ], } RESPONSES_V1 = { @@ -595,7 +622,7 @@ def _simple_get(self): self.end_headers() # Send response body - if stream: + if stream and status_code < 400: for resp in response: data = json.dumps(resp).encode("utf-8") self.wfile.write(b"data: %s\n" % data) diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py index daa6b4584..884ab79c6 100644 --- a/tests/mlmodel_openai/conftest.py +++ b/tests/mlmodel_openai/conftest.py @@ -33,6 +33,7 @@ from newrelic.api.transaction import current_transaction from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper +from newrelic.common.signature import bind_args _default_settings = { "transaction_tracer.explain_threshold": 0.0, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py new file mode 100644 index 000000000..7a3aeb05b --- /dev/null +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -0,0 +1,597 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
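
These new tests lean on the mock server behavior patched above: with the
`status_code < 400` gate, successful prompts are streamed as SSE `data:` lines
while error prompts fall through to a plain JSON body, which the client raises
as an exception before iteration ever starts. A condensed sketch of that
dispatch, simplified from `_simple_get` (the function name and arguments here
are illustrative):

    import json

    def serve_canned_response(wfile, prompt, streamed_responses):
        # Canned headers, status code, and body are keyed by the prompt text.
        headers, status_code, response = streamed_responses[prompt]
        stream = headers.get("Content-Type", "") == "text/event-stream"
        if stream and status_code < 400:
            # Success: one newline-delimited SSE event per streamed chunk.
            for chunk in response:
                wfile.write(b"data: %s\n" % json.dumps(chunk).encode("utf-8"))
        else:
            # Error (or non-streaming): a single JSON body.
            wfile.write(json.dumps(response).encode("utf-8"))
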
+ +import openai +import pytest +from testing_support.fixtures import ( + dt_enabled, + reset_core_stats_engine, + validate_custom_event_count, +) +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_error_trace_attributes import ( + validate_error_trace_attributes, +) +from testing_support.validators.validate_span_events import validate_span_events +from testing_support.validators.validate_transaction_metrics import ( + validate_transaction_metrics, +) + +from newrelic.api.background_task import background_task +from newrelic.api.transaction import add_custom_attribute +from newrelic.common.object_names import callable_name + +_test_openai_chat_completion_messages = ( + {"role": "system", "content": "You are a scientist."}, + {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"}, +) + +# Sync tests: +expected_events_on_no_model_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "", # No model in this test case + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 2, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 1, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +# No model provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.param": "engine", + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Must provide an 'engine' or 'model' parameter to create a ", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_no_model", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_no_model_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_invalid_request_error_no_model(set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + # no model provided, + 
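+            # (with neither "model" nor "engine", the SDK raises InvalidRequestError before any request is sent)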
messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_invalid_model_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "does-not-exist", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Model does not exist.", + "role": "user", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +# Invalid model provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.code": "model_not_found", + "http.statusCode": 404, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "The model `does-not-exist` does not exist", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_invalid_model_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + model="does-not-exist", + messages=({"role": "user", "content": "Model does not exist."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_auth_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 2, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + 
{"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 1, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +# No api_key provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": {}, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "No API key provided. You can set your API key in code using 'openai.api_key = ', or you can set the environment variable OPENAI_API_KEY=). If your API key is stored in a file, you can point the openai module at it with 'openai.api_key_path = '. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_authentication_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_auth_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_authentication_error(monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + monkeypatch.setattr(openai, "api_key", None) # openai.api_key = None + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_wrong_api_key_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-BEEF", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Invalid API key.", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +# Wrong api_key provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 401, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_wrong_api_key_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_wrong_api_key_error(monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + monkeypatch.setattr(openai, "api_key", "DEADBEEF") + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Invalid API key."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +# Async tests: +# No model provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.param": "engine", + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Must provide an 'engine' or 'model' parameter to create a ", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_no_model_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_no_model_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = loop.run_until_complete( + openai.ChatCompletion.acreate( + # no model provided, + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +# Invalid model provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.code": "model_not_found", + "http.statusCode": 404, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "The model `does-not-exist` does not exist", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_invalid_model_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = loop.run_until_complete( + openai.ChatCompletion.acreate( + model="does-not-exist", + messages=({"role": "user", "content": "Model does not exist."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +# No api_key provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + 
callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": {}, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "No API key provided. You can set your API key in code using 'openai.api_key = ', or you can set the environment variable OPENAI_API_KEY=). If your API key is stored in a file, you can point the openai module at it with 'openai.api_key_path = '. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_authentication_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_auth_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + monkeypatch.setattr(openai, "api_key", None) # openai.api_key = None + generator = loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +# Wrong api_key provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 401, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_wrong_api_key_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + monkeypatch.setattr(openai, "api_key", "DEADBEEF") + generator = loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Invalid API key."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp From 549b745be94b38b02c02925efcb1aba6f9375f35 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Fri, 9 Feb 2024 18:57:12 -0800 Subject: [PATCH 11/19] Support async generators --- newrelic/hooks/mlmodel_openai.py | 344 ++++++++++++++---- .../_mock_external_openai_server.py | 18 + .../test_chat_completion_stream.py | 57 +-- .../test_chat_completion_stream_error.py | 68 ++-- 4 files changed, 349 insertions(+), 138 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index e2f5c61f2..f4fb18d5c 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -658,7 +658,7 @@ async def wrap_embedding_async(wrapped, instance, args, kwargs): async def wrap_chat_completion_async(wrapped, instance, args, kwargs): transaction = current_transaction() - if not transaction or kwargs.get("stream", False): + if not transaction: return await wrapped(*args, **kwargs) # Framework metric also used for entity tagging in the UI @@ -683,88 +683,107 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs): completion_id = str(uuid.uuid4()) function_name = wrapped.__name__ + ft = FunctionTrace(name=function_name, group="Llm/completion/OpenAI") + ft.__enter__() + # Get trace information + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + try: + return_val = await wrapped(*args, **kwargs) + return_val._nr_ft = ft + except Exception as exc: + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + exc_organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. 
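+            # e.g. body == {"message": "Incorrect API key provided: ...", "type": "invalid_request_error", "param": None, "code": "invalid_api_key"}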
+ body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": completion_id, + } + else: + exc_organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": completion_id, + } + # Override the default message if it is not empty. + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message - with FunctionTrace(name=function_name, group="Llm/completion/OpenAI") as ft: - # Get trace information - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") + ft.notice_error( + attributes=notice_error_attributes, + ) + # Gather attributes to add to embedding summary event in error context + error_chat_completion_dict = { + "id": completion_id, + "appName": app_name, + "conversation_id": conversation_id, + "api_key_last_four_digits": api_key_last_four_digits, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "response.number_of_messages": len(request_message_list), + "request.model": kwargs.get("model") or kwargs.get("engine") or "", + "request.temperature": kwargs.get("temperature", ""), + "request.max_tokens": kwargs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.organization": "" if exc_organization is None else exc_organization, + "duration": ft.duration, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict) - try: - return_val = await wrapped(*args, **kwargs) - except Exception as exc: - if OPENAI_V1: - response = getattr(exc, "response", "") - response_headers = getattr(response, "headers", "") - exc_organization = response_headers.get("openai-organization", "") if response_headers else "" - # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception - # object so grab them from the response body object instead. - body = getattr(exc, "body", {}) or {} - notice_error_attributes = { - "http.statusCode": getattr(exc, "status_code", "") or "", - "error.message": body.get("message", "") or "", - "error.code": body.get("code", "") or "", - "error.param": body.get("param", "") or "", - "completion_id": completion_id, - } - else: - exc_organization = getattr(exc, "organization", "") - notice_error_attributes = { - "http.statusCode": getattr(exc, "http_status", ""), - "error.message": getattr(exc, "_message", ""), - "error.code": getattr(getattr(exc, "error", ""), "code", ""), - "error.param": getattr(exc, "param", ""), - "completion_id": completion_id, - } - # Override the default message if it is not empty. 
- message = notice_error_attributes.pop("error.message") - if message: - exc._nr_message = message + create_chat_completion_message_event( + transaction, + app_name, + request_message_list, + completion_id, + span_id, + trace_id, + "", + None, + "", + conversation_id, + None, + ) - ft.notice_error( - attributes=notice_error_attributes, - ) - # Gather attributes to add to chat completion summary event in error context - error_chat_completion_dict = { - "id": completion_id, - "appName": app_name, - "conversation_id": conversation_id, - "api_key_last_four_digits": api_key_last_four_digits, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "response.number_of_messages": len(request_message_list), - "request.model": kwargs.get("model") or kwargs.get("engine") or "", - "request.temperature": kwargs.get("temperature", ""), - "request.max_tokens": kwargs.get("max_tokens", ""), - "vendor": "openAI", - "ingest_source": "Python", - "response.organization": "" if exc_organization is None else exc_organization, - "duration": ft.duration, - "error": True, - } - transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict) - - create_chat_completion_message_event( - transaction, - app_name, - request_message_list, - completion_id, - span_id, - trace_id, - "", - None, - "", - conversation_id, - None, - ) + ft.__exit__(*sys.exc_info()) + raise - raise + stream = kwargs.get("stream", False) + # If response is not a stream generator, we exit the function trace now. + if not stream: + ft.__exit__(None, None, None) if not return_val: return return_val + if stream: + # The function trace will be exited when in the final iteration of the response + # generator. + setattr(return_val, "_nr_ft", ft) + setattr(return_val, "_nr_openai_attrs", getattr(return_val, "_nr_openai_attrs", {})) + return_val._nr_openai_attrs["messages"] = kwargs.get("messages", []) + return_val._nr_openai_attrs["temperature"] = kwargs.get("temperature", "") + return_val._nr_openai_attrs["max_tokens"] = kwargs.get("max_tokens", "") + return_val._nr_openai_attrs["request.model"] = kwargs.get("model") or kwargs.get("engine") or "" + return_val._nr_openai_attrs["api_key_last_four_digits"] = api_key_last_four_digits + return return_val + + # If response is not a stream generator, record the event data. 
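+    # (Streamed responses were returned above; their function trace stays open until the generator proxy closes it when iteration finishes or fails.)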
# At this point, we have a response so we can grab attributes only available on the response object response_headers = getattr(return_val, "_nr_response_headers", None) # In v1, response objects are pydantic models so this function call converts the @@ -1064,7 +1083,159 @@ def close(self): return super(GeneratorProxy, self).close() -def wrap_engine_api_resource_create(wrapped, instance, args, kwargs): +class AsyncGeneratorProxy(ObjectProxy): + def __init__(self, wrapped): + super(AsyncGeneratorProxy, self).__init__(wrapped) + + def __aiter__(self): + self._nr_wrapped_iter = self.__wrapped__.__aiter__() + return self + + async def __anext__(self): + transaction = current_transaction() + if not transaction: + return await self._nr_wrapped_iter.__anext__() + + return_val = None + try: + return_val = await self._nr_wrapped_iter.__anext__() + if return_val: + choices = return_val.get("choices", []) + self._nr_openai_attrs["response.model"] = return_val.get("model", "") + self._nr_openai_attrs["id"] = return_val.get("id", "") + self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") + if choices: + delta = choices[0].get("delta", {}) + if delta: + self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get( + "content", "" + ) + self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") + self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") + self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + except StopIteration as e: + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + self._nr_ft.__exit__(None, None, None) + + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: + raise + + message_ids = self.record_streaming_chat_completion_events(transaction) + # Cache message ids on transaction for retrieval after open ai call completion. + if not hasattr(transaction, "_nr_message_ids"): + transaction._nr_message_ids = {} + response_id = openai_attrs.get("response_id", None) + transaction._nr_message_ids[response_id] = message_ids + raise + except Exception as e: + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + self._nr_ft.__exit__(*sys.exc_info()) + + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: + raise + + self.record_streaming_chat_completion_events(transaction) + raise + return return_val + + def record_streaming_chat_completion_events(self, transaction): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + + # If there are no openai attrs exit early as there's no data to record. 
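+        # (The bare raise below re-raises the exception already being handled in the caller's except block.)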
+ if not openai_attrs: + raise + + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") + + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + chat_completion_id = str(uuid.uuid4()) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + organization = response_headers.get("openai-organization", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "request_id": request_id, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + "response.model": openai_attrs.get("response.model", ""), + "response.organization": organization, + # Usage tokens are not supported in streaming for now. + "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), + "response.headers.llmVersion": response_headers.get("openai-version", ""), + "response.headers.ratelimitLimitRequests": check_rate_limit_header( + response_headers, "x-ratelimit-limit-requests", True + ), + "response.headers.ratelimitLimitTokens": check_rate_limit_header( + response_headers, "x-ratelimit-limit-tokens", True + ), + "response.headers.ratelimitResetTokens": check_rate_limit_header( + response_headers, "x-ratelimit-reset-tokens", False + ), + "response.headers.ratelimitResetRequests": check_rate_limit_header( + response_headers, "x-ratelimit-reset-requests", False + ), + "response.headers.ratelimitRemainingTokens": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-tokens", True + ), + "response.headers.ratelimitRemainingRequests": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-requests", True + ), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + } + + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + + async def aclose(self): + return await super(AsyncGeneratorProxy, self).aclose() + + +def wrap_engine_api_resource_create_sync(wrapped, instance, args, kwargs): transaction = current_transaction() if not transaction: @@ -1081,6 +1252,23 @@ def wrap_engine_api_resource_create(wrapped, instance, args, kwargs): return return_val +async def wrap_engine_api_resource_create_async(wrapped, instance, args, kwargs): + transaction = current_transaction() + + if not 
transaction: + return await wrapped(*args, **kwargs) + + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) + + return_val = await wrapped(*args, **kwargs) + + if stream: + return AsyncGeneratorProxy(return_val) + else: + return return_val + + def instrument_openai_api_resources_embedding(module): if hasattr(module, "Embedding"): if hasattr(module.Embedding, "create"): @@ -1132,4 +1320,6 @@ def instrument_openai_base_client(module): def instrument_openai_api_resources_abstract_engine_api_resource(module): if hasattr(module.EngineAPIResource, "create"): - wrap_function_wrapper(module, "EngineAPIResource.create", wrap_engine_api_resource_create) + wrap_function_wrapper(module, "EngineAPIResource.create", wrap_engine_api_resource_create_sync) + if hasattr(module.EngineAPIResource, "acreate"): + wrap_function_wrapper(module, "EngineAPIResource.acreate", wrap_engine_api_resource_create_async) diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index d378789b3..988dae8b1 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -31,6 +31,24 @@ # 3) This app runs on a separate thread meaning it won't block the test app. STREAMED_RESPONSES = { + "Stream parsing error.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "new-relic-nkmd8b", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "199", + "x-ratelimit-remaining-tokens": "39940", + "x-ratelimit-reset-requests": "7m12s", + "x-ratelimit-reset-tokens": "90ms", + "x-request-id": "49dbbffbd3c3f4612aa48def69059ccd", + }, + 200, + ["Bad response"], + ], "Invalid API key.": [ {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"}, 401, diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index 62e9e0545..72bdaf9b3 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -14,7 +14,6 @@ import openai from testing_support.fixtures import ( - override_application_settings, reset_core_stats_engine, validate_attributes, validate_custom_event_count, @@ -295,17 +294,18 @@ def test_openai_chat_completion_sync_outside_txn(): def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info): set_trace_info() - loop.run_until_complete( - openai.ChatCompletion.acreate( + async def consumer(): + generator = await openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) - ) - for resp in generator: - assert resp + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) @reset_core_stats_engine() @@ -330,58 +330,35 @@ def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( + async def consumer(): + generator = await openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) - ) - for resp in generator: - assert resp + async for 
resp in generator: + assert resp + + loop.run_until_complete(consumer()) @reset_core_stats_engine() @validate_custom_event_count(count=0) def test_openai_chat_completion_async_outside_transaction(loop): - loop.run_until_complete( - openai.ChatCompletion.acreate( + async def consumer(): + generator = await openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) - ) - for resp in generator: - assert resp - + async for resp in generator: + assert resp -@override_application_settings(disabled_custom_insights_settings) -@reset_core_stats_engine() -@validate_custom_event_count(count=0) -@validate_transaction_metrics( - name="test_chat_completion_stream:test_openai_chat_completion_async_disabled_custom_event_settings", - custom_metrics=[ - ("Python/ML/OpenAI/%s" % openai.__version__, 1), - ], - background_task=True, -) -@background_task() -def test_openai_chat_completion_async_disabled_custom_event_settings(loop): - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="gpt-3.5-turbo", - messages=_test_openai_chat_completion_messages, - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - for resp in generator: - assert resp + loop.run_until_complete(consumer()) def test_openai_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(): diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index 7a3aeb05b..8463c9819 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -37,7 +37,6 @@ {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"}, ) -# Sync tests: expected_events_on_no_model_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -101,7 +100,6 @@ ] -# No model provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -187,7 +185,6 @@ def test_chat_completion_invalid_request_error_no_model(set_trace_info): ] -# Invalid model provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -293,7 +290,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): ] -# No api_key provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -378,7 +374,6 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): ] -# Wrong api_key provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -420,8 +415,6 @@ def test_chat_completion_wrong_api_key_error(monkeypatch, set_trace_info): assert resp -# Async tests: -# No model provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -452,7 +445,7 @@ def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_in with pytest.raises(openai.InvalidRequestError): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = loop.run_until_complete( + loop.run_until_complete( openai.ChatCompletion.acreate( # no model provided, messages=_test_openai_chat_completion_messages, @@ -461,11 +454,8 @@ def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_in stream=True, ) ) - for resp in generator: - assert resp -# Invalid model provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -497,7 +487,7 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra with 
pytest.raises(openai.InvalidRequestError): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = loop.run_until_complete( + loop.run_until_complete( openai.ChatCompletion.acreate( model="does-not-exist", messages=({"role": "user", "content": "Model does not exist."},), @@ -506,11 +496,8 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra stream=True, ) ) - for resp in generator: - assert resp -# No api_key provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -540,7 +527,7 @@ def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") monkeypatch.setattr(openai, "api_key", None) # openai.api_key = None - generator = loop.run_until_complete( + loop.run_until_complete( openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, @@ -549,11 +536,8 @@ def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace stream=True, ) ) - for resp in generator: - assert resp -# Wrong api_key provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -584,7 +568,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ with pytest.raises(openai.error.AuthenticationError): set_trace_info() monkeypatch.setattr(openai, "api_key", "DEADBEEF") - generator = loop.run_until_complete( + loop.run_until_complete( openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=({"role": "user", "content": "Invalid API key."},), @@ -593,5 +577,47 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ stream=True, ) ) - for resp in generator: + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 401, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_stream_parsing_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_stream_parsing_error_async(loop, monkeypatch, set_trace_info): + # with pytest.raises(openai.error.APIError): + set_trace_info() + + async def consumer(): + generator = await openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Stream parsing error."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + async for resp in generator: assert resp + + loop.run_until_complete(consumer()) From c989a15fa3df0581005b7d1b9488993736e3d34a Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Mon, 12 Feb 2024 17:07:48 -0800 Subject: [PATCH 12/19] Add support for error during streaming --- newrelic/hooks/mlmodel_openai.py | 212 ++++++++++++++++-- .../_mock_external_openai_server.py | 27 ++- .../test_chat_completion_stream_error.py | 107 ++++++++- 3 files changed, 313 insertions(+), 33 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index f4fb18d5c..93060a877 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -888,7 +888,9 @@ def wrap_convert_to_openai_object(wrapped, instance, args, kwargs): resp = args[0] returned_response = wrapped(*args, **kwargs) - if isinstance(resp, openai.openai_response.OpenAIResponse): + if isinstance(returned_response, openai.openai_object.OpenAIObject) and isinstance( + resp, openai.openai_response.OpenAIResponse + ): setattr(returned_response, "_nr_response_headers", getattr(resp, "_headers", {})) return returned_response @@ -970,33 +972,118 @@ def __next__(self): if not openai_attrs: raise - message_ids = self.record_streaming_chat_completion_events(transaction) + message_ids = self.record_streaming_chat_completion_events(transaction, openai_attrs) # Cache message ids on transaction for retrieval after open ai call completion. if not hasattr(transaction, "_nr_message_ids"): transaction._nr_message_ids = {} response_id = openai_attrs.get("response_id", None) transaction._nr_message_ids[response_id] = message_ids raise - except Exception as e: + except Exception as exc: if hasattr(self, "_nr_ft"): openai_attrs = getattr(self, "_nr_openai_attrs", {}) - self._nr_ft.__exit__(*sys.exc_info()) # If there are no openai attrs exit early as there's no data to record. 
if not openai_attrs: + self._nr_ft.__exit__(*sys.exc_info()) raise - self.record_streaming_chat_completion_events(transaction) + self.record_streaming_chat_completion_events_error(transaction, openai_attrs, exc) raise return return_val - def record_streaming_chat_completion_events(self, transaction): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) + def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): + chat_completion_id = str(uuid.uuid4()) + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. + body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": chat_completion_id, + } + else: + organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": chat_completion_id, + } + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + self._nr_ft.notice_error( + attributes=notice_error_attributes, + ) + self._nr_ft.__exit__(*sys.exc_info()) + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") + + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - raise + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + # Usage tokens are not supported in streaming for now. 
+ "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + "response.organization": organization, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + + def record_streaming_chat_completion_events(self, transaction, openai_attrs): content = openai_attrs.get("content", None) role = openai_attrs.get("role") @@ -1113,7 +1200,7 @@ async def __anext__(self): self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) - except StopIteration as e: + except StopAsyncIteration as e: if hasattr(self, "_nr_ft"): openai_attrs = getattr(self, "_nr_openai_attrs", {}) self._nr_ft.__exit__(None, None, None) @@ -1122,33 +1209,118 @@ async def __anext__(self): if not openai_attrs: raise - message_ids = self.record_streaming_chat_completion_events(transaction) + message_ids = self.record_streaming_chat_completion_events(transaction, openai_attrs) # Cache message ids on transaction for retrieval after open ai call completion. if not hasattr(transaction, "_nr_message_ids"): transaction._nr_message_ids = {} response_id = openai_attrs.get("response_id", None) transaction._nr_message_ids[response_id] = message_ids raise - except Exception as e: + except Exception as exc: if hasattr(self, "_nr_ft"): openai_attrs = getattr(self, "_nr_openai_attrs", {}) - self._nr_ft.__exit__(*sys.exc_info()) # If there are no openai attrs exit early as there's no data to record. if not openai_attrs: + self._nr_ft.__exit__(*sys.exc_info()) raise - self.record_streaming_chat_completion_events(transaction) + self.record_streaming_chat_completion_events_error(transaction, openai_attrs, exc) raise return return_val - def record_streaming_chat_completion_events(self, transaction): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) + def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): + chat_completion_id = str(uuid.uuid4()) + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. 
+ body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": chat_completion_id, + } + else: + organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": chat_completion_id, + } + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + self._nr_ft.notice_error( + attributes=notice_error_attributes, + ) + self._nr_ft.__exit__(*sys.exc_info()) + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - raise + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + # Usage tokens are not supported in streaming for now. 
+ "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + "response.organization": organization, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + def record_streaming_chat_completion_events(self, transaction, openai_attrs): content = openai_attrs.get("content", None) role = openai_attrs.get("role") diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index 988dae8b1..e1ed8271d 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -47,7 +47,26 @@ "x-request-id": "49dbbffbd3c3f4612aa48def69059ccd", }, 200, - ["Bad response"], + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + ], ], "Invalid API key.": [ {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"}, @@ -643,7 +662,11 @@ def _simple_get(self): if stream and status_code < 400: for resp in response: data = json.dumps(resp).encode("utf-8") - self.wfile.write(b"data: %s\n" % data) + if prompt == "Stream parsing error.": + # Force a parsing error by writing an invalid streamed response. 
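+                    # Server-sent events are newline-delimited, so omitting the
+                    # terminator below makes consecutive chunks run together and
+                    # the client's stream parser raise an APIError.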
+ self.wfile.write(b"data: %s" % data) + else: + self.wfile.write(b"data: %s\n" % data) else: self.wfile.write(json.dumps(response).encode("utf-8")) return diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index 8463c9819..267f97e96 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -579,21 +579,65 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ ) +expected_events_stream_parsing_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Stream parsing error.", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( - callable_name(openai.error.AuthenticationError), + callable_name(openai.error.APIError), exact_attrs={ "agent": {}, "intrinsic": {}, "user": { - "http.statusCode": 401, + "http.statusCode": 200, }, }, ) @validate_span_events( exact_agents={ - "error.message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + "error.message": 'HTTP code 200 from API ({"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": null, "finish_reason": null}]}data: {"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": null, "finish_reason": null}]})', } ) @validate_transaction_metrics( @@ -602,22 +646,63 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], background_task=True, ) -@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_events(expected_events_stream_parsing_error) @validate_custom_event_count(count=2) @background_task() def test_chat_completion_stream_parsing_error_async(loop, monkeypatch, set_trace_info): - # with pytest.raises(openai.error.APIError): - set_trace_info() + with pytest.raises(openai.error.APIError): + set_trace_info() + + async def consumer(): + generator = await openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Stream parsing error."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.APIError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 200, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": 'HTTP code 200 from API ({"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": null, "finish_reason": null}]}data: {"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": null, "finish_reason": null}]})', + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_stream_parsing_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_stream_parsing_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_stream_parsing_error(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.APIError): + set_trace_info() - async def consumer(): - generator = await openai.ChatCompletion.acreate( + generator = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=({"role": "user", "content": "Stream parsing error."},), temperature=0.7, max_tokens=100, stream=True, ) - async for resp in generator: + for resp in generator: assert resp - - loop.run_until_complete(consumer()) From 7433512b190d8e1505fd2025b76e09794e579cbb Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Mon, 12 Feb 2024 19:09:18 -0800 Subject: [PATCH 13/19] Ignore v1 tests in v0 --- 
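Note for reviewers: pytest skips any file named in a module-level
collect_ignore list in conftest.py, so each major version of the openai SDK
only collects the tests written against its own API surface. A minimal
sketch of the gating pattern (the real conftest may derive the OPENAI_V1
flag differently, e.g. from the agent's package version helpers):

    import openai

    OPENAI_V1 = int(openai.version.VERSION.split(".")[0]) >= 1

    if OPENAI_V1:
        collect_ignore = ["test_embeddings.py", "test_chat_completion.py"]
    else:
        collect_ignore = ["test_embeddings_v1.py", "test_chat_completion_v1.py"]
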
tests/mlmodel_openai/conftest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py index 884ab79c6..36a0f08dd 100644 --- a/tests/mlmodel_openai/conftest.py +++ b/tests/mlmodel_openai/conftest.py @@ -55,17 +55,18 @@ "test_chat_completion_v1.py", "test_chat_completion_error_v1.py", "test_embeddings_v1.py", - "test_get_llm_message_ids_v1.py", - "test_chat_completion_error_v1.py", "test_embeddings_error_v1.py", + "test_get_llm_message_ids_v1.py", ] else: collect_ignore = [ "test_embeddings.py", "test_embeddings_error.py", "test_chat_completion.py", - "test_get_llm_message_ids.py", "test_chat_completion_error.py", + "test_chat_completion_stream.py", + "test_chat_completion_stream_error.py", + "test_get_llm_message_ids.py", ] From f3cda4c761d7ac388f05aaee981ad78cf88a6e27 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Mon, 12 Feb 2024 19:36:02 -0800 Subject: [PATCH 14/19] Refactor generator wrappers --- newrelic/hooks/mlmodel_openai.py | 610 +++++++++++-------------------- 1 file changed, 207 insertions(+), 403 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index 93060a877..65de5cdf3 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -949,225 +949,237 @@ def __next__(self): return_val = None try: return_val = self.__wrapped__.__next__() - if return_val: - choices = return_val.get("choices", []) - self._nr_openai_attrs["response.model"] = return_val.get("model", "") - self._nr_openai_attrs["id"] = return_val.get("id", "") - self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") - if choices: - delta = choices[0].get("delta", {}) - if delta: - self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get( - "content", "" - ) - self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") - self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") - self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + record_stream_chunk(self, return_val) except StopIteration as e: - if hasattr(self, "_nr_ft"): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) - self._nr_ft.__exit__(None, None, None) - - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - raise - - message_ids = self.record_streaming_chat_completion_events(transaction, openai_attrs) - # Cache message ids on transaction for retrieval after open ai call completion. - if not hasattr(transaction, "_nr_message_ids"): - transaction._nr_message_ids = {} - response_id = openai_attrs.get("response_id", None) - transaction._nr_message_ids[response_id] = message_ids + record_events_on_stop_iteration(self, transaction) raise except Exception as exc: - if hasattr(self, "_nr_ft"): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) + record_error(self, transaction, exc) + raise + return return_val + + def close(self): + return super(GeneratorProxy, self).close() - # If there are no openai attrs exit early as there's no data to record. 
- if not openai_attrs: - self._nr_ft.__exit__(*sys.exc_info()) - raise - self.record_streaming_chat_completion_events_error(transaction, openai_attrs, exc) +def record_stream_chunk(self, return_val): + if return_val: + choices = return_val.get("choices", []) + self._nr_openai_attrs["response.model"] = return_val.get("model", "") + self._nr_openai_attrs["id"] = return_val.get("id", "") + self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") + if choices: + delta = choices[0].get("delta", {}) + if delta: + self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get("content", "") + self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") + self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") + self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + + +def record_events_on_stop_iteration(self, transaction): + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + self._nr_ft.__exit__(None, None, None) + + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: raise - return return_val - def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): - chat_completion_id = str(uuid.uuid4()) - if OPENAI_V1: - response = getattr(exc, "response", "") - response_headers = getattr(response, "headers", "") - organization = response_headers.get("openai-organization", "") if response_headers else "" - # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception - # object so grab them from the response body object instead. - body = getattr(exc, "body", {}) or {} - notice_error_attributes = { - "http.statusCode": getattr(exc, "status_code", "") or "", - "error.message": body.get("message", "") or "", - "error.code": body.get("code", "") or "", - "error.param": body.get("param", "") or "", - "completion_id": chat_completion_id, - } - else: - organization = getattr(exc, "organization", "") - notice_error_attributes = { - "http.statusCode": getattr(exc, "http_status", ""), - "error.message": getattr(exc, "_message", ""), - "error.code": getattr(getattr(exc, "error", ""), "code", ""), - "error.param": getattr(exc, "param", ""), - "completion_id": chat_completion_id, - } - message = notice_error_attributes.pop("error.message") - if message: - exc._nr_message = message - self._nr_ft.notice_error( - attributes=notice_error_attributes, - ) - self._nr_ft.__exit__(*sys.exc_info()) - content = openai_attrs.get("content", None) - role = openai_attrs.get("role") + message_ids = record_streaming_chat_completion_events(self, transaction, openai_attrs) + # Cache message ids on transaction for retrieval after open ai call completion. 
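+        # (The get_llm_message_ids API reads this cache back out by response
+        # id; see test_get_llm_message_ids.py.)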
+ if not hasattr(transaction, "_nr_message_ids"): + transaction._nr_message_ids = {} + response_id = openai_attrs.get("response_id", None) + transaction._nr_message_ids[response_id] = message_ids - custom_attrs_dict = transaction._custom_params - conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") +def record_error(self, transaction, exc): + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) - response_headers = openai_attrs.get("response_headers", {}) - settings = transaction.settings if transaction.settings is not None else global_settings() - response_id = openai_attrs.get("id", None) - request_id = response_headers.get("x-request-id", "") + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: + self._nr_ft.__exit__(*sys.exc_info()) + raise - api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc) - messages = openai_attrs.get("messages", []) - chat_completion_summary_dict = { - "id": chat_completion_id, - "appName": settings.app_name, - "conversation_id": conversation_id, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "api_key_last_four_digits": api_key_last_four_digits, - "duration": self._nr_ft.duration, - "request.model": openai_attrs.get("request.model", ""), - # Usage tokens are not supported in streaming for now. - "request.temperature": openai_attrs.get("temperature", ""), - "request.max_tokens": openai_attrs.get("max_tokens", ""), - "vendor": "openAI", - "ingest_source": "Python", - "response.number_of_messages": len(messages) + (1 if content else 0), - "response.organization": organization, - "error": True, +def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): + chat_completion_id = str(uuid.uuid4()) + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. 
+ body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": chat_completion_id, } - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + else: + organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": chat_completion_id, + } + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + self._nr_ft.notice_error( + attributes=notice_error_attributes, + ) + self._nr_ft.__exit__(*sys.exc_info()) + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") - output_message_list = [] - if content: - output_message_list = [{"content": content, "role": role}] + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - return create_chat_completion_message_event( - transaction, - settings.app_name, - list(messages), - chat_completion_id, - span_id, - trace_id, - openai_attrs.get("response.model", ""), - response_id, - request_id, - conversation_id, - output_message_list, - ) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") - def record_streaming_chat_completion_events(self, transaction, openai_attrs): - content = openai_attrs.get("content", None) - role = openai_attrs.get("role") + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") - custom_attrs_dict = transaction._custom_params - conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") - chat_completion_id = str(uuid.uuid4()) - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") + messages = openai_attrs.get("messages", []) - response_headers = openai_attrs.get("response_headers", {}) - settings = transaction.settings if transaction.settings is not None else global_settings() - response_id = openai_attrs.get("id", None) - request_id = response_headers.get("x-request-id", "") - organization = response_headers.get("openai-organization", "") + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + # Usage tokens are not supported in streaming for now. 
+ "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + "response.organization": organization, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) - api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] - messages = openai_attrs.get("messages", []) + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) - chat_completion_summary_dict = { - "id": chat_completion_id, - "appName": settings.app_name, - "conversation_id": conversation_id, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "request_id": request_id, - "api_key_last_four_digits": api_key_last_four_digits, - "duration": self._nr_ft.duration, - "request.model": openai_attrs.get("request.model", ""), - "response.model": openai_attrs.get("response.model", ""), - "response.organization": organization, - # Usage tokens are not supported in streaming for now. - "request.temperature": openai_attrs.get("temperature", ""), - "request.max_tokens": openai_attrs.get("max_tokens", ""), - "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), - "response.headers.llmVersion": response_headers.get("openai-version", ""), - "response.headers.ratelimitLimitRequests": check_rate_limit_header( - response_headers, "x-ratelimit-limit-requests", True - ), - "response.headers.ratelimitLimitTokens": check_rate_limit_header( - response_headers, "x-ratelimit-limit-tokens", True - ), - "response.headers.ratelimitResetTokens": check_rate_limit_header( - response_headers, "x-ratelimit-reset-tokens", False - ), - "response.headers.ratelimitResetRequests": check_rate_limit_header( - response_headers, "x-ratelimit-reset-requests", False - ), - "response.headers.ratelimitRemainingTokens": check_rate_limit_header( - response_headers, "x-ratelimit-remaining-tokens", True - ), - "response.headers.ratelimitRemainingRequests": check_rate_limit_header( - response_headers, "x-ratelimit-remaining-requests", True - ), - "vendor": "openAI", - "ingest_source": "Python", - "response.number_of_messages": len(messages) + (1 if content else 0), - } - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) +def record_streaming_chat_completion_events(self, transaction, openai_attrs): + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") - output_message_list = [] - if content: - output_message_list = [{"content": content, "role": role}] + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - return create_chat_completion_message_event( - transaction, - settings.app_name, - list(messages), - chat_completion_id, - span_id, - trace_id, - openai_attrs.get("response.model", ""), - response_id, - request_id, - conversation_id, - output_message_list, - ) + chat_completion_id = str(uuid.uuid4()) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", 
"") - def close(self): - return super(GeneratorProxy, self).close() + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + organization = response_headers.get("openai-organization", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "request_id": request_id, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + "response.model": openai_attrs.get("response.model", ""), + "response.organization": organization, + # Usage tokens are not supported in streaming for now. + "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), + "response.headers.llmVersion": response_headers.get("openai-version", ""), + "response.headers.ratelimitLimitRequests": check_rate_limit_header( + response_headers, "x-ratelimit-limit-requests", True + ), + "response.headers.ratelimitLimitTokens": check_rate_limit_header( + response_headers, "x-ratelimit-limit-tokens", True + ), + "response.headers.ratelimitResetTokens": check_rate_limit_header( + response_headers, "x-ratelimit-reset-tokens", False + ), + "response.headers.ratelimitResetRequests": check_rate_limit_header( + response_headers, "x-ratelimit-reset-requests", False + ), + "response.headers.ratelimitRemainingTokens": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-tokens", True + ), + "response.headers.ratelimitRemainingRequests": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-requests", True + ), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + } + + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) class AsyncGeneratorProxy(ObjectProxy): @@ -1186,223 +1198,15 @@ async def __anext__(self): return_val = None try: return_val = await self._nr_wrapped_iter.__anext__() - if return_val: - choices = return_val.get("choices", []) - self._nr_openai_attrs["response.model"] = return_val.get("model", "") - self._nr_openai_attrs["id"] = return_val.get("id", "") - self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") - if choices: - delta = choices[0].get("delta", {}) - if delta: - self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get( - "content", "" - ) - self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") - self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") - 
self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + record_stream_chunk(self, return_val) except StopAsyncIteration as e: - if hasattr(self, "_nr_ft"): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) - self._nr_ft.__exit__(None, None, None) - - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - raise - - message_ids = self.record_streaming_chat_completion_events(transaction, openai_attrs) - # Cache message ids on transaction for retrieval after open ai call completion. - if not hasattr(transaction, "_nr_message_ids"): - transaction._nr_message_ids = {} - response_id = openai_attrs.get("response_id", None) - transaction._nr_message_ids[response_id] = message_ids + record_events_on_stop_iteration(self, transaction) raise except Exception as exc: - if hasattr(self, "_nr_ft"): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) - - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - self._nr_ft.__exit__(*sys.exc_info()) - raise - - self.record_streaming_chat_completion_events_error(transaction, openai_attrs, exc) + record_error(self, transaction, exc) raise return return_val - def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): - chat_completion_id = str(uuid.uuid4()) - if OPENAI_V1: - response = getattr(exc, "response", "") - response_headers = getattr(response, "headers", "") - organization = response_headers.get("openai-organization", "") if response_headers else "" - # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception - # object so grab them from the response body object instead. 
- body = getattr(exc, "body", {}) or {} - notice_error_attributes = { - "http.statusCode": getattr(exc, "status_code", "") or "", - "error.message": body.get("message", "") or "", - "error.code": body.get("code", "") or "", - "error.param": body.get("param", "") or "", - "completion_id": chat_completion_id, - } - else: - organization = getattr(exc, "organization", "") - notice_error_attributes = { - "http.statusCode": getattr(exc, "http_status", ""), - "error.message": getattr(exc, "_message", ""), - "error.code": getattr(getattr(exc, "error", ""), "code", ""), - "error.param": getattr(exc, "param", ""), - "completion_id": chat_completion_id, - } - message = notice_error_attributes.pop("error.message") - if message: - exc._nr_message = message - self._nr_ft.notice_error( - attributes=notice_error_attributes, - ) - self._nr_ft.__exit__(*sys.exc_info()) - content = openai_attrs.get("content", None) - role = openai_attrs.get("role") - - custom_attrs_dict = transaction._custom_params - conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") - - response_headers = openai_attrs.get("response_headers", {}) - settings = transaction.settings if transaction.settings is not None else global_settings() - response_id = openai_attrs.get("id", None) - request_id = response_headers.get("x-request-id", "") - - api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") - - messages = openai_attrs.get("messages", []) - - chat_completion_summary_dict = { - "id": chat_completion_id, - "appName": settings.app_name, - "conversation_id": conversation_id, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "api_key_last_four_digits": api_key_last_four_digits, - "duration": self._nr_ft.duration, - "request.model": openai_attrs.get("request.model", ""), - # Usage tokens are not supported in streaming for now. 
- "request.temperature": openai_attrs.get("temperature", ""), - "request.max_tokens": openai_attrs.get("max_tokens", ""), - "vendor": "openAI", - "ingest_source": "Python", - "response.number_of_messages": len(messages) + (1 if content else 0), - "response.organization": organization, - "error": True, - } - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) - - output_message_list = [] - if content: - output_message_list = [{"content": content, "role": role}] - - return create_chat_completion_message_event( - transaction, - settings.app_name, - list(messages), - chat_completion_id, - span_id, - trace_id, - openai_attrs.get("response.model", ""), - response_id, - request_id, - conversation_id, - output_message_list, - ) - - def record_streaming_chat_completion_events(self, transaction, openai_attrs): - content = openai_attrs.get("content", None) - role = openai_attrs.get("role") - - custom_attrs_dict = transaction._custom_params - conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - - chat_completion_id = str(uuid.uuid4()) - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") - - response_headers = openai_attrs.get("response_headers", {}) - settings = transaction.settings if transaction.settings is not None else global_settings() - response_id = openai_attrs.get("id", None) - request_id = response_headers.get("x-request-id", "") - organization = response_headers.get("openai-organization", "") - - api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") - - messages = openai_attrs.get("messages", []) - - chat_completion_summary_dict = { - "id": chat_completion_id, - "appName": settings.app_name, - "conversation_id": conversation_id, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "request_id": request_id, - "api_key_last_four_digits": api_key_last_four_digits, - "duration": self._nr_ft.duration, - "request.model": openai_attrs.get("request.model", ""), - "response.model": openai_attrs.get("response.model", ""), - "response.organization": organization, - # Usage tokens are not supported in streaming for now. 
- "request.temperature": openai_attrs.get("temperature", ""), - "request.max_tokens": openai_attrs.get("max_tokens", ""), - "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), - "response.headers.llmVersion": response_headers.get("openai-version", ""), - "response.headers.ratelimitLimitRequests": check_rate_limit_header( - response_headers, "x-ratelimit-limit-requests", True - ), - "response.headers.ratelimitLimitTokens": check_rate_limit_header( - response_headers, "x-ratelimit-limit-tokens", True - ), - "response.headers.ratelimitResetTokens": check_rate_limit_header( - response_headers, "x-ratelimit-reset-tokens", False - ), - "response.headers.ratelimitResetRequests": check_rate_limit_header( - response_headers, "x-ratelimit-reset-requests", False - ), - "response.headers.ratelimitRemainingTokens": check_rate_limit_header( - response_headers, "x-ratelimit-remaining-tokens", True - ), - "response.headers.ratelimitRemainingRequests": check_rate_limit_header( - response_headers, "x-ratelimit-remaining-requests", True - ), - "vendor": "openAI", - "ingest_source": "Python", - "response.number_of_messages": len(messages) + (1 if content else 0), - } - - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) - - output_message_list = [] - if content: - output_message_list = [{"content": content, "role": role}] - - return create_chat_completion_message_event( - transaction, - settings.app_name, - list(messages), - chat_completion_id, - span_id, - trace_id, - openai_attrs.get("response.model", ""), - response_id, - request_id, - conversation_id, - output_message_list, - ) - async def aclose(self): return await super(AsyncGeneratorProxy, self).aclose() From 88babfef2fd2592efacfc142e1cb0bbc0d36e109 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 13 Feb 2024 08:36:10 -0800 Subject: [PATCH 15/19] Fixup: tox merge conflicts --- tox.ini | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index c58e8d0e3..969f98548 100644 --- a/tox.ini +++ b/tox.ini @@ -207,7 +207,6 @@ deps = component_flask_rest: flask-restful component_flask_rest: jinja2 component_flask_rest: itsdangerous - component_flask_rest-flaskrestxlatest: flask component_flask_rest-flaskrestxlatest: flask-restx component_flask_rest-flaskrestxlatest: flask ; flask-restx only supports Flask v3 after flask-restx v1.3.0 @@ -402,7 +401,7 @@ commands = allowlist_externals={toxinidir}/.github/scripts/* install_command= - pip install {opts} {packages} + {toxinidir}/.github/scripts/retry.sh 3 pip install {opts} {packages} extras = agent_streaming: infinite-tracing From 82b8e3257a515e0fd2640cb4c530c6be75ac539e Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 13 Feb 2024 10:32:18 -0800 Subject: [PATCH 16/19] Use fixture --- tests/mlmodel_openai/conftest.py | 103 +++++++++++++++++-------------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py index 36a0f08dd..976ba7875 100644 --- a/tests/mlmodel_openai/conftest.py +++ b/tests/mlmodel_openai/conftest.py @@ -150,6 +150,7 @@ def openai_server( wrap_openai_api_requestor_request, wrap_openai_api_requestor_interpret_response, wrap_httpx_client_send, + wrap_engine_api_resource_create, ): """ This fixture will either create a mocked backend for testing purposes, or will @@ -283,60 +284,68 @@ def bind_request_interpret_response_params(result, stream): return result.content.decode("utf-8"), result.status_code, result.headers 
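+# The generator proxy and the engine-api wrapper below are provided as
+# session-scoped fixtures so the openai_server fixture above can request
+# them explicitly instead of relying on module-level wrapping.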
-class GeneratorProxy(ObjectProxy): - def __init__(self, wrapped): - super(GeneratorProxy, self).__init__(wrapped) +@pytest.fixture(scope="session") +def generator_proxy(): + class GeneratorProxy(ObjectProxy): + def __init__(self, wrapped): + super(GeneratorProxy, self).__init__(wrapped) + + def __iter__(self): + return self + + # Make this Proxy a pass through to our instrumentation's proxy by passing along + # get attr and set attr calls to our instrumentation's proxy. + def __getattr__(self, attr): + return self.__wrapped__.__getattr__(attr) + + def __setattr__(self, attr, value): + return self.__wrapped__.__setattr__(attr, value) + + def __next__(self): + transaction = current_transaction() + if not transaction: + return self.__wrapped__.__next__() + + try: + return_val = self.__wrapped__.__next__() + if return_val: + prompt = [k for k in OPENAI_AUDIT_LOG_CONTENTS.keys()][-1] + headers = dict( + filter( + lambda k: k[0].lower() in RECORDED_HEADERS + or k[0].lower().startswith("openai") + or k[0].lower().startswith("x-ratelimit"), + return_val._nr_response_headers.items(), + ) + ) + OPENAI_AUDIT_LOG_CONTENTS[prompt][0] = headers + OPENAI_AUDIT_LOG_CONTENTS[prompt][2].append(return_val.to_dict_recursive()) + return return_val + except Exception as e: + raise - def __iter__(self): - return self + def close(self): + return super(GeneratorProxy, self).close() - # Make this Proxy a pass through to our instrumentation's proxy by passing along - # get attr and set attr calls to our instrumentation's proxy. - def __getattr__(self, attr): - return self.__wrapped__.__getattr__(attr) + return GeneratorProxy - def __setattr__(self, attr, value): - return self.__wrapped__.__setattr__(attr, value) - def __next__(self): +@pytest.fixture(scope="session") +def wrap_engine_api_resource_create(generator_proxy): + def _wrap_engine_api_resource_create(wrapped, instance, args, kwargs): transaction = current_transaction() - if not transaction: - return self.__wrapped__.__next__() - - try: - return_val = self.__wrapped__.__next__() - if return_val: - prompt = [k for k in OPENAI_AUDIT_LOG_CONTENTS.keys()][-1] - headers = dict( - filter( - lambda k: k[0].lower() in RECORDED_HEADERS - or k[0].lower().startswith("openai") - or k[0].lower().startswith("x-ratelimit"), - return_val._nr_response_headers.items(), - ) - ) - OPENAI_AUDIT_LOG_CONTENTS[prompt][0] = headers - OPENAI_AUDIT_LOG_CONTENTS[prompt][2].append(return_val.to_dict_recursive()) - return return_val - except Exception as e: - raise - - def close(self): - return super(GeneratorProxy, self).close() + if not transaction: + return wrapped(*args, **kwargs) -def wrap_engine_api_resource_create(wrapped, instance, args, kwargs): - transaction = current_transaction() - - if not transaction: - return wrapped(*args, **kwargs) + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) - bound_args = bind_args(wrapped, args, kwargs) - stream = bound_args["params"].get("stream", False) + return_val = wrapped(*args, **kwargs) - return_val = wrapped(*args, **kwargs) + if stream: + return generator_proxy(return_val) + else: + return return_val - if stream: - return GeneratorProxy(return_val) - else: - return return_val + return _wrap_engine_api_resource_create From b38b0675176c5ae34984e79c7b948bc44c6085e2 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 13 Feb 2024 10:39:04 -0800 Subject: [PATCH 17/19] Remove 3.8 from langchain tests --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini 
b/tox.ini index 969f98548..acc0c41bd 100644 --- a/tox.ini +++ b/tox.ini @@ -145,7 +145,7 @@ envlist = python-mlmodel_openai-openai107-{py312}, python-mlmodel_openai-openailatest-{py37,py38,py39,py310,py311,py312,pypy38}, ; langchain dependency faiss-cpu isn't compatible with 3.12 yet. - python-mlmodel_langchain-{py38,py39,py310,py311,pypy38}, + python-mlmodel_langchain-{py39,py310,py311}, python-logger_logging-{py27,py37,py38,py39,py310,py311,py312,pypy27,pypy38}, python-logger_loguru-{py37,py38,py39,py310,py311,py312,pypy38}-logurulatest, python-logger_loguru-py39-loguru{06,05}, From 974e5646f8c576b9bd4db255c30d0e8ab2009e44 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Wed, 14 Feb 2024 16:47:02 -0800 Subject: [PATCH 18/19] Add empty line between lines --- tests/mlmodel_openai/_mock_external_openai_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index e1ed8271d..17ce71ada 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -666,7 +666,7 @@ def _simple_get(self): # Force a parsing error by writing an invalid streamed response. self.wfile.write(b"data: %s" % data) else: - self.wfile.write(b"data: %s\n" % data) + self.wfile.write(b"data: %s\n\n" % data) else: self.wfile.write(json.dumps(response).encode("utf-8")) return From d61d0655d93daf23e1abfec14fb414cef415e257 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Thu, 15 Feb 2024 09:01:06 -0800 Subject: [PATCH 19/19] Remove unneeded loop fixture --- tests/mlmodel_openai/test_chat_completion_stream_error.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index 267f97e96..15fb1512d 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -693,7 +693,7 @@ async def consumer(): @validate_custom_events(expected_events_stream_parsing_error) @validate_custom_event_count(count=2) @background_task() -def test_chat_completion_stream_parsing_error(loop, monkeypatch, set_trace_info): +def test_chat_completion_stream_parsing_error(monkeypatch, set_trace_info): with pytest.raises(openai.error.APIError): set_trace_info()
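
Note: only the async variants of these tests drive coroutines through
loop.run_until_complete, so the synchronous parsing test never used the
event-loop fixture. For reference, a session-scoped loop fixture of the
kind these suites rely on typically looks like the following sketch (not
necessarily the exact conftest definition):

    import asyncio

    import pytest

    @pytest.fixture(scope="session")
    def loop():
        loop = asyncio.new_event_loop()
        yield loop
        loop.close()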