From b8c3a38caaba9fa3644831171f30627ee1cd5b22 Mon Sep 17 00:00:00 2001
From: Hannah Stepanek
Date: Mon, 18 Dec 2023 10:20:12 -0800
Subject: [PATCH 01/19] Mark instrumentation points for SDK (#1009)

* Mark instrumentation points for SDK

* Remove duplicated assertion

* Fixup: assert attribute not function

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
---
 newrelic/hooks/external_botocore.py          |  7 +++++
 newrelic/hooks/mlmodel_openai.py             | 31 +++++++++++++------
 .../test_bedrock_chat_completion.py          |  4 +++
 .../test_bedrock_embeddings.py               |  6 +++-
 tests/mlmodel_openai/test_chat_completion.py |  5 +++
 tests/mlmodel_openai/test_embeddings.py      |  5 +++
 6 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index 2a327a84a..5a7c2e56c 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -592,6 +592,12 @@ def _nr_clientcreator__create_api_method_(wrapped, instance, args, kwargs):
     return tracer(wrapped)


+def _nr_clientcreator__create_methods(wrapped, instance, args, kwargs):
+    class_attributes = wrapped(*args, **kwargs)
+    class_attributes["_nr_wrapped"] = True
+    return class_attributes
+
+
 def _bind_make_request_params(operation_model, request_dict, *args, **kwargs):
     return operation_model, request_dict

@@ -622,3 +628,4 @@ def instrument_botocore_endpoint(module):

 def instrument_botocore_client(module):
     wrap_function_wrapper(module, "ClientCreator._create_api_method", _nr_clientcreator__create_api_method_)
+    wrap_function_wrapper(module, "ClientCreator._create_methods", _nr_clientcreator__create_methods)
diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index e200e80f0..40eb4f61c 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -884,21 +884,33 @@ async def wrap_base_client_process_response_async(wrapped, instance, args, kwarg


 def instrument_openai_util(module):
-    wrap_function_wrapper(module, "convert_to_openai_object", wrap_convert_to_openai_object)
+    if hasattr(module, "convert_to_openai_object"):
+        wrap_function_wrapper(module, "convert_to_openai_object", wrap_convert_to_openai_object)
+        # This is to mark where we instrument so the SDK knows not to instrument them
+        # again.
+        setattr(module.convert_to_openai_object, "_nr_wrapped", True)


 def instrument_openai_api_resources_embedding(module):
-    if hasattr(module.Embedding, "create"):
-        wrap_function_wrapper(module, "Embedding.create", wrap_embedding_sync)
-    if hasattr(module.Embedding, "acreate"):
-        wrap_function_wrapper(module, "Embedding.acreate", wrap_embedding_async)
+    if hasattr(module, "Embedding"):
+        if hasattr(module.Embedding, "create"):
+            wrap_function_wrapper(module, "Embedding.create", wrap_embedding_sync)
+        if hasattr(module.Embedding, "acreate"):
+            wrap_function_wrapper(module, "Embedding.acreate", wrap_embedding_async)
+        # This is to mark where we instrument so the SDK knows not to instrument them
+        # again.
+        setattr(module.Embedding, "_nr_wrapped", True)


 def instrument_openai_api_resources_chat_completion(module):
-    if hasattr(module.ChatCompletion, "create"):
-        wrap_function_wrapper(module, "ChatCompletion.create", wrap_chat_completion_sync)
-    if hasattr(module.ChatCompletion, "acreate"):
-        wrap_function_wrapper(module, "ChatCompletion.acreate", wrap_chat_completion_async)
+    if hasattr(module, "ChatCompletion"):
+        if hasattr(module.ChatCompletion, "create"):
+            wrap_function_wrapper(module, "ChatCompletion.create", wrap_chat_completion_sync)
+        if hasattr(module.ChatCompletion, "acreate"):
+            wrap_function_wrapper(module, "ChatCompletion.acreate", wrap_chat_completion_async)
+        # This is to mark where we instrument so the SDK knows not to instrument them
+        # again.
+        setattr(module.ChatCompletion, "_nr_wrapped", True)


 def instrument_openai_resources_chat_completions(module):
@@ -908,7 +920,6 @@
     wrap_function_wrapper(module, "AsyncCompletions.create", wrap_chat_completion_async)


-# OpenAI v1 instrumentation points
 def instrument_openai_resources_embeddings(module):
     if hasattr(module, "Embeddings"):
         if hasattr(module.Embeddings, "create"):
diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py
index 604771c82..efcc7cec0 100644
--- a/tests/external_botocore/test_bedrock_chat_completion.py
+++ b/tests/external_botocore/test_bedrock_chat_completion.py
@@ -287,3 +287,7 @@ def _test():
         exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100)

     _test()
+
+
+def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server):
+    assert bedrock_server._nr_wrapped
diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py
index 7a5740e46..cc442fc15 100644
--- a/tests/external_botocore/test_bedrock_embeddings.py
+++ b/tests/external_botocore/test_bedrock_embeddings.py
@@ -1,4 +1,4 @@
- # Copyright 2010 New Relic, Inc.
+# Copyright 2010 New Relic, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -172,3 +172,7 @@ def _test():
         exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100)

     _test()
+
+
+def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server):
+    assert bedrock_server._nr_wrapped
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index f2c31b262..e141e45e5 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -371,3 +371,8 @@ def test_openai_chat_completion_async_disabled_custom_event_settings(loop):
             model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
         )
     )
+
+
+def test_openai_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility():
+    assert openai.ChatCompletion._nr_wrapped
+    assert openai.util.convert_to_openai_object._nr_wrapped
diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py
index ae2c048fc..65ac33e87 100644
--- a/tests/mlmodel_openai/test_embeddings.py
+++ b/tests/mlmodel_openai/test_embeddings.py
@@ -148,3 +148,8 @@ def test_openai_embedding_async_disabled_custom_insights_events(loop):
     loop.run_until_complete(
         openai.Embedding.acreate(input="This is an embedding test.", model="text-embedding-ada-002")
     )
+
+
+def test_openai_embedding_functions_marked_as_wrapped_for_sdk_compatibility():
+    assert openai.Embedding._nr_wrapped
+    assert openai.util.convert_to_openai_object._nr_wrapped

From 0bb34db25c5c729e374ba95256d8fa20b191475f Mon Sep 17 00:00:00 2001
From: Hannah Stepanek
Date: Wed, 20 Dec 2023 16:14:34 -0800
Subject: [PATCH 02/19] Prefix conversation id with llm (#1012)

* Change conversation_id->llm.conversation_id

* Fixup formatting

---
 newrelic/hooks/external_botocore.py                 |  2 +-
 newrelic/hooks/mlmodel_openai.py                    |  4 ++--
 .../test_bedrock_chat_completion.py                 |  9 ++++-----
 tests/mlmodel_openai/test_chat_completion.py        |  6 +++---
 tests/mlmodel_openai/test_chat_completion_error.py  | 12 ++++++------
 .../mlmodel_openai/test_chat_completion_error_v1.py |  8 ++++----
 tests/mlmodel_openai/test_chat_completion_v1.py     |  6 +++---
 tests/mlmodel_openai/test_get_llm_message_ids.py    | 10 +++++++---
 tests/mlmodel_openai/test_get_llm_message_ids_v1.py |  4 ++--
 9 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index 5a7c2e56c..33ba94e21 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -511,7 +511,7 @@ def handle_chat_completion_event(
     span_id,
 ):
     custom_attrs_dict = transaction._custom_params
-    conversation_id = custom_attrs_dict.get("conversation_id", "")
+    conversation_id = custom_attrs_dict.get("llm.conversation_id", "")

     chat_completion_id = str(uuid.uuid4())

diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 40eb4f61c..07c1d75bb 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -193,7 +193,7 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs):

     # Get conversation ID off of the transaction
     custom_attrs_dict = transaction._custom_params
-    conversation_id = custom_attrs_dict.get("conversation_id", "")
+    conversation_id = custom_attrs_dict.get("llm.conversation_id", "")

     settings = transaction.settings if transaction.settings is not None else global_settings()
     app_name = settings.app_name
@@ -654,7 +654,7 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs):

     # Get conversation ID off of the transaction
     custom_attrs_dict = transaction._custom_params
-    conversation_id = custom_attrs_dict.get("conversation_id", "")
+    conversation_id = custom_attrs_dict.get("llm.conversation_id", "")

     settings = transaction.settings if transaction.settings is not None else global_settings()
     app_name = settings.app_name
diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py
index efcc7cec0..2c4925a43 100644
--- a/tests/external_botocore/test_bedrock_chat_completion.py
+++ b/tests/external_botocore/test_bedrock_chat_completion.py
@@ -23,7 +23,6 @@
     chat_completion_expected_events,
     chat_completion_invalid_access_key_error_events,
     chat_completion_payload_templates,
-    chat_completion_invalid_access_key_error_events,
 )
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import (
@@ -128,7 +127,7 @@ def test_bedrock_chat_completion_in_txn_with_convo_id(set_trace_info, exercise_m
     @background_task(name="test_bedrock_chat_completion_in_txn_with_convo_id")
     def _test():
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         exercise_model(prompt=_test_bedrock_chat_completion_prompt, temperature=0.7, max_tokens=100)

     _test()
@@ -160,7 +159,7 @@ def _test():
 @reset_core_stats_engine()
 @validate_custom_event_count(count=0)
 def test_bedrock_chat_completion_outside_txn(set_trace_info, exercise_model):
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     exercise_model(prompt=_test_bedrock_chat_completion_prompt, temperature=0.7, max_tokens=100)

@@ -237,7 +236,7 @@ def test_bedrock_chat_completion_error_invalid_model(bedrock_server, set_trace_i
     @background_task(name="test_bedrock_chat_completion_error_invalid_model")
     def _test():
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         with pytest.raises(_client_error):
             bedrock_server.invoke_model(
                 body=b"{}",
@@ -283,7 +282,7 @@ def _test():
         with pytest.raises(_client_error):  # not sure where this exception actually comes from
             set_trace_info()
-            add_custom_attribute("conversation_id", "my-awesome-id")
+            add_custom_attribute("llm.conversation_id", "my-awesome-id")
             exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100)

     _test()
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index e141e45e5..76017a22a 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -146,7 +146,7 @@
 @background_task()
 def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     openai.ChatCompletion.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
     )
@@ -272,7 +272,7 @@ def test_openai_chat_completion_sync_in_txn_no_convo_id(set_trace_info):
 @reset_core_stats_engine()
 @validate_custom_event_count(count=0)
 def test_openai_chat_completion_sync_outside_txn():
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     openai.ChatCompletion.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
     )
@@ -335,7 +335,7 @@ def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info
 @background_task()
 def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     loop.run_until_complete(
         openai.ChatCompletion.acreate(
diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py
index fe298c02b..a8d3bdc51 100644
--- a/tests/mlmodel_openai/test_chat_completion_error.py
+++ b/tests/mlmodel_openai/test_chat_completion_error.py
@@ -131,7 +131,7 @@ def test_chat_completion_invalid_request_error_no_model(set_trace_info):

     with pytest.raises(openai.InvalidRequestError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         openai.ChatCompletion.create(
             # no model provided,
             messages=_test_openai_chat_completion_messages,
@@ -215,7 +215,7 @@ def test_chat_completion_invalid_request_error_no_model(set_trace_info):
 def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
     with pytest.raises(openai.InvalidRequestError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         openai.ChatCompletion.create(
             model="does-not-exist",
             messages=({"role": "user", "content": "Model does not exist."},),
@@ -315,7 +315,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
 def test_chat_completion_authentication_error(monkeypatch, set_trace_info):
     with pytest.raises(openai.error.AuthenticationError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         monkeypatch.setattr(openai, "api_key", None)  # openai.api_key = None
         openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
@@ -439,7 +439,7 @@ def test_chat_completion_wrong_api_key_error(monkeypatch, set_trace_info):
 def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_info):
     with pytest.raises(openai.InvalidRequestError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         loop.run_until_complete(
             openai.ChatCompletion.acreate(
                 # no model provided,
@@ -481,7 +481,7 @@ def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_in
 def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_trace_info):
     with pytest.raises(openai.InvalidRequestError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         loop.run_until_complete(
             openai.ChatCompletion.acreate(
                 model="does-not-exist",
@@ -520,7 +520,7 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra
 def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace_info):
     with pytest.raises(openai.error.AuthenticationError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         monkeypatch.setattr(openai, "api_key", None)  # openai.api_key = None
         loop.run_until_complete(
             openai.ChatCompletion.acreate(
diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py
index 70dc58f99..670689c92 100644
--- a/tests/mlmodel_openai/test_chat_completion_error_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py
@@ -127,7 +127,7 @@
 def test_chat_completion_invalid_request_error_no_model(set_trace_info, sync_openai_client):
     with pytest.raises(TypeError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         sync_openai_client.chat.completions.create(
             messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
         )
@@ -160,7 +160,7 @@ def test_chat_completion_invalid_request_error_no_model(set_trace_info, sync_ope
 def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_info, async_openai_client):
     with pytest.raises(TypeError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         loop.run_until_complete(
             async_openai_client.chat.completions.create(
                 messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
@@ -242,7 +242,7 @@ def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_in
 def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, sync_openai_client):
     with pytest.raises(openai.NotFoundError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         sync_openai_client.chat.completions.create(
             model="does-not-exist",
             messages=({"role": "user", "content": "Model does not exist."},),
@@ -281,7 +281,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn
 def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_trace_info, async_openai_client):
     with pytest.raises(openai.NotFoundError):
         set_trace_info()
-        add_custom_attribute("conversation_id", "my-awesome-id")
+        add_custom_attribute("llm.conversation_id", "my-awesome-id")
         loop.run_until_complete(
             async_openai_client.chat.completions.create(
                 model="does-not-exist",
diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py
index 4df977a6c..b1b35826c 100644
--- a/tests/mlmodel_openai/test_chat_completion_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_v1.py
@@ -146,7 +146,7 @@
 @background_task()
 def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info, sync_openai_client):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     sync_openai_client.chat.completions.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
     )
@@ -272,7 +272,7 @@ def test_openai_chat_completion_sync_in_txn_no_convo_id(set_trace_info, sync_ope
 @reset_core_stats_engine()
 @validate_custom_event_count(count=0)
 def test_openai_chat_completion_sync_outside_txn(sync_openai_client):
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
     sync_openai_client.chat.completions.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100
     )
@@ -335,7 +335,7 @@ def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info
 @background_task()
 def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info, async_openai_client):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     loop.run_until_complete(
         async_openai_client.chat.completions.create(
diff --git a/tests/mlmodel_openai/test_get_llm_message_ids.py b/tests/mlmodel_openai/test_get_llm_message_ids.py
index af073f730..8489f4f3d 100644
--- a/tests/mlmodel_openai/test_get_llm_message_ids.py
+++ b/tests/mlmodel_openai/test_get_llm_message_ids.py
@@ -13,10 +13,14 @@
 # limitations under the License.

 import openai
+from testing_support.fixtures import (
+    reset_core_stats_engine,
+    validate_custom_event_count,
+)
+
 from newrelic.api.background_task import background_task
 from newrelic.api.ml_model import get_llm_message_ids, record_llm_feedback_event
 from newrelic.api.transaction import add_custom_attribute, current_transaction
-from testing_support.fixtures import reset_core_stats_engine, validate_custom_event_count

 _test_openai_chat_completion_messages_1 = (
     {"role": "system", "content": "You are a scientist."},
@@ -114,7 +118,7 @@ def test_get_llm_message_ids_outside_transaction():
 @background_task()
 def test_get_llm_message_ids_mulitple_async(loop, set_trace_info):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     async def _run():
         res1 = await openai.ChatCompletion.acreate(
@@ -172,7 +176,7 @@ async def _run():
 @background_task()
 def test_get_llm_message_ids_mulitple_sync(set_trace_info):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     results = openai.ChatCompletion.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages_1, temperature=0.7, max_tokens=100
diff --git a/tests/mlmodel_openai/test_get_llm_message_ids_v1.py b/tests/mlmodel_openai/test_get_llm_message_ids_v1.py
index f85a26c2a..094ddcd5a 100644
--- a/tests/mlmodel_openai/test_get_llm_message_ids_v1.py
+++ b/tests/mlmodel_openai/test_get_llm_message_ids_v1.py
@@ -116,7 +116,7 @@ def test_get_llm_message_ids_outside_transaction():
 @background_task()
 def test_get_llm_message_ids_mulitple_async(loop, set_trace_info, async_openai_client):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     async def _run():
         res1 = await async_openai_client.chat.completions.create(
@@ -174,7 +174,7 @@ async def _run():
 @background_task()
 def test_get_llm_message_ids_mulitple_sync(set_trace_info, sync_openai_client):
     set_trace_info()
-    add_custom_attribute("conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")

     results = sync_openai_client.chat.completions.create(
         model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages_1, temperature=0.7, max_tokens=100

From dbcbca57a41551c521278f3404e379f4786da4db Mon Sep 17 00:00:00 2001
From: Uma Annamalai
Date: Thu, 21 Dec 2023 10:38:39 -0800
Subject: [PATCH 03/19] Add support for Meta Llama2. (#1010)

* Add support for Llama2.
* Fixup: lint errors

* [Mega-Linter] Apply linters fixes

* Trigger tests

---------

Co-authored-by: Hannah Stepanek
Co-authored-by: hmstepanek
---
 newrelic/hooks/external_botocore.py  |  43 ++++++-
 newrelic/hooks/mlmodel_openai.py     |   2 +-
 .../_mock_external_bedrock_server.py |  21 +++-
 .../_test_bedrock_chat_completion.py | 115 ++++++++++++++++++
 .../test_bedrock_chat_completion.py  |   1 +
 5 files changed, 176 insertions(+), 6 deletions(-)

diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index 33ba94e21..6e3be661b 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -144,7 +144,7 @@ def create_chat_completion_message_event(
             "response.model": request_model,
             "vendor": "bedrock",
             "ingest_source": "Python",
-            "is_response": True
+            "is_response": True,
         }
         transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)

@@ -246,7 +246,7 @@ def extract_bedrock_claude_model(request_body, response_body=None):
     chat_completion_summary_dict = {
         "request.max_tokens": request_body.get("max_tokens_to_sample", ""),
         "request.temperature": request_body.get("temperature", ""),
-        "response.number_of_messages": len(input_message_list)
+        "response.number_of_messages": len(input_message_list),
     }

     if response_body:
@@ -264,6 +264,40 @@ def extract_bedrock_claude_model(request_body, response_body=None):
     return input_message_list, output_message_list, chat_completion_summary_dict


+def extract_bedrock_llama_model(request_body, response_body=None):
+    request_body = json.loads(request_body)
+    if response_body:
+        response_body = json.loads(response_body)
+
+    input_message_list = [{"role": "user", "content": request_body.get("prompt", "")}]
+
+    chat_completion_summary_dict = {
+        "request.max_tokens": request_body.get("max_gen_len", ""),
+        "request.temperature": request_body.get("temperature", ""),
+        "response.number_of_messages": len(input_message_list),
+    }
+
+    if response_body:
+        output_message_list = [{"role": "assistant", "content": response_body.get("generation", "")}]
+        prompt_tokens = response_body.get("prompt_token_count", None)
+        completion_tokens = response_body.get("generation_token_count", None)
+        total_tokens = prompt_tokens + completion_tokens if prompt_tokens and completion_tokens else None
+
+        chat_completion_summary_dict.update(
+            {
+                "response.usage.completion_tokens": completion_tokens,
+                "response.usage.prompt_tokens": prompt_tokens,
+                "response.usage.total_tokens": total_tokens,
+                "response.choices.finish_reason": response_body.get("stop_reason", ""),
+                "response.number_of_messages": len(input_message_list) + len(output_message_list),
+            }
+        )
+    else:
+        output_message_list = []
+
+    return input_message_list, output_message_list, chat_completion_summary_dict
+
+
 def extract_bedrock_cohere_model(request_body, response_body=None):
     request_body = json.loads(request_body)
     if response_body:
@@ -274,7 +308,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
     chat_completion_summary_dict = {
         "request.max_tokens": request_body.get("max_tokens", ""),
         "request.temperature": request_body.get("temperature", ""),
-        "response.number_of_messages": len(input_message_list)
+        "response.number_of_messages": len(input_message_list),
     }

     if response_body:
@@ -300,6 +334,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
     ("ai21.j2", extract_bedrock_ai21_j2_model),
     ("cohere", extract_bedrock_cohere_model),
     ("anthropic.claude", extract_bedrock_claude_model),
+    ("meta.llama2", extract_bedrock_llama_model),
 ]


@@ -368,7 +403,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
         notice_error_attributes = {
             "http.statusCode": error_attributes["http.statusCode"],
             "error.message": error_attributes["error.message"],
-            "error.code": error_attributes["error.code"]
+            "error.code": error_attributes["error.code"],
         }

         if is_embedding:
diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 07c1d75bb..7b0ecbe4f 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -868,7 +868,7 @@ def wrap_base_client_process_response_sync(wrapped, instance, args, kwargs):
     nr_response_headers = getattr(response, "headers")

     return_val = wrapped(*args, **kwargs)
-    # Obtain reponse headers for v1
+    # Obtain response headers for v1
     return_val._nr_response_headers = nr_response_headers
     return return_val

diff --git a/tests/external_botocore/_mock_external_bedrock_server.py b/tests/external_botocore/_mock_external_bedrock_server.py
index da5ff68dd..609e7afa9 100644
--- a/tests/external_botocore/_mock_external_bedrock_server.py
+++ b/tests/external_botocore/_mock_external_bedrock_server.py
@@ -3332,6 +3332,16 @@
             "prompt": "What is 212 degrees Fahrenheit converted to Celsius?",
         },
     ],
+    "meta.llama2-13b-chat-v1::What is 212 degrees Fahrenheit converted to Celsius?": [
+        {"Content-Type": "application/json", "x-amzn-RequestId": "9a64cdb0-3e82-41c7-873a-c12a77e0143a"},
+        200,
+        {
+            "generation": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+            "prompt_token_count": 17,
+            "generation_token_count": 46,
+            "stop_reason": "stop",
+        },
+    ],
     "does-not-exist::": [
         {
             "Content-Type": "application/json",
@@ -3395,6 +3405,15 @@
         403,
         {"message": "The security token included in the request is invalid."},
     ],
+    "meta.llama2-13b-chat-v1::Invalid Token": [
+        {
+            "Content-Type": "application/json",
+            "x-amzn-RequestId": "22476490-a0d6-42db-b5ea-32d0b8a7f751",
+            "x-amzn-ErrorType": "UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/",
+        },
+        403,
+        {"message": "The security token included in the request is invalid."},
+    ],
 }

 MODEL_PATH_RE = re.compile(r"/model/([^/]+)/invoke")
@@ -3454,7 +3473,7 @@ def __init__(self, handler=simple_get, port=None, *args, **kwargs):
 if __name__ == "__main__":
     # Use this to sort dict for easier future incremental updates
     print("RESPONSES = %s" % dict(sorted(RESPONSES.items(), key=lambda i: (i[1][1], i[0]))))
-    
+
     with MockExternalBedrockServer() as server:
         print("MockExternalBedrockServer serving on port %s" % str(server.port))
         while True:
diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py b/tests/external_botocore/_test_bedrock_chat_completion.py
index e3f53fd31..f1d21c73c 100644
--- a/tests/external_botocore/_test_bedrock_chat_completion.py
+++ b/tests/external_botocore/_test_bedrock_chat_completion.py
@@ -3,6 +3,7 @@
     "ai21.j2-mid-v1": '{"prompt": "%s", "temperature": %f, "maxTokens": %d}',
     "anthropic.claude-instant-v1": '{"prompt": "Human: %s Assistant:", "temperature": %f, "max_tokens_to_sample": %d}',
     "cohere.command-text-v14": '{"prompt": "%s", "temperature": %f, "max_tokens": %d}',
+    "meta.llama2-13b-chat-v1": '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}',
 }

 chat_completion_expected_events = {
@@ -263,6 +264,72 @@
         },
     ),
     ],
+    "meta.llama2-13b-chat-v1": [
+        (
+            {"type": "LlmChatCompletionSummary"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "transaction_id": "transaction-id",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "api_key_last_four_digits": "CRET",
+                "duration": None,  # Response time varies each test run
+                "request.model": "meta.llama2-13b-chat-v1",
+                "response.model": "meta.llama2-13b-chat-v1",
+                "response.usage.prompt_tokens": 17,
+                "response.usage.completion_tokens": 46,
+                "response.usage.total_tokens": 63,
+                "request.temperature": 0.7,
+                "request.max_tokens": 100,
+                "response.choices.finish_reason": "stop",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "response.number_of_messages": 2,
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": "What is 212 degrees Fahrenheit converted to Celsius?",
+                "role": "user",
+                "completion_id": None,
+                "sequence": 0,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+                "role": "assistant",
+                "completion_id": None,
+                "sequence": 1,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "is_response": True,
+            },
+        ),
+    ],
 }

 chat_completion_invalid_model_error_events = [
@@ -480,6 +547,49 @@
         },
     ),
     ],
+    "meta.llama2-13b-chat-v1": [
+        (
+            {"type": "LlmChatCompletionSummary"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "transaction_id": "transaction-id",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "request_id": "",
+                "api_key_last_four_digits": "-KEY",
+                "duration": None,  # Response time varies each test run
+                "request.model": "meta.llama2-13b-chat-v1",
+                "response.model": "meta.llama2-13b-chat-v1",
+                "request.temperature": 0.7,
+                "request.max_tokens": 100,
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "response.number_of_messages": 1,
+                "error": True,
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": "Invalid Token",
+                "role": "user",
+                "completion_id": None,
+                "sequence": 0,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+            },
+        ),
+    ],
 }

 chat_completion_expected_client_errors = {
     "ai21.j2-mid-v1": {
         "http.statusCode": 403,
         "error.message": "The security token included in the request is invalid.",
         "error.code": "UnrecognizedClientException",
     },
     "anthropic.claude-instant-v1": {
         "http.statusCode": 403,
         "error.message": "The security token included in the request is invalid.",
         "error.code": "UnrecognizedClientException",
     },
     "cohere.command-text-v14": {
         "http.statusCode": 403,
         "error.message": "The security token included in the request is invalid.",
         "error.code": "UnrecognizedClientException",
     },
+    "meta.llama2-13b-chat-v1": {
+        "http.statusCode": 403,
+        "error.message": "The security token included in the request is invalid.",
+        "error.code": "UnrecognizedClientException",
+    },
 }
diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py
index 2c4925a43..c5c2a4706 100644
--- a/tests/external_botocore/test_bedrock_chat_completion.py
+++ b/tests/external_botocore/test_bedrock_chat_completion.py
@@ -56,6 +56,7 @@ def is_file_payload(request):
         "ai21.j2-mid-v1",
         "anthropic.claude-instant-v1",
         "cohere.command-text-v14",
+        "meta.llama2-13b-chat-v1",
     ],
 )
 def model_id(request):

From 3f8202122a523450e07dc177888a44190f14857f Mon Sep 17 00:00:00 2001
From: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
Date: Wed, 10 Jan 2024 15:17:14 -0800
Subject: [PATCH 04/19] Add bedrock feedback into preview (#1030)

* Add AWS Bedrock testing infrastructure

* Squashed commit of the following:

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* Squashed commit of the following:

commit 182c7a8c8a91e2d0f234f7ed7d4a14a2422c8342
Author: Uma Annamalai
Date:   Fri Oct 13 10:12:55 2023 -0700

    Add request/ response IDs.

commit f6d13f822c22d2039ec32be86b2c54f9dc3de1c9
Author: Uma Annamalai
Date:   Thu Oct 12 13:23:39 2023 -0700

    Test cleanup.

commit d0576631d009e481bd5887a3243aac99b097d823
Author: Uma Annamalai
Date:   Tue Oct 10 10:23:00 2023 -0700

    Remove commented code.

commit dd29433e719482babbe5c724e7330b1f6324abd7
Author: Uma Annamalai
Date:   Tue Oct 10 10:19:01 2023 -0700

    Add openai sync instrumentation.

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* TEMP

* Bedrock titan extraction nearly complete

* Bedrock Testing Infrastructure (#937)

* Add AWS Bedrock testing infrastructure

* Cache Package Version Lookups (#946)
* Cache _get_package_version
* Add Python 2.7 support to get_package_version caching
* [Mega-Linter] Apply linters fixes
* Bump tests
---------
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino

* Fix Redis Generator Methods (#947)
* Fix scan_iter for redis
* Replace generator methods
* Update instance info instrumentation
* Remove mistake from uninstrumented methods
* Add skip condition to asyncio generator tests
* Add skip condition to asyncio generator tests
---------
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Automatic RPM System Updates (#948)
* Checkout old action
* Adding RPM action
* Add dry run
* Incorporating action into workflow
* Wire secret into custom action
* Enable action
* Correct action name
* Fix syntax
* Fix quoting issues
* Drop pre-verification. Does not work on python
* Fix merge artifact

* Remove OpenAI references
---------
Co-authored-by: Uma Annamalai
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Cleaning up titan bedrock implementation

* TEMP

* Tests for bedrock passing
Co-authored-by: Lalleh Rafeei

* Cleaned up titan testing
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek

* Parametrized bedrock testing

* Add support for AI21-J2 models

* Change to dynamic no conversation id events

* Drop all openai refs

* [Mega-Linter] Apply linters fixes

* Adding response_id and response_model

* Apply suggestions from code review

* Remove unused import

* Bedrock Sync Chat Completion Instrumentation (#953)

* Add AWS Bedrock testing infrastructure

* Squashed commit of the following:

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* Squashed commit of the following:

commit 182c7a8c8a91e2d0f234f7ed7d4a14a2422c8342
Author: Uma Annamalai
Date:   Fri Oct 13 10:12:55 2023 -0700

    Add request/ response IDs.

commit f6d13f822c22d2039ec32be86b2c54f9dc3de1c9
Author: Uma Annamalai
Date:   Thu Oct 12 13:23:39 2023 -0700

    Test cleanup.

commit d0576631d009e481bd5887a3243aac99b097d823
Author: Uma Annamalai
Date:   Tue Oct 10 10:23:00 2023 -0700

    Remove commented code.

commit dd29433e719482babbe5c724e7330b1f6324abd7
Author: Uma Annamalai
Date:   Tue Oct 10 10:19:01 2023 -0700

    Add openai sync instrumentation.

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* Cache Package Version Lookups (#946)
* Cache _get_package_version
* Add Python 2.7 support to get_package_version caching
* [Mega-Linter] Apply linters fixes
* Bump tests
---------
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino

* Fix Redis Generator Methods (#947)
* Fix scan_iter for redis
* Replace generator methods
* Update instance info instrumentation
* Remove mistake from uninstrumented methods
* Add skip condition to asyncio generator tests
* Add skip condition to asyncio generator tests
---------
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* TEMP

* Automatic RPM System Updates (#948)
* Checkout old action
* Adding RPM action
* Add dry run
* Incorporating action into workflow
* Wire secret into custom action
* Enable action
* Correct action name
* Fix syntax
* Fix quoting issues
* Drop pre-verification. Does not work on python
* Fix merge artifact

* Bedrock titan extraction nearly complete

* Cleaning up titan bedrock implementation

* TEMP

* Tests for bedrock passing
Co-authored-by: Lalleh Rafeei

* Cleaned up titan testing
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek

* Parametrized bedrock testing

* Add support for AI21-J2 models

* Change to dynamic no conversation id events

* Drop all openai refs

* [Mega-Linter] Apply linters fixes

* Adding response_id and response_model

* Drop python 3.7 tests for Hypercorn (#954)

* Apply suggestions from code review

* Remove unused import

---------
Co-authored-by: Uma Annamalai
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek
Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>

* Initial feedback commit for botocore

* Feature bedrock cohere instrumentation (#955)

* Add AWS Bedrock testing infrastructure

* Squashed commit of the following:

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* Squashed commit of the following:

commit 182c7a8c8a91e2d0f234f7ed7d4a14a2422c8342
Author: Uma Annamalai
Date:   Fri Oct 13 10:12:55 2023 -0700

    Add request/ response IDs.

commit f6d13f822c22d2039ec32be86b2c54f9dc3de1c9
Author: Uma Annamalai
Date:   Thu Oct 12 13:23:39 2023 -0700

    Test cleanup.

commit d0576631d009e481bd5887a3243aac99b097d823
Author: Uma Annamalai
Date:   Tue Oct 10 10:23:00 2023 -0700

    Remove commented code.

commit dd29433e719482babbe5c724e7330b1f6324abd7
Author: Uma Annamalai
Date:   Tue Oct 10 10:19:01 2023 -0700

    Add openai sync instrumentation.

commit 2834663794c649124052e510c1c9557a830c060a
Author: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Date:   Mon Oct 9 17:42:05 2023 -0700

    OpenAI Mock Backend (#929)

    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Pin flask version for flask restx tests. (#931)
    * Ignore new redis methods. (#932)
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    * Remove approved paths
    * Update CI Image (#930)
    * Update available python versions in CI
    * Update makefile with overrides
    * Fix default branch detection for arm builds
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Only get package version once (#928)
    * Only get package version once
    * Add disconnect method
    * Add disconnect method
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    * Add datalib dependency for embedding testing.
    * Add OpenAI Test Infrastructure (#926)
    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]
    * Add mock external openai server
    * Add mocked OpenAI server fixtures
    * Set up recorded responses.
    * Clean mock server to depend on http server
    * Linting
    * Remove approved paths
    * Add mocking for embedding endpoint
    * [Mega-Linter] Apply linters fixes
    * Add ratelimit headers
    * [Mega-Linter] Apply linters fixes
    * Add datalib dependency for embedding testing.
    ---------
    Co-authored-by: Uma Annamalai
    Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: TimPansino
    Co-authored-by: Hannah Stepanek
    Co-authored-by: mergify[bot]

commit db63d4598c94048986c0e00ebb2cd8827100b54c
Author: Uma Annamalai
Date:   Mon Oct 2 15:31:38 2023 -0700

    Add OpenAI Test Infrastructure (#926)

    * Add openai to tox
    * Add OpenAI test files.
    * Add test functions.
    * [Mega-Linter] Apply linters fixes
    ---------
    Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
    Co-authored-by: mergify[bot]

* TEMP

* Bedrock titan extraction nearly complete

* Cleaning up titan bedrock implementation

* TEMP

* Tests for bedrock passing
Co-authored-by: Lalleh Rafeei

* Cleaned up titan testing
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek

* Parametrized bedrock testing

* Add support for AI21-J2 models

* Change to dynamic no conversation id events

* Add cohere model

* Remove openai instrumentation from this branch

* Remove OpenAI from newrelic/config.py

---------
Co-authored-by: Uma Annamalai
Co-authored-by: Tim Pansino
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek

* Bedrock feedback w/ testing for titan and jurassic models

* AWS Bedrock Embedding Instrumentation (#957)
* AWS Bedrock embedding instrumentation
* Correct symbol name

* Add support for bedrock claude (#960)
Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>

* Fix merge conflicts

* Combine Botocore Tests (#959)
* Initial file migration
* Enable DT on all span tests
* Add pytest skip for older botocore versions
* Fixup: app name merge conflict
---------
Co-authored-by: Hannah Stepanek

* Add to and move feedback tests

* Handle 0.32.0.post1 version in tests (#963)

* Remove response_id dependency in bedrock

* Change API name

* Update moto

* Bedrock Error Tracing (#966)

* Cache Package Version Lookups (#946)
* Cache _get_package_version
* Add Python 2.7 support to get_package_version caching
* [Mega-Linter] Apply linters fixes
* Bump tests
---------
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino

* Fix Redis Generator Methods (#947)
* Fix scan_iter for redis
* Replace generator methods
* Update instance info instrumentation
* Remove mistake from uninstrumented methods
* Add skip condition to asyncio generator tests
* Add skip condition to asyncio generator tests
---------
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Automatic RPM System Updates (#948)
* Checkout old action
* Adding RPM action
* Add dry run
* Incorporating action into workflow
* Wire secret into custom action
* Enable action
* Correct action name
* Fix syntax
* Fix quoting issues
* Drop pre-verification. Does not work on python
* Fix merge artifact

* Drop python 3.7 tests for Hypercorn (#954)

* Fix pyenv installation for devcontainer (#936)
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Remove duplicate kafka import hook (#956)
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

* Initial bedrock error tracing commit

* Handle 0.32.0.post1 version in tests (#963)

* Add status code to mock bedrock server

* Updating error response recording logic

* Work on bedrock error tracing

* Chat completion error tracing

* Adding embedding error tracing

* Delete comment

* Update moto

---------
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino
Co-authored-by: Lalleh Rafeei
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com>
Co-authored-by: Hannah Stepanek

* Change ids to match other tests

* move message_ids declaration outside for loop

* Add comment to tox.ini

* Drop py27 from memcache testing.

* Drop pypy27 from memcache testing.
* Update flaskrestx testing #1004
* Remove tastypie 0.14.3 testing
* Remove tastypie 0.14.3 testing
* Remove python 3.12 support (for now)
* Remove untouched files from diff list

---------

Co-authored-by: Uma Annamalai
Co-authored-by: Tim Pansino
Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
Co-authored-by: SlavaSkvortsov <29122694+SlavaSkvortsov@users.noreply.github.com>
Co-authored-by: TimPansino
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Lalleh Rafeei
Co-authored-by: Hannah Stepanek
Co-authored-by: Hannah Stepanek
---
 newrelic/api/ml_model.py | 9 +-
 newrelic/hooks/external_botocore.py | 9 +-
 .../_test_bedrock_chat_completion.py | 87 +++++++++++++++++++
 .../_test_bedrock_embeddings.py | 18 +++-
 .../test_bedrock_embeddings.py | 2 +-
 tox.ini | 1 +
 6 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/newrelic/api/ml_model.py b/newrelic/api/ml_model.py
index 3d15cf8d3..03408253b 100644
--- a/newrelic/api/ml_model.py
+++ b/newrelic/api/ml_model.py
@@ -40,12 +40,15 @@ def wrap_mlmodel(model, name=None, version=None, feature_names=None, label_names
 
 def get_llm_message_ids(response_id=None):
     transaction = current_transaction()
-    if response_id and transaction:
+    if transaction:
         nr_message_ids = getattr(transaction, "_nr_message_ids", {})
-        message_id_info = nr_message_ids.pop(response_id, ())
+        message_id_info = (
+            nr_message_ids.pop("bedrock_key", ()) if not response_id else nr_message_ids.pop(response_id, ())
+        )
 
         if not message_id_info:
-            warnings.warn("No message ids found for %s" % response_id)
+            response_id_warning = "." if not response_id else " for %s." % response_id
+            warnings.warn("No message ids found%s" % response_id_warning)
             return []
 
         conversation_id, request_id, ids = message_id_info
diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index 6e3be661b..69a2fd936 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -97,6 +97,7 @@ def create_chat_completion_message_event(
     if not transaction:
         return
 
+    message_ids = []
     for index, message in enumerate(input_message_list):
         if response_id:
             id_ = "%s-%d" % (response_id, index)  # Response ID was set, append message index to it.
@@ -128,6 +129,7 @@
             id_ = "%s-%d" % (response_id, index)  # Response ID was set, append message index to it.
         else:
             id_ = str(uuid.uuid4())  # No response IDs, use random UUID
+        message_ids.append(id_)
 
         chat_completion_message_dict = {
             "id": id_,
@@ -147,6 +149,7 @@
             "is_response": True,
         }
         transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)
+    return (conversation_id, request_id, message_ids)
 
 
 def extract_bedrock_titan_text_model(request_body, response_body=None):
@@ -577,7 +580,7 @@ def handle_chat_completion_event(
     transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict)
 
-    create_chat_completion_message_event(
+    message_ids = create_chat_completion_message_event(
         transaction=transaction,
         app_name=settings.app_name,
         input_message_list=input_message_list,
@@ -591,6 +594,10 @@
         response_id=response_id,
     )
 
+    if not hasattr(transaction, "_nr_message_ids"):
+        transaction._nr_message_ids = {}
+    transaction._nr_message_ids["bedrock_key"] = message_ids
+
 
 CUSTOM_TRACE_POINTS = {
     ("sns", "publish"): message_trace("SNS", "Produce", "Topic", extract(("TopicArn", "TargetArn"), "PhoneNumber")),
diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py b/tests/external_botocore/_test_bedrock_chat_completion.py
index f1d21c73c..652027719 100644
--- a/tests/external_botocore/_test_bedrock_chat_completion.py
+++ b/tests/external_botocore/_test_bedrock_chat_completion.py
@@ -1,3 +1,17 @@
+# Copyright 2010 New Relic, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
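The "bedrock_key" cache written by handle_chat_completion_event above is what get_llm_message_ids falls back to when it is called with no response id, since Bedrock does not always expose one. A minimal usage sketch (an assumption, not part of the patch: it must run inside the same transaction that made the Bedrock call, and the returned dict keys mirror the chat_completion_get_llm_message_ids fixture below):

    from newrelic.api.ml_model import get_llm_message_ids

    # With no response_id argument, the ids stored under "bedrock_key" are
    # popped off the current transaction and returned as a list of dicts.
    for info in get_llm_message_ids():
        print(info["conversation_id"], info["request_id"], info["message_id"])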
+ chat_completion_payload_templates = { "amazon.titan-text-express-v1": '{ "inputText": "%s", "textGenerationConfig": {"temperature": %f, "maxTokenCount": %d }}', "ai21.j2-mid-v1": '{"prompt": "%s", "temperature": %f, "maxTokens": %d}', @@ -6,6 +20,79 @@ "meta.llama2-13b-chat-v1": '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}', } +chat_completion_get_llm_message_ids = { + "amazon.titan-text-express-v1": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "03524118-8d77-430f-9e08-63b5c03a40cf", + "message_id": None, # UUID that varies with each run + }, + { + "conversation_id": "my-awesome-id", + "request_id": "03524118-8d77-430f-9e08-63b5c03a40cf", + "message_id": None, # UUID that varies with each run + }, + ] + }, + "ai21.j2-mid-v1": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "c863d9fc-888b-421c-a175-ac5256baec62", + "message_id": "1234-0", + }, + { + "conversation_id": "my-awesome-id", + "request_id": "c863d9fc-888b-421c-a175-ac5256baec62", + "message_id": "1234-1", + }, + ] + }, + "anthropic.claude-instant-v1": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "7b0b37c6-85fb-4664-8f5b-361ca7b1aa18", + "message_id": None, # UUID that varies with each run + }, + { + "conversation_id": "my-awesome-id", + "request_id": "7b0b37c6-85fb-4664-8f5b-361ca7b1aa18", + "message_id": None, # UUID that varies with each run + }, + ] + }, + "cohere.command-text-v14": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "e77422c8-fbbf-4e17-afeb-c758425c9f97", + "message_id": "e77422c8-fbbf-4e17-afeb-c758425c9f97-0", + }, + { + "conversation_id": "my-awesome-id", + "request_id": "e77422c8-fbbf-4e17-afeb-c758425c9f97", + "message_id": "e77422c8-fbbf-4e17-afeb-c758425c9f97-1", + }, + ] + }, + "meta.llama2-13b-chat-v1": { + "bedrock_key": [ + { + "conversation_id": "my-awesome-id", + "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a", + "message_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a-0", + }, + { + "conversation_id": "my-awesome-id", + "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a", + "message_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a-1", + }, + ] + }, +} + chat_completion_expected_events = { "amazon.titan-text-express-v1": [ ( diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py index ec677b426..05c8a390c 100644 --- a/tests/external_botocore/_test_bedrock_embeddings.py +++ b/tests/external_botocore/_test_bedrock_embeddings.py @@ -1,3 +1,17 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
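The embedding payload templates that follow take only the prompt via %-formatting. A small sketch of how a request body is built from one (assuming the Titan embedding entry shown below; the dict is the fixture's own):

    import json

    payload = embedding_payload_templates["amazon.titan-embed-text-v1"] % "This is an embedding test."
    body = json.loads(payload)  # {"inputText": "This is an embedding test."}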
+ embedding_payload_templates = { "amazon.titan-embed-text-v1": '{ "inputText": "%s" }', "amazon.titan-embed-g1-text-02": '{ "inputText": "%s" }', @@ -68,7 +82,7 @@ "request_id": "", "vendor": "bedrock", "ingest_source": "Python", - "error": True + "error": True, }, ), ], @@ -89,7 +103,7 @@ "request_id": "", "vendor": "bedrock", "ingest_source": "Python", - "error": True + "error": True, }, ), ], diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index cc442fc15..9fc016471 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -19,8 +19,8 @@ import pytest from _test_bedrock_embeddings import ( embedding_expected_client_errors, - embedding_expected_events, embedding_expected_error_events, + embedding_expected_events, embedding_payload_templates, ) from conftest import BOTOCORE_VERSION diff --git a/tox.ini b/tox.ini index 969f98548..878ef4ac4 100644 --- a/tox.ini +++ b/tox.ini @@ -207,6 +207,7 @@ deps = component_flask_rest: flask-restful component_flask_rest: jinja2 component_flask_rest: itsdangerous + component_flask_rest-flaskrestxlatest: flask component_flask_rest-flaskrestxlatest: flask-restx component_flask_rest-flaskrestxlatest: flask ; flask-restx only supports Flask v3 after flask-restx v1.3.0 From abf31c5ebc9fe12e0f706715849d95f4ccd1a46b Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 16 Jan 2024 20:59:20 -0800 Subject: [PATCH 05/19] Fix instrumentation for openai 1.8.0 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 878ef4ac4..c58e8d0e3 100644 --- a/tox.ini +++ b/tox.ini @@ -402,7 +402,7 @@ commands = allowlist_externals={toxinidir}/.github/scripts/* install_command= - {toxinidir}/.github/scripts/retry.sh 3 pip install {opts} {packages} + pip install {opts} {packages} extras = agent_streaming: infinite-tracing From ef2cfeb423ccbc8dd17e0af8aaa102f8a7094f42 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Wed, 31 Jan 2024 11:31:41 -0800 Subject: [PATCH 06/19] Add LLM attribute to transactions. (#1050) * Add LLM attr to transactions. * Remove newlines. * Add llm attribute to transaction event default attrs list. * Linting. 
--- newrelic/core/attribute.py | 1 + newrelic/hooks/external_botocore.py | 1 + newrelic/hooks/mlmodel_openai.py | 4 ++++ tests/external_botocore/test_bedrock_chat_completion.py | 2 ++ tests/external_botocore/test_bedrock_embeddings.py | 2 ++ tests/mlmodel_openai/test_chat_completion.py | 3 +++ tests/mlmodel_openai/test_chat_completion_v1.py | 3 +++ tests/mlmodel_openai/test_embeddings.py | 3 +++ tests/mlmodel_openai/test_embeddings_v1.py | 3 +++ 9 files changed, 22 insertions(+) diff --git a/newrelic/core/attribute.py b/newrelic/core/attribute.py index 880597a05..ea2023764 100644 --- a/newrelic/core/attribute.py +++ b/newrelic/core/attribute.py @@ -71,6 +71,7 @@ "host.displayName", "http.statusCode", "http.url", + "llm", "message.queueName", "message.routingKey", "peer.address", diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index 69a2fd936..821af702e 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -351,6 +351,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): return wrapped(*args, **kwargs) transaction.add_ml_model_info("Bedrock", BOTOCORE_VERSION) + transaction._add_agent_attribute("llm", True) # Read and replace request file stream bodies request_body = kwargs["body"] diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index 7b0ecbe4f..0741aaaea 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -35,6 +35,7 @@ def wrap_embedding_sync(wrapped, instance, args, kwargs): # Framework metric also used for entity tagging in the UI transaction.add_ml_model_info("OpenAI", OPENAI_VERSION) + transaction._add_agent_attribute("llm", True) # Obtain attributes to be stored on embedding events regardless of whether we hit an error embedding_id = str(uuid.uuid4()) @@ -181,6 +182,7 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs): # Framework metric also used for entity tagging in the UI transaction.add_ml_model_info("OpenAI", OPENAI_VERSION) + transaction._add_agent_attribute("llm", True) request_message_list = kwargs.get("messages", []) @@ -496,6 +498,7 @@ async def wrap_embedding_async(wrapped, instance, args, kwargs): # Framework metric also used for entity tagging in the UI transaction.add_ml_model_info("OpenAI", OPENAI_VERSION) + transaction._add_agent_attribute("llm", True) # Obtain attributes to be stored on embedding events regardless of whether we hit an error embedding_id = str(uuid.uuid4()) @@ -642,6 +645,7 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs): # Framework metric also used for entity tagging in the UI transaction.add_ml_model_info("OpenAI", OPENAI_VERSION) + transaction._add_agent_attribute("llm", True) request_message_list = kwargs.get("messages", []) diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py index c5c2a4706..08b26b306 100644 --- a/tests/external_botocore/test_bedrock_chat_completion.py +++ b/tests/external_botocore/test_bedrock_chat_completion.py @@ -30,6 +30,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_error_trace_attributes import ( @@ -125,6 +126,7 @@ def test_bedrock_chat_completion_in_txn_with_convo_id(set_trace_info, exercise_m ], background_task=True, ) + 
@validate_attributes("agent", ["llm"]) @background_task(name="test_bedrock_chat_completion_in_txn_with_convo_id") def _test(): set_trace_info() diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index 9fc016471..9dac7d3b5 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -29,6 +29,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_error_trace_attributes import ( @@ -109,6 +110,7 @@ def test_bedrock_embedding(set_trace_info, exercise_model, expected_events): ], background_task=True, ) + @validate_attributes("agent", ["llm"]) @background_task(name="test_bedrock_embedding") def _test(): set_trace_info() diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 76017a22a..5681dbb57 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -17,6 +17,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_transaction_metrics import ( @@ -143,6 +144,7 @@ ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info): set_trace_info() @@ -332,6 +334,7 @@ def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info): set_trace_info() diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index b1b35826c..1084fef3d 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -17,6 +17,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_transaction_metrics import ( @@ -143,6 +144,7 @@ ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info, sync_openai_client): set_trace_info() @@ -332,6 +334,7 @@ def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info, async_openai_client): set_trace_info() diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py index 65ac33e87..287ba3fab 100644 --- a/tests/mlmodel_openai/test_embeddings.py +++ b/tests/mlmodel_openai/test_embeddings.py @@ -17,6 +17,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_transaction_metrics import ( @@ -72,6 +73,7 @@ ], background_task=True, ) 
+@validate_attributes("agent", ["llm"]) @background_task() def test_openai_embedding_sync(set_trace_info): set_trace_info() @@ -114,6 +116,7 @@ def test_openai_embedding_sync_disabled_settings(set_trace_info): ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_embedding_async(loop, set_trace_info): set_trace_info() diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py index 9bf91967a..223cbfbee 100644 --- a/tests/mlmodel_openai/test_embeddings_v1.py +++ b/tests/mlmodel_openai/test_embeddings_v1.py @@ -17,6 +17,7 @@ override_application_settings, reset_core_stats_engine, validate_custom_event_count, + validate_attributes, ) from testing_support.validators.validate_custom_events import validate_custom_events from testing_support.validators.validate_transaction_metrics import ( @@ -72,6 +73,7 @@ ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_embedding_sync(set_trace_info, sync_openai_client): set_trace_info() @@ -114,6 +116,7 @@ def test_openai_embedding_sync_disabled_settings(set_trace_info, sync_openai_cli ], background_task=True, ) +@validate_attributes("agent", ["llm"]) @background_task() def test_openai_embedding_async(loop, set_trace_info, async_openai_client): set_trace_info() From 505008909d313c9e018f39f2665c262e1461a84d Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Wed, 31 Jan 2024 11:31:47 -0800 Subject: [PATCH 07/19] Remove imports for moto on py37. (#1053) --- tests/external_botocore/test_botocore_sqs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/external_botocore/test_botocore_sqs.py b/tests/external_botocore/test_botocore_sqs.py index c9d8585fd..17ed07fd2 100644 --- a/tests/external_botocore/test_botocore_sqs.py +++ b/tests/external_botocore/test_botocore_sqs.py @@ -31,7 +31,6 @@ BOTOCORE_VERSION = get_package_version_tuple("botocore") url = "sqs.us-east-1.amazonaws.com" - if BOTOCORE_VERSION < (1, 29, 0): url = "queue.amazonaws.com" From e2cd6ebc5c9f3c679d282f4b79e4cfadad4816b5 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 30 Jan 2024 12:12:55 -0800 Subject: [PATCH 08/19] Update botocore tests. (#1051) * Update moto decorators in tests. * Remove py27 botocore dependencies. 
* Drop testing for Python 3.7 --- tests/external_botocore/test_boto3_s3.py | 1 - tests/external_botocore/test_boto3_sns.py | 1 - tests/external_botocore/test_botocore_dynamodb.py | 1 - tests/external_botocore/test_botocore_ec2.py | 1 - 4 files changed, 4 deletions(-) diff --git a/tests/external_botocore/test_boto3_s3.py b/tests/external_botocore/test_boto3_s3.py index 1d91c4636..b6347e322 100644 --- a/tests/external_botocore/test_boto3_s3.py +++ b/tests/external_botocore/test_boto3_s3.py @@ -28,7 +28,6 @@ from newrelic.common.package_version_utils import get_package_version_tuple MOTO_VERSION = get_package_version_tuple("moto") - AWS_ACCESS_KEY_ID = "AAAAAAAAAAAACCESSKEY" AWS_SECRET_ACCESS_KEY = "AAAAAASECRETKEY" # nosec AWS_REGION_NAME = "us-west-2" diff --git a/tests/external_botocore/test_boto3_sns.py b/tests/external_botocore/test_boto3_sns.py index a1ffc1331..baa0687bc 100644 --- a/tests/external_botocore/test_boto3_sns.py +++ b/tests/external_botocore/test_boto3_sns.py @@ -30,7 +30,6 @@ from newrelic.common.package_version_utils import get_package_version_tuple MOTO_VERSION = get_package_version_tuple("moto") - AWS_ACCESS_KEY_ID = "AAAAAAAAAAAACCESSKEY" AWS_SECRET_ACCESS_KEY = "AAAAAASECRETKEY" # nosec (This is fine for testing purposes) AWS_REGION_NAME = "us-east-1" diff --git a/tests/external_botocore/test_botocore_dynamodb.py b/tests/external_botocore/test_botocore_dynamodb.py index 539993206..28dbd8ac1 100644 --- a/tests/external_botocore/test_botocore_dynamodb.py +++ b/tests/external_botocore/test_botocore_dynamodb.py @@ -30,7 +30,6 @@ from newrelic.common.package_version_utils import get_package_version_tuple MOTO_VERSION = get_package_version_tuple("moto") - AWS_ACCESS_KEY_ID = "AAAAAAAAAAAACCESSKEY" AWS_SECRET_ACCESS_KEY = "AAAAAASECRETKEY" # nosec (This is fine for testing purposes) AWS_REGION = "us-east-1" diff --git a/tests/external_botocore/test_botocore_ec2.py b/tests/external_botocore/test_botocore_ec2.py index cfbf48e3b..84c7e9e65 100644 --- a/tests/external_botocore/test_botocore_ec2.py +++ b/tests/external_botocore/test_botocore_ec2.py @@ -30,7 +30,6 @@ from newrelic.common.package_version_utils import get_package_version_tuple MOTO_VERSION = get_package_version_tuple("moto") - AWS_ACCESS_KEY_ID = "AAAAAAAAAAAACCESSKEY" AWS_SECRET_ACCESS_KEY = "AAAAAASECRETKEY" # nosec (This is fine for testing purposes) AWS_REGION = "us-east-1" From bcb7dc573ef5b82ee249ab2b32bdb86df48e2960 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 24 Oct 2023 10:08:01 -0700 Subject: [PATCH 09/19] Add support for streaming in openai --- newrelic/config.py | 6 + newrelic/hooks/mlmodel_openai.py | 353 ++++++++++++---- .../_mock_external_openai_server.py | 266 +++++++++++- tests/mlmodel_openai/conftest.py | 94 ++++- tests/mlmodel_openai/test_chat_completion.py | 2 +- .../test_chat_completion_stream.py | 389 ++++++++++++++++++ 6 files changed, 1012 insertions(+), 98 deletions(-) create mode 100644 tests/mlmodel_openai/test_chat_completion_stream.py diff --git a/newrelic/config.py b/newrelic/config.py index 6ea18983c..dd241d193 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2061,6 +2061,12 @@ def _process_module_builtin_defaults(): "newrelic.hooks.mlmodel_openai", "instrument_openai_util", ) + _process_module_definition( + "openai.api_resources.abstract.engine_api_resource", + "newrelic.hooks.mlmodel_openai", + "instrument_openai_api_resources_abstract_engine_api_resource", + ) + _process_module_definition( "openai.resources.chat.completions", "newrelic.hooks.mlmodel_openai", 
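Each _process_module_definition call above maps an importable module path to the hook function that should instrument it the first time the module is imported. A simplified illustration of the mechanism (a hypothetical sketch, not the agent's actual implementation):

    # Hypothetical sketch: register module path -> (hook module, hook function).
    _module_hooks = {}

    def process_module_definition(module_path, hook_module, hook_function):
        _module_hooks[module_path] = (hook_module, hook_function)

    def notify_module_imported(module):
        # Resolve and run the hook lazily, on first import of the target module.
        hook_module, hook_function = _module_hooks[module.__name__]
        hook = getattr(__import__(hook_module, fromlist=[hook_function]), hook_function)
        hook(module)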
diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 0741aaaea..e2f5c61f2 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import sys
 import uuid
 
 import openai
@@ -19,8 +20,9 @@
 from newrelic.api.function_trace import FunctionTrace
 from newrelic.api.time_trace import get_trace_linking_metadata
 from newrelic.api.transaction import current_transaction
-from newrelic.common.object_wrapper import wrap_function_wrapper
+from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper
 from newrelic.common.package_version_utils import get_package_version
+from newrelic.common.signature import bind_args
 from newrelic.core.config import global_settings
 
 OPENAI_VERSION = get_package_version("openai")
@@ -203,91 +205,107 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs):
 
     function_name = wrapped.__name__
 
-    with FunctionTrace(name=function_name, group="Llm/completion/OpenAI") as ft:
-        # Get trace information
-        available_metadata = get_trace_linking_metadata()
-        span_id = available_metadata.get("span.id", "")
-        trace_id = available_metadata.get("trace.id", "")
-
-        try:
-            return_val = wrapped(*args, **kwargs)
-            if kwargs.get("stream", False):
-                return return_val
-        except Exception as exc:
-            if kwargs.get("stream", False):
-                raise
-            if OPENAI_V1:
-                response = getattr(exc, "response", "")
-                response_headers = getattr(response, "headers", "")
-                exc_organization = response_headers.get("openai-organization", "") if response_headers else ""
-                # There appears to be a bug here in openai v1 where despite having code,
-                # param, etc in the error response, they are not populated on the exception
-                # object so grab them from the response body object instead.
-                body = getattr(exc, "body", {}) or {}
-                notice_error_attributes = {
-                    "http.statusCode": getattr(exc, "status_code", "") or "",
-                    "error.message": body.get("message", "") or "",
-                    "error.code": body.get("code", "") or "",
-                    "error.param": body.get("param", "") or "",
-                    "completion_id": completion_id,
-                }
-            else:
-                exc_organization = getattr(exc, "organization", "")
-                notice_error_attributes = {
-                    "http.statusCode": getattr(exc, "http_status", ""),
-                    "error.message": getattr(exc, "_message", ""),
-                    "error.code": getattr(getattr(exc, "error", ""), "code", ""),
-                    "error.param": getattr(exc, "param", ""),
-                    "completion_id": completion_id,
-                }
-            # Override the default message if it is not empty.
-            message = notice_error_attributes.pop("error.message")
-            if message:
-                exc._nr_message = message
-
-            ft.notice_error(
-                attributes=notice_error_attributes,
-            )
-            # Gather attributes to add to chat completion summary event in error context
-            error_chat_completion_dict = {
-                "id": completion_id,
-                "appName": app_name,
-                "conversation_id": conversation_id,
-                "api_key_last_four_digits": api_key_last_four_digits,
-                "span_id": span_id,
-                "trace_id": trace_id,
-                "transaction_id": transaction.guid,
-                "response.number_of_messages": len(request_message_list),
-                "request.model": kwargs.get("model") or kwargs.get("engine") or "",
-                "request.temperature": kwargs.get("temperature", ""),
-                "request.max_tokens": kwargs.get("max_tokens", ""),
-                "vendor": "openAI",
-                "ingest_source": "Python",
-                "response.organization": "" if exc_organization is None else exc_organization,
-                "duration": ft.duration,
-                "error": True,
-            }
-            transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict)
-
-            create_chat_completion_message_event(
-                transaction,
-                app_name,
-                request_message_list,
-                completion_id,
-                span_id,
-                trace_id,
-                "",
-                None,
-                "",
-                conversation_id,
-                None,
-            )
-
-            raise
+    ft = FunctionTrace(name=function_name, group="Llm/completion/OpenAI")
+    ft.__enter__()
+    # Get trace information
+    available_metadata = get_trace_linking_metadata()
+    span_id = available_metadata.get("span.id", "")
+    trace_id = available_metadata.get("trace.id", "")
+    try:
+        return_val = wrapped(*args, **kwargs)
+        return_val._nr_ft = ft
+    except Exception as exc:
+        if OPENAI_V1:
+            response = getattr(exc, "response", "")
+            response_headers = getattr(response, "headers", "")
+            exc_organization = response_headers.get("openai-organization", "") if response_headers else ""
+            # There appears to be a bug here in openai v1 where despite having code,
+            # param, etc in the error response, they are not populated on the exception
+            # object so grab them from the response body object instead.
+            body = getattr(exc, "body", {}) or {}
+            notice_error_attributes = {
+                "http.statusCode": getattr(exc, "status_code", "") or "",
+                "error.message": body.get("message", "") or "",
+                "error.code": body.get("code", "") or "",
+                "error.param": body.get("param", "") or "",
+                "completion_id": completion_id,
+            }
+        else:
+            exc_organization = getattr(exc, "organization", "")
+            notice_error_attributes = {
+                "http.statusCode": getattr(exc, "http_status", ""),
+                "error.message": getattr(exc, "_message", ""),
+                "error.code": getattr(getattr(exc, "error", ""), "code", ""),
+                "error.param": getattr(exc, "param", ""),
+                "completion_id": completion_id,
+            }
+        # Override the default message if it is not empty.
+        message = notice_error_attributes.pop("error.message")
+        if message:
+            exc._nr_message = message
+
+        ft.notice_error(
+            attributes=notice_error_attributes,
+        )
+        # Gather attributes to add to chat completion summary event in error context
+        error_chat_completion_dict = {
+            "id": completion_id,
+            "appName": app_name,
+            "conversation_id": conversation_id,
+            "api_key_last_four_digits": api_key_last_four_digits,
+            "span_id": span_id,
+            "trace_id": trace_id,
+            "transaction_id": transaction.guid,
+            "response.number_of_messages": len(request_message_list),
+            "request.model": kwargs.get("model") or kwargs.get("engine") or "",
+            "request.temperature": kwargs.get("temperature", ""),
+            "request.max_tokens": kwargs.get("max_tokens", ""),
+            "vendor": "openAI",
+            "ingest_source": "Python",
+            "response.organization": "" if exc_organization is None else exc_organization,
+            "duration": ft.duration,
+            "error": True,
+        }
+        transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict)
+
+        create_chat_completion_message_event(
+            transaction,
+            app_name,
+            request_message_list,
+            completion_id,
+            span_id,
+            trace_id,
+            "",
+            None,
+            "",
+            conversation_id,
+            None,
+        )
+
+        ft.__exit__(*sys.exc_info())
+        raise
+
+    stream = kwargs.get("stream", False)
+    # If the response is not a stream generator, we exit the function trace now.
+    if not stream:
+        ft.__exit__(None, None, None)
 
     if not return_val:
         return return_val
 
+    if stream:
+        # The function trace will be exited in the final iteration of the response
+        # generator.
+        setattr(return_val, "_nr_ft", ft)
+        setattr(return_val, "_nr_openai_attrs", getattr(return_val, "_nr_openai_attrs", {}))
+        return_val._nr_openai_attrs["messages"] = kwargs.get("messages", [])
+        return_val._nr_openai_attrs["temperature"] = kwargs.get("temperature", "")
+        return_val._nr_openai_attrs["max_tokens"] = kwargs.get("max_tokens", "")
+        return_val._nr_openai_attrs["request.model"] = kwargs.get("model") or kwargs.get("engine") or ""
+        return_val._nr_openai_attrs["api_key_last_four_digits"] = api_key_last_four_digits
+        return return_val
+
+    # If the response is not a stream generator, record the event data.
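+    # (For streaming calls we already returned above: the still-open function
+    # trace and the kwargs stashed in _nr_openai_attrs are consumed by
+    # GeneratorProxy as the stream is iterated.)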
     # At this point, we have a response so we can grab attributes only available on the response object
     response_headers = getattr(return_val, "_nr_response_headers", {})
     # In v1, response objects are pydantic models so this function call converts the
@@ -895,6 +913,174 @@ def instrument_openai_util(module):
     setattr(module.convert_to_openai_object, "_nr_wrapped", True)
 
 
+class GeneratorProxy(ObjectProxy):
+    def __init__(self, wrapped):
+        super(GeneratorProxy, self).__init__(wrapped)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        transaction = current_transaction()
+        if not transaction:
+            return self.__wrapped__.__next__()
+
+        return_val = None
+        try:
+            return_val = self.__wrapped__.__next__()
+            if return_val:
+                choices = return_val.get("choices", [])
+                self._nr_openai_attrs["response.model"] = return_val.get("model", "")
+                self._nr_openai_attrs["id"] = return_val.get("id", "")
+                self._nr_openai_attrs["response.organization"] = return_val.get("organization", "")
+                if choices:
+                    delta = choices[0].get("delta", {})
+                    if delta:
+                        self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get(
+                            "content", ""
+                        )
+                        self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role")
+                    self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "")
+                self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {})
+        except StopIteration as e:
+            if hasattr(self, "_nr_ft"):
+                openai_attrs = getattr(self, "_nr_openai_attrs", {})
+                self._nr_ft.__exit__(None, None, None)
+
+                # If there are no openai attrs exit early as there's no data to record.
+                if not openai_attrs:
+                    raise
+
+                message_ids = self.record_streaming_chat_completion_events(transaction)
+                # Cache message ids on transaction for retrieval after open ai call completion.
+                if not hasattr(transaction, "_nr_message_ids"):
+                    transaction._nr_message_ids = {}
+                response_id = openai_attrs.get("response_id", None)
+                transaction._nr_message_ids[response_id] = message_ids
+            raise
+        except Exception as e:
+            if hasattr(self, "_nr_ft"):
+                openai_attrs = getattr(self, "_nr_openai_attrs", {})
+                self._nr_ft.__exit__(*sys.exc_info())
+
+                # If there are no openai attrs exit early as there's no data to record.
+                if not openai_attrs:
+                    raise
+
+                self.record_streaming_chat_completion_events(transaction)
+            raise
+        return return_val
+
+    def record_streaming_chat_completion_events(self, transaction):
+        openai_attrs = getattr(self, "_nr_openai_attrs", {})
+
+        # If there are no openai attrs exit early as there's no data to record.
+ if not openai_attrs: + raise + + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") + + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + chat_completion_id = str(uuid.uuid4()) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + organization = response_headers.get("openai-organization", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "request_id": request_id, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + "response.model": openai_attrs.get("response.model", ""), + "response.organization": organization, + # Usage tokens are not supported in streaming for now. + "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), + "response.headers.llmVersion": response_headers.get("openai-version", ""), + "response.headers.ratelimitLimitRequests": check_rate_limit_header( + response_headers, "x-ratelimit-limit-requests", True + ), + "response.headers.ratelimitLimitTokens": check_rate_limit_header( + response_headers, "x-ratelimit-limit-tokens", True + ), + "response.headers.ratelimitResetTokens": check_rate_limit_header( + response_headers, "x-ratelimit-reset-tokens", False + ), + "response.headers.ratelimitResetRequests": check_rate_limit_header( + response_headers, "x-ratelimit-reset-requests", False + ), + "response.headers.ratelimitRemainingTokens": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-tokens", True + ), + "response.headers.ratelimitRemainingRequests": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-requests", True + ), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + } + + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + + def close(self): + return super(GeneratorProxy, self).close() + + +def wrap_engine_api_resource_create(wrapped, instance, args, kwargs): + transaction = current_transaction() + + if not transaction: + return wrapped(*args, **kwargs) + + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) + + return_val = wrapped(*args, **kwargs) + + if stream: + return GeneratorProxy(return_val) + else: + return return_val + + def 
instrument_openai_api_resources_embedding(module): if hasattr(module, "Embedding"): if hasattr(module.Embedding, "create"): @@ -942,3 +1128,8 @@ def instrument_openai_base_client(module): wrap_function_wrapper(module, "SyncAPIClient._process_response", wrap_base_client_process_response_sync) if hasattr(module.AsyncAPIClient, "_process_response"): wrap_function_wrapper(module, "AsyncAPIClient._process_response", wrap_base_client_process_response_async) + + +def instrument_openai_api_resources_abstract_engine_api_resource(module): + if hasattr(module.EngineAPIResource, "create"): + wrap_function_wrapper(module, "EngineAPIResource.create", wrap_engine_api_resource_create) diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index edcfc47f3..a95914329 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -30,6 +30,261 @@ # created by an external call. # 3) This app runs on a separate thread meaning it won't block the test app. +STREAMED_RESPONSES = { + "You are a scientist.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "new-relic-nkmd8b", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "199", + "x-ratelimit-remaining-tokens": "39940", + "x-ratelimit-reset-requests": "7m12s", + "x-ratelimit-reset-tokens": "90ms", + "x-request-id": "49dbbffbd3c3f4612aa48def69059ccd", + }, + 200, + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Fahrenheit"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " is"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " equal"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": 
None, + "choices": [{"index": 0, "delta": {"content": " to"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " "}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "100"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Celsius"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "."}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {}, "logprobs": None, "finish_reason": "stop"}], + }, + ], + ] +} + +RESPONSES_V1 = { + "You are a scientist.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "foobar-jtbczk", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "196", + "x-ratelimit-remaining-tokens": "39880", + "x-ratelimit-reset-requests": "23m5.129s", + "x-ratelimit-reset-tokens": "180ms", + "x-request-id": "5c53c9b80af57a1c9b38568f01dcde7f", + }, + 200, + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Fahrenheit"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": 
"chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " is"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " equal"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " to"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " "}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "100"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Celsius"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "."}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {}, "logprobs": None, "finish_reason": "stop"}], + }, + ], + ] +} RESPONSES_V1 = { "You are a scientist.": [ { @@ -304,7 +559,7 @@ def simple_get(openai_version, extract_shortened_prompt): def _simple_get(self): content_len = int(self.headers.get("content-length")) content = json.loads(self.rfile.read(content_len).decode("utf-8")) - + stream = content.get("stream", False) prompt = extract_shortened_prompt(content) if not prompt: self.send_response(500) @@ -316,6 +571,8 @@ def _simple_get(self): if openai_version < (1, 0): mocked_responses = RESPONSES + if stream: + mocked_responses = STREAMED_RESPONSES else: mocked_responses = RESPONSES_V1 @@ -338,7 +595,12 @@ def _simple_get(self): self.end_headers() # Send response body - self.wfile.write(json.dumps(response).encode("utf-8")) + if stream: + for resp in response: + data = json.dumps(resp).encode("utf-8") + self.wfile.write(b"data: %s\n" % data) + else: + self.wfile.write(json.dumps(response).encode("utf-8")) return return _simple_get diff --git a/tests/mlmodel_openai/conftest.py 
b/tests/mlmodel_openai/conftest.py
index 180bec9cc..daa6b4584 100644
--- a/tests/mlmodel_openai/conftest.py
+++ b/tests/mlmodel_openai/conftest.py
@@ -32,7 +32,8 @@
 )
 
 from newrelic.api.transaction import current_transaction
-from newrelic.common.object_wrapper import wrap_function_wrapper
+from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper
+from newrelic.common.signature import bind_args
 
 _default_settings = {
     "transaction_tracer.explain_threshold": 0.0,
@@ -164,6 +165,11 @@ def openai_server(
         wrap_function_wrapper(
             "openai.api_requestor", "APIRequestor._interpret_response", wrap_openai_api_requestor_interpret_response
         )
+        wrap_function_wrapper(
+            "openai.api_resources.abstract.engine_api_resource",
+            "EngineAPIResource.create",
+            wrap_engine_api_resource_create,
+        )
         yield  # Run tests
     else:
         # Apply function wrappers to record data
@@ -247,20 +253,22 @@ def _wrap_openai_api_requestor_request(wrapped, instance, args, kwargs):
         # Send request
         result = wrapped(*args, **kwargs)
 
-        # Clean up data
-        data = result[0].data
-        headers = result[0]._headers
-        headers = dict(
-            filter(
-                lambda k: k[0].lower() in RECORDED_HEADERS
-                or k[0].lower().startswith("openai")
-                or k[0].lower().startswith("x-ratelimit"),
-                headers.items(),
+        # Append response data to audit log
+        if not kwargs.get("stream", False):
+            # Clean up data
+            data = result[0].data
+            headers = result[0]._headers
+            headers = dict(
+                filter(
+                    lambda k: k[0].lower() in RECORDED_HEADERS
+                    or k[0].lower().startswith("openai")
+                    or k[0].lower().startswith("x-ratelimit"),
+                    headers.items(),
+                )
             )
-        )
-
-        # Log response
-        OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, 200, data  # Append response data to audit log
+            OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, 200, data
+        else:
+            OPENAI_AUDIT_LOG_CONTENTS[prompt] = [None, 200, []]
 
         return result
 
     return _wrap_openai_api_requestor_request
@@ -272,3 +280,62 @@ def bind_request_params(method, url, params=None, *args, **kwargs):
     return params
 
 def bind_request_interpret_response_params(result, stream):
     return result.content.decode("utf-8"), result.status_code, result.headers
+
+
+class GeneratorProxy(ObjectProxy):
+    def __init__(self, wrapped):
+        super(GeneratorProxy, self).__init__(wrapped)
+
+    def __iter__(self):
+        return self
+
+    # Make this Proxy a pass through to our instrumentation's proxy by passing along
+    # get attr and set attr calls to our instrumentation's proxy.
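+    # Without this forwarding, attributes the agent's GeneratorProxy sets
+    # (e.g. _nr_ft and _nr_openai_attrs) would land on this outer test proxy
+    # instead of the instrumented proxy underneath.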
+    def __getattr__(self, attr):
+        return self.__wrapped__.__getattr__(attr)
+
+    def __setattr__(self, attr, value):
+        return self.__wrapped__.__setattr__(attr, value)
+
+    def __next__(self):
+        transaction = current_transaction()
+        if not transaction:
+            return self.__wrapped__.__next__()
+
+        try:
+            return_val = self.__wrapped__.__next__()
+            if return_val:
+                prompt = [k for k in OPENAI_AUDIT_LOG_CONTENTS.keys()][-1]
+                headers = dict(
+                    filter(
+                        lambda k: k[0].lower() in RECORDED_HEADERS
+                        or k[0].lower().startswith("openai")
+                        or k[0].lower().startswith("x-ratelimit"),
+                        return_val._nr_response_headers.items(),
+                    )
+                )
+                OPENAI_AUDIT_LOG_CONTENTS[prompt][0] = headers
+                OPENAI_AUDIT_LOG_CONTENTS[prompt][2].append(return_val.to_dict_recursive())
+            return return_val
+        except Exception as e:
+            raise
+
+    def close(self):
+        return super(GeneratorProxy, self).close()
+
+
+def wrap_engine_api_resource_create(wrapped, instance, args, kwargs):
+    transaction = current_transaction()
+
+    if not transaction:
+        return wrapped(*args, **kwargs)
+
+    bound_args = bind_args(wrapped, args, kwargs)
+    stream = bound_args["params"].get("stream", False)
+
+    return_val = wrapped(*args, **kwargs)
+
+    if stream:
+        return GeneratorProxy(return_val)
+    else:
+        return return_val
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index 5681dbb57..bfbdf2013 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -16,8 +16,8 @@
 from testing_support.fixtures import (
     override_application_settings,
     reset_core_stats_engine,
-    validate_custom_event_count,
     validate_attributes,
+    validate_custom_event_count,
 )
 from testing_support.validators.validate_custom_events import validate_custom_events
 from testing_support.validators.validate_transaction_metrics import (
diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py
new file mode 100644
index 000000000..62e9e0545
--- /dev/null
+++ b/tests/mlmodel_openai/test_chat_completion_stream.py
@@ -0,0 +1,389 @@
+ +import openai +from testing_support.fixtures import ( + override_application_settings, + reset_core_stats_engine, + validate_attributes, + validate_custom_event_count, +) +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_transaction_metrics import ( + validate_transaction_metrics, +) + +from newrelic.api.background_task import background_task +from newrelic.api.transaction import add_custom_attribute + +disabled_custom_insights_settings = {"custom_insights_events.enabled": False} + +_test_openai_chat_completion_messages = ( + {"role": "system", "content": "You are a scientist."}, + {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"}, +) + +chat_completion_recorded_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "transaction_id": "transaction-id", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "is_response": True, + "ingest_source": 
"Python", + }, + ), +] + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events) +# One summary event, one system message, one user message, and one response message from the assistant +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + name="test_chat_completion_stream:test_openai_chat_completion_sync_in_txn_with_convo_id", + custom_metrics=[ + ("Python/ML/OpenAI/%s" % openai.__version__, 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +chat_completion_recorded_events_no_convo_id = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "transaction_id": "transaction-id", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + 
"sequence": 2, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events_no_convo_id) +# One summary event, one system message, one user message, and one response message from the assistant +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_sync_in_txn_no_convo_id", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@background_task() +def test_openai_chat_completion_sync_in_txn_no_convo_id(set_trace_info): + set_trace_info() + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_openai_chat_completion_sync_outside_txn(): + add_custom_attribute("llm.conversation_id", "my-awesome-id") + openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events_no_convo_id) +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_unset", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@background_task() +def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info): + set_trace_info() + + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events) +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_set", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_transaction_metrics( + name="test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_set", + custom_metrics=[ + ("Python/ML/OpenAI/%s" % openai.__version__, 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_openai_chat_completion_async_outside_transaction(loop): + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + 
+@override_application_settings(disabled_custom_insights_settings) +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@validate_transaction_metrics( + name="test_chat_completion_stream:test_openai_chat_completion_async_disabled_custom_event_settings", + custom_metrics=[ + ("Python/ML/OpenAI/%s" % openai.__version__, 1), + ], + background_task=True, +) +@background_task() +def test_openai_chat_completion_async_disabled_custom_event_settings(loop): + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +def test_openai_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(): + assert openai.ChatCompletion._nr_wrapped + assert openai.util.convert_to_openai_object._nr_wrapped From a432b9b61b6aa18a0d0043397577cc9f72605124 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Thu, 8 Feb 2024 09:08:38 -0800 Subject: [PATCH 10/19] Add support for streaming errors --- .../_mock_external_openai_server.py | 31 +- tests/mlmodel_openai/conftest.py | 1 + .../test_chat_completion_stream_error.py | 597 ++++++++++++++++++ 3 files changed, 627 insertions(+), 2 deletions(-) create mode 100644 tests/mlmodel_openai/test_chat_completion_stream_error.py diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index a95914329..d378789b3 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -31,6 +31,33 @@ # 3) This app runs on a separate thread meaning it won't block the test app. STREAMED_RESPONSES = { + "Invalid API key.": [ + {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"}, + 401, + { + "error": { + "message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": None, + "code": "invalid_api_key", + } + }, + ], + "Model does not exist.": [ + { + "Content-Type": "application/json; charset=utf-8", + "x-request-id": "cfdf51fb795362ae578c12a21796262c", + }, + 404, + { + "error": { + "message": "The model `does-not-exist` does not exist", + "type": "invalid_request_error", + "param": None, + "code": "model_not_found", + } + }, + ], "You are a scientist.": [ { "Content-Type": "text/event-stream", @@ -155,7 +182,7 @@ "choices": [{"index": 0, "delta": {}, "logprobs": None, "finish_reason": "stop"}], }, ], - ] + ], } RESPONSES_V1 = { @@ -595,7 +622,7 @@ def _simple_get(self): self.end_headers() # Send response body - if stream: + if stream and status_code < 400: for resp in response: data = json.dumps(resp).encode("utf-8") self.wfile.write(b"data: %s\n" % data) diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py index daa6b4584..884ab79c6 100644 --- a/tests/mlmodel_openai/conftest.py +++ b/tests/mlmodel_openai/conftest.py @@ -33,6 +33,7 @@ from newrelic.api.transaction import current_transaction from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper +from newrelic.common.signature import bind_args _default_settings = { "transaction_tracer.explain_threshold": 0.0, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py new file mode 100644 index 000000000..7a3aeb05b --- /dev/null +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -0,0 +1,597 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
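
These new tests lean on the mock server behavior patched above: with the
`status_code < 400` gate, successful prompts are streamed as SSE `data:` lines
while error prompts fall through to a plain JSON body, which the client raises
as an exception before iteration ever starts. A condensed sketch of that
dispatch, simplified from `_simple_get` (the function name and arguments here
are illustrative):

    import json

    def serve_canned_response(wfile, prompt, streamed_responses):
        # Canned headers, status code, and body are keyed by the prompt text.
        headers, status_code, response = streamed_responses[prompt]
        stream = headers.get("Content-Type", "") == "text/event-stream"
        if stream and status_code < 400:
            # Success: one newline-delimited SSE event per streamed chunk.
            for chunk in response:
                wfile.write(b"data: %s\n" % json.dumps(chunk).encode("utf-8"))
        else:
            # Error (or non-streaming): a single JSON body.
            wfile.write(json.dumps(response).encode("utf-8"))
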
+ +import openai +import pytest +from testing_support.fixtures import ( + dt_enabled, + reset_core_stats_engine, + validate_custom_event_count, +) +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_error_trace_attributes import ( + validate_error_trace_attributes, +) +from testing_support.validators.validate_span_events import validate_span_events +from testing_support.validators.validate_transaction_metrics import ( + validate_transaction_metrics, +) + +from newrelic.api.background_task import background_task +from newrelic.api.transaction import add_custom_attribute +from newrelic.common.object_names import callable_name + +_test_openai_chat_completion_messages = ( + {"role": "system", "content": "You are a scientist."}, + {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"}, +) + +# Sync tests: +expected_events_on_no_model_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "", # No model in this test case + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 2, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 1, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +# No model provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.param": "engine", + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Must provide an 'engine' or 'model' parameter to create a ", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_no_model", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_no_model_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_invalid_request_error_no_model(set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + # no model provided, + 
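+            # (with neither "model" nor "engine", the SDK raises InvalidRequestError before any request is sent)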
messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_invalid_model_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "does-not-exist", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Model does not exist.", + "role": "user", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +# Invalid model provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.code": "model_not_found", + "http.statusCode": 404, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "The model `does-not-exist` does not exist", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_invalid_model_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + model="does-not-exist", + messages=({"role": "user", "content": "Model does not exist."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_auth_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 2, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + 
{"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 1, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +# No api_key provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": {}, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "No API key provided. You can set your API key in code using 'openai.api_key = ', or you can set the environment variable OPENAI_API_KEY=). If your API key is stored in a file, you can point the openai module at it with 'openai.api_key_path = '. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_authentication_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_auth_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_authentication_error(monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + monkeypatch.setattr(openai, "api_key", None) # openai.api_key = None + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_wrong_api_key_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-BEEF", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Invalid API key.", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +# Wrong api_key provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 401, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_wrong_api_key_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_wrong_api_key_error(monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + monkeypatch.setattr(openai, "api_key", "DEADBEEF") + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Invalid API key."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +# Async tests: +# No model provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.param": "engine", + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Must provide an 'engine' or 'model' parameter to create a ", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_no_model_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_no_model_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = loop.run_until_complete( + openai.ChatCompletion.acreate( + # no model provided, + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +# Invalid model provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.code": "model_not_found", + "http.statusCode": 404, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "The model `does-not-exist` does not exist", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_invalid_model_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = loop.run_until_complete( + openai.ChatCompletion.acreate( + model="does-not-exist", + messages=({"role": "user", "content": "Model does not exist."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +# No api_key provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + 
callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": {}, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "No API key provided. You can set your API key in code using 'openai.api_key = ', or you can set the environment variable OPENAI_API_KEY=). If your API key is stored in a file, you can point the openai module at it with 'openai.api_key_path = '. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_authentication_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_auth_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + monkeypatch.setattr(openai, "api_key", None) # openai.api_key = None + generator = loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp + + +# Wrong api_key provided +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 401, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_wrong_api_key_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + monkeypatch.setattr(openai, "api_key", "DEADBEEF") + generator = loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Invalid API key."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + for resp in generator: + assert resp From 549b745be94b38b02c02925efcb1aba6f9375f35 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Fri, 9 Feb 2024 18:57:12 -0800 Subject: [PATCH 11/19] Support async generators --- newrelic/hooks/mlmodel_openai.py | 344 ++++++++++++++---- .../_mock_external_openai_server.py | 18 + .../test_chat_completion_stream.py | 57 +-- .../test_chat_completion_stream_error.py | 68 ++-- 4 files changed, 349 insertions(+), 138 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index e2f5c61f2..f4fb18d5c 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -658,7 +658,7 @@ async def wrap_embedding_async(wrapped, instance, args, kwargs): async def wrap_chat_completion_async(wrapped, instance, args, kwargs): transaction = current_transaction() - if not transaction or kwargs.get("stream", False): + if not transaction: return await wrapped(*args, **kwargs) # Framework metric also used for entity tagging in the UI @@ -683,88 +683,107 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs): completion_id = str(uuid.uuid4()) function_name = wrapped.__name__ + ft = FunctionTrace(name=function_name, group="Llm/completion/OpenAI") + ft.__enter__() + # Get trace information + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + try: + return_val = await wrapped(*args, **kwargs) + return_val._nr_ft = ft + except Exception as exc: + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + exc_organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. 
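+            # e.g. body == {"message": "Incorrect API key provided: ...", "type": "invalid_request_error", "param": None, "code": "invalid_api_key"}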
+ body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": completion_id, + } + else: + exc_organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": completion_id, + } + # Override the default message if it is not empty. + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message - with FunctionTrace(name=function_name, group="Llm/completion/OpenAI") as ft: - # Get trace information - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") + ft.notice_error( + attributes=notice_error_attributes, + ) + # Gather attributes to add to embedding summary event in error context + error_chat_completion_dict = { + "id": completion_id, + "appName": app_name, + "conversation_id": conversation_id, + "api_key_last_four_digits": api_key_last_four_digits, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "response.number_of_messages": len(request_message_list), + "request.model": kwargs.get("model") or kwargs.get("engine") or "", + "request.temperature": kwargs.get("temperature", ""), + "request.max_tokens": kwargs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.organization": "" if exc_organization is None else exc_organization, + "duration": ft.duration, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict) - try: - return_val = await wrapped(*args, **kwargs) - except Exception as exc: - if OPENAI_V1: - response = getattr(exc, "response", "") - response_headers = getattr(response, "headers", "") - exc_organization = response_headers.get("openai-organization", "") if response_headers else "" - # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception - # object so grab them from the response body object instead. - body = getattr(exc, "body", {}) or {} - notice_error_attributes = { - "http.statusCode": getattr(exc, "status_code", "") or "", - "error.message": body.get("message", "") or "", - "error.code": body.get("code", "") or "", - "error.param": body.get("param", "") or "", - "completion_id": completion_id, - } - else: - exc_organization = getattr(exc, "organization", "") - notice_error_attributes = { - "http.statusCode": getattr(exc, "http_status", ""), - "error.message": getattr(exc, "_message", ""), - "error.code": getattr(getattr(exc, "error", ""), "code", ""), - "error.param": getattr(exc, "param", ""), - "completion_id": completion_id, - } - # Override the default message if it is not empty. 
- message = notice_error_attributes.pop("error.message") - if message: - exc._nr_message = message + create_chat_completion_message_event( + transaction, + app_name, + request_message_list, + completion_id, + span_id, + trace_id, + "", + None, + "", + conversation_id, + None, + ) - ft.notice_error( - attributes=notice_error_attributes, - ) - # Gather attributes to add to chat completion summary event in error context - error_chat_completion_dict = { - "id": completion_id, - "appName": app_name, - "conversation_id": conversation_id, - "api_key_last_four_digits": api_key_last_four_digits, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "response.number_of_messages": len(request_message_list), - "request.model": kwargs.get("model") or kwargs.get("engine") or "", - "request.temperature": kwargs.get("temperature", ""), - "request.max_tokens": kwargs.get("max_tokens", ""), - "vendor": "openAI", - "ingest_source": "Python", - "response.organization": "" if exc_organization is None else exc_organization, - "duration": ft.duration, - "error": True, - } - transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict) - - create_chat_completion_message_event( - transaction, - app_name, - request_message_list, - completion_id, - span_id, - trace_id, - "", - None, - "", - conversation_id, - None, - ) + ft.__exit__(*sys.exc_info()) + raise - raise + stream = kwargs.get("stream", False) + # If response is not a stream generator, we exit the function trace now. + if not stream: + ft.__exit__(None, None, None) if not return_val: return return_val + if stream: + # The function trace will be exited when in the final iteration of the response + # generator. + setattr(return_val, "_nr_ft", ft) + setattr(return_val, "_nr_openai_attrs", getattr(return_val, "_nr_openai_attrs", {})) + return_val._nr_openai_attrs["messages"] = kwargs.get("messages", []) + return_val._nr_openai_attrs["temperature"] = kwargs.get("temperature", "") + return_val._nr_openai_attrs["max_tokens"] = kwargs.get("max_tokens", "") + return_val._nr_openai_attrs["request.model"] = kwargs.get("model") or kwargs.get("engine") or "" + return_val._nr_openai_attrs["api_key_last_four_digits"] = api_key_last_four_digits + return return_val + + # If response is not a stream generator, record the event data. 
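+    # (Streamed responses were returned above; their function trace stays open until the generator proxy closes it when iteration finishes or fails.)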
# At this point, we have a response so we can grab attributes only available on the response object response_headers = getattr(return_val, "_nr_response_headers", None) # In v1, response objects are pydantic models so this function call converts the @@ -1064,7 +1083,159 @@ def close(self): return super(GeneratorProxy, self).close() -def wrap_engine_api_resource_create(wrapped, instance, args, kwargs): +class AsyncGeneratorProxy(ObjectProxy): + def __init__(self, wrapped): + super(AsyncGeneratorProxy, self).__init__(wrapped) + + def __aiter__(self): + self._nr_wrapped_iter = self.__wrapped__.__aiter__() + return self + + async def __anext__(self): + transaction = current_transaction() + if not transaction: + return await self._nr_wrapped_iter.__anext__() + + return_val = None + try: + return_val = await self._nr_wrapped_iter.__anext__() + if return_val: + choices = return_val.get("choices", []) + self._nr_openai_attrs["response.model"] = return_val.get("model", "") + self._nr_openai_attrs["id"] = return_val.get("id", "") + self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") + if choices: + delta = choices[0].get("delta", {}) + if delta: + self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get( + "content", "" + ) + self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") + self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") + self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + except StopIteration as e: + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + self._nr_ft.__exit__(None, None, None) + + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: + raise + + message_ids = self.record_streaming_chat_completion_events(transaction) + # Cache message ids on transaction for retrieval after open ai call completion. + if not hasattr(transaction, "_nr_message_ids"): + transaction._nr_message_ids = {} + response_id = openai_attrs.get("response_id", None) + transaction._nr_message_ids[response_id] = message_ids + raise + except Exception as e: + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + self._nr_ft.__exit__(*sys.exc_info()) + + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: + raise + + self.record_streaming_chat_completion_events(transaction) + raise + return return_val + + def record_streaming_chat_completion_events(self, transaction): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + + # If there are no openai attrs exit early as there's no data to record. 
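+        # (The bare raise below re-raises the exception already being handled in the caller's except block.)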
+ if not openai_attrs: + raise + + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") + + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + chat_completion_id = str(uuid.uuid4()) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + organization = response_headers.get("openai-organization", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "request_id": request_id, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + "response.model": openai_attrs.get("response.model", ""), + "response.organization": organization, + # Usage tokens are not supported in streaming for now. + "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), + "response.headers.llmVersion": response_headers.get("openai-version", ""), + "response.headers.ratelimitLimitRequests": check_rate_limit_header( + response_headers, "x-ratelimit-limit-requests", True + ), + "response.headers.ratelimitLimitTokens": check_rate_limit_header( + response_headers, "x-ratelimit-limit-tokens", True + ), + "response.headers.ratelimitResetTokens": check_rate_limit_header( + response_headers, "x-ratelimit-reset-tokens", False + ), + "response.headers.ratelimitResetRequests": check_rate_limit_header( + response_headers, "x-ratelimit-reset-requests", False + ), + "response.headers.ratelimitRemainingTokens": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-tokens", True + ), + "response.headers.ratelimitRemainingRequests": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-requests", True + ), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + } + + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + + async def aclose(self): + return await super(AsyncGeneratorProxy, self).aclose() + + +def wrap_engine_api_resource_create_sync(wrapped, instance, args, kwargs): transaction = current_transaction() if not transaction: @@ -1081,6 +1252,23 @@ def wrap_engine_api_resource_create(wrapped, instance, args, kwargs): return return_val +async def wrap_engine_api_resource_create_async(wrapped, instance, args, kwargs): + transaction = current_transaction() + + if not 
transaction: + return await wrapped(*args, **kwargs) + + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) + + return_val = await wrapped(*args, **kwargs) + + if stream: + return AsyncGeneratorProxy(return_val) + else: + return return_val + + def instrument_openai_api_resources_embedding(module): if hasattr(module, "Embedding"): if hasattr(module.Embedding, "create"): @@ -1132,4 +1320,6 @@ def instrument_openai_base_client(module): def instrument_openai_api_resources_abstract_engine_api_resource(module): if hasattr(module.EngineAPIResource, "create"): - wrap_function_wrapper(module, "EngineAPIResource.create", wrap_engine_api_resource_create) + wrap_function_wrapper(module, "EngineAPIResource.create", wrap_engine_api_resource_create_sync) + if hasattr(module.EngineAPIResource, "acreate"): + wrap_function_wrapper(module, "EngineAPIResource.acreate", wrap_engine_api_resource_create_async) diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index d378789b3..988dae8b1 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -31,6 +31,24 @@ # 3) This app runs on a separate thread meaning it won't block the test app. STREAMED_RESPONSES = { + "Stream parsing error.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "new-relic-nkmd8b", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "199", + "x-ratelimit-remaining-tokens": "39940", + "x-ratelimit-reset-requests": "7m12s", + "x-ratelimit-reset-tokens": "90ms", + "x-request-id": "49dbbffbd3c3f4612aa48def69059ccd", + }, + 200, + ["Bad response"], + ], "Invalid API key.": [ {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"}, 401, diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index 62e9e0545..72bdaf9b3 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -14,7 +14,6 @@ import openai from testing_support.fixtures import ( - override_application_settings, reset_core_stats_engine, validate_attributes, validate_custom_event_count, @@ -295,17 +294,18 @@ def test_openai_chat_completion_sync_outside_txn(): def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info): set_trace_info() - loop.run_until_complete( - openai.ChatCompletion.acreate( + async def consumer(): + generator = await openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) - ) - for resp in generator: - assert resp + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) @reset_core_stats_engine() @@ -330,58 +330,35 @@ def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( + async def consumer(): + generator = await openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) - ) - for resp in generator: - assert resp + async for 
resp in generator: + assert resp + + loop.run_until_complete(consumer()) @reset_core_stats_engine() @validate_custom_event_count(count=0) def test_openai_chat_completion_async_outside_transaction(loop): - loop.run_until_complete( - openai.ChatCompletion.acreate( + async def consumer(): + generator = await openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) - ) - for resp in generator: - assert resp - + async for resp in generator: + assert resp -@override_application_settings(disabled_custom_insights_settings) -@reset_core_stats_engine() -@validate_custom_event_count(count=0) -@validate_transaction_metrics( - name="test_chat_completion_stream:test_openai_chat_completion_async_disabled_custom_event_settings", - custom_metrics=[ - ("Python/ML/OpenAI/%s" % openai.__version__, 1), - ], - background_task=True, -) -@background_task() -def test_openai_chat_completion_async_disabled_custom_event_settings(loop): - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="gpt-3.5-turbo", - messages=_test_openai_chat_completion_messages, - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - for resp in generator: - assert resp + loop.run_until_complete(consumer()) def test_openai_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(): diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index 7a3aeb05b..8463c9819 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -37,7 +37,6 @@ {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"}, ) -# Sync tests: expected_events_on_no_model_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -101,7 +100,6 @@ ] -# No model provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -187,7 +185,6 @@ def test_chat_completion_invalid_request_error_no_model(set_trace_info): ] -# Invalid model provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -293,7 +290,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): ] -# No api_key provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -378,7 +374,6 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): ] -# Wrong api_key provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -420,8 +415,6 @@ def test_chat_completion_wrong_api_key_error(monkeypatch, set_trace_info): assert resp -# Async tests: -# No model provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -452,7 +445,7 @@ def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_in with pytest.raises(openai.InvalidRequestError): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = loop.run_until_complete( + loop.run_until_complete( openai.ChatCompletion.acreate( # no model provided, messages=_test_openai_chat_completion_messages, @@ -461,11 +454,8 @@ def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_in stream=True, ) ) - for resp in generator: - assert resp -# Invalid model provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -497,7 +487,7 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra with 
pytest.raises(openai.InvalidRequestError): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = loop.run_until_complete( + loop.run_until_complete( openai.ChatCompletion.acreate( model="does-not-exist", messages=({"role": "user", "content": "Model does not exist."},), @@ -506,11 +496,8 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra stream=True, ) ) - for resp in generator: - assert resp -# No api_key provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -540,7 +527,7 @@ def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") monkeypatch.setattr(openai, "api_key", None) # openai.api_key = None - generator = loop.run_until_complete( + loop.run_until_complete( openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, @@ -549,11 +536,8 @@ def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace stream=True, ) ) - for resp in generator: - assert resp -# Wrong api_key provided @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -584,7 +568,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ with pytest.raises(openai.error.AuthenticationError): set_trace_info() monkeypatch.setattr(openai, "api_key", "DEADBEEF") - generator = loop.run_until_complete( + loop.run_until_complete( openai.ChatCompletion.acreate( model="gpt-3.5-turbo", messages=({"role": "user", "content": "Invalid API key."},), @@ -593,5 +577,47 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ stream=True, ) ) - for resp in generator: + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 401, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_stream_parsing_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_stream_parsing_error_async(loop, monkeypatch, set_trace_info): + # with pytest.raises(openai.error.APIError): + set_trace_info() + + async def consumer(): + generator = await openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Stream parsing error."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + async for resp in generator: assert resp + + loop.run_until_complete(consumer()) From c989a15fa3df0581005b7d1b9488993736e3d34a Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Mon, 12 Feb 2024 17:07:48 -0800 Subject: [PATCH 12/19] Add support for error during streaming --- newrelic/hooks/mlmodel_openai.py | 212 ++++++++++++++++-- .../_mock_external_openai_server.py | 27 ++- .../test_chat_completion_stream_error.py | 107 ++++++++- 3 files changed, 313 insertions(+), 33 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index f4fb18d5c..93060a877 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -888,7 +888,9 @@ def wrap_convert_to_openai_object(wrapped, instance, args, kwargs): resp = args[0] returned_response = wrapped(*args, **kwargs) - if isinstance(resp, openai.openai_response.OpenAIResponse): + if isinstance(returned_response, openai.openai_object.OpenAIObject) and isinstance( + resp, openai.openai_response.OpenAIResponse + ): setattr(returned_response, "_nr_response_headers", getattr(resp, "_headers", {})) return returned_response @@ -970,33 +972,118 @@ def __next__(self): if not openai_attrs: raise - message_ids = self.record_streaming_chat_completion_events(transaction) + message_ids = self.record_streaming_chat_completion_events(transaction, openai_attrs) # Cache message ids on transaction for retrieval after open ai call completion. if not hasattr(transaction, "_nr_message_ids"): transaction._nr_message_ids = {} response_id = openai_attrs.get("response_id", None) transaction._nr_message_ids[response_id] = message_ids raise - except Exception as e: + except Exception as exc: if hasattr(self, "_nr_ft"): openai_attrs = getattr(self, "_nr_openai_attrs", {}) - self._nr_ft.__exit__(*sys.exc_info()) # If there are no openai attrs exit early as there's no data to record. 
if not openai_attrs: + self._nr_ft.__exit__(*sys.exc_info()) raise - self.record_streaming_chat_completion_events(transaction) + self.record_streaming_chat_completion_events_error(transaction, openai_attrs, exc) raise return return_val - def record_streaming_chat_completion_events(self, transaction): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) + def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): + chat_completion_id = str(uuid.uuid4()) + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. + body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": chat_completion_id, + } + else: + organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": chat_completion_id, + } + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + self._nr_ft.notice_error( + attributes=notice_error_attributes, + ) + self._nr_ft.__exit__(*sys.exc_info()) + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") + + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - raise + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + # Usage tokens are not supported in streaming for now. 
+ "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + "response.organization": organization, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + + def record_streaming_chat_completion_events(self, transaction, openai_attrs): content = openai_attrs.get("content", None) role = openai_attrs.get("role") @@ -1113,7 +1200,7 @@ async def __anext__(self): self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) - except StopIteration as e: + except StopAsyncIteration as e: if hasattr(self, "_nr_ft"): openai_attrs = getattr(self, "_nr_openai_attrs", {}) self._nr_ft.__exit__(None, None, None) @@ -1122,33 +1209,118 @@ async def __anext__(self): if not openai_attrs: raise - message_ids = self.record_streaming_chat_completion_events(transaction) + message_ids = self.record_streaming_chat_completion_events(transaction, openai_attrs) # Cache message ids on transaction for retrieval after open ai call completion. if not hasattr(transaction, "_nr_message_ids"): transaction._nr_message_ids = {} response_id = openai_attrs.get("response_id", None) transaction._nr_message_ids[response_id] = message_ids raise - except Exception as e: + except Exception as exc: if hasattr(self, "_nr_ft"): openai_attrs = getattr(self, "_nr_openai_attrs", {}) - self._nr_ft.__exit__(*sys.exc_info()) # If there are no openai attrs exit early as there's no data to record. if not openai_attrs: + self._nr_ft.__exit__(*sys.exc_info()) raise - self.record_streaming_chat_completion_events(transaction) + self.record_streaming_chat_completion_events_error(transaction, openai_attrs, exc) raise return return_val - def record_streaming_chat_completion_events(self, transaction): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) + def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): + chat_completion_id = str(uuid.uuid4()) + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. 
+ body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": chat_completion_id, + } + else: + organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": chat_completion_id, + } + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + self._nr_ft.notice_error( + attributes=notice_error_attributes, + ) + self._nr_ft.__exit__(*sys.exc_info()) + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - raise + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + # Usage tokens are not supported in streaming for now. 
+ "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + "response.organization": organization, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + def record_streaming_chat_completion_events(self, transaction, openai_attrs): content = openai_attrs.get("content", None) role = openai_attrs.get("role") diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index 988dae8b1..e1ed8271d 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -47,7 +47,26 @@ "x-request-id": "49dbbffbd3c3f4612aa48def69059ccd", }, 200, - ["Bad response"], + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + ], ], "Invalid API key.": [ {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"}, @@ -643,7 +662,11 @@ def _simple_get(self): if stream and status_code < 400: for resp in response: data = json.dumps(resp).encode("utf-8") - self.wfile.write(b"data: %s\n" % data) + if prompt == "Stream parsing error.": + # Force a parsing error by writing an invalid streamed response. 
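+                    # Server-sent events are newline-delimited, so omitting the
+                    # terminator below makes consecutive chunks run together and
+                    # the client's stream parser raise an APIError.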
+ self.wfile.write(b"data: %s" % data) + else: + self.wfile.write(b"data: %s\n" % data) else: self.wfile.write(json.dumps(response).encode("utf-8")) return diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index 8463c9819..267f97e96 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -579,21 +579,65 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ ) +expected_events_stream_parsing_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Stream parsing error.", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( - callable_name(openai.error.AuthenticationError), + callable_name(openai.error.APIError), exact_attrs={ "agent": {}, "intrinsic": {}, "user": { - "http.statusCode": 401, + "http.statusCode": 200, }, }, ) @validate_span_events( exact_agents={ - "error.message": "Incorrect API key provided: DEADBEEF. 
You can find your API key at https://platform.openai.com/account/api-keys.", + "error.message": 'HTTP code 200 from API ({"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": null, "finish_reason": null}]}data: {"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": null, "finish_reason": null}]})', } ) @validate_transaction_metrics( @@ -602,22 +646,63 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], background_task=True, ) -@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_events(expected_events_stream_parsing_error) @validate_custom_event_count(count=2) @background_task() def test_chat_completion_stream_parsing_error_async(loop, monkeypatch, set_trace_info): - # with pytest.raises(openai.error.APIError): - set_trace_info() + with pytest.raises(openai.error.APIError): + set_trace_info() + + async def consumer(): + generator = await openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Stream parsing error."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.APIError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 200, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": 'HTTP code 200 from API ({"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": null, "finish_reason": null}]}data: {"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": null, "finish_reason": null}]})', + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_stream_parsing_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_stream_parsing_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_stream_parsing_error(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.APIError): + set_trace_info() - async def consumer(): - generator = await openai.ChatCompletion.acreate( + generator = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=({"role": "user", "content": "Stream parsing error."},), temperature=0.7, max_tokens=100, stream=True, ) - async for resp in generator: + for resp in generator: assert resp - - loop.run_until_complete(consumer()) From 7433512b190d8e1505fd2025b76e09794e579cbb Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Mon, 12 Feb 2024 19:09:18 -0800 Subject: [PATCH 13/19] Ignore v1 tests in v0 --- 
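Note for reviewers: pytest skips any file named in a module-level
collect_ignore list in conftest.py, so each major version of the openai SDK
only collects the tests written against its own API surface. A minimal
sketch of the gating pattern (the real conftest may derive the OPENAI_V1
flag differently, e.g. from the agent's package version helpers):

    import openai

    OPENAI_V1 = int(openai.version.VERSION.split(".")[0]) >= 1

    if OPENAI_V1:
        collect_ignore = ["test_embeddings.py", "test_chat_completion.py"]
    else:
        collect_ignore = ["test_embeddings_v1.py", "test_chat_completion_v1.py"]
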
tests/mlmodel_openai/conftest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py index 884ab79c6..36a0f08dd 100644 --- a/tests/mlmodel_openai/conftest.py +++ b/tests/mlmodel_openai/conftest.py @@ -55,17 +55,18 @@ "test_chat_completion_v1.py", "test_chat_completion_error_v1.py", "test_embeddings_v1.py", - "test_get_llm_message_ids_v1.py", - "test_chat_completion_error_v1.py", "test_embeddings_error_v1.py", + "test_get_llm_message_ids_v1.py", ] else: collect_ignore = [ "test_embeddings.py", "test_embeddings_error.py", "test_chat_completion.py", - "test_get_llm_message_ids.py", "test_chat_completion_error.py", + "test_chat_completion_stream.py", + "test_chat_completion_stream_error.py", + "test_get_llm_message_ids.py", ] From f3cda4c761d7ac388f05aaee981ad78cf88a6e27 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Mon, 12 Feb 2024 19:36:02 -0800 Subject: [PATCH 14/19] Refactor generator wrappers --- newrelic/hooks/mlmodel_openai.py | 610 +++++++++++-------------------- 1 file changed, 207 insertions(+), 403 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index 93060a877..65de5cdf3 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -949,225 +949,237 @@ def __next__(self): return_val = None try: return_val = self.__wrapped__.__next__() - if return_val: - choices = return_val.get("choices", []) - self._nr_openai_attrs["response.model"] = return_val.get("model", "") - self._nr_openai_attrs["id"] = return_val.get("id", "") - self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") - if choices: - delta = choices[0].get("delta", {}) - if delta: - self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get( - "content", "" - ) - self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") - self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") - self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + record_stream_chunk(self, return_val) except StopIteration as e: - if hasattr(self, "_nr_ft"): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) - self._nr_ft.__exit__(None, None, None) - - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - raise - - message_ids = self.record_streaming_chat_completion_events(transaction, openai_attrs) - # Cache message ids on transaction for retrieval after open ai call completion. - if not hasattr(transaction, "_nr_message_ids"): - transaction._nr_message_ids = {} - response_id = openai_attrs.get("response_id", None) - transaction._nr_message_ids[response_id] = message_ids + record_events_on_stop_iteration(self, transaction) raise except Exception as exc: - if hasattr(self, "_nr_ft"): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) + record_error(self, transaction, exc) + raise + return return_val + + def close(self): + return super(GeneratorProxy, self).close() - # If there are no openai attrs exit early as there's no data to record. 
- if not openai_attrs: - self._nr_ft.__exit__(*sys.exc_info()) - raise - self.record_streaming_chat_completion_events_error(transaction, openai_attrs, exc) +def record_stream_chunk(self, return_val): + if return_val: + choices = return_val.get("choices", []) + self._nr_openai_attrs["response.model"] = return_val.get("model", "") + self._nr_openai_attrs["id"] = return_val.get("id", "") + self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") + if choices: + delta = choices[0].get("delta", {}) + if delta: + self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get("content", "") + self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") + self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") + self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + + +def record_events_on_stop_iteration(self, transaction): + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + self._nr_ft.__exit__(None, None, None) + + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: raise - return return_val - def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): - chat_completion_id = str(uuid.uuid4()) - if OPENAI_V1: - response = getattr(exc, "response", "") - response_headers = getattr(response, "headers", "") - organization = response_headers.get("openai-organization", "") if response_headers else "" - # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception - # object so grab them from the response body object instead. - body = getattr(exc, "body", {}) or {} - notice_error_attributes = { - "http.statusCode": getattr(exc, "status_code", "") or "", - "error.message": body.get("message", "") or "", - "error.code": body.get("code", "") or "", - "error.param": body.get("param", "") or "", - "completion_id": chat_completion_id, - } - else: - organization = getattr(exc, "organization", "") - notice_error_attributes = { - "http.statusCode": getattr(exc, "http_status", ""), - "error.message": getattr(exc, "_message", ""), - "error.code": getattr(getattr(exc, "error", ""), "code", ""), - "error.param": getattr(exc, "param", ""), - "completion_id": chat_completion_id, - } - message = notice_error_attributes.pop("error.message") - if message: - exc._nr_message = message - self._nr_ft.notice_error( - attributes=notice_error_attributes, - ) - self._nr_ft.__exit__(*sys.exc_info()) - content = openai_attrs.get("content", None) - role = openai_attrs.get("role") + message_ids = record_streaming_chat_completion_events(self, transaction, openai_attrs) + # Cache message ids on transaction for retrieval after open ai call completion. 
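+        # (The get_llm_message_ids API reads this cache back out by response
+        # id; see test_get_llm_message_ids.py.)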
+ if not hasattr(transaction, "_nr_message_ids"): + transaction._nr_message_ids = {} + response_id = openai_attrs.get("response_id", None) + transaction._nr_message_ids[response_id] = message_ids - custom_attrs_dict = transaction._custom_params - conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") +def record_error(self, transaction, exc): + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) - response_headers = openai_attrs.get("response_headers", {}) - settings = transaction.settings if transaction.settings is not None else global_settings() - response_id = openai_attrs.get("id", None) - request_id = response_headers.get("x-request-id", "") + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: + self._nr_ft.__exit__(*sys.exc_info()) + raise - api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc) - messages = openai_attrs.get("messages", []) - chat_completion_summary_dict = { - "id": chat_completion_id, - "appName": settings.app_name, - "conversation_id": conversation_id, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "api_key_last_four_digits": api_key_last_four_digits, - "duration": self._nr_ft.duration, - "request.model": openai_attrs.get("request.model", ""), - # Usage tokens are not supported in streaming for now. - "request.temperature": openai_attrs.get("temperature", ""), - "request.max_tokens": openai_attrs.get("max_tokens", ""), - "vendor": "openAI", - "ingest_source": "Python", - "response.number_of_messages": len(messages) + (1 if content else 0), - "response.organization": organization, - "error": True, +def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): + chat_completion_id = str(uuid.uuid4()) + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. 
+ body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": chat_completion_id, } - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + else: + organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": chat_completion_id, + } + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + self._nr_ft.notice_error( + attributes=notice_error_attributes, + ) + self._nr_ft.__exit__(*sys.exc_info()) + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") - output_message_list = [] - if content: - output_message_list = [{"content": content, "role": role}] + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - return create_chat_completion_message_event( - transaction, - settings.app_name, - list(messages), - chat_completion_id, - span_id, - trace_id, - openai_attrs.get("response.model", ""), - response_id, - request_id, - conversation_id, - output_message_list, - ) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") - def record_streaming_chat_completion_events(self, transaction, openai_attrs): - content = openai_attrs.get("content", None) - role = openai_attrs.get("role") + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") - custom_attrs_dict = transaction._custom_params - conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") - chat_completion_id = str(uuid.uuid4()) - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") + messages = openai_attrs.get("messages", []) - response_headers = openai_attrs.get("response_headers", {}) - settings = transaction.settings if transaction.settings is not None else global_settings() - response_id = openai_attrs.get("id", None) - request_id = response_headers.get("x-request-id", "") - organization = response_headers.get("openai-organization", "") + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + # Usage tokens are not supported in streaming for now. 
+ "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + "response.organization": organization, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) - api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] - messages = openai_attrs.get("messages", []) + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) - chat_completion_summary_dict = { - "id": chat_completion_id, - "appName": settings.app_name, - "conversation_id": conversation_id, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "request_id": request_id, - "api_key_last_four_digits": api_key_last_four_digits, - "duration": self._nr_ft.duration, - "request.model": openai_attrs.get("request.model", ""), - "response.model": openai_attrs.get("response.model", ""), - "response.organization": organization, - # Usage tokens are not supported in streaming for now. - "request.temperature": openai_attrs.get("temperature", ""), - "request.max_tokens": openai_attrs.get("max_tokens", ""), - "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), - "response.headers.llmVersion": response_headers.get("openai-version", ""), - "response.headers.ratelimitLimitRequests": check_rate_limit_header( - response_headers, "x-ratelimit-limit-requests", True - ), - "response.headers.ratelimitLimitTokens": check_rate_limit_header( - response_headers, "x-ratelimit-limit-tokens", True - ), - "response.headers.ratelimitResetTokens": check_rate_limit_header( - response_headers, "x-ratelimit-reset-tokens", False - ), - "response.headers.ratelimitResetRequests": check_rate_limit_header( - response_headers, "x-ratelimit-reset-requests", False - ), - "response.headers.ratelimitRemainingTokens": check_rate_limit_header( - response_headers, "x-ratelimit-remaining-tokens", True - ), - "response.headers.ratelimitRemainingRequests": check_rate_limit_header( - response_headers, "x-ratelimit-remaining-requests", True - ), - "vendor": "openAI", - "ingest_source": "Python", - "response.number_of_messages": len(messages) + (1 if content else 0), - } - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) +def record_streaming_chat_completion_events(self, transaction, openai_attrs): + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") - output_message_list = [] - if content: - output_message_list = [{"content": content, "role": role}] + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - return create_chat_completion_message_event( - transaction, - settings.app_name, - list(messages), - chat_completion_id, - span_id, - trace_id, - openai_attrs.get("response.model", ""), - response_id, - request_id, - conversation_id, - output_message_list, - ) + chat_completion_id = str(uuid.uuid4()) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", 
"") - def close(self): - return super(GeneratorProxy, self).close() + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + organization = response_headers.get("openai-organization", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "request_id": request_id, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + "response.model": openai_attrs.get("response.model", ""), + "response.organization": organization, + # Usage tokens are not supported in streaming for now. + "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), + "response.headers.llmVersion": response_headers.get("openai-version", ""), + "response.headers.ratelimitLimitRequests": check_rate_limit_header( + response_headers, "x-ratelimit-limit-requests", True + ), + "response.headers.ratelimitLimitTokens": check_rate_limit_header( + response_headers, "x-ratelimit-limit-tokens", True + ), + "response.headers.ratelimitResetTokens": check_rate_limit_header( + response_headers, "x-ratelimit-reset-tokens", False + ), + "response.headers.ratelimitResetRequests": check_rate_limit_header( + response_headers, "x-ratelimit-reset-requests", False + ), + "response.headers.ratelimitRemainingTokens": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-tokens", True + ), + "response.headers.ratelimitRemainingRequests": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-requests", True + ), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + } + + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) class AsyncGeneratorProxy(ObjectProxy): @@ -1186,223 +1198,15 @@ async def __anext__(self): return_val = None try: return_val = await self._nr_wrapped_iter.__anext__() - if return_val: - choices = return_val.get("choices", []) - self._nr_openai_attrs["response.model"] = return_val.get("model", "") - self._nr_openai_attrs["id"] = return_val.get("id", "") - self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") - if choices: - delta = choices[0].get("delta", {}) - if delta: - self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get( - "content", "" - ) - self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") - self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") - 
self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + record_stream_chunk(self, return_val) except StopAsyncIteration as e: - if hasattr(self, "_nr_ft"): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) - self._nr_ft.__exit__(None, None, None) - - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - raise - - message_ids = self.record_streaming_chat_completion_events(transaction, openai_attrs) - # Cache message ids on transaction for retrieval after open ai call completion. - if not hasattr(transaction, "_nr_message_ids"): - transaction._nr_message_ids = {} - response_id = openai_attrs.get("response_id", None) - transaction._nr_message_ids[response_id] = message_ids + record_events_on_stop_iteration(self, transaction) raise except Exception as exc: - if hasattr(self, "_nr_ft"): - openai_attrs = getattr(self, "_nr_openai_attrs", {}) - - # If there are no openai attrs exit early as there's no data to record. - if not openai_attrs: - self._nr_ft.__exit__(*sys.exc_info()) - raise - - self.record_streaming_chat_completion_events_error(transaction, openai_attrs, exc) + record_error(self, transaction, exc) raise return return_val - def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): - chat_completion_id = str(uuid.uuid4()) - if OPENAI_V1: - response = getattr(exc, "response", "") - response_headers = getattr(response, "headers", "") - organization = response_headers.get("openai-organization", "") if response_headers else "" - # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception - # object so grab them from the response body object instead. 
- body = getattr(exc, "body", {}) or {} - notice_error_attributes = { - "http.statusCode": getattr(exc, "status_code", "") or "", - "error.message": body.get("message", "") or "", - "error.code": body.get("code", "") or "", - "error.param": body.get("param", "") or "", - "completion_id": chat_completion_id, - } - else: - organization = getattr(exc, "organization", "") - notice_error_attributes = { - "http.statusCode": getattr(exc, "http_status", ""), - "error.message": getattr(exc, "_message", ""), - "error.code": getattr(getattr(exc, "error", ""), "code", ""), - "error.param": getattr(exc, "param", ""), - "completion_id": chat_completion_id, - } - message = notice_error_attributes.pop("error.message") - if message: - exc._nr_message = message - self._nr_ft.notice_error( - attributes=notice_error_attributes, - ) - self._nr_ft.__exit__(*sys.exc_info()) - content = openai_attrs.get("content", None) - role = openai_attrs.get("role") - - custom_attrs_dict = transaction._custom_params - conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") - - response_headers = openai_attrs.get("response_headers", {}) - settings = transaction.settings if transaction.settings is not None else global_settings() - response_id = openai_attrs.get("id", None) - request_id = response_headers.get("x-request-id", "") - - api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") - - messages = openai_attrs.get("messages", []) - - chat_completion_summary_dict = { - "id": chat_completion_id, - "appName": settings.app_name, - "conversation_id": conversation_id, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "api_key_last_four_digits": api_key_last_four_digits, - "duration": self._nr_ft.duration, - "request.model": openai_attrs.get("request.model", ""), - # Usage tokens are not supported in streaming for now. 
- "request.temperature": openai_attrs.get("temperature", ""), - "request.max_tokens": openai_attrs.get("max_tokens", ""), - "vendor": "openAI", - "ingest_source": "Python", - "response.number_of_messages": len(messages) + (1 if content else 0), - "response.organization": organization, - "error": True, - } - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) - - output_message_list = [] - if content: - output_message_list = [{"content": content, "role": role}] - - return create_chat_completion_message_event( - transaction, - settings.app_name, - list(messages), - chat_completion_id, - span_id, - trace_id, - openai_attrs.get("response.model", ""), - response_id, - request_id, - conversation_id, - output_message_list, - ) - - def record_streaming_chat_completion_events(self, transaction, openai_attrs): - content = openai_attrs.get("content", None) - role = openai_attrs.get("role") - - custom_attrs_dict = transaction._custom_params - conversation_id = custom_attrs_dict.get("llm.conversation_id", "") - - chat_completion_id = str(uuid.uuid4()) - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") - - response_headers = openai_attrs.get("response_headers", {}) - settings = transaction.settings if transaction.settings is not None else global_settings() - response_id = openai_attrs.get("id", None) - request_id = response_headers.get("x-request-id", "") - organization = response_headers.get("openai-organization", "") - - api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") - - messages = openai_attrs.get("messages", []) - - chat_completion_summary_dict = { - "id": chat_completion_id, - "appName": settings.app_name, - "conversation_id": conversation_id, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "request_id": request_id, - "api_key_last_four_digits": api_key_last_four_digits, - "duration": self._nr_ft.duration, - "request.model": openai_attrs.get("request.model", ""), - "response.model": openai_attrs.get("response.model", ""), - "response.organization": organization, - # Usage tokens are not supported in streaming for now. 
- "request.temperature": openai_attrs.get("temperature", ""), - "request.max_tokens": openai_attrs.get("max_tokens", ""), - "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), - "response.headers.llmVersion": response_headers.get("openai-version", ""), - "response.headers.ratelimitLimitRequests": check_rate_limit_header( - response_headers, "x-ratelimit-limit-requests", True - ), - "response.headers.ratelimitLimitTokens": check_rate_limit_header( - response_headers, "x-ratelimit-limit-tokens", True - ), - "response.headers.ratelimitResetTokens": check_rate_limit_header( - response_headers, "x-ratelimit-reset-tokens", False - ), - "response.headers.ratelimitResetRequests": check_rate_limit_header( - response_headers, "x-ratelimit-reset-requests", False - ), - "response.headers.ratelimitRemainingTokens": check_rate_limit_header( - response_headers, "x-ratelimit-remaining-tokens", True - ), - "response.headers.ratelimitRemainingRequests": check_rate_limit_header( - response_headers, "x-ratelimit-remaining-requests", True - ), - "vendor": "openAI", - "ingest_source": "Python", - "response.number_of_messages": len(messages) + (1 if content else 0), - } - - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) - - output_message_list = [] - if content: - output_message_list = [{"content": content, "role": role}] - - return create_chat_completion_message_event( - transaction, - settings.app_name, - list(messages), - chat_completion_id, - span_id, - trace_id, - openai_attrs.get("response.model", ""), - response_id, - request_id, - conversation_id, - output_message_list, - ) - async def aclose(self): return await super(AsyncGeneratorProxy, self).aclose() From 88babfef2fd2592efacfc142e1cb0bbc0d36e109 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 13 Feb 2024 08:36:10 -0800 Subject: [PATCH 15/19] Fixup: tox merge conflicts --- tox.ini | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index c58e8d0e3..969f98548 100644 --- a/tox.ini +++ b/tox.ini @@ -207,7 +207,6 @@ deps = component_flask_rest: flask-restful component_flask_rest: jinja2 component_flask_rest: itsdangerous - component_flask_rest-flaskrestxlatest: flask component_flask_rest-flaskrestxlatest: flask-restx component_flask_rest-flaskrestxlatest: flask ; flask-restx only supports Flask v3 after flask-restx v1.3.0 @@ -402,7 +401,7 @@ commands = allowlist_externals={toxinidir}/.github/scripts/* install_command= - pip install {opts} {packages} + {toxinidir}/.github/scripts/retry.sh 3 pip install {opts} {packages} extras = agent_streaming: infinite-tracing From 82b8e3257a515e0fd2640cb4c530c6be75ac539e Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 13 Feb 2024 10:32:18 -0800 Subject: [PATCH 16/19] Use fixture --- tests/mlmodel_openai/conftest.py | 103 +++++++++++++++++-------------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py index 36a0f08dd..976ba7875 100644 --- a/tests/mlmodel_openai/conftest.py +++ b/tests/mlmodel_openai/conftest.py @@ -150,6 +150,7 @@ def openai_server( wrap_openai_api_requestor_request, wrap_openai_api_requestor_interpret_response, wrap_httpx_client_send, + wrap_engine_api_resource_create, ): """ This fixture will either create a mocked backend for testing purposes, or will @@ -283,60 +284,68 @@ def bind_request_interpret_response_params(result, stream): return result.content.decode("utf-8"), result.status_code, result.headers 
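+# The generator proxy and the engine-api wrapper below are provided as
+# session-scoped fixtures so the openai_server fixture above can request
+# them explicitly instead of relying on module-level wrapping.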
-class GeneratorProxy(ObjectProxy): - def __init__(self, wrapped): - super(GeneratorProxy, self).__init__(wrapped) +@pytest.fixture(scope="session") +def generator_proxy(): + class GeneratorProxy(ObjectProxy): + def __init__(self, wrapped): + super(GeneratorProxy, self).__init__(wrapped) + + def __iter__(self): + return self + + # Make this Proxy a pass through to our instrumentation's proxy by passing along + # get attr and set attr calls to our instrumentation's proxy. + def __getattr__(self, attr): + return self.__wrapped__.__getattr__(attr) + + def __setattr__(self, attr, value): + return self.__wrapped__.__setattr__(attr, value) + + def __next__(self): + transaction = current_transaction() + if not transaction: + return self.__wrapped__.__next__() + + try: + return_val = self.__wrapped__.__next__() + if return_val: + prompt = [k for k in OPENAI_AUDIT_LOG_CONTENTS.keys()][-1] + headers = dict( + filter( + lambda k: k[0].lower() in RECORDED_HEADERS + or k[0].lower().startswith("openai") + or k[0].lower().startswith("x-ratelimit"), + return_val._nr_response_headers.items(), + ) + ) + OPENAI_AUDIT_LOG_CONTENTS[prompt][0] = headers + OPENAI_AUDIT_LOG_CONTENTS[prompt][2].append(return_val.to_dict_recursive()) + return return_val + except Exception as e: + raise - def __iter__(self): - return self + def close(self): + return super(GeneratorProxy, self).close() - # Make this Proxy a pass through to our instrumentation's proxy by passing along - # get attr and set attr calls to our instrumentation's proxy. - def __getattr__(self, attr): - return self.__wrapped__.__getattr__(attr) + return GeneratorProxy - def __setattr__(self, attr, value): - return self.__wrapped__.__setattr__(attr, value) - def __next__(self): +@pytest.fixture(scope="session") +def wrap_engine_api_resource_create(generator_proxy): + def _wrap_engine_api_resource_create(wrapped, instance, args, kwargs): transaction = current_transaction() - if not transaction: - return self.__wrapped__.__next__() - - try: - return_val = self.__wrapped__.__next__() - if return_val: - prompt = [k for k in OPENAI_AUDIT_LOG_CONTENTS.keys()][-1] - headers = dict( - filter( - lambda k: k[0].lower() in RECORDED_HEADERS - or k[0].lower().startswith("openai") - or k[0].lower().startswith("x-ratelimit"), - return_val._nr_response_headers.items(), - ) - ) - OPENAI_AUDIT_LOG_CONTENTS[prompt][0] = headers - OPENAI_AUDIT_LOG_CONTENTS[prompt][2].append(return_val.to_dict_recursive()) - return return_val - except Exception as e: - raise - - def close(self): - return super(GeneratorProxy, self).close() + if not transaction: + return wrapped(*args, **kwargs) -def wrap_engine_api_resource_create(wrapped, instance, args, kwargs): - transaction = current_transaction() - - if not transaction: - return wrapped(*args, **kwargs) + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) - bound_args = bind_args(wrapped, args, kwargs) - stream = bound_args["params"].get("stream", False) + return_val = wrapped(*args, **kwargs) - return_val = wrapped(*args, **kwargs) + if stream: + return generator_proxy(return_val) + else: + return return_val - if stream: - return GeneratorProxy(return_val) - else: - return return_val + return _wrap_engine_api_resource_create From b38b0675176c5ae34984e79c7b948bc44c6085e2 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 13 Feb 2024 10:39:04 -0800 Subject: [PATCH 17/19] Remove 3.8 from langchain tests --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini 
b/tox.ini index 969f98548..acc0c41bd 100644 --- a/tox.ini +++ b/tox.ini @@ -145,7 +145,7 @@ envlist = python-mlmodel_openai-openai107-{py312}, python-mlmodel_openai-openailatest-{py37,py38,py39,py310,py311,py312,pypy38}, ; langchain dependency faiss-cpu isn't compatible with 3.12 yet. - python-mlmodel_langchain-{py38,py39,py310,py311,pypy38}, + python-mlmodel_langchain-{py39,py310,py311}, python-logger_logging-{py27,py37,py38,py39,py310,py311,py312,pypy27,pypy38}, python-logger_loguru-{py37,py38,py39,py310,py311,py312,pypy38}-logurulatest, python-logger_loguru-py39-loguru{06,05}, From 974e5646f8c576b9bd4db255c30d0e8ab2009e44 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Wed, 14 Feb 2024 16:47:02 -0800 Subject: [PATCH 18/19] Add empty line between lines --- tests/mlmodel_openai/_mock_external_openai_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index e1ed8271d..17ce71ada 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -666,7 +666,7 @@ def _simple_get(self): # Force a parsing error by writing an invalid streamed response. self.wfile.write(b"data: %s" % data) else: - self.wfile.write(b"data: %s\n" % data) + self.wfile.write(b"data: %s\n\n" % data) else: self.wfile.write(json.dumps(response).encode("utf-8")) return From d61d0655d93daf23e1abfec14fb414cef415e257 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Thu, 15 Feb 2024 09:01:06 -0800 Subject: [PATCH 19/19] Remove unneeded loop fixture --- tests/mlmodel_openai/test_chat_completion_stream_error.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index 267f97e96..15fb1512d 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -693,7 +693,7 @@ async def consumer(): @validate_custom_events(expected_events_stream_parsing_error) @validate_custom_event_count(count=2) @background_task() -def test_chat_completion_stream_parsing_error(loop, monkeypatch, set_trace_info): +def test_chat_completion_stream_parsing_error(monkeypatch, set_trace_info): with pytest.raises(openai.error.APIError): set_trace_info()
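
Note: only the async variants of these tests drive coroutines through
loop.run_until_complete, so the synchronous parsing test never used the
event-loop fixture. For reference, a session-scoped loop fixture of the
kind these suites rely on typically looks like the following sketch (not
necessarily the exact conftest definition):

    import asyncio

    import pytest

    @pytest.fixture(scope="session")
    def loop():
        loop = asyncio.new_event_loop()
        yield loop
        loop.close()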