Skip to content

Commit

Permalink
Add openai sync instrumentation.
Browse files Browse the repository at this point in the history
  • Loading branch information
umaannamalai committed Oct 10, 2023
1 parent 2834663 commit dd29433
Show file tree
Hide file tree
Showing 4 changed files with 318 additions and 4 deletions.
10 changes: 10 additions & 0 deletions newrelic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2037,6 +2037,16 @@ def _process_trace_cache_import_hooks():


def _process_module_builtin_defaults():
_process_module_definition(
"openai.api_resources.chat_completion",
"newrelic.hooks.mlmodel_openai",
"instrument_openai_api_resources_chat_completion",
)
_process_module_definition(
"openai.util",
"newrelic.hooks.mlmodel_openai",
"instrument_openai_util",
)
_process_module_definition(
"asyncio.base_events",
"newrelic.hooks.coroutines_asyncio",
Expand Down
138 changes: 138 additions & 0 deletions newrelic/hooks/mlmodel_openai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@

# Copyright 2010 New Relic, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import openai
import uuid
from newrelic.api.function_trace import FunctionTrace
from newrelic.common.object_wrapper import wrap_function_wrapper
from newrelic.api.transaction import current_transaction
from newrelic.api.time_trace import get_trace_linking_metadata
from newrelic.core.config import global_settings
from newrelic.common.object_names import callable_name
from newrelic.core.attribute import MAX_LOG_MESSAGE_LENGTH


def wrap_chat_completion_create(wrapped, instance, args, kwargs):
    """Trace a sync ``ChatCompletion.create`` call and record LLM events.

    Records one ``LlmChatCompletionSummary`` event for the call, plus one
    ``LlmChatCompletionMessage`` event per request message and for the first
    response choice, all linked by a shared completion id and the current
    span/trace ids.

    Returns the wrapped call's response unchanged.
    """
    transaction = current_transaction()

    if not transaction:
        # Outside a transaction there is nothing to record, but the wrapped
        # API call must still run and its result must reach the caller.
        # (Previously this returned None, swallowing the API response.)
        return wrapped(*args, **kwargs)

    ft_name = callable_name(wrapped)
    with FunctionTrace(ft_name) as ft:
        response = wrapped(*args, **kwargs)

    if not response:
        return response

    # Reuse a caller-supplied conversation id when present so related
    # completions can be grouped; otherwise generate one for this call.
    custom_attrs_dict = transaction._custom_params
    conversation_id = custom_attrs_dict.get("conversation_id", str(uuid.uuid4()))

    chat_completion_id = str(uuid.uuid4())
    available_metadata = get_trace_linking_metadata()
    span_id = available_metadata.get("span.id", "")
    trace_id = available_metadata.get("trace.id", "")

    # Headers are attached by wrap_convert_to_openai_object; default to an
    # empty dict so a missing attribute cannot break the customer's call.
    response_headers = getattr(response, "_nr_response_headers", None) or {}
    response_model = response.model
    settings = transaction.settings if transaction.settings is not None else global_settings()

    chat_completion_summary_dict = {
        "id": chat_completion_id,
        "appName": settings.app_name,
        "conversation_id": conversation_id,
        "span_id": span_id,
        "trace_id": trace_id,
        "transaction_id": transaction._transaction_id,
        # Only the last four digits of the API key are recorded.
        "api_key_last_four_digits": f"sk-{response.api_key[-4:]}",
        "response_time": ft.duration,
        # "engine" is the legacy parameter name for Azure-style deployments.
        "request.model": kwargs.get("model") or kwargs.get("engine"),
        "response.model": response_model,
        "response.organization": response.organization,
        "response.usage.completion_tokens": response.usage.completion_tokens,
        "response.usage.total_tokens": response.usage.total_tokens,
        "response.usage.prompt_tokens": response.usage.prompt_tokens,
        "request.temperature": kwargs.get("temperature"),
        "request.max_tokens": kwargs.get("max_tokens"),
        "response.choices.finish_reason": response.choices[0].finish_reason,
        "response.api_type": response.api_type,
        "response.headers.llmVersion": response_headers.get("openai-version"),
        "response.headers.ratelimitLimitRequests": check_rate_limit_header(response_headers, "x-ratelimit-limit-requests", True),
        "response.headers.ratelimitLimitTokens": check_rate_limit_header(response_headers, "x-ratelimit-limit-tokens", True),
        "response.headers.ratelimitResetTokens": check_rate_limit_header(response_headers, "x-ratelimit-reset-tokens", False),
        "response.headers.ratelimitResetRequests": check_rate_limit_header(response_headers, "x-ratelimit-reset-requests", False),
        "response.headers.ratelimitRemainingTokens": check_rate_limit_header(response_headers, "x-ratelimit-remaining-tokens", True),
        "response.headers.ratelimitRemainingRequests": check_rate_limit_header(response_headers, "x-ratelimit-remaining-requests", True),
        "vendor": "openAI",
        "ingest_source": "Python",
        "number_of_messages": len(kwargs.get("messages", [])) + len(response.choices),
        "api_version": response_headers.get("openai-version"),
    }

    transaction.record_ml_event("LlmChatCompletionSummary", chat_completion_summary_dict)
    message_list = list(kwargs.get("messages", [])) + [response.choices[0].message]

    create_chat_completion_message_event(transaction, message_list, chat_completion_id, span_id, trace_id, response_model)

    return response


def check_rate_limit_header(response_headers, header_name, is_int):
    """Look up a rate limit header, optionally converting it to an int.

    Args:
        response_headers: Mapping of response header names to values.
        header_name: The header to look up.
        is_int: When True, attempt to convert the value to an int.

    Returns:
        The header value (an int when ``is_int`` and the value is numeric),
        or None when the header is absent.
    """
    if header_name not in response_headers:
        return None

    header_value = response_headers.get(header_name)
    if is_int:
        try:
            header_value = int(header_value)
        except ValueError:
            # Keep the raw string rather than crashing the wrapper when the
            # server sends a non-numeric value (e.g. "90ms" or "unlimited").
            pass
    return header_value


def create_chat_completion_message_event(transaction, message_list, chat_completion_id, span_id, trace_id, response_model):
    """Record one LlmChatCompletionMessage event for each message.

    Each event carries a fresh UUID, its position in the conversation, and
    the ids needed to link it back to the summary event and the trace.
    """
    if not transaction:
        return

    for sequence, message in enumerate(message_list):
        event_attrs = {
            "id": str(uuid.uuid4()),
            "span_id": span_id,
            "trace_id": trace_id,
            "transaction_id": transaction._transaction_id,
            # Truncate content to the agent's maximum attribute length.
            "content": message.get("content", "")[:MAX_LOG_MESSAGE_LENGTH],
            "role": message.get("role"),
            "completion_id": chat_completion_id,
            "sequence": sequence,
            "model": response_model,
            "vendor": "openAI",
            "ingest_source": "Python",
        }
        transaction.record_ml_event("LlmChatCompletionMessage", event_attrs)


def wrap_convert_to_openai_object(wrapped, instance, args, kwargs):
    """Carry raw HTTP response headers onto the converted OpenAI object.

    The headers are stashed under ``_nr_response_headers`` so that
    instrumentation running later in the call can read rate limit and
    version information.
    """
    raw_response = args[0]
    converted = wrapped(*args, **kwargs)

    if isinstance(raw_response, openai.openai_response.OpenAIResponse):
        converted._nr_response_headers = getattr(raw_response, "_headers", {})

    return converted


def instrument_openai_api_resources_chat_completion(module):
    """Hook ``ChatCompletion.create`` when the imported module provides it."""
    target = module.ChatCompletion
    if hasattr(target, "create"):
        wrap_function_wrapper(module, "ChatCompletion.create", wrap_chat_completion_create)


def instrument_openai_util(module):
    """Hook ``convert_to_openai_object`` so response headers are preserved.

    The wrapper attaches the raw response headers to the converted object,
    which the chat completion instrumentation later reads.
    """
    # Guard on attribute presence for consistency with the ChatCompletion
    # instrumenter and to tolerate openai versions lacking this helper.
    if hasattr(module, "convert_to_openai_object"):
        wrap_function_wrapper(module, "convert_to_openai_object", wrap_convert_to_openai_object)
5 changes: 4 additions & 1 deletion tests/mlmodel_openai/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,11 @@
"transaction_tracer.stack_trace_threshold": 0.0,
"debug.log_data_collector_payloads": True,
"debug.record_transaction_failure": True,
"ml_insights_event.enabled": True,
"machine_learning.enabled": True,
#"machine_learning.inference_events_value.enabled": True,
"ml_insights_events.enabled": True
}

collector_agent_registration = collector_agent_registration_fixture(
app_name="Python Agent Test (mlmodel_openai)",
default_settings=_default_settings,
Expand Down
169 changes: 166 additions & 3 deletions tests/mlmodel_openai/test_chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,182 @@
# limitations under the License.

import openai
import pytest
from testing_support.fixtures import ( # function_not_called,; override_application_settings,
function_not_called,
override_application_settings,
reset_core_stats_engine,
)

from newrelic.api.time_trace import current_trace
from newrelic.api.transaction import current_transaction, add_custom_attribute
from testing_support.validators.validate_ml_event_count import validate_ml_event_count
from testing_support.validators.validate_ml_event_payload import (
validate_ml_event_payload,
)
from testing_support.validators.validate_ml_events import validate_ml_events
from testing_support.validators.validate_ml_events_outside_transaction import (
validate_ml_events_outside_transaction,
)

import newrelic.core.otlp_utils
from newrelic.api.application import application_instance as application
from newrelic.api.background_task import background_task
from newrelic.api.transaction import record_ml_event
from newrelic.core.config import global_settings
from newrelic.packages import six

_test_openai_chat_completion_sync_messages = (
{"role": "system", "content": "You are a scientist."},
{"role": "user", "content": "What is the boiling point of water?"},
{"role": "assistant", "content": "The boiling point of water is 212 degrees Fahrenheit."},
#{"role": "user", "content": "What is the boiling point of water?"},
#{"role": "assistant", "content": "The boiling point of water is 212 degrees Fahrenheit."},
{"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"},
)


def set_trace_info():
    """Pin the current trace and span ids to stable values.

    The recorded-event fixtures assert the literal strings "trace-id" and
    "span-id", so each test overwrites the randomly generated ids before
    making the OpenAI call.

    NOTE: this helper must live at module level — the tests below call
    ``set_trace_info()`` directly, which would raise NameError if it were
    nested inside the empty ``test_openai_chat_completion_sync`` wrapper
    (removed here, as it contained no assertions).
    """
    txn = current_transaction()
    if txn:
        txn._trace_id = "trace-id"
    trace = current_trace()
    if trace:
        trace.guid = "span-id"


# Expected ML events for one successful sync chat completion, in recording
# order: a summary event followed by one message event per conversation
# message. A value of None marks attributes that vary per run (UUIDs,
# timings, transaction ids) and are not matched exactly by the validator.
sync_chat_completion_recorded_events = [
    (
        {"type": "LlmChatCompletionSummary"},
        {
            "id": None,  # UUID that varies with each run
            "appName": "Python Agent Test (mlmodel_openai)",
            "conversation_id": "my-awesome-id",
            "transaction_id": None,
            "span_id": "span-id",
            "trace_id": "trace-id",
            "api_key_last_four_digits": "sk-CRET",
            "response_time": None,  # Response time varies each test run
            "request.model": "gpt-3.5-turbo",
            "response.model": "gpt-3.5-turbo-0613",
            "response.organization": "new-relic-nkmd8b",
            "response.usage.completion_tokens": 11,
            "response.usage.total_tokens": 64,
            "response.usage.prompt_tokens": 53,
            "request.temperature": 0.7,
            "request.max_tokens": 100,
            "response.choices.finish_reason": "stop",
            "response.api_type": "None",
            "response.headers.llmVersion": "2020-10-01",
            "response.headers.ratelimitLimitRequests": 200,
            "response.headers.ratelimitLimitTokens": 40000,
            "response.headers.ratelimitResetTokens": "90ms",
            "response.headers.ratelimitResetRequests": "7m12s",
            "response.headers.ratelimitRemainingTokens": 39940,
            "response.headers.ratelimitRemainingRequests": 199,
            "vendor": "openAI",
            "ingest_source": "Python",
            "number_of_messages": 3,
            "api_version": "2020-10-01",
        },
    ),
    (
        {"type": "LlmChatCompletionMessage"},
        {
            "id": None,
            "span_id": "span-id",
            "trace_id": "trace-id",
            "transaction_id": None,
            "content": "You are a scientist.",
            "role": "system",
            "completion_id": None,
            "sequence": 0,
            "model": "gpt-3.5-turbo-0613",
            "vendor": "openAI",
            "ingest_source": "Python",
        },
    ),
    (
        {"type": "LlmChatCompletionMessage"},
        {
            "id": None,
            "span_id": "span-id",
            "trace_id": "trace-id",
            "transaction_id": None,
            "content": "What is 212 degrees Fahrenheit converted to Celsius?",
            "role": "user",
            "completion_id": None,
            "sequence": 1,
            "model": "gpt-3.5-turbo-0613",
            "vendor": "openAI",
            "ingest_source": "Python",
        },
    ),
    (
        {"type": "LlmChatCompletionMessage"},
        {
            "id": None,
            "span_id": "span-id",
            "trace_id": "trace-id",
            "transaction_id": None,
            "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
            "role": "assistant",
            "completion_id": None,
            "sequence": 2,
            "model": "gpt-3.5-turbo-0613",
            "vendor": "openAI",
            "ingest_source": "Python",
        },
    ),
]


@reset_core_stats_engine()
@validate_ml_events(sync_chat_completion_recorded_events)
# One summary event, one system message, one user message, and one response message from the assistant
@validate_ml_event_count(count=4)
@background_task()
def test_openai_chat_completion_sync_in_txn():
    """A chat completion inside a transaction records summary + message events."""
    set_trace_info()
    add_custom_attribute("conversation_id", "my-awesome-id")
    request_args = {
        "model": "gpt-3.5-turbo",
        "messages": _test_openai_chat_completion_sync_messages,
        "temperature": 0.7,
        "max_tokens": 100,
    }
    openai.ChatCompletion.create(**request_args)


@reset_core_stats_engine()
# No events should be recorded when the call happens outside of a transaction.
@validate_ml_event_count(count=0)
def test_openai_chat_completion_sync_outside_txn():
    """A chat completion outside any transaction records no LLM events."""
    # No @background_task decorator, so current_transaction() is None and
    # the instrumentation should be a pass-through.
    set_trace_info()
    add_custom_attribute("conversation_id", "my-awesome-id")
    openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=_test_openai_chat_completion_sync_messages,
        temperature=0.7,
        max_tokens=100
    )


# Settings that switch off every ML event stream; used to verify the
# instrumentation records nothing when machine learning features are disabled.
disabled_ml_settings = {
    "machine_learning.enabled": False,
    "machine_learning.inference_events_value.enabled": False,
    "ml_insights_events.enabled": False,
}


@override_application_settings(disabled_ml_settings)
@reset_core_stats_engine()
# No events should be recorded when ML settings are disabled.
@validate_ml_event_count(count=0)
def test_openai_chat_completion_sync_disabled_settings():
    """A chat completion records no LLM events when ML settings are off."""
    set_trace_info()
    openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=_test_openai_chat_completion_sync_messages,
        temperature=0.7,
        max_tokens=100
    )


Expand Down

0 comments on commit dd29433

Please sign in to comment.