Add streaming support for openai v0 #951

Merged
merged 21 commits into from Feb 16, 2024
9 changes: 6 additions & 3 deletions newrelic/api/ml_model.py
@@ -40,12 +40,15 @@ def wrap_mlmodel(model, name=None, version=None, feature_names=None, label_names
 
 def get_llm_message_ids(response_id=None):
     transaction = current_transaction()
-    if response_id and transaction:
+    if transaction:
         nr_message_ids = getattr(transaction, "_nr_message_ids", {})
-        message_id_info = nr_message_ids.pop(response_id, ())
+        message_id_info = (
+            nr_message_ids.pop("bedrock_key", ()) if not response_id else nr_message_ids.pop(response_id, ())
+        )
 
         if not message_id_info:
-            warnings.warn("No message ids found for %s" % response_id)
+            response_id_warning = "." if not response_id else " for %s." % response_id
+            warnings.warn("No message ids found%s" % response_id_warning)
             return []
 
         conversation_id, request_id, ids = message_id_info
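A side note on the hunk above: get_llm_message_ids can now be called without a response_id, in which case it falls back to the ids stored under the "bedrock_key" slot that the Bedrock hook populates later in this PR. A minimal usage sketch, assuming the agent is initialized and a traced LLM call has already run inside the transaction; the wrapper function name is illustrative:

    from newrelic.api.background_task import background_task
    from newrelic.api.ml_model import get_llm_message_ids

    @background_task(name="collect-llm-message-ids")
    def collect_ids(openai_response_id=None):
        # With an OpenAI response id: pops the ids recorded for that response.
        # Without one (the Bedrock case): pops the "bedrock_key" entry instead.
        return get_llm_message_ids(openai_response_id)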
6 changes: 6 additions & 0 deletions newrelic/config.py
@@ -2061,6 +2061,12 @@ def _process_module_builtin_defaults():
         "newrelic.hooks.mlmodel_openai",
         "instrument_openai_util",
     )
+    _process_module_definition(
+        "openai.api_resources.abstract.engine_api_resource",
+        "newrelic.hooks.mlmodel_openai",
+        "instrument_openai_api_resources_abstract_engine_api_resource",
+    )
+
     _process_module_definition(
         "openai.resources.chat.completions",
         "newrelic.hooks.mlmodel_openai",
62 changes: 56 additions & 6 deletions newrelic/hooks/external_botocore.py
@@ -97,6 +97,7 @@ def create_chat_completion_message_event(
     if not transaction:
         return
 
+    message_ids = []
     for index, message in enumerate(input_message_list):
         if response_id:
             id_ = "%s-%d" % (response_id, index)  # Response ID was set, append message index to it.
@@ -128,6 +129,7 @@
             id_ = "%s-%d" % (response_id, index)  # Response ID was set, append message index to it.
         else:
             id_ = str(uuid.uuid4())  # No response IDs, use random UUID
+        message_ids.append(id_)
 
         chat_completion_message_dict = {
             "id": id_,
@@ -144,9 +146,10 @@
             "response.model": request_model,
             "vendor": "bedrock",
             "ingest_source": "Python",
-            "is_response": True
+            "is_response": True,
         }
         transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)
+    return (conversation_id, request_id, message_ids)
 
 
 def extract_bedrock_titan_text_model(request_body, response_body=None):
@@ -246,7 +249,7 @@ def extract_bedrock_claude_model(request_body, response_body=None):
     chat_completion_summary_dict = {
         "request.max_tokens": request_body.get("max_tokens_to_sample", ""),
         "request.temperature": request_body.get("temperature", ""),
-        "response.number_of_messages": len(input_message_list)
+        "response.number_of_messages": len(input_message_list),
     }
 
     if response_body:
@@ -264,6 +267,40 @@ def extract_bedrock_claude_model(request_body, response_body=None):
     return input_message_list, output_message_list, chat_completion_summary_dict
 
 
+def extract_bedrock_llama_model(request_body, response_body=None):
+    request_body = json.loads(request_body)
+    if response_body:
+        response_body = json.loads(response_body)
+
+    input_message_list = [{"role": "user", "content": request_body.get("prompt", "")}]
+
+    chat_completion_summary_dict = {
+        "request.max_tokens": request_body.get("max_gen_len", ""),
+        "request.temperature": request_body.get("temperature", ""),
+        "response.number_of_messages": len(input_message_list),
+    }
+
+    if response_body:
+        output_message_list = [{"role": "assistant", "content": response_body.get("generation", "")}]
+        prompt_tokens = response_body.get("prompt_token_count", None)
+        completion_tokens = response_body.get("generation_token_count", None)
+        total_tokens = prompt_tokens + completion_tokens if prompt_tokens and completion_tokens else None
+
+        chat_completion_summary_dict.update(
+            {
+                "response.usage.completion_tokens": completion_tokens,
+                "response.usage.prompt_tokens": prompt_tokens,
+                "response.usage.total_tokens": total_tokens,
+                "response.choices.finish_reason": response_body.get("stop_reason", ""),
+                "response.number_of_messages": len(input_message_list) + len(output_message_list),
+            }
+        )
+    else:
+        output_message_list = []
+
+    return input_message_list, output_message_list, chat_completion_summary_dict
+
+
 def extract_bedrock_cohere_model(request_body, response_body=None):
     request_body = json.loads(request_body)
     if response_body:
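For context on the new extractor, a hedged sketch of the meta.llama2 payload shape it parses; the field names are taken only from the keys read above (prompt, max_gen_len, temperature, generation, prompt_token_count, generation_token_count, stop_reason) and the sample values are invented:

    import json

    from newrelic.hooks.external_botocore import extract_bedrock_llama_model

    request_body = json.dumps({"prompt": "What is 212F in Celsius?", "max_gen_len": 512, "temperature": 0.5})
    response_body = json.dumps(
        {
            "generation": "212F is 100C.",
            "prompt_token_count": 12,
            "generation_token_count": 7,
            "stop_reason": "stop",
        }
    )

    # The extractor json.loads both bodies and maps them onto event attributes.
    inputs, outputs, summary = extract_bedrock_llama_model(request_body, response_body)
    # inputs  -> [{"role": "user", "content": "What is 212F in Celsius?"}]
    # outputs -> [{"role": "assistant", "content": "212F is 100C."}]
    # summary["response.usage.total_tokens"] -> 19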
@@ -274,7 +311,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
     chat_completion_summary_dict = {
         "request.max_tokens": request_body.get("max_tokens", ""),
         "request.temperature": request_body.get("temperature", ""),
-        "response.number_of_messages": len(input_message_list)
+        "response.number_of_messages": len(input_message_list),
     }
 
     if response_body:
@@ -300,6 +337,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
     ("ai21.j2", extract_bedrock_ai21_j2_model),
     ("cohere", extract_bedrock_cohere_model),
     ("anthropic.claude", extract_bedrock_claude_model),
+    ("meta.llama2", extract_bedrock_llama_model),
 ]
 
 
@@ -313,6 +351,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
         return wrapped(*args, **kwargs)
 
     transaction.add_ml_model_info("Bedrock", BOTOCORE_VERSION)
+    transaction._add_agent_attribute("llm", True)
 
     # Read and replace request file stream bodies
     request_body = kwargs["body"]
@@ -368,7 +407,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
             notice_error_attributes = {
                 "http.statusCode": error_attributes["http.statusCode"],
                 "error.message": error_attributes["error.message"],
-                "error.code": error_attributes["error.code"]
+                "error.code": error_attributes["error.code"],
             }
 
             if is_embedding:
@@ -511,7 +550,7 @@ def handle_chat_completion_event(
     span_id,
 ):
     custom_attrs_dict = transaction._custom_params
-    conversation_id = custom_attrs_dict.get("conversation_id", "")
+    conversation_id = custom_attrs_dict.get("llm.conversation_id", "")
 
     chat_completion_id = str(uuid.uuid4())
 
@@ -542,7 +581,7 @@
 
     transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict)
 
-    create_chat_completion_message_event(
+    message_ids = create_chat_completion_message_event(
         transaction=transaction,
         app_name=settings.app_name,
         input_message_list=input_message_list,
@@ -556,6 +595,10 @@
         response_id=response_id,
     )
 
+    if not hasattr(transaction, "_nr_message_ids"):
+        transaction._nr_message_ids = {}
+    transaction._nr_message_ids["bedrock_key"] = message_ids
+
 
 CUSTOM_TRACE_POINTS = {
     ("sns", "publish"): message_trace("SNS", "Produce", "Topic", extract(("TopicArn", "TargetArn"), "PhoneNumber")),
@@ -592,6 +635,12 @@ def _nr_clientcreator__create_api_method_(wrapped, instance, args, kwargs):
     return tracer(wrapped)
 
 
+def _nr_clientcreator__create_methods(wrapped, instance, args, kwargs):
+    class_attributes = wrapped(*args, **kwargs)
+    class_attributes["_nr_wrapped"] = True
+    return class_attributes
+
+
 def _bind_make_request_params(operation_model, request_dict, *args, **kwargs):
     return operation_model, request_dict
 
@@ -622,3 +671,4 @@ def instrument_botocore_endpoint(module):
 
 def instrument_botocore_client(module):
     wrap_function_wrapper(module, "ClientCreator._create_api_method", _nr_clientcreator__create_api_method_)
+    wrap_function_wrapper(module, "ClientCreator._create_methods", _nr_clientcreator__create_methods)