diff --git a/evals/benchmark/stresscli/commands/config.ini b/evals/benchmark/stresscli/commands/config.ini index e954fd13..16a47653 100644 --- a/evals/benchmark/stresscli/commands/config.ini +++ b/evals/benchmark/stresscli/commands/config.ini @@ -15,7 +15,10 @@ First_token_latency_P50 = First token latency\(ms\),\s+P50:\s+([\d.]+) First_token_latency_P90 = First token latency\(ms\),\s+P50:[\s\d.,]+P90:\s+([\d.]+) First_token_latency_P99 = First token latency\(ms\),\s+P50:[\s\d.,]+P90:\s+[\s\d.,]+P99:\s+([\d.]+) First_token_latency_Avg = First token latency\(ms\),\s+P50:[\s\d.,]+P90:\s+[\s\d.,]+P99:\s+[\s\d.,]+Avg:\s+([\d.]+) -Average_Next_token_latency = Average Next token latency\(ms\):\s+([\d.]+) +Next_token_latency_P50 = Next token latency\(ms\),\s+P50:\s+([\d.]+) +Next_token_latency_P90 = Next token latency\(ms\),\s+P50:[\s\d.,]+P90:\s+([\d.]+) +Next_token_latency_P99 = Next token latency\(ms\),\s+P50:[\s\d.,]+P90:\s+[\s\d.,]+P99:\s+([\d.]+) +Next_token_latency_Avg = Next token latency\(ms\),\s+P50:[\s\d.,]+P90:\s+[\s\d.,]+P99:\s+[\s\d.,]+Avg:\s+([\d.]+) Average_token_latency = Average token latency\(ms\)\s+:\s+([\d.]+) locust_num_requests = \"num_requests\":\s+(\d+) locust_num_failures = \"num_failures\":\s+(\d+) diff --git a/evals/benchmark/stresscli/locust/tokenresponse.py b/evals/benchmark/stresscli/locust/tokenresponse.py index 75943915..fc3aff6a 100644 --- a/evals/benchmark/stresscli/locust/tokenresponse.py +++ b/evals/benchmark/stresscli/locust/tokenresponse.py @@ -64,7 +64,7 @@ def staticsOutput(environment, reqlist): ) e2e_msg = "End to End latency(ms), P50: {:.2f}, P90: {:.2f}, P99: {:.2f}, Avg: {:.2f}" first_msg = "First token latency(ms), P50: {:.2f}, P90: {:.2f}, P99: {:.2f}, Avg: {:.2f}" - next_msg = "Average Next token latency(ms): {:.2f}" + next_msg = "Next token latency(ms), P50: {:.2f}, P90: {:.2f}, P99: {:.2f}, Avg: {:.2f}" average_msg = "Average token latency(ms) : {:.2f}" console_logger.warning("\n=================Total statistics=====================") if tokens_output == 0: @@ -108,7 +108,14 @@ def staticsOutput(environment, reqlist): numpy.average(first_token), ) ) - console_logger.warning(next_msg.format(numpy.average(next_token))) + console_logger.warning( + next_msg.format( + numpy.percentile(next_token, 50), + numpy.percentile(next_token, 90), + numpy.percentile(next_token, 99), + numpy.average(next_token), + ) + ) console_logger.warning(average_msg.format(numpy.average(avg_token))) console_logger.warning("======================================================\n\n") logging.shutdown()