add benchmark for fix length input and output #5857

Merged
merged 26 commits into from
Jul 7, 2024
Changes from all commits (26 commits)
afa0e91  add benchmark for fix length input and output (haichuan1221, Jun 26, 2024)
ffcc34c  fix format issue (haichuan1221, Jun 26, 2024)
4f4962e  fix format issue (haichuan1221, Jun 26, 2024)
3c405e0  fix format issue (haichuan1221, Jun 26, 2024)
6b7415c  fix format issue (haichuan1221, Jun 26, 2024)
7c5f1d9  fix format issue (haichuan1221, Jun 26, 2024)
0f8fbf4  fix format issue (haichuan1221, Jun 26, 2024)
cab9aa4  fix format issue (haichuan1221, Jun 26, 2024)
66710c6  fix format issue (haichuan1221, Jun 26, 2024)
ab11c8a  Merge branch 'vllm-project:main' into main (haichuan1221, Jun 26, 2024)
0af62a8  Merge branch 'vllm-project:main' into main (haichuan1221, Jun 26, 2024)
68e25fb  Merge branch 'vllm-project:main' into main (haichuan1221, Jun 27, 2024)
b798b8e  Merge branch 'vllm-project:main' into main (haichuan1221, Jun 28, 2024)
bac04ae  Merge branch 'vllm-project:main' into main (haichuan1221, Jun 30, 2024)
e8fd224  Merge branch 'vllm-project:main' into main (haichuan1221, Jul 1, 2024)
ff9a0b7  Merge branch 'vllm-project:main' into main (haichuan1221, Jul 2, 2024)
13fbce4  Merge branch 'vllm-project:main' into main (haichuan1221, Jul 3, 2024)
e2d4290  Merge branch 'vllm-project:main' into main (haichuan1221, Jul 4, 2024)
80eb2ed  Merge branch 'vllm-project:main' into main (haichuan1221, Jul 7, 2024)
bea16c3  add typing name and help message (haichuan1221, Jul 7, 2024)
1608c7a  Merge branch 'main' of github.com:haichuan1221/vllm into main (haichuan1221, Jul 7, 2024)
32add2a  fix comma issue (haichuan1221, Jul 7, 2024)
e306c5b  fix comma issue (haichuan1221, Jul 7, 2024)
fee6383  update format (ywang96, Jul 7, 2024)
26a67cd  remove unneeded noqa (ywang96, Jul 7, 2024)
5b4897f  move up args (ywang96, Jul 7, 2024)
65 changes: 60 additions & 5 deletions benchmarks/benchmark_serving.py
@@ -17,7 +17,7 @@
--dataset-path <path to dataset> \
--request-rate <request_rate> \ # By default <request_rate> is inf
--num-prompts <num_prompts> # By default <num_prompts> is 1000

when using tgi backend, add
--endpoint /generate_stream
to the end of the command above.
@@ -77,7 +77,6 @@ def sample_sharegpt_requests(
) -> List[Tuple[str, int, int]]:
    if fixed_output_len is not None and fixed_output_len < 4:
        raise ValueError("output_len too small")

    # Load the dataset.
    with open(dataset_path) as f:
        dataset = json.load(f)
@@ -185,6 +184,31 @@ def sample_sonnet_requests(
    return sampled_requests


def sample_random_requests(
        input_len: int, output_len: int, num_prompts: int, range_ratio: float,
        tokenizer: PreTrainedTokenizerBase) -> List[Tuple[str, int, int]]:

    input_lens = np.random.randint(
        int(input_len * range_ratio),
        input_len + 1,
        size=num_prompts,
    )
    output_lens = np.random.randint(
        int(output_len * range_ratio),
        output_len + 1,
        size=num_prompts,
    )
    offsets = np.random.randint(0, tokenizer.vocab_size, size=num_prompts)
    input_requests = []
    for i in range(num_prompts):
        prompt = tokenizer.decode([(offsets[i] + i + j) % tokenizer.vocab_size
                                   for j in range(input_lens[i])])
        input_requests.append(
            (prompt, int(input_lens[i]), int(output_lens[i])))

    return input_requests
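
The sampler builds each prompt by decoding input_lens[i] consecutive token ids starting from a random offset, so prompt lengths are controlled at the token level. A minimal sketch of exercising it in isolation, assuming it is run from the benchmarks directory so the function is importable, with gpt2 purely as an example tokenizer:

from transformers import AutoTokenizer

from benchmark_serving import sample_random_requests

tokenizer = AutoTokenizer.from_pretrained("gpt2")
# range_ratio=1.0 pins every request to exactly 16 input / 8 output tokens.
requests = sample_random_requests(input_len=16,
                                  output_len=8,
                                  num_prompts=3,
                                  range_ratio=1.0,
                                  tokenizer=tokenizer)
for prompt, in_len, out_len in requests:
    # Decoding arbitrary ids and re-encoding does not always round-trip,
    # so the served prompt may tokenize to slightly more or fewer than
    # in_len tokens.
    print(in_len, out_len, repr(prompt[:40]))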


async def get_request(
    input_requests: List[Tuple[str, int, int]],
    request_rate: float,
@@ -196,6 +220,7 @@ async def get_request(
        if request_rate == float("inf"):
            # If the request rate is infinity, then we don't need to wait.
            continue

        # Sample the request interval from the exponential distribution.
        interval = np.random.exponential(1.0 / request_rate)
        # The next request will be sent after the interval.
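
This makes request arrivals a Poisson process: with a finite request_rate, inter-arrival gaps are exponentially distributed with mean 1.0 / request_rate seconds. A self-contained sketch of just the pacing logic, with illustrative names that are not part of the PR:

import asyncio

import numpy as np

async def pace(items, request_rate: float):
    # Yield each item, then sleep for an exponentially distributed interval,
    # which produces Poisson arrivals at `request_rate` items per second.
    # An infinite rate disables pacing entirely.
    for item in items:
        yield item
        if request_rate == float("inf"):
            continue
        await asyncio.sleep(np.random.exponential(1.0 / request_rate))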
@@ -219,7 +244,7 @@ def calculate_metrics(
            # We use the tokenizer to count the number of output tokens for all
            # serving backends instead of looking at len(outputs[i].itl) since
            # multiple output tokens may be bundled together
            # Note: this may inflate the output token count slightly
            # Note : this may inflate the output token count slightly
            output_len = len(
                tokenizer(outputs[i].generated_text,
                          add_special_tokens=False).input_ids)
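
Counting tokens by re-tokenizing the final text is backend-independent, whereas counting streamed chunks undercounts whenever a backend bundles several tokens into one chunk. A small illustration of the counting call, with gpt2 as an arbitrary example tokenizer and a stand-in string for outputs[i].generated_text:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
generated_text = "Hello, world!"  # stand-in for outputs[i].generated_text
# Derive the token count from the text itself, independent of how the
# backend chunked its stream.
output_len = len(
    tokenizer(generated_text, add_special_tokens=False).input_ids)
print(output_len)  # 4 with gpt2: 'Hello', ',', ' world', '!'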
@@ -456,6 +481,15 @@ def main(args: argparse.Namespace):
                          for prompt, prompt_formatted, prompt_len,
                          output_len in input_requests]

    elif args.dataset_name == "random":
        input_requests = sample_random_requests(
            input_len=args.random_input_len,
            output_len=args.random_output_len,
            num_prompts=args.num_prompts,
            range_ratio=args.random_range_ratio,
            tokenizer=tokenizer,
        )

    else:
        raise ValueError(f"Unknown dataset: {args.dataset_name}")

@@ -549,7 +583,7 @@
"--dataset-name",
type=str,
default="sharegpt",
choices=["sharegpt", "sonnet"],
choices=["sharegpt", "sonnet", "random"],
help="Name of the dataset to benchmark on.",
)
parser.add_argument("--dataset-path",
@@ -566,7 +600,7 @@
"--tokenizer",
type=str,
help=
"Name or path of the tokenizer, if not using the default tokenizer.",
"Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501
)
parser.add_argument(
"--best-of",
@@ -609,6 +643,27 @@ def main(args: argparse.Namespace):
        help=
        "Number of prefix tokens per request, used only for sonnet dataset.",
    )
    parser.add_argument(
        "--random-input-len",
        type=int,
        default=1024,
        help=
        "Number of input tokens per request, used only for random sampling.",
    )
    parser.add_argument(
        "--random-output-len",
        type=int,
        default=128,
        help=
        "Number of output tokens per request, used only for random sampling.",
    )
    parser.add_argument(
        "--random-range-ratio",
        type=float,
        default=1.0,
        help="Range of sampled ratio of input/output length, "
        "used only for random sampling.",
    )
    parser.add_argument(
        "--request-rate",
        type=float,
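
With the new flags, a hypothetical end-to-end run against an already-running vLLM server could look like the following; the model name and values are placeholders, not taken from the PR:

python benchmarks/benchmark_serving.py \
    --backend vllm \
    --model mistralai/Mistral-7B-v0.1 \
    --dataset-name random \
    --random-input-len 1024 \
    --random-output-len 128 \
    --random-range-ratio 0.8 \
    --num-prompts 500

With --random-range-ratio 0.8, input lengths are drawn uniformly from [819, 1024] tokens and output lengths from [102, 128].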