From a81fb4e211d6656854a5dd24cf8631dc319234d1 Mon Sep 17 00:00:00 2001
From: Mark Schmidt
Date: Sun, 5 Mar 2023 11:28:50 -0500
Subject: [PATCH] change max_seq_len from 1024 to 2048

The models support a 2048-token context window. This is not well
advertised, and people are getting confused. There is no reason to keep a
smaller default here, as it only adds to the confusion.
---
 llama/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama/model.py b/llama/model.py
index 03a72daea..baac760d7 100755
--- a/llama/model.py
+++ b/llama/model.py
@@ -27,7 +27,7 @@ class ModelArgs:
     norm_eps: float = 1e-5
 
     max_batch_size: int = 32
-    max_seq_len: int = 1024
+    max_seq_len: int = 2048
 
 
 class RMSNorm(torch.nn.Module):
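
Note for reviewers: a minimal sketch of what the new default means for code
that constructs the config directly. The dim/n_layers/n_heads/vocab_size
values below are assumptions for illustration (taken from the 7B
checkpoint's params.json) and are not part of this patch; since ModelArgs
is a dataclass, keyword construction works as shown.

    from llama.model import ModelArgs

    # Hypothetical 7B-style config; only max_seq_len behavior is the point.
    # With no explicit override, the default now matches the 2048-token
    # context window the released checkpoints were trained with.
    args = ModelArgs(dim=4096, n_layers=32, n_heads=32, vocab_size=32000)
    assert args.max_seq_len == 2048  # was 1024 before this patch

Callers that pass max_seq_len explicitly (as the repo's example script
does) are unaffected; the change only removes a surprising default for
everyone else.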