convert.py: Set gpt2 as tokenizer model when using BPE

ggerganov · Aug 29, 2023 · ce00528 · ce00528
1 parent 58fa4dc
commit ce00528
Showing 1 changed file with 6 additions and 1 deletion.
diff --git a/convert.py b/convert.py
@@ -846,7 +846,12 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
             scores.append(score)
             toktypes.append(toktype)
 
-        self.gguf.add_tokenizer_model("llama")
+        if isinstance(vocab, SentencePieceVocab):
+            self.gguf.add_tokenizer_model("llama")
+        elif isinstance(vocab, BpeVocab):
+            self.gguf.add_tokenizer_model("gpt2")
+        else:
+            raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
         self.gguf.add_token_list(tokens)
         self.gguf.add_token_scores(scores)
         self.gguf.add_token_types(toktypes)