Merge branch 'turboderp:master' into code-chat
SinanAkkoyun authored Oct 4, 2023
2 parents 433c1fa + a03d9bd commit fe047c4
Showing 7 changed files with 340 additions and 77 deletions.
5 changes: 4 additions & 1 deletion README.md
@@ -171,4 +171,7 @@ the converter to better facilitate scripted jobs.

**2023-09-27**: Prebuilt wheels are now available, credit to [@jllllll](https://github.com/jllllll). They're on the
[releases page here](https://github.com/turboderp/exllamav2/releases). A solution to installing prebuilt wheels straight
from PyPI is still pending. Updated installation instructions above.

**2023-10-03**: Added support for extended vocabularies and alternative BOS/EOS/UNK tokens and the ability to
encode/decode sequences with special tokens. Added Orca template to the chatbot example.
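
For reference, the new tokenizer options can be exercised directly. The sketch below only uses the encode() keywords that appear in examples/chat.py further down; the config/tokenizer setup and the model path are illustrative assumptions, not part of this commit.

from exllamav2 import ExLlamaV2Config, ExLlamaV2Tokenizer

config = ExLlamaV2Config()
config.model_dir = "/path/to/model"   # placeholder path, not from this commit
config.prepare()
tokenizer = ExLlamaV2Tokenizer(config)

# With encode_special_tokens enabled, added tokens such as <|im_start|> are
# mapped to their own token IDs instead of being tokenized as plain text.
text = "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n"
ids = tokenizer.encode(text, add_bos = False, add_eos = False, encode_special_tokens = True)
print(ids)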
103 changes: 41 additions & 62 deletions examples/chat.py
@@ -19,11 +19,14 @@
)

from chat_formatting import CodeBlockFormatter
from chat_prompts import prompt_formats
prompt_formats_list = list(prompt_formats.keys())

# Options

parser = argparse.ArgumentParser(description = "Simple Llama2 chat example for ExLlamaV2")
parser.add_argument("-mode", "--mode", choices = ["llama", "raw", "codellama"], help = "Chat mode. Use llama for Llama 1/2 chat finetunes.")
parser.add_argument("-modes", "--modes", action = "store_true", help = "List available modes and exit.")
parser.add_argument("-mode", "--mode", choices = prompt_formats_list, help = "Chat mode. Use llama for Llama 1/2 chat finetunes.")
parser.add_argument("-un", "--username", type = str, default = "User", help = "Username when using raw chat mode")
parser.add_argument("-bn", "--botname", type = str, default = "Chatbort", help = "Bot name when using raw chat mode")
parser.add_argument("-sp", "--system_prompt", type = str, help = "Use custom system prompt")
@@ -37,86 +40,61 @@
parser.add_argument("-resc", "--response_chunk", type = int, default = 250, help = "Space to reserve in context for reply, default = 250")
parser.add_argument("-ncf", "--no_code_formatting", action = "store_true", help = "Disable code formatting/syntax highlighting")

# Initialize model and tokenizer
# Arrrgs

model_init.add_args(parser)
args = parser.parse_args()
model_init.check_args(args)
model_init.print_options(args)
model, tokenizer = model_init.init(args)

# Create cache

cache = ExLlamaV2Cache(model)
# Prompt templates/modes

# Prompt templates
if args.modes:
print(" -- Available formats:")
for k, v in prompt_formats.items():
print(f" -- {k:12} : {v().description}")
sys.exit()

username = args.username
botname = args.botname
system_prompt = args.system_prompt
mode = args.mode

if mode == "llama" or mode == "codellama":

if not system_prompt:

if mode == "llama":

system_prompt = \
"""You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. """ + \
"""Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. """ + \
"""Please ensure that your responses are socially unbiased and positive in nature."""

elif mode == "codellama":

system_prompt = \
"""You are a helpful coding assistant. Always answer as helpfully as possible."""

first_prompt = \
"""[INST] <<SYS>>\n<|system_prompt|>\n<</SYS>>\n\n<|user_prompt|> [/INST]"""

subs_prompt = \
"""[INST] <|user_prompt|> [/INST]"""

elif mode == "raw":
if args.mode is None:
print(" ## Error: No mode specified.")
sys.exit()

if not system_prompt:
prompt_format = prompt_formats[args.mode]()
prompt_format.botname = botname
prompt_format.username = username
if system_prompt is None: system_prompt = prompt_format.default_system_prompt()

system_prompt = \
f"""This is a conversation between a helpful AI assistant named {botname} and a """ + ("""user named {username}.""" if username != "User" else """user.""")
# Initialize model and tokenizer

first_prompt = \
f"""<|system_prompt|>\n{username}: <|user_prompt|>\n{botname}:"""
model_init.check_args(args)
model_init.print_options(args)
model, tokenizer = model_init.init(args)

subs_prompt = \
f"""{username}: <|user_prompt|>\n{botname}:"""
# Create cache

else:
cache = ExLlamaV2Cache(model)

print(" ## Error: Incorrect/no mode specified.")
sys.exit()

# Chat context

def format_prompt(user_prompt, first):
global system_prompt, first_prompt, subs_prompt
global system_prompt, prompt_format

if first:
return first_prompt \
return prompt_format.first_prompt() \
.replace("<|system_prompt|>", system_prompt) \
.replace("<|user_prompt|>", user_prompt)
else:
return subs_prompt \
return prompt_format.subs_prompt() \
.replace("<|user_prompt|>", user_prompt)

def encode_prompt(text):
global tokenizer, mode
global tokenizer, prompt_format

if mode == "llama" or mode == "codellama":
return tokenizer.encode(text, add_bos = True)

if mode == "raw":
return tokenizer.encode(text)
add_bos, add_eos, encode_special_tokens = prompt_format.encoding_options()
return tokenizer.encode(text, add_bos = add_bos, add_eos = add_eos, encode_special_tokens = encode_special_tokens)

user_prompts = []
responses_ids = []
Expand All @@ -130,7 +108,8 @@ def get_tokenized_context(max_len):

for turn in range(len(user_prompts)):

up_ids = encode_prompt(format_prompt(user_prompts[turn], context.shape[-1] == 0))
up_text = format_prompt(user_prompts[turn], context.shape[-1] == 0)
up_ids = encode_prompt(up_text)
context = torch.cat([context, up_ids], dim=-1)

if turn < len(responses_ids):
@@ -161,20 +140,15 @@ def get_tokenized_context(max_len):

# Stop conditions

if mode == "llama" or mode == "codellama":

generator.set_stop_conditions([tokenizer.eos_token_id])

if mode == "raw":

generator.set_stop_conditions([username + ":", username[0:1] + ":", username.upper() + ":", username.lower() + ":", tokenizer.eos_token_id])
generator.set_stop_conditions(prompt_format.stop_conditions(tokenizer))

# ANSI color codes

col_default = "\u001b[0m"
col_user = "\u001b[33;1m" # Yellow
col_bot = "\u001b[34;1m" # Blue
col_error = "\u001b[31;1m" # Red
col_sysprompt = "\u001b[37;1m" # Grey

# Code block formatting

@@ -188,6 +162,11 @@ def get_tokenized_context(max_len):

# Main loop

print(f" -- Prompt format: {args.mode}")
print(f" -- System prompt:")
print()
print(col_sysprompt + system_prompt.strip() + col_default)

while True:

# Get user prompt
Expand All @@ -207,7 +186,7 @@ def get_tokenized_context(max_len):

# Stream response

if mode == "raw":
if prompt_format.print_bot_name():

print(col_bot + botname + ": " + col_default, end = "")

@@ -288,7 +267,7 @@ def get_tokenized_context(max_len):

if eos:

if mode == "llama" or mode == "codellama":
if prompt_format.print_extra_newline():
print()

break
177 changes: 177 additions & 0 deletions examples/chat_prompts.py
@@ -0,0 +1,177 @@

class PromptFormat:

botname = "Chatbort"
username = "User"

def __init__(self):
pass

#

def default_system_prompt(self):
raise NotImplementedError

def first_prompt(self):
raise NotImplementedError

def subs_prompt(self):
raise NotImplementedError

def stop_conditions(self, tokenizer):
raise NotImplementedError

def encoding_options(self): # (add_bos, add_eos, encode_special_tokens)
raise NotImplementedError

def print_bot_name(self):
return False

def print_extra_newline(self):
return False


class PromptFormat_raw(PromptFormat):

description = "Model-agnostic mode simulating a raw chatlog"

def __init__(self):
super().__init__()
pass

def default_system_prompt(self):
return \
f"""This is a conversation between a helpful AI assistant named {self.botname} and a """ + \
(f"""user named {self.username}.""" if self.username != "User" else """user.""")

def first_prompt(self):
return \
f"""<|system_prompt|>\n{self.username}: <|user_prompt|>\n{self.botname}:"""

def subs_prompt(self):
return \
f"""{self.username}: <|user_prompt|>\n{self.botname}:"""

def stop_conditions(self, tokenizer):
return \
[self.username + ":",
self.username[0:1] + ":",
self.username.upper() + ":",
self.username.lower() + ":",
tokenizer.eos_token_id]

def encoding_options(self):
return False, False, False

def print_bot_name(self):
return True


class PromptFormat_llama(PromptFormat):

description = "Llama-chat, Llama2-chat and Mistral-instruct models"

def __init__(self):
super().__init__()
pass

def default_system_prompt(self):
return \
"""You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. """ + \
"""Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. """ + \
"""Please ensure that your responses are socially unbiased and positive in nature."""

def first_prompt(self):
return \
"""[INST] <<SYS>>\n<|system_prompt|>\n<</SYS>>\n\n<|user_prompt|> [/INST]"""

def subs_prompt(self):
return \
"""[INST] <|user_prompt|> [/INST]"""

def stop_conditions(self, tokenizer):
return \
[tokenizer.eos_token_id]

def encoding_options(self):
return True, False, False

def print_extra_newline(self):
return True


class PromptFormat_codellama(PromptFormat_llama):

description = "CodeLlama-instruct"

def __init__(self):
super().__init__()
pass

def default_system_prompt(self):
return \
"""You are a helpful coding assistant. Always answer as helpfully as possible."""


class PromptFormat_chatml(PromptFormat):

description = "ChatML format, as used by e.g. (Mistral)Orca"

def __init__(self):
super().__init__()
pass

def default_system_prompt(self):
return \
f"""You are {self.botname}, a large language model. Answer as concisely as possible."""

def first_prompt(self):
return \
"""<|im_start|>system\n""" + \
"""<|system_prompt|>\n""" + \
"""<|im_end|>\n""" + \
"""<|im_start|>user\n""" + \
"""<|user_prompt|><|im_end|>\n""" + \
"""<|im_start|>assistant\n"""

def subs_prompt(self):
return \
"""<|im_end|>\n""" + \
"""<|im_start|>user\n""" + \
"""<|user_prompt|><|im_end|>\n""" + \
"""<|im_start|>assistant\n"""

def stop_conditions(self, tokenizer):
return \
[tokenizer.eos_token_id,
"""<|im_end|>"""]

def encoding_options(self):
return False, False, True

def print_extra_newline(self):
return True


class PromptFormat_tinyllama(PromptFormat_chatml):

description = "ChatML format, but ignoring special/added tokens. Use for TinyLlama-chat v0.3"

def encoding_options(self):
return False, False, False


prompt_formats = \
{
"raw": PromptFormat_raw,
"llama": PromptFormat_llama,
"codellama": PromptFormat_codellama,
"chatml": PromptFormat_chatml,
"tinyllama": PromptFormat_tinyllama,
}
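
As a usage note, here is a minimal sketch of how examples/chat.py above consumes these formats. It assumes tokenizer and generator are already initialized as in that script, and "chatml" is just one of the keys registered in the dict above.

from chat_prompts import prompt_formats

# tokenizer and generator are assumed to be set up as in examples/chat.py
prompt_format = prompt_formats["chatml"]()   # any key from the dict above
prompt_format.username = "User"
prompt_format.botname = "Chatbort"

# Build the first turn from the format's template strings
system_prompt = prompt_format.default_system_prompt()
text = prompt_format.first_prompt() \
    .replace("<|system_prompt|>", system_prompt) \
    .replace("<|user_prompt|>", "Hello!")

# Encode with the per-format options and install the matching stop conditions
add_bos, add_eos, encode_special_tokens = prompt_format.encoding_options()
ids = tokenizer.encode(text, add_bos = add_bos, add_eos = add_eos, encode_special_tokens = encode_special_tokens)
generator.set_stop_conditions(prompt_format.stop_conditions(tokenizer))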





