From 5e949c63ec38773fe639131bfcc800409172c495 Mon Sep 17 00:00:00 2001 From: Li Bo Date: Mon, 31 Jul 2023 19:44:56 +1000 Subject: [PATCH] Update otter_image_incontext.py --- pipeline/demo/otter_image_incontext.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pipeline/demo/otter_image_incontext.py b/pipeline/demo/otter_image_incontext.py index f9937cac..10779243 100644 --- a/pipeline/demo/otter_image_incontext.py +++ b/pipeline/demo/otter_image_incontext.py @@ -79,7 +79,6 @@ def get_response(image_list, prompt: str, model=None, image_processor=None, in_c lang_x_input_ids = lang_x["input_ids"] lang_x_attention_mask = lang_x["attention_mask"] - bad_words_id = model.text_tokenizer(["User:", "GPT1:", "GFT:", "GPT:"], add_special_tokens=False).input_ids generated_text = model.generate( vision_x=vision_x.to(model.device), lang_x=lang_x_input_ids.to(model.device), @@ -87,7 +86,6 @@ def get_response(image_list, prompt: str, model=None, image_processor=None, in_c max_new_tokens=512, num_beams=3, no_repeat_ngram_size=3, - bad_words_ids=bad_words_id, ) parsed_output = ( model.text_tokenizer.decode(generated_text[0]) @@ -115,7 +113,7 @@ def get_response(image_list, prompt: str, model=None, image_processor=None, in_c precision["torch_dtype"] = torch.float16 elif load_bit == "fp32": precision["torch_dtype"] = torch.float32 - model = OtterForConditionalGeneration.from_pretrained("luodian/OTTER-9B-LA-InContext", device_map="sequential", **precision) + model = OtterForConditionalGeneration.from_pretrained("luodian/OTTER-Image-MPT7B", device_map="sequential", **precision) model.text_tokenizer.padding_side = "left" tokenizer = model.text_tokenizer image_processor = transformers.CLIPImageProcessor()