Skip to content

Commit

Permalink
Merge pull request #2227 from bmaltais/dev
Browse files Browse the repository at this point in the history
v23.1.2
  • Loading branch information
bmaltais committed Apr 8, 2024
2 parents c839fad + ff70a1e commit aa9fcf3
Show file tree
Hide file tree
Showing 7 changed files with 181 additions and 102 deletions.
2 changes: 1 addition & 1 deletion .release
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v23.1.1
v23.1.2
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ The GUI allows you to set the training parameters and generate and run the requi
- [SDXL training](#sdxl-training)
- [Masked loss](#masked-loss)
- [Change History](#change-history)
- [2024/04/08 (v23.1.2)](#20240408-v2312)
- [2024/04/07 (v23.1.1)](#20240407-v2311)
- [2024/04/07 (v23.1.0)](#20240407-v2310)
- [2024/03/21 (v23.0.15)](#20240321-v23015)
Expand Down Expand Up @@ -404,6 +405,10 @@ ControlNet dataset is used to specify the mask. The mask images should be the RG
## Change History
### 2024/04/08 (v23.1.2)
- Added config.toml support for wd14_caption.
### 2024/04/07 (v23.1.1)
- Added support for Huber loss under the Parameters / Advanced tab.
Expand Down
24 changes: 24 additions & 0 deletions config example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,27 @@ sample_sampler = "euler_a" # Sampler to use for image sampling
[sdxl]
sdxl_cache_text_encoder_outputs = false # Cache text encoder outputs
sdxl_no_half_vae = true # No half VAE

[wd14_caption]
always_first_tags = "" # comma-separated list of tags to always put at the beginning, e.g. 1girl,1boy
append_tags = false # Append tags
batch_size = 8 # Batch size
caption_extension = ".txt" # Extension for caption file (e.g., .caption, .txt)
caption_separator = ", " # Caption Separator
character_tag_expand = false # For character tags, expand the trailing parenthesized part into a separate tag: `chara_name_(series)` becomes `chara_name, series`
character_threshold = 0.35 # Character threshold
debug = false # Debug mode
force_download = false # Force model re-download when switching to onnx
frequency_tags = false # Frequency tags
general_threshold = 0.35 # General threshold
max_data_loader_n_workers = 2 # Max dataloader workers
onnx = true # ONNX
recursive = false # Recursive
remove_underscore = false # Remove underscore
repo_id = "SmilingWolf/wd-convnext-tagger-v3" # Repo id for wd14 tagger on Hugging Face
tag_replacement = "" # Tag replacement in the format of `source1,target1;source2,target2; ...`. Escape `,` and `;` with `\`. e.g. `tag1,tag2;tag3,tag4`
thresh = 0.36 # Threshold
train_data_dir = "" # Image folder to caption (containing the images to caption)
undesired_tags = "" # comma-separated list of tags to remove, e.g. 1girl,1boy
use_rating_tags = false # Use rating tags
use_rating_tags_as_last_tag = false # Use rating tags as the last tags
1 change: 1 addition & 0 deletions kohya_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def UI(**kwargs):
logging_dir_input=logging_dir_input,
enable_copy_info_button=True,
headless=headless,
config=config,
)
with gr.Tab("LoRA"):
_ = LoRATools(headless=headless)
Expand Down
27 changes: 16 additions & 11 deletions kohya_gui/blip2_caption_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def generate_caption(
max_new_tokens=40,
min_new_tokens=20,
do_sample=True,
temperature=1.0,
top_p=0.0,
):
"""
Expand Down Expand Up @@ -108,6 +109,7 @@ def generate_caption(
top_p=top_p,
max_new_tokens=max_new_tokens,
min_new_tokens=min_new_tokens,
temperature=temperature,
)

generated_text = processor.batch_decode(
Expand Down Expand Up @@ -154,7 +156,7 @@ def caption_images_beam_search(
model=model,
device=device,
num_beams=int(num_beams),
repetition_penalty=repetition_penalty,
repetition_penalty=float(repetition_penalty),
length_penalty=length_penalty,
min_new_tokens=int(min_new_tokens),
max_new_tokens=int(max_new_tokens),
Expand All @@ -165,6 +167,7 @@ def caption_images_beam_search(
def caption_images_nucleus(
directory_path,
do_sample,
temperature,
top_p,
min_new_tokens,
max_new_tokens,
Expand All @@ -190,6 +193,7 @@ def caption_images_nucleus(
model=model,
device=device,
do_sample=do_sample,
temperature=temperature,
top_p=top_p,
min_new_tokens=int(min_new_tokens),
max_new_tokens=int(max_new_tokens),
Expand Down Expand Up @@ -278,16 +282,6 @@ def list_train_dirs(path):
label="Number of beams",
)

temperature = gr.Slider(
minimum=0.5,
maximum=1.0,
value=1.0,
step=0.1,
interactive=True,
label="Temperature",
info="used with nucleus sampling",
)

len_penalty = gr.Slider(
minimum=-1.0,
maximum=2.0,
Expand Down Expand Up @@ -326,6 +320,16 @@ def list_train_dirs(path):
with gr.Tab("Nucleus sampling"):
with gr.Row():
do_sample = gr.Checkbox(label="Sample", value=True)

temperature = gr.Slider(
minimum=0.5,
maximum=1.0,
value=1.0,
step=0.1,
interactive=True,
label="Temperature",
info="used with nucleus sampling",
)

top_p = gr.Slider(
minimum=-0,
Expand All @@ -344,6 +348,7 @@ def list_train_dirs(path):
inputs=[
directory_path_dir,
do_sample,
temperature,
top_p,
min_new_tokens,
max_new_tokens,
Expand Down
3 changes: 2 additions & 1 deletion kohya_gui/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ def utilities_tab(
enable_copy_info_button=bool(False),
enable_dreambooth_tab=True,
headless=False,
config: dict = {},
):
with gr.Tab("Captioning"):
gradio_basic_caption_gui_tab(headless=headless)
gradio_blip_caption_gui_tab(headless=headless)
gradio_blip2_caption_gui_tab(headless=headless)
gradio_git_caption_gui_tab(headless=headless)
gradio_wd14_caption_gui_tab(headless=headless)
gradio_wd14_caption_gui_tab(headless=headless, config=config)
gradio_manual_caption_gui_tab(headless=headless)
gradio_convert_model_tab(headless=headless)
gradio_group_images_gui_tab(headless=headless)
Expand Down
Loading

0 comments on commit aa9fcf3

Please sign in to comment.