Update train_db_fixed to v9
bmaltais committed Nov 19, 2022
1 parent f56340d commit 0e8b993
Showing 7 changed files with 596 additions and 149 deletions.
16 changes: 11 additions & 5 deletions README.md
@@ -113,9 +113,10 @@ accelerate launch --num_cpu_threads_per_process 6 train_db_fixed-ber.py `
--cache_latents `
--save_every_n_epochs=1 `
--fine_tuning `
--enable_bucket `
--dataset_repeats=200 `
--seed=23 `
--save_half
--save_precision="fp16"
```
Refer to this URL for more details about fine tuning: https://note.com/kohya_ss/n/n1269f1e1a54e
@@ -125,7 +126,12 @@ Refer to this url for more details about finetuning: https://note.com/kohya_ss/n/
* 11/7 (v7): Text Encoder supports checkpoint files in different storage formats (they are converted at import time, so exports are in the normal format). The average epoch loss is now output to the screen. Added a function to save the epoch and global step in SD-format checkpoints (values are added to existing data if present). The reg_data_dir option is enabled during fine tuning (fine tuning while mixing in regularized images). Added a dataset_repeats option for fine tuning (use it when the number of teacher images is small and an epoch would otherwise be extremely short).
* 11/9 (v8): supports Diffusers 0.7.2. To upgrade diffusers run `pip install --upgrade diffusers[torch]`
* 11/14 (diffusers_fine_tuning v2):
- The script name is now fine_tune.py.
- Added an option to train the Text Encoder (--train_text_encoder).
- The data format of the checkpoint at save time can be specified with the --save_precision option; float, fp16, and bf16 are available.
- Added a --save_state option to save the training state (optimizer, etc.) mid-run; it can be resumed with the --resume option.
* 11/18 (v9):
- Added support for Aspect Ratio Bucketing (--enable_bucket)
- Added support for selecting the data format (fp16/bf16/float) when saving checkpoints (--save_precision)
- Added support for saving the training state (--save_state, --resume)
- Added support for logging (--logging_dir); a usage sketch combining these options follows below
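
A minimal sketch of how the new v9 options might be combined in a single run; the paths and values below are placeholders and are not part of this commit:

```
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed.py `
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
--train_data_dir="D:\dreambooth\train_data" `
--output_dir="D:\dreambooth\model" `
--resolution="512,512" `
--train_batch_size=4 `
--learning_rate=1e-6 `
--max_train_steps=400 `
--use_8bit_adam `
--xformers `
--mixed_precision="fp16" `
--cache_latents `
--enable_bucket `
--save_precision="fp16" `
--save_state `
--logging_dir="D:\dreambooth\logs"
```

A run interrupted after --save_state has written a state folder should be resumable by adding --resume pointing at that folder.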
11 changes: 7 additions & 4 deletions examples/caption.ps1
@@ -2,9 +2,12 @@
#
# Useful to create a base caption that will be augmented on a per-image basis

$folder = "D:\dreambooth\train_sylvia_ritter\raw_data\all-images\"
$folder = "D:\some\folder\location\"
$file_pattern="*.*"
$text_fir_file="a digital painting of xxx, by silvery trait"
$caption_text="some caption text"

$files = Get-ChildItem $folder$file_pattern
foreach ($file in $files) {New-Item -ItemType file -Path $folder -Name "$($file.BaseName).txt" -Value $text_fir_file}
$files = Get-ChildItem $folder$file_pattern -Include *.png,*.jpg,*.webp -File
foreach ($file in $files)
{
New-Item -ItemType file -Path $folder -Name "$($file.BaseName).txt" -Value $caption_text
}
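
# For example, with the settings above a folder containing img01.png and img02.jpg
# (hypothetical file names) would end up with img01.txt and img02.txt, each containing $caption_text.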
20 changes: 20 additions & 0 deletions examples/caption_subfolders.ps1
@@ -0,0 +1,20 @@
# This PowerShell script will create a text file for each file in the folder
#
# Useful to create a base caption that will be augmented on a per-image basis

$folder = "D:\test\t2\"
$file_pattern="*.*"
$text_fir_file="bigeyes style"

foreach ($file in Get-ChildItem $folder\$file_pattern -File)
{
New-Item -ItemType file -Path $folder -Name "$($file.BaseName).txt" -Value $text_fir_file
}

foreach($directory in Get-ChildItem -path $folder -Directory)
{
foreach ($file in Get-ChildItem $folder\$directory\$file_pattern)
{
New-Item -ItemType file -Path $folder\$directory -Name "$($file.BaseName).txt" -Value $text_fir_file
}
}
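
# Note: the loops above only descend one level of subfolders; deeper nesting is not
# traversed because neither Get-ChildItem call uses -Recurse.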
87 changes: 87 additions & 0 deletions examples/kohya-1-folders.ps1
@@ -0,0 +1,87 @@
# This PowerShell script will create a model using the fine tuning dreambooth method,
# training on a single folder of images.
#
# Adjust the script to your own needs

# Sylvia Ritter
# variable values
$pretrained_model_name_or_path = "D:\models\v1-5-pruned-mse-vae.ckpt"
$data_dir = "D:\test\squat"
$train_dir = "D:\test\"
$resolution = "512,512"

$image_num = Get-ChildItem $data_dir -Recurse -File -Include *.png | Measure-Object | %{$_.Count}

Write-Output "image_num: $image_num"

$learning_rate = 1e-6
$dataset_repeats = 40
$train_batch_size = 8
$epoch = 1
$save_every_n_epochs=1
$mixed_precision="fp16"
$num_cpu_threads_per_process=6

# You should not have to change values past this point

$output_dir = $train_dir + "\model"
$repeats = $image_num * $dataset_repeats
$mts = [Math]::Ceiling($repeats / $train_batch_size * $epoch)

Write-Output "Repeats: $repeats"

.\venv\Scripts\activate

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed-ber.py `
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
--train_data_dir=$data_dir `
--output_dir=$output_dir `
--resolution=$resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$mts `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$dataset_repeats `
--save_precision="fp16"

# 2nd pass at half the dataset repeat value

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
--pretrained_model_name_or_path=$output_dir"\last.ckpt" `
--train_data_dir=$data_dir `
--output_dir=$output_dir"2" `
--resolution=$resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$([Math]::Ceiling($mts/2)) `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
--save_precision="fp16"

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed-ber.py `
--pretrained_model_name_or_path=$output_dir"\last.ckpt" `
--train_data_dir=$data_dir `
--output_dir=$output_dir"2" `
--resolution=$resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$mts `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$dataset_repeats `
--save_precision="fp16"

154 changes: 154 additions & 0 deletions examples/kohya-3-folders.ps1
@@ -0,0 +1,154 @@
# This PowerShell script will create a model using the fine tuning dreambooth method. It requires landscape,
# portrait and square images.
#
# Adjust the script to your own needs

# Sylvia Ritter
# variable values
$pretrained_model_name_or_path = "D:\models\v1-5-pruned-mse-vae.ckpt"
$train_dir = "D:\dreambooth\train_sylvia_ritter\raw_data"

$landscape_image_num = 4
$portrait_image_num = 25
$square_image_num = 2

$learning_rate = 1e-6
$dataset_repeats = 120
$train_batch_size = 4
$epoch = 1
$save_every_n_epochs=1
$mixed_precision="fp16"
$num_cpu_threads_per_process=6

$landscape_folder_name = "landscape-pp"
$landscape_resolution = "832,512"
$portrait_folder_name = "portrait-pp"
$portrait_resolution = "448,896"
$square_folder_name = "square-pp"
$square_resolution = "512,512"

# You should not have to change values past this point

$landscape_data_dir = $train_dir + "\" + $landscape_folder_name
$portrait_data_dir = $train_dir + "\" + $portrait_folder_name
$square_data_dir = $train_dir + "\" + $square_folder_name
$landscape_output_dir = $train_dir + "\model-l"
$portrait_output_dir = $train_dir + "\model-lp"
$square_output_dir = $train_dir + "\model-lps"

$landscape_repeats = $landscape_image_num * $dataset_repeats
$portrait_repeats = $portrait_image_num * $dataset_repeats
$square_repeats = $square_image_num * $dataset_repeats

$landscape_mts = [Math]::Ceiling($landscape_repeats / $train_batch_size * $epoch)
$portrait_mts = [Math]::Ceiling($portrait_repeats / $train_batch_size * $epoch)
$square_mts = [Math]::Ceiling($square_repeats / $train_batch_size * $epoch)
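
# Worked example with the values above: 4 landscape images * 120 repeats = 480,
# so landscape_mts = ceil(480 / 4 * 1) = 120; portrait_mts = ceil(25 * 120 / 4) = 750
# and square_mts = ceil(2 * 120 / 4) = 60 follow the same pattern.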

# Write-Output $landscape_repeats

.\venv\Scripts\activate

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
--train_data_dir=$landscape_data_dir `
--output_dir=$landscape_output_dir `
--resolution=$landscape_resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$landscape_mts `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$dataset_repeats `
--save_precision="fp16"

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
--pretrained_model_name_or_path=$landscape_output_dir"\last.ckpt" `
--train_data_dir=$portrait_data_dir `
--output_dir=$portrait_output_dir `
--resolution=$portrait_resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$portrait_mts `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$dataset_repeats `
--save_precision="fp16"

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
--pretrained_model_name_or_path=$portrait_output_dir"\last.ckpt" `
--train_data_dir=$square_data_dir `
--output_dir=$square_output_dir `
--resolution=$square_resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$square_mts `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$dataset_repeats `
--save_precision="fp16"

# 2nd pass at half the dataset repeat value

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
--pretrained_model_name_or_path=$square_output_dir"\last.ckpt" `
--train_data_dir=$landscape_data_dir `
--output_dir=$landscape_output_dir"2" `
--resolution=$landscape_resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$([Math]::Ceiling($landscape_mts/2)) `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
--save_precision="fp16"

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
--pretrained_model_name_or_path=$landscape_output_dir"2\last.ckpt" `
--train_data_dir=$portrait_data_dir `
--output_dir=$portrait_output_dir"2" `
--resolution=$portrait_resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$([Math]::Ceiling($portrait_mts/2)) `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
--save_precision="fp16"

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
--pretrained_model_name_or_path=$portrait_output_dir"2\last.ckpt" `
--train_data_dir=$square_data_dir `
--output_dir=$square_output_dir"2" `
--resolution=$square_resolution `
--train_batch_size=$train_batch_size `
--learning_rate=$learning_rate `
--max_train_steps=$([Math]::Ceiling($square_mts/2)) `
--use_8bit_adam `
--xformers `
--mixed_precision=$mixed_precision `
--cache_latents `
--save_every_n_epochs=$save_every_n_epochs `
--fine_tuning `
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
--save_precision="fp16"

4 changes: 2 additions & 2 deletions examples/kohya_diffuser.ps1
@@ -55,7 +55,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\
--use_8bit_adam --xformers `
--mixed_precision=$mixed_precision `
--save_every_n_epochs=$save_every_n_epochs `
--save_half
--save_precision="fp16"

accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\diffusers_fine_tuning\fine_tune.py `
--pretrained_model_name_or_path=$train_dir"\fine_tuned\last.ckpt" `
@@ -69,4 +69,4 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\
--use_8bit_adam --xformers `
--mixed_precision=$mixed_precision `
--save_every_n_epochs=$save_every_n_epochs `
--save_half
--save_precision="fp16"