Gradio Simplification (#344)

* add version.json for future proofing * musicgen remove send to demucs * improve default magnet params on gradio * tortoise clear model before loading the next one * add unload tortoise model function * remove Joutai * reduce max bark outputs to 1 * add Unload model button to tortoise
rsxdalv · Jul 15, 2024 · ff9987d · ff9987d
1 parent c8fbce7
commit ff9987d
Show file tree

Hide file tree

Showing 21 changed files with 144 additions and 311 deletions.
diff --git a/README.md b/README.md
@@ -18,6 +18,13 @@ List of models: Bark, MusicGen + AudioGen, Tortoise, RVC, Vocos, Demucs, Seamles
 
 ## Changelog
 
+July 15:
+* Comment - As the React UI has been out for a long time now, the Gradio UI is going to take on the role of serving only the functions to the user, without the extremely complicated UI that it cannot handle. There is a real shortage of development time to add new models and features, and the old style of integration was not viable. Once the new APIs and 'the role of the model' are defined, it will be possible to have extensions for entire models, enabling a lot more flexibility and lighter installations.
+* Start scaling back Gradio UI complexity - removed _send to RVC/Demucs/Voice_ buttons. (Remove internal component Joutai).
+* Add version.json for better updates in the future.
+* Reduce Gradio Bark maximum number of outputs to 1.
+* Add unload model button to Tortoise. The model is also unloaded before loading the next one or changing parameters, so Tortoise no longer uses 2x model memory during a settings change.
+
 July 14:
 * Regroup Gradio tabs into groups - Text to Speech, Audio Conversion, Music Generation, Outputs and Settings
 * Clean up the header, add link for feedback

diff --git a/server.py b/server.py
@@ -16,8 +16,7 @@
 
 setup_or_recover.dummy()
 dotenv_init.init()
-# required for proper rendering due to gr.* methods
-from src.Joutai import Joutai
+
 
 def reload_config_and_restart_ui():
     os._exit(0)
@@ -36,7 +35,7 @@ def reload_config_and_restart_ui():
 )
 
 
-def generic_error_tab(e, name="SeamlessM4Tv2Model", id="seamless"):
+def generic_error_tab(e, name="", id=""):
     with gr.Tab(name + " (!)", id=id):
         gr.Markdown(f"""Failed to load {name} tab. Please check your configuration.""")
         gr.Markdown(f"""Error: {e}""")
@@ -49,20 +48,20 @@ def generic_error_tab(e, name="SeamlessM4Tv2Model", id="seamless"):
 ) as demo:
     gr.Markdown(
         """
-# TTS Generation WebUI [React UI](http://localhost:3000) [Feedback / Bug reports](https://forms.gle/2L62owhBsGFzdFBC8)
+# TTS Generation WebUI (Legacy - Gradio) [React UI](http://localhost:3000) [Feedback / Bug reports](https://forms.gle/2L62owhBsGFzdFBC8)
 ### _(Text To Speech, Audio & Music Generation, Conversion)_
 """
     )
     with gr.Tabs():
         with gr.Tab("Text-to-Speech"), gr.Tabs():
             from src.bark.generation_tab_bark import generation_tab_bark
 
-            register_use_as_history_button = generation_tab_bark()
+            generation_tab_bark()
 
             try:
                 from src.bark.clone.tab_voice_clone import tab_voice_clone
 
-                tab_voice_clone(register_use_as_history_button)
+                tab_voice_clone()
             except Exception as e:
                 from src.bark.clone.tab_voice_clone_error import tab_voice_clone_error
 
@@ -188,16 +187,15 @@ def generic_error_tab(e, name="SeamlessM4Tv2Model", id="seamless"):
             from src.history_tab.main import history_tab
 
             collections_directories_atom.render()
-            history_tab(register_use_as_history_button)
-            history_tab(register_use_as_history_button, directory="favorites")
+            history_tab()
+            history_tab(directory="favorites")
             history_tab(
-                register_use_as_history_button,
                 directory="outputs",
                 show_collections=True,
             )
             from src.history_tab.voices_tab import voices_tab
 
-            voices_tab(register_use_as_history_button)
+            voices_tab()
 
         with gr.Tab("Settings"), gr.Tabs():
             from src.settings_tab_gradio import settings_tab_gradio
@@ -213,8 +211,10 @@ def generic_error_tab(e, name="SeamlessM4Tv2Model", id="seamless"):
 
             model_location_settings_tab()
             from src.utils.gpu_info_tab import gpu_info_tab
+
             gpu_info_tab()
             from src.utils.pip_list_tab import pip_list_tab
+
             pip_list_tab()
 
         # from src.studio.studio_tab import simple_remixer_tab

diff --git a/src/Joutai.py b/src/Joutai.py
@@ -3,52 +3,13 @@
 
 class Joutai:
     def __init__(self):
-        # self.tabs = gr.Tabs()
-        self.remixer_input = gr.Audio(label="Input Audio")
-        self.rvc_input = gr.Audio(label="Original Audio", type="filepath")
-        self.demucs_input = gr.Audio(label="Input", type="filepath")
-        self.vocos_input_npz = gr.File(
-            label="Input NPZ", file_types=[".npz"], interactive=True
-        )
-
-    def send_to_remixer(self, **kwargs):
-        remixer_input = self.remixer_input
-        return {
-            "fn": lambda x: remixer_input.update(value=x),
-            "outputs": [remixer_input],
-            **kwargs,
-        }
-
-    def sent_to_rvc(self, **kwargs):
-        rvc_input = self.rvc_input
-        return {
-            "fn": lambda x: rvc_input.update(value=x),
-            "outputs": [rvc_input],
-            **kwargs,
-        }
-
-    def send_to_demucs(self, **kwargs):
-        demucs_input = self.demucs_input
-        return {
-            "fn": lambda x: demucs_input.update(value=x),
-            "outputs": [demucs_input],
-            **kwargs,
-        }
-
-    def send_to_vocos_npz(self, **kwargs):
-        vocos_input_npz = self.vocos_input_npz
-        return {
-            "fn": lambda x: vocos_input_npz.update(value=x),
-            "outputs": [vocos_input_npz],
-            **kwargs,
-        }
+        self.a = 1
 
     def switch_to_tab(self, tab: str):
         def empty_fn():
             pass
+
         return {
-            # "fn": lambda: gr.Tabs.update(selected=tab),
-            # "outputs": self.tabs,
             "fn": empty_fn,
             "outputs": [],
         }

diff --git a/src/bark/clone/tab_voice_clone.py b/src/bark/clone/tab_voice_clone.py
@@ -133,7 +133,7 @@ def generate_cloned_voice_metadata(full_generation, date):
     }
 
 
-def tab_voice_clone(register_use_as_history_button):
+def tab_voice_clone():
     with gr.Tab("Bark Voice Clone"), gr.Row(equal_height=False):
         with gr.Column():
             gr.Markdown(
@@ -235,9 +235,7 @@ def load_tokenizer(tokenizer_and_repo: str, use_gpu: bool):
 
             audio_preview = gr.Audio(label="Encodec audio preview")
 
-            use_as_history_button = gr.Button(
-                value="Use as history", variant="secondary"
-            )
+            gr.Markdown("Use as history button is now only available in React UI")
 
         def generate_voice(wav_file: str, use_gpu: bool):
             full_generation = get_prompts(wav_file, use_gpu)
@@ -253,13 +251,8 @@ def generate_voice(wav_file: str, use_gpu: bool):
             api_name="bark_voice_generate",
         )
 
-        register_use_as_history_button(
-            use_as_history_button,
-            voice_file_name,
-        )
-
 
 if __name__ == "__main__":
     with gr.Blocks() as demo:
-        tab_voice_clone(lambda *args: None)
+        tab_voice_clone()
     demo.launch()
diff --git a/src/bark/generation_tab_bark.py b/src/bark/generation_tab_bark.py
@@ -653,7 +653,7 @@ def update_max_length(value):
             history_prompt_semantic_dropdown,
         ]
 
-        MAX_OUTPUTS = 9
+        MAX_OUTPUTS = 1
 
         with gr.Row():
             output_components, output_cols, seeds = map(
@@ -716,18 +716,6 @@ def generate_button(text, count, variant):
             outputs=[seed_input],
         )
 
-    def register_use_as_history_button(button, source):
-        button.click(
-            fn=lambda value: {
-                old_generation_dropdown: value,
-                history_setting: HistorySettings.NPZ_FILE,
-            },
-            inputs=[source],
-            outputs=[old_generation_dropdown, history_setting],
-        ).then(**Joutai.singleton.switch_to_tab(tab="generation_bark"))
-
-    return register_use_as_history_button
-
 
 def old_generation_dropdown_ui(label):
     with gr.Row():
@@ -824,34 +812,6 @@ def create_components(
         with gr.Row(visible=False) as buttons_row:
             save_button = gr.Button("Save", size="sm")
             reuse_seed_button = gr.Button("Seed", size="sm")
-            gr.Button("Remix", size="sm").click(
-                **Joutai.singleton.send_to_remixer(
-                    inputs=[audio],
-                )
-            ).then(
-                **Joutai.singleton.switch_to_tab(
-                    tab="simple_remixer",
-                )
-            )
-            gr.Button("RVC", size="sm").click(
-                **Joutai.singleton.sent_to_rvc(
-                    inputs=[audio],
-                )
-            ).then(
-                **Joutai.singleton.switch_to_tab(
-                    tab="rvc_tab",
-                )
-            )
-            gr.Button("Demucs", size="sm").click(
-                **Joutai.singleton.send_to_demucs(
-                    inputs=[audio],
-                )
-            ).then(
-                **Joutai.singleton.switch_to_tab(
-                    tab="demucs",
-                )
-            )
-            send_to_vocos_button = gr.Button("Vocos", size="sm")
             continue_button = gr.Button("Use as history", size="sm")
             continue_semantic_button = gr.Button("Use as semantic history", size="sm")
         npz = gr.Textbox(
@@ -878,16 +838,6 @@ def create_components(
             outputs=[seed_input],
         )
 
-        send_to_vocos_button.click(
-            **Joutai.singleton.send_to_vocos_npz(
-                inputs=[npz],
-            )
-        ).then(
-            **Joutai.singleton.switch_to_tab(
-                tab="vocos",
-            )
-        )
-
         continue_button.click(
             fn=insert_npz_file,
             inputs=[npz],

diff --git a/src/demucs/demucs_tab.py b/src/demucs/demucs_tab.py
@@ -4,7 +4,6 @@
 from demucs import pretrained
 from demucs.apply import apply_model
 from demucs.audio import convert_audio
-from src.Joutai import Joutai
 
 
 _demucs_model = None
@@ -59,8 +58,7 @@ def demucs_ui():
     )
     with gr.Row(equal_height=False):
         with gr.Column():
-            demucs_input = Joutai.singleton.demucs_input
-            demucs_input.render()
+            demucs_input = gr.Audio(label="Input", type="filepath")
             button = gr.Button("Separate")
         with gr.Column():
             outputs = [gr.Audio(label=label) for label in COMPONENTS]

diff --git a/src/history_tab/main.py b/src/history_tab/main.py
@@ -49,18 +49,18 @@ def save_to_voices_cb(npz_filename: str):
 
 
 def history_tab(
-    register_use_as_history_button, directory="outputs", show_collections=False
+    directory="outputs", show_collections=False
 ):
     with gr.Tab(
         show_collections and "Collections" or directory.capitalize()
     ) as history_tab:
         return history_content(
-            register_use_as_history_button, directory, history_tab, show_collections
+            directory, history_tab, show_collections
         )
 
 
 def history_content(
-    register_use_as_history_button, directory, history_tab, show_collections
+    directory, history_tab, show_collections
 ):
     directories = get_collections()
     directory_dropdown = gr.Dropdown(
@@ -125,11 +125,7 @@ def history_content(
                 save_to_favorites_history = gr.Button(
                     value="Save to favorites", variant="primary", visible=False
                 )
-                use_as_voice = gr.Button(
-                    value="Use as voice",
-                    variant=directory == "favorites" and "primary" or "secondary",
-                    visible=False,
-                )
+                gr.Markdown("""Use as voice button is now only available in React UI""")
                 save_to_voices = gr.Button(
                     value="Save to voices", variant="secondary", visible=False
                 )
@@ -144,11 +140,6 @@ def history_content(
                     outputs=save_to_favorites_history,
                 )
 
-                register_use_as_history_button(
-                    use_as_voice,
-                    history_npz,
-                )
-
                 save_to_voices.click(
                     fn=save_to_voices_cb,
                     inputs=history_npz,
@@ -175,7 +166,6 @@ def _select_audio_history(filename: str, json_text):
             save_to_favorites_history: gr.Button.update(
                 visible=directory != "favorites", value="Save to favorites"
             ),
-            use_as_voice: gr.Button.update(visible=True, value="Use as voice"),
             save_to_voices: gr.Button.update(visible=True, value="Save to voices"),
             open_folder_button: gr.Button.update(visible=True),
         }
@@ -195,7 +185,6 @@ def select_audio_history2(_list, evt: gr.SelectData, table):
         history_npz,
         delete_from_history,
         save_to_favorites_history,
-        use_as_voice,
         save_to_voices,
         open_folder_button,
     ]

diff --git a/src/history_tab/voices_tab.py b/src/history_tab/voices_tab.py
@@ -18,7 +18,7 @@ def update_voices_tab():
     return gr.List.update(value=get_npz_files_voices())
 
 
-def voices_tab(register_use_as_history_button, directory="voices"):
+def voices_tab(directory="voices"):
     with gr.Tab(directory.capitalize()) as voices_tab, gr.Row(equal_height=False):
         with gr.Column():
             with gr.Accordion("Gallery Selector (Click to Open)", open=False):
@@ -69,7 +69,7 @@ def voices_tab(register_use_as_history_button, directory="voices"):
             with gr.Row():
                 rename_voice_button = gr.Button(value="Rename voice")
                 delete_voice_button = gr.Button(value="Delete voice", variant="stop")
-                use_voice_button = gr.Button(value="Use voice", variant="primary")
+                gr.Markdown("""Use voice button is now only available in React UI""")
 
             metadata = gr.JSON(label="Metadata")
             metadata_input = edit_metadata_ui(voice_file_name, metadata)
@@ -163,10 +163,6 @@ def crop_voice(voice_file_name, audio_in):
         inputs=[voice_file_name, new_voice_file_name],
         outputs=[rename_voice_button, voices_list, voice_file_name],
     )
-    register_use_as_history_button(
-        use_voice_button,
-        voice_file_name,
-    )
     delete_voice_button.click(
         fn=delete_voice,
         inputs=[voice_file_name],
@@ -239,13 +235,14 @@ def select(_list_data, evt: gr.SelectData):
     def select_gallery(_list_data, evt: gr.SelectData):
         def get_gallery_file_selection(_gallery_data, evt: gr.SelectData):
             selected_image = _gallery_data[evt.index]
-            image_path = selected_image['name']
+            image_path = selected_image["name"]
             import os
+
             image_name = os.path.basename(image_path)
-            return image_name.replace('.png', '')
-        
+            return image_name.replace(".png", "")
+
         filename_base = get_gallery_file_selection(_list_data, evt)
-        return select_filename(f'voices/{filename_base}.npz')
+        return select_filename(f"voices/{filename_base}.npz")
 
     history_list_as_gallery.select(
         fn=select_gallery, inputs=[history_list_as_gallery], outputs=outputs