Skip to content

Commit

Permalink
Gradio Simplification (#344)
Browse files Browse the repository at this point in the history
* add version.json for future proofing

* musicgen remove send to demucs

* improve default magnet params on gradio

* tortoise clear model before loading the next one

* add unload tortoise model function

* remove Joutai

* reduce max bark outputs to 1

* add Unload model button to tortoise
  • Loading branch information
rsxdalv authored Jul 15, 2024
1 parent c8fbce7 commit ff9987d
Show file tree
Hide file tree
Showing 21 changed files with 144 additions and 311 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ List of models: Bark, MusicGen + AudioGen, Tortoise, RVC, Vocos, Demucs, Seamles

## Changelog

July 15:
* Comment - As the React UI has been out for a long time now, the Gradio UI is going to have the role of serving only the functions to the user, without the extremely complicated UI that it cannot handle. There is a real shortage of development time to add new models and features, but the old style of integration was not viable. As the new APIs and 'the role of the model' are defined, it will be possible to have extensions for entire models, enabling a lot more flexibility and lighter installations.
* Start scaling back Gradio UI complexity - removed _send to RVC/Demucs/Voice_ buttons. (Remove internal component Joutai).
* Add version.json for better updates in the future.
* Reduce Gradio Bark maximum number of outputs to 1.
* Add an unload model button to Tortoise. The model is also unloaded before loading the next one or changing parameters, so Tortoise no longer uses 2x model memory during a settings change.

July 14:
* Regroup Gradio tabs into groups - Text to Speech, Audio Conversion, Music Generation, Outputs and Settings
* Clean up the header, add link for feedback
Expand Down
20 changes: 10 additions & 10 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@

setup_or_recover.dummy()
dotenv_init.init()
# required for proper rendering due to gr.* methods
from src.Joutai import Joutai


def reload_config_and_restart_ui():
os._exit(0)
Expand All @@ -36,7 +35,7 @@ def reload_config_and_restart_ui():
)


def generic_error_tab(e, name="SeamlessM4Tv2Model", id="seamless"):
def generic_error_tab(e, name="", id=""):
with gr.Tab(name + " (!)", id=id):
gr.Markdown(f"""Failed to load {name} tab. Please check your configuration.""")
gr.Markdown(f"""Error: {e}""")
Expand All @@ -49,20 +48,20 @@ def generic_error_tab(e, name="SeamlessM4Tv2Model", id="seamless"):
) as demo:
gr.Markdown(
"""
# TTS Generation WebUI [React UI](http://localhost:3000) [Feedback / Bug reports](https://forms.gle/2L62owhBsGFzdFBC8)
# TTS Generation WebUI (Legacy - Gradio) [React UI](http://localhost:3000) [Feedback / Bug reports](https://forms.gle/2L62owhBsGFzdFBC8)
### _(Text To Speech, Audio & Music Generation, Conversion)_
"""
)
with gr.Tabs():
with gr.Tab("Text-to-Speech"), gr.Tabs():
from src.bark.generation_tab_bark import generation_tab_bark

register_use_as_history_button = generation_tab_bark()
generation_tab_bark()

try:
from src.bark.clone.tab_voice_clone import tab_voice_clone

tab_voice_clone(register_use_as_history_button)
tab_voice_clone()
except Exception as e:
from src.bark.clone.tab_voice_clone_error import tab_voice_clone_error

Expand Down Expand Up @@ -188,16 +187,15 @@ def generic_error_tab(e, name="SeamlessM4Tv2Model", id="seamless"):
from src.history_tab.main import history_tab

collections_directories_atom.render()
history_tab(register_use_as_history_button)
history_tab(register_use_as_history_button, directory="favorites")
history_tab()
history_tab(directory="favorites")
history_tab(
register_use_as_history_button,
directory="outputs",
show_collections=True,
)
from src.history_tab.voices_tab import voices_tab

voices_tab(register_use_as_history_button)
voices_tab()

with gr.Tab("Settings"), gr.Tabs():
from src.settings_tab_gradio import settings_tab_gradio
Expand All @@ -213,8 +211,10 @@ def generic_error_tab(e, name="SeamlessM4Tv2Model", id="seamless"):

model_location_settings_tab()
from src.utils.gpu_info_tab import gpu_info_tab

gpu_info_tab()
from src.utils.pip_list_tab import pip_list_tab

pip_list_tab()

# from src.studio.studio_tab import simple_remixer_tab
Expand Down
43 changes: 2 additions & 41 deletions src/Joutai.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,13 @@

class Joutai:
def __init__(self):
# self.tabs = gr.Tabs()
self.remixer_input = gr.Audio(label="Input Audio")
self.rvc_input = gr.Audio(label="Original Audio", type="filepath")
self.demucs_input = gr.Audio(label="Input", type="filepath")
self.vocos_input_npz = gr.File(
label="Input NPZ", file_types=[".npz"], interactive=True
)

def send_to_remixer(self, **kwargs):
remixer_input = self.remixer_input
return {
"fn": lambda x: remixer_input.update(value=x),
"outputs": [remixer_input],
**kwargs,
}

def sent_to_rvc(self, **kwargs):
rvc_input = self.rvc_input
return {
"fn": lambda x: rvc_input.update(value=x),
"outputs": [rvc_input],
**kwargs,
}

def send_to_demucs(self, **kwargs):
demucs_input = self.demucs_input
return {
"fn": lambda x: demucs_input.update(value=x),
"outputs": [demucs_input],
**kwargs,
}

def send_to_vocos_npz(self, **kwargs):
vocos_input_npz = self.vocos_input_npz
return {
"fn": lambda x: vocos_input_npz.update(value=x),
"outputs": [vocos_input_npz],
**kwargs,
}
self.a = 1

def switch_to_tab(self, tab: str):
def empty_fn():
pass

return {
# "fn": lambda: gr.Tabs.update(selected=tab),
# "outputs": self.tabs,
"fn": empty_fn,
"outputs": [],
}
Expand Down
13 changes: 3 additions & 10 deletions src/bark/clone/tab_voice_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def generate_cloned_voice_metadata(full_generation, date):
}


def tab_voice_clone(register_use_as_history_button):
def tab_voice_clone():
with gr.Tab("Bark Voice Clone"), gr.Row(equal_height=False):
with gr.Column():
gr.Markdown(
Expand Down Expand Up @@ -235,9 +235,7 @@ def load_tokenizer(tokenizer_and_repo: str, use_gpu: bool):

audio_preview = gr.Audio(label="Encodec audio preview")

use_as_history_button = gr.Button(
value="Use as history", variant="secondary"
)
gr.Markdown("Use as history button is now only available in React UI")

def generate_voice(wav_file: str, use_gpu: bool):
full_generation = get_prompts(wav_file, use_gpu)
Expand All @@ -253,13 +251,8 @@ def generate_voice(wav_file: str, use_gpu: bool):
api_name="bark_voice_generate",
)

register_use_as_history_button(
use_as_history_button,
voice_file_name,
)


if __name__ == "__main__":
with gr.Blocks() as demo:
tab_voice_clone(lambda *args: None)
tab_voice_clone()
demo.launch()
52 changes: 1 addition & 51 deletions src/bark/generation_tab_bark.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ def update_max_length(value):
history_prompt_semantic_dropdown,
]

MAX_OUTPUTS = 9
MAX_OUTPUTS = 1

with gr.Row():
output_components, output_cols, seeds = map(
Expand Down Expand Up @@ -716,18 +716,6 @@ def generate_button(text, count, variant):
outputs=[seed_input],
)

def register_use_as_history_button(button, source):
button.click(
fn=lambda value: {
old_generation_dropdown: value,
history_setting: HistorySettings.NPZ_FILE,
},
inputs=[source],
outputs=[old_generation_dropdown, history_setting],
).then(**Joutai.singleton.switch_to_tab(tab="generation_bark"))

return register_use_as_history_button


def old_generation_dropdown_ui(label):
with gr.Row():
Expand Down Expand Up @@ -824,34 +812,6 @@ def create_components(
with gr.Row(visible=False) as buttons_row:
save_button = gr.Button("Save", size="sm")
reuse_seed_button = gr.Button("Seed", size="sm")
gr.Button("Remix", size="sm").click(
**Joutai.singleton.send_to_remixer(
inputs=[audio],
)
).then(
**Joutai.singleton.switch_to_tab(
tab="simple_remixer",
)
)
gr.Button("RVC", size="sm").click(
**Joutai.singleton.sent_to_rvc(
inputs=[audio],
)
).then(
**Joutai.singleton.switch_to_tab(
tab="rvc_tab",
)
)
gr.Button("Demucs", size="sm").click(
**Joutai.singleton.send_to_demucs(
inputs=[audio],
)
).then(
**Joutai.singleton.switch_to_tab(
tab="demucs",
)
)
send_to_vocos_button = gr.Button("Vocos", size="sm")
continue_button = gr.Button("Use as history", size="sm")
continue_semantic_button = gr.Button("Use as semantic history", size="sm")
npz = gr.Textbox(
Expand All @@ -878,16 +838,6 @@ def create_components(
outputs=[seed_input],
)

send_to_vocos_button.click(
**Joutai.singleton.send_to_vocos_npz(
inputs=[npz],
)
).then(
**Joutai.singleton.switch_to_tab(
tab="vocos",
)
)

continue_button.click(
fn=insert_npz_file,
inputs=[npz],
Expand Down
4 changes: 1 addition & 3 deletions src/demucs/demucs_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from demucs import pretrained
from demucs.apply import apply_model
from demucs.audio import convert_audio
from src.Joutai import Joutai


_demucs_model = None
Expand Down Expand Up @@ -59,8 +58,7 @@ def demucs_ui():
)
with gr.Row(equal_height=False):
with gr.Column():
demucs_input = Joutai.singleton.demucs_input
demucs_input.render()
demucs_input = gr.Audio(label="Input", type="filepath")
button = gr.Button("Separate")
with gr.Column():
outputs = [gr.Audio(label=label) for label in COMPONENTS]
Expand Down
19 changes: 4 additions & 15 deletions src/history_tab/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,18 @@ def save_to_voices_cb(npz_filename: str):


def history_tab(
register_use_as_history_button, directory="outputs", show_collections=False
directory="outputs", show_collections=False
):
with gr.Tab(
show_collections and "Collections" or directory.capitalize()
) as history_tab:
return history_content(
register_use_as_history_button, directory, history_tab, show_collections
directory, history_tab, show_collections
)


def history_content(
register_use_as_history_button, directory, history_tab, show_collections
directory, history_tab, show_collections
):
directories = get_collections()
directory_dropdown = gr.Dropdown(
Expand Down Expand Up @@ -125,11 +125,7 @@ def history_content(
save_to_favorites_history = gr.Button(
value="Save to favorites", variant="primary", visible=False
)
use_as_voice = gr.Button(
value="Use as voice",
variant=directory == "favorites" and "primary" or "secondary",
visible=False,
)
gr.Markdown("""Use as voice button is now only available in React UI""")
save_to_voices = gr.Button(
value="Save to voices", variant="secondary", visible=False
)
Expand All @@ -144,11 +140,6 @@ def history_content(
outputs=save_to_favorites_history,
)

register_use_as_history_button(
use_as_voice,
history_npz,
)

save_to_voices.click(
fn=save_to_voices_cb,
inputs=history_npz,
Expand All @@ -175,7 +166,6 @@ def _select_audio_history(filename: str, json_text):
save_to_favorites_history: gr.Button.update(
visible=directory != "favorites", value="Save to favorites"
),
use_as_voice: gr.Button.update(visible=True, value="Use as voice"),
save_to_voices: gr.Button.update(visible=True, value="Save to voices"),
open_folder_button: gr.Button.update(visible=True),
}
Expand All @@ -195,7 +185,6 @@ def select_audio_history2(_list, evt: gr.SelectData, table):
history_npz,
delete_from_history,
save_to_favorites_history,
use_as_voice,
save_to_voices,
open_folder_button,
]
Expand Down
17 changes: 7 additions & 10 deletions src/history_tab/voices_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def update_voices_tab():
return gr.List.update(value=get_npz_files_voices())


def voices_tab(register_use_as_history_button, directory="voices"):
def voices_tab(directory="voices"):
with gr.Tab(directory.capitalize()) as voices_tab, gr.Row(equal_height=False):
with gr.Column():
with gr.Accordion("Gallery Selector (Click to Open)", open=False):
Expand Down Expand Up @@ -69,7 +69,7 @@ def voices_tab(register_use_as_history_button, directory="voices"):
with gr.Row():
rename_voice_button = gr.Button(value="Rename voice")
delete_voice_button = gr.Button(value="Delete voice", variant="stop")
use_voice_button = gr.Button(value="Use voice", variant="primary")
gr.Markdown("""Use voice button is now only available in React UI""")

metadata = gr.JSON(label="Metadata")
metadata_input = edit_metadata_ui(voice_file_name, metadata)
Expand Down Expand Up @@ -163,10 +163,6 @@ def crop_voice(voice_file_name, audio_in):
inputs=[voice_file_name, new_voice_file_name],
outputs=[rename_voice_button, voices_list, voice_file_name],
)
register_use_as_history_button(
use_voice_button,
voice_file_name,
)
delete_voice_button.click(
fn=delete_voice,
inputs=[voice_file_name],
Expand Down Expand Up @@ -239,13 +235,14 @@ def select(_list_data, evt: gr.SelectData):
def select_gallery(_list_data, evt: gr.SelectData):
def get_gallery_file_selection(_gallery_data, evt: gr.SelectData):
selected_image = _gallery_data[evt.index]
image_path = selected_image['name']
image_path = selected_image["name"]
import os

image_name = os.path.basename(image_path)
return image_name.replace('.png', '')
return image_name.replace(".png", "")

filename_base = get_gallery_file_selection(_list_data, evt)
return select_filename(f'voices/{filename_base}.npz')
return select_filename(f"voices/{filename_base}.npz")

history_list_as_gallery.select(
fn=select_gallery, inputs=[history_list_as_gallery], outputs=outputs
Expand Down
Loading

0 comments on commit ff9987d

Please sign in to comment.