From cfe4247537469455c052636ec985307c3d6499cf Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Thu, 12 Sep 2024 12:07:33 -0400 Subject: [PATCH 01/38] guides --- .../01_streaming-ai-generated-audio.md} | 0 .../02_object-detection-from-webcam.md | 93 +++++++++++++++++++ .../03_real-time-speech-recognition.md} | 0 .../01_custom-components-in-five-minutes.md | 0 .../02_key-component-concepts.md | 0 .../03_configuration.md | 0 .../04_backend.md | 0 .../05_frontend.md | 0 .../06_frequently-asked-questions.md | 0 .../07_pdf-component-example.md | 0 .../08_multimodal-chatbot-part1.md | 0 .../09_documenting-custom-components.md | 0 ..._getting-started-with-the-python-client.md | 0 .../02_getting-started-with-the-js-client.md | 0 .../03_querying-gradio-apps-with-curl.md | 0 .../04_gradio-and-llm-agents.md | 0 .../05_gradio-lite.md | 0 .../06_gradio-lite-and-transformers-js.md | 0 .../07_fastapi-app-with-the-gradio-client.md | 0 .../01_using-hugging-face-integrations.md | 0 .../Gradio-and-Comet.md | 0 .../Gradio-and-ONNX-on-Hugging-Face.md | 0 .../Gradio-and-Wandb-Integration.md | 0 .../create-your-own-friends-with-a-gan.md | 0 ...creating-a-dashboard-from-bigquery-data.md | 0 ...creating-a-dashboard-from-supabase-data.md | 0 ...a-realtime-dashboard-from-google-sheets.md | 0 .../deploying-gradio-with-docker.md | 0 .../developing-faster-with-reload-mode.md | 0 .../how-to-use-3D-model-component.md | 0 .../image-classification-in-pytorch.md | 0 .../image-classification-in-tensorflow.md | 0 ...classification-with-vision-transformers.md | 0 ...talling-gradio-in-a-virtual-environment.md | 0 .../named-entity-recognition.md | 0 .../plot-component-for-maps.md | 0 .../running-background-tasks.md | 0 ...ng-gradio-on-your-web-server-with-nginx.md | 0 ...tting-up-a-demo-for-maximum-performance.md | 0 .../styling-the-gradio-dataframe.md | 0 .../theming-guide.md | 0 .../using-flagging.md | 0 .../using-gradio-for-tabular-workflows.md | 0 .../wrapping-layouts.md | 0 44 files changed, 93 insertions(+) rename guides/{09_other-tutorials/streaming-ai-generated-audio.md => 07_streaming/01_streaming-ai-generated-audio.md} (100%) create mode 100644 guides/07_streaming/02_object-detection-from-webcam.md rename guides/{09_other-tutorials/real-time-speech-recognition.md => 07_streaming/03_real-time-speech-recognition.md} (100%) rename guides/{07_custom-components => 08_custom-components}/01_custom-components-in-five-minutes.md (100%) rename guides/{07_custom-components => 08_custom-components}/02_key-component-concepts.md (100%) rename guides/{07_custom-components => 08_custom-components}/03_configuration.md (100%) rename guides/{07_custom-components => 08_custom-components}/04_backend.md (100%) rename guides/{07_custom-components => 08_custom-components}/05_frontend.md (100%) rename guides/{07_custom-components => 08_custom-components}/06_frequently-asked-questions.md (100%) rename guides/{07_custom-components => 08_custom-components}/07_pdf-component-example.md (100%) rename guides/{07_custom-components => 08_custom-components}/08_multimodal-chatbot-part1.md (100%) rename guides/{07_custom-components => 08_custom-components}/09_documenting-custom-components.md (100%) rename guides/{08_gradio-clients-and-lite => 09_gradio-clients-and-lite}/01_getting-started-with-the-python-client.md (100%) rename guides/{08_gradio-clients-and-lite => 09_gradio-clients-and-lite}/02_getting-started-with-the-js-client.md (100%) rename guides/{08_gradio-clients-and-lite => 09_gradio-clients-and-lite}/03_querying-gradio-apps-with-curl.md 
(100%) rename guides/{08_gradio-clients-and-lite => 09_gradio-clients-and-lite}/04_gradio-and-llm-agents.md (100%) rename guides/{08_gradio-clients-and-lite => 09_gradio-clients-and-lite}/05_gradio-lite.md (100%) rename guides/{08_gradio-clients-and-lite => 09_gradio-clients-and-lite}/06_gradio-lite-and-transformers-js.md (100%) rename guides/{08_gradio-clients-and-lite => 09_gradio-clients-and-lite}/07_fastapi-app-with-the-gradio-client.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/01_using-hugging-face-integrations.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/Gradio-and-Comet.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/Gradio-and-ONNX-on-Hugging-Face.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/Gradio-and-Wandb-Integration.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/create-your-own-friends-with-a-gan.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/creating-a-dashboard-from-bigquery-data.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/creating-a-dashboard-from-supabase-data.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/creating-a-realtime-dashboard-from-google-sheets.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/deploying-gradio-with-docker.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/developing-faster-with-reload-mode.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/how-to-use-3D-model-component.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/image-classification-in-pytorch.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/image-classification-in-tensorflow.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/image-classification-with-vision-transformers.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/installing-gradio-in-a-virtual-environment.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/named-entity-recognition.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/plot-component-for-maps.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/running-background-tasks.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/running-gradio-on-your-web-server-with-nginx.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/setting-up-a-demo-for-maximum-performance.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/styling-the-gradio-dataframe.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/theming-guide.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/using-flagging.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/using-gradio-for-tabular-workflows.md (100%) rename guides/{09_other-tutorials => 10_other-tutorials}/wrapping-layouts.md (100%) diff --git a/guides/09_other-tutorials/streaming-ai-generated-audio.md b/guides/07_streaming/01_streaming-ai-generated-audio.md similarity index 100% rename from guides/09_other-tutorials/streaming-ai-generated-audio.md rename to guides/07_streaming/01_streaming-ai-generated-audio.md diff --git a/guides/07_streaming/02_object-detection-from-webcam.md b/guides/07_streaming/02_object-detection-from-webcam.md new file mode 100644 index 0000000000000..7f97ead26cf62 --- /dev/null +++ b/guides/07_streaming/02_object-detection-from-webcam.md @@ -0,0 +1,93 @@ +# Object Detection from a Webcam Stream + +Tags: VISION, STREAMING, 
WEBCAM
+
+In this guide, we'll use YOLOv10 to perform near-real-time object detection in Gradio from a user's webcam.
+Along the way, we'll be using the latest streaming features introduced in Gradio 5.0.
+
+## Setting up the Model
+
+First, we'll follow the installation instructions for [Yolov10n](https://huggingface.co/jameslahm/yolov10n) on the Hugging Face Hub.
+
+Run `pip install git+https://github.com/THU-MIG/yolov10.git` in your virtual environment.
+
+Then, we'll download the model from the Hub (`ultralytics` is the library we've just installed).
+
+```python
+from ultralytics import YOLOv10
+
+model = YOLOv10.from_pretrained('jameslahm/yolov10n')
+```
+
+We are using the `yolov10-n` variant because it has the lowest latency. See the [Performance](https://github.com/THU-MIG/yolov10?tab=readme-ov-file#performance) section of the README in the GitHub repository.
+
+
+## The Inference Function
+
+Our inference function will accept a PIL image from the webcam as well as the desired confidence threshold.
+Object detection models like YOLO identify many objects and assign a confidence score to each one. The lower the confidence, the higher the chance of a false positive, so we will let our users adjust the confidence threshold.
+
+```python
+def yolov10_inference(image, conf_threshold):
+    width, _ = image.size
+    results = model.predict(source=image, imgsz=width, conf=conf_threshold)
+    annotated_image = results[0].plot()
+    return annotated_image[:, :, ::-1]
+```
+
+We will use the `plot` method to draw a bounding box around each detected object. YOLOv10 assumes images are in the BGR color format, so we will flip them to the RGB format that web browsers expect.
+
+## The Gradio Demo
+
+The Gradio demo will be pretty straightforward, but we'll do a couple of things that are specific to streaming:
+
+* The user's webcam will be both an input and an output. That way, the user will see only their own stream, with the detected objects drawn on top.
+* We'll use the `time_limit` and `stream_every` parameters of the `stream` event. The `time_limit` parameter sets how long each user's stream will be processed, and the `stream_every` parameter controls how frequently the webcam stream is sent to the server.
+
+In addition, we'll apply some custom CSS so that the webcam and slider are centered on the page.
+
+```python
+css=""".my-group {max-width: 600px !important; max-height: 600px !important;}
+      .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
+
+
+with gr.Blocks(css=css) as app:
+    gr.HTML(
+        """
+

+ YOLOv10 Webcam Stream +

+ """) + gr.HTML( + """ +

+ arXiv | github +

+ """) + with gr.Column(elem_classes=["my-column"]): + with gr.Group(elem_classes=["my-group"]): + image = gr.Image(type="pil", label="Image", sources="webcam") + conf_threshold = gr.Slider( + label="Confidence Threshold", + minimum=0.0, + maximum=1.0, + step=0.05, + value=0.30, + ) + image.stream( + fn=yolov10_inference, + inputs=[image, conf_threshold], + outputs=[image], + stream_every=0.1, + time_limit=30 + ) +``` + + +## Conclusion + +You can check out our demo hosted on Hugging Face Spaces [here](https://huggingface.co/spaces/gradio/YOLOv10-webcam-stream). + +It is also embedded on this page below + +$demo_ \ No newline at end of file diff --git a/guides/09_other-tutorials/real-time-speech-recognition.md b/guides/07_streaming/03_real-time-speech-recognition.md similarity index 100% rename from guides/09_other-tutorials/real-time-speech-recognition.md rename to guides/07_streaming/03_real-time-speech-recognition.md diff --git a/guides/07_custom-components/01_custom-components-in-five-minutes.md b/guides/08_custom-components/01_custom-components-in-five-minutes.md similarity index 100% rename from guides/07_custom-components/01_custom-components-in-five-minutes.md rename to guides/08_custom-components/01_custom-components-in-five-minutes.md diff --git a/guides/07_custom-components/02_key-component-concepts.md b/guides/08_custom-components/02_key-component-concepts.md similarity index 100% rename from guides/07_custom-components/02_key-component-concepts.md rename to guides/08_custom-components/02_key-component-concepts.md diff --git a/guides/07_custom-components/03_configuration.md b/guides/08_custom-components/03_configuration.md similarity index 100% rename from guides/07_custom-components/03_configuration.md rename to guides/08_custom-components/03_configuration.md diff --git a/guides/07_custom-components/04_backend.md b/guides/08_custom-components/04_backend.md similarity index 100% rename from guides/07_custom-components/04_backend.md rename to guides/08_custom-components/04_backend.md diff --git a/guides/07_custom-components/05_frontend.md b/guides/08_custom-components/05_frontend.md similarity index 100% rename from guides/07_custom-components/05_frontend.md rename to guides/08_custom-components/05_frontend.md diff --git a/guides/07_custom-components/06_frequently-asked-questions.md b/guides/08_custom-components/06_frequently-asked-questions.md similarity index 100% rename from guides/07_custom-components/06_frequently-asked-questions.md rename to guides/08_custom-components/06_frequently-asked-questions.md diff --git a/guides/07_custom-components/07_pdf-component-example.md b/guides/08_custom-components/07_pdf-component-example.md similarity index 100% rename from guides/07_custom-components/07_pdf-component-example.md rename to guides/08_custom-components/07_pdf-component-example.md diff --git a/guides/07_custom-components/08_multimodal-chatbot-part1.md b/guides/08_custom-components/08_multimodal-chatbot-part1.md similarity index 100% rename from guides/07_custom-components/08_multimodal-chatbot-part1.md rename to guides/08_custom-components/08_multimodal-chatbot-part1.md diff --git a/guides/07_custom-components/09_documenting-custom-components.md b/guides/08_custom-components/09_documenting-custom-components.md similarity index 100% rename from guides/07_custom-components/09_documenting-custom-components.md rename to guides/08_custom-components/09_documenting-custom-components.md diff --git a/guides/08_gradio-clients-and-lite/01_getting-started-with-the-python-client.md 
b/guides/09_gradio-clients-and-lite/01_getting-started-with-the-python-client.md similarity index 100% rename from guides/08_gradio-clients-and-lite/01_getting-started-with-the-python-client.md rename to guides/09_gradio-clients-and-lite/01_getting-started-with-the-python-client.md diff --git a/guides/08_gradio-clients-and-lite/02_getting-started-with-the-js-client.md b/guides/09_gradio-clients-and-lite/02_getting-started-with-the-js-client.md similarity index 100% rename from guides/08_gradio-clients-and-lite/02_getting-started-with-the-js-client.md rename to guides/09_gradio-clients-and-lite/02_getting-started-with-the-js-client.md diff --git a/guides/08_gradio-clients-and-lite/03_querying-gradio-apps-with-curl.md b/guides/09_gradio-clients-and-lite/03_querying-gradio-apps-with-curl.md similarity index 100% rename from guides/08_gradio-clients-and-lite/03_querying-gradio-apps-with-curl.md rename to guides/09_gradio-clients-and-lite/03_querying-gradio-apps-with-curl.md diff --git a/guides/08_gradio-clients-and-lite/04_gradio-and-llm-agents.md b/guides/09_gradio-clients-and-lite/04_gradio-and-llm-agents.md similarity index 100% rename from guides/08_gradio-clients-and-lite/04_gradio-and-llm-agents.md rename to guides/09_gradio-clients-and-lite/04_gradio-and-llm-agents.md diff --git a/guides/08_gradio-clients-and-lite/05_gradio-lite.md b/guides/09_gradio-clients-and-lite/05_gradio-lite.md similarity index 100% rename from guides/08_gradio-clients-and-lite/05_gradio-lite.md rename to guides/09_gradio-clients-and-lite/05_gradio-lite.md diff --git a/guides/08_gradio-clients-and-lite/06_gradio-lite-and-transformers-js.md b/guides/09_gradio-clients-and-lite/06_gradio-lite-and-transformers-js.md similarity index 100% rename from guides/08_gradio-clients-and-lite/06_gradio-lite-and-transformers-js.md rename to guides/09_gradio-clients-and-lite/06_gradio-lite-and-transformers-js.md diff --git a/guides/08_gradio-clients-and-lite/07_fastapi-app-with-the-gradio-client.md b/guides/09_gradio-clients-and-lite/07_fastapi-app-with-the-gradio-client.md similarity index 100% rename from guides/08_gradio-clients-and-lite/07_fastapi-app-with-the-gradio-client.md rename to guides/09_gradio-clients-and-lite/07_fastapi-app-with-the-gradio-client.md diff --git a/guides/09_other-tutorials/01_using-hugging-face-integrations.md b/guides/10_other-tutorials/01_using-hugging-face-integrations.md similarity index 100% rename from guides/09_other-tutorials/01_using-hugging-face-integrations.md rename to guides/10_other-tutorials/01_using-hugging-face-integrations.md diff --git a/guides/09_other-tutorials/Gradio-and-Comet.md b/guides/10_other-tutorials/Gradio-and-Comet.md similarity index 100% rename from guides/09_other-tutorials/Gradio-and-Comet.md rename to guides/10_other-tutorials/Gradio-and-Comet.md diff --git a/guides/09_other-tutorials/Gradio-and-ONNX-on-Hugging-Face.md b/guides/10_other-tutorials/Gradio-and-ONNX-on-Hugging-Face.md similarity index 100% rename from guides/09_other-tutorials/Gradio-and-ONNX-on-Hugging-Face.md rename to guides/10_other-tutorials/Gradio-and-ONNX-on-Hugging-Face.md diff --git a/guides/09_other-tutorials/Gradio-and-Wandb-Integration.md b/guides/10_other-tutorials/Gradio-and-Wandb-Integration.md similarity index 100% rename from guides/09_other-tutorials/Gradio-and-Wandb-Integration.md rename to guides/10_other-tutorials/Gradio-and-Wandb-Integration.md diff --git a/guides/09_other-tutorials/create-your-own-friends-with-a-gan.md 
b/guides/10_other-tutorials/create-your-own-friends-with-a-gan.md similarity index 100% rename from guides/09_other-tutorials/create-your-own-friends-with-a-gan.md rename to guides/10_other-tutorials/create-your-own-friends-with-a-gan.md diff --git a/guides/09_other-tutorials/creating-a-dashboard-from-bigquery-data.md b/guides/10_other-tutorials/creating-a-dashboard-from-bigquery-data.md similarity index 100% rename from guides/09_other-tutorials/creating-a-dashboard-from-bigquery-data.md rename to guides/10_other-tutorials/creating-a-dashboard-from-bigquery-data.md diff --git a/guides/09_other-tutorials/creating-a-dashboard-from-supabase-data.md b/guides/10_other-tutorials/creating-a-dashboard-from-supabase-data.md similarity index 100% rename from guides/09_other-tutorials/creating-a-dashboard-from-supabase-data.md rename to guides/10_other-tutorials/creating-a-dashboard-from-supabase-data.md diff --git a/guides/09_other-tutorials/creating-a-realtime-dashboard-from-google-sheets.md b/guides/10_other-tutorials/creating-a-realtime-dashboard-from-google-sheets.md similarity index 100% rename from guides/09_other-tutorials/creating-a-realtime-dashboard-from-google-sheets.md rename to guides/10_other-tutorials/creating-a-realtime-dashboard-from-google-sheets.md diff --git a/guides/09_other-tutorials/deploying-gradio-with-docker.md b/guides/10_other-tutorials/deploying-gradio-with-docker.md similarity index 100% rename from guides/09_other-tutorials/deploying-gradio-with-docker.md rename to guides/10_other-tutorials/deploying-gradio-with-docker.md diff --git a/guides/09_other-tutorials/developing-faster-with-reload-mode.md b/guides/10_other-tutorials/developing-faster-with-reload-mode.md similarity index 100% rename from guides/09_other-tutorials/developing-faster-with-reload-mode.md rename to guides/10_other-tutorials/developing-faster-with-reload-mode.md diff --git a/guides/09_other-tutorials/how-to-use-3D-model-component.md b/guides/10_other-tutorials/how-to-use-3D-model-component.md similarity index 100% rename from guides/09_other-tutorials/how-to-use-3D-model-component.md rename to guides/10_other-tutorials/how-to-use-3D-model-component.md diff --git a/guides/09_other-tutorials/image-classification-in-pytorch.md b/guides/10_other-tutorials/image-classification-in-pytorch.md similarity index 100% rename from guides/09_other-tutorials/image-classification-in-pytorch.md rename to guides/10_other-tutorials/image-classification-in-pytorch.md diff --git a/guides/09_other-tutorials/image-classification-in-tensorflow.md b/guides/10_other-tutorials/image-classification-in-tensorflow.md similarity index 100% rename from guides/09_other-tutorials/image-classification-in-tensorflow.md rename to guides/10_other-tutorials/image-classification-in-tensorflow.md diff --git a/guides/09_other-tutorials/image-classification-with-vision-transformers.md b/guides/10_other-tutorials/image-classification-with-vision-transformers.md similarity index 100% rename from guides/09_other-tutorials/image-classification-with-vision-transformers.md rename to guides/10_other-tutorials/image-classification-with-vision-transformers.md diff --git a/guides/09_other-tutorials/installing-gradio-in-a-virtual-environment.md b/guides/10_other-tutorials/installing-gradio-in-a-virtual-environment.md similarity index 100% rename from guides/09_other-tutorials/installing-gradio-in-a-virtual-environment.md rename to guides/10_other-tutorials/installing-gradio-in-a-virtual-environment.md diff --git 
a/guides/09_other-tutorials/named-entity-recognition.md b/guides/10_other-tutorials/named-entity-recognition.md similarity index 100% rename from guides/09_other-tutorials/named-entity-recognition.md rename to guides/10_other-tutorials/named-entity-recognition.md diff --git a/guides/09_other-tutorials/plot-component-for-maps.md b/guides/10_other-tutorials/plot-component-for-maps.md similarity index 100% rename from guides/09_other-tutorials/plot-component-for-maps.md rename to guides/10_other-tutorials/plot-component-for-maps.md diff --git a/guides/09_other-tutorials/running-background-tasks.md b/guides/10_other-tutorials/running-background-tasks.md similarity index 100% rename from guides/09_other-tutorials/running-background-tasks.md rename to guides/10_other-tutorials/running-background-tasks.md diff --git a/guides/09_other-tutorials/running-gradio-on-your-web-server-with-nginx.md b/guides/10_other-tutorials/running-gradio-on-your-web-server-with-nginx.md similarity index 100% rename from guides/09_other-tutorials/running-gradio-on-your-web-server-with-nginx.md rename to guides/10_other-tutorials/running-gradio-on-your-web-server-with-nginx.md diff --git a/guides/09_other-tutorials/setting-up-a-demo-for-maximum-performance.md b/guides/10_other-tutorials/setting-up-a-demo-for-maximum-performance.md similarity index 100% rename from guides/09_other-tutorials/setting-up-a-demo-for-maximum-performance.md rename to guides/10_other-tutorials/setting-up-a-demo-for-maximum-performance.md diff --git a/guides/09_other-tutorials/styling-the-gradio-dataframe.md b/guides/10_other-tutorials/styling-the-gradio-dataframe.md similarity index 100% rename from guides/09_other-tutorials/styling-the-gradio-dataframe.md rename to guides/10_other-tutorials/styling-the-gradio-dataframe.md diff --git a/guides/09_other-tutorials/theming-guide.md b/guides/10_other-tutorials/theming-guide.md similarity index 100% rename from guides/09_other-tutorials/theming-guide.md rename to guides/10_other-tutorials/theming-guide.md diff --git a/guides/09_other-tutorials/using-flagging.md b/guides/10_other-tutorials/using-flagging.md similarity index 100% rename from guides/09_other-tutorials/using-flagging.md rename to guides/10_other-tutorials/using-flagging.md diff --git a/guides/09_other-tutorials/using-gradio-for-tabular-workflows.md b/guides/10_other-tutorials/using-gradio-for-tabular-workflows.md similarity index 100% rename from guides/09_other-tutorials/using-gradio-for-tabular-workflows.md rename to guides/10_other-tutorials/using-gradio-for-tabular-workflows.md diff --git a/guides/09_other-tutorials/wrapping-layouts.md b/guides/10_other-tutorials/wrapping-layouts.md similarity index 100% rename from guides/09_other-tutorials/wrapping-layouts.md rename to guides/10_other-tutorials/wrapping-layouts.md From 3a28eac01d7f6545929b1c79ec2ff1d251ab9ece Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Thu, 12 Sep 2024 12:36:01 -0400 Subject: [PATCH 02/38] Add demo --- demo/yolov10_webcam_stream/run.ipynb | 1 + demo/yolov10_webcam_stream/run.py | 58 ++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 demo/yolov10_webcam_stream/run.ipynb create mode 100644 demo/yolov10_webcam_stream/run.py diff --git a/demo/yolov10_webcam_stream/run.ipynb b/demo/yolov10_webcam_stream/run.ipynb new file mode 100644 index 0000000000000..95666c88c4b63 --- /dev/null +++ b/demo/yolov10_webcam_stream/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", 
"metadata": {}, "source": ["# Gradio Demo: yolov10_webcam_stream"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "from ultralytics import YOLOv10\n", "\n", "model = YOLOv10.from_pretrained('jameslahm/yolov10n')\n", "\n", "\n", "def yolov10_inference(image, conf_threshold):\n", " width, _ = image.size\n", " import time\n", " start = time.time()\n", " results = model.predict(source=image, imgsz=width, conf=conf_threshold)\n", " end = time.time()\n", " annotated_image = results[0].plot()\n", " print(\"time\", end - start)\n", " return annotated_image[:, :, ::-1]\n", "\n", "\n", "css=\"\"\".my-group {max-width: 600px !important; max-height: 600 !important;}\n", " .my-column {display: flex !important; justify-content: center !important; align-items: center !important};\"\"\"\n", "\n", "\n", "with gr.Blocks(css=css) as app:\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " YOLOv10 Webcam Stream\n", "

\n", " \"\"\")\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " arXiv | github\n", "

\n", " \"\"\")\n", " with gr.Column(elem_classes=[\"my-column\"]):\n", " with gr.Group(elem_classes=[\"my-group\"]):\n", " image = gr.Image(type=\"pil\", label=\"Image\", sources=\"webcam\")\n", " conf_threshold = gr.Slider(\n", " label=\"Confidence Threshold\",\n", " minimum=0.0,\n", " maximum=1.0,\n", " step=0.05,\n", " value=0.30,\n", " )\n", " image.stream(\n", " fn=yolov10_inference,\n", " inputs=[image, conf_threshold],\n", " outputs=[image],\n", " stream_every=0.1,\n", " time_limit=30\n", " )\n", "\n", "if __name__ == '__main__':\n", " app.launch()"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/yolov10_webcam_stream/run.py b/demo/yolov10_webcam_stream/run.py new file mode 100644 index 0000000000000..c8d27db6a281c --- /dev/null +++ b/demo/yolov10_webcam_stream/run.py @@ -0,0 +1,58 @@ +import gradio as gr + +from ultralytics import YOLOv10 + +model = YOLOv10.from_pretrained("jameslahm/yolov10n") + + +def yolov10_inference(image, conf_threshold): + width, _ = image.size + import time + + start = time.time() + results = model.predict(source=image, imgsz=width, conf=conf_threshold) + end = time.time() + annotated_image = results[0].plot() + print("time", end - start) + return annotated_image[:, :, ::-1] + + +css = """.my-group {max-width: 600px !important; max-height: 600 !important;} + .my-column {display: flex !important; justify-content: center !important; align-items: center !important};""" + + +with gr.Blocks(css=css) as app: + gr.HTML( + """ +

+ YOLOv10 Webcam Stream +

+ """ + ) + gr.HTML( + """ +

+ arXiv | github +

+ """ + ) + with gr.Column(elem_classes=["my-column"]): + with gr.Group(elem_classes=["my-group"]): + image = gr.Image(type="pil", label="Image", sources="webcam") + conf_threshold = gr.Slider( + label="Confidence Threshold", + minimum=0.0, + maximum=1.0, + step=0.05, + value=0.30, + ) + image.stream( + fn=yolov10_inference, + inputs=[image, conf_threshold], + outputs=[image], + stream_every=0.1, + time_limit=30, + ) + +if __name__ == "__main__": + app.launch() From dadda8c9e83d9ea620efbc0cb1fedebc39d714eb Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Thu, 12 Sep 2024 17:29:45 -0400 Subject: [PATCH 03/38] guide --- demo/yolov10_webcam_stream/run.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/yolov10_webcam_stream/run.ipynb b/demo/yolov10_webcam_stream/run.ipynb index 95666c88c4b63..304f0664033a4 100644 --- a/demo/yolov10_webcam_stream/run.ipynb +++ b/demo/yolov10_webcam_stream/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: yolov10_webcam_stream"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "from ultralytics import YOLOv10\n", "\n", "model = YOLOv10.from_pretrained('jameslahm/yolov10n')\n", "\n", "\n", "def yolov10_inference(image, conf_threshold):\n", " width, _ = image.size\n", " import time\n", " start = time.time()\n", " results = model.predict(source=image, imgsz=width, conf=conf_threshold)\n", " end = time.time()\n", " annotated_image = results[0].plot()\n", " print(\"time\", end - start)\n", " return annotated_image[:, :, ::-1]\n", "\n", "\n", "css=\"\"\".my-group {max-width: 600px !important; max-height: 600 !important;}\n", " .my-column {display: flex !important; justify-content: center !important; align-items: center !important};\"\"\"\n", "\n", "\n", "with gr.Blocks(css=css) as app:\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " YOLOv10 Webcam Stream\n", "

\n", " \"\"\")\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " arXiv | github\n", "

\n", " \"\"\")\n", " with gr.Column(elem_classes=[\"my-column\"]):\n", " with gr.Group(elem_classes=[\"my-group\"]):\n", " image = gr.Image(type=\"pil\", label=\"Image\", sources=\"webcam\")\n", " conf_threshold = gr.Slider(\n", " label=\"Confidence Threshold\",\n", " minimum=0.0,\n", " maximum=1.0,\n", " step=0.05,\n", " value=0.30,\n", " )\n", " image.stream(\n", " fn=yolov10_inference,\n", " inputs=[image, conf_threshold],\n", " outputs=[image],\n", " stream_every=0.1,\n", " time_limit=30\n", " )\n", "\n", "if __name__ == '__main__':\n", " app.launch()"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: yolov10_webcam_stream"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "from ultralytics import YOLOv10\n", "\n", "model = YOLOv10.from_pretrained(\"jameslahm/yolov10n\")\n", "\n", "\n", "def yolov10_inference(image, conf_threshold):\n", " width, _ = image.size\n", " import time\n", "\n", " start = time.time()\n", " results = model.predict(source=image, imgsz=width, conf=conf_threshold)\n", " end = time.time()\n", " annotated_image = results[0].plot()\n", " print(\"time\", end - start)\n", " return annotated_image[:, :, ::-1]\n", "\n", "\n", "css = \"\"\".my-group {max-width: 600px !important; max-height: 600 !important;}\n", " .my-column {display: flex !important; justify-content: center !important; align-items: center !important};\"\"\"\n", "\n", "\n", "with gr.Blocks(css=css) as app:\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " YOLOv10 Webcam Stream\n", "

\n", " \"\"\"\n", " )\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " arXiv | github\n", "

\n", " \"\"\"\n", " )\n", " with gr.Column(elem_classes=[\"my-column\"]):\n", " with gr.Group(elem_classes=[\"my-group\"]):\n", " image = gr.Image(type=\"pil\", label=\"Image\", sources=\"webcam\")\n", " conf_threshold = gr.Slider(\n", " label=\"Confidence Threshold\",\n", " minimum=0.0,\n", " maximum=1.0,\n", " step=0.05,\n", " value=0.30,\n", " )\n", " image.stream(\n", " fn=yolov10_inference,\n", " inputs=[image, conf_threshold],\n", " outputs=[image],\n", " stream_every=0.1,\n", " time_limit=30,\n", " )\n", "\n", "if __name__ == \"__main__\":\n", " app.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file From 3d54d4ea1760f04ae19fde9efe4e9e322b3e4d44 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Thu, 12 Sep 2024 15:08:12 -0700 Subject: [PATCH 04/38] Add info about Powershell client (#9343) * clients * add changeset --------- Co-authored-by: gradio-pr-bot --- .changeset/wicked-swans-wink.md | 5 +++++ .../third-party-clients/01_introduction.svx | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 .changeset/wicked-swans-wink.md diff --git a/.changeset/wicked-swans-wink.md b/.changeset/wicked-swans-wink.md new file mode 100644 index 0000000000000..3152ab867c2ec --- /dev/null +++ b/.changeset/wicked-swans-wink.md @@ -0,0 +1,5 @@ +--- +"website": minor +--- + +feat:Add info about Powershell client diff --git a/js/_website/src/lib/templates/third-party-clients/third-party-clients/01_introduction.svx b/js/_website/src/lib/templates/third-party-clients/third-party-clients/01_introduction.svx index 396af07a87ccd..73a0c96b19d63 100644 --- a/js/_website/src/lib/templates/third-party-clients/third-party-clients/01_introduction.svx +++ b/js/_website/src/lib/templates/third-party-clients/third-party-clients/01_introduction.svx @@ -14,4 +14,5 @@ ### Community Clients ## We also encourage the development and use of third party clients built by the community: -- [Rust Client](/docs/third-party-clients/rust-client): `gradio-rs` built by [@JacobLinCool](https://github.com/JacobLinCool) allows you to make requests in Rust. \ No newline at end of file +- [Rust Client](/docs/third-party-clients/rust-client): `gradio-rs` built by [@JacobLinCool](https://github.com/JacobLinCool) allows you to make requests in Rust. +- [Powershell Client](https://github.com/rrg92/powershai): `powershai` built by [@rrg92](https://github.com/rrg92) allows you to make requests to Gradio apps directly from Powershell. 
See [here for documentation](https://github.com/rrg92/powershai/blob/main/docs/en-US/providers/HUGGING-FACE.md) \ No newline at end of file From a9b51820e007989ac8dc072af4506b3fb0dd506e Mon Sep 17 00:00:00 2001 From: "Yuichiro Tachibana (Tsuchiya)" Date: Fri, 13 Sep 2024 01:13:04 +0100 Subject: [PATCH 05/38] Remove lite/theme.css from the Git-managed file tree (#9335) * Delete js/lite/src/theme.css from the Git managed file tree as it's dynamically generated * Remove lite-related npm scripts from spa/package.json * add changeset --------- Co-authored-by: gradio-pr-bot --- .changeset/huge-corners-tease.md | 8 + js/core/.gitignore | 1 - js/lite/.gitignore | 1 + js/lite/src/theme.css | 424 ------------------------------- js/spa/.gitignore | 1 - js/spa/package.json | 10 - 6 files changed, 9 insertions(+), 436 deletions(-) create mode 100644 .changeset/huge-corners-tease.md create mode 100644 js/lite/.gitignore delete mode 100644 js/lite/src/theme.css delete mode 100644 js/spa/.gitignore diff --git a/.changeset/huge-corners-tease.md b/.changeset/huge-corners-tease.md new file mode 100644 index 0000000000000..0a7a4fdacf0de --- /dev/null +++ b/.changeset/huge-corners-tease.md @@ -0,0 +1,8 @@ +--- +"@gradio/core": minor +"@gradio/lite": minor +"@self/spa": minor +"gradio": minor +--- + +feat:Remove lite/theme.css from the Git-managed file tree diff --git a/js/core/.gitignore b/js/core/.gitignore index 6f8fe001ac474..e69de29bb2d1d 100644 --- a/js/core/.gitignore +++ b/js/core/.gitignore @@ -1 +0,0 @@ -/src/lite/theme.css diff --git a/js/lite/.gitignore b/js/lite/.gitignore new file mode 100644 index 0000000000000..385defb4088b0 --- /dev/null +++ b/js/lite/.gitignore @@ -0,0 +1 @@ +/src/theme.css diff --git a/js/lite/src/theme.css b/js/lite/src/theme.css deleted file mode 100644 index d0667ad26dee4..0000000000000 --- a/js/lite/src/theme.css +++ /dev/null @@ -1,424 +0,0 @@ -:root { - --name: default; - --primary-50: #fff7ed; - --primary-100: #ffedd5; - --primary-200: #fed7aa; - --primary-300: #fdba74; - --primary-400: #fb923c; - --primary-500: #f97316; - --primary-600: #ea580c; - --primary-700: #c2410c; - --primary-800: #9a3412; - --primary-900: #7c2d12; - --primary-950: #6c2e12; - --secondary-50: #eff6ff; - --secondary-100: #dbeafe; - --secondary-200: #bfdbfe; - --secondary-300: #93c5fd; - --secondary-400: #60a5fa; - --secondary-500: #3b82f6; - --secondary-600: #2563eb; - --secondary-700: #1d4ed8; - --secondary-800: #1e40af; - --secondary-900: #1e3a8a; - --secondary-950: #1d3660; - --neutral-50: #f9fafb; - --neutral-100: #f3f4f6; - --neutral-200: #e5e7eb; - --neutral-300: #d1d5db; - --neutral-400: #9ca3af; - --neutral-500: #6b7280; - --neutral-600: #4b5563; - --neutral-700: #374151; - --neutral-800: #1f2937; - --neutral-900: #111827; - --neutral-950: #0b0f19; - --spacing-xxs: 1px; - --spacing-xs: 2px; - --spacing-sm: 4px; - --spacing-md: 6px; - --spacing-lg: 8px; - --spacing-xl: 10px; - --spacing-xxl: 16px; - --radius-xxs: 1px; - --radius-xs: 2px; - --radius-sm: 4px; - --radius-md: 6px; - --radius-lg: 8px; - --radius-xl: 12px; - --radius-xxl: 22px; - --text-xxs: 9px; - --text-xs: 10px; - --text-sm: 12px; - --text-md: 14px; - --text-lg: 16px; - --text-xl: 22px; - --text-xxl: 26px; - --font: 'IBM Plex Sans', 'ui-sans-serif', 'system-ui', sans-serif; - --font-mono: 'IBM Plex Mono', 'ui-monospace', 'Consolas', monospace; - --body-background-fill: var(--background-fill-primary); - --body-text-color: var(--neutral-800); - --body-text-size: var(--text-md); - --body-text-weight: 400; - --embed-radius: 
var(--radius-lg); - --color-accent: var(--primary-500); - --color-accent-soft: var(--primary-50); - --background-fill-primary: white; - --background-fill-secondary: var(--neutral-50); - --border-color-accent: var(--primary-300); - --border-color-primary: var(--neutral-200); - --link-text-color: var(--secondary-600); - --link-text-color-active: var(--secondary-600); - --link-text-color-hover: var(--secondary-700); - --link-text-color-visited: var(--secondary-500); - --body-text-color-subdued: var(--neutral-400); - --accordion-text-color: var(--body-text-color); - --table-text-color: var(--body-text-color); - --shadow-drop: rgba(0,0,0,0.05) 0px 1px 2px 0px; - --shadow-drop-lg: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1); - --shadow-inset: rgba(0,0,0,0.05) 0px 2px 4px 0px inset; - --shadow-spread: 3px; - --block-background-fill: var(--background-fill-primary); - --block-border-color: var(--border-color-primary); - --block-border-width: 1px; - --block-info-text-color: var(--body-text-color-subdued); - --block-info-text-size: var(--text-sm); - --block-info-text-weight: 400; - --block-label-background-fill: var(--background-fill-primary); - --block-label-border-color: var(--border-color-primary); - --block-label-border-width: 1px; - --block-label-shadow: var(--block-shadow); - --block-label-text-color: var(--neutral-500); - --block-label-margin: 0; - --block-label-padding: var(--spacing-sm) var(--spacing-lg); - --block-label-radius: calc(var(--radius-lg) - 1px) 0 calc(var(--radius-lg) - 1px) 0; - --block-label-right-radius: 0 calc(var(--radius-lg) - 1px) 0 calc(var(--radius-lg) - 1px); - --block-label-text-size: var(--text-sm); - --block-label-text-weight: 400; - --block-padding: var(--spacing-xl) calc(var(--spacing-xl) + 2px); - --block-radius: var(--radius-lg); - --block-shadow: var(--shadow-drop); - --block-title-background-fill: none; - --block-title-border-color: none; - --block-title-border-width: 0px; - --block-title-text-color: var(--neutral-500); - --block-title-padding: 0; - --block-title-radius: none; - --block-title-text-size: var(--text-md); - --block-title-text-weight: 400; - --container-radius: var(--radius-lg); - --form-gap-width: 1px; - --layout-gap: var(--spacing-xxl); - --panel-background-fill: var(--background-fill-secondary); - --panel-border-color: var(--border-color-primary); - --panel-border-width: 0; - --section-header-text-size: var(--text-md); - --section-header-text-weight: 400; - --border-color-accent-subdued: var(--primary-200); - --code-background-fill: var(--neutral-100); - --checkbox-background-color: var(--background-fill-primary); - --checkbox-background-color-focus: var(--checkbox-background-color); - --checkbox-background-color-hover: var(--checkbox-background-color); - --checkbox-background-color-selected: var(--secondary-600); - --checkbox-border-color: var(--neutral-300); - --checkbox-border-color-focus: var(--secondary-500); - --checkbox-border-color-hover: var(--neutral-300); - --checkbox-border-color-selected: var(--secondary-600); - --checkbox-border-radius: var(--radius-sm); - --checkbox-border-width: var(--input-border-width); - --checkbox-label-background-fill: linear-gradient(to top, var(--neutral-50), white); - --checkbox-label-background-fill-hover: linear-gradient(to top, var(--neutral-100), white); - --checkbox-label-background-fill-selected: var(--checkbox-label-background-fill); - --checkbox-label-border-color: var(--border-color-primary); - --checkbox-label-border-color-hover: var(--checkbox-label-border-color); - 
--checkbox-label-border-width: var(--input-border-width); - --checkbox-label-gap: var(--spacing-lg); - --checkbox-label-padding: var(--spacing-md) calc(2 * var(--spacing-md)); - --checkbox-label-shadow: var(--shadow-drop); - --checkbox-label-text-size: var(--text-md); - --checkbox-label-text-weight: 400; - --checkbox-check: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e"); - --radio-circle: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e"); - --checkbox-shadow: var(--input-shadow); - --checkbox-label-text-color: var(--body-text-color); - --checkbox-label-text-color-selected: var(--checkbox-label-text-color); - --error-background-fill: #fef2f2; - --error-border-color: #b91c1c; - --error-border-width: 1px; - --error-text-color: #b91c1c; - --error-icon-color: #b91c1c; - --input-background-fill: white; - --input-background-fill-focus: var(--secondary-500); - --input-background-fill-hover: var(--input-background-fill); - --input-border-color: var(--border-color-primary); - --input-border-color-focus: var(--secondary-300); - --input-border-color-hover: var(--input-border-color); - --input-border-width: 1px; - --input-padding: var(--spacing-xl); - --input-placeholder-color: var(--neutral-400); - --input-radius: var(--radius-lg); - --input-shadow: 0 0 0 var(--shadow-spread) transparent, var(--shadow-inset); - --input-shadow-focus: 0 0 0 var(--shadow-spread) var(--secondary-50), var(--shadow-inset); - --input-text-size: var(--text-md); - --input-text-weight: 400; - --loader-color: var(--color-accent); - --prose-text-size: var(--text-md); - --prose-text-weight: 400; - --prose-header-text-weight: 600; - --slider-color: #2563eb; - --stat-background-fill: linear-gradient(to right, var(--primary-400), var(--primary-200)); - --table-border-color: var(--neutral-300); - --table-even-background-fill: white; - --table-odd-background-fill: var(--neutral-50); - --table-radius: var(--radius-lg); - --table-row-focus: var(--color-accent-soft); - --button-border-width: var(--input-border-width); - --button-cancel-background-fill: linear-gradient(to bottom right, #fee2e2, #fecaca); - --button-cancel-background-fill-hover: linear-gradient(to bottom right, #fee2e2, #fee2e2); - --button-cancel-border-color: #fecaca; - --button-cancel-border-color-hover: var(--button-cancel-border-color); - --button-cancel-text-color: #dc2626; - --button-cancel-text-color-hover: var(--button-cancel-text-color); - --button-large-padding: var(--spacing-lg) calc(2 * var(--spacing-lg)); - --button-large-radius: var(--radius-lg); - --button-large-text-size: var(--text-lg); - --button-large-text-weight: 600; - --button-primary-background-fill: linear-gradient(to bottom right, var(--primary-100), var(--primary-300)); - --button-primary-background-fill-hover: linear-gradient(to bottom right, var(--primary-100), var(--primary-200)); - --button-primary-border-color: var(--primary-200); - --button-primary-border-color-hover: var(--button-primary-border-color); - --button-primary-text-color: var(--primary-600); - --button-primary-text-color-hover: var(--button-primary-text-color); - --button-secondary-background-fill: linear-gradient(to bottom right, var(--neutral-100), var(--neutral-200)); - --button-secondary-background-fill-hover: linear-gradient(to bottom 
right, var(--neutral-100), var(--neutral-100)); - --button-secondary-border-color: var(--neutral-200); - --button-secondary-border-color-hover: var(--button-secondary-border-color); - --button-secondary-text-color: var(--neutral-700); - --button-secondary-text-color-hover: var(--button-secondary-text-color); - --button-shadow: var(--shadow-drop); - --button-shadow-active: var(--shadow-inset); - --button-shadow-hover: var(--shadow-drop-lg); - --button-small-padding: var(--spacing-sm) calc(2 * var(--spacing-sm)); - --button-small-radius: var(--radius-lg); - --button-small-text-size: var(--text-md); - --button-small-text-weight: 400; - --button-transition: none; -} -.dark { - --body-background-fill: var(--background-fill-primary); - --body-text-color: var(--neutral-100); - --color-accent-soft: var(--neutral-700); - --background-fill-primary: var(--neutral-950); - --background-fill-secondary: var(--neutral-900); - --border-color-accent: var(--neutral-600); - --border-color-primary: var(--neutral-700); - --link-text-color-active: var(--secondary-500); - --link-text-color: var(--secondary-500); - --link-text-color-hover: var(--secondary-400); - --link-text-color-visited: var(--secondary-600); - --body-text-color-subdued: var(--neutral-400); - --accordion-text-color: var(--body-text-color); - --table-text-color: var(--body-text-color); - --shadow-spread: 1px; - --block-background-fill: var(--neutral-800); - --block-border-color: var(--border-color-primary); - --block_border_width: None; - --block-info-text-color: var(--body-text-color-subdued); - --block-label-background-fill: var(--background-fill-secondary); - --block-label-border-color: var(--border-color-primary); - --block_label_border_width: None; - --block-label-text-color: var(--neutral-200); - --block_shadow: None; - --block_title_background_fill: None; - --block_title_border_color: None; - --block_title_border_width: None; - --block-title-text-color: var(--neutral-200); - --panel-background-fill: var(--background-fill-secondary); - --panel-border-color: var(--border-color-primary); - --panel_border_width: None; - --border-color-accent-subdued: var(--border-color-accent); - --code-background-fill: var(--neutral-800); - --checkbox-background-color: var(--neutral-800); - --checkbox-background-color-focus: var(--checkbox-background-color); - --checkbox-background-color-hover: var(--checkbox-background-color); - --checkbox-background-color-selected: var(--secondary-600); - --checkbox-border-color: var(--neutral-700); - --checkbox-border-color-focus: var(--secondary-500); - --checkbox-border-color-hover: var(--neutral-600); - --checkbox-border-color-selected: var(--secondary-600); - --checkbox-border-width: var(--input-border-width); - --checkbox-label-background-fill: linear-gradient(to top, var(--neutral-900), var(--neutral-800)); - --checkbox-label-background-fill-hover: linear-gradient(to top, var(--neutral-900), var(--neutral-800)); - --checkbox-label-background-fill-selected: var(--checkbox-label-background-fill); - --checkbox-label-border-color: var(--border-color-primary); - --checkbox-label-border-color-hover: var(--checkbox-label-border-color); - --checkbox-label-border-width: var(--input-border-width); - --checkbox-label-text-color: var(--body-text-color); - --checkbox-label-text-color-selected: var(--checkbox-label-text-color); - --error-background-fill: var(--neutral-900); - --error-border-color: #ef4444; - --error_border_width: None; - --error-text-color: #fef2f2; - --error-icon-color: #ef4444; - --input-background-fill: 
var(--neutral-800); - --input-background-fill-focus: var(--secondary-600); - --input-background-fill-hover: var(--input-background-fill); - --input-border-color: var(--border-color-primary); - --input-border-color-focus: var(--neutral-700); - --input-border-color-hover: var(--input-border-color); - --input_border_width: None; - --input-placeholder-color: var(--neutral-500); - --input_shadow: None; - --input-shadow-focus: 0 0 0 var(--shadow-spread) var(--neutral-700), var(--shadow-inset); - --loader_color: None; - --slider_color: None; - --stat-background-fill: linear-gradient(to right, var(--primary-400), var(--primary-600)); - --table-border-color: var(--neutral-700); - --table-even-background-fill: var(--neutral-950); - --table-odd-background-fill: var(--neutral-900); - --table-row-focus: var(--color-accent-soft); - --button-border-width: var(--input-border-width); - --button-cancel-background-fill: linear-gradient(to bottom right, #dc2626, #b91c1c); - --button-cancel-background-fill-hover: linear-gradient(to bottom right, #dc2626, #dc2626); - --button-cancel-border-color: #dc2626; - --button-cancel-border-color-hover: var(--button-cancel-border-color); - --button-cancel-text-color: white; - --button-cancel-text-color-hover: var(--button-cancel-text-color); - --button-primary-background-fill: linear-gradient(to bottom right, var(--primary-500), var(--primary-600)); - --button-primary-background-fill-hover: linear-gradient(to bottom right, var(--primary-500), var(--primary-500)); - --button-primary-border-color: var(--primary-500); - --button-primary-border-color-hover: var(--button-primary-border-color); - --button-primary-text-color: white; - --button-primary-text-color-hover: var(--button-primary-text-color); - --button-secondary-background-fill: linear-gradient(to bottom right, var(--neutral-600), var(--neutral-700)); - --button-secondary-background-fill-hover: linear-gradient(to bottom right, var(--neutral-600), var(--neutral-600)); - --button-secondary-border-color: var(--neutral-600); - --button-secondary-border-color-hover: var(--button-secondary-border-color); - --button-secondary-text-color: white; - --button-secondary-text-color-hover: var(--button-secondary-text-color); - --name: default; - --primary-50: #fff7ed; - --primary-100: #ffedd5; - --primary-200: #fed7aa; - --primary-300: #fdba74; - --primary-400: #fb923c; - --primary-500: #f97316; - --primary-600: #ea580c; - --primary-700: #c2410c; - --primary-800: #9a3412; - --primary-900: #7c2d12; - --primary-950: #6c2e12; - --secondary-50: #eff6ff; - --secondary-100: #dbeafe; - --secondary-200: #bfdbfe; - --secondary-300: #93c5fd; - --secondary-400: #60a5fa; - --secondary-500: #3b82f6; - --secondary-600: #2563eb; - --secondary-700: #1d4ed8; - --secondary-800: #1e40af; - --secondary-900: #1e3a8a; - --secondary-950: #1d3660; - --neutral-50: #f9fafb; - --neutral-100: #f3f4f6; - --neutral-200: #e5e7eb; - --neutral-300: #d1d5db; - --neutral-400: #9ca3af; - --neutral-500: #6b7280; - --neutral-600: #4b5563; - --neutral-700: #374151; - --neutral-800: #1f2937; - --neutral-900: #111827; - --neutral-950: #0b0f19; - --spacing-xxs: 1px; - --spacing-xs: 2px; - --spacing-sm: 4px; - --spacing-md: 6px; - --spacing-lg: 8px; - --spacing-xl: 10px; - --spacing-xxl: 16px; - --radius-xxs: 1px; - --radius-xs: 2px; - --radius-sm: 4px; - --radius-md: 6px; - --radius-lg: 8px; - --radius-xl: 12px; - --radius-xxl: 22px; - --text-xxs: 9px; - --text-xs: 10px; - --text-sm: 12px; - --text-md: 14px; - --text-lg: 16px; - --text-xl: 22px; - --text-xxl: 26px; - 
--font: 'IBM Plex Sans', 'ui-sans-serif', 'system-ui', sans-serif; - --font-mono: 'IBM Plex Mono', 'ui-monospace', 'Consolas', monospace; - --body-text-size: var(--text-md); - --body-text-weight: 400; - --embed-radius: var(--radius-lg); - --color-accent: var(--primary-500); - --shadow-drop: rgba(0,0,0,0.05) 0px 1px 2px 0px; - --shadow-drop-lg: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1); - --shadow-inset: rgba(0,0,0,0.05) 0px 2px 4px 0px inset; - --block-border-width: 1px; - --block-info-text-size: var(--text-sm); - --block-info-text-weight: 400; - --block-label-border-width: 1px; - --block-label-shadow: var(--block-shadow); - --block-label-margin: 0; - --block-label-padding: var(--spacing-sm) var(--spacing-lg); - --block-label-radius: calc(var(--radius-lg) - 1px) 0 calc(var(--radius-lg) - 1px) 0; - --block-label-right-radius: 0 calc(var(--radius-lg) - 1px) 0 calc(var(--radius-lg) - 1px); - --block-label-text-size: var(--text-sm); - --block-label-text-weight: 400; - --block-padding: var(--spacing-xl) calc(var(--spacing-xl) + 2px); - --block-radius: var(--radius-lg); - --block-shadow: var(--shadow-drop); - --block-title-background-fill: none; - --block-title-border-color: none; - --block-title-border-width: 0px; - --block-title-padding: 0; - --block-title-radius: none; - --block-title-text-size: var(--text-md); - --block-title-text-weight: 400; - --container-radius: var(--radius-lg); - --form-gap-width: 1px; - --layout-gap: var(--spacing-xxl); - --panel-border-width: 0; - --section-header-text-size: var(--text-md); - --section-header-text-weight: 400; - --checkbox-border-radius: var(--radius-sm); - --checkbox-label-gap: var(--spacing-lg); - --checkbox-label-padding: var(--spacing-md) calc(2 * var(--spacing-md)); - --checkbox-label-shadow: var(--shadow-drop); - --checkbox-label-text-size: var(--text-md); - --checkbox-label-text-weight: 400; - --checkbox-check: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e"); - --radio-circle: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e"); - --checkbox-shadow: var(--input-shadow); - --error-border-width: 1px; - --input-border-width: 1px; - --input-padding: var(--spacing-xl); - --input-radius: var(--radius-lg); - --input-shadow: 0 0 0 var(--shadow-spread) transparent, var(--shadow-inset); - --input-text-size: var(--text-md); - --input-text-weight: 400; - --loader-color: var(--color-accent); - --prose-text-size: var(--text-md); - --prose-text-weight: 400; - --prose-header-text-weight: 600; - --slider-color: #2563eb; - --table-radius: var(--radius-lg); - --button-large-padding: var(--spacing-lg) calc(2 * var(--spacing-lg)); - --button-large-radius: var(--radius-lg); - --button-large-text-size: var(--text-lg); - --button-large-text-weight: 600; - --button-shadow: var(--shadow-drop); - --button-shadow-active: var(--shadow-inset); - --button-shadow-hover: var(--shadow-drop-lg); - --button-small-padding: var(--spacing-sm) calc(2 * var(--spacing-sm)); - --button-small-radius: var(--radius-lg); - --button-small-text-size: var(--text-md); - --button-small-text-weight: 400; - --button-transition: none; -} \ No newline at end of file diff --git a/js/spa/.gitignore b/js/spa/.gitignore deleted file mode 100644 index 6f8fe001ac474..0000000000000 --- 
a/js/spa/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/src/lite/theme.css diff --git a/js/spa/package.json b/js/spa/package.json index 3083f43e4b212..12b0a388834d9 100644 --- a/js/spa/package.json +++ b/js/spa/package.json @@ -5,22 +5,12 @@ "type": "module", "scripts": { "dev": "vite --port 9876", - "dev:lite": "run-p dev:lite:*", - "dev:lite:self": "vite --port 9876 --mode development:lite", - "dev:lite:worker": "pnpm --filter @gradio/wasm dev", "build": "vite build --mode production --emptyOutDir", - "cssbuild": "python ../../scripts/generate_theme.py --outfile ./src/lite/theme.css", - "pybuild:gradio": "cd ../../ && hatch build -t lite", - "pybuild:gradio-client": "cd ../../client/python && python -m build", - "pybuild": "run-p pybuild:*", - "build:lite": "pnpm pybuild && pnpm cssbuild && pnpm --filter @gradio/client build && pnpm --filter @gradio/wasm build && vite build --mode production:lite", "preview": "vite preview", "test:snapshot": "pnpm exec playwright test snapshots/ --config=../../.config/playwright.config.js", "test:browser": "pnpm exec playwright test test/ --grep-invert 'reload.spec.ts' --config=../../.config/playwright.config.js", "test:browser:dev": "pnpm exec playwright test test/ --ui --config=../../.config/playwright.config.js", "test:browser:reload": "CI=1 pnpm exec playwright test test/ --grep 'reload.spec.ts' --config=../../.config/playwright.config.js", - "test:browser:lite": "GRADIO_E2E_TEST_LITE=1 pnpm test:browser", - "test:browser:lite:dev": "GRADIO_E2E_TEST_LITE=1 pnpm test:browser:dev", "build:css": "pollen -c pollen.config.cjs -o src/pollen-dev.css" }, "devDependencies": { From f7f7885b5334db809420d574e07151c1c3ce0b76 Mon Sep 17 00:00:00 2001 From: Freddy Boulton Date: Thu, 12 Sep 2024 23:51:28 -0400 Subject: [PATCH 06/38] 9227 chatinterface retry bug (#9316) * first draft * add code * tip * add changeset * delete dead code * Type check notebook * consolidate like section with guide * Add comments * add value * Lint * lint * guide --------- Co-authored-by: gradio-pr-bot Co-authored-by: Abubakar Abid --- .changeset/nine-hotels-juggle.md | 6 + demo/chatbot_retry_undo_like/run.ipynb | 1 + demo/chatbot_retry_undo_like/run.py | 64 ++++++++ gradio/__init__.py | 2 + gradio/chat_interface.py | 124 ++++++++------- gradio/events.py | 65 ++++++++ ...2_creating-a-custom-chatbot-with-blocks.md | 29 ---- .../05_chatbots/04_chatbot-specific-events.md | 142 ++++++++++++++++++ ...eating-a-discord-bot-from-a-gradio-app.md} | 0 js/chatbot/Index.svelte | 9 +- js/chatbot/shared/ChatBot.svelte | 48 +++--- js/chatbot/shared/utils.ts | 5 + .../test_chatinterface_streaming_echo.spec.ts | 2 +- 13 files changed, 386 insertions(+), 111 deletions(-) create mode 100644 .changeset/nine-hotels-juggle.md create mode 100644 demo/chatbot_retry_undo_like/run.ipynb create mode 100644 demo/chatbot_retry_undo_like/run.py create mode 100644 guides/05_chatbots/04_chatbot-specific-events.md rename guides/05_chatbots/{04_creating-a-discord-bot-from-a-gradio-app.md => 05_creating-a-discord-bot-from-a-gradio-app.md} (100%) diff --git a/.changeset/nine-hotels-juggle.md b/.changeset/nine-hotels-juggle.md new file mode 100644 index 0000000000000..d0ca3d059de35 --- /dev/null +++ b/.changeset/nine-hotels-juggle.md @@ -0,0 +1,6 @@ +--- +"@gradio/chatbot": minor +"gradio": minor +--- + +feat:9227 chatinterface retry bug diff --git a/demo/chatbot_retry_undo_like/run.ipynb b/demo/chatbot_retry_undo_like/run.ipynb new file mode 100644 index 0000000000000..f43836b9e365e --- /dev/null +++ 
b/demo/chatbot_retry_undo_like/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_retry_undo_like"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["from huggingface_hub import InferenceClient\n", "import gradio as gr\n", "\n", "client = InferenceClient()\n", "\n", "def respond(\n", " prompt: str,\n", " history,\n", "):\n", " if not history:\n", " history = [{\"role\": \"system\", \"content\": \"You are a friendly chatbot\"}]\n", " history.append({\"role\": \"user\", \"content\": prompt})\n", "\n", " yield history\n", "\n", " response = {\"role\": \"assistant\", \"content\": \"\"}\n", " for message in client.chat_completion( # type: ignore\n", " history,\n", " temperature=0.95,\n", " top_p=0.9,\n", " max_tokens=512,\n", " stream=True,\n", " model=\"HuggingFaceH4/zephyr-7b-beta\"\n", " ):\n", " response[\"content\"] += message.choices[0].delta.content or \"\"\n", " yield history + [response]\n", "\n", "\n", "def handle_undo(history, undo_data: gr.UndoData):\n", " return history[:undo_data.index], history[undo_data.index]['content']\n", "\n", "def handle_retry(history, retry_data: gr.RetryData):\n", " new_history = history[:retry_data.index]\n", " previous_prompt = history[retry_data.index]['content']\n", " yield from respond(previous_prompt, new_history)\n", "\n", "\n", "def handle_like(data: gr.LikeData):\n", " if data.liked:\n", " print(\"You upvoted this response: \", data.value)\n", " else:\n", " print(\"You downvoted this response: \", data.value)\n", "\n", "\n", "with gr.Blocks() as demo:\n", " gr.Markdown(\"# Chat with Hugging Face Zephyr 7b \ud83e\udd17\")\n", " chatbot = gr.Chatbot(\n", " label=\"Agent\",\n", " type=\"messages\",\n", " avatar_images=(\n", " None,\n", " \"https://em-content.zobj.net/source/twitter/376/hugging-face_1f917.png\",\n", " ),\n", " )\n", " prompt = gr.Textbox(max_lines=1, label=\"Chat Message\")\n", " prompt.submit(respond, [prompt, chatbot], [chatbot])\n", " prompt.submit(lambda: \"\", None, [prompt])\n", " chatbot.undo(handle_undo, chatbot, [chatbot, prompt])\n", " chatbot.retry(handle_retry, chatbot, [chatbot])\n", " chatbot.like(handle_like, None, None)\n", "\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/chatbot_retry_undo_like/run.py b/demo/chatbot_retry_undo_like/run.py new file mode 100644 index 0000000000000..5defaec383e7c --- /dev/null +++ b/demo/chatbot_retry_undo_like/run.py @@ -0,0 +1,64 @@ +from huggingface_hub import InferenceClient +import gradio as gr + +client = InferenceClient() + +def respond( + prompt: str, + history, +): + if not history: + history = [{"role": "system", "content": "You are a friendly chatbot"}] + history.append({"role": "user", "content": prompt}) + + yield history + + response = {"role": "assistant", "content": ""} + for message in client.chat_completion( # type: ignore + history, + temperature=0.95, + top_p=0.9, + max_tokens=512, + stream=True, + model="HuggingFaceH4/zephyr-7b-beta" + ): + response["content"] += message.choices[0].delta.content or "" + yield history + [response] + + +def handle_undo(history, undo_data: gr.UndoData): + 
return history[:undo_data.index], history[undo_data.index]['content'] + +def handle_retry(history, retry_data: gr.RetryData): + new_history = history[:retry_data.index] + previous_prompt = history[retry_data.index]['content'] + yield from respond(previous_prompt, new_history) + + +def handle_like(data: gr.LikeData): + if data.liked: + print("You upvoted this response: ", data.value) + else: + print("You downvoted this response: ", data.value) + + +with gr.Blocks() as demo: + gr.Markdown("# Chat with Hugging Face Zephyr 7b 🤗") + chatbot = gr.Chatbot( + label="Agent", + type="messages", + avatar_images=( + None, + "https://em-content.zobj.net/source/twitter/376/hugging-face_1f917.png", + ), + ) + prompt = gr.Textbox(max_lines=1, label="Chat Message") + prompt.submit(respond, [prompt, chatbot], [chatbot]) + prompt.submit(lambda: "", None, [prompt]) + chatbot.undo(handle_undo, chatbot, [chatbot, prompt]) + chatbot.retry(handle_retry, chatbot, [chatbot]) + chatbot.like(handle_like, None, None) + + +if __name__ == "__main__": + demo.launch() diff --git a/gradio/__init__.py b/gradio/__init__.py index 86051cf1fac22..5538d75cd36b3 100644 --- a/gradio/__init__.py +++ b/gradio/__init__.py @@ -68,7 +68,9 @@ EventData, KeyUpData, LikeData, + RetryData, SelectData, + UndoData, on, ) from gradio.exceptions import Error diff --git a/gradio/chat_interface.py b/gradio/chat_interface.py index d9600308ec9fd..acc1479e8d7f3 100644 --- a/gradio/chat_interface.py +++ b/gradio/chat_interface.py @@ -26,8 +26,8 @@ get_component_instance, ) from gradio.components.chatbot import FileDataDict, Message, MessageDict, TupleFormat -from gradio.components.multimodal_textbox import MultimodalData -from gradio.events import Dependency, on +from gradio.components.multimodal_textbox import MultimodalPostprocess +from gradio.events import Dependency from gradio.helpers import create_examples as Examples # noqa: N812 from gradio.helpers import special_args, update from gradio.layouts import Accordion, Group, Row @@ -258,6 +258,7 @@ def __init__( self.chatbot_state = ( State(self.chatbot.value) if self.chatbot.value else State([]) ) + self.previous_input = State(value=[]) self.show_progress = show_progress self._setup_events() self._setup_api() @@ -267,14 +268,12 @@ def _setup_events(self) -> None: submit_triggers = [self.textbox.submit] submit_event = ( - on( - submit_triggers, + self.textbox.submit( self._clear_and_save_textbox, - [self.textbox], - [self.textbox, self.saved_input], + [self.textbox, self.previous_input], + [self.textbox, self.saved_input, self.previous_input], show_api=False, queue=False, - preprocess=False, ) .then( self._display_input, @@ -344,22 +343,10 @@ def _setup_events(self) -> None: ) self._setup_stop_events([self.chatbot.retry], retry_event) - async def format_textbox(data: str | MultimodalData) -> str | dict: - if isinstance(data, MultimodalData): - return {"text": data.text, "files": [x.path for x in data.files]} - else: - return data - self.chatbot.undo( - self._delete_prev_fn, - [self.saved_input, self.chatbot], - [self.chatbot, self.saved_input], - show_api=False, - queue=False, - ).then( - format_textbox, - [self.saved_input], - [self.textbox], + self._undo_msg, + [self.previous_input, self.chatbot], + [self.chatbot, self.textbox, self.saved_input, self.previous_input], show_api=False, queue=False, ) @@ -445,46 +432,62 @@ async def api_fn(message, history, *args, **kwargs): ) def _clear_and_save_textbox( - self, message: str | dict - ) -> tuple[Textbox | MultimodalTextbox, str | MultimodalData]: + 
self, + message: str | MultimodalPostprocess, + previous_input: list[str | MultimodalPostprocess], + ) -> tuple[ + Textbox | MultimodalTextbox, + str | MultimodalPostprocess, + list[str | MultimodalPostprocess], + ]: if self.multimodal: - return MultimodalTextbox( - "", interactive=False, placeholder="" - ), MultimodalData(**cast(dict, message)) + previous_input += [message] + return ( + MultimodalTextbox("", interactive=False, placeholder=""), + message, + previous_input, + ) else: - return Textbox("", interactive=False, placeholder=""), cast(str, message) + previous_input += [message] + return ( + Textbox("", interactive=False, placeholder=""), + message, + previous_input, + ) def _append_multimodal_history( self, - message: MultimodalData, + message: MultimodalPostprocess, response: MessageDict | str | None, history: list[MessageDict] | TupleFormat, ): if self.type == "tuples": - for x in message.files: - history.append([(x.path,), None]) # type: ignore - if message.text is None or not isinstance(message.text, str): + for x in message["files"]: + history.append([(x,), None]) # type: ignore + if message["text"] is None or not isinstance(message["text"], str): return - elif message.text == "" and message.files != []: + elif message["text"] == "" and message["files"] != []: history.append([None, response]) # type: ignore else: - history.append([message.text, cast(str, response)]) # type: ignore + history.append([message["text"], cast(str, response)]) # type: ignore else: - for x in message.files: + for x in message["files"]: history.append( {"role": "user", "content": cast(FileDataDict, x.model_dump())} # type: ignore ) - if message.text is None or not isinstance(message.text, str): + if message["text"] is None or not isinstance(message["text"], str): return else: - history.append({"role": "user", "content": message.text}) # type: ignore + history.append({"role": "user", "content": message["text"]}) # type: ignore if response: history.append(cast(MessageDict, response)) # type: ignore async def _display_input( - self, message: str | MultimodalData, history: TupleFormat | list[MessageDict] + self, + message: str | MultimodalPostprocess, + history: TupleFormat | list[MessageDict], ) -> tuple[TupleFormat, TupleFormat] | tuple[list[MessageDict], list[MessageDict]]: - if self.multimodal and isinstance(message, MultimodalData): + if self.multimodal and isinstance(message, dict): self._append_multimodal_history(message, None, history) elif isinstance(message, str) and self.type == "tuples": history.append([message, None]) # type: ignore @@ -503,17 +506,15 @@ def response_as_dict(self, response: MessageDict | Message | str) -> MessageDict def _process_msg_and_trim_history( self, - message: str | MultimodalData, + message: str | MultimodalPostprocess, history_with_input: TupleFormat | list[MessageDict], - ) -> tuple[str | dict, TupleFormat | list[MessageDict]]: - if isinstance(message, MultimodalData): - remove_input = len(message.files) + int(message.text is not None) + ) -> tuple[str | MultimodalPostprocess, TupleFormat | list[MessageDict]]: + if isinstance(message, dict): + remove_input = len(message["files"]) + int(message["text"] is not None) history = history_with_input[:-remove_input] - message_serialized = message.model_dump() else: history = history_with_input[:-1] - message_serialized = message - return message_serialized, history + return message, history def _append_history(self, history, message, first_response=True): if self.type == "tuples": @@ -527,7 +528,7 @@ def 
_append_history(self, history, message, first_response=True): async def _submit_fn( self, - message: str | MultimodalData, + message: str | MultimodalPostprocess, history_with_input: TupleFormat | list[MessageDict], request: Request, *args, @@ -552,7 +553,7 @@ async def _stream_fn( self, - message: str | MultimodalData, + message: str | MultimodalPostprocess, history_with_input: TupleFormat | list[MessageDict], request: Request, *args, @@ -620,21 +621,28 @@ async def _examples_stream_fn( async def _delete_prev_fn( self, - message: str | MultimodalData | None, + message: str | MultimodalPostprocess | None, history: list[MessageDict] | TupleFormat, - ) -> tuple[ - list[MessageDict] | TupleFormat, - str | MultimodalData, - list[MessageDict] | TupleFormat, - ]: + ) -> tuple[list[MessageDict] | TupleFormat, str | MultimodalPostprocess]: extra = 1 if self.type == "messages" else 0 - if self.multimodal and isinstance(message, MultimodalData): + if self.multimodal and isinstance(message, dict): remove_input = ( - len(message.files) + 1 - if message.text is not None - else len(message.files) + len(message["files"]) + 1 + if message["text"] is not None + else len(message["files"]) ) + extra history = history[:-remove_input] else: history = history[: -(1 + extra)] return history, message or "" # type: ignore + + async def _undo_msg( + self, + previous_input: list[str | MultimodalPostprocess], + history: list[MessageDict] | TupleFormat, + ): + msg = previous_input.pop() + + history, msg = await self._delete_prev_fn(msg, history) + previous_msg = previous_input[-1] if len(previous_input) else msg + return history, msg, previous_msg, previous_input diff --git a/gradio/events.py b/gradio/events.py index ae75499fcd7ab..8499a54aaf17e 100644 --- a/gradio/events.py +++ b/gradio/events.py @@ -306,6 +306,71 @@ def __init__(self, target: Block | None, data: Any): """ + +@document() +class RetryData(EventData): + """ + The gr.RetryData class is a subclass of gr.EventData that specifically carries information about the `.retry()` event. When gr.RetryData + is added as a type hint to an argument of an event listener method, a gr.RetryData object will automatically be passed as the value of that argument. + The attributes of this object contain information about the event that triggered the listener. + Example: + import gradio as gr + + def retry(retry_data: gr.RetryData, history: list[gr.MessageDict]): + history_up_to_retry = history[:retry_data.index] + new_response = "" + for token in api.chat_completion(history): + new_response += token + yield history_up_to_retry + [new_response] + + with gr.Blocks() as demo: + chatbot = gr.Chatbot() + chatbot.retry(retry, chatbot, chatbot) + demo.launch() + """ + + def __init__(self, target: Block | None, data: Any): + super().__init__(target, data) + self.index: int | tuple[int, int] = data["index"] + """ + The index of the user message that should be retried. + """ + self.value: Any = data["value"] + """ + The value of the user message that should be retried. + """ + + +@document() +class UndoData(EventData): + """ + The gr.UndoData class is a subclass of gr.EventData that specifically carries information about the `.undo()` event. When gr.UndoData + is added as a type hint to an argument of an event listener method, a gr.UndoData object will automatically be passed as the value of that argument. + The attributes of this object contain information about the event that triggered the listener.
+ Example: + import gradio as gr + + def undo(undo_data: gr.UndoData, history: list[gr.MessageDict]): + history_up_to_undo = history[:undo_data.index] + return history_up_to_undo + + with gr.Blocks() as demo: + chatbot = gr.Chatbot() + chatbot.undo(undo, chatbot, chatbot) + demo.launch() + """ + + def __init__(self, target: Block | None, data: Any): + super().__init__(target, data) + self.index: int | tuple[int, int] = data["index"] + """ + The index of the user message that should be undone. + """ + self.value: Any = data["value"] + """ + The value of the user message that should be undone. + """ + + @dataclasses.dataclass class EventListenerMethod: block: Block | None diff --git a/guides/05_chatbots/02_creating-a-custom-chatbot-with-blocks.md b/guides/05_chatbots/02_creating-a-custom-chatbot-with-blocks.md index 7efe42a54df4f..8abb22ed609e6 100644 --- a/guides/05_chatbots/02_creating-a-custom-chatbot-with-blocks.md +++ b/guides/05_chatbots/02_creating-a-custom-chatbot-with-blocks.md @@ -60,35 +60,6 @@ Of course, in practice, you would replace `bot()` with your own more complex fun Finally, we enable queuing by running `demo.queue()`, which is required for streaming intermediate outputs. You can try the improved chatbot by scrolling to the demo at the top of this page. -## Liking / Disliking Chat Messages - -Once you've created your `gr.Chatbot`, you can add the ability for users to like or dislike messages. This can be useful if you would like users to vote on a bot's responses or flag inappropriate results. - -To add this functionality to your Chatbot, simply attach a `.like()` event to your Chatbot. A chatbot that has the `.like()` event will automatically feature a thumbs-up icon and a thumbs-down icon next to every bot message. - -The `.like()` method requires you to pass in a function that is called when a user clicks on these icons. In your function, you should have an argument whose type is `gr.LikeData`. Gradio will automatically supply the parameter to this argument with an object that contains information about the liked or disliked message. Here's a simplistic example of how you can have users like or dislike chat messages: - -```py -import gradio as gr - -def greet(history, msg): - return history + {"role": "user", "content": msg} + {"role": "assistant", "content": "Hello, " + msg} - -def vote(data: gr.LikeData): - if data.liked: - print("You upvoted this response: " + data.value["value"]) - else: - print("You downvoted this response: " + data.value["value"]) - - -with gr.Blocks() as demo: - chatbot = gr.Chatbot(type="messages") - textbox = gr.Textbox() - textbox.submit(greet, [chatbot, textbox], [chatbot]) - chatbot.like(vote, None, None) # Adding this line causes the like/dislike icons to appear in your chatbot - -demo.launch() -``` ## Adding Markdown, Images, Audio, or Videos diff --git a/guides/05_chatbots/04_chatbot-specific-events.md b/guides/05_chatbots/04_chatbot-specific-events.md new file mode 100644 index 0000000000000..b56907692723d --- /dev/null +++ b/guides/05_chatbots/04_chatbot-specific-events.md @@ -0,0 +1,142 @@ +# Liking, Retrying and Undoing Messages + +Tags: LLM, CHAT + +Users expect modern chatbot UIs to let them easily interact with individual chat messages: for example, users might want to retry message generations, undo messages, or click on a like/dislike button to upvote or downvote a generated message.
+ +Thankfully, the Gradio Chatbot exposes three events, `.retry`, `.undo`, and `.like`, to let you build this functionality into your application. As an application developer, you can attach functions to any of these events, allowing you to run arbitrary Python functions, e.g., when a user interacts with a message. + +In this demo, we'll build a UI that implements these events. You can see our finished demo deployed on Hugging Face Spaces here: + +$demo_chatbot_retry_undo_like + +Tip: `gr.ChatInterface` automatically uses the `.retry` and `.undo` events, so it's best to start there in order to get a fully working application quickly. + + +## The UI + +First, we'll build the UI without handling these events, and add them from there. +We'll use the Hugging Face InferenceClient in order to get started without setting up +any API keys. + +This is what the first draft of our application looks like: + +```python +from huggingface_hub import InferenceClient +import gradio as gr + +client = InferenceClient() + +def respond( + prompt: str, + history, +): + if not history: + history = [{"role": "system", "content": "You are a friendly chatbot"}] + history.append({"role": "user", "content": prompt}) + + yield history + + response = {"role": "assistant", "content": ""} + for message in client.chat_completion( + history, + temperature=0.95, + top_p=0.9, + max_tokens=512, + stream=True, + model="HuggingFaceH4/zephyr-7b-beta" + ): + response["content"] += message.choices[0].delta.content or "" + + yield history + [response] + + +with gr.Blocks() as demo: + gr.Markdown("# Chat with Hugging Face Zephyr 7b 🤗") + chatbot = gr.Chatbot( + label="Agent", + type="messages", + avatar_images=( + None, + "https://em-content.zobj.net/source/twitter/376/hugging-face_1f917.png", + ), + ) + prompt = gr.Textbox(max_lines=1, label="Chat Message") + prompt.submit(respond, [prompt, chatbot], [chatbot]) + prompt.submit(lambda: "", None, [prompt]) + + +if __name__ == "__main__": + demo.launch() +``` + +## The Undo Event + +Our undo event will populate the textbox with the previous user message and also remove that message and all subsequent messages from the chat history. + +In order to know the index of the last user message, we can pass `gr.UndoData` to our event handler function like so: + +```python +def handle_undo(history, undo_data: gr.UndoData): + return history[:undo_data.index], history[undo_data.index]['content'] +``` + +We then pass this function to the `undo` event! + +```python + chatbot.undo(handle_undo, chatbot, [chatbot, prompt]) +``` + +You'll notice that every bot response will now have an "undo icon" you can use to undo the response - + +![undo_event](https://github.com/user-attachments/assets/180b5302-bc4a-4c3e-903c-f14ec2adcaa6) + +Tip: You can also access the content of the user message with `undo_data.value`. + +## The Retry Event + +The retry event will work similarly. We'll use `gr.RetryData` to get the index of the previous user message and remove all the subsequent messages from the history. Then we'll use the `respond` function to generate a new response. We could also get the previous prompt via the `value` property of `gr.RetryData`. + +```python +def handle_retry(history, retry_data: gr.RetryData): + new_history = history[:retry_data.index] + previous_prompt = history[retry_data.index]['content'] + yield from respond(previous_prompt, new_history) + +...
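+# Wire up the retry event below: `handle_retry` receives the chatbot history
+# as input and streams the regenerated history back into the chatbot.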
+ +chatbot.retry(handle_retry, chatbot, [chatbot]) +``` + +You'll see that the bot messages have a "retry" icon now - + +![retry_event](https://github.com/user-attachments/assets/cec386a7-c4cd-4fb3-a2d7-78fd806ceac6) + +Tip: The Hugging Face inference API caches responses, so in this demo, the retry button will not generate a new response. + +## The Like Event + +By now you should hopefully be seeing the pattern! +To let users like a message, we'll add a `.like` event to our chatbot. +We'll pass it a function that accepts a `gr.LikeData` object. +In this case, we'll just print the message that was either liked or disliked. + +```python +def handle_like(data: gr.LikeData): + if data.liked: + print("You upvoted this response: ", data.value) + else: + print("You downvoted this response: ", data.value) + +... + +chatbot.like(handle_like, None, None) +``` + + +## Conclusion + +That's it! You now know how you can implement the retry, undo, and like events for the Chatbot. + + + diff --git a/guides/05_chatbots/04_creating-a-discord-bot-from-a-gradio-app.md b/guides/05_chatbots/05_creating-a-discord-bot-from-a-gradio-app.md similarity index 100% rename from guides/05_chatbots/04_creating-a-discord-bot-from-a-gradio-app.md rename to guides/05_chatbots/05_creating-a-discord-bot-from-a-gradio-app.md diff --git a/js/chatbot/Index.svelte b/js/chatbot/Index.svelte index ff2b13b644264..0ed6bed2f1d62 100644 --- a/js/chatbot/Index.svelte +++ b/js/chatbot/Index.svelte @@ -6,6 +6,7 @@ import type { Gradio, SelectData, LikeData } from "@gradio/utils"; import ChatBot from "./shared/ChatBot.svelte"; + import type { UndoRetryData } from "./shared/utils"; import { Block, BlockLabel } from "@gradio/atoms"; import type { LoadingStatus } from "@gradio/statustracker"; import { Chat } from "@gradio/icons"; @@ -50,8 +51,8 @@ error: string; like: LikeData; clear_status: LoadingStatus; - retry: null; - undo: null; + retry: UndoRetryData; + undo: UndoRetryData; clear: null; }>; export let avatar_images: [FileData | null, FileData | null] = [null, null]; @@ -123,8 +124,8 @@ on:like={(e) => gradio.dispatch("like", e.detail)} on:share={(e) => gradio.dispatch("share", e.detail)} on:error={(e) => gradio.dispatch("error", e.detail)} - on:retry={() => gradio.dispatch("retry")} - on:undo={() => gradio.dispatch("undo")} + on:retry={(e) => gradio.dispatch("retry", e.detail)} + on:undo={(e) => gradio.dispatch("undo", e.detail)} on:clear={() => { value = []; gradio.dispatch("clear"); diff --git a/js/chatbot/shared/ChatBot.svelte b/js/chatbot/shared/ChatBot.svelte index 4792fa5df1828..fdee3c6804b1a 100644 --- a/js/chatbot/shared/ChatBot.svelte +++ b/js/chatbot/shared/ChatBot.svelte @@ -1,5 +1,9 @@ + +
diff --git a/js/chatbot/Index.svelte b/js/chatbot/Index.svelte index 0ed6bed2f1d62..58d044ef76e87 100644 --- a/js/chatbot/Index.svelte +++ b/js/chatbot/Index.svelte @@ -156,4 +156,8 @@ width: 100%; height: 100%; } + + :global(.progress-text) { + right: auto; + } diff --git a/js/chatbot/shared/ActionButton.svelte b/js/chatbot/shared/ActionButton.svelte deleted file mode 100644 index 35fc3bc3553b1..0000000000000 --- a/js/chatbot/shared/ActionButton.svelte +++ /dev/null @@ -1,44 +0,0 @@ - - - - - diff --git a/js/chatbot/shared/ButtonPanel.svelte b/js/chatbot/shared/ButtonPanel.svelte index a3c55a2d38835..88f42c5694c9f 100644 --- a/js/chatbot/shared/ButtonPanel.svelte +++ b/js/chatbot/shared/ButtonPanel.svelte @@ -6,9 +6,8 @@ import { DownloadLink } from "@gradio/wasm/svelte"; import type { NormalisedMessage, TextMessage } from "../types"; import { is_component_message } from "./utils"; - import ActionButton from "./ActionButton.svelte"; - import { Retry } from "@gradio/icons"; - import Remove from "./Remove.svelte"; + import { Retry, Undo } from "@gradio/icons"; + import { IconButtonWrapper, IconButton } from "@gradio/atoms"; export let likeable: boolean; export let _retryable: boolean; @@ -51,96 +50,57 @@ {#if show}
- {#if show_copy} - - {/if} - {#if show_download && !Array.isArray(message) && is_component_message(message)} - - - - - - {/if} - {#if _retryable} - - - - {/if} - {#if _undoable} - - - - {/if} - {#if likeable} - - {/if} + + {#if show_copy} + + {/if} + {#if show_download && !Array.isArray(message) && is_component_message(message)} + + + + {/if} + {#if _retryable} + handle_action("retry")} + disabled={disable} + /> + {/if} + {#if _undoable} + handle_action("undo")} + disabled={disable} + /> + {/if} + {#if likeable} + + {/if} +
{/if} + label={copied ? "Copied message" : "Copy message"} + Icon={copied ? Check : Copy} +/> diff --git a/js/chatbot/shared/CopyAll.svelte b/js/chatbot/shared/CopyAll.svelte index b6538e957ef30..aa05eecb0561d 100644 --- a/js/chatbot/shared/CopyAll.svelte +++ b/js/chatbot/shared/CopyAll.svelte @@ -2,6 +2,7 @@ import { onDestroy } from "svelte"; import { Copy, Check } from "@gradio/icons"; import type { NormalisedMessage } from "../types"; + import { IconButton } from "@gradio/atoms"; let copied = false; export let value: NormalisedMessage[] | null; @@ -45,29 +46,8 @@ }); - - - + label={copied ? "Copied conversation" : "Copy conversation"} +> diff --git a/js/chatbot/shared/LikeDislike.svelte b/js/chatbot/shared/LikeDislike.svelte index 133bf35b7d7cc..71fbdab2ae150 100644 --- a/js/chatbot/shared/LikeDislike.svelte +++ b/js/chatbot/shared/LikeDislike.svelte @@ -1,68 +1,35 @@ - +/> - - - +/> diff --git a/js/icons/src/Retry.svelte b/js/icons/src/Retry.svelte index d611df60d8efd..9d61bdd9c504e 100644 --- a/js/icons/src/Retry.svelte +++ b/js/icons/src/Retry.svelte @@ -1,12 +1,36 @@ + + + + + diff --git a/js/icons/src/Undo.svelte b/js/icons/src/Undo.svelte index 2e1ab9da6eddd..f5df83d669535 100644 --- a/js/icons/src/Undo.svelte +++ b/js/icons/src/Undo.svelte @@ -1,11 +1,12 @@ Date: Fri, 13 Sep 2024 16:15:11 +0100 Subject: [PATCH 08/38] Added gradio-in-r (#9340) * Added gradio-in-r * add changeset * section * remove * tweaks * delete changeset * R * Updated using-gradio-in-other-programming-languages.md --------- Co-authored-by: Abubakar Abid Co-authored-by: gradio-pr-bot --- ...g-gradio-in-other-programming-languages.md | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 guides/10_other-tutorials/using-gradio-in-other-programming-languages.md diff --git a/guides/10_other-tutorials/using-gradio-in-other-programming-languages.md b/guides/10_other-tutorials/using-gradio-in-other-programming-languages.md new file mode 100644 index 0000000000000..6be3ab36ea35a --- /dev/null +++ b/guides/10_other-tutorials/using-gradio-in-other-programming-languages.md @@ -0,0 +1,176 @@ +# Using Gradio in Other Programming Languages + +The core `gradio` library is a Python library. But you can also use `gradio` to create UIs around programs written in other languages, thanks to Python's ability to interface with external processes. Using Python's `subprocess` module, you can call programs written in C++, Rust, or virtually any other language, allowing `gradio` to become a flexible UI layer for non-Python applications. + +In this post, we'll walk through how to integrate `gradio` with C++ and Rust, using Python's `subprocess` module to invoke code written in these languages. We'll also discuss how to use Gradio with R, which is even easier, thanks to the [reticulate](https://rstudio.github.io/reticulate/) R package, which makes it possible to install and import Python modules in R. + +## Using Gradio with C++ + +Let’s start with a simple example of integrating a C++ program into a Gradio app. Suppose we have the following C++ program that adds two numbers: + +```cpp +// add.cpp +#include <iostream> + +int main() { + double a, b; + std::cin >> a >> b; + std::cout << a + b << std::endl; + return 0; +} +``` + +This program reads two numbers from standard input, adds them, and outputs the result. + +We can build a Gradio interface around this C++ program using Python's `subprocess` module.
Here’s the corresponding Python code: + +```python +import gradio as gr +import subprocess + +def add_numbers(a, b): + process = subprocess.Popen( + ['./add'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + output, error = process.communicate(input=f"{a} {b}\n".encode()) + + if error: + return f"Error: {error.decode()}" + return float(output.decode().strip()) + +demo = gr.Interface( + fn=add_numbers, + inputs=[gr.Number(label="Number 1"), gr.Number(label="Number 2")], + outputs=gr.Textbox(label="Result") +) + +demo.launch() +``` + +Here, `subprocess.Popen` is used to execute the compiled C++ program (`add`), pass the input values, and capture the output. You can compile the C++ program by running: + +```bash +g++ -o add add.cpp +``` + +This example shows how easy it is to call C++ from Python using `subprocess` and build a Gradio interface around it. + +## Using Gradio with Rust + +Now, let’s move to another example: calling a Rust program to apply a sepia filter to an image. The Rust code could look something like this: + +```rust +// sepia.rs +extern crate image; + +use image::{GenericImageView, ImageBuffer, Rgba}; + +fn sepia_filter(input: &str, output: &str) { + let img = image::open(input).unwrap(); + let (width, height) = img.dimensions(); + let mut img_buf = ImageBuffer::new(width, height); + + for (x, y, pixel) in img.pixels() { + let (r, g, b, a) = (pixel[0] as f32, pixel[1] as f32, pixel[2] as f32, pixel[3]); + let tr = (0.393 * r + 0.769 * g + 0.189 * b).min(255.0); + let tg = (0.349 * r + 0.686 * g + 0.168 * b).min(255.0); + let tb = (0.272 * r + 0.534 * g + 0.131 * b).min(255.0); + img_buf.put_pixel(x, y, Rgba([tr as u8, tg as u8, tb as u8, a])); + } + + img_buf.save(output).unwrap(); +} + +fn main() { + let args: Vec<String> = std::env::args().collect(); + if args.len() != 3 { + eprintln!("Usage: sepia <input> <output>"); + return; + } + sepia_filter(&args[1], &args[2]); +} +``` + +This Rust program applies a sepia filter to an image. It takes two command-line arguments: the input image path and the output image path. You can compile this program using: + +```bash +cargo build --release +``` + +Now, we can call this Rust program from Python and use Gradio to build the interface: + +```python +import gradio as gr +import subprocess + +def apply_sepia(input_path): + output_path = "output.png" + + process = subprocess.Popen( + ['./target/release/sepia', input_path, output_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + process.wait() + + return output_path + +demo = gr.Interface( + fn=apply_sepia, + inputs=gr.Image(type="filepath", label="Input Image"), + outputs=gr.Image(label="Sepia Image") +) + +demo.launch() +``` + +Here, when a user uploads an image and clicks submit, Gradio calls the Rust binary (`sepia`) to process the image and returns the sepia-filtered output to the user. + +This setup showcases how you can integrate performance-critical or specialized code written in Rust into a Gradio interface. + +## Using Gradio with R (via `reticulate`) + +Integrating Gradio with R is particularly straightforward thanks to the `reticulate` package, which allows you to run Python code directly in R. Let’s walk through an example of using Gradio in R. + +**Installation** + +First, you need to install the `reticulate` package in R: + +```r +install.packages("reticulate") +``` + + +Once installed, you can use the package to run Gradio directly from within an R script.
+ + +```r +library(reticulate) + +py_install("gradio", pip = TRUE) + +gr <- import("gradio") # import gradio as gr +``` + +**Building a Gradio Application** + +With gradio installed and imported, we now have access to gradio's app-building methods. Let's build a simple app for an R function that returns a greeting. + +```r +greeting <- \(name) paste("Hello", name) + +app <- gr$Interface( + fn = greeting, + inputs = gr$Text(label = "Name"), + outputs = gr$Text(label = "Greeting"), + title = "Hello! 😃 👋" +) + +app$launch(server_name = "localhost", + server_port = as.integer(3000)) +``` + +Credit to [@IfeanyiIdiaye](https://github.com/Ifeanyi55) for contributing this section. You can see more examples [here](https://github.com/Ifeanyi55/Gradio-in-R/tree/main/Code), including using Gradio Blocks to build a machine learning application in R. From 2e034c6e7239860f42b3f8d26760ecf63663e912 Mon Sep 17 00:00:00 2001 From: "Yuichiro Tachibana (Tsuchiya)" Date: Fri, 13 Sep 2024 16:35:25 +0100 Subject: [PATCH 09/38] Enhance Lite E2E tests and fix a networking problem on Lite (#9333) * Add Lite E2E test to check a matplotlib problem which was fixed in https://github.com/gradio-app/gradio/pull/9312 * Restore js/app/test/image_remote_url.spec.ts, which was deleted in https://github.com/gradio-app/gradio/pull/8716 * Fix tootils import * Format * Fix processing_utils.resolve_with_google_dns to use the HTTPX client instead of urllib so it works on Lite * add changeset * add changeset * Move js/app/test/image_remote_url.spec.ts -> js/spa/test/image_remote_url.spec.ts * Use pyodide.http in resolve_with_google_dns on Lite --------- Co-authored-by: gradio-pr-bot --- .changeset/fancy-pianos-dig.md | 5 +++++ .config/playwright.config.js | 3 ++- gradio/processing_utils.py | 13 +++++++++--- js/spa/test/image_remote_url.spec.ts | 30 ++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 .changeset/fancy-pianos-dig.md create mode 100644 js/spa/test/image_remote_url.spec.ts diff --git a/.changeset/fancy-pianos-dig.md b/.changeset/fancy-pianos-dig.md new file mode 100644 index 0000000000000..291793f193082 --- /dev/null +++ b/.changeset/fancy-pianos-dig.md @@ -0,0 +1,5 @@ +--- +"gradio": minor +--- + +feat:Enhance Lite E2E tests and fix a networking problem on Lite diff --git a/.config/playwright.config.js b/.config/playwright.config.js index 546b36707b59e..6b5f971c16b67 100644 --- a/.config/playwright.config.js +++ b/.config/playwright.config.js @@ -51,7 +51,8 @@ const lite = defineConfig(base, { "**/file_component_events.spec.ts", "**/kitchen_sink.spec.ts", "**/gallery_component_events.spec.ts", - "**/image_remote_url.spec.ts" // To detect the bugs on Lite fixed in https://github.com/gradio-app/gradio/pull/8011 and https://github.com/gradio-app/gradio/pull/8026 + "**/image_remote_url.spec.ts", // To detect the bugs on Lite fixed in https://github.com/gradio-app/gradio/pull/8011 and https://github.com/gradio-app/gradio/pull/8026 + "**/outbreak_forecast.spec.ts" // To test matplotlib on Lite ], workers: 1, retries: 3, diff --git a/gradio/processing_utils.py b/gradio/processing_utils.py index 0e12cf6de13ec..b4ab9c4d3c3e2 100644 --- a/gradio/processing_utils.py +++ b/gradio/processing_utils.py @@ -10,7 +10,6 @@ import socket import subprocess import tempfile -import urllib.request import warnings from functools import lru_cache from io import BytesIO @@ -278,8 +277,16 @@ def save_file_to_cache(file_path: str | Path, cache_dir: str) -> str: def resolve_with_google_dns(hostname: str)
-> str | None: url = f"https://dns.google/resolve?name={hostname}&type=A" - with urllib.request.urlopen(url) as response: - data = json.loads(response.read().decode()) + if wasm_utils.IS_WASM: + import pyodide.http + + content = pyodide.http.open_url(url) + data = json.load(content) + else: + import urllib.request + + with urllib.request.urlopen(url) as response: + data = json.loads(response.read().decode()) if data.get("Status") == 0 and "Answer" in data: for answer in data["Answer"]: diff --git a/js/spa/test/image_remote_url.spec.ts b/js/spa/test/image_remote_url.spec.ts new file mode 100644 index 0000000000000..6b74a27fbe749 --- /dev/null +++ b/js/spa/test/image_remote_url.spec.ts @@ -0,0 +1,30 @@ +import { test, expect } from "@self/tootils"; + +test("Image displays remote image correctly", async ({ page }) => { + const example_image = page.locator( + 'div.block:has(div.label:has-text("Examples")) img' + ); + const input_image = page.locator( + 'div.block:has(label:has-text("InputImage")) img' + ); + const loopback_image = page.locator( + 'div.block:has(label:has-text("Loopback")) img' + ); + const remote_output_image = page.locator( + 'div.block:has(label:has-text("RemoteImage")) img' + ); + const submit_button = page.locator('button:has-text("Submit")'); + + await expect(example_image).toHaveJSProperty("complete", true); + await expect(example_image).not.toHaveJSProperty("naturalWidth", 0); + + await expect(input_image).toHaveJSProperty("complete", true); + await expect(input_image).not.toHaveJSProperty("naturalWidth", 0); + + await submit_button.click(); + + await expect(loopback_image).toHaveJSProperty("complete", true); + await expect(loopback_image).not.toHaveJSProperty("naturalWidth", 0); + await expect(remote_output_image).toHaveJSProperty("complete", true); + await expect(remote_output_image).not.toHaveJSProperty("naturalWidth", 0); +}); From 4ddb5db2b25ebcfabfe9fdb87ce32adb65552d13 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Fri, 13 Sep 2024 11:22:25 -0700 Subject: [PATCH 10/38] Do not attach `content_disposition_type = "attachment"` headers for files explicitly allowed by developer (#9348) * changes * add changeset * format * fix type * type * add test --------- Co-authored-by: gradio-pr-bot --- .changeset/young-candles-stare.md | 5 +++++ gradio/blocks.py | 6 ++---- gradio/processing_utils.py | 18 +++++++++--------- gradio/routes.py | 11 +++++------ gradio/utils.py | 13 +++++++------ test/test_routes.py | 16 ++++++++++++++-- test/test_utils.py | 12 ++++++++---- 7 files changed, 50 insertions(+), 31 deletions(-) create mode 100644 .changeset/young-candles-stare.md diff --git a/.changeset/young-candles-stare.md b/.changeset/young-candles-stare.md new file mode 100644 index 0000000000000..1d290441f42ed --- /dev/null +++ b/.changeset/young-candles-stare.md @@ -0,0 +1,5 @@ +--- +"gradio": minor +--- + +feat:Do not attach `content_disposition_type = "attachment"` headers for files explicitly allowed by developer diff --git a/gradio/blocks.py b/gradio/blocks.py index b6e3cf32d9bad..991c7324d3179 100644 --- a/gradio/blocks.py +++ b/gradio/blocks.py @@ -99,8 +99,6 @@ if TYPE_CHECKING: # Only import for type checking (is False at runtime). 
- from fastapi.applications import FastAPI - from gradio.components.base import Component from gradio.renderable import Renderable @@ -2227,7 +2225,7 @@ def launch( _frontend: bool = True, enable_monitoring: bool | None = None, strict_cors: bool = True, - ) -> tuple[FastAPI, str, str]: + ) -> tuple[routes.App, str, str]: """ Launches a simple web server that serves the demo. Can also be used to create a public link used by anyone to access the demo from their browser by setting share=True. @@ -2253,7 +2251,7 @@ def launch( ssl_verify: If False, skips certificate validation which allows self-signed certificates to be used. quiet: If True, suppresses most print statements. show_api: If True, shows the api docs in the footer of the app. Default True. - allowed_paths: List of complete filepaths or parent directories that gradio is allowed to serve. Must be absolute paths. Warning: if you provide directories, any files in these directories or their subdirectories are accessible to all users of your app. Can be set by comma separated environment variable GRADIO_ALLOWED_PATHS. + allowed_paths: List of complete filepaths or parent directories that gradio is allowed to serve. Must be absolute paths. Warning: if you provide directories, any files in these directories or their subdirectories are accessible to all users of your app. Can be set by comma separated environment variable GRADIO_ALLOWED_PATHS. These files are generally assumed to be secure and will be displayed in the browser when possible. blocked_paths: List of complete filepaths or parent directories that gradio is not allowed to serve (i.e. users of your app are not allowed to access). Must be absolute paths. Warning: takes precedence over `allowed_paths` and all other directories exposed by Gradio by default. Can be set by comma separated environment variable GRADIO_BLOCKED_PATHS. root_path: The root path (or "mount point") of the application, if it's not served from the root ("/") of the domain. Often used when the application is behind a reverse proxy that forwards requests to the application. For example, if the application is served at "https://example.com/myapp", the `root_path` should be set to "/myapp". A full URL beginning with http:// or https:// can be provided, which will be used as the root path in its entirety. Can be set by environment variable GRADIO_ROOT_PATH. Defaults to "". app_kwargs: Additional keyword arguments to pass to the underlying FastAPI app as a dictionary of parameter keys and argument values. 
For example, `{"docs_url": "/docs"}` diff --git a/gradio/processing_utils.py b/gradio/processing_utils.py index b4ab9c4d3c3e2..0e065f06c391b 100644 --- a/gradio/processing_utils.py +++ b/gradio/processing_utils.py @@ -512,18 +512,18 @@ def _check_allowed(path: str | Path, check_in_upload_folder: bool): abs_path = utils.abspath(path) - # if check_in_upload_folder=True - # we are running this during pre-process - # in which case only files in the upload_folder (cache_dir) - # are accepted - allowed = [utils.get_upload_folder()] - if not check_in_upload_folder: - allowed += blocks.allowed_paths + [os.getcwd(), tempfile.gettempdir()] - + created_paths = [utils.get_upload_folder()] + # if check_in_upload_folder=True, we are running this during pre-process + # in which case only files in the upload_folder (cache_dir) are accepted + if check_in_upload_folder: + allowed_paths = [] + else: + allowed_paths = blocks.allowed_paths + [os.getcwd(), tempfile.gettempdir()] allowed, reason = utils.is_allowed_file( abs_path, blocked_paths=blocks.blocked_paths, - allowed_paths=allowed, + allowed_paths=allowed_paths, + created_paths=created_paths, ) if not allowed: msg = f"Cannot move {abs_path} to the gradio cache dir because " diff --git a/gradio/routes.py b/gradio/routes.py index cf97ed7e6b9c4..a98c799dbb0e5 100644 --- a/gradio/routes.py +++ b/gradio/routes.py @@ -574,19 +574,18 @@ async def file(path_or_url: str, request: fastapi.Request): from gradio.data_classes import _StaticFiles - allowed, _ = utils.is_allowed_file( + allowed, reason = utils.is_allowed_file( abs_path, blocked_paths=blocks.blocked_paths, - allowed_paths=blocks.allowed_paths - + [app.uploaded_file_dir, utils.get_cache_folder()] - + _StaticFiles.all_paths, + allowed_paths=blocks.allowed_paths + _StaticFiles.all_paths, + created_paths=[app.uploaded_file_dir, utils.get_cache_folder()], ) if not allowed: raise HTTPException(403, f"File not allowed: {path_or_url}.") mime_type, _ = mimetypes.guess_type(abs_path) - if mime_type in XSS_SAFE_MIMETYPES: - media_type = mime_type + if mime_type in XSS_SAFE_MIMETYPES or reason == "allowed": + media_type = mime_type or "application/octet-stream" content_disposition_type = "inline" else: media_type = "application/octet-stream" diff --git a/gradio/utils.py b/gradio/utils.py index 3086f1edd56d7..cb94221ca3ac0 100644 --- a/gradio/utils.py +++ b/gradio/utils.py @@ -1468,16 +1468,17 @@ def is_allowed_file( path: Path, blocked_paths: Sequence[str | Path], allowed_paths: Sequence[str | Path], -) -> tuple[bool, Literal["in_blocklist", "allowed", "not_created_or_allowed"]]: + created_paths: Sequence[str | Path], +) -> tuple[ + bool, Literal["in_blocklist", "allowed", "created", "not_created_or_allowed"] +]: in_blocklist = any( is_in_or_equal(path, blocked_path) for blocked_path in blocked_paths ) if in_blocklist: return False, "in_blocklist" - - in_allowedlist = any( - is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths - ) - if in_allowedlist: + if any(is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths): return True, "allowed" + if any(is_in_or_equal(path, created_path) for created_path in created_paths): + return True, "created" return False, "not_created_or_allowed" diff --git a/test/test_routes.py b/test/test_routes.py index 19162a4a40bcc..ff51880f76e12 100644 --- a/test/test_routes.py +++ b/test/test_routes.py @@ -273,10 +273,18 @@ def test_response_attachment_format(self): app, _, _ = io.launch( prevent_thread_lock=True, allowed_paths=[ - 
os.path.dirname(image_file.name), - os.path.dirname(html_file.name), + image_file.name, + html_file.name, ], ) + + html_file2 = tempfile.NamedTemporaryFile( + mode="w", delete=False, suffix=".html", dir=app.uploaded_file_dir + ) + html_file2.write("Hello, world!") + html_file2.flush() + html_file2_name = str(Path(app.uploaded_file_dir) / html_file2.name) + client = TestClient(app) file_response = client.get(f"{API_PREFIX}/file={image_file.name}") @@ -284,6 +292,10 @@ def test_response_attachment_format(self): assert "inline" in file_response.headers["Content-Disposition"] file_response = client.get(f"{API_PREFIX}/file={html_file.name}") + assert file_response.headers["Content-Type"] == "text/html; charset=utf-8" + assert "inline" in file_response.headers["Content-Disposition"] + + file_response = client.get(f"{API_PREFIX}/file={html_file2_name}") assert file_response.headers["Content-Type"] == "application/octet-stream" assert "attachment" in file_response.headers["Content-Disposition"] diff --git a/test/test_utils.py b/test/test_utils.py index 49bfc05f19010..faa0a5b805714 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -429,24 +429,26 @@ def test_is_in_or_equal_fuzzer(path_1, path_2): path=create_path_string(), blocked_paths=create_path_list(), allowed_paths=create_path_list(), + created_paths=create_path_list(), ) def test_is_allowed_file_fuzzer( path: Path, blocked_paths: Sequence[Path], allowed_paths: Sequence[Path], + created_paths: Sequence[Path], ): - result, reason = is_allowed_file(path, blocked_paths, allowed_paths) + result, reason = is_allowed_file(path, blocked_paths, allowed_paths, created_paths) assert isinstance(result, bool) assert reason in [ "in_blocklist", "allowed", "not_created_or_allowed", - "created_by_app", + "created", ] if result: - assert reason == "allowed" + assert reason in ("allowed", "created") elif reason == "in_blocklist": assert any(is_in_or_equal(path, blocked_path) for blocked_path in blocked_paths) elif reason == "not_created_or_allowed": @@ -456,6 +458,8 @@ def test_is_allowed_file_fuzzer( if reason == "allowed": assert any(is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths) + elif reason == "created": + assert any(is_in_or_equal(path, created_path) for created_path in created_paths) @pytest.mark.parametrize( @@ -469,7 +473,7 @@ def test_is_allowed_file_fuzzer( ], ) def is_allowed_file_corner_cases(path, blocked_paths, allowed_paths, result): - assert is_allowed_file(path, blocked_paths, allowed_paths) == result + assert is_allowed_file(path, blocked_paths, allowed_paths, []) == result # Additional test for known edge cases From d04ab1849b16c9a7b4bda04ca3b3a41bfc42d1d7 Mon Sep 17 00:00:00 2001 From: Hannah Date: Fri, 13 Sep 2024 22:10:15 +0200 Subject: [PATCH 11/38] Fix overflowing markdown in Chatbot (#9260) * fix markdown overflowing table * add changeset * revert undo icon * add changeset * Revert "revert undo icon" This reverts commit 855b012a2083cc672783d6be1bc098677ab3cbbc. 
* add changeset --------- Co-authored-by: gradio-pr-bot --- .changeset/long-donkeys-hang.md | 6 ++++++ js/markdown/shared/MarkdownCode.svelte | 4 ++++ 2 files changed, 10 insertions(+) create mode 100644 .changeset/long-donkeys-hang.md diff --git a/.changeset/long-donkeys-hang.md b/.changeset/long-donkeys-hang.md new file mode 100644 index 0000000000000..cd3db25b6c7f3 --- /dev/null +++ b/.changeset/long-donkeys-hang.md @@ -0,0 +1,6 @@ +--- +"@gradio/markdown": minor +"gradio": minor +--- + +feat:Fix overflowing markdown in Chatbot diff --git a/js/markdown/shared/MarkdownCode.svelte b/js/markdown/shared/MarkdownCode.svelte index db600bc6c7f2d..f425ebe8c1e0d 100644 --- a/js/markdown/shared/MarkdownCode.svelte +++ b/js/markdown/shared/MarkdownCode.svelte @@ -188,4 +188,8 @@ span.md :global(.md-header-anchor > svg) { color: var(--body-text-color-subdued); } + + span :global(table) { + word-break: break-word; + } From bae18df3bd5f2f692a1076ceb8709e6f0f80e17f Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Mon, 16 Sep 2024 11:04:45 -0400 Subject: [PATCH 12/38] demo name --- guides/07_streaming/02_object-detection-from-webcam.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guides/07_streaming/02_object-detection-from-webcam.md b/guides/07_streaming/02_object-detection-from-webcam.md index 7f97ead26cf62..15fbe96ba5dc8 100644 --- a/guides/07_streaming/02_object-detection-from-webcam.md +++ b/guides/07_streaming/02_object-detection-from-webcam.md @@ -90,4 +90,4 @@ You can check out our demo hosted on Hugging Face Spaces [here](https://huggingf It is also embedded on this page below -$demo_ \ No newline at end of file +$demo_YOLOv10-webcam-stream \ No newline at end of file From 500f4e7768aa7ff7959d0e959f87a3fac15bac5d Mon Sep 17 00:00:00 2001 From: Freddy Boulton Date: Wed, 18 Sep 2024 17:42:41 -0400 Subject: [PATCH 13/38] Guide on Streaming Video for Object Detection (#9365) * Add code * notebooks * Suggestions * Add gif --- demo/rt-detr-object-detection/draw_boxes.py | 45 +++++ .../rt-detr-object-detection/requirements.txt | 4 + demo/rt-detr-object-detection/run.ipynb | 1 + demo/rt-detr-object-detection/run.py | 115 +++++++++++ gradio/route_utils.py | 4 +- .../03_object-detection-from-video.md | 181 ++++++++++++++++++ ....md => 04_real-time-speech-recognition.md} | 0 7 files changed, 348 insertions(+), 2 deletions(-) create mode 100644 demo/rt-detr-object-detection/draw_boxes.py create mode 100644 demo/rt-detr-object-detection/requirements.txt create mode 100644 demo/rt-detr-object-detection/run.ipynb create mode 100644 demo/rt-detr-object-detection/run.py create mode 100644 guides/07_streaming/03_object-detection-from-video.md rename guides/07_streaming/{03_real-time-speech-recognition.md => 04_real-time-speech-recognition.md} (100%) diff --git a/demo/rt-detr-object-detection/draw_boxes.py b/demo/rt-detr-object-detection/draw_boxes.py new file mode 100644 index 0000000000000..bbc5aa13ec6d8 --- /dev/null +++ b/demo/rt-detr-object-detection/draw_boxes.py @@ -0,0 +1,45 @@ +from PIL import ImageDraw, ImageFont # type: ignore +import colorsys + + +def get_color(label): + # Simple hash function to generate consistent colors for each label + hash_value = hash(label) + hue = (hash_value % 100) / 100.0 + saturation = 0.7 + value = 0.9 + rgb = colorsys.hsv_to_rgb(hue, saturation, value) + return tuple(int(x * 255) for x in rgb) + + +def draw_bounding_boxes(image, results: dict, model, threshold=0.3): + draw = ImageDraw.Draw(image) + font = ImageFont.load_default() + + for score, 
label_id, box in zip( + results["scores"], results["labels"], results["boxes"] + ): + if score > threshold: + label = model.config.id2label[label_id.item()] + box = [round(i, 2) for i in box.tolist()] + color = get_color(label) + + # Draw bounding box + draw.rectangle(box, outline=color, width=3) + + # Prepare text + text = f"{label}: {score:.2f}" + text_bbox = draw.textbbox((0, 0), text, font=font) + text_width = text_bbox[2] - text_bbox[0] + text_height = text_bbox[3] - text_bbox[1] + + # Draw text background + draw.rectangle( + [box[0], box[1] - text_height - 4, box[0] + text_width, box[1]], + fill=color, + ) + + # Draw text + draw.text((box[0], box[1] - text_height - 4), text, fill="white", font=font) + + return image diff --git a/demo/rt-detr-object-detection/requirements.txt b/demo/rt-detr-object-detection/requirements.txt new file mode 100644 index 0000000000000..c55994f2c0094 --- /dev/null +++ b/demo/rt-detr-object-detection/requirements.txt @@ -0,0 +1,4 @@ +safetensors==0.4.3 +opencv-python +torch +transformers>=4.43.0 \ No newline at end of file diff --git a/demo/rt-detr-object-detection/run.ipynb b/demo/rt-detr-object-detection/run.ipynb new file mode 100644 index 0000000000000..75412f0d91183 --- /dev/null +++ b/demo/rt-detr-object-detection/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: rt-detr-object-detection"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio safetensors==0.4.3 opencv-python torch transformers>=4.43.0"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/rt-detr-object-detection/draw_boxes.py"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import spaces\n", "import gradio as gr\n", "import cv2\n", "from PIL import Image\n", "import torch\n", "import time\n", "import numpy as np\n", "import uuid\n", "\n", "from transformers import RTDetrForObjectDetection, RTDetrImageProcessor # type: ignore\n", "\n", "from draw_boxes import draw_bounding_boxes\n", "\n", "image_processor = RTDetrImageProcessor.from_pretrained(\"PekingU/rtdetr_r50vd\")\n", "model = RTDetrForObjectDetection.from_pretrained(\"PekingU/rtdetr_r50vd\").to(\"cuda\")\n", "\n", "\n", "SUBSAMPLE = 2\n", "\n", "\n", "@spaces.GPU\n", "def stream_object_detection(video, conf_threshold):\n", " cap = cv2.VideoCapture(video)\n", "\n", " video_codec = cv2.VideoWriter_fourcc(*\"mp4v\") # type: ignore\n", " fps = int(cap.get(cv2.CAP_PROP_FPS))\n", "\n", " desired_fps = fps // SUBSAMPLE\n", " width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2\n", " height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2\n", "\n", " iterating, frame = cap.read()\n", "\n", " n_frames = 0\n", "\n", " name = f\"output_{uuid.uuid4()}.mp4\"\n", " segment_file = cv2.VideoWriter(name, video_codec, desired_fps, (width, height)) # type: ignore\n", " batch = []\n", "\n", " while iterating:\n", " frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)\n", " frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n", " if n_frames % SUBSAMPLE == 0:\n", " batch.append(frame)\n", " if len(batch) == 2 * desired_fps:\n", " inputs = 
image_processor(images=batch, return_tensors=\"pt\").to(\"cuda\")\n", "\n", " print(f\"starting batch of size {len(batch)}\")\n", " start = time.time()\n", " with torch.no_grad():\n", " outputs = model(**inputs)\n", " end = time.time()\n", " print(\"time taken for inference\", end - start)\n", "\n", " start = time.time()\n", " boxes = image_processor.post_process_object_detection(\n", " outputs,\n", " target_sizes=torch.tensor([(height, width)] * len(batch)),\n", " threshold=conf_threshold,\n", " )\n", "\n", " for _, (array, box) in enumerate(zip(batch, boxes)):\n", " pil_image = draw_bounding_boxes(\n", " Image.fromarray(array), box, model, conf_threshold\n", " )\n", " frame = np.array(pil_image)\n", " # Convert RGB to BGR\n", " frame = frame[:, :, ::-1].copy()\n", " segment_file.write(frame)\n", "\n", " batch = []\n", " segment_file.release()\n", " yield name\n", " end = time.time()\n", " print(\"time taken for processing boxes\", end - start)\n", " name = f\"output_{uuid.uuid4()}.mp4\"\n", " segment_file = cv2.VideoWriter(\n", " name, video_codec, desired_fps, (width, height)\n", " ) # type: ignore\n", "\n", " iterating, frame = cap.read()\n", " n_frames += 1\n", "\n", "\n", "with gr.Blocks() as demo:\n", " gr.HTML(\n", " \"\"\"\n", "
<h1 style='text-align: center'>
\n", " Video Object Detection with RT-DETR\n", "
</h1>
\n", " \"\"\"\n", " )\n", " with gr.Row():\n", " with gr.Column():\n", " video = gr.Video(label=\"Video Source\")\n", " conf_threshold = gr.Slider(\n", " label=\"Confidence Threshold\",\n", " minimum=0.0,\n", " maximum=1.0,\n", " step=0.05,\n", " value=0.30,\n", " )\n", " with gr.Column():\n", " output_video = gr.Video(\n", " label=\"Processed Video\", streaming=True, autoplay=True\n", " )\n", "\n", " video.upload(\n", " fn=stream_object_detection,\n", " inputs=[video, conf_threshold],\n", " outputs=[output_video],\n", " )\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/rt-detr-object-detection/run.py b/demo/rt-detr-object-detection/run.py new file mode 100644 index 0000000000000..ec089664009ed --- /dev/null +++ b/demo/rt-detr-object-detection/run.py @@ -0,0 +1,115 @@ +import spaces +import gradio as gr +import cv2 +from PIL import Image +import torch +import time +import numpy as np +import uuid + +from transformers import RTDetrForObjectDetection, RTDetrImageProcessor # type: ignore + +from draw_boxes import draw_bounding_boxes + +image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd") +model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd").to("cuda") + + +SUBSAMPLE = 2 + + +@spaces.GPU +def stream_object_detection(video, conf_threshold): + cap = cv2.VideoCapture(video) + + video_codec = cv2.VideoWriter_fourcc(*"mp4v") # type: ignore + fps = int(cap.get(cv2.CAP_PROP_FPS)) + + desired_fps = fps // SUBSAMPLE + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2 + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2 + + iterating, frame = cap.read() + + n_frames = 0 + + name = f"output_{uuid.uuid4()}.mp4" + segment_file = cv2.VideoWriter(name, video_codec, desired_fps, (width, height)) # type: ignore + batch = [] + + while iterating: + frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + if n_frames % SUBSAMPLE == 0: + batch.append(frame) + if len(batch) == 2 * desired_fps: + inputs = image_processor(images=batch, return_tensors="pt").to("cuda") + + print(f"starting batch of size {len(batch)}") + start = time.time() + with torch.no_grad(): + outputs = model(**inputs) + end = time.time() + print("time taken for inference", end - start) + + start = time.time() + boxes = image_processor.post_process_object_detection( + outputs, + target_sizes=torch.tensor([(height, width)] * len(batch)), + threshold=conf_threshold, + ) + + for _, (array, box) in enumerate(zip(batch, boxes)): + pil_image = draw_bounding_boxes( + Image.fromarray(array), box, model, conf_threshold + ) + frame = np.array(pil_image) + # Convert RGB to BGR + frame = frame[:, :, ::-1].copy() + segment_file.write(frame) + + batch = [] + segment_file.release() + yield name + end = time.time() + print("time taken for processing boxes", end - start) + name = f"output_{uuid.uuid4()}.mp4" + segment_file = cv2.VideoWriter( + name, video_codec, desired_fps, (width, height) + ) # type: ignore + + iterating, frame = cap.read() + n_frames += 1 + + +with gr.Blocks() as demo: + gr.HTML( + """ +

+ Video Object Detection with RT-DETR +

+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            video = gr.Video(label="Video Source")
+            conf_threshold = gr.Slider(
+                label="Confidence Threshold",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.05,
+                value=0.30,
+            )
+        with gr.Column():
+            output_video = gr.Video(
+                label="Processed Video", streaming=True, autoplay=True
+            )
+
+    video.upload(
+        fn=stream_object_detection,
+        inputs=[video, conf_threshold],
+        outputs=[output_video],
+    )
+
+if __name__ == "__main__":
+    demo.launch()
diff --git a/gradio/route_utils.py b/gradio/route_utils.py
index 88a8bfcc858cf..6b0028a1ddfbd 100644
--- a/gradio/route_utils.py
+++ b/gradio/route_utils.py
@@ -897,7 +897,7 @@ def __init__(self):
         self.ended = False
         self.segment_index = 0
         self.playlist = "#EXTM3U\n#EXT-X-PLAYLIST-TYPE:EVENT\n#EXT-X-TARGETDURATION:10\n#EXT-X-VERSION:4\n#EXT-X-MEDIA-SEQUENCE:0\n"
-        self.max_length = 5
+        self.max_duration = 5
 
     async def add_segment(self, data: MediaStreamChunk | None):
         if not data:
@@ -905,7 +905,7 @@ async def add_segment(self, data: MediaStreamChunk | None):
 
         segment_id = str(uuid.uuid4())
         self.segments.append({"id": segment_id, **data})
-        self.max_duration = max(self.max_length, data["duration"]) + 1
+        self.max_duration = max(self.max_duration, data["duration"]) + 1
 
     def end_stream(self):
         self.ended = True
diff --git a/guides/07_streaming/03_object-detection-from-video.md b/guides/07_streaming/03_object-detection-from-video.md
new file mode 100644
index 0000000000000..cabc12af6b9ee
--- /dev/null
+++ b/guides/07_streaming/03_object-detection-from-video.md
@@ -0,0 +1,181 @@
+# Object Detection from a Video
+
+Tags: VISION, STREAMING, VIDEO
+
+In this guide, we'll use the [RT-DETR](https://huggingface.co/docs/transformers/en/model_doc/rt_detr) model to detect objects in a user-uploaded video. We'll stream the results from the server using the new video streaming features introduced in Gradio 5.0.
+
+![video_object_detection_stream_latest](https://github.com/user-attachments/assets/4e27ac58-5ded-495d-9e0d-5e87e68b1355)
+
+## Setting up the Model
+
+First, we'll install the following requirements:
+
+```
+opencv-python
+torch
+transformers>=4.43.0
+spaces
+```
+
+Then, we'll download the model from the Hugging Face Hub:
+
+```python
+from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
+
+image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
+model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd").to("cuda")
+```
+
+We're moving the model to the GPU because we'll be deploying it to Hugging Face Spaces and running inference on the [free ZeroGPU cluster](https://huggingface.co/zero-gpu-explorers).
+
+## The Inference Function
+
+Our inference function will accept a video and a desired confidence threshold.
+Object detection models identify many objects and assign a confidence score to each one. The lower the confidence, the higher the chance of a false positive, so we will let our users set the confidence threshold.
+
+Our function will iterate over the frames in the video and run the RT-DETR model over each frame.
+We will then draw the bounding boxes for each detected object in the frame and save the frame to a new output video.
+The function will yield the output video in two-second chunks.
+
+In order to keep inference times as low as possible on ZeroGPU (there is a time-based quota),
+we will halve the original frames-per-second in the output video and resize the input frames to half their original
+size before running the model.
+
+The code for the inference function is below - we'll go over it piece by piece.
+
+```python
+import spaces
+import cv2
+from PIL import Image
+import torch
+import time
+import numpy as np
+import uuid
+
+from draw_boxes import draw_bounding_boxes
+
+SUBSAMPLE = 2
+
+@spaces.GPU
+def stream_object_detection(video, conf_threshold):
+    cap = cv2.VideoCapture(video)
+
+    # This means we will output mp4 videos
+    video_codec = cv2.VideoWriter_fourcc(*"mp4v")  # type: ignore
+    fps = int(cap.get(cv2.CAP_PROP_FPS))
+
+    desired_fps = fps // SUBSAMPLE
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2
+
+    iterating, frame = cap.read()
+
+    n_frames = 0
+
+    # Use UUID to create a unique video file
+    output_video_name = f"output_{uuid.uuid4()}.mp4"
+
+    # Output Video
+    output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))  # type: ignore
+    batch = []
+
+    while iterating:
+        frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        if n_frames % SUBSAMPLE == 0:
+            batch.append(frame)
+        if len(batch) == 2 * desired_fps:
+            inputs = image_processor(images=batch, return_tensors="pt").to("cuda")
+
+            with torch.no_grad():
+                outputs = model(**inputs)
+
+            boxes = image_processor.post_process_object_detection(
+                outputs,
+                target_sizes=torch.tensor([(height, width)] * len(batch)),
+                threshold=conf_threshold)
+
+            for i, (array, box) in enumerate(zip(batch, boxes)):
+                pil_image = draw_bounding_boxes(Image.fromarray(array), box, model, conf_threshold)
+                frame = np.array(pil_image)
+                # Convert RGB to BGR
+                frame = frame[:, :, ::-1].copy()
+                output_video.write(frame)
+
+            batch = []
+            output_video.release()
+            yield output_video_name
+            output_video_name = f"output_{uuid.uuid4()}.mp4"
+            output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))  # type: ignore
+
+        iterating, frame = cap.read()
+        n_frames += 1
+```
+
+1. **Reading from the Video**
+
+One of the industry standards for creating videos in Python is OpenCV, so we will use it in this guide.
+
+The `cap` variable is how we will read from the input video. Whenever we call `cap.read()`, we are reading the next frame in the video.
+
+In order to stream video in Gradio, we need to yield a different video file for each "chunk" of the output video.
+We create the next video file to write to with the `output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))` line. The `video_codec` is how we specify the type of video file. Only "mp4" and "ts" files are supported for video streaming at the moment.
+
+
+2. **The Inference Loop**
+
+For each frame in the video, we resize it to half its size. OpenCV reads files in `BGR` format, so we convert to the `RGB` format expected by transformers. That's what the first two lines of the while loop are doing.
+
+We take every other frame and add it to a `batch` list so that the output video is half the original FPS. When the batch covers two seconds of video, we run the model. The two-second threshold was chosen to keep the processing time of each batch small enough that the video is displayed smoothly by the server while not requiring too many separate forward passes. In order for video streaming to work properly in Gradio, each batch should cover at least one second of video.
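+To make the chunking arithmetic concrete, here is a quick sanity check (a sketch for illustration only, not part of the demo; the 30 FPS source rate is an assumption):
+
+```python
+fps = 30                                  # assumed source frame rate
+SUBSAMPLE = 2
+desired_fps = fps // SUBSAMPLE            # 15: the output video plays at 15 FPS
+batch_size = 2 * desired_fps              # 30 kept frames trigger a model run
+source_seconds = batch_size * SUBSAMPLE / fps   # 60 source frames = 2.0 s
+output_seconds = batch_size / desired_fps       # 30 frames / 15 FPS = 2.0 s
+```
+
+Each yielded segment therefore covers the same two seconds whether you count source frames or output frames, which is what keeps playback smooth.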
+
+We run the forward pass of the model and then use the `post_process_object_detection` method of the model to scale the detected bounding boxes to the size of the input frame.
+
+We make use of a custom function to draw the bounding boxes (source [here](https://huggingface.co/spaces/gradio/rt-detr-object-detection/blob/main/draw_boxes.py#L14)). We then have to convert from `RGB` to `BGR` before writing back to the output video.
+
+Once we have finished processing the batch, we create a new output video file for the next batch.
+
+## The Gradio Demo
+
+The UI code is pretty similar to that of other Gradio apps.
+We'll use a standard two-column layout so that users can see the input and output videos side by side.
+
+In order for streaming to work, we have to set `streaming=True` in the output video. Setting the video
+to autoplay is not necessary, but it's a better experience for users.
+
+```python
+with gr.Blocks() as app:
+    gr.HTML(
+        """
+

+ Video Object Detection with RT-DETR +

+ """) + with gr.Row(): + with gr.Column(): + video = gr.Video(label="Video Source") + conf_threshold = gr.Slider( + label="Confidence Threshold", + minimum=0.0, + maximum=1.0, + step=0.05, + value=0.30, + ) + with gr.Column(): + output_video = gr.Video(label="Processed Video", streaming=True, autoplay=True) + + video.upload( + fn=stream_object_detection, + inputs=[video, conf_threshold], + outputs=[output_video], + ) +``` + + +## Conclusion + +You can check out our demo hosted on Hugging Face Spaces [here](https://huggingface.co/spaces/gradio/rt-detr-object-detection). + +It is also embedded on this page below + +$demo_rt-detr-object-detection \ No newline at end of file diff --git a/guides/07_streaming/03_real-time-speech-recognition.md b/guides/07_streaming/04_real-time-speech-recognition.md similarity index 100% rename from guides/07_streaming/03_real-time-speech-recognition.md rename to guides/07_streaming/04_real-time-speech-recognition.md From 029e310274e80b36038090201a81025bf393a154 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Mon, 16 Sep 2024 11:23:50 -0700 Subject: [PATCH 14/38] Small tweak to how thoughts are shown in `gr.Chatbot` (#9359) * thiknk chat * add changeset * lint --------- Co-authored-by: gradio-pr-bot --- .changeset/breezy-olives-wonder.md | 6 ++++++ js/chatbot/shared/ChatBot.svelte | 5 ++++- js/chatbot/shared/MessageBox.svelte | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 .changeset/breezy-olives-wonder.md diff --git a/.changeset/breezy-olives-wonder.md b/.changeset/breezy-olives-wonder.md new file mode 100644 index 0000000000000..3473bae75c72d --- /dev/null +++ b/.changeset/breezy-olives-wonder.md @@ -0,0 +1,6 @@ +--- +"@gradio/chatbot": minor +"gradio": minor +--- + +feat:Small tweak to how thoughts are shown in `gr.Chatbot` diff --git a/js/chatbot/shared/ChatBot.svelte b/js/chatbot/shared/ChatBot.svelte index d6416045da34d..cfa5e5a4a6d99 100644 --- a/js/chatbot/shared/ChatBot.svelte +++ b/js/chatbot/shared/ChatBot.svelte @@ -422,7 +422,10 @@ > {#if message.type === "text"} {#if message.metadata.title} - + - let expanded = false; + export let expanded = false; export let title: string; function toggleExpanded(): void { From dc05f539fafa0598903bfbd55a7ab3f8190e7fbe Mon Sep 17 00:00:00 2001 From: Hannah Date: Tue, 17 Sep 2024 00:26:12 +0200 Subject: [PATCH 15/38] Use `container` param in `gr.Markdown` (#9356) * * add param * add story * add changeset * Use IconButton for copy btn * fix test --------- Co-authored-by: gradio-pr-bot --- .changeset/chatty-houses-do.md | 6 +++++ gradio/components/markdown.py | 3 +++ js/markdown/Index.svelte | 3 ++- js/markdown/Markdown.stories.svelte | 5 ++-- js/markdown/shared/Markdown.svelte | 41 +++++------------------------ 5 files changed, 21 insertions(+), 37 deletions(-) create mode 100644 .changeset/chatty-houses-do.md diff --git a/.changeset/chatty-houses-do.md b/.changeset/chatty-houses-do.md new file mode 100644 index 0000000000000..73b4a7af4ff92 --- /dev/null +++ b/.changeset/chatty-houses-do.md @@ -0,0 +1,6 @@ +--- +"@gradio/markdown": minor +"gradio": minor +--- + +feat:Use `container` param in `gr.Markdown` diff --git a/gradio/components/markdown.py b/gradio/components/markdown.py index bea5a93abc195..09e6580df6822 100644 --- a/gradio/components/markdown.py +++ b/gradio/components/markdown.py @@ -49,6 +49,7 @@ def __init__( max_height: int | str | None = None, min_height: int | str | None = None, show_copy_button: bool = False, + container: bool = False, ): """ Parameters: @@ -71,6 
+72,7 @@ def __init__( max_height: The maximum height of the component, specified in pixels if a number is passed, or in CSS units if a string is passed. If markdown content exceeds the height, the component will scroll. If markdown content is shorter than the height, the component will shrink to fit the content. Will not have any effect if `height` is set and is smaller than `max_height`. min_height: The minimum height of the component, specified in pixels if a number is passed, or in CSS units if a string is passed. If markdown content exceeds the height, the component will expand to fit the content. Will not have any effect if `height` is set and is larger than `min_height`. show_copy_button: If True, includes a copy button to copy the text in the Markdown component. Default is False. + container: If True, the Markdown component will be displayed in a container. Default is False. """ self.rtl = rtl if latex_delimiters is None: @@ -95,6 +97,7 @@ def __init__( render=render, key=key, value=value, + container=container, ) def preprocess(self, payload: str | None) -> str | None: diff --git a/js/markdown/Index.svelte b/js/markdown/Index.svelte index 5b044624da44a..0e2db2228eb95 100644 --- a/js/markdown/Index.svelte +++ b/js/markdown/Index.svelte @@ -35,6 +35,7 @@ export let min_height: number | string | undefined; export let max_height: number | string | undefined; export let show_copy_button = false; + export let container = false; $: label, gradio.dispatch("change"); @@ -43,7 +44,7 @@ {visible} {elem_id} {elem_classes} - container={false} + {container} allow_overflow={true} overflow_behavior="auto" {height} diff --git a/js/markdown/Markdown.stories.svelte b/js/markdown/Markdown.stories.svelte index 3684bdaf46013..97bc7fe1c8398 100644 --- a/js/markdown/Markdown.stories.svelte +++ b/js/markdown/Markdown.stories.svelte @@ -86,10 +86,11 @@ in two separate lines.` /> diff --git a/js/markdown/shared/Markdown.svelte b/js/markdown/shared/Markdown.svelte index ee8d7ed98ab91..f28219f5fb3c7 100644 --- a/js/markdown/shared/Markdown.svelte +++ b/js/markdown/shared/Markdown.svelte @@ -2,9 +2,9 @@ import { createEventDispatcher } from "svelte"; import { copy } from "@gradio/utils"; import { Copy, Check } from "@gradio/icons"; + import { IconButton, IconButtonWrapper } from "@gradio/atoms"; import MarkdownCode from "./MarkdownCode.svelte"; - import { fade } from "svelte/transition"; export let elem_classes: string[] = []; export let visible = true; @@ -62,19 +62,13 @@ style={height ? `max-height: ${css_units(height)}; overflow-y: auto;` : ""} > {#if show_copy_button} - {#if copied} - - {:else} - - {/if} + label={copied ? 
"Copied conversation" : "Copy conversation"} + > + {/if} From 1cc71c31fab2a7ce549636963b55d15854e05796 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 13 Sep 2024 16:07:15 +0200 Subject: [PATCH 16/38] small fixes (#9347) --- .../02_getting-started-with-the-js-client.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/guides/09_gradio-clients-and-lite/02_getting-started-with-the-js-client.md b/guides/09_gradio-clients-and-lite/02_getting-started-with-the-js-client.md index 1a1ae365c00e8..2207381825cce 100644 --- a/guides/09_gradio-clients-and-lite/02_getting-started-with-the-js-client.md +++ b/guides/09_gradio-clients-and-lite/02_getting-started-with-the-js-client.md @@ -58,7 +58,7 @@ Start by connecting instantiating a `client` instance and connecting it to a Gra ```js import { Client } from "@gradio/client"; -const app = Client.connect("abidlabs/en2fr"); // a Space that translates from English to French +const app = await Client.connect("abidlabs/en2fr"); // a Space that translates from English to French ``` You can also connect to private Spaces by passing in your HF token with the `hf_token` property of the options parameter. You can get your HF token here: https://huggingface.co/settings/tokens @@ -66,7 +66,7 @@ You can also connect to private Spaces by passing in your HF token with the `hf_ ```js import { Client } from "@gradio/client"; -const app = Client.connect("abidlabs/my-private-space", { hf_token: "hf_..." }) +const app = await Client.connect("abidlabs/my-private-space", { hf_token: "hf_..." }) ``` ## Duplicating a Space for private use @@ -216,7 +216,7 @@ const result = await app.predict("/predict", [handle_file(audio_file)]); ## Using events -If the API you are working with can return results over time, or you wish to access information about the status of a job, you can use the iterable interface for more flexibility. This is especially useful for iterative endpoints or generator endpoints that will produce a series of values over time as discreet responses. +If the API you are working with can return results over time, or you wish to access information about the status of a job, you can use the iterable interface for more flexibility. This is especially useful for iterative endpoints or generator endpoints that will produce a series of values over time as discrete responses. 
```js import { Client } from "@gradio/client"; From 7c5d26e999b30d3af6a8b664fe5b0eb8daa4eb7d Mon Sep 17 00:00:00 2001 From: Nikita Krasnytskyi Date: Mon, 16 Sep 2024 16:16:48 +0100 Subject: [PATCH 17/38] Updated Guide: Real Time Speech Recognition (#9349) * Update real-time-speech-recognition.md added necessary dependency * Update run.py updated code to handle cases with stereo microphone * Update real-time-speech-recognition.md improved english * Update run.py updated code for streaming * Update run.py --- demo/asr/run.ipynb | 2 +- demo/asr/run.py | 7 ++++++- demo/stream_asr/run.ipynb | 2 +- demo/stream_asr/run.py | 5 +++++ guides/07_streaming/04_real-time-speech-recognition.md | 8 +++----- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/demo/asr/run.ipynb b/demo/asr/run.ipynb index e8931ecaaadfc..40220c2b225cb 100644 --- a/demo/asr/run.ipynb +++ b/demo/asr/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: asr"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(audio):\n", " sr, y = audio\n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " return transcriber({\"sampling_rate\": sr, \"raw\": y})[\"text\"] # type: ignore\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " gr.Audio(sources=[\"microphone\"]),\n", " \"text\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: asr"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(audio):\n", " sr, y = audio\n", " \n", " # Convert to mono if stereo\n", " if y.ndim > 1:\n", " y = y.mean(axis=1)\n", " \n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " return transcriber({\"sampling_rate\": sr, \"raw\": y})[\"text\"] # type: ignore\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " gr.Audio(sources=\"microphone\"),\n", " \"text\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/asr/run.py b/demo/asr/run.py index 0d4aa4e9a05f1..9ae26ba77b219 100644 --- a/demo/asr/run.py +++ b/demo/asr/run.py @@ -6,6 +6,11 @@ def transcribe(audio): sr, y = audio + + # Convert to mono if stereo + if y.ndim > 1: + y = y.mean(axis=1) + y = y.astype(np.float32) y /= 
np.max(np.abs(y)) @@ -13,7 +18,7 @@ def transcribe(audio): demo = gr.Interface( transcribe, - gr.Audio(sources=["microphone"]), + gr.Audio(sources="microphone"), "text", ) diff --git a/demo/stream_asr/run.ipynb b/demo/stream_asr/run.ipynb index 1473aaca86b04..f2d007e6eaacc 100644 --- a/demo/stream_asr/run.ipynb +++ b/demo/stream_asr/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: stream_asr"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(stream, new_chunk):\n", " sr, y = new_chunk\n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " if stream is not None:\n", " stream = np.concatenate([stream, y])\n", " else:\n", " stream = y\n", " return stream, transcriber({\"sampling_rate\": sr, \"raw\": stream})[\"text\"] # type: ignore\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " [\"state\", gr.Audio(sources=[\"microphone\"], streaming=True)],\n", " [\"state\", \"text\"],\n", " live=True,\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: stream_asr"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(stream, new_chunk):\n", " sr, y = new_chunk\n", " \n", " # Convert to mono if stereo\n", " if y.ndim > 1:\n", " y = y.mean(axis=1)\n", " \n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " if stream is not None:\n", " stream = np.concatenate([stream, y])\n", " else:\n", " stream = y\n", " return stream, transcriber({\"sampling_rate\": sr, \"raw\": stream})[\"text\"] # type: ignore\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " [\"state\", gr.Audio(sources=[\"microphone\"], streaming=True)],\n", " [\"state\", \"text\"],\n", " live=True,\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/stream_asr/run.py b/demo/stream_asr/run.py index 6d617f49d6612..bed102fadd684 100644 --- a/demo/stream_asr/run.py +++ b/demo/stream_asr/run.py @@ -6,6 +6,11 @@ def transcribe(stream, new_chunk): sr, y = new_chunk + + # Convert to mono if stereo + if y.ndim > 1: + y = y.mean(axis=1) + y = y.astype(np.float32) y /= np.max(np.abs(y)) diff --git a/guides/07_streaming/04_real-time-speech-recognition.md 
b/guides/07_streaming/04_real-time-speech-recognition.md index c84cfbb23f325..d0b2816344297 100644 --- a/guides/07_streaming/04_real-time-speech-recognition.md +++ b/guides/07_streaming/04_real-time-speech-recognition.md @@ -14,7 +14,7 @@ This tutorial will show how to take a pretrained speech-to-text model and deploy Make sure you have the `gradio` Python package already [installed](/getting_started). You will also need a pretrained speech recognition model. In this tutorial, we will build demos from 2 ASR libraries: -- Transformers (for this, `pip install transformers` and `pip install torch`) +- Transformers (for this, `pip install torch transformers torchaudio`) Make sure you have at least one of these installed so that you can follow along the tutorial. You will also need `ffmpeg` [installed on your system](https://www.ffmpeg.org/download.html), if you do not already have it, to process files from the microphone. @@ -61,10 +61,8 @@ Take a look below. $code_stream_asr -Notice now we have a state variable now, because we need to track all the audio history. `transcribe` gets called whenever there is a new small chunk of audio, but we also need to keep track of all the audio that has been spoken so far in state. -As the interface runs, the `transcribe` function gets called, with a record of all the previously spoken audio in `stream`, as well as the new chunk of audio as `new_chunk`. We return the new full audio so that can be stored back in state, and we also return the transcription. -Here we naively append the audio together and simply call the `transcriber` object on the entire audio. You can imagine more efficient ways of handling this, such as re-processing only the last 5 seconds of audio whenever a new chunk of audio received. +Notice that we now have a state variable because we need to track all the audio history. `transcribe` gets called whenever there is a new small chunk of audio, but we also need to keep track of all the audio spoken so far in the state. As the interface runs, the `transcribe` function gets called, with a record of all the previously spoken audio in the `stream` and the new chunk of audio as `new_chunk`. We return the new full audio to be stored back in its current state, and we also return the transcription. Here, we naively append the audio together and call the `transcriber` object on the entire audio. You can imagine more efficient ways of handling this, such as re-processing only the last 5 seconds of audio whenever a new chunk of audio is received. $demo_stream_asr -Now the ASR model will run inference as you speak! \ No newline at end of file +Now the ASR model will run inference as you speak! 
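+As a rough sketch of the "re-process only the last few seconds" idea mentioned above (illustrative only, not part of this change: `transcribe_windowed` and `WINDOW_SECONDS` are hypothetical names, and the function reuses the `transcriber` pipeline defined earlier):
+
+```python
+import numpy as np
+
+WINDOW_SECONDS = 5  # hypothetical window length, in seconds
+
+def transcribe_windowed(stream, new_chunk):
+    sr, y = new_chunk
+    if y.ndim > 1:  # convert to mono if stereo, as in the demo above
+        y = y.mean(axis=1)
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
+    stream = y if stream is None else np.concatenate([stream, y])
+    window = stream[-WINDOW_SECONDS * sr:]  # re-transcribe only the tail
+    return stream, transcriber({"sampling_rate": sr, "raw": window})["text"]
+```
+
+Note the trade-off: each update only returns the text for the most recent window, so a full transcript would need to be stitched together in additional state.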
From b9e5b3ea828450a80831e0018409ec0002cfda8f Mon Sep 17 00:00:00 2001 From: pngwn Date: Tue, 17 Sep 2024 15:21:49 +0800 Subject: [PATCH 18/38] chunk space uploads (#9360) * chunk space uploads * Update upload_demo_to_space.py Co-authored-by: Lucain * address comments + tweak CI --------- Co-authored-by: Lucain --- .github/workflows/previews-deploy.yml | 2 +- scripts/upload_demo_to_space.py | 152 +++++++++++++++++++------- 2 files changed, 112 insertions(+), 42 deletions(-) diff --git a/.github/workflows/previews-deploy.yml b/.github/workflows/previews-deploy.yml index 6f627aba06af1..7510855e259ae 100644 --- a/.github/workflows/previews-deploy.yml +++ b/.github/workflows/previews-deploy.yml @@ -113,7 +113,7 @@ jobs: gradio-pr-deploys/pr-${{ needs.changes.outputs.pr_number }}-all-demos \ ${{ secrets.SPACES_DEPLOY_TOKEN }} \ --gradio-version ${{ needs.changes.outputs.gradio_version }} > url.txt - echo "SPACE_URL=$(cat url.txt)" >> $GITHUB_OUTPUT + echo "SPACE_URL=$(tail -n 1 url.txt)" >> $GITHUB_OUTPUT - name: Upload Website Demos if: github.event.workflow_run.event == 'workflow_dispatch' id: upload-website-demos diff --git a/scripts/upload_demo_to_space.py b/scripts/upload_demo_to_space.py index f1bdfa669b9eb..8d989b6c55d98 100644 --- a/scripts/upload_demo_to_space.py +++ b/scripts/upload_demo_to_space.py @@ -1,78 +1,148 @@ import argparse import pathlib import shutil +import sys import tempfile import textwrap from typing import Optional -import huggingface_hub +from huggingface_hub import CommitOperationAdd, HfApi def upload_demo_to_space( demo_name: str, space_id: str, hf_token: str, gradio_version: Optional[str] ): - """Upload a demo in the demo directory to a huggingface space. + """ + Upload a demo from the demo directory to a Hugging Face Space in chunks of 50 files per commit. + Args: demo_name: The name of the demo to upload. - space_id: The id of the space to upload the demo to. - hf_token: HF api token. Need to have permission to write to space_id for this to work. - gradio_version: If not None, will set the gradio version in the created space to the given version. + space_id: The ID of the space to upload the demo to (e.g., username/space_name). + hf_token: Hugging Face API token with write permissions to the space. + gradio_version: If provided, sets the Gradio version in the created space. + + Returns: + str: URL of the uploaded Hugging Face Space. 
""" + print(f"Uploading demo '{demo_name}' to space '{space_id}'...") + + def split_into_chunks(lst: list, n: int) -> list[list]: + for i in range(0, len(lst), n): + yield lst[i : i + n] + + api = HfApi() + + print("Creating repository...") + + # Create the repository if it doesn't exist + space_url = api.create_repo( + repo_id=space_id, + space_sdk="gradio", + repo_type="space", + token=hf_token, + exist_ok=True, + ) + + space_id = space_url.repo_id + with tempfile.TemporaryDirectory() as tmpdir: - demo_path = pathlib.Path(pathlib.Path().absolute(), f"demo/{demo_name}") + demo_path = pathlib.Path.cwd() / "demo" / demo_name + if not demo_path.exists(): + raise FileNotFoundError(f"Demo path '{demo_path}' does not exist.") + shutil.copytree(demo_path, tmpdir, dirs_exist_ok=True) - app_file = pathlib.Path(tmpdir, "run.py") - # Rename the app file to be app.py - app_file.rename(app_file.with_stem("app")) + + # update README.md with Gradio version if provided if gradio_version: readme = pathlib.Path(tmpdir, "README.md") readme_content = f""" - --- - title: {space_id.split("/")[-1]} - emoji: 💩 - colorFrom: indigo - colorTo: indigo - sdk: gradio - sdk_version: {gradio_version} - app_file: app.py - pinned: false - --- - """ - readme.open("w").write(textwrap.dedent(readme_content)) - - api = huggingface_hub.HfApi() - huggingface_hub.create_repo( - space_id, + --- + title: {space_id.split("/")[-1]} + emoji: 💩 + colorFrom: indigo + colorTo: indigo + sdk: gradio + sdk_version: {gradio_version} + app_file: run.py + pinned: false + --- + """ + readme.write_text(textwrap.dedent(readme_content)) + + print("Uploading files to Hugging Face Space...") + + # Create the repository if it doesn't exist + api.create_repo( + repo_id=space_id, space_sdk="gradio", repo_type="space", token=hf_token, exist_ok=True, ) - api.upload_folder( - token=hf_token, - repo_id=space_id, - repo_type="space", - folder_path=tmpdir, - path_in_repo="", - ) + + print("Uploading files...") + + all_files = sorted([p for p in pathlib.Path(tmpdir).rglob("*") if p.is_file()]) + relative_files = [p.relative_to(tmpdir) for p in all_files] + + # Create CommitOperationAdd objects for all files + operations = [ + CommitOperationAdd( + path_in_repo=str(rel_path).replace("\\", "/"), + path_or_fileobj=str(pathlib.Path(tmpdir) / rel_path), + ) + for rel_path in relative_files + ] + + # Split operations into chunks of 50 + operation_chunks = list(split_into_chunks(operations, 50)) + + for idx, chunk in enumerate(operation_chunks, start=1): + commit_message = f"Commit {idx}: Add {len(chunk)} file(s)" + try: + api.create_commit( + repo_id=space_id, + operations=chunk, + commit_message=commit_message, + token=hf_token, + repo_type="space", + ) + print(f"Successfully committed chunk {idx} with {len(chunk)} file(s).") + except Exception as e: + print(f"Failed to commit chunk {idx}: {e}") + raise e + return f"https://huggingface.co/spaces/{space_id}" if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Upload a demo to a space") - parser.add_argument("demo_name", type=str, help="Name of demo to upload") + print("Starting upload...") + parser = argparse.ArgumentParser( + description="Upload a demo to a Hugging Face Space in chunks." 
+ ) + parser.add_argument("demo_name", type=str, help="Name of the demo to upload") parser.add_argument( - "space_id", type=str, help="Name of the space to upload the demo to" + "space_id", + type=str, + help="ID of the space to upload the demo to (e.g., username/space_name)", ) - parser.add_argument("hf_token", type=str, help="HF API token") + parser.add_argument("hf_token", type=str, help="Hugging Face API token") parser.add_argument( "--gradio-version", type=str, - help="If not None, will set the gradio version in the created space to the given version.", + help="If provided, sets the Gradio version in the created space to the given version.", ) args = parser.parse_args() - new_space = upload_demo_to_space( - args.demo_name, args.space_id, args.hf_token, args.gradio_version - ) - print(new_space) + try: + print("Uploading demo to Hugging Face Space...") + new_space_url = upload_demo_to_space( + args.demo_name, args.space_id, args.hf_token, args.gradio_version + ) + print("Demo successfully uploaded to:") + # do not change the final print statement, + # it must appear as the last line of the logs for CI to pick up the URL + print(new_space_url) + except Exception as error: + print(f"An error occurred during upload: {error}") + sys.exit(1) From 4d41c803bd1ed7ceb4c3307fe8e84fec09547f0e Mon Sep 17 00:00:00 2001 From: Ali Abdalla Date: Tue, 17 Sep 2024 22:26:39 +0400 Subject: [PATCH 19/38] add find (#9368) --- .github/workflows/website-build.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/website-build.yml b/.github/workflows/website-build.yml index 2dcccc718bebc..f7132ec14cb2c 100644 --- a/.github/workflows/website-build.yml +++ b/.github/workflows/website-build.yml @@ -61,6 +61,9 @@ jobs: - name: build website run: VERCEL=1 pnpm --filter website build + + - name: find + - run: find . -type d - name: upload website artifacts uses: actions/upload-artifact@v4 From bdc9e9514dd25dce241e3d1ea4ddfd5e361f2352 Mon Sep 17 00:00:00 2001 From: Ali Abdalla Date: Tue, 17 Sep 2024 22:32:24 +0400 Subject: [PATCH 20/38] New branch (#9369) * add find * fix syntax --- .github/workflows/website-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/website-build.yml b/.github/workflows/website-build.yml index f7132ec14cb2c..0028c27906000 100644 --- a/.github/workflows/website-build.yml +++ b/.github/workflows/website-build.yml @@ -63,7 +63,7 @@ jobs: run: VERCEL=1 pnpm --filter website build - name: find - - run: find . -type d + run: find . 
-type d - name: upload website artifacts uses: actions/upload-artifact@v4 From 74eba65ec07a4d6ecc79ae84f7ad3714570d2df4 Mon Sep 17 00:00:00 2001 From: Ali Abdalla Date: Tue, 17 Sep 2024 22:49:56 +0400 Subject: [PATCH 21/38] New branch (#9370) * add find * fix syntax * add hidden files --- .github/workflows/website-build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/website-build.yml b/.github/workflows/website-build.yml index 0028c27906000..452f20d48b79d 100644 --- a/.github/workflows/website-build.yml +++ b/.github/workflows/website-build.yml @@ -70,3 +70,4 @@ jobs: with: name: website path: js/_website/.vercel + include-hidden-files: true \ No newline at end of file From 9dc7bb691c6abfc643e2cc9b5ddcba138af16e10 Mon Sep 17 00:00:00 2001 From: Hannah Date: Tue, 17 Sep 2024 22:41:37 +0200 Subject: [PATCH 22/38] run format --- .github/workflows/website-build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/website-build.yml b/.github/workflows/website-build.yml index 452f20d48b79d..d8973f065bed2 100644 --- a/.github/workflows/website-build.yml +++ b/.github/workflows/website-build.yml @@ -61,8 +61,8 @@ jobs: - name: build website run: VERCEL=1 pnpm --filter website build - - - name: find + + - name: find run: find . -type d - name: upload website artifacts @@ -70,4 +70,4 @@ jobs: with: name: website path: js/_website/.vercel - include-hidden-files: true \ No newline at end of file + include-hidden-files: true From ee0ae3c9920a5eba19eb08df01b2d5661534dd97 Mon Sep 17 00:00:00 2001 From: Ali Abdalla Date: Wed, 18 Sep 2024 16:33:19 +0400 Subject: [PATCH 23/38] Testing CI (#9379) * remove unnecessary redirects * add changeset * fix * formatting --------- Co-authored-by: gradio-pr-bot --- .changeset/public-webs-melt.md | 5 + js/_website/src/routes/redirects.js | 160 +--------------------------- 2 files changed, 7 insertions(+), 158 deletions(-) create mode 100644 .changeset/public-webs-melt.md diff --git a/.changeset/public-webs-melt.md b/.changeset/public-webs-melt.md new file mode 100644 index 0000000000000..b300262cf7992 --- /dev/null +++ b/.changeset/public-webs-melt.md @@ -0,0 +1,5 @@ +--- +"website": minor +--- + +feat:Testing CI diff --git a/js/_website/src/routes/redirects.js b/js/_website/src/routes/redirects.js index 35c379a8ad96c..7bd8b9be3d97a 100644 --- a/js/_website/src/routes/redirects.js +++ b/js/_website/src/routes/redirects.js @@ -2,177 +2,21 @@ export const redirects = { "/guides/creating-a-new-component": "/guides/custom-components-in-five-minutes", "/guides/five-minute-guide": "/guides/custom-components-in-five-minutes", - "/state-in-blocks": "/guides/state-in-blocks", - "/custom-CSS-and-JS": "/guides/custom-CSS-and-JS", - "/blocks-and-event-listeners": "/guides/blocks-and-event-listeners", - "/using-blocks-like-functions": "/guides/using-blocks-like-functions", - "/using-flagging": "/guides/using-flagging", - "/named-entity-recognition": "/guides/named-entity-recognition", - "/real-time-speech-recognition": "/guides/real-time-speech-recognition", - "/developing-faster-with-reload-mode": - "/guides/developing-faster-with-reload-mode", - "/create-your-own-friends-with-a-gan": - "/guides/create-your-own-friends-with-a-gan", - "/setting-up-a-demo-for-maximum-performance": - "/guides/setting-up-a-demo-for-maximum-performance", - "/creating-a-chatbot": "/guides/creating-a-chatbot-fast", - "/guides/creating-a-chatbot": "/guides/creating-a-chatbot-fast", - "/how-to-use-3D-model-component": 
"/guides/how-to-use-3D-model-component", - "/creating-a-new-component": "/guides/creating-a-new-component", - "/running-background-tasks": "/guides/running-background-tasks", - "/reactive-interfaces": "/guides/reactive-interfaces", - "/four-kinds-of-interfaces": "/guides/four-kinds-of-interfaces", - "/interface-state": "/guides/interface-state", - "/more-on-examples": "/guides/more-on-examples", - "/key-features": "/guides/queuing", "/guides/key-features": "/guides/queuing", "/quickstart": "/guides/quickstart", "/sharing-your-app": "/guides/sharing-your-app", - "/connecting-to-a-database": "/guides/connecting-to-a-database", - "/creating-a-realtime-dashboard-from-google-sheets": - "/guides/creating-a-realtime-dashboard-from-google-sheets", - "/plot-component-for-maps": "/guides/plot-component-for-maps", - "/creating-a-dashboard-from-bigquery-data": - "/guides/creating-a-dashboard-from-bigquery-data", - "/using-gradio-for-tabular-workflows": - "/guides/using-gradio-for-tabular-workflows", - "/image-classification-in-pytorch": "/guides/image-classification-in-pytorch", - "/using-hugging-face-integrations": "/guides/using-hugging-face-integrations", - "/Gradio-and-ONNX-on-Hugging-Face": "/guides/Gradio-and-ONNX-on-Hugging-Face", - "/image-classification-with-vision-transformers": - "/guides/image-classification-with-vision-transformers", - "/Gradio-and-Wandb-Integration": "/guides/Gradio-and-Wandb-Integration", - "/image-classification-in-tensorflow": - "/guides/image-classification-in-tensorflow", - "/Gradio-and-Comet": "/guides/Gradio-and-Comet", "/introduction_to_blocks": "/guides/blocks-and-event-listeners", - "/adding_examples_to_your_app": "/guides/key-features", - "/embedding_gradio_demos": "/guides/sharing-your-app", "/getting_started": "/guides/quickstart", - "/building_with_blocks": "/guides/blocks-and-event-listeners", "/guides/building-with-blocks": "/guides/blocks-and-event-listeners", - "/building_interfaces": "/guides/interface-state", - "/guides/building-interfaces": "/guides/interface-state", - "/tabular_data_science_and_plots": "/guides/connecting-to-a-database", - "/guides/tabular-data-science-and-plots": "/guides/connecting-to-a-database", - "/integrating_other_frameworks": "/guides/using-hugging-face-integrations", - "/guides/integrating-other-frameworks": - "/guides/using-hugging-face-integrations", - "/controlling_layout": "/guides/controlling-layout", - "/state_in_blocks": "/guides/state-in-blocks", - "/custom_CSS_and_JS": "/guides/custom-CSS-and-JS", - "/blocks_and_event_listeners": "/guides/blocks-and-event-listeners", - "/using_blocks_like_functions": "/guides/using-blocks-like-functions", - "/using_flagging": "/guides/using-flagging", - "/named_entity_recognition": "/guides/named-entity-recognition", - "/real_time_speech_recognition": "/guides/real-time-speech-recognition", - "/developing_faster_with_reload_mode": - "/guides/developing-faster-with-reload-mode", - "/create_your_own_friends_with_a_gan": - "/guides/create-your-own-friends-with-a-gan", - "/setting_up_a_demo_for_maximum_performance": - "/guides/setting-up-a-demo-for-maximum-performance", - "/creating_a_chatbot": "/guides/creating-a-chatbot", - "/how_to_use_3D_model_component": "/guides/how-to-use-3D-model-component", - "/creating_a_new_component": "/guides/creating-a-new-component", - "/running_background_tasks": "/guides/running-background-tasks", - "/reactive_interfaces": "/guides/reactive-interfaces", - "/more_on_examples_and_flagging": "/guides/more-on-examples", - "/interface_state": 
"/guides/interface-state", - "/key_features": "/guides/key-features", - "/sharing_your_app": "/guides/sharing-your-app", - "/connecting_to_a_database": "/guides/connecting-to-a-database", - "/creating_a_realtime_dashboard_from_google_sheets": - "/guides/creating-a-realtime-dashboard-from-google-sheets", - "/plot_component_for_maps": "/guides/plot-component-for-maps", - "/creating_a_dashboard_from_bigquery_data": - "/guides/creating-a-dashboard-from-bigquery-data", - "/using_gradio_for_tabular_workflows": - "/guides/using-gradio-for-tabular-workflows", - "/image_classification_in_pytorch": "/guides/image-classification-in-pytorch", - "/using_hugging_face_integrations": "/guides/using-hugging-face-integrations", - "/Gradio_and_ONNX_on_Hugging_Face": "/guides/Gradio-and-ONNX-on-Hugging-Face", - "/image_classification_with_vision_transformers": - "/guides/image-classification-with-vision-transformers", - "/Gradio_and_Wandb_Integration": "/guides/Gradio-and-Wandb-Integration", - "/image_classification_in_tensorflow": - "/guides/image-classification-in-tensorflow", "/demos": "/playground", - "/getting-started-with-the-python-client": - "/guides/getting-started-with-the-python-client", - "/getting-started-with-the-js-client": - "/guides/getting-started-with-the-js-client", - "/gradio-and-llm-agents": "/guides/gradio-and-llm-agents", - "/fastapi-app-with-the-gradio-client": - "/guides/fastapi-app-with-the-gradio-client", "/docs/client": "/docs/python-client/client", - "/docs/job": "/docs/python-client/job", - "/docs/set_static_paths": "/docs/gradio/set_static_paths", - "/docs/eventdata": "/docs/gradio/eventdata", - "/docs/examples": "/docs/gradio/examples", - "/docs/progress": "/docs/gradio/progress", - "/docs/make_waveform": "/docs/gradio/make_waveform", - "/docs/load": "/docs/gradio/load", - "/docs/error": "/docs/gradio/error", - "/docs/warning": "/docs/gradio/warning", - "/docs/info": "/docs/gradio/info", - "/docs/flagging": "/docs/gradio/flagging", - "/docs/request": "/docs/gradio/request", - "/docs/mount_gradio_app": "/docs/gradio/mount_gradio_app", "/docs/blocks": "/docs/gradio/blocks", - "/docs/accordion": "/docs/gradio/accordion", - "/docs/column": "/docs/gradio/column", - "/docs/row": "/docs/gradio/row", - "/docs/group": "/docs/gradio/group", - "/docs/tab": "/docs/gradio/tab", - "/docs/annotatedimage": "/docs/gradio/annotatedimage", "/docs/audio": "/docs/gradio/audio", - "/docs/barplot": "/docs/gradio/barplot", - "/docs/button": "/docs/gradio/button", - "/docs/chatbot": "/docs/gradio/chatbot", - "/docs/checkbox": "/docs/gradio/checkbox", - "/docs/checkboxgroup": "/docs/gradio/checkboxgroup", - "/docs/clearbutton": "/docs/gradio/clearbutton", - "/docs/code": "/docs/gradio/code", - "/docs/colorpicker": "/docs/gradio/colorpicker", - "/docs/dataframe": "/docs/gradio/dataframe", - "/docs/dataset": "/docs/gradio/dataset", - "/docs/downloadbutton": "/docs/gradio/downloadbutton", - "/docs/dropdown": "/docs/gradio/dropdown", - "/docs/duplicatebutton": "/docs/gradio/duplicatebutton", - "/docs/file": "/docs/gradio/file", - "/docs/fileexplorer": "/docs/gradio/fileexplorer", - "/docs/gallery": "/docs/gradio/gallery", - "/docs/html": "/docs/gradio/html", - "/docs/highlightedtext": "/docs/gradio/highlightedtext", "/docs/image": "/docs/gradio/image", - "/docs/imageeditor": "/docs/gradio/imageeditor", - "/docs/json": "/docs/gradio/json", - "/docs/label": "/docs/gradio/label", - "/docs/lineplot": "/docs/gradio/lineplot", - "/docs/loginbutton": "/docs/gradio/loginbutton", - "/docs/logoutbutton": 
"/docs/gradio/logoutbutton", - "/docs/markdown": "/docs/gradio/markdown", - "/docs/model3d": "/docs/gradio/model3d", - "/docs/multimodaltextbox": "/docs/gradio/multimodaltextbox", - "/docs/number": "/docs/gradio/number", - "/docs/paramviewer": "/docs/gradio/paramviewer", - "/docs/plot": "/docs/gradio/plot", - "/docs/radio": "/docs/gradio/radio", - "/docs/scatterplot": "/docs/gradio/scatterplot", - "/docs/simpleimage": "/docs/gradio/simpleimage", - "/docs/slider": "/docs/gradio/slider", - "/docs/state": "/docs/gradio/state", "/docs/textbox": "/docs/gradio/textbox", - "/docs/uploadbutton": "/docs/gradio/uploadbutton", - "/docs/video": "/docs/gradio/video", - "/docs/chatinterface": "/docs/gradio/chatinterface", "/docs/interface": "/docs/gradio/interface", - "/docs/tabbedinterface": "/docs/gradio/tabbedinterface", - "/docs/themes": "/docs/gradio/themes", - "/docs/no-reload": "/docs/gradio/NO_RELOAD", "/docs/python-client/python-client": "/docs/python-client/introduction", - "/docs/python-client/js-client": "/docs/js-client", "/docs/gradio/interface#interface-queue": "/docs/gradio/interface", - "/guides/sharing-your-app#security-and-file-access": "/guides/file-access" + "/using_hugging_face_integrations": "/guides/using-hugging-face-integrations", + "/guides/sharing-your-app#security-and-file-access": "/guides/file-access", }; From 69b5fdc51b1d0ec36a7eaa226af45cc9391c5654 Mon Sep 17 00:00:00 2001 From: Ali Abdalla Date: Wed, 18 Sep 2024 17:08:27 +0400 Subject: [PATCH 24/38] Fixes website build in 5.0-dev (#9382) * changes * add changeset --------- Co-authored-by: gradio-pr-bot --- .changeset/nice-badgers-admire.md | 5 ++ .../gradio/04_helpers/make_waveform.svx | 57 ------------------- 2 files changed, 5 insertions(+), 57 deletions(-) create mode 100644 .changeset/nice-badgers-admire.md delete mode 100644 js/_website/src/lib/templates/gradio/04_helpers/make_waveform.svx diff --git a/.changeset/nice-badgers-admire.md b/.changeset/nice-badgers-admire.md new file mode 100644 index 0000000000000..a5cd4965e9244 --- /dev/null +++ b/.changeset/nice-badgers-admire.md @@ -0,0 +1,5 @@ +--- +"website": minor +--- + +feat:Fixes website build in 5.0-dev diff --git a/js/_website/src/lib/templates/gradio/04_helpers/make_waveform.svx b/js/_website/src/lib/templates/gradio/04_helpers/make_waveform.svx deleted file mode 100644 index 7dd94e10fa41b..0000000000000 --- a/js/_website/src/lib/templates/gradio/04_helpers/make_waveform.svx +++ /dev/null @@ -1,57 +0,0 @@ - - - - -# {obj.name} - - -```python -gradio.make_waveform(···) -``` - - -### Description -## {@html style_formatted_text(obj.description)} - - - -{#if obj.example} -### Example Usage -```python -None -``` -{/if} - - -### Initialization - - - -{#if obj.demos && obj.demos.length > 0} - -### Demos - -{/if} - -{#if obj.fns && obj.fns.length > 0} - -### Methods - -{/if} - -{#if obj.guides && obj.guides.length > 0} - -### Guides - -{/if} From 633e75c8b1ed9c74d20ae9693e5f29bda297cf7b Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Wed, 18 Sep 2024 12:03:49 -0500 Subject: [PATCH 25/38] Small tweaks to improve the DX for the "tuples"/"messages" argument in `gr.Chatbot` (#9358) * change format * format * add changeset * revert * revert --------- Co-authored-by: gradio-pr-bot --- .changeset/hungry-dragons-drum.md | 5 +++++ gradio/components/chatbot.py | 24 ++++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 .changeset/hungry-dragons-drum.md diff --git a/.changeset/hungry-dragons-drum.md 
b/.changeset/hungry-dragons-drum.md new file mode 100644 index 0000000000000..f7c5224f7afdc --- /dev/null +++ b/.changeset/hungry-dragons-drum.md @@ -0,0 +1,5 @@ +--- +"gradio": minor +--- + +feat:Small tweaks to improve the DX for the "tuples"/"messages" argument in `gr.Chatbot` diff --git a/gradio/components/chatbot.py b/gradio/components/chatbot.py index aa034e3306604..86b8b1a49ab94 100644 --- a/gradio/components/chatbot.py +++ b/gradio/components/chatbot.py @@ -141,7 +141,7 @@ def __init__( | None ) = None, *, - type: Literal["messages", "tuples"] = "tuples", + type: Literal["messages", "tuples"] | None = None, label: str | None = None, every: Timer | float | None = None, inputs: Component | Sequence[Component] | set[Component] | None = None, @@ -205,17 +205,25 @@ def __init__( show_copy_all_button: If True, will show a copy all button that copies all chatbot messages to the clipboard. """ self.likeable = likeable + if type is None: + warnings.warn( + "You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.", + UserWarning, + ) + type = "tuples" + elif type == "tuples": + warnings.warn( + "The 'tuples' format for chatbot messages is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.", + UserWarning, + ) if type not in ["messages", "tuples"]: - raise ValueError("type must be 'messages' or 'tuples', received: {type}") + raise ValueError( + f"The `type` parameter must be 'messages' or 'tuples', received: {type}" + ) self.type: Literal["tuples", "messages"] = type - if type == "messages": + if self.type == "messages": self.data_model = ChatbotDataMessages else: - # DeprecationWarning gets filtered out by default - warnings.warn( - "The 'tuples' format for chatbot messages is deprecated and will be removed in a future version of Gradio. 
Please set type='messages' instead.", - UserWarning, - ) self.data_model = ChatbotDataTuples self.height = height self.max_height = max_height From 7a725c4a3e25bb3cf7a62e896cc1eeb978b0ec2f Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Wed, 18 Sep 2024 12:04:14 -0500 Subject: [PATCH 26/38] Update babylon.js to `v7` for `gr.Model3D` (#9377) * update package.json * add changeset * add changeset * update pnpm lock * add changeset --------- Co-authored-by: gradio-pr-bot --- .changeset/social-lizards-tickle.md | 6 ++++ js/model3D/package.json | 4 +-- package.json | 4 +-- pnpm-lock.yaml | 56 ++++++++++------------------- 4 files changed, 29 insertions(+), 41 deletions(-) create mode 100644 .changeset/social-lizards-tickle.md diff --git a/.changeset/social-lizards-tickle.md b/.changeset/social-lizards-tickle.md new file mode 100644 index 0000000000000..db581ce7a6d4f --- /dev/null +++ b/.changeset/social-lizards-tickle.md @@ -0,0 +1,6 @@ +--- +"@gradio/model3d": patch +"gradio": patch +--- + +feat:Update babylon.js to `v7` for `gr.Model3D` diff --git a/js/model3D/package.json b/js/model3D/package.json index 34f53cd12b85d..b0486eecf17f7 100644 --- a/js/model3D/package.json +++ b/js/model3D/package.json @@ -15,8 +15,8 @@ "@gradio/utils": "workspace:^", "@gradio/wasm": "workspace:^", "@types/babylon": "^6.16.6", - "babylonjs": "^4.2.1", - "babylonjs-loaders": "^4.2.1", + "babylonjs": "^7.25.1", + "babylonjs-loaders": "^7.25.1", "dequal": "^2.0.2", "gsplat": "^1.0.5" }, diff --git a/package.json b/package.json index 4a3007231c815..ef65cfb098292 100644 --- a/package.json +++ b/package.json @@ -55,8 +55,8 @@ "@typescript-eslint/eslint-plugin": "^7.8.0", "@typescript-eslint/parser": "^7.8.0", "autoprefixer": "^10.4.19", - "babylonjs": "^5.57.1", - "babylonjs-loaders": "^5.57.1", + "babylonjs": "^7.25.1", + "babylonjs-loaders": "^7.25.1", "esbuild": "^0.21.0", "eslint": "^9.1.1", "eslint-plugin-svelte": "^2.38.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f17d97fc2bb31..81a3f61e1b3d9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -80,11 +80,11 @@ importers: specifier: ^10.4.19 version: 10.4.19(postcss@8.4.38) babylonjs: - specifier: ^5.57.1 - version: 5.57.1 + specifier: ^7.25.1 + version: 7.25.2 babylonjs-loaders: - specifier: ^5.57.1 - version: 5.57.1 + specifier: ^7.25.1 + version: 7.25.2 esbuild: specifier: ^0.21.0 version: 0.21.0 @@ -1703,11 +1703,11 @@ importers: specifier: ^6.16.6 version: 6.16.6 babylonjs: - specifier: ^4.2.1 - version: 4.2.1 + specifier: ^7.25.1 + version: 7.25.2 babylonjs-loaders: - specifier: ^4.2.1 - version: 4.2.1 + specifier: ^7.25.1 + version: 7.25.2 dequal: specifier: ^2.0.2 version: 2.0.3 @@ -5536,23 +5536,14 @@ packages: resolution: {integrity: sha512-GAwkz0AihzY5bkwIY5QDR+LvsRQgB/B+1foMPvi0FZPMl5fjD7ICiznUiBdLYMH1QYe6vqu4gWYytZOccLouFw==} engines: {node: '>= 10.0.0'} - babylonjs-gltf2interface@4.2.1: - resolution: {integrity: sha512-ZBfKgIoztO1x1nyf9aPQJ+WXmB6Kw0VlyxvcKchIixbICqeeExiN8nmjvypwXC4hl+5ZDMnUKQNrIhh7uzulnA==} + babylonjs-gltf2interface@7.25.2: + resolution: {integrity: sha512-Vzwp0FmgTdxXH1RsPW8jRReSTkhTcrpulE6uJk4Kd4UTsRi1lpER31LpeysEYrjFf5SS+vc5gx4li8/TAUAWNw==} - babylonjs-gltf2interface@5.57.1: - resolution: {integrity: sha512-RZnaKfJ6Q/AYLdIjBYMRxCW/HPEC8jabAL1U8wJ0KVziw6NSbSV6S80S22fUCPTyaZ7nCekn1TYg1IPmJ/eA6w==} + babylonjs-loaders@7.25.2: + resolution: {integrity: sha512-FMZ5G2x8QgGowLRMxVifVuMNFnY4aaQuUIwqxrgcrKigaHKx1uNuk1+A4+vCG7i43TJJnTr4++QJF7nn6JdU0g==} - babylonjs-loaders@4.2.1: - resolution: {integrity: 
sha512-WLpbadXDyxbBQogU0SOrpwgAWN/DJ1xn5kCRD31NVoCbBIpvkdDIvzpRvze2esxrlv/KM8wbDu62ShJd6rQnVQ==} - - babylonjs-loaders@5.57.1: - resolution: {integrity: sha512-MGPv0rroDIky4aLHv8iitnZWUPvJfRB4dA0EbMbXJGatDzKclwwK1gJik3TjdmxwlpXpZKwa3reoj0OP00jl6A==} - - babylonjs@4.2.1: - resolution: {integrity: sha512-FQdJ2VTENUpUJQ30ddihwTjV6K94kglET0P7jV8OQzjA4eez3sotmG22Fn9+8yb069SA26KnrAGOI2sKMQ7BCw==} - - babylonjs@5.57.1: - resolution: {integrity: sha512-X1t3mi8GuJjFVziN1yBJtekphilGN9VfOHm2tn/H6gra+WS7UZkrOOHLlKwYEXKdU73opxOR95jHXmv692KR6g==} + babylonjs@7.25.2: + resolution: {integrity: sha512-hucoQfYHrBtgq7niZEHwSDcvcESDiJ74AYY42KSDA43bGI+lq8UkW2wgs6euOFrigR4RxSORHWm0nkNrXCn41Q==} balanced-match@1.0.2: resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} @@ -14132,23 +14123,14 @@ snapshots: dependencies: '@babel/types': 7.23.3 - babylonjs-gltf2interface@4.2.1: {} - - babylonjs-gltf2interface@5.57.1: {} + babylonjs-gltf2interface@7.25.2: {} - babylonjs-loaders@4.2.1: + babylonjs-loaders@7.25.2: dependencies: - babylonjs: 4.2.1 - babylonjs-gltf2interface: 4.2.1 - - babylonjs-loaders@5.57.1: - dependencies: - babylonjs: 5.57.1 - babylonjs-gltf2interface: 5.57.1 - - babylonjs@4.2.1: {} + babylonjs: 7.25.2 + babylonjs-gltf2interface: 7.25.2 - babylonjs@5.57.1: {} + babylonjs@7.25.2: {} balanced-match@1.0.2: {} From 498996e97658acf0184d6a8ff9a88a19daaf5cd0 Mon Sep 17 00:00:00 2001 From: Hannah Date: Wed, 18 Sep 2024 21:57:23 +0100 Subject: [PATCH 27/38] Fix `gr.ImageEditor` toolbar cutoff (#9371) * fix wrap alignment * add changeset --------- Co-authored-by: gradio-pr-bot --- .changeset/open-chefs-say.md | 6 ++++++ js/imageeditor/shared/ImageEditor.svelte | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 .changeset/open-chefs-say.md diff --git a/.changeset/open-chefs-say.md b/.changeset/open-chefs-say.md new file mode 100644 index 0000000000000..85cb7ccb0c9b9 --- /dev/null +++ b/.changeset/open-chefs-say.md @@ -0,0 +1,6 @@ +--- +"@gradio/imageeditor": minor +"gradio": minor +--- + +feat:Fix `gr.ImageEditor` toolbar cutoff diff --git a/js/imageeditor/shared/ImageEditor.svelte b/js/imageeditor/shared/ImageEditor.svelte index 371464a453934..59e5d9579db71 100644 --- a/js/imageeditor/shared/ImageEditor.svelte +++ b/js/imageeditor/shared/ImageEditor.svelte @@ -378,10 +378,9 @@ .wrap { display: flex; width: 100%; - height: 100%; + height: 90%; position: relative; justify-content: center; - align-items: flex-start; } .canvas { position: absolute; From deef3b7e35a535ef2fb83f10d5c3ec3994c6eda1 Mon Sep 17 00:00:00 2001 From: Ali Abdalla Date: Thu, 19 Sep 2024 00:44:24 +0400 Subject: [PATCH 28/38] add lite upload (#9385) --- .github/workflows/previews-deploy.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/previews-deploy.yml b/.github/workflows/previews-deploy.yml index 7510855e259ae..494dfa00bda51 100644 --- a/.github/workflows/previews-deploy.yml +++ b/.github/workflows/previews-deploy.yml @@ -94,7 +94,15 @@ jobs: export AWS_DEFAULT_REGION=us-east-1 aws s3 cp ${{ steps.set_tarball_name.outputs.tarball_path }} s3://gradio-npm-previews/${{ needs.changes.outputs.sha }}/ echo "js_tarball_url=https://gradio-npm-previews.s3.amazonaws.com/${{ needs.changes.outputs.sha }}/${{ steps.set_tarball_name.outputs.tarball_name }}" >> $GITHUB_OUTPUT - + - name: Upload Lite to S3 + id: upload_lite + run: | + export AWS_ACCESS_KEY_ID=${{ secrets.LITE_PREVIEWS_AWS_S3_ACCESS_KEY }} + 
export AWS_SECRET_ACCESS_KEY=${{ secrets.LITE_PREVIEWS_AWS_S3_SECRET_ACCESS_KEY }} + export AWS_DEFAULT_REGION=us-east-1 + mkdir -p ./gradio-lite-files + tar -xzf ./gradio-lite-${{ needs.changes.outputs.sha }}/gradio-lite-*.tgz -C ./gradio-lite-files + aws s3 cp ./gradio-lite-files/package/ s3://gradio-lite-previews/${{ needs.changes.outputs.sha }}/ --recursive - name: Install Hub Client Library run: pip install huggingface-hub==0.23.2 # temporary, but ensures the script cannot be modified in a PR @@ -147,6 +155,11 @@ jobs: ```bash npm install ${{ needs.deploy.outputs.js_tarball_url }} ``` + + **Use Lite from this PR** + ```html + <script type="module" src="https://gradio-lite-previews.s3.amazonaws.com/${{ needs.changes.outputs.sha }}/dist/lite.js"></script> + ``` comment-spaces-failure: uses: "./.github/workflows/comment-queue.yml" needs: [deploy, changes]
From f4b335c2118fe0eacf8d791f937f0914772ac089 Mon Sep 17 00:00:00 2001 From: Ali Abdalla Date: Thu, 19 Sep 2024 01:19:39 +0400 Subject: [PATCH 29/38] fix sha (#9386) --- .github/workflows/previews-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/previews-build.yml b/.github/workflows/previews-build.yml index e6bc6b3bf10e8..31253a200c6c1 100644 --- a/.github/workflows/previews-build.yml +++ b/.github/workflows/previews-build.yml @@ -53,7 +53,7 @@ jobs: - name: Upload Lite NPM package uses: actions/upload-artifact@v4 with: - name: gradio-lite-${{ github.sha }} + name: gradio-lite-${{ needs.changes.outputs.sha }} path: js/lite/gradio-lite-*.tgz - name: install deps run: python -m pip install build
From 9d017ae785f0ba405db3dba93f8367508cc8448c Mon Sep 17 00:00:00 2001 From: Ali Abdalla Date: Thu, 19 Sep 2024 01:57:34 +0400 Subject: [PATCH 30/38] Fix lite ci (#9387) * fix sha * fix name * fix name --- .github/workflows/previews-build.yml | 2 +- .github/workflows/previews-deploy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/previews-build.yml b/.github/workflows/previews-build.yml index 31253a200c6c1..bbedf95bb2b21 100644 --- a/.github/workflows/previews-build.yml +++ b/.github/workflows/previews-build.yml @@ -53,7 +53,7 @@ jobs: - name: Upload Lite NPM package uses: actions/upload-artifact@v4 with: - name: gradio-lite-${{ needs.changes.outputs.sha }} + name: gradio-lite-tar path: js/lite/gradio-lite-*.tgz - name: install deps run: python -m pip install build diff --git a/.github/workflows/previews-deploy.yml b/.github/workflows/previews-deploy.yml index 494dfa00bda51..7e572a2097aac 100644 --- a/.github/workflows/previews-deploy.yml +++ b/.github/workflows/previews-deploy.yml @@ -101,7 +101,7 @@ jobs: export AWS_SECRET_ACCESS_KEY=${{ secrets.LITE_PREVIEWS_AWS_S3_SECRET_ACCESS_KEY }} export AWS_DEFAULT_REGION=us-east-1 mkdir -p ./gradio-lite-files - tar -xzf ./gradio-lite-${{ needs.changes.outputs.sha }}/gradio-lite-*.tgz -C ./gradio-lite-files + tar -xzf ./gradio-lite-tar/gradio-lite-*.tgz -C ./gradio-lite-files aws s3 cp ./gradio-lite-files/package/ s3://gradio-lite-previews/${{ needs.changes.outputs.sha }}/ --recursive - name: Install Hub Client Library run: pip install huggingface-hub==0.23.2
From 788f5cbd72a76268b1a90b83862ecc3de8c679d5 Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Wed, 18 Sep 2024 19:01:20 -0400 Subject: [PATCH 31/38] Add code --- demo/yolov10_webcam_stream/run.py | 9 +-------- guides/07_streaming/02_object-detection-from-webcam.md | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/demo/yolov10_webcam_stream/run.py b/demo/yolov10_webcam_stream/run.py index c8d27db6a281c..1eb280eb22eb4 100644 --- a/demo/yolov10_webcam_stream/run.py +++
b/demo/yolov10_webcam_stream/run.py @@ -25,17 +25,10 @@ def yolov10_inference(image, conf_threshold): gr.HTML( """ <h1 style='text-align: center'>
- YOLOv10 Webcam Stream + YOLO V10 Webcam Stream Object Detection </h1>
""" ) - gr.HTML( - """ - <h3 style='text-align: center'>
- <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a> - </h3>
- """ - ) with gr.Column(elem_classes=["my-column"]): with gr.Group(elem_classes=["my-group"]): image = gr.Image(type="pil", label="Image", sources="webcam") diff --git a/guides/07_streaming/02_object-detection-from-webcam.md b/guides/07_streaming/02_object-detection-from-webcam.md index 15fbe96ba5dc8..fbde63eb2a5fc 100644 --- a/guides/07_streaming/02_object-detection-from-webcam.md +++ b/guides/07_streaming/02_object-detection-from-webcam.md @@ -42,7 +42,7 @@ We will use the `plot` method to draw a bounding box around each detected object The Gradio demo will be pretty straightforward but we'll do a couple of things that are specific to streaming: * The user's webcam will be both an input and an output. That way, the user will only see their stream with the detected objects. -* We'll use the `time_limit` and `stream_every` parameters of the `stream` event. The `time_limit` parameter will mean that we'll process each user's stream for that amount of time. The `stream_every` function will control how frequently the webcam stream is sent to the server. +* We'll use the `time_limit` and `stream_every` parameters of the `stream` event. The `time_limit` parameter will mean that we'll process each user's stream for that amount of time. In a multi-user setting, such as on Spaces, this means that after this period of time, the next user in the queue will be able to use the demo. The `stream_every` function will control how frequently the webcam stream is sent to the server. In addition, we'll apply some custom css so that the webcam and slider are centered on the page. From 3b9019b7059b13eb2f234255537b6ea25ea2a01b Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Wed, 18 Sep 2024 19:03:12 -0400 Subject: [PATCH 32/38] feedback --- demo/yolov10_webcam_stream/requirements.txt | 2 ++ demo/yolov10_webcam_stream/run.ipynb | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 demo/yolov10_webcam_stream/requirements.txt diff --git a/demo/yolov10_webcam_stream/requirements.txt b/demo/yolov10_webcam_stream/requirements.txt new file mode 100644 index 0000000000000..fedb000072b32 --- /dev/null +++ b/demo/yolov10_webcam_stream/requirements.txt @@ -0,0 +1,2 @@ +safetensors==0.4.3 +git+https://github.com/THU-MIG/yolov10.git \ No newline at end of file diff --git a/demo/yolov10_webcam_stream/run.ipynb b/demo/yolov10_webcam_stream/run.ipynb index 304f0664033a4..0b0cd2599c081 100644 --- a/demo/yolov10_webcam_stream/run.ipynb +++ b/demo/yolov10_webcam_stream/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: yolov10_webcam_stream"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "from ultralytics import YOLOv10\n", "\n", "model = YOLOv10.from_pretrained(\"jameslahm/yolov10n\")\n", "\n", "\n", "def yolov10_inference(image, conf_threshold):\n", " width, _ = image.size\n", " import time\n", "\n", " start = time.time()\n", " results = model.predict(source=image, imgsz=width, conf=conf_threshold)\n", " end = time.time()\n", " annotated_image = results[0].plot()\n", " print(\"time\", end - start)\n", " return annotated_image[:, :, ::-1]\n", "\n", "\n", "css = \"\"\".my-group {max-width: 600px !important; max-height: 
600 !important;}\n", " .my-column {display: flex !important; justify-content: center !important; align-items: center !important};\"\"\"\n", "\n", "\n", "with gr.Blocks(css=css) as app:\n", " gr.HTML(\n", " \"\"\"\n", " <h1 style='text-align: center'>
\n", " YOLOv10 Webcam Stream\n", "

\n", " \"\"\"\n", " )\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " arXiv | github\n", "

\n", " \"\"\"\n", " )\n", " with gr.Column(elem_classes=[\"my-column\"]):\n", " with gr.Group(elem_classes=[\"my-group\"]):\n", " image = gr.Image(type=\"pil\", label=\"Image\", sources=\"webcam\")\n", " conf_threshold = gr.Slider(\n", " label=\"Confidence Threshold\",\n", " minimum=0.0,\n", " maximum=1.0,\n", " step=0.05,\n", " value=0.30,\n", " )\n", " image.stream(\n", " fn=yolov10_inference,\n", " inputs=[image, conf_threshold],\n", " outputs=[image],\n", " stream_every=0.1,\n", " time_limit=30,\n", " )\n", "\n", "if __name__ == \"__main__\":\n", " app.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: yolov10_webcam_stream"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio safetensors==0.4.3 git+https://github.com/THU-MIG/yolov10.git"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "from ultralytics import YOLOv10\n", "\n", "model = YOLOv10.from_pretrained(\"jameslahm/yolov10n\")\n", "\n", "\n", "def yolov10_inference(image, conf_threshold):\n", " width, _ = image.size\n", " import time\n", "\n", " start = time.time()\n", " results = model.predict(source=image, imgsz=width, conf=conf_threshold)\n", " end = time.time()\n", " annotated_image = results[0].plot()\n", " print(\"time\", end - start)\n", " return annotated_image[:, :, ::-1]\n", "\n", "\n", "css = \"\"\".my-group {max-width: 600px !important; max-height: 600 !important;}\n", " .my-column {display: flex !important; justify-content: center !important; align-items: center !important};\"\"\"\n", "\n", "\n", "with gr.Blocks(css=css) as app:\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " YOLO V10 Webcam Stream Object Detection\n", "

\n", " \"\"\"\n", " )\n", " with gr.Column(elem_classes=[\"my-column\"]):\n", " with gr.Group(elem_classes=[\"my-group\"]):\n", " image = gr.Image(type=\"pil\", label=\"Image\", sources=\"webcam\")\n", " conf_threshold = gr.Slider(\n", " label=\"Confidence Threshold\",\n", " minimum=0.0,\n", " maximum=1.0,\n", " step=0.05,\n", " value=0.30,\n", " )\n", " image.stream(\n", " fn=yolov10_inference,\n", " inputs=[image, conf_threshold],\n", " outputs=[image],\n", " stream_every=0.1,\n", " time_limit=30,\n", " )\n", "\n", "if __name__ == \"__main__\":\n", " app.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file From 5552acaf418b6888f11c5de74d604da968057656 Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Wed, 18 Sep 2024 19:14:13 -0400 Subject: [PATCH 33/38] link --- guides/04_additional-features/02_streaming-outputs.md | 8 ++++++-- guides/04_additional-features/03_streaming-inputs.md | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/guides/04_additional-features/02_streaming-outputs.md b/guides/04_additional-features/02_streaming-outputs.md index fca501585f4f4..a479d9ab5dc4d 100644 --- a/guides/04_additional-features/02_streaming-outputs.md +++ b/guides/04_additional-features/02_streaming-outputs.md @@ -33,7 +33,7 @@ For audio, the next "chunk" can be either an `.mp3` or `.wav` file or a `bytes` For video, the next "chunk" has to be either `.mp4` file or a file with `h.264` codec with a `.ts` extension. For smooth playback, make sure chunks are consistent lengths and larger than 1 second. -We'll finish with some simple examples illustrating these points. For a deeper tutorial on streaming audio with [transformers](https://huggingface.co/docs/transformers/index), please see [this guide](/main/guides/streaming-ai-generated-audio). +We'll finish with some simple examples illustrating these points. ### Streaming Audio @@ -67,4 +67,8 @@ gr.Interface(keep_repeating, gr.Video(sources=["webcam"], format="mp4"), gr.Video(streaming=True, autoplay=True) ).launch() -``` \ No newline at end of file +``` + +## End-to-End Examples + +For an end-to-end example of streaming media, see the object detection from video [guide](/main/guides/objec-detection-from-video) or the streaming ai generated audio with [transformers](https://huggingface.co/docs/transformers/index) [guide](/main/guides/streaming-ai-generated-audio). \ No newline at end of file diff --git a/guides/04_additional-features/03_streaming-inputs.md b/guides/04_additional-features/03_streaming-inputs.md index 7c22cd7d7c91b..6deb1fd929fee 100644 --- a/guides/04_additional-features/03_streaming-inputs.md +++ b/guides/04_additional-features/03_streaming-inputs.md @@ -38,7 +38,7 @@ $demo_streaming_filter_unified Your streaming function should be stateless. It should take the current input and return its corresponding output. However, there are cases where you may want to keep track of past inputs or outputs. For example, you may want to keep a buffer of the previous `k` inputs to improve the accuracy of your transcription demo. You can do this with Gradio's `gr.State()` component. -Let's showcase this with a sample demo +Let's showcase this with a sample demo: ```python def transcribe_handler(current_audio, state, transcript): @@ -60,3 +60,7 @@ with gr.Blocks() as demo: demo.launch() ``` + +## End-to-End Examples + +For an end-to-end example of streaming from the webcam, see the object detection from webcam [guide](/main/guides/objec-detection-from-webcam). 
\ No newline at end of file From 96fc0327e2bc26d0b4143fd1dfc1bb20590e3639 Mon Sep 17 00:00:00 2001 From: gradio-pr-bot Date: Wed, 18 Sep 2024 23:15:11 +0000 Subject: [PATCH 34/38] add changeset --- .changeset/floppy-nails-grab.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/floppy-nails-grab.md diff --git a/.changeset/floppy-nails-grab.md b/.changeset/floppy-nails-grab.md new file mode 100644 index 0000000000000..b59772a49e004 --- /dev/null +++ b/.changeset/floppy-nails-grab.md @@ -0,0 +1,5 @@ +--- +"gradio": minor +--- + +feat:Object Detection From Webcam Stream Guide From a7fc03d6504949ed6bb21fe402e860409a973cb8 Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Wed, 18 Sep 2024 19:17:16 -0400 Subject: [PATCH 35/38] code --- guides/04_additional-features/02_streaming-outputs.md | 2 +- guides/04_additional-features/03_streaming-inputs.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/guides/04_additional-features/02_streaming-outputs.md b/guides/04_additional-features/02_streaming-outputs.md index a479d9ab5dc4d..f218a5051851a 100644 --- a/guides/04_additional-features/02_streaming-outputs.md +++ b/guides/04_additional-features/02_streaming-outputs.md @@ -71,4 +71,4 @@ gr.Interface(keep_repeating, ## End-to-End Examples -For an end-to-end example of streaming media, see the object detection from video [guide](/main/guides/objec-detection-from-video) or the streaming ai generated audio with [transformers](https://huggingface.co/docs/transformers/index) [guide](/main/guides/streaming-ai-generated-audio). \ No newline at end of file +For an end-to-end example of streaming media, see the object detection from video [guide](/main/guides/object-detection-from-video) or the streaming ai generated audio with [transformers](https://huggingface.co/docs/transformers/index) [guide](/main/guides/streaming-ai-generated-audio). \ No newline at end of file diff --git a/guides/04_additional-features/03_streaming-inputs.md b/guides/04_additional-features/03_streaming-inputs.md index 6deb1fd929fee..78c04dc84a9d2 100644 --- a/guides/04_additional-features/03_streaming-inputs.md +++ b/guides/04_additional-features/03_streaming-inputs.md @@ -63,4 +63,4 @@ demo.launch() ## End-to-End Examples -For an end-to-end example of streaming from the webcam, see the object detection from webcam [guide](/main/guides/objec-detection-from-webcam). \ No newline at end of file +For an end-to-end example of streaming from the webcam, see the object detection from webcam [guide](/main/guides/object-detection-from-webcam). 
\ No newline at end of file From 43fe4dfee305bc1830b749385461ff42765e5f2c Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Wed, 18 Sep 2024 19:31:01 -0400 Subject: [PATCH 36/38] check --- demo/rt-detr-object-detection/draw_boxes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/demo/rt-detr-object-detection/draw_boxes.py b/demo/rt-detr-object-detection/draw_boxes.py index bbc5aa13ec6d8..9a0442e6b5ead 100644 --- a/demo/rt-detr-object-detection/draw_boxes.py +++ b/demo/rt-detr-object-detection/draw_boxes.py @@ -25,7 +25,7 @@ def draw_bounding_boxes(image, results: dict, model, threshold=0.3): color = get_color(label) # Draw bounding box - draw.rectangle(box, outline=color, width=3) + draw.rectangle(box, outline=color, width=3) # type: ignore # Prepare text text = f"{label}: {score:.2f}" @@ -35,8 +35,8 @@ def draw_bounding_boxes(image, results: dict, model, threshold=0.3): # Draw text background draw.rectangle( - [box[0], box[1] - text_height - 4, box[0] + text_width, box[1]], - fill=color, + [box[0], box[1] - text_height - 4, box[0] + text_width, box[1]], # type: ignore + fill=color, # type: ignore ) # Draw text From e7ce4c545d8375fc4acafe9afe2da76db35138d6 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Wed, 18 Sep 2024 21:51:04 -0500 Subject: [PATCH 37/38] Update guides/04_additional-features/02_streaming-outputs.md --- guides/04_additional-features/02_streaming-outputs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guides/04_additional-features/02_streaming-outputs.md b/guides/04_additional-features/02_streaming-outputs.md index f218a5051851a..ab0cd1ff00a8a 100644 --- a/guides/04_additional-features/02_streaming-outputs.md +++ b/guides/04_additional-features/02_streaming-outputs.md @@ -71,4 +71,4 @@ gr.Interface(keep_repeating, ## End-to-End Examples -For an end-to-end example of streaming media, see the object detection from video [guide](/main/guides/object-detection-from-video) or the streaming ai generated audio with [transformers](https://huggingface.co/docs/transformers/index) [guide](/main/guides/streaming-ai-generated-audio). \ No newline at end of file +For an end-to-end example of streaming media, see the object detection from video [guide](/main/guides/object-detection-from-video) or the streaming AI-generated audio with [transformers](https://huggingface.co/docs/transformers/index) [guide](/main/guides/streaming-ai-generated-audio). \ No newline at end of file From 0fd2b0d4b9a4d6d3aff7e7feb847a94e01535bdf Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Thu, 19 Sep 2024 00:40:18 -0500 Subject: [PATCH 38/38] Update guides/07_streaming/02_object-detection-from-webcam.md --- guides/07_streaming/02_object-detection-from-webcam.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guides/07_streaming/02_object-detection-from-webcam.md b/guides/07_streaming/02_object-detection-from-webcam.md index fbde63eb2a5fc..f6439c2e7b1eb 100644 --- a/guides/07_streaming/02_object-detection-from-webcam.md +++ b/guides/07_streaming/02_object-detection-from-webcam.md @@ -90,4 +90,4 @@ You can check out our demo hosted on Hugging Face Spaces [here](https://huggingf It is also embedded on this page below -$demo_YOLOv10-webcam-stream \ No newline at end of file +$demo_yolov10_webcam_stream \ No newline at end of file
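
For readers tracing the streaming changes above, the demo code they touch reduces to one small pattern: a webcam `gr.Image` wired back to itself through the `stream` event. The sketch below restates that pattern in minimal, self-contained form. It is illustrative only: `process_frame` is a hypothetical stand-in for a real per-frame model call (such as the `yolov10_inference` function in the demo above), while the component arguments and the `stream_every`/`time_limit` values mirror the patched demo rather than required defaults.

```python
import gradio as gr
from PIL import Image, ImageOps


def process_frame(image: Image.Image, conf_threshold: float) -> Image.Image:
    # Hypothetical stand-in for real per-frame inference (e.g. running a
    # YOLOv10 prediction at the given confidence threshold). Mirroring the
    # frame lets the sketch run without downloading any model weights.
    return ImageOps.mirror(image)


with gr.Blocks() as demo:
    # The webcam component serves as both input and output, so each user
    # sees their own stream annotated in place.
    image = gr.Image(type="pil", label="Image", sources="webcam")
    conf_threshold = gr.Slider(
        label="Confidence Threshold", minimum=0.0, maximum=1.0, step=0.05, value=0.30
    )
    image.stream(
        fn=process_frame,
        inputs=[image, conf_threshold],
        outputs=[image],
        stream_every=0.1,  # send a webcam frame to the server every 0.1 s
        time_limit=30,  # process one user's stream for at most 30 s
        # before handing the queue to the next user
    )

if __name__ == "__main__":
    demo.launch()
```

Swapping in a real model only means replacing the body of `process_frame` (and adding whatever extra inputs it needs); the event wiring stays the same.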