fix: image captioning (#289)

* fix: image captioning
* fix: download model
* fix: reqs
deeppavlov · Jan 18, 2023 · 479427b
1 parent 7e8a14b
commit 479427b
Show file tree

Hide file tree

Showing 6 changed files with 17 additions and 12 deletions.
diff --git a/assistant_dists/dream_multimodal/pipeline_conf.json b/assistant_dists/dream_multimodal/pipeline_conf.json
@@ -124,7 +124,7 @@
                     "timeout": 3,
                     "url": "http://image-captioning:8123/respond"
                 },
-                "dialog_formatter": "state_formatters.dp_formatters:image_formatter_service",
+                "dialog_formatter": "state_formatters.dp_formatters:image_captioning_formatter",
                 "response_formatter": "state_formatters.dp_formatters:simple_formatter_service",
                 "state_manager_method": "add_annotation"
             }

diff --git a/services/image_captioning/Dockerfile b/services/image_captioning/Dockerfile
@@ -32,7 +32,7 @@ RUN apt-get install wget -y
 
 RUN mkdir -p /opt/conda/lib/python3.7/site-packages/data/models
 
-RUN gdown 1WBQl0WlzvdctslJyLNgedYpRrWAZC69X -O /opt/conda/lib/python3.7/site-packages/data/models/caption.pt
+RUN wget http://files.deeppavlov.ai/dream_data/image_captioning/caption.pt -O /opt/conda/lib/python3.7/site-packages/data/models/caption.pt
 
 COPY . /ofa
 

diff --git a/services/image_captioning/requirements.txt b/services/image_captioning/requirements.txt
@@ -7,4 +7,5 @@ sentry-sdk[flask]==0.14.1
 healthcheck==1.3.3
 jinja2<=3.0.3
 Werkzeug<=2.0.3
-gdown==4.5.1
+gdown==4.5.1
+protobuf==3.20.1
diff --git a/services/image_captioning/server.py b/services/image_captioning/server.py
@@ -125,7 +125,7 @@ def apply_half(t):
 def respond():
     st_time = time.time()
 
-    img_paths = request.json.get("text", [])
+    img_paths = request.json.get("image_paths", [])
     captions = []
     try:
         for img_path in img_paths:
@@ -140,12 +140,14 @@ def respond():
             with torch.no_grad():
                 caption, scores = eval_step(task, generator, models, sample)
 
-            captions.append(caption)
+            captions.append(caption[0])
 
     except Exception as exc:
         logger.exception(exc)
         sentry_sdk.capture_exception(exc)
+        captions = [{}] * len(img_paths)
 
     total_time = time.time() - st_time
-    logger.info(f"captioning exec time: {total_time:.3f}s")
-    return jsonify({"caption": captions})
+    logger.info(f"image-captioning exec time: {total_time:.3f}s")
+    logger.info(f"image-captioning result: {captions}")
+    return jsonify(captions)
diff --git a/services/image_captioning/test.py b/services/image_captioning/test.py
@@ -4,15 +4,13 @@
 def test_respond():
     url = "http://0.0.0.0:8123/respond"
 
-    img_path = ["example.jpg"]
+    image_paths = ["example.jpg"]
 
-    request_data = {"text": img_path}
+    request_data = {"image_paths": image_paths}
     result = requests.post(url, json=request_data).json()
-    caption = result["caption"][0][0]["caption"]
-    print(caption)
     obligatory_word = "bird"
 
-    assert obligatory_word in caption, f"Expected the word '{obligatory_word}' to present in caption"
+    assert obligatory_word in result[0]["caption"], f"Expected the word '{obligatory_word}' to present in caption"
     print("\n", "Success!!!")
 
 

diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py
@@ -996,3 +996,7 @@ def context_formatter_dialog(dialog: Dict) -> List[Dict]:
     dialog = utils.replace_with_annotated_utterances(dialog, mode="punct_sent")
     contexts = [[uttr["text"] for uttr in dialog["utterances"][-num_last_utterances:]]]
     return [{"contexts": contexts}]
+
+def image_captioning_formatter(dialog: Dict) -> List[Dict]:
+    # Used by: image_captioning
+    return [{"image_paths": [dialog["human_utterances"][-1].get("attributes", {}).get("image")]}]