stanford-crfm · yifanmai · Aug 11, 2023 · Aug 4, 2023 · Aug 7, 2023 · Aug 7, 2023
diff --git a/src/helm/benchmark/metrics/summarization_critique_metrics.py b/src/helm/benchmark/metrics/summarization_critique_metrics.py
@@ -44,7 +44,7 @@ def __init__(self, num_respondents: int) -> None:
                 CritiqueQuestionTemplate(
                     name=_RELEVANCE_NAME,
                     question_type=QuestionType.MULTIPLE_CHOICE,
-                    text="To what extend the summary include only important information from the source document? "
+                    text="To what extent the summary include only important information from the source document? "
                     "(1 = not at all, 5 = very much)",
                     options=["1", "2", "3", "4", "5"],
                 ),

diff --git a/src/helm/proxy/clients/mechanical_turk_critique_exporter.py b/src/helm/proxy/clients/mechanical_turk_critique_exporter.py
@@ -9,6 +9,7 @@
 from helm.common.critique_request import CritiqueQuestionTemplate, CritiqueRequest, CritiqueTaskTemplate, QuestionType
 from helm.common.general import ensure_directory_exists
 from helm.common.hierarchical_logger import hlog
+from helm.proxy.clients.mechanical_turk_utils import replace_emoji_characters
 
 
 def _indent_to_level(text: str, level: int) -> str:
@@ -45,20 +46,22 @@ def _render_template_crowd_html(task_template: CritiqueTaskTemplate) -> str:
                 return valid;
             }
 
-            window.onload = function() {
+            document.addEventListener("DOMContentLoaded", function(event) {
                 document.querySelector('crowd-form').onsubmit = function(e) {
                     if (!validateForm()) {
                         alert("Please answer all the questions in order to submit.");
                         e.preventDefault();
                     }
                 }
-            }
+            });
         </script>"""
     )
 
-    instructions_crowd_html = f"<div>{_format_template_tags(task_template.instructions)}</div>"
-    instruction_question_break_html = "<br><br><h4>Please answer the questions below:</h4>"
-    questions_crowd_html = "<br>\n<br>\n".join(
+    instructions_crowd_html = (
+        f'<p style="white-space: pre-wrap;">{_format_template_tags(task_template.instructions)}</p>'
+    )
+    divider_html = "\n<hr>"
+    questions_crowd_html = "\n<hr>\n".join(
         [_render_question_crowd_html(question) for question in task_template.questions]
     )
     return textwrap.dedent(
@@ -67,8 +70,9 @@ def _render_template_crowd_html(task_template: CritiqueTaskTemplate) -> str:
         {_indent_to_level(validation_crowd_html, 2)}
         <crowd-form answer-format="flatten-objects">
             {_indent_to_level(instructions_crowd_html, 3)}
-            {_indent_to_level(instruction_question_break_html, 3)}
+            {_indent_to_level(divider_html, 3)}
             {_indent_to_level(questions_crowd_html, 3)}
+            {_indent_to_level(divider_html, 3)}
         </crowd-form>"""
     )
 
@@ -91,16 +95,16 @@ def _render_question_crowd_html(question_template: CritiqueQuestionTemplate) ->
         )
     return textwrap.dedent(
         f"""\
-        <div>
-            <p>{_format_template_tags(question_template.text)}</p>
-            {_indent_to_level(question_input_crowd_html, 3)}
-        </div>"""
+        <p style=\"white-space: pre-wrap;\">
+            {_format_template_tags(question_template.text)}
+        </p>
+        {_indent_to_level(question_input_crowd_html, 2)}"""
     )
 
 
 def _render_multiple_choice_options_crowd_html(name: str, options: List[str]) -> str:
     """Render the Crowd HTML for the options of a multiple-choice question."""
-    buttons_crowd_html = "<br>\n".join(
+    buttons_crowd_html = "\n<br>\n".join(
         [
             f"""<crowd-radio-button name="{name}.{index}">{_format_template_tags(option)}</crowd-radio-button>"""
             for index, option in enumerate(options)
@@ -116,7 +120,7 @@ def _render_multiple_choice_options_crowd_html(name: str, options: List[str]) ->
 
 def _render_checkbox_options_crowd_html(name: str, options: List[str]) -> str:
     """Render the Crowd HTML for the options of a checkbox question."""
-    return "<br>\n".join(
+    return "\n<br>\n".join(
         [
             f"""<crowd-checkbox name="{name}.{index}">{_format_template_tags(option)}</crowd-checkbox>"""
             for index, option in enumerate(options)
@@ -195,4 +199,7 @@ def export_request(request: CritiqueRequest):
     with _exporters_lock:
         if template.name not in _exporters:
             _exporters[template.name] = _MechanicalTurkCritiqueRequestExporter(template)
-    _exporters[template.name].export(request.fields)
+    encoded_fields = {
+        field_name: replace_emoji_characters(field_value) for field_name, field_value in request.fields.items()
+    }
+    _exporters[template.name].export(encoded_fields)
diff --git a/src/helm/proxy/clients/mechanical_turk_critique_importer.py b/src/helm/proxy/clients/mechanical_turk_critique_importer.py
@@ -13,7 +13,7 @@
     CritiqueRequestResult,
 )
 from helm.common.hierarchical_logger import hlog
-
+from helm.proxy.clients.mechanical_turk_utils import replace_emoji_characters
 
 # A representation of fields that can be used as a dict key.
 _CritiqueRequestKey = Tuple[Tuple[str, str], ...]
@@ -119,4 +119,7 @@ def import_request_result(request: CritiqueRequest) -> Optional[CritiqueRequestR
         if template.name not in _importer:
             _importer[template.name] = _MechanicalTurkRequestImporter(template)
             _importer[template.name].initialize()
-    return _importer[template.name].import_request_result(request.fields)
+    encoded_fields = {
+        field_name: replace_emoji_characters(field_value) for field_name, field_value in request.fields.items()
+    }
+    return _importer[template.name].import_request_result(encoded_fields)
diff --git a/src/helm/proxy/clients/mechanical_turk_utils.py b/src/helm/proxy/clients/mechanical_turk_utils.py
@@ -0,0 +1,45 @@
+import json
+import re
+import sys
+
+
+# Source: https://github.com/charman/mturk-emoji
+def replace_emoji_characters(s):
+    """Replace 4-byte characters with numeric character references
+
+    This function takes a Unicode string containing 4-byte Unicode
+    characters, e.g. 😀, and replaces each 4-byte character with its
+    decimal XML/HTML numeric character reference, e.g.:
+
+      &#128512;
+
+    Args:
+        s (Unicode string):
+    Returns:
+        Unicode string with all 4-byte Unicode characters in the source
+        string replaced with numeric character references
+    """
+
+    def _emoji_match_to_span(emoji_match):
+        """
+        Args:
+            emoji_match (MatchObject):
+
+        Returns:
+            Unicode string
+        """
+        return emoji_match.group().encode("ascii", "xmlcharrefreplace").decode()
+
+    # The procedure for matching Emoji characters is based on this
+    # StackOverflow post:
+    #   http://stackoverflow.com/questions/12636489/python-convert-4-byte-char-to-avoid-mysql-error-incorrect-string-value
+    if sys.maxunicode == 1114111:
+        # Python was built with '--enable-unicode=ucs4' (always the case on Python 3.3+, see PEP 393)
+        highpoints = re.compile("[\U00010000-\U0010ffff]")
+    elif sys.maxunicode == 65535:
+        # Python was built with '--enable-unicode=ucs2' (narrow build; only possible before Python 3.3)
+        highpoints = re.compile("[\uD800-\uDBFF][\uDC00-\uDFFF]")
+    else:
+        raise UnicodeError("Unable to determine if Python was built using UCS-2 or UCS-4")
+
+    return highpoints.sub(_emoji_match_to_span, s)