Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Set charset to utf-8 when adding headers for certain text content types #7044

Merged
1 change: 1 addition & 0 deletions changelog.d/7044.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug that renders UTF-8 text files incorrectly when loaded from media.
27 changes: 26 additions & 1 deletion synapse/rest/media/v1/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,23 @@

logger = logging.getLogger(__name__)

# Text-based content types. When a media type matches one of these exactly
# (i.e. it carries no charset parameter of its own), the Content-Type header
# is sent with "; charset=UTF-8" appended.
#
# Entries must be lowercase: the lookup lower-cases the media type before
# testing membership in this list.
TEXT_CONTENT_TYPES = [
    "text/css",
    "text/csv",
    "text/html",
    "text/calendar",
    "text/plain",
    "text/javascript",
    "application/json",
    "application/ld+json",
    "application/rtf",
    "image/svg+xml",
    "text/xml",
]


def parse_media_id(request):
try:
Expand Down Expand Up @@ -96,7 +113,15 @@ def add_file_headers(request, media_type, file_size, upload_name):
def _quote(x):
return urllib.parse.quote(x.encode("utf-8"))

request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
# Default to UTF-8 only when the media type is a bare text content type with
# no charset of its own, e.g. for 'text/css' but not 'text/css; charset=UTF-16'.
content_type = (
TheStranjer marked this conversation as resolved.
Show resolved Hide resolved
media_type + "; charset=UTF-8"
if media_type.lower() in TEXT_CONTENT_TYPES
else media_type
)

request.setHeader(b"Content-Type", content_type.encode("UTF-8"))
if upload_name:
# RFC6266 section 4.1 [1] defines both `filename` and `filename*`.
#
Expand Down