Skip to content

Commit

Permalink
Merge pull request #1131 from kiwix/ungarbled_binary_resources
Browse files Browse the repository at this point in the history
Ungarbled binary resources
  • Loading branch information
kelson42 committed Sep 14, 2024
2 parents f5c91cc + c8524b9 commit 327fec1
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 3 deletions.
30 changes: 29 additions & 1 deletion scripts/kiwix-compile-resources
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,32 @@ resource_decl_template = """{namespaces_open}
extern const std::string {identifier};
{namespaces_close}"""

# File type extensions of static resources that are treated as binary data.
BINARY_RESOURCE_EXTENSIONS = {'.ico', '.png', '.ttf'}

# File type extensions of static resources that are treated as text.
TEXT_RESOURCE_EXTENSIONS = {
    '.css',
    '.html',
    '.js',
    '.json',
    '.svg',
    '.tmpl',
    '.webmanifest',
    '.xml',
}

# Sanity check performed at import time: an extension must never be listed
# as both binary and text, otherwise the classification would be ambiguous.
if not BINARY_RESOURCE_EXTENSIONS.isdisjoint(TEXT_RESOURCE_EXTENSIONS):
    raise RuntimeError(f"The following file type extensions are declared to be both binary and text: {BINARY_RESOURCE_EXTENSIONS.intersection(TEXT_RESOURCE_EXTENSIONS)}")

def is_binary_resource(filename):
    """Tell whether a static resource is binary, judging by its extension.

    Args:
        filename: path or name of the resource file; only the extension
            (as returned by os.path.splitext) is examined.

    Returns:
        True if the extension is listed in BINARY_RESOURCE_EXTENSIONS,
        False if it is listed in TEXT_RESOURCE_EXTENSIONS.

    Raises:
        RuntimeError: if the extension appears in neither set.
    """
    _, extension = os.path.splitext(filename)
    is_binary = extension in BINARY_RESOURCE_EXTENSIONS
    is_text = extension in TEXT_RESOURCE_EXTENSIONS
    if not is_binary and not is_text:
        # all file type extensions of static resources must be listed
        # in either BINARY_RESOURCE_EXTENSIONS or TEXT_RESOURCE_EXTENSIONS
        raise RuntimeError(f"Unknown file type extension: {extension}")
    return is_binary

class Resource:
def __init__(self, base_dirs, filename, cacheid=None):
filename = filename
Expand All @@ -71,7 +97,9 @@ class Resource:
for base_dir in base_dirs:
try:
with open(os.path.join(base_dir, filename), 'rb') as f:
self.data = f.read().replace(b"\r\n", b"\n")
self.data = f.read()
if not is_binary_resource(filename):
self.data = self.data.replace(b"\r\n", b"\n")
found = True
break
except FileNotFoundError:
Expand Down
2 changes: 1 addition & 1 deletion src/server/internalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ std::unique_ptr<Response> InternalServer::handle_search_request(const RequestCon
} catch(std::runtime_error& e) {
// Searcher->search will throw a runtime error if there is no valid xapian database to do the search.
// (in case of a zim file not containing an index)
const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css);
const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css_tmpl);
HTTPErrorResponse response(request, MHD_HTTP_NOT_FOUND,
"fulltext-search-unavailable",
"404-page-heading",
Expand Down
2 changes: 1 addition & 1 deletion static/resources_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ templates/catalog_v2_entry.xml
templates/catalog_v2_partial_entry.xml
templates/catalog_v2_categories.xml
templates/catalog_v2_languages.xml
templates/url_of_search_results_css
templates/url_of_search_results_css.tmpl
templates/viewer_settings.js
templates/no_js_library_page.html
templates/no_js_download.html
Expand Down
File renamed without changes.
51 changes: 51 additions & 0 deletions test/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,57 @@ R"EXPECTEDRESULT( <link type="text/css" href="/ROOT%23%3F/skin/search_results
}
}

// Extracts the cache id from a resource URL.
//
// Returns everything that follows the first "?cacheid=" marker in `url`,
// or an empty string if the URL contains no such marker.
std::string getCacheIdFromUrl(const std::string& url)
{
  const std::string marker = "?cacheid=";
  const std::string::size_type markerPos = url.find(marker);
  if ( markerPos == std::string::npos ) {
    return std::string();
  }
  return url.substr(markerPos + marker.size());
}

// Runs `cmd` via popen() (or _popen() on Windows) and returns everything the
// command wrote to its standard output.
//
// An empty string is returned if the pipe could not be opened. The exit
// status of the command is not inspected.
std::string runExternalCmdAndGetItsOutput(const std::string& cmd)
{
  std::string cmdOutput;

#ifdef _WIN32
#define popen _popen
#define pclose _pclose
#endif

  if (FILE* pPipe = popen(cmd.c_str(), "r"))
  {
    char buf[128];
    while (fgets(buf, static_cast<int>(sizeof(buf)), pPipe)) {
      // fgets() NUL-terminates the data it has read. Append only that part
      // instead of the whole buffer, whose tail may hold stale or
      // uninitialized bytes.
      cmdOutput += buf;
    }

    pclose(pPipe);
  }

  return cmdOutput;
}

// Builds a python3 one-liner that downloads `url` (urllib.request) and
// prints the hex SHA-1 digest of the response body (hashlib), runs it as an
// external command and returns the captured output of that command.
std::string getSha1OfResponseData(const std::string& url)
{
  std::string pythonScript("import urllib.request as req; ");
  pythonScript += "import hashlib; ";
  pythonScript += "print(hashlib.sha1(req.urlopen('";
  pythonScript += url;
  pythonScript += "').read()).hexdigest())";

  std::string cmd("python3 -c \"");
  cmd += pythonScript;
  cmd += "\"";

  return runExternalCmdAndGetItsOutput(cmd);
}

// For each resource of kind STATIC_CONTENT returned by all200Resources(),
// fetches it from the local test server and checks that the cacheid found in
// its URL equals the first 8 characters of the SHA-1 hex digest of the
// served data.
TEST_F(ServerTest, CacheIdsOfStaticResourcesMatchTheSha1HashOfResourceContent)
{
  const std::string urlPrefix = "http://localhost:" + std::to_string(SERVER_PORT);

  for ( const Resource& res : all200Resources() ) {
    if ( res.kind != STATIC_CONTENT ) {
      continue;
    }

    const TestContext ctx{ {"url", res.url} };
    const std::string fullUrl = urlPrefix + res.url;
    const std::string sha1 = getSha1OfResponseData(fullUrl);
    // Only the leading 8 characters of the digest are compared.
    ASSERT_EQ(sha1.substr(0, 8), getCacheIdFromUrl(res.url)) << ctx;
  }
}

const char* urls400[] = {
"/ROOT%23%3F/search",
"/ROOT%23%3F/search?content=zimfile",
Expand Down

0 comments on commit 327fec1

Please sign in to comment.