Skip to content

Commit

Permalink
optimizing mimetype serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
mikea committed Aug 24, 2023
1 parent 617ca56 commit 51f1c9c
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 12 deletions.
32 changes: 20 additions & 12 deletions src/workerd/util/mimetype.c++
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
#include "strings.h"
#include <kj/debug.h>
#include <kj/string-tree.h>
#include <workerd/util/string-buffer.h>

namespace workerd {

namespace {

// Returns true when `c` is one of the four whitespace characters recognized
// here: carriage return, line feed, horizontal tab, or space.
bool isWhitespace(const char c) {
  switch (c) {
    case '\r':
    case '\n':
    case '\t':
    case ' ':
      return true;
    default:
      return false;
  }
}
Expand Down Expand Up @@ -253,39 +255,45 @@ kj::String MimeType::essence() const {
}

// Serializes only the parameter list and returns it as a newly allocated kj::String.
// The formatting itself is done by the paramsToString(ToStringBuffer&) overload; 512 is
// the minimum heap chunk size the buffer uses once its inline storage is exhausted.
// NOTE(review): this page renders the commit as a diff without +/- markers, so the
// `auto str = kj::strTree();` line appears to be the removed pre-commit line (nothing
// else in this body reads `str`) -- confirm against the raw diff.
kj::String MimeType::paramsToString() const {
auto str = kj::strTree();
ToStringBuffer buffer(512);
paramsToString(buffer);
return buffer.toString();
}

// Appends every parameter to `buffer` as `key=value`, with ';' between consecutive
// parameters (no leading or trailing separator). Values are written in one of three forms:
//   - empty value: the two characters `""`;
//   - hasInvalidCodepoints(value, isTokenChar) is true (presumably: the value contains a
//     non-token character -- verify against that helper): wrapped in double quotes, with
//     every '"' in the value written as `\"`;
//   - otherwise: the value verbatim.
// NOTE(review): this page renders the commit as a diff without +/- markers. Each
// `str = kj::strTree(...)` line and the final `return str.flatten();` appear to be the
// removed pre-commit lines paired with the `buffer.append(...)` line that follows them;
// `str` is not declared in this function -- confirm against the raw diff.
// NOTE(review): only '"' is escaped inside quoted values; a backslash in the value is
// emitted as-is -- confirm this is intended.
void MimeType::paramsToString(MimeType::ToStringBuffer& buffer) const {
bool first = true;
for (auto& param : params()) {
str = kj::strTree(kj::mv(str), first ? "" : ";", param.key, "=");
// Separator goes before every parameter except the first one.
buffer.append(first ? "" : ";", param.key, "=");
first = false;
if (param.value.size() == 0) {
str = kj::strTree(kj::mv(str), "\"\"");
buffer.append("\"\"");
} else if (hasInvalidCodepoints(param.value, isTokenChar)) {
auto view = param.value.asPtr();
str = kj::strTree(kj::mv(str), "\"");
buffer.append("\"");
// Walk the value one '"' at a time: emit the text before the quote, then the escaped
// quote, and continue with the remainder until nothing is left.
while (view.size() > 0) {
KJ_IF_MAYBE(pos, view.findFirst('"')) {
str = kj::strTree(kj::mv(str), view.slice(0, *pos), "\\\"");
buffer.append(view.slice(0, *pos), "\\\"");
view = view.slice(*pos + 1);
} else {
str = kj::strTree(kj::mv(str), view);
// No more quotes: emit the rest and empty the view so the loop terminates.
buffer.append(view);
view = view.slice(view.size());
}
}
str = kj::strTree(kj::mv(str), "\"");
buffer.append("\"");
} else {
str = kj::strTree(kj::mv(str), param.value);
buffer.append(param.value);
}
}
return str.flatten();
}

// Serializes the whole MIME type as `type/subtype`, followed by ';' and the parameter
// list when params_ is non-empty. Everything is appended into a single ToStringBuffer
// (512 is its minimum heap chunk size) and flattened into one kj::String at the end.
// NOTE(review): this page renders the commit as a diff without +/- markers. The lines that
// assign or flatten `str` appear to be the removed pre-commit lines, which is why two
// `return` statements are visible -- confirm against the raw diff.
kj::String MimeType::toString() const {
auto str = kj::strTree(type(), "/", subtype());
ToStringBuffer buffer(512);
buffer.append(type(), "/", subtype());
if (params_.size() > 0) {
str = kj::strTree(kj::mv(str), ";", paramsToString());
buffer.append(";");
// Write the parameters into the same buffer instead of building a temporary string.
paramsToString(buffer);
}
return str.flatten();
return buffer.toString();
}

MimeType MimeType::clone(ParseOptions options) const {
Expand Down
8 changes: 8 additions & 0 deletions src/workerd/util/mimetype.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

namespace workerd {

// Forward declaration of the StringBuffer class template (defined in
// workerd/util/string-buffer.h). A declaration is enough for this header because it only
// names StringBuffer<128> in a type alias and in a reference parameter.
template<size_t>
class StringBuffer;

class MimeType final {
public:
using MimeParams = kj::HashMap<kj::String, kj::String>;
Expand Down Expand Up @@ -96,6 +99,11 @@ class MimeType final {
kj::String type_;
kj::String subtype_;
MimeParams params_;

using ToStringBuffer = StringBuffer<128>;
// 128 bytes will keep all reasonable mimetypes on the stack.

void paramsToString(ToStringBuffer& buffer) const;
};

kj::String KJ_STRINGIFY(const MimeType& state);
Expand Down
48 changes: 48 additions & 0 deletions src/workerd/util/string-buffer-test.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (c) 2023 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0

#include "workerd/util/string-buffer.h"
#include <kj/test.h>

namespace workerd {
namespace {

KJ_TEST("append StringPtr") {
  // A kj::StringPtr argument must come back unchanged from toString().
  StringBuffer<100> sb(100);
  sb.append(kj::StringPtr("abcdef"));

  auto actual = sb.toString();
  KJ_EXPECT("abcdef"_kj == actual);
}

KJ_TEST("append String") {
  // An owning kj::String passed as an lvalue must round-trip through the buffer intact.
  kj::String owned = kj::heapString("abc"_kj);

  StringBuffer<100> sb(100);
  sb.append(owned);

  KJ_EXPECT("abc"_kj == sb.toString());
}

KJ_TEST("append char array") {
  // Exercises the `const char (&)[N]` overload, both with a named array and with a string
  // literal. The array length includes the terminating NUL, which must not be appended.
  StringBuffer<100> buffer(100);
  const char str[] = "abc";
  buffer.append(str);
  buffer.append("def");
  KJ_EXPECT("abcdef"_kj == buffer.toString());
}

KJ_TEST("overflow") {
  // 10 bytes of inline storage followed by 11-byte heap chunks: appending 3 characters at
  // a time makes the writes straddle chunk boundaries at many different offsets.
  StringBuffer<10> sb(11);

  int appended = 0;
  while (appended < 100) {
    sb.append("abc");
    ++appended;
  }

  KJ_EXPECT(sb.toString().size() == 300);
  KJ_EXPECT(
      "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
      "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
      "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
      "abcabcabcabcabcabcabc"_kj == sb.toString());
}

} // namespace
} // namespace workerd
120 changes: 120 additions & 0 deletions src/workerd/util/string-buffer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright (c) 2023 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0
#pragma once

#include <algorithm>
#include <cstring>
#include <list>
#include <kj/string.h>

namespace workerd {

template<size_t StackSize>
class StringBuffer {
  // String buffer optimized for appending a lot of strings together.
  //
  // The first StackSize bytes are stored in an array embedded in the object itself, so no
  // heap allocation happens while the accumulated string stays within StackSize bytes
  // (the terminating \0 is not stored). Once the embedded array is full, heap chunks of at
  // least `heapChunkSize` bytes are allocated as needed. A chunk is always filled
  // completely before the next one is allocated, so only the last chunk can be partial.

public:
  KJ_DISALLOW_COPY_AND_MOVE(StringBuffer);
  // `tail` may point into the embedded array, so the object must never be relocated.

  explicit StringBuffer(size_t heapChunkSize)
      : heapChunkSize(heapChunkSize), tail(&arr[0]), cap(StackSize) {}
  // `heapChunkSize` is the minimum size of every heap chunk allocated after the embedded
  // array fills up.

  void append() {}
  // Terminates the variadic recursion below; appending nothing is a no-op.

  template <typename First, typename... Rest>
  void append(First&& first, Rest&&...rest) {
    // Appends each argument in order. Supported argument types are those accepted by the
    // appendImpl() overloads: kj::StringPtr, kj::String, kj::ArrayPtr<const char>, and
    // 0-terminated char arrays such as string literals.
    appendImpl(kj::fwd<First>(first));
    append(kj::fwd<Rest>(rest)...);
  }

  kj::String toString() const {
    // Returns everything appended so far as one contiguous heap string. The buffer is left
    // untouched, so it can be appended to and flattened again.
    auto result = kj::heapString(len);
    copyTo(result.begin());
    return result;
  }

private:
  const size_t heapChunkSize;
  // minimum heap chunk size

  char arr[StackSize];
  // chunk embedded in the object (on the stack for automatic instances)

  std::list<kj::Array<char>> chunks;
  // heap chunks, in the order they were filled

  char *tail;
  // points just past the last used byte of the current chunk

  size_t cap;
  // number of bytes still available in the current chunk

  size_t len = 0;
  // total length of the data appended so far

  void appendImpl(const char* ptr, size_t size) {
    // Appends `size` bytes starting at `ptr`, spilling into a new heap chunk when the
    // current chunk cannot hold all of them.

    // Nothing to append. Returning early also avoids calling memcpy() with a null source,
    // which is undefined behavior even for a zero length (an empty kj::ArrayPtr may have a
    // null begin()).
    if (size == 0) return;

    // Fill whatever is left of the current chunk first.
    size_t toCopy = std::min(size, cap);
    memcpy(tail, ptr, toCopy);
    tail += toCopy;
    cap -= toCopy;

    if (toCopy != size) {
      // The current chunk is now completely full; put the rest into a new heap chunk.
      size_t remaining = size - toCopy;
      size_t chunkSize = std::max(remaining, heapChunkSize); // don't chunk large strings
      auto chunk = kj::heapArray<char>(chunkSize);

      // copy the rest of the string to the new chunk
      memcpy(chunk.begin(), ptr + toCopy, remaining);
      tail = chunk.begin() + remaining;
      cap = chunk.size() - remaining;

      // Moving the kj::Array transfers ownership of the allocation, so `tail` stays valid.
      chunks.push_back(kj::mv(chunk));
    }

    len += size;
  }

  void appendImpl(const kj::StringPtr& str) {
    appendImpl(str.begin(), str.size());
  }

  template<size_t size>
  void appendImpl(const char (&chars)[size]) {
    appendImpl(chars, size - 1 /* assume 0-terminated strings */);
  }

  void appendImpl(const kj::ArrayPtr<const char>& chars) {
    appendImpl(chars.begin(), chars.size());
  }

  void appendImpl(const kj::String& str) {
    appendImpl(str.asPtr());
  }

  void copyTo(char* dest) const {
    // Copies all `len` accumulated bytes to `dest`, which must have room for them.

    // copy the embedded portion first
    size_t onStack = std::min(len, StackSize);
    memcpy(dest, arr, onStack);
    dest += onStack;

    // copy from heap chunks
    if (onStack < len) {
      size_t remaining = len - onStack;
      for (auto& chunk: chunks) {
        size_t inChunk = std::min(remaining, chunk.size()); // last chunk won't be full
        memcpy(dest, chunk.begin(), inChunk);
        dest += inChunk;
        remaining -= inChunk;
      }

      KJ_IREQUIRE(remaining == 0);
    }
  }
};

} // namespace workerd

0 comments on commit 51f1c9c

Please sign in to comment.