Skip to content

Commit

Permalink
Disk Cache for Pyodide Wheels (#1851)
Browse files Browse the repository at this point in the history
* Disk Cache for Pyodide Wheels

* Add command line argument for disk cache root

* Better API for instantiating a disabled disk cache

* Remove logs when using disk cache

* Fix bug with reportUndefinedSymbols shim
  • Loading branch information
garrettgu10 authored Apr 2, 2024
1 parent 54f0b9d commit 6044ec6
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 9 deletions.
23 changes: 23 additions & 0 deletions src/pyodide/internal/builtin_wrappers.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { default as UnsafeEval } from "internal:unsafe-eval";
import { default as DiskCache } from "pyodide-internal:disk_cache";

let lastTime;
let lastDelta = 0;
Expand Down Expand Up @@ -129,3 +130,25 @@ export async function wasmInstantiate(module, imports) {
const instance = new WebAssembly.Instance(module, imports);
return { module, instance };
}

/**
 * Replaces `globalThis.fetch` with a wrapper that consults the disk cache
 * before going to the network, but only for requests whose URL belongs to
 * `origin`. Everything else is forwarded to the original `fetch` untouched.
 *
 * Cache entries are keyed by the last path segment of the URL (the file name).
 *
 * @param {string} origin - Origin (e.g. "https://example.com") whose
 *   responses should be served from / stored into the disk cache.
 * @returns {void}
 */
export function patchFetch(origin) {
  const origFetch = globalThis.fetch;

  globalThis.fetch = async function (url, options) {
    // Anything that does not expose a matching `origin` (including plain
    // strings, which have no `origin` property) bypasses the cache.
    if (url.origin !== origin) {
      return origFetch(url, options);
    }

    const fileName = url.pathname.substring(url.pathname.lastIndexOf("/") + 1);
    if (fileName === "") {
      // A path ending in "/" has no file name, so there is nothing to key a
      // cache entry on; let the request go straight to the network.
      return origFetch(url, options);
    }

    const cached = DiskCache.get(fileName);
    if (cached) {
      return new Response(cached);
    }

    // Cache miss: continue with the original fetch.
    const response = await origFetch(url, options);
    if (!response.ok) {
      // Never persist error bodies (404 pages, 5xx responses, ...): doing so
      // would make every later lookup return the error payload disguised as
      // a successful response. Hand the failure back to the caller as-is.
      return response;
    }

    const arrayBuffer = await response.arrayBuffer();
    DiskCache.put(fileName, arrayBuffer);
    // The original body has been consumed, so hand back a fresh Response.
    return new Response(arrayBuffer);
  };
}
4 changes: 3 additions & 1 deletion src/pyodide/internal/setupPackages.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ import { parseTarInfo } from "pyodide-internal:tar";
import { createTarFS } from "pyodide-internal:tarfs";
import { createMetadataFS } from "pyodide-internal:metadatafs";
import { default as LOCKFILE } from "pyodide-internal:generated/pyodide-lock.json";
import { REQUIREMENTS } from "pyodide-internal:metadata";
import { REQUIREMENTS, WORKERD_INDEX_URL } from "pyodide-internal:metadata";
import { patchFetch } from "pyodide-internal:builtin_wrappers";
import { simpleRunPython } from "pyodide-internal:util";

const canonicalizeNameRegex = /[-_.]+/g;
Expand Down Expand Up @@ -92,6 +93,7 @@ export function patchLoadPackage(pyodide) {
pyodide.loadPackage = disabledLoadPackage;
return;
}
patchFetch(new URL(WORKERD_INDEX_URL).origin);
const origLoadPackage = pyodide.loadPackage;
function loadPackage(packages, options) {
return origLoadPackage(packages, {
Expand Down
34 changes: 34 additions & 0 deletions src/workerd/api/pyodide/pyodide.c++
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include "pyodide.h"
#include <kj/string.h>
#include <workerd/util/string-buffer.h>
#include "kj/array.h"
#include "kj/common.h"
#include "kj/debug.h"
Expand Down Expand Up @@ -120,4 +122,36 @@ jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(Worker::Reader conf) {
false /* isTracing */, false /* createBaselineSnapshot */, kj::none /* memorySnapshot */);
}

// Shared, permanently-empty cache root. The default DiskCache constructor binds its
// `cacheRoot` reference to this object, which makes get()/put() no-ops for that instance.
const kj::Maybe<kj::Own<const kj::Directory>> DiskCache::NULL_CACHE_ROOT = kj::none;

// Reads the cache entry stored under `key`.
//
// Returns the full contents of the file named `key` inside the cache root, or kj::none when
// the cache is disabled (no root directory) or no such file can be opened.
jsg::Optional<kj::Array<kj::byte>> DiskCache::get(jsg::Lock& js, kj::String key) {
  KJ_IF_SOME(dir, cacheRoot) {
    kj::Path entryPath(key);
    auto maybeEntry = dir->tryOpenFile(entryPath);

    KJ_IF_SOME(entry, maybeEntry) {
      return entry->readAllBytes();
    }
  }

  // Cache disabled, or the entry does not exist.
  return kj::none;
}

// Stores `data` in the cache under `key`.
//
// Does nothing when the cache is disabled (no root directory). Otherwise the file named `key`
// inside the cache root is opened (created if missing) and its contents are replaced by
// `data`. Failure to open the file is logged and otherwise ignored.
void DiskCache::put(jsg::Lock& js, kj::String key, kj::Array<kj::byte> data) {
  KJ_IF_SOME(dir, cacheRoot) {
    kj::Path entryPath(key);
    auto maybeEntry =
        dir->tryOpenFile(entryPath, kj::WriteMode::CREATE | kj::WriteMode::MODIFY);

    KJ_IF_SOME(entry, maybeEntry) {
      entry->writeAll(data);
      return;
    }

    KJ_LOG(ERROR, "DiskCache: Failed to open file", key);
  }
}

} // namespace workerd::api::pyodide
21 changes: 21 additions & 0 deletions src/workerd/api/pyodide/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "kj/array.h"
#include "kj/debug.h"
#include <kj/common.h>
#include <kj/filesystem.h>
#include <pyodide/generated/pyodide_extra.capnp.h>
#include <pyodide/pyodide.capnp.h>
#include <workerd/jsg/jsg.h>
Expand Down Expand Up @@ -241,6 +242,25 @@ class DisabledInternalJaeger : public jsg::Object {
}
};

// This cache is used by Pyodide to store wheels fetched over the internet across workerd restarts in local dev only
class DiskCache: public jsg::Object {
static const kj::Maybe<kj::Own<const kj::Directory>> NULL_CACHE_ROOT; // always set to kj::none

const kj::Maybe<kj::Own<const kj::Directory>> &cacheRoot;
public:
DiskCache(): cacheRoot(NULL_CACHE_ROOT) {}; // Disabled disk cache
DiskCache(const kj::Maybe<kj::Own<const kj::Directory>> &cacheRoot): cacheRoot(cacheRoot) {};

jsg::Optional<kj::Array<kj::byte>> get(jsg::Lock& js, kj::String key);
void put(jsg::Lock& js, kj::String key, kj::Array<kj::byte> data);

JSG_RESOURCE_TYPE(DiskCache) {
JSG_METHOD(get);
JSG_METHOD(put);
}
};


// A limiter which will throw if the startup is found to exceed limits. The script will still be
// able to run for longer than the limit, but an error will be thrown as soon as the startup
// finishes. This way we can enforce a Python-specific startup limit.
Expand Down Expand Up @@ -300,6 +320,7 @@ jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(Worker::Reader conf);
api::pyodide::PackagesTarReader, \
api::pyodide::PyodideMetadataReader, \
api::pyodide::ArtifactBundler, \
api::pyodide::DiskCache, \
api::pyodide::DisabledInternalJaeger,\
api::pyodide::SimplePythonLimiter

Expand Down
3 changes: 2 additions & 1 deletion src/workerd/server/server.c++
Original file line number Diff line number Diff line change
Expand Up @@ -2650,7 +2650,8 @@ kj::Own<Server::Service> Server::makeWorker(kj::StringPtr name, config::Worker::
featureFlags.asReader(),
*limitEnforcer,
kj::atomicAddRef(*observer),
*memoryCacheProvider);
*memoryCacheProvider,
diskCacheRoot);
auto inspectorPolicy = Worker::Isolate::InspectorPolicy::DISALLOW;
if (inspectorOverride != kj::none) {
// For workerd, if the inspector is enabled, it is always fully trusted.
Expand Down
4 changes: 4 additions & 0 deletions src/workerd/server/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ class Server: private kj::TaskSet::ErrorHandler {
// Wraps file descriptor `fd` in a heap-allocated kj::FdOutputStream and stores it in
// `controlOverride`.
void enableControl(uint fd) {
controlOverride = kj::heap<kj::FdOutputStream>(fd);
}
// Takes ownership of the (possibly absent) directory handle and stores it in `diskCacheRoot`.
// Passing kj::none leaves the server without a disk cache directory.
void setDiskCacheRoot(kj::Maybe<kj::Own<const kj::Directory>>&& root) {
  diskCacheRoot = kj::mv(root);
}

// Runs the server using the given config.
kj::Promise<void> run(jsg::V8System& v8System, config::Config::Reader conf,
Expand Down Expand Up @@ -90,6 +93,7 @@ class Server: private kj::TaskSet::ErrorHandler {
kj::Network& network;
kj::EntropySource& entropySource;
kj::Function<void(kj::String)> reportConfigError;
kj::Maybe<kj::Own<const kj::Directory>> diskCacheRoot;

bool experimental = false;

Expand Down
29 changes: 25 additions & 4 deletions src/workerd/server/workerd-api.c++
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ struct WorkerdApi::Impl {
kj::Own<CompatibilityFlags::Reader> features;
JsgWorkerdIsolate jsgIsolate;
api::MemoryCacheProvider& memoryCacheProvider;
kj::Maybe<kj::Own<const kj::Directory>>& pyodideCacheRoot;

class Configuration {
public:
Expand All @@ -130,10 +131,11 @@ struct WorkerdApi::Impl {
CompatibilityFlags::Reader featuresParam,
IsolateLimitEnforcer& limitEnforcer,
kj::Own<jsg::IsolateObserver> observer,
api::MemoryCacheProvider& memoryCacheProvider)
api::MemoryCacheProvider& memoryCacheProvider,
kj::Maybe<kj::Own<const kj::Directory>>& pyodideCacheRoot)
: features(capnp::clone(featuresParam)),
jsgIsolate(v8System, Configuration(*this), kj::mv(observer), limitEnforcer.getCreateParams()),
memoryCacheProvider(memoryCacheProvider) {}
memoryCacheProvider(memoryCacheProvider), pyodideCacheRoot(pyodideCacheRoot) {}

static v8::Local<v8::String> compileTextGlobal(JsgWorkerdIsolate::Lock& lock,
capnp::Text::Reader reader) {
Expand Down Expand Up @@ -173,9 +175,10 @@ WorkerdApi::WorkerdApi(jsg::V8System& v8System,
CompatibilityFlags::Reader features,
IsolateLimitEnforcer& limitEnforcer,
kj::Own<jsg::IsolateObserver> observer,
api::MemoryCacheProvider& memoryCacheProvider)
api::MemoryCacheProvider& memoryCacheProvider,
kj::Maybe<kj::Own<const kj::Directory>> &pyodideCacheRoot)
: impl(kj::heap<Impl>(v8System, features, limitEnforcer, kj::mv(observer),
memoryCacheProvider)) {}
memoryCacheProvider, pyodideCacheRoot)) {}
WorkerdApi::~WorkerdApi() noexcept(false) {}

kj::Own<jsg::Lock> WorkerdApi::lock(jsg::V8StackScope& stackScope) const {
Expand Down Expand Up @@ -454,6 +457,24 @@ void WorkerdApi::compileModules(
jsg::ModuleRegistry::Type::INTERNAL);
}

// Inject disk cache module
//
// Registers the internal builtin module "pyodide-internal:disk_cache". Its resolver callback
// wraps a DiskCache built from impl->pyodideCacheRoot into a JS object and returns it as the
// module's ObjectModuleInfo.
{
using ModuleInfo = jsg::ModuleRegistry::ModuleInfo;
using ObjectModuleInfo = jsg::ModuleRegistry::ObjectModuleInfo;
using ResolveMethod = jsg::ModuleRegistry::ResolveMethod;
auto specifier = "pyodide-internal:disk_cache";
// The DiskCache only holds a reference to impl->pyodideCacheRoot (see the DiskCache
// constructor), so that member must stay alive as long as the module object does.
auto diskCache = jsg::alloc<DiskCache>(impl->pyodideCacheRoot);
modules->addBuiltinModule(
specifier,
// NOTE(review): the lambda is `mutable` and moves `diskCache` out when invoked, so a second
// invocation would wrap a moved-from value. Presumably the registry calls the resolver at
// most once per module — confirm.
[specifier = kj::str(specifier), diskCache = kj::mv(diskCache)](
jsg::Lock& js, ResolveMethod, kj::Maybe<const kj::Path&>&) mutable {
auto& wrapper = JsgWorkerdIsolate_TypeWrapper::from(js.v8Isolate);
auto wrap = wrapper.wrap(js.v8Context(), kj::none, kj::mv(diskCache));
return kj::Maybe(ModuleInfo(js, specifier, kj::none, ObjectModuleInfo(js, wrap)));
},
jsg::ModuleRegistry::Type::INTERNAL);
}

// Inject a (disabled) SimplePythonLimiter
{
using ModuleInfo = jsg::ModuleRegistry::ModuleInfo;
Expand Down
3 changes: 2 additions & 1 deletion src/workerd/server/workerd-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class WorkerdApi final: public Worker::Api {
CompatibilityFlags::Reader features,
IsolateLimitEnforcer& limitEnforcer,
kj::Own<jsg::IsolateObserver> observer,
api::MemoryCacheProvider& memoryCacheProvider);
api::MemoryCacheProvider& memoryCacheProvider,
kj::Maybe<kj::Own<const kj::Directory>>& pyodideCacheRoot);
~WorkerdApi() noexcept(false);

static const WorkerdApi& from(const Worker::Api&);
Expand Down
10 changes: 9 additions & 1 deletion src/workerd/server/workerd.c++
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,9 @@ public:
"Useful for development, but not recommended in production.")
.addOption({"experimental"}, [this]() { server.allowExperimental(); return true; },
"Permit the use of experimental features which may break backwards "
"compatibility in a future release.");
"compatibility in a future release.")
.addOptionWithArg({"disk-cache-dir"}, CLI_METHOD(diskCacheDir), "<path>",
"Use <path> as a disk cache to avoid repeatedly fetching packages from the internet. ");
}

kj::MainFunc addServeOptions(kj::MainBuilder& builder) {
Expand Down Expand Up @@ -927,6 +929,12 @@ public:
server.enableControl(fd);
}

// Handler for the --disk-cache-dir option.
//
// Resolves `pathStr` against the current working directory, tries to open that directory with
// kj::WriteMode::MODIFY, and hands the result (kj::none if it could not be opened) to the
// server as its disk cache root.
void diskCacheDir(kj::StringPtr pathStr) {
  kj::Path resolved = fs->getCurrentPath().eval(pathStr);
  server.setDiskCacheRoot(fs->getRoot().tryOpenSubdir(resolved, kj::WriteMode::MODIFY));
}

void watch() {
#if _WIN32
auto& w = watcher.emplace(io.win32EventPort);
Expand Down
3 changes: 2 additions & 1 deletion src/workerd/tests/test-fixture.c++
Original file line number Diff line number Diff line change
Expand Up @@ -278,12 +278,13 @@ TestFixture::TestFixture(SetupParams&& params)
isolateLimitEnforcer(kj::heap<MockIsolateLimitEnforcer>()),
errorReporter(kj::heap<MockErrorReporter>()),
memoryCacheProvider(kj::heap<api::MemoryCacheProvider>()),
diskCacheRoot(kj::none),
api(kj::heap<server::WorkerdApi>(
testV8System,
params.featureFlags.orDefault(CompatibilityFlags::Reader()),
*isolateLimitEnforcer,
kj::atomicRefcounted<IsolateObserver>(),
*memoryCacheProvider)),
*memoryCacheProvider, diskCacheRoot)),
workerIsolate(kj::atomicRefcounted<Worker::Isolate>(
kj::mv(api),
kj::atomicRefcounted<IsolateObserver>(),
Expand Down
1 change: 1 addition & 0 deletions src/workerd/tests/test-fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ struct TestFixture {
kj::Own<IsolateLimitEnforcer> isolateLimitEnforcer;
kj::Own<Worker::ValidationErrorReporter> errorReporter;
kj::Own<api::MemoryCacheProvider> memoryCacheProvider;
kj::Maybe<kj::Own<const kj::Directory>> diskCacheRoot;
kj::Own<Worker::Api> api;
kj::Own<Worker::Isolate> workerIsolate;
kj::Own<Worker::Script> workerScript;
Expand Down

0 comments on commit 6044ec6

Please sign in to comment.