From cc5ae971f9d97c5007f6e7df52189d8c95d4e614 Mon Sep 17 00:00:00 2001 From: Garrett Gu Date: Mon, 15 Apr 2024 09:57:23 -0500 Subject: [PATCH 1/4] switch to new package index, decompress into .tar in javascript --- build/pyodide_bucket.bzl | 9 +++++---- src/pyodide/internal/builtin_wrappers.js | 5 ++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/build/pyodide_bucket.bzl b/build/pyodide_bucket.bzl index a03a5e1e918..67454bbdf9d 100644 --- a/build/pyodide_bucket.bzl +++ b/build/pyodide_bucket.bzl @@ -1,7 +1,8 @@ # Do not edit this file by hand. See docs/pyodide.md for info on how to generate it. # These variables are factored out here because they are being shared by the WORKSPACE files in # both edgeworker and workerd, as well as src/pyodide/BUILD.bazel -PYODIDE_PACKAGE_BUCKET_URL = "https://pub-45d734c4145d4285b343833ee450ef38.r2.dev/20240320/" -PYODIDE_GITHUB_RELEASE_URL = "https://github.com/cloudflare/pyodide-build-scripts/releases/download/20240320/" -PYODIDE_LOCK_SHA256 = "a176311d4c449aac4ef7a333977af8b6e08224c115a9a6d05c04592c841b8a58" -PYODIDE_PACKAGES_TAR_ZIP_SHA256 = "e191bae60aad75f6a9c33fac5c0ff1ad2b0e564bdd7a07fbdc848df4b62c60a1" +PYODIDE_PACKAGE_BUCKET_URL = "https://pub-45d734c4145d4285b343833ee450ef38.r2.dev/20240412-experimental/" +PYODIDE_GITHUB_RELEASE_URL = "https://github.com/cloudflare/pyodide-build-scripts/releases/download/20240412-experimental/" +PYODIDE_LOCK_SHA256 = "db29ebb43fcd05cbc6fcba051ec7eb61a9a1bc4210353e29fdad57c6f9be1a5a" +PYODIDE_PACKAGES_TAR_ZIP_SHA256 = "6579f114f007ac307c55c221f1b5018e30c95a3cc45b86a334bbbfa442c1bf1b" +PYODIDE_ALL_WHEELS_ZIP_SHA256 = "f8a34a284a7bc2ffc44ae86a160423a8aaf8cbb88eca268e1ea9300a187cf3af" diff --git a/src/pyodide/internal/builtin_wrappers.js b/src/pyodide/internal/builtin_wrappers.js index 38bc20b9328..03329779b83 100644 --- a/src/pyodide/internal/builtin_wrappers.js +++ b/src/pyodide/internal/builtin_wrappers.js @@ -131,7 +131,10 @@ export function patchFetch(origin) { // we didn't find it in the disk cache, continue with original fetch const response = await origFetch(url, options); - const arrayBuffer = await response.arrayBuffer(); + + const arrayBuffer = await new Response(response.body.pipeThrough(new DecompressionStream("gzip"))).arrayBuffer(); + + console.log("decompressed", fileName, arrayBuffer.byteLength, "bytes"); DiskCache.put(fileName, arrayBuffer); return new Response(arrayBuffer); }; From 9dd0e1dd1903ef508203c209b57b25d899bdf859 Mon Sep 17 00:00:00 2001 From: Garrett Gu Date: Mon, 15 Apr 2024 23:22:08 -0500 Subject: [PATCH 2/4] switch to new index, perform full package installation in JavaScript --- build/pyodide_bucket.bzl | 10 +- docs/pyodide.md | 3 +- src/pyodide/internal/builtin_wrappers.js | 25 ---- src/pyodide/internal/loadPackage.js | 87 ++++++++++++ src/pyodide/internal/python.js | 6 +- src/pyodide/internal/setupPackages.js | 163 ++++++++++++++--------- src/pyodide/internal/snapshot.js | 7 +- src/pyodide/internal/tar.js | 14 +- src/pyodide/internal/tarfs.js | 3 +- src/pyodide/python-entrypoint-helper.js | 8 +- 10 files changed, 213 insertions(+), 113 deletions(-) create mode 100644 src/pyodide/internal/loadPackage.js diff --git a/build/pyodide_bucket.bzl b/build/pyodide_bucket.bzl index 67454bbdf9d..55723d01eda 100644 --- a/build/pyodide_bucket.bzl +++ b/build/pyodide_bucket.bzl @@ -1,8 +1,8 @@ # Do not edit this file by hand. See docs/pyodide.md for info on how to generate it. # These variables are factored out here because they are being shared by the WORKSPACE files in # both edgeworker and workerd, as well as src/pyodide/BUILD.bazel -PYODIDE_PACKAGE_BUCKET_URL = "https://pub-45d734c4145d4285b343833ee450ef38.r2.dev/20240412-experimental/" -PYODIDE_GITHUB_RELEASE_URL = "https://github.com/cloudflare/pyodide-build-scripts/releases/download/20240412-experimental/" -PYODIDE_LOCK_SHA256 = "db29ebb43fcd05cbc6fcba051ec7eb61a9a1bc4210353e29fdad57c6f9be1a5a" -PYODIDE_PACKAGES_TAR_ZIP_SHA256 = "6579f114f007ac307c55c221f1b5018e30c95a3cc45b86a334bbbfa442c1bf1b" -PYODIDE_ALL_WHEELS_ZIP_SHA256 = "f8a34a284a7bc2ffc44ae86a160423a8aaf8cbb88eca268e1ea9300a187cf3af" +PYODIDE_PACKAGE_BUCKET_URL = "https://pub-45d734c4145d4285b343833ee450ef38.r2.dev/20240415-experimental/" +PYODIDE_GITHUB_RELEASE_URL = "https://github.com/cloudflare/pyodide-build-scripts/releases/download/20240415-experimental/" +PYODIDE_LOCK_SHA256 = "67d1a24edf4f3ab2cf85c736391c04763ff722bf3aebf9ea3469d96e5f51e1da" +PYODIDE_PACKAGES_TAR_ZIP_SHA256 = "749967941204154e7ae866fe08f1216a3e5ee58ba6a3757231a5be0d9d4430f8" +PYODIDE_ALL_WHEELS_ZIP_SHA256 = "9e7c330ee93d81d0356cc2d585f217dfee58b623ad4535282baa6e82bd063eee" diff --git a/docs/pyodide.md b/docs/pyodide.md index 4530424f703..61cb9aeeecf 100644 --- a/docs/pyodide.md +++ b/docs/pyodide.md @@ -5,10 +5,11 @@ workerd is linked against a Pyodide lock file, which is located within an R2 buc If you know where the R2 bucket is (See build/pyodide_bucket.bzl) then the `pyodide-lock.json` file is located inside the root of the R2 directory for the Pyodide package bundle release. This lock file contains some information used by workerd to pull in package requirements, including but not limited to: + - The versions of each package included in the package bundle - The file names and SHA hashes of each package available for download in the bucket - What the dependencies are for each package ## Generating pyodide_bucket.bzl -We have scripts and GitHub actions set up for building and uploading Pyodide package bundles onto R2. These are available [here](https://github.com/cloudflare/pyodide-build-scripts). Simply follow the instructions on that repo to build a new version of Pyodide or a new package bundle release. +We have scripts and GitHub actions set up for building and uploading Pyodide package bundles onto R2. These are available [here](https://github.com/cloudflare/pyodide-build-scripts). Simply follow the instructions on that repo to build a new version of Pyodide or a new package bundle release. diff --git a/src/pyodide/internal/builtin_wrappers.js b/src/pyodide/internal/builtin_wrappers.js index 03329779b83..9cc5c074597 100644 --- a/src/pyodide/internal/builtin_wrappers.js +++ b/src/pyodide/internal/builtin_wrappers.js @@ -114,28 +114,3 @@ export async function wasmInstantiate(module, imports) { const instance = new WebAssembly.Instance(module, imports); return { module, instance }; } - -export function patchFetch(origin) { - // Patch fetch to first go through disk cache, but only when url points to origin - const origFetch = globalThis.fetch; - globalThis.fetch = async function (url, options) { - if (url.origin !== origin) { - return origFetch(url, options); - } - - const fileName = url.pathname.substring(url.pathname.lastIndexOf("/") + 1); - const cached = DiskCache.get(fileName); - if (cached) { - return new Response(cached); - } - - // we didn't find it in the disk cache, continue with original fetch - const response = await origFetch(url, options); - - const arrayBuffer = await new Response(response.body.pipeThrough(new DecompressionStream("gzip"))).arrayBuffer(); - - console.log("decompressed", fileName, arrayBuffer.byteLength, "bytes"); - DiskCache.put(fileName, arrayBuffer); - return new Response(arrayBuffer); - }; -} diff --git a/src/pyodide/internal/loadPackage.js b/src/pyodide/internal/loadPackage.js new file mode 100644 index 00000000000..a21010e27a3 --- /dev/null +++ b/src/pyodide/internal/loadPackage.js @@ -0,0 +1,87 @@ +/** + * This file contains code that roughly replaces pyodide.loadPackage, with workerd-specific + * optimizations: + * - Wheels are decompressed with a DecompressionStream instead of in Python + * - Wheels are overlaid onto the site-packages dir instead of actually being copied + * - Wheels are fetched from a disk cache if available. + * + * Note that loadPackages is only used in local dev for now, internally we use the full big bundle + * that contains all the packages ready to go. + */ + +import { default as LOCKFILE } from "pyodide-internal:generated/pyodide-lock.json"; +import { WORKERD_INDEX_URL } from "pyodide-internal:metadata"; +import { SITE_PACKAGES, LOAD_WHEELS_FROM_R2, getSitePackagesPath } from "pyodide-internal:setupPackages"; +import { parseTarInfo } from "pyodide-internal:tar"; +import { default as DiskCache } from "pyodide-internal:disk_cache"; +import { createTarFS } from "pyodide-internal:tarfs"; + +async function loadBundle(requirement) { + // first check if the disk cache has what we want + const filename = LOCKFILE["packages"][requirement]["file_name"]; + const cached = DiskCache.get(filename); + if (cached) { + return [requirement, cached]; + } + + // we didn't find it in the disk cache, continue with original fetch + const url = new URL(WORKERD_INDEX_URL + filename); + const response = await fetch(url); + + const arrayBuffer = await new Response(response.body.pipeThrough(new DecompressionStream("gzip"))).arrayBuffer(); + + DiskCache.put(filename, arrayBuffer); + return [requirement, arrayBuffer]; +}; + +/** + * ArrayBufferReader wraps around an arrayBuffer in a way that tar.js is able to read from + */ +class ArrayBufferReader { + constructor(arrayBuffer) { + this.arrayBuffer = arrayBuffer; + } + + read(offset, buf){ + // buf is a Uint8Array + const size = this.arrayBuffer.byteLength; + if (offset >= size || offset < 0) { + return 0; + } + let toCopy = buf.length; + if (size - offset < toCopy) { + toCopy = size - offset; + } + buf.set(new Uint8Array(this.arrayBuffer, offset, toCopy)); + return toCopy; + } +} + +export async function loadPackages(Module, requirements) { + if (!LOAD_WHEELS_FROM_R2) return; + + let loadPromises = []; + let loading = []; + for (const req of requirements) { + if (SITE_PACKAGES.loadedRequirements.has(req)) continue; + loadPromises.push(loadBundle(req)); + loading.push(req); + } + + console.log("Loading " + loading.join(", ")); + + await Promise.all(loadPromises).then((buffers) => { + for (const [requirement, buffer] of buffers) { + const reader = new ArrayBufferReader(buffer); + const [tarInfo, soFiles] = parseTarInfo(reader); + SITE_PACKAGES.addSmallBundle(tarInfo, soFiles, requirement); + } + }); + + console.log("Loaded " + loading.join(", ")); + + const tarFS = createTarFS(Module); + const path = getSitePackagesPath(Module); + const info = SITE_PACKAGES.rootInfo; + Module.FS.mount(tarFS, { info }, path); +} diff --git a/src/pyodide/internal/python.js b/src/pyodide/internal/python.js index 63e67c627a2..5eb2e0986c6 100644 --- a/src/pyodide/internal/python.js +++ b/src/pyodide/internal/python.js @@ -1,7 +1,8 @@ Error.stackTraceLimit = Infinity; import { enterJaegerSpan } from "pyodide-internal:jaeger"; import { - SITE_PACKAGES_INFO, + TRANSITIVE_REQUIREMENTS, + SITE_PACKAGES, adjustSysPath, mountLib, } from "pyodide-internal:setupPackages"; @@ -187,7 +188,7 @@ async function instantiateEmscriptenModule(emscriptenSettings) { */ async function prepareWasmLinearMemory(Module) { // Note: if we are restoring from a snapshot, runtime is not initialized yet. - mountLib(Module, SITE_PACKAGES_INFO); + mountLib(Module, SITE_PACKAGES.rootInfo); entropyMountFiles(Module); if (SHOULD_RESTORE_SNAPSHOT) { restoreSnapshot(Module); @@ -204,6 +205,7 @@ async function prepareWasmLinearMemory(Module) { } export async function loadPyodide(lockfile, indexURL) { + console.log("loading pyodide"); const emscriptenSettings = getEmscriptenSettings(lockfile, indexURL); const Module = await enterJaegerSpan("instantiate_emscripten", () => instantiateEmscriptenModule(emscriptenSettings), diff --git a/src/pyodide/internal/setupPackages.js b/src/pyodide/internal/setupPackages.js index 188b712a6ca..04a4956d08c 100644 --- a/src/pyodide/internal/setupPackages.js +++ b/src/pyodide/internal/setupPackages.js @@ -3,7 +3,6 @@ import { createTarFS } from "pyodide-internal:tarfs"; import { createMetadataFS } from "pyodide-internal:metadatafs"; import { default as LOCKFILE } from "pyodide-internal:generated/pyodide-lock.json"; import { REQUIREMENTS, WORKERD_INDEX_URL } from "pyodide-internal:metadata"; -import { patchFetch } from "pyodide-internal:builtin_wrappers"; import { simpleRunPython } from "pyodide-internal:util"; const canonicalizeNameRegex = /[-_.]+/g; @@ -23,6 +22,86 @@ const STDLIB_PACKAGES = Object.values(LOCKFILE.packages) .filter(({ install_dir }) => install_dir === "stdlib") .map(({ name }) => canonicalizePackageName(name)); +/** + * SitePackagesDir keeps track of the virtualized view of the site-packages + * directory generated for each worker. + */ +class SitePackagesDir { + constructor() { + this.rootInfo = { + children: new Map(), + mode: 0o777, + type: 5, + modtime: 0, + size: 0, + path: "", + name: "", + parts: [], + }; + this.soFiles = []; + this.loadedRequirements = new Set(); + } + + /** + * mountOverlay "overlays" a directory onto the site-packages root directory. + * All files and subdirectories in the overlay will be accessible at site-packages by the worker. + * If a file or directory already exists, an error is thrown. + * @param {TarInfo} overlayInfo The directory that is to be "copied" into site-packages + */ + mountOverlay(overlayInfo) { + overlayInfo.children.forEach((val, key) => { + if (this.rootInfo.children.has(key)) { + throw new Error( + `File/folder ${key} being written by multiple packages`, + ); + } + this.rootInfo.children.set(key, val); + }); + } + + /** + * A small bundle contains just a single package. The entire bundle will be overlaid onto site-packages. + * A small bundle can basically be thought of as a wheel. + * @param {TarInfo} tarInfo The root tarInfo for the small bundle (See tar.js) + * @param {List} soFiles A list of .so files contained in the small bundle + * @param {String} requirement The canonicalized package name this small bundle corresponds to + */ + addSmallBundle(tarInfo, soFiles, requirement) { + for (const soFile of soFiles) { + this.soFiles.push(soFile.split("/")); + } + this.mountOverlay(tarInfo); + this.loadedRequirements.add(requirement); + } + + /** + * A big bundle contains multiple packages, each package contained in a folder whose name is the canonicalized package name. + * This function overlays the requested packages onto the site-packages directory. + * @param {TarInfo} tarInfo The root tarInfo for the big bundle (See tar.js) + * @param {List} soFiles A list of .so files contained in the big bundle + * @param {List} requirements canonicalized list of packages to pick from the big bundle + */ + addBigBundle(tarInfo, soFiles, requirements) { + // add all the .so files we will need to preload from the big bundle + for (const soFile of soFiles) { + // If folder is in list of requirements include .so file in list to preload. + const [pkg, ...rest] = soFile.split("/"); + if (requirements.has(pkg)) { + this.soFiles.push(rest); + } + } + + for (const req of requirements) { + const child = tarInfo.children.get(req); + if (!child) { + throw new Error(`Requirement ${req} not found in pyodide packages tar`); + } + this.mountOverlay(child); + this.loadedRequirements.add(req); + } + } +}; + /** * This stitches together the view of the site packages directory. Each * requirement corresponds to a folder in the original tar file. For each @@ -33,52 +112,19 @@ const STDLIB_PACKAGES = Object.values(LOCKFILE.packages) * directory so we can preload them. */ export function buildSitePackages(requirements) { - const [origTarInfo, origSoFiles] = parseTarInfo(); - // We'd like to set USE_LOAD_PACKAGE = IS_WORKERD but we also build a funny - // workerd with the downstream package set. We can distinguish between them by - // looking at the contents. This uses the fact that the downstream set is - // larger, but there are a lot of differences... - const USE_LOAD_PACKAGE = origTarInfo.children.size < 10; - if (USE_LOAD_PACKAGE) { - requirements = new Set([...STDLIB_PACKAGES]); - } else { - requirements = new Set([...STDLIB_PACKAGES, ...requirements]); - } - const soFiles = []; - for (const soFile of origSoFiles) { - // If folder is in list of requirements include .so file in list to preload. - const [pkg, ...rest] = soFile.split("/"); - if (requirements.has(pkg)) { - soFiles.push(rest); - } - } - const newTarInfo = { - children: new Map(), - mode: 0o777, - type: 5, - modtime: 0, - size: 0, - path: "", - name: "", - parts: [], - }; - - for (const req of requirements) { - const child = origTarInfo.children.get(req); - if (!child) { - throw new Error(`Requirement ${req} not found in pyodide packages tar`); - } - child.children.forEach((val, key) => { - if (newTarInfo.children.has(key)) { - throw new Error( - `File/folder ${key} being written by multiple packages`, - ); - } - newTarInfo.children.set(key, val); - }); + const [bigTarInfo, bigTarSoFiles] = parseTarInfo(); + + let LOAD_WHEELS_FROM_R2 = true; + let requirementsInBigBundle = new Set([...STDLIB_PACKAGES]); + if(bigTarInfo.children.size > 10) { + LOAD_WHEELS_FROM_R2 = false; + requirements.forEach(r => requirementsInBigBundle.add(r)); } - return [newTarInfo, soFiles, USE_LOAD_PACKAGE]; + const res = new SitePackagesDir(); + res.addBigBundle(bigTarInfo, bigTarSoFiles, requirementsInBigBundle); + + return [res, LOAD_WHEELS_FROM_R2]; } /** @@ -89,23 +135,12 @@ export function buildSitePackages(requirements) { * TODO: stop using loadPackage in workerd. */ export function patchLoadPackage(pyodide) { - if (!USE_LOAD_PACKAGE) { - pyodide.loadPackage = disabledLoadPackage; - return; - } - patchFetch(new URL(WORKERD_INDEX_URL).origin); - const origLoadPackage = pyodide.loadPackage; - function loadPackage(packages, options) { - return origLoadPackage(packages, { - checkIntegrity: false, - ...options, - }); - } - pyodide.loadPackage = loadPackage; + pyodide.loadPackage = disabledLoadPackage; + return; } function disabledLoadPackage() { - throw new Error("We only use loadPackage in workerd"); + throw new Error("pyodide.loadPackage is disabled"); } /** @@ -138,7 +173,12 @@ export function mountLib(Module, info) { const site_packages = getSitePackagesPath(Module); Module.FS.mkdirTree(site_packages); Module.FS.mkdirTree("/session/metadata"); - Module.FS.mount(tarFS, { info }, site_packages); + if (!LOAD_WHEELS_FROM_R2) { + // if we are not loading additional wheels from R2, then we're done + // with site-packages and we can mount it here. Otherwise, we must mount it in + // loadPackages(). + Module.FS.mount(tarFS, { info }, site_packages); + } Module.FS.mount(mdFS, {}, "/session/metadata"); } @@ -191,5 +231,4 @@ function addPackageToLoad(lockfile, name, toLoad) { export { REQUIREMENTS }; export const TRANSITIVE_REQUIREMENTS = getTransitiveRequirements(); -export const [SITE_PACKAGES_INFO, SITE_PACKAGES_SO_FILES, USE_LOAD_PACKAGE] = - buildSitePackages(TRANSITIVE_REQUIREMENTS); +export const [SITE_PACKAGES, LOAD_WHEELS_FROM_R2] = buildSitePackages(TRANSITIVE_REQUIREMENTS); diff --git a/src/pyodide/internal/snapshot.js b/src/pyodide/internal/snapshot.js index 6968896addf..828f8b315e5 100644 --- a/src/pyodide/internal/snapshot.js +++ b/src/pyodide/internal/snapshot.js @@ -2,8 +2,7 @@ import { default as ArtifactBundler } from "pyodide-internal:artifacts"; import { default as UnsafeEval } from "internal:unsafe-eval"; import { default as DiskCache } from "pyodide-internal:disk_cache"; import { - SITE_PACKAGES_INFO, - SITE_PACKAGES_SO_FILES, + SITE_PACKAGES, getSitePackagesPath, } from "pyodide-internal:setupPackages"; import { default as TarReader } from "pyodide-internal:packages_tar_reader"; @@ -119,7 +118,7 @@ const PRELOADED_SO_FILES = []; * there. */ export function preloadDynamicLibs(Module) { - let SO_FILES_TO_LOAD = SITE_PACKAGES_SO_FILES; + let SO_FILES_TO_LOAD = SITE_PACKAGES.soFiles; if (LOADED_BASELINE_SNAPSHOT && LOADED_SNAPSHOT_VERSION === 1) { // Ideally this should be just // [[ '_lzma.so' ], [ '_ssl.so' ]] @@ -140,7 +139,7 @@ export function preloadDynamicLibs(Module) { try { const sitePackages = getSitePackagesPath(Module); for (const soFile of SO_FILES_TO_LOAD) { - let node = SITE_PACKAGES_INFO; + let node = SITE_PACKAGES.rootInfo; for (const part of soFile) { node = node.children.get(part); } diff --git a/src/pyodide/internal/tar.js b/src/pyodide/internal/tar.js index 10bd62de708..c5e25bbca8d 100644 --- a/src/pyodide/internal/tar.js +++ b/src/pyodide/internal/tar.js @@ -1,4 +1,4 @@ -import { default as Reader } from "pyodide-internal:packages_tar_reader"; +import { default as TarReader } from "pyodide-internal:packages_tar_reader"; // This is based on the info about the tar file format on wikipedia // And some trial and error with real tar files. @@ -19,7 +19,7 @@ function decodeNumber(buf, offset, size) { return parseInt(decodeField(buf, offset, size), 8); } -function decodeHeader(buf) { +function decodeHeader(buf, reader) { const nameBase = decodeField(buf, 0, 100); const namePrefix = decodeField(buf, 345, 155); let path = namePrefix + nameBase; @@ -40,10 +40,11 @@ function decodeHeader(buf) { type, parts: [], children: undefined, + reader, }; } -export function parseTarInfo() { +export function parseTarInfo(reader = TarReader) { const directories = []; const soFiles = []; const root = { @@ -55,14 +56,15 @@ export function parseTarInfo() { path: "", name: "", parts: [], + reader, }; let directory = root; const buf = new Uint8Array(512); let offset = 0; let longName = null; // if truthy, overwrites the filename of the next header while (true) { - Reader.read(offset, buf); - const info = decodeHeader(buf); + reader.read(offset, buf); + const info = decodeHeader(buf, reader); if (isNaN(info.mode)) { // Invalid mode means we're done return [root, soFiles]; @@ -87,7 +89,7 @@ export function parseTarInfo() { } if (info.type === "L") { const buf = new Uint8Array(info.size); - Reader.read(contentsOffset, buf); + reader.read(contentsOffset, buf); longName = decodeString(buf); continue; } diff --git a/src/pyodide/internal/tarfs.js b/src/pyodide/internal/tarfs.js index fc5474a8009..990c470bafd 100644 --- a/src/pyodide/internal/tarfs.js +++ b/src/pyodide/internal/tarfs.js @@ -1,4 +1,3 @@ -import { default as TarReader } from "pyodide-internal:packages_tar_reader"; import { createReadonlyFS } from "pyodide-internal:readOnlyFS"; const FSOps = { @@ -24,7 +23,7 @@ const FSOps = { return parent.info.children.get(name); }, read(stream, position, buffer) { - return TarReader.read(stream.node.contentsOffset + position, buffer); + return stream.node.info.reader.read(stream.node.contentsOffset + position, buffer); }, }; diff --git a/src/pyodide/python-entrypoint-helper.js b/src/pyodide/python-entrypoint-helper.js index 7bd45e17691..c31f25bcff7 100644 --- a/src/pyodide/python-entrypoint-helper.js +++ b/src/pyodide/python-entrypoint-helper.js @@ -8,9 +8,7 @@ import { } from "pyodide-internal:snapshot"; import { enterJaegerSpan } from "pyodide-internal:jaeger"; import { - REQUIREMENTS, TRANSITIVE_REQUIREMENTS, - USE_LOAD_PACKAGE, patchLoadPackage, } from "pyodide-internal:setupPackages"; import { @@ -23,6 +21,7 @@ import { import { reportError } from "pyodide-internal:util"; import { default as Limiter } from "pyodide-internal:limiter"; import { entropyBeforeRequest } from "pyodide-internal:topLevelEntropy/lib"; +import { loadPackages } from "pyodide-internal:loadPackage"; function pyimportMainModule(pyodide) { if (!MAIN_MODULE_NAME.endsWith(".py")) { @@ -85,14 +84,11 @@ async function applyPatch(pyodide, patchName) { export async function setupPackages(pyodide) { return await enterJaegerSpan("setup_packages", async () => { patchLoadPackage(pyodide); - if (USE_LOAD_PACKAGE) { - await pyodide.loadPackage(REQUIREMENTS); - } + await loadPackages(pyodide._module, TRANSITIVE_REQUIREMENTS); // install any extra packages into the site-packages directory, so calculate where that is. const pymajor = pyodide._module._py_version_major(); const pyminor = pyodide._module._py_version_minor(); pyodide.site_packages = `/lib/python${pymajor}.${pyminor}/site-packages`; - // Install patches as needed if (TRANSITIVE_REQUIREMENTS.has("aiohttp")) { await applyPatch(pyodide, "aiohttp"); From 74ba19f6153ffcbd40fc32c607c46c7f2e98c63e Mon Sep 17 00:00:00 2001 From: Garrett Gu Date: Mon, 22 Apr 2024 13:13:45 -0500 Subject: [PATCH 3/4] Address nits --- src/pyodide/internal/loadPackage.js | 13 ++++++------- src/pyodide/internal/python.js | 1 - src/pyodide/internal/setupPackages.js | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/pyodide/internal/loadPackage.js b/src/pyodide/internal/loadPackage.js index a21010e27a3..d856ef5d5db 100644 --- a/src/pyodide/internal/loadPackage.js +++ b/src/pyodide/internal/loadPackage.js @@ -70,13 +70,12 @@ export async function loadPackages(Module, requirements) { console.log("Loading " + loading.join(", ")); - await Promise.all(loadPromises).then((buffers) => { - for (const [requirement, buffer] of buffers) { - const reader = new ArrayBufferReader(buffer); - const [tarInfo, soFiles] = parseTarInfo(reader); - SITE_PACKAGES.addSmallBundle(tarInfo, soFiles, requirement); - } - }); + const buffers = await Promise.all(loadPromises); + for (const [requirement, buffer] of buffers) { + const reader = new ArrayBufferReader(buffer); + const [tarInfo, soFiles] = parseTarInfo(reader); + SITE_PACKAGES.addSmallBundle(tarInfo, soFiles, requirement); + } console.log("Loaded " + loading.join(", ")); diff --git a/src/pyodide/internal/python.js b/src/pyodide/internal/python.js index 5eb2e0986c6..ee99a276796 100644 --- a/src/pyodide/internal/python.js +++ b/src/pyodide/internal/python.js @@ -205,7 +205,6 @@ async function prepareWasmLinearMemory(Module) { } export async function loadPyodide(lockfile, indexURL) { - console.log("loading pyodide"); const emscriptenSettings = getEmscriptenSettings(lockfile, indexURL); const Module = await enterJaegerSpan("instantiate_emscripten", () => instantiateEmscriptenModule(emscriptenSettings), diff --git a/src/pyodide/internal/setupPackages.js b/src/pyodide/internal/setupPackages.js index 04a4956d08c..3190aaeb14a 100644 --- a/src/pyodide/internal/setupPackages.js +++ b/src/pyodide/internal/setupPackages.js @@ -116,7 +116,7 @@ export function buildSitePackages(requirements) { let LOAD_WHEELS_FROM_R2 = true; let requirementsInBigBundle = new Set([...STDLIB_PACKAGES]); - if(bigTarInfo.children.size > 10) { + if (bigTarInfo.children.size > 10) { LOAD_WHEELS_FROM_R2 = false; requirements.forEach(r => requirementsInBigBundle.add(r)); } @@ -140,7 +140,7 @@ export function patchLoadPackage(pyodide) { } function disabledLoadPackage() { - throw new Error("pyodide.loadPackage is disabled"); + throw new Error("pyodide.loadPackage is disabled because packages are encoded in the binary"); } /** From 5559c51ad7a2b7e4ff16fb3ecc0113f57b342115 Mon Sep 17 00:00:00 2001 From: Garrett Gu Date: Mon, 6 May 2024 16:29:13 -0500 Subject: [PATCH 4/4] Fix circular import issue --- src/pyodide/internal/snapshot.js | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/pyodide/internal/snapshot.js b/src/pyodide/internal/snapshot.js index 828f8b315e5..52fd0f3b94c 100644 --- a/src/pyodide/internal/snapshot.js +++ b/src/pyodide/internal/snapshot.js @@ -24,12 +24,6 @@ let LOADED_BASELINE_SNAPSHOT; * `pyodide.loadPackage`. In trade we add memory snapshots here. */ -/** - * _createPyodideModule and pyodideWasmModule together are produced by the - * Emscripten linker - */ -import { _createPyodideModule } from "pyodide-internal:generated/pyodide.asm"; - const TOP_LEVEL_SNAPSHOT = ArtifactBundler.isEwValidating() || SHOULD_SNAPSHOT_TO_DISK; const SHOULD_UPLOAD_SNAPSHOT =