From aa190bf8214790f61a2687dc749f1c3e196ae8c2 Mon Sep 17 00:00:00 2001
From: Hood Chatham <roberthoodchatham@gmail.com>
Date: Wed, 3 Apr 2024 15:04:21 +0200
Subject: [PATCH] Python: Make top level random() raise

We don't have access to entropy outside of request scope, so `random()` will
give poor results. Raise an error if it is called.
---
 src/pyodide/BUILD.bazel                       |  10 +-
 src/pyodide/internal/builtin_wrappers.js      |  18 +-
 src/pyodide/internal/python.js                |  25 ++-
 src/pyodide/internal/snapshot.js              |  31 +--
 .../internal/topLevelEntropy/__init__.py      |   0
 .../topLevelEntropy/entropy_import_context.py | 133 +++++++++++++
 .../topLevelEntropy/entropy_patches.py        | 134 +++++++++++++
 .../topLevelEntropy/import_patch_manager.py   | 163 ++++++++++++++++
 src/pyodide/internal/topLevelEntropy/lib.js   | 177 ++++++++++++++++++
 src/pyodide/internal/util.js                  |   1 +
 src/pyodide/python-entrypoint-helper.js       |  28 +--
 src/workerd/server/tests/python/BUILD.bazel   |  11 ++
 .../server/tests/python/random/random.wd-test |  15 ++
 .../server/tests/python/random/worker.py      |  57 ++++++
 14 files changed, 744 insertions(+), 59 deletions(-)
 create mode 100644 src/pyodide/internal/topLevelEntropy/__init__.py
 create mode 100644 src/pyodide/internal/topLevelEntropy/entropy_import_context.py
 create mode 100644 src/pyodide/internal/topLevelEntropy/entropy_patches.py
 create mode 100644 src/pyodide/internal/topLevelEntropy/import_patch_manager.py
 create mode 100644 src/pyodide/internal/topLevelEntropy/lib.js
 create mode 100644 src/workerd/server/tests/python/random/random.wd-test
 create mode 100644 src/workerd/server/tests/python/random/worker.py

diff --git a/src/pyodide/BUILD.bazel b/src/pyodide/BUILD.bazel
index 453e61f56ac..faf31e05ae8 100644
--- a/src/pyodide/BUILD.bazel
+++ b/src/pyodide/BUILD.bazel
@@ -117,14 +117,14 @@ REPLACEMENTS = [
         "Date.now",
         "monotonicDateNow",
     ],
-    [
-        "crypto.getRandomValues",
-        "getRandomValues"
-    ],
     [
         "reportUndefinedSymbols()",
         "reportUndefinedSymbolsNoOp()"
     ],
+    [
+        "crypto.getRandomValues(",
+        "getRandomValues(Module, ",
+    ]
 ]
 
 load("//:build/pyodide_bucket.bzl", "PYODIDE_PACKAGE_BUCKET_URL")
@@ -153,12 +153,14 @@ wd_js_bundle(
     internal_data_modules = ["generated/python_stdlib.zip"] + glob([
         "internal/*.py",
         "internal/patches/*.py",
+        "internal/topLevelEntropy/*.py",
     ]),
     internal_json_modules = ["generated/pyodide-lock.json", "generated/pyodide-bucket.json"],
     internal_modules = [
         "generated/pyodide.asm.js",
     ] + glob([
         "internal/*.js",
+        "internal/topLevelEntropy/*.js",
     ]),
     internal_wasm_modules = ["generated/pyodide.asm.wasm"],
     schema_id = "0xbcc8f57c63814005",
diff --git a/src/pyodide/internal/builtin_wrappers.js b/src/pyodide/internal/builtin_wrappers.js
index b4006467897..38bc20b9328 100644
--- a/src/pyodide/internal/builtin_wrappers.js
+++ b/src/pyodide/internal/builtin_wrappers.js
@@ -1,5 +1,6 @@
 import { default as UnsafeEval } from "internal:unsafe-eval";
 import { default as DiskCache } from "pyodide-internal:disk_cache";
+export { getRandomValues } from "pyodide-internal:topLevelEntropy/lib";
 
 let lastTime;
 let lastDelta = 0;
@@ -19,23 +20,6 @@ export function monotonicDateNow() {
   return now + lastDelta;
 }
 
-/**
- * We initialize Python at top level, but it tries to initialize the random seed with
- * crypto.getRandomValues which will fail at top level. So we don't produce any entropy the first
- * time around and we reseed the rng in the first request context before executing user code.
- */
-export function getRandomValues(arr) {
-  try {
-    return crypto.getRandomValues(arr);
-  } catch (e) {
-    if (e.message.includes("Disallowed operation called within global scope")) {
-      // random.seed() can't work at startup. We'll seed again under the request scope.
-      return arr;
-    }
-    throw e;
-  }
-}
-
 /**
  * First check that the callee is what we expect, then use `UnsafeEval` to
  * construct a `WasmModule`.
diff --git a/src/pyodide/internal/python.js b/src/pyodide/internal/python.js
index b598a379b38..63e67c627a2 100644
--- a/src/pyodide/internal/python.js
+++ b/src/pyodide/internal/python.js
@@ -1,3 +1,4 @@
+Error.stackTraceLimit = Infinity;
 import { enterJaegerSpan } from "pyodide-internal:jaeger";
 import {
   SITE_PACKAGES_INFO,
@@ -12,6 +13,11 @@ import {
   maybeSetupSnapshotUpload,
   restoreSnapshot,
 } from "pyodide-internal:snapshot";
+import {
+  entropyMountFiles,
+  entropyAfterRuntimeInit,
+  entropyBeforeTopLevel,
+} from "pyodide-internal:topLevelEntropy/lib";
 
 /**
  * This file is a simplified version of the Pyodide loader:
@@ -121,8 +127,9 @@ function getEmscriptenSettings(lockfile, indexURL) {
     // environment variables go here
     env: {
       HOME: "/session",
-      // We don't have access to cryptographic rng at startup so we cannot support hash
-      // randomization. Setting `PYTHONHASHSEED` disables it.
+      // We don't have access to entropy at startup so we cannot support hash
+      // randomization. Setting `PYTHONHASHSEED` disables it. See further
+      // discussion in topLevelEntropy/entropy_patches.py
       PYTHONHASHSEED: "111",
     },
     // This is the index that we use as the base URL to fetch the wheels.
@@ -177,20 +184,23 @@ async function instantiateEmscriptenModule(emscriptenSettings) {
  * APIs, we call this function. If `MEMORY` is defined, then we will have passed
  * `noInitialRun: true` and so the C runtime is in an incoherent state until we
  * restore the linear memory from the snapshot.
- *
- * Returns `true` when existing memory snapshot was loaded.
  */
 async function prepareWasmLinearMemory(Module) {
   // Note: if we are restoring from a snapshot, runtime is not initialized yet.
   mountLib(Module, SITE_PACKAGES_INFO);
+  entropyMountFiles(Module);
   if (SHOULD_RESTORE_SNAPSHOT) {
     restoreSnapshot(Module);
-    // Don't call adjustSysPath here: it was called in the other branch when we
-    // were creating the snapshot so the outcome of that is already baked in.
+  }
+  // entropyAfterRuntimeInit adjusts JS state ==> always needs to be called.
+  entropyAfterRuntimeInit(Module);
+  if (SHOULD_RESTORE_SNAPSHOT) {
     return;
   }
+  // The effects of these are purely in Python state so they only need to be run
+  // if we didn't restore a snapshot.
+  entropyBeforeTopLevel(Module);
   adjustSysPath(Module);
-  maybeSetupSnapshotUpload(Module);
 }
 
 export async function loadPyodide(lockfile, indexURL) {
@@ -201,6 +211,7 @@ export async function loadPyodide(lockfile, indexURL) {
   await enterJaegerSpan("prepare_wasm_linear_memory", () =>
     prepareWasmLinearMemory(Module),
   );
+  maybeSetupSnapshotUpload(Module);
 
   // Finish setting up Pyodide's ffi so we can use the nice Python interface
   await enterJaegerSpan("finalize_bootstrap", Module.API.finalizeBootstrap);
diff --git a/src/pyodide/internal/snapshot.js b/src/pyodide/internal/snapshot.js
index c0c0b336120..6968896addf 100644
--- a/src/pyodide/internal/snapshot.js
+++ b/src/pyodide/internal/snapshot.js
@@ -15,6 +15,8 @@ import {
 } from "pyodide-internal:metadata";
 import { reportError, simpleRunPython } from "pyodide-internal:util";
 
+let LOADED_BASELINE_SNAPSHOT;
+
 /**
  * This file is a simplified version of the Pyodide loader:
  * https://github.com/pyodide/pyodide/blob/main/src/js/pyodide.ts
@@ -29,8 +31,10 @@ import { reportError, simpleRunPython } from "pyodide-internal:util";
  */
 import { _createPyodideModule } from "pyodide-internal:generated/pyodide.asm";
 
-const TOP_LEVEL_SNAPSHOT = ArtifactBundler.isEwValidating() || SHOULD_SNAPSHOT_TO_DISK;
-const SHOULD_UPLOAD_SNAPSHOT = ArtifactBundler.isEnabled() || TOP_LEVEL_SNAPSHOT;
+const TOP_LEVEL_SNAPSHOT =
+  ArtifactBundler.isEwValidating() || SHOULD_SNAPSHOT_TO_DISK;
+const SHOULD_UPLOAD_SNAPSHOT =
+  ArtifactBundler.isEnabled() || TOP_LEVEL_SNAPSHOT;
 
 /**
  * Global variable for the memory snapshot. On the first run we stick a copy of
@@ -116,10 +120,7 @@ const PRELOADED_SO_FILES = [];
  */
 export function preloadDynamicLibs(Module) {
   let SO_FILES_TO_LOAD = SITE_PACKAGES_SO_FILES;
-  if (
-    IS_CREATING_BASELINE_SNAPSHOT ||
-    DSO_METADATA?.settings?.baselineSnapshot
-  ) {
+  if (LOADED_BASELINE_SNAPSHOT && LOADED_SNAPSHOT_VERSION === 1) {
     // Ideally this should be just
     // [[ '_lzma.so' ], [ '_ssl.so' ]]
     // but we put a few more because we messed up the memory snapshot...
@@ -129,7 +130,12 @@ export function preloadDynamicLibs(Module) {
       ["_sqlite3.so"],
       ["_ssl.so"],
     ];
-    // SO_FILES_TO_LOAD = [[ '_lzma.so' ], [ '_ssl.so' ]];
+  }
+  if (
+    IS_CREATING_BASELINE_SNAPSHOT ||
+    (LOADED_BASELINE_SNAPSHOT && LOADED_SNAPSHOT_VERSION === 2)
+  ) {
+    SO_FILES_TO_LOAD = [["_lzma.so"], ["_ssl.so"]];
   }
   try {
     const sitePackages = getSitePackagesPath(Module);
@@ -313,8 +319,9 @@ export function maybeSetupSnapshotUpload(Module) {
 
 // "\x00snp"
 const SNAPSHOT_MAGIC = 0x706e7300;
-const SNAPSHOT_VERSION = 1;
+const CREATE_SNAPSHOT_VERSION = 2;
 const HEADER_SIZE = 4 * 4;
+export let LOADED_SNAPSHOT_VERSION = undefined;
 
 /**
  * Encode heap and dsoJSON into the memory snapshot artifact that we'll upload
@@ -332,7 +339,7 @@ function encodeSnapshot(heap, dsoJSON) {
   );
   const uint32View = new Uint32Array(toUpload.buffer);
   uint32View[0] = SNAPSHOT_MAGIC;
-  uint32View[1] = SNAPSHOT_VERSION;
+  uint32View[1] = CREATE_SNAPSHOT_VERSION;
   uint32View[2] = snapshotOffset;
   uint32View[3] = jsonLength;
   toUpload.subarray(snapshotOffset).set(heap);
@@ -347,9 +354,9 @@ function decodeSnapshot() {
   let offset = 0;
   MEMORY_SNAPSHOT_READER.readMemorySnapshot(offset, buf);
   offset += 8;
-  let snapshotVersion = 0;
+  LOADED_SNAPSHOT_VERSION = 0;
   if (buf[0] == SNAPSHOT_MAGIC) {
-    snapshotVersion = buf[1];
+    LOADED_SNAPSHOT_VERSION = buf[1];
     MEMORY_SNAPSHOT_READER.readMemorySnapshot(offset, buf);
     offset += 8;
   }
@@ -361,6 +368,7 @@ function decodeSnapshot() {
   MEMORY_SNAPSHOT_READER.readMemorySnapshot(offset, jsonBuf);
   const jsonTxt = new TextDecoder().decode(jsonBuf);
   DSO_METADATA = JSON.parse(jsonTxt);
+  LOADED_BASELINE_SNAPSHOT = Number(DSO_METADATA?.settings?.baselineSnapshot);
   READ_MEMORY = function (Module) {
     // restore memory from snapshot
     MEMORY_SNAPSHOT_READER.readMemorySnapshot(snapshotOffset, Module.HEAP8);
@@ -423,5 +431,6 @@ export function maybeStoreMemorySnapshot() {
     ArtifactBundler.storeMemorySnapshot(getMemoryToUpload());
   } else if (SHOULD_SNAPSHOT_TO_DISK) {
     DiskCache.put("snapshot.bin", getMemoryToUpload());
+    console.log("Saved snapshot to disk");
   }
 }
diff --git a/src/pyodide/internal/topLevelEntropy/__init__.py b/src/pyodide/internal/topLevelEntropy/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/pyodide/internal/topLevelEntropy/entropy_import_context.py b/src/pyodide/internal/topLevelEntropy/entropy_import_context.py
new file mode 100644
index 00000000000..74f1db22d8f
--- /dev/null
+++ b/src/pyodide/internal/topLevelEntropy/entropy_import_context.py
@@ -0,0 +1,133 @@
+"""
+Manage import context for modules that use getentropy() at startup.
+
+We install a metapath finder in import_patch_manager.py which executes the
+module in the context manager returned by
+get_entropy_import_context(module_name).
+
+This module defines get_entropy_import_context, which looks up the import context by name.
+
+"random" and "numpy.random.mtrand" also have some additional patches that need
+to be installed as part of their import context to prevent top level crashes.
+
+Other rust packages are likely to need similar treatment to pydantic_core.
+"""
+
+from contextlib import contextmanager
+from array import array
+from .import_patch_manager import block_calls
+
+import sys
+
+RUST_PACKAGES = ["pydantic_core", "tiktoken"]
+MODULES_TO_PATCH = ["random", "numpy.random", "numpy.random.mtrand"] + RUST_PACKAGES
+
+# Control number of allowed entropy calls.
+
+ALLOWED_ENTROPY_CALLS = array("b", [0])
+
+
+def get_bad_entropy_flag():
+    # simpleRunPython reads out stderr. We put the address there so we can fish it out...
+    # We could use ctypes instead of array but ctypes weighs an extra 100kb compared to array.
+    print(ALLOWED_ENTROPY_CALLS.buffer_info()[0], file=sys.stderr)
+
+
+def is_bad_entropy_enabled():
+    """This is used in entropy_patches.py to let calls to disabled functions
+    through if we are allowing bad entropy
+    """
+    return ALLOWED_ENTROPY_CALLS[0] > 0
+
+
+@contextmanager
+def allow_bad_entropy_calls(n):
+    ALLOWED_ENTROPY_CALLS[0] = n
+    yield
+    if ALLOWED_ENTROPY_CALLS[0] > 0:
+        raise RuntimeError(
+            f"{ALLOWED_ENTROPY_CALLS[0]} unexpected leftover getentropy calls "
+        )
+
+
+# Module instantiation context managers
+
+
+def get_entropy_import_context(name):
+    """Look up the import context.
+
+    If there is a function called <pkg_name>_context, we'll use that. Otherwise,
+    we have a default for rust packages. (Currently only used for tiktoken).
+    """
+    if name not in MODULES_TO_PATCH:
+        return None
+    funcname = name.replace(".", "_").replace("-", "_") + "_context"
+    res = globals().get(funcname, None)
+    if res:
+        return res
+    if name in RUST_PACKAGES:
+        # Initial import needs one entropy call to initialize std::collections::HashMap hash seed
+        return rust_package_context
+    raise Exception(f"Missing context for {name}")
+
+
+@contextmanager
+def rust_package_context(module):
+    """Rust packages need one entropy call if they create a rust hash map at
+    init time."""
+    with allow_bad_entropy_calls(1):
+        yield
+
+
+@contextmanager
+def random_context(module):
+    """Importing random calls getentropy() 10 times it seems"""
+    with allow_bad_entropy_calls(10):
+        yield
+    # Block calls to functions that use the bad random seed we produced from the
+    # ten getentropy() calls. Instantiating Random with a given seed is fine,
+    # instantiating it without a seed will call getentropy() and fail.
+    # Instantiating SystemRandom is fine, calling its methods will call
+    # getentropy() and fail.
+    block_calls(module, allowlist=["Random", "SystemRandom"])
+
+
+@contextmanager
+def numpy_random_context(module):
+    """numpy.random doesn't call getentropy() itself, but we want to block calls
+    that might use the bad seed.
+
+    TODO: Maybe there are more calls we can whitelist?
+    TODO: Is it not enough to just block numpy.random.mtrand calls?
+    """
+    yield
+    # Calling default_rng() with a given seed is fine, calling it without a seed
+    # will call getentropy() and fail.
+    block_calls(module, allowlist=["default_rng"])
+
+
+@contextmanager
+def numpy_random_mtrand_context(module):
+    # numpy.random.mtrand calls secrets.randbits at top level to seed itself.
+    # This will fail if we don't let it through.
+    with allow_bad_entropy_calls(1):
+        yield
+    # Block calls until we get a chance to replace the bad random seed.
+    block_calls(module)
+
+
+@contextmanager
+def pydantic_core_context(module):
+    try:
+        # Initial import needs one entropy call to initialize std::collections::HashMap hash seed
+        with allow_bad_entropy_calls(1):
+            yield
+    finally:
+        try:
+            with allow_bad_entropy_calls(1):
+                # validate_core_schema makes an ahash::AHashMap which makes another entropy call for
+                # its hash seed. It will throw an error but only after making the needed entropy
+                # call.
+                module.validate_core_schema(None)
+        except module.SchemaError:
+            pass
diff --git a/src/pyodide/internal/topLevelEntropy/entropy_patches.py b/src/pyodide/internal/topLevelEntropy/entropy_patches.py
new file mode 100644
index 00000000000..93e681b6523
--- /dev/null
+++ b/src/pyodide/internal/topLevelEntropy/entropy_patches.py
@@ -0,0 +1,134 @@
+"""
+Handle the top level getentropy() mess:
+
+The C stdlib function `getentropy()` calls
+`crypto.getRandomValues()` but this throws an error at top level which causes a
+fatal error.
+
+Goals:
+
+1. Avoid top-level calls to the C stdlib function getentropy(), these fatally
+   fail. Patch these to raise Python errors instead.
+2. Allow top level import of `random` and `numpy.random` modules. These seed
+   themselves with the functions that we patched in step 1, we temporarily
+   replace the `getentropy()` calls with no-ops to let them through.
+3. Install wrapper modules at top level that only allow calls to a whitelisted
+   set of functions from `random` and `numpy.random` that don't use the bad
+   seeds that came from step 2.
+4. Put it all back.
+5. Reseed the rng before entering the request scope for the first time.
+
+Steps 1, part of 4, and 5 are handled here, steps 2, 3, and part of 4 are
+handled in entropy_import_context.py and import_patch_manager.py.
+"""
+
+import _random
+import sys
+import os
+from functools import wraps
+
+from .entropy_import_context import is_bad_entropy_enabled, get_entropy_import_context
+from .import_patch_manager import (
+    install_import_patch_manager,
+    remove_import_patch_manager,
+)
+
+IN_REQUEST_CONTEXT = False
+
+
+def should_allow_entropy_call():
+    """This helps us raise Python errors rather than fatal errors in some cases.
+
+    It doesn't really matter that much since we're not likely to recover from
+    these anyways but it feels better.
+    """
+    # Allow if we've either entered request context or if we've temporarily enabled entropy.
+    return IN_REQUEST_CONTEXT or is_bad_entropy_enabled()
+
+
+# Step 1.
+#
+# Prevent calls to getentropy(). The intended way for `getentropy()` to fail is to set an EIO error,
+# which turns into a Python OSError, so we raise this same error so that if we patch `getentropy`
+# from the Emscripten C stdlib we can remove these patches without changing the behavior.
+
+EIO = 29
+
+orig_urandom = os.urandom
+
+
+@wraps(orig_urandom)
+def patch_urandom(*args):
+    if not should_allow_entropy_call():
+        raise OSError(EIO, "Cannot get entropy outside of request context")
+    return orig_urandom(*args)
+
+
+def disable_urandom():
+    """
+    Python os.urandom() calls C getentropy() which calls JS crypto.getRandomValues() which throws at
+    top level, fatally crashing the interpreter.
+
+    TODO: Patch Emscripten's getentropy() to return EIO if `crypto.getRandomValues()` throws. Then
+    we can remove this.
+    """
+    os.urandom = patch_urandom
+
+
+def restore_urandom():
+    os.urandom = orig_urandom
+
+
+orig_Random_seed = _random.Random.seed
+
+
+@wraps(orig_Random_seed)
+def patched_seed(self, val):
+    """
+    Random.seed calls _PyOs_URandom which will fatally fail in top level. Prevent this by raising an
+    OSError instead.
+    """
+    if val is None and not should_allow_entropy_call():
+        raise OSError(EIO, "Cannot get entropy outside of request context")
+    return orig_Random_seed(self, val)
+
+
+def disable_random_seed():
+    # Install patch to block calls to PyOs_URandom
+    _random.Random.seed = patched_seed
+
+
+def restore_random_seed():
+    # Restore original random seed behavior
+    _random.Random.seed = orig_Random_seed
+
+
+def reseed_rng():
+    """
+    Step 5: Have to reseed randomness in the IoContext of the first request since we gave a low
+    quality seed when it was seeded at top level.
+    """
+    from random import seed
+
+    seed()
+
+    if "numpy.random" in sys.modules:
+        from numpy.random import seed
+
+        seed()
+
+
+def before_top_level():
+    disable_urandom()
+    disable_random_seed()
+    install_import_patch_manager(get_entropy_import_context)
+
+
+def before_first_request():
+    global IN_REQUEST_CONTEXT
+
+    IN_REQUEST_CONTEXT = True
+    restore_urandom()
+    restore_random_seed()
+    remove_import_patch_manager()
+    reseed_rng()
diff --git a/src/pyodide/internal/topLevelEntropy/import_patch_manager.py b/src/pyodide/internal/topLevelEntropy/import_patch_manager.py
new file mode 100644
index 00000000000..2599cdda23f
--- /dev/null
+++ b/src/pyodide/internal/topLevelEntropy/import_patch_manager.py
@@ -0,0 +1,163 @@
+"""
+A metapath finder which calls get_import_context(module_name). If it returns a
+value that is not None, this is interpreted as a context manager that should be
+used when executing the module top level scope.
+
+When we're done, we put back the original module. The wrapper module and wrapper
+stubs will persist in the wild, so we need to make sure they behave the same way
+as the originals after we put them back. This is controlled by the
+IN_REQUEST_CONTEXT variable.
+"""
+
+from functools import wraps
+import sys
+
+
+class PatchLoader:
+    """Loader that calls the original loader in the given context manager"""
+
+    def __init__(self, orig_loader, import_context):
+        self.orig_loader = orig_loader
+        self.import_context = import_context
+
+    def __getattr__(self, name):
+        return getattr(self.orig_loader, name)
+
+    def exec_module(self, module):
+        with self.import_context(module):
+            self.orig_loader.exec_module(module)
+
+
+class PatchFinder:
+    """Finder that returns our PatchLoader if get_import_context returns an import
+    context for the module. Otherwise, return None.
+    """
+
+    def __init__(self, get_import_context):
+        self.get_import_context = get_import_context
+
+    def invalidate_caches(self):
+        pass
+
+    def find_spec(
+        self,
+        fullname: str,
+        path,
+        target,
+    ):
+        import_context = self.get_import_context(fullname)
+        if not import_context:
+            # Not ours
+            return None
+
+        for finder in sys.meta_path:
+            if isinstance(finder, PatchFinder):
+                # Avoid infinite recurse. Presumably this is the first entry.
+                continue
+            spec = finder.find_spec(fullname, path, target)
+            if spec:
+                # Found original module spec
+                break
+        else:
+            # Not found. This is going to be an ImportError.
+            return None
+        # Overwrite the loader with our wrapped loader
+        spec.loader = PatchLoader(spec.loader, import_context)
+        return spec
+
+    @staticmethod
+    def install(get_import_context):
+        sys.meta_path.insert(0, PatchFinder(get_import_context))
+
+    @staticmethod
+    def remove():
+        for idx, val in enumerate(sys.meta_path):
+            if isinstance(val, PatchFinder):
+                break
+        del sys.meta_path[idx]
+
+
+def install_import_patch_manager(get_import_context):
+    PatchFinder.install(get_import_context)
+
+
+def remove_import_patch_manager():
+    PatchFinder.remove()
+    unblock_calls()
+
+
+# We remove the metapath entry and replace the patched sys.modules entries with
+# the original modules before the request context, but the patched copies can
+# still be used from top level imports. When IN_REQUEST_CONTEXT is True, we need
+# to make sure that our patches behave like the original imports.
+IN_REQUEST_CONTEXT = False
+# Keep track of the unblocked modules so we can put them back into sys.modules
+# when we're done.
+ORIG_MODULES = {}
+
+
+def block_calls(module, *, allowlist=[]):
+    # Called from the import context for modules that need to block calls.
+    sys.modules[module.__name__] = BlockedCallModule(module, allowlist)
+    ORIG_MODULES[module.__name__] = module
+
+
+def unblock_calls():
+    # Remove the patches when we're ready to enable entropy calls.
+    global IN_REQUEST_CONTEXT
+
+    IN_REQUEST_CONTEXT = True
+    for name, val in ORIG_MODULES.items():
+        sys.modules[name] = val
+
+
+class BlockedCallModule:
+    """A proxy class that wraps a module that we want to block calls to
+
+    Attribute access is passed on to the original module but if the result is a
+    callable that isn't in the allow list, we wrap it with a function that
+    raises an error unless IN_REQUEST_CONTEXT is true.
+
+    Note that because we define __getattribute__ and __setattr__, we cannot do
+    direct reads or assignments e.g., `self.a = 1`. This risks recursion errors
+    if there is a typo. Instead, we have to call super().__setattr__.
+
+    This has the advantage that it avoids name clashes if the proxied module
+    actually defines variables called _mod or _allow_list.
+    """
+
+    def __init__(self, module, allowlist):
+        super().__setattr__("_mod", module)
+        super().__setattr__("_allow_list", allowlist)
+
+    def __getattribute__(self, key):
+        mod = super().__getattribute__("_mod")
+        orig = getattr(mod, key)
+        if IN_REQUEST_CONTEXT:
+            return orig
+        if not callable(orig):
+            return orig
+
+        if key in super().__getattribute__("_allow_list"):
+            return orig
+
+        # If we aren't in a request scope, the value is a callable, and it's not in the allow_list,
+        # return a wrapper that raises an error if it's called before entering the request scope.
+        # TODO: this doesn't wrap classes correctly, does it matter?
+        @wraps(orig)
+        def wrapper(*args, **kwargs):
+            if not IN_REQUEST_CONTEXT:
+                raise RuntimeError(
+                    f"Cannot use {mod.__name__}.{key}() outside of request context"
+                )
+            return orig(*args, **kwargs)
+
+        return wrapper
+
+    def __setattr__(self, key, val):
+        mod = super().__getattribute__("_mod")
+        setattr(mod, key, val)
+
+    def __dir__(self):
+        mod = super().__getattribute__("_mod")
+        return dir(mod)
diff --git a/src/pyodide/internal/topLevelEntropy/lib.js b/src/pyodide/internal/topLevelEntropy/lib.js
new file mode 100644
index 00000000000..0170b77f791
--- /dev/null
+++ b/src/pyodide/internal/topLevelEntropy/lib.js
@@ -0,0 +1,177 @@
+/**
+ * Handle the top level getentropy() mess. See entropy_patches.py which is the
+ * main file for the entropy patches.
+ *
+ * This file installs the relevant files and calls the exports from
+ * entropy_patches.py. setupShouldAllowBadEntropy reads out the address of the
+ * byte that we use to control calls to crypto.getRandomValues from Python.
+ */
+
+import { default as entropyPatches } from "pyodide-internal:topLevelEntropy/entropy_patches.py";
+import { default as entropyImportContext } from "pyodide-internal:topLevelEntropy/entropy_import_context.py";
+import { default as importPatchManager } from "pyodide-internal:topLevelEntropy/import_patch_manager.py";
+import { IS_TRACING } from "pyodide-internal:metadata";
+import { LOADED_SNAPSHOT_VERSION } from "pyodide-internal:snapshot";
+import { simpleRunPython } from "pyodide-internal:util";
+
+// TODO: When we've updated all the snapshots, remove this.
+const SHOULD_GATE_ENTROPY =
+  !IS_TRACING &&
+  (LOADED_SNAPSHOT_VERSION === undefined || LOADED_SNAPSHOT_VERSION === 2);
+
+let allowed_entropy_calls_addr;
+
+/**
+ * Set up a byte for communication between JS and Python.
+ *
+ * We make an array in Python and then get its address in JavaScript so
+ * shouldAllowBadEntropy can check / write back the value
+ */
+function setupShouldAllowBadEntropy(Module) {
+  // get_bad_entropy_flag prints the address we want into stderr which is returned into res.
+  // We parse this as an integer.
+  const res = simpleRunPython(
+    Module,
+    "from _cloudflare.entropy_import_context import get_bad_entropy_flag;" +
+      "get_bad_entropy_flag();" +
+      "del get_bad_entropy_flag",
+  );
+  allowed_entropy_calls_addr = Number(res);
+}
+
+function shouldAllowBadEntropy(Module) {
+  if (!SHOULD_GATE_ENTROPY) {
+    return true;
+  }
+  const val = Module.HEAP8[allowed_entropy_calls_addr];
+  if (val) {
+    Module.HEAP8[allowed_entropy_calls_addr]--;
+    return true;
+  }
+  return false;
+}
+
+/**
+ * Some packages need hash or random seeds at import time. We carefully track
+ * how much bad entropy we're giving everyone so that hopefully none of it ends
+ * up in a place where the end user needed good entropy. In particular, we think
+ * it's acceptable to give poor entropy for hash seeds but not for random seeds.
+ * The random libraries are allowed to initialize themselves with a bad seed but
+ * we disable them until we have a chance to reseed.
+ *
+ * See entropy_import_context.py where `allow_bad_entropy_calls` is used to dole
+ * out the bad entropy.
+ */
+export function getRandomValues(Module, arr) {
+  try {
+    return crypto.getRandomValues(arr);
+  } catch (e) {
+    if (
+      !e.message.includes("Disallowed operation called within global scope")
+    ) {
+      Module._dump_traceback();
+      throw e;
+    }
+    if (!shouldAllowBadEntropy(Module)) {
+      Module._dump_traceback();
+      throw e;
+    }
+    // "entropy" in the test suite is a bunch of 42's. Good to use a readily identifiable pattern
+    // here which is different than the test suite.
+    arr.fill(43);
+  }
+}
+
+/**
+ * We call this regardless of whether we are restoring from a snapshot or not,
+ * after instantiating the Emscripten module but before restoring the snapshot.
+ * Hypothetically, we could skip it for new dedicated snapshots.
+ */
+export function entropyMountFiles(Module) {
+  Module.FS.mkdir(`/lib/python3.12/site-packages/_cloudflare`);
+  Module.FS.writeFile(
+    `/lib/python3.12/site-packages/_cloudflare/__init__.py`,
+    new Uint8Array(0),
+    { canOwn: true },
+  );
+  Module.FS.writeFile(
+    `/lib/python3.12/site-packages/_cloudflare/entropy_patches.py`,
+    new Uint8Array(entropyPatches),
+    { canOwn: true },
+  );
+  Module.FS.writeFile(
+    `/lib/python3.12/site-packages/_cloudflare/entropy_import_context.py`,
+    new Uint8Array(entropyImportContext),
+    { canOwn: true },
+  );
+  Module.FS.writeFile(
+    `/lib/python3.12/site-packages/_cloudflare/import_patch_manager.py`,
+    new Uint8Array(importPatchManager),
+    { canOwn: true },
+  );
+}
+
+/**
+ * This prepares us to execute the top level scope. It changes JS state so it
+ * needs to be called whether restoring snapshot or not. We have to call this
+ * after the runtime is ready, so after restoring the snapshot in the snapshot
+ * branch and after entropyMountFiles in the no-snapshot branch.
+ */
+export function entropyAfterRuntimeInit(Module) {
+  setupShouldAllowBadEntropy(Module);
+}
+
+/**
+ * This prepares us to execute the top level scope. It changes only Python state
+ * so it doesn't need to be called when restoring from snapshot.
+ */
+export function entropyBeforeTopLevel(Module) {
+  if (!SHOULD_GATE_ENTROPY) {
+    return;
+  }
+  simpleRunPython(
+    Module,
+    `
+from _cloudflare.entropy_patches import before_top_level
+before_top_level()
+del before_top_level
+`,
+  );
+}
+
+let isReady = false;
+/**
+ * Called to reseed rngs and turn off blocks that prevent access to rng APIs.
+ */
+export function entropyBeforeRequest(Module) {
+  if (isReady) {
+    // I think this is only ever called once, but we guard it just to be sure.
+    return;
+  }
+  isReady = true;
+  if (SHOULD_GATE_ENTROPY) {
+    simpleRunPython(
+      Module,
+      `
+from _cloudflare.entropy_patches import before_first_request
+before_first_request()
+del before_first_request
+    `,
+    );
+  } else {
+    // If we shouldn't gate entropy, we just need to reseed_rng. We first have
+    // to call invalidate_caches b/c the snapshot doesn't know about
+    // _cloudflare.entropy_patches.
+    simpleRunPython(
+      Module,
+      `
+from importlib import invalidate_caches
+invalidate_caches()
+del invalidate_caches
+from _cloudflare.entropy_patches import reseed_rng
+reseed_rng()
+del reseed_rng
+    `,
+    );
+  }
+}
diff --git a/src/pyodide/internal/util.js b/src/pyodide/internal/util.js
index 9f775d299d2..f7b3b62ab55 100644
--- a/src/pyodide/internal/util.js
+++ b/src/pyodide/internal/util.js
@@ -41,4 +41,5 @@ export function simpleRunPython(emscriptenModule, code) {
     }
     throw new Error("Failed");
   }
+  return err;
 }
diff --git a/src/pyodide/python-entrypoint-helper.js b/src/pyodide/python-entrypoint-helper.js
index 7e17da36c72..7bd45e17691 100644
--- a/src/pyodide/python-entrypoint-helper.js
+++ b/src/pyodide/python-entrypoint-helper.js
@@ -2,7 +2,10 @@
 // python-entrypoint.js USER module.
 
 import { loadPyodide } from "pyodide-internal:python";
-import { uploadArtifacts, maybeStoreMemorySnapshot } from "pyodide-internal:snapshot";
+import {
+  uploadArtifacts,
+  maybeStoreMemorySnapshot,
+} from "pyodide-internal:snapshot";
 import { enterJaegerSpan } from "pyodide-internal:jaeger";
 import {
   REQUIREMENTS,
@@ -19,6 +22,7 @@ import {
 } from "pyodide-internal:metadata";
 import { reportError } from "pyodide-internal:util";
 import { default as Limiter } from "pyodide-internal:limiter";
+import { entropyBeforeRequest } from "pyodide-internal:topLevelEntropy/lib";
 
 function pyimportMainModule(pyodide) {
   if (!MAIN_MODULE_NAME.endsWith(".py")) {
@@ -124,27 +128,11 @@ function getMainModule() {
   });
 }
 
-/**
- * Have to reseed randomness in the IoContext of the first request since we gave a low quality seed
- * when it was seeded at top level.
- */
-let isSeeded = false;
-function reseedRandom(pyodide) {
-  if (isSeeded) {
-    return;
-  }
-  isSeeded = true;
-  pyodide.runPython(`
-    from random import seed
-    seed()
-    del seed
-  `);
-}
-
 async function preparePython() {
   const pyodide = await getPyodide();
-  reseedRandom(pyodide);
-  return await getMainModule();
+  const mainModule = await getMainModule();
+  entropyBeforeRequest(pyodide._module); // reseed rngs, unblock rng APIs
+  return mainModule;
 }
 
 function makeHandler(pyHandlerName) {
diff --git a/src/workerd/server/tests/python/BUILD.bazel b/src/workerd/server/tests/python/BUILD.bazel
index 7c178564672..380c8d60f2c 100644
--- a/src/workerd/server/tests/python/BUILD.bazel
+++ b/src/workerd/server/tests/python/BUILD.bazel
@@ -22,6 +22,17 @@ wd_test(
     ),
 )
 
+wd_test(
+    src = "random/random.wd-test",
+    args = ["--experimental"],
+    data = glob(
+        [
+            "random/*",
+        ],
+        exclude = ["**/*.wd-test"],
+    ),
+)
+
 # langchain test: disabled for now because it's flaky
 # TODO: reenable this?
 #
diff --git a/src/workerd/server/tests/python/random/random.wd-test b/src/workerd/server/tests/python/random/random.wd-test
new file mode 100644
index 00000000000..612bdd1645b
--- /dev/null
+++ b/src/workerd/server/tests/python/random/random.wd-test
@@ -0,0 +1,15 @@
+using Workerd = import "/workerd/workerd.capnp";
+
+const unitTests :Workerd.Config = (
+  services = [
+    ( name = "python-hello",
+      worker = (
+        modules = [
+          (name = "worker.py", pythonModule = embed "worker.py")
+        ],
+        compatibilityDate = "2024-01-15",
+        compatibilityFlags = ["python_workers"],
+      )
+    ),
+  ],
+);
diff --git a/src/workerd/server/tests/python/random/worker.py b/src/workerd/server/tests/python/random/worker.py
new file mode 100644
index 00000000000..df63bcfa830
--- /dev/null
+++ b/src/workerd/server/tests/python/random/worker.py
@@ -0,0 +1,57 @@
+"""
+Verify that `random`, `randbytes` and `choice` raise RuntimeError when called
+at the top level, and that the same calls made from `test()` go through.
+Calls to random should only work inside a request context.
+"""
+
+from random import random, randbytes, choice
+
+try:
+    random()
+except RuntimeError as e:
+    assert (
+        repr(e)
+        == "RuntimeError('Cannot use random.random() outside of request context')"
+    )
+else:
+    assert False  # reached only if random() raised nothing
+
+try:
+    randbytes(5)
+except RuntimeError as e:
+    assert (
+        repr(e)
+        == "RuntimeError('Cannot use random.randbytes() outside of request context')"
+    )
+else:
+    assert False  # reached only if randbytes() raised nothing
+
+try:
+    choice([1, 2, 3])
+except RuntimeError as e:
+    assert (
+        repr(e)
+        == "RuntimeError('Cannot use random.choice() outside of request context')"
+    )
+else:
+    assert False  # reached only if choice() raised nothing
+
+
+def t1():
+    from random import random, randbytes
+    # `choice` is not re-imported here; it resolves to the module-level import.
+    random()
+    randbytes(5)
+    choice([1, 2, 3])
+
+
+def t2():
+    random()
+    randbytes(5)
+    choice([1, 2, 3])
+    # Repeat through t1, which re-imports random and randbytes locally.
+    t1()
+
+
+def test():
+    t2()  # runs the module-level-import calls, then t1's local-import calls