Skip to content

Commit

Permalink
Python: Make top level random() raise
Browse files Browse the repository at this point in the history
We don't have access to entropy outside of request scope, so `random()` will
give poor results. Raise an error if it is called.
  • Loading branch information
hoodmane committed Apr 9, 2024
1 parent d81c327 commit 89f6af5
Show file tree
Hide file tree
Showing 11 changed files with 464 additions and 42 deletions.
6 changes: 1 addition & 5 deletions src/pyodide/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ copy_file(
# TODO: all of these should be fixed by linking our own Pyodide or by upstreaming.

PRELUDE = """
import { newWasmModule, monotonicDateNow, wasmInstantiate, getRandomValues } from "pyodide-internal:builtin_wrappers";
import { newWasmModule, monotonicDateNow, wasmInstantiate } from "pyodide-internal:builtin_wrappers";
// Pyodide uses `new URL(some_url, location)` to resolve the path in `loadPackage`. Setting
// `location = undefined` makes this throw an error if some_url is not an absolute url. Which is what
Expand Down Expand Up @@ -117,10 +117,6 @@ REPLACEMENTS = [
"Date.now",
"monotonicDateNow",
],
[
"crypto.getRandomValues",
"getRandomValues"
],
[
"reportUndefinedSymbols()",
"reportUndefinedSymbolsNoOp()"
Expand Down
17 changes: 0 additions & 17 deletions src/pyodide/internal/builtin_wrappers.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,6 @@ export function monotonicDateNow() {
return now + lastDelta;
}

/**
 * Wrapper around `crypto.getRandomValues` that tolerates being called at top level.
 *
 * Python is initialized at top level and tries to seed its rng through
 * `crypto.getRandomValues`, which throws there. In that one case the array is handed back
 * without any entropy written to it; the rng is reseeded in the first request context before
 * user code runs. Every other error is rethrown.
 */
export function getRandomValues(arr) {
  let filled;
  try {
    filled = crypto.getRandomValues(arr);
  } catch (e) {
    const calledInGlobalScope = e.message.includes(
      "Disallowed operation called within global scope"
    );
    if (!calledInGlobalScope) {
      throw e;
    }
    // random.seed() can't work at startup. We'll seed again under the request scope.
    filled = arr;
  }
  return filled;
}

/**
* First check that the callee is what we expect, then use `UnsafeEval` to
* construct a `WasmModule`.
Expand Down
10 changes: 10 additions & 0 deletions src/pyodide/internal/patches/_cloudflare_random_overlay_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
def __getattr__(key):
    """Resolve attribute ``key`` of this overlay module through the shared overlay logic."""
    # Imported inside the function, so the import happens at lookup time rather than when
    # this source is exec'd into an overlay module.
    import _cloudflare_random_overlays as overlays

    return overlays.overlay_getattr(__name__, key)


def __dir__():
    """List the attributes of this overlay module through the shared overlay logic."""
    # Imported inside the function, so the import happens at call time rather than when
    # this source is exec'd into an overlay module.
    import _cloudflare_random_overlays as overlays

    return overlays.overlay_dir(__name__)
202 changes: 202 additions & 0 deletions src/pyodide/internal/patches/_cloudflare_random_overlays.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""
Manage overlay modules for random modules
The actual contents of the overlay are in _cloudflare_random_overlay_module, it just implements a
module __getattr__ and __dir__ that provide the __name__ of the particular overlay and call back
into the impls here.
We only lazily import the original module so that we can install an overlay for `numpy.random` even
when `numpy.random` isn't installed. This also avoids paying to instantiate the overlaid module if
it's not necessary. This means we have to remove our overlay from `sys.modules` before importing the
original module the first time and put it back afterwards. "random" and "numpy.random.mtrand" also
have some additional patches that need to be installed as part of their import context to prevent
top level crashes.
When we're done, we put back the original module but the wrapper module and wrapper stubs will
persist in the wild, so we need to make sure they behave the same way as the originals after we put
them back. This is controlled by the IN_REQUEST_CONTEXT variable.
"""

from contextlib import contextmanager, nullcontext
from importlib import import_module
from functools import wraps
from pathlib import Path
from types import ModuleType

import sys

# Source text of the overlay module, read once when this module is imported.
# install_random_overlay() exec()s it into every overlay module it creates.
RANDOM_OVERLAY_MODULE_STR = Path("/lib/random_overlay/random_overlay.py").read_text()
# Names of the modules that get an overlay installed in sys.modules.
MODULES_TO_OVERLAY = ["random", "numpy.random", "numpy.random.mtrand"]
# We remove the overlay before the request context, but it can still be used from
# top level imports. When IN_REQUEST_CONTEXT is True, we need to make sure that our patches
# behave like the original imports.
IN_REQUEST_CONTEXT = False

# Maps each overlaid module name to its original module object, or to None while the
# original has not been imported yet (filled in by install_random_overlay() and
# get_orig_module()).
OVERLAY_ORIG_MODULES = {}


def load_orig_module(name):
    """Import the real module ``name`` inside its import context and return it."""
    with import_context(name):
        original = import_module(name)
    return original


def get_orig_module(name):
    """Return the original module for ``name``, importing and caching it on first use.

    ``name`` must already have an entry in OVERLAY_ORIG_MODULES, otherwise the lookup raises
    KeyError.
    """
    cached = OVERLAY_ORIG_MODULES[name]
    if cached:
        return cached
    loaded = load_orig_module(name)
    OVERLAY_ORIG_MODULES[name] = loaded
    return loaded


def install_random_overlay(name):
    """Replace ``sys.modules[name]`` with an overlay module which disables most calls.

    Whatever was in ``sys.modules`` under ``name`` (or None if it had not been imported) is
    remembered in OVERLAY_ORIG_MODULES so that it can be restored when we're done.
    """
    # Record the original first; None marks "not imported yet".
    OVERLAY_ORIG_MODULES[name] = sys.modules.get(name)

    # Build a fresh module object and populate it from the overlay source text.
    overlay = ModuleType(name)
    exec(RANDOM_OVERLAY_MODULE_STR, overlay.__dict__)
    sys.modules[name] = overlay


def install_random_overlays():
    """Install an overlay for every module listed in MODULES_TO_OVERLAY, in order."""
    for module_name in MODULES_TO_OVERLAY:
        install_random_overlay(module_name)


def remove_random_overlay(name):
    """Take the overlay for ``name`` out of ``sys.modules``.

    If the original module was recorded, it is put back; otherwise the entry is deleted.
    """
    original = OVERLAY_ORIG_MODULES[name]
    if not original:
        # The overlay wasn't ever used so just strip it out of sys.modules.
        del sys.modules[name]
        return
    # Put back the original module.
    sys.modules[name] = original


def remove_random_overlays():
    """Mark the request context as entered and remove every installed overlay."""
    global IN_REQUEST_CONTEXT

    # Set the flag before touching sys.modules: wrappers and overlay modules that were handed
    # out earlier consult it to decide whether to behave like the originals.
    IN_REQUEST_CONTEXT = True
    for module_name in MODULES_TO_OVERLAY:
        remove_random_overlay(module_name)


def overlay_dir(name):
    """Return ``dir()`` of the original module behind the overlay ``name``."""
    original = get_orig_module(name)
    return dir(original)


# Allow list, keyed by overlaid module name, of callables that are definitely okay to call at
# top level. Modules without an entry have no allowed callables.
MODULE_ALLOW_LIST = {
    "random": [
        "Random",
        "SystemRandom",
    ],
    "numpy.random": [
        "default_rng",
    ],
}


def overlay_getattr(name, key):
    """Look up attribute ``key`` of the original module ``name`` on behalf of its overlay.

    The original attribute is returned unchanged when any of these hold:

    * we are already in the request context,
    * the attribute is not callable,
    * ``key`` is on the allow list for ``name`` in MODULE_ALLOW_LIST.

    Otherwise a wrapper is returned which raises RuntimeError if it is called before the
    request context is entered and calls through to the original afterwards.

    An AttributeError from the lookup on the original module propagates to the caller.
    """
    mod = get_orig_module(name)
    orig = getattr(mod, key)
    if IN_REQUEST_CONTEXT or not callable(orig):
        return orig
    if key in MODULE_ALLOW_LIST.get(name, ()):
        return orig

    # If we aren't in a request scope, the value is a callable, and it's not in the allow list,
    # return a wrapper that raises an error if it's called before entering the request scope.
    # TODO: this doesn't wrap classes correctly, does it matter?
    @wraps(orig)
    def wrapper(*args, **kwargs):
        # The flag is re-read on every call: the wrapper can outlive the overlay (for example
        # after `from random import random` at top level) and must then behave like `orig`.
        if not IN_REQUEST_CONTEXT:
            raise RuntimeError(f"Cannot use {name}.{key}() outside of request context")
        return orig(*args, **kwargs)

    return wrapper


@contextmanager
def import_context(name):
    """Context manager to wrap around the import of the original module ``name``.

    Outside the request context the overlay is taken out of ``sys.modules`` for the duration of
    the block and put back afterwards, and any module-specific context needed to prevent top
    level errors when seeding is applied around the block.
    """
    if IN_REQUEST_CONTEXT:
        # If we've already removed the overlays, we'll hit this path if someone did
        # `import some_mod` at top level, then accesses `some_mod.some_attr` in the
        # request context. remove_random_overlays() will have already ensured that
        # there is no entry in sys.modules, so we don't have to do anything here
        yield
        return

    # Take the overlay out of sys.modules; otherwise the import would find our overlay and not
    # the original.
    overlay = sys.modules.pop(name)

    # Pick the module-specific context, falling back to the no-op one.
    module_specific = {
        "random": import_context_random,
        "numpy.random.mtrand": import_context_numpy_mrand,
    }
    make_context = module_specific.get(name, import_context_default)

    try:
        yield from make_context()
    finally:
        # Put the overlay back in sys.modules.
        sys.modules[name] = overlay


def import_context_default():
    """Generator-style context with no module-specific setup: it yields exactly once."""
    yield None


def import_context_random():
    """Generator-style context that makes ``_random.Random.seed`` a no-op while suspended.

    We've made _random.Random.seed raise an error, but random calls it at top level. To prevent
    the top level import from failing, seed is temporarily replaced by a function that does
    nothing; whatever was installed before is put back when the generator is resumed or closed.
    """
    import _random

    previous_seed = _random.Random.seed

    def noop_seed(*args):
        return None

    _random.Random.seed = wraps(previous_seed)(noop_seed)
    try:
        yield
    finally:
        _random.Random.seed = previous_seed


def import_context_numpy_mrand():
    """Generator-style context that makes ``secrets.randbits`` return 0 while suspended.

    numpy.random.mtrand will attempt to seed itself using secrets.randbits at import time. To
    prevent the top level import from failing, secrets.randbits is temporarily replaced by a
    stub returning a constant. On exit the original is restored, and the stub itself starts
    delegating to the original in case a reference to it was kept.
    """
    import secrets

    real_randbits = secrets.randbits
    stub_active = True

    @wraps(real_randbits)
    def constant_randbits(*args):
        return 0 if stub_active else real_randbits(*args)

    secrets.randbits = constant_randbits
    try:
        yield
    finally:
        secrets.randbits = real_randbits
        stub_active = False
115 changes: 115 additions & 0 deletions src/pyodide/internal/patches/_cloudflare_random_patches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
Handle the randomness mess.
Goals:
1. Avoid top-level calls to the C stdlib function getentropy(), these fatally fail. Patch these to
raise Python errors instead.
2. Allow top level import of `random` and `numpy.random` modules. These seed themselves with the
functions that we patched in step 1, we temporarily replace the `getentropy()` calls with no-ops
to let them through.
3. Install wrapper modules at top level that only allow calls to a whitelisted set of functions from
`random` and `numpy.random` that don't use the bad seeds that came from step 2.
4. Put it all back.
5. Reseed the rng before entering the request scope for the first time.
Steps 1, part of 4, and 5 are handled here, steps 2, 3, and part of 4 are handled in
_cloudflare_random_overlays.
"""

import _random
import sys
import os
from functools import wraps

from _cloudflare_random_overlays import install_random_overlays, remove_random_overlays


# Set to True by before_first_request(). While it is False, patch_urandom() and patched_seed()
# raise instead of calling through to the originals.
IN_REQUEST_CONTEXT = False

# Step 1.
#
# Prevent calls to getentropy(). The intended way for `getentropy()` to fail is to set an EIO error,
# which turns into a Python OSError, so we raise this same error so that if we patch `getentropy`
# from the Emscripten C stdlib we can remove these patches without changing the behavior.

# NOTE(review): 29 is presumably the errno value of EIO on the Emscripten target (it is not the
# Linux value) -- confirm against the Emscripten errno definitions.
EIO = 29

# Reference to the real os.urandom, kept so that patch_urandom() can delegate to it and
# restore_urandom() can put it back.
orig_urandom = os.urandom


# Replacement for os.urandom: raises OSError(EIO) until the request context has been entered and
# delegates to the real os.urandom afterwards. (No docstring here on purpose: @wraps replaces
# the wrapper's __doc__ with the original's.)
@wraps(orig_urandom)
def patch_urandom(*args):
    if IN_REQUEST_CONTEXT:
        return orig_urandom(*args)
    raise OSError(EIO, "Cannot get entropy outside of request context")


def disable_urandom():
    """
    Replace os.urandom with patch_urandom.

    Python os.urandom() calls C getentropy() which calls JS crypto.getRandomValues() which
    throws at top level, fatally crashing the interpreter.

    TODO: Patch Emscripten's getentropy() to return EIO if `crypto.getRandomValues()` throws.
    Then we can remove this.
    """
    replacement = patch_urandom
    os.urandom = replacement


def restore_urandom():
    """Undo disable_urandom() by putting the saved original os.urandom back."""
    original = orig_urandom
    os.urandom = original


# Reference to the real seed implementation, kept so that patched_seed() can delegate to it and
# restore_random_seed() can put it back.
orig_Random_seed = _random.Random.seed


@wraps(orig_Random_seed)
def patched_seed(self, val=None):
    """
    Replacement for _random.Random.seed that refuses to seed from entropy at top level.

    Seeding with None makes Random.seed call _PyOs_URandom, which will fatally fail in top
    level. Prevent this by raising an OSError (the same error a failing getentropy() would
    produce) instead. An explicit seed value, or any call made once the request context has been
    entered, is passed straight through to the original implementation.

    `val` defaults to None so that a bare `seed()` takes the same path as `seed(None)` rather
    than failing with a TypeError.
    """
    if val is None and not IN_REQUEST_CONTEXT:
        raise OSError(EIO, "Cannot get entropy outside of request context")
    return orig_Random_seed(self, val)


def disable_random_seed():
    """Install patched_seed as _random.Random.seed to block calls to PyOs_URandom."""
    replacement = patched_seed
    _random.Random.seed = replacement


def restore_random_seed():
    """Undo disable_random_seed() by restoring the original _random.Random.seed."""
    original = orig_Random_seed
    _random.Random.seed = original


def reseed_rng():
    """
    Step 5: Reseed the random number generators.

    This has to happen in the IoContext of the first request since we gave a low quality seed
    when the rng was seeded at top level. The stdlib `random` module is always reseeded;
    `numpy.random` is reseeded only if it is already present in sys.modules.
    """
    from random import seed as reseed_stdlib

    reseed_stdlib()

    if "numpy.random" not in sys.modules:
        return

    from numpy.random import seed as reseed_numpy

    reseed_numpy()


def before_top_level():
    """Install all randomness patches: block os.urandom and Random.seed, then add the overlays."""
    for install_step in (disable_urandom, disable_random_seed, install_random_overlays):
        install_step()


def before_first_request():
    """Put everything back for the request context and reseed the rng.

    Marks this module as being in the request context, then restores os.urandom and
    Random.seed, removes the overlays and finally reseeds, in that order.
    """
    global IN_REQUEST_CONTEXT

    IN_REQUEST_CONTEXT = True
    for restore_step in (
        restore_urandom,
        restore_random_seed,
        remove_random_overlays,
        reseed_rng,
    ):
        restore_step()
reseed_rng()
5 changes: 4 additions & 1 deletion src/pyodide/internal/python.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
maybeSetupSnapshotUpload,
restoreSnapshot,
} from "pyodide-internal:snapshot";
import { randomBeforeTopLevel } from "pyodide-internal:random";

/**
* This file is a simplified version of the Pyodide loader:
Expand Down Expand Up @@ -122,7 +123,8 @@ function getEmscriptenSettings(lockfile, indexURL) {
env: {
HOME: "/session",
// We don't have access to cryptographic rng at startup so we cannot support hash
// randomization. Setting `PYTHONHASHSEED` disables it.
// randomization. Setting `PYTHONHASHSEED` disables it. See further discussion in
// _cloudflare_random_patches.py
PYTHONHASHSEED: "111",
},
// This is the index that we use as the base URL to fetch the wheels.
Expand Down Expand Up @@ -189,6 +191,7 @@ async function prepareWasmLinearMemory(Module) {
// were creating the snapshot so the outcome of that is already baked in.
return;
}
randomBeforeTopLevel(Module);
adjustSysPath(Module);
maybeSetupSnapshotUpload(Module);
}
Expand Down
Loading

0 comments on commit 89f6af5

Please sign in to comment.