Skip to content

Commit

Permalink
Python: Make top level random() raise
Browse files Browse the repository at this point in the history
We don't have access to entropy outside of request scope, so `random()` will
give poor results. Raise an error if it is called.
  • Loading branch information
hoodmane committed Apr 17, 2024
1 parent b5982ec commit 698262d
Show file tree
Hide file tree
Showing 14 changed files with 678 additions and 53 deletions.
8 changes: 4 additions & 4 deletions src/pyodide/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,14 @@ REPLACEMENTS = [
"Date.now",
"monotonicDateNow",
],
[
"crypto.getRandomValues",
"getRandomValues"
],
[
"reportUndefinedSymbols()",
"reportUndefinedSymbolsNoOp()"
],
[
"crypto.getRandomValues(",
"getRandomValues(Module, ",
]
]

load("//:build/pyodide_bucket.bzl", "PYODIDE_PACKAGE_BUCKET_URL")
Expand Down
18 changes: 1 addition & 17 deletions src/pyodide/internal/builtin_wrappers.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { default as UnsafeEval } from "internal:unsafe-eval";
import { default as DiskCache } from "pyodide-internal:disk_cache";
export { getRandomValues } from "pyodide-internal:random";

let lastTime;
let lastDelta = 0;
Expand All @@ -19,23 +20,6 @@ export function monotonicDateNow() {
return now + lastDelta;
}

/**
* Wrapper around crypto.getRandomValues that tolerates being called in global scope.
*
* We initialize Python at top level, but it tries to initialize the random seed with
* crypto.getRandomValues which will fail at top level. So we don't produce any entropy the first
* time around and we reseed the rng in the first request context before executing user code.
*
* @param arr the array to fill with random values
* @returns the result of crypto.getRandomValues(arr), or `arr` left unmodified when the call
*   was rejected with the "Disallowed operation called within global scope" error
* @throws any other error raised by crypto.getRandomValues is rethrown unchanged
*/
export function getRandomValues(arr) {
try {
return crypto.getRandomValues(arr);
} catch (e) {
if (e.message.includes("Disallowed operation called within global scope")) {
// random.seed() can't work at startup. We'll seed again under the request scope.
return arr;
}
throw e;
}
}

/**
* First check that the callee is what we expect, then use `UnsafeEval` to
* construct a `WasmModule`.
Expand Down
110 changes: 110 additions & 0 deletions src/pyodide/internal/patches/_cloudflare_patch_finder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from functools import wraps
import sys


class PatchLoader:
    """Loader proxy that runs the wrapped loader's ``exec_module`` inside a patch context.

    ``patch_context`` is called with the module being executed and must return a context
    manager. Any attribute not defined on this proxy is looked up on the wrapped loader.
    """

    def __init__(self, orig_loader, patch_context):
        self.orig_loader, self.patch_context = orig_loader, patch_context

    def __getattr__(self, name):
        # Only invoked when normal attribute lookup fails, so everything this class does not
        # define itself is forwarded to the original loader.
        wrapped = self.orig_loader
        return getattr(wrapped, name)

    def exec_module(self, module):
        """Execute ``module`` with the original loader while the patch context is active."""
        context = self.patch_context(module)
        with context:
            self.orig_loader.exec_module(module)


class PatchFinder:
    """Meta path finder that wraps the loader of selected modules in a ``PatchLoader``.

    ``get_patch_func(fullname)`` decides whether a module is patched: it returns a falsy
    value to leave the import alone, or a callable taking the module and returning a context
    manager that is active while the module body executes.
    """

    def __init__(self, get_patch_func):
        self.get_patch_func = get_patch_func

    def invalidate_caches(self):
        """No-op: this finder keeps no caches."""

    def find_spec(
        self,
        fullname: str,
        path,
        target=None,
    ):
        """Return the real spec for ``fullname`` with its loader wrapped, or ``None``.

        ``None`` is returned when the module has no patch function or when no other finder on
        ``sys.meta_path`` can locate it, so the normal import machinery carries on.
        """
        patch_func = self.get_patch_func(fullname)
        if not patch_func:
            return None

        for finder in sys.meta_path:
            # Skip ourselves (and any other PatchFinder) to avoid infinite recursion.
            if isinstance(finder, PatchFinder):
                continue
            # Like importlib itself, ignore meta path entries that do not implement find_spec.
            finder_find_spec = getattr(finder, "find_spec", None)
            if finder_find_spec is None:
                continue
            spec = finder_find_spec(fullname, path, target)
            if spec:
                break
        else:
            return None
        spec.loader = PatchLoader(spec.loader, patch_func)
        return spec

    @staticmethod
    def install(get_patch_func):
        """Insert a new ``PatchFinder`` at the front of ``sys.meta_path``."""
        sys.meta_path.insert(0, PatchFinder(get_patch_func))

    @staticmethod
    def remove():
        """Remove the first ``PatchFinder`` from ``sys.meta_path``, if one is installed.

        Does nothing when no ``PatchFinder`` is present, so an unrelated finder is never
        deleted by accident.
        """
        for idx, val in enumerate(sys.meta_path):
            if isinstance(val, PatchFinder):
                del sys.meta_path[idx]
                return


# Set to True by unblock_calls(). While it is False, BlockedCallModule hands out wrappers that
# raise for non-allowlisted callables; once it is True everything passes straight through.
IN_REQUEST_CONTEXT = False
# Maps module name -> original module object for every module replaced by block_calls(), so
# that unblock_calls() can put the originals back into sys.modules.
ORIG_MODULES = {}


def block_calls(module, *, allowlist=()):
    """Replace ``module`` in ``sys.modules`` with a call-blocking proxy.

    The original module is recorded in ``ORIG_MODULES`` so that ``unblock_calls()`` can
    restore it later.

    Args:
        module: The module object to hide behind a ``BlockedCallModule``.
        allowlist: Names of callables on ``module`` that stay usable while calls are blocked.
            Defaults to an immutable empty tuple so the default is never shared mutable state.
    """
    sys.modules[module.__name__] = BlockedCallModule(module, allowlist)
    ORIG_MODULES[module.__name__] = module


def unblock_calls():
    """Mark the request context as entered and restore every module hidden by block_calls().

    After this runs, wrappers previously handed out by ``BlockedCallModule`` call through to
    the original functions because ``IN_REQUEST_CONTEXT`` is True.
    """
    global IN_REQUEST_CONTEXT

    IN_REQUEST_CONTEXT = True
    # Put each saved original back under its own name.
    sys.modules.update(ORIG_MODULES)


class BlockedCallModule:
    """Stand-in for a module that refuses calls made before the request context.

    Attribute reads are forwarded to the wrapped module. Outside the request context,
    callables that are not on the allow list come back wrapped so that calling them raises
    ``RuntimeError``. Attribute writes and ``dir()`` go straight to the wrapped module.
    """

    def __init__(self, module, allowlist):
        # Go through object.__setattr__ because our own __setattr__ forwards to the module.
        object.__setattr__(self, "_mod", module)
        object.__setattr__(self, "_allow_list", allowlist)

    def __getattr__(self, key):
        mod = object.__getattribute__(self, "_mod")
        orig = getattr(mod, key)
        # Inside the request context, and for plain data, behave exactly like the module.
        if IN_REQUEST_CONTEXT or not callable(orig):
            return orig
        if key in object.__getattribute__(self, "_allow_list"):
            return orig

        # If we aren't in a request scope, the value is a callable, and it's not in the allow_list,
        # return a wrapper that raises an error if it's called before entering the request scope.
        # TODO: this doesn't wrap classes correctly, does it matter?
        @wraps(orig)
        def wrapper(*args, **kwargs):
            # Re-check at call time: the wrapper may outlive the blocked phase.
            if IN_REQUEST_CONTEXT:
                return orig(*args, **kwargs)
            raise RuntimeError(
                f"Cannot use {mod.__name__}.{key}() outside of request context"
            )

        return wrapper

    def __setattr__(self, key, val):
        setattr(object.__getattribute__(self, "_mod"), key, val)

    def __dir__(self):
        return dir(object.__getattribute__(self, "_mod"))
123 changes: 123 additions & 0 deletions src/pyodide/internal/patches/_cloudflare_random_overlays.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""
Manage overlay modules for random modules
The actual contents of the overlay are in _cloudflare_random_overlay_module, it just implements a
module __getattr__ and __dir__ that provide the __name__ of the particular overlay and call back
into the impls here.
We only lazily import the original module so that we can install an overlay for `numpy.random` even
when `numpy.random` isn't installed. This also avoids paying to instantiate the overlaid module if
it's not necessary. This means we have to remove our overlay from `sys.modules` before importing the
original module the first time and put it back afterwards. "random" and "numpy.random.mtrand" also
have some additional patches that need to be installed as part of their import context to prevent
top level crashes.
When we're done, we put back the original module but the wrapper module and wrapper stubs will
persist in the wild, so we need to make sure they behave the same way as the originals after we put
them back. This is controlled by the IN_REQUEST_CONTEXT variable.
Other rust packages are likely to need similar treatment to pydantic_core.
"""

from contextlib import contextmanager
from array import array
from _cloudflare_patch_finder import PatchFinder, block_calls, unblock_calls

import sys

# Packages that get_patch_func() treats as Rust extensions: unless a dedicated
# "<name>_context" function exists in this module, they fall back to rust_package_context.
RUST_PACKAGES = ["pydantic_core", "tiktoken"]
# Every module name for which get_patch_func() supplies an import-time patch context.
MODULES_TO_OVERLAY = ["random", "numpy.random", "numpy.random.mtrand"] + RUST_PACKAGES
# We remove the overlay before the request context, but it can still be used from
# top level imports. When IN_REQUEST_CONTEXT is True, we need to make sure that our patches
# behave like the original imports.
# NOTE(review): nothing visible in this module reads or writes this flag; the flag that
# unblock_calls() flips lives in _cloudflare_patch_finder. Confirm this copy is still needed.
IN_REQUEST_CONTEXT = False
# NOTE(review): not referenced by the code visible here; confirm whether it is still used.
OVERLAY_ORIG_MODULES = {}
# One-element array of signed bytes (typecode "b") holding how many bad-entropy calls are
# currently permitted. allow_bad_entropy_calls() sets it and get_bad_entropy_flag() prints its
# buffer address to stderr, presumably so code outside Python can read/decrement it (confirm).
ALLOWED_ENTROPY_CALLS = array("b", [0])


def get_bad_entropy_flag():
    """Print the memory address of the ALLOWED_ENTROPY_CALLS buffer to stderr."""
    # simpleRunPython reads out stderr. We put the address there so we can fish it out...
    # We could use ctypes instead of array but ctypes weighs an extra 100kb compared to array.
    address, _item_count = ALLOWED_ENTROPY_CALLS.buffer_info()
    print(address, file=sys.stderr)


def is_bad_entropy_enabled():
    """Report whether any bad-entropy calls are still permitted.

    This is used in _cloudflare_random_patches to let calls to disabled functions through if
    we are allowing bad entropy.
    """
    remaining = ALLOWED_ENTROPY_CALLS[0]
    return remaining > 0


@contextmanager
def allow_bad_entropy_calls(n):
    """Permit exactly ``n`` bad-entropy calls for the duration of the ``with`` body.

    Args:
        n: Number of entropy calls expected inside the body. It is stored in the signed-byte
            counter ``ALLOWED_ENTROPY_CALLS``.

    Raises:
        RuntimeError: If the body finishes normally but some of the allowance is unused.

    The counter is always reset to zero on exit, including when the body raises or the
    leftover check fails, so a stale allowance can never let later entropy calls through.
    """
    ALLOWED_ENTROPY_CALLS[0] = n
    try:
        yield
        # Only reached when the body completed without raising.
        leftover = ALLOWED_ENTROPY_CALLS[0]
        if leftover > 0:
            raise RuntimeError(
                f"{leftover} unexpected leftover getentropy calls "
            )
    finally:
        ALLOWED_ENTROPY_CALLS[0] = 0


def install_random_overlays():
    """Install the PatchFinder that applies this module's import-time patch contexts."""
    PatchFinder.install(get_patch_func)


def remove_random_overlays():
    """Uninstall the PatchFinder, then unblock calls on the modules that were blocked."""
    PatchFinder.remove()
    unblock_calls()


def get_patch_func(name):
    """Return the import-time patch context for module ``name``, or None if it is not patched.

    The context is the module-level function called ``<name>_context``, with "." and "-" in
    ``name`` replaced by "_". Rust packages without a dedicated function share
    ``rust_package_context``.

    Raises:
        Exception: If ``name`` is listed in MODULES_TO_OVERLAY but no context can be found.
    """
    if name not in MODULES_TO_OVERLAY:
        return None
    context_name = "{}_context".format(name.replace(".", "_").replace("-", "_"))
    found = globals().get(context_name)
    if found:
        return found
    if name not in RUST_PACKAGES:
        raise Exception(f"Missing context for {name}")
    # Initial import needs one entropy call to initialize std::collections::HashMap hash seed
    return rust_package_context


@contextmanager
def random_context(module):
    """Patch context for ``random``.

    Allows 10 bad-entropy calls while the module body executes, then blocks calls on the
    module, leaving only ``Random`` and ``SystemRandom`` usable.
    """
    entropy_allowance = allow_bad_entropy_calls(10)
    with entropy_allowance:
        yield
    # Only reached when the import succeeded and the allowance check passed.
    still_allowed = ["Random", "SystemRandom"]
    block_calls(module, allowlist=still_allowed)


@contextmanager
def numpy_random_context(module):
    """Patch context for ``numpy.random``.

    Grants no bad-entropy allowance during import. Afterwards it blocks calls on the module,
    leaving only ``default_rng`` usable.
    """
    yield
    still_allowed = ["default_rng"]
    block_calls(module, allowlist=still_allowed)


@contextmanager
def numpy_random_mtrand_context(module):
    """Patch context for ``numpy.random.mtrand``.

    Allows one bad-entropy call while the module body executes, then blocks every callable
    on the module (no allow list).
    """
    entropy_allowance = allow_bad_entropy_calls(1)
    with entropy_allowance:
        yield
    block_calls(module)


@contextmanager
def rust_package_context(module):
    """Default patch context for Rust packages.

    Allows one bad-entropy call while the module body executes. ``module`` is not used and
    no calls are blocked afterwards.
    """
    entropy_allowance = allow_bad_entropy_calls(1)
    with entropy_allowance:
        yield


@contextmanager
def pydantic_core_context(module):
    """Patch context for ``pydantic_core``.

    Allows one bad-entropy call while the module body executes. Afterwards it always makes a
    deliberately failing ``validate_core_schema`` call under a second one-call allowance.
    """
    try:
        # Initial import needs one entropy call to initialize std::collections::HashMap hash seed
        import_allowance = allow_bad_entropy_calls(1)
        with import_allowance:
            yield
    finally:
        try:
            schema_allowance = allow_bad_entropy_calls(1)
            with schema_allowance:
                # validate_core_schema makes an ahash::AHashMap which makes another entropy call for
                # its hash seed. It will throw an error but only after making the needed entropy
                # call.
                module.validate_core_schema(None)
        except module.SchemaError:
            # Expected: the call above exists only for its entropy side effect.
            pass
Loading

0 comments on commit 698262d

Please sign in to comment.