-
Notifications
You must be signed in to change notification settings - Fork 303
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Python: Make top level random() raise
We don't have access to entropy outside of request scope, so `random()` will give poor results. Raise an error if it is called.
- Loading branch information
Showing
11 changed files
with
474 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 10 additions & 0 deletions
10
src/pyodide/internal/patches/_cloudflare_random_overlay_module.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
def __getattr__(key):
    """Module-level attribute hook for an overlay module.

    Forwards the lookup of ``key`` to ``overlay_getattr`` together with this
    module's ``__name__``. The helper is imported at call time.
    """
    from _cloudflare_random_overlays import overlay_getattr as resolve_attr

    return resolve_attr(__name__, key)
|
||
|
||
def __dir__():
    """Module-level ``dir()`` hook for an overlay module.

    Forwards to ``overlay_dir`` with this module's ``__name__``. The helper is
    imported at call time.
    """
    from _cloudflare_random_overlays import overlay_dir as list_attrs

    return list_attrs(__name__)
202 changes: 202 additions & 0 deletions
202
src/pyodide/internal/patches/_cloudflare_random_overlays.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
""" | ||
Manage overlay modules for random modules | ||
The actual contents of the overlay are in _cloudflare_random_overlay_module, it just implements a | ||
module __getattr__ and __dir__ that provide the __name__ of the particular overlay and call back | ||
into the impls here. | ||
We only lazily import the original module so that we can install an overlay for `numpy.random` even | ||
when `numpy.random` isn't installed. This also avoids paying to instantiate the overlaid module if | ||
it's not necessary. This means we have to remove our overlay from `sys.modules` before importing the | ||
original module the first time and put it back afterwards. "random" and "numpy.random.mtrand" also | ||
have some additional patches that need to be installed as part of their import context to prevent | ||
top level crashes. | ||
When we're done, we put back the original module but the wrapper module and wrapper stubs will | ||
persist in the wild, so we need to make sure they behave the same way as the originals after we put | ||
them back. This is controlled by the IN_REQUEST_CONTEXT variable. | ||
""" | ||
|
||
from contextlib import contextmanager, nullcontext | ||
from importlib import import_module | ||
from functools import wraps | ||
from pathlib import Path | ||
from types import ModuleType | ||
|
||
import sys | ||
|
||
# Source text that is exec'd into every overlay module (see install_random_overlay).
RANDOM_OVERLAY_MODULE_STR = Path("/lib/random_overlay/random_overlay.py").read_text()

# Names of the modules that get an overlay installed in sys.modules.
MODULES_TO_OVERLAY = ["random", "numpy.random", "numpy.random.mtrand"]

# The overlays are removed before the request context starts, but overlay
# objects obtained through top level imports can still be in use afterwards.
# Once IN_REQUEST_CONTEXT is True, our patches must behave like the original
# imports.
IN_REQUEST_CONTEXT = False

# Maps an overlaid module name to the original module, or to None while the
# original has not been imported yet.
OVERLAY_ORIG_MODULES = {}
|
||
|
||
def load_orig_module(name):
    """Import and return the module ``name`` while ``import_context(name)`` is active."""
    with import_context(name):
        original = import_module(name)
    return original
|
||
|
||
def get_orig_module(name):
    """Return the original module for ``name``, importing it on first use.

    ``OVERLAY_ORIG_MODULES[name]`` is consulted first; when the stored value is
    falsy the module is loaded with ``load_orig_module`` and stored for later
    calls. Raises ``KeyError`` if ``name`` has no entry in
    ``OVERLAY_ORIG_MODULES``.
    """
    cached = OVERLAY_ORIG_MODULES[name]
    if cached:
        return cached
    loaded = load_orig_module(name)
    OVERLAY_ORIG_MODULES[name] = loaded
    return loaded
|
||
|
||
def install_random_overlay(name):
    """Replace ``sys.modules[name]`` with a freshly built overlay module.

    Whatever ``sys.modules`` currently holds for ``name`` (``None`` when it has
    not been imported) is recorded in ``OVERLAY_ORIG_MODULES`` so that it can be
    restored later. The overlay is an empty module named ``name`` whose
    namespace is populated by executing ``RANDOM_OVERLAY_MODULE_STR``.
    """
    # Record the current entry first so it is saved even if building the
    # overlay fails.
    OVERLAY_ORIG_MODULES[name] = sys.modules.get(name)

    overlay = ModuleType(name)
    exec(RANDOM_OVERLAY_MODULE_STR, overlay.__dict__)
    sys.modules[name] = overlay
|
||
|
||
def install_random_overlays():
    """Install an overlay for every name in ``MODULES_TO_OVERLAY``, in order."""
    for module_name in MODULES_TO_OVERLAY:
        install_random_overlay(module_name)
|
||
|
||
def remove_random_overlay(name):
    """Undo ``install_random_overlay`` for ``name``.

    If an original module is recorded in ``OVERLAY_ORIG_MODULES`` it is put back
    into ``sys.modules``; otherwise the ``sys.modules`` entry is deleted.
    """
    original = OVERLAY_ORIG_MODULES[name]
    if not original:
        # No original module was recorded or loaded, so simply drop the
        # overlay entry.
        del sys.modules[name]
        return
    # Reinstate the original module.
    sys.modules[name] = original
|
||
|
||
def remove_random_overlays():
    """Set ``IN_REQUEST_CONTEXT`` to True and remove every installed overlay.

    The flag is set before any overlay is removed; the overlays are then
    removed in ``MODULES_TO_OVERLAY`` order.
    """
    global IN_REQUEST_CONTEXT
    IN_REQUEST_CONTEXT = True

    for module_name in MODULES_TO_OVERLAY:
        remove_random_overlay(module_name)
|
||
|
||
def overlay_dir(name):
    """Return ``dir()`` of the original module ``name``, loading it if needed."""
    original = get_orig_module(name)
    return dir(original)
|
||
|
||
# Per-module allow list: callables that overlay_getattr hands back unwrapped
# because they are definitely okay to call at top level.
MODULE_ALLOW_LIST = {
    "random": ["Random", "SystemRandom"],
    "numpy.random": ["default_rng"],
}
|
||
|
||
def overlay_getattr(name, key):
    """Look up ``key`` on the original module ``name``, guarding top level calls.

    The attribute is returned unchanged when any of the following holds:

    * ``IN_REQUEST_CONTEXT`` is true,
    * the attribute is not callable,
    * ``key`` is listed for ``name`` in ``MODULE_ALLOW_LIST``.

    Otherwise a wrapper is returned. The wrapper re-checks
    ``IN_REQUEST_CONTEXT`` each time it is called, so a wrapper obtained at top
    level starts forwarding to the original once the request context has been
    entered.

    Raises:
        KeyError: if ``name`` has no entry in ``OVERLAY_ORIG_MODULES`` (raised
            by ``get_orig_module``).
        AttributeError: if the original module has no attribute ``key``.
    """
    mod = get_orig_module(name)
    orig = getattr(mod, key)
    if IN_REQUEST_CONTEXT or not callable(orig):
        return orig

    if key in MODULE_ALLOW_LIST.get(name, []):
        return orig

    # If we aren't in a request scope, the value is a callable, and it's not in
    # the allow list, return a wrapper that raises an error if it's called
    # before entering the request scope.
    # TODO: this doesn't wrap classes correctly, does it matter?
    @wraps(orig)
    def wrapper(*args, **kwargs):
        if not IN_REQUEST_CONTEXT:
            raise RuntimeError(f"Cannot use {name}.{key}() outside of request context")
        return orig(*args, **kwargs)

    return wrapper
|
||
|
||
@contextmanager
def import_context(name):
    """Context manager under which the original module ``name`` can be imported.

    Outside the request context the overlay is popped from ``sys.modules`` for
    the duration of the block, any module-specific setup runs around it, and
    the overlay is put back on exit even if the block raises. When
    ``IN_REQUEST_CONTEXT`` is true the context does nothing.
    """
    if IN_REQUEST_CONTEXT:
        # Reached when a module was imported at top level and one of its
        # attributes is first accessed inside the request context. By then
        # remove_random_overlays() has already dealt with sys.modules, so
        # there is nothing to set up here.
        yield
        return

    # Take the overlay out of sys.modules; otherwise the import would find the
    # overlay instead of the original module.
    overlay = sys.modules.pop(name)

    # Module-specific setup; anything not listed gets the no-op default.
    module_contexts = {
        "random": import_context_random,
        "numpy.random.mtrand": import_context_numpy_mrand,
    }
    make_context = module_contexts.get(name, import_context_default)

    try:
        yield from make_context()
    finally:
        # Reinstall the overlay.
        sys.modules[name] = overlay
|
||
|
||
def import_context_default():
    """Generator used when a module needs no extra import-time setup.

    Yields exactly once, with nothing set up before or torn down after.
    """
    yield None
|
||
|
||
def import_context_random():
    """Generator that makes ``_random.Random.seed`` a no-op while suspended.

    We've made ``_random.Random.seed`` raise an error, but ``random`` calls it
    at top level. So that the top level import does not fail, ``seed`` is
    swapped for a function that does nothing until this generator is resumed
    or closed, at which point the previous ``seed`` is reinstated.
    """
    import _random

    saved_seed = _random.Random.seed

    @wraps(saved_seed)
    def noop_seed(*args):
        return None

    _random.Random.seed = noop_seed
    try:
        yield
    finally:
        _random.Random.seed = saved_seed
|
||
|
||
def import_context_numpy_mrand():
    """Generator that makes ``secrets.randbits`` return 0 while suspended.

    numpy.random.mtrand will attempt to seed itself using ``secrets.randbits``
    at import time. So that the top level import does not fail,
    ``secrets.randbits`` is swapped for a stand-in that returns the constant 0.
    On exit the real function is reinstated and the stand-in is switched to
    forward to it, so a reference to the stand-in kept from inside the context
    behaves like the real function afterwards.
    """
    import secrets

    real_randbits = secrets.randbits
    active = True

    @wraps(real_randbits)
    def constant_randbits(*args):
        # ``active`` is read from the enclosing scope each call, so clearing it
        # in the ``finally`` below takes effect here.
        return 0 if active else real_randbits(*args)

    secrets.randbits = constant_randbits
    try:
        yield
    finally:
        secrets.randbits = real_randbits
        active = False
115 changes: 115 additions & 0 deletions
115
src/pyodide/internal/patches/_cloudflare_random_patches.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
""" | ||
Handle the randomness mess. | ||
Goals: | ||
1. Avoid top-level calls to the C stdlib function getentropy(), these fatally fail. Patch these to | ||
raise Python errors instead. | ||
2. Allow top level import of `random` and `numpy.random` modules. These seed themselves with the | ||
functions that we patched in step 1, we temporarily replace the `getentropy()` calls with no-ops | ||
to let them through. | ||
3. Install wrapper modules at top level that only allow calls to a whitelisted set of functions from | ||
`random` and `numpy.random` that don't use the bad seeds that came from step 2. | ||
4. Put it all back. | ||
5. Reseed the rng before entering the request scope for the first time. | ||
Steps 1, part of 4, and 5 are handled here, steps 2, 3, and part of 4 are handled in | ||
_cloudflare_random_overlays. | ||
""" | ||
|
||
import _random | ||
import sys | ||
import os | ||
from functools import wraps | ||
|
||
from _cloudflare_random_overlays import install_random_overlays, remove_random_overlays | ||
|
||
|
||
# Flipped to True by before_first_request(); the patches below consult it.
IN_REQUEST_CONTEXT = False

# Step 1.
#
# Prevent calls to getentropy(). The intended way for `getentropy()` to fail
# is to set an EIO error, which turns into a Python OSError. We raise that same
# error so that, if `getentropy` is later patched in the Emscripten C stdlib,
# these patches can be removed without changing the behavior.

# errno value passed to the OSError raised by the patches.
EIO = 29
|
||
# Kept so that restore_urandom() can put the real function back.
orig_urandom = os.urandom


@wraps(orig_urandom)
def patch_urandom(*args):
    # Forward to the real os.urandom inside the request context; otherwise
    # raise OSError(EIO) rather than calling it.
    if IN_REQUEST_CONTEXT:
        return orig_urandom(*args)
    raise OSError(EIO, "Cannot get entropy outside of request context")
|
||
|
||
def disable_urandom():
    """Install ``patch_urandom`` as ``os.urandom``.

    Python ``os.urandom()`` calls C ``getentropy()``, which calls JS
    ``crypto.getRandomValues()``; that throws at top level and fatally crashes
    the interpreter.

    TODO: Patch Emscripten's getentropy() to return EIO if
    `crypto.getRandomValues()` throws. Then we can remove this.
    """
    setattr(os, "urandom", patch_urandom)
|
||
|
||
def restore_urandom():
    """Put the saved original function back as ``os.urandom``."""
    setattr(os, "urandom", orig_urandom)
|
||
|
||
# Kept so that restore_random_seed() can put the real method back.
orig_Random_seed = _random.Random.seed


@wraps(orig_Random_seed)
def patched_seed(self, val=None):
    """Replacement for ``_random.Random.seed`` that refuses to draw entropy at top level.

    Random.seed calls _PyOs_URandom which will fatally fail in top level.
    Prevent this by raising an OSError instead when no explicit seed is given
    outside the request context.

    Args:
        val: The seed value. Defaults to ``None`` so that a call without an
            argument is accepted, as it is by the method being replaced.

    Raises:
        OSError: with errno ``EIO`` when ``val`` is ``None`` and
            ``IN_REQUEST_CONTEXT`` is false.
    """
    if val is None and not IN_REQUEST_CONTEXT:
        raise OSError(EIO, "Cannot get entropy outside of request context")
    return orig_Random_seed(self, val)
|
||
|
||
def disable_random_seed():
    """Install ``patched_seed`` as ``_random.Random.seed`` to block calls to PyOs_URandom."""
    setattr(_random.Random, "seed", patched_seed)
|
||
|
||
def restore_random_seed():
    """Put the saved original method back as ``_random.Random.seed``."""
    setattr(_random.Random, "seed", orig_Random_seed)
|
||
|
||
def reseed_rng():
    """Step 5: reseed the random number generators.

    Randomness has to be reseeded in the IoContext of the first request since
    it was given a low quality seed when it was seeded at top level.
    ``random.seed()`` is always called; ``numpy.random.seed()`` is called only
    when ``numpy.random`` is already present in ``sys.modules``.
    """
    from random import seed as reseed_stdlib

    reseed_stdlib()

    if "numpy.random" not in sys.modules:
        return

    from numpy.random import seed as reseed_numpy

    reseed_numpy()
|
||
|
||
def before_top_level():
    """Install the urandom patch, the seed patch, and the module overlays, in that order."""
    for step in (disable_urandom, disable_random_seed, install_random_overlays):
        step()
|
||
|
||
def before_first_request():
    """Switch to request-context behavior and undo the top level patches.

    Sets ``IN_REQUEST_CONTEXT`` to True, then restores ``os.urandom`` and
    ``_random.Random.seed``, removes the module overlays, and finally reseeds
    the random number generators.
    """
    global IN_REQUEST_CONTEXT
    IN_REQUEST_CONTEXT = True

    teardown_steps = (
        restore_urandom,
        restore_random_seed,
        remove_random_overlays,
        reseed_rng,
    )
    for step in teardown_steps:
        step()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.