-
Notifications
You must be signed in to change notification settings - Fork 303
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Python: Make top level random() raise
We don't have access to entropy outside of request scope, so `random()` will give poor results. Raise an error if it is called.
- Loading branch information
Showing
14 changed files
with
678 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
110 changes: 110 additions & 0 deletions
110
src/pyodide/internal/patches/_cloudflare_patch_finder.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
from functools import wraps | ||
import sys | ||
|
||
|
||
class PatchLoader:
    """Loader proxy that executes a module inside a patch context.

    ``exec_module`` is intercepted so that the wrapped loader runs while the
    context manager returned by ``patch_context(module)`` is active. Every
    other attribute is forwarded to the wrapped loader.
    """

    def __init__(self, orig_loader, patch_context):
        """Store the wrapped loader and the context-manager factory.

        Args:
            orig_loader: The loader whose behaviour is being wrapped.
            patch_context: Callable that takes the module being executed and
                returns a context manager.
        """
        self.orig_loader = orig_loader
        self.patch_context = patch_context

    def __getattr__(self, name):
        """Forward attributes that are not set on the proxy to the wrapped loader.

        Raises:
            AttributeError: If the proxy's own attributes have not been set yet
                or the wrapped loader lacks ``name``.
        """
        # __getattr__ only runs after normal lookup failed, so reaching here
        # for one of our own attributes means __init__ has not run (e.g. the
        # instance came from __new__, copy or unpickling). Delegating through
        # self.orig_loader would then recurse until RecursionError.
        if name in ("orig_loader", "patch_context"):
            raise AttributeError(name)
        return getattr(self.orig_loader, name)

    def exec_module(self, module):
        """Run the wrapped loader's ``exec_module`` inside the patch context."""
        with self.patch_context(module):
            self.orig_loader.exec_module(module)
|
||
|
||
class PatchFinder:
    """Meta path finder that wraps the loader of selected modules.

    For each import, ``get_patch_func(fullname)`` is consulted. When it returns
    a truthy value, the spec found by the other finders on ``sys.meta_path``
    gets its loader replaced by a ``PatchLoader`` using that value as the patch
    context.
    """

    def __init__(self, get_patch_func):
        """Store the callable that maps a module name to a patch context (or a falsy value)."""
        self.get_patch_func = get_patch_func

    def invalidate_caches(self):
        """No-op; this finder keeps no caches."""
        pass

    def find_spec(
        self,
        fullname: str,
        path,
        target=None,
    ):
        """Return a spec whose loader is wrapped, or ``None`` to defer to other finders.

        Args:
            fullname: Fully qualified name of the module being imported.
            path: Passed through unchanged to the other finders.
            target: Passed through unchanged to the other finders. Defaults to
                ``None`` so the method can be called with two arguments.
        """
        patch_func = self.get_patch_func(fullname)
        if not patch_func:
            return None

        # Ask every other finder for the real spec; skip PatchFinder instances
        # so we do not call ourselves.
        for finder in sys.meta_path:
            if isinstance(finder, PatchFinder):
                continue
            spec = finder.find_spec(fullname, path, target)
            if spec:
                break
        else:
            # Nobody can find the module: nothing to patch.
            return None
        spec.loader = PatchLoader(spec.loader, patch_func)
        return spec

    @staticmethod
    def install(get_patch_func):
        """Insert a new ``PatchFinder`` at the front of ``sys.meta_path``."""
        sys.meta_path.insert(0, PatchFinder(get_patch_func))

    @staticmethod
    def remove():
        """Remove the first ``PatchFinder`` from ``sys.meta_path``, if there is one.

        When no ``PatchFinder`` is installed, ``sys.meta_path`` is left
        untouched.
        """
        for idx, val in enumerate(sys.meta_path):
            if isinstance(val, PatchFinder):
                # Safe to delete while iterating because we return immediately.
                del sys.meta_path[idx]
                return
|
||
|
||
# Flipped to True by unblock_calls(); while it is False, BlockedCallModule
# hands out wrappers that raise when called.
IN_REQUEST_CONTEXT: bool = False
# Original modules replaced by block_calls(), keyed by module name, so that
# unblock_calls() can put them back into sys.modules.
ORIG_MODULES: dict = {}
|
||
|
||
def block_calls(module, *, allowlist=()):
    """Replace ``module`` in ``sys.modules`` with a call-blocking proxy.

    The original module is recorded in ``ORIG_MODULES`` under its name so it
    can be restored later.

    Args:
        module: The module object to hide behind a ``BlockedCallModule``.
        allowlist: Names of callables that the proxy returns unwrapped. The
            default is an immutable empty tuple so that no mutable object is
            shared between proxies.
    """
    sys.modules[module.__name__] = BlockedCallModule(module, allowlist)
    ORIG_MODULES[module.__name__] = module
|
||
|
||
def unblock_calls():
    """Mark the request context as entered and restore every blocked module.

    Sets the module-level ``IN_REQUEST_CONTEXT`` flag to ``True`` and writes
    each entry of ``ORIG_MODULES`` back into ``sys.modules``.
    """
    global IN_REQUEST_CONTEXT

    IN_REQUEST_CONTEXT = True
    # Put all recorded originals back in one go.
    sys.modules.update(ORIG_MODULES)
|
||
|
||
class BlockedCallModule:
    """Stand-in for a module whose callables raise until ``IN_REQUEST_CONTEXT`` is set.

    Attribute reads are forwarded to the wrapped module. Non-callable values
    and allow-listed names are returned as they are; any other callable is
    returned inside a wrapper that raises ``RuntimeError`` if invoked while
    ``IN_REQUEST_CONTEXT`` is false. Attribute writes and ``dir()`` go straight
    to the wrapped module.
    """

    def __init__(self, module, allowlist):
        # Write through object.__setattr__ because our own __setattr__ would
        # forward the assignment to the wrapped module instead.
        object.__setattr__(self, "_mod", module)
        object.__setattr__(self, "_allow_list", allowlist)

    def __getattr__(self, key):
        mod = object.__getattribute__(self, "_mod")
        orig = getattr(mod, key)
        # Hand back the real attribute when we are inside the request context,
        # when it is not callable, or when it is explicitly allowed.
        if (
            IN_REQUEST_CONTEXT
            or not callable(orig)
            or key in object.__getattribute__(self, "_allow_list")
        ):
            return orig

        # Otherwise return a wrapper. The flag is re-checked at call time
        # because the wrapper may be kept and called after the request context
        # has been entered.
        # TODO: this doesn't wrap classes correctly, does it matter?
        @wraps(orig)
        def wrapper(*args, **kwargs):
            if IN_REQUEST_CONTEXT:
                return orig(*args, **kwargs)
            raise RuntimeError(
                f"Cannot use {mod.__name__}.{key}() outside of request context"
            )

        return wrapper

    def __setattr__(self, key, val):
        # Assignments land on the wrapped module, not on the proxy.
        setattr(object.__getattribute__(self, "_mod"), key, val)

    def __dir__(self):
        # Report the wrapped module's names.
        return dir(object.__getattribute__(self, "_mod"))
123 changes: 123 additions & 0 deletions
123
src/pyodide/internal/patches/_cloudflare_random_overlays.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
""" | ||
Manage overlay modules for random modules | ||
The actual contents of the overlay are in _cloudflare_random_overlay_module, it just implements a | ||
module __getattr__ and __dir__ that provide the __name__ of the particular overlay and call back | ||
into the impls here. | ||
We only lazily import the original module so that we can install an overlay for `numpy.random` even | ||
when `numpy.random` isn't installed. This also avoids paying to instantiate the overlaid module if | ||
it's not necessary. This means we have to remove our overlay from `sys.modules` before importing the | ||
original module the first time and put it back afterwards. "random" and "numpy.random.mtrand" also | ||
have some additional patches that need to be installed as part of their import context to prevent | ||
top level crashes. | ||
When we're done, we put back the original module but the wrapper module and wrapper stubs will | ||
persist in the wild, so we need to make sure they behave the same way as the originals after we put | ||
them back. This is controlled by the IN_REQUEST_CONTEXT variable. | ||
Other rust packages are likely to need similar treatment to pydantic_core. | ||
""" | ||
|
||
from contextlib import contextmanager | ||
from array import array | ||
from _cloudflare_patch_finder import PatchFinder, block_calls, unblock_calls | ||
|
||
import sys | ||
|
||
# Packages that get_patch_func() falls back to rust_package_context for when
# they have no dedicated "<name>_context" function.
RUST_PACKAGES = ["pydantic_core", "tiktoken"]
# Every module name for which get_patch_func() returns a patch context.
MODULES_TO_OVERLAY = ["random", "numpy.random", "numpy.random.mtrand", *RUST_PACKAGES]
# We remove the overlay before the request context, but it can still be used
# from top level imports. When IN_REQUEST_CONTEXT is True, we need to make sure
# that our patches behave like the original imports.
IN_REQUEST_CONTEXT = False
OVERLAY_ORIG_MODULES = {}
# One-element signed-byte array holding the number of entropy calls currently
# allowed; its buffer address is printed by get_bad_entropy_flag().
ALLOWED_ENTROPY_CALLS = array("b", [0])
|
||
|
||
def get_bad_entropy_flag():
    """Print the memory address of the ``ALLOWED_ENTROPY_CALLS`` buffer to stderr."""
    # simpleRunPython reads out stderr. We put the address there so we can fish it out...
    # We could use ctypes instead of array but ctypes weighs an extra 100kb compared to array.
    address = ALLOWED_ENTROPY_CALLS.buffer_info()[0]
    print(address, file=sys.stderr)
|
||
|
||
def is_bad_entropy_enabled():
    """Report whether any allowed bad-entropy calls remain.

    This is used in _cloudflare_random_patches to let calls to disabled
    functions through if we are allowing bad entropy.

    Returns:
        ``True`` when the counter in ``ALLOWED_ENTROPY_CALLS`` is positive.
    """
    remaining = ALLOWED_ENTROPY_CALLS[0]
    return remaining > 0
|
||
|
||
@contextmanager
def allow_bad_entropy_calls(n):
    """Allow ``n`` bad-entropy calls for the duration of the ``with`` body.

    The counter in ``ALLOWED_ENTROPY_CALLS`` is set to ``n`` on entry and is
    always reset to zero on exit, so an allowance can never leak past the
    block, including when the body raises.

    Args:
        n: Number of entropy calls the body is expected to consume.

    Raises:
        RuntimeError: If the body finished normally but left part of the
            allowance unused.
    """
    ALLOWED_ENTROPY_CALLS[0] = n
    try:
        yield
    finally:
        # Capture what is left, then close the window unconditionally.
        leftover = ALLOWED_ENTROPY_CALLS[0]
        ALLOWED_ENTROPY_CALLS[0] = 0
    # Only reached on normal completion; an exception from the body
    # propagates out of the finally block unchanged.
    if leftover > 0:
        raise RuntimeError(
            f"{leftover} unexpected leftover getentropy calls "
        )
|
||
|
||
def install_random_overlays():
    """Install a ``PatchFinder`` that looks up patch contexts via ``get_patch_func``."""
    PatchFinder.install(get_patch_func)
|
||
|
||
def remove_random_overlays():
    """Uninstall the ``PatchFinder`` and then call ``unblock_calls()``."""
    PatchFinder.remove()
    unblock_calls()
|
||
|
||
def get_patch_func(name):
    """Look up the patch context for the module called ``name``.

    The context is the module-level object named after the module, with ``.``
    and ``-`` turned into ``_`` and ``_context`` appended. Names listed in
    ``RUST_PACKAGES`` fall back to ``rust_package_context``.

    Returns:
        The patch context, or ``None`` when ``name`` is not in
        ``MODULES_TO_OVERLAY``.

    Raises:
        Exception: If ``name`` should be overlaid but no context exists for it.
    """
    if name not in MODULES_TO_OVERLAY:
        return None
    context_name = name.translate(str.maketrans(".-", "__")) + "_context"
    context = globals().get(context_name)
    if context:
        return context
    if name not in RUST_PACKAGES:
        raise Exception(f"Missing context for {name}")
    # Initial import needs one entropy call to initialize std::collections::HashMap hash seed
    return rust_package_context
|
||
|
||
@contextmanager
def random_context(module):
    """Patch context for ``random``.

    The body runs under ``allow_bad_entropy_calls(10)``. Once it completes,
    ``module`` is passed to ``block_calls`` with ``Random`` and
    ``SystemRandom`` allow-listed. ``block_calls`` is not reached if the body
    raises.
    """
    still_allowed = ["Random", "SystemRandom"]
    with allow_bad_entropy_calls(10):
        yield
    block_calls(module, allowlist=still_allowed)
|
||
|
||
@contextmanager
def numpy_random_context(module):
    """Patch context for ``numpy.random``.

    The body runs without any entropy allowance. Afterwards ``module`` is
    passed to ``block_calls`` with ``default_rng`` allow-listed.
    """
    still_allowed = ["default_rng"]
    yield
    block_calls(module, allowlist=still_allowed)
|
||
|
||
@contextmanager
def numpy_random_mtrand_context(module):
    """Patch context for ``numpy.random.mtrand``.

    The body runs under ``allow_bad_entropy_calls(1)``. Afterwards ``module``
    is passed to ``block_calls`` with no allow-listed names.
    """
    entropy_window = allow_bad_entropy_calls(1)
    with entropy_window:
        yield
    block_calls(module)
|
||
|
||
@contextmanager
def rust_package_context(module):
    """Patch context used for the names in ``RUST_PACKAGES``.

    The body runs under ``allow_bad_entropy_calls(1)``; ``module`` itself is
    not touched.
    """
    entropy_window = allow_bad_entropy_calls(1)
    with entropy_window:
        yield
|
||
|
||
@contextmanager
def pydantic_core_context(module):
    """Patch context for ``pydantic_core``.

    The body runs under ``allow_bad_entropy_calls(1)``. After it finishes,
    whether or not it raised, ``module.validate_core_schema(None)`` is called
    under a second one-call allowance and a resulting ``module.SchemaError``
    is swallowed.
    """

    def _trigger_second_entropy_call():
        try:
            with allow_bad_entropy_calls(1):
                # validate_core_schema makes an ahash::AHashMap which makes another entropy call for
                # its hash seed. It will throw an error but only after making the needed entropy
                # call.
                module.validate_core_schema(None)
        except module.SchemaError:
            pass

    try:
        # Initial import needs one entropy call to initialize std::collections::HashMap hash seed
        with allow_bad_entropy_calls(1):
            yield
    finally:
        _trigger_second_entropy_call()
Oops, something went wrong.