-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Postprocess hardening #9122
Postprocess hardening #9122
Changes from 6 commits
ed77182
57a8ff6
fa7f9c1
6a44a2d
accb0eb
151bfa0
4e56101
4cc96fe
45e957c
4fdfd67
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"gradio": minor | ||
--- | ||
|
||
feat:Postprocess hardening |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,8 +23,9 @@ | |
from PIL import Image, ImageOps, ImageSequence, PngImagePlugin | ||
|
||
from gradio import utils, wasm_utils | ||
from gradio.context import LocalContext | ||
from gradio.data_classes import FileData, GradioModel, GradioRootModel, JsonData | ||
from gradio.exceptions import Error | ||
from gradio.exceptions import Error, InvalidPathError | ||
from gradio.utils import abspath, get_hash_seed, get_upload_folder, is_in_or_equal | ||
|
||
with warnings.catch_warnings(): | ||
|
@@ -426,14 +427,8 @@ def _move_to_cache(d: dict): | |
pass | ||
elif not block.proxy_url: | ||
# If the file is on a remote server, do not move it to cache. | ||
if check_in_upload_folder and not client_utils.is_http_url_like( | ||
payload.path | ||
): | ||
path = os.path.abspath(payload.path) | ||
if not is_in_or_equal(path, get_upload_folder()): | ||
raise ValueError( | ||
f"File {path} is not in the upload folder and cannot be accessed." | ||
) | ||
if not client_utils.is_http_url_like(payload.path): | ||
_check_allowed(payload.path, check_in_upload_folder) | ||
if not payload.is_stream: | ||
temp_file_path = block.move_resource_to_block_cache(payload.path) | ||
if temp_file_path is None: | ||
|
@@ -462,6 +457,55 @@ def _move_to_cache(d: dict): | |
return client_utils.traverse(data, _move_to_cache, client_utils.is_file_obj) | ||
|
||
|
||
def _check_allowed(path: str | Path, check_in_upload_folder: bool): | ||
blocks = LocalContext.blocks.get() | ||
abidlabs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if blocks is None or not blocks.is_running: | ||
return | ||
|
||
import tempfile | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggest putting this import at top of file There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
abs_path = utils.abspath(path) | ||
|
||
# if check_in_upload_folder=True | ||
# we are running this during pre-process | ||
# in which case only files in the upload_folder (cache_dir) | ||
# are accepted | ||
allowed = [utils.get_upload_folder()] | ||
if not check_in_upload_folder: | ||
allowed += blocks.allowed_paths + [os.getcwd(), tempfile.gettempdir()] | ||
|
||
allowed, reason = utils.is_allowed_file( | ||
abs_path, | ||
blocked_paths=blocks.blocked_paths, | ||
allowed_paths=allowed, | ||
file_sets=[], | ||
) | ||
if not allowed: | ||
msg = f"Cannot move {abs_path} to the gradio cache dir because " | ||
if reason == "in_blocklist": | ||
msg += f"it is located in one of the blocked_paths ({', '.join(blocks.blocked_paths)})." | ||
elif check_in_upload_folder: | ||
msg += "it was not uploaded by a user." | ||
else: | ||
msg += "it was not created by the application or it is not " | ||
msg += "located in either the current working directory or your system's temp directory. " | ||
msg += "To fix this error, please ensure your function returns files located in either " | ||
msg += f"the current working directory ({os.getcwd()}), your system's temp directory ({tempfile.gettempdir()}) " | ||
msg += f"or add {str(abs_path.parent)} to the allowed_paths parameter." | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would add something like, "if you'd like to specifically allow this file to be served, you can add it to the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
raise InvalidPathError(msg) | ||
if ( | ||
utils.is_in_or_equal(abs_path, os.getcwd()) | ||
and abs_path.name.startswith(".") | ||
and not any( | ||
is_in_or_equal(path, allowed_path) for allowed_path in blocks.allowed_paths | ||
) | ||
): | ||
raise InvalidPathError( | ||
"Dotfiles located in the temporary directory cannot be moved to the cache for security reasons. " | ||
"Please check whether this file is valid and rename it if so." | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same^ |
||
) | ||
|
||
|
||
async def async_move_files_to_cache( | ||
data: Any, | ||
block: Block, | ||
|
@@ -494,14 +538,8 @@ async def _move_to_cache(d: dict): | |
pass | ||
elif not block.proxy_url: | ||
# If the file is on a remote server, do not move it to cache. | ||
if check_in_upload_folder and not client_utils.is_http_url_like( | ||
payload.path | ||
): | ||
path = os.path.abspath(payload.path) | ||
if not is_in_or_equal(path, get_upload_folder()): | ||
raise ValueError( | ||
f"File {path} is not in the upload folder and cannot be accessed." | ||
) | ||
if not client_utils.is_http_url_like(payload.path): | ||
_check_allowed(payload.path, check_in_upload_folder) | ||
if not payload.is_stream: | ||
temp_file_path = await block.async_move_resource_to_block_cache( | ||
payload.path | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -547,44 +547,22 @@ async def file(path_or_url: str, request: fastapi.Request): | |
raise HTTPException(403, f"File not allowed: {path_or_url}.") | ||
|
||
abs_path = utils.abspath(path_or_url) | ||
|
||
in_blocklist = any( | ||
utils.is_in_or_equal(abs_path, blocked_path) | ||
for blocked_path in blocks.blocked_paths | ||
) | ||
|
||
is_dir = abs_path.is_dir() | ||
|
||
if is_dir or in_blocklist: | ||
if abs_path.is_dir() or not abs_path.exists(): | ||
raise HTTPException(403, f"File not allowed: {path_or_url}.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: it would be better to set an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea good call. |
||
|
||
created_by_app = False | ||
for temp_file_set in blocks.temp_file_sets: | ||
if abs_path in temp_file_set: | ||
created_by_app = True | ||
break | ||
in_allowlist = any( | ||
utils.is_in_or_equal(abs_path, allowed_path) | ||
for allowed_path in blocks.allowed_paths | ||
) | ||
is_static_file = utils.is_static_file(abs_path) | ||
was_uploaded = utils.is_in_or_equal(abs_path, app.uploaded_file_dir) | ||
is_cached_example = utils.is_in_or_equal( | ||
abs_path, utils.abspath(utils.get_cache_folder()) | ||
) | ||
from gradio.data_classes import _StaticFiles | ||
|
||
if not ( | ||
created_by_app | ||
or in_allowlist | ||
or was_uploaded | ||
or is_cached_example | ||
or is_static_file | ||
): | ||
allowed, _ = utils.is_allowed_file( | ||
abs_path, | ||
blocked_paths=blocks.blocked_paths, | ||
allowed_paths=blocks.allowed_paths | ||
+ [app.uploaded_file_dir, utils.get_cache_folder()] | ||
+ _StaticFiles.all_paths, | ||
file_sets=blocks.temp_file_sets, | ||
) | ||
if not allowed: | ||
raise HTTPException(403, f"File not allowed: {path_or_url}.") | ||
|
||
if not abs_path.exists(): | ||
raise HTTPException(404, f"File not found: {path_or_url}.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. According to the unit tests, we actually don't raise 404 if the file does not exist |
||
|
||
range_val = request.headers.get("Range", "").strip() | ||
if range_val.startswith("bytes=") and "-" in range_val: | ||
range_val = range_val[6:] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
import sys | ||
import warnings | ||
from pathlib import Path | ||
from typing import Sequence, Set | ||
from unittest.mock import MagicMock, patch | ||
|
||
import numpy as np | ||
|
@@ -31,6 +32,7 @@ | |
get_function_params, | ||
get_type_hints, | ||
ipython_check, | ||
is_allowed_file, | ||
is_in_or_equal, | ||
is_special_typed_parameter, | ||
kaggle_check, | ||
|
@@ -387,6 +389,14 @@ def create_path_string(): | |
).map(lambda x: os.path.join(*x)) | ||
|
||
|
||
def create_path_list(): | ||
return st.lists(create_path_string(), min_size=0, max_size=5) | ||
|
||
|
||
def create_path_set(): | ||
return st.sets(create_path_string(), min_size=0, max_size=5) | ||
|
||
|
||
def my_check(path_1, path_2): | ||
try: | ||
path_1 = Path(path_1).resolve() | ||
|
@@ -414,6 +424,52 @@ def test_is_in_or_equal_fuzzer(path_1, path_2): | |
pytest.fail(f"Exception raised: {e}") | ||
|
||
|
||
@settings(derandomize=os.getenv("CI") is not None) | ||
@given( | ||
path=create_path_string(), | ||
blocked_paths=create_path_list(), | ||
allowed_paths=create_path_list(), | ||
file_sets=st.lists(create_path_set(), min_size=0, max_size=3), | ||
) | ||
def test_is_allowed_file_fuzzer( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool idea to use the fuzzer here. But if I'm understanding correctly, randomly generating paths is very, very unlikely to produce any overlaps between the parameters, right? So this is actually unlikely to test what happens if a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I greatly reduced the cardinality of the allowed possible paths set and also added some corner cases manually in a different test. |
||
path: Path, | ||
blocked_paths: Sequence[Path], | ||
allowed_paths: Sequence[Path], | ||
file_sets: Sequence[Set[Path]], | ||
): | ||
result, reason = is_allowed_file(path, blocked_paths, allowed_paths, file_sets) | ||
|
||
assert isinstance(result, bool) | ||
assert reason in [ | ||
"in_blocklist", | ||
"allowed", | ||
"not_created_or_allowed", | ||
"created_by_app", | ||
] | ||
|
||
if result: | ||
assert reason in ["allowed", "created_by_app"] | ||
assert not any( | ||
is_in_or_equal(path, blocked_path) for blocked_path in blocked_paths | ||
) | ||
elif reason == "in_blocklist": | ||
assert any(is_in_or_equal(path, blocked_path) for blocked_path in blocked_paths) | ||
elif reason == "not_created_or_allowed": | ||
assert not any( | ||
is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths | ||
) | ||
assert not any(path in file_set for file_set in file_sets) | ||
|
||
if reason == "allowed": | ||
assert any(is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths) | ||
|
||
if reason == "created_by_app": | ||
assert not any( | ||
is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths | ||
) | ||
assert result == any(path in file_set for file_set in file_sets) | ||
|
||
|
||
# Additional test for known edge cases | ||
@pytest.mark.parametrize( | ||
"path_1,path_2,expected", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting, why do we set it here and not in the init of Blocks?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought this would be more "future proof" as it's the most up to date version of the Blocks before any sort of processing happens. But I don't think we need this actually.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need this to be done after init for
is_running
to be set. I think actually this is a good place for it.