Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Postprocess hardening #9122

Merged
merged 10 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/deep-ways-wink.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"gradio": minor
---

feat:Postprocess hardening
5 changes: 4 additions & 1 deletion gradio/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1772,7 +1772,6 @@ async def postprocess_data(
kwargs["render"] = False

state[block._id] = block.__class__(**kwargs)

prediction_value = postprocess_update_dict(
block=state[block._id],
update_dict=prediction_value,
Expand Down Expand Up @@ -1910,6 +1909,10 @@ async def process_api(
state_ids_to_track, hashed_values = self.get_state_ids_to_track(block_fn, state)
changed_state_ids = []

from gradio.context import LocalContext

LocalContext.blocks.set(self)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, why do we set it here and not in the init of Blocks?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this would be more "future proof" as it's the most up to date version of the Blocks before any sort of processing happens. But I don't think we need this actually.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need this to be done after init for is_running to be set. I think actually this is a good place for it.


if batch:
max_batch_size = block_fn.max_batch_size
batch_sizes = [len(inp) for inp in inputs]
Expand Down
72 changes: 55 additions & 17 deletions gradio/processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
from PIL import Image, ImageOps, ImageSequence, PngImagePlugin

from gradio import utils, wasm_utils
from gradio.context import LocalContext
from gradio.data_classes import FileData, GradioModel, GradioRootModel, JsonData
from gradio.exceptions import Error
from gradio.exceptions import Error, InvalidPathError
from gradio.utils import abspath, get_hash_seed, get_upload_folder, is_in_or_equal

with warnings.catch_warnings():
Expand Down Expand Up @@ -426,14 +427,8 @@ def _move_to_cache(d: dict):
pass
elif not block.proxy_url:
# If the file is on a remote server, do not move it to cache.
if check_in_upload_folder and not client_utils.is_http_url_like(
payload.path
):
path = os.path.abspath(payload.path)
if not is_in_or_equal(path, get_upload_folder()):
raise ValueError(
f"File {path} is not in the upload folder and cannot be accessed."
)
if not client_utils.is_http_url_like(payload.path):
_check_allowed(payload.path, check_in_upload_folder)
if not payload.is_stream:
temp_file_path = block.move_resource_to_block_cache(payload.path)
if temp_file_path is None:
Expand Down Expand Up @@ -462,6 +457,55 @@ def _move_to_cache(d: dict):
return client_utils.traverse(data, _move_to_cache, client_utils.is_file_obj)


def _check_allowed(path: str | Path, check_in_upload_folder: bool):
blocks = LocalContext.blocks.get()
abidlabs marked this conversation as resolved.
Show resolved Hide resolved
if blocks is None or not blocks.is_running:
return

import tempfile
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggest putting this import at top of file

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


abs_path = utils.abspath(path)

# if check_in_upload_folder=True
# we are running this during pre-process
# in which case only files in the upload_folder (cache_dir)
# are accepted
allowed = [utils.get_upload_folder()]
if not check_in_upload_folder:
allowed += blocks.allowed_paths + [os.getcwd(), tempfile.gettempdir()]

allowed, reason = utils.is_allowed_file(
abs_path,
blocked_paths=blocks.blocked_paths,
allowed_paths=allowed,
file_sets=[],
)
if not allowed:
msg = f"Cannot move {abs_path} to the gradio cache dir because "
if reason == "in_blocklist":
msg += f"it is located in one of the blocked_paths ({', '.join(blocks.blocked_paths)})."
elif check_in_upload_folder:
msg += "it was not uploaded by a user."
else:
msg += "it was not created by the application or it is not "
msg += "located in either the current working directory or your system's temp directory. "
msg += "To fix this error, please ensure your function returns files located in either "
msg += f"the current working directory ({os.getcwd()}), your system's temp directory ({tempfile.gettempdir()}) "
msg += f"or add {str(abs_path.parent)} to the allowed_paths parameter."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add something like, "if you'd like to specifically allow this file to be served, you can add it to the allowed_paths parameter of launch()"

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

raise InvalidPathError(msg)
if (
utils.is_in_or_equal(abs_path, os.getcwd())
and abs_path.name.startswith(".")
and not any(
is_in_or_equal(path, allowed_path) for allowed_path in blocks.allowed_paths
)
):
raise InvalidPathError(
"Dotfiles located in the temporary directory cannot be moved to the cache for security reasons. "
"Please check whether this file is valid and rename it if so."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same^

)


async def async_move_files_to_cache(
data: Any,
block: Block,
Expand Down Expand Up @@ -494,14 +538,8 @@ async def _move_to_cache(d: dict):
pass
elif not block.proxy_url:
# If the file is on a remote server, do not move it to cache.
if check_in_upload_folder and not client_utils.is_http_url_like(
payload.path
):
path = os.path.abspath(payload.path)
if not is_in_or_equal(path, get_upload_folder()):
raise ValueError(
f"File {path} is not in the upload folder and cannot be accessed."
)
if not client_utils.is_http_url_like(payload.path):
_check_allowed(payload.path, check_in_upload_folder)
if not payload.is_stream:
temp_file_path = await block.async_move_resource_to_block_cache(
payload.path
Expand Down
44 changes: 11 additions & 33 deletions gradio/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,44 +547,22 @@ async def file(path_or_url: str, request: fastapi.Request):
raise HTTPException(403, f"File not allowed: {path_or_url}.")

abs_path = utils.abspath(path_or_url)

in_blocklist = any(
utils.is_in_or_equal(abs_path, blocked_path)
for blocked_path in blocks.blocked_paths
)

is_dir = abs_path.is_dir()

if is_dir or in_blocklist:
if abs_path.is_dir() or not abs_path.exists():
raise HTTPException(403, f"File not allowed: {path_or_url}.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: it would be better to set an allowed flag here, and raise a single 403 later, after the utils.is_allowed_file has completed so that malicious attackers cannot using timing attacks to infer whether a particular filepath even exists (that itself can be insecure in certain settings)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea good call.


created_by_app = False
for temp_file_set in blocks.temp_file_sets:
if abs_path in temp_file_set:
created_by_app = True
break
in_allowlist = any(
utils.is_in_or_equal(abs_path, allowed_path)
for allowed_path in blocks.allowed_paths
)
is_static_file = utils.is_static_file(abs_path)
was_uploaded = utils.is_in_or_equal(abs_path, app.uploaded_file_dir)
is_cached_example = utils.is_in_or_equal(
abs_path, utils.abspath(utils.get_cache_folder())
)
from gradio.data_classes import _StaticFiles

if not (
created_by_app
or in_allowlist
or was_uploaded
or is_cached_example
or is_static_file
):
allowed, _ = utils.is_allowed_file(
abs_path,
blocked_paths=blocks.blocked_paths,
allowed_paths=blocks.allowed_paths
+ [app.uploaded_file_dir, utils.get_cache_folder()]
+ _StaticFiles.all_paths,
file_sets=blocks.temp_file_sets,
)
if not allowed:
raise HTTPException(403, f"File not allowed: {path_or_url}.")

if not abs_path.exists():
raise HTTPException(404, f"File not found: {path_or_url}.")
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the unit tests, we actually don't raise 404 if the file does not exist


range_val = request.headers.get("Range", "").strip()
if range_val.startswith("bytes=") and "-" in range_val:
range_val = range_val[6:]
Expand Down
34 changes: 34 additions & 0 deletions gradio/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def swap_blocks(self, demo: Blocks):
# not a new queue
demo._queue = self.running_app.blocks._queue
demo.max_file_size = self.running_app.blocks.max_file_size
demo.is_running = True
self.running_app.state_holder.reset(demo)
self.running_app.blocks = demo

Expand Down Expand Up @@ -1487,3 +1488,36 @@ def safe_join(directory: DeveloperPath, path: UserProvidedPath) -> str:
raise InvalidPathError()

return fullpath


def is_allowed_file(
path: Path,
blocked_paths: Sequence[str | Path],
allowed_paths: Sequence[str | Path],
file_sets: Sequence[set[Path]],
) -> tuple[
bool, Literal["in_blocklist", "allowed", "not_created_or_allowed", "created_by_app"]
]:
in_blocklist = any(
is_in_or_equal(path, blocked_path) for blocked_path in blocked_paths
)
if in_blocklist:
return False, "in_blocklist"

in_allowedlist = any(
is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths
)

if in_allowedlist:
return True, "allowed"

created_by_app = False
for temp_file_set in file_sets:
if path in temp_file_set:
created_by_app = True
break

return (
created_by_app,
"created_by_app" if created_by_app else "not_created_or_allowed",
)
30 changes: 30 additions & 0 deletions test/test_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1802,3 +1802,33 @@ def delete_fn(v):
)
finally:
demo.close()


def test_post_process_file_blocked(connect):
dotfile = pathlib.Path(".foo.txt")
file = pathlib.Path(os.getcwd()) / ".." / "file.txt"

try:
demo = gr.Interface(lambda s: s, "text", "file")
with connect(demo, show_error=True) as client:
_ = client.predict("test/test_files/bus.png")
with pytest.raises(
ValueError,
match="to the gradio cache dir because it was not created by",
):
file.write_text("Hi")
client.predict(str(file))

with connect(demo, allowed_paths=[str(file)]) as client:
_ = client.predict(str(file))

dotfile.write_text("foo")
with connect(demo, show_error=True) as client:
with pytest.raises(ValueError, match="Dotfiles located"):
_ = client.predict(str(dotfile))

with connect(demo, allowed_paths=[str(dotfile)]) as client:
_ = client.predict(str(dotfile))

finally:
dotfile.unlink()
56 changes: 56 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sys
import warnings
from pathlib import Path
from typing import Sequence, Set
from unittest.mock import MagicMock, patch

import numpy as np
Expand All @@ -31,6 +32,7 @@
get_function_params,
get_type_hints,
ipython_check,
is_allowed_file,
is_in_or_equal,
is_special_typed_parameter,
kaggle_check,
Expand Down Expand Up @@ -387,6 +389,14 @@ def create_path_string():
).map(lambda x: os.path.join(*x))


def create_path_list():
return st.lists(create_path_string(), min_size=0, max_size=5)


def create_path_set():
return st.sets(create_path_string(), min_size=0, max_size=5)


def my_check(path_1, path_2):
try:
path_1 = Path(path_1).resolve()
Expand Down Expand Up @@ -414,6 +424,52 @@ def test_is_in_or_equal_fuzzer(path_1, path_2):
pytest.fail(f"Exception raised: {e}")


@settings(derandomize=os.getenv("CI") is not None)
@given(
path=create_path_string(),
blocked_paths=create_path_list(),
allowed_paths=create_path_list(),
file_sets=st.lists(create_path_set(), min_size=0, max_size=3),
)
def test_is_allowed_file_fuzzer(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool idea to use the fuzzer here. But if I'm understanding correctly, randomly generating paths is very, very unlikely to produce any overlaps between the parameters, right? So this is actually unlikely to test what happens if a path is in blocked_paths or allowed_paths or file_sets? Perhaps part of the path should be manually specified or the values should be chosen from a set with smaller cardinality

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I greatly reduced the cardinality of the allowed possible paths set and also added some corner cases manually in a different test.

path: Path,
blocked_paths: Sequence[Path],
allowed_paths: Sequence[Path],
file_sets: Sequence[Set[Path]],
):
result, reason = is_allowed_file(path, blocked_paths, allowed_paths, file_sets)

assert isinstance(result, bool)
assert reason in [
"in_blocklist",
"allowed",
"not_created_or_allowed",
"created_by_app",
]

if result:
assert reason in ["allowed", "created_by_app"]
assert not any(
is_in_or_equal(path, blocked_path) for blocked_path in blocked_paths
)
elif reason == "in_blocklist":
assert any(is_in_or_equal(path, blocked_path) for blocked_path in blocked_paths)
elif reason == "not_created_or_allowed":
assert not any(
is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths
)
assert not any(path in file_set for file_set in file_sets)

if reason == "allowed":
assert any(is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths)

if reason == "created_by_app":
assert not any(
is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths
)
assert result == any(path in file_set for file_set in file_sets)


# Additional test for known edge cases
@pytest.mark.parametrize(
"path_1,path_2,expected",
Expand Down
Loading