Skip to content

Commit

Permalink
Fix #84: normalize path by handling . and ..
Browse files Browse the repository at this point in the history
  • Loading branch information
asvetlov committed Jun 24, 2017
1 parent c405598 commit c7fc56c
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 10 deletions.
31 changes: 31 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def test_origin_no_scheme():
url.origin()


def test_drop_dots():
    """A '..' segment in an absolute URL's path is resolved on construction."""
    expected = 'http://example.com/to'
    url = URL('http://example.com/path/../to')
    assert str(url) == expected


def test_abs_cmp():
assert URL('http://example.com:8888') == URL('http://example.com:8888')
assert URL('http://example.com:8888/') == URL('http://example.com:8888/')
Expand Down Expand Up @@ -495,6 +500,11 @@ def test_div_with_colon_and_at():
assert url.raw_path == '/base/path:abc@123'


def test_div_with_dots():
    """The '/' operator collapses '.' and '..' segments in the joined path."""
    base = URL('http://example.com/base')
    joined = base / '../path/./to'
    assert joined.raw_path == '/path/to'


# comparison and hashing

def test_ne_str():
Expand Down Expand Up @@ -915,6 +925,8 @@ def test_with_fragment_bad_type():
with pytest.raises(TypeError):
url.with_fragment(123)

# with_name


def test_with_name():
url = URL('http://example.com/a/b')
Expand Down Expand Up @@ -980,6 +992,16 @@ def test_with_name_within_colon_and_at():
url = URL('http://example.com/oldpath').with_name('path:abc@123')
assert url.raw_path == '/path:abc@123'


def test_with_name_dot():
    """with_name('.') must raise ValueError."""
    with pytest.raises(ValueError):
        base = URL('http://example.com')
        base.with_name('.')


def test_with_name_double_dot():
    """with_name('..') must raise ValueError."""
    with pytest.raises(ValueError):
        base = URL('http://example.com')
        base.with_name('..')

# is_absolute


Expand Down Expand Up @@ -1549,3 +1571,12 @@ def test_build_query_quoting():
assert u == URL('http://127.0.0.1/файл.jpg?arg=Привет')
assert str(u) == ('http://127.0.0.1/%D1%84%D0%B0%D0%B9%D0%BB.jpg?'
'arg=%D0%9F%D1%80%D0%B8%D0%B2%D0%B5%D1%82')


def test_build_drop_dots():
    """URL.build() resolves a '..' segment in the path it is given."""
    parts = {
        'scheme': 'http',
        'host': 'example.com',
        'path': '/path/../to',
    }
    built = URL.build(**parts)
    assert str(built) == 'http://example.com/to'
59 changes: 49 additions & 10 deletions yarl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,14 @@ def __init__(self, val='', *, encoded=False, strict=False):
user += ':' + _quote(val.password)
netloc = user + '@' + netloc

val = SplitResult(
val[0], # scheme
netloc,
_quote(val[2], safe='+@:', protected='/+', strict=strict),
query=_quote(val[3], safe='=+&?/:@',
protected=PROTECT_CHARS, qs=True, strict=strict),
fragment=_quote(val[4], safe='?/:@', strict=strict))
path = _quote(val[2], safe='+@:', protected='/+', strict=strict)
if netloc:
path = _normalize_path(path)

query = _quote(val[3], safe='=+&?/:@',
protected=PROTECT_CHARS, qs=True, strict=strict)
fragment = _quote(val[4], safe='?/:@', strict=strict)
val = SplitResult(val[0], netloc, path, query, fragment)

self._val = val
self._cache = {}
Expand All @@ -207,11 +208,16 @@ def build(cls, *, scheme='', user='', password='', host='', port=None,
raise ValueError(
"Only one of \"query\" or \"query_string\" should be passed")

netloc = cls._make_netloc(user, password, host, port)
path = _quote(path, safe='@:', protected='/')
if netloc:
path = _normalize_path(path)

url = cls(
SplitResult(
scheme,
cls._make_netloc(user, password, host, port),
_quote(path, safe='@:', protected='/'),
netloc,
path,
_quote(query_string),
fragment
),
Expand Down Expand Up @@ -290,6 +296,8 @@ def __truediv__(self, name):
parts = path.rstrip('/').split('/')
parts.append(name)
new_path = '/'.join(parts)
if self.is_absolute():
new_path = _normalize_path(new_path)
return URL(self._val._replace(path=new_path, query='', fragment=''),
encoded=True)

Expand Down Expand Up @@ -464,7 +472,8 @@ def query(self):
Empty value if URL has no query part.
"""
ret = MultiDict(parse_qsl(self.raw_query_string, keep_blank_values=True))
ret = MultiDict(parse_qsl(self.raw_query_string,
keep_blank_values=True))
return MultiDictProxy(ret)

@property
Expand Down Expand Up @@ -830,6 +839,8 @@ def with_name(self, name):
if '/' in name:
raise ValueError("Slash in name is not allowed")
name = _quote(name, safe='@:', protected='/')
if name in ('.', '..'):
raise ValueError(". and .. values are forbidden")
parts = list(self.raw_parts)
if self.is_absolute():
if len(parts) == 1:
Expand Down Expand Up @@ -873,3 +884,31 @@ def human_repr(self):
self.path,
self.query_string,
self.fragment))


def _normalize_path(path):
# Drop '.' and '..' from path

segments = path.split('/')
resolved_path = []

for seg in segments:
if seg == '..':
try:
resolved_path.pop()
except IndexError:
# ignore any .. segments that would otherwise cause an
# IndexError when popped from resolved_path if
# resolving for rfc3986
pass
elif seg == '.':
continue
else:
resolved_path.append(seg)

if segments[-1] in ('.', '..'):
# do some post-processing here. if the last segment was a relative dir,
# then we need to append the trailing '/'
resolved_path.append('')

return '/'.join(resolved_path)

0 comments on commit c7fc56c

Please sign in to comment.