Skip to content

Commit

Permalink
Normalize path (#86)
Browse files Browse the repository at this point in the history
* Add a test for nonascii query part

* Fix #84: normalize path by handling . and ..

* Fix with_path

* Add missing test file

* Improve coverage
  • Loading branch information
asvetlov committed Jun 24, 2017
1 parent 14d78e3 commit 3d0e035
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 11 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
CHANGES
=======

0.10.3 (2017-06-13)
-------------------

* Prevent double URL args unquoting #83

0.10.2 (2017-05-05)
-------------------

Expand Down
21 changes: 21 additions & 0 deletions tests/test_normalize_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from yarl import _normalize_path as np


def test_no_dots():
    """A path containing no dot segments comes back unchanged."""
    normalized = np('path/to')
    assert normalized == 'path/to'


def test_skip_dots():
    """A '.' segment in the middle of the path is dropped."""
    normalized = np('path/./to')
    assert normalized == 'path/to'


def test_dot_at_end():
    """A trailing '.' segment is replaced by a trailing slash."""
    normalized = np('path/to/.')
    assert normalized == 'path/to/'


def test_double_dots():
    """A '..' segment removes the segment that precedes it."""
    normalized = np('path/../to')
    assert normalized == 'to'


def test_extra_double_dots():
    """Surplus '..' segments with nothing left to remove are ignored."""
    normalized = np('path/../../to')
    assert normalized == 'to'
50 changes: 50 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def test_origin_no_scheme():
url.origin()


def test_drop_dots():
    """Constructing a URL resolves '..' segments in its path."""
    rendered = str(URL('http://example.com/path/../to'))
    assert rendered == 'http://example.com/to'


def test_abs_cmp():
assert URL('http://example.com:8888') == URL('http://example.com:8888')
assert URL('http://example.com:8888/') == URL('http://example.com:8888/')
Expand Down Expand Up @@ -246,6 +251,11 @@ def test_query_dont_unqoute_twice():
assert url.query['url'] == sample_url


def test_query_nonascii():
    """Non-ASCII query keys and values are readable through ``query``."""
    parsed = URL('http://example.com?ключ=знач').query
    expected = MultiDict({'ключ': 'знач'})
    assert parsed == expected


def test_raw_fragment_empty():
url = URL('http://example.com')
assert '' == url.raw_fragment
Expand Down Expand Up @@ -490,6 +500,11 @@ def test_div_with_colon_and_at():
assert url.raw_path == '/base/path:abc@123'


def test_div_with_dots():
    """Joining with ``/`` resolves '.' and '..' in the appended name."""
    base = URL('http://example.com/base')
    joined = base / '../path/./to'
    assert joined.raw_path == '/path/to'


# comparison and hashing

def test_ne_str():
Expand Down Expand Up @@ -728,6 +743,8 @@ def test_with_port_invalid_type():
with pytest.raises(TypeError):
URL('http://example.com').with_port('123')

# with_path


def test_with_path():
url = URL('http://example.com')
Expand All @@ -741,6 +758,18 @@ def test_with_path_encoded():
) == 'http://example.com/test'


def test_with_path_dots():
    """with_path() turns a trailing '.' segment into a trailing slash."""
    replaced = URL('http://example.com').with_path('/test/.')
    assert str(replaced) == 'http://example.com/test/'


def test_with_path_relative():
    """Replacing the path of the relative URL '/path' yields '/new'."""
    replaced = URL('/path').with_path('/new')
    assert str(replaced) == '/new'


# with_query

def test_with_query():
url = URL('http://example.com')
assert str(url.with_query({'a': '1'})) == 'http://example.com/?a=1'
Expand Down Expand Up @@ -910,6 +939,8 @@ def test_with_fragment_bad_type():
with pytest.raises(TypeError):
url.with_fragment(123)

# with_name


def test_with_name():
url = URL('http://example.com/a/b')
Expand Down Expand Up @@ -975,6 +1006,16 @@ def test_with_name_within_colon_and_at():
url = URL('http://example.com/oldpath').with_name('path:abc@123')
assert url.raw_path == '/path:abc@123'


def test_with_name_dot():
    """with_name('.') raises ValueError."""
    with pytest.raises(ValueError):
        base = URL('http://example.com')
        base.with_name('.')


def test_with_name_double_dot():
    """with_name('..') raises ValueError."""
    with pytest.raises(ValueError):
        base = URL('http://example.com')
        base.with_name('..')

# is_absolute


Expand Down Expand Up @@ -1544,3 +1585,12 @@ def test_build_query_quoting():
assert u == URL('http://127.0.0.1/файл.jpg?arg=Привет')
assert str(u) == ('http://127.0.0.1/%D1%84%D0%B0%D0%B9%D0%BB.jpg?'
'arg=%D0%9F%D1%80%D0%B8%D0%B2%D0%B5%D1%82')


def test_build_drop_dots():
    """URL.build() resolves '..' segments in the supplied path."""
    parts = {
        'scheme': 'http',
        'host': 'example.com',
        'path': '/path/../to',
    }
    built = URL.build(**parts)
    assert str(built) == 'http://example.com/to'
63 changes: 52 additions & 11 deletions yarl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from .quoting import quote, unquote

__version__ = '0.10.2'
__version__ = '0.10.3'

__all__ = ['URL']

Expand Down Expand Up @@ -181,13 +181,14 @@ def __init__(self, val='', *, encoded=False, strict=False):
user += ':' + _quote(val.password)
netloc = user + '@' + netloc

val = SplitResult(
val[0], # scheme
netloc,
_quote(val[2], safe='+@:', protected='/+', strict=strict),
query=_quote(val[3], safe='=+&?/:@',
protected=PROTECT_CHARS, qs=True, strict=strict),
fragment=_quote(val[4], safe='?/:@', strict=strict))
path = _quote(val[2], safe='+@:', protected='/+', strict=strict)
if netloc:
path = _normalize_path(path)

query = _quote(val[3], safe='=+&?/:@',
protected=PROTECT_CHARS, qs=True, strict=strict)
fragment = _quote(val[4], safe='?/:@', strict=strict)
val = SplitResult(val[0], netloc, path, query, fragment)

self._val = val
self._cache = {}
Expand All @@ -207,11 +208,16 @@ def build(cls, *, scheme='', user='', password='', host='', port=None,
raise ValueError(
"Only one of \"query\" or \"query_string\" should be passed")

netloc = cls._make_netloc(user, password, host, port)
path = _quote(path, safe='@:', protected='/')
if netloc:
path = _normalize_path(path)

url = cls(
SplitResult(
scheme,
cls._make_netloc(user, password, host, port),
_quote(path, safe='@:', protected='/'),
netloc,
path,
_quote(query_string),
fragment
),
Expand Down Expand Up @@ -290,6 +296,8 @@ def __truediv__(self, name):
parts = path.rstrip('/').split('/')
parts.append(name)
new_path = '/'.join(parts)
if self.is_absolute():
new_path = _normalize_path(new_path)
return URL(self._val._replace(path=new_path, query='', fragment=''),
encoded=True)

Expand Down Expand Up @@ -464,7 +472,8 @@ def query(self):
Empty value if URL has no query part.
"""
ret = MultiDict(parse_qsl(self.raw_query_string, keep_blank_values=True))
ret = MultiDict(parse_qsl(self.raw_query_string,
keep_blank_values=True))
return MultiDictProxy(ret)

@property
Expand Down Expand Up @@ -708,6 +717,8 @@ def with_path(self, path, encoded=False):
"""Return a new URL with path replaced."""
if not encoded:
path = _quote(path, safe='@:', protected='/', strict=self._strict)
if self.is_absolute():
path = _normalize_path(path)
return URL(self._val._replace(path=path), encoded=True)

def with_query(self, *args, **kwargs):
Expand Down Expand Up @@ -830,6 +841,8 @@ def with_name(self, name):
if '/' in name:
raise ValueError("Slash in name is not allowed")
name = _quote(name, safe='@:', protected='/')
if name in ('.', '..'):
raise ValueError(". and .. values are forbidden")
parts = list(self.raw_parts)
if self.is_absolute():
if len(parts) == 1:
Expand Down Expand Up @@ -873,3 +886,31 @@ def human_repr(self):
self.path,
self.query_string,
self.fragment))


def _normalize_path(path):
    """Return *path* with its '.' and '..' segments resolved.

    '.' segments are dropped and each '..' removes the segment before
    it.  A '..' with nothing left to remove is ignored.  When the last
    segment is '.' or '..' the result ends with a trailing '/'.

    The leading '/' of an absolute path is the root, not a removable
    segment, so it always survives: '/path/../../to' gives '/to' and
    '/..' gives '/'.

    :param path: '/'-separated path string, absolute or relative.
    :return: the normalized path string.
    """
    # Detach the root before splitting.  Otherwise it shows up as a
    # leading empty segment that a surplus '..' would pop, silently
    # turning an absolute path into a relative one.
    prefix = ''
    if path.startswith('/'):
        prefix = '/'
        path = path[1:]

    segments = path.split('/')
    resolved_path = []

    for seg in segments:
        if seg == '..':
            # Ignore any '..' that would climb above the first segment.
            if resolved_path:
                resolved_path.pop()
        elif seg != '.':
            resolved_path.append(seg)

    if segments[-1] in ('.', '..'):
        # The path ended in a relative directory reference, so the
        # result names a directory: terminate it with a trailing '/'.
        resolved_path.append('')

    return prefix + '/'.join(resolved_path)

0 comments on commit 3d0e035

Please sign in to comment.