Fixing scrub field check and tests
The check to see if a value is in the list of scrub fields needs
to call .lower(), which does an implicit conversion of the value
if it's not already a unicode string. That can cause
UnicodeDecodeErrors, so I added a _to_text() function that attempts
a couple of different ways of converting bytes to unicode (for
Python 2).

I also bumped the minor version and added a beta.1 tag since this
code is potentially buggy (unicode is always difficult to get
right, especially across Python versions).

@brianr
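
A minimal Python 2 sketch of the failure mode and the fallback strategy
described above (hypothetical values; this mirrors the approach, not the
library's exact code):

```python
# unicode() implicitly decodes byte strings as ASCII on Python 2,
# so non-ASCII bytes raise UnicodeDecodeError.
val = '\xe2\x98\x83'  # UTF-8 bytes for u'\u2603' (SNOWMAN)

try:
    unicode(val).lower()  # roughly what the old check did
except UnicodeDecodeError:
    pass  # the crash this commit fixes

def to_text(val):
    # Try the implicit ASCII decode first, then an explicit UTF-8
    # decode that replaces undecodable bytes instead of raising.
    for convert in (unicode,
                    lambda x: unicode(x, encoding='utf8', errors='replace')):
        try:
            return convert(val)
        except UnicodeDecodeError:
            pass
    return None

assert to_text(val) == u'\u2603'
```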
coryvirok committed Oct 2, 2015
1 parent e26dddb commit 6e9879f
Showing 2 changed files with 55 additions and 39 deletions.
69 changes: 46 additions & 23 deletions rollbar/__init__.py
@@ -4,7 +4,7 @@
from __future__ import absolute_import
from __future__ import unicode_literals

__version__ = '0.10.0'
__version__ = '0.11.0-beta.1'

import base64
import copy
@@ -23,12 +23,12 @@
import uuid
import wsgiref.util

from reprlib import repr

import reprlib
import requests

from builtins import str as text
from future.moves.urllib.parse import urlparse, urlencode
from future.moves.urllib.parse import urlparse, urlencode, urlunparse, urljoin, parse_qs
from past.builtins import basestring


@@ -323,7 +323,7 @@ def init(access_token, environment='production', **kw):
if SETTINGS.get('handler') == 'agent':
agent_log = _create_agent_log()

_repr = repr.Repr()
_repr = reprlib.Repr()
for name, size in SETTINGS['locals']['sizes'].items():
setattr(_repr, name, size)

@@ -939,16 +939,16 @@ def _scrub_request_data(request_data):


def _scrub_request_url(url_string):
url = urlparse.urlparse(url_string)
qs_params = urlparse.parse_qs(url.query)
url = urlparse(url_string)
qs_params = parse_qs(url.query)

# use dash for the replacement character so it looks better since it won't be url escaped
scrubbed_qs_params = _scrub_obj(qs_params, replacement_character='-')

scrubbed_qs = urlencode(scrubbed_qs_params, doseq=True)

scrubbed_url = (url.scheme, url.netloc, url.path, url.params, scrubbed_qs, url.fragment)
scrubbed_url_string = urlparse.urlunparse(scrubbed_url)
scrubbed_url_string = urlunparse(scrubbed_url)

return scrubbed_url_string

@@ -999,11 +999,13 @@ def _scrub(obj, k=None):


def _in_scrub_fields(val, scrub_fields):
val = text(val).lower()
for field in set(scrub_fields):
field = text(field)
if field == val:
return True
if isinstance(val, basestring):
val = _to_text(val)
if val:
val = val.lower()
for field in set(scrub_fields):
if _to_text(field) == val:

brianr (Member) commented on Oct 2, 2015:
Looks like this is going to do the _to_text conversion many times. Isn't scrub_fields set at config time? Perhaps we could do the _to_text call then, and just once?
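
A rough sketch of that suggestion (hypothetical; _normalize_scrub_fields is
an invented helper, and this assumes SETTINGS['scrub_fields'] is populated
once in init()):

```python
# Hypothetical: normalize scrub_fields to lowercase unicode once at
# config time, so _in_scrub_fields() only converts the incoming value.
def _normalize_scrub_fields(fields):
    normalized = set()
    for field in fields:
        field = _to_text(field)
        if field is not None:
            normalized.add(field.lower())
    return normalized

# in init(), after SETTINGS is populated:
#   SETTINGS['scrub_fields'] = _normalize_scrub_fields(SETTINGS['scrub_fields'])

def _in_scrub_fields(val, scrub_fields):
    if not isinstance(val, basestring):
        return False
    val = _to_text(val)
    return val is not None and val.lower() in scrub_fields
```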

return True

return False

@@ -1148,7 +1150,7 @@ def _build_wsgi_request_data(request):
'method': request.get('REQUEST_METHOD'),
}
if 'QUERY_STRING' in request:
request_data['GET'] = urlparse.parse_qs(request['QUERY_STRING'], keep_blank_values=True)
request_data['GET'] = parse_qs(request['QUERY_STRING'], keep_blank_values=True)
# Collapse single item arrays
request_data['GET'] = dict((k, v[0] if len(v) == 1 else v) for k, v in request_data['GET'].items())

@@ -1218,14 +1220,14 @@ def _post_api_appengine(path, payload, access_token=None):
# Serialize this ourselves so we can handle error cases more gracefully
payload = ErrorIgnoringJSONEncoder().encode(payload)

url = urlparse.urljoin(SETTINGS['endpoint'], path)
url = urljoin(SETTINGS['endpoint'], path)
resp = AppEngineFetch(url,
method="POST",
payload=payload,
headers=headers,
allow_truncated=False,
deadline=SETTINGS.get('timeout', DEFAULT_TIMEOUT),
validate_certificate=SETTINGS.get('verify_https', True))
payload=payload,
headers=headers,
allow_truncated=False,
deadline=SETTINGS.get('timeout', DEFAULT_TIMEOUT),
validate_certificate=SETTINGS.get('verify_https', True))

return _parse_response(path, SETTINGS['access_token'], payload, resp)

@@ -1238,7 +1240,7 @@ def _post_api(path, payload, access_token=None):
# Serialize this ourselves so we can handle error cases more gracefully
payload = ErrorIgnoringJSONEncoder().encode(payload)

url = urlparse.urljoin(SETTINGS['endpoint'], path)
url = urljoin(SETTINGS['endpoint'], path)
resp = requests.post(url,
data=payload,
headers=headers,
@@ -1250,7 +1252,7 @@

def _get_api(path, access_token=None, endpoint=None, **params):
access_token = access_token or SETTINGS['access_token']
url = urlparse.urljoin(endpoint or SETTINGS['endpoint'], path)
url = urljoin(endpoint or SETTINGS['endpoint'], path)
params['access_token'] = access_token
resp = requests.get(url, params=params, verify=SETTINGS.get('verify_https', True))
return _parse_response(path, access_token, params, resp, endpoint=endpoint)
@@ -1273,7 +1275,7 @@ def _post_api_tornado(path, payload, access_token=None):
# Serialize this ourselves so we can handle error cases more gracefully
payload = ErrorIgnoringJSONEncoder().encode(payload)

url = urlparse.urljoin(SETTINGS['endpoint'], path)
url = urljoin(SETTINGS['endpoint'], path)

resp = yield TornadoAsyncHTTPClient().fetch(
url, body=payload, method='POST', connect_timeout=SETTINGS.get('timeout', DEFAULT_TIMEOUT),
@@ -1305,7 +1307,7 @@ def _post_api_twisted(path, payload, access_token=None):
# Serialize this ourselves so we can handle error cases more gracefully
payload = ErrorIgnoringJSONEncoder().encode(payload)

url = urlparse.urljoin(SETTINGS['endpoint'], path)
url = urljoin(SETTINGS['endpoint'], path)

agent = TwistedHTTPClient(reactor, connectTimeout=SETTINGS.get('timeout', DEFAULT_TIMEOUT))
resp = yield agent.request(
@@ -1383,6 +1385,27 @@ def _wsgi_extract_user_ip(environ):
return environ['REMOTE_ADDR']


if python_major_version <= 2:
def _to_text(val):
if isinstance(val, unicode):
return val
elif isinstance(val, str):
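# Try the implicit ASCII decode first, then an explicit utf-8 decode
# with errors='replace', which cannot itself raise UnicodeDecodeError.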
conversion_options = [unicode, lambda x: unicode(x, encoding='utf8', errors='replace')]
for option in conversion_options:
try:
return option(val)
except UnicodeDecodeError:
pass

return None
else:
def _to_text(val):
if isinstance(val, basestring):
return text(val)

return None


# http://www.xormedia.com/recursively-merge-dictionaries-in-python.html
def dict_merge(a, b):
'''recursively merges dict's. not just simple a['key'] = b['key'], if
25 changes: 9 additions & 16 deletions rollbar/test/test_rollbar.py
@@ -9,7 +9,7 @@
import unittest

import rollbar
from rollbar import urlparse
from rollbar import urlparse, parse_qs

try:
# Python 3
@@ -169,7 +169,7 @@ def _raise():
self.assertIn('body', payload['data'])
self.assertIn('trace', payload['data']['body'])
self.assertIn('exception', payload['data']['body']['trace'])
self.assertEqual(payload['data']['body']['trace']['exception']['message'], 'foo'.encode('utf-8'))
self.assertEqual(payload['data']['body']['trace']['exception']['message'], 'foo')
self.assertEqual(payload['data']['body']['trace']['exception']['class'], 'Exception')

self.assertNotIn('args', payload['data']['body']['trace']['frames'][-1])
@@ -326,8 +326,8 @@ def test_non_dict_scrubbing(self):
def test_url_scrubbing(self):
url = 'http://foo.com/?password=password&foo=bar&secret=secret'

scrubbed_url = urlparse.urlparse(rollbar._scrub_request_url(url))
qs_params = urlparse.parse_qs(scrubbed_url.query)
scrubbed_url = urlparse(rollbar._scrub_request_url(url))
qs_params = parse_qs(scrubbed_url.query)

self.assertDictEqual(qs_params, {
'password': ['--------'],
@@ -338,8 +338,8 @@ def test_utf8_url_val_scrubbing(self):
def test_utf8_url_val_scrubbing(self):
url = 'http://foo.com/?password=password&foo=bar&secret=%s' % SNOWMAN

scrubbed_url = urlparse.urlparse(rollbar._scrub_request_url(url))
qs_params = urlparse.parse_qs(scrubbed_url.query)
scrubbed_url = urlparse(rollbar._scrub_request_url(url))
qs_params = parse_qs(scrubbed_url.query)

self.assertDictEqual(qs_params, {
'password': ['--------'],
@@ -354,7 +354,7 @@ def test_utf8_url_key_scrubbing(self):
rollbar.SETTINGS['scrub_fields'].append(SNOWMAN)
scrubbed_url = rollbar._scrub_request_url(url)

qs_params = urlparse.parse_qs(urlparse.urlparse(scrubbed_url).query)
qs_params = parse_qs(urlparse(scrubbed_url).query)

self.assertEqual(['------'], qs_params[SNOWMAN])
self.assertEqual(['--------'], qs_params['password'])
@@ -922,14 +922,7 @@ def test_modify_arg(self, send_payload):

@mock.patch('rollbar.send_payload')
def test_unicode_exc_info(self, send_payload):
unicode_char = '\u221a'

if sys.version_info[0] < 3:
message = unicode_char.decode('unicode-escape')
expected = '\xe2\x88\x9a'
else:
message = unicode_char
expected = message.encode('utf-8')
message = '\u221a'

try:
raise Exception(message)
@@ -938,7 +931,7 @@

self.assertEqual(send_payload.called, True)
payload = send_payload.call_args[0][0]
self.assertEqual(payload['data']['body']['trace']['exception']['message'], expected)
self.assertEqual(payload['data']['body']['trace']['exception']['message'], message)



