forked from myano/jenni
-
Notifications
You must be signed in to change notification settings - Fork 0
/
web.py
141 lines (114 loc) · 3.93 KB
/
web.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python
"""
web.py - Web Facilities
Copyright 2009-2013, Michael Yanovich (yanovich.net)
Copyright 2012, Dimitri Molenaars (Tyrope.nl)
Copyright 2012, Elad Alfassa ([email protected])
Copyright 2008-2013, Sean B. Palmer (inamidst.com)
More info:
* Willie: https://willie.dftba.net
* jenni: https://github.com/myano/jenni/
* Phenny: http://inamidst.com/phenny/
"""
import re
import urllib
import urllib2
import urlparse
from htmlentitydefs import name2codepoint
class Grab(urllib.URLopener):
    """URL opener that identifies itself with a desktop Firefox User-Agent.

    HTTP error responses are not raised; they are handed back as ordinary
    response objects so the caller can still read them.
    """

    def __init__(self, *args):
        # The user agent has to be set before the base constructor runs,
        # which is when URLopener builds its default request headers.
        self.version = 'Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20100101 Firefox/17.0'
        urllib.URLopener.__init__(self, *args)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Wrap an HTTP error response instead of raising.

        The returned object's info() is the two-item list
        [headers, errcode], and its URL is "http:" + `url`.
        `errmsg` is accepted for interface compatibility but not used.
        """
        details = [headers, errcode]
        return urllib.addinfourl(fp, details, "http:" + url)


# Make urllib.urlopen() use a Grab instance from here on.
urllib._urlopener = Grab()
def get(uri):
    """Fetch `uri` with urllib.urlopen and return the response body.

    Returns None, without making a request, when `uri` does not start
    with 'http'. Otherwise returns whatever the response's read() yields.
    Errors raised by urlopen() or read() propagate to the caller.
    """
    if not uri.startswith('http'):
        return None
    u = urllib.urlopen(uri)
    try:
        return u.read()
    finally:
        # Release the connection even when read() fails part-way through.
        u.close()
def head(uri):
    """Open `uri` with urllib.urlopen and return the response's info().

    Returns None, without making a request, when `uri` does not start
    with 'http'. The request is issued through urllib.urlopen with no
    data argument; the body is not read, only the info() value is kept.

    NOTE: when this module's Grab opener handles an HTTP error response,
    info() is the two-item list [headers, errcode] rather than a plain
    headers object.
    """
    if not uri.startswith('http'):
        return None
    u = urllib.urlopen(uri)
    try:
        return u.info()
    finally:
        # Always release the connection, matching get() and post().
        u.close()
def post(uri, query):
    """Send `query` to `uri` as form data and return the response body.

    Returns None, without making a request, when `uri` does not start
    with 'http'. Otherwise `query` is encoded with urllib.urlencode and
    passed to urllib.urlopen as the request data; the value of the
    response's read() is returned. Errors raised while encoding, opening
    or reading propagate to the caller.
    """
    if not uri.startswith('http'):
        return None
    data = urllib.urlencode(query)
    u = urllib.urlopen(uri, data)
    try:
        return u.read()
    finally:
        # Release the connection even when read() fails part-way through.
        u.close()
# Matches an entity reference such as "&amp;", "&#65;" or "&#x41;";
# group 1 is the text between "&" and ";".
r_entity = re.compile(r'&([^;\s]+);')


def entity(match):
    """Return the replacement text for one `r_entity` match.

    Numeric references ("#65", "#x41", prefix matched case-insensitively)
    become the corresponding character. Named references are looked up in
    name2codepoint, first with the exact spelling (names are
    case-sensitive, e.g. "Eacute" and "eacute" differ) and then
    lower-cased, so "&AMP;" is still understood.

    Anything that cannot be resolved, including malformed or out-of-range
    numeric references, is returned as the lower-cased reference text in
    square brackets, e.g. "[nosuch]".
    """
    raw = match.group(1)
    value = raw.lower()
    fallback = '[' + value + ']'
    try:
        if value.startswith('#x'):
            return unichr(int(value[2:], 16))
        if value.startswith('#'):
            return unichr(int(value[1:]))
    except (ValueError, OverflowError):
        # Not a usable code point ("&#;", "&#xZZ;", too large, negative):
        # degrade to the bracketed form instead of aborting the decode.
        return fallback
    for name in (raw, value):
        if name in name2codepoint:
            return unichr(name2codepoint[name])
    return fallback
def decode(html):
    """Return `html` with each entity reference replaced.

    Every span matched by `r_entity` is passed to `entity`, and the text
    that function returns is substituted in its place.
    """
    return re.sub(r_entity, entity, html)
# A double-quoted string literal, including backslash escapes.
r_string = re.compile(r'("(\\.|[^"\\])*")')
# What may remain once the strings are removed: JSON punctuation, digits,
# whitespace and the letters needed for true/false/null and exponents.
r_json = re.compile(r'^[,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]+$')
# Evaluation namespace: builtins disabled, JSON literals mapped to Python.
env = {'__builtins__': None, 'null': None, 'true': True, 'false': False}


def json(text):
    """Parse serialised JSON `text` and return the resulting Python value.

    The text is first checked with its string literals blanked out: only
    characters accepted by `r_json` may remain. Text that passes has each
    string literal prefixed with ``u`` and is then evaluated as a Python
    expression in the restricted `env` namespace.

    Raises ValueError when the text fails the character check. Errors
    raised by the evaluation itself are not translated.
    """
    skeleton = r_string.sub('', text)
    if not r_json.match(skeleton):
        raise ValueError('Input must be serialised JSON.')

    def as_unicode_literal(found):
        return 'u' + found.group(1)

    # NOTE(review): this still relies on eval(); the character filter and
    # the empty builtins are the only safeguards for untrusted input.
    expression = r_string.sub(as_unicode_literal, text).strip(' \t\r\n')
    return eval(expression, env, {})
# For internal use in web.py; modules may also call this when they need a
# response object they can execute read() on.
def get_urllib_object(uri, timeout):
    """
    Open `uri` with urllib2 and return the response object.

    `uri` is encoded to UTF-8 when possible, and the request is sent with
    an ``Accept: */*`` header and a Jenni User-Agent. `timeout` is passed
    straight to urllib2.urlopen.

    Returns the object produced by urllib2.urlopen. When the server
    answers with an HTTP error, the error's response object (``e.fp``) is
    returned instead of raising, so the caller can still read the body.
    Any other exception from urllib2 propagates unchanged.

    NOTE(review): the manual redirect loop only engages when the
    response's info() is a ``[headers, status]`` list; a plain urllib2
    response is treated as status 200 and returned immediately. After 50
    such redirects the string "Too many re-directs." is returned rather
    than a response object -- kept for compatibility with callers.
    """
    redirects = 0
    try:
        uri = uri.encode("utf-8")
    except (UnicodeError, AttributeError):
        # Best effort only: leave `uri` as given when it cannot be encoded.
        pass
    while True:
        req = urllib2.Request(uri, headers={'Accept': '*/*', 'User-Agent': 'Mozilla/5.0 (Jenni)'})
        try:
            u = urllib2.urlopen(req, None, timeout)
        except urllib2.HTTPError as e:
            return e.fp
        info = u.info()
        if not isinstance(info, list):
            status = '200'
        else:
            status = str(info[1])
            info = info[0]
        if not status.startswith('3'):
            break
        # Resolve the target first, then drop the response being abandoned.
        target = urlparse.urljoin(uri, info['Location'])
        u.close()
        uri = target
        redirects += 1
        if redirects >= 50:
            return "Too many re-directs."
    return u
# Thin wrapper around urllib2.quote.
def quote(string):
    """
    Percent-encode `string` exactly as urllib2.quote does.

    Provided so that a module which already imports web does not have to
    import urllib2 as well just for the quote function.
    """
    quoted = urllib2.quote(string)
    return quoted
# Thin wrapper around urllib.urlencode.
def urlencode(data):
    """
    Encode `data` as a query string exactly as urllib.urlencode does.

    Provided so that a module which already imports web does not have to
    import urllib as well just for the urlencode function.
    """
    encoded = urllib.urlencode(data)
    return encoded
if __name__ == "__main__":
    # This module defines no main(); calling it raised NameError. When run
    # directly, just show the module description instead.
    print(__doc__.strip())