-
Notifications
You must be signed in to change notification settings - Fork 0
/
akismet.py
372 lines (300 loc) · 13.1 KB
/
akismet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# Version 0.2.0
# 2009/06/18
# Copyright Michael Foord 2005-2009
# akismet.py
# Python interface to the akismet API
# E-mail fuzzyman AT voidspace DOT org DOT uk
# http://www.voidspace.org.uk/python/modules.shtml
# http://akismet.com
# Released subject to the BSD License
# See http://www.voidspace.org.uk/python/license.shtml
"""
A python interface to the `Akismet <http://akismet.com>`_ API.
This is a web service for blocking SPAM comments to blogs - or other online
services.
You will need a Wordpress API key, from `wordpress.com <http://wordpress.com>`_.
You should pass in the keyword argument 'agent' to the name of your program,
when you create an Akismet instance. This sets the ``user-agent`` to a useful
value.
The default is : ::
Python Interface by Fuzzyman | akismet.py/0.2.0
Whatever you pass in, will replace the *Python Interface by Fuzzyman* part.
**0.2.0** will change with the version of this interface.
Usage example::
from akismet import Akismet
api = Akismet(agent='Test Script')
# if apikey.txt is in place,
# the key will automatically be set
# or you can call api.setAPIKey()
#
if api.key is None:
print "No 'apikey.txt' file."
elif not api.verify_key():
print "The API key is invalid."
else:
# data should be a dictionary of values
# They can all be filled in with defaults
# from a CGI environment
if api.comment_check(comment, data):
print 'This comment is spam.'
else:
print 'This comment is ham.'
"""
import os
from urllib import urlencode
import socket
# Importing this module sets the interpreter-wide default socket timeout to
# five seconds, so that a stalled connection does not block the caller
# forever.  The lookup is guarded because very old interpreters do not
# provide ``socket.setdefaulttimeout``.
if getattr(socket, 'setdefaulttimeout', None) is not None:
    socket.setdefaulttimeout(5)
# Version of this module; it is interpolated into the User-Agent header
# built by ``Akismet.__init__``.
__version__ = '0.2.0'
# Names exported by ``from akismet import *``.
__all__ = (
'__version__',
'Akismet',
'AkismetError',
'APIKeyError',
)
__author__ = 'Michael Foord <fuzzyman AT voidspace DOT org DOT uk>'
# Markup used by the docstrings in this module.
__docformat__ = "restructuredtext en"
# Template for the User-Agent header; ``Akismet.__init__`` fills it with
# ``(agent, __version__)``.
user_agent = "%s | akismet.py/%s"
# Agent name used when the caller passes ``agent=None``; the placeholder is
# filled with ``__version__``.
DEFAULTAGENT = 'Python Interface by Fuzzyman/%s'
# Module-level alias of ``os.path.isfile``; ``Akismet.setAPIKey`` uses it to
# check for ``apikey.txt``.
isfile = os.path.isfile
# Choose the HTTP transport once, at import time.  Google App Engine's
# ``urlfetch`` service is preferred when it can be imported; otherwise the
# standard ``urllib2`` module is used.  ``urllib2`` is left as ``None`` when
# the App Engine import succeeds, and that is what selects the implementation
# of ``_fetch_url`` below.
urllib2 = None
try:
    from google.appengine.api import urlfetch
except ImportError:
    import urllib2

if urllib2 is None:
    def _fetch_url(url, data, headers):
        """
        POST ``data`` to ``url`` through App Engine's ``urlfetch``.

        ``data`` is the already url-encoded request body and ``headers`` a
        dictionary of HTTP headers.  The response body is returned when the
        status code is 200; any other status raises ``Exception`` with the
        url and status code in the message.
        """
        req = urlfetch.fetch(url=url, payload=data, method=urlfetch.POST,
                             headers=headers)
        if req.status_code == 200:
            return req.content
        raise Exception('Could not fetch Akismet URL: %s Response code: %s' %
                        (url, req.status_code))
else:
    def _fetch_url(url, data, headers):
        """
        POST ``data`` to ``url`` with ``urllib2`` and return the response body.

        ``data`` is the already url-encoded request body and ``headers`` a
        dictionary of HTTP headers.  Errors raised by ``urllib2`` are not
        caught here.
        """
        req = urllib2.Request(url, data, headers)
        h = urllib2.urlopen(req)
        try:
            return h.read()
        finally:
            # Always release the connection, even if reading fails.
            h.close()
class AkismetError(Exception):
    """
    Base class for all akismet exceptions.

    The ``Akismet`` class raises it directly when a request fails, when a
    required value (``user_ip`` or ``user_agent``) cannot be worked out, and
    when ``comment_check`` receives an unexpected reply.
    """
class APIKeyError(AkismetError):
    """
    Invalid API key.

    Raised when a call needs an API key but none has been set, and when
    ``apikey.txt`` does not contain both a key and a blog URL.
    """
class Akismet(object):
    """
    A class for working with the akismet API.

    An instance stores the API ``key``, the ``blog_url`` the key belongs to
    and the ``user_agent`` string sent with every request.
    """

    # Host and path shared by every API call.  ``_getURL`` prefixes it with
    # the API key; ``verify_key`` uses it without the key.
    baseurl = 'rest.akismet.com/1.1/'

    def __init__(self, key=None, blog_url=None, agent=None):
        """
        Build the ``User-Agent`` string and automatically call ``setAPIKey``.

        ``key`` and ``blog_url`` are passed straight to ``setAPIKey``.
        ``agent`` is the name of the calling program; when it is ``None``
        the module default (``DEFAULTAGENT``) is used.

        It raises ``APIKeyError`` if the key is loaded from ``apikey.txt``
        and that file is invalid.
        """
        if agent is None:
            agent = DEFAULTAGENT % __version__
        self.user_agent = user_agent % (agent, __version__)
        self.setAPIKey(key, blog_url)

    def _getURL(self):
        """
        Fetch the url to make requests to.

        This consists of the api key (as a sub-domain) plus the baseurl.
        """
        return 'http://%s.%s' % (self.key, self.baseurl)

    def _safeRequest(self, url, data, headers):
        """
        POST ``data`` to ``url`` and return the text of the response.

        Every exception raised while fetching the url is re-raised as an
        ``AkismetError`` carrying the text of the original error, so callers
        only ever have to catch ``AkismetError``.
        """
        try:
            return _fetch_url(url, data, headers)
        except Exception as e:
            raise AkismetError(str(e))

    def setAPIKey(self, key=None, blog_url=None):
        """
        Set the wordpress API key for all transactions.

        If you don't specify an explicit API ``key`` and ``blog_url`` it will
        attempt to load them from a file called ``apikey.txt`` in the current
        directory.  Blank lines and lines starting with ``#`` are ignored;
        the first remaining line is the key and the second the blog URL.

        If no ``key`` is given and there is no ``apikey.txt``, ``key`` and
        ``blog_url`` are stored as passed (so ``self.key`` is ``None``).

        This method is *usually* called automatically when you create a new
        ``Akismet`` instance.

        It raises ``APIKeyError`` if ``apikey.txt`` holds fewer than two
        usable lines.
        """
        if key is None and isfile('apikey.txt'):
            # Close the file deterministically instead of leaking the handle.
            with open('apikey.txt') as handle:
                stripped = [line.strip() for line in handle]
            entries = [entry for entry in stripped
                       if entry and not entry.startswith('#')]
            try:
                self.key = entries[0]
                self.blog_url = entries[1]
            except IndexError:
                raise APIKeyError("Your 'apikey.txt' is invalid.")
        else:
            self.key = key
            self.blog_url = blog_url

    def verify_key(self):
        """
        This equates to the ``verify-key`` call against the akismet API.

        It returns ``True`` if the key is valid, ``False`` otherwise.

        The docs state that you *ought* to call this at the start of the
        transaction.

        It raises ``APIKeyError`` if you have not yet set an API key.

        If the connection to akismet fails, an ``AkismetError`` carrying the
        text of the underlying error is raised.
        """
        if self.key is None:
            raise APIKeyError("Your have not set an API key.")
        data = {'key': self.key, 'blog': self.blog_url}
        # this function *doesn't* use the key as part of the URL
        url = 'http://%sverify-key' % self.baseurl
        headers = {'User-Agent': self.user_agent}
        resp = self._safeRequest(url, urlencode(data), headers)
        return resp.lower() == 'valid'

    def _build_data(self, comment, data):
        """
        This function builds the data structure required by ``comment_check``,
        ``submit_spam``, and ``submit_ham``.

        It modifies the ``data`` dictionary you give it in place. (and so
        doesn't return anything)  Values already present in ``data`` are
        kept, except ``comment_content`` which is always set to ``comment``.

        It raises an ``AkismetError`` if the user IP or user-agent can't be
        worked out.
        """
        data['comment_content'] = comment

        # Required values: taken from the CGI environment when not supplied.
        required = (('user_ip', 'REMOTE_ADDR'),
                    ('user_agent', 'HTTP_USER_AGENT'))
        for field, env_name in required:
            if field not in data:
                try:
                    data[field] = os.environ[env_name]
                except KeyError:
                    raise AkismetError("No '%s' supplied" % field)

        # Optional values with fixed defaults.
        data.setdefault('referrer', os.environ.get('HTTP_REFERER', 'unknown'))
        data.setdefault('permalink', '')
        data.setdefault('comment_type', 'comment')
        data.setdefault('comment_author', '')
        data.setdefault('comment_author_email', '')
        data.setdefault('comment_author_url', '')

        # Optional values copied from the environment variable of the same
        # name, defaulting to the empty string.
        for name in ('SERVER_ADDR', 'SERVER_ADMIN', 'SERVER_NAME',
                     'SERVER_PORT', 'SERVER_SIGNATURE', 'SERVER_SOFTWARE',
                     'HTTP_ACCEPT'):
            data.setdefault(name, os.environ.get(name, ''))

        data.setdefault('blog', self.blog_url)

    def _prepare_data(self, comment, data, build_data):
        """
        Shared first step of ``comment_check``, ``submit_spam`` and
        ``submit_ham``: validate the key and return the dictionary to send.

        ``data`` defaults to a new dictionary; it is filled in by
        ``_build_data`` when ``build_data`` is true, and ``blog`` is set to
        ``self.blog_url`` if it is still missing.

        It raises ``APIKeyError`` if no API key has been set.
        """
        if self.key is None:
            raise APIKeyError("Your have not set an API key.")
        if data is None:
            data = {}
        if build_data:
            self._build_data(comment, data)
        if 'blog' not in data:
            data['blog'] = self.blog_url
        return data

    def _post(self, method, data):
        """
        POST ``data`` to the API call named ``method`` (for example
        ``comment-check``) and return the text of the response.
        """
        url = '%s%s' % (self._getURL(), method)
        headers = {'User-Agent': self.user_agent}
        return self._safeRequest(url, urlencode(data), headers)

    def comment_check(self, comment, data=None, build_data=True, DEBUG=False):
        """
        This is the function that checks comments.

        It returns ``True`` for spam and ``False`` for ham.

        If you set ``DEBUG=True`` then it will return the text of the response,
        instead of the ``True`` or ``False`` object.

        It raises ``APIKeyError`` if you have not yet set an API key.

        If the connection to Akismet fails then an ``AkismetError`` carrying
        the text of the underlying error is raised.

        As a minimum it requires the body of the comment. This is the
        ``comment`` argument.

        Akismet requires some other arguments, and allows some optional ones.
        The more information you give it, the more likely it is to be able to
        make an accurate diagnosis.

        You supply these values using a mapping object (dictionary) as the
        ``data`` argument.  The dictionary is modified in place.

        If ``build_data`` is ``True`` (the default), then *akismet.py* will
        attempt to fill in as much information as possible, using default
        values where necessary. This is particularly useful for programs
        running in a CGI environment. A lot of useful information
        can be supplied from environment variables (``os.environ``). See below.

        You *only* need supply values for which you don't want defaults filled
        in for. All values must be strings.

        There are a few required values. If they are not supplied, and
        defaults can't be worked out, then an ``AkismetError`` is raised.

        If you set ``build_data=False`` then ``data`` is sent as given (only
        ``blog`` is filled in when missing).  In particular ``comment`` is
        *not* added for you, so ``data`` must already contain
        ``comment_content``.  Any reply other than ``true`` or ``false``
        raises an ``AkismetError``.

        The normal values (and defaults) are as follows : ::

            'user_ip':              os.environ['REMOTE_ADDR']       (*)
            'user_agent':           os.environ['HTTP_USER_AGENT']   (*)
            'referrer':             os.environ.get('HTTP_REFERER', 'unknown') [#]_
            'permalink':            ''
            'comment_type':         'comment' [#]_
            'comment_author':       ''
            'comment_author_email': ''
            'comment_author_url':   ''
            'SERVER_ADDR':          os.environ.get('SERVER_ADDR', '')
            'SERVER_ADMIN':         os.environ.get('SERVER_ADMIN', '')
            'SERVER_NAME':          os.environ.get('SERVER_NAME', '')
            'SERVER_PORT':          os.environ.get('SERVER_PORT', '')
            'SERVER_SIGNATURE':     os.environ.get('SERVER_SIGNATURE', '')
            'SERVER_SOFTWARE':      os.environ.get('SERVER_SOFTWARE', '')
            'HTTP_ACCEPT':          os.environ.get('HTTP_ACCEPT', '')

        (*) Required values

        You may supply as many additional 'HTTP_*' type values as you wish.
        These should correspond to the http headers sent with the request.

        .. [#] Note the spelling "referrer". This is a required value by the
            akismet api - however, referrer information is not always
            supplied by the browser or server. In fact the HTTP protocol
            forbids relying on referrer information for functionality in
            programs.
        .. [#] The `API docs <http://akismet.com/development/api/>`_ state that this value
            can be " *blank, comment, trackback, pingback, or a made up value*
            *like 'registration'* ".
        """
        data = self._prepare_data(comment, data, build_data)
        resp = self._post('comment-check', data)
        if DEBUG:
            return resp
        resp = resp.lower()
        if resp == 'true':
            return True
        if resp == 'false':
            return False
        # NOTE: Happens when you get a 'howdy wilbur' response !
        raise AkismetError('missing required argument.')

    def submit_spam(self, comment, data=None, build_data=True):
        """
        This function is used to tell akismet that a comment it marked as ham,
        is really spam.

        It takes all the same arguments as ``comment_check``, except for
        *DEBUG*, and returns nothing.

        It raises ``APIKeyError`` if you have not yet set an API key, and
        ``AkismetError`` if the request fails.
        """
        self._post('submit-spam',
                   self._prepare_data(comment, data, build_data))

    def submit_ham(self, comment, data=None, build_data=True):
        """
        This function is used to tell akismet that a comment it marked as spam,
        is really ham.

        It takes all the same arguments as ``comment_check``, except for
        *DEBUG*, and returns nothing.

        It raises ``APIKeyError`` if you have not yet set an API key, and
        ``AkismetError`` if the request fails.
        """
        self._post('submit-ham',
                   self._prepare_data(comment, data, build_data))