forked from oldpepper12/emojibot2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pushshift.py
58 lines (42 loc) · 1.58 KB
/
pushshift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import requests
import time
class RateLimiter:
    """Space out calls so that at most ``rate`` of them happen per minute.

    Call :meth:`limit` immediately before each request; it blocks until the
    minimum interval (``60 / rate`` seconds) since the previous call has
    elapsed.
    """

    def __init__(self, rate):  # rate is requests / min
        self.rate = rate
        # time.time() value recorded by the most recent limit() call.
        # Starts at 0, so the first limit() call does not wait for any
        # realistic rate.
        self.last_request_ts = 0

    def limit(self):
        """Block until the minimum delay since the previous call has passed.

        Raises:
            ZeroDivisionError: if ``rate`` is 0.
        """
        min_delay_sec = 60 / self.rate
        # Sleep for the outstanding interval instead of spinning on
        # time.time(): a busy-wait keeps a CPU core at 100% for the whole
        # delay. The loop re-checks after waking in case the wall clock
        # moved or the sleep ended marginally early.
        while True:
            remaining = min_delay_sec - (time.time() - self.last_request_ts)
            if remaining <= 0:
                break
            time.sleep(remaining)
        self.last_request_ts = time.time()
def check_rate_limit():
    """Return the ``server_ratelimit_per_minute`` value from Pushshift's /meta endpoint."""
    meta = requests.get("https://api.pushshift.io/meta").json()
    return meta["server_ratelimit_per_minute"]
# Shared limiter for every request made by this module. Stay 20 requests/min
# below the limit the server advertises to leave headroom, but never drop
# below 1: a zero rate makes RateLimiter.limit() divide by zero and a
# negative rate yields a negative delay, i.e. no throttling at all.
rate_limiter = RateLimiter(max(1, check_rate_limit() - 20))
def submissions(subreddit, start_ts, end_ts):
    """Query the Pushshift submission search endpoint for one subreddit.

    Args:
        subreddit: Value sent as the ``subreddit`` query parameter.
        start_ts: Lower time bound; truncated with ``int()`` and sent as
            ``after``.
        end_ts: Upper time bound; truncated with ``int()`` and sent as
            ``before``.

    Returns:
        The decoded JSON body of the first response whose status is not 429.
    """
    params = {
        "after": int(start_ts),
        "before": int(end_ts),
        "sort": "desc",
        "subreddit": subreddit,
    }
    # Retry in a loop rather than by calling ourselves: under sustained rate
    # limiting the recursive form grows the stack on every attempt and ends
    # in RecursionError. Each attempt still passes through the shared rate
    # limiter first, exactly as a fresh call would.
    while True:
        rate_limiter.limit()
        resp = requests.get(
            "https://api.pushshift.io/reddit/submission/search",
            params=params,
        )
        if resp.status_code != 429:
            return resp.json()
        # 429 = rate limited by the server: back off, then try again.
        print("warning: ratelimited")
        time.sleep(5)
def get_block(subreddit, now, block=0, blocksize=1209600):
    """Fetch the submissions that fall inside one fixed-size time window.

    Windows are counted backwards from ``now``: block 0 covers the
    ``blocksize`` seconds ending at ``now``, block 1 the ``blocksize``
    seconds before that, and so on. The default ``blocksize`` is two weeks.
    """
    window_start = now - (block + 1) * blocksize
    window_end = window_start + blocksize
    return submissions(subreddit, window_start, window_end)
def less_data(data):
    """Trim every submission in ``data`` to selftext, title and permalink.

    Entries are replaced in place, one at a time, and the same ``data``
    object is returned. A missing ``selftext`` becomes ``""``; a missing
    ``title`` or ``permalink`` raises ``KeyError``.
    """
    for index, post in enumerate(data):
        data[index] = {
            "selftext": post.get("selftext", ""),
            "title": post["title"],
            "permalink": post["permalink"],
        }
    return data