-
Notifications
You must be signed in to change notification settings - Fork 0
/
gettweets.py
84 lines (69 loc) · 2.19 KB
/
gettweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from access import *
import csv
import json
def get_all_tweets(screen_name):
    """Download a user's timeline and write it to ``<screen_name>_tweets.csv``.

    Pages backwards through ``api.user_timeline`` 200 tweets at a time,
    using ``max_id`` so that each request starts just below the oldest
    tweet already collected, until a request comes back empty.

    The CSV has a header row ``id, created_at, text`` followed by one row
    per tweet. An account with no tweets produces a header-only file
    instead of raising ``IndexError``.

    ``api`` is not defined in this block; it is presumably a tweepy API
    client exported by ``from access import *`` -- confirm in access.py.

    Args:
        screen_name: Screen name of the account whose tweets are fetched;
            also used as the prefix of the output file name.
    """
    # Holds every tweet object collected so far, newest first.
    alltweets = []

    # Initial request for the most recent tweets (200 is the maximum
    # allowed count per request).
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)
    alltweets.extend(new_tweets)

    # Keep requesting older pages until a request returns nothing. Checking
    # the page before touching alltweets[-1] keeps an empty first page from
    # crashing the function.
    while len(new_tweets) > 0:
        # Id of the oldest tweet seen so far, less one, so the next page
        # does not repeat it.
        oldest = alltweets[-1].id - 1
        print("getting tweets before %s" % oldest)

        # All subsequent requests use max_id to prevent duplicates.
        new_tweets = api.user_timeline(
            screen_name=screen_name, count=200, max_id=oldest)
        alltweets.extend(new_tweets)

        print("...%s tweets downloaded so far" % len(alltweets))

    # Flatten the tweet objects into rows for the CSV writer.
    outtweets = [
        [tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")]
        for tweet in alltweets
    ]

    # NOTE: binary mode plus explicit UTF-8 encoding of the text column is
    # the Python 2 csv convention, matching the rest of this script.
    with open('%s_tweets.csv' % screen_name, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(outtweets)
def _parse_voter(tweet_text):
    """Extract one voter record from the text of a tweet.

    The text is split into clauses on '.' and ','. The first clause is
    expected to carry a two-digit age at character positions 6-7 and to
    end with "<religion> <gender>" (or "... old <gender>" when no religion
    is given).

    Returns a dict with keys ``uni``, ``loc``, ``age``, ``gender`` and
    ``religion``, or ``None`` when the tweet has no usable age (missing,
    non-numeric, or not greater than 10) or its first clause is a single
    word.
    """
    clauses = tweet_text.replace('.', ',').split(',')

    # Education marker. Reset for every tweet so that a tweet without one
    # does not inherit the previous voter's value.
    uni = None
    if ' university-educated' in clauses:
        uni = 'university-educated'
    if ' non-university-educated' in clauses:
        uni = 'non-university-educated'

    intro = clauses[0]
    try:
        # Presumably the tweet opens with a fixed six-character lead-in
        # before the age -- confirm against the actual feed.
        age = int(intro[6:8])
    except ValueError:
        return None
    if age <= 10:
        return None

    words = intro.split(' ')
    if len(words) < 2:
        # No word before the gender to inspect; treat as unparseable.
        return None

    gender = words[-1]
    # When the word before the gender is 'old' ("... years old <gender>"),
    # no religion was stated. The original also tested for "Religious"
    # inside that branch, which could never match and has been dropped;
    # the resulting values are unchanged.
    religion = words[-2] if words[-2] != 'old' else 'irreligious'

    # The third clause holds the location unless it is the education
    # marker. [1:] drops the clause's first character (the space that
    # follows the separator); the final word of the clause is discarded.
    loc = None
    if len(clauses) > 2:
        candidate = clauses[2][1:]
        if candidate not in ('university-educated', 'non-university-educated'):
            loc = " ".join(candidate.split(' ')[:-1])

    return {
        'uni': uni,
        'loc': loc,
        'age': age,
        'gender': gender,
        'religion': religion
    }


# Load the previously exported tweets; the context manager closes the file.
with open("VoterBritish_tweets.csv") as tweets_csv:
    tweet_rows = list(csv.DictReader(tweets_csv))

# One record per tweet that could be parsed, in file order.
voters = []
for tweet in tweet_rows:
    voter = _parse_voter(tweet['text'])
    if voter is not None:
        voters.append(voter)

# Persist the parsed records as JSON.
with open('results.txt', 'w') as results_file:
    json.dump(voters, results_file)

for voter in voters:
    print(voter['age'])