-
Notifications
You must be signed in to change notification settings - Fork 0
/
mongodb_util.py
75 lines (64 loc) · 2.56 KB
/
mongodb_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pymongo
import mongod_connect
from pymongo import MongoClient

# Establish the connection between the local machine and the database.
# NOTE(review): the previous code first called MongoClient(mongod_connect),
# i.e. passed the imported *module* object as the host, and did so before
# MongoClient had been imported.  If `mongod_connect` is meant to expose a
# connection URI, pass that attribute here instead of the literal address.
client = MongoClient('localhost:27017')
db = client.tweetstream  # use or create a database named tweetstream

# One collection per candidate.
trump_collection = db.tweet_trump  # use or create a collection named tweet_trump
biden_collection = db.tweet_biden  # use or create a collection named tweet_biden

# Kept for backward compatibility: the original rebound `tweet_collection`
# twice, so its final value was the Biden collection.
tweet_collection = biden_collection

# A unique index on "id" makes sure the same tweet cannot be stored twice in
# a collection.  Previously only the last-assigned collection was indexed.
for _collection in (trump_collection, biden_collection):
    _collection.create_index([("id", pymongo.ASCENDING)], unique=True)
# Builds the document that is stored for every streamed tweet.
def get_document(MyStreamListener):
    """Return the subset of a tweet's fields that gets stored in MongoDB.

    Args:
        MyStreamListener: despite its name, a single tweet given as a
            mapping.  The keys read are ``id_str``, ``text``,
            ``created_at``, ``retweet_count``, ``lang`` and, under
            ``user``, ``favourites_count`` and ``screen_name``.

    Returns:
        A new dict with the keys ``id``, ``text``, ``created_at``,
        ``retweet_count``, ``favourites_count``, ``lang`` and
        ``screen_name``.

    Raises:
        KeyError: if the tweet lacks one of the fields listed above.
    """
    # The original body read an undefined global named `post`; the tweet is
    # the argument itself.
    post = MyStreamListener
    user = post['user']
    return {
        'id': post['id_str'],
        'text': post['text'],
        'created_at': post['created_at'],
        'retweet_count': post['retweet_count'],
        'favourites_count': user['favourites_count'],
        'lang': post['lang'],
        'screen_name': user['screen_name'],
    }
# Store every streamed tweet.
# NOTE(review): `result1` is not defined anywhere in this file; it must be
# bound to the iterable of tweets before this loop runs.
# NOTE(review): `tweet_collection` is whatever the setup code bound last
# (db.tweet_biden in this file); the old comment mentioned "alltweets.json",
# so a dedicated all-tweets collection may have been intended - confirm.
for tweet in result1:
    try:
        # Pass the tweet being iterated (the original passed an undefined
        # name) and use insert_one: Collection.insert was removed in PyMongo 4.
        tweet_collection.insert_one(get_document(tweet))
    except (KeyError, pymongo.errors.DuplicateKeyError):
        # Best effort: skip tweets that lack an expected field or that are
        # rejected as duplicates by the unique "id" index.  Any other error
        # is a real problem and is allowed to propagate.
        continue
# Select the streamed tweets that are about each candidate.
_TRUMP_KEYWORDS = ('trump', 'donaldtrump', 'donald', 'republican', 'DonaldTrump')


def get_document(alltweets):
    """Return the stored document for a tweet that mentions Donald Trump.

    The original indexed the tweet with ``'trump''donaldtrump'...``, which
    Python concatenates into one long key that no tweet has.  The keywords
    are now matched case-insensitively against the tweet text instead.

    Args:
        alltweets: despite its name, a single tweet given as a mapping.
            The keys read are ``id_str``, ``text``, ``created_at``,
            ``retweet_count``, ``lang`` and, under ``user``,
            ``favourites_count`` and ``screen_name``.

    Returns:
        A new dict with the keys ``id``, ``text``, ``created_at``,
        ``retweet_count``, ``favourites_count``, ``lang`` and
        ``screen_name``.

    Raises:
        KeyError: if the text contains none of the Trump keywords, or if
            the tweet lacks one of the fields listed above.  KeyError is
            kept for the "no keyword" case because that is what the
            original key lookup would have raised.
    """
    post = alltweets  # the original read an undefined global named `post`
    lowered_text = post['text'].casefold()
    if not any(word.casefold() in lowered_text for word in _TRUMP_KEYWORDS):
        raise KeyError('tweet does not mention any Trump keyword')

    user = post['user']
    return {
        'id': post['id_str'],
        'text': post['text'],
        'created_at': post['created_at'],
        'retweet_count': post['retweet_count'],
        'favourites_count': user['favourites_count'],
        'lang': post['lang'],
        'screen_name': user['screen_name'],
    }
# Store the Donald Trump tweets in the tweet_trump collection.
# NOTE(review): `result1` is not defined anywhere in this file; it must be
# bound to the iterable of tweets before this loop runs.
for tweet in result1:
    try:
        # Pass the tweet being iterated (the original passed the undefined
        # name `tweet_trump`) and write to db.tweet_trump explicitly:
        # `tweet_collection` is bound to the Biden collection in this file.
        # insert_one replaces Collection.insert, which was removed in PyMongo 4.
        db.tweet_trump.insert_one(get_document(tweet))
    except (KeyError, pymongo.errors.DuplicateKeyError):
        # Best effort: skip tweets for which get_document raises KeyError
        # and tweets rejected as duplicates by a unique index.  Any other
        # error is a real problem and is allowed to propagate.
        continue
_BIDEN_KEYWORDS = ('biden', 'joebiden', 'joe_biden', 'liberalparty', 'JoeBiden')


def get_document(alltweets):
    """Return the stored document for a tweet that mentions Joe Biden.

    The original indexed the tweet with ``'biden''joebiden'...``, which
    Python concatenates into one long key that no tweet has.  The keywords
    are now matched case-insensitively against the tweet text instead.

    Args:
        alltweets: despite its name, a single tweet given as a mapping.
            The keys read are ``id_str``, ``text``, ``created_at``,
            ``retweet_count``, ``lang`` and, under ``user``,
            ``favourites_count`` and ``screen_name``.

    Returns:
        A new dict with the keys ``id``, ``text``, ``created_at``,
        ``retweet_count``, ``favourites_count``, ``lang`` and
        ``screen_name``.

    Raises:
        KeyError: if the text contains none of the Biden keywords, or if
            the tweet lacks one of the fields listed above.  KeyError is
            kept for the "no keyword" case because that is what the
            original key lookup would have raised.
    """
    post = alltweets  # the original read an undefined global named `post`
    lowered_text = post['text'].casefold()
    if not any(word.casefold() in lowered_text for word in _BIDEN_KEYWORDS):
        raise KeyError('tweet does not mention any Biden keyword')

    user = post['user']
    return {
        'id': post['id_str'],
        'text': post['text'],
        'created_at': post['created_at'],
        'retweet_count': post['retweet_count'],
        'favourites_count': user['favourites_count'],
        'lang': post['lang'],
        'screen_name': user['screen_name'],
    }
# Store the Joe Biden tweets in the tweet_biden collection.
# NOTE(review): `result1` is not defined anywhere in this file; it must be
# bound to the iterable of tweets before this loop runs.
for tweet in result1:
    try:
        # Pass the tweet being iterated (the original passed the undefined,
        # copy-pasted name `tweet_trump`) and name the target collection
        # explicitly.  insert_one replaces Collection.insert, which was
        # removed in PyMongo 4.
        db.tweet_biden.insert_one(get_document(tweet))
    except (KeyError, pymongo.errors.DuplicateKeyError):
        # Best effort: skip tweets for which get_document raises KeyError
        # and tweets rejected as duplicates by the unique "id" index.  Any
        # other error is a real problem and is allowed to propagate.
        continue