-
Notifications
You must be signed in to change notification settings - Fork 677
/
NLP Word Frequency
68 lines (58 loc) · 1.66 KB
/
NLP Word Frequency
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import codecs
import csv
import datetime
import json
import urllib
import urllib.request

from bs4 import BeautifulSoup
from nltk import sent_tokenize, word_tokenize, pos_tag
import nltk
import numpy as np
import matplotlib.pyplot as plt
# UTF-8 stream reader: wraps the HTTP response so json.load sees text, not bytes.
reader = codecs.getreader("utf-8")

# Facebook Graph API credentials; an app access token is "<app_id>|<app_secret>".
app_id = "12345"
app_secret = "12345"
access_token = "|".join((app_id, app_secret))

# Facebook page whose feed is analyzed.
page_id = 'washingtonpost'
def feedFacebook(page_id, access_token, num_statuses):
    """Fetch up to ``num_statuses`` posts from a Facebook page's feed.

    Parameters
    ----------
    page_id : str
        Facebook page name or numeric id (e.g. ``'washingtonpost'``).
    access_token : str
        Graph API app access token (``"app_id|app_secret"``).
    num_statuses : int
        Maximum number of posts to request (Graph API ``limit``).

    Returns
    -------
    dict
        Decoded JSON response; the posts are under the ``'data'`` key.
    """
    base = "https://graph.facebook.com/v2.8"
    node = "/" + page_id + "/feed"
    parameters = "/?fields=message,link,likes.limit(1).summary(true),comments.limit(1).summary(true),shares&limit=%s&access_token=%s" % (num_statuses, access_token)
    url = base + node + parameters
    print(url)
    # BUG FIX: `import urllib` alone does not expose urllib.request in
    # Python 3 -- the submodule is imported explicitly at the top of the file.
    response = urllib.request.urlopen(url)
    # `reader` (module-level UTF-8 decoder) turns the byte stream into text.
    data = json.load(reader(response))
    print(json.dumps(data, indent=4, sort_keys=True))
    return data
# Fetch the 100 most recent posts and collect their text messages.
a = feedFacebook(page_id, access_token, 100)

txt = []
share = []
# The original iterated a fixed range(0, 50), which raises IndexError when the
# API returns fewer than 50 posts, and a['data'][i]['message'] raises KeyError
# for posts without a 'message' field (e.g. pure share/photo posts).
# Guard both cases; keep at most the first 50 posts as before.
for post in a.get('data', [])[:50]:
    if 'message' in post:
        txt.append(post['message'])
# Tokenize the collected post messages.  The original tokenized str(a) -- the
# repr of the entire API response -- which polluted the frequency counts with
# JSON keys, URLs and like/share counters; tokenize the message texts instead.
tokens = word_tokenize(" ".join(txt))

# Keep only words of exactly 8 characters (7 < len < 9).
# NOTE(review): this window admits a single length; presumably intended as a
# "long words" filter -- confirm the bounds are what was meant.
long_words1 = [w for w in tokens if 7 < len(w) < 9]

# Frequency distribution of the filtered words; keep the top 20 for plotting.
fdist01 = nltk.FreqDist(long_words1)
a1 = fdist01.most_common(20)
# Drop the 5 most common entries (too generic), then reverse so the most
# frequent remaining word ends up at the top of the horizontal bar chart.
tail = a1[5:][::-1]
names0 = [word for word, _ in tail]
value0 = [count for _, count in tail]

val = value0                        # the bar lengths
pos = np.arange(len(a1) - 5) + .5   # the bar centers on the y axis
pos
val
# Render the word frequencies as a horizontal bar chart via the axes API.
fig = plt.figure(figsize=(9, 4))
ax = fig.gca()
ax.barh(pos, val, align='center', alpha=0.7, color='rgbcmyk')
ax.set_yticks(pos)
ax.set_yticklabels(names0)
ax.set_xlabel('Mentions')
ax.set_title('FACEBOOK ANALYSIS\n' + page_id)