AI.Rout


R version 3.2.2 (2015-08-14) -- "Fire Safety"
Copyright (C) 2015 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64 (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

[Previously saved workspace restored]

> setwd("C:/Users/crobertson/Dropbox/nsercCREATE/Scraping/tweets/outputs/AITweets")
> x <- read.csv("test2_clean.csv")
> removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
> library(plyr)
Warning message:
package 'plyr' was built under R version 3.2.5 
> library(ggplot2)
Warning message:
package 'ggplot2' was built under R version 3.2.5 
> library(XML)
> library(tm)
Loading required package: NLP

Attaching package: 'NLP'

The following object is masked from 'package:ggplot2':

    annotate

> library(wordcloud)
Loading required package: RColorBrewer
> library(RColorBrewer)
> 
> x$D <- as.POSIXct(strptime(as.character(x$created_at), "%a %b %d %H:%M:%S %z %Y"))
> x$day <- as.Date(x$D)
> x$textC <- removeURL(x$text)
> x <- x[!duplicated(x$textC), ]
> daily <- ddply(x, .(day), summarise, numTweets = length(day)) #summarize by mean
> 
> png("FULL_dailyTweetsAI.png", width=1280,height=800)
>   ggplot(daily, aes(day, numTweets)) + geom_line() + xlab("") + ylab("Daily AI-related Tweets")
> dev.off()
null device 
          1 
> 
> recent <- subset(daily, day >= max(daily$day)-14)
> 
> png("Recent_dailyTweetsAI.png", width=1280,height=800)
>   ggplot(recent, aes(day, numTweets)) + geom_line() + xlab("") + ylab("Daily AI-related Tweets - last 14 days")
> dev.off()
null device 
          1 
> 
> 
> 
> 
> recent14 <- subset(x, day >= max(x$day)-14)
> 
> 
> ap.corpus <- Corpus(DataframeSource(data.frame(recent14$text)))
> ap.corpus <- tm_map(ap.corpus, content_transformer(tolower))
> ap.corpus <- tm_map(ap.corpus, removeWords, stopwords("english"))
> ap.corpus <- tm_map(ap.corpus, removeWords, c("bird", "flu", "avian", "influenza", "poultry"))
> ap.corpus <- tm_map(ap.corpus, removePunctuation)
> ap.corpus <- tm_map(ap.corpus, PlainTextDocument)
> ap.corpus <- tm_map(ap.corpus, stemDocument)
> 
> ap.corpus <- Corpus(VectorSource(ap.corpus))
> ap.tdm <- TermDocumentMatrix(ap.corpus)
> ap.m <- as.matrix(ap.tdm)
> ap.v <- sort(rowSums(ap.m),decreasing=TRUE)
> ap.d <- data.frame(word = names(ap.v),freq=ap.v)
> head(ap.d)
           word freq
may         may 1835
chicken chicken 1159
ang         ang  810
yung       yung  619
ako         ako  592
manok     manok  499
> pal2 <- brewer.pal(8,"Dark2")
> png("wordCloud_14day.png", width=8,height=6, units="in", res=300)
>     wordcloud(ap.d$word,ap.d$freq, scale=c(6,.2),min.freq=3,max.words=Inf, random.order=FALSE, rot.per=.15, colors=pal2)
> dev.off()
null device 
          1 
> 
> 
> 
> proc.time()
   user  system elapsed 
  68.50    1.62   73.38