-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_1.R
31 lines (25 loc) · 1.31 KB
/
example_1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
## First Topic Models
##
## This file contains all that you need to setup and run your first topic model.
## It will build a 50 topic model from the set of AP articles distributed with
## the original lda-c implementation by David Blei.
## See https://www.cs.princeton.edu/~blei/lda-c/index.html
source("functions/lda.R")
source("functions/import.R")
# The directory from which to import data. The example includes the AP dataset, but
# you can easily run this with your own data by creating a new directory and adding
# the documents that you would like to model as .txt files, with one file per/document
data.dir <- "data/ap"
# This loads the documents from the directory above in a format that can be used
# with Mallet.
docs <- loadDocuments(data.dir);
# Specify a set of stop-words, or commonly used words to be removed from the documents
# in order to improve model performance.
stoplist <- "stop-words/stop-words_english_3_en.txt"
# Train a document model with 50 topics. This will run Mallet over the documents
# from data.dir and store the results along with some supporting information
# in a convenient data structure
model <- trainSimpleLDAModel(docs, 50, stoplist=stoplist)
# Print the resulting topics as wordclouds for easy visualization.
print("printing topic word clouds")
plotTopicWordcloud(model, verbose=T)