-
Notifications
You must be signed in to change notification settings - Fork 0
/
WordCloudsPerYear.R
44 lines (38 loc) · 1.33 KB
/
WordCloudsPerYear.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Word clouds per publication year
# NOTE(review): the sourced script presumably defines `contents` and attaches
# dplyr (filter/mutate/pull and %>% are used below) -- confirm.
source("ExtractPubAbstractTitle.R")

# Keep recent publications only ---------------------------------------------------------------------
# Rows whose year cannot be read as an integer compare as NA and are dropped
# by filter() together with everything before 2014.
contents <- filter(contents, as.integer(year) >= 2014)
# Load the text-mining and plotting packages, installing only the missing ones.
# Installing unconditionally would re-download every package (and require
# network access plus a configured CRAN mirror) each time the script runs.
required_packages <- c("tm", "wordcloud", "RColorBrewer")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (any(!is_installed)) {
  install.packages(required_packages[!is_installed])
}
library(tm)
library(wordcloud)
library(RColorBrewer)
# Render one word cloud PNG per year -----------------------------------------------------------------
# For every distinct year in `contents`, the titles and abstracts of that year
# are merged, cleaned with tm (lower-casing, removal of punctuation, numbers
# and English stop words) and the resulting term frequencies are drawn with
# wordcloud() into yearlyWordClouds/words_<year>.png.

# png() does not create missing directories, so make sure the target exists.
output_dir <- "yearlyWordClouds"
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# Loop-invariant inputs, computed once instead of once per year.
english_stopwords <- stopwords("english")
cloud_palette <- brewer.pal(8, "Dark2")

for (year in unique(contents$year)) {
  # `!!year` injects the loop value so it is not mistaken for the `year` column.
  texts <- contents %>%
    filter(year == !!year) %>%
    mutate(
      # paste() would turn a missing title/abstract into the literal word "NA",
      # which would then be counted as a term; use an empty string instead.
      text = paste(
        ifelse(is.na(title), "", as.character(title)),
        ifelse(is.na(abstract), "", as.character(abstract))
      )
    ) %>%
    pull(text)

  if (length(texts) == 0) {
    message(sprintf("Skipping %s: no documents found", year))
    next
  }

  corpus <- Corpus(VectorSource(texts))
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, removeWords, english_stopwords)

  # Total occurrences of each term across all documents of the year,
  # most frequent first.
  tdm <- TermDocumentMatrix(corpus)
  m <- as.matrix(tdm)
  word_freqs <- sort(rowSums(m), decreasing = TRUE)

  # Nothing to draw when cleaning removed every term; skip before opening the
  # device so no empty image file is written.
  if (length(word_freqs) == 0) {
    message(sprintf("Skipping %s: no terms left after cleaning", year))
    next
  }

  png(
    file.path(output_dir, sprintf("words_%s.png", year)),
    width = 800,
    height = 800
  )
  # Close the device even when plotting fails, so an error does not leave a
  # dangling graphics device open; the error itself is still propagated.
  tryCatch(
    {
      par(mar = c(0, 0, 0, 0))
      wordcloud(
        words = names(word_freqs),
        freq = word_freqs,
        min.freq = 1,
        max.words = 200,
        random.order = FALSE,
        rot.per = 0.35,
        scale = c(3, 1.5),
        colors = cloud_palette
      )
    },
    finally = dev.off()
  )
}