A word cloud is a graphical representation of the most frequently used words in a collection of text files. The size of each word in the picture indicates how often that word occurs across the entire text.
Code:
| ||
#' Draw a word cloud for a directory of text files
#'
#' Reads every file in `corpus_dir` into a tm corpus, normalises the text
#' (replaces "/" and "@" with spaces, lower-cases, removes English stop words,
#' numbers and punctuation, collapses whitespace), sums term frequencies over
#' all documents and plots the terms that occur at least `min_freq` times.
#'
#' Requires the `tm` and `wordcloud` packages. The function name keeps its
#' historical spelling so existing callers continue to work.
#'
#' @param corpus_dir Directory containing the text files. Defaults to
#'   `./corpus/target`, the location that was previously hard-coded.
#' @param min_freq Terms whose total frequency is below this value are not
#'   plotted. Defaults to 50, the previously hard-coded threshold.
#' @return Invisibly, a data frame with columns `word` and `freq`, sorted by
#'   decreasing frequency. The word cloud itself is drawn as a side effect.
worldCloud <- function(corpus_dir = file.path(".", "corpus", "target"),
                       min_freq = 50) {
  # Validate before attaching any package so bad input fails fast and clearly.
  stopifnot(
    is.character(corpus_dir), length(corpus_dir) == 1L, !is.na(corpus_dir),
    is.numeric(min_freq), length(min_freq) == 1L, !is.na(min_freq)
  )
  if (!dir.exists(corpus_dir)) {
    stop("Corpus directory does not exist: ", corpus_dir, call. = FALSE)
  }

  # Only the packages that are actually used are attached; no stemming is
  # performed, so SnowballC (and ctv) are not needed.
  library(tm)
  library(wordcloud)

  docs <- VCorpus(DirSource(corpus_dir))

  # content_transformer() applies a plain character function to the content
  # of each document while keeping the element a text document. Assigning the
  # result of gsub()/tolower() directly would leave bare character vectors in
  # the corpus and break the later tm steps.
  to_space <- content_transformer(
    function(x, pattern) gsub(pattern, " ", x)
  )
  docs <- tm_map(docs, to_space, "[/@]")
  docs <- tm_map(docs, content_transformer(tolower))

  # Stop words are removed before punctuation on purpose: several English
  # stop words contain apostrophes (e.g. "don't") and would no longer match
  # once punctuation has been stripped.
  docs <- tm_map(docs, removeWords, stopwords("english"))
  docs <- tm_map(docs, removeNumbers)
  docs <- tm_map(docs, removePunctuation)
  docs <- tm_map(docs, stripWhitespace)

  dtm <- DocumentTermMatrix(docs)

  # Column sums of the document-term matrix are corpus-wide term frequencies.
  term_freq <- sort(colSums(as.matrix(dtm)), decreasing = TRUE)

  if (length(term_freq) == 0L) {
    warning("No terms left after cleaning; nothing to plot.", call. = FALSE)
    empty <- data.frame(
      word = character(0),
      freq = numeric(0),
      stringsAsFactors = FALSE
    )
    return(invisible(empty))
  }

  d <- data.frame(
    word = names(term_freq),
    freq = term_freq,
    stringsAsFactors = FALSE
  )

  wordcloud(d$word, d$freq, min.freq = min_freq)

  # Plotting is the main effect; return the frequency table invisibly so the
  # caller can still inspect or reuse it.
  invisible(d)
}
