Text mine text files for top terms
text_mine(x, col = NULL, token = NULL, n_gram = NULL, lower = TRUE, top = NULL, title = NULL, x_label = NULL, y_label = NULL, viz = NULL)
x | a data frame |
---|---|
col | the column to be text mined, quoted |
token | either "words" or "ngrams", character. If "ngrams", also set the n_gram value |
n_gram | Number of words to split as tokens, e.g. 2, numeric |
lower | convert text to lower case |
top | the number of items to include, numeric |
title | chart title, quoted |
x_label | chart x axis label, quoted |
y_label | chart y axis label, quoted |
viz | pass to ggplot to create a stacked bar chart (default is NULL) |
plot
The text mining functions are adapted from Silge & Robinson, `Text Mining with R`.
# NOT RUN { keywords_rank <- text_mine(lens, col = "keywords", top = 20) %>% print() text_mine(lens, col = "keywords", top = 20, viz = TRUE) title_ngrams <- text_mine(lens, col = "title", top = 20, token = "ngrams", n_gram = 2, title = "Top Terms in Title", x_label = "Terms", y_label = "Publication Count") text_mine(lens, col = "title", top = 20, token = "ngrams", n_gram = 2, title = "Top Terms in Title", x_label = "Terms", y_label = "Publication Count", viz = TRUE) abstract_ngrams <- text_mine(lens, col = "abstract", top = 20, token = "ngrams", n_gram = 2, title = "Top Terms in Title", x_label = "Terms", y_label = "Publication Count") text_mine(lens, col = "abstract", top = 20, token = "ngrams", n_gram = 2, title = "Top Terms in Title", x_label = "Terms", y_label = "Publication Count", viz = TRUE) title_words <- text_mine(lens, col = "title", top = 20, token = "words", title = "Top Terms in Title", x_label = "Terms", y_label = "Publication Count") text_mine(lens, col = "title", top = 20, token = "words", title = "Top Terms in Title", x_label = "Terms", y_label = "Publication Count", viz = TRUE) abstract_words <- text_mine(lens, col = "abstract", top = 20, token = "words", title = "Top Terms in Title", x_label = "Terms", y_label = "Publication Count") text_mine(lens, col = "abstract", top = 20, token = "words", title = "Top Terms in Title", x_label = "Terms", y_label = "Publication Count", viz = TRUE) texts_words <- text_mine(texts, col = "text", top = 20, token = "words", title = "Top Words in Titles and Abstracts", x_label = "Terms", y_label = "Publication Count") texts_ngrams <- text_mine(texts, col = "text", top = 20, token = "ngrams", n_gram = 2, title = "Top Words in Titles and Abstracts", x_label = "Terms", y_label = "Publication Count") # }