# TidyTuesday: rtweet Data
# Analyzing data for #tidytuesday week of 01/01/2019 (source)

# LOAD PACKAGES AND PARSE DATA
library(tidyverse)
library(scales)
library(RColorBrewer)
library(forcats)
library(tidytext)
library(topicmodels)

# The relative path is resolved against the current working directory.
tweets_raw <- as_tibble(readRDS("rstats_tweets.rds"))

# Parse data and identify top users
# IDEA BEHIND THIS IS TO FILTER OUT BOTS

# FIND TOP USERS
# One row per screen_name with summed favorites and retweets; keep the 12
# accounts with the highest combined total.
top_interactions <- tweets_raw %>%
  select(screen_name, favorite_count, retweet_count) %>%
  group_by(screen_name) %>%
  summarize(
    favorite = sum(favorite_count),
    retweet = sum(retweet_count)
  ) %>%
  # summarize() on a single grouping column already yields one ungrouped row
  # per user, so a plain column sum gives the same totals as re-grouping and
  # calling sum(), without leaving a grouped result behind.
  mutate(total = favorite + retweet) %>%
  arrange(desc(total)) %>%
  head(12)

# JOIN TOP USERS WITH RAW DATASET
# Keeps only tweets written by the top users; the join also attaches the
# favorite, retweet and total columns from top_interactions.
tweets <- tweets_raw %>%
  inner_join(top_interactions, by = "screen_name")

# FINAL DATA PROCESSING
# Tokenize tweet text into one word per row (per user) and drop stop words.
# NOTE(review): the visible source is cut off in the middle of the final
# filter() call; the trailing fragment is reproduced verbatim below and must
# be completed from the original analysis before this script can run.
tweets_parsed <- tweets %>%
  select(screen_name, text) %>%
  group_by(screen_name) %>%
  unnest_tokens(word, text) %>%
  # Explicit key: `word` is the only column shared with stop_words.
  anti_join(stop_words, by = "word") %>%
  filter(!...