library(tidyverse) library(stringr) # Do fancy things with strings # Load data isms_raw <- read_csv("data/isms_full.csv") %>% # Make nicer column names rename(word = `WORD/PHRASE`, coldwar_count = `TOKENS 1`, today_count = `TOKENS 2`, coldwar_permil = `PM 1`, today_permil = `PM 2`) %>% # Recapitalize the word so that only the first letter is capitalized # This function comes from stringr mutate(word = str_to_title(word)) # Make tidy isms_top <- isms_raw %>% # Don't need these columns select(-ID, -RATIO) %>% # Sort by the per million value arrange(desc(coldwar_permil)) %>% # Only look at rows 1-5 slice(1:5) %>% # Gather the count and per million variables into one long column gather(key, value, contains("count"), contains("permil")) %>% # Split the key column into decade and vartype separate(key, c("decade", "vartype")) %>% # Spread vartype across two columns spread(vartype, value) write_csv(isms_top, "data/isms_top5.csv")