Analyzing data for #tidytuesday week of 1/22/2019 (source)

# LOAD PACKAGES AND PARSE DATA
library(tidyverse)
library(scales)
library(lubridate)
library(RColorBrewer)

# Download the prison population CSV from the tidytuesday repository.
# `prison_raw` keeps the untouched import; `prison` is the working copy
# used by the rest of the analysis.
prison_raw <-
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-22/prison_population.csv" %>%
  read_csv()

prison <- prison_raw

Process the raw data

# Build, for every county/urbanicity pair, each population category's share
# of the resident population and of the prison population.
#
# Steps:
#   1. Drop the 'Total', 'Male' and 'Female' categories so they do not enter
#      the per-county sums used as denominators below.
#   2. Keep only the columns needed and discard rows with any missing value.
#   3. Sum population and prison population per county, urbanicity and
#      category across all remaining rows.
#   4. Divide each category's sum by the county-wide sum to get its share.
#
# NOTE(review): counties are identified by `county_name` alone; if the same
# name occurs in more than one state those rows are pooled together here.
# Confirm whether a state column should be part of the grouping.
total <- prison %>%
  filter(!pop_category %in% c("Total", "Male", "Female")) %>%
  select(county_name, urbanicity, pop_category, population, prison_population) %>%
  # Rows with an NA in any selected column (including pop_category) are removed
  na.omit() %>%
  group_by(county_name, urbanicity, pop_category) %>%
  summarize(population = sum(population),
            prison_population = sum(prison_population)) %>%
  # Re-group one level up so sum() below is the county-wide total
  group_by(county_name, urbanicity) %>%
  mutate(pct_population = population / sum(population),
         pct_prisoner = prison_population / sum(prison_population)) %>%
  # Return a plain, ungrouped table so later verbs are not silently applied
  # per county
  ungroup()

How does each demographic group's share of the county population compare with its share of the prisoner population?

# Plot each group's share of the county population (x) against its share of
# the prisoner population (y), excluding the 'Other' category.
# Grey points are the individual county/category rows; coloured curves are
# one smoothed trend per population category.
ggplot(
  data = filter(total, pop_category != "Other"),
  mapping = aes(x = pct_population, y = pct_prisoner)
) +
  # Layers, in drawing order
  geom_point(color = "grey", size = 2, alpha = 0.1) +
  geom_smooth(aes(color = pop_category), se = FALSE, size = 1.2) +
  # Dashed reference line (geom_abline defaults: intercept 0, slope 1),
  # i.e. where both shares are equal
  geom_abline(linetype = "dashed") +
  # Scales
  scale_x_continuous(labels = percent_format()) +
  scale_y_continuous(labels = percent_format()) +
  scale_color_brewer(palette = "Set1") +
  # Labels
  labs(
    title = "Comparison of county to prison population by ethnicity from 1970 to 2016",
    subtitle = "Specific groups are overrepresented in the prisoner population",
    caption = "Source: Vera Institute of Justice",
    x = "County Population",
    y = "Prisoner Population",
    color = ""
  ) +
  # Appearance: light theme, no grid lines, legend on top, pale grey panel
  theme_light() +
  theme(
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(
      fill = "gray97",
      color = "gray97",
      size = 0.5,
      linetype = "solid"
    )
  )

Does urbanicity play a role?

Answer: there are some variations between the different groups but, long story short… not really.

# Same population-share vs. prisoner-share comparison, now with one panel per
# population category ('Other' excluded) and one smoothed trend per
# urbanicity level.
ggplot(
  data = filter(total, pop_category != "Other"),
  mapping = aes(x = pct_population, y = pct_prisoner)
) +
  # Layers, in drawing order
  geom_point(color = "grey", size = 2, alpha = 0.1) +
  geom_smooth(aes(color = urbanicity), se = FALSE) +
  # Dashed reference line (geom_abline defaults: intercept 0, slope 1),
  # i.e. where both shares are equal
  geom_abline(linetype = "dashed") +
  # One panel per population category
  facet_wrap(~pop_category) +
  # Scales and labels
  scale_x_continuous(labels = percent_format()) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    x = "County Population (%)",
    y = "Prisoner Population (%)",
    color = "Urbanicity"
  ) +
  theme_light()