---
title: "Geographic comparison between Paranoia Study recruitment and CloudResearch recruitment"
author: "Praveen Suthaharan"
date: "1/14/2021"
output: html_document
---

## Load libraries

```{r setup, include=FALSE}
# install.packages("DT")
# install.packages("rpivotTable")
# install.packages("ggplot2")
# install.packages("plotly")
# install.packages("openintro")
# install.packages("highcharter")
# install.packages("ggvis")
# install.packages("lessR")
# install.packages("rstatix")

library(flexdashboard)
library(knitr)
library(DT)
library(rpivotTable)
library(ggplot2)
library(plotly)
library(dplyr)
library(openintro)
library(highcharter)
library(ggvis)
# library() instead of require(): fail loudly if the package is missing.
library(gridExtra)
library(grid)
library(png)
library(lessR)
library(ggpubr)
# plyr is attached after dplyr, so unqualified verbs such as `summarise`
# resolve to plyr's versions. The summaries below call plyr:: explicitly so
# they do not depend on this attach order.
library(plyr)
```

## Load data

```{r}
# Participant-level data from the Paranoia Study (the code below reads its
# `period`, `state` and `region` columns).
pandemic <- read.csv('C:/Users/ps967/Desktop/dashboard/pandemic.csv')
# CloudResearch recruitment data (the code below reads its `period`, `state`
# and `mean` columns).
pandemic.CR <- read.csv('C:/Users/ps967/Desktop/dashboard/pandemic_cloudresearch.csv')
```

## Geographic map analysis of Paranoia Study

```{r}
# Build a US choropleth of `value` per state.
#
# data        data frame with a `State` column (full state names, matched
#             against the `woename` property of `usgeojson`) and the column
#             named by `value`.
# value       name of the column in `data` to map.
# title       chart title.
# subtitle    chart subtitle.
# color_range optional c(min = , max = ) colours for the colour axis; when
#             NULL the highcharter default colour axis is left untouched.
#
# Returns the highchart widget; at chunk top level it is printed
# automatically.
plot_state_map <- function(data, value, title, subtitle, color_range = NULL) {
  map <- highchart() %>%
    hc_title(text = title) %>%
    hc_subtitle(text = subtitle) %>%
    hc_add_series_map(usgeojson, data,
                      name = "State",
                      value = value,
                      joinBy = c("woename", "State"))

  if (!is.null(color_range)) {
    map <- map %>%
      hc_colorAxis(minColor = color_range[["min"]],
                   maxColor = color_range[["max"]])
  }

  map %>%
    hc_mapNavigation(enabled = TRUE)
}

green_range <- c(min = "#B7FFBF", max = "#00AB08")

## Paranoia study
# summarize data into proportions of individuals recruited per pandemic
# period per state
# (the original passed the undefined object `df` here; the data frame loaded
# above is `pandemic`)
df.states <- plyr::ddply(pandemic, .(period), plyr::summarise,
                         perc = prop.table(table(state)) * 100,
                         State = names(table(state)))

# convert state abbreviations into full names
df.states$State <- abbr2state(df.states$State)

# subset data into pandemic group
df.states.pre <- df.states[which(df.states$period == "Pre-lockdown"), ]
df.states.lock <- df.states[which(df.states$period == "Lockdown"), ]
df.states.reopen <- df.states[which(df.states$period == "Reopening"), ]

# Map 1: US distribution of MTurkers recruited pre-pandemic
plot_state_map(df.states.pre, "perc",
               title = "Percentage of US MTurk completers",
               subtitle = "Period: pre-pandemic",
               color_range = green_range)

# Map 2: US distribution of MTurkers recruited during lockdown
plot_state_map(df.states.lock, "perc",
               title = "Percentage of US MTurk completers",
               subtitle = "Period: lockdown",
               color_range = green_range)

# Map 3: US distribution of MTurkers recruited during reopening
plot_state_map(df.states.reopen, "perc",
               title = "Percentage of US MTurk completers",
               subtitle = "Period: reopening",
               color_range = green_range)
```

## Geographic map analysis of CloudResearch Study

```{r}
# convert state abbreviations into full names
pandemic.CR$State <- abbr2state(pandemic.CR$state)

# subset data into pandemic group
pandemic.CR.pre <- pandemic.CR[which(pandemic.CR$period == "Pre-lockdown"), ]
pandemic.CR.lock <- pandemic.CR[which(pandemic.CR$period == "Lockdown"), ]
pandemic.CR.reopen <- pandemic.CR[which(pandemic.CR$period == "Reopening"), ]

# Map 1: US distribution of MTurkers recruited by CloudResearch during the
# pre-pandemic period
plot_state_map(pandemic.CR.pre, "mean",
               title = "Mean US MTurk completers",
               subtitle = "Period: pre-pandemic")

# Map 2: US distribution of MTurkers recruited by CloudResearch during the
# lockdown period
plot_state_map(pandemic.CR.lock, "mean",
               title = "Mean US MTurk completers",
               subtitle = "Period: lockdown")

# Map 3: US distribution of MTurkers recruited by CloudResearch during the
# reopening period
plot_state_map(pandemic.CR.reopen, "mean",
               title = "Mean US MTurk completers",
               subtitle = "Period: reopening")
```

```{r}
# Regional recruitment did not differ across our study periods
# (the original passed the undefined object `df` here; the data frame loaded
# above is `pandemic`)
df.region <- plyr::ddply(pandemic, .(period), plyr::summarise,
                         count = table(region),
                         region = names(table(region)))

# NOTE(review): the counts below are hard-coded rather than read from
# df.region; presumably they were transcribed from it -- confirm they still
# match the current data.

# Count of US MTurkers per region; G1 = pre-lockdown
G1_Mid <- 43
G1_North <- 51
G1_South <- 77
G1_West <- 31

# Count of US MTurkers per region; G2 = lockdown
G2_Mid <- 48
G2_North <- 51
G2_South <- 87
G2_West <- 45

# Count of US MTurkers per region; G3 = reopening
G3_Mid <- 32
G3_North <- 32
G3_South <- 65
G3_West <- 43

# Region (rows) x study period (columns) contingency table
regionG <- data.frame(G1 = c(G1_Mid, G1_North, G1_South, G1_West),
                      G2 = c(G2_Mid, G2_North, G2_South, G2_West),
                      G3 = c(G3_Mid, G3_North, G3_South, G3_West))
rownames(regionG) <- c("Midwest", "Northeast", "South", "West")
colnames(regionG) <- c("Pre-lockdown", "Lockdown", "Reopening")

regionG

# Chi-squared test of independence between region and study period, with
# continuity correction switched off.
chisq.test(regionG, correct = FALSE)
```