Skip to content

Latest commit

 

History

History
1222 lines (1069 loc) · 62.7 KB

pilot.md

File metadata and controls

1222 lines (1069 loc) · 62.7 KB
title output
MA pilot study
html_document
df <- read_tsv(here("scripts/formatted_data.tsv")) #%>% 
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   id = col_double(),
##   author = col_character(),
##   text = col_character(),
##   time = col_double(),
##   blacklist_text = col_logical(),
##   event = col_character(),
##   workerID = col_character(),
##   treatment_group = col_double(),
##   blacklist_author = col_logical()
## )
df <- df %>% mutate(
    treatment_group=ifelse(workerID=='A3PRZRK9IC5CBI', 1, treatment_group),
    workerID=ifelse(workerID=='A6Y7SZU9L301W', 'A2D8LB2RPJXMSQ', workerID),
    treatment_group=ifelse(workerID=='A2D8LB2RPJXMSQ', 2, treatment_group),
    workerID=ifelse(workerID=='A2RCYLK072XXO', 'AKSJ3C5O3V9RB', workerID),
    treatment_group=ifelse(workerID=='AKSJ3C5O3V9RB', 1, treatment_group),
    treatment_group=ifelse(workerID=='A3MIDLO5S7FU06', 0, treatment_group),
    treatment_group=ifelse(workerID=='A2Z6NL0CTXY0ZB', 0, treatment_group)
)
# i want to know how many tweets each respondent has seen
agg_df <- df %>%
    group_by(workerID, treatment_group, event) %>%
    count()

Basic summary

n_installs <- df %>%
    filter(event == "install") %>%
    select(workerID) %>%
    distinct() %>%
    count()
n_users_active <- df %>%
    filter(event == "show") %>%
    select(workerID) %>%
    distinct() %>%
    count()
rate <- n_users_active / n_installs

There have been 82 unique users who installed the extension. Of these, 19 have used twitter since installation. The average active user per install rate is 0.2317073.

By treatment group

Installs by treatment group

agg_df %>%
    filter(event == "install") %>%
    group_by(treatment_group) %>%
    summarize(total_n = sum(n))
## # A tibble: 4 x 2
##   treatment_group total_n
##             <dbl>   <int>
## 1               0      30
## 2               1      34
## 3               2      29
## 4              NA       3

Tweets seen by treatment group

agg_df %>%
    filter(event == "show") %>%
    group_by(treatment_group) %>%
    summarize(total_n = sum(n))
## # A tibble: 3 x 2
##   treatment_group total_n
##             <dbl>   <int>
## 1               0   13927
## 2               1    4004
## 3               2    1686

Users who have seen any tweets by treatment group

agg_df %>%
    filter(event == "show") %>%
    group_by(treatment_group) %>%
    count()
## # A tibble: 3 x 2
## # Groups:   treatment_group [3]
##   treatment_group     n
##             <dbl> <int>
## 1               0     7
## 2               1     6
## 3               2     6

Tweets seen that were eligible for removal

df %>%
    filter(event %in% c("show", "hide")) %>%
    group_by(treatment_group, blacklist_text, blacklist_author) %>%
    summarize(users = n_distinct(workerID), tweets = length(workerID))
## `summarise()` has grouped output by 'treatment_group', 'blacklist_text'. You can override using the `.groups` argument.
## # A tibble: 10 x 5
## # Groups:   treatment_group, blacklist_text [5]
##    treatment_group blacklist_text blacklist_author users tweets
##              <dbl> <lgl>          <lgl>            <int>  <int>
##  1               0 FALSE          FALSE                7  13395
##  2               0 FALSE          TRUE                 2    323
##  3               0 TRUE           FALSE                1     92
##  4               0 TRUE           TRUE                 1    117
##  5               1 FALSE          FALSE                6   3988
##  6               1 FALSE          TRUE                 4      9
##  7               1 TRUE           FALSE                1      5
##  8               1 TRUE           TRUE                 3     15
##  9               2 FALSE          FALSE                6   1684
## 10               2 FALSE          TRUE                 1      2

Tweets hidden by treatment group

agg_df %>%
    filter(event == "hide") %>%
    group_by(treatment_group) %>%
    summarize(total_n = sum(n))
## # A tibble: 1 x 2
##   treatment_group total_n
##             <dbl>   <int>
## 1               1      13

Initial Results

question_map <- c()
# Cleaning / joining response data
presurvey_df <- read_tsv(here("scripts/qualtrics/presurvey.tsv"))
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   StartDate = col_character(),
##   EndDate = col_datetime(format = ""),
##   IPAddress = col_character(),
##   RecordedDate = col_datetime(format = ""),
##   ResponseId = col_character(),
##   RecipientLastName = col_logical(),
##   RecipientFirstName = col_logical(),
##   RecipientEmail = col_logical(),
##   ExternalReference = col_logical(),
##   DistributionChannel = col_character(),
##   UserLanguage = col_character(),
##   Q16 = col_logical(),
##   ResponseID = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
presurvey_2_df <- read_tsv(here("scripts/qualtrics/presurvey_2.tsv"))
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   StartDate = col_character(),
##   EndDate = col_character(),
##   IPAddress = col_character(),
##   RecordedDate = col_character(),
##   ResponseId = col_character(),
##   RecipientLastName = col_logical(),
##   RecipientFirstName = col_logical(),
##   RecipientEmail = col_logical(),
##   ExternalReference = col_logical(),
##   DistributionChannel = col_character(),
##   UserLanguage = col_character(),
##   Q16 = col_logical(),
##   ResponseID = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
presurvey_3_df <- read_tsv(here("scripts/qualtrics/presurvey_3.tsv")) %>% select(-SurveyCode)
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   StartDate = col_character(),
##   EndDate = col_character(),
##   IPAddress = col_character(),
##   RecordedDate = col_character(),
##   ResponseId = col_character(),
##   RecipientLastName = col_logical(),
##   RecipientFirstName = col_logical(),
##   RecipientEmail = col_logical(),
##   ExternalReference = col_logical(),
##   DistributionChannel = col_character(),
##   UserLanguage = col_character(),
##   SurveyCode = col_character(),
##   Q16 = col_logical(),
##   ResponseID = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
turkers <- read_tsv(here("scripts/mturk_pilot_v1.tsv")) %>% select(WorkerId, Answer)
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   AssignmentId = col_character(),
##   WorkerId = col_character(),
##   HITId = col_character(),
##   AssignmentStatus = col_character(),
##   AutoApprovalTime = col_datetime(format = ""),
##   AcceptTime = col_datetime(format = ""),
##   SubmitTime = col_datetime(format = ""),
##   ApprovalTime = col_datetime(format = ""),
##   Answer = col_character()
## )
turkers_2 <- read_tsv(here("scripts/mturk_pilot_v2.tsv")) %>% select(WorkerId, Answer)
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   AssignmentId = col_character(),
##   WorkerId = col_character(),
##   HITId = col_character(),
##   AssignmentStatus = col_character(),
##   AutoApprovalTime = col_datetime(format = ""),
##   AcceptTime = col_datetime(format = ""),
##   SubmitTime = col_datetime(format = ""),
##   ApprovalTime = col_datetime(format = ""),
##   Answer = col_character(),
##   RejectionTime = col_datetime(format = ""),
##   RequesterFeedback = col_character()
## )
turkers_3 <- read_tsv(here("scripts/mturk_pilot_v3.tsv")) %>% select(WorkerId, Answer)
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   AssignmentId = col_character(),
##   WorkerId = col_character(),
##   HITId = col_character(),
##   AssignmentStatus = col_character(),
##   AutoApprovalTime = col_datetime(format = ""),
##   AcceptTime = col_datetime(format = ""),
##   SubmitTime = col_datetime(format = ""),
##   ApprovalTime = col_datetime(format = ""),
##   Answer = col_character()
## )
turkers_postsurvey <- read_tsv(here("scripts/mturk_postsurvey.tsv"))
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   AssignmentId = col_character(),
##   WorkerId = col_character(),
##   HITId = col_character(),
##   AssignmentStatus = col_character(),
##   AutoApprovalTime = col_datetime(format = ""),
##   AcceptTime = col_datetime(format = ""),
##   SubmitTime = col_datetime(format = ""),
##   ApprovalTime = col_datetime(format = ""),
##   Answer = col_character()
## )
postsurvey_df <- read_csv(here("scripts/qualtrics/postsurvey2.csv"))
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
pre_df <- rbind(presurvey_df, presurvey_2_df, presurvey_3_df)
turkers_df <- rbind(turkers, turkers_2, turkers_3)

turkers_df$ResponseID <- (str_match(
    turkers_df$Answer,
    "<Answer><QuestionIdentifier>surveycode</QuestionIdentifier><FreeText>(R_[[:alnum:]]+)</FreeText>"
))[, 2]
turkers_postsurvey$ResponseID <- (str_match(turkers_postsurvey$Answer, "<QuestionIdentifier>surveycode</QuestionIdentifier><FreeText>(.*)</FreeText>"))[, 2]
pre_df_joined <- inner_join(turkers_df, pre_df, by = "ResponseID") %>% distinct(WorkerId, .keep_all=TRUE)
post_df_joined <- inner_join(postsurvey_df, turkers_postsurvey, by = "ResponseID") %>% distinct(WorkerId, .keep_all=TRUE)

id_cols <- c("WorkerId")
demo_cols <- c("politics_interest", "pid3", "ideo", "age", "gender", "educ", "media_tv", "media_newspaper", "media_radio", "media_internet", "media_discussion")
outcome_cols <- c("thermo_dems", "thermo_reps", "dist_trump", "dist_biden", "perceived_1", "perceived_2", "perceived_3", "issue_immigration", "issue_climate", "issue_covid", "issue_ukraine", "media_bias", "media_trust")

pre_df_renamed <- rename(pre_df_joined,
    politics_interest = QID1,
    pid3 = Q2,
    ideo = Q3,
    age = Q4,
    gender = Q5,
    educ = Q6,
    media_tv = Q7_1,
    media_newspaper = Q7_2,
    media_radio = Q7_3,
    media_internet = Q7_4,
    media_discussion = Q7_5,
    thermo_dems = Q8_1,
    thermo_reps = Q8_2,
    dist_trump = Q10_1,
    dist_biden = Q10_2,
    perceived_1 = Q12_1,
    perceived_2 = Q12_2,
    perceived_3 = Q12_3,
    issue_immigration = Q13_1,
    issue_climate = Q13_2,
    issue_covid = Q13_3,
    issue_ukraine = Q13_4,
    media_bias = Q14,
    media_trust = Q15
) %>% select(c(id_cols, demo_cols, outcome_cols))

post_df_renamed <- rename(post_df_joined,
    thermo_dems = Q8_1,
    thermo_reps = Q8_2,
    dist_trump = Q10_1,
    dist_biden = Q10_2,
    perceived_1 = Q12_1,
    perceived_2 = Q12_2,
    perceived_3 = Q12_3,
    issue_immigration = Q13_1,
    issue_climate = Q13_2,
    issue_covid = Q13_3,
    issue_ukraine = Q13_4,
    media_bias = Q14,
    media_trust = Q15
) %>%
    select(c(id_cols, outcome_cols)) %>%
    left_join(
        pre_df_renamed %>% select("pid3", "ideo", "WorkerId"),
        by = "WorkerId"
    ) %>%
    mutate_at(outcome_cols, as.numeric)

create_indices <- function(df) {
    # Create the main outcome indices
    df %>% mutate(
        affective_index = case_when(
            pid3 == 1 ~ (thermo_reps / 100 + (7 - dist_trump) / 7) / 2,
            pid3 == 2 ~ (thermo_dems / 100 + (7 - dist_biden) / 7) / 2,
            TRUE ~ NA_real_
        ),
        issue_index = (abs(issue_immigration - 3) + abs(issue_climate - 3) + abs(issue_covid - 3) + abs(issue_ukraine - 3)) / 4 / 2,
        media_trust_index = (abs(3 - media_bias) * 3 / 2 + media_trust - 1) / 6,
        perceived_index = (perceived_1 + perceived_2 + perceived_3) / 3 / 5,
        
    )
    # TODO create the issue polarization index
}

pre_df_index <- create_indices(pre_df_renamed) %>% mutate(
    age=(age-1) * 10 + 23,
    educ=case_when(
        educ==1 ~ 6,
        educ==2 ~ 12,
        educ==3 ~ 14,
        educ==4 ~ 16,
        educ==5 ~ 20,           
    ),
    ideo=5-ideo
)

usage_df <- df %>%
    mutate(
        saw_fox = blacklist_text | blacklist_author,
        WorkerId=workerID
    ) %>%
    group_by(WorkerId) %>%
    summarize(
        installed = sum(event=='install'),
        total_tweets = sum(event=='show', na.rm=TRUE),
        total_fox_tweets = sum(saw_fox, na.rm=TRUE), 
        fox_tweets_accounts = sum(blacklist_author, na.rm=TRUE), 
        fox_tweets_links = sum(blacklist_text, na.rm=TRUE),
        total_hidden = sum(saw_fox & treatment_group != 0, na.rm=TRUE),
        total_link_hidden = sum(saw_fox & treatment_group == 1, na.rm=TRUE),
        total_account_hidden = sum(saw_fox & treatment_group == 2, na.rm=TRUE)
    )
pre_df_usage <- left_join(
    pre_df_index,
    usage_df,
    by = "WorkerId"
)

post_df_index <- create_indices(post_df_renamed) %>%
    select(WorkerId, affective_index, issue_index, media_trust_index, perceived_index)

survey_df <- left_join(post_df_index, pre_df_usage, by = "WorkerId", suffix = c("", ".post")) %>%
    left_join(agg_df %>% filter(event == "install") %>% mutate(WorkerId = workerID), by = "WorkerId")

survey_df$treated <- survey_df$treatment_group != 0
survey_df$social <- survey_df$treatment_group == 1
survey_df$gender <- as.factor(survey_df$gender)
levels(survey_df$gender) <- c('Male', "Female", "Other")

Descriptive Statistics

Overall 103 unique respondents completed the first round survey, of which 52 installed the extension. Of these, 12 used Twitter during the treatment period and 4 were exposed to Fox tweets or had Fox tweets hidden from view.

30 users, or 0.5769231 of the eligible respondents completed the endline survey, which was offered to any user who installed the extension.

# Balance check
library(cobalt)
library(ggpubr)
## Error: package or namespace load failed for 'ggpubr' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
##  namespace 'broom' 0.7.1 is already loaded, but >= 0.7.4 is required
fmla <- treated ~ ideo + age + gender + educ + total_tweets + total_fox_tweets
t1 <- love.plot(fmla, 
    data=survey_df %>% filter(treatment_group %in% c(0,1)), 
    stats = c("mean.diffs"), 
    thresholds = c(m = 0.1, v = 2), 
    abs = FALSE, 
    binary = "std", 
    var.order = "unadjusted", 
    drop.distance=TRUE
) + theme(legend.position="none") + labs(title="")
## Note: 's.d.denom' not specified; assuming pooled.
t2 <- love.plot(fmla, 
    data=survey_df %>% filter(treatment_group %in% c(0,2)), 
    stats = c("mean.diffs"), 
    thresholds = c(m = 0.1, v = 2), 
    abs = FALSE, 
    binary = "std", 
    var.order = "unadjusted", 
    drop.distance=TRUE
) + theme(legend.position="none") + labs(title="")
## Note: 's.d.denom' not specified; assuming pooled.
t3 <- love.plot(social ~ ideo + age + gender + educ + total_tweets + total_fox_tweets, 
    data=survey_df %>% filter(treatment_group %in% c(1,2)), 
    stats = c("mean.diffs"), 
    thresholds = c(m = 0.1, v = 2), 
    abs = FALSE, 
    binary = "std", 
    var.order = "unadjusted", 
    drop.distance=TRUE
) + theme(legend.position="none") + labs(title="")
## Note: 's.d.denom' not specified; assuming pooled.
ggarrange(
    t1, t2, t3, labels=c("Links vs Control", "Accounts vs Control", "Links vs Accounts")
)
## Error in ggarrange(t1, t2, t3, labels = c("Links vs Control", "Accounts vs Control", : could not find function "ggarrange"
# Covariate balance table
means <- survey_df %>% group_by(treatment_group) %>% summarize(
    mean_ideo=mean(ideo),
    mean_educ=mean(educ),
    mean_age=mean(age),
    mean_gender=mean(as.numeric(gender) - 1),
    mean_total_tweets=mean(total_tweets),
    num_saw_tweets=sum(total_tweets > 0),
    num_saw_fox=sum(total_fox_tweets > 0),
    mean_total_fox_tweets=mean(total_fox_tweets),
    ideo_sd = sd(ideo),
    educ_sd = sd(educ),
    age_sd = sd(age),
    gender_sd = sd(as.numeric(gender) - 1),
    tt_sd = sd(total_tweets),
    fox_tt_sd = sd(total_fox_tweets),
    n = n(),
    ideo_se = ideo_sd / sqrt(n),
    educ_se = educ_sd / sqrt(n),
    age_se = age_sd / sqrt(n),
    gender_se = gender_sd / sqrt(n),
    tt_se = tt_sd / sqrt(n),
    fox_tt_se = fox_tt_sd / sqrt(n),
) %>% 
mutate_all(round, 2) %>%
mutate(
    Treatment=recode(treatment_group, "1"="Accounts", "2"="Link", "0"="Control"),
    Ideology=paste0(mean_ideo, " (", ideo_se, ")"),
    Education=paste0(mean_educ, " (", educ_se, ")"),
    Age=paste0(mean_age, " (", age_se, ")"),
    Female=paste0(mean_gender, " (", gender_se, ")"),
    Tweets_Seen=paste0(mean_total_tweets, " (", tt_se, ")"),
    Fox_Tweets_Seen=paste0(mean_total_fox_tweets, " (", fox_tt_se, ")"),
    Twitter_Users=round(num_saw_tweets, 0),
    Saw_Fox=round(num_saw_fox, 0),
    Count=round(n, digits=0)
) 
library(xtable)
print(xtable(means %>% select(Treatment, Count, Ideology, Education, Age, Female), type = "latex"), file = "covariate_means.tex")
print(xtable(means %>% select(Treatment, Twitter_Users, Saw_Fox, Tweets_Seen, Fox_Tweets_Seen), type = "latex"), file = "covariate_means_2.tex")
models <- list(
    lm(treated ~ ideo + educ + age + gender + total_tweets + total_fox_tweets, data=survey_df %>% filter(treatment_group %in% c(0, 1))),
    lm(treated ~ ideo + educ + age + gender + total_tweets + total_fox_tweets, data=survey_df %>% filter(treatment_group %in% c(0, 2))),
    lm(social ~ ideo + educ + age + gender + total_tweets + total_fox_tweets, data=survey_df %>% filter(treatment_group %in% c(1, 2)))
)
stargazer(
    models,
    type='html',
    out='covariate_balance.tex'
)
Dependent variable:
treatedsocial
(1)(2)(3)
ideo-0.047-0.038-0.061
(0.083)(0.085)(0.083)
educ0.1070.046-0.050
(0.091)(0.055)(0.067)
age0.0240.0070.015
(0.017)(0.024)(0.016)
genderFemale-0.120-0.1410.149
(0.285)(0.292)(0.270)
total_tweets0.00010.0070.001
(0.001)(0.013)(0.001)
total_fox_tweets-0.011-0.198-0.134
(0.140)(0.332)(0.317)
Constant-1.893-0.2910.706
(1.657)(1.207)(1.341)
Observations192021
R20.3300.2470.186
Adjusted R2-0.005-0.100-0.163
Residual Std. Error0.514 (df = 12)0.535 (df = 13)0.552 (df = 14)
F Statistic0.986 (df = 6; 12)0.711 (df = 6; 13)0.532 (df = 6; 14)
Note:*p<0.1; **p<0.05; ***p<0.01
tweets_seen_plot <- pre_df_usage %>% filter(installed > 0) %>% 
    ggplot(aes(x=total_tweets)) + 
        geom_histogram() +
        theme_minimal() +
        labs(title="Tweets seen by respondents", x='Tweets seen', y='Num. Respondents')
fox_tweets_seen_plot <- pre_df_usage %>% filter(installed > 0) %>% 
    ggplot(aes(x=total_fox_tweets)) + 
        geom_histogram() +
        theme_minimal() +
        labs(title="Fox News Tweets seen by respondents", x='Tweets seen', y='Num. Respondents')
         
ggarrange(tweets_seen_plot, fox_tweets_seen_plot)
## Error in ggarrange(tweets_seen_plot, fox_tweets_seen_plot): could not find function "ggarrange"
ggsave(paste0("tweets.png"), width = 10, height = 5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ITT Results

# Plot
library(ggallin)
library(knitr)

plot_effects <- function(models, title, filename, covariate_labels) {
    effects <- models %>% map_dbl(~ .x$coefficients[2])
    se <- lapply(models, get_robust_se) %>% map_dbl(~ .x[2])
    effect_df <- data.frame(estimate = effects, se = se, var = c("Affective Polarization", "Issue Polarization", "Perceived Polarization", "Trust in Media")) %>%
        mutate(
            lo = estimate - 1.96 * se,
            hi = estimate + 1.96 * se
        )
    ggplot(effect_df, aes(xmin = lo, xmax = hi, x = estimate, y = var)) +
        geom_point() +
        geom_errorbarh(height = 0.1) +
        theme_minimal() +
        theme(
            panel.grid.minor = element_blank(),
            panel.grid.major.y = element_blank(),
            axis.title.x = element_blank(),
            axis.title.y = element_blank()
        ) +
        labs(
            title = title
        )
    ggsave(paste0(filename, ".png"), width = 10, height = 5)
    stargazer(
        models,
        type = "html",
        title = title,
        style = "default",
        se = lapply(models, get_robust_se),
        covariate.labels = covariate_labels,
        header = FALSE,
        object.names = FALSE,
        model.names = FALSE,
        dep.var.labels = c("Affective Polarization", "Issue Polarization", "Perceived Polarization", "Trust in Media"),
        font.size="tiny",
        out = paste0(filename, ".tex")
    )
}
models <- list(
    lm(affective_index.post ~ treated + affective_index + ideo + age + gender + educ, data = survey_df),
    lm(issue_index.post ~ treated + issue_index + ideo + age + gender + educ, data = survey_df),
    lm(perceived_index.post ~ treated + perceived_index + ideo + age + gender + educ, data = survey_df),
    lm(media_trust_index.post ~ treated + media_trust_index + ideo + age + gender + educ, data = survey_df)
)
covariate_labels <- c(
    "Treatment",
    "Affective Polarization Index",
    "Issue Polarization Index",
    "Perceived Polarization Index",
    "Media Trust Index",
    "Ideology",
    "Age",
    "Gender",
    "Education (Years)"
)

plot_effects(models, "Fox News Removal Effects (ITT)", "results_main_itt", covariate_labels)
Fox News Removal Effects (ITT)
Dependent variable:
Affective PolarizationIssue PolarizationPerceived PolarizationTrust in Media
(1)(2)(3)(4)
Treatment0.0420.024-0.094-0.099
(0.042)(0.073)(0.090)(0.087)
Affective Polarization Index0.896***
(0.100)
Issue Polarization Index0.877***
(0.184)
Perceived Polarization Index0.322
(0.239)
Media Trust Index0.733***
(0.163)
Ideology-0.0070.034*0.005-0.030
(0.012)(0.020)(0.024)(0.031)
Age-0.00020.0050.005*-0.003
(0.003)(0.007)(0.003)(0.005)
Gender0.039-0.159*0.0530.059
(0.059)(0.085)(0.078)(0.068)
Education (Years)0.0170.0070.029**0.027**
(0.012)(0.015)(0.014)(0.013)
Constant-0.206-0.318-0.118-0.042
(0.217)(0.321)(0.340)(0.263)
Observations25252830
R20.7930.5660.3180.473
Adjusted R20.7240.4210.1230.335
Residual Std. Error0.115 (df = 18)0.180 (df = 18)0.176 (df = 21)0.199 (df = 23)
F Statistic11.477*** (df = 6; 18)3.913** (df = 6; 18)1.630 (df = 6; 21)3.436** (df = 6; 23)
Note:*p<0.1; **p<0.05; ***p<0.01
filtered_df <- survey_df %>% filter(treated == TRUE)

models <- list(
    lm(affective_index.post ~ social + affective_index + ideo + age + gender + educ, data = filtered_df),
    lm(issue_index.post ~ social + issue_index + ideo + age + gender + educ, data = filtered_df),
    lm(perceived_index.post ~ social + perceived_index + ideo + age + gender + educ, data = filtered_df),
    lm(media_trust_index.post ~ social + media_trust_index + ideo + age + gender + educ, data = filtered_df)
)
plot_effects(models, "Fox News Removal Effects: Link vs Account Based Removal (ITT)", "results_social_itt", covariate_labels)
Fox News Removal Effects: Link vs Account Based Removal (ITT)
Dependent variable:
Affective PolarizationIssue PolarizationPerceived PolarizationTrust in Media
(1)(2)(3)(4)
Treatment0.091-0.0900.020-0.032
(0.063)(0.075)(0.079)(0.100)
Affective Polarization Index0.857***
(0.195)
Issue Polarization Index0.883***
(0.190)
Perceived Polarization Index0.444*
(0.265)
Media Trust Index0.802***
(0.242)
Ideology0.0020.019-0.017-0.037
(0.017)(0.027)(0.029)(0.040)
Age-0.0010.0070.005-0.006
(0.004)(0.008)(0.004)(0.006)
Gender0.024-0.099-0.0240.010
(0.102)(0.100)(0.086)(0.090)
Education (Years)0.0300.0210.0110.022
(0.023)(0.014)(0.017)(0.019)
Constant-0.389-0.5500.0680.080
(0.422)(0.400)(0.400)(0.419)
Observations17182021
R20.7660.5640.3940.506
Adjusted R20.6260.3270.1140.294
Residual Std. Error0.139 (df = 10)0.194 (df = 11)0.168 (df = 13)0.227 (df = 14)
F Statistic5.470*** (df = 6; 10)2.376 (df = 6; 11)1.408 (df = 6; 13)2.387* (df = 6; 14)
Note:*p<0.1; **p<0.05; ***p<0.01

CACE Effects

I don't think classic CACE estimation is required/appropriate for this experiment.

First, our respondent pool is limited only to those who installed the extension.

We can think of treatment-receivers as those who actually used Twitter and had Fox tweets hidden from them We can think of nontreatment-receivers as those who used Twitter regularly and would have had tweets hidden from them if they had been assigned to treatment eg, they saw some Fox tweets

We can identify these subgroups precisely using the logging data. blacklist_text == true if the content contained a fox link. unfortunately, we have to manually figure out if the account name is in the list of blacklisted authors

We can also identify a different kind of ITT effect on active twitter users broadly, if we want to.

We can also estimate a different kind of CACE effect by the amount of tweets hidden :)

complier_df <- survey_df %>% filter(total_fox_tweets > 0)

models <- list(
    lm(affective_index.post ~ treated + affective_index + ideo + age + gender + educ, data = complier_df),
    lm(issue_index.post ~ social + issue_index + ideo + age + gender + educ, data = complier_df),
    lm(perceived_index.post ~ treated + perceived_index + ideo + age + gender + educ, data = complier_df),
    lm(media_trust_index.post ~ treated + media_trust_index + ideo + age + gender + educ, data = complier_df)
)

plot_effects(models, "Fox News Removal Effects (CACE)", "results_main_cace", covariate_labels)
## Warning: Removed 4 rows containing missing values (geom_errorbarh).
Fox News Removal Effects (CACE)
Dependent variable:
Affective PolarizationIssue PolarizationPerceived PolarizationTrust in Media
(1)(2)(3)(4)
Treatment0.012-0.0670.000
Affective Polarization Index1.439
Issue Polarization Index-0.313
Perceived Polarization Index-0.500
Media Trust Index0.333
Ideology0.929
Age0.1250.0000.030
Gender
Education (Years)
educ
Constant-0.0440.5620.644-0.048
Observations3444
R21.0001.0001.0001.000
Note:*p<0.1; **p<0.05; ***p<0.01
filtered_complier_df <- complier_df %>% filter(treated)
# models <- list(
#    lm(affective_index.post ~ social + affective_index + ideo + age + gender + educ, data = filtered_complier_df),
#    lm(perceived_index.post ~ social + perceived_index + ideo + age + gender + educ, data = filtered_complier_df),
#    lm(media_trust_index.post ~ social + media_trust_index + ideo + age + gender + educ, data = filtered_complier_df)
# )

plot_effects(models, "Fox News Removal Effects: Link vs Account Based Removal (CACE)", "results_social_cace", covariate_labels)
## Warning: Removed 4 rows containing missing values (geom_errorbarh).
Fox News Removal Effects: Link vs Account Based Removal (CACE)
Dependent variable:
Affective PolarizationIssue PolarizationPerceived PolarizationTrust in Media
(1)(2)(3)(4)
Treatment0.012-0.0670.000
Affective Polarization Index1.439
Issue Polarization Index-0.313
Perceived Polarization Index-0.500
Media Trust Index0.333
Ideology0.929
Age0.1250.0000.030
Gender
Education (Years)
educ
Constant-0.0440.5620.644-0.048
Observations3444
R21.0001.0001.0001.000
Note:*p<0.1; **p<0.05; ***p<0.01

Per Tweet Effects

Finally, we can actually estimate per-tweet effects.

We kinda want to compare users who had 5 tweets hidden to users with 5 tweets shown to them right, etc.?

is this the interaction effect of treatment on # tweets saw?

right, the hypothesis is that the outcome effect is stronger the higher the number of tweets hidden right so we might expect

  1. the effect of # tweets to be positive on polarization (or zero)
  2. the interaction term of # tweets with treatment to be negative
  3. the constant effect of treatment to be negative (or zero)
  • then, the value of coefficient 1. estimates the polarizing effect of seeing a fox news tweet. and the coefficient 2. estimates the de-polarizing effect of not seeing a fox news tweeet, when you would have seen one normally.

control for total number of tweets seen?

no, this is tricky.

Uhhhh 'everything equal', seeing one additional tweet in control group leads to X effect 'everything equal', having one additional tweet hidden leads to Y effect

Alternatively, you just operationalize it as "number of tweets hidden" so everyone in control just has value 0 This seems simpler honestly I kinda prefer that

models <- list(
    lm(affective_index.post ~ total_hidden + affective_index + ideo + age + gender + educ, data = survey_df),
    lm(issue_index.post ~ total_hidden + issue_index + ideo + age + gender + educ, data = survey_df),
    lm(perceived_index.post ~ total_hidden + perceived_index + ideo + age + gender + educ, data = survey_df),
    lm(media_trust_index.post ~ total_hidden + media_trust_index + ideo + age + gender + educ, data = survey_df)
)
plot_effects(
    models, 
    "Fox News Per-Tweet Effects",
    "results_main_per_tweet",
    c(
        "Num. Tweets Hidden",
        "Affective Polarization Index",
        "Perceived Polarization Index",
        "Media Trust Index",
        "Ideology",
        "Age",
        "Gender",
        "Education (Years)"
    )
)
Fox News Per-Tweet Effects
Dependent variable:
Affective PolarizationIssue PolarizationPerceived PolarizationTrust in Media
(1)(2)(3)(4)
Num. Tweets Hidden0.041*-0.0150.011-0.010
(0.021)(0.011)(0.008)(0.011)
Affective Polarization Index0.880***
(0.100)
Perceived Polarization Index0.823***
(0.220)
Media Trust Index0.401*
(0.241)
Ideology0.687***
(0.161)
Age-0.0070.035*0.008-0.021
(0.014)(0.021)(0.024)(0.029)
Gender0.00050.0040.005-0.005
(0.003)(0.007)(0.004)(0.006)
Education (Years)0.032-0.170**0.0590.062
(0.054)(0.079)(0.077)(0.071)
educ0.021*0.0160.0200.024
(0.012)(0.017)(0.017)(0.015)
Constant-0.256-0.341-0.1170.019
(0.196)(0.320)(0.354)(0.301)
Observations25252830
R20.7930.5810.2930.453
Adjusted R20.7240.4410.0910.311
Residual Std. Error0.115 (df = 18)0.177 (df = 18)0.179 (df = 21)0.202 (df = 23)
F Statistic11.503*** (df = 6; 18)4.159*** (df = 6; 18)1.450 (df = 6; 21)3.177** (df = 6; 23)
Note:*p<0.1; **p<0.05; ***p<0.01
models <- list(
    lm(affective_index.post ~ total_account_hidden + total_link_hidden + affective_index + ideo + age + gender + educ, data = survey_df),
    lm(issue_index.post ~ total_account_hidden + total_link_hidden + issue_index + ideo + age + gender + educ, data = survey_df),
    lm(perceived_index.post ~ total_account_hidden + total_link_hidden + perceived_index + ideo + age + gender + educ, data = survey_df),
    lm(media_trust_index.post ~ total_account_hidden + total_link_hidden + media_trust_index + ideo + age + gender + educ, data = survey_df)
)
plot_effects(
    models, 
    "Fox News Per-Tweet Account vs Link Effects",
    "results_social_per_tweet",
    c(
        "Num. Tweets Hidden",
        "Affective Polarization Index",
        "Issue Polarization Index",
        "Perceived Polarization Index",
        "Media Trust Index",
        "Ideology",
        "Age",
        "Gender",
        "Education (Years)"
    )
)
Fox News Per-Tweet Account vs Link Effects
Dependent variable:
Affective PolarizationIssue PolarizationPerceived PolarizationTrust in Media
(1)(2)(3)(4)
Num. Tweets Hidden0.054**0.0190.108**-0.089*
(0.027)(0.053)(0.049)(0.048)
Affective Polarization Index-0.012-0.0150.009-0.009
(0.084)(0.012)(0.007)(0.011)
Issue Polarization Index0.862***
(0.098)
Perceived Polarization Index0.849***
(0.249)
Media Trust Index0.475*
(0.246)
Ideology0.701***
(0.166)
Age-0.0060.037*0.012-0.026
(0.015)(0.022)(0.025)(0.032)
Gender0.0010.0040.005-0.006
(0.004)(0.007)(0.004)(0.006)
Education (Years)0.035-0.166*0.0660.054
(0.056)(0.085)(0.079)(0.074)
educ0.0200.0150.0210.022
(0.013)(0.017)(0.018)(0.016)
Constant-0.266-0.370-0.2140.070
(0.208)(0.313)(0.394)(0.320)
Observations25252830
R20.7950.5830.3250.466
Adjusted R20.7100.4120.0880.296
Residual Std. Error0.118 (df = 17)0.182 (df = 17)0.180 (df = 20)0.205 (df = 22)
F Statistic9.402*** (df = 7; 17)3.402** (df = 7; 17)1.374 (df = 7; 20)2.742** (df = 7; 22)
Note:*p<0.1; **p<0.05; ***p<0.01
a_effects <- models %>% map_dbl(~ .x$coefficients[2])
l_effects <- models %>% map_dbl(~ .x$coefficients[3])
a_se <- lapply(models, get_robust_se) %>% map_dbl(~ .x[2])
l_se <- lapply(models, get_robust_se) %>% map_dbl(~ .x[3])
vars = c("Affective Polarization", "Issue Polarization", "Perceived Polarization", "Trust in Media")
account_effects <- data.frame(estimate = a_effects, se=a_se, var=vars, type="account")
link_effects <- data.frame(estimate = l_effects, se=l_se, var=vars, type="link")
effect_df <- rbind(account_effects, link_effects) %>%
    mutate(
        lo = estimate - 1.96 * se,
        hi = estimate + 1.96 * se
    )
ggplot(effect_df, aes(xmin = lo, xmax = hi, x = estimate, y = var, group=type)) +
    geom_point() +
    geom_errorbarh(height = 0.1) +
    theme_minimal() +
    theme(
        panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()
    ) +
    labs(
        title = title
    )

plot of chunk unnamed-chunk-22