title | output |
---|---|
MA pilot study |
html_document |
df <- read_tsv(here("scripts/formatted_data.tsv")) #%>%
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## id = col_double(),
## author = col_character(),
## text = col_character(),
## time = col_double(),
## blacklist_text = col_logical(),
## event = col_character(),
## workerID = col_character(),
## treatment_group = col_double(),
## blacklist_author = col_logical()
## )
df <- df %>% mutate(
treatment_group=ifelse(workerID=='A3PRZRK9IC5CBI', 1, treatment_group),
workerID=ifelse(workerID=='A6Y7SZU9L301W', 'A2D8LB2RPJXMSQ', workerID),
treatment_group=ifelse(workerID=='A2D8LB2RPJXMSQ', 2, treatment_group),
workerID=ifelse(workerID=='A2RCYLK072XXO', 'AKSJ3C5O3V9RB', workerID),
treatment_group=ifelse(workerID=='AKSJ3C5O3V9RB', 1, treatment_group),
treatment_group=ifelse(workerID=='A3MIDLO5S7FU06', 0, treatment_group),
treatment_group=ifelse(workerID=='A2Z6NL0CTXY0ZB', 0, treatment_group)
)
# i want to know how many tweets each respondent has seen
agg_df <- df %>%
group_by(workerID, treatment_group, event) %>%
count()
n_installs <- df %>%
filter(event == "install") %>%
select(workerID) %>%
distinct() %>%
count()
n_users_active <- df %>%
filter(event == "show") %>%
select(workerID) %>%
distinct() %>%
count()
rate <- n_users_active / n_installs
There have been 82 unique users who installed the extension. Of these, 19 have used twitter since installation. The average active user per install rate is 0.2317073.
Installs by treatment group
agg_df %>%
filter(event == "install") %>%
group_by(treatment_group) %>%
summarize(total_n = sum(n))
## # A tibble: 4 x 2
## treatment_group total_n
## <dbl> <int>
## 1 0 30
## 2 1 34
## 3 2 29
## 4 NA 3
Tweets seen by treatment group
agg_df %>%
filter(event == "show") %>%
group_by(treatment_group) %>%
summarize(total_n = sum(n))
## # A tibble: 3 x 2
## treatment_group total_n
## <dbl> <int>
## 1 0 13927
## 2 1 4004
## 3 2 1686
Users who have seen any tweets by treatment group
agg_df %>%
filter(event == "show") %>%
group_by(treatment_group) %>%
count()
## # A tibble: 3 x 2
## # Groups: treatment_group [3]
## treatment_group n
## <dbl> <int>
## 1 0 7
## 2 1 6
## 3 2 6
Tweets seen that were eligible for removal
df %>%
filter(event %in% c("show", "hide")) %>%
group_by(treatment_group, blacklist_text, blacklist_author) %>%
summarize(users = n_distinct(workerID), tweets = length(workerID))
## `summarise()` has grouped output by 'treatment_group', 'blacklist_text'. You can override using the `.groups` argument.
## # A tibble: 10 x 5
## # Groups: treatment_group, blacklist_text [5]
## treatment_group blacklist_text blacklist_author users tweets
## <dbl> <lgl> <lgl> <int> <int>
## 1 0 FALSE FALSE 7 13395
## 2 0 FALSE TRUE 2 323
## 3 0 TRUE FALSE 1 92
## 4 0 TRUE TRUE 1 117
## 5 1 FALSE FALSE 6 3988
## 6 1 FALSE TRUE 4 9
## 7 1 TRUE FALSE 1 5
## 8 1 TRUE TRUE 3 15
## 9 2 FALSE FALSE 6 1684
## 10 2 FALSE TRUE 1 2
Tweets hidden by treatment group
agg_df %>%
filter(event == "hide") %>%
group_by(treatment_group) %>%
summarize(total_n = sum(n))
## # A tibble: 1 x 2
## treatment_group total_n
## <dbl> <int>
## 1 1 13
question_map <- c()
# Cleaning / joining response data
presurvey_df <- read_tsv(here("scripts/qualtrics/presurvey.tsv"))
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## StartDate = col_character(),
## EndDate = col_datetime(format = ""),
## IPAddress = col_character(),
## RecordedDate = col_datetime(format = ""),
## ResponseId = col_character(),
## RecipientLastName = col_logical(),
## RecipientFirstName = col_logical(),
## RecipientEmail = col_logical(),
## ExternalReference = col_logical(),
## DistributionChannel = col_character(),
## UserLanguage = col_character(),
## Q16 = col_logical(),
## ResponseID = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
presurvey_2_df <- read_tsv(here("scripts/qualtrics/presurvey_2.tsv"))
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## StartDate = col_character(),
## EndDate = col_character(),
## IPAddress = col_character(),
## RecordedDate = col_character(),
## ResponseId = col_character(),
## RecipientLastName = col_logical(),
## RecipientFirstName = col_logical(),
## RecipientEmail = col_logical(),
## ExternalReference = col_logical(),
## DistributionChannel = col_character(),
## UserLanguage = col_character(),
## Q16 = col_logical(),
## ResponseID = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
presurvey_3_df <- read_tsv(here("scripts/qualtrics/presurvey_3.tsv")) %>% select(-SurveyCode)
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## StartDate = col_character(),
## EndDate = col_character(),
## IPAddress = col_character(),
## RecordedDate = col_character(),
## ResponseId = col_character(),
## RecipientLastName = col_logical(),
## RecipientFirstName = col_logical(),
## RecipientEmail = col_logical(),
## ExternalReference = col_logical(),
## DistributionChannel = col_character(),
## UserLanguage = col_character(),
## SurveyCode = col_character(),
## Q16 = col_logical(),
## ResponseID = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
turkers <- read_tsv(here("scripts/mturk_pilot_v1.tsv")) %>% select(WorkerId, Answer)
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## AssignmentId = col_character(),
## WorkerId = col_character(),
## HITId = col_character(),
## AssignmentStatus = col_character(),
## AutoApprovalTime = col_datetime(format = ""),
## AcceptTime = col_datetime(format = ""),
## SubmitTime = col_datetime(format = ""),
## ApprovalTime = col_datetime(format = ""),
## Answer = col_character()
## )
turkers_2 <- read_tsv(here("scripts/mturk_pilot_v2.tsv")) %>% select(WorkerId, Answer)
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## AssignmentId = col_character(),
## WorkerId = col_character(),
## HITId = col_character(),
## AssignmentStatus = col_character(),
## AutoApprovalTime = col_datetime(format = ""),
## AcceptTime = col_datetime(format = ""),
## SubmitTime = col_datetime(format = ""),
## ApprovalTime = col_datetime(format = ""),
## Answer = col_character(),
## RejectionTime = col_datetime(format = ""),
## RequesterFeedback = col_character()
## )
turkers_3 <- read_tsv(here("scripts/mturk_pilot_v3.tsv")) %>% select(WorkerId, Answer)
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## AssignmentId = col_character(),
## WorkerId = col_character(),
## HITId = col_character(),
## AssignmentStatus = col_character(),
## AutoApprovalTime = col_datetime(format = ""),
## AcceptTime = col_datetime(format = ""),
## SubmitTime = col_datetime(format = ""),
## ApprovalTime = col_datetime(format = ""),
## Answer = col_character()
## )
turkers_postsurvey <- read_tsv(here("scripts/mturk_postsurvey.tsv"))
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## AssignmentId = col_character(),
## WorkerId = col_character(),
## HITId = col_character(),
## AssignmentStatus = col_character(),
## AutoApprovalTime = col_datetime(format = ""),
## AcceptTime = col_datetime(format = ""),
## SubmitTime = col_datetime(format = ""),
## ApprovalTime = col_datetime(format = ""),
## Answer = col_character()
## )
postsurvey_df <- read_csv(here("scripts/qualtrics/postsurvey2.csv"))
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
pre_df <- rbind(presurvey_df, presurvey_2_df, presurvey_3_df)
turkers_df <- rbind(turkers, turkers_2, turkers_3)
turkers_df$ResponseID <- (str_match(
turkers_df$Answer,
"<Answer><QuestionIdentifier>surveycode</QuestionIdentifier><FreeText>(R_[[:alnum:]]+)</FreeText>"
))[, 2]
turkers_postsurvey$ResponseID <- (str_match(turkers_postsurvey$Answer, "<QuestionIdentifier>surveycode</QuestionIdentifier><FreeText>(.*)</FreeText>"))[, 2]
pre_df_joined <- inner_join(turkers_df, pre_df, by = "ResponseID") %>% distinct(WorkerId, .keep_all=TRUE)
post_df_joined <- inner_join(postsurvey_df, turkers_postsurvey, by = "ResponseID") %>% distinct(WorkerId, .keep_all=TRUE)
id_cols <- c("WorkerId")
demo_cols <- c("politics_interest", "pid3", "ideo", "age", "gender", "educ", "media_tv", "media_newspaper", "media_radio", "media_internet", "media_discussion")
outcome_cols <- c("thermo_dems", "thermo_reps", "dist_trump", "dist_biden", "perceived_1", "perceived_2", "perceived_3", "issue_immigration", "issue_climate", "issue_covid", "issue_ukraine", "media_bias", "media_trust")
pre_df_renamed <- rename(pre_df_joined,
politics_interest = QID1,
pid3 = Q2,
ideo = Q3,
age = Q4,
gender = Q5,
educ = Q6,
media_tv = Q7_1,
media_newspaper = Q7_2,
media_radio = Q7_3,
media_internet = Q7_4,
media_discussion = Q7_5,
thermo_dems = Q8_1,
thermo_reps = Q8_2,
dist_trump = Q10_1,
dist_biden = Q10_2,
perceived_1 = Q12_1,
perceived_2 = Q12_2,
perceived_3 = Q12_3,
issue_immigration = Q13_1,
issue_climate = Q13_2,
issue_covid = Q13_3,
issue_ukraine = Q13_4,
media_bias = Q14,
media_trust = Q15
) %>% select(c(id_cols, demo_cols, outcome_cols))
post_df_renamed <- rename(post_df_joined,
thermo_dems = Q8_1,
thermo_reps = Q8_2,
dist_trump = Q10_1,
dist_biden = Q10_2,
perceived_1 = Q12_1,
perceived_2 = Q12_2,
perceived_3 = Q12_3,
issue_immigration = Q13_1,
issue_climate = Q13_2,
issue_covid = Q13_3,
issue_ukraine = Q13_4,
media_bias = Q14,
media_trust = Q15
) %>%
select(c(id_cols, outcome_cols)) %>%
left_join(
pre_df_renamed %>% select("pid3", "ideo", "WorkerId"),
by = "WorkerId"
) %>%
mutate_at(outcome_cols, as.numeric)
create_indices <- function(df) {
# Create the main outcome indices
df %>% mutate(
affective_index = case_when(
pid3 == 1 ~ (thermo_reps / 100 + (7 - dist_trump) / 7) / 2,
pid3 == 2 ~ (thermo_dems / 100 + (7 - dist_biden) / 7) / 2,
TRUE ~ NA_real_
),
issue_index = (abs(issue_immigration - 3) + abs(issue_climate - 3) + abs(issue_covid - 3) + abs(issue_ukraine - 3)) / 4 / 2,
media_trust_index = (abs(3 - media_bias) * 3 / 2 + media_trust - 1) / 6,
perceived_index = (perceived_1 + perceived_2 + perceived_3) / 3 / 5,
)
# TODO create the issue polarization index
}
pre_df_index <- create_indices(pre_df_renamed) %>% mutate(
age=(age-1) * 10 + 23,
educ=case_when(
educ==1 ~ 6,
educ==2 ~ 12,
educ==3 ~ 14,
educ==4 ~ 16,
educ==5 ~ 20,
),
ideo=5-ideo
)
usage_df <- df %>%
mutate(
saw_fox = blacklist_text | blacklist_author,
WorkerId=workerID
) %>%
group_by(WorkerId) %>%
summarize(
installed = sum(event=='install'),
total_tweets = sum(event=='show', na.rm=TRUE),
total_fox_tweets = sum(saw_fox, na.rm=TRUE),
fox_tweets_accounts = sum(blacklist_author, na.rm=TRUE),
fox_tweets_links = sum(blacklist_text, na.rm=TRUE),
total_hidden = sum(saw_fox & treatment_group != 0, na.rm=TRUE),
total_link_hidden = sum(saw_fox & treatment_group == 1, na.rm=TRUE),
total_account_hidden = sum(saw_fox & treatment_group == 2, na.rm=TRUE)
)
pre_df_usage <- left_join(
pre_df_index,
usage_df,
by = "WorkerId"
)
post_df_index <- create_indices(post_df_renamed) %>%
select(WorkerId, affective_index, issue_index, media_trust_index, perceived_index)
survey_df <- left_join(post_df_index, pre_df_usage, by = "WorkerId", suffix = c("", ".post")) %>%
left_join(agg_df %>% filter(event == "install") %>% mutate(WorkerId = workerID), by = "WorkerId")
survey_df$treated <- survey_df$treatment_group != 0
survey_df$social <- survey_df$treatment_group == 1
survey_df$gender <- as.factor(survey_df$gender)
levels(survey_df$gender) <- c('Male', "Female", "Other")
Overall 103 unique respondents completed the first round survey, of which 52 installed the extension. Of these, 12 used Twitter during the treatment period and 4 were exposed to Fox tweets or had Fox tweets hidden from view.
30 users, or 0.5769231 of the eligible respondents completed the endline survey, which was offered to any user who installed the extension.
# Balance check
library(cobalt)
library(ggpubr)
## Error: package or namespace load failed for 'ggpubr' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
## namespace 'broom' 0.7.1 is already loaded, but >= 0.7.4 is required
fmla <- treated ~ ideo + age + gender + educ + total_tweets + total_fox_tweets
t1 <- love.plot(fmla,
data=survey_df %>% filter(treatment_group %in% c(0,1)),
stats = c("mean.diffs"),
thresholds = c(m = 0.1, v = 2),
abs = FALSE,
binary = "std",
var.order = "unadjusted",
drop.distance=TRUE
) + theme(legend.position="none") + labs(title="")
## Note: 's.d.denom' not specified; assuming pooled.
t2 <- love.plot(fmla,
data=survey_df %>% filter(treatment_group %in% c(0,2)),
stats = c("mean.diffs"),
thresholds = c(m = 0.1, v = 2),
abs = FALSE,
binary = "std",
var.order = "unadjusted",
drop.distance=TRUE
) + theme(legend.position="none") + labs(title="")
## Note: 's.d.denom' not specified; assuming pooled.
t3 <- love.plot(social ~ ideo + age + gender + educ + total_tweets + total_fox_tweets,
data=survey_df %>% filter(treatment_group %in% c(1,2)),
stats = c("mean.diffs"),
thresholds = c(m = 0.1, v = 2),
abs = FALSE,
binary = "std",
var.order = "unadjusted",
drop.distance=TRUE
) + theme(legend.position="none") + labs(title="")
## Note: 's.d.denom' not specified; assuming pooled.
ggarrange(
t1, t2, t3, labels=c("Links vs Control", "Accounts vs Control", "Links vs Accounts")
)
## Error in ggarrange(t1, t2, t3, labels = c("Links vs Control", "Accounts vs Control", : could not find function "ggarrange"
# Covariate balance table
means <- survey_df %>% group_by(treatment_group) %>% summarize(
mean_ideo=mean(ideo),
mean_educ=mean(educ),
mean_age=mean(age),
mean_gender=mean(as.numeric(gender) - 1),
mean_total_tweets=mean(total_tweets),
num_saw_tweets=sum(total_tweets > 0),
num_saw_fox=sum(total_fox_tweets > 0),
mean_total_fox_tweets=mean(total_fox_tweets),
ideo_sd = sd(ideo),
educ_sd = sd(educ),
age_sd = sd(age),
gender_sd = sd(as.numeric(gender) - 1),
tt_sd = sd(total_tweets),
fox_tt_sd = sd(total_fox_tweets),
n = n(),
ideo_se = ideo_sd / sqrt(n),
educ_se = educ_sd / sqrt(n),
age_se = age_sd / sqrt(n),
gender_se = gender_sd / sqrt(n),
tt_se = tt_sd / sqrt(n),
fox_tt_se = fox_tt_sd / sqrt(n),
) %>%
mutate_all(round, 2) %>%
mutate(
Treatment=recode(treatment_group, "1"="Accounts", "2"="Link", "0"="Control"),
Ideology=paste0(mean_ideo, " (", ideo_se, ")"),
Education=paste0(mean_educ, " (", educ_se, ")"),
Age=paste0(mean_age, " (", age_se, ")"),
Female=paste0(mean_gender, " (", gender_se, ")"),
Tweets_Seen=paste0(mean_total_tweets, " (", tt_se, ")"),
Fox_Tweets_Seen=paste0(mean_total_fox_tweets, " (", fox_tt_se, ")"),
Twitter_Users=round(num_saw_tweets, 0),
Saw_Fox=round(num_saw_fox, 0),
Count=round(n, digits=0)
)
library(xtable)
print(xtable(means %>% select(Treatment, Count, Ideology, Education, Age, Female), type = "latex"), file = "covariate_means.tex")
print(xtable(means %>% select(Treatment, Twitter_Users, Saw_Fox, Tweets_Seen, Fox_Tweets_Seen), type = "latex"), file = "covariate_means_2.tex")
models <- list(
lm(treated ~ ideo + educ + age + gender + total_tweets + total_fox_tweets, data=survey_df %>% filter(treatment_group %in% c(0, 1))),
lm(treated ~ ideo + educ + age + gender + total_tweets + total_fox_tweets, data=survey_df %>% filter(treatment_group %in% c(0, 2))),
lm(social ~ ideo + educ + age + gender + total_tweets + total_fox_tweets, data=survey_df %>% filter(treatment_group %in% c(1, 2)))
)
stargazer(
models,
type='html',
out='covariate_balance.tex'
)
Dependent variable: | |||
treated | social | ||
(1) | (2) | (3) | |
ideo | -0.047 | -0.038 | -0.061 |
(0.083) | (0.085) | (0.083) | |
educ | 0.107 | 0.046 | -0.050 |
(0.091) | (0.055) | (0.067) | |
age | 0.024 | 0.007 | 0.015 |
(0.017) | (0.024) | (0.016) | |
genderFemale | -0.120 | -0.141 | 0.149 |
(0.285) | (0.292) | (0.270) | |
total_tweets | 0.0001 | 0.007 | 0.001 |
(0.001) | (0.013) | (0.001) | |
total_fox_tweets | -0.011 | -0.198 | -0.134 |
(0.140) | (0.332) | (0.317) | |
Constant | -1.893 | -0.291 | 0.706 |
(1.657) | (1.207) | (1.341) | |
Observations | 19 | 20 | 21 |
R2 | 0.330 | 0.247 | 0.186 |
Adjusted R2 | -0.005 | -0.100 | -0.163 |
Residual Std. Error | 0.514 (df = 12) | 0.535 (df = 13) | 0.552 (df = 14) |
F Statistic | 0.986 (df = 6; 12) | 0.711 (df = 6; 13) | 0.532 (df = 6; 14) |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
tweets_seen_plot <- pre_df_usage %>% filter(installed > 0) %>%
ggplot(aes(x=total_tweets)) +
geom_histogram() +
theme_minimal() +
labs(title="Tweets seen by respondents", x='Tweets seen', y='Num. Respondents')
fox_tweets_seen_plot <- pre_df_usage %>% filter(installed > 0) %>%
ggplot(aes(x=total_fox_tweets)) +
geom_histogram() +
theme_minimal() +
labs(title="Fox News Tweets seen by respondents", x='Tweets seen', y='Num. Respondents')
ggarrange(tweets_seen_plot, fox_tweets_seen_plot)
## Error in ggarrange(tweets_seen_plot, fox_tweets_seen_plot): could not find function "ggarrange"
ggsave(paste0("tweets.png"), width = 10, height = 5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Plot
library(ggallin)
library(knitr)
plot_effects <- function(models, title, filename, covariate_labels) {
effects <- models %>% map_dbl(~ .x$coefficients[2])
se <- lapply(models, get_robust_se) %>% map_dbl(~ .x[2])
effect_df <- data.frame(estimate = effects, se = se, var = c("Affective Polarization", "Issue Polarization", "Perceived Polarization", "Trust in Media")) %>%
mutate(
lo = estimate - 1.96 * se,
hi = estimate + 1.96 * se
)
ggplot(effect_df, aes(xmin = lo, xmax = hi, x = estimate, y = var)) +
geom_point() +
geom_errorbarh(height = 0.1) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank()
) +
labs(
title = title
)
ggsave(paste0(filename, ".png"), width = 10, height = 5)
stargazer(
models,
type = "html",
title = title,
style = "default",
se = lapply(models, get_robust_se),
covariate.labels = covariate_labels,
header = FALSE,
object.names = FALSE,
model.names = FALSE,
dep.var.labels = c("Affective Polarization", "Issue Polarization", "Perceived Polarization", "Trust in Media"),
font.size="tiny",
out = paste0(filename, ".tex")
)
}
models <- list(
lm(affective_index.post ~ treated + affective_index + ideo + age + gender + educ, data = survey_df),
lm(issue_index.post ~ treated + issue_index + ideo + age + gender + educ, data = survey_df),
lm(perceived_index.post ~ treated + perceived_index + ideo + age + gender + educ, data = survey_df),
lm(media_trust_index.post ~ treated + media_trust_index + ideo + age + gender + educ, data = survey_df)
)
covariate_labels <- c(
"Treatment",
"Affective Polarization Index",
"Issue Polarization Index",
"Perceived Polarization Index",
"Media Trust Index",
"Ideology",
"Age",
"Gender",
"Education (Years)"
)
plot_effects(models, "Fox News Removal Effects (ITT)", "results_main_itt", covariate_labels)
Dependent variable: | ||||
Affective Polarization | Issue Polarization | Perceived Polarization | Trust in Media | |
(1) | (2) | (3) | (4) | |
Treatment | 0.042 | 0.024 | -0.094 | -0.099 |
(0.042) | (0.073) | (0.090) | (0.087) | |
Affective Polarization Index | 0.896*** | |||
(0.100) | ||||
Issue Polarization Index | 0.877*** | |||
(0.184) | ||||
Perceived Polarization Index | 0.322 | |||
(0.239) | ||||
Media Trust Index | 0.733*** | |||
(0.163) | ||||
Ideology | -0.007 | 0.034* | 0.005 | -0.030 |
(0.012) | (0.020) | (0.024) | (0.031) | |
Age | -0.0002 | 0.005 | 0.005* | -0.003 |
(0.003) | (0.007) | (0.003) | (0.005) | |
Gender | 0.039 | -0.159* | 0.053 | 0.059 |
(0.059) | (0.085) | (0.078) | (0.068) | |
Education (Years) | 0.017 | 0.007 | 0.029** | 0.027** |
(0.012) | (0.015) | (0.014) | (0.013) | |
Constant | -0.206 | -0.318 | -0.118 | -0.042 |
(0.217) | (0.321) | (0.340) | (0.263) | |
Observations | 25 | 25 | 28 | 30 |
R2 | 0.793 | 0.566 | 0.318 | 0.473 |
Adjusted R2 | 0.724 | 0.421 | 0.123 | 0.335 |
Residual Std. Error | 0.115 (df = 18) | 0.180 (df = 18) | 0.176 (df = 21) | 0.199 (df = 23) |
F Statistic | 11.477*** (df = 6; 18) | 3.913** (df = 6; 18) | 1.630 (df = 6; 21) | 3.436** (df = 6; 23) |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
filtered_df <- survey_df %>% filter(treated == TRUE)
models <- list(
lm(affective_index.post ~ social + affective_index + ideo + age + gender + educ, data = filtered_df),
lm(issue_index.post ~ social + issue_index + ideo + age + gender + educ, data = filtered_df),
lm(perceived_index.post ~ social + perceived_index + ideo + age + gender + educ, data = filtered_df),
lm(media_trust_index.post ~ social + media_trust_index + ideo + age + gender + educ, data = filtered_df)
)
plot_effects(models, "Fox News Removal Effects: Link vs Account Based Removal (ITT)", "results_social_itt", covariate_labels)
Dependent variable: | ||||
Affective Polarization | Issue Polarization | Perceived Polarization | Trust in Media | |
(1) | (2) | (3) | (4) | |
Treatment | 0.091 | -0.090 | 0.020 | -0.032 |
(0.063) | (0.075) | (0.079) | (0.100) | |
Affective Polarization Index | 0.857*** | |||
(0.195) | ||||
Issue Polarization Index | 0.883*** | |||
(0.190) | ||||
Perceived Polarization Index | 0.444* | |||
(0.265) | ||||
Media Trust Index | 0.802*** | |||
(0.242) | ||||
Ideology | 0.002 | 0.019 | -0.017 | -0.037 |
(0.017) | (0.027) | (0.029) | (0.040) | |
Age | -0.001 | 0.007 | 0.005 | -0.006 |
(0.004) | (0.008) | (0.004) | (0.006) | |
Gender | 0.024 | -0.099 | -0.024 | 0.010 |
(0.102) | (0.100) | (0.086) | (0.090) | |
Education (Years) | 0.030 | 0.021 | 0.011 | 0.022 |
(0.023) | (0.014) | (0.017) | (0.019) | |
Constant | -0.389 | -0.550 | 0.068 | 0.080 |
(0.422) | (0.400) | (0.400) | (0.419) | |
Observations | 17 | 18 | 20 | 21 |
R2 | 0.766 | 0.564 | 0.394 | 0.506 |
Adjusted R2 | 0.626 | 0.327 | 0.114 | 0.294 |
Residual Std. Error | 0.139 (df = 10) | 0.194 (df = 11) | 0.168 (df = 13) | 0.227 (df = 14) |
F Statistic | 5.470*** (df = 6; 10) | 2.376 (df = 6; 11) | 1.408 (df = 6; 13) | 2.387* (df = 6; 14) |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
I don't think classic CACE estimation is required/appropriate for this experiment.
First, our respondent pool is limited only to those who installed the extension.
We can think of treatment-receivers as those who actually used Twitter and had Fox tweets hidden from them We can think of nontreatment-receivers as those who used Twitter regularly and would have had tweets hidden from them if they had been assigned to treatment eg, they saw some Fox tweets
We can identify these subgroups precisely using the logging data. blacklist_text == true if the content contained a fox link. unfortunately, we have to manually figure out if the account name is in the list of blacklisted authors
We can also identify a different kind of ITT effect on active twitter users broadly, if we want to.
We can also estimate a different kind of CACE effect by the amount of tweets hidden :)
complier_df <- survey_df %>% filter(total_fox_tweets > 0)
models <- list(
lm(affective_index.post ~ treated + affective_index + ideo + age + gender + educ, data = complier_df),
lm(issue_index.post ~ social + issue_index + ideo + age + gender + educ, data = complier_df),
lm(perceived_index.post ~ treated + perceived_index + ideo + age + gender + educ, data = complier_df),
lm(media_trust_index.post ~ treated + media_trust_index + ideo + age + gender + educ, data = complier_df)
)
plot_effects(models, "Fox News Removal Effects (CACE)", "results_main_cace", covariate_labels)
## Warning: Removed 4 rows containing missing values (geom_errorbarh).
Dependent variable: | ||||
Affective Polarization | Issue Polarization | Perceived Polarization | Trust in Media | |
(1) | (2) | (3) | (4) | |
Treatment | 0.012 | -0.067 | 0.000 | |
Affective Polarization Index | 1.439 | |||
Issue Polarization Index | -0.313 | |||
Perceived Polarization Index | -0.500 | |||
Media Trust Index | 0.333 | |||
Ideology | 0.929 | |||
Age | 0.125 | 0.000 | 0.030 | |
Gender | ||||
Education (Years) | ||||
educ | ||||
Constant | -0.044 | 0.562 | 0.644 | -0.048 |
Observations | 3 | 4 | 4 | 4 |
R2 | 1.000 | 1.000 | 1.000 | 1.000 |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
filtered_complier_df <- complier_df %>% filter(treated)
# models <- list(
# lm(affective_index.post ~ social + affective_index + ideo + age + gender + educ, data = filtered_complier_df),
# lm(perceived_index.post ~ social + perceived_index + ideo + age + gender + educ, data = filtered_complier_df),
# lm(media_trust_index.post ~ social + media_trust_index + ideo + age + gender + educ, data = filtered_complier_df)
# )
plot_effects(models, "Fox News Removal Effects: Link vs Account Based Removal (CACE)", "results_social_cace", covariate_labels)
## Warning: Removed 4 rows containing missing values (geom_errorbarh).
Dependent variable: | ||||
Affective Polarization | Issue Polarization | Perceived Polarization | Trust in Media | |
(1) | (2) | (3) | (4) | |
Treatment | 0.012 | -0.067 | 0.000 | |
Affective Polarization Index | 1.439 | |||
Issue Polarization Index | -0.313 | |||
Perceived Polarization Index | -0.500 | |||
Media Trust Index | 0.333 | |||
Ideology | 0.929 | |||
Age | 0.125 | 0.000 | 0.030 | |
Gender | ||||
Education (Years) | ||||
educ | ||||
Constant | -0.044 | 0.562 | 0.644 | -0.048 |
Observations | 3 | 4 | 4 | 4 |
R2 | 1.000 | 1.000 | 1.000 | 1.000 |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
Finally, we can actually estimate per-tweet effects.
We kinda want to compare users who had 5 tweets hidden to users with 5 tweets shown to them right, etc.?
is this the interaction effect of treatment on # tweets saw?
right, the hypothesis is that the outcome effect is stronger the higher the number of tweets hidden right so we might expect
- the effect of # tweets to be positive on polarization (or zero)
- the interaction term of # tweets with treatment to be negative
- the constant effect of treatment to be negative (or zero)
- then, the value of coefficient 1. estimates the polarizing effect of seeing a fox news tweet. and the coefficient 2. estimates the de-polarizing effect of not seeing a fox news tweeet, when you would have seen one normally.
control for total number of tweets seen?
no, this is tricky.
Uhhhh 'everything equal', seeing one additional tweet in control group leads to X effect 'everything equal', having one additional tweet hidden leads to Y effect
Alternatively, you just operationalize it as "number of tweets hidden" so everyone in control just has value 0 This seems simpler honestly I kinda prefer that
models <- list(
lm(affective_index.post ~ total_hidden + affective_index + ideo + age + gender + educ, data = survey_df),
lm(issue_index.post ~ total_hidden + issue_index + ideo + age + gender + educ, data = survey_df),
lm(perceived_index.post ~ total_hidden + perceived_index + ideo + age + gender + educ, data = survey_df),
lm(media_trust_index.post ~ total_hidden + media_trust_index + ideo + age + gender + educ, data = survey_df)
)
plot_effects(
models,
"Fox News Per-Tweet Effects",
"results_main_per_tweet",
c(
"Num. Tweets Hidden",
"Affective Polarization Index",
"Perceived Polarization Index",
"Media Trust Index",
"Ideology",
"Age",
"Gender",
"Education (Years)"
)
)
Dependent variable: | ||||
Affective Polarization | Issue Polarization | Perceived Polarization | Trust in Media | |
(1) | (2) | (3) | (4) | |
Num. Tweets Hidden | 0.041* | -0.015 | 0.011 | -0.010 |
(0.021) | (0.011) | (0.008) | (0.011) | |
Affective Polarization Index | 0.880*** | |||
(0.100) | ||||
Perceived Polarization Index | 0.823*** | |||
(0.220) | ||||
Media Trust Index | 0.401* | |||
(0.241) | ||||
Ideology | 0.687*** | |||
(0.161) | ||||
Age | -0.007 | 0.035* | 0.008 | -0.021 |
(0.014) | (0.021) | (0.024) | (0.029) | |
Gender | 0.0005 | 0.004 | 0.005 | -0.005 |
(0.003) | (0.007) | (0.004) | (0.006) | |
Education (Years) | 0.032 | -0.170** | 0.059 | 0.062 |
(0.054) | (0.079) | (0.077) | (0.071) | |
educ | 0.021* | 0.016 | 0.020 | 0.024 |
(0.012) | (0.017) | (0.017) | (0.015) | |
Constant | -0.256 | -0.341 | -0.117 | 0.019 |
(0.196) | (0.320) | (0.354) | (0.301) | |
Observations | 25 | 25 | 28 | 30 |
R2 | 0.793 | 0.581 | 0.293 | 0.453 |
Adjusted R2 | 0.724 | 0.441 | 0.091 | 0.311 |
Residual Std. Error | 0.115 (df = 18) | 0.177 (df = 18) | 0.179 (df = 21) | 0.202 (df = 23) |
F Statistic | 11.503*** (df = 6; 18) | 4.159*** (df = 6; 18) | 1.450 (df = 6; 21) | 3.177** (df = 6; 23) |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
models <- list(
lm(affective_index.post ~ total_account_hidden + total_link_hidden + affective_index + ideo + age + gender + educ, data = survey_df),
lm(issue_index.post ~ total_account_hidden + total_link_hidden + issue_index + ideo + age + gender + educ, data = survey_df),
lm(perceived_index.post ~ total_account_hidden + total_link_hidden + perceived_index + ideo + age + gender + educ, data = survey_df),
lm(media_trust_index.post ~ total_account_hidden + total_link_hidden + media_trust_index + ideo + age + gender + educ, data = survey_df)
)
plot_effects(
models,
"Fox News Per-Tweet Account vs Link Effects",
"results_social_per_tweet",
c(
"Num. Tweets Hidden",
"Affective Polarization Index",
"Issue Polarization Index",
"Perceived Polarization Index",
"Media Trust Index",
"Ideology",
"Age",
"Gender",
"Education (Years)"
)
)
Dependent variable: | ||||
Affective Polarization | Issue Polarization | Perceived Polarization | Trust in Media | |
(1) | (2) | (3) | (4) | |
Num. Tweets Hidden | 0.054** | 0.019 | 0.108** | -0.089* |
(0.027) | (0.053) | (0.049) | (0.048) | |
Affective Polarization Index | -0.012 | -0.015 | 0.009 | -0.009 |
(0.084) | (0.012) | (0.007) | (0.011) | |
Issue Polarization Index | 0.862*** | |||
(0.098) | ||||
Perceived Polarization Index | 0.849*** | |||
(0.249) | ||||
Media Trust Index | 0.475* | |||
(0.246) | ||||
Ideology | 0.701*** | |||
(0.166) | ||||
Age | -0.006 | 0.037* | 0.012 | -0.026 |
(0.015) | (0.022) | (0.025) | (0.032) | |
Gender | 0.001 | 0.004 | 0.005 | -0.006 |
(0.004) | (0.007) | (0.004) | (0.006) | |
Education (Years) | 0.035 | -0.166* | 0.066 | 0.054 |
(0.056) | (0.085) | (0.079) | (0.074) | |
educ | 0.020 | 0.015 | 0.021 | 0.022 |
(0.013) | (0.017) | (0.018) | (0.016) | |
Constant | -0.266 | -0.370 | -0.214 | 0.070 |
(0.208) | (0.313) | (0.394) | (0.320) | |
Observations | 25 | 25 | 28 | 30 |
R2 | 0.795 | 0.583 | 0.325 | 0.466 |
Adjusted R2 | 0.710 | 0.412 | 0.088 | 0.296 |
Residual Std. Error | 0.118 (df = 17) | 0.182 (df = 17) | 0.180 (df = 20) | 0.205 (df = 22) |
F Statistic | 9.402*** (df = 7; 17) | 3.402** (df = 7; 17) | 1.374 (df = 7; 20) | 2.742** (df = 7; 22) |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
a_effects <- models %>% map_dbl(~ .x$coefficients[2])
l_effects <- models %>% map_dbl(~ .x$coefficients[3])
a_se <- lapply(models, get_robust_se) %>% map_dbl(~ .x[2])
l_se <- lapply(models, get_robust_se) %>% map_dbl(~ .x[3])
vars = c("Affective Polarization", "Issue Polarization", "Perceived Polarization", "Trust in Media")
account_effects <- data.frame(estimate = a_effects, se=a_se, var=vars, type="account")
link_effects <- data.frame(estimate = l_effects, se=l_se, var=vars, type="link")
effect_df <- rbind(account_effects, link_effects) %>%
mutate(
lo = estimate - 1.96 * se,
hi = estimate + 1.96 * se
)
ggplot(effect_df, aes(xmin = lo, xmax = hi, x = estimate, y = var, group=type)) +
geom_point() +
geom_errorbarh(height = 0.1) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank()
) +
labs(
title = title
)