# R-error-analysis.r
library(lme4)
library(sjPlot)
library(tidyverse)
library(stringr)
library(arrow)
# Project root that the relative paths in this script are resolved against.
# Set the META_METRICS_REPO environment variable to run the analysis from a
# different checkout; when it is unset the original author's path is used.
project_root <- Sys.getenv(
  "META_METRICS_REPO",
  unset = "/Users/moritzlaurer/Dropbox/PhD/Papers/meta-metrics/meta-metrics-repo/"
)
setwd(project_root)
# Load the concatenated test-set predictions and derive the analysis columns.
d <- read_parquet("./results/df_test_concat.parquet.gzip") |>
  # Only experiments with biased training data are part of this analysis.
  filter(data_train_biased == TRUE) |>
  mutate(
    # biased_row is 1 when the test text comes from the biased group that
    # df_train was sampled from. group_members_train holds several members
    # (e.g. 3 countries like "USA|DEU|FRA") while group_members_test is a
    # single string like "USA", so a literal substring match is used instead
    # of testing the two columns for equality.
    biased_row = as.numeric(
      str_detect(group_members_train, fixed(group_members_test))
    ),
    # error is 1 when the predicted label differs from the gold label.
    error = as.numeric(label_pred != label_gold),
    # Readable classifier names.
    classifier = recode(
      method,
      "nli_short" = "BERT-NLI",
      "nli_void" = "BERT-NLI-void",
      "standard_dl" = "BERT-base",
      "classical_ml" = "logistic reg."
    ),
    # Readable names for the grouping variable.
    group_col = recode(
      group_col,
      "pres_party" = "party",
      "ISO_A3" = "country_3",
      "country_iso" = "country",
      "parfam_text" = "party_fam"
    ),
    # Readable dataset names.
    dataset = recode(
      dataset,
      "cap-merge" = "CAP-2",
      "cap-sotu" = "CAP-SotU",
      "coronanet" = "CoronaNet",
      "pimpo" = "PImPo"
    ),
    # The file name is used as the identifier of a training run.
    training_run = file_name
  )
# The level order determines which classifier acts as the reference
# method/intercept. BERT-NLI comes first because the main argument of the
# paper is about NLI.
d <- d |>
  mutate(
    classifier = factor(
      classifier,
      levels = c("BERT-NLI", "BERT-NLI-void", "BERT-base", "logistic reg.")
    )
  )
### single model without intercept
# Mixed-effects logistic regression of prediction errors on all data in d.
# Dropping the intercept (-1) gives every classifier its own baseline term,
# classifier:biased_row adds one biased_row effect per classifier, and
# (1 | training_run) is a random intercept per training run.
m_single_nointercept <- glmer(
  error ~ -1 + classifier + classifier:biased_row + (1 | training_run),
  family = binomial,
  data = d
)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
tab_model(m_single_nointercept, show.se = TRUE)
summary(m_single_nointercept)
plot_single_easier <- plot_model(
  m_single_nointercept,
  type = "pred",
  terms = c("classifier", "biased_row")
)
plot_single_easier
# Extract fixed effects coefficients (log-odds scale of the binomial model)
coefficients <- fixef(m_single_nointercept)
# Exponentiate: the classifier terms become baseline odds of an error, the
# classifier:biased_row terms become odds ratios for biased_row.
odds_ratios <- exp(coefficients)
# probabilities without bias
# Convert odds to probabilities: p = odds / (odds + 1).
# x: numeric vector of odds (names, if any, are kept).
# Returns a numeric vector of the same length. Infinite odds map to 1, the
# limit of the ratio, instead of the NaN that Inf / (Inf + 1) evaluates to.
odds_ratio_to_prob <- function(x) {
  prob <- x / (x + 1)
  prob[!is.na(x) & x == Inf] <- 1
  prob
}
# Pair every classifier term with its classifier:biased_row term by name
# rather than by position, so the result stays correct if the number or order
# of classifier terms in the model changes.
stopifnot(!is.null(names(odds_ratios)))
bias_suffix <- ":biased_row"
is_bias_term <- endsWith(names(odds_ratios), bias_suffix)
baseline_odds <- odds_ratios[!is_bias_term]
bias_odds_ratios <- odds_ratios[paste0(names(baseline_odds), bias_suffix)]
# Fail loudly if a classifier has no matching biased_row term.
stopifnot(length(baseline_odds) > 0, !anyNA(bias_odds_ratios))
# odds_ratio_to_prob is vectorised, so it is applied to the whole vector.
probability_error_without_bias <- odds_ratio_to_prob(baseline_odds)
# probabilities with bias: baseline odds multiplied by the biased_row odds ratio
probability_error_with_bias <- odds_ratio_to_prob(baseline_odds * bias_odds_ratios)
# Difference in error probability between non-biased and biased rows.
bias_benefit <- probability_error_without_bias - probability_error_with_bias
### model per data + group var
# Fit the no-intercept error model separately for every combination of
# dataset and grouping variable that has rows in d. The fitted models are
# stored in a list named "<dataset> - <group_col>".
models <- list()
for (dataset_name in unique(d$dataset)) {
  for (group_name in unique(d$group_col)) {
    ds <- d[d$dataset == dataset_name & d$group_col == group_name, ]
    # Combinations without any rows are skipped.
    if (nrow(ds) > 0) {
      label <- paste(dataset_name, group_name, sep = " - ")
      # Progress output: which model is being fitted.
      message(label)
      # Alternative formula with an intercept (not used):
      # error ~ classifier*biased_row + (1 | training_run)
      models[[label]] <- glmer(
        error ~ -1 + classifier + classifier:biased_row + (1 | training_run),
        family = binomial,
        data = ds
      )
    }
  }
}
# Regression table with one column per dataset / group model, labelled with
# the model names; confidence intervals and standard errors are hidden and
# p-values are shown as stars. FALSE is spelled out because F is an ordinary
# variable that can be reassigned.
tab_model(
  models,
  dv.labels = names(models),
  show.ci = FALSE,
  show.se = FALSE,
  p.style = "stars"
)
# One prediction plot (type = "pred") per fitted model, titled with the
# model's label and stored under the same name.
plots <- setNames(vector("list", length(models)), names(models))
for (model_name in names(models)) {
  plots[[model_name]] <- plot_model(
    models[[model_name]],
    type = "pred",
    title = model_name,
    terms = c("classifier", "biased_row")
  )
}
# Arrange all plots in one grid, passing an empty tag string for every plot.
plot_grid(plots, tags = rep("", length(plots)))
### single model with intercept
# not used in paper
#m_single = glmer(error ~ classifier*biased_row + (1 | training_run), family=binomial, data=d)
#tab_model(m_single)
#summary(m_single)
#plot_single <- plot_model(m_single, type='pred', terms=c('classifier','biased_row'))
#plot_single