# gender
df.exp1_children %>%
group_by(gender) %>%
summarise(n = n_distinct(response_id)) %>%
print()
## # A tibble: 2 × 2
## gender n
## <chr> <int>
## 1 f 232
## 2 m 181
# language spoken
df.exp1_children %>%
mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
group_by(en_occurrence) %>%
summarise(n = n_distinct(response_id)) %>%
print()
## # A tibble: 3 × 2
## en_occurrence n
## <chr> <int>
## 1 en 396
## 2 non-en 1
## 3 <NA> 16
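# construct a trial-level difference score: 1 = distal cause only, 0 = proximal cause only;
# trials where both causes were selected are duplicated once with difference = 1 and once
# with difference = 0 (counting as chance level); trials where neither was selected are dropped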
df.exp1_children_different = df.exp1_children %>%
mutate(response_pattern = case_when(
distal == 1 & proximal == 1 ~ "both",
distal == 0 & proximal == 0 ~ "neither",
TRUE ~ "different"
)) %>%
filter(response_pattern == "different") %>%
mutate(difference = distal - proximal,
difference = recode(difference, `-1` = 0))
df.exp1_children_both = df.exp1_children %>%
mutate(response_pattern = case_when(
distal == 1 & proximal == 1 ~ "both",
distal == 0 & proximal == 0 ~ "neither",
TRUE ~ "different"
)) %>%
filter(response_pattern == "both")
df.exp1_children_both = df.exp1_children_both %>%
mutate(difference = 1) %>%
bind_rows(df.exp1_children_both %>%
mutate(difference = 0))
df.exp1_children_new = df.exp1_children_different %>%
bind_rows(df.exp1_children_both)
df.model = df.exp1_children_new %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"fault",
"cause",
"lexical"))
fit.brm.chain.children = brm(formula = difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/chain_cause_children_difference")
fit.brm.chain.children
## Family: bernoulli
## Links: mu = logit
## Formula: difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 2432)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 412)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.90 0.10 0.71 1.09 1.00 3658 5727
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.97 0.73 0.17 3.01 1.00 1465 638
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.54 0.69 -2.91 0.00 1.00 1329 800
## question1 -0.93 0.26 -1.44 -0.42 1.00 4724 5887
## question2 0.39 0.24 -0.08 0.87 1.00 5227 6722
## age_float 0.21 0.04 0.14 0.29 1.00 4310 7293
## question1:age_float 0.37 0.04 0.29 0.46 1.00 5277 5741
## question2:age_float 0.05 0.04 -0.02 0.12 1.00 5102 5888
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# effect of question
fit.brm.chain.children %>%
emmeans(spec = pairwise ~ question,
type = "response")## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## question response lower.HPD upper.HPD
## fault 0.7812 0.5442 0.974
## cause 0.6290 0.3447 0.920
## lexical 0.0852 0.0118 0.239
##
## Point estimate displayed: median
## Results are back-transformed from the logit scale
## HPD interval probability: 0.95
##
## $contrasts
## contrast odds.ratio lower.HPD upper.HPD
## fault / cause 2.11 1.63 2.68
## fault / lexical 38.34 26.02 52.41
## cause / lexical 18.19 13.12 24.48
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
##
## Estimate 2.5 % 97.5 %
## 0.218 0.144 0.293
##
## Term: age_float
## Type: link
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect
fit.brm.chain.children %>%
emtrends(specs = pairwise ~ question,
var = "age_float")## $emtrends
## question age_float.trend lower.HPD upper.HPD
## fault 0.589 0.477 0.7042
## cause 0.266 0.173 0.3570
## lexical -0.210 -0.339 -0.0777
##
## Point estimate displayed: median
## HPD interval probability: 0.95
##
## $contrasts
## contrast estimate lower.HPD upper.HPD
## fault - cause 0.323 0.195 0.453
## fault - lexical 0.799 0.637 0.963
## cause - lexical 0.477 0.329 0.629
##
## Point estimate displayed: median
## HPD interval probability: 0.95
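# descriptive means of the difference score by question (95% normal-approximation CIs)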
df.exp1_children_new %>%
group_by(question) %>%
summarise(
mean = mean(difference),
ci_lower = mean_cl_normal(difference)$ymin,
ci_upper = mean_cl_normal(difference)$ymax
)
## # A tibble: 3 × 4
## question mean ci_lower ci_upper
## <chr> <dbl> <dbl> <dbl>
## 1 cause 0.602 0.568 0.635
## 2 fault 0.708 0.677 0.739
## 3 lexical 0.116 0.0941 0.139
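# developmental plot (p1): population-level model predictions across ages 3-9
# (80% credible band from probs = c(0.1, 0.9)) overlaid with bootstrapped means per age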
df.data = expand_grid(age_float = seq(3, 9, 0.1),
question = c("cause", "lexical", "fault"))
df.prediction = fit.brm.chain.children %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
clean_names()
df.linear = df.model %>%
mutate(participant_age = factor(participant_age,
levels = c(3, 4, 5, 6, 7, 8, 9),
labels = c("3", "4", "5", "6", "7", "8", "9")))
df.means = df.linear %>%
group_by(participant_age, question) %>%
summarize(
response = Hmisc::smean.cl.boot(difference)) %>%
mutate(index = c("response", "low", "high")) %>%
ungroup() %>%
pivot_wider(names_from = index,
values_from = response) %>%
mutate(participant_age = as.numeric(as.character(participant_age)))
p1 = ggplot(mapping = aes(x = age_float,
y = difference,
color = question)) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
geom_smooth(data = df.prediction,
mapping = aes(y = estimate,
ymin = q10,
ymax = q90,
color = question,
fill = question),
stat = "identity",
alpha = 0.2) +
geom_pointrange(data = df.means,
mapping = aes(x = participant_age,
y = response,
ymin = low,
ymax = high,
fill = question,
shape = question),
color = "black",
size = 0.5,
show.legend = FALSE,
alpha = 0.8,
position = position_dodge(width = 0.5)) +
ylab(element_blank()) +
scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"),
breaks=c("cause", "lexical", "fault"),
labels = c("caused", "lexical", "fault")) +
scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"),
breaks=c("cause", "lexical", "fault"),
labels = c("caused", "lexical", "fault")) +
scale_shape_manual(values = c(23, 21, 24)) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
labels = c("100% \n proximal", "75%", "50%", "75%", "100% \n distal"),
limits = c(0, 1)) +
scale_x_continuous(name = "Age (in years)",
breaks = 3:9,
labels = c("3", "4", "5", "6", "7", "8", "9")) +
labs(y = "Probability of selecting \n proximal or distal cause") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12.5))

# response-pattern plot (p2): percentage of each selection pattern (combinations of the
# caused, lexical, and fault questions) by age, separately for the proximal and distal cause
df.plot = df.exp1_children %>%
pivot_wider(names_from = question,
values_from = c(distal, proximal)) %>%
pivot_longer(cols = distal_cause:proximal_fault,
names_to = c("type", ".value"),
names_sep = "_") %>%
mutate(response_pattern = case_when(
cause == 0 & fault == 0 & lexical == 0 ~ "none",
cause == 0 & fault == 0 & lexical == 1 ~ "lexical only",
cause == 0 & fault == 1 & lexical == 0 ~ "fault only",
cause == 1 & fault == 0 & lexical == 0 ~ "caused only",
cause == 0 & fault == 1 & lexical == 1 ~ "lexical + fault",
cause == 1 & fault == 0 & lexical == 1 ~ "caused + lexical",
cause == 1 & fault == 1 & lexical == 0 ~ "caused + fault",
cause == 1 & fault == 1 & lexical == 1 ~ "all")) %>%
group_by(participant_age, type, response_pattern) %>%
summarize(n = n()) %>%
ungroup() %>%
group_by(participant_age, type) %>%
mutate(percentage = n / sum(n) * 100) %>%
ungroup() %>%
mutate(type = factor(type, levels = c("proximal", "distal")),
response_pattern = factor(response_pattern, levels = c("caused only", "lexical only", "fault only", "caused + lexical", "caused + fault", "lexical + fault", "all", "none")))
df.labels = df.plot %>%
group_by(participant_age, type) %>%
summarize(count = sum(n)/2) %>%
ungroup() %>%
mutate(age = as.character(factor(participant_age,
levels = c(3, 4, 5, 6, 7, 8, 9),
labels = c("3", "4", "5", "6", "7", "8", "9"))),
count = ifelse(type == "proximal" & participant_age == 3, str_c("n=", count), count),
label = str_c("<span style='font-size:16px;'>", age, "</span>"),
label = ifelse(type == "proximal",
str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
"<span style='font-size:12px;'>(", count, ")", "</span>"), label),
type = factor(type, levels = c("proximal", "distal")))
df.plot = df.plot %>%
left_join(df.labels %>%
select(participant_age, type, label),
by = c("participant_age", "type")) %>%
mutate(label = factor(label, levels = df.labels$label))
p2 = ggplot(data = df.plot,
mapping = aes(x = label,
y = percentage,
group = response_pattern,
color = response_pattern,
fill = response_pattern)) +
geom_col(color = "black") +
facet_wrap(~type,
scales = "free_x") +
scale_fill_manual(values = c("caused only" = "#dc3410",
"fault only" = "#4DAF4A",
"lexical only" = "#377EB8",
"caused + fault" = "#A0522D",
"lexical + fault" = "#40E0D0",
"caused + lexical" = "#800080",
"all" = "#FFFFFF",
"none" = "#000000")) +
scale_color_manual(values = c("caused only" = "#dc3410",
"fault only" = "#4DAF4A",
"lexical only" = "#377EB8",
"caused + fault" = "#A0522D",
"lexical + fault" = "#40E0D0",
"caused + lexical" = "#800080",
"all" = "#FFFFFF")) +
scale_shape_manual(values = c(21, 23)) +
scale_y_continuous(breaks = seq(0, 100, 25),
labels = str_c(seq(0, 100, 25), "%"),
expand = expansion(add = c(0, 0))) +
scale_x_discrete(expand = expansion(add = c(0, 0))) +
coord_cartesian(clip = "off") +
labs(y = "Response pattern \n\ probability",
x = "Age (in years)") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_markdown(),
strip.background = element_blank(),
strip.text = element_text(size = 16),
panel.spacing = unit(.4, "cm")) +
guides(fill = guide_legend(order = 1, nrow = 2, byrow = TRUE,
override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))),
color = guide_legend(order = 1, nrow = 2, byrow = TRUE,
override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))))

# Experiment 2 (children): load the cleaned data
df.exp2_children = read_csv("../../data/exp2_child_clean.csv")
df.exp2_children = df.exp2_children %>%
select(scenario_order, gender_order, question, participant_age, age_float, direct, absent, scenario, response_id, gender, lan_spoken)
duplicates_check <- df.exp2_children %>%
group_by(response_id) %>%
summarise(row_count = n()) %>%
filter(row_count != 6)
print(duplicates_check)
## # A tibble: 0 × 2
## # ℹ 2 variables: response_id <dbl>, row_count <int>
# gender
df.exp2_children %>%
group_by(gender) %>%
summarise(n = n_distinct(response_id)) %>%
print()
## # A tibble: 3 × 2
## gender n
## <chr> <int>
## 1 f 205
## 2 m 178
## 3 o 1
# language spoken
df.exp2_children %>%
mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
group_by(en_occurrence) %>%
summarise(n = n_distinct(response_id)) %>%
print()
## # A tibble: 2 × 2
## en_occurrence n
## <chr> <int>
## 1 en 379
## 2 <NA> 5
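# same difference-score construction as in Experiment 1, now contrasting the absent cause
# (difference = 1) with the direct cause (difference = 0)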
df.exp2_children_different = df.exp2_children %>%
mutate(response_pattern = case_when(
absent == 1 & direct == 1 ~ "both",
absent == 0 & direct == 0 ~ "neither",
TRUE ~ "different"
)) %>%
filter(response_pattern == "different") %>%
mutate(difference = absent - direct,
difference = recode(difference, `-1` = 0))
df.exp2_children_both = df.exp2_children %>%
mutate(response_pattern = case_when(
absent == 1 & direct == 1 ~ "both",
absent == 0 & direct == 0 ~ "neither",
TRUE ~ "different"
)) %>%
filter(response_pattern == "both")
df.exp2_children_both = df.exp2_children_both %>%
mutate(difference = 1) %>%
bind_rows(df.exp2_children_both %>%
mutate(difference = 0))
df.exp2_children_new = df.exp2_children_different %>%
bind_rows(df.exp2_children_both)
df.model = df.exp2_children_new %>%
mutate(
# Convert to factor explicitly
question = as_factor(question),
question = fct_relevel(question,
"fault",
"cause",
"lexical")
)
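# bernoulli mixed-effects model (causation by absence), analogous to the Experiment 1 model;
# adapt_delta is raised to .99999, a stricter sampler setting that helps avoid divergent transitions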
fit.brm.absence.children = brm(formula = difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
control = list(adapt_delta = .99999),
file = "cache/absence_cause_children_difference")
fit.brm.absence.children
## Family: bernoulli
## Links: mu = logit
## Formula: difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 2245)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 380)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.34 0.12 1.12 1.58 1.00 4434 7760
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.97 1.11 0.07 4.04 1.00 3479 4475
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.30 0.87 -4.03 -0.37 1.00 6480 4943
## question1 -1.36 0.31 -1.97 -0.77 1.00 12252 8517
## question2 0.24 0.29 -0.33 0.81 1.00 11577 9718
## age_float 0.29 0.05 0.18 0.39 1.00 9560 9571
## question1:age_float 0.47 0.05 0.37 0.58 1.00 11648 9020
## question2:age_float 0.12 0.05 0.03 0.21 1.00 11685 9170
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# effect of question
fit.brm.absence.children %>%
emmeans(spec = pairwise ~ question,
type = "response")## $emmeans
## question response lower.HPD upper.HPD
## fault 0.7431 0.492150 1.000
## cause 0.6105 0.324045 0.970
## lexical 0.0413 0.000509 0.139
##
## Point estimate displayed: median
## Results are back-transformed from the logit scale
## HPD interval probability: 0.95
##
## $contrasts
## contrast odds.ratio lower.HPD upper.HPD
## fault / cause 1.84 1.38 2.37
## fault / lexical 67.18 41.17 101.00
## cause / lexical 36.46 22.76 53.50
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
##
## Estimate 2.5 % 97.5 %
## 0.29 0.184 0.395
##
## Term: age_float
## Type: link
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect
fit.brm.absence.children %>%
emtrends(specs = pairwise ~ question,
var = "age_float")## $emtrends
## question age_float.trend lower.HPD upper.HPD
## fault 0.758 0.621 0.910
## cause 0.405 0.284 0.525
## lexical -0.307 -0.485 -0.119
##
## Point estimate displayed: median
## HPD interval probability: 0.95
##
## $contrasts
## contrast estimate lower.HPD upper.HPD
## fault - cause 0.353 0.210 0.504
## fault - lexical 1.065 0.847 1.277
## cause - lexical 0.712 0.515 0.913
##
## Point estimate displayed: median
## HPD interval probability: 0.95
df.exp2_children_new %>%
group_by(question) %>%
summarise(
mean = mean(difference),
ci_lower = mean_cl_normal(difference)$ymin,
ci_upper = mean_cl_normal(difference)$ymax
)
## # A tibble: 3 × 4
## question mean ci_lower ci_upper
## <chr> <dbl> <dbl> <dbl>
## 1 cause 0.576 0.541 0.611
## 2 fault 0.656 0.621 0.690
## 3 lexical 0.0828 0.0628 0.103
df.data = expand_grid(age_float = seq(3, 9, 0.1),
question = c("cause", "lexical", "fault"))
df.prediction = fit.brm.absence.children %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
clean_names()
df.linear = df.model %>%
mutate(participant_age = factor(participant_age,
levels = c(3, 4, 5, 6, 7, 8, 9),
labels = c("3", "4", "5", "6", "7", "8", "9")))
df.means = df.linear %>%
group_by(participant_age, question) %>%
summarize(
response = Hmisc::smean.cl.boot(difference)) %>%
mutate(index = c("response", "low", "high")) %>%
ungroup() %>%
pivot_wider(names_from = index,
values_from = response) %>%
mutate(participant_age = as.numeric(as.character(participant_age)))
p3 = ggplot(mapping = aes(x = age_float,
y = difference,
color = question)) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
geom_smooth(data = df.prediction,
mapping = aes(y = estimate,
ymin = q10,
ymax = q90,
color = question,
fill = question),
stat = "identity",
alpha = 0.2) +
geom_pointrange(data = df.means,
mapping = aes(x = participant_age,
y = response,
ymin = low,
ymax = high,
fill = question,
shape = question),
color = "black",
size = 0.5,
show.legend = FALSE,
alpha = 0.8,
position = position_dodge(width = 0.65)) +
ylab(element_blank()) +
scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"),
breaks=c("cause", "lexical", "fault"),
labels = c("caused", "lexical", "fault")) +
scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"),
breaks=c("cause", "lexical", "fault"),
labels = c("caused", "lexical", "fault")) +
scale_shape_manual(values = c(23, 21, 24)) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
labels = c("100% \n direct", "75%", "50%", "75%", "100% \n absent"),
limits = c(0, 1)) +
scale_x_continuous(name = "Age (in years)",
breaks = 3:9,
labels = c("3", "4", "5", "6", "7", "8", "9")) +
labs(y = "Probability of selecting \n direct or absent cause") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12.5))

# response-pattern plot (p4): percentage of each selection pattern by age, separately for
# the direct and absent cause
df.plot = df.exp2_children %>%
pivot_wider(names_from = question,
values_from = c(direct, absent)) %>%
pivot_longer(cols = direct_cause:absent_fault,
names_to = c("type", ".value"),
names_sep = "_") %>%
mutate(response_pattern = case_when(
cause == 0 & fault == 0 & lexical == 0 ~ "none",
cause == 0 & fault == 0 & lexical == 1 ~ "lexical only",
cause == 0 & fault == 1 & lexical == 0 ~ "fault only",
cause == 1 & fault == 0 & lexical == 0 ~ "caused only",
cause == 0 & fault == 1 & lexical == 1 ~ "lexical + fault",
cause == 1 & fault == 0 & lexical == 1 ~ "caused + lexical",
cause == 1 & fault == 1 & lexical == 0 ~ "caused + fault",
cause == 1 & fault == 1 & lexical == 1 ~ "all")) %>%
group_by(participant_age, type, response_pattern) %>%
summarize(n = n()) %>%
ungroup() %>%
group_by(participant_age, type) %>%
mutate(percentage = n / sum(n) * 100) %>%
ungroup() %>%
mutate(type = factor(type, levels = c("direct", "absent")),
response_pattern = factor(response_pattern, levels = c("caused only", "lexical only", "fault only", "caused + lexical", "caused + fault", "lexical + fault", "all", "none")))
df.labels = df.plot %>%
group_by(participant_age, type) %>%
summarize(count = sum(n)/2) %>%
ungroup() %>%
mutate(age = as.character(factor(participant_age,
levels = c(3, 4, 5, 6, 7, 8, 9),
labels = c("3", "4", "5", "6", "7", "8", "9"))),
count = ifelse(type == "direct" & participant_age == 3, str_c("n=", count), count),
label = str_c("<span style='font-size:16px;'>", age, "</span>"),
label = ifelse(type == "direct",
str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
"<span style='font-size:12px;'>(", count, ")", "</span>"), label),
type = factor(type, levels = c("direct", "absent")))
df.plot = df.plot %>%
left_join(df.labels %>%
select(participant_age, type, label),
by = c("participant_age", "type")) %>%
mutate(label = factor(label, levels = df.labels$label))
p4 = ggplot(data = df.plot,
mapping = aes(x = label,
y = percentage,
group = response_pattern,
color = response_pattern,
fill = response_pattern)) +
geom_col(color = "black") +
facet_wrap(~type,
scales = "free_x") +
scale_fill_manual(values = c("caused only" = "#dc3410",
"fault only" = "#4DAF4A",
"lexical only" = "#377EB8",
"caused + fault" = "#A0522D",
"lexical + fault" = "#40E0D0",
"caused + lexical" = "#800080",
"all" = "#FFFFFF",
"none" = "#000000")) +
scale_color_manual(values = c("caused only" = "#dc3410",
"fault only" = "#4DAF4A",
"lexical only" = "#377EB8",
"caused + fault" = "#A0522D",
"lexical + fault" = "#40E0D0",
"caused + lexical" = "#800080",
"all" = "#FFFFFF")) +
scale_shape_manual(values = c(21, 23)) +
scale_y_continuous(breaks = seq(0, 100, 25),
labels = str_c(seq(0, 100, 25), "%"),
expand = expansion(add = c(0, 0))) +
scale_x_discrete(expand = expansion(add = c(0, 0))) +
coord_cartesian(clip = "off") +
labs(y = "Response pattern \n\ probability",
x = "Age (in years)") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_markdown(),
strip.background = element_blank(),
strip.text = element_text(size = 16),
panel.spacing = unit(.4, "cm")) +
guides(fill = guide_legend(order = 1, nrow = 2, byrow = TRUE,
override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))),
color = guide_legend(order = 1, nrow = 2, byrow = TRUE,
override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))))

# dummy coding
options(contrasts = c("contr.treatment", "contr.poly"))
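# contr.treatment dummy-codes question, so the first level ("lexical", set via fct_relevel below)
# serves as the reference category in the follow-up models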
df.model = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
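# follow-up model (Experiment 1): effect of question wording (cause vs. lexical) on
# distal-cause selections, children with 4 <= age_float <= 10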
fit.brm.cause.children.distal = brm(formula = distal ~ 1 + question + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_distal")
fit.brm.cause.children.distal
## Family: bernoulli
## Links: mu = logit
## Formula: distal ~ 1 + question + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1408)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.10 0.15 0.81 1.41 1.00 2675 5188
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.09 0.89 0.15 3.60 1.00 1662 622
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.63 0.73 -4.26 -0.98 1.00 1525 785
## questioncause 3.46 0.21 3.06 3.89 1.00 5545 7397
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df.model = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
# removed random intercept for scenario to prevent divergent transitions
fit.brm.cause.children.distal.age = brm(formula = distal ~ 1 + question*age_float + (1 | response_id),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_distal_age")
fit.brm.cause.children.distal.age
## Family: bernoulli
## Links: mu = logit
## Formula: distal ~ 1 + question * age_float + (1 | response_id)
## Data: df.model (Number of observations: 1408)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.07 0.15 0.78 1.38 1.00 4094 6553
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -2.23 0.63 -3.48 -1.01 1.00 9936
## questioncause 1.69 0.70 0.32 3.08 1.00 10136
## age_float -0.07 0.09 -0.24 0.11 1.00 9605
## questioncause:age_float 0.25 0.10 0.06 0.45 1.00 9990
## Tail_ESS
## Intercept 9183
## questioncause 9017
## age_float 9554
## questioncause:age_float 8416
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df.model = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
fit.brm.cause.children.proximal = brm(formula = proximal ~ 1 + question + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_proximal")
fit.brm.cause.children.proximal
## Family: bernoulli
## Links: mu = logit
## Formula: proximal ~ 1 + question + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1408)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.15 0.15 0.87 1.45 1.00 3301 5318
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.10 0.86 0.17 3.35 1.00 3424 4098
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.30 0.76 0.54 3.87 1.00 3179 2033
## questioncause -3.17 0.20 -3.56 -2.79 1.00 6843 7724
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df.model = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
fit.brm.cause.children.proximal.age = brm(formula = proximal ~ 1 + question*age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_proximal_age")
fit.brm.cause.children.proximal.age
## Family: bernoulli
## Links: mu = logit
## Formula: proximal ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1408)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.17 0.15 0.89 1.47 1.00 3289 5576
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.12 0.88 0.18 3.52 1.00 3341 1837
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.12 0.98 -0.99 3.10 1.00 2661
## questioncause -1.00 0.65 -2.29 0.27 1.00 5097
## age_float 0.18 0.09 0.01 0.35 1.00 5019
## questioncause:age_float -0.32 0.10 -0.52 -0.14 1.00 4949
## Tail_ESS
## Intercept 1345
## questioncause 7207
## age_float 7414
## questioncause:age_float 6618
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
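# faceted plot (Experiment 1): model predictions and bootstrapped means for selecting
# the distal vs. proximal cause, cause vs. lexical question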
df.data = expand_grid(age_float = seq(4, 9, 0.1),
question = c("cause", "lexical"))
df.prediction = fit.brm.cause.children.distal.age %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
mutate(type = "distal") %>%
bind_rows(fit.brm.cause.children.proximal.age %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
mutate(type = "proximal")) %>%
clean_names()
df.children = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
pivot_longer(cols = distal:proximal,
names_to = "type",
values_to = "response")
df.means = df.children %>%
mutate(participant_age = factor(participant_age,
levels = c(4, 5, 6, 7, 8, 9),
labels = c("4", "5", "6", "7", "8", "9")),
question = recode(question, "cause" = "cause")) %>%
group_by(participant_age, question, type) %>%
summarize(
response = Hmisc::smean.cl.boot(response)) %>%
mutate(index = c("response", "low", "high")) %>%
ungroup() %>%
pivot_wider(names_from = index,
values_from = response) %>%
mutate(participant_age = as.numeric(as.character(participant_age)))
df.means = df.means %>%
mutate(question = factor(question, levels = c("cause", "lexical")))
custom_labels = as_labeller(c("distal" = "Distal Cause", "proximal" = "Proximal Cause"))
ggplot(mapping = aes(x = age_float,
y = response,
color = question)) +
geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
geom_smooth(data = df.prediction,
mapping = aes(y = estimate,
ymin = q10,
ymax = q90,
color = question,
fill = question),
stat = "identity",
alpha = 0.2,
show.legend = F) +
geom_pointrange(data = df.means,
mapping = aes(x = participant_age,
y = response,
ymin = low,
ymax = high,
fill = question,
shape = question),
color = "black",
size = 0.5,
show.legend = TRUE,
alpha = 0.8,
position = position_dodge(width = 0.5)) +
facet_wrap(~type,
labeller = custom_labels) +
scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"),
breaks=c("cause", "lexical")) +
scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"),
breaks=c("cause", "lexical")) +
scale_shape_manual(values = c(23, 21)) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
scale_x_continuous(name = "Age",
breaks = 4:9,
labels = c("4", "5", "6", "7", "8", "9")) +
labs(y = "Probability of selecting \n causal candidate") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12.5),
strip.text = element_text(size = 16))

# follow-up models (Experiment 2): cause vs. lexical question, absent and direct responses
df.model = df.exp2_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
fit.brm.cause.children.absent.age = brm(formula = absent ~ 1 + question*age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_absent_age")
fit.brm.cause.children.absent.age
## Family: bernoulli
## Links: mu = logit
## Formula: absent ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1280)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 320)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 2.03 0.23 1.59 2.46 1.03 125 1082
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.27 1.71 0.02 6.38 1.18 16 23
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -2.87 1.59 -6.54 -0.23 1.15 17
## questioncause 0.00 0.99 -2.02 1.85 1.02 216
## age_float -0.21 0.16 -0.52 0.06 1.08 30
## questioncause:age_float 0.74 0.16 0.44 1.07 1.01 607
## Tail_ESS
## Intercept 26
## questioncause 3064
## age_float 169
## questioncause:age_float 1954
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df.model = df.exp2_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
fit.brm.cause.children.direct.age = brm(formula = direct ~ 1 + question*age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_direct_age")
fit.brm.cause.children.direct.age
## Family: bernoulli
## Links: mu = logit
## Formula: direct ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1280)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 320)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.90 0.20 1.53 2.32 1.00 2490 4111
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.91 0.95 0.03 3.59 1.00 421 195
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.56 1.06 -0.48 3.76 1.01 449
## questioncause -0.26 0.88 -1.96 1.42 1.01 1004
## age_float 0.27 0.13 0.02 0.53 1.00 4269
## questioncause:age_float -0.58 0.14 -0.85 -0.33 1.00 1306
## Tail_ESS
## Intercept 195
## questioncause 547
## age_float 5202
## questioncause:age_float 6537
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
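# faceted plot (Experiment 2): model predictions and bootstrapped means for selecting
# the absent vs. direct cause, cause vs. lexical question
# (note: the prediction grid starts at age 3 although the models were fit to age_float >= 4)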
df.data = expand_grid(age_float = seq(3, 9, 0.1),
question = c("cause", "lexical"))
df.prediction = fit.brm.cause.children.absent.age %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
mutate(type = "absent") %>%
bind_rows(fit.brm.cause.children.direct.age %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
mutate(type = "direct")) %>%
clean_names()
df.children = df.exp2_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
pivot_longer(cols = absent:direct,
names_to = "type",
values_to = "response")
df.means = df.children %>%
mutate(participant_age = factor(participant_age,
levels = c(4, 5, 6, 7, 8, 9),
labels = c("4", "5", "6", "7", "8", "9")),
question = recode(question, "cause" = "cause")) %>%
group_by(participant_age, question, type) %>%
summarize(
response = Hmisc::smean.cl.boot(response)) %>%
mutate(index = c("response", "low", "high")) %>%
ungroup() %>%
pivot_wider(names_from = index,
values_from = response) %>%
mutate(participant_age = as.numeric(as.character(participant_age)))
df.means = df.means %>%
mutate(question = factor(question, levels = c("cause", "lexical")))
custom_labels = as_labeller(c("absent" = "Absent Cause", "direct" = "Direct Cause"))
ggplot(mapping = aes(x = age_float,
y = response,
color = question)) +
geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
geom_smooth(data = df.prediction,
mapping = aes(y = estimate,
ymin = q10,
ymax = q90,
color = question,
fill = question),
stat = "identity",
alpha = 0.2,
show.legend = F) +
geom_pointrange(data = df.means,
mapping = aes(x = participant_age,
y = response,
ymin = low,
ymax = high,
fill = question,
shape = question),
color = "black",
size = 0.5,
show.legend = TRUE,
alpha = 0.8,
position = position_dodge(width = 0.5)) +
facet_wrap(~type,
labeller = custom_labels) +
scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"),
breaks=c("cause", "lexical")) +
scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"),
breaks=c("cause", "lexical")) +
scale_shape_manual(values = c(23, 21)) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
scale_x_continuous(name = "Age",
breaks = 4:9,
labels = c("4", "5", "6", "7", "8", "9")) +
labs(y = "Probability of selecting \n causal candidate") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12.5),
strip.text = element_text(size = 16))

# session info
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [4] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
## [10] tidyverse_2.0.0 RColorBrewer_1.1-3 ggtext_0.1.2
## [13] marginaleffects_0.22.0 knitr_1.45 patchwork_1.2.0
## [16] brms_2.21.0 Rcpp_1.0.12 emmeans_1.10.1
## [19] janitor_2.2.0 xtable_1.8-4 tidytext_0.4.1
##
## loaded via a namespace (and not attached):
## [1] gridExtra_2.3 inline_0.3.19 rlang_1.1.3
## [4] magrittr_2.0.3 snakecase_0.11.1 matrixStats_1.2.0
## [7] compiler_4.3.3 loo_2.7.0 systemfonts_1.0.6
## [10] vctrs_0.6.5 reshape2_1.4.4 pkgconfig_2.0.3
## [13] crayon_1.5.2 fastmap_1.1.1 backports_1.4.1
## [16] utf8_1.2.4 rmarkdown_2.26 markdown_1.12
## [19] tzdb_0.4.0 ragg_1.3.0 bit_4.0.5
## [22] xfun_0.43 cachem_1.0.8 jsonlite_1.8.8
## [25] tinytable_0.2.1 collapse_2.0.16 highr_0.10
## [28] SnowballC_0.7.1 parallel_4.3.3 cluster_2.1.6
## [31] R6_2.5.1 bslib_0.7.0 stringi_1.8.4
## [34] StanHeaders_2.32.6 rpart_4.1.23 jquerylib_0.1.4
## [37] estimability_1.5 bookdown_0.38 rstan_2.32.6
## [40] base64enc_0.1-3 bayesplot_1.11.1 nnet_7.3-19
## [43] Matrix_1.6-5 timechange_0.3.0 tidyselect_1.2.1
## [46] rstudioapi_0.16.0 abind_1.4-5 yaml_2.3.8
## [49] codetools_0.2-19 pkgbuild_1.4.4 lattice_0.22-5
## [52] plyr_1.8.9 withr_3.0.0 bridgesampling_1.1-2
## [55] posterior_1.5.0 coda_0.19-4.1 evaluate_0.23
## [58] foreign_0.8-86 RcppParallel_5.1.7 xml2_1.3.6
## [61] pillar_1.9.0 janeaustenr_1.0.0 tensorA_0.36.2.1
## [64] checkmate_2.3.1 stats4_4.3.3 insight_0.20.5
## [67] distributional_0.4.0 generics_0.1.3 vroom_1.6.5
## [70] hms_1.1.3 commonmark_1.9.1 rstantools_2.4.0
## [73] munsell_0.5.1 scales_1.3.0 glue_1.7.0
## [76] Hmisc_5.1-2 tools_4.3.3 diffobj_0.3.5
## [79] data.table_1.15.4 tokenizers_0.3.0 mvtnorm_1.2-4
## [82] QuickJSR_1.1.3 colorspace_2.1-0 nlme_3.1-164
## [85] htmlTable_2.4.2 Formula_1.2-5 cli_3.6.2
## [88] textshaping_0.3.7 fansi_1.0.6 Brobdingnag_1.2-9
## [91] gtable_0.3.5 sass_0.4.9 digest_0.6.35
## [94] farver_2.1.2 htmlwidgets_1.6.4 htmltools_0.5.8.1
## [97] lifecycle_1.0.4 gridtext_0.1.5 bit64_4.0.5