1 Load packages

2 Settings

# set theme 
theme_set(theme_classic())

# sum coding
options(contrasts = c("contr.sum","contr.poly"))

# suppress grouping warning 
options(dplyr.summarise.inform = F)

3 EXPERIMENT 1: CHAIN CASES

3.1 DATA

3.1.1 Read in data

df.exp1_children = read_csv("../../data/exp1_child_clean.csv") 

df.exp1_children = df.exp1_children %>%
  select(scenario_order, gender_order, question, participant_age, age_float, distal, proximal, scenario, response_id, gender, lan_spoken)

3.1.2 Demographics

# gender
df.exp1_children %>%
  group_by(gender) %>%
  summarise(n = n_distinct(response_id)) %>%
  print()
## # A tibble: 2 × 2
##   gender     n
##   <chr>  <int>
## 1 f        232
## 2 m        181
# language spoken
df.exp1_children %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(response_id)) %>%
    print()
## # A tibble: 3 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              396
## 2 non-en            1
## 3 <NA>             16

3.2 STATS

3.2.1 Bayesian Model

df.exp1_children_different = df.exp1_children %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = distal - proximal,
        difference = recode(difference, `-1` = 0))

df.exp1_children_both = df.exp1_children %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both") 


 df.exp1_children_both = df.exp1_children_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.exp1_children_both %>% 
              mutate(difference = 0)) 

df.exp1_children_new = df.exp1_children_different %>% 
  bind_rows(df.exp1_children_both) 
  
df.model = df.exp1_children_new %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, 
                                "fault",   
                                "cause", 
                                "lexical"))

fit.brm.chain.children = brm(formula = difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/chain_cause_children_difference") 

fit.brm.chain.children 
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario) 
##    Data: df.model (Number of observations: 2432) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 412) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.90      0.10     0.71     1.09 1.00     3658     5727
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.97      0.73     0.17     3.01 1.00     1465      638
## 
## Regression Coefficients:
##                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              -1.54      0.69    -2.91     0.00 1.00     1329      800
## question1              -0.93      0.26    -1.44    -0.42 1.00     4724     5887
## question2               0.39      0.24    -0.08     0.87 1.00     5227     6722
## age_float               0.21      0.04     0.14     0.29 1.00     4310     7293
## question1:age_float     0.37      0.04     0.29     0.46 1.00     5277     5741
## question2:age_float     0.05      0.04    -0.02     0.12 1.00     5102     5888
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

3.2.2 Follow-up analysis

# effect of question 
fit.brm.chain.children %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  question response lower.HPD upper.HPD
##  fault      0.7812    0.5442     0.974
##  cause      0.6290    0.3447     0.920
##  lexical    0.0852    0.0118     0.239
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast        odds.ratio lower.HPD upper.HPD
##  fault / cause         2.11      1.63      2.68
##  fault / lexical      38.34     26.02     52.41
##  cause / lexical      18.19     13.12     24.48
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
# effect of age
fit.brm.chain.children %>%
  avg_slopes(variable = "age_float",
             type = "link")
## 
##  Estimate 2.5 % 97.5 %
##     0.218 0.144  0.293
## 
## Term: age_float
## Type:  link 
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect 
fit.brm.chain.children %>% 
  emtrends(specs = pairwise ~ question,
           var = "age_float")
## $emtrends
##  question age_float.trend lower.HPD upper.HPD
##  fault              0.589     0.477    0.7042
##  cause              0.266     0.173    0.3570
##  lexical           -0.210    -0.339   -0.0777
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast        estimate lower.HPD upper.HPD
##  fault - cause      0.323     0.195     0.453
##  fault - lexical    0.799     0.637     0.963
##  cause - lexical    0.477     0.329     0.629
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95

3.2.3 Means and Confidence Interval

df.exp1_children_new %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 3 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 cause    0.602   0.568     0.635
## 2 fault    0.708   0.677     0.739
## 3 lexical  0.116   0.0941    0.139

3.3 PLOTS

3.3.1 Overall Responses by Age

df.data = expand_grid(age_float = seq(3, 9, 0.1),
                      question = c("cause", "lexical", "fault"))

df.prediction = fit.brm.chain.children %>% 
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>% 
  as_tibble() %>% 
  bind_cols(df.data) %>% 
  clean_names()

df.linear = df.model %>% 
  mutate(participant_age = factor(participant_age, 
                            levels = c(3, 4, 5, 6, 7, 8, 9), 
                            labels = c("3", "4", "5", "6", "7", "8", "9")))

df.means = df.linear %>% 
  group_by(participant_age, question) %>% 
  summarize(
    response = Hmisc::smean.cl.boot(difference)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>% 
  mutate(participant_age = as.numeric(as.character(participant_age)))

p1 = ggplot(mapping = aes(x = age_float, 
                     y = difference,
                     color = question)) + 
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90,
                            color = question,
                            fill = question),
              stat = "identity",
              alpha = 0.2) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = participant_age,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = question,
                                shape = question),
                  color = "black",
                  size = 0.5,
                  show.legend = FALSE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.5)) +
  ylab(element_blank()) +
  scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"), 
                    breaks=c("cause", "lexical", "fault"),
                    labels = c("caused", "lexical", "fault")) +
  scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"), 
                     breaks=c("cause", "lexical", "fault"),
                     labels = c("caused", "lexical", "fault")) +
  scale_shape_manual(values = c(23, 21, 24)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n proximal", "75%", "50%", "75%", "100% \n distal"),
                     limits = c(0, 1)) + 
  scale_x_continuous(name = "Age (in years)",
                     breaks = 3:9,
                     labels = c("3", "4", "5", "6", "7", "8", "9")) +
  labs(y = "Probability of selecting \n proximal or distal cause") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_text(size = 12.5))

3.3.2 Responses Patterns by Age

df.plot = df.exp1_children %>% 
  pivot_wider(names_from = question,
              values_from = c(distal, proximal)) %>% 
  pivot_longer(cols = distal_cause:proximal_fault,
               names_to = c("type", ".value"),
               names_sep = "_") %>% 
  mutate(response_pattern = case_when(
    cause == 0 & fault == 0 & lexical == 0 ~ "none",
    cause == 0 & fault == 0 & lexical == 1 ~ "lexical only",
    cause == 0 & fault == 1 & lexical == 0 ~ "fault only",
    cause == 1 & fault == 0 & lexical == 0 ~ "caused only",
    cause == 0 & fault == 1 & lexical == 1 ~ "lexical + fault",
    cause == 1 & fault == 0 & lexical == 1 ~ "caused + lexical",
    cause == 1 & fault == 1 & lexical == 0 ~ "caused + fault",
    cause == 1 & fault == 1 & lexical == 1 ~ "all"))  %>% 
  group_by(participant_age, type, response_pattern) %>%
  summarize(n = n()) %>%
  ungroup() %>%
  group_by(participant_age, type) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  ungroup() %>%
  mutate(type = factor(type, levels = c("proximal", "distal")),
         response_pattern = factor(response_pattern, levels = c("caused only", "lexical only", "fault only", "caused + lexical", "caused + fault", "lexical + fault",  "all", "none")))


df.labels = df.plot %>%
  group_by(participant_age, type) %>%
  summarize(count = sum(n)/2) %>%
  ungroup() %>%
  mutate(age = as.character(factor(participant_age,
                                   levels = c(3, 4, 5, 6, 7, 8, 9),
                                   labels = c("3", "4", "5", "6", "7", "8", "9"))),
         count = ifelse(type == "proximal" & participant_age == 3, str_c("n=", count), count),
         label = str_c("<span style='font-size:16px;'>", age, "</span>"),
         label = ifelse(type == "proximal",
                        str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
                              "<span style='font-size:12px;'>(", count, ")", "</span>"), label),
         type = factor(type, levels = c("proximal", "distal")))



df.plot = df.plot %>%
  left_join(df.labels %>%
              select(participant_age, type, label),
            by = c("participant_age", "type")) %>%
  mutate(label = factor(label, levels = df.labels$label))



p2 = ggplot(data = df.plot,
            mapping = aes(x = label,
                          y = percentage,
                          group = response_pattern,
                          color = response_pattern,
                          fill = response_pattern)) +
  geom_col(color = "black") +
  facet_wrap(~type,
             scales = "free_x") +
    scale_fill_manual(values = c("caused only" = "#dc3410",
                               "fault only" = "#4DAF4A",
                               "lexical only" = "#377EB8",
                               "caused + fault" = "#A0522D",
                               "lexical + fault" = "#40E0D0",
                               "caused + lexical" = "#800080",
                               "all" = "#FFFFFF",
                               "none" = "#000000")) +
  scale_color_manual(values = c("caused only" = "#dc3410",
                               "fault only" = "#4DAF4A",
                               "lexical only" = "#377EB8",
                               "caused + fault" = "#A0522D",
                               "lexical + fault" = "#40E0D0",
                               "caused + lexical" = "#800080",
                               "all" = "#FFFFFF")) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = str_c(seq(0, 100, 25), "%"),
                     expand = expansion(add = c(0, 0))) +
  scale_x_discrete(expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Response pattern \n\ probability",
       x = "Age (in years)") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_markdown(),
        strip.background = element_blank(),
        strip.text = element_text(size = 16),
        panel.spacing = unit(.4, "cm")) +
 guides(fill = guide_legend(order = 1, nrow = 2, byrow = TRUE,
                             override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))),
         color = guide_legend(order = 1, nrow = 2, byrow = TRUE,
                              override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))))

3.3.3 Combine Plots

design = "
122
"    

plot = wrap_plots(
  A = p1,
  B = p2,
  design = design
) + 
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold",
                                size = 20))


ggsave(plot, filename = "../../figures/experiment1/exp1_plot.pdf", height = 5.5, width = 13)


plot

4 EXPERIMENT 2: ABSENCE CASES

4.1 DATA

4.1.1 Read in data

df.exp2_children = read_csv("../../data/exp2_child_clean.csv") 

df.exp2_children = df.exp2_children %>%
  select(scenario_order, gender_order, question, participant_age, age_float, direct, absent, scenario, response_id, gender, lan_spoken)

duplicates_check <- df.exp2_children %>%
  group_by(response_id) %>%
  summarise(row_count = n()) %>%
  filter(row_count != 6)

print(duplicates_check)
## # A tibble: 0 × 2
## # ℹ 2 variables: response_id <dbl>, row_count <int>

4.1.2 Demographics

# gender
df.exp2_children %>%
  group_by(gender) %>%
  summarise(n = n_distinct(response_id)) %>%
  print()
## # A tibble: 3 × 2
##   gender     n
##   <chr>  <int>
## 1 f        205
## 2 m        178
## 3 o          1
# language spoken
df.exp2_children %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(response_id)) %>%
    print()
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              379
## 2 <NA>              5

4.2 STATS

4.2.1 Bayesian Model

df.exp2_children_different = df.exp2_children %>%
  mutate(response_pattern = case_when(
    absent == 1 & direct == 1 ~ "both",
    absent == 0 & direct == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absent - direct,
        difference = recode(difference, `-1` = 0))

df.exp2_children_both = df.exp2_children %>%
  mutate(response_pattern = case_when(
    absent == 1 & direct == 1 ~ "both",
    absent == 0 & direct == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both") 


 df.exp2_children_both = df.exp2_children_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.exp2_children_both %>% 
              mutate(difference = 0)) 

df.exp2_children_new = df.exp2_children_different %>% 
  bind_rows(df.exp2_children_both) 
  
df.model = df.exp2_children_new %>% 
  mutate(
    # Convert to factor explicitly
    question = as_factor(question),
    question = fct_relevel(question, 
                           "fault",   
                           "cause",
                           "lexical") 
  )

fit.brm.absence.children = brm(formula = difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .99999),
                     file = "cache/absence_cause_children_difference") 

fit.brm.absence.children 
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario) 
##    Data: df.model (Number of observations: 2245) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 380) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.34      0.12     1.12     1.58 1.00     4434     7760
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.97      1.11     0.07     4.04 1.00     3479     4475
## 
## Regression Coefficients:
##                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              -2.30      0.87    -4.03    -0.37 1.00     6480     4943
## question1              -1.36      0.31    -1.97    -0.77 1.00    12252     8517
## question2               0.24      0.29    -0.33     0.81 1.00    11577     9718
## age_float               0.29      0.05     0.18     0.39 1.00     9560     9571
## question1:age_float     0.47      0.05     0.37     0.58 1.00    11648     9020
## question2:age_float     0.12      0.05     0.03     0.21 1.00    11685     9170
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

4.2.2 Follow-up analysis

# effect of question 
fit.brm.absence.children %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## $emmeans
##  question response lower.HPD upper.HPD
##  fault      0.7431  0.492150     1.000
##  cause      0.6105  0.324045     0.970
##  lexical    0.0413  0.000509     0.139
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast        odds.ratio lower.HPD upper.HPD
##  fault / cause         1.84      1.38      2.37
##  fault / lexical      67.18     41.17    101.00
##  cause / lexical      36.46     22.76     53.50
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
# effect of age
fit.brm.absence.children %>%
  avg_slopes(variable = "age_float",
             type = "link")
## 
##  Estimate 2.5 % 97.5 %
##      0.29 0.184  0.395
## 
## Term: age_float
## Type:  link 
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect 
fit.brm.absence.children %>% 
  emtrends(specs = pairwise ~ question,
           var = "age_float")
## $emtrends
##  question age_float.trend lower.HPD upper.HPD
##  fault              0.758     0.621     0.910
##  cause              0.405     0.284     0.525
##  lexical           -0.307    -0.485    -0.119
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast        estimate lower.HPD upper.HPD
##  fault - cause      0.353     0.210     0.504
##  fault - lexical    1.065     0.847     1.277
##  cause - lexical    0.712     0.515     0.913
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95

4.2.3 Means and Confidence Interval

df.exp2_children_new %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 3 × 4
##   question   mean ci_lower ci_upper
##   <chr>     <dbl>    <dbl>    <dbl>
## 1 cause    0.576    0.541     0.611
## 2 fault    0.656    0.621     0.690
## 3 lexical  0.0828   0.0628    0.103

4.3 PLOTS

4.3.1 Overall Responses by Age

df.data = expand_grid(age_float = seq(3, 9, 0.1),
                      question = c("cause", "lexical", "fault"))

df.prediction = fit.brm.absence.children %>% 
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>% 
  as_tibble() %>% 
  bind_cols(df.data) %>% 
  clean_names()

df.linear = df.model %>% 
  mutate(participant_age = factor(participant_age, 
                            levels = c(3, 4, 5, 6, 7, 8, 9), 
                            labels = c("3", "4", "5", "6", "7", "8", "9")))

df.means = df.linear %>% 
  group_by(participant_age, question) %>% 
  summarize(
    response = Hmisc::smean.cl.boot(difference)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>% 
  mutate(participant_age = as.numeric(as.character(participant_age)))

p3 = ggplot(mapping = aes(x = age_float, 
                     y = difference,
                     color = question)) + 
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90,
                            color = question,
                            fill = question),
              stat = "identity",
              alpha = 0.2) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = participant_age,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = question,
                                shape = question),
                  color = "black",
                  size = 0.5,
                  show.legend = FALSE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.65)) +
  ylab(element_blank()) +
    scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"), 
                    breaks=c("cause", "lexical", "fault"),
                    labels = c("caused", "lexical", "fault")) +
  scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"), 
                     breaks=c("cause", "lexical", "fault"),
                     labels = c("caused", "lexical", "fault")) +
  scale_shape_manual(values = c(23, 21, 24)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n direct", "75%", "50%", "75%", "100% \n absent"),
                     limits = c(0, 1)) + 
  scale_x_continuous(name = "Age (in years)",
                     breaks = 3:9,
                     labels = c("3", "4", "5", "6", "7", "8", "9")) +
  labs(y = "Probability of selecting \n direct or absent cause") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_text(size = 12.5))

4.3.2 Responses Patterns by Age

df.plot = df.exp2_children %>% 
  pivot_wider(names_from = question,
              values_from = c(direct, absent)) %>% 
  pivot_longer(cols = direct_cause:absent_fault,
               names_to = c("type", ".value"),
               names_sep = "_") %>% 
  mutate(response_pattern = case_when(
    cause == 0 & fault == 0 & lexical == 0 ~ "none",
    cause == 0 & fault == 0 & lexical == 1 ~ "lexical only",
    cause == 0 & fault == 1 & lexical == 0 ~ "fault only",
    cause == 1 & fault == 0 & lexical == 0 ~ "caused only",
    cause == 0 & fault == 1 & lexical == 1 ~ "lexical + fault",
    cause == 1 & fault == 0 & lexical == 1 ~ "caused + lexical",
    cause == 1 & fault == 1 & lexical == 0 ~ "caused + fault",
    cause == 1 & fault == 1 & lexical == 1 ~ "all")) %>% 
  group_by(participant_age, type, response_pattern) %>%
  summarize(n = n()) %>%
  ungroup() %>%
  group_by(participant_age, type) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  ungroup() %>%
  mutate(type = factor(type, levels = c("direct", "absent")),
         response_pattern = factor(response_pattern, levels = c("caused only", "lexical only", "fault only", "caused + lexical", "caused + fault", "lexical + fault",  "all", "none")))


df.labels = df.plot %>%
  group_by(participant_age, type) %>%
  summarize(count = sum(n)/2) %>%
  ungroup() %>%
  mutate(age = as.character(factor(participant_age,
                                   levels = c(3, 4, 5, 6, 7, 8, 9),
                                   labels = c("3", "4", "5", "6", "7", "8", "9"))),
         count = ifelse(type == "direct" & participant_age == 3, str_c("n=", count), count),
         label = str_c("<span style='font-size:16px;'>", age, "</span>"),
         label = ifelse(type == "direct",
                        str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
                              "<span style='font-size:12px;'>(", count, ")", "</span>"), label),
         type = factor(type, levels = c("direct", "absent")))



df.plot = df.plot %>%
  left_join(df.labels %>%
              select(participant_age, type, label),
            by = c("participant_age", "type")) %>%
  mutate(label = factor(label, levels = df.labels$label))



p4 = ggplot(data = df.plot,
            mapping = aes(x = label,
                          y = percentage,
                          group = response_pattern,
                          color = response_pattern,
                          fill = response_pattern)) +
  geom_col(color = "black") +
  facet_wrap(~type,
             scales = "free_x") +
    scale_fill_manual(values = c("caused only" = "#dc3410",
                               "fault only" = "#4DAF4A",
                               "lexical only" = "#377EB8",
                               "caused + fault" = "#A0522D",
                               "lexical + fault" = "#40E0D0",
                               "caused + lexical" = "#800080",
                               "all" = "#FFFFFF",
                               "none" = "#000000")) +
  scale_color_manual(values = c("caused only" = "#dc3410",
                               "fault only" = "#4DAF4A",
                               "lexical only" = "#377EB8",
                               "caused + fault" = "#A0522D",
                               "lexical + fault" = "#40E0D0",
                               "caused + lexical" = "#800080",
                               "all" = "#FFFFFF")) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = str_c(seq(0, 100, 25), "%"),
                     expand = expansion(add = c(0, 0))) +
  scale_x_discrete(expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Response pattern \n\ probability",
       x = "Age (in years)") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_markdown(),
        strip.background = element_blank(),
        strip.text = element_text(size = 16),
        panel.spacing = unit(.4, "cm")) +
 guides(fill = guide_legend(order = 1, nrow = 2, byrow = TRUE,
                             override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))),
         color = guide_legend(order = 1, nrow = 2, byrow = TRUE,
                              override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))))

4.3.3 Combine Plots

design = "
122
"    

plot = wrap_plots(
  A = p3,
  B = p4,
  design = design
) + 
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold",
                                size = 20))


ggsave(plot, filename = "../../figures/experiment2/exp2_plot.pdf", height = 5.5, width = 13)


plot

5 APPENDIX

5.1 EXPERIMENT 1: CHAIN CASES

5.1.1 STATS

5.1.1.1 Bayesian Models

5.1.1.1.1 Distal
5.1.1.1.1.1 Question Only
# dummy coding
options(contrasts = c("contr.treatment", "contr.poly"))


df.model = df.exp1_children %>% 
  filter(age_float >= 4, age_float <= 10,
         question != "fault") %>%
  mutate(question = as_factor(question),
         question = fct_relevel(question, 
                                "lexical"))
# Check contrast values
 contrasts(df.model$question)
##         cause
## lexical     0
## cause       1
fit.brm.cause.children.distal = brm(formula = distal ~ 1 + question + (1 | response_id) + (1 | scenario),
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/cause_children_distal") 

fit.brm.cause.children.distal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question + (1 | response_id) + (1 | scenario) 
##    Data: df.model (Number of observations: 1408) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.10      0.15     0.81     1.41 1.00     2675     5188
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.09      0.89     0.15     3.60 1.00     1662      622
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -2.63      0.73    -4.26    -0.98 1.00     1525      785
## questioncause     3.46      0.21     3.06     3.89 1.00     5545     7397
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.1.1.1.1.2 Question by Age
df.model = df.exp1_children %>% 
  filter(age_float >= 4, age_float <= 10,
         question != "fault") %>%
  mutate(question = as_factor(question),
         question = fct_relevel(question, 
                                "lexical"))
# Check contrast values
 contrasts(df.model$question)
##         cause
## lexical     0
## cause       1
 # removed randmon intercept for scenario to prevent divergent transitions
fit.brm.cause.children.distal.age = brm(formula = distal ~ 1 + question*age_float + (1 | response_id), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/cause_children_distal_age") 

fit.brm.cause.children.distal.age 
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question * age_float + (1 | response_id) 
##    Data: df.model (Number of observations: 1408) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.07      0.15     0.78     1.38 1.00     4094     6553
## 
## Regression Coefficients:
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                  -2.23      0.63    -3.48    -1.01 1.00     9936
## questioncause               1.69      0.70     0.32     3.08 1.00    10136
## age_float                  -0.07      0.09    -0.24     0.11 1.00     9605
## questioncause:age_float     0.25      0.10     0.06     0.45 1.00     9990
##                         Tail_ESS
## Intercept                   9183
## questioncause               9017
## age_float                   9554
## questioncause:age_float     8416
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.1.1.1.2 Proximal
5.1.1.1.2.1 Question Only
df.model = df.exp1_children %>% 
  filter(age_float >= 4, age_float <= 10,
         question != "fault") %>%
  mutate(question = as_factor(question),
         question = fct_relevel(question, 
                                "lexical"))
# Check contrast values
 contrasts(df.model$question)
##         cause
## lexical     0
## cause       1
fit.brm.cause.children.proximal = brm(formula = proximal ~ 1 + question + (1 | response_id) + + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      file = "cache/cause_children_proximal") 

fit.brm.cause.children.proximal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question + (1 | response_id) + +(1 | scenario) 
##    Data: df.model (Number of observations: 1408) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.15      0.15     0.87     1.45 1.00     3301     5318
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.10      0.86     0.17     3.35 1.00     3424     4098
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         2.30      0.76     0.54     3.87 1.00     3179     2033
## questioncause    -3.17      0.20    -3.56    -2.79 1.00     6843     7724
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.1.1.1.2.2 Question by Age
df.model = df.exp1_children %>% 
  filter(age_float >= 4, age_float <= 10,
         question != "fault") %>%
  mutate(question = as_factor(question),
         question = fct_relevel(question, 
                                "lexical"))
# Check contrast values
 contrasts(df.model$question)
##         cause
## lexical     0
## cause       1
fit.brm.cause.children.proximal.age = brm(formula = proximal ~ 1 + question*age_float + (1 | response_id) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      file = "cache/cause_children_proximal_age") 

fit.brm.cause.children.proximal.age
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question * age_float + (1 | response_id) + (1 | scenario) 
##    Data: df.model (Number of observations: 1408) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.17      0.15     0.89     1.47 1.00     3289     5576
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.12      0.88     0.18     3.52 1.00     3341     1837
## 
## Regression Coefficients:
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   1.12      0.98    -0.99     3.10 1.00     2661
## questioncause              -1.00      0.65    -2.29     0.27 1.00     5097
## age_float                   0.18      0.09     0.01     0.35 1.00     5019
## questioncause:age_float    -0.32      0.10    -0.52    -0.14 1.00     4949
##                         Tail_ESS
## Intercept                   1345
## questioncause               7207
## age_float                   7414
## questioncause:age_float     6618
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

5.1.2 PLOTS

5.1.2.1 Caused vs. Lexical

df.data = expand_grid(age_float = seq(4, 9, 0.1),
                      question = c("cause", "lexical"))

df.prediction = fit.brm.cause.children.distal.age %>% 
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>% 
  as_tibble() %>% 
  bind_cols(df.data) %>% 
  mutate(type = "distal") %>% 
  bind_rows(fit.brm.cause.children.proximal.age %>% 
              fitted(newdata = df.data,
                     re_formula = NA,
                     probs = c(0.1, 0.9)) %>% 
              as_tibble() %>% 
              bind_cols(df.data) %>% 
              mutate(type = "proximal")) %>% 
  clean_names()

df.children = df.exp1_children %>% 
  filter(age_float >= 4, age_float <= 10,
         question != "fault") %>%
  pivot_longer(cols = distal:proximal,
               names_to = "type",
               values_to = "response")

df.means = df.children %>% 
  mutate(participant_age = factor(participant_age, 
                            levels = c(4, 5, 6, 7, 8, 9), 
                            labels = c("4", "5", "6", "7", "8", "9")),
         question = recode(question, "cause" = "cause")) %>% 
  group_by(participant_age, question, type) %>% 
  summarize(
    response = Hmisc::smean.cl.boot(response)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>% 
  mutate(participant_age = as.numeric(as.character(participant_age)))

df.means = df.means %>%
  mutate(question = factor(question, levels = c("cause", "lexical")))

custom_labels = as_labeller(c("distal" = "Distal Cause", "proximal" = "Proximal Cause"))

ggplot(mapping = aes(x = age_float, 
                     y = response,
                     color = question)) + 
  geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90,
                            color = question,
                            fill = question),
              stat = "identity",
              alpha = 0.2,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = participant_age,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = question,
                                shape = question),
                  color = "black",
                  size = 0.5,
                  show.legend = TRUE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.5)) +
    facet_wrap(~type,
             labeller = custom_labels) + 
  scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"), 
                    breaks=c("cause", "lexical")) +
  scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"), 
                     breaks=c("cause", "lexical")) +
  scale_shape_manual(values = c(23, 21)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                      labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
  scale_x_continuous(name = "Age",
                     breaks = 4:9,
                     labels = c("4", "5", "6", "7", "8", "9")) +
  labs(y = "Probability of selecting \n causal candidate") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_text(size = 12.5),
        strip.text = element_text(size = 16))

ggsave(filename = "../../figures/appendix/exp1_plot.pdf", height = 5, width = 8)

5.2 EXPERIMENT 2: ABSENCE CASES

5.2.1 STATS

5.2.1.1 Bayesian Models

5.2.1.1.1 Absent
5.2.1.1.1.1 Question by Age
df.model = df.exp2_children %>% 
 filter(age_float >= 4, age_float <= 10,
         question != "fault") %>%
  mutate(question = as_factor(question),
         question = fct_relevel(question, 
                                "lexical"))

# Check contrast values
 contrasts(df.model$question)
##         cause
## lexical     0
## cause       1
fit.brm.cause.children.absent.age = brm(formula = absent ~ 1 + question*age_float + (1 | response_id) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/cause_children_absent_age") 

fit.brm.cause.children.absent.age 
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: absent ~ 1 + question * age_float + (1 | response_id) + (1 | scenario) 
##    Data: df.model (Number of observations: 1280) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 320) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.03      0.23     1.59     2.46 1.03      125     1082
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.27      1.71     0.02     6.38 1.18       16       23
## 
## Regression Coefficients:
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                  -2.87      1.59    -6.54    -0.23 1.15       17
## questioncause               0.00      0.99    -2.02     1.85 1.02      216
## age_float                  -0.21      0.16    -0.52     0.06 1.08       30
## questioncause:age_float     0.74      0.16     0.44     1.07 1.01      607
##                         Tail_ESS
## Intercept                     26
## questioncause               3064
## age_float                    169
## questioncause:age_float     1954
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.2.1.1.2 Direct
5.2.1.1.2.1 Question by Age
df.model = df.exp2_children %>% 
 filter(age_float >= 4, age_float <= 10,
         question != "fault") %>%
  mutate(question = as_factor(question),
         question = fct_relevel(question, 
                                "lexical"))

# Check contrast values
 contrasts(df.model$question)
##         cause
## lexical     0
## cause       1
fit.brm.cause.children.direct.age = brm(formula = direct ~ 1 + question*age_float + (1 | response_id) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      file = "cache/cause_children_direct_age") 

fit.brm.cause.children.direct.age
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: direct ~ 1 + question * age_float + (1 | response_id) + (1 | scenario) 
##    Data: df.model (Number of observations: 1280) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 320) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.90      0.20     1.53     2.32 1.00     2490     4111
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.91      0.95     0.03     3.59 1.00      421      195
## 
## Regression Coefficients:
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   1.56      1.06    -0.48     3.76 1.01      449
## questioncause              -0.26      0.88    -1.96     1.42 1.01     1004
## age_float                   0.27      0.13     0.02     0.53 1.00     4269
## questioncause:age_float    -0.58      0.14    -0.85    -0.33 1.00     1306
##                         Tail_ESS
## Intercept                    195
## questioncause                547
## age_float                   5202
## questioncause:age_float     6537
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

5.2.2 PLOTS

5.2.2.1 Caused vs. Lexical

df.data = expand_grid(age_float = seq(3, 9, 0.1),
                      question = c("cause", "lexical"))

df.prediction = fit.brm.cause.children.absent.age  %>% 
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>% 
  as_tibble() %>% 
  bind_cols(df.data) %>% 
  mutate(type = "absent") %>% 
  bind_rows(fit.brm.cause.children.direct.age  %>% 
              fitted(newdata = df.data,
                     re_formula = NA,
                     probs = c(0.1, 0.9)) %>% 
              as_tibble() %>% 
              bind_cols(df.data) %>% 
              mutate(type = "direct")) %>% 
  clean_names()

df.children = df.exp2_children %>% 
   filter(age_float >= 4, age_float <= 10,
         question != "fault") %>%
  pivot_longer(cols = absent:direct,
               names_to = "type",
               values_to = "response")

df.means = df.children %>% 
  mutate(participant_age = factor(participant_age, 
                            levels = c(4, 5, 6, 7, 8, 9), 
                            labels = c("4", "5", "6", "7", "8", "9")),
         question = recode(question, "cause" = "cause")) %>% 
  group_by(participant_age, question, type) %>% 
  summarize(
    response = Hmisc::smean.cl.boot(response)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>% 
  mutate(participant_age = as.numeric(as.character(participant_age)))

df.means = df.means %>%
  mutate(question = factor(question, levels = c("cause", "lexical")))

custom_labels = as_labeller(c("absent" = "Absent Cause", "direct" = "Direct Cause"))

ggplot(mapping = aes(x = age_float, 
                     y = response,
                     color = question)) + 
  geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90,
                            color = question,
                            fill = question),
              stat = "identity",
              alpha = 0.2,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = participant_age,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = question,
                                shape = question),
                  color = "black",
                  size = 0.5,
                  show.legend = TRUE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.5)) +
    facet_wrap(~type,
             labeller = custom_labels) + 
  scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"), 
                    breaks=c("cause", "lexical")) +
  scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"), 
                     breaks=c("cause", "lexical")) +
  scale_shape_manual(values = c(23, 21)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                      labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
  scale_x_continuous(name = "Age",
                     breaks = 4:9,
                     labels = c("4", "5", "6", "7", "8", "9")) +
  labs(y = "Probability of selecting \n causal candidate") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_text(size = 12.5),
        strip.text = element_text(size = 16))

ggsave(filename = "../../figures/appendix/exp2_plot.pdf", height = 5, width = 8) 

6 SESSION INFO

sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.3        forcats_1.0.0          stringr_1.5.1         
##  [4] dplyr_1.1.4            purrr_1.0.2            readr_2.1.5           
##  [7] tidyr_1.3.1            tibble_3.2.1           ggplot2_3.5.1         
## [10] tidyverse_2.0.0        RColorBrewer_1.1-3     ggtext_0.1.2          
## [13] marginaleffects_0.22.0 knitr_1.45             patchwork_1.2.0       
## [16] brms_2.21.0            Rcpp_1.0.12            emmeans_1.10.1        
## [19] janitor_2.2.0          xtable_1.8-4           tidytext_0.4.1        
## 
## loaded via a namespace (and not attached):
##  [1] gridExtra_2.3        inline_0.3.19        rlang_1.1.3         
##  [4] magrittr_2.0.3       snakecase_0.11.1     matrixStats_1.2.0   
##  [7] compiler_4.3.3       loo_2.7.0            systemfonts_1.0.6   
## [10] vctrs_0.6.5          reshape2_1.4.4       pkgconfig_2.0.3     
## [13] crayon_1.5.2         fastmap_1.1.1        backports_1.4.1     
## [16] utf8_1.2.4           rmarkdown_2.26       markdown_1.12       
## [19] tzdb_0.4.0           ragg_1.3.0           bit_4.0.5           
## [22] xfun_0.43            cachem_1.0.8         jsonlite_1.8.8      
## [25] tinytable_0.2.1      collapse_2.0.16      highr_0.10          
## [28] SnowballC_0.7.1      parallel_4.3.3       cluster_2.1.6       
## [31] R6_2.5.1             bslib_0.7.0          stringi_1.8.4       
## [34] StanHeaders_2.32.6   rpart_4.1.23         jquerylib_0.1.4     
## [37] estimability_1.5     bookdown_0.38        rstan_2.32.6        
## [40] base64enc_0.1-3      bayesplot_1.11.1     nnet_7.3-19         
## [43] Matrix_1.6-5         timechange_0.3.0     tidyselect_1.2.1    
## [46] rstudioapi_0.16.0    abind_1.4-5          yaml_2.3.8          
## [49] codetools_0.2-19     pkgbuild_1.4.4       lattice_0.22-5      
## [52] plyr_1.8.9           withr_3.0.0          bridgesampling_1.1-2
## [55] posterior_1.5.0      coda_0.19-4.1        evaluate_0.23       
## [58] foreign_0.8-86       RcppParallel_5.1.7   xml2_1.3.6          
## [61] pillar_1.9.0         janeaustenr_1.0.0    tensorA_0.36.2.1    
## [64] checkmate_2.3.1      stats4_4.3.3         insight_0.20.5      
## [67] distributional_0.4.0 generics_0.1.3       vroom_1.6.5         
## [70] hms_1.1.3            commonmark_1.9.1     rstantools_2.4.0    
## [73] munsell_0.5.1        scales_1.3.0         glue_1.7.0          
## [76] Hmisc_5.1-2          tools_4.3.3          diffobj_0.3.5       
## [79] data.table_1.15.4    tokenizers_0.3.0     mvtnorm_1.2-4       
## [82] QuickJSR_1.1.3       colorspace_2.1-0     nlme_3.1-164        
## [85] htmlTable_2.4.2      Formula_1.2-5        cli_3.6.2           
## [88] textshaping_0.3.7    fansi_1.0.6          Brobdingnag_1.2-9   
## [91] gtable_0.3.5         sass_0.4.9           digest_0.6.35       
## [94] farver_2.1.2         htmlwidgets_1.6.4    htmltools_0.5.8.1   
## [97] lifecycle_1.0.4      gridtext_0.1.5       bit64_4.0.5