# gender
df.exp1_children %>%
group_by(gender) %>%
summarise(n = n_distinct(response_id)) %>%
print()
## # A tibble: 2 × 2
## gender n
## <chr> <int>
## 1 f 232
## 2 m 181
# language spoken
df.exp1_children %>%
mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
group_by(en_occurrence) %>%
summarise(n = n_distinct(response_id)) %>%
print()
## # A tibble: 3 × 2
## en_occurrence n
## <chr> <int>
## 1 en 396
## 2 non-en 1
## 3 <NA> 16
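# construct a trial-level difference score: 1 = distal cause only, 0 = proximal cause only;
# trials where both causes were selected are duplicated once with difference = 1 and once
# with difference = 0 (counting as chance level); trials where neither was selected are dropped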
df.exp1_children_different = df.exp1_children %>%
mutate(response_pattern = case_when(
distal == 1 & proximal == 1 ~ "both",
distal == 0 & proximal == 0 ~ "neither",
TRUE ~ "different"
)) %>%
filter(response_pattern == "different") %>%
mutate(difference = distal - proximal,
difference = recode(difference, `-1` = 0))
df.exp1_children_both = df.exp1_children %>%
mutate(response_pattern = case_when(
distal == 1 & proximal == 1 ~ "both",
distal == 0 & proximal == 0 ~ "neither",
TRUE ~ "different"
)) %>%
filter(response_pattern == "both")
df.exp1_children_both = df.exp1_children_both %>%
mutate(difference = 1) %>%
bind_rows(df.exp1_children_both %>%
mutate(difference = 0))
df.exp1_children_new = df.exp1_children_different %>%
bind_rows(df.exp1_children_both)
df.model = df.exp1_children_new %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"fault",
"cause",
"lexical"))
fit.brm.chain.children = brm(formula = difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/chain_cause_children_difference")
fit.brm.chain.children
## Family: bernoulli
## Links: mu = logit
## Formula: difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 2432)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 412)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.90 0.10 0.71 1.09 1.00 3658 5727
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.97 0.73 0.17 3.01 1.00 1465 638
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.54 0.69 -2.91 0.00 1.00 1329 800
## question1 -0.93 0.26 -1.44 -0.42 1.00 4724 5887
## question2 0.39 0.24 -0.08 0.87 1.00 5227 6722
## age_float 0.21 0.04 0.14 0.29 1.00 4310 7293
## question1:age_float 0.37 0.04 0.29 0.46 1.00 5277 5741
## question2:age_float 0.05 0.04 -0.02 0.12 1.00 5102 5888
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# effect of question
fit.brm.chain.children %>%
emmeans(spec = pairwise ~ question,
type = "response")## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## question response lower.HPD upper.HPD
## fault 0.7812 0.5442 0.974
## cause 0.6290 0.3447 0.920
## lexical 0.0852 0.0118 0.239
##
## Point estimate displayed: median
## Results are back-transformed from the logit scale
## HPD interval probability: 0.95
##
## $contrasts
## contrast odds.ratio lower.HPD upper.HPD
## fault / cause 2.11 1.63 2.68
## fault / lexical 38.34 26.02 52.41
## cause / lexical 18.19 13.12 24.48
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
##
## Estimate 2.5 % 97.5 %
## 0.218 0.144 0.293
##
## Term: age_float
## Type: link
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect
fit.brm.chain.children %>%
emtrends(specs = pairwise ~ question,
var = "age_float")## $emtrends
## question age_float.trend lower.HPD upper.HPD
## fault 0.589 0.477 0.7042
## cause 0.266 0.173 0.3570
## lexical -0.210 -0.339 -0.0777
##
## Point estimate displayed: median
## HPD interval probability: 0.95
##
## $contrasts
## contrast estimate lower.HPD upper.HPD
## fault - cause 0.323 0.195 0.453
## fault - lexical 0.799 0.637 0.963
## cause - lexical 0.477 0.329 0.629
##
## Point estimate displayed: median
## HPD interval probability: 0.95
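# descriptive means of the difference score by question (95% normal-approximation CIs)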
df.exp1_children_new %>%
group_by(question) %>%
summarise(
mean = mean(difference),
ci_lower = mean_cl_normal(difference)$ymin,
ci_upper = mean_cl_normal(difference)$ymax
)
## # A tibble: 3 × 4
## question mean ci_lower ci_upper
## <chr> <dbl> <dbl> <dbl>
## 1 cause 0.602 0.568 0.635
## 2 fault 0.708 0.677 0.739
## 3 lexical 0.116 0.0941 0.139
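# developmental plot (p1): population-level model predictions across ages 3-9
# (80% credible band from probs = c(0.1, 0.9)) overlaid with bootstrapped means per age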
df.data = expand_grid(age_float = seq(3, 9, 0.1),
question = c("cause", "lexical", "fault"))
df.prediction = fit.brm.chain.children %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
clean_names()
df.linear = df.model %>%
mutate(participant_age = factor(participant_age,
levels = c(3, 4, 5, 6, 7, 8, 9),
labels = c("3", "4", "5", "6", "7", "8", "9")))
df.means = df.linear %>%
group_by(participant_age, question) %>%
summarize(
response = Hmisc::smean.cl.boot(difference)) %>%
mutate(index = c("response", "low", "high")) %>%
ungroup() %>%
pivot_wider(names_from = index,
values_from = response) %>%
mutate(participant_age = as.numeric(as.character(participant_age)))
p1 = ggplot(mapping = aes(x = age_float,
y = difference,
color = question)) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
geom_smooth(data = df.prediction,
mapping = aes(y = estimate,
ymin = q10,
ymax = q90,
color = question,
fill = question),
stat = "identity",
alpha = 0.2) +
geom_pointrange(data = df.means,
mapping = aes(x = participant_age,
y = response,
ymin = low,
ymax = high,
fill = question,
shape = question),
color = "black",
size = 0.5,
show.legend = FALSE,
alpha = 0.8,
position = position_dodge(width = 0.5)) +
ylab(element_blank()) +
scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"),
breaks=c("cause", "lexical", "fault"),
labels = c("caused", "lexical", "fault")) +
scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"),
breaks=c("cause", "lexical", "fault"),
labels = c("caused", "lexical", "fault")) +
scale_shape_manual(values = c(23, 21, 24)) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
labels = c("100% \n proximal", "75%", "50%", "75%", "100% \n distal"),
limits = c(0, 1)) +
scale_x_continuous(name = "Age (in years)",
breaks = 3:9,
labels = c("3", "4", "5", "6", "7", "8", "9")) +
labs(y = "Probability of selecting \n proximal or distal cause") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12.5))

# response-pattern plot (p2): percentage of each selection pattern (combinations of the
# caused, lexical, and fault questions) by age, separately for the proximal and distal cause
df.plot = df.exp1_children %>%
pivot_wider(names_from = question,
values_from = c(distal, proximal)) %>%
pivot_longer(cols = distal_cause:proximal_fault,
names_to = c("type", ".value"),
names_sep = "_") %>%
mutate(response_pattern = case_when(
cause == 0 & fault == 0 & lexical == 0 ~ "none",
cause == 0 & fault == 0 & lexical == 1 ~ "lexical only",
cause == 0 & fault == 1 & lexical == 0 ~ "fault only",
cause == 1 & fault == 0 & lexical == 0 ~ "caused only",
cause == 0 & fault == 1 & lexical == 1 ~ "lexical + fault",
cause == 1 & fault == 0 & lexical == 1 ~ "caused + lexical",
cause == 1 & fault == 1 & lexical == 0 ~ "caused + fault",
cause == 1 & fault == 1 & lexical == 1 ~ "all")) %>%
group_by(participant_age, type, response_pattern) %>%
summarize(n = n()) %>%
ungroup() %>%
group_by(participant_age, type) %>%
mutate(percentage = n / sum(n) * 100) %>%
ungroup() %>%
mutate(type = factor(type, levels = c("proximal", "distal")),
response_pattern = factor(response_pattern, levels = c("caused only", "lexical only", "fault only", "caused + lexical", "caused + fault", "lexical + fault", "all", "none")))
df.labels = df.plot %>%
group_by(participant_age, type) %>%
summarize(count = sum(n)/2) %>%
ungroup() %>%
mutate(age = as.character(factor(participant_age,
levels = c(3, 4, 5, 6, 7, 8, 9),
labels = c("3", "4", "5", "6", "7", "8", "9"))),
count = ifelse(type == "proximal" & participant_age == 3, str_c("n=", count), count),
label = str_c("<span style='font-size:16px;'>", age, "</span>"),
label = ifelse(type == "proximal",
str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
"<span style='font-size:12px;'>(", count, ")", "</span>"), label),
type = factor(type, levels = c("proximal", "distal")))
df.plot = df.plot %>%
left_join(df.labels %>%
select(participant_age, type, label),
by = c("participant_age", "type")) %>%
mutate(label = factor(label, levels = df.labels$label))
p2 = ggplot(data = df.plot,
mapping = aes(x = label,
y = percentage,
group = response_pattern,
color = response_pattern,
fill = response_pattern)) +
geom_col(color = "black") +
facet_wrap(~type,
scales = "free_x") +
scale_fill_manual(values = c("caused only" = "#dc3410",
"fault only" = "#4DAF4A",
"lexical only" = "#377EB8",
"caused + fault" = "#A0522D",
"lexical + fault" = "#40E0D0",
"caused + lexical" = "#800080",
"all" = "#FFFFFF",
"none" = "#000000")) +
scale_color_manual(values = c("caused only" = "#dc3410",
"fault only" = "#4DAF4A",
"lexical only" = "#377EB8",
"caused + fault" = "#A0522D",
"lexical + fault" = "#40E0D0",
"caused + lexical" = "#800080",
"all" = "#FFFFFF")) +
scale_shape_manual(values = c(21, 23)) +
scale_y_continuous(breaks = seq(0, 100, 25),
labels = str_c(seq(0, 100, 25), "%"),
expand = expansion(add = c(0, 0))) +
scale_x_discrete(expand = expansion(add = c(0, 0))) +
coord_cartesian(clip = "off") +
labs(y = "Response pattern \n\ probability",
x = "Age (in years)") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_markdown(),
strip.background = element_blank(),
strip.text = element_text(size = 16),
panel.spacing = unit(.4, "cm")) +
guides(fill = guide_legend(order = 1, nrow = 2, byrow = TRUE,
override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))),
color = guide_legend(order = 1, nrow = 2, byrow = TRUE,
override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))))

# Experiment 2 (children): load the cleaned data
df.exp2_children = read_csv("../../data/exp2_child_clean.csv")
df.exp2_children = df.exp2_children %>%
select(scenario_order, gender_order, question, participant_age, age_float, direct, absent, scenario, response_id, gender, lan_spoken)
duplicates_check <- df.exp2_children %>%
group_by(response_id) %>%
summarise(row_count = n()) %>%
filter(row_count != 6)
print(duplicates_check)
## # A tibble: 0 × 2
## # ℹ 2 variables: response_id <dbl>, row_count <int>
# gender
df.exp2_children %>%
group_by(gender) %>%
summarise(n = n_distinct(response_id)) %>%
print()
## # A tibble: 3 × 2
## gender n
## <chr> <int>
## 1 f 205
## 2 m 178
## 3 o 1
# language spoken
df.exp2_children %>%
mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
group_by(en_occurrence) %>%
summarise(n = n_distinct(response_id)) %>%
print()
## # A tibble: 2 × 2
## en_occurrence n
## <chr> <int>
## 1 en 379
## 2 <NA> 5
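# same difference-score construction as in Experiment 1, now contrasting the absent cause
# (difference = 1) with the direct cause (difference = 0)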
df.exp2_children_different = df.exp2_children %>%
mutate(response_pattern = case_when(
absent == 1 & direct == 1 ~ "both",
absent == 0 & direct == 0 ~ "neither",
TRUE ~ "different"
)) %>%
filter(response_pattern == "different") %>%
mutate(difference = absent - direct,
difference = recode(difference, `-1` = 0))
df.exp2_children_both = df.exp2_children %>%
mutate(response_pattern = case_when(
absent == 1 & direct == 1 ~ "both",
absent == 0 & direct == 0 ~ "neither",
TRUE ~ "different"
)) %>%
filter(response_pattern == "both")
df.exp2_children_both = df.exp2_children_both %>%
mutate(difference = 1) %>%
bind_rows(df.exp2_children_both %>%
mutate(difference = 0))
df.exp2_children_new = df.exp2_children_different %>%
bind_rows(df.exp2_children_both)
df.model = df.exp2_children_new %>%
mutate(
# Convert to factor explicitly
question = as_factor(question),
question = fct_relevel(question,
"fault",
"cause",
"lexical")
)
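# bernoulli mixed-effects model (causation by absence), analogous to the Experiment 1 model;
# adapt_delta is raised to .99999, a stricter sampler setting that helps avoid divergent transitions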
fit.brm.absence.children = brm(formula = difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
control = list(adapt_delta = .99999),
file = "cache/absence_cause_children_difference")
fit.brm.absence.children
## Family: bernoulli
## Links: mu = logit
## Formula: difference ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 2245)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 380)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.34 0.12 1.12 1.58 1.00 4434 7760
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.97 1.11 0.07 4.04 1.00 3479 4475
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.30 0.87 -4.03 -0.37 1.00 6480 4943
## question1 -1.36 0.31 -1.97 -0.77 1.00 12252 8517
## question2 0.24 0.29 -0.33 0.81 1.00 11577 9718
## age_float 0.29 0.05 0.18 0.39 1.00 9560 9571
## question1:age_float 0.47 0.05 0.37 0.58 1.00 11648 9020
## question2:age_float 0.12 0.05 0.03 0.21 1.00 11685 9170
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# effect of question
fit.brm.absence.children %>%
emmeans(spec = pairwise ~ question,
type = "response")## $emmeans
## question response lower.HPD upper.HPD
## fault 0.7431 0.492150 1.000
## cause 0.6105 0.324045 0.970
## lexical 0.0413 0.000509 0.139
##
## Point estimate displayed: median
## Results are back-transformed from the logit scale
## HPD interval probability: 0.95
##
## $contrasts
## contrast odds.ratio lower.HPD upper.HPD
## fault / cause 1.84 1.38 2.37
## fault / lexical 67.18 41.17 101.00
## cause / lexical 36.46 22.76 53.50
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
##
## Estimate 2.5 % 97.5 %
## 0.29 0.184 0.395
##
## Term: age_float
## Type: link
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect
fit.brm.absence.children %>%
emtrends(specs = pairwise ~ question,
var = "age_float")## $emtrends
## question age_float.trend lower.HPD upper.HPD
## fault 0.758 0.621 0.910
## cause 0.405 0.284 0.525
## lexical -0.307 -0.485 -0.119
##
## Point estimate displayed: median
## HPD interval probability: 0.95
##
## $contrasts
## contrast estimate lower.HPD upper.HPD
## fault - cause 0.353 0.210 0.504
## fault - lexical 1.065 0.847 1.277
## cause - lexical 0.712 0.515 0.913
##
## Point estimate displayed: median
## HPD interval probability: 0.95
df.exp2_children_new %>%
group_by(question) %>%
summarise(
mean = mean(difference),
ci_lower = mean_cl_normal(difference)$ymin,
ci_upper = mean_cl_normal(difference)$ymax
)
## # A tibble: 3 × 4
## question mean ci_lower ci_upper
## <chr> <dbl> <dbl> <dbl>
## 1 cause 0.576 0.541 0.611
## 2 fault 0.656 0.621 0.690
## 3 lexical 0.0828 0.0628 0.103
df.data = expand_grid(age_float = seq(3, 9, 0.1),
question = c("cause", "lexical", "fault"))
df.prediction = fit.brm.absence.children %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
clean_names()
df.linear = df.model %>%
mutate(participant_age = factor(participant_age,
levels = c(3, 4, 5, 6, 7, 8, 9),
labels = c("3", "4", "5", "6", "7", "8", "9")))
df.means = df.linear %>%
group_by(participant_age, question) %>%
summarize(
response = Hmisc::smean.cl.boot(difference)) %>%
mutate(index = c("response", "low", "high")) %>%
ungroup() %>%
pivot_wider(names_from = index,
values_from = response) %>%
mutate(participant_age = as.numeric(as.character(participant_age)))
p3 = ggplot(mapping = aes(x = age_float,
y = difference,
color = question)) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
geom_smooth(data = df.prediction,
mapping = aes(y = estimate,
ymin = q10,
ymax = q90,
color = question,
fill = question),
stat = "identity",
alpha = 0.2) +
geom_pointrange(data = df.means,
mapping = aes(x = participant_age,
y = response,
ymin = low,
ymax = high,
fill = question,
shape = question),
color = "black",
size = 0.5,
show.legend = FALSE,
alpha = 0.8,
position = position_dodge(width = 0.65)) +
ylab(element_blank()) +
scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"),
breaks=c("cause", "lexical", "fault"),
labels = c("caused", "lexical", "fault")) +
scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8", "fault" = "#4DAF4A"),
breaks=c("cause", "lexical", "fault"),
labels = c("caused", "lexical", "fault")) +
scale_shape_manual(values = c(23, 21, 24)) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
labels = c("100% \n direct", "75%", "50%", "75%", "100% \n absent"),
limits = c(0, 1)) +
scale_x_continuous(name = "Age (in years)",
breaks = 3:9,
labels = c("3", "4", "5", "6", "7", "8", "9")) +
labs(y = "Probability of selecting \n direct or absent cause") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12.5))

# response-pattern plot (p4): percentage of each selection pattern by age, separately for
# the direct and absent cause
df.plot = df.exp2_children %>%
pivot_wider(names_from = question,
values_from = c(direct, absent)) %>%
pivot_longer(cols = direct_cause:absent_fault,
names_to = c("type", ".value"),
names_sep = "_") %>%
mutate(response_pattern = case_when(
cause == 0 & fault == 0 & lexical == 0 ~ "none",
cause == 0 & fault == 0 & lexical == 1 ~ "lexical only",
cause == 0 & fault == 1 & lexical == 0 ~ "fault only",
cause == 1 & fault == 0 & lexical == 0 ~ "caused only",
cause == 0 & fault == 1 & lexical == 1 ~ "lexical + fault",
cause == 1 & fault == 0 & lexical == 1 ~ "caused + lexical",
cause == 1 & fault == 1 & lexical == 0 ~ "caused + fault",
cause == 1 & fault == 1 & lexical == 1 ~ "all")) %>%
group_by(participant_age, type, response_pattern) %>%
summarize(n = n()) %>%
ungroup() %>%
group_by(participant_age, type) %>%
mutate(percentage = n / sum(n) * 100) %>%
ungroup() %>%
mutate(type = factor(type, levels = c("direct", "absent")),
response_pattern = factor(response_pattern, levels = c("caused only", "lexical only", "fault only", "caused + lexical", "caused + fault", "lexical + fault", "all", "none")))
df.labels = df.plot %>%
group_by(participant_age, type) %>%
summarize(count = sum(n)/2) %>%
ungroup() %>%
mutate(age = as.character(factor(participant_age,
levels = c(3, 4, 5, 6, 7, 8, 9),
labels = c("3", "4", "5", "6", "7", "8", "9"))),
count = ifelse(type == "direct" & participant_age == 3, str_c("n=", count), count),
label = str_c("<span style='font-size:16px;'>", age, "</span>"),
label = ifelse(type == "direct",
str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
"<span style='font-size:12px;'>(", count, ")", "</span>"), label),
type = factor(type, levels = c("direct", "absent")))
df.plot = df.plot %>%
left_join(df.labels %>%
select(participant_age, type, label),
by = c("participant_age", "type")) %>%
mutate(label = factor(label, levels = df.labels$label))
p4 = ggplot(data = df.plot,
mapping = aes(x = label,
y = percentage,
group = response_pattern,
color = response_pattern,
fill = response_pattern)) +
geom_col(color = "black") +
facet_wrap(~type,
scales = "free_x") +
scale_fill_manual(values = c("caused only" = "#dc3410",
"fault only" = "#4DAF4A",
"lexical only" = "#377EB8",
"caused + fault" = "#A0522D",
"lexical + fault" = "#40E0D0",
"caused + lexical" = "#800080",
"all" = "#FFFFFF",
"none" = "#000000")) +
scale_color_manual(values = c("caused only" = "#dc3410",
"fault only" = "#4DAF4A",
"lexical only" = "#377EB8",
"caused + fault" = "#A0522D",
"lexical + fault" = "#40E0D0",
"caused + lexical" = "#800080",
"all" = "#FFFFFF")) +
scale_shape_manual(values = c(21, 23)) +
scale_y_continuous(breaks = seq(0, 100, 25),
labels = str_c(seq(0, 100, 25), "%"),
expand = expansion(add = c(0, 0))) +
scale_x_discrete(expand = expansion(add = c(0, 0))) +
coord_cartesian(clip = "off") +
labs(y = "Response pattern \n\ probability",
x = "Age (in years)") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_markdown(),
strip.background = element_blank(),
strip.text = element_text(size = 16),
panel.spacing = unit(.4, "cm")) +
guides(fill = guide_legend(order = 1, nrow = 2, byrow = TRUE,
override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))),
color = guide_legend(order = 1, nrow = 2, byrow = TRUE,
override.aes = list(order = c(1, 3, 2, 6, 4, 5, 7, 8))))

# dummy coding
options(contrasts = c("contr.treatment", "contr.poly"))
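# contr.treatment dummy-codes question, so the first level ("lexical", set via fct_relevel below)
# serves as the reference category in the follow-up models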
df.model = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
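# follow-up model (Experiment 1): effect of question wording (cause vs. lexical) on
# distal-cause selections, children with 4 <= age_float <= 10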
fit.brm.cause.children.distal = brm(formula = distal ~ 1 + question + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_distal")
fit.brm.cause.children.distal
## Family: bernoulli
## Links: mu = logit
## Formula: distal ~ 1 + question + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1408)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.10 0.15 0.81 1.41 1.00 2675 5188
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.09 0.89 0.15 3.60 1.00 1662 622
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.63 0.73 -4.26 -0.98 1.00 1525 785
## questioncause 3.46 0.21 3.06 3.89 1.00 5545 7397
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df.model = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
# removed random intercept for scenario to prevent divergent transitions
fit.brm.cause.children.distal.age = brm(formula = distal ~ 1 + question*age_float + (1 | response_id),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_distal_age")
fit.brm.cause.children.distal.age
## Family: bernoulli
## Links: mu = logit
## Formula: distal ~ 1 + question * age_float + (1 | response_id)
## Data: df.model (Number of observations: 1408)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.07 0.15 0.78 1.38 1.00 4094 6553
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -2.23 0.63 -3.48 -1.01 1.00 9936
## questioncause 1.69 0.70 0.32 3.08 1.00 10136
## age_float -0.07 0.09 -0.24 0.11 1.00 9605
## questioncause:age_float 0.25 0.10 0.06 0.45 1.00 9990
## Tail_ESS
## Intercept 9183
## questioncause 9017
## age_float 9554
## questioncause:age_float 8416
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df.model = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
fit.brm.cause.children.proximal = brm(formula = proximal ~ 1 + question + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_proximal")
fit.brm.cause.children.proximal
## Family: bernoulli
## Links: mu = logit
## Formula: proximal ~ 1 + question + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1408)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.15 0.15 0.87 1.45 1.00 3301 5318
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.10 0.86 0.17 3.35 1.00 3424 4098
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.30 0.76 0.54 3.87 1.00 3179 2033
## questioncause -3.17 0.20 -3.56 -2.79 1.00 6843 7724
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df.model = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
fit.brm.cause.children.proximal.age = brm(formula = proximal ~ 1 + question*age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_proximal_age")
fit.brm.cause.children.proximal.age
## Family: bernoulli
## Links: mu = logit
## Formula: proximal ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1408)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 352)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.17 0.15 0.89 1.47 1.00 3289 5576
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.12 0.88 0.18 3.52 1.00 3341 1837
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.12 0.98 -0.99 3.10 1.00 2661
## questioncause -1.00 0.65 -2.29 0.27 1.00 5097
## age_float 0.18 0.09 0.01 0.35 1.00 5019
## questioncause:age_float -0.32 0.10 -0.52 -0.14 1.00 4949
## Tail_ESS
## Intercept 1345
## questioncause 7207
## age_float 7414
## questioncause:age_float 6618
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
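# faceted plot (Experiment 1): model predictions and bootstrapped means for selecting
# the distal vs. proximal cause, cause vs. lexical question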
df.data = expand_grid(age_float = seq(4, 9, 0.1),
question = c("cause", "lexical"))
df.prediction = fit.brm.cause.children.distal.age %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
mutate(type = "distal") %>%
bind_rows(fit.brm.cause.children.proximal.age %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
mutate(type = "proximal")) %>%
clean_names()
df.children = df.exp1_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
pivot_longer(cols = distal:proximal,
names_to = "type",
values_to = "response")
df.means = df.children %>%
mutate(participant_age = factor(participant_age,
levels = c(4, 5, 6, 7, 8, 9),
labels = c("4", "5", "6", "7", "8", "9")),
question = recode(question, "cause" = "cause")) %>%
group_by(participant_age, question, type) %>%
summarize(
response = Hmisc::smean.cl.boot(response)) %>%
mutate(index = c("response", "low", "high")) %>%
ungroup() %>%
pivot_wider(names_from = index,
values_from = response) %>%
mutate(participant_age = as.numeric(as.character(participant_age)))
df.means = df.means %>%
mutate(question = factor(question, levels = c("cause", "lexical")))
custom_labels = as_labeller(c("distal" = "Distal Cause", "proximal" = "Proximal Cause"))
ggplot(mapping = aes(x = age_float,
y = response,
color = question)) +
geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
geom_smooth(data = df.prediction,
mapping = aes(y = estimate,
ymin = q10,
ymax = q90,
color = question,
fill = question),
stat = "identity",
alpha = 0.2,
show.legend = F) +
geom_pointrange(data = df.means,
mapping = aes(x = participant_age,
y = response,
ymin = low,
ymax = high,
fill = question,
shape = question),
color = "black",
size = 0.5,
show.legend = TRUE,
alpha = 0.8,
position = position_dodge(width = 0.5)) +
facet_wrap(~type,
labeller = custom_labels) +
scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"),
breaks=c("cause", "lexical")) +
scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"),
breaks=c("cause", "lexical")) +
scale_shape_manual(values = c(23, 21)) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
scale_x_continuous(name = "Age",
breaks = 4:9,
labels = c("4", "5", "6", "7", "8", "9")) +
labs(y = "Probability of selecting \n causal candidate") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12.5),
strip.text = element_text(size = 16))

# follow-up models (Experiment 2): cause vs. lexical question, absent and direct responses
df.model = df.exp2_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
fit.brm.cause.children.absent.age = brm(formula = absent ~ 1 + question*age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_absent_age")
fit.brm.cause.children.absent.age
## Family: bernoulli
## Links: mu = logit
## Formula: absent ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1280)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 320)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 2.03 0.23 1.59 2.46 1.03 125 1082
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.27 1.71 0.02 6.38 1.18 16 23
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -2.87 1.59 -6.54 -0.23 1.15 17
## questioncause 0.00 0.99 -2.02 1.85 1.02 216
## age_float -0.21 0.16 -0.52 0.06 1.08 30
## questioncause:age_float 0.74 0.16 0.44 1.07 1.01 607
## Tail_ESS
## Intercept 26
## questioncause 3064
## age_float 169
## questioncause:age_float 1954
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df.model = df.exp2_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
mutate(question = as_factor(question),
question = fct_relevel(question,
"lexical"))
# Check contrast values
contrasts(df.model$question)
## cause
## lexical 0
## cause 1
fit.brm.cause.children.direct.age = brm(formula = direct ~ 1 + question*age_float + (1 | response_id) + (1 | scenario),
data = df.model,
family = "bernoulli",
seed = 1,
iter = 4000,
warmup = 1000,
file = "cache/cause_children_direct_age")
fit.brm.cause.children.direct.age
## Family: bernoulli
## Links: mu = logit
## Formula: direct ~ 1 + question * age_float + (1 | response_id) + (1 | scenario)
## Data: df.model (Number of observations: 1280)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Multilevel Hyperparameters:
## ~response_id (Number of levels: 320)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.90 0.20 1.53 2.32 1.00 2490 4111
##
## ~scenario (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.91 0.95 0.03 3.59 1.00 421 195
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.56 1.06 -0.48 3.76 1.01 449
## questioncause -0.26 0.88 -1.96 1.42 1.01 1004
## age_float 0.27 0.13 0.02 0.53 1.00 4269
## questioncause:age_float -0.58 0.14 -0.85 -0.33 1.00 1306
## Tail_ESS
## Intercept 195
## questioncause 547
## age_float 5202
## questioncause:age_float 6537
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
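# faceted plot (Experiment 2): model predictions and bootstrapped means for selecting
# the absent vs. direct cause, cause vs. lexical question
# (note: the prediction grid starts at age 3 although the models were fit to age_float >= 4)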
df.data = expand_grid(age_float = seq(3, 9, 0.1),
question = c("cause", "lexical"))
df.prediction = fit.brm.cause.children.absent.age %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
mutate(type = "absent") %>%
bind_rows(fit.brm.cause.children.direct.age %>%
fitted(newdata = df.data,
re_formula = NA,
probs = c(0.1, 0.9)) %>%
as_tibble() %>%
bind_cols(df.data) %>%
mutate(type = "direct")) %>%
clean_names()
df.children = df.exp2_children %>%
filter(age_float >= 4, age_float <= 10,
question != "fault") %>%
pivot_longer(cols = absent:direct,
names_to = "type",
values_to = "response")
df.means = df.children %>%
mutate(participant_age = factor(participant_age,
levels = c(4, 5, 6, 7, 8, 9),
labels = c("4", "5", "6", "7", "8", "9")),
question = recode(question, "cause" = "cause")) %>%
group_by(participant_age, question, type) %>%
summarize(
response = Hmisc::smean.cl.boot(response)) %>%
mutate(index = c("response", "low", "high")) %>%
ungroup() %>%
pivot_wider(names_from = index,
values_from = response) %>%
mutate(participant_age = as.numeric(as.character(participant_age)))
df.means = df.means %>%
mutate(question = factor(question, levels = c("cause", "lexical")))
custom_labels = as_labeller(c("absent" = "Absent Cause", "direct" = "Direct Cause"))
ggplot(mapping = aes(x = age_float,
y = response,
color = question)) +
geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
geom_smooth(data = df.prediction,
mapping = aes(y = estimate,
ymin = q10,
ymax = q90,
color = question,
fill = question),
stat = "identity",
alpha = 0.2,
show.legend = F) +
geom_pointrange(data = df.means,
mapping = aes(x = participant_age,
y = response,
ymin = low,
ymax = high,
fill = question,
shape = question),
color = "black",
size = 0.5,
show.legend = TRUE,
alpha = 0.8,
position = position_dodge(width = 0.5)) +
facet_wrap(~type,
labeller = custom_labels) +
scale_fill_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"),
breaks=c("cause", "lexical")) +
scale_color_manual(values = c("cause" = "#dc3410", "lexical" = "#377EB8"),
breaks=c("cause", "lexical")) +
scale_shape_manual(values = c(23, 21)) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
scale_x_continuous(name = "Age",
breaks = 4:9,
labels = c("4", "5", "6", "7", "8", "9")) +
labs(y = "Probability of selecting \n causal candidate") +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12.5),
strip.text = element_text(size = 16))

# session info
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [4] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
## [10] tidyverse_2.0.0 RColorBrewer_1.1-3 ggtext_0.1.2
## [13] marginaleffects_0.22.0 knitr_1.45 patchwork_1.2.0
## [16] brms_2.21.0 Rcpp_1.0.12 emmeans_1.10.1
## [19] janitor_2.2.0 xtable_1.8-4 tidytext_0.4.1
##
## loaded via a namespace (and not attached):
## [1] gridExtra_2.3 inline_0.3.19 rlang_1.1.3
## [4] magrittr_2.0.3 snakecase_0.11.1 matrixStats_1.2.0
## [7] compiler_4.3.3 loo_2.7.0 systemfonts_1.0.6
## [10] vctrs_0.6.5 reshape2_1.4.4 pkgconfig_2.0.3
## [13] crayon_1.5.2 fastmap_1.1.1 backports_1.4.1
## [16] utf8_1.2.4 rmarkdown_2.26 markdown_1.12
## [19] tzdb_0.4.0 ragg_1.3.0 bit_4.0.5
## [22] xfun_0.43 cachem_1.0.8 jsonlite_1.8.8
## [25] tinytable_0.2.1 collapse_2.0.16 highr_0.10
## [28] SnowballC_0.7.1 parallel_4.3.3 cluster_2.1.6
## [31] R6_2.5.1 bslib_0.7.0 stringi_1.8.4
## [34] StanHeaders_2.32.6 rpart_4.1.23 jquerylib_0.1.4
## [37] estimability_1.5 bookdown_0.38 rstan_2.32.6
## [40] base64enc_0.1-3 bayesplot_1.11.1 nnet_7.3-19
## [43] Matrix_1.6-5 timechange_0.3.0 tidyselect_1.2.1
## [46] rstudioapi_0.16.0 abind_1.4-5 yaml_2.3.8
## [49] codetools_0.2-19 pkgbuild_1.4.4 lattice_0.22-5
## [52] plyr_1.8.9 withr_3.0.0 bridgesampling_1.1-2
## [55] posterior_1.5.0 coda_0.19-4.1 evaluate_0.23
## [58] foreign_0.8-86 RcppParallel_5.1.7 xml2_1.3.6
## [61] pillar_1.9.0 janeaustenr_1.0.0 tensorA_0.36.2.1
## [64] checkmate_2.3.1 stats4_4.3.3 insight_0.20.5
## [67] distributional_0.4.0 generics_0.1.3 vroom_1.6.5
## [70] hms_1.1.3 commonmark_1.9.1 rstantools_2.4.0
## [73] munsell_0.5.1 scales_1.3.0 glue_1.7.0
## [76] Hmisc_5.1-2 tools_4.3.3 diffobj_0.3.5
## [79] data.table_1.15.4 tokenizers_0.3.0 mvtnorm_1.2-4
## [82] QuickJSR_1.1.3 colorspace_2.1-0 nlme_3.1-164
## [85] htmlTable_2.4.2 Formula_1.2-5 cli_3.6.2
## [88] textshaping_0.3.7 fansi_1.0.6 Brobdingnag_1.2-9
## [91] gtable_0.3.5 sass_0.4.9 digest_0.6.35
## [94] farver_2.1.2 htmlwidgets_1.6.4 htmltools_0.5.8.1
## [97] lifecycle_1.0.4 gridtext_0.1.5 bit64_4.0.5