1 Load packages

2 Settings

# set theme 
theme_set(theme_classic())

# sum coding
options(contrasts = c("contr.sum","contr.poly"))
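# with sum coding, a two-level factor is coded +1 / -1 (see the contrasts()
# checks below), so the model intercept reflects the grand mean and the
# coefficients are deviations from it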

# suppress grouping warning 
options(dplyr.summarise.inform = F)

3 Helper functions

# for color gradients
make_gradient = function(deg = 45, n = 100, cols = blues9) {
  cols = colorRampPalette(cols)(n + 1)
  rad = deg / (180 / pi)
  mat = matrix(
    data = rep(seq(0, 1, length.out = n) * cos(rad), n),
    byrow = TRUE,
    ncol = n
  ) +
  matrix(
    data = rep(seq(0, 1, length.out = n) * sin(rad), n),
    byrow = FALSE,
    ncol = n
  )
  mat = mat - min(mat)
  mat = mat / max(mat)
  mat = 1 + mat * n
  mat = matrix(data = cols[round(mat)], ncol = n)
  rasterGrob(
    image = mat,
    width = unit(1, "npc"),
    height = unit(1, "npc"),
    interpolate = TRUE
  )
}
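
# e.g., the plots below build a vertical red-to-blue background with
# gradient = make_gradient(deg = 90, n = 500, cols = brewer.pal(9, "RdBu"))
# (brewer.pal comes from the RColorBrewer package, assumed to be loaded above)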

# softmax function
softmax = function(x, beta = 1) {
  return(exp(x * beta) / sum(exp(x * beta)))
}
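
# e.g., softmax(c(1, 2, 3)) returns probabilities that sum to 1, with larger
# inputs receiving more mass; increasing beta sharpens the distribution and
# beta = 0 makes it uniform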

4 EXPERIMENT 1: CHAINS

4.1 CAUSED VS. LEXICAL (BREAK & CRACK)

4.1.1 ADULTS

4.1.1.1 DATA

4.1.1.1.1 Read in data
df.exp1_cause_adults = read_csv("../../data/experiment1/cause/adults/exp1_cause_adults.csv")
4.1.1.1.2 Wrangle
df.exp1_cause_adults = df.exp1_cause_adults %>% 
  mutate(participant = 1:n()) %>% 
  select(participant, break_cause_processed, break_simple_processed, crack_cause_processed, crack_simple_processed, counterbalance) %>% 
  pivot_longer(-c(participant,counterbalance),
               names_to = "tmp_name",
               values_to = "response") %>% 
  separate(.,
           col = tmp_name,
           into = c("verb", "question", "tmp"),
           sep = "_") %>% 
  select(-tmp) %>% 
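  # code which character was selected; whether a name counts as the distal or
  # proximal cause depends on the counterbalance order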
  mutate(distal = ifelse(counterbalance == "order_1" & str_detect(response,
                                                                  "Andy|Sophia"), 1, 0),
         distal = ifelse(counterbalance == "order_2" & str_detect(response,
                                                                  "Suzy|Bobby"), 1, distal),
         proximal = ifelse(counterbalance == "order_1" & str_detect(response,
                                                                    "Suzy|Bobby"), 1, 0),
         proximal = ifelse(counterbalance == "order_2" & str_detect(response,
                                                                    "Andy|Sophia"), 1, proximal), 
         question = str_replace_all(question, "simple", "lexical"),
         question = str_replace_all(question, "cause", "caused")) %>% 
  rename(scenario = verb)

df.exp1_cause_adults %>% 
  summarise(n_distinct(participant))
## # A tibble: 1 × 1
##   `n_distinct(participant)`
##                       <int>
## 1                        60
4.1.1.1.3 Demographics
df.exp1_cause_adults_demo = read_csv("../../data/experiment1/cause/adults/exp1_cause_adults_demo.csv")

df.exp1_cause_adults_demo  %>% 
  summarise(age_mean = mean(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_mean
##      <dbl>
## 1     36.0
df.exp1_cause_adults_demo  %>% 
  summarise(age_sd = sd(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_sd
##    <dbl>
## 1   13.9
df.exp1_cause_adults_demo  %>% 
  group_by(gender) %>% 
  count() 
## # A tibble: 4 × 2
## # Groups:   gender [4]
##   gender         n
##   <chr>      <int>
## 1 Female        26
## 2 Male          32
## 3 Non-binary     1
## 4 <NA>           2
df.exp1_cause_adults_demo  %>% 
  group_by(race) %>% 
  count() 
## # A tibble: 5 × 2
## # Groups:   race [5]
##   race                       n
##   <chr>                  <int>
## 1 Asian                      6
## 2 Black/African American     2
## 3 White                     49
## 4 other_race                 2
## 5 <NA>                       2

4.1.1.2 STATS

4.1.1.2.1 Bayesian Model
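# Recode responses for the difference analysis: trials where only one cause was
# selected are scored as difference = 1 (distal) or 0 (proximal); trials where
# both causes were selected are duplicated so that they contribute one 1 and one 0.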
df.cause_adults_different = df.exp1_cause_adults %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = distal - proximal,
        difference = recode(difference, `-1` = 0))

df.cause_adults_both = df.exp1_cause_adults %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")

df.cause_adults_both = df.cause_adults_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.cause_adults_both %>% 
              mutate(difference = 0)) 

df.chain_cause_adults = df.cause_adults_different %>% 
  bind_rows(df.cause_adults_both)

df.model = df.chain_cause_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## caused     1
## lexical   -1
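# mixed-effects logistic regression predicting distal (vs. proximal) selections
# from question type, with random intercepts for participant and scenario;
# the file argument caches the fitted model so it is reloaded on reruns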
fit.brm.chain.cause.adults = brm(formula = difference ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/chain_cause_adult_difference") 

fit.brm.chain.cause.adults
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 247) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.52      0.38     0.82     2.33 1.00     3023     5083
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.63      1.09     0.37     4.44 1.00     4540     3863
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.08      1.06    -2.16     2.30 1.00     3128     2880
## question1     1.46      0.22     1.05     1.92 1.00     6807     7133
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
4.1.1.2.2 Follow-up analysis
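# estimated marginal probabilities for each question and the caused / lexical
# odds ratio, back-transformed to the response scale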
fit.brm.chain.cause.adults %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## $emmeans
##  question response lower.HPD upper.HPD
##  caused      0.821   0.44422     0.998
##  lexical     0.203   0.00283     0.597
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast         odds.ratio lower.HPD upper.HPD
##  caused / lexical       18.2      6.27      40.7
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
4.1.1.2.3 Means and Confidence Interval
set.seed(1)

df.chain_cause_adults %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 2 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 caused   0.744    0.666    0.822
## 2 lexical  0.279    0.198    0.359

4.1.2 CHILDREN

4.1.2.1 DATA

4.1.2.1.1 Read in data
df.exp1_cause_children = read_csv("../../data/experiment1/cause/children/exp1_cause_children.csv")
4.1.2.1.2 Wrangle
# clean data
df.exp1_cause_children = df.exp1_cause_children %>% 
  clean_names() %>% 
  rename(question = question_type) %>% 
  mutate(response = str_remove_all(string = response,
                                   pattern = "\""),
         response = str_remove_all(string = response,
                                   pattern = ":"),
         response = str_remove_all(string = response,
                                   pattern = "\\}"),
         response = str_remove_all(string = response,
                                   pattern = "\\."),
         response = str_remove_all(string = response,
                                   pattern = "\\,"),
         response = str_remove_all(string = response,
                                   pattern = "\\\\n"),
         response = str_to_lower(response)) %>% 
  select(-(response_id : age_range), -notes, -full_response) 

# code responses
df.exp1_cause_children = df.exp1_cause_children %>% 
  mutate(distal = ifelse(counterbalance == "andy" & str_detect(response, "andy"), 1, 0),
         distal = ifelse(counterbalance == "suzy" & str_detect(response, "suzy"), 1, distal),
         distal = ifelse(counterbalance == "bobby" & str_detect(response, "bobby"), 1, distal),
         distal = ifelse(counterbalance == "sophia" & str_detect(response, "sophia"), 1, distal),
         proximal = ifelse(counterbalance == "andy" & str_detect(response, "suzy"), 1, 0),
         proximal = ifelse(counterbalance == "suzy" & str_detect(response, "andy"), 1, proximal),
         proximal = ifelse(counterbalance == "bobby" & str_detect(response, "sophia"), 1, proximal),
         proximal = ifelse(counterbalance == "sophia" & str_detect(response, "bobby"), 1, proximal),
         age = round(rounded_age / 365, digits = 2),
         question = str_replace_all(question, "break|crack", "simple")) 

# bin ages into whole years (collapsing 7-9) and replace simple with lexical
df.exp1_cause_children = df.exp1_cause_children %>% 
  mutate(age_whole = as.integer(age)) %>%
  mutate(age_character = as.character(age_whole)) %>%
  mutate(age_character = recode(age_character, "7" = "7-9", "8" = "7-9", "9" = "7-9"),
         question = str_replace_all(question, "simple", "lexical")) %>%
  rename(participant = participant_number) 
4.1.2.1.3 Demographics
# gender
df.exp1_cause_children %>%
  group_by(gender) %>%
  summarise(n = n_distinct(participant)) %>%
  print()
## # A tibble: 3 × 2
##   gender     n
##   <chr>  <int>
## 1 female    70
## 2 male      79
## 3 n/a        1
# language spoken
df.exp1_cause_children %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(participant)) %>%
    print()
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              137
## 2 <NA>             13

4.1.2.2 STATS

4.1.2.2.1 Bayesian Model
df.cause_children_different = df.exp1_cause_children %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = distal - proximal,
        difference = recode(difference, `-1` = 0))

df.cause_children_both = df.exp1_cause_children %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both") 

 df.cause_children_both = df.cause_children_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.cause_children_both %>% 
              mutate(difference = 0)) 

df.chain_cause_children = df.cause_children_different %>% 
  bind_rows(df.cause_children_both) 
  
df.model = df.chain_cause_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))
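
# the children's model additionally includes age and its interaction with question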

fit.brm.chain.cause.children = brm(formula = difference ~ 1 + question * age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/chain_cause_children_difference") 

fit.brm.chain.cause.children 
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 594) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.12      0.22     0.71     1.57 1.00     3399     4209
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.22      1.09     0.15     4.36 1.00     1155      452
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.83      1.01    -2.85     1.32 1.00     2348      864
## question1         0.62      0.47    -0.30     1.55 1.00     6865     7604
## age               0.08      0.09    -0.11     0.26 1.00     8515     5317
## question1:age     0.11      0.07    -0.03     0.24 1.00     6366     7689
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
4.1.2.2.2 Follow-up analysis
# effect of question 
fit.brm.chain.cause.children %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  question response lower.HPD upper.HPD
##  caused      0.721   0.40312     0.971
##  lexical     0.158   0.00938     0.449
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast         odds.ratio lower.HPD upper.HPD
##  caused / lexical       13.8       7.8      22.4
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
# effect of age 
fit.brm.chain.cause.children %>% 
  avg_slopes(variable = "age",
             type = "link")
## 
##  Estimate  2.5 % 97.5 %
##    0.0743 -0.106  0.262
## 
## Term: age
## Type:  link 
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect 
fit.brm.chain.cause.children %>% 
  emtrends(specs = pairwise ~ question,
           var = "age")
## $emtrends
##  question age.trend lower.HPD upper.HPD
##  caused      0.1810   -0.0269     0.402
##  lexical    -0.0298   -0.2798     0.201
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast         estimate lower.HPD upper.HPD
##  caused - lexical    0.209   -0.0681     0.481
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
4.1.2.2.3 Means and Confidence Interval
set.seed(1)

df.chain_cause_children %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 2 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 caused   0.669    0.615    0.723
## 2 lexical  0.201    0.156    0.247

4.2 MADE VS. LEXICAL (BREAK & CRACK)

4.2.1 ADULTS

4.2.1.1 DATA

4.2.1.1.1 Read in data
df.exp1_made_adults = read_csv("../../data/experiment1/made/adults/exp1_made_adults.csv")
4.2.1.1.2 Wrangle
df.exp1_made_adults = df.exp1_made_adults %>% 
  mutate(participant = 1:n()) %>% 
  select(participant, break_cause_processed, break_simple_processed, crack_cause_processed, crack_simple_processed, counterbalance) %>% 
  pivot_longer(-c(participant,counterbalance),
               names_to = "tmp_name",
               values_to = "response") %>% 
  separate(.,
           col = tmp_name,
           into = c("verb", "question", "tmp"),
           sep = "_") %>% 
  select(-tmp) %>% 
  mutate(distal = ifelse(counterbalance == "order_1" & str_detect(response,
                                    "Andy|Sophia"), 1, 0),
         distal = ifelse(counterbalance == "order_2" & str_detect(response,
                                    "Suzy|Bobby"), 1, distal),
         proximal = ifelse(counterbalance == "order_1" & str_detect(response,
                                    "Suzy|Bobby"), 1, 0),
         proximal = ifelse(counterbalance == "order_2" & str_detect(response,
                                    "Andy|Sophia"), 1, proximal),
         question = str_replace_all(question, "cause", "made"),
         question = str_replace_all(question, "simple", "lexical")) %>% 
  rename(scenario = verb)
4.2.1.1.3 Demographics
df.exp1_made_adults_demo = read_csv("../../data/experiment1/made/adults/exp1_made_adults_demo.csv")

df.exp1_made_adults_demo %>% 
  summarise(age_mean = mean(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_mean
##      <dbl>
## 1     34.9
df.exp1_made_adults_demo %>% 
  summarise(age_sd = sd(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_sd
##    <dbl>
## 1   12.3
df.exp1_made_adults_demo %>% 
  group_by(gender) %>% 
  count() 
## # A tibble: 3 × 2
## # Groups:   gender [3]
##   gender         n
##   <chr>      <int>
## 1 Female        29
## 2 Male          24
## 3 Non-binary     7
df.exp1_made_adults_demo %>% 
  group_by(race) %>% 
  count() 
## # A tibble: 5 × 2
## # Groups:   race [5]
##   race                       n
##   <chr>                  <int>
## 1 Asian                      6
## 2 Black/African American     5
## 3 White                     44
## 4 other_race                 4
## 5 <NA>                       1

4.2.1.2 STATS

4.2.1.2.1 Bayesian Model
df.made_adults_different = df.exp1_made_adults %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = distal - proximal,
        difference = recode(difference, `-1` = 0))

df.made_adults_both = df.exp1_made_adults %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")

df.made_adults_both = df.made_adults_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.made_adults_both %>% 
              mutate(difference = 0)) 

df.chain_made_adults = df.made_adults_different %>% 
  bind_rows(df.made_adults_both) 

df.model = df.chain_made_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## made       1
## lexical   -1
fit.brm.chain.made.adults = brm(formula = difference ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/chain_made_adult_difference") 

fit.brm.chain.made.adults
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 244) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.54      0.36     0.89     2.32 1.00     3567     5257
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.72      1.20     0.38     5.03 1.00     2024      835
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.52      1.05    -2.64     1.84 1.00     3296     3609
## question1     0.76      0.18     0.42     1.12 1.00     8476     7293
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
4.2.1.2.2 Follow-up analysis
fit.brm.chain.made.adults %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## $emmeans
##  question response lower.HPD upper.HPD
##  made        0.549   0.16355     0.954
##  lexical     0.212   0.00606     0.636
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast       odds.ratio lower.HPD upper.HPD
##  made / lexical       4.57      1.95      8.66
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
4.2.1.2.3 Means and Confidence Interval
set.seed(1)

df.chain_made_adults %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 2 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 lexical  0.275    0.194    0.356
## 2 made     0.516    0.427    0.605

4.2.2 CHILDREN

4.2.2.1 DATA

4.2.2.1.1 Read in data
df.exp1_made_children = read_csv("../../data/experiment1/made/children/exp1_made_children.csv")
4.2.2.1.2 Wrangle
# clean data
df.exp1_made_children = df.exp1_made_children %>% 
  clean_names() %>% 
  rename(question = question_type) %>% 
  mutate(response = str_to_lower(response),
         counterbalance = str_to_lower(counterbalance)) %>% 
  select(-(response_id : age_range), -notes, -full_response) 

# code responses
df.exp1_made_children = df.exp1_made_children %>% 
  mutate(distal = ifelse(counterbalance == "andy" & str_detect(response, "andy"), 1, 0),
         distal = ifelse(counterbalance == "suzy" & str_detect(response, "suzy"), 1, distal),
         distal = ifelse(counterbalance == "bobby" & str_detect(response, "bobby"), 1, distal),
         distal = ifelse(counterbalance == "sophia" & str_detect(response, "sophia"), 1, distal),
         proximal = ifelse(counterbalance == "andy" & str_detect(response, "suzy"), 1, 0),
         proximal = ifelse(counterbalance == "suzy" & str_detect(response, "andy"), 1, proximal),
         proximal = ifelse(counterbalance == "bobby" & str_detect(response, "sophia"), 1, proximal),
         proximal = ifelse(counterbalance == "sophia" & str_detect(response, "bobby"), 1, proximal),
         age = round(rounded_age / 365, digits = 2),
         question = str_replace_all(question, "break|crack", "simple")) 

# bin ages into whole years (collapsing 7-9), recode question, and relabel the mirror scenario
df.exp1_made_children = df.exp1_made_children %>% 
  mutate(age_whole = as.integer(age)) %>%
  mutate(age_character = as.character(age_whole)) %>%
  mutate(age_character = recode(age_character, "7" = "7-9", "8" = "7-9", "9" = "7-9"),
         question = str_replace_all(question, "simple", "lexical"),
         scenario = str_replace_all(scenario, "mirror_break", "mirror_crack")) %>%
  rename(participant = participant_number) 
4.2.2.1.3 Demographics
 # gender
df.exp1_made_children %>%
  group_by(gender) %>%
  summarise(n = n_distinct(participant)) %>%
  print()
## # A tibble: 3 × 2
##   gender     n
##   <chr>  <int>
## 1 female    84
## 2 male      65
## 3 n/a        1
 # language spoken
df.exp1_made_children %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(participant)) %>%
    print()
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              130
## 2 <NA>             20

4.2.2.2 STATS

4.2.2.2.1 Bayesian Model
df.made_children_different = df.exp1_made_children %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = distal - proximal,
        difference = recode(difference, `-1` = 0))

df.made_children_both = df.exp1_made_children %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")

df.made_children_both = df.made_children_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.made_children_both %>% 
              mutate(difference = 0)) 

df.chain_made_children = df.made_children_different %>% 
  bind_rows(df.made_children_both) 
  
df.model = df.chain_made_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## made       1
## lexical   -1
fit.brm.chain.made.children = brm(formula = difference ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/chain_made_children_difference") 

fit.brm.chain.made.children
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 598) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.47      0.23     1.04     1.95 1.00     3208     6227
## 
## ~scenario (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.15      1.46     0.37     5.81 1.00     2291     5989
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.51      1.45    -3.10     2.84 1.01      882      250
## question1        -1.17      0.46    -2.09    -0.27 1.00     7128     7436
## age               0.05      0.10    -0.14     0.24 1.00     6063     7179
## question1:age     0.29      0.07     0.16     0.43 1.00     6768     7232
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
4.2.2.2.2 Follow-up analysis
# effect of question 
fit.brm.chain.made.children %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  question response lower.HPD upper.HPD
##  made        0.610   0.24507     0.995
##  lexical     0.248   0.00509     0.809
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast       odds.ratio lower.HPD upper.HPD
##  made / lexical        4.8      2.82      7.37
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
# effect of age 
fit.brm.chain.made.children %>% 
  avg_slopes(variable = "age",
             type = "link")
## 
##  Estimate  2.5 % 97.5 %
##     0.054 -0.137  0.246
## 
## Term: age
## Type:  link 
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect 
fit.brm.chain.made.children %>% 
  emtrends(specs = pairwise ~ question,
           var = "age")
## $emtrends
##  question age.trend lower.HPD upper.HPD
##  made         0.343     0.123     0.559
##  lexical     -0.237    -0.491     0.003
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast       estimate lower.HPD upper.HPD
##  made - lexical    0.584     0.312     0.858
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
4.2.2.2.3 Means and Confidence Interval
set.seed(1)

df.chain_made_children %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 2 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 lexical  0.221    0.174    0.269
## 2 made     0.467    0.410    0.523

4.3 CAUSED VS. MADE (BREAK & CRACK)

4.3.1 ADULTS

4.3.1.1 DATA

4.3.1.1.1 Wrangle
df.chain_cause_made_adults = df.chain_cause_adults %>% 
  filter(question == "caused") %>% 
  bind_rows(df.chain_made_adults %>% 
              filter(question == "made"))

4.3.1.2 STATS

4.3.1.2.1 Bayesian Model
df.model = df.chain_cause_made_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

# Check contrast values
 contrasts(df.model$question)
##        [,1]
## caused    1
## made     -1
fit.brm.chain.cause.made.adults = brm(formula = difference ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/chain_cause_made_adult_difference") 

fit.brm.chain.cause.made.adults
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 249) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.33      0.34     0.70     2.03 1.01     2910     3458
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.70      1.39     0.29     5.51 1.03      133       46
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.48      1.41    -4.23     2.93 1.04       80       16
## question1     0.72      0.17     0.40     1.07 1.00     6521     7328
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
4.3.1.2.2 Follow-up analysis
fit.brm.chain.cause.made.adults %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## $emmeans
##  question response lower.HPD upper.HPD
##  caused      0.796   0.17578     0.998
##  made        0.483   0.00302     0.836
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast      odds.ratio lower.HPD upper.HPD
##  caused / made       4.17      1.86      7.63
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

4.3.2 CHILDREN

4.3.2.1 DATA

4.3.2.1.1 Wrangle
df.chain_cause_made_children = df.chain_cause_children %>% 
  filter(question == "caused") %>% 
  bind_rows(df.chain_made_children %>% 
              filter(question == "made"))

4.3.2.2 STATS

4.3.2.2.1 Bayesian Model
df.model = df.chain_cause_made_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

# Check contrast values
 contrasts(df.model$question)
##        [,1]
## caused    1
## made     -1
fit.brm.chain.cause.made.children = brm(formula = difference ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/chain_cause_made_children_difference") 

fit.brm.chain.cause.made.children
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 596) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.60      0.21     0.14     0.98 1.00     1876     2113
## 
## ~scenario (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.09      0.97     0.12     3.85 1.01      875      261
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.94      0.81    -2.51     0.84 1.01     1459     1201
## question1         0.85      0.38     0.09     1.59 1.00     4794     6890
## age               0.21      0.06     0.09     0.34 1.00     7662     8512
## question1:age    -0.05      0.06    -0.16     0.06 1.00     4751     7279
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
4.3.2.2.2 Follow-up analysis
# effect of question 
fit.brm.chain.cause.made.children %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  question response lower.HPD upper.HPD
##  caused      0.707     0.458     0.951
##  made        0.482     0.206     0.828
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast      odds.ratio lower.HPD upper.HPD
##  caused / made       2.61      1.76      3.68
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
# effect of age 
fit.brm.chain.cause.made.children %>% 
  avg_slopes(variable = "age",
             type = "link")
## 
##  Estimate  2.5 % 97.5 %
##     0.209 0.0914  0.342
## 
## Term: age
## Type:  link 
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect 
fit.brm.chain.cause.made.children %>% 
  emtrends(specs = pairwise ~ question,
           var = "age")
## $emtrends
##  question age.trend lower.HPD upper.HPD
##  caused       0.155   -0.0186     0.336
##  made         0.263    0.1155     0.425
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast      estimate lower.HPD upper.HPD
##  caused - made   -0.109     -0.32     0.118
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95

4.4 PLOTS

4.4.1 Response Plots

4.4.1.1 Caused vs. Lexical
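
# Build smooth model predictions across age for each question (children) and
# overlay bootstrapped group means (children by year of age; adults plotted at
# x = 10), on top of a red-blue background gradient.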

set.seed(1)

df.data = expand_grid(age = seq(4, 9, 0.1),
                      question = c("caused", "lexical"))

df.prediction = fit.brm.chain.cause.children %>%
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>%
  as_tibble() %>%
  bind_cols(df.data) %>%
  clean_names()

df.means = df.chain_cause_adults %>% 
  mutate(age_whole = 10) %>% 
  bind_rows(df.chain_cause_children) %>% 
  mutate(age_whole = factor(age_whole,
                            levels = c(4, 5, 6, 7, 8, 9, 10),
                            labels = c("4", "5", "6", "7", "8", "9", "adults"))) %>%
  group_by(age_whole, question) %>%
  summarize(
    response = Hmisc::smean.cl.boot(difference)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>%
    pivot_wider(names_from = index,
                values_from = response) %>%
  mutate(age_whole = fct_recode(age_whole, "10" = "adults"),
         age_whole = as.numeric(as.character(age_whole)),
         age_index = ifelse(age_whole == 10, "adult", "child"))

gradient = make_gradient(deg = 90, n = 500, cols = brewer.pal(9, "RdBu"))
df.textboxes = tibble(
  question = c("lexical", "caused"),
  age = c(-Inf, -Inf),
  difference = c(Inf, Inf)
)

p1 = ggplot(mapping = aes(x = age,
                          y = difference,
                          color = question)) +
  annotation_custom(grob = gradient,
                    xmin = -Inf,
                    xmax = Inf,
                    ymin = -Inf,
                    ymax = Inf) +
  geom_textbox(data = df.textboxes,
               mapping = aes(label = question,
                             fill = question),
               hjust = 0,
               vjust = -0.1,
               halign = 0.5,
               size = 6,
               width = unit(7.5, "cm"),
               box.size = unit(0.5, "cm"),
               color = "black",
               alpha = 0.8) +
  geom_hline(yintercept = 0.5,
             linetype = "dashed",
             color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90),
              stat = "identity",
              color = "black",
              fill = "gray80",
              alpha = 0.5,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                shape = age_index),
                  color = "black",
                  fill = "white",
                  size = 0.75) +
  facet_wrap(~ factor(question,
                     levels = c("lexical",
                                "caused"))) +
  scale_fill_manual(values = c("lexical" = "#4DAF4A",
                               "caused" = "#FF7F00")) +
  scale_shape_manual(values = c("child" = 21,
                                "adult" = 23)) +
  scale_x_continuous(breaks = 4:10,
                     limits = c(3.8, 10.2)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n proximal", "75%", "50%", "75%", "100% \n distal"),
                     limits = c(0, 1),
                     expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Probability of selecting \n proximal or distal cause") +
  theme(legend.position = "none",
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        strip.background = element_blank(),
        strip.text = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        panel.spacing = unit(.4, "cm"),
        plot.margin = margin(t = 1.2, r = 0.1, b = 0.5, l = 0, "cm"))

4.4.1.2 Made vs. Lexical

set.seed(1)

df.data = expand_grid(age = seq(4, 9, 0.1),
                      question = c("made", "lexical"))

df.prediction = fit.brm.chain.made.children %>%
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>%
  as_tibble() %>%
  bind_cols(df.data) %>%
  clean_names()

df.means = df.chain_made_adults %>% 
  mutate(age_whole = 10) %>% 
  bind_rows(df.chain_made_children) %>% 
  mutate(age_whole = factor(age_whole,
                            levels = c(4, 5, 6, 7, 8, 9, 10),
                            labels = c("4", "5", "6", "7", "8", "9", "adults"))) %>%
  group_by(age_whole, question) %>%
  summarize(
    response = Hmisc::smean.cl.boot(difference)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>%
    pivot_wider(names_from = index,
                values_from = response) %>%
  mutate(age_whole = fct_recode(age_whole, "10" = "adults"),
         age_whole = as.numeric(as.character(age_whole)),
         age_index = ifelse(age_whole == 10, "adult", "child"))

gradient = make_gradient(deg = 90, n = 500, cols = brewer.pal(9, "RdBu"))
df.textboxes = tibble(
  question = c("lexical", "made"),
  age = c(-Inf, -Inf),
  difference = c(Inf, Inf)
)

p2 = ggplot(mapping = aes(x = age,
                          y = difference,
                          color = question)) +
  annotation_custom(grob = gradient,
                    xmin = -Inf,
                    xmax = Inf,
                    ymin = -Inf,
                    ymax = Inf) +
  geom_textbox(data = df.textboxes,
               mapping = aes(label = question,
                             fill = question),
               hjust = 0,
               vjust = -0.1,
               halign = 0.5,
               size = 6,
               width = unit(7.5, "cm"),
               box.size = unit(0.5, "cm"),
               color = "black",
               alpha = 0.8) +
  geom_hline(yintercept = 0.5,
             linetype = "dashed",
             color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90),
              stat = "identity",
              color = "black",
              fill = "gray80",
              alpha = 0.5,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                shape = age_index),
                  color = "black",
                  fill = "white",
                  size = 0.75) +
  facet_wrap(~ factor(question,
                     levels = c("lexical",
                                "made"))) +
  scale_fill_manual(values = c("lexical" = "#4DAF4A",
                               "made" = "#FFFF33")) +
  scale_shape_manual(values = c("child" = 21,
                                "adult" = 23)) +
  scale_x_continuous(breaks = 4:10,
                     limits = c(3.8, 10.2)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n proximal", "75%", "50%", "75%", "100% \n distal"),
                     limits = c(0, 1),
                     expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Probability of selecting \n proximal or distal cause") +
  theme(legend.position = "none",
        axis.text.y = element_text(size = 12),
        axis.title.y = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        panel.spacing = unit(.4, "cm"),
        plot.margin = margin(t = 1.2, r = 0.1, b = 0.5, l = 0, "cm"))

4.4.2 Response Patterns by Age

4.4.2.0.1 Caused vs. Lexical
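# For each age group and question, compute the percentage of trials falling
# into each response pattern (distal only, proximal only, both, neither).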
df.plot = df.exp1_cause_adults %>% 
  mutate(age_whole = 10) %>% 
  bind_rows(df.exp1_cause_children) %>% 
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both causes",
    distal == 0 & proximal == 0 ~ "neither cause",
    distal == 1 & proximal == 0  ~ "distal cause only",
    distal == 0 & proximal == 1  ~ "proximal cause only"),
  question = recode(question, "cause" = "caused"),
  age_whole = as.factor(age_whole)) %>% 
  group_by(age_whole, question, response_pattern) %>% 
  summarize(n = n()) %>% 
  ungroup() %>%
  group_by(age_whole, question) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  ungroup() %>% 
  mutate(question = factor(question, levels = c("lexical", "caused")),
         response_pattern = factor(response_pattern, levels = c("distal cause only", "proximal cause only", "both causes", "neither cause")))

df.labels = df.plot %>% 
  group_by(age_whole, question) %>%
  summarize(count = sum(n)/2) %>% 
  ungroup() %>% 
  mutate(age = as.character(factor(age_whole,
                                   levels = c(4, 5, 6, 7, 8, 9, 10),
                                   labels = c("4", "5", "6", "7", "8", "9", "Adults"))),
         count = ifelse(question == "lexical" & age_whole == 4, str_c("n=", count), count),
         label = str_c("<span style='font-size:16px;'>", age, "</span>"),
         label = ifelse(question == "lexical", 
                        str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
                              "<span style='font-size:12px;'>(", count, ")", "</span>"), label),
         question = factor(question, levels = c("lexical", "caused")))

df.plot = df.plot %>% 
  left_join(df.labels %>% 
              select(age_whole, question, label), 
            by = c("age_whole", "question")) %>% 
  mutate(label = factor(label, levels = df.labels$label))

p3 = ggplot(data = df.plot,
            mapping = aes(x = label,
                          y = percentage,
                          group = response_pattern,
                          color = response_pattern,
                          fill = response_pattern)) +
  geom_col(color = "black") +
  facet_wrap(~question, 
             scales = "free_x") + 
  scale_fill_manual(values = c("distal cause only" = "#E41A1C",
                               "proximal cause only" = "#377EB8",
                               "both causes" = "#984EA3",
                               "neither cause" = "#999999")) +
  scale_color_manual(values = c("distal cause only" = "#E41A1C",
                                "proximal cause only" = "#377EB8",
                                "both causes" = "#984EA3",
                                "neither cause" = "#999999")) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = str_c(seq(0, 100, 25), "%"),
                     expand = expansion(add = c(0, 0))) +
  scale_x_discrete(expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Response pattern \n probability") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_markdown(),
        axis.title.x = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        panel.spacing = unit(.4, "cm"))
4.4.2.0.2 Made vs. Lexical
df.plot = df.exp1_made_adults %>% 
  mutate(age_whole = 10) %>% 
  bind_rows(df.exp1_made_children) %>% 
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both causes",
    distal == 0 & proximal == 0 ~ "neither cause",
    distal == 1 & proximal == 0  ~ "distal cause only",
    distal == 0 & proximal == 1  ~ "proximal cause only"),
    age_whole = as.factor(age_whole)) %>% 
  group_by(age_whole, question, response_pattern) %>% 
  summarize(n = n()) %>% 
  ungroup() %>%
  group_by(age_whole, question) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  ungroup() %>% 
  mutate(question = factor(question, levels = c("lexical", "made")),
         response_pattern = factor(response_pattern, levels = c("distal cause only",
                                                                "proximal cause only",
                                                                "both causes",
                                                                "neither cause")))
df.labels = df.plot %>% 
  group_by(age_whole, question) %>%
  summarize(count = sum(n)/2) %>% 
  ungroup() %>% 
  mutate(age = as.character(factor(age_whole,
                                   levels = c(4, 5, 6, 7, 8, 9, 10),
                                   labels = c("4", "5", "6", "7", "8", "9", "Adults"))),
         count = ifelse(question == "lexical" & age_whole == 4, str_c("n=", count), count),
         label = str_c("<span style='font-size:16px;'>", age, "</span>"),
         label = ifelse(question == "lexical", 
                        str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
                              "<span style='font-size:12px;'>(", count, ")", "</span>"), label),
         question = factor(question, levels = c("lexical", "made")))

df.plot = df.plot %>% 
  left_join(df.labels %>% 
              select(age_whole, question, label), 
            by = c("age_whole", "question")) %>% 
  mutate(label = factor(label, levels = df.labels$label))
  
p4 = ggplot(data = df.plot,
       mapping = aes(x = label,
                     y = percentage,
                     group = response_pattern,
                     color = response_pattern,
                     fill = response_pattern)) +
  geom_col(color = "black") +
  facet_wrap(~ question,
             scales = "free_x") + 
  scale_fill_manual(values = c("distal cause only" = "#E41A1C",
                               "proximal cause only" = "#377EB8",
                               "both causes" = "#984EA3",
                               "neither cause" = "#999999")) +
  scale_color_manual(values = c("distal cause only" = "#E41A1C",
                                "proximal cause only" = "#377EB8",
                                "both causes" = "#984EA3",
                                "neither cause" = "#999999")) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = str_c(seq(0, 100, 25), "%"),
                     expand = expansion(add = c(0, 0))) +
  scale_x_discrete(expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Response pattern \n probability") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_blank(),
        axis.text.x = element_markdown(),
        axis.title.x = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        panel.spacing = unit(.4, "cm"))

4.4.2.1 Combine Cause and Made Plots
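
# patchwork layout: prediction plots (p1, p2) in the top row, response-pattern
# plots (p3, p4) in the middle row, and a collected legend across the bottom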

design = "
1122
3344
5555
"    
plot = p1 + p2 + p3 + p4 +
  guide_area() + 
  plot_layout(design = design,
              guides = "collect",
              heights = c(1, 1, 0.2)) &
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold",
                                size = 24),
        text = element_text(size = 20))

plot

ggsave(plot, filename = "../../figures/experiment1/chain_response_patterns.pdf", height = 8, width = 15)

5 EXPERIMENT 2: ABSENCES

5.1 CAUSED VS. LEXICAL (BURN & FLOOD)

5.1.1 ADULTS

5.1.1.1 DATA

5.1.1.1.1 Read in data
df.exp2_cause_adults = read_csv("../../data/experiment2/cause/adults/exp2_cause_adults.csv")
5.1.1.1.2 Wrangle
# clean data
df.exp2_cause_adults = df.exp2_cause_adults %>% 
  mutate(condition = str_extract(string = responses,
                                 pattern = "sunburn_simple_response|sunburn_cause_response|flood_simple_response|flood_cause_response"),
         response = str_extract(string = responses, 
                                pattern = ":(.*)"),
         response = str_remove_all(string = response,
                                   pattern = "\""),
         response = str_remove_all(string = response,
                                   pattern = ":"),
         response = str_remove_all(string = response,
                                   pattern = "\\}"),
         response = str_remove_all(string = response,
                                   pattern = "\\."),
         response = str_remove_all(string = response,
                                   pattern = "\\,"),
         response = str_remove_all(string = response,
                                   pattern = "\\\\n"),
         response = str_to_lower(response)) %>% 
  separate(condition, c("scenario", "question", "extra"), "_") %>% 
  rename(participant = workerid) %>% 
  select(participant, scenario, question, response)

# code responses
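# 'direct' marks mentions of the direct cause (sun, water, rain); 'absence'
# marks mentions of the absence-related object (sunscreen, window, latch)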
df.exp2_cause_adults = df.exp2_cause_adults %>%
  mutate(direct = ifelse(str_detect(response,
                                    "^sun$|sun | sun$"), 1, 0),
         direct = ifelse(str_detect(response,
                                    "^water$|water | water$"), 1, direct),
         direct = ifelse(str_detect(response,
                                    "^rain$|rain | rain$"), 1, direct),
         absence = ifelse(str_detect(response,
                                     "^sunscreen$|sunscreen | sunscreen$"), 1, 0),
         absence = ifelse(str_detect(response,
                                     "^window$|window | window$"), 1, absence),
         absence = ifelse(str_detect(response,
                                     "^latch$|latch | latch$"), 1, absence))

# replace simple with lexical
df.exp2_cause_adults = df.exp2_cause_adults %>% 
  mutate(question = str_replace_all(question, "simple", "lexical"),
         question = str_replace_all(question, "cause", "caused"))
5.1.1.1.3 Demographics
df.exp2_cause_adults_demo = read_csv("../../data/experiment2/cause/adults/exp2_cause_adults_demo.csv")

df.exp2_cause_adults_demo %>% 
  summarise(age_mean = mean(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_mean
##      <dbl>
## 1     31.3
df.exp2_cause_adults_demo %>% 
  summarise(age_sd = sd(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_sd
##    <dbl>
## 1   10.6
df.exp2_cause_adults_demo %>% 
  group_by(gender) %>% 
  count() 
## # A tibble: 3 × 2
## # Groups:   gender [3]
##   gender     n
##   <chr>  <int>
## 1 Female    27
## 2 Male      32
## 3 <NA>       1
df.exp2_cause_adults_demo %>% 
  group_by(race) %>% 
  count() 
## # A tibble: 6 × 2
## # Groups:   race [6]
##   race                                             n
##   <chr>                                        <int>
## 1 Asian                                            4
## 2 Black/African American                           6
## 3 Latina                                           1
## 4 White                                           43
## 5 mixed with black, white, and native american     1
## 6 <NA>                                             5
df.exp2_cause_adults_demo %>% 
  group_by(ethnicity) %>% 
  count() 
## # A tibble: 3 × 2
## # Groups:   ethnicity [3]
##   ethnicity        n
##   <chr>        <int>
## 1 Hispanic         9
## 2 Non-Hispanic    45
## 3 <NA>             6

5.1.1.2 STATS

5.1.1.2.1 Bayesian Model
df.cause_adults_different = df.exp2_cause_adults %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absence - direct,
        difference = recode(difference, `-1` = 0))

df.cause_adults_both = df.exp2_cause_adults %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")
  
df.cause_adults_both = df.cause_adults_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.cause_adults_both %>% 
              mutate(difference = 0)) 

df.absence_cause_adults = df.cause_adults_different %>% 
  bind_rows(df.cause_adults_both)

df.model = df.absence_cause_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## caused     1
## lexical   -1
fit.brm.absence.cause.adults = brm(formula = difference ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/absence_cause_adult_difference") 

fit.brm.absence.cause.adults
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 258) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.23      0.18     0.01     0.65 1.00     1664     1502
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.24      1.22     0.06     4.66 1.01      350      164
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.85      0.89    -2.91     1.49 1.02      210       51
## question1     1.26      0.18     0.92     1.63 1.00     2590     3412
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.1.1.2.2 Follow-up analysis
fit.brm.absence.cause.adults %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## $emmeans
##  question response lower.HPD upper.HPD
##  caused      0.582   0.27397     0.958
##  lexical     0.102   0.00694     0.452
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast         odds.ratio lower.HPD upper.HPD
##  caused / lexical       12.4      5.01      23.2
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
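As a rough consistency check (using only the posterior summaries printed above): with sum coding (caused = +1, lexical = -1), the caused-vs-lexical difference on the logit scale is about 2 * question1, so the implied odds ratio should roughly match the emmeans contrast.

# rough check: exp(2 * 1.26) is about 12.4, in line with the contrast above
# (small discrepancies reflect posterior mean vs. median summaries)
exp(2 * 1.26)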
5.1.1.2.3 Means and Confidence Interval
set.seed(1)

df.absence_cause_adults %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 2 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 caused   0.570   0.488     0.653
## 2 lexical  0.103   0.0472    0.160

5.1.2 CHILDREN

5.1.2.1 DATA

5.1.2.1.1 Read in data
df.exp2_cause_children = read_csv("../../data/experiment2/cause/children/exp2_cause_children.csv")
5.1.2.1.2 Wrangle
# clean responses
df.exp2_cause_children = df.exp2_cause_children %>% 
  rename(response = full_response,
         question = question_type) %>% 
  mutate(response = str_remove_all(string = response,
                                   pattern = "\""),
         response = str_remove_all(string = response,
                                   pattern = ":"),
         response = str_remove_all(string = response,
                                   pattern = "\\}"),
         response = str_remove_all(string = response,
                                   pattern = "\\."),
         response = str_remove_all(string = response,
                                   pattern = "\\,"),
         response = str_remove_all(string = response,
                                   pattern = "\\\\n"),
         response = str_to_lower(response))

# code responses
df.exp2_cause_children = df.exp2_cause_children %>%
  mutate(direct = ifelse(str_detect(response,
                                    "^sun$|sun | sun$"), 1, 0),
         direct = ifelse(str_detect(response,
                                    "^water$|water | water$"), 1, direct),
         direct = ifelse(str_detect(response,
                                    "^rain$|rain | rain$"), 1, direct),
         absence = ifelse(str_detect(response,
                                     "^sunscreen$|sunscreen | sunscreen$"), 1, 0),
         absence = ifelse(str_detect(response,
                                     "^window$|window | window$"), 1, absence),
         absence = ifelse(str_detect(response,
                                     "^latch$|latch | latch$"), 1, absence)) 

# compute age and replace simple with lexical
df.exp2_cause_children = df.exp2_cause_children %>%
  mutate(age = round(rounded_age / 365, digits = 2),
         age_whole = as.integer(age),
         question = str_replace_all(question, "simple", "lexical"))
5.1.2.1.3 Demographics
 # gender
df.exp2_cause_children %>%
  group_by(gender) %>%
  summarise(n = n_distinct(participant)) %>%
  print()
## # A tibble: 2 × 2
##   gender     n
##   <chr>  <int>
## 1 female    78
## 2 male      72
  # language spoken
df.exp2_cause_children %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(participant)) %>%
    print()
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              127
## 2 <NA>             23

5.1.2.2 STATS

5.1.2.2.1 Bayesian Model
df.cause_children_different = df.exp2_cause_children %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absence - direct,
        difference = recode(difference, `-1` = 0))

df.cause_children_both = df.exp2_cause_children %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both") 

df.cause_children_both = df.cause_children_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.cause_children_both %>% 
              mutate(difference = 0)) 
 
df.absence_cause_children = df.cause_children_different %>% 
  bind_rows(df.cause_children_both) 
  
df.model = df.absence_cause_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

fit.brm.absence.cause.children = brm(formula = difference ~ 1 + question * age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/absence_cause_children_difference") 

fit.brm.absence.cause.children 
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 616) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 149) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.54      0.25     0.04     1.00 1.01      398     2093
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.87      1.27     0.01     5.40 1.03      164       63
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -2.27      0.67    -3.62    -0.94 1.00     1402     1283
## question1        -0.64      0.51    -1.58     0.51 1.03      145       44
## age               0.14      0.08    -0.01     0.31 1.03      163       73
## question1:age     0.19      0.07     0.03     0.32 1.03      147       53
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.1.2.2.2 Follow-up analysis
# effect of question 
fit.brm.absence.cause.children %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  question response lower.HPD upper.HPD
##  caused      0.332    0.1801     0.706
##  lexical     0.118    0.0368     0.324
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast         odds.ratio lower.HPD upper.HPD
##  caused / lexical       3.78      2.22      5.56
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
# effect of age 
fit.brm.absence.cause.children %>% 
  avg_slopes(variable = "age",
             type = "link")
## 
##  Estimate     2.5 % 97.5 %
##     0.144 -2.93e-05  0.311
## 
## Term: age
## Type:  link 
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect 
fit.brm.absence.cause.children %>% 
  emtrends(specs = pairwise ~ question,
           var = "age")
## $emtrends
##  question age.trend lower.HPD upper.HPD
##  caused      0.3302     0.170     0.493
##  lexical    -0.0552    -0.289     0.220
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast         estimate lower.HPD upper.HPD
##  caused - lexical    0.386    0.0766     0.654
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
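Similarly, as a rough check against the summaries printed above, the question-specific age slopes are the sum-coded age coefficient plus or minus the interaction term (small discrepancies again reflect posterior mean vs. median summaries).

# rough check against the emtrends output above
c(caused  = 0.14 + 0.19,  # ~ 0.33
  lexical = 0.14 - 0.19)  # ~ -0.05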
5.1.2.2.3 Means and Confidence Interval
set.seed(1)

df.absence_cause_children %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 2 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 caused   0.353   0.300     0.406
## 2 lexical  0.130   0.0920    0.169

5.2 MADE VS. LEXICAL (BURN & FLOOD)

5.2.1 ADULTS

5.2.1.1 DATA

5.2.1.1.1 Read in data
df.exp2_made_adults = read_csv("../../data/experiment2/made/adults/exp2_made_adults.csv")
5.2.1.1.2 Wrangle
# clean responses
df.exp2_made_adults = df.exp2_made_adults %>% 
  mutate(condition = str_extract(string = responses,
                                 pattern = "sunburn_simple_response|sunburn_made_response|flood_simple_response|flood_made_response"),
         response = str_extract(string = responses, 
                                pattern = ":(.*)"),
         response = str_remove_all(string = response,
                                   pattern = "\""),
         response = str_remove_all(string = response,
                                   pattern = ":"),
         response = str_remove_all(string = response,
                                   pattern = "\\}"),
         response = str_remove_all(string = response,
                                   pattern = "\\."),
         response = str_remove_all(string = response,
                                   pattern = "\\,"),
         response = str_remove_all(string = response,
                                   pattern = "\\\\n"),
         response = str_to_lower(response)) %>% 
  separate(condition, c("scenario", "question", "extra"), "_") %>% 
  rename(participant = workerid) %>% 
  select(participant, scenario, question, response)

# code responses
df.exp2_made_adults = df.exp2_made_adults %>%
  mutate(direct = ifelse(str_detect(response,
                                    "^sun$|sun | sun$"), 1, 0),
         direct = ifelse(str_detect(response,
                                    "^water$|water | water$"), 1, direct),
         direct = ifelse(str_detect(response,
                                    "^rain$|rain | rain$"), 1, direct),
         absence = ifelse(str_detect(response,
                                     "^sunscreen$|sunscreen | sunscreen$"), 1, 0),
         absence = ifelse(str_detect(response,
                                     "^window$|window | window$"), 1, absence),
         absence = ifelse(str_detect(response,
                                     "^latch$|latch | latch$"), 1, absence))

## replace simple with lexical
df.exp2_made_adults = df.exp2_made_adults %>% 
  mutate(question = str_replace_all(question, "simple", "lexical"))
5.2.1.1.3 Demographics
df.exp2_made_adults_demo = read_csv("../../data/experiment2/made/adults/exp2_made_adults_demo.csv")

df.exp2_made_adults_demo %>% 
  summarise(age_mean = mean(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_mean
##      <dbl>
## 1     29.8
df.exp2_made_adults_demo %>% 
  summarise(age_sd = sd(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_sd
##    <dbl>
## 1   10.6
df.exp2_made_adults_demo %>% 
  group_by(gender) %>% 
  count() 
## # A tibble: 3 × 2
## # Groups:   gender [3]
##   gender         n
##   <chr>      <int>
## 1 Female        41
## 2 Male          17
## 3 Non-binary     2
df.exp2_made_adults_demo %>% 
  group_by(race) %>% 
  count() 
## # A tibble: 7 × 2
## # Groups:   race [7]
##   race                              n
##   <chr>                         <int>
## 1 50% white, 50% asian              1
## 2 American Indian/Alaska Native     1
## 3 Asian                             8
## 4 Black/African American            6
## 5 Hispanic                          1
## 6 White                            40
## 7 <NA>                              3
df.exp2_made_adults_demo %>% 
  group_by(ethnicity) %>% 
  count() 
## # A tibble: 3 × 2
## # Groups:   ethnicity [3]
##   ethnicity        n
##   <chr>        <int>
## 1 Hispanic         8
## 2 Non-Hispanic    49
## 3 <NA>             3

5.2.1.2 STATS

5.2.1.2.1 Bayesian Model
df.made_adults_different = df.exp2_made_adults %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absence - direct,
        difference = recode(difference, `-1` = 0))

df.made_adults_both = df.exp2_made_adults %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")
  
df.made_adults_both = df.made_adults_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.made_adults_both %>% 
              mutate(difference = 0)) 

df.absence_made_adults = df.made_adults_different %>% 
  bind_rows(df.made_adults_both)

df.model = df.absence_made_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

# Check contrast values
contrasts(df.model$question)
##         [,1]
## made       1
## lexical   -1
fit.brm.absence.made.adults = brm(formula = difference ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/absence_made_adult_difference") 

fit.brm.absence.made.adults
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 274) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 59) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.27      0.20     0.01     0.73 1.00     3048     4090
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.11      1.12     0.07     4.17 1.01      396       95
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.63      0.78    -2.14     1.62 1.01      633       73
## question1     0.85      0.15     0.57     1.15 1.00     2488     6024
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.2.1.2.2 Follow-up analysis
fit.brm.absence.made.adults %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## $emmeans
##  question response lower.HPD upper.HPD
##  made        0.538     0.267     0.944
##  lexical     0.176     0.012     0.492
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast       odds.ratio lower.HPD upper.HPD
##  made / lexical       5.47      2.87      9.35
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
5.2.1.2.3 Means and Confidence Interval
set.seed(1)

df.absence_made_adults %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 2 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 lexical  0.179    0.110    0.248
## 2 made     0.530    0.449    0.610

5.2.2 CHILDREN

5.2.2.1 DATA

5.2.2.1.1 Read in data
df.exp2_made_children = read_csv("../../data/experiment2/made/children/exp2_made_children.csv")
5.2.2.1.2 Wrangle
# clean responses
df.exp2_made_children = df.exp2_made_children %>% 
  rename(response = full_response,
         question = question_type) %>% 
  mutate(response = str_remove_all(string = response,
                                   pattern = "\""),
         response = str_remove_all(string = response,
                                   pattern = ":"),
         response = str_remove_all(string = response,
                                   pattern = "\\}"),
         response = str_remove_all(string = response,
                                   pattern = "\\."),
         response = str_remove_all(string = response,
                                   pattern = "\\,"),
         response = str_remove_all(string = response,
                                   pattern = "\\\\n"),
         response = str_to_lower(response))

# code responses
df.exp2_made_children = df.exp2_made_children %>%
  mutate(direct = ifelse(str_detect(response,
                                    "^sun$|sun | sun$"), 1, 0),
         direct = ifelse(str_detect(response,
                                    "^water$|water | water$"), 1, direct),
         direct = ifelse(str_detect(response,
                                    "^rain$|rain | rain$"), 1, direct),
         absence = ifelse(str_detect(response,
                                     "^sunscreen$|sunscreen | sunscreen$"), 1, 0),
         # note the first response in the data is 'windows' but this doesn't capture that
         absence = ifelse(str_detect(response,
                                     "^window$|window | window$"), 1, absence),
         absence = ifelse(str_detect(response,
                                     "^latch$|latch | latch$"), 1, absence)) 

# compute age and replace simple with lexical
df.exp2_made_children = df.exp2_made_children %>%
  mutate(age = round(rounded_age / 365, digits = 2),
         age_whole = as.integer(age),
         question = str_replace_all(question, "simple", "lexical")) %>% 
  rename(participant = `participant _number`)
5.2.2.1.3 Demographics
 # gender
df.exp2_made_children %>%
  group_by(gender) %>%
  summarise(n = n_distinct(participant)) %>%
  print()
## # A tibble: 2 × 2
##   gender     n
##   <chr>  <int>
## 1 female    69
## 2 male      81
 # language spoken
df.exp2_made_children %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(participant)) %>%
    print()
## # A tibble: 3 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              132
## 2 non-en            1
## 3 <NA>             18

5.2.2.2 STATS

5.2.2.2.1 Bayesian Model
df.made_children_different = df.exp2_made_children %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absence - direct,
        difference = recode(difference, `-1` = 0))

df.made_children_both = df.exp2_made_children %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")

df.made_children_both = df.made_children_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.made_children_both %>% 
              mutate(difference = 0)) 

df.absence_made_children = df.made_children_different %>% 
  bind_rows(df.made_children_both) 
  
df.model = df.absence_made_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

# Check contrast values
contrasts(df.model$question)
##         [,1]
## made       1
## lexical   -1
fit.brm.absence.made.children = brm(formula = difference ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/absence_made_children_difference") 

fit.brm.absence.made.children
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 588) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.34      0.27     0.85     1.89 1.00     2289     3125
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.74      0.81     0.02     2.87 1.00     1556     2090
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -3.37      0.94    -5.30    -1.52 1.00     1073      366
## question1         0.11      0.54    -0.94     1.18 1.00     4153     6236
## age               0.22      0.10     0.02     0.43 1.00     4358     5131
## question1:age     0.07      0.07    -0.07     0.22 1.00     4061     6306
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.2.2.2.2 Follow-up analysis
# effect of question 
fit.brm.absence.made.children %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  question response lower.HPD upper.HPD
##  made       0.2111   0.02899     0.461
##  lexical    0.0726   0.00726     0.198
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast       odds.ratio lower.HPD upper.HPD
##  made / lexical       3.42      1.87      5.52
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
# effect of age 
fit.brm.absence.made.children %>% 
  avg_slopes(variable = "age",
             type = "link")
## 
##  Estimate  2.5 % 97.5 %
##     0.213 0.0195  0.429
## 
## Term: age
## Type:  link 
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect 
fit.brm.absence.made.children %>% 
  emtrends(specs = pairwise ~ question,
           var = "age")
## $emtrends
##  question age.trend lower.HPD upper.HPD
##  made         0.289    0.0682     0.522
##  lexical      0.138   -0.1332     0.410
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast       estimate lower.HPD upper.HPD
##  made - lexical    0.149    -0.154     0.435
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
5.2.2.2.3 Means and Confidence Interval
set.seed(1)

df.absence_made_children %>% 
  group_by(question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 2 × 4
##   question  mean ci_lower ci_upper
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 lexical  0.122   0.0836    0.159
## 2 made     0.273   0.223     0.324

5.3 CAUSED VS. MADE (BURN & FLOOD)

5.3.1 ADULTS

5.3.1.1 DATA

5.3.1.1.1 Wrangle
df.absence_cause_made_adults = df.absence_cause_adults %>% 
  filter(question == "caused") %>% 
  bind_rows(df.absence_made_adults %>% 
              filter(question == "made"))

5.3.1.2 STATS

5.3.1.2.1 Bayesian Model
df.model = df.absence_cause_made_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

# Check contrast values
contrasts(df.model$question)
##        [,1]
## caused    1
## made     -1
fit.brm.absence.cause.made.adults = brm(formula = difference ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/absence_cause_made_adult_difference") 

fit.brm.absence.cause.made.adults
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 293) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 119) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.16      0.12     0.01     0.46 1.00     2529     5282
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.10      0.96     0.10     3.67 1.01      961      785
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.08      0.86    -2.66     1.68 1.02      287       75
## question1     0.08      0.12    -0.16     0.32 1.00     2849     2505
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.3.1.2.2 Follow-up analysis
fit.brm.absence.cause.made.adults %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## $emmeans
##  question response lower.HPD upper.HPD
##  caused      0.561     0.199     0.931
##  made        0.519     0.150     0.878
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast      odds.ratio lower.HPD upper.HPD
##  caused / made       1.17     0.671      1.81
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

5.3.2 CHILDREN

5.3.2.1 DATA

5.3.2.1.1 Wrangle
df.absence_cause_made_children = df.absence_cause_children %>% 
  filter(question == "caused") %>% 
  bind_rows(df.absence_made_children %>% 
              filter(question == "made"))

5.3.2.2 STATS

5.3.2.2.1 Bayesian Model
df.model = df.absence_cause_made_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

# Check contrast values
contrasts(df.model$question)
##        [,1]
## caused    1
## made     -1
fit.brm.absence.cause.made.children = brm(formula = difference ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/absence_cause_made_children_difference") 

fit.brm.absence.cause.made.children
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 617) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.35      0.20     0.02     0.77 1.00     1150      960
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.73      0.91     0.01     3.56 1.01      439      159
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -2.76      0.61    -4.03    -1.49 1.01      783      371
## question1        -0.11      0.43    -1.01     0.70 1.00     1515      595
## age               0.28      0.06     0.17     0.39 1.00     1192      738
## question1:age     0.04      0.06    -0.07     0.16 1.00     1400      574
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
5.3.2.2.2 Follow-up analysis
# effect of question 
fit.brm.absence.cause.made.children %>% 
  emmeans(spec = pairwise ~ question, 
          type = "response")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  question response lower.HPD upper.HPD
##  caused      0.333    0.1259     0.569
##  made        0.264    0.0798     0.476
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast      odds.ratio lower.HPD upper.HPD
##  caused / made       1.39     0.933      1.94
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
# effect of age 
fit.brm.absence.cause.made.children %>% 
  avg_slopes(variable = "age",
             type = "link")
## 
##  Estimate 2.5 % 97.5 %
##     0.279 0.175  0.392
## 
## Term: age
## Type:  link 
## Comparison: mean(dY/dX)
## Columns: term, contrast, estimate, conf.low, conf.high, predicted_lo, predicted_hi, predicted, tmp_idx
# interaction effect 
fit.brm.absence.cause.made.children %>% 
  emtrends(specs = pairwise ~ question,
           var = "age")
## $emtrends
##  question age.trend lower.HPD upper.HPD
##  caused       0.317    0.1747     0.476
##  made         0.237    0.0674     0.404
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast      estimate lower.HPD upper.HPD
##  caused - made   0.0771    -0.152      0.32
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95

5.4 PLOTS

5.4.1 Response Plots

5.4.1.1 Caused vs. Lexical

set.seed(1)

df.data = expand_grid(age = seq(4, 9, 0.1),
                      question = c("caused", "lexical"))

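# population-level predictions: re_formula = NA drops the group-level (random) effects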
df.prediction = fit.brm.absence.cause.children %>%
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>%
  as_tibble() %>%
  bind_cols(df.data) %>%
  clean_names()

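# Hmisc::smean.cl.boot() returns c(mean, lower, upper) for each group; the
# index/pivot_wider steps below spread those three values into separate columns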
df.means = df.absence_cause_adults %>% 
  mutate(age_whole = 10) %>% 
  bind_rows(df.absence_cause_children) %>% 
  mutate(age_whole = factor(age_whole,
                            levels = c(4, 5, 6, 7, 8, 9, 10),
                            labels = c("4", "5", "6", "7", "8", "9", "adults"))) %>%
  group_by(age_whole, question) %>%
  summarize(
    response = Hmisc::smean.cl.boot(difference)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>%
    pivot_wider(names_from = index,
                values_from = response) %>%
  mutate(age_whole = fct_recode(age_whole, "10" = "adults"),
         age_whole = as.numeric(as.character(age_whole)),
         age_index = ifelse(age_whole == 10, "adult", "child"))

gradient = make_gradient(deg = 90, n = 500, cols = brewer.pal(9, "RdBu"))
df.textboxes = tibble(
  question = c("lexical", "caused"),
  age = c(-Inf, -Inf),
  difference = c(Inf, Inf)
)

p5 = ggplot(mapping = aes(x = age,
                          y = difference,
                          color = question)) +
  annotation_custom(grob = gradient,
                    xmin = -Inf,
                    xmax = Inf,
                    ymin = -Inf,
                    ymax = Inf) +
  geom_textbox(data = df.textboxes,
               mapping = aes(label = question,
                             fill = question),
               hjust = 0,
               vjust = -0.1,
               halign = 0.5,
               size = 6,
               width = unit(7.6, "cm"),
               box.size = unit(0.5, "cm"),
               color = "black",
               alpha = 0.8) +
  geom_hline(yintercept = 0.5,
             linetype = "dashed",
             color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90),
              stat = "identity",
              color = "black",
              fill = "gray80",
              alpha = 0.5,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                shape = age_index),
                  color = "black",
                  fill = "white",
                  size = 0.75) +
  facet_wrap(~ factor(question,
                     levels = c("lexical",
                                "caused"))) +
  scale_fill_manual(values = c("lexical" = "#4DAF4A",
                               "caused" = "#FF7F00")) +
  scale_shape_manual(values = c("child" = 21,
                                "adult" = 23)) +
  scale_x_continuous(breaks = 4:10,
                     limits = c(3.8, 10.2)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n direct", "75%", "50%", "75%", "100% \n absent"),
                     limits = c(0, 1),
                     expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Probability of selecting \n direct or absent cause") +
  theme(legend.position = "none",
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        strip.background = element_blank(),
        strip.text = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        panel.spacing = unit(.4, "cm"),
        plot.margin = margin(t = 1.2, r = 0.1, b = 0.5, l = 0, "cm"))

5.4.1.2 Made vs. Lexical

set.seed(1)

df.data = expand_grid(age = seq(4, 9, 0.1),
                      question = c("made", "lexical"))

df.prediction = fit.brm.absence.made.children %>%
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>%
  as_tibble() %>%
  bind_cols(df.data) %>%
  clean_names()

df.means = df.absence_made_adults %>% 
  mutate(age_whole = 10) %>% 
  bind_rows(df.absence_made_children) %>% 
  mutate(age_whole = factor(age_whole,
                            levels = c(4, 5, 6, 7, 8, 9, 10),
                            labels = c("4", "5", "6", "7", "8", "9", "adults"))) %>%
  group_by(age_whole, question) %>%
  summarize(
    response = Hmisc::smean.cl.boot(difference)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>%
    pivot_wider(names_from = index,
                values_from = response) %>%
  mutate(age_whole = fct_recode(age_whole, "10" = "adults"),
         age_whole = as.numeric(as.character(age_whole)),
         age_index = ifelse(age_whole == 10, "adult", "child"))

gradient = make_gradient(deg = 90, n = 500, cols = brewer.pal(9, "RdBu"))
df.textboxes = tibble(
  question = c("lexical", "made"),
  age = c(-Inf, -Inf),
  difference = c(Inf, Inf)
)

p6 = ggplot(mapping = aes(x = age,
                          y = difference,
                          color = question)) +
  annotation_custom(grob = gradient,
                    xmin = -Inf,
                    xmax = Inf,
                    ymin = -Inf,
                    ymax = Inf) +
  geom_textbox(data = df.textboxes,
               mapping = aes(label = question,
                             fill = question),
               hjust = 0,
               vjust = -0.1,
               halign = 0.5,
               size = 6,
               width = unit(7.6, "cm"),
               box.size = unit(0.5, "cm"),
               color = "black",
               alpha = 0.8) +
  geom_hline(yintercept = 0.5,
             linetype = "dashed",
             color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90),
              stat = "identity",
              color = "black",
              fill = "gray80",
              alpha = 0.5,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                shape = age_index),
                  color = "black",
                  fill = "white",
                  size = 0.75) +
  facet_wrap(~ factor(question,
                     levels = c("lexical",
                                "made"))) +
  scale_fill_manual(values = c("lexical" = "#4DAF4A",
                               "made" = "#FFFF33")) +
  scale_shape_manual(values = c("child" = 21,
                                "adult" = 23)) +
  scale_x_continuous(breaks = 4:10,
                     limits = c(3.8, 10.2)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n direct", "75%", "50%", "75%", "100% \n absent"),
                     limits = c(0, 1),
                     expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Probability of selecting \n direct or absent cause") +
  theme(legend.position = "none",
        axis.text.y = element_text(size = 12),
        axis.title.y = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        panel.spacing = unit(.4, "cm"),
        plot.margin = margin(t = 1.2, r = 0.1, b = 0.5, l = 0, "cm"))

5.4.2 Response Patterns by Age

5.4.2.0.1 Caused vs. Lexical
df.plot = df.exp2_cause_adults %>% 
  mutate(age_whole = 10) %>% 
  bind_rows(df.exp2_cause_children) %>% 
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both causes",
    direct == 0 & absence == 0 ~ "neither cause",
    direct == 1 & absence == 0  ~ "direct cause only",
    direct == 0 & absence == 1  ~ "absent cause only",
  ),
  question = recode(question, "cause" = "caused"),
  age_whole = as.factor(age_whole)) %>% 
  group_by(age_whole, question, response_pattern) %>% 
  summarize(n = n()) %>% 
  ungroup() %>%
  group_by(age_whole, question) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  ungroup() %>% 
  mutate(question = factor(question, levels = c("lexical", "caused")),
         response_pattern = factor(response_pattern, levels = c("absent cause only", "direct cause only", "both causes", "neither cause")))

df.labels = df.plot %>% 
  group_by(age_whole, question) %>%
  summarize(count = sum(n)/2) %>% 
  ungroup() %>% 
  mutate(age = as.character(factor(age_whole,
                                   levels = c(4, 5, 6, 7, 8, 9, 10),
                                   labels = c("4", "5", "6", "7", "8", "9", "Adults"))),
         count = ifelse(question == "lexical" & age_whole == 4, str_c("n=", count), count),
         label = str_c("<span style='font-size:16px;'>", age, "</span>"),
         label = ifelse(question == "lexical", 
                        str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
                              "<span style='font-size:12px;'>(", count, ")", "</span>"), label),
         question = factor(question, levels = c("lexical", "caused")))

df.plot = df.plot %>% 
  left_join(df.labels %>% 
              select(age_whole, question, label), 
            by = c("age_whole", "question")) %>% 
  mutate(label = factor(label, levels = df.labels$label))

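# the HTML span labels built above are rendered on the x-axis by
# ggtext::element_markdown() in the theme below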
p7 = ggplot(data = df.plot,
            mapping = aes(x = label,
                          y = percentage,
                          group = response_pattern,
                          color = response_pattern,
                          fill = response_pattern)) +
  geom_col(color = "black") +
  facet_wrap(~question, 
             scales = "free_x") + 
 scale_fill_manual(values = c("absent cause only" = "#E41A1C",
                               "direct cause only" = "#377EB8",
                               "both causes" = "#984EA3",
                               "neither cause" = "#999999")) +
  scale_color_manual(values = c("absent cause only" = "#E41A1C",
                                "direct cause only" = "#377EB8",
                                "both causes" = "#984EA3",
                                "neither cause" = "#999999")) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = str_c(seq(0, 100, 25), "%"),
                     expand = expansion(add = c(0, 0))) +
  scale_x_discrete(expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Response pattern \n probability") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_markdown(),
        axis.title.x = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        panel.spacing = unit(.4, "cm"))
5.4.2.0.2 Made vs. Lexical
df.plot = df.exp2_made_adults %>% 
  mutate(age_whole = 10) %>% 
  bind_rows(df.exp2_made_children) %>% 
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both causes",
    direct == 0 & absence == 0 ~ "neither cause",
    direct == 1 & absence == 0  ~ "direct cause only",
    direct == 0 & absence == 1  ~ "absent cause only",
  ),
  age_whole = as.factor(age_whole)) %>% 
  group_by(age_whole, question, response_pattern) %>% 
  summarize(n = n()) %>% 
  ungroup() %>%
  group_by(age_whole, question) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  ungroup() %>% 
  mutate(question = factor(question, levels = c("lexical", "made")),
         response_pattern = factor(response_pattern, levels = c("absent cause only", "direct cause only", "both causes", "neither cause")))

df.labels = df.plot %>% 
  group_by(age_whole, question) %>%
  summarize(count = sum(n)/2) %>% 
  ungroup() %>% 
  mutate(age = as.character(factor(age_whole,
                                   levels = c(4, 5, 6, 7, 8, 9, 10),
                                   labels = c("4", "5", "6", "7", "8", "9", "Adults"))),
         count = ifelse(question == "lexical" & age_whole == 4, str_c("n=", count), count),
         label = str_c("<span style='font-size:16px;'>", age, "</span>"),
         label = ifelse(question == "lexical", 
                        str_c("<span style='font-size:16px;'>", age, "</span>", "<br>",
                              "<span style='font-size:12px;'>(", count, ")", "</span>"), label),
         question = factor(question, levels = c("lexical", "made")))

df.plot = df.plot %>% 
  left_join(df.labels %>% 
              select(age_whole, question, label), 
            by = c("age_whole", "question")) %>% 
  mutate(label = factor(label, levels = df.labels$label))

p8 = ggplot(data = df.plot,
            mapping = aes(x = label,
                          y = percentage,
                          group = response_pattern,
                          color = response_pattern,
                          fill = response_pattern)) +
  geom_col(color = "black") +
  facet_wrap(~question, 
             scales = "free_x") + 
 scale_fill_manual(values = c("absent cause only" = "#E41A1C",
                               "direct cause only" = "#377EB8",
                               "both causes" = "#984EA3",
                               "neither cause" = "#999999")) +
  scale_color_manual(values = c("absent cause only" = "#E41A1C",
                                "direct cause only" = "#377EB8",
                                "both causes" = "#984EA3",
                                "neither cause" = "#999999")) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = str_c(seq(0, 100, 25), "%"),
                     expand = expansion(add = c(0, 0))) +
  scale_x_discrete(expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_blank(),
        axis.text.x = element_markdown(),
        axis.title.x = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        panel.spacing = unit(.4, "cm"))

5.4.2.1 Combine Cause and Made Plots

design = "
1122
3344
5555
"

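# patchwork layout: plots 1-4 (p5, p6, p7, p8) fill the 2 x 2 grid defined by the
# design string, and area 5 (guide_area) collects the shared legend along the bottom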
plot = p5 + p6 + p7 + p8 +
  guide_area() + 
  plot_layout(design = design,
              guides = "collect",
              heights = c(1, 1, 0.2)) &
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold",
                                size = 24),
        text = element_text(size = 20))

plot

ggsave(plot, filename = "../../figures/experiment2/absence_response_patterns.pdf", height = 8, width = 15)

6 EXPERIMENT 3: ABSENCES

6.1 EXPLANATION

6.1.1 ADULTS

6.1.1.1 DATA

6.1.1.1.1 Read in data
df.exp3_adults = read_csv("../../data/experiment3/adults/exp3_adults.csv")
6.1.1.1.2 Wrangle
# clean responses
df.exp3_adults = df.exp3_adults %>% 
  mutate(condition = str_extract(string = responses,
                                 pattern = "sunburn_simple_response|sunburn_made_response|flood_simple_response|flood_made_response"),
         response = str_extract(string = responses, 
                                pattern = ":(.*)"),
         response = str_remove_all(string = response,
                                   pattern = "\""),
         response = str_remove_all(string = response,
                                   pattern = ":"),
         response = str_remove_all(string = response,
                                   pattern = "\\}"),
         response = str_remove_all(string = response,
                                   pattern = "\\."),
         response = str_remove_all(string = response,
                                   pattern = "\\,"),
         response = str_remove_all(string = response,
                                   pattern = "\\\\n"),
         response = str_to_lower(response)) %>% 
  separate(condition, c("scenario", "question", "extra"), "_") %>% 
  rename(participant = workerid) %>% 
  select(participant, scenario, question, response)

# code responses
df.exp3_adults = df.exp3_adults %>%
  mutate(direct = ifelse(str_detect(response,
                                    "^sun$|sun | sun$"), 1, 0),
         direct = ifelse(str_detect(response,
                                    "^water$|water | water$"), 1, direct),
         direct = ifelse(str_detect(response,
                                    "^rain$|rain | rain$"), 1, direct),
         absence = ifelse(str_detect(response,
                                     "^sunscreen$|sunscreen | sunscreen$"), 1, 0),
         absence = ifelse(str_detect(response,
                                     "^window$|window | window$"), 1, absence),
         absence = ifelse(str_detect(response,
                                     "^latch$|latch | latch$"), 1, absence))
6.1.1.1.3 Demographics
df.exp3_adults_demo = read_csv("../../data/experiment3/adults/exp3_adults_demo.csv") 
df.exp3_adults_demo %>% 
  summarise(age_mean = mean(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_mean
##      <dbl>
## 1     33.8
df.exp3_adults_demo %>% 
  summarise(age_sd = sd(as.numeric(age), na.rm = TRUE))
## # A tibble: 1 × 1
##   age_sd
##    <dbl>
## 1   12.3
df.exp3_adults_demo %>% 
  group_by(gender) %>% 
  count() 
## # A tibble: 4 × 2
## # Groups:   gender [4]
##   gender         n
##   <chr>      <int>
## 1 Female        10
## 2 Male          16
## 3 Non-binary     3
## 4 <NA>           1
df.exp3_adults_demo %>% 
  group_by(race) %>% 
  count() 
## # A tibble: 4 × 2
## # Groups:   race [4]
##   race                       n
##   <chr>                  <int>
## 1 Asian                      6
## 2 Black/African American     3
## 3 White                     20
## 4 <NA>                       1
df.exp3_adults_demo %>% 
  group_by(ethnicity) %>% 
  count() 
## # A tibble: 3 × 2
## # Groups:   ethnicity [3]
##   ethnicity        n
##   <chr>        <int>
## 1 Hispanic         1
## 2 Non-Hispanic    27
## 3 <NA>             2

6.1.1.2 STATS

6.1.1.2.1 Bayesian Model
df.model = df.exp3_adults %>% 
  select(-response) %>% 
  pivot_longer(cols = direct:absence,
               names_to = "causal_candidate",
               values_to = "response") %>% 
  mutate(causal_candidate = factor(causal_candidate, levels = c("direct", "absence")), 
         causal_candidate= fct_relevel(causal_candidate, "absence"))

# check levels of causal_candidate
contrasts(df.model$causal_candidate)
##         [,1]
## absence    1
## direct    -1
fit.brm.exp.adults = brm(formula = response ~ 1 + causal_candidate + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/explanation_adult") 
fit.brm.exp.adults
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: response ~ 1 + causal_candidate + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 120) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 30) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.51      0.38     0.02     1.40 1.00     4162     5192
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.02      1.13     0.03     4.15 1.00     4300     5629
## 
## Regression Coefficients:
##                   Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept             0.81      0.84    -1.15     2.48 1.00     5430     4800
## causal_candidate1     2.01      0.34     1.41     2.74 1.00    12971     8135
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
6.1.1.2.2 Follow-up analysis
fit.brm.exp.adults %>% 
  emmeans(spec = pairwise ~ causal_candidate, 
          type = "response")
## $emmeans
##  causal_candidate response lower.HPD upper.HPD
##  absence             0.943     0.786     1.000
##  direct              0.240     0.002     0.508
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast         odds.ratio lower.HPD upper.HPD
##  absence / direct       53.5      9.13       183
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
6.1.1.2.3 Means and Confidence Interval
set.seed(1)

df.exp3_adults %>% 
  select(-response) %>% 
  pivot_longer(cols = direct:absence,
               names_to = "causal_candidate",
               values_to = "response") %>% 
  group_by(causal_candidate) %>%
  summarise(
    mean = mean(response),
    ci_lower = mean_cl_normal(response)$ymin,
    ci_upper = mean_cl_normal(response)$ymax,
  )
## # A tibble: 2 × 4
##   causal_candidate  mean ci_lower ci_upper
##   <chr>            <dbl>    <dbl>    <dbl>
## 1 absence          0.933    0.868    0.998
## 2 direct           0.267    0.151    0.382

6.1.2 CHILDREN

6.1.2.1 DATA

6.1.2.1.1 Read in Data
df.exp3_children = read_csv("../../data/experiment3/children/exp3_children.csv")
6.1.2.1.2 Wrangle
# clean data
df.exp3_children = df.exp3_children %>% 
  clean_names() %>% 
  rename(response_1 = full_response_11,
         response_2 = full_response_13) %>% 
  select(participant_number, rounded_age, scenario_1, scenario_2, response_1, response_2, gender, lan_spoken) %>%
  pivot_longer(
    cols = c(-participant_number, -rounded_age, -gender, -lan_spoken),
    names_to = c(".value"),
    names_pattern = "(^scenario|^response)"
  ) %>% 
  mutate(response = str_remove_all(string = response,
                                   pattern = "\""),
         response = str_remove_all(string = response,
                                   pattern = ":"),
         response = str_remove_all(string = response,
                                   pattern = "\\("),
         response = str_remove_all(string = response,
                                   pattern = "\\)"),
         response = str_remove_all(string = response,
                                   pattern = "\\}"),
         response = str_remove_all(string = response,
                                   pattern = "\\."),
         response = str_remove_all(string = response,
                                   pattern = "\\,"),
         response = str_remove_all(string = response,
                                   pattern = "\\\\n"),
         response = str_to_lower(response))

# code responses
df.exp3_children = df.exp3_children %>%
  mutate(direct = ifelse(str_detect(response,
                                    "^sun$|sun | sun$"), 1, 0),
         direct = ifelse(str_detect(response,
                                    "^water$|water | water$"), 1, direct),
         direct = ifelse(str_detect(response,
                                    "^rain$|rain | rain$"), 1, direct),
         absence = ifelse(str_detect(response,
                                     "^sunscreen$|sunscreen | sunscreen$"), 1, 0),
         absence = ifelse(str_detect(response,
                                     "^window$|window | window$"), 1, absence),
         absence = ifelse(str_detect(response,
                                     "^windows$|windows | windows$"), 1, absence),
         absence = ifelse(str_detect(response,
                                     "^latch$|latch | latch$"), 1, absence),
         age = round(rounded_age / 365, digits = 2)) %>% 
  rename(participant = participant_number)

df.exp3_children %>% 
  summarise(n_distinct(participant))
## # A tibble: 1 × 1
##   `n_distinct(participant)`
##                       <int>
## 1                        91
6.1.2.1.3 Demographics
 # gender
df.exp3_children %>%
  group_by(gender) %>%
  summarise(n = n_distinct(participant)) %>%
  drop_na() %>% 
  print()
## # A tibble: 2 × 2
##   gender     n
##   <chr>  <int>
## 1 female    53
## 2 male      38
 # language spoken
df.exp3_children %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(participant)) %>%
    print()
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en               89
## 2 <NA>              2

6.1.2.2 STATS

6.1.2.2.1 Bayesian Model
df.model = df.exp3_children %>% 
  select(-response) %>% 
  pivot_longer(cols = direct:absence,
               names_to = "causal_candidate",
               values_to = "response") %>% 
  mutate(causal_candidate = factor(causal_candidate, levels = c("direct", "absence")),
         causal_candidate= fct_relevel(causal_candidate, "absence"))

# check levels of causal_candidate
contrasts(df.model$causal_candidate)
##         [,1]
## absence    1
## direct    -1
# removed random intercepts for scenario to avoid divergent transition errors
fit.brm.exp.children = brm(formula = response ~ 1 + causal_candidate + (1 | participant), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/explanation_children") 

fit.brm.exp.children
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: response ~ 1 + causal_candidate + (1 | participant) 
##    Data: df.model (Number of observations: 360) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 90) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.14      0.10     0.01     0.39 1.00     8586     5465
## 
## Regression Coefficients:
##                   Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept            -0.24      0.13    -0.49     0.01 1.00    25781     8755
## causal_candidate1     1.24      0.13     0.99     1.49 1.00    25146     7929
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
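To double-check that dropping the scenario intercepts (see the comment above the model) leaves no divergent transitions, one could inspect the sampler diagnostics; a minimal sketch, not part of the original analysis, using nuts_params() (re-exported by brms from bayesplot):

nuts_params(fit.brm.exp.children) %>% 
  filter(Parameter == "divergent__") %>% 
  summarise(n_divergent = sum(Value))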
6.1.2.2.2 Follow-up analysis
fit.brm.exp.children %>% 
  emmeans(spec = pairwise ~ causal_candidate, 
          type = "response")
## $emmeans
##  causal_candidate response lower.HPD upper.HPD
##  absence             0.730     0.664     0.791
##  direct              0.187     0.131     0.245
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast         odds.ratio lower.HPD upper.HPD
##  absence / direct       11.8      6.73      18.7
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
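As a rough consistency check: with sum coding (absence = +1, direct = -1), the model implies an absence/direct odds ratio of exp(2 * 1.24), roughly 11.9, in line with the 11.8 reported above.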
6.1.2.2.3 Means and Confidence Interval
set.seed(1)

df.exp3_children %>% 
  select(-response) %>% 
  drop_na() %>% 
  pivot_longer(cols = direct:absence,
               names_to = "causal_candidate",
               values_to = "response") %>% 
  group_by(causal_candidate) %>%
  summarise(
    mean = mean(response),
    ci_lower = mean_cl_normal(response)$ymin,
    ci_upper = mean_cl_normal(response)$ymax,
  )
## # A tibble: 2 × 4
##   causal_candidate  mean ci_lower ci_upper
##   <chr>            <dbl>    <dbl>    <dbl>
## 1 absence          0.725    0.658    0.791
## 2 direct           0.191    0.133    0.249

6.1.3 PLOTS

6.1.3.1 Response Patterns by Age

df.plot = df.exp3_adults %>% 
  mutate(age_whole = 7) %>% 
  bind_rows(df.exp3_children %>% 
               mutate(age_whole = as.integer(age))) %>% 
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both causes",
    direct == 0 & absence == 0 ~ "neither cause",
    direct == 1 & absence == 0  ~ "direct cause only",
    direct == 0 & absence == 1  ~ "absent cause only",
  ),
  question = recode(question, "cause" = "caused"),
  age_whole = as.factor(age_whole)) %>% 
  group_by(age_whole, response_pattern) %>% 
  summarize(n = n()) %>% 
  ungroup() %>%
  group_by(age_whole) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  ungroup() %>% 
  mutate(response_pattern = factor(response_pattern, levels = c("absent cause only", "direct cause only", "both causes", "neither cause"))) %>% 
  filter(!is.na(age_whole))

custom_labels = function(breaks) {
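  # builds markdown x-axis labels that pair each age group with its sample size;
  # the breaks argument is not used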
  counts = df.exp3_adults %>% 
    mutate(age_whole = rep(7)) %>% 
    bind_rows(df.exp3_children %>% 
                mutate(age_whole = as.integer(age))) %>% 
    mutate(age_whole = factor(age_whole,
                              levels = c(4, 5, 6, 7),
                              labels = c("4", "5", "6", "Adults"))) %>%
    filter(!is.na(age_whole)) %>% 
    group_by(age_whole) %>%
    summarize(count = n_distinct(participant)) %>%
    pull(count)
  labels = c("4", "5", "6", "Adults")
  labels_with_size = paste0("<span style='font-size:16px;'>", labels, "</span>")
  counts_label1 = paste0("<span style='font-size:12px;'>(n=", counts[1], ")</span>")
  counts_label2 = paste0("<span style='font-size:12px;'>(", counts[-1], ")</span>")
  
  combined_labels = c(paste0(labels_with_size[1], "<br>", counts_label1), 
                       paste0(labels_with_size[-1], "<br>", counts_label2))
  return(combined_labels)
}

ggplot(data = df.plot,
       mapping = aes(x = age_whole,
                     y = percentage,
                     group = response_pattern,
                     color = response_pattern,
                     fill = response_pattern)) +
  geom_col(color = "black") +
    scale_fill_manual(values = c("absent cause only" = "#E41A1C",
                               "direct cause only" = "#377EB8",
                               "both causes" = "#984EA3",
                               "neither cause" = "#999999")) +
  scale_color_manual(values = c("absent cause only" = "#E41A1C",
                                "direct cause only" = "#377EB8",
                                "both causes" = "#984EA3",
                                "neither cause" = "#999999")) +
  scale_shape_manual(values = c(21, 23)) +
   scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = str_c(seq(0, 100, 25), "%"),
                     expand = expansion(add = c(0, 0))) +
 scale_x_discrete(breaks = 4:7,
                  labels = custom_labels(4:7),
                  expand = expansion(add = c(0, 0))) +
  coord_cartesian(clip = "off") +
  labs(y = "Response pattern \n\ probability") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_markdown(),
        axis.title.x = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank()) 
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.

ggsave("../../figures/experiment3/explanation_response_patterns.pdf", height = 4, width = 8)
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.

7 APPENDIX

8 PREREGISTERED ANALYSES

9 EXPERIMENT 1: CHAINS

9.1 CAUSED VS. LEXICAL (BREAK & CRACK)

9.1.1 ADULTS

9.1.1.1 STATS

9.1.1.1.1 Bayesian Models
9.1.1.1.1.1 Distal
df.model = df.exp1_cause_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "cause"))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## caused     1
## lexical   -1
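# with sum coding, question1 codes the caused question as +1 and the lexical question as -1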
# removed random intercepts for scenario to avoid divergent transitions during sampling
fit.brm.cause.adult.distal = brm(formula = distal ~ 1 + question + (1 | participant), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_adult_distal") 

fit.brm.cause.adult.distal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question + (1 | participant) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.76      0.40     1.04     2.62 1.00     3578     5451
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.22      0.29    -0.36     0.80 1.00     5149     6325
## question1     1.63      0.25     1.18     2.14 1.00     6788     7561
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
9.1.1.1.1.2 Proximal
fit.brm.cause.adult.proximal = brm(formula = proximal ~ 1 + question + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/cause_adult_proximal") 

fit.brm.cause.adult.proximal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.08      0.46     1.28     3.07 1.00     3443     6048
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.88      1.38     0.45     5.48 1.00     6987     8112
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.00      1.22    -2.60     2.52 1.00     6429     6248
## question1    -1.73      0.28    -2.32    -1.23 1.00     7894     8405
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

9.1.2 CHILDREN

9.1.2.1 STATS

9.1.2.1.1 Bayesian Models
9.1.2.1.1.1 Distal
df.model = df.exp1_cause_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"),
         age = age - mean(age))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## caused     1
## lexical   -1
fit.brm.cause.children.distal = brm(formula = distal ~ 1 + question + (1 | participant) + (1 | scenario),
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_children_distal") 

fit.brm.cause.children.distal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.07      0.21     0.67     1.50 1.00     2760     4177
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.14      1.18     0.09     4.35 1.00     3263     4606
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.40      0.92    -2.28     1.63 1.00     3577     3496
## question1     1.26      0.13     1.02     1.52 1.00     6373     8234
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
fit.brm.cause.children.distal.age = brm(formula = distal ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_children_distal_age") 

fit.brm.cause.children.distal.age 
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.09      0.21     0.68     1.51 1.00     3219     4455
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.11      1.14     0.08     4.11 1.00     3828     3926
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.36      0.84    -2.03     1.55 1.00     5623     5830
## question1         1.28      0.13     1.04     1.53 1.00     7618     8223
## age               0.11      0.09    -0.07     0.29 1.00     8230     8377
## question1:age     0.12      0.07    -0.01     0.27 1.00    15153     9506
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
9.1.2.1.1.2 Proximal
# removed random intercepts for scenario to avoid divergent transitions during sampling
fit.brm.cause.children.proximal = brm(formula = proximal ~ 1 + question + (1 | participant),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/cause_children_proximal") 

fit.brm.cause.children.proximal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question + (1 | participant) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.25      0.21     0.85     1.69 1.00     3341     5390
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.40      0.15     0.11     0.70 1.00     6292     7575
## question1    -1.33      0.13    -1.59    -1.08 1.00     6915     8651
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
fit.brm.cause.children.proximal.age = brm(formula = proximal ~ 1 + question*age + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/cause_children_proximal_age") 

fit.brm.cause.children.proximal.age
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.39      0.23     0.97     1.87 1.00     3602     6098
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.41      1.24     0.24     4.65 1.00     5224     6721
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.35      0.99    -1.79     2.38 1.00     4679     5229
## question1        -1.42      0.14    -1.71    -1.15 1.00     6628     7242
## age              -0.02      0.11    -0.23     0.19 1.00     5898     7203
## question1:age    -0.15      0.08    -0.31    -0.00 1.00    11915     8954
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

9.2 MADE VS. LEXICAL (BREAK & CRACK)

9.2.1 ADULTS

9.2.1.1 STATS

9.2.1.1.1 Bayesian Models
9.2.1.1.1.1 Distal
df.model = df.exp1_made_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## made       1
## lexical   -1
fit.brm.made.adult.distal = brm(formula = distal ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/made_adult_distal") 

fit.brm.made.adult.distal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.95      0.42     1.22     2.86 1.00     3940     7021
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.85      1.37     0.44     5.26 1.00     6219     7863
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.54      1.23    -3.02     2.02 1.00     5955     6100
## question1     0.91      0.20     0.54     1.31 1.00    10580     8481
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
9.2.1.1.1.2 Proximal
fit.brm.made.adult.proximal = brm(formula = proximal ~ 1 + question + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/made_adult_proximal") 

fit.brm.made.adult.proximal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.62      0.37     0.97     2.41 1.00     3907     6115
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.78      1.33     0.39     5.34 1.00     5501     7218
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.54      1.16    -2.03     2.81 1.00     4566     5201
## question1    -0.73      0.18    -1.10    -0.39 1.00    12157     8960
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

9.2.2 CHILDREN

9.2.2.1 STATS

9.2.2.1.1 Bayesian Models
9.2.2.1.1.1 Distal
df.model = df.exp1_made_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"),
         age = age - mean(age))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## made       1
## lexical   -1
fit.brm.made.children.distal = brm(formula = distal ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/made_children_distal") 

fit.brm.made.children.distal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.35      0.22     0.95     1.81 1.00     4048     7725
## 
## ~scenario (Number of levels: 4) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.55      1.26     0.28     4.63 1.00     4087     6573
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.43      0.96    -2.04     1.85 1.00     5230     5617
## question1     0.76      0.11     0.54     0.98 1.00    12648     9107
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
fit.brm.made.children.distal.age = brm(formula = distal ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/made_children_distal_age") 

fit.brm.made.children.distal.age
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.47      0.23     1.03     1.94 1.00     3842     6534
## 
## ~scenario (Number of levels: 4) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.64      1.30     0.29     4.94 1.00     3842     6035
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.43      0.98    -2.07     1.94 1.00     4498     5955
## question1         0.79      0.12     0.57     1.02 1.00    11171     9241
## age               0.03      0.10    -0.16     0.22 1.00     6026     7788
## question1:age     0.29      0.07     0.16     0.43 1.00    13090     9093
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
9.2.2.1.1.2 Proximal
fit.brm.proximal = brm(formula = proximal ~ 1 + question + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/made_children_proximal") 

fit.brm.proximal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.36      0.22     0.95     1.82 1.00     3488     6169
## 
## ~scenario (Number of levels: 4) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.48      1.18     0.27     4.48 1.00     4167     7255
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.43      0.91    -1.76     1.96 1.00     5196     5947
## question1    -0.73      0.11    -0.96    -0.52 1.00    11986     8483
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
fit.brm.proximal.age = brm(formula = proximal ~ 1 + question*age + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/made_children_proximal_age") 

fit.brm.proximal.age
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.45      0.23     1.03     1.92 1.00     3784     6649
## 
## ~scenario (Number of levels: 4) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.61      1.25     0.29     4.85 1.00     4998     7797
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.44      1.00    -1.98     2.07 1.00     7395     6172
## question1        -0.75      0.12    -0.98    -0.53 1.00    17109     8905
## age              -0.09      0.10    -0.29     0.09 1.00     9622    10067
## question1:age    -0.26      0.07    -0.40    -0.13 1.00    20926     8836
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

9.3 CAUSED VS. MADE

9.3.1 ADULTS

9.3.1.1 DATA

9.3.1.1.1 Wrangle
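# combine the caused-question trials from the caused vs. lexical study with the
# made-question trials from the made vs. lexical study; since these come from
# separate samples, new participant IDs are assigned (two rows per participant,
# one per scenario)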
df.exp1_cause_made_adults = df.exp1_cause_adults %>% 
  filter(question == "caused") %>% 
  select(-participant) %>% 
  bind_rows(df.exp1_made_adults %>% 
              filter(question == "made") %>% 
              select(-participant)) %>% 
  mutate(participant = rep(1:(n()/2), each = 2))

9.3.1.2 STATS

9.3.1.2.1 Bayesian Models
9.3.1.2.1.1 Distal
df.model = df.exp1_cause_made_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

# Check contrast values
 contrasts(df.model$question)
##        [,1]
## made      1
## caused   -1
fit.brm.cause.made.adult.distal = brm(formula = distal ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_made_adult_distal") 

fit.brm.cause.made.adult.distal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 120) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.61      0.64     1.52     4.03 1.00     3070     4464
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.92      1.41     0.45     5.61 1.00     6385     8576
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.05      1.32    -2.94     2.51 1.00     5924     6799
## questioncause     2.23      0.76     0.90     3.91 1.00     5355     6747
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
9.3.1.2.1.2 Proximal
fit.brm.cause.made.adult.proximal = brm(formula = proximal ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_made_adult_proximal") 

fit.brm.cause.made.adult.proximal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 120) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.10      0.54     1.14     3.28 1.00     2757     4417
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.79      1.39     0.39     5.36 1.00     6007     8006
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.20      1.21    -2.21     2.85 1.00     5915     5857
## questioncause    -1.78      0.61    -3.10    -0.72 1.00     5550     6020
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

9.3.2 CHILDREN

9.3.2.1 DATA

9.3.2.1.1 Wrangle
df.exp1_cause_made_children = df.exp1_cause_children %>% 
  filter(question == "caused") %>% 
  select(-caused_question_order, participant) %>% 
  bind_rows(df.exp1_made_children %>% 
              filter(question == "made") %>% 
              select(-made_question_order, participant)) %>% 
  mutate(participant = rep(1:(n()/2), each = 2))

9.3.2.2 STATS

9.3.2.2.1 Bayesian Models
9.3.2.2.1.1 Distal
df.model = df.exp1_cause_made_children %>% 
  mutate(question = fct_relevel(question, "made"),
         age = age - mean(age))

# Check contrast values
 contrasts(df.model$question)
##        [,1]
## made      1
## caused   -1
fit.brm.cause.made.children.distal = brm(formula = distal ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_made_children_distal.rds") 

fit.brm.cause.made.children.distal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: distal ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 528) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 264) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.66      0.30     1.12     2.27 1.00     2995     4707
## 
## ~scenario (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.91      0.90     0.06     3.28 1.00     3438     3940
## 
## Regression Coefficients:
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept             -0.11      0.70    -1.55     1.37 1.00     4756     5013
## questioncaused         1.14      0.33     0.53     1.83 1.00     7332     8035
## age                    0.39      0.13     0.14     0.65 1.00     6166     7222
## questioncaused:age    -0.09      0.19    -0.47     0.27 1.00     6853     8612
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
9.3.2.2.1.2 Proximal
fit.brm.cause.made.children.proximal = brm(formula = proximal ~ 1 + question*age + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/cause_made_children_proximal") 

fit.brm.cause.made.children.proximal
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: proximal ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 528) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 264) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.93      0.32     1.35     2.60 1.00     3199     6161
## 
## ~scenario (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.08      0.94     0.14     3.62 1.00     4735     5174
## 
## Regression Coefficients:
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              0.15      0.78    -1.53     1.74 1.00     6771     5868
## questioncaused        -1.40      0.37    -2.17    -0.72 1.00     7565     7429
## age                   -0.44      0.15    -0.74    -0.15 1.00     6506     6823
## questioncaused:age     0.21      0.22    -0.22     0.64 1.00     7584     7812
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

9.4 PLOTS

9.4.1 Caused vs. Lexical

set.seed(1)

df.model = df.exp1_cause_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

fit.brm.distal.cause.age2 = brm(formula = distal ~ 1 + question*age + (1 | participant) + (1 | scenario),
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .9999),
                     file = "cache/fit.brm.distal.cause.age2")

fit.brm.proximal.cause.age2 = brm(formula = proximal ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 6000,
                     warmup = 1000,
                     control = list(adapt_delta = .9999),
                     file = "cache/fit.brm.proximal.cause.age2")

df.data = expand_grid(age = seq(4, 9, 0.1),
                      question = c("caused", "lexical"))

df.prediction = fit.brm.distal.cause.age2 %>% 
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>% 
  as_tibble() %>% 
  bind_cols(df.data) %>% 
  mutate(type = "distal") %>% 
  bind_rows(fit.brm.proximal.cause.age2 %>% 
              fitted(newdata = df.data,
                     re_formula = NA,
                     probs = c(0.1, 0.9)) %>% 
              as_tibble() %>% 
              bind_cols(df.data) %>% 
              mutate(type = "proximal")) %>% 
  clean_names()

df.adults = df.exp1_cause_adults %>% 
  select(-response) %>% 
  mutate(age_whole = rep(10)) %>% 
  pivot_longer(cols = (proximal:distal),
               names_to = "type",
               values_to = "response")

df.children = df.exp1_cause_children %>% 
  select(-response) %>% 
  pivot_longer(cols = distal:proximal,
               names_to = "type",
               values_to = "response")

df.means = df.children %>% 
  bind_rows(df.adults) %>% 
  mutate(age_whole = factor(age_whole, 
                            levels = c(4, 5, 6, 7, 8, 9, 10), 
                            labels = c("4", "5", "6", "7", "8", "9", "adults")),
         question = recode(question, "cause" = "caused")) %>% 
  group_by(age_whole, question, type) %>% 
  summarize(response = Hmisc::smean.cl.boot(response)) %>%
  mutate(index = c("response", "low", "high")) %>%
  ungroup() %>% 
  pivot_wider(names_from = index,
              values_from = response) %>% 
  mutate(age_whole = fct_recode(age_whole, "10" = "adults"),
         age_whole = as.numeric(as.character(age_whole)))

df.means = df.means %>%
  mutate(question = factor(question, levels = c("caused", "lexical")))

custom_labels = as_labeller(c("distal" = "Distal Cause", "proximal" = "Proximal Cause"))

p3 = ggplot(mapping = aes(x = age, 
                     y = response,
                     color = question)) + 
  geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90,
                            color = question,
                            fill = question),
              stat = "identity",
              alpha = 0.2,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = question,
                                shape = question),
                  color = "black",
                  size = 0.5,
                  show.legend = TRUE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.1)) +
    facet_wrap(~type,
             labeller = custom_labels) + 
  scale_fill_manual(values = c("caused" = "#FF7F00", "lexical" = "#4DAF4A"), breaks=c("caused", "lexical")) +
  scale_color_manual(values = c("caused" = "#FF7F00", "lexical" = "#4DAF4A"), breaks=c("caused", "lexical")) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                      labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
  scale_x_continuous(breaks = 4:10,
                     labels = c("4", "5", "6", "7", "8", "9", "Adults")) +
  labs(y = "Probability of selecting \n causal candidate") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        text = element_text(size = 18)) 
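
# p3 is combined with the corresponding made vs. lexical plot (p4) and saved below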

9.4.2 Made vs. Lexical

df.model = df.exp1_made_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

fit.brm.distal.age2.made = brm(formula = distal ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .9999),
                     file = "cache/fit.brm.distal.age2.made")

fit.brm.proximal.age2.made = brm(formula = proximal ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .9999),
                     file = "cache/fit.brm.proximal.age2.made")

df.data = expand_grid(age = seq(4, 9, 0.1),
                      question = c("made", "lexical"))

df.prediction = fit.brm.distal.age2.made %>% 
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>% 
  as_tibble() %>% 
  bind_cols(df.data) %>% 
  mutate(type = "distal") %>% 
  bind_rows(fit.brm.proximal.age2.made %>% 
              fitted(newdata = df.data,
                     re_formula = NA,
                     probs = c(0.1, 0.9)) %>% 
              as_tibble() %>% 
              bind_cols(df.data) %>% 
              mutate(type = "proximal")) %>% 
  clean_names()

df.adults = df.exp1_made_adults %>% 
  select(-response) %>% 
  mutate(age_whole = rep(10)) %>% 
  pivot_longer(cols = (proximal:distal),
               names_to = "type",
               values_to = "response")

df.children = df.exp1_made_children %>% 
  select(-response) %>% 
  pivot_longer(cols = distal:proximal,
               names_to = "type",
               values_to = "response")

df.means = df.children %>% 
  bind_rows(df.adults) %>% 
  mutate(age_whole = factor(age_whole, 
                            levels = c(4, 5, 6, 7, 8, 9, 10), 
                            labels = c("4", "5", "6", "7", "8", "9", "adults"))) %>% 
  group_by(age_whole, question, type) %>% 
  summarize(response = Hmisc::smean.cl.boot(response)) %>%
  mutate(index = c("response", "low", "high")) %>%
  ungroup() %>% 
  pivot_wider(names_from = index,
              values_from = response) %>% 
  mutate(age_whole = fct_recode(age_whole, "10" = "adults"),
         age_whole = as.numeric(as.character(age_whole)))

custom_labels = as_labeller(c("distal" = "Distal Cause", "proximal" = "Proximal Cause"))

df.means = df.means %>%
  mutate(question = factor(question, levels = c("made", "lexical")))

p4 = ggplot(mapping = aes(x = age, 
                     y = response,
                     color = question)) + 
  geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90,
                            color = question,
                            fill = question),
              stat = "identity",
              alpha = 0.2,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = question,
                                shape = question),
                  color = "black",
                  size = 0.5,
                  show.legend = TRUE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.3)) +
  facet_wrap(~type,
             labeller = custom_labels) + 
  scale_fill_manual(values = c("made" = "#FFEA00", "lexical" = "#4DAF4A"), breaks=c("made", "lexical")) +
  scale_color_manual(values = c("made" = "#FFEA00", "lexical" = "#4DAF4A"), breaks=c("made", "lexical")) +
  scale_shape_manual(values = c(21, 23), breaks=c("made", "lexical")) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = str_c(seq(0, 1, 0.25) * 100, "%"),
                     limits = c(0, 1)) +
  scale_x_continuous(breaks = 4:10,
                     labels = c("4", "5", "6", "7", "8", "9", "Adults")) +
  labs(y = "Probability of selecting \n causal candidate") + 
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        text = element_text(size = 18)) 

9.4.3 Combine Cause and Made Plots

plot = p3 + p4 + 
plot_layout(nrow = 1) &
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(face = "bold"))

ggsave(plot, filename = "../../figures/experiment1/chain_all.pdf", height = 5, width = 18)

plot

10 EXPERIMENT 2: ABSENCES

10.1 CAUSED VS. LEXICAL (BURN & FLOOD)

10.1.1 ADULTS

10.1.1.1 STATS

10.1.1.1.1 Bayesian Models
10.1.1.1.1.1 Direct
df.model = df.exp2_cause_adults  %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "cause"))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## lexical    1
## caused    -1
fit.brm.direct = brm(formula = direct ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_adult_direct") 
fit.brm.direct
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: direct ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.44      0.28     0.02     1.06 1.00     2884     4815
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.91      1.09     0.02     3.83 1.00     3936     5789
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.94      0.80    -0.87     2.50 1.00     5324     3725
## question1    -0.98      0.18    -1.33    -0.64 1.00    13197     7690
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
10.1.1.1.1.2 Absent
fit.brm.absent = brm(formula = absence ~ 1 + question + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/cause_adult_absent") 

fit.brm.absent
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: absence ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.78      0.40     0.06     1.58 1.00     2147     3015
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.84      1.35     0.44     5.42 1.00     6388     8208
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.69      1.21    -3.08     1.96 1.00     6229     6382
## question1     1.80      0.26     1.34     2.35 1.00     5706     6441
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

10.1.2 CHILDREN

10.1.2.1 STATS

10.1.2.1.1 Bayesian Models
10.1.2.1.1.1 Direct
df.model = df.exp2_cause_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"),
         age = age - mean(age))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## caused     1
## lexical   -1
fit.brm.direct = brm(formula = direct ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_children_direct") 
fit.brm.direct
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: direct ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.10      0.23     0.65     1.57 1.00     3018     3252
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.05      1.17     0.03     4.08 1.00     3414     4522
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         1.50      0.89    -0.71     3.02 1.00     4457     3880
## question1        -0.71      0.13    -0.96    -0.47 1.00    12601     8496
## age               0.11      0.10    -0.08     0.30 1.00     8481     8384
## question1:age    -0.24      0.08    -0.40    -0.10 1.00    12680     8770
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
10.1.2.1.1.2 Absent
# removed random intercepts for scenario to avoid divergent transitions during sampling
fit.brm.absent = brm(formula = absence ~ 1 + question*age + (1 | participant),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/cause_children_absent") 

fit.brm.absent
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: absence ~ 1 + question * age + (1 | participant) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.39      0.24     0.96     1.90 1.00     2957     5775
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -1.65      0.20    -2.07    -1.28 1.00     3844     5354
## question1         0.86      0.13     0.61     1.12 1.00     7800     7943
## age               0.30      0.10     0.10     0.52 1.00     5815     6704
## question1:age     0.32      0.08     0.17     0.47 1.00    10485     8171
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

10.2 MADE VS. LEXICAL (BURN & FLOOD)

10.2.1 ADULTS

10.2.1.1 STATS

10.2.1.1.1 Bayesian Models
10.2.1.1.1.1 Direct
df.model = df.exp2_made_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## made       1
## lexical   -1
fit.brm.direct = brm(formula = direct ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/made_adult_direct") 
fit.brm.direct
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: direct ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.12      0.35     0.45     1.84 1.00     3249     2870
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.01      1.20     0.02     4.23 1.00     3412     5142
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     1.12      0.90    -0.99     2.80 1.00     3834     3692
## question1    -0.80      0.18    -1.16    -0.45 1.00    12552     8890
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
10.2.1.1.1.2 Absent
fit.brm.absent = brm(formula = absence ~ 1 + question + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/made_adult_absent") 

fit.brm.absent
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: absence ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 60) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.70      0.41     0.97     2.59 1.00     3504     6833
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.81      1.42     0.40     5.56 1.00     6267     7502
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.44      1.19    -2.78     2.13 1.00     6362     6473
## question1     1.69      0.27     1.20     2.25 1.00     6807     7150
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

10.2.2 CHILDREN

10.2.2.1 STATS

10.2.2.1.1 Bayesian Models
10.2.2.1.1.1 Direct
df.model = df.exp2_made_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"),
         age = age - mean(age)) 

# Check contrast values
 contrasts(df.model$question)
##         [,1]
## made       1
## lexical   -1
fit.brm.direct = brm(formula = direct ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/made_children_direct") 
fit.brm.direct
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: direct ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.47      0.25     1.02     1.99 1.00     4397     6350
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.84      1.12     0.01     3.93 1.00     3669     6737
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         1.74      0.79    -0.30     3.07 1.00     5064     4015
## question1        -0.54      0.13    -0.79    -0.29 1.00    16551     8622
## age               0.18      0.11    -0.03     0.40 1.00     8405     9146
## question1:age    -0.29      0.08    -0.45    -0.13 1.00    17219     9223
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
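To see how the question effect changes across (mean-centered) age, the interaction can be visualized directly with brms::conditional_effects; a minimal sketch:
# fitted probability of selecting the direct cause by centered age and question
conditional_effects(fit.brm.direct, effects = "age:question")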
10.2.2.1.1.2 Absent
# removed random intercepts for scenario to avoid divergent transition errors
fit.brm.absent = brm(formula = absence ~ 1 + question*age + (1 | participant),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 4000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999),
                      file = "cache/made_children_absent") 


fit.brm.absent
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: absence ~ 1 + question * age + (1 | participant) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.77      0.30     1.24     2.41 1.00     3330     5599
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -2.27      0.26    -2.83    -1.78 1.00     4341     6802
## question1         0.73      0.15     0.45     1.02 1.00    12110     8581
## age               0.35      0.12     0.11     0.61 1.00     5983     7423
## question1:age     0.08      0.08    -0.07     0.24 1.00    16124     9387
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
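The age effect can also be unpacked into separate posterior age slopes per question type with emmeans::emtrends (a sketch, not part of the original analysis):
# posterior age slopes on the logit scale for "made" and "lexical" questions
fit.brm.absent %>% 
  emtrends(specs = ~ question, var = "age")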

10.3 CAUSED VS. MADE

10.3.1 ADULTS

10.3.1.1 DATA

10.3.1.1.1 Wrangle
df.exp2_cause_made_adults = df.exp2_cause_adults %>% 
  filter(question == "caused") %>% 
  select(-participant) %>% 
  bind_rows(df.exp2_made_adults %>% 
  filter(question == "made") %>% 
  select(-participant)) %>% 
  mutate(participant = rep(1:(n()/2), each = 2))
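A quick integrity check (a sketch, not in the original analysis) that the reassigned ids pair up as intended, with every participant contributing exactly two rows (one per scenario):
# each reassigned participant id should appear exactly twice
df.exp2_cause_made_adults %>% 
  count(participant) %>% 
  count(n)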

10.3.1.2 STATS

10.3.1.2.1 Bayesian Models
10.3.1.2.1.1 Direct
df.model = df.exp2_cause_made_adults %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

# Check contrast values
 contrasts(df.model$question)
##        [,1]
## made      1
## caused   -1
fit.brm.cause.made.adult.direct = brm(formula = direct ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_made_adult_direct") 

fit.brm.cause.made.adult.direct
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: direct ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 120) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.86      0.39     0.09     1.65 1.00     1785     2322
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.87      1.08     0.02     3.90 1.00     3784     6568
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.42      0.76    -1.31     1.99 1.00     5823     4428
## questioncause    -0.40      0.33    -1.08     0.24 1.00    12691     8557
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
10.3.1.2.1.2 Absent
fit.brm.cause.made.adult.absent = brm(formula = absence ~ 1 + question + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_made_adult_absence") 

fit.brm.cause.made.adult.absent
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: absence ~ 1 + question + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 240) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 120) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.49      0.52     0.47     2.56 1.00     1625     1461
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.16      1.42     0.62     5.99 1.00     7552     8311
## 
## Regression Coefficients:
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.82      1.34    -2.05     3.44 1.00     6489     6740
## questioncause     0.05      0.46    -0.86     0.98 1.00     9075     8208
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

10.3.2 CHILDREN

10.3.2.1 DATA

10.3.2.1.1 Wrangle
df.exp2_cause_made_children = df.exp2_cause_children %>% 
  filter(question == "caused") %>% 
  select(-cause_question_order, participant) %>% 
  bind_rows(df.exp2_made_children %>% 
  filter(question == "made") %>% 
  select(-made_question_order, participant)) %>% 
  mutate(participant = rep(1:(n()/2), each = 2))

10.3.2.2 STATS

10.3.2.2.1 Bayesian Models
10.3.2.2.1.1 Direct
df.model = df.exp2_cause_made_children %>% 
  mutate(question = fct_relevel(question, "made"),
         age = age - mean(age))

# Check contrast values
 contrasts(df.model$question)
##        [,1]
## made      1
## caused   -1
fit.brm.cause.made.children.direct = brm(formula = direct ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_made_children_direct.rds") 

fit.brm.cause.made.children.direct
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: direct ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 300) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.20      0.27     0.68     1.74 1.00     2101     3362
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.83      1.21     0.01     4.10 1.00     2057     1814
## 
## Regression Coefficients:
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              1.12      0.90    -0.99     2.49 1.00     3081     1376
## questioncaused        -0.25      0.25    -0.75     0.24 1.00    10711     8959
## age                   -0.10      0.11    -0.31     0.11 1.00     6676     3990
## questioncaused:age    -0.04      0.15    -0.34     0.25 1.00     7290     8063
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
10.3.2.2.1.2 Absent
fit.brm.cause.made.children.absent = brm(formula = absence ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999),
                     file = "cache/cause_made_children_absence.rds") 

fit.brm.cause.made.children.absent
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: absence ~ 1 + question * age + (1 | participant) + (1 | scenario) 
##    Data: df.model (Number of observations: 600) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 300) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.97      0.32     1.39     2.65 1.00     3339     6295
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.91      1.18     0.02     4.05 1.00     3863     6629
## 
## Regression Coefficients:
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept             -1.52      0.82    -3.05     0.43 1.00     5003     4412
## questioncaused         0.65      0.34    -0.01     1.33 1.00     8443     8342
## age                    0.45      0.15     0.18     0.76 1.00     6743     7189
## questioncaused:age     0.29      0.20    -0.10     0.70 1.00     6932     8676
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
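As an interpretation aid (not part of the original write-up), the credible age effect can be expressed as an odds ratio per year of age by exponentiating the population-level estimate; a minimal sketch:
# odds ratio per year of age for selecting the absent cause (population-level "age" coefficient)
exp(fixef(fit.brm.cause.made.children.absent)["age", c("Estimate", "Q2.5", "Q97.5")])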

10.4 PLOTS

10.4.1 Caused vs. Lexical

set.seed(1)

df.model = df.exp2_cause_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "caused"))

# increased iter, adapt_delta, and max_treedepth to avoid divergent transitions
fit.brm.present.cause.age2 = brm(formula = direct ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 6000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999,
                                    max_treedepth = 15),
                     file = "cache/fit.brm.present.cause.age2") 

fit.brm.absence.cause.age2 = brm(formula = absence ~ 1 + question*age + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 6000,
                      warmup = 1000,
                      control = list(adapt_delta = .999999,
                                     max_treedepth = 15),
                      file = "cache/fit.brm.absence.cause.age2") 

df.data = expand_grid(age = seq(4, 9, 0.1),
                      question = c("caused", "lexical"))

df.prediction = fit.brm.present.cause.age2 %>% 
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>% 
  as_tibble() %>% 
  bind_cols(df.data) %>% 
  mutate(type = "presence") %>% 
  bind_rows(fit.brm.absence.cause.age2 %>% 
              fitted(newdata = df.data,
                     re_formula = NA,
                     probs = c(0.1, 0.9)) %>% 
              as_tibble() %>% 
              bind_cols(df.data) %>% 
              mutate(type = "absence")) %>% 
  clean_names()

df.adults = df.exp2_cause_adults %>% 
  select(-response) %>% 
  mutate(age_whole = 10) %>% 
  pivot_longer(cols = (direct:absence),
    names_to = "type",
    values_to = "response")

df.children = df.exp2_cause_children%>%
  select(-response) %>% 
 pivot_longer(cols = (direct:absence),
               names_to = "type",
               values_to = "response")

df.means = df.children %>% 
  bind_rows(df.adults) %>% 
  mutate(age_whole = factor(age_whole, 
                            levels = c(4, 5, 6, 7, 8, 9, 10), 
                            labels = c("4", "5", "6", "7", "8", "9", "adults")),
         question = recode(question, "cause" = "caused"),
         type = recode(type, "direct" = "presence")) %>% 
  group_by(age_whole, question, type) %>% 
  summarize(
    response = Hmisc::smean.cl.boot(response)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>% 
  mutate(age_whole = fct_recode(age_whole, "10" = "adults"),
         age_whole = as.numeric(as.character(age_whole)))

custom_labels = as_labeller(c("absence" = "Absent Cause", "presence" = "Direct Cause"))

p7 = ggplot(mapping = aes(x = age, 
                     y = response,
                     color = question)) + 
  geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90,
                            color = question,
                            fill = question),
              stat = "identity",
              alpha = 0.2,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = question,
                                shape = question),
                  color = "black",
                  size = 0.5,
                  show.legend = TRUE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.5)) +
    facet_wrap(~type,
             labeller = custom_labels) + 
  scale_fill_manual(values = c("caused" = "#FF7F00", "lexical" = "#4DAF4A"), breaks=c("caused", "lexical")) +
  scale_color_manual(values = c("caused" = "#FF7F00", "lexical" = "#4DAF4A"), breaks=c("caused", "lexical")) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                      labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
  scale_x_continuous(breaks = 4:10,
                     labels = c("4", "5", "6", "7", "8", "9", "Adults")) +
  labs(y = "Probability of selecting \n causal candidate") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        text = element_text(size = 18)) 

10.4.2 Made vs. Lexical

set.seed(1)

df.model = df.exp2_made_children %>% 
  mutate(question = as_factor(question),
         question = fct_relevel(question, "made"))

fit.brm.present.made.age2 = brm(formula = direct ~ 1 + question*age + (1 | participant) + (1 | scenario), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 8000,
                     warmup = 1000,
                     control = list(adapt_delta = .999999,
                                    max_treedepth = 15),
                     file = "cache/fit.brm.present.made.age2") 

fit.brm.absence.made.age2 = brm(formula = absence ~ 1 + question*age + (1 | participant) + (1 | scenario),
                      data = df.model,
                      family = "bernoulli",
                      seed = 1,
                      iter = 6000,
                      control = list(adapt_delta = .999999,
                                     max_treedepth = 15),
                      file = "cache/fit.brm.absence.made.age2") 

df.data = expand_grid(age = seq(4, 9, 0.1),
                      question = c("made", "lexical"))

df.prediction = fit.brm.present.made.age2 %>% 
  fitted(newdata = df.data,
         re_formula = NA,
          probs = c(0.1, 0.9)) %>% 
  as_tibble() %>% 
  bind_cols(df.data) %>% 
  mutate(type = "presence") %>% 
  bind_rows(fit.brm.absence.made.age2 %>% 
              fitted(newdata = df.data,
                     re_formula = NA,
                     probs = c(0.1, 0.9)) %>% 
              as_tibble() %>% 
              bind_cols(df.data) %>% 
              mutate(type = "absence")) %>% 
  clean_names()

df.adults = df.exp2_made_adults %>% 
  select(-response) %>% 
  mutate(age_whole = 10) %>% 
  pivot_longer(cols = (direct:absence),
    names_to = "type",
    values_to = "response")

df.children = df.exp2_made_children %>%
  select(-response) %>% 
  pivot_longer(cols = (direct:absence),
               names_to = "type",
               values_to = "response")

df.means = df.children %>% 
  mutate(age_whole = as.integer(age)) %>%
  bind_rows(df.adults) %>% 
  mutate(age_whole = factor(age_whole, 
                            levels = c(4, 5, 6, 7, 8, 9, 10), 
                            labels = c("4", "5", "6", "7", "8", "9", "adults")),
         type = recode(type, "direct" = "presence")) %>% 
  group_by(age_whole, question, type) %>% 
  summarize(
    response = Hmisc::smean.cl.boot(response)) %>%
    mutate(index = c("response", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>% 
  mutate(age_whole = fct_recode(age_whole, "10" = "adults"),
         age_whole = as.numeric(as.character(age_whole)),
         question = factor(question, levels = c("made", "lexical")))

custom_labels = as_labeller(c("absence" = "Absent Cause", "presence" = "Direct Cause"))

p8 = ggplot(mapping = aes(x = age, 
                     y = response,
                     color = question)) + 
  geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
  geom_smooth(data = df.prediction,
              mapping = aes(y = estimate,
                            ymin = q10,
                            ymax = q90,
                            color = question,
                            fill = question),
              stat = "identity",
              alpha = 0.2,
              show.legend = F) +
  geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = question,
                                shape = question),
                  color = "black",
                  size = 0.5,
                  show.legend = TRUE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.4)) +
  facet_wrap(~type,
             labeller = custom_labels) + 
  scale_fill_manual(values = c("made" = "#FFEA00", "lexical" = "#4DAF4A"), breaks=c("made", "lexical")) +
  scale_color_manual(values = c("made" = "#FFEA00", "lexical" = "#4DAF4A"), breaks=c("made", "lexical")) +
  scale_shape_manual(values = c(21, 23), breaks=c("made", "lexical")) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = str_c(seq(0, 1, 0.25) * 100, "%"),
                     limits = c(0, 1)) +
  scale_x_continuous(breaks = 4:10,
                     labels = c("4", "5", "6", "7", "8", "9", "Adults")) +
  labs(y = "Probability of selecting \n causal candidate") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        text = element_text(size = 18)) 

10.4.3 Combine Cause and Made Plots

plot = p7 + p8 + 
plot_layout(nrow = 1) &
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(face = "bold"))

ggsave(plot, filename = "../../figures/experiment2/absence_all.pdf", height = 5, width = 18)

plot

11 ORDER EFFECTS

12 EXPERIMENT 1: CHAINS

12.1 CAUSED VS. LEXICAL (BREAK & CRACK)

12.1.1 CHILDREN

12.1.1.1 DATA

12.1.1.1.1 Wrangle
df.tmp1 = df.exp1_cause_children %>% 
  mutate(order = as_factor(caused_question_order)) %>% 
   group_by(participant) %>%
   filter(row_number() <= 2) %>%
  ungroup()  %>% 
  filter(question == "caused") 

df.tmp2 = df.exp1_cause_children %>% 
  group_by(participant) %>%
  filter(row_number() <= 2) %>%
  ungroup() %>% 
  filter(question == "lexical") %>% 
   mutate(order = case_when(caused_question_order == "second" ~ "first",
                           caused_question_order == "first" ~ "second"))

# trials where exactly one cause was selected: difference = 1 (distal) or 0 (proximal)
df.exp1_cause_children_order_different = df.tmp1 %>% 
  bind_rows(df.tmp2)  %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = distal - proximal,
        difference = recode(difference, `-1` = 0))

df.exp1_cause_children_order_both = df.tmp1 %>% 
  bind_rows(df.tmp2)  %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")
  
# "both" trials contribute one distal (difference = 1) and one proximal (difference = 0) row
df.exp1_cause_children_order_both = df.exp1_cause_children_order_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.exp1_cause_children_order_both %>% 
              mutate(difference = 0)) 

df.exp1_cause_children_order = df.exp1_cause_children_order_different %>% 
  bind_rows(df.exp1_cause_children_order_both) %>% 
  mutate(order = as_factor(order),
         order = fct_relevel(order, "second"))
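Before modeling, it can help to check how the raw trials split into the three response patterns that the recoding above distinguishes; a quick sketch using the intermediate data frames:
# distribution of response patterns before the "both" trials are duplicated
df.tmp1 %>% 
  bind_rows(df.tmp2) %>% 
  count(distal, proximal)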

12.1.1.2 STATS

12.1.1.2.1 Bayesian Models
# Check contrast values
contrasts(df.exp1_cause_children_order$order)
##        [,1]
## second    1
## first    -1
fit.brm.exp1.cause.children.order = brm(formula = difference ~ 1 + question*order + (1 | participant) + (1 | scenario), 
                     data = df.exp1_cause_children_order,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/exp1_cause_children_order") 

fit.brm.exp1.cause.children.order
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * order + (1 | participant) + (1 | scenario) 
##    Data: df.exp1_cause_children_order (Number of observations: 296) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 148) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.57      0.36     0.03     1.32 1.00     2103     3915
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.59      1.11     0.35     4.53 1.00     2834     1575
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -0.26      1.04    -2.56     1.97 1.00     3651     2259
## question1            1.12      0.17     0.80     1.47 1.00     5375     5291
## order1               0.44      0.15     0.15     0.73 1.00    11951     7859
## question1:order1    -0.10      0.16    -0.42     0.21 1.00     7965     3388
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
12.1.1.2.2 Follow-up analysis
fit.brm.exp1.cause.children.order %>% 
  emmeans(spec = pairwise ~ order | question,
          type = "response")
## $emmeans
## question = caused:
##  order  response lower.HPD upper.HPD
##  second    0.766   0.36558     0.995
##  first     0.624   0.24075     0.992
## 
## question = lexical:
##  order  response lower.HPD upper.HPD
##  second    0.301   0.00472     0.720
##  first     0.131   0.00195     0.469
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
## question = caused:
##  contrast       odds.ratio lower.HPD upper.HPD
##  second / first       1.96     0.701      3.96
## 
## question = lexical:
##  contrast       odds.ratio lower.HPD upper.HPD
##  second / first       2.89     0.852      6.34
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
12.1.1.2.3 Means and Confidence Interval
set.seed(1)

df.exp1_cause_children_order %>% 
  group_by(order, question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 4 × 5
## # Groups:   order [2]
##   order  question  mean ci_lower ci_upper
##   <fct>  <chr>    <dbl>    <dbl>    <dbl>
## 1 second caused   0.744   0.647     0.840
## 2 second lexical  0.3     0.190     0.410
## 3 first  caused   0.576   0.453     0.698
## 4 first  lexical  0.167   0.0821    0.251
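For comparison with the bootstrapped intervals used in the plots below, the same cell means can also be summarised with ggplot2::mean_cl_boot (a sketch; assumes Hmisc is installed, as elsewhere in this document):
set.seed(1)

# bootstrap means and CIs (columns y, ymin, ymax) per order x question cell
df.exp1_cause_children_order %>% 
  group_by(order, question) %>% 
  summarise(mean_cl_boot(difference))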

12.2 MADE VS. LEXICAL (BREAK & CRACK)

12.2.1 CHILDREN

12.2.1.1 DATA

12.2.1.1.1 Wrangle
#### MADE #####
df.tmp1 = df.exp1_made_children %>% 
  mutate(order = as_factor(made_question_order)) %>% 
   group_by(participant) %>%
   filter(row_number() <= 2) %>%
  ungroup()  %>% 
  filter(question == "made") 

df.tmp2 = df.exp1_made_children %>% 
  group_by(participant) %>%
  filter(row_number() <= 2) %>%
  ungroup() %>% 
  filter(question == "lexical") %>% 
   mutate(order = case_when(made_question_order == "second" ~ "first",
                           made_question_order == "first" ~ "second"))

df.exp1_made_children_order_different = df.tmp1 %>% 
  bind_rows(df.tmp2)  %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = distal - proximal,
        difference = recode(difference, `-1` = 0))

df.exp1_made_children_order_both = df.tmp1 %>% 
  bind_rows(df.tmp2)  %>%
  mutate(response_pattern = case_when(
    distal == 1 & proximal == 1 ~ "both",
    distal == 0 & proximal == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")
  
df.exp1_made_children_order_both = df.exp1_made_children_order_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.exp1_made_children_order_both %>% 
              mutate(difference = 0)) 

df.exp1_made_children_order = df.exp1_made_children_order_different %>% 
  bind_rows(df.exp1_made_children_order_both) %>% 
  mutate(order = as_factor(order),
         order = fct_relevel(order, "second"))

12.2.1.2 STATS

12.2.1.2.1 Bayesian Models
# Check contrast values
contrasts(df.exp1_made_children_order$order)
##        [,1]
## second    1
## first    -1
fit.brm.exp1.made.children.order = brm(formula = difference ~ 1 + question*order + (1 | participant) + (1 | scenario), 
                     data = df.exp1_made_children_order,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/exp1_made_children_order") 

fit.brm.exp1.made.children.order
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * order + (1 | participant) + (1 | scenario) 
##    Data: df.exp1_made_children_order (Number of observations: 298) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 149) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.70      0.42     0.93     2.61 1.01     1726     2680
## 
## ~scenario (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.39      1.93     0.45     8.80 1.02      147       25
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -0.24      1.23    -2.49     2.81 1.00     1077      419
## question1           -0.71      0.17    -1.06    -0.39 1.00     5354     5860
## order1               0.09      0.16    -0.21     0.41 1.01      828     8390
## question1:order1     0.01      0.22    -0.41     0.44 1.01      834     1247
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
12.2.1.2.2 Follow-up analysis
fit.brm.exp1.made.children.order %>% 
  emmeans(spec = pairwise ~ order | question,
          type = "response")
## $emmeans
## question = lexical:
##  order  response lower.HPD upper.HPD
##  second    0.277   0.00201     0.821
##  first     0.239   0.00166     0.781
## 
## question = made:
##  order  response lower.HPD upper.HPD
##  second    0.606   0.20619     0.986
##  first     0.570   0.18781     0.990
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
## question = lexical:
##  contrast       odds.ratio lower.HPD upper.HPD
##  second / first       1.21     0.234      3.19
## 
## question = made:
##  contrast       odds.ratio lower.HPD upper.HPD
##  second / first       1.19     0.290      2.71
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
12.2.1.2.3 Means and Confidence Interval
set.seed(1)

df.exp1_made_children_order %>% 
  group_by(order, question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 4 × 5
## # Groups:   order [2]
##   order  question  mean ci_lower ci_upper
##   <fct>  <chr>    <dbl>    <dbl>    <dbl>
## 1 second lexical  0.297    0.191    0.404
## 2 second made     0.48     0.364    0.596
## 3 first  lexical  0.24     0.141    0.339
## 4 first  made     0.473    0.357    0.589

12.3 PLOTS

12.3.1 Caused vs. Lexical

set.seed(1)

df.plot = df.exp1_cause_children_order %>% 
  mutate(order = factor(order, levels = c("first", "second")))

p3 = ggplot(data = df.plot,
       mapping = aes(x = factor(question, levels = c("lexical", "caused")),
                     y = difference,
                     color = order,
                     fill = order)) +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
   stat_summary(fun.data = "mean_cl_boot",
               position = position_dodge(width = 0.5),
               shape = 21,
               color = "black",
               size = 1) +
  scale_fill_brewer(palette = "Set3") +
  scale_color_brewer(palette = "Set3") +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n proximal", "75%", "50%", "75%", "100% \n distal"),
                      limits = c(0, 1)) +
  ylab("Probability of selecting \n proximal or distal cause") +
  theme(legend.position = "bottom",
        legend.box = "vertical",
        axis.title.x = element_blank(),
        text = element_text(size = 18)) 

12.3.2 Made vs. Lexical

set.seed(1)

df.plot = df.exp1_made_children_order %>% 
  mutate(order = factor(order, levels = c("first", "second")))

p4 = ggplot(data = df.plot,
       mapping = aes(x = factor(question, levels = c("lexical", "made")),
                     y = difference,
                     color = order,
                     fill = order)) +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
   stat_summary(fun.data = "mean_cl_boot",
               position = position_dodge(width = 0.5),
               shape = 21,
               color = "black",
               size = 1) +
  scale_fill_brewer(palette = "Set3") +
  scale_color_brewer(palette = "Set3") +
   scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n proximal", "75%", "50%", "75%", "100% \n distal"),
                      limits = c(0, 1)) +
  ylab("Probability of selecting \n proximal or distal cause") +
  theme(legend.position = "bottom",
        legend.box = "vertical",
        axis.title.x = element_blank(),
        text = element_text(size = 18)) 

12.3.3 Combine Cause and Made Plots

plot = p3 + p4 + 
plot_layout(nrow = 1) &
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(face = "bold"))

ggsave(plot, filename = "../../figures/experiment1/chain_order_effect.pdf", height = 5, width = 12)

plot

13 EXPERIMENT 2: ABSENCES

13.1 CAUSED VS. LEXICAL (BURN & FLOOD)

13.1.1 ADULTS

13.1.1.1 DATA

13.1.1.1.1 Wrangle
# code order
df.tmp1 = df.exp2_cause_adults %>% 
  group_by(participant) %>% 
  filter(row_number() == 1) %>% 
  mutate(scenario_order = str_c(scenario, "_first"),
         question_order = str_c(question, "_first")) %>% 
  select(participant,
         contains("order"))

df.exp2_cause_adults_order = df.exp2_cause_adults %>% 
  left_join(df.tmp1,
            by = "participant") %>% 
  relocate(contains("order"), .after = question) %>% 
  mutate(caused_question_order = case_when(question_order == "lexical_first" ~ "second",
                                          question_order == "caused_first" ~ "first"),
         question = as_factor(question),
         question = fct_relevel(question, "lexical"),
         caused_question_order = as_factor(caused_question_order),
         caused_question_order = fct_relevel(caused_question_order, "first")) %>% 
   group_by(participant) %>%
   filter(row_number() <= 2) %>%
  ungroup()

df.tmp2 = df.exp2_cause_adults_order %>% 
  filter(question == "caused") %>% 
  mutate(order = caused_question_order)

df.tmp3 = df.exp2_cause_adults_order %>% 
  filter(question == "lexical") %>% 
  mutate(order = case_when(caused_question_order == "second" ~ "first",
                           caused_question_order == "first" ~ "second"))

df.exp2_cause_adults_order_different = df.tmp2 %>% 
  bind_rows(df.tmp3)  %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absence - direct,
        difference = recode(difference, `-1` = 0))

df.exp2_cause_adults_order_both = df.tmp2 %>% 
  bind_rows(df.tmp3)  %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")
  
df.exp2_cause_adults_order_both = df.exp2_cause_adults_order_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.exp2_cause_adults_order_both %>% 
              mutate(difference = 0)) 

df.exp2_cause_adults_order = df.exp2_cause_adults_order_different %>% 
  bind_rows(df.exp2_cause_adults_order_both) %>% 
  mutate(order = as_factor(order),
         order = fct_relevel(order, "second"))

13.1.1.2 STATS

13.1.1.2.1 Bayesian Models
# Check contrast values
contrasts(df.exp2_cause_adults_order$order)
##        [,1]
## second    1
## first    -1
fit.brm.exp2.cause.adults.order = brm(formula = difference ~ 1 + question*order + (1 | participant) + (1 | scenario), 
                     data = df.exp2_cause_adults_order,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/exp2_cause_adults_order") 

fit.brm.exp2.cause.adults.order
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * order + (1 | participant) + (1 | scenario) 
##    Data: df.exp2_cause_adults_order (Number of observations: 123) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 56) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.40      0.31     0.02     1.13 1.00     3761     3917
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.41      1.18     0.09     4.55 1.00     2450     1419
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -1.03      0.95    -3.00     1.13 1.00     2511     1324
## question1           -1.36      0.30    -2.00    -0.81 1.00     9966     5837
## order1               0.37      0.29    -0.20     0.97 1.00     9373     7792
## question1:order1     0.17      0.30    -0.41     0.77 1.00     8654     6763
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
13.1.1.2.2 Follow-up analysis
fit.brm.exp2.cause.adults.order %>% 
  emmeans(spec = pairwise ~ order | question,
          type = "response")
## $emmeans
## question = lexical:
##  order  response lower.HPD upper.HPD
##  second   0.1344  0.002486     0.504
##  first    0.0525  0.000131     0.274
## 
## question = caused:
##  order  response lower.HPD upper.HPD
##  second   0.6217  0.261442     0.971
##  first    0.5186  0.158328     0.934
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
## question = lexical:
##  contrast       odds.ratio lower.HPD upper.HPD
##  second / first       2.83    0.0367     17.78
## 
## question = caused:
##  contrast       odds.ratio lower.HPD upper.HPD
##  second / first       1.49    0.3149      3.74
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
13.1.1.2.3 Means and Confidence Interval
set.seed(1)

df.exp2_cause_adults_order %>% 
  group_by(order, question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 4 × 5
## # Groups:   order [2]
##   order  question   mean ci_lower ci_upper
##   <fct>  <fct>     <dbl>    <dbl>    <dbl>
## 1 second lexical  0.136   -0.0194    0.292
## 2 second caused   0.579    0.414     0.743
## 3 first  lexical  0.0588  -0.0245    0.142
## 4 first  caused   0.483    0.289     0.676

13.1.2 CHILDREN

13.1.2.1 DATA

13.1.2.1.1 Wrangle
df.tmp1 = df.exp2_cause_children %>% 
  mutate(order = as_factor(cause_question_order)) %>% 
   group_by(participant) %>%
   filter(row_number() <= 2) %>%
  ungroup()  %>% 
  filter(question == "caused") 

df.tmp2 = df.exp2_cause_children %>% 
  group_by(participant) %>%
  filter(row_number() <= 2) %>%
  ungroup() %>% 
  filter(question == "lexical") %>% 
   mutate(order = case_when(cause_question_order == "second" ~ "first",
                           cause_question_order == "first" ~ "second"))

df.exp2_cause_children_order_different = df.tmp1 %>% 
  bind_rows(df.tmp2)  %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absence - direct,
        difference = recode(difference, `-1` = 0))

df.exp2_cause_children_order_both = df.tmp1 %>% 
  bind_rows(df.tmp2)  %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")
  
df.exp2_cause_children_order_both = df.exp2_cause_children_order_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.exp2_cause_children_order_both %>% 
              mutate(difference = 0)) 

df.exp2_cause_children_order = df.exp2_cause_children_order_different %>% 
  bind_rows(df.exp2_cause_children_order_both) %>% 
  mutate(order = as_factor(order),
         order = fct_relevel(order, "first"))

13.1.2.2 STATS

13.1.2.2.1 Bayesian Models
# Check contrast values
contrasts(df.exp2_cause_children_order$order)
##        [,1]
## first     1
## second   -1
fit.brm.exp2.cause.children.order = brm(formula = difference ~ 1 + question*order + (1 | participant) + (1 | scenario), 
                     data = df.exp2_cause_children_order,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/exp2_cause_children_order") 

fit.brm.exp2.cause.children.order
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * order + (1 | participant) + (1 | scenario) 
##    Data: df.exp2_cause_children_order (Number of observations: 311) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 148) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.41      0.29     0.02     1.06 1.00      875      871
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.93      1.05     0.02     3.92 1.03      208      210
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -1.31      0.75    -3.13     0.47 1.05      132       48
## question1            0.80      0.16     0.49     1.12 1.00     2077     4455
## order1               0.37      0.16     0.07     0.68 1.01     2014     3715
## question1:order1     0.26      0.17    -0.07     0.61 1.01     1540     1077
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
13.1.2.2.2 Follow-up analysis
fit.brm.exp2.cause.children.order %>% 
  emmeans(spec = pairwise ~ order | question,
          type = "response")
## $emmeans
## question = caused:
##  order  response lower.HPD upper.HPD
##  first    0.5190   0.23283     0.886
##  second   0.2375   0.02361     0.600
## 
## question = lexical:
##  order  response lower.HPD upper.HPD
##  first    0.1205   0.00848     0.383
##  second   0.0963   0.00674     0.344
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
## question = caused:
##  contrast       odds.ratio lower.HPD upper.HPD
##  first / second       3.46     1.450      6.67
## 
## question = lexical:
##  contrast       odds.ratio lower.HPD upper.HPD
##  first / second       1.25     0.261      3.13
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
13.1.2.2.3 Means and Confidence Interval
set.seed(1)

df.exp2_cause_children_order %>% 
  group_by(order, question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 4 × 5
## # Groups:   order [2]
##   order  question  mean ci_lower ci_upper
##   <fct>  <chr>    <dbl>    <dbl>    <dbl>
## 1 first  caused   0.506   0.395     0.617
## 2 first  lexical  0.127   0.0516    0.202
## 3 second caused   0.241   0.147     0.335
## 4 second lexical  0.103   0.0288    0.177

13.2 MADE VS. LEXICAL (BURN & FLOOD)

13.2.1 ADULTS

13.2.1.1 DATA

13.2.1.1.1 Wrangle
df.tmp1 = df.exp2_made_adults %>% 
  group_by(participant) %>% 
  filter(row_number() == 1) %>% 
  mutate(scenario_order = str_c(scenario, "_first"),
         question_order = str_c(question, "_first")) %>% 
  select(participant,
         contains("order"))

df.exp2_made_adults_order = df.exp2_made_adults %>% 
  left_join(df.tmp1,
            by = "participant") %>% 
  relocate(contains("order"), .after = question) %>% 
  mutate(made_question_order = case_when(question_order == "lexical_first" ~ "second",
                                          question_order == "made_first" ~ "first"),
         question = as_factor(question),
         question = fct_relevel(question, "lexical"),
         made_question_order = as_factor(made_question_order),
         made_question_order = fct_relevel(made_question_order, "first")) %>% 
   group_by(participant) %>%
   filter(row_number() <= 2) %>%
  ungroup()

df.tmp2 = df.exp2_made_adults_order %>% 
  filter(question == "made") %>% 
  mutate(order = made_question_order)

df.tmp3 = df.exp2_made_adults_order %>% 
  filter(question == "lexical") %>% 
  mutate(order = case_when(made_question_order == "second" ~ "first",
                           made_question_order == "first" ~ "second"))

df.exp2_made_adults_order_different = df.tmp2 %>% 
  bind_rows(df.tmp3)  %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absence - direct,
        difference = recode(difference, `-1` = 0))

df.exp2_made_adults_order_both = df.tmp2 %>% 
  bind_rows(df.tmp3)  %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")
  
df.exp2_made_adults_order_both = df.exp2_made_adults_order_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.exp2_made_adults_order_both %>% 
              mutate(difference = 0)) 

df.exp2_made_adults_order = df.exp2_made_adults_order_different %>% 
  bind_rows(df.exp2_made_adults_order_both) %>% 
  mutate(order = as_factor(order),
         order = fct_relevel(order, "second"))

13.2.1.2 STATS

13.2.1.2.1 Bayesian Models
# Check contrast values
contrasts(df.exp2_made_adults_order$order)
##        [,1]
## second    1
## first    -1
fit.brm.exp2.made.adults.order = brm(formula = difference ~ 1 + question*order + (1 | participant) + (1 | scenario), 
                     data = df.exp2_made_adults_order,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/exp2_made_adults_order") 

fit.brm.exp2.made.adults.order
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * order + (1 | participant) + (1 | scenario) 
##    Data: df.exp2_made_adults_order (Number of observations: 134) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 57) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.28      0.22     0.01     0.82 1.00     5108     3919
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.63      1.17     0.31     4.85 1.00     2508     1127
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -0.62      1.05    -2.70     1.79 1.00     3236     1992
## question1           -0.84      0.22    -1.29    -0.43 1.00    12338     8574
## order1              -0.21      0.22    -0.64     0.22 1.00    11992     7940
## question1:order1    -0.23      0.23    -0.68     0.22 1.00    11060     8356
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
13.2.1.2.2 Follow-up analysis
fit.brm.exp2.made.adults.order %>% 
  emmeans(spec = pairwise ~ order | question,
          type = "response")
## $emmeans
## question = lexical:
##  order  response lower.HPD upper.HPD
##  second    0.129   0.00113     0.512
##  first     0.255   0.00325     0.714
## 
## question = made:
##  order  response lower.HPD upper.HPD
##  second    0.550   0.16021     0.958
##  first     0.539   0.15268     0.939
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
## question = lexical:
##  contrast       odds.ratio lower.HPD upper.HPD
##  second / first       0.42     0.042      1.36
## 
## question = made:
##  contrast       odds.ratio lower.HPD upper.HPD
##  second / first       1.05     0.243      2.55
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
13.2.1.2.3 Means and Confidence Interval
set.seed(1)

df.exp2_made_adults_order %>% 
  group_by(order, question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 4 × 5
## # Groups:   order [2]
##   order  question  mean ci_lower ci_upper
##   <fct>  <fct>    <dbl>    <dbl>    <dbl>
## 1 second lexical  0.139   0.0202    0.258
## 2 second made     0.567   0.378     0.755
## 3 first  lexical  0.28    0.0908    0.469
## 4 first  made     0.512   0.356     0.667

13.2.2 CHILDREN

13.2.2.1 DATA

13.2.2.1.1 Wrangle
df.tmp1 = df.exp2_made_children %>% 
  mutate(order = as_factor(made_question_order)) %>% 
   group_by(participant) %>%
   filter(row_number() <= 2) %>%
  ungroup()  %>% 
  filter(question == "made") 

df.tmp2 = df.exp2_made_children %>% 
  group_by(participant) %>%
  filter(row_number() <= 2) %>%
  ungroup() %>% 
  filter(question == "lexical") %>% 
   mutate(order = case_when(made_question_order == "second" ~ "first",
                            made_question_order == "scond" ~ "first", # handles "scond", a misspelling of "second" in the raw data
                            made_question_order == "first" ~ "second"))

df.exp2_made_children_order_different = df.tmp1 %>% 
  bind_rows(df.tmp2)  %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "different") %>%
  mutate(difference = absence - direct,
        difference = recode(difference, `-1` = 0))

df.exp2_made_children_order_both = df.tmp1 %>% 
  bind_rows(df.tmp2)  %>%
  mutate(response_pattern = case_when(
    direct == 1 & absence == 1 ~ "both",
    direct == 0 & absence == 0 ~ "neither",
    TRUE ~ "different"
  )) %>%
  filter(response_pattern == "both")
  
df.exp2_made_children_order_both = df.exp2_made_children_order_both %>% 
  mutate(difference = 1) %>%
  bind_rows(df.exp2_made_children_order_both %>% 
              mutate(difference = 0)) 

df.exp2_made_children_order = df.exp2_made_children_order_different %>% 
  bind_rows(df.exp2_made_children_order_both) %>% 
  mutate(order = str_replace_all(order, "frst", "first"), # fix "frst" misspelling in the raw order values
         order = as_factor(order),
         order = fct_relevel(order, "first"))

13.2.2.2 STATS

13.2.2.2.1 Bayesian Models
# Check contrast values
contrasts(df.exp2_made_children_order$order)
##        [,1]
## first     1
## second   -1
fit.brm.exp2.made.children.order = brm(formula = difference ~ 1 + question*order + (1 | participant) + (1 | scenario), 
                     data = df.exp2_made_children_order,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     file = "cache/exp2_made_children_order") 

fit.brm.exp2.made.children.order
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: difference ~ 1 + question * order + (1 | participant) + (1 | scenario) 
##    Data: df.exp2_made_children_order (Number of observations: 290) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~participant (Number of levels: 145) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.24      0.52     0.18     2.29 1.00     1343     1876
## 
## ~scenario (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.97      1.08     0.02     3.90 1.00     1042      809
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -1.76      0.80    -3.26     0.33 1.01      649      154
## question1           -0.75      0.21    -1.20    -0.37 1.00     1382     1483
## order1               0.28      0.19    -0.08     0.67 1.00     6605     5795
## question1:order1    -0.22      0.22    -0.69     0.19 1.00     7110     6635
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
13.2.2.2.2 Follow-up analysis
fit.brm.exp2.made.children.order %>% 
  emmeans(spec = pairwise ~ order | question,
          type = "response")
## $emmeans
## question = lexical:
##  order  response lower.HPD upper.HPD
##  first    0.0771   0.00170     0.317
##  second   0.0708   0.00119     0.306
## 
## question = made:
##  order  response lower.HPD upper.HPD
##  first    0.3549   0.13828     0.819
##  second   0.1723   0.01120     0.542
## 
## Point estimate displayed: median 
## Results are back-transformed from the logit scale 
## HPD interval probability: 0.95 
## 
## $contrasts
## question = lexical:
##  contrast       odds.ratio lower.HPD upper.HPD
##  first / second       1.10     0.151      3.40
## 
## question = made:
##  contrast       odds.ratio lower.HPD upper.HPD
##  first / second       2.67     0.815      6.59
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95
13.2.2.2.3 Means and Confidence Interval
set.seed(1)

df.exp2_made_children_order %>% 
  group_by(order, question) %>% 
  summarise(
    mean = mean(difference),
    ci_lower = mean_cl_normal(difference)$ymin,
    ci_upper = mean_cl_normal(difference)$ymax
  )
## # A tibble: 4 × 5
## # Groups:   order [2]
##   order  question  mean ci_lower ci_upper
##   <fct>  <chr>    <dbl>    <dbl>    <dbl>
## 1 first  lexical  0.118   0.0441    0.193
## 2 first  made     0.375   0.260     0.490
## 3 second lexical  0.108   0.0303    0.185
## 4 second made     0.221   0.126     0.316

13.3 PLOTS

13.3.1 Caused vs. Lexical

set.seed(1)

df.plot = df.exp2_cause_children_order %>% 
  mutate(age = "children") %>% 
  bind_rows(df.exp2_cause_adults_order %>% 
              mutate(age = "adults")) %>% 
  mutate(order = factor(order, levels = c("first", "second")))

p5 = ggplot(data = df.plot,
       mapping = aes(x = factor(question, levels = c("lexical", "caused")),
                     y = difference,
                     color = order,
                     fill = order)) +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
   stat_summary(fun.data = "mean_cl_boot",
               position = position_dodge(width = 0.5),
               shape = 21,
               color = "black",
               size = 1) +
  scale_fill_brewer(palette = "Set3") +
  scale_color_brewer(palette = "Set3") +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n direct", "75%", "50%", "75%", "100% \n absent"),
                      limits = c(0, 1)) +
  ylab("Probability of selecting \n direct or absent cause") +
  theme(legend.position = "bottom",
        legend.box = "vertical",
        axis.title.x = element_blank(),
        text = element_text(size = 18)) +
  facet_wrap(~age)

13.3.2 Made vs. Lexical

set.seed(1)

df.plot = df.exp2_made_children_order %>% 
  mutate(age = "children") %>% 
  bind_rows(df.exp2_made_adults_order %>% 
              mutate(age = "adults")) %>% 
  mutate(order = factor(order, levels = c("first", "second")))

p6 = ggplot(data = df.plot,
       mapping = aes(x = factor(question, levels = c("lexical", "made")),
                     y = difference,
                     color = order,
                     fill = order)) +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
   stat_summary(fun.data = "mean_cl_boot",
               position = position_dodge(width = 0.5),
               shape = 21,
               color = "black",
               size = 1) +
  scale_fill_brewer(palette = "Set3") +
  scale_color_brewer(palette = "Set3") +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = c("100% \n direct", "75%", "50%", "75%", "100% \n absent"),
                      limits = c(0, 1)) +
  ylab("Probability of selecting \n direct or absent cause") +
  theme(legend.position = "bottom",
        legend.box = "vertical",
        axis.title.x = element_blank(),
        text = element_text(size = 18)) +
  facet_wrap(~age)

13.3.3 Combine Cause and Made Plots

plot = p5 + p6 + 
  plot_layout(nrow = 1) &
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold"))

ggsave(plot, filename = "../../figures/experiment2/absence_order_effect.pdf", height = 5, width = 12)

plot

14 RSA

14.1 DATA

14.1.1 Chain

14.1.1.1 Caused vs. lexical

df.chain_caused = read.csv("../../data/experiment1/cause/children/chain_cause_different_responses.csv") %>% 
  count(age_whole, question, distal) %>% 
  group_by(age_whole, question) %>%
  mutate(probability = n/sum(n)) %>%
  ungroup() %>% 
  mutate(distal = factor(distal,
                         levels = c(0, 1),
                         labels = c("no", "yes"))) %>% 
  select(-n) %>% 
  relocate(probability, .after = last_col()) %>% 
  arrange(age_whole, question, distal) %>% 
  mutate(version = "chain") %>% 
  rename(age = age_whole) %>% 
  mutate(event = ifelse(distal == "yes", "distal", "proximal"),
         alternative = "caused") %>%
  select(age, question, event, probability, version, alternative)

14.1.1.2 Made vs. lexical

df.chain_made = read.csv("../../data/experiment1/made/children/chain_made_different_responses.csv") %>% 
  count(age_whole, question, distal) %>% 
  group_by(age_whole, question) %>%
  mutate(probability = n/sum(n)) %>%
  ungroup() %>% 
  mutate(distal = factor(distal,
                         levels = c(0, 1),
                         labels = c("no", "yes"))) %>% 
  select(-n) %>% 
  relocate(probability, .after = last_col()) %>% 
  arrange(age_whole, question, distal) %>% 
  mutate(version = "chain") %>% 
  rename(age = age_whole) %>% 
  mutate(event = ifelse(distal == "yes", "distal", "proximal"),
         alternative = "made") %>% 
  select(age, question, event, probability, version, alternative)

14.1.2 Absence

14.1.2.1 Caused vs. lexical

df.absence_caused = read.csv("../../data/experiment2/cause/children/absence_cause_different_responses.csv") %>% 
  rename(absent = absence) %>% 
  count(age_whole, question, absent) %>% 
  group_by(age_whole, question) %>%
  mutate(probability = n/sum(n)) %>%
  ungroup() %>% 
  mutate(absent = factor(absent,
                         levels = c(0, 1),
                         labels = c("no", "yes"))) %>% 
  select(-n) %>% 
  relocate(probability, .after = last_col()) %>% 
  arrange(age_whole, question, absent) %>% 
  mutate(version = "absence") %>% 
  rename(age = age_whole) %>% 
  mutate(event = ifelse(absent == "yes", "absent", "direct"),
         alternative = "caused") %>% 
  select(age, question, event, probability, version, alternative)

14.1.2.2 Made vs. lexical

df.absence_made = read.csv("../../data/experiment2/made/children/absence_made_different_responses.csv") %>% 
  rename(absent = absence) %>% 
  mutate(age_whole = as.integer(age)) %>% 
  count(age_whole, question, absent) %>% 
  group_by(age_whole, question) %>%
  mutate(probability = n/sum(n)) %>%
  ungroup() %>% 
  mutate(absent = factor(absent,
                         levels = c(0, 1),
                         labels = c("no", "yes"))) %>% 
  select(-n) %>% 
  relocate(probability, .after = last_col()) %>% 
  arrange(age_whole, question, absent) %>% 
  mutate(version = "absence") %>% 
  rename(age = age_whole) %>% 
  mutate(event = ifelse(absent == "yes", "absent", "direct"),
         alternative = "made") %>% 
  select(age, question, event, probability, version, alternative)

14.1.3 Bootstrapped confidence intervals

set.seed(1)

fun_bootstraps = function(df, variable, scenario, alternative, nboots){
  df = df %>% 
    bootstraps(times = nboots,
               strata = age_whole) %>% 
    mutate(probability = map(.x = splits,
                             .f = ~ .x %>%
                               as_tibble() %>%
                               count(age_whole, question, {{variable}}, .drop = FALSE) %>% 
                               group_by(age_whole, question) %>%
                               mutate(probability = n/sum(n)))) %>%
    select(-splits) %>%
    unnest(cols = c(probability)) %>% 
    group_by(age_whole, question, {{variable}}) %>%
    summarize(low = quantile(probability, 0.025),
              high = quantile(probability, 0.975)) %>% 
    ungroup()
  
  if (scenario == "chain"){
    df = df %>% 
      mutate(event = factor({{variable}},
                             levels = c(0, 1),
                             labels = c("proximal", "distal")))
  }
  
  if (scenario == "absence"){
    df = df %>% 
      mutate(event = factor({{variable}},
                             levels = c(0, 1),
                             labels = c("direct", "absent"))) 
  }
  
  df = df %>% 
    select(-{{variable}}) %>% 
      mutate(version = scenario,
             alternative = alternative)

  return(df)
}

nboots = 100

df.boot = bind_rows(
  fun_bootstraps(df = read.csv("../../data/experiment1/cause/children/chain_cause_different_responses.csv"),
                 variable = distal,
                 scenario = "chain",
                 alternative = "caused",
                 nboots = nboots),
  fun_bootstraps(df = read.csv("../../data/experiment1/made/children/chain_made_different_responses.csv"),
                 variable = distal,
                 scenario = "chain",
                 alternative = "made",
                 nboots = nboots),
  fun_bootstraps(df = read.csv("../../data/experiment2/cause/children/absence_cause_different_responses.csv"),
                 variable = absence,
                 scenario = "absence",
                 alternative = "caused",
                 nboots = nboots),
  fun_bootstraps(df = read.csv("../../data/experiment2/made/children/absence_made_different_responses.csv") %>% 
                   mutate(age_whole = as.integer(age)),
                 variable = absence,
                 scenario = "absence",
                 alternative = "made",
                 nboots = nboots)) %>% 
  rename(age = age_whole)

14.1.4 Combined data

df.data = bind_rows(df.chain_caused, 
                    df.chain_made, 
                    df.absence_caused,
                    df.absence_made) %>% 
  left_join(df.boot,
            by = c("age", "question", "event", "version", "alternative")) %>% 
  mutate(alternative = factor(alternative, levels = c("caused", "made")),
         question = factor(question, levels = c("lexical", "caused", "made")),
         event = factor(event, levels = c("proximal", "distal", "direct", "absent")),
         event = ifelse(event %in% c("proximal", "direct"), "prototypical", "nonstandard"),
         event = factor(event, levels = c("prototypical", "nonstandard")),
         version = factor(version, levels = c("chain", "absence"))) %>%
  relocate(probability, .after = alternative) %>% 
  arrange(version, age, question, event, alternative)

14.2 MODEL

14.2.1 Helper functions

14.2.1.1 Listener and speaker functions

fun.l0 = function(prior){
  l0 = tibble(question = c("lexical", "alternative"),
              prototypical = c(1, 1),
              nonstandard = c(0, 1)) %>% 
    pivot_longer(cols = -question,
                 names_to = "event",
                 values_to = "value") %>% 
    # add prior
    mutate(prior = ifelse(event == "prototypical", prior["prototypical"], prior["nonstandard"]),
           value = value * prior) %>%
    group_by(question) %>%
    mutate(value = value/sum(value)) %>%
    ungroup()
  return(l0)
}

fun.s1 = function(l0, cost, beta){
  s1 = l0 %>% 
    select(-prior) %>%
    group_by(event) %>% 
    mutate(cost = ifelse(question == "lexical", 0, cost),
           value = ifelse(value == 0, 0.001, value),
           value = ifelse(value == 1, 0.999, value),
           value = log(value) - cost,
           value = softmax(value, beta = beta)) %>%
    ungroup()
  return(s1)
}

fun.l1 = function(s1, prior){
  l1 = s1 %>%
    mutate(prior = ifelse(event == "prototypical", prior["prototypical"], prior["nonstandard"]), 
           value = value * prior) %>%
    group_by(question) %>% 
    mutate(value = value/sum(value)) %>% 
    ungroup()
  return(l1)
}
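
For orientation, these three helpers follow the standard RSA recursion (a sketch under the conventions used here: $[\![u]\!](e)$ is the literal semantics encoded in fun.l0, $P(e)$ the event prior, $\beta$ the speaker optimality, and $c(u)$ the utterance cost applied in fun.s1):

$$L_0(e \mid u) \propto [\![u]\!](e)\, P(e)$$
$$S_1(u \mid e) \propto \exp\!\big(\beta\,[\log L_0(e \mid u) - c(u)]\big)$$
$$L_1(e \mid u) \propto S_1(u \mid e)\, P(e)$$

In fun.l0, the lexical verb is true only of the prototypical event whereas the alternative utterance ("caused"/"made") is true of both events; each function then renormalizes over its respective distribution (events for the listeners, utterances for the speaker).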

fun.fit = function(par, data, fun_model){
  loss = data %>%
    group_by(age) %>%
    nest() %>%
    mutate(prediction = list(fun_model(par = par,
                                       age = age))) %>%
    unnest(c(data, prediction),
           names_sep = "_") %>%
    ungroup() %>%
    summarize(loss = sum((data_probability - prediction_value)^2)) %>%
    pull(loss)
  return(loss)
}

fun.overall = function(par, data, df_parameters, fun_model){
  tmp = data %>% 
    group_by(version, alternative) %>% 
    nest() %>% 
    left_join(df_parameters,
              by = c("version", "alternative")) %>% 
    mutate(l = map2_dbl(.x = parameters, 
                        .y = data,
                        .f = ~ fun.fit(par = par[.x],
                                       data = .y,
                                       fun_model = fun_model)))
  loss = sum(tmp$l)
  return(loss)
}

fun.overall_grid = function(data, df_parameters, fun_model){
  tmp = data %>% 
    group_by(version, alternative) %>% 
    nest() %>% 
    left_join(df_parameters,
              by = c("version", "alternative")) %>% 
    mutate(l = map2_dbl(.x = parameters, 
                        .y = data,
                        .f = ~ fun.fit(par = .x,
                                       data = .y,
                                       fun_model = fun_model)))
  loss = sum(tmp$l)
  return(loss)
}

14.2.2 Model fit

# parameter indices (used in df.parameters below):
# beta intercept and age slope          = 1:2
# prior P(nonstandard), chain           = 3
# prior P(nonstandard), absence         = 4
# cost intercept and age slope, caused  = 5:6
# cost intercept and age slope, made    = 7:8
df.parameters = df.data %>% 
  distinct(version, alternative) %>% 
  mutate(parameters = list(c(1:2, 3, 5:6), 
                           c(1:2, 3, 7:8),
                           c(1:2, 4, 5:6),
                           c(1:2, 4, 7:8)))

fun.model = function(par, age){
  beta = par[1] + par[2] * age
  p_nonstandard = par[3] 
  cost = par[4] + par[5] * age
  
  if(p_nonstandard <= 0){
    p_nonstandard = 0.001
  }
  if(p_nonstandard >= 1){
    p_nonstandard = 0.999
  }
  prior = c(prototypical = 1-p_nonstandard,
            nonstandard = p_nonstandard)
  
  l0 = fun.l0(prior = prior)
  s1 = fun.s1(l0,
              cost = cost,
              beta = beta)
  l1 = fun.l1(s1,
              prior = prior)
  return(l1)
}

# best-fitting parameters
par = c(-0.0971210, 0.0623497, 0.3595724, 0.1530933, -0.1393184,  0.5599144, -1.2883752,  0.3481894)

# optimization
# fit = optim(par = rep(0, max(unlist(df.parameters$parameters))),
#             fn = fun.overall,
#             data = df.data,
#             fun_model = fun.model,
#             df_parameters = df.parameters,
#             method = "Nelder-Mead",
#             control = list(maxit = 2000))

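As a quick illustration of what these fitted parameters produce (the age value and the chain/"caused" condition below are arbitrary examples, not part of the fitting procedure), the parameter vector can be passed through fun.model using the index mapping from df.parameters to obtain predicted listener probabilities for a single condition:

# illustrative only: predicted L1 probabilities at age 5 for the chain
# scenario with "caused" as the alternative (indices follow df.parameters)
fun.model(par = par[c(1:2, 3, 5:6)], age = 5)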

14.2.3 Plots

14.2.3.1 Plotting Functions

14.2.3.1.1 Results
fun.plot_results = function(params){
  
  df.model = df.data %>% 
    group_by(alternative, version, age) %>%
    nest() %>% 
    left_join(df.parameters,
              by = c("version", "alternative")) %>% 
    mutate(prediction = map2(.x = parameters, 
                             .y = age,
                             .f = ~ fun.model(par = params[.x],
                                              age = .y))) %>% 
    unnest(c(data, prediction),
           names_sep = "_") %>% 
    ungroup() %>% 
    select(age,
           question = data_question,
           event = data_event,
           probability = data_probability,
           low = data_low,
           high = data_high,
           prediction = prediction_value,
           alternative,
           version)
  
  df.plot = df.model %>% 
    filter(event == "nonstandard")
  
  p = ggplot(data = df.plot,
             mapping = aes(x = age,
                           y = probability,
                           group = question,
                           color = question,
                           fill = question)) +
    geom_hline(yintercept = 0.50,
               linetype = "dashed",
               color = "black") +
    geom_line(mapping = aes(y = prediction),
              linewidth = 1.5,
              alpha = 0.5) +
    geom_linerange(mapping = aes(ymin = low,
                                ymax = high),
                   alpha = 0.75,
                   show.legend = F) + 
    geom_point(size = 2.5,
               show.legend = F) + 
    facet_grid(rows = vars(version),
               cols = vars(alternative)) + 
    scale_color_manual(values = c("lexical" = "#4DAF4A",
                                  "made" = "#FFEA00",
                                  "caused" = "#FF7F00"),
                       breaks = c("lexical", "made", "caused"),
                       guide = guide_legend(reverse = T)) +
    scale_y_continuous(breaks = seq(0, 1, 0.25),
                       labels = c("100% \n direct", "75%", "50%", "75%", "100% \n non-direct"),
                       limits = c(0, 1)) + 
    labs(y = "probability of selecting\neach cause",
         color = "word") + 
    theme(panel.spacing.y = unit(1, "cm"),
          text = element_text(size = 20),
          legend.position = "inside",
          legend.background = element_rect(color = "black",
                                           linewidth = 0.2),
          legend.position.inside = c(0.02, 0.47),
          legend.justification = c("left", "top"),
          legend.direction = "vertical")
  return(p)
}
14.2.3.1.2 Parameters
fun.plot_params = function(params, nbeta, nprior, ncost){
  n_params = length(params)
  
  if(nbeta == 1){
    p.beta = ggplot() + 
      geom_col(mapping = aes(x = 1,
                             y = params[1]),
               fill = "black",
               show.legend = F) + 
      labs(y = "softmax")
  }else{
    p.beta = ggplot(data = tibble(x = 4:9),
                    mapping = aes(x = x)) + 
      geom_line(mapping = aes(y = params[1] + params[2] * x),
                color = "black",
                linewidth = 1.5) + 
      scale_y_continuous(breaks = seq(0, 0.5, 0.1),
                         limits = c(0, 0.5),
                         expand = expansion(add = c(0, 0))) + 
      labs(x = "age",
           y = TeX(r"(speaker optimality $\alpha$)"))
  }
  
  if(nprior == 2){
    p.prior = ggplot() + 
      geom_col(mapping = aes(x = 1:2,
                             y = c(params[nbeta + 1], params[nbeta + 2])),
               color = "black", 
               fill = "gray80",
               show.legend = F) + 
      scale_x_continuous(breaks = c(1, 2),
                         labels = c("chain", "absence")) +
      scale_y_continuous(breaks = seq(0, 1, 0.25),
                         limits = c(0, 1),
                         expand = expansion(add = c(0, 0))) +
      labs(x = "scenario",
           y = TeX(r"(prior $\it{P}$(non-direct))"))
  }else{
    p.prior = ggplot(data = tibble(x = 4:9),
                    mapping = aes(x = x)) + 
      geom_line(mapping = aes(y = params[nbeta + 1] + params[nbeta + 2] * x,
                              color = "caused"),
                linewidth = 1) + 
      geom_line(mapping = aes(y = params[nbeta + 3] + params[nbeta + 4] * x,
                              color = "made"),
                linewidth = 1) + 
      labs(x = "age",
           y = "prior") +
      theme(legend.position = "right",
            legend.direction = "vertical") + 
      guides(color = guide_legend(reverse = T))
  }
  
  if(ncost == 2){
    p.cost = ggplot() + 
      geom_col(mapping = aes(x = 1,
                             y = params[nbeta + nprior + 1],
                             fill = "caused"),
               show.legend = F) + 
      geom_col(mapping = aes(x = 2, 
                             y = params[nbeta + nprior + 2],
                             fill = "made"),
               show.legend = F) + 
      scale_x_continuous(breaks = c(1, 2),
                         labels = c("caused", "made")) +
      scale_fill_manual(values = c("caused" = "#FF7F00",
                                   "made" = "#FFEA00")) +
      labs(y = "cost") +
      theme(axis.title.x = element_blank())
  }else{
    p.cost = ggplot(data = tibble(x = 4:9),
                    mapping = aes(x = x)) + 
      geom_line(mapping = aes(y = params[nbeta + nprior + 1] + params[nbeta + nprior + 2] * x,
                              color = "caused"),
                linewidth = 1.5) + 
      geom_line(mapping = aes(y = params[nbeta + nprior + 3] + params[nbeta + nprior + 4] * x,
                              color = "made"),
                linewidth = 1.5) + 
      scale_color_manual(values = c("caused" = "#FF7F00",
                                   "made" = "#FFEA00")) +
      scale_y_continuous(breaks = 0:5,
                         labels = 0:5,
                         limits = c(0, 5),
                         expand = expansion(add = c(0, 0))) + 
      labs(x = "age",
           y = TeX(r"(cost $\it{u}$)"),
           color = "alternative") +
      theme(legend.position = "inside",
            legend.background = element_rect(color = "black",
                                             linewidth = 0.2),
            legend.position.inside = c(0.02, 0.99),
            legend.justification = c("left", "top"),
            legend.direction = "vertical")
  }
  
  p.beta + p.prior + p.cost +  
    plot_annotation(tag_levels = "A") &
    theme(text = element_text(size = 24),
          plot.tag = element_text(face = "bold"))
}

14.2.3.2 Result Plot

rsa.overall.plot = fun.plot_results(params = par)
rsa.overall.plot

14.2.3.3 Parameters Plot

rsa.sub.plot = fun.plot_params(params = par,
                 nbeta = 2,
                 nprior = 2,
                 ncost = 4)

rsa.sub.plot

14.2.3.4 Combine result and parameters plot

plot = rsa.sub.plot + rsa.overall.plot +
  plot_layout(nrow = 1,
              widths = c(1, 1, 1, 3)) &
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold"),
        text = element_text(size = 21))

plot

ggsave(plot,
       filename = "../../figures/rsa/rsa_combined.pdf",
       height = 7,
       width = 22)

14.2.3.5 Scatter Plot

df.plot = df.data %>% 
  group_by(alternative, version, age) %>%
  nest() %>% 
  left_join(df.parameters,
            by = c("version", "alternative")) %>% 
  mutate(prediction = map2(.x = parameters, 
                           .y = age,
                           .f = ~ fun.model(par = par[.x],
                                            age = .y))) %>% 
  unnest(c(data, prediction),
         names_sep = "_") %>% 
  ungroup() %>% 
  select(age,
         question = data_question,
         event = data_event,
         probability = data_probability,
         low = data_low,
         high = data_high,
         prediction = prediction_value,
         alternative,
         version) %>% 
  filter(event == "nonstandard")

df.text = df.plot %>% 
  summarize(r = cor(probability, prediction),
            rmse = sqrt(mean((probability - prediction)^2))*100)


ggplot(data = df.plot,
       mapping = aes(x = prediction,
                     y = probability)) + 
  geom_smooth(method = "lm",
              formula = y ~ x, 
              fill = "lightblue",
              color = "black") +
  geom_linerange(mapping = aes(ymin = low,
                               ymax = high),
                 alpha = 0.2) +
  geom_point(mapping = aes(fill = question,
                           shape = version),
             size = 2) + 
  annotate(geom = "text",
           x = 0,
           y = c(1, 0.9),
           label = c(str_c("r = ", round(df.text$r, 2)),
                      str_c("RMSE = ", round(df.text$rmse, 2))),
           hjust = 0,
           vjust = 1,
           size = 8) + 
  labs(x = "model prediction",
       y = "probability of selecting\neach cause",
       shape = "scenario",
       fill = "utterance") + 
  scale_shape_manual(values = c("chain" = 21,
                                "absence" = 22)) +
  scale_fill_manual(values = c("lexical" = "#4DAF4A",
                                  "made" = "#FFEA00",
                                  "caused" = "#FF7F00"),
                       breaks = c("lexical", "made", "caused"),
                       guide = guide_legend(reverse = T)) + 
  scale_x_continuous(breaks = seq(0, 1, 0.25),
                       labels = c("100% \n direct", "75%", "50%", "75%", "100% \n non-direct"),
                     limits = c(0, 1)) + 
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                       labels = c("100% \n direct", "75%", "50%", "75%", "100% \n non-direct"),
                     limits = c(0, 1)) +
  guides(fill = guide_legend(override.aes = list(shape = 21, size = 3)),
         shape = guide_legend(override.aes = list(size = 3)))

ggsave("../../figures/rsa/scatterplot.pdf",
       width = 9,
       height = 6)

15 SESSION INFO

sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.3        forcats_1.0.0          stringr_1.5.1         
##  [4] dplyr_1.1.4            purrr_1.0.2            readr_2.1.5           
##  [7] tidyr_1.3.1            tibble_3.2.1           ggplot2_3.5.1         
## [10] tidyverse_2.0.0        latex2exp_0.9.6        rsample_1.2.1         
## [13] boot_1.3-29            kableExtra_1.4.0       RColorBrewer_1.1-3    
## [16] ggtext_0.1.2           marginaleffects_0.22.0 knitr_1.45            
## [19] patchwork_1.2.0        brms_2.21.0            Rcpp_1.0.12           
## [22] emmeans_1.10.1         janitor_2.2.0         
## 
## loaded via a namespace (and not attached):
##   [1] gridExtra_2.3        inline_0.3.19        rlang_1.1.3         
##   [4] magrittr_2.0.3       snakecase_0.11.1     furrr_0.3.1         
##   [7] matrixStats_1.2.0    compiler_4.3.3       mgcv_1.9-1          
##  [10] loo_2.7.0            systemfonts_1.0.6    vctrs_0.6.5         
##  [13] reshape2_1.4.4       pkgconfig_2.0.3      crayon_1.5.2        
##  [16] fastmap_1.1.1        backports_1.4.1      labeling_0.4.3      
##  [19] utf8_1.2.4           rmarkdown_2.26       markdown_1.12       
##  [22] tzdb_0.4.0           ragg_1.3.0           bit_4.0.5           
##  [25] xfun_0.43            cachem_1.0.8         jsonlite_1.8.8      
##  [28] tinytable_0.2.1      collapse_2.0.16      highr_0.10          
##  [31] cluster_2.1.6        parallel_4.3.3       R6_2.5.1            
##  [34] bslib_0.7.0          stringi_1.8.4        StanHeaders_2.32.6  
##  [37] rpart_4.1.23         parallelly_1.37.1    jquerylib_0.1.4     
##  [40] estimability_1.5     bookdown_0.38        rstan_2.32.6        
##  [43] base64enc_0.1-3      bayesplot_1.11.1     splines_4.3.3       
##  [46] nnet_7.3-19          Matrix_1.6-5         timechange_0.3.0    
##  [49] tidyselect_1.2.1     rstudioapi_0.16.0    abind_1.4-5         
##  [52] yaml_2.3.8           codetools_0.2-19     listenv_0.9.1       
##  [55] pkgbuild_1.4.4       lattice_0.22-5       plyr_1.8.9          
##  [58] withr_3.0.0          bridgesampling_1.1-2 posterior_1.5.0     
##  [61] coda_0.19-4.1        evaluate_0.23        foreign_0.8-86      
##  [64] future_1.33.2        RcppParallel_5.1.7   xml2_1.3.6          
##  [67] pillar_1.9.0         tensorA_0.36.2.1     checkmate_2.3.1     
##  [70] stats4_4.3.3         insight_0.20.5       distributional_0.4.0
##  [73] generics_0.1.3       vroom_1.6.5          hms_1.1.3           
##  [76] commonmark_1.9.1     rstantools_2.4.0     munsell_0.5.1       
##  [79] scales_1.3.0         globals_0.16.3       xtable_1.8-4        
##  [82] glue_1.7.0           Hmisc_5.1-2          tools_4.3.3         
##  [85] diffobj_0.3.5        data.table_1.15.4    mvtnorm_1.2-4       
##  [88] QuickJSR_1.1.3       colorspace_2.1-0     nlme_3.1-164        
##  [91] htmlTable_2.4.2      Formula_1.2-5        cli_3.6.2           
##  [94] textshaping_0.3.7    fansi_1.0.6          viridisLite_0.4.2   
##  [97] svglite_2.1.3        Brobdingnag_1.2-9    gtable_0.3.5        
## [100] sass_0.4.9           digest_0.6.35        farver_2.1.2        
## [103] htmlwidgets_1.6.4    htmltools_0.5.8.1    lifecycle_1.0.4     
## [106] gridtext_0.1.5       bit64_4.0.5