1 Load packages

2 Settings

knitr::opts_chunk$set(echo = T, warning = F, message = F)

# Define a custom color palette
children_palette = brewer.pal(n = 9, name = "Greens")
adults_palette = "grey"
colors = c(children_palette, adults_palette)

# set theme 
theme_set(theme_classic())

# suppress grouping warning 
options(dplyr.summarise.inform = F)

3 Helper functions

3.1 Demographics for children

print_demographics = function(data) {
  # gender
  data %>%
    group_by(gender) %>%
    summarise(n = n_distinct(participant)) %>%
    print()
  
  # age
  print('age:')
  mean(data$age, na.rm = T) %>% print()
  sd(data$age, na.rm = T) %>% print()
  
  # language spoken
 data %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(participant)) %>%
    print()

}

3.2 Demographics for adults

print_demographics_adults = function(data) {
  # gender
  data %>%
    group_by(gender) %>%
    summarise(n = n()) %>%
    print()
  
  # age
  print('age:')
  mean(data$age, na.rm = T) %>% print()
  sd(data$age, na.rm = T) %>% print()

# race
  data %>%
    group_by(race) %>%
    summarise(n = n()) %>%
    print()
  
}

4 EXPERIMENT 1

4.1 CHILDREN

4.1.1 DATA

4.1.1.1 Read in Data

df.exp1_children = read_csv("../../data/experiment1/children/exp1_children.csv") %>%
  filter(row_number() >= 0 & row_number() <= 120) %>% 
  select(age, gender, lan_spoken, trial_1, trial_1_score, trial_2, trial_2_score, trial_3, trial_3_score, trial_4, trial_4_score, control_trial_score, total_score) %>%
  mutate(age_group = cut(age, breaks = c(3, 4, 5, 6, 7.1),
                         labels = c("3", "4", "5", "6"),
                         right = TRUE)) %>%
  # na.omit() %>%
  mutate(participant = 1:n(),
         total_score = as.numeric(total_score),
        across("trial_1"|"trial_2"|"trial_3"|"trial_4", ~case_when(
    grepl("(andy|emily)_correct_(left|right)_(andy|emily)", .) ~ "egg | basketball",
    grepl("(taylor|zoey)_correct_(left|right)_(taylor|zoey)", .) ~ "phone | rubberducky",
    grepl("(sara|leo)_correct_(left|right)_(sara|leo)", .) ~ "vase | toybear",
    grepl("(olivia|mason)_correct_(left|right)_(olivia|mason)", .) ~ "icecream | sunscreen",
    TRUE ~ .
  ))) %>%
  mutate(across(starts_with("trial"), as.character)) %>%
  pivot_longer(cols = c("trial_1", "trial_2", "trial_3", "trial_4"),
               names_to = "trial_order",
               values_to = "trial_name") %>%
  group_by(participant) %>%
  mutate(trial_score = case_when(
    trial_order == "trial_1" ~ trial_1_score,
    trial_order == "trial_2" ~ trial_2_score,
    trial_order == "trial_3" ~ trial_3_score,
    trial_order == "trial_4" ~ trial_4_score,
  )) %>%
  mutate(trial_score = as.numeric(trial_score)) %>%
  mutate(trial_order = ifelse(trial_order == "trial_1", "trial 1",
                               ifelse(trial_order == "trial_2", "trial 2",
                                      ifelse(trial_order == "trial_3", "trial 3",
                                             ifelse(trial_order == "trial_4", "trial 4", trial_order))))) %>%
  select(participant, gender, lan_spoken, age, age_group, trial_order, trial_name, trial_score, total_score, control_trial_score)

4.1.1.2 Demographics

print_demographics(df.exp1_children)

## # A tibble: 3 × 2
##   gender      n
##   <chr>   <int>
## 1 female     60
## 2 male       59
## 3 unknown     1
## [1] "age:"
## [1] 4.994
## [1] 1.128584
## # A tibble: 3 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              114
## 2 non-en            5
## 3 <NA>              1

4.1.2 STATS

4.1.2.0.1 Bayesian Regression Models

4.1.2.0.1.1 Hypothesis 1

# Overall children will thank the individual who prevented the worse outcome from occurring at rates that exceed chance

fit.brm1 = brm(formula = trial_score ~ 1 + (1 | participant) + (1 | trial_name),
     data = df.exp1_children,
     family = "bernoulli", 
     cores = 4,
     file = "cache/brm1") 

fit.brm1 %>% 
  tidy(effects = c("fixed"),
       conf.int = TRUE,
       conf.level = 0.95,
       conf.method = "HPDinterval") %>% 
  mutate(across(.cols = c(estimate, conf.low, conf.high),
                .fns = ~ inv.logit(.))) %>% 
  select(estimate, contains("conf"))

## # A tibble: 1 × 3
##   estimate conf.low conf.high
##      <dbl>    <dbl>     <dbl>
## 1    0.731    0.652     0.802

4.1.2.0.1.2 Hypothesis 2

# The tendency to thank the individual who prevented the worse potential outcome from occurring will increase with age

fit.brm2 = brm(formula = trial_score ~ 1 + age + (1 | participant) + (1 | trial_name),
     data = df.exp1_children,
     family = "bernoulli", 
     cores = 4,
     file = "cache/brm2") 

fit.brm2 %>% 
  tidy(effects = c("fixed"),
       conf.int = TRUE,
       conf.level = 0.95,
       conf.method = "HPDinterval") %>% 
  filter(term == "age") %>%
  select(estimate, contains("conf"))

## # A tibble: 1 × 3
##   estimate conf.low conf.high
##      <dbl>    <dbl>     <dbl>
## 1    0.546    0.346     0.755

4.2 ADULTS

4.2.1 DATA

4.2.1.1 Read in data

df.exp1_adults = read_csv("../../data/experiment1/adults/exp1_adults.csv")

4.2.1.2 Wrangle

df.exp1_adults = df.exp1_adults %>% 
  mutate(participant = 1:30) %>% 
  relocate(participant) %>% 
  select(-workerid:-post_test_trial, -error) %>% 
   pivot_longer(cols = c("video_1", "video_2", "video_3", "video_4"
   ),
   names_to = "video_version",
   values_to = "video"
  ) %>% 
  pivot_longer(cols = c("video_1_correct_answer", "video_2_correct_answer", "video_3_correct_answer", "video_4_correct_answer"),
               names_to = "video_version_correct_answer",
               values_to = "correct_answer") %>% 
  pivot_longer(cols = c("video_1_response", "video_2_response", "video_3_response", "video_4_response"),
                names_to = "video_version_response",
   values_to = "response") %>% 
  mutate(video_version_correct_answer = str_remove_all(video_version_correct_answer, "_correct_answer"),
         video_version_response = str_remove_all(video_version_response, "_response")) %>% 
  filter(video_version == video_version_correct_answer & video_version_correct_answer == video_version_response) %>% 
  select(participant, scenario = video, correct_answer, response) %>% 
  mutate(rating = if_else(correct_answer == response, 1, 0))

4.2.1.3 Demogrpahics

# read in data
df.exp1_adults_demographics = read_csv("../../data/experiment1/adults/exp1_adults_demographics.csv")

# print demographics
print_demographics_adults(df.exp1_adults_demographics)

## # A tibble: 2 × 2
##   gender     n
##   <chr>  <int>
## 1 Female    21
## 2 Male       9
## [1] "age:"
## [1] 35.83333
## [1] 13.63839
## # A tibble: 5 × 2
##   race                                 n
##   <chr>                            <int>
## 1 Asian                                1
## 2 Black/African American               3
## 3 Multiracial                          4
## 4 Native Hawaiian/Pacific Islander     1
## 5 White                               21

4.2.2 STATS

4.2.2.1 Bayesian Regression Models

4.2.2.1.1 Hypothesis 1

# Overall adults will thank the individual who prevented the worse outcome from occurring at rates that exceed chance

# fit.brm1.adults = brm(formula = response ~ 1 + (1 | participant) + (1 | scenario),
#      data = df.exp1_adults,
#      family = "bernoulli",
#      cores = 4,
#      file = "cache/brm1.adults") 
# 
# fit.brm1.adults %>% 
#   tidy(effects = c("fixed"),
#        conf.int = TRUE,
#        conf.level = 0.95,
#        conf.method = "HPDinterval") %>% 
#   mutate(across(.cols = c(estimate, conf.low, conf.high),
#                 .fns = ~ inv.logit(.))) %>% 
#   select(estimate, contains("conf"))

4.2.3 PLOTS

4.2.3.1 Adults + children

selected_indices = seq(from = 1, to = 480, by = 4)  
selected_data = df.exp1_children[selected_indices, ]

# children total counterfactual
df.exp1.children.individual = selected_data %>%
    group_by(participant, age_group, age) %>% 
    summarize(pct_correct = mean(total_score) / 4,
              age_mean = mean(age)) %>% 
    mutate(age = as.numeric(age)) %>%
    ungroup() 


# means by age for children
df.exp1.children.means = selected_data %>%
  mutate(pct_correct = total_score / 4) %>% 
  group_by(age_group) %>%
  summarize(
    n = n(),
    age_mean = mean(age),
    response = Hmisc::smean.cl.boot(pct_correct)) %>%
    mutate(index = c("mean", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) 



# adult total counterfactual
df.exp1.adults.individual = df.exp1_adults %>%
  rename(trial_name = scenario) %>%
  group_by(participant) %>%
  summarize(
    total_score = sum(rating),
    pct_correct = mean(total_score) / 4) %>%
  ungroup() %>%
  mutate(age_group = "adults",
         age_mean = "8",
         age = "8") %>%
  select(-total_score)


# combine total counterfactual data for adults + children
df.combined.individual = rbind(df.exp1.children.individual, df.exp1.adults.individual)


# adult means
df.exp1.adults.means = df.exp1.adults.individual %>%
  group_by(age_group) %>%
  summarize(
    n = n(),
    response = Hmisc::smean.cl.boot(pct_correct)) %>%
    mutate(index = c("mean", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>%
    mutate(age_mean = as.numeric(8))


# combine means for adults + children
df.exp1.combinbed.means = rbind(df.exp1.children.means, df.exp1.adults.means)

# plot text showing n for adults + children
df.exp1.combined.text = df.exp1.combinbed.means %>% 
    select(age_group, age_mean, n) %>% 
    mutate(label = n,
           label = ifelse(age_group == "3", str_c("n = ", n), n),
           y = 0.95)

ggplot() + 
  geom_hline(yintercept = seq(0, 1, 0.25),
             linetype = "dotted",
             color = "black",
             alpha = 0.8) +
  geom_hline(yintercept = 0.5,
             linetype = "dashed",
             color = "black") +
  geom_point(data = df.exp1.children.individual,
             mapping = aes(x = age,
                           y = pct_correct,
                           fill = age,
                           color = age),
             shape = 21,
             size = 2,
             color = "grey30",
             alpha = .4) +
  geom_pointrange(data = df.exp1.combinbed.means %>% filter(age_group != "adults"),
                  mapping = aes(x = age_mean,
                                y = mean,
                                ymin = low,
                                ymax = high,
                                fill = age_mean),
                  shape = 21, 
                  color = "black", 
                  size = .7,
                  show.legend = FALSE) +
  geom_pointrange(data = df.exp1.combinbed.means %>% filter(age_group == "adults"),
                  mapping = aes(x = age_mean,
                                y = mean,
                                ymin = low,
                                ymax = high),
                  shape = 23, 
                  fill = "grey",
                  color = "black",
                  size = .7,
                  show.legend = FALSE) +
  geom_text(data = df.exp1.combined.text,
            mapping = aes(x = age_mean,
                          y = y,
                          label = label),
            size = 5,
            color = "black",
            vjust = -1.7) +  
  ggtitle("Who should receive the thank-you sticker?") +
  labs(x = "Age (in years)",
       y = "Probability of thanking the person\n who prevented a negative outcome") +
  coord_cartesian(ylim = c(0, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = str_c(seq(0, 1, 0.25) * 100, "%"),
                     expand = expansion(add = c(0.05, 0.12))) + 
  scale_x_continuous(breaks = c(seq(3, 7, 1), 8),
                     labels = c(seq(3, 7, 1), "adult"), 
                     expand = expansion(add = c(0.2, 0.5))) +
  scale_fill_gradientn(colors = children_palette) + 
  scale_color_gradientn(colors = children_palette) + 
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title = element_text(size = 11),
    plot.title = element_text(size = 12,
                              hjust = 0.5,
                              face = "bold",
                              margin = margin(b = 15)),
    legend.position = "none",
    legend.text = element_text(size = 11),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.text.x = element_text(margin(b = 0, t = 7, l = 2, r = 3)),
    plot.margin = margin(t = 0.5,
                         l = 0.5,
                         r = 0.5,
                         b = 0.5,
                         unit = "cm"))

ggsave("../../figures/experiment1/exp1_results.pdf",  width = 6.5, height = 4)

5 EXPERIMENT 2

5.1 CHILDREN

5.1.1 DATA

5.1.1.1 Read in Data

df.exp2_children = read_csv("../../data/experiment2/children/exp2_children.csv") %>%
  filter(row_number() >= 0 & row_number() <= 120) %>% 
  select(age, gender, lan_spoken, trial_1, trial_1_score, trial_2, trial_2_score, trial_3, trial_3_score, trial_4, trial_4_score, trial_5, trial_5_score, trial_6, trial_6_score, total_score) %>%
  mutate(age_group = cut(age, breaks = c(3, 4, 5, 6, 7.1),
                         labels = c("3", "4", "5", "6"),
                         right = TRUE)) %>%
  na.omit() %>%
  mutate(participant = 1:n(),
         total_score = as.numeric(total_score),
        across("trial_1"|"trial_2"|"trial_3"|"trial_4"|"trial_5"|"trial_6", ~case_when(
    grepl("lightbulb_laundry_correct_(left|right)_(jessie|caleb)", .) ~ "lightbulb",
    grepl("apple_trashcan_correct_(left|right)_(lily|alex)", .) ~ "apple",
    grepl("basketball_egg_correct_(left|right)_(charlie|noah)", .) ~ "basketball",
    grepl("glass_beanbag_correct_(left|right)_(emily|lucas)", .) ~ "glass",
    grepl("rock_wine_correct_(left|right)_(amber|billy)", .) ~ "rock",
    grepl("bear_water_correct_(left|right)_(vicky|connor)", .) ~ "bear",
    TRUE ~ .
  ))) %>%
  mutate(across(starts_with("trial"), as.character)) %>%
  pivot_longer(cols = c("trial_1", "trial_2", "trial_3", "trial_4", "trial_5", "trial_6"),
               names_to = "trial_order",
               values_to = "trial_name") %>%
  group_by(participant) %>%
  mutate(trial_score = case_when(
    trial_order == "trial_1" ~ trial_1_score,
    trial_order == "trial_2" ~ trial_2_score,
    trial_order == "trial_3" ~ trial_3_score,
    trial_order == "trial_4" ~ trial_4_score,
    trial_order == "trial_5" ~ trial_5_score,
    trial_order == "trial_6" ~ trial_6_score,
  )) %>%
  mutate(trial_score = as.numeric(trial_score)) %>%
  mutate(trial_order = ifelse(trial_order == "trial_1", "trial 1",
                               ifelse(trial_order == "trial_2", "trial 2",
                                      ifelse(trial_order == "trial_3", "trial 3",
                                             ifelse(trial_order == "trial_4", "trial 4",
                                                    ifelse(trial_order == "trial_5", "trial 5",
                                                           ifelse(trial_order == "trial_6", "trial 6", trial_order))))))) %>%
  mutate(side = case_when(
    trial_name %in% c("basketball", "bear", "apple", "rock") ~ "side with things",
    trial_name %in% c("lightbulb", "glass") ~ "side with nothing",
    TRUE ~ "Uncategorized" 
  )) %>%
  mutate(prefer_things = case_when(
    side == "side with things" & trial_score == 1 ~ 1,
    side == "side with nothing" & trial_score == 0 ~ 1,
    TRUE ~ 0
  )) %>%
  select(participant, gender, lan_spoken, age, age_group, trial_order, trial_name, trial_score, side, prefer_things, total_score)

5.1.1.2 Demographics

print_demographics(df.exp2_children)

## # A tibble: 2 × 2
##   gender     n
##   <chr>  <int>
## 1 female    65
## 2 male      55
## [1] "age:"
## [1] 5.020583
## [1] 1.133005
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              118
## 2 non-en            2

5.1.2 STATS

5.1.2.1 Bayesian Regression Models

5.1.2.1.1 Hypothesis 1

# Overall children will thank the individual who prevented the worse outcome from occurring at rates that exceed chance

fit.brm3 = brm(formula = trial_score ~ 1 + (1 | participant) + (1 | trial_name),
     data = df.exp2_children,
     family = "bernoulli", 
     cores = 4,
     file = "cache/brm3") 

fit.brm3 %>% 
  tidy(effects = c("fixed"),
       conf.int = TRUE,
       conf.level = 0.95,
       conf.method = "HPDinterval") %>% 
  mutate(across(.cols = c(estimate, conf.low, conf.high),
                .fns = ~ inv.logit(.))) %>% 
  select(estimate, contains("conf"))

## # A tibble: 1 × 3
##   estimate conf.low conf.high
##      <dbl>    <dbl>     <dbl>
## 1    0.593    0.515     0.669

5.1.2.1.2 Hypothesis 2

# The tendency to thank the individual who prevented the worse potential outcome from occurring will increase with age

fit.brm4 = brm(formula = trial_score ~ 1 + age + (1 | participant) + (1 | trial_name),
     data = df.exp2_children,
     family = "bernoulli", 
     cores = 4,
     file = "cache/brm4") 

fit.brm4 %>% 
  tidy(effects = c("fixed"),
       conf.int = TRUE,
       conf.level = 0.95,
       conf.method = "HPDinterval") %>% 
  filter(term == "age") %>%
  select(estimate, contains("conf"))

## # A tibble: 1 × 3
##   estimate conf.low conf.high
##      <dbl>    <dbl>     <dbl>
## 1    0.259   0.0815     0.428

5.2 ADULTS

5.2.1 DATA

5.2.1.1 Read in Data

# import raw data
df.exp2_adults = read_csv("../../data/experiment2/adults/exp2_adults.csv")

5.2.1.2 Wrangle

df.exp2_adults = df.exp2_adults %>% 
  mutate(participant = 1:n()) %>% 
  relocate(participant) %>% 
  select(-workerid, -error) %>% 
   pivot_longer(cols = c("video_1", "video_2", "video_3", "video_4", "video_5", "video_6"
   ),
   names_to = "video_version",
   values_to = "video"
  ) %>% 
  pivot_longer(cols = c("video_1_correct_answer", "video_2_correct_answer", "video_3_correct_answer", "video_4_correct_answer", "video_5_correct_answer", "video_6_correct_answer"),
               names_to = "video_version_correct_answer",
               values_to = "correct_answer") %>% 
  pivot_longer(cols = c("video_1_response", "video_2_response", "video_3_response", "video_4_response", "video_5_response", "video_6_response"),
                names_to = "video_version_response",
   values_to = "response") %>% 
  mutate(video_version_correct_answer = str_remove_all(video_version_correct_answer, "_correct_answer"),
         video_version_response = str_remove_all(video_version_response, "_response")) %>% 
  filter(video_version == video_version_correct_answer & video_version_correct_answer == video_version_response) %>% 
  mutate(age = "Adult") %>%
  select(age, participant, scenario = video, correct_answer, response) %>% 
  mutate(rating = if_else(correct_answer == response, 1, 0)) %>%
  rename(trial_response = response,
         response = rating)

5.2.1.3 Demographics

df.exp2_adults_demographics = read_csv("../../data/experiment2/adults/exp2_adults_demographics.csv") 
 

print_demographics_adults(df.exp2_adults_demographics)

## # A tibble: 3 × 2
##   gender     n
##   <chr>  <int>
## 1 Female    19
## 2 Male      11
## 3 <NA>       1
## [1] "age:"
## [1] 39.12903
## [1] 14.07774
## # A tibble: 5 × 2
##   race                       n
##   <chr>                  <int>
## 1 Asian                      1
## 2 Black/African American     7
## 3 Multiracial                2
## 4 White                     20
## 5 <NA>                       1

5.2.2 STATS

5.2.2.1 Bayesian Regression Models

5.2.2.1.1 Hypothesis 1

# Overall adults will thank the individual who prevented the worse outcome from occurring at rates that exceed chance

fit.brm3.adults = brm(formula = response ~ 1 + (1 | participant) + (1 | scenario),
     data = df.exp2_adults ,
     family = "bernoulli",
     cores = 4,
     file = "cache/brm3.adults") 

fit.brm3.adults %>% 
  tidy(effects = c("fixed"),
       conf.int = TRUE,
       conf.level = 0.95,
       conf.method = "HPDinterval") %>% 
  mutate(across(.cols = c(estimate, conf.low, conf.high),
                .fns = ~ inv.logit(.))) %>% 
  select(estimate, contains("conf"))

## # A tibble: 1 × 3
##   estimate conf.low conf.high
##      <dbl>    <dbl>     <dbl>
## 1    0.920    0.820     0.969

5.3 PLOTS

5.3.1 Adults + children

selected_indices = seq(from = 1, to = 720, by = 6)  
df.exp2_children_selected = df.exp2_children[selected_indices, ]

df.exp2.children.individual = df.exp2_children_selected %>%
    group_by(participant, age_group, age) %>% 
    summarize(pct_correct = mean(total_score) / 6,
              age_mean = mean(age)) %>% 
    mutate(age = as.numeric(age)) %>%
    ungroup() 


df.exp2.adults.individual = df.exp2_adults %>%
  rename(trial_name = scenario) %>%
  group_by(participant) %>%
  summarize(
    total_score = sum(response),
    pct_correct = mean(total_score) / 6) %>%
  ungroup() %>%
  mutate(age_group = "adults",
         age_mean = "8",
         age = "8") %>%
  select(-total_score)


df.exp2.combined.indivudual = rbind(df.exp2.children.individual, df.exp2.adults.individual)


df.exp2.children.means = df.exp2_children_selected %>%
  mutate(pct_correct = total_score / 6) %>% 
  group_by(age_group) %>%
  summarize(
    n = n(),
    age_mean = mean(age),
    response = Hmisc::smean.cl.boot(pct_correct)) %>%
    mutate(index = c("mean", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) 


df.exp2.adults.means = df.exp2.adults.individual %>%
  group_by(age_group) %>%
  summarize(
    n = n(),
    response = Hmisc::smean.cl.boot(pct_correct)) %>%
    mutate(index = c("mean", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>%
    mutate(age_mean = as.numeric(8))




df.exp2.combined.means = rbind(df.exp2.children.means, df.exp2.adults.means)


df.exp2.combined.text = df.exp2.combined.means %>% 
    select(age_group, age_mean, n) %>% 
    mutate(label = n,
           label = ifelse(age_group == "3", str_c("n = ", n), n),
           y = 0.95)


ggplot() + 
  geom_hline(yintercept = seq(0, 1, 0.25),
             linetype = "dotted",
             color = "black",
             alpha = 0.8) +
  geom_hline(yintercept = 0.5,
             linetype = "dashed",
             color = "black") +
  geom_point(data = df.exp2.children.individual,
             mapping = aes(x = age,
                           y = pct_correct,
                           fill = age,
                           color = age),
             shape = 21,
             size = 2,
             color = "grey30",
             alpha = .4) +
  geom_pointrange(data = df.exp2.combined.means %>% filter(age_group != "adults"),
                  mapping = aes(x = age_mean,
                                y = mean,
                                ymin = low,
                                ymax = high,
                                fill = age_mean),
                  shape = 21, 
                  color = "black", 
                  size = .7,
                  show.legend = FALSE) +
  geom_pointrange(data = df.exp2.combined.means %>% filter(age_group == "adults"),
                  mapping = aes(x = age_mean,
                                y = mean,
                                ymin = low,
                                ymax = high),
                  shape = 23, 
                  fill = "grey",
                  color = "black",
                  size = .7,
                  show.legend = FALSE) +
  geom_text(data = df.exp2.combined.text,
            mapping = aes(x = age_mean,
                          y = y,
                          label = label),
            size = 5,
            color = "black",
            vjust = -1.7) +  
  ggtitle("Who should receive the thank-you sticker?") +
  labs(x = "Age (in years)",
       y = "Probability of thanking the person\n who prevented a negative outcome") +
  coord_cartesian(ylim = c(0, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = str_c(seq(0, 1, 0.25) * 100, "%"),
                     expand = expansion(add = c(0.05, 0.12))) + 
  scale_x_continuous(breaks = c(seq(3, 7, 1), 8),
                     labels = c(seq(3, 7, 1), "adult"), 
                     expand = expansion(add = c(0.2, 0.5))) +
  scale_fill_gradientn(colors = children_palette) + 
  scale_color_gradientn(colors = children_palette) + 
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title = element_text(size = 11),
    plot.title = element_text(size = 12,
                              hjust = 0.5,
                              face = "bold",
                              margin = margin(b = 15)),
    legend.position = "none",
    legend.text = element_text(size = 11),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.text.x = element_text(margin(b = 0, t = 7, l = 2, r = 3)),
    plot.margin = margin(t = 0.5,
                         l = 0.5,
                         r = 0.5,
                         b = 0.5,
                         unit = "cm"))

ggsave("../../figures/experiment2/exp2_results.pdf",  width = 6.5, height = 4)

6 EXPERIMENT 3

6.1 CHILDREN

6.1.1 DATA

6.1.1.1 Read in Data

df.exp3.children = read_csv("../../data/experiment3/children/exp3_children.csv")%>%
  filter(row_number() >= 0 & row_number() <= 240) %>% 
  select(age, gender, lan_spoken, trial_1, trial_1_score, trial_2, trial_2_score, trial_3, trial_3_score, trial_4, trial_4_score, trial_5, trial_5_score, trial_6, trial_6_score, total_score) %>%
  mutate(age_group = cut(age, breaks = c(3, 4, 5, 6, 7, 8, 9, 10, 11.1),
                         labels = c("3", "4", "5", "6", "7", "8", "9", "10"),
                         right = TRUE)) %>%
  na.omit() %>%
  mutate(participant = 1:n(),
         total_score = as.numeric(total_score),
        across("trial_1"|"trial_2"|"trial_3"|"trial_4"|"trial_5"|"trial_6", ~case_when(
    grepl("exp7_bulb_laundry_correct_(left|right)_(jessie|caleb)", .) ~ "lightbulb",
    grepl("exp7_apple_trashcan_correct_(left|right)_(lily|alex)", .) ~ "apple",
    grepl("exp7_basketball_egg_correct_(left|right)_(charlie|noah)", .) ~ "basketball",
    grepl("exp7_glass_beanbag_correct_(left|right)_(emily|lucas)", .) ~ "glass",
    grepl("exp7_rock_wine_correct_(left|right)_(amber|billy)", .) ~ "rock",
    grepl("exp7_bear_water_correct_(left|right)_(vicky|connor)", .) ~ "bear",
    TRUE ~ .
  ))) %>%
  mutate(across(starts_with("trial"), as.character)) %>%
  pivot_longer(cols = c("trial_1", "trial_2", "trial_3", "trial_4", "trial_5", "trial_6"),
               names_to = "trial_order",
               values_to = "trial_name") %>%
  group_by(participant) %>%
  mutate(trial_score = case_when(
    trial_order == "trial_1" ~ trial_1_score,
    trial_order == "trial_2" ~ trial_2_score,
    trial_order == "trial_3" ~ trial_3_score,
    trial_order == "trial_4" ~ trial_4_score,
    trial_order == "trial_5" ~ trial_5_score,
    trial_order == "trial_6" ~ trial_6_score,
  )) %>%
  mutate(trial_score = as.numeric(trial_score)) %>%
  mutate(trial_order = ifelse(trial_order == "trial_1", "trial 1",
                               ifelse(trial_order == "trial_2", "trial 2",
                                      ifelse(trial_order == "trial_3", "trial 3",
                                             ifelse(trial_order == "trial_4", "trial 4",
                                                    ifelse(trial_order == "trial_5", "trial 5",
                                                           ifelse(trial_order == "trial_6", "trial 6", trial_order))))))) %>%
  mutate(side = case_when(
    trial_name %in% c("basketball", "bear", "apple", "rock") ~ "side with things",
    trial_name %in% c("lightbulb", "glass") ~ "side with nothing",
    TRUE ~ "Uncategorized" 
  )) %>%
  mutate(prefer_things = case_when(
    side == "side with things" & trial_score == 1 ~ 1,
    side == "side with nothing" & trial_score == 0 ~ 1,
    TRUE ~ 0
  )) %>%
  select(participant, gender, lan_spoken, age, age_group, trial_order, trial_name, trial_score, side, prefer_things, total_score)

6.2 Demographics

print_demographics(df.exp3.children)

## # A tibble: 3 × 2
##   gender     n
##   <chr>  <int>
## 1 female   137
## 2 male     102
## 3 na         1
## [1] "age:"
## [1] 6.995583
## [1] 2.294574
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              235
## 2 non-en            5

7 STATS

7.1 Bayesian Regression Models

7.1.1 Hypothesis 1

# Overall children will thank the individual who prevented the worse outcome from occurring at rates that exceed chance

fit.brm5.children = brm(formula = trial_score ~ 1 + (1 | participant) + (1 | trial_name),
     data = df.exp3.children,
     family = "bernoulli", 
     cores = 4,
     file = "cache/brm5_children") 

fit.brm5.children %>% 
  tidy(effects = c("fixed"),
       conf.int = TRUE,
       conf.level = 0.95,
       conf.method = "HPDinterval") %>% 
  mutate(across(.cols = c(estimate, conf.low, conf.high),
                .fns = ~ inv.logit(.))) %>% 
  select(estimate, contains("conf"))

## # A tibble: 1 × 3
##   estimate conf.low conf.high
##      <dbl>    <dbl>     <dbl>
## 1    0.767    0.718     0.808

7.1.2 Hypothesis 2

# The tendency to thank the individual who prevented the worse potential outcome from occurring will increase with age

fit.brm5.children.age = brm(formula = trial_score ~ 1 + age + (1 | participant) + (1 | trial_name),
     data = df.exp3.children,
     family = "bernoulli", 
     cores = 4,
     file = "cache/brm5_children_age") 

fit.brm5.children.age %>% 
  tidy(effects = c("fixed"),
       conf.int = TRUE,
       conf.level = 0.95,
       conf.method = "HPDinterval") %>% 
  filter(term == "age") %>%
  select(estimate, contains("conf"))

## # A tibble: 1 × 3
##   estimate conf.low conf.high
##      <dbl>    <dbl>     <dbl>
## 1    0.401    0.313     0.498

7.2 ADULTS

7.2.1 DATA

7.2.1.1 Read in Data

setwd("../../data/experiment3/adults")

file_names = list.files(pattern="*.csv")
file_names = file_names[!file_names %in% c("m6f0atxjkr_same_participant_as_lhjb5uy11r.csv")]
all_data = do.call(rbind, lapply(file_names, read.csv)) %>%
  distinct()

write.csv(all_data, "exp3_adult.csv", row.names = FALSE)

df.exp3.adults = read_csv("exp3_adult.csv")

7.2.1.2 Wrangle

df.exp3.adults = df.exp3.adults %>% 
  mutate(participant = 1:n()) %>% 
  relocate(participant) %>% 
   pivot_longer(cols = c("video_1", "video_2", "video_3", "video_4", "video_5", "video_6"
   ),
   names_to = "video_version",
   values_to = "video"
  ) %>% 
  pivot_longer(cols = c("video_1_correct_answer", "video_2_correct_answer", "video_3_correct_answer", "video_4_correct_answer", "video_5_correct_answer", "video_6_correct_answer"),
               names_to = "video_version_correct_answer",
               values_to = "correct_answer") %>% 
  pivot_longer(cols = c("video_1_response", "video_2_response", "video_3_response", "video_4_response", "video_5_response", "video_6_response"),
                names_to = "video_version_response",
   values_to = "response") %>% 
  mutate(video_version_correct_answer = str_remove_all(video_version_correct_answer, "_correct_answer"),
         video_version_response = str_remove_all(video_version_response, "_response")) %>% 
  filter(video_version == video_version_correct_answer & video_version_correct_answer == video_version_response) %>% 
  mutate(age = "Adult") %>%
  select(age, participant, scenario = video, correct_answer, response, feedback) %>% 
  mutate(rating = if_else(correct_answer == response, 1, 0)) %>%
  rename(trial_response = response,
         response = rating)

7.2.1.3 Demographics

# read in data
df.exp3.adults_demographics = read_csv("../../data/experiment3/adults/exp3_adult.csv")

# print demographics
print_demographics_adults(df.exp3.adults_demographics)

## # A tibble: 3 × 2
##   gender         n
##   <chr>      <int>
## 1 Female        20
## 2 Male           9
## 3 Non-binary     1
## [1] "age:"
## [1] 39.96667
## [1] 12.39712
## # A tibble: 4 × 2
##   race                       n
##   <chr>                  <int>
## 1 Asian                      1
## 2 Black/African American     5
## 3 White                     23
## 4 other_race                 1

7.2.2 STATS

7.2.2.1 Bayesian Regression Models

7.2.2.1.1 Hypothesis 1

# Overall adults will thank the individual who prevented the worse outcome from occurring at rates that exceed chance

fit.brm5.adults = brm(formula = response ~ 1 + (1 | participant) + (1 | scenario),
     data = df.exp3.adults,
     family = "bernoulli",
     cores = 4,
     file = "cache/brm5.adults") 

fit.brm5.adults %>% 
  tidy(effects = c("fixed"),
       conf.int = TRUE,
       conf.level = 0.95,
       conf.method = "HPDinterval") %>% 
  mutate(across(.cols = c(estimate, conf.low, conf.high),
                .fns = ~ inv.logit(.))) %>% 
  select(estimate, contains("conf"))

## # A tibble: 1 × 3
##   estimate conf.low conf.high
##      <dbl>    <dbl>     <dbl>
## 1    0.942    0.670     0.993

7.3 PLOTS

7.3.1 Adults + children

selected_indices = seq(from = 1, to = 1440, by = 6)  
df.exp3_children_selected = df.exp3.children[selected_indices, ]

df.exp3.children.individual = df.exp3_children_selected %>%
    group_by(participant, age_group, age) %>% 
    summarize(pct_correct = mean(total_score) / 6,
              age_mean = mean(age)) %>% 
    mutate(age = as.numeric(age)) %>%
    ungroup() 

df.exp3.adults.individual = df.exp3.adults %>%
  rename(trial_name = scenario) %>%
  group_by(participant) %>%
  summarize(
    total_score = sum(response),
    pct_correct = mean(total_score) / 6) %>%
  ungroup() %>%
  mutate(age_group = "adults",
         age_mean = "12",
         age = "12") %>%
  select(-total_score)




df.exp3.combined.individual = rbind(df.exp3.children.individual, df.exp3.adults.individual)


df.exp3.children.means = df.exp3_children_selected %>%
  mutate(pct_correct = total_score / 6) %>% 
  group_by(age_group) %>%
  summarize(
    n = n(),
    age_mean = mean(age),
    response = Hmisc::smean.cl.boot(pct_correct)) %>%
    mutate(index = c("mean", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) 


df.exp3.adults.means = df.exp3.adults.individual %>%
  group_by(age_group) %>%
  summarize(
    n = n(),
    response = Hmisc::smean.cl.boot(pct_correct)) %>%
    mutate(index = c("mean", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>%
    mutate(age_mean = as.numeric(12))




df.exp3.combined.means = rbind(df.exp3.children.means, df.exp3.adults.means)


df.exp3.combined.text = df.exp3.combined.means %>% 
    select(age_group, age_mean, n) %>% 
    mutate(label = n,
           label = ifelse(age_group == "3", str_c("n = ", n), n),
           y = 0.95)

ggplot() + 
  geom_hline(yintercept = seq(0, 1, 0.25),
             linetype = "dotted",
             color = "black",
             alpha = 0.8) +
  geom_hline(yintercept = 0.5,
             linetype = "dashed",
             color = "black") +
  geom_point(data = df.exp3.children.individual,
             mapping = aes(x = age,
                           y = pct_correct,
                           fill = age,
                           color = age),
             shape = 21,
             size = 2,
             color = "grey30",
             alpha = .4) +
  geom_pointrange(data = df.exp3.combined.means %>% filter(age_group != "adults"),
                  mapping = aes(x = age_mean,
                                y = mean,
                                ymin = low,
                                ymax = high,
                                fill = age_mean),
                  shape = 21, 
                  color = "black", 
                  size = .7,
                  show.legend = FALSE) +
  geom_pointrange(data = df.exp3.combined.means %>% filter(age_group == "adults"),
                  mapping = aes(x = age_mean,
                                y = mean,
                                ymin = low,
                                ymax = high),
                  shape = 23, 
                  fill = "grey",
                  color = "black",
                  size = .7,
                  show.legend = FALSE) +
  geom_text(data = df.exp3.combined.text,
            mapping = aes(x = age_mean,
                          y = y,
                          label = label),
            size = 5,
            color = "black",
            vjust = -1.7) +  
  ggtitle("Who should receive the thank-you sticker?") +
  labs(x = "Age (in years)",
       y = "Probability of thanking the person\n who prevented a negative outcome") +
  coord_cartesian(ylim = c(0, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.25),
                     labels = str_c(seq(0, 1, 0.25) * 100, "%"),
                     expand = expansion(add = c(0.05, 0.12))) + 
  scale_x_continuous(breaks = c(seq(3, 11, 1), 12),
                     labels = c(seq(3, 11, 1), "adult"), 
                     expand = expansion(add = c(0.2, 0.5))) +
  scale_fill_gradientn(colors = children_palette) + 
  scale_color_gradientn(colors = children_palette) + 
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title = element_text(size = 11),
    plot.title = element_text(size = 12,
                              hjust = 0.5,
                              face = "bold",
                              margin = margin(b = 15)),
    legend.position = "none",
    legend.text = element_text(size = 11),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.text.x = element_text(margin(b = 0, t = 7, l = 2, r = 3)),
    plot.margin = margin(t = 0.5,
                         l = 0.5,
                         r = 0.5,
                         b = 0.5,
                         unit = "cm"))

ggsave("../../figures/experiment3/exp3_results.pdf",  width = 8.5, height = 4)

8 SESSION INFO

sessionInfo()

## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.9.3     forcats_1.0.0       stringr_1.5.1      
##  [4] dplyr_1.1.4         purrr_1.0.2         readr_2.1.5        
##  [7] tidyr_1.3.1         tibble_3.2.1        ggplot2_3.5.1      
## [10] tidyverse_2.0.0     RColorBrewer_1.1-3  DT_0.33            
## [13] boot_1.3-29         broom.mixed_0.2.9.5 tinytable_0.2.1    
## [16] brms_2.21.0         Rcpp_1.0.12         knitr_1.45         
## [19] emmeans_1.10.1      xtable_1.8-4       
## 
## loaded via a namespace (and not attached):
##  [1] gridExtra_2.3        inline_0.3.19        rlang_1.1.3         
##  [4] magrittr_2.0.3       furrr_0.3.1          matrixStats_1.2.0   
##  [7] compiler_4.3.3       loo_2.7.0            systemfonts_1.0.6   
## [10] vctrs_0.6.5          pkgconfig_2.0.3      crayon_1.5.2        
## [13] fastmap_1.1.1        backports_1.4.1      labeling_0.4.3      
## [16] utf8_1.2.4           rmarkdown_2.26       tzdb_0.4.0          
## [19] ragg_1.3.0           bit_4.0.5            xfun_0.43           
## [22] cachem_1.0.8         jsonlite_1.8.8       highr_0.10          
## [25] broom_1.0.5          parallel_4.3.3       cluster_2.1.6       
## [28] R6_2.5.1             bslib_0.7.0          stringi_1.8.4       
## [31] StanHeaders_2.32.6   parallelly_1.37.1    rpart_4.1.23        
## [34] jquerylib_0.1.4      estimability_1.5     bookdown_0.38       
## [37] rstan_2.32.6         base64enc_0.1-3      bayesplot_1.11.1    
## [40] Matrix_1.6-5         splines_4.3.3        nnet_7.3-19         
## [43] timechange_0.3.0     tidyselect_1.2.1     rstudioapi_0.16.0   
## [46] abind_1.4-5          yaml_2.3.8           codetools_0.2-19    
## [49] listenv_0.9.1        pkgbuild_1.4.4       lattice_0.22-5      
## [52] withr_3.0.0          bridgesampling_1.1-2 posterior_1.5.0     
## [55] coda_0.19-4.1        evaluate_0.23        foreign_0.8-86      
## [58] future_1.33.2        RcppParallel_5.1.7   pillar_1.9.0        
## [61] tensorA_0.36.2.1     checkmate_2.3.1      stats4_4.3.3        
## [64] distributional_0.4.0 generics_0.1.3       vroom_1.6.5         
## [67] hms_1.1.3            rstantools_2.4.0     munsell_0.5.1       
## [70] scales_1.3.0         globals_0.16.3       glue_1.7.0          
## [73] Hmisc_5.1-2          tools_4.3.3          data.table_1.15.4   
## [76] mvtnorm_1.2-4        grid_4.3.3           QuickJSR_1.1.3      
## [79] colorspace_2.1-0     nlme_3.1-164         htmlTable_2.4.2     
## [82] Formula_1.2-5        cli_3.6.2            textshaping_0.3.7   
## [85] fansi_1.0.6          Brobdingnag_1.2-9    gtable_0.3.5        
## [88] sass_0.4.9           digest_0.6.35        htmlwidgets_1.6.4   
## [91] farver_2.1.2         htmltools_0.5.8.1    lifecycle_1.0.4     
## [94] bit64_4.0.5

Counterfactual thought without counterfactual langauge

David Rose, Siying Zhang, Hyowon Gweon, Tobias Gerstenberg

March 07, 2025

1 Load packages

2 Settings

3 Helper functions

3.1 Demographics for children

3.2 Demographics for adults

4 EXPERIMENT 1

4.1 CHILDREN

4.1.1 DATA

4.1.1.1 Read in Data

4.1.1.2 Demographics

4.1.2 STATS

4.1.2.0.1 Bayesian Regression Models

4.1.2.0.1.1 Hypothesis 1

4.1.2.0.1.2 Hypothesis 2

4.2 ADULTS

4.2.1 DATA

4.2.1.1 Read in data

4.2.1.2 Wrangle

4.2.1.3 Demogrpahics

4.2.2 STATS

4.2.2.1 Bayesian Regression Models

4.2.2.1.1 Hypothesis 1

4.2.3 PLOTS

4.2.3.1 Adults + children

5 EXPERIMENT 2

5.1 CHILDREN

5.1.1 DATA

5.1.1.1 Read in Data

5.1.1.2 Demographics

5.1.2 STATS

5.1.2.1 Bayesian Regression Models

5.1.2.1.1 Hypothesis 1

5.1.2.1.2 Hypothesis 2

5.2 ADULTS

5.2.1 DATA

5.2.1.1 Read in Data

5.2.1.2 Wrangle

5.2.1.3 Demographics

5.2.2 STATS

5.2.2.1 Bayesian Regression Models

5.2.2.1.1 Hypothesis 1

5.3 PLOTS

5.3.1 Adults + children

6 EXPERIMENT 3

6.1 CHILDREN

6.1.1 DATA

6.1.1.1 Read in Data

6.2 Demographics

7 STATS

7.1 Bayesian Regression Models

7.1.1 Hypothesis 1

7.1.2 Hypothesis 2

7.2 ADULTS

7.2.1 DATA

7.2.1.1 Read in Data

7.2.1.2 Wrangle

7.2.1.3 Demographics

7.2.2 STATS

7.2.2.1 Bayesian Regression Models

7.2.2.1.1 Hypothesis 1

7.3 PLOTS

7.3.1 Adults + children

8 SESSION INFO