Clean

Modified

September 17, 2024

About

This page describes how the aggregate home visit survey files are cleaned, measure by measure.

Setup

suppressPackageStartupMessages(library(tidyverse))

Load data file

# Read the aggregate raw (non-MCDI) home visit export from the project's
# data/csv/home_visit/agg directory. Every column is read as character, so
# nothing is coerced on import; numeric conversion of specific columns is done
# explicitly in the locomotion section.
#
# Alternative source kept for reference: load the data frame from the
# {targets} store instead of the CSV.
# if (!('home_visit_df' %in% ls())) {
#   targets::tar_load(home_visit_df, store="../_targets")
# }
raw_csv_path <- here::here(
  "data/csv/home_visit/agg",
  "PLAY-non-mcdi-raw-latest.csv"
)

home_visit_df <- readr::read_csv(
  raw_csv_path,
  col_types = readr::cols(.default = readr::col_character()),
  show_col_types = FALSE
)

Remove metadata variables

# Flag form-metadata columns: any column whose name contains "note",
# "instructions", or "acknowledge" (case-sensitive regular expression).
metadata_cols <- stringr::str_detect(
  names(home_visit_df),
  "note|instructions|acknowledge"
)

# Positions of the columns that are NOT metadata.
no_metadata_cols_index <- which(!metadata_cols)

# Keep the non-metadata columns, then drop the `start` and `end` columns.
home_visit_df_trim <- dplyr::select(
  home_visit_df,
  dplyr::all_of(no_metadata_cols_index)
)
home_visit_df_trim <- dplyr::select(home_visit_df_trim, -c(start, end))

The untrimmed data frame had 272 variables. The trimmed data frame has 216 variables.

Change variable names

We rename some variables to match the names used in later stages of processing, when we merge the KoBoToolbox data with the data from the Databrary session spreadsheet.

# Rename the two identifier columns: `participant_id` becomes
# `participant_guid`, and `subject_number` becomes `participant_ID`.
# The lookup vector is written as new_name = "old_name".
id_renames <- c(
  participant_guid = "participant_id",
  participant_ID = "subject_number"
)

home_visit_df_trim <- dplyr::rename(
  home_visit_df_trim,
  dplyr::all_of(id_renames)
)

Language variables

The raw file contains separate variables indicating whether the child is exposed to English and whether the child is exposed to Spanish at home. We leave those variables unchanged for now.

Locomotion variables

# Build the list of administrative locomotion columns to drop: the date-format
# column, the per-month `calc_*` columns (5 to 16 months), the `holiday*` and
# `memorable_events*` columns, and the three `*_onset_check` columns.
loco_prefix <- "locomotor_milestones."

# NOTE(review): there is no "aug" entry, mirroring the original hand-written
# list -- confirm the form really has no August holiday item.
holiday_months <- c(
  "jan", "feb", "marchapril", "may", "june",
  "july", "sep", "oct", "nov", "dec"
)

loco_admin_names <- paste0(
  loco_prefix,
  c(
    "date_format",
    paste0("calc_", 5:16, "mo"),
    "who_walk.walk_onset_check",
    "k_walk.k_walk_onset_check",
    "crawl_onset.crawl_onset_check",
    paste0("holiday1.holiday_", holiday_months),
    "holiday1.memorable_events",
    paste0("holiday2.holiday_", holiday_months, "2"),
    "holiday2.memorable_events2"
  )
)

# Logical mask of administrative columns and the positions of the rest.
# Names in the list that are absent from the data are simply ignored.
loco_admin_cols <- names(home_visit_df_trim) %in% loco_admin_names
loco_admin_index <- which(!loco_admin_cols)

# Short names for the retained locomotion columns (new_name = "old_name").
loco_renames <- c(
  walk_mos_who = "locomotor_milestones.who_walk.who_walk_onset_mo",
  walk_mos_kea = "locomotor_milestones.k_walk.k_walk_onset_mo",
  crawl_mos = "locomotor_milestones.crawl_onset.crawl_onset_mo",
  walk_onset_date_kea = "locomotor_milestones.k_walk.k_walk_onset_date",
  walk_onset_date_who = "locomotor_milestones.who_walk.who_walk_onset_date",
  walk_onset_comments_who = "locomotor_milestones.who_walk.comments_who_walk_onset",
  walk_onset_comments_kea = "locomotor_milestones.k_walk.comments_k_walk_onset",
  crawl_onset_date = "locomotor_milestones.crawl_onset.crawl_onset_date",
  crawl_onset_comments = "locomotor_milestones.crawl_onset.comments_crawl_onset"
)

# Drop the administrative columns, apply the short names, and convert the
# three onset-age columns from character (as imported) to numeric.
home_visit_df_loco <- home_visit_df_trim |>
  dplyr::select(dplyr::all_of(loco_admin_index)) |>
  dplyr::rename(dplyr::all_of(loco_renames)) |>
  dplyr::mutate(
    dplyr::across(c(walk_mos_who, walk_mos_kea, crawl_mos), as.numeric)
  )

Health variables

# Administrative health columns to drop, matched by exact name. Names that are
# not present in the data are ignored by `%in%`.
health_admin_cols <- names(home_visit_df_loco) %in%
  c("instructions", "note", "doctor_told_you")

health_admin_index <- which(!health_admin_cols)

# Short names for the health columns (new_name = "old_name").
health_renames <- c(
  feeding_breastfeed = "health.feeding_nutrition.breastfeed",
  feeding_solidfood_age = "health.feeding_nutrition.solidfood_age",
  feeding_comments = "health.feeding_nutrition.comments_feeding",
  child_sleeping_position = "health.general_health.child_sleeping_position",
  child_health = "health.general_health.child_health",
  child_recent_vaccination = "health.general_health.child_vaccination",
  child_medical_specialist = "health.general_health.child_medical_specialist",
  child_medical_specialist_comments = "health.general_health.comments_child_medical_special",
  child_hearing_tested = "health.general_health.child_hearing_tested",
  child_vision_tested = "health.general_health.child_vision_tested",
  child_allergies = "health.general_health.child_allergies_infections_ill.allergies",
  child_ear_infection = "health.general_health.child_allergies_infections_ill.ear_infection",
  child_asthma = "health.general_health.child_allergies_infections_ill.asthma",
  child_respiratory = "health.general_health.child_allergies_infections_ill.respiratory",
  child_gastrointestinal = "health.general_health.child_allergies_infections_ill.gastrointestinal",
  child_allergy_illness_comments = "health.general_health.comments_allergy_etc",
  child_injury_times = "health.general_health.child_injury_times",
  child_injury_comments = "health.general_health.comment_injury",
  child_health_comments = "health.general_health.comments_general_health",
  mom_prenatal_care = "health.prenatal.prenatal_care",
  mom_prenatal_care_comments = "health.prenatal.comments_prenatal",
  mom_pregnant_smoking = "health.smoking.pregnant_smoking",
  mom_pregnant_smoking_trimester_1 = "health.smoking.smoking_trimester_1",
  mom_pregnant_smoking_trimester_2 = "health.smoking.smoking_trimester_2",
  mom_pregnant_smoking_trimester_3 = "health.smoking.smoking_trimester_3",
  mom_smoking_now = "health.smoking.mom_smoking_now",
  mom_smoking_now_amount = "health.smoking.mom_smoking_now_amount",
  smoking_house = "health.smoking.smoking_house",
  smoking_car = "health.smoking.smoking_car",
  smoking_comments = "health.smoking.comments_smoking",
  mom_pregnant_drinking = "health.drinking.pregnant_drinking",
  mom_drinking_trimester_1 = "health.drinking.drinking_trimester_1",
  mom_drinking_trimester_2 = "health.drinking.drinking_trimester_2",
  mom_drinking_trimester_3 = "health.drinking.drinking_trimester_3",
  mom_drinking_comments = "health.drinking.comments_drinking",
  phq4_nervous = "health.phq4.phq4_nervous",
  phq4_worrying = "health.phq4.phq4_worrying",
  phq4_littleinterest = "health.phq4.phq4_littleinterest",
  phq4_down = "health.phq4.phq4_down",
  phq4_comments = "health.phq4.comments_phq4"
)

# Per-option columns for the hearing- and vision-tested questions. They are
# removed as duplicates of the `child_hearing_tested` / `child_vision_tested`
# columns kept above. `rep(..., each = 5)` pairs each question with all five
# option suffixes (the suffix vector is recycled).
tested_option_cols <- paste0(
  "health.general_health.child_",
  rep(c("hearing", "vision"), each = 5),
  "_tested.",
  c("birthhospital", "afterhome", "no", "refused", "donotknow")
)

home_visit_df_health <- home_visit_df_loco |>
  dplyr::select(dplyr::all_of(health_admin_index)) |>
  dplyr::rename(dplyr::all_of(health_renames)) |>
  # Remove duplicating variables
  dplyr::select(-dplyr::all_of(tested_option_cols))

Rothbart ECBQ

# Strip the "rothbart.rothbartquestions." prefix from the ECBQ item columns and
# give the comments column a short name.
#
# The renaming is done on the way to `home_visit_ecbq` rather than by assigning
# into `names(home_visit_df_health)`, so the health-stage data frame is left
# exactly as the previous section produced it (the same copy-then-rename
# approach the division-of-labor and typical-day sections use).
#
# `rename_with()` is applied to every column: the pattern itself contains
# "rothbart", so only Rothbart item columns can match and all other names pass
# through unchanged.
home_visit_ecbq <- home_visit_df_health |>
  dplyr::rename_with(
    \(nm) stringr::str_replace(nm, "rothbart\\.rothbartquestions\\.", "")
  ) |>
  dplyr::rename(comments_rothbart = rothbart.comments_rothbart)

Media use

# Short names for the media-use columns (new_name = "old_name"). Most simply
# replace the dots with underscores; `tv_hours_per_day` and
# `comments_technology` are also shortened.
mediause_renames <- c(
  mediause_home_technology = "mediause.home_technology",
  mediause_technology_child_tv = "mediause.technology_child_tv",
  mediause_technology_child_computer = "mediause.technology_child_computer",
  mediause_technology_child_ipad = "mediause.technology_child_ipad",
  mediause_technology_child_dvd = "mediause.technology_child_dvd",
  mediause_technology_child_educational = "mediause.technology_child_educational",
  mediause_technology_child_videogame = "mediause.technology_child_videogame",
  mediause_tv_how = "mediause.tv_how",
  mediause_dvd_how = "mediause.dvd_how",
  mediause_computer_how = "mediause.computer_how",
  mediause_ipad_how = "mediause.ipad_how",
  mediause_educational_how = "mediause.educational_how",
  mediause_videogame_how = "mediause.videogame_how",
  mediause_tv_hrs_per_day = "mediause.tv_hours_per_day",
  mediause_techuse_meals = "mediause.techuse.meals",
  mediause_techuse_playtime = "mediause.techuse.playtime",
  mediause_techuse_bedtime = "mediause.techuse.bedtime",
  mediause_techuse_transportation = "mediause.techuse.transportation",
  mediause_comments = "mediause.comments_technology"
)

home_visit_mediause <- dplyr::rename(
  home_visit_ecbq,
  dplyr::all_of(mediause_renames)
)

Pets

# Drop the "pets." prefix from the four pets columns (new_name = "old_name").
pets_renames <- c(
  pets_at_home = "pets.pets_at_home",
  pets_types_number = "pets.pets_types_number",
  pets_indoors_outdoors = "pets.pets_indoors_outdoors",
  comments_pets = "pets.comments_pets"
)

home_visit_pets <- dplyr::rename(
  home_visit_mediause,
  dplyr::all_of(pets_renames)
)

# Preview the first 20 distinct answers to the pets type/number question.
head(unique(home_visit_pets[["pets_types_number"]]), 20)
 [1] "1 dog"                                                                                                    
 [2] "1 cat"                                                                                                    
 [3] NA                                                                                                         
 [4] "1 Turtle"                                                                                                 
 [5] "2 dogs"                                                                                                   
 [6] "2 pugs and 2 fish"                                                                                        
 [7] "2 Dogs"                                                                                                   
 [8] "3 cats"                                                                                                   
 [9] "2 Cats, 1 bearded dragon, 1 snake, 1 tarantula and 1 frog - (just had a fish but it recently passed away)"
[10] "2 cats"                                                                                                   
[11] "7 rabbits"                                                                                                
[12] "3 dogs"                                                                                                   
[13] "1 dog, 1 cat"                                                                                             
[14] "1 dog; 1 cat"                                                                                             
[15] "1 dog and 2 cats"                                                                                         
[16] "1 dog 2 cats"                                                                                             
[17] "dog"                                                                                                      
[18] "One dog"                                                                                                  
[19] "1 dog Dalmatian"                                                                                          
[20] "One cat"                                                                                                  

Division of labor

# Group prefixes to strip from the division-of-labor column names. The
# patterns are applied one after another, in this order, and each application
# removes only the first match within a name.
labor_prefix_patterns <- c(
  "division_labor\\.",
  "laundry\\.",
  "cleaning\\.",
  "dishwashing\\.",
  "cooking\\.",
  "feeding\\.",
  "droppick\\.",
  "bed\\.",
  "disciplining\\."
)

# Fold the patterns over the current column names.
labor_names <- Reduce(
  function(nms, pattern) stringr::str_remove(nms, pattern),
  labor_prefix_patterns,
  names(home_visit_pets)
)

# Apply the cleaned names to a copy of the pets-stage data, then drop the
# `instruction_dishes` column.
home_visit_labor <- home_visit_pets |>
  stats::setNames(labor_names) |>
  dplyr::select(-instruction_dishes)

Typical day

# Final cleaned data frame: the division-of-labor stage with the
# "typical_day." prefix removed from column names.
home_visit_clean <- stats::setNames(
  home_visit_labor,
  stringr::str_remove(names(home_visit_labor), "typical_day\\.")
)

# List the column names of the cleaned file.
names(home_visit_clean)
  [1] "participant_guid"                        
  [2] "date_today"                              
  [3] "site_id"                                 
  [4] "participant_ID"                          
  [5] "test_date"                               
  [6] "child_sex"                               
  [7] "age_group"                               
  [8] "language_child"                          
  [9] "language_child.english"                  
 [10] "language_child.spanish"                  
 [11] "language_instruction"                    
 [12] "walk_onset_date_who"                     
 [13] "walk_mos_who"                            
 [14] "walk_onset_comments_who"                 
 [15] "walk_onset_date_kea"                     
 [16] "walk_mos_kea"                            
 [17] "walk_onset_comments_kea"                 
 [18] "crawl_onset_date"                        
 [19] "crawl_mos"                               
 [20] "crawl_onset_comments"                    
 [21] "feeding_breastfeed"                      
 [22] "feeding_solidfood_age"                   
 [23] "feeding_comments"                        
 [24] "child_sleeping_position"                 
 [25] "child_health"                            
 [26] "child_recent_vaccination"                
 [27] "child_medical_specialist"                
 [28] "child_medical_specialist_comments"       
 [29] "child_hearing_tested"                    
 [30] "child_vision_tested"                     
 [31] "child_allergies"                         
 [32] "child_ear_infection"                     
 [33] "child_asthma"                            
 [34] "child_respiratory"                       
 [35] "child_gastrointestinal"                  
 [36] "child_allergy_illness_comments"          
 [37] "child_injury_times"                      
 [38] "child_injury_comments"                   
 [39] "child_health_comments"                   
 [40] "mom_prenatal_care"                       
 [41] "mom_prenatal_care_comments"              
 [42] "mom_pregnant_smoking"                    
 [43] "mom_pregnant_smoking_trimester_1"        
 [44] "mom_pregnant_smoking_trimester_2"        
 [45] "mom_pregnant_smoking_trimester_3"        
 [46] "mom_smoking_now"                         
 [47] "mom_smoking_now_amount"                  
 [48] "smoking_house"                           
 [49] "smoking_car"                             
 [50] "smoking_comments"                        
 [51] "mom_pregnant_drinking"                   
 [52] "mom_drinking_trimester_1"                
 [53] "mom_drinking_trimester_2"                
 [54] "mom_drinking_trimester_3"                
 [55] "mom_drinking_comments"                   
 [56] "phq4_nervous"                            
 [57] "phq4_worrying"                           
 [58] "phq4_littleinterest"                     
 [59] "phq4_down"                               
 [60] "phq4_comments"                           
 [61] "rothbart_unfamiliarperson"               
 [62] "rothbart_troubletask"                    
 [63] "rothbart_companyofchild"                 
 [64] "rothbart_choiceactivities"               
 [65] "rothbart_quietlysung"                    
 [66] "rothbart_playingoutdoors"                
 [67] "rothbart_morethan10"                     
 [68] "rothbart_respondingremarks"              
 [69] "rothbart_excitedlovedadults"             
 [70] "rothbart_fiddlehair"                     
 [71] "rothbart_roughrowdy"                     
 [72] "rothbart_rockedhugged"                   
 [73] "rothbart_involvednewactivity"            
 [74] "rothbart_tirequickly"                    
 [75] "rothbart_callattention"                  
 [76] "rothbart_tags"                           
 [77] "rothbart_noisyenvironment"               
 [78] "rothbart_energy"                         
 [79] "rothbart_vehicles"                       
 [80] "rothbart_active"                         
 [81] "rothbart_forbidden"                      
 [82] "rothbart_sadlytearful"                   
 [83] "rothbart_downblue"                       
 [84] "rothbart_runhouse"                       
 [85] "rothbart_excitingevent"                  
 [86] "rothbart_tempertantrum"                  
 [87] "rothbart_waitpatiently"                  
 [88] "rothbart_rockedsmile"                    
 [89] "rothbart_mold"                           
 [90] "rothbart_interactadult"                  
 [91] "rothbart_careful"                        
 [92] "rothbart_enternewplace"                  
 [93] "rothbart_crymorethan3"                   
 [94] "rothbart_easilysoothed"                  
 [95] "rothbart_busyother"                      
 [96] "rothbart_differentpeople"                
 [97] "comments_rothbart"                       
 [98] "mediause_home_technology"                
 [99] "mediause.home_technology.tv"             
[100] "mediause.home_technology.dvd"            
[101] "mediause.home_technology.computer"       
[102] "mediause.home_technology.ipad"           
[103] "mediause.home_technology.educationalgame"
[104] "mediause.home_technology.videogame"      
[105] "mediause_technology_child_tv"            
[106] "mediause_tv_how"                         
[107] "mediause_technology_child_dvd"           
[108] "mediause_dvd_how"                        
[109] "mediause_technology_child_computer"      
[110] "mediause_computer_how"                   
[111] "mediause_technology_child_ipad"          
[112] "mediause_ipad_how"                       
[113] "mediause_technology_child_educational"   
[114] "mediause_educational_how"                
[115] "mediause_technology_child_videogame"     
[116] "mediause_videogame_how"                  
[117] "mediause_tv_hrs_per_day"                 
[118] "mediause_techuse_meals"                  
[119] "mediause_techuse_playtime"               
[120] "mediause_techuse_bedtime"                
[121] "mediause_techuse_transportation"         
[122] "mediause_comments"                       
[123] "pets_at_home"                            
[124] "pets_types_number"                       
[125] "pets_indoors_outdoors"                   
[126] "comments_pets"                           
[127] "laundry_self"                            
[128] "laundry_partner"                         
[129] "laundry_otherperson"                     
[130] "labor_laundry_otherperson"               
[131] "cleaning_self"                           
[132] "cleaning_partner"                        
[133] "cleaning_otherperson"                    
[134] "labor_cleaning_otherperson"              
[135] "dishes_self"                             
[136] "dishes_partner"                          
[137] "dishes_otherperson"                      
[138] "labor_dishes_otherperson"                
[139] "cooking_self"                            
[140] "cooking_partner"                         
[141] "cooking_otherperson"                     
[142] "labor_cooking_otherperson"               
[143] "feeding_self"                            
[144] "feeding_partner"                         
[145] "feeding_otherperson"                     
[146] "labor_feeding_otherperson"               
[147] "droppick_self"                           
[148] "droppick_partner"                        
[149] "droppick_otherperson"                    
[150] "labor_droppick_otherperson"              
[151] "bed_self"                                
[152] "bed_partner"                             
[153] "bed_otherperson"                         
[154] "labor_bed_otherperson"                   
[155] "disciplining_self"                       
[156] "disciplining_partner"                    
[157] "disciplining_otherperson"                
[158] "labor_disciplining_otherperson"          
[159] "comments_division_labor"                 
[160] "typical_behavior"                        
[161] "typical_behavior_specifics"              
[162] "typical_activities"                      
[163] "typical_activities_specifics"            
[164] "typical_nightmorning"                    
[165] "typical_nightmorning_specifics"          
[166] "typical_otherthanme"                     
[167] "typical_otherthanme_specifics"           

Save cleaned file

Save with “latest” label.

# Write the cleaned data next to the raw input. The path is anchored on
# here::here(), as the read step is, so the output location does not depend on
# the working directory the document is rendered from.
readr::write_csv(
  home_visit_clean,
  file.path(
    here::here(),
    "data/csv/home_visit/agg",
    "PLAY-non-mcdi-kobo-latest.csv"
  )
)

Save with date-stamp for quality control.

# Date-stamped copy of the cleaned data; Sys.Date() is pasted in as
# YYYY-MM-DD. The path is anchored on here::here(), as the read step is, so
# the output location does not depend on the working directory the document
# is rendered from.
fn <- file.path(
  here::here(),
  "data/csv/home_visit/agg",
  paste0("PLAY-non-mcdi-kobo-", Sys.Date(), ".csv")
)
readr::write_csv(home_visit_clean, fn)