Code
suppressPackageStartupMessages(library(tidyverse))
This page describes how the aggregate home visit survey files are cleaned, measure by measure.
suppressPackageStartupMessages(library(tidyverse))
This is not elegant, but it does what needs to be done.
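“Split” the run-together rating-level labels in the Rothbart ECBQ by inserting underscores between the words (for example, "veryrarely" becomes "very_rarely").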
# Rewrite the run-together ECBQ rating labels in the raw aggregate CSV so the
# words are separated by underscores. `xfun::gsub_file()` edits the file on
# disk, so this must run before the file is read.
raw_csv_fn <- file.path(
  here::here(),
  "data/csv/home_visit/agg",
  "PLAY-non-mcdi-raw-latest.csv"
)

# Names are the labels to search for; values are their replacements.
# The substitutions are applied in the order listed.
ecbq_label_fixes <- c(
  veryrarely = "very_rarely",
  lessthanhalf = "less_than_half",
  abouthalf = "about_half",
  morethanhalf = "more_than_half",
  almostalways = "almost_always"
)

for (old_label in names(ecbq_label_fixes)) {
  xfun::gsub_file(
    file = raw_csv_fn,
    old_label,
    ecbq_label_fixes[[old_label]]
  )
}
# if (!('home_visit_df' %in% ls())) {
# targets::tar_load(home_visit_df, store="../_targets")
# }
# Read the raw aggregate home visit file. Every column is read as character
# (`.default = "c"`), so no type guessing is applied at this stage.
home_visit_df <- readr::read_csv(
  file.path(
    here::here(),
    "data/csv/home_visit/agg",
    "PLAY-non-mcdi-raw-latest.csv"
  ),
  col_types = readr::cols(.default = "c"),
  show_col_types = FALSE
)
# TRUE for columns whose name contains "note", "instructions", or
# "acknowledge".
metadata_cols <- stringr::str_detect(
  names(home_visit_df),
  "note|instructions|acknowledge"
)

# Positions of the columns that are NOT flagged above.
no_metadata_cols_index <- seq_along(home_visit_df)[!metadata_cols]

# Keep only the unflagged columns, then drop the `start` and `end` columns.
home_visit_df_trim <- home_visit_df |>
  dplyr::select(dplyr::all_of(no_metadata_cols_index)) |>
  dplyr::select(
    -start,
    -end
  )
The untrimmed data frame had 272 variables. The trimmed data frame has 216 variables.
We change some variable names to conform with later stages of processing when we merge the KoBoToolbox data with those from the Databrary session spreadsheet.
# Rename the two identifier columns: `participant_id` becomes
# `participant_guid` and `subject_number` becomes `participant_ID`.
home_visit_df_trim <- home_visit_df_trim |>
  dplyr::rename(
    participant_guid = participant_id,
    participant_ID = subject_number
  )
The raw file contains separate variables indicating whether the child is exposed to English and Spanish at home. We leave those for now.
# TRUE for the locomotor-milestone columns to drop: the `date_format`,
# `calc_*mo`, `*_onset_check`, `holiday*`, and `memorable_events*` fields.
loco_admin_cols <- names(home_visit_df_trim) %in% c(
  "locomotor_milestones.date_format",
  "locomotor_milestones.calc_5mo",
  "locomotor_milestones.calc_6mo",
  "locomotor_milestones.calc_7mo",
  "locomotor_milestones.calc_8mo",
  "locomotor_milestones.calc_9mo",
  "locomotor_milestones.calc_10mo",
  "locomotor_milestones.calc_11mo",
  "locomotor_milestones.calc_12mo",
  "locomotor_milestones.calc_13mo",
  "locomotor_milestones.calc_14mo",
  "locomotor_milestones.calc_15mo",
  "locomotor_milestones.calc_16mo",
  "locomotor_milestones.who_walk.walk_onset_check",
  "locomotor_milestones.holiday1.holiday_jan",
  "locomotor_milestones.holiday1.holiday_feb",
  "locomotor_milestones.holiday1.holiday_marchapril",
  "locomotor_milestones.holiday1.holiday_may",
  "locomotor_milestones.holiday1.holiday_june",
  "locomotor_milestones.holiday1.holiday_july",
  "locomotor_milestones.holiday1.holiday_sep",
  "locomotor_milestones.holiday1.holiday_oct",
  "locomotor_milestones.holiday1.holiday_nov",
  "locomotor_milestones.holiday1.holiday_dec",
  "locomotor_milestones.holiday1.memorable_events",
  "locomotor_milestones.k_walk.k_walk_onset_check",
  "locomotor_milestones.crawl_onset.crawl_onset_check",
  "locomotor_milestones.holiday2.holiday_jan2",
  "locomotor_milestones.holiday2.holiday_feb2",
  "locomotor_milestones.holiday2.holiday_marchapril2",
  "locomotor_milestones.holiday2.holiday_may2",
  "locomotor_milestones.holiday2.holiday_june2",
  "locomotor_milestones.holiday2.holiday_july2",
  "locomotor_milestones.holiday2.holiday_sep2",
  "locomotor_milestones.holiday2.holiday_oct2",
  "locomotor_milestones.holiday2.holiday_nov2",
  "locomotor_milestones.holiday2.holiday_dec2",
  "locomotor_milestones.holiday2.memorable_events2"
)

# NOTE: despite its name, this holds the positions of the columns to KEEP
# (those not flagged in `loco_admin_cols`).
loco_admin_index <- seq_along(home_visit_df_trim)[!loco_admin_cols]

# Keep the remaining columns, give the locomotor variables short names, and
# convert the three onset-age (months) columns from character to numeric.
# `as.numeric()` yields NA (with a warning) for values that are not numbers.
home_visit_df_loco <- home_visit_df_trim |>
  dplyr::select(dplyr::all_of(loco_admin_index)) |>
  dplyr::rename(
    walk_mos_who = locomotor_milestones.who_walk.who_walk_onset_mo,
    walk_mos_kea = locomotor_milestones.k_walk.k_walk_onset_mo,
    crawl_mos = locomotor_milestones.crawl_onset.crawl_onset_mo,
    walk_onset_date_kea = locomotor_milestones.k_walk.k_walk_onset_date,
    walk_onset_date_who = locomotor_milestones.who_walk.who_walk_onset_date,
    walk_onset_comments_who = locomotor_milestones.who_walk.comments_who_walk_onset,
    walk_onset_comments_kea = locomotor_milestones.k_walk.comments_k_walk_onset,
    crawl_onset_date = locomotor_milestones.crawl_onset.crawl_onset_date,
    crawl_onset_comments = locomotor_milestones.crawl_onset.comments_crawl_onset
  ) |>
  dplyr::mutate(
    walk_mos_who = as.numeric(walk_mos_who),
    walk_mos_kea = as.numeric(walk_mos_kea),
    crawl_mos = as.numeric(crawl_mos)
  )
# TRUE for columns named exactly "instructions", "note", or
# "doctor_told_you".
health_admin_cols <- names(home_visit_df_loco) %in% c(
  "instructions",
  "note",
  "doctor_told_you"
)

# NOTE: despite its name, this holds the positions of the columns to KEEP
# (those not flagged in `health_admin_cols`).
health_admin_index <- seq_along(home_visit_df_loco)[!health_admin_cols]

# Keep the remaining columns and replace the dotted `health.*` names with
# short snake_case names.
home_visit_df_health <- home_visit_df_loco |>
  dplyr::select(dplyr::all_of(health_admin_index)) |>
  dplyr::rename(
    feeding_breastfeed = health.feeding_nutrition.breastfeed,
    feeding_solidfood_age = health.feeding_nutrition.solidfood_age,
    feeding_comments = health.feeding_nutrition.comments_feeding,
    child_sleeping_position = health.general_health.child_sleeping_position,
    child_health = health.general_health.child_health,
    child_recent_vaccination = health.general_health.child_vaccination,
    child_medical_specialist = health.general_health.child_medical_specialist,
    child_medical_specialist_comments = health.general_health.comments_child_medical_special,
    child_hearing_tested = health.general_health.child_hearing_tested,
    child_vision_tested = health.general_health.child_vision_tested,
    child_allergies = health.general_health.child_allergies_infections_ill.allergies,
    child_ear_infection = health.general_health.child_allergies_infections_ill.ear_infection,
    child_asthma = health.general_health.child_allergies_infections_ill.asthma,
    child_respiratory = health.general_health.child_allergies_infections_ill.respiratory,
    child_gastrointestinal = health.general_health.child_allergies_infections_ill.gastrointestinal,
    child_allergy_illness_comments = health.general_health.comments_allergy_etc,
    child_injury_times = health.general_health.child_injury_times,
    child_injury_comments = health.general_health.comment_injury,
    child_health_comments = health.general_health.comments_general_health,
    mom_prenatal_care = health.prenatal.prenatal_care,
    mom_prenatal_care_comments = health.prenatal.comments_prenatal,
    mom_pregnant_smoking = health.smoking.pregnant_smoking,
    mom_pregnant_smoking_trimester_1 = health.smoking.smoking_trimester_1,
    mom_pregnant_smoking_trimester_2 = health.smoking.smoking_trimester_2,
    mom_pregnant_smoking_trimester_3 = health.smoking.smoking_trimester_3,
    mom_smoking_now = health.smoking.mom_smoking_now,
    mom_smoking_now_amount = health.smoking.mom_smoking_now_amount,
    smoking_house = health.smoking.smoking_house,
    smoking_car = health.smoking.smoking_car,
    smoking_comments = health.smoking.comments_smoking,
    mom_pregnant_drinking = health.drinking.pregnant_drinking,
    mom_drinking_trimester_1 = health.drinking.drinking_trimester_1,
    mom_drinking_trimester_2 = health.drinking.drinking_trimester_2,
    mom_drinking_trimester_3 = health.drinking.drinking_trimester_3,
    mom_drinking_comments = health.drinking.comments_drinking,
    phq4_nervous = health.phq4.phq4_nervous,
    phq4_worrying = health.phq4.phq4_worrying,
    phq4_littleinterest = health.phq4.phq4_littleinterest,
    phq4_down = health.phq4.phq4_down,
    phq4_comments = health.phq4.comments_phq4
  ) |>
  # Remove duplicating variables
  dplyr::select(
    -health.general_health.child_hearing_tested.birthhospital,
    -health.general_health.child_hearing_tested.afterhome,
    -health.general_health.child_hearing_tested.no,
    -health.general_health.child_hearing_tested.refused,
    -health.general_health.child_hearing_tested.donotknow,
    -health.general_health.child_vision_tested.birthhospital,
    -health.general_health.child_vision_tested.afterhome,
    -health.general_health.child_vision_tested.no,
    -health.general_health.child_vision_tested.refused,
    -health.general_health.child_vision_tested.donotknow
  )
# TRUE for every column whose name contains "rothbart".
rothbart_vars <- stringr::str_detect(names(home_visit_df_health), "rothbart")

# Strip the "rothbart.rothbartquestions." prefix, then swap the first
# "rothbart_" in each name for "ecbq_".
new_rothbart_var_names <- stringr::str_replace(
  names(home_visit_df_health)[rothbart_vars],
  "rothbart\\.rothbartquestions\\.",
  ""
) |>
  stringr::str_replace("rothbart_", "ecbq_")

names(home_visit_df_health)[rothbart_vars] <- new_rothbart_var_names

# The comments column matches neither pattern above, so it is renamed
# explicitly here.
home_visit_ecbq <- home_visit_df_health |>
  dplyr::rename(ecbq_comments = rothbart.comments_rothbart)
# Replace the dotted `mediause.*` names with underscore-separated names.
# Columns not listed here (e.g. `mediause.home_technology.tv`) keep their
# original names.
home_visit_mediause <- home_visit_ecbq |>
  dplyr::rename(
    mediause_home_technology = mediause.home_technology,
    mediause_technology_child_tv = mediause.technology_child_tv,
    mediause_technology_child_computer = mediause.technology_child_computer,
    mediause_technology_child_ipad = mediause.technology_child_ipad,
    mediause_technology_child_dvd = mediause.technology_child_dvd,
    mediause_technology_child_educational = mediause.technology_child_educational,
    mediause_technology_child_videogame = mediause.technology_child_videogame,
    mediause_tv_how = mediause.tv_how,
    mediause_dvd_how = mediause.dvd_how,
    mediause_computer_how = mediause.computer_how,
    mediause_ipad_how = mediause.ipad_how,
    mediause_educational_how = mediause.educational_how,
    mediause_videogame_how = mediause.videogame_how,
    mediause_tv_hrs_per_day = mediause.tv_hours_per_day,
    mediause_techuse_meals = mediause.techuse.meals,
    mediause_techuse_playtime = mediause.techuse.playtime,
    mediause_techuse_bedtime = mediause.techuse.bedtime,
    mediause_techuse_transportation = mediause.techuse.transportation,
    mediause_comments = mediause.comments_technology
  )
# Drop the "pets." prefix from the four pet-related columns.
home_visit_pets <- home_visit_mediause |>
  dplyr::rename(
    pets_at_home = pets.pets_at_home,
    pets_types_number = pets.pets_types_number,
    pets_indoors_outdoors = pets.pets_indoors_outdoors,
    comments_pets = pets.comments_pets
  )

# Show the first 20 distinct free-text pet descriptions.
unique(home_visit_pets$pets_types_number) |> head(20)
[1] "1 dog"
[2] "1 cat"
[3] NA
[4] "1 Turtle"
[5] "2 dogs"
[6] "2 pugs and 2 fish"
[7] "2 Dogs"
[8] "3 cats"
[9] "2 Cats, 1 bearded dragon, 1 snake, 1 tarantula and 1 frog - (just had a fish but it recently passed away)"
[10] "2 cats"
[11] "7 rabbits"
[12] "3 dogs"
[13] "1 dog, 1 cat"
[14] "1 dog; 1 cat"
[15] "1 dog and 2 cats"
[16] "1 dog 2 cats"
[17] "dog"
[18] "One dog"
[19] "1 dog Dalmatian"
[20] "One cat"
home_visit_labor <- home_visit_pets

# Strip the "division_labor." prefix and then each per-task group prefix from
# the column names. `str_remove()` deletes only the first match in each name,
# and the removals are applied in the order listed.
names(home_visit_labor) <- names(home_visit_labor) |>
  stringr::str_remove("division_labor\\.") |>
  stringr::str_remove("laundry\\.") |>
  stringr::str_remove("cleaning\\.") |>
  stringr::str_remove("dishwashing\\.") |>
  stringr::str_remove("cooking\\.") |>
  stringr::str_remove("feeding\\.") |>
  stringr::str_remove("droppick\\.") |>
  stringr::str_remove("bed\\.") |>
  stringr::str_remove("disciplining\\.")
home_visit_clean <- home_visit_labor

# Strip the first "typical_day." from each column name.
names(home_visit_clean) <- stringr::str_remove(
  names(home_visit_clean),
  "typical_day\\."
)

# Print the final column names for inspection.
names(home_visit_clean)
[1] "participant_guid"
[2] "date_today"
[3] "site_id"
[4] "participant_ID"
[5] "test_date"
[6] "child_sex"
[7] "age_group"
[8] "language_child"
[9] "language_child.english"
[10] "language_child.spanish"
[11] "language_instruction"
[12] "walk_onset_date_who"
[13] "walk_mos_who"
[14] "walk_onset_comments_who"
[15] "walk_onset_date_kea"
[16] "walk_mos_kea"
[17] "walk_onset_comments_kea"
[18] "crawl_onset_date"
[19] "crawl_mos"
[20] "crawl_onset_comments"
[21] "feeding_breastfeed"
[22] "feeding_solidfood_age"
[23] "feeding_comments"
[24] "child_sleeping_position"
[25] "child_health"
[26] "child_recent_vaccination"
[27] "child_medical_specialist"
[28] "child_medical_specialist_comments"
[29] "child_hearing_tested"
[30] "child_vision_tested"
[31] "child_allergies"
[32] "child_ear_infection"
[33] "child_asthma"
[34] "child_respiratory"
[35] "child_gastrointestinal"
[36] "child_allergy_illness_comments"
[37] "child_injury_times"
[38] "child_injury_comments"
[39] "child_health_comments"
[40] "mom_prenatal_care"
[41] "mom_prenatal_care_comments"
[42] "mom_pregnant_smoking"
[43] "mom_pregnant_smoking_trimester_1"
[44] "mom_pregnant_smoking_trimester_2"
[45] "mom_pregnant_smoking_trimester_3"
[46] "mom_smoking_now"
[47] "mom_smoking_now_amount"
[48] "smoking_house"
[49] "smoking_car"
[50] "smoking_comments"
[51] "mom_pregnant_drinking"
[52] "mom_drinking_trimester_1"
[53] "mom_drinking_trimester_2"
[54] "mom_drinking_trimester_3"
[55] "mom_drinking_comments"
[56] "phq4_nervous"
[57] "phq4_worrying"
[58] "phq4_littleinterest"
[59] "phq4_down"
[60] "phq4_comments"
[61] "ecbq_unfamiliarperson"
[62] "ecbq_troubletask"
[63] "ecbq_companyofchild"
[64] "ecbq_choiceactivities"
[65] "ecbq_quietlysung"
[66] "ecbq_playingoutdoors"
[67] "ecbq_morethan10"
[68] "ecbq_respondingremarks"
[69] "ecbq_excitedlovedadults"
[70] "ecbq_fiddlehair"
[71] "ecbq_roughrowdy"
[72] "ecbq_rockedhugged"
[73] "ecbq_involvednewactivity"
[74] "ecbq_tirequickly"
[75] "ecbq_callattention"
[76] "ecbq_tags"
[77] "ecbq_noisyenvironment"
[78] "ecbq_energy"
[79] "ecbq_vehicles"
[80] "ecbq_active"
[81] "ecbq_forbidden"
[82] "ecbq_sadlytearful"
[83] "ecbq_downblue"
[84] "ecbq_runhouse"
[85] "ecbq_excitingevent"
[86] "ecbq_tempertantrum"
[87] "ecbq_waitpatiently"
[88] "ecbq_rockedsmile"
[89] "ecbq_mold"
[90] "ecbq_interactadult"
[91] "ecbq_careful"
[92] "ecbq_enternewplace"
[93] "ecbq_crymorethan3"
[94] "ecbq_easilysoothed"
[95] "ecbq_busyother"
[96] "ecbq_differentpeople"
[97] "ecbq_comments"
[98] "mediause_home_technology"
[99] "mediause.home_technology.tv"
[100] "mediause.home_technology.dvd"
[101] "mediause.home_technology.computer"
[102] "mediause.home_technology.ipad"
[103] "mediause.home_technology.educationalgame"
[104] "mediause.home_technology.videogame"
[105] "mediause_technology_child_tv"
[106] "mediause_tv_how"
[107] "mediause_technology_child_dvd"
[108] "mediause_dvd_how"
[109] "mediause_technology_child_computer"
[110] "mediause_computer_how"
[111] "mediause_technology_child_ipad"
[112] "mediause_ipad_how"
[113] "mediause_technology_child_educational"
[114] "mediause_educational_how"
[115] "mediause_technology_child_videogame"
[116] "mediause_videogame_how"
[117] "mediause_tv_hrs_per_day"
[118] "mediause_techuse_meals"
[119] "mediause_techuse_playtime"
[120] "mediause_techuse_bedtime"
[121] "mediause_techuse_transportation"
[122] "mediause_comments"
[123] "pets_at_home"
[124] "pets_types_number"
[125] "pets_indoors_outdoors"
[126] "comments_pets"
[127] "laundry_self"
[128] "laundry_partner"
[129] "laundry_otherperson"
[130] "labor_laundry_otherperson"
[131] "cleaning_self"
[132] "cleaning_partner"
[133] "cleaning_otherperson"
[134] "labor_cleaning_otherperson"
[135] "instruction_dishes"
[136] "dishes_self"
[137] "dishes_partner"
[138] "dishes_otherperson"
[139] "labor_dishes_otherperson"
[140] "cooking_self"
[141] "cooking_partner"
[142] "cooking_otherperson"
[143] "labor_cooking_otherperson"
[144] "feeding_self"
[145] "feeding_partner"
[146] "feeding_otherperson"
[147] "labor_feeding_otherperson"
[148] "droppick_self"
[149] "droppick_partner"
[150] "droppick_otherperson"
[151] "labor_droppick_otherperson"
[152] "bed_self"
[153] "bed_partner"
[154] "bed_otherperson"
[155] "labor_bed_otherperson"
[156] "disciplining_self"
[157] "disciplining_partner"
[158] "disciplining_otherperson"
[159] "labor_disciplining_otherperson"
[160] "comments_division_labor"
[161] "typical_behavior"
[162] "typical_behavior_specifics"
[163] "typical_activities"
[164] "typical_activities_specifics"
[165] "typical_nightmorning"
[166] "typical_nightmorning_specifics"
[167] "typical_otherthanme"
[168] "typical_otherthanme_specifics"
Save with “latest” label.
# The path is relative to the working directory at run time.
readr::write_csv(
  home_visit_clean,
  "../data/csv/home_visit/agg/PLAY-non-mcdi-kobo-latest.csv"
)
Save with date-stamp for quality control.
# Build a file name that embeds today's date (`Sys.Date()` is coerced to
# "YYYY-MM-DD" by `paste0()`), then write the same data frame to it. The path
# is relative to the working directory at run time.
fn <- paste0(
  "../data/csv/home_visit/agg/PLAY-non-mcdi-kobo-",
  Sys.Date(),
  ".csv"
)
readr::write_csv(home_visit_clean, fn)