Home visit
Protocol
Details about the data collection protocol for the home visit can be found on the PLAY Project website.
Overview
The notes below summarize the steps needed to process the home visit survey data.
Most of the R chunks are not run, however, since we have moved the work into a {targets}-based workflow.
Download data
Data files for each of the language by age-group conditions are stored on KoBoToolbox (KBT).
Store a listing of all of the data files on KBT in kb_df.
tar_target(kb_df, list_kobo_data()),
List data forms specific to the home visit by filtering the files with names that contain “Home”.
tar_target(kb_home, dplyr::filter(kb_df, stringr::str_detect(title, "Home")))
targets::tar_load(kb_home, store="../_targets")
kb_home
## id id_string
## 1 411456 a58ZhjX6M8WXtzLGvBaEaG
## 2 740625 a8HyzdxnVjK53dt4fX3Vup
## 3 331453 aae4wz2tpkSnAkea2PAzin
## 4 363465 abYGMsrMvkGsnA4xm9kqC9
## 5 363431 acCBJaRqBkSMPYdvaaWCSu
## 6 334099 ad3NtS6TDPuKTUKCAyCuby
## 7 363349 aEMERWLYQAdhaMKMt5QkD7
## 8 307736 aeMJQLo56HWyVHkKX9oaJD
## 9 740631 ahYJ7XRUcit8gbFYwaxnAV
## 10 1151489 aLqdPLTCZb8jvFkDxcFwzy
## 11 1136694 aMteoHisncRUStdyZfsjMB
## 12 740623 aoLcmmN8KWG4N6A3XpUiRT
## 13 740628 aRJW7SwKsVyZu8b4FighiM
## 14 363466 asv7tL6CEYjiW2XCkLxhcX
## 15 411469 aswcEAtxj24u9MfuBmS9AW
## 16 740630 aThMD6zuEFjtTfptwcpZPg
## 17 411388 aTyGBYk6HvRXmki9ksb29p
## 18 740624 aUeYzPfq6WED4dDF28VT9D
## 19 363381 aWd7wujF5d5HYaYMhT4jBZ
## 20 740626 awMnFDna5kWR3LXKyGmFMd
## 21 331848 axYbEbLcJxzUNFbqMVySjJ
## 22 740629 axyJSgLRDeSk3BHB6cJmYk
## 23 408149 ayhCy68ZTaDGc7tuGthQpJ
## 24 740627 aYokcZAjTZ9JCyVXz4R5rh
## title
## 1 PLAY Home Questionaires - 12 Bilingual English (2020-03-04)
## 2 PLAY_HomeQuestionaires_12_English
## 3 PLAY Home Questionnaires (24 English)
## 4 PLAY Home Questionnaires - 24 Bilingual English (2020-03-04)
## 5 PLAY Home Questionnaires - 12 English (2020-03-04)
## 6 PLAY Home Questionnaires (12 Bilingual - English)
## 7 PLAY Home Questionnaires - 18 English (2020-03-04)
## 8 PLAY Home Questionnaires (18 English)
## 9 PLAY_HomeQuestionaires_24_Bilingual_English
## 10 Clone of Clone of PLAY_HomeQuestionnaires_18_English - NO AUTO METADATA
## 11 Clone of PLAY_HomeQuestionnaires_18_English - NO AUTO TODAY
## 12 PLAY_HomeQuestionaires_12_Bilingual_English
## 13 PLAY_HomeQuestionnaires_18_English
## 14 PLAY Home Questionnaires - 18 Bilingual English (2020-03-04)
## 15 PLAY Home Questionnaires - 12 Bilingual Spanish (2020-03-04)
## 16 PLAY_HomeQuestionaires_24_Bilingual_Spanish
## 17 PLAY Home Questionnaires - 18 Bilingual Spanish (2020-03-04)
## 18 PLAY_HomeQuestionaires_12_Bilingual_Spanish
## 19 PLAY Home Questionnaires - 24 English (2020-03-04)
## 20 PLAY_HomeQuestionaires_18_Bilingual_English
## 21 PLAY Home Questionnaires (12 English)
## 22 PLAY_HomeQuestionnaires_24_English
## 23 PLAY Home Questionnaires - 24 Bilingual Spanish (2020-03-04)
## 24 PLAY_HomeQuestionaires_18_Bilingual_Spanish
## description
## 1 PLAY Home Questionaires - 12 Bilingual (English) (2020-03-04)
## 2 PLAY_HomeQuestionaires_12_English-2021-07-29
## 3 PLAY Home Questionnaires (24 English)
## 4 PLAY Home Questionnaires - 24 Bilingual English (2019-11-20)
## 5 PLAY Home Questionnaires - 12 English (2019-11-20)
## 6 PLAY Home Questionnaires (12 Bilingual - English)
## 7 PLAY Home Questionnaires - 18 English (2018-11-20)
## 8 PLAY Home Questionnaires (18 English)
## 9 PLAY_HomeQuestionaires_24_Bilingual_English-2021-07-30
## 10 Clone of Clone of PLAY_HomeQuestionnaires_18_English - NO AUTO METADATA
## 11 Clone of PLAY_HomeQuestionnaires_18_English
## 12 PLAY_HomeQuestionaires_12_Bilingual_English-2021-07-29
## 13 PLAY_HomeQuestionnaires_18_English-2021-07-29
## 14 PLAY Home Questionnaires - 18 Bilingual English (2019-11-20)
## 15 PLAY Home Questionnaires - 12 Bilingual Spanish (2020-03-04)
## 16 PLAY_HomeQuestionaires_24_Bilingual_Spanish-2021-07-30
## 17 PLAY Home Questionnaires - 18 Bilingual Spanish (2020-03-04)
## 18 PLAY_HomeQuestionaires_12_Bilingual_Spanish-2021-07-29
## 19 PLAY Home Questionnaires - 24 English (2019-11-20)
## 20 PLAY_HomeQuestionaires_18_Bilingual_English-2021-07-29
## 21 PLAY Home Questionnaires (12 English)
## 22 PLAY_HomeQuestionnaires_24_English-2021-07-30
## 23 PLAY Home Questionnaires - 24 Bilingual Spanish (2020-02-27)
## 24 PLAY_HomeQuestionaires_18_Bilingual_Spanish-2021-07-29
## url
## 1 https://kc.kobotoolbox.org/api/v1/data/411456
## 2 https://kc.kobotoolbox.org/api/v1/data/740625
## 3 https://kc.kobotoolbox.org/api/v1/data/331453
## 4 https://kc.kobotoolbox.org/api/v1/data/363465
## 5 https://kc.kobotoolbox.org/api/v1/data/363431
## 6 https://kc.kobotoolbox.org/api/v1/data/334099
## 7 https://kc.kobotoolbox.org/api/v1/data/363349
## 8 https://kc.kobotoolbox.org/api/v1/data/307736
## 9 https://kc.kobotoolbox.org/api/v1/data/740631
## 10 https://kc.kobotoolbox.org/api/v1/data/1151489
## 11 https://kc.kobotoolbox.org/api/v1/data/1136694
## 12 https://kc.kobotoolbox.org/api/v1/data/740623
## 13 https://kc.kobotoolbox.org/api/v1/data/740628
## 14 https://kc.kobotoolbox.org/api/v1/data/363466
## 15 https://kc.kobotoolbox.org/api/v1/data/411469
## 16 https://kc.kobotoolbox.org/api/v1/data/740630
## 17 https://kc.kobotoolbox.org/api/v1/data/411388
## 18 https://kc.kobotoolbox.org/api/v1/data/740624
## 19 https://kc.kobotoolbox.org/api/v1/data/363381
## 20 https://kc.kobotoolbox.org/api/v1/data/740626
## 21 https://kc.kobotoolbox.org/api/v1/data/331848
## 22 https://kc.kobotoolbox.org/api/v1/data/740629
## 23 https://kc.kobotoolbox.org/api/v1/data/408149
## 24 https://kc.kobotoolbox.org/api/v1/data/740627
Save selected raw files to local directory
Prepare to retrieve all home visit files.
# Number of home-visit forms: one row of `kb_home` per form.
n_files <- nrow(kb_home)
There are \(n=24\) home visit data files.
tar_target(
home_visit_downloads,
retrieve_kobo_xlsx(kb_home,
"data/xlsx/home_visit/raw"),
cue = tarchetypes::tar_cue_age(
name = home_visit_downloads,
age = as.difftime(update_interval,
units = update_interval_units)
)
)
Normalize file names
Some of the form names are inconsistent, so we normalize them to fit the following pattern:
<form_id>_PLAY_HomeQuestionnaires_<age_group>_<lang_group>.xlsx
tar_target(
home_visit_renamed,
rename_home_xlsx(home_visit_downloads,
"data/xlsx/home_visit/std_name"),
cue = tarchetypes::tar_cue_age(
name = home_visit_renamed,
age = as.difftime(update_interval,
units = update_interval_units)
)
)
Save xlsx as csv
tar_target(
home_visit_xlsx_to_csv,
load_xlsx_save_many_csvs_2(home_visit_renamed,
"data/csv/home_visit/raw")
)
Split MB-CDI from other questions
Next we import a CSV for a given form year, age group, and language group, and create two new CSV files: one with the MB-CDI data and one with all of the other survey questions.
By default, the document presumes that we want to convert all of the CSV files.
Extract the ‘non-mbcdi’ questions first and add ‘non_mbcdi’ to the filename.
tar_target(
home_visit_non_mbcdi,
split_non_mbcdi_csvs(home_visit_xlsx_to_csv,
"data/csv/home_visit/non_mbcdi")
)
Extracting the MB-CDI data uses nearly the same call, but with the companion function split_mbcdi_csvs() and an output directory that ends in ‘mbcdi’.
tar_target(
home_visit_mbcdi,
split_mbcdi_csvs(home_visit_xlsx_to_csv,
"data/csv/home_visit/mbcdi")
)
Clean data
Remove identifiers
The function remove_identifiers()
in R/kobo_export
detects the presence of names, addresses, phone numbers, email, and dates in the field names for an input file and removes these fields.
It also modifies the file name by appending _deidentified
.
The remove_identifiers() function detects these fields by matching patterns against the column names. For clarity, we print it here.
source("~/rrr/KoBoToolbox/R/_OLD/functions.R", echo = FALSE, print.eval = FALSE)
remove_identifiers
## function(df) {
## require(stringr)
## stopifnot(is.data.frame(df))
##
## contains_name <- stringr::str_detect(names(df), 'name')
## contains_address <- stringr::str_detect(names(df), 'address')
## contains_phone <- stringr::str_detect(names(df), 'phone')
## contains_email <- stringr::str_detect(names(df), 'email')
## contains_birthdate <- stringr::str_detect(names(df), 'birthdate')
## contains_first <- stringr::str_detect(names(df), 'first[12]?')
## contains_last <- stringr::str_detect(names(df), 'last[12]?')
## contains_city <- stringr::str_detect(names(df), 'city')
## contains_year <- stringr::str_detect(names(df), 'year[12]?')
## contains_month <- stringr::str_detect(names(df), 'month[12]?')
## contains_day <- stringr::str_detect(names(df), '/day[12]?$')
##
## identifiable_data <- contains_name | contains_address |
## contains_phone |
## contains_email | contains_birthdate | contains_first |
## contains_last |
## contains_city | contains_year | contains_month | contains_day
##
## identifiable_cols <- (1:length(names(df)))[identifiable_data]
##
## df_deidentified <- df %>%
## dplyr::select(.,-all_of(identifiable_cols))
##
## df_deidentified
## }
The non-MBCDI file contains the identifiers, so that is the target of this removal process.
Note that we have added data to .gitignore in protocol/, the root directory for the HTML protocol, so none of the data files should be made available via git or GitHub. This also means that there is no version control being done on the raw data files themselves.
tar_target(
home_visit_remove_identifiers,
purrr::map_chr(
home_visit_non_mbcdi,
open_deidentify_save,
csv_save_dir = "data/csv/home_visit/non_mbcdi/deid",
these_questions = 'non_mbcdi'
)
)
Quality assurance (QA) reviews
Non-MB-CDI files
Create a helper function to create a data set with summary information about the data files.
# Summarize one non-MB-CDI CSV file.
#
# Reads the CSV at `fn` and returns a one-row tibble holding the file's base
# name (`file_name`), its number of rows (`n_rows`), and its number of
# columns (`n_vars`).
#
# Stops with an error when `fn` is not a single character string, when the
# file does not exist, or when the file cannot be read as a data frame.
summarize_non_mbcdi_qs <- function(fn) {
  # `if ()` needs a scalar condition, so insist on exactly one path.
  stopifnot(is.character(fn), length(fn) == 1L)

  if (!file.exists(fn)) {
    stop('File not found `', fn, '`')
  }

  df <- readr::read_csv(fn, show_col_types = FALSE)
  if (!is.data.frame(df)) {
    stop('Error reading data frame')
  }

  # Namespace-qualified like the readr call above, so the helper does not
  # depend on tibble being attached. The result is a single row, so no
  # sorting is needed here.
  tibble::tibble(
    file_name = basename(fn),
    n_rows = nrow(df),
    n_vars = ncol(df)
  )
}
Select the de-identified CSVs to examine.
# Collect the de-identified non-MB-CDI CSVs for age groups 12, 18, and 24.
# The age group must be written as an alternation, `(12|18|24)`; the bracketed
# form `[12|18|24]` is a character class that matches any single one of the
# characters 1, 2, 4, 8, or `|`, so it does not actually test the age group.
fl <-
  list.files(
    file.path("../data/csv/home_visit/non_mbcdi/deid"),
    pattern = "^[0-9]+_non_mbcdi_(12|18|24)_.*deidentified",
    full.names = TRUE
  )
# Summarize every selected file, combine the per-file summaries into one data
# frame, and render that data frame as an HTML table.
PLAY_forms <- purrr::map_df(fl, summarize_non_mbcdi_qs)
kableExtra::kable_classic(knitr::kable(PLAY_forms, format = 'html'))
file_name | n_rows | n_vars |
---|---|---|
1136694_non_mbcdi_18_english_deidentified.csv | 0 | 288 |
1151489_non_mbcdi_18_english_deidentified.csv | 0 | 286 |
307736_non_mbcdi_18_english_deidentified.csv | 4 | 274 |
331453_non_mbcdi_24_english_deidentified.csv | 3 | 274 |
331848_non_mbcdi_12_english_deidentified.csv | 4 | 267 |
334099_non_mbcdi_12_bilingual_english_deidentified.csv | 1 | 267 |
363349_non_mbcdi_18_english_deidentified.csv | 9 | 280 |
363381_non_mbcdi_24_english_deidentified.csv | 8 | 280 |
363431_non_mbcdi_12_english_deidentified.csv | 10 | 281 |
363465_non_mbcdi_24_bilingual_english_deidentified.csv | 0 | 280 |
363466_non_mbcdi_18_bilingual_english_deidentified.csv | 0 | 280 |
408149_non_mbcdi_24_bilingual_spanish_deidentified.csv | 1 | 280 |
411388_non_mbcdi_18_bilingual_spanish_deidentified.csv | 0 | 280 |
411456_non_mbcdi_12_bilingual_english_deidentified.csv | 1 | 280 |
411469_non_mbcdi_12_bilingual_spanish_deidentified.csv | 1 | 280 |
740623_non_mbcdi_12_bilingual_english_deidentified.csv | 42 | 288 |
740624_non_mbcdi_12_bilingual_spanish_deidentified.csv | 4 | 288 |
740625_non_mbcdi_12_english_deidentified.csv | 220 | 288 |
740626_non_mbcdi_18_bilingual_english_deidentified.csv | 61 | 287 |
740627_non_mbcdi_18_bilingual_spanish_deidentified.csv | 7 | 287 |
740628_non_mbcdi_18_english_deidentified.csv | 210 | 288 |
740629_non_mbcdi_24_english_deidentified.csv | 174 | 287 |
740630_non_mbcdi_24_bilingual_spanish_deidentified.csv | 3 | 287 |
740631_non_mbcdi_24_bilingual_english_deidentified.csv | 45 | 287 |
740631_non_mbcdi_24_bilingual_spanish_deidentified.csv | 26 | 287 |
The later forms (with higher form numbers–the leading integers in the file names) are the newer ones. These generally have the largest number of entries and have similar numbers of columns–either 287 or 288. Accordingly, we focus our cleaning efforts here first.
We start with the data files that have \(n=288\) columns.
df740623 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740623_non_mbcdi_12_bilingual_english_deidentified.csv",
show_col_types = FALSE
)
df740624 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740624_non_mbcdi_12_bilingual_spanish_deidentified.csv",
show_col_types = FALSE
)
sum(names(df740623) == names(df740624))
## [1] 288
df740625 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740625_non_mbcdi_12_english_deidentified.csv",
show_col_types = FALSE
)
sum(names(df740623) == names(df740625))
## [1] 288
df740628 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740628_non_mbcdi_18_english_deidentified.csv",
show_col_types = FALSE
)
sum(names(df740623) == names(df740628))
## [1] 288
So, four of the most recent data files with \(n=288\) columns can be aggregated without modification.
Let’s turn to the more recent files with \(n=287\) columns.
df740626 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740626_non_mbcdi_18_bilingual_english_deidentified.csv",
show_col_types = FALSE
)
df740627 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740627_non_mbcdi_18_bilingual_spanish_deidentified.csv",
show_col_types = FALSE
)
sum(names(df740626) == names(df740627))
## [1] 100
Where does the misalignment arise?
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [271] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [281] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
The misalignment arises somewhere near column 92.
df740629 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740629_non_mbcdi_24_english_deidentified.csv",
show_col_types = FALSE
)
sum(names(df740626) == names(df740629))
## [1] 287
So, df740626 and df740629 are aligned and can be merged.
df740630 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740630_non_mbcdi_24_bilingual_spanish_deidentified.csv",
show_col_types = FALSE
)
sum(names(df740626) == names(df740630))
## [1] 100
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [271] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [281] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
These files also fall out of alignment near column 92.
df740631 <-
readr::read_csv(
"../data/csv/home_visit/non_mbcdi/deid/740631_non_mbcdi_24_bilingual_english_deidentified.csv",
show_col_types = FALSE
)
sum(names(df740626) == names(df740631))
## [1] 100
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [271] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [281] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
And these files fall out of alignment near column 92.
Let’s see if df740627, df740630, and df740631 are aligned with one another.
## [1] 287
## [1] 287
Yes, they are. So, these three can be merged. We do that first, then address the discrepancies between aggregates.
‘Older’ forms
The “older” forms have varied numbers of columns. We focus on those with data (n_rows > 0).
# Load the de-identified non-MB-CDI CSVs for the older forms.
# `read_csv()` is namespace-qualified, matching the chunks that load the newer
# forms, so this chunk does not depend on readr being attached.
df307736 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/307736_non_mbcdi_18_english_deidentified.csv",
    show_col_types = FALSE
  )
df331453 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/331453_non_mbcdi_24_english_deidentified.csv",
    show_col_types = FALSE
  )
df331848 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/331848_non_mbcdi_12_english_deidentified.csv",
    show_col_types = FALSE
  )
df334099 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/334099_non_mbcdi_12_bilingual_english_deidentified.csv",
    show_col_types = FALSE
  )
df363349 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/363349_non_mbcdi_18_english_deidentified.csv",
    show_col_types = FALSE
  )
df363381 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/363381_non_mbcdi_24_english_deidentified.csv",
    show_col_types = FALSE
  )
df363431 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/363431_non_mbcdi_12_english_deidentified.csv",
    show_col_types = FALSE
  )
df408149 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/408149_non_mbcdi_24_bilingual_spanish_deidentified.csv",
    show_col_types = FALSE
  )
df411456 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/411456_non_mbcdi_12_bilingual_english_deidentified.csv",
    show_col_types = FALSE
  )
df411469 <-
  readr::read_csv(
    "../data/csv/home_visit/non_mbcdi/deid/411469_non_mbcdi_12_bilingual_spanish_deidentified.csv",
    show_col_types = FALSE
  )
Let’s look at the two forms that have the same number of columns, \(n=274\), 307736 and 331453.
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [265] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [1] TRUE
So, these two are identical and could be merged.
tar_target(files_274_cols, stringr::str_detect(home_visit_remove_identifiers, "/(307736|331453)"))
tar_target(df_merge_274_cols, make_aggregate_data_file(home_visit_remove_identifiers[files_274_cols]))
How about the files with \(n=267\) columns, 331848 and 334099?
## [1] FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [11] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [21] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [261] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [1] TRUE
## [1] "group_combinedquestionnaires/participant_id"
## [2] "start"
## [3] "end"
## [4] "group_combinedquestionnaires/note_fillthisoutbeforestudy"
## [5] "group_combinedquestionnaires/site_id"
## [6] "group_combinedquestionnaires/subject_number"
## [1] "group_jo84c13/participant_id"
## [2] "start"
## [3] "end"
## [4] "group_jo84c13/note_fillthisoutbeforestudy"
## [5] "group_jo84c13/site_id"
## [6] "group_jo84c13/subject_number"
There is an odd difference in the group label, group_combinedquestionnaires
vs. group_jo84c13
.
Let’s try deleting the initial group labels and compare again.
n1 <- names(df331848)
n2 <- names(df334099)
names(df331848) %>% stringr::str_remove("group_combinedquestionnaires/") |> head()
## [1] "participant_id" "start"
## [3] "end" "note_fillthisoutbeforestudy"
## [5] "site_id" "subject_number"
names(df334099) %>% stringr::str_remove("group_jo84c13/") |> head()
## [1] "participant_id" "start"
## [3] "end" "note_fillthisoutbeforestudy"
## [5] "site_id" "subject_number"
That looks promising.
# Strip each form's top-level group prefix from its column names, then compare
# the two name vectors position by position.
n1 <- stringr::str_remove(names(df331848), "group_combinedquestionnaires/")
n2 <- stringr::str_remove(names(df334099), "group_jo84c13/")
n1 == n2
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## [11] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [21] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [221] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [261] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
cbind(n1[8:15], n2[8:15])
## [,1] [,2]
## [1,] "test_date" "test_date"
## [2,] "child_sex" "child_birth_date"
## [3,] "age_group" "child_sex"
## [4,] "language_child" "age_group"
## [5,] "language_child/english" "language_child"
## [6,] "language_child/spanish" "language_child/english"
## [7,] "language_instruction" "language_child/spanish"
## [8,] "acknowledge_site" "language_instruction"
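n2 (from df334099) has a child_birth_date field in position 9 that the other data frame does not have. Note that this field is still present in a de-identified file: remove_identifiers() matches 'birthdate' but not 'birth_date', so its patterns should be extended to cover this spelling.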
n1 |> str_detect("child_birth_date") |> sum()
## [1] 0
If we delete that variable, the data frames will no longer have the same number of columns. Let’s explore that anyway.
n2_2 <- n2[-9]
n1 == n2_2
## Warning in n1 == n2_2: longer object length is not a multiple of
## shorter object length
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## [31] TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE
## [41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
That helps a bit, but we diverge around column 29.
cbind(n1[28:51], n2_2[28:51])
## [,1]
## [1,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested"
## [2,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/birthhospital"
## [3,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/afterhome"
## [4,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/no"
## [5,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/refused"
## [6,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/donotknow"
## [7,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested"
## [8,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/birthhospital"
## [9,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/afterhome"
## [10,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/no"
## [11,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/refused"
## [12,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/donotknow"
## [13,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/allergies"
## [14,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/ear_infection"
## [15,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/asthma"
## [16,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/respiratory"
## [17,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/gastrointestinal"
## [18,] "group_homevisitquestionnaires/group_health/group_general_health/comments_allergy_etc"
## [19,] "group_homevisitquestionnaires/group_health/group_general_health/child_injury_times"
## [20,] "group_homevisitquestionnaires/group_health/group_general_health/comment_injury"
## [21,] "group_homevisitquestionnaires/group_health/group_general_health/comments_general_health"
## [22,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
## [23,] "group_homevisitquestionnaires/group_health/group_prenatal/prenatal_care"
## [24,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
## [,2]
## [1,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested"
## [2,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/yes__in_the_bi"
## [3,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/yes__after_goi"
## [4,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/no"
## [5,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/refused"
## [6,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/don_t_know"
## [7,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested"
## [8,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/yes__in_the_bi"
## [9,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/yes__after_goi"
## [10,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/no"
## [11,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/refused"
## [12,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/don_t_know"
## [13,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/child_allergies_infections_ill_header"
## [14,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/allergies"
## [15,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/ear_infection"
## [16,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/asthma"
## [17,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/respiratory"
## [18,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/gastrointestinal"
## [19,] "group_homevisitquestionnaires/group_health/group_general_health/comments_allergy_etc"
## [20,] "group_homevisitquestionnaires/group_health/group_general_health/child_injury_times"
## [21,] "group_homevisitquestionnaires/group_health/group_general_health/comment_injury"
## [22,] "group_homevisitquestionnaires/group_health/group_general_health/comments_general_health"
## [23,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
## [24,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
These question labels look very similar.
There are just some minor changes in the variable names.
n2_2
has an extra variable in column 40.
n2_3 <- n2_2[-40]
Then, we can rename some of the columns in n2_3
using corresponding names from n1
.
# Map the answer-option suffixes in n2_3 onto the spellings used in n1.
n2_4 <- n2_3 |>
  stringr::str_replace("yes__in_the_bi", "birthhospital") |>
  stringr::str_replace("yes__after_goi", "afterhome") |>
  stringr::str_replace("don_t_know", "donotknow")
n1 == n2_4
## Warning in n1 == n2_4: longer object length is not a multiple of
## shorter object length
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [41] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE FALSE
## [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE
## [231] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
cbind(n1[39:51], n2_4[39:51])
## [,1]
## [1,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/donotknow"
## [2,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/allergies"
## [3,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/ear_infection"
## [4,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/asthma"
## [5,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/respiratory"
## [6,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/gastrointestinal"
## [7,] "group_homevisitquestionnaires/group_health/group_general_health/comments_allergy_etc"
## [8,] "group_homevisitquestionnaires/group_health/group_general_health/child_injury_times"
## [9,] "group_homevisitquestionnaires/group_health/group_general_health/comment_injury"
## [10,] "group_homevisitquestionnaires/group_health/group_general_health/comments_general_health"
## [11,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
## [12,] "group_homevisitquestionnaires/group_health/group_prenatal/prenatal_care"
## [13,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
## [,2]
## [1,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/donotknow"
## [2,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/allergies"
## [3,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/ear_infection"
## [4,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/asthma"
## [5,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/respiratory"
## [6,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/gastrointestinal"
## [7,] "group_homevisitquestionnaires/group_health/group_general_health/comments_allergy_etc"
## [8,] "group_homevisitquestionnaires/group_health/group_general_health/child_injury_times"
## [9,] "group_homevisitquestionnaires/group_health/group_general_health/comment_injury"
## [10,] "group_homevisitquestionnaires/group_health/group_general_health/comments_general_health"
## [11,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
## [12,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
## [13,] "group_homevisitquestionnaires/group_health/group_smoking/pregnant_smoking"
n1
has a group_medicalprof
label from allergies
through gastrointestinal
; n2_4
has child_allergies_infections_ill
for the same questions.
# Relabel the child_allergies_infections_ill group in n2_4 with the
# group_medicalprof label that n1 uses for the same questions.
n2_5 <- stringr::str_replace(
  n2_4,
  "child_allergies_infections_ill",
  "group_medicalprof"
)
n1 == n2_5
## Warning in n1 == n2_5: longer object length is not a multiple of
## shorter object length
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE
## [231] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
cbind(n1[49:60], n2_5[49:60])
## [,1]
## [1,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
## [2,] "group_homevisitquestionnaires/group_health/group_prenatal/prenatal_care"
## [3,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
## [4,] "group_homevisitquestionnaires/group_health/group_smoking/pregnant_smoking"
## [5,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_1"
## [6,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_2"
## [7,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_3"
## [8,] "group_homevisitquestionnaires/group_health/group_smoking/mom_smoking_now"
## [9,] "group_homevisitquestionnaires/group_health/group_smoking/mom_smoking_now_amount"
## [10,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_house"
## [11,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_car"
## [12,] "group_homevisitquestionnaires/group_health/group_smoking/comments_smoking"
## [,2]
## [1,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
## [2,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
## [3,] "group_homevisitquestionnaires/group_health/group_smoking/pregnant_smoking"
## [4,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_1"
## [5,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_2"
## [6,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_3"
## [7,] "group_homevisitquestionnaires/group_health/group_smoking/mom_smoking_now"
## [8,] "group_homevisitquestionnaires/group_health/group_smoking/mom_smoking_now_amount"
## [9,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_house"
## [10,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_car"
## [11,] "group_homevisitquestionnaires/group_health/group_smoking/comments_smoking"
## [12,] "group_homevisitquestionnaires/group_health/group_drinking/pregnant_drinking"
It looks like these could be reconciled by deleting prenatal_care
from n1
.
n1_2 <- n1[-50]
n1_2 == n2_5
## Warning in n1_2 == n2_5: longer object length is not a multiple
## of shorter object length
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE
cbind(n1_2[64:75], n2_5[64:75])
## [,1]
## [1,] "group_homevisitquestionnaires/group_health/group_drinking/comments_drinking"
## [2,] "group_homevisitquestionnaires/group_health/group_phq4/note_phq4"
## [3,] "group_homevisitquestionnaires/group_health/group_phq4/group_phq4_001/phq4_nervous"
## [4,] "group_homevisitquestionnaires/group_health/group_phq4/group_phq4_001/phq4_worrying"
## [5,] "group_homevisitquestionnaires/group_health/group_phq4/group_phq4_001/phq4_littleinterest"
## [6,] "group_homevisitquestionnaires/group_health/group_phq4/group_phq4_001/phq4_down"
## [7,] "group_homevisitquestionnaires/group_health/group_phq4/comments_phq4"
## [8,] "group_homevisitquestionnaires/group_rothbart/group_rothbartinstructions/rothbart_instructions1"
## [9,] "group_homevisitquestionnaires/group_rothbart/group_rothbartinstructions/rothbart_instructions2"
## [10,] "group_homevisitquestionnaires/group_rothbart/group_rothbart_001/rothbart_questions/rothbart_unfamiliarperson"
## [11,] "group_homevisitquestionnaires/group_rothbart/group_rothbart_001/rothbart_questions/rothbart_troubletask"
## [12,] "group_homevisitquestionnaires/group_rothbart/group_rothbart_001/rothbart_questions/rothbart_companyofchild"
## [,2]
## [1,] "group_homevisitquestionnaires/group_health/group_drinking/comments_drinking"
## [2,] "group_homevisitquestionnaires/group_health/group_phq4/Experimenter_These_stions_are_about_you"
## [3,] "group_homevisitquestionnaires/group_health/group_phq4/comments_phq4"
## [4,] "group_homevisitquestionnaires/group_rothbart/group_dd2kz32/instructor_rothbart"
## [5,] "group_homevisitquestionnaires/group_rothbart/group_dd2kz32/instructions_rothbart2"
## [6,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/rothbart_questions_header"
## [7,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/When_approached_by_a_ld_cling_to_a_parent"
## [8,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/While_having_trouble_get_easily_irritated"
## [9,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/When_a_familiar_chil_company_of_the_child"
## [10,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/When_offered_a_choic_uickly_and_go_for_it"
## [11,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/During_daily_or_even_eing_quietly_sung_to"
## [12,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/While_playing_outdoo_and_excitement_of_it"
It looks like most of the phq4 items are missing from n2_5
.
Let’s check.
n2_5 |> stringr::str_detect("phq4") |> sum()
## [1] 2
Yes, there are only two PHQ4-related questions in df334099
.
df334099 |> names() |> stringr::str_detect("phq4") |> sum()
## [1] 2
This path of reconciliation does not appear fruitful.
Make aggregate files
non-MB-CDI files with \(n=288\) columns
# Logical mask over home_visit_remove_identifiers: TRUE where an entry
# matches the file-name pattern for the 288-column non-MB-CDI group.
tar_target(
  files_288_cols,
  stringr::str_detect(
    home_visit_remove_identifiers,
    "2[3458]_non_mbcdi.*_deidentified\\.csv"
  )
)
# Pass the entries selected by that mask to make_aggregate_data_file().
tar_target(
  df_merge_288_cols,
  make_aggregate_data_file(
    home_visit_remove_identifiers[files_288_cols]
  )
)
non-MB-CDI files with \(n=287\) columns
# Two logical masks over home_visit_remove_identifiers, one per
# 287-column group; each is then passed to make_aggregate_data_file().
# Group 1: entries whose name contains "26_non_mbcdi" or "29_non_mbcdi".
tar_target(
  files_287_cols_1,
  stringr::str_detect(
    home_visit_remove_identifiers,
    "2[69]_non_mbcdi.*_deidentified\\.csv"
  )
),
# Group 2: entries named explicitly by the ids 740627, 740630 and 740631.
tar_target(
  files_287_cols_2,
  stringr::str_detect(
    home_visit_remove_identifiers,
    "(740627|740630|740631)_non.*_deidentified\\.csv"
  )
),
tar_target(
  df_merge_287_cols_1,
  make_aggregate_data_file(
    home_visit_remove_identifiers[files_287_cols_1]
  )
),
tar_target(
  df_merge_287_cols_2,
  make_aggregate_data_file(
    home_visit_remove_identifiers[files_287_cols_2]
  )
),
Examine groups with \(n=287\) cols
We focus on the starting column where the column names diverge, column 92.
targets::tar_load(df_merge_287_cols_1, store="../_targets")
targets::tar_load(df_merge_287_cols_2, store="../_targets")
names(df_merge_287_cols_1)[92]
## [1] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_feeding_nutrition.instructions_feeding"
names(df_merge_287_cols_2)[92]
## [1] "group_combinedquestionnaires.group_homevisitquestionnaires.group_locomotor_milestones.group_health.group_feeding_nutrition.instructions_feeding"
There is an erroneous group_locomotor_milestones.
in the df_merge_287_cols_2
column name.
A bit of sleuthing determines that this group_locomotor_milestones.
label is characteristic of columns 92 to 273.
names(df_merge_287_cols_2)[92:273] |> stringr::str_detect(pattern = "group_locomotor_milestones")
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE
The following should fix this.
# Drop the stray "group_locomotor_milestones." path segment, but only in
# columns 92-273, the span where the check above found it.
old_names <- names(df_merge_287_cols_2)
milestone_cols <- 92:273
new_names <- replace(
  old_names,
  milestone_cols,
  stringr::str_remove(
    old_names[milestone_cols],
    "group_locomotor_milestones\\."
  )
)
names(df_merge_287_cols_2) <- new_names
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [101] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [111] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [221] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [231] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [251] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [261] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [271] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [281] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
We have a second problem with columns from 114 to 210 (columns 264 and 274 to 278 also differ).
## [,1]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_vision_tested.donotknow"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_vision_tested.donotknow"
## [,2]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.doctor_told_you"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
## [,3]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.ear_infection"
One of the problems has to do with column 114. There is a question ending doctor_told_you
in names(df_merge_287_cols_1)
but not in names(df_merge_287_cols_2)
.
names(df_merge_287_cols_1) |> stringr::str_detect(pattern = "doctor_told_you") |> sum()
## [1] 1
names(df_merge_287_cols_2) |> stringr::str_detect(pattern = "doctor_told_you") |> sum()
## [1] 0
Deleting this question would create additional misalignments and further problems. We cannot proceed without further discussion with our team.
For now, let’s generate an array with all of the remaining differences in column names.
## [1] 103
## [,1]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.doctor_told_you"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
## [,2]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.ear_infection"
## [,3]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.ear_infection"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.asthma"
## [,4]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.asthma"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.respiratory"
## [,5]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.respiratory"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.gastrointestinal"
## [,6]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.gastrointestinal"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comments_allergy_etc"
## [,7]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comments_allergy_etc"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_injury_times"
## [,8]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_injury_times"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comment_injury"
## [,9]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comment_injury"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comments_general_health"
## [,10]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comments_general_health"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.instructions_prenatal"
## [,11]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.instructions_prenatal"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.prenatal_care"
## [,12]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.prenatal_care"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.comments_prenatal"
## [,13]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.comments_prenatal"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.pregnant_smoking"
## [,14]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.pregnant_smoking"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_1"
## [,15]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_1"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_2"
## [,16]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_2"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_3"
## [,17]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_3"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.mom_smoking_now"
## [,18]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.mom_smoking_now"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.mom_smoking_now_amount"
## [,19]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.mom_smoking_now_amount"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_house"
## [,20]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_house"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_car"
## [,21]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_car"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.comments_smoking"
## [,22]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.comments_smoking"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.pregnant_drinking"
## [,23]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.pregnant_drinking"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_1"
## [,24]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_1"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_2"
## [,25]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_2"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_3"
## [,26]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_3"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.comments_drinking"
## [,27]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.comments_drinking"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.note_phq4"
## [,28]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.note_phq4"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.instructions_phq4"
## [,29]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.instructions_phq4"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_nervous"
## [,30]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_nervous"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_worrying"
## [,31]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_worrying"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_littleinterest"
## [,32]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_littleinterest"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_down"
## [,33]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_down"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.comments_phq4"
## [,34]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.comments_phq4"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.instructions_rothbart1"
## [,35]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.instructions_rothbart1"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.instructions_rothbart2"
## [,36]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.instructions_rothbart2"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.instructions_rothbart3"
## [,37]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.instructions_rothbart3"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_unfamiliarperson"
## [,38]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_unfamiliarperson"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_troubletask"
## [,39]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_troubletask"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_companyofchild"
## [,40]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_companyofchild"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_choiceactivities"
## [,41]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_choiceactivities"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_quietlysung"
## [,42]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_quietlysung"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_playingoutdoors"
## [,43]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_playingoutdoors"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_morethan10"
## [,44]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_morethan10"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_respondingremarks"
## [,45]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_respondingremarks"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_excitedlovedadults"
## [,46]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_excitedlovedadults"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_fiddlehair"
## [,47]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_fiddlehair"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_roughrowdy"
## [,48]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_roughrowdy"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_rockedhugged"
## [,49]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_rockedhugged"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_involvednewactivity"
## [,50]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_involvednewactivity"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tirequickly"
## [,51]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tirequickly"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_callattention"
## [,52]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_callattention"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tags"
## [,53]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tags"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_noisyenvironment"
## [,54]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_noisyenvironment"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_energy"
## [,55]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_energy"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_vehicles"
## [,56]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_vehicles"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_active"
## [,57]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_active"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_forbidden"
## [,58]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_forbidden"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_sadlytearful"
## [,59]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_sadlytearful"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_downblue"
## [,60]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_downblue"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_runhouse"
## [,61]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_runhouse"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_excitingevent"
## [,62]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_excitingevent"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tempertantrum"
## [,63]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tempertantrum"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_waitpatiently"
## [,64]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_waitpatiently"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_rockedsmile"
## [,65]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_rockedsmile"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_mold"
## [,66]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_mold"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_interactadult"
## [,67]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_interactadult"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_careful"
## [,68]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_careful"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_enternewplace"
## [,69]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_enternewplace"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_crymorethan3"
## [,70]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_crymorethan3"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_easilysoothed"
## [,71]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_easilysoothed"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_busyother"
## [,72]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_busyother"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_differentpeople"
## [,73]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_differentpeople"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.comments_rothbart"
## [,74]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.comments_rothbart"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.mediause_instructions1"
## [,75]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.mediause_instructions1"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology"
## [,76]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.tv"
## [,77]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.tv"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.dvd"
## [,78]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.dvd"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.computer"
## [,79]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.computer"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.ipad"
## [,80]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.ipad"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.educationalgame"
## [,81]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.educationalgame"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.videogame"
## [,82]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.videogame"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_tv"
## [,83]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_tv"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.tv_how"
## [,84]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.tv_how"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_dvd"
## [,85]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_dvd"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.dvd_how"
## [,86]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.dvd_how"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_computer"
## [,87]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_computer"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.computer_how"
## [,88]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.computer_how"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_ipad"
## [,89]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_ipad"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.ipad_how"
## [,90]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.ipad_how"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_educational"
## [,91]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_educational"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.educational_how"
## [,92]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.educational_how"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_videogame"
## [,93]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_videogame"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.videogame_how"
## [,94]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.videogame_how"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.tv_hours_per_day"
## [,95]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.tv_hours_per_day"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.note_tv_hours_per_day"
## [,96]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.note_tv_hours_per_day"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.instructions_technology_use"
## [,97]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.instructions_technology_use"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.technology_use_scale"
## [,98]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_typical_day.instructions_typicalday"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_typical_day.instructions_typical_day"
## [,99]
## [1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel1"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel1"
## [,100]
## [1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel2"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel2"
## [,101]
## [1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel3"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel3"
## [,102]
## [1,] "group_combinedquestionnaires.group_databrary.acknowledge_databrary"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.acknowledge_databrary"
## [,103]
## [1,] "group_combinedquestionnaires.group_databrary.note_saveasdraft"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_saveasdraft"
Visual inspection suggests that these are similar, with the following deviations:
- As noted, `df_merge_287_cols_1` has a column ending in `doctor_told_you` that is not present in `df_merge_287_cols_2`.
- `df_merge_287_cols_2` has a column ending in `technology_use_scale` that is not present in `df_merge_287_cols_1`.
- There is a set of fields in `group_databrary` that do not align exactly. We will almost certainly delete these, so the misalignment is not a huge problem.
As an exploration, let’s see if we can reconcile these by deleting the non-aligning columns.
# Drop the column that is present in only one of the two data frames.
df1 <- dplyr::select(df_merge_287_cols_1, -contains('doctor_told_you'))
df2 <- dplyr::select(df_merge_287_cols_2, -contains('technology_use_scale'))
# Strip the extra locomotor-milestones group label from columns 92:273 of
# df2 (positions are hard-coded for this exploration only).
old_names <- names(df2)
new_names <- replace(
  old_names,
  92:273,
  stringr::str_remove(old_names[92:273], "group_locomotor_milestones\\.")
)
names(df2) <- new_names
# Element-wise comparison of the two sets of column names.
names(df1) == names(df2)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [101] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [111] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [131] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [141] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [161] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [171] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [191] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [201] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [221] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [231] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [251] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [261] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [271] TRUE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE
## [281] TRUE TRUE TRUE TRUE TRUE TRUE
This looks promising.
## [,1]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_typical_day.instructions_typicalday"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_typical_day.instructions_typical_day"
This is easily fixed.
## [,1]
## [1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel1"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel1"
## [,2]
## [1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel2"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel2"
## [,3]
## [1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel3"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel3"
The last misalignments relate to Databrary fields.
# Remove every Databrary-related column from both data frames, then
# re-compare the column names element-wise.
df1 <- dplyr::select(df1, -contains('group_databrary'))
df2 <- dplyr::select(df2, -contains('group_databrary'))
names(df1) == names(df2)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [265] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [277] TRUE TRUE TRUE TRUE TRUE
Success!
Combining the two groups of datasets
Now, let’s go back to the data frame with 288 cols and see if we can bring these into alignment.
targets::tar_load(df_merge_288_cols, store = "../_targets")
# Work on a copy with the Databrary-related columns removed.
df3 <- dplyr::select(df_merge_288_cols, -contains('group_databrary'))
# Rows and columns of df1, df2, and df3, in that order.
c(dim(df1), dim(df2), dim(df3))
## [1] 235 281 55 281 476 283
## Warning in names(df1) == names(df3): longer object length is not
## a multiple of shorter object length
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [101] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [111] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [161] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [171] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [191] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [271] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [281] FALSE FALSE FALSE
## [,1]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.doctor_told_you"
## [,2]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.ear_infection"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
Once again, there appears to be a problem with the ‘doctor_told_you’ field. We’ll delete it to see if this fixes one of the problems.
## Warning in names(df1) == names(df3): longer object length is not
## a multiple of shorter object length
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [101] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [111] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [131] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [141] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [161] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [171] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [191] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [201] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [231] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [251] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [271] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [281] FALSE FALSE
We still have misalignments at column 210.
## [,1]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.meals"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.technology_use_scale"
## [,2]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.playtime"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.meals"
## [,3]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.bedtime"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.playtime"
## [,4]
## [1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.transportation"
## [2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.bedtime"
The `technology_use_scale` column exists in one data frame but not the other.
## [,1] [,2]
## [1,] 235 281
## [2,] 476 281
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [101] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [111] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [131] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [141] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [161] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [171] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [191] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [201] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [221] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [231] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [251] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [261] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [271] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [281] TRUE
Future versions of the workflow will need to handle this more elegantly.
Option 1: Fix the underlying forms.
Option 2: Add the ‘missing’ columns as NA in post-processing.
For now, I’m going to create functions that align these data frames.
These are incorporated into R/functions.R
so we do not source them again here.
#' Drop every column whose name contains "technology_use_scale".
#'
#' @param df A data frame.
#' @return `df` without the matching columns; all other columns are kept in
#'   their original order.
remove_technology_use_scale <- function(df) {
  dplyr::select(df, !dplyr::contains('technology_use_scale'))
}
#' Drop every column whose name contains "doctor_told_you".
#'
#' @param df A data frame.
#' @return `df` without the matching columns; all other columns are kept in
#'   their original order.
remove_doctor_told_you <- function(df) {
  dplyr::select(df, !dplyr::contains('doctor_told_you'))
}
#' Drop every column whose name contains "group_databrary".
#'
#' @param df A data frame.
#' @return `df` without the matching columns; all other columns are kept in
#'   their original order.
remove_databrary_fields <- function(df) {
  dplyr::select(df, !dplyr::contains('group_databrary'))
}
#' Rename columns so that "typicalday" is spelled "typical_day".
#'
#' Every occurrence of the literal text "typicalday" in a column name is
#' replaced; names that already use "typical_day" are left untouched.
#'
#' @param df A data frame.
#' @return `df` with the same columns and data but harmonized column names.
reconcile_typicalday <- function(df) {
  stats::setNames(
    df,
    stringr::str_replace_all(names(df), 'typicalday', 'typical_day')
  )
}
#' Strip a misplaced "group_locomotor_milestones." label from column names.
#'
#' Only names in which "locomotor_milestones" is followed by one of the
#' section keywords below are changed; in those names the first occurrence of
#' "group_locomotor_milestones." is removed. All other names are left as-is.
#'
#' @param df A data frame.
#' @return `df` with the same columns and data but corrected column names.
remove_permissive_locomotor_milestones_label <- function(df) {
  misplaced_sections <- c(
    "health", "division", "rothbart", "mediause", "pets", "typical",
    "acknowledge"
  )
  # The alternation is grouped so that "locomotor_milestones.*" applies to
  # every keyword. Ungrouped, the regex would read as
  # "locomotor_milestones.*health" OR "division" OR ..., matching any name
  # that merely contains one of the later keywords.
  pattern <- paste0(
    "locomotor_milestones.*(", paste(misplaced_sections, collapse = "|"), ")"
  )
  col_names <- names(df)
  is_misplaced <- stringr::str_detect(col_names, pattern = pattern)
  col_names[is_misplaced] <- stringr::str_remove(
    col_names[is_misplaced],
    "group_locomotor_milestones\\."
  )
  names(df) <- col_names
  df
}
#' Drop columns whose names contain "X_" or "meta.instanceID".
#'
#' @param df A data frame.
#' @return `df` without the matching columns; all other columns are kept in
#'   their original order.
remove_X_meta_cols <- function(df) {
  dplyr::select(
    df,
    # contains() ignores case by default, so a bare "X_" would also drop any
    # ordinary field whose name happens to contain lower-case "x_"
    # (e.g. "sex_child"). Match the upper-case prefix only.
    -dplyr::contains("X_", ignore.case = FALSE),
    -dplyr::contains("meta.instanceID")
  )
}
#' Remove the redundant top-level group labels from column names.
#'
#' Deletes every occurrence of "group_homevisitquestionnaires." and then of
#' "group_combinedquestionnaires." from each column name.
#'
#' @param df A data frame.
#' @return `df` with the same columns and data but shortened column names.
remove_redundant_group_labels <- function(df) {
  shortened <- names(df)
  redundant_labels <- c(
    'group_homevisitquestionnaires\\.',
    'group_combinedquestionnaires\\.'
  )
  for (label in redundant_labels) {
    shortened <- stringr::str_remove_all(shortened, label)
  }
  names(df) <- shortened
  df
}
#' Apply all column-alignment steps to a home-visit data frame.
#'
#' Runs the helper functions in a fixed order: harmonize the "typical_day"
#' spelling, drop the `technology_use_scale`, `doctor_told_you`, Databrary,
#' and `X_`/`meta.instanceID` columns, strip the misplaced
#' locomotor-milestones label, and finally remove the redundant group labels.
#'
#' @param df A data frame.
#' @return The cleaned data frame.
clean_dfs <- function(df) {
  df <- reconcile_typicalday(df)
  df <- remove_technology_use_scale(df)
  df <- remove_doctor_told_you(df)
  df <- remove_permissive_locomotor_milestones_label(df)
  df <- remove_databrary_fields(df)
  df <- remove_X_meta_cols(df)
  remove_redundant_group_labels(df)
}
Let’s test this workflow with the unmodified files.
# Reload the three unmodified merged data frames from the targets store.
targets::tar_load(df_merge_287_cols_1, store="../_targets")
targets::tar_load(df_merge_287_cols_2, store="../_targets")
targets::tar_load(df_merge_288_cols, store="../_targets")
# Clean each data frame with clean_dfs() and print its dimensions
# (rows, columns).
df1m <- clean_dfs(df_merge_287_cols_1)
dim(df1m)
## [1] 235 272
df2m <- clean_dfs(df_merge_287_cols_2)
dim(df2m)
## [1] 55 272
df3m <- clean_dfs(df_merge_288_cols)
dim(df3m)
## [1] 476 272
## [1] 272
## [1] 272
Merging files at last
# Stack the three cleaned data frames row-wise into a single data frame.
df <- do.call(rbind, list(df1m, df2m, df3m))
As of 2022-12-15, the above has now been incorporated into R/functions.R
and into the _targets.R
workflow.
Merge with Databrary info
For each session (row) in the merged data frame, we pull data from the associated Databrary volume and session. These data are merged with that drawn from KBT.
# Target that augments the merged home-visit data with Databrary info.
tar_target(
  name = home_visit_w_databrary_df,
  command = add_databrary_info_to_home_visit_df(home_visit_df)
)
The `add_databrary_info_to_home_visit_df()` function in `R/functions.R` does most of the work.