# Source every project helper script in R/ whose file name matches one of the
# known prefixes, so the functions used below are available.
fl <-
  list.files(
    file.path(here::here(), "R"),
    "^load|^kobo_|^file_|^screen_|^ecbq_|^health_|^databrary|^home|^make|^export|^post_visit|CONSTANTS|utils",
    full.names = TRUE
  )
purrr::walk(fl, source)

# Attach the tidyverse quietly; later chunks use unqualified read_csv(),
# tibble(), str_detect() and %>%.
suppressPackageStartupMessages(library(tidyverse))
Aggregate
About
This page documents the process for combining separate files into aggregate files containing data from many participants.
Setup
Remove identifiers
The non-MBCDI file contains the identifiers, so that is the target of this removal process.
Note that we have added `data` to `.gitignore` in `protocol/`, the root directory for the HTML protocol, so none of the data files should be made available via git or GitHub. This also means that there is no version control being done on raw data files themselves.
# Pipeline target definition (presumably for the {targets} package -- confirm):
# maps `open_deidentify_save` over `home_visit_non_mbcdi` and collects the
# results as a character vector named `home_visit_remove_identifiers`.
# NOTE(review): the interactive chunk that follows calls
# `file_open_deidentify_save` and builds `csv_save_dir` with here::here();
# confirm which helper name and path form are current.
tar_target(
home_visit_remove_identifiers,
purrr::map_chr(
home_visit_non_mbcdi,
open_deidentify_save,
csv_save_dir = "data/csv/home_visit/non_mbcdi/deid",
these_questions = 'non_mbcdi'
)
)
# List the raw (identifiable) non-MB-CDI home-visit CSVs.
home_visit_non_mbcdi <- list.files(
  file.path(here::here(), "data/csv/home_visit/non_mbcdi/raw"),
  "\\.csv$",
  full.names = TRUE
)

# Apply the project's de-identification helper to each raw file; the helper is
# passed the output directory and the question set, and map_chr() collects one
# string per input file.
purrr::map_chr(
  home_visit_non_mbcdi,
  file_open_deidentify_save,
  csv_save_dir = file.path(here::here(), "data/csv/home_visit/non_mbcdi/deid"),
  these_questions = 'non_mbcdi'
)
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/1136694_non_mbcdi_18_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/1151489_non_mbcdi_18_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/307736_non_mbcdi_18_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/331453_non_mbcdi_24_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/331848_non_mbcdi_12_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/334099_non_mbcdi_12_bilingual_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363349_non_mbcdi_18_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363381_non_mbcdi_24_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363431_non_mbcdi_12_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363465_non_mbcdi_24_bilingual_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363466_non_mbcdi_18_bilingual_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/408149_non_mbcdi_24_bilingual_spanish_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/411388_non_mbcdi_18_bilingual_spanish_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/411456_non_mbcdi_12_bilingual_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/411469_non_mbcdi_12_bilingual_spanish_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740623_non_mbcdi_12_bilingual_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740624_non_mbcdi_12_bilingual_spanish_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740625_non_mbcdi_12_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740626_non_mbcdi_18_bilingual_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740627_non_mbcdi_18_bilingual_spanish_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740628_non_mbcdi_18_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740629_non_mbcdi_24_english_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740630_non_mbcdi_24_bilingual_spanish_deidentified.csv`
Saved `/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740631_non_mbcdi_24_bilingual_english_deidentified.csv`
[1] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/1136694_non_mbcdi_18_english_deidentified.csv"
[2] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/1151489_non_mbcdi_18_english_deidentified.csv"
[3] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/307736_non_mbcdi_18_english_deidentified.csv"
[4] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/331453_non_mbcdi_24_english_deidentified.csv"
[5] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/331848_non_mbcdi_12_english_deidentified.csv"
[6] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/334099_non_mbcdi_12_bilingual_english_deidentified.csv"
[7] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363349_non_mbcdi_18_english_deidentified.csv"
[8] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363381_non_mbcdi_24_english_deidentified.csv"
[9] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363431_non_mbcdi_12_english_deidentified.csv"
[10] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363465_non_mbcdi_24_bilingual_english_deidentified.csv"
[11] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/363466_non_mbcdi_18_bilingual_english_deidentified.csv"
[12] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/408149_non_mbcdi_24_bilingual_spanish_deidentified.csv"
[13] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/411388_non_mbcdi_18_bilingual_spanish_deidentified.csv"
[14] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/411456_non_mbcdi_12_bilingual_english_deidentified.csv"
[15] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/411469_non_mbcdi_12_bilingual_spanish_deidentified.csv"
[16] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740623_non_mbcdi_12_bilingual_english_deidentified.csv"
[17] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740624_non_mbcdi_12_bilingual_spanish_deidentified.csv"
[18] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740625_non_mbcdi_12_english_deidentified.csv"
[19] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740626_non_mbcdi_18_bilingual_english_deidentified.csv"
[20] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740627_non_mbcdi_18_bilingual_spanish_deidentified.csv"
[21] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740628_non_mbcdi_18_english_deidentified.csv"
[22] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740629_non_mbcdi_24_english_deidentified.csv"
[23] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740630_non_mbcdi_24_bilingual_spanish_deidentified.csv"
[24] "/Users/rog1/rrr/KoBoToolbox/data/csv/home_visit/non_mbcdi/deid/740631_non_mbcdi_24_bilingual_english_deidentified.csv"
Quality assurance (QA) reviews
MB-CDI files
To be completed.
Non-MB-CDI files
Create a helper function to create a data set with summary information about the data files.
#' Summarise the dimensions of one non-MB-CDI CSV file
#'
#' @param fn Path to a single CSV file (character scalar).
#' @return A one-row tibble with columns `file_name` (base name of `fn`),
#'   `n_rows` and `n_vars` (row and column counts of the parsed file).
#' @details Stops with an error if `fn` is not a single string, if the file
#'   does not exist, or if the parsed object is not a data frame.
summarize_non_mbcdi_qs <- function(fn) {
  # Require exactly one path: file.exists() is vectorised, so a longer vector
  # would otherwise reach `if` with a length > 1 condition.
  stopifnot(is.character(fn), length(fn) == 1)

  if (!file.exists(fn)) {
    stop('File not found `', fn, '`')
  }

  df <- readr::read_csv(fn, show_col_types = FALSE)
  if (!is.data.frame(df)) {
    stop('Error reading data frame')
  }

  # The result has a single row, so no sorting is needed here; callers that
  # row-bind many summaries receive them in input order.
  tibble::tibble(
    file_name = basename(fn),
    n_rows = nrow(df),
    n_vars = ncol(df)
  )
}
Select the de-identified CSVs to examine.
# Select the de-identified CSVs for the 12-, 18- and 24-month forms.
# The age group is written as a grouped alternation, (12|18|24); the previous
# bracket form [12|18|24] is a character class that matches any ONE of the
# characters 1, 2, |, 8, 4 and only worked by accident.
fl <-
  list.files(
    file.path(here::here(), "data/csv/home_visit/non_mbcdi/deid"),
    "^[0-9]+_non_mbcdi_(12|18|24).*deidentified",
    full.names = TRUE
  )
# Build one summary row per file and row-bind them into a single data frame.
PLAY_forms <- purrr::map_df(fl, summarize_non_mbcdi_qs)

# Render the summary as an HTML table.
PLAY_forms %>%
  knitr::kable(., format = 'html') %>%
  kableExtra::kable_classic()
file_name | n_rows | n_vars |
---|---|---|
1136694_non_mbcdi_18_english_deidentified.csv | 0 | 288 |
1151489_non_mbcdi_18_english_deidentified.csv | 0 | 286 |
307736_non_mbcdi_18_english_deidentified.csv | 4 | 274 |
331453_non_mbcdi_24_english_deidentified.csv | 3 | 274 |
331848_non_mbcdi_12_english_deidentified.csv | 4 | 267 |
334099_non_mbcdi_12_bilingual_english_deidentified.csv | 1 | 267 |
363349_non_mbcdi_18_english_deidentified.csv | 9 | 280 |
363381_non_mbcdi_24_english_deidentified.csv | 8 | 280 |
363431_non_mbcdi_12_english_deidentified.csv | 10 | 281 |
363465_non_mbcdi_24_bilingual_english_deidentified.csv | 0 | 280 |
363466_non_mbcdi_18_bilingual_english_deidentified.csv | 0 | 280 |
408149_non_mbcdi_24_bilingual_spanish_deidentified.csv | 1 | 280 |
411388_non_mbcdi_18_bilingual_spanish_deidentified.csv | 0 | 280 |
411456_non_mbcdi_12_bilingual_english_deidentified.csv | 1 | 280 |
411469_non_mbcdi_12_bilingual_spanish_deidentified.csv | 1 | 280 |
740623_non_mbcdi_12_bilingual_english_deidentified.csv | 46 | 288 |
740624_non_mbcdi_12_bilingual_spanish_deidentified.csv | 4 | 288 |
740625_non_mbcdi_12_english_deidentified.csv | 238 | 288 |
740626_non_mbcdi_18_bilingual_english_deidentified.csv | 61 | 287 |
740627_non_mbcdi_18_bilingual_spanish_deidentified.csv | 7 | 287 |
740628_non_mbcdi_18_english_deidentified.csv | 223 | 288 |
740629_non_mbcdi_24_english_deidentified.csv | 181 | 287 |
740630_non_mbcdi_24_bilingual_spanish_deidentified.csv | 4 | 287 |
740631_non_mbcdi_24_bilingual_english_deidentified.csv | 47 | 287 |
The later forms (with higher form numbers–the leading integers in the file names) are the newer ones. These generally have the largest number of entries and have similar numbers of columns–either 287 or 288. Accordingly, we focus our cleaning efforts here first.
We start with the data files that have \(n=288\) columns.
# Load the 12-month bilingual English and bilingual Spanish forms.
df740623 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740623_non_mbcdi_12_bilingual_english_deidentified.csv"
    ),
    show_col_types = FALSE
  )

df740624 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740624_non_mbcdi_12_bilingual_spanish_deidentified.csv"
    ),
    show_col_types = FALSE
  )
sum(names(df740623) == names(df740624))
[1] 288
# Load the 12-month English form.
df740625 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740625_non_mbcdi_12_english_deidentified.csv"
    ),
    show_col_types = FALSE
  )
sum(names(df740623) == names(df740625))
[1] 288
# Load the 18-month English form.
df740628 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740628_non_mbcdi_18_english_deidentified.csv"
    ),
    show_col_types = FALSE
  )
sum(names(df740623) == names(df740628))
[1] 288
So, four of the most recent data files with \(n=288\) columns can be aggregated without modification.
Let’s turn to the more recent files with \(n=287\) columns.
# Load the 18-month bilingual English and bilingual Spanish forms.
df740626 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740626_non_mbcdi_18_bilingual_english_deidentified.csv"
    ),
    show_col_types = FALSE
  )

df740627 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740627_non_mbcdi_18_bilingual_spanish_deidentified.csv"
    ),
    show_col_types = FALSE
  )
sum(names(df740626) == names(df740627))
[1] 100
Where does the misalignment arise?
names(df740626) == names(df740627)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[277] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
The misalignment arises somewhere near column 92.
# Load the 24-month English form.
df740629 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740629_non_mbcdi_24_english_deidentified.csv"
    ),
    show_col_types = FALSE
  )
sum(names(df740626) == names(df740629))
[1] 287
So, `df740626` and `df740629` are aligned and can be merged.
# Load the 24-month bilingual Spanish form.
df740630 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740630_non_mbcdi_24_bilingual_spanish_deidentified.csv"
    ),
    show_col_types = FALSE
  )
sum(names(df740626) == names(df740630))
[1] 100
names(df740626) == names(df740630)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[277] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
These files also fall out of alignment near column 92.
# Load the 24-month bilingual English form.
df740631 <-
  readr::read_csv(
    file.path(
      here::here(),
      "data/csv/home_visit/non_mbcdi/deid/740631_non_mbcdi_24_bilingual_english_deidentified.csv"
    ),
    show_col_types = FALSE
  )
sum(names(df740626) == names(df740631))
[1] 100
names(df740626) == names(df740631)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[277] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
And these files fall out of alignment near column 92.
Let’s see if `df740627`, `df740630`, and `df740631` are aligned with one another.
sum(names(df740627) == names(df740630))
[1] 287
sum(names(df740627) == names(df740631))
[1] 287
Yes, they are. So, these three can be merged. We do that first, then address the discrepancies between aggregates.
‘Older’ forms
The “older” forms have varied numbers of columns. We focus on those with data (n_rows > 0).
# Directory holding the de-identified non-MB-CDI CSVs; every older form below
# is read from here.
deid_dir <- file.path(here::here(), "data/csv/home_visit/non_mbcdi/deid")

# Load each older form that contains at least one row of data.
df307736 <- readr::read_csv(
  file.path(deid_dir, "307736_non_mbcdi_18_english_deidentified.csv"),
  show_col_types = FALSE
)

df331453 <- readr::read_csv(
  file.path(deid_dir, "331453_non_mbcdi_24_english_deidentified.csv"),
  show_col_types = FALSE
)

df331848 <- readr::read_csv(
  file.path(deid_dir, "331848_non_mbcdi_12_english_deidentified.csv"),
  show_col_types = FALSE
)

df334099 <- readr::read_csv(
  file.path(deid_dir, "334099_non_mbcdi_12_bilingual_english_deidentified.csv"),
  show_col_types = FALSE
)

df363349 <- readr::read_csv(
  file.path(deid_dir, "363349_non_mbcdi_18_english_deidentified.csv"),
  show_col_types = FALSE
)

df363381 <- readr::read_csv(
  file.path(deid_dir, "363381_non_mbcdi_24_english_deidentified.csv"),
  show_col_types = FALSE
)

df363431 <- readr::read_csv(
  file.path(deid_dir, "363431_non_mbcdi_12_english_deidentified.csv"),
  show_col_types = FALSE
)

df408149 <- readr::read_csv(
  file.path(deid_dir, "408149_non_mbcdi_24_bilingual_spanish_deidentified.csv"),
  show_col_types = FALSE
)

df411456 <- readr::read_csv(
  file.path(deid_dir, "411456_non_mbcdi_12_bilingual_english_deidentified.csv"),
  show_col_types = FALSE
)

df411469 <- readr::read_csv(
  file.path(deid_dir, "411469_non_mbcdi_12_bilingual_spanish_deidentified.csv"),
  show_col_types = FALSE
)
Let’s look at the two forms that have the same number of columns, \(n=274\), 307736 and 331453.
names(df307736) == names(df331453)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[256] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[271] TRUE TRUE TRUE TRUE
# Test that the two name vectors match exactly. Comparing the length of the
# elementwise result to the length of one input is TRUE for any two
# equal-length vectors, so it cannot detect a mismatch.
identical(names(df307736), names(df331453))
[1] TRUE
So, these two have identical column names and could be merged.
# All de-identified CSVs.
hv_deid_fl <- list.files(
  file.path(here::here(), "data/csv/home_visit/non_mbcdi/deid"),
  "\\.csv$",
  full.names = TRUE
)

# Logical mask picking out the two 274-column forms by their form number.
files_274_cols <- stringr::str_detect(hv_deid_fl, "/(307736|331453)")

# Combine the selected files using the project's aggregation helper.
df_merge_274_cols <- file_make_aggregate_from_csvs(hv_deid_fl[files_274_cols])
How about the files with \(n=267\) columns, 331848 and 334099?
names(df331848) == names(df334099)
[1] FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
[265] TRUE TRUE TRUE
# Exact test of name equality; the elementwise comparison above shows that
# most names differ, so this is FALSE.
identical(names(df331848), names(df334099))
[1] FALSE
names(df331848) |> head()
[1] "group_combinedquestionnaires/participant_id"
[2] "start"
[3] "end"
[4] "group_combinedquestionnaires/note_fillthisoutbeforestudy"
[5] "group_combinedquestionnaires/site_id"
[6] "group_combinedquestionnaires/subject_number"
names(df334099) |> head()
[1] "group_jo84c13/participant_id"
[2] "start"
[3] "end"
[4] "group_jo84c13/note_fillthisoutbeforestudy"
[5] "group_jo84c13/site_id"
[6] "group_jo84c13/subject_number"
There is an odd difference in the group label, `group_combinedquestionnaires` vs. `group_jo84c13`.
Let’s try deleting the initial group labels and compare again.
# Keep the column names of both forms for side-by-side comparison.
n1 <- names(df331848)
n2 <- names(df334099)
names(df331848) %>% stringr::str_remove("group_combinedquestionnaires/") |> head()
[1] "participant_id" "start"
[3] "end" "note_fillthisoutbeforestudy"
[5] "site_id" "subject_number"
names(df334099) %>% stringr::str_remove("group_jo84c13/") |> head()
[1] "participant_id" "start"
[3] "end" "note_fillthisoutbeforestudy"
[5] "site_id" "subject_number"
That looks promising.
# Strip the form-specific leading group label from each set of names, then
# compare position by position.
n1 <- names(df331848) |>
  stringr::str_remove("group_combinedquestionnaires/")
n2 <- names(df334099) |>
  stringr::str_remove("group_jo84c13/")

n1 == n2
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
[265] TRUE TRUE TRUE
cbind(n1[8:15], n2[8:15])
[,1] [,2]
[1,] "test_date" "test_date"
[2,] "child_sex" "child_birth_date"
[3,] "age_group" "child_sex"
[4,] "language_child" "age_group"
[5,] "language_child/english" "language_child"
[6,] "language_child/spanish" "language_child/english"
[7,] "language_instruction" "language_child/spanish"
[8,] "acknowledge_site" "language_instruction"
`n2` (i.e., `df334099`) has a `child_birth_date` field in position 9 that the other data frame does not have.
# Count how many names in n1 mention child_birth_date.
n1 |>
  str_detect("child_birth_date") |>
  sum()
[1] 0
If we delete that variable, the data frames will no longer have the same number of columns. Let’s explore that anyway.
# Drop position 9 (child_birth_date) from n2 and compare again. The vectors
# now differ in length, so `==` recycles and warns.
n2_2 <- n2[-9]

n1 == n2_2
Warning in n1 == n2_2: longer object length is not a multiple of shorter object
length
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE FALSE TRUE FALSE FALSE
[37] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE
That helps a bit, but we diverge around column 29.
cbind(n1[28:51], n2_2[28:51])
[,1]
[1,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested"
[2,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/birthhospital"
[3,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/afterhome"
[4,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/no"
[5,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/refused"
[6,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/donotknow"
[7,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested"
[8,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/birthhospital"
[9,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/afterhome"
[10,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/no"
[11,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/refused"
[12,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/donotknow"
[13,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/allergies"
[14,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/ear_infection"
[15,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/asthma"
[16,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/respiratory"
[17,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/gastrointestinal"
[18,] "group_homevisitquestionnaires/group_health/group_general_health/comments_allergy_etc"
[19,] "group_homevisitquestionnaires/group_health/group_general_health/child_injury_times"
[20,] "group_homevisitquestionnaires/group_health/group_general_health/comment_injury"
[21,] "group_homevisitquestionnaires/group_health/group_general_health/comments_general_health"
[22,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
[23,] "group_homevisitquestionnaires/group_health/group_prenatal/prenatal_care"
[24,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
[,2]
[1,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested"
[2,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/yes__in_the_bi"
[3,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/yes__after_goi"
[4,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/no"
[5,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/refused"
[6,] "group_homevisitquestionnaires/group_health/group_general_health/child_hearing_tested/don_t_know"
[7,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested"
[8,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/yes__in_the_bi"
[9,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/yes__after_goi"
[10,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/no"
[11,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/refused"
[12,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/don_t_know"
[13,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/child_allergies_infections_ill_header"
[14,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/allergies"
[15,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/ear_infection"
[16,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/asthma"
[17,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/respiratory"
[18,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/gastrointestinal"
[19,] "group_homevisitquestionnaires/group_health/group_general_health/comments_allergy_etc"
[20,] "group_homevisitquestionnaires/group_health/group_general_health/child_injury_times"
[21,] "group_homevisitquestionnaires/group_health/group_general_health/comment_injury"
[22,] "group_homevisitquestionnaires/group_health/group_general_health/comments_general_health"
[23,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
[24,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
These question labels look very similar. There are just some minor changes in the variable names. `n2_2` has an extra variable in column 40.
# Drop the extra header variable at position 40 of n2_2.
n2_3 <- n2_2[-40]
Then, we can rename some of the columns in `n2_3` using corresponding names from `n1`.
n2_3 |> stringr::str_replace("yes__in_the_bi", "birthhospital") |> stringr::str_replace("yes__after_goi", "afterhome") |> stringr::str_replace("don_t_know", "donotknow") -> n2_4
n1 == n2_4
Warning in n1 == n2_4: longer object length is not a multiple of shorter object
length
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
[49] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
[229] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE
cbind(n1[39:51], n2_4[39:51])
[,1]
[1,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/donotknow"
[2,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/allergies"
[3,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/ear_infection"
[4,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/asthma"
[5,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/respiratory"
[6,] "group_homevisitquestionnaires/group_health/group_general_health/group_medicalprof/gastrointestinal"
[7,] "group_homevisitquestionnaires/group_health/group_general_health/comments_allergy_etc"
[8,] "group_homevisitquestionnaires/group_health/group_general_health/child_injury_times"
[9,] "group_homevisitquestionnaires/group_health/group_general_health/comment_injury"
[10,] "group_homevisitquestionnaires/group_health/group_general_health/comments_general_health"
[11,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
[12,] "group_homevisitquestionnaires/group_health/group_prenatal/prenatal_care"
[13,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
[,2]
[1,] "group_homevisitquestionnaires/group_health/group_general_health/child_vision_tested/donotknow"
[2,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/allergies"
[3,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/ear_infection"
[4,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/asthma"
[5,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/respiratory"
[6,] "group_homevisitquestionnaires/group_health/group_general_health/child_allergies_infections_ill/gastrointestinal"
[7,] "group_homevisitquestionnaires/group_health/group_general_health/comments_allergy_etc"
[8,] "group_homevisitquestionnaires/group_health/group_general_health/child_injury_times"
[9,] "group_homevisitquestionnaires/group_health/group_general_health/comment_injury"
[10,] "group_homevisitquestionnaires/group_health/group_general_health/comments_general_health"
[11,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
[12,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
[13,] "group_homevisitquestionnaires/group_health/group_smoking/pregnant_smoking"
n1
has a group_medicalprof
label from allergies
through gastrointestinal
; n2_4
has child_allergies_infections_ill
for the same questions.
n2_4 |> stringr::str_replace("child_allergies_infections_ill", "group_medicalprof") -> n2_5
n1 == n2_5
Warning in n1 == n2_5: longer object length is not a multiple of shorter object
length
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
[229] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE
cbind(n1[49:60], n2_5[49:60])
[,1]
[1,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
[2,] "group_homevisitquestionnaires/group_health/group_prenatal/prenatal_care"
[3,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
[4,] "group_homevisitquestionnaires/group_health/group_smoking/pregnant_smoking"
[5,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_1"
[6,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_2"
[7,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_3"
[8,] "group_homevisitquestionnaires/group_health/group_smoking/mom_smoking_now"
[9,] "group_homevisitquestionnaires/group_health/group_smoking/mom_smoking_now_amount"
[10,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_house"
[11,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_car"
[12,] "group_homevisitquestionnaires/group_health/group_smoking/comments_smoking"
[,2]
[1,] "group_homevisitquestionnaires/group_health/group_prenatal/instructions_prenatal"
[2,] "group_homevisitquestionnaires/group_health/group_prenatal/comments_prenatal"
[3,] "group_homevisitquestionnaires/group_health/group_smoking/pregnant_smoking"
[4,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_1"
[5,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_2"
[6,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_trimester_3"
[7,] "group_homevisitquestionnaires/group_health/group_smoking/mom_smoking_now"
[8,] "group_homevisitquestionnaires/group_health/group_smoking/mom_smoking_now_amount"
[9,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_house"
[10,] "group_homevisitquestionnaires/group_health/group_smoking/smoking_car"
[11,] "group_homevisitquestionnaires/group_health/group_smoking/comments_smoking"
[12,] "group_homevisitquestionnaires/group_health/group_drinking/pregnant_drinking"
It looks like these could be reconciled by deleting prenatal_care
from n1
.
n1_2 <- n1[-50]
n1_2 == n2_5
Warning in n1_2 == n2_5: longer object length is not a multiple of shorter
object length
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE
cbind(n1_2[64:75], n2_5[64:75])
[,1]
[1,] "group_homevisitquestionnaires/group_health/group_drinking/comments_drinking"
[2,] "group_homevisitquestionnaires/group_health/group_phq4/note_phq4"
[3,] "group_homevisitquestionnaires/group_health/group_phq4/group_phq4_001/phq4_nervous"
[4,] "group_homevisitquestionnaires/group_health/group_phq4/group_phq4_001/phq4_worrying"
[5,] "group_homevisitquestionnaires/group_health/group_phq4/group_phq4_001/phq4_littleinterest"
[6,] "group_homevisitquestionnaires/group_health/group_phq4/group_phq4_001/phq4_down"
[7,] "group_homevisitquestionnaires/group_health/group_phq4/comments_phq4"
[8,] "group_homevisitquestionnaires/group_rothbart/group_rothbartinstructions/rothbart_instructions1"
[9,] "group_homevisitquestionnaires/group_rothbart/group_rothbartinstructions/rothbart_instructions2"
[10,] "group_homevisitquestionnaires/group_rothbart/group_rothbart_001/rothbart_questions/rothbart_unfamiliarperson"
[11,] "group_homevisitquestionnaires/group_rothbart/group_rothbart_001/rothbart_questions/rothbart_troubletask"
[12,] "group_homevisitquestionnaires/group_rothbart/group_rothbart_001/rothbart_questions/rothbart_companyofchild"
[,2]
[1,] "group_homevisitquestionnaires/group_health/group_drinking/comments_drinking"
[2,] "group_homevisitquestionnaires/group_health/group_phq4/Experimenter_These_stions_are_about_you"
[3,] "group_homevisitquestionnaires/group_health/group_phq4/comments_phq4"
[4,] "group_homevisitquestionnaires/group_rothbart/group_dd2kz32/instructor_rothbart"
[5,] "group_homevisitquestionnaires/group_rothbart/group_dd2kz32/instructions_rothbart2"
[6,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/rothbart_questions_header"
[7,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/When_approached_by_a_ld_cling_to_a_parent"
[8,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/While_having_trouble_get_easily_irritated"
[9,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/When_a_familiar_chil_company_of_the_child"
[10,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/When_offered_a_choic_uickly_and_go_for_it"
[11,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/During_daily_or_even_eing_quietly_sung_to"
[12,] "group_homevisitquestionnaires/group_rothbart/rothbart_questions/While_playing_outdoo_and_excitement_of_it"
It looks like the phq4 is not in n2_5
.
Let’s check.
n2_5 |> stringr::str_detect("phq4") |> sum()
[1] 2
Yes, there are only two PHQ4-related questions in df334099
.
df334099 |> names() |> stringr::str_detect("phq4") |> sum()
[1] 2
This path of reconciliation does not appear fruitful.
Make aggregate files
non-MB-CDI files with \(n=288\) columns
files_288_cols <- stringr::str_detect(hv_deid_fl, "2[3458]_non_mbcdi.*_deidentified\\.csv")
df_merge_288_cols <- file_make_aggregate_from_csvs(hv_deid_fl[files_288_cols])
non-MB-CDI files with \(n=287\) columns
files_287_cols_1 <- stringr::str_detect(hv_deid_fl, "2[69]_non_mbcdi.*_deidentified\\.csv")
files_287_cols_2 <- stringr::str_detect(hv_deid_fl, "(740627|740630|740631)_non.*_deidentified\\.csv")
df_merge_287_cols_1 <- file_make_aggregate_from_csvs(hv_deid_fl[files_287_cols_1])
df_merge_287_cols_2 <- file_make_aggregate_from_csvs(hv_deid_fl[files_287_cols_2])
Examine groups with \(n=287\) cols
We focus on the starting column where the column names diverge, column 92.
# targets::tar_load(df_merge_287_cols_1, store="../_targets")
# targets::tar_load(df_merge_287_cols_2, store="../_targets")
names(df_merge_287_cols_1)[92]
[1] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_feeding_nutrition.instructions_feeding"
names(df_merge_287_cols_2)[92]
[1] "group_combinedquestionnaires.group_homevisitquestionnaires.group_locomotor_milestones.group_health.group_feeding_nutrition.instructions_feeding"
There is an erroneous group_locomotor_milestones.
in the df_merge_287_cols_2
column name.
A bit of sleuthing determines that this group_locomotor_milestones.
label is characteristic of columns 92 to 273.
names(df_merge_287_cols_2)[92:273] |> stringr::str_detect(pattern = "group_locomotor_milestones")
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[181] TRUE TRUE
The following should fix this.
old_names <- names(df_merge_287_cols_2)
new_names <- old_names
new_names[92:273] <-
  stringr::str_remove(new_names[92:273], "group_locomotor_milestones\\.")
names(df_merge_287_cols_2) <- new_names
names(df_merge_287_cols_2) == names(df_merge_287_cols_1)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[109] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
[217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[265] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE
[277] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
We have a second problem with columns from 114 to 210.
rbind(names(df_merge_287_cols_1)[113:115], names(df_merge_287_cols_2)[113:115])
[,1]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_vision_tested.donotknow"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_vision_tested.donotknow"
[,2]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.doctor_told_you"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
[,3]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.ear_infection"
One of the problems has to do with column 114. There is a question ending doctor_told_you
in names(df_merge_287_cols_1)
but not in names(df_merge_287_cols_2)
.
names(df_merge_287_cols_1) |> stringr::str_detect(pattern = "doctor_told_you") |> sum()
[1] 1
names(df_merge_287_cols_2) |> stringr::str_detect(pattern = "doctor_told_you") |> sum()
[1] 0
Deleting this question would create additional misalignments and further problems. We cannot proceed without further discussion with our team.
For now, let’s generate an array with all of the remaining differences in column names.
names_differ <- (names(df_merge_287_cols_2) != names(df_merge_287_cols_1))
sum(names_differ)
[1] 103
rbind(names(df_merge_287_cols_1)[names_differ], names(df_merge_287_cols_2)[names_differ])
[,1]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.doctor_told_you"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
[,2]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.ear_infection"
[,3]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.ear_infection"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.asthma"
[,4]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.asthma"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.respiratory"
[,5]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.respiratory"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.gastrointestinal"
[,6]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.gastrointestinal"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comments_allergy_etc"
[,7]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comments_allergy_etc"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_injury_times"
[,8]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_injury_times"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comment_injury"
[,9]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comment_injury"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comments_general_health"
[,10]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.comments_general_health"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.instructions_prenatal"
[,11]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.instructions_prenatal"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.prenatal_care"
[,12]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.prenatal_care"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.comments_prenatal"
[,13]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_prenatal.comments_prenatal"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.pregnant_smoking"
[,14]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.pregnant_smoking"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_1"
[,15]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_1"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_2"
[,16]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_2"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_3"
[,17]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_trimester_3"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.mom_smoking_now"
[,18]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.mom_smoking_now"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.mom_smoking_now_amount"
[,19]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.mom_smoking_now_amount"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_house"
[,20]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_house"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_car"
[,21]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.smoking_car"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.comments_smoking"
[,22]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_smoking.comments_smoking"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.pregnant_drinking"
[,23]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.pregnant_drinking"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_1"
[,24]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_1"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_2"
[,25]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_2"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_3"
[,26]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.drinking_trimester_3"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.comments_drinking"
[,27]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_drinking.comments_drinking"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.note_phq4"
[,28]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.note_phq4"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.instructions_phq4"
[,29]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.instructions_phq4"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_nervous"
[,30]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_nervous"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_worrying"
[,31]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_worrying"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_littleinterest"
[,32]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_littleinterest"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_down"
[,33]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.phq4_down"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.comments_phq4"
[,34]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_phq4.comments_phq4"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.instructions_rothbart1"
[,35]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.instructions_rothbart1"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.instructions_rothbart2"
[,36]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.instructions_rothbart2"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.instructions_rothbart3"
[,37]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.instructions_rothbart3"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_unfamiliarperson"
[,38]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_unfamiliarperson"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_troubletask"
[,39]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_troubletask"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_companyofchild"
[,40]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_companyofchild"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_choiceactivities"
[,41]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_choiceactivities"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_quietlysung"
[,42]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_quietlysung"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_playingoutdoors"
[,43]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_playingoutdoors"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_morethan10"
[,44]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_morethan10"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_respondingremarks"
[,45]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_respondingremarks"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_excitedlovedadults"
[,46]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_excitedlovedadults"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_fiddlehair"
[,47]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_fiddlehair"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_roughrowdy"
[,48]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_roughrowdy"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_rockedhugged"
[,49]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_rockedhugged"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_involvednewactivity"
[,50]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_involvednewactivity"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tirequickly"
[,51]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tirequickly"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_callattention"
[,52]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_callattention"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tags"
[,53]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tags"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_noisyenvironment"
[,54]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_noisyenvironment"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_energy"
[,55]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_energy"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_vehicles"
[,56]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_vehicles"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_active"
[,57]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_active"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_forbidden"
[,58]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_forbidden"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_sadlytearful"
[,59]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_sadlytearful"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_downblue"
[,60]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_downblue"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_runhouse"
[,61]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_runhouse"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_excitingevent"
[,62]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_excitingevent"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tempertantrum"
[,63]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_tempertantrum"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_waitpatiently"
[,64]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_waitpatiently"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_rockedsmile"
[,65]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_rockedsmile"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_mold"
[,66]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_mold"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_interactadult"
[,67]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_interactadult"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_careful"
[,68]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_careful"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_enternewplace"
[,69]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_enternewplace"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_crymorethan3"
[,70]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_crymorethan3"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_easilysoothed"
[,71]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_easilysoothed"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_busyother"
[,72]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_busyother"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_differentpeople"
[,73]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.group_rothbartquestions.rothbart_differentpeople"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.comments_rothbart"
[,74]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_rothbart.comments_rothbart"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.mediause_instructions1"
[,75]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.mediause_instructions1"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology"
[,76]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.tv"
[,77]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.tv"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.dvd"
[,78]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.dvd"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.computer"
[,79]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.computer"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.ipad"
[,80]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.ipad"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.educationalgame"
[,81]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.educationalgame"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.videogame"
[,82]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.home_technology.videogame"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_tv"
[,83]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_tv"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.tv_how"
[,84]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.tv_how"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_dvd"
[,85]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_dvd"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.dvd_how"
[,86]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.dvd_how"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_computer"
[,87]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_computer"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.computer_how"
[,88]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.computer_how"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_ipad"
[,89]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_ipad"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.ipad_how"
[,90]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.ipad_how"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_educational"
[,91]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_educational"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.educational_how"
[,92]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.educational_how"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_videogame"
[,93]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.technology_child_videogame"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.videogame_how"
[,94]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.videogame_how"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.tv_hours_per_day"
[,95]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.tv_hours_per_day"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.note_tv_hours_per_day"
[,96]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.note_tv_hours_per_day"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.instructions_technology_use"
[,97]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.instructions_technology_use"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.technology_use_scale"
[,98]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_typical_day.instructions_typicalday"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_typical_day.instructions_typical_day"
[,99]
[1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel1"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel1"
[,100]
[1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel2"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel2"
[,101]
[1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel3"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel3"
[,102]
[1,] "group_combinedquestionnaires.group_databrary.acknowledge_databrary"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.acknowledge_databrary"
[,103]
[1,] "group_combinedquestionnaires.group_databrary.note_saveasdraft"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_saveasdraft"
Visual inspection suggests that these are similar with the following deviations:
- As noted, `df_merge_287_cols_1` has a column ending in `doctor_told_you` that is not present in `df_merge_287_cols_2`. Conversely, `df_merge_287_cols_2` has a column ending in `technology_use_scale` that is not present in `df_merge_287_cols_1`.
- There is a set of fields in `group_databrary` that do not align exactly. We will almost certainly delete these, so the misalignment is not a huge problem.
As an exploration, let’s see if we can reconcile these by deleting the non-aligning columns.
# Work on copies so the original merged data frames stay untouched.
df1 <- df_merge_287_cols_1
df2 <- df_merge_287_cols_2

# Drop the column that exists only in the first group.
df1 <- df1 %>%
  dplyr::select(., -contains('doctor_told_you'))

# Drop the column that exists only in the second group.
df2 <- df2 %>%
  dplyr::select(., -contains('technology_use_scale'))

# Strip the extra `group_locomotor_milestones.` path component from
# columns 92-273 of df2 (positions are hard-coded for this exploration).
old_names <- names(df2)
new_names <- old_names
new_names[92:273] <- stringr::str_remove(new_names[92:273], "group_locomotor_milestones\\.")
names(df2) <- new_names

# Position-by-position check of the two sets of column names.
names(df1) == names(df2)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
[265] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE
[277] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
This looks promising.
# Show the column-263 name from df1 (row 1) and df2 (row 2) side by side.
rbind(names(df1)[263], names(df2)[263])
[,1]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_typical_day.instructions_typicalday"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_typical_day.instructions_typical_day"
This is easily fixed.
# Adopt df2's spelling of the column-263 name in df1.
names(df1)[263] <- names(df2)[263]
# Show the names at positions 273-275 from df1 (row 1) and df2 (row 2).
rbind(names(df1)[273:275], names(df2)[273:275])
[,1]
[1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel1"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel1"
[,2]
[1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel2"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel2"
[,3]
[1,] "group_combinedquestionnaires.group_databrary.note_databraryspiel3"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_databrary.note_databraryspiel3"
The last misalignments relate to Databrary fields.
# Remove every column whose name contains `group_databrary` from both frames.
df1 <- df1 %>%
  dplyr::select(., -contains('group_databrary'))

df2 <- df2 %>%
  dplyr::select(., -contains('group_databrary'))

# Re-check that the remaining column names agree position by position.
names(df1) == names(df2)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[256] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[271] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
Success!
Combining the two groups of datasets
Now, let’s go back to the data frame with 288 cols and see if we can bring these into alignment.
# Work on a copy of the 288-column data frame.
df3 <- df_merge_288_cols

# Remove every column whose name contains `group_databrary`.
df3 <- df3 %>%
  dplyr::select(., -contains('group_databrary'))

# Rows and columns of df1, df2 and df3, in that order.
c(dim(df1), dim(df2), dim(df3))
[1] 242 281 58 281 511 283
# df1 has 281 columns and df3 has 283 here, so `==` recycles the shorter
# vector and R emits a length warning.
names(df1) == names(df3)
Warning in names(df1) == names(df3): longer object length is not a multiple of
shorter object length
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[109] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[277] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Show the names at positions 114-115 from df1 (row 1) and df3 (row 2).
rbind(names(df1)[114:115], names(df3)[114:115])
[,1]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.doctor_told_you"
[,2]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.ear_infection"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_health.group_general_health.child_allergies_infections_ill.allergies"
Once again, there appears to be a problem with the ‘doctor_told_you’ field. We’ll delete it to see if this fixes one of the problems.
# Remove every column whose name contains `doctor_told_you` from df3.
df3 <- df3 %>%
  dplyr::select(., -contains('doctor_told_you'))

# Re-check the column names position by position.
names(df1) == names(df3)
Warning in names(df1) == names(df3): longer object length is not a multiple of
shorter object length
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[205] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[277] FALSE FALSE FALSE FALSE FALSE FALSE
We still have misalignments starting at column 210.
# Show the names at positions 210-213 from df1 (row 1) and df3 (row 2).
rbind(names(df1)[210:213], names(df3)[210:213])
[,1]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.meals"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.technology_use_scale"
[,2]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.playtime"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.meals"
[,3]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.bedtime"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.playtime"
[,4]
[1,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.transportation"
[2,] "group_combinedquestionnaires.group_homevisitquestionnaires.group_mediause.group_techuse.bedtime"
The `technology_use_scale` field exists in one but not the other.
# Remove every column whose name contains `technology_use_scale` from df3.
df3 <- df3 %>%
  dplyr::select(., -contains('technology_use_scale'))

# Dimensions of df1 (row 1) and df3 (row 2).
rbind(dim(df1), dim(df3))
[,1] [,2]
[1,] 242 281
[2,] 511 281
# Both frames now have 281 columns, so this compares names position by position.
names(df1) == names(df3)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
[265] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[277] TRUE TRUE TRUE TRUE TRUE
Future versions of the workflow will need to handle this more elegantly.
Option 1: Fix the underlying forms.
Option 2: Add the ‘missing’ columns as NA in post-processing.
For now, I’m going to create functions that align these data frames. These are incorporated into `R/utils.R`, so we do not source them again here.
# Drop every column whose name contains `technology_use_scale`.
# `df`: a data frame. Returns `df` without the matching columns.
remove_technology_use_scale <- function(df) {
  dplyr::select(df, -contains('technology_use_scale'))
}
# Drop every column whose name contains `doctor_told_you`.
# `df`: a data frame. Returns `df` without the matching columns.
remove_doctor_told_you <- function(df) {
  dplyr::select(df, -contains('doctor_told_you'))
}
# Drop every column whose name contains `group_databrary`.
# `df`: a data frame. Returns `df` without the matching columns.
remove_databrary_fields <- function(df) {
  dplyr::select(df, -contains('group_databrary'))
}
# Rename columns so that every occurrence of `typicalday` in a column name
# becomes `typical_day`. Column order and contents are unchanged.
# `df`: a data frame. Returns `df` with the renamed columns.
reconcile_typicalday <- function(df) {
  names(df) <- stringr::str_replace_all(names(df), 'typicalday', 'typical_day')
  df
}
# Strip the `group_locomotor_milestones.` path component from selected
# column names.
# `df`: a data frame. Returns `df` with the adjusted column names.
remove_permissive_locomotor_milestones_label <- function(df) {
  old_names <- names(df)
  new_names <- old_names
  # Regex alternation binds loosely: the `locomotor_milestones.*` prefix
  # applies only to `health`; the other alternatives (`division`, `rothbart`,
  # ...) match anywhere in a name on their own.
  contains_locomotor <-
    stringr::str_detect(new_names, pattern = "locomotor_milestones.*health|division|rothbart|mediause|pets|typical|acknowledge")
  # Removing the label is a no-op for selected names that do not contain it.
  new_names[contains_locomotor] <-
    stringr::str_remove(new_names[contains_locomotor], "group_locomotor_milestones\\.")
  names(df) <- new_names
  df
}
# Drop every column whose name contains `X_` or `meta.instanceID`.
# `df`: a data frame. Returns `df` without the matching columns.
# NOTE(review): tidyselect's contains() ignores case by default, so "X_"
# would also match any name containing "x_"; confirm no wanted column is
# dropped by this.
remove_X_meta_cols <- function(df) {
  dplyr::select(df, -contains("X_"), -contains("meta.instanceID"))
}
# Shorten column names by deleting the `group_homevisitquestionnaires.` and
# `group_combinedquestionnaires.` path components wherever they occur.
# `df`: a data frame. Returns `df` with the shortened column names.
remove_redundant_group_labels <- function(df) {
  names(df) <- stringr::str_remove_all(names(df), 'group_homevisitquestionnaires\\.')
  names(df) <- stringr::str_remove_all(names(df), 'group_combinedquestionnaires\\.')
  df
}
# Apply the full sequence of column clean-up steps to one data frame so that
# data frames exported from different form versions end up with matching
# column names.
# `df`: a data frame. Returns the cleaned data frame.
clean_dfs <- function(df) {
  df %>%
    reconcile_typicalday() %>%
    remove_technology_use_scale() %>%
    remove_doctor_told_you() %>%
    remove_permissive_locomotor_milestones_label() %>%
    remove_databrary_fields() %>%
    remove_X_meta_cols() %>%
    remove_redundant_group_labels()
}
Let’s test this workflow with the unmodified files.
# Clean the first 287-column data frame and report its dimensions.
df1m <- clean_dfs(df_merge_287_cols_1)
dim(df1m)
[1] 242 272
# Clean the second 287-column data frame and report its dimensions.
df2m <- clean_dfs(df_merge_287_cols_2)
dim(df2m)
[1] 58 272
# Clean the 288-column data frame and report its dimensions.
df3m <- clean_dfs(df_merge_288_cols)
dim(df3m)
[1] 511 272
# Count the positions at which df1m and df2m have identical column names.
(names(df1m) == names(df2m)) |> sum()
[1] 272
# Count the positions at which df1m and df3m have identical column names.
(names(df1m) == names(df3m)) |> sum()
[1] 272
Merging and exporting
# Stack the three cleaned data frames row-wise into one aggregate.
df <- rbind(df1m, df2m, df3m)
Save exported aggregate file.
# Write the aggregate data frame to the project's `agg` directory as CSV.
readr::write_csv(df, file = file.path(here::here(), "data/csv/home_visit/agg", "PLAY-non-mcdi-raw-latest.csv"))