Merge files
Background
This page describes the process of merging data files.
Home visit with Databrary
First, we load the latest home visit CSV.
Code
# Path to the most recent aggregate home visit export.
fn <- file.path(
  here::here(), "data", "csv", "home_visit", "agg",
  "PLAY-non-mcdi-kobo-latest.csv"
)

# Read every column as character so no values are re-typed on import.
hv <- readr::read_csv(
  fn,
  col_types = readr::cols(.default = "c"),
  show_col_types = FALSE
)
We create a helper function for the site_session data.
Code
#' Read one site session CSV and tag each row with its site ID
#'
#' All columns are read as character. The site ID is the file's base name
#' with the trailing `.csv` extension removed.
#'
#' @param fn Path to a session CSV file.
#' @return The data frame read from `fn` with an added `site_id` column.
make_augmented_sess_df <- function(fn) {
  df <- readr::read_csv(fn, col_types = readr::cols(.default = "c"))

  # Anchor the pattern so only the trailing extension is stripped, not the
  # first ".csv" that happens to appear inside the file name.
  this_site <- basename(fn) |>
    stringr::str_remove("\\.csv$")

  dplyr::mutate(df, site_id = this_site)
}
We then map the helper function across the list of session CSVs in data/csv/site_sessions
and combine the results into a single aggregate data frame, db_agg.
Code
# Directory holding the site session CSVs, and the full paths of its CSV files.
f_path <- file.path(here::here(), "data", "csv", "site_sessions")
session_fl <- list.files(f_path, "\\.csv$", full.names = TRUE)

# `length()` always returns an integer, never NULL, so test for an empty
# file list directly; otherwise the warning branch can never run.
if (length(session_fl) == 0) {
  warning("No session file data")
} else {
  # One data frame per session file, each tagged with its site_id.
  db <- purrr::map(session_fl, make_augmented_sess_df)

  db_agg <- db |>
    purrr::list_rbind() |>
    # Omit NA in session_date
    dplyr::filter(!is.na(session_date))

  message(nrow(db_agg), " rows imported.")
}
932 rows imported.
Finally, we join the KoBoToolbox data with the Databrary session data.
Code
# Full join keeps rows from both tables, including those without a match.
# Both tables use the same key names, so the `join_by()` shorthand is
# equivalent to `site_id == site_id, participant_ID == participant_ID`.
hv_db <- dplyr::full_join(
  hv,
  db_agg,
  by = dplyr::join_by(site_id, participant_ID)
)
We export this merged file.
Save files
Save the file in data/csv/home_visit/agg/
with the name PLAY-non-mcdi-kobo-db-merged-latest.csv.
Code
# Write the merged data to the fixed "latest" file name.
fn <- file.path(
  here::here(), "data", "csv", "home_visit", "agg",
  "PLAY-non-mcdi-kobo-db-merged-latest.csv"
)
readr::write_csv(hv_db, fn)
Save with a date stamp for quality control, e.g., PLAY-non-mcdi-kobo-db-merged-2024-09-13.csv.
Code
# Write a second copy whose file name carries today's date (from Sys.Date()).
fn <- file.path(
  here::here(), "data", "csv", "home_visit", "agg",
  paste0("PLAY-non-mcdi-kobo-db-merged-", Sys.Date(), ".csv")
)
readr::write_csv(hv_db, fn)