Code
::walk(list.files(file.path(here::here(), "R"),
purrr"^screen_", full.names = TRUE), source)
We source a set of cleaning functions have the screen_
prefix.
::walk(list.files(file.path(here::here(), "R"),
purrr"^screen_", full.names = TRUE), source)
There are three CSV files to clean:
<- list.files("../data/csv/screening", "[Dd]emographic", full.names = TRUE)) (fn
[1] "../data/csv/screening/275882_PLAY_Demographic_Questionnaire.csv"
[2] "../data/csv/screening/334134_PLAY_Demographic_Questionnaire_Spanish.csv"
[3] "../data/csv/screening/359546_PLAY_Demographic_Questionnaire.csv"
We clean them separately, as needed, then merge them.
<-
df1 ::read_csv(fn[1],
readrcol_types = readr::cols(.default = 'c'),
show_col_types = FALSE)
<-
df2 ::read_csv(fn[2],
readrcol_types = readr::cols(.default = 'c'),
show_col_types = FALSE)
<-
df3 ::read_csv(fn[3],
readrcol_types = readr::cols(.default = 'c'),
show_col_types = FALSE)
head(names(df1), 15)
[1] "start"
[2] "end"
[3] "c_today"
[4] "play_phone_questionnaire/update_date"
[5] "play_phone_questionnaire/group_contact_info/instructions_contactinfo"
[6] "play_phone_questionnaire/group_contact_info/parent_phone"
[7] "play_phone_questionnaire/group_contact_info/parent_email"
[8] "play_phone_questionnaire/group_contact_info/group_address/note_parent_address"
[9] "play_phone_questionnaire/group_contact_info/group_address/parent_address_1"
[10] "play_phone_questionnaire/group_contact_info/group_address/parent_address_2"
[11] "play_phone_questionnaire/group_contact_info/group_address/city"
[12] "play_phone_questionnaire/group_contact_info/group_address/state"
[13] "play_phone_questionnaire/group_contact_info/acknowledge_contact_info"
[14] "play_phone_questionnaire/instructions_guid"
[15] "play_phone_questionnaire/child_first_name"
head(names(df2), 15)
[1] "start"
[2] "end"
[3] "c_today"
[4] "play_demo_questionnaire/update_date"
[5] "play_demo_questionnaire/NOTA_El_cuestionario_demogr_fico_debe_ser_completado_por_el_investigador_que_va_a_ir_a_la_visita_al_hogar_con_la_madre_por_tel_fono_La_madre_deber_a_ser_la_madre_que_va_a_participar_en_el_estudio_con_su_ni_o_a"
[6] "play_demo_questionnaire/group_siteinfo/site_id"
[7] "play_demo_questionnaire/group_siteinfo/subject_number"
[8] "play_demo_questionnaire/exp_name"
[9] "play_demo_questionnaire/group_contact_info/instructions_contactinfo"
[10] "play_demo_questionnaire/group_contact_info/parent_phone"
[11] "play_demo_questionnaire/group_contact_info/parent_email"
[12] "play_demo_questionnaire/group_contact_info/group_address/note_parent_address"
[13] "play_demo_questionnaire/group_contact_info/group_address/parent_address_1"
[14] "play_demo_questionnaire/group_contact_info/group_address/parent_address_2"
[15] "play_demo_questionnaire/group_contact_info/group_address/city"
head(names(df3), 15)
[1] "start"
[2] "end"
[3] "c_today"
[4] "play_demo_questionnaire/update_date"
[5] "play_demo_questionnaire/note_exp"
[6] "play_demo_questionnaire/group_siteinfo/site_id"
[7] "play_demo_questionnaire/group_siteinfo/subject_number"
[8] "play_demo_questionnaire/group_siteinfo/exp_name"
[9] "play_demo_questionnaire/group_contact_info/instructions_contactinfo"
[10] "play_demo_questionnaire/group_contact_info/parent_phone"
[11] "play_demo_questionnaire/group_contact_info/parent_email"
[12] "play_demo_questionnaire/group_contact_info/group_address/note_parent_address"
[13] "play_demo_questionnaire/group_contact_info/group_address/parent_address_1"
[14] "play_demo_questionnaire/group_contact_info/group_address/parent_address_2"
[15] "play_demo_questionnaire/group_contact_info/group_address/city"
head(names(df1), 15)
[1] "start"
[2] "end"
[3] "c_today"
[4] "play_phone_questionnaire/update_date"
[5] "play_phone_questionnaire/group_contact_info/instructions_contactinfo"
[6] "play_phone_questionnaire/group_contact_info/parent_phone"
[7] "play_phone_questionnaire/group_contact_info/parent_email"
[8] "play_phone_questionnaire/group_contact_info/group_address/note_parent_address"
[9] "play_phone_questionnaire/group_contact_info/group_address/parent_address_1"
[10] "play_phone_questionnaire/group_contact_info/group_address/parent_address_2"
[11] "play_phone_questionnaire/group_contact_info/group_address/city"
[12] "play_phone_questionnaire/group_contact_info/group_address/state"
[13] "play_phone_questionnaire/group_contact_info/acknowledge_contact_info"
[14] "play_phone_questionnaire/instructions_guid"
[15] "play_phone_questionnaire/child_first_name"
head(names(df2), 15)
[1] "start"
[2] "end"
[3] "c_today"
[4] "play_demo_questionnaire/update_date"
[5] "play_demo_questionnaire/NOTA_El_cuestionario_demogr_fico_debe_ser_completado_por_el_investigador_que_va_a_ir_a_la_visita_al_hogar_con_la_madre_por_tel_fono_La_madre_deber_a_ser_la_madre_que_va_a_participar_en_el_estudio_con_su_ni_o_a"
[6] "play_demo_questionnaire/group_siteinfo/site_id"
[7] "play_demo_questionnaire/group_siteinfo/subject_number"
[8] "play_demo_questionnaire/exp_name"
[9] "play_demo_questionnaire/group_contact_info/instructions_contactinfo"
[10] "play_demo_questionnaire/group_contact_info/parent_phone"
[11] "play_demo_questionnaire/group_contact_info/parent_email"
[12] "play_demo_questionnaire/group_contact_info/group_address/note_parent_address"
[13] "play_demo_questionnaire/group_contact_info/group_address/parent_address_1"
[14] "play_demo_questionnaire/group_contact_info/group_address/parent_address_2"
[15] "play_demo_questionnaire/group_contact_info/group_address/city"
head(names(df3), 15)
[1] "start"
[2] "end"
[3] "c_today"
[4] "play_demo_questionnaire/update_date"
[5] "play_demo_questionnaire/note_exp"
[6] "play_demo_questionnaire/group_siteinfo/site_id"
[7] "play_demo_questionnaire/group_siteinfo/subject_number"
[8] "play_demo_questionnaire/group_siteinfo/exp_name"
[9] "play_demo_questionnaire/group_contact_info/instructions_contactinfo"
[10] "play_demo_questionnaire/group_contact_info/parent_phone"
[11] "play_demo_questionnaire/group_contact_info/parent_email"
[12] "play_demo_questionnaire/group_contact_info/group_address/note_parent_address"
[13] "play_demo_questionnaire/group_contact_info/group_address/parent_address_1"
[14] "play_demo_questionnaire/group_contact_info/group_address/parent_address_2"
[15] "play_demo_questionnaire/group_contact_info/group_address/city"
We remove the unneeded ’play_demo_*’ and ‘play_phone_questionnaire_’ variable headers using screen_remove_variable_headers()
.
We next add Census FIPS codes for the home state and county so that we can link with county-level Census data at a later date. This depends on address, city, and state information.
The Census queries stopped working around 2023-06-16, but should be restored as of 2024-09-13, using R/screen_add_fips.R
.
It would be more efficient to query the Census geocoding database once.
We have name and address information in the screening data (e.g., ‘..parent_phone’, ‘..parent_email’, etc.). We remove this and other identifiable information using screen_remove_identifiers()
.
We remove fields that contain administrative metadata with screen_remove_metadata_fields()
.
We remove fields used only by staff in uploading data to Databrary using screen_remove_databrary_fields()
.
Then, we use dplyr::full_join()
to combine the set of individually cleaned data files. The screen_clean_raw_csv()
function combines the previous screen_remove*
functions. The screen_clean_raw_join()
function cleans each CSV then joins them.
<- screen_clean_raw_join()) (scr_df
Cleaning '/Users/rog1/rrr/KoBoToolbox/data/csv/screening/275882_PLAY_Demographic_Questionnaire.csv'.
Loading required package: stringr
Passing 8 addresses to the US Census batch geocoder
Query completed in: 1.1 seconds
Removed n = 31 of 172 columns.
Loading required package: dplyr
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Cleaning '/Users/rog1/rrr/KoBoToolbox/data/csv/screening/334134_PLAY_Demographic_Questionnaire_Spanish.csv'.
Passing 8 addresses to the US Census batch geocoder
Query completed in: 0.9 seconds
Removed n = 47 of 240 columns.
Cleaning '/Users/rog1/rrr/KoBoToolbox/data/csv/screening/359546_PLAY_Demographic_Questionnaire.csv'.
Passing 942 addresses to the US Census batch geocoder
Query completed in: 39.4 seconds
Removed n = 47 of 243 columns.
Joining with `by = join_by(start, end, c_today, update_date, site_id,
subject_number, state, check_childage, check_childage_weeks, child_sex, day,
day2, day1, play_id, language_spoken_mom, `language_spoken_mom/english`,
`language_spoken_mom/spanish`, `language_spoken_mom/other`,
language_spoken_mom_other, language_spoken_child,
`language_spoken_child/english`, `language_spoken_child/spanish`,
`language_spoken_child/other`, language_spoken_child_other,
language_spoken_house, `language_spoken_house/english`,
`language_spoken_house/spanish`, `language_spoken_house/other`,
language_spoken_house_other, `child_information/child_bornonduedate`,
`child_information/child_onterm`, `child_information/child_duedate`,
`child_information/child_birthage`, `child_information/child_weight_pounds`,
`child_information/child_weight_ounces`,
`child_information/child_birth_complications`,
`child_information/specify_birth_complications`,
`child_information/hearing_disabilities`, `child_information/specify_hearing`,
`child_information/vision_disabilities`, `child_information/specify_vision`,
`child_information/major_illnesses_injuries`,
`child_information/specify_illnesses_injuries`,
`child_information/other_developmentaldelays`,
`child_information/specify_developmentaldelays`,
`child_information/child_race`, `child_information/child_sleep_time`,
`child_information/child_wake_time`, `child_information/child_nap_hours`,
`child_information/child_sleep_location`,
`child_information/specify_child_sleep_location`,
`child_information/indicate_child`, `child_information/indicate_child_2`,
`group_family_structure/only_child`,
`group_family_structure/specify_onlychild`,
`group_family_structure/household_members`,
`group_family_structure/household_members/father_biological`,
`group_family_structure/household_members/male_partner`,
`group_family_structure/household_members/mother_biological`,
`group_family_structure/household_members/female_partner`,
`group_family_structure/household_members/other_partner`,
`group_family_structure/household_members/sibling_biological`,
`group_family_structure/household_members/sibling_nonbiological`,
`group_family_structure/household_members/grandmother`,
`group_family_structure/household_members/grandfather`,
`group_family_structure/household_members/great_grandmother`,
`group_family_structure/household_members/great_grandfather`,
`group_family_structure/household_members/aunt`,
`group_family_structure/household_members/uncle`,
`group_family_structure/household_members/cousin`,
`group_family_structure/household_members/relative`,
`group_family_structure/household_members/non_relative`,
`group_family_structure/household_members/none`,
`group_family_structure/other_relatives`,
`group_family_structure/other_non_relatives`,
`group_family_structure/indicate_familystructure`,
`group_mominfo/mom_biological`, `group_mominfo/mom_relation`,
`group_mominfo/mom_datecare`, `group_mominfo/mom_childbirth_age`,
`group_mominfo/check_mom_childbirth_age`, `group_mominfo/mom_race`,
`group_mominfo/mom_birth_country`, `group_mominfo/specify_mom_birth_country`,
`group_mominfo/mom_education`, `group_mominfo/mom_employment`,
`group_mominfo/mom_occupation`, `group_mominfo/mom_jobs_number`,
`group_mominfo/mom_training`, `group_mominfo/mom_condition`,
`group_mominfo/specify_mom_condition`, `group_mominfo/indicate_mom`,
`group_biodad/biodad_childbirth_age`,
`group_biodad/check_biodad_childbirth_age`, `group_biodad/biodad_race`,
`group_biodad/indicate_biodad`, `group_biomom/biomom_childbirth_age`,
`group_biomom/check_biomom_childbirth_age`, `group_biomom/biomom_race`,
`group_biomom/indicate_biomom`, `group_nonbiopartner/nonbiopartner_race`,
`group_nonbiopartner/indicate_nonbiopartner`,
`group_genpartner_info/partner_education`,
`group_genpartner_info/partner_employment`,
`group_genpartner_info/partner_occupation`,
`group_genpartner_info/partner_jobs_number`,
`group_genpartner_info/partner_program`,
`group_genpartner_info/indicate_genpartner`,
`group_child_care_arrangements/childcare_types`,
`group_child_care_arrangements/childcare_types/nanny_home`,
`group_child_care_arrangements/childcare_types/nanny_nothome`,
`group_child_care_arrangements/childcare_types/relative`,
`group_child_care_arrangements/childcare_types/childcare`,
`group_child_care_arrangements/childcare_types/none`,
`group_child_care_arrangements/childcare_location`,
`group_child_care_arrangements/childcare_hours`,
`group_child_care_arrangements/childcare_number`,
`group_child_care_arrangements/childcare_age`,
`group_child_care_arrangements/childcare_language`,
`group_inclusioncheck/inclusionreason`,
`group_inclusioncheck/indicate_inclusion`, indicate_demoquestionnaire,
indicate_databrary, `_id`, state_fips, county_fips, fips_state, fips_county)`
Joining with `by = join_by(start, end, c_today, update_date, site_id,
subject_number, state, check_childage, check_childage_weeks, child_sex, day,
day2, day1, language_spoken_child, `language_spoken_child/english`,
`language_spoken_child/spanish`, `language_spoken_child/other`,
language_spoken_child_other, `child_information/child_bornonduedate`,
`child_information/child_onterm`, `child_information/child_duedate`,
`child_information/child_birthage`, `child_information/child_weight_pounds`,
`child_information/child_weight_ounces`,
`child_information/child_birth_complications`,
`child_information/specify_birth_complications`,
`child_information/hearing_disabilities`, `child_information/specify_hearing`,
`child_information/vision_disabilities`, `child_information/specify_vision`,
`child_information/major_illnesses_injuries`,
`child_information/specify_illnesses_injuries`,
`child_information/child_sleep_time`, `child_information/child_wake_time`,
`child_information/child_nap_hours`, `child_information/child_sleep_location`,
`child_information/specify_child_sleep_location`,
`child_information/indicate_child`, `child_information/indicate_child_2`,
`group_family_structure/household_members`,
`group_family_structure/household_members/male_partner`,
`group_family_structure/household_members/female_partner`,
`group_family_structure/household_members/other_partner`,
`group_family_structure/household_members/grandmother`,
`group_family_structure/household_members/grandfather`,
`group_family_structure/household_members/great_grandmother`,
`group_family_structure/household_members/great_grandfather`,
`group_family_structure/household_members/aunt`,
`group_family_structure/household_members/uncle`,
`group_family_structure/household_members/cousin`,
`group_family_structure/household_members/relative`,
`group_family_structure/household_members/non_relative`,
`group_family_structure/other_relatives`,
`group_family_structure/other_non_relatives`,
`group_family_structure/indicate_familystructure`,
`group_child_care_arrangements/childcare_types`,
`group_child_care_arrangements/childcare_types/relative`,
`group_child_care_arrangements/childcare_types/childcare`,
`group_child_care_arrangements/childcare_types/none`,
`group_child_care_arrangements/childcare_hours`,
`group_child_care_arrangements/childcare_number`,
`group_child_care_arrangements/childcare_age`,
`group_child_care_arrangements/childcare_language`, `_id`, state_fips,
county_fips, fips_state, fips_county)`
# A tibble: 974 × 166
start end c_today update_date site_id subject_number state check_childage
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2020-0… 2020… 2020-0… <NA> NYUNI 230 New … 32.6772408503…
2 2020-0… 2020… 2020-0… <NA> NYUNI 229 New … 32.3813828621…
3 2019-1… 2020… 2020-0… <NA> VCOMU 002 VA 26.7258382642…
4 2020-0… 2020… 2020-0… <NA> GEORG 010 Virg… 12.7547666009…
5 2020-0… 2020… 2020-0… <NA> NYUNI 008 New … 13.4122287968…
6 2020-0… 2020… 2020-0… <NA> NYUNI 009 New … 19.3636313828…
7 2020-0… 2020… 2020-0… <NA> GEORG 011 Virg… 17.8514683322…
8 2020-0… 2020… 2020-0… <NA> NYUNI 010 NY 17.8514683322…
9 2020-0… 2020… 2020-0… <NA> NYUNI 011 New … 23.8001314924…
10 2020-0… 2020… 2020-0… <NA> VCOMU 003 VA 17.5884834538…
# ℹ 964 more rows
# ℹ 158 more variables: check_childage_weeks <chr>, child_sex <chr>, day <chr>,
# day2 <chr>, day1 <chr>, play_id <chr>, language_spoken_mom <chr>,
# `language_spoken_mom/english` <chr>, `language_spoken_mom/spanish` <chr>,
# `language_spoken_mom/other` <chr>, language_spoken_mom_other <chr>,
# language_spoken_child <chr>, `language_spoken_child/english` <chr>,
# `language_spoken_child/spanish` <chr>, …
Now, we can proceed to clean-up the merged data frame. The sequence of functions called below cleans ‘construct-specific’ variables as indicated by the function names.
<- scr_df |>
scr_df screen_clean_child_info() |>
screen_clean_lang_info() |>
screen_clean_mom_info() |>
screen_clean_biodad_father_info() |>
screen_clean_childcare_info() |>
screen_clean_family_structure() |>
screen_clean_play_id() |>
screen_remove_selected_cols() |>
screen_select_reorder_cols()
For convenience, we package this sequence in its own function, screen_clean_fields()
.
Note that all of the variables are considered character strings. The tidyverse
suite does a great job of guessing what variables are what, but sometimes it guesses wrongly. So, in preliminary stages, it has proved easier to make everything a character string.
<- screen_clean_fields(scr_df) scr_df
Loading required package: tidyr
str(scr_df)
tibble [974 × 51] (S3: tbl_df/tbl/data.frame)
$ submit_date : chr [1:974] "2020-02-27T08:24:10.551-05:00" "2020-02-27T08:43:34.275-05:00" "2019-12-18T16:37:17.554-05:00" "2020-01-01T12:54:04.550-05:00" ...
$ site_id : chr [1:974] "NYUNI" "NYUNI" "VCOMU" "GEORG" ...
$ participant_ID : chr [1:974] "230" "229" "002" "010" ...
$ play_id : chr [1:974] NA NA NA NA ...
$ child_age_mos : chr [1:974] "32.677240850317695" "32.38138286215201" "26.725838264299803" "12.754766600920446" ...
$ child_sex : chr [1:974] "male" "female" "female" "male" ...
$ child_bornonduedate : chr [1:974] "yes" "yes" "yes" "yes" ...
$ child_onterm : chr [1:974] NA NA NA NA ...
$ child_birthage : chr [1:974] "-7" "-10" "0" "1" ...
$ child_weight_pounds : chr [1:974] "8" "7" "6" "7" ...
$ child_weight_ounces : chr [1:974] "12" "10" "12" "15" ...
$ child_birth_complications : chr [1:974] "no" "no" "no" "no" ...
$ child_birth_complications_specify : chr [1:974] NA NA NA NA ...
$ child_hearing_disabilities : chr [1:974] "yes" "no" "no" "no" ...
$ child_hearing_disabilities_specify: chr [1:974] "Temporary hearing loss so tubes in his ears, one still remaining" NA NA NA ...
$ child_vision_disabilities : chr [1:974] "no" "no" "no" "no" ...
$ child_vision_disabilities_specify : chr [1:974] NA NA NA NA ...
$ child_major_illnesses_injuries : chr [1:974] "yes" "no" "no" "no" ...
$ child_illnesses_injuries_specify : chr [1:974] "Pneumonia at 5 months" "" "" "" ...
$ child_developmentaldelays : chr [1:974] NA NA NA NA ...
$ child_developmentaldelays_specify : chr [1:974] NA NA NA NA ...
$ child_sleep_time : chr [1:974] "20:15:00.000-05:00" "20:15:00.000-05:00" "19:00:00.000-05:00" "19:00:00.000-05:00" ...
$ child_wake_time : chr [1:974] "07:15:00.000-05:00" "06:45:00.000-05:00" "07:30:00.000-05:00" "06:15:00.000-05:00" ...
$ child_nap_hours : chr [1:974] "2" "3" "1.5" "2" ...
$ child_sleep_location : chr [1:974] "crib_separate" "crib_separate" "crib_separate" "crib_separate" ...
$ mom_bio : chr [1:974] "yes" "yes" "yes" "yes" ...
$ mom_childbirth_age : chr [1:974] "33.41" "38.84" "32.2" "30.38" ...
$ mom_race : chr [1:974] "white" "white" "white" "white" ...
$ mom_birth_country : chr [1:974] "unitedstates" "unitedstates" "unitedstates" "unitedstates" ...
$ mom_birth_country_specify : chr [1:974] "" "" "" "" ...
$ mom_education : chr [1:974] "masters" "doctorate" "doctorate" "masters" ...
$ mom_employment : chr [1:974] "full_time" "full_time" "full_time" "full_time" ...
$ mom_occupation : chr [1:974] "Architect" "Spanish teacher" "Nurse instructor" "School Psychologist" ...
$ mom_jobs_number : chr [1:974] "1" "1" "1" "1" ...
$ mom_training : chr [1:974] "no" "no" "no" "no" ...
$ biodad_childbirth_age : chr [1:974] "33.85_NA" "30.42_NA" "37.32_NA" "30.53_NA" ...
$ biodad_race : chr [1:974] "white_NA" "white_NA" "NA_NA" "NA_NA" ...
$ language_spoken_mom : chr [1:974] "english" "english spanish" "english" "english" ...
$ language_spoken_mom_comments : chr [1:974] NA NA NA NA ...
$ language_spoken_child : chr [1:974] "english" "spanish" "english" "english" ...
$ language_spoken_home_comments : chr [1:974] "FALSE_47706996" "TRUE_47707423" "FALSE_43397823" "FALSE_44074192" ...
$ language_spoken_child_comments : chr [1:974] NA NA NA NA ...
$ language_spoken_home : chr [1:974] "english" "english spanish" "english" "english" ...
$ language_spoken_house_other : chr [1:974] NA NA NA NA ...
$ language_spoken_home_other : chr [1:974] NA NA NA NA ...
$ childcare_types : chr [1:974] NA NA NA NA ...
$ childcare_location : chr [1:974] NA NA NA NA ...
$ childcare_hours : chr [1:974] NA NA NA NA ...
$ childcare_number : chr [1:974] NA NA NA NA ...
$ childcare_age : chr [1:974] NA NA NA NA ...
$ childcare_language : chr [1:974] NA NA NA NA ...
There is more work to do, but we have a version worth exporting.
Let’s add the Databrary volume ID info.
<- scr_df |>
scr_df screen_add_db_vol_id()
Then filter out rows that do not have valid volume IDs.
<- !is.na(scr_df$vol_id)
valid_db_vol
<- scr_df[valid_db_vol,] scr_df
There are n=970 valid Databrary volume IDs out of a total of n=974 screening records.
Next, we add a play_status
field based on the group-name
field from Databrary. We use group-name
for indicating “Gold”, “Silver”, or “Not run.”
Two targets in _targets.R
are relevant for this operation:
tar_target(
play_vols_df,
readr::read_csv("data/csv/_meta/play_site_vols.csv",
show_col_types = FALSE)
),
tar_target(
databrary_session_csvs,
purrr::map(play_vols_df$site_id, databrary_get_save_session_csv),
cue = tarchetypes::tar_cue_age(
name = databrary_session_csvs,
age = as.difftime(update_interval, units = update_interval_units)
)
)
These targets generate site-specific CSVs in data/csv/site_sessions
based on the database of PLAY sites contained in data/csv/_meta/play_site_vols.csv
. We load these CSVs into a single data frame.
<-
session_fns list.files("../data/csv/site_sessions", "\\.csv$", full.names = TRUE)
<-
df_sessions ::map(
purrr
session_fns,::read_csv,
readrcol_types = readr::cols(.default = 'c'),
show_col_types = FALSE
|>
) ::list_rbind() purrr
The group_name
variable contains status information about the sessions.
xtabs(~ group_name, data=df_sessions)
group_name
No visit No_visit No_Visit PLAY_Gold PLAY_Silver
1 54 88 752 122
We note that there are three different versions of no visit: “No visit”, “No_visit”, and “No_Visit”. In addition, there are n=24 sessions with NA in the group_name
.
Let’s modify those values that specify no visit on one of these forms to conform with one another.
<- df_sessions |>
df_sessions ::mutate(group_name = stringr::str_replace(group_name, "No.*isit", "No_visit")) dplyr
These could be sessions that are still in QA or which are scheduled, or there could be some other anomaly. Here, we want to select only those sessions that occurred and which have passed QA–those sessions for which group_name
is either ‘PLAY_Gold’ or ‘PLAY_Silver’.
<- df_sessions |>
df_sessions ::filter(stringr::str_detect(group_name, "PLAY_")) dplyr
We date-stamp the exported file so we can monitor progress as this workflow develops.
<- paste0("PLAY-screening-datab-", Sys.Date(), ".csv")
sfn ::write_csv(screen_datab_df, file.path(here::here(), "data/csv/screening/agg/", sfn)) readr
We also save a copy with “latest”.
<- "PLAY-screening-datab-latest.csv"
sfn ::write_csv(screen_datab_df, file.path(here::here(), "data/csv/screening/agg/", sfn)) readr