This package is for checking datasets exported from EDC in clinical trials. Note: the variable names in your dataset should have a postfix ( _post ) or a prefix ( pre_ ), and column names must be unique.
For the laboratory check, you need to create an Excel table like the one in the example.
age_min | age_max | sex | human_name | name_lab_vals | name_is_norm | lab_vals_min | lab_vals_max |
18 | 45 | f|m | gluc | gluc | gluc_res | 3.9 | 5.9 |
18 | 45 | m | ast | ast | ast_res | 0 | 42 |
18 | 45 | f | ast | ast | ast_res | 0 | 39 |
id | age | sex | gluc_post | gluc_res_post | ast_post | ast_res_post |
01 | 19 | f | 5.5 | norm | 30 | norm |
02 | 20 | m | 4.1 | NA | 48 | norm |
03 | 22 | m | 9.7 | norm | 31 | norm |
# "norm" and "no" are only examples; take the actual values of the estimate variable from your dataset
refs <- system.file("labs_refer.xlsx", package = "dmtools")
obj_lab <- lab(refs, id, age, sex, "norm", "no")
obj_lab <- obj_lab %>% check(df)
# ok - analysis, which has a correct estimate of the result
obj_lab %>% choose_test("ok")
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl
#> 1 01 19 f gluc gluc_post 3.9 - 5.9 5.5 norm 5.5
#> 2 01 19 f ast ast_post 0 - 39 30 norm 30.0
#> 3 03 22 m ast ast_post 0 - 42 31 norm 31.0
#> auto_norm
#> 1 norm
#> 2 norm
#> 3 norm
# mis - analysis, which has an incorrect estimate of the result
obj_lab %>% choose_test("mis")
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl
#> 1 02 20 m ast ast_post 0 - 42 48 norm 48.0
#> 2 03 22 m gluc gluc_post 3.9 - 5.9 9.7 norm 9.7
#> auto_norm
#> 1 no
#> 2 no
# skip - analysis, which has an empty value of the estimate
obj_lab %>% choose_test("skip")
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl
#> 1 02 20 m gluc gluc_post 3.9 - 5.9 4.1 <NA> 4.1
#> auto_norm
#> 1 <NA>
# all analyses
obj_lab %>% get_result()
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl
#> 1 01 19 f gluc gluc_post 3.9 - 5.9 5.5 norm 5.5
#> 2 01 19 f ast ast_post 0 - 39 30 norm 30.0
#> 3 02 20 m gluc gluc_post 3.9 - 5.9 4.1 <NA> 4.1
#> 4 02 20 m ast ast_post 0 - 42 48 norm 48.0
#> 5 03 22 m gluc gluc_post 3.9 - 5.9 9.7 norm 9.7
#> 6 03 22 m ast ast_post 0 - 42 31 norm 31.0
#> auto_norm is_right
#> 1 norm TRUE
#> 2 norm TRUE
#> 3 <NA> NA
#> 4 no FALSE
#> 5 no FALSE
#> 6 norm TRUE
For the dates check, you need to create an Excel table like the one in the example.
num_visit | name_visit | minus | plus | shift | standard_date | standard_name | check_equal | equal_date |
E1 | screening | 0 | 3 | 0 | screen_date_E1 | date of screening | F | NA |
E2 | rand | 0 | 0 | 0 | rand_date_E2 | date of randomization | T | rand_date_E2 |
E3 | visit 2 | 1 | 1 | 5 | rand_date_E2 | date of randomization | T | ph_date_E3 |
id | screen_date_E1 | rand_date_E2 | ph_date_E3 | bio_date_E3 |
01 | 1991-03-13 | 1991-03-15 | 1991-03-21 | 1991-03-23 |
02 | 1991-03-07 | 1991-03-11 | 1991-03-16 | 1991-03-16 |
03 | 1991-03-08 | 1991-03-10 | 1991-03-16 | 1991-03-16 |
# use the parameter str_date to search for columns with dates, default: "DAT"
dates <- system.file("dates.xlsx", package = "dmtools")
obj_date <- date(dates, id, dplyr::contains, dplyr::matches)
obj_date <- obj_date %>% check(df)
# out - dates, which are out of the protocol's timeline
obj_date %>% choose_test("out")
#> id standard_name standard_date name_event name_item date_item
#> 1 01 date of randomization 1991-03-15 visit 2 bio_date_E3 1991-03-23
#> standard_interval out
#> 1 1991-03-19 UTC--1991-03-21 UTC ->2
# uneq - dates, which are unequal
obj_date %>% choose_test("uneq")
#> id name_event name_item date_item stand_equal is_in_timeline
#> 1 01 visit 2 bio_date_E3 1991-03-23 1991-03-21 FALSE
# ok - correct dates
obj_date %>% choose_test("ok")
#> id standard_name standard_date name_event name_item date_item
#> 1 01 date of screening 1991-03-13 screening screen_date_E1 1991-03-13
#> 2 01 date of randomization 1991-03-15 rand rand_date_E2 1991-03-15
#> 3 01 date of randomization 1991-03-15 visit 2 ph_date_E3 1991-03-21
#> 4 02 date of screening 1991-03-07 screening screen_date_E1 1991-03-07
#> 5 02 date of randomization 1991-03-11 rand rand_date_E2 1991-03-11
#> 6 02 date of randomization 1991-03-11 visit 2 ph_date_E3 1991-03-16
#> 7 02 date of randomization 1991-03-11 visit 2 bio_date_E3 1991-03-16
#> 8 03 date of screening 1991-03-08 screening screen_date_E1 1991-03-08
#> 9 03 date of randomization 1991-03-10 rand rand_date_E2 1991-03-10
#> 10 03 date of randomization 1991-03-10 visit 2 ph_date_E3 1991-03-16
#> 11 03 date of randomization 1991-03-10 visit 2 bio_date_E3 1991-03-16
#> standard_interval stand_equal
#> 1 1991-03-13 UTC--1991-03-16 UTC 1991-03-13
#> 2 1991-03-15 UTC--1991-03-15 UTC 1991-03-15
#> 3 1991-03-19 UTC--1991-03-21 UTC 1991-03-21
#> 4 1991-03-07 UTC--1991-03-10 UTC 1991-03-07
#> 5 1991-03-11 UTC--1991-03-11 UTC 1991-03-11
#> 6 1991-03-15 UTC--1991-03-17 UTC 1991-03-16
#> 7 1991-03-15 UTC--1991-03-17 UTC 1991-03-16
#> 8 1991-03-08 UTC--1991-03-11 UTC 1991-03-08
#> 9 1991-03-10 UTC--1991-03-10 UTC 1991-03-10
#> 10 1991-03-14 UTC--1991-03-16 UTC 1991-03-16
#> 11 1991-03-14 UTC--1991-03-16 UTC 1991-03-16
# all dates
obj_date %>% get_result()
#> id standard_name standard_date name_event name_item date_item
#> 1 01 date of screening 1991-03-13 screening screen_date_E1 1991-03-13
#> 2 01 date of randomization 1991-03-15 rand rand_date_E2 1991-03-15
#> 3 01 date of randomization 1991-03-15 visit 2 ph_date_E3 1991-03-21
#> 4 01 date of randomization 1991-03-15 visit 2 bio_date_E3 1991-03-23
#> 5 02 date of screening 1991-03-07 screening screen_date_E1 1991-03-07
#> 6 02 date of randomization 1991-03-11 rand rand_date_E2 1991-03-11
#> 7 02 date of randomization 1991-03-11 visit 2 ph_date_E3 1991-03-16
#> 8 02 date of randomization 1991-03-11 visit 2 bio_date_E3 1991-03-16
#> 9 03 date of screening 1991-03-08 screening screen_date_E1 1991-03-08
#> 10 03 date of randomization 1991-03-10 rand rand_date_E2 1991-03-10
#> 11 03 date of randomization 1991-03-10 visit 2 ph_date_E3 1991-03-16
#> 12 03 date of randomization 1991-03-10 visit 2 bio_date_E3 1991-03-16
#> standard_interval stand_equal is_in_timeline is_equal out
#> 1 1991-03-13 UTC--1991-03-16 UTC 1991-03-13 TRUE TRUE 0
#> 2 1991-03-15 UTC--1991-03-15 UTC 1991-03-15 TRUE TRUE 0
#> 3 1991-03-19 UTC--1991-03-21 UTC 1991-03-21 TRUE TRUE 0
#> 4 1991-03-19 UTC--1991-03-21 UTC 1991-03-21 FALSE FALSE ->2
#> 5 1991-03-07 UTC--1991-03-10 UTC 1991-03-07 TRUE TRUE 0
#> 6 1991-03-11 UTC--1991-03-11 UTC 1991-03-11 TRUE TRUE 0
#> 7 1991-03-15 UTC--1991-03-17 UTC 1991-03-16 TRUE TRUE 0
#> 8 1991-03-15 UTC--1991-03-17 UTC 1991-03-16 TRUE TRUE 0
#> 9 1991-03-08 UTC--1991-03-11 UTC 1991-03-08 TRUE TRUE 0
#> 10 1991-03-10 UTC--1991-03-10 UTC 1991-03-10 TRUE TRUE 0
#> 11 1991-03-14 UTC--1991-03-16 UTC 1991-03-16 TRUE TRUE 0
#> 12 1991-03-14 UTC--1991-03-16 UTC 1991-03-16 TRUE TRUE 0
- A function that selects the necessary visit or event, e.g. dplyr::starts_with or dplyr::contains. It works like df %>% select(contains("E1")). You can also use dplyr::starts_with, which works like df %>% select(starts_with("V1")).
- A function that selects the dates from the necessary visit, e.g. dplyr::matches or dplyr::contains. It works like visit_one %>% select(contains("DAT")); default: dplyr::contains().
For the WBC count check, you need to create an Excel table like the one in the example.
The formula for the check is (all * relative) / 100 = absolute.
human_name | absolute | relative | all |
lymphocytes | lym_abs | lym_rel | wbc |
id | wbc_post | lym_rel_post | lym_abs_post |
01 | 5.6 | 21 | 1.18 |
02 | 7.8 | 25 | 1.95 |
03 | 8.1 | 30 | 2.13 |
wbcc_file <- system.file("wbcc.xlsx", package = "dmtools")
wbcc <- wbc(wbcc_file, id)
wbcc <- wbcc %>% check(df)
# mis - wbc, which has an incorrect calculation
wbcc %>% choose_test("mis")
#> id human_name lab_name rel all abs auto_abs
#> 1 03 lymphocytes lym_abs_post 30 8.1 2.13 2.43
# ok - wbc, which has a correct calculation
wbcc %>% choose_test("ok")
#> id human_name lab_name rel all abs auto_abs
#> 1 01 lymphocytes lym_abs_post 21 5.6 1.18 1.18
#> 2 02 lymphocytes lym_abs_post 25 7.8 1.95 1.95
# all WBCs count
wbcc %>% get_result()
#> id human_name lab_name rel all abs auto_abs is_right
#> 1 01 lymphocytes lym_abs_post 21 5.6 1.18 1.18 TRUE
#> 2 02 lymphocytes lym_abs_post 25 7.8 1.95 1.95 TRUE
#> 3 03 lymphocytes lym_abs_post 30 8.1 2.13 2.43 FALSE
Use the following approach if the clinical trial has several sites with different lab reference ranges.
age_min | age_max | sex | human_name | name_lab_vals | name_is_norm | lab_vals_min | lab_vals_max |
18 | 45 | f|m | gluc | gluc | gluc_res | 4.0 | 5.9 |
18 | 40 | m | ast | ast | ast_res | 0 | 41 |
18 | 39 | f | ast | ast | ast_res | 0 | 43 |
age_min | age_max | sex | human_name | name_lab_vals | name_is_norm | lab_vals_min | lab_vals_max |
18 | 45 | f|m | gluc | gluc | gluc_res | 4.2 | 6.1 |
18 | 40 | m | ast | ast | ast_res | 0 | 35 |
19 | 41 | f | ast | ast | ast_res | 0 | 41 |
site | id | age | sex | gluc_post | gluc_res_post | ast_post | ast_res_post |
site 01 | 01 | 19 | f | 5.5 | norm | 30 | NA |
site 02 | 02 | 20 | m | 4.1 | no | 48 | norm |
refs_s01 <- system.file("labs_refer_s01.xlsx", package = "dmtools")
refs_s02 <- system.file("labs_refer_s02.xlsx", package = "dmtools")
s01_lab <- lab(refs_s01, id, age, sex, "norm", "no", site = "site 01")
s02_lab <- lab(refs_s02, id, age, sex, "norm", "no", site = "site 02")
labs <- list(s01_lab, s02_lab)
labs <- labs %>% check_sites(df, site)
# mis - analysis, which has an incorrect estimate of the result
labs %>% test_sites(function (lab) choose_test(lab, "mis"))
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 02 20 m ast ast_post 0 - 35 48 norm 48 no
#> num_site
#> 1 site 02
# ok - analysis, which has a correct estimate of the result
labs %>% test_sites(function (lab) choose_test(lab, "ok"))
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl
#> 1 01 19 f gluc gluc_post 4 - 5.9 5.5 norm 5.5
#> 2 02 20 m gluc gluc_post 4.2 - 6.1 4.1 no 4.1
#> auto_norm num_site
#> 1 norm site 01
#> 2 no site 02
# skip - analysis, which has an empty value of the estimate
labs %>% test_sites(function (lab) choose_test(lab, "skip"))
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 01 19 f ast ast_post 0 - 43 30 <NA> 30 <NA>
#> num_site
#> 1 site 01
# all analyses
labs %>% test_sites(function (lab) get_result(lab))
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl
#> 1 01 19 f gluc gluc_post 4 - 5.9 5.5 norm 5.5
#> 2 01 19 f ast ast_post 0 - 43 30 <NA> 30.0
#> 3 02 20 m gluc gluc_post 4.2 - 6.1 4.1 no 4.1
#> 4 02 20 m ast ast_post 0 - 35 48 norm 48.0
#> auto_norm is_right num_site
#> 1 norm TRUE site 01
#> 2 <NA> NA site 01
#> 3 no TRUE site 02
#> 4 no FALSE site 02
# you can combine sites by separating them with |
comb_lab <- lab(refs_s01, id, age, sex, "norm", "no", site = "site 01|site 02")
comb_labs <- list(comb_lab)
comb_labs <- comb_labs %>% check_sites(df, site)
comb_labs %>% test_sites(function (lab) choose_test(lab, "mis"))
#> id age sex human_lab name_lab refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 02 20 m gluc gluc_post 4 - 5.9 4.1 no 4.1 norm
#> 2 02 20 m ast ast_post 0 - 41 48 norm 48.0 no
#> num_site
#> 1 site 01|site 02
#> 2 site 01|site 02
Function to rename the dataset, using crfs.