dmtools_intro

Installation

library(dmtools)

Overview

Tools for checking datasets exported from an EDC (electronic data capture) system in clinical trials. Note that the variable names in your dataset should have a postfix (e.g. _post) or a prefix (e.g. pre_). Column names must be unique.

Usage

laboratory

To check laboratory values, you need to create an Excel table like the one in the example below.

lab reference ranges
age_min age_max sex human_name name_lab_vals name_is_norm lab_vals_min lab_vals_max
18 45 f|m gluc gluc gluc_res 3.9 5.9
18 45 m ast ast ast_res 0 42
18 45 f ast ast ast_res 0 39
dataset
id age sex gluc_post gluc_res_post ast_post ast_res_post
01 19 f 5.5 norm 30 norm
02 20 m 4.1 NA 48 norm
03 22 m 9.7 norm 31 norm
# "norm" and "no" are example values: pass the labels your dataset uses for the normal / not-normal estimate
refs <- system.file("labs_refer.xlsx", package = "dmtools")
obj_lab <- lab(refs, id, age, sex, "norm", "no")
obj_lab <- obj_lab %>% check(df)

# ok - analysis, which has a correct estimate of the result
obj_lab %>% choose_test("ok")
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 01  19   f      gluc gluc_post 3.9 - 5.9      5.5    norm         5.5
#> 2 01  19   f       ast  ast_post    0 - 39       30    norm        30.0
#> 3 03  22   m       ast  ast_post    0 - 42       31    norm        31.0
#>   auto_norm
#> 1      norm
#> 2      norm
#> 3      norm

# mis - analysis, which has an incorrect estimate of the result
obj_lab %>% choose_test("mis")
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 02  20   m       ast  ast_post    0 - 42       48    norm        48.0
#> 2 03  22   m      gluc gluc_post 3.9 - 5.9      9.7    norm         9.7
#>   auto_norm
#> 1        no
#> 2        no

# skip - analysis, which has an empty value of the estimate
obj_lab %>% choose_test("skip")
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 02  20   m      gluc gluc_post 3.9 - 5.9      4.1    <NA>         4.1
#>   auto_norm
#> 1      <NA>

# all analyses
obj_lab %>% get_result()
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 01  19   f      gluc gluc_post 3.9 - 5.9      5.5    norm         5.5
#> 2 01  19   f       ast  ast_post    0 - 39       30    norm        30.0
#> 3 02  20   m      gluc gluc_post 3.9 - 5.9      4.1    <NA>         4.1
#> 4 02  20   m       ast  ast_post    0 - 42       48    norm        48.0
#> 5 03  22   m      gluc gluc_post 3.9 - 5.9      9.7    norm         9.7
#> 6 03  22   m       ast  ast_post    0 - 42       31    norm        31.0
#>   auto_norm is_right
#> 1      norm     TRUE
#> 2      norm     TRUE
#> 3      <NA>       NA
#> 4        no    FALSE
#> 5        no    FALSE
#> 6      norm     TRUE

dates

To check dates, you need to create an Excel table like the one in the example below.

timeline
num_visit name_visit minus plus shift standard_date standard_name check_equal equal_date
E1 screening 0 3 0 screen_date_E1 date of screening F NA
E2 rand 0 0 0 rand_date_E2 date of randomization T rand_date_E2
E3 visit 2 1 1 5 rand_date_E2 date of randomization T ph_date_E3
dataset
id screen_date_E1 rand_date_E2 ph_date_E3 bio_date_E3
01 1991-03-13 1991-03-15 1991-03-21 1991-03-23
02 1991-03-07 1991-03-11 1991-03-16 1991-03-16
03 1991-03-08 1991-03-10 1991-03-16 1991-03-16
# use the str_date parameter to search for the columns that contain dates, default: "DAT"
dates <- system.file("dates.xlsx", package = "dmtools")
obj_date <- date(dates, id, dplyr::contains, dplyr::matches)
obj_date <- obj_date %>% check(df)

# out - dates, which are out of the protocol's timeline
obj_date %>% choose_test("out")
#>   id         standard_name standard_date name_event   name_item  date_item
#> 1 01 date of randomization    1991-03-15    visit 2 bio_date_E3 1991-03-23
#>                standard_interval out
#> 1 1991-03-19 UTC--1991-03-21 UTC ->2

# uneq - dates, which are unequal
obj_date %>% choose_test("uneq")
#>   id name_event   name_item  date_item stand_equal is_in_timeline
#> 1 01    visit 2 bio_date_E3 1991-03-23  1991-03-21          FALSE

# ok - correct dates
obj_date %>% choose_test("ok")
#>    id         standard_name standard_date name_event      name_item  date_item
#> 1  01     date of screening    1991-03-13  screening screen_date_E1 1991-03-13
#> 2  01 date of randomization    1991-03-15       rand   rand_date_E2 1991-03-15
#> 3  01 date of randomization    1991-03-15    visit 2     ph_date_E3 1991-03-21
#> 4  02     date of screening    1991-03-07  screening screen_date_E1 1991-03-07
#> 5  02 date of randomization    1991-03-11       rand   rand_date_E2 1991-03-11
#> 6  02 date of randomization    1991-03-11    visit 2     ph_date_E3 1991-03-16
#> 7  02 date of randomization    1991-03-11    visit 2    bio_date_E3 1991-03-16
#> 8  03     date of screening    1991-03-08  screening screen_date_E1 1991-03-08
#> 9  03 date of randomization    1991-03-10       rand   rand_date_E2 1991-03-10
#> 10 03 date of randomization    1991-03-10    visit 2     ph_date_E3 1991-03-16
#> 11 03 date of randomization    1991-03-10    visit 2    bio_date_E3 1991-03-16
#>                 standard_interval stand_equal
#> 1  1991-03-13 UTC--1991-03-16 UTC  1991-03-13
#> 2  1991-03-15 UTC--1991-03-15 UTC  1991-03-15
#> 3  1991-03-19 UTC--1991-03-21 UTC  1991-03-21
#> 4  1991-03-07 UTC--1991-03-10 UTC  1991-03-07
#> 5  1991-03-11 UTC--1991-03-11 UTC  1991-03-11
#> 6  1991-03-15 UTC--1991-03-17 UTC  1991-03-16
#> 7  1991-03-15 UTC--1991-03-17 UTC  1991-03-16
#> 8  1991-03-08 UTC--1991-03-11 UTC  1991-03-08
#> 9  1991-03-10 UTC--1991-03-10 UTC  1991-03-10
#> 10 1991-03-14 UTC--1991-03-16 UTC  1991-03-16
#> 11 1991-03-14 UTC--1991-03-16 UTC  1991-03-16

# all dates
obj_date %>% get_result()
#>    id         standard_name standard_date name_event      name_item  date_item
#> 1  01     date of screening    1991-03-13  screening screen_date_E1 1991-03-13
#> 2  01 date of randomization    1991-03-15       rand   rand_date_E2 1991-03-15
#> 3  01 date of randomization    1991-03-15    visit 2     ph_date_E3 1991-03-21
#> 4  01 date of randomization    1991-03-15    visit 2    bio_date_E3 1991-03-23
#> 5  02     date of screening    1991-03-07  screening screen_date_E1 1991-03-07
#> 6  02 date of randomization    1991-03-11       rand   rand_date_E2 1991-03-11
#> 7  02 date of randomization    1991-03-11    visit 2     ph_date_E3 1991-03-16
#> 8  02 date of randomization    1991-03-11    visit 2    bio_date_E3 1991-03-16
#> 9  03     date of screening    1991-03-08  screening screen_date_E1 1991-03-08
#> 10 03 date of randomization    1991-03-10       rand   rand_date_E2 1991-03-10
#> 11 03 date of randomization    1991-03-10    visit 2     ph_date_E3 1991-03-16
#> 12 03 date of randomization    1991-03-10    visit 2    bio_date_E3 1991-03-16
#>                 standard_interval stand_equal is_in_timeline is_equal out
#> 1  1991-03-13 UTC--1991-03-16 UTC  1991-03-13           TRUE     TRUE   0
#> 2  1991-03-15 UTC--1991-03-15 UTC  1991-03-15           TRUE     TRUE   0
#> 3  1991-03-19 UTC--1991-03-21 UTC  1991-03-21           TRUE     TRUE   0
#> 4  1991-03-19 UTC--1991-03-21 UTC  1991-03-21          FALSE    FALSE ->2
#> 5  1991-03-07 UTC--1991-03-10 UTC  1991-03-07           TRUE     TRUE   0
#> 6  1991-03-11 UTC--1991-03-11 UTC  1991-03-11           TRUE     TRUE   0
#> 7  1991-03-15 UTC--1991-03-17 UTC  1991-03-16           TRUE     TRUE   0
#> 8  1991-03-15 UTC--1991-03-17 UTC  1991-03-16           TRUE     TRUE   0
#> 9  1991-03-08 UTC--1991-03-11 UTC  1991-03-08           TRUE     TRUE   0
#> 10 1991-03-10 UTC--1991-03-10 UTC  1991-03-10           TRUE     TRUE   0
#> 11 1991-03-14 UTC--1991-03-16 UTC  1991-03-16           TRUE     TRUE   0
#> 12 1991-03-14 UTC--1991-03-16 UTC  1991-03-16           TRUE     TRUE   0

dplyr::contains - a function that selects the necessary visit or event, e.g. dplyr::starts_with or dplyr::contains. It works like df %>% select(contains("E1")). You can also use dplyr::starts_with, which works like df %>% select(starts_with("V1")).

dplyr::matches - a function that selects the dates within the necessary visit, e.g. dplyr::matches or dplyr::contains. It works like visit_one %>% select(contains("DAT")); default: dplyr::contains().

WBCs count

To check WBC counts, you need to create an Excel table like the one in the example below.
The formula used for the check is (all * relative) / 100 = absolute; e.g. for id 01 in the dataset below, (5.6 * 21) / 100 = 1.176 ≈ 1.18.

wbcc
human_name absolute relative all
lymphocytes lym_abs lym_rel wbc
dataset
id wbc_post lym_rel_post lym_abs_post
01 5.6 21 1.18
02 7.8 25 1.95
03 8.1 30 2.13

sites

Use this when the clinical trial has several sites and each site has its own lab reference ranges.

lab reference ranges s01
age_min age_max sex human_name name_lab_vals name_is_norm lab_vals_min lab_vals_max
18 45 f|m gluc gluc gluc_res 4.0 5.9
18 40 m ast ast ast_res 0 41
18 39 f ast ast ast_res 0 43
lab reference ranges s02
age_min age_max sex human_name name_lab_vals name_is_norm lab_vals_min lab_vals_max
18 45 f|m gluc gluc gluc_res 4.2 6.1
18 40 m ast ast ast_res 0 35
19 41 f ast ast ast_res 0 41
dataset
site id age sex gluc_post gluc_res_post ast_post ast_res_post
site 01 01 19 f 5.5 norm 30 NA
site 02 02 20 m 4.1 no 48 norm
refs_s01 <- system.file("labs_refer_s01.xlsx", package = "dmtools")
refs_s02 <- system.file("labs_refer_s02.xlsx", package = "dmtools")

s01_lab <- lab(refs_s01, id, age, sex, "norm", "no", site = "site 01")
s02_lab <- lab(refs_s02, id, age, sex, "norm", "no", site = "site 02")

labs <- list(s01_lab, s02_lab)
labs <- labs %>% check_sites(df, site)

# mis - analysis, which has an incorrect estimate of the result
labs %>% test_sites(function (lab) choose_test(lab, "mis"))
#>   id age sex human_lab name_lab   refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 02  20   m       ast ast_post 0 - 35       48    norm          48        no
#>   num_site
#> 1  site 02

# ok - analysis, which has a correct estimate of the result
labs %>% test_sites(function (lab) choose_test(lab, "ok")) 
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 01  19   f      gluc gluc_post   4 - 5.9      5.5    norm         5.5
#> 2 02  20   m      gluc gluc_post 4.2 - 6.1      4.1      no         4.1
#>   auto_norm num_site
#> 1      norm  site 01
#> 2        no  site 02

# skip - analysis, which has an empty value of the estimate
labs %>% test_sites(function (lab) choose_test(lab, "skip"))
#>   id age sex human_lab name_lab   refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 01  19   f       ast ast_post 0 - 43       30    <NA>          30      <NA>
#>   num_site
#> 1  site 01

# all analyses
labs %>% test_sites(function (lab) get_result(lab))
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 01  19   f      gluc gluc_post   4 - 5.9      5.5    norm         5.5
#> 2 01  19   f       ast  ast_post    0 - 43       30    <NA>        30.0
#> 3 02  20   m      gluc gluc_post 4.2 - 6.1      4.1      no         4.1
#> 4 02  20   m       ast  ast_post    0 - 35       48    norm        48.0
#>   auto_norm is_right num_site
#> 1      norm     TRUE  site 01
#> 2      <NA>       NA  site 01
#> 3        no     TRUE  site 02
#> 4        no    FALSE  site 02

# you can combine sites that share the same reference ranges by separating their names with |
comb_lab <- lab(refs_s01, id, age, sex, "norm", "no", site = "site 01|site 02")
comb_labs <- list(comb_lab)

comb_labs <- comb_labs %>% check_sites(df, site)
comb_labs %>% test_sites(function (lab) choose_test(lab, "mis"))
#>   id age sex human_lab  name_lab    refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 02  20   m      gluc gluc_post 4 - 5.9      4.1      no         4.1      norm
#> 2 02  20   m       ast  ast_post  0 - 41       48    norm        48.0        no
#>          num_site
#> 1 site 01|site 02
#> 2 site 01|site 02

rename

A function for renaming the dataset using the CRFs (case report forms).