dmtools_intro

Usage

laboratory

For the laboratory check, you need to create an Excel table like the one in the example.

age_min - lower age limit, a whole number (age >= age_min)
age_max - upper age limit (age <= age_max); if there is none, type Inf
sex - for both sexes, use |
human_name - friendly name for the analysis
name_lab_vals - name of the analysis in the dataset, without postfix or prefix
name_is_norm - name of the estimate in the dataset, without postfix or prefix
lab_vals_min - lower limit of normal (value >= lab_vals_min)
lab_vals_max - upper limit of normal (value <= lab_vals_max)

lab reference ranges
age_min	age_max	sex	human_name	name_lab_vals	name_is_norm	lab_vals_min	lab_vals_max
18	45	f|m	gluc	gluc	gluc_res	3.9	5.9
18	45	m	ast	ast	ast_res	0	42
18	45	f	ast	ast	ast_res	0	39

dataset
id	age	sex	gluc_post	gluc_res_post	ast_post	ast_res_post
01	19	f	5.5	norm	30	norm
02	20	m	4.1	NA	48	norm
03	22	m	9.7	norm	31	norm

# "norm" and "no" are examples; use the estimate values that appear in your dataset
refs <- system.file("labs_refer.xlsx", package = "dmtools")
obj_lab <- lab(refs, id, age, sex, "norm", "no")
obj_lab <- obj_lab %>% check(df)

# ok - analysis, which has a correct estimate of the result
obj_lab %>% choose_test("ok")
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 01  19   f      gluc gluc_post 3.9 - 5.9      5.5    norm         5.5
#> 2 01  19   f       ast  ast_post    0 - 39       30    norm        30.0
#> 3 03  22   m       ast  ast_post    0 - 42       31    norm        31.0
#>   auto_norm
#> 1      norm
#> 2      norm
#> 3      norm

# mis - analysis, which has an incorrect estimate of the result
obj_lab %>% choose_test("mis")
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 02  20   m       ast  ast_post    0 - 42       48    norm        48.0
#> 2 03  22   m      gluc gluc_post 3.9 - 5.9      9.7    norm         9.7
#>   auto_norm
#> 1        no
#> 2        no

# skip - analysis, which has an empty value of the estimate
obj_lab %>% choose_test("skip")
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 02  20   m      gluc gluc_post 3.9 - 5.9      4.1    <NA>         4.1
#>   auto_norm
#> 1      <NA>

# all analyses
obj_lab %>% get_result()
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 01  19   f      gluc gluc_post 3.9 - 5.9      5.5    norm         5.5
#> 2 01  19   f       ast  ast_post    0 - 39       30    norm        30.0
#> 3 02  20   m      gluc gluc_post 3.9 - 5.9      4.1    <NA>         4.1
#> 4 02  20   m       ast  ast_post    0 - 42       48    norm        48.0
#> 5 03  22   m      gluc gluc_post 3.9 - 5.9      9.7    norm         9.7
#> 6 03  22   m       ast  ast_post    0 - 42       31    norm        31.0
#>   auto_norm is_right
#> 1      norm     TRUE
#> 2      norm     TRUE
#> 3      <NA>       NA
#> 4        no    FALSE
#> 5        no    FALSE
#> 6      norm     TRUE

dates

For the dates check, you need to create an Excel table like the one in the example.

minus, plus, shift - parameters of the timeline
num_visit - parameter for the selection function, e.g. contains(num_visit)
name_visit - friendly name of the visit
standard_date - standard date from the dataset, with postfix or prefix
standard_name - friendly name of the standard date
check_equal - whether to check date equality within a visit
equal_date - date from the dataset to check equality against, with postfix or prefix

timeline
num_visit	name_visit	minus	plus	shift	standard_date	standard_name	check_equal	equal_date
E1	screening	0	3	0	screen_date_E1	date of screening	F	NA
E2	rand	0	0	0	rand_date_E2	date of randomization	T	rand_date_E2
E3	visit 2	1	1	5	rand_date_E2	date of randomization	T	ph_date_E3

dataset
id	screen_date_E1	rand_date_E2	ph_date_E3	bio_date_E3
01	1991-03-13	1991-03-15	1991-03-21	1991-03-23
02	1991-03-07	1991-03-11	1991-03-16	1991-03-16
03	1991-03-08	1991-03-10	1991-03-16	1991-03-16

# use the str_date parameter to search for columns with dates, default: "DAT"
dates <- system.file("dates.xlsx", package = "dmtools")
obj_date <- date(dates, id, dplyr::contains, dplyr::matches)
obj_date <- obj_date %>% check(df)

# out - dates, which are out of the protocol's timeline
obj_date %>% choose_test("out")
#>   id         standard_name standard_date name_event   name_item  date_item
#> 1 01 date of randomization    1991-03-15    visit 2 bio_date_E3 1991-03-23
#>                standard_interval out
#> 1 1991-03-19 UTC--1991-03-21 UTC ->2

# uneq - dates, which are unequal
obj_date %>% choose_test("uneq")
#>   id name_event   name_item  date_item stand_equal is_in_timeline
#> 1 01    visit 2 bio_date_E3 1991-03-23  1991-03-21          FALSE

# ok - correct dates
obj_date %>% choose_test("ok")
#>    id         standard_name standard_date name_event      name_item  date_item
#> 1  01     date of screening    1991-03-13  screening screen_date_E1 1991-03-13
#> 2  01 date of randomization    1991-03-15       rand   rand_date_E2 1991-03-15
#> 3  01 date of randomization    1991-03-15    visit 2     ph_date_E3 1991-03-21
#> 4  02     date of screening    1991-03-07  screening screen_date_E1 1991-03-07
#> 5  02 date of randomization    1991-03-11       rand   rand_date_E2 1991-03-11
#> 6  02 date of randomization    1991-03-11    visit 2     ph_date_E3 1991-03-16
#> 7  02 date of randomization    1991-03-11    visit 2    bio_date_E3 1991-03-16
#> 8  03     date of screening    1991-03-08  screening screen_date_E1 1991-03-08
#> 9  03 date of randomization    1991-03-10       rand   rand_date_E2 1991-03-10
#> 10 03 date of randomization    1991-03-10    visit 2     ph_date_E3 1991-03-16
#> 11 03 date of randomization    1991-03-10    visit 2    bio_date_E3 1991-03-16
#>                 standard_interval stand_equal
#> 1  1991-03-13 UTC--1991-03-16 UTC  1991-03-13
#> 2  1991-03-15 UTC--1991-03-15 UTC  1991-03-15
#> 3  1991-03-19 UTC--1991-03-21 UTC  1991-03-21
#> 4  1991-03-07 UTC--1991-03-10 UTC  1991-03-07
#> 5  1991-03-11 UTC--1991-03-11 UTC  1991-03-11
#> 6  1991-03-15 UTC--1991-03-17 UTC  1991-03-16
#> 7  1991-03-15 UTC--1991-03-17 UTC  1991-03-16
#> 8  1991-03-08 UTC--1991-03-11 UTC  1991-03-08
#> 9  1991-03-10 UTC--1991-03-10 UTC  1991-03-10
#> 10 1991-03-14 UTC--1991-03-16 UTC  1991-03-16
#> 11 1991-03-14 UTC--1991-03-16 UTC  1991-03-16

# all dates
obj_date %>% get_result()
#>    id         standard_name standard_date name_event      name_item  date_item
#> 1  01     date of screening    1991-03-13  screening screen_date_E1 1991-03-13
#> 2  01 date of randomization    1991-03-15       rand   rand_date_E2 1991-03-15
#> 3  01 date of randomization    1991-03-15    visit 2     ph_date_E3 1991-03-21
#> 4  01 date of randomization    1991-03-15    visit 2    bio_date_E3 1991-03-23
#> 5  02     date of screening    1991-03-07  screening screen_date_E1 1991-03-07
#> 6  02 date of randomization    1991-03-11       rand   rand_date_E2 1991-03-11
#> 7  02 date of randomization    1991-03-11    visit 2     ph_date_E3 1991-03-16
#> 8  02 date of randomization    1991-03-11    visit 2    bio_date_E3 1991-03-16
#> 9  03     date of screening    1991-03-08  screening screen_date_E1 1991-03-08
#> 10 03 date of randomization    1991-03-10       rand   rand_date_E2 1991-03-10
#> 11 03 date of randomization    1991-03-10    visit 2     ph_date_E3 1991-03-16
#> 12 03 date of randomization    1991-03-10    visit 2    bio_date_E3 1991-03-16
#>                 standard_interval stand_equal is_in_timeline is_equal out
#> 1  1991-03-13 UTC--1991-03-16 UTC  1991-03-13           TRUE     TRUE   0
#> 2  1991-03-15 UTC--1991-03-15 UTC  1991-03-15           TRUE     TRUE   0
#> 3  1991-03-19 UTC--1991-03-21 UTC  1991-03-21           TRUE     TRUE   0
#> 4  1991-03-19 UTC--1991-03-21 UTC  1991-03-21          FALSE    FALSE ->2
#> 5  1991-03-07 UTC--1991-03-10 UTC  1991-03-07           TRUE     TRUE   0
#> 6  1991-03-11 UTC--1991-03-11 UTC  1991-03-11           TRUE     TRUE   0
#> 7  1991-03-15 UTC--1991-03-17 UTC  1991-03-16           TRUE     TRUE   0
#> 8  1991-03-15 UTC--1991-03-17 UTC  1991-03-16           TRUE     TRUE   0
#> 9  1991-03-08 UTC--1991-03-11 UTC  1991-03-08           TRUE     TRUE   0
#> 10 1991-03-10 UTC--1991-03-10 UTC  1991-03-10           TRUE     TRUE   0
#> 11 1991-03-14 UTC--1991-03-16 UTC  1991-03-16           TRUE     TRUE   0
#> 12 1991-03-14 UTC--1991-03-16 UTC  1991-03-16           TRUE     TRUE   0

dplyr::contains - A function that selects the necessary visit or event, e.g. dplyr::starts_with or dplyr::contains. It works like df %>% select(contains("E1")). You can also use dplyr::starts_with, which works like df %>% select(starts_with("V1")).

dplyr::matches - A function that selects the dates from the necessary visit, e.g. dplyr::matches or dplyr::contains. It works like visit_one %>% select(contains("DAT")); default: dplyr::contains().

WBCs count

For the WBCs count check, you need to create an Excel table like the one in the example.
The formula for the check is (all * relative) / 100 = absolute.

human_name - friendly name for the WBCs count
absolute - absolute WBCs from the dataset, without postfix or prefix
relative - relative WBCs from the dataset, without postfix or prefix
all - total WBC count from the dataset, without postfix or prefix

wbcc
human_name	absolute	relative	all
lymphocytes	lym_abs	lym_rel	wbc

dataset
id	wbc_post	lym_rel_post	lym_abs_post
01	5.6	21	1.18
02	7.8	25	1.95
03	8.1	30	2.13

wbcc_file <- system.file("wbcc.xlsx", package = "dmtools")
wbcc <- wbc(wbcc_file, id)
wbcc <- wbcc %>% check(df)

# mis - wbc, which has an incorrect calculation
wbcc %>% choose_test("mis")
#>   id  human_name     lab_name rel all  abs auto_abs
#> 1 03 lymphocytes lym_abs_post  30 8.1 2.13     2.43

# ok - wbc, which has a correct calculation
wbcc %>% choose_test("ok")
#>   id  human_name     lab_name rel all  abs auto_abs
#> 1 01 lymphocytes lym_abs_post  21 5.6 1.18     1.18
#> 2 02 lymphocytes lym_abs_post  25 7.8 1.95     1.95

# all WBCs count
wbcc %>% get_result()
#>   id  human_name     lab_name rel all  abs auto_abs is_right
#> 1 01 lymphocytes lym_abs_post  21 5.6 1.18     1.18     TRUE
#> 2 02 lymphocytes lym_abs_post  25 7.8 1.95     1.95     TRUE
#> 3 03 lymphocytes lym_abs_post  30 8.1 2.13     2.43    FALSE

sites

Use this when the clinical trial has several sites with different lab reference ranges.

lab reference ranges s01
age_min	age_max	sex	human_name	name_lab_vals	name_is_norm	lab_vals_min	lab_vals_max
18	45	f|m	gluc	gluc	gluc_res	4.0	5.9
18	40	m	ast	ast	ast_res	0	41
18	39	f	ast	ast	ast_res	0	43

lab reference ranges s02
age_min	age_max	sex	human_name	name_lab_vals	name_is_norm	lab_vals_min	lab_vals_max
18	45	f|m	gluc	gluc	gluc_res	4.2	6.1
18	40	m	ast	ast	ast_res	0	35
19	41	f	ast	ast	ast_res	0	41

dataset
site	id	age	sex	gluc_post	gluc_res_post	ast_post	ast_res_post
site 01	01	19	f	5.5	norm	30	NA
site 02	02	20	m	4.1	no	48	norm

refs_s01 <- system.file("labs_refer_s01.xlsx", package = "dmtools")
refs_s02 <- system.file("labs_refer_s02.xlsx", package = "dmtools")

s01_lab <- lab(refs_s01, id, age, sex, "norm", "no", site = "site 01")
s02_lab <- lab(refs_s02, id, age, sex, "norm", "no", site = "site 02")

labs <- list(s01_lab, s02_lab)
labs <- labs %>% check_sites(df, site)

# mis - analysis, which has an incorrect estimate of the result
labs %>% test_sites(function (lab) choose_test(lab, "mis"))
#>   id age sex human_lab name_lab   refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 02  20   m       ast ast_post 0 - 35       48    norm          48        no
#>   num_site
#> 1  site 02

# ok - analysis, which has a correct estimate of the result
labs %>% test_sites(function (lab) choose_test(lab, "ok")) 
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 01  19   f      gluc gluc_post   4 - 5.9      5.5    norm         5.5
#> 2 02  20   m      gluc gluc_post 4.2 - 6.1      4.1      no         4.1
#>   auto_norm num_site
#> 1      norm  site 01
#> 2        no  site 02

# skip - analysis, which has an empty value of the estimate
labs %>% test_sites(function (lab) choose_test(lab, "skip"))
#>   id age sex human_lab name_lab   refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 01  19   f       ast ast_post 0 - 43       30    <NA>          30      <NA>
#>   num_site
#> 1  site 01

# all analyses
labs %>% test_sites(function (lab) get_result(lab))
#>   id age sex human_lab  name_lab      refs lab_vals is_norm vals_to_dbl
#> 1 01  19   f      gluc gluc_post   4 - 5.9      5.5    norm         5.5
#> 2 01  19   f       ast  ast_post    0 - 43       30    <NA>        30.0
#> 3 02  20   m      gluc gluc_post 4.2 - 6.1      4.1      no         4.1
#> 4 02  20   m       ast  ast_post    0 - 35       48    norm        48.0
#>   auto_norm is_right num_site
#> 1      norm     TRUE  site 01
#> 2      <NA>       NA  site 01
#> 3        no     TRUE  site 02
#> 4        no    FALSE  site 02

# you can combine sites, use |
comb_lab <- lab(refs_s01, id, age, sex, "norm", "no", site = "site 01|site 02")
comb_labs <- list(comb_lab)

comb_labs <- comb_labs %>% check_sites(df, site)
comb_labs %>% test_sites(function (lab) choose_test(lab, "mis"))
#>   id age sex human_lab  name_lab    refs lab_vals is_norm vals_to_dbl auto_norm
#> 1 02  20   m      gluc gluc_post 4 - 5.9      4.1      no         4.1      norm
#> 2 02  20   m       ast  ast_post  0 - 41       48    norm        48.0        no
#>          num_site
#> 1 site 01|site 02
#> 2 site 01|site 02

rename

A function to rename the dataset using the CRFs.

rename_dataset("./crfs", "old_name", "new_name", 2)

“./crfs” - path to crfs
“old_name” - variable for names in the dataset, without postfix or prefix
“new_name” - variable for necessary names, names should be unique
2 - the position of the sheet in the Excel document where dmtools can find “old_name” and “new_name”