ldamatch demos
Kyle Gorman & Géza Kiss
2016-06-23
Univariate case…
library(ldamatch)
# Reproducible toy data: 2 * SIZE control subjects followed by SIZE treatment
# subjects.
set.seed(257)
SIZE <- 15
# Group labels, in the same order as the covariate values below.
condition <- as.factor(
  rep(c("control", "treatment"), times = c(2 * SIZE, SIZE))
)
# Controls are standard-normal draws; treatment draws use mean 1 and sd 2.
# The control draws are made first, so the random-number stream is consumed
# in the same order as the labels.
covariate1 <- c(
  rnorm(n = 2 * SIZE),
  rnorm(n = SIZE, mean = 1, sd = 2)
)
Univariate case (with heuristic1 search)…
is.in <- match_groups(condition, covariate1, t_halt)
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic1 search.
## Finished heuristic1 search in 0.222 seconds.
## Eventual group sizes: control: 28 treatment: 14
## Removed subjects: control: 2 treatment: 1
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 2 28
## treatment 1 14
Univariate case (with random search)…
is.in <- match_groups(condition, covariate1, t_halt, method = "random")
## Initial group sizes: control: 30 treatment: 15
## Starting random search.
## Found matching: control: 28; treatment: 13 (total: 41)
## Found matching: control: 29; treatment: 13 (total: 42)
## Found matching: control: 29; treatment: 13 (total: 42)
## Found matching: control: 29; treatment: 13 (total: 42)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Finished random search in 21.058 seconds.
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Univariate case (with heuristic2 search)…
is.in <- match_groups(condition, covariate1, t_halt, method = "heuristic2")
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic2 search.
## Finished heuristic2 search in 0.259 seconds.
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Multivariate case…
# Second covariate, drawn with the same layout as covariate1: 2 * SIZE
# standard-normal values, then SIZE values with mean 1 and sd 2.
covariate2 <- c(
  rnorm(n = 2 * SIZE),
  rnorm(n = SIZE, mean = 1, sd = 2)
)
# Two-column matrix; the column names are taken from the variable names.
covariates <- cbind(covariate1, covariate2)
Multivariate case (with heuristic1 search)…
is.in <- match_groups(condition, covariates, t_halt)
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic1 search.
## Finished heuristic1 search in 0.083 seconds.
## Eventual group sizes: control: 27 treatment: 14
## Removed subjects: control: 3 treatment: 1
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 3 27
## treatment 1 14
Multivariate case (with heuristic2 search)…
is.in <- match_groups(condition, covariates, t_halt, method = "heuristic2")
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic2 search.
## Finished heuristic2 search in 0.42 seconds.
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Multivariate case (with random search)…
is.in <- match_groups(condition, covariates, t_halt, method = "random")
## Initial group sizes: control: 30 treatment: 15
## Starting random search.
## Found matching: control: 28; treatment: 12 (total: 40)
## Found matching: control: 29; treatment: 13 (total: 42)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Finished random search in 15.804 seconds.
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Multivariate case (with special proportions and Wilcoxon rank-sum (U) test)…
my.props <- prop.table(c(control = 4, treatment = 3))
is.in <- match_groups(condition, covariates, U_halt, props = my.props)
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic1 search.
## Finished heuristic1 search in 0.061 seconds.
## Eventual group sizes: control: 18 treatment: 13
## Removed subjects: control: 12 treatment: 2
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 12 18
## treatment 2 13
Multivariate case (with Wilks test)…
is.in <- match_groups(condition, covariates, wilks_halt)
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic1 search.
## Finished heuristic1 search in 0.029 seconds.
## Eventual group sizes: control: 27 treatment: 14
## Removed subjects: control: 3 treatment: 1
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 3 27
## treatment 1 14
Multivariate case (with Wilks test and random search)…
is.in <- match_groups(condition, covariates, wilks_halt, method = "random")
## Initial group sizes: control: 30 treatment: 15
## Starting random search.
## Found matching: control: 30; treatment: 12 (total: 42)
## Found matching: control: 29; treatment: 13 (total: 42)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Finished random search in 25.535 seconds.
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Multivariate case (with Anderson-Darling test and heuristic1 search)…
# Match on both covariates using the Anderson-Darling halting test (ad_halt),
# as this section's heading states, with the heuristic1 search requested
# explicitly. The call previously passed t_halt, which merely repeated the
# earlier multivariate t-test run.
# NOTE(review): the console output recorded below this call was likely
# produced with t_halt; regenerate it after this change.
is.in <- match_groups(condition, covariates, ad_halt, method = "heuristic1")
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic1 search.
## Finished heuristic1 search in 0.016 seconds.
## Eventual group sizes: control: 27 treatment: 14
## Removed subjects: control: 3 treatment: 1
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 3 27
## treatment 1 14
Multivariate case (with t-test and Anderson-Darling test simultaneously)…
t_ad_halt <- create_halting_test(c(t_halt, ad_halt))
threshes <- c(.2, .02)
is.in <- match_groups(condition, covariates, t_ad_halt, threshes)
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic1 search.
## Finished heuristic1 search in 0.579 seconds.
## Eventual group sizes: control: 25 treatment: 12
## Removed subjects: control: 5 treatment: 3
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## control 5 25
## treatment 3 12
Univariate case (with exhaustive search)…
estimate_exhaustive(min_preserved = 42, condition, cases_per_second = 100)
## If 44 of 45 kept: at most 45 cases. If 100 cases per second evaluated: 0.4 seconds.
## If 43 of 45 kept: at most 1035 cases. If 100 cases per second evaluated: 10.3 seconds.
## If 42 of 45 kept: at most 15225 cases. If 100 cases per second evaluated: 2.5 minutes.
## [1] 15225
foreach::registerDoSEQ()
is.ins <- match_groups(condition, covariate1, t_halt, method = "exhaustive", all_results = TRUE)
## Initial group sizes: control: 30 treatment: 15
## Starting exhaustive search.
## Created 2 group size configurations each with a total size of 44
## control: 29 treatment: 15 divergence: 0.000128651338541343
## Size of Cartesian product: 30
## Number of cases processed per second: 189.8734
## control: 30 treatment: 14 divergence: 0.000520578560755861
## Size of Cartesian product: 15
## Number of cases processed per second: 312.5
## Created 3 group size configurations each with a total size of 43
## control: 29 treatment: 14 divergence: 0.000135741532290407
## Size of Cartesian product: 450
## Number of cases processed per second: 332.5942
## control: 28 treatment: 15 divergence: 0.000536783341068091
## Size of Cartesian product: 435
## Number of cases processed per second: 314.7612
## control: 30 treatment: 13 divergence: 0.0021993283249259
## Size of Cartesian product: 105
## Number of cases processed per second: 333.3333
## Finished exhaustive search in 3.328 seconds.
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
print(table(condition, is.ins[[1]]))
##
## condition FALSE TRUE
## control 0 30
## treatment 2 13
## [1] 1
# (Confirm the exhaustive search result by running heuristic1 search on the matched subset.)
is.in <- match_groups(condition[is.ins[[1]]], covariate1[is.ins[[1]]], t_halt)
## Groups are already matched.
print(table(condition[is.ins[[1]]], is.in))
## is.in
## TRUE
## control 30
## treatment 13
Multivariate case for more than two groups…
# Reproducible toy data for three equally sized groups.
set.seed(257)
SIZE <- 15
# SIZE labels per group, in group1, group2, group3 order.
condition <- as.factor(rep(c("group1", "group2", "group3"), each = SIZE))
# Each covariate is drawn group by group: group1 with mean 0 and sd 1,
# group2 with mean 0 and sd 2, group3 with mean 1 and sd 2. covariate1 is
# drawn in full before covariate2, so the order of random draws is fixed.
covariate1 <- c(
  rnorm(n = SIZE, mean = 0, sd = 1),
  rnorm(n = SIZE, mean = 0, sd = 2),
  rnorm(n = SIZE, mean = 1, sd = 2)
)
covariate2 <- c(
  rnorm(n = SIZE, mean = 0, sd = 1),
  rnorm(n = SIZE, mean = 0, sd = 2),
  rnorm(n = SIZE, mean = 1, sd = 2)
)
# Two-column matrix; the column names are taken from the variable names.
covariates <- cbind(covariate1, covariate2)
Multivariate case for more than two groups (with t-test and Anderson-Darling test, heuristic1 search)…
is.in <- match_groups(condition, covariates, t_ad_halt, method = "heuristic1")
## Initial group sizes: group1: 15 group2: 15 group3: 15
## Starting heuristic1 search.
## Finished heuristic1 search in 0.215 seconds.
## Eventual group sizes: group1: 11 group2: 11 group3: 12
## Removed subjects: group1: 4 group2: 4 group3: 3
print(table(condition, is.in))
## is.in
## condition FALSE TRUE
## group1 4 11
## group2 4 11
## group3 3 12