Performance benchmark

Daijiang Li

2019-11-13

Benchmark for PSV family functions

psv

pse

pcd

Benchmark for community PGLMM (pglmm)

To compare the cpp version and R version, and the version from the pez package.

library(dplyr)
comm = comm_a
comm$site = row.names(comm)
dat = tidyr::gather(comm, key = "sp", value = "freq", -site) %>% 
  left_join(envi, by = "site") %>% 
  left_join(traits, by = "sp")
dat$pa = as.numeric(dat$freq > 0)
# data prep for pez::communityPGLMM, not necessary for phyr::pglmm
dat = arrange(dat, site, sp)
dat = filter(dat, sp %in% sample(unique(dat$sp), 10),
             site %in% sample(unique(dat$site), 5))
nspp = n_distinct(dat$sp)
nsite = n_distinct(dat$site)

dat$site = as.factor(dat$site)
dat$sp = as.factor(dat$sp)

tree = ape::drop.tip(phylotree, setdiff(phylotree$tip.label, unique(dat$sp)))
Vphy <- ape::vcv(tree)
Vphy <- Vphy/max(Vphy)
Vphy <- Vphy/exp(determinant(Vphy)$modulus[1]/nspp)
Vphy = Vphy[levels(dat$sp), levels(dat$sp)]

# prepare random effects
re.site <- list(1, site = dat$site, covar = diag(nsite))
re.sp <- list(1, sp = dat$sp, covar = diag(nspp))
re.sp.phy <- list(1, sp = dat$sp, covar = Vphy)
# sp is nested within site
re.nested.phy <- list(1, sp = dat$sp, covar = Vphy, site = dat$site)
re.nested.rep <- list(1, sp = dat$sp, covar = solve(Vphy), site = dat$site) # equal to sp__@site
# can be named 
re = list(re.sp = re.sp, re.sp.phy = re.sp.phy, re.nested.phy = re.nested.phy, re.site = re.site)

# about 4-10 times faster for a small dataset
rbenchmark::benchmark(
  "phyr c++ bobyqa" = {phyr::pglmm(freq ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = TRUE, optimizer = "bobyqa")},
  "phyr c++ Nelder-Mead" = {phyr::pglmm(freq ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = TRUE, optimizer = "Nelder-Mead")},
  "phyr R Nelder-Mead" = {phyr::pglmm(freq ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = FALSE, optimizer = "Nelder-Mead")},
  "pez R Nelder-Mead" = {pez::communityPGLMM(freq ~ 1 + shade, data = dat, sp = dat$sp, site = dat$site, 
                      random.effects = re, REML = FALSE)},
  replications = 5,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self")
)
#>                   test replications elapsed relative user.self sys.self
#> 4    pez R Nelder-Mead            5  32.214   88.989    30.821    0.374
#> 1      phyr c++ bobyqa            5   0.362    1.000     0.342    0.006
#> 2 phyr c++ Nelder-Mead            5   1.156    3.193     1.115    0.015
#> 3   phyr R Nelder-Mead            5  33.281   91.936    31.198    0.480

# about 6 times faster for a small dataset
rbenchmark::benchmark(
  "phyr c++ bobyqa" = {phyr::pglmm(pa ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, family = "binomial", cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = TRUE, optimizer = "bobyqa")},
  "phyr c++ Nelder-Mead" = {phyr::pglmm(pa ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, family = "binomial", cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = TRUE, optimizer = "Nelder-Mead")},
  "phyr R Nelder-Mead" = {phyr::pglmm(pa ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, family = "binomial", cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = FALSE, optimizer = "Nelder-Mead")},
  "pez R Nelder-Mead" = {pez::communityPGLMM(pa ~ 1 + shade, data = dat, sp = dat$sp, 
                                             family = "binomial", site = dat$site, 
                      random.effects = re, REML = FALSE)},
  replications = 5,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self")
)
#>                   test replications elapsed relative user.self sys.self
#> 4    pez R Nelder-Mead            5  49.296   42.314    45.731    0.604
#> 1      phyr c++ bobyqa            5   1.840    1.579     1.750    0.033
#> 2 phyr c++ Nelder-Mead            5   1.165    1.000     1.093    0.021
#> 3   phyr R Nelder-Mead            5  24.355   20.906    23.024    0.317

Benchmark for cor_phylo()

library(ape)
# Set up parameter values for simulating data
n <- 50
phy <- rcoal(n, tip.label = 1:n)
trt_names <- paste0("par", 1:2)

R <- matrix(c(1, 0.7, 0.7, 1), nrow = 2, ncol = 2)
d <- c(0.3, 0.95)
B2 <- 1

Se <- c(0.2, 1)
M <- matrix(Se, nrow = n, ncol = 2, byrow = TRUE)
colnames(M) <- trt_names

# Set up needed matrices for the simulations
p <- length(d)

star <- stree(n)
star$edge.length <- array(1, dim = c(n, 1))
star$tip.label <- phy$tip.label

Vphy <- vcv(phy)
Vphy <- Vphy/max(Vphy)
Vphy <- Vphy/exp(determinant(Vphy)$modulus[1]/n)

tau <- matrix(1, nrow = n, ncol = 1) %*% diag(Vphy) - Vphy
C <- matrix(0, nrow = p * n, ncol = p * n)
for (i in 1:p) for (j in 1:p) {
  Cd <- (d[i]^tau * (d[j]^t(tau)) * (1 - (d[i] * d[j])^Vphy))/(1 - d[i] * d[j])
  C[(n * (i - 1) + 1):(i * n), (n * (j - 1) + 1):(j * n)] <- R[i, j] * Cd
}
MM <- matrix(M^2, ncol = 1)
V <- C + diag(as.numeric(MM))

iD <- t(chol(V))

XX <- iD %*% rnorm(2 * n)
X <- matrix(XX, n, p)
colnames(X) <- trt_names
rownames(X) <- phy$tip.label
rownames(M) <- phy$tip.label

U <- list(cbind(rnorm(n, mean = 2, sd = 10)))
names(U) <- trt_names[2]

X[,2] <- X[,2] + B2[1] * U[[1]][,1] - B2[1] * mean(U[[1]][,1])

z <- cor_phylo(variates = X,
               covariates = U,
               meas_errors = M,
               phy = phy,
               species = phy$tip.label)


U2 <- list(NULL, matrix(rnorm(n, mean = 2, sd = 10), nrow = n, ncol = 1))
rownames(U2[[2]]) <- phy$tip.label
colnames(U2[[2]]) <- "par2"
X2 = X
X2[,2] <- X2[,2] + B2[1] * U2[[2]][,1] - B2[1] * mean(U2[[2]][,1])

z_r <- corphylo(X = X2, SeM = M, U = U2, phy = phy, method = "Nelder-Mead")

rbenchmark::benchmark(
  "cor_phylo" = {cor_phylo(variates = X, covariates = U, meas_errors = M,
                           phy = phy, species = phy$tip.label)},
  "corphylo" = {corphylo(X = X2, SeM = M, U = U2, phy = phy, method = "Nelder-Mead")},
  replications = 5,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self")
)
#>        test replications elapsed relative user.self sys.self
#> 1 cor_phylo            5   4.511    1.000     4.329    0.062
#> 2  corphylo            5  16.190    3.589    13.863    1.369