This example application compares the accuracy and computational speed of DRR, run with different parameter settings, against PCA.
library(DRR)
# Fix the RNG seed so the row shuffle below is reproducible.
set.seed(123)
data(iris)

# Work on the first four columns of iris only.
in_data <- iris[, 1:4]
npoints <- nrow(in_data)
nvars <- ncol(in_data)

# Coerce every column to double while keeping the data frame structure.
in_data[] <- lapply(in_data, as.numeric)

# Shuffle the rows, then centre each column (no rescaling to unit variance).
row_order <- sample(npoints)
my_data <- scale(in_data[row_order, ], center = TRUE, scale = FALSE)
# Time the PCA baseline and four DRR fits on the same centred data.
# Each model is assigned inside system.time(), so the fitted object stays
# available afterwards while t0..t4 hold the corresponding timings.

# Baseline: PCA without further centring or scaling (my_data is already
# centred).
t0 <- system.time({
  pca <- prcomp(my_data, center = FALSE, scale. = FALSE)
})

# DRR with the package defaults.
t1 <- system.time({
  drr.1 <- drr(my_data, verbose = FALSE)
})

# DRR with fastkrr = 2.
t2 <- system.time({
  drr.2 <- drr(my_data, fastkrr = 2, verbose = FALSE)
})

# DRR with fastkrr = 5.
t3 <- system.time({
  drr.3 <- drr(my_data, fastkrr = 5, verbose = FALSE)
})

# DRR with fastkrr = 2 and fastcv switched on.
t4 <- system.time({
  drr.4 <- drr(my_data, fastkrr = 2, fastcv = TRUE, verbose = FALSE)
})
# Reconstruction error of every method (rows) when only the first i
# dimensions are kept (columns).
# Note: each entry is sqrt(sum(residual^2)), i.e. the root of the *summed*
# squared residuals over all cells; it is not divided by the cell count.
method_names <- c("pca", "drr.1", "drr.2", "drr.3", "drr.4")
rmse <- matrix(
  NA_real_,
  nrow = length(method_names),
  ncol = nvars,
  dimnames = list(method_names, seq_len(nvars))
)

# DRR fits keyed by their row name in the result matrix.
drr_fits <- list(drr.1 = drr.1, drr.2 = drr.2, drr.3 = drr.3, drr.4 = drr.4)

for (i in seq_len(nvars)) {
  kept <- seq_len(i)

  # PCA: project back from the first i scores using the matching loadings.
  pca_inv <- pca$x[, kept, drop = FALSE] %*%
    t(pca$rotation[, kept, drop = FALSE])
  rmse["pca", i] <- sqrt(sum((my_data - pca_inv)^2))

  # DRR: use each fit's own inverse function on its first i fitted columns.
  for (fit_name in names(drr_fits)) {
    fit <- drr_fits[[fit_name]]
    reconstructed <- fit$inverse(fit$fitted.data[, kept, drop = FALSE])
    rmse[fit_name, i] <- sqrt(sum((my_data - reconstructed)^2))
  }
}
Using more blocks for fastkrr speeds up the calculation, but too many blocks are bad for accuracy.
## 1 2 3 4
## pca 7.166770 3.899313 1.884524 1.379328e-14
## drr.1 5.549355 3.427558 1.700925 1.357570e-14
## drr.2 5.508291 3.193501 1.638346 1.358499e-14
## drr.3 5.555269 3.485765 1.734136 1.356369e-14
## drr.4 5.631050 2.302263 1.473392 1.353796e-14
## user.self sys.self elapsed
## pca 0.004 0.000 0.001
## drr.1 25.580 12.392 20.916
## drr.2 21.864 11.116 19.129
## drr.3 32.384 15.672 26.720
## drr.4 24.992 7.656 22.929