This vignette demonstrates the usage of the FDR controlling procedure of T. Bodnar and T. Dickhaus (2014). The paper is available at https://projecteuclid.org/euclid.ejs/1414588192. All following references to equations and theorems refer to this paper.
ac_fdr.test
Consider, for example, p-values \(P_i\) given by \[ \begin{aligned} P_i&=\begin{cases} U_i&\text{for }i=1,\cdots,m_0\\ \Phi\left(\mu_i+\Phi^{-1}(U_i)\right)&\text{for }i=m_0+1,\cdots,m \end{cases} \end{aligned} \] where \(U=(U_1,\cdots,U_m)^T\) is a vector of marginally uniformly distributed random variables following a Clayton copula with parameter \(\eta=1\). These can be generated as follows:
library(copula)
set.seed(1)
m <- 20
m0 <- 0.8*m
p_values <- rCopula(1,onacopulaL(copClayton,list(1,1:20)))
mu<-runif(m-m0, min=-1, max=-1/2)
p_values[1,(m0+1):m]<-pnorm(sqrt(m)*mu+qnorm(p_values[(m0+1):m]),0,1)
The FDR controlled simultaneous test of the hypotheses \(H_i:\mu_i=0\) versus the alternatives \(K_i:\mu_i<0\) can then be performed in terms of these p-values:
ac_fdr.test(p_values,setTheta(copClayton,1),m0,0.05,1e4)$test
#> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#> [12] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
For a discussion of the methods than can be utilised for empirical copula calibration (i.e. estimating the copula parameter from observations) see section 5 of T. Bodnar and T. Dickhaus (2014).
The distribution function at \(z\) can be evaluated by
cdf.Z(copula::setTheta(cop,theta),z)
where cop
is a copula object (i.e. copula::copClayton
). Thus for the simulation it is only necessary to specify the calculation of \(z^*\). For the Clayton copula this is given by
z_star_Clayton <- function(m,q,eta) {
log(m) / ((m/q)^eta - (1/q)^eta)
}
Note that, to be compatible with the copula-package, this example uses a generator that differs from the one used in the paper (cf. page 2218). The generator used for the Clayton copula is given by \(\psi(x)=(1+x)^{-1/\eta}\) which leads to qualitatively similar results to those obtained in the paper.
For the Gumbel copula \(z^*\) can be calculated by
z_star_Gumbel <- function(m,q,eta) {
log(m) / ((-log(q/m))^eta-(-log(q))^eta)
}
Note: Since z_star_Gumbel
is very small for larger theta
(for example z_star_Gumbel(200,0.05,15)
is of the same magnitude as \(10^{-14}\)) a Monte Carlo approximation to the distribution function is utilized for larger theta. In the paper this was done for all theta
.
To see the full code for generating the figures execute the following code:
v <- vignette('fdr-test',package = 'MHTcop')
file.edit(paste(v$Dir,'doc',v$R,sep='/'))
The upper bound is calculated according to the formula given in corollary 3.1. The adjustement factor \(\delta\) is calculated using the function ac_fdr.calc_delta
:
calcBounds <- function(cop) {
upperBound <- (m0/m)*alpha
cat("Calculating upper bound for the",cop@name,"copula ( m =",m,")\n")
delta <- pbsapply(theta,function(theta)
ac_fdr.calc_delta(copula::setTheta(cop,theta),m,m0,
alpha=alpha,num.reps=NZ,calc.var=TRUE))
sharperUpperBound <- upperBound * delta[1,]
sharperUpperBound.var <- upperBound^2 * delta[2,]
delta <- delta[1,]
lowerBound <- sapply(theta,function(theta){alpha*(m0/m)*
(1+pgamma(log(m)/(m^(theta)-1),shape=1/(theta),scale=1)-
pgamma((log(m)*(m^(theta)))/(m^(theta)-1),shape=1/(theta),scale=1))})
list(upperBound=upperBound,sharperUpperBound=sharperUpperBound,
sharperUpperBound.var=sharperUpperBound.var,
delta=delta,lowerBound=lowerBound)
}
Samples from model (16) for use in a Monte Carlo study of the FDR using the delta calculated previously by calc_bounds
:
simFDR <- function(cop,delta) {
cat("Performing simulation for the",cop@name,"copula ( m =",m,")\n")
Sim <- do.call(cbind,pblapply(seq_along(theta),function(l) {
p_values<-matrix(0,3,m)
q_k<-(alpha/m)*seq(1,m,1)
p_values_data <- copula::rCopula(NZ,copula::onacopulaL(cop,list(theta[l],1:m)))
data_f<-function(s)
{
p_values[1,]<-p_values_data[s,]
p_values[2,1:m]<-0
mu<-runif(m-m0, min=-1, max=-1/2)
p_values[1,(m0+1):m]<-pnorm(sqrt(m)*mu+qnorm(p_values_data[s,(m0+1):m]),0,1)
p_values[2,(m0+1):m]<-1
sp_values<-matrix(0,3,m)
sp_values<-p_values[,order(p_values[1,])]
sp_values[3,]<-seq(1,m,1)
#Calculations for the FDR
R_m<-0
V_m<-0
if (sum(sp_values[1,]<q_k)>0)
{
R_m<-sum(max(sp_values[3,sp_values[1,]<q_k]))
V_m<-R_m-sum(sp_values[2,1:R_m])
}
#Calculations for the power
S_m<-0
if (sum(sp_values[1,]<q_k)>0)
{
S_m<-sum(sp_values[2,1:sum(max(sp_values[3,sp_values[1,]<q_k]))])
}
#Calculations for the power of the improved procedure
S_m.improved<-0
if (sum(sp_values[1,]<q_k/delta[l])>0)
{
S_m.improved<-sum(sp_values[2,1:sum(max(sp_values[3,sp_values[1,]<q_k/delta[l]]))])
}
return(c(V_m/max(1,R_m),S_m/(m-m0),S_m.improved/(m-m0)))
}
data_in<-sapply(1:NZ,data_f)
FDR.sd <- sd(data_in[1,])
data_in<-rowMeans(data_in)
data_in[4]<-FDR.sd
return(data_in)
}))
list(FDR=Sim[1,],empiricalPower=Sim[2,],empiricalPowerImproved=Sim[3,],
FDR.sd=Sim[4,])
}
#> Generating Figure 2 a )
#> Generating Figure 3 a )
#> Generating Figure 4 a )
#> Generating Figure 6 a )
#> Generating Figure 7 a )
#> Generating Figure 8 a )