EXAMPLE#1 Simple Simulation & ordering inference

In the first step, we generate a simple dataset where C1 and C2 are dominated by C3, C3 is dominated by C4, and C4 is dominated by C5. There is no dominant-distribution relation between C1 and C2.

# Simulation section
# Five groups of nInv normal draws each, all with standard deviation std.
# C1 and C2 share the mean initMean; C3, C4 and C5 are shifted upwards by
# 2, 3 and 4 times stepMean respectively.
nInv <- 100
initMean <- 10
stepMean <- 20
std <- 8
simData1 <- list(
  # Groups are drawn in the order C1..C5 and concatenated into one vector.
  Values = unlist(lapply(
    initMean + c(0, 0, 2, 3, 4) * stepMean,
    function(groupMean) rnorm(nInv, mean = groupMean, sd = std)
  )),
  # One label per value, aligned with the draw order above.
  Group = rep(c("C1", "C2", "C3", "C4", "C5"), each = nInv)
)

The framework is used to analyze the data below.

# Simple ordering inference section
library(EDOIF)
## Loading required package: boot
# parameter setting
bootT <- 1000 # number of bootstrap resamples (sampling with replacement)
alpha <- 0.05 # significance level

#======= input
Values <- simData1[["Values"]]
Group <- simData1[["Group"]]
#=============
# Run the framework on the simulated values and their group labels.
A1 <- EDOIF(Values, Group, bootT = bootT, alpha = alpha)

We print the result of our framework below.

# Print the EDOIF result as text: per-group means with confidence intervals,
# followed by the pairwise mean differences.
print(A1) # print results in text
## EDOIF (Empirical Distribution Ordering Inference Framework)
## =======================================================
## Alpha = 0.050000, Number of bootstrap resamples = 1000, CI type = perc
## Using Mann-Whitney test to report whether A <U+227A> B
## A dominant-distribution network density:0.900000
## Distribution: C2
## Mean:10.770026 95CI:[ 9.422309,12.269945]
## Distribution: C1
## Mean:10.780630 95CI:[ 9.262698,12.373455]
## Distribution: C3
## Mean:49.525450 95CI:[ 48.009576,51.025717]
## Distribution: C4
## Mean:69.691016 95CI:[ 68.416535,71.056129]
## Distribution: C5
## Mean:91.287469 95CI:[ 89.784341,92.858969]
## =======================================================
## Mean difference of C1 (n=100) minus C2 (n=100): C2 <U+2280> C1
##  :p-val 0.6316
## Mean Diff:0.010604 95CI:[ -2.161112,2.440035]
## 
## Mean difference of C3 (n=100) minus C2 (n=100): C2 <U+227A> C3
##  :p-val 0.0000
## Mean Diff:38.755424 95CI:[ 36.624740,40.733720]
## 
## Mean difference of C4 (n=100) minus C2 (n=100): C2 <U+227A> C4
##  :p-val 0.0000
## Mean Diff:58.920990 95CI:[ 56.990770,61.107980]
## 
## Mean difference of C5 (n=100) minus C2 (n=100): C2 <U+227A> C5
##  :p-val 0.0000
## Mean Diff:80.517443 95CI:[ 78.327360,82.548819]
## 
## Mean difference of C3 (n=100) minus C1 (n=100): C1 <U+227A> C3
##  :p-val 0.0000
## Mean Diff:38.744820 95CI:[ 36.682312,41.031023]
## 
## Mean difference of C4 (n=100) minus C1 (n=100): C1 <U+227A> C4
##  :p-val 0.0000
## Mean Diff:58.910386 95CI:[ 56.604560,60.945960]
## 
## Mean difference of C5 (n=100) minus C1 (n=100): C1 <U+227A> C5
##  :p-val 0.0000
## Mean Diff:80.506839 95CI:[ 78.249598,82.765810]
## 
## Mean difference of C4 (n=100) minus C3 (n=100): C3 <U+227A> C4
##  :p-val 0.0000
## Mean Diff:20.165566 95CI:[ 18.131087,22.415732]
## 
## Mean difference of C5 (n=100) minus C3 (n=100): C3 <U+227A> C5
##  :p-val 0.0000
## Mean Diff:41.762019 95CI:[ 39.623135,43.933736]
## 
## Mean difference of C5 (n=100) minus C4 (n=100): C4 <U+227A> C5
##  :p-val 0.0000
## Mean Diff:21.596453 95CI:[ 19.405200,23.727561]

The first plot is the plot of mean confidence intervals

# options = 1 selects the first of the three plot types shown in this example.
plot(A1, options = 1)

plot of chunk Fig1

The second plot is the plot of mean-difference confidence intervals

# options = 2 selects the mean-difference confidence-interval plot.
plot(A1, options = 2)

plot of chunk Fig2

The third plot is a dominant-distribution network.

# options = 3 draws the dominant-distribution network; the value returned by
# plot() is kept in `out`.
out <- plot(A1, options = 3)

plot of chunk Fig3

EXAMPLE#2 Non-normal-Distribution Simulation & ordering inference

We generate a more complicated dataset of mixture distributions. C1, C2, C3, and C4 are dominated by C5. There is no dominant-distribution relation among C1, C2, C3, and C4.

library(EDOIF)
# parameter setting
bootT <- 1000
alpha <- 0.05
nInv <- 1200

# Record the start time; the elapsed time is reported once the plots are drawn.
start_time <- Sys.time()
#======= input
simData3 <- SimNonNormalDist(nInv = nInv, noisePer = 0.01)
Values <- simData3$Values
Group <- simData3$Group
#=============
# Same analysis as before, with the confidence-interval type set explicitly.
A3 <- EDOIF(Values, Group, bootT = bootT, alpha = alpha, methodType = "perc")
# Auto-print the result.
A3
## EDOIF (Empirical Distribution Ordering Inference Framework)
## =======================================================
## Alpha = 0.050000, Number of bootstrap resamples = 1000, CI type = perc
## Using Mann-Whitney test to report whether A <U+227A> B
## A dominant-distribution network density:0.400000
## Distribution: C4
## Mean:81.416611 95CI:[ 78.717777,84.646122]
## Distribution: C2
## Mean:81.556686 95CI:[ 79.529982,83.370917]
## Distribution: C3
## Mean:83.018782 95CI:[ 81.267405,85.059096]
## Distribution: C1
## Mean:83.370651 95CI:[ 80.161340,88.146471]
## Distribution: C5
## Mean:163.180441 95CI:[ 135.170080,232.426772]
## =======================================================
## Mean difference of C2 (n=1200) minus C4 (n=1200): C4 <U+2280> C2
##  :p-val 0.5551
## Mean Diff:0.140075 95CI:[ -3.624683,3.384291]
## 
## Mean difference of C3 (n=1200) minus C4 (n=1200): C4 <U+2280> C3
##  :p-val 0.2304
## Mean Diff:1.602171 95CI:[ -2.014766,5.021584]
## 
## Mean difference of C1 (n=1200) minus C4 (n=1200): C4 <U+2280> C1
##  :p-val 0.0845
## Mean Diff:1.954040 95CI:[ -2.633002,7.128952]
## 
## Mean difference of C5 (n=1200) minus C4 (n=1200): C4 <U+227A> C5
##  :p-val 0.0000
## Mean Diff:81.763829 95CI:[ 52.999783,134.207754]
## 
## Mean difference of C3 (n=1200) minus C2 (n=1200): C2 <U+2280> C3
##  :p-val 0.1971
## Mean Diff:1.462096 95CI:[ -1.256056,4.312469]
## 
## Mean difference of C1 (n=1200) minus C2 (n=1200): C2 <U+2280> C1
##  :p-val 0.0759
## Mean Diff:1.813965 95CI:[ -2.027906,6.500924]
## 
## Mean difference of C5 (n=1200) minus C2 (n=1200): C2 <U+227A> C5
##  :p-val 0.0000
## Mean Diff:81.623755 95CI:[ 53.387017,135.040524]
## 
## Mean difference of C1 (n=1200) minus C3 (n=1200): C3 <U+2280> C1
##  :p-val 0.2812
## Mean Diff:0.351869 95CI:[ -3.282784,5.065711]
## 
## Mean difference of C5 (n=1200) minus C3 (n=1200): C3 <U+227A> C5
##  :p-val 0.0000
## Mean Diff:80.161658 95CI:[ 51.780346,132.429363]
## 
## Mean difference of C5 (n=1200) minus C1 (n=1200): C1 <U+227A> C5
##  :p-val 0.0000
## Mean Diff:79.809790 95CI:[ 50.547924,131.976584]
# Plot A3 without an `options` argument; the rendered page shows three figures
# for this single call.
plot(A3)

plot of chunk Fig4

plot of chunk Fig4

plot of chunk Fig4

# Stop the clock and report the time elapsed since start_time.
end_time <- Sys.time()
difftime(end_time, start_time)
## Time difference of 8.139147 secs

Uniform noise

We generate distributions in which \(A\) dominates \(B\) under different degrees of uniform noise.

library(ggplot2)

# Draw a fresh sample from SimNonNormalDist with 1000 points per group.
nInv <- 1000
simData3 <- SimNonNormalDist(nInv = nInv, noisePer = 0.01)
#plot(density(simData3$V3))

# Long-format table for plotting: the V3 values are labelled "B" and the
# V5 values are labelled "A".
dat <- data.frame(
  dens = c(simData3$V3, simData3$V5),
  lines = rep(c("B", "A"), each = nInv)
)

# Plot.
# Overlaid density curves for the two categories. The legend title is set
# through labs() rather than by writing into the plot object's internals.
# Values outside the x limits are dropped by ggplot2 with a warning.
p1 <- ggplot(dat, aes(x = dens, fill = lines)) +
  geom_density(alpha = 0.5) +
  xlim(-400, 400) +
  ylim(0, 0.07) +
  labs(x = "Values", y = "Density [0,1]", fill = "Categories") +
  theme(axis.text.x = element_text(face = "bold", size = 12))
# theme_update() modifies the currently active theme, so the bold text setting
# applies when p1 is drawn below (and to plots drawn after it).
theme_update(text = element_text(face = "bold", size = 12))
plot(p1)
## Warning: Removed 3 rows containing non-finite values (stat_density).

plot of chunk Fig5