RDFTensor provides a scalable implementation of RESCAL tensor factorization, including parallelized steps and a compact representation of slices. The following demonstration applies it to the UMLS dataset, which is represented as a 135 x 135 x 49 tensor.
# Run the RESCAL factorization on the tensor slices in ntnsr$X with rank 10.
# verbose = 1 makes the call print the per-iteration progress shown below.
tt <- rescal(
  ntnsr$X,
  rnk = 10,
  ainit = "nvecs",
  verbose = 1,
  lambdaA = 0,
  epsilon = 1e-4,
  lambdaR = 0
)
## [1] "Initializing A"
## [1] "Calculating eigen vectors..."
## [1] "initialize R and Z..."
## [1] "-----------------------------iteration: 1 ----------------------------------"
## [1] "[ 1] fit: 0.59644 | delta: 6.0e-01 | secs: 0.32000"
## [1] "-----------------------------iteration: 2 ----------------------------------"
## [1] "[ 2] fit: 0.63090 | delta: 3.4e-02 | secs: 0.28000"
## [1] "-----------------------------iteration: 3 ----------------------------------"
## [1] "[ 3] fit: 0.63930 | delta: 8.4e-03 | secs: 0.29000"
## [1] "-----------------------------iteration: 4 ----------------------------------"
## [1] "[ 4] fit: 0.64224 | delta: 2.9e-03 | secs: 0.30000"
## [1] "-----------------------------iteration: 5 ----------------------------------"
## [1] "[ 5] fit: 0.64349 | delta: 1.3e-03 | secs: 0.28000"
## [1] "-----------------------------iteration: 6 ----------------------------------"
## [1] "[ 6] fit: 0.64414 | delta: 6.4e-04 | secs: 0.27000"
## [1] "-----------------------------iteration: 7 ----------------------------------"
## [1] "[ 7] fit: 0.64452 | delta: 3.8e-04 | secs: 0.31000"
## [1] "-----------------------------iteration: 8 ----------------------------------"
## [1] "[ 8] fit: 0.64477 | delta: 2.5e-04 | secs: 0.30000"
## [1] "-----------------------------iteration: 9 ----------------------------------"
## [1] "[ 9] fit: 0.64495 | delta: 1.7e-04 | secs: 0.29000"
## [1] "-----------------------------iteration: 10 ----------------------------------"
## [1] "[ 10] fit: 0.64506 | delta: 1.2e-04 | secs: 0.30000"
## [1] "-----------------------------iteration: 11 ----------------------------------"
## [1] "[ 11] fit: 0.64514 | delta: 7.8e-05 | secs: 0.28000"
# Disabled alternative: the same settings passed to scRescal, with the extra
# ncores and OS_WIN arguments.
#tt=scRescal(ntnsr$X,rnk=10,ainit='nvecs',verbose=1,lambdaA=0,epsilon=1e-4,lambdaR=0,ncores = 2,OS_WIN = TRUE)
# Pull the A and R components out of the factorization result for later steps.
A <- tt$A
R <- tt$R
Use the function rescal_Trp_Val to calculate the scores of the triples in the graph, using the factorization obtained in the previous step.
# Score the triples of ntnsr with the factors A and R, then inspect the
# distribution of the resulting 'val' column as a density plot and a summary.
res <- rescal_Trp_Val(R = R, A = A, ntnsr, verbose = 0)
plot(
  density(res[, "val"]),
  main = "RESCAL Factorization rank=10, density of triples of UMLS"
)
print(summary(res[, "val"]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.01384 0.28338 0.73350 0.64514 0.98184 1.70056
# Reconstruct the tensor from the factors A and R, passing the original tensor
# ntnsr as otnsr; the result is used below for the recall/precision figures.
RecRes <- RescalReconstructBack(
  R = R,
  A = A,
  otnsr = ntnsr,
  ncore = 2,
  verbose = 0,
  OS_WIN = TRUE,
  generateLog = TRUE
)
## [1] "NEnt=135, ChkLen=1000, cntChnks=1, cntGrp=1, grpLen=10"
Calculate the recall (true positive rate), the precision, and their harmonic mean.
# Report the percentage of entries in RecRes$TP that are flagged TRUE.
print(
  sprintf(
    "True positive rate:%.2f %%",
    100 * sum(RecRes$TP) / length(RecRes$TP)
  )
)
## [1] "True positive rate:77.19 %"
# Recall / precision / harmonic-mean curves for one predicate slice of the
# reconstruction. Every distinct score of a true-positive triple in the slice
# is used as a threshold; the three measures are computed over the triples of
# that slice scoring at or above the threshold.
s <- 2  # slice index of the <affects> predicate
ijk <- RecRes[[1]]$ijk
val <- RecRes[[1]]$val
tp_flg <- RecRes$TP

# Loop invariants: which reconstructed triples belong to slice s (column 2 of
# ijk is matched against the slice index) and how many triples the original
# slice holds.
in_slice <- ijk[, 2] == s
n_true <- sum(ntnsr$X[[s]])

thresholds <- sort(unique(val[tp_flg & in_slice]), decreasing = TRUE)
if (length(thresholds) == 0) {
  stop("no true-positive triples found for slice ", s, call. = FALSE)
}

# One row per threshold, preallocated rather than grown with rbind().
# Columns: threshold, recall, precision and the raw tp / fn / fp counts.
stats <- matrix(
  NA_real_,
  nrow = length(thresholds), ncol = 6,
  dimnames = list(NULL, c("thr", "R", "P", "tp", "fn", "fp"))
)
for (i in seq_along(thresholds)) {
  thr <- thresholds[i]
  selected <- val >= thr & in_slice
  tp <- sum(tp_flg[selected])
  fp <- sum(selected) - tp
  fn <- n_true - tp
  stats[i, ] <- c(thr, tp / (tp + fn), tp / (tp + fp), tp, fn, fp)
}

# Harmonic mean of precision and recall at each threshold.
HM <- 2 / (1 / stats[, "P"] + 1 / stats[, "R"])
best <- which.max(HM)  # row of the threshold with the highest harmonic mean

plot(stats[, "thr"], stats[, "R"] * 100, type = "l", col = "red", lwd = 2,
  main = sprintf("Slice:%d, Predicate:<%s>, #Triples:%d, Max HM @ %.4f",
    s, ntnsr$P[s], n_true, stats[best, "thr"]),
  ylab = "", xlab = "Threshold ", cex.main = 0.85,
  # Cover the whole threshold range. The loop variable `thr` only holds the
  # last (smallest) threshold, so using it here clipped the axis at 1.
  xlim = c(0, max(stats[, "thr"], 1)), ylim = c(0, 100))
abline(h = c(0, 20, 40, 60, 80, 100), lty = 2, col = "grey")
abline(v = seq(0.1, 1, 0.1), lty = 2, col = "grey")
lines(stats[, "thr"], stats[, "P"] * 100, col = "blue", lwd = 2)
lines(stats[, "thr"], 100 * HM, col = "green", lwd = 2)
# grid(nx=10, lty = "dotted", lwd = 1)
legend(legend = c("Recall", "Precision", "Harmonic mean"),
  col = c("red", "blue", "green"), x = 0.6, y = 20, pch = 1, cex = 0.75,
  lwd = 2)
# Mark the threshold at which the harmonic mean peaks.
abline(v = stats[best, "thr"], col = "grey")