library(prozor)
#library(reshape2)
# NOTE(review): the former `rm(list = ls())` was removed here. Clearing the
# global environment is a side effect on the caller's session that nothing
# below depends on.

# Locate the example identification results shipped with the prozor package
# and read the tab-separated table.
file <- system.file("extdata/IDResults.txt.gz", package = "prozor")
specMeta <- readr::read_tsv(file)
## Parsed with column specification:
## cols(
## RefSpectraId = col_integer(),
## numPeaks = col_integer(),
## peptideSeq = col_character(),
## precursorCharge = col_integer(),
## precursorMZ = col_double(),
## retentionTime = col_double(),
## copies = col_integer(),
## peptideModSeq = col_character(),
## score = col_double(),
## lengthPepSeq = col_integer(),
## fileName = col_integer(),
## SpecIDinFile = col_integer()
## )

# Number of rows read from the results table.
nrow(specMeta)
## [1] 5000

# Distribution of the `score` column over 100 bins.
hist(specMeta$score, breaks = 100)
Annotate peptide sequences with protein sequences
# Number of distinct peptide sequences in the results table.
length(unique(specMeta$peptideSeq))
## [1] 1520
upeptide <- unique(specMeta$peptideSeq)

# Load the two FASTA databases shipped with prozor: all sequences and the
# canonical sequences.
# NOTE(review): `resRev` / `annotRev` hold the *canonical* database despite the
# "Rev" suffix; the names are kept because later code refers to them.
resAll <- prozor::readPeptideFasta(
  system.file("extdata/Annotation_allSeq.fasta.gz", package = "prozor")
)
resRev <- prozor::readPeptideFasta(
  system.file("extdata/Annotation_canSeq.fasta.gz", package = "prozor")
)

# Annotate the unique peptides against each database and record the fraction
# of peptides that appear in the resulting annotation table.
annotAll <- prozor::annotatePeptides(upeptide, resAll)
## [1] 4953
pcAll <- length(unique(annotAll$peptideSeq)) / length(upeptide)
annotRev <- prozor::annotatePeptides(upeptide, resRev)
## [1] 1770
pcCan <- length(unique(annotRev$peptideSeq)) / length(upeptide)
barplot(c(Canonical = pcCan, All = pcAll))

# Three panels side by side; the previous graphics settings are saved so they
# can be restored once the panels are drawn.
oldPar <- par(mfrow = c(1, 3))

# Rows per peptide in each annotation table, tabulated once and reused for the
# plot and for the percentage of peptides with exactly one row.
nrProtAll <- table(annotAll$peptideSeq)
plot(sort(nrProtAll), axes = FALSE, ylab = "Nr protein IDs")
axis(2)
PCProteotypic_all <- sum(nrProtAll == 1) / length(nrProtAll) * 100
#plot(sort(table(annotIso$peptideSeq)),axes=F, ylab="Nr protein IDs")
#axis(2)
#PCProteotypic_iso <- sum(table(annotIso$peptideSeq) == 1)/ length(table(annotIso$peptideSeq)) * 100
nrProtCan <- table(annotRev$peptideSeq)
plot(sort(nrProtCan), axes = FALSE, ylab = "Nr protein IDs")
axis(2)
PCProteotypic_canonical <- sum(nrProtCan == 1) / length(nrProtCan) * 100
barplot(
  c(All = PCProteotypic_all, canonical = PCProteotypic_canonical),
  las = 2, ylab = "% proteotypic"
)

# Restore the layout so later base-graphics plots are not squeezed into the
# 1 x 3 grid.
par(oldPar)

library(Matrix)

# One row per distinct (modified sequence, charge, sequence) combination.
precursors <- unique(
  subset(specMeta, select = c(peptideModSeq, precursorCharge, peptideSeq))
)
Annotate the precursors with protein IDs
# Join the precursors to the protein IDs of the full database via the shared
# `peptideSeq` column.
# The former self-assignments of `precursorCharge` and `peptideModSeq` were
# no-ops and have been dropped.
annotatedPrecursors <- merge(
  precursors,
  subset(annotAll, select = c(peptideSeq, proteinID)),
  by = "peptideSeq"
)
head(annotatedPrecursors)
## peptideSeq peptideModSeq precursorCharge
## 1 AACAQLNDFLQEYGTQGCQV AAC[+57.0]AQLNDFLQEYGTQGC[+57.0]QV 3
## 2 AACAQLNDFLQEYGTQGCQV AAC[+57.0]AQLNDFLQEYGTQGC[+57.0]QV 3
## 3 AACAQLNDFLQEYGTQGCQV AAC[+57.0]AQLNDFLQEYGTQGC[+57.0]QV 3
## 4 AACAQLNDFLQEYGTQGCQV AAC[+57.0]AQLNDFLQEYGTQGC[+57.0]QV 3
## 5 AACAQLNDFLQEYGTQGCQV AAC[+57.0]AQLNDFLQEYGTQGC[+57.0]QV 3
## 6 AACAQLNDFLQEYGTQGCQV AAC[+57.0]AQLNDFLQEYGTQGC[+57.0]QV 3
## proteinID
## 1 tr|F5GXS0|F5GXS0_HUMAN
## 2 tr|A0A0G2JPR0|A0A0G2JPR0_HUMAN
## 3 tr|A0A0G2JPR0|A0A0G2JPR0_HUMAN
## 4 tr|A0A0G2JPR0|A0A0G2JPR0_HUMAN
## 5 tr|A0A140TA32|A0A140TA32_HUMAN
## 6 tr|A0A0G2JPR0|A0A0G2JPR0_HUMAN

# Build the matrix relating peptides to proteins, display it, and run the
# greedy step on it. Matrix is already attached earlier in the script, so the
# duplicate `library(Matrix)` call was removed.
# NOTE(review): `image()` is kept at top level on purpose; if it returns a plot
# object, that object is only auto-printed here.
xx <- prepareMatrix(
  annotatedPrecursors,
  proteinID = "proteinID", peptideID = "peptideSeq"
)
image(xx)
xxAll <- greedy(xx)

# Same steps against the canonical-sequence annotation (`annotRev`).
annotatedPrecursors <- merge(
  precursors,
  subset(annotRev, select = c(peptideSeq, proteinID)),
  by = "peptideSeq"
)
xx <- prepareMatrix(
  annotatedPrecursors,
  proteinID = "proteinID", peptideID = "peptideSeq"
)
image(xx)
xxCAN <- greedy(xx)

# Compare the number of distinct values in the two greedy results.
barplot(c(
  All = length(unique(unlist(xxAll))),
  canonical = length(unique(unlist(xxCAN)))
))
Number of Proteins after protein inference.