NACHO (NAnostring quality Control dasHbOard) is developed for NanoString nCounter data.
NanoString nCounter data is a messenger-RNA/micro-RNA (mRNA/miRNA) expression assay and works with fluorescent barcodes.
Each barcode is assigned a mRNA/miRNA, which can be counted after bonding with its target.
As a result each count of a specific barcode represents the presence of its target mRNA/miRNA.
NACHO is able to load, visualise and normalise the exported NanoString nCounter data and facilitates the user in performing a quality control.
NACHO does this by visualising quality control metrics, expression of control genes, principal components and sample specific size factors in an interactive web application.
With the use of two functions, RCC files are summarised and visualised, namely: load_rcc()
and visualise()
.
load_rcc()
function is used to preprocess the data.visualise()
function initiates a Shiny-based dashboard that visualises all relevant QC plots.NACHO also includes a function normalise()
, which (re)calculates sample specific size factors and normalises the data.
normalise()
function creates a list in which your settings, the raw counts and normalised counts are stored.In addition (since v0.6.0) NACHO includes two (three) additional functions:
render()
function renders a full quality-control report (HTML) based on the results of a call to load_rcc()
or normalise()
(using print()
in a Rmarkdown chunk).autoplot()
function draws any quality-control metrics from visualise()
and render()
.For more vignette("NACHO")
and vignette("NACHO-analysis")
.
Canouil M, Bouland GA, Bonnefond A, Froguel P, Hart L, Slieker R (2019). “NACHO: an R package for quality control of NanoString nCounter data.” Bioinformatics. ISSN 1367-4803, doi: 10.1093/bioinformatics/btz647.
@Article{,
title = {{NACHO}: an {R} package for quality control of {NanoString} {nCounter} data},
author = {Mickaël Canouil and Gerard A. Bouland and Amélie Bonnefond and Philippe Froguel and Leen Hart and Roderick Slieker},
journal = {Bioinformatics},
address = {Oxford, England},
year = {2019},
month = {aug},
issn = {1367-4803},
doi = {10.1093/bioinformatics/btz647},
}
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tibble)
library(NACHO)
## ── Conflicts ───────────────────────────────────────────────────────────────────── nacho_conflicts() ──
## x NACHO::summarize() masks dplyr::summarize()
##
## Attaching package: 'NACHO'
## The following object is masked from 'package:dplyr':
##
## summarize
library(GEOquery)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:NACHO':
##
## normalize
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, basename, cbind, colnames, dirname, do.call,
## duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
## lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
GSE70970
from GEO (or use your own data)data_directory <- file.path(tempdir(), "GSE70970", "Data")
# Download data
gse <- getGEO("GSE70970")
## Found 1 file(s)
## GSE70970_series_matrix.txt.gz
## Parsed with column specification:
## cols(
## .default = col_double(),
## ID_REF = col_character()
## )
## See spec(...) for full column specifications.
## File stored at:
## /tmp/RtmpVNOmbj/GPL20699.soft
# Get phenotypes
targets <- pData(phenoData(gse[[1]]))
getGEOSuppFiles(GEO = "GSE70970", baseDir = tempdir())
## size isdir
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_RAW.tar 1986560 FALSE
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_characteristics_readme.txt.gz 672 FALSE
## mode
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_RAW.tar 644
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_characteristics_readme.txt.gz 644
## mtime
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_RAW.tar 2020-05-26 17:20:54
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_characteristics_readme.txt.gz 2020-05-26 17:20:55
## ctime
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_RAW.tar 2020-05-26 17:20:54
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_characteristics_readme.txt.gz 2020-05-26 17:20:55
## atime
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_RAW.tar 2020-05-26 17:20:53
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_characteristics_readme.txt.gz 2020-05-26 17:20:54
## uid gid
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_RAW.tar 1738 50
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_characteristics_readme.txt.gz 1738 50
## uname grname
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_RAW.tar mcanouil staff
## /tmp/RtmpVNOmbj/GSE70970/GSE70970_characteristics_readme.txt.gz mcanouil staff
# Unzip data
untar(
tarfile = file.path(tempdir(), "GSE70970", "GSE70970_RAW.tar"),
exdir = data_directory
)
# Add IDs
targets$IDFILE <- list.files(data_directory)
GSE70970 <- load_rcc(data_directory, targets, id_colname = "IDFILE")
## [NACHO] Importing RCC files.
## [NACHO] Performing QC and formatting data.
## [NACHO] Computing normalisation factors using "GEO" method.
## [NACHO] Missing values have been replaced with zeros for PCA.
## [NACHO] Normalising data using "GEO" method with housekeeping genes.
## [NACHO] Returning a list.
## $ access : character
## $ housekeeping_genes : character
## $ housekeeping_predict: logical
## $ housekeeping_norm : logical
## $ normalisation_method: character
## $ remove_outliers : logical
## $ n_comp : numeric
## $ data_directory : character
## $ pc_sum : data.frame
## $ nacho : data.frame
## $ outliers_thresholds : list
limma
library(limma)
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
selected_pheno <- GSE70970[["nacho"]] %>%
select(IDFILE, `age:ch1`, `gender:ch1`, `chemo:ch1`, `disease.event:ch1`) %>%
distinct() %>%
mutate_all(~ na_if(.x, "NA")) %>%
drop_na()
## IDFILE age:ch1 gender:ch1 chemo:ch1
## 1 GSM1824143_NPC-T-1.RCC.gz 45.97260274 Male 0
## 2 GSM1824144_NPC-T-10.RCC.gz 46.4 Male 1
## 3 GSM1824145_NPC-T-100.RCC.gz 50.36438356 Male 0
## 4 GSM1824146_NPC-T-101.RCC.gz 64.09041096 Female 1
## 5 GSM1824147_NPC-T-102.RCC.gz 27.57808219 Male 1
## 6 GSM1824148_NPC-T-103.RCC.gz 67.01369863 Male 1
## disease.event:ch1
## 1 1
## 2 1
## 3 0
## 4 0
## 5 1
## 6 0
expr_counts <- GSE70970[["nacho"]] %>%
filter(grepl("Endogenous", CodeClass)) %>%
select(IDFILE, Name, Count_Norm) %>%
pivot_wider(names_from = "Name", values_from = "Count_Norm") %>%
column_to_rownames("IDFILE") %>%
t()
## GSM1824143_NPC-T-1.RCC.gz GSM1824144_NPC-T-10.RCC.gz
## hsa-miR-758 5 3
## hsa-miR-1296 0 0
## hsa-miR-548e 1 0
## hsa-miR-874 5 20
## hsa-miR-106b 864 725
## GSM1824145_NPC-T-100.RCC.gz GSM1824146_NPC-T-101.RCC.gz
## hsa-miR-758 13 1
## hsa-miR-1296 0 0
## hsa-miR-548e 0 1
## hsa-miR-874 31 3
## hsa-miR-106b 889 875
## GSM1824147_NPC-T-102.RCC.gz
## hsa-miR-758 0
## hsa-miR-1296 0
## hsa-miR-548e 0
## hsa-miR-874 98
## hsa-miR-106b 5815
Alternatively, "Accession"
number is also available.
samples_kept <- intersect(selected_pheno[["IDFILE"]], colnames(expr_counts))
expr_counts <- expr_counts[, samples_kept]
selected_pheno <- filter(selected_pheno, IDFILE %in% !!samples_kept)
limma
eBayes(lmFit(expr_counts, design))
## An object of class "MArrayLM"
## $coefficients
## (Intercept) `disease.event:ch1`1
## hsa-miR-758 49.886792 27.779874
## hsa-miR-1296 6.245283 4.559315
## hsa-miR-548e 16.553459 5.998265
## hsa-miR-874 62.176101 53.030796
## hsa-miR-106b 1687.037736 670.341574
## 767 more rows ...
##
## $stdev.unscaled
## (Intercept) `disease.event:ch1`1
## hsa-miR-758 0.07930516 0.133355
## hsa-miR-1296 0.07930516 0.133355
## hsa-miR-548e 0.07930516 0.133355
## hsa-miR-874 0.07930516 0.133355
## hsa-miR-106b 0.07930516 0.133355
## 767 more rows ...
##
## $sigma
## [1] 110.26465 22.88524 40.41439 142.35578 2725.78256
## 767 more elements ...
##
## $df.residual
## [1] 244 244 244 244 244
## 767 more elements ...
##
## $cov.coefficients
## (Intercept) `disease.event:ch1`1
## (Intercept) 0.006289308 -0.006289308
## `disease.event:ch1`1 -0.006289308 0.017783561
##
## $pivot
## [1] 1 2
##
## $rank
## [1] 2
##
## $Amean
## hsa-miR-758 hsa-miR-1296 hsa-miR-548e hsa-miR-874 hsa-miR-106b
## 59.711382 7.857724 18.674797 80.930894 1924.109756
## 767 more elements ...
##
## $method
## [1] "ls"
##
## $design
## (Intercept) `disease.event:ch1`1
## 1 1 1
## 2 1 1
## 3 1 0
## 4 1 0
## 5 1 1
## 241 more rows ...
##
## $df.prior
## [1] 0.4830213
##
## $s2.prior
## [1] 1259.044
##
## $var.prior
## [1] 0.01270806 0.01270806
##
## $proportion
## [1] 0.01
##
## $s2.post
## [1] 12136.7587 525.1868 1632.5835 20227.6193 7415213.9426
## 767 more elements ...
##
## $t
## (Intercept) `disease.event:ch1`1
## hsa-miR-758 5.709956 1.890904
## hsa-miR-1296 3.436321 1.491878
## hsa-miR-548e 5.165943 1.113214
## hsa-miR-874 5.512513 2.796059
## hsa-miR-106b 7.811987 1.845971
## 767 more rows ...
##
## $df.total
## [1] 244.483 244.483 244.483 244.483 244.483
## 767 more elements ...
##
## $p.value
## (Intercept) `disease.event:ch1`1
## hsa-miR-758 3.264532e-08 0.059819484
## hsa-miR-1296 6.927738e-04 0.137020345
## hsa-miR-548e 4.965572e-07 0.266709928
## hsa-miR-874 8.969456e-08 0.005584017
## hsa-miR-106b 1.660223e-13 0.066105273
## 767 more rows ...
##
## $lods
## (Intercept) `disease.event:ch1`1
## hsa-miR-758 4.914698 -4.125133
## hsa-miR-1296 -1.305369 -4.402338
## hsa-miR-548e 3.210854 -4.606447
## hsa-miR-874 4.282436 -3.269257
## hsa-miR-106b 12.457684 -4.159485
## 767 more rows ...
##
## $F
## [1] 37.92181 15.57340 26.89455 43.73702 63.11409
## 767 more elements ...
##
## $F.p.value
## [1] 4.525315e-15 4.304949e-07 2.771664e-11 5.784522e-17 7.949203e-23
## 767 more elements ...
lm
(or any other model) in a tidy-waylibrary(purrr)
res <- GSE70970[["nacho"]] %>%
# filter( QC PARAMETER) %>% # possible additional QC filters
filter(grepl("Endogenous", CodeClass)) %>%
select(IDFILE, Name, Accession, Count, Count_Norm, `age:ch1`, `gender:ch1`, `chemo:ch1`, `disease.event:ch1`) %>%
mutate_all(~ na_if(.x, "NA")) %>%
drop_na() %>%
group_by(Name, Accession) %>%
nest() %>%
ungroup() %>%
slice(1:10) %>% # the ten first genes
mutate(
lm = map(.x = data, .f = function(idata) {# lm or whatever model you want
as.data.frame(summary(lm(formula = Count_Norm ~ `disease.event:ch1`, idata))$coef)
})
) %>%
select(-data)
unnest(res, lm)
## # A tibble: 20 x 6
## Name Accession Estimate `Std. Error` `t value` `Pr(>|t|)`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 hsa-miR-758 MIMAT0003879 6.29 1.30 4.86 3.56e- 6
## 2 hsa-miR-758 MIMAT0003879 -0.573 2.32 -0.247 8.05e- 1
## 3 hsa-miR-1296 MIMAT0005794 1.01 0.311 3.26 1.46e- 3
## 4 hsa-miR-1296 MIMAT0005794 0.194 0.556 0.348 7.29e- 1
## 5 hsa-miR-548e MIMAT0005874 3.84 1.14 3.36 1.04e- 3
## 6 hsa-miR-548e MIMAT0005874 -0.965 2.04 -0.472 6.38e- 1
## 7 hsa-miR-874 MIMAT0004911 19.4 2.85 6.82 3.67e-10
## 8 hsa-miR-874 MIMAT0004911 10.1 5.10 1.97 5.09e- 2
## 9 hsa-miR-106b MIMAT0000680 1313. 125. 10.5 6.87e-19
## 10 hsa-miR-106b MIMAT0000680 305. 223. 1.37 1.74e- 1
## 11 hsa-miR-1825 MIMAT0006765 14. 2.46 5.68 9.10e- 8
## 12 hsa-miR-1825 MIMAT0006765 1.44 4.41 0.326 7.45e- 1
## 13 hsa-miR-133a MIMAT0000427 17.2 6.29 2.74 7.09e- 3
## 14 hsa-miR-133a MIMAT0000427 8.32 11.3 0.739 4.61e- 1
## 15 hsa-miR-203 MIMAT0000264 281. 117. 2.40 1.77e- 2
## 16 hsa-miR-203 MIMAT0000264 362. 210. 1.73 8.63e- 2
## 17 hsa-miR-222 MIMAT0000279 1650. 184. 8.98 3.96e-15
## 18 hsa-miR-222 MIMAT0000279 -159. 329. -0.483 6.30e- 1
## 19 hsa-miR-1973 MIMAT0009448 132. 22.2 5.93 2.88e- 8
## 20 hsa-miR-1973 MIMAT0009448 103. 39.8 2.58 1.11e- 2