RScelestial Vignette

Mohammad-Hadi Foroughmand-Araabi

2020-03-23

library(RScelestial)
# We load igraph for drawing trees. If you do not want to draw,
# there is no need to import igraph.
library(igraph)
#> 
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#> 
#>     decompose, spectrum
#> The following object is masked from 'package:base':
#> 
#>     union

Installing RScelestial

The RScelestial package could be installed easily as follows

install.packages("RScelestial")

Simulation

Here we show a simulation. We build a data set with following command.

# Following command generates ten samples with 20 loci. 
# Rate of mutations on each edge of the evolutionary tree is 1.5. 
D = synthesis(10, 20, 5, seed = 7)
D
#> $seqeunce
#>     C1 C2 C3 C4 C5 C6 C7 C8 C9 C10
#> L1   3  1  3  0  0  1  3  1  3   3
#> L2   1  1  3  0  0  0  3  3  0   3
#> L3   1  0  3  0  3  3  0  3  3   0
#> L4   1  3  3  3  3  3  0  1  0   3
#> L5   1  0  1  0  3  0  3  0  0   1
#> L6   0  3  3  3  3  3  3  3  3   3
#> L7   0  3  0  3  0  3  3  1  0   3
#> L8   3  3  0  1  1  3  3  3  3   1
#> L9   1  0  3  0  3  1  3  3  0   1
#> L10  3  3  0  0  0  1  3  1  0   0
#> L11  3  0  3  3  0  3  0  3  3   0
#> L12  1  0  0  3  3  0  0  0  3   1
#> L13  3  3  1  3  0  3  0  0  1   3
#> L14  0  3  3  3  3  3  3  3  0   3
#> L15  3  1  0  1  0  0  0  0  0   0
#> L16  3  0  0  0  3  3  0  0  1   0
#> L17  0  3  3  3  3  0  3  3  0   3
#> L18  3  3  3  3  3  3  1  3  3   3
#> L19  1  0  3  1  0  3  3  0  3   3
#> L20  0  0  3  0  3  3  3  3  0   0
#> 
#> $true.sequence
#>     C1 C2 C3 C4 C5 C6 C7 C8 C9 C10
#> L1   0  0  0  0  0  0  0  0  0   0
#> L2   0  0  0  0  0  0  0  0  0   0
#> L3   0  0  0  0  0  0  0  0  0   0
#> L4   0  0  0  0  0  0  0  0  0   0
#> L5   1  0  1  0  0  0  0  0  0   1
#> L6   0  0  0  0  0  0  0  0  0   0
#> L7   0  0  0  0  0  0  0  0  0   0
#> L8   0  0  0  0  0  0  0  0  1   0
#> L9   1  0  1  0  0  0  0  0  0   1
#> L10  0  0  0  0  0  0  0  0  0   0
#> L11  0  0  0  0  0  0  0  0  0   0
#> L12  0  0  0  0  0  0  0  0  0   0
#> L13  0  0  0  0  0  0  0  0  1   0
#> L14  0  0  0  0  0  0  0  0  1   0
#> L15  0  0  0  0  0  0  0  0  0   0
#> L16  0  0  0  0  0  0  0  0  1   0
#> L17  0  0  0  0  0  0  0  0  0   0
#> L18  0  0  0  0  0  0  0  0  0   0
#> L19  0  0  0  0  0  0  0  0  0   0
#> L20  0  0  0  0  0  0  0  0  0   0
#> 
#> $true.clone
#> $true.clone$N1
#> [1] "C6" "C7"
#> 
#> $true.clone$N2
#> [1] "C2" "C5" "C8"
#> 
#> $true.clone$N3
#> [1] "C4"
#> 
#> $true.clone$N5
#> [1] "C1"  "C3"  "C10"
#> 
#> $true.clone$N6
#> [1] "C9"
#> 
#> 
#> $true.tree
#>   src dest len
#> 1  N2   N1   1
#> 2  N3   N1   1
#> 3  N5   N1   1
#> 4  N6   N1   2

Inferring the phylogenetic tree

seq = as.ten.state.matrix(D$seqeunce)
SP = scelestial(seq, return.graph = TRUE)
SP
#> $input
#>      V1  V2  V3  V4  V5  V6  V7  V8  V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19
#> C1  ./. C/C C/C C/C C/C A/A A/A ./. C/C ./. ./. C/C ./. A/A ./. ./. A/A ./. C/C
#> C10 ./. ./. A/A ./. C/C ./. ./. C/C C/C A/A A/A C/C ./. ./. A/A A/A ./. ./. ./.
#> C2  C/C C/C A/A ./. A/A ./. ./. ./. A/A ./. A/A A/A ./. ./. C/C A/A ./. ./. A/A
#> C3  ./. ./. ./. ./. C/C ./. A/A A/A ./. A/A ./. A/A C/C ./. A/A A/A ./. ./. ./.
#> C4  A/A A/A A/A ./. A/A ./. ./. C/C A/A A/A ./. ./. ./. ./. C/C A/A ./. ./. C/C
#> C5  A/A A/A ./. ./. ./. ./. A/A C/C ./. A/A A/A ./. A/A ./. A/A ./. ./. ./. A/A
#> C6  C/C A/A ./. ./. A/A ./. ./. ./. C/C C/C ./. A/A ./. ./. A/A ./. A/A ./. ./.
#> C7  ./. ./. A/A A/A ./. ./. ./. ./. ./. ./. A/A A/A A/A ./. A/A A/A ./. C/C ./.
#> C8  C/C ./. ./. C/C A/A ./. C/C ./. ./. C/C ./. A/A A/A ./. A/A A/A ./. ./. A/A
#> C9  ./. A/A ./. A/A A/A ./. A/A ./. A/A A/A ./. ./. C/C A/A A/A C/C A/A ./. ./.
#>     V20
#> C1  A/A
#> C10 A/A
#> C2  A/A
#> C3  ./.
#> C4  A/A
#> C5  ./.
#> C6  ./.
#> C7  ./.
#> C8  ./.
#> C9  A/A
#> 
#> $sequence
#>      V1  V2  V3  V4  V5  V6  V7  V8  V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19
#> C1  A/A C/C C/C C/C C/C A/A A/A C/C C/C A/A A/A C/C A/A A/A A/A A/A A/A A/A C/C
#> C10 A/A A/A A/A A/A C/C A/A A/A C/C C/C A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A
#> C2  C/C C/C A/A C/C A/A A/A C/C C/C A/A C/C A/A A/A A/A A/A C/C A/A A/A C/C A/A
#> C3  A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A
#> C4  A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A C/C A/A C/C A/A A/A A/A C/C
#> C5  A/A A/A A/A A/A C/C A/A A/A C/C C/C A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A
#> C6  C/C A/A A/A C/C A/A A/A C/C A/A C/C C/C A/A A/A A/A A/A A/A A/A A/A A/A A/A
#> C7  A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A
#> C8  C/C A/A A/A C/C A/A A/A C/C A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A A/A
#> C9  A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A C/C A/A A/A A/A
#>     V20
#> C1  A/A
#> C10 A/A
#> C2  A/A
#> C3  A/A
#> C4  A/A
#> C5  A/A
#> C6  A/A
#> C7  A/A
#> C8  A/A
#> C9  A/A
#> 
#> $tree
#>   src dest     len
#> 1  C6   C8 7.50022
#> 2  C2   C8 8.00019
#> 3  C5  C10 8.00021
#> 4  C4   C9 8.50017
#> 5  C1  C10 8.50018
#> 6  C2   C4 8.50018
#> 7  C5   C9 8.50019
#> 8  C2   C7 8.50021
#> 9  C3   C9 9.00020
#> 
#> $graph
#> IGRAPH 8d8b072 UNW- 10 9 -- 
#> + attr: name (v/c), weight (e/n)
#> + edges from 8d8b072 (vertex names):
#> [1] C6 --C8  C8 --C2  C5 --C10 C4 --C9  C10--C1  C2 --C4  C5 --C9  C2 --C7 
#> [9] C9 --C3

You can draw the graph with following command

tree.plot(SP, vertex.size = 30)

Also, we can make a rooted tree with cell “C8” as the root of the tree as follows:

SP = scelestial(seq, root.assign.method = "fix", root = "C8", return.graph = TRUE)
tree.plot(SP, vertex.size = 30)

Setting root.assign.method to “balance” lets the algorithm decide for a root that produces minimum height tree.

SP = scelestial(seq, root.assign.method = "balance", return.graph = TRUE)
tree.plot(SP, vertex.size = 30)

Evaluating results

Following command calculates the distance array between pairs of samples.

D.distance.matrix <- distance.matrix.true.tree(D)
D.distance.matrix
#>              C6          C7          C2          C5          C8          C4
#> C6  0.000000000 0.000000000 0.007246377 0.007246377 0.007246377 0.007246377
#> C7  0.000000000 0.000000000 0.007246377 0.007246377 0.007246377 0.007246377
#> C2  0.007246377 0.007246377 0.000000000 0.000000000 0.000000000 0.014492754
#> C5  0.007246377 0.007246377 0.000000000 0.000000000 0.000000000 0.014492754
#> C8  0.007246377 0.007246377 0.000000000 0.000000000 0.000000000 0.014492754
#> C4  0.007246377 0.007246377 0.014492754 0.014492754 0.014492754 0.000000000
#> C1  0.007246377 0.007246377 0.014492754 0.014492754 0.014492754 0.014492754
#> C3  0.007246377 0.007246377 0.014492754 0.014492754 0.014492754 0.014492754
#> C10 0.007246377 0.007246377 0.014492754 0.014492754 0.014492754 0.014492754
#> C9  0.014492754 0.014492754 0.021739130 0.021739130 0.021739130 0.021739130
#>              C1          C3         C10         C9
#> C6  0.007246377 0.007246377 0.007246377 0.01449275
#> C7  0.007246377 0.007246377 0.007246377 0.01449275
#> C2  0.014492754 0.014492754 0.014492754 0.02173913
#> C5  0.014492754 0.014492754 0.014492754 0.02173913
#> C8  0.014492754 0.014492754 0.014492754 0.02173913
#> C4  0.014492754 0.014492754 0.014492754 0.02173913
#> C1  0.000000000 0.000000000 0.000000000 0.02173913
#> C3  0.000000000 0.000000000 0.000000000 0.02173913
#> C10 0.000000000 0.000000000 0.000000000 0.02173913
#> C9  0.021739130 0.021739130 0.021739130 0.00000000
SP.distance.matrix <- distance.matrix.scelestial(SP)
SP.distance.matrix
#>              C1         C10          C2          C3          C4          C5
#> C1  0.000000000 0.003687630 0.018221247 0.014750545 0.014533617 0.007158358
#> C10 0.003687630 0.000000000 0.014533617 0.011062915 0.010845987 0.003470728
#> C2  0.018221247 0.014533617 0.000000000 0.011279808 0.003687630 0.011062889
#> C3  0.014750545 0.011062915 0.011279808 0.000000000 0.007592178 0.007592187
#> C4  0.014533617 0.010845987 0.003687630 0.007592178 0.000000000 0.007375259
#> C5  0.007158358 0.003470728 0.011062889 0.007592187 0.007375259 0.000000000
#> C6  0.024945783 0.021258154 0.006724537 0.018004345 0.010412166 0.017787426
#> C7  0.021908889 0.018221260 0.003687643 0.014967451 0.007375272 0.014750532
#> C8  0.021691966 0.018004336 0.003470719 0.014750527 0.007158349 0.014533608
#> C9  0.010845992 0.007158362 0.007375255 0.003904553 0.003687625 0.003687634
#>              C6          C7          C8          C9
#> C1  0.024945783 0.021908889 0.021691966 0.010845992
#> C10 0.021258154 0.018221260 0.018004336 0.007158362
#> C2  0.006724537 0.003687643 0.003470719 0.007375255
#> C3  0.018004345 0.014967451 0.014750527 0.003904553
#> C4  0.010412166 0.007375272 0.007158349 0.003687625
#> C5  0.017787426 0.014750532 0.014533608 0.003687634
#> C6  0.000000000 0.010412179 0.003253817 0.014099792
#> C7  0.010412179 0.000000000 0.007158362 0.011062898
#> C8  0.003253817 0.007158362 0.000000000 0.010845974
#> C9  0.014099792 0.011062898 0.010845974 0.000000000
## Difference between normalized distance matrices
vertices <- rownames(SP.distance.matrix)
sum(abs(D.distance.matrix[vertices,vertices] - SP.distance.matrix))
#> [1] 0.7237071