CRAN packages for generalized linear models and related methods

Ioannis Kosmidis

2019-10-08

Packages for glm’s and associated methods

The current vignette is a quick analysis of all CRAN packages that have “glm” in their name. The crude assumption we make here is that a package with “glm” in its name does something related to generalized linear models.

Preparing today’s CRAN package database

Download today’s CRAN database and clean and organize author names, depends, imports, suggests, enhances.

# Attach cranly, which provides clean_CRAN_db() and build_network().
library("cranly")
# Download the current CRAN package database.
p_db <- tools::CRAN_package_db()
# Clean and organize author names, depends, imports, suggests and enhances.
package_db <- clean_CRAN_db(p_db)

Let’s build the CRAN package directives and collaboration networks

# Package directives network, built from the cleaned database.
package_network <- build_network(package_db)
# Collaboration network, built from the same database from the author perspective.
author_network <- build_network(package_db, perspective = "author")

Packages for generalized linear models

The packages that have “glm” in their name are

# Names of the packages in the network that have "glm" in their name; the
# outer parentheses make R print the result as well as assign it.
(glm_packages <- package_with(package_network, name = "glm"))
#>  [1] "AutoStepwiseGLM" "CPMCGLM"         "CompGLM"        
#>  [4] "DGLMExtPois"     "EBglmnet"        "GLMMRR"         
#>  [7] "GLMMadaptive"    "GLMaSPU"         "GLMpack"        
#> [10] "GLMsData"        "GlmSimulatoR"    "HBglm"          
#> [13] "HDGLM"           "HiCglmi"         "MCMCglmm"       
#> [16] "MGLM"            "QGglmm"          "RPEGLMEN"       
#> [19] "StroupGLMM"      "bestglm"         "biglm"          
#> [22] "biglmm"          "brglm"           "brglm2"         
#> [25] "cglm"            "circglmbayes"    "designGLMM"     
#> [28] "dglm"            "dhglm"           "emax.glm"       
#> [31] "ezglm"           "fastglm"         "geoRglm"        
#> [34] "glm.deploy"      "glm.predict"     "glm2"           
#> [37] "glmBfp"          "glmaag"          "glmbb"          
#> [40] "glmc"            "glmdisc"         "glmdm"          
#> [43] "glmertree"       "glmgraph"        "glmlep"         
#> [46] "glmm"            "glmmADMB"        "glmmEP"         
#> [49] "glmmLasso"       "glmmML"          "glmmTMB"        
#> [52] "glmmboot"        "glmmfields"      "glmmsr"         
#> [55] "glmnet"          "glmnetUtils"     "glmnetcr"       
#> [58] "glmpath"         "glmpathcr"       "glmpca"         
#> [61] "glmtlp"          "glmtree"         "glmulti"        
#> [64] "glmvsd"          "glmx"            "hglm"           
#> [67] "hglm.data"       "icdGLM"          "lsplsGlm"       
#> [70] "mbrglm"          "mcemGLM"         "mcglm"          
#> [73] "mdhglm"          "mglmn"           "misclassGLM"    
#> [76] "mvglmmRank"      "oglmx"           "parglm"         
#> [79] "pglm"            "plsRglm"         "poisson.glm.mix"
#> [82] "r2glmm"          "randomGLM"       "robmixglm"      
#> [85] "simglm"          "speedglm"

The sub-network for glm_packages can be visualized using

# Visualize the sub-network of the package network for glm_packages.
plot(package_network, package = glm_packages)

In order to focus on the sub-network with edges only between the packages in glm_packages, we do

# only = TRUE restricts the sub-network to edges between packages in glm_packages.
glm_package_only_network <- subset(package_network, package = glm_packages, only = TRUE)
# Visualize the restricted sub-network.
plot(glm_package_only_network, package = glm_packages)

The summary method computes various statistics for the directives sub-network for generalized linear models. The top-20 packages according to the number of other packages that import them are

# Sub-network of the package directives network for glm_packages.
glm_package_network <- subset(package_network, package = glm_packages)
# Summary statistics for the sub-network.
# NOTE(review): the closeness warning shown below appears to come from this call.
glm_package_summaries <- summary(glm_package_network)
#> Warning in closeness(cranly_graph, normalized = FALSE): At centrality.c:
#> 2784 :closeness centrality is not well-defined for disconnected graphs
# Plot the summaries, ranked by the "n_imported_by" statistic.
plot(glm_package_summaries, according_to = "n_imported_by")

The top-20 authors in the collaboration sub-network for generalized linear models according to the number of collaborators are

# Sub-network of the collaboration network for glm_packages.
glm_author_network <- subset(author_network, package = glm_packages)
# Summary statistics for the collaboration sub-network.
# NOTE(review): the closeness warning shown below appears to come from this call.
glm_author_summaries <- summary(glm_author_network)
#> Warning in closeness(cranly_graph, normalized = FALSE): At centrality.c:
#> 2784 :closeness centrality is not well-defined for disconnected graphs
# Plot the summaries, ranked by the "n_collaborators" statistic.
plot(glm_author_summaries, according_to = "n_collaborators")