Extracting information from cranly package networks: extractors and word clouds

Ioannis Kosmidis

2019-10-08

cranly extractor functions

Since version 0.3, cranly includes functions for extracting information from cranly_network objects (see ?"extractor-functions"). All extractor functions in cranly try to figure out what y is in statements of the form

y is [the] extractor-function a package/author

Let’s download, clean and organize today’s CRAN database, and build the package and author directives networks

library("cranly")
library("magrittr")
# Prepare the CRAN package database once, then derive both network views
# (package-centric and author-centric) from that same cleaned snapshot.
cran_db <- clean_CRAN_db()
package_network <- build_network(cran_db, perspective = "package")
author_network <- build_network(cran_db, perspective = "author")

Example queries

Packages by Kurt Hornik

# exact = TRUE: look the author up by full name rather than by partial match
package_by(package_network, "Kurt Hornik", exact = TRUE)
#>  [1] "ISOcodes"           "MASS"               "NLP"               
#>  [4] "NLPutils"           "OAIHarvester"       "PolynomF"          
#>  [7] "RKEA"               "RKEAjars"           "ROI"               
#> [10] "ROI.plugin.msbinlp" "RWeka"              "RWekajars"         
#> [13] "Rcplex"             "Rglpk"              "Rpoppler"          
#> [16] "Rsymphony"          "TSP"                "Unicode"           
#> [19] "W3CMarkupValidator" "arules"             "aucm"              
#> [22] "bibtex"             "bindata"            "cclust"            
#> [25] "chron"              "clue"               "cluster"           
#> [28] "coin"               "colorspace"         "cordillera"        
#> [31] "ctv"                "date"               "dendextend"        
#> [34] "digest"             "e1071"              "exactRankTests"    
#> [37] "fortunes"           "gap"                "isotone"           
#> [40] "kernlab"            "mda"                "mistr"             
#> [43] "mobForest"          "movMF"              "mvord"             
#> [46] "openNLP"            "openNLPdata"        "oz"                
#> [49] "pandocfilters"      "party"              "polyclip"          
#> [52] "polynom"            "princurve"          "qrmdata"           
#> [55] "qrmtools"           "relations"          "seriation"         
#> [58] "sets"               "signal"             "skmeans"           
#> [61] "slam"               "stablelearner"      "strucchange"       
#> [64] "tau"                "textcat"            "tm"                
#> [67] "tm.plugin.mail"     "topicmodels"        "tseries"           
#> [70] "vcd"                "wordnet"            "xgobi"

Packages by people named “Ioannis”

# No exact = TRUE here, so any author whose name contains "Ioannis" counts
package_by(package_network, "Ioannis")
#>  [1] "FLR"          "MXM"          "PlackettLuce" "Rfast"       
#>  [5] "betareg"      "brglm"        "brglm2"       "cranly"      
#>  [9] "enrichwith"   "profileModel" "semnar"       "trackeR"     
#> [13] "trackeRapp"

Packages with “glm” in their name

# This query runs against the author network rather than the package network
package_with(author_network, "glm")
#>  [1] "glmnet"          "biglm"           "biglmm"         
#>  [4] "glm2"            "glmertree"       "glmx"           
#>  [7] "cglm"            "glmmTMB"         "StroupGLMM"     
#> [10] "glmlep"          "fastglm"         "bestglm"        
#> [13] "glmBfp"          "GLMaSPU"         "glmtlp"         
#> [16] "glmbb"           "glmm"            "AutoStepwiseGLM"
#> [19] "glmnetUtils"     "glmdm"           "GLMpack"        
#> [22] "poisson.glm.mix" "glmmfields"      "HBglm"          
#> [25] "brglm"           "brglm2"          "plsRglm"        
#> [28] "glmdisc"         "GLMMadaptive"    "glm.predict"    
#> [31] "mbrglm"          "circglmbayes"    "CPMCGLM"        
#> [34] "icdGLM"          "misclassGLM"     "hglm"           
#> [37] "hglm.data"       "CompGLM"         "glmgraph"       
#> [40] "glmpath"         "GLMMRR"          "glmc"           
#> [43] "randomGLM"       "designGLMM"      "dglm"           
#> [46] "GLMsData"        "DGLMExtPois"     "dhglm"          
#> [49] "mdhglm"          "parglm"          "EBglmnet"       
#> [52] "pglm"            "glmmML"          "emax.glm"       
#> [55] "ezglm"           "glmvsd"          "lsplsGlm"       
#> [58] "speedglm"        "glmmEP"          "geoRglm"        
#> [61] "glm.deploy"      "glmaag"          "glmtree"        
#> [64] "glmmboot"        "glmmLasso"       "glmmsr"         
#> [67] "glmnetcr"        "glmpathcr"       "glmpca"         
#> [70] "GlmSimulatoR"    "glmulti"         "HDGLM"          
#> [73] "HiCglmi"         "simglm"          "MGLM"           
#> [76] "mglmn"           "MCMCglmm"        "mcemGLM"        
#> [79] "mcglm"           "robmixglm"       "mvglmmRank"     
#> [82] "r2glmm"          "oglmx"           "QGglmm"         
#> [85] "RPEGLMEN"

Authors of the lubridate package

author_of(package_network, "lubridate", exact = TRUE)
#>  [1] "Vitalie Spinu"     "Garrett Grolemund" "Hadley Wickham"   
#>  [4] "Ian Lyttle"        "Imanuel Constigan" "Jason Law"        
#>  [7] "Doug Mitarotonda"  "Joseph Larmarange" "Jonathan Boiser"  
#> [10] "Chel Hee Lee"

Authors with “Ioan” in their name

author_with(package_network, "Ioan")
#> [1] "Ioanna Manolopoulou"    "Ioannis N Athanasiadis"
#> [3] "Ioannis Tsamardinos"    "Ioannis Kosmidis"      
#> [5] "Eleni Ioanna Delatola"  "Ioana-Elena Oana"      
#> [7] "Lazaros Ioannidis"      "Alex Ioannides"

Packages suggested by, imported by and enhanced by the sf package

# Packages that sf lists as suggested
suggested_by(package_network, "sf", exact = TRUE)
#>  [1] "blob"           "covr"           "dplyr"          "ggplot2"       
#>  [5] "knitr"          "lwgeom"         "maps"           "maptools"      
#>  [9] "mapview"        "microbenchmark" "odbc"           "pillar"        
#> [13] "pool"           "raster"         "rgdal"          "rgeos"         
#> [17] "rlang"          "rmarkdown"      "RPostgres"      "RPostgreSQL"   
#> [21] "RSQLite"        "sp"             "spatstat"       "stars"         
#> [25] "testthat"       "tibble"         "tidyr"          "tmap"          
#> [29] "vctrs"
# Packages that sf imports
imported_by(package_network, "sf", exact = TRUE)
#>  [1] "classInt"  "DBI"       "graphics"  "grDevices" "grid"     
#>  [6] "magrittr"  "Rcpp"      "stats"     "tools"     "units"    
#> [11] "utils"
# Packages that sf enhances
enhanced_by(package_network, "sf", exact = TRUE)
#> character(0)

Packages that are suggesting, importing, enhancing the sf package

# Reverse direction: packages that suggest sf
suggesting(package_network, "sf", exact = TRUE)
#>  [1] "adklakedata"     "arcos"           "BIOMASS"        
#>  [4] "biscale"         "c14bazAAR"       "cancensus"      
#>  [7] "ckanr"           "DeclareDesign"   "echor"          
#> [10] "EcoIndR"         "eddi"            "eRTG3D"         
#> [13] "fasterize"       "geohashTools"    "geojson"        
#> [16] "geometa"         "ggformula"       "ggiraph"        
#> [19] "ggplot2"         "googlePolylines" "GSODR"          
#> [22] "gstat"           "gtfsrouter"      "ipumsr"         
#> [25] "isoband"         "janitor"         "leaflet"        
#> [28] "leafpop"         "leri"            "lutz"           
#> [31] "mapdeck"         "mlr"             "MODIStsp"       
#> [34] "mudata2"         "NetLogoR"        "nlaR"           
#> [37] "nlgeocoder"      "osmdata"         "pinochet"       
#> [40] "plotly"          "raster"          "rcartocolor"    
#> [43] "rgrass7"         "rmangal"         "rnoaa"          
#> [46] "sdcSpatial"      "sociome"         "SpaDES.core"    
#> [49] "SpaDES.tools"    "spatialreg"      "spatialwidget"  
#> [52] "spbabel"         "spData"          "stormwindmodel" 
#> [55] "streamDepletr"   "swmmr"           "tabularaster"   
#> [58] "tricolore"       "USAboundaries"   "weathercan"
# Reverse direction: packages that import sf
importing(package_network, "sf", exact = TRUE)
#>   [1] "amt"              "areal"            "bdl"             
#>   [4] "bnspatial"        "brazilmaps"       "btb"             
#>   [7] "capm"             "cartogram"        "cartography"     
#>  [10] "cdcfluview"       "censusxy"         "compstatr"       
#>  [13] "concaveman"       "crawl"            "crimedata"       
#>  [16] "cyclestreets"     "diffman"          "dssd"            
#>  [19] "ebirdst"          "eixport"          "elevatr"         
#>  [22] "EmissV"           "eSDM"             "eurostat"        
#>  [25] "exactextractr"    "FedData"          "fingertipscharts"
#>  [28] "foieGras"         "gdalUtilities"    "geobr"           
#>  [31] "geogrid"          "geojsonio"        "geonetwork"      
#>  [34] "geoviz"           "ggsn"             "ggspatial"       
#>  [37] "grainchanger"     "graph4lg"         "GWSDAT"          
#>  [40] "hydrolinks"       "jpmesh"           "jpndistrict"     
#>  [43] "kokudosuuchi"     "LAGOSNE"          "landsepi"        
#>  [46] "lconnect"         "leafem"           "leafpm"          
#>  [49] "lidR"             "linemap"          "link2GI"         
#>  [52] "lwgeom"           "macleish"         "mapedit"         
#>  [55] "mapi"             "mapsapi"          "mapview"         
#>  [58] "MODIS"            "MODISTools"       "moveVis"         
#>  [61] "ncdfgeom"         "nhdplusTools"     "nhdR"            
#>  [64] "NipponMap"        "NLMR"             "nlrx"            
#>  [67] "oceanis"          "openSTARS"        "Orcs"            
#>  [70] "osrm"             "ows4R"            "parlitools"      
#>  [73] "pct"              "plotdap"          "PWFSLSmoke"      
#>  [76] "qualmap"          "quickmapr"        "raceland"        
#>  [79] "RCzechia"         "readwritesqlite"  "reproducible"    
#>  [82] "rerddapXtracto"   "rgeopat2"         "rmapshaper"      
#>  [85] "rmapzen"          "rnaturalearth"    "rpostgisLT"      
#>  [88] "RPyGeo"           "RQGIS"            "Rsagacmd"        
#>  [91] "rSymbiota"        "sabre"            "sfdct"           
#>  [94] "slga"             "SMITIDstruct"     "smoothr"         
#>  [97] "spatialEco"       "SpatialPosition"  "spatialrisk"     
#> [100] "stats19"          "stlcsb"           "stplanr"         
#> [103] "sugarbag"         "tanaka"           "tidycensus"      
#> [106] "tidyRSS"          "tidytransit"      "tidyUSDA"        
#> [109] "tigris"           "tmap"             "tmaptools"       
#> [112] "trackeRapp"       "transformr"       "trigpoints"      
#> [115] "uavRmp"           "vein"             "velociraptr"     
#> [118] "velox"            "webTRISr"         "windfarmGA"
# Reverse direction: packages that enhance sf
enhancing(package_network, "sf", exact = TRUE)
#> [1] "landscapemetrics" "pointdexter"

Packages that depend on the sf package

depending_on(package_network, "sf", exact = TRUE)
#>  [1] "bcmaps"     "GADMTools"  "geosample"  "nngeo"      "spdep"     
#>  [6] "spsurvey"   "stars"      "stcos"      "tilegramsR" "wdpar"

Packages that are dependencies of the sf package

dependency_of(package_network, "sf", exact = TRUE)
#> [1] "methods"

Packages maintained by everyone with “Helen” in their name

# Partial match: every maintainer whose name contains "Helen"
maintained_by(package_network, "Helen")
#> [1] "ActiveDriverWGS" "ActivePathways"  "GENLIB"          "bild"           
#> [5] "cold"            "glmmsr"          "microPop"

All available information on packages maintained by everyone with “Helen” in their name

# flat = FALSE returns a table instead of a plain name vector; show its size
dim(maintained_by(package_network, "Helen", flat = FALSE))
#> [1]  7 65

The maintainer of data.table

maintainer_of(package_network, "data.table", exact = TRUE)
#> [1] "Matt Dowle"

The email of the maintainer of trackeRapp

# Two-step lookup: resolve the maintainer's name first, then use that name
# to retrieve the corresponding email address.
trackeRapp_maintainer <- maintainer_of(package_network, "trackeRapp", exact = TRUE)
email_of(package_network, trackeRapp_maintainer, exact = TRUE)
#> [1] "ioannis.kosmidis@warwick.ac.uk"

All emails of maintainers using an email address from the University of Warwick

email_with(package_network, "warwick.ac.uk")
#>  [1] "E.Kaye.1@warwick.ac.uk"         "s.virtanen@warwick.ac.uk"      
#>  [3] "Nicole.Schwitter@warwick.ac.uk" "y.weldeselassie@warwick.ac.uk" 
#>  [5] "nick.parsons@warwick.ac.uk"     "ioannis.kosmidis@warwick.ac.uk"
#>  [7] "s.tavakoli@warwick.ac.uk"       "d.selby@warwick.ac.uk"         
#>  [9] "s.stein@warwick.ac.uk"          "s.e.f.spencer@warwick.ac.uk"   
#> [11] "D.Vats@warwick.ac.uk"           "a.dickerson@warwick.ac.uk"     
#> [13] "d.firth@warwick.ac.uk"

The title, the description and the version of the semnar package

title_of(package_network, "semnar", exact = TRUE)
#> [1] "Constructing and Interacting with Databases of Presentations"
description_of(package_network, "semnar", exact = TRUE)
#> [1] "Provides methods for constructing and maintaining a database of presentations in R. The presentations are either ones that the user gives or gave or presentations at a particular event or event series. The package also provides a plot method for the interactive mapping of the presentations using 'leaflet' by grouping them according to country, city, year and other presentation attributes. The markers on the map come with popups providing presentation details (title, institution, event, links to materials and events, and so on)."
version_of(package_network, "semnar", exact = TRUE)
#> [1] "0.7.1"

Distribution of the release dates of all packages in CRAN

# Passing Inf requests the release dates of all packages in the network;
# the resulting dates are binned into a frequency histogram.
hist(release_date_of(package_network, Inf),
     breaks = 50, main = "", xlab = "date", freq = TRUE)

Word clouds

Since version 0.5, cranly provides methods to construct word clouds of either author names, package descriptions or package titles. For example, the word clouds of the descriptions of the packages maintained by me, Achim Zeileis, and Edzer Pebesma are

# One word cloud per maintainer (exact name match), keeping every term that
# occurs at least once.
package_network %>%
    word_cloud(maintainer = "Ioannis Kosmidis", exact = TRUE, min.freq = 1)

package_network %>%
    word_cloud(maintainer = "Achim Zeileis", exact = TRUE, min.freq = 1)

package_network %>%
    word_cloud(maintainer = "Edzer Pebesma", exact = TRUE, min.freq = 1)

and the word clouds of the titles of those packages are

# Same three maintainers, but with perspective = "title" and an explicit
# scale for the word sizes.
package_network %>%
    word_cloud(maintainer = "Ioannis Kosmidis", perspective = "title",
               exact = TRUE, scale = c(2, 0.1), min.freq = 1)

package_network %>%
    word_cloud(maintainer = "Achim Zeileis", perspective = "title",
               exact = TRUE, scale = c(2, 0.1), min.freq = 1)

package_network %>%
    word_cloud(maintainer = "Edzer Pebesma", perspective = "title",
               exact = TRUE, scale = c(2, 0.1), min.freq = 1)

More complex queries can be achieved by using the extractor functions and computing the term frequencies manually. For example, the word cloud of the descriptions of all packages maintained by people with “warwick.ac.uk” in their email is

# Full records (flat = FALSE) for every package whose maintainer email
# contains "warwick.ac.uk"; the package names are then read from that table.
warwick_emails <- package_network %>% email_with("warwick.ac.uk", flat = FALSE)
warwick_pkgs <- warwick_emails$package
# The names come straight from the network, so match them exactly. With
# exact = FALSE each name acts as a partial pattern, and a name like "brglm"
# would also pull in the description of an unrelated package such as "mbrglm".
descriptions <- package_network %>% description_of(warwick_pkgs, exact = TRUE)
# Turn the descriptions into term counts and draw the cloud from those counts.
term_frequency <- compute_term_frequency(descriptions)
word_cloud(term_frequency, min.freq = 1)