Ring et al. (2017) Vignette 1: Generating subpopulations

Caroline Ring

2020-07-19

This vignette provides the code used to generate the virtual populations for the ten subpopulations of interest, plus a non-obese adult subpopulation.

To use the code in this vignette, you’ll first need to load a few packages.

library("httk")
library("data.table")

Because we’ll be writing some files (and then reading them in other vignettes), be aware of where your current working directory is – the files will be written there. # Set up subpopulation specs Here, we set the number of individuals in each virtual population (1000). We also specify a list of names for the virtual populations. Then we specify corresponding lists of gender, age limits, and BMI categories. HTTK-Pop default values will be used for other population specifications (e.g. race/ethnicity, kidney function categories).

nsamp<-1000
#List subpop names
ExpoCast.group<-list("Total",
                     "Age.6.11",
                     "Age.12.19",
                     "Age.20.65",
                     "Age.GT65",
                     "BMIgt30",
                     "BMIle30",
                     "Females",
                     "Males",
                     "ReproAgeFemale",
                     "Age.20.50.nonobese")
#List subpop gender specifications
gendernum <- c(rep(list(NULL),7), 
               list(list(Male=0, Female=1000)), 
               list(list(Male=1000, Female=0)), 
               list(list(Male=0, Female=1000)), 
               list(NULL))
#List subpop age limits in years
agelim<-c(list(c(0,79),
               c(6,11),
               c(12,19),
               c(20,65),
               c(66,79)),
          rep(list(c(0,79)),4),
          list(c(16,49)),
          list(c(20,50)))
#List subpop weight categories
bmi_category <- c(rep(list(c('Underweight', 
                             'Normal',
                             'Overweight',
                             'Obese')),
                      5),
                  list('Obese', c('Underweight','Normal', 'Overweight')),
                  rep(list(c('Underweight', 
                             'Normal',
                             'Overweight',
                             'Obese')),
                      3),
                  list(c('Underweight', 'Normal', 'Overweight')))

Generate populations

First, define the loop body as a function; then use parallel::clusterMap to parallelize it. Warning: This might take a couple of minutes to run.

tmpfun <- function(gendernum, agelim, bmi_category, ExpoCast_grp,
                   nsamp, method){
  result <- tryCatch({
                     pops <- httk::httkpop_generate(
                  method=method,
                  nsamp = nsamp,
                  gendernum = gendernum,
                  agelim_years = agelim,
                  weight_category = bmi_category)
                  
                  filepart <- switch(method,
                                     'virtual individuals' = 'vi',
                                     'direct resampling' = 'dr')
saveRDS(object=pops,
          file=paste0(paste('data/httkpop',
                      filepart, ExpoCast_grp, 
                      sep='_'),
                      '.Rdata'))
return(0)
}, error = function(err){
  print(paste('Error occurred:', err))
  return(1)
})
}

cluster <- parallel::makeCluster(2, # Reduced from 40 to 2 cores 
                       outfile='subpopulations_parallel_out.txt')

evalout <- parallel::clusterEvalQ(cl=cluster,
             {library(data.table)
              library(httk)})
parallel::clusterExport(cl = cluster,
              varlist = 'tmpfun')
#Set seeds on all workers for reproducibility
parallel::clusterSetRNGStream(cluster, 
                              TeachingDemos::char2seed("Caroline Ring"))
out_vi <- parallel::clusterMap(cl=cluster,
                  fun = tmpfun,
                  gendernum=gendernum,
                  agelim=agelim,
                  bmi_category=bmi_category,
                  ExpoCast_grp = ExpoCast.group,
                  MoreArgs = list(nsamp = nsamp,
                                  method = 'virtual individuals'))
out_dr <- parallel::clusterMap(cl=cluster,
                  fun = tmpfun,
                  gendernum=gendernum,
                  agelim=agelim,
                  bmi_category=bmi_category,
                  ExpoCast_grp = ExpoCast.group,
                  MoreArgs = list(nsamp = nsamp,
                                  method = 'direct resampling'))
parallel::stopCluster(cluster)