Linakis et al. (2020): Analysis and Figure Generation

Abstract

Currently it is difficult to prospectively estimate human toxicokinetics (particularly for novel chemicals) in a high-throughput manner. The R software package httk has been developed, in part, to address this deficiency, and the aim of this investigation was to develop a generalized inhalation model for httk. The structure of the inhalation model was developed from two previously published physiologically-based models from Jongeneelen et al. (2011) and Clewell et al. (2001) while calculated physicochemical data was obtained from EPA’s CompTox Chemicals Dashboard. In total, 142 exposure scenarios across 41 volatile organic chemicals were modeled and compared to published data. The slope of the regression line of best fit between log-transformed simulated and observed combined measured plasma and blood concentrations was 0.59 with an r2= 0.54 and a Root Mean Square Error (RMSE) of direct comparison between the log-transformed simulated and observed values of 0.87. Approximately 3.6% (n = 73) of the data points analyzed were > 2 orders of magnitude different than expected. The volatile organic chemicals examined in this investigation represent small, generally lipophilic molecules. Ultimately this paper details a generalized inhalation component that integrates with the httk physiologically-based toxicokinetic model to provide high-throughput estimates of inhalation chemical exposures.

Prepare for session

Load the relevant libraries

knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)
library(httk)
library(ggplot2)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:gdata':
## 
##     combine

library(cowplot)

## 
## ********************************************************

## Note: As of version 1.0.0, cowplot does not change the

##   default ggplot2 theme anymore. To recover the previous

##   behavior, execute:
##   theme_set(theme_cowplot())

## ********************************************************

library(ggrepel)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following objects are masked from 'package:data.table':
## 
##     between, first, last

## The following objects are masked from 'package:gdata':
## 
##     combine, first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)
library(forcats)
library(smatr)
# Delete all objects from memory:
# (rm(list=ls()) removes every object in the current environment -- the
# global environment when this runs at top level. Attached packages are
# not affected.)
rm(list=ls())
# We love to give warning messages whenever assumptions are used by HTTK,
# but they will overwhelm the output of this vignette so we turn them
# off:
# (warn = -1 silences ALL warnings, not only HTTK's, and the previous
# setting is not restored anywhere in the code visible here.)
options(warn = -1)

###Get metabolism and concentration data

# Short aliases for the two datasets used throughout the analysis.
# NOTE(review): metabolism_data_Linakis2020 and
# concentration_data_Linakis2020 are not defined in the visible code --
# presumably datasets provided by the httk package; confirm.
met_data <- metabolism_data_Linakis2020
conc_data <- concentration_data_Linakis2020

ANALYSIS

Identify chemicals currently in our metabolism data that we don’t have good concentration/time data for and remove them from our training dataset

Data summary for chemical properties

# Small molecule chemicals
summary(met_data[["AVERAGE_MASS"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   32.04   84.93  102.18  106.77  128.26  202.26

# Generally more lipophilic chemicals
summary(met_data[["OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.6059  1.0047  1.9649  2.0161  2.8286  5.4374

# Unsurprisingly then, the chemicals are generally less water-soluble
summary(met_data[["WATER_SOLUBILITY_MOL.L_OPERA_PRED"]])

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.00000  0.00188  0.01395  2.12511  0.25438 26.12290

# Total number of concentration records; denominator for the two
# percentage breakdowns below.
n_conc_rows <- nrow(conc_data)

# ~60% of samples in humans
table(conc_data[["CONC_SPECIES"]]) / n_conc_rows * 100

## 
##        ABL         BL    BL (+W)         EB    EB (+W)        EEB        MEB 
##  4.2483660 35.3408030  0.6069094 24.8366013  0.7002801  2.1008403  0.4668534 
##         PL        VBL 
##  1.4472456 30.2521008

Exposure scenarios

# Build a table with one row per unique external exposure scenario.
# Step 1: sort the observations by chemical, species, sampling matrix,
# numeric dose and exposure length, with the LATEST sampling time first
# inside each scenario.
scenario_order <- order(
  conc_data$PREFERRED_NAME,
  conc_data$CONC_SPECIES,
  conc_data$SAMPLING_MATRIX,
  as.numeric(as.character(conc_data$DOSE)),
  conc_data$EXP_LENGTH,
  -conc_data$TIME)
sorted_conc_data <- conc_data[scenario_order, ]
# Step 2: keep the first row of each scenario. Because of the sort above,
# the retained TIME value is the largest observed time for that scenario.
unique_scenarios <- distinct(
  sorted_conc_data,
  DTXSID, DOSE, DOSE_U, EXP_LENGTH, CONC_SPECIES, SAMPLING_MATRIX,
  .keep_all = TRUE)

Observations and Predictions

Create a list of dataframes of observed and predicted concentrations for each unique external exposure scenario

# Simulate every exposure scenario with httk::solve_gas_pbtk() and build,
# per scenario i:
#   obslist[[i]] - the observed concentration records for the scenario
#   simlist[[i]] - the simulation output as a data frame, with two added
#                  columns: `simconc` (prediction for the sampled matrix)
#                  and `unit` (its unit label)
#   plist[[i]]   - a ggplot of simulated concentration vs. time with the
#                  observations overlaid as points
# Errors from the solver are deliberately not caught: a failing scenario
# stops the analysis instead of being silently skipped.
n_scenarios <- nrow(unique_scenarios)
plist <- vector("list", n_scenarios)
simlist <- vector("list", n_scenarios)
obslist <- vector("list", n_scenarios)

# Sampling-matrix codes that share a model output column.
venous_matrices <- c("VBL", "BL", "BL (+W)")
end_exhaled_matrices <- c("EB", "EEB", "EB (+W)")

for (i in seq_len(n_scenarios)) {
  # Observations that belong to this scenario
  relconc <- subset(conc_data,
    conc_data$DTXSID == unique_scenarios$DTXSID[i] &
    conc_data$DOSE == unique_scenarios$DOSE[i] &
    conc_data$EXP_LENGTH == unique_scenarios$EXP_LENGTH[i] &
    conc_data$CONC_SPECIES == unique_scenarios$CONC_SPECIES[i] &
    conc_data$SAMPLING_MATRIX == unique_scenarios$SAMPLING_MATRIX[i])
  obslist[[i]] <- relconc

  scen_species <- as.character(unique_scenarios$CONC_SPECIES[i])
  scen_matrix <- as.character(unique_scenarios$SAMPLING_MATRIX[i])
  # as.character() first so a factor-coded dose is read by its label, not
  # its integer code (same conversion used when ordering the scenarios).
  scen_dose <- as.numeric(as.character(unique_scenarios$DOSE[i]))
  # Simulation length: latest observation time plus exposure length.
  # The *24 conversions below imply these are in days.
  scen_days <- unique_scenarios$TIME[i] + unique_scenarios$EXP_LENGTH[i]
  # Metabolism record for this chemical and species
  met_rows <- met_data$CASRN %in% unique_scenarios$CASRN[i] &
    met_data$SPECIES == unique_scenarios$CONC_SPECIES[i]

  # A single solver call: the human and non-human cases differ only in the
  # vmax.km flag (FALSE for "Human", TRUE otherwise).
  sim <- as.data.frame(solve_gas_pbtk(
    chem.cas = unique_scenarios$CASRN[i],
    days = scen_days,
    # Make sure we get conc's at the observed times:
    times = signif(relconc$TIME, 4),
    tsteps = 500,
    # Arithmetic kept exactly as in the original analysis (equals dose/24.45).
    # NOTE(review): presumably converts ppmv to uM using a molar volume of
    # 24.45 L/mol -- confirm.
    exp.conc = ((scen_dose * 1e20 * 1000) / 24450) / 1e20,
    exp.duration = unique_scenarios$EXP_LENGTH[i] * 24,
    period = scen_days * 24,
    species = scen_species,
    vmax.km = scen_species != "Human",
    vmax = met_data$VMAX[met_rows],
    km = met_data$KM[met_rows],
    suppress.messages = TRUE))

  # Pick the model output (and unit label) that matches the sampling matrix.
  # Exhaled-breath outputs are multiplied by 24.45 and labelled ppm.
  if (scen_matrix %in% venous_matrices) {
    sim$simconc <- sim$Cven
    sim_unit <- "uM"
  } else if (scen_matrix %in% "ABL") {
    sim$simconc <- sim$Cart
    sim_unit <- "uM"
  } else if (scen_matrix %in% end_exhaled_matrices) {
    sim$simconc <- sim$Cendexh * 24.45
    sim_unit <- "ppm"
  } else if (scen_matrix %in% "MEB") {
    sim$simconc <- sim$Cmixexh * 24.45
    sim_unit <- "ppm"
  } else if (scen_matrix %in% "PL") {
    sim$simconc <- sim$Cplasma
    sim_unit <- "uM"
  } else {
    # Unrecognised (or missing) matrix code: no comparable prediction.
    sim$simconc <- NA
    sim_unit <- NA
  }
  sim$unit <- sim_unit
  simlist[[i]] <- sim

  scen_title <- paste0(
    unique_scenarios$PREFERRED_NAME[i],
    " (",
    unique_scenarios$CONC_SPECIES[i],
    ", ",
    round(scen_dose, digits = 2),
    unique_scenarios$DOSE_U[i],
    " for ",
    round(unique_scenarios$EXP_LENGTH[i] * 24, digits = 2),
    "h in ",
    unique_scenarios$SAMPLING_MATRIX[i], ")")

  # Simulated curve (line) with the observations (points); time in hours.
  plist[[i]] <- ggplot(sim, aes(time * 24, simconc)) +
    geom_line() +
    xlab("Time (h)") +
    ylab(paste0("Simulated ",
      scen_matrix,
      "\nConcentration (",
      sim_unit, ")")) +
    ggtitle(scen_title) +
    geom_point(data = relconc, aes(TIME * 24, CONCENTRATION)) +
    # theme_bw() is a complete theme, so the text-size tweak has to come
    # after it to have any effect.
    theme_bw() +
    theme(text = element_text(size = 10))
}

Create a list to hold the combined observations and predictions for each scenario:

# Creation of simulated vs. observed concentration dataset
# Per-scenario fit statistics start at 0 and are overwritten later by the
# "Calculate performance statistics" loop; scenarios whose merged data has
# no rows are skipped by that loop and therefore keep the value 0.
unique_scenarios$RSQD <- 0
unique_scenarios$RMSE <- 0
unique_scenarios$AIC <- 0
# simobslist: one merged simulation/observation data frame per scenario.
simobslist <- list()
# obvpredlist: one observed-vs-predicted ggplot per scenario.
obvpredlist <- list()

Merge the simulations and observations on the basis of simulation time:

# Column renames applied to the observation records before merging
# (original name = new name). The order of the new names is also the
# column order kept for the merge.
obs_renames <- c(
  TIME = "obstime",
  CONCENTRATION = "obsconc",
  PREFERRED_NAME = "chem",
  DOSE = "dose",
  EXP_LENGTH = "explen",
  CONC_SPECIES = "species",
  SAMPLING_MATRIX = "matrix",
  AVERAGE_MASS = "mw",
  ORIG_CONC_U = "orig_conc_u")
# Column renames applied to the metabolism / chemical property record.
met_renames <- c(
  CHEM_CLASS = "chemclass",
  OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED = "logp",
  WATER_SOLUBILITY_MOL.L_OPERA_PRED = "sol",
  HENRYS_LAW_ATM.M3.MOLE_OPERA_PRED = "henry",
  VMAX = "vmax",
  KM = "km")

# Rename the columns of `df` listed in names(renames); columns that are
# not present are silently ignored (same as the one-by-one renames this
# replaces).
rename_columns <- function(df, renames) {
  hit <- colnames(df) %in% names(renames)
  colnames(df)[hit] <- unname(renames[colnames(df)[hit]])
  df
}

# For every scenario, line up each observation with the simulated values
# at the same (4-significant-digit) time and attach the chemical's
# properties. The result is stored in simobslist[[i]].
for (i in seq_along(simlist)) {
  # skips over anything for which there was no observed data or
  # insufficient information to run simulation:
  if (is.null(simlist[[i]]) || is.null(obslist[[i]])) next

  obsdata <- as.data.frame(obslist[[i]])
  simdata <- as.data.frame(simlist[[i]])

  # Make sure we are looking at consistent time points:
  simobscomb <- simdata[simdata$time %in% signif(obsdata$TIME, 4), ]
  obsdata <- subset(obsdata, signif(TIME, 4) %in% simobscomb$time)

  obsdata <- rename_columns(obsdata, obs_renames)
  # Round to match sim time:
  obsdata$time <- signif(obsdata$obstime, 4)

  # Left join: every observation is kept, simulated columns are NA where
  # no simulated row shares the time.
  simobscomb <- suppressWarnings(merge(
    obsdata[, c("time", unname(obs_renames))],
    simobscomb,
    by = "time",
    all.x = TRUE))

  # Merge with met_data: the record for this chemical and species, taken
  # from the first merged row.
  this.met_data <- subset(met_data,
    PREFERRED_NAME == simobscomb[1, "chem"] &
    SPECIES == simobscomb[1, "species"])
  this.met_data <- rename_columns(this.met_data, met_renames)
  simobscomb <- suppressWarnings(cbind(
    simobscomb,
    this.met_data[unname(met_renames)]))

  simobslist[[i]] <- simobscomb
}

Identify the appropriate matrix (for example, exhaled breath) for each observation:

# For every merged scenario, set `simconc` row by row to the model output
# that corresponds to the row's sampling matrix:
#   VBL, BL, BL (+W)  -> Cven
#   ABL               -> Cart
#   EB, EEB, EB (+W)  -> Cendexh * 24.45
#   MEB               -> Cmixexh * 24.45
#   PL                -> Cplasma
#   any other code    -> NA
# Rows whose matrix is NA keep whatever `simconc` they already had.
# MEB uses Cmixexh here, matching the mapping applied when the scenarios
# were simulated; the earlier version of this loop used Cendexh for MEB.
for (i in seq_along(simobslist)) {
  simobscomb <- simobslist[[i]]
  # Nothing to do for scenarios that were skipped or have no merged rows.
  if (is.null(simobscomb) || nrow(simobscomb) == 0) next

  if (!("simconc" %in% colnames(simobscomb))) {
    simobscomb$simconc <- NA_real_
  }

  row_matrix <- as.character(simobscomb$matrix)
  is_venous <- row_matrix %in% c("VBL", "BL", "BL (+W)")
  is_arterial <- row_matrix %in% "ABL"
  is_end_exhaled <- row_matrix %in% c("EB", "EEB", "EB (+W)")
  is_mixed_exhaled <- row_matrix %in% "MEB"
  is_plasma <- row_matrix %in% "PL"
  is_unknown <- !is.na(row_matrix) &
    !(is_venous | is_arterial | is_end_exhaled | is_mixed_exhaled | is_plasma)

  simobscomb$simconc[is_venous] <- simobscomb$Cven[is_venous]
  simobscomb$simconc[is_arterial] <- simobscomb$Cart[is_arterial]
  simobscomb$simconc[is_end_exhaled] <-
    simobscomb$Cendexh[is_end_exhaled] * 24.45
  simobscomb$simconc[is_mixed_exhaled] <-
    simobscomb$Cmixexh[is_mixed_exhaled] * 24.45
  simobscomb$simconc[is_plasma] <- simobscomb$Cplasma[is_plasma]
  simobscomb$simconc[is_unknown] <- NA

  simobslist[[i]] <- simobscomb
}

Identify which quartile each observation occurred in with respect to the latest (maximum) observed time

# Add `tquart` to every merged scenario: the quarter ("1" to "4") of the
# scenario's observation window in which each row's time falls, relative
# to the latest observed time (max.time).
#   - time == max.time would compute as quarter 5 and is folded into "4"
#   - if the latest time is 0, every timed row is in quarter "1"
#   - rows with a missing time get NA
for (i in seq_along(simobslist)) {
  simobscomb <- simobslist[[i]]
  # Nothing to do for scenarios that were skipped or have no merged rows.
  if (is.null(simobscomb) || nrow(simobscomb) == 0) next

  row_time <- simobscomb$time
  if (all(is.na(row_time))) {
    # No usable time at all (checked up front: max(..., na.rm = TRUE) of
    # an all-NA vector is -Inf, not NA).
    simobscomb$tquart <- NA_character_
  } else {
    max.time <- max(row_time, na.rm = TRUE)
    if (max.time == 0) {
      simobscomb$tquart <- ifelse(is.na(row_time), NA_character_, "1")
    } else {
      # Vectorised over rows; NA times propagate to NA quartiles.
      simobscomb$tquart <- as.character(
        pmin(1 + floor(row_time / max.time / 0.25), 4))
    }
  }
  simobslist[[i]] <- simobscomb
}

Calculate the area under the curve (AUC)

# For every merged scenario with at least one row, add:
#   obsAUCtrap / simAUCtrap - running trapezoidal AUC of the observed and
#                             simulated concentrations over `time`
#   AUCobs / AUCsim         - the maximum of those running values
#   calcAUC                 - the maximum of the `AUC` column
#                             (NOTE(review): presumably the AUC state
#                             returned by the solver -- confirm)
#   Cmaxobs / Cmaxsim       - maximum observed / simulated concentration
# AUC and Cmax are only computed when the earliest time in the scenario is
# no later than 1.03 x the exposure length (explen of the first row);
# otherwise they are set to 0.
# None of the max() calls on concentrations or AUC use na.rm, so a single
# NA in the inputs makes the corresponding summary NA.
for(i in 1:length(simobslist))
  if (nrow(simobslist[[i]])>0)
  {
    simobscomb <- simobslist[[i]]
# Calculate the AUC with the trapezoidal rule:    
    if (nrow(simobscomb)>1) 
    {
      # NOTE(review): the upper bound is max(nrow - 1, 2), so for scenarios
      # with more than two rows the loop stops at the second-to-last row
      # and the final interval is not accumulated here -- confirm whether
      # that is intended.
      for (k in 2:max(nrow(simobscomb)-1,2,na.rm=T))
      {
        # The running AUC starts at 0 on the first row (reset every pass).
        simobscomb$obsAUCtrap[1] <- 0
        simobscomb$simAUCtrap[1] <- 0
        if (min(simobscomb$time) <= (simobscomb$explen[1]*1.03) & 
            nrow(simobscomb) >=2)
        {
          # Trapezoid between rows k-1 and k, added to the running total.
          simobscomb$obsAUCtrap[k] <- simobscomb$obsAUCtrap[k-1] + 
            0.5*(simobscomb$time[k] - simobscomb$time[k-1]) * 
            (simobscomb$obsconc[k] + simobscomb$obsconc[k-1])
          simobscomb$simAUCtrap[k] <- simobscomb$simAUCtrap[k-1] + 
            0.5*(simobscomb$time[k]-simobscomb$time[k-1]) * 
            (simobscomb$simconc[k] + simobscomb$simconc[k-1])
        } else {
          # First observation is later than the exposure window allows:
          # the whole running-AUC columns are zeroed.
          simobscomb$obsAUCtrap <- 0
          simobscomb$simAUCtrap <- 0
        }
      }
    } else {
      # A single row cannot form a trapezoid.
      simobscomb$obsAUCtrap <- 0
      simobscomb$simAUCtrap <- 0
    }
    simobscomb$AUCobs <- max(simobscomb$obsAUCtrap)
    simobscomb$AUCsim <- max(simobscomb$simAUCtrap)
    simobscomb$calcAUC <- max(simobscomb$AUC)
    # Same exposure-window condition as above, applied to Cmax.
    if (min(simobscomb$time) <= simobscomb$explen[1]*1.03)
    {
      simobscomb$Cmaxobs <- max(simobscomb$obsconc)
      simobscomb$Cmaxsim <- max(simobscomb$simconc)
    } else {
      simobscomb$Cmaxobs <- 0
      simobscomb$Cmaxsim <- 0
    }
    simobslist[[i]] <- simobscomb
  }

Calculate performance statistics

# Number of model parameters used in the AIC penalty term
# (44 is the number of parameters from inhalation_inits.R).
n_model_params <- 44

# Per-scenario goodness of fit on the untransformed concentrations,
# written into row i of unique_scenarios:
#   RSQD - 1 - SSE / total sum of squares of the observations
#   RMSE - root mean squared difference between simulation and observation
#   AIC  - n * (log(2*pi) + 1 + log(SSE / n)) + 2 * (n_model_params + 1)
# No NA removal is done, so a scenario with any missing observed or
# simulated value gets NA statistics.
for (i in seq_along(simobslist)) {
  simobscomb <- simobslist[[i]]
  # Scenarios that were skipped or have no merged rows keep their initial
  # statistic values.
  if (is.null(simobscomb) || nrow(simobscomb) == 0) next

  conc_resid <- simobscomb$obsconc - simobscomb$simconc
  sse <- sum(conc_resid^2)
  n_points <- nrow(simobscomb)

  unique_scenarios$RSQD[i] <- 1 - (
    sse /
    sum((simobscomb$obsconc - mean(simobscomb$obsconc))^2)
    )
  unique_scenarios$RMSE[i] <- sqrt(mean(conc_resid^2))
  unique_scenarios$AIC[i] <-
    n_points * (
      log(2*pi) + 1 +
      log(sse / n_points)
    ) + ((n_model_params + 1) * 2)
}

Make a plot for each scenario

# Observed-vs-simulated scatter plot (with the identity line) for every
# scenario that has merged data; stored in obvpredlist[[i]].
for (i in seq_along(simobslist)) {
  simobscomb <- simobslist[[i]]
  # Nothing to plot for scenarios that were skipped or have no merged rows.
  if (is.null(simobscomb) || nrow(simobscomb) == 0) next

  # Axis unit: use the unit recorded with the simulation (exhaled-breath
  # matrices are simulated in ppm, blood and plasma in uM). Falls back to
  # "uM", the previously hard-coded label, when no unit is available.
  conc_unit <- "uM"
  if ("unit" %in% colnames(simobscomb)) {
    known_units <- as.character(simobscomb$unit[!is.na(simobscomb$unit)])
    if (length(known_units) > 0) conc_unit <- known_units[1]
  }

  scen_title <- paste0(
    unique_scenarios$PREFERRED_NAME[i],
    " (",
    unique_scenarios$CONC_SPECIES[i],
    ", ",
    round(as.numeric(as.character(unique_scenarios$DOSE[i])), digits = 2),
    unique_scenarios$DOSE_U[i],
    " for ",
    round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
    "h in ",
    unique_scenarios$SAMPLING_MATRIX[i], ")")

  obvpredplot <- ggplot(simobscomb, aes(x = simconc, y = obsconc)) +
    geom_point() +
    geom_abline() +
    xlab(paste0("Simulated Concentrations (", conc_unit, ")")) +
    ylab(paste0("Observed Concentrations (", conc_unit, ")")) +
    ggtitle(scen_title) +
    theme_bw() +
    theme(plot.title = element_text(face = 'bold', size = 20),
      axis.title.x = element_text(face = 'bold', size = 20),
      axis.text.x = element_text(size = 16),
      axis.title.y = element_text(face = 'bold', size = 20),
      axis.text.y = element_text(size = 16),
      legend.title = element_text(face = 'bold', size = 16),
      legend.text = element_text(face = 'bold', size = 14))
  obvpredlist[[i]] <- obvpredplot
}

# Pool all scenarios into one table, plus per-species subsets.
simobsfull <- do.call("rbind", simobslist)
simobsfullrat <- subset(simobsfull, species == "Rat")
simobsfullhum <- subset(simobsfull, species == "Human")
# Drop scenarios whose R-squared could not be computed (NA). After this
# line the rows of unique_scenarios no longer correspond one-to-one with
# the entries of the per-scenario lists built above.
unique_scenarios <- subset(unique_scenarios, !is.na(unique_scenarios$RSQD))

Creation of simulated concentration/time plots

# Stamp each concentration-vs-time plot with the scenario's RMSE and AIC in
# the top-right corner.
# annotate() draws the label once; a geom_text() layer with constant
# aesthetics would draw one copy per row of the plot data.
# NOTE(review): unique_scenarios has had its NA-RSQD rows removed just
# before this section, while plist still holds one plot per original
# scenario, so row i of unique_scenarios may not be the scenario shown in
# plist[[i]] -- confirm the intended alignment.
for (i in seq_along(plist)) {
  fit_label <- paste0(
    "RMSE: ",
    round(unique_scenarios$RMSE[i], digits = 2),
    "\nAIC: ",
    round(unique_scenarios$AIC[i], digits = 2))
  plist[[i]] <- plist[[i]] +
    annotate(
      "text",
      x = Inf,
      y = Inf,
      hjust = 1.3,
      vjust = 1.3,
      label = fit_label)
}

Regressions

Other analytics including linear regression on overall concentration vs. time observed vs. predicted

# Number of retained scenarios per species
table(unique_scenarios$CONC_SPECIES)

## 
## Human   Rat 
##    72    65

# Row filters shared by the counts and fits below.
#   sim_usable - simulated concentration present and positive
#   loggable   - additionally the observation is positive, so both values
#                can be log10-transformed
sim_usable <- !is.na(simobsfull$simconc) & simobsfull$simconc > 0
loggable <- sim_usable & simobsfull$obsconc > 0
n_loggable <- nrow(simobsfull[loggable, ])

# Number of pooled rows that are NOT usable on the log scale
nrow(simobsfull) - n_loggable

## [1] 568

# The same count as a percentage of all pooled rows, and the rows themselves
pmiss <- (nrow(simobsfull) - n_loggable) / nrow(simobsfull) * 100
missdata <- (simobsfull[
  is.na(simobsfull$simconc) |
  simobsfull$simconc <= 0 |
  simobsfull$obsconc <= 0, ])
# Observations taken at time zero
t0df <- simobsfull[simobsfull$obstime == 0, ]

# Overall regression of observed on simulated concentration (log10 scale)
lmall <- lm(
  log10(simobsfull$obsconc[loggable]) ~
  log10(simobsfull$simconc[loggable]))

# Untransformed regressions within three observed-concentration bins
#Linear binned 1: observed < 0.1
bin_low <- sim_usable & simobsfull$obsconc < 0.1
lmsub1 <- lm(
  simobsfull$obsconc[bin_low] ~
  simobsfull$simconc[bin_low])
#Linear binned 2: 0.1 <= observed < 10
bin_mid <- sim_usable &
  simobsfull$obsconc >= 0.1 &
  simobsfull$obsconc < 10
lmsub2 <- lm(
  simobsfull$obsconc[bin_mid] ~
  simobsfull$simconc[bin_mid])
#Linear binned 3: observed >= 10
bin_high <- sim_usable & simobsfull$obsconc >= 10
lmsub3 <- lm(
  simobsfull$obsconc[bin_high] ~
  simobsfull$simconc[bin_high])

# Rat-only regression (log10 scale) and the chemicals it covers
rat_loggable <- !is.na(simobsfullrat$simconc) &
  simobsfullrat$simconc > 0 &
  simobsfullrat$obsconc > 0
lmrat <- lm(
  log10(simobsfullrat$obsconc[rat_loggable]) ~
  log10(simobsfullrat$simconc[rat_loggable]))
unique(simobsfullrat$chem)

##  [1] "1,1-Dichloroethylene"               "1,2-Dichloroethane"                
##  [3] "1,2-Dichloropropane"                "1,3-Butadiene"                     
##  [5] "2,2-Dichloro-1,1,1-trifluoroethane" "Acrylonitrile"                     
##  [7] "Benzene"                            "Carbon tetrachloride"              
##  [9] "Chloroform"                         "Decane"                            
## [11] "Ethylbenzene"                       "Furan"                             
## [13] "Isopropanol"                        "Methanol"                          
## [15] "Nonane"                             "Octane"                            
## [17] "Pyrene"                             "Styrene"                           
## [19] "Tetrachloroethylene"                "Toluene"                           
## [21] "Trichloroethylene"                  "n-Hexane"

# Human-only regression of observed on simulated concentration (log10
# scale), restricted to rows where the simulation is present and both
# values are positive; followed by the chemicals in the human data.
hum_loggable <- !is.na(simobsfullhum$simconc) &
  simobsfullhum$simconc > 0 &
  simobsfullhum$obsconc > 0
lmhum <- lm(
  log10(simobsfullhum$obsconc[hum_loggable]) ~
  log10(simobsfullhum$simconc[hum_loggable]))
unique(simobsfullhum$chem)

##  [1] "1,1,1,2-Tetrafluoroethane"            
##  [2] "1,1,1-Trichloroethane"                
##  [3] "1,1,2-Trichloro-1,2,2-trifluoroethane"
##  [4] "1,2,4-Trimethylbenzene"               
##  [5] "1,3-Butadiene"                        
##  [6] "1,4-Dioxane"                          
##  [7] "2-Butoxyethanol"                      
##  [8] "2H-Perfluoropropane"                  
##  [9] "Benzene"                              
## [10] "Bromotrifluoromethane"                
## [11] "Chlorobenzene"                        
## [12] "Dichlorodifluoromethane"              
## [13] "Dichloromethane"                      
## [14] "Ethanol"                              
## [15] "Ethyl T-butyl ether"                  
## [16] "Ethylbenzene"                         
## [17] "Isopropanol"                          
## [18] "Methyl ethyl ketone"                  
## [19] "Methyl tert-butyl ether"              
## [20] "N-Methyl-2-pyrrolidone"               
## [21] "Styrene"                              
## [22] "Tetrachloroethylene"                  
## [23] "Tetrahydrofuran"                      
## [24] "Trichloroethylene"                    
## [25] "Vinyl chloride"                       
## [26] "tert-Amyl methyl ether"

# Summary of the overall log-log regression (lmall): slope, R-squared and
# the RMSE of its residuals.
lmall_summary <- summary(lmall)
concregslope <- lmall_summary$coef[2, 1]
concregr2 <- lmall_summary$r.squared
concregrmse <- sqrt(mean(lmall$residuals^2))

# Direct (not regression-based) comparison of simulation and observation
# over the rows where the simulation is present and both values are
# positive.
fit_rows <- !is.na(simobsfull$simconc) &
  simobsfull$simconc > 0 &
  simobsfull$obsconc > 0
fit_sim <- simobsfull$simconc[fit_rows]
fit_obs <- simobsfull$obsconc[fit_rows]
n_fit_rows <- nrow(simobsfull[fit_rows, ])

# RMSE and mean absolute error of the log10 differences
log10_diff <- log10(fit_sim) - log10(fit_obs)
totalrmse <- sqrt(mean(log10_diff^2, na.rm = TRUE))
totalmae <- mean(abs(log10_diff), na.rm = TRUE)

# AIC on the untransformed concentrations
totalaic <- n_fit_rows *
  (log(2*pi) +
     1 +
     log(sum((fit_obs - fit_sim)^2, na.rm = TRUE) / n_fit_rows)) +
  ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
# Shared quantities for the misprediction tallies below.
#   log_err    - log10(simulated) - log10(observed); positive means the
#                model over-predicts
#   sim_pos    - simulated concentration is positive
#   n_loggable - number of rows with a simulation and both values positive;
#                the denominator of every percentage
# Counts are read with ["TRUE"] rather than by position, so they stay
# correct when a tally contains only TRUE values.
log_err <- log10(simobsfull$simconc) - log10(simobsfull$obsconc)
sim_pos <- simobsfull$simconc > 0
n_loggable <- nrow(simobsfull[
  !is.na(simobsfull$simconc) &
  simobsfull$simconc > 0 &
  simobsfull$obsconc > 0, ])

# More than 2 orders of magnitude off, in either direction
mispred <- table(abs(log_err) > 2 & sim_pos)
mispred["TRUE"]

## TRUE 
##   98

mispred["TRUE"] / n_loggable * 100

##     TRUE 
## 6.064356

# Over-predicted by more than 2 orders of magnitude
overpred <- table(log_err > 2 & sim_pos)
overpred["TRUE"]

## TRUE 
##   11

overpred["TRUE"] / n_loggable * 100

##      TRUE 
## 0.6806931

# Under-predicted by more than 2 orders of magnitude
underpred <- table(-log_err > 2 & sim_pos)
underpred["TRUE"]

## TRUE 
##   87

underpred["TRUE"] / n_loggable * 100

##     TRUE 
## 5.383663

# More than half an order of magnitude off, in either direction
mispredhalf <- table(abs(log_err) > 0.5 & sim_pos)
mispredhalf["TRUE"]

## TRUE 
##  641

mispredhalf["TRUE"] / n_loggable * 100

##     TRUE 
## 39.66584

# Over-predicted by more than half an order of magnitude
overpredhalf <- table(log_err > 0.5 & sim_pos)
overpredhalf["TRUE"]

## TRUE 
##  339

overpredhalf["TRUE"] / n_loggable * 100

##     TRUE 
## 20.97772

# Under-predicted by more than half an order of magnitude
underpredhalf <- table(-log_err > 0.5 & sim_pos)
underpredhalf["TRUE"]

## TRUE 
##  302

underpredhalf["TRUE"] / n_loggable * 100

##     TRUE 
## 18.68812

# Rows where the model under-predicts at all (simulated < observed), and
# the percentage of each chemical class's rows that they represent.
chemunderpred <- subset(simobsfull, log_err < 0 & sim_pos)
# Tabulate both sides over the same set of classes so the division lines
# up even if some class has no under-predicted rows.
class_levels <- names(table(simobsfull$chemclass))
table(factor(chemunderpred$chemclass, levels = class_levels)) /
  table(simobsfull$chemclass) * 100

## 
##                      Alcohol        Aliphatic hydrocarbon 
##                    30.000000                    13.698630 
##         Aromatic hydrocarbon                        Ether 
##                    35.520362                    50.000000 
## Fluorinated organic compound Halogenated organic compound 
##                     7.807808                    38.480097 
##                        Other 
##                    65.665236

## 
##                      Alcohol        Aliphatic hydrocarbon 
##                    30.000000                    13.698630 
##         Aromatic hydrocarbon                        Ether 
##                    35.520362                    50.000000 
## Fluorinated organic compound Halogenated organic compound 
##                     7.807808                    38.480097 
##                        Other 
##                    65.665236

Linakis et al. (2020): Analysis and Figure Generation

Matt Linakis

January 30, 2020

Abstract

Prepare for session

Load the relevant libraries

ANALYSIS

Data summary for chemical properties

Exposure scenarios

Observations and Predictions

Creation of simulated concentration/time plots

Regressions

TABLE AND PLOT GENERATION

Concentration vs. time

Figure 2: overall observed vs. predicted plot

Create and read out plots of overall cvt, cmax, and auc observed vs. pred

Figure 4: Cmax and AUC observed vs. Predicted Values

Figure 3: Separation by chemical class

Figures S1A-S1D: Separation by time quartile and physicochemical properties

Supplemental Table 2: Leave-one-out Chemical Sensitivity Analysis

Supplemental Table 1