Ring et al. (2017) Vignette 4: Generating Figure 3

This vignette contains code to generate Figure 3 from the paper: a plot of HTTK-Pop predicted median Css vs. literature median Css.

library('data.table')
library('ggplot2')
library('httk')

Next, load in the model-predicted median Css values for the Age 20-50 non-obese sub-population.

poormetab <- TRUE
fup.censored.dist <- TRUE
grp <- 'Age.20.50.nonobese'
model <- '3compartmentss'
popmethod <- 'dr'

m.dat <- readRDS(paste0('data/',
                        paste('allchems',
                              grp,
                              popmethod,
                              'poormetab',
                              poormetab,
                              'fup.censored.dist',
                              fup.censored.dist,
                              model,
                              "FuptoFub",
                              sep='_'),
                        '.Rdata'))

And read in the literature values of median Css. Start with data from Obach et al. 2008.

#Convert clearance in mL/min/kg to Css in mg/L for a dose of 1 mg/kg/day
Obach2008[,"Css (mg/L)"] <- 1/
  (24*60*
     as.numeric(Obach2008[,
                          "CL (mL/min/kg)"])/
     1000) # 1 mg/kg/day / L/day/kg -> mg/L
literature.Css <- Obach2008[,c("Name","CAS","Css (mg/L)")]
Obach2008.dt <- as.data.table(literature.Css)

Wetmore2012 <- Wetmore2012[c(1:2,13,3:12),]
Wetmore2012.Css <- Wetmore2012[,c("Chemical","CAS","Literature.Css")]
Wetmore2012.Css[, 'Literature.Css'] <- gsub(x=Wetmore2012.Css[,
                                                              'Literature.Css'], 
                                            pattern=',', 
                                            replacement='')
#strip commas from Css values for proper conversion
Wetmore2012.Css[, "Literature.Css"] <- as.numeric(Wetmore2012.Css[,
                                                                  "Literature.Css"])
colnames(Wetmore2012.Css) <- c("Name","CAS","Css (mg/L)")
Wetmore2012.Css <- Wetmore2012.Css[!is.na(Wetmore2012.Css[,"Css (mg/L)"]),]
Wetmore2012.dt <- as.data.table(Wetmore2012.Css)

litCss.dt <- rbind(Obach2008.dt, Wetmore2012.dt)
setnames(litCss.dt, c('Name', 'Css (mg/L)'), c('Compound', 'litcss'))

Convert literature in vivo Css from mg/L to umol/L (uM), using Css/(MW/1000), where MW is molecular weight in grams per mole of substance.

#MW = weight in grams per mole of substance
#MW*1000 = weight in mg per mole of substance
#MW*1000/10^6 = MW/1000 = weight in mg per umol of substance
#Css in mg/L divided by MW in mg/umol = umol/L = uM

#get MW from HTTK cheminfo
chemlist <- as.data.table(httk::get_cheminfo(info=c('CAS', 'Compound', 'MW'),
                                             species='Human',
                                             model=model,
                                             exclude.fup.zero=FALSE))
setnames(chemlist, 'CAS', 'chemcas')
litCss.dt <- merge(litCss.dt,
                   chemlist[, .(Compound, MW)],
                   by='Compound')
#Now merge with litcss
setnames(litCss.dt,
         'CAS',
         'chemcas')
litCss.dt[, litcss:=litcss/(MW/1000)]

Next, make a data table with the literature Css values in one column and the model-predicted Css values in another.

tmp.med <- merge(m.dat[, .(chemcas, css50)],
                 litCss.dt,
                 by=c('chemcas')
                 )

Do a linear regression on the log10-scaled data, to assess the correlation between model-predicted and literature Css.

mymodel <- lm(log10(css50)~log10(litcss), data=tmp.med)
summary(mymodel)

p <- ggplot(data=tmp.med)+
  geom_point(aes(y=css50, #Plot data points
                 x=litcss),
             shape=21, #As open circles
             size=4) +
  stat_smooth(data=tmp.med, #Plot best-fit line
              mapping=aes(x=litcss, y=css50), #data will be log10-transformed before this is done, so this is the same as the linear regression above
              method="lm",
              linetype=2, #dashed line
              size=1,
              color="black",
              alpha=0.25, #shading for confidence interval
              level=0.99) + #99% CI
  geom_abline(slope=1,intercept=0, #identity line
              linetype=1, size=1, color="black")+ 
  scale_x_log10() + #scale x data as log10
  scale_y_log10() + #scale y data as log10
  labs(x='Literature median Css (uM)', 
       y='HTTK-Pop median Css (uM)') +
  theme_bw()+
  theme(axis.text.x=element_text(size=rel(2), angle=45, hjust=1),
        axis.text.y=element_text(size=rel(2)),
        axis.title.x=element_text(size=rel(2)),
        axis.title.y=element_text(size=rel(2)))
print(p)

The two chemicals with the highest literature Css values look very influential. What are they?

#First find the chemicals with the two highest literature Css values
setorder(tmp.med, litcss) #sort ascending by literature Css
knitr::kable(tmp.med[(nrow(tmp.med)-1):nrow(tmp.med),
                     .(Compound, chemcas, css50, litcss, MW)],
             format="markdown",
             col.names=c("Compound", "CASRN", "Predicted median Css", "Literature median Css", "MW (g/mol)"))

tmp.med2 <- tmp.med[1:(nrow(tmp.med)-2), ] #remove the highest two lit Css chemicals
mymodel2 <- lm(log10(css50)~log10(litcss), data=tmp.med2) #re-do regression
summary(mymodel2)

p2 <- p + stat_smooth(data=tmp.med2,#Add new best-fit line
                      mapping=aes(x=litcss, y=css50),
                      method="lm",
                      color="black",
                      linetype=3, #dotted
                      size=1.2, #slightly increase thickness
                      level=0.99, #plot 99% CI shading
                      alpha=0.25)
print(p2)
ggsave(filename="pdf_figures/Figure3_FuptoFub.pdf",
       plot=p2,
       height=11, width=14)

The intercept and slope of the best-fit line don’t change all that much when we exclude PFOS and PFOA; the correlation (R^2) decreases somewhat, from 0.32 to 0.22.

To check whether the slopes of the regressions are significantly different from 1, check whether each model is significantly different from a corresponding model with a slope of 1.

offset.model <- lm(log10(css50)~offset(log10(litcss)), data=tmp.med) #forces slope of 1
summary(offset.model)
anova(mymodel, offset.model) #Compare original regression to offset model

offset.model2 <- lm(log10(css50)~offset(log10(litcss)), data=tmp.med2) #forces slope of 1
summary(offset.model2)
anova(mymodel2, offset.model2) #Compare second regression to offset model

Finally, to generate a table of the predicted values, literature values, and the difference between them, just add a column to the data table. Sort by the ratio between predicted and literature values to see which chemicals are underpredicted or overpredicted, and by how much.

Ring et al. (2017) Vignette 4: Generating Figure 3

Caroline Ring

2020-07-19