Ring et al. (2017) Vignette 4: Generating Figure 3

Caroline Ring

2020-07-19

This vignette contains code to generate Figure 3 from the paper: a plot of HTTK-Pop predicted median Css vs. literature median Css.

First, load some packages.

library('data.table')
library('ggplot2')
library('httk')

Next, load in the model-predicted median Css values for the Age 20-50 non-obese sub-population.

poormetab <- TRUE
fup.censored.dist <- TRUE
grp <- 'Age.20.50.nonobese'
model <- '3compartmentss'
popmethod <- 'dr'

m.dat <- readRDS(paste0('data/',
                        paste('allchems',
                              grp,
                              popmethod,
                              'poormetab',
                              poormetab,
                              'fup.censored.dist',
                              fup.censored.dist,
                              model,
                              "FuptoFub",
                              sep='_'),
                        '.Rdata'))

And read in the literature values of median Css. Start with data from Obach et al. 2008.

#Convert clearance in mL/min/kg to Css in mg/L for a dose of 1 mg/kg/day
Obach2008[,"Css (mg/L)"] <- 1/
  (24*60*
     as.numeric(Obach2008[,
                          "CL (mL/min/kg)"])/
     1000) # 1 mg/kg/day / L/day/kg -> mg/L
literature.Css <- Obach2008[,c("Name","CAS","Css (mg/L)")]
Obach2008.dt <- as.data.table(literature.Css)

Then add in some data from Wetmore et al. 2012.

Wetmore2012 <- Wetmore2012[c(1:2,13,3:12),]
Wetmore2012.Css <- Wetmore2012[,c("Chemical","CAS","Literature.Css")]
Wetmore2012.Css[, 'Literature.Css'] <- gsub(x=Wetmore2012.Css[,
                                                              'Literature.Css'], 
                                            pattern=',', 
                                            replacement='')
#strip commas from Css values for proper conversion
Wetmore2012.Css[, "Literature.Css"] <- as.numeric(Wetmore2012.Css[,
                                                                  "Literature.Css"])
colnames(Wetmore2012.Css) <- c("Name","CAS","Css (mg/L)")
Wetmore2012.Css <- Wetmore2012.Css[!is.na(Wetmore2012.Css[,"Css (mg/L)"]),]
Wetmore2012.dt <- as.data.table(Wetmore2012.Css)

Combine the Wetmore and Obach data

litCss.dt <- rbind(Obach2008.dt, Wetmore2012.dt)
setnames(litCss.dt, c('Name', 'Css (mg/L)'), c('Compound', 'litcss'))

Convert literature in vivo Css from mg/L to umol/L (uM), using Css/(MW/1000), where MW is molecular weight in grams per mole of substance.

#MW = weight in grams per mole of substance
#MW*1000 = weight in mg per mole of substance
#MW*1000/10^6 = MW/1000 = weight in mg per umol of substance
#Css in mg/L divided by MW in mg/umol = umol/L = uM

#get MW from HTTK cheminfo
chemlist <- as.data.table(httk::get_cheminfo(info=c('CAS', 'Compound', 'MW'),
                                             species='Human',
                                             model=model,
                                             exclude.fup.zero=FALSE))
setnames(chemlist, 'CAS', 'chemcas')
litCss.dt <- merge(litCss.dt,
                   chemlist[, .(Compound, MW)],
                   by='Compound')
#Now merge with litcss
setnames(litCss.dt,
         'CAS',
         'chemcas')
litCss.dt[, litcss:=litcss/(MW/1000)]

Next, make a data table with the literature Css values in one column and the model-predicted Css values in another.

tmp.med <- merge(m.dat[, .(chemcas, css50)],
                 litCss.dt,
                 by=c('chemcas')
                 )

Do a linear regression on the log10-scaled data, to assess the correlation between model-predicted and literature Css.

mymodel <- lm(log10(css50)~log10(litcss), data=tmp.med)
summary(mymodel)

Plot the data, along with the identity line and linear regression.

p <- ggplot(data=tmp.med)+
  geom_point(aes(y=css50, #Plot data points
                 x=litcss),
             shape=21, #As open circles
             size=4) +
  stat_smooth(data=tmp.med, #Plot best-fit line
              mapping=aes(x=litcss, y=css50), #data will be log10-transformed before this is done, so this is the same as the linear regression above
              method="lm",
              linetype=2, #dashed line
              size=1,
              color="black",
              alpha=0.25, #shading for confidence interval
              level=0.99) + #99% CI
  geom_abline(slope=1,intercept=0, #identity line
              linetype=1, size=1, color="black")+ 
  scale_x_log10() + #scale x data as log10
  scale_y_log10() + #scale y data as log10
  labs(x='Literature median Css (uM)', 
       y='HTTK-Pop median Css (uM)') +
  theme_bw()+
  theme(axis.text.x=element_text(size=rel(2), angle=45, hjust=1),
        axis.text.y=element_text(size=rel(2)),
        axis.title.x=element_text(size=rel(2)),
        axis.title.y=element_text(size=rel(2)))
print(p)

The two chemicals with the highest literature Css values look very influential. What are they?

#First find the chemicals with the two highest literature Css values
setorder(tmp.med, litcss) #sort ascending by literature Css
knitr::kable(tmp.med[(nrow(tmp.med)-1):nrow(tmp.med),
                     .(Compound, chemcas, css50, litcss, MW)],
             format="markdown",
             col.names=c("Compound", "CASRN", "Predicted median Css", "Literature median Css", "MW (g/mol)"))

Let’s see what the regression looks like without PFOS and PFOA.

tmp.med2 <- tmp.med[1:(nrow(tmp.med)-2), ] #remove the highest two lit Css chemicals
mymodel2 <- lm(log10(css50)~log10(litcss), data=tmp.med2) #re-do regression
summary(mymodel2)

And re-do the plot with the new regression, excluding PFOS and PFOA.

p2 <- p + stat_smooth(data=tmp.med2,#Add new best-fit line
                      mapping=aes(x=litcss, y=css50),
                      method="lm",
                      color="black",
                      linetype=3, #dotted
                      size=1.2, #slightly increase thickness
                      level=0.99, #plot 99% CI shading
                      alpha=0.25)
print(p2)
ggsave(filename="pdf_figures/Figure3_FuptoFub.pdf",
       plot=p2,
       height=11, width=14)

The intercept and slope of the best-fit line don’t change all that much when we exclude PFOS and PFOA; the correlation (R^2) decreases somewhat, from 0.32 to 0.22.

To check whether the slopes of the regressions are significantly different from 1, check whether each model is significantly different from a corresponding model with a slope of 1.

offset.model <- lm(log10(css50)~offset(log10(litcss)), data=tmp.med) #forces slope of 1
summary(offset.model)
anova(mymodel, offset.model) #Compare original regression to offset model

offset.model2 <- lm(log10(css50)~offset(log10(litcss)), data=tmp.med2) #forces slope of 1
summary(offset.model2)
anova(mymodel2, offset.model2) #Compare second regression to offset model

Both p-values indicate that the slope is significantly different from 1.

Finally, to generate a table of the predicted values, literature values, and the difference between them, just add a column to the data table. Sort by the ratio between predicted and literature values to see which chemicals are underpredicted or overpredicted, and by how much.

tmp.med[, predicted.over.lit:=css50/litcss]
setorder(tmp.med, predicted.over.lit)

How many are within 10-fold?

tmp.med[, sum(predicted.over.lit<=10 & predicted.over.lit>=0.1)]

How many are within 5-fold?

tmp.med[, sum(predicted.over.lit<=5 & predicted.over.lit>=0.2)]

How many are within 2-fold?

tmp.med[, sum(predicted.over.lit<=2 & predicted.over.lit>=0.5)]

How many are overpredicted?

tmp.med[, sum(predicted.over.lit>1)]

Compared to the total:

nrow(tmp.med)