Required packages

library(vinereg)
library(quantreg)
library(ggplot2)
library(dplyr)
library(purrr)
library(scales)

Plot function for marginal effects

plot_marginal_effects <- function(covs, preds) {
    cbind(covs, preds) %>%
        tidyr::gather(alpha, prediction, -seq_len(NCOL(covs))) %>%
        dplyr::mutate(prediction = as.numeric(prediction)) %>%
        tidyr::gather(variable, value, -(alpha:prediction)) %>%
        dplyr::mutate(value = as.numeric(value)) %>%
        ggplot(aes(value, prediction, color = alpha)) +
        geom_point(alpha = 0.15) + 
        geom_smooth(span = 0.5, se = FALSE) + 
        facet_wrap(~ variable, scale = "free_x") +
        theme(legend.position = "none") +
        theme(plot.margin = unit(c(0, 0, 0, 0), "mm")) +
        xlab("")
}

Data preparation

Load data

bikedata <- read.csv("day.csv")
bikedata[, 2] <- as.Date(bikedata[, 2])
head(bikedata)
##   instant     dteday season yr mnth holiday weekday workingday weathersit
## 1       1 2011-01-01      1  0    1       0       6          0          2
## 2       2 2011-01-02      1  0    1       0       0          0          2
## 3       3 2011-01-03      1  0    1       0       1          1          1
## 4       4 2011-01-04      1  0    1       0       2          1          1
## 5       5 2011-01-05      1  0    1       0       3          1          1
## 6       6 2011-01-06      1  0    1       0       4          1          1
##       temp    atemp      hum windspeed casual registered  cnt
## 1 0.344167 0.363625 0.805833 0.1604460    331        654  985
## 2 0.363478 0.353739 0.696087 0.2485390    131        670  801
## 3 0.196364 0.189405 0.437273 0.2483090    120       1229 1349
## 4 0.200000 0.212122 0.590435 0.1602960    108       1454 1562
## 5 0.226957 0.229270 0.436957 0.1869000     82       1518 1600
## 6 0.204348 0.233209 0.518261 0.0895652     88       1518 1606

Rename variables

bikedata  <- bikedata %>%
    rename(
        temperature = atemp, 
        month = mnth,
        weathersituation = weathersit,
        humidity = hum,
        count = cnt
    )

Un-normalize variables

See variable description on UCI web page.

bikedata <- bikedata %>%
    mutate(
        temperature = 66 * temperature + 16,
        windspeed = 67 * windspeed,
        humidity = 100 * humidity
    )

Show trend

ggplot(bikedata, aes(dteday, count)) +
    geom_line() + 
    scale_x_date(labels = scales::date_format("%b %y")) + 
    xlab("date") + 
    ylab("rental count") + 
    stat_smooth(method = "lm", se = FALSE, linetype = "dashed") + 
    theme(plot.title = element_text(lineheight = 0.8, face = "bold", size = 20)) +
    theme(text = element_text(size = 18))

plot of chunk count_with_trend

Remove trend

lm_trend <- lm(count ~ instant, data = bikedata)
trend <- predict(lm_trend)
bikedata <- mutate(bikedata, count = count / trend)
ggplot(bikedata, aes(dteday, count)) + 
    geom_line() + 
    scale_x_date(labels = scales::date_format("%b %y")) + 
    xlab("date") + 
    ylab("detrended rental count") + 
    theme(plot.title = element_text(lineheight = 0.8, face = "bold", size = 20)) + 
    theme(text = element_text(size = 18))

plot of chunk count_detrended

Drop useless variables

bikedata <- bikedata %>%
    select(-instant, -dteday, -yr) %>%  # time indices
    select(-casual, -registered) %>%    # casual + registered = count
    select(-holiday) %>%                # we use 'workingday' instead
    select(-temp)                       # we use 'temperature' (feeling temperature)

Declare discrete variables as ordered

disc_vars <- c("season", "month", "weekday", "workingday", "weathersituation")
bikedata <- bikedata %>%
    mutate(weekday = ifelse(weekday == 0, 7, weekday)) %>%  # sun at end of week
    purrr::modify_at(disc_vars, as.ordered)

D-vine regression model

Fit model

fit <- vinereg(
  count ~ ., 
  data = bikedata, 
  family = c("onepar", "tll"), 
  selcrit = "aic"
)
fit
## D-vine regression model: count | temperature, humidity, windspeed, month, season, weathersituation, weekday, workingday 
## nobs = 731, edf = 71.06, cll = 438.54, caic = -734.97, cbic = -408.5
summary(fit)
##                var      edf         cll       caic       cbic
## 1            count  9.59683 -198.076002  415.34567  459.43747
## 2      temperature 21.96537  415.808987 -787.68724 -686.76925
## 3         humidity 17.92558  118.991497 -202.13184 -119.77432
## 4        windspeed  1.00000   22.861612  -43.72322  -39.12881
## 5            month  1.00000   12.460300  -22.92060  -18.32619
## 6           season  1.00000   14.230519  -26.46104  -21.86662
## 7 weathersituation  1.00000   12.499591  -22.99918  -18.40477
## 8          weekday 16.56942   29.790776  -26.44271   49.68404
## 9       workingday  1.00000    9.973117  -17.94623  -13.35182
##         p_value
## 1            NA
## 2 1.067790e-161
## 3  2.034176e-40
## 4  1.361984e-11
## 5  5.974065e-07
## 6  9.560314e-08
## 7  5.735465e-07
## 8  9.172812e-07
## 9  7.965070e-06

In-sample predictions

alpha_vec <- c(0.1, 0.5, 0.9)
pred <- fitted(fit, alpha_vec)

Marginal effects

plot_marginal_effects(
    covs = select(bikedata, temperature), 
    preds = pred
)

plot of chunk me_temperature

plot_marginal_effects(covs = select(bikedata, humidity), preds = pred) +
    xlim(c(25, 100))

plot of chunk me_humidity

plot_marginal_effects(covs = select(bikedata, windspeed), preds = pred) 

plot of chunk me_windspeed

month_labs <- c("Jan","", "Mar", "", "May", "", "Jul", "", "Sep", "", "Nov", "")
plot_marginal_effects(covs = select(bikedata, month), preds = pred) +
        scale_x_discrete(limits = 1:12, labels = month_labs)

plot of chunk me_month

plot_marginal_effects(covs = select(bikedata, weathersituation), 
                      preds = pred) +
    scale_x_discrete(limits = 1:3,labels = c("good", "medium", "bad"))

plot of chunk me_weathersituation

weekday_labs <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
plot_marginal_effects(covs = select(bikedata, weekday), preds = pred) +
    scale_x_discrete(limits = 1:7, labels = weekday_labs)

plot of chunk me_weekday

plot_marginal_effects(covs = select(bikedata, workingday), preds = pred) +
    scale_x_discrete(limits = 0:1, labels = c("no", "yes")) +
    geom_smooth(method = "lm", se = FALSE) +
    xlim(c(0, 1))

plot of chunk me_workingday

season_labs <- c("spring", "summer", "fall", "winter")
plot_marginal_effects(covs = select(bikedata, season), preds = pred) +
    scale_x_discrete(limits = 1:4, labels = season_labs)

plot of chunk me_season