Runner examples

The most fundamental function in runner package is runner. With runner::runner one can apply any R function on running windows. This tutorial presents set of examples explaining how to tackle some tasks. Some of the examples are referenced to original topic on stack-overflow.

Number of unique elements in 7 days window

library(runner)

x <- sample(letters, 20, replace = TRUE)
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE)) # unequaly spaced time series

runner(
  x, 
  k = "7 days", 
  idx = date, 
  f = function(x) length(unique(x))
)
##  [1] 1 2 3 2 2 2 2 3 3 4 4 3 3 3 4 3 2 2 2 2

weekly trimmed mean

library(runner)

x <- cumsum(rnorm(20))
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE)) # unequaly spaced time series

runner(
  x, 
  k = "week", 
  idx = date, 
  f = function(x) mean(x, trim = 0.05)
)
##  [1] -1.25749691 -1.64713165 -2.02798213 -2.69369499 -3.18110392 -3.29694938
##  [7] -4.05063663 -3.85160954 -3.19867633 -2.57709721 -1.65170984 -1.54691603
## [13] -1.50351450 -1.02189013 -0.70260513 -0.14853595 -0.07376711 -0.54787677
## [19] -2.51124034 -2.29345682

Prediction on current day based on preceding 2-weeks regression

library(runner)

# sample data
x <- cumsum(rnorm(20))
data <- data.frame(
  date = Sys.Date() + cumsum(sample(1:3, 20, replace = TRUE)), # unequaly spaced time series, 
  y = 3 * x + rnorm(20), 
  x = cumsum(rnorm(20))
)

# solution
data$pred <- runner(
  data,
  lag = "1 days",
  k = "2 weeks",
  idx = date,
  f = function(data) {
    predict(
      lm(y ~ x, data = data)
    )[nrow(data)]
  }
)


plot(data$date, data$y, type = "l", col = "red")
lines(data$date, data$pred, col = "blue")

Rolling sums for groups with uneven time gaps

SO discussion

library(runner)
library(dplyr)

set.seed(3737)
df <- data.frame(
  user_id = c(rep(27, 7), rep(11, 7)),
  date = as.Date(rep(c('2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07', 
                       '2016-01-10', '2016-01-14', '2016-01-16'), 2)),
  value = round(rnorm(14, 15, 5), 1))

df %>%
  group_by(user_id) %>%
  mutate(
    v_minus7  = sum_run(value, 7, idx = date),
    v_minus14 = sum_run(value, 14, idx = date)
  )
## # A tibble: 14 x 5
## # Groups:   user_id [2]
##    user_id date       value v_minus7 v_minus14
##      <dbl> <date>     <dbl>    <dbl>     <dbl>
##  1      27 2016-01-01  15       15        15  
##  2      27 2016-01-03  22.4     37.4      37.4
##  3      27 2016-01-05  13.3     50.7      50.7
##  4      27 2016-01-07  21.9     72.6      72.6
##  5      27 2016-01-10  20.6     55.8      93.2
##  6      27 2016-01-14  18.6     39.2     112. 
##  7      27 2016-01-16  16.4     55.6     113. 
##  8      11 2016-01-01   6.8      6.8       6.8
##  9      11 2016-01-03  21.3     28.1      28.1
## 10      11 2016-01-05  19.8     47.9      47.9
## 11      11 2016-01-07  22       69.9      69.9
## 12      11 2016-01-10  19.4     61.2      89.3
## 13      11 2016-01-14  17.5     36.9     107. 
## 14      11 2016-01-16  19.3     56.2     119.

runner with dplyr

Unique for specified time frame

SO discussion

## # A tibble: 14 x 5
## # Groups:   user_id [2]
##    user_id date       category distinct_7 distinct_14
##      <int> <fct>      <fct>         <int>       <int>
##  1      27 2016-01-01 apple             1           1
##  2      27 2016-01-03 apple             1           1
##  3      27 2016-01-05 pear              2           2
##  4      27 2016-01-07 plum              3           3
##  5      27 2016-01-10 apple             3           3
##  6      27 2016-01-14 pear              2           3
##  7      27 2016-01-16 plum              3           3
##  8      11 2016-01-01 apple             1           1
##  9      11 2016-01-03 pear              2           2
## 10      11 2016-01-05 pear              2           2
## 11      11 2016-01-07 pear              2           2
## 12      11 2016-01-10 apple             2           2
## 13      11 2016-01-14 apple             1           2
## 14      11 2016-01-16 apple             1           2

runner with group_by mutate

## # A tibble: 20 x 6
## # Groups:   group [2]
##    date       group      y      x alpha_5  beta_5
##    <date>     <fct>  <dbl>  <dbl>   <dbl>   <dbl>
##  1 2020-05-17 a      0.368 0.0470  0.0470  0.0470
##  2 2020-05-20 a      0.457 0.998  -3.91   -3.91  
##  3 2020-05-23 a      3.73  0.967   1.00    1.00  
##  4 2020-05-26 a      3.70  1.26   34.5    34.5   
##  5 2020-05-28 a      4.76  1.98   -1.26   -1.26  
##  6 2020-05-30 a     10.9   3.52    0.365   0.365 
##  7 2020-06-02 a      6.13  2.39    0.948   0.948 
##  8 2020-06-04 a     12.0   4.06    0.659   0.659 
##  9 2020-06-06 a     11.0   3.58    0.711   0.711 
## 10 2020-06-08 a     12.6   4.49   -2.38   -2.38  
## 11 2020-06-09 b     12.9   3.60    3.60    3.60  
## 12 2020-06-10 b      8.03  2.76    1.39    1.39  
## 13 2020-06-11 b     15.8   4.68    0.768   0.768 
## 14 2020-06-14 b     15.8   5.26    0.492   0.492 
## 15 2020-06-15 b     20.8   6.95   -1.24   -1.24  
## 16 2020-06-17 b     22.7   7.67   -0.165  -0.165 
## 17 2020-06-19 b     19.0   6.55    0.862   0.862 
## 18 2020-06-21 b     20.7   6.48    0.583   0.583 
## 19 2020-06-24 b     14.6   5.18    2.05    2.05  
## 20 2020-06-26 b     16.8   5.14    5.46    5.46

Aggregating values from another data.frame in grouped_df

SO Discussion

## # A tibble: 900 x 4
## # Groups:   Company.name2 [2]
##    Company.name2 Event_date intercept   slope
##    <fct>         <date>         <dbl>   <dbl>
##  1 AAPL          2017-08-24   -0.0441  0.0247
##  2 AAPL          2019-04-21   -0.103   0.113 
##  3 AAPL          2016-06-23    0.0669 -0.0500
##  4 AAPL          2019-07-28   -0.0570  0.138 
##  5 AAPL          2016-05-27    0.0758 -0.0222
##  6 AAPL          2019-05-17   -0.0747  0.120 
##  7 AAPL          2015-11-13    0.0134 -0.0917
##  8 AAPL          2019-06-12   -0.0503  0.102 
##  9 AAPL          2017-03-14    0.0327 -0.0526
## 10 AAPL          2018-03-15    0.0252 -0.0734
## # … with 890 more rows