library(contextual)
library(data.table)
# Import personalization data-set
dt <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/data.txt")
# 0/1 reward, 10 arms, 100 features
# arm indices always start at 1
# z y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 .. x100
# 1: 2 0 5 0 0 37 6 0 0 0 0 25 0 0 7 1 0 .. 0
# 2: 8 0 1 3 36 0 0 0 0 0 0 0 0 1 0 0 0 .. 10
# 3: . . . . . . . . . . . . . . . . . .. .
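# Optional sanity check on the download (a minimal sketch using only base R and
# data.table; column names y, z and x1..x100 are taken from the preview above):
dim(dt)            # expect 102 columns: arm z, reward y, features x1..x100
dt[, .N, by = z]   # logged plays per arm; arm indices should run from 1 to 10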
horizon <- nrow(dt)
simulations <- 1
# Set up formula: y ~ z | x1 + x2 + ..
# In bandit parlance: reward ~ arms | covariates or contextual features
f <- y ~ z | . - z   # "." expands to all remaining columns, so the features are x1..x100
# Instantiate Replay Bandit (Li, 2010)
bandit <- OfflineReplayEvaluatorBandit$new(formula = f, data = dt)
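# The replay bandit infers the number of arms and features from the formula and
# data. A quick check (assuming the Bandit class exposes the public fields k and
# d, as documented in contextual):
bandit$k   # expected: 10 arms
bandit$d   # expected: 100 contextual features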
# Bind each Policy to the Bandit through an Agent, and collect the Agents in a list
agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.01), bandit, "alpha = 0.01"),
               Agent$new(LinUCBDisjointOptimizedPolicy$new(0.05), bandit, "alpha = 0.05"),
               Agent$new(LinUCBDisjointOptimizedPolicy$new(0.1),  bandit, "alpha = 0.1"),
               Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0),  bandit, "alpha = 1.0"))
# Instantiate a Simulator
simulation <- Simulator$new(agents, horizon = horizon, simulations = simulations)
# Run the simulation.
history <- simulation$run()
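# run() returns a History object. Before plotting, it can be summarized per agent
# (assuming, as in the package examples, that History provides a summary() method):
summary(history)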
# Plot the cumulative reward rate of each agent
plot(history, type = "cumulative", regret = FALSE, rate = TRUE,
     legend_position = "bottomright", ylim = c(0, 1))