noctua 1.7.1

Minor Change

Documentation

noctua 1.7.0

New Feature

# Example: fetch a large Athena result set in chunks instead of all at once.
library(DBI)
con <- dbConnect(noctua::athena())
res <- dbExecute(con, "select * from some_big_table limit 10000")
# retrieve only the first 5,000 rows of the pending result
dbFetch(res, 5000)

Bug Fix

Documentation

noctua 1.6.0

New Feature

# Benchmark dbRemoveTable(): AWS Glue API method vs the original
# Athena DDL method (timings recorded below each call).
library(DBI)

con <- dbConnect(noctua::athena())

# upload iris dataframe for removal test
dbWriteTable(con, "iris2", iris)

# Athena method
system.time(dbRemoveTable(con, "iris2", confirm = TRUE))
# user  system elapsed 
# 0.247   0.091   2.243 

# upload iris dataframe for removal test
dbWriteTable(con, "iris2", iris)

# Glue method
system.time(dbRemoveTable(con, "iris2", confirm = TRUE))
# user  system elapsed 
# 0.110   0.045   1.094 
# Upload a data.frame to Athena as JSON lines and query it back.
library(DBI)
# NOTE(review): original read `RAthena::athena()` — a copy-paste slip from
# the sibling package; this is the noctua changelog, so use noctua::.
con <- dbConnect(noctua::athena())
dbWriteTable(con, "iris2", iris, file.type = "json")
dbGetQuery(con, "select * from iris2")

Bug Fix

Documentation

Unit tests

noctua 1.5.1

Bug Fix

# Build a large (~100M row) dummy csv, then read it back as a raw
# vector so binary write strategies can be benchmarked on it.
library(readr)
library(microbenchmark)

# creating some dummy data for testing
X <- 1e8
df <- 
data.frame(
    w = runif(X),
    x = 1:X,
    y = sample(letters, X, replace = TRUE), 
    z = sample(c(TRUE, FALSE), X, replace = TRUE))
write_csv(df, "test.csv")

# read in text file into raw format
obj <- readBin("test.csv", what = "raw", n = file.size("test.csv"))

format(object.size(obj), units = "auto")
# 3.3 Gb

# writeBin in a loop
# Write a raw vector to `filename` in fixed-size chunks, working around
# the size cap a single writeBin() call hits on very large raw vectors.
#
# value:      raw vector to write.
# filename:   output path; opened in append mode, so an existing file
#             is appended to, not truncated.
# chunk_size: maximum number of bytes per writeBin() call.
# Returns TRUE invisibly (side-effect function).
write_bin <- function(
  value,
  filename,
  chunk_size = 2L ^ 20L) {
  
  total_size <- length(value)
  
  con <- file(filename, "a+b")
  on.exit(close(con), add = TRUE)
  
  # Guard the empty case: seq(1, 0, by = chunk_size) would error with
  # "wrong sign in 'by' argument". Writing nothing still creates the file.
  if (total_size > 0L) {
    split_vec <- seq(1, total_size, chunk_size)
    # A plain for loop instead of sapply(): nothing useful is returned
    # per iteration, so no result should be accumulated.
    for (start in split_vec) {
      end <- min(total_size, start + chunk_size - 1)
      writeBin(value[start:end], con)
    }
  }
  invisible(TRUE)
}


# Compare the chunked writeBin() loop against readr::write_file() over
# 5 runs; the recorded timings below show readr roughly 17x faster.
microbenchmark(writeBin_loop = write_bin(obj, tempfile()),
               readr = write_file(obj, tempfile()),
               times = 5)

# Unit: seconds
# expr       min       lq      mean    median        uq       max neval
# R_loop 41.463273 41.62077 42.265778 41.908908 42.022042 44.313893     5
# readr  2.291571  2.40495  2.496871  2.542544  2.558367  2.686921     5
# dbplyr translation change: date-like strings now translate with an
# explicit DATE literal.
# Before
translate_sql("2019-01-01", con = con)
# '2019-01-01'

# Now
translate_sql("2019-01-01", con = con)
# DATE '2019-01-01'
# paste() now translates to Athena's || concatenation operator.
# R code:
paste("hi", "bye", sep = "-")

# SQL translation:
('hi'||'-'||'bye')
# Persist a lazy dplyr table into a named table (schema-qualified) with
# compute().
library(DBI)
library(dplyr)

# NOTE(review): original read `RAthena::athena()` — inconsistent with the
# rest of this (noctua) changelog; corrected to noctua::.
con <- dbConnect(noctua::athena())

tbl(con, "iris") %>%
  compute(name = "temp.iris")

New Feature

# Compare creating a dplyr tbl by table identifier vs by sub query;
# the recorded timings below show the ident method noticeably faster.
library(DBI)
library(dplyr)

con <- dbConnect(noctua::athena())

# ident method:
t1 <- system.time(tbl(con, "iris"))

# sub query method:
t2 <- system.time(tbl(con, sql("select * from iris")))

# ident method
# user  system elapsed 
# 0.082   0.012   0.288 

# sub query method
# user  system elapsed 
# 0.993   0.138   3.660 

Unit test

noctua 1.5.0

New Feature

library(noctua)

# select the "vroom" option via noctua_options() — presumably switches the
# backend used to parse query results; see ?noctua_options to confirm.
noctua_options("vroom")

Unit tests

Documentation

noctua 1.4.0

Major Change

# Non-fatal warning that the requested file.type was coerced to match the
# table's existing Athena DDL type; `File.Type` is supplied by the caller.
warning('Appended `file.type` is not compatible with the existing Athena DDL file type and has been converted to "', File.Type,'".', call. = FALSE)

Bug fix

Unit Tests

New Feature

Minor Change

noctua 1.3.0

Major Change

Performance results

# Benchmark uploading a 100-million-row data.frame with different
# max.batch splits (one big file vs 20 vs 10 smaller files).
library(DBI)
X <- 1e8
df <- data.frame(w = runif(X),
                 x = 1:X,
                 y = sample(letters, X, replace = TRUE), 
                 z = sample(c(TRUE, FALSE), X, replace = TRUE))
con <- dbConnect(noctua::athena())
# upload dataframe with different splits
dbWriteTable(con, "test_split1", df, compress = TRUE, max.batch = nrow(df), overwrite = TRUE) # no splits
dbWriteTable(con, "test_split2", df, compress = TRUE, max.batch = 0.05 * nrow(df), overwrite = TRUE) # 20 splits
dbWriteTable(con, "test_split3", df, compress = TRUE, max.batch = 0.1 * nrow(df), overwrite = TRUE) # 10 splits

AWS Athena performance results from the AWS console (query executed: `select count(*) from ...`):

# Same upload under the new default behaviour: the compressed file is
# split into 20 equal sized files automatically.
library(DBI)
X <- 1e8
df <- data.frame(w = runif(X),
                 x = 1:X,
                 y = sample(letters, X, replace = TRUE), 
                 z = sample(c(TRUE, FALSE), X, replace = TRUE))
con <- dbConnect(noctua::athena())
dbWriteTable(con, "test_split1", df, compress = TRUE, overwrite = TRUE) # default will now split compressed file into 20 equal size files.

Added information message to inform user about what files have been added to S3 location if user is overwriting an Athena table.

Minor Change

Bug Fix

Unit tests

noctua 1.2.1

New Features

Bug Fix

noctua 1.2.0

Minor Change

Backend Change

# Standard DBI workflow: connect to Athena and upload a data.frame.
library(DBI)

con <- dbConnect(noctua::athena())

dbWriteTable(con, "iris", iris)

Bug Fix

Unit Tests

New Feature

Minor Change

noctua 1.1.0

New Features

Bug fix

Unit Tests

Minor Change

Major Change

noctua 1.0.0

New Features

DBI

Athena lower level api