library(santoku)
x <- runif(10, 0, 10)
(chopped <- chop(x, breaks = 0:10))
#>  [1] [4, 5)  [8, 9)  [3, 4)  [4, 5)  [7, 8)  [9, 10) [6, 7)  [8, 9)  [1, 2)
#> [10] [4, 5)
#> Levels: [1, 2) [3, 4) [4, 5) [6, 7) [7, 8) [8, 9) [9, 10)
data.frame(x, chopped)
#>           x chopped
#> 1  4.978305  [4, 5)
#> 2  8.969989  [8, 9)
#> 3  3.391823  [3, 4)
#> 4  4.676785  [4, 5)
#> 5  7.057042  [7, 8)
#> 6  9.707687 [9, 10)
#> 7  6.713807  [6, 7)
#> 8  8.376589  [8, 9)
#> 9  1.086165  [1, 2)
#> 10 4.495479  [4, 5)

chopped <- chop(x, breaks = 3:7)
data.frame(x, chopped)
#>           x    chopped
#> 1  4.978305     [4, 5)
#> 2  8.969989 [7, 9.708]
#> 3  3.391823     [3, 4)
#> 4  4.676785     [4, 5)
#> 5  7.057042 [7, 9.708]
#> 6  9.707687 [7, 9.708]
#> 7  6.713807     [6, 7)
#> 8  8.376589 [7, 9.708]
#> 9  1.086165 [1.086, 3)
#> 10 4.495479     [4, 5)

x_fives <- x
x_fives[1:5] <- 5
chopped <- chop(x_fives, c(2, 5, 5, 8))
data.frame(x_fives, chopped)
#>     x_fives    chopped
#> 1  5.000000        {5}
#> 2  5.000000        {5}
#> 3  5.000000        {5}
#> 4  5.000000        {5}
#> 5  5.000000        {5}
#> 6  9.707687 [8, 9.708]
#> 7  6.713807     (5, 8)
#> 8  8.376589 [8, 9.708]
#> 9  1.086165 [1.086, 2)
#> 10 4.495479     [2, 5)

chopped <- chop_width(x, 2)
data.frame(x, chopped)
#>           x        chopped
#> 1  4.978305 [3.086, 5.086)
#> 2  8.969989 [7.086, 9.086)
#> 3  3.391823 [3.086, 5.086)
#> 4  4.676785 [3.086, 5.086)
#> 5  7.057042 [5.086, 7.086)
#> 6  9.707687 [9.086, 11.09)
#> 7  6.713807 [5.086, 7.086)
#> 8  8.376589 [7.086, 9.086)
#> 9  1.086165 [1.086, 3.086)
#> 10 4.495479 [3.086, 5.086)

chopped <- chop_evenly(x, intervals = 3)
data.frame(x, chopped)
#>           x        chopped
#> 1  4.978305  [3.96, 6.834)
#> 2  8.969989 [6.834, 9.708]
#> 3  3.391823  [1.086, 3.96)
#> 4  4.676785  [3.96, 6.834)
#> 5  7.057042 [6.834, 9.708]
#> 6  9.707687 [6.834, 9.708]
#> 7  6.713807  [3.96, 6.834)
#> 8  8.376589 [6.834, 9.708]
#> 9  1.086165  [1.086, 3.96)
#> 10 4.495479  [3.96, 6.834)

chopped <- chop_quantiles(x, c(0.25, 0.5, 0.75))
data.frame(x, chopped)
#>           x     chopped
#> 1  4.978305  [25%, 50%)
#> 2  8.969989 (75%, 100%]
#> 3  3.391823   [0%, 25%)
#> 4  4.676785  [25%, 50%)
#> 5  7.057042  [50%, 75%]
#> 6  9.707687 (75%, 100%]
#> 7  6.713807  [50%, 75%]
#> 8  8.376589 (75%, 100%]
#> 9  1.086165   [0%, 25%)
#> 10 4.495479   [0%, 25%)

chopped <- chop_mean_sd(x)
data.frame(x, chopped)
#>           x        chopped
#> 1  4.978305  [-1 sd, 0 sd)
#> 2  8.969989   [1 sd, 2 sd)
#> 3  3.391823  [-1 sd, 0 sd)
#> 4  4.676785  [-1 sd, 0 sd)
#> 5  7.057042   [0 sd, 1 sd)
#> 6  9.707687   [1 sd, 2 sd)
#> 7  6.713807   [0 sd, 1 sd)
#> 8  8.376589   [0 sd, 1 sd)
#> 9  1.086165 [-2 sd, -1 sd)
#> 10 4.495479  [-1 sd, 0 sd)

tab_n(x, 4)
#> x
#> [1.086, 4.978)  [4.978, 8.97)  [8.97, 9.708]
#>              4              4              2
tab_width(x, 2)
#> x
#> [1.086, 3.086) [3.086, 5.086) [5.086, 7.086) [7.086, 9.086) [9.086, 11.09)
#>              1              4              2              2              1
tab_evenly(x, 5)
#> x
#>  [1.086, 2.81)  [2.81, 4.535) [4.535, 6.259) [6.259, 7.983) [7.983, 9.708]
#>              1              2              2              2              3
tab_mean_sd(x)
#> x
#> [-2 sd, -1 sd)  [-1 sd, 0 sd)   [0 sd, 1 sd)   [1 sd, 2 sd)
#>              1              4              3              2

library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
y2k <- as.Date("2000-01-01") + 0:365
months <- chop_width(y2k, months(1))
table(months)
#> months
#> [2000-01-01, 2000-02-01) [2000-02-01, 2000-03-01) [2000-03-01, 2000-04-01)
#>                       31                       29                       31
#> [2000-04-01, 2000-05-01) [2000-05-01, 2000-06-01) [2000-06-01, 2000-07-01)
#>                       30                       31                       30
#> [2000-07-01, 2000-08-01) [2000-08-01, 2000-09-01) [2000-09-01, 2000-10-01)
#>                       31                       31                       30
#> [2000-10-01, 2000-11-01) [2000-11-01, 2000-12-01) [2000-12-01, 2001-01-01)
#>                       31                       30                       31

Advanced usage

You can change factor labels with the labels argument:

chopped <- chop(x, c(2, 5, 8), labels = c("Lowest", "Low", "Higher", "Highest"))
data.frame(x, chopped)
#>           x chopped
#> 1  4.978305     Low
#> 2  8.969989 Highest
#> 3  3.391823     Low
#> 4  4.676785     Low
#> 5  7.057042  Higher
#> 6  9.707687 Highest
#> 7  6.713807  Higher
#> 8  8.376589 Highest
#> 9  1.086165  Lowest
#> 10 4.495479     Low

You need as many labels as there are intervals: one fewer than length(breaks) if your data doesn’t extend beyond breaks, or one more than length(breaks) if it extends beyond them at both ends, as in the example above (three breaks, four labels).

To label intervals with a dash, use lbl_dash():

chopped <- chop(x, c(2, 5, 8), lbl_dash())
data.frame(x, chopped)
#>           x   chopped
#> 1  4.978305     2 - 5
#> 2  8.969989 8 - 9.708
#> 3  3.391823     2 - 5
#> 4  4.676785     2 - 5
#> 5  7.057042     5 - 8
#> 6  9.707687 8 - 9.708
#> 7  6.713807     5 - 8
#> 8  8.376589 8 - 9.708
#> 9  1.086165 1.086 - 2
#> 10 4.495479     2 - 5

To label intervals sequentially (a, b, c, …), use lbl_seq():

chopped <- chop(x, c(2, 5, 8), lbl_seq())
data.frame(x, chopped)
#>           x chopped
#> 1  4.978305       b
#> 2  8.969989       d
#> 3  3.391823       b
#> 4  4.676785       b
#> 5  7.057042       c
#> 6  9.707687       d
#> 7  6.713807       c
#> 8  8.376589       d
#> 9  1.086165       a
#> 10 4.495479       b

You can also number the intervals with Arabic numerals, or even Roman numerals:

chop(x, c(2, 5, 8), lbl_seq("(1)"))
#>  [1] (2) (4) (2) (2) (3) (4) (3) (4) (1) (2)
#> Levels: (1) (2) (3) (4)
chop(x, c(2, 5, 8), lbl_seq("i."))
#>  [1] ii.  iv.  ii.  ii.  iii. iv.  iii. iv.  i.   ii. 
#> Levels: i. ii. iii. iv.

For arbitrary formatting use lbl_format() and sprintf-style format strings:

chopped <- chop(x, c(2, 5, 8), lbl_format("%s to %s"))
data.frame(x, chopped)
#>           x               chopped
#> 1  4.978305                2 to 5
#> 2  8.969989 8 to 9.70768669154495
#> 3  3.391823                2 to 5
#> 4  4.676785                2 to 5
#> 5  7.057042                5 to 8
#> 6  9.707687 8 to 9.70768669154495
#> 7  6.713807                5 to 8
#> 8  8.376589 8 to 9.70768669154495
#> 9  1.086165 1.08616470126435 to 2
#> 10 4.495479                2 to 5

Or use a formatter from the {scales} package:

library(scales)
#> 
#> Attaching package: 'scales'
#> The following object is masked from 'package:santoku':
#> 
#>     percent
r <- runif(10)
chopped <- chop(r, c(.3, .5, .7), lbl_intervals(fmt = label_percent(0.1)))
data.frame(r, chopped)
#>             r        chopped
#> 1  0.75792242 [70.0%, 98.4%]
#> 2  0.48151959 [30.0%, 50.0%)
#> 3  0.50041493 [50.0%, 70.0%)
#> 4  0.22192945  [5.7%, 30.0%)
#> 5  0.98363107 [70.0%, 98.4%]
#> 6  0.13919834  [5.7%, 30.0%)
#> 7  0.36513696 [30.0%, 50.0%)
#> 8  0.48477647 [30.0%, 50.0%)
#> 9  0.05707588  [5.7%, 30.0%)
#> 10 0.70421678 [70.0%, 98.4%]

By default, chop() extends breaks if necessary. If you don’t want that, set extend = FALSE:

chopped <- chop(x, c(3, 5, 7), extend = FALSE)
data.frame(x, chopped)
#>           x chopped
#> 1  4.978305  [3, 5)
#> 2  8.969989    <NA>
#> 3  3.391823  [3, 5)
#> 4  4.676785  [3, 5)
#> 5  7.057042    <NA>
#> 6  9.707687    <NA>
#> 7  6.713807  [5, 7)
#> 8  8.376589    <NA>
#> 9  1.086165    <NA>
#> 10 4.495479  [3, 5)

Data outside the range of breaks will become NA.

By default, intervals are closed on the left, i.e. they include their left endpoints. If you want right-closed intervals, set left = FALSE:

y <- 1:5
data.frame(
        y = y, 
        left_closed = chop(y, 1:5), 
        right_closed = chop(y, 1:5, left = FALSE)
      )
#>   y left_closed right_closed
#> 1 1      [1, 2)          {1}
#> 2 2      [2, 3)       (1, 2]
#> 3 3      [3, 4)       (2, 3]
#> 4 4      [4, 5)       (3, 4]
#> 5 5         {5}       (4, 5]

If you want to close off the last interval, set close_end = TRUE:

data.frame(
  y = y,
  rightmost_open = chop(y, 1:5),
  rightmost_closed   = chop(y, 1:5, close_end = TRUE)
)
#>   y rightmost_open rightmost_closed
#> 1 1         [1, 2)           [1, 2)
#> 2 2         [2, 3)           [2, 3)
#> 3 3         [3, 4)           [3, 4)
#> 4 4         [4, 5)           [4, 5]
#> 5 5            {5}           [4, 5]

Introduction to santoku

Installation

Basic usage

More ways to chop

Advanced usage