Skip to contents
library(squarebrackets)
#> Run `?squarebrackets::squarebrackets_help` to open the introduction help page of 'squarebrackets'.

 

Introduction

Base R’s [<-, [[<-, $<- methods perform in-place modification on subsets of objects using “copy-on-modify” semantics. The ‘squarebrackets’ R-package provides 2 alternative semantics for modification: “pass-by-reference” through the sb_set() method, and “deep-copy” through the sb_mod() method. Moreover, where base ‘R’ provides direct replacement only, ‘squarebrackets’ provides both replacement (through the rp argument) and transformation (through the tf argument) mechanics. Thus, ‘squarebrackets’ and base R are not really directly comparable in terms of benchmarking. Nonetheless, I have tried to keep the comparisons somewhat fair.

The sb_set() method is generally several times (2 to 5 times) faster than base R’s in-place modification, and generally uses about half the memory. The sb_mod() method is generally about as fast as base R’s in-place modification, and uses about the same amount of memory.

Below are some benchmarks to give one an idea of the speed loss. These are just examples; speed is determined by a great number of factors. To keep comparisons between the classes fair, all objects have approximately 1e7 elements.

 

library(bench)
library(ggplot2)
library(patchwork)
library(tinycodet)
#> Run `?tinycodet::tinycodet` to open the introduction help page of 'tinycodet'.

plotfun <- function(title1, bm1, title2, bm2) {

  plotdat1 <- bm1 |> tidyr::unnest(cols = c("time", "gc", "mem_alloc"))
  plotdat1$expression <- paste(
    plotdat1$expression,
    "\n (", as.character(plotdat1$mem_alloc), ")"
  )
  p1 <- ggplot(plotdat1, aes_pro(x = ~ time, y = ~ expression, color = ~ gc)) +
  ggbeeswarm::geom_quasirandom() + ggtitle(title1)

  plotdat2 <- bm2 |> tidyr::unnest(cols = c("time", "gc", "mem_alloc"))
  plotdat2$expression <- paste(
    plotdat2$expression,
    "\n (", as.character(plotdat2$mem_alloc), ")"
  )
  p2 <- ggplot(plotdat2, aes_pro(x = ~ time, y = ~ expression, color = ~ gc)) +
  ggbeeswarm::geom_quasirandom() + ggtitle(title2)

  combined <- p1 + p2 & theme(legend.position = "bottom")
  combined + plot_layout(guides = "collect")
}

Atomic objects

Matrix


n <- 3162 # approx sqrt(1e7)
x.mat <- matrix(seq_len(n*n), ncol = n)
x.mat2 <- as.mutable_atomic(x.mat)
colnames(x.mat) <- sample(c(letters, LETTERS, NA), n, TRUE)
sel.rows <- 1:1000
sel.cols <- 1:1000
basefun <- function(x, rows, cols, tf) {
  x[rows, cols] <- tf(x[rows, cols])
  return(x)
}
tf <- function(x) { return(-1 * x) }
bm.sb_tf.matrix <- bench::mark(
  "base [<-" =  basefun(x.mat, sel.rows, sel.cols, tf = tf),
  "sb_set" = sb_set(x.mat2, sel.rows, sel.cols, tf = tf),
  "sb_mod" = sb_mod(x.mat, sel.rows, sel.cols, tf = tf),
  check = FALSE,
  min_iterations = 500
)
bm.sb_tf.matrix
summary(bm.sb_tf.matrix)
#> # A tibble: 3 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 base [<-    23.86ms  29.02ms      33.9    87.7MB     46.4
#> 2 sb_set       4.83ms   6.07ms     165.     15.4MB     14.7
#> 3 sb_mod      24.82ms  29.69ms      32.5    87.8MB     29.5

 

Array (3D)


x.dims <- c(1900, 1900, 3) # leads to approx 1e7 elements
x.3d <- array(1:prod(x.dims), x.dims)
x.3d2 <- as.mutable_atomic(x.3d)
sel.rows <- 1:900
sel.lyrs <- c(TRUE, FALSE, TRUE)
basefun <- function(x, rows, lyrs, tf) {
  x[rows, , lyrs] <- tf(x[rows, , lyrs])
  return(x)
}
tf <- function(x) { return(-1 * x) }
bm.sb_tf.3d <- bench::mark(
  "base [<-" = basefun(x.3d, sel.rows, sel.lyrs, tf = tf ),
  "sb_set" =  sb_set(x.3d2, n(sel.rows, sel.lyrs), c(1,3), tf = tf),
  "sb_mod" = sb_mod(x.3d, n(sel.rows, sel.lyrs), c(1, 3), tf = tf),
  check = FALSE,
  min_iterations = 500
)
summary(bm.sb_tf.3d)
#> # A tibble: 3 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 base [<-     48.5ms     52ms      19.0   121.8MB     44.8
#> 2 sb_set       22.2ms   26.9ms      37.1    52.3MB     10.9
#> 3 sb_mod       45.5ms   52.9ms      18.8   121.8MB     21.9

 

Plot

#> Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
#>  Please use the `transform` argument instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> Orientation inferred to be along y-axis; override with
#> `position_quasirandom(orientation = 'x')`
#> Orientation inferred to be along y-axis; override with
#> `position_quasirandom(orientation = 'x')`

 

Data.frame-like


n <- 1e5
ncol <- 200 # times 2
chrmat <- matrix(
  sample(letters, n*ncol, replace = TRUE), ncol = ncol
)
intmat <- matrix(
  seq.int(n*ncol), ncol = ncol
)
x <- cbind(chrmat, intmat) |> as.data.frame()
colnames(x) <- make.names(colnames(x), unique = TRUE)
x2 <- data.table::as.data.table(x)
rm(list = c("chrmat", "intmat"))

sel.rows <- 1:1000
basefun <- function(x, rows, tf) {
  x[rows, sapply(x, is.numeric)] <- lapply(x[rows, sapply(x, is.numeric)], tf)
  return(x)
}
bm.sb_tf.df <- bench::mark(
  "base [<-" = basefun(x, sel.rows, tf = \(x) -1 * x),
  "sb_set" = sb2_set.data.table(
    x2, rows = sel.rows, vars = is.numeric, tf = \(x) -1 * x
  ),
  "sb_mod" = sb2_mod.data.frame(
    x, rows = sel.rows, vars = is.numeric, tf = \(x) -1 * x, coe = TRUE
  ),
  check = FALSE,
  min_iterations = 500
)
summary(bm.sb_tf.df)
#> # A tibble: 3 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 base [<-    237.9µs    307µs     3060.    62.3KB     12.3
#> 2 sb_set       84.3µs    122µs     7339.   932.8KB      0  
#> 3 sb_mod      100.5µs    131µs     7400.    46.7KB      0

 

#> Orientation inferred to be along y-axis; override with
#> `position_quasirandom(orientation = 'x')`