Skip to contents
library(squarebrackets)
#> Run `?squarebrackets::squarebrackets_help` to open the introduction help page of 'squarebrackets'.

 

Introduction

Base R’s [<-, [[<-, $<- methods perform in-place modification on subsets of objects using “copy-on-modify” semantics. The ‘squarebrackets’ R-package provides 2 alternative semantics for modification: “pass-by-reference” through the sb_set() method, and “deep-copy” through the sb_mod() method. Moreover, where base ‘R’ provides direct replacement only, ‘squarebrackets’ provides both replacement (through the rp argument) and transformation (through the tf argument) mechanics. Thus, ‘squarebrackets’ and base R are not really directly comparable in terms of benchmarking. Nonetheless, I have tried to keep the comparisons somewhat fair.

The sb_set() method is generally several times (2 to 5 times) faster than base R’s in-place modification, and generally uses about half the memory. The sb_mod() method is generally about as fast as base R’s in-place modification, and uses about the same amount of memory.

Below are some benchmarks to give one an idea of the speed loss. These are just examples; speed is determined by a great number of factors. To keep comparisons between the classes fair, all objects have approximately 1e7 elements.

 

library(bench)
library(ggplot2)
library(patchwork)
library(tinycodet)
#> Run `?tinycodet::tinycodet` to open the introduction help page of 'tinycodet'.

plotfun <- function(title1, bm1, title2, bm2) {

  plotdat1 <- bm1 |> tidyr::unnest(cols = c("time", "gc", "mem_alloc"))
  plotdat1$expression <- paste(
    plotdat1$expression,
    "\n (", as.character(plotdat1$mem_alloc), ")"
  )
  p1 <- ggplot(plotdat1, aes_pro(x = ~ time, y = ~ expression, color = ~ gc)) +
  ggbeeswarm::geom_quasirandom() + ggtitle(title1)

  plotdat2 <- bm2 |> tidyr::unnest(cols = c("time", "gc", "mem_alloc"))
  plotdat2$expression <- paste(
    plotdat2$expression,
    "\n (", as.character(plotdat2$mem_alloc), ")"
  )
  p2 <- ggplot(plotdat2, aes_pro(x = ~ time, y = ~ expression, color = ~ gc)) +
  ggbeeswarm::geom_quasirandom() + ggtitle(title2)

  combined <- p1 + p2 & theme(legend.position = "bottom")
  combined + plot_layout(guides = "collect")
}

Atomic objects

Matrix


n <- 3162 # approx sqrt(1e7)
x.mat <- matrix(seq_len(n*n), ncol = n)
x.mat2 <- as.mutable_atomic(x.mat)
colnames(x.mat) <- sample(c(letters, LETTERS, NA), n, TRUE)
sel.rows <- 1:1000
sel.cols <- 1:1000
basefun <- function(x, rows, cols, tf) {
  x[rows, cols] <- tf(x[rows, cols])
  return(x)
}
base_plus_idx <- function(x, rows, cols, tf) {
  x[idx(x, n(rows, cols), 1:2)] <- tf(x[idx(x, n(rows, cols), 1:2)])
  return(x)
}
tf <- function(x) { return(-1 * x) }
bm.sb_tf.matrix <- bench::mark(
  "base [<-" =  basefun(x.mat, sel.rows, sel.cols, tf = tf),
  "idx + [<-" = base_plus_idx(x.mat, sel.rows, sel.cols, tf = tf),
  "sb_set" = sb_set(x.mat2, n(sel.rows, sel.cols), tf = tf),
  "sb_mod" = sb_mod(x.mat, n(sel.rows, sel.cols), tf = tf),
  check = FALSE,
  min_iterations = 500
)
bm.sb_tf.matrix
summary(bm.sb_tf.matrix)
#> # A tibble: 4 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 base [<-     23.5ms  25.68ms      38.5    87.7MB     2.03
#> 2 idx + [<-    26.2ms  29.08ms      33.7    95.4MB     2.38
#> 3 sb_set        5.4ms   6.35ms     153.     15.3MB     1.23
#> 4 sb_mod       23.6ms  25.74ms      32.3    87.7MB     1.63

 

Array (3D)


x.dims <- c(1900, 1900, 3) # leads to approx 1e7 elements
x.3d <- array(1:prod(x.dims), x.dims)
x.3d2 <- as.mutable_atomic(x.3d)
sel.rows <- 1:900
sel.lyrs <- c(TRUE, FALSE, TRUE)
basefun <- function(x, rows, lyrs, tf) {
  x[rows, , lyrs] <- tf(x[rows, , lyrs])
  return(x)
}
base_plus_idx <- function(x, rows, lyrs, tf) {
  x[idx(x, n(rows, lyrs), c(1, 3))] <- tf(x[idx(x, n(rows, lyrs), c(1, 3))])
  return(x)
}
tf <- function(x) { return(-1 * x) }
bm.sb_tf.3d <- bench::mark(
  "base [<-" = basefun(x.3d, sel.rows, sel.lyrs, tf = tf ),
  "idx + [<-" = base_plus_idx(x.3d, sel.rows, sel.lyrs, tf = tf),
  "sb_set" =  sb_set(x.3d2, n(sel.rows, sel.lyrs), c(1,3), tf = tf),
  "sb_mod" = sb_mod(x.3d, n(sel.rows, sel.lyrs), c(1, 3), tf = tf),
  check = FALSE,
  min_iterations = 500
)
summary(bm.sb_tf.3d)
#> # A tibble: 4 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 base [<-     30.7ms   32.7ms      29.9    67.4MB    1.05 
#> 2 idx + [<-    33.1ms   36.1ms      26.7    93.5MB    1.35 
#> 3 sb_set       21.2ms   22.5ms      43.9    26.1MB    0.624
#> 4 sb_mod       31.7ms   33.5ms      29.2    67.4MB    1.22

 

Plot

#> Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
#>  Please use the `transform` argument instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> Orientation inferred to be along y-axis; override with
#> `position_quasirandom(orientation = 'x')`
#> Orientation inferred to be along y-axis; override with
#> `position_quasirandom(orientation = 'x')`

 

Data.frame-like


n <- 1e5
ncol <- 200 # times 2
chrmat <- matrix(
  sample(letters, n*ncol, replace = TRUE), ncol = ncol
)
intmat <- matrix(
  seq.int(n*ncol), ncol = ncol
)
df <- cbind(chrmat, intmat) |> as.data.frame()
colnames(df) <- make.names(colnames(df), unique = TRUE)
dt <- data.table::as.data.table(df)
rm(list = c("chrmat", "intmat"))

sel.rows <- 1:1000
basefun <- function(x, rows, tf) {
  x[rows, sapply(x, is.numeric)] <- lapply(x[rows, sapply(x, is.numeric)], tf)
  return(x)
}
bm.sb_tf.df <- bench::mark(
  "base [<-" = basefun(df, sel.rows, tf = \(x) -1 * x),
  "sb_set" = sb2_set(
    dt, obs = sel.rows, vars = is.numeric, tf = \(x) -1 * x
  ),
  "sb_mod" = sb2_mod(
    df, obs = sel.rows, vars = is.numeric, tf = \(x) -1 * x
  ),
  check = FALSE,
  min_iterations = 500
)
summary(bm.sb_tf.df)
#> # A tibble: 3 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 base [<-    246.5µs    272µs     3357.   58.37KB        0
#> 2 sb_set       95.3µs    103µs     8688.    6.45KB        0
#> 3 sb_mod       95.3µs    108µs     8462.    6.45KB        0

 

#> Orientation inferred to be along y-axis; override with
#> `position_quasirandom(orientation = 'x')`