library(squarebrackets)
#> Run `?squarebrackets::squarebrackets_help` to open the introduction help page of 'squarebrackets'.
Introduction
Due to the many checks and conversions that the `squarebrackets::` functions perform to make sub-setting more programming-friendly and beginner-friendly, these functions are almost necessarily slower than base R’s `[`-like operators.
However, a considerable effort was made to keep the speed loss to a minimum. Generally, the speed loss is indeed negligible, and in some cases there is even a speed improvement (thanks to the heavy lifting performed by the ‘collapse’ package).
Below are some benchmarks to give one an idea of the speed loss. These are just examples; speed is determined by a great number of factors.
Atomic objects
Matrix
# Benchmark: extract 100 rows and a set of repeated, name-selected columns
# from a large integer matrix.
n <- 5e3
x.mat <- matrix(seq_len(n * n), ncol = n)
# Column names are drawn with replacement from the letters plus NA, so names
# repeat and some columns have a missing name.
colnames(x.mat) <- sample(c(letters, NA), n, TRUE)
sel.rows <- 1:100
sel.cols <- rep(sample(letters[1:13]), 10)
bm.sb_x.matrix <- bench::mark(
  # Labelled "squarebrackets" to match the other benchmarks and the reported
  # results table.
  "squarebrackets" = sb_x(x.mat, sel.rows, sel.cols),
  # Base R counterpart: for each requested name, in order, look up every
  # column carrying that name; which() discards the NA comparisons.
  "base R" = x.mat[
    sel.rows,
    lapply(sel.cols, \(i) which(colnames(x.mat) == i)) |> unlist(),
    drop = FALSE
  ],
  min_iterations = 500
)
summary(bm.sb_x.matrix)
#> # A tibble: 2 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 squarebrackets 4.99ms 5.56ms 177. 9.82MB 28.3
#> 2 base R 7.81ms 8.9ms 111. 14.67MB 16.6
Array (3D)
# Benchmark: subset a 3D integer array along its first and third dimensions,
# leaving the second dimension untouched.
x.dims <- c(5000, 2000, 4)
x.3d <- array(seq_len(prod(x.dims)), dim = x.dims)
sel.rows <- seq_len(900)
# Logical selector over the third dimension: keep layers 1 and 3.
sel.lyrs <- rep(c(TRUE, FALSE), times = 2)
bm.sb_x.3d <- bench::mark(
  "squarebrackets" = sb_x(x.3d, n(sel.rows, sel.lyrs), c(1,3)),
  "base R + abind" = abind::asub(x.3d, idx = list(sel.rows, sel.lyrs), dims = c(1,3)),
  min_iterations = 500
)
summary(bm.sb_x.3d)
#> # A tibble: 2 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 squarebrackets 9.64ms 10.6ms 94.1 13.7MB 7.52
#> 2 base R + abind 9.7ms 10.7ms 92.8 13.7MB 6.56
Data.frame-like objects
data.frame
# Benchmark: select 1000 rows and 40 name-selected columns (10 names, each
# repeated 4 times) from a data.frame with 1e5 rows and 800 columns.
n <- 1e5
chrmat <- matrix(sample(letters, n * 400, replace = TRUE), ncol = 400)
intmat <- matrix(seq_len(n * 400), ncol = 400)
# cbind() of a character matrix and an integer matrix yields a character
# matrix, so every column of the resulting data.frame is character.
x <- as.data.frame(cbind(chrmat, intmat))
# Drop the intermediate matrices; only the data.frame is needed from here on.
rm(chrmat, intmat)
names(x) <- make.names(names(x), unique = TRUE)
sel.cols <- rep(sample(names(x), 10), 4)
sel.rows <- seq_len(1000)
bm.sb_x.df <- bench::mark(
  "squarebrackets" = sb2_x(x, sel.rows, sel.cols),
  # The alternatives take column positions, so names are resolved via match().
  "collapse::ss" = collapse::ss(x, sel.rows, match(sel.cols, names(x))),
  "base R" = x[sel.rows, match(sel.cols, names(x)), drop = FALSE],
  min_iterations = 1e4
)
summary(bm.sb_x.df)
#> # A tibble: 2 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 squarebrackets 124µs 193µs 4466. 441KB 8.95
#> 2 base R 345µs 879µs 1052. 362KB 2.11
data.table
# Convert the benchmark data to a data.table. The call is namespace-qualified
# so that it does not rely on the 'data.table' package being attached.
x <- data.table::as.data.table(x)
# Reference implementation for the data.table benchmark: subset rows `i` and
# columns `j` with collapse::ss(), then make the result's column names unique.
tempfun <- function(x, i, j) {
  out <- collapse::ss(x, i, j, check = TRUE)
  names(out) <- make.names(names(out), unique = TRUE)
  out
}
# Benchmark sb2_x() against tempfun() using the same row and column selection.
bm.sb_x.dt <- bench::mark(
"squarebrackets" = sb2_x(x, sel.rows, sel.cols),
# tempfun() is given column positions, so the names are resolved via match().
"data.table + collapse" = tempfun(x, sel.rows, match(sel.cols, names(x))),
min_iterations = 1e4
)
# Print the summarised timings for this benchmark.
summary(bm.sb_x.dt)
#> # A tibble: 2 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 squarebrackets 112µs 125µs 7610. 324KB 4.57
#> 2 data.table + collapse 106µs 124µs 5508. 339KB 0