Skip to contents

Functional forms of special data.table operations.
These functions do not use Non-Standard Evaluation.
These functions also benefit from the security measures that 'squarebrackets' implements for the pass-by-reference semantics.

Usage

dt_aggregate(x, SDcols = NULL, f, by, order_by = FALSE)

dt_setcoe(
  x,
  col = NULL,
  vars = NULL,
  v,
  chkdup = getOption("squarebrackets.chkdup", FALSE)
)

dt_setrm(
  x,
  col = NULL,
  vars = NULL,
  chkdup = getOption("squarebrackets.chkdup", FALSE)
)

dt_setadd(x, new)

dt_setreorder(x, roworder = NULL, varorder = NULL)

Arguments

x

a data.table or tidytable.

SDcols

atomic vector, giving the columns to which the aggregation function f() is to be applied.

f

the aggregation function

by

atomic vector, giving the grouping columns.

order_by

Boolean, indicating if the aggregated result should be ordered by the columns specified in by.

col, vars

see squarebrackets_indx_args.
Duplicates are not allowed.

v

the coercive transformation function

chkdup

see squarebrackets_options.
[for performance: set to FALSE]

new

a data.frame-like object.
It must have column names that do not already exist in x.

roworder

an integer vector of length nrow(x), giving the order in which the rows are to be re-ordered. Internally, this numeric vector will be turned into an order using order(), thus ensuring it is a strict permutation of 1:nrow(x).

varorder

integer or character vector of length ncol(x), giving the new column order.
See data.table::setcolorder.

Value

For dt_aggregate():

The aggregated data.table object.


For the rest of the functions:

Returns: VOID. These functions modify the object by reference.

Do not use assignments like x <- dt_setcoe(x, ...).

Since these functions return void, such an assignment would simply overwrite x with NULL.


Details

dt_setreorder(x, roworder = roworder) internally creates a new column to reorder the data.table by, and then removes the new column.
The column name is randomized, and extra care is given to ensure it does not overwrite any existing columns.

Examples

# dt_aggregate on sf-data.table ====

if(requireNamespace("sf")) {
  x <- sf::st_read(system.file("shape/nc.shp", package = "sf"))
  x <- data.table::as.data.table(x)
  
  x$region <- ifelse(x$CNTY_ID <= 2000, 'high', 'low')
  d.aggr <- dt_aggregate(
    x, SDcols = "geometry", f= sf::st_union, by = "region"
  )
  
  head(d.aggr)
}
#> Loading required namespace: sf
#> Reading layer `nc' from data source `D:\Programs\R-4.4.0\library\sf\shape\nc.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 14 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> Geodetic CRS:  NAD27
#>    region                       geometry
#>    <char>             <sfc_MULTIPOLYGON>
#> 1:   high MULTIPOLYGON (((-75.78317 3...
#> 2:    low MULTIPOLYGON (((-76.46926 3...



#############################################################################


# dt_setcoe ====

obj <- data.table::data.table(a = 1:10, b = letters[1:10], c = 11:20, d = factor(letters[1:10]))
str(obj) # notice that columns "a" and "c" are INTEGER (`int`)
#> Classes 'data.table' and 'data.frame':	10 obs. of  4 variables:
#>  $ a: int  1 2 3 4 5 6 7 8 9 10
#>  $ b: chr  "a" "b" "c" "d" ...
#>  $ c: int  11 12 13 14 15 16 17 18 19 20
#>  $ d: Factor w/ 10 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10
#>  - attr(*, ".internal.selfref")=<externalptr> 
sb2_set(
  obj, filter = ~ (a >= 2) & (c <= 17), vars = is.numeric,
  tf = sqrt # WARNING: sqrt() results in `dbl`, but columns are `int`, so decimals lost
)
#> Warning: 1.414214 (type 'double') at RHS position 1 out-of-range(NA) or truncated (precision lost) when assigning to type 'integer' (column 1 named 'a')
#> Warning: 3.464102 (type 'double') at RHS position 1 out-of-range(NA) or truncated (precision lost) when assigning to type 'integer' (column 3 named 'c')
str(obj)
#> Classes 'data.table' and 'data.frame':	10 obs. of  4 variables:
#>  $ a: int  1 1 1 2 2 2 2 8 9 10
#>  $ b: chr  "a" "b" "c" "d" ...
#>  $ c: int  11 3 3 3 3 4 4 18 19 20
#>  $ d: Factor w/ 10 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10
#>  - attr(*, ".internal.selfref")=<externalptr> 
obj <- data.table::data.table(a = 1:10, b = letters[1:10], c = 11:20, d = factor(letters[1:10]))
dt_setcoe(obj, vars = is.numeric, v = as.numeric) # integers are now numeric
str(obj)
#> Classes 'data.table' and 'data.frame':	10 obs. of  4 variables:
#>  $ a: num  1 2 3 4 5 6 7 8 9 10
#>  $ b: chr  "a" "b" "c" "d" ...
#>  $ c: num  11 12 13 14 15 16 17 18 19 20
#>  $ d: Factor w/ 10 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10
#>  - attr(*, ".internal.selfref")=<externalptr> 
sb2_set(obj,
  filter = ~ (a >= 2) & (c <= 17), vars = is.numeric,
  tf = sqrt # SAFE: coercion performed; so no warnings
) 
str(obj)
#> Classes 'data.table' and 'data.frame':	10 obs. of  4 variables:
#>  $ a: num  1 1.41 1.73 2 2.24 ...
#>  $ b: chr  "a" "b" "c" "d" ...
#>  $ c: num  11 3.46 3.61 3.74 3.87 ...
#>  $ d: Factor w/ 10 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10
#>  - attr(*, ".internal.selfref")=<externalptr> 


#############################################################################


# dt_setrm ====

obj <- data.table::data.table(
  a = 1:10, b = letters[1:10], c = 11:20, d = factor(letters[1:10])
)
str(obj)
#> Classes 'data.table' and 'data.frame':	10 obs. of  4 variables:
#>  $ a: int  1 2 3 4 5 6 7 8 9 10
#>  $ b: chr  "a" "b" "c" "d" ...
#>  $ c: int  11 12 13 14 15 16 17 18 19 20
#>  $ d: Factor w/ 10 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10
#>  - attr(*, ".internal.selfref")=<externalptr> 
dt_setrm(obj, col = 1)
str(obj)
#> Classes 'data.table' and 'data.frame':	10 obs. of  3 variables:
#>  $ b: chr  "a" "b" "c" "d" ...
#>  $ c: int  11 12 13 14 15 16 17 18 19 20
#>  $ d: Factor w/ 10 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10
#>  - attr(*, ".internal.selfref")=<externalptr> 

obj <- data.table::data.table(
  a = 1:10, b = letters[1:10], c = 11:20, d = factor(letters[1:10])
)
str(obj)
#> Classes 'data.table' and 'data.frame':	10 obs. of  4 variables:
#>  $ a: int  1 2 3 4 5 6 7 8 9 10
#>  $ b: chr  "a" "b" "c" "d" ...
#>  $ c: int  11 12 13 14 15 16 17 18 19 20
#>  $ d: Factor w/ 10 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10
#>  - attr(*, ".internal.selfref")=<externalptr> 
dt_setrm(obj, vars = is.numeric)
str(obj)
#> Classes 'data.table' and 'data.frame':	10 obs. of  2 variables:
#>  $ b: chr  "a" "b" "c" "d" ...
#>  $ d: Factor w/ 10 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10
#>  - attr(*, ".internal.selfref")=<externalptr> 


#############################################################################


# dt_setadd ====

obj <- data.table::data.table(
  a = 1:10, b = letters[1:10], c = 11:20, d = factor(letters[1:10])
)
new <- data.table::data.table(
  e = sample(c(TRUE, FALSE), 10, TRUE),
  f = sample(c(TRUE, FALSE), 10, TRUE)
)
dt_setadd(obj, new)
print(obj)
#>         a      b     c      d      e      f
#>     <int> <char> <int> <fctr> <lgcl> <lgcl>
#>  1:     1      a    11      a  FALSE  FALSE
#>  2:     2      b    12      b  FALSE   TRUE
#>  3:     3      c    13      c  FALSE  FALSE
#>  4:     4      d    14      d  FALSE  FALSE
#>  5:     5      e    15      e   TRUE   TRUE
#>  6:     6      f    16      f  FALSE  FALSE
#>  7:     7      g    17      g  FALSE  FALSE
#>  8:     8      h    18      h   TRUE   TRUE
#>  9:     9      i    19      i   TRUE  FALSE
#> 10:    10      j    20      j  FALSE  FALSE



#############################################################################


# dt_setreorder ====

n <- 1e4
obj <- data.table::data.table(
  a = 1L:n, b = n:1L, c = as.double(1:n), d = as.double(n:1)
)
dt_setreorder(obj, roworder = n:1)
head(obj)
#>        a     b     c     d
#>    <int> <int> <num> <num>
#> 1: 10000     1 10000     1
#> 2:  9999     2  9999     2
#> 3:  9998     3  9998     3
#> 4:  9997     4  9997     4
#> 5:  9996     5  9996     5
#> 6:  9995     6  9995     6
dt_setreorder(obj, varorder = ncol(obj):1)
head(obj)
#>        d     c     b     a
#>    <num> <num> <int> <int>
#> 1:     1 10000     1 10000
#> 2:     2  9999     2  9999
#> 3:     3  9998     3  9998
#> 4:     4  9997     4  9997
#> 5:     5  9996     5  9996
#> 6:     6  9995     6  9995