# Example matrix; mark it as a broadcaster so operations on it broadcast.
mat <- matrix(rnorm(20), nrow = 5)
broadcaster(mat) <- TRUE
print(mat)
#> [,1] [,2] [,3] [,4]
#> [1,] 0.06334963 -1.6549865 -1.0760115 -1.1978546
#> [2,] 0.06083312 0.5972201 -1.0047298 -1.6750830
#> [3,] -1.15681477 -0.8810856 -0.5601713 1.1812576
#> [4,] 0.60728543 1.9747099 0.5555178 -0.6313586
#> [5,] -0.32136285 0.3607446 0.5870288 -1.5049918
#> broadcaster
Practical Applications
1 Introduction
Broadcasting comes up frequently enough in real-world problems. This page gives a few examples of these.
2 Sweep
Anytime you would normally use sweep()
, you can now use broadcasting, which is faster and more memory-efficient.
Consider for example the following matrix:
We want to scale each column of this matrix, by subtracting its mean and dividing by its standard deviation.
This can be done using sweep()
like so:
# Column-wise scaling via sweep(): subtract column means, divide by column SDs.
scaled <- sweep(mat, 2, colMeans(mat), FUN = "-")
scaled <- sweep(scaled, 2, matrixStats::colSds(mat), FUN = "/")
print(scaled)
#> [,1] [,2] [,3] [,4]
#> [1,] 0.3256330 -1.2364178 -0.9475192 -0.3730935
#> [2,] 0.3217802 0.3692197 -0.8605201 -0.7850111
#> [3,] -1.5424519 -0.6846904 -0.3179374 1.6804272
#> [4,] 1.1584046 1.3512563 1.0437588 0.1158752
#> [5,] -0.2633660 0.2006322 1.0822180 -0.6381977
#> broadcaster
But it can be done much faster and more memory-efficiently with broadcasting like so:
# Same scaling, but via broadcasting: store the column statistics as
# 1-row broadcaster matrices and let the arithmetic broadcast over rows.
means <- matrix(colMeans(mat), nrow = 1L)
sds <- matrix(matrixStats::colSds(mat), nrow = 1L)
broadcaster(means) <- broadcaster(sds) <- TRUE
scaled <- (mat - means) / sds
print(scaled)
#> [,1] [,2] [,3] [,4]
#> [1,] 0.3256330 -1.2364178 -0.9475192 -0.3730935
#> [2,] 0.3217802 0.3692197 -0.8605201 -0.7850111
#> [3,] -1.5424519 -0.6846904 -0.3179374 1.6804272
#> [4,] 1.1584046 1.3512563 1.0437588 0.1158752
#> [5,] -0.2633660 0.2006322 1.0822180 -0.6381977
#> broadcaster
The larger the matrix mat
becomes, the more advantageous it becomes to use broadcasting rather than sweeping.
3 Binding arrays along an arbitrary dimension
The abind()
function, from the package of the same name, allows one to bind arrays along any arbitrary dimensions (not just along rows or columns).
Unfortunately, abind()
does not support broadcasting, which can lead to frustrations such as the following:
# abind() does not broadcast, so binding these along dimension 2 errors:
x <- array(1:27, c(3, 3, 3))
y <- array(1L, c(3, 3, 1))
abind::abind(x, y, along = 2)
#> Error in abind(x, y, along = 2) :
#> arg 'X2' has dims=3, 3, 1; but need dims=3, X, 3
Here, abind()
is complaining about the dimensions not fitting perfectly.
But intuitively, binding x
and y
should be possible, with dimension 3
from array y
being broadcasted to size 3.
The bind_array() function provided by the ‘broadcast’ package can bind the arrays without problems:
# bind_array() broadcasts y's third dimension (size 1) to size 3 before binding:
x <- array(1:27, c(3, 3, 3))
y <- array(1L, c(3, 3, 1))
bind_array(list(x, y), 2)
#> , , 1
#>
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 1 4 7 1 1 1
#> [2,] 2 5 8 1 1 1
#> [3,] 3 6 9 1 1 1
#>
#> , , 2
#>
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 10 13 16 1 1 1
#> [2,] 11 14 17 1 1 1
#> [3,] 12 15 18 1 1 1
#>
#> , , 3
#>
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 19 22 25 1 1 1
#> [2,] 20 23 26 1 1 1
#> [3,] 21 24 27 1 1 1
bind_array() is also considerably faster and more memory efficient than abind()
.
4 Perform computation on all possible pairs
Suppose you have 2 vectors of character strings, and you want to concatenate all possible pairs of these strings.
In base R, this would require either a loop (which is slow), repeating the vectors several times (which requires more memory), or use of the outer()
function (which is both slow and requires lots of memory).
The ‘broadcast’ way to do this is to make the vectors orthogonal, and concatenate the strings of the orthogonal vectors — this is both fast and memory-efficient.
For example:
# Orthogonal vectors (column vs row) so bc.str() broadcasts to all pairs:
x <- array(letters[1:10], c(10, 1))
y <- array(letters[1:10], c(1, 10))

out <- bc.str(x, y, "+") # concatenate every pair of strings
dimnames(out) <- list(x, y)
print(out)
#> a b c d e f g h i j
#> a "aa" "ab" "ac" "ad" "ae" "af" "ag" "ah" "ai" "aj"
#> b "ba" "bb" "bc" "bd" "be" "bf" "bg" "bh" "bi" "bj"
#> c "ca" "cb" "cc" "cd" "ce" "cf" "cg" "ch" "ci" "cj"
#> d "da" "db" "dc" "dd" "de" "df" "dg" "dh" "di" "dj"
#> e "ea" "eb" "ec" "ed" "ee" "ef" "eg" "eh" "ei" "ej"
#> f "fa" "fb" "fc" "fd" "fe" "ff" "fg" "fh" "fi" "fj"
#> g "ga" "gb" "gc" "gd" "ge" "gf" "gg" "gh" "gi" "gj"
#> h "ha" "hb" "hc" "hd" "he" "hf" "hg" "hh" "hi" "hj"
#> i "ia" "ib" "ic" "id" "ie" "if" "ig" "ih" "ii" "ij"
#> j "ja" "jb" "jc" "jd" "je" "jf" "jg" "jh" "ji" "jj"
5 Grouped Broadcasting
5.1 Casting with equal group sizes
We have a list of points (ranging from 0 to 100) that students (n = 3) have achieved for 2 homework exercises:
# Points (0-100) for 3 students on 2 homework exercises, one row per record:
x <- cbind(
  student = rep(1:3, each = 2),
  homework = rep(1:2, 3),
  points = sample(0:100, 6)
)
print(x)
#> student homework points
#> [1,] 1 1 41
#> [2,] 1 2 98
#> [3,] 2 1 33
#> [4,] 2 2 80
#> [5,] 3 1 81
#> [6,] 3 2 19
However, the teacher has realised that the second homework assignment was way more difficult than the first, and thus decided the second homework assignment should weigh more — 2 times more, to be precise.
Thus the points for homework assignment 2 should be multiplied by 2. There are various ways to do this. For the sake of demonstration, an approach using acast() is shown here, as this is an example of a grouped broadcasted operation.
‘broadcast’ allows users to cast subsets of an array onto a new dimension, based on some grouping factor - in this case the homework ID is the grouping factor, and the following will do the job:
margin <- 1L # we cast from the rows, so margin = 1
grp <- as.factor(x[, "homework"]) # factor to define which rows belong to which group
levels(grp) <- c("assignment 1", "assignment 2") # names for the new dimension
out <- acast(x, margin, grp) # casting is performed here
broadcaster(out) <- TRUE
print(out)
#> , , assignment 1
#>
#> student homework points
#> [1,] 1 1 41
#> [2,] 2 1 33
#> [3,] 3 1 81
#>
#> , , assignment 2
#>
#> student homework points
#> [1,] 1 2 98
#> [2,] 2 2 80
#> [3,] 3 2 19
#>
#> broadcaster
Notice that the dimension-names of the new dimension (dimension 3) are equal to levels(grp)
.
With the group-cast array, one can use broadcasting to easily do things like multiply the values in each group with a different value.
In this case, we need to multiply the values for assignment 2 with 2, and leave the rest as-is.
Like so:
# create the multiplication factor array:
mult <- array(
  1,
  dim = c(1, 3, 2),
  dimnames = list(
    NULL,
    c("mult_id", "mult_homework", "mult_points"),
    c("assignment 1", "assignment 2")
  )
)
# only the points of assignment 2 get a multiplier of 2; everything else stays 1
mult[, "mult_points", c("assignment 1", "assignment 2")] <- c(1, 2)
broadcaster(mult) <- TRUE
print(mult)
#> , , assignment 1
#>
#> mult_id mult_homework mult_points
#> [1,] 1 1 1
#>
#> , , assignment 2
#>
#> mult_id mult_homework mult_points
#> [1,] 1 1 2
#>
#> broadcaster
# grouped broadcasted operation:
out2 <- out * mult
dimnames(out2) <- dimnames(out) # restore the names lost in the multiplication
print(out2)
#> , , assignment 1
#>
#> student homework points
#> [1,] 1 1 41
#> [2,] 2 1 33
#> [3,] 3 1 81
#>
#> , , assignment 2
#>
#> student homework points
#> [1,] 1 2 196
#> [2,] 2 2 160
#> [3,] 3 2 38
#>
#> broadcaster
Now the array needs to be reverse-cast back to its original shape.
Reverse-casting an array can be done be combining asplit()
with bind_array():
# reverse-cast: split along the last (group) dimension, then re-bind along `margin`
asplit(out2, ndim(out2)) |> bind_array(along = margin, name_along = FALSE)
#> student homework points
#> [1,] 1 1 41
#> [2,] 2 1 33
#> [3,] 3 1 81
#> [4,] 1 2 196
#> [5,] 2 2 160
#> [6,] 3 2 38
…though the order of, in this case, the rows (because margin = 1
) will not necessarily be the same as the original array.
5.2 Casting with unequal group sizes
Casting arrays also works when the groups have unequal sizes, though there are a few things to keep in mind.
Let’s start with a different input array:
# Input with unequal group sizes: group 2 has one extra record (for id 1).
x <- cbind(
  id = c(rep(1:3, each = 2), 1),
  grp = c(rep(1:2, 3), 2),
  val = rnorm(7)
)
print(x)
#> id grp val
#> [1,] 1 1 1.32757908
#> [2,] 1 2 -0.66655547
#> [3,] 2 1 1.00027544
#> [4,] 2 2 -0.16637384
#> [5,] 3 1 1.08953305
#> [6,] 3 2 -0.32089598
#> [7,] 1 2 0.07443137
Once again acast() can be used, but this time fill = TRUE must be specified so that acast() fills the gaps; otherwise an error is raised.
Thus one can cast in this case like so:
grp <- as.factor(x[, 2]) # grouping factor from the `grp` column
levels(grp) <- c("a", "b") # names for the new dimension
margin <- 1L # cast from the rows
out <- acast(x, margin, grp, fill = TRUE) # fill = TRUE pads unequal groups
print(out)
#> , , a
#>
#> id grp val
#> [1,] 1 1 1.327579
#> [2,] 2 1 1.000275
#> [3,] 3 1 1.089533
#> [4,] NA NA NA
#>
#> , , b
#>
#> id grp val
#> [1,] 1 2 -0.66655547
#> [2,] 2 2 -0.16637384
#> [3,] 3 2 -0.32089598
#> [4,] 1 2 0.07443137
Notice that some values are missing ( NA
); if some groups have unequal number of elements, acast() needs to fill the gaps with missing values. By default, gaps are filled with NA
if x
is atomic, and with list(NULL)
if x
is recursive. The user can change the filling value through the fill_value
argument.
Once again, we can get the original array back when we’re done like so:
# reverse-cast the group-cast array back to its original two-dimensional shape
asplit(out, ndim(out)) |> bind_array(along = margin)
#> id grp val
#> a.1 1 1 1.32757908
#> a.2 2 1 1.00027544
#> a.3 3 1 1.08953305
#> a.4 NA NA NA
#> b.1 1 2 -0.66655547
#> b.2 2 2 -0.16637384
#> b.3 3 2 -0.32089598
#> b.4 1 2 0.07443137
… but we do keep the missing values when the groups have an unequal number of elements.
6 Manually compute confidence/credible interval for a spline
The sd_lc() function computes the standard deviation for a linear combination of random variables. One of its notable use-cases is to compute the confidence interval (or credible interval if you’re going Bayesian) of a spline.
Packages like ‘mgcv’ provide the user the means to plot and analyse the smoothers, with confidence intervals, from a fitted GAM.
But not all packages are as user-friendly; for example the ‘INLA’ package, though very important for high-level statistical analyses with spatial-temporal correlation, is not very user-friendly in terms of producing the confidence interval of a spline.
So one practical application of sd_lc() is computing confidence intervals of splines when a package does not provide that service in a user-friendly way.
Please note that the sd_lc() function is just a linear algebra function, not specific to any type/class of model; so this function assumes you know what you’re doing.
For a demonstration of sd_lc(), the following will be done:
- a GAM model will be fitted using the ‘mgcv’ package. The model will consist of several low-rank thin-plate (lrtp) smoothers.
- One of these lrtp splines will be plotted with credible intervals using the
plot()
method provided by ‘mgcv’ itself. - The sd_lc() function will be used to compute the credible intervals of the spline from step 2 and re-create the plot.
For the sake of this demonstration, I’ll forgo some crucially mandatory steps in statistical modelling - like data exploration, model diagnostics, and model interpretation.
First, let’s create some data, fit a GAM on it, and plot the spline for variable “x2”:
# Simulate data and fit a GAM with three low-rank thin-plate smoothers:
d <- mgcv::gamSim(7, n = 1000L, dist = "normal", scale = 2L)
#> Gu & Wahba 4 term additive model, correlated predictors
m <- mgcv::gam(y ~ s(x1) + s(x2) + s(x3), data = d)
par(mfrow = c(1, 1))
plot(m, select = 2) # plot the smoother for x2
Now extract the required model parameters to recreate this plot:
# get names of relevant coefficients:
coeffnames <- names(coef(m))
coeffnames <- coeffnames[grepl("x2", coeffnames)] # keep only the s(x2) terms
print(coeffnames)
#> [1] "s(x2).1" "s(x2).2" "s(x2).3" "s(x2).4" "s(x2).5" "s(x2).6" "s(x2).7"
#> [8] "s(x2).8" "s(x2).9"
# get necessary model parameters (i.e. X, b, vc)
# but only for relevant coefficients:
b <- coef(m)[coeffnames]
X <- predict(m, type = "lpmatrix")[, coeffnames, drop = FALSE]
vc <- vcov(m)[coeffnames, coeffnames, drop = FALSE]
Computing the means of the spline is trivial:
# spline means: linear predictor matrix times the relevant coefficients
means <- X %*% b
Computing the (~ 95%) credible interval is a bit trickier - at least if you want to avoid unnecessary copies/memory-usage when you have a large dataset, and don’t want to use a slow for-loop.
But using the sd_lc() function it becomes relatively easy to compute it very memory-efficiently, even with a very large dataset:
st.devs <- sd_lc(X, vc) # get standard deviations efficiently
mult <- 2 # mgcv uses multiplier of 2; approx. 95% credible interval
lower <- means - mult * st.devs
upper <- means + mult * st.devs
Now let’s plot the manually computed spline over the original one, and see how well our own estimate fits.
I’ll use transparent green dots for the mean, and transparent red dots for the credible interval:
# define some colours (transparent green for means, transparent red for bounds):
colour1 <- rgb(red = 0, green = 1, blue = 0, alpha = 0.15)
colour2 <- rgb(red = 1, green = 0, blue = 0, alpha = 0.15)

# original plot from the 'mgcv' R-package:
par(mfrow = c(1, 1))
plot(m, select = 2)
# our own re-creation plotted over it:
lc <- data.frame(
  x = d$x2,
  means = means,
  lower = lower,
  upper = upper
)
lc <- lc[order(lc$x), ] # sort by x so the points trace the spline left to right
points(x = lc$x, y = lc$means, col = colour1) # plot our means
points(x = lc$x, y = lc$lower, col = colour2) # plot our lower bound
points(x = lc$x, y = lc$upper, col = colour2) # plot our upper bound
Perfect fit.
Since one can very easily re-create the plot using sd_lc(), one can also use a different plotting framework altogether.
Let’s use the ‘tinyplot’ framework to create the plot manually:
library(tinyplot)
tinytheme("clean")
tinyplot( # plot confidence interval as a ribbon
  x = lc$x, ymin = lc$lower, ymax = lc$upper, type = "ribbon",
  xlab = "x2", ylab = "s(x2)"
)
tinyplot( # plot means over it
  x = lc$x, y = lc$means, type = "l",
  add = TRUE, col = "green"
)