Calculate matrix stats
Details
The statistics are calculated in a single pass over the matrix, so this method is more efficient than the standard rowMeans or colMeans when multiple statistics are needed. The stats are ordered by complexity: nonzero, mean, then variance. All less complex stats are calculated in the process of calculating a more complicated stat. So to calculate mean and variance simultaneously, just ask for variance, which will compute mean and nonzero counts as a side-effect.
Examples
mat <- matrix(rpois(100, lambda = 5), nrow = 10)
rownames(mat) <- paste0("gene", 1:10)
colnames(mat) <- paste0("cell", 1:10)
mat <- mat %>% as("dgCMatrix") %>% as("IterableMatrix")
## By default, no row or column stats are calculated
res_none <- matrix_stats(mat)
res_none
#> $row_stats
#> gene1 gene2 gene3 gene4 gene5 gene6 gene7 gene8 gene9 gene10
#>
#> $col_stats
#> cell1 cell2 cell3 cell4 cell5 cell6 cell7 cell8 cell9 cell10
#>
## Request row variance (automatically computes mean and nonzero too)
res_row_var <- matrix_stats(mat, row_stats = "variance")
res_row_var
#> $row_stats
#> gene1 gene2 gene3 gene4 gene5 gene6 gene7
#> nonzero 10.000000 10.000000 10.00000 10.000000 10.000000 10.000000 10.000000
#> mean 6.000000 5.200000 5.40000 4.800000 5.700000 5.800000 7.000000
#> variance 5.555556 1.733333 10.93333 3.288889 6.677778 3.511111 5.555556
#> gene8 gene9 gene10
#> nonzero 10.000000 10.000000 10.000000
#> mean 4.200000 3.500000 4.800000
#> variance 3.288889 3.388889 5.288889
#>
#> $col_stats
#> cell1 cell2 cell3 cell4 cell5 cell6 cell7 cell8 cell9 cell10
#>
## Request row variance and column mean (column nonzero counts are computed too)
res_both_var <- matrix_stats(
mat = mat,
row_stats = "variance",
col_stats = "mean"
)
res_both_var
#> $row_stats
#> gene1 gene2 gene3 gene4 gene5 gene6 gene7
#> nonzero 10.000000 10.000000 10.00000 10.000000 10.000000 10.000000 10.000000
#> mean 6.000000 5.200000 5.40000 4.800000 5.700000 5.800000 7.000000
#> variance 5.555556 1.733333 10.93333 3.288889 6.677778 3.511111 5.555556
#> gene8 gene9 gene10
#> nonzero 10.000000 10.000000 10.000000
#> mean 4.200000 3.500000 4.800000
#> variance 3.288889 3.388889 5.288889
#>
#> $col_stats
#> cell1 cell2 cell3 cell4 cell5 cell6 cell7 cell8 cell9 cell10
#> nonzero 10.0 10.0 10.0 10 10.0 10.0 10.0 10.0 10.0 10.0
#> mean 4.5 4.9 6.5 5 4.3 5.1 5.8 5.4 5.4 5.5
#>