Skip to contents

Write insertion count data for one or more pseudobulks in bed/bedGraph format. Bed files hold only chrom, start, and end columns, while bedGraph files also include a score column. This reports the total number of insertions at each basepair for each group listed in cell_groups.

Usage

write_insertion_bedgraph(
  fragments,
  path,
  cell_groups = rlang::rep_along(cellNames(fragments), "all"),
  insertion_mode = c("both", "start_only", "end_only"),
  tile_width = 1,
  normalization_method = c("none", "cpm", "n_cells"),
  chrom_sizes = NULL
)

write_insertion_bed(
  fragments,
  path,
  cell_groups = rlang::rep_along(cellNames(fragments), "all"),
  insertion_mode = c("both", "start_only", "end_only"),
  verbose = FALSE,
  threads = 1
)

Arguments

fragments

IterableFragments object

path

(character vector) Path(s) to save bed/bedgraphs to, optionally ending in ".gz" to add gzip compression. If cell_groups is provided, path must be a named character vector with one name for each level in cell_groups.

cell_groups

Character or factor assigning a group to each cell, in order of cellNames(fragments)

insertion_mode

(string) Which fragment ends to use for coverage calculation. One of "both", "start_only", or "end_only"

tile_width

(integer) Width of tiles to use for binning insertions. All insertions in a single bin are summed. If tile_width is 1 (the default), each tile is a single basepair, so insertions are reported at basepair resolution without any binning.

normalization_method

(character) Normalization method to use. One of:

  • none: No normalization

  • cpm: Normalize by total number of fragments in each group, scaling to 1 million fragments (i.e. CPM).

  • n_cells: Normalize by total number of cells in each group.

chrom_sizes

(GRanges, data.frame, list, numeric, or NULL) Chromosome sizes to clip tiles when at the end of a chromosome. If NULL, then tile_width is required to be 1. If a data.frame or list, must contain columns chr and end (See help("genomic-ranges-like")). If a numeric vector, then it is assumed to be the chromosome sizes in the order of chrNames(fragments).

verbose

(bool) Whether to provide verbose progress output to console.

threads

(int) Number of threads to use.

Value

NULL

Examples

## Prep data
frags <- get_demo_frags()
bedgraph_outputs <- file.path(tempdir(), "bedgraph_outputs")

######################################################
## `write_insertion_bedgraph()` examples
######################################################
## Write insertions
write_insertion_bedgraph(frags, file.path(bedgraph_outputs, "all.tar.gz"))
list.files(bedgraph_outputs)
#> [1] "all.tar.gz"

# With tiling
chrom_sizes <- read_ucsc_chrom_sizes("./reference", genome="hg38") %>% 
  dplyr::filter(chr %in% c("chr4", "chr11"))
write_insertion_bedgraph(frags, file.path(bedgraph_outputs, "all_tiled.bedGraph"),
  chrom_sizes = chrom_sizes, normalization_method = "cpm", tile_width = 100)
reads <- readr::read_tsv(file.path(bedgraph_outputs, "all_tiled.bedGraph"),
  col_names = c("chr", "start", "end", "score"),
  show_col_types = FALSE)
head(reads)
#> # A tibble: 6 × 4
#>   chr   start   end score
#>   <chr> <dbl> <dbl> <dbl>
#> 1 chr4  10000 10100 1.45 
#> 2 chr4  10100 10200 0.869
#> 3 chr4  10300 10400 0.290
#> 4 chr4  10400 10500 0.145
#> 5 chr4  10600 10700 0.434
#> 6 chr4  11100 11200 0.145


######################################################
## `write_insertion_bed()` examples
######################################################

# We utilize two groups this time
bed_outputs <- file.path(tempdir(), "bed_outputs")
cell_groups <- rep(c("A", "B"), length.out = length(cellNames(frags)))
bed_paths <- c(file.path(bed_outputs, "A.bed"), file.path(bed_outputs, "B.bed"))
names(bed_paths) <- c("A", "B")
write_insertion_bed(
  frags, path = bed_paths, cell_groups = cell_groups,
  verbose = TRUE
)
#> 2025-09-16 00:23:30 Writing bed file for cluster: A
#> 2025-09-16 00:23:31 Bed file for cluster: A written to: /tmp/Rtmp9Szcu7/bed_outputs/A.bed
#> 2025-09-16 00:23:31 Writing bed file for cluster: B
#> 2025-09-16 00:23:32 Bed file for cluster: B written to: /tmp/Rtmp9Szcu7/bed_outputs/B.bed
#> 2025-09-16 00:23:32 Finished writing bed files
list.files(bed_outputs)
#> [1] "A.bed" "B.bed"
head(readr::read_tsv(
  file.path(bed_outputs, "A.bed"),
  col_names = c("chr", "start", "end"), show_col_types = FALSE)
)
#> # A tibble: 6 × 3
#>   chr   start   end
#>   <chr> <dbl> <dbl>
#> 1 chr4  10035 10036
#> 2 chr4  10045 10046
#> 3 chr4  10045 10046
#> 4 chr4  10046 10047
#> 5 chr4  10046 10047
#> 6 chr4  10066 10067