Write insertion counts data for one or more pseudobulks to bed/bedgraph format.
Beds only hold chrom, start, and end data, while bedGraphs also provide a score column.
This reports the total number of insertions at each basepair for each group listed in `cell_groups`.
Usage
write_insertion_bedgraph(
fragments,
path,
cell_groups = rlang::rep_along(cellNames(fragments), "all"),
insertion_mode = c("both", "start_only", "end_only"),
tile_width = 1,
normalization_method = c("none", "cpm", "n_cells"),
chrom_sizes = NULL
)
write_insertion_bed(
fragments,
path,
cell_groups = rlang::rep_along(cellNames(fragments), "all"),
insertion_mode = c("both", "start_only", "end_only"),
verbose = FALSE,
threads = 1
)
Arguments
- fragments
IterableFragments object
- path
(character vector) Path(s) to save bed/bedgraphs to, optionally ending in ".gz" to add gzip compression. If `cell_groups` is provided, `path` must be a named character vector, with one name for each level in `cell_groups`.
- cell_groups
Character or factor assigning a group to each cell, in order of
cellNames(fragments)
- insertion_mode
(string) Which fragment ends to use for coverage calculation. One of "both", "start_only", or "end_only"
- tile_width
(integer) Width of tiles to use for binning insertions. All insertions in a single bin are summed. If `tile_width` is 1, then this is functionally equivalent to `write_insertion_bedgraph()`.
- normalization_method
(character) Normalization method to use. One of:
`none`: No normalization.
`cpm`: Normalize by total number of fragments in each group, scaling to 1 million fragments (i.e. CPM).
`n_cells`: Normalize by total number of cells in each group.
- chrom_sizes
(GRanges, data.frame, list, numeric, or NULL) Chromosome sizes to clip tiles when at the end of a chromosome. If `NULL`, then `tile_width` is required to be 1. If a data.frame or list, must contain columns `chr` and `end` (see `help("genomic-ranges-like")`). If a numeric vector, then it is assumed to be the chromosome sizes in the order of `chrNames(fragments)`.
- verbose
(bool) Whether to provide verbose progress output to console.
- threads
(int) Number of threads to use.
Examples
## Prep data
frags <- get_demo_frags()
bedgraph_outputs <- file.path(tempdir(), "bedgraph_outputs")
######################################################
## `write_insertion_bedgraph()` examples
######################################################
## Write insertions
write_insertion_bedgraph(frags, file.path(bedgraph_outputs, "all.tar.gz"))
list.files(bedgraph_outputs)
#> [1] "all.tar.gz"
# With tiling
chrom_sizes <- read_ucsc_chrom_sizes("./reference", genome="hg38") %>%
dplyr::filter(chr %in% c("chr4", "chr11"))
write_insertion_bedgraph(frags, file.path(bedgraph_outputs, "all_tiled.bedGraph"),
chrom_sizes = chrom_sizes, normalization_method = "cpm", tile_width = 100)
reads <- readr::read_tsv(file.path(bedgraph_outputs, "all_tiled.bedGraph"),
col_names = c("chr", "start", "end", "score"),
show_col_types = FALSE)
head(reads)
#> # A tibble: 6 × 4
#> chr start end score
#> <chr> <dbl> <dbl> <dbl>
#> 1 chr4 10000 10100 1.45
#> 2 chr4 10100 10200 0.869
#> 3 chr4 10300 10400 0.290
#> 4 chr4 10400 10500 0.145
#> 5 chr4 10600 10700 0.434
#> 6 chr4 11100 11200 0.145
######################################################
## `write_insertion_bed()` examples
######################################################
# We utilize two groups this time
bed_outputs <- file.path(tempdir(), "bed_outputs")
cell_groups <- rep(c("A", "B"), length.out = length(cellNames(frags)))
bed_paths <- c(file.path(bed_outputs, "A.bed"), file.path(bed_outputs, "B.bed"))
names(bed_paths) <- c("A", "B")
write_insertion_bed(
frags, path = bed_paths, cell_groups = cell_groups,
verbose = TRUE
)
#> 2025-09-16 00:23:30 Writing bed file for cluster: A
#> 2025-09-16 00:23:31 Bed file for cluster: A written to: /tmp/Rtmp9Szcu7/bed_outputs/A.bed
#> 2025-09-16 00:23:31 Writing bed file for cluster: B
#> 2025-09-16 00:23:32 Bed file for cluster: B written to: /tmp/Rtmp9Szcu7/bed_outputs/B.bed
#> 2025-09-16 00:23:32 Finished writing bed files
list.files(bed_outputs)
#> [1] "A.bed" "B.bed"
head(readr::read_tsv(
file.path(bed_outputs, "A.bed"),
col_names = c("chr", "start", "end"), show_col_types = FALSE)
)
#> # A tibble: 6 × 3
#> chr start end
#> <chr> <dbl> <dbl>
#> 1 chr4 10035 10036
#> 2 chr4 10045 10046
#> 3 chr4 10045 10046
#> 4 chr4 10046 10047
#> 5 chr4 10046 10047
#> 6 chr4 10066 10067