Make multiple coverage heatmaps
nmatlist2heatmaps(
nmatlist,
panel_groups = NULL,
title = NULL,
caption = NULL,
upstream_length = NULL,
downstream_length = NULL,
k_clusters = 0,
k_subset = NULL,
k_colors = NULL,
k_width = grid::unit(5, "mm"),
k_method = c("euclidean", "pearson", "correlation"),
k_heatmap = main_heatmap,
partition = NULL,
rows = NULL,
row_order = NULL,
nmat_colors = NULL,
middle_color = "white",
nmat_names = NULL,
main_heatmap = 1,
anno_df = NULL,
byCols = NULL,
color_sub = NULL,
anno_row_marks = NULL,
anno_row_labels = NULL,
top_annotation = NULL,
top_anno_height = grid::unit(3, "cm"),
top_axis_side = c("right"),
legend_max_ncol = 2,
legend_base_nrow = 5,
legend_max_labels = 40,
show_heatmap_legend = TRUE,
hm_nrow = 1,
transform = "none",
signal_ceiling = NULL,
axis_name = NULL,
axis_name_gp = grid::gpar(fontsize = 8),
axis_name_rot = 90,
column_title_gp = grid::gpar(fontsize = 12),
lens = -2,
anno_lens = 8,
pos_line = FALSE,
seed = 123,
ht_gap = grid::unit(3, "mm"),
profile_value = c("mean", "sum", "abs_mean", "abs_sum"),
ylims = NULL,
border = TRUE,
iter.max = 20,
use_raster = TRUE,
raster_quality = 1,
raster_by_magick = TRUE,
do_plot = TRUE,
legend_width = grid::unit(3, "cm"),
trim_legend_title = TRUE,
heatmap_legend_param = NULL,
annotation_legend_param = NULL,
return_type = c("heatmaplist", "grid"),
show_error = FALSE,
verbose = FALSE,
...
)
list
containing normalizedMatrix
objects,
usually the output from coverage_matrix2nmat()
.
integer number of k-means clusters to
use to partition each heatmap. Use 0
or NULL
for
no clustering.
integer vector of k-means clusters to retain. Often one cluster contains mostly empty values, and can be removed using this mechanism.
vector of R colors, or NULL
to use
the output of colorjam::rainbowJam(k_clusters)
.
unit
width of the k-means cluster color
bar, used with k_clusters
.
character
string indicating the distance
used by k-means, where the common default is
"euclidean"
, however a useful alternative for
sequence coverage data is "correlation"
as implemented
in amap::Kmeans()
.
integer
indicating which one or more
normalizedMatrix
objects in nmatlist
will be used
for k-means clustering, when k_clusters
is
greater than 1.
character
or factor
vector used to split rows
of each matrix in nmatlist
, named by rownames. This
value is ignored when k_clusters
is supplied.
character
vector of rownames(nmatlist)
or
integer
vector with index of rows to keep from each
matrix in nmatlist
.
integer vector used to order rows.
When TRUE
or NULL
it uses
the default for EnrichedHeatmap::EnrichedHeatmap()
which is the EnrichedHeatmap::enriched_score()
for the matrix main_heatmap
. When FALSE
the
rows are ordered by the order they appear in rows
,
which is either the order they appear in nmatlist
or the order after sorting anno_df
. When
TRUE
the default ordering described above is used.
named character vector of R colors,
to colorize each heatmap. When NULL
then
colorjam::rainbowJam()
is used to create colors
for each heatmap panel.
character
R compatible color used
when creating a divergent color gradient, this color
is used as the middle color. Usually this color should
be either "white"
or "black"
.
character
vector, or NULL
, optional,
used as custom names for each heatmap in nmatlist
.
When nmat_names=NULL
the signal_name
values are
used from each nmatlist
matrix.
integer index referring to the
entry in nmatlist
to use for clustering and row
ordering.
data.frame
or object that can be coerced,
used to annotate rows of each matrix. It must have
rownames(anno_df)
that match rownames(nmatlist)
.
When supplied, data can be sorted using byCols
.
Note that only the rownames(anno_df)
present in both nmatlist
and anno_df
are
used to display the heatmaps. These rows
may also be subsetted using argument rows
.
character vector of values in
colnames(anno_df)
used to sort the data.frame
via jamba::mixedSortDF()
. Any colname with
prefix -
will be reverse-sorted.
character vector
of R colors to be used
as categorical colors, whose names match items to be
colored. This argument is intended for anno_df
,
for any column in anno_df
where all values in that
column are also in names(color_sub)
will be colorized
using color_sub
instead of generating new colors.
Also colors for partition and kmeans clusters, usually
defined with k_colors
can be defined in color_sub,
if names(color_sub)
match the partition labels.
character vector of rownames
which will be labeled beside the heatmaps, using
the ComplexHeatmap::anno_mark()
method. It currently
requires anno_df
be defined, since it uses the
first column in anno_df
as a one-column heatmap,
to anchor the labels.
character vector of optional
character labels to use instead of rownames
.
If NULL
then anno_row_marks
are used. Or
anno_row_labels
may contain a character vector
of colnames(anno_df)
which will create labels
by concatenating each column value separated by
space " "
.
HeatmapAnnotation
or logical
or list
:
TRUE
to use the default approach
EnrichedHeatmap::anno_enriched()
FALSE
to prevent the display of top annotation
HeatmapAnnotation
which should be in the form
ComplexHeatmap::HeatmapAnnotation(EnrichedHeatmap::anno_enriched())
or equivalent. This form is required for the annotation
function to be called on each coverage matrix heatmap.
list
of objects suitable to be passed as a
top_annotation
argument for each coverage heatmap,
in order of nmatlist
.
unit
object to define the default
height of the top_annotation
. When top_annotation
is not defined, the default method uses
EnrichedHeatmap::anno_enriched()
with
height=top_anno_height
.
character
value indicating which side
of the top annotation to place the y-axis labels.
When there is one value, it is repeated to length(nmatlist)
,
otherwise it is mainly used when panel_groups
are
provided, in which case only one top annotation is
labeled per contiguous set of panels in the same panel group.
In that case "left"
will label the left side of the
first panel in each group, "right"
will label the
right side of the last panel in each group.
Values:
"left", "right", "both", "none", "all".
integer number indicating the maximum number of columns allowed for a categorical color legend.
integer number indicating the base
number of rows used for a categorical color legend, before
additional columns are added. Once the number of elements
exceeds (legend_max_ncol * legend_base_nrow)
then
rows are added, but columns never exceed legend_max_ncol
.
integer
to define the maximum labels
to display as a color legend. When any anno_df
column contains
more than this number of categorical colors, the legend is
not displayed (because it would prevent display of the
heatmaps at all).
integer number of rows used to display the heatmap panels.
either character
string referring to
a numeric transformation, or a function
that applies
a numeric transformation. Valid character
string values:
"log2signed"
applies jamba::log2signed()
which applies
log2(1+x)
transform to the absolute value, then multiplies
by the original sign(x)
; "sqrt"
applies square root;
"cubert"
applies cube root x^(1/3)
; "qrt"
applies
fourth root x^(1/4)
. When there are negative numeric
values, the transformation is applied to absolute value,
then multiplied by the original sign. Therefore, the
transformation is applied to adjust the magnitude of
the values. These values are passed to get_numeric_transform()
which may have more information.
numeric
vector whose values are recycled
to length length(nmatlist)
. The signal_ceiling
applies a maximum numeric value to the
color ramp for each matrix in nmatlist
. The value is
passed to get_nmat_ceiling()
, which recognizes three
numeric forms:
signal_ceiling > 1
: this specific numeric value
is applied as the ceiling
signal_ceiling > 0
and signal_ceiling <= 1
: this numeric
value is interpreted as a quantile threshold, for example
signal_ceiling=0.75
would calculate ceiling quantile(x, probs=0.75)
.
signal_ceiling
is NULL
: the maximum absolute value of each
matrix is used as the ceiling.
Note that the ceiling is only applied to color scale and not to the underlying data, which is useful to know because any clustering and row ordering steps will use the full data as needed.
If data needs to be strictly controlled to a
numeric ceiling, that processing should take place
on nmatlist
before calling nmatlist2heatmaps()
.
x-axis label graphic parameters,
as output from grid::gpar()
. For example to define
the x-axis font size, use the form
grid::gpar(fontsize=8)
.
numeric value either 0
or 90
indicating
whether to rotate the x-axis names, where 90
will rotate
labels, and 0
will leave labels horizontal.
heatmap title graphic parameters,
as output from grid::gpar()
. For example to define
the heatmap title font size, use the form
grid::gpar(fontsize=8)
. This argument is passed
directly to ComplexHeatmap::Heatmap()
.
numeric value used to scale each heatmap
color ramp, using getColorRamp()
. Values above zero
apply the color gradient more rapidly starting from the
lowest value, making the color appear more intense for
lower numeric values. Values below zero apply the color gradient
less rapidly, which makes lower numeric values appear
less intense. This adjustment is intended to help
apply suitable color contrast depending upon the range
of numeric values. The lens
values are applied to
each matrix in nmatlist
, and so it is recycled to
length(nmatlist)
as needed. Note that signal_ceiling
is also intended to help apply the color gradient to
a suitable numeric range, and the lens
argument is
applied relative to the numeric range being used.
numeric value used to scale the annotation
heatmap color scales, see lens
for details. Values
higher than 1 make the color gradient more intense,
values below -1 make the color gradient less intense.
numeric value used with set.seed()
to
set the random seed. Set to NULL
to avoid running
set.seed()
.
unit
size to specify the gap between multiple heatmaps.
This argument is passed to ComplexHeatmap::draw()
. An example
is grid::unit(8, "mm")
to specify 8 millimeters.
character string to define the type of numeric
profile to display at the top of each heatmap. This argument is
passed to EnrichedHeatmap::anno_enriched()
. Values: "mean"
the
mean profile; "sum"
the sum; "abs_sum"
sum of absolute values;
"abs_mean"
the mean of absolute values.
vector
of maximum y-axis values for each heatmap profile;
or list
logical
indicating whether to draw a border around the
heatmap, which includes all heatmap panels in the event of
splitting by clustering. The border
can be supplied as a vector,
so the border
can be applied specifically to each heatmap
if needed.
integer value indicating the maximum iterations
performed by k-means clustering, only relevant when k_clusters
is non-zero.
logical indicating whether to create heatmaps
using raster resizing, almost always recommended TRUE
.
numeric
passed to ComplexHeatmap::Heatmap()
,
used when use_raster=TRUE
and defines the level of detail retained,
and is used only when raster_by_magick=FALSE
. Using larger numbers
decreases speed substantially.
logical
passed to ComplexHeatmap::Heatmap()
,
to enable ImageMagick use during rasterization. By default this
option is TRUE
and is only disabled when the R package
"magick"
is not installed, or not properly configured.
If you see a warning "installing 'magick' will improve rasterization"
then check the R package with library(magick)
and see if
there are error messages. When "magick"
is not available,
the rasterization is substantially slower, and may produce
files much larger than normal.
logical indicating whether to draw the heatmaps,
where FALSE
will return the data used to create heatmaps
without actually drawing the heatmaps.
character string indicating the type of
data to return: "heatmaplist"
returns the list of heatmaps,
which can separately be arranged together using
ComplexHeatmap::draw()
or grid::grid.draw()
.
logical indicating whether to add error
bars to the profile plot at the top of each heatmap.
These error bars are calculated by
EnrichedHeatmap::anno_enriched()
using
matrixStats::colSds(x)/nrow(x)
.
logical indicating whether to print verbose output.
additional arguments are passed to
EnrichedHeatmap::EnrichedHeatmap()
to allow greater
customization of details. Note that many ...
arguments
are also passed to ComplexHeatmap::Heatmap()
.
This function takes a list of normalizedMatrix
objects,
usually the output of coverage_matrix2nmat()
, and
produces multiple heatmaps using
EnrichedHeatmap
.
This function is intended to be a convenient wrapper to help keep each data matrix in order, to apply consistent clustering and filtering across all data matrices, and to enable optional multi-row heatmap layout.
Other jam coverage heatmap functions:
coverage_matrix2nmat()
,
get_nmat_ceiling()
,
nmathm_row_order()
,
validate_heatmap_params()
,
zoom_nmatlist()
,
zoom_nmat()
## There is a small example file to use for testing
## The two coverage matrix files are located with system.file() inside the
## installed "platjam" package, then imported with coverage_matrix2nmat().
library(jamba)
#>
#> Attaching package: ‘jamba’
#> The following object is masked from ‘package:IRanges’:
#>
#> heads
cov_file1 <- system.file("data", "tss_coverage.matrix", package="platjam");
cov_file2 <- system.file("data", "h3k4me1_coverage.matrix", package="platjam");
cov_files <- c(cov_file1, cov_file2);
# name each file by its basename without the ".matrix" suffix
names(cov_files) <- gsub("[.]matrix",
   "",
   basename(cov_files));
nmatlist <- lapply(cov_files, coverage_matrix2nmat);
nmatlist2heatmaps(nmatlist);
# sometimes data transform can be helpful
nmatlist2heatmaps(nmatlist,
   transform=c("log2signed", "sqrt"));
# k-means clusters, default uses euclidean distance
nmatlist2heatmaps(nmatlist, k_clusters=4,
   transform=c("log2signed", "sqrt"));
# k-means clusters, "correlation" or "pearson" sometimes works better
nmatlist2heatmaps(nmatlist,
   k_clusters=4,
   k_method="pearson",
   transform=c("log2signed", "sqrt"));
#> Warning: empty cluster: try a better set of initial centers
# example showing usage of top_axis_side
# and panel_groups
# repeat each matrix three times to create six panels in two groups
nmatlist2 <- nmatlist[c(1, 1, 1, 2, 2, 2)];
names(nmatlist2) <- jamba::makeNames(names(nmatlist2))
# store a shortened label in the "signal_name" attribute of each matrix
for (iname in names(nmatlist2)) {
   attr(nmatlist2[[iname]], "signal_name") <- gsub("coverage", "cov", iname);
}
# top_axis_side="left"
# assumes 12x7 figure size
nmatlist2heatmaps(nmatlist2,
   signal_ceiling=0.8,
   nmat_colors=rep(c("firebrick", "tomato"), each=3),
   panel_groups=rep(c("tss", "h3k4me1"), each=3),
   ht_gap=grid::unit(4, "mm"),
   top_axis_side="left",
   transform=rep(c("log2signed", "sqrt"), each=3));
# top_axis_side="both"
nmatlist2heatmaps(nmatlist2,
   panel_groups=rep(c("tss", "h3k4me1"), each=3),
   ht_gap=grid::unit(6, "mm"),
   top_axis_side="both",
   transform=rep(c("log2signed", "sqrt"), each=3));
# multiple heatmap rows
nmatlist2heatmaps(nmatlist2,
   k_clusters=4,
   k_method="pearson",
   hm_nrow=2,
   panel_groups=rep(c("tss", "h3k4me1"), each=3),
   ht_gap=grid::unit(6, "mm"),
   top_axis_side="both",
   top_anno_height=grid::unit(0.8, "cm"),
   transform=rep(c("log2signed", "sqrt"), each=3));
#> Warning: empty cluster: try a better set of initial centers
# invent anno_df data.frame of additional annotations
anno_df <- data.frame(
   tss_score=EnrichedHeatmap::enriched_score(jamba::log2signed(nmatlist[[1]])),
   h3k4me1_score=EnrichedHeatmap::enriched_score(jamba::log2signed(nmatlist[[2]]))
);
rownames(anno_df) <- rownames(nmatlist[[1]]);
# nmatlist holds two matrices, so supply exactly one transform per matrix
# (the six-element form is only appropriate for the six-panel nmatlist2)
nmatlist2heatmaps(nmatlist,
   title="k-means clustering across both heatmaps",
   k_clusters=4,
   k_method="pearson",
   k_heatmap=c(1, 2),
   ht_gap=grid::unit(6, "mm"),
   top_axis_side="left",
   anno_df=anno_df,
   transform=c("log2signed", "sqrt"));
#> ## (12:31:46) 21Sep2023: nmatlist2heatmaps(): Preparing ComplexHeatmap::draw(HeatmapList)