Combine SummarizedExperiment objects by row, using rbind() logic.
Usage
se_rbind(
se_list,
colnames_from = "_(n|p|neg|pos)_",
colnames_to = "_X_",
colnames_keep = NULL,
colData_action = c("identical", "all"),
colData_sep = ";",
verbose = FALSE,
...
)
Arguments
- se_list
list of SummarizedExperiment objects.
- colnames_from
character vector of patterns used with gsub() to convert colnames() for each object in se_list to an identifier that will be shared across all objects in se_list.
- colnames_to
character vector of replacements used with gsub() alongside each entry in colnames_from to convert colnames() for each object in se_list to an identifier that will be shared across all objects in se_list.
- colData_action
character string indicating the action used to combine colData() across se_list:
  "identical": retain only those columns in colData() which are identical in all se_list objects.
  "all": retain all columns, but convert columns with mismatched values to character strings in which the differing values are joined using colData_sep.
- colData_sep
character string used as delimiter when colData_action="all" and when values in a column in colData() differ across objects in se_list. Only values that differ are delimited, to minimize redundancy.
- ...
additional arguments are ignored.
Value
SummarizedExperiment object whose colData() has been
processed according to colData_action - either keeping only
columns with identical values, or keeping all values delimited
as a character string when values differ.
Details
This function is intended to help the process of calling
SummarizedExperiment::rbind().
The process:
1. Convert colnames() for each entry in se_list using colnames_from and colnames_to. This step is useful when each object in se_list may be using a different set of colnames(). For example "sample_p_12" and "sample_n_12" might be equivalent, so renaming them with colnames_from=c("_[np]_") and colnames_to=c("_X_") would convert both values to "sample_X_12".
2. Subset each object in se_list using only shared colnames().
3. Determine how to handle colData() columns that are not identical:
   - colData_action="identical": will only keep columns whose values are identical across all objects in se_list.
   - colData_action="all": will keep all columns in colData(), however non-identical columns will be converted to character and the differing values will be delimited using colData_sep.
4. Perform rbind().
TODO:
- Write equivalent se_cbind() - it will wait until there is a driving use case.
- Consider retaining only shared assayNames() across se_list.
- Consider optionally retaining user-defined assayNames(). (Alternatively, the user can subset the assayNames upfront, though it might be tedious). The recommended pattern in that case:
See also
Other jamses SE utilities:
make_se_test(),
se_collapse_by_column(),
se_collapse_by_row(),
se_detected_rows(),
se_normalize(),
se_to_rowcoldata()
Examples
m1 <- matrix(rnorm(100), ncol=10);
colnames(m1) <- paste0("sample_p_", 1:10);
rownames(m1) <- paste0("row_", 1:10);
m2 <- matrix(rnorm(100), ncol=10);
colnames(m2) <- paste0("sample_n_", 1:10);
rownames(m2) <- paste0("row_", 11:20);
sample_id <- gsub("_[np]_", "_X_", colnames(m1));
m1
#> sample_p_1 sample_p_2 sample_p_3 sample_p_4 sample_p_5 sample_p_6
#> row_1 0.8376299 -1.1661847 -0.003580308 1.3052615 -1.2621945 -0.1488164
#> row_2 -1.4434929 0.4079462 -1.495826814 0.8760961 -0.5518704 0.1124638
#> row_3 -0.2085702 -0.8630042 -0.768417027 0.4637961 -1.1827995 0.7246762
#> row_4 -0.4385635 0.3040420 0.408488505 0.4771142 0.6206636 -1.1874861
#> row_5 -0.2185938 -0.1464275 1.900136335 -0.4914053 0.4463130 -0.4996002
#> row_6 1.4599659 -1.4335622 0.110009123 -1.3193853 0.4218847 -1.0736430
#> row_7 -0.5820599 -0.7906078 1.140386825 1.2954258 0.4424648 1.0572402
#> row_8 -0.7830976 0.8851125 0.768081305 -1.4202195 0.5572457 1.2790726
#> row_9 -1.5196540 0.9030761 -1.168091622 -0.9388959 0.6393565 0.7876767
#> row_10 -0.8056981 2.0055733 -0.171112652 0.6289650 -1.9686616 -1.2224034
#> sample_p_7 sample_p_8 sample_p_9 sample_p_10
#> row_1 0.4519521 0.57833494 0.5084848 -0.003988944
#> row_2 1.1504492 1.36467278 -0.1163584 0.847842769
#> row_3 0.1679410 -1.70157980 0.9255461 -0.100116526
#> row_4 -0.5661093 -0.28067628 0.6482298 -0.279629907
#> row_5 -1.0861182 0.06506802 -0.1502094 0.784438245
#> row_6 -0.6653028 0.57858929 1.0403770 -1.584616645
#> row_7 0.7148484 -1.16920662 0.2925587 0.478366148
#> row_8 -0.4316611 0.80618486 0.6687514 0.393566373
#> row_9 0.2276149 0.30739008 -0.5941776 -2.695329369
#> row_10 1.2949458 0.26380601 1.5804318 0.368377329
m2
#> sample_n_1 sample_n_2 sample_n_3 sample_n_4 sample_n_5 sample_n_6
#> row_11 -2.168417747 -1.74102202 0.8687933 -0.0768659 1.1025652 -0.5870856158
#> row_12 0.659804377 -1.99258577 1.3693517 0.6873641 -0.5766189 0.0007641864
#> row_13 -0.453913733 0.55127421 0.7626511 0.1716315 -1.8516917 2.2144653193
#> row_14 -0.694936825 -0.03474206 0.4211472 -0.8301086 -0.1128632 0.9694343957
#> row_15 -0.006846303 1.85057170 -0.8682240 -0.2901591 1.3210693 0.7680077137
#> row_16 1.373052045 0.57367511 0.7295604 -1.3191257 0.6622543 -1.1083279118
#> row_17 -0.635323077 0.84969589 0.5002659 -0.9670319 0.4413832 -0.7862359200
#> row_18 0.558103294 1.33438359 0.6342503 -0.1446111 1.1837459 2.2841164803
#> row_19 0.341157868 -0.50071910 0.4236450 -1.7981326 -0.7715014 -1.0933007640
#> row_20 -1.179518629 0.51009793 -0.2018380 -1.6885425 0.7296892 0.2144793753
#> sample_n_7 sample_n_8 sample_n_9 sample_n_10
#> row_11 0.8925711 0.3661144 -0.275890475 1.53242362
#> row_12 1.0187580 -0.8747814 0.682315245 -1.35799783
#> row_13 1.0891120 1.0244749 -0.117290715 -0.19961905
#> row_14 -0.1631290 0.9047589 -0.344675864 0.63152313
#> row_15 -0.8209867 -0.2382487 0.111620498 1.76202090
#> row_16 -0.3072572 -1.5578549 -0.283405315 0.42601436
#> row_17 -0.9020980 0.7613099 -0.591017164 -0.01375342
#> row_18 0.6270687 1.1291444 -0.315936931 -0.30755691
#> row_19 1.1203550 -0.2951078 -0.008152152 0.41430816
#> row_20 2.1272136 0.5362428 0.207495141 0.98905792
se1 <- SummarizedExperiment::SummarizedExperiment(
assays=list(counts=m1),
rowData=data.frame(measurement=rownames(m1)),
colData=data.frame(sample=colnames(m1),
sample_id=sample_id))
se2 <- SummarizedExperiment::SummarizedExperiment(
assays=list(counts=m2),
rowData=data.frame(measurement=rownames(m2)),
colData=data.frame(sample=colnames(m2),
sample_id=sample_id))
# this step fails because colnames are not shared
# do.call(SummarizedExperiment::rbind, list(se1, se2))
# keep only identical colData columns
se12 <- se_rbind(list(se1, se2))
SummarizedExperiment::colData(se12)
#> DataFrame with 10 rows and 1 column
#> sample_id
#> <character>
#> sample_X_1 sample_X_1
#> sample_X_2 sample_X_2
#> sample_X_3 sample_X_3
#> sample_X_4 sample_X_4
#> sample_X_5 sample_X_5
#> sample_X_6 sample_X_6
#> sample_X_7 sample_X_7
#> sample_X_8 sample_X_8
#> sample_X_9 sample_X_9
#> sample_X_10 sample_X_10
# keep all colData columns
se12all <- se_rbind(list(se1, se2),
colData_action="all")
SummarizedExperiment::colData(se12all)
#> DataFrame with 10 rows and 2 columns
#> sample sample_id
#> <character> <character>
#> sample_X_1 sample_p_1;sample_n_1 sample_X_1
#> sample_X_2 sample_p_2;sample_n_2 sample_X_2
#> sample_X_3 sample_p_3;sample_n_3 sample_X_3
#> sample_X_4 sample_p_4;sample_n_4 sample_X_4
#> sample_X_5 sample_p_5;sample_n_5 sample_X_5
#> sample_X_6 sample_p_6;sample_n_6 sample_X_6
#> sample_X_7 sample_p_7;sample_n_7 sample_X_7
#> sample_X_8 sample_p_8;sample_n_8 sample_X_8
#> sample_X_9 sample_p_9;sample_n_9 sample_X_9
#> sample_X_10 sample_p_10;sample_n.. sample_X_10