Shrink data.frame by group
Usage
shrink_df(
df,
by,
string_func = jamba::cPasteU,
num_func = mean,
extra_funcs = NULL,
do_test = FALSE,
verbose = FALSE,
...
)
Details
This function condenses a data.frame by groups of rows, as defined by `by`,
applying one summary function to character columns (`string_func`)
and another to numeric columns (`num_func`).
It is intended to be a simple but configurable tool for the majority of scenarios.
This function uses data.table
for overall speed.
See also
Other venndir internal:
assemble_venndir_label(), expand_range(), get_venn_polygon_shapes(),
make_color_contrast(), make_venn_combn_df(), match_list(),
print_color_df(), simple_ellipse(), venndir_to_df()
Examples
# Small example: six probes mapping onto three gene symbols.
# check.names = FALSE keeps the non-syntactic column name "logFC B-A" as-is.
testdf <- data.frame(
  SYMBOL = rep(c("ACTB", "GAPDH", "PPIA"), times = c(2, 3, 1)),
  `logFC B-A` = c(1.4, 1.4, 2.3, NA, 2.3, 5.1),
  probe = paste0("probe", 1:6),
  check.names = FALSE
)

# Numeric summary used throughout: the mean, ignoring NA values.
na_mean <- function(x) {
  mean(x, na.rm = TRUE)
}

# Collapse to one row per SYMBOL.
shrink_df(testdf, by = "SYMBOL", num_func = na_mean)
#> SYMBOL logFC B-A probe
#> 1 ACTB 1.4 probe1,probe2
#> 2 GAPDH 2.3 probe3,probe4,probe5
#> 3 PPIA 5.1 probe6

# 60,000 row simulation:
# stack 10,000 copies of testdf, suffixing SYMBOL with the copy number
# so that every copy contributes its own set of groups.
testdftall <- do.call(rbind, lapply(seq_len(10000), function(copy_num) {
  copy_df <- testdf
  copy_df$SYMBOL <- paste0(copy_df$SYMBOL, "_", copy_num)
  copy_df
}))
head(testdftall, 12)
#> SYMBOL logFC B-A probe
#> 1 ACTB_1 1.4 probe1
#> 2 ACTB_1 1.4 probe2
#> 3 GAPDH_1 2.3 probe3
#> 4 GAPDH_1 NA probe4
#> 5 GAPDH_1 2.3 probe5
#> 6 PPIA_1 5.1 probe6
#> 7 ACTB_2 1.4 probe1
#> 8 ACTB_2 1.4 probe2
#> 9 GAPDH_2 2.3 probe3
#> 10 GAPDH_2 NA probe4
#> 11 GAPDH_2 2.3 probe5
#> 12 PPIA_2 5.1 probe6

# Shrink the tall table by SYMBOL using the same NA-tolerant mean.
shrunk_tall <- shrink_df(
  testdftall,
  by = "SYMBOL",
  num_func = na_mean
)

# Optional comparison with jamses::shrinkDataFrame(); runs only when the
# jamses package is installed.
if (requireNamespace("jamses", quietly = TRUE)) {
  shrunk_tall2 <- jamses::shrinkDataFrame(
    testdftall,
    groupBy = "SYMBOL"
  )
  print(head(shrunk_tall2, 12))
}