Skip to contents

Shrink data.frame by group

Usage

shrink_df(
  df,
  by,
  string_func = jamba::cPasteU,
  num_func = mean,
  extra_funcs = NULL,
  do_test = FALSE,
  verbose = FALSE,
  ...
)

Details

This function condenses a data.frame by groups of rows, as defined by `by`, applying one function to character columns (`string_func`) and another to numeric columns (`num_func`). It is intended to be a simple but configurable tool that covers the majority of scenarios.

This function uses data.table for overall speed.

Examples

testdf <- data.frame(check.names=FALSE,
   SYMBOL=rep(c("ACTB", "GAPDH", "PPIA"), c(2, 3, 1)),
   `logFC B-A`=c(1.4, 1.4, 2.3, NA, 2.3, 5.1),
   probe=paste0("probe", 1:6))
shrink_df(testdf, by="SYMBOL", num_func=function(x){mean(x, na.rm=TRUE)})
#>   SYMBOL logFC B-A                probe
#> 1   ACTB       1.4        probe1,probe2
#> 2  GAPDH       2.3 probe3,probe4,probe5
#> 3   PPIA       5.1               probe6

testdftall <- do.call(rbind, lapply(1:10000, function(i){
   idf <- testdf;
   idf$SYMBOL <- paste0(idf$SYMBOL, "_", i);
   idf;
}))
head(testdftall, 12)
#>     SYMBOL logFC B-A  probe
#> 1   ACTB_1       1.4 probe1
#> 2   ACTB_1       1.4 probe2
#> 3  GAPDH_1       2.3 probe3
#> 4  GAPDH_1        NA probe4
#> 5  GAPDH_1       2.3 probe5
#> 6   PPIA_1       5.1 probe6
#> 7   ACTB_2       1.4 probe1
#> 8   ACTB_2       1.4 probe2
#> 9  GAPDH_2       2.3 probe3
#> 10 GAPDH_2        NA probe4
#> 11 GAPDH_2       2.3 probe5
#> 12  PPIA_2       5.1 probe6
shrunk_tall <- shrink_df(testdftall,
   by="SYMBOL",
   num_func=function(x){mean(x, na.rm=TRUE)})

shrunk_tall2 <- jamses::shrinkDataFrame(testdftall,
   groupBy="SYMBOL")
head(shrunk_tall2, 12)
#>          SYMBOL logFC B-A                probe
#> ACTB_1   ACTB_1       1.4        probe1,probe2
#> GAPDH_1 GAPDH_1       2.3 probe3,probe4,probe5
#> PPIA_1   PPIA_1       5.1               probe6
#> ACTB_2   ACTB_2       1.4        probe1,probe2
#> GAPDH_2 GAPDH_2       2.3 probe3,probe4,probe5
#> PPIA_2   PPIA_2       5.1               probe6
#> ACTB_3   ACTB_3       1.4        probe1,probe2
#> GAPDH_3 GAPDH_3       2.3 probe3,probe4,probe5
#> PPIA_3   PPIA_3       5.1               probe6
#> ACTB_4   ACTB_4       1.4        probe1,probe2
#> GAPDH_4 GAPDH_4       2.3 probe3,probe4,probe5
#> PPIA_4   PPIA_4       5.1               probe6