Generate setlist data for testing
Arguments
- n_items
integer total number of items available to all sets, also known as the universe size.
- n_sets
integer number of sets that contain items.
- do_signed
logical indicating whether to return signed sets, which indicate directionality with -1 or 1 values, named by the items.
- concordance
numeric between -1 and 1, used when do_signed=TRUE. This value imposes an approximate amount of concordance between random pairs of sets, using the concordance equation: concordance = (agree - disagree) / (agree + disagree) where (agree + disagree) = n. This equation approximates the number of items that agree as: agree = ceiling((concordance * n + n) / 2).
- min_size
integer minimum number of items that may be contained in each set.
- max_size
integer maximum number of items that may be contained in each set.
- items
vector or NULL that contains the universe of items. When items is defined, n_items is ignored.
- sizes
vector of integer values, or NULL, indicating the size of each set. When sizes is defined, min_size and max_size are ignored, and names(sizes) are used as names for each set.
- seed
numeric or NULL used with set.seed() for data reproducibility. When seed=NULL then set.seed() is not called.
- item_prefix
character string used as prefix for item names, default "item_".
- ...
additional arguments are ignored.
Value
list of sets: either a list of item vectors, or,
when do_signed=TRUE, a list of vectors whose
names are the items and whose values are signed values
from c(-1, 1).
Details
This function generates data to use as test input to
Venn diagram functions. It can generate sets of items,
or signed sets (integer values -1, 1) named by
item.
This function defines a range of set sizes, using min_size
and max_size, with a roughly square-root sequence of sizes
between these two extremes.
Note that the universe size represents the total available
items, but not necessarily the total number of items
represented by the sets. For example, if n_items=1000000,
max_size=500 and n_sets=3 then the maximum number of
items actually represented is 1500.
The universe can be defined using optional argument items,
which takes priority over n_items.
The specific size of each set can be defined with optional
argument sizes, which takes priority over min_size, and
max_size.
See also
Other venndir support:
modify_venndir_overlap(),
signed_overlaps(),
venndir_legender()
Examples
## basic setlist without signed direction
setlist <- make_venn_test(n_items=100,
n_sets=3,
min_size=5,
max_size=25)
set_im <- list2im_opt(setlist);
table(jamba::pasteByRow(as.matrix(set_im)*1))
#>
#> 0_0_1 0_1_0 0_1_1 1_0_0 1_0_1 1_1_0 1_1_1
#> 9 12 5 10 3 5 1
## basic setlist with signed direction
setlist <- make_venn_test(n_items=100,
n_sets=3,
do_signed=TRUE)
jamba::sdim(setlist);
#> rows class
#> set_A 32 numeric
#> set_B 16 numeric
#> set_C 15 numeric
## some example overlap summaries
sv1 <- signed_overlaps(setlist=setlist, "overlap")
sv1
#> sets overlap num_sets count set_A set_B
#> set_A|1 0 0 set_A 1 0 0 1 16 1 0
#> set_B|0 1 0 set_B 0 1 0 1 7 0 1
#> set_C|0 0 1 set_C 0 0 1 1 7 0 0
#> set_A&set_B|1 1 0 set_A&set_B 1 1 0 2 8 1 1
#> set_A&set_C|1 0 1 set_A&set_C 1 0 1 2 7 1 0
#> set_B&set_C|0 1 1 set_B&set_C 0 1 1 2 0 0 1
#> set_A&set_B&set_C|1 1 1 set_A&set_B&set_C 1 1 1 3 1 1 1
#> set_C overlap_label
#> set_A|1 0 0 0 1
#> set_B|0 1 0 0 1
#> set_C|0 0 1 1 1
#> set_A&set_B|1 1 0 0 1 1
#> set_A&set_C|1 0 1 1 1 1
#> set_B&set_C|0 1 1 1 1 1
#> set_A&set_B&set_C|1 1 1 1 1 1 1
## Familiar named overlap counts
jamba::nameVector(sv1[,c("count","sets")])
#> set_A set_B set_C set_A&set_B
#> 16 7 7 8
#> set_A&set_C set_B&set_C set_A&set_B&set_C
#> 7 0 1
## directional count table for each combination
sv2 <- signed_overlaps(setlist=setlist, "each")
sv2
#> sets each overlap num_sets count set_A
#> set_A|-1 0 0 set_A -1 0 0 1 0 0 1 7 1
#> set_A|1 0 0 set_A 1 0 0 1 0 0 1 9 1
#> set_B|0 -1 0 set_B 0 -1 0 0 1 0 1 3 0
#> set_B|0 1 0 set_B 0 1 0 0 1 0 1 4 0
#> set_C|0 0 -1 set_C 0 0 -1 0 0 1 1 2 0
#> set_C|0 0 1 set_C 0 0 1 0 0 1 1 5 0
#> set_A&set_B|-1 -1 0 set_A&set_B -1 -1 0 1 1 0 2 3 1
#> set_A&set_B|1 -1 0 set_A&set_B 1 -1 0 1 1 0 2 2 1
#> set_A&set_B|1 1 0 set_A&set_B 1 1 0 1 1 0 2 3 1
#> set_A&set_C|-1 0 -1 set_A&set_C -1 0 -1 1 0 1 2 3 1
#> set_A&set_C|-1 0 1 set_A&set_C -1 0 1 1 0 1 2 1 1
#> set_A&set_C|1 0 -1 set_A&set_C 1 0 -1 1 0 1 2 1 1
#> set_A&set_C|1 0 1 set_A&set_C 1 0 1 1 0 1 2 2 1
#> set_B&set_C|0 1 1 set_B&set_C 0 1 1 0 1 1 2 0 0
#> set_A&set_B&set_C|1 1 1 set_A&set_B&set_C 1 1 1 1 1 1 3 1 1
#> set_B set_C overlap_label
#> set_A|-1 0 0 0 0 -1
#> set_A|1 0 0 0 0 1
#> set_B|0 -1 0 1 0 -1
#> set_B|0 1 0 1 0 1
#> set_C|0 0 -1 0 1 -1
#> set_C|0 0 1 0 1 1
#> set_A&set_B|-1 -1 0 1 0 -1 -1
#> set_A&set_B|1 -1 0 1 0 1 -1
#> set_A&set_B|1 1 0 1 0 1 1
#> set_A&set_C|-1 0 -1 0 1 -1 -1
#> set_A&set_C|-1 0 1 0 1 -1 1
#> set_A&set_C|1 0 -1 0 1 1 -1
#> set_A&set_C|1 0 1 0 1 1 1
#> set_B&set_C|0 1 1 1 1 1 1
#> set_A&set_B&set_C|1 1 1 1 1 1 1 1
## directional count table for agreement or mixed
sv3 <- signed_overlaps(setlist=setlist, "agreement")
sv3
#> sets agreement overlap num_sets count
#> set_A|agreement set_A agreement 1 0 0 1 16
#> set_B|agreement set_B agreement 0 1 0 1 7
#> set_C|agreement set_C agreement 0 0 1 1 7
#> set_A&set_B|agreement set_A&set_B agreement 1 1 0 2 6
#> set_A&set_B|mixed set_A&set_B mixed 1 1 0 2 2
#> set_A&set_C|agreement set_A&set_C agreement 1 0 1 2 5
#> set_A&set_C|mixed set_A&set_C mixed 1 0 1 2 2
#> set_B&set_C|0 1 1 set_B&set_C agreement 0 1 1 2 0
#> set_A&set_B&set_C|agreement set_A&set_B&set_C agreement 1 1 1 3 1
#> set_A set_B set_C overlap_label
#> set_A|agreement 1 0 0 agreement
#> set_B|agreement 0 1 0 agreement
#> set_C|agreement 0 0 1 agreement
#> set_A&set_B|agreement 1 1 0 agreement
#> set_A&set_B|mixed 1 1 0 mixed
#> set_A&set_C|agreement 1 0 1 agreement
#> set_A&set_C|mixed 1 0 1 mixed
#> set_B&set_C|0 1 1 0 1 1 agreement
#> set_A&set_B&set_C|agreement 1 1 1 agreement
## signed incidence matrix
imv <- list2im_value(setlist)
dim(imv)
#> [1] 46 3
head(imv)
#> set_A set_B set_C
#> item_067 -1 0 0
#> item_042 1 0 0
#> item_050 1 0 -1
#> item_043 -1 0 0
#> item_014 -1 0 0
#> item_025 1 -1 0
## text venn diagram
textvenn(setlist, overlap_type="overlap")
#> set_A&set_B
#> 8
#> set_A set_B
#> 16 7
#>
#> set_A&set_B&set_C
#> 1
#> set_A&set_C set_B&set_C
#> 7 0
#>
#>
#> set_C
#> 7
## text venn diagram with signed direction
textvenn(setlist, overlap_type="each")
#> set_A&set_B ↑↑: 3
#> 8 ↑↓: 2
#> set_A ↑: 9 ↓↓: 3 set_B ↑: 4
#> 16 ↓: 7 7 ↓: 3
#>
#> set_A&set_B&set_C ↑↑↑: 1
#> 1
#> set_A&set_C ↑↑: 2 set_B&set_C ↑↑: 0
#> 7 ↑↓: 1 0
#> ↓↑: 1
#> ↓↓: 3
#> set_C ↑: 5
#> 7 ↓: 2