Generate test sets for venndir
Arguments
- n_items
integer
total number of items available to all sets, also known as the universe size.
- n_sets
integer
number of sets that contain items.
- do_signed
logical
indicating whether to return signed sets, which indicate directionality with `-1` or `1` values, named by the items.
- concordance
numeric
between -1 and 1, used when `do_signed=TRUE`. This value imposes an approximate amount of concordance between random pairs of sets, using the concordance equation: `concordance = (agree - disagree) / (agree + disagree)` where `(agree + disagree) = n`. This equation approximates the number of items that agree as: `agree = ceiling((concordance * n + n) / 2)`.
- min_size
integer
minimum range of items that may be contained in each set.
- max_size
integer
maximum range of items that may be contained in each set.
- items
vector
or `NULL` that contains the universe of items. When `items` is defined, `n_items` is ignored.
- sizes
vector
of `integer` values, or `NULL`, indicating the size of each set. When `sizes` is defined, `min_size` and `max_size` are ignored. When `sizes` is defined, `names(sizes)` are used as names for each set.
- seed
numeric
or `NULL` used with `set.seed()` for data reproducibility. When `seed=NULL` then `set.seed()` is not called.
- item_prefix
character
string used as prefix for item names, default `"item_"`.
- ...
additional arguments are ignored.
Value
list
of items, either as a list of item vectors,
or when `do_signed=TRUE` a list of vectors, where vector
names contain the items, and vector values are signed values
from `c(-1, 1)`.
Details
This function generates data to use as test input to
Venn diagram functions. It can generate sets of items,
or signed sets (integer values `-1`, `1`) named by item.
This function defines a range of set sizes, using `min_size`
and `max_size`, with a roughly square-root sequence of sizes
between these two extremes.
Note that the universe size represents the total available
items, but not necessarily the total number of items
represented by the sets. For example, if `n_items=1000000`,
`max_size=500` and `n_sets=3` then the maximum number of
items actually represented is `1500`.
The universe can be defined using optional argument `items`,
which takes priority over `n_items`.
The specific size of each set can be defined with optional
argument `sizes`, which takes priority over `min_size` and
`max_size`.
See also
Other venndir utility:
curate_venn_labels()
,
expand_range()
,
make_color_contrast()
,
make_venn_combn_df()
,
match_list()
,
modify_venndir_overlap()
,
nudge_venndir_label()
,
plot,Venndir,ANY-method
,
print_color_df()
,
shrink_df()
,
three_point_angle()
,
venndir_legender()
,
venndir_to_df()
Examples
## basic setlist without signed direction
setlist <- make_venn_test(n_items=100,
n_sets=3,
min_size=5,
max_size=25)
set_im <- list2im_opt(setlist);
table(jamba::pasteByRow(as.matrix(set_im)*1))
#>
#> 0_0_1 0_1_0 0_1_1 1_0_0 1_0_1 1_1_0 1_1_1
#> 9 12 5 10 3 5 1
## basic setlist with signed direction
setlist <- make_venn_test(n_items=100,
n_sets=3,
do_signed=TRUE)
jamba::sdim(setlist);
#> rows class
#> set_A 32 numeric
#> set_B 16 numeric
#> set_C 15 numeric
## some example overlap summaries
sv1 <- signed_overlaps(setlist=setlist, "overlap")
sv1
#> sets overlap num_sets count set_A set_B
#> set_A|1 0 0 set_A 1 0 0 1 16 1 0
#> set_B|0 1 0 set_B 0 1 0 1 7 0 1
#> set_C|0 0 1 set_C 0 0 1 1 7 0 0
#> set_A&set_B|1 1 0 set_A&set_B 1 1 0 2 8 1 1
#> set_A&set_C|1 0 1 set_A&set_C 1 0 1 2 7 1 0
#> set_B&set_C|0 1 1 set_B&set_C 0 1 1 2 0 0 1
#> set_A&set_B&set_C|1 1 1 set_A&set_B&set_C 1 1 1 3 1 1 1
#> set_C overlap_label
#> set_A|1 0 0 0 1
#> set_B|0 1 0 0 1
#> set_C|0 0 1 1 1
#> set_A&set_B|1 1 0 0 1 1
#> set_A&set_C|1 0 1 1 1 1
#> set_B&set_C|0 1 1 1 1 1
#> set_A&set_B&set_C|1 1 1 1 1 1 1
## Familiar named overlap counts
jamba::nameVector(sv1[,c("count","sets")])
#> set_A set_B set_C set_A&set_B
#> 16 7 7 8
#> set_A&set_C set_B&set_C set_A&set_B&set_C
#> 7 0 1
## directional count table for each combination
sv2 <- signed_overlaps(setlist=setlist, "each")
sv2
#> sets each overlap num_sets count set_A
#> set_A|-1 0 0 set_A -1 0 0 1 0 0 1 7 1
#> set_A|1 0 0 set_A 1 0 0 1 0 0 1 9 1
#> set_B|0 -1 0 set_B 0 -1 0 0 1 0 1 3 0
#> set_B|0 1 0 set_B 0 1 0 0 1 0 1 4 0
#> set_C|0 0 -1 set_C 0 0 -1 0 0 1 1 2 0
#> set_C|0 0 1 set_C 0 0 1 0 0 1 1 5 0
#> set_A&set_B|-1 -1 0 set_A&set_B -1 -1 0 1 1 0 2 3 1
#> set_A&set_B|1 -1 0 set_A&set_B 1 -1 0 1 1 0 2 2 1
#> set_A&set_B|1 1 0 set_A&set_B 1 1 0 1 1 0 2 3 1
#> set_A&set_C|-1 0 -1 set_A&set_C -1 0 -1 1 0 1 2 3 1
#> set_A&set_C|-1 0 1 set_A&set_C -1 0 1 1 0 1 2 1 1
#> set_A&set_C|1 0 -1 set_A&set_C 1 0 -1 1 0 1 2 1 1
#> set_A&set_C|1 0 1 set_A&set_C 1 0 1 1 0 1 2 2 1
#> set_B&set_C|0 1 1 set_B&set_C 0 1 1 0 1 1 2 0 0
#> set_A&set_B&set_C|1 1 1 set_A&set_B&set_C 1 1 1 1 1 1 3 1 1
#> set_B set_C overlap_label
#> set_A|-1 0 0 0 0 -1
#> set_A|1 0 0 0 0 1
#> set_B|0 -1 0 1 0 -1
#> set_B|0 1 0 1 0 1
#> set_C|0 0 -1 0 1 -1
#> set_C|0 0 1 0 1 1
#> set_A&set_B|-1 -1 0 1 0 -1 -1
#> set_A&set_B|1 -1 0 1 0 1 -1
#> set_A&set_B|1 1 0 1 0 1 1
#> set_A&set_C|-1 0 -1 0 1 -1 -1
#> set_A&set_C|-1 0 1 0 1 -1 1
#> set_A&set_C|1 0 -1 0 1 1 -1
#> set_A&set_C|1 0 1 0 1 1 1
#> set_B&set_C|0 1 1 1 1 1 1
#> set_A&set_B&set_C|1 1 1 1 1 1 1 1
## directional count table for agreement or mixed
sv3 <- signed_overlaps(setlist=setlist, "agreement")
sv3
#> sets agreement overlap num_sets count
#> set_A|agreement set_A agreement 1 0 0 1 16
#> set_B|agreement set_B agreement 0 1 0 1 7
#> set_C|agreement set_C agreement 0 0 1 1 7
#> set_A&set_B|agreement set_A&set_B agreement 1 1 0 2 6
#> set_A&set_B|mixed set_A&set_B mixed 1 1 0 2 2
#> set_A&set_C|agreement set_A&set_C agreement 1 0 1 2 5
#> set_A&set_C|mixed set_A&set_C mixed 1 0 1 2 2
#> set_B&set_C|0 1 1 set_B&set_C agreement 0 1 1 2 0
#> set_A&set_B&set_C|agreement set_A&set_B&set_C agreement 1 1 1 3 1
#> set_A set_B set_C overlap_label
#> set_A|agreement 1 0 0 agreement
#> set_B|agreement 0 1 0 agreement
#> set_C|agreement 0 0 1 agreement
#> set_A&set_B|agreement 1 1 0 agreement
#> set_A&set_B|mixed 1 1 0 mixed
#> set_A&set_C|agreement 1 0 1 agreement
#> set_A&set_C|mixed 1 0 1 mixed
#> set_B&set_C|0 1 1 0 1 1 agreement
#> set_A&set_B&set_C|agreement 1 1 1 agreement
## signed incidence matrix
imv <- list2im_value(setlist)
dim(imv)
#> [1] 46 3
head(imv)
#> set_A set_B set_C
#> item_067 -1 0 0
#> item_042 1 0 0
#> item_050 1 0 -1
#> item_043 -1 0 0
#> item_014 -1 0 0
#> item_025 1 -1 0
## text venn diagram
textvenn(setlist, overlap_type="overlap")
#> set_A&set_B
#> 8
#> set_A set_B
#> 16 7
#>
#> set_A&set_B&set_C
#> 1
#> set_A&set_C set_B&set_C
#> 7 0
#>
#>
#> set_C
#> 7
## text venn diagram with signed direction
textvenn(setlist, overlap_type="each")
#> set_A&set_B ↑↑: 3
#> 8 ↑↓: 2
#> set_A ↑: 9 ↓↓: 3 set_B ↑: 4
#> 16 ↓: 7 7 ↓: 3
#>
#> set_A&set_B&set_C ↑↑↑: 1
#> 1
#> set_A&set_C ↑↑: 2 set_B&set_C ↑↑: 0
#> 7 ↑↓: 1 0
#> ↓↑: 1
#> ↓↓: 3
#> set_C ↑: 5
#> 7 ↓: 2