forked from facebookresearch/EGG
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_discriminative_dataset.pl
executable file
·109 lines (81 loc) · 3.15 KB
/
generate_discriminative_dataset.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/perl -w
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# This script generates a data set in the format expected by EGG's
# basic_games discrimination game.
# Note that the script only generates distinct input tuples. More
# precisely, no input item *set* is ever repeated (e.g., if one input
# tuple contains (A, B, C), no other input line will be made of the
# same items, e.g., no (B, A, C) or (C, B, A) is permitted to occur in
# the same data set).
# Usage:
# perl -w N_INPUTS N_ATTRIBUTES N_VALUES N_ITEMS > discri_dataset.txt
# where
# N_INPUTS specifies the number of input tuples we are requesting,
# N_ATTRIBUTES specifies the number of attributes of each item,
# N_VALUES specifies the number of possible values for each attribute and
# N_ITEMS specifies how many items there are in a tuple (target+distractors).
# Note that, in the output, the requested tuples will be followed by a
# random index pointing to the position of the target (counting from
# 0).
# Note also that 0 is a possible value, thus, if N values are
# requested, the highest possible integer observed in the output will
# be N-1
# taken from: https://stackoverflow.com/questions/4736626/how-can-i-generate-all-ordered-combinations-of-length-k-in-perl
sub ordered_combinations
{
my ($data, $k) = @_;
return @$data if $k == 1;
my @previous = ordered_combinations($data, $k-1);
my @results;
for my $symbol (@$data) {
push @results, map { "$symbol " . $_ } @previous;
}
return @results;
}
# shuffle( \@array ) : generate a random permutation
# of @array in place
# from https://www.oreilly.com/library/view/perl-cookbook/1565922433/ch04s18.html
sub shuffle {
my $array = shift;
my $i;
for ($i = @$array; --$i; ) {
my $j = int rand ($i+1);
next if $i == $j;
@$array[$i,$j] = @$array[$j,$i];
}
}
# from: https://rosettacode.org/wiki/Evaluate_binomial_coefficients#Perl
sub binom {
use bigint;
my($n,$k) = @_;
(0+$n)->bnok($k);
}
$n_distinct_samples = shift;
$item_length = shift;
$n_values = shift;
$n_items = shift;
# sanity check
$possible_distinct_items_count = $n_values**$item_length;
$max_samples_count = binom($possible_distinct_items_count,$n_items);
if ($n_distinct_samples>$max_samples_count) {
print "with $n_items distinct items of length $item_length with $n_values values, I can maximally generate $max_samples_count samples\n";
exit;
}
$n_values--;
@possible_items = (0..$n_values);
@all_distinct_combinations = ordered_combinations(\@possible_items,$item_length);
$current_sample_n = 0;
while ($current_sample_n<$n_distinct_samples) {
@random_indices = (0..$#all_distinct_combinations);
shuffle(\@random_indices);
$sorted_selected_indices = join " ",(sort(@random_indices[0..$n_items-1]));
if (!$seen_items{$sorted_selected_indices}) {
@selected_items = @all_distinct_combinations[@random_indices[0..$n_items-1]];
$target = int(rand($n_items));
print join(" . ",(@selected_items,$target)),"\n";
$seen_items{$sorted_selected_indices} = 1;
$current_sample_n++;
}
}