forked from blei-lab/hlda
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.c
executable file
·91 lines (80 loc) · 2.16 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#include "utils.h"
#include "typedefs.h"
#include "doc.h"
#include "topic.h"
#include "gibbs.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
// Upper bound of the sampling loops in this file: each run calls
// iterate_gibbs_state this many times.
#define MAX_ITER 10000
// main_heldout scores the held-out corpus whenever state->iter is a
// multiple of this value.
#define TEST_LAG 100
// Number of times main_gibbs builds a fresh state and runs the sampler.
#define NRESTARTS 1
// simple gibbs sampling on a data set
//
// Expected command line: <prog> gibbs <corpus> <settings> <out_dir>
//   av[2]  corpus path
//   av[3]  settings path
//   av[4]  output directory
//
// Runs NRESTARTS independent restarts; each restart builds a fresh state,
// performs MAX_ITER sampling iterations and then releases the state.
// Terminates the process with EXIT_FAILURE when the argument count is wrong.
void main_gibbs(int ac, char* av[])
{
    // Checked explicitly rather than with assert(): assert is compiled out
    // under NDEBUG, which would let the av[] reads below run out of bounds.
    if (ac != 5)
    {
        fprintf(stderr,
                "gibbs: expected 3 arguments (corpus settings out), got %d\n",
                ac - 2);
        exit(EXIT_FAILURE);
    }

    char* corpus = av[2];
    char* settings = av[3];
    char* out_dir = av[4];

    int restart;
    for (restart = 0; restart < NRESTARTS; restart++)
    {
        gibbs_state* state =
            init_gibbs_state_w_rep(corpus, settings, out_dir);

        int iter;
        for (iter = 0; iter < MAX_ITER; iter++)
        {
            iterate_gibbs_state(state);
        }

        free_gibbs_state(state);
    }
}
// Gibbs sampling on a training corpus with periodic held-out scoring.
//
// Expected command line: <prog> heldout <train> <test> <settings> <out_dir>
//   av[2]  training corpus path
//   av[3]  held-out (test) corpus path
//   av[4]  settings path
//   av[5]  output directory
//
// Runs MAX_ITER sampling iterations on the training data. Whenever
// state->iter is a multiple of TEST_LAG, one line
// "<iter> <score> <number of topics>" is appended to <run_dir>/test.dat.
// Terminates the process with EXIT_FAILURE on a wrong argument count, on
// allocation failure, or when the log file cannot be opened.
void main_heldout(int ac, char* av[])
{
    // Checked explicitly rather than with assert(): assert is compiled out
    // under NDEBUG, which would let the av[] reads below run out of bounds.
    if (ac != 6)
    {
        fprintf(stderr,
                "heldout: expected 4 arguments (train test settings out), got %d\n",
                ac - 2);
        exit(EXIT_FAILURE);
    }

    char* train = av[2];
    char* test = av[3];
    char* settings = av[4];
    char* out_dir = av[5];

    gibbs_state* state = init_gibbs_state_w_rep(train, settings, out_dir);

    // The held-out corpus is created with the training corpus' GEM
    // parameters and read with the depth of the training tree.
    corpus* heldout_corp = corpus_new(state->corp->gem_mean,
                                      state->corp->gem_scale);
    read_corpus(test, heldout_corp, state->tr->depth);

    // Size the path buffer from run_dir itself; a fixed-size buffer would
    // overflow for long output directories.
    static const char log_name[] = "/test.dat";
    size_t filename_size = strlen(state->run_dir) + sizeof log_name;
    char* filename = malloc(filename_size);
    if (filename == NULL)
    {
        fprintf(stderr, "heldout: out of memory building log path\n");
        exit(EXIT_FAILURE);
    }
    snprintf(filename, filename_size, "%s%s", state->run_dir, log_name);

    FILE* test_log = fopen(filename, "w");
    if (test_log == NULL)
    {
        perror(filename);
        free(filename);
        exit(EXIT_FAILURE);
    }
    free(filename);

    int iter;
    for (iter = 0; iter < MAX_ITER; iter++)
    {
        iterate_gibbs_state(state);
        if ((state->iter % TEST_LAG) == 0)
        {
            // NOTE(review): the meaning of 200, 1, 1000 is defined by
            // mean_heldout_score, which is not visible here - confirm
            // against its declaration before changing them.
            double score = mean_heldout_score(heldout_corp, state,
                                              200, 1, 1000);
            fprintf(test_log, "%04d %10.3f %d\n",
                    state->iter, score, ntopics_in_tree(state->tr));
            // Flush after every record so partial results survive an
            // interrupted run.
            fflush(test_log);
        }
    }

    // fclose flushes buffered output; report a failure instead of silently
    // losing the tail of the log.
    if (fclose(test_log) != 0)
    {
        perror("test.dat");
    }

    // Release the sampler state the same way main_gibbs does.
    // NOTE(review): heldout_corp is not released here because no corpus
    // destructor is visible in this file - add the call if one exists.
    free_gibbs_state(state);
}
// Program entry point: dispatches on the sub-command in av[1].
//
//   gibbs    -> main_gibbs
//   heldout  -> main_heldout
//
// Returns 0 after a sub-command has run. When no sub-command or an unknown
// one is given, the usage text is logged and EXIT_FAILURE is returned so
// that calling scripts can detect the bad invocation.
int main(int ac, char* av[])
{
    if (ac > 1)
    {
        if (strcmp(av[1], "gibbs") == 0)
        {
            main_gibbs(ac, av);
            return(0);
        }
        else if (strcmp(av[1], "heldout") == 0)
        {
            main_heldout(ac, av);
            return(0);
        }
    }

    outlog("USAGE: ./main gibbs corpus settings out");
    outlog(" ./main heldout train test settings out");
    return(EXIT_FAILURE);
}