-
Notifications
You must be signed in to change notification settings - Fork 189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
pendigits analog of test.c
#12
Open
rjp
wants to merge
2
commits into
glouw:master
Choose a base branch
from
rjp:pendigits
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
#include "Tinn.h" | ||
#include <stdio.h> | ||
#include <time.h> | ||
#include <string.h> | ||
#include <stdlib.h> | ||
|
||
/*
 * An in-memory data set, one row per sample.
 * Both tables are allocated by ndata() and released by dfree().
 */
typedef struct
{
    float** in;  /* in[row][col]: input values, nips columns per row.  */
    float** tg;  /* tg[row][col]: target values, nops columns per row. */
    int nips;    /* Number of inputs per row.  */
    int nops;    /* Number of outputs per row. */
    int rows;    /* Number of rows in both tables. */
}
Data;
|
||
/*
 * One output slot together with its target and predicted value, so that
 * the three stay aligned when an array of these is sorted.
 */
typedef struct {
    int k;     /* Index of the output slot before sorting. */
    float tg;  /* Target value for that slot.    */
    float pd;  /* Predicted value for that slot. */
} pos;
|
||
/*
 * Prints a shell `ln -s` command that links "<j>-<realnum>.svg" (j is
 * zero-padded to five digits) into the "good" or "bad" directory.
 *
 * j        row number used in the file name
 * data     unused
 * realnum  number used as the second part of the file name
 * pcage    unused
 * goodbad  non-zero selects "good", zero selects "bad"
 */
void output_svg(int j, Data data, int realnum, float pcage, int goodbad)
{
    /* Kept in the signature for existing callers; silence unused warnings. */
    (void) data;
    (void) pcage;
    printf("ln -s %05d-%d.svg %s/\n", j, realnum, goodbad ? "good" : "bad");
}
|
||
/*
 * Counts the lines in `file`, reading from the current position to EOF.
 * A final line that is not terminated by '\n' still counts as a line;
 * an empty stream yields 0. The stream is rewound before returning.
 */
static int lns(FILE* const file)
{
    int lines = 0;
    /* Starts as '\n' so that an empty stream is not counted as one line. */
    int last = '\n';
    int ch;
    while((ch = getc(file)) != EOF)
    {
        if(ch == '\n')
        {
            lines++;
        }
        last = ch;
    }
    /* The last line has no trailing newline: count it as well. */
    if(last != '\n')
    {
        lines++;
    }
    rewind(file);
    return lines;
}
|
||
/*
 * Reads one line from `file` into a freshly allocated, NUL-terminated
 * string. The terminating '\n' is consumed but not stored. At EOF an
 * empty string is returned. The caller owns the result and must free() it.
 *
 * If memory cannot be allocated, a message is printed to stderr and the
 * program exits with status 1.
 */
static char* readln(FILE* const file)
{
    size_t size = 128;
    size_t reads = 0;
    char* line = malloc(size);
    if(line == NULL)
    {
        fprintf(stderr, "readln: out of memory\n");
        exit(1);
    }
    int ch;
    while((ch = getc(file)) != '\n' && ch != EOF)
    {
        line[reads++] = (char) ch;
        /* Always keep one spare byte for the terminator. */
        if(reads + 1 == size)
        {
            size *= 2;
            /* Use a temporary so the old buffer is not lost on failure. */
            char* const grown = realloc(line, size);
            if(grown == NULL)
            {
                free(line);
                fprintf(stderr, "readln: out of memory\n");
                exit(1);
            }
            line = grown;
        }
    }
    line[reads] = '\0';
    return line;
}
|
||
/*
 * Allocates a table of `rows` row pointers, each pointing to `cols`
 * uninitialised floats. Each row and the table itself are separate
 * allocations, so each must be passed to free() individually.
 *
 * If memory cannot be allocated, a message is printed to stderr and the
 * program exits with status 1.
 */
static float** new2d(const int rows, const int cols)
{
    const size_t nrows = rows > 0 ? (size_t) rows : 0;
    const size_t ncols = cols > 0 ? (size_t) cols : 0;
    float** row = malloc(nrows * sizeof *row);
    /* malloc(0) may legitimately return NULL, so only a non-empty request counts as failure. */
    if(row == NULL && nrows > 0)
    {
        fprintf(stderr, "new2d: out of memory\n");
        exit(1);
    }
    for(size_t r = 0; r < nrows; r++)
    {
        row[r] = malloc(ncols * sizeof *row[r]);
        if(row[r] == NULL && ncols > 0)
        {
            fprintf(stderr, "new2d: out of memory\n");
            exit(1);
        }
    }
    return row;
}
|
||
static Data ndata(const int nips, const int nops, const int rows) | ||
{ | ||
const Data data = { | ||
new2d(rows, nips), new2d(rows, nops), nips, nops, rows | ||
}; | ||
return data; | ||
} | ||
|
||
static void parse(const Data data, char* line, const int row) | ||
{ | ||
for(int col = 0; col < data.nips; col++) | ||
{ | ||
const float val = atof(strtok(col == 0 ? line : NULL, ", ")); | ||
data.in[row][col] = val/100.0; | ||
} | ||
const float val = atof(strtok(NULL, ", ")); | ||
for(int col = 0; col < data.nops; col++) { | ||
data.tg[row][col] = 0.0; | ||
} | ||
data.tg[row][(int)val] = 1.0; | ||
} | ||
|
||
static void dfree(const Data d) | ||
{ | ||
for(int row = 0; row < d.rows; row++) | ||
{ | ||
free(d.in[row]); | ||
free(d.tg[row]); | ||
} | ||
free(d.in); | ||
free(d.tg); | ||
} | ||
|
||
static void shuffle(const Data d) | ||
{ | ||
for(int a = 0; a < d.rows; a++) | ||
{ | ||
const int b = rand() % d.rows; | ||
float* ot = d.tg[a]; | ||
float* it = d.in[a]; | ||
// Swap output. | ||
d.tg[a] = d.tg[b]; | ||
d.tg[b] = ot; | ||
// Swap input. | ||
d.in[a] = d.in[b]; | ||
d.in[b] = it; | ||
} | ||
} | ||
|
||
/*
 * Loads the data file at `path` into a new Data set with `nips` inputs
 * and `nops` outputs per row. One row is created for each line counted
 * by lns(). The caller releases the result with dfree().
 *
 * If the file cannot be opened, a hint on where to download the data is
 * printed to stderr and the program exits with status 1.
 */
static Data build(const char* path, const int nips, const int nops)
{
    FILE* file = fopen(path, "r");
    if(file == NULL)
    {
        fprintf(stderr, "Could not open %s\n", path);
        fprintf(stderr, "Get it from the machine learning database: ");
        /* This program reads the pendigits test file, so point at that file. */
        fprintf(stderr, "wget http://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tes\n");
        exit(1);
    }
    const int rows = lns(file);
    Data data = ndata(nips, nops, rows);
    for(int row = 0; row < rows; row++)
    {
        char* line = readln(file);
        parse(data, line, row);
        free(line);
    }
    fclose(file);
    return data;
}
|
||
int sort_by_pd(const void *a, const void *b) { | ||
pos x = *(pos*)a; | ||
pos y = *(pos*)b; | ||
if (x.pd > y.pd) { return -1; } | ||
if (x.pd < y.pd) { return +1; } | ||
return 0; | ||
} | ||
|
||
int main() | ||
{ | ||
// Tinn does not seed the random number generator. | ||
srand(time(0)); | ||
// Input and output size is harded coded here as machine learning | ||
// repositories usually don't include the input and output size in the data itself. | ||
const int nips = 16; | ||
const int nops = 10; | ||
// Load the training set. | ||
const Data data = build("pendigits.tes", nips, nops); | ||
// This is how you load the neural network from disk. | ||
const Tinn loaded = xtload("saved.tinn"); | ||
pos check[nops]; | ||
int correct = 0; | ||
|
||
// Now we do a prediction with the neural network we loaded from disk. | ||
for (int j = 0; j < data.rows; j++) { | ||
const float* const in = data.in[j]; | ||
const float* const tg = data.tg[j]; | ||
const float* const pd = xtpredict(loaded, in); | ||
// To find the "best match", we need to sort by probability (`pd`) | ||
// whilst keeping the target (`tg`) aligned. Copying them into | ||
// our struct and then `qsort`ing on `pd` satisfies this. | ||
for(int i = 0; i < data.nops; i++) { | ||
check[i].k = i; | ||
check[i].tg = tg[i]; | ||
check[i].pd = pd[i]; | ||
} | ||
qsort(check, data.nops, sizeof(pos), sort_by_pd); | ||
// If the highest probability guess is the correct one, success. | ||
if (check[0].tg == 1) { | ||
correct++; | ||
} | ||
// Otherwise we print out our best guess and the correct answer. | ||
else { | ||
int realnum = -1; | ||
printf("%05d %d %.5f | ", j, check[0].k, (double) check[0].pd); | ||
for (int i=1; i < data.nops; i++) { | ||
if (check[i].tg == 1) { | ||
printf("%d %.5f", check[i].k, (double) check[i].pd); | ||
realnum = i; | ||
} | ||
} | ||
printf("\n"); | ||
} | ||
} | ||
// | ||
printf("%d correct out of %d rows\n", correct, data.rows); | ||
// All done. Let's clean up. | ||
xtfree(loaded); | ||
dfree(data); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
#include "Tinn.h" | ||
#include <stdio.h> | ||
#include <time.h> | ||
#include <string.h> | ||
#include <stdlib.h> | ||
|
||
/*
 * An in-memory data set, one row per sample.
 * Both tables are allocated by ndata() and released by dfree().
 */
typedef struct
{
    float** in;  /* in[row][col]: input values, nips columns per row.  */
    float** tg;  /* tg[row][col]: target values, nops columns per row. */
    int nips;    /* Number of inputs per row.  */
    int nops;    /* Number of outputs per row. */
    int rows;    /* Number of rows in both tables. */
}
Data;
|
||
/*
 * Counts the lines in `file`, reading from the current position to EOF.
 * A final line that is not terminated by '\n' still counts as a line;
 * an empty stream yields 0. The stream is rewound before returning.
 */
static int lns(FILE* const file)
{
    int lines = 0;
    /* Starts as '\n' so that an empty stream is not counted as one line. */
    int last = '\n';
    int ch;
    while((ch = getc(file)) != EOF)
    {
        if(ch == '\n')
        {
            lines++;
        }
        last = ch;
    }
    /* The last line has no trailing newline: count it as well. */
    if(last != '\n')
    {
        lines++;
    }
    rewind(file);
    return lines;
}
|
||
/*
 * Reads one line from `file` into a freshly allocated, NUL-terminated
 * string. The terminating '\n' is consumed but not stored. At EOF an
 * empty string is returned. The caller owns the result and must free() it.
 *
 * If memory cannot be allocated, a message is printed to stderr and the
 * program exits with status 1.
 */
static char* readln(FILE* const file)
{
    size_t size = 128;
    size_t reads = 0;
    char* line = malloc(size);
    if(line == NULL)
    {
        fprintf(stderr, "readln: out of memory\n");
        exit(1);
    }
    int ch;
    while((ch = getc(file)) != '\n' && ch != EOF)
    {
        line[reads++] = (char) ch;
        /* Always keep one spare byte for the terminator. */
        if(reads + 1 == size)
        {
            size *= 2;
            /* Use a temporary so the old buffer is not lost on failure. */
            char* const grown = realloc(line, size);
            if(grown == NULL)
            {
                free(line);
                fprintf(stderr, "readln: out of memory\n");
                exit(1);
            }
            line = grown;
        }
    }
    line[reads] = '\0';
    return line;
}
|
||
/*
 * Allocates a table of `rows` row pointers, each pointing to `cols`
 * uninitialised floats. Each row and the table itself are separate
 * allocations, so each must be passed to free() individually.
 *
 * If memory cannot be allocated, a message is printed to stderr and the
 * program exits with status 1.
 */
static float** new2d(const int rows, const int cols)
{
    const size_t nrows = rows > 0 ? (size_t) rows : 0;
    const size_t ncols = cols > 0 ? (size_t) cols : 0;
    float** row = malloc(nrows * sizeof *row);
    /* malloc(0) may legitimately return NULL, so only a non-empty request counts as failure. */
    if(row == NULL && nrows > 0)
    {
        fprintf(stderr, "new2d: out of memory\n");
        exit(1);
    }
    for(size_t r = 0; r < nrows; r++)
    {
        row[r] = malloc(ncols * sizeof *row[r]);
        if(row[r] == NULL && ncols > 0)
        {
            fprintf(stderr, "new2d: out of memory\n");
            exit(1);
        }
    }
    return row;
}
|
||
static Data ndata(const int nips, const int nops, const int rows) | ||
{ | ||
const Data data = { | ||
new2d(rows, nips), new2d(rows, nops), nips, nops, rows | ||
}; | ||
return data; | ||
} | ||
|
||
static void parse(const Data data, char* line, const int row) | ||
{ | ||
for(int col = 0; col < data.nips; col++) | ||
{ | ||
const float val = atof(strtok(col == 0 ? line : NULL, ", ")); | ||
/* Input values are 0-100 pixel coordinates; scale to 0.0-1.0 */ | ||
data.in[row][col] = val / 100.0; | ||
} | ||
/* Last value is a 0-9 numeral which we need to convert | ||
* into a size 10 vector of {0.00, 1.00} | ||
*/ | ||
const float val = atof(strtok(NULL, ", ")); | ||
for(int col = 0; col < data.nops; col++) { | ||
data.tg[row][col] = 0.0; | ||
} | ||
data.tg[row][(int)val] = 1.0; | ||
} | ||
|
||
static void dfree(const Data d) | ||
{ | ||
for(int row = 0; row < d.rows; row++) | ||
{ | ||
free(d.in[row]); | ||
free(d.tg[row]); | ||
} | ||
free(d.in); | ||
free(d.tg); | ||
} | ||
|
||
static void shuffle(const Data d) | ||
{ | ||
for(int a = 0; a < d.rows; a++) | ||
{ | ||
const int b = rand() % d.rows; | ||
float* ot = d.tg[a]; | ||
float* it = d.in[a]; | ||
// Swap output. | ||
d.tg[a] = d.tg[b]; | ||
d.tg[b] = ot; | ||
// Swap input. | ||
d.in[a] = d.in[b]; | ||
d.in[b] = it; | ||
} | ||
} | ||
|
||
/*
 * Loads the data file at `path` into a new Data set with `nips` inputs
 * and `nops` outputs per row. One row is created for each line counted
 * by lns(). The caller releases the result with dfree().
 *
 * If the file cannot be opened, a hint on where to download the data is
 * printed to stderr and the program exits with status 1.
 */
static Data build(const char* path, const int nips, const int nops)
{
    FILE* file = fopen(path, "r");
    if(file == NULL)
    {
        /* Diagnostics go to stderr so they do not mix with normal output. */
        fprintf(stderr, "Could not open %s\n", path);
        fprintf(stderr, "Get it from the machine learning database: ");
        fprintf(stderr, "wget http://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tra\n");
        exit(1);
    }
    const int rows = lns(file);
    Data data = ndata(nips, nops, rows);
    for(int row = 0; row < rows; row++)
    {
        char* line = readln(file);
        parse(data, line, row);
        free(line);
    }
    fclose(file);
    return data;
}
|
||
int main() | ||
{ | ||
// Tinn does not seed the random number generator. | ||
srand(time(0)); | ||
// Input and output size is harded coded here as machine learning | ||
// repositories usually don't include the input and output size in the data itself. | ||
const int nips = 16; | ||
const int nops = 10; | ||
// Hyper Parameters. | ||
// Learning rate is annealed and thus not constant. | ||
// It can be fine tuned along with the number of hidden layers. | ||
// Feel free to modify the anneal rate. | ||
// The number of iterations can be changed for stronger training. | ||
float rate = 1.0f; | ||
const int nhid = 28; | ||
const float anneal = 0.99f; | ||
const int iterations = 128; | ||
// Load the training set. | ||
const Data data = build("pendigits.tra", nips, nops); | ||
// Train, baby, train. | ||
const Tinn tinn = xtbuild(nips, nhid, nops); | ||
for(int i = 0; i < iterations; i++) | ||
{ | ||
shuffle(data); | ||
float error = 0.0f; | ||
for(int j = 0; j < data.rows; j++) | ||
{ | ||
const float* const in = data.in[j]; | ||
const float* const tg = data.tg[j]; | ||
error += xttrain(tinn, in, tg, rate); | ||
} | ||
printf("error %.12f :: learning rate %f\n", | ||
(double) error / data.rows, | ||
(double) rate); | ||
rate *= anneal; | ||
} | ||
// This is how you save the neural network to disk. | ||
xtsave(tinn, "saved.tinn"); | ||
xtfree(tinn); | ||
// This is how you load the neural network from disk. | ||
const Tinn loaded = xtload("saved.tinn"); | ||
// Now we do a prediction with the neural network we loaded from disk. | ||
// Ideally, we would also load a testing set to make the prediction with, | ||
// but for the sake of brevity here we just reuse the training set from earlier. | ||
// One data set is picked at random. | ||
const int pick = rand() % data.rows; | ||
const float* const in = data.in[pick]; | ||
const float* const tg = data.tg[pick]; | ||
const float* const pd = xtpredict(loaded, in); | ||
xtprint(tg, data.nops); | ||
xtprint(pd, data.nops); | ||
// All done. Let's clean up. | ||
xtfree(loaded); | ||
dfree(data); | ||
return 0; | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the wrong URL.