/* cluster_dbscan_kmer.c -- command-line driver: runs DBSCAN on a k-mer dataset
 * and writes the resulting clusters of FASTA sequences to output files. */
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<ctype.h>
#include<unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include"dataset.h"
#include"cluster.h"
#include"dbscan.h"
/*
 * Report that a file could not be opened and terminate the program.
 *
 * path: name of the file that failed to open; only used in the message.
 *       A NULL path is printed as "(null)" instead of being dereferenced.
 *
 * Does not return: the process exits with status EXIT_FAILURE.
 */
void file_error(char* path) {
    /* Diagnostics belong on stderr, which is unbuffered, so the message is
     * visible even when stdout is redirected to a pipe or a file. */
    fprintf(stderr, "failed to open file %s\n", path != NULL ? path : "(null)");

    /* exit() instead of _exit(): _exit() terminates without flushing stdio
     * buffers, which would drop any output still pending on stdout. */
    exit(EXIT_FAILURE);
}
/*
 * Entry point: cluster a k-mer dataset with DBSCAN and write the clusters out.
 *
 * Command line (all five arguments are required):
 *   argv[1]  FASTA file with the sequences
 *   argv[2]  file with the corresponding k-mers
 *   argv[3]  epsilon, parsed as a float
 *   argv[4]  minPoints, parsed as an integer ("%i": decimal, octal or hex)
 *   argv[5]  prefix for the output files
 *
 * The clustering routine is chosen at compile time: dbscan_L1 when
 * _CLUSTER_KMER_L1 is defined, dbscan_L2 when _CLUSTER_KMER_L2 is defined.
 *
 * Returns 0 on success and EXIT_FAILURE on a usage or parse error.
 * file_error() terminates the process when an input file cannot be opened.
 */
int main(int argc, char** argv) {
    /* Cluster files are only written when fewer clusters than this are found. */
    enum { MAX_CLUSTERS_WRITTEN = 500 };

    dataset ds;
    float epsilon;
    int minpts;
    data_shape shape;
    FILE* fasta_f;
    int kmer_fd;
    FILE* kmer_f;
    split_set set_of_clusters;

    /* argv[5] is read further down, so the program name plus five arguments
     * must be present; checking for fewer would let a NULL prefix through. */
    if (argc < 6) {
        fputs("Arguments are: \n"
              " [file] Sequences in FASTA \n"
              " [file] Corresponding kmers \n"
              " Epsilon \n"
              " minPoints \n"
              " output-files prefix \n", stderr);
        return EXIT_FAILURE;
    }

    /* Reject unparsable numbers instead of clustering with uninitialized values. */
    if (sscanf(argv[3], "%f", &epsilon) != 1) {
        fprintf(stderr, "invalid epsilon: %s\n", argv[3]);
        return EXIT_FAILURE;
    }
    if (sscanf(argv[4], "%i", &minpts) != 1) {
        fprintf(stderr, "invalid minPoints: %s\n", argv[4]);
        return EXIT_FAILURE;
    }

    kmer_fd = open(argv[2], O_RDONLY);
    if (kmer_fd == -1) {
        file_error(argv[2]);
    }

    /* The shape is obtained through the raw descriptor; the stdio stream used
     * for loading is then layered on that same descriptor. */
    shape = shape_from_kmer_file(kmer_fd);
    kmer_f = fdopen(kmer_fd, "r");
    if (kmer_f == NULL) {
        close(kmer_fd);
        file_error(argv[2]);
    }
    ds = load_kmer_from_file_into_dataset(kmer_f, shape);
    fclose(kmer_f); /* also closes kmer_fd */

#if defined(_CLUSTER_KMER_L1)
    set_of_clusters = dbscan_L1(ds, epsilon, minpts);
#elif defined(_CLUSTER_KMER_L2)
    set_of_clusters = dbscan_L2(ds, epsilon, minpts);
#else
    /* Without one of the two macros set_of_clusters would be used
     * uninitialized below, so refuse to continue. */
    fputs("built without _CLUSTER_KMER_L1 or _CLUSTER_KMER_L2: "
          "no clustering routine available\n", stderr);
    free_values_from_dataset(ds);
    return EXIT_FAILURE;
#endif

    /* The k-mer values are released here; ds is reassigned from the FASTA
     * file below. */
    free_values_from_dataset(ds);
    printf("%u clusters obtained \n", set_of_clusters.n_clusters);

    fasta_f = fopen(argv[1], "r");
    if (fasta_f == NULL) {
        file_error(argv[1]);
    }
    ds = dataset_from_fasta(fasta_f);
    fclose(fasta_f);

    if (set_of_clusters.n_clusters < MAX_CLUSTERS_WRITTEN) {
        create_cluster_files(argv[5], set_of_clusters, ds);
    } else {
        /* Make the skipped output visible instead of exiting silently. */
        fprintf(stderr, "%d or more clusters: no cluster files written\n",
                (int)MAX_CLUSTERS_WRITTEN);
    }

    free_sequences_from_dataset(ds);
    return 0;
}