-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreadFile.c
71 lines (64 loc) · 1.89 KB
/
readFile.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "wordCounter.h"
/**
 * Read a text file and feed the words of a random sample of its lines into
 * an n-gram tree (unigrams up to 4-grams).
 *
 * Each line is kept when rand() % K == 0.  A kept line is split on ' ' and
 * '\n' (carriage returns are skipped, every other byte is part of a word).
 * Every word is added to the top-level tree, and also chained under the
 * nodes created for the one, two and three words that precede it on the
 * same line.  N-gram context never crosses a line boundary.
 *
 * NOTE(review): addWordBT is declared elsewhere; this code assumes it
 * returns the (possibly new) root of the tree it was given and stores the
 * node matching `word` through its third argument -- confirm against
 * wordCounter.h.
 *
 * @param file_name path of the text file to read
 * @param gramBT    root of the top-level tree (passed straight to addWordBT)
 * @param K         sampling divisor; K == 0 is treated as 1 (every line is
 *                  kept) because rand() % 0 is undefined
 * @return the root returned by the last top-level addWordBT call, or
 *         `gramBT` unchanged if no word was added
 *
 * Terminates the process with EXIT_FAILURE if the file cannot be opened.
 * Words longer than MAX_WORD - 1 bytes are truncated to fit the buffer.
 */
GramBT *readFileBT(char *file_name, GramBT *gramBT, int K) {
    FILE *fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror(file_name);
        exit(EXIT_FAILURE);
    }

    /* rand() % 0 is undefined behaviour; fall back to "no sampling". */
    if (K == 0) {
        K = 1;
    }

    /*
     * Nodes reported by addWordBT for the n-gram ending at the most recent
     * word: uni = 1-gram, bi = 2-gram, tri = 3-gram, quad = 4-gram.
     * Plain locals replace the heap-allocated holders, which were never
     * freed.  quad_node is written but never read.
     */
    GramBT *uni_node = NULL;
    GramBT *bi_node = NULL;
    GramBT *tri_node = NULL;
    GramBT *quad_node = NULL;

    char word[MAX_WORD];
    char *line = NULL;
    size_t cap = 0;
    ssize_t read;
    int lines_counter = 0; /* counts sampled lines only */

    while ((read = getline(&line, &cap, fp)) != -1) {
        if ((rand() % K) != 0) {
            continue;
        }
        if (lines_counter % 10000 == 0) {
            printf("line: %d\n", lines_counter);
        }

        int prev_words = 0; /* words already seen on this line, capped at 3 */
        int j = 0;          /* length of the word being accumulated */

        /*
         * Iterate one position past the data and treat it as a separator so
         * the last word is flushed even when the line has no trailing '\n'
         * (e.g. the final line of the file).
         */
        for (ssize_t i = 0; i <= read; i++) {
            char c = (i == read) ? '\n' : line[i];

            if (c == '\r') {
                continue;
            }
            if (c != ' ' && c != '\n') {
                /* Drop excess bytes rather than overflow the buffer. */
                if (j < MAX_WORD - 1) {
                    word[j++] = c;
                }
                continue;
            }
            if (j == 0) {
                continue; /* consecutive separators: no word to flush */
            }
            word[j] = '\0';

            /*
             * Longest n-gram first: each call overwrites the holder that the
             * previous (longer) level has just finished reading.
             */
            if (prev_words >= 3) {
                tri_node->next = addWordBT(tri_node->next, word, &quad_node);
            }
            if (prev_words >= 2) {
                bi_node->next = addWordBT(bi_node->next, word, &tri_node);
            }
            if (prev_words >= 1) {
                uni_node->next = addWordBT(uni_node->next, word, &bi_node);
            }
            gramBT = addWordBT(gramBT, word, &uni_node);

            if (prev_words < 3) {
                prev_words++;
            }
            j = 0;
        }
        lines_counter++;
    }

    fclose(fp);
    free(line);
    return gramBT;
}