-
Notifications
You must be signed in to change notification settings - Fork 1
/
datastructs.c
176 lines (154 loc) · 5.69 KB
/
datastructs.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "datastructs.h"
/*------------------------------TokenList-------------------------------------*/
/*
Creates and returns the pointer to a new TokenList object, and sets the frequency and
word accordingly strcpy() is used to ensure changes to the parameter word will not
affect new_token->word and to avoid double free()ing
*/
TokenList *new_toklist(double freq, const char *word) {
TokenList *new_token = malloc(sizeof(TokenList));
new_token->freq = freq;
new_token->word = malloc(strlen(word)+1);
strcpy(new_token->word, word);
new_token->next = NULL;
return new_token;
}
/*
Frees all tokens and token->words for the given head of a TokenList linked list
*/
void free_toklist(TokenList *tl) {
TokenList *p = tl;
TokenList *tmp;
while (p != NULL) {
tmp = p->next;
free(p->word);
free(p);
p = tmp;
}
}
/*
Creates and inserts new TokenList instance in linked list corresponding to the token_list parameter double pointer
Sets word and frequency accordingly, while placing TokenList such that the linked list remains in alphabetical
order. Can be called repeatedly to create a TokenList linked list to store tokens for a file
*/
void insert_word (TokenList **token_list, const char *word, double freq) {
TokenList *new_token = new_toklist(freq, word);
TokenList *curr = *token_list;
if (curr == NULL || strcmp(word, curr->word) < 0) {
new_token->next = curr;
*token_list = new_token;
return;
}
while (curr->next != NULL) {
// inserts alphabetically by comparing the word parameter with the word of the next struct in the TokenList LL
if (strcmp(word, (curr->next)->word) < 0) {
break;
}
curr = curr->next;
}
new_token->next = curr->next;
curr->next = new_token;
}
/*------------------------------FileList-------------------------------------*/
/*
Creates and returns the pointer to a new FileList object, and sets the total token count,
TokenList linked list head and file name accordingly. strcpy() is used to ensure changes
to the parameter word will not affect new_token->word and to avoid double free()ing
*/
FileList *new_filelist(TokenList *token_list, const char *file_name, int total) {
FileList *new_list = malloc(sizeof(FileList));
new_list->token_list = token_list;
new_list->file_name = malloc(strlen(file_name)+1);
strcpy(new_list->file_name, file_name);
new_list->total = total;
new_list->next = NULL;
return new_list;
}
/*
Frees all tokens and token->words for the given head of a FileList linked list
*/
void free_filelist(FileList *fl) {
FileList *p = fl;
FileList *tmp;
while (p != NULL) {
tmp = p->next;
free(p->file_name);
free_toklist(p->token_list);
free(p);
p = tmp;
}
}
/*
Creates and inserts new FileList instance in linked list corresponding to the file_list parameter double pointer
Sets word, TokenList linked list and frequency accordingly, while adding the new FileList struct to the linked list.
Can be called repeatedly to create a TokenList linked list to store tokens for a file
*/
void insert_file(FileList **file_list, TokenList *token_list, const char *file_name, int total) {
FileList *new_token = new_filelist(token_list, file_name, total);
FileList *list = *file_list;
// first element
if (list == NULL) {
*file_list = new_token;
return;
}
while (list->next != NULL) {
list = list->next;
}
list->next = new_token;
}
/*------------------------------OutputList-------------------------------------*/
/*
Creates and returns a pointer to a new OutputList object for the given pair of files and their calculated
Jensen distance. malloc and strcpy file_names for each file so that double freeing will not occur.
*/
OutputList *new_outlist(FileList *f1, FileList *f2, double jensen) {
OutputList *new_list = (OutputList*)malloc(sizeof(OutputList));
new_list->file1 = malloc(strlen(f1->file_name)+1);
strcpy(new_list->file1, f1->file_name);
new_list->file2 = malloc(strlen(f2->file_name)+1);
strcpy(new_list->file2, f2->file_name);
new_list->sum = f1->total + f2->total;
new_list->jensen = jensen;
new_list->next = NULL;
return new_list;
}
/*
Given OutputList linked list head, free all OutputLists in the linked list to avoid memory leaks
*/
void free_output(OutputList *ol) {
OutputList *p = ol;
OutputList *tmp;
while (p != NULL) {
tmp = p->next;
free(p->file1);
free(p->file2);
free(p);
p = tmp;
}
}
/*
Inserts a new OutputList for the given file parameters and Jensen distance to the OutputList link list parameter.
Uses insertion sort to keep the output_list sorted from least number of total tokens to most number of total tokens.
*/
void insert_output (OutputList **output_list, FileList *f1, FileList *f2, double jensen) {
OutputList *curr = *output_list;
OutputList *new_list = new_outlist(f1, f2, jensen);
// check if new_list belongs in beginning of LL
if (curr == NULL || new_list->sum < curr->sum) {
new_list->next = curr;
*output_list = new_list;
return;
}
//insertion sort from smallest to greatest total number of tokens between the two files
while (curr->next != NULL) {
if (new_list->sum < curr->next->sum) {
break;
}
curr = curr->next;
}
new_list->next = curr->next;
curr->next = new_list;
}