forked from ssdeep-project/ssdeep
-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.cpp
340 lines (269 loc) · 9.09 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
// Fuzzy Hashing by Jesse Kornblum
// Copyright (C) 2013 Facebook
// Copyright (C) 2012 Kyrus
// Copyright (C) 2010 ManTech International Corporation
//
// $Id$
//
// This program is licensed under version 2 of the GNU Public License.
// See the file COPYING for details.
#include "ssdeep.h"
#include "match.h"
#ifdef _WIN32
// Allows us to open standard input in binary mode by default.
// This definition can't go in main.h or we get multiple definitions of it.
// See http://gnuwin32.sourceforge.net/compile.html for more details.
int _CRT_fmode = _O_BINARY;
#endif
// Put a newly created state structure into its startup configuration:
// no mode flags set, a zero match threshold, and the per-run bookkeeping
// flags at their initial values.
//
// Returns true on failure (s is NULL) and false on success.
static bool initialize_state(state *s)
{
  if (s != NULL) {
    s->threshold             = 0;
    s->processed_file        = false;
    s->found_meaningful_file = false;
    s->first_file_processed  = true;
    s->mode                  = mode_none;
    return false;
  }

  return true;
}
// Print the help text: a version banner, a usage synopsis, and one line
// per command line option.
//
// In order to fit on one Win32 screen this function should produce
// no more than 22 lines of output.
static void usage(void)
{
  // One entry per option, printed in this order. NULL marks the end.
  static const char * const option_help[] = {
    "-m - Match FILES against known hashes in file",
    "-k - Match signatures in FILES against signatures in file",
    "-d - Directory mode, compare all files in a directory",
    "-p - Pretty matching mode. Similar to -d but includes all matches",
    "-g - Cluster matches together",
    "-v - Verbose mode. Displays filename as its being processed",
    "-r - Recursive mode",
    "-s - Silent mode; all errors are suppressed",
    "-b - Uses only the bare name of files; all path information omitted",
    "-l - Uses relative paths for filenames",
    "-c - Prints output in CSV format",
    "-x - Compare FILES as signature files",
    "-a - Display all matches, regardless of score",
    "-t - Only displays matches above the given threshold",
    "-h - Display this help message",
    "-V - Display version number and exit",
    NULL
  };
  const char * const *line;

  print_status("%s version %s by Jesse Kornblum and the ssdeep Project", __progname, VERSION);
  print_status("For copyright information, see man page or README.TXT.");
  print_status("");
  print_status("Usage: %s [-m file] [-k file] [-dpgvrsblcxa] [-t val] [-h|-V] [FILES]",
               __progname);

  for (line = option_help; *line != NULL; ++line)
    print_status("%s", *line);
}
// Convert the argument of -t into a match threshold.
//
// Returns the value (0-100) on success, or -1 when arg is NULL, is not a
// complete decimal number, or lies outside 0-100. The range is checked on
// the full long value so that an input such as "300" is rejected instead
// of being wrapped into range by a narrowing cast.
static long parse_threshold_arg(const char *arg)
{
  char *end = NULL;
  long value;

  if (NULL == arg)
    return -1;

  value = strtol(arg, &end, 10);

  // No digits consumed, or trailing characters after the number
  if (end == arg || *end != '\0')
    return -1;

  // strtol clamps to LONG_MIN/LONG_MAX on overflow, which this also rejects
  if (value < 0 || value > 100)
    return -1;

  return value;
}

// Parse the command line options with getopt and record them in s.
//
// s    - program state; s->mode and s->threshold are updated
// argc - argument count as passed to main
// argv - argument vector as passed to main
//
// -h and -V print their output and exit successfully. An unrecognized
// option prints the "try" message and exits with failure. After parsing,
// a series of sanity checks flags option combinations that cannot be
// used together.
static void process_cmd_line(state *s, int argc, char **argv)
{
  int i, match_files_loaded = FALSE;

  while ((i = getopt(argc, argv, "gavhVpdsblcxt:rm:k:")) != -1) {
    switch (i) {

    case 'g':
      s->mode |= mode_cluster;
      break;

    case 'a':
      s->mode |= mode_display_all;
      break;

    case 'v':
      if (MODE(mode_verbose))
      {
        print_error(s,"%s: Already at maximum verbosity", __progname);
        print_error(s,
                    "%s: Error message displayed to user correctly",
                    __progname);
      }
      else
        s->mode |= mode_verbose;
      break;

    case 'p':
      s->mode |= mode_match_pretty;
      break;

    case 'd':
      s->mode |= mode_directory;
      break;

    case 's':
      s->mode |= mode_silent;
      break;

    case 'b':
      s->mode |= mode_barename;
      break;

    case 'l':
      s->mode |= mode_relative;
      break;

    case 'c':
      s->mode |= mode_csv;
      break;

    case 'x':
      s->mode |= mode_sigcompare;
      break;

    case 'r':
      s->mode |= mode_recursive;
      break;

    case 't': {
      // Validate before narrowing to uint8_t; casting first would let
      // out-of-range values wrap around and pass the range check.
      const long threshold = parse_threshold_arg(optarg);
      if (threshold < 0)
        fatal_error("%s: Illegal threshold", __progname);
      s->threshold = (uint8_t)threshold;
      s->mode |= mode_threshold;
      break;
    }

    case 'm':
      if (MODE(mode_compare_unknown) || MODE(mode_sigcompare))
        fatal_error("Positive matching cannot be combined with other matching modes");
      s->mode |= mode_match;
      // A zero return from match_load is treated as a successful load
      if (!match_load(s,optarg))
        match_files_loaded = TRUE;
      break;

    case 'k':
      if (MODE(mode_match) || MODE(mode_sigcompare))
        fatal_error("Signature matching cannot be combined with other matching modes");
      s->mode |= mode_compare_unknown;
      // A zero return from match_load is treated as a successful load
      if (!match_load(s,optarg))
        match_files_loaded = TRUE;
      break;

    case 'h':
      usage();
      exit (EXIT_SUCCESS);

    case 'V':
      print_status ("%s", VERSION);
      exit (EXIT_SUCCESS);

    default:
      try_msg();
      exit (EXIT_FAILURE);
    }
  }

  // We don't include mode_sigcompare in this list as we haven't loaded
  // the matching files yet. In that mode the matching files are in fact
  // the command line arguments.
  sanity_check(s,
               ((MODE(mode_match) || MODE(mode_compare_unknown))
                && !match_files_loaded),
               "No matching files loaded");

  sanity_check(s,
               ((s->mode & mode_barename) && (s->mode & mode_relative)),
               "Relative paths and bare names are mutually exclusive");

  sanity_check(s,
               ((s->mode & mode_match_pretty) && (s->mode & mode_directory)),
               "Directory mode and pretty matching are mutually exclusive");

  sanity_check(s,
               MODE(mode_csv) && MODE(mode_cluster),
               "CSV and clustering modes cannot be combined");

  // -m, -p, and -d are incompatible with -k and -x
  // The former treat FILES as raw files. The latter require them to be sigs
  sanity_check(s,
               ((MODE(mode_match) || MODE(mode_match_pretty) || MODE(mode_directory))
                &&
                (MODE(mode_compare_unknown) || MODE(mode_sigcompare))),
               "Incompatible matching modes");
}
#ifdef _WIN32
// Obtain the wide-character argument vector for this process and store
// it, with its count, in s->argv and s->argc.
//
// Returns TRUE on failure (the command line could not be split into
// arguments) and FALSE on success. On failure s is left unmodified.
static int prepare_windows_command_line(state *s)
{
  int argc = 0;
  TCHAR **argv;

  // CommandLineToArgvW returns NULL when it fails; report that to the
  // caller rather than storing a NULL argument vector.
  argv = CommandLineToArgvW(GetCommandLineW(),&argc);
  if (NULL == argv)
    return TRUE;

  s->argc = argc;
  s->argv = argv;

  return FALSE;
}
#endif
// Report whether fn looks like an absolute path.
//
// On Win32 a path counts as absolute when it starts with an ASCII drive
// letter followed by a colon. Elsewhere it counts as absolute when it
// starts with DIR_SEPARATOR.
//
// Returns nonzero for an absolute path and zero otherwise. A NULL fn is
// reported through internal_error.
static int is_absolute_path(TCHAR *fn)
{
  if (NULL == fn)
    internal_error("Unknown error in is_absolute_path");

#ifdef _WIN32
  // isalpha() is only defined for arguments representable as unsigned
  // char (or EOF). A wide TCHAR can fall outside that range, so the
  // drive letter is tested against the ASCII ranges directly.
  const TCHAR drive = fn[0];
  const int is_drive_letter =
    (drive >= _TEXT('A') && drive <= _TEXT('Z')) ||
    (drive >= _TEXT('a') && drive <= _TEXT('z'));

  return (is_drive_letter && _TEXT(':') == fn[1]);
#else
  return (DIR_SEPARATOR == fn[0]);
#endif
}
// Build the filename to process for one command line argument.
//
// s     - program state; only the mode_relative flag is consulted
// fn    - output buffer, SSDEEP_PATH_MAX characters long
// cwd   - current working directory, or NULL if unknown (not used on Win32)
// input - the argument as given on the command line
//
// In relative mode, or when input is already absolute, input is copied
// to fn unchanged. Otherwise an absolute name is produced: via
// _wfullpath on Win32, and elsewhere by prefixing cwd or, when cwd is
// NULL, via realpath. If the system call cannot resolve the name, fn
// falls back to a copy of input so that it always holds a terminated
// string. A NULL fn or input is reported through internal_error.
static void generate_filename(state *s, TCHAR *fn, TCHAR *cwd, TCHAR *input)
{
  if (NULL == fn || NULL == input)
    internal_error("Error calling generate_filename");

  if ((s->mode & mode_relative) || is_absolute_path(input)) {
    _tcsncpy(fn, input, SSDEEP_PATH_MAX);
    // The bounded copy does not terminate fn when input fills the buffer
    fn[SSDEEP_PATH_MAX - 1] = 0;
    return;
  }

  // Windows systems don't have symbolic links, so we don't
  // have to worry about carefully preserving the paths
  // they follow. Just use the system command to resolve the paths
#ifdef _WIN32
  if (NULL == _wfullpath(fn, input, SSDEEP_PATH_MAX)) {
    // Resolution failed; use the name as given
    _tcsncpy(fn, input, SSDEEP_PATH_MAX);
    fn[SSDEEP_PATH_MAX - 1] = 0;
  }
#else
  if (NULL == cwd) {
    // If we can't get the current working directory, we're not
    // going to be able to build the relative path to this file anyway.
    // So we just call realpath and make the best of things.
    // NOTE(review): realpath expects a PATH_MAX-sized output buffer;
    // confirm SSDEEP_PATH_MAX is at least that large.
    if (NULL == realpath(input, fn)) {
      // fn is unspecified after a failed realpath; use the name as given
      _tcsncpy(fn, input, SSDEEP_PATH_MAX);
      fn[SSDEEP_PATH_MAX - 1] = 0;
    }
  }
  else
    snprintf(fn, SSDEEP_PATH_MAX, "%s%c%s", cwd, DIR_SEPARATOR, input);
#endif
}
// Program entry point.
//
// Sets up the program state, parses the options, then handles every
// remaining command line argument according to the selected mode. With
// no remaining arguments, standard input is processed instead. Finishes
// by running the match and cluster reports when those modes are active.
// Always returns EXIT_SUCCESS.
int main(int argc, char **argv)
{
  int idx, status, last = argc;
  TCHAR *fn, *cwd;
  state *s = new state;

  // NOTE: __progname is not assigned here; the basename(argv[0])
  // assignment once used for non-glibc builds is disabled.

  if (initialize_state(s))
    fatal_error("%s: Unable to initialize state variable", __progname);

  process_cmd_line(s,argc,argv);

#ifdef _WIN32
  if (prepare_windows_command_line(s))
    fatal_error("%s: Unable to process command line arguments", __progname);
#else
  s->argc = argc;
  s->argv = argv;
#endif

  // Whatever getopt left behind is a file or directory to process.
  // If nothing is left, read from standard input instead.
  if (optind != argc) {
    MD5DEEP_ALLOC(TCHAR, fn, SSDEEP_PATH_MAX);
    MD5DEEP_ALLOC(TCHAR, cwd, SSDEEP_PATH_MAX);

    cwd = _tgetcwd(cwd, SSDEEP_PATH_MAX);
    if (NULL == cwd)
      fatal_error("%s: %s", __progname, strerror(errno));

    // Signature comparison mode walks the original argv, for which no
    // wildcard expansion is done on Win32 (i.e. where it matters), so
    // its upper bound stays at the original argc. Every other mode uses
    // the argument count stored in the state.
    if ((s->mode & mode_sigcompare) == 0)
      last = s->argc;

    for (idx = optind; idx < last; ++idx)
    {
      if (MODE(mode_sigcompare)) {
        match_load(s,argv[idx]);
        continue;
      }

      if (MODE(mode_compare_unknown)) {
        match_compare_unknown(s,argv[idx]);
        continue;
      }

      generate_filename(s, fn, cwd, s->argv[idx]);
#ifdef _WIN32
      status = process_win32(s, fn);
#else
      status = process_normal(s, fn);
#endif
    }

    // Files were processed, but none was large enough to be meaningful:
    // warn the user. This mostly happens when testing very small files,
    // e.g. $ echo "hello world" > foo && ssdeep foo
    if (s->processed_file && !s->found_meaningful_file)
    {
      print_error(s,"%s: Did not process files large enough to produce meaningful results", __progname);
    }
  }
  else {
    status = process_stdin(s);
  }

  // Comparing signature files reuses the existing pretty-print
  // directory matching code to do the work.
  if (MODE(mode_sigcompare))
    s->mode |= mode_match_pretty;

  if (MODE(mode_match_pretty) || MODE(mode_sigcompare) || MODE(mode_cluster))
    find_matches_in_known(s);

  if (MODE(mode_cluster))
    display_clusters(s);

  return (EXIT_SUCCESS);
}