-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathunparse.c
138 lines (127 loc) · 4.06 KB
/
unparse.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <stdbool.h>
#include <time.h>
#include <sys/mman.h>
#include "utils.h"
// -------------------------------------------------------------
// struct containing command line parameters and other globals
// Both strings are heap-allocated while parsing the command line
// (strdup/asprintf) and are released with free() at the end of main.
typedef struct {
char *basename; // the single non-option argument; used to open the dictionary and parse files
char *outname; // output file name: the -o argument, or "<basename>.out" when -o is absent
} Args;
/*
 * Print the usage message to stdout and terminate the program.
 *
 * name: program name shown in the usage line (callers pass argv[0]).
 *
 * Never returns: the process always exits with status 1, also when the
 * help was explicitly requested with -h.
 * Only options that the getopt string in parseArgs ("ho:") accepts are
 * listed here; an option that is not parsed must not be advertised.
 */
static void print_help(char *name)
{
  printf("Usage: %s <basename> [options]\n\n", name);
  puts("Restore the original file given a prefix free parse (files .dict and .parse)");
  puts(" Options:");
  puts("\t-o outfile output file (def. <basename>.out)");
  puts("\t-h show help and exit");
  exit(1);
}
/*
 * Parse the command line and fill *arg.
 *
 * argc, argv: as received by main.
 * arg:        output; on return arg->basename and arg->outname both point
 *             to heap-allocated strings that the caller must free().
 *
 * Accepted options are "-o <outfile>" and "-h". Exactly one non-option
 * argument (the base name) is required, otherwise the usage message is
 * printed and the program exits. When -o is not given the output name is
 * "<basename>.out". The whole command line is echoed to stdout first.
 * Allocation failures are reported through die().
 */
static void parseArgs(int argc, char** argv, Args *arg ) {
  extern int optind;
  extern char *optarg;
  int c;

  puts("==== Command line:");
  for(int i=0;i<argc;i++)
    printf(" %s",argv[i]);
  puts("\n");

  arg->outname = NULL;
  while ((c = getopt( argc, argv, "ho:") ) != -1) {
    switch(c) {
      case 'o':
        free(arg->outname);   // a repeated -o replaces the previous name
        arg->outname = strdup(optarg);
        if(arg->outname==NULL) die("Error creating output file name");
        break;
      case 'h':
        print_help(argv[0]); exit(1);
      case '?':
      default:
        puts("Unknown option. Use -h for help.");
        exit(1);
    }
  }

  // read base name as the only non-option parameter
  if (argc!=optind+1)
    print_help(argv[0]);
  arg->basename = strdup(argv[optind]);
  if(arg->basename==NULL) die("Allocation error");

  // if not given create output file name with .out extension
  if(arg->outname==NULL) {
    int e = asprintf(&arg->outname,"%s.out",arg->basename);
    if(e<0) die("Error creating output file name");
  }
}
/*
 * Map the whole file referred to by fd into memory.
 *
 * fd: open file descriptor; the file size is obtained by seeking to its end.
 * n:  output; receives the file size in bytes.
 *
 * Returns the address of a readable and writable MAP_PRIVATE mapping, so
 * stores through the returned pointer are not carried to the file.
 * A failed seek or a failed mapping is reported through die()
 * (declared outside this file; presumably it does not return).
 */
void *mmap_fd(int fd, size_t *n)
{
  off_t off = lseek(fd,0,SEEK_END);
  // the value to test is the offset returned by lseek, not the output pointer n
  if(off<0) die("seek error on dictionary file");
  void *a = mmap(NULL,(size_t) off,PROT_READ|PROT_WRITE,MAP_PRIVATE,fd,0);
  if(a==MAP_FAILED) die("mmap error on dictionary file");
  *n = (size_t) off;
  return a;
}
/*
 * Rebuild the original file from a dictionary file and a parse file.
 *
 *  1. Parse the command line.
 *  2. Map the dictionary into memory and split it into words: every
 *     EndOfWord byte is replaced by '\0' and the start of each word is
 *     recorded in Wstart (the mapping is private, so the file on disk is
 *     not modified).
 *  3. Read the parse as a sequence of 4-byte word ids (1-based) and, for
 *     each id, append the corresponding dictionary word to the output file.
 *
 * Returns 0 on success; every detected error is reported through die().
 */
int main(int argc, char *argv[])
{
  Args arg;         // command line arguments
  char *Dict;       // dictionary
  size_t n;         // length of dictionary
  uint32_t *Wstart; // starting positions of words inside dictionary

  // read arguments
  parseArgs(argc,argv,&arg);
  // start measuring wall clock time
  time_t start_wc = time(NULL);

  // mmap dictionary file to memory; the descriptor is not needed afterwards
  int dict_fd = fd_open_aux_file(arg.basename,EXTDICZ,O_RDONLY);
  Dict = mmap_fd(dict_fd,&n);
  if(close(dict_fd)!=0) die("error closing dictionary file");
  // word starting positions are stored in 32 bits: refuse a dictionary
  // whose offsets would not fit instead of silently truncating them
  if(n>UINT32_MAX) die("Dictionary file too large");

  // compute # words, change terminator, and save starting points
  long size = 1000;
  Wstart = malloc(size*sizeof(*Wstart));
  if(Wstart==NULL) die("Allocation error");
  long words = 0;
  Wstart[0] = 0; // first word starts at Dict[0]
  for(size_t i=1;i<n;i++) {
    if(Dict[i]==EndOfWord) {
      Dict[i]=0; // replace EndOfWord with a real \0
      words++;
      if(words==size) {
        // Wstart[words] is about to be written: double the capacity
        size *=2;
        Wstart = realloc(Wstart,size*sizeof(*Wstart));
        if(Wstart==NULL) die("Allocation error");
      }
      Wstart[words] = (uint32_t) (i+1); // starting position of next word
    }
    else assert(Dict[i]!=Dollar); // we don't expect Dollar's
  }
  assert(Dict[Wstart[words]]==0); // last word is dummy
  fprintf(stderr,"Found %ld dictionary words\n",words);
  fprintf(stderr,"Recovering file %s\n",arg.outname);

  // create output file reading word id's from the parse
  FILE *f = fopen(arg.outname,"wb");
  if(f==NULL) die("Cannot open output file");
  FILE *parse = open_aux_file(arg.basename,EXTPARSE,"rb");
  if(parse==NULL) die("Cannot open parse file");
  while(true) {
    uint32_t w; char *s;
    int e = fread(&w,4,1,parse);
    if(e==0 && feof(parse)) break; // done
    if(e!=1) die("Error reading parse file");
    // valid ids are 1..words; id k refers to the word starting at Wstart[k-1]
    if(w==0 || w-1>=words) die("Invalid word ID in the parse file");
    s = Dict + Wstart[w-1]; // dictionary word (correctly \0 terminated)
    e = fputs(s,f);
    if(e==EOF) die("Error writing to the output file");
  }
  fclose(parse);
  // fclose flushes buffered output: a failure here means data was lost
  if(fclose(f)!=0) die("Error closing the output file");

  // release resources
  free(Wstart);
  munmap(Dict,n);
  free(arg.outname);
  free(arg.basename);
  printf("==== Elapsed time: %.0lf wall clock seconds\n", difftime(time(NULL),start_wc));
  return 0;
}