-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdumpscanner.l
115 lines (99 loc) · 3.27 KB
/
dumpscanner.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
%option noinput nounput noyywrap nodefault prefix="dumpscanner_" outfile="lex.yy.c"
%top {
/*
* This file is part of typoscan.
*
* typoscan is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any
* later version.
*
* typoscan is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public
* License along with typoscan. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <stdio.h>
#include "error.h"
#include "typoscan.h"
/* Title of the page being scanned: the IN_TITLE rules collect it via
   BUF_*(page, ...) and the "</page>" rule prints it on a regex hit.
   NOTE(review): the BUF_* macros are not defined in this file;
   presumably BUF_x(page) expands to operations on pagebuf -- confirm. */
static char pagebuf [65536];
/* Text of the page's <id> element, collected by the IN_PAGEID rules.
   Nothing in this file reads it back. */
static char pageidbuf [16];
/* Body of the page's <text> element (10 MiB); this is the subject
   string handed to pcre_exec () in the "</page>" rule. */
static char textbuf [10 * 1024 * 1024];
/* Not referenced directly by any rule in this file; presumably the
   write cursor maintained by the BUF_* macros -- TODO confirm. */
static char *bufptr;
/* Set to 1 once "<ns>0</ns>" has been seen inside the current page;
   cleared again at every "<page>". */
static int ns0seen;
}
%x IN_PAGE IN_TITLE IN_PAGEID IN_TEXT
%%
"<page>" {
    /*
     * A new page begins.  Forget everything remembered about the
     * previous one: besides the namespace flag, the collection buffers
     * are emptied too.  Otherwise a page that lacks a <title> or a
     * <text xml:space="preserve"> element would be judged at "</page>"
     * using the title/text left over from the preceding page.
     */
    ns0seen = 0;
    pagebuf [0] = '\0';
    pageidbuf [0] = '\0';
    textbuf [0] = '\0';
    BEGIN(IN_PAGE);
}
<IN_PAGE>"<ns>0</ns>" { /* Page is in namespace 0; only such pages are matched at "</page>". */ ns0seen = 1; }
<IN_PAGE>"</page>" {
    /*
     * End of a page.  If the page is in namespace 0, run every
     * configured regex over the collected text and print the page
     * title once, on the first regex that matches.  Matching errors
     * are reported and the remaining regexes are still tried.
     */
    if (ns0seen)
      {
        int i;
        size_t textbuflen = strlen (textbuf);

        for (i = 0; i < regexcount; i++)
          {
            int rc = pcre_exec (regexes [i].regex,
                                regexes [i].extra,
                                textbuf,
                                (int) textbuflen, /* pcre_exec takes an int length */
                                0,                /* start offset */
                                0,                /* options */
                                NULL,             /* no offsets vector ... */
                                0);               /* ... of size zero */

            /* pcre_exec () reports a match with any non-negative
               value (0 when the offsets vector is too small, as it
               is here), and failures with negative codes.  */
            if (rc >= 0)
              {
                printf ("%s\n", pagebuf);
                break;
              }
            /* No trailing newline in the format: error () terminates
               the message itself (assuming error.h provides the
               GNU/gnulib error ()).  */
            if (rc != PCRE_ERROR_NOMATCH)
              error (0, 0, "Error while matching: %d", rc);
          }
      }
    BEGIN(INITIAL);
}
<IN_PAGE>{
"<title>" { /* Page title follows: start collecting it. */ BUF_INIT(page); BEGIN(IN_TITLE); }
\n" <id>" { /* An <id> element at this exact position after a newline: start collecting it.  NOTE(review): presumably the fixed leading whitespace singles out the page's own id -- confirm against the dump layout. */ BUF_INIT(pageid); BEGIN(IN_PAGEID); }
"<text xml:space=\"preserve\">" { /* Page body follows: start collecting it. */ BUF_INIT(text); BEGIN(IN_TEXT); }
}
<IN_TITLE>{
"</title>" { /* Title complete: terminate the buffer and go back to scanning the page. */ BUF_NUL(page); BEGIN(IN_PAGE); }
"\\" { /* A backslash in the title is stored doubled. */ BUF_ADD(page, '\\'); BUF_ADD(page, '\\'); }
"&amp;" { /* The five XML predefined entities are decoded to the character they stand for. */ BUF_ADD(page, '&'); }
"&quot;" { BUF_ADD(page, '"'); }
"&lt;" { BUF_ADD(page, '<'); }
"&gt;" { BUF_ADD(page, '>'); }
"&apos;" { BUF_ADD(page, '\''); }
.|\n { /* Any other character is copied unchanged. */ BUF_ADD(page, *yytext); }
}
<IN_PAGEID>{
"</id>" { /* Id complete: terminate the buffer and go back to scanning the page. */ BUF_NUL(pageid); BEGIN(IN_PAGE); }
.|\n { /* Every character up to "</id>" is part of the id. */ BUF_ADD(pageid, *yytext); }
}
<IN_TEXT>{
"</text>" { /* Body complete: terminate the buffer and go back to scanning the page. */ BUF_NUL(text); BEGIN(IN_PAGE); }
"&amp;" { /* The five XML predefined entities are decoded to the character they stand for, so the regexes see the plain text. */ BUF_ADD(text, '&'); }
"&quot;" { BUF_ADD(text, '"'); }
"&lt;" { BUF_ADD(text, '<'); }
"&gt;" { BUF_ADD(text, '>'); }
"&apos;" { BUF_ADD(text, '\''); }
.|\n { /* Any other character is copied unchanged. */ BUF_ADD(text, *yytext); }
}
<*>.|\n { /* Fallback for every start condition: silently drop any character no other rule consumed. */ }
%%
/*
 * Drive the generated scanner over its input.
 *
 * dumpscanner_lex () is called repeatedly until it returns 0.  None of
 * the rule actions in this file return a value, so all work (matching
 * pages and printing titles) happens as a side effect of those calls.
 *
 * Always returns 0.
 */
int dumpscanner_scan (void)
{
  for (;;)
    {
      if (dumpscanner_lex () == 0)
        break;
    }

  return 0;
}