1
1
#!/usr/bin/env python
2
2
3
- import tokenize
4
- from HTMLParser import HTMLParseError
3
+ import pytokenize as tokenize
4
+ import re
5
+ from StringIO import StringIO
5
6
from pyxl .codec .parser import PyxlParser
7
+ from pytokenize import Untokenizer
6
8
7
9
class PyxlParseError (Exception ): pass
8
10
11
def get_end_pos(start_pos, tvalue):
    """Return the (row, col) position just past *tvalue* when it starts at *start_pos*.

    Rows advance by one for every newline in *tvalue*; the column resets to 0
    after a newline and otherwise advances by one per character.
    """
    row, col = start_pos
    if '\n' not in tvalue:
        # Single-line text: the column simply advances by its length.
        return (row, col + len(tvalue))
    pieces = tvalue.split('\n')
    # One row per newline; the column restarts after the last newline.
    return (row + len(pieces) - 1, len(pieces[-1]))
20
+
21
class RewindableTokenStream(object):
    """
    A token stream, with the ability to rewind and restart tokenization while maintaining correct
    token position information.

    Invariants:
        - zero_row and zero_col are the correct values to adjust the line and possibly column of the
          tokens being produced by _tokens.
        - Tokens in unshift_buffer have locations with absolute position (relative to the beginning
          of the file, not relative to where we last restarted tokenization).
    """

    def __init__(self, readline):
        # readline: the caller's line source, same contract as tokenize expects.
        self.orig_readline = readline
        # Tokens pushed back via unshift(); already carry absolute positions.
        self.unshift_buffer = []
        # StringIO of untokenized text to re-tokenize after a rewind, or None.
        self.rewound_buffer = None
        self._tokens = tokenize.generate_tokens(self._readline)
        # Offsets applied to positions produced after a rewind (see _adjust_position).
        self.zero_row, self.zero_col = (0, 0)
        # When True, _readline reports EOF so _flush can drain without consuming input.
        self.stop_readline = False

    def _dumpstate(self):
        # Debugging aid only: dump the rewind/unshift bookkeeping.
        print "tokenizer state:"
        print "  zero:", (self.zero_row, self.zero_col)
        print "  rewound_buffer:", self.rewound_buffer
        print "  unshift_buffer:", self.unshift_buffer

    def _readline(self):
        # readline hook handed to generate_tokens: serve rewound text first,
        # then fall back to the original input.
        if self.stop_readline:
            return ""
        if self.rewound_buffer:
            line = self.rewound_buffer.readline()
            if line:
                return line
            else:
                self.rewound_buffer = None # fallthrough to orig_readline
        return self.orig_readline()

    def _flush(self):
        # Exhaust the current tokenizer without reading any more input lines
        # (stop_readline makes _readline report EOF for the duration).
        self.stop_readline = True
        tokens = list(tok for tok in self)
        self.stop_readline = False
        return tokens

    def _adjust_position(self, pos):
        # Translate a position from a restarted tokenizer into an absolute file
        # position; only the first row needs the column offset added.
        row, col = pos
        # NOTE(review): tokenize rows are 1-based, so `row == 0` looks like it
        # should be `row == 1` for the column offset to ever apply — confirm.
        if row == 0:
            col += self.zero_col
        row += self.zero_row
        return (row, col)

    def rewind_and_retokenize(self, rewind_token):
        """Rewind the given token (which is expected to be the last token read from this stream, or
        the end of such token); then restart tokenization."""
        ttype, tvalue, (row, col), tend, tline = rewind_token
        # Collect the rewound token plus everything still buffered in the
        # current tokenizer, turn it back into text, and tokenize that text
        # again from scratch.
        tokens = [rewind_token] + self._flush()
        self.zero_row, self.zero_col = (row - 1, col - 1)
        self.rewound_buffer = StringIO(Untokenizer().untokenize(tokens))
        self.unshift_buffer = []
        self._tokens = tokenize.generate_tokens(self._readline)

    def next(self):
        # Python 2 iterator protocol: unshifted tokens take priority; fresh
        # tokens get their positions translated to absolute coordinates.
        if self.unshift_buffer:
            token = self.unshift_buffer.pop(0)
        else:
            ttype, tvalue, tstart, tend, tline = self._tokens.next()
            tstart = self._adjust_position(tstart)
            tend = self._adjust_position(tend)
            token = (ttype, tvalue, tstart, tend, tline)
        return token

    def __iter__(self):
        return self

    def unshift(self, token):
        """Rewind the given token, without retokenizing. It will be the next token read from the
        stream."""
        # Prepend so multiple unshifts come back out in LIFO order.
        self.unshift_buffer[:0] = [token]
98
+
9
99
def pyxl_untokenize (tokens ):
10
100
parts = []
11
101
prev_row = 1
@@ -33,10 +123,14 @@ def pyxl_untokenize(tokens):
33
123
return '' .join (parts )
34
124
35
125
def pyxl_tokenize(readline):
    """Tokenize pyxl source: wrap *readline* in a rewindable token stream and
    run the pyxl tag transformation over it."""
    stream = RewindableTokenStream(readline)
    return transform_tokens(stream)
127
+
128
+ def transform_tokens (tokens ):
36
129
last_nw_token = None
37
130
prev_token = None
38
131
39
- tokens = tokenize .generate_tokens (readline )
132
+ curly_depth = 0
133
+
40
134
while 1 :
41
135
try :
42
136
token = tokens .next ()
@@ -45,14 +139,25 @@ def pyxl_tokenize(readline):
45
139
46
140
ttype , tvalue , tstart , tend , tline = token
47
141
48
- if (ttype == tokenize .OP and tvalue == '<' and last_nw_token and
49
- ((last_nw_token [0 ] == tokenize .OP and last_nw_token [1 ] == '=' ) or
142
+ if ttype == tokenize .OP and tvalue == '{' :
143
+ curly_depth += 1
144
+ if ttype == tokenize .OP and tvalue == '}' :
145
+ curly_depth -= 1
146
+ if curly_depth < 0 :
147
+ tokens .unshift (token )
148
+ return
149
+
150
+ if (ttype == tokenize .OP and tvalue == '<' and
151
+ (last_nw_token == None or # if we have *just* entered python mode e.g
152
+ (last_nw_token [0 ] == tokenize .OP and last_nw_token [1 ] == '=' ) or
50
153
(last_nw_token [0 ] == tokenize .OP and last_nw_token [1 ] == '(' ) or
51
154
(last_nw_token [0 ] == tokenize .OP and last_nw_token [1 ] == '[' ) or
52
155
(last_nw_token [0 ] == tokenize .OP and last_nw_token [1 ] == '{' ) or
53
156
(last_nw_token [0 ] == tokenize .OP and last_nw_token [1 ] == ',' ) or
54
157
(last_nw_token [0 ] == tokenize .OP and last_nw_token [1 ] == ':' ) or
55
158
(last_nw_token [0 ] == tokenize .NAME and last_nw_token [1 ] == 'print' ) or
159
+ (last_nw_token [0 ] == tokenize .NAME and last_nw_token [1 ] == 'else' ) or
160
+ (last_nw_token [0 ] == tokenize .NAME and last_nw_token [1 ] == 'yield' ) or
56
161
(last_nw_token [0 ] == tokenize .NAME and last_nw_token [1 ] == 'return' ))):
57
162
token = get_pyxl_token (token , tokens )
58
163
@@ -97,26 +202,61 @@ def pyxl_tokenize(readline):
97
202
98
203
def get_pyxl_token(start_token, tokens):
    """Consume tokens from *tokens* (a RewindableTokenStream) starting at the
    pyxl-opening *start_token* ('<'), feed them through a PyxlParser, and
    return the parser's resulting token.

    Raises PyxlParseError if the stream ends with tags still open.
    """
    ttype, tvalue, tstart, tend, tline = start_token
    pyxl_parser = PyxlParser(tstart[0], tstart[1])
    pyxl_parser.feed(start_token)

    for token in tokens:
        ttype, tvalue, tstart, tend, tline = token

        if tvalue and tvalue[0] == '{':
            if pyxl_parser.python_mode_allowed():
                # '{' opens an embedded python expression: split the token at
                # the brace, re-tokenize the remainder as python, and recurse
                # via transform_tokens until the matching '}' pops us back out.
                mid, right = tvalue[0], tvalue[1:]
                division = get_end_pos(tstart, mid)
                pyxl_parser.feed_position_only((ttype, mid, tstart, division, tline))
                tokens.rewind_and_retokenize((ttype, right, division, tend, tline))
                python_tokens = list(transform_tokens(tokens))

                # transform_tokens unshifted the unbalanced '}'; consume it and
                # substitute an empty token at its end position.
                close_curly = tokens.next()
                ttype, tvalue, tstart, tend, tline = close_curly
                close_curly_sub = (ttype, '', tend, tend, tline)

                pyxl_parser.feed_python(python_tokens + [close_curly_sub])
                continue
            # else fallthrough to pyxl_parser.feed(token)
        elif tvalue and ttype == tokenize.COMMENT:
            if not pyxl_parser.python_comment_allowed():
                # Only the leading '#' belongs to the pyxl content here; push
                # the rest back as an ERRORTOKEN to be re-examined.
                tvalue, rest = tvalue[0], tvalue[1:]
                division = get_end_pos(tstart, tvalue)
                tokens.unshift((tokenize.ERRORTOKEN, rest, division, tend, tline))
                token = ttype, tvalue, tstart, division, tline
                # fallthrough to pyxl_parser.feed(token)
            else:
                pyxl_parser.feed_comment(token)
                continue
        elif tvalue and tvalue[0] == '#':
            # let the python tokenizer grab the whole comment token
            tokens.rewind_and_retokenize(token)
            continue
        else:
            # Split the token at the first '#' or '{' so those characters get
            # handled by the branches above on the next iteration.
            sp = re.split('([#{])', tvalue, maxsplit=1)
            if len(sp) > 1:
                tvalue, mid, right = sp
                division = get_end_pos(tstart, tvalue)
                tokens.unshift((ttype, mid + right, division, tend, tline))
                token = ttype, tvalue, tstart, division, tline
                # fallthrough to pyxl_parser.feed(token)

        pyxl_parser.feed(token)

        if pyxl_parser.done(): break

    if not pyxl_parser.done():
        lines = ['<%s> at (line:%d)' % (tag_info['tag'], tag_info['row'])
                 for tag_info in pyxl_parser.open_tags]
        raise PyxlParseError('Unclosed Tags: %s' % ', '.join(lines))

    # Any text the parser consumed past the end of the pyxl expression is
    # handed back to the stream to be tokenized as regular python.
    remainder = pyxl_parser.get_remainder()
    if remainder:
        tokens.rewind_and_retokenize(remainder)

    return pyxl_parser.get_token()
0 commit comments