-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtokenizer.h
110 lines (88 loc) · 2.36 KB
/
tokenizer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
Tokenizer for the BasicDSP language
Niels A. Moseley 2016
License: GPLv2
*/
#ifndef tokenizer_h
#define tokenizer_h
// ************************************
// Token ID constants.
//
// Each macro below names one integer token ID
// (presumably the values stored in token_t::tokID).
//
// Original note: keywords start at 100 and their
// ID is defined by their position in the
// m_keywords vector in Tokenizer.
// NOTE(review): the Tokenizer class declared in
// this header has no m_keywords member and
// TOK_DELAY is defined as 20, so the note above
// looks out of date - confirm against the
// implementation file.
// ************************************
// single character tokens
#define TOK_UNKNOWN 0
#define TOK_NEWLINE 1
#define TOK_LPAREN 2
#define TOK_RPAREN 3
#define TOK_SEMICOL 4
#define TOK_PLUS 5
#define TOK_MINUS 6
#define TOK_STAR 7
#define TOK_LARGER 8
#define TOK_SMALLER 9
#define TOK_EQUAL 10
#define TOK_SLASH 11
// NOTE(review): the value 12 is not assigned to any token in this list
#define TOK_COMMA 13
#define TOK_LBRACKET 14
#define TOK_RBRACKET 15
// keyword tokens, excluding functions
#define TOK_DELAY 20
// other tokens
#define TOK_INTEGER 30
#define TOK_FLOAT 31
#define TOK_IDENT 32
#define TOK_EOF 99
#include <vector>
#include <string>
#include <stdint.h>
#include "reader.h"
/** One token of the source text, as stored in the token list
    that Tokenizer::process() fills in.
*/
struct token_t
{
Reader::position_info pos; // position within the source
uint32_t tokID; // token identifier (presumably one of the TOK_* constants - confirm)
std::string txt; // identifier, keyword or number string
};
/** Tokenizer for the BasicDSP language.

    Reads source text through a Reader, produces a list of token_t
    records and keeps the message and position of the last error so
    callers can query them afterwards.
*/
class Tokenizer
{
public:
    Tokenizer();

    /** Read the source via r and store the resulting tokens in result.
        @return false if an error occurred; see getErrorString() and
                getErrorPosition() for the details.
    */
    bool process(Reader *r, std::vector<token_t> &result);

    /** @return a copy of the last error message. */
    std::string getErrorString() const { return m_lastError; }

    /** @return the position recorded for the last error. */
    Reader::position_info getErrorPosition() const { return m_lastErrorPos; }

    //OBSOLETE: void dumpTokens(std::ostream &stream, const std::vector<token_t> &tokens);

protected:
    // Single-character classification predicates; only declared here,
    // their definitions are not part of this header.
    bool isDigit(char c) const;
    bool isWhitespace(char c) const;
    bool isAlpha(char c) const;
    bool isNumeric(char c) const;
    bool isAlphaNumeric(char c) const;
    bool isAlphaNumericExtended(char c) const;

    /** Tokenizer states (presumably the states of the scanning
        state machine used by process() - confirm in the implementation).
    */
    enum tok_state_t
    {
        S_BEGIN,
        S_IDENT,
        S_INTEGER,
        S_FLOAT,
        S_FLOAT_WITH_EXP,
        S_FLOAT_WITH_POSEXP,
        S_FLOAT_WITH_NEGEXP,
        S_LARGER,
        S_SMALLER,
        S_COMMENT,
        S_DONE
    };

    std::string m_lastError;              // value returned by getErrorString()
    Reader::position_info m_lastErrorPos; // value returned by getErrorPosition()
};
#endif