Code::Blocks  SVN r11506
sqlexer.cpp
Go to the documentation of this file.
1 /*
2  see copyright notice in squirrel.h
3 */
4 #include "sqpcheader.h"
5 #include <ctype.h>
6 #include <stdlib.h>
7 #include "sqtable.h"
8 #include "sqstring.h"
9 #include "sqcompiler.h"
10 #include "sqlexer.h"
11 
// Helper macros for the lexer. They expand to code that uses SQLexer members
// (_currdata, _prevtoken, _curtoken, _currentcolumn, _longstr, _keywords), so
// they are only meaningful inside SQLexer member functions.

// The character most recently read from the input.
12 #define CUR_CHAR (_currdata)
// Record t as the current token (shifting the old one into _prevtoken) and
// return it from the enclosing function.
13 #define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t;}
// True when the current character is at or below the end-of-buffer marker.
14 #define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
// Read the next character and advance the column counter.
15 #define NEXT() {Next();_currentcolumn++;}
// Empty the scratch buffer used to accumulate string/number/identifier text.
16 #define INIT_TEMP_STRING() { _longstr.resize(0);}
// Append one character to the scratch buffer.
17 #define APPEND_CHAR(c) { _longstr.push_back(c);}
// Append the terminating NUL to the scratch buffer.
18 #define TERMINATE_BUFFER() {_longstr.push_back(_SC('\0'));}
// Register keyword `key` (stringified) with token id `id` in the keyword table.
// Expects a variable named `ss` to be in scope at the expansion site.
19 #define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
20 
21 SQLexer::SQLexer(){}
22 SQLexer::~SQLexer()
23 {
24  _keywords->Release();
25 }
26 
27 void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
28 {
29  _errfunc = efunc;
30  _errtarget = ed;
31  _sharedstate = ss;
32  _keywords = SQTable::Create(ss, 26);
33  ADD_KEYWORD(while, TK_WHILE);
34  ADD_KEYWORD(do, TK_DO);
35  ADD_KEYWORD(if, TK_IF);
36  ADD_KEYWORD(else, TK_ELSE);
37  ADD_KEYWORD(break, TK_BREAK);
38  ADD_KEYWORD(continue, TK_CONTINUE);
39  ADD_KEYWORD(return, TK_RETURN);
40  ADD_KEYWORD(null, TK_NULL);
41  ADD_KEYWORD(function, TK_FUNCTION);
42  ADD_KEYWORD(local, TK_LOCAL);
43  ADD_KEYWORD(for, TK_FOR);
44  ADD_KEYWORD(foreach, TK_FOREACH);
45  ADD_KEYWORD(in, TK_IN);
46  ADD_KEYWORD(typeof, TK_TYPEOF);
47  ADD_KEYWORD(delegate, TK_DELEGATE);
48  ADD_KEYWORD(delete, TK_DELETE);
49  ADD_KEYWORD(try, TK_TRY);
50  ADD_KEYWORD(catch, TK_CATCH);
51  ADD_KEYWORD(throw, TK_THROW);
52  ADD_KEYWORD(clone, TK_CLONE);
53  ADD_KEYWORD(yield, TK_YIELD);
54  ADD_KEYWORD(resume, TK_RESUME);
55  ADD_KEYWORD(switch, TK_SWITCH);
56  ADD_KEYWORD(case, TK_CASE);
57  ADD_KEYWORD(default, TK_DEFAULT);
58  ADD_KEYWORD(this, TK_THIS);
59  ADD_KEYWORD(parent,TK_PARENT);
60  ADD_KEYWORD(class,TK_CLASS);
61  ADD_KEYWORD(extends,TK_EXTENDS);
62  ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
63  ADD_KEYWORD(instanceof,TK_INSTANCEOF);
64  ADD_KEYWORD(vargc,TK_VARGC);
65  ADD_KEYWORD(vargv,TK_VARGV);
66  ADD_KEYWORD(true,TK_TRUE);
67  ADD_KEYWORD(false,TK_FALSE);
68  ADD_KEYWORD(static,TK_STATIC);
69  ADD_KEYWORD(enum,TK_ENUM);
70  ADD_KEYWORD(const,TK_CONST);
71 
72  _readf = rg;
73  _up = up;
74  _lasttokenline = _currentline = 1;
75  _currentcolumn = 0;
76  _prevtoken = -1;
77  Next();
78 }
79 
80 void SQLexer::Error(const SQChar *err)
81 {
82  _errfunc(_errtarget,err);
83 }
84 
85 void SQLexer::Next()
86 {
87  SQInteger t = _readf(_up);
88  if(t > MAX_CHAR) Error(_SC("Invalid character"));
89  if(t != 0) {
90  _currdata = (LexChar)t;
91  return;
92  }
93  _currdata = SQUIRREL_EOB;
94 }
95 
96 const SQChar *SQLexer::Tok2Str(SQInteger tok)
97 {
98  SQObjectPtr itr, key, val;
99  SQInteger nitr;
100  while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
101  itr = (SQInteger)nitr;
102  if(((SQInteger)_integer(val)) == tok)
103  return _stringval(key);
104  }
105  return NULL;
106 }
107 
108 void SQLexer::LexBlockComment()
109 {
110  bool done = false;
111  while(!done) {
112  switch(CUR_CHAR) {
113  case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
114  case _SC('\n'): _currentline++; NEXT(); continue;
115  case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
116  default: NEXT();
117  }
118  }
119 }
120 
/*
 Return the next token from the input.

 Whitespace, newlines and comments are skipped. Single-character tokens are
 returned as the character itself; multi-character operators, literals,
 identifiers and keywords are returned as TK_* codes. Every returned token goes
 through RETURN_TOKEN, which also updates _prevtoken/_curtoken.
 Returns 0 when the end of the buffer is reached.
*/
121 SQInteger SQLexer::Lex()
122 {
123  _lasttokenline = _currentline;
124  while(CUR_CHAR != SQUIRREL_EOB) {
125  switch(CUR_CHAR){
126  case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
     // A newline is not returned as a token, but it is recorded in _curtoken
     // (with the old token shifted into _prevtoken) and the column is reset.
127  case _SC('\n'):
128  _currentline++;
129  _prevtoken=_curtoken;
130  _curtoken=_SC('\n');
131  NEXT();
132  _currentcolumn=1;
133  continue;
     // '/' starts a block comment, a line comment, "/=", "/>" or plain division.
134  case _SC('/'):
135  NEXT();
136  switch(CUR_CHAR){
137  case _SC('*'):
138  NEXT();
139  LexBlockComment();
140  continue;
141  case _SC('/'):
142  do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
143  continue;
144  case _SC('='):
145  NEXT();
146  RETURN_TOKEN(TK_DIVEQ);
     // Not reached: RETURN_TOKEN has already returned.
147  continue;
148  case _SC('>'):
149  NEXT();
150  RETURN_TOKEN(TK_ATTR_CLOSE);
     // Not reached: RETURN_TOKEN has already returned.
151  continue;
152  default:
153  RETURN_TOKEN('/');
154  }
155  case _SC('='):
156  NEXT();
157  if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
158  else { NEXT(); RETURN_TOKEN(TK_EQ); }
159  case _SC('<'):
160  NEXT();
161  if ( CUR_CHAR == _SC('=') ) { NEXT(); RETURN_TOKEN(TK_LE) }
162  else if ( CUR_CHAR == _SC('-') ) { NEXT(); RETURN_TOKEN(TK_NEWSLOT); }
163  else if ( CUR_CHAR == _SC('<') ) { NEXT(); RETURN_TOKEN(TK_SHIFTL); }
164  else if ( CUR_CHAR == _SC('/') ) { NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); }
165  //else if ( CUR_CHAR == _SC('[') ) { NEXT(); ReadMultilineString(); RETURN_TOKEN(TK_STRING_LITERAL); }
166  else { RETURN_TOKEN('<') }
167  case _SC('>'):
168  NEXT();
169  if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
170  else if(CUR_CHAR == _SC('>')){
171  NEXT();
172  if(CUR_CHAR == _SC('>')){
173  NEXT();
174  RETURN_TOKEN(TK_USHIFTR);
175  }
176  RETURN_TOKEN(TK_SHIFTR);
177  }
178  else { RETURN_TOKEN('>') }
179  case _SC('!'):
180  NEXT();
181  if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
182  else { NEXT(); RETURN_TOKEN(TK_NE); }
     // '@' introduces a verbatim string and must be followed by a double quote.
     // NOTE(review): there is no return/break after the Error() calls in this
     // case and the next one; control would fall into the following case if
     // Error() returned. Presumably the error callback does not return - confirm.
183  case _SC('@'): {
184  SQInteger stype;
185  NEXT();
186  if(CUR_CHAR != _SC('"'))
187  Error(_SC("string expected"));
188  if((stype=ReadString('"',true))!=-1) {
189  RETURN_TOKEN(stype);
190  }
191  Error(_SC("error parsing the string"));
192  }
     // Ordinary string literal or single-quoted character constant; the opening
     // quote character is passed to ReadString as the closing delimiter.
193  case _SC('"'):
194  case _SC('\''): {
195  SQInteger stype;
196  if((stype=ReadString(CUR_CHAR,false))!=-1){
197  RETURN_TOKEN(stype);
198  }
199  Error(_SC("error parsing the string"));
200  }
     // Punctuation that is always a single-character token.
201  case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
202  case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
203  {SQInteger ret = CUR_CHAR;
204  NEXT(); RETURN_TOKEN(ret); }
     // '.' is either a single dot or the three-dot TK_VARPARAMS; exactly two
     // dots is reported as an error.
205  case _SC('.'):
206  NEXT();
207  if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
208  NEXT();
209  if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
210  NEXT();
211  RETURN_TOKEN(TK_VARPARAMS);
212  case _SC('&'):
213  NEXT();
214  if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
215  else { NEXT(); RETURN_TOKEN(TK_AND); }
216  case _SC('|'):
217  NEXT();
218  if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
219  else { NEXT(); RETURN_TOKEN(TK_OR); }
220  case _SC(':'):
221  NEXT();
222  if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
223  else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
224  case _SC('*'):
225  NEXT();
226  if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
227  else RETURN_TOKEN('*');
228  case _SC('%'):
229  NEXT();
230  if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
231  else RETURN_TOKEN('%');
232  case _SC('-'):
233  NEXT();
234  if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
235  else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
236  else RETURN_TOKEN('-');
237  case _SC('+'):
238  NEXT();
239  if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
240  else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
241  else RETURN_TOKEN('+');
242  case SQUIRREL_EOB:
243  return 0;
     // Anything else: a number, an identifier/keyword, or a stray character that
     // is returned as its own token (control characters are reported as errors).
244  default:{
245  if (scisdigit(CUR_CHAR)) {
246  SQInteger ret = ReadNumber();
247  RETURN_TOKEN(ret);
248  }
249  else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
250  SQInteger t = ReadID();
251  RETURN_TOKEN(t);
252  }
253  else {
254  SQInteger c = CUR_CHAR;
255  if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
256  NEXT();
257  RETURN_TOKEN(c);
258  }
     // Not reached: every branch above returns.
259  RETURN_TOKEN(0);
260  }
261  }
262  }
263  return 0;
264 }
265 
266 SQInteger SQLexer::GetIDType(SQChar *s)
267 {
268  SQObjectPtr t;
269  if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
270  return SQInteger(_integer(t));
271  }
272  return TK_IDENTIFIER;
273 }
274 
275 
276 SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
277 {
279  NEXT();
280  if(IS_EOB()) return -1;
281  for(;;) {
282  while(CUR_CHAR != ndelim) {
283  switch(CUR_CHAR) {
284  case SQUIRREL_EOB:
285  Error(_SC("unfinished string"));
286  return -1;
287  case _SC('\n'):
288  if(!verbatim) Error(_SC("newline in a constant"));
290  _currentline++;
291  break;
292  case _SC('\\'):
293  if(verbatim) {
294  APPEND_CHAR('\\'); NEXT();
295  }
296  else {
297  NEXT();
298  switch(CUR_CHAR) {
299  case _SC('x'): NEXT(); {
300  if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
301  const SQInteger maxdigits = 4;
302  SQChar temp[maxdigits+1];
303  SQInteger n = 0;
304  while(isxdigit(CUR_CHAR) && n < maxdigits) {
305  temp[n] = CUR_CHAR;
306  n++;
307  NEXT();
308  }
309  temp[n] = 0;
310  SQChar *sTemp;
311  APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
312  }
313  break;
314  case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
315  case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
316  case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
317  case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
318  case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
319  case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
320  case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
321  case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
322  case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
323  case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
324  case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
325  default:
326  Error(_SC("unrecognised escaper char"));
327  break;
328  }
329  }
330  break;
331  default:
333  NEXT();
334  }
335  }
336  NEXT();
337  if(verbatim && CUR_CHAR == '"') { //double quotation
339  NEXT();
340  }
341  else {
342  break;
343  }
344  }
346  SQInteger len = _longstr.size()-1;
347  if(ndelim == _SC('\'')) {
348  if(len == 0) Error(_SC("empty constant"));
349  if(len > 1) Error(_SC("constant too long"));
350  _nvalue = _longstr[0];
351  return TK_INTEGER;
352  }
353  _svalue = &_longstr[0];
354  return TK_STRING_LITERAL;
355 }
356 
357 void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
358 {
359  *res = 0;
360  while(*s != 0)
361  {
362  if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
363  else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
364  else { assert(0); }
365  }
366 }
367 
368 void LexInteger(const SQChar *s,SQUnsignedInteger *res)
369 {
370  *res = 0;
371  while(*s != 0)
372  {
373  *res = (*res)*10+((*s++)-'0');
374  }
375 }
376 
377 SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
378 
379 void LexOctal(const SQChar *s,SQUnsignedInteger *res)
380 {
381  *res = 0;
382  while(*s != 0)
383  {
384  if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
385  else { assert(0); }
386  }
387 }
388 
389 SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
390 
391 
392 #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
393 SQInteger SQLexer::ReadNumber()
394 {
395 #define TINT 1
396 #define TFLOAT 2
397 #define THEX 3
398 #define TSCIENTIFIC 4
399 #define TOCTAL 5
400  SQInteger type = TINT, firstchar = CUR_CHAR;
401  SQChar *sTemp;
403  NEXT();
404  if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
405  if(scisodigit(CUR_CHAR)) {
406  type = TOCTAL;
407  while(scisodigit(CUR_CHAR)) {
409  NEXT();
410  }
411  if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
412  }
413  else {
414  NEXT();
415  type = THEX;
416  while(isxdigit(CUR_CHAR)) {
418  NEXT();
419  }
420  if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
421  }
422  }
423  else {
424  // C::B patch: Eliminate compiler warnings
425  APPEND_CHAR((char)firstchar);
426  while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
427  if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
428  if(isexponent(CUR_CHAR)) {
429  if(type != TFLOAT) Error(_SC("invalid numeric format"));
430  type = TSCIENTIFIC;
432  NEXT();
433  if(CUR_CHAR == '+' || CUR_CHAR == '-'){
435  NEXT();
436  }
437  if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
438  }
439 
441  NEXT();
442  }
443  }
445  switch(type) {
446  case TSCIENTIFIC:
447  case TFLOAT:
448  _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
449  return TK_FLOAT;
450  case TINT:
451  LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
452  return TK_INTEGER;
453  case THEX:
454  LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
455  return TK_INTEGER;
456  case TOCTAL:
457  LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
458  return TK_INTEGER;
459  }
460  return 0;
461 }
462 
463 SQInteger SQLexer::ReadID()
464 {
465  SQInteger res;
467  do {
469  NEXT();
470  } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
472  res = GetIDType(&_longstr[0]);
473  if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
474  _svalue = &_longstr[0];
475  }
476  return res;
477 }
#define APPEND_CHAR(c)
Definition: sqlexer.cpp:17
#define CUR_CHAR
Definition: sqlexer.cpp:12
#define RETURN_TOKEN(t)
Definition: sqlexer.cpp:13
#define TSCIENTIFIC
SQInteger scisodigit(SQInteger c)
Definition: sqlexer.cpp:377
#define INIT_TEMP_STRING()
Definition: sqlexer.cpp:16
#define TINT
#define MAX_HEX_DIGITS
Definition: sqlexer.cpp:392
#define TFLOAT
#define ADD_KEYWORD(key, id)
Definition: sqlexer.cpp:19
#define TOCTAL
void LexHexadecimal(const SQChar *s, SQUnsignedInteger *res)
Definition: sqlexer.cpp:357
#define TERMINATE_BUFFER()
Definition: sqlexer.cpp:18
#define THEX
#define NEXT()
Definition: sqlexer.cpp:15
void LexInteger(const SQChar *s, SQUnsignedInteger *res)
Definition: sqlexer.cpp:368
char SQChar
#define IS_EOB()
Definition: sqlexer.cpp:14
SQInteger isexponent(SQInteger c)
Definition: sqlexer.cpp:389
#define NULL
Definition: prefix.cpp:59
void LexOctal(const SQChar *s, SQUnsignedInteger *res)
Definition: sqlexer.cpp:379
const wxChar null(_T('\0'))