Code::Blocks  SVN r11506
tokenizer.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of the Code::Blocks IDE and licensed under the GNU General Public License, version 3
3  * http://www.gnu.org/licenses/gpl-3.0.html
4  *
5  * $Revision: 11445 $
6  * $Id: tokenizer.cpp 11445 2018-08-12 06:23:59Z ollydbg $
7  * $HeadURL: https://svn.code.sf.net/p/codeblocks/code/trunk/src/plugins/codecompletion/parser/tokenizer.cpp $
8  */
9 
10 #include <sdk.h>
11 
12 #include <cctype>
13 #include <wx/utils.h>
14 #include <wx/file.h>
15 #include <wx/msgdlg.h>
16 
17 #include "globals.h"
18 #include "logmanager.h"
19 #include "manager.h"
20 
21 #include "cclogger.h"
22 #include "expression.h"
23 #include "tokenizer.h"
24 #include "tokentree.h"
25 
// Debug-trace configuration: TRACE/TRACE2/TRACE2_SET_FLAG expand to CCLogger
// calls (or to nothing) depending on CC_PARSER_TEST and the per-file/global
// debug-output levels. Uses the GCC named-variadic-macro extension (args...).
26 #define CC_TOKENIZER_DEBUG_OUTPUT 0
27 
28 #if defined(CC_GLOBAL_DEBUG_OUTPUT)
29  #if CC_GLOBAL_DEBUG_OUTPUT == 1
30  #undef CC_TOKENIZER_DEBUG_OUTPUT
31  #define CC_TOKENIZER_DEBUG_OUTPUT 1
32  #elif CC_GLOBAL_DEBUG_OUTPUT == 2
33  #undef CC_TOKENIZER_DEBUG_OUTPUT
34  #define CC_TOKENIZER_DEBUG_OUTPUT 2
35  #endif
36 #endif
37 
// In the parser test build, both TRACE levels always log.
38 #ifdef CC_PARSER_TEST
39  #define TRACE(format, args...) \
40  CCLogger::Get()->DebugLog(F(format, ##args))
41  #define TRACE2(format, args...) \
42  CCLogger::Get()->DebugLog(F(format, ##args))
43  #define TRACE2_SET_FLAG(traceFile)
44 #else
// Level 1: unconditional TRACE; TRACE2 disabled.
45  #if CC_TOKENIZER_DEBUG_OUTPUT == 1
46  #define TRACE(format, args...) \
47  CCLogger::Get()->DebugLog(F(format, ##args))
48  #define TRACE2(format, args...)
49  #define TRACE2_SET_FLAG(traceFile)
// Level 2: TRACE gated on the g_EnableDebugTrace runtime flag, which
// TRACE2_SET_FLAG sets when the current file matches g_DebugTraceFile.
50  #elif CC_TOKENIZER_DEBUG_OUTPUT == 2
51  #define TRACE(format, args...) \
52  do \
53  { \
54  if (g_EnableDebugTrace) \
55  CCLogger::Get()->DebugLog(F(format, ##args)); \
56  } \
57  while (false)
58  #define TRACE2(format, args...) \
59  CCLogger::Get()->DebugLog(F(format, ##args))
60  #define TRACE2_SET_FLAG(traceFile) \
61  g_EnableDebugTrace = !g_DebugTraceFile.IsEmpty() && traceFile.EndsWith(g_DebugTraceFile)
// Level 0 (default): all tracing compiled out.
62  #else
63  #define TRACE(format, args...)
64  #define TRACE2(format, args...)
65  #define TRACE2_SET_FLAG(traceFile)
66  #endif
67 #endif
68 
// Interned wxString constants for tokens and preprocessor keywords the
// tokenizer compares against frequently, avoiding repeated temporary
// wxString construction at each comparison site.
69 namespace TokenizerConsts
70 {
71  const wxString colon (_T(":"));
72  const wxString colon_colon (_T("::"));
73  const wxString equal (_T("="));
74  const wxString kw_if (_T("if"));
75  const wxString kw_ifdef (_T("ifdef"));
76  const wxString kw_ifndef (_T("ifndef"));
77  const wxString kw_elif (_T("elif"));
78  const wxString kw_elifdef (_T("elifdef"));
79  const wxString kw_elifndef (_T("elifndef"));
80  const wxString kw_else (_T("else"));
81  const wxString kw_endif (_T("endif"));
82  const wxString hash (_T("#"));
83  const wxString tabcrlf (_T("\t\n\r"));
84  const wxString kw_define (_T("define"));
85  const wxString kw_undef (_T("undef"));
86 }// namespace TokenizerConsts
87 
88 // maximum macro replacement stack size: caps how deeply macro expansion may
89 static const size_t s_MaxMacroReplaceDepth = 5;
// NOTE(review): the consumer of this constant (presumably ReplaceMacroUsage)
// is not visible in this chunk — confirm against the full file.
90 
// Constructor: records the shared TokenTree and (optionally) a filename.
// All cursor/undo/peek/saved state starts at the beginning of an empty
// buffer; when a filename is given, Init() loads it immediately.
91 Tokenizer::Tokenizer(TokenTree* tokenTree, const wxString& filename) :
92  m_TokenTree(tokenTree),
93  m_Filename(filename),
94  m_BufferLen(0),
95  m_TokenIndex(0),
96  m_LineNumber(1),
97  m_NestLevel(0),
98  m_UndoTokenIndex(0),
99  m_UndoLineNumber(1),
100  m_UndoNestLevel(0),
101  m_PeekAvailable(false),
102  m_PeekTokenIndex(0),
103  m_PeekLineNumber(0),
104  m_PeekNestLevel(0),
105  m_SavedTokenIndex(0),
106  m_SavedLineNumber(1),
107  m_SavedNestingLevel(0),
108  m_IsOK(false),
109  m_State(tsNormal),
110  m_Loader(0),
111  m_NextTokenDoc(),
112  m_LastTokenIdx(-1),
113  m_ReadingMacroDefinition(false)
114 {
// NOTE(review): original lines 115-116 were dropped by the doxygen export
// (numbering jumps 114 -> 117) — confirm their content against the SVN source.
117 
118  if (!m_Filename.IsEmpty())
119  Init(m_Filename);
120 }
121 
// Destructor — empty body; the class owns no resources needing explicit release here.
// NOTE(review): the signature line (original line 122, presumably
// `Tokenizer::~Tokenizer()`) was dropped by the doxygen export — confirm.
123 {
124 }
125 
// Initialise the tokenizer from a file on disk (optionally via a LoaderBase).
// Resets all state, validates the filename, reads the file into m_Buffer and
// normalises path separators. Returns false on any failure (no filename,
// missing file, unreadable file, empty file); sets m_IsOK on success.
126 bool Tokenizer::Init(const wxString& filename, LoaderBase* loader)
127 {
128  m_Loader = loader;
129  BaseInit();
// empty argument: fall back to the filename remembered from a previous call
130  if ( filename.IsEmpty() )
131  {
132  if (m_Filename.IsEmpty())
133  {
134  TRACE(_T("Init() : Called without filename."));
135  return false;
136  }
137  }
138  else
139  {
140  m_Filename = filename;
141  TRACE(_T("Init() : m_Filename='%s'"), m_Filename.wx_str());
142 #ifdef CC_PARSER_TEST
143  TRACE2_SET_FLAG(filename);
144  TRACE2(filename);
145 #endif
146  }
147 
148  if ( !wxFileExists(m_Filename) )
149  {
150  TRACE(_T("Init() : File '%s' does not exist."), m_Filename.wx_str());
151  return false;
152  }
153 
154  if ( !ReadFile() )
155  {
156  TRACE(_T("Init() : File '%s' could not be read."), m_Filename.wx_str());
157  return false;
158  }
159 
160  if (!m_BufferLen)
161  {
162  TRACE(_T("Init() : File '%s' is empty."), m_Filename.wx_str());
163  return false;
164  }
165 
// normalise Windows path separators to forward slashes
166  while (m_Filename.Replace(_T("\\"),_T("/"))) { ; }
167 
// NOTE(review): original line 168 was dropped by the doxygen export
// (numbering jumps 167 -> 169) — confirm its content against the SVN source.
169 
170  m_IsOK = true;
171  return true;
172 }
173 
// Initialise the tokenizer from an in-memory buffer instead of a file.
// fileOfBuffer names the origin (for diagnostics/path normalisation) and
// initLineNumber lets callers tokenize a fragment starting mid-file.
// Always succeeds (returns true).
174 bool Tokenizer::InitFromBuffer(const wxString& buffer, const wxString& fileOfBuffer, size_t initLineNumber)
175 {
176  BaseInit();
177 
178  m_BufferLen = buffer.Length();
179  m_Buffer = buffer + _T(" "); // + 1 => sentinel
180  m_IsOK = true;
181  m_Filename = fileOfBuffer;
182  m_LineNumber = initLineNumber;
183 
// normalise Windows path separators to forward slashes
184  while (m_Filename.Replace(_T("\\"),_T("/"))) { ; }
185 
// NOTE(review): original line 186 was dropped by the doxygen export
// (numbering jumps 185 -> 187) — confirm its content against the SVN source.
187 
188  return true;
189 }
190 
// Reset all cursor, undo, peek and saved positions to the start of an empty
// buffer, called from Init()/InitFromBuffer() before (re)loading content.
// NOTE(review): the signature line (original line 191, presumably
// `void Tokenizer::BaseInit()`) and original lines 205 and 208 were dropped
// by the doxygen export — confirm against the SVN source.
192 {
193  m_BufferLen = 0;
194  m_TokenIndex = 0;
195  m_LineNumber = 1;
196  m_NestLevel = 0;
197  m_UndoTokenIndex = 0;
198  m_UndoLineNumber = 1;
199  m_UndoNestLevel = 0;
200  m_PeekTokenIndex = 0;
201  m_PeekLineNumber = 0;
202  m_PeekNestLevel = 0;
203  m_SavedTokenIndex = 0;
204  m_SavedLineNumber = 1;
206  m_IsOK = false;
207  m_Buffer.Clear();
209  m_LastTokenIdx = -1;
210 }
211 
// Load the file contents into m_Buffer, either through the attached
// LoaderBase (with UTF-8 then ISO8859-1 fallback decoding) or directly via
// wxFile/cbRead(). Appends a one-space sentinel so look-ahead never reads
// past the end. Returns true on success.
// NOTE(review): the signature line (original line 212, presumably
// `bool Tokenizer::ReadFile()`) and original lines 220 and 254 were dropped
// by the doxygen export; line 220 presumably set m_BufferLen from the loader
// and 254 from m_Buffer — confirm against the SVN source.
213 {
214  bool success = false;
215  wxString fileName = wxEmptyString;
216  if (m_Loader)
217  {
218  fileName = m_Loader->FileName();
219  char* data = m_Loader->GetData();
221 
222  // the following code is faster than DetectEncodingAndConvert()
223 // DetectEncodingAndConvert(data, m_Buffer);
224 
225  // same code as in cbC2U() but with the addition of the string length (3rd param in unicode version)
226  // and the fallback encoding conversion
227 #if wxUSE_UNICODE
228  m_Buffer = wxString(data, wxConvUTF8, m_BufferLen + 1); // + 1 => sentinel
229  if (m_Buffer.Length() == 0)
230  {
231  // could not read as utf-8 encoding, try iso8859-1
232  m_Buffer = wxString(data, wxConvISO8859_1, m_BufferLen + 1); // + 1 => sentinel
233  }
234 #else
235  m_Buffer = wxString(data, m_BufferLen + 1); // + 1 => sentinel
236 #endif
237 
238  success = (data != 0);
239  }
240  else
241  {
242  if ( !wxFileExists(m_Filename) )
243  return false;
244 
245  // open file
246  wxFile file(m_Filename);
247  if ( !cbRead(file, m_Buffer) )
248  return false;
249 
250  fileName = m_Filename;
251  success = true;
252  }
253 
255 
256  // add 'sentinel' to the end of the string (not counted to the length of the string)
257  m_Buffer += _T(' ');
258 
259  return success;
260 }
261 
262 // Behaviour consistent with SkipComment
// Skip any run of whitespace (every char <= ' '). Returns true only when at
// least one char was skipped, i.e. m_TokenIndex moved.
// NOTE(review): the signature line (original line 263, presumably
// `bool Tokenizer::SkipWhiteSpace()`) was dropped by the doxygen export.
264 {
265  if (CurrentChar() > _T(' ') || IsEOF())
266  return false;
267 
268  // skip spaces, tabs, etc.
269  // don't check EOF when MoveToNextChar already does, also replace isspace() which calls msvcrt.dll
270  // with a dirty hack: CurrentChar() <= ' ' is "good enough" here
271  while (CurrentChar() <= _T(' ') && MoveToNextChar())
272  ;
273 
274  return true;
275 }
276 
277 // only be called when we are in a C-string,
278 // To check whether the current character is the real end of C-string
279 // See SkipToStringEnd() for more details
// Returns true when the current char is preceded by an ODD number of
// backslashes, i.e. it is escaped and NOT the string terminator.
// NOTE(review): the signature line (original line 280, presumably
// `bool Tokenizer::IsEscapedChar()`) was dropped by the doxygen export.
281 {
282  // Easy: If previous char is not a backslash, then it's surely not an escaped char
283  if (PreviousChar() != '\\')
284  return false;
285  else
286  {
287  // check for multiple backslashes, e.g. "\\"
288  unsigned int numBackslash = 2; // for sure we have at least two at this point
289  while ( m_TokenIndex >= numBackslash
290  && ((m_TokenIndex - numBackslash) <= m_BufferLen)
291  && (m_Buffer.GetChar(m_TokenIndex - numBackslash) == '\\') )
292  ++numBackslash; // another one...
293 
294  if ( (numBackslash%2) == 1) // number of backslashes (including current char) is odd
295  return false; // eg: "\""
296  else // number of backslashes (including current char) is even
297  return true; // eg: "\\""
298  }
299 }
300 
301 // expect we are not in a C-string
// Advance the cursor until the current char equals ch (or EOF).
// Returns true when ch was found before end-of-buffer.
// NOTE(review): the signature line (original line 302, presumably
// `bool Tokenizer::SkipToChar(const wxChar& ch)`) was dropped by the export.
303 {
304  // skip everything until we find ch
305  while (CurrentChar() != ch && MoveToNextChar()) // don't check EOF when MoveToNextChar already does
306  ;
307 
308  return NotEOF();
309 }
310 
311 // For example: X"ABCDEFG\"HIJKLMN"Y
312 // We are now at A, and would skip to trailing char (the '"' before Y)
313 // The double quote before H is a "C-escaped-character", we shouldn't quit at that one
// Skip to the closing quote char `ch` of the string literal the cursor is
// inside, ignoring escaped quotes. Returns false if EOF is hit first.
// NOTE(review): the signature line (original line 314, presumably
// `bool Tokenizer::SkipToStringEnd(const wxChar& ch)`) was dropped by the export.
315 {
316  while (true)
317  {
318  // go to candidate of a close quote char
319  while (CurrentChar() != ch && MoveToNextChar()) // don't check EOF when MoveToNextChar already does
320  ;
321 
322  if (IsEOF())
323  return false;
324 
325  // check to see if the close quote char is an escape char
326  if (IsEscapedChar())
327  MoveToNextChar(); // if true, skip the close quote char, and continue
328  else
329  break; // if false, it is the closing quote
330  }
331  return true;
332 }
333 
// Advance the cursor one char, maintaining m_LineNumber when a newline was
// just consumed. Returns false once the cursor reaches end-of-buffer.
// NOTE(review): the signature line (original line 334, presumably
// `bool Tokenizer::MoveToNextChar()`) and original line 339 (inside the EOF
// branch) were dropped by the doxygen export — confirm against the SVN source.
335 {
336  ++m_TokenIndex;
337  if (IsEOF())
338  {
340  return false;
341  }
342 
343  if (PreviousChar() == _T('\n'))
344  ++m_LineNumber;
345  return true;
346 }
347 
348 // return true if we really skip a string, that means m_TokenIndex has changed.
// If the cursor sits on a '"' or '\'' opening quote, skip the whole literal
// (quotes included) and return true; otherwise leave the cursor untouched.
// NOTE(review): the signature line (original line 349, presumably
// `bool Tokenizer::SkipString()`) was dropped by the doxygen export.
350 {
351  if (IsEOF())
352  return false;
353 
354  const wxChar ch = CurrentChar();
355  if (ch == _T('"') || ch == _T('\''))
356  {
357  // this is the case that match is inside a string!
358  MoveToNextChar();
359  SkipToStringEnd(ch);
360  MoveToNextChar();
361  return true;
362  }
363 
364  return false;
365 }
366 
// Read the rest of the current logical line (following backslash-newline
// continuations) and return it as a string. With stripUnneeded == true the
// text is compacted: comments skipped, duplicate whitespace collapsed,
// string literals copied verbatim, trailing C++ comments excluded — the form
// used when capturing a #define body. Otherwise the raw text up to EOL is
// returned unmodified.
367 wxString Tokenizer::ReadToEOL(bool stripUnneeded)
368 {
// NOTE(review): original line 369 was dropped by the doxygen export
// (numbering jumps 368 -> 370); m_ReadingMacroDefinition is reset to false
// before every return below, so 369 presumably set it to true — confirm.
370  if (stripUnneeded)
371  {
372  // there are many cases when reading the #define xxx *****
373  // it can have such comments like
374  //
375  // a macro definition has multiple physical lines
376  // #define xxx yyy BACKSLASH
377  // zzz
378  //
379  // a macro definition has mixed C comments
380  // #define xxx /*aaa*/ yyy /*bbb*/ zzz
381  //
382  // a macro definition has ending C++ comments
383  // #define xxx yyy zzz // aaa bbb
384 
385  TRACE(_T("%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
386  wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
387  PreviousChar(), NextChar());
388 
389  static const size_t maxBufferLen = 4094;
390  // the intermediate cache string, once it is full, the contents will be piped to str
391  wxChar buffer[maxBufferLen + 2];
392  // we use a pointer to access the intermediate cache string
393  wxChar* p = buffer;
394  wxString str; // the returned string
395 
396  // loop all the physical lines when reading macro definition
397  for (;;)
398  {
399  // this while statement ends at one physical EOL '\n'
400  while (NotEOF() && CurrentChar() != _T('\n'))
401  {
402 
403  // a macro definition has ending C++ comments, we should stop the parsing before
404  // the "//" chars, so that the doxygen document can be added correctly to previous
405  // added Macro definition token.
406  if(CurrentChar() == _T('/') && NextChar() == _T('/'))
407  break;
408 
409  // Note that SkipComment() function won't skip the '\n' after comments
410  while (SkipComment())
411  ;
412 
413  // if we see a '\', it could be the EOL of a physical line
414  const wxChar ch = CurrentChar();
415  if (ch == _T('\n'))
416  break;
417 
418  // if we see two spaces in the buffer, we should drop the second one. Note, if the
419  // first char is space, we should always save it to buffer, this is to distinguish
420  // a function/variable like macro definition, e.g.
421  // #define MYMACRO(A) ... -> function like macro definition
422  // #define MYMACRO (A) ... -> variable like macro definition, note a space before '('
423  if (ch <= _T(' ') && p > buffer && *(p - 1) == ch)
424  {
425  MoveToNextChar();
426  continue;
427  }
428  // handle string literals, directly put the content to the output str.
429  if (ch == _T('"') || ch == _T('\''))
430  {
// flush the partial cache first so str stays in order
431  if (p > buffer) {
432  str.Append(buffer, p - buffer);
433  p = buffer;
434  }
435  int literal_start = m_TokenIndex;//remember the beginning of the m_TokenIndex
436  SkipString();// m_TokenIndex points at the next char following the string literal
437  str.Append(m_Buffer.wx_str() + literal_start, (m_TokenIndex - literal_start));
438  continue;
439  }
440 
441  *p = ch;
442  ++p;
443 
// cache full: flush to the output string and restart the cache
444  if (p >= buffer + maxBufferLen)
445  {
446  str.Append(buffer, p - buffer);
447  p = buffer;
448  }
449 
450  MoveToNextChar();
451  }
452 
453  // check to see it is a logical EOL, some long macro definition contains a backslash-newline
454  if (!IsBackslashBeforeEOL() || IsEOF())
455  break; //break the outer for loop
456  else
457  {
458  //remove the backslash-newline and goto next physical line
459  while (p > buffer && *(--p) <= _T(' '))
460  ;
461  MoveToNextChar();
462  }
463  }
464  // remove the extra spaces in the end of buffer
465  while (p > buffer && *(p - 1) <= _T(' '))
466  --p;
467 
468  if (p > buffer)
469  str.Append(buffer, p - buffer);
470 
471  TRACE(_T("ReadToEOL(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
// NOTE(review): original line 472 (the TRACE argument list continuation)
// was dropped by the doxygen export — confirm against the SVN source.
473  TRACE(_T("ReadToEOL(): %s"), str.wx_str());
474  m_ReadingMacroDefinition = false;
475  return str;
476  }
477  else
478  {
// raw mode: just note the start, skip to logical EOL, return the slice
479  const unsigned int idx = m_TokenIndex;
480  SkipToEOL();
481  m_ReadingMacroDefinition = false;
482  return m_Buffer.Mid(idx, m_TokenIndex - idx);
483  }
484 }
485 
486 // there are some rules which make the parentheses very compact
487 // 1, there should be no space before ',' , '*' and '&', but should have a space after that
488 // 2, two or more spaces becomes one space
489 // 3, no spaces after the '(' and before ')'
490 // 4, there needs to be a space to separate two identifiers, see the "unsigned" and "int" below
491 // 5, there is a space before and after the "=" char
492 // "( unsigned int * a, float & b )" -> "(unsigned int* a, float& b)"
493 // "( int a [ 10 ], float ** b )" -> "(int a [10], float** b)"
494 // "( int a = 5)" -> "(int a = 5)"
495 
// Append a compacted parenthesised expression to str (which already holds
// the opening "("), consuming tokens until the matching ")" closes level 0.
// NOTE(review): the signature line (original line 496, presumably
// `void Tokenizer::ReadParentheses(wxString& str)`) was dropped by the export.
497 {
498  // brace level of '(' and ')', the str is currently "(", so the level is 1
499  int level = 1;
500 
501  while (NotEOF())
502  {
503  wxString token = DoGetToken();
504 
505  if (token == _T("("))
506  {
507  ++level;
508  str << token;
509  }
510  else if (token == _T(")"))
511  {
512  --level;
513  str << token;
514  if (level == 0)
515  break;
516  }
517  else if (token == _T("*") || token == _T("&") )
518  {
519  str << token;
520  }
521  else if (token == _T("=")) // space before and after "="
522  {
523  str << _T(" ") << token << _T(" ");
524  }
525  else if (token == _T(",")) // space after ","
526  {
527  str << token << _T(" ");
528  }
529  else
530  {
531  // there is a space between two identifier like token
532  // (int var)
533  // also, a space between '*' and "var"
534  // (int* var) or (int& var)
535  // space between ')' and the "int"
536  // (__attribute__(xxx) int var)
537  wxChar nextChar = token[0];
538  wxChar lastChar = str.Last();
539  if ( (wxIsalpha(nextChar) || nextChar == _T('_'))
540  && ( wxIsalnum(lastChar) || lastChar == _T('_')
541  || lastChar == _T('*') || lastChar == _T('&') || lastChar == _T(')')))
542  {
543  str << _T(" ") << token;
544  }
545  else // otherwise, no space is needed
546  str << token;
547  }
548 
549  if (level == 0)
550  break;
551  }//while (NotEOF())
552 
553 }
554 
// Skip to the end of the current LOGICAL line: C comments inside the line
// are skipped, and backslash-newline continuations keep the scan going onto
// the next physical line. Returns true unless EOF was reached.
// NOTE(review): the signature line (original line 555, presumably
// `bool Tokenizer::SkipToEOL()`) and original line 583 (the second TRACE's
// argument list) were dropped by the doxygen export — confirm.
556 {
557  TRACE(_T("%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
558  wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
559  PreviousChar(), NextChar());
560 
561  // skip everything until we find EOL
562  for (;;)
563  {
564  while (NotEOF() && CurrentChar() != '\n')
565  {
566  if (CurrentChar() == '/' && NextChar() == '*')
567  {
568  SkipComment();
// a C comment may have ended exactly on a newline — stop this physical line
569  if (CurrentChar() == _T('\n'))
570  break;
571  }
572 
573  MoveToNextChar();
574  }
575 
// only a backslash-newline extends the logical line
576  if (!IsBackslashBeforeEOL() || IsEOF())
577  break;
578  else
579  MoveToNextChar();
580  }
581 
582  TRACE(_T("SkipToEOL(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
584 
585  return NotEOF();
586 }
587 
// Skip to the end of a C++ "//" comment, honouring backslash-newline
// continuations that extend the comment onto following physical lines.
// Returns true unless EOF was reached.
// NOTE(review): the signature line (original line 588, presumably
// `bool Tokenizer::SkipToInlineCommentEnd()`) was dropped by the export.
589 {
590  TRACE(_T("%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
591  wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
592  PreviousChar(), NextChar());
593 
594  // skip everything until we find EOL
595  while (true)
596  {
597  SkipToChar(_T('\n'));
598  if (!IsBackslashBeforeEOL() || IsEOF())
599  break;
600  else
601  MoveToNextChar();
602  }
603 
604  TRACE(_T("SkipToInlineCommentEnd(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c',")
605  _T(" NextChar='%c'"), m_LineNumber, CurrentChar(), PreviousChar(), NextChar());
606 
607  return NotEOF();
608 }
609 
610 // if we really move forward, return true, which means we have the new m_TokenIndex
611 // if we stay here, return false
// Skip a C ("/*...*/") or C++ ("//...") comment at the cursor. Plain
// comments are discarded; doxygen-style comments ("/**", "/*!", "///",
// "//!", and the trailing "<" variants) have their text collected and
// attached to the previous token (trailing form) or saved for the next one.
// NOTE(review): the signature line (original line 612, presumably
// `bool Tokenizer::SkipComment()`) and original lines 636, 691, 769-770,
// 791, 793 and 797 were dropped by the doxygen export — the gaps are marked
// inline below; confirm each against the SVN source.
613 {
614  if (IsEOF())
615  return false;
616 
617  bool cstyle; // C or C++ style comments
618 
619  //check the comment prompt
620  if (CurrentChar() == '/')
621  {
622  if (NextChar() == '*')
623  cstyle = true;
624  else if (NextChar() == '/')
625  cstyle = false;
626  else
627  return false; // Not a comment, return false;
628  }
629  else
630  return false; // Not a comment, return false;
631 
632  MoveToNextChar(); // Skip the comment prompt
633  MoveToNextChar();
634 
635  bool isDoc = false;
// NOTE(review): original line 636 (the condition guarding this doxygen-
// detection scope) was dropped by the export — confirm what it tests.
637  {
638  isDoc = (CurrentChar() == '!'); // "/*!" or "//!"
639 
640  // "/*" + ? check
641  // "/**" is OK, but not "/**/" and not //* and not /***
642  // since
643  // /********* not a doxygen comment ********** */
644  // or
645  // /********************************************/
646  // /* also not a doxygen comment */
647  // /********************************************/
648  if (!isDoc && cstyle)
649  isDoc = (CurrentChar() == '*' && NextChar() != '/' && NextChar() != '*');
650 
651  if (!isDoc && !cstyle) // "//" + ?
652  isDoc = (CurrentChar() == '/' && NextChar() != '/'); // "///" but not "////"
653  }
654 
655  // m_ExpressionResult.empty() == true means we are running the Tokenizer in global level, no
656  // preprocessor branch is entered.
657  // m_ExpressionResult.top() == true means we are in the top true branch of the conditional
658  // preprocessor directives, those scopes covers the valid doxygen comments. E.g.
659  // #if 1
660  // /** valid documents */
661  // int a;
662  // #else
663  // /** invalid documents */
664  // int a;
665  // #endif
666  if (isDoc)
667  isDoc = m_ExpressionResult.empty() || m_ExpressionResult.top();
668 
669  TRACE(_T("SkipComment() : Start from line = %u"), m_LineNumber);
670 
671  if (!isDoc)
672  {
673  // Here, we are in the comment body
674  while (true)
675  {
676  if (cstyle) // C style comment
677  {
678  SkipToChar('*');
679  if (NextChar() == '/') // end of a C style comment
680  {
681  MoveToNextChar();
682  MoveToNextChar();
683  break;
684  }
685  if (!MoveToNextChar())
686  break;
687  }
688  else // C++ style comment
689  {
690  TRACE(_T("SkipComment() : Need to call SkipToInlineCommentEnd() here at line = %u"), m_LineNumber);
// NOTE(review): original line 691 (presumably the SkipToInlineCommentEnd()
// call announced by the TRACE above) was dropped by the export — confirm.
692  break;
693  }
694  }
695  }
696  else
697  {
698  //Inside documentation body
699  wxString doc;
700  MoveToNextChar(); // Skip '!' or '*' or '/'
701  wxChar c = CurrentChar();
702 
// -1 means "document belongs to the NEXT token"; >= 0 means trailing doc
703  int lineToAppend = -1;
704 
705  if (c == _T('<'))
706  { // documentation for already added token - //!< or /*!< or something like this
707  MoveToNextChar();
708  c = CurrentChar();
709  lineToAppend = m_LineNumber;
710  }
711 
712  if (cstyle)
713  {
// collect the body of a C-style doc comment up to "*/"
714  while (true)
715  {
716  c = CurrentChar();
717  if (c == '*' && NextChar() == '/') //End of block comment
718  {
719  MoveToNextChar(); // eat '/'
720  MoveToNextChar();
721  break;
722  }
723  else
724  {
725  doc += c; // Appending char by char may be slow
726  if (!MoveToNextChar())
727  break;
728  }
729  }
730  }
731  else // C++ style comment
732  {
// collect up to the (non-continued) end of line
733  while (true)
734  {
735  c = CurrentChar();
736  if (c == '\n')
737  {
738  if (IsBackslashBeforeEOL())
739  {
740  MoveToNextChar();
741  continue;
742  }
743  else
744  break;
745  }
746  else
747  {
748  doc += c;
749  if (!MoveToNextChar())
750  break;
751  }
752  }
753  }
754 
755  if (doc.size() > 0) // don't push empty strings
756  {
757  doc += _T('\n');
758 
759  if (lineToAppend >= 0) // we have document after the token place
760  {
761  // if we are reading the macro definition(m_ReadingMacroDefinition==true)
762  // then don't run the AppendDocumentation() to the previous Token. E.g.
763  // int aaa;
764  // #define FOO /*!< comments */
765  // That is: we read the "comments", but don't attach to Token aaa, instead, we
766  // translate this kind of comments as "document before the token", at this time
767  // the Token FOO is not constructed yet, but once it is constructed, the "comments"
768  // will attach to Token FOO
// NOTE(review): original lines 769-770 (the m_ReadingMacroDefinition branch
// described by the comment above) were dropped by the export — confirm.
771  else
772  {
773  // we need to attach the document to recent added Token
774  if (m_LastTokenIdx != -1)
775  m_TokenTree->AppendDocumentation(m_LastTokenIdx, m_FileIdx, m_NextTokenDoc + doc);
776 
777  m_NextTokenDoc.clear();
778  }
779  }
780  else
781  {
782  // Find next token's line:
783  // At first skip whitespace
784  while (SkipWhiteSpace())
785  ;
786 
787  // Maybe there is another comment?
788  // Recursive call
789  bool skipped = SkipComment();
790 
// NOTE(review): original lines 791, 793 and 797 were dropped by the export
// (numbering jumps 790 -> 792 -> 794 and 796 -> 798); 797 presumably stored
// doc into m_NextTokenDoc for the next token — confirm against the source.
792 
794  if (!cstyle && skipped)
795  doc = _T("@brief ") + doc + _T('\n');
796 
798  }
799  }
800  }
801  return true;
802 }
803 
// Handle a '#' preprocessor directive at the cursor. Conditional directives
// (everything except ptOthers) are processed here; ptOthers (e.g. #include)
// is left for the ParserThread to handle, so the cursor is not moved for it.
// Returns true when the cursor moved.
// NOTE(review): the signature line (original line 804) was dropped by the
// export — presumably `bool Tokenizer::SkipPreprocessorBranch()`; original
// line 817 (the call that actually processes `type`, announced by the
// comments below) is also missing — confirm both against the SVN source.
805 {
806  wxChar c = CurrentChar();
807  const unsigned int startIndex = m_TokenIndex;
808 
809  if (c == _T('#'))
810  {
811  // this use the Lex() to fetch an unexpanded token
812  const PreprocessorType type = GetPreprocessorType();
813  // ptOthers is not handled here, we will pass them to Parserthread class
814  // so that #include can be handled in Parserthread
815  if (type != ptOthers)
816  {
818  c = CurrentChar();
819  return true;
820  }
821  else
822  return false;
823  }
824 
825  if (startIndex != m_TokenIndex)
826  return true;
827  else
828  return false;
829 }
830 
// Skip everything the parser does not want to see as tokens (whitespace,
// comments, etc. — see GetToken()/PeekToken() callers). Returns true while
// not at end-of-buffer.
// NOTE(review): the signature line (original line 831, presumably
// `bool Tokenizer::SkipUnwanted()`) and original line 833 (the while-loop
// condition this lone ';' belongs to) were dropped by the export — confirm.
832 {
834  ;
835 
836  return NotEOF();
837 }
838 
// Return the next (fully macro-expanded) token and advance. If PeekToken()
// already fetched one, that cached token is consumed instead of re-lexing.
// In non-raw-expression state an opening "(" triggers reading the whole
// compacted parenthesis group as a single token.
// NOTE(review): the signature line (original line 839, presumably
// `wxString Tokenizer::GetToken()`) and original lines 841-843 (undo-state
// bookkeeping), 847-850 (adopting the peeked state) and 858 (presumably
// `ReadParentheses(m_Token);`) were dropped by the export — confirm.
840 {
844 
845  if (m_PeekAvailable)
846  {
851  }
852  else
853  {
854  if (SkipUnwanted())
855  {
856  m_Token = DoGetToken();// this function always return a fully expanded token
857  if (m_Token == _T("(") && m_State^tsRawExpression)
859  }
860  else
861  m_Token.Clear();
862  }
863 
864  m_PeekAvailable = false;
865 
866  return m_Token;
867 }
868 
// Return the token FOLLOWING the current one without consuming it. The
// result and its position are cached in the m_Peek* members so a subsequent
// GetToken() can adopt them cheaply.
// NOTE(review): the signature line (original line 869, presumably
// `wxString Tokenizer::PeekToken()`) and original lines 885-887 (saving the
// current position), 891 (presumably `m_PeekToken = DoGetToken();`), 893
// (presumably `ReadParentheses(m_PeekToken);`) and 899-905 (recording the
// peek position and restoring the saved one) were dropped by the export.
870 {
871  if (!m_PeekAvailable)
872  {
873  // suppose we have such string buffer
874  // ... x1 x2 x3 x4 x5 x6 ....
875  // ^-----------------------m_TokenIndex point to the end of current token "x2"
876  // now, ideally, when we run the PeekToken, we hopefully want to get the below status
877  // ... x1 x2 x3 x4 x5 x6 ....
878  // ^-----------------------m_TokenIndex point to the end of current token "x2"
879  // *--------------------m_PeekTokenIndex point to the end of next token "x3"
880  // Note that DoGetToken() mostly manipulate on the m_TokenIndex, so after the m_TokenIndex
881  // goes one step, we need to restore its position, so that m_Saved... vars are used to save
882  // old m_TokenIndex values before we call the DoGetToken();
883  // NOTE: The m_Saved... vars will be reset to the correct position as necessary when a
884  // ReplaceBufferText() is done.
888 
889  if (SkipUnwanted())
890  {
892  if (m_PeekToken == _T("(") && m_State^tsRawExpression)
894  }
895  else
896  m_PeekToken.Clear();
897 
898  m_PeekAvailable = true; // Set after DoGetToken() to avoid recursive PeekToken() calls.
902 
906  }
907 
908  return m_PeekToken;
909 }
910 /* peek is always available when we run UngetToken() once, actually the m_TokenIndex is moved
911  * backward one step. Note that the m_UndoTokenIndex value is not updated in this function, which
912  * means you are not allowed to run this function twice.
913  */
// Push the current token back: the cursor rewinds one token and the pushed-
// back token becomes available again as the peek token.
// NOTE(review): the signature line (original line 914, presumably
// `void Tokenizer::UngetToken()`) and original lines 921-927 (the index/
// line/nest swap that performs the rewind) were dropped by the export.
915 {
916  // NOTE: Test below could be true even if we haven't run UngetToken() before (eg, if we have just
917  // reset the undo token)
918  // if (m_TokenIndex == m_UndoTokenIndex) // this means we have already run a UngetToken() before.
919  // return;
920 
928  m_PeekAvailable = true;
929 }
930 
931 /* this function always start from the index of m_TokenIndex
932  * int X;MYMACRO;X = 1;
933  * Token: _^
934  * Undo: ^
935  * After that, we get a lexeme of name "MYMACRO", then Token index is move forward
936  * int X;MYMACRO;X = 1;
937  * Token: _______^
938  * Undo: ^
939  *
940  * Please note that if MYMACRO need to be replaced (macro expansion), we should replace the text
941  * then run one step again, see the details in:
942  * http://forums.codeblocks.org/index.php/topic,18315.msg125579.html#msg125579
943  */
// Lex the next token, re-lexing after each macro expansion until a token
// that needs no further replacement is produced; returns that token (m_Lex).
// NOTE(review): the signature line (original line 944, presumably
// `wxString Tokenizer::DoGetToken()`) was dropped by the export.
945 {
946  while(true)
947  {
948  SkipUnwanted();
949  bool identifier = Lex();
950  // only check macro usage on identifier like tokens
951  if (identifier)
952  {
953  bool replaced = CheckMacroUsageAndReplace();
954  // replaced is true means macro expansion happens, so we need to call Lex()
955  if (replaced)
956  continue;
957  else
958  return m_Lex;
959  }
960  else
961  return m_Lex;
962  }
963 }
964 
// Scan ONE raw lexeme at the cursor into m_Lex WITHOUT macro expansion:
// identifiers/keywords, numeric literals, string/char literals, "::", the
// two-char comparison operators, or a single punctuation char (tracking
// {}-nesting in m_NestLevel). Returns true only for identifier-like tokens,
// which are the only candidates for macro replacement.
// NOTE(review): the signature line (original line 965, presumably
// `bool Tokenizer::Lex()`) and original lines 984, 1026, 1031 and 1047
// (assignments announced by the surrounding comments, presumably setting
// m_Lex to wxEmptyString / TokenizerConsts::colon_colon / colon / equal)
// were dropped by the export — confirm against the SVN source.
966 {
967  int start = m_TokenIndex;
968  // identifier like token
969  bool identifier = false;
970 
971  wxChar c = CurrentChar();
972 
973  if (c == '_' || wxIsalpha(c))
974  {
975  // keywords, identifiers, etc.
976 
977  // operator== is cheaper than wxIsalnum, also MoveToNextChar already includes IsEOF
978  while ( ( (c == '_') || (wxIsalnum(c)) )
979  && MoveToNextChar() )
980  c = CurrentChar(); // repeat
981 
982  if (IsEOF())
983  {
985  return false;
986  }
987 
988  identifier = true;
989  m_Lex = m_Buffer.Mid(start, m_TokenIndex - start);
990  }
991 #ifdef __WXMSW__ // This is a Windows only bug!
992  // fetch non-English characters, see more details in: http://forums.codeblocks.org/index.php/topic,11387.0.html
993  else if (c == 178 || c == 179 || c == 185)
994  {
995  m_Lex = c;
996  MoveToNextChar();
997  }
998 #endif
999  else if (wxIsdigit(c))
1000  {
1001  // numbers
1002  while (NotEOF() && CharInString(CurrentChar(), _T("0123456789.abcdefABCDEFXxLl")))
1003  MoveToNextChar();
1004 
1005  if (IsEOF())
1006  {
1007  m_Lex = wxEmptyString;
1008  return false;
1009  }
1010 
1011  m_Lex = m_Buffer.Mid(start, m_TokenIndex - start);
1012  }
1013  else if ( (c == '"') || (c == '\'') )
1014  {
1015  SkipString();
1016  //Now, we are after the end of the C-string, so return the whole string as a token.
1017  m_Lex = m_Buffer.Mid(start, m_TokenIndex - start);
1018  }
1019  else if (c == ':')
1020  {
1021  if (NextChar() == ':')
1022  {
1023  MoveToNextChar();
1024  MoveToNextChar();
1025  // this only copies a pointer, but operator= allocates memory and does a memcpy!
1027  }
1028  else
1029  {
1030  MoveToNextChar();
1032  }
1033  }
1034  else if (c == '=')
1035  {
1036  wxChar next = NextChar();
1037  if (next == _T('=') || next == _T('!') || next == _T('>') || next == _T('<'))
1038  {
// two-char operators: ==, != and the (historical) =>, =< spellings
1039  MoveToNextChar();
1040  MoveToNextChar();
1041  m_Lex = m_Buffer.Mid(start, m_TokenIndex - start);
1042  }
1043  else
1044  {
1045  MoveToNextChar();
1046  // this only copies a pointer, but operator= allocates memory and does a memcpy!
1048  }
1049  }
1050  else
1051  {
1052  if (c == '{')
1053  ++m_NestLevel;
1054  else if (c == '}')
1055  --m_NestLevel;
1056 
1057  m_Lex = c;
1058  MoveToNextChar();
1059  }
1060 
1061  // when m_TokenIndex exceeds the anchor point where we start the macro replacement, we should
1062  // pop the remembered macro usage
1063  while ( !m_ExpandedMacros.empty() && m_ExpandedMacros.front().m_End < m_TokenIndex)
1064  m_ExpandedMacros.pop_front();
1065 
1066  return identifier;
1067 }
1068 
// If the lexeme in m_Lex names a macro definition in the token tree, expand
// it via ReplaceMacroUsage() and return true; otherwise return false.
// NOTE(review): the signature line (original line 1069, presumably
// `bool Tokenizer::CheckMacroUsageAndReplace()`) was dropped by the export.
1070 {
1071  const int id = m_TokenTree->TokenExists(m_Lex, -1, tkMacroDef);
1072  if (id != -1)
1073  {
1074  const Token* token = m_TokenTree->at(id);
1075  if (token)
1076  return ReplaceMacroUsage(token);// either object like macro or function like macro can be handled
1077  }
1078  return false;
1079 }
1080 
1081 // if we have such code
1082 // #define A 1
1083 // #define B 1
1084 //
1085 // #if A+B == 2
1086 // ^begin ^end
1087 //
1088 // we first call SkipToEOL() which actually doesn't care about macro expansion
1089 // then, we get the end of the expression
1090 // now, we rewind the m_TokenIndex to the "begin", and run DoGetToken(), thus we get all the
1091 // expanded tokens like "1+1 == 2"
1092 // thus, we can calculate the expression.
// Evaluate the expression of an #if/#elif directive: tokenize the logical
// line with full macro expansion (in tsRawExpression state), feed the tokens
// into an infix Expression, and evaluate its postfix form. Returns the
// boolean result, or false when evaluation fails (e.g. unknown identifiers).
// NOTE(review): the signature line (original line 1093, presumably
// `bool Tokenizer::CalcConditionExpression()`) was dropped by the export.
1094 {
1095  // need to force the tokenizer to read raw expression
1096  const TokenizerState oldState = m_State;
1097  m_State = tsRawExpression; // parentheses are not returned as a single token
1098 
1099  // we need to know what is the end of the preprocessor directive by calling SkipToEOL(), which
1100  // go to the end of the current logical line. After that, we rewind the m_TokenIndex and parse
1101  // the tokens again until we pass the EOL.
1102  const unsigned int undoIndex = m_TokenIndex;
1103  const unsigned int undoLine = m_LineNumber;
1104  SkipToEOL();
1105  // length from the current m_TokenIndex to the End
1106  const unsigned int untouchedBufferLen = m_BufferLen - m_TokenIndex;
1107  m_TokenIndex = undoIndex;
1108  m_LineNumber = undoLine;
1109 
1110  Expression exp;
1111 
1112  // rescan happens once macro expansion happens (m_TokenIndex rewind)
// the end of the directive is tracked via untouchedBufferLen because macro
// expansion may rewrite the buffer and shift absolute indices
1113  while (m_TokenIndex < m_BufferLen - untouchedBufferLen)
1114  {
1115 
1116  // we run the while loop explicitly before calling the DoGetToken() function.
1117  // if m_TokenIndex pass the EOL, we should stop the calculating of preprocessor
1118  // condition
1119  while (SkipWhiteSpace() || SkipComment())
1120  ;
1121 
1122  if (m_TokenIndex >= m_BufferLen - untouchedBufferLen)
1123  break;
1124 
1125 
1126  wxString token = DoGetToken();
1127  // token are generally the fully macro expanded tokens, so mostly they are some numbers,
1128  // unknown tokens are pushed to Infix express, and later they will be seen as 0.
1129 
1130  if(token.Len() > 0
1131  && (token[0] == _T('_') || wxIsalnum(token[0]))) // identifier like token
1132  {
1133 
// "defined" / "defined(X)" is resolved right here against the token tree
1134  if (token == _T("defined"))
1135  {
1136  if (IsMacroDefined())
1137  exp.AddToInfixExpression(_T("1"));
1138  else
1139  exp.AddToInfixExpression(_T("0"));
1140  }
1141  else
1142  exp.AddToInfixExpression(token); // not a macro usage token
1143  }
1144  else if (token.StartsWith(_T("0x"))) // hex value
1145  {
// normalise hex literals to decimal so the expression evaluator can parse them
1146  long value;
1147  if (token.ToLong(&value, 16))
1148  exp.AddToInfixExpression(wxString::Format(_T("%ld"), value));
1149  else
1150  exp.AddToInfixExpression(_T("0"));
1151  }
1152  else
1153  exp.AddToInfixExpression(token); // other kinds of tokens, such as operators, numbers
1154  }
1155 
1156  // reset tokenizer's functionality
1157  m_State = oldState;
1158 
1159  exp.ConvertInfixToPostfix();
1160  if (exp.CalcPostfix())
1161  {
1162  TRACE(_T("CalcConditionExpression() : exp.GetStatus() : %d, exp.GetResult() : %d"),
1163  exp.GetStatus()?1:0, exp.GetResult()?1:0);
1164  return exp.GetStatus() && exp.GetResult();
1165  }
1166  // if exp.CalcPostfix() fails, which means it may have some undefined identifier (non-digit) in
1167  // the expression, we should return false
1168  return false;
1169 }
1170 
// After the "defined" keyword was consumed, read the macro name — with or
// without surrounding parentheses — using the raw Lex() (no expansion), and
// report whether it exists in the token tree as a macro definition.
// NOTE(review): the signature line (original line 1171, presumably
// `bool Tokenizer::IsMacroDefined()`) was dropped by the export.
1172 {
1173  // pattern 1: #ifdef ( xxx )
1174  // pattern 2: #ifdef xxx
1175  while (SkipWhiteSpace() || SkipComment())
1176  ;
1177  bool haveParen = false;
1178  Lex();
1179  wxString token = m_Lex;
1180  if (token == _T("("))
1181  {
1182  haveParen = true;
1183  while (SkipWhiteSpace() || SkipComment())
1184  ;
1185  // don't call DoGetToken() here, because it automatically expand macros, call Lex() instead.
1186  Lex();
1187  token = m_Lex;
1188  }
1189  int id = m_TokenTree->TokenExists(token, -1, tkMacroDef);
1190  if (haveParen)
1191  {
1192  while (SkipWhiteSpace() || SkipComment())
1193  ;
1194  Lex(); // eat the ")"
1195  }
1196  return (id != -1);
1197 }
1198 
// Scan forward (skipping strings/comments/whitespace) to the next condition
// directive of the CURRENT level — #else/#elif/#elifdef/#elifndef/#endif —
// and rewind the cursor to its '#' so the caller can process it. Nested #if
// blocks encountered on the way are skipped whole.
// NOTE(review): the signature line (original line 1199) was dropped by the
// export — presumably `void Tokenizer::SkipToNextConditionPreprocessor()`;
// original line 1225 (the action taken for a nested "#if", presumably a call
// skipping to its matching #endif) is also missing — confirm.
1200 {
1201  do
1202  {
1203  wxChar ch = CurrentChar();
1204  if (ch == _T('\'') || ch == _T('"') || ch == _T('/') || ch <= _T(' '))
1205  {
1206  while (SkipWhiteSpace() || SkipString() || SkipComment())
1207  ;
1208  ch = CurrentChar();
1209  }
1210 
1211  if (ch == _T('#'))
1212  {
1213  const unsigned int undoIndex = m_TokenIndex;
1214  const unsigned int undoLine = m_LineNumber;
1215 
1216  MoveToNextChar();
1217  while (SkipWhiteSpace() || SkipComment())
1218  ;
1219 
1220  const wxChar current = CurrentChar();
1221  const wxChar next = NextChar();
1222 
1223  // #if
1224  if (current == _T('i') && next == _T('f'))
1226 
1227  // #else #elif #elifdef #elifndef #endif
1228  else if (current == _T('e') && (next == _T('l') || next == _T('n')))
1229  {
// rewind to the '#' so the caller sees the directive itself
1230  m_TokenIndex = undoIndex;
1231  m_LineNumber = undoLine;
1232  break;
1233  }
1234  }
1235  }
1236  while (MoveToNextChar());
1237 }
1238 
1240 {
1241  do
1242  {
1243  wxChar ch = CurrentChar();
1244  if (ch == _T('\'') || ch == _T('"') || ch == _T('/') || ch <= _T(' '))
1245  {
1246  while (SkipWhiteSpace() || SkipString() || SkipComment())
1247  ;
1248  ch = CurrentChar();
1249  }
1250 
1251  if (ch == _T('#'))
1252  {
1253  MoveToNextChar();
1254  while (SkipWhiteSpace() || SkipComment())
1255  ;
1256 
1257  const wxChar current = CurrentChar();
1258  const wxChar next = NextChar();
1259 
1260  // #if
1261  if (current == _T('i') && next == _T('f'))
1263 
1264  // #endif
1265  else if (current == _T('e') && next == _T('n'))
1266  {
1267  SkipToEOL();
1268  break;
1269  }
1270  }
1271  }
1272  while (MoveToNextChar());
1273 }
1274 
1276 {
1277  // those saved m_TokenIndex only rewind for
1278  // the type of ptOthers, so that Parserthread can handle # include xxxx
1279  const unsigned int undoIndex = m_TokenIndex;
1280  const unsigned int undoLine = m_LineNumber;
1281  const unsigned int undoNest = m_NestLevel;
1282 
1283  MoveToNextChar();
1284  while (SkipWhiteSpace() || SkipComment())
1285  ;
1286  Lex();
1287  const wxString token = m_Lex;
1288 
1289  switch (token.Len())
1290  {
1291  case 2:
1292  if (token == TokenizerConsts::kw_if)
1293  return ptIf;
1294  break;
1295 
1296  case 4:
1297  if (token == TokenizerConsts::kw_else)
1298  return ptElse;
1299  else if (token == TokenizerConsts::kw_elif)
1300  return ptElif;
1301  break;
1302 
1303  case 5:
1304  if (token == TokenizerConsts::kw_ifdef)
1305  return ptIfdef;
1306  else if (token == TokenizerConsts::kw_endif)
1307  return ptEndif;
1308  else if (token == TokenizerConsts::kw_undef)
1309  return ptUndef;
1310  break;
1311 
1312  case 6:
1313  if (token == TokenizerConsts::kw_ifndef)
1314  return ptIfndef;
1315  else if (token == TokenizerConsts::kw_define)
1316  return ptDefine;
1317  break;
1318 
1319  case 7:
1320  if (token == TokenizerConsts::kw_elifdef)
1321  return ptElifdef;
1322  break;
1323 
1324  case 8:
1325  if (token == TokenizerConsts::kw_elifndef)
1326  return ptElifndef;
1327  break;
1328 
1329  default:
1330  break;
1331  }
1332 
1333  // only rewind m_TokenIndex for ptOthers
1334  m_TokenIndex = undoIndex;
1335  m_LineNumber = undoLine;
1336  m_NestLevel = undoNest;
1337  return ptOthers;
1338 }
1339 
1341 {
1342  switch (type)
1343  {
1344  case ptIf:
1345  {
1346  TRACE(_T("HandleConditionPreprocessor() : #if at line = %u"), m_LineNumber);
1347  bool result;
1349  result = CalcConditionExpression();
1350  else
1351  {
1352  SkipToEOL();
1353  result = true;
1354  }
1355 
1356  m_ExpressionResult.push(result);
1357  if (!result)
1359  }
1360  break;
1361 
1362  case ptIfdef:
1363  {
1364  TRACE(_T("HandleConditionPreprocessor() : #ifdef at line = %u"), m_LineNumber);
1365  bool result;
1367  result = IsMacroDefined();
1368  else
1369  result = true; // default value
1370 
1371  SkipToEOL();
1372  m_ExpressionResult.push(result);
1373  if (!result)
1375  }
1376  break;
1377 
1378  case ptIfndef:
1379  {
1380  TRACE(_T("HandleConditionPreprocessor() : #ifndef at line = %u"), m_LineNumber);
1381  bool result;
1383  result = !IsMacroDefined();
1384  else
1385  result = true; // default value
1386 
1387  SkipToEOL();
1388  m_ExpressionResult.push(result);
1389  if (!result)
1391  }
1392  break;
1393 
1394  case ptElif:
1395  {
1396  TRACE(_T("HandleConditionPreprocessor() : #elif at line = %u"), m_LineNumber);
1397  bool result = false;
1398  if (!m_ExpressionResult.empty() && !m_ExpressionResult.top())
1399  result = CalcConditionExpression();
1400  if (result)
1401  m_ExpressionResult.top() = true;
1402  else
1404  }
1405  break;
1406 
1407  case ptElifdef:
1408  {
1409  TRACE(_T("HandleConditionPreprocessor() : #elifdef at line = %u"), m_LineNumber);
1410  bool result = false;
1411  if (!m_ExpressionResult.empty() && !m_ExpressionResult.top())
1412  {
1413  result = IsMacroDefined();
1414  SkipToEOL();
1415  }
1416 
1417  if (result)
1418  m_ExpressionResult.top() = true;
1419  else
1421  }
1422  break;
1423 
1424  case ptElifndef:
1425  {
1426  TRACE(_T("HandleConditionPreprocessor() : #elifndef at line = %u"), m_LineNumber);
1427  bool result = false;
1428  if (!m_ExpressionResult.empty() && !m_ExpressionResult.top())
1429  {
1430  result = !IsMacroDefined();
1431  SkipToEOL();
1432  }
1433 
1434  if (result)
1435  m_ExpressionResult.top() = true;
1436  else
1438  }
1439  break;
1440 
1441  case ptElse:
1442  {
1443  TRACE(_T("HandleConditionPreprocessor() : #else at line = %u"), m_LineNumber);
1444  if (!m_ExpressionResult.empty() && !m_ExpressionResult.top())
1445  SkipToEOL();
1446  else
1448  }
1449  break;
1450 
1451  case ptEndif:
1452  {
1453  TRACE(_T("HandleConditionPreprocessor() : #endif at line = %u"), m_LineNumber);
1454  SkipToEOL();
1455  if (!m_ExpressionResult.empty())
1456  m_ExpressionResult.pop();
1457  }
1458  break;
1459 
1460  case ptDefine:
1461  {
1462  TRACE(_T("HandleConditionPreprocessor() : #define at line = %u"), m_LineNumber);
1463  HandleDefines();
1464  }
1465  break;
1466 
1467  case ptUndef:
1468  {
1469  TRACE(_T("HandleConditionPreprocessor() : #undef at line = %u"), m_LineNumber);
1470  HandleUndefs();
1471  }
1472  break;
1473 
1474  case ptOthers:
1475  // ptOthers won't happens here, because it was excluded before calling this function
1476  default:
1477  break;
1478  }
1479 
1480  // reset undo token
1484 }
1485 
1487 {
1488  while (SkipWhiteSpace() || SkipComment())
1489  ;
1490  if (CurrentChar() != _T('('))
1491  return false;
1492 
1493  MoveToNextChar(); // Skip the '('
1494  while (SkipWhiteSpace() || SkipComment())
1495  ;
1496 
1497  const TokenizerState oldState = m_State;
1499  const unsigned int oldNestLevel = m_NestLevel; //
1500 
1501  int level = 1; // include '('
1502 
1503  wxString piece;
1504  while (NotEOF())
1505  {
1506  Lex();
1507  wxString token = m_Lex;
1508  if (token.IsEmpty())
1509  break;
1510 
1511  if (token == _T("("))
1512  ++level;
1513  else if (token == _T(")"))
1514  --level;
1515 
1516  // comma is a delimit only it is not wrapper by ()
1517  if (token == _T(",") && level == 1)
1518  {
1519  results.Add(piece);
1520  piece.Clear();
1521  }
1522  else if (level != 0)
1523  {
1524  if (!piece.IsEmpty() && piece.Last() > _T(' '))
1525  piece << _T(" ");
1526  piece << token;
1527  }
1528 
1529  if (level == 0)
1530  {
1531  if (!piece.IsEmpty())
1532  results.Add(piece);
1533  break;
1534  }
1535 
1536  while (SkipWhiteSpace() || SkipComment())
1537  ;
1538  }
1539 
1540  // reset tokenizer's functionality
1541  m_State = oldState;
1542  m_NestLevel = oldNestLevel;
1543  return true;
1544 }
1545 
1546 bool Tokenizer::ReplaceBufferText(const wxString& target, const Token* macro)
1547 {
1548  if (target.IsEmpty())
1549  return true; // the token is removed from the buffer, return true, so we need to fetch another token
1550 
1552  {
1553  // do not clear the macro expansion stack here
1554  // since it will destroy the anchor point recorded, for example
1555  // if we have a macro definition "#define member FF.member", and expanding the usage.
1556  // "FF" could also be expanded many times, which exceeds the s_MaxMacroReplaceDepth
1557  // When initially expand the "FF.member", the anchor point is recorded as below.
1558  // ^-----anchor point
1559  // This protect the "member" get expand again. But if we reset the m_ExpandedMacros when
1560  // expanding the "FF", then the "member" will expand again, which leads to infinite loop.
1561  //m_ExpandedMacros.clear();
1562 
1563  m_PeekAvailable = false;
1564  return true; // NOTE: we have to skip the problem token by returning true.
1565  }
1566  else if (macro) // Set replace parsing state, and save first replace token index
1567  {
1568  ExpandedMacro rep;
1569  rep.m_End = m_TokenIndex;
1570  rep.m_Macro = macro;
1571  m_ExpandedMacros.push_front(rep);
1572  }
1573  // we don't push the stack if we don't have macro referenced(macro is 0)
1574 
1575  // Keep all in one line
1576  wxString substitute(target);
1577  for (size_t i = 0; i < substitute.Len(); ++i)
1578  {
1579  switch ((wxChar)substitute.GetChar(i))
1580  {
1581  case _T('\\'):
1582  case _T('\r'):
1583  case _T('\n'):
1584  substitute.SetChar(i, _T(' '));
1585  break;
1586  default:
1587  break;
1588  }
1589  }
1590 
1591  // Increase memory if there is not enough space before the m_TokenIndex (between beginning of the
1592  // the m_Buffer to the m_TokenIndex)
1593  const size_t len = substitute.Len();
1594  if (m_TokenIndex < len)
1595  {
1596  const size_t diffLen = len - m_TokenIndex;
1597  m_Buffer.insert(0, wxString(_T(' '), diffLen));
1598  m_BufferLen += diffLen;
1599  m_TokenIndex += diffLen;
1600  // loop the macro expansion stack and adjust them
1601  for (std::list<ExpandedMacro>::iterator i = m_ExpandedMacros.begin();
1602  i != m_ExpandedMacros.end();
1603  ++i)
1604  {
1605  (*i).m_Begin += diffLen;
1606  (*i).m_End += diffLen;
1607  }
1608  }
1609 
1610  // Replacement backward
1611  wxChar* p = const_cast<wxChar*>(m_Buffer.wx_str()) + m_TokenIndex - len;
1612  TRACE(_T("ReplaceBufferText() : <FROM>%s<TO>%s"), wxString(p, len).wx_str(), substitute.wx_str());
1613  // NOTE (ollydbg#1#): This function should be changed to a native wx function if wxString (wxWidgets
1614  // library) is built with UTF8 encoding for wxString. Luckily, both wx2.8.12 and wx 3.0 use the fixed length
1615  // (wchar_t) for the wxString encoding unit, so memcpy is safe here.
1616  memcpy(p, target.wx_str(), len * sizeof(wxChar));
1617 
1618  // move the token index to the beginning of the substituted text
1619  m_TokenIndex -= len;
1620 
1621  if (macro)
1622  m_ExpandedMacros.front().m_Begin = m_TokenIndex;
1623 
1624  // Reset undo token
1628 
1629  // since m_TokenIndex is changed, peek values becomes invalid
1630  m_PeekAvailable = false;
1631 
1632  return true;
1633 }
1634 
1636 {
1637  // loop on the m_ExpandedMacros to see the macro is already used
1638  for (std::list<ExpandedMacro>::iterator i = m_ExpandedMacros.begin();
1639  i != m_ExpandedMacros.end();
1640  ++i)
1641  {
1642  if (tk == (*i).m_Macro)
1643  return false; // this macro is already used
1644  }
1645 
1646  wxString macroExpandedText;
1647  if ( GetMacroExpandedText(tk, macroExpandedText) )
1648  return ReplaceBufferText(macroExpandedText, tk);
1649 
1650  return false;
1651 }
1652 
1653 void Tokenizer::KMP_GetNextVal(const wxChar* pattern, int next[])
1654 {
1655  int j = 0, k = -1;
1656  next[0] = -1;
1657  while (pattern[j] != _T('\0'))
1658  {
1659  if (k == -1 || pattern[j] == pattern[k])
1660  {
1661  ++j;
1662  ++k;
1663  if (pattern[j] != pattern[k])
1664  next[j] = k;
1665  else
1666  next[j] = next[k];
1667  }
1668  else
1669  k = next[k];
1670  }
1671 }
1672 
1673 int Tokenizer::KMP_Find(const wxChar* text, const wxChar* pattern, const int patternLen)
1674 {
1675  if (!text || !pattern || pattern[0] == _T('\0') || text[0] == _T('\0'))
1676  return -1;
1677 
1678  if (patternLen > 1024)
1679  {
1680  if (patternLen < 5012)
1681  TRACE(_T("KMP_Find() : %s - %s"), text, pattern);
1682  else
1683  {
1684  TRACE(_T("KMP_Find: The plan buffer is too big, %d"), patternLen);
1685  return -2;
1686  }
1687  }
1688 
1689  int next[patternLen];
1690  KMP_GetNextVal(pattern, next);
1691 
1692  int index = 0, i = 0, j = 0;
1693  while (text[i] != _T('\0') && pattern[j] != _T('\0'))
1694  {
1695  if (text[i] == pattern[j])
1696  {
1697  ++i;
1698  ++j;
1699  }
1700  else
1701  {
1702  index += j - next[j];
1703  if (next[j] != -1)
1704  j = next[j];
1705  else
1706  {
1707  j = 0;
1708  ++i;
1709  }
1710  }
1711  }
1712 
1713  if (pattern[j] == _T('\0'))
1714  return index;
1715  else
1716  return -1;
1717 }
1718 
1719 void Tokenizer::SetLastTokenIdx(int tokenIdx)
1720 {
1721  m_LastTokenIdx = tokenIdx;
1722  if (tokenIdx != -1 && !m_NextTokenDoc.IsEmpty())
1723  {
1724  if (m_ExpressionResult.empty() || m_ExpressionResult.top())
1726  }
1728 }
1729 
1730 bool Tokenizer::GetMacroExpandedText(const Token* tk, wxString& expandedText)
1731 {
1732  // e.g. "#define AAA AAA" and usage "AAA(x)"
1733  if (!tk || tk->m_Name == tk->m_FullType)
1734  return false;
1735 
1736  // sanity check if we have such macro definition that #define AAA(x,y) x+y+AAA
1737  // if a macro name exists in its definition, it will cause a infinite expansion loop
1738  // Note: disable this sanity check to work around the issue here:
1739  // http://forums.codeblocks.org/index.php/topic,19661.msg134291.html#msg134291
1740  //if (tk->m_FullType.Find(tk->m_Name) != wxNOT_FOUND)
1741  // return false;
1742 
1743  // if it's a variable like macro definition simply return the replacement text
1744  if (tk->m_Args.IsEmpty())
1745  {
1746  expandedText = tk->m_FullType;
1747  return true; // return true for ReplaceBufferText()
1748  }
1749 
1750  // Now, tk is a function like macro definition we are going to expand, it's m_Args contains the
1751  // macro formal arguments, the macro actual arguments is already in m_Buffer now.
1752  // Now, suppose the buffer has such contents:
1753  // ......ABC(abc, (def)).....
1754  // ^--------m_TokenIndex
1755  // and we have a macro definition such as: #define ABC(x,y) x+y
1756  // The first thing we need to do is to breakup the formal arguments string "(x,y)", so we get a
1757  // argument list, we copy the formal arguments(tk->m_Args) to the buffer, so that the buffer
1758  // becomes, formal arguments string followed by actual arguments string
1759  // ....(x,y)(abc, (def)).....
1760  // ^---------m_TokenIndex is moved backward after a ReplaceBufferText() call, when the
1761  // formal arguments is copied to the buffer.
1762  // now, it is ready to split macro arguments by calling the SplitArguments()
1763  // After the first SplitArguments() call, m_TokenIndex go forward a bit
1764  // ....(x,y)(abc, (def)).....
1765  // ^---------m_TokenIndex after first SplitArguments() call
1766  // then
1767  // ....(x,y)(abc, (def)).....
1768  // ^---------m_TokenIndex after second SplitArguments() call
1769  // then we get a list of actual arguments, so we can construct a map which is:
1770  // x -> abc
1771  // y -> (def)
1772  // finally, the "x+y" will be replaced to "abc+(def)"
1773 
1774  // 1. break the formal args into substring with ","
1775  wxArrayString formalArgs;
1776  if (ReplaceBufferText(tk->m_Args))
1777  SplitArguments(formalArgs);
1778 
1779  // NOTE: some function like macros have empty args list, like #define MACRO() { ... }
1780  // we should handle those cases, so don't return
1781  //if (formalArgs.GetCount()==0)
1782  // return false;
1783 
1784  // 2. split the actual macro arguments
1785  wxArrayString actualArgs;
1786  // NOTE: this case is handled above in "if (tk->m_Args.IsEmpty())" test
1787  //if (!formalArgs.IsEmpty()) // e.g. #define AAA(x) x \n #define BBB AAA \n BBB(int) variable;
1788  // SplitArguments(actualArgs);
1789 
1790  // don't replace anything if the actual arguments are missing, such as in the case:
1791  // ..... AAA ;
1792  // ^----m_TokenIndex, we can't find a opening '('
1793  if (!SplitArguments(actualArgs))
1794  {
1795  // reset the m_Lex since macro expansion failed
1796  m_Lex = tk->m_Name;
1797  return false;
1798  }
1799 
1800  // NOTE: some macros have no args (see above)
1801  //if (actualArgs.GetCount()==0)
1802  // return false;
1803 
1804  //sanity check, both formalArgs.GetCount() actualArgs.GetCount() should match
1805  if (formalArgs.GetCount() != actualArgs.GetCount())
1806  return false;
1807 
1808  // 3. get actual context, the expanded text string
1809  expandedText = tk->m_FullType;
1810  const size_t totalCount = formalArgs.GetCount();
1811 
1812  // substitute all the arguments
1813  for (size_t i = 0; i < totalCount; ++i)
1814  {
1815  TRACE(_T("GetMacroExpandedText(): The formal args are '%s' and the actual args are '%s'."),
1816  formalArgs[i].wx_str(), actualArgs[i].wx_str());
1817 
1818  // we search replacement token list
1819  wxChar* data = const_cast<wxChar*>((const wxChar*)expandedText.GetData());
1820  const wxChar* dataEnd = data + expandedText.Len();
1821  // check whether a formal arg exists
1822  const wxChar* key = formalArgs[i].GetData();
1823  const int keyLen = formalArgs[i].Len();
1824 
1825  wxString alreadyReplaced;
1826  alreadyReplaced.Alloc(expandedText.Len() * 2);
1827 
1828  while (true)
1829  {
1830  // find the first exist of formal arg from data to dataEnd
1831  const int pos = GetFirstTokenPosition(data, dataEnd - data, key, keyLen);
1832  if (pos != -1)
1833  {
1834  alreadyReplaced << wxString(data, pos) << actualArgs[i];
1835  data += pos + keyLen;
1836  if (data == dataEnd)
1837  break;
1838  }
1839  else
1840  {
1841  alreadyReplaced << data;
1842  break;
1843  }
1844  }
1845 
1846  expandedText = alreadyReplaced;
1847  }
1848 
1849  // 4. handling operator ## which concatenates two tokens leaving no blank spaces between them
1850  for (int pos = expandedText.Find(_T("##"));
1851  pos != wxNOT_FOUND;
1852  pos = expandedText.Find(_T("##")))
1853  {
1854  int beginPos = pos;
1855  int length = expandedText.size();
1856  while (beginPos > 0 && expandedText[beginPos-1] == _T(' '))
1857  beginPos--;
1858  int endPos = pos + 1;
1859  while (endPos < length - 1 && expandedText[endPos+1] == _T(' '))
1860  endPos++;
1861  // remove the ## with surrounding spaces
1862  expandedText.Remove(beginPos, endPos - beginPos + 1);
1863  }
1864 
1865  // 5. handling stringizing operator #
1866  for (int pos = expandedText.Find(_T("#"));
1867  pos != wxNOT_FOUND;
1868  pos = expandedText.Find(_T("#")))
1869  {
1870  // here, we may have spaces between the # and the next token (a macro argument)
1871  // we need to locate the next token's position, here the next token is xxxxxxxxxxxxxx
1872  // # xxxxxxxxxxxxxx
1873  // ^pos ^beginPos ^endPos
1874 
1875  // let beginPos points to the space char before the next token by looping on spaces
1876  int beginPos = pos;
1877  int length = expandedText.size();
1878  while (beginPos < length - 1 && expandedText[beginPos+1] == _T(' '))
1879  beginPos++;
1880 
1881  // expandedText[beginPos] is not a space char, but we expect an identifier like token
1882  // if # is inside a string, such as "abc#+", then we should not stringizing
1883  // this is just a work around, the correct way is to use Lex() function to cut the
1884  // expandedText into tokens, and compare with tokens, not compared with raw text
1885  if (!wxIsalpha(expandedText[beginPos]))
1886  break;
1887 
1888  // let endPos points to the space char after the next token by looping on non spaces
1889  int endPos = beginPos + 1;
1890  while (endPos < length - 1 && expandedText[endPos+1] != _T(' '))
1891  endPos++;
1892  endPos++;
1893 
1894  // reach the end of string, so append an extra space
1895  if (endPos == length)
1896  expandedText << _T(" ");
1897 
1898  // replace the space to '"', also, remove the #
1899  expandedText.SetChar(pos, _T(' '));
1900  expandedText.SetChar(beginPos, _T('"'));
1901  expandedText.SetChar(endPos, _T('"'));
1902  }
1903 
1904  TRACE(_T("The actual macro expanded text is '%s'."), expandedText.wx_str());
1905  return true;
1906 }
1907 
1908 int Tokenizer::GetFirstTokenPosition(const wxChar* buffer, const size_t bufferLen,
1909  const wxChar* key, const size_t keyLen)
1910 {
1911  int pos = -1;
1912  wxChar* p = const_cast<wxChar*>(buffer);
1913  const wxChar* endBuffer = buffer + bufferLen;
1914  for (;;)
1915  {
1916  const int ret = KMP_Find(p, key, keyLen);
1917  if (ret == -1)
1918  break;
1919 
1920  // check previous char
1921  p += ret;
1922  if (p > buffer)
1923  {
1924  const wxChar ch = *(p - 1);
1925  if (ch == _T('_') || wxIsalnum(ch))
1926  {
1927  p += keyLen;
1928  continue;
1929  }
1930  }
1931 
1932  // check next char
1933  p += keyLen;
1934  if (p < endBuffer)
1935  {
1936  const wxChar ch = *p;
1937  if (ch == _T('_') || wxIsalnum(ch))
1938  continue;
1939  }
1940 
1941  // got it
1942  pos = p - buffer - keyLen;
1943  break;
1944  }
1945 
1946  return pos;
1947 }
1948 
1950 {
1951  size_t lineNr = GetLineNumber();
1952  while (SkipWhiteSpace() || SkipComment())
1953  ;
1954  Lex();
1955  wxString token = m_Lex; // read the token after #define
1956  if (token.IsEmpty())
1957  return;
1958 
1959  // in case we have such macro definition, we need to skip the first backslash
1960  // #define backslash
1961  // MACROFUNCTION(x,y) backslash
1962  // x y
1963  if (token == _T("\\"))
1964  {
1965  while (SkipWhiteSpace() || SkipComment())
1966  ;
1967  Lex();
1968  token = m_Lex; // read the token after "\\", this should be in the next line
1969  if (token.IsEmpty())
1970  return;
1971  }
1972 
1973  // do *NOT* use m_Tokenizer.GetToken()
1974  // e.g.
1975  // #define AAA
1976  // #ifdef AAA
1977  // void fly() {}
1978  // #endif
1979  // The AAA is not add to token tree, so, when call GetToken(), "#ifdef AAA" parse failed
1980  wxString readToEOL = ReadToEOL(true);
1981  wxString para; // function-like macro's args
1982  wxString replaceList;
1983  if (!readToEOL.IsEmpty())
1984  {
1985  // a '(' char follow the macro name (without space between them) is regard as a
1986  // function like macro definition
1987  if (readToEOL[0] == wxT('(')) // function-like macro definition
1988  {
1989  int level = 1;
1990  size_t pos = 0;
1991  while (level && pos < readToEOL.Len())
1992  {
1993  wxChar ch = readToEOL.GetChar(++pos);
1994  if (ch == wxT(')'))
1995  --level;
1996  else if (ch == wxT('('))
1997  ++level;
1998  }
1999  para = readToEOL.Left(++pos);
2000  replaceList << readToEOL.Right(readToEOL.Len() - (++pos));
2001  }
2002  else // variable like macro definition
2003  replaceList << readToEOL;
2004  }
2005 
2006  AddMacroDefinition(token, lineNr, para, replaceList);
2007 }
2008 
2010 {
2011  while (SkipWhiteSpace() || SkipComment())
2012  ;
2013  Lex();
2014  wxString token = m_Lex; // read the token after #undef
2015  if (!token.IsEmpty())
2016  {
2017  int index = m_TokenTree->TokenExists(token, -1, tkMacroDef);
2018  if (index != wxNOT_FOUND)
2019  {
2020  TRACE(F(_T("macro %s undefined from %s:%d"), token.wx_str(), m_Filename.wx_str(), m_LineNumber));
2021  m_TokenTree->erase(index);
2022  }
2023  }
2024  SkipToEOL();
2025 }
2026 
2027 void Tokenizer::AddMacroDefinition(wxString name, int line, wxString para, wxString substitues)
2028 {
2029  int index = m_TokenTree->TokenExists(name, -1, tkMacroDef);
2030  Token* token;
2031 
2032  if (index != wxNOT_FOUND) // already exists, so overwrite! or report a warning!
2033  token = m_TokenTree->at(index);
2034  else
2035  {
2036  token = new Token(name, m_FileIdx, line, ++m_TokenTree->m_TokenTicketCount);
2037  token->m_TokenKind = tkMacroDef;// type of the token
2038  token->m_ParentIndex = -1; // global namespace
2039  m_TokenTree->insert(token); // by default, it was added under m_ParentIndex member
2040  }
2041 
2042  // update the definition
2043  token->m_Args = para; // macro call's formal args
2044  token->m_FullType = substitues; // replace list
2045 
2046  // this will append the doxygen style comments to the Token
2047  SetLastTokenIdx(token->m_Index);
2048 }
std::stack< bool > m_ExpressionResult
preprocessor branch stack, if we meet a #if 1, then the value true will be pushed to the stack...
Definition: tokenizer.h:558
wxString F(const wxChar *msg,...)
sprintf-like function
Definition: logmanager.h:20
bool wantPreprocessor
do we expand the macros in #if like conditional preprocessor directives
Definition: tokenizer.h:47
int TokenExists(const wxString &name, int parent, short int kindMask)
query tokens by names
Definition: tokentree.cpp:141
PreprocessorType
Enum categorizing C-preprocessor directives.
Definition: tokenizer.h:28
void UngetToken()
Undo the GetToken.
Definition: tokenizer.cpp:914
bool SkipToInlineCommentEnd()
Skip to then end of the C++ style comment.
Definition: tokenizer.cpp:588
wxChar PreviousChar() const
Return (peek) the previous character.
Definition: tokenizer.h:356
bool SkipUnwanted()
skips comments, spaces, preprocessor branch.
Definition: tokenizer.cpp:831
#endif
Definition: tokenizer.h:37
int m_ParentIndex
Parent Token index.
Definition: token.h:265
#define TRACE2_SET_FLAG(traceFile)
Definition: tokenizer.cpp:65
bool ReplaceMacroUsage(const Token *tk)
Get expanded text for the current macro usage, then replace buffer for re-parsing.
Definition: tokenizer.cpp:1635
unsigned int m_NestLevel
keep track of block nesting { }
Definition: tokenizer.h:527
unsigned int m_SavedNestingLevel
Definition: tokenizer.h:547
bool SkipString()
Skip the string literal(enclosed in double quotes) or character literal(enclosed in single quotes)...
Definition: tokenizer.cpp:349
int KMP_Find(const wxChar *text, const wxChar *pattern, const int patternLen)
KMP find, get the first position, if find nothing, return -1 https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.
Definition: tokenizer.cpp:1673
Token * at(int idx)
Definition: tokentree.h:51
unsigned int m_BufferLen
Buffer length.
Definition: tokenizer.h:504
const wxString kw_endif(_T("endif"))
read parentheses as token lists, so it returns several tokens like &#39;(&#39; ...
Definition: tokenizer.h:24
#ifndef
Definition: tokenizer.h:32
wxString m_Name
Token&#39;s name, it can be searched in the TokenTree.
Definition: token.h:188
const wxString kw_elifndef(_T("elifndef"))
#define TRACE(format, args...)
Definition: tokenizer.cpp:63
size_t GetLength()
Definition: filemanager.cpp:48
std::list< ExpandedMacro > m_ExpandedMacros
this serves as a macro replacement stack, in the above example, if AAA is replaced by BBBB...
Definition: tokenizer.h:626
wxChar CurrentChar() const
Return the current character indexed(pointed) by m_TokenIndex in the m_Buffer.
Definition: tokenizer.h:331
wxString m_Token
These variables define the current token string and its auxiliary information, such as the token name...
Definition: tokenizer.h:514
const wxString kw_else(_T("else"))
PreprocessorType GetPreprocessorType()
Get current conditional preprocessor type,.
Definition: tokenizer.cpp:1275
TokenizerOptions m_TokenizerOptions
Tokenizer options specify the token reading option.
Definition: tokenizer.h:492
int erase(int loc)
remove the Token specified by the index
Definition: tokentree.cpp:127
bool wxFileExists(const wxString &filename)
const wxString kw_define(_T("define"))
bool IsBackslashBeforeEOL()
Check the previous char before EOL is a backslash, call this function in the condition that the Curre...
Definition: tokenizer.h:386
void HandleDefines()
handle the macro definition statement: #define XXXXX
Definition: tokenizer.cpp:1949
const wxString kw_elif(_T("elif"))
#if
Definition: tokenizer.h:30
unsigned int m_LineNumber
line offset in buffer, please note that it is 1 based, not 0 based
Definition: tokenizer.h:525
#elifndef
Definition: tokenizer.h:35
unsigned int m_FileIdx
File index, useful when parsing documentation;.
Definition: tokenizer.h:500
unsigned int m_PeekNestLevel
Definition: tokenizer.h:539
void SkipToEndConditionPreprocessor()
Skip to the #endif conditional preprocessor directive.
Definition: tokenizer.cpp:1239
size_t Length() const
a container class to hold all the Tokens getting from parsing stage
Definition: tokentree.h:37
const wxString kw_if(_T("if"))
bool SkipPreprocessorBranch()
Skip the C preprocessor directive, such as #ifdef xxxx only the conditional preprocessor directives a...
Definition: tokenizer.cpp:804
#undef
Definition: tokenizer.h:39
wxString m_NextTokenDoc
normally, this record the doxygen style comments for the next token definition for example...
Definition: tokenizer.h:637
int GetFirstTokenPosition(const wxString &buffer, const wxString &target)
Search "target" in the buffer, return first position in buffer.
Definition: tokenizer.h:240
#define _T(string)
unsigned int m_End
the end token index, if beyond this index, we need to pop the buffer
Definition: tokenizer.h:589
bool SkipToStringEnd(const wxChar &ch)
Move to the end of string literal or character literal, the m_TokenIndex will point at the closing qu...
Definition: tokenizer.cpp:314
bool MoveToNextChar()
Move to the next character in the buffer.
Definition: tokenizer.cpp:334
unsigned int GetLineNumber() const
Return the line number of the current token string.
Definition: tokenizer.h:127
int m_Index
current Token index in the tree, it is index of the std::vector<Token*>, so use the index...
Definition: token.h:262
bool wxIsdigit(const wxUniChar &c)
unsigned int m_PeekLineNumber
Definition: tokenizer.h:538
wxString & Remove(size_t pos)
wxString m_Filename
Filename of the buffer.
Definition: tokenizer.h:498
static const size_t s_MaxMacroReplaceDepth
Definition: tokenizer.cpp:89
#define wxT(string)
void AppendDocumentation(int tokenIdx, unsigned int fileIdx, const wxString &doc)
associate a document string with the token
Definition: tokentree.cpp:936
#define wxNOT_FOUND
bool m_IsOK
bool variable specifies whether the buffer is ready for parsing
Definition: tokenizer.h:550
wxString m_Lex
a lexeme string returned by the Lex() function, this is a candidate token string, which may be replaced...
Definition: tokenizer.h:509
#ifdef
Definition: tokenizer.h:31
int m_LastTokenIdx
store the recent added token index for example, here is a comment
Definition: tokenizer.h:648
#define
Definition: tokenizer.h:38
#include #warning and other #xxx
Definition: tokenizer.h:40
wxUSE_UNICODE_dependent wxChar
bool SkipWhiteSpace()
Skip any "tab" "white-space".
Definition: tokenizer.cpp:263
void ReadParentheses(wxString &str)
read a string from &#39;(&#39; to &#39;)&#39;, note that inner parentheses are considered
Definition: tokenizer.cpp:496
bool IsMacroDefined()
If the next token string is macro definition, return true this is used in the situation when we are r...
Definition: tokenizer.cpp:1171
wxString DoGetToken()
Do the actual lexical analysis, both GetToken() and PeekToken() will internally call this function...
Definition: tokenizer.cpp:944
#define TRACE2(format, args...)
Definition: tokenizer.cpp:64
bool m_ReadingMacroDefinition
indicates whether we are reading the macro definition This variable will affect how the doxygen comme...
Definition: tokenizer.h:654
bool ReadFile()
Read a file, and fill the m_Buffer.
Definition: tokenizer.cpp:212
bool Lex()
this function only move the m_TokenIndex and get a lexeme and store it in m_Lex, the m_Lex will be fu...
Definition: tokenizer.cpp:965
const wxString colon(_T(":"))
a symbol found in the parsed files, it can be many kinds, such as a variable, a class and so on...
Definition: token.h:82
unsigned int m_UndoNestLevel
Definition: tokenizer.h:532
wxString PeekToken()
Do a "look ahead", and return the next token string.
Definition: tokenizer.cpp:869
wxString FileName() const
Definition: filemanager.h:59
void HandleConditionPreprocessor(const PreprocessorType type)
handle the preprocessor directive: #ifdef XXX or #endif or #if or #elif or...
Definition: tokenizer.cpp:1340
size_t GetFileIndex(const wxString &filename)
Definition: tokentree.cpp:851
wxString Left(size_t count) const
bool Alloc(size_t nLen)
size_t Replace(const wxString &strOld, const wxString &strNew, bool replaceAll=true)
bool IsEscapedChar()
Check the current character is a C-Escape character in a string.
Definition: tokenizer.cpp:280
bool SkipComment()
Skip the C/C++ comment.
Definition: tokenizer.cpp:612
long GetResult() const
Definition: expression.h:87
const wxString kw_elifdef(_T("elifdef"))
bool SkipToChar(const wxChar &ch)
Skip characters until we meet a ch.
Definition: tokenizer.cpp:302
bool NotEOF() const
return true if it is Not the end of buffer
Definition: tokenizer.h:183
unsigned int m_TokenIndex
index offset in buffer, when parsing a buffer
Definition: tokenizer.h:523
replaced buffer information Here is an example of how macro are expanded
Definition: tokenizer.h:581
void SetChar(size_t n, wxUniChar ch)
void clear()
const wxStringCharType * wx_str() const
bool InitFromBuffer(const wxString &buffer, const wxString &fileOfBuffer=wxEmptyString, size_t initLineNumber=0)
Initialize the buffer by directly using a wxString&#39;s content.
Definition: tokenizer.cpp:174
TokenTree * m_TokenTree
the Token tree to store the macro definition, the token tree is shared with Parserthread ...
Definition: tokenizer.h:495
wxString wxEmptyString
const wxString tabcrlf(_T("\\))
wxString Right(size_t count) const
wxString & assign(const wxString &str, size_t pos, size_t n)
#elifdef
Definition: tokenizer.h:34
wxString m_Args
If it is a function Token, then this value is function arguments, e.g.
Definition: token.h:194
bool CalcPostfix()
Definition: expression.cpp:359
TokenizerState m_State
Tokeniser state specifies the token reading option.
Definition: tokenizer.h:552
unsigned int m_UndoTokenIndex
Backup the previous Token information.
Definition: tokenizer.h:530
~Tokenizer()
Tokenizer destructor.
Definition: tokenizer.cpp:122
bool CharInString(const wxChar ch, const wxChar *chars) const
Check if a ch matches any characters in the wxChar array.
Definition: tokenizer.h:366
const Token * m_Macro
the referenced used macro
Definition: tokenizer.h:591
bool wxIsalnum(const wxUniChar &c)
void SetLastTokenIdx(int tokenIdx)
a Token is added, associate doxygen style documents(comments before the variables) to the Token ...
Definition: tokenizer.cpp:1719
TokenizerState
Enum defines the skip state of the Tokenizer.
Definition: tokenizer.h:19
bool storeDocumentation
do we store the doxygen like document
Definition: tokenizer.h:49
bool ToLong(long *val, int base=10) const
unsigned int m_SavedLineNumber
Definition: tokenizer.h:546
wxString & Append(const char *psz)
LoaderBase * m_Loader
File loader, it load the content to the m_Buffer, either from the harddisk or memory.
Definition: tokenizer.h:554
unsigned int m_UndoLineNumber
Definition: tokenizer.h:531
const wxString kw_ifdef(_T("ifdef"))
const wxString hash(_T("#"))
bool IsEmpty() const
unsigned int m_PeekTokenIndex
Definition: tokenizer.h:537
size_type size() const
bool wxIsalpha(const wxUniChar &c)
void Clear()
size_t Len() const
void AddToInfixExpression(wxString token)
Definition: expression.cpp:232
bool CheckMacroUsageAndReplace()
check the m_Lex to see it is an identifier like token, and also if it is a macro usage, replace it.
Definition: tokenizer.cpp:1069
bool m_PeekAvailable
Peek token information.
Definition: tokenizer.h:535
DLLIMPORT bool cbRead(wxFile &file, wxString &st, wxFontEncoding encoding=wxFONTENCODING_SYSTEM)
Reads a wxString from a non-unicode file. File must be open. File is closed automatically.
Definition: globals.cpp:668
const wxString equal(_T("="))
bool GetMacroExpandedText(const Token *tk, wxString &expandedText)
Get the full expanded text.
Definition: tokenizer.cpp:1730
const wxString kw_undef(_T("undef"))
TokenKind m_TokenKind
See TokenKind class.
Definition: token.h:234
wxString & insert(size_t nPos, const wxString &str)
read parentheses as a single token
Definition: tokenizer.h:22
bool GetStatus() const
Definition: expression.h:88
char * GetData()
Definition: filemanager.cpp:42
bool CalcConditionExpression()
#if xxxx, calculate the value of "xxxx"
Definition: tokenizer.cpp:1093
size_t Add(const wxString &str, size_t copies=1)
void HandleUndefs()
handle the statement: #undef XXXXX
Definition: tokenizer.cpp:2009
bool StartsWith(const wxString &prefix, wxString *rest=NULL) const
#elif
Definition: tokenizer.h:33
void BaseInit()
Initialize some member variables.
Definition: tokenizer.cpp:191
Tokenizer(TokenTree *tokenTree, const wxString &filename=wxEmptyString)
Tokenizer constructor.
Definition: tokenizer.cpp:91
bool SkipToEOL()
Skip from the current position to the end of line, use with care outside this class! ...
Definition: tokenizer.cpp:555
size_t GetCount() const
int Find(wxUniChar ch, bool fromEnd=false) const
wxUniChar GetChar(size_t n) const
wxString ReadToEOL(bool stripUnneeded=true)
return the string from the current position to the end of current line, in most case, this function is used in handling #define, use with care outside this class!
Definition: tokenizer.cpp:367
wxUniChar Last() const
#else
Definition: tokenizer.h:36
wxChar NextChar() const
Return (peek) the next character.
Definition: tokenizer.h:347
size_t m_TokenTicketCount
Definition: tokentree.h:305
void KMP_GetNextVal(const wxChar *pattern, int next[])
used in the KMP find function
Definition: tokenizer.cpp:1653
bool ReplaceBufferText(const wxString &target, const Token *macro=0)
Backward buffer replacement for re-parsing.
Definition: tokenizer.cpp:1546
wxString m_PeekToken
Definition: tokenizer.h:536
macro definition, such as: #define AAA(x,y) f(x,y), where AAA is a token of tkMacroDef ...
Definition: token.h:63
int insert(Token *newToken)
add a new Token instance to the TokenTree
Definition: tokentree.cpp:111
void AddMacroDefinition(wxString name, int line, wxString para, wxString substitues)
add a macro definition to the Token database for example: #define AAA(x,y) x+y
Definition: tokenizer.cpp:2027
wxString m_FullType
this is the full return value (if any): e.g.
Definition: token.h:182
bool Init(const wxString &filename=wxEmptyString, LoaderBase *loader=0)
Initialize the buffer by opening a file through a loader, this function copy the contents from the lo...
Definition: tokenizer.cpp:126
void ConvertInfixToPostfix()
Definition: expression.cpp:250
unsigned int m_SavedTokenIndex
Saved token info (for PeekToken()), m_TokenIndex will be moved forward or backward when either DoGetT...
Definition: tokenizer.h:545
const wxString kw_ifndef(_T("ifndef"))
static wxString Format(const wxString &format,...)
wxString Mid(size_t first, size_t nCount=wxString::npos) const
void SkipToNextConditionPreprocessor()
Skip to the next conditional preprocessor directive branch.
Definition: tokenizer.cpp:1199
wxString m_Buffer
Buffer content, all the lexical analysis is operating on this member variable.
Definition: tokenizer.h:502
wxString GetToken()
Consume and return the current token string.
Definition: tokenizer.cpp:839
const wxCStrData GetData() const
bool IsEOF() const
Check whether the Tokenizer reaches the end of the buffer (file)
Definition: tokenizer.h:177
bool SplitArguments(wxArrayString &results)
Split the macro arguments, and store them in results, when calling this function, we expect that m_To...
Definition: tokenizer.cpp:1486
const wxString colon_colon(_T("::"))