Code::Blocks  SVN r11506
tokenizer.h
Go to the documentation of this file.
1 /*
2  * This file is part of the Code::Blocks IDE and licensed under the GNU General Public License, version 3
3  * http://www.gnu.org/licenses/gpl-3.0.html
4  */
5 
6 #ifndef TOKENIZER_H
7 #define TOKENIZER_H
8 
9 #include <wx/string.h>
10 #include <configmanager.h>
11 #include <filemanager.h>
12 #include "token.h"
13 
14 #include <stack>
15 #include <list>
16 
17 
20 {
22  tsNormal = 0x0000,
24  tsRawExpression = 0x0001
25 };
26 
29 {
30  ptIf = 0x0001,
31  ptIfdef = 0x0002,
32  ptIfndef = 0x0003,
33  ptElif = 0x0004,
34  ptElifdef = 0x0005,
35  ptElifndef = 0x0006,
36  ptElse = 0x0007,
37  ptEndif = 0x0008,
38  ptDefine = 0x0009,
39  ptUndef = 0x000A,
40  ptOthers = 0x000B
41 };
42 
45 {
50 };
51 
64 class Tokenizer
65 {
66 public:
70  Tokenizer(TokenTree* tokenTree, const wxString& filename = wxEmptyString);
71 
73  ~Tokenizer();
74 
79  bool Init(const wxString& filename = wxEmptyString, LoaderBase* loader = 0);
80 
87  bool InitFromBuffer(const wxString& buffer, const wxString& fileOfBuffer = wxEmptyString,
88  size_t initLineNumber = 0);
89 
91  wxString GetToken();
92 
94  wxString PeekToken();
95 
97  void UngetToken();
98 
101  {
102  m_TokenizerOptions.wantPreprocessor = wantPreprocessor;
103  m_TokenizerOptions.storeDocumentation = storeDocumentation;
104  };
105 
110  {
111  m_State = state;
112  };
113 
116  {
117  return m_State;
118  }
119 
/** Return the filename associated with the buffer.
 *  @return a const reference to the m_Filename member.
 */
121  const wxString& GetFilename() const
122  {
123  return m_Filename;
124  };
125 
/** Return the line number of the current token string.
 *  @return the value of m_LineNumber, which is 1 based, not 0 based.
 */
127  unsigned int GetLineNumber() const
128  {
129  return m_LineNumber;
130  };
131 
/** Return the brace "{}" nesting level.
 *  @return the current value of m_NestLevel.
 */
135  unsigned int GetNestingLevel() const
136  {
137  return m_NestLevel;
138  };
139 
142  {
143  m_SavedNestingLevel = m_NestLevel;
144  };
145 
148  {
149  m_NestLevel = m_SavedNestingLevel;
150  };
151 
/** Report whether the buffer is ready for parsing.
 *  @return the m_IsOK flag.
 */
153  bool IsOK() const
154  {
155  return m_IsOK;
156  };
157 
163  wxString ReadToEOL(bool stripUnneeded = true);
164 
168  void ReadParentheses(wxString& str);
169 
171  bool SkipToEOL();
172 
174  bool SkipToInlineCommentEnd();
175 
/** Check whether the Tokenizer has reached the end of the buffer.
 *  @return true when m_TokenIndex is at or beyond m_BufferLen.
 */
177  bool IsEOF() const
178  {
179  return m_TokenIndex >= m_BufferLen;
180  }
181 
/** Check whether the end of the buffer has not been reached yet.
 *  @return true while m_TokenIndex is below m_BufferLen.
 */
183  bool NotEOF() const
184  {
185  return m_TokenIndex < m_BufferLen;
186  }
187 
224  bool ReplaceBufferText(const wxString& target, const Token* macro = 0);
225 
232  bool ReplaceMacroUsage(const Token* tk);
233 
/** Search "target" in "buffer" and return the first position found.
 *  Convenience overload: it forwards the character data and the lengths of
 *  both strings to the (const wxChar*, size_t, const wxChar*, size_t) overload.
 *  @param buffer the text to search in
 *  @param target the text to search for
 *  @return the value produced by the pointer-based overload
 *  NOTE(review): the value returned when target is absent is decided by that
 *  overload, which is not defined in this header - confirm in the implementation.
 */
240  int GetFirstTokenPosition(const wxString& buffer, const wxString& target)
241  {
242  return GetFirstTokenPosition(buffer.GetData(), buffer.Len(), target.GetData(), target.Len());
243  }
244 
251  int GetFirstTokenPosition(const wxChar* buffer, const size_t bufferLen,
252  const wxChar* key, const size_t keyLen);
253 
257  int KMP_Find(const wxChar* text, const wxChar* pattern, const int patternLen);
258 
260  void SetLastTokenIdx(int tokenIdx);
261 
262 protected:
264  void BaseInit();
265 
270  wxString DoGetToken();
271 
276  bool CheckMacroUsageAndReplace();
277 
283  bool Lex();
284 
286  bool ReadFile();
287 
289  bool IsEscapedChar();
290 
292  bool SkipToChar(const wxChar& ch);
293 
295  bool SkipUnwanted();
296 
298  bool SkipWhiteSpace();
299 
305  bool SkipComment();
306 
313  bool SkipPreprocessorBranch();
314 
318  bool SkipString();
319 
325  bool SkipToStringEnd(const wxChar& ch);
326 
328  bool MoveToNextChar();
329 
332  {
333  if(m_TokenIndex < m_BufferLen)
334  return m_Buffer.GetChar(m_TokenIndex);
335  return 0;
336  };
337 
340  {
341  wxChar c = CurrentChar();
342  m_TokenIndex++;
343  return c;
344  };
345 
/** Return (peek) the character following the current one.
 *  m_TokenIndex is not modified.
 *  @return the character at m_TokenIndex + 1, or 0 when that index is not inside the buffer.
 */
347  wxChar NextChar() const
348  {
349  if ((m_TokenIndex + 1) >= m_BufferLen) // (m_TokenIndex + 1) < 0 can never be true, m_TokenIndex is unsigned
350  return 0;
351 
352  return m_Buffer.GetChar(m_TokenIndex + 1);
353  };
354 
357  {
358  if (m_TokenIndex > 0 && m_BufferLen > 0) // m_TokenIndex > m_BufferLen can never be true
359  return m_Buffer.GetChar(m_TokenIndex - 1);
360 
361  return 0;
362  };
363 
364 private:
/** Check if ch matches any character in the wxChar array.
 *  @param ch the character to look for
 *  @param chars the candidate characters; the number of characters examined is wxStrlen(chars)
 *  @return true as soon as a matching character is found, false if none of them matches
 */
366  inline bool CharInString(const wxChar ch, const wxChar* chars) const
367  {
368  int len = wxStrlen(chars); // NOTE(review): the size_t result of wxStrlen is narrowed to int here
369  for (int i = 0; i < len; ++i)
370  {
371  if (ch == chars[i])
372  return true;
373  }
374  return false;
375  };
376 
/** Check whether the character before the end of line is a backslash.
 *  The character returned by PreviousChar() is examined. If it is a carriage
 *  return and at least two characters precede m_TokenIndex, the character at
 *  m_TokenIndex - 2 is tested instead.
 *  NOTE(review): presumably meant to be called while the current character is
 *  the line feed that ends the line - confirm against the callers.
 *  @return true if the examined character is a backslash
 */
386  inline bool IsBackslashBeforeEOL()
387  {
388  wxChar last = PreviousChar();
389  // if DOS line endings, we have hit \r and we skip to \n...
390  if (last == _T('\r') && m_TokenIndex >= 2)
391  return m_Buffer.GetChar(m_TokenIndex - 2) == _T('\\');
392  return last == _T('\\');
393  }
394 
396  bool CalcConditionExpression();
397 
408  bool IsMacroDefined();
409 
411  void HandleDefines();
412 
414  void HandleUndefs();
415 
423  void AddMacroDefinition(wxString name, int line, wxString para, wxString substitues);
424 
438  void SkipToNextConditionPreprocessor();
439 
453  void SkipToEndConditionPreprocessor();
454 
456  PreprocessorType GetPreprocessorType();
457 
462  void HandleConditionPreprocessor(const PreprocessorType type);
463 
474  bool SplitArguments(wxArrayString& results);
475 
486  bool GetMacroExpandedText(const Token* tk, wxString& expandedText);
487 
489  void KMP_GetNextVal(const wxChar* pattern, int next[]);
490 
493 
496 
500  unsigned int m_FileIdx;
504  unsigned int m_BufferLen;
505 
510 
515 
523  unsigned int m_TokenIndex;
525  unsigned int m_LineNumber;
527  unsigned int m_NestLevel;
528 
530  unsigned int m_UndoTokenIndex;
531  unsigned int m_UndoLineNumber;
532  unsigned int m_UndoNestLevel;
533 
537  unsigned int m_PeekTokenIndex;
538  unsigned int m_PeekLineNumber;
539  unsigned int m_PeekNestLevel;
540 
545  unsigned int m_SavedTokenIndex;
546  unsigned int m_SavedLineNumber;
547  unsigned int m_SavedNestingLevel;
548 
550  bool m_IsOK;
558  std::stack<bool> m_ExpressionResult;
559 
560 
582  {
583  ExpandedMacro():m_Macro(0)
584  {
585  };
587  unsigned int m_Begin;
589  unsigned int m_End;
591  const Token* m_Macro;
592  };
593 
626  std::list<ExpandedMacro> m_ExpandedMacros;
627 
638 
649 
655 };
656 
657 #endif // TOKENIZER_H
std::stack< bool > m_ExpressionResult
preprocessor branch stack, if we meet a #if 1, then the value true will be pushed to the stack...
Definition: tokenizer.h:558
bool wantPreprocessor
do we expand the macros in #if like conditional preprocessor directives
Definition: tokenizer.h:47
PreprocessorType
Enum categorizing C-preprocessor directives.
Definition: tokenizer.h:28
void SetTokenizerOption(bool wantPreprocessor, bool storeDocumentation)
Handle condition preprocessor and store documentation or not.
Definition: tokenizer.h:100
wxChar PreviousChar() const
Return (peek) the previous character.
Definition: tokenizer.h:356
#endif
Definition: tokenizer.h:37
unsigned int m_NestLevel
keep track of block nesting { }
Definition: tokenizer.h:527
unsigned int m_SavedNestingLevel
Definition: tokenizer.h:547
void RestoreNestingLevel()
Restore the brace level.
Definition: tokenizer.h:147
const wxString & GetFilename() const
Return the opened file's name.
Definition: tokenizer.h:121
unsigned int m_BufferLen
Buffer length.
Definition: tokenizer.h:504
read parentheses as token lists, so it returns several tokens like '(' ...
Definition: tokenizer.h:24
#ifndef
Definition: tokenizer.h:32
unsigned int m_Begin
the token index we begin to parse after replacement
Definition: tokenizer.h:585
size_t wxStrlen(const wxCharBuffer &s)
std::list< ExpandedMacro > m_ExpandedMacros
this serves as a macro replacement stack, in the above example, if AAA is replaced by BBBB...
Definition: tokenizer.h:626
wxChar CurrentChar() const
Return the current character indexed(pointed) by m_TokenIndex in the m_Buffer.
Definition: tokenizer.h:331
wxString m_Token
These variables define the current token string and its auxiliary information, such as the token name...
Definition: tokenizer.h:514
TokenizerOptions m_TokenizerOptions
Tokenizer options specify the token reading option.
Definition: tokenizer.h:492
bool IsBackslashBeforeEOL()
Check the previous char before EOL is a backslash, call this function in the condition that the Curre...
Definition: tokenizer.h:386
unsigned int GetNestingLevel() const
Return the brace "{}" level.
Definition: tokenizer.h:135
#if
Definition: tokenizer.h:30
unsigned int m_LineNumber
line offset in buffer, please note that it is 1 based, not 0 based
Definition: tokenizer.h:525
#elifndef
Definition: tokenizer.h:35
unsigned int m_FileIdx
File index, useful when parsing documentation.
Definition: tokenizer.h:500
unsigned int m_PeekNestLevel
Definition: tokenizer.h:539
a container class to hold all the Tokens obtained from the parsing stage
Definition: tokentree.h:37
#undef
Definition: tokenizer.h:39
wxString m_NextTokenDoc
normally, this record the doxygen style comments for the next token definition for example...
Definition: tokenizer.h:637
int GetFirstTokenPosition(const wxString &buffer, const wxString &target)
Search "target" in the buffer, return first position in buffer.
Definition: tokenizer.h:240
#define _T(string)
unsigned int m_End
the end token index, if beyond this index, we need to pop the buffer
Definition: tokenizer.h:589
unsigned int GetLineNumber() const
Return the line number of the current token string.
Definition: tokenizer.h:127
unsigned int m_PeekLineNumber
Definition: tokenizer.h:538
wxString m_Filename
Filename of the buffer.
Definition: tokenizer.h:498
This is just a simple lexer class.
Definition: tokenizer.h:64
bool m_IsOK
bool variable specifies whether the buffer is ready for parsing
Definition: tokenizer.h:550
wxString m_Lex
a lexeme string returned by the Lex() function, this is a candidate token string, which may be replaced...
Definition: tokenizer.h:509
#ifdef
Definition: tokenizer.h:31
int m_LastTokenIdx
store the recent added token index for example, here is a comment
Definition: tokenizer.h:648
#include #warning and other #xxx
Definition: tokenizer.h:40
#define
Definition: tokenizer.h:38
wxUSE_UNICODE_dependent wxChar
bool m_ReadingMacroDefinition
indicates whether we are reading the macro definition This variable will affect how the doxygen comme...
Definition: tokenizer.h:654
a symbol found in the parsed files, it can be many kinds, such as a variable, a class and so on...
Definition: token.h:82
unsigned int m_UndoNestLevel
Definition: tokenizer.h:532
void SetState(TokenizerState state)
Set the Tokenizer skipping options.
Definition: tokenizer.h:109
bool NotEOF() const
return true if it is Not the end of buffer
Definition: tokenizer.h:183
unsigned int m_TokenIndex
index offset in buffer, when parsing a buffer
Definition: tokenizer.h:523
replaced buffer information Here is an example of how macro are expanded
Definition: tokenizer.h:581
TokenTree * m_TokenTree
the Token tree to store the macro definition, the token tree is shared with Parserthread ...
Definition: tokenizer.h:495
wxString wxEmptyString
#elifdef
Definition: tokenizer.h:34
TokenizerState m_State
Tokenizer state specifies the token reading option.
Definition: tokenizer.h:552
unsigned int m_UndoTokenIndex
Backup the previous Token information.
Definition: tokenizer.h:530
bool CharInString(const wxChar ch, const wxChar *chars) const
Check if a ch matches any characters in the wxChar array.
Definition: tokenizer.h:366
const Token * m_Macro
the referenced used macro
Definition: tokenizer.h:591
TokenizerState
Enum defines the skip state of the Tokenizer.
Definition: tokenizer.h:19
bool storeDocumentation
do we store the doxygen like document
Definition: tokenizer.h:49
unsigned int m_SavedLineNumber
Definition: tokenizer.h:546
LoaderBase * m_Loader
File loader, it loads the content into m_Buffer, either from the hard disk or from memory.
Definition: tokenizer.h:554
unsigned int m_UndoLineNumber
Definition: tokenizer.h:531
unsigned int m_PeekTokenIndex
Definition: tokenizer.h:537
size_t Len() const
bool IsOK() const
If the buffer is correctly loaded, this function returns true.
Definition: tokenizer.h:153
bool m_PeekAvailable
Peek token information.
Definition: tokenizer.h:535
read parentheses as a single token
Definition: tokenizer.h:22
#elif
Definition: tokenizer.h:33
#else
Definition: tokenizer.h:36
wxChar NextChar() const
Return (peek) the next character.
Definition: tokenizer.h:347
void SaveNestingLevel()
Save the brace "{" level, the parser might need to ignore the nesting level in some cases...
Definition: tokenizer.h:141
wxString m_PeekToken
Definition: tokenizer.h:536
Whether we need to handle C-preprocessor directives.
Definition: tokenizer.h:44
unsigned int m_SavedTokenIndex
Saved token info (for PeekToken()), m_TokenIndex will be moved forward or backward when either DoGetT...
Definition: tokenizer.h:545
wxChar CurrentCharMoveNext()
Do the previous two functions sequentially.
Definition: tokenizer.h:339
wxString m_Buffer
Buffer content, all the lexical analysis is operating on this member variable.
Definition: tokenizer.h:502
const wxCStrData GetData() const
bool IsEOF() const
Check whether the Tokenizer reaches the end of the buffer (file)
Definition: tokenizer.h:177
TokenizerState GetState()
Return the token reading options value.
Definition: tokenizer.h:115