26 #define CC_TOKENIZER_DEBUG_OUTPUT 0    28 #if defined(CC_GLOBAL_DEBUG_OUTPUT)    29     #if CC_GLOBAL_DEBUG_OUTPUT == 1    30         #undef CC_TOKENIZER_DEBUG_OUTPUT    31         #define CC_TOKENIZER_DEBUG_OUTPUT 1    32     #elif CC_GLOBAL_DEBUG_OUTPUT == 2    33         #undef CC_TOKENIZER_DEBUG_OUTPUT    34         #define CC_TOKENIZER_DEBUG_OUTPUT 2    39     #define TRACE(format, args...) \    40             CCLogger::Get()->DebugLog(F(format, ##args))    41     #define TRACE2(format, args...) \    42             CCLogger::Get()->DebugLog(F(format, ##args))    43     #define TRACE2_SET_FLAG(traceFile)    45     #if CC_TOKENIZER_DEBUG_OUTPUT == 1    46         #define TRACE(format, args...) \    47             CCLogger::Get()->DebugLog(F(format, ##args))    48         #define TRACE2(format, args...)    49         #define TRACE2_SET_FLAG(traceFile)    50     #elif CC_TOKENIZER_DEBUG_OUTPUT == 2    51         #define TRACE(format, args...)                                              \    54                 if (g_EnableDebugTrace)                                             \    55                     CCLogger::Get()->DebugLog(F(format, ##args));                   \    58         #define TRACE2(format, args...) \    59             CCLogger::Get()->DebugLog(F(format, ##args))    60         #define TRACE2_SET_FLAG(traceFile) \    61             g_EnableDebugTrace = !g_DebugTraceFile.IsEmpty() && traceFile.EndsWith(g_DebugTraceFile)    63         #define TRACE(format, args...)    64         #define TRACE2(format, args...)    65         #define TRACE2_SET_FLAG(traceFile)    92     m_TokenTree(tokenTree),
   101     m_PeekAvailable(false),
   105     m_SavedTokenIndex(0),
   106     m_SavedLineNumber(1),
   107     m_SavedNestingLevel(0),
   113     m_ReadingMacroDefinition(false)
   134             TRACE(
_T(
"Init() : Called without filename."));
   142 #ifdef CC_PARSER_TEST   214     bool success = 
false;
   238         success = (data != 0);
   288         unsigned int numBackslash = 2; 
   294         if ( (numBackslash%2) == 1) 
   355     if (ch == 
_T(
'"') || ch == 
_T(
'\''))
   385         TRACE(
_T(
"%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
   389         static const size_t maxBufferLen = 4094;
   391         wxChar buffer[maxBufferLen + 2];
   423                 if (ch <= 
_T(
' ') && p > buffer && *(p - 1) == ch)
   429                 if (ch == 
_T(
'"') || ch == 
_T(
'\''))
   432                         str.
Append(buffer, p - buffer);
   444                 if (p >= buffer + maxBufferLen)
   446                     str.
Append(buffer, p - buffer);
   459                 while (p > buffer && *(--p) <= 
_T(
' '))
   465         while (p > buffer && *(p - 1) <= 
_T(
' '))
   469             str.
Append(buffer, p - buffer);
   471         TRACE(
_T(
"ReadToEOL(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
   505         if (token == 
_T(
"("))
   510         else if (token == 
_T(
")"))
   517         else if (token == 
_T(
"*") || token == 
_T(
"&") )
   521         else if (token == 
_T(
"=")) 
   523             str << 
_T(
" ") << token << 
_T(
" ");
   525         else if (token == 
_T(
",")) 
   527             str << token << 
_T(
" ");
   537             wxChar nextChar = token[0];
   541                     || lastChar == 
_T(
'*') || lastChar == 
_T(
'&') || lastChar == 
_T(
')')))
   543                 str << 
_T(
" ") << token;
   557     TRACE(
_T(
"%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
   582     TRACE(
_T(
"SkipToEOL(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
   590     TRACE(
_T(
"%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
   604     TRACE(
_T(
"SkipToInlineCommentEnd(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c',")
   648         if (!isDoc && cstyle)
   651         if (!isDoc && !cstyle) 
   690                 TRACE(
_T(
"SkipComment() : Need to call SkipToInlineCommentEnd() here at line = %u"), 
m_LineNumber);
   703         int lineToAppend = -1;
   759             if (lineToAppend >= 0) 
   777                     m_NextTokenDoc.clear();
   794                 if (!cstyle && skipped)
   795                     doc = 
_T(
"@brief ") + doc + 
_T(
'\n');
   949         bool identifier = 
Lex();
   969     bool identifier = 
false;
   991 #ifdef __WXMSW__ // This is a Windows only bug!   993     else if (c == 178 || c == 179 || c == 185)
  1013     else if ( (c == 
'"') || (c == 
'\'') )
  1037         if (next == 
_T(
'=') || next == 
_T(
'!') || next == 
_T(
'>') || next == 
_T(
'<'))
  1107     m_TokenIndex = undoIndex;
  1113     while (m_TokenIndex < 
m_BufferLen - untouchedBufferLen)
  1122         if (m_TokenIndex >= 
m_BufferLen - untouchedBufferLen)
  1134             if (token == 
_T(
"defined"))
  1147             if (token.
ToLong(&value, 16))
  1162         TRACE(
_T(
"CalcConditionExpression() : exp.GetStatus() : %d, exp.GetResult() : %d"),
  1177     bool haveParen = 
false;
  1180     if (token == 
_T(
"("))
  1204         if (ch == 
_T(
'\'') || ch == 
_T(
'"') || ch == 
_T(
'/') || ch <= 
_T(
' '))
  1224             if (current == 
_T(
'i') && next == 
_T(
'f'))
  1228             else if (current == 
_T(
'e') && (next == 
_T(
'l') || next == 
_T(
'n')))
  1244         if (ch == 
_T(
'\'') || ch == 
_T(
'"') || ch == 
_T(
'/') || ch <= 
_T(
' '))
  1261             if (current == 
_T(
'i') && next == 
_T(
'f'))
  1265             else if (current == 
_T(
'e') && next == 
_T(
'n'))
  1289     switch (token.
Len())
  1397             bool result = 
false;
  1410             bool result = 
false;
  1427             bool result = 
false;
  1511         if (token == 
_T(
"("))
  1513         else if (token == 
_T(
")"))
  1517         if (token == 
_T(
",") && level == 1)
  1522         else if (level != 0)
  1577     for (
size_t i = 0; i < substitute.
Len(); ++i)
  1593     const size_t len = substitute.
Len();
  1599         m_TokenIndex += diffLen;
  1605             (*i).m_Begin += diffLen;
  1606             (*i).m_End += diffLen;
  1642         if (tk == (*i).m_Macro)
  1657     while (pattern[j] != 
_T(
'\0'))
  1659         if (k == -1 || pattern[j] == pattern[k])
  1663             if (pattern[j] != pattern[k])
  1675     if (!text || !pattern || pattern[0] == 
_T(
'\0') || text[0] == 
_T(
'\0'))
  1678     if (patternLen > 1024)
  1680         if (patternLen < 5012)
  1681             TRACE(
_T(
"KMP_Find() : %s - %s"), text, pattern);
  1684             TRACE(
_T(
"KMP_Find: The plan buffer is too big, %d"), patternLen);
  1689     int next[patternLen];
  1692     int index = 0, i = 0, j = 0;
  1693     while (text[i] != 
_T(
'\0') && pattern[j] != 
_T(
'\0'))
  1695         if (text[i] == pattern[j])
  1702             index += j - next[j];
  1713     if (pattern[j] == 
_T(
'\0'))
  1810     const size_t totalCount = formalArgs.
GetCount();
  1813     for (
size_t i = 0; i < totalCount; ++i)
  1815         TRACE(
_T(
"GetMacroExpandedText(): The formal args are '%s' and the actual args are '%s'."),
  1816               formalArgs[i].wx_str(), actualArgs[i].wx_str());
  1820         const wxChar* dataEnd = data + expandedText.
Len();
  1822         const wxChar* key = formalArgs[i].GetData();
  1823         const int keyLen = formalArgs[i].Len();
  1826         alreadyReplaced.
Alloc(expandedText.
Len() * 2);
  1834                 alreadyReplaced << 
wxString(data, pos) << actualArgs[i];
  1835                 data += pos + keyLen;
  1836                 if (data == dataEnd)
  1841                 alreadyReplaced << data;
  1846         expandedText = alreadyReplaced;
  1850     for (
int pos = expandedText.
Find(
_T(
"##"));
  1852          pos = expandedText.
Find(
_T(
"##")))
  1855         int length = expandedText.
size();
  1856         while (beginPos > 0 && expandedText[beginPos-1] == 
_T(
' '))
  1858         int endPos = pos + 1;
  1859         while (endPos < length - 1 && expandedText[endPos+1] == 
_T(
' '))
  1862         expandedText.
Remove(beginPos, endPos - beginPos + 1);
  1866     for (
int pos = expandedText.
Find(
_T(
"#"));
  1868          pos = expandedText.
Find(
_T(
"#")))
  1877         int length = expandedText.
size();
  1878         while (beginPos < length - 1 && expandedText[beginPos+1] == 
_T(
' '))
  1889         int endPos = beginPos + 1;
  1890         while (endPos < length - 1 && expandedText[endPos+1] != 
_T(
' '))
  1895         if (endPos == length)
  1896             expandedText << 
_T(
" ");
  1899         expandedText.
SetChar(pos, _T(
' '));
  1900         expandedText.
SetChar(beginPos, _T(
'"'));
  1901         expandedText.
SetChar(endPos, _T(
'"'));
  1904     TRACE(
_T(
"The actual macro expanded text is '%s'."), expandedText.
wx_str());
  1909                                      const wxChar* key, 
const size_t keyLen)
  1913     const wxChar* endBuffer = buffer + bufferLen;
  1916         const int ret = 
KMP_Find(p, key, keyLen);
  1924             const wxChar ch = *(p - 1);
  1942         pos = p - buffer - keyLen;
  1963     if (token == 
_T(
"\\"))
  1987         if (readToEOL[0] == 
wxT(
'(')) 
  1991             while (level && pos < readToEOL.
Len())
  1996                 else if (ch == 
wxT(
'('))
  1999             para = readToEOL.
Left(++pos);
  2000             replaceList << readToEOL.
Right(readToEOL.
Len() - (++pos));
  2003             replaceList << readToEOL;
 std::stack< bool > m_ExpressionResult
preprocessor branch stack, if we meet a #if 1, then the value true will be pushed to the stack...
wxString F(const wxChar *msg,...)
sprintf-like function 
bool wantPreprocessor
do we expand the macros in #if like conditional preprocessor directives 
int TokenExists(const wxString &name, int parent, short int kindMask)
query tokens by names 
PreprocessorType
Enum categorizing C-preprocessor directives. 
void UngetToken()
Undo the GetToken. 
wxChar PreviousChar() const
Return (peek) the previous character. 
bool SkipUnwanted()
skips comments, spaces, preprocessor branch. 
int m_ParentIndex
Parent Token index. 
#define TRACE2_SET_FLAG(traceFile)
bool ReplaceMacroUsage(const Token *tk)
Get expanded text for the current macro usage, then replace buffer for re-parsing. 
unsigned int m_NestLevel
keep track of block nesting { } 
unsigned int m_SavedNestingLevel
bool SkipString()
Skip the string literal(enclosed in double quotes) or character literal(enclosed in single quotes)...
int KMP_Find(const wxChar *text, const wxChar *pattern, const int patternLen)
KMP find: get the position of the first match; if nothing is found, return -1. See https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm. 
unsigned int m_BufferLen
Buffer length. 
const wxString kw_endif(_T("endif"))
read parentheses as token lists, so it returns several tokens like '(' ... 
wxString m_Name
Token's name, it can be searched in the TokenTree. 
const wxString kw_elifndef(_T("elifndef"))
#define TRACE(format, args...)
std::list< ExpandedMacro > m_ExpandedMacros
this serves as a macro replacement stack, in the above example, if AAA is replaced by BBBB...
wxChar CurrentChar() const
Return the current character indexed(pointed) by m_TokenIndex in the m_Buffer. 
wxString m_Token
These variables define the current token string and its auxiliary information, such as the token name...
const wxString kw_else(_T("else"))
PreprocessorType GetPreprocessorType()
Get the current conditional preprocessor type. 
TokenizerOptions m_TokenizerOptions
Tokenizer options specify the token reading option. 
int erase(int loc)
remove the Token specified by the index 
bool wxFileExists(const wxString &filename)
const wxString kw_define(_T("define"))
bool IsBackslashBeforeEOL()
Check whether the previous char before EOL is a backslash, call this function in the condition that the Curre...
void HandleDefines()
handle the macro definition statement: #define XXXXX 
const wxString kw_elif(_T("elif"))
unsigned int m_LineNumber
line offset in buffer, please note that it is 1 based, not 0 based 
unsigned int m_FileIdx
File index, useful when parsing documentation. 
unsigned int m_PeekNestLevel
void SkipToEndConditionPreprocessor()
Skip to the #endif conditional preprocessor directive. 
a container class to hold all the Tokens obtained from the parsing stage 
const wxString kw_if(_T("if"))
bool SkipPreprocessorBranch()
Skip the C preprocessor directive, such as #ifdef xxxx only the conditional preprocessor directives a...
wxString m_NextTokenDoc
normally, this records the doxygen style comments for the next token definition for example...
int GetFirstTokenPosition(const wxString &buffer, const wxString &target)
Search "target" in the buffer, return first position in buffer. 
unsigned int m_End
the end token index, if beyond this index, we need to pop the buffer 
bool SkipToStringEnd(const wxChar &ch)
Move to the end of string literal or character literal, the m_TokenIndex will point at the closing qu...
bool MoveToNextChar()
Move to the next character in the buffer. 
unsigned int GetLineNumber() const
Return the line number of the current token string. 
int m_Index
current Token index in the tree, it is index of the std::vector<Token*>, so use the index...
bool wxIsdigit(const wxUniChar &c)
unsigned int m_PeekLineNumber
wxString & Remove(size_t pos)
wxString m_Filename
Filename of the buffer. 
static const size_t s_MaxMacroReplaceDepth
void AppendDocumentation(int tokenIdx, unsigned int fileIdx, const wxString &doc)
associate a document string with the token 
bool m_IsOK
bool variable specifies whether the buffer is ready for parsing 
wxString m_Lex
a lexeme string returned by the Lex() function, this is a candidate token string, which may be replaced...
int m_LastTokenIdx
store the recent added token index for example, here is a comment 
#include #warning and other #xxx 
wxUSE_UNICODE_dependent wxChar
bool SkipWhiteSpace()
Skip any "tab" "white-space". 
void ReadParentheses(wxString &str)
read a string from '(' to ')', note that inner parentheses are considered 
bool IsMacroDefined()
If the next token string is macro definition, return true this is used in the situation when we are r...
wxString DoGetToken()
Do the actual lexical analysis, both GetToken() and PeekToken() will internally call this function...
#define TRACE2(format, args...)
bool m_ReadingMacroDefinition
indicates whether we are reading the macro definition This variable will affect how the doxygen comme...
bool ReadFile()
Read a file, and fill the m_Buffer. 
bool Lex()
this function only moves the m_TokenIndex, gets a lexeme and stores it in m_Lex, the m_Lex will be fu...
const wxString colon(_T(":"))
a symbol found in the parsed files, it can be many kinds, such as a variable, a class and so on...
unsigned int m_UndoNestLevel
wxString PeekToken()
Do a "look ahead", and return the next token string. 
wxString FileName() const
void HandleConditionPreprocessor(const PreprocessorType type)
handle the preprocessor directive: #ifdef XXX or #endif or #if or #elif or... 
size_t GetFileIndex(const wxString &filename)
wxString Left(size_t count) const
size_t Replace(const wxString &strOld, const wxString &strNew, bool replaceAll=true)
bool IsEscapedChar()
Check whether the current character is a C-Escape character in a string. 
const wxString kw_elifdef(_T("elifdef"))
bool SkipToChar(const wxChar &ch)
Skip characters until we meet a ch. 
bool NotEOF() const
return true if it is Not the end of buffer 
unsigned int m_TokenIndex
index offset in buffer, when parsing a buffer 
replaced buffer information. Here is an example of how macros are expanded 
void SetChar(size_t n, wxUniChar ch)
const wxStringCharType * wx_str() const
bool InitFromBuffer(const wxString &buffer, const wxString &fileOfBuffer=wxEmptyString, size_t initLineNumber=0)
Initialize the buffer by directly using a wxString's content. 
TokenTree * m_TokenTree
the Token tree to store the macro definition, the token tree is shared with Parserthread ...
const wxString tabcrlf(_T("\\))
wxString Right(size_t count) const
wxString & assign(const wxString &str, size_t pos, size_t n)
wxString m_Args
If it is a function Token, then this value is function arguments, e.g. 
TokenizerState m_State
Tokeniser state specifies the token reading option. 
unsigned int m_UndoTokenIndex
Backup the previous Token information. 
~Tokenizer()
Tokenizer destructor. 
bool CharInString(const wxChar ch, const wxChar *chars) const
Check if a ch matches any characters in the wxChar array. 
const Token * m_Macro
the referenced used macro 
bool wxIsalnum(const wxUniChar &c)
void SetLastTokenIdx(int tokenIdx)
a Token is added, associate doxygen style documents(comments before the variables) to the Token ...
TokenizerState
Enum defines the skip state of the Tokenizer. 
bool storeDocumentation
do we store the doxygen like document 
bool ToLong(long *val, int base=10) const
unsigned int m_SavedLineNumber
wxString & Append(const char *psz)
LoaderBase * m_Loader
File loader, it loads the content into the m_Buffer, either from the hard disk or memory. 
unsigned int m_UndoLineNumber
const wxString kw_ifdef(_T("ifdef"))
const wxString hash(_T("#"))
unsigned int m_PeekTokenIndex
bool wxIsalpha(const wxUniChar &c)
void AddToInfixExpression(wxString token)
bool CheckMacroUsageAndReplace()
check the m_Lex to see whether it is an identifier-like token, and also if it is a macro usage, replace it. 
bool m_PeekAvailable
Peek token information. 
DLLIMPORT bool cbRead(wxFile &file, wxString &st, wxFontEncoding encoding=wxFONTENCODING_SYSTEM)
Reads a wxString from a non-unicode file. File must be open. File is closed automatically. 
const wxString equal(_T("="))
bool GetMacroExpandedText(const Token *tk, wxString &expandedText)
Get the full expanded text. 
const wxString kw_undef(_T("undef"))
TokenKind m_TokenKind
See TokenKind class. 
wxString & insert(size_t nPos, const wxString &str)
read parentheses as a single token 
bool CalcConditionExpression()
#if xxxx, calculate the value of "xxxx" 
size_t Add(const wxString &str, size_t copies=1)
void HandleUndefs()
handle the statement: #undef XXXXX 
bool StartsWith(const wxString &prefix, wxString *rest=NULL) const
void BaseInit()
Initialize some member variables. 
Tokenizer(TokenTree *tokenTree, const wxString &filename=wxEmptyString)
Tokenizer constructor. 
bool SkipToEOL()
Skip from the current position to the end of line, use with care outside this class! ...
int Find(wxUniChar ch, bool fromEnd=false) const
wxUniChar GetChar(size_t n) const
wxString ReadToEOL(bool stripUnneeded=true)
return the string from the current position to the end of current line, in most case, this function is used in handling #define, use with care outside this class! 
wxChar NextChar() const
Return (peek) the next character. 
size_t m_TokenTicketCount
void KMP_GetNextVal(const wxChar *pattern, int next[])
used in the KMP find function 
bool ReplaceBufferText(const wxString &target, const Token *macro=0)
Backward buffer replacement for re-parsing. 
macro definition, such as: #define AAA(x,y) f(x,y), where AAA is a token of tkMacroDef ...
int insert(Token *newToken)
add a new Token instance to the TokenTree 
void AddMacroDefinition(wxString name, int line, wxString para, wxString substitues)
add a macro definition to the Token database for example: #define AAA(x,y) x+y 
wxString m_FullType
this is the full return value (if any): e.g. 
bool Init(const wxString &filename=wxEmptyString, LoaderBase *loader=0)
Initialize the buffer by opening a file through a loader, this function copies the contents from the lo...
void ConvertInfixToPostfix()
unsigned int m_SavedTokenIndex
Saved token info (for PeekToken()), m_TokenIndex will be moved forward or backward when either DoGetT...
const wxString kw_ifndef(_T("ifndef"))
static wxString Format(const wxString &format,...)
wxString Mid(size_t first, size_t nCount=wxString::npos) const
void SkipToNextConditionPreprocessor()
Skip to the next conditional preprocessor directive branch. 
wxString m_Buffer
Buffer content, all the lexical analysis is operating on this member variable. 
wxString GetToken()
Consume and return the current token string. 
const wxCStrData GetData() const
bool IsEOF() const
Check whether the Tokenizer reaches the end of the buffer (file) 
bool SplitArguments(wxArrayString &results)
Split the macro arguments, and store them in results, when calling this function, we expect that m_To...
const wxString colon_colon(_T("::"))