26 #define CC_TOKENIZER_DEBUG_OUTPUT 0 28 #if defined(CC_GLOBAL_DEBUG_OUTPUT) 29 #if CC_GLOBAL_DEBUG_OUTPUT == 1 30 #undef CC_TOKENIZER_DEBUG_OUTPUT 31 #define CC_TOKENIZER_DEBUG_OUTPUT 1 32 #elif CC_GLOBAL_DEBUG_OUTPUT == 2 33 #undef CC_TOKENIZER_DEBUG_OUTPUT 34 #define CC_TOKENIZER_DEBUG_OUTPUT 2 39 #define TRACE(format, args...) \ 40 CCLogger::Get()->DebugLog(F(format, ##args)) 41 #define TRACE2(format, args...) \ 42 CCLogger::Get()->DebugLog(F(format, ##args)) 43 #define TRACE2_SET_FLAG(traceFile) 45 #if CC_TOKENIZER_DEBUG_OUTPUT == 1 46 #define TRACE(format, args...) \ 47 CCLogger::Get()->DebugLog(F(format, ##args)) 48 #define TRACE2(format, args...) 49 #define TRACE2_SET_FLAG(traceFile) 50 #elif CC_TOKENIZER_DEBUG_OUTPUT == 2 51 #define TRACE(format, args...) \ 54 if (g_EnableDebugTrace) \ 55 CCLogger::Get()->DebugLog(F(format, ##args)); \ 58 #define TRACE2(format, args...) \ 59 CCLogger::Get()->DebugLog(F(format, ##args)) 60 #define TRACE2_SET_FLAG(traceFile) \ 61 g_EnableDebugTrace = !g_DebugTraceFile.IsEmpty() && traceFile.EndsWith(g_DebugTraceFile) 63 #define TRACE(format, args...) 64 #define TRACE2(format, args...) 65 #define TRACE2_SET_FLAG(traceFile) 92 m_TokenTree(tokenTree),
101 m_PeekAvailable(false),
105 m_SavedTokenIndex(0),
106 m_SavedLineNumber(1),
107 m_SavedNestingLevel(0),
113 m_ReadingMacroDefinition(false)
134 TRACE(
_T(
"Init() : Called without filename."));
142 #ifdef CC_PARSER_TEST 214 bool success =
false;
238 success = (data != 0);
288 unsigned int numBackslash = 2;
294 if ( (numBackslash%2) == 1)
355 if (ch ==
_T(
'"') || ch ==
_T(
'\''))
385 TRACE(
_T(
"%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
389 static const size_t maxBufferLen = 4094;
391 wxChar buffer[maxBufferLen + 2];
423 if (ch <=
_T(
' ') && p > buffer && *(p - 1) == ch)
429 if (ch ==
_T(
'"') || ch ==
_T(
'\''))
432 str.
Append(buffer, p - buffer);
444 if (p >= buffer + maxBufferLen)
446 str.
Append(buffer, p - buffer);
459 while (p > buffer && *(--p) <=
_T(
' '))
465 while (p > buffer && *(p - 1) <=
_T(
' '))
469 str.
Append(buffer, p - buffer);
471 TRACE(
_T(
"ReadToEOL(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
505 if (token ==
_T(
"("))
510 else if (token ==
_T(
")"))
517 else if (token ==
_T(
"*") || token ==
_T(
"&") )
521 else if (token ==
_T(
"="))
523 str <<
_T(
" ") << token <<
_T(
" ");
525 else if (token ==
_T(
","))
527 str << token <<
_T(
" ");
537 wxChar nextChar = token[0];
541 || lastChar ==
_T(
'*') || lastChar ==
_T(
'&') || lastChar ==
_T(
')')))
543 str <<
_T(
" ") << token;
557 TRACE(
_T(
"%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
582 TRACE(
_T(
"SkipToEOL(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
590 TRACE(
_T(
"%s : line=%u, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
604 TRACE(
_T(
"SkipToInlineCommentEnd(): (END) We are now at line %u, CurrentChar='%c', PreviousChar='%c',")
648 if (!isDoc && cstyle)
651 if (!isDoc && !cstyle)
690 TRACE(
_T(
"SkipComment() : Need to call SkipToInlineCommentEnd() here at line = %u"),
m_LineNumber);
703 int lineToAppend = -1;
759 if (lineToAppend >= 0)
777 m_NextTokenDoc.clear();
794 if (!cstyle && skipped)
795 doc =
_T(
"@brief ") + doc +
_T(
'\n');
949 bool identifier =
Lex();
969 bool identifier =
false;
991 #ifdef __WXMSW__ // This is a Windows only bug! 993 else if (c == 178 || c == 179 || c == 185)
1013 else if ( (c ==
'"') || (c ==
'\'') )
1037 if (next ==
_T(
'=') || next ==
_T(
'!') || next ==
_T(
'>') || next ==
_T(
'<'))
1107 m_TokenIndex = undoIndex;
1113 while (m_TokenIndex <
m_BufferLen - untouchedBufferLen)
1122 if (m_TokenIndex >=
m_BufferLen - untouchedBufferLen)
1134 if (token ==
_T(
"defined"))
1147 if (token.
ToLong(&value, 16))
1162 TRACE(
_T(
"CalcConditionExpression() : exp.GetStatus() : %d, exp.GetResult() : %d"),
1177 bool haveParen =
false;
1180 if (token ==
_T(
"("))
1204 if (ch ==
_T(
'\'') || ch ==
_T(
'"') || ch ==
_T(
'/') || ch <=
_T(
' '))
1224 if (current ==
_T(
'i') && next ==
_T(
'f'))
1228 else if (current ==
_T(
'e') && (next ==
_T(
'l') || next ==
_T(
'n')))
1244 if (ch ==
_T(
'\'') || ch ==
_T(
'"') || ch ==
_T(
'/') || ch <=
_T(
' '))
1261 if (current ==
_T(
'i') && next ==
_T(
'f'))
1265 else if (current ==
_T(
'e') && next ==
_T(
'n'))
1289 switch (token.
Len())
1397 bool result =
false;
1410 bool result =
false;
1427 bool result =
false;
1511 if (token ==
_T(
"("))
1513 else if (token ==
_T(
")"))
1517 if (token ==
_T(
",") && level == 1)
1522 else if (level != 0)
1577 for (
size_t i = 0; i < substitute.
Len(); ++i)
1593 const size_t len = substitute.
Len();
1599 m_TokenIndex += diffLen;
1605 (*i).m_Begin += diffLen;
1606 (*i).m_End += diffLen;
1642 if (tk == (*i).m_Macro)
1657 while (pattern[j] !=
_T(
'\0'))
1659 if (k == -1 || pattern[j] == pattern[k])
1663 if (pattern[j] != pattern[k])
1675 if (!text || !pattern || pattern[0] ==
_T(
'\0') || text[0] ==
_T(
'\0'))
1678 if (patternLen > 1024)
1680 if (patternLen < 5012)
1681 TRACE(
_T(
"KMP_Find() : %s - %s"), text, pattern);
1684 TRACE(
_T(
"KMP_Find: The plan buffer is too big, %d"), patternLen);
1689 int next[patternLen];
1692 int index = 0, i = 0, j = 0;
1693 while (text[i] !=
_T(
'\0') && pattern[j] !=
_T(
'\0'))
1695 if (text[i] == pattern[j])
1702 index += j - next[j];
1713 if (pattern[j] ==
_T(
'\0'))
1810 const size_t totalCount = formalArgs.
GetCount();
1813 for (
size_t i = 0; i < totalCount; ++i)
1815 TRACE(
_T(
"GetMacroExpandedText(): The formal args are '%s' and the actual args are '%s'."),
1816 formalArgs[i].wx_str(), actualArgs[i].wx_str());
1820 const wxChar* dataEnd = data + expandedText.
Len();
1822 const wxChar* key = formalArgs[i].GetData();
1823 const int keyLen = formalArgs[i].Len();
1826 alreadyReplaced.
Alloc(expandedText.
Len() * 2);
1834 alreadyReplaced <<
wxString(data, pos) << actualArgs[i];
1835 data += pos + keyLen;
1836 if (data == dataEnd)
1841 alreadyReplaced << data;
1846 expandedText = alreadyReplaced;
1850 for (
int pos = expandedText.
Find(
_T(
"##"));
1852 pos = expandedText.
Find(
_T(
"##")))
1855 int length = expandedText.
size();
1856 while (beginPos > 0 && expandedText[beginPos-1] ==
_T(
' '))
1858 int endPos = pos + 1;
1859 while (endPos < length - 1 && expandedText[endPos+1] ==
_T(
' '))
1862 expandedText.
Remove(beginPos, endPos - beginPos + 1);
1866 for (
int pos = expandedText.
Find(
_T(
"#"));
1868 pos = expandedText.
Find(
_T(
"#")))
1877 int length = expandedText.
size();
1878 while (beginPos < length - 1 && expandedText[beginPos+1] ==
_T(
' '))
1889 int endPos = beginPos + 1;
1890 while (endPos < length - 1 && expandedText[endPos+1] !=
_T(
' '))
1895 if (endPos == length)
1896 expandedText <<
_T(
" ");
1899 expandedText.
SetChar(pos, _T(
' '));
1900 expandedText.
SetChar(beginPos, _T(
'"'));
1901 expandedText.
SetChar(endPos, _T(
'"'));
1904 TRACE(
_T(
"The actual macro expanded text is '%s'."), expandedText.
wx_str());
1909 const wxChar* key,
const size_t keyLen)
1913 const wxChar* endBuffer = buffer + bufferLen;
1916 const int ret =
KMP_Find(p, key, keyLen);
1924 const wxChar ch = *(p - 1);
1942 pos = p - buffer - keyLen;
1963 if (token ==
_T(
"\\"))
1987 if (readToEOL[0] ==
wxT(
'('))
1991 while (level && pos < readToEOL.
Len())
1996 else if (ch ==
wxT(
'('))
1999 para = readToEOL.
Left(++pos);
2000 replaceList << readToEOL.
Right(readToEOL.
Len() - (++pos));
2003 replaceList << readToEOL;
std::stack< bool > m_ExpressionResult
preprocessor branch stack, if we meet a #if 1, then the value true will be pushed to the stack...
wxString F(const wxChar *msg,...)
sprintf-like function
bool wantPreprocessor
do we expand the macros in #if like conditional preprocessor directives
int TokenExists(const wxString &name, int parent, short int kindMask)
query tokens by names
PreprocessorType
Enum categorizing C-preprocessor directives.
void UngetToken()
Undo the GetToken.
wxChar PreviousChar() const
Return (peek) the previous character.
bool SkipUnwanted()
skips comments, spaces, preprocessor branch.
int m_ParentIndex
Parent Token index.
#define TRACE2_SET_FLAG(traceFile)
bool ReplaceMacroUsage(const Token *tk)
Get expanded text for the current macro usage, then replace buffer for re-parsing.
unsigned int m_NestLevel
keep track of block nesting { }
unsigned int m_SavedNestingLevel
bool SkipString()
Skip the string literal(enclosed in double quotes) or character literal(enclosed in single quotes)...
int KMP_Find(const wxChar *text, const wxChar *pattern, const int patternLen)
KMP find, get the first position, if find nothing, return -1 https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.
unsigned int m_BufferLen
Buffer length.
const wxString kw_endif(_T("endif"))
read parentheses as token lists, so it returns several tokens like '(' ...
wxString m_Name
Token's name, it can be searched in the TokenTree.
const wxString kw_elifndef(_T("elifndef"))
#define TRACE(format, args...)
std::list< ExpandedMacro > m_ExpandedMacros
this serves as a macro replacement stack, in the above example, if AAA is replaced by BBBB...
wxChar CurrentChar() const
Return the current character indexed(pointed) by m_TokenIndex in the m_Buffer.
wxString m_Token
These variables define the current token string and its auxiliary information, such as the token name...
const wxString kw_else(_T("else"))
PreprocessorType GetPreprocessorType()
Get the current conditional preprocessor type.
TokenizerOptions m_TokenizerOptions
Tokenizer options specify the token reading option.
int erase(int loc)
remove the Token specified by the index
bool wxFileExists(const wxString &filename)
const wxString kw_define(_T("define"))
bool IsBackslashBeforeEOL()
Check whether the previous char before EOL is a backslash; call this function in the condition that the Curre...
void HandleDefines()
handle the macro definition statement: #define XXXXX
const wxString kw_elif(_T("elif"))
unsigned int m_LineNumber
line offset in buffer, please note that it is 1 based, not 0 based
unsigned int m_FileIdx
File index, useful when parsing documentation.
unsigned int m_PeekNestLevel
void SkipToEndConditionPreprocessor()
Skip to the #endif conditional preprocessor directive.
a container class to hold all the Tokens getting from parsing stage
const wxString kw_if(_T("if"))
bool SkipPreprocessorBranch()
Skip the C preprocessor directive, such as #ifdef xxxx only the conditional preprocessor directives a...
wxString m_NextTokenDoc
normally, this records the doxygen-style comments for the next token definition, for example...
int GetFirstTokenPosition(const wxString &buffer, const wxString &target)
Search "target" in the buffer, return first position in buffer.
unsigned int m_End
the end token index, if beyond this index, we need to pop the buffer
bool SkipToStringEnd(const wxChar &ch)
Move to the end of string literal or character literal, the m_TokenIndex will point at the closing qu...
bool MoveToNextChar()
Move to the next character in the buffer.
unsigned int GetLineNumber() const
Return the line number of the current token string.
int m_Index
current Token index in the tree, it is index of the std::vector<Token*>, so use the index...
bool wxIsdigit(const wxUniChar &c)
unsigned int m_PeekLineNumber
wxString & Remove(size_t pos)
wxString m_Filename
Filename of the buffer.
static const size_t s_MaxMacroReplaceDepth
void AppendDocumentation(int tokenIdx, unsigned int fileIdx, const wxString &doc)
associate a document string with the token
bool m_IsOK
bool variable specifies whether the buffer is ready for parsing
wxString m_Lex
a lexeme string returned by the Lex() function, this is a candidate token string, which may be replaced...
int m_LastTokenIdx
store the recent added token index for example, here is a comment
#include #warning and other #xxx
wxUSE_UNICODE_dependent wxChar
bool SkipWhiteSpace()
Skip any "tab" "white-space".
void ReadParentheses(wxString &str)
read a string from '(' to ')', note that inner parentheses are considered
bool IsMacroDefined()
If the next token string is macro definition, return true this is used in the situation when we are r...
wxString DoGetToken()
Do the actual lexical analysis, both GetToken() and PeekToken() will internally call this function...
#define TRACE2(format, args...)
bool m_ReadingMacroDefinition
indicates whether we are reading the macro definition This variable will affect how the doxygen comme...
bool ReadFile()
Read a file, and fill the m_Buffer.
bool Lex()
this function only moves the m_TokenIndex, gets a lexeme and stores it in m_Lex, the m_Lex will be fu...
const wxString colon(_T(":"))
a symbol found in the parsed files, it can be many kinds, such as a variable, a class and so on...
unsigned int m_UndoNestLevel
wxString PeekToken()
Do a "look ahead", and return the next token string.
wxString FileName() const
void HandleConditionPreprocessor(const PreprocessorType type)
handle the preprocessor directive: #ifdef XXX or #endif or #if or #elif or...
size_t GetFileIndex(const wxString &filename)
wxString Left(size_t count) const
size_t Replace(const wxString &strOld, const wxString &strNew, bool replaceAll=true)
bool IsEscapedChar()
Check whether the current character is a C escape character in a string.
const wxString kw_elifdef(_T("elifdef"))
bool SkipToChar(const wxChar &ch)
Skip characters until we meet a ch.
bool NotEOF() const
return true if it is Not the end of buffer
unsigned int m_TokenIndex
index offset in buffer, when parsing a buffer
replaced buffer information. Here is an example of how macros are expanded
void SetChar(size_t n, wxUniChar ch)
const wxStringCharType * wx_str() const
bool InitFromBuffer(const wxString &buffer, const wxString &fileOfBuffer=wxEmptyString, size_t initLineNumber=0)
Initialize the buffer by directly using a wxString's content.
TokenTree * m_TokenTree
the Token tree to store the macro definition, the token tree is shared with Parserthread ...
const wxString tabcrlf(_T("\\))
wxString Right(size_t count) const
wxString & assign(const wxString &str, size_t pos, size_t n)
wxString m_Args
If it is a function Token, then this value is function arguments, e.g.
TokenizerState m_State
Tokenizer state specifies the token reading option.
unsigned int m_UndoTokenIndex
Backup the previous Token information.
~Tokenizer()
Tokenizer destructor.
bool CharInString(const wxChar ch, const wxChar *chars) const
Check if a ch matches any characters in the wxChar array.
const Token * m_Macro
the referenced used macro
bool wxIsalnum(const wxUniChar &c)
void SetLastTokenIdx(int tokenIdx)
a Token is added, associate doxygen style documents(comments before the variables) to the Token ...
TokenizerState
Enum defines the skip state of the Tokenizer.
bool storeDocumentation
do we store the doxygen like document
bool ToLong(long *val, int base=10) const
unsigned int m_SavedLineNumber
wxString & Append(const char *psz)
LoaderBase * m_Loader
File loader; it loads the content into the m_Buffer, either from the hard disk or from memory.
unsigned int m_UndoLineNumber
const wxString kw_ifdef(_T("ifdef"))
const wxString hash(_T("#"))
unsigned int m_PeekTokenIndex
bool wxIsalpha(const wxUniChar &c)
void AddToInfixExpression(wxString token)
bool CheckMacroUsageAndReplace()
check the m_Lex to see whether it is an identifier-like token, and if it is also a macro usage, replace it.
bool m_PeekAvailable
Peek token information.
DLLIMPORT bool cbRead(wxFile &file, wxString &st, wxFontEncoding encoding=wxFONTENCODING_SYSTEM)
Reads a wxString from a non-unicode file. File must be open. File is closed automatically.
const wxString equal(_T("="))
bool GetMacroExpandedText(const Token *tk, wxString &expandedText)
Get the full expanded text.
const wxString kw_undef(_T("undef"))
TokenKind m_TokenKind
See TokenKind class.
wxString & insert(size_t nPos, const wxString &str)
read parentheses as a single token
bool CalcConditionExpression()
#if xxxx, calculate the value of "xxxx"
size_t Add(const wxString &str, size_t copies=1)
void HandleUndefs()
handle the statement: #undef XXXXX
bool StartsWith(const wxString &prefix, wxString *rest=NULL) const
void BaseInit()
Initialize some member variables.
Tokenizer(TokenTree *tokenTree, const wxString &filename=wxEmptyString)
Tokenizer constructor.
bool SkipToEOL()
Skip from the current position to the end of line, use with care outside this class! ...
int Find(wxUniChar ch, bool fromEnd=false) const
wxUniChar GetChar(size_t n) const
wxString ReadToEOL(bool stripUnneeded=true)
return the string from the current position to the end of current line, in most case, this function is used in handling #define, use with care outside this class!
wxChar NextChar() const
Return (peek) the next character.
size_t m_TokenTicketCount
void KMP_GetNextVal(const wxChar *pattern, int next[])
used in the KMP find function
bool ReplaceBufferText(const wxString &target, const Token *macro=0)
Backward buffer replacement for re-parsing.
macro definition, such as: #define AAA(x,y) f(x,y), where AAA is a token of tkMacroDef ...
int insert(Token *newToken)
add a new Token instance to the TokenTree
void AddMacroDefinition(wxString name, int line, wxString para, wxString substitues)
add a macro definition to the Token database for example: #define AAA(x,y) x+y
wxString m_FullType
this is the full return value (if any): e.g.
bool Init(const wxString &filename=wxEmptyString, LoaderBase *loader=0)
Initialize the buffer by opening a file through a loader; this function copies the contents from the lo...
void ConvertInfixToPostfix()
unsigned int m_SavedTokenIndex
Saved token info (for PeekToken()), m_TokenIndex will be moved forward or backward when either DoGetT...
const wxString kw_ifndef(_T("ifndef"))
static wxString Format(const wxString &format,...)
wxString Mid(size_t first, size_t nCount=wxString::npos) const
void SkipToNextConditionPreprocessor()
Skip to the next conditional preprocessor directive branch.
wxString m_Buffer
Buffer content, all the lexical analysis is operating on this member variable.
wxString GetToken()
Consume and return the current token string.
const wxCStrData GetData() const
bool IsEOF() const
Check whether the Tokenizer reaches the end of the buffer (file)
bool SplitArguments(wxArrayString &results)
Split the macro arguments, and store them in results, when calling this function, we expect that m_To...
const wxString colon_colon(_T("::"))