Patch #2797 2009-07-30 20:19

techy

Various memory problem fixes (part 3)
Download
2797-Various_memory.patch (3.7 KB)
Category
Application::Bugfix
Status
Accepted
Close date
2009-08-24 07:31
Assigned to
mortenmacfly
Index: src/plugins/codecompletion/parser/tokenizer.h
===================================================================
--- src/plugins/codecompletion/parser/tokenizer.h    (revision 5716)
+++ src/plugins/codecompletion/parser/tokenizer.h    (working copy)
@@ -111,7 +111,10 @@
         {
             ++m_TokenIndex;
             if (IsEOF())
+            {
+                m_TokenIndex = m_BufferLen;
                 return false;
+            }
 
             if (CurrentChar() == _T('\n'))
                 ++m_LineNumber;
@@ -121,7 +124,10 @@
         {
             m_TokenIndex += amount;
             if (IsEOF())
+            {
+                m_TokenIndex = m_BufferLen;
                 return false;
+            }
 
             if (CurrentChar() == _T('\n'))
                 ++m_LineNumber;
@@ -136,12 +142,9 @@
 
     wxChar CurrentCharMoveNext()
     {
-        size_t i = m_TokenIndex++;
-
         if(m_TokenIndex < m_BufferLen)
-            return m_Buffer.GetChar(i);
-        else
-            return 0;
+            m_TokenIndex++;
+        return CurrentChar();
     };
 
     wxChar NextChar() const
Index: src/plugins/codecompletion/parser/tokenizer.cpp
===================================================================
--- src/plugins/codecompletion/parser/tokenizer.cpp    (revision 5716)
+++ src/plugins/codecompletion/parser/tokenizer.cpp    (working copy)
@@ -100,8 +100,10 @@
 bool Tokenizer::InitFromBuffer(const wxString& buffer)
 {
     BaseInit();
+    m_BufferLen = buffer.Length();
+    m_Buffer.Alloc(m_BufferLen + 1);
     m_Buffer = buffer;
-    m_BufferLen = buffer.Length();
+    m_Buffer += _T(' ');
     m_IsOK = true;
     m_Filename.Clear();
     return true;
@@ -140,14 +142,14 @@
         // same code as in cbC2U() but with the addition of the string length (3rd param in unicode version)
         // and the fallback encoding conversion
 #if wxUSE_UNICODE
-        m_Buffer = wxString(data, wxConvUTF8, m_BufferLen);
+        m_Buffer = wxString(data, wxConvUTF8, m_BufferLen + 1);
         if (m_Buffer.Length() == 0)
         {
             // could not read as utf-8 encoding, try iso8859-1
-            m_Buffer = wxString(data, wxConvISO8859_1, m_BufferLen);
+            m_Buffer = wxString(data, wxConvISO8859_1, m_BufferLen + 1);
         }
 #else
-        m_Buffer = wxString(data, m_BufferLen);
+        m_Buffer = wxString(data, m_BufferLen + 1);
 #endif
 
         if (m_BufferLen != m_Buffer.Length())
@@ -157,6 +159,10 @@
             m_BufferLen = m_Buffer.Length();
 //            asm("int $3;");
         }
+
+        // add 'sentinel' to the end of the string (not counted to the length of the string)
+        m_Buffer += _T(' ');
+
         return data != 0;
     };
 
@@ -170,6 +176,13 @@
         return false;
     m_BufferLen = m_Buffer.Length();
 
+    // add 'sentinel' to the end of the string (not counted to the length of the string)
+
+    // (In the above cbRead() we don't specify the allocated length of m_Buffer so the
+    // call below might force reallocation of the string. However the documentation of
+    // wxWidgets says that the available memory in string is always a multiple of 16,
+    // so we have only 1/16 probability that this happens)
+    m_Buffer += _T(' ');
     return true;
 }
 
@@ -296,7 +309,12 @@
         wxChar last = PreviousChar();
         // if DOS line endings, we 've hit \r and we skip to \n...
         if (last == '\r')
-            last = m_Buffer.GetChar(m_TokenIndex - 2);
+        {
+            if (m_TokenIndex - 2 >= 0)
+                last = m_Buffer.GetChar(m_TokenIndex - 2);
+            else
+                last = _T('\0');
+        }
         if (IsEOF() || last != '\\')
             break;
         else
techy 2009-07-30 20:32

In Tokenizer, the function

bool Tokenizer::SkipToOneOfChars(const wxChar* chars, bool supportNesting)

reads several consecutive characters from the buffer without testing in between whether it has reached EOF. When EOF occurs between these reads, SkipToOneOfChars() can read characters past the end of the buffer.

This could be fixed by adding an EOF test to CurrentChar(), but as this function is called very frequently, that could slow down the parsing. Instead, I allocated the buffer with one extra "sentinel" character at the end of the buffer: MoveToNextChar() stops advancing there, and CurrentChar() simply reads this character's value over and over. As the sentinel value I used ' '. (I also considered using '\0', since wxString isn't null-terminated, but the wxWidgets documentation says that not all wxString functions may work correctly when '\0' is present, and ' ' is very neutral for the tokenizer as well.)