25 #include "nsUniversalDetector.h"    36     nsUniversalDetector(NS_FILTER_ALL),
    48     nsUniversalDetector(NS_FILTER_ALL),
    60     nsUniversalDetector(NS_FILTER_ALL),
   119     size_t size = file.
Length();
   132     buffer[size + 0] = 0;
   133     buffer[size + 1] = 0;
   134     buffer[size + 2] = 0;
   135     buffer[size + 3] = 0;
   137     size_t readBytes = file.
Read((
void*)buffer, size);
   152     if (cfgMgr->
ReadInt(
_T(
"/default_encoding/use_option"), 0) == 1)
   160             msg.
Printf(
_T(
"Warning: bypassing C::B's auto-detection!\n"   161                           "Encoding requested is: %s (ID: %d)"),
   178                 msg.
Printf(
_T(
"Detected encoding via BOM: %s (ID: %d)"),
   188             Reset(); nsresult res = HandleData((
char*)buffer, size);
   209                     msg.
Printf(
_T(
"Text seems to be pure ASCII!\n"   210                                   "We use user specified encoding: %s (ID: %d)"),
   224                     msg.
Printf(
_T(
"Warning: Using user specified encoding as fallback!\n"   225                                   "Encoding fallback is: %s (ID: %d)"),
   240         msg.
Printf(
_T(
"Final encoding detected: %s (ID: %d)"),
   258     if (!buffer || size == 0) 
return false;
   260     const wxByte*  buff_ptr = buffer;
   261     const wxByte*  buff_end = &buffer[size];
   267         if      (size >= 4 && memcmp(buffer, 
"\xFF\xFE\x00\x00", 4) == 0)
   273         else if (size >= 4 && memcmp(buffer, 
"\xFE\xFF\x00\x00", 4) == 0)
   279         else if (size >= 4 && memcmp(buffer, 
"\x00\x00\xFE\xFF", 4) == 0)
   285         else if (size >= 4 && memcmp(buffer, 
"\x00\x00\xFF\xFE", 4) == 0)
   291         else if (             memcmp(buffer, 
"\xFF\xFE", 2) == 0)
   297         else if (             memcmp(buffer, 
"\xFE\xFF", 2) == 0)
   303         else if (size >= 3 && memcmp(buffer, 
"\xEF\xBB\xBF", 3) == 0)
   309         else if (size >= 5 && memcmp(buffer, 
"\x2B\x2F\x76\x38\x2D", 5) == 0)
   332         unsigned int null_byte_count  = 0;
   333         unsigned int utf_bytes        = 0;
   334         unsigned int good_utf_count   = 0;
   335         unsigned int bad_utf_count    = 0;
   336         unsigned int bad_utf32_count  = 0;
   337         unsigned int bad_utf16_count  = 0;
   338         unsigned int nl_utf32le_count = 0;
   339         unsigned int nl_utf32be_count = 0;
   340         unsigned int nl_utf16le_count = 0;
   341         unsigned int nl_utf16be_count = 0;
   343         while (buff_ptr != buff_end)
   345             if (*buff_ptr == 0) ++null_byte_count;
   350                 if ((*buff_ptr & 0xC0) == 0x80 || *buff_ptr == 0)
   354                     const char c = *buff_ptr;
   356                     if      ((c & 0x80) == 0x00) utf_bytes = 1;
   357                     else if ((c & 0xE0) == 0xC0) utf_bytes = 2;
   358                     else if ((c & 0xF0) == 0xE0) utf_bytes = 3;
   359                     else if ((c & 0xF8) == 0xF0) utf_bytes = 4;
   367             else if ((*buff_ptr & 0xC0) == 0x80)
   380             if ((
wxUIntPtr)buff_ptr % 4 == 0 && buff_ptr+4 <= buff_end)
   382                 if (*((
wxUint32*)buff_ptr) == 0                        ) ++bad_utf32_count;
   388             if ((
wxUIntPtr)buff_ptr % 2 == 0 && buff_ptr+4 <= buff_end)
   390                 if (*((
wxUint16*)buff_ptr) == 0)                         ++bad_utf16_count;
   403         else if (null_byte_count)
   436     if (!buffer || size == 0)
   440             logmsg.
Printf(
_T(
"Encoding conversion has failed (buffer is empty)!"));
   463         wideBuff = conv.cMB2WC((
const char*)buffer, size + 4 - 
m_BOMSizeInBytes, &outlen);
   468         wideBuff = conv.cMB2WC((
const char*)buffer, size + 4 - 
m_BOMSizeInBytes, &outlen);
   472         wxMBConvUTF16BE conv;
   473         wideBuff = conv.cMB2WC((
const char*)buffer, size + 4 - 
m_BOMSizeInBytes, &outlen);
   477         wxMBConvUTF16LE conv;
   478         wideBuff = conv.cMB2WC((
const char*)buffer, size + 4 - 
m_BOMSizeInBytes, &outlen);
   482         wxMBConvUTF32BE conv;
   483         wideBuff = conv.cMB2WC((
const char*)buffer, size + 4 - 
m_BOMSizeInBytes, &outlen);
   487         wxMBConvUTF32LE conv;
   488         wideBuff = conv.cMB2WC((
const char*)buffer, size + 4 - 
m_BOMSizeInBytes, &outlen);
   498            && conv.
Convert((
const char*)buffer, tmp) )
   504                 logmsg.
Printf(
_T(
"Conversion succeeded using wxEncodingConverter "   505                                  "(buffer size = %lu, converted size = %lu."), static_cast<unsigned long>(size), static_cast<unsigned long>(outlen));
   515                 wideBuff = csconv.cMB2WC((
const char*)buffer, size + 4 - 
m_BOMSizeInBytes, &outlen);
   518                     logmsg.
Printf(
_T(
"Conversion succeeded using wxCSConv "   519                                      "(buffer size = %lu, converted size = %lu."), static_cast<unsigned long>(size), static_cast<unsigned long>(outlen));
   536         logmsg.
Printf(
_T(
"Encoding conversion using settings has failed!\n"   537                          "Encoding chosen was: %s (ID: %d)"),
   545     if (cfgMgr->
ReadBool(
_T(
"/default_encoding/use_system"), 
true))
   547         if (platform::windows)
   550                 logmgr->
DebugLog(
_T(
"Trying system locale as fallback..."));
   558                 logmgr->
DebugLog(
_T(
"Trying ISO-8859-1 as fallback..."));
   564         wideBuff = conv_system.cMB2WC((
const char*)buffer, size + 4 - 
m_BOMSizeInBytes, &outlen);
   571                 logmsg.
Printf(
_T(
"Encoding conversion using system locale fallback has failed!\n"   572                                  "Last encoding choosen was: %s (ID: %d)\n"   573                                  "Don't know what to do."),
   585             logmgr->
DebugLog(
_T(
"Encoding conversion has seriously failed!\n"   586                                 "Don't know what to do."));
 
static wxFontEncoding GetEncodingFromName(const wxString &encoding)
wxString F(const wxChar *msg,...)
sprintf-like function 
#define wxUINT32_SWAP_ON_BE(wxUint32_value)
wxFontEncoding m_Encoding
ConfigManager * GetConfigManager(const wxString &name_space) const
int ReadInt(const wxString &name, int defaultVal=0)
bool DetectEncoding(const wxString &filename, bool convert_to_wxstring=true)
static Manager * Get()
Use Manager::Get() to get a pointer to its instance Manager::Get() is guaranteed to never return an i...
#define wxUINT16_SWAP_ON_LE(wxUint16_value)
void resize(size_t nSize, wxUniChar ch='\0')
wxFileOffset Length() const
bool ReadBool(const wxString &name, bool defaultVal=false)
bool DetectEncodingEx(const wxByte *buffer, size_t len)
void Report(const char *aCharset) override
#define wxUINT32_SWAP_ON_LE(wxUint32_value)
wxString makeStringNoNull(const wxWCharBuffer &wideBuff)
Convert the char buffer to wxString and if there are any null-terminating characters at the end - rem...
bool Contains(const wxString &str) const
DLLIMPORT wxString cbC2U(const char *str)
Return str as a proper unicode-compatible string. 
static wxString GetSystemEncodingName()
LogManager * GetLogManager() const
wxString Read(const wxString &key, const wxString &defaultVal=wxEmptyString)
#define wxUINT16_SWAP_ON_BE(wxUint16_value)
wxString GetWxStr() const
size_t find_last_not_of(const wxString &str, size_t nStart=npos) const
const wxStringCharType * wx_str() const
static wxFontEncoding GetSystemEncoding()
ssize_t Read(void *buffer, size_t count)
EncodingDetector(const wxString &filename, bool useLog=true)
bool ConvertToWxString(const wxByte *buffer, size_t size)
int GetBOMSizeInBytes() const
void DebugLog(const wxString &msg, Logger::level lv=Logger::info)
bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method=wxCONVERT_STRICT)
wxFontEncoding GetFontEncoding() const
bool Convert(const char *input, char *output) const
virtual wxFontEncoding CharsetToEncoding(const wxString &charset, bool interactive=true)
static wxFontMapper * Get()
int Printf(const wxString &pszFormat,...)
~EncodingDetector() override