Windows 上 Multibyte 轉 Wide Char 的一些方法

#include <windows.h>
#include <cstdio>
#include <iostream>

#include <atlbase.h> 
#include <atlstr.h> 
#include <atlconv.h>
#include <string>
#include <codecvt>
#include <xlocbuf>
#include <cassert>


void PrintBytes(const wchar_t* wcsCaption, const wchar_t* wcs)
{
    static const int ROW_WIDTH = 16;

    size_t cbBuf = (wcslen(wcs) + 1) * sizeof(wchar_t);
    auto pBuf = (const BYTE*)wcs;
    wprintf_s(L"Buffer(%d) %s \n", cbBuf, wcsCaption);   
    for(size_t i = 0 ; i < cbBuf; i++)
    {
        if(i && !(i % ROW_WIDTH))
        {
            wprintf_s(L"\n");
        }
        wprintf_s(L"%02x  ", pBuf[i]);
    }
    wprintf_s(L"\n\n");
}


// Convert string in current code page/ACP (per system) to windows std::wstring in UCS2
namespace ConvFromMB 
{
    // Win32 API 
 std::wstring imp1(const char* s)
    {
        int cchRequired = ::MultiByteToWideChar(CP_ACP, 0, s, -1, nullptr, 0); // w/ null terminator
     std::wstring w(cchRequired, L'\0');                                    //  (because passed -1 as src length)
     int cchWritten = ::MultiByteToWideChar(CP_ACP, 0, s, -1, &w[0], cchRequired);
        return w;
    }

    // ATL tool, a MultiByteToWideChar wrapper
 std::wstring imp2(const char* s)
    {
        ATL::CA2W w (s, CP_ACP);
        return std::wstring(w);
    }

    // C Style conversion mbstowcs. Windows extends it as secure version and support different locale.
 //  - `mbstowcs_s` is identical to `_mbstowcs_s_l + Global locale`
 std::wstring imp3(const char* s)
    {
        _locale_t loc = ::_create_locale(LC_CTYPE, ".ACP");
        size_t cchRequired = 0;
        errno_t ret = ::_mbstowcs_s_l(&cchRequired, nullptr, 0, s, 0, loc); // w/ null terminator
     std::wstring w(cchRequired, L'\0');
        size_t cchActual = cchRequired;
        ret = ::_mbstowcs_s_l(&cchRequired, &w[0], cchActual, s, cchActual, loc);
        return w;
    }

    // Utilize printf + %hs, useful when print MB string to wide char fmt
 //  - `swprintf_s` is identical to `swprintf_s_l + Global locale`
 std::wstring imp4(const char* s)
    {
        _locale_t loc = ::_create_locale(LC_CTYPE, ".ACP");
        int cchRequired = _scwprintf_l(L"%hs", loc, s) + 1; // w/o null terminator
     std::wstring w(cchRequired, L'\0');
        int cchWritten = _swprintf_s_l(&w[0], cchRequired, L"%hs", loc, s);
        return w;
    }
}

// Runs every ConvFromMB implementation on `s` (a string in the current ANSI
// code page) and dumps the bytes of each result under its own caption.
void TestACPConv(const char* s)
{
    struct Entry
    {
        const wchar_t* caption;
        std::wstring (*convert)(const char*);
    };

    // Order matters: it is the order the dumps appear on the console.
    static const Entry entries[] =
    {
        { L"ACP/MultiByteToWideChar", &ConvFromMB::imp1 },
        { L"ACP/CA2W",                &ConvFromMB::imp2 },
        { L"ACP/mbstowcs",            &ConvFromMB::imp3 },
        { L"ACP/swprintf",            &ConvFromMB::imp4 },
    };

    for (const Entry& entry : entries)
    {
        PrintBytes(entry.caption, entry.convert(s).c_str());
    }
}

// Convert a UTF-8 encoded string to a Windows std::wstring (UTF-16).
//
// Contract shared by imp1..imp3:
//   - `s` is a null-terminated UTF-8 string; nullptr yields an empty string.
//   - The result contains only the converted characters. The terminating null
//     that the underlying API writes is NOT counted in size().
namespace ConvFromUTF8
{
    // Win32 API. Returns an empty string if MultiByteToWideChar reports failure.
    std::wstring imp1(const char* s)
    {
        if (!s)
        {
            return std::wstring();
        }

        // Passing -1 as the source length makes the count include the null terminator.
        const int cchRequired = ::MultiByteToWideChar(CP_UTF8, 0, s, -1, nullptr, 0);
        if (cchRequired <= 0)
        {
            return std::wstring();
        }

        std::wstring w(static_cast<size_t>(cchRequired), L'\0');
        const int cchWritten = ::MultiByteToWideChar(CP_UTF8, 0, s, -1, &w[0], cchRequired);

        // Drop the terminator (or everything, if the second call failed).
        w.resize(cchWritten > 0 ? static_cast<size_t>(cchWritten) - 1 : 0);
        return w;
    }

    // ATL tool, a MultiByteToWideChar wrapper
    std::wstring imp2(const char* s)
    {
        if (!s)
        {
            // Avoid constructing std::wstring from a null pointer.
            return std::wstring();
        }

        ATL::CA2W wActual(s, CP_UTF8);
        return std::wstring(wActual);
    }

    // Standard C++ locale string conversion, UTF-8 -> UTF-16.
    //  - codecvt_utf8<wchar_t> would target UCS-2 when wchar_t is 16 bits wide and
    //    cannot represent characters outside the BMP; codecvt_utf8_utf16 emits
    //    surrogate pairs, matching what imp1/imp2 produce.
    //  - from_bytes throws std::range_error on malformed input.
    std::wstring imp3(const char* s)
    {
        if (!s)
        {
            return std::wstring();
        }

        std::wstring_convert< std::codecvt_utf8_utf16<wchar_t> > cvt;
        return cvt.from_bytes(s);
    }
}

// Runs every ConvFromUTF8 implementation on the UTF-8 string `s` and dumps the
// bytes of each result under its own caption.
void TestUTF8Conv(const char* s)
{
    // Each conversion is performed right before its dump, one after another.
    const std::wstring viaWin32 = ConvFromUTF8::imp1(s);
    PrintBytes(L"UTF8/MultiByteToWideChar", viaWin32.c_str());

    const std::wstring viaAtl = ConvFromUTF8::imp2(s);
    PrintBytes(L"UTF8/CA2W", viaAtl.c_str());

    const std::wstring viaStdConvert = ConvFromUTF8::imp3(s);
    PrintBytes(L"UTF8/wstring_convert", viaStdConvert.c_str());
}



// Entry point: converts the same three-character greeting from UTF-8 and from
// the ANSI code page, dumping the resulting wide-character bytes for each
// method, then waits for input before exiting.
int main(int argc, char* argv[])
{
    // Explicit UTF-8 bytes: 你(0xE4BDA0) 好(0xE5A5BD) 嗎(0xE5978E)
    const char* const utf8Greeting = "\xE4\xBD\xA0\xE5\xA5\xBD\xE5\x97\x8E";

    // Only holds Big5 bytes if this source file is saved in code page 950.
    const char* const acpGreeting = "你好嗎";

    TestUTF8Conv(utf8Greeting);
    TestACPConv(acpGreeting);

    // Keep the console open until the user provides input.
    std::cin.ignore();
    return 0;
}

comments powered by Disqus