Add support for wchar_t * and std::wstring Unicode strings on Linux
Initial contributions for Linux were provided in issue #1233 and then modified to work on both Windows and Linux. Supporting both is possible by detecting sizeof(wchar_t), which differs between these systems (2 bytes on Windows, 4 bytes on Linux).
This commit is contained in:
parent
05580ff1e7
commit
ed42422d1c
8 changed files with 327 additions and 46 deletions
|
|
@ -1,3 +1,6 @@
|
|||
// This file has a BOM for UTF-8
|
||||
// Notes for displaying UTF-8 properly in Windows: https://stackoverflow.com/questions/49476326/displaying-unicode-in-powershell
|
||||
|
||||
using System;
|
||||
using li_std_wstringNamespace;
|
||||
|
||||
|
|
@ -5,17 +8,38 @@ public class runme
|
|||
{
|
||||
// Verifies that two characters are identical.
// Throws an Exception naming both values when they differ; returns normally otherwise.
static private void check_equal(char a, char b)
{
  if (a != b)
    throw new Exception("char failed '" + a + "' != '" + b + "'");
}
|
||||
|
||||
// Debug helper: writes the contents of s to the console as a bracketed list.
// Each char of the string is converted to an int and printed in upper-case hex
// followed by a space; a null string is printed as "[null]".
static private void display_bytes(string s)
{
  Console.Write("[");
  if (s != null)
  {
    foreach (char x in s)
    {
      int n = Convert.ToInt32(x);
      Console.Write(n.ToString("X") + " ");
    }
  }
  else
  {
    Console.Write("null");
  }
  Console.WriteLine("]");
}
|
||||
|
||||
// Verifies that two strings are identical.
// When li_std_wstring.debug is set, both strings and their per-char hex dumps are
// printed first, so that a failing comparison is still visible in the output.
// Throws an Exception naming both values when they differ.
static private void check_equal(string a, string b)
{
  if (li_std_wstring.debug) {
    Console.WriteLine("check_equal {0} {1}", a, b);
    display_bytes(a);
    display_bytes(b);
  }
  // Compare only after the optional dump; throwing earlier would hide the diagnostics.
  if (a != b)
    throw new Exception("string failed '" + a + "' != '" + b + "'");
}
|
||||
|
||||
static void Main()
|
||||
static void Main()
|
||||
{
|
||||
char h = 'h';
|
||||
check_equal(li_std_wstring.test_wcvalue(h), h);
|
||||
|
|
@ -30,6 +54,8 @@ public class runme
|
|||
li_std_wstring.test_pointer(null);
|
||||
li_std_wstring.test_const_pointer(null);
|
||||
|
||||
check_equal(li_std_wstring.test_ccvalue(null), null);
|
||||
|
||||
try {
|
||||
li_std_wstring.test_value(null);
|
||||
throw new Exception("NULL check failed");
|
||||
|
|
@ -37,8 +63,8 @@ public class runme
|
|||
}
|
||||
|
||||
try {
|
||||
li_std_wstring.test_reference(null);
|
||||
throw new Exception("NULL check failed");
|
||||
li_std_wstring.test_reference(null);
|
||||
throw new Exception("NULL check failed");
|
||||
} catch (ArgumentNullException e) {
|
||||
if (!e.Message.Contains("type is null"))
|
||||
throw new Exception("Missing text " + e);
|
||||
|
|
@ -54,13 +80,24 @@ public class runme
|
|||
x = "hello";
|
||||
check_equal(li_std_wstring.test_const_reference(x), x);
|
||||
|
||||
/* Postpone, tricky, std::wstring portability problem.
|
||||
/* Tricky, std::wstring portability problem.
|
||||
* wchar_t (the element type of std::wstring) is 2 bytes on Windows and 4 bytes on Linux; LPWSTR uses 2-byte characters.
|
||||
* .NET marshalling should work on Windows but not Linux.
|
||||
string s = "abc";
|
||||
if (!li_std_wstring.test_equal_abc(s))
|
||||
throw new Exception("Not equal " + s);
|
||||
*/
|
||||
*/
|
||||
string ss = "abc";
|
||||
if (!li_std_wstring.test_equal_abc(ss))
|
||||
throw new Exception("Not equal " + ss);
|
||||
|
||||
ss = "JP: 日本語";
|
||||
if (!li_std_wstring.test_equal_jp(ss))
|
||||
throw new Exception("Not equal " + ss);
|
||||
|
||||
ss = "DE: Kröpeliner Straße";
|
||||
if (!li_std_wstring.test_equal_de(ss))
|
||||
throw new Exception("Not equal " + ss);
|
||||
|
||||
ss = "RU: Война и мир";
|
||||
if (!li_std_wstring.test_equal_ru(ss))
|
||||
throw new Exception("Not equal " + ss);
|
||||
|
||||
try {
|
||||
li_std_wstring.test_throw();
|
||||
|
|
@ -81,15 +118,22 @@ public class runme
|
|||
check_equal(s.wchar_t_member, h);
|
||||
s.wchar_t_ptr_member = x;
|
||||
check_equal(s.wchar_t_ptr_member, "abc");
|
||||
s.wchar_t_ptr_member = ss;
|
||||
check_equal(s.wchar_t_ptr_member, ss);
|
||||
|
||||
{
|
||||
// Unicode strings
|
||||
// Strings below are UTF8 in this file, but .NET holds them internally as UTF16
|
||||
// DE: https://www.utf8-chartable.de/
|
||||
// RU: https://www.utf8-chartable.de/unicode-utf8-table.pl?start=1024
|
||||
string[] test_strings = {
|
||||
"JP: 日本語", "DE: Kröpeliner Straße" , "RU: Война и мир", "EN: War and Peace"
|
||||
};
|
||||
|
||||
foreach (string expected in test_strings)
|
||||
{
|
||||
if (li_std_wstring.debug)
|
||||
Console.WriteLine("expected (C#): " + expected);
|
||||
string received = li_std_wstring.test_value(expected);
|
||||
check_equal(received, expected);
|
||||
}
|
||||
|
|
|
|||
5
Examples/test-suite/li_std_wstring.h
Normal file
5
Examples/test-suite/li_std_wstring.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
// This file has a BOM set to UTF-8, which is one way for Visual C++ to correctly interpret these strings
|
||||
// Alternatively, the /utf-8 command line option could be used
|
||||
#define JP_WSTRING L"JP: 日本語"
|
||||
#define DE_WSTRING L"DE: Kröpeliner Straße"
|
||||
#define RU_WSTRING L"RU: Война и мир"
|
||||
|
|
@ -3,44 +3,71 @@
|
|||
// The languages below are yet to provide std_wstring.i
|
||||
#if !(defined(SWIGD) || defined(SWIGGO) || defined(SWIGGUILE) || defined(SWIGJAVASCRIPT) || defined(SWIGLUA) || defined(SWIGMZSCHEME) || defined(SWIGOCAML) || defined(SWIGOCTAVE) || defined(SWIGPERL) || defined(SWIGPHP) || defined(SWIGR) || defined(SWIGSCILAB))
|
||||
|
||||
%warnfilter(SWIGWARN_TYPEMAP_WCHARLEAK_MSG) wchar_t_const_ptr_member; // Setting a const wchar_t * variable may leak memory.
|
||||
|
||||
%include <std_wstring.i>
|
||||
|
||||
|
||||
// Dynamic exception specifications, throw(T), are invalid in C++17 and later, so only SWIG gets to see them; the compiled code gets an empty macro
|
||||
#define TESTCASE_THROW1(T1) throw(T1)
|
||||
%{
|
||||
#define TESTCASE_THROW1(T1)
|
||||
%}
|
||||
|
||||
%{
|
||||
// Unicode strings are stored in li_std_wstring.h file which has the BOM appropriately set, primarily for Visual C++ to correctly interpret the wide strings
|
||||
#include "li_std_wstring.h"
|
||||
%}
|
||||
|
||||
%inline %{
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
bool debug = false;
|
||||
|
||||
// Debug helper: dumps the raw storage of s to std::wcout.
// Output is "s: [" followed by every byte of the character data
// (s.size() * sizeof(wchar_t) bytes) as a hex number plus a space, then "]"
// and a newline. The stream's formatting flags are restored afterwards so the
// hex base does not leak into later output.
void show_wstring_bytes(const std::wstring &s) {
  const unsigned char *p = reinterpret_cast<const unsigned char *>(s.data());
  const size_t len = s.size() * sizeof(wchar_t);
  const std::ios_base::fmtflags saved_flags = std::wcout.flags();

  std::wcout << L"s: " << L"[" << std::hex;
  for (size_t i = 0; i < len; ++i) {
    // Print the byte as a number, not as a character.
    std::wcout << static_cast<unsigned int>(p[i]) << L" ";
  }
  // std::endl already flushes, so no separate flush is needed.
  std::wcout << L"]" << std::endl;

  std::wcout.flags(saved_flags);
}
|
||||
|
||||
// Returns the wide character it was given, unchanged.
wchar_t test_wcvalue(wchar_t x) {
  return x;
}
|
||||
|
||||
// Returns the const wide-string pointer it was given, unchanged (a null pointer stays null).
const wchar_t* test_ccvalue(const wchar_t* x) {
  return x;
}
|
||||
|
||||
// Returns the mutable wide-string pointer it was given, unchanged.
wchar_t* test_cvalue(wchar_t* x) {
  return x;
}
|
||||
|
||||
|
||||
// Zero-argument overload: always returns a null pointer.
wchar_t* test_wchar_overload() {
  return 0;
}
|
||||
|
||||
// Pointer overload: returns the wide-string pointer it was given, unchanged.
wchar_t* test_wchar_overload(wchar_t *x) {
  return x;
}
|
||||
|
||||
std::wstring test_value(std::wstring x) {
|
||||
return x;
|
||||
if (debug) {
|
||||
std::wcout << "received(C++): " /*<< x */<< std::endl;
|
||||
show_wstring_bytes(x);
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
// Returns a reference to the very same string object that was passed in.
// The caller must keep the argument alive for as long as the result is used.
const std::wstring& test_const_reference(const std::wstring &x) {
  return x;
}
|
||||
|
||||
void test_pointer(std::wstring *x) {
|
||||
|
|
@ -52,8 +79,28 @@ void test_const_pointer(const std::wstring *x) {
|
|||
// Accepts a mutable wide-string reference and does nothing with it.
void test_reference(std::wstring &x) {
}
|
||||
|
||||
bool test_equal(const wchar_t *wcs, const std::wstring& s) {
|
||||
if (debug) {
|
||||
show_wstring_bytes(wcs);
|
||||
show_wstring_bytes(s);
|
||||
}
|
||||
return wcs == s;
|
||||
}
|
||||
|
||||
bool test_equal_abc(const std::wstring &s) {
|
||||
return L"abc" == s;
|
||||
return test_equal(L"abc", s);
|
||||
}
|
||||
|
||||
bool test_equal_jp(const std::wstring &s) {
|
||||
return test_equal(JP_WSTRING, s);
|
||||
}
|
||||
|
||||
bool test_equal_de(const std::wstring &s) {
|
||||
return test_equal(DE_WSTRING, s);
|
||||
}
|
||||
|
||||
bool test_equal_ru(const std::wstring &s) {
|
||||
return test_equal(RU_WSTRING, s);
|
||||
}
|
||||
|
||||
void test_throw() TESTCASE_THROW1(std::wstring){
|
||||
|
|
@ -73,6 +120,8 @@ size_t size_wstring(const std::wstring& s) {
|
|||
// Aggregate used to exercise wrapping of wchar_t data members.
struct wchar_test_struct {
  wchar_t wchar_t_member;                   // single wide character
  wchar_t* wchar_t_ptr_member;              // mutable wide C string
  const wchar_t* wchar_t_const_ptr_member;  // const wide C string
  // Value-initializes every member: the character is zero and both pointers are null.
  wchar_test_struct() : wchar_t_member(), wchar_t_ptr_member(), wchar_t_const_ptr_member() {}
};
|
||||
|
||||
%}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue