Add support for wchar_t * and std::wstring Unicode strings on Linux

Initial contributions for Linux were provided in issue #1233 and then modified to work
on both Windows and Linux. Dual support is possible by detecting
sizeof(wchar_t), which differs between these two systems.
This commit is contained in:
William S Fulton 2022-04-29 18:13:21 +01:00
commit ed42422d1c
8 changed files with 327 additions and 46 deletions

View file

@ -1,3 +1,6 @@
// This file has a BOM for UTF-8
// Notes for displaying UTF-8 properly in Windows: https://stackoverflow.com/questions/49476326/displaying-unicode-in-powershell
using System;
using li_std_wstringNamespace;
@ -5,17 +8,38 @@ public class runme
{
// Verifies that two characters are identical.
// Throws an Exception whose message shows both characters when they differ.
static private void check_equal(char a, char b)
{
  // A single comparison is sufficient; repeating the same check adds nothing.
  if (a != b)
    throw new Exception("char failed '" + a + "' != '" + b + "'");
}
// Writes the contents of s to the console inside square brackets, one
// uppercase hexadecimal number per char followed by a space.
// A null string is shown as "[null]".
static private void display_bytes(string s)
{
  Console.Write("[");
  if (s == null)
  {
    Console.Write("null");
  }
  else
  {
    for (int i = 0; i < s.Length; i++)
    {
      // Each char is converted to its integer value before hex formatting
      string hex = ((int)s[i]).ToString("X");
      Console.Write(hex + " ");
    }
  }
  Console.WriteLine("]");
}
// Verifies that two strings are equal.
// When li_std_wstring.debug is set, both strings and their per-char hex dumps
// are printed first, so the diagnostics are available even when the strings differ.
// Throws an Exception whose message shows both strings when they differ.
static private void check_equal(string a, string b)
{
  // Emit the diagnostics before comparing; throwing first would hide them
  // in exactly the failing case they are meant to explain.
  if (li_std_wstring.debug) {
    Console.WriteLine("check_equal {0} {1}", a, b);
    display_bytes(a);
    display_bytes(b);
  }
  if (a != b)
    throw new Exception("string failed '" + a + "' != '" + b + "'");
}
static void Main()
static void Main()
{
char h = 'h';
check_equal(li_std_wstring.test_wcvalue(h), h);
@ -30,6 +54,8 @@ public class runme
li_std_wstring.test_pointer(null);
li_std_wstring.test_const_pointer(null);
check_equal(li_std_wstring.test_ccvalue(null), null);
try {
li_std_wstring.test_value(null);
throw new Exception("NULL check failed");
@ -37,8 +63,8 @@ public class runme
}
try {
li_std_wstring.test_reference(null);
throw new Exception("NULL check failed");
li_std_wstring.test_reference(null);
throw new Exception("NULL check failed");
} catch (ArgumentNullException e) {
if (!e.Message.Contains("type is null"))
throw new Exception("Missing text " + e);
@ -54,13 +80,24 @@ public class runme
x = "hello";
check_equal(li_std_wstring.test_const_reference(x), x);
/* Postpone, tricky, std::wstring portability problem.
/* Tricky, std::wstring portability problem.
* std::wstring is 2 bytes on Windows, 4 bytes on Linux, LPWSTR is 2 bytes.
* .NET marshalling should work on Windows but not Linux.
string s = "abc";
if (!li_std_wstring.test_equal_abc(s))
throw new Exception("Not equal " + s);
*/
*/
string ss = "abc";
if (!li_std_wstring.test_equal_abc(ss))
throw new Exception("Not equal " + ss);
ss = "JP: 日本語";
if (!li_std_wstring.test_equal_jp(ss))
throw new Exception("Not equal " + ss);
ss = "DE: Kröpeliner Straße";
if (!li_std_wstring.test_equal_de(ss))
throw new Exception("Not equal " + ss);
ss = "RU: Война и мир";
if (!li_std_wstring.test_equal_ru(ss))
throw new Exception("Not equal " + ss);
try {
li_std_wstring.test_throw();
@ -81,15 +118,22 @@ public class runme
check_equal(s.wchar_t_member, h);
s.wchar_t_ptr_member = x;
check_equal(s.wchar_t_ptr_member, "abc");
s.wchar_t_ptr_member = ss;
check_equal(s.wchar_t_ptr_member, ss);
{
// Unicode strings
// Strings below are UTF8 in this file, but .NET holds them internally as UTF16
// DE: https://www.utf8-chartable.de/
// RU: https://www.utf8-chartable.de/unicode-utf8-table.pl?start=1024
string[] test_strings = {
"JP: 日本語", "DE: Kröpeliner Straße" , "RU: Война и мир", "EN: War and Peace"
};
foreach (string expected in test_strings)
{
if (li_std_wstring.debug)
Console.WriteLine("expected (C#): " + expected);
string received = li_std_wstring.test_value(expected);
check_equal(received, expected);
}

View file

@ -0,0 +1,5 @@
// This file has a BOM set to UTF-8, which is one way for Visual C++ to correctly interpret these strings
// Alternatively, the /utf-8 command line option could be used
// Wide string literals containing non-ASCII text, each prefixed with a two-letter language tag
#define JP_WSTRING L"JP: 日本語"
#define DE_WSTRING L"DE: Kröpeliner Straße"
#define RU_WSTRING L"RU: Война и мир"

View file

@ -3,44 +3,71 @@
// The languages below are yet to provide std_wstring.i
#if !(defined(SWIGD) || defined(SWIGGO) || defined(SWIGGUILE) || defined(SWIGJAVASCRIPT) || defined(SWIGLUA) || defined(SWIGMZSCHEME) || defined(SWIGOCAML) || defined(SWIGOCTAVE) || defined(SWIGPERL) || defined(SWIGPHP) || defined(SWIGR) || defined(SWIGSCILAB))
%warnfilter(SWIGWARN_TYPEMAP_WCHARLEAK_MSG) wchar_t_const_ptr_member; // Setting a const wchar_t * variable may leak memory.
%include <std_wstring.i>
// Dynamic exception specifications, throw(T), are invalid in C++17 and later, so only SWIG is given one.
// This definition is the one SWIG sees while parsing the interface: the macro expands to throw(T1).
#define TESTCASE_THROW1(T1) throw(T1)
%{
// This definition is inside a %{ ... %} block, so it is the one the C++ compiler sees: the macro expands to nothing.
#define TESTCASE_THROW1(T1)
%}
%{
// Unicode strings are stored in li_std_wstring.h file which has the BOM appropriately set, primarily for Visual C++ to correctly interpret the wide strings
#include "li_std_wstring.h"
%}
%inline %{
#include <string>
#include <iostream>
// When true, test_value() and test_equal() print diagnostic output to std::wcout
bool debug = false;
// Prints the raw storage of s to std::wcout as "s: [" followed by one
// hexadecimal number per byte (each followed by a space) and a closing "]",
// then ends the line and flushes.
// The number of bytes shown is s.size() * sizeof(wchar_t).
// Only the bytes are printed, not the text itself.
// NOTE(review): presumably the text is omitted because printing non-ASCII
// characters depends on the stream's locale - confirm.
void show_wstring_bytes(const std::wstring &s) {
  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(s.data());
  const size_t byte_count = s.size() * sizeof(wchar_t);

  // std::hex is sticky; remember the caller's formatting so it can be restored
  const std::ios_base::fmtflags saved_flags = std::wcout.flags();

  std::wcout << L"s: " << L"[";
  std::wcout << std::hex;
  for (size_t i = 0; i < byte_count; ++i) {
    // Convert explicitly so the byte is printed as a number, not as a character
    std::wcout << static_cast<unsigned int>(bytes[i]) << L" ";
  }
  std::wcout << L"]" << std::endl; // std::endl also flushes the stream

  std::wcout.flags(saved_flags);
}
// Returns the wide character it was given, unchanged.
wchar_t test_wcvalue(wchar_t x) {
  return x;
}
// Returns the const wide-character pointer it was given, unchanged (a null pointer is returned as null).
const wchar_t* test_ccvalue(const wchar_t* x) {
  return x;
}
// Returns the mutable wide-character pointer it was given, unchanged (a null pointer is returned as null).
wchar_t* test_cvalue(wchar_t* x) {
  return x;
}
// Zero-argument overload: always returns a null pointer.
wchar_t* test_wchar_overload() {
  return 0;
}
// One-argument overload: returns the pointer it was given, unchanged.
wchar_t* test_wchar_overload(wchar_t *x) {
  return x;
}
// Takes a std::wstring by value and returns it by value, unchanged.
// When debug is set, a marker line and the raw bytes of the received string
// are written to std::wcout before returning.
std::wstring test_value(std::wstring x) {
  // Trace first, then return, so the diagnostics are actually emitted
  if (debug) {
    std::wcout << "received(C++): " /*<< x */<< std::endl;
    show_wstring_bytes(x);
  }
  return x;
}
// Returns a reference to the very same std::wstring object it was given.
// The result is only valid for as long as the argument is alive.
const std::wstring& test_const_reference(const std::wstring &x) {
  return x;
}
void test_pointer(std::wstring *x) {
@ -52,8 +79,28 @@ void test_const_pointer(const std::wstring *x) {
// Accepts a std::wstring by non-const reference and does nothing with it.
// NOTE(review): presumably exists only so the generated wrappers' handling of
// reference arguments can be exercised - confirm.
void test_reference(std::wstring &x) {}
// Compares a null-terminated wide string with a std::wstring.
// Returns true when both hold the same characters. A null wcs is treated as
// not equal to any string, rather than being dereferenced.
// When debug is set, the raw bytes of both operands are printed first.
bool test_equal(const wchar_t *wcs, const std::wstring& s) {
  // Converting or comparing a null pointer as a string is undefined behaviour
  if (!wcs)
    return false;
  if (debug) {
    show_wstring_bytes(wcs);
    show_wstring_bytes(s);
  }
  return wcs == s;
}
// Returns true when s is exactly L"abc".
// Delegates to test_equal(), which also prints both operands when debug is set.
bool test_equal_abc(const std::wstring &s) {
  return test_equal(L"abc", s);
}
// Returns true when s matches the reference string JP_WSTRING (compared via test_equal).
bool test_equal_jp(const std::wstring &s) {
  const bool matches = test_equal(JP_WSTRING, s);
  return matches;
}
// Returns true when s matches the reference string DE_WSTRING (compared via test_equal).
bool test_equal_de(const std::wstring &s) {
  const bool matches = test_equal(DE_WSTRING, s);
  return matches;
}
// Returns true when s matches the reference string RU_WSTRING (compared via test_equal).
bool test_equal_ru(const std::wstring &s) {
  const bool matches = test_equal(RU_WSTRING, s);
  return matches;
}
void test_throw() TESTCASE_THROW1(std::wstring){
@ -73,6 +120,8 @@ size_t size_wstring(const std::wstring& s) {
// Aggregate of wide-character members: a single wchar_t, a mutable
// wchar_t pointer and a const wchar_t pointer.
struct wchar_test_struct {
  wchar_t wchar_t_member;
  wchar_t* wchar_t_ptr_member;
  const wchar_t* wchar_t_const_ptr_member;

  // Starts with a zero character and two null pointers.
  wchar_test_struct()
    : wchar_t_member(L'\0'),
      wchar_t_ptr_member(0),
      wchar_t_const_ptr_member(0)
  {
  }
};
%}