Merge branch 'tamuratak-fix_ruby_wstring'

* tamuratak-fix_ruby_wstring:
  [ruby] use %fragment to clarify the dependency of code.
  [ruby] should initialize static variables inside %init{}, in which it will not be executed concurrently by multiple threads.
  [ruby] * use static variable to avoid creating strings every time. * fix possible overflow.
  [ruby] add std::wstring tests for string including a null terminator.
  [ruby] * rewrite SWIG_AsWCharPtrAndSize and SWIG_FromWCharPtrAndSize        * use UTF-32LE and UTF-16LE to avoid BOM        * add tests
  [ruby] use WCHAR_MAX to determine the encoding of std::wstring.
  [ruby] add a few tests for std::wstring
  [ruby] fix support for std::wstring.
This commit is contained in:
William S Fulton 2017-06-20 20:13:01 +01:00
commit e9e9531dc3
6 changed files with 137 additions and 53 deletions

View file

@ -78,6 +78,10 @@ std::wstring& test_reference_out() {
return x;
}
// Reports whether the given wide string is exactly L"abc".
bool test_equal_abc(const std::wstring &s) {
  const bool matches = (s.compare(L"abc") == 0);
  return matches;
}
#if defined(_MSC_VER)
#pragma warning(disable: 4290) // C++ exception specification ignored except to indicate a function is not __declspec(nothrow)
#endif

View file

@ -21,6 +21,7 @@ CPP_TEST_CASES = \
li_std_queue \
li_std_set \
li_std_stack \
li_std_wstring \
primitive_types \
ruby_keywords \
ruby_minherit_shared_ptr \

View file

@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
#
# Runtime test script for the Li_std_wstring wrapper module.
# Every check is written as a string that swig_assert / swig_assert_equal
# evaluate against the current binding, so the local names used inside
# those strings (x, s, e) must stay in sync with the assignments below.
require 'swig_assert'
require 'li_std_wstring'
# Calls that are expected to hand the argument back unchanged.
x = "abc"
swig_assert_equal("Li_std_wstring.test_wchar_overload(x)", "x", binding)
swig_assert_equal("Li_std_wstring.test_ccvalue(x)", "x", binding)
swig_assert_equal("Li_std_wstring.test_value(Li_std_wstring::Wstring.new(x))", "x", binding)
# The zero-argument overload is expected to return nil.
swig_assert_equal("Li_std_wstring.test_wchar_overload()", "nil", binding)
# Pointer and reference parameter variants, each given a fresh Wstring
# object; all of them are expected to return nil.
swig_assert_equal("Li_std_wstring.test_pointer(Li_std_wstring::Wstring.new(x))", "nil", binding)
# NOTE(review): the next two assertions are identical; possibly one of them
# was meant to target a different function - confirm.
swig_assert_equal("Li_std_wstring.test_const_pointer(Li_std_wstring::Wstring.new(x))", "nil", binding)
swig_assert_equal("Li_std_wstring.test_const_pointer(Li_std_wstring::Wstring.new(x))", "nil", binding)
swig_assert_equal("Li_std_wstring.test_reference(Li_std_wstring::Wstring.new(x))", "nil", binding)
# test_value is called both with a plain Ruby String and with an
# Li_std_wstring::A built from that string; both must yield x.
x = "y"
swig_assert_equal("Li_std_wstring.test_value(x)", "x", binding)
a = Li_std_wstring::A.new(x)
swig_assert_equal("Li_std_wstring.test_value(a)", "x", binding)
x = "hello"
swig_assert_equal("Li_std_wstring.test_const_reference(x)", "x", binding)
# The *_out functions take no argument here and are compared against the
# literal string 'x' (quoted inside the expected expression), not the local x.
swig_assert_equal("Li_std_wstring.test_pointer_out", "'x'", binding)
swig_assert_equal("Li_std_wstring.test_const_pointer_out", "'x'", binding)
swig_assert_equal("Li_std_wstring.test_reference_out()", "'x'", binding)
# Equality check performed on the wrapped side against "abc".
s = "abc"
swig_assert("Li_std_wstring.test_equal_abc(s)", binding)
# When test_throw raises a RuntimeError, its message must be the literal 'x'.
# NOTE(review): if test_throw does not raise, nothing is asserted here.
begin
Li_std_wstring.test_throw
rescue RuntimeError => e
swig_assert_equal("e.message", "'x'", binding)
end
# A string with an embedded NUL: test_value is expected to return all of it,
# while test_ccvalue and test_wchar_overload are expected to return only the
# part before the NUL ("abc").
x = "abc\0def"
swig_assert_equal("Li_std_wstring.test_value(x)", "x", binding)
swig_assert_equal("Li_std_wstring.test_ccvalue(x)", '"abc"', binding)
swig_assert_equal("Li_std_wstring.test_wchar_overload(x)", '"abc"', binding)

View file

@ -1,71 +1,57 @@
/* -----------------------------------------------------------------------------
* rubywstrings.swg
*
* Currently, Ruby does not support Unicode or WChar properly, so these
* are still treated as char arrays for now.
* There are other libraries available that add support to this in
* ruby including WString, FXString, etc.
* ----------------------------------------------------------------------------- */
/* ------------------------------------------------------------
* utility methods for wchar_t strings
* ------------------------------------------------------------ */
/* ------------------------------------------------------------
 * SWIG_AsWCharPtrAndSize
 *
 * Converts a Ruby String into a wchar_t buffer.
 *
 *   obj    Ruby object to convert; anything other than a T_STRING
 *          is rejected with SWIG_TypeError.
 *   cptr   if both cptr and alloc are non-NULL, receives a newly
 *          allocated, zero-terminated copy of the converted text.
 *   psize  if non-NULL, receives the number of wchar_t elements
 *          including the terminating zero.
 *   alloc  set to SWIG_NEWOBJ whenever a buffer is stored in cptr.
 *
 * Returns SWIG_OK on success. Raises a Ruby RuntimeError when the
 * converted byte length is not a whole number of wchar_t elements.
 * ------------------------------------------------------------ */
%fragment("SWIG_AsWCharPtrAndSize","header",fragment="<wchar.h>",fragment="SWIG_pwchar_descriptor",fragment="SWIG_AsCharPtrAndSize",fragment="SWIG_ruby_wstring_encoding_init") {
SWIGINTERN int
SWIG_AsWCharPtrAndSize(VALUE obj, wchar_t **cptr, size_t *psize, int *alloc)
{
  rb_encoding *wstr_enc = swig_ruby_wstring_encoding;

  if (TYPE(obj) == T_STRING) {
    /* Transcode from the encoding attached to obj into the wchar_t encoding. */
    VALUE rstr = rb_str_conv_enc(obj, rb_enc_get(obj), wstr_enc);
    wchar_t *cstr = (wchar_t *) StringValuePtr(rstr);
    size_t nbytes = (size_t) RSTRING_LEN(rstr);
    size_t size;

    if ( nbytes % sizeof(wchar_t) != 0 ) {
      rb_raise(rb_eRuntimeError,
               "The length of the byte sequence of converted string is not a multiplier of sizeof(wchar_t). Invalid byte sequence is given. Or invalid SWIG_RUBY_WSTRING_ENCODING is given when compiling this binding.");
    }

    /* Element count, plus one slot for the terminating zero. */
    size = nbytes / sizeof(wchar_t) + 1;

    if (cptr && alloc) {
      *alloc = SWIG_NEWOBJ;
      *cptr = %new_array(size, wchar_t);
      memmove(*cptr, cstr, nbytes);
      /* Only the payload bytes were copied, so the terminator slot
         has to be written explicitly. */
      (*cptr)[size - 1] = 0;
    }
    if (psize) *psize = size;

    return SWIG_OK;
  } else {
    return SWIG_TypeError;
  }
}
}
/* ------------------------------------------------------------
 * SWIG_FromWCharPtrAndSize
 *
 * Builds a Ruby String from size wchar_t elements starting at carray.
 *
 * The raw bytes are tagged with the wchar_t encoding and then
 * transcoded to the result of rb_default_internal_encoding(), or to
 * swig_ruby_internal_encoding when that call returns NULL.
 *
 * Returns Qnil when carray is NULL or when the byte length
 * (size * sizeof(wchar_t)) would not fit in a long.
 * ------------------------------------------------------------ */
%fragment("SWIG_FromWCharPtrAndSize","header",fragment="<wchar.h>",fragment="SWIG_pwchar_descriptor",fragment="SWIG_FromCharPtrAndSize",fragment="SWIG_ruby_wstring_encoding_init") {
SWIGINTERNINLINE VALUE
SWIG_FromWCharPtrAndSize(const wchar_t * carray, size_t size)
{
  rb_encoding *wstr_enc = swig_ruby_wstring_encoding;
  rb_encoding *fallback_enc = swig_ruby_internal_encoding;
  rb_encoding *target_enc;
  VALUE rstr;

  /* The division keeps size*sizeof(wchar_t) below from overflowing a long. */
  if (!carray || size > LONG_MAX/sizeof(wchar_t)) {
    return Qnil;
  }

  rstr = rb_str_new( (const char*)carray, %numeric_cast(size*sizeof(wchar_t),long) );
  target_enc = rb_default_internal_encoding();
  rb_enc_associate(rstr, wstr_enc);
  if ( !target_enc ) {
    target_enc = fallback_enc;
  }
  return rb_str_conv_enc(rstr, wstr_enc, target_enc);
}
}

View file

@ -55,6 +55,10 @@ SWIGINTERNINLINE VALUE
#if !defined(SWIG_STD_WSTRING)
%traits_swigtype(std::basic_string<wchar_t>);
%fragment(SWIG_Traits_frag(std::basic_string<wchar_t>));
%fragment(SWIG_AsPtr_frag(std::basic_string<wchar_t>),"header",
fragment="SWIG_AsWCharPtrAndSize") {
SWIGINTERN int

View file

@ -1,3 +1,50 @@
%{
#include <wchar.h> /* WCHAR_MAX, tested below */

#ifdef __cplusplus
extern "C" {
#endif

#ifdef HAVE_RUBY_ENCODING_H
#include "ruby/encoding.h"
#endif

/**
 * Byte-order suffix appended to the default wchar_t encoding name.
 * wchar_t data is passed to Ruby as raw bytes, so the encoding name
 * has to match the byte order of the host. Big-endian is selected
 * when WORDS_BIGENDIAN is defined or when the compiler reports it
 * through __BYTE_ORDER__; otherwise little-endian is assumed.
 * Define SWIG_RUBY_ENDIAN ("LE" or "BE") when compiling to override.
 */
#ifndef SWIG_RUBY_ENDIAN
#if defined(WORDS_BIGENDIAN) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define SWIG_RUBY_ENDIAN "BE"
#else
#define SWIG_RUBY_ENDIAN "LE"
#endif
#endif

/**
 * The internal encoding of std::wstring is defined based on
 * the size of wchar_t. If it is not appropriate for your library,
 * SWIG_RUBY_WSTRING_ENCODING must be given when compiling.
 * The explicit LE/BE names are used so that no BOM is involved.
 */
#ifndef SWIG_RUBY_WSTRING_ENCODING
#if WCHAR_MAX == 0x7fff || WCHAR_MAX == 0xffff
#define SWIG_RUBY_WSTRING_ENCODING "UTF-16" SWIG_RUBY_ENDIAN
#elif WCHAR_MAX == 0x7fffffff || WCHAR_MAX == 0xffffffff
#define SWIG_RUBY_WSTRING_ENCODING "UTF-32" SWIG_RUBY_ENDIAN
#else
#error unsupported wchar_t size. SWIG_RUBY_WSTRING_ENCODING must be given.
#endif
#endif

/**
 * If Encoding.default_internal is nil, this encoding will be used
 * when converting from std::wstring to String object in Ruby.
 */
#ifndef SWIG_RUBY_INTERNAL_ENCODING
#define SWIG_RUBY_INTERNAL_ENCODING "UTF-8"
#endif

/* Encoding handles shared by the wchar_t conversion helpers; they are
   assigned by the SWIG_ruby_wstring_encoding_init fragment. */
static rb_encoding *swig_ruby_wstring_encoding;
static rb_encoding *swig_ruby_internal_encoding;

#ifdef __cplusplus
}
#endif
%}
/* Init-section fragment: looks up, by name, the two encodings used by the
   wchar_t conversion helpers and stores them in the file-level statics. */
%fragment("SWIG_ruby_wstring_encoding_init", "init") {
  swig_ruby_internal_encoding = rb_enc_find(SWIG_RUBY_INTERNAL_ENCODING);
  swig_ruby_wstring_encoding  = rb_enc_find(SWIG_RUBY_WSTRING_ENCODING);
}
%include <rubywstrings.swg>
%include <typemaps/std_wstring.swg>