Merge branch 'tamuratak-fix_ruby_wstring'

* tamuratak-fix_ruby_wstring: [ruby] use %fragment to clarify the dependency of code. [ruby] should initialize static variables inside %init{}, in which it will not be executed concurrently by multiple threads. [ruby] * use static variable to avoid creating strings every time. * fix possible overflow. [ruby] add std::wstring tests for string including a null terminator. [ruby] * rewrite SWIG_AsWCharPtrAndSize and SWIG_FromWCharPtrAndSize * use UTF-32LE and UTF-16LE to avoid BOM * add tests [ruby] use WCHAR_MAX to determine the encoding of std::wstring. [ruby] add a few tests for std::wstring [ruby] fix support for std::wstring.
2017-06-20 20:13:01 +01:00 · 2017-06-20 20:13:01 +01:00 · e9e9531dc3
commit e9e9531dc3
parent ff53789dc9 4f31f5d0a3
6 changed files with 137 additions and 53 deletions
--- a/Examples/test-suite/li_std_wstring.i
+++ b/Examples/test-suite/li_std_wstring.i
@ -78,6 +78,10 @@ std::wstring& test_reference_out() {
   return x;
 }

+bool test_equal_abc(const std::wstring &s) {
+  return L"abc" == s;
+}
+
 #if defined(_MSC_VER)
  #pragma warning(disable: 4290) // C++ exception specification ignored except to indicate a function is not __declspec(nothrow)
 #endif
--- a/Examples/test-suite/ruby/Makefile.in
+++ b/Examples/test-suite/ruby/Makefile.in
@ -21,6 +21,7 @@ CPP_TEST_CASES = \
 	li_std_queue \
 	li_std_set \
 	li_std_stack \
+	li_std_wstring \
 	primitive_types \
 	ruby_keywords \
 	ruby_minherit_shared_ptr \
--- a/Examples/test-suite/ruby/li_std_wstring_runme.rb
+++ b/Examples/test-suite/ruby/li_std_wstring_runme.rb
@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+require 'swig_assert'
+require 'li_std_wstring'
+
+x = "abc"
+swig_assert_equal("Li_std_wstring.test_wchar_overload(x)", "x", binding)
+swig_assert_equal("Li_std_wstring.test_ccvalue(x)", "x", binding)
+swig_assert_equal("Li_std_wstring.test_value(Li_std_wstring::Wstring.new(x))", "x", binding)
+
+swig_assert_equal("Li_std_wstring.test_wchar_overload()", "nil", binding)
+
+swig_assert_equal("Li_std_wstring.test_pointer(Li_std_wstring::Wstring.new(x))", "nil", binding)
+swig_assert_equal("Li_std_wstring.test_const_pointer(Li_std_wstring::Wstring.new(x))", "nil", binding)
+swig_assert_equal("Li_std_wstring.test_const_pointer(Li_std_wstring::Wstring.new(x))", "nil", binding)
+swig_assert_equal("Li_std_wstring.test_reference(Li_std_wstring::Wstring.new(x))", "nil", binding)
+
+x = "y"
+swig_assert_equal("Li_std_wstring.test_value(x)", "x", binding)
+a = Li_std_wstring::A.new(x)
+swig_assert_equal("Li_std_wstring.test_value(a)", "x", binding)
+
+x = "hello"
+swig_assert_equal("Li_std_wstring.test_const_reference(x)", "x", binding)
+
+
+swig_assert_equal("Li_std_wstring.test_pointer_out", "'x'", binding)
+swig_assert_equal("Li_std_wstring.test_const_pointer_out", "'x'", binding)
+swig_assert_equal("Li_std_wstring.test_reference_out()", "'x'", binding)
+
+s = "abc"
+swig_assert("Li_std_wstring.test_equal_abc(s)", binding)
+
+begin
+  Li_std_wstring.test_throw
+rescue RuntimeError => e
+  swig_assert_equal("e.message", "'x'", binding)
+end
+
+x = "abc\0def"
+swig_assert_equal("Li_std_wstring.test_value(x)", "x", binding)
+swig_assert_equal("Li_std_wstring.test_ccvalue(x)", '"abc"', binding)
+swig_assert_equal("Li_std_wstring.test_wchar_overload(x)", '"abc"', binding)
--- a/Lib/ruby/rubywstrings.swg
+++ b/Lib/ruby/rubywstrings.swg
@ -1,71 +1,57 @@
 /* -----------------------------------------------------------------------------
 * rubywstrings.swg
 *
- * Currently, Ruby does not support Unicode or WChar properly, so these
- * are still treated as char arrays for now.
- * There are other libraries available that add support to this in
- * ruby including WString, FXString, etc.
- * ----------------------------------------------------------------------------- */
-
-/* ------------------------------------------------------------
 *  utility methods for wchar_t strings 
 * ------------------------------------------------------------ */

-%fragment("SWIG_AsWCharPtrAndSize","header",fragment="<wchar.h>",fragment="SWIG_pwchar_descriptor",fragment="SWIG_AsCharPtrAndSize") {
+%fragment("SWIG_AsWCharPtrAndSize","header",fragment="<wchar.h>",fragment="SWIG_pwchar_descriptor",fragment="SWIG_AsCharPtrAndSize",fragment="SWIG_ruby_wstring_encoding_init") {
 SWIGINTERN int
 SWIG_AsWCharPtrAndSize(VALUE obj, wchar_t **cptr, size_t *psize, int *alloc)
 {
-  return SWIG_AsCharPtrAndSize( obj, (char**)cptr, psize, alloc);
-//   VALUE tmp = 0;
-//   bool ok = false;
-//   if ( TYPE(obj) == T_STRING ) {
-//     if (cptr) {
-//       obj = tmp = SWIG_Unicode_FromObject(obj);
-//       ok = true;
-//     }
-//   }
-//   if (ok) {
-//     Py_ssize_t len = PyUnicode_GetSize(obj);
-//     rb_notimplement();
-//     if (cptr) {
-//       *cptr = %new_array(len + 1, wchar_t);
-//       SWIG_Unicode_AsWideChar((PyUnicodeObject *)obj, *cptr, len);
-//       (*cptr)[len] = 0;
-//     }
-//     if (psize) *psize = (size_t) len + 1;
-//     if (alloc) *alloc = cptr ? SWIG_NEWOBJ : 0;
-//     return SWIG_OK;
-//   } else {
-//     swig_type_info* pwchar_descriptor = SWIG_pwchar_descriptor();
-//     if (pwchar_descriptor) {
-//       void * vptr = 0;
-//       if (SWIG_ConvertPtr(obj, &vptr, pwchar_descriptor, 0) == SWIG_OK) {
-// 	if (cptr) *cptr = (wchar_t *)vptr;
-// 	if (psize) *psize = vptr ? (wcslen((wchar_t *)vptr) + 1) : 0;
-// 	return SWIG_OK;
-//       }
-//     }
-//   }
-//   return SWIG_TypeError;
+  rb_encoding* wstr_enc = swig_ruby_wstring_encoding;
+
+  if (TYPE(obj) == T_STRING) {
+    VALUE rstr = rb_str_conv_enc(obj, rb_enc_get(obj), wstr_enc);
+    wchar_t* cstr = (wchar_t*) StringValuePtr(rstr);
+    size_t   size = RSTRING_LEN(rstr) / sizeof(wchar_t) + 1;
+
+    if ( RSTRING_LEN(rstr) % sizeof(wchar_t) != 0 ) {
+        rb_raise(rb_eRuntimeError,
+                 "The length of the byte sequence of converted string is not a multiplier of sizeof(wchar_t). Invalid byte sequence is given. Or invalid SWIG_RUBY_WSTRING_ENCODING is given when compiling this binding.");
+    }
+    if (cptr && alloc)  {
+      *alloc = SWIG_NEWOBJ;
+      *cptr = %new_array(size, wchar_t);
+      memmove(*cptr, cstr, RSTRING_LEN(rstr));
+    }
+    if (psize) *psize = size;
+
+    return SWIG_OK;
+  } else {
+    return SWIG_TypeError;
+  }
 }
 }

-%fragment("SWIG_FromWCharPtrAndSize","header",fragment="<wchar.h>",fragment="SWIG_pwchar_descriptor",fragment="SWIG_FromCharPtrAndSize") {
+%fragment("SWIG_FromWCharPtrAndSize","header",fragment="<wchar.h>",fragment="SWIG_pwchar_descriptor",fragment="SWIG_FromCharPtrAndSize",fragment="SWIG_ruby_wstring_encoding_init") {
 SWIGINTERNINLINE VALUE 
 SWIG_FromWCharPtrAndSize(const wchar_t * carray, size_t size)
 {
-  return SWIG_FromCharPtrAndSize( (const char*)carray, size);
-//   if (carray) {
-//     if (size > INT_MAX) {
-//       swig_type_info* pwchar_descriptor = SWIG_pwchar_descriptor();
-//       return pwchar_descriptor ? 
-// 	SWIG_NewPointerObj(%const_cast(carray,wchar_t *), pwchar_descriptor, 0) : Qnil;
-//     } else {
-//       return SWIG_Unicode_FromWideChar(carray, %numeric_cast(size,int));
-//     }
-//   } else {
-//     return Qnil;
-//   }
+  rb_encoding* wstr_enc = swig_ruby_wstring_encoding;
+  rb_encoding* rb_enc   = swig_ruby_internal_encoding;
+
+  if (carray && size <= LONG_MAX/sizeof(wchar_t)) {
+    VALUE rstr = rb_str_new( (const char*)carray, %numeric_cast(size*sizeof(wchar_t),long) );
+    rb_encoding* new_enc = rb_default_internal_encoding();
+
+    rb_enc_associate(rstr, wstr_enc);
+    if ( !new_enc ) {
+      new_enc = rb_enc;
+    }
+    return rb_str_conv_enc(rstr, wstr_enc, new_enc);
+  } else {
+    return Qnil;
+  }
 }
 }

--- a/Lib/ruby/std_basic_string.i
+++ b/Lib/ruby/std_basic_string.i
@ -55,6 +55,10 @@ SWIGINTERNINLINE VALUE

 #if !defined(SWIG_STD_WSTRING)

+%traits_swigtype(std::basic_string<wchar_t>);
+%fragment(SWIG_Traits_frag(std::basic_string<wchar_t>));
+
+
 %fragment(SWIG_AsPtr_frag(std::basic_string<wchar_t>),"header",
 	  fragment="SWIG_AsWCharPtrAndSize") {
 SWIGINTERN int
--- a/Lib/ruby/std_wstring.i
+++ b/Lib/ruby/std_wstring.i
@ -1,3 +1,50 @@
+%{
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_RUBY_ENCODING_H
+#include "ruby/encoding.h"
+#endif
+
+/**
+ *  The internal encoding of std::wstring is defined based on
+ *  the size of wchar_t. If it is not appropriate for your library,
+ *  SWIG_RUBY_WSTRING_ENCODING must be given when compiling.
+ */
+#ifndef SWIG_RUBY_WSTRING_ENCODING
+
+#if WCHAR_MAX == 0x7fff || WCHAR_MAX == 0xffff
+#define SWIG_RUBY_WSTRING_ENCODING "UTF-16LE"
+#elif WCHAR_MAX == 0x7fffffff || WCHAR_MAX == 0xffffffff
+#define SWIG_RUBY_WSTRING_ENCODING "UTF-32LE"
+#else
+#error unsupported wchar_t size. SWIG_RUBY_WSTRING_ENCODING must be given.
+#endif
+
+#endif
+
+/**
+ *  If Encoding.default_internal is nil, this encoding will be used
+ *  when converting from std::wstring to String object in Ruby.
+ */
+#ifndef SWIG_RUBY_INTERNAL_ENCODING
+#define SWIG_RUBY_INTERNAL_ENCODING "UTF-8"
+#endif
+
+static rb_encoding *swig_ruby_wstring_encoding;
+static rb_encoding *swig_ruby_internal_encoding;
+
+#ifdef __cplusplus
+}
+#endif
+%}
+
+%fragment("SWIG_ruby_wstring_encoding_init", "init") {
+  swig_ruby_wstring_encoding  = rb_enc_find( SWIG_RUBY_WSTRING_ENCODING );
+  swig_ruby_internal_encoding = rb_enc_find( SWIG_RUBY_INTERNAL_ENCODING );
+}
+
 %include <rubywstrings.swg>
 %include <typemaps/std_wstring.swg>