From 20ddfb7fcded992bd0e98b9ef8797104fe0900fd Mon Sep 17 00:00:00 2001
From: Harvey Falcic
->>> s = test.non_utf8_c_str() +@@ -5974,7 +5974,7 @@ bytes are represented as high surrogate characters that can be used to obtain the original byte sequence: -+>>> s = example.non_utf8_c_str() >>> s 'h\udce9llo wörld'+>>> b = s.encode('utf-8', errors='surrogateescape') >>> b b'h\xe9llo w\xc3\xb6rld' @@ -5985,7 +5985,7 @@ One can then attempt a different encoding, if desired (or simply leave the byte string as a raw sequence of bytes for use in binary protocols): -+@@ -5995,7 +5995,7 @@ Note, however, that text strings containing surrogate characters are rejected with the default strict codec error handler. For example: ->>> b.decode('latin-1') 'héllo wörld'+>>> with open('test', 'w') as f: ... print(s, file=f) ... From 5c5dfc106f00b4125f1ae7a173463afc8cdeccad Mon Sep 17 00:00:00 2001 From: Harvey Falcic Date: Sat, 24 May 2014 15:39:53 -0400 Subject: [PATCH 2/5] Python unicode_strings test case: restrict to Python > 3.0 Also adjust the test method names and content to match the docs. --- Examples/test-suite/python/unicode_strings_runme.py | 7 +++++-- Examples/test-suite/unicode_strings.i | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py index 2d26599aa..162f40972 100644 --- a/Examples/test-suite/python/unicode_strings_runme.py +++ b/Examples/test-suite/python/unicode_strings_runme.py @@ -1,4 +1,7 @@ +import sys + import unicode_strings -unicode_strings.test_c_str() -unicode_strings.test_std_string() +if sys.version_info > (3, 0): + unicode_strings.non_utf8_c_str() + unicode_strings.non_utf8_std_string() diff --git a/Examples/test-suite/unicode_strings.i b/Examples/test-suite/unicode_strings.i index f4a8b8b50..56063c8a4 100644 --- a/Examples/test-suite/unicode_strings.i +++ b/Examples/test-suite/unicode_strings.i @@ -4,12 +4,12 @@ %inline %{ -const char* test_c_str(void) { - return "h\xe9llo"; +const char* non_utf8_c_str(void) { + return 
"h\xe9llo w\xc3\xb6rld"; } -std::string test_std_string(void) { - return std::string("h\xe9llo"); +std::string non_utf8_std_string(void) { + return std::string("h\xe9llo w\xc3\xb6rld"); } %} From 2533d0210fb2f3b2e9f441c38bd03b99f4a57a44 Mon Sep 17 00:00:00 2001 From: Harvey Falcic Date: Sat, 24 May 2014 16:50:33 -0400 Subject: [PATCH 3/5] unicode_strings test: check return values --- Examples/test-suite/python/unicode_strings_runme.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py index 162f40972..0e2e1af06 100644 --- a/Examples/test-suite/python/unicode_strings_runme.py +++ b/Examples/test-suite/python/unicode_strings_runme.py @@ -2,6 +2,11 @@ import sys import unicode_strings +# The 'u' string prefix isn't valid in Python 3.0 - 3.2 and is redundant +# in 3.3+. Since this file is run through 2to3 before testing, though, +# mark this as a unicode string in 2.x so it'll become a str in 3.x. +test_string = u'h\udce9llo w\u00f6rld' + if sys.version_info > (3, 0): - unicode_strings.non_utf8_c_str() - unicode_strings.non_utf8_std_string() + assert unicode_strings.non_utf8_c_str() == test_string + assert unicode_strings.non_utf8_std_string() == test_string From ab527b0e4bf592e55b038a6de0fb8fe4540eea92 Mon Sep 17 00:00:00 2001 From: Harvey Falcic Date: Sat, 24 May 2014 17:54:53 -0400 Subject: [PATCH 4/5] unicode_strings_runme.py: fix version check Python 3.0.1 shouldn't pass. 
--- Examples/test-suite/python/unicode_strings_runme.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py index 0e2e1af06..2110c5ce7 100644 --- a/Examples/test-suite/python/unicode_strings_runme.py +++ b/Examples/test-suite/python/unicode_strings_runme.py @@ -7,6 +7,6 @@ import unicode_strings # mark this as a unicode string in 2.x so it'll become a str in 3.x. test_string = u'h\udce9llo w\u00f6rld' -if sys.version_info > (3, 0): +if sys.version_info[0:2] >= (3, 1): assert unicode_strings.non_utf8_c_str() == test_string assert unicode_strings.non_utf8_std_string() == test_string From 91e93838fc6dbc181181f6ffacb024b6a87cfe63 Mon Sep 17 00:00:00 2001 From: Harvey Falcic Date: Sat, 24 May 2014 18:00:04 -0400 Subject: [PATCH 5/5] unicode_strings test: manually check values instead of using assert --- Examples/test-suite/python/unicode_strings_runme.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py index 2110c5ce7..e1fc7adec 100644 --- a/Examples/test-suite/python/unicode_strings_runme.py +++ b/Examples/test-suite/python/unicode_strings_runme.py @@ -8,5 +8,7 @@ import unicode_strings test_string = u'h\udce9llo w\u00f6rld' if sys.version_info[0:2] >= (3, 1): - assert unicode_strings.non_utf8_c_str() == test_string - assert unicode_strings.non_utf8_std_string() == test_string + if unicode_strings.non_utf8_c_str() != test_string: + raise ValueError('Test comparison mismatch') + if unicode_strings.non_utf8_std_string() != test_string: + raise ValueError('Test comparison mismatch')