diff --git a/Doc/Manual/Python.html b/Doc/Manual/Python.html index 45725065d..6dc0ff9ba 100644 --- a/Doc/Manual/Python.html +++ b/Doc/Manual/Python.html @@ -5962,8 +5962,8 @@ When this method is called from Python 3, the return value is the following text string:
-->>> s = test.non_utf8_c_str() +@@ -5974,7 +5974,7 @@ bytes are represented as high surrogate characters that can be used to obtain the original byte sequence: -+>>> s = example.non_utf8_c_str() >>> s 'h\udce9llo wörld'+>>> b = s.encode('utf-8', errors='surrogateescape') >>> b b'h\xe9llo w\xc3\xb6rld' @@ -5985,7 +5985,7 @@ One can then attempt a different encoding, if desired (or simply leave the byte string as a raw sequence of bytes for use in binary protocols): -+@@ -5995,7 +5995,7 @@ Note, however, that text strings containing surrogate characters are rejected with the default strict codec error handler. For example: ->>> b.decode('latin-1') 'héllo wörld'+>>> with open('test', 'w') as f: ... print(s, file=f) ... diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py index 2d26599aa..e1fc7adec 100644 --- a/Examples/test-suite/python/unicode_strings_runme.py +++ b/Examples/test-suite/python/unicode_strings_runme.py @@ -1,4 +1,14 @@ +import sys + import unicode_strings -unicode_strings.test_c_str() -unicode_strings.test_std_string() +# The 'u' string prefix isn't valid in Python 3.0 - 3.2 and is redundant +# in 3.3+. Since this file is run through 2to3 before testing, though, +# mark this as a unicode string in 2.x so it'll become a str in 3.x. +test_string = u'h\udce9llo w\u00f6rld' + +if sys.version_info[0:2] >= (3, 1): + if unicode_strings.non_utf8_c_str() != test_string: + raise ValueError('Test comparison mismatch') + if unicode_strings.non_utf8_std_string() != test_string: + raise ValueError('Test comparison mismatch') diff --git a/Examples/test-suite/unicode_strings.i b/Examples/test-suite/unicode_strings.i index f4a8b8b50..56063c8a4 100644 --- a/Examples/test-suite/unicode_strings.i +++ b/Examples/test-suite/unicode_strings.i @@ -4,12 +4,12 @@ %inline %{ -const char* test_c_str(void) { - return "h\xe9llo"; +const char* non_utf8_c_str(void) { + return "h\xe9llo w\xc3\xb6rld"; } -std::string test_std_string(void) { - return std::string("h\xe9llo"); +std::string non_utf8_std_string(void) { + return std::string("h\xe9llo w\xc3\xb6rld"); } %}