Merge branch 'hfalcic-master' - Python unicode surrogate escape strings

* hfalcic-master:
  unicode_strings test: manually check values instead of using assert
  unicode_strings_runme.py: fix version check
  unicode_strings test: check return values
  Python unicode_strings test case: restrict to Python > 3.0
  Python 3 'surrogateescape' docs: fix div class for Python code
This commit is contained in:
William S Fulton 2014-05-25 12:04:36 +01:00
commit 55639cfff0
3 changed files with 21 additions and 11 deletions

View file

@@ -5962,8 +5962,8 @@ When this method is called from Python 3, the return value is the following
text string:
</p>
<div class="code"><pre>
&gt;&gt;&gt; s = test.non_utf8_c_str()
<div class="targetlang"><pre>
&gt;&gt;&gt; s = example.non_utf8_c_str()
&gt;&gt;&gt; s
'h\udce9llo w&#246;rld'
</pre></div>
@@ -5974,7 +5974,7 @@ bytes are represented as low surrogate characters that can be used to obtain
the original byte sequence:
</p>
<div class="code"><pre>
<div class="targetlang"><pre>
&gt;&gt;&gt; b = s.encode('utf-8', errors='surrogateescape')
&gt;&gt;&gt; b
b'h\xe9llo w\xc3\xb6rld'
@@ -5985,7 +5985,7 @@ One can then attempt a different encoding, if desired (or simply leave the
byte string as a raw sequence of bytes for use in binary protocols):
</p>
<div class="code"><pre>
<div class="targetlang"><pre>
&gt;&gt;&gt; b.decode('latin-1')
'h&#233;llo w&#195;&#182;rld'
</pre></div>
@@ -5995,7 +5995,7 @@ Note, however, that text strings containing surrogate characters are rejected
with the default <tt>strict</tt> codec error handler. For example:
</p>
<div class="code"><pre>
<div class="targetlang"><pre>
&gt;&gt;&gt; with open('test', 'w') as f:
... print(s, file=f)
...

View file

@@ -1,4 +1,14 @@
import sys
import unicode_strings
unicode_strings.test_c_str()
unicode_strings.test_std_string()
# The 'u' string prefix isn't valid in Python 3.0 - 3.2 and is redundant
# in 3.3+. Since this file is run through 2to3 before testing, though,
# mark this as a unicode string in 2.x so it'll become a str in 3.x.
test_string = u'h\udce9llo w\u00f6rld'
if sys.version_info[0:2] >= (3, 1):
if unicode_strings.non_utf8_c_str() != test_string:
raise ValueError('Test comparison mismatch')
if unicode_strings.non_utf8_std_string() != test_string:
raise ValueError('Test comparison mismatch')

View file

@@ -4,12 +4,12 @@
%inline %{
const char* test_c_str(void) {
return "h\xe9llo";
const char* non_utf8_c_str(void) {
return "h\xe9llo w\xc3\xb6rld";
}
std::string test_std_string(void) {
return std::string("h\xe9llo");
std::string non_utf8_std_string(void) {
return std::string("h\xe9llo w\xc3\xb6rld");
}
%}