Merge branch 'hfalcic-master' - Python unicode surrogate escape strings
* hfalcic-master:
  unicode_strings test: manually check values instead of using assert
  unicode_strings_runme.py: fix version check
  unicode_strings test: check return values
  Python unicode_strings test case: restrict to Python > 3.0
  Python 3 'surrogateescape' docs: fix div class for Python code
This commit is contained in:
commit
55639cfff0
3 changed files with 21 additions and 11 deletions
|
|
@@ -5962,8 +5962,8 @@ When this method is called from Python 3, the return value is the following
|
|||
text string:
|
||||
</p>
|
||||
|
||||
<div class="code"><pre>
|
||||
>>> s = test.non_utf8_c_str()
|
||||
<div class="targetlang"><pre>
|
||||
>>> s = example.non_utf8_c_str()
|
||||
>>> s
|
||||
'h\udce9llo wörld'
|
||||
</pre></div>
|
||||
|
|
@@ -5974,7 +5974,7 @@ bytes are represented as high surrogate characters that can be used to obtain
|
|||
the original byte sequence:
|
||||
</p>
|
||||
|
||||
<div class="code"><pre>
|
||||
<div class="targetlang"><pre>
|
||||
>>> b = s.encode('utf-8', errors='surrogateescape')
|
||||
>>> b
|
||||
b'h\xe9llo w\xc3\xb6rld'
|
||||
|
|
@@ -5985,7 +5985,7 @@ One can then attempt a different encoding, if desired (or simply leave the
|
|||
byte string as a raw sequence of bytes for use in binary protocols):
|
||||
</p>
|
||||
|
||||
<div class="code"><pre>
|
||||
<div class="targetlang"><pre>
|
||||
>>> b.decode('latin-1')
|
||||
'héllo wörld'
|
||||
</pre></div>
|
||||
|
|
@@ -5995,7 +5995,7 @@ Note, however, that text strings containing surrogate characters are rejected
|
|||
with the default <tt>strict</tt> codec error handler. For example:
|
||||
</p>
|
||||
|
||||
<div class="code"><pre>
|
||||
<div class="targetlang"><pre>
|
||||
>>> with open('test', 'w') as f:
|
||||
... print(s, file=f)
|
||||
...
|
||||
|
|
|
|||
|
|
@@ -1,4 +1,14 @@
|
|||
import sys
|
||||
|
||||
import unicode_strings
|
||||
|
||||
unicode_strings.test_c_str()
|
||||
unicode_strings.test_std_string()
|
||||
# The 'u' string prefix isn't valid in Python 3.0 - 3.2 and is redundant
|
||||
# in 3.3+. Since this file is run through 2to3 before testing, though,
|
||||
# mark this as a unicode string in 2.x so it'll become a str in 3.x.
|
||||
test_string = u'h\udce9llo w\u00f6rld'
|
||||
|
||||
if sys.version_info[0:2] >= (3, 1):
|
||||
if unicode_strings.non_utf8_c_str() != test_string:
|
||||
raise ValueError('Test comparison mismatch')
|
||||
if unicode_strings.non_utf8_std_string() != test_string:
|
||||
raise ValueError('Test comparison mismatch')
|
||||
|
|
|
|||
|
|
@@ -4,12 +4,12 @@
|
|||
|
||||
%inline %{
|
||||
|
||||
const char* test_c_str(void) {
|
||||
return "h\xe9llo";
|
||||
const char* non_utf8_c_str(void) {
|
||||
return "h\xe9llo w\xc3\xb6rld";
|
||||
}
|
||||
|
||||
std::string test_std_string(void) {
|
||||
return std::string("h\xe9llo");
|
||||
std::string non_utf8_std_string(void) {
|
||||
return std::string("h\xe9llo w\xc3\xb6rld");
|
||||
}
|
||||
|
||||
%}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue