Merge branch 'hfalcic-master' - Python unicode surrogate escape strings
* hfalcic-master:
  unicode_strings test: manually check values instead of using assert
  unicode_strings_runme.py: fix version check
  unicode_strings test: check return values
  Python unicode_strings test case: restrict to Python > 3.0
  Python 3 'surrogateescape' docs: fix div class for Python code
This commit is contained in:
commit
55639cfff0
3 changed files with 21 additions and 11 deletions
|
|
@ -5962,8 +5962,8 @@ When this method is called from Python 3, the return value is the following
|
||||||
text string:
|
text string:
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<div class="code"><pre>
|
<div class="targetlang"><pre>
|
||||||
>>> s = test.non_utf8_c_str()
|
>>> s = example.non_utf8_c_str()
|
||||||
>>> s
|
>>> s
|
||||||
'h\udce9llo wörld'
|
'h\udce9llo wörld'
|
||||||
</pre></div>
|
</pre></div>
|
||||||
|
|
@ -5974,7 +5974,7 @@ bytes are represented as high surrogate characters that can be used to obtain
|
||||||
the original byte sequence:
|
the original byte sequence:
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<div class="code"><pre>
|
<div class="targetlang"><pre>
|
||||||
>>> b = s.encode('utf-8', errors='surrogateescape')
|
>>> b = s.encode('utf-8', errors='surrogateescape')
|
||||||
>>> b
|
>>> b
|
||||||
b'h\xe9llo w\xc3\xb6rld'
|
b'h\xe9llo w\xc3\xb6rld'
|
||||||
|
|
@ -5985,7 +5985,7 @@ One can then attempt a different encoding, if desired (or simply leave the
|
||||||
byte string as a raw sequence of bytes for use in binary protocols):
|
byte string as a raw sequence of bytes for use in binary protocols):
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<div class="code"><pre>
|
<div class="targetlang"><pre>
|
||||||
>>> b.decode('latin-1')
|
>>> b.decode('latin-1')
|
||||||
'héllo wörld'
|
'héllo wörld'
|
||||||
</pre></div>
|
</pre></div>
|
||||||
|
|
@ -5995,7 +5995,7 @@ Note, however, that text strings containing surrogate characters are rejected
|
||||||
with the default <tt>strict</tt> codec error handler. For example:
|
with the default <tt>strict</tt> codec error handler. For example:
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<div class="code"><pre>
|
<div class="targetlang"><pre>
|
||||||
>>> with open('test', 'w') as f:
|
>>> with open('test', 'w') as f:
|
||||||
... print(s, file=f)
|
... print(s, file=f)
|
||||||
...
|
...
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,14 @@
|
||||||
|
import sys
|
||||||
|
|
||||||
import unicode_strings
|
import unicode_strings
|
||||||
|
|
||||||
unicode_strings.test_c_str()
|
# The 'u' string prefix isn't valid in Python 3.0 - 3.2 and is redundant
|
||||||
unicode_strings.test_std_string()
|
# in 3.3+. Since this file is run through 2to3 before testing, though,
|
||||||
|
# mark this as a unicode string in 2.x so it'll become a str in 3.x.
|
||||||
|
test_string = u'h\udce9llo w\u00f6rld'
|
||||||
|
|
||||||
|
if sys.version_info[0:2] >= (3, 1):
|
||||||
|
if unicode_strings.non_utf8_c_str() != test_string:
|
||||||
|
raise ValueError('Test comparison mismatch')
|
||||||
|
if unicode_strings.non_utf8_std_string() != test_string:
|
||||||
|
raise ValueError('Test comparison mismatch')
|
||||||
|
|
|
||||||
|
|
@ -4,12 +4,12 @@
|
||||||
|
|
||||||
%inline %{
|
%inline %{
|
||||||
|
|
||||||
const char* test_c_str(void) {
|
const char* non_utf8_c_str(void) {
|
||||||
return "h\xe9llo";
|
return "h\xe9llo w\xc3\xb6rld";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string test_std_string(void) {
|
std::string non_utf8_std_string(void) {
|
||||||
return std::string("h\xe9llo");
|
return std::string("h\xe9llo w\xc3\xb6rld");
|
||||||
}
|
}
|
||||||
|
|
||||||
%}
|
%}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue