diff --git a/glymur/jp2box.py b/glymur/jp2box.py
index 901f9e9..bf88e4d 100644
--- a/glymur/jp2box.py
+++ b/glymur/jp2box.py
@@ -1870,10 +1870,8 @@ class XMLBox(Jp2kBox):
# Strip out any trailing nulls, as they can foul up XML parsing.
text = text.rstrip(chr(0))
- # Scan for the start of the xml declaration.
-
try:
- elt = ET.fromstring(text)
+ elt = ET.fromstring(text.encode('utf-8'))
xml = ET.ElementTree(elt)
except ParseError as parse_error:
msg = 'A problem was encountered while parsing an XML box:'
@@ -2749,9 +2747,18 @@ def _pretty_print_xml(xml, level=0):
"""
xml = copy.deepcopy(xml)
_indent(xml.getroot(), level=level)
- xmltext = ET.tostring(xml.getroot()).decode('utf-8')
+ xmltext = ET.tostring(xml.getroot(), encoding='utf-8').decode('utf-8')
# Indent it a bit.
lst = [(' ' + x) for x in xmltext.split('\n')]
- xml = '\n'.join(lst)
- return '\n{0}'.format(xml)
+ try:
+ xml = '\n'.join(lst)
+ return '\n{0}'.format(xml)
+ except UnicodeEncodeError:
+ # This can happen on Python 2.x if the text contains certain non-ASCII
+ # characters. Fall back to emitting the corresponding XML character
+ # references instead.
+ xml = u'\n'.join(lst)
+ text = u'\n{0}'.format(xml)
+ text = text.encode('ascii', 'xmlcharrefreplace')
+ return text
diff --git a/glymur/test/test_jp2box_xml.py b/glymur/test/test_jp2box_xml.py
index ba1ff15..b875188 100644
--- a/glymur/test/test_jp2box_xml.py
+++ b/glymur/test/test_jp2box_xml.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
"""
Test suite specifically targeting JP2 box layout.
"""
@@ -23,6 +24,16 @@ import tempfile
import warnings
import xml.etree.cElementTree as ET
+if sys.hexversion < 0x03000000:
+ from StringIO import StringIO
+else:
+ from io import StringIO
+
+if sys.hexversion <= 0x03030000:
+ from mock import patch
+else:
+ from unittest.mock import patch
+
if sys.hexversion < 0x02070000:
import unittest2 as unittest
else:
@@ -94,8 +105,6 @@ class TestXML(unittest.TestCase):
with self.assertRaises((IOError, OSError)):
glymur.jp2box.XMLBox(filename=self.xmlfile, xml=xml_object)
- @unittest.skipIf(os.name == "nt",
- "Problems using NamedTemporaryFile on windows.")
def test_basic_xml(self):
"""Should be able to write a basic XMLBox"""
j2k = Jp2k(self.j2kfile)
@@ -116,8 +125,6 @@ class TestXML(unittest.TestCase):
self.assertEqual(ET.tostring(jp2.box[3].xml.getroot()),
b'0')
- @unittest.skipIf(os.name == "nt",
- "Problems using NamedTemporaryFile on windows.")
def test_xml_from_file(self):
"""Must be able to create an XML box from an XML file."""
j2k = Jp2k(self.j2kfile)
@@ -141,6 +148,28 @@ class TestXML(unittest.TestCase):
self.assertEqual(neighbor.attrib['name'], 'Malaysia')
self.assertEqual(neighbor.attrib['direction'], 'N')
+ def test_utf8_xml(self):
+ """Should be able to write/read an XMLBox with utf-8 encoding."""
+ # 'Россия' is 'Russia' in Cyrillic, not that it matters.
+ xml = u"""
+ Россия"""
+ with tempfile.NamedTemporaryFile(suffix=".xml") as xmlfile:
+ xmlfile.write(xml.encode('utf-8'))
+ xmlfile.flush()
+
+ j2k = glymur.Jp2k(self.j2kfile)
+ with tempfile.NamedTemporaryFile(suffix=".jp2") as jfile:
+ jp2 = j2k.wrap(jfile.name)
+ xmlbox = glymur.jp2box.XMLBox(filename=xmlfile.name)
+ jp2.append(xmlbox)
+
+ box_xml = jp2.box[-1].xml.getroot()
+ box_xml_str = ET.tostring(box_xml,
+ encoding='utf-8').decode('utf-8')
+ self.assertEqual(box_xml_str,
+ u'Россия')
+
+
@unittest.skipIf(os.name == "nt", "NamedTemporaryFile issue on windows")
class TestJp2kBadXmlFile(unittest.TestCase):
@@ -179,7 +208,6 @@ class TestJp2kBadXmlFile(unittest.TestCase):
def setUp(self):
self.jp2file = glymur.data.nemo()
- self.j2kfile = glymur.data.goodstuff()
def tearDown(self):
pass
diff --git a/glymur/test/test_printing.py b/glymur/test/test_printing.py
index 2694aa2..ef2b7f2 100644
--- a/glymur/test/test_printing.py
+++ b/glymur/test/test_printing.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
"""Test suite for printing.
"""
# C0302: don't care too much about having too many lines in a test module
@@ -15,6 +16,7 @@ import struct
import sys
import tempfile
import warnings
+from xml.etree import cElementTree as ET
if sys.hexversion < 0x02070000:
import unittest2 as unittest
@@ -730,6 +732,46 @@ class TestPrinting(unittest.TestCase):
expected = '\n'.join(lines)
self.assertEqual(actual, expected)
+ @unittest.skipIf(sys.hexversion < 0x02070000,
+ "Differences in XML printing between 2.6 and 2.7")
+ def test_xml_latin1(self):
+ """Should be able to print an XMLBox with utf-8 encoding (latin1)."""
+ text = u"""
+ Strömung"""
+ if sys.hexversion < 0x03000000:
+ xml = ET.parse(StringIO(text.encode('utf-8')))
+ else:
+ xml = ET.parse(StringIO(text))
+
+ xmlbox = glymur.jp2box.XMLBox(xml=xml)
+ with patch('sys.stdout', new=StringIO()) as fake_out:
+ print(xmlbox)
+ actual = fake_out.getvalue().strip()
+ lines = ["XML Box (xml ) @ (-1, 0)",
+ " Strömung"]
+ expected = '\n'.join(lines)
+ self.assertEqual(actual, expected)
+
+ @unittest.skipIf(sys.hexversion < 0x02070000,
+ "Differences in XML printing between 2.6 and 2.7")
+ def test_xml_cyrrilic(self):
+ """Should be able to print an XMLBox with utf-8 encoding (Cyrillic)."""
+ text = u"""
+ Россия"""
+ if sys.hexversion < 0x03000000:
+ xml = ET.parse(StringIO(text.encode('utf-8')))
+ else:
+ xml = ET.parse(StringIO(text))
+
+ xmlbox = glymur.jp2box.XMLBox(xml=xml)
+ with patch('sys.stdout', new=StringIO()) as fake_out:
+ print(xmlbox)
+ actual = fake_out.getvalue().strip()
+ lines = ["XML Box (xml ) @ (-1, 0)",
+ " Россия"]
+ expected = '\n'.join(lines)
+ self.assertEqual(actual, expected)
+
@unittest.skipIf(OPJ_DATA_ROOT is None,
"OPJ_DATA_ROOT environment variable not set")
def test_channel_definition(self):