diff --git a/glymur/jp2box.py b/glymur/jp2box.py index 901f9e9..bf88e4d 100644 --- a/glymur/jp2box.py +++ b/glymur/jp2box.py @@ -1870,10 +1870,8 @@ class XMLBox(Jp2kBox): # Strip out any trailing nulls, as they can foul up XML parsing. text = text.rstrip(chr(0)) - # Scan for the start of the xml declaration. - try: - elt = ET.fromstring(text) + elt = ET.fromstring(text.encode('utf-8')) xml = ET.ElementTree(elt) except ParseError as parse_error: msg = 'A problem was encountered while parsing an XML box:' @@ -2749,9 +2747,18 @@ def _pretty_print_xml(xml, level=0): """ xml = copy.deepcopy(xml) _indent(xml.getroot(), level=level) - xmltext = ET.tostring(xml.getroot()).decode('utf-8') + xmltext = ET.tostring(xml.getroot(), encoding='utf-8').decode('utf-8') # Indent it a bit. lst = [(' ' + x) for x in xmltext.split('\n')] - xml = '\n'.join(lst) - return '\n{0}'.format(xml) + try: + xml = '\n'.join(lst) + return '\n{0}'.format(xml) + except UnicodeEncodeError: + # This can happen on Python 2.x if the text contains non-ASCII + # characters. Fall back to emitting the corresponding XML numeric + # character references instead. + xml = u'\n'.join(lst) + text = u'\n{0}'.format(xml) + text = text.encode('ascii', 'xmlcharrefreplace') + return text diff --git a/glymur/test/test_jp2box_xml.py b/glymur/test/test_jp2box_xml.py index ba1ff15..b875188 100644 --- a/glymur/test/test_jp2box_xml.py +++ b/glymur/test/test_jp2box_xml.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ Test suite specifically targeting JP2 box layout. 
""" @@ -23,6 +24,16 @@ import tempfile import warnings import xml.etree.cElementTree as ET +if sys.hexversion < 0x03000000: + from StringIO import StringIO +else: + from io import StringIO + +if sys.hexversion <= 0x03030000: + from mock import patch +else: + from unittest.mock import patch + if sys.hexversion < 0x02070000: import unittest2 as unittest else: @@ -94,8 +105,6 @@ class TestXML(unittest.TestCase): with self.assertRaises((IOError, OSError)): glymur.jp2box.XMLBox(filename=self.xmlfile, xml=xml_object) - @unittest.skipIf(os.name == "nt", - "Problems using NamedTemporaryFile on windows.") def test_basic_xml(self): """Should be able to write a basic XMLBox""" j2k = Jp2k(self.j2kfile) @@ -116,8 +125,6 @@ class TestXML(unittest.TestCase): self.assertEqual(ET.tostring(jp2.box[3].xml.getroot()), b'0') - @unittest.skipIf(os.name == "nt", - "Problems using NamedTemporaryFile on windows.") def test_xml_from_file(self): """Must be able to create an XML box from an XML file.""" j2k = Jp2k(self.j2kfile) @@ -141,6 +148,28 @@ class TestXML(unittest.TestCase): self.assertEqual(neighbor.attrib['name'], 'Malaysia') self.assertEqual(neighbor.attrib['direction'], 'N') + def test_utf8_xml(self): + """Should be able to write/read an XMLBox with utf-8 encoding.""" + # 'Россия' is 'Russia' in Cyrillic, not that it matters. 
+ xml = u""" + Россия""" + with tempfile.NamedTemporaryFile(suffix=".xml") as xmlfile: + xmlfile.write(xml.encode('utf-8')) + xmlfile.flush() + + j2k = glymur.Jp2k(self.j2kfile) + with tempfile.NamedTemporaryFile(suffix=".jp2") as jfile: + jp2 = j2k.wrap(jfile.name) + xmlbox = glymur.jp2box.XMLBox(filename=xmlfile.name) + jp2.append(xmlbox) + + box_xml = jp2.box[-1].xml.getroot() + box_xml_str = ET.tostring(box_xml, + encoding='utf-8').decode('utf-8') + self.assertEqual(box_xml_str, + u'Россия') + + @unittest.skipIf(os.name == "nt", "NamedTemporaryFile issue on windows") class TestJp2kBadXmlFile(unittest.TestCase): @@ -179,7 +208,6 @@ class TestJp2kBadXmlFile(unittest.TestCase): def setUp(self): self.jp2file = glymur.data.nemo() - self.j2kfile = glymur.data.goodstuff() def tearDown(self): pass diff --git a/glymur/test/test_printing.py b/glymur/test/test_printing.py index 2694aa2..ef2b7f2 100644 --- a/glymur/test/test_printing.py +++ b/glymur/test/test_printing.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """Test suite for printing. 
""" # C0302: don't care too much about having too many lines in a test module @@ -15,6 +16,7 @@ import struct import sys import tempfile import warnings +from xml.etree import cElementTree as ET if sys.hexversion < 0x02070000: import unittest2 as unittest @@ -730,6 +732,46 @@ class TestPrinting(unittest.TestCase): expected = '\n'.join(lines) self.assertEqual(actual, expected) + @unittest.skipIf(sys.hexversion < 0x02070000, + "Differences in XML printing between 2.6 and 2.7") + def test_xml_latin1(self): + """Should be able to print an XMLBox with utf-8 encoding (latin1).""" + text = u""" + Strömung""" + if sys.hexversion < 0x03000000: + xml = ET.parse(StringIO(text.encode('utf-8'))) + else: + xml = ET.parse(StringIO(text)) + + xmlbox = glymur.jp2box.XMLBox(xml=xml) + with patch('sys.stdout', new=StringIO()) as fake_out: + print(xmlbox) + actual = fake_out.getvalue().strip() + lines = ["XML Box (xml ) @ (-1, 0)", + " Strömung"] + expected = '\n'.join(lines) + self.assertEqual(actual, expected) + + @unittest.skipIf(sys.hexversion < 0x02070000, + "Differences in XML printing between 2.6 and 2.7") + def test_xml_cyrrilic(self): + """Should be able to print an XMLBox with utf-8 encoding (cyrrillic).""" + text = u""" + Россия""" + if sys.hexversion < 0x03000000: + xml = ET.parse(StringIO(text.encode('utf-8'))) + else: + xml = ET.parse(StringIO(text)) + + xmlbox = glymur.jp2box.XMLBox(xml=xml) + with patch('sys.stdout', new=StringIO()) as fake_out: + print(xmlbox) + actual = fake_out.getvalue().strip() + lines = ["XML Box (xml ) @ (-1, 0)", + " Россия"] + expected = '\n'.join(lines) + self.assertEqual(actual, expected) + @unittest.skipIf(OPJ_DATA_ROOT is None, "OPJ_DATA_ROOT environment variable not set") def test_channel_definition(self):