From 4c0da0dd856f3cd60e39770fc30af002ef39439b Mon Sep 17 00:00:00 2001 From: John Evans Date: Sat, 12 Oct 2013 18:57:35 -0400 Subject: [PATCH 1/3] Added xml box write test with utf-8 content. #131 --- glymur/test/test_jp2box_xml.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/glymur/test/test_jp2box_xml.py b/glymur/test/test_jp2box_xml.py index ba1ff15..0bd9a59 100644 --- a/glymur/test/test_jp2box_xml.py +++ b/glymur/test/test_jp2box_xml.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ Test suite specifically targeting JP2 box layout. """ @@ -94,8 +95,6 @@ class TestXML(unittest.TestCase): with self.assertRaises((IOError, OSError)): glymur.jp2box.XMLBox(filename=self.xmlfile, xml=xml_object) - @unittest.skipIf(os.name == "nt", - "Problems using NamedTemporaryFile on windows.") def test_basic_xml(self): """Should be able to write a basic XMLBox""" j2k = Jp2k(self.j2kfile) @@ -116,8 +115,26 @@ class TestXML(unittest.TestCase): self.assertEqual(ET.tostring(jp2.box[3].xml.getroot()), b'0') - @unittest.skipIf(os.name == "nt", - "Problems using NamedTemporaryFile on windows.") + def test_utf8_xml(self): + """Should be able to write/read an XMLBox with utf-8 encoding.""" + j2k = Jp2k(self.j2kfile) + + self.jp2h.box = [self.ihdr, self.colr] + + xml_header = u'' + xml_string = u'Россия' + the_xml = ET.fromstring((xml_header + xml_string).encode('utf-8')) + xmlb = glymur.jp2box.XMLBox(xml=the_xml) + self.assertEqual(ET.tostring(xmlb.xml, encoding='utf-8').decode('utf-8'), xml_string) + + boxes = [self.jp2b, self.ftyp, self.jp2h, xmlb, self.jp2c] + + with tempfile.NamedTemporaryFile(suffix=".jp2") as tfile: + j2k.wrap(tfile.name, boxes=boxes) + jp2 = Jp2k(tfile.name) + self.assertEqual(ET.tostring(jp2.box[3].xml.getroot(), encoding='utf-8').decode('utf-8'), + xml_string) + def test_xml_from_file(self): """Must be able to create an XML box from an XML file.""" j2k = Jp2k(self.j2kfile) From 79d2c134c80db0dfd8a5f500e5c86d60efde8b93 Mon Sep 17 00:00:00 2001 From: jevans Date: Sat, 12 Oct 2013 21:31:11 -0400 Subject: [PATCH 2/3] XML text now constructed via utf-8, not ascii. #131 --- glymur/jp2box.py | 4 +- glymur/test/test_jp2box_xml.py | 81 +++++++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 24 deletions(-) diff --git a/glymur/jp2box.py b/glymur/jp2box.py index 901f9e9..f213cab 100644 --- a/glymur/jp2box.py +++ b/glymur/jp2box.py @@ -1870,10 +1870,8 @@ class XMLBox(Jp2kBox): # Strip out any trailing nulls, as they can foul up XML parsing. text = text.rstrip(chr(0)) - # Scan for the start of the xml declaration. - try: - elt = ET.fromstring(text) + elt = ET.fromstring(text.encode('utf-8')) xml = ET.ElementTree(elt) except ParseError as parse_error: msg = 'A problem was encountered while parsing an XML box:' diff --git a/glymur/test/test_jp2box_xml.py b/glymur/test/test_jp2box_xml.py index 0bd9a59..7e6980f 100644 --- a/glymur/test/test_jp2box_xml.py +++ b/glymur/test/test_jp2box_xml.py @@ -24,6 +24,16 @@ import tempfile import warnings import xml.etree.cElementTree as ET +if sys.hexversion < 0x03000000: + from StringIO import StringIO +else: + from io import StringIO + +if sys.hexversion <= 0x03030000: + from mock import patch +else: + from unittest.mock import patch + if sys.hexversion < 0x02070000: import unittest2 as unittest else: @@ -115,26 +125,6 @@ class TestXML(unittest.TestCase): self.assertEqual(ET.tostring(jp2.box[3].xml.getroot()), b'0') - def test_utf8_xml(self): - """Should be able to write/read an XMLBox with utf-8 encoding.""" - j2k = Jp2k(self.j2kfile) - - self.jp2h.box = [self.ihdr, self.colr] - - xml_header = u'' - xml_string = u'Россия' - the_xml = ET.fromstring((xml_header + xml_string).encode('utf-8')) - xmlb = glymur.jp2box.XMLBox(xml=the_xml) - self.assertEqual(ET.tostring(xmlb.xml, encoding='utf-8').decode('utf-8'), xml_string) - - boxes = [self.jp2b, self.ftyp, self.jp2h, xmlb, self.jp2c] - - with tempfile.NamedTemporaryFile(suffix=".jp2") as tfile: - j2k.wrap(tfile.name, boxes=boxes) - jp2 = Jp2k(tfile.name) - self.assertEqual(ET.tostring(jp2.box[3].xml.getroot(), encoding='utf-8').decode('utf-8'), - xml_string) - def test_xml_from_file(self): """Must be able to create an XML box from an XML file.""" j2k = Jp2k(self.j2kfile) @@ -159,6 +149,56 @@ class TestXML(unittest.TestCase): self.assertEqual(neighbor.attrib['direction'], 'N') +@unittest.skipIf(os.name == "nt", "Temporary file issue on window.") +class TestUTF8XML(unittest.TestCase): + """Test suite for UTF-8 XML boxes.""" + + def setUp(self): + """Create a JP2 file with a UTF-8 XML box.""" + self.j2kfile = glymur.data.goodstuff() + + # 'Россия' is 'Russia' in Cyrillic, not that it matters. + + xml = u""" + Россия""" + with tempfile.NamedTemporaryFile(suffix=".xml", delete=False) as tfile: + tfile.write(xml.encode('utf-8')) + tfile.flush() + self.xmlfile = tfile.name + + j2k = glymur.Jp2k(self.j2kfile) + with tempfile.NamedTemporaryFile(suffix=".jp2", delete=False) as tfile: + jp2 = j2k.wrap(tfile.name) + xmlbox = glymur.jp2box.XMLBox(filename=self.xmlfile) + jp2.append(xmlbox) + self.jp2_xml_file = tfile.name + + def tearDown(self): + os.unlink(self.xmlfile) + os.unlink(self.jp2_xml_file) + + def test_utf8_xml(self): + """Should be able to write/read an XMLBox with utf-8 encoding.""" + jp2 = Jp2k(self.jp2_xml_file) + box_xml = jp2.box[-1].xml.getroot() + box_xml_str = ET.tostring(box_xml, encoding='utf-8').decode('utf-8') + self.assertEqual(box_xml_str, + u'Россия') + + @unittest.skip("Does not print properly.") + def test_printing_utf8_xml(self): + """Should be able to print an XMLBox with utf-8 encoding.""" + jp2 = Jp2k(self.jp2_xml_file) + with patch('sys.stdout', new=StringIO()) as fake_out: + print(jp2.box[-1]) + actual = fake_out.getvalue().strip() + lines = ["XML Box (xml ) @ (115305, 39)", + " u'Россия'"] + expected = '\n'.join(lines) + self.assertEqual(actual, expected) + + + @unittest.skipIf(os.name == "nt", "NamedTemporaryFile issue on windows") class TestJp2kBadXmlFile(unittest.TestCase): """Test suite for bad XML box situations""" @@ -196,7 +236,6 @@ class TestJp2kBadXmlFile(unittest.TestCase): def setUp(self): self.jp2file = glymur.data.nemo() - self.j2kfile = glymur.data.goodstuff() def tearDown(self): pass From ff97e0fb8244048d18c26206f9abea87beac290b Mon Sep 17 00:00:00 2001 From: John Evans Date: Sun, 13 Oct 2013 11:40:54 -0400 Subject: [PATCH 3/3] Printing of XML and UUID boxes with non-ascii characters only fully supported in 3.x In 2.x, the XML contents will be printed as entity references instead. It's just too difficult to get both 2.x and 3.x to be entirely consistent, and so 3.x gets the preferential treatment. Closes #131. --- glymur/jp2box.py | 15 ++++++-- glymur/test/test_jp2box_xml.py | 62 ++++++++++------------------------ glymur/test/test_printing.py | 42 +++++++++++++++++++++++ 3 files changed, 71 insertions(+), 48 deletions(-) diff --git a/glymur/jp2box.py b/glymur/jp2box.py index f213cab..bf88e4d 100644 --- a/glymur/jp2box.py +++ b/glymur/jp2box.py @@ -2747,9 +2747,18 @@ def _pretty_print_xml(xml, level=0): """ xml = copy.deepcopy(xml) _indent(xml.getroot(), level=level) - xmltext = ET.tostring(xml.getroot()).decode('utf-8') + xmltext = ET.tostring(xml.getroot(), encoding='utf-8').decode('utf-8') # Indent it a bit. lst = [(' ' + x) for x in xmltext.split('\n')] - xml = '\n'.join(lst) - return '\n{0}'.format(xml) + try: + xml = '\n'.join(lst) + return '\n{0}'.format(xml) + except UnicodeEncodeError: + # This can happen on python 2.x if the character set contains certain + # non-ascii characters. Just print out the corresponding xml char + # entities instead. + xml = u'\n'.join(lst) + text = u'\n{0}'.format(xml) + text = text.encode('ascii', 'xmlcharrefreplace') + return text diff --git a/glymur/test/test_jp2box_xml.py b/glymur/test/test_jp2box_xml.py index 7e6980f..b875188 100644 --- a/glymur/test/test_jp2box_xml.py +++ b/glymur/test/test_jp2box_xml.py @@ -148,54 +148,26 @@ class TestXML(unittest.TestCase): self.assertEqual(neighbor.attrib['name'], 'Malaysia') self.assertEqual(neighbor.attrib['direction'], 'N') - -@unittest.skipIf(os.name == "nt", "Temporary file issue on window.") -class TestUTF8XML(unittest.TestCase): - """Test suite for UTF-8 XML boxes.""" - - def setUp(self): - """Create a JP2 file with a UTF-8 XML box.""" - self.j2kfile = glymur.data.goodstuff() - - # 'Россия' is 'Russia' in Cyrillic, not that it matters. - - xml = u""" - Россия""" - with tempfile.NamedTemporaryFile(suffix=".xml", delete=False) as tfile: - tfile.write(xml.encode('utf-8')) - tfile.flush() - self.xmlfile = tfile.name - - j2k = glymur.Jp2k(self.j2kfile) - with tempfile.NamedTemporaryFile(suffix=".jp2", delete=False) as tfile: - jp2 = j2k.wrap(tfile.name) - xmlbox = glymur.jp2box.XMLBox(filename=self.xmlfile) - jp2.append(xmlbox) - self.jp2_xml_file = tfile.name - - def tearDown(self): - os.unlink(self.xmlfile) - os.unlink(self.jp2_xml_file) - def test_utf8_xml(self): """Should be able to write/read an XMLBox with utf-8 encoding.""" - jp2 = Jp2k(self.jp2_xml_file) - box_xml = jp2.box[-1].xml.getroot() - box_xml_str = ET.tostring(box_xml, encoding='utf-8').decode('utf-8') - self.assertEqual(box_xml_str, - u'Россия') + # 'Россия' is 'Russia' in Cyrillic, not that it matters. + xml = u""" + Россия""" + with tempfile.NamedTemporaryFile(suffix=".xml") as xmlfile: + xmlfile.write(xml.encode('utf-8')) + xmlfile.flush() - @unittest.skip("Does not print properly.") - def test_printing_utf8_xml(self): - """Should be able to print an XMLBox with utf-8 encoding.""" - jp2 = Jp2k(self.jp2_xml_file) - with patch('sys.stdout', new=StringIO()) as fake_out: - print(jp2.box[-1]) - actual = fake_out.getvalue().strip() - lines = ["XML Box (xml ) @ (115305, 39)", - " u'Россия'"] - expected = '\n'.join(lines) - self.assertEqual(actual, expected) + j2k = glymur.Jp2k(self.j2kfile) + with tempfile.NamedTemporaryFile(suffix=".jp2") as jfile: + jp2 = j2k.wrap(jfile.name) + xmlbox = glymur.jp2box.XMLBox(filename=xmlfile.name) + jp2.append(xmlbox) + + box_xml = jp2.box[-1].xml.getroot() + box_xml_str = ET.tostring(box_xml, + encoding='utf-8').decode('utf-8') + self.assertEqual(box_xml_str, + u'Россия') diff --git a/glymur/test/test_printing.py b/glymur/test/test_printing.py index 2694aa2..ef2b7f2 100644 --- a/glymur/test/test_printing.py +++ b/glymur/test/test_printing.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """Test suite for printing. """ # C0302: don't care too much about having too many lines in a test module @@ -15,6 +16,7 @@ import struct import sys import tempfile import warnings +from xml.etree import cElementTree as ET if sys.hexversion < 0x02070000: import unittest2 as unittest @@ -730,6 +732,46 @@ class TestPrinting(unittest.TestCase): expected = '\n'.join(lines) self.assertEqual(actual, expected) + @unittest.skipIf(sys.hexversion < 0x02070000, + "Differences in XML printing between 2.6 and 2.7") + def test_xml_latin1(self): + """Should be able to print an XMLBox with utf-8 encoding (latin1).""" + text = u""" + Strömung""" + if sys.hexversion < 0x03000000: + xml = ET.parse(StringIO(text.encode('utf-8'))) + else: + xml = ET.parse(StringIO(text)) + + xmlbox = glymur.jp2box.XMLBox(xml=xml) + with patch('sys.stdout', new=StringIO()) as fake_out: + print(xmlbox) + actual = fake_out.getvalue().strip() + lines = ["XML Box (xml ) @ (-1, 0)", + " Strömung"] + expected = '\n'.join(lines) + self.assertEqual(actual, expected) + + @unittest.skipIf(sys.hexversion < 0x02070000, + "Differences in XML printing between 2.6 and 2.7") + def test_xml_cyrrilic(self): + """Should be able to print an XMLBox with utf-8 encoding (cyrrillic).""" + text = u""" + Россия""" + if sys.hexversion < 0x03000000: + xml = ET.parse(StringIO(text.encode('utf-8'))) + else: + xml = ET.parse(StringIO(text)) + + xmlbox = glymur.jp2box.XMLBox(xml=xml) + with patch('sys.stdout', new=StringIO()) as fake_out: + print(xmlbox) + actual = fake_out.getvalue().strip() + lines = ["XML Box (xml ) @ (-1, 0)", + " Россия"] + expected = '\n'.join(lines) + self.assertEqual(actual, expected) + @unittest.skipIf(OPJ_DATA_ROOT is None, "OPJ_DATA_ROOT environment variable not set") def test_channel_definition(self):