Merge branch 'issue185' into devel

This commit is contained in:
John Evans 2014-03-13 21:18:36 -04:00
commit 9ff572f9fd
2 changed files with 46 additions and 19 deletions

View file

@ -2656,10 +2656,20 @@ class XMLBox(Jp2kBox):
warnings.warn(msg, UserWarning)
# Strip out any trailing nulls, as they can foul up XML parsing.
# Remove any byte order markers.
text = text.rstrip(chr(0))
if u'\ufeff' in text:
msg = 'An illegal BOM (byte order marker) was detected and '
msg += 'removed from the XML contents in the box starting at byte '
msg += 'offset {0}'.format(offset)
warnings.warn(msg)
text = text.replace(u'\ufeff', '')
# Remove any encoding declaration.
if text.startswith('<?xml version="1.0" encoding="UTF-8"?>'):
text = text[38:]
try:
elt = ET.fromstring(text.encode('utf-8'))
elt = ET.fromstring(text)
xml = ET.ElementTree(elt)
except ET.ParseError as err:
msg = 'A problem was encountered while parsing an XML box:'

View file

@ -94,24 +94,6 @@ class TestXML(unittest.TestCase):
def tearDown(self):
    """Delete the file that ``self.xmlfile`` points at once a test is done."""
    xml_path = self.xmlfile
    os.unlink(xml_path)
@unittest.skipIf(OPJ_DATA_ROOT is None,
                 "OPJ_DATA_ROOT environment variable not set")
def test_invalid_utf8(self):
    """An XML box holding an unparseable byte sequence yields no XML."""
    relative_path = os.path.join(
        'input',
        'nonregression',
        '26ccf3651020967f7778238ef5af08af.SIGFPE.d25.527.jp2')
    filename = opj_data_file(relative_path)

    running_python3 = sys.hexversion >= 0x03000000
    if running_python3:
        # Reading the file is expected to emit a UserWarning.
        with self.assertWarns(UserWarning):
            jp2 = Jp2k(filename)
    else:
        # Python 2's unittest has no assertWarns, so the warning is only
        # silenced here rather than verified.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            jp2 = Jp2k(filename)

    # The offending XML box sits three levels deep; its payload must be None.
    xml_box = jp2.box[3].box[1].box[1]
    self.assertIsNone(xml_box.xml)
def test_negative_file_and_xml(self):
"""The XML should come from only one source."""
xml_object = ET.parse(self.xmlfile)
@ -305,3 +287,38 @@ class TestBadButRecoverableXmlFile(unittest.TestCase):
b'<test>this is a test</test>')
@unittest.skipIf(OPJ_DATA_ROOT is None,
                 "OPJ_DATA_ROOT environment variable not set")
class TestXML_OpjDataRoot(unittest.TestCase):
    """Test suite for XML boxes, requires OPJ_DATA_ROOT.

    The whole class is skipped when OPJ_DATA_ROOT is not set, because every
    test here reads a file located through ``opj_data_file``.
    """

    def _read_expecting_warning(self, filename):
        """Open ``filename`` with Jp2k and return the resulting object.

        On Python 3 the read must emit a UserWarning, otherwise the calling
        test fails.  Python 2's unittest has no ``assertWarns``, so there the
        warning is only suppressed, not verified.
        """
        if sys.hexversion < 0x03000000:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                return Jp2k(filename)
        with self.assertWarns(UserWarning):
            return Jp2k(filename)

    def test_bom(self):
        """A BOM in an XML box is warned about but the XML still parses.

        Issue 185
        """
        filename = opj_data_file(os.path.join('input',
                                              'nonregression',
                                              'issue171.jp2'))
        jp2 = self._read_expecting_warning(filename)
        self.assertIsNotNone(jp2.box[3].xml)

    def test_invalid_utf8(self):
        """Bad byte sequence that cannot be parsed."""
        filename = opj_data_file(os.path.join(
            'input',
            'nonregression',
            '26ccf3651020967f7778238ef5af08af.SIGFPE.d25.527.jp2'))
        jp2 = self._read_expecting_warning(filename)
        # The unparseable XML box is nested three levels deep.
        self.assertIsNone(jp2.box[3].box[1].box[1].xml)