Remove xmlserializer dependency

jsdom can be counted on to do that for us, these days, in a more standards-compliant way. A notable change is that void elements get serialized like <br /> instead of <br/>.
This commit is contained in:
Domenic Denicola 2020-10-22 19:09:43 -04:00
commit e13ca0de74
4 changed files with 37 additions and 43 deletions

View file

@ -2,7 +2,6 @@
const path = require("path");
const fs = require("fs").promises;
const throat = require("throat");
const serializeToXML = require("xmlserializer").serializeToString;
const { JSDOM } = require("jsdom");
const substitutions = require("./substitutions.json");
@ -139,10 +138,11 @@ function getBodyXML(chapter, contentEl) {
bodyEl.appendChild(contentEl.firstChild);
}
let xml = serializeToXML(bodyEl);
const xmlSerializer = new contentEl.ownerDocument.defaultView.XMLSerializer();
let xml = xmlSerializer.serializeToString(bodyEl);
// Fix recurring strange pattern of extra <br> in <p>...<em>...<br>\n</em></p>
xml = xml.replace(/<br\/>\s*<\/em><\/p>/g, "</em></p>");
xml = xml.replace(/<br \/>\s*<\/em><\/p>/g, "</em></p>");
// There are way too many nonbreaking spaces where they don't belong.
// If they show up three in a row, then let them live. Otherwise, they die.
@ -203,17 +203,17 @@ function getBodyXML(chapter, contentEl) {
fixEms();
// Similar problems occur in Ward with <b> and <strong> as do in Worm with <em>s
xml = xml.replace(/<b\/>/g, "");
xml = xml.replace(/<b>(\s*<br\/>\s*)<\/b>/g, "$1");
xml = xml.replace(/<strong>(\s*<br\/>\s*)<\/strong>/g, "$1");
xml = xml.replace(/<b \/>/g, "");
xml = xml.replace(/<b>(\s*<br \/>\s*)<\/b>/g, "$1");
xml = xml.replace(/<strong>(\s*<br \/>\s*)<\/strong>/g, "$1");
xml = xml.replace(/<\/strong>(\s*)<strong>/g, "$1");
xml = xml.replace(/<strong>@<\/strong>/g, "@");
xml = xml.replace(/<br\/>(\s*)<\/strong>/g, "</strong><br/>$1");
xml = xml.replace(/<br \/>(\s*)<\/strong>/g, "</strong><br />$1");
xml = xml.replace(/(\s*)<\/strong>/g, "</strong>$1");
// No need for line breaks before paragraph ends
// These often occur with the <br>s inside <b>/<strong> fixed above.
xml = xml.replace(/<br\/>\s*<\/p>/g, "</p>");
xml = xml.replace(/<br \/>\s*<\/p>/g, "</p>");
// Fix possessive of names ending in "s"
// Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
@ -298,12 +298,12 @@ function getBodyXML(chapter, contentEl) {
// Use <hr> for separators
// https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/ has "super-separators" ("⊙ ⊙ ⊙ ⊙ ⊙") which we
// leave untouched for now.
xml = xml.replace(/<p>■<\/p>/g, "<hr/>");
xml = xml.replace(/<p style="text-align:center;">■<\/p>/g, "<hr/>");
xml = xml.replace(/<p style="text-align: center;">⊙<\/p>/g, "<hr/>");
xml = xml.replace(/<p style="text-align: center;"><strong>⊙<\/strong><\/p>/g, "<hr/>");
xml = xml.replace(/<p style="text-align: center;"><em><strong>⊙<\/strong><\/em><\/p>/g, "<hr/>");
xml = xml.replace(/<p style="text-align: center;"><strong>⊙⊙<\/strong><\/p>/g, "<hr/>");
xml = xml.replace(/<p>■<\/p>/g, "<hr />");
xml = xml.replace(/<p style="text-align:center;">■<\/p>/g, "<hr />");
xml = xml.replace(/<p style="text-align: center;">⊙<\/p>/g, "<hr />");
xml = xml.replace(/<p style="text-align: center;"><strong>⊙<\/strong><\/p>/g, "<hr />");
xml = xml.replace(/<p style="text-align: center;"><em><strong>⊙<\/strong><\/em><\/p>/g, "<hr />");
xml = xml.replace(/<p style="text-align: center;"><strong>⊙⊙<\/strong><\/p>/g, "<hr />");
// Fix recurring miscapitalization with questions
xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");

View file

@ -5,7 +5,7 @@
"after": "bugs: flies, ants"
},
{
"before": "<p><em>Brief note from the author: This story isnt intended for young or sensitive readers. Readers who are on the lookout for <a href=\"http://www.urbandictionary.com/define.php?term=Trigger%20warning&amp;defid=6195009\">trigger warnings</a> are advised to give Worm a pass.</em></p>\n<hr/>\n",
"before": "<p><em>Brief note from the author: This story isnt intended for young or sensitive readers. Readers who are on the lookout for <a href=\"http://www.urbandictionary.com/define.php?term=Trigger%20warning&amp;defid=6195009\">trigger warnings</a> are advised to give Worm a pass.</em></p>\n<hr />\n",
"after": "",
"_comment": "The pseudo-trigger warning is out of place in an eBook."
}
@ -906,15 +906,15 @@
"after": "crash when the wave rolled"
},
{
"regExp": "\n ?<em>\\s*([^<]+)</em>(<br/>|</p>)",
"regExp": "\n ?<em>\\s*([^<]+)</em>(<br />|</p>)",
"replacement": "\n<p style=\"padding-left:30px;\"><i>$1</i></p>"
},
{
"regExp": "\n<p style=\"padding-left:30px;\"><em>([^<]+)</em>(<br/>|</p>)",
"regExp": "\n<p style=\"padding-left:30px;\"><em>([^<]+)</em>(<br />|</p>)",
"replacement": "\n<p style=\"padding-left:30px;\"><i>$1</i></p>"
},
{
"regExp": "\n<p style=\"padding-left:30px;\"><em>([^<\n]+)<br/>\n([^>\n]+)</em><br/>\n",
"regExp": "\n<p style=\"padding-left:30px;\"><em>([^<\n]+)<br />\n([^>\n]+)</em><br />\n",
"replacement": "\n<p style=\"padding-left:30px;\"><i>$1</i></p>\n<p style=\"padding-left:30px;\"><i>$2</i></p>\n"
}
],
@ -1770,15 +1770,15 @@
"replacement": "<p>"
},
{
"before": "agreed-upon confidentiality.<br/>\n■ ",
"before": "agreed-upon confidentiality.<br />\n■ ",
"after": "agreed-upon confidentiality.</p>\n<ul>\n<li>"
},
{
"regExp": "<br/>\n■ ",
"regExp": "<br />\n■ ",
"replacement": "</li>\n<li>"
},
{
"before": "three, male.<br/>\nBoth vials",
"before": "three, male.<br />\nBoth vials",
"after": "three, male.</li>\n</ul>\n<p>Both vials"
},
{
@ -1904,24 +1904,24 @@
"after": "<p><i>Sweet Honey—</i></p>"
},
{
"before": "<p><em>Love me, love me, you know you wanna love me…</em><br/>\n<em>Love me, love me, you know you wanna love me…</em></p>",
"after": "<p><i>Love me, love me, you know you wanna love me…<br/>\nLove me, love me, you know you wanna love me…</i></p>"
"before": "<p><em>Love me, love me, you know you wanna love me…</em><br />\n<em>Love me, love me, you know you wanna love me…</em></p>",
"after": "<p><i>Love me, love me, you know you wanna love me…<br />\nLove me, love me, you know you wanna love me…</i></p>"
},
{
"before": "<p><em>Love me, you?<br/>\nLove me, true?</em></p>",
"after": "<p><i>Love me, you?<br/>\nLove me, true?</i></p>"
"before": "<p><em>Love me, you?<br />\nLove me, true?</em></p>",
"after": "<p><i>Love me, you?<br />\nLove me, true?</i></p>"
},
{
"before": "<p><em>Crazed, kooky, cracked, crazy,</em> <br/>\n<em>Nutty, barmy, mad for me…</em></p>",
"after": "<p><i>Crazed, kooky, cracked, crazy,<br/>\nNutty, barmy, mad for me…</i></p>"
"before": "<p><em>Crazed, kooky, cracked, crazy,</em> <br />\n<em>Nutty, barmy, mad for me…</em></p>",
"after": "<p><i>Crazed, kooky, cracked, crazy,<br />\nNutty, barmy, mad for me…</i></p>"
},
{
"before": "<p><em><em>Crazed, kooky, cracked, crazy,</em><br/>\nMental, dotty, whacked, loopy…</em></p>",
"after": "<p><i>Crazed, kooky, cracked, crazy,<br/>\nMental, dotty, whacked, loopy…</i></p>"
"before": "<p><em><em>Crazed, kooky, cracked, crazy,</em><br />\nMental, dotty, whacked, loopy…</em></p>",
"after": "<p><i>Crazed, kooky, cracked, crazy,<br />\nMental, dotty, whacked, loopy…</i></p>"
},
{
"before": "<p><em>Crazed, kooky, cracked, crazy,<br/>\n<em>Nutty, screwy, mentally diseased…</em><br/>\n</em> She ",
"after": "<p><i>Crazed, kooky, cracked, crazy,<br/>\nNutty, screwy, mentally diseased…</i></p>\n<p>She "
"before": "<p><em>Crazed, kooky, cracked, crazy,<br />\n<em>Nutty, screwy, mentally diseased…</em><br />\n</em> She ",
"after": "<p><i>Crazed, kooky, cracked, crazy,<br />\nNutty, screwy, mentally diseased…</i></p>\n<p>She "
},
{
"before": "<em>Ça va?</em>",
@ -2206,7 +2206,7 @@
],
"https://parahumans.wordpress.com/2012/09/11/prey-14-3/": [
{
"before": "truck reached<br/>\nthe other Nine",
"before": "truck reached<br />\nthe other Nine",
"after": "truck reached the other Nine"
},
{
@ -2266,7 +2266,7 @@
],
"https://parahumans.wordpress.com/2012/10/18/interlude-15-donation-bonus/": [
{
"before": "volunteered, <em>asked<br/>\n</em> to",
"before": "volunteered, <em>asked<br />\n</em> to",
"after": "volunteered, <em>asked</em> to"
},
{
@ -3365,7 +3365,7 @@
"https://parahumans.wordpress.com/2013/06/25/interlude-24-donation-bonus-1/": [
{
"before": "silence like this. Divide: They",
"after": "silence like this.<br/> Divide: They"
"after": "silence like this.<br /> Divide: They"
},
{
"before": "Bay,” Wanton said. “Not",
@ -3441,7 +3441,7 @@
},
{
"before": "SimurghNotes",
"after": "Simurgh<br/>Notes"
"after": "Simurgh<br />Notes"
},
{
"before": "avoided-",
@ -4238,7 +4238,7 @@
"after": "Imp: Ive"
},
{
"before": "Tattletale:<br/>\nwaiting?",
"before": "Tattletale:<br />\nwaiting?",
"after": "Tattletale: waiting?"
},
{
@ -4308,7 +4308,7 @@
],
"https://www.parahumans.net/2017/09/11/daybreak-1-1/": [
{
"before": "<p>Ward is the second work in the Parahumans series, and reading <strong><a href=\"https://parahumans.wordpress.com/\">Worm</a></strong> first is strongly recommended. A lot of this wont make sense otherwise and if you do find yourself a fan of the universe, the spoilers in Ward will affect the reading of the other work.</p>\n<p>Ward is not recommended for young or sensitive readers.</p>\n<hr/>",
"before": "<p>Ward is the second work in the Parahumans series, and reading <strong><a href=\"https://parahumans.wordpress.com/\">Worm</a></strong> first is strongly recommended. A lot of this wont make sense otherwise and if you do find yourself a fan of the universe, the spoilers in Ward will affect the reading of the other work.</p>\n<p>Ward is not recommended for young or sensitive readers.</p>\n<hr />",
"after": "",
"_comment": "This is out of place in an eBook."
}

5
npm-shrinkwrap.json generated
View file

@ -2060,11 +2060,6 @@
"resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz",
"integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw=="
},
"xmlserializer": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/xmlserializer/-/xmlserializer-0.6.1.tgz",
"integrity": "sha512-FNb0eEqqUUbnuvxuHqNuKH8qCGKqxu+558Zi8UzOoQk8Z9LdvpONK+v7m3gpKVHrk5Aq+0nNLsKxu/6OYh7Umw=="
},
"xtend": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",

View file

@ -28,7 +28,6 @@
"jsdom": "^16.2.2",
"requisition": "^1.5.0",
"throat": "^5.0.0",
"xmlserializer": "^0.6.1",
"yargs": "^15.3.1"
},
"devDependencies": {