From 33d88eb52307c530095852499dd844bd2aa18319 Mon Sep 17 00:00:00 2001
From: Domenic Denicola <d@domenic.me>
Date: Sat, 7 Nov 2020 18:37:35 -0500
Subject: [PATCH] Refactor convert-worker.js a bit

This introduces a few fixes around italics.
---
 lib/convert-worker.js  | 353 +++++++++++++++++++++++------------------
 lib/substitutions.json |  20 ++-
 2 files changed, 214 insertions(+), 159 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 9959711..f69df99 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -135,6 +135,10 @@ function getBodyXML(chapter, book, contentEl) {
   // Fix recurring strange pattern of extra <br> in <p>...<em>...<br>\n</em></p>
   xml = xml.replace(/<br \/>\s*<\/em><\/p>/g, "</em></p>");
 
+  // Replace single-word <i>s with <em>s. Other <i>s are probably erroneous too, but these are known-bad.
+  xml = xml.replace(/<i>([^ ]+)<\/i>/g, "<em>$1</em>");
+  xml = xml.replace(/<i>([^ ]+)( +)<\/i>/g, "<em>$1</em>$2");
+
   // There are way too many nonbreaking spaces where they don't belong.
   // If they show up three in a row, then let them live. Otherwise, they die.
   // Also remove any run of them after a period.
@@ -192,8 +196,6 @@ function getBodyXML(chapter, book, contentEl) {
   fixEms();
   fixQuotesAndApostrophes();
   fixEms();
-  fixQuotesAndApostrophes();
-  fixEms();
   xml = xml.replace(/I”m/g, "I’m");
 
   // Similar problems occur in Ward with <b> and <strong> as do in Worm with <em>s
@@ -206,9 +208,155 @@ function getBodyXML(chapter, book, contentEl) {
   xml = xml.replace(/(\s*)<\/strong>/g, "</strong>$1");
 
   // No need for line breaks before paragraph ends
-  // These often occur with the <br>s inside <b>/<strong> fixed above.
+  // These often occur with the <br>s inside <b>/<strong>/<em>/<i> fixed above.
   xml = xml.replace(/<br \/>\s*<\/p>/g, "</p>");
 
+  // Fix missing spaces after commas
+  xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
+
+  // Fix bad periods and spacing/markup surrounding them
+  xml = xml.replace(/\.\.<\/p>/g, ".</p>");
+  xml = xml.replace(/\.\.”<\/p>/g, ".”</p>");
+  xml = xml.replace(/ \. /g, ". ");
+  xml = xml.replace(/ \.<\/p>/g, ".</p>");
+  xml = xml.replace(/\.<em>\.\./g, "<em>…");
+
+  // Fix extra spaces
+  xml = xml.replace(/ ? <\/p>/g, "</p>");
+  xml = xml.replace(/([a-z]) ,/g, "$1,");
+
+  xml = fixDialogueTags(xml);
+  xml = fixForeignNames(xml);
+  xml = fixEmDashes(xml);
+  xml = enDashJointNames(xml);
+  xml = fixPossessives(xml);
+  xml = cleanSceneBreaks(xml);
+  xml = fixCapitalization(xml, book);
+  xml = fixMispellings(xml);
+  xml = fixHyphens(xml);
+  xml = standardizeSpellings(xml);
+
+  // One-off fixes
+  for (const substitution of substitutions[chapter.url] || []) {
+    if (substitution.before) {
+      const indexOf = xml.indexOf(substitution.before);
+      if (indexOf === -1) {
+        warnings.push(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
+                      `updated at the source, in which case, you should edit substitutions.json.`);
+      }
+      if (indexOf !== xml.lastIndexOf(substitution.before)) {
+        warnings.push(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
+                      `Update substitutions.json for a more precise substitution.`);
+      }
+
+      xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
+    } else if (substitution.regExp) {
+      xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
+    } else {
+      warnings.push(`Invalid substitution specified for ${chapter.url}`);
+    }
+  }
+
+  // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a <html>.
+  // Use this opportunity to insert a comment pointing to the original URL, for reference.
+  xml = xml.replace(
+    /<body xmlns="http:\/\/www.w3.org\/1999\/xhtml">/,
+    `<body>\n<!-- ${chapter.url} -->\n`);
+
+  return { xml, warnings };
+}
+
+function fixDialogueTags(xml) {
+  // Dialogue-tag cleanups; takes and returns the markup string. First: miscapitalized tags after questions.
+  xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked"); // one or two whitespace chars become a single space
+  xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
+
+  // The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
+  // > “I didn’t get much done,” Greg said, “I got distracted by...
+  // This should instead be
+  // > “I didn’t get much done,” Greg said. “I got distracted by...
+  //
+  // Our heuristic is to fix this automatically only when the dialogue tag is two ASCII-letter words (X said/etc.).
+  //
+  // This sometimes overcorrects, as in the following example:
+  // > “Basically,” Alec said, “For your powers to manifest, ...
+  // Here instead we should lowercase the "f". We handle that via one-offs in substitutions.json.
+  //
+  // This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
+  // times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
+  // capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
+  xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2"); // only when a capital A–Z follows the “
+
+  return xml;
+}
+
+function fixForeignNames(xml) {
+  // Normalizes foreign-language names in the markup string. "Yàngbǎn" is consistently missing its diacritics.
+  xml = xml.replace(/Yangban/g, "Yàngbǎn");
+
+  // These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
+  // italicized, so we remove the italics. ([^<]*) also unwraps an <em> with tag-free text trailing the name.
+  xml = xml.replace(/<em>Garama<\/em>/g, "Garama"); // only when the name is the entire <em> content
+  xml = xml.replace(/<em>Thanda<\/em>/g, "Thanda"); // only when the name is the entire <em> content
+  xml = xml.replace(/<em>Sifara([^<]*)<\/em>/g, "Sifara$1");
+  xml = xml.replace(/<em>Moord Nag([^<]*)<\/em>/g, "Moord Nag$1");
+  xml = xml.replace(/<em>Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
+  xml = xml.replace(/<em>Turanta([^<]*)<\/em>/g, "Turanta$1");
+
+  return xml;
+}
+
+function fixEmDashes(xml) { // Rewrites hyphens/en dashes used as dashes to em dashes; later rules see earlier output.
+  xml = xml.replace(/ – /g, "—"); // en dash with a space on each side
+  xml = xml.replace(/“((?:<em>)?)-/g, "“$1—"); // hyphen right after an opening quote (optionally inside <em>)
+  xml = xml.replace(/-[,.]?”/g, "—”"); // hyphen before a closing quote; an intervening , or . is dropped
+  xml = xml.replace(/-(!|\?)”/g, "—$1”"); // hyphen before !” or ?”; the ! or ? is kept
+  xml = xml.replace(/-[,.]?<\/em>”/g, "—</em>”"); // hyphen before </em>”; an intervening , or . is dropped
+  xml = xml.replace(/-“/g, "—”"); // NOTE(review): opening quote becomes a closing one — presumably mis-curled; confirm
+  xml = xml.replace(/<p>-/g, "<p>—"); // hyphen starting a paragraph
+  xml = xml.replace(/-<\/p>/g, "—</p>"); // hyphen ending a paragraph
+  xml = xml.replace(/-<\/em><\/p>/g, "—</em></p>"); // hyphen ending a paragraph inside <em>
+  xml = xml.replace(/\s?\s?–\s?\s?/g, "—"); // every en dash still present, eating up to two whitespace chars per side
+  xml = xml.replace(/-\s\s?/g, "—"); // hyphen followed by one or two whitespace chars
+  xml = xml.replace(/\s?\s-/g, "—"); // hyphen preceded by one or two whitespace chars
+  xml = xml.replace(/\s+—”/g, "—”"); // drop whitespace left in front of an em dash + closing quote
+  xml = xml.replace(/I-I/g, "I—I"); // literal match, no word boundaries
+  xml = xml.replace(/I-uh/g, "I—uh"); // literal match, no word boundaries
+
+  return xml;
+}
+
+function enDashJointNames(xml) {
+  // Joint names should use en dashes, not hyphens. Each known pairing below is matched literally and case-sensitively.
+  xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
+  xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
+  xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
+  xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron"); // also completes "Tristan–Capricorn-Byron" from the line above
+  xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
+  xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
+  xml = xml.replace(/G-N/g, "G–N"); // NOTE(review): unanchored, so it also fires inside longer tokens containing "G-N"
+  xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
+  xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
+  xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
+  xml = xml.replace(/Challenger-Gallant/g, "Challenger–Gallant");
+  xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2"); // optional plural "s" is kept
+  xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
+  xml = xml.replace(/East-West/g, "east–west"); // this one also lowercases both words
+  xml = xml.replace(/(Green|Yellow)-Black/g, "$1–Black");
+  xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
+  xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
+  xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
+  xml = xml.replace(/Weaver-Clockblocker/g, "Weaver–Clockblocker");
+  xml = xml.replace(/Alexandria-Pretender/g, "Alexandria–Pretender");
+  xml = xml.replace(/Night Hag-Nyx/g, "Night Hag–Nyx");
+  xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
+  xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant"); // three-part name: both hyphens change
+  xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
+
+  return xml;
+}
+
+function fixPossessives(xml) {
   // Fix possessive of names ending in "s"
   // Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
   xml = xml.replace(/([^‘])Judas’([^s])/g, "$1Judas’s$2");
@@ -246,62 +394,15 @@ function getBodyXML(chapter, book, contentEl) {
   xml = xml.replace(/([^‘])Ms. Stillons’([^s])/g, "$1Ms. Stillons’s$2");
   xml = xml.replace(/([^‘])Chuckles’([^s])/g, "$1Chuckles’s$2");
 
-  // Fixes dashes
-  xml = xml.replace(/ – /g, "—");
-  xml = xml.replace(/“((?:<em>)?)-/g, "“$1—");
-  xml = xml.replace(/-[,.]?”/g, "—”");
-  xml = xml.replace(/-(!|\?)”/g, "—$1”");
-  xml = xml.replace(/-[,.]?<\/em>”/g, "—</em>”");
-  xml = xml.replace(/-“/g, "—”");
-  xml = xml.replace(/<p>-/g, "<p>—");
-  xml = xml.replace(/-<\/p>/g, "—</p>");
-  xml = xml.replace(/-<\/em><\/p>/g, "—</em></p>");
-  xml = xml.replace(/\s?\s?–\s?\s?/g, "—");
-  xml = xml.replace(/-\s\s?/g, "—");
-  xml = xml.replace(/\s?\s-/g, "—");
-  xml = xml.replace(/\s+—”/g, "—”");
-  xml = xml.replace(/I-I/g, "I—I");
-  xml = xml.replace(/I-uh/g, "I—uh");
-
-  // "X-year-old" should use hyphens; all grammar guides agree. The books are very inconsistent but most often omit
-  // them.
-  xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2");
-  xml = xml.replace(/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old");
-
-  // Fix missing spaces after commas
-  xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
-
-  // Joint names should use em dashes
-  xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
-  xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
-  xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
-  xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
-  xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
-  xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
-  xml = xml.replace(/G-N/g, "G–N");
-  xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
-  xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
-  xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
-  xml = xml.replace(/Challenger-Gallant/g, "Challenger–Gallant");
-  xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2");
-  xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
-  xml = xml.replace(/East-West/g, "east–west");
-  xml = xml.replace(/(Green|Yellow)-Black/g, "$1–Black");
-  xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
-  xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
-  xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
-  xml = xml.replace(/Weaver-Clockblocker/g, "Weaver–Clockblocker");
-  xml = xml.replace(/Alexandria-Pretender/g, "Alexandria–Pretender");
-  xml = xml.replace(/Night Hag-Nyx/g, "Night Hag–Nyx");
-  xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
-  xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
-  xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
+  return xml;
+}
 
+function cleanSceneBreaks(xml) {
   // Normalize scene breaks. <hr> would be more semantically appropriate, but loses the author's intent. This is
   // especially the case in Ward, which uses a variety of different scene breaks.
+
   xml = xml.replace(/<p(?:[^>]*)>■<\/p>/g, `<p style="text-align: center;">■</p>`);
 
-  xml = xml.replace(/<p style="text-align: center;">⊙<\/p>/g, `<p style="text-align: center;">⊙</p>`);
   xml = xml.replace(/<p style="text-align: center;"><strong>⊙<\/strong><\/p>/g, `<p style="text-align: center;">⊙</p>`);
   xml = xml.replace(/<p style="text-align: center;"><em><strong>⊙<\/strong><\/em><\/p>/g,
     `<p style="text-align: center;">⊙</p>`);
@@ -311,40 +412,10 @@ function getBodyXML(chapter, book, contentEl) {
   xml = xml.replace(/<p style="text-align: center;"><strong>⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/g,
     `<p style="text-align: center;">⊙ ⊙ ⊙ ⊙ ⊙</p>`);
 
-  // Fix recurring miscapitalization with questions
-  xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
-  xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
-
-  // Fix bad periods and spacing/markup surrounding them
-  xml = xml.replace(/\.\.<\/p>/g, ".</p>");
-  xml = xml.replace(/\.\.”<\/p>/g, ".”</p>");
-  xml = xml.replace(/ \. /g, ". ");
-  xml = xml.replace(/ \.<\/p>/g, ".</p>");
-  xml = xml.replace(/\.<em>\.\./g, "<em>…");
-
-  // Fix extra spaces
-  xml = xml.replace(/ ? <\/p>/g, "</p>");
-  xml = xml.replace(/([a-z]) ,/g, "$1,");
-
-  // The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
-  // > “I didn’t get much done,” Greg said, “I got distracted by...
-  // This should instead be
-  // > “I didn’t get much done,” Greg said. “I got distracted by...
-  //
-  // Our heuristic is to try to automatically fix this if the dialogue tag is two words (X said/admitted/sighed/etc.).
-  //
-  // This sometimes overcorrects, as in the following example:
-  // > “Basically,” Alec said, “For your powers to manifest, ...
-  // Here instead we should lowercase the "f". We handle that via one-offs in substitutions.json.
-  //
-  // This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
-  // times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
-  // capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
-  xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
-
-  // Replace single-word <i>s with <em>s. Other <i>s are probably erroneous too, but these are known-bad.
-  xml = xml.replace(/<i>([A-Za-z]+)<\/i>/g, "<em>$1</em>");
+  return xml;
+}
 
+function fixCapitalization(xml, book) {
   // This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
   // it's incorrect to capitalize in the one-off fixes.
   // Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
@@ -366,13 +437,6 @@ function getBodyXML(chapter, book, contentEl) {
   // This is sometimes missing its capitalization.
   xml = xml.replace(/the birdcage/g, "the Birdcage");
 
-  // This is usually spelled "TV" but sometimes the other ways. Normalize.
-  xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
-  xml = xml.replace(/t\.v\./ig, "TV");
-
-  // This is commonly misspelled.
-  xml = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");
-
   // There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.)
   xml = xml.replace(/Halberd/g, "halberd");
   xml = xml.replace(/Loft/g, "loft");
@@ -397,9 +461,6 @@ function getBodyXML(chapter, book, contentEl) {
     "$1–$2"
   );
 
-  // This is consistently missing accents
-  xml = xml.replace(/Yangban/g, "Yàngbǎn");
-
   // Place names need to always be capitalized
   xml = xml.replace(/North end/g, "North End");
   xml = xml.replace(/(Stonemast|Shale) avenue/g, "$1 Avenue");
@@ -409,14 +470,48 @@ function getBodyXML(chapter, book, contentEl) {
   xml = xml.replace(/the megalopolis/g, "the Megalopolis");
   xml = xml.replace(/earths(?![a-z])/g, "Earths");
 
-  // These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
-  // italicized, so we go in the direction of removing the italics.
-  xml = xml.replace(/<em>Garama<\/em>/g, "Garama");
-  xml = xml.replace(/<em>Thanda<\/em>/g, "Thanda");
-  xml = xml.replace(/<em>Sifara([^<]*)<\/em>/g, "Sifara$1");
-  xml = xml.replace(/<em>Moord Nag([^<]*)<\/em>/g, "Moord Nag$1");
-  xml = xml.replace(/<em>Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
-  xml = xml.replace(/<em>Turanta([^<]*)<\/em>/g, "Turanta$1");
+  // "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good number of
+  // instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
+  // substitutions.json.
+  xml = xml.replace(/(?<!mom), dad(?![a-z])/g, ", Dad");
+  xml = xml.replace(/, mom(?![a-z-])/g, ", Mom");
+
+  return xml;
+}
+
+function fixMispellings(xml) { // NOTE(review): name is sic ("Misspellings"); a rename must also touch the caller.
+  // Fixes common misspellings in the markup string. "Shoulder blade" is often run together; the s's case is kept.
+  xml = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");
+
+  // Preemptive(ly) is often hyphenated (not always). It should not be. The case of the leading "p" is kept.
+  xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
+
+  return xml;
+}
+
+function fixHyphens(xml) {
+  // "X-year-old" should use hyphens; all grammar guides agree. The books are very inconsistent but most often omit
+  // them. This function takes the markup string and returns it with the hyphens below inserted.
+  xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2"); // (?!\w) skips e.g. "a year older"
+  xml = xml.replace(/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old"); // "six or seven-year-old" gets "six- or ..."
+
+  // These are consistently missing hyphens. Matching is case-sensitive and unanchored, so suffixed forms are covered.
+  xml = xml.replace(/self destruct/g, "self-destruct");
+  xml = xml.replace(/life threatening/g, "life-threatening");
+  xml = xml.replace(/hard headed/g, "hard-headed");
+  xml = xml.replace(/shoulder mounted/g, "shoulder-mounted");
+  xml = xml.replace(/golden skinned/g, "golden-skinned");
+  xml = xml.replace(/creepy crawl/g, "creepy-crawl");
+  xml = xml.replace(/well armed/g, "well-armed");
+  xml = xml.replace(/able bodied/g, "able-bodied");
+
+  return xml;
+}
+
+function standardizeSpellings(xml) {
+  // This is usually spelled "TV" but sometimes the other ways. Normalize.
+  xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
+  xml = xml.replace(/t\.v\./ig, "TV");
 
   // "okay" is preferred to "ok" or "o.k.". This sometimes gets changed back via substitutions.json when people are
   // writing notes and thus probably the intention was to be less formal. Also it seems per
@@ -438,53 +533,7 @@ function getBodyXML(chapter, book, contentEl) {
   // "gray" is the majority spelling, except for "greyhound"
   xml = xml.replace(/(G|g)rey(?!hound)/g, "$1ray");
 
-  // "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
-  // instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
-  // substitutions.json.
-  xml = xml.replace(/(?<!mom), dad(?![a-z])/g, ", Dad");
-  xml = xml.replace(/, mom(?![a-z-])/g, ", Mom");
-
-  // These are consistently missing hyphens.
-  xml = xml.replace(/self destruct/g, "self-destruct");
-  xml = xml.replace(/life threatening/g, "life-threatening");
-  xml = xml.replace(/hard headed/g, "hard-headed");
-  xml = xml.replace(/shoulder mounted/g, "shoulder-mounted");
-  xml = xml.replace(/golden skinned/g, "golden-skinned");
-  xml = xml.replace(/creepy crawl/g, "creepy-crawl");
-  xml = xml.replace(/well armed/g, "well-armed");
-  xml = xml.replace(/able bodied/g, "able-bodied");
-
-  // Preemptive(ly) is often hyphenated (not always). It should not be.
-  xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
-
-  // One-off fixes
-  (substitutions[chapter.url] || []).forEach(substitution => {
-    if (substitution.before) {
-      const indexOf = xml.indexOf(substitution.before);
-      if (indexOf === -1) {
-        warnings.push(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
-                      `updated at the source, in which case, you should edit substitutions.json.`);
-      }
-      if (indexOf !== xml.lastIndexOf(substitution.before)) {
-        warnings.push(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
-                      `Update substitutions.json for a more precise substitution.`);
-      }
-
-      xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
-    } else if (substitution.regExp) {
-      xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
-    } else {
-      warnings.push(`Invalid substitution specified for ${chapter.url}`);
-    }
-  });
-
-  // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a <html>.
-  // Use this opportunity to insert a comment pointing to the original URL, for reference.
-  xml = xml.replace(
-    /<body xmlns="http:\/\/www.w3.org\/1999\/xhtml">/,
-    `<body>\n<!-- ${chapter.url} -->\n`);
-
-  return { xml, warnings };
+  return xml;
 }
 
 function isEmptyOrGarbage(el) {
diff --git a/lib/substitutions.json b/lib/substitutions.json
index dcfbf93..ef2bc4f 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2448,12 +2448,6 @@
       "after": "all the way up to the nape of my neck"
     }
   ],
-  "https://parahumans.wordpress.com/2012/12/13/interlude-16-donation-bonus-2/": [
-    {
-      "before": "<i>“</i>",
-      "after": "”"
-    }
-  ],
   "https://parahumans.wordpress.com/2012/12/15/monarch-16-7/": [
     {
       "before": "Brockton bay",
@@ -2516,6 +2510,10 @@
     {
       "before": "ten,” I asked. “Just",
       "after": "ten,” I asked, “just"
+    },
+    {
+      "before": "weeks, <i>months.  </i>Anticipating",
+      "after": "weeks, <em>months</em>.  Anticipating"
     }
   ],
   "https://parahumans.wordpress.com/2013/01/08/migration-17-1/": [
@@ -2654,7 +2652,7 @@
   ],
   "https://parahumans.wordpress.com/2013/01/15/migration-17-8/": [
     {
-      "before": "replied.  <i>Need to talk about being more secure with our names.  “</i>What’s going on?<i>“</i>",
+      "before": "replied.  <i>Need to talk about being more secure with our names.  “</i>What’s going on?”",
       "after": "replied.  <em>Need to talk about being more secure with our names.</em>  “What’s going on?”"
     },
     {
@@ -3356,6 +3354,10 @@
     {
       "before": "aren’t allies.",
       "after": "aren’t allies.”"
+    },
+    {
+      "before": "<i>Blameful?  </i>“Guilty",
+      "after": "<em>Blameful?</em>  “Guilty"
     }
   ],
   "https://parahumans.wordpress.com/2013/06/20/crushed-24-5/": [
@@ -4547,6 +4549,10 @@
     {
       "before": "dangerous?” The recluse asked",
       "after": "dangerous?” the recluse asked"
+    },
+    {
+      "before": "<i>Stop.  </i>Please.",
+      "after": "<em>Stop.</em>  Please."
     }
   ],
   "https://www.parahumans.net/2018/02/08/shade-4-4/": [