diff --git a/lib/convert.js b/lib/convert.js index c82a6b5..2684748 100644 --- a/lib/convert.js +++ b/lib/convert.js @@ -203,8 +203,12 @@ function getBodyXML(chapter, contentEl) { xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked"); xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked"); - // Fix extra periods at the end of paragraphs + // Fix bad periods and spacing/markup surrounding them xml = xml.replace(/\.\.<\/p>/g, ".
"); + xml = xml.replace(/\.\.”<\/p>/g, ".”"); + xml = xml.replace(/ \. /g, ". "); + xml = xml.replace(/ \.<\/p>/g, "."); + xml = xml.replace(/\.\.\./g, "…"); // The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example, // > “I didn’t get much done,” Greg said, “I got distracted by... @@ -232,8 +236,9 @@ function getBodyXML(chapter, contentEl) { // This is sometimes missing its capitalization xml = xml.replace(/the birdcage/g, "the Birdcage"); - // There's no reason why this should be capitalized. (Note that it never appears at the beginning of any sentences.) + // There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.) xml = xml.replace(/Halberd/g, "halberd"); + xml = xml.replace(/Loft/g, "loft"); // Especially early in the story, PRT designations are capitalized; they should not be. This fixes the cases where we // can be reasonably sure they don't start a sentence, although more specific instances are done in @@ -277,22 +282,29 @@ function getBodyXML(chapter, contentEl) { // Signal(l)ing/signal(l)ed are spelled both ways. Both are acceptable in English. Let's standardize on single-L. xml = xml.replace(/(S|s)ignall/g, "$1ignal"); - // This is consistently missing hyphens. + // These are consistently missing hyphens. xml = xml.replace(/shoulder mounted/g, "shoulder-mounted"); + xml = xml.replace(/golden skinned/, "golden-skinned"); // One-off fixes (substitutions[chapter.url] || []).forEach(substitution => { - const indexOf = xml.indexOf(substitution.before); - if (indexOf === -1) { - console.warn(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` + - `updated at the source, in which case, you should edit substitutions.json.`); - } - if (indexOf !== xml.lastIndexOf(substitution.before)) { - console.warn(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. 
` + - `Update substitutions.json for a more precise substitution.`); - } + if (substitution.before) { + const indexOf = xml.indexOf(substitution.before); + if (indexOf === -1) { + console.warn(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` + + `updated at the source, in which case, you should edit substitutions.json.`); + } + if (indexOf !== xml.lastIndexOf(substitution.before)) { + console.warn(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` + + `Update substitutions.json for a more precise substitution.`); + } - xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after); + xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after); + } else if (substitution.regExp) { + xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement); + } else { + console.warn(`Invalid substitution specified for ${chapter.url}`); + } }); // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a diff --git a/lib/substitutions.json b/lib/substitutions.json index f000f7c..a34f23f 100644 --- a/lib/substitutions.json +++ b/lib/substitutions.json @@ -78,6 +78,10 @@ { "before": "simply, “Bug”", "after": "simply, ‘Bug’." + }, + { + "before": "powers: He could", + "after": "powers: he could" } ], "https://parahumans.wordpress.com/2011/07/12/insinuation-2-3/": [ @@ -276,6 +280,10 @@ { "before": "Not so unusual,", "after": "Not so unusual;" + }, + { + "before": "offer is this: Let me", + "after": "offer is this: let me" } ], "https://parahumans.wordpress.com/2011/10/15/shell-4-7/": [ @@ -450,6 +458,10 @@ { "before": "South", "after": "south" + }, + { + "before": "news: A tally", + "after": "news: a tally" } ], "https://parahumans.wordpress.com/2011/12/20/tangle-6-3/": [ @@ -726,6 +738,10 @@ { "before": "East", "after": "east" + }, + { + "before": "listened to,", + "after": "listened to." 
} ], "https://parahumans.wordpress.com/2012/03/06/extermination-8-2/": [ @@ -736,9 +752,25 @@ { "before": "West", "after": "west" + }, + { + "before": "And villains, too", + "after": "and villains, too" + }, + { + "before": "featureless plain", + "after": "featureless plane" } ], "https://parahumans.wordpress.com/2012/03/08/interlude-7%C2%BD-bonus/": [ + { + "before": "‘boyfriend’; Not", + "after": "‘boyfriend’; not" + }, + { + "before": "thought & the", + "after": "thought and the" + }, { "before": "legs: Suit", "after": "legs: suit" @@ -758,6 +790,104 @@ { "before": "arms: Irritation", "after": "arms: irritation" + }, + { + "before": "goodwill bin", + "after": "Goodwill bin" + }, + { + "before": "chedcked", + "after": "checked" + }, + { + "before": "An envelope should arrive", + "after": "an envelope should arrive" + }, + { + "before": "Which", + "after": "Which" + }, + { + "before": "spoke, “My supervisor", + "after": "spoke. “My supervisor" + }, + { + "before": "serious, “Turn", + "after": "serious. “Turn" + }, + { + "before": "A third person gravely injured", + "after": "A third person was gravely injured" + }, + { + "regExp": "\n\\s*([^<]+)($1
" + }, + { + "regExp": "\n([^<]+)(
|
$1
" + }, + { + "regExp": "\n([^<\n]+)
\n([^>\n]+)
\n",
+ "replacement": "\n
$1
\n$2
\n" + } + ], + "https://parahumans.wordpress.com/2012/03/10/extermination-8-3/": [ + { + "before": "Icouldn’t", + "after": "I couldn’t" + }, + { + "before": "CD-5…", + "after": "CD-5…" + }, + { + "before": "CD-5", + "after": "CD-5." + }, + { + "before": "Brigandine deceased, CD-5.
", + "after": "Brigandine deceased, CD-5.
" + }, + { + "before": "deceased, CD-6.", + "after": "deceased, CD-6." + }, + { + "before": "CD-6, The armbands", + "after": "CD-6, the armbands" + }, + { + "before": "CD-6", + "after": "CD-6." + }, + { + "before": "CD-6.", + "after": "CD-6." + }, + { + "before": "pushing.", + "after": "pushing." + }, + { + "before": " Fuck", + "after": " Fuck" + }, + { + "before": " Losses", + "after": " Losses" + }, + { + "before": "Narwhal,", + "after": "Narwhal," + }, + { + "before": "Tattletale…", + "after": "Tattletale…" + }, + { + "before": "reeled—He", + "after": "reeled—he" } ], "https://parahumans.wordpress.com/2012/03/13/extermination-8-4/": [ @@ -792,6 +922,34 @@ { "before": "half South", "after": "half south" + }, + { + "before": "impression It had", + "after": "impression it had" + }, + { + "before": "Aegis deceased, CD-6", + "after": "Aegis deceased, CD-6" + }, + { + "before": "down, CC-6.", + "after": "down, CC-6." + }, + { + "before": "Kid win", + "after": "Kid Win" + }, + { + "before": "of ‘a few blocks’.", + "after": "of ‘a few blocks’." + }, + { + "before": "Negative.", + "after": "Negative." + }, + { + "before": "up.", + "after": "up." } ], "https://parahumans.wordpress.com/2012/03/17/extermination-8-5/": [ @@ -806,6 +964,60 @@ { "before": "Master 5", "after": "master five" + }, + { + "before": "down, BW-8", + "after": "down, BW-8." + }, + { + "before": "deceased, BW-8", + "after": "deceased, BW-8." + }, + { + "before": "deceased, CB-10.", + "after": "deceased, CB-10." + }, + { + "before": "Defensive perimeter, report.", + "after": "Defensive perimeter, report." + }, + { + "before": "Enemy location unknown,", + "after": "Enemy location unknown," + }, + { + "before": "Dad.", + "after": "Dad." + }, + { + "before": "The girl.", + "after": "The girl." + }, + { + "before": "goggles, it", + "after": "goggles. 
It" + }, + { + "before": "old, dirt and blood", + "after": "old dirt and blood" + } + ], + "https://parahumans.wordpress.com/2012/03/20/extermination-8-6/": [ + { + "before": "and It—it’s", + "after": "and it—it’s" + }, + { + "before": "glaring at me, “There’s", + "after": "glaring at me. “There’s" + }, + { + "before": "Ah.", + "after": "Ah." + }, + { + "before": "“All of us?”", + "after": "“All of us?”" } ], "https://parahumans.wordpress.com/2012/03/24/extermination-8-7/": [ @@ -824,22 +1036,94 @@ { "before": "Master 5", "after": "Master five" - } - ], - "https://parahumans.wordpress.com/2012/03/08/interlude-7%c2%bd-bonus/": [ - { - "before": "‘boyfriend’; Not", - "after": "‘boyfriend’; not" }, { - "before": "thought & the", - "after": "thought and the" + "before": "This working? This on? Good.", + "after": "“This working? This on? Good.”" + }, + { + "before": "Acknowledged. Her", + "after": "Acknowledged, her" + }, + { + "before": "struck me—If", + "after": "struck me—if" + }, + { + "before": "went on, “Just", + "after": "went on, “just" + }, + { + "before": "her, “That", + "after": "her, “that" + }, + { + "before": "towards her, “Heroes died", + "after": "towards her. “Heroes died" + }, + { + "before": "we get get in", + "after": "we get in" + }, + { + "before": "at the heroine, “Let us", + "after": "at the heroine. “Let us" + }, + { + "before": "ignoring Tattletale, “Frustrated", + "after": "ignoring Tattletale. “Frustrated" + }, + { + "before": "humility, “If it", + "after": "humility. “If it" + }, + { + "before": "hand at me, “She’s", + "after": "hand at me, “she’s" + } + ], + "https://parahumans.wordpress.com/2012/03/27/extermination-8-8/": [ + { + "before": "stick—hawk or some other", + "after": "stick ‘hawk’ or some other" + }, + { + "before": "this: Knowing we had", + "after": "this: knowing we had" + }, + { + "before": "of her head, “And my", + "after": "of her head. 
“And my" } ], "https://parahumans.wordpress.com/2012/03/31/interlude-8/": [ { "before": "other: Another", "after": "other: another" + }, + { + "before": "reality: He", + "after": "reality: he" + }, + { + "before": "prius", + "after": "Prius" + }, + { + "before": "room,” it was", + "after": "room.” It was" + }, + { + "before": "handicap accessible", + "after": "handicap-accessible" + }, + { + "before": "“Hey,” her voice", + "after": "“Hey.” Her voice" + }, + { + "before": "Mr. Pitter, “The Travelers’", + "after": "Mr. Pitter. “The Travelers’" } ], "https://parahumans.wordpress.com/2012/04/03/cell-9-1/": [ @@ -990,6 +1274,12 @@ "after": "was… how" } ], + "https://parahumans.wordpress.com/2012/05/26/infestation-11-3/": [ + { + "before": "intimidating: A sea", + "after": "intimidating: a sea" + } + ], "https://parahumans.wordpress.com/2012/05/29/infestation-11-4/": [ { "before": "of facility‘", @@ -1420,6 +1710,12 @@ "after": "debate,” Kurt said, “people" } ], + "https://parahumans.wordpress.com/2012/12/18/monarch-16-8/": [ + { + "before": "decisions: Holding back", + "after": "decisions: holding back" + } + ], "https://parahumans.wordpress.com/2012/12/25/monarch-16-10/": [ { "before": "it,” Trickster said. “You", @@ -1541,10 +1837,6 @@ "before": "MWBB ", "after": "MWBB " }, - { - "before": "kill us..", - "after": "kill us." - }, { "before": "happens,” Krouse said. “Blame", "after": "happens,” Krouse said, “blame" @@ -1562,6 +1854,10 @@ { "before": "Well,” Krouse said. “Do", "after": "Well,” Krouse said, “do" + }, + { + "before": "the guy: A brown", + "after": "the guy: a brown" } ], "https://parahumans.wordpress.com/2013/01/15/migration-17-8/": [ @@ -1663,8 +1959,8 @@ ], "https://parahumans.wordpress.com/2013/02/07/interlude-18-donation-bonus-3/": [ { - "before": "But..", - "after": "But…" + "before": "fighting. But.", + "after": "fighting. 
But…" }, { "before": "r,s", @@ -1895,10 +2191,6 @@ "before": "’20th’", "after": "‘20th’" }, - { - "before": "...The reality", - "after": "…The reality" - }, { "before": "—A known murderer", "after": "—a known murderer" @@ -2155,6 +2447,12 @@ "after": "it,” Imp said, “she’s" } ], + "https://parahumans.wordpress.com/2013/06/06/interlude-23/": [ + { + "before": "her mouth: A click", + "after": "her mouth: a click" + } + ], "https://parahumans.wordpress.com/2013/06/08/crushed-24-1/": [ { "before": "both of you” I said", @@ -2848,10 +3146,6 @@ { "before": "moved Case fifty-threes", "after": "moved case fifty-threes" - }, - { - "before": "future sight of his .", - "after": "future sight of his." } ], "https://parahumans.wordpress.com/2013/10/26/speck-30-6/": [