Review notes (text before the first "diff --git" line is skipped by git-apply and patch):
- fixCaseNumbers: the numeral alternatives for "Case Thirty-Two" and "Case Sixty-Nine" were both "53" (copy-paste
  from the first pattern), so "Case 32" / "Case 69" were never normalized. They are now 32 and 69.
- The last replace() call in fixCaseNumbers was truncated in the reviewed copy and has been reconstructed; confirm
  the pattern against the original commit before applying.
- Context-line indentation in lib/substitutions.json was lost in the reviewed copy and is reconstructed here.

diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 0b7a59c..a943d8f 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -236,6 +236,7 @@ function getBodyXML(chapter, book, contentEl) {
   xml = fixMispellings(xml);
   xml = fixHyphens(xml);
   xml = standardizeSpellings(xml);
+  xml = fixCaseNumbers(xml);
 
   // One-off fixes
   for (const substitution of substitutions[chapter.url] || []) {
@@ -563,6 +564,37 @@ function standardizeSpellings(xml) {
   return xml;
 }
 
+// Normalizes the many spellings of case numbers (e.g. "case-53", "Case fifty three") to a single proper-noun form
+// such as "Case Fifty-Three". Takes the chapter XML as a string and returns the updated string.
+function fixCaseNumbers(xml) {
+  // Case numbers are very inconsistent. For "Case Fifty-Three", the breakdown is:
+  // * 9 Case-53
+  // * 6 Case 53
+  // * 2 case-53
+  // * 1 Case-Fifty-Three
+  // * 41 Case Fifty-Three
+  // * 1 Case Fifty Three
+  // * 13 Case fifty-three
+  // * 119 case fifty-three
+  // * 4 case-fifty-three
+  // * 1 case fifty three
+  // We standardize on "Case Fifty-Three"; although it isn't the most common, it seems best to treat these as proper
+  // nouns.
+
+  // Each numeral alternative must be that case's own number; otherwise "Case 32" / "Case 69" are never matched.
+  xml = xml.replace(/case[ -](?:fifty[ -]three|53)(?!’)/ig, "Case Fifty-Three");
+  xml = xml.replace(/case[ -](?:thirty[ -]two|32)(?!’)/ig, "Case Thirty-Two");
+  xml = xml.replace(/case[ -](?:sixty[ -]nine|69)(?!’)/ig, "Case Sixty-Nine");
+
+  // Capitalize the remaining single-word case numbers, e.g. "case twelve" -> "Case Twelve".
+  // NOTE(review): this pattern was truncated in the reviewed copy and is reconstructed; the lookbehind is presumed
+  // to skip phrases like "in case one ..." and the word list is a best guess. Confirm against the commit.
+  xml = xml.replace(/(?<!in )case[ -](zero|one|two|three|four|twelve|fifteen|seventy|ninety)(?!-)/ig,
+    (match, caseNumber) => "Case " + caseNumber[0].toUpperCase() + caseNumber.substring(1));
+
+  return xml;
+}
+
 function isEmptyOrGarbage(el) {
   const text = el.textContent.trim();
   return text === "" ||
diff --git a/lib/substitutions.json b/lib/substitutions.json
index eb74acf..edab3e9 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -3723,10 +3723,6 @@
       "before": "I suspect It’s a",
       "after": "I suspect it’s a"
     },
-    {
-      "before": "Case Fifty-three",
-      "after": "case fifty-three"
-    },
     {
       "before": "reality,” Tattletale said. “What’s",
       "after": "reality,” Tattletale said, “what’s"
@@ -3751,10 +3747,6 @@
     }
   ],
   "https://parahumans.wordpress.com/2013/09/17/interlude-28/": [
-    {
-      "before": "Case fifty-threes",
-      "after": "case fifty-threes"
-    },
     {
       "before": "than,” Revel paused. \n“Six",
       "after": "than,” Revel paused, “six"
@@ -4045,14 +4037,6 @@
       "before": "gun build",
       "after": "gun built"
     },
-    {
-      "before": "hunchbacked Case fifty-three",
-      "after": "hunchbacked case fifty-three"
-    },
-    {
-      "before": "moved Case fifty-threes",
-      "after": "moved case fifty-threes"
-    },
     {
       "before": "I revoked my control over her, leaving in in the middle",
       "after": "I revoked my control over her, leaving her in the middle"