Settle on "Patrol <lowercase>" and "the Patrol"

This reverses the decision made in 84e0d83bd1 and extends the revised decision to other Patrol-related terms.
This commit is contained in:
Domenic Denicola 2020-10-26 20:54:25 -04:00
commit 4fbaa0e8db
3 changed files with 17 additions and 11 deletions

View file

@ -6,11 +6,11 @@ const substitutions = require("./substitutions.json");
workerpool.worker({ convertChapter });
function convertChapter(chapter, inputPath, outputPath) {
function convertChapter(chapter, book, inputPath, outputPath) {
const contents = fs.readFileSync(inputPath, { encoding: "utf-8" });
const rawChapterJSDOM = new JSDOM(contents);
const output = getChapterString(chapter, rawChapterJSDOM.window.document);
const output = getChapterString(chapter, book, rawChapterJSDOM.window.document);
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterJSDOM.window.close();
@ -18,8 +18,8 @@ function convertChapter(chapter, inputPath, outputPath) {
fs.writeFileSync(outputPath, output);
}
function getChapterString(chapter, rawChapterDoc) {
const body = getBodyXML(chapter, rawChapterDoc.querySelector(".entry-content"));
function getChapterString(chapter, book, rawChapterDoc) {
const body = getBodyXML(chapter, book, rawChapterDoc.querySelector(".entry-content"));
return `<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
@ -32,7 +32,7 @@ ${body}
</html>`;
}
function getBodyXML(chapter, contentEl) {
function getBodyXML(chapter, book, contentEl) {
// Remove initial Next Chapter and Previous Chapter <p>
contentEl.removeChild(contentEl.firstElementChild);
@ -337,9 +337,15 @@ function getBodyXML(chapter, contentEl) {
// ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
xml = xml.replace(/Resound/g, "ReSound");
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". I can see
// arguments for any of them, so let's go with the most prevalent: "patrol block".
xml = xml.replace(/([^ ]) Patrol (?:B|b)lock/g, "$1 patrol block");
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol <lowercase>".
xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl)/ig,
(_, $1) => `Patrol ${$1.toLowerCase()}`);
// Capitalizing "the patrol" is always correct in Ward. In Worm it is never needed and produces a few false positives, so only apply it to Ward:
if (book === "ward") {
xml = xml.replace(/the patrol/g, "the Patrol");
}
// This is sometimes missing its capitalization.
xml = xml.replace(/the birdcage/g, "the Birdcage");