Settle on "Patrol <lowercase>" and "the Patrol"
This reverses the decision made in 84e0d83bd1 and extends the revised decision to other Patrol-related terms.
This commit is contained in:
parent
6b5bedcbc7
commit
4fbaa0e8db
3 changed files with 17 additions and 11 deletions
|
|
@ -6,11 +6,11 @@ const substitutions = require("./substitutions.json");
|
|||
|
||||
workerpool.worker({ convertChapter });
|
||||
|
||||
function convertChapter(chapter, inputPath, outputPath) {
|
||||
function convertChapter(chapter, book, inputPath, outputPath) {
|
||||
const contents = fs.readFileSync(inputPath, { encoding: "utf-8" });
|
||||
|
||||
const rawChapterJSDOM = new JSDOM(contents);
|
||||
const output = getChapterString(chapter, rawChapterJSDOM.window.document);
|
||||
const output = getChapterString(chapter, book, rawChapterJSDOM.window.document);
|
||||
|
||||
// TODO: this should probably not be necessary... jsdom bug I guess!?
|
||||
rawChapterJSDOM.window.close();
|
||||
|
|
@ -18,8 +18,8 @@ function convertChapter(chapter, inputPath, outputPath) {
|
|||
fs.writeFileSync(outputPath, output);
|
||||
}
|
||||
|
||||
function getChapterString(chapter, rawChapterDoc) {
|
||||
const body = getBodyXML(chapter, rawChapterDoc.querySelector(".entry-content"));
|
||||
function getChapterString(chapter, book, rawChapterDoc) {
|
||||
const body = getBodyXML(chapter, book, rawChapterDoc.querySelector(".entry-content"));
|
||||
|
||||
return `<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
|
|
@ -32,7 +32,7 @@ ${body}
|
|||
</html>`;
|
||||
}
|
||||
|
||||
function getBodyXML(chapter, contentEl) {
|
||||
function getBodyXML(chapter, book, contentEl) {
|
||||
// Remove initial Next Chapter and Previous Chapter <p>
|
||||
contentEl.removeChild(contentEl.firstElementChild);
|
||||
|
||||
|
|
@ -337,9 +337,15 @@ function getBodyXML(chapter, contentEl) {
|
|||
// ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
|
||||
xml = xml.replace(/Resound/g, "ReSound");
|
||||
|
||||
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". I can see
|
||||
// arguments for any of them, so let's go with the most prevalent: "patrol block".
|
||||
xml = xml.replace(/([^ ]) Patrol (?:B|b)lock/g, "$1 patrol block");
|
||||
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
|
||||
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
|
||||
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol <lowercase>".
|
||||
xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl)/ig,
|
||||
(_, $1) => `Patrol ${$1.toLowerCase()}`);
|
||||
// This always works in Ward and has a few false positives in Worm, where it is never needed:
|
||||
if (book === "ward") {
|
||||
xml = xml.replace(/the patrol/g, "the Patrol");
|
||||
}
|
||||
|
||||
// This is sometimes missing its capitalization.
|
||||
xml = xml.replace(/the birdcage/g, "the Birdcage");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue