fixed above.
- xml = xml.replace(/
\s*<\/p>/g, "");
+ xml = xml.replace(/
\s*<\/p>/g, "");
// Fix possessive of names ending in "s"
// Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
@@ -298,12 +298,12 @@ function getBodyXML(chapter, contentEl) {
// Use <hr> for separators
// https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/ has "super-separators" ("⊙ ⊙ ⊙ ⊙ ⊙") which we
// leave untouched for now.
- xml = xml.replace(/■<\/p>/g, "
");
- xml = xml.replace(/■<\/p>/g, "
");
- xml = xml.replace(/⊙<\/p>/g, "
");
- xml = xml.replace(/⊙<\/strong><\/p>/g, "
");
- xml = xml.replace(/⊙<\/strong><\/em><\/p>/g, "
");
- xml = xml.replace(/⊙⊙<\/strong><\/p>/g, "
");
+ xml = xml.replace(/■<\/p>/g, "
");
+ xml = xml.replace(/■<\/p>/g, "
");
+ xml = xml.replace(/⊙<\/p>/g, "
");
+ xml = xml.replace(/⊙<\/strong><\/p>/g, "
");
+ xml = xml.replace(/⊙<\/strong><\/em><\/p>/g, "
");
+ xml = xml.replace(/⊙⊙<\/strong><\/p>/g, "
");
// Fix recurring miscapitalization with questions
xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 0431645..4264abd 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5,7 +5,7 @@
"after": "bugs: flies, ants"
},
{
- "before": "Brief note from the author: This story isn’t intended for young or sensitive readers. Readers who are on the lookout for trigger warnings are advised to give Worm a pass.
\n
\n",
+ "before": "Brief note from the author: This story isn’t intended for young or sensitive readers. Readers who are on the lookout for trigger warnings are advised to give Worm a pass.
\n
\n",
"after": "",
"_comment": "The pseudo-trigger warning is out of place in an eBook."
}
@@ -906,15 +906,15 @@
"after": "crash when the wave rolled"
},
{
- "regExp": "\n ?\\s*([^<]+)(
|
)",
+ "regExp": "\n ?\\s*([^<]+)(
|
)",
"replacement": "\n$1
"
},
{
- "regExp": "\n([^<]+)(
|
)",
+ "regExp": "\n([^<]+)(
|
)",
"replacement": "\n$1
"
},
{
- "regExp": "\n([^<\n]+)
\n([^>\n]+)
\n",
+ "regExp": "\n
([^<\n]+)
\n([^>\n]+)
\n",
"replacement": "\n
$1
\n$2
\n"
}
],
@@ -1770,15 +1770,15 @@
"replacement": ""
},
{
- "before": "agreed-upon confidentiality.
\n■ ",
+ "before": "agreed-upon confidentiality.
\n■ ",
"after": "agreed-upon confidentiality.
\n\n- "
},
{
- "regExp": "
\n■ ",
+ "regExp": "
\n■ ",
"replacement": " \n- "
},
{
- "before": "three, male.
\nBoth vials",
+ "before": "three, male.
\nBoth vials",
"after": "three, male. \n
\nBoth vials"
},
{
@@ -1904,24 +1904,24 @@
"after": "
Sweet Honey—
"
},
{
- "before": "Love me, love me, you know you wanna love me…
\nLove me, love me, you know you wanna love me…
",
- "after": "Love me, love me, you know you wanna love me…
\nLove me, love me, you know you wanna love me…
"
+ "before": "Love me, love me, you know you wanna love me…
\nLove me, love me, you know you wanna love me…
",
+ "after": "Love me, love me, you know you wanna love me…
\nLove me, love me, you know you wanna love me…
"
},
{
- "before": "Love me, you?
\nLove me, true?
",
- "after": "Love me, you?
\nLove me, true?
"
+ "before": "Love me, you?
\nLove me, true?
",
+ "after": "Love me, you?
\nLove me, true?
"
},
{
- "before": "Crazed, kooky, cracked, crazy,
\nNutty, barmy, mad for me…
",
- "after": "Crazed, kooky, cracked, crazy,
\nNutty, barmy, mad for me…
"
+ "before": "Crazed, kooky, cracked, crazy,
\nNutty, barmy, mad for me…
",
+ "after": "Crazed, kooky, cracked, crazy,
\nNutty, barmy, mad for me…
"
},
{
- "before": "Crazed, kooky, cracked, crazy,
\nMental, dotty, whacked, loopy…
",
- "after": "Crazed, kooky, cracked, crazy,
\nMental, dotty, whacked, loopy…
"
+ "before": "Crazed, kooky, cracked, crazy,
\nMental, dotty, whacked, loopy…
",
+ "after": "Crazed, kooky, cracked, crazy,
\nMental, dotty, whacked, loopy…
"
},
{
- "before": "Crazed, kooky, cracked, crazy,
\nNutty, screwy, mentally diseased…
\n She ",
- "after": "
Crazed, kooky, cracked, crazy,
\nNutty, screwy, mentally diseased…
\nShe "
+ "before": "
Crazed, kooky, cracked, crazy,
\nNutty, screwy, mentally diseased…
\n She ",
+ "after": "
Crazed, kooky, cracked, crazy,
\nNutty, screwy, mentally diseased…
\nShe "
},
{
"before": "Ça va?",
@@ -2206,7 +2206,7 @@
],
"https://parahumans.wordpress.com/2012/09/11/prey-14-3/": [
{
- "before": "truck reached
\nthe other Nine",
+ "before": "truck reached
\nthe other Nine",
"after": "truck reached the other Nine"
},
{
@@ -2266,7 +2266,7 @@
],
"https://parahumans.wordpress.com/2012/10/18/interlude-15-donation-bonus/": [
{
- "before": "volunteered, asked
\n to",
+ "before": "volunteered, asked
\n to",
"after": "volunteered, asked to"
},
{
@@ -3365,7 +3365,7 @@
"https://parahumans.wordpress.com/2013/06/25/interlude-24-donation-bonus-1/": [
{
"before": "silence like this. Divide: They",
- "after": "silence like this.
Divide: They"
+ "after": "silence like this.
Divide: They"
},
{
"before": "Bay,” Wanton said. “Not",
@@ -3441,7 +3441,7 @@
},
{
"before": "SimurghNotes",
- "after": "Simurgh
Notes"
+ "after": "Simurgh
Notes"
},
{
"before": "avoided-",
@@ -4238,7 +4238,7 @@
"after": "Imp: I’ve"
},
{
- "before": "Tattletale:
\nwaiting?",
+ "before": "Tattletale:
\nwaiting?",
"after": "Tattletale: waiting?"
},
{
@@ -4308,7 +4308,7 @@
],
"https://www.parahumans.net/2017/09/11/daybreak-1-1/": [
{
- "before": "
Ward is the second work in the Parahumans series, and reading Worm first is strongly recommended. A lot of this won’t make sense otherwise and if you do find yourself a fan of the universe, the spoilers in Ward will affect the reading of the other work.
\nWard is not recommended for young or sensitive readers.
\n
",
+ "before": "Ward is the second work in the Parahumans series, and reading Worm first is strongly recommended. A lot of this won’t make sense otherwise and if you do find yourself a fan of the universe, the spoilers in Ward will affect the reading of the other work.
\nWard is not recommended for young or sensitive readers.
\n
",
"after": "",
"_comment": "This is out of place in an eBook."
}
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index c5eb93a..599380f 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -2060,11 +2060,6 @@
"resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz",
"integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw=="
},
- "xmlserializer": {
- "version": "0.6.1",
- "resolved": "https://registry.npmjs.org/xmlserializer/-/xmlserializer-0.6.1.tgz",
- "integrity": "sha512-FNb0eEqqUUbnuvxuHqNuKH8qCGKqxu+558Zi8UzOoQk8Z9LdvpONK+v7m3gpKVHrk5Aq+0nNLsKxu/6OYh7Umw=="
- },
"xtend": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",
diff --git a/package.json b/package.json
index 90654f3..05b24e9 100644
--- a/package.json
+++ b/package.json
@@ -28,7 +28,6 @@
"jsdom": "^16.2.2",
"requisition": "^1.5.0",
"throat": "^5.0.0",
- "xmlserializer": "^0.6.1",
"yargs": "^15.3.1"
},
"devDependencies": {
From 51520c11beb22819fbbb1d235dc81563865ca3b7 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 19:20:29 -0400
Subject: [PATCH 002/186] Add an original-URL HTML comment to each chapter
This makes it easier for me to figure out how to update substitutions.json.
---
lib/convert.js | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/lib/convert.js b/lib/convert.js
index 457e081..a4251be 100644
--- a/lib/convert.js
+++ b/lib/convert.js
@@ -130,10 +130,14 @@ function getBodyXML(chapter, contentEl) {
// Synthesize a <body> tag to serialize
const bodyEl = contentEl.ownerDocument.createElement("body");
+
const h1El = contentEl.ownerDocument.createElement("h1");
h1El.textContent = chapter.title;
-
bodyEl.appendChild(h1El);
+
+ const comment = contentEl.ownerDocument.createComment(chapter.url);
+ bodyEl.appendChild(comment);
+
while (contentEl.firstChild) {
bodyEl.appendChild(contentEl.firstChild);
}
From ae5b179d7c261a7bfed20ab6006f4ab193470c28 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 19:31:28 -0400
Subject: [PATCH 003/186] Add a nice progress bar for the convert step
---
lib/convert.js | 15 ++++++++++++---
npm-shrinkwrap.json | 14 ++++++++++++++
package.json | 1 +
3 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/lib/convert.js b/lib/convert.js
index a4251be..8a989c7 100644
--- a/lib/convert.js
+++ b/lib/convert.js
@@ -3,14 +3,24 @@ const path = require("path");
const fs = require("fs").promises;
const throat = require("throat");
const { JSDOM } = require("jsdom");
+const cliProgress = require("cli-progress");
const substitutions = require("./substitutions.json");
module.exports = async (cachePath, manifestPath, contentPath, concurrentJobs) => {
const manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" });
const chapters = JSON.parse(manifestContents);
- console.log("All chapters downloaded; beginning conversion to EPUB chapters");
- const mapper = throat(concurrentJobs, chapter => convertChapter(chapter, cachePath, contentPath));
+ console.log("Converting raw downloaded HTML to EPUB chapters");
+ const progress = new cliProgress.SingleBar({
+ stopOnComplete: true,
+ clearOnComplete: true
+ }, cliProgress.Presets.shades_classic);
+ progress.start(chapters.length, 0);
+
+ const mapper = throat(concurrentJobs, async chapter => {
+ await convertChapter(chapter, cachePath, contentPath);
+ progress.increment();
+ });
await Promise.all(chapters.map(mapper));
console.log("All chapters converted");
@@ -32,7 +42,6 @@ async function convertChapter(chapter, cachePath, contentPath) {
const destFilePath = path.resolve(contentPath, destFileName);
await fs.writeFile(destFilePath, output);
- console.log(`- Finished converting ${filename}`);
}
function getChapterString(chapter, rawChapterDoc) {
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 599380f..468ce14 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -335,6 +335,15 @@
}
}
},
+ "cli-progress": {
+ "version": "3.8.2",
+ "resolved": "https://registry.npmjs.org/cli-progress/-/cli-progress-3.8.2.tgz",
+ "integrity": "sha512-qRwBxLldMSfxB+YGFgNRaj5vyyHe1yMpVeDL79c+7puGujdKJHQHydgqXDcrkvQgJ5U/d3lpf6vffSoVVUftVQ==",
+ "requires": {
+ "colors": "^1.1.2",
+ "string-width": "^4.2.0"
+ }
+ },
"cliui": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-6.0.0.tgz",
@@ -358,6 +367,11 @@
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
},
+ "colors": {
+ "version": "1.4.0",
+ "resolved": "https://registry.npmjs.org/colors/-/colors-1.4.0.tgz",
+ "integrity": "sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA=="
+ },
"combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
diff --git a/package.json b/package.json
index 05b24e9..e29c69b 100644
--- a/package.json
+++ b/package.json
@@ -24,6 +24,7 @@
},
"dependencies": {
"archiver": "^4.0.1",
+ "cli-progress": "^3.8.2",
"cpr": "^3.0.1",
"jsdom": "^16.2.2",
"requisition": "^1.5.0",
From 4d9e55643192efaf7da5ce4b26e73733ce71c168 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 19:34:58 -0400
Subject: [PATCH 004/186] Update dependencies
---
npm-shrinkwrap.json | 482 ++++++++++++++++++++++----------------------
package.json | 8 +-
2 files changed, 249 insertions(+), 241 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 468ce14..3c54975 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -67,20 +67,58 @@
}
}
},
+ "@eslint/eslintrc": {
+ "version": "0.1.3",
+ "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.1.3.tgz",
+ "integrity": "sha512-4YVwPkANLeNtRjMekzux1ci8hIaH5eGKktGqR0d3LWsKNn5B2X/1Z6Trxy7jQXl9EBGE6Yj02O+t09FMeRllaA==",
+ "dev": true,
+ "requires": {
+ "ajv": "^6.12.4",
+ "debug": "^4.1.1",
+ "espree": "^7.3.0",
+ "globals": "^12.1.0",
+ "ignore": "^4.0.6",
+ "import-fresh": "^3.2.1",
+ "js-yaml": "^3.13.1",
+ "lodash": "^4.17.19",
+ "minimatch": "^3.0.4",
+ "strip-json-comments": "^3.1.1"
+ },
+ "dependencies": {
+ "ajv": {
+ "version": "6.12.6",
+ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
+ "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
+ "dev": true,
+ "requires": {
+ "fast-deep-equal": "^3.1.1",
+ "fast-json-stable-stringify": "^2.0.0",
+ "json-schema-traverse": "^0.4.1",
+ "uri-js": "^4.2.2"
+ }
+ },
+ "lodash": {
+ "version": "4.17.20",
+ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.20.tgz",
+ "integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==",
+ "dev": true
+ }
+ }
+ },
"@types/color-name": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@types/color-name/-/color-name-1.1.1.tgz",
"integrity": "sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ=="
},
"abab": {
- "version": "2.0.3",
- "resolved": "https://registry.npmjs.org/abab/-/abab-2.0.3.tgz",
- "integrity": "sha512-tsFzPpcttalNjFBCFMqsKYQcWxxen1pgJR56by//QwvJc4/OUS3kPOOttx2tSIfjsylB0pYu7f5D3K1RCxUnUg=="
+ "version": "2.0.5",
+ "resolved": "https://registry.npmjs.org/abab/-/abab-2.0.5.tgz",
+ "integrity": "sha512-9IK9EadsbHo6jLWIpxpR6pL0sazTXV6+SQv25ZB+F7Bj9mJNaOc4nCRabwd5M/JwmUa8idz6Eci6eKfJryPs6Q=="
},
"acorn": {
- "version": "7.3.1",
- "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.3.1.tgz",
- "integrity": "sha512-tLc0wSnatxAQHVHUapaHdz72pi9KUyHjq5KyHjGg9Y8Ifdc79pTh2XvI6I1/chZbnM7QtNKzh66ooDogPZSleA=="
+ "version": "7.4.1",
+ "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz",
+ "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A=="
},
"acorn-globals": {
"version": "6.0.0",
@@ -92,9 +130,9 @@
}
},
"acorn-jsx": {
- "version": "5.2.0",
- "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.2.0.tgz",
- "integrity": "sha512-HiUX/+K2YpkpJ+SzBffkM/AQ2YE03S0U1kjTLVpoJdhZMOWy8qvXVN9JdLqv2QsaQ6MPYQIuNmwD8zOiYUofLQ==",
+ "version": "5.3.1",
+ "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.1.tgz",
+ "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==",
"dev": true
},
"acorn-walk": {
@@ -106,6 +144,7 @@
"version": "6.12.2",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.2.tgz",
"integrity": "sha512-k+V+hzjm5q/Mr8ef/1Y9goCmlsK4I6Sm74teeyGvFk1XrOsbsKLjEdrvny42CZ+a8sXbk8KWpY/bDwS+FLL2UQ==",
+ "dev": true,
"requires": {
"fast-deep-equal": "^3.1.1",
"fast-json-stable-stringify": "^2.0.0",
@@ -114,9 +153,9 @@
}
},
"ansi-colors": {
- "version": "3.2.4",
- "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-3.2.4.tgz",
- "integrity": "sha512-hHUXGagefjN2iRrID63xckIvotOXOojhQKWIPUZ4mNUZ9nLZW+7FMNoE1lOkEhNWYsx/7ysGIuJYCiMAA9FnrA==",
+ "version": "4.1.1",
+ "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz",
+ "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==",
"dev": true
},
"ansi-regex": {
@@ -139,17 +178,17 @@
"integrity": "sha1-q8av7tzqUugJzcA3au0845Y10X8="
},
"archiver": {
- "version": "4.0.1",
- "resolved": "https://registry.npmjs.org/archiver/-/archiver-4.0.1.tgz",
- "integrity": "sha512-/YV1pU4Nhpf/rJArM23W6GTUjT0l++VbjykrCRua1TSXrn+yM8Qs7XvtwSiRse0iCe49EPNf7ktXnPsWuSb91Q==",
+ "version": "5.0.2",
+ "resolved": "https://registry.npmjs.org/archiver/-/archiver-5.0.2.tgz",
+ "integrity": "sha512-Tq3yV/T4wxBsD2Wign8W9VQKhaUxzzRmjEiSoOK0SLqPgDP/N1TKdYyBeIEu56T4I9iO4fKTTR0mN9NWkBA0sg==",
"requires": {
"archiver-utils": "^2.1.0",
- "async": "^2.6.3",
+ "async": "^3.2.0",
"buffer-crc32": "^0.2.1",
- "glob": "^7.1.6",
"readable-stream": "^3.6.0",
- "tar-stream": "^2.1.2",
- "zip-stream": "^3.0.1"
+ "readdir-glob": "^1.0.0",
+ "tar-stream": "^2.1.4",
+ "zip-stream": "^4.0.0"
}
},
"archiver-utils": {
@@ -214,12 +253,9 @@
"dev": true
},
"async": {
- "version": "2.6.3",
- "resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
- "integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
- "requires": {
- "lodash": "^4.17.14"
- }
+ "version": "3.2.0",
+ "resolved": "https://registry.npmjs.org/async/-/async-3.2.0.tgz",
+ "integrity": "sha512-TR2mEZFVOj2pLStYxLht7TyfuRzaydfpxr3k9RpHIzMgw7A64dzsdqCxH1WJyQdoe8T10nDXd9wnEigmiuHIZw=="
},
"asynckit": {
"version": "0.4.0",
@@ -232,9 +268,9 @@
"integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg="
},
"aws4": {
- "version": "1.10.0",
- "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.10.0.tgz",
- "integrity": "sha512-3YDiu347mtVtjpyV3u5kVqQLP242c06zwDOgpeRnybmXlYYsLbtTrUBUm8i8srONt+FWobl5aibnU1030PeeuA=="
+ "version": "1.10.1",
+ "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.10.1.tgz",
+ "integrity": "sha512-zg7Hz2k5lI8kb7U32998pRRFin7zJlkfezGJjUc2heaD4Pw2wObakCDVzkKztTm/Ln7eiVvYsjqak0Ed4LkMDA=="
},
"balanced-match": {
"version": "1.0.0",
@@ -255,9 +291,9 @@
}
},
"bl": {
- "version": "4.0.2",
- "resolved": "https://registry.npmjs.org/bl/-/bl-4.0.2.tgz",
- "integrity": "sha512-j4OH8f6Qg2bGuWfRiltT2HYGx0e1QcBTrK9KAHNMwMZdQnDZFk0ZSYIpADjYCB3U12nicC5tVJwSIhwOWjb4RQ==",
+ "version": "4.0.3",
+ "resolved": "https://registry.npmjs.org/bl/-/bl-4.0.3.tgz",
+ "integrity": "sha512-fs4G6/Hu4/EE+F75J8DuN/0IpQqNjAdC7aEQv7Qt8MHGUH7Ckv2MwTEEeN9QehD0pfIDkMI1bkHYkKy7xHyKIg==",
"requires": {
"buffer": "^5.5.0",
"inherits": "^2.0.4",
@@ -298,11 +334,6 @@
"integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==",
"dev": true
},
- "camelcase": {
- "version": "5.3.1",
- "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz",
- "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg=="
- },
"caseless": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
@@ -325,9 +356,9 @@
"dev": true
},
"supports-color": {
- "version": "7.1.0",
- "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.1.0.tgz",
- "integrity": "sha512-oRSIpR8pxT1Wr2FquTNnGet79b3BWljqOuoW/h4oBhxJ/HUbX5nX6JSruTkvXDCFMwDPvsaTTbvMLKZWSy0R5g==",
+ "version": "7.2.0",
+ "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
+ "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
"dev": true,
"requires": {
"has-flag": "^4.0.0"
@@ -345,13 +376,13 @@
}
},
"cliui": {
- "version": "6.0.0",
- "resolved": "https://registry.npmjs.org/cliui/-/cliui-6.0.0.tgz",
- "integrity": "sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==",
+ "version": "7.0.3",
+ "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.3.tgz",
+ "integrity": "sha512-Gj3QHTkVMPKqwP3f7B4KPkBZRMR9r4rfi5bXFpg1a+Svvj8l7q5CnkBkVQzfxT5DFSsGk2+PascOgL0JYkL2kw==",
"requires": {
"string-width": "^4.2.0",
"strip-ansi": "^6.0.0",
- "wrap-ansi": "^6.2.0"
+ "wrap-ansi": "^7.0.0"
}
},
"color-convert": {
@@ -381,30 +412,14 @@
}
},
"compress-commons": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/compress-commons/-/compress-commons-3.0.0.tgz",
- "integrity": "sha512-FyDqr8TKX5/X0qo+aVfaZ+PVmNJHJeckFBlq8jZGSJOgnynhfifoyl24qaqdUdDIBe0EVTHByN6NAkqYvE/2Xg==",
+ "version": "4.0.1",
+ "resolved": "https://registry.npmjs.org/compress-commons/-/compress-commons-4.0.1.tgz",
+ "integrity": "sha512-xZm9o6iikekkI0GnXCmAl3LQGZj5TBDj0zLowsqi7tJtEa3FMGSEcHcqrSJIrOAk1UG/NBbDn/F1q+MG/p/EsA==",
"requires": {
"buffer-crc32": "^0.2.13",
- "crc32-stream": "^3.0.1",
+ "crc32-stream": "^4.0.0",
"normalize-path": "^3.0.0",
- "readable-stream": "^2.3.7"
- },
- "dependencies": {
- "readable-stream": {
- "version": "2.3.7",
- "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.7.tgz",
- "integrity": "sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==",
- "requires": {
- "core-util-is": "~1.0.0",
- "inherits": "~2.0.3",
- "isarray": "~1.0.0",
- "process-nextick-args": "~2.0.0",
- "safe-buffer": "~5.1.1",
- "string_decoder": "~1.1.1",
- "util-deprecate": "~1.0.1"
- }
- }
+ "readable-stream": "^3.6.0"
}
},
"concat-map": {
@@ -442,9 +457,9 @@
}
},
"crc32-stream": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/crc32-stream/-/crc32-stream-3.0.1.tgz",
- "integrity": "sha512-mctvpXlbzsvK+6z8kJwSJ5crm7yBwrQMTybJzMw1O4lLGJqjlDCXY2Zw7KheiA6XBEcBmfLx1D88mjRGVJtY9w==",
+ "version": "4.0.0",
+ "resolved": "https://registry.npmjs.org/crc32-stream/-/crc32-stream-4.0.0.tgz",
+ "integrity": "sha512-tyMw2IeUX6t9jhgXI6um0eKfWq4EIDpfv5m7GX4Jzp7eVelQ360xd8EPXJhp2mHwLQIkqlnMLjzqSZI3a+0wRw==",
"requires": {
"crc": "^3.4.4",
"readable-stream": "^3.4.0"
@@ -500,23 +515,18 @@
}
},
"debug": {
- "version": "4.1.1",
- "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz",
- "integrity": "sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==",
+ "version": "4.2.0",
+ "resolved": "https://registry.npmjs.org/debug/-/debug-4.2.0.tgz",
+ "integrity": "sha512-IX2ncY78vDTjZMFUdmsvIRFY2Cf4FnD0wRs+nQwJU8Lu99/tPFdb0VybiiMTPe3I6rQmwsqQqRBvxU+bZ/I8sg==",
"dev": true,
"requires": {
- "ms": "^2.1.1"
+ "ms": "2.1.2"
}
},
- "decamelize": {
- "version": "1.2.0",
- "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz",
- "integrity": "sha1-9lNNFRSCabIDUue+4m9QH5oZEpA="
- },
"decimal.js": {
- "version": "10.2.0",
- "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.2.0.tgz",
- "integrity": "sha512-vDPw+rDgn3bZe1+F/pyEwb1oMG2XTlRVgAa6B4KccTEpYgF8w6eQllVbQcfIJnZyvzFtFpxnpGtx8dd7DJp/Rw=="
+ "version": "10.2.1",
+ "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.2.1.tgz",
+ "integrity": "sha512-KaL7+6Fw6i5A2XSnsbhm/6B+NuEA7TZ4vqxnd5tXz9sbKtrN9Srj8ab4vKVdK8YAqZO9P1kg45Y6YLoduPf+kw=="
},
"deep-is": {
"version": "0.1.3",
@@ -585,14 +595,19 @@
}
},
"enquirer": {
- "version": "2.3.5",
- "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.5.tgz",
- "integrity": "sha512-BNT1C08P9XD0vNg3J475yIUG+mVdp9T6towYFHUv897X0KoHBjB1shyrNmhmtHWKP17iSWgo7Gqh7BBuzLZMSA==",
+ "version": "2.3.6",
+ "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.6.tgz",
+ "integrity": "sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==",
"dev": true,
"requires": {
- "ansi-colors": "^3.2.1"
+ "ansi-colors": "^4.1.1"
}
},
+ "escalade": {
+ "version": "3.1.1",
+ "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz",
+ "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw=="
+ },
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
@@ -612,22 +627,23 @@
}
},
"eslint": {
- "version": "7.3.1",
- "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.3.1.tgz",
- "integrity": "sha512-cQC/xj9bhWUcyi/RuMbRtC3I0eW8MH0jhRELSvpKYkWep3C6YZ2OkvcvJVUeO6gcunABmzptbXBuDoXsjHmfTA==",
+ "version": "7.11.0",
+ "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.11.0.tgz",
+ "integrity": "sha512-G9+qtYVCHaDi1ZuWzBsOWo2wSwd70TXnU6UHA3cTYHp7gCTXZcpggWFoUVAMRarg68qtPoNfFbzPh+VdOgmwmw==",
"dev": true,
"requires": {
"@babel/code-frame": "^7.0.0",
+ "@eslint/eslintrc": "^0.1.3",
"ajv": "^6.10.0",
"chalk": "^4.0.0",
"cross-spawn": "^7.0.2",
"debug": "^4.0.1",
"doctrine": "^3.0.0",
"enquirer": "^2.3.5",
- "eslint-scope": "^5.1.0",
- "eslint-utils": "^2.0.0",
- "eslint-visitor-keys": "^1.2.0",
- "espree": "^7.1.0",
+ "eslint-scope": "^5.1.1",
+ "eslint-utils": "^2.1.0",
+ "eslint-visitor-keys": "^2.0.0",
+ "espree": "^7.3.0",
"esquery": "^1.2.0",
"esutils": "^2.0.2",
"file-entry-cache": "^5.0.1",
@@ -641,7 +657,7 @@
"js-yaml": "^3.13.1",
"json-stable-stringify-without-jsonify": "^1.0.1",
"levn": "^0.4.1",
- "lodash": "^4.17.14",
+ "lodash": "^4.17.19",
"minimatch": "^3.0.4",
"natural-compare": "^1.4.0",
"optionator": "^0.9.1",
@@ -665,6 +681,12 @@
"type-check": "~0.4.0"
}
},
+ "lodash": {
+ "version": "4.17.20",
+ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.20.tgz",
+ "integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==",
+ "dev": true
+ },
"optionator": {
"version": "0.9.1",
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz",
@@ -697,12 +719,12 @@
}
},
"eslint-scope": {
- "version": "5.1.0",
- "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.0.tgz",
- "integrity": "sha512-iiGRvtxWqgtx5m8EyQUJihBloE4EnYeGE/bz1wSPwJE6tZuJUtHlhqDM4Xj2ukE8Dyy1+HCZ4hE0fzIVMzb58w==",
+ "version": "5.1.1",
+ "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz",
+ "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==",
"dev": true,
"requires": {
- "esrecurse": "^4.1.0",
+ "esrecurse": "^4.3.0",
"estraverse": "^4.1.1"
}
},
@@ -713,23 +735,45 @@
"dev": true,
"requires": {
"eslint-visitor-keys": "^1.1.0"
+ },
+ "dependencies": {
+ "eslint-visitor-keys": {
+ "version": "1.3.0",
+ "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz",
+ "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==",
+ "dev": true
+ }
}
},
"eslint-visitor-keys": {
- "version": "1.3.0",
- "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz",
- "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==",
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.0.0.tgz",
+ "integrity": "sha512-QudtT6av5WXels9WjIM7qz1XD1cWGvX4gGXvp/zBn9nXG02D0utdU3Em2m/QjTnrsk6bBjmCygl3rmj118msQQ==",
"dev": true
},
"espree": {
- "version": "7.1.0",
- "resolved": "https://registry.npmjs.org/espree/-/espree-7.1.0.tgz",
- "integrity": "sha512-dcorZSyfmm4WTuTnE5Y7MEN1DyoPYy1ZR783QW1FJoenn7RailyWFsq/UL6ZAAA7uXurN9FIpYyUs3OfiIW+Qw==",
+ "version": "7.3.0",
+ "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.0.tgz",
+ "integrity": "sha512-dksIWsvKCixn1yrEXO8UosNSxaDoSYpq9reEjZSbHLpT5hpaCAKTLBwq0RHtLrIr+c0ByiYzWT8KTMRzoRCNlw==",
"dev": true,
"requires": {
- "acorn": "^7.2.0",
+ "acorn": "^7.4.0",
"acorn-jsx": "^5.2.0",
- "eslint-visitor-keys": "^1.2.0"
+ "eslint-visitor-keys": "^1.3.0"
+ },
+ "dependencies": {
+ "acorn": {
+ "version": "7.4.1",
+ "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz",
+ "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==",
+ "dev": true
+ },
+ "eslint-visitor-keys": {
+ "version": "1.3.0",
+ "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz",
+ "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==",
+ "dev": true
+ }
}
},
"esprima": {
@@ -747,20 +791,28 @@
},
"dependencies": {
"estraverse": {
- "version": "5.1.0",
- "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.1.0.tgz",
- "integrity": "sha512-FyohXK+R0vE+y1nHLoBM7ZTyqRpqAlhdZHCWIWEviFLiGB8b04H6bQs8G+XTthacvT8VuwvteiP7RJSxMs8UEw==",
+ "version": "5.2.0",
+ "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz",
+ "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==",
"dev": true
}
}
},
"esrecurse": {
- "version": "4.2.1",
- "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.2.1.tgz",
- "integrity": "sha512-64RBB++fIOAXPw3P9cy89qfMlvZEXZkqqJkjqqXIvzP5ezRZjW+lPWjw35UX/3EhUPFYbg5ER4JYgDw4007/DQ==",
+ "version": "4.3.0",
+ "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz",
+ "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==",
"dev": true,
"requires": {
- "estraverse": "^4.1.0"
+ "estraverse": "^5.2.0"
+ },
+ "dependencies": {
+ "estraverse": {
+ "version": "5.2.0",
+ "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz",
+ "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==",
+ "dev": true
+ }
}
},
"estraverse": {
@@ -807,15 +859,6 @@
"flat-cache": "^2.0.1"
}
},
- "find-up": {
- "version": "4.1.0",
- "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz",
- "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==",
- "requires": {
- "locate-path": "^5.0.0",
- "path-exists": "^4.0.0"
- }
- },
"flat-cache": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-2.0.1.tgz",
@@ -941,12 +984,25 @@
"integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI="
},
"har-validator": {
- "version": "5.1.3",
- "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz",
- "integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==",
+ "version": "5.1.5",
+ "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.5.tgz",
+ "integrity": "sha512-nmT2T0lljbxdQZfspsno9hgrG3Uir6Ks5afism62poxqBM6sDnMEuPmzTq8XN0OEwqKLLdh1jQI3qyE66Nzb3w==",
"requires": {
- "ajv": "^6.5.5",
+ "ajv": "^6.12.3",
"har-schema": "^2.0.0"
+ },
+ "dependencies": {
+ "ajv": {
+ "version": "6.12.6",
+ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
+ "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
+ "requires": {
+ "fast-deep-equal": "^3.1.1",
+ "fast-json-stable-stringify": "^2.0.0",
+ "json-schema-traverse": "^0.4.1",
+ "uri-js": "^4.2.2"
+ }
+ }
}
},
"has-flag": {
@@ -1107,9 +1163,9 @@
"integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM="
},
"jsdom": {
- "version": "16.2.2",
- "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.2.2.tgz",
- "integrity": "sha512-pDFQbcYtKBHxRaP55zGXCJWgFHkDAYbKcsXEK/3Icu9nKYZkutUXfLBwbD+09XDutkYSHcgfQLZ0qvpAAm9mvg==",
+ "version": "16.4.0",
+ "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.4.0.tgz",
+ "integrity": "sha512-lYMm3wYdgPhrl7pDcRmvzPhhrGVBeVhPIqeHjzeiHN3DFmD1RBpbExbi8vU7BJdH8VAZYovR8DMt0PNNDM7k8w==",
"requires": {
"abab": "^2.0.3",
"acorn": "^7.1.1",
@@ -1131,7 +1187,7 @@
"tough-cookie": "^3.0.1",
"w3c-hr-time": "^1.0.2",
"w3c-xmlserializer": "^2.0.0",
- "webidl-conversions": "^6.0.0",
+ "webidl-conversions": "^6.1.0",
"whatwg-encoding": "^1.0.5",
"whatwg-mimetype": "^2.3.0",
"whatwg-url": "^8.0.0",
@@ -1204,18 +1260,11 @@
"type-check": "~0.3.2"
}
},
- "locate-path": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz",
- "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==",
- "requires": {
- "p-locate": "^4.1.0"
- }
- },
"lodash": {
- "version": "4.17.15",
- "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
- "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
+ "version": "4.17.20",
+ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.20.tgz",
+ "integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==",
+ "dev": true
},
"lodash.defaults": {
"version": "4.2.0",
@@ -1368,27 +1417,6 @@
"word-wrap": "~1.2.3"
}
},
- "p-limit": {
- "version": "2.3.0",
- "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
- "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
- "requires": {
- "p-try": "^2.0.0"
- }
- },
- "p-locate": {
- "version": "4.1.0",
- "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
- "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
- "requires": {
- "p-limit": "^2.2.0"
- }
- },
- "p-try": {
- "version": "2.2.0",
- "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
- "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ=="
- },
"parent-module": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz",
@@ -1411,11 +1439,6 @@
"resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.1.tgz",
"integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug=="
},
- "path-exists": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
- "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w=="
- },
"path-is-absolute": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
@@ -1473,6 +1496,14 @@
"util-deprecate": "^1.0.1"
}
},
+ "readdir-glob": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.1.tgz",
+ "integrity": "sha512-91/k1EzZwDx6HbERR+zucygRFfiPl2zkIYZtv3Jjr6Mn7SkKcVct8aVO+sSRiGMc6fLf72du3d92/uY63YPdEA==",
+ "requires": {
+ "minimatch": "^3.0.4"
+ }
+ },
"regexpp": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz",
@@ -1518,19 +1549,26 @@
}
},
"request-promise-core": {
- "version": "1.1.3",
- "resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.3.tgz",
- "integrity": "sha512-QIs2+ArIGQVp5ZYbWD5ZLCY29D5CfWizP8eWnm8FoGD1TX61veauETVQbrV60662V0oFBkrDOuaBI8XgtuyYAQ==",
+ "version": "1.1.4",
+ "resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.4.tgz",
+ "integrity": "sha512-TTbAfBBRdWD7aNNOoVOBH4pN/KigV6LyapYNNlAPA8JwbovRti1E88m3sYAwsLi5ryhPKsE9APwnjFTgdUjTpw==",
"requires": {
- "lodash": "^4.17.15"
+ "lodash": "^4.17.19"
+ },
+ "dependencies": {
+ "lodash": {
+ "version": "4.17.20",
+ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.20.tgz",
+ "integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA=="
+ }
}
},
"request-promise-native": {
- "version": "1.0.8",
- "resolved": "https://registry.npmjs.org/request-promise-native/-/request-promise-native-1.0.8.tgz",
- "integrity": "sha512-dapwLGqkHtwL5AEbfenuzjTYg35Jd6KPytsC2/TLkVMz8rm+tNt72MGUWT1RP/aYawMpN6HqbNGBQaRcBtjQMQ==",
+ "version": "1.0.9",
+ "resolved": "https://registry.npmjs.org/request-promise-native/-/request-promise-native-1.0.9.tgz",
+ "integrity": "sha512-wcW+sIUiWnKgNY0dqCpOZkUbF/I+YPi+f09JZIDa39Ec+q82CpSYniDp+ISgTTbKmnpJWASeJBPZmoxH84wt3g==",
"requires": {
- "request-promise-core": "1.1.3",
+ "request-promise-core": "1.1.4",
"stealthy-require": "^1.1.1",
"tough-cookie": "^2.3.3"
},
@@ -1551,11 +1589,6 @@
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
"integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I="
},
- "require-main-filename": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz",
- "integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg=="
- },
"requisition": {
"version": "1.7.0",
"resolved": "https://registry.npmjs.org/requisition/-/requisition-1.7.0.tgz",
@@ -1616,11 +1649,6 @@
"integrity": "sha512-OrOb32TeeambH6UrhtShmF7CRDqhL6/5XpPNp2DuRH6+9QLw/orhp72j87v8Qa1ScDkvrrBNpZcDejAirJmfXQ==",
"dev": true
},
- "set-blocking": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz",
- "integrity": "sha1-BF+XgtARrppoA93TgrJDkrPYkPc="
- },
"setprototypeof": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
@@ -1757,9 +1785,9 @@
}
},
"strip-json-comments": {
- "version": "3.1.0",
- "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.0.tgz",
- "integrity": "sha512-e6/d0eBu7gHtdCqFt0xJr642LdToM5/cN4Qb9DbHjVx1CP5RyeM+zH7pbecEmDv/lBqb0QH+6Uqq75rxFPkM0w==",
+ "version": "3.1.1",
+ "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz",
+ "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==",
"dev": true
},
"supports-color": {
@@ -1829,11 +1857,11 @@
}
},
"tar-stream": {
- "version": "2.1.2",
- "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.1.2.tgz",
- "integrity": "sha512-UaF6FoJ32WqALZGOIAApXx+OdxhekNMChu6axLJR85zMMjXKWFGjbIRe+J6P4UnRGg9rAwWvbTT0oI7hD/Un7Q==",
+ "version": "2.1.4",
+ "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.1.4.tgz",
+ "integrity": "sha512-o3pS2zlG4gxr67GmFYBLlq+dM8gyRGUOvsrHclSkvtVtQbjV0s/+ZE8OpICbaj8clrX3tjeHngYGP7rweaBnuw==",
"requires": {
- "bl": "^4.0.1",
+ "bl": "^4.0.3",
"end-of-stream": "^1.4.1",
"fs-constants": "^1.0.0",
"inherits": "^2.0.3",
@@ -2000,20 +2028,13 @@
"integrity": "sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g=="
},
"whatwg-url": {
- "version": "8.1.0",
- "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-8.1.0.tgz",
- "integrity": "sha512-vEIkwNi9Hqt4TV9RdnaBPNt+E2Sgmo3gePebCRgZ1R7g6d23+53zCTnuB0amKI4AXq6VM8jj2DUAa0S1vjJxkw==",
+ "version": "8.4.0",
+ "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-8.4.0.tgz",
+ "integrity": "sha512-vwTUFf6V4zhcPkWp/4CQPr1TW9Ml6SF4lVyaIMBdJw5i6qUUJ1QWM4Z6YYVkfka0OUIzVo/0aNtGVGk256IKWw==",
"requires": {
"lodash.sortby": "^4.7.0",
"tr46": "^2.0.2",
- "webidl-conversions": "^5.0.0"
- },
- "dependencies": {
- "webidl-conversions": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-5.0.0.tgz",
- "integrity": "sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA=="
- }
+ "webidl-conversions": "^6.1.0"
}
},
"which": {
@@ -2025,20 +2046,15 @@
"isexe": "^2.0.0"
}
},
- "which-module": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz",
- "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho="
- },
"word-wrap": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz",
"integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ=="
},
"wrap-ansi": {
- "version": "6.2.0",
- "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz",
- "integrity": "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==",
+ "version": "7.0.0",
+ "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
+ "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
"requires": {
"ansi-styles": "^4.0.0",
"string-width": "^4.1.0",
@@ -2060,9 +2076,9 @@
}
},
"ws": {
- "version": "7.3.0",
- "resolved": "https://registry.npmjs.org/ws/-/ws-7.3.0.tgz",
- "integrity": "sha512-iFtXzngZVXPGgpTlP1rBqsUK82p9tKqsWRPg5L56egiljujJT3vGAYnHANvFxBieXrTFavhzhxW52jnaWV+w2w=="
+ "version": "7.3.1",
+ "resolved": "https://registry.npmjs.org/ws/-/ws-7.3.1.tgz",
+ "integrity": "sha512-D3RuNkynyHmEJIpD2qrgVkc9DQ23OrN/moAwZX4L8DfvszsJxpjQuUq3LMx6HoYji9fbIOBY18XWBsAux1ZZUA=="
},
"xml-name-validator": {
"version": "3.0.0",
@@ -2080,44 +2096,36 @@
"integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ=="
},
"y18n": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz",
- "integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w=="
+ "version": "5.0.4",
+ "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.4.tgz",
+ "integrity": "sha512-deLOfD+RvFgrpAmSZgfGdWYE+OKyHcVHaRQ7NphG/63scpRvTHHeQMAxGGvaLVGJ+HYVcCXlzcTK0ZehFf+eHQ=="
},
"yargs": {
- "version": "15.3.1",
- "resolved": "https://registry.npmjs.org/yargs/-/yargs-15.3.1.tgz",
- "integrity": "sha512-92O1HWEjw27sBfgmXiixJWT5hRBp2eobqXicLtPBIDBhYB+1HpwZlXmbW2luivBJHBzki+7VyCLRtAkScbTBQA==",
+ "version": "16.1.0",
+ "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.1.0.tgz",
+ "integrity": "sha512-upWFJOmDdHN0syLuESuvXDmrRcWd1QafJolHskzaw79uZa7/x53gxQKiR07W59GWY1tFhhU/Th9DrtSfpS782g==",
"requires": {
- "cliui": "^6.0.0",
- "decamelize": "^1.2.0",
- "find-up": "^4.1.0",
- "get-caller-file": "^2.0.1",
+ "cliui": "^7.0.2",
+ "escalade": "^3.1.1",
+ "get-caller-file": "^2.0.5",
"require-directory": "^2.1.1",
- "require-main-filename": "^2.0.0",
- "set-blocking": "^2.0.0",
"string-width": "^4.2.0",
- "which-module": "^2.0.0",
- "y18n": "^4.0.0",
- "yargs-parser": "^18.1.1"
+ "y18n": "^5.0.2",
+ "yargs-parser": "^20.2.2"
}
},
"yargs-parser": {
- "version": "18.1.3",
- "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-18.1.3.tgz",
- "integrity": "sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==",
- "requires": {
- "camelcase": "^5.0.0",
- "decamelize": "^1.2.0"
- }
+ "version": "20.2.3",
+ "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.3.tgz",
+ "integrity": "sha512-emOFRT9WVHw03QSvN5qor9QQT9+sw5vwxfYweivSMHTcAXPefwVae2FjO7JJjj8hCE4CzPOPeFM83VwT29HCww=="
},
"zip-stream": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/zip-stream/-/zip-stream-3.0.1.tgz",
- "integrity": "sha512-r+JdDipt93ttDjsOVPU5zaq5bAyY+3H19bDrThkvuVxC0xMQzU1PJcS6D+KrP3u96gH9XLomcHPb+2skoDjulQ==",
+ "version": "4.0.2",
+ "resolved": "https://registry.npmjs.org/zip-stream/-/zip-stream-4.0.2.tgz",
+ "integrity": "sha512-TGxB2g+1ur6MHkvM644DuZr8Uzyz0k0OYWtS3YlpfWBEmK4woaC2t3+pozEL3dBfIPmpgmClR5B2QRcMgGt22g==",
"requires": {
"archiver-utils": "^2.1.0",
- "compress-commons": "^3.0.0",
+ "compress-commons": "^4.0.0",
"readable-stream": "^3.6.0"
}
}
diff --git a/package.json b/package.json
index e29c69b..2fba4a3 100644
--- a/package.json
+++ b/package.json
@@ -23,16 +23,16 @@
"lint": "eslint lib"
},
"dependencies": {
- "archiver": "^4.0.1",
+ "archiver": "^5.0.2",
"cli-progress": "^3.8.2",
"cpr": "^3.0.1",
- "jsdom": "^16.2.2",
+ "jsdom": "^16.4.0",
"requisition": "^1.5.0",
"throat": "^5.0.0",
- "yargs": "^15.3.1"
+ "yargs": "^16.1.0"
},
"devDependencies": {
- "eslint": "7.3.1"
+ "eslint": "^7.11.0"
},
"engines": {
"node": ">=12.10.0"
From d4c67d73dfffd04058e2d67e1d413be26b113b9d Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 20:49:50 -0400
Subject: [PATCH 005/186] Fix uncapitalized sentences
---
lib/substitutions.json | 354 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 339 insertions(+), 15 deletions(-)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 4264abd..1501418 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -444,6 +444,10 @@
{
"before": "what I could see of the the second floor",
"after": "what I could see of the second floor"
+ },
+ {
+ "before": "down the stairs. then as carefully as we could",
+ "after": "down the stairs. Then, as carefully as we could"
}
],
"https://parahumans.wordpress.com/2011/12/03/hive-5-9/": [
@@ -962,6 +966,10 @@
{
"before": "reeled—He",
"after": "reeled—he"
+ },
+ {
+ "before": "Shielder’s bubble. water in front of",
+ "after": "Shielder’s bubble. Water in front of"
}
],
"https://parahumans.wordpress.com/2012/03/13/extermination-8-4/": [
@@ -2656,6 +2664,10 @@
{
"before": "Krouse,” Luke said. “We",
"after": "Krouse,” Luke said, “we"
+ },
+ {
+ "before": "on his knees. he held the cigarette",
+ "after": "on his knees. He held the cigarette"
}
],
"https://parahumans.wordpress.com/2013/01/19/queen-18-1/": [
@@ -3025,6 +3037,10 @@
"before": "the Clairvoyant",
"after": "the clairvoyant",
"_comment": "see convert.js; this corrects an over-correction"
+ },
+ {
+ "before": "maintain eye contact. he could feel the warmth",
+ "after": "maintain eye contact. He could feel the warmth"
}
],
"https://parahumans.wordpress.com/2013/04/09/imago-21-1/": [
@@ -3782,6 +3798,10 @@
{
"before": "willingly,” Weld said. “Before",
"after": "willingly,” Weld said, “before"
+ },
+ {
+ "before": "onto the phone. seven point font",
+ "after": "onto the phone. Seven point font"
}
],
"https://parahumans.wordpress.com/2013/09/19/venom-29-1/": [
@@ -3896,12 +3916,20 @@
{
"before": "by Golem’s bindings
",
"after": "by Golem’s bindings.
"
+ },
+ {
+ "before": "have time to react. the light detonated",
+ "after": "have time to react. The light detonated"
}
],
"https://parahumans.wordpress.com/2013/10/12/interlude-29/": [
{
"before": "the host’s natures",
"after": "the hosts’ natures"
+ },
+ {
+ "before": "panting for breath. the wound at his",
+ "after": "panting for breath. The wound at his"
}
],
"https://parahumans.wordpress.com/2013/10/15/speck-30-1/": [
@@ -4148,6 +4176,14 @@
{
"before": "“Yes,’ I",
"after": "“Yes,” I"
+ },
+ {
+ "before": "against me. it would be idiotic",
+ "after": "against me. It would be idiotic"
+ },
+ {
+ "before": "hand as he talked. he pointed to me",
+ "after": "hand as he talked. He pointed to me"
}
],
"https://parahumans.wordpress.com/2013/11/02/teneral-e-1/": [
@@ -4194,6 +4230,10 @@
{
"before": "standby”",
"after": "standby.”"
+ },
+ {
+ "before": "minutes passed. the chance rose",
+ "after": "minutes passed. The chance rose"
}
],
"https://parahumans.wordpress.com/2013/11/12/teneral-e-4/": [
@@ -4212,6 +4252,14 @@
{
"before": "—Problematic",
"after": "—problematic"
+ },
+ {
+ "before": "balconies and rooftops. hazards",
+ "after": "balconies and rooftops. Hazards"
+ },
+ {
+ "before": "another tree stood. with a glimpse",
+ "after": "another tree stood, with a glimpse"
}
],
"https://parahumans.wordpress.com/2013/11/16/teneral-e-5/": [
@@ -4329,12 +4377,6 @@
"after": "and Mom being business"
}
],
- "https://www.parahumans.net/2017/12/09/flare-2-1/": [
- {
- "before": "Some is mom and dad’s",
- "after": "Some is Mom and Dad’s"
- }
- ],
"https://www.parahumans.net/2017/11/30/daybreak-1-7/": [
{
"before": "How do you even think rec—how do you think",
@@ -4351,6 +4393,12 @@
"after": "Glitzglam *New Message*: I"
}
],
+ "https://www.parahumans.net/2017/12/09/flare-2-1/": [
+ {
+ "before": "Some is mom and dad’s",
+ "after": "Some is Mom and Dad’s"
+ }
+ ],
"https://www.parahumans.net/2017/12/16/flare-2-3/": [
{
"before": "two—piece",
@@ -4359,6 +4407,10 @@
{
"before": "want to think about mom",
"after": "want to think about Mom"
+ },
+ {
+ "before": "the floor. the armband landed",
+ "after": "the floor. The armband landed"
}
],
"https://www.parahumans.net/2017/12/23/flare-2-5/": [
@@ -4413,6 +4465,10 @@
{
"before": "Tristan, “You should take",
"after": "Tristan, “you should take"
+ },
+ {
+ "before": "to start with. it’s delicate enough",
+ "after": "to start with. It’s delicate enough"
}
],
"https://www.parahumans.net/2018/01/13/glare-3-3/": [
@@ -4511,6 +4567,10 @@
{
"before": "mom’s verdict was",
"after": "Mom’s verdict was"
+ },
+ {
+ "before": "as you walk. or blurry light",
+ "after": "as you walk. Or blurry light"
}
],
"https://www.parahumans.net/2018/02/20/shade-4-7/": [
@@ -4543,6 +4603,10 @@
{
"before": "“Cozy,” The side of the",
"after": "“Cozy.” The side of the"
+ },
+ {
+ "before": "hair he had. it was long",
+ "after": "hair he had. It was long"
}
],
"https://www.parahumans.net/2018/03/06/shadow-5-4/": [
@@ -4597,16 +4661,48 @@
"after": "The convoy continued to disintegrate as the road made its gradual turn"
}
],
+ "https://www.parahumans.net/2018/03/27/shadow-5-10/": [
+ {
+ "before": "our heads. the pupil",
+ "after": "our heads. The pupil"
+ },
+ {
+ "before": "forearm and her fingertips. and she stumbled into",
+ "after": "forearm and her fingertips, and she stumbled into"
+ }
+ ],
"https://www.parahumans.net/2018/03/31/shadow-5-11/": [
{
"before": "side of her her chin",
"after": "side of her chin"
}
],
+ "https://www.parahumans.net/2018/04/03/shadow-5-12/": [
+ {
+ "before": "lost my grip on him. and my hand",
+ "after": "lost my grip on him, and my hand"
+ }
+ ],
+ "https://www.parahumans.net/2018/04/21/pitch-6-3/": [
+ {
+ "before": "wall of bodies. the woman behind",
+ "after": "wall of bodies. The woman behind"
+ }
+ ],
+ "https://www.parahumans.net/2018/04/24/pitch-6-4/": [
+ {
+ "before": "glanced back. the others had",
+ "after": "glanced back. The others had"
+ }
+ ],
"https://www.parahumans.net/2018/04/28/pitch-6-5/": [
{
"before": "of material to to bandage",
"after": "of material to bandage"
+ },
+ {
+ "before": "Cradle said. his tone was",
+ "after": "Cradle said. His tone was"
}
],
"https://www.parahumans.net/2018/05/08/pitch-6-8/": [
@@ -4652,6 +4748,18 @@
"after": "answer the second question"
}
],
+ "https://www.parahumans.net/2018/05/30/eclipse-x-5/": [
+ {
+ "before": "no words. and there",
+ "after": "no words. And there"
+ }
+ ],
+ "https://www.parahumans.net/2018/06/09/torch-7-6/": [
+ {
+ "before": "I saw. the scans of the woman",
+ "after": "I saw. The scans of the woman"
+ }
+ ],
"https://www.parahumans.net/2018/06/12/torch-7-7/": [
{
"before": "and I can call dad",
@@ -4702,6 +4810,10 @@
{
"before": "cooked for me,” mom said",
"after": "cooked for me,” Mom said"
+ },
+ {
+ "before": "of her bag. only one temporary tattoo",
+ "after": "of her bag. Only one temporary tattoo"
}
],
"https://www.parahumans.net/2018/07/07/beacon-8-2/": [
@@ -4710,16 +4822,76 @@
"after": "Mom and Dad.
"
}
],
- "https://www.parahumans.net/2018/07/31/beacon-8-9/": [
+ "https://www.parahumans.net/2018/07/17/beacon-8-5/": [
{
- "before": "you know that dad and I saying",
- "after": "you know that dad and I saying"
+ "before": "on the offensive. chains whirled",
+ "after": "on the offensive. Chains whirled"
}
],
"https://www.parahumans.net/2018/07/21/beacon-8-6/": [
{
"before": "unfolded umbrella.A metal",
"after": "unfolded umbrella. A metal"
+ },
+ {
+ "before": "above me. the sky was dark",
+ "after": "above me. The sky was dark"
+ }
+ ],
+ "https://www.parahumans.net/2018/07/31/beacon-8-9/": [
+ {
+ "before": "you know that dad and I saying",
+ "after": "you know that Dad and I saying"
+ },
+ {
+ "before": "was impatient. it was hard to tell",
+ "after": "was impatient. It was hard to tell"
+ }
+ ],
+ "https://www.parahumans.net/2018/08/11/beacon-8-12/": [
+ {
+ "before": "who are free now. they’re out there",
+ "after": "who are free now. They’re out there"
+ }
+ ],
+ "https://www.parahumans.net/2018/08/28/gleaming-9-3/": [
+ {
+ "before": "Chris in the process. stretched on his way",
+ "after": "Chris in the process. He stretched on his way"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/11/gleaming-9-7/": [
+ {
+ "before": "Byron said. he was at",
+ "after": "Byron said. He was at"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/13/gleaming-interlude-9-x/": [
+ {
+ "before": "he bottles it up. then the bottle cracks",
+ "after": "he bottles it up. Then the bottle cracks"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/25/gleaming-9-10/": [
+ {
+ "before": "and mom and dad’s rules",
+ "after": "and Mom and Dad’s rules"
+ },
+ {
+ "before": "at the balcony. both wore",
+ "after": "at the balcony. Both wore"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/29/gleaming-9-11/": [
+ {
+ "before": "two people died. the forcefield went",
+ "after": "two people died. The forcefield went"
+ }
+ ],
+ "https://www.parahumans.net/2018/10/09/gleaming-9-14/": [
+ {
+ "before": "in a fireman carry. with my free hand",
+ "after": "in a fireman carry. With my free hand"
}
],
"https://www.parahumans.net/2018/10/16/gleaming-interlude-9-z/": [
@@ -4734,18 +4906,30 @@
"after": "shape of the moment"
}
],
- "https://www.parahumans.net/2018/09/25/gleaming-9-10/": [
- {
- "before": "and mom and dad’s rules",
- "after": "and Mom and Dad’s rules"
- }
- ],
"https://www.parahumans.net/2018/10/23/polarize-10-1/": [
{
"before": "You let mom talk and",
"after": "You let Mom talk and"
}
],
+ "https://www.parahumans.net/2018/11/03/polarize-10-4/": [
+ {
+ "before": "services of mercenaries. depending on timing",
+ "after": "services of mercenaries. Depending on timing"
+ }
+ ],
+ "https://www.parahumans.net/2018/11/17/polarize-10-8/": [
+ {
+ "before": "Sveta said. her hand was removed",
+ "after": "Sveta said. Her hand was removed"
+ }
+ ],
+ "https://www.parahumans.net/2018/11/20/polarize-10-9/": [
+ {
+ "before": "confines of the suit. and rearranged herself",
+ "after": "confines of the suit, and rearranged herself"
+ }
+ ],
"https://www.parahumans.net/2018/12/01/polarize-10-11/": [
{
"before": "See dad, sleep.",
@@ -4772,6 +4956,22 @@
{
"before": "someone had been been called at three",
"after": "someone had been called at three"
+ },
+ {
+ "before": "I croaked. as she picked up",
+ "after": "I croaked, as she picked up"
+ }
+ ],
+ "https://www.parahumans.net/2019/02/02/blinding-11-11/": [
+ {
+ "before": "to Capricorn. then she looked",
+ "after": "to Capricorn. Then she looked"
+ }
+ ],
+ "https://www.parahumans.net/2019/02/05/blinding-11-12/": [
+ {
+ "before": "and drawers. her legs were stacked against",
+ "after": "and drawers. Her legs were stacked against"
}
],
"https://www.parahumans.net/2019/02/09/interlude-12-z/": [
@@ -4822,6 +5022,18 @@
"after": "wrists behind them around the stomach"
}
],
+ "https://www.parahumans.net/2019/03/16/heavens-12-7/": [
+ {
+ "before": "without knowing. by Love Lost",
+ "after": "without knowing. By Love Lost"
+ }
+ ],
+ "https://www.parahumans.net/2019/03/19/heavens-12-8/": [
+ {
+ "before": "created glowing orbs. he threw one to my",
+ "after": "created glowing orbs. He threw one to my"
+ }
+ ],
"https://www.parahumans.net/2019/03/26/heavens-12-none/": [
{
"before": "if I inherit mom’s whole",
@@ -4840,6 +5052,14 @@
{
"before": "Night mom",
"after": "Night Mom"
+ },
+ {
+ "before": "was distorted. if it wasn’t for the fact",
+ "after": "was distorted. If it wasn’t for the fact"
+ },
+ {
+ "before": "March said. she flourished her",
+ "after": "March said. She flourished her"
}
],
"https://www.parahumans.net/2019/04/09/black-13-3/": [
@@ -4848,12 +5068,24 @@
"after": "you have surmised"
}
],
+ "https://www.parahumans.net/2019/04/02/black-13-1/": [
+ {
+ "before": "walls stone and wood. the building",
+ "after": "walls stone and wood. The building"
+ }
+ ],
"https://www.parahumans.net/2019/04/23/black-13-7/": [
{
"before": "looked like they were were painted on",
"after": "looked like they were painted on"
}
],
+ "https://www.parahumans.net/2019/04/27/black-13-8/": [
+ {
+ "before": "cash are involved. there’s a reason",
+ "after": "cash are involved. There’s a reason"
+ }
+ ],
"https://www.parahumans.net/2019/05/11/black-13-11/": [
{
"before": "been scummy and and tried",
@@ -4892,10 +5124,26 @@
"after": "You stay away from Mom, you stay away from Dad"
}
],
+ "https://www.parahumans.net/2019/06/29/breaking-14-z/": [
+ {
+ "before": "smirked, leering. her face was stretching",
+ "after": "smirked, leering. Her face was stretching"
+ }
+ ],
"https://www.parahumans.net/2019/07/02/dying-15-a/": [
{
"before": "this sad old man made her think of dad",
"after": "this sad old man made her think of Dad"
+ },
+ {
+ "before": "scary. that’s all",
+ "after": "scary. That’s all"
+ }
+ ],
+ "https://www.parahumans.net/2019/07/06/dying-15-1/": [
+ {
+ "before": "wall met ceiling. the wall was smooth",
+ "after": "wall met ceiling. The wall was smooth"
}
],
"https://www.parahumans.net/2019/07/13/dying-15-3/": [
@@ -4916,6 +5164,22 @@
"after": "worth of water"
}
],
+ "https://www.parahumans.net/2019/07/30/dying-15-8/": [
+ {
+ "before": "I thought. the dosage was supposed",
+ "after": "I thought. The dosage was supposed"
+ },
+ {
+ "before": "I got a glimpse. it’s fucking with me",
+ "after": "I got a glimpse. It’s fucking with me"
+ }
+ ],
+ "https://www.parahumans.net/2019/08/13/from-within-16-1/": [
+ {
+ "before": "wasn’t A.I. it was just a picture",
+ "after": "wasn’t A.I. It was just a picture"
+ }
+ ],
"https://www.parahumans.net/2019/08/17/from-within-16-2/": [
{
"before": "a small part of that was being being grumpy",
@@ -4928,6 +5192,18 @@
"after": "small face on a fifteen inch"
}
],
+ "https://www.parahumans.net/2019/08/24/from-within-16-4/": [
+ {
+ "before": "the rest black and slim. with doodle-like drawings",
+ "after": "the rest black and slim, with doodle-like drawings"
+ }
+ ],
+ "https://www.parahumans.net/2019/08/27/from-within-16-5/": [
+ {
+ "before": "thing number one. it’s not a lot",
+ "after": "thing number one. It’s not a lot"
+ }
+ ],
"https://www.parahumans.net/2019/08/31/from-within-16-6/": [
{
"before": "letters I wrote to dad’s friends",
@@ -4940,6 +5216,12 @@
"after": "changers and shakers"
}
],
+ "https://www.parahumans.net/2019/09/15/from-within-16-10/": [
+ {
+ "before": "carried on. out of",
+ "after": "carried on, out of"
+ }
+ ],
"https://www.parahumans.net/2019/10/01/sundown-17-1/": [
{
"before": "mind of dad after his head injury",
@@ -4982,16 +5264,32 @@
"after": "Because it made Mom come"
}
],
+ "https://www.parahumans.net/2019/11/12/interlude-17-z-sundown/": [
+ {
+ "before": "had his powers. the forcefield above",
+ "after": "had his powers. The forcefield above"
+ }
+ ],
"https://www.parahumans.net/2019/11/19/radiation-18-2/": [
{
"before": "looking for mom in the crowd",
"after": "looking for Mom in the crowd"
}
],
+ "https://www.parahumans.net/2019/11/23/radiation-18-3/": [
+ {
+ "before": "in the eyes. they were a",
+ "after": "in the eyes. They were a"
+ }
+ ],
"https://www.parahumans.net/2019/11/26/radiation-18-4/": [
{
"before": "bucked a little with the the impact",
"after": "bucked a little with the impact"
+ },
+ {
+ "before": "you go. no joke, no wiggle",
+ "after": "you go. No joke, no wiggle"
}
],
"https://www.parahumans.net/2019/12/07/radiation-18-7/": [
@@ -5028,6 +5326,12 @@
"after": "so Mom doesn’t end up alone"
}
],
+ "https://www.parahumans.net/2020/02/02/infrared-19-7/": [
+ {
+ "before": "I really wasn’t. really",
+ "after": "I really wasn’t. Really"
+ }
+ ],
"https://www.parahumans.net/2020/02/04/infrared-19-8/": [
{
"before": "Lessons from mom, age thirteen",
@@ -5058,6 +5362,22 @@
{
"before": "thinking about mom getting on my case",
"after": "thinking about Mom getting on my case"
+ },
+ {
+ "before": "figure out while she was gone. we’ll see who",
+ "after": "figure out while she was gone. We’ll see who"
+ }
+ ],
+ "https://www.parahumans.net/2020/02/25/last-20-1/": [
+ {
+ "before": "across this clearing. eyes, cameras",
+ "after": "across this clearing. Eyes, cameras"
+ }
+ ],
+ "https://www.parahumans.net/2020/03/31/last-20-10/": [
+ {
+ "before": "shaping it as it rolled out. it became a circular",
+ "after": "shaping it as it rolled out. It became a circular"
}
],
"https://www.parahumans.net/2020/04/21/last-20-e4/": [
@@ -5122,6 +5442,10 @@
{
"before": "he mom shook her head",
"after": "her mom shook her head"
+ },
+ {
+ "before": "with black slacks. his hair was chin",
+ "after": "with black slacks. His hair was chin"
}
]
}
From 68eabd0ca03c1053c773fd0036c9b84107e2e1a0 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 20:57:01 -0400
Subject: [PATCH 006/186] Fix incorrect over-substitution of "TV" in place of
"tv"
Ward contains several instances of words that contain "tv", which shouldn't get capitalized.
---
lib/convert.js | 2 +-
lib/substitutions.json | 6 ++++++
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/lib/convert.js b/lib/convert.js
index 8a989c7..7897f88 100644
--- a/lib/convert.js
+++ b/lib/convert.js
@@ -368,7 +368,7 @@ function getBodyXML(chapter, contentEl) {
xml = xml.replace(/the birdcage/g, "the Birdcage");
// This is usually spelled "TV" but sometimes the other ways. Normalize.
- xml = xml.replace(/tv/g, "TV");
+ xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
xml = xml.replace(/T\.V\./g, "TV");
// There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 1501418..6ccc84c 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4683,6 +4683,12 @@
"after": "lost my grip on him, and my hand"
}
],
+ "https://www.parahumans.net/2018/04/07/shadow-interlude-5-x/": [
+ {
+ "before": "objectvity",
+ "after": "objectivity"
+ }
+ ],
"https://www.parahumans.net/2018/04/21/pitch-6-3/": [
{
"before": "wall of bodies. the woman behind",
From eebd6cb66926fc838f1ddb32b58a7dc00179a41e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 21:03:55 -0400
Subject: [PATCH 007/186] Spot fixes for Ward through Shadow 5.12
---
lib/substitutions.json | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 6ccc84c..29b4ac5 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4669,18 +4669,38 @@
{
"before": "forearm and her fingertips. and she stumbled into",
"after": "forearm and her fingertips, and she stumbled into"
+ },
+ {
+ "before": "and nearby bowled over",
+ "after": "and nearly bowled over"
+ },
+ {
+ "before": "There’s—Careful, Victoria.",
+ "after": "There’s— Careful, Victoria."
}
],
"https://www.parahumans.net/2018/03/31/shadow-5-11/": [
{
"before": "side of her her chin",
"after": "side of her chin"
+ },
+ {
+ "before": "Plus my walkman and pocket atari",
+ "after": "Plus my Walkman and Pocket Atari"
+ },
+ {
+ "before": "The woman—I turned to look and saw",
+ "after": "The woman— I turned to look and saw"
}
],
"https://www.parahumans.net/2018/04/03/shadow-5-12/": [
{
"before": "lost my grip on him. and my hand",
"after": "lost my grip on him, and my hand"
+ },
+ {
+ "before": "Tristan-as Capricorn",
+ "after": "Tristan-as-Capricorn"
}
],
"https://www.parahumans.net/2018/04/07/shadow-interlude-5-x/": [
From 6848456bc92668c2574723bd5ecc718035599430 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 21:14:06 -0400
Subject: [PATCH 008/186] Fix overcapitalizations of "master"
---
lib/substitutions.json | 86 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 86 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 29b4ac5..cc2b8fd 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2755,6 +2755,14 @@
{
"before": "Kayden,” Theo said. “You’re",
"after": "Kayden,” Theo said, “you’re"
+ },
+ {
+ "before": "about how Masters tend to have",
+ "after": "about how masters tend to have"
+ },
+ {
+ "before": "that was why Masters tend to be",
+ "after": "that was why masters tend to be"
}
],
"https://parahumans.wordpress.com/2013/02/07/interlude-18-donation-bonus-3/": [
@@ -3532,6 +3540,10 @@
{
"before": "This,” Imp said. “Is",
"after": "This,” Imp said, “is"
+ },
+ {
+ "before": "kill all of the Masters that are generating",
+ "after": "kill all of the masters that are generating"
}
],
"https://parahumans.wordpress.com/2013/08/03/sting-26-6/": [
@@ -4677,6 +4689,14 @@
{
"before": "There’s—Careful, Victoria.",
"after": "There’s— Careful, Victoria."
+ },
+ {
+ "before": "Hypnotist Master",
+ "after": "Hypnotist master"
+ },
+ {
+ "before": "with Masters in mind",
+ "after": "with masters in mind"
}
],
"https://www.parahumans.net/2018/03/31/shadow-5-11/": [
@@ -4701,6 +4721,10 @@
{
"before": "Tristan-as Capricorn",
"after": "Tristan-as-Capricorn"
+ },
+ {
+ "before": "were Masters and strangers",
+ "after": "were masters and strangers"
}
],
"https://www.parahumans.net/2018/04/07/shadow-interlude-5-x/": [
@@ -4719,6 +4743,10 @@
{
"before": "glanced back. the others had",
"after": "glanced back. The others had"
+ },
+ {
+ "before": "Animal Master, likely",
+ "after": "Animal master, likely"
}
],
"https://www.parahumans.net/2018/04/28/pitch-6-5/": [
@@ -4752,6 +4780,10 @@
{
"before": "those hectic days after dad’s head injury",
"after": "those hectic days after Dad’s head injury"
+ },
+ {
+ "before": "no Master pets",
+ "after": "no master pets"
}
],
"https://www.parahumans.net/2018/05/26/eclipse-x-1/": [
@@ -4884,6 +4916,10 @@
{
"before": "Chris in the process. stretched on his way",
"after": "Chris in the process. He stretched on his way"
+ },
+ {
+ "before": "with a strong Master",
+ "after": "with a strong master"
}
],
"https://www.parahumans.net/2018/09/11/gleaming-9-7/": [
@@ -4956,6 +4992,12 @@
"after": "confines of the suit, and rearranged herself"
}
],
+ "https://www.parahumans.net/2018/11/27/polarize-10-10/": [
+ {
+ "before": "all about how Masters have",
+ "after": "all about how masters have"
+ }
+ ],
"https://www.parahumans.net/2018/12/01/polarize-10-11/": [
{
"before": "See dad, sleep.",
@@ -5100,6 +5142,12 @@
"after": "walls stone and wood. The building"
}
],
+ "https://www.parahumans.net/2019/04/20/black-13-6/": [
+ {
+ "before": "I’m a Master, right, you",
+ "after": "I’m a master, right, you"
+ }
+ ],
"https://www.parahumans.net/2019/04/23/black-13-7/": [
{
"before": "looked like they were were painted on",
@@ -5154,6 +5202,14 @@
{
"before": "smirked, leering. her face was stretching",
"after": "smirked, leering. Her face was stretching"
+ },
+ {
+ "before": "But Master–stranger protocols",
+ "after": "But master–stranger protocols"
+ },
+ {
+ "before": "allusion to Master–stranger protocols",
+ "after": "allusion to master–stranger protocols"
}
],
"https://www.parahumans.net/2019/07/02/dying-15-a/": [
@@ -5190,6 +5246,12 @@
"after": "worth of water"
}
],
+ "https://www.parahumans.net/2019/07/27/dying-15-7/": [
+ {
+ "before": "stranger or Master in",
+ "after": "stranger or master in"
+ }
+ ],
"https://www.parahumans.net/2019/07/30/dying-15-8/": [
{
"before": "I thought. the dosage was supposed",
@@ -5260,6 +5322,12 @@
"after": "egg for Crystal and Mom"
}
],
+ "https://www.parahumans.net/2019/10/12/sundown-17-4/": [
+ {
+ "before": "on resisting Master influence",
+ "after": "on resisting master influence"
+ }
+ ],
"https://www.parahumans.net/2019/10/15/sundown-17-5/": [
{
"before": "Is mom walking without difficulty",
@@ -5318,6 +5386,12 @@
"after": "you go. No joke, no wiggle"
}
],
+ "https://www.parahumans.net/2019/12/03/radiation-18-6/": [
+ {
+ "before": "Some Master minions",
+ "after": "Some master minions"
+ }
+ ],
"https://www.parahumans.net/2019/12/07/radiation-18-7/": [
{
"before": "Couple of of times a month",
@@ -5338,6 +5412,18 @@
{
"before": "Hey mom? Mom",
"after": "Hey Mom? Mom"
+ },
+ {
+ "before": "or heard of the Master who had created",
+ "after": "or heard of the master who had created"
+ },
+ {
+ "before": "pretty sure it’s Master",
+ "after": "pretty sure it’s master"
+ },
+ {
+ "before": "Love Lost is Master-class",
+ "after": "Love Lost is master-class"
}
],
"https://www.parahumans.net/2020/01/25/infrared-19-d/": [
From ad2c0b0ca5c22bea95d7e898d85388c54c724863 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 21:41:12 -0400
Subject: [PATCH 009/186] Improve conversion speed using a worker pool
---
README.md | 2 +-
lib/convert-worker.js | 473 ++++++++++++++++++++++++++++++++++++++++
lib/convert.js | 497 ++----------------------------------------
lib/worm-scraper.js | 5 +-
npm-shrinkwrap.json | 10 +-
package.json | 2 +-
6 files changed, 501 insertions(+), 488 deletions(-)
create mode 100644 lib/convert-worker.js
diff --git a/README.md b/README.md
index 3987306..19f8e7a 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,6 @@ Alternately, if you are a developer, a pull request adding support for MOBI outp
## Text fixups
-This project makes a lot of fixups to the original text, mostly around typos, punctuation, capitalization, and consistency. You can get a more specific idea of what these are via the code; there's [`convert.js`](https://github.com/domenic/worm-scraper/blob/master/lib/convert.js), where some things are handled generally, and [`substitutions.json`](https://github.com/domenic/worm-scraper/blob/master/lib/substitutions.json), for one-off fixes.
+This project makes a lot of fixups to the original text, mostly around typos, punctuation, capitalization, and consistency. You can get a more specific idea of what these are via the code; there's [`convert-worker.js`](https://github.com/domenic/worm-scraper/blob/master/lib/convert-worker.js), where some things are handled generally, and [`substitutions.json`](https://github.com/domenic/worm-scraper/blob/master/lib/substitutions.json), for one-off fixes.
This process is designed to be extensible, so if you notice any problems with the original text that you think should be fixed, file an issue to let me know, and we can update the fixup code so that the resulting eBook is improved. (Or better yet, send a pull request!)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
new file mode 100644
index 0000000..ee5c02f
--- /dev/null
+++ b/lib/convert-worker.js
@@ -0,0 +1,473 @@
+"use strict";
+const workerpool = require("workerpool");
+const fs = require("fs");
+const { JSDOM } = require("jsdom");
+const substitutions = require("./substitutions.json");
+
+workerpool.worker({ convertChapter });
+
+function convertChapter(chapter, inputPath, outputPath) {
+ const contents = fs.readFileSync(inputPath, { encoding: "utf-8" });
+
+ const rawChapterJSDOM = new JSDOM(contents);
+ const output = getChapterString(chapter, rawChapterJSDOM.window.document);
+
+ // TODO: this should probably not be necessary... jsdom bug I guess!?
+ rawChapterJSDOM.window.close();
+
+ fs.writeFileSync(outputPath, output);
+}
+
+function getChapterString(chapter, rawChapterDoc) {
+ const body = getBodyXML(chapter, rawChapterDoc.querySelector(".entry-content"));
+
+ return `
+
+
+
+
+ ${chapter.title}
+
+${body}
+`;
+}
+
+function getBodyXML(chapter, contentEl) {
+ // Remove initial Next Chapter and Previous Chapter
+ contentEl.removeChild(contentEl.firstElementChild);
+
+ // Remove everything after the last p element (e.g. analytics elements)
+ const lastP = contentEl.querySelector("p:last-of-type");
+ while (contentEl.lastElementChild !== lastP) {
+ contentEl.removeChild(contentEl.lastElementChild);
+ }
+
+ // Remove trailing elements that are empty or hold Last Chapter/Previous Chapter/Next Chapter text
+ while (isEmptyOrGarbage(contentEl.lastElementChild)) {
+ contentEl.removeChild(contentEl.lastElementChild);
+ }
+
+ // Remove redundant attributes and style
+ Array.prototype.forEach.call(contentEl.children, child => {
+ if (child.getAttribute("dir") === "ltr") {
+ child.removeAttribute("dir");
+ }
+
+ // Only ever appears with align="LEFT" (useless) or align="CENTER" overridden by style="text-align: left;" (also
+ // useless)
+ child.removeAttribute("align");
+
+ if (child.getAttribute("style") === "text-align:left;") {
+ child.removeAttribute("style");
+ }
+ if (child.getAttribute("style") === "text-align:left;padding-left:30px;") {
+ child.setAttribute("style", "padding-left:30px;");
+ }
+ });
+
+ // Remove empty em and i elements
+ // Remove style attributes from them, as they're always messed up.
+ const ems = contentEl.querySelectorAll("em, i");
+ Array.prototype.forEach.call(ems, em => {
+ if (em.textContent.trim() === "") {
+ const replacement = contentEl.ownerDocument.createTextNode(" ");
+ em.parentNode.replaceChild(replacement, em);
+ } else {
+ em.removeAttribute("style");
+ }
+ });
+
+ // In https://parahumans.wordpress.com/2013/01/05/monarch-16-13/ there are some address elements that should be p elements O_o
+ const addresses = contentEl.querySelectorAll("address");
+ Array.prototype.forEach.call(addresses, address => {
+ const p = contentEl.ownerDocument.createElement("p");
+ p.innerHTML = address.innerHTML;
+ address.parentNode.replaceChild(p, address);
+ });
+
+ // Every span except underline ones is pointless at best and frequently messed up. (Weird font size, line spacing,
+ // etc.)
+ const spans = contentEl.querySelectorAll("span");
+ Array.prototype.forEach.call(spans, span => {
+ if (span.getAttribute("style") === "text-decoration:underline;") {
+ return;
+ }
+
+ if (span.textContent.trim() === "") {
+ span.parentNode.removeChild(span);
+ } else {
+ const docFrag = contentEl.ownerDocument.createDocumentFragment();
+ while (span.firstChild) {
+ docFrag.appendChild(span.firstChild);
+ }
+ span.parentNode.replaceChild(docFrag, span);
+ }
+ });
+
+ // In Ward, CloudFlare email protection obfuscates the email addresses:
+ // https://usamaejaz.com/cloudflare-email-decoding/
+ for (const emailEl of contentEl.querySelectorAll("[data-cfemail]")) {
+ const decoded = decodeCloudFlareEmail(emailEl.dataset.cfemail);
+ emailEl.replaceWith(contentEl.ownerDocument.createTextNode(decoded));
+ }
+
+ // Synthesize a body element to serialize
+ const bodyEl = contentEl.ownerDocument.createElement("body");
+
+ const h1El = contentEl.ownerDocument.createElement("h1");
+ h1El.textContent = chapter.title;
+ bodyEl.appendChild(h1El);
+
+ const comment = contentEl.ownerDocument.createComment(chapter.url);
+ bodyEl.appendChild(comment);
+
+ while (contentEl.firstChild) {
+ bodyEl.appendChild(contentEl.firstChild);
+ }
+
+ const xmlSerializer = new contentEl.ownerDocument.defaultView.XMLSerializer();
+ let xml = xmlSerializer.serializeToString(bodyEl);
+
+ // Fix recurring strange pattern of extra
in ......
\n
+ xml = xml.replace(/
\s*<\/em><\/p>/g, "
");
+
+ // There are way too many nonbreaking spaces where they don't belong.
+ // If they show up three in a row, then let them live. Otherwise, they die.
+ // Also remove any run of them after a period.
+ xml = xml.replace(/([^\xA0])\xA0\xA0?([^\xA0])/g, "$1 $2");
+ xml = xml.replace(/\.\xA0+\s*/, ". ");
+
+ function fixEms() {
+ // Fix recurring broken-up or erroneous em elements
+ xml = xml.replace(/<\/em>‘s/g, "’s");
+ xml = xml.replace(/
<\/em>/g, "");
+ xml = xml.replace(/<\/em>/g, "");
+ xml = xml.replace(/(\s?\s?[^A-Za-z]\s?\s?)<\/em>/g, "$1");
+ xml = xml.replace(/<\/em>(\s?\s?[^A-Za-z]\s?\s?)/g, "$1");
+ xml = xml.replace(/“([^>]+)<\/em>(!|\?|\.)”/g, "“$1$2”");
+ xml = xml.replace(/([^>]+)<\/em>(!|\?|\.)<\/p>/g, "$1$2
");
+ xml = xml.replace(/(!|\?|\.)\s{2}<\/em><\/p>/g, "$1
");
+ xml = xml.replace(/([a-z]+)(\?|\.)<\/em>/g, "$1$2");
+ xml = xml.replace(/([^>]+?)( +)<\/em>/g, "$1$2");
+ xml = xml.replace(/ ([a-zA-Z]+)<\/em>/g, " $1");
+ xml = xml.replace(/‘\s*([^<]+)\s*’<\/em>/g, "‘$1’");
+ xml = xml.replace(/‘\s*([^<]+)\s*<\/em>\s*’/g, "‘$1’");
+ xml = xml.replace(/‘\s*\s*([^<]+)\s*’<\/em>/g, "‘$1’");
+ xml = xml.replace(/“\s*([^<]+)\s*”<\/em>/g, "“$1”");
+ xml = xml.replace(/“\s*([^<]+)\s*<\/em>\s*”/g, "“$1”");
+ xml = xml.replace(/“\s*\s*([^<]+)\s*”<\/em>/g, "“$1”");
+ xml = xml.replace(/([^\n>]) ?/g, "$1 ");
+ xml = xml.replace(/ ?<\/em>/g, " ");
+ xml = xml.replace(/]+)> /g, "");
+ xml = xml.replace(/<\/em> <\/p>/g, "
");
+ xml = xml.replace(/([a-z]+),<\/em>/g, "$1,");
+ }
+
+ function fixQuotesAndApostrophes() {
+ // Fix recurring poor quotes and apostrophes
+ xml = xml.replace(/”/g, "
“");
+ xml = xml.replace(/“\s*<\/p>/g, "”
");
+ xml = xml.replace(/“\s*<\/em><\/p>/g, "”
");
+ xml = xml.replace(/‘\s*<\/p>/g, "’");
+ xml = xml.replace(/‘\s*<\/em><\/p>/g, "’");
+ xml = xml.replace(/,” <\/em>/g, ",” ");
+ xml = xml.replace(/′/g, "’");
+ xml = xml.replace(/″/g, "”");
+ xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
+ xml = xml.replace(/I‘m/g, "I’m");
+ xml = xml.replace(/“\s+/g, "
“");
+ xml = xml.replace(/'/g, "’");
+ xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
+ xml = xml.replace(/‘Sup/g, "’Sup");
+ }
+
+ // These interact with each other, so do them a few times.
+ xml = xml.replace(/,” <\/em>/g, "
,” ");
+ fixEms();
+ fixQuotesAndApostrophes();
+ fixEms();
+ fixQuotesAndApostrophes();
+ fixEms();
+
+ // Similar problems occur in Ward with b and strong elements as do in Worm with em elements
+ xml = xml.replace(//g, "");
+ xml = xml.replace(/(\s*
\s*)<\/b>/g, "$1");
+ xml = xml.replace(/(\s*
\s*)<\/strong>/g, "$1");
+ xml = xml.replace(/<\/strong>(\s*)/g, "$1");
+ xml = xml.replace(/@<\/strong>/g, "@");
+ xml = xml.replace(/
(\s*)<\/strong>/g, "
$1");
+ xml = xml.replace(/(\s*)<\/strong>/g, "$1");
+
+ // No need for line breaks before paragraph ends
+ // These often occur with the line breaks inside b/strong elements fixed above.
+ xml = xml.replace(/
\s*<\/p>/g, "");
+
+ // Fix possessive of names ending in "s"
+ // Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
+ xml = xml.replace(/([^‘])Judas’([^s])/g, "$1Judas’s$2");
+ xml = xml.replace(/([^‘])Brutus’([^s])/g, "$1Brutus’s$2");
+ xml = xml.replace(/([^‘])Jess’([^s])/g, "$1Jess’s$2");
+ xml = xml.replace(/([^‘])Aegis’([^s])/g, "$1Aegis’s$2");
+ xml = xml.replace(/([^‘])Dauntless’([^s])/g, "$1Dauntless’s$2");
+ xml = xml.replace(/([^‘])Circus’([^s])/g, "$1Circus’s$2");
+ xml = xml.replace(/([^‘])Sirius’([^s])/g, "$1Sirius’s$2");
+ xml = xml.replace(/([^‘])Brooks’([^s])/g, "$1Brooks’s$2");
+ xml = xml.replace(/([^‘])Genesis’([^s])/g, "$1Genesis’s$2");
+ xml = xml.replace(/([^‘])Atlas’([^s])/g, "$1Atlas’s$2");
+ xml = xml.replace(/([^‘])Lucas’([^s])/g, "$1Lucas’s$2");
+ xml = xml.replace(/([^‘])Gwerrus’([^s])/g, "$1Gwerrus’s$2");
+ xml = xml.replace(/([^‘])Chris’([^s])/g, "$1Chris’s$2");
+ xml = xml.replace(/([^‘])Eligos’([^s])/g, "$1Eligos’s$2");
+ xml = xml.replace(/([^‘])Animos’([^s])/g, "$1Animos’s$2");
+ xml = xml.replace(/([^‘])Mags’([^s])/g, "$1Mags’s$2");
+ xml = xml.replace(/([^‘])Huntress’([^s])/g, "$1Huntress’s$2");
+ xml = xml.replace(/([^‘])Hephaestus’([^s])/g, "$1Hephaestus’s$2");
+ xml = xml.replace(/([^‘])Lord of Loss’([^s])/g, "$1Lord of Loss’s$2");
+ xml = xml.replace(/([^‘])John Combs’([^s])/g, "$1John Combs’s$2");
+ xml = xml.replace(/([^‘])Mama Mathers’([^s])/g, "$1Mama Mathers’s$2");
+ xml = xml.replace(/([^‘])Monokeros’([^s])/g, "$1Monokeros’s$2");
+ xml = xml.replace(/([^‘])Goddess’([^s])/g, "$1Goddess’s$2");
+ xml = xml.replace(/([^‘])Boundless’([^s])/g, "$1Boundless’s$2");
+ xml = xml.replace(/([^‘])Paris’([^s])/g, "$1Paris’s$2");
+ xml = xml.replace(/([^‘])Tress’([^s])/g, "$1Tress’s$2");
+ xml = xml.replace(/([^‘])Harris’([^s])/g, "$1Harris’s$2");
+ xml = xml.replace(/([^‘])Antares’([^s])/g, "$1Antares’s$2");
+ xml = xml.replace(/([^‘])Nieves’([^s])/g, "$1Nieves’s$2");
+ xml = xml.replace(/([^‘])Backwoods’([^s])/g, "$1Backwoods’s$2");
+ xml = xml.replace(/([^‘])Midas’([^s])/g, "$1Midas’s$2");
+ xml = xml.replace(/([^‘])Mrs. Sims’([^s])/g, "$1Mrs. Sims’s$2");
+ xml = xml.replace(/([^‘])Ms. Stillons’([^s])/g, "$1Ms. Stillons’s$2");
+ xml = xml.replace(/([^‘])Chuckles’([^s])/g, "$1Chuckles’s$2");
+
+ // Fix dashes
+ xml = xml.replace(/ – /g, "—");
+ xml = xml.replace(/“((?:)?)-/g, "“$1—");
+ xml = xml.replace(/-[,.]?”/g, "—”");
+ xml = xml.replace(/-(!|\?)”/g, "—$1”");
+ xml = xml.replace(/-[,.]?<\/em>”/g, "—”");
+ xml = xml.replace(/-“/g, "—”");
+ xml = xml.replace(/-/g, "
—");
+ xml = xml.replace(/-<\/p>/g, "—
");
+ xml = xml.replace(/-<\/em><\/p>/g, "—");
+ xml = xml.replace(/\s?\s?–\s?\s?/g, "—");
+ xml = xml.replace(/-\s\s?/g, "—");
+ xml = xml.replace(/\s?\s-/g, "—");
+ xml = xml.replace(/\s+—”/g, "—”");
+ xml = xml.replace(/I-I/g, "I—I");
+ xml = xml.replace(/I-uh/g, "I—uh");
+
+ // Joint names should use en dashes
+ xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
+ xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
+ xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
+ xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
+ xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
+ xml = xml.replace(/Earth-Gimel/g, "Earth–Gimel");
+ xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
+ xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
+ xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
+ xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
+ xml = xml.replace(/Challenger-Gallant/g, "Challenger–Gallant");
+ xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2");
+ xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
+ xml = xml.replace(/East-West/g, "east–west");
+ xml = xml.replace(/(Green|Yellow)-Black/g, "$1–Black");
+ xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
+ xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
+ xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
+ xml = xml.replace(/Weaver-Clockblocker/g, "Weaver–Clockblocker");
+ xml = xml.replace(/Alexandria-Pretender/g, "Alexandria–Pretender");
+ xml = xml.replace(/Night Hag-Nyx/g, "Night Hag–Nyx");
+ xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
+ xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
+ xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
+
+ // Use
for separators
+ // https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/ has "super-separators" ("⊙ ⊙ ⊙ ⊙ ⊙") which we
+ // leave untouched for now.
+ xml = xml.replace(/■<\/p>/g, "
");
+ xml = xml.replace(/■<\/p>/g, "
");
+ xml = xml.replace(/⊙<\/p>/g, "
");
+ xml = xml.replace(/⊙<\/strong><\/p>/g, "
");
+ xml = xml.replace(/⊙<\/strong><\/em><\/p>/g, "
");
+ xml = xml.replace(/⊙⊙<\/strong><\/p>/g, "
");
+
+ // Fix recurring miscapitalization with questions
+ xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
+ xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
+
+ // Fix bad periods and spacing/markup surrounding them
+ xml = xml.replace(/\.\.<\/p>/g, ".
");
+ xml = xml.replace(/\.\.”<\/p>/g, ".”
");
+ xml = xml.replace(/ \. /g, ". ");
+ xml = xml.replace(/ \.<\/p>/g, ".
");
+ xml = xml.replace(/\.\.\./g, "…");
+
+ // Fix extra spaces
+ xml = xml.replace(/ ? <\/p>/g, "");
+ xml = xml.replace(/([a-z]) ,/g, "$1,");
+
+ // The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
+ // > “I didn’t get much done,” Greg said, “I got distracted by...
+ // This should instead be
+ // > “I didn’t get much done,” Greg said. “I got distracted by...
+ //
+ // Our heuristic is to try to automatically fix this if the dialogue tag is two words (X said/admitted/sighed/etc.).
+ //
+ // This sometimes overcorrects, as in the following example:
+ // > “Basically,” Alec said, “For your powers to manifest, ...
+ // Here instead we should lowercase the "f". We handle that via one-offs in substitutions.json.
+ //
+ // This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
+ // times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
+ // capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
+ xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
+
+ // Replace single-word i elements with em elements. Other i elements are probably erroneous too, but these are known-bad.
+ xml = xml.replace(/([A-Za-z]+)<\/i>/g, "$1");
+
+ // This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
+ // it's incorrect to capitalize in the one-off fixes.
+ // Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
+ xml = xml.replace(/the clairvoyant([^s])/g, "the Clairvoyant$1");
+
+ // ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
+ xml = xml.replace(/Resound/g, "ReSound");
+
+ // "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". I can see
+ // arguments for any of them, so let's go with the most prevalent: "patrol block".
+ xml = xml.replace(/([^ ]) Patrol (?:B|b)lock/g, "$1 patrol block");
+
+ // This is sometimes missing its capitalization.
+ xml = xml.replace(/the birdcage/g, "the Birdcage");
+
+ // This is usually spelled "TV" but sometimes the other ways. Normalize.
+ xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
+ xml = xml.replace(/T\.V\./g, "TV");
+
+ // There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentence.)
+ xml = xml.replace(/Halberd/g, "halberd");
+ xml = xml.replace(/Loft/g, "loft");
+
+ // Especially early in the story, PRT designations are capitalized; they should not be. This fixes the cases where we
+ // can be reasonably sure they don't start a sentence, although more specific instances are done in
+ // substitutions.json, and some need to be back-corrected.
+ //
+ // Note: "Master" is specifically omitted because it fails poorly on Interlude 4. Other instances need to be
+ // corrected via substitutions.json.
+ xml = xml.replace(
+ /([a-zA-Z,] |\/)(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)/g,
+ (_, prefix, designation) => prefix + designation.toLowerCase()
+ );
+ xml = xml.replace(
+ /(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)-(\d+)/gi,
+ "$1 $2"
+ );
+ xml = xml.replace(
+ // eslint-disable-next-line max-len
+ /(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)[ -/](mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)/gi,
+ "$1–$2"
+ );
+
+ // This is consistently missing accents
+ xml = xml.replace(/Yangban/g, "Yàngbǎn");
+
+ // Place names need to always be capitalized
+ xml = xml.replace(/North end/g, "North End");
+ xml = xml.replace(/Stonemast avenue/g, "Stonemast Avenue");
+ xml = xml.replace(/Shale avenue/g, "Shale Avenue");
+ xml = xml.replace(/Lord street/g, "Lord Street");
+ xml = xml.replace(/Slater street/g, "Slater Street");
+ xml = xml.replace(/Hollow point/g, "Hollow Point");
+ xml = xml.replace(/Cedar point/g, "Cedar Point");
+
+ // These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
+ // italicized, so we go in the direction of removing the italics.
+ xml = xml.replace(/Garama<\/em>/g, "Garama");
+ xml = xml.replace(/Thanda<\/em>/g, "Thanda");
+ xml = xml.replace(/Sifara([^<]*)<\/em>/g, "Sifara$1");
+ xml = xml.replace(/Moord Nag([^<]*)<\/em>/g, "Moord Nag$1");
+ xml = xml.replace(/Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
+ xml = xml.replace(/Turanta([^<]*)<\/em>/g, "Turanta$1");
+
+ // "okay" is preferred to "ok". This sometimes gets changed back via substitutions.json when people are writing notes
+ // and thus probably the intention was to be less formal. Also it seems per https://en.wikipedia.org/wiki/A-ok the
+ // "A" in "A-okay" should be capitalized.
+ xml = xml.replace(/Ok([,. ])/g, "Okay$1");
+ xml = xml.replace(/([^a-zA-Z])ok([^a])/g, "$1okay$2");
+ xml = xml.replace(/a-okay/g, "A-okay");
+
+ // Signal(l)ing/signal(l)ed are spelled both ways. Both are acceptable in English. Let's standardize on single-L.
+ xml = xml.replace(/(S|s)ignall/g, "$1ignal");
+
+ // Clich(e|é) is spelled both ways. Let's standardize on including the accent.
+ xml = xml.replace(/cliche/g, "cliché");
+
+ // "gray" is the majority spelling, except for "greyhound"
+ xml = xml.replace(/(G|g)rey(?!hound)/g, "$1ray");
+
+ // "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
+ // instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
+ // substitutions.json.
+ xml = xml.replace(/(? {
+ if (substitution.before) {
+ const indexOf = xml.indexOf(substitution.before);
+ if (indexOf === -1) {
+ console.warn(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
+ `updated at the source, in which case, you should edit substitutions.json.`);
+ }
+ if (indexOf !== xml.lastIndexOf(substitution.before)) {
+ console.warn(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
+ `Update substitutions.json for a more precise substitution.`);
+ }
+
+ xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
+ } else if (substitution.regExp) {
+ xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
+ } else {
+ console.warn(`Invalid substitution specified for ${chapter.url}`);
+ }
+ });
+
+ // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a larger XHTML document
+ xml = xml.replace(//, "");
+
+ return xml;
+}
+
+function isEmptyOrGarbage(el) {
+ const text = el.textContent.trim();
+ return text === "" ||
+ text.startsWith("Last Chapter") ||
+ text.startsWith("Previous Chapter") ||
+ text.startsWith("Next Chapter");
+}
+
+function escapeRegExp(str) {
+ return str.replace(/[-[\]/{}()*+?.\\^$|]/g, "\\$&");
+}
+
+function decodeCloudFlareEmail(hash) {
+ let email = "";
+ const xorWithThis = parseInt(hash.substring(0, 2), 16);
+ for (let i = 2; i < hash.length; i += 2) {
+ const charCode = parseInt(hash.substring(i, i + 2), 16) ^ xorWithThis;
+ email += String.fromCharCode(charCode);
+ }
+
+ return email;
+}
diff --git a/lib/convert.js b/lib/convert.js
index 7897f88..8ff6509 100644
--- a/lib/convert.js
+++ b/lib/convert.js
@@ -1,10 +1,8 @@
"use strict";
const path = require("path");
const fs = require("fs").promises;
-const throat = require("throat");
-const { JSDOM } = require("jsdom");
+const workerpool = require("workerpool");
const cliProgress = require("cli-progress");
-const substitutions = require("./substitutions.json");
module.exports = async (cachePath, manifestPath, contentPath, concurrentJobs) => {
const manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" });
@@ -17,483 +15,24 @@ module.exports = async (cachePath, manifestPath, contentPath, concurrentJobs) =>
}, cliProgress.Presets.shades_classic);
progress.start(chapters.length, 0);
- const mapper = throat(concurrentJobs, async chapter => {
- await convertChapter(chapter, cachePath, contentPath);
+ const poolOptions = {};
+ if (concurrentJobs !== undefined) {
+ poolOptions.maxWorkers = concurrentJobs;
+ }
+ const pool = workerpool.pool(path.resolve(__dirname, "convert-worker.js"), poolOptions);
+
+ await Promise.all(chapters.map(async chapter => {
+ const inputPath = path.resolve(cachePath, chapter.filename);
+
+ const destFileName = `${path.basename(chapter.filename, ".html")}.xhtml`;
+ const outputPath = path.resolve(contentPath, destFileName);
+
+ await pool.exec("convertChapter", [chapter, inputPath, outputPath]);
+
progress.increment();
- });
- await Promise.all(chapters.map(mapper));
+ }));
+
+ pool.terminate();
console.log("All chapters converted");
};
-
-async function convertChapter(chapter, cachePath, contentPath) {
- const filename = chapter.filename;
- const filePath = path.resolve(cachePath, filename);
-
- const contents = await fs.readFile(filePath, { encoding: "utf-8" });
-
- const rawChapterJSDOM = new JSDOM(contents);
- const output = getChapterString(chapter, rawChapterJSDOM.window.document);
-
- // TODO: this should probably not be necessary... jsdom bug I guess!?
- rawChapterJSDOM.window.close();
-
- const destFileName = `${path.basename(filename, ".html")}.xhtml`;
- const destFilePath = path.resolve(contentPath, destFileName);
-
- await fs.writeFile(destFilePath, output);
-}
-
-function getChapterString(chapter, rawChapterDoc) {
- const body = getBodyXML(chapter, rawChapterDoc.querySelector(".entry-content"));
-
- return `
-
-
-
-
- ${chapter.title}
-
-${body}
-`;
-}
-
-function getBodyXML(chapter, contentEl) {
- // Remove initial Next Chapter and Previous Chapter
- contentEl.removeChild(contentEl.firstElementChild);
-
- // Remove everything after the last
(e.g. analytics
s)
- const lastP = contentEl.querySelector("p:last-of-type");
- while (contentEl.lastElementChild !== lastP) {
- contentEl.removeChild(contentEl.lastElementChild);
- }
-
- // Remove empty
s or Last Chapter/Next Chapter
s
- while (isEmptyOrGarbage(contentEl.lastElementChild)) {
- contentEl.removeChild(contentEl.lastElementChild);
- }
-
- // Remove redundant attributes and style
- Array.prototype.forEach.call(contentEl.children, child => {
- if (child.getAttribute("dir") === "ltr") {
- child.removeAttribute("dir");
- }
-
- // Only ever appears with align="LEFT" (useless) or align="CENTER" overridden by style="text-align: left;" (also
- // useless)
- child.removeAttribute("align");
-
- if (child.getAttribute("style") === "text-align:left;") {
- child.removeAttribute("style");
- }
- if (child.getAttribute("style") === "text-align:left;padding-left:30px;") {
- child.setAttribute("style", "padding-left:30px;");
- }
- });
-
- // Remove empty s and s
- // Remove style attributes from them, as they're always messed up.
- const ems = contentEl.querySelectorAll("em, i");
- Array.prototype.forEach.call(ems, em => {
- if (em.textContent.trim() === "") {
- const replacement = contentEl.ownerDocument.createTextNode(" ");
- em.parentNode.replaceChild(replacement, em);
- } else {
- em.removeAttribute("style");
- }
- });
-
- // In https://parahumans.wordpress.com/2013/01/05/monarch-16-13/ there are some s that should be s O_o
- const addresses = contentEl.querySelectorAll("address");
- Array.prototype.forEach.call(addresses, address => {
- const p = contentEl.ownerDocument.createElement("p");
- p.innerHTML = address.innerHTML;
- address.parentNode.replaceChild(p, address);
- });
-
- // Every except underline ones is pointless at best and frequently messed up. (Weird font size, line spacing,
- // etc.)
- const spans = contentEl.querySelectorAll("span");
- Array.prototype.forEach.call(spans, span => {
- if (span.getAttribute("style") === "text-decoration:underline;") {
- return;
- }
-
- if (span.textContent.trim() === "") {
- span.parentNode.removeChild(span);
- } else {
- const docFrag = contentEl.ownerDocument.createDocumentFragment();
- while (span.firstChild) {
- docFrag.appendChild(span.firstChild);
- }
- span.parentNode.replaceChild(docFrag, span);
- }
- });
-
- // In Ward, CloudFlare email protection obfuscates the email addresses:
- // https://usamaejaz.com/cloudflare-email-decoding/
- for (const emailEl of contentEl.querySelectorAll("[data-cfemail]")) {
- const decoded = decodeCloudFlareEmail(emailEl.dataset.cfemail);
- emailEl.replaceWith(contentEl.ownerDocument.createTextNode(decoded));
- }
-
- // Synthesize a tag to serialize
- const bodyEl = contentEl.ownerDocument.createElement("body");
-
- const h1El = contentEl.ownerDocument.createElement("h1");
- h1El.textContent = chapter.title;
- bodyEl.appendChild(h1El);
-
- const comment = contentEl.ownerDocument.createComment(chapter.url);
- bodyEl.appendChild(comment);
-
- while (contentEl.firstChild) {
- bodyEl.appendChild(contentEl.firstChild);
- }
-
- const xmlSerializer = new contentEl.ownerDocument.defaultView.XMLSerializer();
- let xml = xmlSerializer.serializeToString(bodyEl);
-
- // Fix recurring strange pattern of extra
in ......
\n
- xml = xml.replace(/
\s*<\/em><\/p>/g, "
");
-
- // There are way too many nonbreaking spaces where they don't belong.
- // If they show up three in a row, then let them live. Otherwise, they die.
- // Also remove any run of them after a period.
- xml = xml.replace(/([^\xA0])\xA0\xA0?([^\xA0])/g, "$1 $2");
- xml = xml.replace(/\.\xA0+\s*/, ". ");
-
- function fixEms() {
- // Fix recurring broken-up or erroneous
s
- xml = xml.replace(/<\/em>‘s/g, "’s");
- xml = xml.replace(/
<\/em>/g, "");
- xml = xml.replace(/<\/em>/g, "");
- xml = xml.replace(/(\s?\s?[^A-Za-z]\s?\s?)<\/em>/g, "$1");
- xml = xml.replace(/<\/em>(\s?\s?[^A-Za-z]\s?\s?)/g, "$1");
- xml = xml.replace(/“([^>]+)<\/em>(!|\?|\.)”/g, "“$1$2”");
- xml = xml.replace(/([^>]+)<\/em>(!|\?|\.)<\/p>/g, "$1$2
");
- xml = xml.replace(/(!|\?|\.)\s{2}<\/em><\/p>/g, "$1
");
- xml = xml.replace(/([a-z]+)(\?|\.)<\/em>/g, "$1$2");
- xml = xml.replace(/([^>]+?)( +)<\/em>/g, "$1$2");
- xml = xml.replace(/ ([a-zA-Z]+)<\/em>/g, " $1");
- xml = xml.replace(/‘\s*([^<]+)\s*’<\/em>/g, "‘$1’");
- xml = xml.replace(/‘\s*([^<]+)\s*<\/em>\s*’/g, "‘$1’");
- xml = xml.replace(/‘\s*\s*([^<]+)\s*’<\/em>/g, "‘$1’");
- xml = xml.replace(/“\s*([^<]+)\s*”<\/em>/g, "“$1”");
- xml = xml.replace(/“\s*([^<]+)\s*<\/em>\s*”/g, "“$1”");
- xml = xml.replace(/“\s*\s*([^<]+)\s*”<\/em>/g, "“$1”");
- xml = xml.replace(/([^\n>]) ?/g, "$1 ");
- xml = xml.replace(/ ?<\/em>/g, " ");
- xml = xml.replace(/]+)> /g, "");
- xml = xml.replace(/<\/em> <\/p>/g, "
");
- xml = xml.replace(/([a-z]+),<\/em>/g, "$1,");
- }
-
- function fixQuotesAndApostrophes() {
- // Fix recurring poor quotes and apostrophes
- xml = xml.replace(/”/g, "
“");
- xml = xml.replace(/“\s*<\/p>/g, "”
");
- xml = xml.replace(/“\s*<\/em><\/p>/g, "”
");
- xml = xml.replace(/‘\s*<\/p>/g, "’");
- xml = xml.replace(/‘\s*<\/em><\/p>/g, "’");
- xml = xml.replace(/,” <\/em>/g, ",” ");
- xml = xml.replace(/′/g, "’");
- xml = xml.replace(/″/g, "”");
- xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
- xml = xml.replace(/I‘m/g, "I’m");
- xml = xml.replace(/“\s+/g, "
“");
- xml = xml.replace(/'/g, "’");
- xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
- xml = xml.replace(/‘Sup/g, "’Sup");
- }
-
- // These interact with each other, so do them a few times.
- xml = xml.replace(/,” <\/em>/g, "
,” ");
- fixEms();
- fixQuotesAndApostrophes();
- fixEms();
- fixQuotesAndApostrophes();
- fixEms();
-
- // Similar problems occur in Ward with and as do in Worm with s
- xml = xml.replace(//g, "");
- xml = xml.replace(/(\s*
\s*)<\/b>/g, "$1");
- xml = xml.replace(/(\s*
\s*)<\/strong>/g, "$1");
- xml = xml.replace(/<\/strong>(\s*)/g, "$1");
- xml = xml.replace(/@<\/strong>/g, "@");
- xml = xml.replace(/
(\s*)<\/strong>/g, "
$1");
- xml = xml.replace(/(\s*)<\/strong>/g, "$1");
-
- // No need for line breaks before paragraph ends
- // These often occur with the
s inside / fixed above.
- xml = xml.replace(/
\s*<\/p>/g, "");
-
- // Fix possessive of names ending in "s"
- // Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
- xml = xml.replace(/([^‘])Judas’([^s])/g, "$1Judas’s$2");
- xml = xml.replace(/([^‘])Brutus’([^s])/g, "$1Brutus’s$2");
- xml = xml.replace(/([^‘])Jess’([^s])/g, "$1Jess’s$2");
- xml = xml.replace(/([^‘])Aegis’([^s])/g, "$1Aegis’s$2");
- xml = xml.replace(/([^‘])Dauntless’([^s])/g, "$1Dauntless’s$2");
- xml = xml.replace(/([^‘])Circus’([^s])/g, "$1Circus’s$2");
- xml = xml.replace(/([^‘])Sirius’([^s])/g, "$1Sirius’s$2");
- xml = xml.replace(/([^‘])Brooks’([^s])/g, "$1Brooks’s$2");
- xml = xml.replace(/([^‘])Genesis’([^s])/g, "$1Genesis’s$2");
- xml = xml.replace(/([^‘])Atlas’([^s])/g, "$1Atlas’s$2");
- xml = xml.replace(/([^‘])Lucas’([^s])/g, "$1Lucas’s$2");
- xml = xml.replace(/([^‘])Gwerrus’([^s])/g, "$1Gwerrus’s$2");
- xml = xml.replace(/([^‘])Chris’([^s])/g, "$1Chris’s$2");
- xml = xml.replace(/([^‘])Eligos’([^s])/g, "$1Eligos’s$2");
- xml = xml.replace(/([^‘])Animos’([^s])/g, "$1Animos’s$2");
- xml = xml.replace(/([^‘])Mags’([^s])/g, "$1Mags’s$2");
- xml = xml.replace(/([^‘])Huntress’([^s])/g, "$1Huntress’s$2");
- xml = xml.replace(/([^‘])Hephaestus’([^s])/g, "$1Hephaestus’s$2");
- xml = xml.replace(/([^‘])Lord of Loss’([^s])/g, "$1Lord of Loss’s$2");
- xml = xml.replace(/([^‘])John Combs’([^s])/g, "$1John Combs’s$2");
- xml = xml.replace(/([^‘])Mama Mathers’([^s])/g, "$1Mama Mathers’s$2");
- xml = xml.replace(/([^‘])Monokeros’([^s])/g, "$1Monokeros’s$2");
- xml = xml.replace(/([^‘])Goddess’([^s])/g, "$1Goddess’s$2");
- xml = xml.replace(/([^‘])Boundless’([^s])/g, "$1Boundless’s$2");
- xml = xml.replace(/([^‘])Paris’([^s])/g, "$1Paris’s$2");
- xml = xml.replace(/([^‘])Tress’([^s])/g, "$1Tress’s$2");
- xml = xml.replace(/([^‘])Harris’([^s])/g, "$1Harris’s$2");
- xml = xml.replace(/([^‘])Antares’([^s])/g, "$1Antares’s$2");
- xml = xml.replace(/([^‘])Nieves’([^s])/g, "$1Nieves’s$2");
- xml = xml.replace(/([^‘])Backwoods’([^s])/g, "$1Backwoods’s$2");
- xml = xml.replace(/([^‘])Midas’([^s])/g, "$1Midas’s$2");
- xml = xml.replace(/([^‘])Mrs. Sims’([^s])/g, "$1Mrs. Sims’s$2");
- xml = xml.replace(/([^‘])Ms. Stillons’([^s])/g, "$1Ms. Stillons’s$2");
- xml = xml.replace(/([^‘])Chuckles’([^s])/g, "$1Chuckles’s$2");
-
- // Fixes dashes
- xml = xml.replace(/ – /g, "—");
- xml = xml.replace(/“((?:)?)-/g, "“$1—");
- xml = xml.replace(/-[,.]?”/g, "—”");
- xml = xml.replace(/-(!|\?)”/g, "—$1”");
- xml = xml.replace(/-[,.]?<\/em>”/g, "—”");
- xml = xml.replace(/-“/g, "—”");
- xml = xml.replace(/-/g, "
—");
- xml = xml.replace(/-<\/p>/g, "—
");
- xml = xml.replace(/-<\/em><\/p>/g, "—");
- xml = xml.replace(/\s?\s?–\s?\s?/g, "—");
- xml = xml.replace(/-\s\s?/g, "—");
- xml = xml.replace(/\s?\s-/g, "—");
- xml = xml.replace(/\s+—”/g, "—”");
- xml = xml.replace(/I-I/g, "I—I");
- xml = xml.replace(/I-uh/g, "I—uh");
-
- // Joint names should use em dashes
- xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
- xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
- xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
- xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
- xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
- xml = xml.replace(/Earth-Gimel/g, "Earth–Gimel");
- xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
- xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
- xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
- xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
- xml = xml.replace(/Challenger-Gallant/g, "Challenger–Gallant");
- xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2");
- xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
- xml = xml.replace(/East-West/g, "east–west");
- xml = xml.replace(/(Green|Yellow)-Black/g, "$1–Black");
- xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
- xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
- xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
- xml = xml.replace(/Weaver-Clockblocker/g, "Weaver–Clockblocker");
- xml = xml.replace(/Alexandria-Pretender/g, "Alexandria–Pretender");
- xml = xml.replace(/Night Hag-Nyx/g, "Night Hag–Nyx");
- xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
- xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
- xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
-
- // Use
for separators
- // https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/ has "super-separators" ("⊙ ⊙ ⊙ ⊙ ⊙") which we
- // leave untouched for now.
- xml = xml.replace(/■<\/p>/g, "
");
- xml = xml.replace(/■<\/p>/g, "
");
- xml = xml.replace(/⊙<\/p>/g, "
");
- xml = xml.replace(/⊙<\/strong><\/p>/g, "
");
- xml = xml.replace(/⊙<\/strong><\/em><\/p>/g, "
");
- xml = xml.replace(/⊙⊙<\/strong><\/p>/g, "
");
-
- // Fix recurring miscapitalization with questions
- xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
- xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
-
- // Fix bad periods and spacing/markup surrounding them
- xml = xml.replace(/\.\.<\/p>/g, ".
");
- xml = xml.replace(/\.\.”<\/p>/g, ".”
");
- xml = xml.replace(/ \. /g, ". ");
- xml = xml.replace(/ \.<\/p>/g, ".
");
- xml = xml.replace(/\.\.\./g, "…");
-
- // Fix extra spaces
- xml = xml.replace(/ ? <\/p>/g, "");
- xml = xml.replace(/([a-z]) ,/g, "$1,");
-
- // The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
- // > “I didn’t get much done,” Greg said, “I got distracted by...
- // This should instead be
- // > “I didn’t get much done,” Greg said. “I got distracted by...
- //
- // Our heuristic is to try to automatically fix this if the dialogue tag is two words (X said/admitted/sighed/etc.).
- //
- // This sometimes overcorrects, as in the following example:
- // > “Basically,” Alec said, “For your powers to manifest, ...
- // Here instead we should lowercase the "f". We handle that via one-offs in substitutions.json.
- //
- // This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
- // times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
- // capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
- xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
-
- // Replace single-word s with s. Other s are probably erroneous too, but these are known-bad.
- xml = xml.replace(/([A-Za-z]+)<\/i>/g, "$1");
-
- // This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
- // it's incorrect to capitalize in the one-off fixes.
- // Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
- xml = xml.replace(/the clairvoyant([^s])/g, "the Clairvoyant$1");
-
- // ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
- xml = xml.replace(/Resound/g, "ReSound");
-
- // "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". I can see
- // arguments for any of them, so let's go with the most prevalent: "patrol block".
- xml = xml.replace(/([^ ]) Patrol (?:B|b)lock/g, "$1 patrol block");
-
- // This is sometimes missing its capitalization.
- xml = xml.replace(/the birdcage/g, "the Birdcage");
-
- // This is usually spelled "TV" but sometimes the other ways. Normalize.
- xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
- xml = xml.replace(/T\.V\./g, "TV");
-
- // There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.)
- xml = xml.replace(/Halberd/g, "halberd");
- xml = xml.replace(/Loft/g, "loft");
-
- // Especially early in the story, PRT designations are capitalized; they should not be. This fixes the cases where we
- // can be reasonably sure they don't start a sentence, although more specific instances are done in
- // substitutions.json, and some need to be back-corrected.
- //
- // Note: "Master" is specifically omitted because it fails poorly on Interlude 4. Other instances need to be
- // corrected via substitutions.json.
- xml = xml.replace(
- /([a-zA-Z,] |\/)(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)/g,
- (_, prefix, designation) => prefix + designation.toLowerCase()
- );
- xml = xml.replace(
- /(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)-(\d+)/gi,
- "$1 $2"
- );
- xml = xml.replace(
- // eslint-disable-next-line max-len
- /(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)[ -/](mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)/gi,
- "$1–$2"
- );
-
- // This is consistently missing accents
- xml = xml.replace(/Yangban/g, "Yàngbǎn");
-
- // Place names need to always be capitalized
- xml = xml.replace(/North end/g, "North End");
- xml = xml.replace(/Stonemast avenue/g, "Stonemast Avenue");
- xml = xml.replace(/Shale avenue/g, "Shale Avenue");
- xml = xml.replace(/Lord street/g, "Lord Street");
- xml = xml.replace(/Slater street/g, "Slater Street");
- xml = xml.replace(/Hollow point/g, "Hollow Point");
- xml = xml.replace(/Cedar point/g, "Cedar Point");
-
- // These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
- // italicized, so we go in the direction of removing the italics.
- xml = xml.replace(/Garama<\/em>/g, "Garama");
- xml = xml.replace(/Thanda<\/em>/g, "Thanda");
- xml = xml.replace(/Sifara([^<]*)<\/em>/g, "Sifara$1");
- xml = xml.replace(/Moord Nag([^<]*)<\/em>/g, "Moord Nag$1");
- xml = xml.replace(/Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
- xml = xml.replace(/Turanta([^<]*)<\/em>/g, "Turanta$1");
-
- // "okay" is preferred to "ok". This sometimes gets changed back via substitutions.json when people are writing notes
- // and thus probably the intention was to be less formal. Also it seems per https://en.wikipedia.org/wiki/A-ok the
- // "A" in "A-okay" should be capitalized.
- xml = xml.replace(/Ok([,. ])/g, "Okay$1");
- xml = xml.replace(/([^a-zA-Z])ok([^a])/g, "$1okay$2");
- xml = xml.replace(/a-okay/g, "A-okay");
-
- // Signal(l)ing/signal(l)ed are spelled both ways. Both are acceptable in English. Let's standardize on single-L.
- xml = xml.replace(/(S|s)ignall/g, "$1ignal");
-
- // Clich(e|é) is spelled both ways. Let's standardize on including the accent.
- xml = xml.replace(/cliche/g, "cliché");
-
- // "gray" is the majority spelling, except for "greyhound"
- xml = xml.replace(/(G|g)rey(?!hound)/g, "$1ray");
-
- // "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
- // instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
- // substitutions.json.
- xml = xml.replace(/(? {
- if (substitution.before) {
- const indexOf = xml.indexOf(substitution.before);
- if (indexOf === -1) {
- console.warn(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
- `updated at the source, in which case, you should edit substitutions.json.`);
- }
- if (indexOf !== xml.lastIndexOf(substitution.before)) {
- console.warn(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
- `Update substitutions.json for a more precise substitution.`);
- }
-
- xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
- } else if (substitution.regExp) {
- xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
- } else {
- console.warn(`Invalid substitution specified for ${chapter.url}`);
- }
- });
-
- // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a
- xml = xml.replace(//, "");
-
- return xml;
-}
-
-function isEmptyOrGarbage(el) {
- const text = el.textContent.trim();
- return text === "" ||
- text.startsWith("Last Chapter") ||
- text.startsWith("Previous Chapter") ||
- text.startsWith("Next Chapter");
-}
-
-function escapeRegExp(str) {
- return str.replace(/[-[\]/{}()*+?.\\^$|]/g, "\\$&");
-}
-
-function decodeCloudFlareEmail(hash) {
- let email = "";
- const xorWithThis = parseInt(hash.substring(0, 2), 16);
- for (let i = 2; i < hash.length; i += 2) {
- const charCode = parseInt(hash.substring(i, i + 2), 16) ^ xorWithThis;
- email += String.fromCharCode(charCode);
- }
-
- return email;
-}
diff --git a/lib/worm-scraper.js b/lib/worm-scraper.js
index 6afbab5..381f9eb 100644
--- a/lib/worm-scraper.js
+++ b/lib/worm-scraper.js
@@ -52,8 +52,9 @@ const argv = yargs
})
.option("j", {
alias: "jobs",
- default: 10,
- describe: "Number of concurrent read/write conversion jobs",
+ default: undefined,
+ defaultDescription: "# of CPU cores - 1",
+ describe: "number of concurrent read/write conversion jobs",
requiresArg: true,
global: true
})
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 3c54975..015a00c 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1895,11 +1895,6 @@
"thenify": ">= 3.1.0 < 4"
}
},
- "throat": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/throat/-/throat-5.0.0.tgz",
- "integrity": "sha512-fcwX4mndzpLQKBS1DVYhGAcYaYt7vsHNIvQV+WXMvnow5cgjPphq5CaayLaGsjRdSCKZFNGt7/GYAuXaNOiYCA=="
- },
"toidentifier": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz",
@@ -2051,6 +2046,11 @@
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz",
"integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ=="
},
+ "workerpool": {
+ "version": "6.0.2",
+ "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.0.2.tgz",
+ "integrity": "sha512-DSNyvOpFKrNusaaUwk+ej6cBj1bmhLcBfj80elGk+ZIo5JSkq+unB1dLKEOcNfJDZgjGICfhQ0Q5TbP0PvF4+Q=="
+ },
"wrap-ansi": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
diff --git a/package.json b/package.json
index 2fba4a3..cb0e2ba 100644
--- a/package.json
+++ b/package.json
@@ -28,7 +28,7 @@
"cpr": "^3.0.1",
"jsdom": "^16.4.0",
"requisition": "^1.5.0",
- "throat": "^5.0.0",
+ "workerpool": "^6.0.2",
"yargs": "^16.1.0"
},
"devDependencies": {
From e2277751c0e8e0bf642512837364a292225da750 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 21:58:59 -0400
Subject: [PATCH 010/186] Fix various dash issues
---
lib/substitutions.json | 132 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 132 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index cc2b8fd..7984c85 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -3185,6 +3185,10 @@
{
"before": "But.“ He",
"after": "But.” He"
+ },
+ {
+ "before": "is led by non-capes",
+ "after": "is led by non-capes"
}
],
"https://parahumans.wordpress.com/2013/05/09/cell-22-3/": [
@@ -4290,6 +4294,10 @@
{
"before": "background of his mind",
"after": "background of his mind."
+ },
+ {
+ "before": "boyfriends are my ex-boyfriends",
+ "after": "boyfriends are my ex-boyfriends"
}
],
"https://parahumans.wordpress.com/2013/11/19/interlude-end/": [
@@ -4411,6 +4419,12 @@
"after": "Some is Mom and Dad’s"
}
],
+ "https://www.parahumans.net/2017/12/12/flare-2-2/": [
+ {
+ "before": "-Victoria",
+ "after": "—Victoria"
+ }
+ ],
"https://www.parahumans.net/2017/12/16/flare-2-3/": [
{
"before": "two—piece",
@@ -4725,12 +4739,24 @@
{
"before": "were Masters and strangers",
"after": "were masters and strangers"
+ },
+ {
+ "before": "or if he said something like ‘You’ or “Everyone’, I could",
+ "after": "or if he said something like ‘You’ or ‘Everyone’, I could"
}
],
"https://www.parahumans.net/2018/04/07/shadow-interlude-5-x/": [
{
"before": "objectvity",
"after": "objectivity"
+ },
+ {
+ "before": "“Are the production rates",
+ "after": "“—are the production rates"
+ },
+ {
+ "before": "millions—tens of millions- is",
+ "after": "millions—tens of millions—is"
}
],
"https://www.parahumans.net/2018/04/21/pitch-6-3/": [
@@ -4872,6 +4898,10 @@
{
"before": "of her bag. only one temporary tattoo",
"after": "of her bag. Only one temporary tattoo"
+ },
+ {
+ "before": "loved—loved- for my",
+ "after": "loved—loved—for my"
}
],
"https://www.parahumans.net/2018/07/07/beacon-8-2/": [
@@ -4932,6 +4962,10 @@
{
"before": "he bottles it up. then the bottle cracks",
"after": "he bottles it up. Then the bottle cracks"
+ },
+ {
+ "before": "that made her her- the shock of white",
+ "after": "that made her her—the shock of white"
}
],
"https://www.parahumans.net/2018/09/25/gleaming-9-10/": [
@@ -4956,6 +4990,12 @@
"after": "in a fireman carry. With my free hand"
}
],
+ "https://www.parahumans.net/2018/10/13/gleaming-9-15/": [
+ {
+ "before": "-fucked in the head.",
+ "after": "—fucked in the head."
+ }
+ ],
"https://www.parahumans.net/2018/10/16/gleaming-interlude-9-z/": [
{
"before": "cliff he was was expected",
@@ -4980,6 +5020,20 @@
"after": "services of mercenaries. Depending on timing"
}
],
+ "https://www.parahumans.net/2018/11/13/polarize-10-7/": [
+ {
+ "before": "your favorite top-’",
+ "after": "your favorite top—’"
+ },
+ {
+ "before": "-your phone, twice, your banking card-",
+ "after": "—your phone, twice, your banking card—"
+ },
+ {
+ "before": "-your pancreas.",
+ "after": "—your pancreas."
+ }
+ ],
"https://www.parahumans.net/2018/11/17/polarize-10-8/": [
{
"before": "Sveta said. her hand was removed",
@@ -5018,6 +5072,10 @@
{
"before": "and it it pulled him",
"after": "and it pulled him"
+ },
+ {
+ "before": "Victoria-flesh",
+ "after": "Victoria-flesh"
}
],
"https://www.parahumans.net/2019/01/26/blinding-11-9/": [
@@ -5054,6 +5112,10 @@
{
"before": "mom messed up",
"after": "Mom messed up"
+ },
+ {
+ "before": "You- upstairs.",
+ "after": "You—upstairs."
}
],
"https://www.parahumans.net/2019/02/12/heavens-12-1/": [
@@ -5062,6 +5124,12 @@
"after": "And Mom’s implying I want"
}
],
+ "https://www.parahumans.net/2019/02/16/heavens-12-2/": [
+ {
+ "before": "And you- don’t do that",
+ "after": "And you—don’t do that"
+ }
+ ],
"https://www.parahumans.net/2019/02/19/interlude-12-e/": [
{
"before": "I want dad",
@@ -5072,6 +5140,12 @@
"after": "I need Dad"
}
],
+ "https://www.parahumans.net/2019/02/23/heavens-12-3/": [
+ {
+ "before": "had shaken them good- but I could",
+ "after": "had shaken them good—but I could"
+ }
+ ],
"https://www.parahumans.net/2019/03/02/heavens-12-all/": [
{
"before": "bounce off of the the people",
@@ -5142,6 +5216,12 @@
"after": "walls stone and wood. The building"
}
],
+ "https://www.parahumans.net/2019/04/13/black-13-4/": [
+ {
+ "before": "wiped—or perhaps struck- clean.",
+ "after": "wiped—or perhaps struck—clean."
+ }
+ ],
"https://www.parahumans.net/2019/04/20/black-13-6/": [
{
"before": "I’m a Master, right, you",
@@ -5198,6 +5278,30 @@
"after": "You stay away from Mom, you stay away from Dad"
}
],
+ "https://www.parahumans.net/2019/06/22/breaking-14-11/": [
+ {
+ "before": "-He’s changing-",
+ "after": "-He’s changing-"
+ }
+ ],
+ "https://www.parahumans.net/2019/06/25/breaking-14-12/": [
+ {
+ "before": "-Her phone?—",
+ "after": "-Her phone?-"
+ },
+ {
+ "before": "-Have to ask her.—",
+ "after": "-Have to ask her.-"
+ },
+ {
+ "before": "-down hall with gate. basement of that part of complx—",
+ "after": "-down hall with gate. basement of that part of complx-"
+ },
+ {
+ "before": "-That was us—the message on my display read.—cuz guards—",
+ "after": "-That was us- the message on my display read. -cuz guards-"
+ }
+ ],
"https://www.parahumans.net/2019/06/29/breaking-14-z/": [
{
"before": "smirked, leering. her face was stretching",
@@ -5314,6 +5418,10 @@
{
"before": "mind of dad after his head injury",
"after": "mind of Dad after his head injury"
+ },
+ {
+ "before": "Some—many- were unwillingly",
+ "after": "Some—many—were unwillingly"
}
],
"https://www.parahumans.net/2019/10/05/sundown-17-2/": [
@@ -5426,6 +5534,30 @@
"after": "Love Lost is master-class"
}
],
+ "https://www.parahumans.net/2020/01/14/infrared-19-c/": [
+ {
+ "before": "-maim, destroy, tear it all down—",
+ "after": "—maim, destroy, tear it all down—"
+ },
+ {
+ "before": "-make it hurt, torture, butcher, kill—",
+ "after": "—make it hurt, torture, butcher, kill—"
+ },
+ {
+ "before": "—Retaliate—",
+ "after": "—Retaliate—"
+ },
+ {
+ "before": "—die quickly, if he has to die.",
+ "after": "—die quickly, if he has to die."
+ }
+ ],
+ "https://www.parahumans.net/2020/01/21/infrared-19-6/": [
+ {
+ "before": "-through the emotion. Emote through the emotion. Emote—",
+ "after": "—through the emotion. Emote through the emotion. Emote—"
+ }
+ ],
"https://www.parahumans.net/2020/01/25/infrared-19-d/": [
{
"before": "look after mom",
From b6bce1244dc4c66f1185ab7c044e6f4efedcd709 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 22:17:02 -0400
Subject: [PATCH 011/186] Spot fixes for Ward through Eclipse x.1
---
lib/substitutions.json | 101 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 101 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 7984c85..d1a7ad9 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4757,6 +4757,38 @@
{
"before": "millions—tens of millions- is",
"after": "millions—tens of millions—is"
+ },
+ {
+ "before": "“—And frankly,” Mr. Nieves said",
+ "after": "“—and frankly,” Mr. Nieves said"
+ },
+ {
+ "before": "Because, Balminder,” Kurt said. “You were plausible",
+ "after": "Because, Balminder,” Kurt said, “you were plausible"
+ },
+ {
+ "before": "she aid",
+ "after": "she said"
+ }
+ ],
+ "https://www.parahumans.net/2018/04/10/shadow-interlude-5-y/": [
+ {
+ "before": "Hopefully—I’m hoping she figures out",
+ "after": "Hopefully— I’m hoping she figures out"
+ },
+ {
+ "before": "…It’s not so important when",
+ "after": "…it’s not so important when"
+ }
+ ],
+ "https://www.parahumans.net/2018/04/17/pitch-6-2/": [
+ {
+ "before": "very clear words, only ‘ERIN’ and ‘NO-‘.",
+ "after": "very clear words, only ‘ERIN’ and ‘NO—’."
+ },
+ {
+ "before": "The Mcveay’s",
+ "after": "The McVeays"
}
],
"https://www.parahumans.net/2018/04/21/pitch-6-3/": [
@@ -4783,18 +4815,87 @@
{
"before": "Cradle said. his tone was",
"after": "Cradle said. His tone was"
+ },
+ {
+ "before": "“…Multiply the grudge,”",
+ "after": "“…multiply the grudge,”"
+ },
+ {
+ "before": "used flight to stead myself",
+ "after": "used flight to steady myself"
+ }
+ ],
+ "https://www.parahumans.net/2018/05/05/pitch-6-7/": [
+ {
+ "before": "“—And we wanted to ask for your",
+ "after": "“—and we wanted to ask for your"
+ },
+ {
+ "before": "while you were bloody—I shouldn’t be",
+ "after": "while you were bloody— I shouldn’t be"
}
],
"https://www.parahumans.net/2018/05/08/pitch-6-8/": [
{
"before": "check in on on Ashley and Rain",
"after": "check in on Ashley and Rain"
+ },
+ {
+ "before": "earth Shin",
+ "after": "Earth Shin"
+ },
+ {
+ "before": "one Jackass said",
+ "after": "one jackass said"
+ },
+ {
+ "before": "an Über-neighborhood",
+ "after": "an uber-neighborhood",
+ "_comment": "This seems to lose its umlaut in English; see e.g. https://www.dictionary.com/browse/uber. It's certainly not capitalized."
+ }
+ ],
+ "https://www.parahumans.net/2018/05/11/pitch-6-9/": [
+ {
+ "before": "bunch of Jackasses that",
+ "after": "bunch of jackasses that"
+ },
+ {
+ "before": "some of the Jackasses",
+ "after": "some of the jackasses"
+ },
+ {
+ "before": "one of the Jackasses turned",
+ "after": "one of the jackasses turned"
+ },
+ {
+ "before": "The Jackass next to Vince",
+ "after": "The jackass next to Vince"
+ },
+ {
+ "before": "couple of the Jackasses’",
+ "after": "couple of the jackasses"
+ },
+ {
+ "before": "I could hear her, “I’m trying",
+ "after": "I could hear her. “I’m trying"
+ },
+ {
+ "before": "all-hands on deck",
+ "after": "all-hands-on-deck"
+ },
+ {
+ "before": "the Junior Captain said",
+ "after": "the junior captain said"
}
],
"https://www.parahumans.net/2018/05/19/torch-7-2/": [
{
"before": "you had to deal with with could have",
"after": "you had to deal with could have"
+ },
+ {
+ "before": "—And from other horrors.",
+ "after": "—and from other horrors."
}
],
"https://www.parahumans.net/2018/05/26/torch-7-4/": [
From 5cbbd8a6fb695240c61dd94ebc3efe6fe35c7a82 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 22:21:54 -0400
Subject: [PATCH 012/186] Fix missing space in many instances of "shoulder
blade"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index ee5c02f..f3511f6 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -345,6 +345,9 @@ function getBodyXML(chapter, contentEl) {
xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
xml = xml.replace(/T\.V\./g, "TV");
+ // This is commonly misspelled.
+ xml = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");
+
// There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.)
xml = xml.replace(/Halberd/g, "halberd");
xml = xml.replace(/Loft/g, "loft");
From 3e2bc4a4479c15a67adbca2bf0af6ea3e266ee0e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 22:27:02 -0400
Subject: [PATCH 013/186] Fix train station capitalization
---
lib/convert-worker.js | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index f3511f6..db0d832 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -377,12 +377,10 @@ function getBodyXML(chapter, contentEl) {
// Place names need to always be capitalized
xml = xml.replace(/North end/g, "North End");
- xml = xml.replace(/Stonemast avenue/g, "Stonemast Avenue");
- xml = xml.replace(/Shale avenue/g, "Shale Avenue");
- xml = xml.replace(/Lord street/g, "Lord Street");
- xml = xml.replace(/Slater street/g, "Slater Street");
- xml = xml.replace(/Hollow point/g, "Hollow Point");
- xml = xml.replace(/Cedar point/g, "Cedar Point");
+ xml = xml.replace(/(Stonemast|Shale) avenue/g, "$1 Avenue");
+ xml = xml.replace(/(Lord|Slater) street/g, "$1 Street");
+ xml = xml.replace(/(Hollow|Cedar) point/g, "$1 Point");
+ xml = xml.replace(/(Norwalk|Fenway|Stratford) station/g, "$1 Station");
// These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
// italicized, so we go in the direction of removing the italics.
From 304955df72ba0f0de56bd0d5c63130759412d21b Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 22:35:29 -0400
Subject: [PATCH 014/186] Settle on lowercasing "kiss and kill"
---
lib/substitutions.json | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index d1a7ad9..c7cac21 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4779,6 +4779,10 @@
{
"before": "…It’s not so important when",
"after": "…it’s not so important when"
+ },
+ {
+ "before": "Kiss and Kill.",
+ "after": "Kiss and kill."
}
],
"https://www.parahumans.net/2018/04/17/pitch-6-2/": [
@@ -5217,6 +5221,10 @@
{
"before": "You- upstairs.",
"after": "You—upstairs."
+ },
+ {
+ "before": "all the Kiss and Kill things",
+ "after": "all the kiss and kill things"
}
],
"https://www.parahumans.net/2019/02/12/heavens-12-1/": [
From e8921bbbfe142c4c1d9b7cebfd6230046ac06858 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 22:39:47 -0400
Subject: [PATCH 015/186] Fix more cases of extra spaces after periods
---
lib/convert-worker.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index db0d832..1a1e64d 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -135,7 +135,7 @@ function getBodyXML(chapter, contentEl) {
// If they show up three in a row, then let them live. Otherwise, they die.
// Also remove any run of them after a period.
xml = xml.replace(/([^\xA0])\xA0\xA0?([^\xA0])/g, "$1 $2");
- xml = xml.replace(/\.\xA0+\s*/, ". ");
+ xml = xml.replace(/\.\x20*\xA0[\xA0\x20]*/, ". ");
function fixEms() {
// Fix recurring broken-up or erroneous <em>s
From 3905965134cdb7d42f0893a48031ab66570db25a Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 22:50:11 -0400
Subject: [PATCH 016/186] Fix missing periods at the end of quotations
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 74 +++++-------------------------------------
2 files changed, 10 insertions(+), 65 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 1a1e64d..a0a7528 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -179,6 +179,7 @@ function getBodyXML(chapter, contentEl) {
xml = xml.replace(/'/g, "’");
xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
xml = xml.replace(/‘Sup/g, "’Sup");
+ xml = xml.replace(/([a-z])”<\/p>/g, "$1.”</p>");
}
// These interact with each other, so do them a few times.
diff --git a/lib/substitutions.json b/lib/substitutions.json
index c7cac21..6e78aa3 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -74,7 +74,7 @@
],
"https://parahumans.wordpress.com/2011/07/09/insinuation-2-2/": [
{
- "before": "simply, “Bug”",
+ "before": "simply, “Bug.”",
"after": "simply, ‘Bug’."
},
{
@@ -361,10 +361,6 @@
}
],
"https://parahumans.wordpress.com/2011/10/29/shell-4-11/": [
- {
- "before": "eyes, I see”",
- "after": "eyes, I see.”"
- },
{
"before": "bugging the f-, uh",
"after": "bugging the f—, uh"
@@ -679,10 +675,6 @@
}
],
"https://parahumans.wordpress.com/2012/01/28/buzz-7-4/": [
- {
- "before": "on the way”",
- "after": "on the way.”"
- },
{
"before": "better I’ll feel,” he was",
"after": "better I’ll feel.” He was"
@@ -693,10 +685,6 @@
}
],
"https://parahumans.wordpress.com/2012/01/31/buzz-7-5/": [
- {
- "before": "in force”",
- "after": "in force.”"
- },
{
"before": "realized,” Brian spoke. “I",
"after": "realized,” Brian spoke, “I"
@@ -1751,10 +1739,6 @@
}
],
"https://parahumans.wordpress.com/2012/06/05/infestation-11-6/": [
- {
- "before": "we do here”",
- "after": "we do here.”"
- },
{
"before": "&",
"after": "and"
@@ -2276,10 +2260,6 @@
{
"before": "volunteered, asked
\n to",
"after": "volunteered, asked to"
- },
- {
- "before": "her father’s abilities”",
- "after": "her father’s abilities.”"
}
],
"https://parahumans.wordpress.com/2012/10/25/interlude-15-donation-bonus-2/": [
@@ -2418,12 +2398,6 @@
"after": "that,” Grue said, “is"
}
],
- "https://parahumans.wordpress.com/2012/11/29/interlude-16-donation-bonus/": [
- {
- "before": "in the darkness”",
- "after": "in the darkness.”"
- }
- ],
"https://parahumans.wordpress.com/2012/12/01/monarch-16-3/": [
{
"before": "two teams” I said",
@@ -2443,10 +2417,6 @@
}
],
"https://parahumans.wordpress.com/2012/12/11/monarch-16-6/": [
- {
- "before": "Lost in thought”",
- "after": "Lost in thought.”"
- },
{
"before": "—But we definitely",
"after": "—but we definitely"
@@ -2513,10 +2483,6 @@
}
],
"https://parahumans.wordpress.com/2013/01/05/monarch-16-13/": [
- {
- "before": "word on both points”",
- "after": "word on both points.”"
- },
{
"before": "doubts,” I said. “You",
"after": "doubts,” I said, “you"
@@ -3343,12 +3309,6 @@
"after": "And,” I said, “we"
}
],
- "https://parahumans.wordpress.com/2013/06/11/crushed-24-2/": [
- {
- "before": "To Behemoth”",
- "after": "To Behemoth.”"
- }
- ],
"https://parahumans.wordpress.com/2013/06/15/crushed-24-3/": [
{
"before": "Phir Sē echoed me. “",
@@ -3507,10 +3467,6 @@
"before": "the other-.",
"after": "the other—"
},
- {
- "before": "“Yeah”",
- "after": "“Yeah.”"
- },
{
"before": "Vista,” he said. “Another",
"after": "Vista,” he said, “another"
@@ -3687,10 +3643,6 @@
}
],
"https://parahumans.wordpress.com/2013/08/31/cockroaches-28-1/": [
- {
- "before": "lot of supplies in”",
- "after": "lot of supplies in.”"
- },
{
"before": "could save issue",
"after": "could say issue"
@@ -3799,10 +3751,6 @@
}
],
"https://parahumans.wordpress.com/2013/09/17/interlude-28/": [
- {
- "before": "“Password protected”",
- "after": "“Password protected.”"
- },
{
"before": "Case fifty-threes",
"after": "case fifty-threes"
@@ -4213,10 +4161,6 @@
"before": "property of Nero",
"after": "property of Nero"
},
- {
- "before": "“Stay, Hooligan”",
- "after": "“Stay, Hooligan.”"
- },
{
"before": "Nero, Why don’t we get",
"after": "Nero, why don’t we get"
@@ -4243,10 +4187,6 @@
"before": "vageuly",
"after": "vaguely"
},
- {
- "before": "standby”",
- "after": "standby.”"
- },
{
"before": "minutes passed. the chance rose",
"after": "minutes passed. The chance rose"
@@ -4287,10 +4227,6 @@
"before": "the ‘cage",
"after": "the ’cage"
},
- {
- "before": "camera follows”",
- "after": "camera follows.”"
- },
{
"before": "background of his mind",
"after": "background of his mind."
@@ -4663,6 +4599,10 @@
{
"before": "Peaches and—You mean peachy",
"after": "Peaches and— You mean peachy"
+ },
+ {
+ "before": "Victoria” she",
+ "after": "Victoria,” she"
}
],
"https://www.parahumans.net/2018/03/20/shadow-5-7/": [
@@ -4799,6 +4739,10 @@
{
"before": "wall of bodies. the woman behind",
"after": "wall of bodies. The woman behind"
+ },
+ {
+ "before": "dying, motherfuckers” the bearded",
+ "after": "dying, motherfuckers,” the bearded"
}
],
"https://www.parahumans.net/2018/04/24/pitch-6-4/": [
From 695bb24d3c5030571b247a415d1feb289eab95fd Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 22:56:38 -0400
Subject: [PATCH 017/186] Fix missing commas at the end of quotations
---
lib/substitutions.json | 50 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 6e78aa3..08e0cab 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -682,6 +682,10 @@
{
"before": "In short, It",
"after": "In short, it"
+ },
+ {
+ "before": "phone, Alec” Brian spoke",
+ "after": "phone, Alec,” Brian spoke"
}
],
"https://parahumans.wordpress.com/2012/01/31/buzz-7-5/": [
@@ -2080,6 +2084,12 @@
"after": "leaving the space that Mom had once occupied"
}
],
+ "https://parahumans.wordpress.com/2012/07/14/plague-12-6/": [
+ {
+ "before": "had their shots” I said",
+ "after": "had their shots,” I said"
+ }
+ ],
"https://parahumans.wordpress.com/2012/07/17/plague-12-7/": [
{
"before": "costumes: The",
@@ -4887,6 +4897,12 @@
"after": "no words. And there"
}
],
+ "https://www.parahumans.net/2018/06/02/eclipse-x-8/": [
+ {
+ "before": "Bonesaw” Blasto said",
+ "after": "Bonesaw,” Blasto said"
+ }
+ ],
"https://www.parahumans.net/2018/06/09/torch-7-6/": [
{
"before": "I saw. the scans of the woman",
@@ -4903,6 +4919,10 @@
{
"before": "abused by the the transition",
"after": "abused by the transition"
+ },
+ {
+ "before": "poking around” I said",
+ "after": "poking around,” I said"
}
],
"https://www.parahumans.net/2018/06/23/torch-7-10/": [
@@ -4959,6 +4979,12 @@
"after": "Mom and Dad.
"
}
],
+ "https://www.parahumans.net/2018/07/14/beacon-8-4/": [
+ {
+ "before": "use their surveillance” Kenzie said",
+ "after": "use their surveillance,” Kenzie said"
+ }
+ ],
"https://www.parahumans.net/2018/07/17/beacon-8-5/": [
{
"before": "on the offensive. chains whirled",
@@ -4975,6 +5001,12 @@
"after": "above me. The sky was dark"
}
],
+ "https://www.parahumans.net/2018/07/28/beacon-8-8/": [
+ {
+ "before": "“Yeah, that’s breaker” I said.",
+ "after": "“Yeah, that’s breaker,” I said."
+ }
+ ],
"https://www.parahumans.net/2018/07/31/beacon-8-9/": [
{
"before": "you know that dad and I saying",
@@ -5215,6 +5247,10 @@
{
"before": "wrists behind them around around the stomach",
"after": "wrists behind them around the stomach"
+ },
+ {
+ "before": "back pouch” my mom said",
+ "after": "back pouch,” my mom said"
}
],
"https://www.parahumans.net/2019/03/16/heavens-12-7/": [
@@ -5559,6 +5595,12 @@
"after": "Couple of times a month"
}
],
+ "https://www.parahumans.net/2020/01/01/interlude-19-a/": [
+ {
+ "before": "“I’m fine” Egg said",
+ "after": "“I’m fine,” Egg said"
+ }
+ ],
"https://www.parahumans.net/2020/01/04/infrared-19-3/": [
{
"before": "I thought of dad, seeing him hug my mother",
@@ -5633,12 +5675,20 @@
{
"before": "Lessons from mom, age thirteen",
"after": "Lessons from Mom, age thirteen"
+ },
+ {
+ "before": "relatively speaking” Number Five",
+ "after": "relatively speaking,” Number Five"
}
],
"https://www.parahumans.net/2020/02/08/infrared-19-9/": [
{
"before": "had insisted she or dad drive us",
"after": "had insisted she or Dad drive us"
+ },
+ {
+ "before": "you know about” my Aunt Sarah said",
+ "after": "you know about,” my Aunt Sarah said"
}
],
"https://www.parahumans.net/2020/02/11/infrared-19-f/": [
From 367b69221a5eeb34be107ad3f59c961134df18ad Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 22 Oct 2020 23:09:51 -0400
Subject: [PATCH 018/186] Fix missing periods at end of sentences in Ward
---
lib/substitutions.json | 114 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 114 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 08e0cab..d7b4143 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4581,6 +4581,12 @@
"after": "hair he had. It was long"
}
],
+ "https://www.parahumans.net/2018/02/24/shadow-5-1/": [
+ {
+ "before": "thermos of regular tap water",
+ "after": "thermos of regular tap water."
+ }
+ ],
"https://www.parahumans.net/2018/03/06/shadow-5-4/": [
{
"before": "It’s—My forcefield moves",
@@ -4661,6 +4667,10 @@
{
"before": "with Masters in mind",
"after": "with masters in mind"
+ },
+ {
+ "before": "paralyzed started moving",
+ "after": "paralyzed started moving."
}
],
"https://www.parahumans.net/2018/03/31/shadow-5-11/": [
@@ -4781,6 +4791,10 @@
{
"before": "used flight to stead myself",
"after": "used flight to steady myself"
+ },
+ {
+ "before": "Capes converged, ready to guard",
+ "after": "Capes converged, ready to guard."
}
],
"https://www.parahumans.net/2018/05/05/pitch-6-7/": [
@@ -4897,6 +4911,18 @@
"after": "no words. And there"
}
],
+ "https://www.parahumans.net/2018/05/31/eclipse-x-6/": [
+ {
+ "before": "a desperate edge to her feelings",
+ "after": "a desperate edge to her feelings."
+ }
+ ],
+ "https://www.parahumans.net/2018/06/01/eclipse-x-7/": [
+ {
+ "before": "dotting the ‘i’s and slashing the ‘Q’s",
+ "after": "dotting the ‘i’s and slashing the ‘Q’s."
+ }
+ ],
"https://www.parahumans.net/2018/06/02/eclipse-x-8/": [
{
"before": "Bonesaw” Blasto said",
@@ -4983,6 +5009,10 @@
{
"before": "use their surveillance” Kenzie said",
"after": "use their surveillance,” Kenzie said"
+ },
+ {
+ "before": "and then hit buttons",
+ "after": "and then hit buttons."
}
],
"https://www.parahumans.net/2018/07/17/beacon-8-5/": [
@@ -5023,6 +5053,12 @@
"after": "who are free now. They’re out there"
}
],
+ "https://www.parahumans.net/2018/08/18/beacon-interlude-8-y/": [
+ {
+ "before": "A sufficient impact or distraction",
+ "after": "A sufficient impact or distraction."
+ }
+ ],
"https://www.parahumans.net/2018/08/28/gleaming-9-3/": [
{
"before": "Chris in the process. stretched on his way",
@@ -5095,6 +5131,12 @@
"after": "You let Mom talk and"
}
],
+ "https://www.parahumans.net/2018/10/27/polarize-10-2/": [
+ {
+ "before": "Sveta said, leaning on me",
+ "after": "Sveta said, leaning on me."
+ }
+ ],
"https://www.parahumans.net/2018/11/03/polarize-10-4/": [
{
"before": "services of mercenaries. depending on timing",
@@ -5127,6 +5169,12 @@
"after": "confines of the suit, and rearranged herself"
}
],
+ "https://www.parahumans.net/2018/11/24/interlude-10-x/": [
+ {
+ "before": "that would be in time Hopefully",
+ "after": "that would be in time. Hopefully."
+ }
+ ],
"https://www.parahumans.net/2018/11/27/polarize-10-10/": [
{
"before": "all about how Masters have",
@@ -5147,6 +5195,10 @@
{
"before": "Reminded me of mom",
"after": "Reminded me of Mom"
+ },
+ {
+ "before": "more fragile than it should have been",
+ "after": "more fragile than it should have been."
}
],
"https://www.parahumans.net/2019/01/15/blinding-11-8/": [
@@ -5159,6 +5211,12 @@
"after": "Victoria-flesh"
}
],
+ "https://www.parahumans.net/2019/01/19/interlude-11-b/": [
+ {
+ "before": "loading up trucks with basic supplies",
+ "after": "loading up trucks with basic supplies."
+ }
+ ],
"https://www.parahumans.net/2019/01/26/blinding-11-9/": [
{
"before": "someone had been been called at three",
@@ -5241,6 +5299,10 @@
{
"before": "mom, are you okay being",
"after": "Mom, are you okay being"
+ },
+ {
+ "before": "we didn’t resume the discussion",
+ "after": "we didn’t resume the discussion."
}
],
"https://www.parahumans.net/2019/03/09/heavens-12-6/": [
@@ -5303,6 +5365,10 @@
{
"before": "walls stone and wood. the building",
"after": "walls stone and wood. The building"
+ },
+ {
+ "before": "he asked",
+ "after": "he asked."
}
],
"https://www.parahumans.net/2019/04/13/black-13-4/": [
@@ -5421,6 +5487,12 @@
"after": "wall met ceiling. The wall was smooth"
}
],
+ "https://www.parahumans.net/2019/07/09/dying-15-2/": [
+ {
+ "before": "I told Precipice",
+ "after": "I told Precipice."
+ }
+ ],
"https://www.parahumans.net/2019/07/13/dying-15-3/": [
{
"before": "Didn’t ask about mom, dad, or",
@@ -5439,6 +5511,12 @@
"after": "worth of water"
}
],
+ "https://www.parahumans.net/2019/07/23/dying-15-6/": [
+ {
+ "before": "The teams had finished picking",
+ "after": "The teams had finished picking."
+ }
+ ],
"https://www.parahumans.net/2019/07/27/dying-15-7/": [
{
"before": "stranger or Master in",
@@ -5503,6 +5581,12 @@
"after": "carried on, out of"
}
],
+ "https://www.parahumans.net/2019/09/28/from-within-16-z/": [
+ {
+ "before": "ducked their heads down and hurried",
+ "after": "ducked their heads down and hurried."
+ }
+ ],
"https://www.parahumans.net/2019/10/01/sundown-17-1/": [
{
"before": "mind of dad after his head injury",
@@ -5541,12 +5625,20 @@
{
"before": "art good enough to to keep forever",
"after": "art good enough to keep forever"
+ },
+ {
+ "before": "Golem explained",
+ "after": "Golem explained."
}
],
"https://www.parahumans.net/2019/11/02/sundown-17-10/": [
{
"before": "wanted you to go to mom if you",
"after": "wanted you to go to Mom if you"
+ },
+ {
+ "before": "that didn’t betray too much",
+ "after": "that didn’t betray too much."
}
],
"https://www.parahumans.net/2019/11/09/interlude-17-y-sundown/": [
@@ -5581,6 +5673,10 @@
{
"before": "you go. no joke, no wiggle",
"after": "you go. No joke, no wiggle"
+ },
+ {
+ "before": "some business headquarters I was in",
+ "after": "some business headquarters I was in."
}
],
"https://www.parahumans.net/2019/12/03/radiation-18-6/": [
@@ -5595,6 +5691,16 @@
"after": "Couple of times a month"
}
],
+ "https://www.parahumans.net/2019/12/28/infrared-19-2/": [
+ {
+ "before": "a violent exclamation",
+ "after": "a violent exclamation."
+ },
+ {
+ "before": "Three seconds
",
+ "after": "Three seconds.
"
+ }
+ ],
"https://www.parahumans.net/2020/01/01/interlude-19-a/": [
{
"before": "“I’m fine” Egg said",
@@ -5627,6 +5733,14 @@
{
"before": "Love Lost is Master-class",
"after": "Love Lost is master-class"
+ },
+ {
+ "before": "opportunity to handpick people",
+ "after": "opportunity to handpick people."
+ },
+ {
+ "before": "pause, drew its attention",
+ "after": "pause, drew its attention."
}
],
"https://www.parahumans.net/2020/01/14/infrared-19-c/": [
From 6406a0f37072665c4b3a3bfdaefa5d46e2f0060e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 20:28:48 -0400
Subject: [PATCH 019/186] Spot fixes for Ward through Torch 7.x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Also some general fixes to "TV", "okay", and "’cuz".
---
lib/convert-worker.js | 10 +--
lib/substitutions.json | 158 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 164 insertions(+), 4 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index a0a7528..c3b4518 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -179,6 +179,7 @@ function getBodyXML(chapter, contentEl) {
xml = xml.replace(/'/g, "’");
xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
xml = xml.replace(/‘Sup/g, "’Sup");
+ xml = xml.replace(/‘cuz/g, "’cuz");
xml = xml.replace(/([a-z])”<\/p>/g, "$1.”</p>");
}
@@ -344,7 +345,7 @@ function getBodyXML(chapter, contentEl) {
// This is usually spelled "TV" but sometimes the other ways. Normalize.
xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
- xml = xml.replace(/T\.V\./g, "TV");
+ xml = xml.replace(/t\.v\./ig, "TV");
// This is commonly misspelled.
xml = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");
@@ -392,11 +393,12 @@ function getBodyXML(chapter, contentEl) {
xml = xml.replace(/<em>Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
xml = xml.replace(/<em>Turanta([^<]*)<\/em>/g, "Turanta$1");
- // "okay" is preferred to "ok". This sometimes gets changed back via substitutions.json when people are writing notes
- // and thus probably the intention was to be less formal. Also it seems per https://en.wikipedia.org/wiki/A-ok the
- // "A" in "A-okay" should be capitalized.
+ // "okay" is preferred to "ok" or "o.k.". This sometimes gets changed back via substitutions.json when people are
+ // writing notes and thus probably the intention was to be less formal. Also it seems per
+ // https://en.wikipedia.org/wiki/A-ok the "A" in "A-okay" should be capitalized.
xml = xml.replace(/Ok([,. ])/g, "Okay$1");
xml = xml.replace(/([^a-zA-Z])ok([^a])/g, "$1okay$2");
+ xml = xml.replace(/([^a-zA-Z])o\.k\.([^a])/g, "$1okay$2");
xml = xml.replace(/a-okay/g, "A-okay");
// Signal(l)ing/signal(l)ed are spelled both ways. Both are acceptable in English. Let's standardize on single-L.
diff --git a/lib/substitutions.json b/lib/substitutions.json
index d7b4143..458adbb 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -3398,6 +3398,10 @@
{
"before": "There’ more",
"after": "There’s more"
+ },
+ {
+ "before": "White capped",
+ "after": "White-capped"
}
],
"https://parahumans.wordpress.com/2013/07/09/scarab-25-4/": [
@@ -4903,42 +4907,156 @@
{
"before": "answer the the second question",
"after": "answer the second question"
+ },
+ {
+ "before": "discuss goals,” Jessica said. “We think",
+ "after": "discuss goals,” Jessica said, “we think"
+ }
+ ],
+ "https://www.parahumans.net/2018/05/29/eclipse-x-4/": [
+ {
+ "before": "ma‘am",
+ "after": "ma’am"
}
],
"https://www.parahumans.net/2018/05/30/eclipse-x-5/": [
{
"before": "no words. and there",
"after": "no words. And there"
+ },
+ {
+ "before": "hesitated to long",
+ "after": "hesitated too long"
+ },
+ {
+ "before": "back.,",
+ "after": "back,"
+ },
+ {
+ "before": "Wardens’ ID badges",
+ "after": "Wardens ID badges"
}
],
"https://www.parahumans.net/2018/05/31/eclipse-x-6/": [
{
"before": "a desperate edge to her feelings",
"after": "a desperate edge to her feelings."
+ },
+ {
+ "before": "—To need at least three if",
+ "after": "—to need at least three if"
+ },
+ {
+ "before": "We—if you stop and",
+ "after": "We— If you stop and"
+ },
+ {
+ "before": "…We’re not especially",
+ "after": "…we’re not especially"
}
],
"https://www.parahumans.net/2018/06/01/eclipse-x-7/": [
{
"before": "dotting the ‘i’s and slashing the ‘Q’s",
"after": "dotting the ‘i’s and slashing the ‘Q’s."
+ },
+ {
+ "before": "—And they have thinkers",
+ "after": "—and they have thinkers"
+ },
+ {
+ "before": "You’re leaving Boston?",
+ "after": "You’re leaving Boston?"
+ },
+ {
+ "before": "—Assign her house arrest",
+ "after": "—assign her house arrest"
+ },
+ {
+ "before": "Mr… Chief Armstrong",
+ "after": "Mr.… Chief Armstrong"
+ },
+ {
+ "before": "The there were monsters out there",
+ "after": "That there were monsters out there"
+ },
+ {
+ "before": "“I’m not—I wasn’t—”",
+ "after": "“I’m not— I wasn’t—”"
}
],
"https://www.parahumans.net/2018/06/02/eclipse-x-8/": [
{
"before": "Bonesaw” Blasto said",
"after": "Bonesaw,” Blasto said"
+ },
+ {
+ "before": "“Well,” Fappy said. “You know Stan,",
+ "after": "“Well,” Fappy said, “you know Stan,"
+ },
+ {
+ "before": "it or not,” Jack said. “I want",
+ "after": "it or not,” Jack said, “I want"
+ }
+ ],
+ "https://www.parahumans.net/2018/06/05/torch-7-5/": [
+ {
+ "before": "
\nI don’t",
+ "after": "I don’t"
+ },
+ {
+ "before": "Chris snorted, “Count yourself lucky.",
+ "after": "Chris snorted. “Count yourself lucky."
+ },
+ {
+ "before": "was your crush Jhett Marion?",
+ "after": "was your crush Jhett Marion?"
+ },
+ {
+ "before": "thing aside,” I said. “I mean",
+ "after": "thing aside,” I said, “I mean"
+ },
+ {
+ "before": "Fifty…seven",
+ "after": "Fifty… seven"
+ },
+ {
+ "before": "brass capped",
+ "after": "brass-capped"
}
],
"https://www.parahumans.net/2018/06/09/torch-7-6/": [
{
"before": "I saw. the scans of the woman",
"after": "I saw. The scans of the woman"
+ },
+ {
+ "before": "the woman withw the sunglasses",
+ "after": "the woman with the sunglasses"
}
],
"https://www.parahumans.net/2018/06/12/torch-7-7/": [
{
"before": "and I can call dad",
"after": "and I can call Dad"
+ },
+ {
+ "before": "five by three inch",
+ "after": "five-by-three inch"
+ },
+ {
+ "before": "staying calm, her voice quieter, “Said your",
+ "after": "staying calm, her voice quieter, “said your"
+ }
+ ],
+ "https://www.parahumans.net/2018/06/16/torch-7-8/": [
+ {
+ "before": "you asked him,” Sveta said. “He’d say",
+ "after": "you asked him,” Sveta said, “he’d say"
+ },
+ {
+ "before": "Looksee, You mentioned",
+ "after": "Looksee, you mentioned"
}
],
"https://www.parahumans.net/2018/06/19/torch-7-9/": [
@@ -4949,6 +5067,10 @@
{
"before": "poking around” I said",
"after": "poking around,” I said"
+ },
+ {
+ "before": "Loss as he said it, “Because",
+ "after": "Loss as he said it, “because"
}
],
"https://www.parahumans.net/2018/06/23/torch-7-10/": [
@@ -4959,6 +5081,22 @@
{
"before": "Kenzie almost yipped out out a, “No.”",
"after": "Kenzie almost yipped out, a “No.”"
+ },
+ {
+ "before": "thought things through mid-sentence, “Is that for",
+ "after": "thought things through mid-sentence, “is that for"
+ },
+ {
+ "before": "he goes to Church with them",
+ "after": "he goes to church with them"
+ },
+ {
+ "before": "maybe,” She said, smiling",
+ "after": "maybe,” she said, smiling"
+ },
+ {
+ "before": "fighting, ‘rene",
+ "after": "fighting, ’rene"
}
],
"https://www.parahumans.net/2018/06/26/torch-interlude-7-x/": [
@@ -4997,6 +5135,26 @@
{
"before": "loved—loved- for my",
"after": "loved—loved—for my"
+ },
+ {
+ "before": "pitched to be heard, “But her antics",
+ "after": "pitched to be heard, “but her antics"
+ },
+ {
+ "before": "—Really truly, I hope",
+ "after": "—really truly, I hope"
+ },
+ {
+ "before": "big guy said, “Is our mood",
+ "after": "big guy said, “is our mood"
+ },
+ {
+ "before": "thing we try to do,” he said. “Is we ask",
+ "after": "thing we try to do,” he said, “is we ask"
+ },
+ {
+ "before": "Keith said. “You decide",
+ "after": "Keith said, “you decide"
}
],
"https://www.parahumans.net/2018/07/07/beacon-8-2/": [
From ab28acfd9af94f506abff0e1d6701c3e3ce593f6 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 20:32:19 -0400
Subject: [PATCH 020/186] Remove erroneous hyphen in "preemptive(ly)"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index c3b4518..d9f45dd 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -426,6 +426,9 @@ function getBodyXML(chapter, contentEl) {
xml = xml.replace(/well armed/g, "well-armed");
xml = xml.replace(/able bodied/g, "able-bodied");
+ // Preemptive(ly) is often hyphenated (not always). It should not be.
+ xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
+
// One-off fixes
(substitutions[chapter.url] || []).forEach(substitution => {
if (substitution.before) {
From 6b5bedcbc721e31af286edb220f7d3ac2bd31569 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 20:34:01 -0400
Subject: [PATCH 021/186] =?UTF-8?q?Use=20en=20dash=20instead=20of=20hyphen?=
=?UTF-8?q?-minus=20for=20"G=E2=80=93N"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index d9f45dd..c853b9c 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -266,6 +266,7 @@ function getBodyXML(chapter, contentEl) {
xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
xml = xml.replace(/Earth-Gimel/g, "Earth–Gimel");
xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
+ xml = xml.replace(/G-N/g, "G–N");
xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
From 4fbaa0e8db262b185d45719a605a164da9163f6a Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 20:54:25 -0400
Subject: [PATCH 022/186] Settle on "Patrol <lowercase>" and "the Patrol"
This is reversing the decision made in 84e0d83bd1a84f098a201e7362d60e07d6aec42f, and extending the revised decision to other Patrol-related terms.
---
lib/convert-worker.js | 22 ++++++++++++++--------
lib/convert.js | 4 ++--
lib/worm-scraper.js | 2 +-
3 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index c853b9c..87dcd1e 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -6,11 +6,11 @@ const substitutions = require("./substitutions.json");
workerpool.worker({ convertChapter });
-function convertChapter(chapter, inputPath, outputPath) {
+function convertChapter(chapter, book, inputPath, outputPath) {
const contents = fs.readFileSync(inputPath, { encoding: "utf-8" });
const rawChapterJSDOM = new JSDOM(contents);
- const output = getChapterString(chapter, rawChapterJSDOM.window.document);
+ const output = getChapterString(chapter, book, rawChapterJSDOM.window.document);
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterJSDOM.window.close();
@@ -18,8 +18,8 @@ function convertChapter(chapter, inputPath, outputPath) {
fs.writeFileSync(outputPath, output);
}
-function getChapterString(chapter, rawChapterDoc) {
- const body = getBodyXML(chapter, rawChapterDoc.querySelector(".entry-content"));
+function getChapterString(chapter, book, rawChapterDoc) {
+ const body = getBodyXML(chapter, book, rawChapterDoc.querySelector(".entry-content"));
return `
@@ -32,7 +32,7 @@ ${body}
`;
}
-function getBodyXML(chapter, contentEl) {
+function getBodyXML(chapter, book, contentEl) {
// Remove initial Next Chapter and Previous Chapter
contentEl.removeChild(contentEl.firstElementChild);
@@ -337,9 +337,15 @@ function getBodyXML(chapter, contentEl) {
// ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
xml = xml.replace(/Resound/g, "ReSound");
- // "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". I can see
- // arguments for any of them, so let's go with the most prevalent: "patrol block".
- xml = xml.replace(/([^ ]) Patrol (?:B|b)lock/g, "$1 patrol block");
+ // "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
+ // group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
+ // other contexts (e.g. Patrol leader). So let's standardize on "Patrol <lowercase>".
+ xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl)/ig,
+ (_, $1) => `Patrol ${$1.toLowerCase()}`);
+ // This always works in Ward and has a few false positives in Worm, where it is never needed:
+ if (book === "ward") {
+ xml = xml.replace(/the patrol/g, "the Patrol");
+ }
// This is sometimes missing its capitalization.
xml = xml.replace(/the birdcage/g, "the Birdcage");
diff --git a/lib/convert.js b/lib/convert.js
index 8ff6509..e7555cb 100644
--- a/lib/convert.js
+++ b/lib/convert.js
@@ -4,7 +4,7 @@ const fs = require("fs").promises;
const workerpool = require("workerpool");
const cliProgress = require("cli-progress");
-module.exports = async (cachePath, manifestPath, contentPath, concurrentJobs) => {
+module.exports = async (cachePath, manifestPath, contentPath, book, concurrentJobs) => {
const manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" });
const chapters = JSON.parse(manifestContents);
@@ -27,7 +27,7 @@ module.exports = async (cachePath, manifestPath, contentPath, concurrentJobs) =>
const destFileName = `${path.basename(chapter.filename, ".html")}.xhtml`;
const outputPath = path.resolve(contentPath, destFileName);
- await pool.exec("convertChapter", [chapter, inputPath, outputPath]);
+ await pool.exec("convertChapter", [chapter, book, inputPath, outputPath]);
progress.increment();
}));
diff --git a/lib/worm-scraper.js b/lib/worm-scraper.js
index 381f9eb..556a0df 100644
--- a/lib/worm-scraper.js
+++ b/lib/worm-scraper.js
@@ -86,7 +86,7 @@ if (argv._.includes("convert")) {
commands.push(() => {
return fs.rmdir(chaptersPath, { recursive: true })
.then(() => fs.mkdir(chaptersPath, { recursive: true }))
- .then(() => convert(cachePath, manifestPath, chaptersPath, argv.jobs));
+ .then(() => convert(cachePath, manifestPath, chaptersPath, argv.book, argv.jobs));
});
}
From 0fc22d064dd41d7df350be29b9a55ddc8c7c3d2b Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 20:59:32 -0400
Subject: [PATCH 023/186] Remove periods from a few instances of PRT
---
lib/substitutions.json | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 458adbb..7d11c61 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2630,6 +2630,14 @@
{
"before": "exaggerated shrug
",
"after": "exaggerated shrug.
"
+ },
+ {
+ "before": "the P.R.T. uniforms",
+ "after": "the PRT uniforms"
+ },
+ {
+ "before": "injured P.R.T. soldier",
+ "after": "injured PRT soldier"
}
],
"https://parahumans.wordpress.com/2013/01/15/migration-17-8/": [
@@ -5811,6 +5819,12 @@
"after": "had his powers. The forcefield above"
}
],
+ "https://www.parahumans.net/2019/11/16/radiation-18-1/": [
+ {
+ "before": "worked for the P.R.T.",
+ "after": "worked for the PRT."
+ }
+ ],
"https://www.parahumans.net/2019/11/19/radiation-18-2/": [
{
"before": "looking for mom in the crowd",
@@ -5999,6 +6013,12 @@
"after": "shaping it as it rolled out. It became a circular"
}
],
+ "https://www.parahumans.net/2020/04/07/last-20-11/": [
+ {
+ "before": "had harangued the P.R.T., even",
+ "after": "had harangued the PRT, even"
+ }
+ ],
"https://www.parahumans.net/2020/04/21/last-20-e4/": [
{
"before": "grown around the the maille sheath",
From 50dce93587dbf1344211d584df013966df6e6b63 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 21:08:51 -0400
Subject: [PATCH 024/186] Remove empty paragraph starting Ward Daybreak 1.8
---
lib/substitutions.json | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 7d11c61..a26f014 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4362,6 +4362,10 @@
}
],
"https://www.parahumans.net/2017/12/01/daybreak-1-8/": [
+ {
+ "before": "\n
",
+ "after": "
"
+ },
{
"before": "parahumans online",
"after": "Parahumans Online"
From fb933228231e267960a705b76c0d857aa2313ca2 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 21:19:56 -0400
Subject: [PATCH 025/186] Modernize DOM manipulation code
jsdom, and JavaScript, have gotten nicer since this was first written.
---
lib/convert-worker.js | 36 ++++++++++++++++--------------------
1 file changed, 16 insertions(+), 20 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 87dcd1e..8d3b2e4 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -34,21 +34,21 @@ ${body}
function getBodyXML(chapter, book, contentEl) {
// Remove initial Next Chapter and Previous Chapter
- contentEl.removeChild(contentEl.firstElementChild);
+ contentEl.firstElementChild.remove();
// Remove everything after the last
(e.g. analytics
s)
const lastP = contentEl.querySelector("p:last-of-type");
while (contentEl.lastElementChild !== lastP) {
- contentEl.removeChild(contentEl.lastElementChild);
+ contentEl.lastElementChild.remove();
}
// Remove empty
s or Last Chapter/Next Chapter
s
while (isEmptyOrGarbage(contentEl.lastElementChild)) {
- contentEl.removeChild(contentEl.lastElementChild);
+ contentEl.lastElementChild.remove();
}
// Remove redundant attributes and style
- Array.prototype.forEach.call(contentEl.children, child => {
+ for (const child of contentEl.children) {
if (child.getAttribute("dir") === "ltr") {
child.removeAttribute("dir");
}
@@ -63,46 +63,42 @@ function getBodyXML(chapter, book, contentEl) {
if (child.getAttribute("style") === "text-align:left;padding-left:30px;") {
child.setAttribute("style", "padding-left:30px;");
}
- });
+ }
// Remove empty s and s
// Remove style attributes from them, as they're always messed up.
- const ems = contentEl.querySelectorAll("em, i");
- Array.prototype.forEach.call(ems, em => {
+ for (const em of contentEl.querySelectorAll("em, i")) {
if (em.textContent.trim() === "") {
- const replacement = contentEl.ownerDocument.createTextNode(" ");
- em.parentNode.replaceChild(replacement, em);
+ em.replaceWith(contentEl.ownerDocument.createTextNode(" "));
} else {
em.removeAttribute("style");
}
- });
+ }
// In https://parahumans.wordpress.com/2013/01/05/monarch-16-13/ there are some s that should be s O_o
- const addresses = contentEl.querySelectorAll("address");
- Array.prototype.forEach.call(addresses, address => {
+ for (const address of contentEl.querySelectorAll("address")) {
const p = contentEl.ownerDocument.createElement("p");
p.innerHTML = address.innerHTML;
- address.parentNode.replaceChild(p, address);
- });
+ address.replaceWith(p);
+ }
// Every except underline ones is pointless at best and frequently messed up. (Weird font size, line spacing,
// etc.)
- const spans = contentEl.querySelectorAll("span");
- Array.prototype.forEach.call(spans, span => {
+ for (const span of contentEl.querySelectorAll("span")) {
if (span.getAttribute("style") === "text-decoration:underline;") {
- return;
+ continue;
}
if (span.textContent.trim() === "") {
- span.parentNode.removeChild(span);
+ span.remove();
} else {
const docFrag = contentEl.ownerDocument.createDocumentFragment();
while (span.firstChild) {
docFrag.appendChild(span.firstChild);
}
- span.parentNode.replaceChild(docFrag, span);
+ span.replaceWith(docFrag);
}
- });
+ }
// In Ward, CloudFlare email protection obfuscates the email addresses:
// https://usamaejaz.com/cloudflare-email-decoding/
From 699c9be71f099d7ede04b6d089ae95bf5c52111c Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 21:20:30 -0400
Subject: [PATCH 026/186] Uniformize indentation between Ward and Worm
---
lib/convert-worker.js | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 8d3b2e4..c29163c 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -57,11 +57,16 @@ function getBodyXML(chapter, book, contentEl) {
// useless)
child.removeAttribute("align");
- if (child.getAttribute("style") === "text-align:left;") {
+ const style = child.getAttribute("style");
+ if (style === "text-align:left;" || style === "text-align: left;") {
child.removeAttribute("style");
}
- if (child.getAttribute("style") === "text-align:left;padding-left:30px;") {
- child.setAttribute("style", "padding-left:30px;");
+
+ // Worm uses 30px; Ward mostly uses 40px but sometimes uses 30px/60px. Let's standardize on 30px.
+ if (style === "text-align:left;padding-left:30px;" ||
+ style === "text-align: left;padding-left: 40px;" ||
+ style === "padding-left: 40px;") {
+ child.setAttribute("style", "padding-left: 30px;");
}
}
From bcca56866c354d7ae01ac1010c40d42f3ce64842 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 21:29:21 -0400
Subject: [PATCH 027/186] Revise the chapter URL comments
---
lib/convert-worker.js | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index c29163c..a01a2a1 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -119,9 +119,6 @@ function getBodyXML(chapter, book, contentEl) {
h1El.textContent = chapter.title;
bodyEl.appendChild(h1El);
- const comment = contentEl.ownerDocument.createComment(chapter.url);
- bodyEl.appendChild(comment);
-
while (contentEl.firstChild) {
bodyEl.appendChild(contentEl.firstChild);
}
@@ -458,8 +455,11 @@ function getBodyXML(chapter, book, contentEl) {
}
});
- // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a
- xml = xml.replace(//, "");
+ // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a .
+ // Use this opportunity to insert a comment pointing to the original URL, for reference.
+ xml = xml.replace(
+ //,
+ `\n\n`);
return xml;
}
From 056dc9d514ef68c19426d327062911a6f059ac86 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 26 Oct 2020 21:59:41 -0400
Subject: [PATCH 028/186] 4.3.0
---
npm-shrinkwrap.json | 2 +-
package.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 015a00c..7f379e7 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
{
"name": "worm-scraper",
- "version": "4.2.0",
+ "version": "4.3.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index cb0e2ba..5011a22 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"parahuman",
"scraper"
],
- "version": "4.2.0",
+ "version": "4.3.0",
"author": "Domenic Denicola (https://domenic.me/)",
"license": "WTFPL",
"repository": "domenic/worm-scraper",
From 3810138e15b12d080fa2b46406044135d633739a Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 31 Oct 2020 17:36:31 -0400
Subject: [PATCH 029/186] Spot fixes for Ward through Beacon 8.12
---
lib/convert-worker.js | 1 -
lib/substitutions.json | 98 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 98 insertions(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index a01a2a1..51d5d61 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -262,7 +262,6 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
- xml = xml.replace(/Earth-Gimel/g, "Earth–Gimel");
xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
xml = xml.replace(/G-N/g, "G–N");
xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index a26f014..33c689a 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5169,10 +5169,38 @@
"after": "Keith said, “you decide"
}
],
+ "https://www.parahumans.net/2018/07/03/beacon-8-1/": [
+ {
+ "before": "a group of minors,” Dragon said. “Allow others",
+ "after": "a group of minors,” Dragon said, “allow others"
+ }
+ ],
"https://www.parahumans.net/2018/07/07/beacon-8-2/": [
{
"before": "Mom and dad.
",
"after": "Mom and Dad.
"
+ },
+ {
+ "before": "Missisipi",
+ "after": "Mississippi"
+ },
+ {
+ "before": "dragoncraft",
+ "after": "Dragon-craft"
+ }
+ ],
+ "https://www.parahumans.net/2018/07/10/beacon-8-3/": [
+ {
+ "before": "It’s—I can’t talk to Byron",
+ "after": "It’s— I can’t talk to Byron"
+ },
+ {
+ "before": "emergency, we’re capes, Cryptid said",
+ "after": "emergency, we’re capes,” Cryptid said"
+ },
+ {
+ "before": "one or two more fallen",
+ "after": "one or two more Fallen"
}
],
"https://www.parahumans.net/2018/07/14/beacon-8-4/": [
@@ -5183,12 +5211,24 @@
{
"before": "and then hit buttons
",
"after": "and then hit buttons.
"
+ },
+ {
+ "before": "I
think it’s done",
+ "after": "I
think it’s done"
}
],
"https://www.parahumans.net/2018/07/17/beacon-8-5/": [
{
"before": "on the offensive. chains whirled",
"after": "on the offensive. Chains whirled"
+ },
+ {
+ "before": "a bit hit to morale",
+ "after": "a big hit to morale"
+ },
+ {
+ "before": "mask was only a B-",
+ "after": "mask was only a B−"
}
],
"https://www.parahumans.net/2018/07/21/beacon-8-6/": [
@@ -5199,6 +5239,28 @@
{
"before": "above me. the sky was dark",
"after": "above me. The sky was dark"
+ },
+ {
+ "before": "she thrwe out a hand",
+ "after": "she threw out a hand"
+ },
+ {
+ "before": "corner them,” I said. “They’ll threaten civilians",
+ "after": "corner them,” I said, “they’ll threaten civilians"
+ },
+ {
+ "before": "reminiscent of a certain teenage boy-was cast in",
+ "after": "reminiscent of a certain teenage boy—was cast in"
+ },
+ {
+ "before": "Errors influence",
+ "after": "Error’s influence"
+ }
+ ],
+ "https://www.parahumans.net/2018/07/24/beacon-8-7/": [
+ {
+ "before": "Ashley’s activities therin",
+ "after": "Ashley’s activities therein"
}
],
"https://www.parahumans.net/2018/07/28/beacon-8-8/": [
@@ -5215,12 +5277,48 @@
{
"before": "was impatient. it was hard to tell",
"after": "was impatient. It was hard to tell"
+ },
+ {
+ "before": "“Is—Yes,” I finished",
+ "after": "“Is— Yes,” I finished"
+ }
+ ],
+ "https://www.parahumans.net/2018/08/04/beacon-8-10/": [
+ {
+ "before": "
I’m supposed to be you?",
+ "after": "
I’m supposed to be you?"
+ },
+ {
+ "before": "She had her Eyehook",
+ "after": "She had her eyehook"
+ },
+ {
+ "before": "You—I have to imagine you were",
+ "after": "You— I have to imagine you were"
+ },
+ {
+ "before": "I don’t—I can hear your mother",
+ "after": "I don’t— I can hear your mother"
+ },
+ {
+ "before": "my A—costume with my makeup at an A",
+ "after": "my A− costume with my makeup at an A"
+ }
+ ],
+ "https://www.parahumans.net/2018/08/07/beacon-8-11/": [
+ {
+ "before": "easy to understand story",
+ "after": "easy-to-understand story"
}
],
"https://www.parahumans.net/2018/08/11/beacon-8-12/": [
{
"before": "who are free now. they’re out there",
"after": "who are free now. They’re out there"
+ },
+ {
+ "before": "dare you? Hamza barked, again",
+ "after": "dare you?” Hamza barked, again"
}
],
"https://www.parahumans.net/2018/08/18/beacon-interlude-8-y/": [
From ff09d1fd76f9efc255faf0d610c0538ed26d53ce Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 31 Oct 2020 17:41:13 -0400
Subject: [PATCH 030/186] Always capitalize "the Megalopolis"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 51d5d61..086745d 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -387,6 +387,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/(Lord|Slater) street/g, "$1 Street");
xml = xml.replace(/(Hollow|Cedar) point/g, "$1 Point");
xml = xml.replace(/(Norwalk|Fenway|Stratford) station/g, "$1 Station");
+ xml = xml.replace(/the megalopolis/g, "the Megalopolis");
// These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
// italicized, so we go in the direction of removing the italics.
From e24bbc462beeb29117da58a1b6248f16b7f8b2de Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 31 Oct 2020 17:48:01 -0400
Subject: [PATCH 031/186] Fixes instances of -, in Ward
They are usually supposed to be em dashes.
---
lib/substitutions.json | 48 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 33c689a..04dd1b7 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5303,6 +5303,10 @@
{
"before": "my A—costume with my makeup at an A",
"after": "my A− costume with my makeup at an A"
+ },
+ {
+ "before": "now my apartment-, three or four minutes",
+ "after": "now my apartment—three or four minutes"
}
],
"https://www.parahumans.net/2018/08/07/beacon-8-11/": [
@@ -5621,6 +5625,10 @@
{
"before": "March said. she flourished her",
"after": "March said. She flourished her"
+ },
+ {
+ "before": "morning breath—more than morning breath-, but there",
+ "after": "morning breath—more than morning breath—but there"
}
],
"https://www.parahumans.net/2019/04/09/black-13-3/": [
@@ -5661,6 +5669,16 @@
{
"before": "cash are involved. there’s a reason",
"after": "cash are involved. There’s a reason"
+ },
+ {
+ "before": "zero—a circle made with her full hand-, three",
+ "after": "zero—a circle made with her full hand—three"
+ }
+ ],
+ "https://www.parahumans.net/2019/05/07/black-13-10/": [
+ {
+ "before": "the interior of this one was quiet-, I could",
+ "after": "the interior of this one was quiet—I could"
}
],
"https://www.parahumans.net/2019/05/11/black-13-11/": [
@@ -5669,6 +5687,12 @@
"after": "been scummy and tried"
}
],
+ "https://www.parahumans.net/2019/05/18/breaking-14-1/": [
+ {
+ "before": "face—ridiculous notion but still-, if I wrote him a note or sent him a text—less ridiculous notion-, saying",
+ "after": "face—ridiculous notion but still—if I wrote him a note or sent him a text—less ridiculous notion—saying"
+ }
+ ],
"https://www.parahumans.net/2019/05/25/breaking-14-3/": [
{
"before": "actively tamper with it it",
@@ -5687,12 +5711,20 @@
{
"before": "Promises to mom and dad",
"after": "Promises to Mom and Dad"
+ },
+ {
+ "before": "or innovators—scientists-, they say",
+ "after": "or innovators—scientists—they say"
}
],
"https://www.parahumans.net/2019/06/08/breaking-14-7/": [
{
"before": "Regretting sending mom here",
"after": "Regretting sending Mom here"
+ },
+ {
+ "before": "you’re talking syncope—fainting-, arrythmia,",
+ "after": "you’re talking syncope—fainting, arrythmia,"
}
],
"https://www.parahumans.net/2019/06/15/breaking-14-9/": [
@@ -5701,6 +5733,12 @@
"after": "You stay away from Mom, you stay away from Dad"
}
],
+ "https://www.parahumans.net/2019/06/18/breaking-14-10/": [
+ {
+ "before": "in a real fight-, but in",
+ "after": "in a real fight—but in"
+ }
+ ],
"https://www.parahumans.net/2019/06/22/breaking-14-11/": [
{
"before": "-He’s changing-",
@@ -5899,6 +5937,12 @@
"after": "Golem explained."
}
],
+ "https://www.parahumans.net/2019/10/26/sundown-17-8/": [
+ {
+ "before": "one primary goal—me-, and who",
+ "after": "one primary goal—me—and who"
+ }
+ ],
"https://www.parahumans.net/2019/11/02/sundown-17-10/": [
{
"before": "wanted you to go to mom if you",
@@ -5973,6 +6017,10 @@
{
"before": "Three seconds
",
"after": "Three seconds.
"
+ },
+ {
+ "before": "scream—no sound when viewing crystal-pictures, of course-, and",
+ "after": "scream—no sound when viewing crystal-pictures, of course—and"
}
],
"https://www.parahumans.net/2020/01/01/interlude-19-a/": [
From dd2b5b15ce67bf33a48f400a2cacce57061f78c7 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 31 Oct 2020 17:53:32 -0400
Subject: [PATCH 032/186] Fix instances of "The clairvoyant"
Previously we only fixed "the clairvoyant", missing cases where his name started sentences.
---
lib/convert-worker.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 086745d..4d33ecf 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -329,7 +329,7 @@ function getBodyXML(chapter, book, contentEl) {
// This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
// it's incorrect to capitalize in the one-off fixes.
// Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
- xml = xml.replace(/the clairvoyant([^s])/g, "the Clairvoyant$1");
+ xml = xml.replace(/([Tt])he clairvoyant([^s])/g, "$1he Clairvoyant$2");
// ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
xml = xml.replace(/Resound/g, "ReSound");
From 24809f607bcf011062feb0210a704545d937f115 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 31 Oct 2020 17:58:16 -0400
Subject: [PATCH 033/186] Always capitalize "Earths"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 4d33ecf..3ea0d9c 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -388,6 +388,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/(Hollow|Cedar) point/g, "$1 Point");
xml = xml.replace(/(Norwalk|Fenway|Stratford) station/g, "$1 Station");
xml = xml.replace(/the megalopolis/g, "the Megalopolis");
+ xml = xml.replace(/earths(?![a-z])/g, "Earths");
// These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
// italicized, so we go in the direction of removing the italics.
From 411defc1531f588885bcbd3225cf2d77994193ce Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 31 Oct 2020 18:04:01 -0400
Subject: [PATCH 034/186] 4.4.0
---
npm-shrinkwrap.json | 2 +-
package.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 7f379e7..1c2a77a 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
{
"name": "worm-scraper",
- "version": "4.3.0",
+ "version": "4.4.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index 5011a22..6466068 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"parahuman",
"scraper"
],
- "version": "4.3.0",
+ "version": "4.4.0",
"author": "Domenic Denicola (https://domenic.me/)",
"license": "WTFPL",
"repository": "domenic/worm-scraper",
From b4454f8432ac3a8f751174201cc2777bb1954928 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 16:08:25 -0500
Subject: [PATCH 035/186] Fix warning output
This has been broken since the progress bar addition.
---
lib/convert-worker.js | 26 ++++++++++++++++----------
lib/convert.js | 7 ++++++-
2 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 3ea0d9c..c0305d5 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -10,29 +10,35 @@ function convertChapter(chapter, book, inputPath, outputPath) {
const contents = fs.readFileSync(inputPath, { encoding: "utf-8" });
const rawChapterJSDOM = new JSDOM(contents);
- const output = getChapterString(chapter, book, rawChapterJSDOM.window.document);
+ const { output, warnings } = getChapterString(chapter, book, rawChapterJSDOM.window.document);
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterJSDOM.window.close();
fs.writeFileSync(outputPath, output);
+ return warnings;
}
function getChapterString(chapter, book, rawChapterDoc) {
- const body = getBodyXML(chapter, book, rawChapterDoc.querySelector(".entry-content"));
+ const { xml, warnings } =
+ getBodyXML(chapter, book, rawChapterDoc.querySelector(".entry-content"));
- return `
+ const output = `
${chapter.title}
-${body}
+${xml}
`;
+
+ return { output, warnings };
}
function getBodyXML(chapter, book, contentEl) {
+ const warnings = [];
+
// Remove initial Next Chapter and Previous Chapter
contentEl.firstElementChild.remove();
@@ -440,19 +446,19 @@ function getBodyXML(chapter, book, contentEl) {
if (substitution.before) {
const indexOf = xml.indexOf(substitution.before);
if (indexOf === -1) {
- console.warn(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
- `updated at the source, in which case, you should edit substitutions.json.`);
+ warnings.push(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
+ `updated at the source, in which case, you should edit substitutions.json.`);
}
if (indexOf !== xml.lastIndexOf(substitution.before)) {
- console.warn(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
- `Update substitutions.json for a more precise substitution.`);
+ warnings.push(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
+ `Update substitutions.json for a more precise substitution.`);
}
xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
} else if (substitution.regExp) {
xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
} else {
- console.warn(`Invalid substitution specified for ${chapter.url}`);
+ warnings.push(`Invalid substitution specified for ${chapter.url}`);
}
});
@@ -462,7 +468,7 @@ function getBodyXML(chapter, book, contentEl) {
/
/,
`\n\n`);
- return xml;
+ return { xml, warnings };
}
function isEmptyOrGarbage(el) {
diff --git a/lib/convert.js b/lib/convert.js
index e7555cb..c4ae90d 100644
--- a/lib/convert.js
+++ b/lib/convert.js
@@ -21,18 +21,23 @@ module.exports = async (cachePath, manifestPath, contentPath, book, concurrentJo
}
const pool = workerpool.pool(path.resolve(__dirname, "convert-worker.js"), poolOptions);
+ const warnings = [];
await Promise.all(chapters.map(async chapter => {
const inputPath = path.resolve(cachePath, chapter.filename);
const destFileName = `${path.basename(chapter.filename, ".html")}.xhtml`;
const outputPath = path.resolve(contentPath, destFileName);
- await pool.exec("convertChapter", [chapter, book, inputPath, outputPath]);
+ warnings.push(...await pool.exec("convertChapter", [chapter, book, inputPath, outputPath]));
progress.increment();
}));
pool.terminate();
+ for (const warning of warnings) {
+ console.warn(warning);
+ }
+
console.log("All chapters converted");
};
From 547b7e95180dc576f9b1235dc4fcdd5736f0cbdb Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 16:12:41 -0500
Subject: [PATCH 036/186] Restore original scene breaks, instead of using <hr>
---
lib/convert-worker.js | 22 +++++++++++++---------
lib/substitutions.json | 6 +++---
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index c0305d5..d0fc0c9 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -288,15 +288,19 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
- // Use
for separators
- // https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/ has "super-separators" ("⊙ ⊙ ⊙ ⊙ ⊙") which we
- // leave untouched for now.
- xml = xml.replace(/■<\/p>/g, "
");
- xml = xml.replace(/■<\/p>/g, "
");
- xml = xml.replace(/⊙<\/p>/g, "
");
- xml = xml.replace(/⊙<\/strong><\/p>/g, "
");
- xml = xml.replace(/⊙<\/strong><\/em><\/p>/g, "
");
- xml = xml.replace(/⊙⊙<\/strong><\/p>/g, "
");
+ // Normalize scene breaks.
would be more semantically appropriate, but loses the author's intent. This is
+ // especially the case in Ward, which uses a variety of different scene breaks.
+ xml = xml.replace(/]*)>■<\/p>/g, `
■
`);
+
+ xml = xml.replace(/⊙<\/p>/g, `
⊙
`);
+ xml = xml.replace(/⊙<\/strong><\/p>/g, `⊙
`);
+ xml = xml.replace(/⊙<\/strong><\/em><\/p>/g,
+ `⊙
`);
+ xml = xml.replace(/⊙⊙<\/strong><\/p>/g,
+ `⊙
`);
+
+ xml = xml.replace(/⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/g,
+ `⊙ ⊙ ⊙ ⊙ ⊙
`);
// Fix recurring miscapitalization with questions
xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 04dd1b7..e0f82cd 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5,7 +5,7 @@
"after": "bugs: flies, ants"
},
{
- "before": "Brief note from the author: This story isn’t intended for young or sensitive readers. Readers who are on the lookout for trigger warnings are advised to give Worm a pass.
\n
\n",
+ "before": "Brief note from the author: This story isn’t intended for young or sensitive readers. Readers who are on the lookout for trigger warnings are advised to give Worm a pass.
\n■
\n",
"after": "",
"_comment": "The pseudo-trigger warning is out of place in an eBook."
}
@@ -4334,7 +4334,7 @@
],
"https://www.parahumans.net/2017/09/11/daybreak-1-1/": [
{
- "before": "Ward is the second work in the Parahumans series, and reading Worm first is strongly recommended. A lot of this won’t make sense otherwise and if you do find yourself a fan of the universe, the spoilers in Ward will affect the reading of the other work.
\nWard is not recommended for young or sensitive readers.
\n
",
+ "before": "Ward is the second work in the Parahumans series, and reading Worm first is strongly recommended. A lot of this won’t make sense otherwise and if you do find yourself a fan of the universe, the spoilers in Ward will affect the reading of the other work.
\nWard is not recommended for young or sensitive readers.
\n⊙
\n",
"after": "",
"_comment": "This is out of place in an eBook."
}
@@ -5013,7 +5013,7 @@
],
"https://www.parahumans.net/2018/06/05/torch-7-5/": [
{
- "before": "
\nI don’t",
+ "before": "⊙
\nI don’t",
"after": "I don’t"
},
{
From b85e8a68b157623aba902bd02471bebe8882aa55 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 17:29:01 -0500
Subject: [PATCH 037/186] Spot fixes for Ward through Polarize 10.x
---
lib/convert-worker.js | 2 +-
lib/substitutions.json | 431 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 429 insertions(+), 4 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index d0fc0c9..6ade3b7 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -347,7 +347,7 @@ function getBodyXML(chapter, book, contentEl) {
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol ".
- xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl)/ig,
+ xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus)/ig,
(_, $1) => `Patrol ${$1.toLowerCase()}`);
// This always works in Ward and has a few false positives in Worm, where it is never needed:
if (book === "ward") {
diff --git a/lib/substitutions.json b/lib/substitutions.json
index e0f82cd..412659d 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4505,6 +4505,12 @@
"after": "much like the machines"
}
],
+ "https://www.parahumans.net/2018/01/30/shade-4-2/": [
+ {
+ "before": "one earth",
+ "after": "one Earth"
+ }
+ ],
"https://www.parahumans.net/2018/02/03/shade-4-3/": [
{
"before": "ex-birdcage",
@@ -4719,6 +4725,10 @@
{
"before": "or if he said something like ‘You’ or “Everyone’, I could",
"after": "or if he said something like ‘You’ or ‘Everyone’, I could"
+ },
+ {
+ "before": "rain. and I saw",
+ "after": "rain. And I saw"
}
],
"https://www.parahumans.net/2018/04/07/shadow-interlude-5-x/": [
@@ -5167,6 +5177,10 @@
{
"before": "Keith said. “You decide",
"after": "Keith said, “you decide"
+ },
+ {
+ "before": "The towels!",
+ "after": "The towels!"
}
],
"https://www.parahumans.net/2018/07/03/beacon-8-1/": [
@@ -5325,10 +5339,54 @@
"after": "dare you?” Hamza barked, again"
}
],
+ "https://www.parahumans.net/2018/08/14/beacon-interlude-8-x/": [
+ {
+ "before": "attention on Natalie, “this is",
+ "after": "attention on Natalie. “This is"
+ },
+ {
+ "before": "Natalie’s beetle",
+ "after": "Natalie’s Beetle"
+ },
+ {
+ "before": "I said!",
+ "after": "I said!"
+ },
+ {
+ "before": "adam’s apple",
+ "after": "Adam’s apple"
+ },
+ {
+ "before": "Time-time for Kenzie",
+ "after": "Time—time for Kenzie"
+ }
+ ],
"https://www.parahumans.net/2018/08/18/beacon-interlude-8-y/": [
{
"before": "A sufficient impact or distraction
",
"after": "A sufficient impact or distraction.
"
+ },
+ {
+ "before": "‘Hard Boil.’",
+ "after": "‘Hard Boil’."
+ },
+ {
+ "before": "tee-vee",
+ "after": "TV"
+ },
+ {
+ "before": "teacher’s power",
+ "after": "Teacher’s power"
+ }
+ ],
+ "https://www.parahumans.net/2018/08/21/gleaming-9-1/": [
+ {
+ "before": "Probably teacher",
+ "after": "Probably Teacher"
+ },
+ {
+ "before": "b-list",
+ "after": "B-list"
}
],
"https://www.parahumans.net/2018/08/28/gleaming-9-3/": [
@@ -5339,12 +5397,54 @@
{
"before": "with a strong Master",
"after": "with a strong master"
+ },
+ {
+ "before": "—But I can’t make concessions",
+ "after": "—but I can’t make concessions"
+ },
+ {
+ "before": "too impatient and angry It was worse because",
+ "after": "too impatient and angry. It was worse because"
+ },
+ {
+ "before": "what it’s worth,” Tristan said. “Thanks for",
+ "after": "what it’s worth,” Tristan said, “thanks for"
+ },
+ {
+ "before": "Apparently… perk of..",
+ "after": "Apparently… perk of…"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/01/gleaming-9-4/": [
+ {
+ "before": "Power related",
+ "after": "Power-related"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/04/gleaming-9-5/": [
+ {
+ "before": "still a voice without confidence",
+ "after": "Still a voice without confidence"
+ },
+ {
+ "before": "The only thing that hold him back",
+ "after": "The only things that hold him back"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/08/gleaming-9-6/": [
+ {
+ "before": "The patrol had done its work",
+ "after": "The Patrol had done its work"
}
],
"https://www.parahumans.net/2018/09/11/gleaming-9-7/": [
{
"before": "Byron said. he was at",
"after": "Byron said. He was at"
+ },
+ {
+ "before": "then teacher can’t",
+ "after": "then Teacher can’t"
}
],
"https://www.parahumans.net/2018/09/13/gleaming-interlude-9-x/": [
@@ -5355,6 +5455,120 @@
{
"before": "that made her her- the shock of white",
"after": "that made her her—the shock of white"
+ },
+ {
+ "regExp": "their Papa",
+ "replacement": "their papa"
+ },
+ {
+ "before": "Mama and papa talk",
+ "after": "Mama and Papa talk"
+ },
+ {
+ "before": "injuries,” papa said",
+ "after": "injuries,” Papa said"
+ },
+ {
+ "before": "papa’s face",
+ "after": "Papa’s face"
+ },
+ {
+ "before": "Selfless?” papa asked",
+ "after": "Selfless?” Papa asked"
+ },
+ {
+ "before": "kind of selfie, papa",
+ "after": "kind of selfie, Papa"
+ },
+ {
+ "before": "part of that,” papa said",
+ "after": "part of that,” Papa said"
+ },
+ {
+ "before": "you,” papa said",
+ "after": "you,” Papa said"
+ },
+ {
+ "before": "too, papa,” Tristan said",
+ "after": "too, Papa,” Tristan said"
+ },
+ {
+ "before": "worked in rigid. the armor’s",
+ "after": "worked in rigid. The armor’s"
+ },
+ {
+ "before": "moving and across to",
+ "after": "moving across to"
+ },
+ {
+ "before": "fucking sense!”",
+ "after": "fucking sense
!”"
+ },
+ {
+ "before": "that hurt! Stop!",
+ "after": "that hurt! Stop!"
+ },
+ {
+ "before": "it was one thing!",
+ "after": "It was one thing!"
+ },
+ {
+ "before": "as an anything!",
+ "after": "as an anything!"
+ },
+ {
+ "before": "have backed you up!",
+ "after": "have backed you up!"
+ },
+ {
+ "before": "mr. Vaughn",
+ "after": "Mr. Vaughn"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/15/gleaming-9-8/": [
+ {
+ "before": "
For those who missed it, there was a Thursday update. See the prior chapter.
\n⊙
\n",
+ "after": ""
+ },
+ {
+ "before": "Capricorn blue",
+ "after": "Capricorn Blue"
+ },
+ {
+ "before": "everything ended, Several of my",
+ "after": "everything ended, several of my"
+ },
+ {
+ "before": "you can! You",
+ "after": "you can! You"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/18/gleaming-9-9/": [
+ {
+ "before": "teacher thrall",
+ "after": "Teacher thrall"
+ },
+ {
+ "before": "need to,” I said, because I’m",
+ "after": "need to,” I said, “because I’m"
+ },
+ {
+ "before": "keeping the Warden and his deputy",
+ "after": "keeping the warden and his deputy"
+ }
+ ],
+ "https://www.parahumans.net/2018/09/22/gleaming-interlude-9-y/": [
+ {
+ "before": "you up where you’re
weak? You’re",
+ "after": "you up where you’re weak? You’re"
+ },
+ {
+ "before": "for you.",
+ "after": "for you."
+ },
+ {
+ "before": "Why ‘inflict‘",
+ "after": "Why ‘inflict’"
}
],
"https://www.parahumans.net/2018/09/25/gleaming-9-10/": [
@@ -5365,54 +5579,195 @@
{
"before": "at the balcony. both wore",
"after": "at the balcony. Both wore"
+ },
+ {
+ "before": "—But Rain and Sveta",
+ "after": "—but Rain and Sveta"
+ },
+ {
+ "before": "Cyrstalclear",
+ "after": "Crystalclear"
+ },
+ {
+ "before": "He stopped here he was",
+ "after": "He stopped where he was"
+ },
+ {
+ "regExp": "Warden",
+ "replacement": "warden",
+ "_comment": "This is discussing prison wardens, which is a normal job title"
}
],
"https://www.parahumans.net/2018/09/29/gleaming-9-11/": [
{
"before": "two people died. the forcefield went",
"after": "two people died. The forcefield went"
+ },
+ {
+ "before": "Maybe wrong to think that within",
+ "after": "Maybe wrong to think that, within"
+ },
+ {
+ "regExp": "Crock o Shit",
+ "after": "Crock o’ Shit"
+ }
+ ],
+ "https://www.parahumans.net/2018/10/02/gleaming-9-12/": [
+ {
+ "before": "teacher hit squad",
+ "after": "Teacher hit squad"
+ },
+ {
+ "before": "—You gotta give something",
+ "after": "—you gotta give something"
}
],
"https://www.parahumans.net/2018/10/09/gleaming-9-14/": [
{
"before": "in a fireman carry. with my free hand",
"after": "in a fireman carry. With my free hand"
+ },
+ {
+ "before": "fucking teacher wouldn’t",
+ "after": "fucking Teacher wouldn’t"
}
],
"https://www.parahumans.net/2018/10/13/gleaming-9-15/": [
{
"before": "-fucked in the head.",
"after": "—fucked in the head."
+ },
+ {
+ "before": "some of the creases. and from the",
+ "after": "some of the creases, and from the"
+ },
+ {
+ "before": "Disappointed?",
+ "after": "Disappointed?"
}
],
"https://www.parahumans.net/2018/10/16/gleaming-interlude-9-z/": [
{
"before": "cliff he was was expected",
"after": "cliff he was expected"
+ },
+ {
+ "before": "told the Warden differed",
+ "after": "told the warden differed"
+ },
+ {
+ "before": "assistant Warden",
+ "after": "assistant warden"
+ },
+ {
+ "before": "behind those things was By,",
+ "after": "behind those things was By,"
+ },
+ {
+ "before": "What got into you?",
+ "after": "What got into you?"
+ },
+ {
+ "before": "what happens to anyone with powers is near anyone",
+ "after": "what happens when anyone with powers is near anyone"
}
],
- "https://www.parahumans.net/2018/12/11/interlude-10-y/": [
+ "https://www.parahumans.net/2018/10/20/gleaming-interlude-9/": [
{
- "before": "shape of the the moment",
- "after": "shape of the moment"
+ "before": "Guarderò",
+ "after": "Guarderò."
+ },
+ {
+ "before": "one earth’s worth",
+ "after": "one Earth’s worth"
+ },
+ {
+ "before": "the adjacent earth",
+ "after": "the adjacent Earth"
+ },
+ {
+ "before": "closest earth",
+ "after": "closest Earth"
+ },
+ {
+ "before": "of them, “Would be",
+ "after": "of them, “would be"
+ },
+ {
+ "before": "finds it’s root",
+ "after": "finds its root"
+ },
+ {
+ "before": "they’re Valkyrie, Crystal",
+ "after": "they’re Valkyrie, Crystal"
}
],
"https://www.parahumans.net/2018/10/23/polarize-10-1/": [
{
"before": "You let mom talk and",
"after": "You let Mom talk and"
+ },
+ {
+ "before": "post-Prison",
+ "after": "post-prison"
+ },
+ {
+ "before": "to take power!’ ”",
+ "after": "to take power!’”"
+ },
+ {
+ "before": "I’m sorry, what? What?",
+ "after": "I’m sorry, what? What?"
}
],
"https://www.parahumans.net/2018/10/27/polarize-10-2/": [
{
"before": "Sveta said, leaning on me
",
"after": "Sveta said, leaning on me.
"
+ },
+ {
+ "before": "and that part of me is really disappointed",
+ "after": "And that part of me is really disappointed"
+ }
+ ],
+ "https://www.parahumans.net/2018/10/30/polarize-10-3/": [
+ {
+ "before": "betray-y",
+ "after": "betray-ey"
}
],
"https://www.parahumans.net/2018/11/03/polarize-10-4/": [
{
"before": "services of mercenaries. depending on timing",
"after": "services of mercenaries. Depending on timing"
+ },
+ {
+ "before": "we should get going",
+ "after": "We should get going"
+ },
+ {
+ "before": "clean her stuff.. She’ll be out soon",
+ "after": "clean her stuff. She’ll be out soon"
+ },
+ {
+ "before": "finger—and thumb-rings",
+ "after": "finger- and thumb-rings"
+ }
+ ],
+ "https://www.parahumans.net/2018/11/06/polarize-10-5/": [
+ {
+ "before": "because It’s pertinent to",
+ "after": "because it’s pertinent to"
+ },
+ {
+ "before": "bangs said, “They won’t give",
+ "after": "bangs said, “they won’t give"
+ }
+ ],
+ "https://www.parahumans.net/2018/11/10/polarize-10-6/": [
+ {
+ "before": "Caveat",
+ "after": "caveat"
}
],
"https://www.parahumans.net/2018/11/13/polarize-10-7/": [
@@ -5427,24 +5782,84 @@
{
"before": "-your pancreas.",
"after": "—your pancreas."
+ },
+ {
+ "before": "a hair of headphones",
+ "after": "a pair of headphones"
+ },
+ {
+ "before": "Shark girl",
+ "after": "shark girl"
+ },
+ {
+ "before": "Be safe—Nat.",
+ "after": "Be safe —Nat."
+ },
+ {
+ "before": "kill each other.”",
+ "after": "kill each other.”"
}
],
"https://www.parahumans.net/2018/11/17/polarize-10-8/": [
{
"before": "Sveta said. her hand was removed",
"after": "Sveta said. Her hand was removed"
+ },
+ {
+ "before": "There were a bag fast food",
+ "after": "There was a bag of fast food"
+ },
+ {
+ "before": "anti parahuman",
+ "after": "anti-parahuman"
}
],
"https://www.parahumans.net/2018/11/20/polarize-10-9/": [
{
"before": "confines of the suit. and rearranged herself",
"after": "confines of the suit, and rearranged herself"
+ },
+ {
+ "before": "‘piece",
+ "after": "’piece"
+ },
+ {
+ "before": "Fuck y—of course",
+ "after": "Fuck y— Of course"
+ },
+ {
+ "before": "they had Fallen behind",
+ "after": "they had fallen behind"
+ },
+ {
+ "before": "Fucking heroes!",
+ "after": "Fucking heroes!"
+ },
+ {
+ "before": "the unfucked!",
+ "after": "the unfucked!"
}
],
"https://www.parahumans.net/2018/11/24/interlude-10-x/": [
{
"before": "that would be in time Hopefully",
"after": "that would be in time. Hopefully."
+ },
+ {
+ "before": "consisting of two—and three-person",
+ "after": "consisting of two- and three-person"
+ },
+ {
+ "before": "Garotte, Hand at one",
+ "after": "Garotte, hand at one"
+ },
+ {
+ "before": "with that, The unfortunately named",
+ "after": "with that, the unfortunately-named"
+ },
+ {
+ "before": "not aware of the lipstick am",
+ "after": "not aware of the lipstick"
}
],
"https://www.parahumans.net/2018/11/27/polarize-10-10/": [
@@ -5459,6 +5874,12 @@
"after": "See Dad, sleep."
}
],
+ "https://www.parahumans.net/2018/12/11/interlude-10-y/": [
+ {
+ "before": "shape of the the moment",
+ "after": "shape of the moment"
+ }
+ ],
"https://www.parahumans.net/2019/01/12/blinding-11-7/": [
{
"before": "threw the the man",
@@ -5553,6 +5974,10 @@
{
"before": "I need dad",
"after": "I need Dad"
+ },
+ {
+ "before": "Oh fuck!",
+ "after": "Oh fuck!"
}
],
"https://www.parahumans.net/2019/02/23/heavens-12-3/": [
From e364cb2b221d6054fed0fae82a61fdaefb9ad5f8 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 17:34:22 -0500
Subject: [PATCH 038/186] Normalize a few instances of "T-shirt" to "t-shirt"
The latter is overwhelmingly more common.
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 6ade3b7..05292c8 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -423,6 +423,9 @@ function getBodyXML(chapter, book, contentEl) {
// Clich(e|é) is spelled both ways. Let's standardize on including the accent.
xml = xml.replace(/cliche/g, "cliché");
+ // T-shirt is usually spelled lowercase ("t-shirt"). Normalize the remaining instances.
+ xml = xml.replace(/T-shirt/g, "t-shirt");
+
// "gray" is the majority spelling, except for "greyhound"
xml = xml.replace(/(G|g)rey(?!hound)/g, "$1ray");
From 6cdf486858666ea9e60d1480b7226aa447a71fd6 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 17:39:13 -0500
Subject: [PATCH 039/186] Fix missing spaces after commas
---
lib/convert-worker.js | 3 +++
lib/substitutions.json | 4 ----
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 05292c8..ec909a8 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -262,6 +262,9 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/I-I/g, "I—I");
xml = xml.replace(/I-uh/g, "I—uh");
+ // Fix missing spaces after commas
+ xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
+
// Joint names should use em dashes
xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 412659d..9d1d362 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2754,10 +2754,6 @@
"before": "fighting. But.",
"after": "fighting. But…"
},
- {
- "before": "r,s",
- "after": "r, s"
- },
{
"before": "—But I have",
"after": "—but I have"
From 56a38609ca12c190ee4afcd314caf720b431fdb8 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 17:46:23 -0500
Subject: [PATCH 040/186] Fix a variety of misplaced or extraneous quotes
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 42 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index ec909a8..a3a8000 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -194,6 +194,7 @@ function getBodyXML(chapter, book, contentEl) {
fixEms();
fixQuotesAndApostrophes();
fixEms();
+ xml = xml.replace(/I”m/g, "I’m");
// Similar problems occur in Ward with and as do in Worm with s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 9d1d362..b0506d4 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2350,6 +2350,18 @@
{
"before": "to,” Alexandria spoke. “Is",
"after": "to,” Alexandria spoke, “is"
+ },
+ {
+ "before": "leave?” Hero asked. ”Why",
+ "after": "leave?” Hero asked. “Why"
+ },
+ {
+ "before": "eye contact. ”She",
+ "after": "eye contact. “She"
+ },
+ {
+ "before": "Alexandria said. ”Why",
+ "after": "Alexandria said. “Why"
}
],
"https://parahumans.wordpress.com/2012/11/10/colony-15-8/": [
@@ -2857,6 +2869,12 @@
"after": "the phone."
}
],
+ "https://parahumans.wordpress.com/2013/02/16/interlude-18/": [
+ {
+ "before": "her feet. ”We’re",
+ "after": "her feet. “We’re"
+ }
+ ],
"https://parahumans.wordpress.com/2013/02/19/scourge-19-1/": [
{
"before": "Rachel,” Tattletale said. “Come",
@@ -4459,6 +4477,10 @@
{
"before": "my flight toward toward Tristan",
"after": "my flight toward Tristan"
+ },
+ {
+ "before": "“Spooky,” Sveta said.”I expected",
+ "after": "“Spooky,” Sveta said. “I expected"
}
],
"https://www.parahumans.net/2018/01/16/glare-3-4/": [
@@ -4703,6 +4725,10 @@
{
"before": "The woman—I turned to look and saw",
"after": "The woman— I turned to look and saw"
+ },
+ {
+ "before": "Tristan asked.”",
+ "after": "Tristan asked."
}
],
"https://www.parahumans.net/2018/04/03/shadow-5-12/": [
@@ -5618,6 +5644,12 @@
"after": "—you gotta give something"
}
],
+ "https://www.parahumans.net/2018/10/06/gleaming-9-13/": [
+ {
+ "before": "second.”Clarify.”",
+ "after": "second. “Clarify.”"
+ }
+ ],
"https://www.parahumans.net/2018/10/09/gleaming-9-14/": [
{
"before": "in a fireman carry. with my free hand",
@@ -5900,6 +5932,12 @@
"after": "Victoria-flesh"
}
],
+ "https://www.parahumans.net/2019/01/26/blinding-11-9/": [
+ {
+ "before": "I told the mercenary.”",
+ "after": "I told the mercenary."
+ }
+ ],
"https://www.parahumans.net/2019/01/19/interlude-11-b/": [
{
"before": "loading up trucks with basic supplies",
@@ -6276,6 +6314,10 @@
{
"before": "small face on on a fifteen inch",
"after": "small face on a fifteen inch"
+ },
+ {
+ "before": "for the recording.”",
+ "after": "for the recording."
}
],
"https://www.parahumans.net/2019/08/24/from-within-16-4/": [
From 0053545444bd68262262302e2383118ee05d252e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 17:58:33 -0500
Subject: [PATCH 041/186] Correctly hyphenate "X-year-old"
---
lib/convert-worker.js | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index a3a8000..9959711 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -263,6 +263,11 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/I-I/g, "I—I");
xml = xml.replace(/I-uh/g, "I—uh");
+ // "X-year-old" should use hyphens; all grammar guides agree. The books are very inconsistent but most often omit
+ // them.
+ xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2");
+ xml = xml.replace(/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old");
+
// Fix missing spaces after commas
xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
From 481e9cc3a881257cd78d046bd99d6d870571fea9 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 18:03:58 -0500
Subject: [PATCH 042/186] Capitalize "the Pharmacist" from Ward Gleaming 9.10
onward
In Gleaming 9.10, it starts being capitalized consistently, presumably reflecting the shift from thinking of "the pharmacist" as a job title, to thinking of "the Pharmacist" as a cape name. So, for subsequent chapters, let's be sure to retain that.
---
lib/substitutions.json | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index b0506d4..dcfbf93 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5632,6 +5632,11 @@
{
"regExp": "Crock o Shit",
"after": "Crock o’ Shit"
+ },
+ {
+ "regExp": "the pharmacist",
+ "replacement": "the Pharmacist",
+ "_comment": "in the previous chapter, 'the Pharmacist' starts getting capitalized consistently, as if it's a name instead of a profession. Carry it forward."
}
],
"https://www.parahumans.net/2018/10/02/gleaming-9-12/": [
@@ -5642,6 +5647,11 @@
{
"before": "—You gotta give something",
"after": "—you gotta give something"
+ },
+ {
+ "before": "the pharmacist",
+ "after": "the Pharmacist",
+ "_comment": "See comment in https://www.parahumans.net/2018/09/29/gleaming-9-11/"
}
],
"https://www.parahumans.net/2018/10/06/gleaming-9-13/": [
@@ -5908,6 +5918,13 @@
"after": "shape of the moment"
}
],
+ "https://www.parahumans.net/2018/12/25/blinding-11-3/": [
+ {
+ "before": "the pharmacist",
+ "after": "the Pharmacist",
+ "_comment": "See comment in https://www.parahumans.net/2018/09/29/gleaming-9-11/"
+ }
+ ],
"https://www.parahumans.net/2019/01/12/blinding-11-7/": [
{
"before": "threw the the man",
From 7d5167c952888615d6f1f80615f0f347a561abab Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 18:09:19 -0500
Subject: [PATCH 043/186] 4.5.0
---
npm-shrinkwrap.json | 2 +-
package.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 1c2a77a..aaea9a7 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
{
"name": "worm-scraper",
- "version": "4.4.0",
+ "version": "4.5.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index 6466068..81d34a5 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"parahuman",
"scraper"
],
- "version": "4.4.0",
+ "version": "4.5.0",
"author": "Domenic Denicola (https://domenic.me/)",
"license": "WTFPL",
"repository": "domenic/worm-scraper",
From 58b0856deb141055e15e452de6fd5bce9d659925 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 18:36:29 -0500
Subject: [PATCH 044/186] Stop failing the convert step due to busy filesystems
This often happens with virus scanners, etc.
---
lib/worm-scraper.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/worm-scraper.js b/lib/worm-scraper.js
index 556a0df..52f3fd0 100644
--- a/lib/worm-scraper.js
+++ b/lib/worm-scraper.js
@@ -84,7 +84,7 @@ if (argv._.includes("download")) {
if (argv._.includes("convert")) {
commands.push(() => {
- return fs.rmdir(chaptersPath, { recursive: true })
+ return fs.rmdir(chaptersPath, { recursive: true, maxRetries: 3 })
.then(() => fs.mkdir(chaptersPath, { recursive: true }))
.then(() => convert(cachePath, manifestPath, chaptersPath, argv.book, argv.jobs));
});
From f70ba646297c12fdb53f13b3a6502bb44ddbc61a Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 18:46:56 -0500
Subject: [PATCH 045/186] Measure and output the time conversion takes
---
lib/convert.js | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/lib/convert.js b/lib/convert.js
index c4ae90d..0f35b62 100644
--- a/lib/convert.js
+++ b/lib/convert.js
@@ -1,6 +1,7 @@
"use strict";
const path = require("path");
const fs = require("fs").promises;
+const { performance } = require("perf_hooks");
const workerpool = require("workerpool");
const cliProgress = require("cli-progress");
@@ -11,9 +12,12 @@ module.exports = async (cachePath, manifestPath, contentPath, book, concurrentJo
console.log("Converting raw downloaded HTML to EPUB chapters");
const progress = new cliProgress.SingleBar({
stopOnComplete: true,
- clearOnComplete: true
+ clearOnComplete: true,
+ format: " {bar} {percentage}% | {time} | {value}/{total}"
}, cliProgress.Presets.shades_classic);
- progress.start(chapters.length, 0);
+
+ const start = performance.now();
+ progress.start(chapters.length, 0, { time: " " });
const poolOptions = {};
if (concurrentJobs !== undefined) {
@@ -30,7 +34,8 @@ module.exports = async (cachePath, manifestPath, contentPath, book, concurrentJo
warnings.push(...await pool.exec("convertChapter", [chapter, book, inputPath, outputPath]));
- progress.increment();
+ const time = String(Math.round((performance.now() - start) / 1000)).padStart(3) + " s";
+ progress.increment({ time });
}));
pool.terminate();
@@ -39,5 +44,5 @@ module.exports = async (cachePath, manifestPath, contentPath, book, concurrentJo
console.warn(warning);
}
- console.log("All chapters converted");
+ console.log(`All chapters converted in ${Math.round((performance.now() - start) / 100) / 10} seconds`);
};
From 33d88eb52307c530095852499dd844bd2aa18319 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 18:37:35 -0500
Subject: [PATCH 046/186] Refactor convert-worker.js a bit
This introduces a few fixes around italics.
---
lib/convert-worker.js | 353 +++++++++++++++++++++++------------------
lib/substitutions.json | 20 ++-
2 files changed, 214 insertions(+), 159 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 9959711..f69df99 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -135,6 +135,10 @@ function getBodyXML(chapter, book, contentEl) {
// Fix recurring strange pattern of extra
in ......
\n
xml = xml.replace(/
\s*<\/em><\/p>/g, "");
+ // Replace single-word <i>s with <em>s. Other <i>s are probably erroneous too, but these are known-bad.
+ xml = xml.replace(/([^ ]+)<\/i>/g, "$1");
+ xml = xml.replace(/([^ ]+)( +)<\/i>/g, "$1$2");
+
// There are way too many nonbreaking spaces where they don't belong.
// If they show up three in a row, then let them live. Otherwise, they die.
// Also remove any run of them after a period.
@@ -192,8 +196,6 @@ function getBodyXML(chapter, book, contentEl) {
fixEms();
fixQuotesAndApostrophes();
fixEms();
- fixQuotesAndApostrophes();
- fixEms();
xml = xml.replace(/I”m/g, "I’m");
// Similar problems occur in Ward with and as do in Worm with s
@@ -206,9 +208,155 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/(\s*)<\/strong>/g, "$1");
// No need for line breaks before paragraph ends
- // These often occur with the
s inside / fixed above.
+ // These often occur with the
s inside /// fixed above.
xml = xml.replace(/
\s*<\/p>/g, "");
+ // Fix missing spaces after commas
+ xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
+
+ // Fix bad periods and spacing/markup surrounding them
+ xml = xml.replace(/\.\.<\/p>/g, ".");
+ xml = xml.replace(/\.\.”<\/p>/g, ".”");
+ xml = xml.replace(/ \. /g, ". ");
+ xml = xml.replace(/ \.<\/p>/g, ".");
+ xml = xml.replace(/\.\.\./g, "…");
+
+ // Fix extra spaces
+ xml = xml.replace(/ ? <\/p>/g, "");
+ xml = xml.replace(/([a-z]) ,/g, "$1,");
+
+ xml = fixDialogueTags(xml);
+ xml = fixForeignNames(xml);
+ xml = fixEmDashes(xml);
+ xml = enDashJointNames(xml);
+ xml = fixPossessives(xml);
+ xml = cleanSceneBreaks(xml);
+ xml = fixCapitalization(xml, book);
+ xml = fixMispellings(xml);
+ xml = fixHyphens(xml);
+ xml = standardizeSpellings(xml);
+
+ // One-off fixes
+ for (const substitution of substitutions[chapter.url] || []) {
+ if (substitution.before) {
+ const indexOf = xml.indexOf(substitution.before);
+ if (indexOf === -1) {
+ warnings.push(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
+ `updated at the source, in which case, you should edit substitutions.json.`);
+ }
+ if (indexOf !== xml.lastIndexOf(substitution.before)) {
+ warnings.push(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
+ `Update substitutions.json for a more precise substitution.`);
+ }
+
+ xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
+ } else if (substitution.regExp) {
+ xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
+ } else {
+ warnings.push(`Invalid substitution specified for ${chapter.url}`);
+ }
+ }
+
+ // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a <html>.
+ // Use this opportunity to insert a comment pointing to the original URL, for reference.
+ xml = xml.replace(
+ //,
+ `\n\n`);
+
+ return { xml, warnings };
+}
+
+function fixDialogueTags(xml) {
+ // Fix recurring miscapitalization with questions
+ xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
+ xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
+
+ // The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
+ // > “I didn’t get much done,” Greg said, “I got distracted by...
+ // This should instead be
+ // > “I didn’t get much done,” Greg said. “I got distracted by...
+ //
+ // Our heuristic is to try to automatically fix this if the dialogue tag is two words (X said/admitted/sighed/etc.).
+ //
+ // This sometimes overcorrects, as in the following example:
+ // > “Basically,” Alec said, “For your powers to manifest, ...
+ // Here instead we should lowercase the "f". We handle that via one-offs in substitutions.json.
+ //
+ // This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
+ // times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
+ // capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
+ xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
+
+ return xml;
+}
+
+function fixForeignNames(xml) {
+ // This is consistently missing diacritics
+ xml = xml.replace(/Yangban/g, "Yàngbǎn");
+
+ // These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
+ // italicized, so we go in the direction of removing the italics.
+ xml = xml.replace(/Garama<\/em>/g, "Garama");
+ xml = xml.replace(/Thanda<\/em>/g, "Thanda");
+ xml = xml.replace(/Sifara([^<]*)<\/em>/g, "Sifara$1");
+ xml = xml.replace(/Moord Nag([^<]*)<\/em>/g, "Moord Nag$1");
+ xml = xml.replace(/Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
+ xml = xml.replace(/Turanta([^<]*)<\/em>/g, "Turanta$1");
+
+ return xml;
+}
+
+function fixEmDashes(xml) {
+ xml = xml.replace(/ – /g, "—");
+ xml = xml.replace(/“((?:)?)-/g, "“$1—");
+ xml = xml.replace(/-[,.]?”/g, "—”");
+ xml = xml.replace(/-(!|\?)”/g, "—$1”");
+ xml = xml.replace(/-[,.]?<\/em>”/g, "—”");
+ xml = xml.replace(/-“/g, "—”");
+ xml = xml.replace(/-/g, "
—");
+ xml = xml.replace(/-<\/p>/g, "—
");
+ xml = xml.replace(/-<\/em><\/p>/g, "—");
+ xml = xml.replace(/\s?\s?–\s?\s?/g, "—");
+ xml = xml.replace(/-\s\s?/g, "—");
+ xml = xml.replace(/\s?\s-/g, "—");
+ xml = xml.replace(/\s+—”/g, "—”");
+ xml = xml.replace(/I-I/g, "I—I");
+ xml = xml.replace(/I-uh/g, "I—uh");
+
+ return xml;
+}
+
+function enDashJointNames(xml) {
+ // Joint names should use en dashes
+ xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
+ xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
+ xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
+ xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
+ xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
+ xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
+ xml = xml.replace(/G-N/g, "G–N");
+ xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
+ xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
+ xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
+ xml = xml.replace(/Challenger-Gallant/g, "Challenger–Gallant");
+ xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2");
+ xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
+ xml = xml.replace(/East-West/g, "east–west");
+ xml = xml.replace(/(Green|Yellow)-Black/g, "$1–Black");
+ xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
+ xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
+ xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
+ xml = xml.replace(/Weaver-Clockblocker/g, "Weaver–Clockblocker");
+ xml = xml.replace(/Alexandria-Pretender/g, "Alexandria–Pretender");
+ xml = xml.replace(/Night Hag-Nyx/g, "Night Hag–Nyx");
+ xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
+ xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
+ xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
+
+ return xml;
+}
+
+function fixPossessives(xml) {
// Fix possessive of names ending in "s"
// Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
xml = xml.replace(/([^‘])Judas’([^s])/g, "$1Judas’s$2");
@@ -246,62 +394,15 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/([^‘])Ms. Stillons’([^s])/g, "$1Ms. Stillons’s$2");
xml = xml.replace(/([^‘])Chuckles’([^s])/g, "$1Chuckles’s$2");
- // Fixes dashes
- xml = xml.replace(/ – /g, "—");
- xml = xml.replace(/“((?:)?)-/g, "“$1—");
- xml = xml.replace(/-[,.]?”/g, "—”");
- xml = xml.replace(/-(!|\?)”/g, "—$1”");
- xml = xml.replace(/-[,.]?<\/em>”/g, "—”");
- xml = xml.replace(/-“/g, "—”");
- xml = xml.replace(/-/g, "
—");
- xml = xml.replace(/-<\/p>/g, "—
");
- xml = xml.replace(/-<\/em><\/p>/g, "—");
- xml = xml.replace(/\s?\s?–\s?\s?/g, "—");
- xml = xml.replace(/-\s\s?/g, "—");
- xml = xml.replace(/\s?\s-/g, "—");
- xml = xml.replace(/\s+—”/g, "—”");
- xml = xml.replace(/I-I/g, "I—I");
- xml = xml.replace(/I-uh/g, "I—uh");
-
- // "X-year-old" should use hyphens; all grammar guides agree. The books are very inconsistent but most often omit
- // them.
- xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2");
- xml = xml.replace(/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old");
-
- // Fix missing spaces after commas
- xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
-
- // Joint names should use em dashes
- xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
- xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
- xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
- xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
- xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
- xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
- xml = xml.replace(/G-N/g, "G–N");
- xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
- xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
- xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
- xml = xml.replace(/Challenger-Gallant/g, "Challenger–Gallant");
- xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2");
- xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
- xml = xml.replace(/East-West/g, "east–west");
- xml = xml.replace(/(Green|Yellow)-Black/g, "$1–Black");
- xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
- xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
- xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
- xml = xml.replace(/Weaver-Clockblocker/g, "Weaver–Clockblocker");
- xml = xml.replace(/Alexandria-Pretender/g, "Alexandria–Pretender");
- xml = xml.replace(/Night Hag-Nyx/g, "Night Hag–Nyx");
- xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
- xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
- xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
+ return xml;
+}
+function cleanSceneBreaks(xml) {
// Normalize scene breaks.
would be more semantically appropriate, but loses the author's intent. This is
// especially the case in Ward, which uses a variety of different scene breaks.
+
xml = xml.replace(/]*)>■<\/p>/g, `
■
`);
- xml = xml.replace(/⊙<\/p>/g, `
⊙
`);
xml = xml.replace(/⊙<\/strong><\/p>/g, `⊙
`);
xml = xml.replace(/⊙<\/strong><\/em><\/p>/g,
`⊙
`);
@@ -311,40 +412,10 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/g,
`⊙ ⊙ ⊙ ⊙ ⊙
`);
- // Fix recurring miscapitalization with questions
- xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
- xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
-
- // Fix bad periods and spacing/markup surrounding them
- xml = xml.replace(/\.\.<\/p>/g, ".
");
- xml = xml.replace(/\.\.”<\/p>/g, ".”
");
- xml = xml.replace(/ \. /g, ". ");
- xml = xml.replace(/ \.<\/p>/g, ".
");
- xml = xml.replace(/\.\.\./g, "…");
-
- // Fix extra spaces
- xml = xml.replace(/ ? <\/p>/g, "");
- xml = xml.replace(/([a-z]) ,/g, "$1,");
-
- // The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
- // > “I didn’t get much done,” Greg said, “I got distracted by...
- // This should instead be
- // > “I didn’t get much done,” Greg said. “I got distracted by...
- //
- // Our heuristic is to try to automatically fix this if the dialogue tag is two words (X said/admitted/sighed/etc.).
- //
- // This sometimes overcorrects, as in the following example:
- // > “Basically,” Alec said, “For your powers to manifest, ...
- // Here instead we should lowercase the "f". We handle that via one-offs in substitutions.json.
- //
- // This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
- // times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
- // capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
- xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
-
- // Replace single-word s with s. Other s are probably erroneous too, but these are known-bad.
- xml = xml.replace(/([A-Za-z]+)<\/i>/g, "$1");
+ return xml;
+}
+function fixCapitalization(xml, book) {
// This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
// it's incorrect to capitalize in the one-off fixes.
// Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
@@ -366,13 +437,6 @@ function getBodyXML(chapter, book, contentEl) {
// This is sometimes missing its capitalization.
xml = xml.replace(/the birdcage/g, "the Birdcage");
- // This is usually spelled "TV" but sometimes the other ways. Normalize.
- xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
- xml = xml.replace(/t\.v\./ig, "TV");
-
- // This is commonly misspelled.
- xml = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");
-
// There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.)
xml = xml.replace(/Halberd/g, "halberd");
xml = xml.replace(/Loft/g, "loft");
@@ -397,9 +461,6 @@ function getBodyXML(chapter, book, contentEl) {
"$1–$2"
);
- // This is consistently missing accents
- xml = xml.replace(/Yangban/g, "Yàngbǎn");
-
// Place names need to always be capitalized
xml = xml.replace(/North end/g, "North End");
xml = xml.replace(/(Stonemast|Shale) avenue/g, "$1 Avenue");
@@ -409,14 +470,48 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/the megalopolis/g, "the Megalopolis");
xml = xml.replace(/earths(?![a-z])/g, "Earths");
- // These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
- // italicized, so we go in the direction of removing the italics.
- xml = xml.replace(/Garama<\/em>/g, "Garama");
- xml = xml.replace(/Thanda<\/em>/g, "Thanda");
- xml = xml.replace(/Sifara([^<]*)<\/em>/g, "Sifara$1");
- xml = xml.replace(/Moord Nag([^<]*)<\/em>/g, "Moord Nag$1");
- xml = xml.replace(/Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
- xml = xml.replace(/Turanta([^<]*)<\/em>/g, "Turanta$1");
+ // "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
+ // instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
+ // substitutions.json.
+ xml = xml.replace(/(? {
- if (substitution.before) {
- const indexOf = xml.indexOf(substitution.before);
- if (indexOf === -1) {
- warnings.push(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
- `updated at the source, in which case, you should edit substitutions.json.`);
- }
- if (indexOf !== xml.lastIndexOf(substitution.before)) {
- warnings.push(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
- `Update substitutions.json for a more precise substitution.`);
- }
-
- xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
- } else if (substitution.regExp) {
- xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
- } else {
- warnings.push(`Invalid substitution specified for ${chapter.url}`);
- }
- });
-
- // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a .
- // Use this opportunity to insert a comment pointing to the original URL, for reference.
- xml = xml.replace(
- //,
- `\n\n`);
-
- return { xml, warnings };
+ return xml;
}
function isEmptyOrGarbage(el) {
diff --git a/lib/substitutions.json b/lib/substitutions.json
index dcfbf93..ef2bc4f 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2448,12 +2448,6 @@
"after": "all the way up to the nape of my neck"
}
],
- "https://parahumans.wordpress.com/2012/12/13/interlude-16-donation-bonus-2/": [
- {
- "before": "“",
- "after": "”"
- }
- ],
"https://parahumans.wordpress.com/2012/12/15/monarch-16-7/": [
{
"before": "Brockton bay",
@@ -2516,6 +2510,10 @@
{
"before": "ten,” I asked. “Just",
"after": "ten,” I asked, “just"
+ },
+ {
+ "before": "weeks, months. Anticipating",
+ "after": "weeks, months. Anticipating"
}
],
"https://parahumans.wordpress.com/2013/01/08/migration-17-1/": [
@@ -2654,7 +2652,7 @@
],
"https://parahumans.wordpress.com/2013/01/15/migration-17-8/": [
{
- "before": "replied. Need to talk about being more secure with our names. “What’s going on?“",
+ "before": "replied. Need to talk about being more secure with our names. “What’s going on?”",
"after": "replied. Need to talk about being more secure with our names. “What’s going on?”"
},
{
@@ -3356,6 +3354,10 @@
{
"before": "aren’t allies.",
"after": "aren’t allies.”"
+ },
+ {
+ "before": "Blameful? “Guilty",
+ "after": "Blameful? “Guilty"
}
],
"https://parahumans.wordpress.com/2013/06/20/crushed-24-5/": [
@@ -4547,6 +4549,10 @@
{
"before": "dangerous?” The recluse asked",
"after": "dangerous?” the recluse asked"
+ },
+ {
+ "before": "Stop. Please.",
+ "after": "Stop. Please."
}
],
"https://www.parahumans.net/2018/02/08/shade-4-4/": [
From 6ee5fb9ef6c2a17d05c075e8ddbd45cab4a89440 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 19:46:52 -0500
Subject: [PATCH 047/186] Fix backward closing single quotation marks
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 20 ++------------------
2 files changed, 3 insertions(+), 18 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index f69df99..4fac1b9 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -196,6 +196,7 @@ function getBodyXML(chapter, book, contentEl) {
fixEms();
fixQuotesAndApostrophes();
fixEms();
+ xml = xml.replace(/‘([^<]+)<\/em>‘/g, "‘$1’");
xml = xml.replace(/I”m/g, "I’m");
// Similar problems occur in Ward with and as do in Worm with s
diff --git a/lib/substitutions.json b/lib/substitutions.json
index ef2bc4f..5ddd4c0 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2222,8 +2222,8 @@
],
"https://parahumans.wordpress.com/2012/09/15/prey-14-4/": [
{
- "before": "a ‘No, you just told me.‘, but",
- "after": "a “No, you just told me,” but"
+ "before": "a ‘No, you just told me.’, but",
+ "after": "a ‘No, you just told me,’ but"
},
{
"before": "Amy,” Tattletale said. “But",
@@ -3749,14 +3749,6 @@
"before": "I’m not? Fuck. There’s",
"after": "I’m not? Fuck. There’s"
},
- {
- "before": "‘Freud‘, and",
- "after": "‘Freud’, and"
- },
- {
- "before": "‘who knows?‘",
- "after": "‘who knows?’"
- },
{
"before": "I suspect It’s a",
"after": "I suspect it’s a"
@@ -4599,10 +4591,6 @@
{
"before": "‘Won’t",
"after": "‘Won’t"
- },
- {
- "before": "you the number‘",
- "after": "you the number’"
}
],
"https://www.parahumans.net/2018/02/22/shade-interlude-4c/": [
@@ -5593,10 +5581,6 @@
{
"before": "for you.",
"after": "for you."
- },
- {
- "before": "Why ‘inflict‘",
- "after": "Why ‘inflict’"
}
],
"https://www.parahumans.net/2018/09/25/gleaming-9-10/": [
From 6c6b360dac181de30e93b0428b1aa5d678a26893 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 19:48:52 -0500
Subject: [PATCH 048/186] Fix a couple instances of "Hardboil" to be "Hard
Boil"
---
lib/substitutions.json | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 5ddd4c0..54b3eca 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5894,6 +5894,10 @@
{
"before": "all about how Masters have",
"after": "all about how masters have"
+ },
+ {
+ "before": "Hardboil",
+ "after": "Hard Boil"
}
],
"https://www.parahumans.net/2018/12/01/polarize-10-11/": [
@@ -6315,6 +6319,10 @@
{
"before": "a small part of that was being being grumpy",
"after": "a small part of that was being grumpy"
+ },
+ {
+ "before": "Hardboil",
+ "after": "Hard Boil"
}
],
"https://www.parahumans.net/2019/08/20/from-within-16-3/": [
From 326ec148c9b07b4847b4a182024d5100000c187c Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 19:56:45 -0500
Subject: [PATCH 049/186] =?UTF-8?q?Fix=20instances=20of=20"Warden=E2=80=99?=
=?UTF-8?q?s"=20which=20should=20be=20"Wardens=E2=80=99"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 4fac1b9..b16399b 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -395,6 +395,9 @@ function fixPossessives(xml) {
xml = xml.replace(/([^‘])Ms. Stillons’([^s])/g, "$1Ms. Stillons’s$2");
xml = xml.replace(/([^‘])Chuckles’([^s])/g, "$1Chuckles’s$2");
+ // This one is not just missing the extra "s"; it's often misplaced.
+ xml = xml.replace(/Warden’s/g, "Wardens’");
+
return xml;
}
From 13b25835ba50be4d9a376f24c52e93f1c3d76b7b Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 7 Nov 2020 19:58:11 -0500
Subject: [PATCH 050/186] =?UTF-8?q?Always=20lowercase=20"the=20Wardens?=
=?UTF-8?q?=E2=80=99=20headquarters"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
lib/convert-worker.js | 4 ++++
lib/substitutions.json | 4 ++++
2 files changed, 8 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index b16399b..9c5c0f8 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -480,6 +480,10 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/(?
Date: Sat, 14 Nov 2020 16:53:15 -0500
Subject: [PATCH 051/186] Spot fixes for Ward through Blinding 11.5
---
lib/substitutions.json | 176 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 176 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 2db7cf8..cbe754e 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5902,18 +5902,132 @@
{
"before": "Sveta added a, “No,” for good measure",
"after": "Sveta added a “no” for good measure"
+ },
+ {
+ "before": "We need to figure out how—we—handle this",
+ "after": "We need to figure out how we handle this"
}
],
"https://www.parahumans.net/2018/12/01/polarize-10-11/": [
{
"before": "See dad, sleep.",
"after": "See Dad, sleep."
+ },
+ {
+ "before": "T, u-.",
+ "after": "T, U."
+ },
+ {
+ "before": "I did it with—I’ve done it before",
+ "after": "I did it with— I’ve done it before"
+ },
+ {
+ "before": "and to be our liaisons to the Wardens",
+ "after": "and to be our liaison to the Wardens"
+ },
+ {
+ "before": "“Twelve hours ago,” I explained. “The Navigators were sent",
+ "after": "“Twelve hours ago,” I explained, “the Navigators were sent"
+ },
+ {
+ "before": "You said you wanted to limiting this serious step",
+ "after": "You said you wanted to limit this serious step"
+ },
+ {
+ "before": "On any other day, objective, I would",
+ "after": "On any other day, objectively, I would"
+ },
+ {
+ "before": "how it were more active",
+ "after": "how it was more active"
+ },
+ {
+ "before": "running to the same places.”",
+ "after": "running to the same places.”"
+ }
+ ],
+ "https://www.parahumans.net/2018/12/04/polarize-10-12/": [
+ {
+ "before": "Dryad Project 3",
+ "after": "Dryad Project Three"
+ },
+ {
+ "before": "Pollution, deforestation, ecology all things that",
+ "after": "Pollution, deforestation, ecology, all things that"
+ },
+ {
+ "before": "dropped an expose",
+ "after": "dropped an exposé"
+ },
+ {
+ "before": "—But the good feelings",
+ "after": "—but the good feelings"
+ },
+ {
+ "before": "A pan—a cure all for",
+ "after": "A pan— A cure-all for"
+ },
+ {
+ "before": "No—Yes, but that isn’t",
+ "after": "No— Yes, but that isn’t"
+ }
+ ],
+ "https://www.parahumans.net/2018/12/08/polarize-10-13/": [
+ {
+ "before": "A ring thirty feet in diameterand past those thirty feet",
+ "after": "A ring thirty feet in diameter—and past those thirty feet"
+ },
+ {
+ "before": "axe—arm",
+ "after": "axe-arm"
}
],
"https://www.parahumans.net/2018/12/11/interlude-10-y/": [
{
"before": "shape of the the moment",
"after": "shape of the moment"
+ },
+ {
+ "before": "We have! Multiple times!",
+ "after": "We have! Multiple times!"
+ }
+ ],
+ "https://www.parahumans.net/2018/12/15/interlude-10-z/": [
+ {
+ "before": "roman with hair on his legs",
+ "after": "Roman with hair on his legs"
+ },
+ {
+ "before": "New Brockton primary school",
+ "after": "New Brockton Primary School"
+ },
+ {
+ "before": "They can be so cool, but",
+ "after": "They can be so cool, but"
+ },
+ {
+ "before": "Rome-Roman’s",
+ "after": "Rome—Roman’s"
+ },
+ {
+ "before": "Amais’",
+ "after": "Amais’s"
+ }
+ ],
+ "https://www.parahumans.net/2018/12/18/blinding-11-1/": [
+ {
+ "before": "prisoner and patrol",
+ "after": "prisoner and Patrol"
+ },
+ {
+ "before": "before they chage",
+ "after": "before they change"
+ }
+ ],
+ "https://www.parahumans.net/2018/12/22/blinding-11-2/": [
+ {
+ "before": "—But please respect",
+ "after": "—but please respect"
}
],
"https://www.parahumans.net/2018/12/25/blinding-11-3/": [
@@ -5921,6 +6035,68 @@
"before": "the pharmacist",
"after": "the Pharmacist",
"_comment": "See comment in https://www.parahumans.net/2018/09/29/gleaming-9-11/"
+ },
+ {
+ "before": "…Delivering my coup de grace",
+ "after": "…delivering my coup de grace"
+ },
+ {
+ "before": "G.G..",
+ "after": "G.G."
+ },
+ {
+ "before": "Flash Gun",
+ "after": "flash gun"
+ }
+ ],
+ "https://www.parahumans.net/2018/12/29/interlude-11-a/": [
+ {
+ "before": "D.J..",
+ "after": "D.J."
+ },
+ {
+ "before": "Chiet",
+ "after": "Cheit"
+ }
+ ],
+ "https://www.parahumans.net/2019/01/01/blinding-11-4/": [
+ {
+ "before": "—But if your head is full of noise",
+ "after": "—but if your head is full of noise"
+ },
+ {
+ "before": "One of the Heartbroken—Candy, I was pretty sure, was sitting",
+ "after": "One of the Heartbroken—Candy, I was pretty sure—was sitting"
+ }
+ ],
+ "https://www.parahumans.net/2019/01/05/blinding-11-5/": [
+ {
+ "before": "they‘re",
+ "after": "they’re"
+ },
+ {
+ "before": "and piece of construction material",
+ "after": "and pieces of construction material"
+ },
+ {
+ "before": "the chiming finding it stride",
+ "after": "the chiming finding its stride"
+ },
+ {
+ "before": "need to step in.",
+ "after": "need to step in.”"
+ },
+ {
+ "before": "a smell had soaked into",
+ "after": "a smell that had soaked into"
+ },
+ {
+ "before": "result putting together",
+ "after": "result of putting together"
+ },
+ {
+ "before": "’emergency alert’",
+ "after": "‘emergency alert’"
}
],
"https://www.parahumans.net/2019/01/12/blinding-11-7/": [
From c5b13e7cc1e5a66cb37f4bd7b03a5c6b35b50a58 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 16:54:19 -0500
Subject: [PATCH 052/186] Fix capitalization and apostrophes for truncated
names
---
lib/convert-worker.js | 22 ++++++++++++++++++++--
lib/substitutions.json | 8 --------
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 9c5c0f8..8d73a55 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -186,8 +186,6 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/“\s+/g, "
“");
xml = xml.replace(/'/g, "’");
xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
- xml = xml.replace(/‘Sup/g, "’Sup");
- xml = xml.replace(/‘cuz/g, "’cuz");
xml = xml.replace(/([a-z])”<\/p>/g, "$1.”
");
}
@@ -226,6 +224,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/ ? <\/p>/g, "");
xml = xml.replace(/([a-z]) ,/g, "$1,");
+ xml = fixTruncatedWords(xml);
xml = fixDialogueTags(xml);
xml = fixForeignNames(xml);
xml = fixEmDashes(xml);
@@ -267,6 +266,22 @@ function getBodyXML(chapter, book, contentEl) {
return { xml, warnings };
}
+function fixTruncatedWords(xml) {
+ xml = xml.replace(/‘Sup/g, "’Sup");
+ xml = xml.replace(/‘cuz/g, "’cuz");
+
+ // Short for "Sidepiece"; normalize either quote mark and either case to ’Piece (not when more letters follow)
+ xml = xml.replace(/[‘’][Pp]iece(?![a-z])/g, "’Piece");
+
+ // Short for "Disjoint"; normalized the same way to ’Joint
+ xml = xml.replace(/[‘’][Jj]oint(?![a-z])/g, "’Joint");
+
+ // Short for "Contender"; normalized the same way to ’Tend
+ xml = xml.replace(/[‘’][Tt]end(?![a-z])/g, "’Tend");
+
+ return xml;
+}
+
function fixDialogueTags(xml) {
// Fix recurring miscapitalization with questions
xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
@@ -428,6 +443,9 @@ function fixCapitalization(xml, book) {
// ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
xml = xml.replace(/Resound/g, "ReSound");
+ // The Speedrunners team name is missing its capitalization a couple times.
+ xml = xml.replace(/speedrunners/g, "Speedrunners");
+
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol ".
diff --git a/lib/substitutions.json b/lib/substitutions.json
index cbe754e..6aa1e31 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4628,10 +4628,6 @@
}
],
"https://www.parahumans.net/2018/03/10/shadow-5-5/": [
- {
- "before": "Don’t fucking hit me, ‘piece",
- "after": "Don’t fucking hit me, ’piece"
- },
{
"before": "out of the back, “She still would have",
"after": "out of the back, “she still would have"
@@ -5847,10 +5843,6 @@
"before": "confines of the suit. and rearranged herself",
"after": "confines of the suit, and rearranged herself"
},
- {
- "before": "‘piece",
- "after": "’piece"
- },
{
"before": "Fuck y—of course",
"after": "Fuck y— Of course"
From 15e34e47a316d616acd890ddd3b3be7eb736b9cc Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 17:00:06 -0500
Subject: [PATCH 053/186] Fix double-periods in Ward
It appears all the ones in Worm, at least detectable via this pattern, were already caught.
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 42 ------------------------------------------
2 files changed, 1 insertion(+), 42 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 8d73a55..0b7a59c 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -219,6 +219,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/ \. /g, ". ");
xml = xml.replace(/ \.<\/p>/g, ".");
xml = xml.replace(/\.\.\./g, "…");
+ xml = xml.replace(/\.\. {2}/g, ". ");
// Fix extra spaces
xml = xml.replace(/ ? <\/p>/g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 6aa1e31..eb74acf 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2322,16 +2322,6 @@
"after": "this,” Regent said, “I"
}
],
- "https://parahumans.wordpress.com/2012/11/03/colony-15-6/": [
- {
- "before": "M.O..",
- "after": "M.O."
- },
- {
- "before": "tearing flesh..",
- "after": "tearing flesh."
- }
- ],
"https://parahumans.wordpress.com/2012/11/06/colony-15-7/": [
{
"before": "this sort of resistance.",
@@ -2401,10 +2391,6 @@
}
],
"https://parahumans.wordpress.com/2012/11/24/monarch-16-1/": [
- {
- "before": "A.I..",
- "after": "A.I."
- },
{
"before": "; Nobody",
"after": "; nobody"
@@ -2727,10 +2713,6 @@
}
],
"https://parahumans.wordpress.com/2013/02/05/monarch-18-6/": [
- {
- "before": "M.M..",
- "after": "M.M."
- },
{
"before": "‘okay’",
"after": "‘OK’",
@@ -3544,12 +3526,6 @@
"after": "kill all of the masters that are generating"
}
],
- "https://parahumans.wordpress.com/2013/08/03/sting-26-6/": [
- {
- "before": "canals..",
- "after": "canals."
- }
- ],
"https://parahumans.wordpress.com/2013/08/06/interlude-26a/": [
{
"before": "city, Golem thought.",
@@ -3638,12 +3614,6 @@
"after": "said. “But"
}
],
- "https://parahumans.wordpress.com/2013/08/22/extinction-27-4/": [
- {
- "before": "pattern..",
- "after": "pattern."
- }
- ],
"https://parahumans.wordpress.com/2013/08/24/extinction-27-5/": [
{
"before": "around around",
@@ -5769,10 +5739,6 @@
"before": "we should get going",
"after": "We should get going"
},
- {
- "before": "clean her stuff.. She’ll be out soon",
- "after": "clean her stuff. She’ll be out soon"
- },
{
"before": "finger—and thumb-rings",
"after": "finger- and thumb-rings"
@@ -6032,20 +5998,12 @@
"before": "…Delivering my coup de grace",
"after": "…delivering my coup de grace"
},
- {
- "before": "G.G..",
- "after": "G.G."
- },
{
"before": "Flash Gun",
"after": "flash gun"
}
],
"https://www.parahumans.net/2018/12/29/interlude-11-a/": [
- {
- "before": "D.J..",
- "after": "D.J."
- },
{
"before": "Chiet",
"after": "Cheit"
From 1a2b6de78e12a04a5aed90c4443344e9f964fe81 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 17:23:22 -0500
Subject: [PATCH 054/186] Standardize on one style for "Case Fifty-Three" and
friends
---
lib/convert-worker.js | 26 ++++++++++++++++++++++++++
lib/substitutions.json | 16 ----------------
2 files changed, 26 insertions(+), 16 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 0b7a59c..a943d8f 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -236,6 +236,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = fixMispellings(xml);
xml = fixHyphens(xml);
xml = standardizeSpellings(xml);
+ xml = fixCaseNumbers(xml);
// One-off fixes
for (const substitution of substitutions[chapter.url] || []) {
@@ -563,6 +564,31 @@ function standardizeSpellings(xml) {
return xml;
}
+function fixCaseNumbers(xml) {
+ // Case numbers are very inconsistent. For "Case Fifty-Three", the breakdown is:
+ // * 9 Case-53
+ // * 6 Case 53
+ // * 2 case-53
+ // * 1 Case-Fifty-Three
+ // * 41 Case Fifty-Three
+ // * 1 Case Fifty Three
+ // * 13 Case fifty-three
+ // * 119 case fifty-three
+ // * 4 case-fifty-three
+ // * 1 case fifty three
+ // We standardize on "Case Fifty-Three"; although it isn't the most common, it seems best to treat these as proper
+ // nouns. Each pattern pairs the spelled-out number with its own numeral (53, 32, 69); (?!’) skips matches before ’.
+
+ xml = xml.replace(/case[ -](?:fifty[ -]three|53)(?!’)/ig, "Case Fifty-Three");
+ xml = xml.replace(/case[ -](?:thirty[ -]two|32)(?!’)/ig, "Case Thirty-Two");
+ xml = xml.replace(/case[ -](?:sixty[ -]nine|69)(?!’)/ig, "Case Sixty-Nine");
+
+ xml = xml.replace(/(? "Case " + caseNumber[0].toUpperCase() + caseNumber.substring(1));
+
+ return xml;
+}
+
function isEmptyOrGarbage(el) {
const text = el.textContent.trim();
return text === "" ||
diff --git a/lib/substitutions.json b/lib/substitutions.json
index eb74acf..edab3e9 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -3723,10 +3723,6 @@
"before": "I suspect It’s a",
"after": "I suspect it’s a"
},
- {
- "before": "Case Fifty-three",
- "after": "case fifty-three"
- },
{
"before": "reality,” Tattletale said. “What’s",
"after": "reality,” Tattletale said, “what’s"
@@ -3751,10 +3747,6 @@
}
],
"https://parahumans.wordpress.com/2013/09/17/interlude-28/": [
- {
- "before": "Case fifty-threes",
- "after": "case fifty-threes"
- },
{
"before": "than,” Revel paused. “Six",
"after": "than,” Revel paused, “six"
@@ -4045,14 +4037,6 @@
"before": "gun build",
"after": "gun built"
},
- {
- "before": "hunchbacked Case fifty-three",
- "after": "hunchbacked case fifty-three"
- },
- {
- "before": "moved Case fifty-threes",
- "after": "moved case fifty-threes"
- },
{
"before": "I revoked my control over her, leaving in in the middle",
"after": "I revoked my control over her, leaving her in the middle"
From 631897b011d5cb1d49ef1499d51d5cfc9b0f2574 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 17:36:52 -0500
Subject: [PATCH 055/186] Capitalize "Heartbroken" when it's a proper noun
---
lib/substitutions.json | 60 +++++++++++++++++++++++++++++++++++++-----
1 file changed, 54 insertions(+), 6 deletions(-)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index edab3e9..97ef03c 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -6031,6 +6031,14 @@
{
"before": "’emergency alert’",
"after": "‘emergency alert’"
+ },
+ {
+ "before": "Three heartbroken",
+ "after": "Three Heartbroken"
+ },
+ {
+ "before": "two heartbroken",
+ "after": "two Heartbroken"
}
],
"https://www.parahumans.net/2019/01/12/blinding-11-7/": [
@@ -6045,6 +6053,18 @@
{
"before": "more fragile than it should have been",
"after": "more fragile than it should have been."
+ },
+ {
+ "before": "too close to heartbroken",
+ "after": "too close to Heartbroken"
+ },
+ {
+ "before": "Precipice and the heartbroken",
+ "after": "Precipice and the Heartbroken"
+ },
+ {
+ "before": "If the heartbroken’s power",
+ "after": "If the Heartbroken’s power"
}
],
"https://www.parahumans.net/2019/01/15/blinding-11-8/": [
@@ -6057,12 +6077,6 @@
"after": "Victoria-flesh"
}
],
- "https://www.parahumans.net/2019/01/26/blinding-11-9/": [
- {
- "before": "I told the mercenary.”",
- "after": "I told the mercenary."
- }
- ],
"https://www.parahumans.net/2019/01/19/interlude-11-b/": [
{
"before": "loading up trucks with basic supplies",
@@ -6070,6 +6084,10 @@
}
],
"https://www.parahumans.net/2019/01/26/blinding-11-9/": [
+ {
+ "before": "I told the mercenary.”",
+ "after": "I told the mercenary."
+ },
{
"before": "someone had been been called at three",
"after": "someone had been called at three"
@@ -6077,6 +6095,20 @@
{
"before": "I croaked. as she picked up",
"after": "I croaked, as she picked up"
+ },
+ {
+ "before": "two heartbroken",
+ "after": "two Heartbroken"
+ },
+ {
+ "before": "three heartbroken",
+ "after": "three Heartbroken"
+ }
+ ],
+ "https://www.parahumans.net/2019/01/29/blinding-11-10/": [
+ {
+ "before": "heartbroken kid",
+ "after": "Heartbroken kid"
}
],
"https://www.parahumans.net/2019/02/02/blinding-11-11/": [
@@ -6237,6 +6269,12 @@
"after": "wiped—or perhaps struck—clean."
}
],
+ "https://www.parahumans.net/2019/04/16/black-13-5/": [
+ {
+ "before": "For some of the heartbroken",
+ "after": "For some of the Heartbroken"
+ }
+ ],
"https://www.parahumans.net/2019/04/20/black-13-6/": [
{
"before": "I’m a Master, right, you",
@@ -6375,6 +6413,10 @@
{
"before": "wall met ceiling. the wall was smooth",
"after": "wall met ceiling. The wall was smooth"
+ },
+ {
+ "before": "The blond heartbroken",
+ "after": "The blond Heartbroken"
}
],
"https://www.parahumans.net/2019/07/09/dying-15-2/": [
@@ -6743,6 +6785,12 @@
"after": "figure out while she was gone. We’ll see who"
}
],
+ "https://www.parahumans.net/2020/02/18/infrared-19-g/": [
+ {
+ "before": "Imp and the heartbroken",
+ "after": "Imp and the Heartbroken"
+ }
+ ],
"https://www.parahumans.net/2020/02/25/last-20-1/": [
{
"before": "across this clearing. eyes, cameras",
From 9da59c9104844d29a4444dc9b3bdda37f61b7488 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 17:49:41 -0500
Subject: [PATCH 056/186] Fix a variety of missing periods
---
lib/substitutions.json | 56 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 97ef03c..9f0c45c 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -444,6 +444,10 @@
{
"before": "down the stairs. then as carefully as we could",
"after": "down the stairs. Then, as carefully as we could"
+ },
+ {
+ "before": "I took his feet With agonizing slowness",
+ "after": "I took his feet. With agonizing slowness"
}
],
"https://parahumans.wordpress.com/2011/12/03/hive-5-9/": [
@@ -754,6 +758,10 @@
{
"before": "Coil,” I spoke. “It’s",
"after": "Coil,” I spoke, “it’s"
+ },
+ {
+ "before": "trying to convince me The question",
+ "after": "trying to convince me. The question"
}
],
"https://parahumans.wordpress.com/2012/02/25/buzz-7-12/": [
@@ -1450,6 +1458,10 @@
{
"before": "“…Things are",
"after": "“…things are"
+ },
+ {
+ "before": "swarm had gone Had the girl’s armor",
+ "after": "swarm had gone. Had the girl’s armor"
}
],
"https://parahumans.wordpress.com/2012/04/24/parasite-10-1/": [
@@ -2785,6 +2797,10 @@
{
"before": "priest,” he said. “I",
"after": "priest,” he said, “I"
+ },
+ {
+ "before": "Jessica observed She looked like",
+ "after": "Jessica observed. She looked like"
}
],
"https://parahumans.wordpress.com/2013/02/09/queen-18-7/": [
@@ -2905,6 +2921,12 @@
"after": "noting,” Legend said, “that"
}
],
+ "https://parahumans.wordpress.com/2013/03/05/scourge-19-5/": [
+ {
+ "before": "returned to a standing position Her face was softer",
+ "after": "returned to a standing position. Her face was softer"
+ }
+ ],
"https://parahumans.wordpress.com/2013/03/09/scourge-19-6/": [
{
"before": "heailng",
@@ -3500,6 +3522,10 @@
{
"before": "think,” Dobrynja said. “You’ve",
"after": "think,” Dobrynja said, “you’ve"
+ },
+ {
+ "before": "particular type out there Eight Cherishes are dead",
+ "after": "particular type out there. Eight Cherishes are dead"
}
],
"https://parahumans.wordpress.com/2013/07/27/sting-26-4/": [
@@ -3570,6 +3596,10 @@
{
"before": "these past few years",
"after": "these past few years."
+ },
+ {
+ "before": "Focus Memorize.",
+ "after": "Focus. Memorize."
}
],
"https://parahumans.wordpress.com/2013/08/13/extinction-27-1/": [
@@ -3886,6 +3916,10 @@
{
"before": "panting for breath. the wound at his",
"after": "panting for breath. The wound at his"
+ },
+ {
+ "before": "appeared behind her A man with yellow skin",
+ "after": "appeared behind her. A man with yellow skin"
}
],
"https://parahumans.wordpress.com/2013/10/15/speck-30-1/": [
@@ -5922,6 +5956,10 @@
{
"before": "axe—arm",
"after": "axe-arm"
+ },
+ {
+ "before": "hit him again He wouldn’t simply",
+ "after": "hit him again. He wouldn’t simply"
}
],
"https://www.parahumans.net/2018/12/11/interlude-10-y/": [
@@ -6041,6 +6079,16 @@
"after": "two Heartbroken"
}
],
+ "https://www.parahumans.net/2019/01/08/blinding-11-6/": [
+ {
+ "before": "Let’s go Let’s make a hole",
+ "after": "Let’s go. Let’s make a hole"
+ },
+ {
+ "before": "catch up with the group I saw Foil and Chastity",
+ "after": "catch up with the group. I saw Foil and Chastity"
+ }
+ ],
"https://www.parahumans.net/2019/01/12/blinding-11-7/": [
{
"before": "threw the the man",
@@ -6575,6 +6623,10 @@
{
"before": "one primary goal—me-, and who",
"after": "one primary goal—me—and who"
+ },
+ {
+ "before": "She’s aggressive If anyone makes",
+ "after": "She’s aggressive. If anyone makes"
}
],
"https://www.parahumans.net/2019/11/02/sundown-17-10/": [
@@ -6661,6 +6713,10 @@
{
"before": "“I’m fine” Egg said",
"after": "“I’m fine,” Egg said"
+ },
+ {
+ "before": "can’t induce triggers There were parahumans",
+ "after": "can’t induce triggers. There were parahumans"
}
],
"https://www.parahumans.net/2020/01/04/infrared-19-3/": [
From 05a9abab9119b6c1825ba24cf279e5e070922c54 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 17:56:50 -0500
Subject: [PATCH 057/186] Fix many erroneously-italicized exclamation points
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 44 +++++++++++++++---------------------------
2 files changed, 17 insertions(+), 28 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index a943d8f..d90d750 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -196,6 +196,7 @@ function getBodyXML(chapter, book, contentEl) {
fixEms();
xml = xml.replace(/‘([^<]+)<\/em>‘/g, "‘$1’");
xml = xml.replace(/I”m/g, "I’m");
+ xml = xml.replace(/([a-z]+)!<\/em>/g, "$1!");
// Similar problems occur in Ward with <b> and <strong> as do in Worm with <em>s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 9f0c45c..5485182 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2282,6 +2282,10 @@
{
"before": "volunteered, asked
\n to",
"after": "volunteered, asked to"
+ },
+ {
+ "before": "happened to my daughter!?",
+ "after": "happened to my daughter!?"
}
],
"https://parahumans.wordpress.com/2012/10/25/interlude-15-donation-bonus-2/": [
@@ -2660,6 +2664,10 @@
{
"before": "on his knees. he held the cigarette",
"after": "on his knees. He held the cigarette"
+ },
+ {
+ "before": "‘hief!",
+ "after": "’hief!"
}
],
"https://parahumans.wordpress.com/2013/01/19/queen-18-1/": [
@@ -3550,6 +3558,10 @@
{
"before": "kill all of the Masters that are generating",
"after": "kill all of the masters that are generating"
+ },
+ {
+ "before": "—break up the fog!",
+ "after": "—break up the fog!"
}
],
"https://parahumans.wordpress.com/2013/08/06/interlude-26a/": [
@@ -4263,10 +4275,6 @@
"before": "Tattletale:
\nwaiting?",
"after": "Tattletale: waiting?"
},
- {
- "before": "yes!",
- "after": "yes!"
- },
{
"before": "Please, If you",
"after": "Please, if you"
@@ -5177,10 +5185,6 @@
{
"before": "Keith said. “You decide",
"after": "Keith said, “you decide"
- },
- {
- "before": "The towels!",
- "after": "The towels!"
}
],
"https://www.parahumans.net/2018/07/03/beacon-8-1/": [
@@ -5348,10 +5352,6 @@
"before": "Natalie’s beetle",
"after": "Natalie’s Beetle"
},
- {
- "before": "I said!",
- "after": "I said!"
- },
{
"before": "adam’s apple",
"after": "Adam’s apple"
@@ -5512,10 +5512,6 @@
"before": "it was one thing!",
"after": "It was one thing!"
},
- {
- "before": "as an anything!",
- "after": "as an anything!"
- },
{
"before": "have backed you up!",
"after": "have backed you up!"
@@ -5834,14 +5830,6 @@
{
"before": "they had Fallen behind",
"after": "they had fallen behind"
- },
- {
- "before": "Fucking heroes!",
- "after": "Fucking heroes!"
- },
- {
- "before": "the unfucked!",
- "after": "the unfucked!"
}
],
"https://www.parahumans.net/2018/11/24/interlude-10-x/": [
@@ -6163,6 +6151,10 @@
{
"before": "to Capricorn. then she looked",
"after": "to Capricorn. Then she looked"
+ },
+ {
+ "before": "fucking daughter!",
+ "after": "fucking daughter!"
}
],
"https://www.parahumans.net/2019/02/05/blinding-11-12/": [
@@ -6213,10 +6205,6 @@
{
"before": "I need dad",
"after": "I need Dad"
- },
- {
- "before": "Oh fuck!",
- "after": "Oh fuck!"
}
],
"https://www.parahumans.net/2019/02/23/heavens-12-3/": [
From d2402b1bb2980c7698c0b5301449ac8614ce1486 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 18:13:12 -0500
Subject: [PATCH 058/186] Fix many erroneously-italicized closing quotes
---
lib/convert-worker.js | 8 +++++---
lib/substitutions.json | 40 ++++++++++++++++++++++++++--------------
2 files changed, 31 insertions(+), 17 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index d90d750..63967ca 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -161,9 +161,9 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/‘\s*([^<]+)\s*’<\/em>/g, "‘$1’");
xml = xml.replace(/‘\s*([^<]+)\s*<\/em>\s*’/g, "‘$1’");
xml = xml.replace(/‘\s*\s*([^<]+)\s*’<\/em>/g, "‘$1’");
- xml = xml.replace(/“\s*([^<]+)\s*”<\/em>/g, "“$1”");
- xml = xml.replace(/“\s*([^<]+)\s*<\/em>\s*”/g, "“$1”");
- xml = xml.replace(/“\s*\s*([^<]+)\s*”<\/em>/g, "“$1”");
+ xml = xml.replace(/“\s*([^<”]+)\s*”<\/em>/g, "“$1”");
+ xml = xml.replace(/“\s*([^<”]+)\s*<\/em>\s*”/g, "“$1”");
+ xml = xml.replace(/“\s*\s*([^<”]+)\s*”<\/em>/g, "“$1”");
xml = xml.replace(/([^\n>]) ?/g, "$1 ");
xml = xml.replace(/ ?<\/em>/g, " ");
xml = xml.replace(/]+)> /g, "");
@@ -197,6 +197,8 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/‘([^<]+)<\/em>‘/g, "‘$1’");
xml = xml.replace(/I”m/g, "I’m");
xml = xml.replace(/([a-z]+)!<\/em>/g, "$1!");
+ xml = xml.replace(/(?([\w ’]+)([!.?])”<\/em>/g, "$1$2”");
+ xml = xml.replace(/([\w ’]+[!.?])”<\/em>/g, "$1”");
// Similar problems occur in Ward with <b> and <strong> as do in Worm with <em>s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 5485182..8cdf8c4 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2544,6 +2544,10 @@
{
"before": "city,” Jess said. “And",
"after": "city,” Jess said, “and"
+ },
+ {
+ "before": "“No!”",
+ "after": "“No!”"
}
],
"https://parahumans.wordpress.com/2013/01/10/migration-17-3/": [
@@ -3438,6 +3442,10 @@
{
"before": "White capped",
"after": "White-capped"
+ },
+ {
+ "before": ". “Bosses are worried.”",
+ "after": ". “Bosses are worried.”"
}
],
"https://parahumans.wordpress.com/2013/07/09/scarab-25-4/": [
@@ -3486,12 +3494,6 @@
"after": "avoided"
}
],
- "https://parahumans.wordpress.com/2013/07/16/interlude-25/": [
- {
- "before": "your manners?”",
- "after": "your manners?”"
- }
- ],
"https://parahumans.wordpress.com/2013/07/18/sting-26-1/": [
{
"before": "up,” I said. “The",
@@ -5798,10 +5800,6 @@
{
"before": "Be safe—Nat.",
"after": "Be safe —Nat."
- },
- {
- "before": "kill each other.”",
- "after": "kill each other.”"
}
],
"https://www.parahumans.net/2018/11/17/polarize-10-8/": [
@@ -5904,10 +5902,6 @@
{
"before": "how it were more active",
"after": "how it was more active"
- },
- {
- "before": "running to the same places.”",
- "after": "running to the same places.”"
}
],
"https://www.parahumans.net/2018/12/04/polarize-10-12/": [
@@ -6287,6 +6281,10 @@
{
"before": "you have have surmised",
"after": "you have surmised"
+ },
+ {
+ "before": "pound of flesh, at least—!”",
+ "after": "pound of flesh, at least—!”"
}
],
"https://www.parahumans.net/2019/04/02/black-13-1/": [
@@ -6649,6 +6647,10 @@
{
"before": "looking for mom in the crowd",
"after": "looking for Mom in the crowd"
+ },
+ {
+ "before": "“Entrapment.”",
+ "after": "“Entrapment.”"
}
],
"https://www.parahumans.net/2019/11/23/radiation-18-3/": [
@@ -6803,6 +6805,10 @@
{
"before": "you know about” my Aunt Sarah said",
"after": "you know about,” my Aunt Sarah said"
+ },
+ {
+ "before": "so they grow in…”",
+ "after": "so they grow in…”"
}
],
"https://www.parahumans.net/2020/02/11/infrared-19-f/": [
@@ -6841,6 +6847,12 @@
"after": "across this clearing. Eyes, cameras"
}
],
+ "https://www.parahumans.net/2020/03/07/last-20-4/": [
+ {
+ "before": "peace!” Cryptid growled",
+ "after": "peace!” Cryptid growled"
+ }
+ ],
"https://www.parahumans.net/2020/03/31/last-20-10/": [
{
"before": "shaping it as it rolled out. it became a circular",
From 075db7f4461739fd023f17ba3051fda519f8397c Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 18:37:52 -0500
Subject: [PATCH 059/186] Standardize on "Mrs." Yamada, not "Ms."
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 63967ca..6d722e1 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -228,6 +228,9 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/ ? <\/p>/g, "
");
xml = xml.replace(/([a-z]) ,/g, "$1,");
+ // 197 instances of "Mrs." to 21 of "Ms."
+ xml = xml.replace(/Ms\. Yamada/g, "Mrs. Yamada");
+
xml = fixTruncatedWords(xml);
xml = fixDialogueTags(xml);
xml = fixForeignNames(xml);
From 584a52fc27f73f9bf231e72bae99c86de9d37268 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 18:41:39 -0500
Subject: [PATCH 060/186] Standardize on "Amias", not "Amais"
---
lib/convert-worker.js | 15 ++++++++++++---
lib/substitutions.json | 4 ----
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 6d722e1..acd415e 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -228,12 +228,10 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/ ? <\/p>/g, "
");
xml = xml.replace(/([a-z]) ,/g, "$1,");
- // 197 instances of "Mrs." to 21 of "Ms."
- xml = xml.replace(/Ms\. Yamada/g, "Mrs. Yamada");
-
xml = fixTruncatedWords(xml);
xml = fixDialogueTags(xml);
xml = fixForeignNames(xml);
+ xml = standardizeNames(xml);
xml = fixEmDashes(xml);
xml = enDashJointNames(xml);
xml = fixPossessives(xml);
@@ -330,6 +328,16 @@ function fixForeignNames(xml) {
return xml;
}
+function standardizeNames(xml) {
+ // 197 instances of "Mrs." to 21 of "Ms."
+ xml = xml.replace(/Ms\. Yamada/g, "Mrs. Yamada");
+
+ // 25 instances of "Amias" to 3 of "Amais"
+ xml = xml.replace(/Amais/g, "Amias");
+
+ return xml;
+}
+
function fixEmDashes(xml) {
xml = xml.replace(/ – /g, "—");
xml = xml.replace(/“((?:)?)-/g, "“$1—");
@@ -417,6 +425,7 @@ function fixPossessives(xml) {
xml = xml.replace(/([^‘])Mrs. Sims’([^s])/g, "$1Mrs. Sims’s$2");
xml = xml.replace(/([^‘])Ms. Stillons’([^s])/g, "$1Ms. Stillons’s$2");
xml = xml.replace(/([^‘])Chuckles’([^s])/g, "$1Chuckles’s$2");
+ xml = xml.replace(/([^‘])Amias’([^s])/g, "$1Amias’s$2");
// This one is not just missing the extra "s"; it's often misplaced.
xml = xml.replace(/Warden’s/g, "Wardens’");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 8cdf8c4..507413d 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5970,10 +5970,6 @@
{
"before": "Rome-Roman’s",
"after": "Rome—Roman’s"
- },
- {
- "before": "Amais’",
- "after": "Amais’s"
}
],
"https://www.parahumans.net/2018/12/18/blinding-11-1/": [
From 41566a380ebf8830b349aa426e174bae8ed81afe Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 18:46:43 -0500
Subject: [PATCH 061/186] Refactor possessive-fixing regular expressions
---
lib/convert-worker.js | 40 +++++-----------------------------------
1 file changed, 5 insertions(+), 35 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index acd415e..f5f9bc2 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -391,41 +391,11 @@ function enDashJointNames(xml) {
function fixPossessives(xml) {
// Fix possessive of names ending in "s"
// Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
- xml = xml.replace(/([^‘])Judas’([^s])/g, "$1Judas’s$2");
- xml = xml.replace(/([^‘])Brutus’([^s])/g, "$1Brutus’s$2");
- xml = xml.replace(/([^‘])Jess’([^s])/g, "$1Jess’s$2");
- xml = xml.replace(/([^‘])Aegis’([^s])/g, "$1Aegis’s$2");
- xml = xml.replace(/([^‘])Dauntless’([^s])/g, "$1Dauntless’s$2");
- xml = xml.replace(/([^‘])Circus’([^s])/g, "$1Circus’s$2");
- xml = xml.replace(/([^‘])Sirius’([^s])/g, "$1Sirius’s$2");
- xml = xml.replace(/([^‘])Brooks’([^s])/g, "$1Brooks’s$2");
- xml = xml.replace(/([^‘])Genesis’([^s])/g, "$1Genesis’s$2");
- xml = xml.replace(/([^‘])Atlas’([^s])/g, "$1Atlas’s$2");
- xml = xml.replace(/([^‘])Lucas’([^s])/g, "$1Lucas’s$2");
- xml = xml.replace(/([^‘])Gwerrus’([^s])/g, "$1Gwerrus’s$2");
- xml = xml.replace(/([^‘])Chris’([^s])/g, "$1Chris’s$2");
- xml = xml.replace(/([^‘])Eligos’([^s])/g, "$1Eligos’s$2");
- xml = xml.replace(/([^‘])Animos’([^s])/g, "$1Animos’s$2");
- xml = xml.replace(/([^‘])Mags’([^s])/g, "$1Mags’s$2");
- xml = xml.replace(/([^‘])Huntress’([^s])/g, "$1Huntress’s$2");
- xml = xml.replace(/([^‘])Hephaestus’([^s])/g, "$1Hephaestus’s$2");
- xml = xml.replace(/([^‘])Lord of Loss’([^s])/g, "$1Lord of Loss’s$2");
- xml = xml.replace(/([^‘])John Combs’([^s])/g, "$1John Combs’s$2");
- xml = xml.replace(/([^‘])Mama Mathers’([^s])/g, "$1Mama Mathers’s$2");
- xml = xml.replace(/([^‘])Monokeros’([^s])/g, "$1Monokeros’s$2");
- xml = xml.replace(/([^‘])Goddess’([^s])/g, "$1Goddess’s$2");
- xml = xml.replace(/([^‘])Boundless’([^s])/g, "$1Boundless’s$2");
- xml = xml.replace(/([^‘])Paris’([^s])/g, "$1Paris’s$2");
- xml = xml.replace(/([^‘])Tress’([^s])/g, "$1Tress’s$2");
- xml = xml.replace(/([^‘])Harris’([^s])/g, "$1Harris’s$2");
- xml = xml.replace(/([^‘])Antares’([^s])/g, "$1Antares’s$2");
- xml = xml.replace(/([^‘])Nieves’([^s])/g, "$1Nieves’s$2");
- xml = xml.replace(/([^‘])Backwoods’([^s])/g, "$1Backwoods’s$2");
- xml = xml.replace(/([^‘])Midas’([^s])/g, "$1Midas’s$2");
- xml = xml.replace(/([^‘])Mrs. Sims’([^s])/g, "$1Mrs. Sims’s$2");
- xml = xml.replace(/([^‘])Ms. Stillons’([^s])/g, "$1Ms. Stillons’s$2");
- xml = xml.replace(/([^‘])Chuckles’([^s])/g, "$1Chuckles’s$2");
- xml = xml.replace(/([^‘])Amias’([^s])/g, "$1Amias’s$2");
+ xml = xml.replace(
+ // eslint-disable-next-line max-len
+ /(?
Date: Sat, 14 Nov 2020 18:54:02 -0500
Subject: [PATCH 062/186] Remove overcapitalization of "university"
---
lib/convert-worker.js | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index f5f9bc2..50d8eda 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -450,6 +450,10 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/Halberd/g, "halberd");
xml = xml.replace(/Loft/g, "loft");
+ // There's no reason why university should be capitalized in most contexts, although sometimes it's used as part of
+ // a compound noun or at the beginning of a sentence.
+ xml = xml.replace(/(?
Date: Sat, 14 Nov 2020 18:57:51 -0500
Subject: [PATCH 063/186] Fix inconsistently-bolded colons in Ward text
conversations
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 50d8eda..92e1821 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -208,6 +208,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/@<\/strong>/g, "@");
xml = xml.replace(/
(\s*)<\/strong>/g, "
$1");
xml = xml.replace(/(\s*)<\/strong>/g, "$1");
+ xml = xml.replace(/>(.*)<\/strong>:$1:");
// No need for line breaks before paragraph ends
// These often occur with the <br>s inside <b>/<strong>/<em>/<i> fixed above.
From 54093109bf6bb16c24e3992d59afd45040afd79b Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 19:30:00 -0500
Subject: [PATCH 064/186] Spot fixes for Ward through Heavens 12.4
---
lib/convert-worker.js | 4 +-
lib/substitutions.json | 134 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 136 insertions(+), 2 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 92e1821..bd9d172 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -437,7 +437,7 @@ function fixCapitalization(xml, book) {
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol ".
- xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus)/ig,
+ xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus|training)/ig,
(_, $1) => `Patrol ${$1.toLowerCase()}`);
// This always works in Ward and has a few false positives in Worm, where it is never needed:
if (book === "ward") {
@@ -546,7 +546,7 @@ function standardizeSpellings(xml) {
xml = xml.replace(/cliche/g, "cliché");
// T-shirt is usually spelled lowercase ("t-shirt"). Normalize the remaining instances.
- xml = xml.replace(/T-shirt/g, "t-shirt");
+ xml = xml.replace(/(?",
"after": "loading up trucks with basic supplies."
+ },
+ {
+ "before": "I’m sor—that fucking sucks",
+ "after": "I’m sor— That fucking sucks"
+ },
+ {
+ "before": "two of the targets was really",
+ "after": "two of the targets were really"
+ },
+ {
+ "before": "punch a hold into the ground",
+ "after": "punch a hole into the ground"
+ }
+ ],
+ "https://www.parahumans.net/2019/01/22/interlude-11-c/": [
+ {
+ "before": "were a worse adult examples",
+ "after": "were worse adult examples"
+ },
+ {
+ "before": "four-times-normal—width",
+ "after": "four-times-normal-width"
+ },
+ {
+ "before": "accumualted",
+ "after": "accumulated"
+ },
+ {
+ "before": "get away from one swung",
+ "after": "get away from one swing"
+ },
+ {
+ "before": "“Cradle—the most intact part of Tattletale said",
+ "after": "“Cradle—” the most intact part of Tattletale said"
}
],
"https://www.parahumans.net/2019/01/26/blinding-11-9/": [
@@ -6129,12 +6163,28 @@
{
"before": "three heartbroken",
"after": "three Heartbroken"
+ },
+ {
+ "before": "Do yo want me to send",
+ "after": "Do you want me to send"
+ },
+ {
+ "before": "PRT—or SWAT-van style",
+ "after": "PRT- or SWAT-van style"
+ },
+ {
+ "before": "apparently I",
+ "after": "apparently I"
}
],
"https://www.parahumans.net/2019/01/29/blinding-11-10/": [
{
"before": "heartbroken kid",
"after": "Heartbroken kid"
+ },
+ {
+ "before": "The team, the dynamic with the Undersiders, the Heartbroken, the kids",
+ "after": "The team, the dynamic with the Undersiders, the Heartbroken, the kids"
}
],
"https://www.parahumans.net/2019/02/02/blinding-11-11/": [
@@ -6145,12 +6195,36 @@
{
"before": "fucking daughter!",
"after": "fucking daughter!"
+ },
+ {
+ "before": "The Warrior Monk told me",
+ "after": "the Warrior Monk told me"
+ },
+ {
+ "before": "Foil-Hookline",
+ "after": "Foil–Hookline"
}
],
"https://www.parahumans.net/2019/02/05/blinding-11-12/": [
{
"before": "and drawers. her legs were stacked against",
"after": "and drawers. Her legs were stacked against"
+ },
+ {
+ "before": "Not when—not like this.",
+ "after": "Not when— Not like this."
+ },
+ {
+ "before": "Go you, Little V.",
+ "after": "Go you, Little V."
+ },
+ {
+ "before": "the King on the chess board",
+ "after": "the king on the chess board"
+ },
+ {
+ "before": "Almost Eight?",
+ "after": "Almost eight?"
}
],
"https://www.parahumans.net/2019/02/09/interlude-12-z/": [
@@ -6173,18 +6247,54 @@
{
"before": "all the Kiss and Kill things",
"after": "all the kiss and kill things"
+ },
+ {
+ "before": "relatives houses",
+ "after": "relatives’ houses"
+ },
+ {
+ "before": "Do you want to see me go all out? she thought",
+ "after": "Do you want to see me go all out? she thought"
+ },
+ {
+ "before": "roof top",
+ "after": "rooftop"
}
],
"https://www.parahumans.net/2019/02/12/heavens-12-1/": [
{
"before": "And mom’s implying I want",
"after": "And Mom’s implying I want"
+ },
+ {
+ "before": "ent North",
+ "after": "ent north"
+ },
+ {
+ "before": "They headed North",
+ "after": "They headed north"
+ },
+ {
+ "before": "It had been Marquis, before",
+ "after": "It had been Marquis’, before"
+ },
+ {
+ "before": "police, patrol, or station",
+ "after": "police, Patrol, or station"
+ },
+ {
+ "before": "earth-N",
+ "after": "Earth-N"
}
],
"https://www.parahumans.net/2019/02/16/heavens-12-2/": [
{
"before": "And you- don’t do that",
"after": "And you—don’t do that"
+ },
+ {
+ "before": "Harbingers?",
+ "after": "Harbingers?"
}
],
"https://www.parahumans.net/2019/02/19/interlude-12-e/": [
@@ -6195,12 +6305,30 @@
{
"before": "I need dad",
"after": "I need Dad"
+ },
+ {
+ "before": "going to mars or some",
+ "after": "going to Mars or some"
}
],
"https://www.parahumans.net/2019/02/23/heavens-12-3/": [
{
"before": "had shaken them good- but I could",
"after": "had shaken them good—but I could"
+ },
+ {
+ "before": "Harbinger one was down, and two was wounded",
+ "after": "Harbinger One was down, and Two was wounded"
+ }
+ ],
+ "https://www.parahumans.net/2019/02/26/heavens-12-4/": [
+ {
+ "before": "but the Harbinger said, “yes,” and I took",
+ "after": "but the Harbinger said “yes,” and I took"
+ },
+ {
+ "before": "closer to haunting, “Think you can",
+ "after": "closer to haunting, “think you can"
}
],
"https://www.parahumans.net/2019/03/02/heavens-12-all/": [
@@ -6241,6 +6369,12 @@
"after": "created glowing orbs. He threw one to my"
}
],
+ "https://www.parahumans.net/2019/03/23/heavens-12-9/": [
+ {
+ "before": "Harbinger two was out and Harbinger one",
+ "after": "Harbinger Two was out and Harbinger One"
+ }
+ ],
"https://www.parahumans.net/2019/03/26/heavens-12-none/": [
{
"before": "if I inherit mom’s whole",
From b017ab54d1c74c4fe2c968b7fbb3585dfd7bf3bb Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 19:51:20 -0500
Subject: [PATCH 065/186] Fix a variety of wrong closing quotes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
E.g. doubles, "I”ll", backwards
---
lib/convert-worker.js | 40 +++++++++++++++++++---------------------
lib/substitutions.json | 42 ++++++++++++++++++++++++++++++++----------
2 files changed, 51 insertions(+), 31 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index bd9d172..e796a84 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -171,34 +171,32 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/([a-z]+),<\/em>/g, "$1,");
}
- function fixQuotesAndApostrophes() {
- // Fix recurring poor quotes and apostrophes
- xml = xml.replace(/”/g, "
“");
- xml = xml.replace(/“\s*<\/p>/g, "”
");
- xml = xml.replace(/“\s*<\/em><\/p>/g, "”");
- xml = xml.replace(/‘\s*<\/p>/g, "’");
- xml = xml.replace(/‘\s*<\/em><\/p>/g, "’");
- xml = xml.replace(/,” <\/em>/g, ",” ");
- xml = xml.replace(/′/g, "’");
- xml = xml.replace(/″/g, "”");
- xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
- xml = xml.replace(/I‘m/g, "I’m");
- xml = xml.replace(/“\s+/g, "
“");
- xml = xml.replace(/'/g, "’");
- xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
- xml = xml.replace(/([a-z])”<\/p>/g, "$1.”
");
- }
-
- // These interact with each other, so do them a few times.
+ // These quote/apostrophe/em fixes interact with each other. TODO: try to disentangle so we don't repeat all of
+ // fixEms.
xml = xml.replace(/,” <\/em>/g, ",” ");
fixEms();
- fixQuotesAndApostrophes();
+ xml = xml.replace(/”/g, "
“");
+ xml = xml.replace(/“\s*<\/p>/g, "”
");
+ xml = xml.replace(/“\s*<\/em><\/p>/g, "”");
+ xml = xml.replace(/‘\s*<\/p>/g, "’");
+ xml = xml.replace(/‘\s*<\/em><\/p>/g, "’");
+ xml = xml.replace(/,” <\/em>/g, ",” ");
+ xml = xml.replace(/′/g, "’");
+ xml = xml.replace(/″/g, "”");
+ xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
+ xml = xml.replace(/I‘m/g, "I’m");
+ xml = xml.replace(/“\s+/g, "
“");
+ xml = xml.replace(/'/g, "’");
+ xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
+ xml = xml.replace(/([a-z])”<\/p>/g, "$1.”
");
fixEms();
xml = xml.replace(/‘([^<]+)<\/em>‘/g, "‘$1’");
- xml = xml.replace(/I”m/g, "I’m");
xml = xml.replace(/([a-z]+)!<\/em>/g, "$1!");
xml = xml.replace(/(?([\w ’]+)([!.?])”<\/em>/g, "$1$2”");
xml = xml.replace(/([\w ’]+[!.?])”<\/em>/g, "$1”");
+ xml = xml.replace(/I”(m|ll)/g, "I’$1");
+ xml = xml.replace(/””<\/p>/g, "”");
+ xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “");
// Similar problems occur in Ward with <b> and <strong> as do in Worm with <em>s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 9eafa64..3552166 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2361,13 +2361,13 @@
"before": "leave?” Hero asked. ”Why",
"after": "leave?” Hero asked. “Why"
},
- {
- "before": "eye contact. ”She",
- "after": "eye contact. “She"
- },
{
"before": "Alexandria said. ”Why",
"after": "Alexandria said. “Why"
+ },
+ {
+ "before": "voice as quiet. ”We’ve got teams",
+ "after": "voice as quiet. “We’ve got teams"
}
],
"https://parahumans.wordpress.com/2012/11/10/colony-15-8/": [
@@ -3408,6 +3408,10 @@
{
"before": "Bay,” Wanton said. “Not",
"after": "Bay,” Wanton said, “not"
+ },
+ {
+ "before": "Connecting to “agChat.ParahumansOnline016.par:6667” (Attempt 1 of 55)
\nResolving Host Name
\nConnecting…
\nConnected.
\nUsing identity “Iblis”, nick “Iblis”
\nWelcome to Parahumans Online Chatroom #116, ‘The Holdout’. Rules Here. Behave. Obey the @s.",
+ "after": "Connecting to “agChat.ParahumansOnline016.par:6667” (Attempt 1 of 55)
\nResolving Host Name
\nConnecting…
\nConnected.
\nUsing identity “Iblis”, nick “Iblis”
\nWelcome to Parahumans Online Chatroom #116, ‘The Holdout’. Rules Here. Behave. Obey the @s."
}
],
"https://parahumans.wordpress.com/2013/06/29/scarab-25-1/": [
@@ -4906,6 +4910,12 @@
"after": "—and from other horrors."
}
],
+ "https://www.parahumans.net/2018/05/22/torch-7-3/": [
+ {
+ "before": "by saying,”Isn’t it a rule",
+ "after": "by saying, “Isn’t it a rule"
+ }
+ ],
"https://www.parahumans.net/2018/05/26/torch-7-4/": [
{
"before": "⊙
",
@@ -5447,6 +5457,10 @@
{
"before": "then teacher can’t",
"after": "then Teacher can’t"
+ },
+ {
+ "before": "“Who is this?”a strange voice asked",
+ "after": "“Who is this?” a strange voice asked"
}
],
"https://www.parahumans.net/2018/09/13/gleaming-interlude-9-x/": [
@@ -5626,12 +5640,6 @@
"_comment": "See comment in https://www.parahumans.net/2018/09/29/gleaming-9-11/"
}
],
- "https://www.parahumans.net/2018/10/06/gleaming-9-13/": [
- {
- "before": "second.”Clarify.”",
- "after": "second. “Clarify.”"
- }
- ],
"https://www.parahumans.net/2018/10/09/gleaming-9-14/": [
{
"before": "in a fireman carry. with my free hand",
@@ -6383,6 +6391,10 @@
{
"before": "wasn’t mom’s whole",
"after": "wasn’t Mom’s whole"
+ },
+ {
+ "before": "“Fuck,”Capricorn said again",
+ "after": "“Fuck,” Capricorn said again"
}
],
"https://www.parahumans.net/2019/03/29/heavens-12-x/": [
@@ -6689,6 +6701,10 @@
{
"before": "ducked their heads down and hurried",
"after": "ducked their heads down and hurried."
+ },
+ {
+ "before": "in the way,”she said",
+ "after": "in the way,” she said"
}
],
"https://www.parahumans.net/2019/10/01/sundown-17-1/": [
@@ -6971,6 +6987,12 @@
"after": "Imp and the Heartbroken"
}
],
+ "https://www.parahumans.net/2020/02/23/infrared-19-z/": [
+ {
+ "before": "5’1 “",
+ "after": "5′1″"
+ }
+ ],
"https://www.parahumans.net/2020/02/25/last-20-1/": [
{
"before": "across this clearing. eyes, cameras",
From b20182ad0d283e1b0e71c2b377aa91717001ef71 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 20:21:26 -0500
Subject: [PATCH 066/186] Un-italicize various commas
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 14 +++-----------
2 files changed, 4 insertions(+), 11 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index e796a84..531347b 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -197,6 +197,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/I”(m|ll)/g, "I’$1");
xml = xml.replace(/””<\/p>/g, "”");
xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “");
+ xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "$1,");
// Similar problems occur in Ward with <b> and <strong> as do in Worm with <em>s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 3552166..854677e 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -956,7 +956,7 @@
"after": "CD-6."
},
{
- "before": "Narwhal,",
+ "before": "Narwhal,",
"after": "Narwhal,"
},
{
@@ -5677,10 +5677,6 @@
"before": "assistant Warden",
"after": "assistant warden"
},
- {
- "before": "behind those things was By,",
- "after": "behind those things was By,"
- },
{
"before": "What got into you?",
"after": "What got into you?"
@@ -5714,10 +5710,6 @@
{
"before": "finds it’s root",
"after": "finds its root"
- },
- {
- "before": "they’re Valkyrie, Crystal",
- "after": "they’re Valkyrie, Crystal"
}
],
"https://www.parahumans.net/2018/10/23/polarize-10-1/": [
@@ -7073,8 +7065,8 @@
"after": "Your issue with Dad"
},
{
- "before": "My boss is-was Narwhal, mom",
- "after": "My boss is—was Narwhal, Mom"
+ "before": "My boss is-was",
+ "after": "My boss is—was"
},
{
"before": "he mom shook her head",
From 21bded56503c6831119706ced69c40c251013511 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 20:27:22 -0500
Subject: [PATCH 067/186] 4.6.0
---
npm-shrinkwrap.json | 2 +-
package.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index aaea9a7..85e811f 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
{
"name": "worm-scraper",
- "version": "4.5.0",
+ "version": "4.6.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index 81d34a5..e6f658c 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"parahuman",
"scraper"
],
- "version": "4.5.0",
+ "version": "4.6.0",
"author": "Domenic Denicola (https://domenic.me/)",
"license": "WTFPL",
"repository": "domenic/worm-scraper",
From d69484af8e8481c64a1fd38a266ec09dabadfca8 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Tue, 17 Nov 2020 10:39:03 -0500
Subject: [PATCH 068/186] Fix messed up chapters with text conversations
aea63ba05564b97580042213b8875e71bb6080bb introduced a bug where these chapters would contain ill-formed XML, and so at least some eBook readers would refuse to display them.
---
lib/convert-worker.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 531347b..7bb84db 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -207,7 +207,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/@<\/strong>/g, "@");
xml = xml.replace(/
(\s*)<\/strong>/g, "
$1");
xml = xml.replace(/(\s*)<\/strong>/g, "$1");
- xml = xml.replace(/>(.*)<\/strong>:$1:");
+ xml = xml.replace(/>(.*)<\/strong>:$1:<");
// No need for line breaks before paragraph ends
// These often occur with the <br>s inside <b>/<strong>/<em>/<i> fixed above.
From b113b240cc1ab2192a6098b9b0321e0fcb33e04f Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Tue, 17 Nov 2020 10:39:12 -0500
Subject: [PATCH 069/186] 4.6.1
---
npm-shrinkwrap.json | 2 +-
package.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 85e811f..2821b4e 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
{
"name": "worm-scraper",
- "version": "4.6.0",
+ "version": "4.6.1",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index e6f658c..e240275 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"parahuman",
"scraper"
],
- "version": "4.6.0",
+ "version": "4.6.1",
"author": "Domenic Denicola (https://domenic.me/)",
"license": "WTFPL",
"repository": "domenic/worm-scraper",
From abca01b1d630dbad8bd8a7891e28cf1467a18008 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 15:59:28 -0500
Subject: [PATCH 070/186] Fix a few incomplete or misplaced ellipses
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 7bb84db..dc50ab6 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -223,6 +223,9 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/ \.<\/p>/g, ".");
xml = xml.replace(/\.\.\./g, "…");
xml = xml.replace(/\.\. {2}/g, ". ");
+ xml = xml.replace(/\.\./g, "…");
+ xml = xml.replace(/(?/g, "");
From 121ab01243a14dc3e7f733af0d15dc39a86ca348 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 16:11:50 -0500
Subject: [PATCH 071/186] Spot fixes for Ward through Heavens 12.none
---
lib/convert-worker.js | 9 +++-
lib/substitutions.json | 100 +++++++++++++++++++++++++++++++++++++----
2 files changed, 98 insertions(+), 11 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index dc50ab6..a580dfd 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -436,14 +436,19 @@ function fixCapitalization(xml, book) {
// The Speedrunners team name is missing its capitalization a couple times.
xml = xml.replace(/speedrunners/g, "Speedrunners");
+ // Capitalization is inconsistent, but shard names seem to usually be capitalized.
+ xml = xml.replace(/Grasping self/g, "Grasping Self");
+ xml = xml.replace(/Cloven stranger/g, "Cloven Stranger");
+
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol ".
xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus|training)/ig,
(_, $1) => `Patrol ${$1.toLowerCase()}`);
- // This always works in Ward and has a few false positives in Worm, where it is never needed:
+ // This usually works in Ward (some instances corrected back in substitutions.json), and has a few false positives in
+ // Worm, where it is never needed:
if (book === "ward") {
- xml = xml.replace(/the patrol/g, "the Patrol");
+ xml = xml.replace(/the patrol(?!s)/g, "the Patrol");
}
// This is sometimes missing its capitalization.
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 854677e..28f0485 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -1174,6 +1174,10 @@
{
"before": "hand at me, “She’s",
"after": "hand at me, “she’s"
+ },
+ {
+ "before": ", kay",
+ "after": ", ’kay"
}
],
"https://parahumans.wordpress.com/2012/03/27/extermination-8-8/": [
@@ -1689,10 +1693,6 @@
}
],
"https://parahumans.wordpress.com/2012/05/19/infestation-11-1/": [
- {
- "before": "was.. how",
- "after": "was… how"
- },
{
"before": "Dinah being kidnapped and leaving",
"after": "Dinah being kidnapped, and leaving"
@@ -3596,7 +3596,7 @@
],
"https://parahumans.wordpress.com/2013/08/10/interlude-26/": [
{
- "before": "variation...",
+ "before": "variation…. ",
"after": "variation…"
},
{
@@ -5421,10 +5421,6 @@
{
"before": "what it’s worth,” Tristan said. “Thanks for",
"after": "what it’s worth,” Tristan said, “thanks for"
- },
- {
- "before": "Apparently… perk of..",
- "after": "Apparently… perk of…"
}
],
"https://www.parahumans.net/2018/09/01/gleaming-9-4/": [
@@ -6335,6 +6331,30 @@
{
"before": "bounce off of the the people",
"after": "bounce off of the people"
+ },
+ {
+ "before": "a ‘us’",
+ "after": "an ‘us’"
+ },
+ {
+ "before": "keep going,” Their March says",
+ "after": "keep going,” their March says"
+ },
+ {
+ "before": "move, but Vista was, at least a little",
+ "after": "move, but Vista could, at least a little"
+ },
+ {
+ "before": "a fraction of a second her more time",
+ "after": "a fraction of a second more time"
+ },
+ {
+ "before": "let the charge grow",
+ "after": "lets the charge grow"
+ },
+ {
+ "before": "She imagines It would",
+ "after": "She imagines it would"
}
],
"https://www.parahumans.net/2019/03/05/heavens-12-5/": [
@@ -6345,6 +6365,18 @@
{
"before": "we didn’t resume the discussion",
"after": "we didn’t resume the discussion."
+ },
+ {
+ "before": "flashlight of the Patrol",
+ "after": "flashlight of the patrol"
+ },
+ {
+ "before": "ID’s",
+ "after": "IDs"
+ },
+ {
+ "before": "My mom asked",
+ "after": "my mom asked"
}
],
"https://www.parahumans.net/2019/03/09/heavens-12-6/": [
@@ -6355,12 +6387,42 @@
{
"before": "back pouch” my mom said",
"after": "back pouch,” my mom said"
+ },
+ {
+ "before": "protest—Two people held hostage",
+ "after": "protest—two people held hostage"
+ }
+ ],
+ "https://www.parahumans.net/2019/03/12/heavens-12-f/": [
+ {
+ "before": ", kay",
+ "after": ", ’kay"
+ },
+ {
+ "before": "thinking, the he brushed",
+ "after": "thinking, then he brushed"
}
],
"https://www.parahumans.net/2019/03/16/heavens-12-7/": [
{
"before": "without knowing. by Love Lost",
"after": "without knowing. By Love Lost"
+ },
+ {
+ "before": "The man-though he",
+ "after": "The man—though he"
+ },
+ {
+ "before": "a chance it might never.",
+ "after":"a chance it might never stop."
+ },
+ {
+ "before": "wrench a weapon free, Curved blades that",
+ "after": "wrench a weapon free. Curved blades that"
+ },
+ {
+ "before": "saw the group that standing tallest",
+ "after": "saw the group that was standing tallest"
}
],
"https://www.parahumans.net/2019/03/19/heavens-12-8/": [
@@ -6373,6 +6435,18 @@
{
"before": "Harbinger two was out and Harbinger one",
"after": "Harbinger Two was out and Harbinger One"
+ },
+ {
+ "before": "pizz dispenser",
+ "after": "Pez dispenser"
+ },
+ {
+ "before": "background keeping Crete",
+ "after": "background keeping Cretan"
+ },
+ {
+ "regExp": " Mech",
+ "replacement": " mech"
}
],
"https://www.parahumans.net/2019/03/26/heavens-12-none/": [
@@ -6387,6 +6461,14 @@
{
"before": "“Fuck,”Capricorn said again",
"after": "“Fuck,” Capricorn said again"
+ },
+ {
+ "before": "With your friends, your teachers the places you love",
+ "after": "With your friends, your teachers, the places you love"
+ },
+ {
+ "before": "every earth",
+ "after": "every Earth"
}
],
"https://www.parahumans.net/2019/03/29/heavens-12-x/": [
From 3284c04f8b6b42023192e6e556d26f9febc303b9 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 16:14:35 -0500
Subject: [PATCH 072/186] Always capitalize "Dauntless Titan"
---
lib/convert-worker.js | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index a580dfd..9aa7cbd 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -436,6 +436,10 @@ function fixCapitalization(xml, book) {
// The Speedrunners team name is missing its capitalization a couple times.
xml = xml.replace(/speedrunners/g, "Speedrunners");
+ // Dauntless is a cape name. The majority of the time "Dauntless Titan" is fully capitalized, but either word
+ // sometimes is missing its capitalization.
+ xml = xml.replace(/dauntless titan/ig, "Dauntless Titan");
+
// Capitalization is inconsistent, but shard names seem to usually be capitalized.
xml = xml.replace(/Grasping self/g, "Grasping Self");
xml = xml.replace(/Cloven stranger/g, "Cloven Stranger");
From 06c7b3adf230fa094bfb641fd2a30660686e483e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 16:22:29 -0500
Subject: [PATCH 073/186] Standardize on "Dragon-craft" and "Dragon-mech"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 9aa7cbd..59094bc 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -562,6 +562,9 @@ function standardizeSpellings(xml) {
// "gray" is the majority spelling, except for "greyhound"
xml = xml.replace(/(G|g)rey(?!hound)/g, "$1ray");
+ // 12 instances of "Dragon-craft", 12 instances of "Dragon craft", 1 instance of "dragon craft"
+ xml = xml.replace(/[Dd]ragon[ -](craft|mech)/g, "Dragon-$1");
+
return xml;
}
From 60709f54c2366b75054e46e7991dd0b15d446c5c Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 16:32:08 -0500
Subject: [PATCH 074/186] Always capitalize "Nazi"
Also fix hyphenation and capitalization around "neo-Nazi".
---
lib/convert-worker.js | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 59094bc..e12de7d 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -505,6 +505,12 @@ function fixCapitalization(xml, book) {
// place name. So we remove the capitalization in the few places where it does appear.
xml = xml.replace(/Wardens’ Headquarters/g, "Wardens’ headquarters");
+ // Some style guides try to reserve capitalized "Nazi" for historical discussions of members of the Nazi party. This
+ // seems fuzzy when it comes to phrases like "neo-Nazi", and doesn't seem to be what the author is doing; the books
+ // are just plain inconsistent. So, let's standardize on always uppercasing.
+ xml = xml.replace(/(?
Date: Mon, 23 Nov 2020 16:37:29 -0500
Subject: [PATCH 075/186] =?UTF-8?q?Use=20apostrophes=20instead=20of=20open?=
=?UTF-8?q?ing=20quotes=20for=20=E2=80=99kay?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 6 +-----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index e12de7d..74c9344 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -198,6 +198,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/””<\/p>/g, "”");
xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “");
xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "$1,");
+ xml = xml.replace(/‘([Kk])ay(?!’)/g, "’$1ay");
// Similar problems occur in Ward with and as do in Worm with s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 28f0485..385a81b 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -346,7 +346,7 @@
"after": "in point: shazam"
},
{
- "before": "‘Kay",
+ "before": "’Kay",
"after": "’kay"
}
],
@@ -405,10 +405,6 @@
"before": "guardian spoke, “You’d",
"after": "guardian spoke. “You’d"
},
- {
- "before": "‘Kay",
- "after": "’Kay"
- },
{
"before": "‘specially with",
"after": "’Specially with"
From 53f7307daa10fca3b08ccb61817a17631c162d40 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 16:49:50 -0500
Subject: [PATCH 076/186] Capitalize "English" but de-capitalize "english
muffin"
Also lowercase one instance of "french toast"
---
lib/convert-worker.js | 6 ++++++
lib/substitutions.json | 4 ++++
2 files changed, 10 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 74c9344..cf14d7a 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -512,6 +512,12 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/(?
Date: Mon, 23 Nov 2020 16:53:07 -0500
Subject: [PATCH 077/186] =?UTF-8?q?Fix=20capitalization=20and=20apostrophe?=
=?UTF-8?q?s=20for=20=E2=80=99Lace?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index cf14d7a..395cf0e 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -289,6 +289,9 @@ function fixTruncatedWords(xml) {
// Short for "Contender"
xml = xml.replace(/[‘’][Tt]end(?![a-z])/g, "’Tend");
+ // Short for "Anelace"
+ xml = xml.replace(/[‘’][Ll]ace(?![a-z])/g, "’Lace");
+
return xml;
}
From 22fbff008e956fbaf19e2cf02c893db24c5e2a4d Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 16:57:05 -0500
Subject: [PATCH 078/186] Capitalize a few more alternate "Earth"s
---
lib/substitutions.json | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index c558abc..31fce07 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4130,6 +4130,10 @@
{
"before": "resulting crater that compared with",
"after": "resulting crater compared with"
+ },
+ {
+ "before": "on the other earth",
+ "after": "on the other Earth"
}
],
"https://parahumans.wordpress.com/2013/10/29/30-7/": [
@@ -6543,6 +6547,16 @@
"after": "zero—a circle made with her full hand—three"
}
],
+ "https://www.parahumans.net/2019/04/30/black-13-x/": [
+ {
+ "before": "technology from another earth",
+ "after": "technology from another Earth"
+ },
+ {
+ "before": "from yet another earth",
+ "after": "from yet another Earth"
+ }
+ ],
"https://www.parahumans.net/2019/05/07/black-13-10/": [
{
"before": "the interior of this one was quiet-, I could",
@@ -6567,6 +6581,12 @@
"after": "actively tamper with it"
}
],
+ "https://www.parahumans.net/2019/05/28/breaking-14-4/": [
+ {
+ "before": "Once Goddess’s earth",
+ "after": "Once Goddess’s Earth"
+ }
+ ],
"https://www.parahumans.net/2019/06/04/breaking-14-6/": [
{
"before": "and the the undo button",
From af0306422183271f899b9c228fee40830e58173c Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 17:00:33 -0500
Subject: [PATCH 079/186] =?UTF-8?q?Standardize=20on=20"Crock=20o=E2=80=99?=
=?UTF-8?q?=20Shit"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 395cf0e..1f622c1 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -342,6 +342,9 @@ function standardizeNames(xml) {
// 25 instances of "Amias" to 3 of "Amais"
xml = xml.replace(/Amais/g, "Amias");
+ // Earlier chapters have a space; later ones do not. They're separate words, so side with the earlier chapters.
+ xml = xml.replace(/Crock o[‘’]Shit/g, "Crock o’ Shit");
+
return xml;
}
From 2eafeee8146ed5feaa3cc7c836e3a26d6917deb2 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 17:49:17 -0500
Subject: [PATCH 080/186] Fix possessive of Semiramis
---
lib/convert-worker.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 1f622c1..a2a38ac 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -403,7 +403,7 @@ function fixPossessives(xml) {
// Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
xml = xml.replace(
// eslint-disable-next-line max-len
- /(?
Date: Mon, 23 Nov 2020 17:50:24 -0500
Subject: [PATCH 081/186] Spot fixes for Ward through Breaking 14.z
---
lib/substitutions.json | 266 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 258 insertions(+), 8 deletions(-)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 31fce07..e5dcbc8 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2819,6 +2819,10 @@
{
"before": "fight,” Tattletale said. “And",
"after": "fight,” Tattletale said, “and"
+ },
+ {
+ "before": "earth-shaking",
+ "after": "earthshaking"
}
],
"https://parahumans.wordpress.com/2013/02/12/queen-18-8/": [
@@ -6423,6 +6427,10 @@
{
"before": "saw the group that standing tallest",
"after": "saw the group that was standing tallest"
+ },
+ {
+ "before": "T’was",
+ "after": "’Twas"
}
],
"https://www.parahumans.net/2019/03/19/heavens-12-8/": [
@@ -6491,6 +6499,66 @@
{
"before": "morning breath—more than morning breath-, but there",
"after": "morning breath—more than morning breath—but there"
+ },
+ {
+ "before": "Previous Chapter Next Chapter
\n\n\n\n\n\n\n\n\n\n\n\n",
+ "after": "
\n
",
+ "_comment": "This is the best way I can think of to emulate the end of chapter 'fake out' in an ebook format"
+ },
+ {
+ "before": "Can we—Can we do my thing",
+ "after": "Can we— Can we do my thing"
+ },
+ {
+ "before": "Let me through!",
+ "after": "Let me through!"
+ },
+ {
+ "before": "This? That?",
+ "after": "This? That?"
+ },
+ {
+ "before": "more saccharine, “You don’t",
+ "after": "more saccharine, “you don’t"
+ },
+ {
+ "before": "Damsel told Antares, “Think again",
+ "after": "Damsel told Antares, “think again"
+ }
+ ],
+ "https://www.parahumans.net/2019/04/02/black-13-1/": [
+ {
+ "before": "⊙
\nPrevious Chapter Next Chapter
\n",
+ "after": "",
+ "_comment": "Our usual heuristics of removing the first paragraph to remove the previous/next chapter links are broken here because of the 'go back and look at the fake out' comment at the top"
+ },
+ {
+ "before": "walls stone and wood. the building",
+ "after": "walls stone and wood. The building"
+ },
+ {
+ "before": "he asked",
+ "after": "he asked."
+ },
+ {
+ "before": "—and my costume leggings",
+ "after": "—And my costume leggings"
+ },
+ {
+ "before": "I told Rain, “Is a goodbye.",
+ "after": "I told Rain, “is a goodbye."
+ },
+ {
+ "before": "fiancee",
+ "after": "fiancée"
+ },
+ {
+ "before": "Signal fire",
+ "after": "Signal Fire"
+ },
+ {
+ "before": "“It’s winter,’",
+ "after": "“It’s winter,”"
}
],
"https://www.parahumans.net/2019/04/09/black-13-3/": [
@@ -6501,40 +6569,66 @@
{
"before": "pound of flesh, at least—!”",
"after": "pound of flesh, at least—!”"
- }
- ],
- "https://www.parahumans.net/2019/04/02/black-13-1/": [
- {
- "before": "walls stone and wood. the building",
- "after": "walls stone and wood. The building"
},
{
- "before": "he asked",
- "after": "he asked."
+ "before": "…Kind of hate hospitals",
+ "after": "…kind of hate hospitals"
+ },
+ {
+ "before": "—For them!",
+ "after": "—for them!"
}
],
"https://www.parahumans.net/2019/04/13/black-13-4/": [
{
"before": "wiped—or perhaps struck- clean.",
"after": "wiped—or perhaps struck—clean."
+ },
+ {
+ "before": "—Because it’s a dress",
+ "after": "—because it’s a dress"
+ },
+ {
+ "before": "Bam!",
+ "after": "Bam!"
}
],
"https://www.parahumans.net/2019/04/16/black-13-5/": [
{
"before": "For some of the heartbroken",
"after": "For some of the Heartbroken"
+ },
+ {
+ "before": "agent-parahuman",
+ "after": "agent–parahuman"
}
],
"https://www.parahumans.net/2019/04/20/black-13-6/": [
{
"before": "I’m a Master, right, you",
"after": "I’m a master, right, you"
+ },
+ {
+ "before": "been studying, woo!",
+ "after": "been studying, woo!"
+ },
+ {
+ "before": "Kenz-Lookout",
+ "after": "Kenz—Lookout"
}
],
"https://www.parahumans.net/2019/04/23/black-13-7/": [
{
"before": "looked like they were were painted on",
"after": "looked like they were painted on"
+ },
+ {
+ "before": "—My friend",
+ "after": "—my friend"
+ },
+ {
+ "before": "Fucking what? When? How bad",
+ "after": "Fucking what? When? How bad"
}
],
"https://www.parahumans.net/2019/04/27/black-13-8/": [
@@ -6545,6 +6639,10 @@
{
"before": "zero—a circle made with her full hand-, three",
"after": "zero—a circle made with her full hand—three"
+ },
+ {
+ "before": "what the Wedge is or where",
+ "after": "what the wedge is or where"
}
],
"https://www.parahumans.net/2019/04/30/black-13-x/": [
@@ -6555,30 +6653,106 @@
{
"before": "from yet another earth",
"after": "from yet another Earth"
+ },
+ {
+ "before": "in that a sea of yellow",
+ "after": "in that sea of yellow"
+ },
+ {
+ "regExp": "cell five",
+ "replacement": "Cell Five"
+ },
+ {
+ "regExp": "cell eleven",
+ "replacement": "Cell Eleven"
+ },
+ {
+ "regExp": "cell nineteen",
+ "replacement": "Cell Nineteen"
+ },
+ {
+ "before": "there were always some overhead",
+ "after": "there were always some over head"
+ },
+ {
+ "before": "one of the Theocrats",
+ "after": "one of the theocrats"
+ }
+ ],
+ "https://www.parahumans.net/2019/05/04/black-13-9/": [
+ {
+ "before": "thing is,” Tattletale said. “What did",
+ "after": "thing is,” Tattletale said, “what did"
+ },
+ {
+ "before": "jacakss",
+ "after": "jackass"
+ },
+ {
+ "before": "whether you invite me or not, it’s understandable if you don’t you’re going to want",
+ "after": "whether you invite me or not, it’s understandable if you don’t, you’re going to want"
}
],
"https://www.parahumans.net/2019/05/07/black-13-10/": [
{
"before": "the interior of this one was quiet-, I could",
"after": "the interior of this one was quiet—I could"
+ },
+ {
+ "before": "‘why didn’t Imp back us up’?",
+ "after": "‘why didn’t Imp back us up?’"
}
],
"https://www.parahumans.net/2019/05/11/black-13-11/": [
{
"before": "been scummy and and tried",
"after": "been scummy and tried"
+ },
+ {
+ "before": "—And I wanted to bring it up",
+ "after": "—and I wanted to bring it up"
+ },
+ {
+ "before": "—Because I could have pushed harder",
+ "after": "—because I could have pushed harder"
+ }
+ ],
+ "https://www.parahumans.net/2019/05/14/13-z/": [
+ {
+ "before": "and they were pro-irregular",
+ "after": "and they were pro-Irregular"
}
],
"https://www.parahumans.net/2019/05/18/breaking-14-1/": [
{
"before": "face—ridiculous notion but still-, if I wrote him a note or sent him a text—less ridiculous notion-, saying",
"after": "face—ridiculous notion but still—if I wrote him a note or sent him a text—less ridiculous notion—saying"
+ },
+ {
+ "before": "Instead a 1,—2, 4",
+ "after": "Instead a 1, −2, 4"
}
],
"https://www.parahumans.net/2019/05/25/breaking-14-3/": [
{
"before": "actively tamper with it it",
"after": "actively tamper with it"
+ },
+ {
+ "before": "to reclaim her throne, and I.”",
+ "after": "to reclaim her throne, and I—”"
+ },
+ {
+ "before": "A teacher plot",
+ "after": "A Teacher plot"
+ },
+ {
+ "before": "Your own mother just went",
+ "after": "your own mother just went"
+ },
+ {
+ "before": "Ashley stared int the",
+ "after": "Ashley stared into the"
}
],
"https://www.parahumans.net/2019/05/28/breaking-14-4/": [
@@ -6587,6 +6761,16 @@
"after": "Once Goddess’s Earth"
}
],
+ "https://www.parahumans.net/2019/06/01/breaking-14-5/": [
+ {
+ "before": "his wife was dressed in patching colors",
+ "after": "his wife was dressed in matching colors"
+ },
+ {
+ "before": "Luis said. “he saw a lot of",
+ "after": "Luis said. “He saw a lot of"
+ }
+ ],
"https://www.parahumans.net/2019/06/04/breaking-14-6/": [
{
"before": "and the the undo button",
@@ -6603,6 +6787,18 @@
{
"before": "or innovators—scientists-, they say",
"after": "or innovators—scientists—they say"
+ },
+ {
+ "before": "PHD",
+ "after": "PhD"
+ },
+ {
+ "before": "If I’m asking?",
+ "after": "If I’m asking?"
+ },
+ {
+ "before": "Isn’t it?",
+ "after": "Isn’t it?"
}
],
"https://www.parahumans.net/2019/06/08/breaking-14-7/": [
@@ -6613,18 +6809,60 @@
{
"before": "you’re talking syncope—fainting-, arrythmia,",
"after": "you’re talking syncope—fainting, arrythmia,"
+ },
+ {
+ "before": "inter-earth",
+ "after": "inter-Earth"
+ },
+ {
+ "before": "‘can‘t’",
+ "after": "‘can’t’"
+ },
+ {
+ "before": "The Western of the two half-castles",
+ "after": "The western of the two half-castles"
+ }
+ ],
+ "https://www.parahumans.net/2019/06/11/breaking-14-8/": [
+ {
+ "before": "the conditions weren’t great-it was cold enough",
+ "after": "the conditions weren’t great—it was cold enough"
+ },
+ {
+ "before": "pick up line",
+ "after": "pickup line"
+ },
+ {
+ "before": "got me,” Rain said. “Was that",
+ "after": "got me,” Rain said, “was that"
+ },
+ {
+ "before": "—Ask for some strings",
+ "after": "—ask for some strings"
}
],
"https://www.parahumans.net/2019/06/15/breaking-14-9/": [
{
"before": "You stay away from mom, you stay away from dad",
"after": "You stay away from Mom, you stay away from Dad"
+ },
+ {
+ "before": "‘fuck this, not hitching a ride with—this-‘",
+ "after": "‘fuck this, not hitching a ride with this’"
}
],
"https://www.parahumans.net/2019/06/18/breaking-14-10/": [
{
"before": "in a real fight-, but in",
"after": "in a real fight—but in"
+ },
+ {
+ "before": "fall to the mattress, “Let",
+ "after": "fall to the mattress, “let"
+ },
+ {
+ "before": "I—Only if you hold onto it.",
+ "after": "I— Only if you hold onto it."
}
],
"https://www.parahumans.net/2019/06/22/breaking-14-11/": [
@@ -6649,6 +6887,14 @@
{
"before": "-That was us—the message on my display read.—cuz guards—",
"after": "-That was us- the message on my display read. -cuz guards-"
+ },
+ {
+ "before": "blinfd",
+ "after": "blind"
+ },
+ {
+ "before": "This? It’s stupid politics",
+ "after": "This? It’s stupid politics"
}
],
"https://www.parahumans.net/2019/06/29/breaking-14-z/": [
@@ -6663,6 +6909,10 @@
{
"before": "allusion to Master–stranger protocols",
"after": "allusion to master–stranger protocols"
+ },
+ {
+ "before": "I dealt with ‘m",
+ "after": "I dealt with ’m"
}
],
"https://www.parahumans.net/2019/07/02/dying-15-a/": [
From 16ef1836da085e1a329d165787bef5cb7be7cf83 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 18:35:10 -0500
Subject: [PATCH 082/186] Hyphenate spelled out numbers from 11 through 99
---
lib/convert-worker.js | 6 ++++++
lib/substitutions.json | 10 +++++-----
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index a2a38ac..069d369 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -543,6 +543,12 @@ function fixHyphens(xml) {
xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2");
xml = xml.replace(/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old");
+ // Compound numbers must be hyphenated.
+ xml = xml.replace(
+ /(?here? Now?",
"after": "here? Now?"
@@ -1886,7 +1882,7 @@
"after": "my ‘candy’"
},
{
- "before": "Thirty Nine point",
+ "before": "Thirty-Nine point",
"after": "Thirty-nine point"
},
{
@@ -3516,6 +3512,10 @@
{
"before": "a burst of alarm.",
"after": "a burst of alarm."
+ },
+ {
+ "before": "Up from Eighty-three point four percent",
+ "after": "Up from eighty-three point four percent"
}
],
"https://parahumans.wordpress.com/2013/07/23/sting-26-3/": [
From b6135149943cafe39ef980a856cd6d6022f1f06f Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 18:58:56 -0500
Subject: [PATCH 083/186] Remove hyphenation around "hundred" and "percent"
---
lib/convert-worker.js | 5 ++++-
lib/substitutions.json | 16 ++++++++++++++++
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 069d369..f4622e9 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -543,11 +543,14 @@ function fixHyphens(xml) {
xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2");
xml = xml.replace(/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old");
- // Compound numbers must be hyphenated.
+ // Compound numbers from 11 through 99 must be hyphenated, but others should not be.
xml = xml.replace(
/(?
Date: Mon, 23 Nov 2020 19:04:28 -0500
Subject: [PATCH 084/186] Always capitalize "the Bunker" in Ward
---
lib/convert-worker.js | 4 ++++
lib/substitutions.json | 8 ++++++++
2 files changed, 12 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index f4622e9..6f6d4e1 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -501,6 +501,10 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/(Norwalk|Fenway|Stratford) station/g, "$1 Station");
xml = xml.replace(/the megalopolis/g, "the Megalopolis");
xml = xml.replace(/earths(?![a-z])/g, "Earths");
+ if (book === "ward") {
+ xml = xml.replace(/the bunker/g, "the Bunker");
+ xml = xml.replace(/‘bunker’/g, "‘Bunker’");
+ }
// "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
// instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
diff --git a/lib/substitutions.json b/lib/substitutions.json
index a9efd6a..e1290cc 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -6731,6 +6731,14 @@
{
"before": "—Because I could have pushed harder",
"after": "—because I could have pushed harder"
+ },
+ {
+ "before": "the Wardens’ bunker",
+ "after": "the Wardens’ Bunker"
+ },
+ {
+ "before": "the more distant bunker itself",
+ "after": "the more distant Bunker itself"
}
],
"https://www.parahumans.net/2019/05/14/13-z/": [
From ea19dbb6c511f28af7f83d12a06e6456d2f8b53a Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 19:09:40 -0500
Subject: [PATCH 085/186] Standardize on "Jotun" instead of "Jotunn"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 6f6d4e1..ebdd2bb 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -345,6 +345,9 @@ function standardizeNames(xml) {
// Earlier chapters have a space; later ones do not. They're separate words, so side with the earlier chapters.
xml = xml.replace(/Crock o[‘’]Shit/g, "Crock o’ Shit");
+ // 5 instances of "Jotun" to 2 of "Jotunn"
+ xml = xml.replace(/Jotunn/g, "Jotun");
+
return xml;
}
From 1da99791d37d28e46dfa3ab80a2002e99269afe0 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 19:30:38 -0500
Subject: [PATCH 086/186] Replace hyphen-minus with em dash when preceding a
question mark
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index ebdd2bb..72a73c2 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -367,6 +367,7 @@ function fixEmDashes(xml) {
xml = xml.replace(/\s+—”/g, "—”");
xml = xml.replace(/I-I/g, "I—I");
xml = xml.replace(/I-uh/g, "I—uh");
+ xml = xml.replace(/-\?/g, "—?");
return xml;
}
From 6751136becbf9859bd2fb6d50080c69e8ae9bef3 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 19:33:27 -0500
Subject: [PATCH 087/186] Fix some instances of the possessive of Marquis
---
lib/convert-worker.js | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 72a73c2..dd91449 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -403,14 +403,16 @@ function enDashJointNames(xml) {
}
function fixPossessives(xml) {
- // Fix possessive of names ending in "s"
- // Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
+ // Fix possessive of names ending in "s".
xml = xml.replace(
// eslint-disable-next-line max-len
/(?
Date: Mon, 23 Nov 2020 19:35:48 -0500
Subject: [PATCH 088/186] Always hyphenate self-conscious and derivatives
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index dd91449..0ecad93 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -571,6 +571,7 @@ function fixHyphens(xml) {
xml = xml.replace(/creepy crawl/g, "creepy-crawl");
xml = xml.replace(/well armed/g, "well-armed");
xml = xml.replace(/able bodied/g, "able-bodied");
+ xml = xml.replace(/self conscious/g, "self-conscious");
// This is usually correct but sometimes wrong.
xml = xml.replace(/neo /g, "neo-");
From 3ec6e36e34c2ec24a976fa4c3010c8d46715f9da Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Mon, 23 Nov 2020 19:39:04 -0500
Subject: [PATCH 089/186] Un-capitalize "Church" when appropriate
---
lib/substitutions.json | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index e1290cc..869172f 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -1808,6 +1808,10 @@
{
"before": "asinine confession again, “It’s",
"after": "asinine confession again. “It’s"
+ },
+ {
+ "before": "people from the Church",
+ "after": "people from the church"
}
],
"https://parahumans.wordpress.com/2012/06/16/interlude-11a/": [
@@ -4631,6 +4635,10 @@
{
"before": "hair he had. it was long",
"after": "hair he had. It was long"
+ },
+ {
+ "before": "about you and Church",
+ "after": "about you and church"
}
],
"https://www.parahumans.net/2018/02/24/shadow-5-1/": [
@@ -4916,6 +4924,12 @@
"after": "the junior captain said"
}
],
+ "https://www.parahumans.net/2018/05/15/torch-7-1/": [
+ {
+ "before": "got to go to Church",
+ "after": "got to go to church"
+ }
+ ],
"https://www.parahumans.net/2018/05/19/torch-7-2/": [
{
"before": "you had to deal with with could have",
@@ -6863,6 +6877,10 @@
{
"before": "—Ask for some strings",
"after": "—ask for some strings"
+ },
+ {
+ "before": "for the needy for Church events",
+ "after": "for the needy for church events"
}
],
"https://www.parahumans.net/2019/06/15/breaking-14-9/": [
@@ -7197,6 +7215,10 @@
{
"before": "Couple of of times a month",
"after": "Couple of times a month"
+ },
+ {
+ "before": "This would be the Church.",
+ "after": "This would be the church."
}
],
"https://www.parahumans.net/2019/12/28/infrared-19-2/": [
From de83a47bfa3c05d48be14daa4f607e2e93811c40 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 15:06:43 -0500
Subject: [PATCH 090/186] Spot fixes through Dying 15.z
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 221 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 222 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 0ecad93..d427070 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -71,6 +71,7 @@ function getBodyXML(chapter, book, contentEl) {
// Worm uses 30px; Ward mostly uses 40px but sometimes uses 30px/60px. Let's standardize on 30px.
if (style === "text-align:left;padding-left:30px;" ||
style === "text-align: left;padding-left: 40px;" ||
+ style === "text-align: left; padding-left: 40px;" ||
style === "padding-left: 40px;") {
child.setAttribute("style", "padding-left: 30px;");
}
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 869172f..b57c532 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -5473,6 +5473,10 @@
{
"before": "The patrol had done its work",
"after": "The Patrol had done its work"
+ },
+ {
+ "before": "guard confirmed as Thralls",
+ "after": "guard confirmed as thralls"
}
],
"https://www.parahumans.net/2018/09/11/gleaming-9-7/": [
@@ -6325,6 +6329,10 @@
{
"before": "Harbingers?",
"after": "Harbingers?"
+ },
+ {
+ "before": "“Thing one, thing two",
+ "after": "“Thing One, Thing Two"
}
],
"https://www.parahumans.net/2019/02/19/interlude-12-e/": [
@@ -6965,6 +6973,14 @@
{
"before": "scary. that’s all",
"after": "scary. That’s all"
+ },
+ {
+ "before": "At Anita’s assistance",
+ "after": "At Anita’s insistence"
+ },
+ {
+ "before": "kanji for ‘Shi’",
+ "after": "kanji for ‘shi’"
}
],
"https://www.parahumans.net/2019/07/06/dying-15-1/": [
@@ -6975,42 +6991,162 @@
{
"before": "The blond heartbroken",
"after": "The blond Heartbroken"
+ },
+ {
+ "before": "straight A’s",
+ "after": "straight As"
}
],
"https://www.parahumans.net/2019/07/09/dying-15-2/": [
{
"before": "I told Precipice",
"after": "I told Precipice."
+ },
+ {
+ "before": "Guys’ gotta be a thirteen",
+ "after": "Guy’s gotta be a thirteen"
+ },
+ {
+ "before": "been sitting her for",
+ "after": "been sitting here for"
+ },
+ {
+ "before": "a body torn two",
+ "after": "a body torn in two"
+ },
+ {
+ "before": "it had eben damaged",
+ "after": "it had been damaged"
+ },
+ {
+ "before": "dodge that was fast moving",
+ "after": "dodge something that fast-moving"
+ },
+ {
+ "before": "done so far, and crippling",
+ "after": "done so far, crippling"
+ },
+ {
+ "before": "‘hedrons",
+ "after": "’hedrons"
+ },
+ {
+ "before": "post its",
+ "after": "Post-its"
}
],
"https://www.parahumans.net/2019/07/13/dying-15-3/": [
{
"before": "Didn’t ask about mom, dad, or",
"after": "Didn’t ask about Mom, Dad, or"
+ },
+ {
+ "before": "—And we’re heading upstairs",
+ "after": "—and we’re heading upstairs"
}
],
"https://www.parahumans.net/2019/07/16/dying-15-4/": [
{
"before": "There was was no ‘good’",
"after": "There was no ‘good’"
+ },
+ {
+ "before": "was a H turned",
+ "after": "was an H turned"
+ },
+ {
+ "before": "Needles receded as the entered",
+ "after": "Needles receded as they entered"
+ },
+ {
+ "before": "flanking help other teams",
+ "after": "flanking to help other teams"
+ },
+ {
+ "before": "water off a ducks’ back",
+ "after": "water off a duck’s back"
+ },
+ {
+ "before": "the damage segments around",
+ "after": "the damaged segments around"
+ },
+ {
+ "before": "when we’re done this mission",
+ "after": "when we’re done with this mission"
+ },
+ {
+ "before": "If you take the current device with you can use it again",
+ "after": "If you take the current device with, you can use it again"
+ },
+ {
+ "before": "one remaining Thrall",
+ "after": "one remaining thrall"
}
],
"https://www.parahumans.net/2019/07/20/dying-15-5/": [
{
"before": "worth of of water",
"after": "worth of water"
+ },
+ {
+ "before": "mind controlling",
+ "after": "mind-controlling"
+ },
+ {
+ "before": "range of the Halo",
+ "after": "range of the halo"
+ },
+ {
+ "before": "tear up the Haloed angel",
+ "after": "tear up the haloed angel"
+ },
+ {
+ "before": "her power helping, to keep her there instead of falling",
+ "after": "her power helping to keep her there instead of falling"
+ },
+ {
+ "before": "the time manipulating tinkers",
+ "after": "the time-manipulating tinkers"
}
],
"https://www.parahumans.net/2019/07/23/dying-15-6/": [
{
"before": "The teams had finished picking",
"after": "The teams had finished picking."
+ },
+ {
+ "before": "no wifi",
+ "after": "no Wi-Fi"
+ },
+ {
+ "before": "a of linked metal segments",
+ "after": "a ??? of linked metal segments"
+ },
+ {
+ "before": "Things one through whatever",
+ "after": "Things One through whatever"
+ },
+ {
+ "before": "close to identical powersets",
+ "after": "close-to-identical powersets"
+ },
+ {
+ "before": "distracting you, Lookout, I wrote.",
+ "after": "distracting you, Lookout, I wrote."
+ },
+ {
+ "before": "The second they were out jailer’s door",
+ "after": "The second they were out of the jailer’s door"
}
],
"https://www.parahumans.net/2019/07/27/dying-15-7/": [
{
"before": "stranger or Master in",
"after": "stranger or master in"
+ },
+ {
+ "before": "d—! _ ! d—",
+ "after": "d- ! _ ! d-"
}
],
"https://www.parahumans.net/2019/07/30/dying-15-8/": [
@@ -7021,6 +7157,85 @@
{
"before": "I got a glimpse. it’s fucking with me",
"after": "I got a glimpse. It’s fucking with me"
+ },
+ {
+ "before": "What? But Cassie.",
+ "after": "What? But Cassie."
+ },
+ {
+ "before": "‘aunt Rachel‘s’",
+ "after": "‘Aunt Rachel’s’"
+ },
+ {
+ "before": "Letting teacher go, knowing two members",
+ "after": "Letting Teacher go, knowing two members"
+ },
+ {
+ "before": "using then to help hold up",
+ "after": "using them to help hold up"
+ }
+ ],
+ "https://www.parahumans.net/2019/08/03/dying-15-x/": [
+ {
+ "before": "The Halo protected against",
+ "after": "The halo protected against"
+ },
+ {
+ "before": "Normally teacher granted something",
+ "after": "Normally Teacher granted something"
+ },
+ {
+ "before": "Carbon Dioxide bubbles manifesting",
+ "after": "Carbon dioxide bubbles manifesting"
+ },
+ {
+ "before": "got the Thrall Commander, right",
+ "after": "got the thrall commander, right"
+ },
+ {
+ "before": "I could ask these questions. Why?",
+ "after": "I could ask these questions. Why?"
+ },
+ {
+ "before": "the Yàngbǎn supported squad",
+ "after": "the Yàngbǎn-supported squad"
+ },
+ {
+ "before": "radius in you systems",
+ "after": "radius in your systems"
+ }
+ ],
+ "https://www.parahumans.net/2019/08/06/dying-15-y/": [
+ {
+ "before": "little miss Webb’s way",
+ "after": "little Miss Webb’s way"
+ }
+ ],
+ "https://www.parahumans.net/2019/08/10/dying-15-z/": [
+ {
+ "before": "All around her, Thralls paid her their worship",
+ "after": "All around her, thralls paid her their worship"
+ },
+ {
+ "before": "almost floated almost three hundred thousand",
+ "after": "almost floated amongst three hundred thousand"
+ },
+ {
+ "before": "parahuman’s hackers",
+ "after": "parahumans’ hackers"
+ },
+ {
+ "before": "the room, and them him",
+ "after": "the room, and then him"
+ },
+ {
+ "before": "transcendant",
+ "after": "transcendent"
+ },
+ {
+ "before": "The one who was slicing up the supporting walls ground rumbled, the floor twisted, the house-sized turbine in the center of the room lost some of its foundation",
+ "after": "The one who was slicing up the supporting walls—\nThe ground rumbled, the floor twisted, the house-sized turbine in the center of the room lost some of its foundation",
+ "_comment": "This paragraph break probably wasn't what was intended, but a good chunk of the sentence is missing, so this is the best fix I can think of."
}
],
"https://www.parahumans.net/2019/08/13/from-within-16-1/": [
@@ -7071,6 +7286,12 @@
"after": "letters I wrote to Dad’s friends"
}
],
+ "https://www.parahumans.net/2019/09/03/from-within-16-7/": [
+ {
+ "before": "aunt Rachel",
+ "after": "Aunt Rachel"
+ }
+ ],
"https://www.parahumans.net/2019/09/10/from-within-16-9/": [
{
"before": "changers and and shakers",
From 4c1c7cd03df0544f8a89c93141d338925e01407b Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 15:29:00 -0500
Subject: [PATCH 091/186] Remove extra spaces before closing quote marks
---
lib/convert-worker.js | 3 ++-
lib/substitutions.json | 28 ++++++++++++++++++++--------
2 files changed, 22 insertions(+), 9 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index d427070..c4747bc 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -187,6 +187,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
xml = xml.replace(/I‘m/g, "I’m");
xml = xml.replace(/“\s+/g, "
“");
+ xml = xml.replace(/\s+”/g, "”");
xml = xml.replace(/'/g, "’");
xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
xml = xml.replace(/([a-z])”<\/p>/g, "$1.”
");
@@ -197,7 +198,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/([\w ’]+[!.?])”<\/em>/g, "$1”");
xml = xml.replace(/I”(m|ll)/g, "I’$1");
xml = xml.replace(/””<\/p>/g, "”
");
- xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “");
+ xml = xml.replace(/^([^“]+? ?)”(?![ —<])/gm, "$1 “");
xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "$1,");
xml = xml.replace(/‘([Kk])ay(?!’)/g, "’$1ay");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index b57c532..d9aed16 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -372,6 +372,10 @@
{
"before": "so to speak, they",
"after": "so to speak; they"
+ },
+ {
+ "before": "Brian looked across the room,” We’ve",
+ "after": "Brian looked across the room. “We’ve"
}
],
"https://parahumans.wordpress.com/2011/11/12/hive-5-3/": [
@@ -464,6 +468,10 @@
{
"before": "East",
"after": "east"
+ },
+ {
+ "before": "“Yeah,” Newter grinned,” You can tell",
+ "after": "“Yeah,” Newter grinned. “You can tell"
}
],
"https://parahumans.wordpress.com/2011/12/10/interlude-5/": [
@@ -2354,15 +2362,15 @@
"after": "to,” Alexandria spoke, “is"
},
{
- "before": "leave?” Hero asked. ”Why",
+ "before": "leave?” Hero asked.”Why",
"after": "leave?” Hero asked. “Why"
},
{
- "before": "Alexandria said. ”Why",
+ "before": "Alexandria said.”Why",
"after": "Alexandria said. “Why"
},
{
- "before": "voice as quiet. ”We’ve got teams",
+ "before": "voice as quiet.”We’ve got teams",
"after": "voice as quiet. “We’ve got teams"
}
],
@@ -2883,7 +2891,7 @@
],
"https://parahumans.wordpress.com/2013/02/16/interlude-18/": [
{
- "before": "her feet. ”We’re",
+ "before": "her feet.”We’re",
"after": "her feet. “We’re"
}
],
@@ -3546,6 +3554,10 @@
{
"before": "me,” I said. “Is",
"after": "me,” I said, “is"
+ },
+ {
+ "before": "“Help’s on the way.”
",
+ "after": "“Help’s on the way.”
"
}
],
"https://parahumans.wordpress.com/2013/07/25/interlude-26-donation-bonus-1/": [
@@ -5755,10 +5767,6 @@
"before": "post-Prison",
"after": "post-prison"
},
- {
- "before": "to take power!’ ”",
- "after": "to take power!’”"
- },
{
"before": "I’m sorry, what? What?",
"after": "I’m sorry, what? What?"
@@ -7626,6 +7634,10 @@
{
"before": "had harangued the P.R.T., even",
"after": "had harangued the PRT, even"
+ },
+ {
+ "before": "“Actually,” Tattletale said. “I’ve been going",
+ "after": "“Actually,” Tattletale said, “I’ve been going"
}
],
"https://www.parahumans.net/2020/04/21/last-20-e4/": [
From 2a0919547122dbd9de92c2c457189fab01b32b61 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 15:34:23 -0500
Subject: [PATCH 092/186] Italicize question marks in single-question-word
sentences
---
lib/convert-worker.js | 3 ++-
lib/substitutions.json | 12 ------------
2 files changed, 2 insertions(+), 13 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index c4747bc..ec9ed38 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -198,9 +198,10 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/([\w ’]+[!.?])”<\/em>/g, "$1”");
xml = xml.replace(/I”(m|ll)/g, "I’$1");
xml = xml.replace(/””<\/p>/g, "”");
- xml = xml.replace(/^([^“]+? ?)”(?![ —<])/gm, "$1 “");
+ xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “");
xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "$1,");
xml = xml.replace(/‘([Kk])ay(?!’)/g, "’$1ay");
+ xml = xml.replace(/(Why|What|Who|How|Where|When)<\/em>\?/g, "$1?");
// Similar problems occur in Ward with and as do in Worm with s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index d9aed16..0f603b3 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -1539,10 +1539,6 @@
"before": "Brian, “You’re",
"after": "Brian. “You’re"
},
- {
- "before": "Why? Why come",
- "after": "Why? Why come"
- },
{
"before": "‘Sides",
"after": "’Sides"
@@ -7166,10 +7162,6 @@
"before": "I got a glimpse. it’s fucking with me",
"after": "I got a glimpse. It’s fucking with me"
},
- {
- "before": "What? But Cassie.",
- "after": "What? But Cassie."
- },
{
"before": "‘aunt Rachel‘s’",
"after": "‘Aunt Rachel’s’"
@@ -7200,10 +7192,6 @@
"before": "got the Thrall Commander, right",
"after": "got the thrall commander, right"
},
- {
- "before": "I could ask these questions. Why?",
- "after": "I could ask these questions. Why?"
- },
{
"before": "the Yàngbǎn supported squad",
"after": "the Yàngbǎn-supported squad"
From ee76715935adbdfc1f53b3961047b5f8e772970d Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 15:43:20 -0500
Subject: [PATCH 093/186] Stop over-italicizing commas
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 30 +++++++-----------------------
2 files changed, 8 insertions(+), 23 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index ec9ed38..dd59cfd 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -202,6 +202,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "$1,");
xml = xml.replace(/‘([Kk])ay(?!’)/g, "’$1ay");
xml = xml.replace(/(Why|What|Who|How|Where|When)<\/em>\?/g, "$1?");
+ xml = xml.replace(/,<\/em>/g, ",");
// Similar problems occur in Ward with and as do in Worm with s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 0f603b3..a74c262 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -814,7 +814,7 @@
"after": "east"
},
{
- "before": "listened to,",
+ "before": "listened to,",
"after": "listened to."
},
{
@@ -940,11 +940,11 @@
"after": "Brigandine deceased, CD-5.
"
},
{
- "before": "deceased, CD-6.",
+ "before": "deceased, CD-6.",
"after": "deceased, CD-6."
},
{
- "before": "CD-6, The armbands",
+ "before": "CD-6, The armbands",
"after": "CD-6, the armbands"
},
{
@@ -1014,7 +1014,7 @@
"after": "Aegis deceased, CD-6"
},
{
- "before": "down, CC-6.",
+ "before": "down, CC-6.",
"after": "down, CC-6."
},
{
@@ -1052,17 +1052,13 @@
"after": "deceased, BW-8."
},
{
- "before": "deceased, CB-10.",
+ "before": "deceased, CB-10.",
"after": "deceased, CB-10."
},
{
"before": "Defensive perimeter, report.",
"after": "Defensive perimeter, report."
},
- {
- "before": "Enemy location unknown,",
- "after": "Enemy location unknown,"
- },
{
"before": "Dad.",
"after": "Dad."
@@ -1838,10 +1834,6 @@
{
"before": "“I—” Bitch paused, “Don’t",
"after": "“I—” Bitch paused. “Don’t"
- },
- {
- "before": "Bitch,”",
- "after": "Bitch,”"
}
],
"https://parahumans.wordpress.com/2012/06/17/interlude-11b/": [
@@ -1920,11 +1912,11 @@
"after": "Love me, you?
\nLove me, true?
"
},
{
- "before": "Crazed, kooky, cracked, crazy,
\nNutty, barmy, mad for me…
",
+ "before": "Crazed, kooky, cracked, crazy,
\nNutty, barmy, mad for me…
",
"after": "Crazed, kooky, cracked, crazy,
\nNutty, barmy, mad for me…
"
},
{
- "before": "Crazed, kooky, cracked, crazy,
\nMental, dotty, whacked, loopy…
",
+ "before": "Crazed, kooky, cracked, crazy,
\nMental, dotty, whacked, loopy…
",
"after": "Crazed, kooky, cracked, crazy,
\nMental, dotty, whacked, loopy…
"
},
{
@@ -6001,10 +5993,6 @@
"before": "New Brockton primary school",
"after": "New Brockton Primary School"
},
- {
- "before": "They can be so cool, but",
- "after": "They can be so cool, but"
- },
{
"before": "Rome-Roman’s",
"after": "Rome—Roman’s"
@@ -7134,10 +7122,6 @@
"before": "close to identical powersets",
"after": "close-to-identical powersets"
},
- {
- "before": "distracting you, Lookout, I wrote.",
- "after": "distracting you, Lookout, I wrote."
- },
{
"before": "The second they were out jailer’s door",
"after": "The second they were out of the jailer’s door"
From 79efec7080823b4ba34e09cf615b615738ca73ab Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 15:45:28 -0500
Subject: [PATCH 094/186] Fix end-of-line commas that should be periods.
---
lib/convert-worker.js | 2 ++
lib/substitutions.json | 8 ++++----
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index dd59cfd..f781647 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -203,6 +203,8 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/‘([Kk])ay(?!’)/g, "’$1ay");
xml = xml.replace(/(Why|What|Who|How|Where|When)<\/em>\?/g, "$1?");
xml = xml.replace(/,<\/em>/g, ",");
+ xml = xml.replace(/,”<\/p>/g, ".”");
+ xml = xml.replace(/(.*),<\/p>/g, "
$1.
");
// Similar problems occur in Ward with and as do in Worm with s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index a74c262..3ab3573 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -3967,10 +3967,6 @@
"before": "‘lowed to do that, ‘miright",
"after": "’lowed to do that, ’miright"
},
- {
- "before": "path was clear,",
- "after": "path was clear."
- },
{
"before": "hard, don’t you?",
"after": "hard, don’t you?"
@@ -6229,6 +6225,10 @@
{
"before": "Foil-Hookline",
"after": "Foil–Hookline"
+ },
+ {
+ "before": "“Fired through a hole I made in my fist.",
+ "after": "“Fired through a hole I made in my fist.”"
}
],
"https://www.parahumans.net/2019/02/05/blinding-11-12/": [
From 5310672cc2ff7f1b6d8ec406c6ab301e76ce64c6 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 15:58:08 -0500
Subject: [PATCH 095/186] De-capitalize aunt and uncle where appropriate
---
lib/substitutions.json | 60 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 60 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 3ab3573..c4f2055 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4563,6 +4563,10 @@
{
"before": "Stop. Please.",
"after": "Stop. Please."
+ },
+ {
+ "before": "his Aunt would say",
+ "after": "his aunt would say"
}
],
"https://www.parahumans.net/2018/02/08/shade-4-4/": [
@@ -7125,6 +7129,10 @@
{
"before": "The second they were out jailer’s door",
"after": "The second they were out of the jailer’s door"
+ },
+ {
+ "before": "my Aunt’s",
+ "after": "my aunt’s"
}
],
"https://www.parahumans.net/2019/07/27/dying-15-7/": [
@@ -7420,6 +7428,42 @@
{
"before": "This would be the Church.",
"after": "This would be the church."
+ },
+ {
+ "before": "my Uncle said",
+ "after": "my uncle said"
+ },
+ {
+ "before": "my Uncle asked",
+ "after": "my uncle asked"
+ },
+ {
+ "before": "Only Jester and my Uncle were",
+ "after": "Only Jester and my uncle were"
+ },
+ {
+ "before": "Victoria?” My Uncle asked",
+ "after": "Victoria?” my uncle asked"
+ }
+ ],
+ "https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/": [
+ {
+ "before": "face where her Uncle would see",
+ "after": "face where her uncle would see"
+ },
+ {
+ "before": "Her Uncle put his hand",
+ "after": "Her uncle put his hand"
+ },
+ {
+ "before": "and saw her Aunt and Uncle approaching",
+ "after": "and saw her aunt and uncle approaching"
+ }
+ ],
+ "https://www.parahumans.net/2019/12/24/infrared-19-1/": [
+ {
+ "before": "Victoria,” my Aunt said",
+ "after": "Victoria,” my aunt said"
}
],
"https://www.parahumans.net/2019/12/28/infrared-19-2/": [
@@ -7480,6 +7524,18 @@
{
"before": "pause, drew its attention",
"after": "pause, drew its attention."
+ },
+ {
+ "before": "someone my Aunt and Uncle deemed subversive",
+ "after": "someone my aunt and uncle deemed subversive"
+ },
+ {
+ "before": "Looks after my Aunt while my Uncle",
+ "after": "Looks after my aunt while my uncle"
+ },
+ {
+ "before": "can understand your Aunt and Uncle",
+ "after": "can understand your aunt and uncle"
}
],
"https://www.parahumans.net/2020/01/14/infrared-19-c/": [
@@ -7546,6 +7602,10 @@
{
"before": "so they grow in…”",
"after": "so they grow in…”"
+ },
+ {
+ "before": "my Uncle said",
+ "after": "my uncle said"
}
],
"https://www.parahumans.net/2020/02/11/infrared-19-f/": [
From 1328dfd8e317f7df5721d023f9808dfda3fad4df Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 16:01:00 -0500
Subject: [PATCH 096/186] Standardize on "A.I." instead of "AI"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index f781647..8e5bb59 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -613,6 +613,9 @@ function standardizeSpellings(xml) {
// 12 instances of "Dragon-craft", 12 instances of "Dragon craft", 1 instance of "dragon craft"
xml = xml.replace(/[Dd]ragon[ -](craft|mech)/g, "Dragon-$1");
+ // 88 instances of "A.I." to four of "AI"
+ xml = xml.replace(/AI(!?\b)/g, "A.I.");
+
return xml;
}
From f3063854ffa9309e9316d3d89521242f9c46a4a6 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 16:04:06 -0500
Subject: [PATCH 097/186] De-capitalize season names
---
lib/substitutions.json | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index c4f2055..3c6072f 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -7230,6 +7230,10 @@
{
"before": "wasn’t A.I. it was just a picture",
"after": "wasn’t A.I. It was just a picture"
+ },
+ {
+ "before": "end of Summer that",
+ "after": "end of summer that"
}
],
"https://www.parahumans.net/2019/08/17/from-within-16-2/": [
@@ -7392,6 +7396,10 @@
{
"before": "“Entrapment.”",
"after": "“Entrapment.”"
+ },
+ {
+ "before": "tents last Winter or Spring",
+ "after": "tents last winter or spring"
}
],
"https://www.parahumans.net/2019/11/23/radiation-18-3/": [
@@ -7444,6 +7452,10 @@
{
"before": "Victoria?” My Uncle asked",
"after": "Victoria?” my uncle asked"
+ },
+ {
+ "before": "end of the Summer",
+ "after": "end of the summer"
}
],
"https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/": [
From d62739f6cded1194fb35eeb764db2e5e1333bf58 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 16:06:14 -0500
Subject: [PATCH 098/186] De-capitalize math
---
lib/substitutions.json | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 3c6072f..96859db 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -7244,6 +7244,10 @@
{
"before": "Hardboil",
"after": "Hard Boil"
+ },
+ {
+ "before": "after class in Math, my",
+ "after": "after class in math, my"
}
],
"https://www.parahumans.net/2019/08/20/from-within-16-3/": [
@@ -7322,6 +7326,12 @@
"after": "egg for Crystal and Mom"
}
],
+ "https://www.parahumans.net/2019/10/08/sundown-17-3/": [
+ {
+ "before": "say, Math class",
+ "after": "say, math class"
+ }
+ ],
"https://www.parahumans.net/2019/10/12/sundown-17-4/": [
{
"before": "on resisting Master influence",
From c128712bb442f1b3726ffd0cf556dc723b398f48 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 16:11:58 -0500
Subject: [PATCH 099/186] Change treatment of Teacher color team names
Previously I was treating them as "conflict or connection", trying to give them an en dash. But I think they're just compound adjectives, so let's revert to a hyphen-minus. Also fixes missing capitalization for one of them.
---
lib/convert-worker.js | 1 -
lib/substitutions.json | 4 ++++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 8e5bb59..ea159d4 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -394,7 +394,6 @@ function enDashJointNames(xml) {
xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2");
xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
xml = xml.replace(/East-West/g, "east–west");
- xml = xml.replace(/(Green|Yellow)-Black/g, "$1–Black");
xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 96859db..50f02e3 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -7077,6 +7077,10 @@
{
"before": "one remaining Thrall",
"after": "one remaining thrall"
+ },
+ {
+ "before": "Team copper-white",
+ "after": "Team Copper-White"
}
],
"https://www.parahumans.net/2019/07/20/dying-15-5/": [
From 32817eb2551e5fdd26daabc7f56a0eff5b20229a Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 16:13:59 -0500
Subject: [PATCH 100/186] =?UTF-8?q?Fix=20capitalization=20and=20apostrophe?=
=?UTF-8?q?s=20for=20=E2=80=99Cage?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
lib/convert-worker.js | 3 +++
lib/substitutions.json | 4 ----
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index ea159d4..6f0c2ea 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -298,6 +298,9 @@ function fixTruncatedWords(xml) {
// Short for "Anelace"
xml = xml.replace(/[‘’][Ll]ace(?![a-z])/g, "’Lace");
+ // Short for "Birdcage"
+ xml = xml.replace(/[‘’][Cc]age(?![a-z])/g, "’Cage");
+
return xml;
}
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 50f02e3..1174977 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4275,10 +4275,6 @@
"before": "and His flesh began",
"after": "and his flesh began"
},
- {
- "before": "the ‘cage",
- "after": "the ’cage"
- },
{
"before": "background of his mind",
"after": "background of his mind."
From 2412fa0375b766c1984ed2c6bee1635c6cee7c9a Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 16:19:45 -0500
Subject: [PATCH 101/186] Capitalize U-turn
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 6f0c2ea..8fdea9e 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -552,6 +552,9 @@ function fixMispellings(xml) {
// Preemptive(ly) is often hyphenated (not always). It should not be.
xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
+ // All dictionaries agree this is capitalized.
+ xml = xml.replace(/u-turn/g, "U-turn");
+
return xml;
}
From 5c80327fef326ea28cf9dab5f3ba1b6c5e3f9110 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 16:07:52 -0500
Subject: [PATCH 102/186] Spot fixes for Ward through From Within 16.z
---
lib/convert-worker.js | 12 ++-
lib/substitutions.json | 214 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 224 insertions(+), 2 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 8fdea9e..4156330 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -352,11 +352,15 @@ function standardizeNames(xml) {
xml = xml.replace(/Amais/g, "Amias");
// Earlier chapters have a space; later ones do not. They're separate words, so side with the earlier chapters.
- xml = xml.replace(/Crock o[‘’]Shit/g, "Crock o’ Shit");
+ // One location is missing the "k".
+ xml = xml.replace(/Crock? o[‘’]Shit/g, "Crock o’ Shit");
// 5 instances of "Jotun" to 2 of "Jotunn"
xml = xml.replace(/Jotunn/g, "Jotun");
+ // 13 instances of Elman to 1 of Elmann
+ xml = xml.replace(/Elmann/g, "Elman");
+
return xml;
}
@@ -464,6 +468,7 @@ function fixCapitalization(xml, book) {
// Capitalization is inconsistent, but shard names seems to usually be capitalized.
xml = xml.replace(/Grasping self/g, "Grasping Self");
xml = xml.replace(/Cloven stranger/g, "Cloven Stranger");
+ xml = xml.replace(/Princess shaper/g, "Princess Shaper");
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
@@ -619,7 +624,10 @@ function standardizeSpellings(xml) {
xml = xml.replace(/[Dd]ragon[ -](craft|mech)/g, "Dragon-$1");
// 88 instances of "A.I." to four of "AI"
- xml = xml.replace(/AI(!?\b)/g, "A.I.");
+ xml = xml.replace(/(?<=\b)AI(?=\b)/g, "A.I.");
+
+ // 2 instances of "G.M." to one of "GM"
+ xml = xml.replace(/(?<=\b)GM(?=\b)/g, "G.M.");
return xml;
}
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 1174977..cfad210 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -170,6 +170,10 @@
{
"before": "gangbangers easy access to to the rest of the city",
"after": "gangbangers easy access to the rest of the city"
+ },
+ {
+ "before": "be, tonight, If that’s",
+ "after": "be, tonight, if that’s"
}
],
"https://parahumans.wordpress.com/2011/08/23/agitation-3-5/": [
@@ -5285,6 +5289,10 @@
{
"before": "mask was only a B-",
"after": "mask was only a B−"
+ },
+ {
+ "before": "Nothing about it was even B-.",
+ "after": "Nothing about it was even B−."
}
],
"https://www.parahumans.net/2018/07/21/beacon-8-6/": [
@@ -6731,6 +6739,10 @@
{
"before": "‘why didn’t Imp back us up’?",
"after": "‘why didn’t Imp back us up?’"
+ },
+ {
+ "before": "panic inducing memories",
+ "after": "panic-inducing memories"
}
],
"https://www.parahumans.net/2019/05/11/black-13-11/": [
@@ -7025,6 +7037,10 @@
{
"before": "post its",
"after": "Post-its"
+ },
+ {
+ "before": "travelled",
+ "after": "traveled"
}
],
"https://www.parahumans.net/2019/07/13/dying-15-3/": [
@@ -7077,6 +7093,10 @@
{
"before": "Team copper-white",
"after": "Team Copper-White"
+ },
+ {
+ "before": "Caraytid",
+ "after": "Caryatid"
}
],
"https://www.parahumans.net/2019/07/20/dying-15-5/": [
@@ -7234,6 +7254,10 @@
{
"before": "end of Summer that",
"after": "end of summer that"
+ },
+ {
+ "before": "or get back to where you",
+ "after": "or to get back to where you"
}
],
"https://www.parahumans.net/2019/08/17/from-within-16-2/": [
@@ -7248,6 +7272,18 @@
{
"before": "after class in Math, my",
"after": "after class in math, my"
+ },
+ {
+ "before": "My point is, If you",
+ "after": "My point is, if you"
+ },
+ {
+ "before": "loyalty inducing Goddesses",
+ "after": "loyalty-inducing Goddesses"
+ },
+ {
+ "before": "Did you get an impression about her character, or anything weird.",
+ "after": "Did you get an impression about her character, or anything weird?"
}
],
"https://www.parahumans.net/2019/08/20/from-within-16-3/": [
@@ -7258,12 +7294,24 @@
{
"before": "for the recording.”",
"after": "for the recording."
+ },
+ {
+ "before": "—But one of us could still",
+ "after": "—but one of us could still"
}
],
"https://www.parahumans.net/2019/08/24/from-within-16-4/": [
{
"before": "the rest black and slim. with doodle-like drawings",
"after": "the rest black and slim, with doodle-like drawings"
+ },
+ {
+ "before": "trophy wrack",
+ "after": "trophy rack"
+ },
+ {
+ "before": "hopped onto his back for the extra eight",
+ "after": "hopped onto his back for the extra height"
}
],
"https://www.parahumans.net/2019/08/27/from-within-16-5/": [
@@ -7274,12 +7322,24 @@
{
"before": "French toast",
"after": "french toast"
+ },
+ {
+ "before": "put into Neutral out of",
+ "after": "put into neutral out of"
+ },
+ {
+ "before": "after Mockument’s had hatched that",
+ "after": "after Mockument had hatched that"
}
],
"https://www.parahumans.net/2019/08/31/from-within-16-6/": [
{
"before": "letters I wrote to dad’s friends",
"after": "letters I wrote to Dad’s friends"
+ },
+ {
+ "before": "—But that might be intentional",
+ "after": "—but that might be intentional"
}
],
"https://www.parahumans.net/2019/09/03/from-within-16-7/": [
@@ -7288,16 +7348,136 @@
"after": "Aunt Rachel"
}
],
+ "https://www.parahumans.net/2019/09/07/from-within-16-8/": [
+ {
+ "before": "Giant lightning man guard dog",
+ "after": "Giant lightning-man guard dog"
+ },
+ {
+ "before": "35: 10",
+ "after": "35:10"
+ },
+ {
+ "before": "pluckign",
+ "after": "plucking"
+ },
+ {
+ "before": "special event space",
+ "after": "Special event space"
+ }
+ ],
"https://www.parahumans.net/2019/09/10/from-within-16-9/": [
{
"before": "changers and and shakers",
"after": "changers and shakers"
+ },
+ {
+ "before": "the Twins’ portion of the room",
+ "after": "the twins’ portion of the room"
+ },
+ {
+ "before": "one central area—No weapon—and swung backhanded",
+ "after": "one central area—no weapon—and swung backhanded"
+ },
+ {
+ "before": "shifted course to favor the smallest portion of the gap leaped the ditch",
+ "after": "shifted course to favor the smallest portion of the gap and leaped the ditch"
+ },
+ {
+ "before": "—It’s not three-dimensional",
+ "after": "—it’s not three-dimensional"
+ },
+ {
+ "before": "a work bench",
+ "after": "a workbench"
+ },
+ {
+ "before": "—Don’t shoot it!",
+ "after": "—don’t shoot it!"
}
],
"https://www.parahumans.net/2019/09/15/from-within-16-10/": [
{
"before": "carried on. out of",
"after": "carried on, out of"
+ },
+ {
+ "before": "That-. Okay.",
+ "after": "That— Okay."
+ },
+ {
+ "before": "combat trained",
+ "after": "combat-trained"
+ }
+ ],
+ "https://www.parahumans.net/2019/09/17/from-within-16-11/": [
+ {
+ "before": "Warrior monk",
+ "after": "Warrior Monk"
+ },
+ {
+ "before": "Empire Eighty-eight",
+ "after": "Empire Eighty-Eight"
+ }
+ ],
+ "https://www.parahumans.net/2019/09/21/from-within-16-12/": [
+ {
+ "before": "harder than normal exhalation",
+ "after": "harder-than-normal exhalation"
+ },
+ {
+ "before": "maybe a bit to the right. “There’s probably a",
+ "after": "maybe a bit to the right, “there’s probably a"
+ },
+ {
+ "before": "—We need to do this fast",
+ "after": "—we need to do this fast"
+ }
+ ],
+ "https://www.parahumans.net/2019/09/24/from-within-16-y/": [
+ {
+ "before": "…She was my first kiss",
+ "after": "…she was my first kiss"
+ },
+ {
+ "before": "another on the railing. “I’m not",
+ "after": "another on the railing, “I’m not"
+ },
+ {
+ "before": "that out loud, “—But you’re a therapist",
+ "after": "that out loud, “—but you’re a therapist"
+ },
+ {
+ "before": "monster slaying",
+ "after": "monster-slaying"
+ },
+ {
+ "before": "so important they’d focus on you’?”",
+ "after": "so important they’d focus on you?’”"
+ },
+ {
+ "before": "Than in B—In B, the woman",
+ "after": "Than in B—in B, the woman"
+ },
+ {
+ "before": "insistent, annoyed. “I could",
+ "after": "insistent, annoyed, “I could"
+ },
+ {
+ "before": "Listen!",
+ "after": "Listen!"
+ },
+ {
+ "before": "“—But when I shook your hand",
+ "after": "“—but when I shook your hand"
+ },
+ {
+ "before": "That,” Chris said. “Is what",
+ "after": "That,” Chris said, “is what"
+ },
+ {
+ "before": "transcending earth and humanity",
+ "after": "transcending Earth and humanity"
}
],
"https://www.parahumans.net/2019/09/28/from-within-16-z/": [
@@ -7308,6 +7488,34 @@
{
"before": "in the way,”she said",
"after": "in the way,” she said"
+ },
+ {
+ "before": "his voice just for her. “I’ll be",
+ "after": "his voice just for her, “I’ll be"
+ },
+ {
+ "before": "through the threshold to the.",
+ "after": "through the threshold."
+ },
+ {
+ "regExp": "capitol",
+ "replacement": "capital"
+ },
+ {
+ "before": "—But none of that would",
+ "after": "—but none of that would"
+ },
+ {
+ "before": "the Doctor replied",
+ "after": "the doctor replied"
+ },
+ {
+ "before": "…It’d be exactly the same",
+ "after": "…it’d be exactly the same"
+ },
+ {
+ "before": "This—This had been just for her.",
+ "after": "This—this had been just for her."
}
],
"https://www.parahumans.net/2019/10/01/sundown-17-1/": [
@@ -7694,6 +7902,12 @@
"after": "“Actually,” Tattletale said, “I’ve been going"
}
],
+ "https://www.parahumans.net/2020/04/18/last-20-e3/": [
+ {
+ "before": "madness inducing years of isolation",
+ "after": "madness-inducing years of isolation"
+ }
+ ],
"https://www.parahumans.net/2020/04/21/last-20-e4/": [
{
"before": "grown around the the maille sheath",
From 2e4a3e56dc54f91cddc1882039d8e34c67e64ca6 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 16:50:30 -0500
Subject: [PATCH 103/186] Hyphenate compound words ending in "haired"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 4156330..3f34d1c 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -578,6 +578,9 @@ function fixHyphens(xml) {
xml = xml.replace(/(?
Date: Sun, 29 Nov 2020 16:55:34 -0500
Subject: [PATCH 104/186] Fix opening quotes inside quotes to apostrophes
instead
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 10 ----------
2 files changed, 1 insertion(+), 10 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 3f34d1c..bb0674b 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -205,6 +205,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/,<\/em>/g, ",");
xml = xml.replace(/,”<\/p>/g, ".”");
xml = xml.replace(/(.*),<\/p>/g, "
$1.
");
+ xml = xml.replace(/‘(\w+)‘(\w+)’/g, "‘$1’$2’");
// Similar problems occur in Ward with and as do in Worm with s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index cfad210..0311867 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4985,12 +4985,6 @@
"after": "discuss goals,” Jessica said, “we think"
}
],
- "https://www.parahumans.net/2018/05/29/eclipse-x-4/": [
- {
- "before": "ma‘am",
- "after": "ma’am"
- }
- ],
"https://www.parahumans.net/2018/05/30/eclipse-x-5/": [
{
"before": "no words. and there",
@@ -6864,10 +6858,6 @@
"before": "inter-earth",
"after": "inter-Earth"
},
- {
- "before": "‘can‘t’",
- "after": "‘can’t’"
- },
{
"before": "The Western of the two half-castles",
"after": "The western of the two half-castles"
From 0c22f7df11de4dfe4ba658feb329771f2813e600 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 17:00:22 -0500
Subject: [PATCH 105/186] Hyphenate N-dimensional
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index bb0674b..9db4fb8 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -592,6 +592,7 @@ function fixHyphens(xml) {
xml = xml.replace(/well armed/g, "well-armed");
xml = xml.replace(/able bodied/g, "able-bodied");
xml = xml.replace(/self conscious/g, "self-conscious");
+ xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
// This is usually correct but sometimes wrong.
xml = xml.replace(/neo /g, "neo-");
From 54f2f82650f121cc603fda95576439d0f6a99b37 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 17:08:31 -0500
Subject: [PATCH 106/186] De-capitalize "corona pollentia" and friends
---
lib/convert-worker.js | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 9db4fb8..40ab0ab 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -493,6 +493,13 @@ function fixCapitalization(xml, book) {
// a compound noun or at the beginning of a sentence.
xml = xml.replace(/(?
Date: Sun, 29 Nov 2020 17:10:54 -0500
Subject: [PATCH 107/186] Always hyphenate "one-on-one"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 40ab0ab..6f52ab1 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -600,6 +600,7 @@ function fixHyphens(xml) {
xml = xml.replace(/able bodied/g, "able-bodied");
xml = xml.replace(/self conscious/g, "self-conscious");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
+ xml = xml.replace(/(?<=\b)one on one(?=\b)/g, "one-on-one");
// This is usually correct but sometimes wrong.
xml = xml.replace(/neo /g, "neo-");
From ecea0f56604f724300ca20779ea5fa07d4d20c2d Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 17:17:54 -0500
Subject: [PATCH 108/186] Hyphenate day-to-day when appropriate
---
lib/substitutions.json | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 0311867..d51c5a2 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -390,6 +390,10 @@
{
"before": "Terrifying.” Alan told my dad, “When",
"after": "Terrifying,” Alan told my dad. “When"
+ },
+ {
+ "before": "day to day basis",
+ "after": "day-to-day basis"
}
],
"https://parahumans.wordpress.com/2011/11/15/hive-5-4/": [
@@ -476,6 +480,10 @@
{
"before": "“Yeah,” Newter grinned,” You can tell",
"after": "“Yeah,” Newter grinned. “You can tell"
+ },
+ {
+ "before": "day to day interactions",
+ "after": "day-to-day interactions"
}
],
"https://parahumans.wordpress.com/2011/12/10/interlude-5/": [
@@ -4210,6 +4218,10 @@
{
"before": "stories,” Riley said. “The",
"after": "stories,” Riley said, “the"
+ },
+ {
+ "before": "day to day, minute-to-minute existence",
+ "after": "day-to-day, minute-to-minute existence"
}
],
"https://parahumans.wordpress.com/2013/11/05/teneral-e-2/": [
@@ -4399,6 +4411,10 @@
{
"before": "Glitzglam *New Message*: I",
"after": "Glitzglam *New Message*: I"
+ },
+ {
+ "before": "day to day life",
+ "after": "day-to-day life"
}
],
"https://www.parahumans.net/2017/12/09/flare-2-1/": [
@@ -4959,6 +4975,10 @@
{
"before": "no Master pets",
"after": "no master pets"
+ },
+ {
+ "before": "day to day basis",
+ "after": "day-to-day basis"
}
],
"https://www.parahumans.net/2018/05/26/eclipse-x-1/": [
@@ -5227,6 +5247,10 @@
{
"before": "a group of minors,” Dragon said. “Allow others",
"after": "a group of minors,” Dragon said, “allow others"
+ },
+ {
+ "before": "day to day basis",
+ "after": "day-to-day basis"
}
],
"https://www.parahumans.net/2018/07/07/beacon-8-2/": [
@@ -6031,6 +6055,10 @@
{
"before": "Chiet",
"after": "Cheit"
+ },
+ {
+ "before": "day to day way",
+ "after": "day-to-day way"
}
],
"https://www.parahumans.net/2019/01/01/blinding-11-4/": [
@@ -6975,6 +7003,10 @@
{
"before": "kanji for ‘Shi’",
"after": "kanji for ‘shi’"
+ },
+ {
+ "before": "re-learning day to day life",
+ "after": "relearning day-to-day life"
}
],
"https://www.parahumans.net/2019/07/06/dying-15-1/": [
@@ -7468,6 +7500,14 @@
{
"before": "transcending earth and humanity",
"after": "transcending Earth and humanity"
+ },
+ {
+ "before": "day to day stuff",
+ "after": "day-to-day stuff"
+ },
+ {
+ "before": "day to day work",
+ "after": "day-to-day work"
}
],
"https://www.parahumans.net/2019/09/28/from-within-16-z/": [
From 6e591815242e27b38a31e88ba3414d5ebdbf8ae4 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 17:22:36 -0500
Subject: [PATCH 109/186] Standardize on "Juliette" instead of "Juliet"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 6f52ab1..8356f39 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -352,6 +352,9 @@ function standardizeNames(xml) {
// 25 instances of "Amias" to 3 of "Amais"
xml = xml.replace(/Amais/g, "Amias");
+ // 185 instances of Juliette to 4 of Juliet
+ xml = xml.replace(/Juliet(?=\b)/g, "Juliette");
+
// Earlier chapters have a space; later ones do not. They're separate words, so side with the earlier chapters.
// One location is missing the "k".
xml = xml.replace(/Crock? o[‘’]Shit/g, "Crock o’ Shit");
From fc641af4f58782dc5df0e84222495de477ab4e96 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sun, 29 Nov 2020 17:26:34 -0500
Subject: [PATCH 110/186] 4.7.0
---
npm-shrinkwrap.json | 2 +-
package.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 2821b4e..b997681 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
{
"name": "worm-scraper",
- "version": "4.6.1",
+ "version": "4.7.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index e240275..80f362f 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"parahuman",
"scraper"
],
- "version": "4.6.1",
+ "version": "4.7.0",
"author": "Domenic Denicola (https://domenic.me/)",
"license": "WTFPL",
"repository": "domenic/worm-scraper",
From a86e21b846177185bebfbf4011e1f9e29c998c69 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 13:01:05 -0500
Subject: [PATCH 111/186] Re-capitalize "Stranger Titan"
---
lib/convert-worker.js | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 8356f39..ff96285 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -503,15 +503,18 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/Radiata/g, "radiata");
xml = xml.replace(/Gemma/g, "gemma");
- // Especially early in the story, PRT designations are capitalized; they should not be. This fixes the cases where we
+ // Especially early in Worm, PRT designations are capitalized; they should not be. This fixes the cases where we
// can be reasonably sure they don't start a sentence, although more specific instances are done in
// substitutions.json, and some need to be back-corrected.
//
- // Note: "Master" is specifically omitted because it fails poorly on Interlude 4. Other instances need to be
+ // Note: "Master" is specifically omitted because it fails poorly on Worm Interlude 4. Other instances need to be
// corrected via substitutions.json.
+ //
+ // This also over-de-capitalizes "The Stranger" in Ward (a titan name). Those also get fixed in substitutions.json.
xml = xml.replace(
- /([a-zA-Z,] |\/)(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)/g,
- (_, prefix, designation) => prefix + designation.toLowerCase()
+ // eslint-disable-next-line max-len
+ /(?|“|\n|: )(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)(?! [A-Z])/g,
+ (_, designation) => designation.toLowerCase()
);
xml = xml.replace(
/(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)-(\d+)/gi,
From cc151355fde1a430cbb8a091db9676b17e44039a Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 13:07:50 -0500
Subject: [PATCH 112/186] Re-capitalize "the Stranger"
---
lib/substitutions.json | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index d51c5a2..0c470f6 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2987,7 +2987,7 @@
{
"before": "Unfortunate tinker",
"after": "Unfortunate tinker",
- "_comment": "See convert.js; this corrects an over-correction"
+ "_comment": "See convert-worker.js; this corrects an over-correction"
}
],
"https://parahumans.wordpress.com/2013/03/19/interlude-19/": [
@@ -3068,7 +3068,7 @@
{
"before": "the Clairvoyant",
"after": "the clairvoyant",
- "_comment": "see convert.js; this corrects an over-correction"
+ "_comment": "See convert-worker.js; this corrects an over-correction"
},
{
"before": "maintain eye contact. he could feel the warmth",
@@ -7724,6 +7724,11 @@
{
"before": "Victoria,” my Aunt said",
"after": "Victoria,” my aunt said"
+ },
+ {
+ "regExp": "([Tt]he) stranger",
+ "replacement": "$1 Stranger",
+ "_comment": "See convert-worker.js; this corrects an over-correction"
}
],
"https://www.parahumans.net/2019/12/28/infrared-19-2/": [
@@ -7738,6 +7743,11 @@
{
"before": "scream—no sound when viewing crystal-pictures, of course-, and",
"after": "scream—no sound when viewing crystal-pictures, of course—and"
+ },
+ {
+ "regExp": "([Tt]he) stranger",
+ "replacement": "$1 Stranger",
+ "_comment": "See convert-worker.js; this corrects an over-correction"
}
],
"https://www.parahumans.net/2020/01/01/interlude-19-a/": [
From 730cc512e393c91e61ca64996a7888620c616876 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 13:17:14 -0500
Subject: [PATCH 113/186] Capitalize "Titans"
---
lib/convert-worker.js | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index ff96285..ea79761 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -561,6 +561,15 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/english(?! muffin)/g, "English");
xml = xml.replace(/(?
Date: Sat, 19 Dec 2020 13:40:07 -0500
Subject: [PATCH 114/186] Consistently capitalize "Titan"
---
lib/convert-worker.js | 19 ++++++++-----
lib/substitutions.json | 60 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 72 insertions(+), 7 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index ea79761..5873150 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -465,10 +465,6 @@ function fixCapitalization(xml, book) {
// The Speedrunners team name is missing its capitalization a couple times.
xml = xml.replace(/speedrunners/g, "Speedrunners");
- // Dauntless is a cape name. The majority of the time "Dauntless Titan" is fully capitalized, but either word
- // sometimes is missing its capitalization.
- xml = xml.replace(/dauntless titan/ig, "Dauntless Titan");
-
// Capitalization is inconsistent, but shard names seems to usually be capitalized.
xml = xml.replace(/Grasping self/g, "Grasping Self");
xml = xml.replace(/Cloven stranger/g, "Cloven Stranger");
@@ -563,11 +559,20 @@ function fixCapitalization(xml, book) {
// I was very torn on what to do with capitalization for "Titan" and "Titans". In general you don't capitalize species
// names or other classifications, e.g. style guides are quite clear you don't capitalize "gods". The author
- // capitalizes them more often than not (e.g., 179 raw "Titans" to 49 "titans"), but is quite inconsistent. In the
- // end, I decided for capitalization, based on the precedent set by "Endbringers" (which are conceptually paired with
- // Titans several times in the text).
+ // capitalizes them more often than not (e.g., 179 raw "Titans" to 49 "titans"), but is quite inconsistent.
+ //
+ // In the end, I decided against de-capitalization, based on the precedent set by "Endbringers" (which are
+ // conceptually paired with Titans several times in the text). However, we only capitalize the class after they are
+ // _introduced_ as a class in Sundown 17.y. (Before then we still capitalize individual names like "Dauntless Titan"
+ // or "Kronos Titan".)
if (book === "ward") {
+ // All plural discussions of "Titans" are after Sundown 17.y.
xml = xml.replace(/titans/g, "Titans");
+
+ // Since we can't safely change all instances of "titan", most are in substitutions.json. We can do a few here,
+ // though.
+ xml = xml.replace(/dauntless titan/ig, "Dauntless Titan"); // Sometimes "Dauntless" isn't even capitalized.
+ xml = xml.replace(/Kronos titan/g, "Kronos Titan");
}
return xml;
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 0c470f6..8249bd6 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -7622,18 +7622,30 @@
{
"before": "Because it made mom come",
"after": "Because it made Mom come"
+ },
+ {
+ "before": "new titan",
+ "after": "new Titan"
}
],
"https://www.parahumans.net/2019/11/12/interlude-17-z-sundown/": [
{
"before": "had his powers. the forcefield above",
"after": "had his powers. The forcefield above"
+ },
+ {
+ "regExp": "titan(?![a-z])",
+ "replacement": "Titan"
}
],
"https://www.parahumans.net/2019/11/16/radiation-18-1/": [
{
"before": "worked for the P.R.T.",
"after": "worked for the PRT."
+ },
+ {
+ "regExp": "titan(?![a-z])",
+ "replacement": "Titan"
}
],
"https://www.parahumans.net/2019/11/19/radiation-18-2/": [
@@ -7648,12 +7660,20 @@
{
"before": "tents last Winter or Spring",
"after": "tents last winter or spring"
+ },
+ {
+ "regExp": "titan(?![a-z])",
+ "replacement": "Titan"
}
],
"https://www.parahumans.net/2019/11/23/radiation-18-3/": [
{
"before": "in the eyes. they were a",
"after": "in the eyes. They were a"
+ },
+ {
+ "before": "first look at a titan",
+ "after": "first look at a Titan"
}
],
"https://www.parahumans.net/2019/11/26/radiation-18-4/": [
@@ -7668,12 +7688,20 @@
{
"before": "some business headquarters I was in",
"after": "some business headquarters I was in."
+ },
+ {
+ "before": "hunched-over titan",
+ "after": "hunched-over Titan"
}
],
"https://www.parahumans.net/2019/12/03/radiation-18-6/": [
{
"before": "Some Master minions",
"after": "Some master minions"
+ },
+ {
+ "before": "bomb to a titan",
+ "after": "bomb to a Titan"
}
],
"https://www.parahumans.net/2019/12/07/radiation-18-7/": [
@@ -7706,6 +7734,18 @@
"after": "end of the summer"
}
],
+ "https://www.parahumans.net/2019/12/14/radiation-18-9/": [
+ {
+ "before": "the next titan",
+ "after": "the next Titan"
+ }
+ ],
+ "https://www.parahumans.net/2019/12/17/radiation-18-10/": [
+ {
+ "before": "one titan dogging me",
+ "after": "one Titan dogging me"
+ }
+ ],
"https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/": [
{
"before": "face where her Uncle would see",
@@ -7748,6 +7788,10 @@
"regExp": "([Tt]he) stranger",
"replacement": "$1 Stranger",
"_comment": "See convert-worker.js; this corrects an over-correction"
+ },
+ {
+ "before": "because the titan was",
+ "after": "because the Titan was"
}
],
"https://www.parahumans.net/2020/01/01/interlude-19-a/": [
@@ -7764,6 +7808,14 @@
{
"before": "I thought of dad, seeing him hug my mother",
"after": "I thought of Dad, seeing him hug my mother"
+ },
+ {
+ "before": "One titan mobilizing",
+ "after": "One Titan mobilizing"
+ },
+ {
+ "before": "the horrifying titan",
+ "after": "the horrifying Titan"
}
],
"https://www.parahumans.net/2020/01/11/infrared-19-b/": [
@@ -7830,6 +7882,10 @@
{
"before": "-through the emotion. Emote through the emotion. Emote—",
"after": "—through the emotion. Emote through the emotion. Emote—"
+ },
+ {
+ "before": "titan Ophion’s",
+ "after": "Titan Ophion’s"
}
],
"https://www.parahumans.net/2020/01/25/infrared-19-d/": [
@@ -7912,6 +7968,10 @@
{
"before": "5’1 “",
"after": "5′1″"
+ },
+ {
+ "before": "pink and black titan",
+ "after": "pink and black Titan"
}
],
"https://www.parahumans.net/2020/02/25/last-20-1/": [
From 3aece3e05e486654da2960022f0b7c008d2f9351 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 14:40:21 -0500
Subject: [PATCH 115/186] Fix misspellings of "Tattletale"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 5873150..4c8e5c9 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -365,6 +365,9 @@ function standardizeNames(xml) {
// 13 instances of Elman to 1 of Elmann
xml = xml.replace(/Elmann/g, "Elman");
+ // Thousands of instances of Tattletale to 4 instances of Tatteltale
+ xml = xml.replace(/Tatteltale/g, "Tattletale");
+
return xml;
}
From 3e06358fa24003ccad81220bf8d7fde27554efe5 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 13:42:36 -0500
Subject: [PATCH 116/186] Spot fixes for Ward through Sundown 17.z
---
lib/convert-worker.js | 18 ++-
lib/substitutions.json | 276 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 283 insertions(+), 11 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 4c8e5c9..1cd668f 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -302,6 +302,9 @@ function fixTruncatedWords(xml) {
// Short for "Birdcage"
xml = xml.replace(/[‘’][Cc]age(?![a-z])/g, "’Cage");
+ // We can't do "’Clear" (short for Crystalclear) here because it appears too much as a normal word preceded by an
+ // open quote, so we do that in substitutions.json.
+
return xml;
}
@@ -396,6 +399,7 @@ function enDashJointNames(xml) {
// Joint names should use en dashes
xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
+ xml = xml.replace(/Cheit-Gimel/g, "Bet–Gimel");
xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
@@ -417,6 +421,8 @@ function enDashJointNames(xml) {
xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
+ xml = xml.replace(/([Aa])gent-parahuman/g, "$1gent–parahuman");
+ xml = xml.replace(/([Pp])arahuman-agent/g, "$1arahuman–agent");
return xml;
}
@@ -425,7 +431,7 @@ function fixPossessives(xml) {
// Fix possessive of names ending in "s".
xml = xml.replace(
// eslint-disable-next-line max-len
- /(?".
@@ -525,6 +526,11 @@ function fixCapitalization(xml, book) {
"$1–$2"
);
+ // Capitalization is inconsistent, but shard names seem to usually be capitalized.
+ xml = xml.replace(/Grasping self/g, "Grasping Self");
+ xml = xml.replace(/Cloven stranger/g, "Cloven Stranger");
+ xml = xml.replace(/Princess shaper/g, "Princess Shaper");
+
// Place names need to always be capitalized
xml = xml.replace(/North end/g, "North End");
xml = xml.replace(/(Stonemast|Shale) avenue/g, "$1 Avenue");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 8249bd6..a9ee5de 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4162,6 +4162,10 @@
{
"before": "on the other earth",
"after": "on the other Earth"
+ },
+ {
+ "before": "name? did it start",
+ "after": "name? Did it start"
}
],
"https://parahumans.wordpress.com/2013/10/29/30-7/": [
@@ -6227,6 +6231,10 @@
{
"before": "apparently I",
"after": "apparently I"
+ },
+ {
+ "before": "Hey Little V",
+ "after": "Hey little V"
}
],
"https://www.parahumans.net/2019/01/29/blinding-11-10/": [
@@ -6272,7 +6280,7 @@
},
{
"before": "Go you, Little V.",
- "after": "Go you, Little V."
+ "after": "Go you, little V."
},
{
"before": "the King on the chess board",
@@ -6657,10 +6665,6 @@
{
"before": "For some of the heartbroken",
"after": "For some of the Heartbroken"
- },
- {
- "before": "agent-parahuman",
- "after": "agent–parahuman"
}
],
"https://www.parahumans.net/2019/04/20/black-13-6/": [
@@ -7556,36 +7560,172 @@
{
"before": "Some—many- were unwillingly",
"after": "Some—many—were unwillingly"
+ },
+ {
+ "before": "looking down at my hand at her arm and my hand",
+ "after": "looking down at her arm and my hand"
+ },
+ {
+ "before": "more more thing hanging over our heads",
+ "after": "one more thing hanging over our heads"
+ },
+ {
+ "before": "my mothers cheeks",
+ "after": "my mother’s cheeks"
+ },
+ {
+ "before": "—Understand, please",
+ "after": "—understand, please"
+ },
+ {
+ "before": "“Victoria,” Crystal’s voice was gentle",
+ "after": "“Victoria.” Crystal’s voice was gentle"
}
],
"https://www.parahumans.net/2019/10/05/sundown-17-2/": [
{
"before": "egg for Crystal and mom",
"after": "egg for Crystal and Mom"
+ },
+ {
+ "before": "Me:
",
+ "after": "
Me:
"
+ },
+ {
+ "before": "
Vista (Little V):
",
+ "after": "
Vista (Little V):
"
+ },
+ {
+ "before": "few minutes of peace, damn it.—",
+ "after": "few minutes of peace, damn it—"
+ },
+ {
+ "regExp": "‘Clear",
+ "replacement": "’Clear"
}
],
"https://www.parahumans.net/2019/10/08/sundown-17-3/": [
{
"before": "say, Math class",
"after": "say, math class"
+ },
+ {
+ "before": "hang outs",
+ "after": "hangouts"
+ },
+ {
+ "before": "‘",
+ "after": "’"
+ },
+ {
+ "before": "“—Which I’m not.”",
+ "after": "“—which I’m not.”"
+ },
+ {
+ "before": "Vista for the kid’s sake",
+ "after": "Vista for the kids’ sake"
+ },
+ {
+ "before": "Day to day activities",
+ "after": "Day-to-day activities"
+ },
+ {
+ "before": "tower crescent avenue",
+ "after": "Tower Crescent Avenue"
+ },
+ {
+ "before": "all had to deal with ‘heavy’.",
+ "after": "all had to deal with ‘heavy’.”"
+ },
+ {
+ "before": "stuff?” I spoke up, “it get into that control",
+ "after": "stuff?” I spoke up. “It gets into that control"
+ },
+ {
+ "before": "talked to me a like",
+ "after": "talked to me like"
+ },
+ {
+ "before": "call me Big V",
+ "after": "call me big V"
}
],
"https://www.parahumans.net/2019/10/12/sundown-17-4/": [
{
"before": "on resisting Master influence",
"after": "on resisting master influence"
+ },
+ {
+ "before": "CDs and Vinyls",
+ "after": "CDs and vinyls"
+ },
+ {
+ "before": "“P.R.T.”",
+ "after": "“PRT.”"
+ },
+ {
+ "before": "second or third string Wardens",
+ "after": "second- or third-string Wardens"
+ },
+ {
+ "before": "intern level clerks",
+ "after": "intern-level clerks"
+ },
+ {
+ "before": "half way",
+ "after": "halfway"
+ },
+ {
+ "before": "her tone didn’t soft",
+ "after": "her tone didn’t soften"
+ },
+ {
+ "before": "Jessica, Mr…",
+ "after": "Jessica, Mr.…"
+ },
+ {
+ "before": "—Captive",
+ "after": "—captive"
+ },
+ {
+ "before": "come from? what language",
+ "after": "come from? What language"
}
],
"https://www.parahumans.net/2019/10/15/sundown-17-5/": [
{
"before": "Is mom walking without difficulty",
"after": "Is Mom walking without difficulty"
+ },
+ {
+ "before": "Through valkyrie",
+ "after": "Through Valkyrie"
+ },
+ {
+ "before": "“‘Lo.”",
+ "after": "“’Lo.”"
}
],
"https://www.parahumans.net/2019/10/19/sundown-17-6/": [
{
"before": "Probably dad",
"after": "Probably Dad"
+ },
+ {
+ "before": "and no and no rejiggered rat",
+ "after": "and no rejiggered rat"
+ },
+ {
+ "before": "Harder headed",
+ "after": "Harder-headed"
+ },
+ {
+ "before": "you proved my sister right? Tired,",
+ "after": "you proved my sister right? Tired,"
+ },
+ {
+ "before": "boundary,” He whispered",
+ "after": "boundary,” he whispered"
}
],
"https://www.parahumans.net/2019/10/22/sundown-17-7/": [
@@ -7596,6 +7736,10 @@
{
"before": "Golem explained
",
"after": "Golem explained."
+ },
+ {
+ "before": "see the Lab",
+ "after": "see the lab"
}
],
"https://www.parahumans.net/2019/10/26/sundown-17-8/": [
@@ -7606,6 +7750,40 @@
{
"before": "She’s aggressive If anyone makes",
"after": "She’s aggressive. If anyone makes"
+ },
+ {
+ "before": "—They mobbed Lookout",
+ "after": "—they mobbed Lookout"
+ },
+ {
+ "before": "Of Course Kenzie",
+ "after": "Of course Kenzie"
+ },
+ {
+ "before": "Rain about Girls",
+ "after": "Rain about girls"
+ },
+ {
+ "before": "I typed: The one",
+ "after": "I typed: The one"
+ },
+ {
+ "before": "realize,” Seir called out. “If a fight",
+ "after": "realize,” Seir called out, “if a fight"
+ },
+ {
+ "before": "was emotion resistant",
+ "after": "was emotion-resistant"
+ }
+ ],
+ "https://www.parahumans.net/2019/10/29/sundown-17-9/": [
+ {
+ "before": "—Are you going to be wishing",
+ "after": "—are you going to be wishing"
+ },
+ {
+ "before": "Asking question?",
+ "after": "Asking a question?"
}
],
"https://www.parahumans.net/2019/11/02/sundown-17-10/": [
@@ -7616,6 +7794,40 @@
{
"before": "that didn’t betray too much",
"after": "that didn’t betray too much."
+ },
+ {
+ "before": "say Molasses",
+ "after": "say molasses"
+ },
+ {
+ "before": "‘nother",
+ "after": "’nother"
+ },
+ {
+ "before": "‘she peed on me!’",
+ "after": "‘she peed on me!’."
+ },
+ {
+ "before": "madam Mayor",
+ "after": "Madam Mayor"
+ }
+ ],
+ "https://www.parahumans.net/2019/11/05/interlude-17-x-sundown/": [
+ {
+ "before": "five-o’clock shadow",
+ "after": "five o’clock shadow"
+ },
+ {
+ "before": "nervous looking guy",
+ "after": "nervous-looking guy"
+ },
+ {
+ "before": "you have drugs? Alcohol? Cigarettes? I have",
+ "after": "you have drugs? Alcohol? Cigarettes? I have"
+ },
+ {
+ "before": "Not by mom or dad",
+ "after": "Not by Mom or Dad"
}
],
"https://www.parahumans.net/2019/11/09/interlude-17-y-sundown/": [
@@ -7626,6 +7838,14 @@
{
"before": "new titan",
"after": "new Titan"
+ },
+ {
+ "before": "anti parahumans",
+ "after": "anti-parahumans"
+ },
+ {
+ "before": "hit the Anti-parahumans",
+ "after": "hit the anti-parahumans"
}
],
"https://www.parahumans.net/2019/11/12/interlude-17-z-sundown/": [
@@ -7636,6 +7856,42 @@
{
"regExp": "titan(?![a-z])",
"replacement": "Titan"
+ },
+ {
+ "before": "What if you weren’t?",
+ "after": "What if you weren’t?"
+ },
+ {
+ "before": "Victor, Can you go join Tribute",
+ "after": "Victor, can you go join Tribute"
+ },
+ {
+ "before": "The forcefield above him elaborate and getting",
+ "after": "The forcefield above him was elaborate and getting"
+ },
+ {
+ "before": "Their faculties were diminished, but",
+ "after": "Their faculties were diminished, but—"
+ },
+ {
+ "before": "Flashbang, he saw was one of the",
+ "after": "Flashbang, he saw, was one of the"
+ },
+ {
+ "before": "the palm, Out each foot, and out of the mouth.lifting one",
+ "after": "the palm. Out each foot, and out of the mouth. Lifting one"
+ },
+ {
+ "before": "no eyes; only a blindfold",
+ "after": "no eyes, only a blindfold"
+ },
+ {
+ "before": "lightness of a feather landing, came to perch",
+ "after": "lightness of a feather landing, it came to perch"
+ },
+ {
+ "before": "…The only things that are",
+ "after": "…the only things that are"
}
],
"https://www.parahumans.net/2019/11/16/radiation-18-1/": [
@@ -7816,6 +8072,16 @@
{
"before": "the horrifying titan",
"after": "the horrifying Titan"
+ },
+ {
+ "before": "that stranger",
+ "after": "that Stranger"
+ }
+ ],
+ "https://www.parahumans.net/2020/01/07/infrared-19-4/": [
+ {
+ "before": "talked to them? or done",
+ "after": "talked to them? Or done"
}
],
"https://www.parahumans.net/2020/01/11/infrared-19-b/": [
From 5ff6621b31a000c194f46f9d9426b5af91ec78fa Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 14:44:29 -0500
Subject: [PATCH 117/186] Hyphenate a variety of words starting with "self-"
Previously we only did self-conscious; this brings along self-esteem, self-loathing, and self-harm.
---
lib/convert-worker.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 1cd668f..bf9c938 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -627,7 +627,7 @@ function fixHyphens(xml) {
xml = xml.replace(/creepy crawl/g, "creepy-crawl");
xml = xml.replace(/well armed/g, "well-armed");
xml = xml.replace(/able bodied/g, "able-bodied");
- xml = xml.replace(/self conscious/g, "self-conscious");
+ xml = xml.replace(/self (conscious|esteem|loathing|harm)/g, "self-$1");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)one on one(?=\b)/g, "one-on-one");
From 8a9562e10eaac3f2800559e4923b27cf903e1118 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 14:52:03 -0500
Subject: [PATCH 118/186] Hyphenate "level-headed"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index bf9c938..86f5ca5 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -627,6 +627,7 @@ function fixHyphens(xml) {
xml = xml.replace(/creepy crawl/g, "creepy-crawl");
xml = xml.replace(/well armed/g, "well-armed");
xml = xml.replace(/able bodied/g, "able-bodied");
+ xml = xml.replace(/level headed/g, "level-headed");
xml = xml.replace(/self (conscious|esteem|loathing|harm)/g, "self-$1");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)one on one(?=\b)/g, "one-on-one");
From 631417a53004cf274e6d71636a3b036dd603be10 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 14:55:37 -0500
Subject: [PATCH 119/186] Capitalize "Fragile One"
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 86f5ca5..1b7d4e4 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -530,6 +530,7 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/Grasping self/g, "Grasping Self");
xml = xml.replace(/Cloven stranger/g, "Cloven Stranger");
xml = xml.replace(/Princess shaper/g, "Princess Shaper");
+ xml = xml.replace(/Fragile one/g, "Fragile One");
// Place names need to always be capitalized
xml = xml.replace(/North end/g, "North End");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index a9ee5de..3b81b22 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -7774,6 +7774,10 @@
{
"before": "was emotion resistant",
"after": "was emotion-resistant"
+ },
+ {
+ "before": "Thank you, fragile one",
+ "after": "Thank you, Fragile One"
}
],
"https://www.parahumans.net/2019/10/29/sundown-17-9/": [
@@ -7948,6 +7952,14 @@
{
"before": "hunched-over titan",
"after": "hunched-over Titan"
+ },
+ {
+ "before": "experience the world, fragile one",
+ "after": "experience the world, Fragile One"
+ },
+ {
+ "before": "chime in, fragile one",
+ "after": "chime in, Fragile One"
}
],
"https://www.parahumans.net/2019/12/03/radiation-18-6/": [
@@ -8152,6 +8164,14 @@
{
"before": "titan Ophion’s",
"after": "Titan Ophion’s"
+ },
+ {
+ "before": "Work with me, fragile one",
+ "after": "Work with me, Fragile One"
+ },
+ {
+ "before": "‘fragile one’",
+ "after": "‘Fragile One’"
}
],
"https://www.parahumans.net/2020/01/25/infrared-19-d/": [
From 9fc36b813fa5f8119406328d20e9cdacf1a2df12 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 14:59:08 -0500
Subject: [PATCH 120/186] Hyphenate "clear-cut"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 1b7d4e4..1d40868 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -629,6 +629,7 @@ function fixHyphens(xml) {
xml = xml.replace(/well armed/g, "well-armed");
xml = xml.replace(/able bodied/g, "able-bodied");
xml = xml.replace(/level headed/g, "level-headed");
+ xml = xml.replace(/clear cut/g, "clear-cut");
xml = xml.replace(/self (conscious|esteem|loathing|harm)/g, "self-$1");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)one on one(?=\b)/g, "one-on-one");
From 442d245e2d10af20b534e417243bd558d38e96b2 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 15:55:38 -0500
Subject: [PATCH 121/186] Remove more non-breaking spaces
Also normalizes after-sentence spaces to two (normal) spaces, but that's not visible to readers.
---
lib/convert-worker.js | 13 ++++++++-----
lib/substitutions.json | 13 +++++++++++--
2 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 1d40868..36b043b 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -140,11 +140,14 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/([^ ]+)<\/i>/g, "$1");
xml = xml.replace(/([^ ]+)( +)<\/i>/g, "$1$2");
- // There are way too many nonbreaking spaces where they don't belong.
- // If they show up three in a row, then let them live. Otherwise, they die.
- // Also remove any run of them after a period.
- xml = xml.replace(/([^\xA0])\xA0\xA0?([^\xA0])/g, "$1 $2");
- xml = xml.replace(/\.\x20*\xA0[\xA0\x20]*/, ". ");
+ // There are way too many nonbreaking spaces where they don't belong. If they show up three in a row, then let them
+ // live; they're maybe being used for alignment or something. Otherwise, they die.
+ //
+ // Also, normalize spaces after a period/quote mark to two (normal) spaces. The second one is invisible when
+ // rendered, but it helps future heuristics detect end of sentences.
+ xml = xml.replace(/\xA0{1,2}(?!\x20\xA0)/g, " ");
+ xml = xml.replace(/([.”])\x20*\xA0[\xA0\x20]*/g, "$1 ");
+ xml = xml.replace(/([.”])\x20{3,}/g, "$1 ");
function fixEms() {
// Fix recurring broken-up or erroneous s
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 3b81b22..d3ba79c 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -1200,6 +1200,11 @@
{
"before": "of her head, “And my",
"after": "of her head. “And my"
+ },
+ {
+ "before": "KOOROW BULLIT
\nMILK STUMPY
\nBROOTUS JOODUS
\nAXIL GINGIR",
+ "after": "KOOROW\u00A0\u00A0\u00A0BULLIT
\nMILK\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0\u00A0STUMPY
\nBROOTUS\u00A0\u00A0JOODUS
\nAXIL\u00A0\u00A0\u00A0\u00A0\u00A0GINGIR",
+ "_comment": "This section plays poorly with our space-normalizing heuristic."
}
],
"https://parahumans.wordpress.com/2012/03/31/interlude-8/": [
@@ -2925,6 +2930,10 @@
{
"before": "and be brought it",
"after": "and he brought it"
+ },
+ {
+ "before": "propellers One caught her",
+ "after": "propellers. One caught her"
}
],
"https://parahumans.wordpress.com/2013/03/02/scourge-19-4/": [
@@ -6569,7 +6578,7 @@
"after": "morning breath—more than morning breath—but there"
},
{
- "before": "Previous Chapter Next Chapter
\n\n\n\n\n\n\n\n\n\n\n\n",
+ "before": "Previous Chapter Next Chapter
\n\n\n\n\n\n\n\n\n\n\n\n",
"after": "
\n
",
"_comment": "This is the best way I can think of to emulate the end of chapter 'fake out' in an ebook format"
},
@@ -6596,7 +6605,7 @@
],
"https://www.parahumans.net/2019/04/02/black-13-1/": [
{
- "before": "⊙
\nPrevious Chapter Next Chapter
\n",
+ "before": "⊙
\nPrevious Chapter Next Chapter
\n",
"after": "",
"_comment": "Our usual heuristics of removing the first paragraph to remove the previous/next chapter links are broken here because of the 'go back and look at the fake out' comment at the top"
},
From 5b7ec80750554c63732fbf13f37f1c003ad3bbed Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 16:53:26 -0500
Subject: [PATCH 122/186] Spot fixes for Ward through Infrared 19.4
---
lib/convert-worker.js | 2 +-
lib/substitutions.json | 257 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 258 insertions(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 36b043b..7e36b60 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -485,7 +485,7 @@ function fixCapitalization(xml, book) {
// This usually works in Ward (some instances corrected back in substitutions.json), and has a few false positives in
// Worm, where it is never needed:
if (book === "ward") {
- xml = xml.replace(/the patrol(?!s)/g, "the Patrol");
+ xml = xml.replace(/the patrol(?!s|ling)/g, "the Patrol");
}
// This is sometimes missing its capitalization.
diff --git a/lib/substitutions.json b/lib/substitutions.json
index d3ba79c..5fe4175 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -7933,6 +7933,30 @@
{
"regExp": "titan(?![a-z])",
"replacement": "Titan"
+ },
+ {
+ "before": "condemnation free",
+ "after": "condemnation-free"
+ },
+ {
+ "before": "eyes heavily decorate with gray",
+ "after": "eyes heavily decorated with gray"
+ },
+ {
+ "before": "Yuh!",
+ "after": "Yuh!"
+ },
+ {
+ "before": "cultured accent, “Is that us",
+ "after": "cultured accent, “is that us"
+ },
+ {
+ "before": "to force one.”",
+ "after": "to force one’.”"
+ },
+ {
+ "before": "between the Titans Pull",
+ "after": "between the Titans. Pull"
}
],
"https://www.parahumans.net/2019/11/23/radiation-18-3/": [
@@ -7943,6 +7967,18 @@
{
"before": "first look at a titan",
"after": "first look at a Titan"
+ },
+ {
+ "before": "too much fo a retreat",
+ "after": "too much of a retreat"
+ },
+ {
+ "before": "said, “after we were all",
+ "after": "said, “After we were all"
+ },
+ {
+ "before": "reconicle",
+ "after": "reconcile"
}
],
"https://www.parahumans.net/2019/11/26/radiation-18-4/": [
@@ -7969,6 +8005,60 @@
{
"before": "chime in, fragile one",
"after": "chime in, Fragile One"
+ },
+ {
+ "before": "syndicate-awareness",
+ "after": "Syndicate-awareness"
+ },
+ {
+ "before": "gray Jester Mockument was like",
+ "after": "gray jester Mockument like"
+ },
+ {
+ "before": "he halfway back",
+ "after": "he was halfway back"
+ },
+ {
+ "before": "master of biology",
+ "after": "mastery of biology"
+ },
+ {
+ "before": "for most move, or raise",
+ "after": "for most moves, or raise"
+ },
+ {
+ "before": "wasn’t hookline’s",
+ "after": "wasn’t Hookline’s"
+ },
+ {
+ "before": "Chicken little",
+ "after": "Chicken Little"
+ }
+ ],
+ "https://www.parahumans.net/2019/11/30/radiation-18-5/": [
+ {
+ "before": "jumping of a cardboard",
+ "after": "jumping off a cardboard"
+ },
+ {
+ "before": "Tattletale said. “That breastplate",
+ "after": "Tattletale said, “that breastplate"
+ },
+ {
+ "before": "they were gas damaged",
+ "after": "they were gas-damaged"
+ },
+ {
+ "before": "“skewer them to",
+ "after": "“Skewer them to"
+ },
+ {
+ "before": "…You have to conserve",
+ "after": "…you have to conserve"
+ },
+ {
+ "before": "Shorcut",
+ "after": "Shortcut"
}
],
"https://www.parahumans.net/2019/12/03/radiation-18-6/": [
@@ -7979,6 +8069,14 @@
{
"before": "bomb to a titan",
"after": "bomb to a Titan"
+ },
+ {
+ "before": "military inspired",
+ "after": "military-inspired"
+ },
+ {
+ "before": "a rough fight",
+ "after": "a rough flight"
}
],
"https://www.parahumans.net/2019/12/07/radiation-18-7/": [
@@ -8009,18 +8107,80 @@
{
"before": "end of the Summer",
"after": "end of the summer"
+ },
+ {
+ "before": "five pound phone",
+ "after": "five-pound phone"
+ },
+ {
+ "before": "He was slowly knitting itself together",
+ "after": "He was slowly knitting himself together"
+ },
+ {
+ "before": "and uh, Stables",
+ "after": "and uh, stables"
+ },
+ {
+ "before": "feel the meta tines",
+ "after": "feel the metal tines"
+ },
+ {
+ "before": "back at the Workshop",
+ "after": "back at the workshop"
+ },
+ {
+ "before": "Victoria,” My uncle",
+ "after": "Victoria,” my uncle"
+ }
+ ],
+ "https://www.parahumans.net/2019/12/10/radiation-18-8/": [
+ {
+ "before": "Parahuman Sciences",
+ "after": "parahuman sciences"
+ },
+ {
+ "before": "protocols in action.’",
+ "after": "protocols in action’."
+ },
+ {
+ "before": "“—Too close",
+ "after": "“—too close"
+ },
+ {
+ "before": "Ten-thirty-six",
+ "after": "Ten thirty-six"
+ },
+ {
+ "before": "“—He was warm and safe",
+ "after": "“—he was warm and safe"
}
],
"https://www.parahumans.net/2019/12/14/radiation-18-9/": [
{
"before": "the next titan",
"after": "the next Titan"
+ },
+ {
+ "before": "Kenzie said. Give me a minute.",
+ "after": "Kenzie said. “Give me a minute."
+ },
+ {
+ "before": "Sveta said. Especially if we",
+ "after": "Sveta said. “Especially if we"
+ },
+ {
+ "before": "from the cold, though his face",
+ "after": "from the cold; though his face"
}
],
"https://www.parahumans.net/2019/12/17/radiation-18-10/": [
{
"before": "one titan dogging me",
"after": "one Titan dogging me"
+ },
+ {
+ "before": "Capes on two rises",
+ "after": "Capes on two risers"
}
],
"https://www.parahumans.net/2019/12/21/interlude-18-z-radiation/": [
@@ -8035,6 +8195,15 @@
{
"before": "and saw her Aunt and Uncle approaching",
"after": "and saw her aunt and uncle approaching"
+ },
+ {
+ "regExp": "([Tt]he) Giantess",
+ "replacement": "$1 giantess",
+ "_comment": "14 other instances in this chapter are not capitalized"
+ },
+ {
+ "before": "had allies. connections.",
+ "after": "had allies. Connections."
}
],
"https://www.parahumans.net/2019/12/24/infrared-19-1/": [
@@ -8046,6 +8215,14 @@
"regExp": "([Tt]he) stranger",
"replacement": "$1 Stranger",
"_comment": "See convert-worker.js; this corrects an over-correction"
+ },
+ {
+ "before": "doing the damage herself",
+ "after": "doing the damage themselves"
+ },
+ {
+ "before": "This? This",
+ "after": "This? This"
}
],
"https://www.parahumans.net/2019/12/28/infrared-19-2/": [
@@ -8069,6 +8246,46 @@
{
"before": "because the titan was",
"after": "because the Titan was"
+ },
+ {
+ "before": "endless flesh monster were",
+ "after": "endless flesh monsters were"
+ },
+ {
+ "before": "putting it of balance",
+ "after": "putting it off balance"
+ },
+ {
+ "before": "What lay us below",
+ "after": "What lay below us"
+ },
+ {
+ "before": "Do you have any regrets now?",
+ "after": "Do you have any regrets now?"
+ },
+ {
+ "before": "Stygean Blue",
+ "after": "stygian blue"
+ },
+ {
+ "before": "Except the feeling of breached",
+ "after": "Except the feeling of being breached"
+ },
+ {
+ "before": "best guess of it’s location",
+ "after": "best guess of its location"
+ },
+ {
+ "before": "Dream Room",
+ "after": "dream room"
+ },
+ {
+ "before": "‘EM",
+ "after": "’EM"
+ },
+ {
+ "before": "In effect, Powers were different",
+ "after": "In effect, powers were different"
}
],
"https://www.parahumans.net/2020/01/01/interlude-19-a/": [
@@ -8079,6 +8296,30 @@
{
"before": "can’t induce triggers There were parahumans",
"after": "can’t induce triggers. There were parahumans"
+ },
+ {
+ "before": "into the conversation, “You do some",
+ "after": "into the conversation, “you do some"
+ },
+ {
+ "before": "None of us do, Egg thought",
+ "after": "None of us do, Egg thought"
+ },
+ {
+ "before": "Our ‘number zero’s wife",
+ "after": "Our ‘number zero’s’ wife"
+ },
+ {
+ "before": "his voice was hard, “You took",
+ "after": "his voice was hard, “you took"
+ },
+ {
+ "before": "You don’t get to do that!",
+ "after": "You don’t get to do that!"
+ },
+ {
+ "before": "Almost a Figurehead",
+ "after": "Almost a figurehead"
}
],
"https://www.parahumans.net/2020/01/04/infrared-19-3/": [
@@ -8097,12 +8338,28 @@
{
"before": "that stranger",
"after": "that Stranger"
+ },
+ {
+ "before": "I could Strands worked to braid together",
+ "after": "I could see strands working to braid together"
+ },
+ {
+ "before": "any of the Master Protocols",
+ "after": "any of the master protocols"
}
],
"https://www.parahumans.net/2020/01/07/infrared-19-4/": [
{
"before": "talked to them? or done",
"after": "talked to them? Or done"
+ },
+ {
+ "before": "all nighters",
+ "after": "all-nighters"
+ },
+ {
+ "before": "the other cases",
+ "after": "the other Cases"
}
],
"https://www.parahumans.net/2020/01/11/infrared-19-b/": [
From 1a0780bd7b3326645e716f2d5e52fe2eac4a8ac5 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 16:55:34 -0500
Subject: [PATCH 123/186] Capitalize "Aunt Sarah"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 7e36b60..136db35 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -554,6 +554,9 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/(?
Date: Sat, 19 Dec 2020 17:03:33 -0500
Subject: [PATCH 124/186] Fix hyphenation for high five and fist bump
---
lib/convert-worker.js | 10 +++++++---
lib/substitutions.json | 14 ++++++++++++++
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 136db35..db7eedc 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -598,9 +598,6 @@ function fixMispellings(xml) {
// This is commonly misspelled.
xml = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");
- // Preemptive(ly) is often hyphenated (not always). It should not be.
- xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
-
// All dictionaries agree this is capitalized.
xml = xml.replace(/u-turn/g, "U-turn");
@@ -640,6 +637,13 @@ function fixHyphens(xml) {
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)one on one(?=\b)/g, "one-on-one");
+ // Preemptive(ly) is often hyphenated (not always). It should not be.
+ xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
+
+ // These should be hyphenated only when used as a verb. We correct those cases back in substitutions.json.
+ xml = xml.replace(/fist-bump/g, "fist bump");
+ xml = xml.replace(/high-five/g, "high five");
+
// This is usually correct but sometimes wrong.
xml = xml.replace(/neo /g, "neo-");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 5fe4175..b2ff494 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -6206,6 +6206,10 @@
{
"before": "“Cradle—the most intact part of Tattletale said",
"after": "“Cradle—” the most intact part of Tattletale said"
+ },
+ {
+ "before": "backwards-high five",
+ "after": "backwards high five"
}
],
"https://www.parahumans.net/2019/01/26/blinding-11-9/": [
@@ -7381,6 +7385,11 @@
{
"before": "aunt Rachel",
"after": "Aunt Rachel"
+ },
+ {
+ "before": "for Kenzie to high five",
+ "after": "for Kenzie to high-five",
+ "_comment": "Here it's a verb"
}
],
"https://www.parahumans.net/2019/09/07/from-within-16-8/": [
@@ -8320,6 +8329,11 @@
{
"before": "Almost a Figurehead",
"after": "Almost a figurehead"
+ },
+ {
+ "before": "Can’t fist bump",
+ "after": "Can’t fist-bump",
+ "_comment": "Here it's a verb"
}
],
"https://www.parahumans.net/2020/01/04/infrared-19-3/": [
From a1c7f00b42da4450110928f4d1ad7d1b11da71ff Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 17:05:36 -0500
Subject: [PATCH 125/186] Capitalize "Machine Army"
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index db7eedc..f07db96 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -477,6 +477,9 @@ function fixCapitalization(xml, book) {
// The Speedrunners team name is missing its capitalization a couple times.
xml = xml.replace(/speedrunners/g, "Speedrunners");
+ // The Machine Army is missing its capitalization a couple times.
+ xml = xml.replace(/machine army/g, "Machine Army");
+
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol ".
From aa9fc197e9706cae406e78d6bfa411c6e31a8cb5 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 17:11:34 -0500
Subject: [PATCH 126/186] Hyphenate "hand-to-hand"
---
lib/convert-worker.js | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index f07db96..4faeb16 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -647,6 +647,11 @@ function fixHyphens(xml) {
xml = xml.replace(/fist-bump/g, "fist bump");
xml = xml.replace(/high-five/g, "high five");
+ // This should be hyphenated when used as an adjective (instead of an adverb or noun). I.e. it should be
+ // "hand-to-hand combat", but "passed from hand to hand", and "capable in hand to hand". The following heuristic works
+ // in the books.
+ xml = xml.replace(/hand to hand(?= [a-z])/g, "hand-to-hand");
+
// This is usually correct but sometimes wrong.
xml = xml.replace(/neo /g, "neo-");
From 6ddde0681720b43af7e65b6c15cb6223b70cff34 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 17:16:05 -0500
Subject: [PATCH 127/186] De-capitalize "parahumans"
---
lib/substitutions.json | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index b2ff494..6e1df8c 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -124,6 +124,10 @@
{
"before": "Really dad",
"after": "Really Dad"
+ },
+ {
+ "before": "The Parahumans wiki",
+ "after": "The parahumans wiki"
}
],
"https://parahumans.wordpress.com/2011/08/09/agitation-3-1/": [
@@ -2529,6 +2533,10 @@
{
"before": "weeks, months. Anticipating",
"after": "weeks, months. Anticipating"
+ },
+ {
+ "before": "as much or more about Parahumans than",
+ "after": "as much or more about parahumans than"
}
],
"https://parahumans.wordpress.com/2013/01/08/migration-17-1/": [
@@ -2776,6 +2784,10 @@
{
"before": "that was why Masters tend to be",
"after": "that was why masters tend to be"
+ },
+ {
+ "before": "the top researchers on Parahumans",
+ "after": "the top researchers on parahumans"
}
],
"https://parahumans.wordpress.com/2013/02/07/interlude-18-donation-bonus-3/": [
@@ -5196,6 +5208,10 @@
{
"before": "fighting, ‘rene",
"after": "fighting, ’rene"
+ },
+ {
+ "before": "I studied Parahumans before",
+ "after": "I studied parahumans before"
}
],
"https://www.parahumans.net/2018/06/26/torch-interlude-7-x/": [
@@ -5717,6 +5733,10 @@
{
"before": "fucking teacher wouldn’t",
"after": "fucking Teacher wouldn’t"
+ },
+ {
+ "before": "That the Parahumans were taking over",
+ "after": "That the parahumans were taking over"
}
],
"https://www.parahumans.net/2018/10/13/gleaming-9-15/": [
@@ -6822,6 +6842,12 @@
"after": "Instead a 1, −2, 4"
}
],
+ "https://www.parahumans.net/2019/05/21/breaking-14-2/": [
+ {
+ "before": "if Parahumans like Swansong",
+ "after": "if parahumans like Swansong"
+ }
+ ],
"https://www.parahumans.net/2019/05/25/breaking-14-3/": [
{
"before": "actively tamper with it it",
@@ -8334,6 +8360,10 @@
"before": "Can’t fist bump",
"after": "Can’t fist-bump",
"_comment": "Here it's a verb"
+ },
+ {
+ "before": "that other Parahumans are",
+ "after": "that other parahumans are"
}
],
"https://www.parahumans.net/2020/01/04/infrared-19-3/": [
From 877beda7332b74570b653873d3b07ef61481392e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 19 Dec 2020 17:21:59 -0500
Subject: [PATCH 128/186] 4.8.0
---
npm-shrinkwrap.json | 2 +-
package.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index b997681..748555a 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
{
"name": "worm-scraper",
- "version": "4.7.0",
+ "version": "4.8.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index 80f362f..7437398 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"parahuman",
"scraper"
],
- "version": "4.7.0",
+ "version": "4.8.0",
"author": "Domenic Denicola (https://domenic.me/)",
"license": "WTFPL",
"repository": "domenic/worm-scraper",
From 6f51bc6c9a7296c8b2790eecf8b02e4d7bfcdfb6 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Thu, 24 Dec 2020 21:22:37 -0500
Subject: [PATCH 129/186] Spot fixes for Ward through Infrared 19.8
---
lib/convert-worker.js | 2 +
lib/substitutions.json | 106 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 108 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 4faeb16..774a772 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -424,6 +424,8 @@ function enDashJointNames(xml) {
xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
+ xml = xml.replace(/Matryoshka-Valentin/g, "Matryoshka–Valentin");
+ xml = xml.replace(/Gaea-Eden/g, "Gaea–Eden");
xml = xml.replace(/([Aa])gent-parahuman/g, "$1gent–parahuman");
xml = xml.replace(/([Pp])arahuman-agent/g, "$1arahuman–agent");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 6e1df8c..1ffed4a 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -7950,6 +7950,10 @@
{
"regExp": "titan(?![a-z])",
"replacement": "Titan"
+ },
+ {
+ "before": "the Fallen Raid or the Prison?",
+ "after": "the Fallen raid or the prison?"
}
],
"https://www.parahumans.net/2019/11/19/radiation-18-2/": [
@@ -8446,6 +8450,14 @@
{
"before": "can understand your Aunt and Uncle",
"after": "can understand your aunt and uncle"
+ },
+ {
+ "before": "—And I think I have a grasp",
+ "after": "—and I think I have a grasp"
+ },
+ {
+ "before": "emotion driven",
+ "after": "emotion-driven"
}
],
"https://www.parahumans.net/2020/01/14/infrared-19-c/": [
@@ -8464,6 +8476,24 @@
{
"before": "—die quickly, if he has to die.",
"after": "—die quickly, if he has to die."
+ },
+ {
+ "before": "close mouthed",
+ "after": "close-mouthed"
+ },
+ {
+ "before": "“…Don’t really trust",
+ "after": "“…don’t really trust"
+ }
+ ],
+ "https://www.parahumans.net/2020/01/18/infrared-19-5/": [
+ {
+ "before": "liqud Titan",
+ "after": "liquid Titan"
+ },
+ {
+ "before": "done at the Prison",
+ "after": "done at the prison"
}
],
"https://www.parahumans.net/2020/01/21/infrared-19-6/": [
@@ -8482,24 +8512,96 @@
{
"before": "‘fragile one’",
"after": "‘Fragile One’"
+ },
+ {
+ "before": "“—But there won’t be any effects",
+ "after": "“—but there won’t be any effects"
+ },
+ {
+ "before": "One of his Titans lurked",
+ "after": "One of his giants lurked"
+ },
+ {
+ "before": "furously",
+ "after": "furiously"
+ },
+ {
+ "before": "what I’d looked, hadn’t registered",
+ "after": "what I’d looked at, hadn’t registered"
+ },
+ {
+ "before": "Mother Titan",
+ "after": "Mother Giant"
+ },
+ {
+ "before": "Adrenline",
+ "after": "Adrenaline"
+ },
+ {
+ "before": "ex-Prison",
+ "after": "ex-prison"
+ },
+ {
+ "before": "Chris and the Syringe",
+ "after": "Chris and the syringe"
+ },
+ {
+ "before": "“…She might have gotten",
+ "after": "“…she might have gotten"
+ },
+ {
+ "before": "“—It was Jessica accepting",
+ "after": "“—it was Jessica accepting"
+ },
+ {
+ "before": "We saw people running around.",
+ "after": "We saw people running around.”"
}
],
"https://www.parahumans.net/2020/01/25/infrared-19-d/": [
{
"before": "look after mom",
"after": "look after Mom"
+ },
+ {
+ "before": "pink-a line of red-pink",
+ "after": "pink—a line of red—pink"
+ },
+ {
+ "before": "recruited for busy work",
+ "after": "recruited for busywork"
+ },
+ {
+ "before": "Steamwheel went Rogue",
+ "after": "Steamwheel went rogue"
+ },
+ {
+ "before": "mother said. Such a horrible",
+ "after": "mother said. “Such a horrible"
}
],
"https://www.parahumans.net/2020/01/28/infrared-19-e/": [
{
"before": "so mom doesn’t end up alone",
"after": "so Mom doesn’t end up alone"
+ },
+ {
+ "before": "“No!” she was almost drowned out",
+ "after": "“No!” She was almost drowned out"
}
],
"https://www.parahumans.net/2020/02/02/infrared-19-7/": [
{
"before": "I really wasn’t. really",
"after": "I really wasn’t. Really"
+ },
+ {
+ "before": "‘I have that and ‘I don’t want it’",
+ "after": "‘I have that and I don’t want it’"
+ },
+ {
+ "before": "number boy",
+ "after": "Number Boy"
}
],
"https://www.parahumans.net/2020/02/04/infrared-19-8/": [
@@ -8510,6 +8612,10 @@
{
"before": "relatively speaking” Number Five",
"after": "relatively speaking,” Number Five"
+ },
+ {
+ "before": "Skitter, Khepri",
+ "after": "Skitter, Khepri"
}
],
"https://www.parahumans.net/2020/02/08/infrared-19-9/": [
From bf8a1b325c5bcf482408770b2167537ef4134753 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 15:42:12 -0500
Subject: [PATCH 130/186] Hyphenate self-preservation
---
lib/convert-worker.js | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 774a772..63576f7 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -628,7 +628,6 @@ function fixHyphens(xml) {
xml = xml.replace(/ haired/g, "-haired");
// These are consistently missing hyphens.
- xml = xml.replace(/self destruct/g, "self-destruct");
xml = xml.replace(/life threatening/g, "life-threatening");
xml = xml.replace(/hard headed/g, "hard-headed");
xml = xml.replace(/shoulder mounted/g, "shoulder-mounted");
@@ -638,7 +637,7 @@ function fixHyphens(xml) {
xml = xml.replace(/able bodied/g, "able-bodied");
xml = xml.replace(/level headed/g, "level-headed");
xml = xml.replace(/clear cut/g, "clear-cut");
- xml = xml.replace(/self (conscious|esteem|loathing|harm)/g, "self-$1");
+ xml = xml.replace(/self (conscious|esteem|loathing|harm|destruct|preservation)/g, "self-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)one on one(?=\b)/g, "one-on-one");
From 6256b332cbcadeedde50e1c11ed7fcbfdb516c38 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 15:44:35 -0500
Subject: [PATCH 131/186] Apply hyphenation fixes even to capitalized phrases
---
lib/convert-worker.js | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 63576f7..9d851a6 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -628,18 +628,18 @@ function fixHyphens(xml) {
xml = xml.replace(/ haired/g, "-haired");
// These are consistently missing hyphens.
- xml = xml.replace(/life threatening/g, "life-threatening");
- xml = xml.replace(/hard headed/g, "hard-headed");
- xml = xml.replace(/shoulder mounted/g, "shoulder-mounted");
- xml = xml.replace(/golden skinned/g, "golden-skinned");
- xml = xml.replace(/creepy crawl/g, "creepy-crawl");
- xml = xml.replace(/well armed/g, "well-armed");
- xml = xml.replace(/able bodied/g, "able-bodied");
- xml = xml.replace(/level headed/g, "level-headed");
- xml = xml.replace(/clear cut/g, "clear-cut");
- xml = xml.replace(/self (conscious|esteem|loathing|harm|destruct|preservation)/g, "self-$2");
+ xml = xml.replace(/([Ll]ife) threatening/g, "$1-threatening");
+ xml = xml.replace(/([Hh]ard) headed/g, "$1-headed");
+ xml = xml.replace(/([Ss]houlder) mounted/g, "$1-mounted");
+ xml = xml.replace(/([Gg]olden) skinned/g, "$1-skinned");
+ xml = xml.replace(/([Cc]reepy) crawl/g, "$1-crawl");
+ xml = xml.replace(/([Ww]ell) armed/g, "$1-armed");
+ xml = xml.replace(/([Aa]ble) bodied/g, "$1-bodied");
+ xml = xml.replace(/([Ll]evel) headed/g, "$1-headed");
+ xml = xml.replace(/([Cc]lear) cut/g, "$1-cut");
+ xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
- xml = xml.replace(/(?<=\b)one on one(?=\b)/g, "one-on-one");
+ xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
// Preemptive(ly) is often hyphenated (not always). It should not be.
xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
From 294cbb2e718c596aecf3d4064cdce2852d196332 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 15:46:36 -0500
Subject: [PATCH 132/186] Hyphenate "vat-grown"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 9d851a6..1b49329 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -637,6 +637,7 @@ function fixHyphens(xml) {
xml = xml.replace(/([Aa]ble) bodied/g, "$1-bodied");
xml = xml.replace(/([Ll]evel) headed/g, "$1-headed");
xml = xml.replace(/([Cc]lear) cut/g, "$1-cut");
+ xml = xml.replace(/([Vv]at) grown/g, "$1-grown");
xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
From 44f2cc3c7beef87c4734bd62168d956a33d89c04 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 15:48:00 -0500
Subject: [PATCH 133/186] Hyphenate "shell-shocked"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 1b49329..30add8e 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -638,6 +638,7 @@ function fixHyphens(xml) {
xml = xml.replace(/([Ll]evel) headed/g, "$1-headed");
xml = xml.replace(/([Cc]lear) cut/g, "$1-cut");
xml = xml.replace(/([Vv]at) grown/g, "$1-grown");
+ xml = xml.replace(/([Ss]hell) shocked/g, "$1-shocked");
xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
From 651944b4da0c80a6fd0923165fedcd98c28eea4b Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 15:56:24 -0500
Subject: [PATCH 134/186] Hyphenate "second-guess" and derivatives, when
appropriate
---
lib/convert-worker.js | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 30add8e..11ed960 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -655,6 +655,10 @@ function fixHyphens(xml) {
// in the books.
xml = xml.replace(/hand to hand(?= [a-z])/g, "hand-to-hand");
+ // This is usually wrong but sometimes correct. The lookarounds avoid specific cases where it's referring to an actual
+ // second in a series of guesses.
+ xml = xml.replace(/(?
Date: Fri, 25 Dec 2020 15:59:20 -0500
Subject: [PATCH 135/186] Hyphenate "built-in" when appropriate
---
lib/substitutions.json | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 1ffed4a..d8de61a 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -4422,6 +4422,10 @@
{
"before": "How do you even think rec—how do you think",
"after": "How do you even think rec— How do you think"
+ },
+ {
+ "before": "built in protections",
+ "after": "built-in protections"
}
],
"https://www.parahumans.net/2017/12/01/daybreak-1-8/": [
@@ -4494,6 +4498,10 @@
{
"before": "She huffed out out a small",
"after": "She huffed out a small"
+ },
+ {
+ "before": "a built in glare",
+ "after": "a built-in glare"
}
],
"https://www.parahumans.net/2017/12/30/flare-2-7/": [
@@ -6662,6 +6670,12 @@
"after": "“It’s winter,”"
}
],
+ "https://www.parahumans.net/2019/04/05/black-13-2/": [
+ {
+ "before": "built in protection",
+ "after": "built-in protection"
+ }
+ ],
"https://www.parahumans.net/2019/04/09/black-13-3/": [
{
"before": "you have have surmised",
@@ -8616,6 +8630,10 @@
{
"before": "Skitter, Khepri",
"after": "Skitter, Khepri"
+ },
+ {
+ "before": "built in spears",
+ "after": "built-in spears"
}
],
"https://www.parahumans.net/2020/02/08/infrared-19-9/": [
From e1b59994f8d8b0ec392b73dcbea5977cc21ab22e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 16:01:21 -0500
Subject: [PATCH 136/186] Capitalize "Uncle Neil" and "Aunt Fleur"
---
lib/convert-worker.js | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 11ed960..2c11cd1 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -559,8 +559,10 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/(?
Date: Fri, 25 Dec 2020 16:03:23 -0500
Subject: [PATCH 137/186] Hyphenate "dog-tired"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 2c11cd1..8506a0f 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -641,6 +641,7 @@ function fixHyphens(xml) {
xml = xml.replace(/([Cc]lear) cut/g, "$1-cut");
xml = xml.replace(/([Vv]at) grown/g, "$1-grown");
xml = xml.replace(/([Ss]hell) shocked/g, "$1-shocked");
+ xml = xml.replace(/([Dd]og) tired/g, "$1-tired");
xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
From 369714f3d15dac7e5b403cadf085caafc1989a4e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 17:32:28 -0500
Subject: [PATCH 138/186] Spot fixes for Ward through Infrared 19.10
---
lib/substitutions.json | 52 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/lib/substitutions.json b/lib/substitutions.json
index d8de61a..92f7ff9 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -3949,6 +3949,10 @@
{
"before": "this,” I said. “I",
"after": "this,” I said, “I"
+ },
+ {
+ "before": "Second Triggers",
+ "after": "Second triggers"
}
],
"https://parahumans.wordpress.com/2013/10/05/venom-29-8/": [
@@ -8652,6 +8656,22 @@
{
"before": "my Uncle said",
"after": "my uncle said"
+ },
+ {
+ "before": "Anywhere else?",
+ "after": "Anywhere else?"
+ },
+ {
+ "before": "do you understand about Trigger events",
+ "after": "do you understand about trigger events"
+ },
+ {
+ "before": "good day,” my mother said, “The best",
+ "after": "good day,” my mother said, “the best"
+ },
+ {
+ "before": "happen to you,” My mother said",
+ "after": "happen to you,” my mother said"
}
],
"https://www.parahumans.net/2020/02/11/infrared-19-f/": [
@@ -8662,6 +8682,30 @@
{
"before": "while mom did her own thing",
"after": "while Mom did her own thing"
+ },
+ {
+ "before": "still hadn’t been repeated from the fights",
+ "after": "still hadn’t been repaired from the fights"
+ },
+ {
+ "before": "“Hey!” the girl turned",
+ "after": "“Hey!” The girl turned"
+ },
+ {
+ "before": "Lookout, How long do you need",
+ "after": "Lookout, how long do you need"
+ },
+ {
+ "before": "You guy should know",
+ "after": "You guys should know"
+ },
+ {
+ "before": "he’d gotten more of dad",
+ "after": "he’d gotten more of Dad"
+ },
+ {
+ "before": "Its Capricorn",
+ "after": "It’s Capricorn"
}
],
"https://www.parahumans.net/2020/02/15/infrared-19-10/": [
@@ -8676,6 +8720,14 @@
{
"before": "figure out while she was gone. we’ll see who",
"after": "figure out while she was gone. We’ll see who"
+ },
+ {
+ "before": "miligram",
+ "after": "milligram"
+ },
+ {
+ "before": "same clothes that had been issues to the refugees",
+ "after": "same clothes that had been issued to the refugees"
}
],
"https://www.parahumans.net/2020/02/18/infrared-19-g/": [
From d0c23d86fd64ed9a7b80341b74cd6371304c44ee Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 17:37:00 -0500
Subject: [PATCH 139/186] Hyphenate "a just-in-case"
---
lib/convert-worker.js | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 8506a0f..b7e4760 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -662,6 +662,10 @@ function fixHyphens(xml) {
// second in a series of guesses.
xml = xml.replace(/(?
Date: Fri, 25 Dec 2020 17:45:48 -0500
Subject: [PATCH 140/186] De-capitalize judo, aikido, karate, and tae kwon do
---
lib/convert-worker.js | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index b7e4760..0e3bcda 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -496,9 +496,16 @@ function fixCapitalization(xml, book) {
// This is sometimes missing its capitalization.
xml = xml.replace(/the birdcage/g, "the Birdcage");
- // There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.)
- xml = xml.replace(/Halberd/g, "halberd");
- xml = xml.replace(/Loft/g, "loft");
+ // There's no reason why these should be capitalized.
+ xml = xml.replace(/(?
Date: Fri, 25 Dec 2020 17:53:31 -0500
Subject: [PATCH 141/186] Hyphenate "face-to-face" when appropriate
---
lib/convert-worker.js | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 0e3bcda..1e6c0df 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -673,6 +673,11 @@ function fixHyphens(xml) {
// noun cases are missing one or both hyphens.
xml = xml.replace(/([Aa]) just[ -]in case/g, "$1 just-in-case");
+ // When used as an adjective, it's hyphenated. It turns out most cases are as an adverb, so we go with this approach:
+ xml = xml.replace(
+ /face to face(?= meeting| hang-out| interaction| contact| conversation| confrontation| fight)/g,
+ "face-to-face");
+
// This is usually correct but sometimes wrong.
xml = xml.replace(/neo /g, "neo-");
From 26a2b9c9b54c58f7600c452105231a52c6a6870c Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Fri, 25 Dec 2020 18:07:06 -0500
Subject: [PATCH 142/186] Hyphenate "fight or flight" when appropriate
---
lib/convert-worker.js | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 1e6c0df..b7904bf 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -678,6 +678,9 @@ function fixHyphens(xml) {
/face to face(?= meeting| hang-out| interaction| contact| conversation| confrontation| fight)/g,
"face-to-face");
+ // When used as an adjective, it's hyphenated. This heuristic works in the books.
+ xml = xml.replace(/fight or flight(?= [a-z])/g, "fight-or-flight");
+
// This is usually correct but sometimes wrong.
xml = xml.replace(/neo /g, "neo-");
From b652e3812bdb0965cffcf604dfb6e1c7f2e78445 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 15:09:14 -0500
Subject: [PATCH 143/186] De-capitalize "flock" to match prevailing usage
---
lib/convert-worker.js | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index b7904bf..1e3558b 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -471,7 +471,7 @@ function fixCapitalization(xml, book) {
// This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
// it's incorrect to capitalize in the one-off fixes.
// Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
- xml = xml.replace(/([Tt])he clairvoyant([^s])/g, "$1he Clairvoyant$2");
+ xml = xml.replace(/([Tt])he clairvoyant(?!s)/g, "$1he Clairvoyant");
// ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
xml = xml.replace(/Resound/g, "ReSound");
@@ -518,6 +518,11 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/Radiata/g, "radiata");
xml = xml.replace(/Gemma/g, "gemma");
+ // We de-capitalize Valkyrie's "flock", since most uses are de-capitalized (e.g. the many instances in Gleaming
+ // Interlude 9, or Dying 15.z). This is a bit surprising; it seems like an organization name. But I guess it's
+ // informal.
+ xml = xml.replace(/(?
Date: Sat, 26 Dec 2020 15:35:31 -0500
Subject: [PATCH 144/186] De-capitalize "giants" but capitalize "X Giant"
---
lib/convert-worker.js | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 1e3558b..390deac 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -497,23 +497,23 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/the birdcage/g, "the Birdcage");
// There's no reason why these should be capitalized.
- xml = xml.replace(/(?)Halberd/g, "halberd");
+ xml = xml.replace(/(?)Loft/g, "loft");
// These are treated as common nouns and not traditionally capitalized. "Krav Maga" remains capitalized,
// interestingly (according to dictionaries and Wikipedia).
- xml = xml.replace(/(?)Judo/g, "judo");
+ xml = xml.replace(/(?)Aikido/g, "aikido");
+ xml = xml.replace(/(?)Karate/g, "karate");
+ xml = xml.replace(/(?)Tae Kwon Do/g, "tae kwon do");
// There's no reason why university should be capitalized in most contexts, although sometimes it's used as part of
// a compound noun or at the beginning of a sentence.
- xml = xml.replace(/(?|Cornell |Nilles )University(?! Road)/, "university");
// Organ names (e.g. brain, arm) or scientific names are not capitalized, so the "corona pollentia" and friends should
// not be either. The books are inconsistent.
- xml = xml.replace(/(?|-)Corona/g, "corona");
xml = xml.replace(/Pollentia/g, "pollentia");
xml = xml.replace(/Radiata/g, "radiata");
xml = xml.replace(/Gemma/g, "gemma");
@@ -521,7 +521,7 @@ function fixCapitalization(xml, book) {
// We de-capitalize Valkyrie's "flock", since most uses are de-capitalized (e.g. the many instances in Gleaming
// Interlude 9, or Dying 15.z). This is a bit surprising; it seems like an organization name. But I guess it's
// informal.
- xml = xml.replace(/(?)Flock/g, "flock");
// Especially early in Worm, PRT designations are capitalized; they should not be. This fixes the cases where we
// can be reasonably sure they don't start a sentence, although more specific instances are done in
@@ -533,7 +533,7 @@ function fixCapitalization(xml, book) {
// This also over-de-capitalizes "The Stranger" in Ward (a titan name). Those also get fixed in substitutions.json.
xml = xml.replace(
// eslint-disable-next-line max-len
- /(?|“|\n|: )(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)(?! [A-Z])/g,
+ /(?|\n|: )(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)(?! [A-Z])/g,
(_, designation) => designation.toLowerCase()
);
xml = xml.replace(
@@ -610,6 +610,11 @@ function fixCapitalization(xml, book) {
xml = xml.replace(/Kronos titan/g, "Kronos Titan");
}
+ // For the giants, the prevailing usage seems to be to keep the term lowercase, but capitalize when used as a name.
+ xml = xml.replace(/(?<=Mathers |Goddess )giant/g, "Giant");
+ xml = xml.replace(/mother giant/ig, "Mother Giant");
+ xml = xml.replace(/(?)Giants/g, "giants");
+
return xml;
}
From 08e0d0d9a851263873b2e49b0a12a2a443a4f805 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 15:39:39 -0500
Subject: [PATCH 145/186] De-italicize some commas
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 390deac..2dbdb32 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -209,6 +209,7 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/,”<\/p>/g, ".”");
xml = xml.replace(/(.*),<\/p>/g, "
$1.
");
xml = xml.replace(/‘(\w+)‘(\w+)’/g, "‘$1’$2’");
+ xml = xml.replace(/<em>([a-z]+), ([a-z]+)<\/em>/g, "<em>$1</em>, <em>$2</em>");
// Similar problems occur in Ward with and as do in Worm with s
xml = xml.replace(//g, "");
From f5f0ba8e61b51ddf1635bca81e501951ae79c4f9 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 16:20:31 -0500
Subject: [PATCH 146/186] Spot fixes for Ward through Last 20.end
---
lib/convert-worker.js | 1 +
lib/substitutions.json | 274 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 273 insertions(+), 2 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 2dbdb32..030d689 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -660,6 +660,7 @@ function fixHyphens(xml) {
xml = xml.replace(/([Vv]at) grown/g, "$1-grown");
xml = xml.replace(/([Ss]hell) shocked/g, "$1-shocked");
xml = xml.replace(/([Dd]og) tired/g, "$1-tired");
+ xml = xml.replace(/([Nn]ightmare) filled/g, "$1-filled");
xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 92f7ff9..991f542 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -3667,6 +3667,10 @@
{
"before": "Focus Memorize.",
"after": "Focus. Memorize."
+ },
+ {
+ "before": "The Entity slowed",
+ "after": "The entity slowed"
}
],
"https://parahumans.wordpress.com/2013/08/13/extinction-27-1/": [
@@ -8734,6 +8738,14 @@
{
"before": "Imp and the heartbroken",
"after": "Imp and the Heartbroken"
+ },
+ {
+ "before": "That’s the way it always goes I get close to people",
+ "after": "That’s the way it always goes—I get close to people"
+ },
+ {
+ "before": "The voices, a veritable crowd, was audible now",
+ "after": "The voices, a veritable crowd, were audible now"
}
],
"https://www.parahumans.net/2020/02/23/infrared-19-z/": [
@@ -8744,18 +8756,141 @@
{
"before": "pink and black titan",
"after": "pink and black Titan"
+ },
+ {
+ "before": "throguh",
+ "after": "through"
}
],
"https://www.parahumans.net/2020/02/25/last-20-1/": [
{
"before": "across this clearing. eyes, cameras",
"after": "across this clearing. Eyes, cameras"
+ },
+ {
+ "before": "monsters, and to use the phrasing he liked",
+ "after": "monsters, to use the phrasing he liked"
+ },
+ {
+ "before": "about giving short speeches. I couldn’t help",
+ "after": "about giving short speeches, I couldn’t help"
+ },
+ {
+ "before": "this Endbringer that wasn’t brutish and noisy",
+ "after": "this Endbringer wasn’t brutish and noisy"
+ },
+ {
+ "before": "like a dance with wings",
+ "after": "like a dancer with wings"
+ },
+ {
+ "before": "a gravely sound",
+ "after": "a gravelly sound"
+ },
+ {
+ "before": "trying to remain peripherally aware of.",
+ "after": "trying to remain peripherally aware."
+ },
+ {
+ "before": "The scream, the fact was",
+ "after": "The scream, the fact we"
+ },
+ {
+ "before": "The water receded before tsunami",
+ "after": "The water receded before a tsunami"
+ },
+ {
+ "before": "Stay the course!",
+ "after": "Stay the course!”"
+ }
+ ],
+ "https://www.parahumans.net/2020/02/29/last-20-2/": [
+ {
+ "before": "Go!",
+ "after": "Go!"
+ }
+ ],
+ "https://www.parahumans.net/2020/03/03/last-20-3/": [
+ {
+ "before": "Get-between the words",
+ "after": "Get——between the words"
}
],
"https://www.parahumans.net/2020/03/07/last-20-4/": [
{
"before": "peace!” Cryptid growled",
"after": "peace!” Cryptid growled"
+ },
+ {
+ "before": "I’m going crazy,” Sveta remarked. “It’s you",
+ "after": "I’m going crazy,” Sveta remarked, “it’s you"
+ }
+ ],
+ "https://www.parahumans.net/2020/03/10/last-20-5/": [
+ {
+ "before": "“…It’s slipping through our",
+ "after": "“…it’s slipping through our"
+ }
+ ],
+ "https://www.parahumans.net/2020/03/15/last-20-6/": [
+ {
+ "before": "Strikes, brutes, breakers",
+ "after": "Strikers, brutes, breakers"
+ }
+ ],
+ "https://www.parahumans.net/2020/03/17/last-20-7/": [
+ {
+ "before": "the pain in the ass heroine",
+ "after": "the pain-in-the-ass heroine"
+ },
+ {
+ "before": "manton limit",
+ "after": "Manton limit"
+ }
+ ],
+ "https://www.parahumans.net/2020/03/21/last-20-8/": [
+ {
+ "before": "For Humanity?",
+ "after": "For humanity?"
+ },
+ {
+ "before": "a plan for Endgame",
+ "after": "a plan for endgame"
+ },
+ {
+ "before": "Saturday Morning cartoon",
+ "after": "Saturday-morning cartoon",
+ "_comment": "https://en.wikipedia.org/wiki/Saturday-morning_cartoon"
+ },
+ {
+ "before": "Simurgh driven future",
+ "after": "Simurgh-driven future"
+ },
+ {
+ "before": "the Entities",
+ "after": "the entities"
+ }
+ ],
+ "https://www.parahumans.net/2020/03/25/last-20-9/": [
+ {
+ "before": "you could help miss—help Antares",
+ "after": "you could help Miss—help Antares"
+ },
+ {
+ "before": "Did I scare him?",
+ "after": "Did I scare him?"
+ },
+ {
+ "regExp": "nurse Leah",
+ "replacement": "Nurse Leah"
+ },
+ {
+ "before": "“…But I have to draw",
+ "after": "“…but I have to draw"
+ },
+ {
+ "before": "break Capricorn’s parents hearts",
+ "after": "break Capricorn’s parents’ hearts"
}
],
"https://www.parahumans.net/2020/03/31/last-20-10/": [
@@ -8764,32 +8899,139 @@
"after": "shaping it as it rolled out. It became a circular"
}
],
+ "https://www.parahumans.net/2020/04/04/last-20-b/": [
+ {
+ "before": "Aurora Borealis",
+ "after": "aurora borealis"
+ },
+ {
+ "before": "the snow fall and",
+ "after": "the snowfall and"
+ },
+ {
+ "before": "the more boy crazy girls",
+ "after": "the more boy-crazy girls"
+ },
+ {
+ "before": "hostage with us, too!",
+ "after": "hostage with us, too!"
+ },
+ {
+ "before": "done, prez",
+ "after": "done, Prez"
+ },
+ {
+ "before": "The—-bye.",
+ "after": "The——bye."
+ },
+ {
+ "before": "who would gave gone after",
+ "after": "who would have gone after"
+ },
+ {
+ "before": "a problem?” Her dad asked",
+ "after": "a problem?” her dad asked"
+ },
+ {
+ "before": "her fathers hand",
+ "after": "her father’s hand"
+ }
+ ],
"https://www.parahumans.net/2020/04/07/last-20-11/": [
{
- "before": "had harangued the P.R.T., even",
- "after": "had harangued the PRT, even"
+ "before": "had harangued the P.R.T., even though the Patrol had",
+ "after": "had harangued the PRT, even though the patrol had"
},
{
"before": "“Actually,” Tattletale said. “I’ve been going",
"after": "“Actually,” Tattletale said, “I’ve been going"
+ },
+ {
+ "before": "It’s not hell either",
+ "after": "It’s not Hell either"
+ },
+ {
+ "before": "damocles",
+ "after": "Damocles"
+ }
+ ],
+ "https://www.parahumans.net/2020/04/11/last-20-e1/": [
+ {
+ "before": "newly formed Protectorate",
+ "after": "newly-formed Protectorate"
+ },
+ {
+ "before": "looked out over the city, and feeling shock",
+ "after": "looked out over the city, and felt shock"
+ }
+ ],
+ "https://www.parahumans.net/2020/04/14/last-20-e2/": [
+ {
+ "before": "a few years where father felt like",
+ "after": "a few years where Father felt like"
+ },
+ {
+ "before": "“—And the escaped prisoners",
+ "after": "“—and the escaped prisoners"
+ },
+ {
+ "before": "case by case basis",
+ "after": "case-by-case basis"
}
],
"https://www.parahumans.net/2020/04/18/last-20-e3/": [
{
"before": "madness inducing years of isolation",
"after": "madness-inducing years of isolation"
+ },
+ {
+ "before": "Finale! Now!",
+ "after": "Finale! Now!"
}
],
"https://www.parahumans.net/2020/04/21/last-20-e4/": [
{
"before": "grown around the the maille sheath",
"after": "grown around the maille sheath"
+ },
+ {
+ "before": "the size of a deserted earth",
+ "after": "the size of a deserted Earth"
}
],
"https://www.parahumans.net/2020/04/25/last-20-e5/": [
{
"before": "She has has some past therapy",
"after": "She has had some past therapy"
+ },
+ {
+ "before": "It was a sunny spring",
+ "after": "
It was a sunny spring"
+ },
+ {
+ "before": "living like a king.
",
+ "after": "living like a king."
+ },
+ {
+ "before": "dont’",
+ "after": "don’t"
+ },
+ {
+ "before": "its a nightmare I know",
+ "after": "it’s a nightmare I know"
+ },
+ {
+ "before": "what to do.”\n“Imp was saying Lookout",
+ "after": "what to do.
\n“Imp was saying Lookout",
+ "_comment": "https://english.stackexchange.com/q/2288/242244"
+ },
+ {
+ "before": "listening, lost in tinkering—
",
+ "after": "listening, lost in tinkering—”"
+ },
+ {
+ "before": ":Transmit
\nI’ve got",
+ "after": ":Transmit
I’ve got"
}
],
"https://www.parahumans.net/2020/04/28/last-20-e6/": [
@@ -8812,6 +9054,22 @@
{
"before": "Back to mom",
"after": "Back to Mom"
+ },
+ {
+ "before": "Gold morning",
+ "after": "Gold Morning"
+ },
+ {
+ "before": "pay it safe",
+ "after": "play it safe"
+ },
+ {
+ "before": "“—You’ll want to pass on",
+ "after": "“—you’ll want to pass on"
+ },
+ {
+ "before": "cross the Ocean",
+ "after": "cross the ocean"
}
],
"https://www.parahumans.net/?p=3365&preview=true": [
@@ -8846,6 +9104,18 @@
{
"before": "with black slacks. his hair was chin",
"after": "with black slacks. His hair was chin"
+ },
+ {
+ "before": "human right’s abuses",
+ "after": "human rights abuses"
+ },
+ {
+ "before": "‘us‘",
+ "after": "‘us’"
+ },
+ {
+ "before": "captain Gaile",
+ "after": "Captain Gaile"
}
]
}
From 0b4af123ab760b0d8c72a4e0c5ddd4391a71477b Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 16:23:03 -0500
Subject: [PATCH 147/186] Hyphenate "one-sided"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 030d689..24d6b56 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -661,6 +661,7 @@ function fixHyphens(xml) {
xml = xml.replace(/([Ss]hell) shocked/g, "$1-shocked");
xml = xml.replace(/([Dd]og) tired/g, "$1-tired");
xml = xml.replace(/([Nn]ightmare) filled/g, "$1-filled");
+ xml = xml.replace(/([Oo]ne) sided/g, "$1-sided");
xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
From f3366e834690869fc59611fa7b176761b883983e Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 16:28:36 -0500
Subject: [PATCH 148/186] Hyphenate "medium-sized"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 24d6b56..3fefb13 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -662,6 +662,7 @@ function fixHyphens(xml) {
xml = xml.replace(/([Dd]og) tired/g, "$1-tired");
xml = xml.replace(/([Nn]ightmare) filled/g, "$1-filled");
xml = xml.replace(/([Oo]ne) sided/g, "$1-sided");
+ xml = xml.replace(/([Mm]edium) sized/g, "$1-sized");
xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
From 5a25df658b0322ca12cb590b7396966665b2e66c Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 16:31:44 -0500
Subject: [PATCH 149/186] Hyphenate "teary-eyed"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 3fefb13..a271a5a 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -663,6 +663,7 @@ function fixHyphens(xml) {
xml = xml.replace(/([Nn]ightmare) filled/g, "$1-filled");
xml = xml.replace(/([Oo]ne) sided/g, "$1-sided");
xml = xml.replace(/([Mm]edium) sized/g, "$1-sized");
+ xml = xml.replace(/([Tt]eary) eyed/g, "$1-eyed");
xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
From 89789724d176911f7a1b7f74de80c915e3c73a0b Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 16:34:26 -0500
Subject: [PATCH 150/186] Hyphenate "worst-case scenario"
---
lib/convert-worker.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index a271a5a..5b9602d 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -664,6 +664,7 @@ function fixHyphens(xml) {
xml = xml.replace(/([Oo]ne) sided/g, "$1-sided");
xml = xml.replace(/([Mm]edium) sized/g, "$1-sized");
xml = xml.replace(/([Tt]eary) eyed/g, "$1-eyed");
+ xml = xml.replace(/([Ww]orst) case scenario/g, "$1-case scenario");
xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
From a89414392ef6c6bc53a6b60b062c32092cd46cf7 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 16:38:42 -0500
Subject: [PATCH 151/186] Fix some em dashes in Ward Last 20.e5
---
lib/convert-worker.js | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index 5b9602d..5b1f4a0 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -383,11 +383,12 @@ function fixEmDashes(xml) {
xml = xml.replace(/“((?:)?)-/g, "“$1—");
xml = xml.replace(/-[,.]?”/g, "—”");
xml = xml.replace(/-(!|\?)”/g, "—$1”");
- xml = xml.replace(/-[,.]?<\/em>”/g, "—”");
+ xml = xml.replace(/-[,.]?<\/([a-z]+)>”/g, "—</$1>”");
xml = xml.replace(/-“/g, "—”");
xml = xml.replace(/-/g, "
—");
xml = xml.replace(/-<\/p>/g, "—
");
- xml = xml.replace(/-<\/em><\/p>/g, "—
");
+ xml = xml.replace(/-
/g, "—
");
+ xml = xml.replace(/-<\/([a-z]+)><\/p>/g, "—</$1></p>");
xml = xml.replace(/\s?\s?–\s?\s?/g, "—");
xml = xml.replace(/-\s\s?/g, "—");
xml = xml.replace(/\s?\s-/g, "—");
From cc2db87b5843c1927c425c2b911a899179c88a00 Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 16:55:45 -0500
Subject: [PATCH 152/186] 4.9.0
---
npm-shrinkwrap.json | 2 +-
package.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 748555a..35c64ed 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
{
"name": "worm-scraper",
- "version": "4.8.0",
+ "version": "4.9.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index 7437398..d21c2be 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"parahuman",
"scraper"
],
- "version": "4.8.0",
+ "version": "4.9.0",
"author": "Domenic Denicola (https://domenic.me/)",
"license": "WTFPL",
"repository": "domenic/worm-scraper",
From 28d4c6927a9873e250ad52d4564d1b7c5e813d6f Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 26 Dec 2020 17:31:16 -0500
Subject: [PATCH 153/186] Add a new cover for Worm
---
covers/worm/cover.jpg | Bin 0 -> 383259 bytes
covers/worm/cover.png | Bin 339496 -> 0 bytes
covers/worm/cover.xhtml | 2 +-
3 files changed, 1 insertion(+), 1 deletion(-)
create mode 100644 covers/worm/cover.jpg
delete mode 100644 covers/worm/cover.png
diff --git a/covers/worm/cover.jpg b/covers/worm/cover.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f4c70423e93055c223742b03c970c906b4da63bb
GIT binary patch
literal 383259
zcmbTeXH*m48wMCeKm|nU9TgN50qIH$h=70yO7BXC5F*lRKm?^mKtMp6^b(L7dZb2#
z&i>OziLy}=K&b&Y
zDdi=l{I|TJl>d(T-}}3_t{&dv9**9(WW=S!C1ogQngyr>E>TWQMfG2O>A&SN^?xF#*4N`)upT@id*-NE*H1SceI_x{7x
zYwR4HT-35(o+AR{X$|M-c5`ZEnpt>@Z0hDOFFre@|A_709t&MvNQUfw>we*OW0
z5g#L?K7EdkNlN~jlA87{J>y4yL19sGNoiS4ZC!l>qOqyDv#YzO7xlaE&*<3r1ZHw-
zdS(f`yt2Bs{%>OwzkhIebbLZMJ^K$A6@dExghi?UA93BJ;JWnRJ)rv!7u6*{N};}a
zndY`6EwkDyy0@Mzccj9vus%)9tM0feDE%78X7_&N8oQ9p(p~(2(Ef+){~fRo|G$v^
zU%>uvTr&U$YAVXjqrM3M0tiux-R;&J_x_^xLS^=9?IWz|vKR74e8p!z?1p@V)5hAB
zG?h3uoiAa-1SMK`O9l7kg3nz5ieCHPKN?;SUlq);0~h_CUG%X#p+X{OW)ROc%s!tKaEO&WMBsz0tjAG%NJ~ZdMbFT`x;7!r|MAD7G3Z2!u6Ab1oPT
zGvTz~?fvi;&+c8WxX0%9YRYvuzzv?7ytRJ;&^gotcKL>~rU#391~g%9zmbuc4^jH1
zv(Y9q(*ag6#mS5xy_$9oX+h3=fx|RG?e&)hp!i%yMKRz`;x1~Q3%(cCemS{E<@Ubi
zxkafj33zTqHRYNufx7@4FGK1N^6J5fRn0i40Z1NY%UZ7tCZh{xLZT*kL@?Dmi3LE3
zfyGa&YMFuGNxtpDEmu$Tmxj_?*7yR>*2+Oh{GMuMoap5#w5+-}*@27kJzmqO`*IB@mbkW#2OH$T`9FWlXJdmJBdZ25Uc{yBM32yapJKGgn%Zy$LN
z7#lGNLI;as6)ym+{-?5quz|+L^u%;O%egFc;^eAt$0>3KP5Y_M1NIy~8(W+PaGY)mRteR7Az!Mx@LfM{=C&o2LEAk|Z_8DU+<
z$wd0xTPrKSijlPp(i7-9+fwJAMe$W!tFEZw3Qjo?RK4S*swJE}DLX+@8QZIpC2}6Z
z9e*VCE;-gl@$`x&@0XGO6yH{*3;)3A;uN9{U!3!2>P$_X*X;LEasusirA|4_@{><}
zh4EGN$Qs=7NAMb6cCK1z*pe1uE`PN$qW1DogD1g2#KSv7V}3WU^d*iUDHf~|QXu4M
zj>sX$a&5@ubR;#tP3U(1j^PL=$(Ipqu_^7ag2ms1lUxTy#4$Ok^^KOt$34_wt7G~_
zF8{Gk@}mSOlWQswK5Z#4xwn#4>~gl&Y)Y6f2-Y61Y-PijYrdk_7HgY0bSuLQwKaIe
z1^)uiXq9yAsim(d(8bDDeaK;6(vvVsI(T9EA#tvU=b;3bS{&_q*t?L9s{A|cW5N0b
zmIS1Fq6Jq2&*#ZnF!h;%^J@jxbbfh9|Ayf1ox{G8yB7d6kh*Bb0sya1x?>|^nT}lk
zvoDKiJ4;;Wz_MNFz;GR>l*_Bu3l{|je_B*kfuY`Jym?xf!
zekA-miE{#>zWRl#GrsL7fd4!QBi>k3nfEAd4>Ghgdk9~~g)@e0+YwpMh1L`{Lm3E&
zSGZ5{;9d91O6Dbxl!}%53qY!2?q%AuxKJNJ$nswS{%OJmK;x4|Eh^_-;qK?Xd&{jC
zfJJk7`G#xVt4l3k{1pz
z4p0BVa1K2?dJr-*jBe9jp8siAV16tx($4H_4)4ZLx`PkO?F2>@
zY&jepa%r}D-{*21&&LY@q=}>tjJJO+YZjCM@|*iMkofZ)Q{PTRpP5Bdko
z1ifytfq*A?6cs%9gkcwv2Zw;wE*4%MIu}3rOPl?6&+10O%j4T36t?ar05fxtH-8L9}FJv?BX)
zvzdfK+mFK+`^b#HWEmEw&zR~=ZJb_Yx34W8|KKm#Dl`SgiVzi^^|%0F^iGh)8*0zi
zv|?MC82I;_PSh>Ymk@L&RMzuTmu(Y&3Led{G%=)viiy;cISku$7~9ZzrhJd
zteOFrH}RhGhv$;2yzu(rk%vMphQz*8w>!+%n&Lu0X#*!rzKuG4Kxj%bR_w(#5m
zRa0eWZk*+1^nD}Z`9E1p=FZ8NiM4?j0E@;xyX1nI=+hWd9TmV+XnSkSkF9p8UqWHS
z0c8Q$tIN8kSN?8&MOoC$6;t4#*0K{ql`0}W{Y{dklc*yIG97@~|$Oh-LVbb3EF
z1i($vbk8*%$UcS8O3Rts4Rsd)lU9kNa~EpbO_J5%7&M3^QkRC#?9XKkauJMJ*FVbI
zGS!LFopes#69c&1$L4?Odw&6N4md6dDWOipS9ND9^PjgW7hdh@zIHiTth0Aak}5+a
z?DYG$--~3lSF>|WU6xzt1_YMuOr2L7`yAnt#o_xdW-gK(5;W|Rz=372F7?k|-LBZn
zPndBnFq<#fFG>@Hd@UyI8|DPZbQ4T-7YH@SkIr5o9c$it`p9_hMDvsY0>i^Xp5Kd5
z9}j*7C(QP6AHlk|cq(SBS;@mq4atAvdP~q;-!1?=$;8I<@oU&%wMt6gt{+_vtIyE#
z7<}6lZ?egycE|MBDMADc-~5$9!#G4kNUPF-Z|*D&F+7{p(*KfgrQYHmJ4KpX=NxKx
zMPB!g-FeE>`!yuXPv~3Yzp2OMl=MQX1Y$j=-zHpL}`%B`WOUd|P{4fm$x
z2l~6qFtWvMiF78%$2(T~U2wUz-K*_){1awGciyHx_I|u#k}l||bccE$bqT;5=oGH*
zHQr9^D%i0MC)@W@A>&(_LDBCJmF`J47AjBLi{f}Ei{YSoUb>A-9i
zc=Iw((lE&`S+EIiQzg8be54Y1CP;yZ)|_>Z3RBU&!Of0{s4Oy0Xo15b(22gA#rhd$K%tK9N`4W--sH_0)qPEHuP0PrgN37^a9w?a3vXK4PwR9d1AI-6%L
zY&3HpW^yluZkGiQAzgBo8Kr_<&6X?=~e>k(51Z^b(I1*Fg`1If&)8`BIjcR%jEbs^h4_;Rtwf@DIZg$!Z}9TGB#*w<8&50HGOD5Fh8Utb)r8A
ziWO=fXLC<;|I*v5LGm&^B=*_)Na@#`QNc#rym}Nw#S}*1CL6ZC+gXe83Lv&b`C@fd
zR>#`u+ZLC$d0eC+uj^4~Yh|MsfC?FIvClbsM*5F5XDKux&S_z`3}mem)Z?Vqd7xVP
zIaMnPa(iW8*RzFqv=GcqaD@BAY!lA5p6(wulAW5#C#59H)1SX|8TMJhHmnY^4d%vk
zpA5|2)JJ{*o*k{qbMF=h$xQlH?mUdsO;RqT@s#vrjJL-I(Vb
ze>^5+TWJQDRhg}R+%{4_Kv=kreQ|r#0bGEkk^*wCPj+M|H{_S{Y{3YJAApBLCmd^)
z_TBuq`GxT^lav4&)#yG8ZKUfzvx|^Bl7{zhopmfg{dgFWDd4R%$(hfsRm~i%++qp7
z%T@zP-2DM7jrKA*o+X#y1OJkxP$zDBQW6SI$(S%P*@?=`#+)Pg35eg^A^QY94#d<3
zFOD7d0DQ>oBkXwl#+UJA*$ov=oZjdL3+UdW_q#(kz_1mVp&a2ds4OaPg?FXfM$Tiy
zBq6d8$=HOW9PvV8*MR|b=rnSzWZ_@{axYBKGpa|gcw^MY3e_4M6(;db{hDtOH0gAzH7zgAOT-{M^6%Ewqkm*wT&OxlpQDvjVi0#8*r9&%FZA4Dx{U
z=%&9_G0q)`D@Y|{hxPBHJMfum^^+@OJKnRI{r70=9gX8Sr+j(6EVFqDh4?2*+CEDkaVhb0{SM~$YA4`$Y7a%O<66c)
znWm*1YBf{)IRb=bQ=IawYsSz=aWS8bg2u*-MZ@-}ySlH{0$%H%x(a>#(jFE*Q(TP)
zr^SPJR%&oLQw4}5ua$BS?c0~zM?;qyt#%$$!qhj;--q4db`ms%*zO!=jpGl&Pb5F}
z{n`9dmRFOYR=phMh?Vo!%k|@|ATF|ivZ;`tRB`ch8Q}asI_9VC7P>r|0CP*Sg+ko@
zqg%tz{S~e7dgDL2c71epuUNw?#Rca*ETnqUZ_V$wRvP790E+yJNWp{&j<}uly53pd
z#NJG}QuW=Mh(^uN-{bRFRSH-Z!jw=Q7XZe}U@hzueol{kM8-zC!~V2m2Ka*-chvg#
z6=Jd__vEJn9DB5ftd?;*ve7qt5OU;}8Tu5svBa|m+_sBQuR70A0bBq&$H#Yk(Dsh7
zsKfM(E#vlNIB9Ce3R*L8RbU(2Ue!2SaRI28$zgRJ2}z~Ua))X&GEq`5#&9Y;c`t+E
zZqwQ#Po0ZWK+)XY^Jt=jZ{q2VS)Vj}54*(#5dW;z$V#nlW)A(_g3CM0ii~87zEzf=
z4xfkcOEpZ5cf$Du!dvw#NiKx6{EakjOY@C}20b5;8-B+jR;W^|lvCvtZAOTdT_|6s
zZ5o3ZgLh1~Qybs{nLLW+GD(f(OMqWzPGL`SBIFPXwq5;u{B!-s(N#hRGRJAAzf;oX
zL>#tbauuL;ps`j7ET293rieF$a|Z2ssqD&f23vr~H&I2Mg5Vz3q&@P&=0KS<>H<*5
zUk_RqxM9P`FPF3l{HY6m6kgDnNzL=h@~g>w%fD$j6Z1bCL0n`4?EYz$`{9vpiz2$V
z>)uE>Ygq6IDHohZaQMBWR=tO;5!m4aRZOcbG2ICH@z@4;9$&fQoU~xq;wvrxQ0v9X
zmd#$N>%Qd+%)U6+Bc83UDfwM6id@fHZXI{?M&Znd|>nNNTxm
z3inFI3YSv3z$>Tbf&8bHX}`#lQTww*0jn0s6DodtpXBebu}ig1<11FJ@perPX2%Kr
z#*2Fr|CfQFo)4f70vTcjYuU7w|S
zjaG>u?%yL-Gcq9>f%Ers>5bsj6%boU6^z5}UCr(ymQACk3B
z+?sb$Yv-+WK&pM4%@nC;)BQ(bC|)_gur(uDMfSoA
z^QuhHHf>c-@~zQcO9Kk^G+W#PQo&XY2$1gg<_a@?CjkO*+LsDh4)2AkqXmg@6F~}V
ztdp^1>3En((-_~{blW|DvV!!8(9bqL{4ZN16a0(kZPNuHyFDjOzq7iO#VF`rmY2_G
z^v3zxTuw1$j7`fN6BS!9r^i5aYIcaCx&Y|i4T(6^SuMq7J%)bPXD4J5Il4x{;7P+c
zYVxG!xtEO(4f^XVraEYad+oV)Nil!1(dON})2*e+#=2%pOjerP(~-zz@66ARA+Z}j
z87B6a+$^lo8D-YuQ)%u&_Ots5bo`CIAzNcg5kXztxYqWlPV@=5@of%)#@l2@tiKcP
zGj;)}QoXS#2eX4S!CfJQlHAYMB=4MOum7CPd%e$FpN_kZUs^xmb8=
z+a0(2OIsd(X;rH{SeYPN=vpQl@AJbc($8sMukQjtWqW{DL;=~e2h|Jx-(uuu8?>2~
z^EnT5C*0rNUi+Haw0^)hm6|-6VLz{Fd7oSp#yq9(XEu!v`Q(ENe?ElUZJWO)|V4HQxW{WuIW=#SYNTDv@{74GD
z0IU-{V^&((*cPK@?|D&KudL`*Z(|@AyG-txf$Qy2Pps+Aqs%Ky{hP@6K$;iC-`>%T-Tg
ztvbZr|t@l@qjNF0C@pbv&5w^c#_D#Js5qMX^!mu*j7!H_kb
zQQtU$rORG~N#*4sp2Z&b1x}o9$K5fZ_@H-_{BFP4yaYx49#qwxJrb;^9nnF_FDct<
z5$y9&m2M+Z34iYA4R7B|xSPGXIzpGpBLCQ|0lBPE)njdHJ+YG9#m_QPRrE^8&_cuW
zigV-#k$1B&raK(KTdOdeI;Ir<_`(kSNsnRD!sU@*VWGqp?Gh_Jh~FJWNubEJia0f{
zzh2pGpjratueJ)e`ok@(V(iAf6>aY?<(c^dNvS}UOWzp|0aU#rw+0B+IML^XK~CMk
zk(!#UngFl+oSdR9*;+tDE0rc(mo?#$yZ6nd4OujUP;%(*MDUjRmDx0nzt@+?-<;$=
ziRb!(Bh9c%5u8yQ&5C9`+oBBYFuu{}EB960T5w$@{Bv(6sVWH;j5cJJs-I2Ijl}w`
zwI532Bv+dlI{Lmy@@6wFspSm?s||a|DvLUW3(CmJvu|)D@;rE0tN5^t+AF*c7VK2p
zs*K}bHp~#yQGTiG)s-kQ@34s^?21s=jAI0@PMObCIGs5*z6CooWE)X6oNqAU4~8~O
zl_YWL?RVX_u+V5`+4D{fZjb%r*(ni^ORy8wvuqa`W#Ds|2L1{HN7rR3l&Sr5Txs6k
z0CxT&t{5?vcS758u4!p`YQYn!%@%6xe|CeGZbL0%ZXX)Wy>~1SR|P-hRI0WwxbiXQ
zaAx}yd;$0}Zpxv!8OlV^E%$%gbG+e;?>mkiGF$yZ&4e`?794B8q*dl-;Z%qvRx?Pu
zne;AT7W9T?ZfJGBs0=mjXUv{Lx%J`sBY`o@${dfKv2H(4{*a4`gmekesd87mJZ99{sfZgL+GVzImyj;*kAn<2JemTUq}4KNq&
z)_Sazd2mfo%KSnz9E8=wnNXF-;1r!v8IAb!RR5K0fSI{=@RTFNCA_qT!J)!8=2U}J2V3Lm}qXPwaP<758`$LV?KU^855>@t*g2n@H9k#K*t
zy~na=mI`inxd1rAeqR8#CwngdTq=B4}4+*D}O0ig2BNUxy2Tz0*8$dk4-i
zbQ^QH*3xhn`Kby~@5SS5C3R*J^>L-i$0c;%6pS0fdY@gdcFV~A(NNA1hdB{DVK%YO
z6S@GrCGTNsrB&Jq()c?=CxY@fE2lQk|{yVkf~sQ0@E*;G`>oubS#RH;EeTc
zl?C3E`!4W)`awnf)-A|)ib`%-J{YWlZE4A}vv+{{!V!41F&nW)6kV38qJ>ZEYB6m&
zkcoD9K1pK&Y<|3cgtp(3KWR4p9%%WjMz%RMRgHgm6Lpmry=j}c6SMQA^!q}zUzK<*
z=hcRN0ZE}(LU8UV&TH53;dgG@l!EYzJscLYtutBQc`D!9E_Re~<_A>-ncI}B
zRm%-e60=IIrUJ~vzg4@#?(xOs*M#qkEpqKy1m68>0iQ*w)GJpcHa9Fvc>J4wnv=Pu
zrqgHkb4ISJ$O^qSE3L)rUaF{q{d1iT_`=9@^HCIRAT|h0T&QDK7oORjkFV)2(f`8(
zH*BhoUURA16B_Wp$4UOTEW?E;nQARhJO-=K#;XHr3DkL)
z+K(wnZ15{=3(jubN4bR3!C=tcnC@`7&f36|T)NW*=x-OI++!KQgxQUhh;!ei=4RM+
z*T4isx#f&-R+Av44!jYMTvFwabk5yyEt>g<@8dP-HU8Fjk`wox{G`||=cbsyw9ELqwhK;eJSzbyA&+I`lkM2^G_85lQ@jgub*XmSVT?u3hvx!}jrap!e
zp)zLfjthYN1@~`9cs-v&JJDJHaFNRpPh^Le-kDpYKSr8cWiyYGQI+5g-+X2WiEd?25!_tF8p@hi+e-9JS$8?p4ViUF
zP-o7&0JvP{qU2CycWkI|o+IK>#Vb8aR^%B|3>{8T$UCgoy%}ag)Db@=zb$iTlXqI6&y@0N#Vd*Z%6uevz%}L3fd#
zkAkeFD9?;?b6HLPVC!ABKaq(%dlCch3biPXCa@&P|I$KUy(GA<{w{W6{{
zpCa<>-?Vix82D-X{1x1T;r(?@o>T)<>Zxmo@un!?Wn3EL(x-!G-w^xDGX$WY;3Vfz
z=~|u8?=dB+DniW(QCAI@)%KlPPf$FsL|asv!OwU(AxF*HBp&Cimx_7PYF?pIy87VT
z&d#!^+b4leCZKIF=8Z2qCbi+w-NKZLcRk!of}eKbFA`7tSBIU3xCwh@8#Xry5wtms
z71i4CnNE*NcD-NP^ws9RL{Tb&>HVM*OOoMao&E=#8qe$OTiBB6>1>&CsV~C1DOlG=2
zu?!h`nN7(ax7Qn(Q0*BcTk*c{?)bsnb0J2a%9q^g61Y2ma{|{x<@}3naLBO3MSzQd
zOVs1bekUBG0Z`}0rP_DF$TZi*{bhn@wwIa=Si9t}|#uuo3b4
zSl%$?^s597at1}Y+EMOA;9tL9SC#v&eU9rtn@10)QqR-Q-QTaC^hL%|Wfr^pyqcudUBL32Zw|T2P_8)S%wTXQJDt>o=
zDsI`!YaA0f&r86W6xn~ATi9s$Df0$(jJ5zaYhC{FUg`-6Bw#cg{KU>&Ldh0#Z=tkk
zrtcjxip)XsQZph8%FUXO1Osn34nLGAc-S@Al=Zb7x~LQs$~M)ttf#R4A|dFx5V)V%
zB`{xsu8fV!&GOo_THm&Jha(tOyb&HVv
zH$B+&vfr`iY2ZiCo%jm?^2%xE5n50oQ$UWSi(@3m3m?P3^IOBmW5y)fHxHOt{voFk
zN%PPl?M#6z@7E82*QDrJu*LTK?8?Sbf6{|B_^QHG9{D3WW+JnWlRSMulARZTkZUWG
z(*jH?_smF7aVj~>)?BxkU&g8Q3(x6((;DI-)?{@}m%TvOMLl+(UV@}Pv(6_XMNYMT
z{!@;0(gid^RMdj69ie1gUV0
zR*|&%Q#{GlyUX-M^rC+w5sABYJS3G)_oTKkb6
zfY~=WcgUzar}mt&yV!7(QrBDU1ns3fc9oG*HDNp@=qN+tqJ=-n^d%(vtm|x+KTHD#
zck@;ct)Cj$54_zp)DznC6i#m$auqf^0h>*$CcF%_;emB#3YmQA;0b}}rP~{)WO_Kf
z0|>7!Ctb~*c9-H)ElHO<-+D(0blcH-kRO)T)kNX58Wd!Gh$6N;AF0C?;1rsN5UVO?
zMJSnef%u~FsuTVDk6~kDhm;JVQ@`>=cf6->i|3&7gVxELo9`0^<}CpAh4?yXcD+ho
z0IyWt{gc^|`A-7_6ednbs=?&;)X+{tslQ~;L
z_AQk3$h9_(=w$WJU5@P26ga8bbyR#Gy{xp4cV1c#2|rkIn9
zx%x*8HF}ELf*wwLdf7*}zoG;d`9SI#)%1N2(QDeYYrr0BiLO0bvGdKXvzVQzM;4NY
z%cn5|jC@`edVZ$HpUG=1k)#I`1=575GZ8_GSB7c+0syoCY6AntGGXo9xwxxD$z}+PF-qChCN^d-;V_Su?w%QkL%w(fA((hoKD`Ru(ux#z*ZL-%bOjb17o5x*B&(`
z+4DS?@k#!3W5L*6xLE>fQ0-peFzrwz38;MD*_kD69Y2wsvZO3hi2iP91*G=Qj5!Og
zqZ-^&sIHD3cwKb-me^Lk!-*;$t-cdYu?GmBZKzOUxG;GovHaFFCvBDe18boZ9#cp$
zE0s#DJ?+mI`m|VNk0pDcN=~uh^R2ow1Dc?mn;Kvt5Zzhpw^@o}do($aQ|Fo4d1Bx%
z{PL^RtkvA&oDhPCcS?u#+YbR$dPMTB9*rR(q4;k|7N!{9;aRppTp2gHT9n{&tj$)P
zxDux!;4ba8`95f}-Q~=EwA}h-cbR4lnB-tE<+n8
z`<8Hf$#uuB2BReG4DuKW?sS=l@F73{>D%8E7FtHOsTYI}LTT&lSIzzPCI{yC`vfJo
zHs^*f%OPvlJ4I4gkxN_-P0jAnrheZV|GE$L!z>a6rgs@{woeoh)$GnbZ8`;?6w*e=i$be*Y2o
zc^|ZPr_EGNA8)b8p0LYl;q%&0%2{T<6@&Q`_qf>>4?U-O@c}r^7$L5F*4vdzznx^1
zifO++EUq%b_Hp#-)Lj2C#_ns|a_)%Fyu!f=!$@}!N_c_wTn{`P+2CV;zPe$i>s1E4
zb}I$(Y?v~7-RSXDxijK8=A%5U)=Wz*#;qR}=;?pNwuaz{FDBu_-+GAC9BUB+^J|`Q
zJ5?#?cG}dZ`eK!9i%8Mq5K5kOKW<=@jWnR^+bO?pj}t)on(T7J-$?01&%|8
z8(PA4ETZyxJ->|_J@*6qI5HrEvOk)vRWF5HBmd!(eY;P|TuMq5S8bW;Sfn_pSf(6Y
ztI{}yii8uNS=UCTvmdH?82z8F0e%67nBU9cI*UMQDX
z-m%)}L9`A5{-(J8($h`P4g>6*BZERNeqCc(x@4}0VnFL|^_FnTt*
z)_IPiR>RF^dK_I+heIX=stnI2bK;7(w`5wfrtTA;Tj#LSTgIv#M~>fVz5oQ3mi(w4
zC8?J_9ea4gMqT6@^x83u_AHAn`;+5RDaF5dMAMwKaO%M=PyAgV)wdc1-rCFfMp)jd
zo~5W;w=JP}pS_p2eF)&V0PywD=q_p84_0$!smNV@F74TUbLRwk64SlQBQ@MkS89#vj}
zbg!RbY_jrf+|q1kC}_6Q5x~*BjejJy~*^ea_pK&tFa7L9|O)aj@UbiEK@`jJZtmTe&MZrp774mA#yRr*G)C8&z`cxnIF@CZZ9sHh`$GC
zHD#m(J$O9Dp()i`CGOBVK^eJqRBTs(I>@DSUYp-G0!Nz^b1q=C67BxB!?7GaFmZE<3)jkw2UMgjT&V<2u`J
z2Mrw14fR=~!rTp9U|Q$dDx9+$
z?1_XJS2l{-0Tjg8^6s|K=^OAel^m?I%|n=?OMEGbFnR5|x8Vq0s;&_XWLcQTt6
zFy4pH8N2>j$fS;p**_#$=#Aw6wA+7jbgk92DEJAk!Xg{?77Kze=#6E@%iY^e$P7Q@
zT?yg_?AFCgidUMrE>9IoBF1T=)jt}0
zv4ubjY=8W9c8&2aPH?!DZD~i{7{h^TmqjEf_8C*SePKF_=hK6HBZ!^rn;|jP#7rFHYh$7v3W`08!LDFM@o-9$N)5Fa3ow)H#7}90
zn$W+p>D3*xHm42ckoTEts*Pt1rgZq&B_CvLtMa{6XG{MDz2nNoP13N+0TD4Lpfkdi
zbC=Mi(K_tI1&91~Dbl{C%>t$31^;L5GgW>k_eX2&)WAhWd6xFU+NTn0*O{-GVK=LmB8Z<1bSdM
zl2N;Y;0JDRa&|{Epvjd>T4p(O$8Z3Fy!Y~}6}|b6#nsz)9V3z99jBkho}lWD&H4Fi
z)K3rs>z1e`X~n8YTTa=kPuX|J>WL7-*(ij2f9M?dt?GvgbMP|+0qy5mrW?R9D-5l(
z?wnUg4nVU!v89cDdVEF??rZ!yhF*3_L@C;KmX!l~ISPHQkl!qBf(E}f6?(NW73fEb
zixsqFRo)G-cY?IHJ%~I`dtY<@W9Fz>lq7~}TV9_T9~o29!a5iUyqRZp^V8bNNPdZV
zgYn?E46+saGiw6WHcdP;uxe+u0GZQ=l-afkQ@WPn+gOkk#WQ%6`=^Je
zN)fuZx-i=Wv2kf63ip!}8(w0=_wyxsL`tG%DXzI~k1b=ddd8>z>?7uu5WIvA<1*VH
zbHtdVuyn}oQ6MATAVgs$bTxpL`DN<8GYY+k1y|UgJ6Bb0WdtYBOiRtU?C))IwFTs&
z;m@}#Ueui0?wGWiY`EEn12G~`NdGt|i&>47-;?Z@6i4YM#n1ry$86E%TLbNX5j-K6SKh-KFOJjrdL`KP8@OA?Hf`3E+d%8az1%G7_DyDI3+qE8Y7$UfbhXZ{mD
z4T#zZzkx?kDz)@~IZUBE3zp!fq_HOgt&m;QVt=S5!RxOoLZRmn!HhgZw<%lzA_3aV
zIWzj5>@$WgWag1{ebS*vu@WT__SeTY_MpGG&TETbXJ;t+KR?HJK2*&5q=dD--p`2r
zs?(FQnTTkrx0J-mKmz0bZuu+A(EU0YiKi`s^oAS9QnvE}2D7^mTZ@5#?O^%8?-EZ>
zv>P;@I98VJnSFYWh`vpn^;G5cMPQ3{hr~rl(?v^Z#O$0xM=h4A=SiCa$PB%1X!Q^)
zfG}ovI{Dh;%eq&nQfP{h>1%7?bRJN`4KRn>{c=0Sp=rZ%@}!>?YFEAYEBEr4yGtVV#`n{Y(ZM`2N
zL85hZUyURpqXb%_>&*iF?M?Gs%F;JYHW@i2Ml>%#+Ne@uzChVz4~=XdR1;eMy2=8t
z>$O$6ujrbcUxgK~jZ+<y@<;F-qnN;i?&JHKVtNH#NIgiy+$64EWe{8
zKCUW#2!ETgDavDJpMaUa{7#)A=-Ko9OH#nrTrhBlT9tW=d}oX#S{Ra-YRD56w4zlHFKm=-j%<$7RaW4#Fc({yXKXT_4-0JzIZ!jv
zig(LU{w`?+Vk}Y%x_bZ~>rM(V;Q8?5=^!+2oBGsr7|EnW<%;dAvyHQnnkbx*C>*s<
z9b9;P+~rY7JDfkv5E3{{;X{WmLP;hEcLlw?_9vFfxdtNJT6BxW{^d_`i_xt&1B(4i
z^K=!xU1v--ZQnvcxV3Jsml!V
zk;Cgfx@0b%;{TgId?#j=d}T}J`h*Oh?k_2`MO)Cqtw>Un;MnZ;qvo0}W)xS7QDgjT
zy(!Lli^5#)hQ->1%(qxdv>&<18tmijT=H;K*UbbXCGE>{@?Q6r)6*L9;a@EEHN{^v
zycYOzaM(w2rpR5N{-_CdeL0t<#N|77@7iA^l4>|7X*@?%`P3@X3+&PCAyhbetQZPb
zADeY$Gj8->vkS}!lvy|LEB{sZ+Xxvv`w5+0)p#iTPa!_}DV!m$Ffr!$9}b#NhWq#=
zF9POMj>wD_u*5PHj~sIQ-EN&=68=8_&N1UI+DM$^&8&F_JjQpD#cUJk0oyHqyJdHt
zP2UoPmfo%Nuq2;WOdFB!pxcIKcJdnqdblqDw?qYdEZQpM3@L`)e)TZu19T>@V_k3e
zj>p`@#9+Dm^rnMePL_*Pa4k`t;G_R}?@kNF>hwHeqhD`pP-`Ow-|~6DJU-KR3={FR
z3<@uIJ#nFi6AYJQk@Q=~qn6gPb;I-b$96gPG(5*|lKYtwF96eQCuyC?&vMzyxI=}n
zxBc|mAQj8RFV?qKgo#`N5=2E;Onhk(-7D61Z*9pZ;xXtW!5mB?`N&@2Bc1SkVaX*-
ze8|fDssDy`O5{-1<@Z7%{4RMF
zz=u>7U74aJlDS8Td!nAiwnpXWC=3GD>0}f4IGNd}GG}Qo;b`bG=!@;Yv~1mnC@2wB
z=Tuo>taDH2Wr|Tq4~}{n8r6Q+#iRjK3r>H-Pl=Q2DfEd2@)GV9`L#xwdsOK6h_i>d&FalStAmFW
zgN?wow#R=d+)9B_(Qy351CKZ6X=7xIo7qQxj^zA(vwtwR;ypbI5y~FZsT~ylp%xBLzfyPiw
z?=skWT^V`-H50hNfMy_#x6@{G9J?dpi0#i-U@Kz3)gUJgv8vki;@b|FW7ko+nlED=
zk60v!trJh{dHaM3mY^@KW9u74rVId%)0`P85=wo3!*tevfuBrO9y5$+G(3fR377l>
zVS<$@Q!JMTbT-^S1KI6qf*H{Epd?AzHI^;p|3k+ep?o57RIj_FQ5dVMOBiQ_yY}
z=V;OnTlpz;F&US8txw4}y5R{4`nRx9{K
z&!LjbLn-If^(JIKc`Bxd;E4y>QhIM6<$nS6fA*C=o$1YVYiy9T`V!+g^L&mu|7Q;^
zo}mARkoi}nZZW={htRfEz5(}P)Sb)u4_LJ_Nw
z$|>g@mlPopIiJd$W)3-@trBuv2q8vJIm~jHIgFKKIp=KH9CB`KP8*wje(&$^Pw?>G
zeR#gE=XE~-Cxr!|icvUC$G+5%9w!;lLnHCM{HR`>&!UibSLFY$o~G?P@NX|`;MHAl
zlvl7{8xK8{U|^_gYEvt>-~tOsmCqZ{?Th{1&*n?KP7hm@;dD2FIh<9D*%o{0RX5%M
z4fHu${u?!V{>LKbc_Wq9ZFn^cW5n&qdt;IDxTw|P>FbT3`CJtm+Kc^TXaXu+wg7|s
zhQQh0BXGLX8!Jh1*}9$FSM05)-a=vff{252GR!cWtrZy9Z+nQHJ3&2QvCQ8r!f}0j
z6D&LOdRpVtl@WJ}BQ+@h;1uP+`p(!`KH=*&BGoh!|_4FAIN