5.1.0

Add some fixes for Worm
Update dependencies
2022-05-28 18:05:25 -04:00 · 2022-05-28 18:02:34 -04:00 · 2022-05-28 17:21:28 -04:00 · 2022-01-22 18:50:33 -05:00 · 2022-01-22 18:50:18 -05:00 · 2022-01-22 18:33:02 -05:00
18 changed files with 3282 additions and 1852 deletions
--- a/.eslintrc.json
+++ b/.eslintrc.json
@ -1,259 +1,10 @@
 {
  "root": true,
+  "extends": "@domenic",
  "env": {
-    "node": true,
-    "es6": true
-  },
-  "parserOptions": {
-    "ecmaVersion": 2019
+    "node": true
  },
  "rules": {
-    // Possible errors
-    "no-await-in-loop": "off",
-    "comma-dangle": ["error", "never"],
-    "no-cond-assign": ["error", "except-parens"],
-    "no-console": "off",
-    "no-constant-condition": "error",
-    "no-control-regex": "error",
-    "no-debugger": "error",
-    "no-dupe-args": "error",
-    "no-dupe-keys": "error",
-    "no-duplicate-case": "error",
-    "no-empty": "error",
-    "no-empty-character-class": "error",
-    "no-ex-assign": "error",
-    "no-extra-boolean-cast": "error",
-    "no-extra-parens": ["error", "all", { "conditionalAssign": false, "nestedBinaryExpressions": false }],
-    "no-extra-semi": "error",
-    "no-func-assign": "error",
-    "no-inner-declarations": "off",
-    "no-invalid-regexp": "error",
-    "no-irregular-whitespace": "error",
-    "no-obj-calls": "error",
-    "no-prototype-builtins": "error",
-    "no-regex-spaces": "error",
-    "no-sparse-arrays": "error",
-    "no-template-curly-in-string": "error",
-    "no-unexpected-multiline": "error",
-    "no-unreachable": "error",
-    "no-unsafe-finally": "off",
-    "no-unsafe-negation": "error",
-    "use-isnan": "error",
-    "valid-jsdoc": "off",
-    "valid-typeof": "error",
-
-    // Best practices
-    "accessor-pairs": "error",
-    "array-callback-return": "error",
-    "block-scoped-var": "off",
-    "class-methods-use-this": "error",
-    "complexity": "off",
-    "consistent-return": "error",
-    "curly": ["error", "all"],
-    "default-case": "off",
-    "dot-location": ["error", "property"],
-    "dot-notation": "error",
-    "eqeqeq": "error",
-    "guard-for-in": "off",
-    "no-alert": "error",
-    "no-caller": "error",
-    "no-case-declarations": "error",
-    "no-div-regex": "off",
-    "no-else-return": "error",
-    "no-empty-function": "error",
-    "no-empty-pattern": "error",
-    "no-eq-null": "error",
-    "no-eval": "error",
-    "no-extend-native": "error",
-    "no-extra-bind": "error",
-    "no-extra-label": "error",
-    "no-fallthrough": "error",
-    "no-floating-decimal": "error",
-    "no-global-assign": "error",
-    "no-implicit-coercion": "error",
-    "no-implicit-globals": "error",
-    "no-implied-eval": "off",
-    "no-invalid-this": "error",
-    "no-iterator": "error",
-    "no-labels": ["error", { "allowLoop": true }],
-    "no-lone-blocks": "error",
-    "no-loop-func": "off",
-    "no-magic-numbers": "off",
-    "no-multi-spaces": "error",
-    "no-multi-str": "error",
-    "no-new": "error",
-    "no-new-func": "error",
-    "no-new-wrappers": "error",
-    "no-octal": "error",
-    "no-octal-escape": "error",
-    "no-param-reassign": "off",
-    "no-process-env": "error",
-    "no-proto": "error",
-    "no-redeclare": "error",
-    "no-restricted-properties": "off",
-    "no-return-assign": ["error", "except-parens"],
-    "no-return-await": "error",
-    "no-script-url": "off",
-    "no-self-assign": "error",
-    "no-self-compare": "error",
-    "no-sequences": "error",
-    "no-throw-literal": "error",
-    "no-unmodified-loop-condition": "error",
-    "no-unused-expressions": "error",
-    "no-unused-labels": "error",
-    "no-useless-call": "error",
-    "no-useless-concat": "error",
-    "no-useless-escape": "error",
-    "no-useless-return": "error",
-    "no-void": "error",
-    "no-warning-comments": "off",
-    "no-with": "error",
-    "radix": ["error", "as-needed"],
-    "require-await": "error",
-    "vars-on-top": "off",
-    "wrap-iife": ["error", "outside"],
-    "yoda": ["error", "never"],
-
-    // Strict Mode
-    "strict": ["error", "global"],
-
-    // Variables
-    "init-declarations": "off",
-    "no-catch-shadow": "error",
-    "no-delete-var": "error",
-    "no-label-var": "error",
-    "no-restricted-globals": "off",
-    "no-shadow": "error",
-    "no-shadow-restricted-names": "error",
-    "no-undef": "error",
-    "no-undef-init": "error",
-    "no-undefined": "off",
-    "no-unused-vars": "error",
-    "no-use-before-define": ["error", "nofunc"],
-
-    // Node.js and CommonJS
-    "callback-return": "off",
-    "global-require": "error",
-    "handle-callback-err": "error",
-    "no-mixed-requires": ["error", true],
-    "no-new-require": "error",
-    "no-path-concat": "error",
-    "no-process-exit": "error",
-    "no-restricted-imports": "off",
-    "no-restricted-modules": "off",
-    "no-sync": "off",
-
-    // Stylistic Issues
-    "array-bracket-spacing": ["error", "never"],
-    "block-spacing": ["error", "always"],
-    "brace-style": ["error", "1tbs", { "allowSingleLine": false }],
-    "camelcase": ["error", { "properties": "always" }],
-    "capitalized-comments": ["error", "always", { "ignoreConsecutiveComments": true }],
-    "comma-spacing": ["error", { "before": false, "after": true }],
-    "comma-style": ["error", "last"],
-    "computed-property-spacing": ["error", "never"],
-    "consistent-this": "off",
-    "eol-last": "error",
-    "func-call-spacing": ["error", "never"],
-    "func-name-matching": ["error", "always"],
-    "func-names": ["error", "never"],
-    "func-style": ["error", "declaration"],
-    "id-blacklist": "off",
-    "id-length": "off",
-    "id-match": "off",
-    "indent": ["error", 2, { "SwitchCase": 1 }],
-    "jsx-quotes": "off",
-    "key-spacing": ["error", { "beforeColon": false, "afterColon": true, "mode": "strict" }],
-    "keyword-spacing": ["error", { "before": true, "after": true }],
-    "line-comment-position": "off",
-    "linebreak-style": ["error", "unix"],
-    "lines-around-comment": "off",
-    "lines-around-directive": "off",
-    "max-depth": "off",
-    "max-len": ["error", 120, { "ignoreUrls": true }],
-    "max-lines": "off",
-    "max-nested-callbacks": "off",
-    "max-params": "off",
-    "max-statements": "off",
-    "max-statements-per-line": ["error", { "max": 1 }],
-    "multiline-ternary": "off",
-    "new-cap": "error",
-    "new-parens": "error",
-    "newline-after-var": "off",
-    "newline-before-return": "off",
-    "newline-per-chained-call": "off",
-    "no-array-constructor": "error",
-    "no-bitwise": "off",
-    "no-continue": "off",
-    "no-inline-comments": "off",
-    "no-lonely-if": "error",
-    "no-mixed-operators": "error",
-    "no-mixed-spaces-and-tabs": "error",
-    "no-multiple-empty-lines": "error",
-    "no-negated-condition": "off",
-    "no-nested-ternary": "error",
-    "no-new-object": "error",
-    "no-plusplus": "off",
-    "no-restricted-syntax": "off",
-    "no-tabs": "error",
-    "no-ternary": "off",
-    "no-trailing-spaces": "error",
-    "no-underscore-dangle": "off",
-    "no-unneeded-ternary": "error",
-    "no-whitespace-before-property": "error",
-    "object-curly-newline": ["error", { "multiline": true }],
-    "object-curly-spacing": ["error", "always"],
-    "object-property-newline": "off",
-    "one-var": ["error", "never"],
-    "one-var-declaration-per-line": ["error", "initializations"],
-    "operator-assignment": ["error", "always"],
-    "operator-linebreak": ["error", "after"],
-    "padded-blocks": ["error", "never"],
-    "quote-props": ["error", "as-needed"],
-    "quotes": ["error", "double", { "avoidEscape": true, "allowTemplateLiterals": true }],
-    "require-jsdoc": "off",
-    "semi": ["error", "always"],
-    "semi-spacing": "error",
-    "sort-keys": "off",
-    "sort-vars": "off",
-    "space-before-blocks": ["error", "always"],
-    "space-before-function-paren": ["error", { "anonymous": "always", "named": "never" }],
-    "space-in-parens": ["error", "never"],
-    "space-infix-ops": "error",
-    "space-unary-ops": ["error", { "words": true, "nonwords": false }],
-    "spaced-comment": ["error", "always", { "markers": ["///"] }],
-    "unicode-bom": ["error", "never"],
-    "wrap-regex": "off",
-
-    // ECMAScript 6
-    "arrow-body-style": "off", // meh
-    "arrow-parens": ["error", "as-needed"],
-    "arrow-spacing": "error",
-    "constructor-super": "error",
-    "generator-star-spacing": ["error", "after"],
-    "no-class-assign": "error",
-    "no-confusing-arrow": "off",
-    "no-const-assign": "error",
-    "no-dupe-class-members": "error",
-    "no-duplicate-imports": "error",
-    "no-new-symbol": "error",
-    "no-this-before-super": "error",
-    "no-useless-computed-key": "error",
-    "no-useless-constructor": "error",
-    "no-useless-rename": "error",
-    "no-var": "error",
-    "object-shorthand": "error",
-    "prefer-arrow-callback": "error",
-    "prefer-const": "error",
-    "prefer-numeric-literals": "error",
-    "prefer-rest-params": "error",
-    "prefer-spread": "error",
-    "prefer-template": "off",
-    "require-yield": "error",
-    "rest-spread-spacing": ["error", "never"],
-    "sort-imports": "off",
-    "symbol-description": "error",
-    "template-curly-spacing": ["error", "never"],
-    "yield-star-spacing": ["error", "after"]
+    "no-console": "off"
  }
 }
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -0,0 +1,50 @@
+name: Test
+on:
+  pull_request:
+    branches:
+    - master
+  push:
+    branches:
+    - master
+jobs:
+  test:
+    name: Test
+    runs-on: ubuntu-20.04
+    steps:
+    - uses: actions/checkout@v2
+
+    - uses: actions/setup-node@v2
+      with:
+        node-version: 16
+
+    - run: npm install
+
+    - run: npm run lint
+
+    # CI would take too long if we did the download every time too. So, we cache it. This does mean we're vulnerable to
+    # source changes exposing problems in our code, but those are pretty infrequent. If they occur, we need to bump the
+    # cache key.
+    - uses: actions/cache@v2
+      with:
+        key: worm-ward-cache-2021-01-17
+        path: ./cache
+
+    - run: node ./lib/worm-scraper.js --book=worm
+
+    - run: node ./lib/worm-scraper.js --book=ward
+
+    - uses: actions/setup-java@v1
+      with:
+        java-version: 15
+        java-package: jre
+
+    - name: Get EPUBCheck
+      run: |
+        curl https://github.com/w3c/epubcheck/releases/download/v4.2.4/epubcheck-4.2.4.zip --location --output epubcheck.zip
+        unzip epubcheck.zip
+
+    - name: Check Worm.epub
+      run: java -jar epubcheck-4.2.4/epubcheck.jar --failonwarnings Worm.epub
+
+    - name: Check Ward.epub
+      run: java -jar epubcheck-4.2.4/epubcheck.jar --failonwarnings Ward.epub
--- a/.travis.yml
+++ b/.travis.yml
@ -1,6 +0,0 @@
-language: node_js
-node_js:
-  - 10
-  - stable
-script:
-  npm run lint
--- a/README.md
+++ b/README.md
@ -4,7 +4,7 @@ Scrapes the web serial [_Worm_](https://parahumans.wordpress.com/) and its seque

 ## How to use

-First you'll need a modern version of [Node.js](https://nodejs.org/en/). Install whatever is current (not LTS); at least v12.10.0 is necessary.
+First you'll need a modern version of [Node.js](https://nodejs.org/en/). At least v16.13.2 is necessary.

 Then, open a terminal ([Mac documentation](http://blog.teamtreehouse.com/introduction-to-the-mac-os-x-command-line), [Windows documentation](http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) and install the program by typing

@ -21,7 +21,7 @@ worm-scraper --help
 If this outputs some help documentation, then the installation process went smoothly. You can move on to assemble the eBook by typing

 ```bash
-worm-scraper download convert scaffold zip
+worm-scraper
 ```

 This will take a while, but will eventually produce a `Worm.epub` file!
@ -29,7 +29,7 @@ This will take a while, but will eventually produce a `Worm.epub` file!
 If you'd like to get _Ward_ instead of _Worm_, use `--book=ward`, e.g.

 ```bash
-worm-scraper download convert scaffold zip --book=ward
+worm-scraper --book=ward
 ```

 ## EPUB vs. other formats
--- a/covers/README.md
+++ b/covers/README.md
@ -0,0 +1,11 @@
+# Cover credits
+
+The _Worm_ cover is assembled from:
+
+- [Ari Ibarra's fanart](https://www.instagram.com/p/B1wSi1Ynaze/) on Instagram
+- The "Wildbow's Past Works" image for _Worm_ on [parahumans.net](https://www.parahumans.net/)
+
+The _Ward_ cover is assembled from:
+
+- [zearoe's fanart](https://www.reddit.com/r/Parahumans/comments/b8n7o0/fanartrepost_antares/) on Reddit
+- The header image on [parahumans.net](https://www.parahumans.net/)
--- a/covers/ward/cover.xhtml
+++ b/covers/ward/cover.xhtml
@ -3,7 +3,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>Cover</title>
-    <style>
+    <style type="text/css">
      body {
        text-align: center;
        margin: 0;
@ -17,6 +17,8 @@
    </style>
  </head>
  <body>
-    <img src="cover.jpg" alt=""/>
+    <div>
+      <img src="cover.jpg" alt=""/>
+    </div>
  </body>
 </html>
--- a/covers/worm/cover.jpg
+++ b/covers/worm/cover.jpg
--- a/covers/worm/cover.png
+++ b/covers/worm/cover.png
--- a/covers/worm/cover.xhtml
+++ b/covers/worm/cover.xhtml
@ -3,7 +3,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>Cover</title>
-    <style>
+    <style type="text/css">
      body {
        text-align: center;
        margin: 0;
@ -17,6 +17,8 @@
    </style>
  </head>
  <body>
-    <img src="cover.png" alt=""/>
+    <div>
+      <img src="cover.jpg" alt=""/>
+    </div>
  </body>
 </html>
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@ -77,13 +77,21 @@ function getBodyXML(chapter, book, contentEl) {
    }
  }

-  // Remove empty <em>s and <i>s
-  // Remove style attributes from them, as they're always messed up.
-  for (const em of contentEl.querySelectorAll("em, i")) {
-    if (em.textContent.trim() === "") {
-      em.replaceWith(contentEl.ownerDocument.createTextNode(" "));
+  // Remove empty inline elements.
+  // Remove style attributes from inline elements, as they're always messed up.
+  for (const el of contentEl.querySelectorAll("em, i, strong, b")) {
+    const { textContent } = el;
+
+    if (textContent === "") {
+      el.remove();
+    } else if (textContent.trim() === "") {
+      if (el.childElementCount === 0) {
+        el.replaceWith(" ");
+      } else if (el.childElementCount === 1 && el.children[0].localName === "br") {
+        el.outerHTML = "<br />\n";
+      }
    } else {
-      em.removeAttribute("style");
+      el.removeAttribute("style");
    }
  }

@ -134,114 +142,125 @@ function getBodyXML(chapter, book, contentEl) {
  let xml = xmlSerializer.serializeToString(bodyEl);

  // Fix recurring strange pattern of extra <br> in <p>...<em>...<br>\n</em></p>
-  xml = xml.replace(/<br \/>\s*<\/em><\/p>/g, "</em></p>");
+  xml = xml.replace(/<br \/>\s*<\/em><\/p>/ug, "</em></p>");

  // Replace single-word <i>s with <em>s. Other <i>s are probably erroneous too, but these are known-bad.
-  xml = xml.replace(/<i>([^ ]+)<\/i>/g, "<em>$1</em>");
-  xml = xml.replace(/<i>([^ ]+)( +)<\/i>/g, "<em>$1</em>$2");
+  xml = xml.replace(/<i>([^ ]+)<\/i>/ug, "<em>$1</em>");
+  xml = xml.replace(/<i>([^ ]+)( +)<\/i>/ug, "<em>$1</em>$2");

  // There are way too many nonbreaking spaces where they don't belong. If they show up three in a row, then let them
  // live; they're maybe being used for alignment or something. Otherwise, they die.
  //
  // Also, normalize spaces after a period/quote mark to two (normal) spaces. The second one is invisible when
  // rendered, but it helps future heuristics detect end of sentences.
-  xml = xml.replace(/\xA0{1,2}(?!\x20\xA0)/g, " ");
-  xml = xml.replace(/([.”])\x20*\xA0[\xA0\x20]*/g, "$1  ");
-  xml = xml.replace(/([.”])\x20{3,}/g, "$1  ");
+  xml = xml.replace(/\xA0{1,2}(?!\x20\xA0)/ug, " ");
+  xml = xml.replace(/([.”])\x20*\xA0[\xA0\x20]*/ug, "$1  ");
+  xml = xml.replace(/([.”])\x20{3,}/ug, "$1  ");

  function fixEms() {
    // Fix recurring broken-up or erroneous <em>s
-    xml = xml.replace(/<\/em>‘s/g, "’s</em>");
-    xml = xml.replace(/<em><\/em>/g, "");
-    xml = xml.replace(/<\/em><em>/g, "");
-    xml = xml.replace(/<em>(\s?\s?[^A-Za-z]\s?\s?)<\/em>/g, "$1");
-    xml = xml.replace(/<\/em>(\s?\s?[^A-Za-z]\s?\s?)<em>/g, "$1");
-    xml = xml.replace(/“<em>([^>]+)<\/em>(!|\?|\.)”/g, "“<em>$1$2</em>”");
-    xml = xml.replace(/<p><em>([^>]+)<\/em>(!|\?|\.)<\/p>/g, "<p><em>$1$2</em></p>");
-    xml = xml.replace(/(!|\?|\.)\s{2}<\/em><\/p>/g, "$1</em></p>");
-    xml = xml.replace(/<em>([a-z]+)(\?|\.)<\/em>/g, "<em>$1</em>$2");
-    xml = xml.replace(/<em>([^>]+?)( +)<\/em>/g, "<em>$1</em>$2");
-    xml = xml.replace(/<em> ([a-zA-Z]+)<\/em>/g, " <em>$1</em>");
-    xml = xml.replace(/<em>‘\s*([^<]+)\s*’<\/em>/g, "‘<em>$1</em>’");
-    xml = xml.replace(/<em>‘\s*([^<]+)\s*<\/em>\s*’/g, "‘<em>$1</em>’");
-    xml = xml.replace(/‘\s*<em>\s*([^<]+)\s*’<\/em>/g, "‘<em>$1</em>’");
-    xml = xml.replace(/<em>“\s*([^<”]+)\s*”<\/em>/g, "“<em>$1</em>”");
-    xml = xml.replace(/<em>“\s*([^<”]+)\s*<\/em>\s*”/g, "“<em>$1</em>”");
-    xml = xml.replace(/“\s*<em>\s*([^<”]+)\s*”<\/em>/g, "“<em>$1</em>”");
-    xml = xml.replace(/([^\n>])<em>  ?/g, "$1 <em>");
-    xml = xml.replace(/  ?<\/em>/g, "</em> ");
-    xml = xml.replace(/<p([^>]+)> <em>/g, "<p$1><em>");
-    xml = xml.replace(/<\/em> <\/p>/g, "</em></p>");
-    xml = xml.replace(/<em>([a-z]+),<\/em>/g, "<em>$1</em>,");
+    xml = xml.replace(/<\/em>‘s/ug, "’s</em>");
+    xml = xml.replace(/<em><\/em>/ug, "");
+    xml = xml.replace(/<\/em><em>/ug, "");
+    xml = xml.replace(/<em>(\s?\s?[^A-Za-z]\s?\s?)<\/em>/ug, "$1");
+    xml = xml.replace(/<\/em>(\s?\s?[^A-Za-z]\s?\s?)<em>/ug, "$1");
+    xml = xml.replace(/“<em>([^>]+)<\/em>(!|\?|\.)”/ug, "“<em>$1$2</em>”");
+    xml = xml.replace(/<p><em>([^>]+)<\/em>(!|\?|\.)<\/p>/ug, "<p><em>$1$2</em></p>");
+    xml = xml.replace(/(!|\?|\.)\s{2}<\/em><\/p>/ug, "$1</em></p>");
+    xml = xml.replace(/<em>([a-z]+)(\?|\.)<\/em>/ug, "<em>$1</em>$2");
+    xml = xml.replace(/<em>([^>]+?)( +)<\/em>/ug, "<em>$1</em>$2");
+    xml = xml.replace(/<em> ([a-zA-Z]+)<\/em>/ug, " <em>$1</em>");
+    xml = xml.replace(/<em>‘\s*([^<]+)\s*’<\/em>/ug, "‘<em>$1</em>’");
+    xml = xml.replace(/<em>‘\s*([^<]+)\s*<\/em>\s*’/ug, "‘<em>$1</em>’");
+    xml = xml.replace(/‘\s*<em>\s*([^<]+)\s*’<\/em>/ug, "‘<em>$1</em>’");
+    xml = xml.replace(/<em>“\s*([^<”]+)\s*”<\/em>/ug, "“<em>$1</em>”");
+    xml = xml.replace(/<em>“\s*([^<”]+)\s*<\/em>\s*”/ug, "“<em>$1</em>”");
+    xml = xml.replace(/“\s*<em>\s*([^<”]+)\s*”<\/em>/ug, "“<em>$1</em>”");
+    xml = xml.replace(/([^\n>])<em>  ?/ug, "$1 <em>");
+    xml = xml.replace(/  ?<\/em>/ug, "</em> ");
+    xml = xml.replace(/<p([^>]+)> <em>/ug, "<p$1><em>");
+    xml = xml.replace(/<\/em> <\/p>/ug, "</em></p>");
+    xml = xml.replace(/<em>([a-z]+),<\/em>/ug, "<em>$1</em>,");
  }

  // These quote/apostrophe/em fixes interact with each other. TODO: try to disentangle so we don't repeat all of
  // fixEms.
-  xml = xml.replace(/,” <\/em>/g, "</em>,” ");
+  xml = xml.replace(/,” <\/em>/ug, "</em>,” ");
  fixEms();
-  xml = xml.replace(/<p>”/g, "<p>“");
-  xml = xml.replace(/“\s*<\/p>/g, "”</p>");
-  xml = xml.replace(/“\s*<\/em><\/p>/g, "</em>”</p>");
-  xml = xml.replace(/‘\s*<\/p>/g, "’</p>");
-  xml = xml.replace(/‘\s*<\/em><\/p>/g, "’</em></p>");
-  xml = xml.replace(/,” <\/em>/g, "</em>,” ");
-  xml = xml.replace(/′/g, "’");
-  xml = xml.replace(/″/g, "”");
-  xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
-  xml = xml.replace(/I‘m/g, "I’m");
-  xml = xml.replace(/<p>“\s+/g, "<p>“");
-  xml = xml.replace(/\s+”/g, "”");
-  xml = xml.replace(/'/g, "’");
-  xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
-  xml = xml.replace(/([a-z])”<\/p>/g, "$1.”</p>");
+  xml = xml.replace(/<p>”/ug, "<p>“");
+  xml = xml.replace(/“\s*<\/p>/ug, "”</p>");
+  xml = xml.replace(/“\s*<\/em><\/p>/ug, "</em>”</p>");
+  xml = xml.replace(/‘\s*<\/p>/ug, "’</p>");
+  xml = xml.replace(/‘\s*<\/em><\/p>/ug, "’</em></p>");
+  xml = xml.replace(/,” <\/em>/ug, "</em>,” ");
+  xml = xml.replace(/′/ug, "’");
+  xml = xml.replace(/″/ug, "”");
+  xml = xml.replace(/([A-Za-z])‘s(\s?)/ug, "$1’s$2");
+  xml = xml.replace(/I‘m/ug, "I’m");
+  xml = xml.replace(/<p>“\s+/ug, "<p>“");
+  xml = xml.replace(/\s+”/ug, "”");
+  xml = xml.replace(/'/ug, "’");
+  xml = xml.replace(/’([A-Za-z]+)’/ug, "‘$1’");
+  xml = xml.replace(/([a-z])”<\/p>/ug, "$1.”</p>");
  fixEms();
-  xml = xml.replace(/‘<em>([^<]+)<\/em>‘/g, "‘<em>$1</em>’");
-  xml = xml.replace(/<em>([a-z]+)!<\/em>/g, "<em>$1</em>!");
-  xml = xml.replace(/(?<! {2})<em>([\w ’]+)([!.?])”<\/em>/g, "<em>$1</em>$2”");
-  xml = xml.replace(/<em>([\w ’]+[!.?])”<\/em>/g, "<em>$1</em>”");
-  xml = xml.replace(/I”(m|ll)/g, "I’$1");
-  xml = xml.replace(/””<\/p>/g, "”</p>");
-  xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “");
-  xml = xml.replace(/(?<!“)<em>([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "<em>$1</em>,");
-  xml = xml.replace(/‘([Kk])ay(?!’)/g, "’$1ay");
-  xml = xml.replace(/<em>(Why|What|Who|How|Where|When)<\/em>\?/g, "<em>$1?</em>");
-  xml = xml.replace(/,<\/em>/g, "</em>,");
-  xml = xml.replace(/,”<\/p>/g, ".”</p>");
-  xml = xml.replace(/<p>(.*),<\/p>/g, "<p>$1.</p>");
-  xml = xml.replace(/‘(\w+)‘(\w+)’/g, "‘$1’$2’");
-  xml = xml.replace(/<em>([a-z]+), ([a-z]+)<\/em>/g, "<em>$1</em>, <em>$2</em>");
+  xml = xml.replace(/‘<em>([^<]+)<\/em>‘/ug, "‘<em>$1</em>’");
+  xml = xml.replace(/<em>([a-z]+)!<\/em>/ug, "<em>$1</em>!");
+  xml = xml.replace(/(?<! {2})<em>([\w ’]+)([!.?])”<\/em>/ug, "<em>$1</em>$2”");
+  xml = xml.replace(/<em>([\w ’]+[!.?])”<\/em>/ug, "<em>$1</em>”");
+  xml = xml.replace(/I”(m|ll)/ug, "I’$1");
+  xml = xml.replace(/””<\/p>/ug, "”</p>");
+  xml = xml.replace(/^([^“]+?) ?”(?![ —<])/ugm, "$1 “");
+  xml = xml.replace(/(?<!“)<em>([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/u, "<em>$1</em>,");
+  xml = xml.replace(/‘([Kk])ay(?!’)/ug, "’$1ay");
+  xml = xml.replace(/<em>(Why|What|Who|How|Where|When)<\/em>\?/ug, "<em>$1?</em>");
+  xml = xml.replace(/,<\/em>/ug, "</em>,");
+  xml = xml.replace(/,”<\/p>/ug, ".”</p>");
+  xml = xml.replace(/<p>(.*),<\/p>/ug, "<p>$1.</p>");
+  xml = xml.replace(/‘(\w+)‘(\w+)’/ug, "‘$1’$2’");
+  xml = xml.replace(/<em>([a-z]+), ([a-z]+)<\/em>/ug, "<em>$1</em>, <em>$2</em>");

  // Similar problems occur in Ward with <b> and <strong> as occur in Worm with <em>s
-  xml = xml.replace(/<b \/>/g, "");
-  xml = xml.replace(/<b>(\s*<br \/>\s*)<\/b>/g, "$1");
-  xml = xml.replace(/<strong>(\s*<br \/>\s*)<\/strong>/g, "$1");
-  xml = xml.replace(/<\/strong>(\s*)<strong>/g, "$1");
-  xml = xml.replace(/<strong>@<\/strong>/g, "@");
-  xml = xml.replace(/<br \/>(\s*)<\/strong>/g, "</strong><br />$1");
-  xml = xml.replace(/(\s*)<\/strong>/g, "</strong>$1");
-  xml = xml.replace(/><strong>(.*)<\/strong>:</g, "><strong>$1:</strong><");
+  xml = xml.replace(/<b \/>/ug, "");
+  xml = xml.replace(/<b>(\s*<br \/>\s*)<\/b>/ug, "$1");
+  xml = xml.replace(/<strong>(\s*<br \/>\s*)<\/strong>/ug, "$1");
+  xml = xml.replace(/<\/strong>(\s*)<strong>/ug, "$1");
+  xml = xml.replace(/<strong>@<\/strong>/ug, "@");
+  xml = xml.replace(/<br \/>(\s*)<\/strong>/ug, "</strong><br />$1");
+  xml = xml.replace(/(\s*)<\/strong>/ug, "</strong>$1");
+  xml = xml.replace(/><strong>(.*)<\/strong>:</ug, "><strong>$1:</strong><");

-  // No need for line breaks before paragraph ends
+  // No need for line breaks before paragraph ends or after paragraph starts
  // These often occur with the <br>s inside <b>/<strong>/<em>/<i> fixed above.
-  xml = xml.replace(/<br \/>\s*<\/p>/g, "</p>");
+  xml = xml.replace(/<br \/>\s*<\/p>/ug, "</p>");
+  xml = xml.replace(/<p><br \/>\s*/ug, "<p>");
+
+  // This is another quote fix but it needs to happen after the line break deletion... so entangled, ugh.
+  xml = xml.replace(/<\/em>\s*“\s*<\/p>/ug, "</em>”</p>");

  // Fix missing spaces after commas
-  xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
+  xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/ug, "$1, $2");

  // Fix bad periods and spacing/markup surrounding them
-  xml = xml.replace(/\.\.<\/p>/g, ".</p>");
-  xml = xml.replace(/\.\.”<\/p>/g, ".”</p>");
-  xml = xml.replace(/ \. /g, ". ");
-  xml = xml.replace(/ \.<\/p>/g, ".</p>");
-  xml = xml.replace(/\.<em>\.\./g, "<em>…");
-  xml = xml.replace(/\.\. {2}/g, ".  ");
-  xml = xml.replace(/\.\./g, "…");
-  xml = xml.replace(/(?<!Mr|Ms|Mrs)…\./g, "…");
-  xml = xml.replace(/(?<=Mr|Ms|Mrs)…\./g, ".…");
+  xml = xml.replace(/\.\.<\/p>/ug, ".</p>");
+  xml = xml.replace(/\.\.”<\/p>/ug, ".”</p>");
+  xml = xml.replace(/ \. /ug, ". ");
+  xml = xml.replace(/ \.<\/p>/ug, ".</p>");
+  xml = xml.replace(/\.<em>\.\./ug, "<em>…");
+  xml = xml.replace(/\.\. {2}/ug, ".  ");
+  xml = xml.replace(/\.\./ug, "…");
+  xml = xml.replace(/(?<!Mr|Ms|Mrs)…\./ug, "…");
+  xml = xml.replace(/(?<=Mr|Ms|Mrs)…\./ug, ".…");

  // Fix extra spaces
-  xml = xml.replace(/ ? <\/p>/g, "</p>");
-  xml = xml.replace(/([a-z]) ,/g, "$1,");
+  xml = xml.replace(/ ? <\/p>/ug, "</p>");
+  xml = xml.replace(/([a-z]) ,/ug, "$1,");
+
+  // Use actual emojis instead of images
+  xml = xml.replace(
+    // eslint-disable-next-line max-len
+    /<img width="16" height="16" class="wp-smiley emoji" draggable="false" alt="O_o" src="https:\/\/s1.wp.com\/wp-content\/mu-plugins\/wpcom-smileys\/o_O.svg" style="height: 1em; max-height: 1em;" \/>/ug,
+    "🤨"
+  );

  xml = fixTruncatedWords(xml);
  xml = fixDialogueTags(xml);
@ -270,9 +289,9 @@ function getBodyXML(chapter, book, contentEl) {
                      `Update substitutions.json for a more precise substitution.`);
      }

-      xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
+      xml = xml.replace(new RegExp(escapeRegExp(substitution.before), "u"), substitution.after);
    } else if (substitution.regExp) {
-      xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
+      xml = xml.replace(new RegExp(substitution.regExp, "ug"), substitution.replacement);
    } else {
      warnings.push(`Invalid substitution specified for ${chapter.url}`);
    }
@ -281,30 +300,31 @@ function getBodyXML(chapter, book, contentEl) {
  // Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a <html>.
  // Use this opportunity to insert a comment pointing to the original URL, for reference.
  xml = xml.replace(
-    /<body xmlns="http:\/\/www.w3.org\/1999\/xhtml">/,
-    `<body>\n<!-- ${chapter.url} -->\n`);
+    /<body xmlns="http:\/\/www.w3.org\/1999\/xhtml">/u,
+    `<body>\n<!-- ${chapter.url} -->\n`
+  );

  return { xml, warnings };
 }

 function fixTruncatedWords(xml) {
-  xml = xml.replace(/‘Sup/g, "’Sup");
-  xml = xml.replace(/‘cuz/g, "’cuz");
+  xml = xml.replace(/‘Sup/ug, "’Sup");
+  xml = xml.replace(/‘cuz/ug, "’cuz");

  // Short for "Sidepiece"
-  xml = xml.replace(/[‘’][Pp]iece(?![a-z])/g, "’Piece");
+  xml = xml.replace(/[‘’][Pp]iece(?![a-z])/ug, "’Piece");

  // Short for "Disjoint"
-  xml = xml.replace(/[‘’][Jj]oint(?![a-z])/g, "’Joint");
+  xml = xml.replace(/[‘’][Jj]oint(?![a-z])/ug, "’Joint");

  // Short for "Contender"
-  xml = xml.replace(/[‘’][Tt]end(?![a-z])/g, "’Tend");
+  xml = xml.replace(/[‘’][Tt]end(?![a-z])/ug, "’Tend");

  // Short for "Anelace"
-  xml = xml.replace(/[‘’][Ll]ace(?![a-z])/g, "’Lace");
+  xml = xml.replace(/[‘’][Ll]ace(?![a-z])/ug, "’Lace");

  // Short for "Birdcage"
-  xml = xml.replace(/[‘’][Cc]age(?![a-z])/g, "’Cage");
+  xml = xml.replace(/[‘’][Cc]age(?![a-z])/ug, "’Cage");

  // We can't do "’Clear" (short for Crystalclear) here because it appears too much as a normal word preceded by an
  // open quote, so we do that in substitutions.json.
@ -314,8 +334,8 @@ function fixTruncatedWords(xml) {

 function fixDialogueTags(xml) {
  // Fix recurring miscapitalization with questions
-  xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
-  xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
+  xml = xml.replace(/\?”\s\s?She asked/ug, "?” she asked");
+  xml = xml.replace(/\?”\s\s?He asked/ug, "?” he asked");

  // The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
  // > “I didn’t get much done,” Greg said, “I got distracted by...
@ -331,105 +351,108 @@ function fixDialogueTags(xml) {
  // This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
  // times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
  // capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
-  xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
+  xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/ug, ",” $1. “$2");

  return xml;
 }

 function fixForeignNames(xml) {
  // This is consistently missing diacritics
-  xml = xml.replace(/Yangban/g, "Yàngbǎn");
+  xml = xml.replace(/Yangban/ug, "Yàngbǎn");

  // These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
  // italicized, so we go in the direction of removing the italics.
-  xml = xml.replace(/<em>Garama<\/em>/g, "Garama");
-  xml = xml.replace(/<em>Thanda<\/em>/g, "Thanda");
-  xml = xml.replace(/<em>Sifara([^<]*)<\/em>/g, "Sifara$1");
-  xml = xml.replace(/<em>Moord Nag([^<]*)<\/em>/g, "Moord Nag$1");
-  xml = xml.replace(/<em>Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
-  xml = xml.replace(/<em>Turanta([^<]*)<\/em>/g, "Turanta$1");
+  xml = xml.replace(/<em>Garama<\/em>/ug, "Garama");
+  xml = xml.replace(/<em>Thanda<\/em>/ug, "Thanda");
+  xml = xml.replace(/<em>Sifara([^<]*)<\/em>/ug, "Sifara$1");
+  xml = xml.replace(/<em>Moord Nag([^<]*)<\/em>/ug, "Moord Nag$1");
+  xml = xml.replace(/<em>Califa de Perro([^<]*)<\/em>/ug, "Califa de Perro$1");
+  xml = xml.replace(/<em>Turanta([^<]*)<\/em>/ug, "Turanta$1");

  return xml;
 }

 function standardizeNames(xml) {
  // 197 instances of "Mrs." to 21 of "Ms."
-  xml = xml.replace(/Ms\. Yamada/g, "Mrs. Yamada");
+  xml = xml.replace(/Ms\. Yamada/ug, "Mrs. Yamada");

  // 25 instances of "Amias" to 3 of "Amais"
-  xml = xml.replace(/Amais/g, "Amias");
+  xml = xml.replace(/Amais/ug, "Amias");

  // 185 instances of Juliette to 4 of Juliet
-  xml = xml.replace(/Juliet(?=\b)/g, "Juliette");
+  xml = xml.replace(/Juliet(?=\b)/ug, "Juliette");

  // Earlier chapters have a space; later ones do not. They're separate words, so side with the earlier chapters.
  // One location is missing the "k".
-  xml = xml.replace(/Crock? o[‘’]Shit/g, "Crock o’ Shit");
+  xml = xml.replace(/Crock? o[‘’]Shit/ug, "Crock o’ Shit");

  // 5 instances of "Jotun" to 2 of "Jotunn"
-  xml = xml.replace(/Jotunn/g, "Jotun");
+  xml = xml.replace(/Jotunn/ug, "Jotun");

  // 13 instances of Elman to 1 of Elmann
-  xml = xml.replace(/Elmann/g, "Elman");
+  xml = xml.replace(/Elmann/ug, "Elman");

  // Thousands of instances of Tattletale to 4 instances of Tatteltale
-  xml = xml.replace(/Tatteltale/g, "Tattletale");
+  xml = xml.replace(/Tatteltale/ug, "Tattletale");
+
+  // 73 instances of Über to 2 of Uber
+  xml = xml.replace(/Uber/ug, "Über");

  return xml;
 }

 function fixEmDashes(xml) {
-  xml = xml.replace(/ – /g, "—");
-  xml = xml.replace(/“((?:<em>)?)-/g, "“$1—");
-  xml = xml.replace(/-[,.]?”/g, "—”");
-  xml = xml.replace(/-(!|\?)”/g, "—$1”");
-  xml = xml.replace(/-[,.]?<\/([a-z]+)>”/g, "—</$1>”");
-  xml = xml.replace(/-“/g, "—”");
-  xml = xml.replace(/<p>-/g, "<p>—");
-  xml = xml.replace(/-<\/p>/g, "—</p>");
-  xml = xml.replace(/-<br \/>/g, "—<br />");
-  xml = xml.replace(/-<\/([a-z]+)><\/p>/g, "—</$1></p>");
-  xml = xml.replace(/\s?\s?–\s?\s?/g, "—");
-  xml = xml.replace(/-\s\s?/g, "—");
-  xml = xml.replace(/\s?\s-/g, "—");
-  xml = xml.replace(/\s+—”/g, "—”");
-  xml = xml.replace(/I-I/g, "I—I");
-  xml = xml.replace(/I-uh/g, "I—uh");
-  xml = xml.replace(/-\?/g, "—?");
+  xml = xml.replace(/ – /ug, "—");
+  xml = xml.replace(/“((?:<em>)?)-/ug, "“$1—");
+  xml = xml.replace(/-[,.]?”/ug, "—”");
+  xml = xml.replace(/-(!|\?)”/ug, "—$1”");
+  xml = xml.replace(/-[,.]?<\/([a-z]+)>”/ug, "—</$1>”");
+  xml = xml.replace(/-“/ug, "—”");
+  xml = xml.replace(/<p>-/ug, "<p>—");
+  xml = xml.replace(/-<\/p>/ug, "—</p>");
+  xml = xml.replace(/-<br \/>/ug, "—<br />");
+  xml = xml.replace(/-<\/([a-z]+)><\/p>/ug, "—</$1></p>");
+  xml = xml.replace(/\s?\s?–\s?\s?/ug, "—");
+  xml = xml.replace(/-\s\s?/ug, "—");
+  xml = xml.replace(/\s?\s-/ug, "—");
+  xml = xml.replace(/\s+—”/ug, "—”");
+  xml = xml.replace(/I-I/ug, "I—I");
+  xml = xml.replace(/I-uh/ug, "I—uh");
+  xml = xml.replace(/-\?/ug, "—?");

  return xml;
 }

 function enDashJointNames(xml) {
  // Joint names should use en dashes
-  xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
-  xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
-  xml = xml.replace(/Cheit-Gimel/g, "Bet–Gimel");
-  xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
-  xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
-  xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
-  xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
-  xml = xml.replace(/G-N/g, "G–N");
-  xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
-  xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
-  xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
-  xml = xml.replace(/Challenger-Gallant/g, "Challenger–Gallant");
-  xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2");
-  xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
-  xml = xml.replace(/East-West/g, "east–west");
-  xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
-  xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
-  xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
-  xml = xml.replace(/Weaver-Clockblocker/g, "Weaver–Clockblocker");
-  xml = xml.replace(/Alexandria-Pretender/g, "Alexandria–Pretender");
-  xml = xml.replace(/Night Hag-Nyx/g, "Night Hag–Nyx");
-  xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
-  xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
-  xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
-  xml = xml.replace(/Matryoshka-Valentin/g, "Matryoshka–Valentin");
-  xml = xml.replace(/Gaea-Eden/g, "Gaea–Eden");
-  xml = xml.replace(/([Aa])gent-parahuman/g, "$1gent–parahuman");
-  xml = xml.replace(/([Pp])arahuman-agent/g, "$1arahuman–agent");
+  xml = xml.replace(/Dallon-Pelham/ug, "Dallon–Pelham");
+  xml = xml.replace(/Bet-Gimel/ug, "Bet–Gimel");
+  xml = xml.replace(/Cheit-Gimel/ug, "Bet–Gimel");
+  xml = xml.replace(/Tristan-Capricorn/ug, "Tristan–Capricorn");
+  xml = xml.replace(/Capricorn-Byron/ug, "Capricorn–Byron");
+  xml = xml.replace(/Tristan-Byron/ug, "Tristan–Byron");
+  xml = xml.replace(/Gimel-Europe/ug, "Gimel–Europe");
+  xml = xml.replace(/G-N/ug, "G–N");
+  xml = xml.replace(/Imp-Damsel/ug, "Imp–Damsel");
+  xml = xml.replace(/Damsel-Ashley/ug, "Damsel–Ashley");
+  xml = xml.replace(/Antares-Anelace/ug, "Antares–Anelace");
+  xml = xml.replace(/Challenger-Gallant/ug, "Challenger–Gallant");
+  xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/ug, "Undersider$1–$2");
+  xml = xml.replace(/Norwalk-Fairfield/ug, "Norwalk–Fairfield");
+  xml = xml.replace(/East-West/ug, "east–west");
+  xml = xml.replace(/Creutzfeldt-Jakob/ug, "Creutzfeldt–Jakob");
+  xml = xml.replace(/Astaroth-Nidhug/ug, "Astaroth–Nidhug");
+  xml = xml.replace(/Capulet-Montague/ug, "Capulet–Montague");
+  xml = xml.replace(/Weaver-Clockblocker/ug, "Weaver–Clockblocker");
+  xml = xml.replace(/Alexandria-Pretender/ug, "Alexandria–Pretender");
+  xml = xml.replace(/Night Hag-Nyx/ug, "Night Hag–Nyx");
+  xml = xml.replace(/Crawler-Breed/ug, "Crawler–Breed");
+  xml = xml.replace(/Simurgh-Myrddin-plant/ug, "Simurgh–Myrddin–plant");
+  xml = xml.replace(/Armsmaster-Defiant/ug, "Armsmaster–Defiant");
+  xml = xml.replace(/Matryoshka-Valentin/ug, "Matryoshka–Valentin");
+  xml = xml.replace(/Gaea-Eden/ug, "Gaea–Eden");
+  xml = xml.replace(/([Aa])gent-parahuman/ug, "$1gent–parahuman");
+  xml = xml.replace(/([Pp])arahuman-agent/ug, "$1arahuman–agent");

  return xml;
 }
@ -438,15 +461,15 @@ function fixPossessives(xml) {
  // Fix possessive of names ending in "s".
  xml = xml.replace(
    // eslint-disable-next-line max-len
-    /(?<!‘)(Judas|Brutus|Jess|Aegis|Dauntless|Circus|Sirius|Brooks|Genesis|Atlas|Lucas|Gwerrus|Chris|Eligos|Animos|Mags|Huntress|Hephaestus|Lord of Loss|John Combs|Mama Mathers|Monokeros|Goddess|Boundless|Paris|Tress|Harris|Antares|Nieves|Backwoods|Midas|Mrs. Sims|Ms. Stillons|Chuckles|Amias|Semiramis|Mother of Mothers)’(?!s)/g,
+    /(?<!‘)(Judas|Brutus|Jess|Aegis|Dauntless|Circus|Sirius|Brooks|Genesis|Atlas|Lucas|Gwerrus|Chris|Eligos|Animos|Mags|Huntress|Hephaestus|Lord of Loss|John Combs|Mama Mathers|Monokeros|Goddess|Boundless|Paris|Tress|Harris|Antares|Nieves|Backwoods|Midas|Mrs. Sims|Ms. Stillons|Chuckles|Amias|Semiramis|Mother of Mothers)’(?!s)/ug,
    "$1’s"
  );

  // Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
-  xml = xml.replace(/Marquis’s/g, "Marquis’");
+  xml = xml.replace(/Marquis’s/ug, "Marquis’");

  // This one is not just missing the extra "s"; it's often misplaced.
-  xml = xml.replace(/Warden’s/g, "Wardens’");
+  xml = xml.replace(/Warden’s/ug, "Wardens’");

  return xml;
 }
@ -455,16 +478,25 @@ function cleanSceneBreaks(xml) {
  // Normalize scene breaks. <hr> would be more semantically appropriate, but loses the author's intent. This is
  // especially the case in Ward, which uses a variety of different scene breaks.

-  xml = xml.replace(/<p(?:[^>]*)>■<\/p>/g, `<p style="text-align: center;">■</p>`);
+  xml = xml.replace(/<p(?:[^>]*)>■<\/p>/ug, `<p style="text-align: center;">■</p>`);

-  xml = xml.replace(/<p style="text-align: center;"><strong>⊙<\/strong><\/p>/g, `<p style="text-align: center;">⊙</p>`);
-  xml = xml.replace(/<p style="text-align: center;"><em><strong>⊙<\/strong><\/em><\/p>/g,
-    `<p style="text-align: center;">⊙</p>`);
-  xml = xml.replace(/<p style="text-align: center;"><strong>⊙⊙<\/strong><\/p>/g,
-    `<p style="text-align: center;">⊙</p>`);
+  xml = xml.replace(
+    /<p style="text-align: center;"><strong>⊙<\/strong><\/p>/ug,
+    `<p style="text-align: center;">⊙</p>`
+  );
+  xml = xml.replace(
+    /<p style="text-align: center;"><em><strong>⊙<\/strong><\/em><\/p>/ug,
+    `<p style="text-align: center;">⊙</p>`
+  );
+  xml = xml.replace(
+    /<p style="text-align: center;"><strong>⊙⊙<\/strong><\/p>/ug,
+    `<p style="text-align: center;">⊙</p>`
+  );

-  xml = xml.replace(/<p style="text-align: center;"><strong>⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/g,
-    `<p style="text-align: center;">⊙ ⊙ ⊙ ⊙ ⊙</p>`);
+  xml = xml.replace(
+    /<p style="text-align: center;"><strong>⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/ug,
+    `<p style="text-align: center;">⊙ ⊙ ⊙ ⊙ ⊙</p>`
+  );

  return xml;
 }
@ -473,57 +505,59 @@ function fixCapitalization(xml, book) {
  // This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
  // it's incorrect to capitalize in the one-off fixes.
  // Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
-  xml = xml.replace(/([Tt])he clairvoyant(?!s)/g, "$1he Clairvoyant");
+  xml = xml.replace(/([Tt])he clairvoyant(?!s)/ug, "$1he Clairvoyant");

  // ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
-  xml = xml.replace(/Resound/g, "ReSound");
+  xml = xml.replace(/Resound/ug, "ReSound");

  // The Speedrunners team name is missing its capitalization a couple times.
-  xml = xml.replace(/speedrunners/g, "Speedrunners");
+  xml = xml.replace(/speedrunners/ug, "Speedrunners");

  // The Machine Army is missing its capitalization a couple times.
-  xml = xml.replace(/machine army/g, "Machine Army");
+  xml = xml.replace(/machine army/ug, "Machine Army");

  // "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
  // group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
  // other contexts (e.g. Patrol leader). So let's standardize on "Patrol <lowercase>".
-  xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus|training)/ig,
-    (_, $1) => `Patrol ${$1.toLowerCase()}`);
+  xml = xml.replace(
+    /patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus|training)/uig,
+    (_, $1) => `Patrol ${$1.toLowerCase()}`
+  );
  // This usually works in Ward (some instances corrected back in substitutions.json), and has a few false positives in
  // Worm, where it is never needed:
  if (book === "ward") {
-    xml = xml.replace(/the patrol(?!s|ling)/g, "the Patrol");
+    xml = xml.replace(/the patrol(?!s|ling)/ug, "the Patrol");
  }

  // This is sometimes missing its capitalization.
-  xml = xml.replace(/the birdcage/g, "the Birdcage");
+  xml = xml.replace(/the birdcage/ug, "the Birdcage");

  // There's no reason why these should be capitalized.
-  xml = xml.replace(/(?<! {2}|“|>)Halberd/g, "halberd");
-  xml = xml.replace(/(?<! {2}|“|>)Loft/g, "loft");
+  xml = xml.replace(/(?<! {2}|“|>)Halberd/ug, "halberd");
+  xml = xml.replace(/(?<! {2}|“|>)Loft/ug, "loft");

  // These are treated as common nouns and not traditionally capitalized. "Krav Maga" remains capitalized,
  // interestingly (according to dictionaries and Wikipedia).
-  xml = xml.replace(/(?<! {2}|“|>)Judo/g, "judo");
-  xml = xml.replace(/(?<! {2}|“|>)Aikido/g, "aikido");
-  xml = xml.replace(/(?<! {2}|“|>)Karate/g, "karate");
-  xml = xml.replace(/(?<! {2}|“|>)Tae Kwon Do/g, "tae kwon do");
+  xml = xml.replace(/(?<! {2}|“|>)Judo/ug, "judo");
+  xml = xml.replace(/(?<! {2}|“|>)Aikido/ug, "aikido");
+  xml = xml.replace(/(?<! {2}|“|>)Karate/ug, "karate");
+  xml = xml.replace(/(?<! {2}|“|>)Tae Kwon Do/ug, "tae kwon do");

  // There's no reason why university should be capitalized in most contexts, although sometimes it's used as part of
  // a compound noun or at the beginning of a sentence.
-  xml = xml.replace(/(?<! {2}|“|>|Cornell |Nilles )University(?! Road)/, "university");
+  xml = xml.replace(/(?<! {2}|“|>|Cornell |Nilles )University(?! Road)/ug, "university");

  // Organ names (e.g. brain, arm) or scientific names are not capitalized, so the "corona pollentia" and friends should
  // not be either. The books are inconsistent.
-  xml = xml.replace(/(?<! {2}|“|>|-)Corona/g, "corona");
-  xml = xml.replace(/Pollentia/g, "pollentia");
-  xml = xml.replace(/Radiata/g, "radiata");
-  xml = xml.replace(/Gemma/g, "gemma");
+  xml = xml.replace(/(?<! {2}|“|>|-)Corona/ug, "corona");
+  xml = xml.replace(/Pollentia/ug, "pollentia");
+  xml = xml.replace(/Radiata/ug, "radiata");
+  xml = xml.replace(/Gemma/ug, "gemma");

  // We de-capitalize Valkyrie's "flock", since most uses are de-capitalized (e.g. the many instances in Gleaming
  // Interlude 9, or Dying 15.z). This is a bit surprising; it seems like an organization name. But I guess it's
  // informal.
-  xml = xml.replace(/(?<! {2}|“|>)Flock/g, "flock");
+  xml = xml.replace(/(?<! {2}|“|>)Flock/ug, "flock");

  // Especially early in Worm, PRT designations are capitalized; they should not be. This fixes the cases where we
  // can be reasonably sure they don't start a sentence, although more specific instances are done in
@ -535,64 +569,64 @@ function fixCapitalization(xml, book) {
  // This also over-de-capitalizes "The Stranger" in Ward (a titan name). Those also get fixed in substitutions.json.
  xml = xml.replace(
    // eslint-disable-next-line max-len
-    /(?<! {2}|“|>|\n|: )(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)(?! [A-Z])/g,
+    /(?<! {2}|“|>|\n|: )(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)(?! [A-Z])/ug,
    (_, designation) => designation.toLowerCase()
  );
  xml = xml.replace(
-    /(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)-(\d+)/gi,
+    /(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)-(\d+)/ugi,
    "$1 $2"
  );
  xml = xml.replace(
    // eslint-disable-next-line max-len
-    /(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)[ -/](mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)/gi,
+    /(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)[ -/](mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)/ugi,
    "$1–$2"
  );

  // Capitalization is inconsistent, but shard names seem to usually be capitalized.
-  xml = xml.replace(/Grasping self/g, "Grasping Self");
-  xml = xml.replace(/Cloven stranger/g, "Cloven Stranger");
-  xml = xml.replace(/Princess shaper/g, "Princess Shaper");
-  xml = xml.replace(/Fragile one/g, "Fragile One");
+  xml = xml.replace(/Grasping self/ug, "Grasping Self");
+  xml = xml.replace(/Cloven stranger/ug, "Cloven Stranger");
+  xml = xml.replace(/Princess shaper/ug, "Princess Shaper");
+  xml = xml.replace(/Fragile one/ug, "Fragile One");

  // Place names need to always be capitalized
-  xml = xml.replace(/North end/g, "North End");
-  xml = xml.replace(/(Stonemast|Shale) avenue/g, "$1 Avenue");
-  xml = xml.replace(/(Lord|Slater) street/g, "$1 Street");
-  xml = xml.replace(/(Hollow|Cedar) point/g, "$1 Point");
-  xml = xml.replace(/(Norwalk|Fenway|Stratford) station/g, "$1 Station");
-  xml = xml.replace(/the megalopolis/g, "the Megalopolis");
-  xml = xml.replace(/earths(?![a-z])/g, "Earths");
+  xml = xml.replace(/North end/ug, "North End");
+  xml = xml.replace(/(Stonemast|Shale) avenue/ug, "$1 Avenue");
+  xml = xml.replace(/(Lord|Slater) street/ug, "$1 Street");
+  xml = xml.replace(/(Hollow|Cedar) point/ug, "$1 Point");
+  xml = xml.replace(/(Norwalk|Fenway|Stratford) station/ug, "$1 Station");
+  xml = xml.replace(/the megalopolis/ug, "the Megalopolis");
+  xml = xml.replace(/earths(?![a-z])/ug, "Earths");
  if (book === "ward") {
-    xml = xml.replace(/the bunker/g, "the Bunker");
-    xml = xml.replace(/‘bunker’/g, "‘Bunker’");
+    xml = xml.replace(/the bunker/ug, "the Bunker");
+    xml = xml.replace(/‘bunker’/ug, "‘Bunker’");
  }

  // "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
  // instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
  // substitutions.json.
-  xml = xml.replace(/(?<!mom), dad(?![a-z])/g, ", Dad");
-  xml = xml.replace(/, mom(?![a-z-])/g, ", Mom");
+  xml = xml.replace(/(?<!mom), dad(?![a-z])/ug, ", Dad");
+  xml = xml.replace(/, mom(?![a-z-])/ug, ", Mom");

  // Similarly, specific aunts and uncles get capitalized when used as a title. These are often missed.
-  xml = xml.replace(/aunt Sarah/g, "Aunt Sarah");
-  xml = xml.replace(/aunt Fleur/g, "Aunt Fleur");
-  xml = xml.replace(/uncle Neil/g, "Uncle Neil");
+  xml = xml.replace(/aunt Sarah/ug, "Aunt Sarah");
+  xml = xml.replace(/aunt Fleur/ug, "Aunt Fleur");
+  xml = xml.replace(/uncle Neil/ug, "Uncle Neil");

  // The majority of "Wardens’ headquarters" is lowercased, and always prefixed with "the", indicating it's not a proper
  // place name. So we remove the capitalization in the few places where it does appear.
-  xml = xml.replace(/Wardens’ Headquarters/g, "Wardens’ headquarters");
+  xml = xml.replace(/Wardens’ Headquarters/ug, "Wardens’ headquarters");

  // Some style guides try to reserve capitalized "Nazi" for historical discussions of members of the Nazi party. This
  // seems fuzzy when it comes to phrases like "neo-Nazi", and doesn't seem to be what the author is doing; the books
  // are just plain inconsistent. So, let's standardize on always uppercasing.
-  xml = xml.replace(/(?<![a-z])nazi/g, "Nazi");
-  xml = xml.replace(/ Neo-/g, " neo-");
+  xml = xml.replace(/(?<![a-z])nazi/ug, "Nazi");
+  xml = xml.replace(/ Neo-/ug, " neo-");

  // Style guides disagree on whether items like "english muffin", "french toast", and "french kiss" need their
  // adjective capitalized. The books mostly use lowercase, so let's stick with that. (substitutions.json corrects one
  // case of "French toast".)
-  xml = xml.replace(/english(?! muffin)/g, "English");
-  xml = xml.replace(/(?<! {2})English muffin/g, "english muffin");
+  xml = xml.replace(/english(?! muffin)/ug, "English");
+  xml = xml.replace(/(?<! {2})English muffin/ug, "english muffin");

  // I was very torn on what to do with capitalization for "Titan" and "Titans". In general you don't capitalize species
  // names or other classifications, e.g. style guides are quite clear you don't capitalize "gods". The author
@ -604,28 +638,34 @@ function fixCapitalization(xml, book) {
  // or "Kronos Titan".)
  if (book === "ward") {
    // All plural discussions of "Titans" are after Sundown 17.y.
-    xml = xml.replace(/titans/g, "Titans");
+    xml = xml.replace(/titans/ug, "Titans");

    // Since we can't safely change all instances of "titan", most are in substitutions.json. We can do a few here,
    // though.
-    xml = xml.replace(/dauntless titan/ig, "Dauntless Titan"); // Sometimes "Dauntless" isn't even capitalized.
-    xml = xml.replace(/Kronos titan/g, "Kronos Titan");
+    xml = xml.replace(/dauntless titan/uig, "Dauntless Titan"); // Sometimes "Dauntless" isn't even capitalized.
+    xml = xml.replace(/Kronos titan/ug, "Kronos Titan");
  }

  // For the giants, the prevailing usage seems to be to keep the term lowercase, but capitalize when used as a name.
-  xml = xml.replace(/(?<=Mathers |Goddess )giant/g, "Giant");
-  xml = xml.replace(/mother giant/ig, "Mother Giant");
-  xml = xml.replace(/(?<! {2}|“|>)Giants/g, "giants");
+  xml = xml.replace(/(?<=Mathers |Goddess )giant/ug, "Giant");
+  xml = xml.replace(/mother giant/uig, "Mother Giant");
+  xml = xml.replace(/(?<! {2}|“|>)Giants/ug, "giants");

  return xml;
 }

 function fixMispellings(xml) {
  // This is commonly misspelled.
-  xml = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");
+  xml = xml.replace(/([Ss])houlderblade/ug, "$1houlder blade");

  // All dictionaries agree this is capitalized.
-  xml = xml.replace(/u-turn/g, "U-turn");
+  xml = xml.replace(/u-turn/ug, "U-turn");
+
+  // https://www.dictionary.com/browse/scot-free
+  xml = xml.replace(/scott(?: |-)free/ug, "scot-free");
+
+  // https://grammarist.com/idiom/change-tack/
+  xml = xml.replace(/changed tacks/ug, "changed tack");

  return xml;
 }
@ -633,110 +673,111 @@ function fixMispellings(xml) {
 function fixHyphens(xml) {
  // "X-year-old" should use hyphens; all grammar guides agree. The books are very inconsistent but most often omit
  // them.
-  xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2");
-  xml = xml.replace(/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old");
+  xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/ug, "$1-year-old$2");
+  xml = xml.replace(/(\w+) or (\w+)-year-old/ug, "$1- or $2-year-old");

  // Compound numbers from 11 through 99 must be hyphenated, but others should not be.
  xml = xml.replace(
-    /(?<!\w)(twenty|thirty|fourty|fifty|sixty|seventy|eighty|ninety) (one|two|three|four|five|six|seven|eight|nine)/ig,
+    /(?<!\w)(twenty|thirty|fourty|fifty|sixty|seventy|eighty|ninety) (one|two|three|four|five|six|seven|eight|nine)/uig,
    "$1-$2"
  );
-  xml = xml.replace(/[- ]hundred-and-/g, " hundred and ");
-  xml = xml.replace(/(?<!-)(one|two|three|four|five|six|seven|eight|nine|twelve)-hundred/, "$1 hundred");
-  xml = xml.replace(/(hundred|ninety)-percent(?!-)/g, "$1 percent");
+  xml = xml.replace(/[- ]hundred-and-/ug, " hundred and ");
+  xml = xml.replace(/(?<!-)(one|two|three|four|five|six|seven|eight|nine|twelve)-hundred/ug, "$1 hundred");
+  xml = xml.replace(/(hundred|ninety)-percent(?!-)/ug, "$1 percent");

  // "red-haired", "long-haired", etc.: they all need hyphens
-  xml = xml.replace(/ haired/g, "-haired");
+  xml = xml.replace(/ haired/ug, "-haired");

  // These are consistently missing hyphens.
-  xml = xml.replace(/([Ll]ife) threatening/g, "life-threatening");
-  xml = xml.replace(/([Hh]ard) headed/g, "$1-headed");
-  xml = xml.replace(/([Ss]houlder) mounted/g, "$1-mounted");
-  xml = xml.replace(/([Gg]olden) skinned/g, "$1-skinned");
-  xml = xml.replace(/([Cc]reepy) crawl/g, "$1-crawl");
-  xml = xml.replace(/([Ww]ell) armed/g, "$1-armed");
-  xml = xml.replace(/([Aa]ble) bodied/g, "$1-bodied");
-  xml = xml.replace(/([Ll]evel) headed/g, "$1-headed");
-  xml = xml.replace(/([Cc]lear) cut/g, "$1-cut");
-  xml = xml.replace(/([Vv]at) grown/g, "$1-grown");
-  xml = xml.replace(/([Ss]hell) shocked/g, "$1-shocked");
-  xml = xml.replace(/([Dd]og) tired/g, "$1-tired");
-  xml = xml.replace(/([Nn]ightmare) filled/g, "$1-filled");
-  xml = xml.replace(/([Oo]ne) sided/g, "$1-sided");
-  xml = xml.replace(/([Mm]edium) sized/g, "$1-sized");
-  xml = xml.replace(/([Tt]eary) eyed/g, "$1-eyed");
-  xml = xml.replace(/([Ww]orst) case scenario/g, "$1-case scenario");
-  xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/g, "$1-$2");
-  xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/g, "$1-dimensional");
-  xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/g, "$1-on-one");
+  xml = xml.replace(/([Ll]ife) threatening/ug, "life-threatening");
+  xml = xml.replace(/([Hh]ard) headed/ug, "$1-headed");
+  xml = xml.replace(/([Ss]houlder) mounted/ug, "$1-mounted");
+  xml = xml.replace(/([Gg]olden) skinned/ug, "$1-skinned");
+  xml = xml.replace(/([Cc]reepy) crawl/ug, "$1-crawl");
+  xml = xml.replace(/([Ww]ell) armed/ug, "$1-armed");
+  xml = xml.replace(/([Aa]ble) bodied/ug, "$1-bodied");
+  xml = xml.replace(/([Ll]evel) headed/ug, "$1-headed");
+  xml = xml.replace(/([Cc]lear) cut/ug, "$1-cut");
+  xml = xml.replace(/([Vv]at) grown/ug, "$1-grown");
+  xml = xml.replace(/([Ss]hell) shocked/ug, "$1-shocked");
+  xml = xml.replace(/([Dd]og) tired/ug, "$1-tired");
+  xml = xml.replace(/([Nn]ightmare) filled/ug, "$1-filled");
+  xml = xml.replace(/([Oo]ne) sided/ug, "$1-sided");
+  xml = xml.replace(/([Mm]edium) sized/ug, "$1-sized");
+  xml = xml.replace(/([Tt]eary) eyed/ug, "$1-eyed");
+  xml = xml.replace(/([Ww]orst) case scenario/ug, "$1-case scenario");
+  xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/ug, "$1-$2");
+  xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/ug, "$1-dimensional");
+  xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/ug, "$1-on-one");

  // Preemptive(ly) is often hyphenated (not always). It should not be.
-  xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
+  xml = xml.replace(/([Pp])re-emptive/ug, "$1reemptive");

  // These should be hyphenated only when used as a verb. We correct those cases back in substitutions.json.
-  xml = xml.replace(/fist-bump/g, "fist bump");
-  xml = xml.replace(/high-five/g, "high five");
+  xml = xml.replace(/fist-bump/ug, "fist bump");
+  xml = xml.replace(/high-five/ug, "high five");

  // This should be hyphenated when used as an adjective (instead of an adverb or noun). I.e. it should be
  // "hand-to-hand combat", but "passed from hand to hand", and "capable in hand to hand". The following heuristic works
  // in the books.
-  xml = xml.replace(/hand to hand(?= [a-z])/g, "hand-to-hand");
+  xml = xml.replace(/hand to hand(?= [a-z])/ug, "hand-to-hand");

  // This is usually wrong but sometimes correct. The lookarounds avoid specific cases where it's referring to an actual
  // second in a series of guesses.
-  xml = xml.replace(/(?<!my |that )([Ss]econd) guess(?!es)/g, "$1-guess");
+  xml = xml.replace(/(?<!my |that )([Ss]econd) guess(?!es)/ug, "$1-guess");

  // When used as a phrase "just in case" gets no hyphens. When used as a noun or adjective it does. A couple of the
  // noun cases are missing one or both hyphens.
-  xml = xml.replace(/([Aa]) just[ -]in case/g, "$1 just-in-case");
+  xml = xml.replace(/([Aa]) just[ -]in case/ug, "$1 just-in-case");

  // When used as an adjective, it's hyphenated. It turns out most cases are as an adverb, so we go with this approach:
  xml = xml.replace(
-    /face to face(?= meeting| hang-out| interaction| contact| conversation| confrontation| fight)/g,
-    "face-to-face");
+    /face to face(?= meeting| hang-out| interaction| contact| conversation| confrontation| fight)/ug,
+    "face-to-face"
+  );

  // When used as an adjective, it's hyphenated. This heuristic works in the books.
-  xml = xml.replace(/fight or flight(?= [a-z])/g, "fight-or-flight");
+  xml = xml.replace(/fight or flight(?= [a-z])/ug, "fight-or-flight");

  // This is usually correct but sometimes wrong.
-  xml = xml.replace(/neo /g, "neo-");
+  xml = xml.replace(/neo /ug, "neo-");

  return xml;
 }

 function standardizeSpellings(xml) {
  // This is usually spelled "TV" but sometimes the other ways. Normalize.
-  xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
-  xml = xml.replace(/t\.v\./ig, "TV");
+  xml = xml.replace(/(\b)tv(\b)/ug, "$1TV$2");
+  xml = xml.replace(/t\.v\./uig, "TV");

  // "okay" is preferred to "ok" or "o.k.". This sometimes gets changed back via substitutions.json when people are
  // writing notes and thus probably the intention was to be less formal. Also it seems per
  // https://en.wikipedia.org/wiki/A-ok the "A" in "A-okay" should be capitalized.
-  xml = xml.replace(/Ok([,. ])/g, "Okay$1");
-  xml = xml.replace(/([^a-zA-Z])ok([^a])/g, "$1okay$2");
-  xml = xml.replace(/([^a-zA-Z])o\.k\.([^a])/g, "$1okay$2");
-  xml = xml.replace(/a-okay/g, "A-okay");
+  xml = xml.replace(/Ok([,. ])/ug, "Okay$1");
+  xml = xml.replace(/([^a-zA-Z])ok([^a])/ug, "$1okay$2");
+  xml = xml.replace(/([^a-zA-Z])o\.k\.([^a])/ug, "$1okay$2");
+  xml = xml.replace(/a-okay/ug, "A-okay");

  // Signal(l)ing/signal(l)ed are spelled both ways. Both are acceptable in English. Let's standardize on single-L.
-  xml = xml.replace(/(S|s)ignall/g, "$1ignal");
+  xml = xml.replace(/(S|s)ignall/ug, "$1ignal");

  // Clich(e|é) is spelled both ways. Let's standardize on including the accent.
-  xml = xml.replace(/cliche/g, "cliché");
+  xml = xml.replace(/cliche/ug, "cliché");

  // T-shirt is usually spelled lowercase ("t-shirt"). Normalize the remaining instances.
-  xml = xml.replace(/(?<! {2})T-shirt/g, "t-shirt");
+  xml = xml.replace(/(?<! {2})T-shirt/ug, "t-shirt");

  // "gray" is the majority spelling, except for "greyhound"
-  xml = xml.replace(/(G|g)rey(?!hound)/g, "$1ray");
+  xml = xml.replace(/(G|g)rey(?!hound)/ug, "$1ray");

  // 12 instances of "Dragon-craft", 12 instances of "Dragon craft", 1 instance of "dragon craft"
-  xml = xml.replace(/[Dd]ragon[ -](craft|mech)/g, "Dragon-$1");
+  xml = xml.replace(/[Dd]ragon[ -](craft|mech)/ug, "Dragon-$1");

  // 88 instances of "A.I." to four of "AI"
-  xml = xml.replace(/(?<=\b)AI(?=\b)/g, "A.I.");
+  xml = xml.replace(/(?<=\b)AI(?=\b)/ug, "A.I.");

  // 2 instances of "G.M." to one of "GM"
-  xml = xml.replace(/(?<=\b)GM(?=\b)/g, "G.M.");
+  xml = xml.replace(/(?<=\b)GM(?=\b)/ug, "G.M.");

  return xml;
 }
@ -756,12 +797,14 @@ function fixCaseNumbers(xml) {
  // We standardize on "Case Fifty-Three"; although it isn't the most common, it seems best to treat these as proper
  // nouns.

-  xml = xml.replace(/case[ -](?:fifty[ -]three|53)(?!’)/ig, "Case Fifty-Three");
-  xml = xml.replace(/case[ -](?:thirty[ -]two|53)(?!’)/ig, "Case Thirty-Two");
-  xml = xml.replace(/case[ -](?:sixty[ -]nine|53)(?!’)/ig, "Case Sixty-Nine");
+  xml = xml.replace(/case[ -](?:fifty[ -]three|53)(?!’)/uig, "Case Fifty-Three");
+  xml = xml.replace(/case[ -](?:thirty[ -]two|53)(?!’)/uig, "Case Thirty-Two");
+  xml = xml.replace(/case[ -](?:sixty[ -]nine|53)(?!’)/uig, "Case Sixty-Nine");

-  xml = xml.replace(/(?<!in )case[ -](zero|one|two|three|four|twelve|fifteen|seventy|ninety)(?!-)/ig,
-    (_, caseNumber) => "Case " + caseNumber[0].toUpperCase() + caseNumber.substring(1));
+  xml = xml.replace(
+    /(?<!in )case[ -](zero|one|two|three|four|twelve|fifteen|seventy|ninety)(?!-)/uig,
+    (_, caseNumber) => `Case ${caseNumber[0].toUpperCase()}${caseNumber.substring(1)}`
+  );

  return xml;
 }
@ -775,7 +818,7 @@ function isEmptyOrGarbage(el) {
 }

 function escapeRegExp(str) {
-  return str.replace(/[-[\]/{}()*+?.\\^$|]/g, "\\$&");
+  return str.replace(/[[\]/{}()*+?.\\^$|]/ug, "\\$&");
 }

 function decodeCloudFlareEmail(hash) {
--- a/lib/convert.js
+++ b/lib/convert.js
@ -34,8 +34,8 @@ module.exports = async (cachePath, manifestPath, contentPath, book, concurrentJo

    warnings.push(...await pool.exec("convertChapter", [chapter, book, inputPath, outputPath]));

-    const time = String(Math.round((performance.now() - start) / 1000)).padStart(3) + " s";
-    progress.increment({ time });
+    const seconds = String(Math.round((performance.now() - start) / 1000)).padStart(3);
+    progress.increment({ time: `${seconds} s` });
  }));

  pool.terminate();
--- a/lib/download.js
+++ b/lib/download.js
@ -1,7 +1,7 @@
 "use strict";
 const path = require("path");
 const fs = require("fs").promises;
-const request = require("requisition");
+const fetch = require("minipass-fetch");
 const { JSDOM } = require("jsdom");

 const FILENAME_PREFIX = "chapter";
@ -39,34 +39,21 @@ async function downloadAllChapters(manifest, startChapterURL, cachePath, manifes
  while (currentChapter !== null) {
    const filename = `${FILENAME_PREFIX}${chapterIndex.toString().padStart(3, "0")}.html`;

-    console.log(`Downloading ${currentChapter}`);
+    process.stdout.write(`Downloading ${currentChapter}... `);

-    const response = await downloadChapter(currentChapter);
-    const contents = await response.text();
-    console.log("- Response body received");
-    const rawChapterJSDOM = new JSDOM(contents, { url: currentChapter });
-    console.log("- Response body parsed into DOM");
+    const { contents, dom, url } = await downloadChapter(currentChapter);
+    const title = getChapterTitle(dom.window.document);
+    currentChapter = getNextChapterURL(dom.window.document);

-    const chapterURLToSave = currentChapter;
-    const chapterTitle = getChapterTitle(rawChapterJSDOM.window.document);
-    currentChapter = getNextChapterURL(rawChapterJSDOM.window.document);
-
-    // TODO: this should probably not be necessary... jsdom bug I guess!?
-    rawChapterJSDOM.window.close();
-
-    manifest.push({
-      url: chapterURLToSave,
-      title: chapterTitle,
-      filename
-    });
+    dom.window.close();

+    manifest.push({ url, title, filename });
    await fs.writeFile(path.resolve(cachePath, filename), contents);
-    console.log("- Response text saved to cache file");

    // Incrementally update the manifest after every successful download, instead of waiting until the end.
    const newManifestContents = JSON.stringify(manifest, undefined, 2);
    await fs.writeFile(manifestPath, newManifestContents);
-    console.log("- Manifest updated");
+    process.stdout.write("done\n");

    ++chapterIndex;
  }
@ -78,14 +65,21 @@ function getNextChapterURL(rawChapterDoc) {
  // - https://parahumans.wordpress.com/2012/04/21/sentinel-9-6/
  // So instead search for the first <a> within the main content area starting with "Next", trimmed.

+  let result = null;
  const aEls = rawChapterDoc.querySelectorAll(".entry-content a");
  for (let i = 0; i < aEls.length; ++i) {
    if (aEls[i].textContent.trim().startsWith("Next")) {
-      return aEls[i].href;
+      result = aEls[i].href;
+      break;
    }
  }

-  return null;
+  // Except, this doesn't always work, because the "Next Chapter" link in
+  // https://www.parahumans.net/2020/04/28/last-20-e6/ is just broken for some reason. We hard-code that.
+  if (result === "https://www.parahumans.net/?p=3365&preview=true") {
+    return "https://www.parahumans.net/2020/05/02/last-20-end/";
+  }
+  return result;
 }

 function getChapterTitle(rawChapterDoc) {
@ -93,7 +87,7 @@ function getChapterTitle(rawChapterDoc) {
  // issues down the line where we remove spaces around em dashes during conversion.) In the future it might be nice to
  // have proper chapter titles, e.g. sections per arc with title pages and then just "1" or similar for the chapter.
  // Until then this is reasonable and uniform.
-  return rawChapterDoc.querySelector("h1.entry-title").textContent.replace(/ – /, " ");
+  return rawChapterDoc.querySelector("h1.entry-title").textContent.replace(/ – /u, " ");
 }

 function retry(times, fn) {
@ -106,9 +100,39 @@ function retry(times, fn) {
  });
 }

-function downloadChapter(url) {
+// Downloads the chapter at startingURL, following any <meta http-equiv="refresh"> redirects it encounters.
+// Resolves to { url, contents, dom } for the last page fetched: its URL, its raw HTML text, and the JSDOM built from
+// that text. The JSDOM windows of intermediate redirect pages are closed here; the returned one is left open.
+async function downloadChapter(startingURL) {
+  let urlToFollow = startingURL;
+
+  let url, contents, dom;
+  while (urlToFollow !== null) {
+    const response = await downloadWithRetry(urlToFollow);
+
+    url = urlToFollow;
+    contents = await response.text();
+    dom = new JSDOM(contents, { url });
+
+    // exec() returns null when the refresh <meta> content is not of the form "<digits>;url=<target>".
+    // Destructuring that null would throw a TypeError, so such a page is treated as the final one instead.
+    const refreshMeta = dom.window.document.querySelector("meta[http-equiv=refresh]");
+    const redirectMatch = refreshMeta ? /\d+;url=(.*)/ui.exec(refreshMeta.content) : null;
+    if (redirectMatch) {
+      [, urlToFollow] = redirectMatch;
+      process.stdout.write(`\n  Redirected to ${urlToFollow}... `);
+      dom.window.close();
+    } else {
+      urlToFollow = null;
+    }
+  }
+
+  return { url, contents, dom };
+}
+
+function downloadWithRetry(url) {
  return retry(3, async () => {
-    const response = await request(url).redirects(10);
+    const response = await fetch(url);
    if (response.status !== 200) {
      throw new Error(`Response status for ${url} was ${response.status}`);
    }
--- a/lib/scaffold.js
+++ b/lib/scaffold.js
@ -4,7 +4,7 @@ const path = require("path");
 const cpr = require("util").promisify(require("cpr"));

 const BOOK_PUBLISHER = "Domenic Denicola";
-const BOOK_AUTHOR = "wildbow";
+const BOOK_AUTHOR = "Wildbow";

 const NCX_FILENAME = "toc.ncx";

@ -22,6 +22,8 @@ module.exports = async (scaffoldingPath, coverPath, bookPath, contentPath, chapt
      ]);
    })
  ]);
+
+  console.log(`EPUB contents assembled into ${scaffoldingPath}`);
 };

 function noThumbs(filePath) {
@ -54,7 +56,7 @@ function writeOPF(chapters, contentPath, coverFiles, bookInfo) {
  <manifest>
 <item id="ncx" href="${NCX_FILENAME}" media-type="application/x-dtbncx+xml"/>
 <item id="cover" href="${coverFiles.xhtml}" media-type="application/xhtml+xml"/>
-<item id="cover-image" properties="cover-image" href="${coverFiles.image}" media-type="${coverFiles.imageMimeType}"/>
+<item id="cover-image" href="${coverFiles.image}" media-type="${coverFiles.imageMimeType}"/>
 ${manifestChapters}
  </manifest>

--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@ -101,6 +101,10 @@
      "before": "I felt painfully conspicuous",
      "after": "I felt painfully conspicuous."
    },
+    {
+      "before": "Justin Beiber",
+      "after": "Justin Bieber"
+    },
    {
      "before": "Lisa said. “Entire",
      "after": "Lisa said, “entire"
@ -861,6 +865,14 @@
    }
  ],
  "https://parahumans.wordpress.com/2012/03/08/interlude-7%C2%BD-bonus/": [
+    {
+      "before": "she been blossoming",
+      "after": "she had been blossoming"
+    },
+    {
+      "before": "Get Ready!",
+      "after": "Get ready!"
+    },
    {
      "before": "‘boyfriend’; Not",
      "after": "‘boyfriend’; not"
@ -926,16 +938,14 @@
      "after": "crash when the wave rolled"
    },
    {
-      "regExp": "\n ?<em>\\s*([^<]+)</em>(<br />|</p>)",
-      "replacement": "\n<p style=\"padding-left:30px;\"><i>$1</i></p>"
+      "regExp": "</em><br />\n<em>\\s*",
+      "replacement": "</em></p>\n<p style=\"padding-left: 30px;\"><em>",
+      "_comment": "These are clearly paragraphs. Also the leading space is strange."
    },
    {
-      "regExp": "\n<p style=\"padding-left:30px;\"><em>([^<]+)</em>(<br />|</p>)",
-      "replacement": "\n<p style=\"padding-left:30px;\"><i>$1</i></p>"
-    },
-    {
-      "regExp": "\n<p style=\"padding-left:30px;\"><em>([^<\n]+)<br />\n([^>\n]+)</em><br />\n",
-      "replacement": "\n<p style=\"padding-left:30px;\"><i>$1</i></p>\n<p style=\"padding-left:30px;\"><i>$2</i></p>\n"
+      "before": "<em>Word choice, ‘too’:  haunted by demons.<br />\nSwearing: antisocial.</em>",
+      "after": "<em>Word choice, ‘too’:  haunted by demons.</em></p>\n<p><em>Swearing: antisocial.</em>",
+      "_comment": "This one has the <em> surrounding the <br />, unlike the others."
    }
  ],
  "https://parahumans.wordpress.com/2012/03/10/extermination-8-3/": [
@ -1711,12 +1721,6 @@
      "after": "Dinah being kidnapped, and leaving"
    }
  ],
-  "https://parahumans.wordpress.com/2012/05/22/infestation-11-2/": [
-    {
-      "before": "attentio n",
-      "after": "attention"
-    }
-  ],
  "https://parahumans.wordpress.com/2012/05/26/infestation-11-3/": [
    {
      "before": "intimidating:  A sea",
@ -1812,6 +1816,10 @@
      "before": "Charlotte,” I frowned. “Look",
      "after": "Charlotte,” I frowned, “look"
    },
+    {
+      "before": "non-sequitor",
+      "after": "non-sequitur"
+    },
    {
      "before": "Did they… was he",
      "after": "Did they… Was he"
@ -1819,12 +1827,12 @@
    {
      "before": "see the Doctor",
      "after": "see the doctor",
-      "comment": "Unlike the Cauldron Doctor, this is not used as a proper noun"
+      "_comment": "Unlike the Cauldron Doctor, this is not used as a proper noun"
    },
    {
      "before": "the Doctor spoke",
      "after": "the doctor spoke",
-      "comment": "Unlike the Cauldron Doctor, this is not used as a proper noun"
+      "_comment": "Unlike the Cauldron Doctor, this is not used as a proper noun"
    },
    {
      "before": "asinine confession again, “It’s",
@ -1941,7 +1949,7 @@
      "after": "<p><i>Crazed, kooky, cracked, crazy,<br />\nMental, dotty, whacked, loopy…</i></p>"
    },
    {
-      "before": "<p><em>Crazed, kooky, cracked, crazy,<br />\n<em>Nutty, screwy, mentally diseased…</em><br />\n</em> She ",
+      "before": "<p><em>Crazed, kooky, cracked, crazy,<br />\n<em>Nutty, screwy, mentally diseased…</em><br />\n<br />\n</em>She ",
      "after": "<p><i>Crazed, kooky, cracked, crazy,<br />\nNutty, screwy, mentally diseased…</i></p>\n<p>She "
    },
    {
@ -2132,6 +2140,10 @@
    }
  ],
  "https://parahumans.wordpress.com/2012/07/26/interlude-12%C2%BD/": [
+    {
+      "before": "implode, it has created a powerful vacuum in someone’s mouth, that",
+      "after": "implode, and it has created a powerful vacuum in someone’s mouth that"
+    },
    {
      "before": "doesn’t,” Madcap said. “But",
      "after": "doesn’t,” Madcap said, “but"
@ -2177,6 +2189,11 @@
    {
      "before": "Grue,” Trickster said. “Get",
      "after": "Grue,” Trickster said, “get"
+    },
+    {
+      "before": "Nothing she can’t do outside the bubble",
+      "after": "Nothing she can do outside the bubble",
+      "_comment": "This doesn't make sense logically with 'can’t'."
    }
  ],
  "https://parahumans.wordpress.com/2012/08/18/snare-13-7/": [
@ -2308,6 +2325,11 @@
    }
  ],
  "https://parahumans.wordpress.com/2012/10/20/colony-15-2/": [
+    {
+      "before": "turned something",
+      "after": "taken something",
+      "_comment": "'turned' is repeated later in the sentence."
+    },
    {
      "before": "on,” Tattletale said. “Let’s",
      "after": "on,” Tattletale said, “let’s"
@ -2318,6 +2340,11 @@
    }
  ],
  "https://parahumans.wordpress.com/2012/10/23/colony-15-3/": [
+    {
+      "before": "whether or not I agreed or not",
+      "after": "whether I agreed or not",
+      "_comment": "Alternatively, the second 'or not' could be deleted instead of the first."
+    },
    {
      "before": "Woah,” Regent said. “Relax",
      "after": "Woah,” Regent said, “relax"
@ -2329,6 +2356,11 @@
    {
      "before": "the street</p>",
      "after": "the street.</p>"
+    },
+    {
+      "before": "dishonest members",
+      "after": "dishonest member",
+      "_comment": "He's talking only about Skitter here."
    }
  ],
  "https://parahumans.wordpress.com/2012/10/27/colony-15-4/": [
@ -2407,10 +2439,6 @@
    {
      "before": "better pictures of mom",
      "after": "better pictures of Mom"
-    },
-    {
-      "before": "five-hundred percent",
-      "after": "five hundred percent"
    }
  ],
  "https://parahumans.wordpress.com/2012/11/20/interlude-15/": [
@ -2428,6 +2456,10 @@
      "before": "; Nobody",
      "after": "; nobody"
    },
+    {
+      "before": "version on",
+      "after": "version of"
+    },
    {
      "before": "is,” Tattletale said. “They’ve",
      "after": "is,” Tattletale said, “they’ve"
@ -2485,6 +2517,11 @@
    {
      "before": "decisions:  Holding back",
      "after": "decisions: holding back"
+    },
+    {
+      "before": "their positioning, they had planned this",
+      "after": "their positioning… They had planned this",
+      "_comment": "A comma seems like the wrong choice here."
    }
  ],
  "https://parahumans.wordpress.com/2012/12/22/monarch-16-9/": [
@ -2593,6 +2630,11 @@
    {
      "before": "GWER-.</p>",
      "after": "GWER—.</p>"
+    },
+    {
+      "before": "the Birdcage",
+      "after": "the birdcage",
+      "_comment": "A literal birdcage, not the parahuman prison."
    }
  ],
  "https://parahumans.wordpress.com/2013/01/12/migration-17-5/": [
@ -2758,6 +2800,10 @@
    }
  ],
  "https://parahumans.wordpress.com/2013/02/05/monarch-18-6/": [
+    {
+      "before": "he encouragement",
+      "after": "the encouragement"
+    },
    {
      "before": "‘okay’",
      "after": "‘OK’",
@ -3032,6 +3078,10 @@
    }
  ],
  "https://parahumans.wordpress.com/2013/03/23/chrysalis-20-2/": [
+    {
+      "before": "something together something",
+      "after": "something together"
+    },
    {
      "before": "of anxiety</em>.",
      "after": "of anxiety.</em>"
@ -3176,6 +3226,15 @@
    {
      "before": "to,” I said. “Is",
      "after": "to,” I said, “is"
+    },
+    {
+      "before": "are sorry your",
+      "after": "are <em>sorry</em> your",
+      "_comment": "All the other 'sorry's are emphasized in this paragraph"
+    },
+    {
+      "before": "<em>sorry that</em>",
+      "after": "<em>sorry</em> that"
    }
  ],
  "https://parahumans.wordpress.com/2013/04/25/imago-21-7/": [
@ -3186,6 +3245,10 @@
    {
      "before": "Ironic,” she mumbled. “Coming",
      "after": "Ironic,” she mumbled, “coming"
+    },
+    {
+      "before": "to looked around",
+      "after": "to look around"
    }
  ],
  "https://parahumans.wordpress.com/2013/04/27/interlude-21-donation-bonus-1/": [
@ -3256,10 +3319,6 @@
    {
      "before": "in,” I said. “Could",
      "after": "in,” I said, “could"
-    },
-    {
-      "before": " “ <em>We’re",
-      "after": "“<em>We’re"
    }
  ],
  "https://parahumans.wordpress.com/2013/05/16/cell-22-6/": [
@ -3289,10 +3348,6 @@
      "before": "it,” he said. “She",
      "after": "it,” he said, “she"
    },
-    {
-      "before": "“ I don’t",
-      "after": "“I don’t"
-    },
    {
      "before": "confirmed okay",
      "after": "confirmed ok",
@ -3300,6 +3355,10 @@
    }
  ],
  "https://parahumans.wordpress.com/2013/05/21/interlude-22-donation-bonus-1/": [
+    {
+      "before": "Sumimasen deshita",
+      "after": "<i lang=\"ja-JP\">Sumimasen deshita</i>"
+    },
    {
      "before": "“is it reassuring",
      "after": "“Is it reassuring"
@ -3311,6 +3370,10 @@
    {
      "before": "their ears</p>",
      "after": "their ears.</p>"
+    },
+    {
+      "before": "it’s import",
+      "after": "its import"
    }
  ],
  "https://parahumans.wordpress.com/2013/05/25/drone-23-1/": [
@ -3328,6 +3391,10 @@
      "before": "of the ship.  It’s",
      "after": "of the ship.’  It’s"
    },
+    {
+      "before": "supervillain-turned hero",
+      "after": "supervillain-turned-hero"
+    },
    {
      "before": "Las Vegas Rogue",
      "after": "Las Vegas rogue"
@ -3350,6 +3417,10 @@
    }
  ],
  "https://parahumans.wordpress.com/2013/05/30/drone-23-3/": [
+    {
+      "before": "nerf",
+      "after": "Nerf"
+    },
    {
      "before": "Nine,” Glenn said. “I",
      "after": "Nine,” Glenn said, “I"
@ -3361,6 +3432,10 @@
    }
  ],
  "https://parahumans.wordpress.com/2013/06/01/drone-23-4/": [
+    {
+      "before": "FIrst",
+      "after": "First"
+    },
    {
      "before": "said.  Someone",
      "after": "said.  “Someone"
@ -3370,9 +3445,17 @@
    {
      "before": "it,” Imp said. “She’s",
      "after": "it,” Imp said, “she’s"
+    },
+    {
+      "before": "electorcuted",
+      "after": "electrocuted"
    }
  ],
  "https://parahumans.wordpress.com/2013/06/06/interlude-23/": [
+    {
+      "before": "one—or two-word",
+      "after": "one- or two-word"
+    },
    {
      "before": "her mouth:  A click",
      "after": "her mouth: a click"
@ -3392,6 +3475,12 @@
      "after": "And,” I said, “we"
    }
  ],
+  "https://parahumans.wordpress.com/2013/06/11/crushed-24-2/": [
+    {
+      "before": "They destroying",
+      "after": "They were destroying"
+    }
+  ],
  "https://parahumans.wordpress.com/2013/06/15/crushed-24-3/": [
    {
      "before": "<em>Phir Sē echoed me.  “</em>",
@ -3466,9 +3555,22 @@
    }
  ],
  "https://parahumans.wordpress.com/2013/07/02/scarab-25-2/": [
+    {
+      "before": "Enough or three",
+      "after": "Enough for three"
+    },
+    {
+      "before": "—3",
+      "after": "-3",
+      "_comment": "This is a computer display and thus probably uses a regular hyphen for the minus sign."
+    },
    {
      "before": "said.  Except without",
      "after": "said.  “Except without"
+    },
+    {
+      "before": "line, t had",
+      "after": "line, it had"
    }
  ],
  "https://parahumans.wordpress.com/2013/07/06/scarab-25-3/": [
@ -3533,6 +3635,15 @@
    {
      "before": "avoided-",
      "after": "avoided"
+    },
+    {
+      "before": "<span style=\"text-decoration:underline;\"><strong>Lord</strong></span><span style=\"text-decoration:underline;\"> <strong>Walston</strong></span>",
+      "after": "<span style=\"text-decoration:underline;\"><strong>Lord Walston</strong></span>"
+    },
+    {
+      "before": "—16",
+      "after": "-16",
+      "_comment": "This is a computer display and thus probably uses a regular hyphen for the minus sign."
    }
  ],
  "https://parahumans.wordpress.com/2013/07/18/sting-26-1/": [
@ -3550,6 +3661,11 @@
    }
  ],
  "https://parahumans.wordpress.com/2013/07/20/sting-26-2/": [
+    {
+      "before": "It didn’t hurt that",
+      "after": "It didn’t help that",
+      "_comment": "This makes more sense with 'help'"
+    },
    {
      "before": "And gauntlet</em>, to reply to the last one",
      "after": "And gauntlet, to reply to the last one</em>"
@ -3603,8 +3719,8 @@
  ],
  "https://parahumans.wordpress.com/2013/07/30/sting-26-5/": [
    {
-      "before": "</em>  Wait…",
-      "after": "  Wait…</em>"
+      "before": "</em>  Wait…“</p>",
+      "after": "  Wait…</em>”</p>"
    },
    {
      "before": "This,” Imp said. “Is",
@ -3617,6 +3733,19 @@
    {
      "before": "—<em>break up the fog!</em>",
      "after": "<em>—break up the fog!</em>"
+    },
+    {
+      "before": "The Nyx were women",
+      "after": "The Nyxes were women"
+    },
+    {
+      "before": "Psychosomas were men,",
+      "after": "Psychosomas were men:",
+      "_comment": "This is a strange sentence, but these changes make it flow better"
+    },
+    {
+      "before": "Night hags",
+      "after": "Night Hags"
    }
  ],
  "https://parahumans.wordpress.com/2013/08/06/interlude-26a/": [
@ -3699,10 +3828,6 @@
    {
      "before": "shift position</p>",
      "after": "shift position.</p>"
-    },
-    {
-      "before": "“ Convenient.",
-      "after": "“Convenient."
    }
  ],
  "https://parahumans.wordpress.com/2013/08/20/extinction-27-3/": [
@ -3711,8 +3836,8 @@
      "after": "guess,” Sophia said, “you"
    },
    {
-      "before": "said  “ But",
-      "after": "said. “But"
+      "before": "“Maybe,” I said  “But",
+      "after": "“Maybe,” I said.  “But"
    }
  ],
  "https://parahumans.wordpress.com/2013/08/24/extinction-27-5/": [
@ -3752,6 +3877,10 @@
    }
  ],
  "https://parahumans.wordpress.com/2013/08/31/cockroaches-28-1/": [
+    {
+      "before": "“No, I said.",
+      "after": "“No,” I said."
+    },
    {
      "before": "could save issue",
      "after": "could say issue"
@ -3763,8 +3892,8 @@
  ],
  "https://parahumans.wordpress.com/2013/09/03/cockroaches-28-2/": [
    {
-      "before": "N<em>ext",
-      "after": "<em>Next</em>"
+      "before": "N<em>ext 12 of 32 additional windows</em>.",
+      "after": "<em>Next 12 of 32 additional windows.</em>"
    },
    {
      "before": "—But no",
@ -3820,6 +3949,10 @@
      "before": "I’m <em>not?  Fuck</em>.  There’s",
      "after": "I’m <em>not</em>?  <em>Fuck.</em>  There’s"
    },
+    {
+      "before": "‘I <em>don’t know</em>,”",
+      "after": "“I <em>don’t know</em>,”"
+    },
    {
      "before": "I suspect It’s a",
      "after": "I suspect it’s a"
@ -3895,10 +4028,6 @@
    {
      "before": "<em>Once the bead was in place, every</em> bullet hit.",
      "after": "Once the bead was in place, every bullet hit."
-    },
-    {
-      "before": "I ‘m",
-      "after": "I’m"
    }
  ],
  "https://parahumans.wordpress.com/2013/09/24/venom-29-3/": [
@ -4033,6 +4162,10 @@
    {
      "before": "coordinating two teams</em>.",
      "after": "coordinating two teams.</em>"
+    },
+    {
+      "before": "undecision",
+      "after": "indecision"
    }
  ],
  "https://parahumans.wordpress.com/2013/10/22/speck-30-4/": [
@ -4145,6 +4278,10 @@
    {
      "before": "I revoked my control over her, leaving in in the middle",
      "after": "I revoked my control over her, leaving her in the middle"
+    },
+    {
+      "before": "suffered for",
+      "after": "suffered from"
    }
  ],
  "https://parahumans.wordpress.com/2013/10/26/speck-30-6/": [
@ -4291,6 +4428,14 @@
    {
      "before": "minutes passed.  the chance rose",
      "after": "minutes passed.  The chance rose"
+    },
+    {
+      "before": "Collin",
+      "after": "Colin"
+    },
+    {
+      "before": "systems teacher had",
+      "after": "systems Teacher had"
    }
  ],
  "https://parahumans.wordpress.com/2013/11/12/teneral-e-4/": [
@ -4331,6 +4476,10 @@
    {
      "before": "boyfriends are my <em>ex-</em>boyfriends",
      "after": "boyfriends are my <em>ex</em>-boyfriends"
+    },
+    {
+      "before": "that ‘because I can’t isn’t an",
+      "after": "that ‘because I can’ isn’t an"
    }
  ],
  "https://parahumans.wordpress.com/2013/11/19/interlude-end/": [
@ -5005,7 +5154,7 @@
  ],
  "https://www.parahumans.net/2018/05/26/torch-7-4/": [
    {
-      "before": "<p style=\"text-align: center;\"><a href=\"https://www.parahumans.net/2018/05/26/eclipse-x-1/\"><strong>⊙</strong></a></p>",
+      "before": "<p style=\"text-align: center;\"><a href=\"/2018/05/26/eclipse-x-1/\"><strong>⊙</strong></a></p>",
      "after": "",
      "_comment": "This appears to be intentional, but it just duplicates the Next Chapter link (i.e. turning the page in the eBook), so we remove it"
    },
@ -6618,7 +6767,7 @@
      "after": "morning breath—more than morning breath—but there"
    },
    {
-      "before": "<p><strong><a href=\"https://www.parahumans.net/2019/03/26/heavens-12-none/\">Previous Chapter</a>                                                                                    <a href=\"https://www.parahumans.net/2019/04/02/black-13-1/\">Next Chapter</a></strong></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>",
+      "before": "<p><strong><a href=\"/2019/03/26/heavens-12-none/\">Previous Chapter</a>                                                                                    <a href=\"/2019/04/02/black-13-1/\">Next Chapter</a></strong></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>\n<p></p>",
      "after": "<div style=\"page-break-after: always;\">&#160;</div>\n<div style=\"page-break-after: always;\">&#160;</div>",
      "_comment": "This is the best way I can think of to emulate the end of chapter 'fake out' in an ebook format"
    },
@ -6645,7 +6794,7 @@
  ],
  "https://www.parahumans.net/2019/04/02/black-13-1/": [
    {
-      "before": "<p style=\"text-align: center;\">⊙</p>\n<p><strong><a href=\"https://www.parahumans.net/2019/03/29/heavens-12-x/\">Previous Chapter</a>                                                                                    <a href=\"https://www.parahumans.net/2019/04/05/black-13-2/\">Next Chapter</a></strong></p>\n",
+      "before": "<p style=\"text-align: center;\">⊙</p>\n<p><strong><a href=\"/2019/03/29/heavens-12-x/\">Previous Chapter</a>                                                                                    <a href=\"/2019/04/05/black-13-2/\">Next Chapter</a></strong></p>\n",
      "after": "",
      "_comment": "Our usual heuristics of removing the first paragraph to remove the previous/next chapter links are broken here because of the 'go back and look at the fake out' comment at the top"
    },
@ -7488,7 +7637,7 @@
      "after": "—don’t shoot it!"
    }
  ],
-  "https://www.parahumans.net/2019/09/15/from-within-16-10/": [
+  "https://www.parahumans.net/2019/09/14/from-within-16-10/": [
    {
      "before": "carried on.  out of",
      "after": "carried on, out of"
@ -7787,7 +7936,7 @@
    },
    {
      "before": "<em>you proved my sister</em> right?  <em>Tired,",
-      "after": "<em>you proved my sister </em>right</em>?  Tired,"
+      "after": "<em>you proved my sister </em>right<em>?  Tired,"
    },
    {
      "before": "boundary,” He whispered",
@ -8871,7 +9020,7 @@
      "after": "the entities"
    }
  ],
-  "https://www.parahumans.net/2020/03/25/last-20-9/": [
+  "https://www.parahumans.net/2020/03/24/last-20-9/": [
    {
      "before": "you could help miss—help Antares",
      "after": "you could help Miss—help Antares"
@ -9072,7 +9221,7 @@
      "after": "cross the ocean"
    }
  ],
-  "https://www.parahumans.net/?p=3365&preview=true": [
+  "https://www.parahumans.net/2020/05/02/last-20-end/": [
    {
      "before": "saying dad had custody",
      "after": "saying Dad had custody"
--- a/lib/worm-scraper.js
+++ b/lib/worm-scraper.js
@ -14,9 +14,10 @@ const zip = require("./zip.js");

 const OUTPUT_DEFAULT = "(Book name).epub";

-const argv = yargs
+const { argv } = yargs
  .usage(`${packageJson.description}\n\n${packageJson.name} [<command1> [<command2> [<command3> ...]]]\n\n` +
-         "Each command will fail if the previously-listed one has not yet been run (with matching options).")
+         "Each command will fail if the previously-listed one has not yet been run (with matching options).\n\n" +
+         "Running with no commands is equivalent to running download convert scaffold zip.")
  .command("download", "download all chapters into the cache")
  .command("convert", "convert the raw HTML into cleaned-up ebook chapters")
  .command("scaffold", "assemble the table of contents, etc.")
@ -58,11 +59,9 @@ const argv = yargs
    requiresArg: true,
    global: true
  })
-  .demandCommand(1) // TODO remove and allow all
  .recommendCommands()
  .help()
-  .version()
-  .argv;
+  .version();

 const outputFilename = argv.out === OUTPUT_DEFAULT ? `${books[argv.book].title}.epub` : argv.out;

@ -77,14 +76,18 @@ const chaptersPath = path.resolve(contentPath, "chapters");

 const commands = [];

+if (argv._.length === 0) {
+  argv._ = ["download", "convert", "scaffold", "zip"];
+}
+
 if (argv._.includes("download")) {
-  const startURL = books[argv.book].startURL;
+  const { startURL } = books[argv.book];
  commands.push(() => download(startURL, cachePath, manifestPath));
 }

 if (argv._.includes("convert")) {
  commands.push(() => {
-    return fs.rmdir(chaptersPath, { recursive: true, maxRetries: 3 })
+    return fs.rm(chaptersPath, { force: true, recursive: true, maxRetries: 3 })
      .then(() => fs.mkdir(chaptersPath, { recursive: true }))
      .then(() => convert(cachePath, manifestPath, chaptersPath, argv.book, argv.jobs));
  });
@ -93,7 +96,13 @@ if (argv._.includes("convert")) {
 if (argv._.includes("scaffold")) {
  const bookInfo = books[argv.book];
  commands.push(() => scaffold(
-    scaffoldingPath, coverPath, stagingPath, contentPath, chaptersPath, manifestPath, bookInfo
+    scaffoldingPath,
+    coverPath,
+    stagingPath,
+    contentPath,
+    chaptersPath,
+    manifestPath,
+    bookInfo
  ));
 }

@ -106,8 +115,6 @@ if (argv._.includes("zip")) {
    for (const command of commands) {
      await command();
    }
-
-    console.log("All done!");
  } catch (e) {
    console.error(e.stack);
    process.exit(1);
--- a/lib/zip.js
+++ b/lib/zip.js
@ -21,7 +21,7 @@ module.exports = (bookPath, contentPath, outPath) => {
    archive.pipe(destStream);

    // Order matters; mimetype must be first for a valid EPUB
-    archive.file(path.resolve(bookPath, "mimetype"), { name: "mimetype" });
+    archive.file(path.resolve(bookPath, "mimetype"), { name: "mimetype", store: true });
    archive.directory(contentPath, "OEBPS", { name: "OEBPS" });
    archive.directory(path.resolve(bookPath, "META-INF"), "META-INF", { name: "META-INF" });

--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
--- a/package.json
+++ b/package.json
@ -8,7 +8,7 @@
    "parahuman",
    "scraper"
  ],
-  "version": "4.9.0",
+  "version": "5.1.0",
  "author": "Domenic Denicola <d@domenic.me> (https://domenic.me/)",
  "license": "WTFPL",
  "repository": "domenic/worm-scraper",
@ -23,18 +23,19 @@
    "lint": "eslint lib"
  },
  "dependencies": {
-    "archiver": "^5.0.2",
-    "cli-progress": "^3.8.2",
+    "archiver": "^5.3.1",
+    "cli-progress": "^3.11.1",
    "cpr": "^3.0.1",
-    "jsdom": "^16.4.0",
-    "requisition": "^1.5.0",
-    "workerpool": "^6.0.2",
-    "yargs": "^16.1.0"
+    "jsdom": "^19.0.0",
+    "minipass-fetch": "^2.1.0",
+    "workerpool": "^6.2.1",
+    "yargs": "^17.5.1"
  },
  "devDependencies": {
-    "eslint": "^7.11.0"
+    "@domenic/eslint-config": "^2.0.0",
+    "eslint": "^8.16.0"
  },
  "engines": {
-    "node": ">=12.10.0"
+    "node": ">=16.13.2"
  }
 }
Author	SHA1	Message	Date
Domenic Denicola	8f73d57a5a	5.1.0	2022-05-28 18:05:25 -04:00
Santiago Arambillete	b25a0d3a65	Add some fixes for Worm	2022-05-28 18:02:34 -04:00
Domenic Denicola	752c4c3916	Update dependencies	2022-05-28 17:21:28 -04:00
Domenic Denicola	43a80d5dd9	5.0.0	2022-01-22 18:50:33 -05:00
Domenic Denicola	7c9d96578b	Use minipass-fetch instead of requisition. Requisition appears to be unmaintained and causes security alerts upon installation.	2022-01-22 18:50:18 -05:00
Domenic Denicola	ac56d1c4c7	Update dependencies and minimum Node	2022-01-22 18:33:02 -05:00
Evan Young	12394da334	Fix failure on convert in modern Node.js. Closes #30.	2022-01-22 13:39:12 -05:00
Domenic Denicola	c860873d78	Update dependencies	2021-02-15 22:05:38 -05:00
Domenic Denicola	360f108c1c	Use @domenic/eslint-config	2021-02-15 22:05:38 -05:00
Domenic Denicola	f3893e2f3f	4.12.1	2021-01-17 17:04:56 -05:00
Domenic Denicola	32c76a59a5	Update CI cache key for source updates	2021-01-17 17:04:37 -05:00
Domenic Denicola	ed070daf9f	Fix Ward substitutions for source updates. URLs seem to be relative now.	2021-01-17 17:04:37 -05:00
Domenic Denicola	96e3e837e6	Fix broken Ward last chapter downloading. It's not clear when this started happening, but the "Next Chapter" link in Last 20.e6 no longer works.	2021-01-17 17:04:37 -05:00
Domenic Denicola	bfdb9eadde	Follow redirects during downloads. Fixes #25.	2021-01-17 17:04:37 -05:00
Domenic Denicola	48400e6f96	Run EPUBCheck on CI. This should help detect any issues such as those seen in #22.	2021-01-01 17:48:41 -05:00
Domenic Denicola	b94c33ea6f	4.12.0	2021-01-01 16:27:03 -05:00
Domenic Denicola	ba387d3555	Improve deletion of empty-ish elements. The previous heuristic of replacing them with a space character caused spaces to be inserted in the middle of words. Also, various cases were missed. This should help.	2021-01-01 16:24:33 -05:00
Domenic Denicola	a405adf6b7	Fix more "changed tack" misspellings	2021-01-01 16:23:05 -05:00
Domenic Denicola	0efaf38170	Fix more "scot-free" misspellings	2021-01-01 16:23:05 -05:00
Elaina Martineau	ab226686ae	Spot fixes for all of Worm	2021-01-01 16:23:05 -05:00
Domenic Denicola	66f7856a0f	4.11.0	2020-12-31 14:22:50 -05:00
Domenic Denicola	848b090b0d	Fix bad XHTML in Ward Sundown 17.6. Closes #22.	2020-12-31 14:22:10 -05:00
Domenic Denicola	db1b3d9e97	Do not compress the mimetype file. This is the last error (for Worm) noted via epubcheck, per #22.	2020-12-31 14:22:10 -05:00
Domenic Denicola	c6f7460b82	Tweaks to cover-related EPUB stuff. Helps with the validation errors noted in #22.	2020-12-31 14:22:10 -05:00
Domenic Denicola	3865ae0f5b	Fix bad XHTML in Worm Cockroaches 28.2. Part of #22.	2020-12-31 14:22:10 -05:00
Domenic Denicola	d4663ed2e3	Replace WordPress <img>s with emoji. Noticed via the validation discussed in #22, which was failing on the draggable attribute.	2020-12-31 14:22:10 -05:00
Domenic Denicola	4a2d33f968	Fix bad XHTML in Worm Interlude 8 (Bonus). Part of #22.	2020-12-31 14:22:09 -05:00
Domenic Denicola	07f3011423	4.10.0	2020-12-26 18:03:22 -05:00
Domenic Denicola	e1f9320524	Capitalize "Wildbow" in book metadata. Per Q2 on https://www.parahumans.net/f-a-q/ it seems to be capitalized like a name, not just at the beginning of sentences.	2020-12-26 17:58:59 -05:00
Domenic Denicola	18874a335c	Add cover credits	2020-12-26 17:58:09 -05:00
Domenic Denicola	d5f3b58f0b	Slightly improve program output. Most notably reduce the number of output lines per chapter in the download step.	2020-12-26 17:54:19 -05:00
Domenic Denicola	0be098ff16	Allow running with no commands	2020-12-26 17:43:42 -05:00
Domenic Denicola	54dc72b182	Update dependencies	2020-12-26 17:38:16 -05:00
Domenic Denicola	28d4c6927a	Add a new cover for Worm	2020-12-26 17:31:16 -05:00