Compare commits
117 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8f73d57a5a | ||
|
|
b25a0d3a65 |
||
|
|
752c4c3916 | ||
|
|
43a80d5dd9 | ||
|
|
7c9d96578b | ||
|
|
ac56d1c4c7 | ||
|
|
12394da334 |
||
|
|
c860873d78 | ||
|
|
360f108c1c | ||
|
|
f3893e2f3f | ||
|
|
32c76a59a5 | ||
|
|
ed070daf9f | ||
|
|
96e3e837e6 | ||
|
|
bfdb9eadde | ||
|
|
48400e6f96 |
||
|
|
b94c33ea6f | ||
|
|
ba387d3555 | ||
|
|
a405adf6b7 | ||
|
|
0efaf38170 | ||
|
|
ab226686ae | ||
|
|
66f7856a0f | ||
|
|
848b090b0d | ||
|
|
db1b3d9e97 | ||
|
|
c6f7460b82 | ||
|
|
3865ae0f5b | ||
|
|
d4663ed2e3 | ||
|
|
4a2d33f968 | ||
|
|
07f3011423 | ||
|
|
e1f9320524 | ||
|
|
18874a335c | ||
|
|
d5f3b58f0b | ||
|
|
0be098ff16 | ||
|
|
54dc72b182 | ||
|
|
28d4c6927a | ||
|
|
cc2db87b58 | ||
|
|
a89414392e | ||
|
|
89789724d1 | ||
|
|
5a25df658b | ||
|
|
f3366e8346 | ||
|
|
0b4af123ab | ||
|
|
f5f0ba8e61 | ||
|
|
08e0d0d9a8 | ||
|
|
5d9a031c02 | ||
|
|
b652e3812b | ||
|
|
26a2b9c9b5 | ||
|
|
2fb075128a | ||
|
|
ed392d8b98 | ||
|
|
d0c23d86fd | ||
|
|
369714f3d1 | ||
|
|
06e43dcf16 | ||
|
|
e1b59994f8 | ||
|
|
b67f4032f5 | ||
|
|
651944b4da | ||
|
|
44f2cc3c7b | ||
|
|
294cbb2e71 | ||
|
|
6256b332cb | ||
|
|
bf8a1b325c | ||
|
|
6f51bc6c9a | ||
|
|
877beda733 | ||
|
|
6ddde06817 | ||
|
|
aa9fc197e9 | ||
|
|
a1c7f00b42 | ||
|
|
b9d396f6f5 | ||
|
|
1a0780bd7b | ||
|
|
5b7ec80750 | ||
|
|
442d245e2d | ||
|
|
9fc36b813f | ||
|
|
631417a530 | ||
|
|
8a9562e10e | ||
|
|
5ff6621b31 | ||
|
|
3e06358fa2 | ||
|
|
3aece3e05e | ||
|
|
4908956f0c | ||
|
|
730cc512e3 | ||
|
|
cc151355fd | ||
|
|
a86e21b846 | ||
|
|
fc641af4f5 | ||
|
|
6e59181524 | ||
|
|
ecea0f5660 | ||
|
|
e3ef2e254a | ||
|
|
54f2f82650 | ||
|
|
0c22f7df11 | ||
|
|
d12c48976b | ||
|
|
2e4a3e56dc | ||
|
|
5c80327fef | ||
|
|
2412fa0375 | ||
|
|
32817eb255 | ||
|
|
c128712bb4 | ||
|
|
d62739f6cd | ||
|
|
f3063854ff | ||
|
|
1328dfd8e3 | ||
|
|
5310672cc2 | ||
|
|
79efec7080 | ||
|
|
ee76715935 | ||
|
|
2a09195471 | ||
|
|
4c1c7cd03d | ||
|
|
de83a47bfa | ||
|
|
3ec6e36e34 | ||
|
|
70e1ff0281 | ||
|
|
6751136bec | ||
|
|
1da99791d3 | ||
|
|
ea19dbb6c5 | ||
|
|
424f13f169 | ||
|
|
b613514994 | ||
|
|
16ef1836da | ||
|
|
85b5e142da | ||
|
|
2eafeee814 | ||
|
|
af03064221 | ||
|
|
22fbff008e | ||
|
|
c9d51787e2 | ||
|
|
53f7307daa | ||
|
|
20d91d37df | ||
|
|
60709f54c2 | ||
|
|
06c7b3adf2 | ||
|
|
3284c04f8b | ||
|
|
121ab01243 | ||
|
|
abca01b1d6 |
18 changed files with 5506 additions and 1847 deletions
255
.eslintrc.json
255
.eslintrc.json
|
|
@ -1,259 +1,10 @@
|
|||
{
|
||||
"root": true,
|
||||
"extends": "@domenic",
|
||||
"env": {
|
||||
"node": true,
|
||||
"es6": true
|
||||
},
|
||||
"parserOptions": {
|
||||
"ecmaVersion": 2019
|
||||
"node": true
|
||||
},
|
||||
"rules": {
|
||||
// Possible errors
|
||||
"no-await-in-loop": "off",
|
||||
"comma-dangle": ["error", "never"],
|
||||
"no-cond-assign": ["error", "except-parens"],
|
||||
"no-console": "off",
|
||||
"no-constant-condition": "error",
|
||||
"no-control-regex": "error",
|
||||
"no-debugger": "error",
|
||||
"no-dupe-args": "error",
|
||||
"no-dupe-keys": "error",
|
||||
"no-duplicate-case": "error",
|
||||
"no-empty": "error",
|
||||
"no-empty-character-class": "error",
|
||||
"no-ex-assign": "error",
|
||||
"no-extra-boolean-cast": "error",
|
||||
"no-extra-parens": ["error", "all", { "conditionalAssign": false, "nestedBinaryExpressions": false }],
|
||||
"no-extra-semi": "error",
|
||||
"no-func-assign": "error",
|
||||
"no-inner-declarations": "off",
|
||||
"no-invalid-regexp": "error",
|
||||
"no-irregular-whitespace": "error",
|
||||
"no-obj-calls": "error",
|
||||
"no-prototype-builtins": "error",
|
||||
"no-regex-spaces": "error",
|
||||
"no-sparse-arrays": "error",
|
||||
"no-template-curly-in-string": "error",
|
||||
"no-unexpected-multiline": "error",
|
||||
"no-unreachable": "error",
|
||||
"no-unsafe-finally": "off",
|
||||
"no-unsafe-negation": "error",
|
||||
"use-isnan": "error",
|
||||
"valid-jsdoc": "off",
|
||||
"valid-typeof": "error",
|
||||
|
||||
// Best practices
|
||||
"accessor-pairs": "error",
|
||||
"array-callback-return": "error",
|
||||
"block-scoped-var": "off",
|
||||
"class-methods-use-this": "error",
|
||||
"complexity": "off",
|
||||
"consistent-return": "error",
|
||||
"curly": ["error", "all"],
|
||||
"default-case": "off",
|
||||
"dot-location": ["error", "property"],
|
||||
"dot-notation": "error",
|
||||
"eqeqeq": "error",
|
||||
"guard-for-in": "off",
|
||||
"no-alert": "error",
|
||||
"no-caller": "error",
|
||||
"no-case-declarations": "error",
|
||||
"no-div-regex": "off",
|
||||
"no-else-return": "error",
|
||||
"no-empty-function": "error",
|
||||
"no-empty-pattern": "error",
|
||||
"no-eq-null": "error",
|
||||
"no-eval": "error",
|
||||
"no-extend-native": "error",
|
||||
"no-extra-bind": "error",
|
||||
"no-extra-label": "error",
|
||||
"no-fallthrough": "error",
|
||||
"no-floating-decimal": "error",
|
||||
"no-global-assign": "error",
|
||||
"no-implicit-coercion": "error",
|
||||
"no-implicit-globals": "error",
|
||||
"no-implied-eval": "off",
|
||||
"no-invalid-this": "error",
|
||||
"no-iterator": "error",
|
||||
"no-labels": ["error", { "allowLoop": true }],
|
||||
"no-lone-blocks": "error",
|
||||
"no-loop-func": "off",
|
||||
"no-magic-numbers": "off",
|
||||
"no-multi-spaces": "error",
|
||||
"no-multi-str": "error",
|
||||
"no-new": "error",
|
||||
"no-new-func": "error",
|
||||
"no-new-wrappers": "error",
|
||||
"no-octal": "error",
|
||||
"no-octal-escape": "error",
|
||||
"no-param-reassign": "off",
|
||||
"no-process-env": "error",
|
||||
"no-proto": "error",
|
||||
"no-redeclare": "error",
|
||||
"no-restricted-properties": "off",
|
||||
"no-return-assign": ["error", "except-parens"],
|
||||
"no-return-await": "error",
|
||||
"no-script-url": "off",
|
||||
"no-self-assign": "error",
|
||||
"no-self-compare": "error",
|
||||
"no-sequences": "error",
|
||||
"no-throw-literal": "error",
|
||||
"no-unmodified-loop-condition": "error",
|
||||
"no-unused-expressions": "error",
|
||||
"no-unused-labels": "error",
|
||||
"no-useless-call": "error",
|
||||
"no-useless-concat": "error",
|
||||
"no-useless-escape": "error",
|
||||
"no-useless-return": "error",
|
||||
"no-void": "error",
|
||||
"no-warning-comments": "off",
|
||||
"no-with": "error",
|
||||
"radix": ["error", "as-needed"],
|
||||
"require-await": "error",
|
||||
"vars-on-top": "off",
|
||||
"wrap-iife": ["error", "outside"],
|
||||
"yoda": ["error", "never"],
|
||||
|
||||
// Strict Mode
|
||||
"strict": ["error", "global"],
|
||||
|
||||
// Variables
|
||||
"init-declarations": "off",
|
||||
"no-catch-shadow": "error",
|
||||
"no-delete-var": "error",
|
||||
"no-label-var": "error",
|
||||
"no-restricted-globals": "off",
|
||||
"no-shadow": "error",
|
||||
"no-shadow-restricted-names": "error",
|
||||
"no-undef": "error",
|
||||
"no-undef-init": "error",
|
||||
"no-undefined": "off",
|
||||
"no-unused-vars": "error",
|
||||
"no-use-before-define": ["error", "nofunc"],
|
||||
|
||||
// Node.js and CommonJS
|
||||
"callback-return": "off",
|
||||
"global-require": "error",
|
||||
"handle-callback-err": "error",
|
||||
"no-mixed-requires": ["error", true],
|
||||
"no-new-require": "error",
|
||||
"no-path-concat": "error",
|
||||
"no-process-exit": "error",
|
||||
"no-restricted-imports": "off",
|
||||
"no-restricted-modules": "off",
|
||||
"no-sync": "off",
|
||||
|
||||
// Stylistic Issues
|
||||
"array-bracket-spacing": ["error", "never"],
|
||||
"block-spacing": ["error", "always"],
|
||||
"brace-style": ["error", "1tbs", { "allowSingleLine": false }],
|
||||
"camelcase": ["error", { "properties": "always" }],
|
||||
"capitalized-comments": ["error", "always", { "ignoreConsecutiveComments": true }],
|
||||
"comma-spacing": ["error", { "before": false, "after": true }],
|
||||
"comma-style": ["error", "last"],
|
||||
"computed-property-spacing": ["error", "never"],
|
||||
"consistent-this": "off",
|
||||
"eol-last": "error",
|
||||
"func-call-spacing": ["error", "never"],
|
||||
"func-name-matching": ["error", "always"],
|
||||
"func-names": ["error", "never"],
|
||||
"func-style": ["error", "declaration"],
|
||||
"id-blacklist": "off",
|
||||
"id-length": "off",
|
||||
"id-match": "off",
|
||||
"indent": ["error", 2, { "SwitchCase": 1 }],
|
||||
"jsx-quotes": "off",
|
||||
"key-spacing": ["error", { "beforeColon": false, "afterColon": true, "mode": "strict" }],
|
||||
"keyword-spacing": ["error", { "before": true, "after": true }],
|
||||
"line-comment-position": "off",
|
||||
"linebreak-style": ["error", "unix"],
|
||||
"lines-around-comment": "off",
|
||||
"lines-around-directive": "off",
|
||||
"max-depth": "off",
|
||||
"max-len": ["error", 120, { "ignoreUrls": true }],
|
||||
"max-lines": "off",
|
||||
"max-nested-callbacks": "off",
|
||||
"max-params": "off",
|
||||
"max-statements": "off",
|
||||
"max-statements-per-line": ["error", { "max": 1 }],
|
||||
"multiline-ternary": "off",
|
||||
"new-cap": "error",
|
||||
"new-parens": "error",
|
||||
"newline-after-var": "off",
|
||||
"newline-before-return": "off",
|
||||
"newline-per-chained-call": "off",
|
||||
"no-array-constructor": "error",
|
||||
"no-bitwise": "off",
|
||||
"no-continue": "off",
|
||||
"no-inline-comments": "off",
|
||||
"no-lonely-if": "error",
|
||||
"no-mixed-operators": "error",
|
||||
"no-mixed-spaces-and-tabs": "error",
|
||||
"no-multiple-empty-lines": "error",
|
||||
"no-negated-condition": "off",
|
||||
"no-nested-ternary": "error",
|
||||
"no-new-object": "error",
|
||||
"no-plusplus": "off",
|
||||
"no-restricted-syntax": "off",
|
||||
"no-tabs": "error",
|
||||
"no-ternary": "off",
|
||||
"no-trailing-spaces": "error",
|
||||
"no-underscore-dangle": "off",
|
||||
"no-unneeded-ternary": "error",
|
||||
"no-whitespace-before-property": "error",
|
||||
"object-curly-newline": ["error", { "multiline": true }],
|
||||
"object-curly-spacing": ["error", "always"],
|
||||
"object-property-newline": "off",
|
||||
"one-var": ["error", "never"],
|
||||
"one-var-declaration-per-line": ["error", "initializations"],
|
||||
"operator-assignment": ["error", "always"],
|
||||
"operator-linebreak": ["error", "after"],
|
||||
"padded-blocks": ["error", "never"],
|
||||
"quote-props": ["error", "as-needed"],
|
||||
"quotes": ["error", "double", { "avoidEscape": true, "allowTemplateLiterals": true }],
|
||||
"require-jsdoc": "off",
|
||||
"semi": ["error", "always"],
|
||||
"semi-spacing": "error",
|
||||
"sort-keys": "off",
|
||||
"sort-vars": "off",
|
||||
"space-before-blocks": ["error", "always"],
|
||||
"space-before-function-paren": ["error", { "anonymous": "always", "named": "never" }],
|
||||
"space-in-parens": ["error", "never"],
|
||||
"space-infix-ops": "error",
|
||||
"space-unary-ops": ["error", { "words": true, "nonwords": false }],
|
||||
"spaced-comment": ["error", "always", { "markers": ["///"] }],
|
||||
"unicode-bom": ["error", "never"],
|
||||
"wrap-regex": "off",
|
||||
|
||||
// ECMAScript 6
|
||||
"arrow-body-style": "off", // meh
|
||||
"arrow-parens": ["error", "as-needed"],
|
||||
"arrow-spacing": "error",
|
||||
"constructor-super": "error",
|
||||
"generator-star-spacing": ["error", "after"],
|
||||
"no-class-assign": "error",
|
||||
"no-confusing-arrow": "off",
|
||||
"no-const-assign": "error",
|
||||
"no-dupe-class-members": "error",
|
||||
"no-duplicate-imports": "error",
|
||||
"no-new-symbol": "error",
|
||||
"no-this-before-super": "error",
|
||||
"no-useless-computed-key": "error",
|
||||
"no-useless-constructor": "error",
|
||||
"no-useless-rename": "error",
|
||||
"no-var": "error",
|
||||
"object-shorthand": "error",
|
||||
"prefer-arrow-callback": "error",
|
||||
"prefer-const": "error",
|
||||
"prefer-numeric-literals": "error",
|
||||
"prefer-rest-params": "error",
|
||||
"prefer-spread": "error",
|
||||
"prefer-template": "off",
|
||||
"require-yield": "error",
|
||||
"rest-spread-spacing": ["error", "never"],
|
||||
"sort-imports": "off",
|
||||
"symbol-description": "error",
|
||||
"template-curly-spacing": ["error", "never"],
|
||||
"yield-star-spacing": ["error", "after"]
|
||||
"no-console": "off"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
50
.github/workflows/test.yml
vendored
Normal file
50
.github/workflows/test.yml
vendored
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
name: Test
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
jobs:
|
||||
test:
|
||||
name: Test
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- uses: actions/setup-node@v2
|
||||
with:
|
||||
node-version: 16
|
||||
|
||||
- run: npm install
|
||||
|
||||
- run: npm run lint
|
||||
|
||||
# CI would take too long if we did the download every time too. So, we cache it. This does mean we're vulnerable to
|
||||
# source changes exposing problems in our code, but those are pretty infrequent. If they occur, we need to bump the
|
||||
# cache key.
|
||||
- uses: actions/cache@v2
|
||||
with:
|
||||
key: worm-ward-cache-2021-01-17
|
||||
path: ./cache
|
||||
|
||||
- run: node ./lib/worm-scraper.js --book=worm
|
||||
|
||||
- run: node ./lib/worm-scraper.js --book=ward
|
||||
|
||||
- uses: actions/setup-java@v1
|
||||
with:
|
||||
java-version: 15
|
||||
java-package: jre
|
||||
|
||||
- name: Get EPUBCheck
|
||||
run: |
|
||||
curl https://github.com/w3c/epubcheck/releases/download/v4.2.4/epubcheck-4.2.4.zip --location --output epubcheck.zip
|
||||
unzip epubcheck.zip
|
||||
|
||||
- name: Check Worm.epub
|
||||
run: java -jar epubcheck-4.2.4/epubcheck.jar --failonwarnings Worm.epub
|
||||
|
||||
- name: Check Ward.epub
|
||||
run: java -jar epubcheck-4.2.4/epubcheck.jar --failonwarnings Ward.epub
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
language: node_js
|
||||
node_js:
|
||||
- 10
|
||||
- stable
|
||||
script:
|
||||
npm run lint
|
||||
|
|
@ -4,7 +4,7 @@ Scrapes the web serial [_Worm_](https://parahumans.wordpress.com/) and its seque
|
|||
|
||||
## How to use
|
||||
|
||||
First you'll need a modern version of [Node.js](https://nodejs.org/en/). Install whatever is current (not LTS); at least v12.10.0 is necessary.
|
||||
First you'll need a modern version of [Node.js](https://nodejs.org/en/). At least v16.13.2 is necessary.
|
||||
|
||||
Then, open a terminal ([Mac documentation](http://blog.teamtreehouse.com/introduction-to-the-mac-os-x-command-line), [Windows documentation](http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) and install the program by typing
|
||||
|
||||
|
|
@ -21,7 +21,7 @@ worm-scraper --help
|
|||
If this outputs some help documentation, then the installation process went smoothly. You can move on to assemble the eBook by typing
|
||||
|
||||
```bash
|
||||
worm-scraper download convert scaffold zip
|
||||
worm-scraper
|
||||
```
|
||||
|
||||
This will take a while, but will eventually produce a `Worm.epub` file!
|
||||
|
|
@ -29,7 +29,7 @@ This will take a while, but will eventually produce a `Worm.epub` file!
|
|||
If you'd like to get _Ward_ instead of _Worm_, use `--book=ward`, e.g.
|
||||
|
||||
```bash
|
||||
worm-scraper download convert scaffold zip --book=ward
|
||||
worm-scraper --book=ward
|
||||
```
|
||||
|
||||
## EPUB vs. other formats
|
||||
|
|
|
|||
11
covers/README.md
Normal file
11
covers/README.md
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Cover credits
|
||||
|
||||
The _Worm_ cover is assembled from:
|
||||
|
||||
- [Ari Ibarra's fanart](https://www.instagram.com/p/B1wSi1Ynaze/) on Instagram
|
||||
- The "Wildbow's Past Works" image for _Worm_ on [parahumans.net](https://www.parahumans.net/)
|
||||
|
||||
The _Ward_ cover is assembled from:
|
||||
|
||||
- [zearoe's fanart](https://www.reddit.com/r/Parahumans/comments/b8n7o0/fanartrepost_antares/) on Reddit
|
||||
- The header image on [parahumans.net](https://www.parahumans.net/)
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>Cover</title>
|
||||
<style>
|
||||
<style type="text/css">
|
||||
body {
|
||||
text-align: center;
|
||||
margin: 0;
|
||||
|
|
@ -17,6 +17,8 @@
|
|||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<img src="cover.jpg" alt=""/>
|
||||
<div>
|
||||
<img src="cover.jpg" alt=""/>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
BIN
covers/worm/cover.jpg
Normal file
BIN
covers/worm/cover.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 374 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 332 KiB |
|
|
@ -3,7 +3,7 @@
|
|||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>Cover</title>
|
||||
<style>
|
||||
<style type="text/css">
|
||||
body {
|
||||
text-align: center;
|
||||
margin: 0;
|
||||
|
|
@ -17,6 +17,8 @@
|
|||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<img src="cover.png" alt=""/>
|
||||
<div>
|
||||
<img src="cover.jpg" alt=""/>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -71,18 +71,27 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
// Worm uses 30px; Ward mostly uses 40px but sometimes uses 30px/60px. Let's standardize on 30px.
|
||||
if (style === "text-align:left;padding-left:30px;" ||
|
||||
style === "text-align: left;padding-left: 40px;" ||
|
||||
style === "text-align: left; padding-left: 40px;" ||
|
||||
style === "padding-left: 40px;") {
|
||||
child.setAttribute("style", "padding-left: 30px;");
|
||||
}
|
||||
}
|
||||
|
||||
// Remove empty <em>s and <i>s
|
||||
// Remove style attributes from them, as they're always messed up.
|
||||
for (const em of contentEl.querySelectorAll("em, i")) {
|
||||
if (em.textContent.trim() === "") {
|
||||
em.replaceWith(contentEl.ownerDocument.createTextNode(" "));
|
||||
// Remove empty inline elements.
|
||||
// Remove style attributes from inline elements, as they're always messed up.
|
||||
for (const el of contentEl.querySelectorAll("em, i, strong, b")) {
|
||||
const { textContent } = el;
|
||||
|
||||
if (textContent === "") {
|
||||
el.remove();
|
||||
} else if (textContent.trim() === "") {
|
||||
if (el.childElementCount === 0) {
|
||||
el.replaceWith(" ");
|
||||
} else if (el.childElementCount === 1 && el.children[0].localName === "br") {
|
||||
el.outerHTML = "<br />\n";
|
||||
}
|
||||
} else {
|
||||
em.removeAttribute("style");
|
||||
el.removeAttribute("style");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -133,100 +142,125 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
let xml = xmlSerializer.serializeToString(bodyEl);
|
||||
|
||||
// Fix recurring strange pattern of extra <br> in <p>...<em>...<br>\n</em></p>
|
||||
xml = xml.replace(/<br \/>\s*<\/em><\/p>/g, "</em></p>");
|
||||
xml = xml.replace(/<br \/>\s*<\/em><\/p>/ug, "</em></p>");
|
||||
|
||||
// Replace single-word <i>s with <em>s. Other <i>s are probably erroneous too, but these are known-bad.
|
||||
xml = xml.replace(/<i>([^ ]+)<\/i>/g, "<em>$1</em>");
|
||||
xml = xml.replace(/<i>([^ ]+)( +)<\/i>/g, "<em>$1</em>$2");
|
||||
xml = xml.replace(/<i>([^ ]+)<\/i>/ug, "<em>$1</em>");
|
||||
xml = xml.replace(/<i>([^ ]+)( +)<\/i>/ug, "<em>$1</em>$2");
|
||||
|
||||
// There are way too many nonbreaking spaces where they don't belong.
|
||||
// If they show up three in a row, then let them live. Otherwise, they die.
|
||||
// Also remove any run of them after a period.
|
||||
xml = xml.replace(/([^\xA0])\xA0\xA0?([^\xA0])/g, "$1 $2");
|
||||
xml = xml.replace(/\.\x20*\xA0[\xA0\x20]*/, ". ");
|
||||
// There are way too many nonbreaking spaces where they don't belong. If they show up three in a row, then let them
|
||||
// live; they're maybe being used for alignment or something. Otherwise, they die.
|
||||
//
|
||||
// Also, normalize spaces after a period/quote mark to two (normal) spaces. The second one is invisible when
|
||||
// rendered, but it helps future heuristics detect end of sentences.
|
||||
xml = xml.replace(/\xA0{1,2}(?!\x20\xA0)/ug, " ");
|
||||
xml = xml.replace(/([.”])\x20*\xA0[\xA0\x20]*/ug, "$1 ");
|
||||
xml = xml.replace(/([.”])\x20{3,}/ug, "$1 ");
|
||||
|
||||
function fixEms() {
|
||||
// Fix recurring broken-up or erroneous <em>s
|
||||
xml = xml.replace(/<\/em>‘s/g, "’s</em>");
|
||||
xml = xml.replace(/<em><\/em>/g, "");
|
||||
xml = xml.replace(/<\/em><em>/g, "");
|
||||
xml = xml.replace(/<em>(\s?\s?[^A-Za-z]\s?\s?)<\/em>/g, "$1");
|
||||
xml = xml.replace(/<\/em>(\s?\s?[^A-Za-z]\s?\s?)<em>/g, "$1");
|
||||
xml = xml.replace(/“<em>([^>]+)<\/em>(!|\?|\.)”/g, "“<em>$1$2</em>”");
|
||||
xml = xml.replace(/<p><em>([^>]+)<\/em>(!|\?|\.)<\/p>/g, "<p><em>$1$2</em></p>");
|
||||
xml = xml.replace(/(!|\?|\.)\s{2}<\/em><\/p>/g, "$1</em></p>");
|
||||
xml = xml.replace(/<em>([a-z]+)(\?|\.)<\/em>/g, "<em>$1</em>$2");
|
||||
xml = xml.replace(/<em>([^>]+?)( +)<\/em>/g, "<em>$1</em>$2");
|
||||
xml = xml.replace(/<em> ([a-zA-Z]+)<\/em>/g, " <em>$1</em>");
|
||||
xml = xml.replace(/<em>‘\s*([^<]+)\s*’<\/em>/g, "‘<em>$1</em>’");
|
||||
xml = xml.replace(/<em>‘\s*([^<]+)\s*<\/em>\s*’/g, "‘<em>$1</em>’");
|
||||
xml = xml.replace(/‘\s*<em>\s*([^<]+)\s*’<\/em>/g, "‘<em>$1</em>’");
|
||||
xml = xml.replace(/<em>“\s*([^<”]+)\s*”<\/em>/g, "“<em>$1</em>”");
|
||||
xml = xml.replace(/<em>“\s*([^<”]+)\s*<\/em>\s*”/g, "“<em>$1</em>”");
|
||||
xml = xml.replace(/“\s*<em>\s*([^<”]+)\s*”<\/em>/g, "“<em>$1</em>”");
|
||||
xml = xml.replace(/([^\n>])<em> ?/g, "$1 <em>");
|
||||
xml = xml.replace(/ ?<\/em>/g, "</em> ");
|
||||
xml = xml.replace(/<p([^>]+)> <em>/g, "<p$1><em>");
|
||||
xml = xml.replace(/<\/em> <\/p>/g, "</em></p>");
|
||||
xml = xml.replace(/<em>([a-z]+),<\/em>/g, "<em>$1</em>,");
|
||||
xml = xml.replace(/<\/em>‘s/ug, "’s</em>");
|
||||
xml = xml.replace(/<em><\/em>/ug, "");
|
||||
xml = xml.replace(/<\/em><em>/ug, "");
|
||||
xml = xml.replace(/<em>(\s?\s?[^A-Za-z]\s?\s?)<\/em>/ug, "$1");
|
||||
xml = xml.replace(/<\/em>(\s?\s?[^A-Za-z]\s?\s?)<em>/ug, "$1");
|
||||
xml = xml.replace(/“<em>([^>]+)<\/em>(!|\?|\.)”/ug, "“<em>$1$2</em>”");
|
||||
xml = xml.replace(/<p><em>([^>]+)<\/em>(!|\?|\.)<\/p>/ug, "<p><em>$1$2</em></p>");
|
||||
xml = xml.replace(/(!|\?|\.)\s{2}<\/em><\/p>/ug, "$1</em></p>");
|
||||
xml = xml.replace(/<em>([a-z]+)(\?|\.)<\/em>/ug, "<em>$1</em>$2");
|
||||
xml = xml.replace(/<em>([^>]+?)( +)<\/em>/ug, "<em>$1</em>$2");
|
||||
xml = xml.replace(/<em> ([a-zA-Z]+)<\/em>/ug, " <em>$1</em>");
|
||||
xml = xml.replace(/<em>‘\s*([^<]+)\s*’<\/em>/ug, "‘<em>$1</em>’");
|
||||
xml = xml.replace(/<em>‘\s*([^<]+)\s*<\/em>\s*’/ug, "‘<em>$1</em>’");
|
||||
xml = xml.replace(/‘\s*<em>\s*([^<]+)\s*’<\/em>/ug, "‘<em>$1</em>’");
|
||||
xml = xml.replace(/<em>“\s*([^<”]+)\s*”<\/em>/ug, "“<em>$1</em>”");
|
||||
xml = xml.replace(/<em>“\s*([^<”]+)\s*<\/em>\s*”/ug, "“<em>$1</em>”");
|
||||
xml = xml.replace(/“\s*<em>\s*([^<”]+)\s*”<\/em>/ug, "“<em>$1</em>”");
|
||||
xml = xml.replace(/([^\n>])<em> ?/ug, "$1 <em>");
|
||||
xml = xml.replace(/ ?<\/em>/ug, "</em> ");
|
||||
xml = xml.replace(/<p([^>]+)> <em>/ug, "<p$1><em>");
|
||||
xml = xml.replace(/<\/em> <\/p>/ug, "</em></p>");
|
||||
xml = xml.replace(/<em>([a-z]+),<\/em>/ug, "<em>$1</em>,");
|
||||
}
|
||||
|
||||
// These quote/apostrophe/em fixes interact with each other. TODO: try to disentangle so we don't repeat all of
|
||||
// fixEms.
|
||||
xml = xml.replace(/,” <\/em>/g, "</em>,” ");
|
||||
xml = xml.replace(/,” <\/em>/ug, "</em>,” ");
|
||||
fixEms();
|
||||
xml = xml.replace(/<p>”/g, "<p>“");
|
||||
xml = xml.replace(/“\s*<\/p>/g, "”</p>");
|
||||
xml = xml.replace(/“\s*<\/em><\/p>/g, "</em>”</p>");
|
||||
xml = xml.replace(/‘\s*<\/p>/g, "’</p>");
|
||||
xml = xml.replace(/‘\s*<\/em><\/p>/g, "’</em></p>");
|
||||
xml = xml.replace(/,” <\/em>/g, "</em>,” ");
|
||||
xml = xml.replace(/′/g, "’");
|
||||
xml = xml.replace(/″/g, "”");
|
||||
xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
|
||||
xml = xml.replace(/I‘m/g, "I’m");
|
||||
xml = xml.replace(/<p>“\s+/g, "<p>“");
|
||||
xml = xml.replace(/'/g, "’");
|
||||
xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
|
||||
xml = xml.replace(/([a-z])”<\/p>/g, "$1.”</p>");
|
||||
xml = xml.replace(/<p>”/ug, "<p>“");
|
||||
xml = xml.replace(/“\s*<\/p>/ug, "”</p>");
|
||||
xml = xml.replace(/“\s*<\/em><\/p>/ug, "</em>”</p>");
|
||||
xml = xml.replace(/‘\s*<\/p>/ug, "’</p>");
|
||||
xml = xml.replace(/‘\s*<\/em><\/p>/ug, "’</em></p>");
|
||||
xml = xml.replace(/,” <\/em>/ug, "</em>,” ");
|
||||
xml = xml.replace(/′/ug, "’");
|
||||
xml = xml.replace(/″/ug, "”");
|
||||
xml = xml.replace(/([A-Za-z])‘s(\s?)/ug, "$1’s$2");
|
||||
xml = xml.replace(/I‘m/ug, "I’m");
|
||||
xml = xml.replace(/<p>“\s+/ug, "<p>“");
|
||||
xml = xml.replace(/\s+”/ug, "”");
|
||||
xml = xml.replace(/'/ug, "’");
|
||||
xml = xml.replace(/’([A-Za-z]+)’/ug, "‘$1’");
|
||||
xml = xml.replace(/([a-z])”<\/p>/ug, "$1.”</p>");
|
||||
fixEms();
|
||||
xml = xml.replace(/‘<em>([^<]+)<\/em>‘/g, "‘<em>$1</em>’");
|
||||
xml = xml.replace(/<em>([a-z]+)!<\/em>/g, "<em>$1</em>!");
|
||||
xml = xml.replace(/(?<! {2})<em>([\w ’]+)([!.?])”<\/em>/g, "<em>$1</em>$2”");
|
||||
xml = xml.replace(/<em>([\w ’]+[!.?])”<\/em>/g, "<em>$1</em>”");
|
||||
xml = xml.replace(/I”(m|ll)/g, "I’$1");
|
||||
xml = xml.replace(/””<\/p>/g, "”</p>");
|
||||
xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “");
|
||||
xml = xml.replace(/(?<!“)<em>([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "<em>$1</em>,");
|
||||
xml = xml.replace(/‘<em>([^<]+)<\/em>‘/ug, "‘<em>$1</em>’");
|
||||
xml = xml.replace(/<em>([a-z]+)!<\/em>/ug, "<em>$1</em>!");
|
||||
xml = xml.replace(/(?<! {2})<em>([\w ’]+)([!.?])”<\/em>/ug, "<em>$1</em>$2”");
|
||||
xml = xml.replace(/<em>([\w ’]+[!.?])”<\/em>/ug, "<em>$1</em>”");
|
||||
xml = xml.replace(/I”(m|ll)/ug, "I’$1");
|
||||
xml = xml.replace(/””<\/p>/ug, "”</p>");
|
||||
xml = xml.replace(/^([^“]+?) ?”(?![ —<])/ugm, "$1 “");
|
||||
xml = xml.replace(/(?<!“)<em>([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/u, "<em>$1</em>,");
|
||||
xml = xml.replace(/‘([Kk])ay(?!’)/ug, "’$1ay");
|
||||
xml = xml.replace(/<em>(Why|What|Who|How|Where|When)<\/em>\?/ug, "<em>$1?</em>");
|
||||
xml = xml.replace(/,<\/em>/ug, "</em>,");
|
||||
xml = xml.replace(/,”<\/p>/ug, ".”</p>");
|
||||
xml = xml.replace(/<p>(.*),<\/p>/ug, "<p>$1.</p>");
|
||||
xml = xml.replace(/‘(\w+)‘(\w+)’/ug, "‘$1’$2’");
|
||||
xml = xml.replace(/<em>([a-z]+), ([a-z]+)<\/em>/ug, "<em>$1</em>, <em>$2</em>");
|
||||
|
||||
// Similar problems occur in Ward with <b> and <strong> as occur in Worm with <em>s
|
||||
xml = xml.replace(/<b \/>/g, "");
|
||||
xml = xml.replace(/<b>(\s*<br \/>\s*)<\/b>/g, "$1");
|
||||
xml = xml.replace(/<strong>(\s*<br \/>\s*)<\/strong>/g, "$1");
|
||||
xml = xml.replace(/<\/strong>(\s*)<strong>/g, "$1");
|
||||
xml = xml.replace(/<strong>@<\/strong>/g, "@");
|
||||
xml = xml.replace(/<br \/>(\s*)<\/strong>/g, "</strong><br />$1");
|
||||
xml = xml.replace(/(\s*)<\/strong>/g, "</strong>$1");
|
||||
xml = xml.replace(/><strong>(.*)<\/strong>:</g, "><strong>$1:</strong><");
|
||||
xml = xml.replace(/<b \/>/ug, "");
|
||||
xml = xml.replace(/<b>(\s*<br \/>\s*)<\/b>/ug, "$1");
|
||||
xml = xml.replace(/<strong>(\s*<br \/>\s*)<\/strong>/ug, "$1");
|
||||
xml = xml.replace(/<\/strong>(\s*)<strong>/ug, "$1");
|
||||
xml = xml.replace(/<strong>@<\/strong>/ug, "@");
|
||||
xml = xml.replace(/<br \/>(\s*)<\/strong>/ug, "</strong><br />$1");
|
||||
xml = xml.replace(/(\s*)<\/strong>/ug, "</strong>$1");
|
||||
xml = xml.replace(/><strong>(.*)<\/strong>:</ug, "><strong>$1:</strong><");
|
||||
|
||||
// No need for line breaks before paragraph ends
|
||||
// No need for line breaks before paragraph ends or after paragraph starts
|
||||
// These often occur with the <br>s inside <b>/<strong>/<em>/<i> fixed above.
|
||||
xml = xml.replace(/<br \/>\s*<\/p>/g, "</p>");
|
||||
xml = xml.replace(/<br \/>\s*<\/p>/ug, "</p>");
|
||||
xml = xml.replace(/<p><br \/>\s*/ug, "<p>");
|
||||
|
||||
// This is another quote fix but it needs to happen after the line break deletion... so entangled, ugh.
|
||||
xml = xml.replace(/<\/em>\s*“\s*<\/p>/ug, "</em>”</p>");
|
||||
|
||||
// Fix missing spaces after commas
|
||||
xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
|
||||
xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/ug, "$1, $2");
|
||||
|
||||
// Fix bad periods and spacing/markup surrounding them
|
||||
xml = xml.replace(/\.\.<\/p>/g, ".</p>");
|
||||
xml = xml.replace(/\.\.”<\/p>/g, ".”</p>");
|
||||
xml = xml.replace(/ \. /g, ". ");
|
||||
xml = xml.replace(/ \.<\/p>/g, ".</p>");
|
||||
xml = xml.replace(/\.<em>\.\./g, "<em>…");
|
||||
xml = xml.replace(/\.\. {2}/g, ". ");
|
||||
xml = xml.replace(/\.\.<\/p>/ug, ".</p>");
|
||||
xml = xml.replace(/\.\.”<\/p>/ug, ".”</p>");
|
||||
xml = xml.replace(/ \. /ug, ". ");
|
||||
xml = xml.replace(/ \.<\/p>/ug, ".</p>");
|
||||
xml = xml.replace(/\.<em>\.\./ug, "<em>…");
|
||||
xml = xml.replace(/\.\. {2}/ug, ". ");
|
||||
xml = xml.replace(/\.\./ug, "…");
|
||||
xml = xml.replace(/(?<!Mr|Ms|Mrs)…\./ug, "…");
|
||||
xml = xml.replace(/(?<=Mr|Ms|Mrs)…\./ug, ".…");
|
||||
|
||||
// Fix extra spaces
|
||||
xml = xml.replace(/ ? <\/p>/g, "</p>");
|
||||
xml = xml.replace(/([a-z]) ,/g, "$1,");
|
||||
xml = xml.replace(/ ? <\/p>/ug, "</p>");
|
||||
xml = xml.replace(/([a-z]) ,/ug, "$1,");
|
||||
|
||||
// Use actual emojis instead of images
|
||||
xml = xml.replace(
|
||||
// eslint-disable-next-line max-len
|
||||
/<img width="16" height="16" class="wp-smiley emoji" draggable="false" alt="O_o" src="https:\/\/s1.wp.com\/wp-content\/mu-plugins\/wpcom-smileys\/o_O.svg" style="height: 1em; max-height: 1em;" \/>/ug,
|
||||
"🤨"
|
||||
);
|
||||
|
||||
xml = fixTruncatedWords(xml);
|
||||
xml = fixDialogueTags(xml);
|
||||
|
|
@ -255,9 +289,9 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
`Update substitutions.json for a more precise substitution.`);
|
||||
}
|
||||
|
||||
xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
|
||||
xml = xml.replace(new RegExp(escapeRegExp(substitution.before), "u"), substitution.after);
|
||||
} else if (substitution.regExp) {
|
||||
xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
|
||||
xml = xml.replace(new RegExp(substitution.regExp, "ug"), substitution.replacement);
|
||||
} else {
|
||||
warnings.push(`Invalid substitution specified for ${chapter.url}`);
|
||||
}
|
||||
|
|
@ -266,32 +300,42 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
// Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a <html>.
|
||||
// Use this opportunity to insert a comment pointing to the original URL, for reference.
|
||||
xml = xml.replace(
|
||||
/<body xmlns="http:\/\/www.w3.org\/1999\/xhtml">/,
|
||||
`<body>\n<!-- ${chapter.url} -->\n`);
|
||||
/<body xmlns="http:\/\/www.w3.org\/1999\/xhtml">/u,
|
||||
`<body>\n<!-- ${chapter.url} -->\n`
|
||||
);
|
||||
|
||||
return { xml, warnings };
|
||||
}
|
||||
|
||||
function fixTruncatedWords(xml) {
|
||||
xml = xml.replace(/‘Sup/g, "’Sup");
|
||||
xml = xml.replace(/‘cuz/g, "’cuz");
|
||||
xml = xml.replace(/‘Sup/ug, "’Sup");
|
||||
xml = xml.replace(/‘cuz/ug, "’cuz");
|
||||
|
||||
// Short for "Sidepiece"
|
||||
xml = xml.replace(/[‘’][Pp]iece(?![a-z])/g, "’Piece");
|
||||
xml = xml.replace(/[‘’][Pp]iece(?![a-z])/ug, "’Piece");
|
||||
|
||||
// Short for "Disjoint"
|
||||
xml = xml.replace(/[‘’][Jj]oint(?![a-z])/g, "’Joint");
|
||||
xml = xml.replace(/[‘’][Jj]oint(?![a-z])/ug, "’Joint");
|
||||
|
||||
// Short for "Contender"
|
||||
xml = xml.replace(/[‘’][Tt]end(?![a-z])/g, "’Tend");
|
||||
xml = xml.replace(/[‘’][Tt]end(?![a-z])/ug, "’Tend");
|
||||
|
||||
// Short for "Anelace"
|
||||
xml = xml.replace(/[‘’][Ll]ace(?![a-z])/ug, "’Lace");
|
||||
|
||||
// Short for "Birdcage"
|
||||
xml = xml.replace(/[‘’][Cc]age(?![a-z])/ug, "’Cage");
|
||||
|
||||
// We can't do "’Clear" (short for Crystalclear) here because it appears too much as a normal word preceded by an
|
||||
// open quote, so we do that in substitutions.json.
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
||||
function fixDialogueTags(xml) {
|
||||
// Fix recurring miscapitalization with questions
|
||||
xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
|
||||
xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
|
||||
xml = xml.replace(/\?”\s\s?She asked/ug, "?” she asked");
|
||||
xml = xml.replace(/\?”\s\s?He asked/ug, "?” he asked");
|
||||
|
||||
// The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
|
||||
// > “I didn’t get much done,” Greg said, “I got distracted by...
|
||||
|
|
@ -307,98 +351,125 @@ function fixDialogueTags(xml) {
|
|||
// This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
|
||||
// times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
|
||||
// capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
|
||||
xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
|
||||
xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/ug, ",” $1. “$2");
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
||||
/**
 * Standardizes how foreign-language names are rendered.
 *
 * Adds the diacritics missing from "Yangban" and strips <em> wrappers from a fixed list of names, including any text
 * that follows the name inside the same <em> element.
 *
 * @param {string} xml - The chapter XML to fix.
 * @returns {string} The XML with foreign names normalized.
 */
function fixForeignNames(xml) {
  // This is consistently missing diacritics
  xml = xml.replace(/Yangban/ug, "Yàngbǎn");

  // These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
  // italicized, so we go in the direction of removing the italics.
  xml = xml.replace(/<em>Garama<\/em>/ug, "Garama");
  xml = xml.replace(/<em>Thanda<\/em>/ug, "Thanda");
  xml = xml.replace(/<em>Sifara([^<]*)<\/em>/ug, "Sifara$1");
  xml = xml.replace(/<em>Moord Nag([^<]*)<\/em>/ug, "Moord Nag$1");
  xml = xml.replace(/<em>Califa de Perro([^<]*)<\/em>/ug, "Califa de Perro$1");
  xml = xml.replace(/<em>Turanta([^<]*)<\/em>/ug, "Turanta$1");

  return xml;
}
|
||||
|
||||
/**
 * Rewrites minority spellings of character names to the majority spelling used in the books.
 *
 * The counts in the comments below record how often each variant occurs and so which one wins.
 *
 * @param {string} xml - The chapter XML to fix.
 * @returns {string} The XML with name spellings standardized.
 */
function standardizeNames(xml) {
  // 197 instances of "Mrs." to 21 of "Ms."
  xml = xml.replace(/Ms\. Yamada/ug, "Mrs. Yamada");

  // 25 instances of "Amias" to 3 of "Amais"
  xml = xml.replace(/Amais/ug, "Amias");

  // 185 instances of Juliette to 4 of Juliet. The word-boundary lookahead leaves existing "Juliette" untouched.
  xml = xml.replace(/Juliet(?=\b)/ug, "Juliette");

  // Earlier chapters have a space; later ones do not. They're separate words, so side with the earlier chapters.
  // One location is missing the "k".
  xml = xml.replace(/Crock? o[‘’]Shit/ug, "Crock o’ Shit");

  // 5 instances of "Jotun" to 2 of "Jotunn"
  xml = xml.replace(/Jotunn/ug, "Jotun");

  // 13 instances of Elman to 1 of Elmann
  xml = xml.replace(/Elmann/ug, "Elman");

  // Thousands of instances of Tattletale to 4 instances of Tatteltale
  xml = xml.replace(/Tatteltale/ug, "Tattletale");

  // 73 instances of Über to 2 of Uber
  xml = xml.replace(/Uber/ug, "Über");

  return xml;
}
|
||||
|
||||
/**
 * Converts hyphens and en dashes that are being used as em dashes into real em dashes (—).
 *
 * The rules are order-dependent: the specific contexts (next to quotes, tags, paragraph edges) are handled first, and
 * the generic "hyphen next to whitespace" rules run afterwards on whatever is left.
 *
 * @param {string} xml - The chapter XML to fix.
 * @returns {string} The XML with em dashes normalized.
 */
function fixEmDashes(xml) {
  // Spaced en dash
  xml = xml.replace(/ – /ug, "—");

  // Hyphen right after an opening quote (optionally inside <em>)
  xml = xml.replace(/“((?:<em>)?)-/ug, "“$1—");

  // Hyphen right before a closing quote, dropping any stray comma/period but keeping "!" or "?"
  xml = xml.replace(/-[,.]?”/ug, "—”");
  xml = xml.replace(/-(!|\?)”/ug, "—$1”");
  xml = xml.replace(/-[,.]?<\/([a-z]+)>”/ug, "—</$1>”");

  // NOTE(review): this turns an opening quote into a closing quote; presumably the source mistypes the quote after a
  // trailing hyphen. Confirm against the text before changing.
  xml = xml.replace(/-“/ug, "—”");

  // Hyphen at the start or end of a paragraph, line, or inline element closing a paragraph
  xml = xml.replace(/<p>-/ug, "<p>—");
  xml = xml.replace(/-<\/p>/ug, "—</p>");
  xml = xml.replace(/-<br \/>/ug, "—<br />");
  xml = xml.replace(/-<\/([a-z]+)><\/p>/ug, "—</$1></p>");

  // Any remaining en dash, and any hyphen with whitespace on either side; surrounding whitespace is swallowed
  xml = xml.replace(/\s?\s?–\s?\s?/ug, "—");
  xml = xml.replace(/-\s\s?/ug, "—");
  xml = xml.replace(/\s?\s-/ug, "—");
  xml = xml.replace(/\s+—”/ug, "—”");

  // Stutters and interrupted questions
  xml = xml.replace(/I-I/ug, "I—I");
  xml = xml.replace(/I-uh/ug, "I—uh");
  xml = xml.replace(/-\?/ug, "—?");

  return xml;
}
|
||||
|
||||
/**
 * Replaces the hyphen in joint names (two names or terms of equal weight) with an en dash (–).
 *
 * Only the explicitly listed pairs are touched; ordinary hyphenated words are left alone.
 *
 * @param {string} xml - The chapter XML to fix.
 * @returns {string} The XML with joint names using en dashes.
 */
function enDashJointNames(xml) {
  // Joint names should use en dashes
  xml = xml.replace(/Dallon-Pelham/ug, "Dallon–Pelham");
  xml = xml.replace(/Bet-Gimel/ug, "Bet–Gimel");
  // Only the dash may change here: the replacement must keep "Cheit", not rewrite it to "Bet".
  xml = xml.replace(/Cheit-Gimel/ug, "Cheit–Gimel");
  xml = xml.replace(/Tristan-Capricorn/ug, "Tristan–Capricorn");
  xml = xml.replace(/Capricorn-Byron/ug, "Capricorn–Byron");
  xml = xml.replace(/Tristan-Byron/ug, "Tristan–Byron");
  xml = xml.replace(/Gimel-Europe/ug, "Gimel–Europe");
  xml = xml.replace(/G-N/ug, "G–N");
  xml = xml.replace(/Imp-Damsel/ug, "Imp–Damsel");
  xml = xml.replace(/Damsel-Ashley/ug, "Damsel–Ashley");
  xml = xml.replace(/Antares-Anelace/ug, "Antares–Anelace");
  xml = xml.replace(/Challenger-Gallant/ug, "Challenger–Gallant");
  xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/ug, "Undersider$1–$2");
  xml = xml.replace(/Norwalk-Fairfield/ug, "Norwalk–Fairfield");
  // This one is also lowercased, not just re-dashed.
  xml = xml.replace(/East-West/ug, "east–west");
  xml = xml.replace(/Creutzfeldt-Jakob/ug, "Creutzfeldt–Jakob");
  xml = xml.replace(/Astaroth-Nidhug/ug, "Astaroth–Nidhug");
  xml = xml.replace(/Capulet-Montague/ug, "Capulet–Montague");
  xml = xml.replace(/Weaver-Clockblocker/ug, "Weaver–Clockblocker");
  xml = xml.replace(/Alexandria-Pretender/ug, "Alexandria–Pretender");
  xml = xml.replace(/Night Hag-Nyx/ug, "Night Hag–Nyx");
  xml = xml.replace(/Crawler-Breed/ug, "Crawler–Breed");
  xml = xml.replace(/Simurgh-Myrddin-plant/ug, "Simurgh–Myrddin–plant");
  xml = xml.replace(/Armsmaster-Defiant/ug, "Armsmaster–Defiant");
  xml = xml.replace(/Matryoshka-Valentin/ug, "Matryoshka–Valentin");
  xml = xml.replace(/Gaea-Eden/ug, "Gaea–Eden");
  xml = xml.replace(/([Aa])gent-parahuman/ug, "$1gent–parahuman");
  xml = xml.replace(/([Pp])arahuman-agent/ug, "$1arahuman–agent");

  return xml;
}
|
||||
|
||||
/**
 * Fixes possessive forms of names.
 *
 * Listed names ending in "s" get "’s" when the text only has a bare "’". "Marquis" goes the other way, and
 * "Warden’s" is rewritten to the plural possessive "Wardens’".
 *
 * @param {string} xml - The chapter XML to fix.
 * @returns {string} The XML with possessives corrected.
 */
function fixPossessives(xml) {
  // Fix possessive of names ending in "s".
  // The lookbehind skips names that directly follow an opening single quote, where the trailing ’ is presumably a
  // closing quote rather than a possessive. The periods in "Mrs." / "Ms." are escaped so they only match a literal ".".
  xml = xml.replace(
    // eslint-disable-next-line max-len
    /(?<!‘)(Judas|Brutus|Jess|Aegis|Dauntless|Circus|Sirius|Brooks|Genesis|Atlas|Lucas|Gwerrus|Chris|Eligos|Animos|Mags|Huntress|Hephaestus|Lord of Loss|John Combs|Mama Mathers|Monokeros|Goddess|Boundless|Paris|Tress|Harris|Antares|Nieves|Backwoods|Midas|Mrs\. Sims|Ms\. Stillons|Chuckles|Amias|Semiramis|Mother of Mothers)’(?!s)/ug,
    "$1’s"
  );

  // Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
  xml = xml.replace(/Marquis’s/ug, "Marquis’");

  // This one is not just missing the extra "s"; it's often misplaced.
  xml = xml.replace(/Warden’s/ug, "Wardens’");

  return xml;
}
|
||||
|
|
@ -407,16 +478,25 @@ function cleanSceneBreaks(xml) {
|
|||
// Normalize scene breaks. <hr> would be more semantically appropriate, but loses the author's intent. This is
|
||||
// especially the case in Ward, which uses a variety of different scene breaks.
|
||||
|
||||
xml = xml.replace(/<p(?:[^>]*)>■<\/p>/g, `<p style="text-align: center;">■</p>`);
|
||||
xml = xml.replace(/<p(?:[^>]*)>■<\/p>/ug, `<p style="text-align: center;">■</p>`);
|
||||
|
||||
xml = xml.replace(/<p style="text-align: center;"><strong>⊙<\/strong><\/p>/g, `<p style="text-align: center;">⊙</p>`);
|
||||
xml = xml.replace(/<p style="text-align: center;"><em><strong>⊙<\/strong><\/em><\/p>/g,
|
||||
`<p style="text-align: center;">⊙</p>`);
|
||||
xml = xml.replace(/<p style="text-align: center;"><strong>⊙⊙<\/strong><\/p>/g,
|
||||
`<p style="text-align: center;">⊙</p>`);
|
||||
xml = xml.replace(
|
||||
/<p style="text-align: center;"><strong>⊙<\/strong><\/p>/ug,
|
||||
`<p style="text-align: center;">⊙</p>`
|
||||
);
|
||||
xml = xml.replace(
|
||||
/<p style="text-align: center;"><em><strong>⊙<\/strong><\/em><\/p>/ug,
|
||||
`<p style="text-align: center;">⊙</p>`
|
||||
);
|
||||
xml = xml.replace(
|
||||
/<p style="text-align: center;"><strong>⊙⊙<\/strong><\/p>/ug,
|
||||
`<p style="text-align: center;">⊙</p>`
|
||||
);
|
||||
|
||||
xml = xml.replace(/<p style="text-align: center;"><strong>⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/g,
|
||||
`<p style="text-align: center;">⊙ ⊙ ⊙ ⊙ ⊙</p>`);
|
||||
xml = xml.replace(
|
||||
/<p style="text-align: center;"><strong>⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/ug,
|
||||
`<p style="text-align: center;">⊙ ⊙ ⊙ ⊙ ⊙</p>`
|
||||
);
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
|
@ -425,83 +505,167 @@ function fixCapitalization(xml, book) {
|
|||
// This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
|
||||
// it's incorrect to capitalize in the one-off fixes.
|
||||
// Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
|
||||
xml = xml.replace(/([Tt])he clairvoyant([^s])/g, "$1he Clairvoyant$2");
|
||||
xml = xml.replace(/([Tt])he clairvoyant(?!s)/ug, "$1he Clairvoyant");
|
||||
|
||||
// ReSound's name is sometimes miscapitalized. The word is never used in a non-name context.
|
||||
xml = xml.replace(/Resound/g, "ReSound");
|
||||
xml = xml.replace(/Resound/ug, "ReSound");
|
||||
|
||||
// The Speedrunners team name is missing its capitalization a couple times.
|
||||
xml = xml.replace(/speedrunners/g, "Speedrunners");
|
||||
xml = xml.replace(/speedrunners/ug, "Speedrunners");
|
||||
|
||||
// The Machine Army is missing its capitalization a couple times.
|
||||
xml = xml.replace(/machine army/ug, "Machine Army");
|
||||
|
||||
// "patrol block" is capitalized three different ways: "patrol block", "Patrol block", and "Patrol Block". "patrol
|
||||
// group" is always lowercased. It seems like "Patrol" is a proper name, and is used as a capitalized modifier in
|
||||
// other contexts (e.g. Patrol leader). So let's standardize on "Patrol <lowercase>".
|
||||
xml = xml.replace(/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus|training)/ig,
|
||||
(_, $1) => `Patrol ${$1.toLowerCase()}`);
|
||||
// This always works in Ward and has a few false positives in Worm, where it is never needed:
|
||||
xml = xml.replace(
|
||||
/patrol (block|group|leader|guard|student|uniform|squad|soldier|officer|crew|girl|bus|training)/uig,
|
||||
(_, $1) => `Patrol ${$1.toLowerCase()}`
|
||||
);
|
||||
// This usually works in Ward (some instances corrected back in substitutions.json), and has a few false positives in
|
||||
// Worm, where it is never needed:
|
||||
if (book === "ward") {
|
||||
xml = xml.replace(/the patrol/g, "the Patrol");
|
||||
xml = xml.replace(/the patrol(?!s|ling)/ug, "the Patrol");
|
||||
}
|
||||
|
||||
// This is sometimes missing its capitalization.
|
||||
xml = xml.replace(/the birdcage/g, "the Birdcage");
|
||||
xml = xml.replace(/the birdcage/ug, "the Birdcage");
|
||||
|
||||
// There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.)
|
||||
xml = xml.replace(/Halberd/g, "halberd");
|
||||
xml = xml.replace(/Loft/g, "loft");
|
||||
// There's no reason why these should be capitalized.
|
||||
xml = xml.replace(/(?<! {2}|“|>)Halberd/ug, "halberd");
|
||||
xml = xml.replace(/(?<! {2}|“|>)Loft/ug, "loft");
|
||||
|
||||
// These are treated as common nouns and not traditionally capitalized. "Krav Maga" remains capitalized,
|
||||
// interestingly (according to dictionaries and Wikipedia).
|
||||
xml = xml.replace(/(?<! {2}|“|>)Judo/ug, "judo");
|
||||
xml = xml.replace(/(?<! {2}|“|>)Aikido/ug, "aikido");
|
||||
xml = xml.replace(/(?<! {2}|“|>)Karate/ug, "karate");
|
||||
xml = xml.replace(/(?<! {2}|“|>)Tae Kwon Do/ug, "tae kwon do");
|
||||
|
||||
// There's no reason why university should be capitalized in most contexts, although sometimes it's used as part of
|
||||
// a compound noun or at the beginning of a sentence.
|
||||
xml = xml.replace(/(?<! {2}|“|Cornell |Nilles )University(?! Road)/, "university");
|
||||
xml = xml.replace(/(?<! {2}|“|>|Cornell |Nilles )University(?! Road)/ug, "university");
|
||||
|
||||
// Especially early in the story, PRT designations are capitalized; they should not be. This fixes the cases where we
|
||||
// Organ names (e.g. brain, arm) or scientific names are not capitalized, so the "corona pollentia" and friends should
|
||||
// not be either. The books are inconsistent.
|
||||
xml = xml.replace(/(?<! {2}|“|>|-)Corona/ug, "corona");
|
||||
xml = xml.replace(/Pollentia/ug, "pollentia");
|
||||
xml = xml.replace(/Radiata/ug, "radiata");
|
||||
xml = xml.replace(/Gemma/ug, "gemma");
|
||||
|
||||
// We de-capitalize Valkyrie's "flock", since most uses are de-capitalized (e.g. the many instances in Gleaming
|
||||
// Interlude 9, or Dying 15.z). This is a bit surprising; it seems like an organization name. But I guess it's
|
||||
// informal.
|
||||
xml = xml.replace(/(?<! {2}|“|>)Flock/ug, "flock");
|
||||
|
||||
// Especially early in Worm, PRT designations are capitalized; they should not be. This fixes the cases where we
|
||||
// can be reasonably sure they don't start a sentence, although more specific instances are done in
|
||||
// substitutions.json, and some need to be back-corrected.
|
||||
//
|
||||
// Note: "Master" is specifically omitted because it fails poorly on Interlude 4. Other instances need to be
|
||||
// Note: "Master" is specifically omitted because it fails poorly on Worm Interlude 4. Other instances need to be
|
||||
// corrected via substitutions.json.
|
||||
//
|
||||
// This also over-de-capitalizes "The Stranger" in Ward (a titan name). Those also get fixed in substitutions.json.
|
||||
xml = xml.replace(
|
||||
/([a-zA-Z,] |\/)(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)/g,
|
||||
(_, prefix, designation) => prefix + designation.toLowerCase()
|
||||
// eslint-disable-next-line max-len
|
||||
/(?<! {2}|“|>|\n|: )(Mover|Shaker|Brute|Breaker|Tinker|Blaster|Thinker|Striker|Changer|Trump|Stranger|Shifter|Shaper)(?! [A-Z])/ug,
|
||||
(_, designation) => designation.toLowerCase()
|
||||
);
|
||||
xml = xml.replace(
|
||||
/(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)-(\d+)/gi,
|
||||
/(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)-(\d+)/ugi,
|
||||
"$1 $2"
|
||||
);
|
||||
xml = xml.replace(
|
||||
// eslint-disable-next-line max-len
|
||||
/(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)[ -/](mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)/gi,
|
||||
/(mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)[ -/](mover|shaker|brute|breaker|tinker|blaster|thinker|master|striker|changer|trump|stranger|shifter|shaper)/ugi,
|
||||
"$1–$2"
|
||||
);
|
||||
|
||||
// Capitalization is inconsistent, but shard names seems to usually be capitalized.
|
||||
xml = xml.replace(/Grasping self/ug, "Grasping Self");
|
||||
xml = xml.replace(/Cloven stranger/ug, "Cloven Stranger");
|
||||
xml = xml.replace(/Princess shaper/ug, "Princess Shaper");
|
||||
xml = xml.replace(/Fragile one/ug, "Fragile One");
|
||||
|
||||
// Place names need to always be capitalized
|
||||
xml = xml.replace(/North end/g, "North End");
|
||||
xml = xml.replace(/(Stonemast|Shale) avenue/g, "$1 Avenue");
|
||||
xml = xml.replace(/(Lord|Slater) street/g, "$1 Street");
|
||||
xml = xml.replace(/(Hollow|Cedar) point/g, "$1 Point");
|
||||
xml = xml.replace(/(Norwalk|Fenway|Stratford) station/g, "$1 Station");
|
||||
xml = xml.replace(/the megalopolis/g, "the Megalopolis");
|
||||
xml = xml.replace(/earths(?![a-z])/g, "Earths");
|
||||
xml = xml.replace(/North end/ug, "North End");
|
||||
xml = xml.replace(/(Stonemast|Shale) avenue/ug, "$1 Avenue");
|
||||
xml = xml.replace(/(Lord|Slater) street/ug, "$1 Street");
|
||||
xml = xml.replace(/(Hollow|Cedar) point/ug, "$1 Point");
|
||||
xml = xml.replace(/(Norwalk|Fenway|Stratford) station/ug, "$1 Station");
|
||||
xml = xml.replace(/the megalopolis/ug, "the Megalopolis");
|
||||
xml = xml.replace(/earths(?![a-z])/ug, "Earths");
|
||||
if (book === "ward") {
|
||||
xml = xml.replace(/the bunker/ug, "the Bunker");
|
||||
xml = xml.replace(/‘bunker’/ug, "‘Bunker’");
|
||||
}
|
||||
|
||||
// "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
|
||||
// instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
|
||||
// substitutions.json.
|
||||
xml = xml.replace(/(?<!mom), dad(?![a-z])/g, ", Dad");
|
||||
xml = xml.replace(/, mom(?![a-z-])/g, ", Mom");
|
||||
xml = xml.replace(/(?<!mom), dad(?![a-z])/ug, ", Dad");
|
||||
xml = xml.replace(/, mom(?![a-z-])/ug, ", Mom");
|
||||
|
||||
// Similarly, specific aunts and uncles get capitalized when used as a title. These are often missed.
|
||||
xml = xml.replace(/aunt Sarah/ug, "Aunt Sarah");
|
||||
xml = xml.replace(/aunt Fleur/ug, "Aunt Fleur");
|
||||
xml = xml.replace(/uncle Neil/ug, "Uncle Neil");
|
||||
|
||||
// The majority of "Wardens’ headquarters" is lowercased, and always prefixed with "the", indicating it's not a proper
|
||||
// place name. So we remove the capitalization in the few places where it does appear.
|
||||
xml = xml.replace(/Wardens’ Headquarters/g, "Wardens’ headquarters");
|
||||
xml = xml.replace(/Wardens’ Headquarters/ug, "Wardens’ headquarters");
|
||||
|
||||
// Some style guides try to reserve capitalized "Nazi" for historical discussions of members of the Nazi party. This
|
||||
// seems fuzzy when it comes to phrases like "neo-Nazi", and doesn't seem to be what the author is doing; the books
|
||||
// are just plain inconsistent. So, let's standardize on always uppercasing.
|
||||
xml = xml.replace(/(?<![a-z])nazi/ug, "Nazi");
|
||||
xml = xml.replace(/ Neo-/ug, " neo-");
|
||||
|
||||
// Style guides disagree on whether items like "english muffin", "french toast", and "french kiss" need their
|
||||
// adjective capitalized. The books mostly use lowercase, so let's stick with that. (substitutions.json corrects one
|
||||
// case of "French toast".)
|
||||
xml = xml.replace(/english(?! muffin)/ug, "English");
|
||||
xml = xml.replace(/(?<! {2})English muffin/ug, "english muffin");
|
||||
|
||||
// I was very torn on what to do with capitalization for "Titan" and "Titans". In general you don't capitalize species
|
||||
// names or other classifications, e.g. style guides are quite clear you don't capitalize "gods". The author
|
||||
// capitalizes them more often than not (e.g., 179 raw "Titans" to 49 "titans"), but is quite inconsistent.
|
||||
//
|
||||
// In the end, I decided against de-capitalization, based on the precedent set by "Endbringers" (which are
|
||||
// conceptually paired with Titans several times in the text). However, we only capitalize the class after they are
|
||||
// _introduced_ as a class in Sundown 17.y. (Before then we still capitalize individual names like "Dauntless Titan"
|
||||
// or "Kronos Titan".)
|
||||
if (book === "ward") {
|
||||
// All plural discussions of "Titans" are after Sundown 17.y.
|
||||
xml = xml.replace(/titans/ug, "Titans");
|
||||
|
||||
// Since we can't safely change all instances of "titan", most are in substitutions.json. We can do a few here,
|
||||
// though.
|
||||
xml = xml.replace(/dauntless titan/uig, "Dauntless Titan"); // Sometimes "Dauntless" isn't even capitalized.
|
||||
xml = xml.replace(/Kronos titan/ug, "Kronos Titan");
|
||||
}
|
||||
|
||||
// For the giants, the prevailing usage seems to be to keep the term lowercase, but capitalize when used as a name.
|
||||
xml = xml.replace(/(?<=Mathers |Goddess )giant/ug, "Giant");
|
||||
xml = xml.replace(/mother giant/uig, "Mother Giant");
|
||||
xml = xml.replace(/(?<! {2}|“|>)Giants/ug, "giants");
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
||||
/**
 * Corrects a handful of recurring misspellings and mis-capitalized idioms.
 *
 * @param {string} xml - The chapter XML to fix.
 * @returns {string} The XML with the listed misspellings corrected.
 */
function fixMispellings(xml) {
  // This is commonly misspelled.
  xml = xml.replace(/([Ss])houlderblade/ug, "$1houlder blade");

  // All dictionaries agree this is capitalized.
  xml = xml.replace(/u-turn/ug, "U-turn");

  // https://www.dictionary.com/browse/scot-free
  xml = xml.replace(/scott(?: |-)free/ug, "scot-free");

  // https://grammarist.com/idiom/change-tack/
  xml = xml.replace(/changed tacks/ug, "changed tack");

  return xml;
}
|
||||
|
|
@ -509,46 +673,111 @@ function fixMispellings(xml) {
|
|||
/**
 * Adds missing hyphens to compound modifiers and numbers, and removes hyphens that should not be there.
 *
 * Several rules are heuristics tuned to the books; the comments note where substitutions.json is expected to correct
 * the exceptions back.
 *
 * @param {string} xml - The chapter XML to fix.
 * @returns {string} The XML with hyphenation normalized.
 */
function fixHyphens(xml) {
  // "X-year-old" should use hyphens; all grammar guides agree. The books are very inconsistent but most often omit
  // them.
  xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/ug, "$1-year-old$2");
  xml = xml.replace(/(\w+) or (\w+)-year-old/ug, "$1- or $2-year-old");

  // Compound numbers from 11 through 99 must be hyphenated, but others should not be.
  // "fou?rty" matches the correct spelling "forty" as well as the misspelling "fourty".
  xml = xml.replace(
    /(?<!\w)(twenty|thirty|fou?rty|fifty|sixty|seventy|eighty|ninety) (one|two|three|four|five|six|seven|eight|nine)/uig,
    "$1-$2"
  );
  xml = xml.replace(/[- ]hundred-and-/ug, " hundred and ");
  xml = xml.replace(/(?<!-)(one|two|three|four|five|six|seven|eight|nine|twelve)-hundred/ug, "$1 hundred");
  xml = xml.replace(/(hundred|ninety)-percent(?!-)/ug, "$1 percent");

  // "red-haired", "long-haired", etc.: they all need hyphens
  xml = xml.replace(/ haired/ug, "-haired");

  // These are consistently missing hyphens. The captured first word is reused so its capitalization is preserved.
  xml = xml.replace(/([Ll]ife) threatening/ug, "$1-threatening");
  xml = xml.replace(/([Hh]ard) headed/ug, "$1-headed");
  xml = xml.replace(/([Ss]houlder) mounted/ug, "$1-mounted");
  xml = xml.replace(/([Gg]olden) skinned/ug, "$1-skinned");
  xml = xml.replace(/([Cc]reepy) crawl/ug, "$1-crawl");
  xml = xml.replace(/([Ww]ell) armed/ug, "$1-armed");
  xml = xml.replace(/([Aa]ble) bodied/ug, "$1-bodied");
  xml = xml.replace(/([Ll]evel) headed/ug, "$1-headed");
  xml = xml.replace(/([Cc]lear) cut/ug, "$1-cut");
  xml = xml.replace(/([Vv]at) grown/ug, "$1-grown");
  xml = xml.replace(/([Ss]hell) shocked/ug, "$1-shocked");
  xml = xml.replace(/([Dd]og) tired/ug, "$1-tired");
  xml = xml.replace(/([Nn]ightmare) filled/ug, "$1-filled");
  xml = xml.replace(/([Oo]ne) sided/ug, "$1-sided");
  xml = xml.replace(/([Mm]edium) sized/ug, "$1-sized");
  xml = xml.replace(/([Tt]eary) eyed/ug, "$1-eyed");
  xml = xml.replace(/([Ww]orst) case scenario/ug, "$1-case scenario");
  xml = xml.replace(/([Ss]elf) (conscious|esteem|loathing|harm|destruct|preservation)/ug, "$1-$2");
  xml = xml.replace(/([Oo]ne|[Tt]wo|[Tt]hree|[Ff]our|[Ff]ourth) dimensional/ug, "$1-dimensional");
  xml = xml.replace(/(?<=\b)([Oo]ne) on one(?=\b)/ug, "$1-on-one");

  // Preemptive(ly) is often hyphenated (not always). It should not be.
  xml = xml.replace(/([Pp])re-emptive/ug, "$1reemptive");

  // These should be hyphenated only when used as a verb. We correct those cases back in substitutions.json.
  xml = xml.replace(/fist-bump/ug, "fist bump");
  xml = xml.replace(/high-five/ug, "high five");

  // This should be hyphenated when used as an adjective (instead of an adverb or noun). I.e. it should be
  // "hand-to-hand combat", but "passed from hand to hand", and "capable in hand to hand". The following heuristic works
  // in the books.
  xml = xml.replace(/hand to hand(?= [a-z])/ug, "hand-to-hand");

  // This is usually wrong but sometimes correct. The lookarounds avoid specific cases where it's referring to an actual
  // second in a series of guesses.
  xml = xml.replace(/(?<!my |that )([Ss]econd) guess(?!es)/ug, "$1-guess");

  // When used as a phrase "just in case" gets no hyphens. When used as a noun or adjective it does. A couple of the
  // noun cases are missing one or both hyphens.
  xml = xml.replace(/([Aa]) just[ -]in case/ug, "$1 just-in-case");

  // When used as an adjective, it's hyphenated. It turns out most cases are as an adverb, so we go with this approach:
  xml = xml.replace(
    /face to face(?= meeting| hang-out| interaction| contact| conversation| confrontation| fight)/ug,
    "face-to-face"
  );

  // When used as an adjective, it's hyphenated. This heuristic works in the books.
  xml = xml.replace(/fight or flight(?= [a-z])/ug, "fight-or-flight");

  // This is usually correct but sometimes wrong.
  xml = xml.replace(/neo /ug, "neo-");

  return xml;
}
|
||||
|
||||
/**
 * Normalizes words that the books spell in more than one acceptable way, siding with the majority usage.
 *
 * @param {string} xml - The chapter XML to fix.
 * @returns {string} The XML with spellings standardized.
 */
function standardizeSpellings(xml) {
  // This is usually spelled "TV" but sometimes the other ways. Normalize.
  xml = xml.replace(/(\b)tv(\b)/ug, "$1TV$2");
  xml = xml.replace(/t\.v\./uig, "TV");

  // "okay" is preferred to "ok" or "o.k.". This sometimes gets changed back via substitutions.json when people are
  // writing notes and thus probably the intention was to be less formal. Also it seems per
  // https://en.wikipedia.org/wiki/A-ok the "A" in "A-okay" should be capitalized.
  // The trailing [^a] keeps an existing "okay" from becoming "okayay"; it also means an "ok" at the very end of the
  // input is not matched.
  xml = xml.replace(/Ok([,. ])/ug, "Okay$1");
  xml = xml.replace(/([^a-zA-Z])ok([^a])/ug, "$1okay$2");
  xml = xml.replace(/([^a-zA-Z])o\.k\.([^a])/ug, "$1okay$2");
  xml = xml.replace(/a-okay/ug, "A-okay");

  // Signal(l)ing/signal(l)ed are spelled both ways. Both are acceptable in English. Let's standardize on single-L.
  xml = xml.replace(/(S|s)ignall/ug, "$1ignal");

  // Clich(e|é) is spelled both ways. Let's standardize on including the accent.
  xml = xml.replace(/cliche/ug, "cliché");

  // T-shirt is usually spelled lowercase ("t-shirt"). Normalize the remaining instances.
  // The lookbehind skips occurrences preceded by two spaces.
  xml = xml.replace(/(?<! {2})T-shirt/ug, "t-shirt");

  // "gray" is the majority spelling, except for "greyhound"
  xml = xml.replace(/(G|g)rey(?!hound)/ug, "$1ray");

  // 12 instances of "Dragon-craft", 12 instances of "Dragon craft", 1 instance of "dragon craft"
  xml = xml.replace(/[Dd]ragon[ -](craft|mech)/ug, "Dragon-$1");

  // 88 instances of "A.I." to four of "AI"
  xml = xml.replace(/(?<=\b)AI(?=\b)/ug, "A.I.");

  // 2 instances of "G.M." to one of "GM"
  xml = xml.replace(/(?<=\b)GM(?=\b)/ug, "G.M.");

  return xml;
}
|
||||
|
|
@ -568,12 +797,14 @@ function fixCaseNumbers(xml) {
|
|||
// We standardize on "Case Fifty-Three"; although it isn't the most common, it seems best to treat these as proper
|
||||
// nouns.
|
||||
|
||||
xml = xml.replace(/case[ -](?:fifty[ -]three|53)(?!’)/ig, "Case Fifty-Three");
|
||||
xml = xml.replace(/case[ -](?:thirty[ -]two|53)(?!’)/ig, "Case Thirty-Two");
|
||||
xml = xml.replace(/case[ -](?:sixty[ -]nine|53)(?!’)/ig, "Case Sixty-Nine");
|
||||
xml = xml.replace(/case[ -](?:fifty[ -]three|53)(?!’)/uig, "Case Fifty-Three");
|
||||
xml = xml.replace(/case[ -](?:thirty[ -]two|53)(?!’)/uig, "Case Thirty-Two");
|
||||
xml = xml.replace(/case[ -](?:sixty[ -]nine|53)(?!’)/uig, "Case Sixty-Nine");
|
||||
|
||||
xml = xml.replace(/(?<!in )case[ -](zero|one|two|three|four|twelve|fifteen|seventy|ninety)(?!-)/ig,
|
||||
(_, caseNumber) => "Case " + caseNumber[0].toUpperCase() + caseNumber.substring(1));
|
||||
xml = xml.replace(
|
||||
/(?<!in )case[ -](zero|one|two|three|four|twelve|fifteen|seventy|ninety)(?!-)/uig,
|
||||
(_, caseNumber) => `Case ${caseNumber[0].toUpperCase()}${caseNumber.substring(1)}`
|
||||
);
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
|
@ -587,7 +818,7 @@ function isEmptyOrGarbage(el) {
|
|||
}
|
||||
|
||||
function escapeRegExp(str) {
|
||||
return str.replace(/[-[\]/{}()*+?.\\^$|]/g, "\\$&");
|
||||
return str.replace(/[[\]/{}()*+?.\\^$|]/ug, "\\$&");
|
||||
}
|
||||
|
||||
function decodeCloudFlareEmail(hash) {
|
||||
|
|
|
|||
|
|
@ -34,8 +34,8 @@ module.exports = async (cachePath, manifestPath, contentPath, book, concurrentJo
|
|||
|
||||
warnings.push(...await pool.exec("convertChapter", [chapter, book, inputPath, outputPath]));
|
||||
|
||||
const time = String(Math.round((performance.now() - start) / 1000)).padStart(3) + " s";
|
||||
progress.increment({ time });
|
||||
const seconds = String(Math.round((performance.now() - start) / 1000)).padStart(3);
|
||||
progress.increment({ time: `${seconds} s` });
|
||||
}));
|
||||
|
||||
pool.terminate();
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
"use strict";
|
||||
const path = require("path");
|
||||
const fs = require("fs").promises;
|
||||
const request = require("requisition");
|
||||
const fetch = require("minipass-fetch");
|
||||
const { JSDOM } = require("jsdom");
|
||||
|
||||
const FILENAME_PREFIX = "chapter";
|
||||
|
|
@ -39,34 +39,21 @@ async function downloadAllChapters(manifest, startChapterURL, cachePath, manifes
|
|||
while (currentChapter !== null) {
|
||||
const filename = `${FILENAME_PREFIX}${chapterIndex.toString().padStart(3, "0")}.html`;
|
||||
|
||||
console.log(`Downloading ${currentChapter}`);
|
||||
process.stdout.write(`Downloading ${currentChapter}... `);
|
||||
|
||||
const response = await downloadChapter(currentChapter);
|
||||
const contents = await response.text();
|
||||
console.log("- Response body received");
|
||||
const rawChapterJSDOM = new JSDOM(contents, { url: currentChapter });
|
||||
console.log("- Response body parsed into DOM");
|
||||
const { contents, dom, url } = await downloadChapter(currentChapter);
|
||||
const title = getChapterTitle(dom.window.document);
|
||||
currentChapter = getNextChapterURL(dom.window.document);
|
||||
|
||||
const chapterURLToSave = currentChapter;
|
||||
const chapterTitle = getChapterTitle(rawChapterJSDOM.window.document);
|
||||
currentChapter = getNextChapterURL(rawChapterJSDOM.window.document);
|
||||
|
||||
// TODO: this should probably not be necessary... jsdom bug I guess!?
|
||||
rawChapterJSDOM.window.close();
|
||||
|
||||
manifest.push({
|
||||
url: chapterURLToSave,
|
||||
title: chapterTitle,
|
||||
filename
|
||||
});
|
||||
dom.window.close();
|
||||
|
||||
manifest.push({ url, title, filename });
|
||||
await fs.writeFile(path.resolve(cachePath, filename), contents);
|
||||
console.log("- Response text saved to cache file");
|
||||
|
||||
// Incrementally update the manifest after every successful download, instead of waiting until the end.
|
||||
const newManifestContents = JSON.stringify(manifest, undefined, 2);
|
||||
await fs.writeFile(manifestPath, newManifestContents);
|
||||
console.log("- Manifest updated");
|
||||
process.stdout.write("done\n");
|
||||
|
||||
++chapterIndex;
|
||||
}
|
||||
|
|
@ -78,14 +65,21 @@ function getNextChapterURL(rawChapterDoc) {
|
|||
// - https://parahumans.wordpress.com/2012/04/21/sentinel-9-6/
|
||||
// So instead search for the first <a> within the main content area starting with "Next", trimmed.
|
||||
|
||||
let result = null;
|
||||
const aEls = rawChapterDoc.querySelectorAll(".entry-content a");
|
||||
for (let i = 0; i < aEls.length; ++i) {
|
||||
if (aEls[i].textContent.trim().startsWith("Next")) {
|
||||
return aEls[i].href;
|
||||
result = aEls[i].href;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
// Except, this doesn't always work, because the "Next Chapter" link in
|
||||
// https://www.parahumans.net/2020/04/28/last-20-e6/ is just broken for some reason. We hard-code that.
|
||||
if (result === "https://www.parahumans.net/?p=3365&preview=true") {
|
||||
return "https://www.parahumans.net/2020/05/02/last-20-end/";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function getChapterTitle(rawChapterDoc) {
|
||||
|
|
@ -93,7 +87,7 @@ function getChapterTitle(rawChapterDoc) {
|
|||
// issues down the line where we remove spaces around em dashes during conversion.) In the future it might be nice to
|
||||
// have proper chapter titles, e.g. sections per arc with title pages and then just "1" or similar for the chapter.
|
||||
// Until then this is reasonable and uniform.
|
||||
return rawChapterDoc.querySelector("h1.entry-title").textContent.replace(/ – /, " ");
|
||||
return rawChapterDoc.querySelector("h1.entry-title").textContent.replace(/ – /u, " ");
|
||||
}
|
||||
|
||||
function retry(times, fn) {
|
||||
|
|
@ -106,9 +100,33 @@ function retry(times, fn) {
|
|||
});
|
||||
}
|
||||
|
||||
function downloadChapter(url) {
|
||||
async function downloadChapter(startingURL) {
|
||||
let urlToFollow = startingURL;
|
||||
|
||||
let url, contents, dom;
|
||||
while (urlToFollow !== null) {
|
||||
const response = await downloadWithRetry(urlToFollow);
|
||||
|
||||
url = urlToFollow;
|
||||
contents = await response.text();
|
||||
dom = new JSDOM(contents, { url });
|
||||
|
||||
const refreshMeta = dom.window.document.querySelector("meta[http-equiv=refresh]");
|
||||
if (refreshMeta) {
|
||||
[, urlToFollow] = /\d+;url=(.*)/ui.exec(refreshMeta.content);
|
||||
process.stdout.write(`\n Redirected to ${urlToFollow}... `);
|
||||
dom.window.close();
|
||||
} else {
|
||||
urlToFollow = null;
|
||||
}
|
||||
}
|
||||
|
||||
return { url, contents, dom };
|
||||
}
|
||||
|
||||
function downloadWithRetry(url) {
|
||||
return retry(3, async () => {
|
||||
const response = await request(url).redirects(10);
|
||||
const response = await fetch(url);
|
||||
if (response.status !== 200) {
|
||||
throw new Error(`Response status for ${url} was ${response.status}`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ const path = require("path");
|
|||
const cpr = require("util").promisify(require("cpr"));
|
||||
|
||||
const BOOK_PUBLISHER = "Domenic Denicola";
|
||||
const BOOK_AUTHOR = "wildbow";
|
||||
const BOOK_AUTHOR = "Wildbow";
|
||||
|
||||
const NCX_FILENAME = "toc.ncx";
|
||||
|
||||
|
|
@ -22,6 +22,8 @@ module.exports = async (scaffoldingPath, coverPath, bookPath, contentPath, chapt
|
|||
]);
|
||||
})
|
||||
]);
|
||||
|
||||
console.log(`EPUB contents assembled into ${scaffoldingPath}`);
|
||||
};
|
||||
|
||||
function noThumbs(filePath) {
|
||||
|
|
@ -54,7 +56,7 @@ function writeOPF(chapters, contentPath, coverFiles, bookInfo) {
|
|||
<manifest>
|
||||
<item id="ncx" href="${NCX_FILENAME}" media-type="application/x-dtbncx+xml"/>
|
||||
<item id="cover" href="${coverFiles.xhtml}" media-type="application/xhtml+xml"/>
|
||||
<item id="cover-image" properties="cover-image" href="${coverFiles.image}" media-type="${coverFiles.imageMimeType}"/>
|
||||
<item id="cover-image" href="${coverFiles.image}" media-type="${coverFiles.imageMimeType}"/>
|
||||
${manifestChapters}
|
||||
</manifest>
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -14,9 +14,10 @@ const zip = require("./zip.js");
|
|||
|
||||
const OUTPUT_DEFAULT = "(Book name).epub";
|
||||
|
||||
const argv = yargs
|
||||
const { argv } = yargs
|
||||
.usage(`${packageJson.description}\n\n${packageJson.name} [<command1> [<command2> [<command3> ...]]]\n\n` +
|
||||
"Each command will fail if the previously-listed one has not yet been run (with matching options).")
|
||||
"Each command will fail if the previously-listed one has not yet been run (with matching options).\n\n" +
|
||||
"Running with no commands is equivalent to running download convert scaffold zip.")
|
||||
.command("download", "download all chapters into the cache")
|
||||
.command("convert", "convert the raw HTML into cleaned-up ebook chapters")
|
||||
.command("scaffold", "assemble the table of contents, etc.")
|
||||
|
|
@ -58,11 +59,9 @@ const argv = yargs
|
|||
requiresArg: true,
|
||||
global: true
|
||||
})
|
||||
.demandCommand(1) // TODO remove and allow all
|
||||
.recommendCommands()
|
||||
.help()
|
||||
.version()
|
||||
.argv;
|
||||
.version();
|
||||
|
||||
const outputFilename = argv.out === OUTPUT_DEFAULT ? `${books[argv.book].title}.epub` : argv.out;
|
||||
|
||||
|
|
@ -77,14 +76,18 @@ const chaptersPath = path.resolve(contentPath, "chapters");
|
|||
|
||||
const commands = [];
|
||||
|
||||
if (argv._.length === 0) {
|
||||
argv._ = ["download", "convert", "scaffold", "zip"];
|
||||
}
|
||||
|
||||
if (argv._.includes("download")) {
|
||||
const startURL = books[argv.book].startURL;
|
||||
const { startURL } = books[argv.book];
|
||||
commands.push(() => download(startURL, cachePath, manifestPath));
|
||||
}
|
||||
|
||||
if (argv._.includes("convert")) {
|
||||
commands.push(() => {
|
||||
return fs.rmdir(chaptersPath, { recursive: true, maxRetries: 3 })
|
||||
return fs.rm(chaptersPath, { force: true, recursive: true, maxRetries: 3 })
|
||||
.then(() => fs.mkdir(chaptersPath, { recursive: true }))
|
||||
.then(() => convert(cachePath, manifestPath, chaptersPath, argv.book, argv.jobs));
|
||||
});
|
||||
|
|
@ -93,7 +96,13 @@ if (argv._.includes("convert")) {
|
|||
if (argv._.includes("scaffold")) {
|
||||
const bookInfo = books[argv.book];
|
||||
commands.push(() => scaffold(
|
||||
scaffoldingPath, coverPath, stagingPath, contentPath, chaptersPath, manifestPath, bookInfo
|
||||
scaffoldingPath,
|
||||
coverPath,
|
||||
stagingPath,
|
||||
contentPath,
|
||||
chaptersPath,
|
||||
manifestPath,
|
||||
bookInfo
|
||||
));
|
||||
}
|
||||
|
||||
|
|
@ -106,8 +115,6 @@ if (argv._.includes("zip")) {
|
|||
for (const command of commands) {
|
||||
await command();
|
||||
}
|
||||
|
||||
console.log("All done!");
|
||||
} catch (e) {
|
||||
console.error(e.stack);
|
||||
process.exit(1);
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ module.exports = (bookPath, contentPath, outPath) => {
|
|||
archive.pipe(destStream);
|
||||
|
||||
// Order matters; mimetype must be first for a valid EPUB
|
||||
archive.file(path.resolve(bookPath, "mimetype"), { name: "mimetype" });
|
||||
archive.file(path.resolve(bookPath, "mimetype"), { name: "mimetype", store: true });
|
||||
archive.directory(contentPath, "OEBPS", { name: "OEBPS" });
|
||||
archive.directory(path.resolve(bookPath, "META-INF"), "META-INF", { name: "META-INF" });
|
||||
|
||||
|
|
|
|||
3812
npm-shrinkwrap.json
generated
3812
npm-shrinkwrap.json
generated
File diff suppressed because it is too large
Load diff
19
package.json
19
package.json
|
|
@ -8,7 +8,7 @@
|
|||
"parahuman",
|
||||
"scraper"
|
||||
],
|
||||
"version": "4.6.1",
|
||||
"version": "5.1.0",
|
||||
"author": "Domenic Denicola <d@domenic.me> (https://domenic.me/)",
|
||||
"license": "WTFPL",
|
||||
"repository": "domenic/worm-scraper",
|
||||
|
|
@ -23,18 +23,19 @@
|
|||
"lint": "eslint lib"
|
||||
},
|
||||
"dependencies": {
|
||||
"archiver": "^5.0.2",
|
||||
"cli-progress": "^3.8.2",
|
||||
"archiver": "^5.3.1",
|
||||
"cli-progress": "^3.11.1",
|
||||
"cpr": "^3.0.1",
|
||||
"jsdom": "^16.4.0",
|
||||
"requisition": "^1.5.0",
|
||||
"workerpool": "^6.0.2",
|
||||
"yargs": "^16.1.0"
|
||||
"jsdom": "^19.0.0",
|
||||
"minipass-fetch": "^2.1.0",
|
||||
"workerpool": "^6.2.1",
|
||||
"yargs": "^17.5.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^7.11.0"
|
||||
"@domenic/eslint-config": "^2.0.0",
|
||||
"eslint": "^8.16.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.10.0"
|
||||
"node": ">=16.13.2"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue