diff --git a/.eslintrc.json b/.eslintrc.json index 886d1ff..b7348b0 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -1,259 +1,10 @@ { "root": true, + "extends": "@domenic", "env": { - "node": true, - "es6": true - }, - "parserOptions": { - "ecmaVersion": 2019 + "node": true }, "rules": { - // Possible errors - "no-await-in-loop": "off", - "comma-dangle": ["error", "never"], - "no-cond-assign": ["error", "except-parens"], - "no-console": "off", - "no-constant-condition": "error", - "no-control-regex": "error", - "no-debugger": "error", - "no-dupe-args": "error", - "no-dupe-keys": "error", - "no-duplicate-case": "error", - "no-empty": "error", - "no-empty-character-class": "error", - "no-ex-assign": "error", - "no-extra-boolean-cast": "error", - "no-extra-parens": ["error", "all", { "conditionalAssign": false, "nestedBinaryExpressions": false }], - "no-extra-semi": "error", - "no-func-assign": "error", - "no-inner-declarations": "off", - "no-invalid-regexp": "error", - "no-irregular-whitespace": "error", - "no-obj-calls": "error", - "no-prototype-builtins": "error", - "no-regex-spaces": "error", - "no-sparse-arrays": "error", - "no-template-curly-in-string": "error", - "no-unexpected-multiline": "error", - "no-unreachable": "error", - "no-unsafe-finally": "off", - "no-unsafe-negation": "error", - "use-isnan": "error", - "valid-jsdoc": "off", - "valid-typeof": "error", - - // Best practices - "accessor-pairs": "error", - "array-callback-return": "error", - "block-scoped-var": "off", - "class-methods-use-this": "error", - "complexity": "off", - "consistent-return": "error", - "curly": ["error", "all"], - "default-case": "off", - "dot-location": ["error", "property"], - "dot-notation": "error", - "eqeqeq": "error", - "guard-for-in": "off", - "no-alert": "error", - "no-caller": "error", - "no-case-declarations": "error", - "no-div-regex": "off", - "no-else-return": "error", - "no-empty-function": "error", - "no-empty-pattern": "error", - "no-eq-null": "error", - "no-eval": "error", - "no-extend-native": "error", - "no-extra-bind": "error", - "no-extra-label": "error", - "no-fallthrough": "error", - "no-floating-decimal": "error", - "no-global-assign": "error", - "no-implicit-coercion": "error", - "no-implicit-globals": "error", - "no-implied-eval": "off", - "no-invalid-this": "error", - "no-iterator": "error", - "no-labels": ["error", { "allowLoop": true }], - "no-lone-blocks": "error", - "no-loop-func": "off", - "no-magic-numbers": "off", - "no-multi-spaces": "error", - "no-multi-str": "error", - "no-new": "error", - "no-new-func": "error", - "no-new-wrappers": "error", - "no-octal": "error", - "no-octal-escape": "error", - "no-param-reassign": "off", - "no-process-env": "error", - "no-proto": "error", - "no-redeclare": "error", - "no-restricted-properties": "off", - "no-return-assign": ["error", "except-parens"], - "no-return-await": "error", - "no-script-url": "off", - "no-self-assign": "error", - "no-self-compare": "error", - "no-sequences": "error", - "no-throw-literal": "error", - "no-unmodified-loop-condition": "error", - "no-unused-expressions": "error", - "no-unused-labels": "error", - "no-useless-call": "error", - "no-useless-concat": "error", - "no-useless-escape": "error", - "no-useless-return": "error", - "no-void": "error", - "no-warning-comments": "off", - "no-with": "error", - "radix": ["error", "as-needed"], - "require-await": "error", - "vars-on-top": "off", - "wrap-iife": ["error", "outside"], - "yoda": ["error", "never"], - - // Strict Mode - "strict": ["error", "global"], - - // Variables - "init-declarations": "off", - "no-catch-shadow": "error", - "no-delete-var": "error", - "no-label-var": "error", - "no-restricted-globals": "off", - "no-shadow": "error", - "no-shadow-restricted-names": "error", - "no-undef": "error", - "no-undef-init": "error", - "no-undefined": "off", - "no-unused-vars": "error", - "no-use-before-define": ["error", "nofunc"], - - // Node.js and CommonJS - "callback-return": "off", - "global-require": "error", - "handle-callback-err": "error", - "no-mixed-requires": ["error", true], - "no-new-require": "error", - "no-path-concat": "error", - "no-process-exit": "error", - "no-restricted-imports": "off", - "no-restricted-modules": "off", - "no-sync": "off", - - // Stylistic Issues - "array-bracket-spacing": ["error", "never"], - "block-spacing": ["error", "always"], - "brace-style": ["error", "1tbs", { "allowSingleLine": false }], - "camelcase": ["error", { "properties": "always" }], - "capitalized-comments": ["error", "always", { "ignoreConsecutiveComments": true }], - "comma-spacing": ["error", { "before": false, "after": true }], - "comma-style": ["error", "last"], - "computed-property-spacing": ["error", "never"], - "consistent-this": "off", - "eol-last": "error", - "func-call-spacing": ["error", "never"], - "func-name-matching": ["error", "always"], - "func-names": ["error", "never"], - "func-style": ["error", "declaration"], - "id-blacklist": "off", - "id-length": "off", - "id-match": "off", - "indent": ["error", 2, { "SwitchCase": 1 }], - "jsx-quotes": "off", - "key-spacing": ["error", { "beforeColon": false, "afterColon": true, "mode": "strict" }], - "keyword-spacing": ["error", { "before": true, "after": true }], - "line-comment-position": "off", - "linebreak-style": ["error", "unix"], - "lines-around-comment": "off", - "lines-around-directive": "off", - "max-depth": "off", - "max-len": ["error", 120, { "ignoreUrls": true }], - "max-lines": "off", - "max-nested-callbacks": "off", - "max-params": "off", - "max-statements": "off", - "max-statements-per-line": ["error", { "max": 1 }], - "multiline-ternary": "off", - "new-cap": "error", - "new-parens": "error", - "newline-after-var": "off", - "newline-before-return": "off", - "newline-per-chained-call": "off", - "no-array-constructor": "error", - "no-bitwise": "off", - "no-continue": "off", - "no-inline-comments": "off", - "no-lonely-if": "error", - "no-mixed-operators": "error", - "no-mixed-spaces-and-tabs": "error", - "no-multiple-empty-lines": "error", - "no-negated-condition": "off", - "no-nested-ternary": "error", - "no-new-object": "error", - "no-plusplus": "off", - "no-restricted-syntax": "off", - "no-tabs": "error", - "no-ternary": "off", - "no-trailing-spaces": "error", - "no-underscore-dangle": "off", - "no-unneeded-ternary": "error", - "no-whitespace-before-property": "error", - "object-curly-newline": ["error", { "multiline": true }], - "object-curly-spacing": ["error", "always"], - "object-property-newline": "off", - "one-var": ["error", "never"], - "one-var-declaration-per-line": ["error", "initializations"], - "operator-assignment": ["error", "always"], - "operator-linebreak": ["error", "after"], - "padded-blocks": ["error", "never"], - "quote-props": ["error", "as-needed"], - "quotes": ["error", "double", { "avoidEscape": true, "allowTemplateLiterals": true }], - "require-jsdoc": "off", - "semi": ["error", "always"], - "semi-spacing": "error", - "sort-keys": "off", - "sort-vars": "off", - "space-before-blocks": ["error", "always"], - "space-before-function-paren": ["error", { "anonymous": "always", "named": "never" }], - "space-in-parens": ["error", "never"], - "space-infix-ops": "error", - "space-unary-ops": ["error", { "words": true, "nonwords": false }], - "spaced-comment": ["error", "always", { "markers": ["///"] }], - "unicode-bom": ["error", "never"], - "wrap-regex": "off", - - // ECMAScript 6 - "arrow-body-style": "off", // meh - "arrow-parens": ["error", "as-needed"], - "arrow-spacing": "error", - "constructor-super": "error", - "generator-star-spacing": ["error", "after"], - "no-class-assign": "error", - "no-confusing-arrow": "off", - "no-const-assign": "error", - "no-dupe-class-members": "error", - "no-duplicate-imports": "error", - "no-new-symbol": "error", - "no-this-before-super": "error", - "no-useless-computed-key": "error", - "no-useless-constructor": "error", - "no-useless-rename": "error", - "no-var": "error", - "object-shorthand": "error", - "prefer-arrow-callback": "error", - "prefer-const": "error", - "prefer-numeric-literals": "error", - "prefer-rest-params": "error", - "prefer-spread": "error", - "prefer-template": "off", - "require-yield": "error", - "rest-spread-spacing": ["error", "never"], - "sort-imports": "off", - "symbol-description": "error", - "template-curly-spacing": ["error", "never"], - "yield-star-spacing": ["error", "after"] + "no-console": "off" } } diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..a801689 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,50 @@ +name: Test +on: + pull_request: + branches: + - master + push: + branches: + - master +jobs: + test: + name: Test + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-node@v2 + with: + node-version: 16 + + - run: npm install + + - run: npm run lint + + # CI would take too long if we did the download every time too. So, we cache it. This does mean we're vulnerable to + # source changes exposing problems in our code, but those are pretty infrequent. If they occur, we need to bump the + # cache key. + - uses: actions/cache@v2 + with: + key: worm-ward-cache-2021-01-17 + path: ./cache + + - run: node ./lib/worm-scraper.js --book=worm + + - run: node ./lib/worm-scraper.js --book=ward + + - uses: actions/setup-java@v1 + with: + java-version: 15 + java-package: jre + + - name: Get EPUBCheck + run: | + curl https://github.com/w3c/epubcheck/releases/download/v4.2.4/epubcheck-4.2.4.zip --location --output epubcheck.zip + unzip epubcheck.zip + + - name: Check Worm.epub + run: java -jar epubcheck-4.2.4/epubcheck.jar --failonwarnings Worm.epub + + - name: Check Ward.epub + run: java -jar epubcheck-4.2.4/epubcheck.jar --failonwarnings Ward.epub diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index de4a137..0000000 --- a/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ -language: node_js -node_js: - - 10 - - stable -script: - npm run lint diff --git a/README.md b/README.md index ce27d97..56aff60 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Scrapes the web serial [_Worm_](https://parahumans.wordpress.com/) and its seque ## How to use -First you'll need a modern version of [Node.js](https://nodejs.org/en/). Install whatever is current (not LTS); at least v12.10.0 is necessary. +First you'll need a modern version of [Node.js](https://nodejs.org/en/). At least v16.13.2 is necessary. Then, open a terminal ([Mac documentation](http://blog.teamtreehouse.com/introduction-to-the-mac-os-x-command-line), [Windows documentation](http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) and install the program by typing diff --git a/covers/ward/cover.xhtml b/covers/ward/cover.xhtml index 8b44d63..b1affee 100644 --- a/covers/ward/cover.xhtml +++ b/covers/ward/cover.xhtml @@ -3,7 +3,7 @@
+
+
+
+ ......
\n
([^>]+)<\/em>(!|\?|\.)<\/p>/g, " $1$2
]+)> /g, " ");
- xml = xml.replace(/<\/em> <\/p>/g, " ([^>]+)<\/em>(!|\?|\.)<\/p>/ug, " $1$2 ]+)> /ug, " ");
+ xml = xml.replace(/<\/em> <\/p>/ug, " ”/g, " “");
- xml = xml.replace(/“\s*<\/p>/g, "”
“\s+/g, "
“"); - xml = xml.replace(/\s+”/g, "”"); - xml = xml.replace(/'/g, "’"); - xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’"); - xml = xml.replace(/([a-z])”<\/p>/g, "$1.”
"); + xml = xml.replace(/”/ug, "
“"); + xml = xml.replace(/“\s*<\/p>/ug, "”
"); + xml = xml.replace(/“\s*<\/em><\/p>/ug, "”"); + xml = xml.replace(/‘\s*<\/p>/ug, "’"); + xml = xml.replace(/‘\s*<\/em><\/p>/ug, "’"); + xml = xml.replace(/,” <\/em>/ug, ",” "); + xml = xml.replace(/′/ug, "’"); + xml = xml.replace(/″/ug, "”"); + xml = xml.replace(/([A-Za-z])‘s(\s?)/ug, "$1’s$2"); + xml = xml.replace(/I‘m/ug, "I’m"); + xml = xml.replace(/“\s+/ug, "
“"); + xml = xml.replace(/\s+”/ug, "”"); + xml = xml.replace(/'/ug, "’"); + xml = xml.replace(/’([A-Za-z]+)’/ug, "‘$1’"); + xml = xml.replace(/([a-z])”<\/p>/ug, "$1.”
"); fixEms(); - xml = xml.replace(/‘([^<]+)<\/em>‘/g, "‘$1’"); - xml = xml.replace(/([a-z]+)!<\/em>/g, "$1!"); - xml = xml.replace(/(?([\w ’]+)([!.?])”<\/em>/g, "$1$2”"); - xml = xml.replace(/([\w ’]+[!.?])”<\/em>/g, "$1”"); - xml = xml.replace(/I”(m|ll)/g, "I’$1"); - xml = xml.replace(/””<\/p>/g, "”"); - xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “"); - xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "$1,"); - xml = xml.replace(/‘([Kk])ay(?!’)/g, "’$1ay"); - xml = xml.replace(/(Why|What|Who|How|Where|When)<\/em>\?/g, "$1?"); - xml = xml.replace(/,<\/em>/g, ","); - xml = xml.replace(/,”<\/p>/g, ".”"); - xml = xml.replace(/(.*),<\/p>/g, "
$1.
"); - xml = xml.replace(/‘(\w+)‘(\w+)’/g, "‘$1’$2’"); - xml = xml.replace(/([a-z]+), ([a-z]+)<\/em>/g, "$1, $2"); + xml = xml.replace(/‘([^<]+)<\/em>‘/ug, "‘$1’"); + xml = xml.replace(/([a-z]+)!<\/em>/ug, "$1!"); + xml = xml.replace(/(?([\w ’]+)([!.?])”<\/em>/ug, "$1$2”"); + xml = xml.replace(/([\w ’]+[!.?])”<\/em>/ug, "$1”"); + xml = xml.replace(/I”(m|ll)/ug, "I’$1"); + xml = xml.replace(/””<\/p>/ug, "”"); + xml = xml.replace(/^([^“]+?) ?”(?![ —<])/ugm, "$1 “"); + xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/u, "$1,"); + xml = xml.replace(/‘([Kk])ay(?!’)/ug, "’$1ay"); + xml = xml.replace(/(Why|What|Who|How|Where|When)<\/em>\?/ug, "$1?"); + xml = xml.replace(/,<\/em>/ug, ","); + xml = xml.replace(/,”<\/p>/ug, ".”"); + xml = xml.replace(/(.*),<\/p>/ug, "
$1.
"); + xml = xml.replace(/‘(\w+)‘(\w+)’/ug, "‘$1’$2’"); + xml = xml.replace(/([a-z]+), ([a-z]+)<\/em>/ug, "$1, $2"); // Similar problems occur in Ward with and as do in Worm with s - xml = xml.replace(//g, ""); - xml = xml.replace(/(\s*
\s*/ug, "
"); + + // This is another quote fix but it needs to happen after the line break deletion... so entangled, ugh. + xml = xml.replace(/<\/em>\s*“\s*<\/p>/ug, "
”"); // Fix missing spaces after commas - xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2"); + xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/ug, "$1, $2"); // Fix bad periods and spacing/markup surrounding them - xml = xml.replace(/\.\.<\/p>/g, "."); - xml = xml.replace(/\.\.”<\/p>/g, ".”"); - xml = xml.replace(/ \. /g, ". "); - xml = xml.replace(/ \.<\/p>/g, "."); - xml = xml.replace(/\.\.\./g, "…"); - xml = xml.replace(/\.\. {2}/g, ". "); - xml = xml.replace(/\.\./g, "…"); - xml = xml.replace(/(?/ug, "."); + xml = xml.replace(/\.\.”<\/p>/ug, ".”"); + xml = xml.replace(/ \. /ug, ". "); + xml = xml.replace(/ \.<\/p>/ug, "."); + xml = xml.replace(/\.\.\./ug, "…"); + xml = xml.replace(/\.\. {2}/ug, ". "); + xml = xml.replace(/\.\./ug, "…"); + xml = xml.replace(/(?/g, ""); - xml = xml.replace(/([a-z]) ,/g, "$1,"); + xml = xml.replace(/ ? <\/p>/ug, ""); + xml = xml.replace(/([a-z]) ,/ug, "$1,"); + + // Use actual emojis instead of images + xml = xml.replace( + // eslint-disable-next-line max-len + /-/g, "
—"); - xml = xml.replace(/-<\/p>/g, "—
"); - xml = xml.replace(/--/ug, "
—"); + xml = xml.replace(/-<\/p>/ug, "—
"); + xml = xml.replace(/-]*)>■<\/p>/g, `
■
`); + xml = xml.replace(/]*)>■<\/p>/ug, `
■
`); - xml = xml.replace(/⊙<\/strong><\/p>/g, ` ⊙ ⊙<\/strong><\/em><\/p>/g,
- ` ⊙ ⊙⊙<\/strong><\/p>/g,
- ` ⊙ ⊙<\/strong><\/p>/ug,
+ ` ⊙ ⊙<\/strong><\/em><\/p>/ug,
+ ` ⊙ ⊙⊙<\/strong><\/p>/ug,
+ ` ⊙ ⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/g,
- ` ⊙ ⊙ ⊙ ⊙ ⊙ ⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/ug,
+ ` ⊙ ⊙ ⊙ ⊙ ⊙ $1 ",
+ "_comment": "These are clearly paragraphs. Also the leading space is strange."
},
{
- "regExp": "\n ([^<]+)( $1 ([^<\n]+) $1 $2 Swearing: antisocial.",
+ "_comment": "This one has the surrounding the Crazed, kooky, cracked, crazy, Crazed, kooky, cracked, crazy, Crazed, kooky, cracked, crazy, Crazed, kooky, cracked, crazy, She "
},
{
@@ -2132,6 +2140,10 @@
}
],
"https://parahumans.wordpress.com/2012/07/26/interlude-12%C2%BD/": [
+ {
+ "before": "implode, it has created a powerful vacuum in someone’s mouth, that",
+ "after": "implode, and it has created a powerful vacuum in someone’s mouth that"
+ },
{
"before": "doesn’t,” Madcap said. “But",
"after": "doesn’t,” Madcap said, “but"
@@ -2177,6 +2189,11 @@
{
"before": "Grue,” Trickster said. “Get",
"after": "Grue,” Trickster said, “get"
+ },
+ {
+ "before": "Nothing she can’t do outside the bubble",
+ "after": "Nothing she can do outside the bubble",
+ "_comment": "This doesn't make sense logically with 'can’t'."
}
],
"https://parahumans.wordpress.com/2012/08/18/snare-13-7/": [
@@ -2308,6 +2325,11 @@
}
],
"https://parahumans.wordpress.com/2012/10/20/colony-15-2/": [
+ {
+ "before": "turned something",
+ "after": "taken something",
+ "_comment": "'turned' is repeated later in the sentence."
+ },
{
"before": "on,” Tattletale said. “Let’s",
"after": "on,” Tattletale said, “let’s"
@@ -2318,6 +2340,11 @@
}
],
"https://parahumans.wordpress.com/2012/10/23/colony-15-3/": [
+ {
+ "before": "whether or not I agreed or not",
+ "after": "whether I agreed or not",
+ "_comment": "Alternatively, the second 'or not' could be deleted instead of the first."
+ },
{
"before": "Woah,” Regent said. “Relax",
"after": "Woah,” Regent said, “relax"
@@ -2329,6 +2356,11 @@
{
"before": "the street
|
\n\\s*",
+ "replacement": "
|
\n([^>\n]+)
\n",
- "replacement": "\n
\nSwearing: antisocial.",
+ "after": "Word choice, ‘too’: haunted by demons.
, unlike the others."
}
],
"https://parahumans.wordpress.com/2012/03/10/extermination-8-3/": [
@@ -1711,12 +1721,6 @@
"after": "Dinah being kidnapped, and leaving"
}
],
- "https://parahumans.wordpress.com/2012/05/22/infestation-11-2/": [
- {
- "before": "attentio n",
- "after": "attention"
- }
- ],
"https://parahumans.wordpress.com/2012/05/26/infestation-11-3/": [
{
"before": "intimidating: A sea",
@@ -1812,6 +1816,10 @@
"before": "Charlotte,” I frowned. “Look",
"after": "Charlotte,” I frowned, “look"
},
+ {
+ "before": "non-sequitor",
+ "after": "non-sequitur"
+ },
{
"before": "Did they… was he",
"after": "Did they… Was he"
@@ -1819,12 +1827,12 @@
{
"before": "see the Doctor",
"after": "see the doctor",
- "comment": "Unlike the Cauldron Doctor, this is not used as a proper noun"
+ "_comment": "Unlike the Cauldron Doctor, this is not used as a proper noun"
},
{
"before": "the Doctor spoke",
"after": "the doctor spoke",
- "comment": "Unlike the Cauldron Doctor, this is not used as a proper noun"
+ "_comment": "Unlike the Cauldron Doctor, this is not used as a proper noun"
},
{
"before": "asinine confession again, “It’s",
@@ -1941,7 +1949,7 @@
"after": "
\nMental, dotty, whacked, loopy…
\nNutty, screwy, mentally diseased…
\n She ",
+ "before": "
\nNutty, screwy, mentally diseased…
\n
\nShe ",
"after": "
\nNutty, screwy, mentally diseased…
⊙
\n\n", + "before": "⊙
\n\n", "after": "", "_comment": "Our usual heuristics of removing the first paragraph to remove the previous/next chapter links are broken here because of the 'go back and look at the fake out' comment at the top" }, @@ -7488,7 +7637,7 @@ "after": "—don’t shoot it!" } ], - "https://www.parahumans.net/2019/09/15/from-within-16-10/": [ + "https://www.parahumans.net/2019/09/14/from-within-16-10/": [ { "before": "carried on. out of", "after": "carried on, out of" @@ -7787,7 +7936,7 @@ }, { "before": "you proved my sister right? Tired,", - "after": "you proved my sister right? Tired," + "after": "you proved my sister right? Tired," }, { "before": "boundary,” He whispered", @@ -8871,7 +9020,7 @@ "after": "the entities" } ], - "https://www.parahumans.net/2020/03/25/last-20-9/": [ + "https://www.parahumans.net/2020/03/24/last-20-9/": [ { "before": "you could help miss—help Antares", "after": "you could help Miss—help Antares" @@ -9072,7 +9221,7 @@ "after": "cross the ocean" } ], - "https://www.parahumans.net/?p=3365&preview=true": [ + "https://www.parahumans.net/2020/05/02/last-20-end/": [ { "before": "saying dad had custody", "after": "saying Dad had custody" diff --git a/lib/worm-scraper.js b/lib/worm-scraper.js index 4a63eef..5f34617 100644 --- a/lib/worm-scraper.js +++ b/lib/worm-scraper.js @@ -14,7 +14,7 @@ const zip = require("./zip.js"); const OUTPUT_DEFAULT = "(Book name).epub"; -const argv = yargs +const { argv } = yargs .usage(`${packageJson.description}\n\n${packageJson.name} [