diff --git a/.eslintrc.json b/.eslintrc.json index b7348b0..886d1ff 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -1,10 +1,259 @@ { "root": true, - "extends": "@domenic", "env": { - "node": true + "node": true, + "es6": true + }, + "parserOptions": { + "ecmaVersion": 2019 }, "rules": { - "no-console": "off" + // Possible errors + "no-await-in-loop": "off", + "comma-dangle": ["error", "never"], + "no-cond-assign": ["error", "except-parens"], + "no-console": "off", + "no-constant-condition": "error", + "no-control-regex": "error", + "no-debugger": "error", + "no-dupe-args": "error", + "no-dupe-keys": "error", + "no-duplicate-case": "error", + "no-empty": "error", + "no-empty-character-class": "error", + "no-ex-assign": "error", + "no-extra-boolean-cast": "error", + "no-extra-parens": ["error", "all", { "conditionalAssign": false, "nestedBinaryExpressions": false }], + "no-extra-semi": "error", + "no-func-assign": "error", + "no-inner-declarations": "off", + "no-invalid-regexp": "error", + "no-irregular-whitespace": "error", + "no-obj-calls": "error", + "no-prototype-builtins": "error", + "no-regex-spaces": "error", + "no-sparse-arrays": "error", + "no-template-curly-in-string": "error", + "no-unexpected-multiline": "error", + "no-unreachable": "error", + "no-unsafe-finally": "off", + "no-unsafe-negation": "error", + "use-isnan": "error", + "valid-jsdoc": "off", + "valid-typeof": "error", + + // Best practices + "accessor-pairs": "error", + "array-callback-return": "error", + "block-scoped-var": "off", + "class-methods-use-this": "error", + "complexity": "off", + "consistent-return": "error", + "curly": ["error", "all"], + "default-case": "off", + "dot-location": ["error", "property"], + "dot-notation": "error", + "eqeqeq": "error", + "guard-for-in": "off", + "no-alert": "error", + "no-caller": "error", + "no-case-declarations": "error", + "no-div-regex": "off", + "no-else-return": "error", + "no-empty-function": "error", + "no-empty-pattern": "error", + "no-eq-null": "error", + "no-eval": "error", + "no-extend-native": "error", + "no-extra-bind": "error", + "no-extra-label": "error", + "no-fallthrough": "error", + "no-floating-decimal": "error", + "no-global-assign": "error", + "no-implicit-coercion": "error", + "no-implicit-globals": "error", + "no-implied-eval": "off", + "no-invalid-this": "error", + "no-iterator": "error", + "no-labels": ["error", { "allowLoop": true }], + "no-lone-blocks": "error", + "no-loop-func": "off", + "no-magic-numbers": "off", + "no-multi-spaces": "error", + "no-multi-str": "error", + "no-new": "error", + "no-new-func": "error", + "no-new-wrappers": "error", + "no-octal": "error", + "no-octal-escape": "error", + "no-param-reassign": "off", + "no-process-env": "error", + "no-proto": "error", + "no-redeclare": "error", + "no-restricted-properties": "off", + "no-return-assign": ["error", "except-parens"], + "no-return-await": "error", + "no-script-url": "off", + "no-self-assign": "error", + "no-self-compare": "error", + "no-sequences": "error", + "no-throw-literal": "error", + "no-unmodified-loop-condition": "error", + "no-unused-expressions": "error", + "no-unused-labels": "error", + "no-useless-call": "error", + "no-useless-concat": "error", + "no-useless-escape": "error", + "no-useless-return": "error", + "no-void": "error", + "no-warning-comments": "off", + "no-with": "error", + "radix": ["error", "as-needed"], + "require-await": "error", + "vars-on-top": "off", + "wrap-iife": ["error", "outside"], + "yoda": ["error", "never"], + + // Strict Mode + "strict": ["error", "global"], + + // Variables + "init-declarations": "off", + "no-catch-shadow": "error", + "no-delete-var": "error", + "no-label-var": "error", + "no-restricted-globals": "off", + "no-shadow": "error", + "no-shadow-restricted-names": "error", + "no-undef": "error", + "no-undef-init": "error", + "no-undefined": "off", + "no-unused-vars": "error", + "no-use-before-define": ["error", "nofunc"], + + // Node.js and CommonJS + "callback-return": "off", + "global-require": "error", + "handle-callback-err": "error", + "no-mixed-requires": ["error", true], + "no-new-require": "error", + "no-path-concat": "error", + "no-process-exit": "error", + "no-restricted-imports": "off", + "no-restricted-modules": "off", + "no-sync": "off", + + // Stylistic Issues + "array-bracket-spacing": ["error", "never"], + "block-spacing": ["error", "always"], + "brace-style": ["error", "1tbs", { "allowSingleLine": false }], + "camelcase": ["error", { "properties": "always" }], + "capitalized-comments": ["error", "always", { "ignoreConsecutiveComments": true }], + "comma-spacing": ["error", { "before": false, "after": true }], + "comma-style": ["error", "last"], + "computed-property-spacing": ["error", "never"], + "consistent-this": "off", + "eol-last": "error", + "func-call-spacing": ["error", "never"], + "func-name-matching": ["error", "always"], + "func-names": ["error", "never"], + "func-style": ["error", "declaration"], + "id-blacklist": "off", + "id-length": "off", + "id-match": "off", + "indent": ["error", 2, { "SwitchCase": 1 }], + "jsx-quotes": "off", + "key-spacing": ["error", { "beforeColon": false, "afterColon": true, "mode": "strict" }], + "keyword-spacing": ["error", { "before": true, "after": true }], + "line-comment-position": "off", + "linebreak-style": ["error", "unix"], + "lines-around-comment": "off", + "lines-around-directive": "off", + "max-depth": "off", + "max-len": ["error", 120, { "ignoreUrls": true }], + "max-lines": "off", + "max-nested-callbacks": "off", + "max-params": "off", + "max-statements": "off", + "max-statements-per-line": ["error", { "max": 1 }], + "multiline-ternary": "off", + "new-cap": "error", + "new-parens": "error", + "newline-after-var": "off", + "newline-before-return": "off", + "newline-per-chained-call": "off", + "no-array-constructor": "error", + "no-bitwise": "off", + "no-continue": "off", + "no-inline-comments": "off", + "no-lonely-if": "error", + "no-mixed-operators": "error", + "no-mixed-spaces-and-tabs": "error", + "no-multiple-empty-lines": "error", + "no-negated-condition": "off", + "no-nested-ternary": "error", + "no-new-object": "error", + "no-plusplus": "off", + "no-restricted-syntax": "off", + "no-tabs": "error", + "no-ternary": "off", + "no-trailing-spaces": "error", + "no-underscore-dangle": "off", + "no-unneeded-ternary": "error", + "no-whitespace-before-property": "error", + "object-curly-newline": ["error", { "multiline": true }], + "object-curly-spacing": ["error", "always"], + "object-property-newline": "off", + "one-var": ["error", "never"], + "one-var-declaration-per-line": ["error", "initializations"], + "operator-assignment": ["error", "always"], + "operator-linebreak": ["error", "after"], + "padded-blocks": ["error", "never"], + "quote-props": ["error", "as-needed"], + "quotes": ["error", "double", { "avoidEscape": true, "allowTemplateLiterals": true }], + "require-jsdoc": "off", + "semi": ["error", "always"], + "semi-spacing": "error", + "sort-keys": "off", + "sort-vars": "off", + "space-before-blocks": ["error", "always"], + "space-before-function-paren": ["error", { "anonymous": "always", "named": "never" }], + "space-in-parens": ["error", "never"], + "space-infix-ops": "error", + "space-unary-ops": ["error", { "words": true, "nonwords": false }], + "spaced-comment": ["error", "always", { "markers": ["///"] }], + "unicode-bom": ["error", "never"], + "wrap-regex": "off", + + // ECMAScript 6 + "arrow-body-style": "off", // meh + "arrow-parens": ["error", "as-needed"], + "arrow-spacing": "error", + "constructor-super": "error", + "generator-star-spacing": ["error", "after"], + "no-class-assign": "error", + "no-confusing-arrow": "off", + "no-const-assign": "error", + "no-dupe-class-members": "error", + "no-duplicate-imports": "error", + "no-new-symbol": "error", + "no-this-before-super": "error", + "no-useless-computed-key": "error", + "no-useless-constructor": "error", + "no-useless-rename": "error", + "no-var": "error", + "object-shorthand": "error", + "prefer-arrow-callback": "error", + "prefer-const": "error", + "prefer-numeric-literals": "error", + "prefer-rest-params": "error", + "prefer-spread": "error", + "prefer-template": "off", + "require-yield": "error", + "rest-spread-spacing": ["error", "never"], + "sort-imports": "off", + "symbol-description": "error", + "template-curly-spacing": ["error", "never"], + "yield-star-spacing": ["error", "after"] } } diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index a801689..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Test -on: - pull_request: - branches: - - master - push: - branches: - - master -jobs: - test: - name: Test - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v2 - - - uses: actions/setup-node@v2 - with: - node-version: 16 - - - run: npm install - - - run: npm run lint - - # CI would take too long if we did the download every time too. So, we cache it. This does mean we're vulnerable to - # source changes exposing problems in our code, but those are pretty infrequent. If they occur, we need to bump the - # cache key. - - uses: actions/cache@v2 - with: - key: worm-ward-cache-2021-01-17 - path: ./cache - - - run: node ./lib/worm-scraper.js --book=worm - - - run: node ./lib/worm-scraper.js --book=ward - - - uses: actions/setup-java@v1 - with: - java-version: 15 - java-package: jre - - - name: Get EPUBCheck - run: | - curl https://github.com/w3c/epubcheck/releases/download/v4.2.4/epubcheck-4.2.4.zip --location --output epubcheck.zip - unzip epubcheck.zip - - - name: Check Worm.epub - run: java -jar epubcheck-4.2.4/epubcheck.jar --failonwarnings Worm.epub - - - name: Check Ward.epub - run: java -jar epubcheck-4.2.4/epubcheck.jar --failonwarnings Ward.epub diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..de4a137 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,6 @@ +language: node_js +node_js: + - 10 + - stable +script: + npm run lint diff --git a/README.md b/README.md index 56aff60..ce27d97 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Scrapes the web serial [_Worm_](https://parahumans.wordpress.com/) and its seque ## How to use -First you'll need a modern version of [Node.js](https://nodejs.org/en/). At least v16.13.2 is necessary. +First you'll need a modern version of [Node.js](https://nodejs.org/en/). Install whatever is current (not LTS); at least v12.10.0 is necessary. Then, open a terminal ([Mac documentation](http://blog.teamtreehouse.com/introduction-to-the-mac-os-x-command-line), [Windows documentation](http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) and install the program by typing diff --git a/covers/ward/cover.xhtml b/covers/ward/cover.xhtml index b1affee..8b44d63 100644 --- a/covers/ward/cover.xhtml +++ b/covers/ward/cover.xhtml @@ -3,7 +3,7 @@
-
diff --git a/covers/worm/cover.xhtml b/covers/worm/cover.xhtml
index b1affee..8b44d63 100644
--- a/covers/worm/cover.xhtml
+++ b/covers/worm/cover.xhtml
@@ -3,7 +3,7 @@
-
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index f9f2b5f..5b1f4a0 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -77,21 +77,13 @@ function getBodyXML(chapter, book, contentEl) {
}
}
- // Remove empty inline elements.
- // Remove style attributes from inline elements, as they're always messed up.
- for (const el of contentEl.querySelectorAll("em, i, strong, b")) {
- const { textContent } = el;
-
- if (textContent === "") {
- el.remove();
- } else if (textContent.trim() === "") {
- if (el.childElementCount === 0) {
- el.replaceWith(" ");
- } else if (el.childElementCount === 1 && el.children[0].localName === "br") {
- el.outerHTML = "......
\n
([^>]+)<\/em>(!|\?|\.)<\/p>/ug, " $1$2
]+)> /ug, " ");
- xml = xml.replace(/<\/em> <\/p>/ug, " ([^>]+)<\/em>(!|\?|\.)<\/p>/g, " $1$2 ]+)> /g, " ");
+ xml = xml.replace(/<\/em> <\/p>/g, " ”/ug, " “");
- xml = xml.replace(/“\s*<\/p>/ug, "”
“\s+/ug, "
“"); - xml = xml.replace(/\s+”/ug, "”"); - xml = xml.replace(/'/ug, "’"); - xml = xml.replace(/’([A-Za-z]+)’/ug, "‘$1’"); - xml = xml.replace(/([a-z])”<\/p>/ug, "$1.”
"); + xml = xml.replace(/”/g, "
“"); + xml = xml.replace(/“\s*<\/p>/g, "”
"); + xml = xml.replace(/“\s*<\/em><\/p>/g, "”"); + xml = xml.replace(/‘\s*<\/p>/g, "’"); + xml = xml.replace(/‘\s*<\/em><\/p>/g, "’"); + xml = xml.replace(/,” <\/em>/g, ",” "); + xml = xml.replace(/′/g, "’"); + xml = xml.replace(/″/g, "”"); + xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2"); + xml = xml.replace(/I‘m/g, "I’m"); + xml = xml.replace(/“\s+/g, "
“"); + xml = xml.replace(/\s+”/g, "”"); + xml = xml.replace(/'/g, "’"); + xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’"); + xml = xml.replace(/([a-z])”<\/p>/g, "$1.”
"); fixEms(); - xml = xml.replace(/‘([^<]+)<\/em>‘/ug, "‘$1’"); - xml = xml.replace(/([a-z]+)!<\/em>/ug, "$1!"); - xml = xml.replace(/(?([\w ’]+)([!.?])”<\/em>/ug, "$1$2”"); - xml = xml.replace(/([\w ’]+[!.?])”<\/em>/ug, "$1”"); - xml = xml.replace(/I”(m|ll)/ug, "I’$1"); - xml = xml.replace(/””<\/p>/ug, "”"); - xml = xml.replace(/^([^“]+?) ?”(?![ —<])/ugm, "$1 “"); - xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/u, "$1,"); - xml = xml.replace(/‘([Kk])ay(?!’)/ug, "’$1ay"); - xml = xml.replace(/(Why|What|Who|How|Where|When)<\/em>\?/ug, "$1?"); - xml = xml.replace(/,<\/em>/ug, ","); - xml = xml.replace(/,”<\/p>/ug, ".”"); - xml = xml.replace(/(.*),<\/p>/ug, "
$1.
"); - xml = xml.replace(/‘(\w+)‘(\w+)’/ug, "‘$1’$2’"); - xml = xml.replace(/([a-z]+), ([a-z]+)<\/em>/ug, "$1, $2"); + xml = xml.replace(/‘([^<]+)<\/em>‘/g, "‘$1’"); + xml = xml.replace(/([a-z]+)!<\/em>/g, "$1!"); + xml = xml.replace(/(?([\w ’]+)([!.?])”<\/em>/g, "$1$2”"); + xml = xml.replace(/([\w ’]+[!.?])”<\/em>/g, "$1”"); + xml = xml.replace(/I”(m|ll)/g, "I’$1"); + xml = xml.replace(/””<\/p>/g, "”"); + xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “"); + xml = xml.replace(/(?([A-Za-z]+),<\/em>(?!”| +[A-Za-z]+ thought)/, "$1,"); + xml = xml.replace(/‘([Kk])ay(?!’)/g, "’$1ay"); + xml = xml.replace(/(Why|What|Who|How|Where|When)<\/em>\?/g, "$1?"); + xml = xml.replace(/,<\/em>/g, ","); + xml = xml.replace(/,”<\/p>/g, ".”"); + xml = xml.replace(/(.*),<\/p>/g, "
$1.
"); + xml = xml.replace(/‘(\w+)‘(\w+)’/g, "‘$1’$2’"); + xml = xml.replace(/([a-z]+), ([a-z]+)<\/em>/g, "$1, $2"); // Similar problems occur in Ward with and as do in Worm with s - xml = xml.replace(//ug, ""); - xml = xml.replace(/(\s*
\s*/ug, "
"); - - // This is another quote fix but it needs to happen after the line break deletion... so entangled, ugh. - xml = xml.replace(/<\/em>\s*“\s*<\/p>/ug, "
”"); + xml = xml.replace(/-/ug, "
—"); - xml = xml.replace(/-<\/p>/ug, "—
"); - xml = xml.replace(/--/g, "
—"); + xml = xml.replace(/-<\/p>/g, "—
"); + xml = xml.replace(/-]*)>■<\/p>/ug, `
■
`); + xml = xml.replace(/]*)>■<\/p>/g, `
■
`); - xml = xml.replace( - /⊙<\/strong><\/p>/ug,
- ` ⊙ ⊙<\/strong><\/em><\/p>/ug,
- ` ⊙ ⊙⊙<\/strong><\/p>/ug,
- ` ⊙ ⊙<\/strong><\/p>/g, ` ⊙ ⊙<\/strong><\/em><\/p>/g,
+ ` ⊙ ⊙⊙<\/strong><\/p>/g,
+ ` ⊙ ⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/ug,
- ` ⊙ ⊙ ⊙ ⊙ ⊙ ⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/g,
+ ` ⊙ ⊙ ⊙ ⊙ ⊙ ",
- "_comment": "These are clearly paragraphs. Also the leading space is strange."
+ "regExp": "\n ?\\s*([^<]+)( $1 Swearing: antisocial.",
- "_comment": "This one has the surrounding the ([^<]+)( $1 ([^<\n]+) $1 $2 Crazed, kooky, cracked, crazy, Crazed, kooky, cracked, crazy, Crazed, kooky, cracked, crazy, Crazed, kooky, cracked, crazy, She "
},
{
@@ -2140,10 +2132,6 @@
}
],
"https://parahumans.wordpress.com/2012/07/26/interlude-12%C2%BD/": [
- {
- "before": "implode, it has created a powerful vacuum in someone’s mouth, that",
- "after": "implode, and it has created a powerful vacuum in someone’s mouth that"
- },
{
"before": "doesn’t,” Madcap said. “But",
"after": "doesn’t,” Madcap said, “but"
@@ -2189,11 +2177,6 @@
{
"before": "Grue,” Trickster said. “Get",
"after": "Grue,” Trickster said, “get"
- },
- {
- "before": "Nothing she can’t do outside the bubble",
- "after": "Nothing she can do outside the bubble",
- "_comment": "This doesn't make sense logically with 'can’t'."
}
],
"https://parahumans.wordpress.com/2012/08/18/snare-13-7/": [
@@ -2325,11 +2308,6 @@
}
],
"https://parahumans.wordpress.com/2012/10/20/colony-15-2/": [
- {
- "before": "turned something",
- "after": "taken something",
- "_comment": "'turned' is repeated later in the sentence."
- },
{
"before": "on,” Tattletale said. “Let’s",
"after": "on,” Tattletale said, “let’s"
@@ -2340,11 +2318,6 @@
}
],
"https://parahumans.wordpress.com/2012/10/23/colony-15-3/": [
- {
- "before": "whether or not I agreed or not",
- "after": "whether I agreed or not",
- "_comment": "Alternatively, the second 'or not' could be deleted instead of the first."
- },
{
"before": "Woah,” Regent said. “Relax",
"after": "Woah,” Regent said, “relax"
@@ -2356,11 +2329,6 @@
{
"before": "the street
\n\\s*",
- "replacement": "
|
\nSwearing: antisocial.",
- "after": "Word choice, ‘too’: haunted by demons.
, unlike the others."
+ "regExp": "\n
|
\n([^>\n]+)
\n",
+ "replacement": "\n
\nMental, dotty, whacked, loopy…
\nNutty, screwy, mentally diseased…
\n
\nShe ",
+ "before": "
\nNutty, screwy, mentally diseased…
\n She ",
"after": "
\nNutty, screwy, mentally diseased…
⊙
\n\n", + "before": "⊙
\n\n", "after": "", "_comment": "Our usual heuristics of removing the first paragraph to remove the previous/next chapter links are broken here because of the 'go back and look at the fake out' comment at the top" }, @@ -7637,7 +7488,7 @@ "after": "—don’t shoot it!" } ], - "https://www.parahumans.net/2019/09/14/from-within-16-10/": [ + "https://www.parahumans.net/2019/09/15/from-within-16-10/": [ { "before": "carried on. out of", "after": "carried on, out of" @@ -7936,7 +7787,7 @@ }, { "before": "you proved my sister right? Tired,", - "after": "you proved my sister right? Tired," + "after": "you proved my sister right? Tired," }, { "before": "boundary,” He whispered", @@ -9020,7 +8871,7 @@ "after": "the entities" } ], - "https://www.parahumans.net/2020/03/24/last-20-9/": [ + "https://www.parahumans.net/2020/03/25/last-20-9/": [ { "before": "you could help miss—help Antares", "after": "you could help Miss—help Antares" @@ -9221,7 +9072,7 @@ "after": "cross the ocean" } ], - "https://www.parahumans.net/2020/05/02/last-20-end/": [ + "https://www.parahumans.net/?p=3365&preview=true": [ { "before": "saying dad had custody", "after": "saying Dad had custody" diff --git a/lib/worm-scraper.js b/lib/worm-scraper.js index 5f34617..4a63eef 100644 --- a/lib/worm-scraper.js +++ b/lib/worm-scraper.js @@ -14,7 +14,7 @@ const zip = require("./zip.js"); const OUTPUT_DEFAULT = "(Book name).epub"; -const { argv } = yargs +const argv = yargs .usage(`${packageJson.description}\n\n${packageJson.name} [