diff --git a/.eslintrc.json b/.eslintrc.json index 886d1ff..373b63a 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -204,7 +204,7 @@ "object-curly-newline": ["error", { "multiline": true }], "object-curly-spacing": ["error", "always"], "object-property-newline": "off", - "one-var": ["error", "never"], + "one-var": ["error", { "initialized": "never" }], "one-var-declaration-per-line": ["error", "initializations"], "operator-assignment": ["error", "always"], "operator-linebreak": ["error", "after"], diff --git a/lib/download.js b/lib/download.js index edc7591..8317c23 100644 --- a/lib/download.js +++ b/lib/download.js @@ -41,23 +41,13 @@ async function downloadAllChapters(manifest, startChapterURL, cachePath, manifes process.stdout.write(`Downloading ${currentChapter}... `); - const response = await downloadChapter(currentChapter); - const contents = await response.text(); - const rawChapterJSDOM = new JSDOM(contents, { url: currentChapter }); + const { contents, dom, url } = await downloadChapter(currentChapter); + const title = getChapterTitle(dom.window.document); + currentChapter = getNextChapterURL(dom.window.document); - const chapterURLToSave = currentChapter; - const chapterTitle = getChapterTitle(rawChapterJSDOM.window.document); - currentChapter = getNextChapterURL(rawChapterJSDOM.window.document); - - // TODO: this should probably not be necessary... jsdom bug I guess!? - rawChapterJSDOM.window.close(); - - manifest.push({ - url: chapterURLToSave, - title: chapterTitle, - filename - }); + dom.window.close(); + manifest.push({ url, title, filename }); await fs.writeFile(path.resolve(cachePath, filename), contents); // Incrementally update the manifest after every successful download, instead of waiting until the end. 
@@ -103,7 +93,62 @@ function retry(times, fn) {
   });
 }
 
-function downloadChapter(url) {
+// Downloads the chapter at startingURL, following any <meta http-equiv="refresh"> redirects it encounters.
+// Resolves to { url, contents, dom } for the final page: the URL it was actually fetched from, its raw
+// text, and its JSDOM. The window of the returned dom is left open; the caller must close it.
+async function downloadChapter(startingURL) {
+  // downloadWithRetry() already limits HTTP redirects via redirects(10); cap meta refreshes too, so that a
+  // page refreshing to itself (or an A -> B -> A cycle) fails loudly instead of looping forever.
+  const maxMetaRefreshes = 10;
+
+  let urlToFollow = startingURL;
+  let metaRefreshesFollowed = 0;
+
+  let url, contents, dom;
+  while (urlToFollow !== null) {
+    const response = await downloadWithRetry(urlToFollow);
+
+    url = urlToFollow;
+    contents = await response.text();
+    dom = new JSDOM(contents, { url });
+
+    urlToFollow = getMetaRefreshURL(dom.window.document);
+    if (urlToFollow !== null) {
+      // This page is only a redirect stub, so release its window before fetching the real page.
+      dom.window.close();
+
+      metaRefreshesFollowed += 1;
+      if (metaRefreshesFollowed > maxMetaRefreshes) {
+        throw new Error(`Too many meta refresh redirects while downloading ${startingURL}`);
+      }
+
+      process.stdout.write(`\n Redirected to ${urlToFollow}... `);
+    }
+  }
+
+  return { url, contents, dom };
+}
+
+// Returns the target URL of the document's <meta http-equiv="refresh"> tag, or null if the document has no
+// such tag or the tag names no URL (a bare delay only reloads the same page, so it is not a redirect).
+function getMetaRefreshURL(document) {
+  const refreshMeta = document.querySelector("meta[http-equiv=refresh]");
+  if (!refreshMeta) {
+    return null;
+  }
+
+  // Typical content is "0;url=https://example.com/", but "0; URL='https://example.com/'" also occurs,
+  // so tolerate whitespace around the separators and quotes around the URL.
+  const match = /\d+\s*[;,]\s*url\s*=\s*(.*)/i.exec(refreshMeta.content);
+  if (match === null) {
+    return null;
+  }
+
+  const target = match[1].trim().replace(/^(["'])(.*)\1$/, "$2");
+  return target === "" ? null : target;
+}
+
+function downloadWithRetry(url) {
   return retry(3, async () => {
     const response = await request(url).redirects(10);
     if (response.status !== 200) {
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 30f027e..6cc8d57 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -7544,7 +7544,7 @@
       "after": "—don’t shoot it!"
     }
   ],
-  "https://www.parahumans.net/2019/09/15/from-within-16-10/": [
+  "https://www.parahumans.net/2019/09/14/from-within-16-10/": [
     {
       "before": "carried on. out of",
       "after": "carried on, out of"
@@ -8927,7 +8927,7 @@
       "after": "the entities"
     }
   ],
-  "https://www.parahumans.net/2020/03/25/last-20-9/": [
+  "https://www.parahumans.net/2020/03/24/last-20-9/": [
     {
       "before": "you could help miss—help Antares",
       "after": "you could help Miss—help Antares"