Follow redirects during downloads

Fixes #25.
This commit is contained in:
Domenic Denicola 2021-01-17 16:02:59 -05:00
commit bfdb9eadde
3 changed files with 33 additions and 19 deletions

View file

@ -204,7 +204,7 @@
"object-curly-newline": ["error", { "multiline": true }],
"object-curly-spacing": ["error", "always"],
"object-property-newline": "off",
"one-var": ["error", "never"],
"one-var": ["error", { "initialized": "never" }],
"one-var-declaration-per-line": ["error", "initializations"],
"operator-assignment": ["error", "always"],
"operator-linebreak": ["error", "after"],

View file

@ -41,23 +41,13 @@ async function downloadAllChapters(manifest, startChapterURL, cachePath, manifes
process.stdout.write(`Downloading ${currentChapter}... `);
const response = await downloadChapter(currentChapter);
const contents = await response.text();
const rawChapterJSDOM = new JSDOM(contents, { url: currentChapter });
const { contents, dom, url } = await downloadChapter(currentChapter);
const title = getChapterTitle(dom.window.document);
currentChapter = getNextChapterURL(dom.window.document);
const chapterURLToSave = currentChapter;
const chapterTitle = getChapterTitle(rawChapterJSDOM.window.document);
currentChapter = getNextChapterURL(rawChapterJSDOM.window.document);
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterJSDOM.window.close();
manifest.push({
url: chapterURLToSave,
title: chapterTitle,
filename
});
dom.window.close();
manifest.push({ url, title, filename });
await fs.writeFile(path.resolve(cachePath, filename), contents);
// Incrementally update the manifest after every successful download, instead of waiting until the end.
@ -103,7 +93,31 @@ function retry(times, fn) {
});
}
function downloadChapter(url) {
/**
 * Extracts the redirect target from the `content` attribute of a `<meta http-equiv="refresh">` element.
 *
 * Accepts the common spellings of the attribute: optional whitespace around the separator, `;` or `,` as
 * the separator, an optional `url=` prefix, and an optionally quoted target.
 *
 * @param {string} content - The attribute value, e.g. `0;url=https://example.com/` or `0; URL='/next/'`.
 * @returns {string|null} The target exactly as written (possibly relative), or null when `content` names
 *   no target (for example a bare delay such as `30`, which only asks for the same page to be reloaded).
 */
function parseMetaRefreshTarget(content) {
  const match = /^\s*\d+(?:\.\d*)?\s*[;,]\s*(?:url\s*=\s*)?(['"]?)(.*?)\1\s*$/i.exec(content);
  if (match === null || match[2] === "") {
    return null;
  }
  return match[2];
}

/**
 * Downloads a chapter page, following `<meta http-equiv="refresh">` redirects until a page without one
 * (or with one that names no target) is reached.
 *
 * @param {string} startingURL - The URL to start downloading from.
 * @returns {Promise<{url: string, contents: string, dom: object}>} The URL of the page finally reached, its
 *   text, and the JSDOM instance created from it. The returned `dom` is left open; intermediate ones are
 *   closed here.
 * @throws {Error} If more than 10 meta-refresh redirects are encountered in a row (e.g. a page that
 *   refreshes to itself), instead of downloading forever.
 * @throws {TypeError} If a redirect target cannot be parsed as a URL relative to the current page.
 */
async function downloadChapter(startingURL) {
  const maxMetaRedirects = 10;
  let urlToFollow = startingURL;

  for (let redirectCount = 0; redirectCount <= maxMetaRedirects; ++redirectCount) {
    const url = urlToFollow;
    const response = await downloadWithRetry(url);
    const contents = await response.text();
    const dom = new JSDOM(contents, { url });

    const refreshMeta = dom.window.document.querySelector("meta[http-equiv=refresh]");
    const target = refreshMeta ? parseMetaRefreshTarget(refreshMeta.content) : null;
    if (target === null) {
      return { url, contents, dom };
    }

    // This page is only a hop on the way to the real chapter; release its DOM before moving on.
    dom.window.close();

    // Resolve against the current page so that relative targets such as "/2020/03/24/..." work.
    urlToFollow = new URL(target, url).href;
    process.stdout.write(`\n Redirected to ${urlToFollow}... `);
  }

  throw new Error(`Too many <meta http-equiv="refresh"> redirects while downloading ${startingURL}`);
}
function downloadWithRetry(url) {
return retry(3, async () => {
const response = await request(url).redirects(10);
if (response.status !== 200) {

View file

@ -7544,7 +7544,7 @@
"after": "—dont shoot it!"
}
],
"https://www.parahumans.net/2019/09/15/from-within-16-10/": [
"https://www.parahumans.net/2019/09/14/from-within-16-10/": [
{
"before": "carried on. out of",
"after": "carried on, out of"
@ -8927,7 +8927,7 @@
"after": "the entities"
}
],
"https://www.parahumans.net/2020/03/25/last-20-9/": [
"https://www.parahumans.net/2020/03/24/last-20-9/": [
{
"before": "you could help miss—help Antares",
"after": "you could help Miss—help Antares"