parent
48400e6f96
commit
bfdb9eadde
3 changed files with 33 additions and 19 deletions
|
|
@ -204,7 +204,7 @@
|
|||
"object-curly-newline": ["error", { "multiline": true }],
|
||||
"object-curly-spacing": ["error", "always"],
|
||||
"object-property-newline": "off",
|
||||
"one-var": ["error", "never"],
|
||||
"one-var": ["error", { "initialized": "never" }],
|
||||
"one-var-declaration-per-line": ["error", "initializations"],
|
||||
"operator-assignment": ["error", "always"],
|
||||
"operator-linebreak": ["error", "after"],
|
||||
|
|
|
|||
|
|
@ -41,23 +41,13 @@ async function downloadAllChapters(manifest, startChapterURL, cachePath, manifes
|
|||
|
||||
process.stdout.write(`Downloading ${currentChapter}... `);
|
||||
|
||||
const response = await downloadChapter(currentChapter);
|
||||
const contents = await response.text();
|
||||
const rawChapterJSDOM = new JSDOM(contents, { url: currentChapter });
|
||||
const { contents, dom, url } = await downloadChapter(currentChapter);
|
||||
const title = getChapterTitle(dom.window.document);
|
||||
currentChapter = getNextChapterURL(dom.window.document);
|
||||
|
||||
const chapterURLToSave = currentChapter;
|
||||
const chapterTitle = getChapterTitle(rawChapterJSDOM.window.document);
|
||||
currentChapter = getNextChapterURL(rawChapterJSDOM.window.document);
|
||||
|
||||
// TODO: this should probably not be necessary... jsdom bug I guess!?
|
||||
rawChapterJSDOM.window.close();
|
||||
|
||||
manifest.push({
|
||||
url: chapterURLToSave,
|
||||
title: chapterTitle,
|
||||
filename
|
||||
});
|
||||
dom.window.close();
|
||||
|
||||
manifest.push({ url, title, filename });
|
||||
await fs.writeFile(path.resolve(cachePath, filename), contents);
|
||||
|
||||
// Incrementally update the manifest after every successful download, instead of waiting until the end.
|
||||
|
|
@ -103,7 +93,31 @@ function retry(times, fn) {
|
|||
});
|
||||
}
|
||||
|
||||
function downloadChapter(url) {
|
||||
// Upper bound on chained <meta http-equiv=refresh> hops followed for a single chapter.
const MAX_META_REFRESHES = 10;

/**
 * Extracts the redirect target from the `content` attribute of a
 * `<meta http-equiv=refresh>` element.
 *
 * Accepts optional whitespace around the separator and the `=` sign
 * (e.g. `0; URL=/foo/`), as well as a target wrapped in single or double quotes.
 * Absolute targets are returned verbatim; anything else is resolved against `baseURL`.
 *
 * @param {string} content - The raw `content` attribute value.
 * @param {string} baseURL - URL of the page that contained the element.
 * @returns {string|null} The absolute target URL, or `null` when the value names
 *   no target (e.g. a plain `30`, which only asks for the same page to be reloaded).
 * @throws {TypeError} If a non-absolute target cannot be resolved against `baseURL`.
 */
function getMetaRefreshTarget(content, baseURL) {
  const match = /\d+\s*[;,]\s*url\s*=\s*(.*)/i.exec(content);
  if (match === null) {
    return null;
  }

  const target = match[1].trim().replace(/^(["'])(.*)\1$/, "$2");
  if (target === "") {
    return null;
  }

  // Leave absolute URLs untouched so the recorded URL is exactly what the page specified.
  if (/^[a-z][a-z\d+.-]*:/i.test(target)) {
    return target;
  }

  return new URL(target, baseURL).href;
}

/**
 * Downloads a chapter page, following any `<meta http-equiv=refresh>` redirects
 * until a page without one (or one that merely reloads itself) is reached.
 *
 * Each page is fetched with `downloadWithRetry` and parsed with `JSDOM`. The DOM of
 * every intermediate page is closed here; the DOM of the final page is returned
 * still open, so closing it is left to the caller.
 *
 * @param {string|null} startingURL - URL to start from. If `null`, nothing is
 *   downloaded and every field of the result is `undefined`.
 * @returns {Promise<{url: string, contents: string, dom: JSDOM}>} The URL of the
 *   final page, its raw text, and its parsed DOM.
 * @throws {Error} If the refresh chain revisits a URL or exceeds
 *   `MAX_META_REFRESHES` hops.
 */
async function downloadChapter(startingURL) {
  const visited = new Set();
  let urlToFollow = startingURL;
  let url;
  let contents;
  let dom;

  while (urlToFollow !== null) {
    const response = await downloadWithRetry(urlToFollow);

    url = urlToFollow;
    visited.add(url);
    contents = await response.text();
    dom = new JSDOM(contents, { url });

    const refreshMeta = dom.window.document.querySelector("meta[http-equiv=refresh]");
    const target = refreshMeta ? getMetaRefreshTarget(refreshMeta.content, url) : null;

    if (target === null || target === url) {
      // No redirect, or a periodic self-reload: this is the page we want.
      urlToFollow = null;
    } else {
      // Intermediate page: release its window before moving on (or bailing out).
      dom.window.close();

      if (visited.has(target) || visited.size > MAX_META_REFRESHES) {
        throw new Error(
          `Meta refresh loop or too many meta refreshes (limit ${MAX_META_REFRESHES}) ` +
          `while downloading ${startingURL}; last target was ${target}`
        );
      }

      urlToFollow = target;
      process.stdout.write(`\n Redirected to ${urlToFollow}... `);
    }
  }

  return { url, contents, dom };
}
|
||||
|
||||
function downloadWithRetry(url) {
|
||||
return retry(3, async () => {
|
||||
const response = await request(url).redirects(10);
|
||||
if (response.status !== 200) {
|
||||
|
|
|
|||
|
|
@ -7544,7 +7544,7 @@
|
|||
"after": "—don’t shoot it!"
|
||||
}
|
||||
],
|
||||
"https://www.parahumans.net/2019/09/15/from-within-16-10/": [
|
||||
"https://www.parahumans.net/2019/09/14/from-within-16-10/": [
|
||||
{
|
||||
"before": "carried on. out of",
|
||||
"after": "carried on, out of"
|
||||
|
|
@ -8927,7 +8927,7 @@
|
|||
"after": "the entities"
|
||||
}
|
||||
],
|
||||
"https://www.parahumans.net/2020/03/25/last-20-9/": [
|
||||
"https://www.parahumans.net/2020/03/24/last-20-9/": [
|
||||
{
|
||||
"before": "you could help miss—help Antares",
|
||||
"after": "you could help Miss—help Antares"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue