Update dependencies and code style

This commit is contained in:
Domenic Denicola 2017-08-13 16:38:42 -04:00
commit 0435c45b2e
7 changed files with 108 additions and 140 deletions

View file

@ -4,7 +4,7 @@ Scrapes the web serial [_Worm_](https://parahumans.wordpress.com/) into an eBook
## How to use
First you'll need a modern version of [Node.js](https://nodejs.org/en/). Install whatever is current (not LTS).
First you'll need a modern version of [Node.js](https://nodejs.org/en/). Install whatever is current (not LTS); at least v8.x is necessary.
Then, open a terminal ([Mac documentation](http://blog.teamtreehouse.com/introduction-to-the-mac-os-x-command-line), [Windows documentation](http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) and install the program by typing

View file

@ -3,40 +3,39 @@ const path = require("path");
const fs = require("mz/fs");
const throat = require("throat");
const serializeToXML = require("xmlserializer").serializeToString;
const jsdom = require("./jsdom.js");
const { JSDOM } = require("jsdom");
const substitutions = require("./substitutions.json");
/**
 * Converts every chapter listed in the manifest into an EPUB chapter file.
 *
 * Reads the manifest at `manifestPath`, parses it as JSON, and calls `convertChapter` for each entry
 * through a `throat(10, ...)` wrapper, logging a message before and after the whole batch.
 * NOTE(review): `throat(10, fn)` presumably limits the number of conversions in flight to 10 -- confirm
 * against the throat documentation.
 *
 * @param {string} cachePath - passed through to `convertChapter` for every chapter.
 * @param {string} manifestPath - path of the JSON manifest; its parsed value must support `.map`.
 * @param {string} contentPath - passed through to `convertChapter` for every chapter.
 * @returns {Promise<undefined>} settles once every chapter conversion has finished.
 */
module.exports = (cachePath, manifestPath, contentPath) => {
return fs.readFile(manifestPath, { encoding: "utf-8" }).then(manifestContents => {
const chapters = JSON.parse(manifestContents);
console.log("All chapters downloaded; beginning conversion to EPUB chapters");
module.exports = async (cachePath, manifestPath, contentPath) => {
const manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" });
const chapters = JSON.parse(manifestContents);
console.log("All chapters downloaded; beginning conversion to EPUB chapters");
const mapper = throat(10, chapter => {
return convertChapter(chapter, cachePath, contentPath);
});
return Promise.all(chapters.map(mapper));
})
.then(() => console.log("All chapters converted"));
const mapper = throat(10, chapter => convertChapter(chapter, cachePath, contentPath));
await Promise.all(chapters.map(mapper));
console.log("All chapters converted");
};
/**
 * Converts one cached chapter HTML file into an `.xhtml` file.
 *
 * Reads `chapter.filename` resolved against `cachePath`, parses the text with jsdom, renders the output
 * string with `getChapterString`, closes the jsdom window, and writes the result into `contentPath`
 * under the same base name with the `.html` suffix replaced by `.xhtml`. Progress is logged at each step.
 *
 * @param {{filename: string}} chapter - manifest entry; the whole object is also handed to `getChapterString`.
 * @param {string} cachePath - directory the source file name is resolved against.
 * @param {string} contentPath - directory the `.xhtml` file name is resolved against.
 * @returns {Promise<undefined>} settles after the output file has been written.
 */
function convertChapter(chapter, cachePath, contentPath) {
async function convertChapter(chapter, cachePath, contentPath) {
const filename = chapter.filename;
const filePath = path.resolve(cachePath, filename);
console.log(`- Reading ${filename}`);
return fs.readFile(filePath, { encoding: "utf-8" }).then(contents => {
console.log(`- Read ${filename}`);
const rawChapterDoc = jsdom(contents);
const output = getChapterString(chapter, rawChapterDoc);
const contents = await fs.readFile(filePath, { encoding: "utf-8" });
console.log(`- Read ${filename}`);
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterDoc.defaultView.close();
const rawChapterJSDOM = new JSDOM(contents);
const output = getChapterString(chapter, rawChapterJSDOM.window.document);
const destFileName = `${path.basename(filename, ".html")}.xhtml`;
const destFilePath = path.resolve(contentPath, destFileName);
return fs.writeFile(destFilePath, output);
})
.then(() => console.log(`- Finished converting ${filename}`));
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterJSDOM.window.close();
const destFileName = `${path.basename(filename, ".html")}.xhtml`;
const destFilePath = path.resolve(contentPath, destFileName);
await fs.writeFile(destFilePath, output);
console.log(`- Finished converting ${filename}`);
}
function getChapterString(chapter, rawChapterDoc) {

View file

@ -4,28 +4,27 @@ const fs = require("mz/fs");
const mkdirp = require("mkdirp-then");
const request = require("requisition");
const zfill = require("zfill");
const jsdom = require("./jsdom.js");
const { JSDOM } = require("jsdom");
const FILENAME_PREFIX = "chapter";
/**
 * Entry point for downloading chapters, with or without a previously saved manifest.
 *
 * Tries to read `manifestPath`. When the read fails with code `ENOENT` (no manifest file yet),
 * `downloadAllChapters` is called with a `null` manifest; any other read error is rethrown.
 * Otherwise the file is parsed as JSON and the parsed manifest is handed to `downloadAllChapters`.
 *
 * @param {string} startChapterURL - forwarded unchanged to `downloadAllChapters`.
 * @param {string} cachePath - forwarded unchanged to `downloadAllChapters`.
 * @param {string} manifestPath - path of the JSON manifest file; also forwarded to `downloadAllChapters`.
 * @returns {Promise} settles with whatever `downloadAllChapters` settles with.
 */
module.exports = (startChapterUrl, cachePath, manifestPath) => {
return fs.readFile(manifestPath, { encoding: "utf-8" }).then(
manifestContents => {
const manifest = JSON.parse(manifestContents);
return downloadAllChapters(manifest, startChapterUrl, cachePath, manifestPath);
},
e => {
if (e.code === "ENOENT") {
return downloadAllChapters(null, startChapterUrl, cachePath, manifestPath);
}
throw e;
module.exports = async (startChapterURL, cachePath, manifestPath) => {
let manifestContents;
try {
manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" });
} catch (e) {
if (e.code === "ENOENT") {
return downloadAllChapters(null, startChapterURL, cachePath, manifestPath);
}
);
throw e;
}
const manifest = JSON.parse(manifestContents);
return downloadAllChapters(manifest, startChapterURL, cachePath, manifestPath);
};
function downloadAllChapters(manifest, startChapterUrl, cachePath, manifestPath) {
let currentChapter = startChapterUrl;
async function downloadAllChapters(manifest, startChapterURL, cachePath, manifestPath) {
let currentChapter = startChapterURL;
let chapterIndex = 0;
if (manifest !== null) {
currentChapter = manifest[manifest.length - 1].url;
@ -37,62 +36,45 @@ function downloadAllChapters(manifest, startChapterUrl, cachePath, manifestPath)
manifest = [];
}
return mkdirp(cachePath).then(loop);
await mkdirp(cachePath);
function loop() {
while (currentChapter !== null) {
const filename = `${FILENAME_PREFIX}${zfill(chapterIndex, 3)}.html`;
console.log(`Downloading ${currentChapter}`);
return downloadChapter(currentChapter).then(response => {
console.log("- Response received");
return response.text();
})
.then(contents => {
console.log("- Response body received");
const rawChapterDoc = jsdom(contents, { url: currentChapter });
console.log("- Response body parsed into DOM");
const response = await downloadChapter(currentChapter);
const contents = await response.text();
console.log("- Response body received");
const rawChapterJSDOM = new JSDOM(contents, { url: currentChapter });
console.log("- Response body parsed into DOM");
const chapterUrlToSave = currentChapter;
const chapterTitle = getChapterTitle(rawChapterDoc);
currentChapter = getNextChapterUrl(rawChapterDoc);
const chapterURLToSave = currentChapter;
const chapterTitle = getChapterTitle(rawChapterJSDOM.window.document);
currentChapter = getNextChapterURL(rawChapterJSDOM.window.document);
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterDoc.defaultView.close();
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterJSDOM.window.close();
manifest.push({
url: chapterUrlToSave,
title: chapterTitle,
filename
});
fs.writeFile(path.resolve(cachePath, filename), contents);
})
.then(() => {
console.log("- Response text saved to cache file");
// Incrementally update the manifest after every successful download, instead of waiting until the end.
return writeManifest();
})
.then(() => {
console.log("- Manifest updated");
if (currentChapter === null) {
return undefined;
}
++chapterIndex;
return loop();
manifest.push({
url: chapterURLToSave,
title: chapterTitle,
filename
});
}
function writeManifest() {
const contents = JSON.stringify(manifest, undefined, 2);
return fs.writeFile(manifestPath, contents);
await fs.writeFile(path.resolve(cachePath, filename), contents);
console.log("- Response text saved to cache file");
// Incrementally update the manifest after every successful download, instead of waiting until the end.
const newManifestContents = JSON.stringify(manifest, undefined, 2);
await fs.writeFile(manifestPath, newManifestContents);
console.log("- Manifest updated");
++chapterIndex;
}
}
function getNextChapterUrl(rawChapterDoc) {
function getNextChapterURL(rawChapterDoc) {
// `a[title="Next Chapter"]` doesn't always work. Two different pathologies:
// - https://parahumans.wordpress.com/2011/09/27/shell-4-2/
// - https://parahumans.wordpress.com/2012/04/21/sentinel-9-6/
@ -123,12 +105,11 @@ function retry(times, fn) {
}
/**
 * Requests `url` and yields the response only when its status is 200.
 *
 * The request is issued with `request(url).redirects(10)`; any status other than 200 is turned into
 * an Error naming the URL and the status. The whole attempt is wrapped in `retry(3, ...)`.
 * NOTE(review): `.redirects(10)` presumably caps followed redirects at 10, and `retry(3, fn)` presumably
 * re-runs `fn` on failure up to 3 times -- confirm against requisition's docs and the `retry` helper.
 *
 * @param {string} url - address of the chapter page to fetch.
 * @returns {Promise} whatever `retry` returns; on success the callback yields the response object.
 */
function downloadChapter(url) {
return retry(3, () => {
return request(url).redirects(10).then(response => {
if (response.status !== 200) {
throw new Error(`Response status for ${url} was ${response.status}`);
}
return response;
});
return retry(3, async () => {
const response = await request(url).redirects(10);
if (response.status !== 200) {
throw new Error(`Response status for ${url} was ${response.status}`);
}
return response;
});
}

View file

@ -1,14 +0,0 @@
"use strict";
const jsdom = require("jsdom");
// No need to fetch or execute JavaScript
module.exports = (contents, options) => {
options = Object.assign({}, options, {
features: {
FetchExternalResources: false,
ProcessExternalResources: false
}
});
return jsdom.jsdom(contents, options);
};

View file

@ -21,17 +21,16 @@ const COVER_IMG_FILENAME = "cover.png";
const COVER_XHTML_FILENAME = "cover.xhtml";
const COVER_MIMETYPE = "image/png";
/**
 * Prepares the book directory from the scaffolding and the converted chapters.
 *
 * Two jobs are started together and awaited with `Promise.all`:
 *  - `cpr` copies `scaffoldingPath` to `bookPath` (with `overwrite` and `confirm` enabled), using
 *    `noThumbs` as the copy filter;
 *  - `getChapters` loads the chapter list, which is then passed to `writeOPF` and `writeNcx`
 *    together with `contentPath`.
 *
 * @param {string} scaffoldingPath - source directory for the copy.
 * @param {string} bookPath - destination directory for the copy.
 * @param {string} contentPath - passed to `getChapters`, `writeOPF` and `writeNcx`.
 * @param {string} chaptersPath - passed to `getChapters`.
 * @param {string} manifestPath - passed to `getChapters`.
 * @returns {Promise<undefined>} settles once both jobs have finished.
 */
module.exports = (scaffoldingPath, bookPath, contentPath, chaptersPath, manifestPath) => {
return Promise.all([
module.exports = async (scaffoldingPath, bookPath, contentPath, chaptersPath, manifestPath) => {
await Promise.all([
cpr(scaffoldingPath, bookPath, { overwrite: true, confirm: true, filter: noThumbs }),
getChapters(contentPath, chaptersPath, manifestPath).then(chapters => {
return Promise.all([
writeOpf(chapters, contentPath),
writeOPF(chapters, contentPath),
writeNcx(chapters, contentPath)
]);
})
])
.then(() => undefined);
]);
};
function noThumbs(filePath) {
@ -39,7 +38,7 @@ function noThumbs(filePath) {
return path.basename(filePath) !== "Thumbs.db";
}
function writeOpf(chapters, contentPath) {
function writeOPF(chapters, contentPath) {
const manifestChapters = chapters.map(c => {
return `<item id="${c.id}" href="${c.href}" media-type="application/xhtml+xml"/>`;
}).join("\n");
@ -115,23 +114,22 @@ ${navPoints}
return fs.writeFile(path.resolve(contentPath, NCX_FILENAME), contents);
}
/**
 * Builds the list of chapter descriptors (`id`, `title`, `href`) for the converted chapters.
 *
 * Reads and JSON-parses the manifest, lists `chaptersPath`, keeps only the `.xhtml` entries, sorts them
 * with the default string sort, and maps each file to an object whose `id` is the file's base name,
 * whose `title` comes from the manifest entry at the same index, and whose `href` is the file name
 * prefixed with the path of `chaptersPath` relative to `contentPath` plus a `/`.
 *
 * NOTE(review): titles are paired with files purely by position after sorting. This assumes the sorted
 * `.xhtml` files line up one-to-one with manifest order; if the directory holds more `.xhtml` files than
 * the manifest has entries, `manifestChapters[i]` is undefined and reading `.title` throws.
 *
 * @param {string} contentPath - base directory that `href` values are made relative to.
 * @param {string} chaptersPath - directory that is scanned for `.xhtml` files.
 * @param {string} manifestPath - path of the JSON manifest supplying chapter titles.
 * @returns {Promise<Array<{id: string, title: *, href: string}>>} the chapter descriptors, in sorted file order.
 */
function getChapters(contentPath, chaptersPath, manifestPath) {
async function getChapters(contentPath, chaptersPath, manifestPath) {
const hrefPrefix = `${path.relative(contentPath, chaptersPath)}/`;
return fs.readFile(manifestPath, { encoding: "utf-8" }).then(manifestContents => {
const manifestChapters = JSON.parse(manifestContents);
const manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" });
const manifestChapters = JSON.parse(manifestContents);
return fs.readdir(chaptersPath).then(filenames => {
return filenames
.filter(f => path.extname(f) === ".xhtml")
.sort()
.map((f, i) => {
return {
id: path.basename(f),
title: manifestChapters[i].title,
href: `${hrefPrefix}${f}`
};
});
const filenames = await fs.readdir(chaptersPath);
return filenames
.filter(f => path.extname(f) === ".xhtml")
.sort()
.map((f, i) => {
return {
id: path.basename(f),
title: manifestChapters[i].title,
href: `${hrefPrefix}${f}`
};
});
});
}

View file

@ -84,11 +84,15 @@ if (argv._.includes("zip")) {
commands.push(() => zip(bookPath, contentPath, path.resolve(argv.out)));
}
// Chain the queued commands so that each one starts only after the previous one has settled
// successfully, then report completion. Any failure along the chain prints its stack trace and
// terminates the process with exit status 1.
commands
  .reduce((chain, runCommand) => chain.then(runCommand), Promise.resolve())
  .then(() => {
    console.log("All done!");
  })
  .catch(err => {
    console.error(err.stack);
    process.exit(1);
  });
// Run the queued commands strictly one after another and report completion. If any command throws
// or rejects, print the failure and terminate the process with exit status 1.
(async () => {
  try {
    for (const command of commands) {
      await command();
    }
    console.log("All done!");
  } catch (e) {
    // A rejection value is not guaranteed to be an Error: strings, plain objects or `undefined`
    // have no `.stack`, and printing just `undefined` would hide the actual failure. Fall back to
    // the value itself in that case.
    console.error((e && e.stack) || e);
    process.exit(1);
  }
})();

View file

@ -20,19 +20,19 @@
"lint": "eslint lib"
},
"dependencies": {
"archiver": "^1.3.0",
"cpr": "^2.0.2",
"jsdom": "^9.9.1",
"archiver": "^2.0.0",
"cpr": "^2.2.0",
"jsdom": "^11.1.0",
"mkdirp-then": "^1.0.1",
"requisition": "^1.5.0",
"rimraf-then": "^1.0.0",
"thenify": "^3.1.0",
"throat": "^3.0.0",
"thenify": "^3.3.0",
"throat": "^4.1.0",
"xmlserializer": "^0.6.0",
"yargs": "^6.6.0",
"yargs": "^8.0.2",
"zfill": "0.0.2"
},
"devDependencies": {
"eslint": "3.12.2"
"eslint": "4.4.1"
}
}