diff --git a/README.md b/README.md index a511608..19ed9f2 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Scrapes the web serial [_Worm_](https://parahumans.wordpress.com/) into an eBook ## How to use -First you'll need a modern version of [Node.js](https://nodejs.org/en/). Install whatever is current (not LTS). +First you'll need a modern version of [Node.js](https://nodejs.org/en/). Install whatever is current (not LTS); at least v8.x is necessary. Then, open a terminal ([Mac documentation](http://blog.teamtreehouse.com/introduction-to-the-mac-os-x-command-line), [Windows documentation](http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) and install the program by typing diff --git a/lib/convert.js b/lib/convert.js index d137a9c..d641ff9 100644 --- a/lib/convert.js +++ b/lib/convert.js @@ -3,40 +3,39 @@ const path = require("path"); const fs = require("mz/fs"); const throat = require("throat"); const serializeToXML = require("xmlserializer").serializeToString; -const jsdom = require("./jsdom.js"); +const { JSDOM } = require("jsdom"); const substitutions = require("./substitutions.json"); -module.exports = (cachePath, manifestPath, contentPath) => { - return fs.readFile(manifestPath, { encoding: "utf-8" }).then(manifestContents => { - const chapters = JSON.parse(manifestContents); - console.log("All chapters downloaded; beginning conversion to EPUB chapters"); +module.exports = async (cachePath, manifestPath, contentPath) => { + const manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" }); + const chapters = JSON.parse(manifestContents); + console.log("All chapters downloaded; beginning conversion to EPUB chapters"); - const mapper = throat(10, chapter => { - return convertChapter(chapter, cachePath, contentPath); - }); - return Promise.all(chapters.map(mapper)); - }) - .then(() => console.log("All chapters converted")); + const mapper = throat(10, chapter => convertChapter(chapter, cachePath, contentPath)); + await Promise.all(chapters.map(mapper)); + + console.log("All chapters converted"); }; -function convertChapter(chapter, cachePath, contentPath) { +async function convertChapter(chapter, cachePath, contentPath) { const filename = chapter.filename; const filePath = path.resolve(cachePath, filename); console.log(`- Reading ${filename}`); - return fs.readFile(filePath, { encoding: "utf-8" }).then(contents => { - console.log(`- Read ${filename}`); - const rawChapterDoc = jsdom(contents); - const output = getChapterString(chapter, rawChapterDoc); + const contents = await fs.readFile(filePath, { encoding: "utf-8" }); + console.log(`- Read ${filename}`); - // TODO: this should probably not be necessary... jsdom bug I guess!? - rawChapterDoc.defaultView.close(); + const rawChapterJSDOM = new JSDOM(contents); + const output = getChapterString(chapter, rawChapterJSDOM.window.document); - const destFileName = `${path.basename(filename, ".html")}.xhtml`; - const destFilePath = path.resolve(contentPath, destFileName); - return fs.writeFile(destFilePath, output); - }) - .then(() => console.log(`- Finished converting ${filename}`)); + // TODO: this should probably not be necessary... jsdom bug I guess!? + rawChapterJSDOM.window.close(); + + const destFileName = `${path.basename(filename, ".html")}.xhtml`; + const destFilePath = path.resolve(contentPath, destFileName); + + await fs.writeFile(destFilePath, output); + console.log(`- Finished converting ${filename}`); } function getChapterString(chapter, rawChapterDoc) { diff --git a/lib/download.js b/lib/download.js index 20c3420..fe1abc1 100644 --- a/lib/download.js +++ b/lib/download.js @@ -4,28 +4,27 @@ const fs = require("mz/fs"); const mkdirp = require("mkdirp-then"); const request = require("requisition"); const zfill = require("zfill"); -const jsdom = require("./jsdom.js"); +const { JSDOM } = require("jsdom"); const FILENAME_PREFIX = "chapter"; -module.exports = (startChapterUrl, cachePath, manifestPath) => { - return fs.readFile(manifestPath, { encoding: "utf-8" }).then( - manifestContents => { - const manifest = JSON.parse(manifestContents); - - return downloadAllChapters(manifest, startChapterUrl, cachePath, manifestPath); - }, - e => { - if (e.code === "ENOENT") { - return downloadAllChapters(null, startChapterUrl, cachePath, manifestPath); - } - throw e; +module.exports = async (startChapterURL, cachePath, manifestPath) => { + let manifestContents; + try { + manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" }); + } catch (e) { + if (e.code === "ENOENT") { + return downloadAllChapters(null, startChapterURL, cachePath, manifestPath); } - ); + throw e; + } + + const manifest = JSON.parse(manifestContents); + return downloadAllChapters(manifest, startChapterURL, cachePath, manifestPath); }; -function downloadAllChapters(manifest, startChapterUrl, cachePath, manifestPath) { - let currentChapter = startChapterUrl; +async function downloadAllChapters(manifest, startChapterURL, cachePath, manifestPath) { + let currentChapter = startChapterURL; let chapterIndex = 0; if (manifest !== null) { currentChapter = manifest[manifest.length - 1].url; @@ -37,62 +36,45 @@ function downloadAllChapters(manifest, startChapterUrl, cachePath, manifestPath) manifest = []; } - return mkdirp(cachePath).then(loop); + await mkdirp(cachePath); - function loop() { + while (currentChapter !== null) { const filename = `${FILENAME_PREFIX}${zfill(chapterIndex, 3)}.html`; console.log(`Downloading ${currentChapter}`); - return downloadChapter(currentChapter).then(response => { - console.log("- Response received"); - return response.text(); - }) - .then(contents => { - console.log("- Response body received"); - const rawChapterDoc = jsdom(contents, { url: currentChapter }); - console.log("- Response body parsed into DOM"); + const response = await downloadChapter(currentChapter); + const contents = await response.text(); + console.log("- Response body received"); + const rawChapterJSDOM = new JSDOM(contents, { url: currentChapter }); + console.log("- Response body parsed into DOM"); - const chapterUrlToSave = currentChapter; - const chapterTitle = getChapterTitle(rawChapterDoc); - currentChapter = getNextChapterUrl(rawChapterDoc); + const chapterURLToSave = currentChapter; + const chapterTitle = getChapterTitle(rawChapterJSDOM.window.document); + currentChapter = getNextChapterURL(rawChapterJSDOM.window.document); - // TODO: this should probably not be necessary... jsdom bug I guess!? - rawChapterDoc.defaultView.close(); + // TODO: this should probably not be necessary... jsdom bug I guess!? + rawChapterJSDOM.window.close(); - manifest.push({ - url: chapterUrlToSave, - title: chapterTitle, - filename - }); - - fs.writeFile(path.resolve(cachePath, filename), contents); - }) - .then(() => { - console.log("- Response text saved to cache file"); - // Incrementally update the manifest after every successful download, instead of waiting until the end. - return writeManifest(); - }) - .then(() => { - console.log("- Manifest updated"); - - if (currentChapter === null) { - return undefined; - } - - ++chapterIndex; - - return loop(); + manifest.push({ + url: chapterURLToSave, + title: chapterTitle, + filename }); - } - function writeManifest() { - const contents = JSON.stringify(manifest, undefined, 2); - return fs.writeFile(manifestPath, contents); + await fs.writeFile(path.resolve(cachePath, filename), contents); + console.log("- Response text saved to cache file"); + + // Incrementally update the manifest after every successful download, instead of waiting until the end. + const newManifestContents = JSON.stringify(manifest, undefined, 2); + await fs.writeFile(manifestPath, newManifestContents); + console.log("- Manifest updated"); + + ++chapterIndex; } } -function getNextChapterUrl(rawChapterDoc) { +function getNextChapterURL(rawChapterDoc) { // `a[title="Next Chapter"]` doesn"t always work. Two different pathologies: // - https://parahumans.wordpress.com/2011/09/27/shell-4-2/ // - https://parahumans.wordpress.com/2012/04/21/sentinel-9-6/ @@ -123,12 +105,11 @@ function retry(times, fn) { } function downloadChapter(url) { - return retry(3, () => { - return request(url).redirects(10).then(response => { - if (response.status !== 200) { - throw new Error(`Response status for ${url} was ${response.status}`); - } - return response; - }); + return retry(3, async () => { + const response = await request(url).redirects(10); + if (response.status !== 200) { + throw new Error(`Response status for ${url} was ${response.status}`); + } + return response; }); } diff --git a/lib/jsdom.js b/lib/jsdom.js deleted file mode 100644 index 2b98589..0000000 --- a/lib/jsdom.js +++ /dev/null @@ -1,14 +0,0 @@ -"use strict"; -const jsdom = require("jsdom"); - -// No need to fetch or execute JavaScript -module.exports = (contents, options) => { - options = Object.assign({}, options, { - features: { - FetchExternalResources: false, - ProcessExternalResources: false - } - }); - - return jsdom.jsdom(contents, options); -}; diff --git a/lib/scaffold.js b/lib/scaffold.js index 2613ea5..e748acf 100644 --- a/lib/scaffold.js +++ b/lib/scaffold.js @@ -21,17 +21,16 @@ const COVER_IMG_FILENAME = "cover.png"; const COVER_XHTML_FILENAME = "cover.xhtml"; const COVER_MIMETYPE = "image/png"; -module.exports = (scaffoldingPath, bookPath, contentPath, chaptersPath, manifestPath) => { - return Promise.all([ +module.exports = async (scaffoldingPath, bookPath, contentPath, chaptersPath, manifestPath) => { + await Promise.all([ cpr(scaffoldingPath, bookPath, { overwrite: true, confirm: true, filter: noThumbs }), getChapters(contentPath, chaptersPath, manifestPath).then(chapters => { return Promise.all([ - writeOpf(chapters, contentPath), + writeOPF(chapters, contentPath), writeNcx(chapters, contentPath) ]); }) - ]) - .then(() => undefined); + ]); }; function noThumbs(filePath) { @@ -39,7 +38,7 @@ function noThumbs(filePath) { return path.basename(filePath) !== "Thumbs.db"; } -function writeOpf(chapters, contentPath) { +function writeOPF(chapters, contentPath) { const manifestChapters = chapters.map(c => { return ``; }).join("\n"); @@ -115,23 +114,22 @@ ${navPoints} return fs.writeFile(path.resolve(contentPath, NCX_FILENAME), contents); } -function getChapters(contentPath, chaptersPath, manifestPath) { +async function getChapters(contentPath, chaptersPath, manifestPath) { const hrefPrefix = `${path.relative(contentPath, chaptersPath)}/`; - return fs.readFile(manifestPath, { encoding: "utf-8" }).then(manifestContents => { - const manifestChapters = JSON.parse(manifestContents); + const manifestContents = await fs.readFile(manifestPath, { encoding: "utf-8" }); + const manifestChapters = JSON.parse(manifestContents); - return fs.readdir(chaptersPath).then(filenames => { - return filenames - .filter(f => path.extname(f) === ".xhtml") - .sort() - .map((f, i) => { - return { - id: path.basename(f), - title: manifestChapters[i].title, - href: `${hrefPrefix}${f}` - }; - }); + const filenames = await fs.readdir(chaptersPath); + + return filenames + .filter(f => path.extname(f) === ".xhtml") + .sort() + .map((f, i) => { + return { + id: path.basename(f), + title: manifestChapters[i].title, + href: `${hrefPrefix}${f}` + }; }); - }); } diff --git a/lib/worm-scraper.js b/lib/worm-scraper.js index d55d20f..a288d6d 100644 --- a/lib/worm-scraper.js +++ b/lib/worm-scraper.js @@ -84,11 +84,15 @@ if (argv._.includes("zip")) { commands.push(() => zip(bookPath, contentPath, path.resolve(argv.out))); } -commands.reduce((previous, command) => { - return previous.then(command); -}, Promise.resolve()) -.then(() => console.log("All done!")) -.catch(e => { - console.error(e.stack); - process.exit(1); -}); +(async () => { + try { + for (const command of commands) { + await command(); + } + + console.log("All done!"); + } catch (e) { + console.error(e.stack); + process.exit(1); + } +})(); diff --git a/package.json b/package.json index 13387c2..e918345 100644 --- a/package.json +++ b/package.json @@ -20,19 +20,19 @@ "lint": "eslint lib" }, "dependencies": { - "archiver": "^1.3.0", - "cpr": "^2.0.2", - "jsdom": "^9.9.1", + "archiver": "^2.0.0", + "cpr": "^2.2.0", + "jsdom": "^11.1.0", "mkdirp-then": "^1.0.1", "requisition": "^1.5.0", "rimraf-then": "^1.0.0", - "thenify": "^3.1.0", - "throat": "^3.0.0", + "thenify": "^3.3.0", + "throat": "^4.1.0", "xmlserializer": "^0.6.0", - "yargs": "^6.6.0", + "yargs": "^8.0.2", "zfill": "0.0.2" }, "devDependencies": { - "eslint": "3.12.2" + "eslint": "4.4.1" } }