diff --git a/lib/worm-scraper.js b/lib/worm-scraper.js index 2f285e6..ec09efe 100644 --- a/lib/worm-scraper.js +++ b/lib/worm-scraper.js @@ -22,30 +22,37 @@ rimraf(outPath) .then(function () { return download(START_CHAPTER_URL, cachePath); }) - .then(readAllRawChapterDocs) - .then(function (rawChapterDocs) { - console.log("Extracting content into EPUB chapter files"); - return Promise.all(rawChapterDocs.map(function (rawChapterDoc, i) { - const output = getChapterString(rawChapterDoc); - const destFilename = path.resolve(contentPath, `chapter${i + 1}.xhtml`); - return fs.writeFile(destFilename, output); - })); + .then(getChapterFilePaths) + .then(function (chapterFilePaths) { + console.log("All chapters downloaded; beginning conversion to EPUB chapters"); + return Promise.all(chapterFilePaths.map(convertChapter)); }) .then(function () { console.log("All done!"); }); -function readAllRawChapterDocs() { +function getChapterFilePaths() { return fs.readdir(cachePath).then(function (filenames) { - const htmlFiles = filenames.filter(function (f) { return f.endsWith(".html"); }); - return Promise.all(htmlFiles.map(readRawChapterDoc)); + return filenames.filter(function (f) { return f.endsWith(".html"); }) + .map(function (f) { return path.resolve(cachePath, f); }); }); } -function readRawChapterDoc(filename) { - const filePath = path.resolve(cachePath, filename); +function convertChapter(filePath, i) { + console.log(`- Reading ${filePath}`); return fs.readFile(filePath, { encoding: "utf-8" }).then(function (contents) { - return jsdom.jsdom(contents); + console.log(`- Read ${filePath}`); + const rawChapterDoc = jsdom.jsdom(contents); + const output = getChapterString(rawChapterDoc); + + // TODO: this should probably not be necessary... jsdom bug I guess!? + rawChapterDoc.defaultView.close(); + + const destFilename = path.resolve(contentPath, `chapter${i + 1}.xhtml`); + return fs.writeFile(destFilename, output); + }) + .then(function () { + console.log(`- Finished converting ${filePath}`); }); }