"use strict"; const path = require("path"); const fs = require("mz/fs"); const mkdirp = require("mkdirp-then"); const rimraf = require("rimraf-then"); const jsdom = require("jsdom"); const download = require("./download.js"); require("./track-rejections.js"); const START_CHAPTER_URL = "https://parahumans.wordpress.com/2011/06/11/1-1/"; const cachePath = path.resolve("cache"); const outPath = path.resolve("out"); const contentPath = path.resolve(outPath, "OEBPS"); rimraf(outPath) .then(function () { return mkdirp(contentPath); }) .then(function () { return download(START_CHAPTER_URL, cachePath); }) .then(readAllRawChapterDocs) .then(function (rawChapterDocs) { console.log("Extracting content into EPUB chapter files"); return Promise.all(rawChapterDocs.map(function (rawChapterDoc, i) { const output = getChapterString(rawChapterDoc); const destFilename = path.resolve(contentPath, `chapter${i + 1}.xhtml`); return fs.writeFile(destFilename, output); })); }) .then(function () { console.log("All done!"); }); function readAllRawChapterDocs() { return fs.readdir(cachePath).then(function (filenames) { const htmlFiles = filenames.filter(function (f) { return f.endsWith(".html"); }); return Promise.all(htmlFiles.map(readRawChapterDoc)); }); } function readRawChapterDoc(filename) { const filePath = path.resolve(cachePath, filename); return fs.readFile(filePath, { encoding: "utf-8" }).then(function (contents) { return jsdom.jsdom(contents); }); } function getChapterString(rawChapterDoc) { const title = rawChapterDoc.querySelector("h1.entry-title").textContent; const body = cleanContentEl(rawChapterDoc.querySelector(".entry-content")).innerHTML; return `
s el.removeChild(el.firstElementChild); el.removeChild(el.lastElementChild); // Remove redundant dir="ltr" Array.prototype.forEach.call(el.children, function (child) { if (child.getAttribute("dir") === "ltr") { child.removeAttribute("dir"); } }); return el; }