Update conversion to work with cache manifest

This commit is contained in:
Domenic Denicola 2015-05-11 21:14:40 -04:00
commit cf6b5c9ab9
4 changed files with 23 additions and 34 deletions

View file

@ -1,5 +0,0 @@
"use strict";
exports.getChapterTitle = function (rawChapterDoc) {
return rawChapterDoc.querySelector("h1.entry-title").textContent;
};

View file

@ -4,39 +4,31 @@ const fs = require("mz/fs");
const throat = require("throat");
const serializeToXml = require("xmlserializer").serializeToString;
const jsdom = require("./jsdom.js");
const getChapterTitle = require("./common.js").getChapterTitle;
module.exports = function (cachePath, contentPath) {
return getChapterFilePaths(cachePath)
.then(function (chapterFilePaths) {
console.log("All chapters downloaded; beginning conversion to EPUB chapters");
module.exports = function (cachePath, manifestPath, contentPath) {
return fs.readFile(manifestPath, { encoding: "utf-8" }).then(function (manifestContents) {
const chapters = JSON.parse(manifestContents);
console.log("All chapters downloaded; beginning conversion to EPUB chapters");
const mapper = throat(10, function (filePath) {
return convertChapter(filePath, contentPath);
});
return Promise.all(chapterFilePaths.map(mapper));
})
.then(function () {
console.log("All chapters converted");
const mapper = throat(10, function (chapter) {
return convertChapter(chapter, cachePath, contentPath);
});
return Promise.all(chapters.map(mapper));
})
.then(function () {
console.log("All chapters converted");
});
};
/**
 * Lists the cached chapter files inside a directory.
 *
 * @param {string} cachePath - Directory to scan.
 * @returns {Promise<string[]>} Resolves to the paths of every entry whose name
 *   ends in ".html", each resolved against `cachePath`, in the order the
 *   directory listing reported them.
 */
function getChapterFilePaths(cachePath) {
  return fs.readdir(cachePath).then(function (entries) {
    const chapterPaths = [];
    for (const entry of entries) {
      // Anything that is not an .html file is ignored.
      if (entry.endsWith(".html")) {
        chapterPaths.push(path.resolve(cachePath, entry));
      }
    }
    return chapterPaths;
  });
}
function convertChapter(filePath, contentPath) {
const filename = path.basename(filePath);
function convertChapter(chapter, cachePath, contentPath) {
const filename = chapter.filename;
const filePath = path.resolve(cachePath, filename);
console.log(`- Reading ${filename}`);
return fs.readFile(filePath, { encoding: "utf-8" }).then(function (contents) {
console.log(`- Read ${filename}`);
const rawChapterDoc = jsdom(contents);
const output = getChapterString(rawChapterDoc);
const output = getChapterString(chapter.title, rawChapterDoc);
// TODO: this should probably not be necessary... jsdom bug I guess!?
rawChapterDoc.defaultView.close();
@ -50,8 +42,7 @@ function convertChapter(filePath, contentPath) {
});
}
function getChapterString(rawChapterDoc) {
const title = getChapterTitle(rawChapterDoc);
function getChapterString(title, rawChapterDoc) {
const body = getBodyXml(title, rawChapterDoc.querySelector(".entry-content"));
return `<?xml version="1.0" encoding="UTF-8" ?>

View file

@ -5,7 +5,6 @@ const mkdirp = require("mkdirp-then");
const request = require("requisition");
const zfill = require("zfill");
const jsdom = require("./jsdom.js");
const getChapterTitle = require("./common.js").getChapterTitle;
const FILENAME_PREFIX = "chapter";
@ -110,6 +109,10 @@ function getNextChapterUrl(rawChapterDoc) {
return null;
}
/**
 * Pulls the chapter title out of a raw chapter document.
 *
 * @param {Object} rawChapterDoc - Document-like object exposing `querySelector`.
 * @returns {string} The `textContent` of the element matched by `h1.entry-title`.
 */
function getChapterTitle(rawChapterDoc) {
  // No fallback: a document without a matching heading makes the access below throw.
  const headingElement = rawChapterDoc.querySelector("h1.entry-title");
  return headingElement.textContent;
}
function retry(times, fn) {
if (times === 0) {
return fn();

View file

@ -21,16 +21,16 @@ Promise.resolve()
.then(function () {
return download(START_CHAPTER_URL, cachePath, manifestPath);
})
/* .then(function () {
.then(function () {
return rimraf(chaptersPath);
})
.then(function () {
return mkdirp(chaptersPath);
})
.then(function () {
return convert(cachePath, chaptersPath);
return convert(cachePath, manifestPath, chaptersPath);
})
.then(function () {
/* .then(function () {
return extras(contentPath, chaptersPath);
})*/
.then(function () {