More clean-ups; do these at a textual level (string replacements on the serialized HTML rather than DOM manipulation).

This commit is contained in:
Domenic Denicola 2015-05-09 00:21:05 +02:00
commit f932be159c

View file

@ -50,7 +50,7 @@ function convertChapter(filePath, contentPath) {
function getChapterString(rawChapterDoc) {
const title = rawChapterDoc.querySelector("h1.entry-title").textContent;
const body = cleanContentEl(rawChapterDoc.querySelector(".entry-content")).innerHTML;
const body = cleanContentEl(rawChapterDoc.querySelector(".entry-content"));
return `<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
@ -103,7 +103,16 @@ function cleanContentEl(el) {
}
});
return el;
let html = el.innerHTML;
// Fix recurring strange pattern of extra <br> in <p>...<em>...<br>\n</em></p>
html = html.replace(/<br>\s*<\/em><\/p>/g, '</em></p>');
// One-off fixes
html = html.replace(/truck reached<br>\nthe other Nine/, 'truck reached the other Nine');
return html;
}
function isEmptyOrGarbage(el) {