Modernize DOM manipulation code

jsdom, and JavaScript, have gotten nicer since this was first written.
This commit is contained in:
Domenic Denicola 2020-10-26 21:19:56 -04:00
commit fb93322823

View file

@ -34,21 +34,21 @@ ${body}
function getBodyXML(chapter, book, contentEl) {
// Remove initial Next Chapter and Previous Chapter <p>
contentEl.removeChild(contentEl.firstElementChild);
contentEl.firstElementChild.remove();
// Remove everything after the last <p> (e.g. analytics <div>s)
const lastP = contentEl.querySelector("p:last-of-type");
while (contentEl.lastElementChild !== lastP) {
contentEl.removeChild(contentEl.lastElementChild);
contentEl.lastElementChild.remove();
}
// Remove empty <p>s or Last Chapter/Next Chapter <p>s
while (isEmptyOrGarbage(contentEl.lastElementChild)) {
contentEl.removeChild(contentEl.lastElementChild);
contentEl.lastElementChild.remove();
}
// Remove redundant attributes and style
Array.prototype.forEach.call(contentEl.children, child => {
for (const child of contentEl.children) {
if (child.getAttribute("dir") === "ltr") {
child.removeAttribute("dir");
}
@ -63,46 +63,42 @@ function getBodyXML(chapter, book, contentEl) {
if (child.getAttribute("style") === "text-align:left;padding-left:30px;") {
child.setAttribute("style", "padding-left:30px;");
}
});
}
// Remove empty <em>s and <i>s
// Remove style attributes from them, as they're always messed up.
const ems = contentEl.querySelectorAll("em, i");
Array.prototype.forEach.call(ems, em => {
for (const em of contentEl.querySelectorAll("em, i")) {
if (em.textContent.trim() === "") {
const replacement = contentEl.ownerDocument.createTextNode(" ");
em.parentNode.replaceChild(replacement, em);
em.replaceWith(contentEl.ownerDocument.createTextNode(" "));
} else {
em.removeAttribute("style");
}
});
}
// In https://parahumans.wordpress.com/2013/01/05/monarch-16-13/ there are some <address>s that should be <p>s O_o
const addresses = contentEl.querySelectorAll("address");
Array.prototype.forEach.call(addresses, address => {
for (const address of contentEl.querySelectorAll("address")) {
const p = contentEl.ownerDocument.createElement("p");
p.innerHTML = address.innerHTML;
address.parentNode.replaceChild(p, address);
});
address.replaceWith(p);
}
// Every <span> except underline ones is pointless at best and frequently messed up. (Weird font size, line spacing,
// etc.)
const spans = contentEl.querySelectorAll("span");
Array.prototype.forEach.call(spans, span => {
for (const span of contentEl.querySelectorAll("span")) {
if (span.getAttribute("style") === "text-decoration:underline;") {
return;
continue;
}
if (span.textContent.trim() === "") {
span.parentNode.removeChild(span);
span.remove();
} else {
const docFrag = contentEl.ownerDocument.createDocumentFragment();
while (span.firstChild) {
docFrag.appendChild(span.firstChild);
}
span.parentNode.replaceChild(docFrag, span);
span.replaceWith(docFrag);
}
});
}
// In Ward, CloudFlare email protection obfuscates the email addresses:
// https://usamaejaz.com/cloudflare-email-decoding/