Improve deletion of empty-ish elements
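The previous heuristic of replacing them with a space character caused spaces to be inserted in the middle of words. Also, various cases were missed: only <em> and <i> were checked, not <strong> or <b>. Now elements with no text content at all are removed outright, whitespace-only elements with no child elements are replaced with a single space, and whitespace-only elements whose sole child element is a <br> are replaced with a plain line break. This should help.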
This commit is contained in:
parent
a405adf6b7
commit
ba387d3555
2 changed files with 28 additions and 39 deletions
|
|
@ -77,13 +77,21 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
}
|
||||
}
|
||||
|
||||
// Remove empty <em>s and <i>s
|
||||
// Remove style attributes from them, as they're always messed up.
|
||||
for (const em of contentEl.querySelectorAll("em, i")) {
|
||||
if (em.textContent.trim() === "") {
|
||||
em.replaceWith(contentEl.ownerDocument.createTextNode(" "));
|
||||
// Remove empty inline elements.
|
||||
// Remove style attributes from inline elements, as they're always messed up.
|
||||
for (const el of contentEl.querySelectorAll("em, i, strong, b")) {
|
||||
const { textContent } = el;
|
||||
|
||||
if (textContent === "") {
|
||||
el.remove();
|
||||
} else if (textContent.trim() === "") {
|
||||
if (el.childElementCount === 0) {
|
||||
el.replaceWith(" ");
|
||||
} else if (el.childElementCount === 1 && el.children[0].localName === "br") {
|
||||
el.outerHTML = "<br />\n";
|
||||
}
|
||||
} else {
|
||||
em.removeAttribute("style");
|
||||
el.removeAttribute("style");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -221,9 +229,13 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
xml = xml.replace(/(\s*)<\/strong>/g, "</strong>$1");
|
||||
xml = xml.replace(/><strong>(.*)<\/strong>:</g, "><strong>$1:</strong><");
|
||||
|
||||
// No need for line breaks before paragraph ends
|
||||
// No need for line breaks before paragraph ends or after paragraph starts
|
||||
// These often occur with the <br>s inside <b>/<strong>/<em>/<i> fixed above.
|
||||
xml = xml.replace(/<br \/>\s*<\/p>/g, "</p>");
|
||||
xml = xml.replace(/<p><br \/>\s*/g, "<p>");
|
||||
|
||||
// This is another quote fix but it needs to happen after the line break deletion... so entangled, ugh.
|
||||
xml = xml.replace(/<\/em>\s*“\s*<\/p>/g, "</em>”</p>");
|
||||
|
||||
// Fix missing spaces after commas
|
||||
xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue