Remove more non-breaking spaces
Also normalizes after-sentence spaces to two (normal) spaces, but that's not visible to readers.
This commit is contained in:
parent
9fc36b813f
commit
442d245e2d
2 changed files with 19 additions and 7 deletions
|
|
@@ -140,11 +140,14 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
xml = xml.replace(/<i>([^ ]+)<\/i>/g, "<em>$1</em>");
|
||||
xml = xml.replace(/<i>([^ ]+)( +)<\/i>/g, "<em>$1</em>$2");
|
||||
|
||||
// There are way too many nonbreaking spaces where they don't belong.
|
||||
// If they show up three in a row, then let them live. Otherwise, they die.
|
||||
// Also remove any run of them after a period.
|
||||
xml = xml.replace(/([^\xA0])\xA0\xA0?([^\xA0])/g, "$1 $2");
|
||||
xml = xml.replace(/\.\x20*\xA0[\xA0\x20]*/, ". ");
|
||||
// There are way too many nonbreaking spaces where they don't belong. If they show up three in a row, then let them
|
||||
// live; they may be there for alignment or something similar. Otherwise, they die.
|
||||
//
|
||||
// Also, normalize spaces after a period/quote mark to two (normal) spaces. The second one is invisible when
|
||||
// rendered, but it helps future heuristics detect end of sentences.
|
||||
xml = xml.replace(/\xA0{1,2}(?!\x20\xA0)/g, " ");
|
||||
xml = xml.replace(/([.”])\x20*\xA0[\xA0\x20]*/g, "$1  ");
|
||||
xml = xml.replace(/([.”])\x20{3,}/g, "$1  ");
|
||||
|
||||
function fixEms() {
|
||||
// Fix recurring broken-up or erroneous <em>s
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue