Refactor convert-worker.js a bit
This introduces a few fixes around italics.
This commit is contained in:
parent
f70ba64629
commit
33d88eb523
2 changed files with 214 additions and 159 deletions
|
|
@ -135,6 +135,10 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
// Fix recurring strange pattern of extra <br> in <p>...<em>...<br>\n</em></p>
|
||||
xml = xml.replace(/<br \/>\s*<\/em><\/p>/g, "</em></p>");
|
||||
|
||||
// Replace single-word <i>s with <em>s. Other <i>s are probably erroneous too, but these are known-bad.
|
||||
xml = xml.replace(/<i>([^ ]+)<\/i>/g, "<em>$1</em>");
|
||||
xml = xml.replace(/<i>([^ ]+)( +)<\/i>/g, "<em>$1</em>$2");
|
||||
|
||||
// There are way too many nonbreaking spaces where they don't belong.
|
||||
// If they show up three in a row, then let them live. Otherwise, they die.
|
||||
// Also remove any run of them after a period.
|
||||
|
|
@ -192,8 +196,6 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
fixEms();
|
||||
fixQuotesAndApostrophes();
|
||||
fixEms();
|
||||
fixQuotesAndApostrophes();
|
||||
fixEms();
|
||||
xml = xml.replace(/I”m/g, "I’m");
|
||||
|
||||
// Similar problems occur in Ward with <b> and <strong> as do in Worm with <em>s
|
||||
|
|
@ -206,9 +208,155 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
xml = xml.replace(/(\s*)<\/strong>/g, "</strong>$1");
|
||||
|
||||
// No need for line breaks before paragraph ends
|
||||
// These often occur with the <br>s inside <b>/<strong> fixed above.
|
||||
// These often occur with the <br>s inside <b>/<strong>/<em>/<i> fixed above.
|
||||
xml = xml.replace(/<br \/>\s*<\/p>/g, "</p>");
|
||||
|
||||
// Fix missing spaces after commas
|
||||
xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
|
||||
|
||||
// Fix bad periods and spacing/markup surrounding them
|
||||
xml = xml.replace(/\.\.<\/p>/g, ".</p>");
|
||||
xml = xml.replace(/\.\.”<\/p>/g, ".”</p>");
|
||||
xml = xml.replace(/ \. /g, ". ");
|
||||
xml = xml.replace(/ \.<\/p>/g, ".</p>");
|
||||
xml = xml.replace(/\.<em>\.\./g, "<em>…");
|
||||
|
||||
// Fix extra spaces
|
||||
xml = xml.replace(/ ? <\/p>/g, "</p>");
|
||||
xml = xml.replace(/([a-z]) ,/g, "$1,");
|
||||
|
||||
xml = fixDialogueTags(xml);
|
||||
xml = fixForeignNames(xml);
|
||||
xml = fixEmDashes(xml);
|
||||
xml = enDashJointNames(xml);
|
||||
xml = fixPossessives(xml);
|
||||
xml = cleanSceneBreaks(xml);
|
||||
xml = fixCapitalization(xml, book);
|
||||
xml = fixMispellings(xml);
|
||||
xml = fixHyphens(xml);
|
||||
xml = standardizeSpellings(xml);
|
||||
|
||||
// One-off fixes
|
||||
for (const substitution of substitutions[chapter.url] || []) {
|
||||
if (substitution.before) {
|
||||
const indexOf = xml.indexOf(substitution.before);
|
||||
if (indexOf === -1) {
|
||||
warnings.push(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
|
||||
`updated at the source, in which case, you should edit substitutions.json.`);
|
||||
}
|
||||
if (indexOf !== xml.lastIndexOf(substitution.before)) {
|
||||
warnings.push(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
|
||||
`Update substitutions.json for a more precise substitution.`);
|
||||
}
|
||||
|
||||
xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
|
||||
} else if (substitution.regExp) {
|
||||
xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
|
||||
} else {
|
||||
warnings.push(`Invalid substitution specified for ${chapter.url}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a <html>.
|
||||
// Use this opportunity to insert a comment pointing to the original URL, for reference.
|
||||
xml = xml.replace(
|
||||
/<body xmlns="http:\/\/www.w3.org\/1999\/xhtml">/,
|
||||
`<body>\n<!-- ${chapter.url} -->\n`);
|
||||
|
||||
return { xml, warnings };
|
||||
}
|
||||
|
||||
/**
 * Repairs recurring punctuation/capitalization mistakes around dialogue tags.
 *
 * Two kinds of fixes are applied, in order:
 *
 * 1. A question followed by a capitalized "She asked" / "He asked" gets the pronoun lowercased, and the one or two
 *    whitespace characters after the closing quote are collapsed to a single space.
 * 2. A two-word dialogue tag that ends in a comma and is followed by a quote opening with a capital letter is
 *    terminated with a period instead, e.g.
 *      “I didn’t get much done,” Greg said, “I got distracted by...
 *    becomes
 *      “I didn’t get much done,” Greg said. “I got distracted by...
 *
 * The second fix is a heuristic. It occasionally overcorrects (e.g. “Basically,” Alec said, “For your powers...”,
 * where the right fix is lowercasing the "F"); those cases are reverted via one-off entries in substitutions.json.
 * Automatic lowercasing is not attempted because proper names, "I", etc. must stay capitalized.
 *
 * @param {string} xml - Serialized chapter markup.
 * @returns {string} The markup with dialogue tags corrected.
 */
function fixDialogueTags(xml) {
  return xml
    // Miscapitalized pronoun after a question.
    .replace(/\?”\s\s?She asked/g, "?” she asked")
    .replace(/\?”\s\s?He asked/g, "?” he asked")
    // Unterminated sentence after a two-word dialogue tag (X said/admitted/sighed/etc.).
    .replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
}
|
||||
|
||||
/**
 * Normalizes how certain foreign-language names are rendered.
 *
 * - "Yangban" is consistently missing its diacritics, so they are restored.
 * - A handful of names are only sometimes italicized. Since other foreign-language names (like Yàngbǎn) are not
 *   italicized, the <em> wrapper is stripped from them. For the names whose pattern allows it, any trailing text
 *   inside the same <em> (up to the next tag) is kept, just without the italics.
 *
 * @param {string} xml - Serialized chapter markup.
 * @returns {string} The markup with the names normalized.
 */
function fixForeignNames(xml) {
  // Ordered [pattern, replacement] pairs; applied top to bottom.
  const rules = [
    [/Yangban/g, "Yàngbǎn"],
    [/<em>Garama<\/em>/g, "Garama"],
    [/<em>Thanda<\/em>/g, "Thanda"],
    [/<em>Sifara([^<]*)<\/em>/g, "Sifara$1"],
    [/<em>Moord Nag([^<]*)<\/em>/g, "Moord Nag$1"],
    [/<em>Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1"],
    [/<em>Turanta([^<]*)<\/em>/g, "Turanta$1"]
  ];

  let result = xml;
  for (const [pattern, replacement] of rules) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
|
||||
|
||||
/**
 * Converts hyphens and en dashes that are standing in for em dashes into real em dashes.
 *
 * The rules are order-sensitive: the more specific contexts (next to quotes, <em>, or paragraph boundaries) run
 * first, then the generic "dash with adjacent whitespace" rules, then a couple of stutter patterns.
 *
 * @param {string} xml - Serialized chapter markup.
 * @returns {string} The markup with em dashes normalized.
 */
function fixEmDashes(xml) {
  // Ordered [pattern, replacement] pairs; do not reorder.
  const dashRules = [
    [/ – /g, "—"],
    [/“((?:<em>)?)-/g, "“$1—"],
    [/-[,.]?”/g, "—”"],
    [/-(!|\?)”/g, "—$1”"],
    [/-[,.]?<\/em>”/g, "—</em>”"],
    // A hyphen directly before an opening quote: the quote is rewritten as a closing quote too.
    [/-“/g, "—”"],
    [/<p>-/g, "<p>—"],
    [/-<\/p>/g, "—</p>"],
    [/-<\/em><\/p>/g, "—</em></p>"],
    // Any remaining en dash, swallowing up to two whitespace characters on each side.
    [/\s?\s?–\s?\s?/g, "—"],
    // Hyphens with whitespace on one side are treated as em dashes.
    [/-\s\s?/g, "—"],
    [/\s?\s-/g, "—"],
    [/\s+—”/g, "—”"],
    // Stutters
    [/I-I/g, "I—I"],
    [/I-uh/g, "I—uh"]
  ];

  return dashRules.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), xml);
}
|
||||
|
||||
/**
 * Replaces the hyphen in known joint names (two names joined as equals) with an en dash.
 *
 * Only the specific pairs listed below are touched. Note that "East-West" is also lowercased to "east–west".
 *
 * @param {string} xml - Serialized chapter markup.
 * @returns {string} The markup with joint names using en dashes.
 */
function enDashJointNames(xml) {
  // Ordered [pattern, replacement] pairs; applied top to bottom.
  const jointNameRules = [
    [/Dallon-Pelham/g, "Dallon–Pelham"],
    [/Bet-Gimel/g, "Bet–Gimel"],
    [/Tristan-Capricorn/g, "Tristan–Capricorn"],
    [/Capricorn-Byron/g, "Capricorn–Byron"],
    [/Tristan-Byron/g, "Tristan–Byron"],
    [/Gimel-Europe/g, "Gimel–Europe"],
    [/G-N/g, "G–N"],
    [/Imp-Damsel/g, "Imp–Damsel"],
    [/Damsel-Ashley/g, "Damsel–Ashley"],
    [/Antares-Anelace/g, "Antares–Anelace"],
    [/Challenger-Gallant/g, "Challenger–Gallant"],
    [/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2"],
    [/Norwalk-Fairfield/g, "Norwalk–Fairfield"],
    [/East-West/g, "east–west"],
    [/(Green|Yellow)-Black/g, "$1–Black"],
    [/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob"],
    [/Astaroth-Nidhug/g, "Astaroth–Nidhug"],
    [/Capulet-Montague/g, "Capulet–Montague"],
    [/Weaver-Clockblocker/g, "Weaver–Clockblocker"],
    [/Alexandria-Pretender/g, "Alexandria–Pretender"],
    [/Night Hag-Nyx/g, "Night Hag–Nyx"],
    [/Crawler-Breed/g, "Crawler–Breed"],
    [/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant"],
    [/Armsmaster-Defiant/g, "Armsmaster–Defiant"]
  ];

  let result = xml;
  for (const [pattern, replacement] of jointNameRules) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
|
||||
|
||||
function fixPossessives(xml) {
|
||||
// Fix possessive of names ending in "s"
|
||||
// Note: if the "s" is unvoiced, as in Marquis, then it doesn't get the second "s".
|
||||
xml = xml.replace(/([^‘])Judas’([^s])/g, "$1Judas’s$2");
|
||||
|
|
@ -246,62 +394,15 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
xml = xml.replace(/([^‘])Ms. Stillons’([^s])/g, "$1Ms. Stillons’s$2");
|
||||
xml = xml.replace(/([^‘])Chuckles’([^s])/g, "$1Chuckles’s$2");
|
||||
|
||||
// Fixes dashes
|
||||
xml = xml.replace(/ – /g, "—");
|
||||
xml = xml.replace(/“((?:<em>)?)-/g, "“$1—");
|
||||
xml = xml.replace(/-[,.]?”/g, "—”");
|
||||
xml = xml.replace(/-(!|\?)”/g, "—$1”");
|
||||
xml = xml.replace(/-[,.]?<\/em>”/g, "—</em>”");
|
||||
xml = xml.replace(/-“/g, "—”");
|
||||
xml = xml.replace(/<p>-/g, "<p>—");
|
||||
xml = xml.replace(/-<\/p>/g, "—</p>");
|
||||
xml = xml.replace(/-<\/em><\/p>/g, "—</em></p>");
|
||||
xml = xml.replace(/\s?\s?–\s?\s?/g, "—");
|
||||
xml = xml.replace(/-\s\s?/g, "—");
|
||||
xml = xml.replace(/\s?\s-/g, "—");
|
||||
xml = xml.replace(/\s+—”/g, "—”");
|
||||
xml = xml.replace(/I-I/g, "I—I");
|
||||
xml = xml.replace(/I-uh/g, "I—uh");
|
||||
|
||||
// "X-year-old" should use hyphens; all grammar guides agree. The books are very inconsistent but most often omit
|
||||
// them.
|
||||
xml = xml.replace(/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2");
|
||||
xml = xml.replace(/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old");
|
||||
|
||||
// Fix missing spaces after commas
|
||||
xml = xml.replace(/([a-zA-Z]+),([a-zA-Z]+)/g, "$1, $2");
|
||||
|
||||
// Joint names should use em dashes
|
||||
xml = xml.replace(/Dallon-Pelham/g, "Dallon–Pelham");
|
||||
xml = xml.replace(/Bet-Gimel/g, "Bet–Gimel");
|
||||
xml = xml.replace(/Tristan-Capricorn/g, "Tristan–Capricorn");
|
||||
xml = xml.replace(/Capricorn-Byron/g, "Capricorn–Byron");
|
||||
xml = xml.replace(/Tristan-Byron/g, "Tristan–Byron");
|
||||
xml = xml.replace(/Gimel-Europe/g, "Gimel–Europe");
|
||||
xml = xml.replace(/G-N/g, "G–N");
|
||||
xml = xml.replace(/Imp-Damsel/g, "Imp–Damsel");
|
||||
xml = xml.replace(/Damsel-Ashley/g, "Damsel–Ashley");
|
||||
xml = xml.replace(/Antares-Anelace/g, "Antares–Anelace");
|
||||
xml = xml.replace(/Challenger-Gallant/g, "Challenger–Gallant");
|
||||
xml = xml.replace(/Undersider(s?)-(Breakthrough|Ambassador)/g, "Undersider$1–$2");
|
||||
xml = xml.replace(/Norwalk-Fairfield/g, "Norwalk–Fairfield");
|
||||
xml = xml.replace(/East-West/g, "east–west");
|
||||
xml = xml.replace(/(Green|Yellow)-Black/g, "$1–Black");
|
||||
xml = xml.replace(/Creutzfeldt-Jakob/g, "Creutzfeldt–Jakob");
|
||||
xml = xml.replace(/Astaroth-Nidhug/g, "Astaroth–Nidhug");
|
||||
xml = xml.replace(/Capulet-Montague/g, "Capulet–Montague");
|
||||
xml = xml.replace(/Weaver-Clockblocker/g, "Weaver–Clockblocker");
|
||||
xml = xml.replace(/Alexandria-Pretender/g, "Alexandria–Pretender");
|
||||
xml = xml.replace(/Night Hag-Nyx/g, "Night Hag–Nyx");
|
||||
xml = xml.replace(/Crawler-Breed/g, "Crawler–Breed");
|
||||
xml = xml.replace(/Simurgh-Myrddin-plant/g, "Simurgh–Myrddin–plant");
|
||||
xml = xml.replace(/Armsmaster-Defiant/g, "Armsmaster–Defiant");
|
||||
return xml;
|
||||
}
|
||||
|
||||
function cleanSceneBreaks(xml) {
|
||||
// Normalize scene breaks. <hr> would be more semantically appropriate, but loses the author's intent. This is
|
||||
// especially the case in Ward, which uses a variety of different scene breaks.
|
||||
|
||||
xml = xml.replace(/<p(?:[^>]*)>■<\/p>/g, `<p style="text-align: center;">■</p>`);
|
||||
|
||||
xml = xml.replace(/<p style="text-align: center;">⊙<\/p>/g, `<p style="text-align: center;">⊙</p>`);
|
||||
xml = xml.replace(/<p style="text-align: center;"><strong>⊙<\/strong><\/p>/g, `<p style="text-align: center;">⊙</p>`);
|
||||
xml = xml.replace(/<p style="text-align: center;"><em><strong>⊙<\/strong><\/em><\/p>/g,
|
||||
`<p style="text-align: center;">⊙</p>`);
|
||||
|
|
@ -311,40 +412,10 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
xml = xml.replace(/<p style="text-align: center;"><strong>⊙ *⊙ *⊙ *⊙ *⊙<\/strong><\/p>/g,
|
||||
`<p style="text-align: center;">⊙ ⊙ ⊙ ⊙ ⊙</p>`);
|
||||
|
||||
// Fix recurring miscapitalization with questions
|
||||
xml = xml.replace(/\?”\s\s?She asked/g, "?” she asked");
|
||||
xml = xml.replace(/\?”\s\s?He asked/g, "?” he asked");
|
||||
|
||||
// Fix bad periods and spacing/markup surrounding them
|
||||
xml = xml.replace(/\.\.<\/p>/g, ".</p>");
|
||||
xml = xml.replace(/\.\.”<\/p>/g, ".”</p>");
|
||||
xml = xml.replace(/ \. /g, ". ");
|
||||
xml = xml.replace(/ \.<\/p>/g, ".</p>");
|
||||
xml = xml.replace(/\.<em>\.\./g, "<em>…");
|
||||
|
||||
// Fix extra spaces
|
||||
xml = xml.replace(/ ? <\/p>/g, "</p>");
|
||||
xml = xml.replace(/([a-z]) ,/g, "$1,");
|
||||
|
||||
// The author often fails to terminate a sentence, instead using a comma after a dialogue tag. For example,
|
||||
// > “I didn’t get much done,” Greg said, “I got distracted by...
|
||||
// This should instead be
|
||||
// > “I didn’t get much done,” Greg said. “I got distracted by...
|
||||
//
|
||||
// Our heuristic is to try to automatically fix this if the dialogue tag is two words (X said/admitted/sighed/etc.).
|
||||
//
|
||||
// This sometimes overcorrects, as in the following example:
|
||||
// > “Basically,” Alec said, “For your powers to manifest, ...
|
||||
// Here instead we should lowercase the "f". We handle that via one-offs in substitutions.json.
|
||||
//
|
||||
// This applies to ~800 instances, so although we have to correct back in substitutions.json a decent number of
|
||||
// times, it definitely pays for itself. Most of the instances we have to correct back we also need to fix the
|
||||
// capitalization anyway, and that's harder to do automatically, since proper names/"I"/etc. stay capitalized.
|
||||
xml = xml.replace(/,” ([A-Za-z]+ [A-Za-z]+), “([A-Z])/g, ",” $1. “$2");
|
||||
|
||||
// Replace single-word <i>s with <em>s. Other <i>s are probably erroneous too, but these are known-bad.
|
||||
xml = xml.replace(/<i>([A-Za-z]+)<\/i>/g, "<em>$1</em>");
|
||||
return xml;
|
||||
}
|
||||
|
||||
function fixCapitalization(xml, book) {
|
||||
// This occurs enough times it's better to do here than in one-off fixes. We correct the single instance where
|
||||
// it's incorrect to capitalize in the one-off fixes.
|
||||
// Note that Ward contains much talk of "the clairvoyants", so we don't want to capitalize plurals.
|
||||
|
|
@ -366,13 +437,6 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
// This is sometimes missing its capitalization.
|
||||
xml = xml.replace(/the birdcage/g, "the Birdcage");
|
||||
|
||||
// This is usually spelled "TV" but sometimes the other ways. Normalize.
|
||||
xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
|
||||
xml = xml.replace(/t\.v\./ig, "TV");
|
||||
|
||||
// This is commonly misspelled.
|
||||
xml = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");
|
||||
|
||||
// There's no reason why these should be capitalized. (Note that they never appear at the beginning of any sentences.)
|
||||
xml = xml.replace(/Halberd/g, "halberd");
|
||||
xml = xml.replace(/Loft/g, "loft");
|
||||
|
|
@ -397,9 +461,6 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
"$1–$2"
|
||||
);
|
||||
|
||||
// This is consistently missing accents
|
||||
xml = xml.replace(/Yangban/g, "Yàngbǎn");
|
||||
|
||||
// Place names need to always be capitalized
|
||||
xml = xml.replace(/North end/g, "North End");
|
||||
xml = xml.replace(/(Stonemast|Shale) avenue/g, "$1 Avenue");
|
||||
|
|
@ -409,14 +470,48 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
xml = xml.replace(/the megalopolis/g, "the Megalopolis");
|
||||
xml = xml.replace(/earths(?![a-z])/g, "Earths");
|
||||
|
||||
// These are usually not italicized, but sometimes are. Other foreign-language names (like Yàngbǎn) are not
|
||||
// italicized, so we go in the direction of removing the italics.
|
||||
xml = xml.replace(/<em>Garama<\/em>/g, "Garama");
|
||||
xml = xml.replace(/<em>Thanda<\/em>/g, "Thanda");
|
||||
xml = xml.replace(/<em>Sifara([^<]*)<\/em>/g, "Sifara$1");
|
||||
xml = xml.replace(/<em>Moord Nag([^<]*)<\/em>/g, "Moord Nag$1");
|
||||
xml = xml.replace(/<em>Califa de Perro([^<]*)<\/em>/g, "Califa de Perro$1");
|
||||
xml = xml.replace(/<em>Turanta([^<]*)<\/em>/g, "Turanta$1");
|
||||
// "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
|
||||
// instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
|
||||
// substitutions.json.
|
||||
xml = xml.replace(/(?<!mom), dad(?![a-z])/g, ", Dad");
|
||||
xml = xml.replace(/, mom(?![a-z-])/g, ", Mom");
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
||||
/**
 * Corrects a small set of words that are commonly misspelled in the source text.
 *
 * - "shoulderblade" (either capitalization of the first letter) is split into "shoulder blade".
 * - "pre-emptive" (and derived forms such as "pre-emptively") loses its hyphen.
 *
 * @param {string} xml - Serialized chapter markup.
 * @returns {string} The markup with the misspellings corrected.
 */
function fixMispellings(xml) {
  const withShoulderBlade = xml.replace(/([Ss])houlderblade/g, "$1houlder blade");

  // Preemptive(ly) is often, but not always, hyphenated. It should not be.
  return withShoulderBlade.replace(/([Pp])re-emptive/g, "$1reemptive");
}
|
||||
|
||||
/**
 * Inserts hyphens that the source text tends to omit.
 *
 * - "X year old(s)" becomes "X-year-old(s)"; the books are inconsistent here and most often leave the hyphens out.
 *   A following "A or B-year-old" is then turned into the suspended form "A- or B-year-old".
 * - A fixed list of compound modifiers that are consistently missing their hyphen.
 *
 * @param {string} xml - Serialized chapter markup.
 * @returns {string} The markup with hyphens added.
 */
function fixHyphens(xml) {
  // Ordered [pattern, replacement] pairs. The first two depend on each other's order.
  const hyphenRules = [
    [/(\w+)[ -]year[ -]old(s?)(?!\w)/g, "$1-year-old$2"],
    [/(\w+) or (\w+)-year-old/g, "$1- or $2-year-old"],
    [/self destruct/g, "self-destruct"],
    [/life threatening/g, "life-threatening"],
    [/hard headed/g, "hard-headed"],
    [/shoulder mounted/g, "shoulder-mounted"],
    [/golden skinned/g, "golden-skinned"],
    [/creepy crawl/g, "creepy-crawl"],
    [/well armed/g, "well-armed"],
    [/able bodied/g, "able-bodied"]
  ];

  let result = xml;
  hyphenRules.forEach(([pattern, replacement]) => {
    result = result.replace(pattern, replacement);
  });
  return result;
}
|
||||
|
||||
function standardizeSpellings(xml) {
|
||||
// This is usually spelled "TV" but sometimes the other ways. Normalize.
|
||||
xml = xml.replace(/(\b)tv(\b)/g, "$1TV$2");
|
||||
xml = xml.replace(/t\.v\./ig, "TV");
|
||||
|
||||
// "okay" is preferred to "ok" or "o.k.". This sometimes gets changed back via substitutions.json when people are
|
||||
// writing notes and thus probably the intention was to be less formal. Also it seems per
|
||||
|
|
@ -438,53 +533,7 @@ function getBodyXML(chapter, book, contentEl) {
|
|||
// "gray" is the majority spelling, except for "greyhound"
|
||||
xml = xml.replace(/(G|g)rey(?!hound)/g, "$1ray");
|
||||
|
||||
// "Mom" and "Dad" should be capitalized when used as a proper name. These regexps are tuned to catch a good amount of
|
||||
// instances, without over-correcting for non-proper-name-like cases. Many other instances are handled in
|
||||
// substitutions.json.
|
||||
xml = xml.replace(/(?<!mom), dad(?![a-z])/g, ", Dad");
|
||||
xml = xml.replace(/, mom(?![a-z-])/g, ", Mom");
|
||||
|
||||
// These are consistently missing hyphens.
|
||||
xml = xml.replace(/self destruct/g, "self-destruct");
|
||||
xml = xml.replace(/life threatening/g, "life-threatening");
|
||||
xml = xml.replace(/hard headed/g, "hard-headed");
|
||||
xml = xml.replace(/shoulder mounted/g, "shoulder-mounted");
|
||||
xml = xml.replace(/golden skinned/g, "golden-skinned");
|
||||
xml = xml.replace(/creepy crawl/g, "creepy-crawl");
|
||||
xml = xml.replace(/well armed/g, "well-armed");
|
||||
xml = xml.replace(/able bodied/g, "able-bodied");
|
||||
|
||||
// Preemptive(ly) is often hyphenated (not always). It should not be.
|
||||
xml = xml.replace(/([Pp])re-emptive/g, "$1reemptive");
|
||||
|
||||
// One-off fixes
|
||||
(substitutions[chapter.url] || []).forEach(substitution => {
|
||||
if (substitution.before) {
|
||||
const indexOf = xml.indexOf(substitution.before);
|
||||
if (indexOf === -1) {
|
||||
warnings.push(`Could not find text "${substitution.before}" in ${chapter.url}. The chapter may have been ` +
|
||||
`updated at the source, in which case, you should edit substitutions.json.`);
|
||||
}
|
||||
if (indexOf !== xml.lastIndexOf(substitution.before)) {
|
||||
warnings.push(`The text "${substitution.before}" occurred twice, and so the substitution was ambiguous. ` +
|
||||
`Update substitutions.json for a more precise substitution.`);
|
||||
}
|
||||
|
||||
xml = xml.replace(new RegExp(escapeRegExp(substitution.before)), substitution.after);
|
||||
} else if (substitution.regExp) {
|
||||
xml = xml.replace(new RegExp(substitution.regExp, "g"), substitution.replacement);
|
||||
} else {
|
||||
warnings.push(`Invalid substitution specified for ${chapter.url}`);
|
||||
}
|
||||
});
|
||||
|
||||
// Serializer inserts extra xmlns for us since it doesn't know we're going to put this into a <html>.
|
||||
// Use this opportunity to insert a comment pointing to the original URL, for reference.
|
||||
xml = xml.replace(
|
||||
/<body xmlns="http:\/\/www.w3.org\/1999\/xhtml">/,
|
||||
`<body>\n<!-- ${chapter.url} -->\n`);
|
||||
|
||||
return { xml, warnings };
|
||||
return xml;
|
||||
}
|
||||
|
||||
function isEmptyOrGarbage(el) {
|
||||
|
|
|
|||
|
|
@ -2448,12 +2448,6 @@
|
|||
"after": "all the way up to the nape of my neck"
|
||||
}
|
||||
],
|
||||
"https://parahumans.wordpress.com/2012/12/13/interlude-16-donation-bonus-2/": [
|
||||
{
|
||||
"before": "<i>“</i>",
|
||||
"after": "”"
|
||||
}
|
||||
],
|
||||
"https://parahumans.wordpress.com/2012/12/15/monarch-16-7/": [
|
||||
{
|
||||
"before": "Brockton bay",
|
||||
|
|
@ -2516,6 +2510,10 @@
|
|||
{
|
||||
"before": "ten,” I asked. “Just",
|
||||
"after": "ten,” I asked, “just"
|
||||
},
|
||||
{
|
||||
"before": "weeks, <i>months. </i>Anticipating",
|
||||
"after": "weeks, <em>months</em>. Anticipating"
|
||||
}
|
||||
],
|
||||
"https://parahumans.wordpress.com/2013/01/08/migration-17-1/": [
|
||||
|
|
@ -2654,7 +2652,7 @@
|
|||
],
|
||||
"https://parahumans.wordpress.com/2013/01/15/migration-17-8/": [
|
||||
{
|
||||
"before": "replied. <i>Need to talk about being more secure with our names. “</i>What’s going on?<i>“</i>",
|
||||
"before": "replied. <i>Need to talk about being more secure with our names. “</i>What’s going on?”",
|
||||
"after": "replied. <em>Need to talk about being more secure with our names.</em> “What’s going on?”"
|
||||
},
|
||||
{
|
||||
|
|
@ -3356,6 +3354,10 @@
|
|||
{
|
||||
"before": "aren’t allies.",
|
||||
"after": "aren’t allies.”"
|
||||
},
|
||||
{
|
||||
"before": "<i>Blameful? </i>“Guilty",
|
||||
"after": "<em>Blameful?</em> “Guilty"
|
||||
}
|
||||
],
|
||||
"https://parahumans.wordpress.com/2013/06/20/crushed-24-5/": [
|
||||
|
|
@ -4547,6 +4549,10 @@
|
|||
{
|
||||
"before": "dangerous?” The recluse asked",
|
||||
"after": "dangerous?” the recluse asked"
|
||||
},
|
||||
{
|
||||
"before": "<i>Stop. </i>Please.",
|
||||
"after": "<em>Stop.</em> Please."
|
||||
}
|
||||
],
|
||||
"https://www.parahumans.net/2018/02/08/shade-4-4/": [
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue