From b017ab54d1c74c4fe2c968b7fbb3585dfd7bf3bb Mon Sep 17 00:00:00 2001
From: Domenic Denicola
Date: Sat, 14 Nov 2020 19:51:20 -0500
Subject: [PATCH] Fix a variety of wrong closing quotes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
E.g. doubled closing quotes (””), a closing quote used as an apostrophe ("I”ll"), and closing quotes where opening quotes belong
---
lib/convert-worker.js | 40 +++++++++++++++++++---------------------
lib/substitutions.json | 42 ++++++++++++++++++++++++++++++++----------
2 files changed, 51 insertions(+), 31 deletions(-)
diff --git a/lib/convert-worker.js b/lib/convert-worker.js
index bd9d172..e796a84 100644
--- a/lib/convert-worker.js
+++ b/lib/convert-worker.js
@@ -171,34 +171,32 @@ function getBodyXML(chapter, book, contentEl) {
xml = xml.replace(/([a-z]+),<\/em>/g, "$1,");
}
- function fixQuotesAndApostrophes() {
- // Fix recurring poor quotes and apostrophes
- xml = xml.replace(/”/g, "
“");
- xml = xml.replace(/“\s*<\/p>/g, "”
");
- xml = xml.replace(/“\s*<\/em><\/p>/g, "”
");
- xml = xml.replace(/‘\s*<\/p>/g, "’");
- xml = xml.replace(/‘\s*<\/em><\/p>/g, "’");
- xml = xml.replace(/,” <\/em>/g, ",” ");
- xml = xml.replace(/′/g, "’");
- xml = xml.replace(/″/g, "”");
- xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
- xml = xml.replace(/I‘m/g, "I’m");
- xml = xml.replace(/“\s+/g, "
“");
- xml = xml.replace(/'/g, "’");
- xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
- xml = xml.replace(/([a-z])”<\/p>/g, "$1.”
");
- }
-
- // These interact with each other, so do them a few times.
+ // These quote/apostrophe/em fixes interact with each other. TODO: try to disentangle so we don't repeat all of
+ // fixEms.
xml = xml.replace(/,” <\/em>/g, ",” ");
fixEms();
- fixQuotesAndApostrophes();
+ xml = xml.replace(/”/g, "
“");
+ xml = xml.replace(/“\s*<\/p>/g, "”
");
+ xml = xml.replace(/“\s*<\/em><\/p>/g, "”");
+ xml = xml.replace(/‘\s*<\/p>/g, "’");
+ xml = xml.replace(/‘\s*<\/em><\/p>/g, "’");
+ xml = xml.replace(/,” <\/em>/g, ",” ");
+ xml = xml.replace(/′/g, "’");
+ xml = xml.replace(/″/g, "”");
+ xml = xml.replace(/([A-Za-z])‘s(\s?)/g, "$1’s$2");
+ xml = xml.replace(/I‘m/g, "I’m");
+ xml = xml.replace(/“\s+/g, "
“");
+ xml = xml.replace(/'/g, "’");
+ xml = xml.replace(/’([A-Za-z]+)’/g, "‘$1’");
+ xml = xml.replace(/([a-z])”<\/p>/g, "$1.”
");
fixEms();
xml = xml.replace(/‘([^<]+)<\/em>‘/g, "‘$1’");
- xml = xml.replace(/I”m/g, "I’m");
xml = xml.replace(/([a-z]+)!<\/em>/g, "$1!");
xml = xml.replace(/(?([\w ’]+)([!.?])”<\/em>/g, "$1$2”");
xml = xml.replace(/([\w ’]+[!.?])”<\/em>/g, "$1”");
+ xml = xml.replace(/I”(m|ll)/g, "I’$1");
+ xml = xml.replace(/””<\/p>/g, "”");
+ xml = xml.replace(/^([^“]+?) ?”(?![ —<])/gm, "$1 “");
// Similar problems occur in Ward with and as do in Worm with s
xml = xml.replace(//g, "");
diff --git a/lib/substitutions.json b/lib/substitutions.json
index 9eafa64..3552166 100644
--- a/lib/substitutions.json
+++ b/lib/substitutions.json
@@ -2361,13 +2361,13 @@
"before": "leave?” Hero asked. ”Why",
"after": "leave?” Hero asked. “Why"
},
- {
- "before": "eye contact. ”She",
- "after": "eye contact. “She"
- },
{
"before": "Alexandria said. ”Why",
"after": "Alexandria said. “Why"
+ },
+ {
+ "before": "voice as quiet. ”We’ve got teams",
+ "after": "voice as quiet. “We’ve got teams"
}
],
"https://parahumans.wordpress.com/2012/11/10/colony-15-8/": [
@@ -3408,6 +3408,10 @@
{
"before": "Bay,” Wanton said. “Not",
"after": "Bay,” Wanton said, “not"
+ },
+ {
+ "before": "Connecting to “agChat.ParahumansOnline016.par:6667” (Attempt 1 of 55)
\nResolving Host Name
\nConnecting…
\nConnected.
\nUsing identity “Iblis”, nick “Iblis”
\nWelcome to Parahumans Online Chatroom #116, ‘The Holdout’. Rules Here. Behave. Obey the @s.",
+ "after": "Connecting to “agChat.ParahumansOnline016.par:6667” (Attempt 1 of 55)
\nResolving Host Name
\nConnecting…
\nConnected.
\nUsing identity “Iblis”, nick “Iblis”
\nWelcome to Parahumans Online Chatroom #116, ‘The Holdout’. Rules Here. Behave. Obey the @s."
}
],
"https://parahumans.wordpress.com/2013/06/29/scarab-25-1/": [
@@ -4906,6 +4910,12 @@
"after": "—and from other horrors."
}
],
+ "https://www.parahumans.net/2018/05/22/torch-7-3/": [
+ {
+ "before": "by saying,”Isn’t it a rule",
+ "after": "by saying, “Isn’t it a rule"
+ }
+ ],
"https://www.parahumans.net/2018/05/26/torch-7-4/": [
{
"before": "⊙
",
@@ -5447,6 +5457,10 @@
{
"before": "then teacher can’t",
"after": "then Teacher can’t"
+ },
+ {
+ "before": "“Who is this?”a strange voice asked",
+ "after": "“Who is this?” a strange voice asked"
}
],
"https://www.parahumans.net/2018/09/13/gleaming-interlude-9-x/": [
@@ -5626,12 +5640,6 @@
"_comment": "See comment in https://www.parahumans.net/2018/09/29/gleaming-9-11/"
}
],
- "https://www.parahumans.net/2018/10/06/gleaming-9-13/": [
- {
- "before": "second.”Clarify.”",
- "after": "second. “Clarify.”"
- }
- ],
"https://www.parahumans.net/2018/10/09/gleaming-9-14/": [
{
"before": "in a fireman carry. with my free hand",
@@ -6383,6 +6391,10 @@
{
"before": "wasn’t mom’s whole",
"after": "wasn’t Mom’s whole"
+ },
+ {
+ "before": "“Fuck,”Capricorn said again",
+ "after": "“Fuck,” Capricorn said again"
}
],
"https://www.parahumans.net/2019/03/29/heavens-12-x/": [
@@ -6689,6 +6701,10 @@
{
"before": "ducked their heads down and hurried",
"after": "ducked their heads down and hurried."
+ },
+ {
+ "before": "in the way,”she said",
+ "after": "in the way,” she said"
}
],
"https://www.parahumans.net/2019/10/01/sundown-17-1/": [
@@ -6971,6 +6987,12 @@
"after": "Imp and the Heartbroken"
}
],
+ "https://www.parahumans.net/2020/02/23/infrared-19-z/": [
+ {
+ "before": "5’1 “",
+ "after": "5′1″"
+ }
+ ],
"https://www.parahumans.net/2020/02/25/last-20-1/": [
{
"before": "across this clearing. eyes, cameras",