Improve deletion of empty-ish elements

The previous heuristic of replacing them with a space character caused spaces to be inserted in the middle of words. Also, various cases were missed. This should help.
This commit is contained in:
Domenic Denicola 2021-01-01 16:24:33 -05:00
commit ba387d3555
2 changed files with 28 additions and 39 deletions

View file

@ -1713,12 +1713,6 @@
"after": "Dinah being kidnapped, and leaving"
}
],
"https://parahumans.wordpress.com/2012/05/22/infestation-11-2/": [
{
"before": "attentio n",
"after": "attention"
}
],
"https://parahumans.wordpress.com/2012/05/26/infestation-11-3/": [
{
"before": "intimidating: A sea",
@ -1943,7 +1937,7 @@
"after": "<p><i>Crazed, kooky, cracked, crazy,<br />\nMental, dotty, whacked, loopy…</i></p>"
},
{
"before": "<p><em>Crazed, kooky, cracked, crazy,<br />\n<em>Nutty, screwy, mentally diseased…</em><br />\n</em> She ",
"before": "<p><em>Crazed, kooky, cracked, crazy,<br />\n<em>Nutty, screwy, mentally diseased…</em><br />\n<br />\n</em>She ",
"after": "<p><i>Crazed, kooky, cracked, crazy,<br />\nNutty, screwy, mentally diseased…</i></p>\n<p>She "
},
{
@ -2765,11 +2759,6 @@
{
"before": "stranger class",
"after": "stranger-class"
},
{
"before": "real ly",
"after": "really",
"comment": "There's an empty em element in the middle of this word that gets converted to a space"
}
],
"https://parahumans.wordpress.com/2013/02/05/monarch-18-6/": [
@ -3284,10 +3273,6 @@
{
"before": "in,” I said. “Could",
"after": "in,” I said, “could"
},
{
"before": " “ <em>Were",
"after": "“<em>Were"
}
],
"https://parahumans.wordpress.com/2013/05/16/cell-22-6/": [
@ -3317,10 +3302,6 @@
"before": "it,” he said. “She",
"after": "it,” he said, “she"
},
{
"before": "“ I dont",
"after": "“I dont"
},
{
"before": "confirmed okay",
"after": "confirmed ok",
@ -3579,6 +3560,10 @@
{
"before": "avoided-",
"after": "avoided"
},
{
"before": "<span style=\"text-decoration:underline;\"><strong>Lord</strong></span><span style=\"text-decoration:underline;\"> <strong>Walston</strong></span>",
"after": "<span style=\"text-decoration:underline;\"><strong>Lord Walston</strong></span>"
}
],
"https://parahumans.wordpress.com/2013/07/18/sting-26-1/": [
@ -3649,8 +3634,8 @@
],
"https://parahumans.wordpress.com/2013/07/30/sting-26-5/": [
{
"before": "</em> Wait…",
"after": " Wait…</em>"
"before": "</em> Wait…“</p>",
"after": " Wait…</em>”</p>"
},
{
"before": "This,” Imp said. “Is",
@ -3758,10 +3743,6 @@
{
"before": "shift position</p>",
"after": "shift position.</p>"
},
{
"before": "“ Convenient.",
"after": "“Convenient."
}
],
"https://parahumans.wordpress.com/2013/08/20/extinction-27-3/": [
@ -3770,8 +3751,8 @@
"after": "guess,” Sophia said, “you"
},
{
"before": "said “ But",
"after": "said. “But"
"before": "“Maybe,” I said “But",
"after": "“Maybe,” I said. “But"
}
],
"https://parahumans.wordpress.com/2013/08/24/extinction-27-5/": [
@ -3954,10 +3935,6 @@
{
"before": "<em>Once the bead was in place, every</em> bullet hit.",
"after": "Once the bead was in place, every bullet hit."
},
{
"before": "I m",
"after": "Im"
}
],
"https://parahumans.wordpress.com/2013/09/24/venom-29-3/": [