Add SSML to web server
This commit is contained in:
parent
a0bbcddbdc
commit
40a74df2e6
5 changed files with 56 additions and 27 deletions
7
mimic3-http/README.md
Normal file
7
mimic3-http/README.md
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Mimic 3 Web Server
|
||||
|
||||
|
||||
## Server
|
||||
|
||||
|
||||
## Client
|
||||
|
|
@ -30,7 +30,12 @@ from uuid import uuid4
|
|||
|
||||
import hypercorn
|
||||
import quart_cors
|
||||
from mimic3_tts import AudioResult, Mimic3Settings, Mimic3TextToSpeechSystem
|
||||
from mimic3_tts import (
|
||||
AudioResult,
|
||||
Mimic3Settings,
|
||||
Mimic3TextToSpeechSystem,
|
||||
SSMLSpeaker,
|
||||
)
|
||||
from quart import (
|
||||
Quart,
|
||||
Response,
|
||||
|
|
@ -40,21 +45,19 @@ from quart import (
|
|||
send_from_directory,
|
||||
)
|
||||
|
||||
from ._resources import _PACKAGE, __version__, _DIR
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
_MISSING = object()
|
||||
_TEMP_DIR: typing.Optional[Path] = None
|
||||
|
||||
_PACKAGE = "mimic3_http"
|
||||
_DIR = Path(__file__).parent
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
parser = argparse.ArgumentParser(prog=_PACKAGE)
|
||||
parser.add_argument(
|
||||
"--voices-dir",
|
||||
action="append",
|
||||
help="Directory with <language>/<voice> structure",
|
||||
"--voices-dir", action="append", help="Directory with <language>/<voice> structure",
|
||||
)
|
||||
parser.add_argument("--voice", help="Default voice (name of model directory)")
|
||||
parser.add_argument(
|
||||
|
|
@ -65,9 +68,7 @@ parser.add_argument(
|
|||
)
|
||||
parser.add_argument("--speaker", type=int, help="Default speaker to use (name or id)")
|
||||
parser.add_argument(
|
||||
"--noise-scale",
|
||||
type=float,
|
||||
help="Noise scale [0-1], default is 0.667",
|
||||
"--noise-scale", type=float, help="Noise scale [0-1], default is 0.667",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--length-scale",
|
||||
|
|
@ -75,9 +76,7 @@ parser.add_argument(
|
|||
help="Length scale (1.0 is default speed, 0.5 is 2x faster)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--noise-w",
|
||||
type=float,
|
||||
help="Variation in cadence [0-1], default is 0.8",
|
||||
"--noise-w", type=float, help="Variation in cadence [0-1], default is 0.8",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cache-dir",
|
||||
|
|
@ -182,21 +181,32 @@ def text_to_wav(params: TextToWavParams, no_cache: bool = False) -> bytes:
|
|||
wav_params_set = False
|
||||
|
||||
with wav_file:
|
||||
# TODO: SSML
|
||||
mimic3.begin_utterance()
|
||||
mimic3.speak_text(params.text, text_language=params.text_language)
|
||||
results = mimic3.end_utterance()
|
||||
try:
|
||||
if params.ssml:
|
||||
results = SSMLSpeaker(mimic3).speak(params.text)
|
||||
else:
|
||||
mimic3.begin_utterance()
|
||||
mimic3.speak_text(params.text, text_language=params.text_language)
|
||||
results = mimic3.end_utterance()
|
||||
|
||||
for result in results:
|
||||
# TODO: Marks
|
||||
if isinstance(result, AudioResult):
|
||||
if not wav_params_set:
|
||||
wav_file.setframerate(result.sample_rate_hz)
|
||||
wav_file.setsampwidth(result.sample_width_bytes)
|
||||
wav_file.setnchannels(result.num_channels)
|
||||
wav_params_set = True
|
||||
for result in results:
|
||||
# TODO: Marks
|
||||
if isinstance(result, AudioResult):
|
||||
if not wav_params_set:
|
||||
wav_file.setframerate(result.sample_rate_hz)
|
||||
wav_file.setsampwidth(result.sample_width_bytes)
|
||||
wav_file.setnchannels(result.num_channels)
|
||||
wav_params_set = True
|
||||
|
||||
wav_file.writeframes(result.audio_bytes)
|
||||
wav_file.writeframes(result.audio_bytes)
|
||||
except Exception as e:
|
||||
if not wav_params_set:
|
||||
# Set default parameters so exception can propagate
|
||||
wav_file.setframerate(22050)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setnchannels(1)
|
||||
|
||||
raise e
|
||||
|
||||
wav_bytes = wav_io.getvalue()
|
||||
|
||||
|
|
@ -305,7 +315,9 @@ async def app_tts() -> Response:
|
|||
|
||||
@app.route("/api/voices", methods=["GET"])
|
||||
async def api_voices():
|
||||
return jsonify([dataclasses.asdict(v) for v in mimic3.get_voices()])
|
||||
voices_dict = {v.key: v for v in mimic3.get_voices()}
|
||||
voices = sorted(voices_dict.values(), key=lambda v: v.key)
|
||||
return jsonify([dataclasses.asdict(v) for v in voices])
|
||||
|
||||
|
||||
@app.route("/process", methods=["GET", "POST"])
|
||||
|
|
|
|||
0
mimic3-http/mimic3_http/py.typed
Normal file
0
mimic3-http/mimic3_http/py.typed
Normal file
|
|
@ -66,6 +66,10 @@
|
|||
<select id="speaker-list" name="speaker">
|
||||
</select>
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<input type="checkbox" id="ssml">
|
||||
<label class="ml-1" for="ssml">SSML</label>
|
||||
</div>
|
||||
</div>
|
||||
<div id="audio-message" class="row mt-3" hidden>
|
||||
<div class="col">
|
||||
|
|
@ -131,6 +135,7 @@
|
|||
}
|
||||
|
||||
var textLanguage = q('#text-language').value || ''
|
||||
var ssml = q('#ssml').value || 'false'
|
||||
|
||||
q('#audio-message').hidden = false
|
||||
|
||||
|
|
@ -142,6 +147,7 @@
|
|||
'&noiseScale=' + encodeURIComponent(noiseScale) +
|
||||
'&noiseW=' + encodeURIComponent(noiseW) +
|
||||
'&lengthScale=' + encodeURIComponent(lengthScale) +
|
||||
'&ssml=' + encodeURIComponent(ssml) +
|
||||
'&textLanguage=' + encodeURIComponent(textLanguage),
|
||||
{cache: 'no-cache'})
|
||||
|
||||
|
|
|
|||
|
|
@ -87,7 +87,11 @@ class SSMLSpeaker:
|
|||
if isinstance(ssml, etree.Element):
|
||||
root_element = ssml
|
||||
else:
|
||||
root_element = etree.fromstring(ssml)
|
||||
try:
|
||||
root_element = etree.fromstring(ssml)
|
||||
except etree.ParseError:
|
||||
# Try again wrapped in <speak>
|
||||
root_element = etree.fromstring(f"<speak>{ssml}</speak>")
|
||||
|
||||
# Process sub-elements and text chunks
|
||||
for elem_or_text in text_and_elements(root_element):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue