From 355a467c5edde18d1d8b9c9fe81de502b600deef Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Mon, 21 Mar 2022 16:52:32 -0400 Subject: [PATCH] Use inference settings from config --- mimic3-http/mimic3_http/templates/index.html | 15 +++++++++++---- mimic3-tts/mimic3_tts/tts.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/mimic3-http/mimic3_http/templates/index.html b/mimic3-http/mimic3_http/templates/index.html index 503f4c2..0df266c 100644 --- a/mimic3-http/mimic3_http/templates/index.html +++ b/mimic3-http/mimic3_http/templates/index.html @@ -76,9 +76,9 @@
- + - +
@@ -120,8 +120,8 @@ var voiceList = q('#voice-list') var voice = voiceList.options[voiceList.selectedIndex].value - var noiseScale = q('#noise-scale').value || '0.333' - var noiseW = q('#noise-w').value || '1.0' + var noiseScale = q('#noise-scale').value || '0.667' + var noiseW = q('#noise-w').value || '0.8' var lengthScale = q('#length-scale').value || '1.0' var speakerList = q('#speaker-list') @@ -191,6 +191,13 @@ 'beforeend', '' ) } + + // Update inference settings + if (voice.properties) { + q('#length-scale').value = voice.properties.length_scale || 1.0 + q('#noise-scale').value = voice.properties.noise_scale || 0.667 + q('#noise-w').value = voice.properties.noise_w || 0.8 + } } q('#voice-list').addEventListener('change', voiceChanged) diff --git a/mimic3-tts/mimic3_tts/tts.py b/mimic3-tts/mimic3_tts/tts.py index 6b44c36..2fbf85f 100644 --- a/mimic3-tts/mimic3_tts/tts.py +++ b/mimic3-tts/mimic3_tts/tts.py @@ -134,6 +134,21 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): continue voice_lang = lang_dir.name + + # Load config + config_path = voice_dir / "config.json" + _LOGGER.debug("Loading config from %s", config_path) + + with open(config_path, "r", encoding="utf-8") as config_file: + config = TrainingConfig.load(config_file) + + properties: typing.Dict[str, typing.Any] = { + "length_scale": config.inference.length_scale, + "noise_scale": config.inference.noise_scale, + "noise_w": config.inference.noise_w, + } + + # Load speaker names voice_name = voice_dir.name speakers: typing.Optional[typing.Sequence[str]] = None @@ -154,6 +169,7 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): language=voice_lang, description="", speakers=speakers, + properties=properties, ) def begin_utterance(self):