Use inference settings from config

2022-03-21 16:52:32 -04:00 · 2022-03-21 16:52:32 -04:00 · 355a467c5e
commit 355a467c5e
parent d4ef4744c7
2 changed files with 27 additions and 4 deletions
--- a/mimic3-http/mimic3_http/templates/index.html
+++ b/mimic3-http/mimic3_http/templates/index.html
@@ -76,9 +76,9 @@
            <div class="row mt-3">
                <div class="col-auto">
                    <label for="noise-scale" title="Voice volatility">Noise:</label>
-                    <input type="number" id="noise-scale" name="noiseScale" size="5" min="0" max="1" step="0.001" value="0.333">
+                    <input type="number" id="noise-scale" name="noiseScale" size="5" min="0" max="1" step="0.001" value="0.667">
                    <label for="noise-w" class="ml-2" title="Voice volatility 2">Noise W:</label>
-                    <input type="number" id="noise-w" name="noiseW" size="5" min="0" max="1" step="0.001" value="1.0">
+                    <input type="number" id="noise-w" name="noiseW" size="5" min="0" max="1" step="0.001" value="0.8">
                    <label for="length-scale" class="ml-2" title="Voice speed (< 1 is faster)">Length:</label>
                    <input type="number" id="length-scale" name="lengthScale" size="5" min="0" step="0.001" value="1">
                </div>
@@ -120,8 +120,8 @@
             var voiceList = q('#voice-list')
             var voice = voiceList.options[voiceList.selectedIndex].value

-             var noiseScale = q('#noise-scale').value || '0.333'
-             var noiseW = q('#noise-w').value || '1.0'
+             var noiseScale = q('#noise-scale').value || '0.667'
+             var noiseW = q('#noise-w').value || '0.8'
             var lengthScale = q('#length-scale').value || '1.0'

             var speakerList = q('#speaker-list')
@@ -191,6 +191,13 @@
                    'beforeend', '<option value="">default</option>'
                )
             }
+
+             // Update inference settings
+             if (voice.properties) {
+                 q('#length-scale').value = voice.properties.length_scale || 1.0
+                 q('#noise-scale').value = voice.properties.noise_scale || 0.667
+                 q('#noise-w').value = voice.properties.noise_w || 0.8
+             }
         }

         q('#voice-list').addEventListener('change', voiceChanged)
--- a/mimic3-tts/mimic3_tts/tts.py
+++ b/mimic3-tts/mimic3_tts/tts.py
@@ -134,6 +134,21 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
                        continue

                    voice_lang = lang_dir.name
+
+                    # Load config
+                    config_path = voice_dir / "config.json"
+                    _LOGGER.debug("Loading config from %s", config_path)
+
+                    with open(config_path, "r", encoding="utf-8") as config_file:
+                        config = TrainingConfig.load(config_file)
+
+                    properties: typing.Dict[str, typing.Any] = {
+                        "length_scale": config.inference.length_scale,
+                        "noise_scale": config.inference.noise_scale,
+                        "noise_w": config.inference.noise_w,
+                    }
+
+                    # Load speaker names
                    voice_name = voice_dir.name
                    speakers: typing.Optional[typing.Sequence[str]] = None

@@ -154,6 +169,7 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
                        language=voice_lang,
                        description="",
                        speakers=speakers,
+                        properties=properties,
                    )

    def begin_utterance(self):