Use inference settings from config

This commit is contained in:
Michael Hansen 2022-03-21 16:52:32 -04:00
commit 355a467c5e
2 changed files with 27 additions and 4 deletions

View file

@@ -76,9 +76,9 @@
<div class="row mt-3">
<div class="col-auto">
<label for="noise-scale" title="Voice volatility">Noise:</label>
<input type="number" id="noise-scale" name="noiseScale" size="5" min="0" max="1" step="0.001" value="0.333">
<input type="number" id="noise-scale" name="noiseScale" size="5" min="0" max="1" step="0.001" value="0.667">
<label for="noise-w" class="ml-2" title="Voice volatility 2">Noise W:</label>
<input type="number" id="noise-w" name="noiseW" size="5" min="0" max="1" step="0.001" value="1.0">
<input type="number" id="noise-w" name="noiseW" size="5" min="0" max="1" step="0.001" value="0.8">
<label for="length-scale" class="ml-2" title="Voice speed (< 1 is faster)">Length:</label>
<input type="number" id="length-scale" name="lengthScale" size="5" min="0" step="0.001" value="1">
</div>
@@ -120,8 +120,8 @@
var voiceList = q('#voice-list')
var voice = voiceList.options[voiceList.selectedIndex].value
var noiseScale = q('#noise-scale').value || '0.333'
var noiseW = q('#noise-w').value || '1.0'
var noiseScale = q('#noise-scale').value || '0.667'
var noiseW = q('#noise-w').value || '0.8'
var lengthScale = q('#length-scale').value || '1.0'
var speakerList = q('#speaker-list')
@@ -191,6 +191,13 @@
'beforeend', '<option value="">default</option>'
)
}
// Update inference settings
if (voice.properties) {
q('#length-scale').value = voice.properties.length_scale || 1.0
q('#noise-scale').value = voice.properties.noise_scale || 0.667
q('#noise-w').value = voice.properties.noise_w || 0.8
}
}
q('#voice-list').addEventListener('change', voiceChanged)

View file

@@ -134,6 +134,21 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
continue
voice_lang = lang_dir.name
# Load config
config_path = voice_dir / "config.json"
_LOGGER.debug("Loading config from %s", config_path)
with open(config_path, "r", encoding="utf-8") as config_file:
config = TrainingConfig.load(config_file)
properties: typing.Dict[str, typing.Any] = {
"length_scale": config.inference.length_scale,
"noise_scale": config.inference.noise_scale,
"noise_w": config.inference.noise_w,
}
# Load speaker names
voice_name = voice_dir.name
speakers: typing.Optional[typing.Sequence[str]] = None
@@ -154,6 +169,7 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
language=voice_lang,
description="",
speakers=speakers,
properties=properties,
)
def begin_utterance(self):