From 1bec818bc738212efc4e053cdbdefd59cfba7e78 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Mon, 9 May 2022 17:31:05 -0400 Subject: [PATCH] Enable wildcards in voice preloading --- mimic3_tts/download.py | 6 ++---- mimic3_tts/tts.py | 28 ++++++++++++++++++++++++++-- mimic3_tts/utils.py | 2 ++ mimic3_tts/voice.py | 7 ++++++- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/mimic3_tts/download.py b/mimic3_tts/download.py index 2ef9288..a92c75c 100644 --- a/mimic3_tts/download.py +++ b/mimic3_tts/download.py @@ -28,12 +28,10 @@ from urllib.error import HTTPError from ._resources import _PACKAGE, _VOICES from .const import DEFAULT_VOICES_DOWNLOAD_DIR, DEFAULT_VOICES_URL_FORMAT -from .utils import file_sha256_sum, wildcard_to_regex +from .utils import WILDCARD, file_sha256_sum, wildcard_to_regex _LOGGER = logging.getLogger(__name__) -_WILDCARD = "*" - # ----------------------------------------------------------------------------- @@ -193,7 +191,7 @@ def main(argv=None): sys.exit(0) args.key = [ - wildcard_to_regex(key, wildcard=_WILDCARD) if _WILDCARD in key else key + wildcard_to_regex(key, wildcard=WILDCARD) if WILDCARD in key else key for key in args.key ] diff --git a/mimic3_tts/tts.py b/mimic3_tts/tts.py index cfff1be..ed41248 100644 --- a/mimic3_tts/tts.py +++ b/mimic3_tts/tts.py @@ -17,6 +17,7 @@ import audioop import itertools import logging +import re import typing from copy import deepcopy from dataclasses import dataclass, field @@ -48,6 +49,7 @@ from .const import ( DEFAULT_VOLUME, ) from .download import VoiceFile, download_voice +from .utils import WILDCARD, wildcard_to_regex from .voice import SPEAKER_TYPE, BreakType, Mimic3Voice _DIR = Path(__file__).parent @@ -282,8 +284,30 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): ) def preload_voice(self, voice_key: str): - """Ensure voice is loaded in memory before synthesis""" - self._get_or_load_voice(voice_key) + """Ensure voice(s) are loaded in memory before synthesis. + + Voice key may contain wildcards (*). + """ + voice_keys = [] + + if WILDCARD in voice_key: + key_or_pattern = wildcard_to_regex(voice_key, wildcard=WILDCARD) + if isinstance(key_or_pattern, re.Pattern): + # Wildcards + for maybe_key in _VOICES.keys(): + if key_or_pattern.match(maybe_key): + voice_keys.append(maybe_key) + + _LOGGER.debug("%s matched %s", key_or_pattern, voice_keys) + else: + # Didn't contain wildcards + voice_keys.append(voice_key) + else: + # No wildcards + voice_keys.append(voice_key) + + for key_to_load in voice_keys: + self._get_or_load_voice(key_to_load) # ------------------------------------------------------------------------- diff --git a/mimic3_tts/utils.py b/mimic3_tts/utils.py index e8d370c..47d8199 100644 --- a/mimic3_tts/utils.py +++ b/mimic3_tts/utils.py @@ -21,6 +21,8 @@ import unicodedata import numpy as np +WILDCARD = "*" + def audio_float_to_int16( audio: np.ndarray, max_wav_value: float = 32767.0 diff --git a/mimic3_tts/voice.py b/mimic3_tts/voice.py index c3575b4..f71596b 100644 --- a/mimic3_tts/voice.py +++ b/mimic3_tts/voice.py @@ -180,7 +180,12 @@ class Mimic3Voice(metaclass=ABCMeta): text_array = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0) text_lengths_array = np.array([text_array.shape[1]], dtype=np.int64) scales_array = np.array( - [noise_scale, length_scale, noise_w,], dtype=np.float32, + [ + noise_scale, + length_scale, + noise_w, + ], + dtype=np.float32, ) inputs = {