Enable wildcards in voice preloading

2022-05-09 17:31:05 -04:00 · 2022-05-09 17:31:05 -04:00 · 1bec818bc7
commit 1bec818bc7
parent 229f8728aa
4 changed files with 36 additions and 7 deletions
--- a/mimic3_tts/download.py
+++ b/mimic3_tts/download.py
@ -28,12 +28,10 @@ from urllib.error import HTTPError

 from ._resources import _PACKAGE, _VOICES
 from .const import DEFAULT_VOICES_DOWNLOAD_DIR, DEFAULT_VOICES_URL_FORMAT
-from .utils import file_sha256_sum, wildcard_to_regex
+from .utils import WILDCARD, file_sha256_sum, wildcard_to_regex

 _LOGGER = logging.getLogger(__name__)

-_WILDCARD = "*"
-
 # -----------------------------------------------------------------------------


@ -193,7 +191,7 @@ def main(argv=None):
        sys.exit(0)

    args.key = [
-        wildcard_to_regex(key, wildcard=_WILDCARD) if _WILDCARD in key else key
+        wildcard_to_regex(key, wildcard=WILDCARD) if WILDCARD in key else key
        for key in args.key
    ]

--- a/mimic3_tts/tts.py
+++ b/mimic3_tts/tts.py
@ -17,6 +17,7 @@
 import audioop
 import itertools
 import logging
+import re
 import typing
 from copy import deepcopy
 from dataclasses import dataclass, field
@ -48,6 +49,7 @@ from .const import (
    DEFAULT_VOLUME,
 )
 from .download import VoiceFile, download_voice
+from .utils import WILDCARD, wildcard_to_regex
 from .voice import SPEAKER_TYPE, BreakType, Mimic3Voice

 _DIR = Path(__file__).parent
@ -282,8 +284,30 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
            )

    def preload_voice(self, voice_key: str):
-        """Ensure voice is loaded in memory before synthesis"""
-        self._get_or_load_voice(voice_key)
+        """Ensure voice(s) are loaded in memory before synthesis.
+
+        Voice key may contain wildcards (*).
+        """
+        voice_keys = []
+
+        if WILDCARD in voice_key:
+            key_or_pattern = wildcard_to_regex(voice_key, wildcard=WILDCARD)
+            if isinstance(key_or_pattern, re.Pattern):
+                # Wildcards
+                for maybe_key in _VOICES.keys():
+                    if key_or_pattern.match(maybe_key):
+                        voice_keys.append(maybe_key)
+
+                _LOGGER.debug("%s matched %s", key_or_pattern, voice_keys)
+            else:
+                # Didn't contain wildcards
+                voice_keys.append(voice_key)
+        else:
+            # No wildcards
+            voice_keys.append(voice_key)
+
+        for key_to_load in voice_keys:
+            self._get_or_load_voice(key_to_load)

    # -------------------------------------------------------------------------

--- a/mimic3_tts/utils.py
+++ b/mimic3_tts/utils.py
@ -21,6 +21,8 @@ import unicodedata

 import numpy as np

+WILDCARD = "*"
+

 def audio_float_to_int16(
    audio: np.ndarray, max_wav_value: float = 32767.0
--- a/mimic3_tts/voice.py
+++ b/mimic3_tts/voice.py
@ -180,7 +180,12 @@ class Mimic3Voice(metaclass=ABCMeta):
        text_array = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
        text_lengths_array = np.array([text_array.shape[1]], dtype=np.int64)
        scales_array = np.array(
-            [noise_scale, length_scale, noise_w,], dtype=np.float32,
+            [
+                noise_scale,
+                length_scale,
+                noise_w,
+            ],
+            dtype=np.float32,
        )

        inputs = {