Enable wildcards in voice preloading

This commit is contained in:
Michael Hansen 2022-05-09 17:31:05 -04:00
commit 1bec818bc7
4 changed files with 36 additions and 7 deletions

View file

@ -28,12 +28,10 @@ from urllib.error import HTTPError
from ._resources import _PACKAGE, _VOICES
from .const import DEFAULT_VOICES_DOWNLOAD_DIR, DEFAULT_VOICES_URL_FORMAT
from .utils import file_sha256_sum, wildcard_to_regex
from .utils import WILDCARD, file_sha256_sum, wildcard_to_regex
_LOGGER = logging.getLogger(__name__)
_WILDCARD = "*"
# -----------------------------------------------------------------------------
@ -193,7 +191,7 @@ def main(argv=None):
sys.exit(0)
args.key = [
wildcard_to_regex(key, wildcard=_WILDCARD) if _WILDCARD in key else key
wildcard_to_regex(key, wildcard=WILDCARD) if WILDCARD in key else key
for key in args.key
]

View file

@ -17,6 +17,7 @@
import audioop
import itertools
import logging
import re
import typing
from copy import deepcopy
from dataclasses import dataclass, field
@ -48,6 +49,7 @@ from .const import (
DEFAULT_VOLUME,
)
from .download import VoiceFile, download_voice
from .utils import WILDCARD, wildcard_to_regex
from .voice import SPEAKER_TYPE, BreakType, Mimic3Voice
_DIR = Path(__file__).parent
@ -282,8 +284,30 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
)
def preload_voice(self, voice_key: str):
    """Ensure voice(s) are loaded in memory before synthesis.

    Voice key may contain wildcards (*). When it does, the key is converted
    with ``wildcard_to_regex`` and every key in ``_VOICES`` that the
    resulting pattern matches is loaded. A key without wildcards is loaded
    as-is.
    """
    # Default: load exactly the key that was given. This covers both the
    # "no wildcard character" case and the case where the conversion helper
    # hands back something other than a compiled pattern.
    voice_keys = [voice_key]

    if WILDCARD in voice_key:
        key_or_pattern = wildcard_to_regex(voice_key, wildcard=WILDCARD)
        if isinstance(key_or_pattern, re.Pattern):
            # Wildcards: expand against the known voice keys
            voice_keys = [
                maybe_key
                for maybe_key in _VOICES.keys()
                if key_or_pattern.match(maybe_key)
            ]
            _LOGGER.debug("%s matched %s", key_or_pattern, voice_keys)

    for key_to_load in voice_keys:
        self._get_or_load_voice(key_to_load)
# -------------------------------------------------------------------------

View file

@ -21,6 +21,8 @@ import unicodedata
import numpy as np
# Wildcard character recognised in voice keys; callers test for its presence
# in a key and pass it to wildcard_to_regex(..., wildcard=WILDCARD).
WILDCARD = "*"
def audio_float_to_int16(
audio: np.ndarray, max_wav_value: float = 32767.0

View file

@ -180,7 +180,12 @@ class Mimic3Voice(metaclass=ABCMeta):
text_array = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
text_lengths_array = np.array([text_array.shape[1]], dtype=np.int64)
scales_array = np.array(
[noise_scale, length_scale, noise_w,], dtype=np.float32,
[
noise_scale,
length_scale,
noise_w,
],
dtype=np.float32,
)
inputs = {