From dd04ebd6f82f7f751e9808f37b6b8d5ca4296310 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Tue, 29 Mar 2022 16:18:50 -0400 Subject: [PATCH] Add ability to download voices --- Dockerfile | 2 +- mimic3-tts/download.sh | 20 + mimic3-tts/mimic3_tts/__main__.py | 30 +- mimic3-tts/mimic3_tts/_resources.py | 17 + mimic3-tts/mimic3_tts/const.py | 23 + mimic3-tts/mimic3_tts/download.py | 168 +++---- mimic3-tts/mimic3_tts/tts.py | 104 +++- mimic3-tts/mimic3_tts/voices.json | 705 ++++++++++++++++++++++++++++ 8 files changed, 950 insertions(+), 119 deletions(-) create mode 100755 mimic3-tts/download.sh create mode 100644 mimic3-tts/mimic3_tts/const.py create mode 100644 mimic3-tts/mimic3_tts/voices.json diff --git a/Dockerfile b/Dockerfile index 155540b..dd0fd99 100644 --- a/Dockerfile +++ b/Dockerfile @@ -67,7 +67,7 @@ RUN --mount=type=cache,id=apt-run,target=/var/cache/apt \ mkdir -p /var/cache/apt/${TARGETARCH}${TARGETVARIANT}/archives/partial && \ apt-get update && \ apt-get install --yes --no-install-recommends \ - python3 ca-certificates + python3 ca-certificates libespeak-ng1 RUN useradd -ms /bin/bash mimic3 diff --git a/mimic3-tts/download.sh b/mimic3-tts/download.sh new file mode 100755 index 0000000..94d5025 --- /dev/null +++ b/mimic3-tts/download.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -eo pipefail + +# Directory of *this* script +this_dir="$( cd "$( dirname "$0" )" && pwd )" + +# Kebab to snake case +module_name="$(basename "${this_dir}" | sed -e 's/-/_/g')" +src_dir="${this_dir}/${module_name}" + +# Path to virtual environment +: "${venv:=${this_dir}/.venv}" + +if [ -d "${venv}" ]; then + # Activate virtual environment if available + source "${venv}/bin/activate" +fi + +export PYTHONPATH="${this_dir}" +python3 -m "${module_name}.download" "$@" diff --git a/mimic3-tts/mimic3_tts/__main__.py b/mimic3-tts/mimic3_tts/__main__.py index f6a140c..0f12df8 100644 --- a/mimic3-tts/mimic3_tts/__main__.py +++ b/mimic3-tts/mimic3_tts/__main__.py @@ -126,6 +126,7 @@ def main(): def initialize_args(state: CommandLineInterfaceState): + """Initialze CLI state from command-line arguments""" import numpy as np args = state.args @@ -201,11 +202,14 @@ def initialize_args(state: CommandLineInterfaceState): def initialize_tts(state: CommandLineInterfaceState): + """Create Mimic 3 TTS from command-line arguments""" from mimic3_tts import Mimic3Settings, Mimic3TextToSpeechSystem # noqa: F811 args = state.args - state.tts = Mimic3TextToSpeechSystem(Mimic3Settings()) + state.tts = Mimic3TextToSpeechSystem( + Mimic3Settings(voices_directories=args.voices_dir, speaker=args.speaker) + ) if args.voices: # Don't bother with the rest of the initialization @@ -433,9 +437,6 @@ def print_voices(state: CommandLineInterfaceState): def get_args(): """Parse command-line arguments""" parser = argparse.ArgumentParser(prog=_PACKAGE) - # parser.add_argument( - # "--language", help="Gruut language for text input (en-us, etc.)" - # ) parser.add_argument( "text", nargs="*", help="Text to convert to speech (default: stdin)" ) @@ -450,10 +451,16 @@ def get_args(): "-v", help="Name of voice (expected in /)", ) - # parser.add_argument( - # "--voices-dir", - # help="Directory with voices (format is /)", - # ) + parser.add_argument( + "--speaker", + "-s", + help="Name or number of speaker (default: first speaker)", + ) + parser.add_argument( + "--voices-dir", + action="append", + help="Directory with voices (format is /)", + ) parser.add_argument("--voices", action="store_true", help="List available voices") parser.add_argument("--output-dir", help="Directory to write WAV file(s)") parser.add_argument( @@ -506,13 +513,6 @@ def get_args(): help="Process text only after encountering a blank line", ) parser.add_argument("--ssml", action="store_true", help="Input text is SSML") - # parser.add_argument( - # "--optimizations", - # choices=["auto", "on", "off"], - # default="auto", - # help="Enable/disable Onnx optimizations (auto=disable on armv7l)", - # ) - parser.add_argument( "--stdout", action="store_true", diff --git a/mimic3-tts/mimic3_tts/_resources.py b/mimic3-tts/mimic3_tts/_resources.py index 9ecccae..0eb73ef 100644 --- a/mimic3-tts/mimic3_tts/_resources.py +++ b/mimic3-tts/mimic3_tts/_resources.py @@ -14,6 +14,7 @@ # along with this program. If not, see . # """Shared access to package resources""" +import json import os import typing from pathlib import Path @@ -32,3 +33,19 @@ _PACKAGE = "mimic3_tts" _DIR = Path(typing.cast(os.PathLike, files(_PACKAGE))) __version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip() + +# Load voices.json +# { +# "/": { +# "files": { +# "relative/path": { +# "size_bytes": size in bytes, +# "sha256_sum": sha256 hash +# } +# }, +# "speakers": [], +# "properties": {} +# } +# } +with open(_DIR / "voices.json", "r", encoding="utf-8") as voices_file: + _VOICES = json.load(voices_file) diff --git a/mimic3-tts/mimic3_tts/const.py b/mimic3-tts/mimic3_tts/const.py new file mode 100644 index 0000000..bf395ef --- /dev/null +++ b/mimic3-tts/mimic3_tts/const.py @@ -0,0 +1,23 @@ +# Copyright 2022 Mycroft AI Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from pathlib import Path + +from xdgenvpy import XDG + +DEFAULT_VOICE = "en_US/vctk_low" +DEFAULT_LANGUAGE = "en_US" +DEFAULT_VOICES_URL_FORMAT = "https://github.com/MycroftAI/mimic3-voices/raw/master/{lang}/{name}" +DEFAULT_VOICES_DOWNLOAD_DIR = Path(XDG().XDG_DATA_HOME) / "mimic3" diff --git a/mimic3-tts/mimic3_tts/download.py b/mimic3-tts/mimic3_tts/download.py index f53c58d..bf05bff 100644 --- a/mimic3-tts/mimic3_tts/download.py +++ b/mimic3-tts/mimic3_tts/download.py @@ -16,17 +16,15 @@ import argparse import json import logging -import shutil import sys -import tempfile import typing import urllib.request +from dataclasses import dataclass from pathlib import Path from urllib.error import HTTPError -from xdgenvpy import XDG - -from ._resources import _DIR, _PACKAGE +from ._resources import _PACKAGE, _VOICES +from .const import DEFAULT_VOICES_DOWNLOAD_DIR, DEFAULT_VOICES_URL_FORMAT _LOGGER = logging.getLogger(__name__) @@ -37,69 +35,61 @@ class VoiceDownloadError(Exception): """Occurs when a voice fails to download""" -def download_voice(voices_dir: typing.Union[str, Path], link: str) -> Path: - """Download and extract a voice (or vocoder)""" +@dataclass +class VoiceFile: + """File associated with a voice to download""" + + relative_path: str + size_bytes: typing.Optional[int] = None + sha256_sum: typing.Optional[str] = None + + +def download_voice( + voice_key: str, + url_base: str, + voice_files: typing.Iterable[VoiceFile], + voices_dir: typing.Union[str, Path], + chunk_bytes: int = 4096, +): + """Downloads a voice to a directory""" from tqdm.auto import tqdm - voice_name = link.split("/")[-1] - voices_dir = Path(voices_dir) - voices_dir.mkdir(parents=True, exist_ok=True) + if url_base.endswith("/"): + # Remove final slash + url_base = url_base[:-1] - _LOGGER.debug("Downloading voice to %s from %s", voices_dir, link) + voice_dir = Path(voices_dir) / voice_key + voice_dir.mkdir(parents=True, exist_ok=True) - try: - with urllib.request.urlopen(link) as response: - with tempfile.NamedTemporaryFile(mode="wb+", suffix=".tar.gz") as temp_file: - with tqdm( - unit="B", - unit_scale=True, - unit_divisor=1024, - miniters=1, - desc=voice_name, - total=int(response.headers.get("content-length", 0)), - ) as pbar: - chunk = response.read(4096) - while chunk: - temp_file.write(chunk) - pbar.update(len(chunk)) - chunk = response.read(4096) + _LOGGER.debug("Downloading voice %s to %s", voice_key, voice_dir) - temp_file.seek(0) + for voice_file in voice_files: + file_url = f"{url_base}/{voice_file.relative_path}" + file_path = voice_dir / voice_file.relative_path - # Extract - with tempfile.TemporaryDirectory() as temp_dir_str: - temp_dir = Path(temp_dir_str) - _LOGGER.debug("Extracting %s to %s", temp_file.name, temp_dir_str) - shutil.unpack_archive(temp_file.name, temp_dir_str) + try: + with urllib.request.urlopen(file_url) as response: + with open(file_path, mode="wb") as dest_file: + with tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + desc=voice_file.relative_path, + total=int(response.headers.get("content-length", 0)), + ) as pbar: + chunk = response.read(chunk_bytes) + while chunk: + dest_file.write(chunk) + pbar.update(len(chunk)) + chunk = response.read(chunk_bytes) - # Expecting / - lang_dir = next(temp_dir.iterdir()) - assert lang_dir.is_dir() - - voice_dir = next(lang_dir.iterdir()) - assert voice_dir.is_dir() - - # Copy to destination - dest_lang_dir = voices_dir / lang_dir.name - dest_lang_dir.mkdir(parents=True, exist_ok=True) - - dest_voice_dir = voices_dir / lang_dir.name / voice_dir.name - if dest_voice_dir.is_dir(): - # Delete existing files - shutil.rmtree(str(dest_voice_dir)) - - # Move files - _LOGGER.debug("Moving %s to %s", voice_dir, dest_voice_dir) - shutil.move(str(voice_dir), str(dest_voice_dir)) - - _LOGGER.info("Installed %s to %s", link, dest_voice_dir) - - return dest_voice_dir - except HTTPError as e: - _LOGGER.exception("download_voice") - raise VoiceDownloadError( - f"Failed to download voice {voice_name} from {link}: {e}" - ) from e + _LOGGER.debug("Downloaded %s", file_path) + except HTTPError as e: + _LOGGER.exception("download_voice") + raise VoiceDownloadError( + f"Failed to download file for voice {voice_key} from {file_url}: {e}" + ) from e # ----------------------------------------------------------------------------- @@ -107,19 +97,21 @@ def download_voice(voices_dir: typing.Union[str, Path], link: str) -> Path: def main(): """Main entry point""" - default_voices_dir = Path(XDG().XDG_DATA_HOME) / "mimic3" - parser = argparse.ArgumentParser(prog=f"{_PACKAGE}.download") - parser.add_argument("--url", action="append", help="URL of voice to download") parser.add_argument( - "--name", - action="append", - help="Name of voice to download (e.g., en_US/vctk_low)", + "key", + nargs="*", + help="Keys of voices to download (e.g., en_US/vctk_low)", ) parser.add_argument( "--output-dir", - default=default_voices_dir, - help=f"Path to output directory (default: {default_voices_dir})", + default=DEFAULT_VOICES_DOWNLOAD_DIR, + help="Path to output directory", + ) + parser.add_argument( + "--url-format", + default=DEFAULT_VOICES_URL_FORMAT, + help="URL format string for voices (contains {key}, {lang}, {name})", ) parser.add_argument( "--debug", action="store_true", help="Print DEBUG messages to console" @@ -134,34 +126,28 @@ def main(): _LOGGER.debug(args) args.output_dir = Path(args.output_dir) - args.url = args.url or [] - args.name = args.name or [] + args.key = args.key or [] - with open(_DIR / "voices.json", "r", encoding="utf-8") as voices_file: - voices_by_name = json.load(voices_file) - - if (not args.url) and (not args.name): + if not args.key: # Print available voices and exit - json.dump(voices_by_name, sys.stdout, indent=4, ensure_ascii=False) + json.dump(_VOICES, sys.stdout, indent=4, ensure_ascii=False) sys.exit(0) - urls_to_download = args.url - - if args.name: - # Gather URLs for voices by name - - for voice_name in args.name: - voice_info = voices_by_name.get(voice_name) - if not voice_info: - _LOGGER.fatal("Voice not found: %s", voice_name) - sys.exit(1) - - urls_to_download.append(voice_info["url"]) - args.output_dir.mkdir(parents=True, exist_ok=True) - for url in urls_to_download: - download_voice(args.output_dir, url) + for voice_key in args.key: + voice_lang, voice_name = voice_key.split("/", maxsplit=1) + voice_info = _VOICES[voice_key] + voice_url = str.format( + args.url_format, key=voice_key, lang=voice_lang, name=voice_name + ) + voice_files = voice_info["files"] + download_voice( + voice_key=voice_key, + url_base=voice_url, + voice_files=[VoiceFile(file_key) for file_key in voice_files.keys()], + voices_dir=args.output_dir, + ) # ----------------------------------------------------------------------------- diff --git a/mimic3-tts/mimic3_tts/tts.py b/mimic3-tts/mimic3_tts/tts.py index d2162af..ed24b4e 100644 --- a/mimic3-tts/mimic3_tts/tts.py +++ b/mimic3-tts/mimic3_tts/tts.py @@ -35,7 +35,15 @@ from opentts_abc import ( ) from xdgenvpy import XDG +from ._resources import _VOICES from .config import TrainingConfig +from .const import ( + DEFAULT_LANGUAGE, + DEFAULT_VOICE, + DEFAULT_VOICES_DOWNLOAD_DIR, + DEFAULT_VOICES_URL_FORMAT, +) +from .download import VoiceFile, download_voice from .voice import SPEAKER_TYPE, Mimic3Voice _DIR = Path(__file__).parent @@ -44,9 +52,6 @@ _LOGGER = logging.getLogger(__name__) PHONEMES_LIST_TYPE = typing.List[typing.List[str]] -DEFAULT_VOICE = "en_US/vctk_low" -DEFAULT_LANGUAGE = "en_US" - # ----------------------------------------------------------------------------- @@ -64,6 +69,15 @@ class Mimic3Settings: voices_directories: typing.Optional[typing.Iterable[typing.Union[str, Path]]] = None """Directories to search for voices (/)""" + voices_url_format: str = DEFAULT_VOICES_URL_FORMAT + """URL format string for a voice directory. + + May contain: + * {key} - unique voice key + * {lang} - voice language + * {name} - voice name + """ + speaker: typing.Optional[SPEAKER_TYPE] = None """Default speaker name or id""" @@ -82,6 +96,12 @@ class Mimic3Settings: sample_rate: int = 22050 """Sample rate of silence from add_break() in Hertz""" + voices_download_dir: typing.Union[str, Path] = DEFAULT_VOICES_DOWNLOAD_DIR + """Directory to download voices to""" + + no_download: bool = False + """Do not download voices automatically""" + @dataclass class Mimic3Phonemes: @@ -125,8 +145,7 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): - /usr/local/share/mimic3 - /usr/share/mimic3 """ - data_dirs = [Path(d) / "mimic3" for d in XDG().XDG_DATA_DIRS.split(":")] - return [_DIR.parent.parent / "voices"] + data_dirs + return [Path(d) / "mimic3" for d in XDG().XDG_DATA_DIRS.split(":")] def get_voices(self) -> typing.Iterable[Voice]: """Returns an iterable of all available voices""" @@ -137,29 +156,34 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): if self.settings.voices_directories is not None: voices_dirs = itertools.chain(self.settings.voices_directories, voices_dirs) + known_voices = set(_VOICES.keys()) + # voices/// for voices_dir in voices_dirs: voices_dir = Path(voices_dir) - if not voices_dir.is_dir(): + if not voices_dir.is_dir() or voices_dir.name.startswith("."): _LOGGER.debug("Skipping voice directory %s", voices_dir) continue _LOGGER.debug("Searching %s for voices", voices_dir) for lang_dir in voices_dir.iterdir(): - if not lang_dir.is_dir(): + if not lang_dir.is_dir() or lang_dir.name.startswith("."): continue for voice_dir in lang_dir.iterdir(): - if not voice_dir.is_dir(): + if not voice_dir.is_dir() or voice_dir.name.startswith("."): + continue + + config_path = voice_dir / "config.json" + if not config_path.is_file(): continue _LOGGER.debug("Voice found in %s", voice_dir) voice_lang = lang_dir.name # Load config - config_path = voice_dir / "config.json" _LOGGER.debug("Loading config from %s", config_path) with open(config_path, "r", encoding="utf-8") as config_file: @@ -186,8 +210,10 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): if line: speakers.append(line) + voice_key = f"{voice_lang}/{voice_name}" + yield Voice( - key=f"{voice_lang}/{voice_name}", + key=voice_key, name=voice_name, language=voice_lang, description="", @@ -196,6 +222,30 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): properties=properties, ) + known_voices.discard(voice_key) + + # Yield voices that haven't yet been downloaded + for voice_key in known_voices: + voice_lang, voice_name = voice_key.split("/", maxsplit=1) + voice_info = _VOICES.get(voice_key, {}) + speakers = voice_info.get("speakers", []) + properties = voice_info.get("properties", {}) + + yield Voice( + key=voice_key, + name=voice_name, + language=voice_lang, + description="", + speakers=speakers, + location=str.format( + self.settings.voices_url_format, + lang=voice_lang, + name=voice_name, + key=voice_key, + ), + properties=properties, + ) + def preload_voice(self, voice_key: str): """Ensure voice is loaded in memory before synthesis""" self._get_or_load_voice(voice_key) @@ -381,8 +431,16 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): model_dir: typing.Optional[Path] = None for maybe_voice in self.get_voices(): if maybe_voice.key.endswith(voice_key): - model_dir = Path(maybe_voice.location) - break + maybe_model_dir = Path(maybe_voice.location) + + if (not maybe_model_dir.is_dir()) and (not self.settings.no_download): + # Download voice + maybe_model_dir = self._download_voice(voice_key) + + if maybe_model_dir.is_dir(): + # Voice found + model_dir = maybe_model_dir + break if model_dir is None: raise VoiceNotFoundError(voice_key) @@ -407,3 +465,25 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem): self._loaded_voices[canonical_key] = voice return voice + + def _download_voice(self, voice_key: str) -> Path: + """Downloads a voice by key""" + voice_lang, voice_name = voice_key.split("/", maxsplit=1) + voice_info = _VOICES[voice_key] + voice_url = str.format( + self.settings.voices_url_format, + key=voice_key, + lang=voice_lang, + name=voice_name, + ) + voice_files = voice_info["files"] + download_voice( + voice_key=voice_key, + url_base=voice_url, + voice_files=[VoiceFile(file_key) for file_key in voice_files.keys()], + voices_dir=self.settings.voices_download_dir, + ) + + voice_dir = Path(self.settings.voices_download_dir) / voice_key + + return voice_dir diff --git a/mimic3-tts/mimic3_tts/voices.json b/mimic3-tts/mimic3_tts/voices.json new file mode 100644 index 0000000..455a215 --- /dev/null +++ b/mimic3-tts/mimic3_tts/voices.json @@ -0,0 +1,705 @@ +{ + "de_DE/thorsten_low": { + "files": { + "LICENSE": { + "size_bytes": 6557, + "sha256_sum": "434e11b12f4a3f3096032bc35c5189afe7827b726212b2406a28189598d9c4cf" + }, + "README.md": { + "size_bytes": 193, + "sha256_sum": "e341ebb38ee231c19fc6b132058398725060bc62871a3de39552dea92e2282b2" + }, + "SOURCE": { + "size_bytes": 61, + "sha256_sum": "5913b6f0cf4fc4d751aade453924bfa05413245075d3a294f6c70e6497e7e01c" + }, + "config.json": { + "size_bytes": 3736, + "sha256_sum": "27ad9d2e36d3beaf2fd797537edf0b2243b73795eb57742b2aa69525258dd088" + }, + "generator.onnx": { + "size_bytes": 62798359, + "sha256_sum": "166146bf2705b3c280d3ca6b29f1f3315fe474feb58b47db5152bf78a28af4d0" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 340, + "sha256_sum": "530fed94716cbb8ebe88700028257f2ce39566e6e37e62da3a9e9ce4fc8a90d5" + } + }, + "speakers": [], + "properties": {} + }, + "el_GR/rapunzelina_low": { + "files": { + "LICENSE": { + "size_bytes": 6384, + "sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8" + }, + "README.md": { + "size_bytes": 199, + "sha256_sum": "9a7979350469d0819cb7cdd293f63a99ed29643782c5e435e70f8599c02a565a" + }, + "SOURCE": { + "size_bytes": 69, + "sha256_sum": "c3d41e924e28a9a5d6384af1be84a140ff3ab957f338f56680a076fef07d12b3" + }, + "config.json": { + "size_bytes": 3397, + "sha256_sum": "5d4da9a6d55500c067a66b29d21aa14df4d6fe53e9e5ce5b3ee1b2d8ecbb98fc" + }, + "generator.onnx": { + "size_bytes": 62787607, + "sha256_sum": "f364132e32a8160b7a5945e7f52fd25fa4f8413c8826de07d6b21ec4222bd0d6" + }, + "phonemes.txt": { + "size_bytes": 215, + "sha256_sum": "0deecbaabd16fa94b58375c4bfb1ee66da6567cc56507d52b9c32d0d9553f642" + } + }, + "speakers": [], + "properties": {} + }, + "en_US/cmu-arctic_low": { + "files": { + "LICENSE": { + "size_bytes": 960, + "sha256_sum": "244ff21a910baf28bcb27b1975620a79d2be8611815ecc599f08eb06dd6f000e" + }, + "README.md": { + "size_bytes": 181, + "sha256_sum": "3d5ad2368b2e61a31679400322924eeb312c7b97e68a4fc127461bb6ef18bae5" + }, + "SOURCE": { + "size_bytes": 35, + "sha256_sum": "234919f888057ce202730f2ce9e87ab526c6db4b410047a3c9ca52b1cf51de2b" + }, + "config.json": { + "size_bytes": 3550, + "sha256_sum": "e98bf4210293be786fc219612f6a0ac1a67b40bb2f5fa5f7c7ddbd595638c193" + }, + "generator.onnx": { + "size_bytes": 76359777, + "sha256_sum": "366fd96a96c7ee81ce932973b9c457d13b99696c1a98eda117395e7c882695b0" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 263, + "sha256_sum": "8f9c3e6ced14d7fc5426e4e1bc7f7cc1037a20a645ca34110abcb76148fa8bfd" + }, + "speaker_map.csv": { + "size_bytes": 332, + "sha256_sum": "30409b44f0d4413ef99a146c86849844086cf7aa97c645660473dbe094ca2565" + }, + "speakers.txt": { + "size_bytes": 90, + "sha256_sum": "f8d46538e6058f2f7d58b0bfd996cfb1bd9a4e6c81a1b6764ff9bb49fd48cdf0" + } + }, + "speakers": [ + "awb", + "rms", + "slt", + "ksp", + "clb", + "aew", + "bdl", + "lnh", + "jmk", + "rxr", + "fem", + "ljm", + "slp", + "ahw", + "axb", + "aup", + "eey", + "gka" + ], + "properties": {} + }, + "en_US/ljspeech_low": { + "files": { + "LICENSE": { + "size_bytes": 42, + "sha256_sum": "2a380bafa00cc11ecae80f4a1c21f3873361bc9af1f23c8eecc255b143cdaf68" + }, + "README.md": { + "size_bytes": 183, + "sha256_sum": "43e5814f58fb743862bc7381d3a233b9060d766f8e5ef8336b3f5c4afc38e12e" + }, + "SOURCE": { + "size_bytes": 40, + "sha256_sum": "f72dc7596d10484aea8dbd1b907728ff332acf8899a38dbca468197a26c3c5d9" + }, + "config.json": { + "size_bytes": 3495, + "sha256_sum": "7f89388f366789ede1a32756d98b576a18e410f0f1a9af2ce64d0fbbcd0d971f" + }, + "generator.onnx": { + "size_bytes": 62792219, + "sha256_sum": "d178e03b43b41da49f337626a7024826e79fe7deb7db102a5deedb027f9caa37" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 263, + "sha256_sum": "8f9c3e6ced14d7fc5426e4e1bc7f7cc1037a20a645ca34110abcb76148fa8bfd" + } + }, + "speakers": [], + "properties": {} + }, + "en_US/vctk_low": { + "files": { + "LICENSE": { + "size_bytes": 17417, + "sha256_sum": "b351fdf5bbec1e011fd4c09ed1af05df6fd7de2e679fd7a92e6ec4398c38e3ff" + }, + "README.md": { + "size_bytes": 179, + "sha256_sum": "7e482c32766c0f0612ade79a7255b39da2852cba14d8ad170458fe8b0816e449" + }, + "SOURCE": { + "size_bytes": 45, + "sha256_sum": "fe147d22acd80ce096d7c3069bb66ece887db8b72fb5f38ac6017f7aa98a9698" + }, + "config.json": { + "size_bytes": 3555, + "sha256_sum": "ab38b8df74db751dc89d43c17f238ee7a5e56d8e26f59673e272ea4802d275a7" + }, + "generator.onnx": { + "size_bytes": 76546145, + "sha256_sum": "c958303de83a59fac937a91009c9081b5f2f7369890b9969e05141e56e867d2b" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 263, + "sha256_sum": "8f9c3e6ced14d7fc5426e4e1bc7f7cc1037a20a645ca34110abcb76148fa8bfd" + }, + "speaker_map.csv": { + "size_bytes": 1523, + "sha256_sum": "8ecc8b46e35edcb4664bc5804e77b807bf66fa155a9871f8e51b56f1c63d380b" + }, + "speakers.txt": { + "size_bytes": 652, + "sha256_sum": "c26aab76774111665e6ce4092b9ae40e18ca2dc048a300325f03f674c398f547" + } + }, + "speakers": [ + "p239", + "p236", + "p264", + "p250", + "p259", + "p247", + "p261", + "p263", + "p283", + "p274", + "p286", + "p276", + "p270", + "p281", + "p277", + "p231", + "p238", + "p271", + "p257", + "p273", + "p284", + "p329", + "p361", + "p287", + "p360", + "p374", + "p376", + "p310", + "p304", + "p340", + "p347", + "p330", + "p308", + "p314", + "p317", + "p339", + "p311", + "p294", + "p305", + "p266", + "p335", + "p334", + "p318", + "p323", + "p351", + "p333", + "p313", + "p316", + "p244", + "p307", + "p363", + "p336", + "p312", + "p267", + "p297", + "p275", + "p295", + "p288", + "p258", + "p301", + "p232", + "p292", + "p272", + "p278", + "p280", + "p341", + "p268", + "p298", + "p299", + "p279", + "p285", + "p326", + "p300", + "s5", + "p230", + "p254", + "p269", + "p293", + "p252", + "p345", + "p262", + "p243", + "p227", + "p343", + "p255", + "p229", + "p240", + "p248", + "p253", + "p233", + "p228", + "p251", + "p282", + "p246", + "p234", + "p226", + "p260", + "p245", + "p241", + "p303", + "p265", + "p306", + "p237", + "p249", + "p256", + "p302", + "p364", + "p225", + "p362" + ], + "properties": {} + }, + "es_ES/carlfm_low": { + "files": { + "LICENSE": { + "size_bytes": 14, + "sha256_sum": "f5b244982699ca9fe5cc8fa8a7c08cf5dee5d3a0c8896892899e5df13316e1b7" + }, + "README.md": { + "size_bytes": 192, + "sha256_sum": "2140442eaefadcc0162caae3db531fcd6a8070068087499101ec7f7d49f236bb" + }, + "SOURCE": { + "size_bytes": 47, + "sha256_sum": "afb36ff925af99bf47b97ff5f753a4dc1402e4a3f3e491a7898ad38791b7920c" + }, + "config.json": { + "size_bytes": 3401, + "sha256_sum": "e4da85d44a84c729310d8bffe81a4452bd86b7a1d3874ce3243f0e1d494e704f" + }, + "generator.onnx": { + "size_bytes": 62786839, + "sha256_sum": "8cfd9a91a68b5c62e52a7483c61e1f5e8e09c0e21c0ac77a991f1ab123d0e260" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 217, + "sha256_sum": "dd24b95ded6ff32b410390bb4aa56bcd96042762add3c166588136096c4890e0" + } + }, + "speakers": [], + "properties": {} + }, + "fi_FI/harri-tapani-ylilammi_low": { + "files": { + "LICENSE": { + "size_bytes": 6384, + "sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8" + }, + "README.md": { + "size_bytes": 215, + "sha256_sum": "061335c2aa8f9f2126a80ecd8f6635e85e8ace7a9a37950f9b808420f8233345" + }, + "SOURCE": { + "size_bytes": 71, + "sha256_sum": "f5d064abd622989907fbf4116caa1d9914c30dc11b2fd83447d2ba9da8cec1f7" + }, + "config.json": { + "size_bytes": 3399, + "sha256_sum": "2c7ddf22d83670542cf21b17b8177f53cdcf4e57d581a7fc92ff3c266985c826" + }, + "generator.onnx": { + "size_bytes": 62782999, + "sha256_sum": "31ab7f2200e3246d50eebb8eefc108f08a709d63b89122a25ae443d9c1d9c82b" + }, + "phonemes.txt": { + "size_bytes": 179, + "sha256_sum": "986029f7b8967e438c3ff901d328bf2fc9d3f4164f8b72def149208082513b26" + } + }, + "speakers": [], + "properties": {} + }, + "fr_FR/siwis_low": { + "files": { + "LICENSE": { + "size_bytes": 17416, + "sha256_sum": "b34e17103bfb246f2549fc82a279e6ba28834e0cb42f76a92efc14b72e3a3723" + }, + "README.md": { + "size_bytes": 174, + "sha256_sum": "00ea65658e20fd3301f95f0909eb46c8c54ba377bb6b00032e318ede7c543dca" + }, + "SOURCE": { + "size_bytes": 48, + "sha256_sum": "e81bea943c3a359cef9dafdd66dd29ec6af41bed92dfc2de28879ffa44ba5c84" + }, + "config.json": { + "size_bytes": 3390, + "sha256_sum": "9545c9bdda9692175fae0658c754d2cd1cc786615a6ca673eae3cd6eb1b567ed" + }, + "generator.onnx": { + "size_bytes": 62788375, + "sha256_sum": "5154cc50d87fa6b15c6c5b0eb1597cba15162a8c143baed898ffb55240ba8a4c" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 232, + "sha256_sum": "711294d0b5a0ec08ec21ca8a75184e0fee3aba1e1adcf967fe5e1ef96f6c176e" + } + }, + "speakers": [], + "properties": {} + }, + "hu_HU/diana-majlinger_low": { + "files": { + "LICENSE": { + "size_bytes": 6384, + "sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8" + }, + "README.md": { + "size_bytes": 215, + "sha256_sum": "766b65cc2fd22f02cf3e220876b25dfc7354a25a055f43ec1b9efb4f6eab8726" + }, + "SOURCE": { + "size_bytes": 73, + "sha256_sum": "b8d62d6f483feb8fb6a6ccac36bdc4080a4771cbbf38ab3c4f6f9d0e93a977d6" + }, + "config.json": { + "size_bytes": 3396, + "sha256_sum": "f91d65016d5d2b8f3c1076552344121db024c3e49c11fcadb8dcdad65e9a192f" + }, + "generator.onnx": { + "size_bytes": 62786071, + "sha256_sum": "6feb6a2307f2342a4a9ad43d9ae06d0e8778a6f4486b43dc34d8c62bf523c53c" + }, + "phonemes.txt": { + "size_bytes": 202, + "sha256_sum": "51da9a9ac03277d0f057de50f92fe178ad4980a1e8493a6f0c65040bf660beb0" + } + }, + "speakers": [], + "properties": {} + }, + "it_IT/riccardo-fasol_low": { + "files": { + "LICENSE": { + "size_bytes": 1372, + "sha256_sum": "fdd78a909fb9384d869363522b967557bc9e28e5b65874921f24e48cbb82f38c" + }, + "README.md": { + "size_bytes": 201, + "sha256_sum": "9b7125e8de3ad26f4f81acf4cbfa8f27b7f991a91d6750a6653d117c9e25775b" + }, + "SOURCE": { + "size_bytes": 61, + "sha256_sum": "841520f6a8cc616e307a92552355691f8c3087fadda2e9b7a03a7863b2d0cf6a" + }, + "config.json": { + "size_bytes": 3417, + "sha256_sum": "e60ee585ce2c1709c47d8bf21af6ba7fcc75e50fd533ce479ba2b00408630f8f" + }, + "generator.onnx": { + "size_bytes": 62785303, + "sha256_sum": "b3e06e391e8b056460f64db6c5f3c3e4107a5a53257808fa88d6dcc43b11f3f6" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 210, + "sha256_sum": "282837161676bffa5b304cbb878eace1c8da670a46e08e8e800515f924ecfde3" + } + }, + "speakers": [], + "properties": {} + }, + "ko_KO/kss_low": { + "files": { + "LICENSE": { + "size_bytes": 6384, + "sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8" + }, + "README.md": { + "size_bytes": 191, + "sha256_sum": "91fe70211181048b0afba60aae1e7fd90661e90176590ffbcf4f868f3d6608d3" + }, + "SOURCE": { + "size_bytes": 70, + "sha256_sum": "0c424cc057609d0547bf29e94a9c6dbda619787fde8ff21cc7e404d1c62d562c" + }, + "config.json": { + "size_bytes": 3357, + "sha256_sum": "2433ba5cefa3dd957dc1276b7a501ab7e8f4a867b6ea8daa3543a5582560157c" + }, + "generator.onnx": { + "size_bytes": 62792983, + "sha256_sum": "9198b939b5b713c7b59e7ba28163ed2546dc49691fea82c6614dc0b8d5612c51" + }, + "phonemes.txt": { + "size_bytes": 256, + "sha256_sum": "d8d8f00e1b855c92cbc53f442166f2b4c20d898777f8d754a93eb074d0b218e0" + } + }, + "speakers": [], + "properties": {} + }, + "nl/rdh_low": { + "files": { + "LICENSE": { + "size_bytes": 7049, + "sha256_sum": "7179683e8000e6bdc9bbc60d85edf0a4ac8e76f951857f54fcb775d5886f1309" + }, + "README.md": { + "size_bytes": 167, + "sha256_sum": "4260521a828b09a25a29fb3ffa3ff57b816452e925b9feae528796ffb5d1f0f8" + }, + "SOURCE": { + "size_bytes": 37, + "sha256_sum": "e4874bd5a71c42ef3f963944571e107734928b9db960f17dfadb6e2afce2956b" + }, + "config.json": { + "size_bytes": 3378, + "sha256_sum": "1b9a50cd5e70e44c3aac6fa01bf4b1607627973814fc4ce7e0d4bf1166ce4305" + }, + "generator.onnx": { + "size_bytes": 62800663, + "sha256_sum": "2082891212f3f399097be4ea540ee397912238f777b1c2bced5986060700e268" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 336, + "sha256_sum": "355389fee04f97557232cdde7fb8d4cf03ae2aabd7b0b26ed5978ebbf6575dd4" + } + }, + "speakers": [], + "properties": {} + }, + "pt_BR/edresson_low": { + "files": { + "LICENSE": { + "size_bytes": 18652, + "sha256_sum": "cce5d01fa4a83b794271bd2c28cffdf99afd43c803e6ddefddae39b591ea7448" + }, + "SOURCE": { + "size_bytes": 50, + "sha256_sum": "1ba21abad312197fbe4c9c0d449e16bad57f4c2e3e8e37e31e2d50b413faab04" + }, + "config.json": { + "size_bytes": 3586, + "sha256_sum": "d19b81d56f90344e110426d5830e5b27a3af178bccd44dd6b072d811cdade750" + }, + "generator.onnx": { + "size_bytes": 62796055, + "sha256_sum": "142f4a8268549a8fa148066182e548335eb60826c751228f0c311e8d49d0d938" + }, + "phonemes.txt": { + "size_bytes": 282, + "sha256_sum": "270d2d069b677555c8d703afa3e3883e43e905e993ebb3e85f3481b60fe9f638" + } + }, + "speakers": [], + "properties": {} + }, + "ru_RU/multi_low": { + "files": { + "config.json": { + "size_bytes": 3923, + "sha256_sum": "314e0fdd09183942d2f7393d4b950a12823849c0f72d22e62dc9858a6b4886c6" + }, + "css10/LICENSE": { + "size_bytes": 6384, + "sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8" + }, + "css10/SOURCE": { + "size_bytes": 71, + "sha256_sum": "7edef4a18d5ea07a79f374a9ffdf7e5e5dfc347466feac994d87e9bda9be05ec" + }, + "generator.onnx": { + "size_bytes": 76335199, + "sha256_sum": "cb84b12479fc619943cb8fbb56827f7fd95f5ffcbebf2c220606b3a9750bf2ca" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 326, + "sha256_sum": "63b030e0fc9ebd79f93c82a906c34910a23a8de46e372b48bab1119fd28ec2fa" + }, + "speaker_map.csv": { + "size_bytes": 61, + "sha256_sum": "b6e1b09bfc4358b66e93dbb3f647f572341969fd66bb403fff519e5f540119a0" + }, + "speakers.txt": { + "size_bytes": 29, + "sha256_sum": "f985c0983cb587acb11e9f33538ae9827c9eae0138acc3dae1def1e7780b3211" + } + }, + "speakers": [ + "hajdurova", + "minaev", + "nikolaev" + ], + "properties": {} + }, + "sv_SE/talesyntese_low": { + "files": { + "LICENSE": { + "size_bytes": 51, + "sha256_sum": "bd1a963f2c77481f0a658b5fa7fe77c2515e73be3972f1e991741b72f6fd7d31" + }, + "README.md": { + "size_bytes": 203, + "sha256_sum": "f574e3807bec86b91caa0d70b1ac8c4ef85ecc297afb49b62642f9944554cbaa" + }, + "SOURCE": { + "size_bytes": 63, + "sha256_sum": "295e2c2e47edb2f156c10808efb9439714d227c5b45b60a4f8ec3adc33451a6b" + }, + "config.json": { + "size_bytes": 3376, + "sha256_sum": "8e5a29c1a0ae655c9d0d56df025f22286e81ce323d1b68d07977b90bf61ee33e" + }, + "generator.onnx": { + "size_bytes": 62802967, + "sha256_sum": "bd9a50a8b0d35116c0d543681c2384bb738087ea771f9abee805feb53aa5f708" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 360, + "sha256_sum": "b4d2422bcc2b2f3ea739ce3f59019e499b966a74836aa54f6300921c4fc7ae76" + } + }, + "speakers": [], + "properties": {} + }, + "sw/lanfrica_low": { + "files": { + "LICENSE": { + "size_bytes": 0, + "sha256_sum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + }, + "SOURCE": { + "size_bytes": 48, + "sha256_sum": "1553a74483d9094830d5d249ed6db286eb52f0e7057f0b903efd8c45656bcfeb" + }, + "config.json": { + "size_bytes": 3387, + "sha256_sum": "3b8f3876f998624fff4760015cbdf7cf7e8110eed9753e45a99479fdc8ba8817" + }, + "generator.onnx": { + "size_bytes": 62787607, + "sha256_sum": "b470bf4b042ea96d2272162e9efaa8bd48bae4bc771d4a9996631f645e740e80" + }, + "phoneme_map.txt": { + "size_bytes": 15, + "sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d" + }, + "phonemes.txt": { + "size_bytes": 245, + "sha256_sum": "4784d6c095a3937b09a6f1fa292df160409033ec1d763d90d9b95ac5a42bf42d" + } + }, + "speakers": [], + "properties": {} + }, + "uk_UK/m-ailabs_low": { + "files": { + "LICENSE": { + "size_bytes": 1372, + "sha256_sum": "fdd78a909fb9384d869363522b967557bc9e28e5b65874921f24e48cbb82f38c" + }, + "README.md": { + "size_bytes": 198, + "sha256_sum": "d399789ee16b4610af50b3316cb1a9281f37002728fc1185e0690840be2bd58b" + }, + "SOURCE": { + "size_bytes": 61, + "sha256_sum": "841520f6a8cc616e307a92552355691f8c3087fadda2e9b7a03a7863b2d0cf6a" + }, + "config.json": { + "size_bytes": 5197, + "sha256_sum": "bbd2c66d5920d9e54771d480a982e801cb23ad7de5848625d1ebc82c6b1c7752" + }, + "generator.onnx": { + "size_bytes": 76355935, + "sha256_sum": "ee409d02d0e02d3bf92c3ee1f7403328213dda7515c17246b2b9ca2f005c09d3" + }, + "phonemes.txt": { + "size_bytes": 426, + "sha256_sum": "9a4d708ae3ddffc67709c83c608ce6acdeff511ad288b2f037d41ea2ec3867ee" + }, + "speaker_map.csv": { + "size_bytes": 118, + "sha256_sum": "f74765e11fca2ac205b2acb1213bdaa3bd3f6c9235ebcab5479160bfef1b7aa0" + } + }, + "speakers": [], + "properties": {} + } +} \ No newline at end of file