Add ability to download voices

This commit is contained in:
Michael Hansen 2022-03-29 16:18:50 -04:00
commit dd04ebd6f8
8 changed files with 950 additions and 119 deletions

View file

@ -67,7 +67,7 @@ RUN --mount=type=cache,id=apt-run,target=/var/cache/apt \
mkdir -p /var/cache/apt/${TARGETARCH}${TARGETVARIANT}/archives/partial && \
apt-get update && \
apt-get install --yes --no-install-recommends \
python3 ca-certificates
python3 ca-certificates libespeak-ng1
RUN useradd -ms /bin/bash mimic3

20
mimic3-tts/download.sh Executable file
View file

@ -0,0 +1,20 @@
#!/usr/bin/env bash
#
# Runs the voice download tool (python3 -m <module>.download) from a source
# checkout, forwarding all command-line arguments to it.
#
# Environment:
#   venv  Path to a virtual environment that is activated if the directory
#         exists (default: <directory of this script>/.venv)
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# The Python module is named after the directory with dashes replaced by
# underscores (kebab case to snake case, e.g. mimic3-tts -> mimic3_tts)
module_name="$(basename "${this_dir}" | sed -e 's/-/_/g')"

# Path to virtual environment (may be overridden by the caller)
: "${venv:=${this_dir}/.venv}"

if [ -d "${venv}" ]; then
    # Activate virtual environment if available
    source "${venv}/bin/activate"
fi

# Put the checkout first on the module search path, but keep any PYTHONPATH the
# caller already set instead of discarding it.
export PYTHONPATH="${this_dir}${PYTHONPATH:+:${PYTHONPATH}}"

python3 -m "${module_name}.download" "$@"

View file

@ -126,6 +126,7 @@ def main():
def initialize_args(state: CommandLineInterfaceState):
"""Initialze CLI state from command-line arguments"""
import numpy as np
args = state.args
@ -201,11 +202,14 @@ def initialize_args(state: CommandLineInterfaceState):
def initialize_tts(state: CommandLineInterfaceState):
"""Create Mimic 3 TTS from command-line arguments"""
from mimic3_tts import Mimic3Settings, Mimic3TextToSpeechSystem # noqa: F811
args = state.args
state.tts = Mimic3TextToSpeechSystem(Mimic3Settings())
state.tts = Mimic3TextToSpeechSystem(
Mimic3Settings(voices_directories=args.voices_dir, speaker=args.speaker)
)
if args.voices:
# Don't bother with the rest of the initialization
@ -433,9 +437,6 @@ def print_voices(state: CommandLineInterfaceState):
def get_args():
"""Parse command-line arguments"""
parser = argparse.ArgumentParser(prog=_PACKAGE)
# parser.add_argument(
# "--language", help="Gruut language for text input (en-us, etc.)"
# )
parser.add_argument(
"text", nargs="*", help="Text to convert to speech (default: stdin)"
)
@ -450,10 +451,16 @@ def get_args():
"-v",
help="Name of voice (expected in <voices-dir>/<language>)",
)
# parser.add_argument(
# "--voices-dir",
# help="Directory with voices (format is <language>/<name_model-type>)",
# )
parser.add_argument(
"--speaker",
"-s",
help="Name or number of speaker (default: first speaker)",
)
parser.add_argument(
"--voices-dir",
action="append",
help="Directory with voices (format is <language>/<voice_name>)",
)
parser.add_argument("--voices", action="store_true", help="List available voices")
parser.add_argument("--output-dir", help="Directory to write WAV file(s)")
parser.add_argument(
@ -506,13 +513,6 @@ def get_args():
help="Process text only after encountering a blank line",
)
parser.add_argument("--ssml", action="store_true", help="Input text is SSML")
# parser.add_argument(
# "--optimizations",
# choices=["auto", "on", "off"],
# default="auto",
# help="Enable/disable Onnx optimizations (auto=disable on armv7l)",
# )
parser.add_argument(
"--stdout",
action="store_true",

View file

@ -14,6 +14,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
"""Shared access to package resources"""
import json
import os
import typing
from pathlib import Path
@ -32,3 +33,19 @@ _PACKAGE = "mimic3_tts"
_DIR = Path(typing.cast(os.PathLike, files(_PACKAGE)))
__version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip()
# Catalog of known voices, parsed once at import time from the voices.json
# file that sits in the package directory. Layout of the parsed object:
#
#   {
#     "<lang>/<voice>": {
#       "files": {
#         "relative/path": {
#           "size_bytes": size in bytes,
#           "sha256_sum": sha256 hash
#         }
#       },
#       "speakers": [],
#       "properties": {}
#     }
#   }
_VOICES = json.loads((_DIR / "voices.json").read_text(encoding="utf-8"))

View file

@ -0,0 +1,23 @@
# Copyright 2022 Mycroft AI Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from pathlib import Path
from xdgenvpy import XDG
# Default voice key, in "<lang>/<name>" form
DEFAULT_VOICE = "en_US/vctk_low"
# Default language
DEFAULT_LANGUAGE = "en_US"
# Template for the base URL of a voice's files. Callers fill it in with
# str.format(), passing key=, lang= and name=; this default only uses
# {lang} and {name}.
DEFAULT_VOICES_URL_FORMAT = "https://github.com/MycroftAI/mimic3-voices/raw/master/{lang}/{name}"
# Default directory voices are downloaded to: "mimic3" under XDG_DATA_HOME
DEFAULT_VOICES_DOWNLOAD_DIR = Path(XDG().XDG_DATA_HOME) / "mimic3"

View file

@ -16,17 +16,15 @@
import argparse
import json
import logging
import shutil
import sys
import tempfile
import typing
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from urllib.error import HTTPError
from xdgenvpy import XDG
from ._resources import _DIR, _PACKAGE
from ._resources import _PACKAGE, _VOICES
from .const import DEFAULT_VOICES_DOWNLOAD_DIR, DEFAULT_VOICES_URL_FORMAT
_LOGGER = logging.getLogger(__name__)
@ -37,69 +35,61 @@ class VoiceDownloadError(Exception):
"""Occurs when a voice fails to download"""
def download_voice(voices_dir: typing.Union[str, Path], link: str) -> Path:
"""Download and extract a voice (or vocoder)"""
@dataclass
class VoiceFile:
    """A single file that belongs to a downloadable voice"""

    # Path of the file relative to the voice's directory (and to its base URL)
    relative_path: str

    # Size of the file in bytes, if known
    size_bytes: typing.Optional[int] = None

    # SHA-256 hash of the file, if known
    sha256_sum: typing.Optional[str] = None
def download_voice(
voice_key: str,
url_base: str,
voice_files: typing.Iterable[VoiceFile],
voices_dir: typing.Union[str, Path],
chunk_bytes: int = 4096,
):
"""Downloads a voice to a directory"""
from tqdm.auto import tqdm
voice_name = link.split("/")[-1]
voices_dir = Path(voices_dir)
voices_dir.mkdir(parents=True, exist_ok=True)
if url_base.endswith("/"):
# Remove final slash
url_base = url_base[:-1]
_LOGGER.debug("Downloading voice to %s from %s", voices_dir, link)
voice_dir = Path(voices_dir) / voice_key
voice_dir.mkdir(parents=True, exist_ok=True)
try:
with urllib.request.urlopen(link) as response:
with tempfile.NamedTemporaryFile(mode="wb+", suffix=".tar.gz") as temp_file:
with tqdm(
unit="B",
unit_scale=True,
unit_divisor=1024,
miniters=1,
desc=voice_name,
total=int(response.headers.get("content-length", 0)),
) as pbar:
chunk = response.read(4096)
while chunk:
temp_file.write(chunk)
pbar.update(len(chunk))
chunk = response.read(4096)
_LOGGER.debug("Downloading voice %s to %s", voice_key, voice_dir)
temp_file.seek(0)
for voice_file in voice_files:
file_url = f"{url_base}/{voice_file.relative_path}"
file_path = voice_dir / voice_file.relative_path
# Extract
with tempfile.TemporaryDirectory() as temp_dir_str:
temp_dir = Path(temp_dir_str)
_LOGGER.debug("Extracting %s to %s", temp_file.name, temp_dir_str)
shutil.unpack_archive(temp_file.name, temp_dir_str)
try:
with urllib.request.urlopen(file_url) as response:
with open(file_path, mode="wb") as dest_file:
with tqdm(
unit="B",
unit_scale=True,
unit_divisor=1024,
miniters=1,
desc=voice_file.relative_path,
total=int(response.headers.get("content-length", 0)),
) as pbar:
chunk = response.read(chunk_bytes)
while chunk:
dest_file.write(chunk)
pbar.update(len(chunk))
chunk = response.read(chunk_bytes)
# Expecting <language>/<voice_name>
lang_dir = next(temp_dir.iterdir())
assert lang_dir.is_dir()
voice_dir = next(lang_dir.iterdir())
assert voice_dir.is_dir()
# Copy to destination
dest_lang_dir = voices_dir / lang_dir.name
dest_lang_dir.mkdir(parents=True, exist_ok=True)
dest_voice_dir = voices_dir / lang_dir.name / voice_dir.name
if dest_voice_dir.is_dir():
# Delete existing files
shutil.rmtree(str(dest_voice_dir))
# Move files
_LOGGER.debug("Moving %s to %s", voice_dir, dest_voice_dir)
shutil.move(str(voice_dir), str(dest_voice_dir))
_LOGGER.info("Installed %s to %s", link, dest_voice_dir)
return dest_voice_dir
except HTTPError as e:
_LOGGER.exception("download_voice")
raise VoiceDownloadError(
f"Failed to download voice {voice_name} from {link}: {e}"
) from e
_LOGGER.debug("Downloaded %s", file_path)
except HTTPError as e:
_LOGGER.exception("download_voice")
raise VoiceDownloadError(
f"Failed to download file for voice {voice_key} from {file_url}: {e}"
) from e
# -----------------------------------------------------------------------------
@ -107,19 +97,21 @@ def download_voice(voices_dir: typing.Union[str, Path], link: str) -> Path:
def main():
"""Main entry point"""
default_voices_dir = Path(XDG().XDG_DATA_HOME) / "mimic3"
parser = argparse.ArgumentParser(prog=f"{_PACKAGE}.download")
parser.add_argument("--url", action="append", help="URL of voice to download")
parser.add_argument(
"--name",
action="append",
help="Name of voice to download (e.g., en_US/vctk_low)",
"key",
nargs="*",
help="Keys of voices to download (e.g., en_US/vctk_low)",
)
parser.add_argument(
"--output-dir",
default=default_voices_dir,
help=f"Path to output directory (default: {default_voices_dir})",
default=DEFAULT_VOICES_DOWNLOAD_DIR,
help="Path to output directory",
)
parser.add_argument(
"--url-format",
default=DEFAULT_VOICES_URL_FORMAT,
help="URL format string for voices (contains {key}, {lang}, {name})",
)
parser.add_argument(
"--debug", action="store_true", help="Print DEBUG messages to console"
@ -134,34 +126,28 @@ def main():
_LOGGER.debug(args)
args.output_dir = Path(args.output_dir)
args.url = args.url or []
args.name = args.name or []
args.key = args.key or []
with open(_DIR / "voices.json", "r", encoding="utf-8") as voices_file:
voices_by_name = json.load(voices_file)
if (not args.url) and (not args.name):
if not args.key:
# Print available voices and exit
json.dump(voices_by_name, sys.stdout, indent=4, ensure_ascii=False)
json.dump(_VOICES, sys.stdout, indent=4, ensure_ascii=False)
sys.exit(0)
urls_to_download = args.url
if args.name:
# Gather URLs for voices by name
for voice_name in args.name:
voice_info = voices_by_name.get(voice_name)
if not voice_info:
_LOGGER.fatal("Voice not found: %s", voice_name)
sys.exit(1)
urls_to_download.append(voice_info["url"])
args.output_dir.mkdir(parents=True, exist_ok=True)
for url in urls_to_download:
download_voice(args.output_dir, url)
for voice_key in args.key:
voice_lang, voice_name = voice_key.split("/", maxsplit=1)
voice_info = _VOICES[voice_key]
voice_url = str.format(
args.url_format, key=voice_key, lang=voice_lang, name=voice_name
)
voice_files = voice_info["files"]
download_voice(
voice_key=voice_key,
url_base=voice_url,
voice_files=[VoiceFile(file_key) for file_key in voice_files.keys()],
voices_dir=args.output_dir,
)
# -----------------------------------------------------------------------------

View file

@ -35,7 +35,15 @@ from opentts_abc import (
)
from xdgenvpy import XDG
from ._resources import _VOICES
from .config import TrainingConfig
from .const import (
DEFAULT_LANGUAGE,
DEFAULT_VOICE,
DEFAULT_VOICES_DOWNLOAD_DIR,
DEFAULT_VOICES_URL_FORMAT,
)
from .download import VoiceFile, download_voice
from .voice import SPEAKER_TYPE, Mimic3Voice
_DIR = Path(__file__).parent
@ -44,9 +52,6 @@ _LOGGER = logging.getLogger(__name__)
PHONEMES_LIST_TYPE = typing.List[typing.List[str]]
DEFAULT_VOICE = "en_US/vctk_low"
DEFAULT_LANGUAGE = "en_US"
# -----------------------------------------------------------------------------
@ -64,6 +69,15 @@ class Mimic3Settings:
voices_directories: typing.Optional[typing.Iterable[typing.Union[str, Path]]] = None
"""Directories to search for voices (<lang>/<voice>)"""
voices_url_format: str = DEFAULT_VOICES_URL_FORMAT
"""URL format string for a voice directory.
May contain:
* {key} - unique voice key
* {lang} - voice language
* {name} - voice name
"""
speaker: typing.Optional[SPEAKER_TYPE] = None
"""Default speaker name or id"""
@ -82,6 +96,12 @@ class Mimic3Settings:
sample_rate: int = 22050
"""Sample rate of silence from add_break() in Hertz"""
voices_download_dir: typing.Union[str, Path] = DEFAULT_VOICES_DOWNLOAD_DIR
"""Directory to download voices to"""
no_download: bool = False
"""Do not download voices automatically"""
@dataclass
class Mimic3Phonemes:
@ -125,8 +145,7 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
- /usr/local/share/mimic3
- /usr/share/mimic3
"""
data_dirs = [Path(d) / "mimic3" for d in XDG().XDG_DATA_DIRS.split(":")]
return [_DIR.parent.parent / "voices"] + data_dirs
return [Path(d) / "mimic3" for d in XDG().XDG_DATA_DIRS.split(":")]
def get_voices(self) -> typing.Iterable[Voice]:
"""Returns an iterable of all available voices"""
@ -137,29 +156,34 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
if self.settings.voices_directories is not None:
voices_dirs = itertools.chain(self.settings.voices_directories, voices_dirs)
known_voices = set(_VOICES.keys())
# voices/<language>/<voice>/
for voices_dir in voices_dirs:
voices_dir = Path(voices_dir)
if not voices_dir.is_dir():
if not voices_dir.is_dir() or voices_dir.name.startswith("."):
_LOGGER.debug("Skipping voice directory %s", voices_dir)
continue
_LOGGER.debug("Searching %s for voices", voices_dir)
for lang_dir in voices_dir.iterdir():
if not lang_dir.is_dir():
if not lang_dir.is_dir() or lang_dir.name.startswith("."):
continue
for voice_dir in lang_dir.iterdir():
if not voice_dir.is_dir():
if not voice_dir.is_dir() or voice_dir.name.startswith("."):
continue
config_path = voice_dir / "config.json"
if not config_path.is_file():
continue
_LOGGER.debug("Voice found in %s", voice_dir)
voice_lang = lang_dir.name
# Load config
config_path = voice_dir / "config.json"
_LOGGER.debug("Loading config from %s", config_path)
with open(config_path, "r", encoding="utf-8") as config_file:
@ -186,8 +210,10 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
if line:
speakers.append(line)
voice_key = f"{voice_lang}/{voice_name}"
yield Voice(
key=f"{voice_lang}/{voice_name}",
key=voice_key,
name=voice_name,
language=voice_lang,
description="",
@ -196,6 +222,30 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
properties=properties,
)
known_voices.discard(voice_key)
# Yield voices that haven't yet been downloaded
for voice_key in known_voices:
voice_lang, voice_name = voice_key.split("/", maxsplit=1)
voice_info = _VOICES.get(voice_key, {})
speakers = voice_info.get("speakers", [])
properties = voice_info.get("properties", {})
yield Voice(
key=voice_key,
name=voice_name,
language=voice_lang,
description="",
speakers=speakers,
location=str.format(
self.settings.voices_url_format,
lang=voice_lang,
name=voice_name,
key=voice_key,
),
properties=properties,
)
def preload_voice(self, voice_key: str):
"""Ensure voice is loaded in memory before synthesis"""
self._get_or_load_voice(voice_key)
@ -381,8 +431,16 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
model_dir: typing.Optional[Path] = None
for maybe_voice in self.get_voices():
if maybe_voice.key.endswith(voice_key):
model_dir = Path(maybe_voice.location)
break
maybe_model_dir = Path(maybe_voice.location)
if (not maybe_model_dir.is_dir()) and (not self.settings.no_download):
# Download voice
maybe_model_dir = self._download_voice(voice_key)
if maybe_model_dir.is_dir():
# Voice found
model_dir = maybe_model_dir
break
if model_dir is None:
raise VoiceNotFoundError(voice_key)
@ -407,3 +465,25 @@ class Mimic3TextToSpeechSystem(TextToSpeechSystem):
self._loaded_voices[canonical_key] = voice
return voice
def _download_voice(self, voice_key: str) -> Path:
    """Downloads a voice by key.

    voice_key may be a full "<lang>/<name>" key or only the trailing part of
    one (e.g. just the voice name), because voices are matched by key suffix
    before this method is called.

    Returns the directory inside settings.voices_download_dir that the
    voice's files were downloaded to.

    Raises VoiceNotFoundError if no known voice matches voice_key.
    """
    if voice_key not in _VOICES:
        # A partial key would break the "<lang>/<name>" split and the catalog
        # lookup below, so resolve it to the first known key it is a suffix of.
        voice_key = next(
            (known_key for known_key in _VOICES if known_key.endswith(voice_key)),
            voice_key,
        )

    voice_info = _VOICES.get(voice_key)
    if voice_info is None:
        raise VoiceNotFoundError(voice_key)

    voice_lang, voice_name = voice_key.split("/", maxsplit=1)

    # Base URL that the voice's relative file paths are appended to
    voice_url = str.format(
        self.settings.voices_url_format,
        key=voice_key,
        lang=voice_lang,
        name=voice_name,
    )

    download_voice(
        voice_key=voice_key,
        url_base=voice_url,
        voice_files=[VoiceFile(file_key) for file_key in voice_info["files"]],
        voices_dir=self.settings.voices_download_dir,
    )

    # download_voice() writes the files to <voices_dir>/<voice_key>
    return Path(self.settings.voices_download_dir) / voice_key

View file

@ -0,0 +1,705 @@
{
"de_DE/thorsten_low": {
"files": {
"LICENSE": {
"size_bytes": 6557,
"sha256_sum": "434e11b12f4a3f3096032bc35c5189afe7827b726212b2406a28189598d9c4cf"
},
"README.md": {
"size_bytes": 193,
"sha256_sum": "e341ebb38ee231c19fc6b132058398725060bc62871a3de39552dea92e2282b2"
},
"SOURCE": {
"size_bytes": 61,
"sha256_sum": "5913b6f0cf4fc4d751aade453924bfa05413245075d3a294f6c70e6497e7e01c"
},
"config.json": {
"size_bytes": 3736,
"sha256_sum": "27ad9d2e36d3beaf2fd797537edf0b2243b73795eb57742b2aa69525258dd088"
},
"generator.onnx": {
"size_bytes": 62798359,
"sha256_sum": "166146bf2705b3c280d3ca6b29f1f3315fe474feb58b47db5152bf78a28af4d0"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 340,
"sha256_sum": "530fed94716cbb8ebe88700028257f2ce39566e6e37e62da3a9e9ce4fc8a90d5"
}
},
"speakers": [],
"properties": {}
},
"el_GR/rapunzelina_low": {
"files": {
"LICENSE": {
"size_bytes": 6384,
"sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8"
},
"README.md": {
"size_bytes": 199,
"sha256_sum": "9a7979350469d0819cb7cdd293f63a99ed29643782c5e435e70f8599c02a565a"
},
"SOURCE": {
"size_bytes": 69,
"sha256_sum": "c3d41e924e28a9a5d6384af1be84a140ff3ab957f338f56680a076fef07d12b3"
},
"config.json": {
"size_bytes": 3397,
"sha256_sum": "5d4da9a6d55500c067a66b29d21aa14df4d6fe53e9e5ce5b3ee1b2d8ecbb98fc"
},
"generator.onnx": {
"size_bytes": 62787607,
"sha256_sum": "f364132e32a8160b7a5945e7f52fd25fa4f8413c8826de07d6b21ec4222bd0d6"
},
"phonemes.txt": {
"size_bytes": 215,
"sha256_sum": "0deecbaabd16fa94b58375c4bfb1ee66da6567cc56507d52b9c32d0d9553f642"
}
},
"speakers": [],
"properties": {}
},
"en_US/cmu-arctic_low": {
"files": {
"LICENSE": {
"size_bytes": 960,
"sha256_sum": "244ff21a910baf28bcb27b1975620a79d2be8611815ecc599f08eb06dd6f000e"
},
"README.md": {
"size_bytes": 181,
"sha256_sum": "3d5ad2368b2e61a31679400322924eeb312c7b97e68a4fc127461bb6ef18bae5"
},
"SOURCE": {
"size_bytes": 35,
"sha256_sum": "234919f888057ce202730f2ce9e87ab526c6db4b410047a3c9ca52b1cf51de2b"
},
"config.json": {
"size_bytes": 3550,
"sha256_sum": "e98bf4210293be786fc219612f6a0ac1a67b40bb2f5fa5f7c7ddbd595638c193"
},
"generator.onnx": {
"size_bytes": 76359777,
"sha256_sum": "366fd96a96c7ee81ce932973b9c457d13b99696c1a98eda117395e7c882695b0"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 263,
"sha256_sum": "8f9c3e6ced14d7fc5426e4e1bc7f7cc1037a20a645ca34110abcb76148fa8bfd"
},
"speaker_map.csv": {
"size_bytes": 332,
"sha256_sum": "30409b44f0d4413ef99a146c86849844086cf7aa97c645660473dbe094ca2565"
},
"speakers.txt": {
"size_bytes": 90,
"sha256_sum": "f8d46538e6058f2f7d58b0bfd996cfb1bd9a4e6c81a1b6764ff9bb49fd48cdf0"
}
},
"speakers": [
"awb",
"rms",
"slt",
"ksp",
"clb",
"aew",
"bdl",
"lnh",
"jmk",
"rxr",
"fem",
"ljm",
"slp",
"ahw",
"axb",
"aup",
"eey",
"gka"
],
"properties": {}
},
"en_US/ljspeech_low": {
"files": {
"LICENSE": {
"size_bytes": 42,
"sha256_sum": "2a380bafa00cc11ecae80f4a1c21f3873361bc9af1f23c8eecc255b143cdaf68"
},
"README.md": {
"size_bytes": 183,
"sha256_sum": "43e5814f58fb743862bc7381d3a233b9060d766f8e5ef8336b3f5c4afc38e12e"
},
"SOURCE": {
"size_bytes": 40,
"sha256_sum": "f72dc7596d10484aea8dbd1b907728ff332acf8899a38dbca468197a26c3c5d9"
},
"config.json": {
"size_bytes": 3495,
"sha256_sum": "7f89388f366789ede1a32756d98b576a18e410f0f1a9af2ce64d0fbbcd0d971f"
},
"generator.onnx": {
"size_bytes": 62792219,
"sha256_sum": "d178e03b43b41da49f337626a7024826e79fe7deb7db102a5deedb027f9caa37"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 263,
"sha256_sum": "8f9c3e6ced14d7fc5426e4e1bc7f7cc1037a20a645ca34110abcb76148fa8bfd"
}
},
"speakers": [],
"properties": {}
},
"en_US/vctk_low": {
"files": {
"LICENSE": {
"size_bytes": 17417,
"sha256_sum": "b351fdf5bbec1e011fd4c09ed1af05df6fd7de2e679fd7a92e6ec4398c38e3ff"
},
"README.md": {
"size_bytes": 179,
"sha256_sum": "7e482c32766c0f0612ade79a7255b39da2852cba14d8ad170458fe8b0816e449"
},
"SOURCE": {
"size_bytes": 45,
"sha256_sum": "fe147d22acd80ce096d7c3069bb66ece887db8b72fb5f38ac6017f7aa98a9698"
},
"config.json": {
"size_bytes": 3555,
"sha256_sum": "ab38b8df74db751dc89d43c17f238ee7a5e56d8e26f59673e272ea4802d275a7"
},
"generator.onnx": {
"size_bytes": 76546145,
"sha256_sum": "c958303de83a59fac937a91009c9081b5f2f7369890b9969e05141e56e867d2b"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 263,
"sha256_sum": "8f9c3e6ced14d7fc5426e4e1bc7f7cc1037a20a645ca34110abcb76148fa8bfd"
},
"speaker_map.csv": {
"size_bytes": 1523,
"sha256_sum": "8ecc8b46e35edcb4664bc5804e77b807bf66fa155a9871f8e51b56f1c63d380b"
},
"speakers.txt": {
"size_bytes": 652,
"sha256_sum": "c26aab76774111665e6ce4092b9ae40e18ca2dc048a300325f03f674c398f547"
}
},
"speakers": [
"p239",
"p236",
"p264",
"p250",
"p259",
"p247",
"p261",
"p263",
"p283",
"p274",
"p286",
"p276",
"p270",
"p281",
"p277",
"p231",
"p238",
"p271",
"p257",
"p273",
"p284",
"p329",
"p361",
"p287",
"p360",
"p374",
"p376",
"p310",
"p304",
"p340",
"p347",
"p330",
"p308",
"p314",
"p317",
"p339",
"p311",
"p294",
"p305",
"p266",
"p335",
"p334",
"p318",
"p323",
"p351",
"p333",
"p313",
"p316",
"p244",
"p307",
"p363",
"p336",
"p312",
"p267",
"p297",
"p275",
"p295",
"p288",
"p258",
"p301",
"p232",
"p292",
"p272",
"p278",
"p280",
"p341",
"p268",
"p298",
"p299",
"p279",
"p285",
"p326",
"p300",
"s5",
"p230",
"p254",
"p269",
"p293",
"p252",
"p345",
"p262",
"p243",
"p227",
"p343",
"p255",
"p229",
"p240",
"p248",
"p253",
"p233",
"p228",
"p251",
"p282",
"p246",
"p234",
"p226",
"p260",
"p245",
"p241",
"p303",
"p265",
"p306",
"p237",
"p249",
"p256",
"p302",
"p364",
"p225",
"p362"
],
"properties": {}
},
"es_ES/carlfm_low": {
"files": {
"LICENSE": {
"size_bytes": 14,
"sha256_sum": "f5b244982699ca9fe5cc8fa8a7c08cf5dee5d3a0c8896892899e5df13316e1b7"
},
"README.md": {
"size_bytes": 192,
"sha256_sum": "2140442eaefadcc0162caae3db531fcd6a8070068087499101ec7f7d49f236bb"
},
"SOURCE": {
"size_bytes": 47,
"sha256_sum": "afb36ff925af99bf47b97ff5f753a4dc1402e4a3f3e491a7898ad38791b7920c"
},
"config.json": {
"size_bytes": 3401,
"sha256_sum": "e4da85d44a84c729310d8bffe81a4452bd86b7a1d3874ce3243f0e1d494e704f"
},
"generator.onnx": {
"size_bytes": 62786839,
"sha256_sum": "8cfd9a91a68b5c62e52a7483c61e1f5e8e09c0e21c0ac77a991f1ab123d0e260"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 217,
"sha256_sum": "dd24b95ded6ff32b410390bb4aa56bcd96042762add3c166588136096c4890e0"
}
},
"speakers": [],
"properties": {}
},
"fi_FI/harri-tapani-ylilammi_low": {
"files": {
"LICENSE": {
"size_bytes": 6384,
"sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8"
},
"README.md": {
"size_bytes": 215,
"sha256_sum": "061335c2aa8f9f2126a80ecd8f6635e85e8ace7a9a37950f9b808420f8233345"
},
"SOURCE": {
"size_bytes": 71,
"sha256_sum": "f5d064abd622989907fbf4116caa1d9914c30dc11b2fd83447d2ba9da8cec1f7"
},
"config.json": {
"size_bytes": 3399,
"sha256_sum": "2c7ddf22d83670542cf21b17b8177f53cdcf4e57d581a7fc92ff3c266985c826"
},
"generator.onnx": {
"size_bytes": 62782999,
"sha256_sum": "31ab7f2200e3246d50eebb8eefc108f08a709d63b89122a25ae443d9c1d9c82b"
},
"phonemes.txt": {
"size_bytes": 179,
"sha256_sum": "986029f7b8967e438c3ff901d328bf2fc9d3f4164f8b72def149208082513b26"
}
},
"speakers": [],
"properties": {}
},
"fr_FR/siwis_low": {
"files": {
"LICENSE": {
"size_bytes": 17416,
"sha256_sum": "b34e17103bfb246f2549fc82a279e6ba28834e0cb42f76a92efc14b72e3a3723"
},
"README.md": {
"size_bytes": 174,
"sha256_sum": "00ea65658e20fd3301f95f0909eb46c8c54ba377bb6b00032e318ede7c543dca"
},
"SOURCE": {
"size_bytes": 48,
"sha256_sum": "e81bea943c3a359cef9dafdd66dd29ec6af41bed92dfc2de28879ffa44ba5c84"
},
"config.json": {
"size_bytes": 3390,
"sha256_sum": "9545c9bdda9692175fae0658c754d2cd1cc786615a6ca673eae3cd6eb1b567ed"
},
"generator.onnx": {
"size_bytes": 62788375,
"sha256_sum": "5154cc50d87fa6b15c6c5b0eb1597cba15162a8c143baed898ffb55240ba8a4c"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 232,
"sha256_sum": "711294d0b5a0ec08ec21ca8a75184e0fee3aba1e1adcf967fe5e1ef96f6c176e"
}
},
"speakers": [],
"properties": {}
},
"hu_HU/diana-majlinger_low": {
"files": {
"LICENSE": {
"size_bytes": 6384,
"sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8"
},
"README.md": {
"size_bytes": 215,
"sha256_sum": "766b65cc2fd22f02cf3e220876b25dfc7354a25a055f43ec1b9efb4f6eab8726"
},
"SOURCE": {
"size_bytes": 73,
"sha256_sum": "b8d62d6f483feb8fb6a6ccac36bdc4080a4771cbbf38ab3c4f6f9d0e93a977d6"
},
"config.json": {
"size_bytes": 3396,
"sha256_sum": "f91d65016d5d2b8f3c1076552344121db024c3e49c11fcadb8dcdad65e9a192f"
},
"generator.onnx": {
"size_bytes": 62786071,
"sha256_sum": "6feb6a2307f2342a4a9ad43d9ae06d0e8778a6f4486b43dc34d8c62bf523c53c"
},
"phonemes.txt": {
"size_bytes": 202,
"sha256_sum": "51da9a9ac03277d0f057de50f92fe178ad4980a1e8493a6f0c65040bf660beb0"
}
},
"speakers": [],
"properties": {}
},
"it_IT/riccardo-fasol_low": {
"files": {
"LICENSE": {
"size_bytes": 1372,
"sha256_sum": "fdd78a909fb9384d869363522b967557bc9e28e5b65874921f24e48cbb82f38c"
},
"README.md": {
"size_bytes": 201,
"sha256_sum": "9b7125e8de3ad26f4f81acf4cbfa8f27b7f991a91d6750a6653d117c9e25775b"
},
"SOURCE": {
"size_bytes": 61,
"sha256_sum": "841520f6a8cc616e307a92552355691f8c3087fadda2e9b7a03a7863b2d0cf6a"
},
"config.json": {
"size_bytes": 3417,
"sha256_sum": "e60ee585ce2c1709c47d8bf21af6ba7fcc75e50fd533ce479ba2b00408630f8f"
},
"generator.onnx": {
"size_bytes": 62785303,
"sha256_sum": "b3e06e391e8b056460f64db6c5f3c3e4107a5a53257808fa88d6dcc43b11f3f6"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 210,
"sha256_sum": "282837161676bffa5b304cbb878eace1c8da670a46e08e8e800515f924ecfde3"
}
},
"speakers": [],
"properties": {}
},
"ko_KO/kss_low": {
"files": {
"LICENSE": {
"size_bytes": 6384,
"sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8"
},
"README.md": {
"size_bytes": 191,
"sha256_sum": "91fe70211181048b0afba60aae1e7fd90661e90176590ffbcf4f868f3d6608d3"
},
"SOURCE": {
"size_bytes": 70,
"sha256_sum": "0c424cc057609d0547bf29e94a9c6dbda619787fde8ff21cc7e404d1c62d562c"
},
"config.json": {
"size_bytes": 3357,
"sha256_sum": "2433ba5cefa3dd957dc1276b7a501ab7e8f4a867b6ea8daa3543a5582560157c"
},
"generator.onnx": {
"size_bytes": 62792983,
"sha256_sum": "9198b939b5b713c7b59e7ba28163ed2546dc49691fea82c6614dc0b8d5612c51"
},
"phonemes.txt": {
"size_bytes": 256,
"sha256_sum": "d8d8f00e1b855c92cbc53f442166f2b4c20d898777f8d754a93eb074d0b218e0"
}
},
"speakers": [],
"properties": {}
},
"nl/rdh_low": {
"files": {
"LICENSE": {
"size_bytes": 7049,
"sha256_sum": "7179683e8000e6bdc9bbc60d85edf0a4ac8e76f951857f54fcb775d5886f1309"
},
"README.md": {
"size_bytes": 167,
"sha256_sum": "4260521a828b09a25a29fb3ffa3ff57b816452e925b9feae528796ffb5d1f0f8"
},
"SOURCE": {
"size_bytes": 37,
"sha256_sum": "e4874bd5a71c42ef3f963944571e107734928b9db960f17dfadb6e2afce2956b"
},
"config.json": {
"size_bytes": 3378,
"sha256_sum": "1b9a50cd5e70e44c3aac6fa01bf4b1607627973814fc4ce7e0d4bf1166ce4305"
},
"generator.onnx": {
"size_bytes": 62800663,
"sha256_sum": "2082891212f3f399097be4ea540ee397912238f777b1c2bced5986060700e268"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 336,
"sha256_sum": "355389fee04f97557232cdde7fb8d4cf03ae2aabd7b0b26ed5978ebbf6575dd4"
}
},
"speakers": [],
"properties": {}
},
"pt_BR/edresson_low": {
"files": {
"LICENSE": {
"size_bytes": 18652,
"sha256_sum": "cce5d01fa4a83b794271bd2c28cffdf99afd43c803e6ddefddae39b591ea7448"
},
"SOURCE": {
"size_bytes": 50,
"sha256_sum": "1ba21abad312197fbe4c9c0d449e16bad57f4c2e3e8e37e31e2d50b413faab04"
},
"config.json": {
"size_bytes": 3586,
"sha256_sum": "d19b81d56f90344e110426d5830e5b27a3af178bccd44dd6b072d811cdade750"
},
"generator.onnx": {
"size_bytes": 62796055,
"sha256_sum": "142f4a8268549a8fa148066182e548335eb60826c751228f0c311e8d49d0d938"
},
"phonemes.txt": {
"size_bytes": 282,
"sha256_sum": "270d2d069b677555c8d703afa3e3883e43e905e993ebb3e85f3481b60fe9f638"
}
},
"speakers": [],
"properties": {}
},
"ru_RU/multi_low": {
"files": {
"config.json": {
"size_bytes": 3923,
"sha256_sum": "314e0fdd09183942d2f7393d4b950a12823849c0f72d22e62dc9858a6b4886c6"
},
"css10/LICENSE": {
"size_bytes": 6384,
"sha256_sum": "e052310c1e6d75057abe231ba94b7f2eedee1aec4a0c5c658c8151f6f8c05fd8"
},
"css10/SOURCE": {
"size_bytes": 71,
"sha256_sum": "7edef4a18d5ea07a79f374a9ffdf7e5e5dfc347466feac994d87e9bda9be05ec"
},
"generator.onnx": {
"size_bytes": 76335199,
"sha256_sum": "cb84b12479fc619943cb8fbb56827f7fd95f5ffcbebf2c220606b3a9750bf2ca"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 326,
"sha256_sum": "63b030e0fc9ebd79f93c82a906c34910a23a8de46e372b48bab1119fd28ec2fa"
},
"speaker_map.csv": {
"size_bytes": 61,
"sha256_sum": "b6e1b09bfc4358b66e93dbb3f647f572341969fd66bb403fff519e5f540119a0"
},
"speakers.txt": {
"size_bytes": 29,
"sha256_sum": "f985c0983cb587acb11e9f33538ae9827c9eae0138acc3dae1def1e7780b3211"
}
},
"speakers": [
"hajdurova",
"minaev",
"nikolaev"
],
"properties": {}
},
"sv_SE/talesyntese_low": {
"files": {
"LICENSE": {
"size_bytes": 51,
"sha256_sum": "bd1a963f2c77481f0a658b5fa7fe77c2515e73be3972f1e991741b72f6fd7d31"
},
"README.md": {
"size_bytes": 203,
"sha256_sum": "f574e3807bec86b91caa0d70b1ac8c4ef85ecc297afb49b62642f9944554cbaa"
},
"SOURCE": {
"size_bytes": 63,
"sha256_sum": "295e2c2e47edb2f156c10808efb9439714d227c5b45b60a4f8ec3adc33451a6b"
},
"config.json": {
"size_bytes": 3376,
"sha256_sum": "8e5a29c1a0ae655c9d0d56df025f22286e81ce323d1b68d07977b90bf61ee33e"
},
"generator.onnx": {
"size_bytes": 62802967,
"sha256_sum": "bd9a50a8b0d35116c0d543681c2384bb738087ea771f9abee805feb53aa5f708"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 360,
"sha256_sum": "b4d2422bcc2b2f3ea739ce3f59019e499b966a74836aa54f6300921c4fc7ae76"
}
},
"speakers": [],
"properties": {}
},
"sw/lanfrica_low": {
"files": {
"LICENSE": {
"size_bytes": 0,
"sha256_sum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"SOURCE": {
"size_bytes": 48,
"sha256_sum": "1553a74483d9094830d5d249ed6db286eb52f0e7057f0b903efd8c45656bcfeb"
},
"config.json": {
"size_bytes": 3387,
"sha256_sum": "3b8f3876f998624fff4760015cbdf7cf7e8110eed9753e45a99479fdc8ba8817"
},
"generator.onnx": {
"size_bytes": 62787607,
"sha256_sum": "b470bf4b042ea96d2272162e9efaa8bd48bae4bc771d4a9996631f645e740e80"
},
"phoneme_map.txt": {
"size_bytes": 15,
"sha256_sum": "4003f421fc91ed1d5a343442659db6cf9d58bd1c6d8d771abc1999cc24d7694d"
},
"phonemes.txt": {
"size_bytes": 245,
"sha256_sum": "4784d6c095a3937b09a6f1fa292df160409033ec1d763d90d9b95ac5a42bf42d"
}
},
"speakers": [],
"properties": {}
},
"uk_UK/m-ailabs_low": {
"files": {
"LICENSE": {
"size_bytes": 1372,
"sha256_sum": "fdd78a909fb9384d869363522b967557bc9e28e5b65874921f24e48cbb82f38c"
},
"README.md": {
"size_bytes": 198,
"sha256_sum": "d399789ee16b4610af50b3316cb1a9281f37002728fc1185e0690840be2bd58b"
},
"SOURCE": {
"size_bytes": 61,
"sha256_sum": "841520f6a8cc616e307a92552355691f8c3087fadda2e9b7a03a7863b2d0cf6a"
},
"config.json": {
"size_bytes": 5197,
"sha256_sum": "bbd2c66d5920d9e54771d480a982e801cb23ad7de5848625d1ebc82c6b1c7752"
},
"generator.onnx": {
"size_bytes": 76355935,
"sha256_sum": "ee409d02d0e02d3bf92c3ee1f7403328213dda7515c17246b2b9ca2f005c09d3"
},
"phonemes.txt": {
"size_bytes": 426,
"sha256_sum": "9a4d708ae3ddffc67709c83c608ce6acdeff511ad288b2f037d41ea2ec3867ee"
},
"speaker_map.csv": {
"size_bytes": 118,
"sha256_sum": "f74765e11fca2ac205b2acb1213bdaa3bd3f6c9235ebcab5479160bfef1b7aa0"
}
},
"speakers": [],
"properties": {}
}
}