diff --git a/Dockerfile.binary b/Dockerfile.binary
index 9e0df74..f9cc80b 100644
--- a/Dockerfile.binary
+++ b/Dockerfile.binary
@@ -63,6 +63,8 @@ RUN .venv/bin/pyinstaller \
     --collect-data "gruut_lang_de" \
     --hidden-import "gruut_lang_en" \
     --collect-data "gruut_lang_en" \
+    --hidden-import "gruut_lang_es" \
+    --collect-data "gruut_lang_es" \
     --hidden-import "gruut_lang_fr" \
     --collect-data "gruut_lang_fr" \
     --hidden-import "gruut_lang_it" \
@@ -76,10 +78,20 @@ RUN .venv/bin/pyinstaller \
     --collect-data 'espeak_phonemizer' \
     --collect-data 'phonemes2ids' \
     --hidden-import 'swagger_ui' \
+    --hidden-import 'epitran' \
+    --collect-data 'epitran' \
+    --hidden-import 'panphon' \
+    --collect-data 'panphon' \
     --collect-data 'mimic3_tts' \
     --collect-data 'mimic3_http' \
     pyinstaller/mimic3.py
 
+# Clean up unused lexicons
+RUN find dist/mimic3/ -wholename '*/gruut_lang_*/espeak' -type d | \
+    while read -r espeak_dir; do \
+        rm -rf "${espeak_dir}"; \
+    done
+
 COPY pyinstaller/mimic3-server dist/mimic3/
 
 # -----------------------------------------------------------------------------
diff --git a/Makefile b/Makefile
index 05650ff..00931cd 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@
-.PHONY: dist install docker binaries
+.PHONY: dist install docker binaries debian
 
 SHELL := bash
-DOCKER_PLATFORM ?= linux/amd64
+DOCKER_PLATFORM ?= linux/amd64 # linux/arm64 linux/arm/v7
 
 dist:
 	cd opentts-abc && python3 setup.py sdist
@@ -34,7 +34,10 @@ docker:
 	docker buildx build . -f Dockerfile --platform $(DOCKER_PLATFORM) --tag mycroftai/mimic3 --load
 
 docker-gpu:
-	docker buildx build . -f Dockerfile.gpu --tag mycroftai/mimic3:gpu --load
+	docker buildx build . -f Dockerfile.gpu --tag 'mycroftai/mimic3:gpu' --load
 
 binaries:
 	docker buildx build . -f Dockerfile.binary --platform $(DOCKER_PLATFORM) --output type=local,dest=dist/$(DOCKER_PLATFORM)
+
+debian:
+	debian/build-debian.sh
diff --git a/README.md b/README.md
index c6cfcbf..939ec5b 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 A fast and local neural text to speech system for [Mycroft](https://mycroft.ai/) and the [Mark II](https://mycroft.ai/product/mark-ii/).
 
 * [Available voices](https://github.com/MycroftAI/mimic3-voices)
-* [How does it work?](mimic3-tts/)
+* [How does it work?](mimic3-tts/#architecture)
 
 ## Use Cases
 
diff --git a/debian/bin/mimic3 b/debian/bin/mimic3
new file mode 100755
index 0000000..b075063
--- /dev/null
+++ b/debian/bin/mimic3
@@ -0,0 +1,17 @@
+#!/usr/bin/env sh
+# Copyright 2022 Mycroft AI Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+exec /usr/lib/mimic3-tts/mimic3 "$@"
diff --git a/debian/bin/mimic3-server b/debian/bin/mimic3-server
new file mode 100755
index 0000000..7afadf8
--- /dev/null
+++ b/debian/bin/mimic3-server
@@ -0,0 +1,17 @@
+#!/usr/bin/env sh
+# Copyright 2022 Mycroft AI Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+exec /usr/lib/mimic3-tts/mimic3-server "$@"
diff --git a/debian/build-debian.sh b/debian/build-debian.sh
new file mode 100755
index 0000000..11b528a
--- /dev/null
+++ b/debian/build-debian.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Copyright 2022 Mycroft AI Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# -----------------------------------------------------------------------------
+# Script for building Debian packages from PyInstaller binaries.
+#
+# Before running this script, you must build PyInstaller binaries with "make
+# binaries" or by manually invoking Dockerfile.binary.
+#
+# One package is built for each platform directory found under dist/linux/
+# and copied into dist/. Exits non-zero if no platform directory exists.
+# -----------------------------------------------------------------------------
+set -euo pipefail
+
+# Directory of *this* script
+this_dir="$( cd "$( dirname "$0" )" && pwd )"
+src_dir="$(realpath "${this_dir}/..")"
+dist_dir="${src_dir}/dist/linux"
+
+version="$(cat "${src_dir}/mimic3-tts/mimic3_tts/VERSION")"
+
+# Single temporary root for the whole run. The EXIT trap is registered once;
+# re-registering it per platform would only clean up the last build directory.
+temp_root="$(mktemp -d)"
+cleanup() {
+    rm -rf -- "${temp_root}"
+}
+trap cleanup EXIT
+
+num_built=0
+
+for platform in 'amd64' 'arm64' 'arm/v7'; do
+    platform_dir="${dist_dir}/${platform}"
+    if [[ ! -d "${platform_dir}" ]]; then
+        # No binaries were built for this platform
+        continue
+    fi
+
+    # Fix Debian arch name
+    case "${platform}" in
+        arm/v7)
+            debian_arch='armhf'
+            ;;
+
+        *)
+            debian_arch="${platform}"
+            ;;
+    esac
+
+    # Fresh build directory per platform so ./*.deb only matches this package
+    temp_dir="${temp_root}/${debian_arch}"
+    package_dir="${temp_dir}/mimic3-tts"
+    mkdir -p "${package_dir}"
+
+    # Create control file. The single-quoted list restricts envsubst to the
+    # two variables set here, so it must not be expanded by the shell.
+    mkdir -p "${package_dir}/DEBIAN"
+    # shellcheck disable=SC2016
+    VERSION="${version}" DEBIAN_ARCH="${debian_arch}" \
+        envsubst '${VERSION} ${DEBIAN_ARCH}' \
+            < "${src_dir}/debian/control.in" \
+            > "${package_dir}/DEBIAN/control"
+
+    # Copy artifacts
+    mkdir -p "${package_dir}/usr/lib/mimic3-tts"
+    rsync -av "${platform_dir}/mimic3/" "${package_dir}/usr/lib/mimic3-tts/"
+
+    # Copy scripts
+    mkdir -p "${package_dir}/usr/bin/"
+    rsync -av "${this_dir}/bin/" "${package_dir}/usr/bin/"
+
+    # Build Debian package. pushd/popd print the directory stack on stdout;
+    # silence that, but keep stderr so failures stay visible.
+    pushd "${temp_dir}" >/dev/null
+    dpkg --build 'mimic3-tts'
+    dpkg-name ./*.deb
+    cp ./*.deb "${src_dir}/dist/"
+    popd >/dev/null
+
+    num_built=$((num_built + 1))
+done
+
+if [[ "${num_built}" -eq 0 ]]; then
+    echo "No PyInstaller binaries found under ${dist_dir}; run 'make binaries' first" >&2
+    exit 1
+fi
diff --git a/debian/control.in b/debian/control.in
new file mode 100644
index 0000000..6518e3c
--- /dev/null
+++ b/debian/control.in
@@ -0,0 +1,9 @@
+Package: mimic3-tts
+Version: ${VERSION}
+Section: utils
+Priority: optional
+Depends: libespeak-ng1
+Recommends: sox
+Architecture: ${DEBIAN_ARCH}
+Maintainer: Mycroft AI
+Description: A fast, local, neural text to speech system for Mycroft
diff --git a/mimic3-tts/README.md b/mimic3-tts/README.md
index 97411bf..bbc709f 100644
--- a/mimic3-tts/README.md
+++ b/mimic3-tts/README.md
@@ -292,7 +292,7 @@ Our implementation is heavily based on [Jaehyeon Kim's PyTorch model](https://gi
 
 At a high level, Mimic 3 performs two important tasks:
 
-1. Converting raw text input numeric input for the VITS TTS model, and
+1. Converting raw text to numeric input for the VITS TTS model, and
 2. Using the model to transform numeric input into audio output
 
 The second step is the same for every voice, but the first step (text to numbers) varies. There are currently three implementations of step 1, described below.