From 1c5d4eb7ab98cd4e787ef4fbad3879d1f0b86261 Mon Sep 17 00:00:00 2001
From: Ajay Raj
Date: Fri, 17 Mar 2023 00:14:14 -0700
Subject: [PATCH] emotion

---
 pyproject.toml               |  2 +-
 vocode/models/synthesizer.py | 51 ++++++++++++++++++++++++++++--------
 2 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 54bd0da..5a5e101 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "vocode"
-version = "0.1.37"
+version = "0.1.39"
 description = "The all-in-one voice SDK"
 authors = ["Ajay Raj "]
 license = "MIT License"
diff --git a/vocode/models/synthesizer.py b/vocode/models/synthesizer.py
index d92ac0d..2e23f57 100644
--- a/vocode/models/synthesizer.py
+++ b/vocode/models/synthesizer.py
@@ -1,44 +1,73 @@
 from enum import Enum
-from typing import Optional
+from typing import Optional, Union
+
+from pydantic import BaseModel, validator
 from .model import TypedModel
 from .audio_encoding import AudioEncoding
 from ..output_device.base_output_device import BaseOutputDevice
 
+
 class SynthesizerType(str, Enum):
     BASE = "synthesizer_base"
     AZURE = "synthesizer_azure"
     GOOGLE = "synthesizer_google"
     ELEVEN_LABS = "synthesizer_eleven_labs"
 
+
+class TrackBotSentimentConfig(BaseModel):
+    emotions: list[str] = ["angry", "friendly", "sad", "whispering"]
+
+    @validator("emotions")
+    def emotions_must_not_be_empty(cls, v):
+        if len(v) == 0:
+            raise ValueError("must have at least one emotion")
+        return v
+
+
 class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE):
     sampling_rate: int
     audio_encoding: AudioEncoding
+    should_encode_as_wav: bool = False
+    track_bot_sentiment_in_voice: Union[bool, TrackBotSentimentConfig] = False
 
     @classmethod
     def from_output_device(cls, output_device: BaseOutputDevice):
-        return cls(sampling_rate=output_device.sampling_rate, audio_encoding=output_device.audio_encoding)
+        return cls(
+            sampling_rate=output_device.sampling_rate,
+            audio_encoding=output_device.audio_encoding,
+        )
+
+
+AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME = "en-US-AriaNeural"
+AZURE_SYNTHESIZER_DEFAULT_PITCH = 0
+AZURE_SYNTHESIZER_DEFAULT_RATE = 15
+
 
 class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE):
-    voice_name: Optional[str] = None
-    pitch: Optional[int] = None
-    rate: Optional[int] = None
+    voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
+    pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH
+    rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE
 
     @classmethod
     def from_output_device(
-      cls,
-      output_device: BaseOutputDevice,
-      voice_name: Optional[str] = None,
-      pitch: Optional[int] = None,
-      rate: Optional[int] = None,
+        cls,
+        output_device: BaseOutputDevice,
+        voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME,
+        pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH,
+        rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE,
+        track_bot_sentiment_in_voice: Union[bool, TrackBotSentimentConfig] = False,
     ):
         return cls(
-            sampling_rate=output_device.sampling_rate, 
+            sampling_rate=output_device.sampling_rate,
             audio_encoding=output_device.audio_encoding,
             voice_name=voice_name,
             pitch=pitch,
             rate=rate,
+            track_bot_sentiment_in_voice=track_bot_sentiment_in_voice,
         )
 
+    pass
+
 
 class GoogleSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.GOOGLE):
     pass