This commit is contained in:
Ajay Raj 2023-03-17 00:14:14 -07:00
commit 1c5d4eb7ab
2 changed files with 41 additions and 12 deletions

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "vocode" name = "vocode"
version = "0.1.37" version = "0.1.39"
description = "The all-in-one voice SDK" description = "The all-in-one voice SDK"
authors = ["Ajay Raj <ajay@vocode.dev>"] authors = ["Ajay Raj <ajay@vocode.dev>"]
license = "MIT License" license = "MIT License"

View file

@ -1,44 +1,73 @@
from enum import Enum from enum import Enum
from typing import Optional from typing import Optional, Union
from pydantic import BaseModel, validator
from .model import TypedModel from .model import TypedModel
from .audio_encoding import AudioEncoding from .audio_encoding import AudioEncoding
from ..output_device.base_output_device import BaseOutputDevice from ..output_device.base_output_device import BaseOutputDevice
class SynthesizerType(str, Enum):
    """String identifiers for the synthesizer configuration variants.

    Members also subclass ``str``, so each one compares equal to its
    plain string value (e.g. ``SynthesizerType.AZURE == "synthesizer_azure"``).
    """

    BASE = "synthesizer_base"
    AZURE = "synthesizer_azure"
    GOOGLE = "synthesizer_google"
    ELEVEN_LABS = "synthesizer_eleven_labs"
class TrackBotSentimentConfig(BaseModel):
    # Settings object accepted by the `track_bot_sentiment_in_voice` field of
    # SynthesizerConfig as an alternative to a plain bool.

    # Emotion labels to use; the validator below rejects an empty list.
    emotions: list[str] = ["angry", "friendly", "sad", "whispering"]

    @validator("emotions")
    def emotions_must_not_be_empty(cls, v):
        """Validate that ``emotions`` contains at least one entry.

        Args:
            v: the candidate value for ``emotions``.

        Returns:
            ``v`` unchanged when it is non-empty.

        Raises:
            ValueError: if ``v`` is empty.
        """
        if not v:
            raise ValueError("must have at least one emotion")
        return v
class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE):
    # Settings shared by every synthesizer config. Declared with
    # type=SynthesizerType.BASE; how TypedModel consumes that keyword is
    # defined in .model and is not visible here.

    sampling_rate: int
    audio_encoding: AudioEncoding
    should_encode_as_wav: bool = False
    # Either a plain bool, or a TrackBotSentimentConfig carrying the emotion
    # list to use.
    track_bot_sentiment_in_voice: Union[bool, TrackBotSentimentConfig] = False

    @classmethod
    def from_output_device(cls, output_device: BaseOutputDevice):
        """Build a config whose audio format is copied from an output device.

        Args:
            output_device: device whose ``sampling_rate`` and
                ``audio_encoding`` are copied into the new config.

        Returns:
            An instance of ``cls``; every other field keeps its default.
        """
        return cls(
            sampling_rate=output_device.sampling_rate,
            audio_encoding=output_device.audio_encoding,
        )
# Defaults for AzureSynthesizerConfig, used both as field defaults and as the
# keyword defaults of its from_output_device() constructor. The units of
# pitch/rate are determined by the synthesizer implementation, which is not
# visible in this file.
AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME: str = "en-US-AriaNeural"
AZURE_SYNTHESIZER_DEFAULT_PITCH: int = 0
AZURE_SYNTHESIZER_DEFAULT_RATE: int = 15
class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE):
    # Azure variant of SynthesizerConfig: adds voice, pitch and rate, each
    # defaulting to the module-level AZURE_SYNTHESIZER_DEFAULT_* constants.

    voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
    pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH
    rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE

    @classmethod
    def from_output_device(
        cls,
        output_device: BaseOutputDevice,
        voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME,
        pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH,
        rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE,
        track_bot_sentiment_in_voice: Union[bool, TrackBotSentimentConfig] = False,
    ):
        """Build an Azure config whose audio format comes from an output device.

        Args:
            output_device: device whose ``sampling_rate`` and
                ``audio_encoding`` are copied into the new config.
            voice_name: value for the ``voice_name`` field.
            pitch: value for the ``pitch`` field.
            rate: value for the ``rate`` field.
            track_bot_sentiment_in_voice: a bool, or a
                ``TrackBotSentimentConfig``, forwarded unchanged to the
                field of the same name.

        Returns:
            An instance of ``cls``; fields not listed above keep their
            defaults.
        """
        return cls(
            sampling_rate=output_device.sampling_rate,
            audio_encoding=output_device.audio_encoding,
            voice_name=voice_name,
            pitch=pitch,
            rate=rate,
            track_bot_sentiment_in_voice=track_bot_sentiment_in_voice,
        )
class GoogleSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.GOOGLE):
    # Declares no fields of its own; everything is inherited from
    # SynthesizerConfig. Only the type tag (SynthesizerType.GOOGLE) differs.
    pass