open source

This commit is contained in:
Ajay Raj 2023-03-28 00:15:34 -07:00
commit a93bfc1ec9
61 changed files with 4013 additions and 126 deletions

View file

@ -42,6 +42,7 @@ class AgentConfig(TypedModel, type=AgentType.BASE):
initial_message: Optional[BaseMessage] = None
generate_responses: bool = True
allowed_idle_time_seconds: Optional[float] = None
allow_agent_to_be_cut_off: bool = True
end_conversation_on_goodbye: bool = False
send_filler_audio: Union[bool, FillerAudioConfig] = False
@ -59,6 +60,13 @@ class LLMAgentConfig(AgentConfig, type=AgentType.LLM):
cut_off_response: Optional[CutOffResponse] = None
class ChatGPTAlphaAgentConfig(AgentConfig, type=AgentType.CHAT_GPT_ALPHA):
    """Agent configuration registered under ``AgentType.CHAT_GPT_ALPHA``.

    Adds prompt text and generation settings on top of ``AgentConfig``.
    """

    # Required prompt text. NOTE(review): presumably prepended to the model
    # prompt -- confirm against the agent that consumes this config.
    prompt_preamble: str
    # Optional; its exact use is not visible in this file -- TODO confirm.
    expected_first_prompt: Optional[str] = None
    # Both fall back to the shared LLM agent default constants.
    temperature: float = LLM_AGENT_DEFAULT_TEMPERATURE
    max_tokens: int = LLM_AGENT_DEFAULT_MAX_TOKENS
class ChatGPTAgentConfig(AgentConfig, type=AgentType.CHAT_GPT):
prompt_preamble: str
expected_first_prompt: Optional[str] = None

View file

@ -1,5 +1,6 @@
from enum import Enum
class AudioEncoding(str, Enum):
    """Supported audio encodings.

    Members are also ``str`` instances (``str`` mixin), so they compare equal
    to their plain string values.
    """

    LINEAR16 = "linear16"
    # Declared exactly once: Enum raises TypeError if a member name is reused.
    MULAW = "mulaw"

View file

@ -1,17 +1,17 @@
import pydantic
class BaseModel(pydantic.BaseModel):
    """Pydantic base model that upgrades tagged dicts to ``TypedModel`` objects.

    Any keyword argument whose value is a dict containing a ``"type"`` key is
    replaced by ``TypedModel.parse_obj(value)`` before pydantic's own
    initialisation runs.
    """

    def __init__(self, **data):
        # Only existing keys are reassigned, so the dict keeps its size while
        # it is being iterated.
        for key, value in data.items():
            if isinstance(value, dict) and "type" in value:
                data[key] = TypedModel.parse_obj(value)
        super().__init__(**data)
# Adapted from https://github.com/pydantic/pydantic/discussions/3091
class TypedModel(BaseModel):
_subtypes_ = []
def __init_subclass__(cls, type=None):
@ -22,31 +22,30 @@ class TypedModel(BaseModel):
for t, cls in _cls._subtypes_:
if t == type:
return cls
raise ValueError(f'Unknown type {type}')
raise ValueError(f"Unknown type {type}")
@classmethod
def get_type(_cls, cls_name):
    """Return the type tag registered for the subclass named ``cls_name``.

    Scans ``_cls._subtypes_`` (pairs of ``(tag, class)``) and matches on the
    class ``__name__``.

    Raises:
        ValueError: if no registered subclass has that name.
    """
    for t, cls in _cls._subtypes_:
        if cls.__name__ == cls_name:
            return t
    raise ValueError(f"Unknown class {cls_name}")
@classmethod
def parse_obj(cls, obj):
    """Instantiate the subclass selected by the ``"type"`` entry of ``obj``.

    Args:
        obj: mapping of field values; must contain a ``"type"`` key.

    Returns:
        An instance of the class returned by ``cls.get_cls`` for that type,
        built from ``**obj``.

    Raises:
        ValueError: if ``"type"`` is missing/``None`` or no class is found.
    """
    data_type = obj.get("type")
    if data_type is None:
        raise ValueError(f"type is required for {cls.__name__}")
    sub = cls.get_cls(data_type)
    # Defensive: kept in case get_cls ever returns None instead of raising.
    if sub is None:
        raise ValueError(f"Unknown type {data_type}")
    return sub(**obj)
def _iter(self, **kwargs):
    """Yield a ``("type", tag)`` pair, then everything the parent ``_iter`` yields.

    The tag is looked up from this instance's class name via ``get_type``.
    """
    # Emitted exactly once, ahead of the regular items.
    yield "type", self.get_type(self.__class__.__name__)
    yield from super()._iter(**kwargs)
@property
def type(self):
    """Type tag registered for this instance's class (see ``get_type``)."""
    return self.get_type(self.__class__.__name__)

View file

@ -2,9 +2,14 @@ from enum import Enum
from typing import Optional, Union
from pydantic import BaseModel, validator
from vocode.streaming.output_device.base_output_device import BaseOutputDevice
from vocode.streaming.telephony.constants import (
DEFAULT_AUDIO_ENCODING,
DEFAULT_SAMPLING_RATE,
)
from .model import TypedModel
from .audio_encoding import AudioEncoding
from ..output_device.base_output_device import BaseOutputDevice
class SynthesizerType(str, Enum):
@ -38,6 +43,13 @@ class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE):
audio_encoding=output_device.audio_encoding,
)
@classmethod
def from_telephone_output_device(cls):
    """Build a config that uses the telephony default sampling rate and encoding."""
    return cls(
        sampling_rate=DEFAULT_SAMPLING_RATE,
        audio_encoding=DEFAULT_AUDIO_ENCODING,
    )
# Fallback values used by AzureSynthesizerConfig for voice_name and pitch.
AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME = "en-US-AriaNeural"
AZURE_SYNTHESIZER_DEFAULT_PITCH = 0
@ -45,18 +57,32 @@ AZURE_SYNTHESIZER_DEFAULT_RATE = 15
class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE):
voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH
rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE
voice_name: Optional[str] = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
pitch: Optional[int] = AZURE_SYNTHESIZER_DEFAULT_PITCH
rate: Optional[int] = AZURE_SYNTHESIZER_DEFAULT_RATE
class Config:
validate_assignment = True
@validator("voice_name")
def set_name(cls, voice_name):
    """Fall back to the default Azure voice when ``voice_name`` is ``None`` or empty."""
    return voice_name or AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
@validator("pitch")
def set_pitch(cls, pitch):
    """Substitute the default pitch only when ``pitch`` is ``None``."""
    # Explicit None check: `pitch or DEFAULT` would discard a legitimate 0
    # as soon as the default stopped being 0.
    return AZURE_SYNTHESIZER_DEFAULT_PITCH if pitch is None else pitch
@validator("rate")
def set_rate(cls, rate):
    """Substitute the default rate only when ``rate`` is ``None``."""
    # Explicit None check: `rate or DEFAULT` treated an explicit rate of 0 as
    # "not provided" and silently replaced it with the default.
    return AZURE_SYNTHESIZER_DEFAULT_RATE if rate is None else rate
@classmethod
def from_output_device(
cls,
output_device: BaseOutputDevice,
voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME,
pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH,
rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE,
track_bot_sentiment_in_voice: Union[bool, TrackBotSentimentConfig] = False,
voice_name: Optional[str] = None,
pitch: Optional[int] = None,
rate: Optional[int] = None,
):
return cls(
sampling_rate=output_device.sampling_rate,
@ -64,16 +90,33 @@ class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE):
voice_name=voice_name,
pitch=pitch,
rate=rate,
track_bot_sentiment_in_voice=track_bot_sentiment_in_voice,
)
pass
@classmethod
def from_telephone_output_device(
    cls,
    voice_name: Optional[str] = None,
    pitch: Optional[int] = None,
    rate: Optional[int] = None,
):
    """Build an Azure config using the telephony default sampling rate and encoding.

    Args:
        voice_name: Azure voice name, or ``None`` to leave it unset.
        pitch: pitch setting, or ``None`` to leave it unset.
        rate: rate setting, or ``None`` to leave it unset.

    NOTE(review): ``None`` values are presumably replaced with the Azure
    defaults by this class's field validators -- confirm.
    """
    return cls(
        sampling_rate=DEFAULT_SAMPLING_RATE,
        audio_encoding=DEFAULT_AUDIO_ENCODING,
        voice_name=voice_name,
        pitch=pitch,
        rate=rate,
    )
class GoogleSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.GOOGLE):
    """Synthesizer config registered under ``SynthesizerType.GOOGLE``; adds no fields."""

    pass
class ElevenLabsSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.ELEVEN_LABS):
    """Synthesizer config registered under ``SynthesizerType.ELEVEN_LABS``."""

    # Required credential for the ElevenLabs service.
    api_key: str
    # Optional voice identifier; None leaves the choice to the consumer.
    voice_id: Optional[str] = None
class RimeSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.RIME):
speaker: str
@ -88,3 +131,14 @@ class RimeSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.RIME):
audio_encoding=output_device.audio_encoding,
speaker=speaker,
)
@classmethod
def from_telephone_output_device(
    cls,
    speaker: str,
):
    """Build a Rime config using the telephony default sampling rate and encoding.

    Args:
        speaker: value stored in the config's ``speaker`` field.
    """
    return cls(
        sampling_rate=DEFAULT_SAMPLING_RATE,
        audio_encoding=DEFAULT_AUDIO_ENCODING,
        speaker=speaker,
    )

View file

@ -1,4 +1,5 @@
from typing import Optional
from vocode.streaming.models.audio_encoding import AudioEncoding
from vocode.streaming.models.model import BaseModel
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import SynthesizerConfig
@ -19,6 +20,7 @@ class CreateInboundCall(BaseModel):
agent_config: AgentConfig
synthesizer_config: Optional[SynthesizerConfig] = None
twilio_sid: str
conversation_id: Optional[str] = None
twilio_config: Optional[TwilioConfig] = None
@ -48,3 +50,11 @@ class DialIntoZoomCall(BaseModel):
synthesizer_config: Optional[SynthesizerConfig] = None
conversation_id: Optional[str] = None
twilio_config: Optional[TwilioConfig] = None
class CallConfig(BaseModel):
    """Bundle of the configuration objects and Twilio SID for one call."""

    transcriber_config: TranscriberConfig
    agent_config: AgentConfig
    synthesizer_config: SynthesizerConfig
    # NOTE(review): no explicit default; pydantic v1 treats a bare Optional
    # field as defaulting to None -- confirm for the pinned pydantic version.
    twilio_config: Optional[TwilioConfig]
    twilio_sid: str

View file

@ -1,8 +1,11 @@
from enum import Enum
from typing import Optional
from vocode.streaming.input_device.base_input_device import (
BaseInputDevice,
from vocode.streaming.input_device.base_input_device import BaseInputDevice
from vocode.streaming.telephony.constants import (
DEFAULT_AUDIO_ENCODING,
DEFAULT_CHUNK_SIZE,
DEFAULT_SAMPLING_RATE,
)
from .audio_encoding import AudioEncoding
from .model import BaseModel, TypedModel
@ -54,11 +57,25 @@ class TranscriberConfig(TypedModel, type=TranscriberType.BASE):
endpointing_config=endpointing_config,
)
@classmethod
def from_telephone_input_device(
    cls,
    endpointing_config: Optional[EndpointingConfig] = None,
):
    """Build a config using the telephony default rate, encoding and chunk size.

    Args:
        endpointing_config: optional endpointing settings, passed through
            unchanged.
    """
    return cls(
        sampling_rate=DEFAULT_SAMPLING_RATE,
        audio_encoding=DEFAULT_AUDIO_ENCODING,
        chunk_size=DEFAULT_CHUNK_SIZE,
        endpointing_config=endpointing_config,
    )
class DeepgramTranscriberConfig(TranscriberConfig, type=TranscriberType.DEEPGRAM):
    """Transcriber config registered under ``TranscriberType.DEEPGRAM``.

    NOTE(review): ``model``, ``tier`` and ``version`` look like Deepgram
    request options -- confirm against the transcriber that reads them.
    """

    model: Optional[str] = None
    tier: Optional[str] = None
    should_warmup_model: bool = False
    version: Optional[str] = None
    # Optional integer; units/semantics are not visible here -- TODO confirm.
    downsampling: Optional[int] = None
class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE):

View file

@ -6,33 +6,40 @@ from .transcriber import TranscriberConfig
from .agent import AgentConfig
from .synthesizer import SynthesizerConfig
class WebSocketMessageType(str, Enum):
    """Type tags for the websocket message models defined in this module."""

    BASE = "websocket_base"
    START = "websocket_start"
    AUDIO = "websocket_audio"
    READY = "websocket_ready"
    STOP = "websocket_stop"


class WebSocketMessage(TypedModel, type=WebSocketMessageType.BASE):
    """Base class for all websocket messages; carries no fields of its own."""

    pass
class AudioMessage(WebSocketMessage, type=WebSocketMessageType.AUDIO):
    """Websocket message carrying an audio chunk as base64 text."""

    # Base64-encoded audio bytes, stored as a UTF-8 string.
    data: str

    @classmethod
    def from_bytes(cls, chunk: bytes):
        """Create a message whose ``data`` is the base64 encoding of ``chunk``."""
        return cls(data=base64.b64encode(chunk).decode("utf-8"))

    def get_bytes(self) -> bytes:
        """Return the raw bytes decoded from ``data``."""
        return base64.b64decode(self.data)
class StartMessage(WebSocketMessage, type=WebSocketMessageType.START):
    """Websocket message carrying the transcriber, agent and synthesizer configs."""

    transcriber_config: TranscriberConfig
    agent_config: AgentConfig
    synthesizer_config: SynthesizerConfig
    # Optional identifier; None when the sender does not supply one.
    conversation_id: Optional[str] = None
class ReadyMessage(WebSocketMessage, type=WebSocketMessageType.READY):
    """Payload-free websocket message tagged ``WebSocketMessageType.READY``."""

    pass
class StopMessage(WebSocketMessage, type=WebSocketMessageType.STOP):
    """Payload-free websocket message tagged ``WebSocketMessageType.STOP``."""

    pass