open source
This commit is contained in:
parent
70b6e17c69
commit
a93bfc1ec9
61 changed files with 4013 additions and 126 deletions
|
|
@ -42,6 +42,7 @@ class AgentConfig(TypedModel, type=AgentType.BASE):
|
|||
initial_message: Optional[BaseMessage] = None
|
||||
generate_responses: bool = True
|
||||
allowed_idle_time_seconds: Optional[float] = None
|
||||
allow_agent_to_be_cut_off: bool = True
|
||||
end_conversation_on_goodbye: bool = False
|
||||
send_filler_audio: Union[bool, FillerAudioConfig] = False
|
||||
|
||||
|
|
@ -59,6 +60,13 @@ class LLMAgentConfig(AgentConfig, type=AgentType.LLM):
|
|||
cut_off_response: Optional[CutOffResponse] = None
|
||||
|
||||
|
||||
class ChatGPTAlphaAgentConfig(AgentConfig, type=AgentType.CHAT_GPT_ALPHA):
|
||||
prompt_preamble: str
|
||||
expected_first_prompt: Optional[str] = None
|
||||
temperature: float = LLM_AGENT_DEFAULT_TEMPERATURE
|
||||
max_tokens: int = LLM_AGENT_DEFAULT_MAX_TOKENS
|
||||
|
||||
|
||||
class ChatGPTAgentConfig(AgentConfig, type=AgentType.CHAT_GPT):
|
||||
prompt_preamble: str
|
||||
expected_first_prompt: Optional[str] = None
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from enum import Enum
|
||||
|
||||
|
||||
class AudioEncoding(str, Enum):
|
||||
LINEAR16 = "linear16"
|
||||
MULAW = "mulaw"
|
||||
MULAW = "mulaw"
|
||||
|
|
|
|||
|
|
@ -1,17 +1,17 @@
|
|||
import pydantic
|
||||
|
||||
class BaseModel(pydantic.BaseModel):
|
||||
|
||||
class BaseModel(pydantic.BaseModel):
|
||||
def __init__(self, **data):
|
||||
for key, value in data.items():
|
||||
if isinstance(value, dict):
|
||||
if 'type' in value:
|
||||
if "type" in value:
|
||||
data[key] = TypedModel.parse_obj(value)
|
||||
super().__init__(**data)
|
||||
|
||||
|
||||
# Adapted from https://github.com/pydantic/pydantic/discussions/3091
|
||||
class TypedModel(BaseModel):
|
||||
|
||||
_subtypes_ = []
|
||||
|
||||
def __init_subclass__(cls, type=None):
|
||||
|
|
@ -22,31 +22,30 @@ class TypedModel(BaseModel):
|
|||
for t, cls in _cls._subtypes_:
|
||||
if t == type:
|
||||
return cls
|
||||
raise ValueError(f'Unknown type {type}')
|
||||
|
||||
raise ValueError(f"Unknown type {type}")
|
||||
|
||||
@classmethod
|
||||
def get_type(_cls, cls_name):
|
||||
for t, cls in _cls._subtypes_:
|
||||
if cls.__name__ == cls_name:
|
||||
return t
|
||||
raise ValueError(f'Unknown class {cls_name}')
|
||||
|
||||
raise ValueError(f"Unknown class {cls_name}")
|
||||
|
||||
@classmethod
|
||||
def parse_obj(cls, obj):
|
||||
data_type = obj.get('type')
|
||||
data_type = obj.get("type")
|
||||
if data_type is None:
|
||||
raise ValueError(f'type is required for {cls.__name__}')
|
||||
|
||||
raise ValueError(f"type is required for {cls.__name__}")
|
||||
|
||||
sub = cls.get_cls(data_type)
|
||||
if sub is None:
|
||||
raise ValueError(f'Unknown type {data_type}')
|
||||
raise ValueError(f"Unknown type {data_type}")
|
||||
return sub(**obj)
|
||||
|
||||
def _iter(self, **kwargs):
|
||||
yield 'type', self.get_type(self.__class__.__name__)
|
||||
yield "type", self.get_type(self.__class__.__name__)
|
||||
yield from super()._iter(**kwargs)
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return self.get_type(self.__class__.__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,9 +2,14 @@ from enum import Enum
|
|||
from typing import Optional, Union
|
||||
|
||||
from pydantic import BaseModel, validator
|
||||
|
||||
from vocode.streaming.output_device.base_output_device import BaseOutputDevice
|
||||
from vocode.streaming.telephony.constants import (
|
||||
DEFAULT_AUDIO_ENCODING,
|
||||
DEFAULT_SAMPLING_RATE,
|
||||
)
|
||||
from .model import TypedModel
|
||||
from .audio_encoding import AudioEncoding
|
||||
from ..output_device.base_output_device import BaseOutputDevice
|
||||
|
||||
|
||||
class SynthesizerType(str, Enum):
|
||||
|
|
@ -38,6 +43,13 @@ class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE):
|
|||
audio_encoding=output_device.audio_encoding,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_telephone_output_device(cls):
|
||||
return cls(
|
||||
sampling_rate=DEFAULT_SAMPLING_RATE,
|
||||
audio_encoding=DEFAULT_AUDIO_ENCODING,
|
||||
)
|
||||
|
||||
|
||||
AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME = "en-US-AriaNeural"
|
||||
AZURE_SYNTHESIZER_DEFAULT_PITCH = 0
|
||||
|
|
@ -45,18 +57,32 @@ AZURE_SYNTHESIZER_DEFAULT_RATE = 15
|
|||
|
||||
|
||||
class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE):
|
||||
voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
|
||||
pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH
|
||||
rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE
|
||||
voice_name: Optional[str] = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
|
||||
pitch: Optional[int] = AZURE_SYNTHESIZER_DEFAULT_PITCH
|
||||
rate: Optional[int] = AZURE_SYNTHESIZER_DEFAULT_RATE
|
||||
|
||||
class Config:
|
||||
validate_assignment = True
|
||||
|
||||
@validator("voice_name")
|
||||
def set_name(cls, voice_name):
|
||||
return voice_name or AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
|
||||
|
||||
@validator("pitch")
|
||||
def set_pitch(cls, pitch):
|
||||
return pitch or AZURE_SYNTHESIZER_DEFAULT_PITCH
|
||||
|
||||
@validator("rate")
|
||||
def set_rate(cls, rate):
|
||||
return rate or AZURE_SYNTHESIZER_DEFAULT_RATE
|
||||
|
||||
@classmethod
|
||||
def from_output_device(
|
||||
cls,
|
||||
output_device: BaseOutputDevice,
|
||||
voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME,
|
||||
pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH,
|
||||
rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE,
|
||||
track_bot_sentiment_in_voice: Union[bool, TrackBotSentimentConfig] = False,
|
||||
voice_name: Optional[str] = None,
|
||||
pitch: Optional[int] = None,
|
||||
rate: Optional[int] = None,
|
||||
):
|
||||
return cls(
|
||||
sampling_rate=output_device.sampling_rate,
|
||||
|
|
@ -64,16 +90,33 @@ class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE):
|
|||
voice_name=voice_name,
|
||||
pitch=pitch,
|
||||
rate=rate,
|
||||
track_bot_sentiment_in_voice=track_bot_sentiment_in_voice,
|
||||
)
|
||||
|
||||
pass
|
||||
@classmethod
|
||||
def from_telephone_output_device(
|
||||
cls,
|
||||
voice_name: Optional[str] = None,
|
||||
pitch: Optional[int] = None,
|
||||
rate: Optional[int] = None,
|
||||
):
|
||||
return cls(
|
||||
sampling_rate=DEFAULT_SAMPLING_RATE,
|
||||
audio_encoding=DEFAULT_AUDIO_ENCODING,
|
||||
voice_name=voice_name,
|
||||
pitch=pitch,
|
||||
rate=rate,
|
||||
)
|
||||
|
||||
|
||||
class GoogleSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.GOOGLE):
|
||||
pass
|
||||
|
||||
|
||||
class ElevenLabsSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.ELEVEN_LABS):
|
||||
api_key: str
|
||||
voice_id: Optional[str] = None
|
||||
|
||||
|
||||
class RimeSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.RIME):
|
||||
speaker: str
|
||||
|
||||
|
|
@ -88,3 +131,14 @@ class RimeSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.RIME):
|
|||
audio_encoding=output_device.audio_encoding,
|
||||
speaker=speaker,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_telephone_output_device(
|
||||
cls,
|
||||
speaker: str,
|
||||
):
|
||||
return cls(
|
||||
sampling_rate=DEFAULT_SAMPLING_RATE,
|
||||
audio_encoding=DEFAULT_AUDIO_ENCODING,
|
||||
speaker=speaker,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from typing import Optional
|
||||
from vocode.streaming.models.audio_encoding import AudioEncoding
|
||||
from vocode.streaming.models.model import BaseModel
|
||||
from vocode.streaming.models.agent import AgentConfig
|
||||
from vocode.streaming.models.synthesizer import SynthesizerConfig
|
||||
|
|
@ -19,6 +20,7 @@ class CreateInboundCall(BaseModel):
|
|||
agent_config: AgentConfig
|
||||
synthesizer_config: Optional[SynthesizerConfig] = None
|
||||
twilio_sid: str
|
||||
conversation_id: Optional[str] = None
|
||||
twilio_config: Optional[TwilioConfig] = None
|
||||
|
||||
|
||||
|
|
@ -48,3 +50,11 @@ class DialIntoZoomCall(BaseModel):
|
|||
synthesizer_config: Optional[SynthesizerConfig] = None
|
||||
conversation_id: Optional[str] = None
|
||||
twilio_config: Optional[TwilioConfig] = None
|
||||
|
||||
|
||||
class CallConfig(BaseModel):
|
||||
transcriber_config: TranscriberConfig
|
||||
agent_config: AgentConfig
|
||||
synthesizer_config: SynthesizerConfig
|
||||
twilio_config: Optional[TwilioConfig]
|
||||
twilio_sid: str
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from vocode.streaming.input_device.base_input_device import (
|
||||
BaseInputDevice,
|
||||
from vocode.streaming.input_device.base_input_device import BaseInputDevice
|
||||
from vocode.streaming.telephony.constants import (
|
||||
DEFAULT_AUDIO_ENCODING,
|
||||
DEFAULT_CHUNK_SIZE,
|
||||
DEFAULT_SAMPLING_RATE,
|
||||
)
|
||||
from .audio_encoding import AudioEncoding
|
||||
from .model import BaseModel, TypedModel
|
||||
|
|
@ -54,11 +57,25 @@ class TranscriberConfig(TypedModel, type=TranscriberType.BASE):
|
|||
endpointing_config=endpointing_config,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_telephone_input_device(
|
||||
cls,
|
||||
endpointing_config: Optional[EndpointingConfig] = None,
|
||||
):
|
||||
return cls(
|
||||
sampling_rate=DEFAULT_SAMPLING_RATE,
|
||||
audio_encoding=DEFAULT_AUDIO_ENCODING,
|
||||
chunk_size=DEFAULT_CHUNK_SIZE,
|
||||
endpointing_config=endpointing_config,
|
||||
)
|
||||
|
||||
|
||||
class DeepgramTranscriberConfig(TranscriberConfig, type=TranscriberType.DEEPGRAM):
|
||||
model: Optional[str] = None
|
||||
tier: Optional[str] = None
|
||||
should_warmup_model: bool = False
|
||||
version: Optional[str] = None
|
||||
downsampling: Optional[int] = None
|
||||
|
||||
|
||||
class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE):
|
||||
|
|
|
|||
|
|
@ -6,33 +6,40 @@ from .transcriber import TranscriberConfig
|
|||
from .agent import AgentConfig
|
||||
from .synthesizer import SynthesizerConfig
|
||||
|
||||
class WebSocketMessageType(str, Enum):
|
||||
BASE = 'websocket_base'
|
||||
START = 'websocket_start'
|
||||
AUDIO = 'websocket_audio'
|
||||
READY = 'websocket_ready'
|
||||
STOP = 'websocket_stop'
|
||||
|
||||
class WebSocketMessage(TypedModel, type=WebSocketMessageType.BASE): pass
|
||||
class WebSocketMessageType(str, Enum):
|
||||
BASE = "websocket_base"
|
||||
START = "websocket_start"
|
||||
AUDIO = "websocket_audio"
|
||||
READY = "websocket_ready"
|
||||
STOP = "websocket_stop"
|
||||
|
||||
|
||||
class WebSocketMessage(TypedModel, type=WebSocketMessageType.BASE):
|
||||
pass
|
||||
|
||||
|
||||
class AudioMessage(WebSocketMessage, type=WebSocketMessageType.AUDIO):
|
||||
data: str
|
||||
|
||||
@classmethod
|
||||
def from_bytes(cls, chunk: bytes):
|
||||
return cls(data=base64.b64encode(chunk).decode('utf-8'))
|
||||
return cls(data=base64.b64encode(chunk).decode("utf-8"))
|
||||
|
||||
def get_bytes(self) -> bytes:
|
||||
return base64.b64decode(self.data)
|
||||
|
||||
|
||||
class StartMessage(WebSocketMessage, type=WebSocketMessageType.START):
|
||||
transcriber_config: TranscriberConfig
|
||||
agent_config: AgentConfig
|
||||
synthesizer_config: SynthesizerConfig
|
||||
conversation_id: Optional[str] = None
|
||||
|
||||
|
||||
class ReadyMessage(WebSocketMessage, type=WebSocketMessageType.READY):
|
||||
pass
|
||||
|
||||
|
||||
class StopMessage(WebSocketMessage, type=WebSocketMessageType.STOP):
|
||||
pass
|
||||
pass
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue