From 1807fbef0d96d5f68895494d17246669c079b993 Mon Sep 17 00:00:00 2001 From: Ajay Raj Date: Sun, 12 Mar 2023 00:29:12 -0800 Subject: [PATCH] add voice configs + transcriber configs --- vocode/input_device/telephone_input.py | 9 ++++++++ vocode/models/agent.py | 9 +++++++- vocode/models/telephony.py | 4 ++++ vocode/output_device/telephone_output.py | 7 ++++++ vocode/telephony/inbound_call_server.py | 16 +++++++++++-- vocode/telephony/outbound_call.py | 29 ++++++++++++++++-------- 6 files changed, 62 insertions(+), 12 deletions(-) create mode 100644 vocode/input_device/telephone_input.py create mode 100644 vocode/output_device/telephone_output.py diff --git a/vocode/input_device/telephone_input.py b/vocode/input_device/telephone_input.py new file mode 100644 index 0000000..f43b094 --- /dev/null +++ b/vocode/input_device/telephone_input.py @@ -0,0 +1,9 @@ +from vocode.input_device.base_input_device import BaseInputDevice +from vocode.models.audio_encoding import AudioEncoding + + +class TelephoneInput(BaseInputDevice): + def __init__(self): + super().__init__( + sampling_rate=8000, audio_encoding=AudioEncoding.MULAW, chunk_size=160 + ) diff --git a/vocode/models/agent.py b/vocode/models/agent.py index b45082d..39d31ee 100644 --- a/vocode/models/agent.py +++ b/vocode/models/agent.py @@ -1,9 +1,11 @@ -from typing import Optional +from typing import Optional, Union from enum import Enum from vocode.models.message import BaseMessage from .model import TypedModel, BaseModel +FILLER_AUDIO_DEFAULT_SILENCE_THRESHOLD_SECONDS = 0.5 + class AgentType(str, Enum): BASE = "agent_base" @@ -16,11 +18,16 @@ class AgentType(str, Enum): WEBSOCKET_USER_IMPLEMENTED = "agent_websocket_user_implemented" +class FillerAudioConfig(BaseModel): + silence_threshold_seconds: float = FILLER_AUDIO_DEFAULT_SILENCE_THRESHOLD_SECONDS + + class AgentConfig(TypedModel, type=AgentType.BASE): initial_message: Optional[BaseMessage] = None generate_responses: bool = True allowed_idle_time_seconds: Optional[float] = None end_conversation_on_goodbye: bool = False + send_filler_audio: Union[bool, FillerAudioConfig] = False class LLMAgentConfig(AgentConfig, type=AgentType.LLM): diff --git a/vocode/models/telephony.py b/vocode/models/telephony.py index 8e24e6a..fbecaec 100644 --- a/vocode/models/telephony.py +++ b/vocode/models/telephony.py @@ -2,6 +2,7 @@ from typing import Optional from vocode.models.model import BaseModel from vocode.models.agent import AgentConfig from vocode.models.synthesizer import SynthesizerConfig +from vocode.models.transcriber import TranscriberConfig class CallEntity(BaseModel): @@ -9,7 +10,9 @@ class CallEntity(BaseModel): class CreateInboundCall(BaseModel): + transcriber_config: Optional[TranscriberConfig] = None agent_config: AgentConfig + synthesizer_config: Optional[SynthesizerConfig] = None twilio_sid: str conversation_id: Optional[str] = None @@ -17,6 +20,7 @@ class CreateInboundCall(BaseModel): class CreateOutboundCall(BaseModel): recipient: CallEntity caller: CallEntity + transcriber_config: Optional[TranscriberConfig] = None agent_config: AgentConfig synthesizer_config: Optional[SynthesizerConfig] = None conversation_id: Optional[str] = None diff --git a/vocode/output_device/telephone_output.py b/vocode/output_device/telephone_output.py new file mode 100644 index 0000000..1fc80a3 --- /dev/null +++ b/vocode/output_device/telephone_output.py @@ -0,0 +1,7 @@ +from .base_output_device import BaseOutputDevice +from ..models.audio_encoding import AudioEncoding + + +class TelephoneOutput(BaseOutputDevice): + def __init__(self): + super().__init__(sampling_rate=8000, audio_encoding=AudioEncoding.MULAW) diff --git a/vocode/telephony/inbound_call_server.py b/vocode/telephony/inbound_call_server.py index b040d1d..6a1e085 100644 --- a/vocode/telephony/inbound_call_server.py +++ b/vocode/telephony/inbound_call_server.py @@ -2,6 +2,9 @@ from fastapi import FastAPI, Response, Form from typing import Optional import requests import uvicorn +from vocode.models.synthesizer import SynthesizerConfig + +from vocode.models.transcriber import TranscriberConfig from .. import api_key, BASE_URL from ..models.agent import AgentConfig @@ -12,9 +15,15 @@ VOCODE_INBOUND_CALL_URL = f"https://{BASE_URL}/create_inbound_call" class InboundCallServer: def __init__( - self, agent_config: AgentConfig, response_on_rate_limit: Optional[str] = None + self, + agent_config: AgentConfig, + transcriber_config: Optional[TranscriberConfig] = None, + synthesizer_config: Optional[SynthesizerConfig] = None, + response_on_rate_limit: Optional[str] = None, ): self.agent_config = agent_config + self.transcriber_config = transcriber_config + self.synthesizer_config = synthesizer_config self.app = FastAPI() self.app.post("/vocode")(self.handle_call) self.response_on_rate_limit = ( @@ -27,7 +36,10 @@ class InboundCallServer: VOCODE_INBOUND_CALL_URL, headers={"Authorization": f"Bearer {api_key}"}, json=CreateInboundCall( - agent_config=self.agent_config, twilio_sid=twilio_sid + agent_config=self.agent_config, + twilio_sid=twilio_sid, + transcriber_config=self.transcriber_config, + synthesizer_config=self.synthesizer_config, ).dict(), ) if response.status_code == 429: diff --git a/vocode/telephony/outbound_call.py b/vocode/telephony/outbound_call.py index 72e72e8..652869a 100644 --- a/vocode/telephony/outbound_call.py +++ b/vocode/telephony/outbound_call.py @@ -1,27 +1,38 @@ +from typing import Optional +from vocode.models.agent import AgentConfig +from vocode.models.synthesizer import SynthesizerConfig +from vocode.models.transcriber import TranscriberConfig from ..models.telephony import CallEntity, CreateOutboundCall import requests from .. import api_key, BASE_URL VOCODE_OUTBOUND_CALL_URL = f"https://{BASE_URL}/create_outbound_call" -class OutboundCall: - def __init__(self, recipient: CallEntity, caller: CallEntity, agent_config): +class OutboundCall: + def __init__( + self, + recipient: CallEntity, + caller: CallEntity, + agent_config: AgentConfig, + transcriber_config: Optional[TranscriberConfig] = None, + synthesizer_config: Optional[SynthesizerConfig] = None, + ): self.recipient = recipient self.caller = caller self.agent_config = agent_config + self.transcriber_config = transcriber_config + self.synthesizer_config = synthesizer_config def start(self): return requests.post( VOCODE_OUTBOUND_CALL_URL, - headers={ - "Authorization": f"Bearer {api_key}" - }, + headers={"Authorization": f"Bearer {api_key}"}, json=CreateOutboundCall( recipient=self.recipient, caller=self.caller, - agent_config=self.agent_config - ).dict() + agent_config=self.agent_config, + transcriber_config=self.transcriber_config, + synthesizer_config=self.synthesizer_config, + ).dict(), ) - - \ No newline at end of file