open source
This commit is contained in:
parent
70b6e17c69
commit
a93bfc1ec9
61 changed files with 4013 additions and 126 deletions
170
vocode/streaming/telephony/conversation/call.py
Normal file
170
vocode/streaming/telephony/conversation/call.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
from fastapi import WebSocket
|
||||
import base64
|
||||
from enum import Enum
|
||||
import json
|
||||
import logging
|
||||
from typing import Optional
|
||||
from vocode.streaming.agent.base_agent import BaseAgent
|
||||
from vocode.streaming.factory import (
|
||||
create_agent,
|
||||
create_synthesizer,
|
||||
create_transcriber,
|
||||
)
|
||||
|
||||
from vocode.streaming.streaming_conversation import StreamingConversation
|
||||
from vocode.streaming.models.telephony import CallConfig, TwilioConfig
|
||||
from vocode.streaming.output_device.twilio_output_device import TwilioOutputDevice
|
||||
from vocode.streaming.models.synthesizer import (
|
||||
AzureSynthesizerConfig,
|
||||
)
|
||||
from vocode.streaming.models.transcriber import (
|
||||
DeepgramTranscriberConfig,
|
||||
PunctuationEndpointingConfig,
|
||||
)
|
||||
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
|
||||
from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
|
||||
from vocode.streaming.telephony.config_manager.base_config_manager import (
|
||||
BaseConfigManager,
|
||||
)
|
||||
from vocode.streaming.telephony.twilio import create_twilio_client
|
||||
from vocode.streaming.models.audio_encoding import AudioEncoding
|
||||
from vocode.streaming.streaming_conversation import StreamingConversation
|
||||
from vocode.streaming.transcriber.base_transcriber import BaseTranscriber
|
||||
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
|
||||
|
||||
|
||||
class PhoneCallAction(Enum):
    """Actions a websocket message handler can request from the receive loop."""

    # Returned by Call.handle_ws_message to make the receive loop stop reading
    # from the media websocket.
    CLOSE_WEBSOCKET = 1
|
||||
|
||||
|
||||
class Call(StreamingConversation):
    """Streaming conversation whose audio travels over a Twilio media websocket.

    Inbound audio arrives as JSON websocket messages ("start", "media",
    "stop"); outbound audio is written through a ``TwilioOutputDevice`` that is
    bound to the websocket in :meth:`attach_ws_and_start`.
    """

    def __init__(
        self,
        base_url: str,
        config_manager: BaseConfigManager,
        agent: BaseAgent,
        twilio_config: TwilioConfig,
        transcriber: Optional[BaseTranscriber] = None,
        synthesizer: Optional[BaseSynthesizer] = None,
        twilio_sid=None,
        conversation_id: Optional[str] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Build the conversation and the Twilio client used to control the call.

        Args:
            base_url: Stored on the instance; not otherwise used in this class.
            config_manager: Store from which this call's config is deleted on
                termination.
            agent: Agent passed through to ``StreamingConversation``.
            twilio_config: Credentials/config handed to ``create_twilio_client``.
            transcriber: Transcriber to use. When falsy, a Deepgram transcriber
                configured for 8000 Hz mu-law audio with the "voicemail" model
                and punctuation endpointing is created.
            synthesizer: Synthesizer to use. When falsy, an Azure synthesizer
                configured for 8000 Hz mu-law audio is created.
            twilio_sid: SID of the Twilio call this conversation belongs to.
            conversation_id: Optional id forwarded to ``StreamingConversation``.
            logger: Optional logger forwarded to ``StreamingConversation`` (and
                to the default transcriber).
        """
        self.base_url = base_url
        self.config_manager = config_manager
        self.output_device = TwilioOutputDevice()
        self.twilio_config = twilio_config
        self.twilio_client = create_twilio_client(twilio_config)
        super().__init__(
            self.output_device,
            transcriber
            or DeepgramTranscriber(
                DeepgramTranscriberConfig(
                    sampling_rate=8000,
                    audio_encoding=AudioEncoding.MULAW,
                    # CHUNK_SIZE is read before super().__init__ runs, so it must
                    # be a class-level attribute; it is not defined in this class
                    # (presumably inherited from StreamingConversation - TODO
                    # confirm).
                    chunk_size=self.CHUNK_SIZE,
                    model="voicemail",
                    endpointing_config=PunctuationEndpointingConfig(),
                ),
                logger=logger,
            ),
            agent,
            synthesizer
            or AzureSynthesizer(
                AzureSynthesizerConfig(
                    sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
                )
            ),
            conversation_id=conversation_id,
            per_chunk_allowance_seconds=0.01,
            logger=logger,
        )
        self.twilio_sid = twilio_sid
        # Timestamp of the most recent "media" message; used to detect gaps in
        # the inbound audio (see handle_ws_message).
        self.latest_media_timestamp = 0

    @staticmethod
    def from_call_config(
        base_url: str,
        call_config: CallConfig,
        config_manager: BaseConfigManager,
        conversation_id: str,
        logger: logging.Logger,
    ):
        """Create a ``Call`` from a stored ``CallConfig``.

        The agent, transcriber and synthesizer are built from their configs via
        the factory helpers; the Twilio config and SID are taken from
        ``call_config`` unchanged.

        Returns:
            A new ``Call`` instance.
        """
        return Call(
            base_url=base_url,
            logger=logger,
            config_manager=config_manager,
            agent=create_agent(call_config.agent_config),
            transcriber=create_transcriber(call_config.transcriber_config),
            synthesizer=create_synthesizer(call_config.synthesizer_config),
            twilio_config=call_config.twilio_config,
            twilio_sid=call_config.twilio_sid,
            conversation_id=conversation_id,
        )

    async def attach_ws_and_start(self, ws: WebSocket):
        """Bind the websocket to the output device and run the call until it ends.

        If Twilio reports the call was answered by a machine or fax, the call is
        completed immediately. Otherwise this waits for the stream "start"
        event, starts the conversation and feeds incoming messages to
        :meth:`handle_ws_message` until the conversation becomes inactive or a
        handler asks to close the websocket.

        The conversation is always torn down afterwards, including when reading
        from the websocket or handling a message raises; the exception still
        propagates to the caller.
        """
        self.logger.debug("Trying to attach WS to outbound call")
        self.output_device.ws = ws
        self.logger.debug("Attached WS to outbound call")

        # NOTE(review): this looks like a synchronous Twilio REST request made
        # from a coroutine - confirm that blocking the event loop is acceptable.
        twilio_call = self.twilio_client.calls(self.twilio_sid).fetch()

        try:
            if twilio_call.answered_by in ("machine_start", "fax"):
                self.logger.info(f"Call answered by {twilio_call.answered_by}")
                twilio_call.update(status="completed")
            else:
                await self.wait_for_twilio_start(ws)
                await super().start()
                while self.active:
                    message = await ws.receive_text()
                    response = await self.handle_ws_message(message)
                    if response == PhoneCallAction.CLOSE_WEBSOCKET:
                        break
        finally:
            # Without this, an error in the receive loop (for example the peer
            # dropping the websocket) would skip termination entirely.
            self.tear_down()

    async def wait_for_twilio_start(self, ws: WebSocket):
        """Consume websocket messages until the "start" event arrives.

        Empty messages and events other than "start" are ignored. The stream
        SID carried by the "start" event is stored on the output device.
        """
        while True:
            message = await ws.receive_text()
            if not message:
                continue
            data = json.loads(message)
            if data["event"] == "start":
                self.logger.debug(
                    f"Media WS: Received event '{data['event']}': {message}"
                )
                self.output_device.stream_sid = data["start"]["streamSid"]
                break

    async def handle_ws_message(self, message) -> Optional[PhoneCallAction]:
        """Process one websocket message from the media stream.

        Args:
            message: Raw JSON text of the message, or ``None``.

        Returns:
            ``PhoneCallAction.CLOSE_WEBSOCKET`` when ``message`` is ``None`` or
            is a "stop" event; ``None`` otherwise ("media" events are forwarded
            as audio, other events are ignored).
        """
        if message is None:
            return PhoneCallAction.CLOSE_WEBSOCKET

        data = json.loads(message)
        event = data["event"]
        if event == "media":
            media = data["media"]
            chunk = base64.b64decode(media["payload"])
            timestamp = int(media["timestamp"])
            # A message is expected within 20 timestamp units of the previous
            # one (presumably 20 ms audio frames - TODO confirm against the
            # Twilio Media Streams docs). Any larger gap is padded with silence.
            expected_timestamp = self.latest_media_timestamp + 20
            if expected_timestamp < timestamp:
                # 8 bytes per timestamp unit: one byte per sample at 8000
                # samples per second, assuming timestamps are in milliseconds.
                bytes_to_fill = 8 * (timestamp - expected_timestamp)
                self.logger.debug(f"Filling {bytes_to_fill} bytes of silence")
                # NOTE: 0xff is silence for mulaw audio
                self.receive_audio(b"\xff" * bytes_to_fill)
            self.latest_media_timestamp = timestamp
            self.receive_audio(chunk)
        elif event == "stop":
            self.logger.debug(f"Media WS: Received event 'stop': {message}")
            self.logger.debug("Stopping...")
            return PhoneCallAction.CLOSE_WEBSOCKET
        return None

    def end_twilio_call(self) -> bool:
        """Ask Twilio to complete the call.

        Returns:
            ``True`` if the status reported back by Twilio is "completed".
        """
        response = self.twilio_client.calls(self.twilio_sid).update(status="completed")
        return response.status == "completed"

    def mark_terminated(self):
        """Mark the conversation terminated, end the Twilio call and drop its config."""
        super().mark_terminated()
        self.end_twilio_call()
        self.config_manager.delete_config(self.id)

    def tear_down(self):
        """Terminate the conversation."""
        self.terminate()
|
||||
110
vocode/streaming/telephony/conversation/outbound_call.py
Normal file
110
vocode/streaming/telephony/conversation/outbound_call.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
import logging
|
||||
from typing import Optional
|
||||
from twilio.rest import Client
|
||||
|
||||
from vocode.streaming.models.agent import AgentConfig
|
||||
from vocode.streaming.models.synthesizer import (
|
||||
AzureSynthesizerConfig,
|
||||
SynthesizerConfig,
|
||||
)
|
||||
from vocode.streaming.models.telephony import CallConfig, TwilioConfig
|
||||
from vocode.streaming.models.transcriber import (
|
||||
DeepgramTranscriberConfig,
|
||||
PunctuationEndpointingConfig,
|
||||
TranscriberConfig,
|
||||
)
|
||||
from vocode.streaming.telephony.config_manager.base_config_manager import (
|
||||
BaseConfigManager,
|
||||
)
|
||||
from vocode.streaming.telephony.constants import (
|
||||
DEFAULT_AUDIO_ENCODING,
|
||||
DEFAULT_CHUNK_SIZE,
|
||||
DEFAULT_SAMPLING_RATE,
|
||||
)
|
||||
from vocode.streaming.telephony.twilio import create_twilio_client
|
||||
from vocode.streaming.utils import create_conversation_id
|
||||
|
||||
|
||||
class OutboundCall:
    """Places an outbound phone call through Twilio and stores its call config.

    :meth:`start` validates the destination, creates the Twilio call and saves a
    ``CallConfig`` under the conversation id via the config manager.
    """

    def __init__(
        self,
        base_url: str,
        to_phone: str,
        from_phone: str,
        config_manager: BaseConfigManager,
        agent_config: AgentConfig,
        twilio_config: TwilioConfig,
        transcriber_config: Optional[TranscriberConfig] = None,
        synthesizer_config: Optional[SynthesizerConfig] = None,
        conversation_id: Optional[str] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Store the call parameters and create the Twilio client.

        Args:
            base_url: Host used to build the TwiML callback URL
                (``https://{base_url}/twiml/initiate_call/...``).
            to_phone: Number to call.
            from_phone: Number the call is placed from.
            config_manager: Store in which the ``CallConfig`` is saved.
            agent_config: Agent configuration stored in the ``CallConfig``.
            twilio_config: Config handed to ``create_twilio_client`` and stored
                in the ``CallConfig``.
            transcriber_config: Transcriber configuration. When falsy, a
                Deepgram config using the telephony default sampling rate,
                encoding and chunk size, the "voicemail" model and punctuation
                endpointing is used.
            synthesizer_config: Synthesizer configuration. When falsy, an Azure
                config using the telephony default sampling rate and encoding is
                used.
            conversation_id: Id for the conversation; generated when falsy.
            logger: Logger to use. Defaults to this module's logger so that
                logging calls never fail when no logger is supplied.
        """
        self.base_url = base_url
        self.to_phone = to_phone
        self.from_phone = from_phone
        self.config_manager = config_manager
        self.agent_config = agent_config
        self.transcriber_config = transcriber_config or DeepgramTranscriberConfig(
            sampling_rate=DEFAULT_SAMPLING_RATE,
            audio_encoding=DEFAULT_AUDIO_ENCODING,
            chunk_size=DEFAULT_CHUNK_SIZE,
            model="voicemail",
            endpointing_config=PunctuationEndpointingConfig(),
        )
        self.synthesizer_config = synthesizer_config or AzureSynthesizerConfig(
            sampling_rate=DEFAULT_SAMPLING_RATE, audio_encoding=DEFAULT_AUDIO_ENCODING
        )
        self.conversation_id = conversation_id or create_conversation_id()
        # start() logs unconditionally, so a None logger must never be stored.
        self.logger = logger if logger is not None else logging.getLogger(__name__)
        self.twilio_config = twilio_config
        self.twilio_client = create_twilio_client(twilio_config)
        # Set by start() once the Twilio call has been created.
        self.twilio_sid = None

    def create_twilio_call(
        self, to_phone: str, from_phone: str, digits: str = ""
    ) -> str:
        """Create the Twilio call and return its SID.

        Args:
            to_phone: Number to call.
            from_phone: Number to call from.
            digits: Passed to Twilio as ``send_digits``.

        Returns:
            The SID of the created call.
        """
        twilio_call = self.twilio_client.calls.create(
            url=f"https://{self.base_url}/twiml/initiate_call/{self.conversation_id}",
            to=to_phone,
            from_=from_phone,
            send_digits=digits,
        )
        return twilio_call.sid

    def validate_outbound_call(
        self,
        to_phone: str,
        from_phone: str,
        mobile_only: bool = True,
    ):
        """Check that ``to_phone`` may be called.

        Args:
            to_phone: Destination number; must be at least 8 characters long.
            from_phone: Accepted for interface symmetry; not inspected here.
            mobile_only: When true, the destination's line type is looked up
                through Twilio and must be "mobile".

        Raises:
            ValueError: If ``to_phone`` is too short, or if ``mobile_only`` is
                set and the lookup returns no line type data or a type other
                than "mobile".
        """
        if len(to_phone) < 8:
            raise ValueError("Invalid 'to' phone")

        if not mobile_only:
            return
        line_type_intelligence = (
            self.twilio_client.lookups.v2.phone_numbers(to_phone)
            .fetch(fields="line_type_intelligence")
            .line_type_intelligence
        )
        if not line_type_intelligence or line_type_intelligence["type"] != "mobile":
            raise ValueError("Can only call mobile phones")

    def start(self):
        """Validate the destination, place the call and save its ``CallConfig``."""
        self.logger.debug("Starting outbound call")
        self.validate_outbound_call(self.to_phone, self.from_phone)
        self.twilio_sid = self.create_twilio_call(self.to_phone, self.from_phone)
        call_config = CallConfig(
            transcriber_config=self.transcriber_config,
            agent_config=self.agent_config,
            synthesizer_config=self.synthesizer_config,
            twilio_config=self.twilio_config,
            twilio_sid=self.twilio_sid,
        )
        self.config_manager.save_config(self.conversation_id, call_config)

    def end(self):
        """Ask Twilio to complete the call.

        Returns:
            ``True`` if the status reported back by Twilio is "completed".
        """
        response = self.twilio_client.calls(self.twilio_sid).update(status="completed")
        return response.status == "completed"
|
||||
73
vocode/streaming/telephony/conversation/zoom_dial_in.py
Normal file
73
vocode/streaming/telephony/conversation/zoom_dial_in.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
import logging
|
||||
from typing import Optional
|
||||
from twilio.rest import Client
|
||||
from vocode.streaming.agent.base_agent import BaseAgent
|
||||
from vocode.streaming.models.agent import AgentConfig
|
||||
from vocode.streaming.models.synthesizer import SynthesizerConfig
|
||||
from vocode.streaming.models.telephony import CallConfig, TwilioConfig
|
||||
from vocode.streaming.models.transcriber import TranscriberConfig
|
||||
from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
|
||||
from vocode.streaming.telephony.config_manager.base_config_manager import (
|
||||
BaseConfigManager,
|
||||
)
|
||||
from vocode.streaming.telephony.conversation.outbound_call import OutboundCall
|
||||
from vocode.streaming.transcriber.base_transcriber import BaseTranscriber
|
||||
from vocode.streaming.utils import create_conversation_id
|
||||
|
||||
|
||||
class ZoomDialIn(OutboundCall):
    """Outbound call that dials a Zoom number and keys in the meeting details.

    The meeting id (and password, when given) are sent to the call as DTMF
    digits once it connects.
    """

    def __init__(
        self,
        base_url: str,
        zoom_number: str,
        zoom_meeting_id: str,
        zoom_meeting_password: Optional[str],
        from_phone: str,
        config_manager: BaseConfigManager,
        twilio_config: TwilioConfig,
        agent_config: AgentConfig,
        transcriber_config: TranscriberConfig,
        synthesizer_config: SynthesizerConfig,
        conversation_id: Optional[str] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Configure the underlying outbound call with the Zoom number as destination.

        Args:
            base_url: Forwarded to ``OutboundCall``.
            zoom_number: Zoom dial-in number; used as the call destination.
            zoom_meeting_id: Meeting id keyed in after the call connects.
            zoom_meeting_password: Meeting password keyed in after the id;
                skipped when falsy.
            from_phone: Number the call is placed from.
            config_manager: Forwarded to ``OutboundCall``.
            twilio_config: Forwarded to ``OutboundCall``.
            agent_config: Forwarded to ``OutboundCall``.
            transcriber_config: Forwarded to ``OutboundCall``.
            synthesizer_config: Forwarded to ``OutboundCall``.
            conversation_id: Forwarded to ``OutboundCall``.
            logger: Logger to use. Defaults to this module's logger so that
                start() can log even when no logger is supplied.
        """
        super().__init__(
            base_url=base_url,
            to_phone=zoom_number,
            from_phone=from_phone,
            config_manager=config_manager,
            transcriber_config=transcriber_config,
            agent_config=agent_config,
            synthesizer_config=synthesizer_config,
            twilio_config=twilio_config,
            conversation_id=conversation_id,
            logger=logger if logger is not None else logging.getLogger(__name__),
        )
        self.zoom_number = zoom_number
        self.zoom_meeting_id = zoom_meeting_id
        self.zoom_meeting_password = zoom_meeting_password
        # Also assigned by OutboundCall.__init__; kept so this class is explicit
        # about every attribute start() reads.
        self.from_phone = from_phone

    def start(self):
        """Dial the Zoom number, send the meeting digits and save the ``CallConfig``.

        The line-type check is disabled (``mobile_only=False``) for the Zoom
        number. The created call's SID is stored on the instance so that the
        inherited ``end()`` can complete the call.
        """
        self.validate_outbound_call(
            self.zoom_number,
            self.from_phone,
            mobile_only=False,
        )
        # In Twilio's send_digits syntax each "w" is a pause: wait, key in the
        # meeting id, then wait longer before the password.
        digits = f"ww{self.zoom_meeting_id}#"
        if self.zoom_meeting_password:
            digits += f"wwww*{self.zoom_meeting_password}#"
        # NOTE(review): this writes the meeting password to the debug log -
        # confirm that is acceptable.
        self.logger.debug("Sending digits %s to the call", digits)
        self.twilio_sid = self.create_twilio_call(
            self.zoom_number,
            self.from_phone,
            digits=digits,
        )
        call_config = CallConfig(
            transcriber_config=self.transcriber_config,
            agent_config=self.agent_config,
            synthesizer_config=self.synthesizer_config,
            twilio_config=self.twilio_config,
            twilio_sid=self.twilio_sid,
        )
        self.config_manager.save_config(self.conversation_id, call_config)
|
||||
Loading…
Add table
Add a link
Reference in a new issue