open source

This commit is contained in:
Ajay Raj 2023-03-28 00:15:34 -07:00
commit a93bfc1ec9
61 changed files with 4013 additions and 126 deletions

View file

@ -0,0 +1,170 @@
from fastapi import WebSocket
import base64
from enum import Enum
import json
import logging
from typing import Optional
from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.factory import (
create_agent,
create_synthesizer,
create_transcriber,
)
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.streaming.models.telephony import CallConfig, TwilioConfig
from vocode.streaming.output_device.twilio_output_device import TwilioOutputDevice
from vocode.streaming.models.synthesizer import (
AzureSynthesizerConfig,
)
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
)
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
from vocode.streaming.telephony.config_manager.base_config_manager import (
BaseConfigManager,
)
from vocode.streaming.telephony.twilio import create_twilio_client
from vocode.streaming.models.audio_encoding import AudioEncoding
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.streaming.transcriber.base_transcriber import BaseTranscriber
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
class PhoneCallAction(Enum):
    """Actions a WebSocket message handler can request from the receive loop."""

    # Returned when the media WebSocket loop should stop reading messages.
    CLOSE_WEBSOCKET = 1
class Call(StreamingConversation):
    """Streaming conversation bound to a single Twilio phone call.

    Inbound audio arrives as Twilio media-stream WebSocket messages and is
    handed to the conversation through ``receive_audio``; outbound audio is
    written through a ``TwilioOutputDevice``. When the conversation is marked
    terminated, the Twilio call is completed and the stored call config is
    deleted from the config manager.
    """

    # ``handle_ws_message`` treats consecutive media frames as 20 ms apart and
    # pads 8 bytes of silence per missing millisecond.
    # NOTE(review): this matches 8 kHz, one-byte-per-sample mu-law audio (the
    # defaults configured in ``__init__``) -- confirm against Twilio's format.
    _MEDIA_FRAME_MS = 20
    _BYTES_PER_MS = 8

    def __init__(
        self,
        base_url: str,
        config_manager: BaseConfigManager,
        agent: BaseAgent,
        twilio_config: TwilioConfig,
        transcriber: Optional[BaseTranscriber] = None,
        synthesizer: Optional[BaseSynthesizer] = None,
        twilio_sid: Optional[str] = None,
        conversation_id: Optional[str] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Build the conversation and its Twilio client.

        Args:
            base_url: Stored on the instance as ``self.base_url``.
            config_manager: Store from which this call's config is deleted on
                termination.
            agent: Agent driving the conversation.
            twilio_config: Credentials/config used to create the Twilio client.
            transcriber: Optional transcriber; defaults to a Deepgram
                transcriber configured for 8 kHz mu-law audio.
            synthesizer: Optional synthesizer; defaults to an Azure
                synthesizer configured for 8 kHz mu-law audio.
            twilio_sid: SID of the Twilio call this conversation belongs to.
            conversation_id: Optional id forwarded to the base conversation.
            logger: Optional logger forwarded to the base conversation.
        """
        self.base_url = base_url
        self.config_manager = config_manager
        self.output_device = TwilioOutputDevice()
        self.twilio_config = twilio_config
        self.twilio_client = create_twilio_client(twilio_config)
        super().__init__(
            self.output_device,
            transcriber
            or DeepgramTranscriber(
                DeepgramTranscriberConfig(
                    sampling_rate=8000,
                    audio_encoding=AudioEncoding.MULAW,
                    chunk_size=self.CHUNK_SIZE,
                    model="voicemail",
                    endpointing_config=PunctuationEndpointingConfig(),
                ),
                logger=logger,
            ),
            agent,
            synthesizer
            or AzureSynthesizer(
                AzureSynthesizerConfig(
                    sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
                )
            ),
            conversation_id=conversation_id,
            per_chunk_allowance_seconds=0.01,
            logger=logger,
        )
        self.twilio_sid = twilio_sid
        # Timestamp of the most recent media frame, used to detect gaps.
        self.latest_media_timestamp = 0

    @staticmethod
    def from_call_config(
        base_url: str,
        call_config: CallConfig,
        config_manager: BaseConfigManager,
        conversation_id: str,
        logger: logging.Logger,
    ):
        """Create a ``Call`` from a stored ``CallConfig``.

        The agent, transcriber and synthesizer are instantiated from their
        respective configs through the factory helpers.
        """
        return Call(
            base_url=base_url,
            logger=logger,
            config_manager=config_manager,
            agent=create_agent(call_config.agent_config),
            transcriber=create_transcriber(call_config.transcriber_config),
            synthesizer=create_synthesizer(call_config.synthesizer_config),
            twilio_config=call_config.twilio_config,
            twilio_sid=call_config.twilio_sid,
            conversation_id=conversation_id,
        )

    async def attach_ws_and_start(self, ws: WebSocket):
        """Attach the media WebSocket and run the call until it ends.

        If Twilio reports the call was answered by a machine or fax, the call
        is completed immediately and the conversation is never started.
        Otherwise the conversation starts once Twilio's ``start`` event has
        been received, and incoming messages are processed until the
        conversation is no longer active or a handler asks to close the socket.
        """
        self.logger.debug("Trying to attach WS to outbound call")
        self.output_device.ws = ws
        self.logger.debug("Attached WS to outbound call")

        # NOTE(review): this is a synchronous Twilio REST call made from a
        # coroutine -- it presumably blocks the event loop while in flight.
        twilio_call = self.twilio_client.calls(self.twilio_sid).fetch()

        if twilio_call.answered_by in ("machine_start", "fax"):
            self.logger.info(f"Call answered by {twilio_call.answered_by}")
            twilio_call.update(status="completed")
        else:
            await self.wait_for_twilio_start(ws)
            await super().start()
            try:
                while self.active:
                    message = await ws.receive_text()
                    response = await self.handle_ws_message(message)
                    if response == PhoneCallAction.CLOSE_WEBSOCKET:
                        break
            finally:
                # Always tear down, even when receive_text() raises (e.g. the
                # peer disconnected), so the conversation is terminated and
                # its config is cleaned up instead of leaking.
                self.tear_down()

    async def wait_for_twilio_start(self, ws: WebSocket):
        """Consume messages until the ``start`` event and record its stream SID."""
        while True:
            message = await ws.receive_text()
            if not message:
                continue
            data = json.loads(message)
            if data["event"] == "start":
                self.logger.debug(
                    f"Media WS: Received event '{data['event']}': {message}"
                )
                self.output_device.stream_sid = data["start"]["streamSid"]
                break

    async def handle_ws_message(self, message) -> Optional[PhoneCallAction]:
        """Process one media-stream message.

        Args:
            message: Raw JSON text received from the WebSocket, or ``None``.

        Returns:
            ``PhoneCallAction.CLOSE_WEBSOCKET`` when ``message`` is ``None`` or
            is a ``stop`` event; ``None`` otherwise.
        """
        if message is None:
            return PhoneCallAction.CLOSE_WEBSOCKET

        data = json.loads(message)
        event = data["event"]
        if event == "media":
            media = data["media"]
            chunk = base64.b64decode(media["payload"])
            timestamp = int(media["timestamp"])
            expected_timestamp = self.latest_media_timestamp + self._MEDIA_FRAME_MS
            if expected_timestamp < timestamp:
                # Frames were skipped: pad the gap so downstream audio timing
                # stays continuous.
                bytes_to_fill = self._BYTES_PER_MS * (timestamp - expected_timestamp)
                self.logger.debug(f"Filling {bytes_to_fill} bytes of silence")
                # NOTE: 0xff is silence for mulaw audio
                self.receive_audio(b"\xff" * bytes_to_fill)
            self.latest_media_timestamp = timestamp
            self.receive_audio(chunk)
        elif event == "stop":
            self.logger.debug(f"Media WS: Received event 'stop': {message}")
            self.logger.debug("Stopping...")
            return PhoneCallAction.CLOSE_WEBSOCKET
        return None

    def end_twilio_call(self) -> bool:
        """Ask Twilio to complete the call; return whether it reports completed."""
        response = self.twilio_client.calls(self.twilio_sid).update(status="completed")
        return response.status == "completed"

    def mark_terminated(self):
        """Mark the conversation terminated, end the call and drop its config."""
        super().mark_terminated()
        try:
            self.end_twilio_call()
        finally:
            # Delete the stored config even if the Twilio update raises, so a
            # failed hang-up does not leave a stale config behind.
            self.config_manager.delete_config(self.id)

    def tear_down(self):
        """Terminate the conversation."""
        self.terminate()

View file

@ -0,0 +1,110 @@
import logging
from typing import Optional
from twilio.rest import Client
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import (
AzureSynthesizerConfig,
SynthesizerConfig,
)
from vocode.streaming.models.telephony import CallConfig, TwilioConfig
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
TranscriberConfig,
)
from vocode.streaming.telephony.config_manager.base_config_manager import (
BaseConfigManager,
)
from vocode.streaming.telephony.constants import (
DEFAULT_AUDIO_ENCODING,
DEFAULT_CHUNK_SIZE,
DEFAULT_SAMPLING_RATE,
)
from vocode.streaming.telephony.twilio import create_twilio_client
from vocode.streaming.utils import create_conversation_id
class OutboundCall:
    """Places an outbound phone call through Twilio and stores its config.

    ``start`` validates the destination number, creates the Twilio call and
    saves a ``CallConfig`` under the conversation id so it can be looked up
    later; ``end`` asks Twilio to complete the call.
    """

    def __init__(
        self,
        base_url: str,
        to_phone: str,
        from_phone: str,
        config_manager: BaseConfigManager,
        agent_config: AgentConfig,
        twilio_config: TwilioConfig,
        transcriber_config: Optional[TranscriberConfig] = None,
        synthesizer_config: Optional[SynthesizerConfig] = None,
        conversation_id: Optional[str] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Store the call parameters and create the Twilio client.

        Args:
            base_url: Host (without scheme) used to build the TwiML callback
                URL; ``https://`` is prepended when the call is created.
            to_phone: Number to dial.
            from_phone: Number the call is placed from.
            config_manager: Store in which the call config is saved.
            agent_config: Agent configuration saved with the call.
            twilio_config: Credentials/config used to create the Twilio client.
            transcriber_config: Optional; defaults to a Deepgram config built
                from the telephony default constants.
            synthesizer_config: Optional; defaults to an Azure config built
                from the telephony default constants.
            conversation_id: Optional; a new id is generated when omitted.
            logger: Optional; a module-level logger is used when omitted.
        """
        # Fall back to a real logger: ``start`` logs unconditionally, so
        # keeping ``None`` here would raise AttributeError for callers that
        # rely on the documented default.
        self.logger = logger or logging.getLogger(__name__)
        self.base_url = base_url
        self.to_phone = to_phone
        self.from_phone = from_phone
        self.config_manager = config_manager
        self.agent_config = agent_config
        self.transcriber_config = transcriber_config or DeepgramTranscriberConfig(
            sampling_rate=DEFAULT_SAMPLING_RATE,
            audio_encoding=DEFAULT_AUDIO_ENCODING,
            chunk_size=DEFAULT_CHUNK_SIZE,
            model="voicemail",
            endpointing_config=PunctuationEndpointingConfig(),
        )
        self.synthesizer_config = synthesizer_config or AzureSynthesizerConfig(
            sampling_rate=DEFAULT_SAMPLING_RATE, audio_encoding=DEFAULT_AUDIO_ENCODING
        )
        self.conversation_id = conversation_id or create_conversation_id()
        self.twilio_config = twilio_config
        self.twilio_client = create_twilio_client(twilio_config)
        # Set by ``start`` once Twilio has created the call.
        self.twilio_sid = None

    def create_twilio_call(
        self, to_phone: str, from_phone: str, digits: str = ""
    ) -> str:
        """Create the Twilio call and return its SID.

        Args:
            to_phone: Number to dial.
            from_phone: Number the call is placed from.
            digits: Passed to Twilio as ``send_digits``.
        """
        twilio_call = self.twilio_client.calls.create(
            url=f"https://{self.base_url}/twiml/initiate_call/{self.conversation_id}",
            to=to_phone,
            from_=from_phone,
            send_digits=digits,
        )
        return twilio_call.sid

    def validate_outbound_call(
        self,
        to_phone: str,
        from_phone: str,
        mobile_only: bool = True,
    ):
        """Check that ``to_phone`` may be called.

        Args:
            to_phone: Number to validate.
            from_phone: Accepted for interface symmetry; not inspected here.
            mobile_only: When true, look the number up through Twilio and
                require its line type to be ``"mobile"``.

        Raises:
            ValueError: If ``to_phone`` is shorter than 8 characters, or if
                ``mobile_only`` is set and the lookup returns no line-type
                data or a non-mobile type.
        """
        if len(to_phone) < 8:
            raise ValueError("Invalid 'to' phone")
        if not mobile_only:
            return
        line_type_intelligence = (
            self.twilio_client.lookups.v2.phone_numbers(to_phone)
            .fetch(fields="line_type_intelligence")
            .line_type_intelligence
        )
        # Missing lookup data is treated the same as a non-mobile line.
        if not line_type_intelligence or line_type_intelligence["type"] != "mobile":
            raise ValueError("Can only call mobile phones")

    def start(self):
        """Validate the number, place the call and save its ``CallConfig``."""
        self.logger.debug("Starting outbound call")
        self.validate_outbound_call(self.to_phone, self.from_phone)
        self.twilio_sid = self.create_twilio_call(self.to_phone, self.from_phone)
        call_config = CallConfig(
            transcriber_config=self.transcriber_config,
            agent_config=self.agent_config,
            synthesizer_config=self.synthesizer_config,
            twilio_config=self.twilio_config,
            twilio_sid=self.twilio_sid,
        )
        self.config_manager.save_config(self.conversation_id, call_config)

    def end(self):
        """Ask Twilio to complete the call; return whether it reports completed."""
        response = self.twilio_client.calls(self.twilio_sid).update(status="completed")
        return response.status == "completed"

View file

@ -0,0 +1,73 @@
import logging
from typing import Optional
from twilio.rest import Client
from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import SynthesizerConfig
from vocode.streaming.models.telephony import CallConfig, TwilioConfig
from vocode.streaming.models.transcriber import TranscriberConfig
from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
from vocode.streaming.telephony.config_manager.base_config_manager import (
BaseConfigManager,
)
from vocode.streaming.telephony.conversation.outbound_call import OutboundCall
from vocode.streaming.transcriber.base_transcriber import BaseTranscriber
from vocode.streaming.utils import create_conversation_id
class ZoomDialIn(OutboundCall):
    """Outbound call that dials a Zoom number and enters the meeting by DTMF.

    After the call is created, the meeting id (and the password, when one is
    set) are sent as digits so the call joins the meeting.
    """

    def __init__(
        self,
        base_url: str,
        zoom_number: str,
        zoom_meeting_id: str,
        zoom_meeting_password: Optional[str],
        from_phone: str,
        config_manager: BaseConfigManager,
        twilio_config: TwilioConfig,
        agent_config: AgentConfig,
        transcriber_config: TranscriberConfig,
        synthesizer_config: SynthesizerConfig,
        conversation_id: Optional[str] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Configure the dial-in.

        Args:
            base_url: Forwarded to ``OutboundCall``.
            zoom_number: Zoom dial-in number; used as the call's ``to_phone``.
            zoom_meeting_id: Meeting id sent as digits after dialing.
            zoom_meeting_password: Optional meeting password sent after the id.
            from_phone: Number the call is placed from.
            config_manager: Store in which the call config is saved.
            twilio_config: Credentials/config used to create the Twilio client.
            agent_config: Agent configuration saved with the call.
            transcriber_config: Transcriber configuration saved with the call.
            synthesizer_config: Synthesizer configuration saved with the call.
            conversation_id: Optional conversation id.
            logger: Optional; a module-level logger is used when omitted.
        """
        super().__init__(
            base_url=base_url,
            to_phone=zoom_number,
            from_phone=from_phone,
            config_manager=config_manager,
            transcriber_config=transcriber_config,
            agent_config=agent_config,
            synthesizer_config=synthesizer_config,
            twilio_config=twilio_config,
            conversation_id=conversation_id,
            logger=logger,
        )
        # ``start`` logs unconditionally, so make sure a logger always exists
        # even when the caller relied on the ``None`` default.
        self.logger = logger or logging.getLogger(__name__)
        self.zoom_number = zoom_number
        self.zoom_meeting_id = zoom_meeting_id
        self.zoom_meeting_password = zoom_meeting_password
        self.from_phone = from_phone

    def start(self):
        """Dial the Zoom number, send the meeting digits and save the config."""
        self.validate_outbound_call(
            self.zoom_number,
            self.from_phone,
            mobile_only=False,
        )
        # NOTE(review): the "w" characters are presumably Twilio send_digits
        # pauses that give Zoom's prompts time to play -- confirm.
        digits = f"ww{self.zoom_meeting_id}#"
        loggable_digits = digits
        if self.zoom_meeting_password:
            digits += f"wwww*{self.zoom_meeting_password}#"
            # Keep the meeting password out of the logs.
            loggable_digits += "wwww*<redacted>#"
        self.logger.debug("Sending digits %s to the call", loggable_digits)
        # Store the SID on the instance so the inherited ``end`` targets this
        # call instead of ``None``.
        self.twilio_sid = self.create_twilio_call(
            self.zoom_number,
            self.from_phone,
            digits=digits,
        )
        call_config = CallConfig(
            transcriber_config=self.transcriber_config,
            agent_config=self.agent_config,
            synthesizer_config=self.synthesizer_config,
            twilio_config=self.twilio_config,
            twilio_sid=self.twilio_sid,
        )
        self.config_manager.save_config(self.conversation_id, call_config)