first pass at turn based conversation

This commit is contained in:
Ajay Raj 2023-03-20 15:37:23 -07:00
commit 518a0f2b53
40 changed files with 503 additions and 99 deletions

View file

@ -0,0 +1,16 @@
from vocode.streaming.models.audio_encoding import AudioEncoding
import queue
from typing import Optional
class BaseInputDevice:
    """Abstract source of raw audio chunks for a streaming conversation."""

    def __init__(
        self, sampling_rate: int, audio_encoding: AudioEncoding, chunk_size: int
    ):
        # Audio format parameters; transcribers mirror these via
        # TranscriberConfig.from_input_device().
        self.sampling_rate = sampling_rate
        self.audio_encoding = audio_encoding
        self.chunk_size = chunk_size
        # Buffer subclasses may fill from their capture callback.
        self.queue = queue.Queue()

    def get_audio(self) -> Optional[bytes]:
        """Return the next buffered audio chunk, or None when nothing is ready."""
        raise NotImplementedError

View file

@ -0,0 +1,51 @@
import sounddevice as sd
import numpy as np
from typing import Optional
import queue
import wave
from vocode.streaming.input_device.base_input_device import BaseInputDevice
from vocode.streaming.models.audio_encoding import AudioEncoding
class MicrophoneInput(BaseInputDevice):
    """Captures LINEAR16 audio from a local microphone via sounddevice.

    Chunks arrive on a sounddevice callback thread and are buffered in
    self.queue; get_audio() drains them without blocking.
    """

    DEFAULT_SAMPLING_RATE = 44100
    DEFAULT_CHUNK_SIZE = 2048

    def __init__(
        self,
        device_info: dict,
        sampling_rate: Optional[int] = None,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        microphone_gain: int = 1,
    ):
        self.device_info = device_info
        # Fall back to the device's reported default rate when none is given.
        sampling_rate = sampling_rate or (
            self.device_info.get("default_samplerate", self.DEFAULT_SAMPLING_RATE)
        )
        super().__init__(int(sampling_rate), AudioEncoding.LINEAR16, chunk_size)
        # BUG FIX: set state the callback reads BEFORE the stream starts.
        # The original assigned microphone_gain and replaced self.queue after
        # stream.start(), so early callbacks could hit a missing attribute or
        # enqueue chunks into a queue that was then thrown away.
        self.microphone_gain = microphone_gain
        self.stream = sd.InputStream(
            dtype=np.int16,
            channels=1,
            samplerate=self.sampling_rate,
            blocksize=self.chunk_size,
            device=int(self.device_info["index"]),
            callback=self._stream_callback,
        )
        self.stream.start()

    def _stream_callback(self, in_data: np.ndarray, *_args):
        """sounddevice callback: apply gain and enqueue the chunk as bytes."""
        # BUG FIX: original used `2 ^ gain` (bitwise XOR); exponentiation
        # (`2 ** gain`) was clearly intended for a power-of-two gain scale.
        if self.microphone_gain > 1:
            in_data = in_data * (2 ** self.microphone_gain)
        else:
            in_data = in_data // (2 ** self.microphone_gain)
        audio_bytes = in_data.tobytes()
        self.queue.put_nowait(audio_bytes)

    def get_audio(self) -> Optional[bytes]:
        """Return the next captured chunk, or None if the buffer is empty."""
        try:
            return self.queue.get_nowait()
        except queue.Empty:
            return None

View file

@ -0,0 +1,11 @@
from vocode.streaming.input_device.base_input_device import (
BaseInputDevice,
)
from vocode.streaming.models.audio_encoding import AudioEncoding
class TelephoneInput(BaseInputDevice):
    """Input device preconfigured for telephony audio.

    8000 Hz mu-law with 160-sample chunks (one 20 ms telephony frame).
    """

    def __init__(self):
        super().__init__(
            sampling_rate=8000, audio_encoding=AudioEncoding.MULAW, chunk_size=160
        )

View file

@ -0,0 +1,181 @@
from typing import Optional, Union
from enum import Enum
from pydantic import validator
from vocode.streaming.models.message import BaseMessage
from .model import TypedModel, BaseModel
# Defaults shared by the agent config classes below.
FILLER_AUDIO_DEFAULT_SILENCE_THRESHOLD_SECONDS = 0.5
LLM_AGENT_DEFAULT_TEMPERATURE = 1.0
LLM_AGENT_DEFAULT_MAX_TOKENS = 256
LLM_AGENT_DEFAULT_MODEL_NAME = "text-curie-001"
CHAT_GPT_AGENT_DEFAULT_MODEL_NAME = "gpt-3.5-turbo"


class AgentType(str, Enum):
    """Discriminator values used by TypedModel to dispatch AgentConfig subclasses."""

    BASE = "agent_base"
    LLM = "agent_llm"
    CHAT_GPT_ALPHA = "agent_chat_gpt_alpha"
    CHAT_GPT = "agent_chat_gpt"
    ECHO = "agent_echo"
    INFORMATION_RETRIEVAL = "agent_information_retrieval"
    RESTFUL_USER_IMPLEMENTED = "agent_restful_user_implemented"
    WEBSOCKET_USER_IMPLEMENTED = "agent_websocket_user_implemented"
class FillerAudioConfig(BaseModel):
    """Settings for filler audio played while the agent is silent/thinking."""

    # Seconds of silence before filler audio kicks in.
    silence_threshold_seconds: float = FILLER_AUDIO_DEFAULT_SILENCE_THRESHOLD_SECONDS
    use_phrases: bool = True
    use_typing_noise: bool = False

    @validator("use_typing_noise")
    def typing_noise_excludes_phrases(cls, v, values):
        """Enforce that exactly one filler style (phrases XOR typing noise) is on."""
        # Typing noise wins: phrases are disabled by mutating `values`.
        # NOTE(review): this relies on pydantic v1 building the model from the
        # mutated `values` dict — confirm the final model reflects the change.
        if v and values.get("use_phrases"):
            values["use_phrases"] = False
        if not v and not values.get("use_phrases"):
            raise ValueError("must use either typing noise or phrases for filler audio")
        return v
class AgentConfig(TypedModel, type=AgentType.BASE):
    """Base configuration shared by every agent type."""

    # Message delivered at the start of the conversation, if any.
    initial_message: Optional[BaseMessage] = None
    generate_responses: bool = True
    allowed_idle_time_seconds: Optional[float] = None
    end_conversation_on_goodbye: bool = False
    # Either a simple on/off flag or a full FillerAudioConfig.
    send_filler_audio: Union[bool, FillerAudioConfig] = False


class CutOffResponse(BaseModel):
    """Messages the bot can say when it is interrupted mid-utterance."""

    messages: list[BaseMessage] = [BaseMessage(text="Sorry?")]
class LLMAgentConfig(AgentConfig, type=AgentType.LLM):
    """Completion-model agent config (default model: text-curie-001)."""

    prompt_preamble: str
    # Presumably the prompt expected for the first turn — confirm with the
    # agent implementation that consumes this config.
    expected_first_prompt: Optional[str] = None
    model_name: str = LLM_AGENT_DEFAULT_MODEL_NAME
    temperature: float = LLM_AGENT_DEFAULT_TEMPERATURE
    max_tokens: int = LLM_AGENT_DEFAULT_MAX_TOKENS
    cut_off_response: Optional[CutOffResponse] = None


class ChatGPTAgentConfig(AgentConfig, type=AgentType.CHAT_GPT):
    """Chat-completion agent config (default model: gpt-3.5-turbo)."""

    prompt_preamble: str
    expected_first_prompt: Optional[str] = None
    # Overrides AgentConfig's default of True.
    generate_responses: bool = False
    model_name: str = CHAT_GPT_AGENT_DEFAULT_MODEL_NAME
    temperature: float = LLM_AGENT_DEFAULT_TEMPERATURE
    max_tokens: int = LLM_AGENT_DEFAULT_MAX_TOKENS
    cut_off_response: Optional[CutOffResponse] = None
class InformationRetrievalAgentConfig(
    AgentConfig, type=AgentType.INFORMATION_RETRIEVAL
):
    """Config for an agent whose goal is to collect a set of fields from a call."""

    recipient_descriptor: str
    caller_descriptor: str
    goal_description: str
    # Names of the pieces of information the agent should collect.
    fields: list[str]
    # TODO: add fields for IVR, voicemail


class EchoAgentConfig(AgentConfig, type=AgentType.ECHO):
    """Config for the echo agent (no extra settings); presumably for testing."""

    pass
class RESTfulUserImplementedAgentConfig(
    AgentConfig, type=AgentType.RESTFUL_USER_IMPLEMENTED
):
    """Points the backend at a user-hosted RESTful agent server."""

    class EndpointConfig(BaseModel):
        # Where and how to reach the user's endpoint.
        url: str
        method: str = "POST"

    # Endpoint each human input is sent to.
    respond: EndpointConfig
    generate_responses: bool = False
    # generate_response: Optional[EndpointConfig]
    # update_last_bot_message_on_cut_off: Optional[EndpointConfig]


class RESTfulAgentInput(BaseModel):
    """Request body posted to a user's /respond endpoint."""

    conversation_id: str
    human_input: str


class RESTfulAgentOutputType(str, Enum):
    """Discriminators for RESTfulAgentOutput subclasses."""

    BASE = "restful_agent_base"
    TEXT = "restful_agent_text"
    END = "restful_agent_end"


class RESTfulAgentOutput(TypedModel, type=RESTfulAgentOutputType.BASE):
    """Base response type returned by a user-implemented RESTful agent."""

    pass


class RESTfulAgentText(RESTfulAgentOutput, type=RESTfulAgentOutputType.TEXT):
    """A text reply from the agent."""

    response: str


class RESTfulAgentEnd(RESTfulAgentOutput, type=RESTfulAgentOutputType.END):
    """Signals that the conversation should end."""

    pass
class WebSocketUserImplementedAgentConfig(
    AgentConfig, type=AgentType.WEBSOCKET_USER_IMPLEMENTED
):
    """Points the backend at a user-hosted websocket agent server."""

    class RouteConfig(BaseModel):
        url: str

    # Websocket route the backend connects to for the respond protocol.
    respond: RouteConfig
    generate_responses: bool = False
    # generate_response: Optional[RouteConfig]
    # send_message_on_cut_off: bool = False


class WebSocketAgentMessageType(str, Enum):
    """Discriminators for the user-implemented websocket agent protocol frames."""

    BASE = "websocket_agent_base"
    START = "websocket_agent_start"
    TEXT = "websocket_agent_text"
    TEXT_END = "websocket_agent_text_end"
    READY = "websocket_agent_ready"
    STOP = "websocket_agent_stop"
class WebSocketAgentMessage(TypedModel, type=WebSocketAgentMessageType.BASE):
    """Base frame for the user-implemented websocket agent protocol."""

    conversation_id: Optional[str] = None


class WebSocketAgentTextMessage(
    WebSocketAgentMessage, type=WebSocketAgentMessageType.TEXT
):
    """A text frame (human input to the agent, or the agent's reply)."""

    class Payload(BaseModel):
        text: str

    data: Payload

    @classmethod
    def from_text(cls, text: str, conversation_id: Optional[str] = None):
        """Convenience constructor wrapping *text* in a Payload."""
        return cls(data=cls.Payload(text=text), conversation_id=conversation_id)


class WebSocketAgentStartMessage(
    WebSocketAgentMessage, type=WebSocketAgentMessageType.START
):
    pass


class WebSocketAgentReadyMessage(
    WebSocketAgentMessage, type=WebSocketAgentMessageType.READY
):
    pass


class WebSocketAgentStopMessage(
    WebSocketAgentMessage, type=WebSocketAgentMessageType.STOP
):
    pass


class WebSocketAgentTextEndMessage(
    WebSocketAgentMessage, type=WebSocketAgentMessageType.TEXT_END
):
    pass

View file

@ -0,0 +1,5 @@
from enum import Enum
class AudioEncoding(str, Enum):
    """Supported raw-audio wire encodings."""

    # 16-bit signed PCM.
    LINEAR16 = "linear16"
    # Mu-law companded PCM, used for telephony audio.
    MULAW = "mulaw"

View file

@ -0,0 +1,16 @@
from enum import Enum
from .model import TypedModel
from enum import Enum
class MessageType(str, Enum):
    """Discriminators for BaseMessage subclasses."""

    BASE = "message_base"
    SSML = "message_ssml"


class BaseMessage(TypedModel, type=MessageType.BASE):
    """A plain-text message."""

    text: str


class SSMLMessage(BaseMessage, type=MessageType.SSML):
    """A message carrying an SSML rendering alongside its plain text."""

    ssml: str

View file

@ -0,0 +1,52 @@
import pydantic
class BaseModel(pydantic.BaseModel):
    """Pydantic base that revives nested TypedModel payloads on construction."""

    def __init__(self, **data):
        # Any dict value carrying a 'type' discriminator is parsed into the
        # registered TypedModel subclass before normal validation runs.
        for field_name, field_value in data.items():
            if isinstance(field_value, dict) and 'type' in field_value:
                data[field_name] = TypedModel.parse_obj(field_value)
        super().__init__(**data)
# Adapted from https://github.com/pydantic/pydantic/discussions/3091
class TypedModel(BaseModel):
    """Pydantic model with a `type` discriminator for polymorphic (de)serialization.

    Subclasses register themselves via ``class Sub(TypedModel, type=...)``;
    parse_obj then dispatches on the serialized ``type`` field, and _iter
    injects it back into dict()/json() output.
    """

    # Registry of [discriminator, subclass] pairs. NOTE: this single list is
    # shared by every TypedModel hierarchy in the process.
    _subtypes_ = []

    def __init_subclass__(cls, type=None):
        # Runs for every subclass declaration; records its discriminator.
        cls._subtypes_.append([type, cls])

    @classmethod
    def get_cls(_cls, type):
        """Return the subclass registered for a discriminator value."""
        for t, cls in _cls._subtypes_:
            if t == type:
                return cls
        raise ValueError(f'Unknown type {type}')

    @classmethod
    def get_type(_cls, cls_name):
        """Reverse lookup: discriminator value for a subclass name."""
        for t, cls in _cls._subtypes_:
            if cls.__name__ == cls_name:
                return t
        raise ValueError(f'Unknown class {cls_name}')

    @classmethod
    def parse_obj(cls, obj):
        """Deserialize a dict into the subclass named by obj['type']."""
        data_type = obj.get('type')
        if data_type is None:
            raise ValueError(f'type is required for {cls.__name__}')
        sub = cls.get_cls(data_type)
        if sub is None:
            # NOTE(review): unreachable — get_cls raises rather than returning None.
            raise ValueError(f'Unknown type {data_type}')
        return sub(**obj)

    def _iter(self, **kwargs):
        # Prepend the discriminator so it appears in serialized output.
        yield 'type', self.get_type(self.__class__.__name__)
        yield from super()._iter(**kwargs)

    @property
    def type(self):
        # Expose the discriminator as a read-only attribute.
        return self.get_type(self.__class__.__name__)

View file

@ -0,0 +1,73 @@
from enum import Enum
from typing import Optional, Union
from pydantic import BaseModel, validator
from .model import TypedModel
from .audio_encoding import AudioEncoding
from ..output_device.base_output_device import BaseOutputDevice
class SynthesizerType(str, Enum):
    """Discriminators for SynthesizerConfig subclasses."""

    BASE = "synthesizer_base"
    AZURE = "synthesizer_azure"
    GOOGLE = "synthesizer_google"
    # NOTE(review): no ElevenLabs config class exists in this file yet.
    ELEVEN_LABS = "synthesizer_eleven_labs"
class TrackBotSentimentConfig(BaseModel):
    """Settings for sentiment-tracked voice styling; emotions must be non-empty."""

    emotions: list[str] = ["angry", "friendly", "sad", "whispering"]

    @validator("emotions")
    def emotions_must_not_be_empty(cls, v):
        """Reject an empty emotion list — the feature needs at least one."""
        if len(v) == 0:
            raise ValueError("must have at least one emotion")
        return v
class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE):
    """Base synthesizer settings; audio format should match the output device."""

    sampling_rate: int
    audio_encoding: AudioEncoding
    # Presumably wraps synthesized PCM in WAV headers — confirm with the
    # synthesizer implementation that reads this flag.
    should_encode_as_wav: bool = False
    track_bot_sentiment_in_voice: Union[bool, TrackBotSentimentConfig] = False

    @classmethod
    def from_output_device(cls, output_device: BaseOutputDevice):
        """Build a config whose audio format mirrors *output_device*."""
        return cls(
            sampling_rate=output_device.sampling_rate,
            audio_encoding=output_device.audio_encoding,
        )
# Azure speech-synthesis defaults.
AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME = "en-US-AriaNeural"
AZURE_SYNTHESIZER_DEFAULT_PITCH = 0
AZURE_SYNTHESIZER_DEFAULT_RATE = 15


class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE):
    """Azure Cognitive Services synthesizer settings (voice, pitch, rate)."""

    voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
    pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH
    rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE

    @classmethod
    def from_output_device(
        cls,
        output_device: BaseOutputDevice,
        voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME,
        pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH,
        rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE,
        track_bot_sentiment_in_voice: Union[bool, TrackBotSentimentConfig] = False,
    ):
        """Build a config matching *output_device*'s audio format plus voice knobs."""
        return cls(
            sampling_rate=output_device.sampling_rate,
            audio_encoding=output_device.audio_encoding,
            voice_name=voice_name,
            pitch=pitch,
            rate=rate,
            track_bot_sentiment_in_voice=track_bot_sentiment_in_voice,
        )
    # CLEANUP: removed a stray dead `pass` statement that followed the method.
class GoogleSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.GOOGLE):
    """Google TTS uses only the base synthesizer settings for now."""

    pass

View file

@ -0,0 +1,50 @@
from typing import Optional
from vocode.streaming.models.model import BaseModel
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import SynthesizerConfig
from vocode.streaming.models.transcriber import TranscriberConfig
class TwilioConfig(BaseModel):
    """Twilio account credentials used to place/receive calls."""

    account_sid: str
    auth_token: str


class CallEntity(BaseModel):
    """A party on a phone call, identified by phone number."""

    phone_number: str
class CreateInboundCall(BaseModel):
    """Payload for attaching an agent to an incoming Twilio call."""

    transcriber_config: Optional[TranscriberConfig] = None
    agent_config: AgentConfig
    synthesizer_config: Optional[SynthesizerConfig] = None
    # SID of the Twilio call leg, taken from the inbound webhook.
    twilio_sid: str
    twilio_config: Optional[TwilioConfig] = None


class EndOutboundCall(BaseModel):
    """Payload for ending a call previously started via CreateOutboundCall."""

    call_id: str
    twilio_config: Optional[TwilioConfig] = None
class CreateOutboundCall(BaseModel):
    """Payload for placing an outbound call through the hosted API."""

    recipient: CallEntity
    caller: CallEntity
    transcriber_config: Optional[TranscriberConfig] = None
    agent_config: AgentConfig
    synthesizer_config: Optional[SynthesizerConfig] = None
    conversation_id: Optional[str] = None
    twilio_config: Optional[TwilioConfig] = None
    # TODO add IVR/etc.
class DialIntoZoomCall(BaseModel):
    """Payload for having the agent dial into a Zoom meeting."""

    recipient: CallEntity
    caller: CallEntity
    zoom_meeting_id: str
    # Explicit `= None` added for consistency with the other optional fields;
    # pydantic v1 already treats a bare Optional field as defaulting to None,
    # so behavior is unchanged.
    zoom_meeting_password: Optional[str] = None
    transcriber_config: Optional[TranscriberConfig] = None
    agent_config: AgentConfig
    synthesizer_config: Optional[SynthesizerConfig] = None
    conversation_id: Optional[str] = None
    twilio_config: Optional[TwilioConfig] = None

View file

@ -0,0 +1,70 @@
from enum import Enum
from typing import Optional
from vocode.streaming.input_device.base_input_device import (
BaseInputDevice,
)
from .audio_encoding import AudioEncoding
from .model import BaseModel, TypedModel
class TranscriberType(str, Enum):
    """Discriminators for TranscriberConfig subclasses."""

    BASE = "transcriber_base"
    DEEPGRAM = "transcriber_deepgram"
    GOOGLE = "transcriber_google"
    ASSEMBLY_AI = "transcriber_assembly_ai"


class EndpointingType(str, Enum):
    """Discriminators for EndpointingConfig subclasses."""

    BASE = "endpointing_base"
    TIME_BASED = "endpointing_time_based"
    PUNCTUATION_BASED = "endpointing_punctuation_based"
class EndpointingConfig(TypedModel, type=EndpointingType.BASE):
    """Base config for deciding when the human has finished speaking."""

    pass


class TimeEndpointingConfig(EndpointingConfig, type=EndpointingType.TIME_BASED):
    # Presumably seconds of silence before the utterance is considered done —
    # confirm against the transcriber implementation.
    time_cutoff_seconds: float = 0.4


class PunctuationEndpointingConfig(
    EndpointingConfig, type=EndpointingType.PUNCTUATION_BASED
):
    time_cutoff_seconds: float = 0.4
class TranscriberConfig(TypedModel, type=TranscriberType.BASE):
    """Base transcriber settings; audio format should match the input device."""

    sampling_rate: int
    audio_encoding: AudioEncoding
    chunk_size: int
    endpointing_config: Optional[EndpointingConfig] = None

    @classmethod
    def from_input_device(
        cls,
        input_device: BaseInputDevice,
        endpointing_config: Optional[EndpointingConfig] = None,
    ):
        """Build a config whose audio format mirrors *input_device*."""
        return cls(
            sampling_rate=input_device.sampling_rate,
            audio_encoding=input_device.audio_encoding,
            chunk_size=input_device.chunk_size,
            endpointing_config=endpointing_config,
        )
class DeepgramTranscriberConfig(TranscriberConfig, type=TranscriberType.DEEPGRAM):
    """Deepgram-specific settings; model/version select the hosted model."""

    model: Optional[str] = None
    should_warmup_model: bool = False
    version: Optional[str] = None


class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE):
    """Google Speech-to-Text specific settings."""

    model: Optional[str] = None
    should_warmup_model: bool = False


class AssemblyAITranscriberConfig(TranscriberConfig, type=TranscriberType.ASSEMBLY_AI):
    """AssemblyAI-specific settings."""

    should_warmup_model: bool = False

View file

@ -0,0 +1,38 @@
import base64
from enum import Enum
from typing import Optional
from .model import TypedModel
from .transcriber import TranscriberConfig
from .agent import AgentConfig
from .synthesizer import SynthesizerConfig
class WebSocketMessageType(str, Enum):
    """Discriminators for the hosted-conversation websocket protocol frames."""

    BASE = 'websocket_base'
    START = 'websocket_start'
    AUDIO = 'websocket_audio'
    READY = 'websocket_ready'
    STOP = 'websocket_stop'


# Base frame; concrete frames below register their discriminators via TypedModel.
class WebSocketMessage(TypedModel, type=WebSocketMessageType.BASE): pass
class AudioMessage(WebSocketMessage, type=WebSocketMessageType.AUDIO):
    """An audio chunk, base64-encoded for JSON transport."""

    data: str

    @classmethod
    def from_bytes(cls, chunk: bytes):
        """Wrap raw audio bytes as a base64 string payload."""
        encoded = base64.b64encode(chunk)
        return cls(data=encoded.decode('utf-8'))

    def get_bytes(self) -> bytes:
        """Decode the payload back into raw audio bytes."""
        return base64.b64decode(self.data)
class StartMessage(WebSocketMessage, type=WebSocketMessageType.START):
    """Client -> server: opens a conversation with its full configuration."""

    transcriber_config: TranscriberConfig
    agent_config: AgentConfig
    synthesizer_config: SynthesizerConfig
    conversation_id: Optional[str] = None


class ReadyMessage(WebSocketMessage, type=WebSocketMessageType.READY):
    """Server -> client: acknowledges StartMessage; audio may now flow."""

    pass


class StopMessage(WebSocketMessage, type=WebSocketMessageType.STOP):
    """Client -> server: ends the conversation."""

    pass

View file

@ -0,0 +1,13 @@
from vocode.streaming.models.audio_encoding import AudioEncoding
class BaseOutputDevice:
    """Abstract sink for synthesized audio chunks."""

    def __init__(self, sampling_rate: int, audio_encoding: AudioEncoding):
        self.sampling_rate = sampling_rate
        self.audio_encoding = audio_encoding

    async def send_async(self, chunk):
        """Play or forward one chunk of raw audio; subclasses must implement.

        BUG FIX: the original `raise NotImplemented` raises a TypeError
        (NotImplemented is a singleton, not an exception class);
        NotImplementedError is the correct abstract-method signal.
        """
        raise NotImplementedError

    async def maybe_send_mark_async(self, message):
        """Optional mark/checkpoint hook; no-op by default."""
        pass

View file

@ -0,0 +1,34 @@
import sounddevice as sd
import numpy as np
from .base_output_device import BaseOutputDevice
from vocode.streaming.models.audio_encoding import AudioEncoding
class SpeakerOutput(BaseOutputDevice):
    """Plays 16-bit PCM audio on a local speaker via sounddevice."""

    DEFAULT_SAMPLING_RATE = 44100

    def __init__(
        self,
        device_info: dict,
        sampling_rate: int = None,
        audio_encoding: AudioEncoding = AudioEncoding.LINEAR16,
    ):
        # NOTE(review): sampling_rate is effectively Optional[int] — None falls
        # back to the device default. (typing isn't imported in this module,
        # so the annotation is left as-is.)
        self.device_info = device_info
        sampling_rate = sampling_rate or int(
            self.device_info.get("default_samplerate", self.DEFAULT_SAMPLING_RATE)
        )
        super().__init__(sampling_rate, audio_encoding)
        self.stream = sd.OutputStream(
            channels=1,
            samplerate=self.sampling_rate,
            dtype=np.int16,
            device=int(self.device_info["index"]),
        )
        self.stream.start()

    async def send_async(self, chunk):
        """Play one chunk of int16 PCM.

        NOTE(review): stream.write is a blocking call inside an async method;
        it can stall the caller's event loop while audio is buffered.
        """
        self.stream.write(np.frombuffer(chunk, dtype=np.int16))

    def terminate(self):
        """Close the underlying output stream."""
        self.stream.close()

View file

@ -0,0 +1,7 @@
from .base_output_device import BaseOutputDevice
from vocode.streaming.models.audio_encoding import AudioEncoding
class TelephoneOutput(BaseOutputDevice):
    """Output device preconfigured for telephony audio: 8 kHz mu-law."""

    def __init__(self):
        super().__init__(sampling_rate=8000, audio_encoding=AudioEncoding.MULAW)

View file

@ -0,0 +1,106 @@
import websockets
from websockets.exceptions import ConnectionClosedOK
from websockets.client import WebSocketClientProtocol
import asyncio
from dotenv import load_dotenv
import os
import logging
import threading
import queue
import vocode
from vocode.streaming.input_device.base_input_device import (
BaseInputDevice,
)
from vocode.streaming.output_device.base_output_device import BaseOutputDevice
from vocode.streaming.models.transcriber import TranscriberConfig
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import SynthesizerConfig
from vocode.streaming.models.websocket import (
ReadyMessage,
AudioMessage,
StartMessage,
StopMessage,
)
load_dotenv()
class StreamingConversation:
    """Client for a live, two-way audio conversation over the hosted Vocode websocket.

    Audio from `input_device` is forwarded to the server as AudioMessages;
    synthesized audio frames received back are queued and played on
    `output_device` by a dedicated playback thread.
    """

    def __init__(
        self,
        input_device: BaseInputDevice,
        output_device: BaseOutputDevice,
        transcriber_config: TranscriberConfig,
        agent_config: AgentConfig,
        synthesizer_config: SynthesizerConfig,
        id: str = None,
    ):
        # NOTE(review): `id` is effectively Optional[str]; None lets the server
        # assign one. (typing isn't imported here, so annotation left as-is.)
        self.id = id
        self.input_device = input_device
        self.output_device = output_device
        self.transcriber_config = transcriber_config
        self.agent_config = agent_config
        self.synthesizer_config = synthesizer_config
        self.logger = logging.getLogger(__name__)
        # Set by receiver() once the server sends ReadyMessage; polled by sender().
        self.receiver_ready = False
        self.active = True
        # NOTE(review): output_loop appears unused — play_audio() builds its
        # own event loop; consider removing.
        self.output_loop = asyncio.new_event_loop()
        # Thread-safe handoff of received audio to the playback thread.
        self.output_audio_queue = queue.Queue()
        self.vocode_websocket_url = f"wss://{vocode.base_url}/conversation"

    async def wait_for_ready(self):
        """Poll until the server's ReadyMessage has been received."""
        while not self.receiver_ready:
            await asyncio.sleep(0.1)
        return True

    def deactivate(self):
        """Stop the sender loop and (within its 5s timeout) the playback thread."""
        self.active = False

    def play_audio(self):
        """Playback loop; runs in a worker thread with its own event loop."""
        async def run():
            while self.active:
                try:
                    # Blocking get is acceptable here: this private loop runs
                    # nothing else, and the 5s timeout lets the thread notice
                    # deactivation.
                    audio = self.output_audio_queue.get(timeout=5)
                    await self.output_device.send_async(audio)
                except queue.Empty:
                    continue

        loop = asyncio.new_event_loop()
        loop.run_until_complete(run())

    async def start(self):
        """Open the websocket and run sender/receiver until the conversation ends."""
        async with websockets.connect(
            f"{self.vocode_websocket_url}?key={vocode.api_key}"
        ) as ws:

            async def sender(ws: WebSocketClientProtocol):
                # Handshake: describe the conversation, then wait for READY.
                start_message = StartMessage(
                    transcriber_config=self.transcriber_config,
                    agent_config=self.agent_config,
                    synthesizer_config=self.synthesizer_config,
                    conversation_id=self.id,
                )
                await ws.send(start_message.json())
                await self.wait_for_ready()
                self.logger.info("Listening...press Ctrl+C to stop")
                while self.active:
                    data = self.input_device.get_audio()
                    if data:
                        try:
                            await ws.send(AudioMessage.from_bytes(data).json())
                        except ConnectionClosedOK:
                            self.deactivate()
                            return
                    # Yield control so receiver() gets scheduled between chunks.
                    await asyncio.sleep(0)
                await ws.send(StopMessage().json())

            async def receiver(ws: WebSocketClientProtocol):
                # First frame after connect must be the server's READY ack.
                ReadyMessage.parse_raw(await ws.recv())
                self.receiver_ready = True
                # Everything afterwards is a stream of audio frames.
                async for msg in ws:
                    audio_message = AudioMessage.parse_raw(msg)
                    self.output_audio_queue.put_nowait(audio_message.get_bytes())

            output_thread = threading.Thread(target=self.play_audio)
            output_thread.start()
            return await asyncio.gather(sender(ws), receiver(ws))

View file

@ -0,0 +1,62 @@
from fastapi import FastAPI, Response, Form
from typing import Optional
import requests
import uvicorn
import vocode
from vocode.streaming.models.transcriber import TranscriberConfig
from vocode.streaming.models.synthesizer import SynthesizerConfig
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.telephony import (
CreateInboundCall,
TwilioConfig,
TwilioConfig,
)
class InboundCallServer:
    """FastAPI server answering Twilio inbound-call webhooks.

    POST /vocode receives Twilio's webhook, relays the call to the hosted
    Vocode API, and returns the TwiML that Vocode produces.
    """

    def __init__(
        self,
        agent_config: AgentConfig,
        transcriber_config: Optional[TranscriberConfig] = None,
        synthesizer_config: Optional[SynthesizerConfig] = None,
        response_on_rate_limit: Optional[str] = None,
        twilio_config: Optional[TwilioConfig] = None,
    ):
        self.agent_config = agent_config
        self.transcriber_config = transcriber_config
        self.synthesizer_config = synthesizer_config
        self.app = FastAPI()
        self.app.post("/vocode")(self.handle_call)
        # Spoken to the caller when the hosted API returns HTTP 429.
        self.response_on_rate_limit = (
            response_on_rate_limit
            or "The line is really busy right now, check back later!"
        )
        self.twilio_config = twilio_config
        self.vocode_inbound_call_url = f"https://{vocode.base_url}/create_inbound_call"

    async def handle_call(self, twilio_sid: str = Form(alias="CallSid")):
        """Relay the inbound call to the Vocode API and echo back its TwiML.

        NOTE(review): requests.post is blocking inside an async handler; it
        stalls the event loop for the duration of the upstream call.
        """
        response = requests.post(
            self.vocode_inbound_call_url,
            headers={"Authorization": f"Bearer {vocode.api_key}"},
            json=CreateInboundCall(
                agent_config=self.agent_config,
                twilio_sid=twilio_sid,
                transcriber_config=self.transcriber_config,
                synthesizer_config=self.synthesizer_config,
                twilio_config=self.twilio_config,
            ).dict(),
        )
        if response.status_code == 429:
            # Rate-limited: answer the caller with the configured message.
            return Response(
                f"<Response><Say>{self.response_on_rate_limit}</Say></Response>",
                media_type="application/xml",
            )
        assert response.ok, response.text
        return Response(
            response.text,
            media_type="application/xml",
        )

    def run(self, host="localhost", port=3000):
        """Serve the webhook app with uvicorn (blocking)."""
        uvicorn.run(self.app, host=host, port=port)

View file

@ -0,0 +1,68 @@
from typing import Optional
import requests
import vocode
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import SynthesizerConfig
from vocode.streaming.models.transcriber import TranscriberConfig
from ..models.telephony import (
CallEntity,
CreateOutboundCall,
EndOutboundCall,
TwilioConfig,
)
class OutboundCall:
    """Handle for placing and ending an outbound phone call via the hosted Vocode API."""

    def __init__(
        self,
        recipient: CallEntity,
        caller: CallEntity,
        agent_config: AgentConfig,
        transcriber_config: Optional[TranscriberConfig] = None,
        synthesizer_config: Optional[SynthesizerConfig] = None,
        conversation_id: Optional[str] = None,
        twilio_config: Optional[TwilioConfig] = None,
    ):
        self.recipient = recipient
        self.caller = caller
        self.agent_config = agent_config
        self.transcriber_config = transcriber_config
        self.synthesizer_config = synthesizer_config
        self.conversation_id = conversation_id
        self.twilio_config = twilio_config
        self.vocode_create_outbound_call_url = (
            f"https://{vocode.base_url}/create_outbound_call"
        )
        self.vocode_end_outbound_call_url = (
            f"https://{vocode.base_url}/end_outbound_call"
        )

    def start(self) -> None:
        """Place the call and record the server-assigned conversation id.

        Return annotation corrected from `-> str`: the id is stored on
        self.conversation_id and nothing is returned.
        """
        response = requests.post(
            self.vocode_create_outbound_call_url,
            headers={"Authorization": f"Bearer {vocode.api_key}"},
            json=CreateOutboundCall(
                recipient=self.recipient,
                caller=self.caller,
                agent_config=self.agent_config,
                transcriber_config=self.transcriber_config,
                synthesizer_config=self.synthesizer_config,
                conversation_id=self.conversation_id,
                twilio_config=self.twilio_config,
            ).dict(),
        )
        assert response.ok, response.text
        data = response.json()
        self.conversation_id = data["id"]

    def end(self) -> None:
        """End the call; a 404 (call already gone) is tolerated.

        Return annotation corrected from `-> str`: nothing is returned.
        """
        response = requests.post(
            self.vocode_end_outbound_call_url,
            headers={"Authorization": f"Bearer {vocode.api_key}"},
            json=EndOutboundCall(
                call_id=self.conversation_id,
                twilio_config=self.twilio_config,
            ).dict(),
        )
        assert response.ok or response.status_code == 404, response.text

View file

@ -0,0 +1,60 @@
from typing import Optional
import requests
import vocode
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import SynthesizerConfig
from vocode.streaming.models.transcriber import TranscriberConfig
from vocode.streaming.telephony.outbound_call import OutboundCall
from vocode.streaming.models.telephony import (
CallEntity,
DialIntoZoomCall,
TwilioConfig,
)
class ZoomDialIn(OutboundCall):
    """Outbound call that dials the agent into a Zoom meeting."""

    def __init__(
        self,
        recipient: CallEntity,
        caller: CallEntity,
        zoom_meeting_id: str,
        zoom_meeting_password: str,
        agent_config: AgentConfig,
        transcriber_config: Optional[TranscriberConfig] = None,
        synthesizer_config: Optional[SynthesizerConfig] = None,
        conversation_id: Optional[str] = None,
        twilio_config: Optional[TwilioConfig] = None,
    ):
        super().__init__(
            recipient=recipient,
            caller=caller,
            agent_config=agent_config,
            transcriber_config=transcriber_config,
            synthesizer_config=synthesizer_config,
            conversation_id=conversation_id,
            twilio_config=twilio_config,
        )
        self.zoom_meeting_id = zoom_meeting_id
        self.zoom_meeting_password = zoom_meeting_password
        # Overrides the plain outbound-call endpoint used by the base class.
        self.vocode_zoom_dial_in_url = f"https://{vocode.base_url}/dial_into_zoom_call"

    def start(self) -> None:
        """Dial into the meeting and record the server-assigned conversation id.

        Return annotation corrected from `-> str`: nothing is returned.
        """
        response = requests.post(
            self.vocode_zoom_dial_in_url,
            headers={"Authorization": f"Bearer {vocode.api_key}"},
            json=DialIntoZoomCall(
                recipient=self.recipient,
                caller=self.caller,
                zoom_meeting_id=self.zoom_meeting_id,
                zoom_meeting_password=self.zoom_meeting_password,
                agent_config=self.agent_config,
                transcriber_config=self.transcriber_config,
                synthesizer_config=self.synthesizer_config,
                conversation_id=self.conversation_id,
                twilio_config=self.twilio_config,
            ).dict(),
        )
        assert response.ok, response.text
        data = response.json()
        self.conversation_id = data["id"]

View file

@ -0,0 +1,10 @@
from fastapi import FastAPI
import uvicorn
class BaseAgent:
    """Minimal FastAPI application shell for user-implemented agent servers."""

    def __init__(self):
        # Subclasses register their routes on this app.
        self.app = FastAPI()

    def run(self, host="localhost", port=3000):
        """Serve the agent app with uvicorn (blocking)."""
        uvicorn.run(self.app, host=host, port=port)

View file

@ -0,0 +1,19 @@
from .base_agent import BaseAgent
from ..models.agent import RESTfulAgentInput, RESTfulAgentOutput, RESTfulAgentText, RESTfulAgentEnd
from pydantic import BaseModel
from typing import Union
from fastapi import APIRouter
class RESTfulAgent(BaseAgent):
    """Agent served over a one-shot HTTP POST endpoint (/respond)."""

    def __init__(self):
        super().__init__()
        # Register the handler on the FastAPI app created by BaseAgent.
        self.app.post("/respond")(self.respond_rest)

    async def respond(self, human_input: str, conversation_id: str) -> RESTfulAgentOutput:
        """Produce the agent's reply for one human input; subclasses implement."""
        raise NotImplementedError

    async def respond_rest(self, request: RESTfulAgentInput) -> Union[RESTfulAgentText, RESTfulAgentEnd]:
        """HTTP adapter: unpack the request and delegate to respond()."""
        response = await self.respond(request.human_input, request.conversation_id)
        return response

View file

@ -0,0 +1,56 @@
from .base_agent import BaseAgent
import uuid
import typing
from typing import AsyncGenerator, Union, Optional
from fastapi import WebSocket
from ..models.agent import (
WebSocketAgentStartMessage,
WebSocketAgentReadyMessage,
WebSocketAgentTextEndMessage,
WebSocketAgentTextMessage,
WebSocketAgentStopMessage,
WebSocketAgentMessage,
WebSocketAgentMessageType,
)
class WebSocketAgent(BaseAgent):
    """Agent served over a websocket; supports one-shot or streamed replies."""

    def __init__(self, generate_responses: bool = False):
        super().__init__()
        # When True, replies stream via generate_response(); else respond().
        self.generate_responses = generate_responses
        self.app.websocket("/respond")(self.respond_websocket)

    async def respond(
        self, human_input: str, conversation_id: Optional[str] = None
    ) -> Union[WebSocketAgentTextMessage, WebSocketAgentStopMessage]:
        """Produce a single reply message; subclasses must implement."""
        raise NotImplementedError

    async def generate_response(
        self, human_input: str, conversation_id: Optional[str] = None
    ) -> AsyncGenerator[
        Union[WebSocketAgentTextMessage, WebSocketAgentTextEndMessage], None
    ]:
        """Yield a stream of reply messages; subclasses must implement."""
        raise NotImplementedError

    async def respond_websocket(self, websocket: WebSocket):
        """Protocol driver: START -> READY handshake, then a TEXT/STOP loop."""
        await websocket.accept()
        # Handshake: consume START, acknowledge with READY.
        WebSocketAgentStartMessage.parse_obj(await websocket.receive_json())
        await websocket.send_text(WebSocketAgentReadyMessage().json())
        while True:
            input_message: WebSocketAgentMessage = WebSocketAgentMessage.parse_obj(
                await websocket.receive_json()
            )
            if input_message.type == WebSocketAgentMessageType.STOP:
                break
            # Any non-STOP frame is treated as a text message from the human.
            text_message = typing.cast(WebSocketAgentTextMessage, input_message)
            if self.generate_responses:
                async for output_response in self.generate_response(
                    text_message.data.text, text_message.conversation_id
                ):
                    await websocket.send_text(output_response.json())
            else:
                output_response = await self.respond(
                    text_message.data.text, text_message.conversation_id
                )
                await websocket.send_text(output_response.json())
        await websocket.close()