From de6d76c955a66924a316ef8831c9d30cb2712bc1 Mon Sep 17 00:00:00 2001 From: Ajay Raj Date: Fri, 3 Mar 2023 18:24:56 -0800 Subject: [PATCH] monster commit --- simple_conversation.py | 6 ++- simple_outbound_call.py | 19 +++++++ user_implemented_agent.py | 25 ++++++++-- vocode/__init__.py | 3 +- vocode/conversation.py | 17 ++++--- vocode/models/agent.py | 49 ++++++++++++------- vocode/models/synthesizer.py | 8 +-- vocode/models/telephony.py | 7 +-- vocode/models/transcriber.py | 8 +-- vocode/models/websocket.py | 10 ++-- vocode/outbound_call.py | 27 ++++++++++ vocode/user_implemented_agent/base_agent.py | 3 -- .../user_implemented_agent/restful_agent.py | 12 +++-- .../user_implemented_agent/websocket_agent.py | 27 +++++++--- 14 files changed, 155 insertions(+), 66 deletions(-) create mode 100644 simple_outbound_call.py create mode 100644 vocode/outbound_call.py diff --git a/simple_conversation.py b/simple_conversation.py index 0f89cce..1a33a89 100644 --- a/simple_conversation.py +++ b/simple_conversation.py @@ -20,9 +20,11 @@ if __name__ == "__main__": input_device=microphone_input, output_device=speaker_output, transcriber_config=DeepgramTranscriberConfig.from_input_device(microphone_input), - agent_config=ChatGPTAgentConfig( + agent_config=WebSocketUserImplementedAgentConfig( initial_message="Hello!", - prompt_preamble="Vocode is an SDK that allows developers to create voice bots like this one in less than 10 lines of code. The AI is explaining to the human what Vocode is." + respond=WebSocketUserImplementedAgentConfig.RouteConfig( + url="ws://localhost:3001/respond" + ) ), synthesizer_config=AzureSynthesizerConfig.from_output_device(speaker_output) ) diff --git a/simple_outbound_call.py b/simple_outbound_call.py new file mode 100644 index 0000000..59038d5 --- /dev/null +++ b/simple_outbound_call.py @@ -0,0 +1,19 @@ +from vocode.outbound_call import OutboundCall +from vocode.models.telephony import CallEntity +from vocode.models.agent import EchoAgentConfig, WebSocketUserImplementedAgentConfig + +if __name__ == '__main__': + call = OutboundCall( + recipient=CallEntity( + phone_number="+14088926228", + ), + caller=CallEntity( + phone_number="+14086600744", + ), + agent_config=WebSocketUserImplementedAgentConfig( + respond=WebSocketUserImplementedAgentConfig.RouteConfig( + url="ws://localhost:3001/respond" + ) + ) + ) + call.start() \ No newline at end of file diff --git a/user_implemented_agent.py b/user_implemented_agent.py index cd744c2..710e0ea 100644 --- a/user_implemented_agent.py +++ b/user_implemented_agent.py @@ -1,12 +1,27 @@ from vocode.user_implemented_agent.restful_agent import RESTfulAgent +from vocode.models.agent import RESTfulAgentOutput, RESTfulAgentText, RESTfulAgentEnd, WebSocketAgentMessage, WebSocketAgentTextMessage, WebSocketAgentStopMessage from vocode.user_implemented_agent.websocket_agent import WebSocketAgent -class EchoAgent(WebSocketAgent): +class TestRESTfulAgent(RESTfulAgent): - async def respond(self, input: str) -> str: + async def respond(self, input: str) -> RESTfulAgentOutput: print(input) - return ''.join(i + j for i, j in zip(input, ' ' * len(input))) + if "bye" in input: + return RESTfulAgentEnd() + else: + spelt = ''.join(i + j for i, j in zip(input, ' ' * len(input))) + return RESTfulAgentText(response=spelt) +class TestWebSocketAgent(WebSocketAgent): + + async def respond(self, input: str) -> WebSocketAgentMessage: + print(input) + if "bye" in input: + return WebSocketAgentStopMessage() + else: + spelt = ''.join(i + j for i, j in zip(input, ' ' * len(input))) + return WebSocketAgentTextMessage.from_text(spelt) + if __name__ == "__main__": - agent = EchoAgent() - agent.run() + agent = TestWebSocketAgent() + agent.run(port=3001) diff --git a/vocode/__init__.py b/vocode/__init__.py index f726bd5..3b62cfb 100644 --- a/vocode/__init__.py +++ b/vocode/__init__.py @@ -3,4 +3,5 @@ from dotenv import load_dotenv load_dotenv() -api_key = os.getenv("VOCODE_API_KEY") \ No newline at end of file +api_key = os.getenv("VOCODE_API_KEY") +BASE_URL = "api.vocode.dev" \ No newline at end of file diff --git a/vocode/conversation.py b/vocode/conversation.py index 169dc4f..02bf99a 100644 --- a/vocode/conversation.py +++ b/vocode/conversation.py @@ -1,4 +1,6 @@ import websockets +from websockets.exceptions import ConnectionClosedOK +from websockets.client import WebSocketClientProtocol import asyncio from dotenv import load_dotenv import os @@ -14,10 +16,9 @@ from .models.transcriber import TranscriberConfig from .models.agent import AgentConfig from .models.synthesizer import SynthesizerConfig from .models.websocket import ReadyMessage, AudioMessage, StartMessage, StopMessage -from . import api_key - -VOCODE_WEBSOCKET_URL = f"wss://api.vocode.dev/conversation" +from . import api_key, BASE_URL +VOCODE_WEBSOCKET_URL = f"wss://{BASE_URL}/conversation" class Conversation: def __init__( @@ -62,7 +63,7 @@ class Conversation: async def start(self): async with websockets.connect(f"{VOCODE_WEBSOCKET_URL}?key={api_key}") as ws: - async def sender(ws): + async def sender(ws: WebSocketClientProtocol): start_message = StartMessage( transcriber_config=self.transcriber_config, agent_config=self.agent_config, @@ -74,11 +75,15 @@ class Conversation: while self.active: data = self.input_device.get_audio() if data: - await ws.send(AudioMessage.from_bytes(data).json()) + try: + await ws.send(AudioMessage.from_bytes(data).json()) + except ConnectionClosedOK: + self.deactivate() + return await asyncio.sleep(0) await ws.send(StopMessage().json()) - async def receiver(ws): + async def receiver(ws: WebSocketClientProtocol): ReadyMessage.parse_raw(await ws.recv()) self.receiver_ready = True async for msg in ws: diff --git a/vocode/models/agent.py b/vocode/models/agent.py index ded85bd..a8ec115 100644 --- a/vocode/models/agent.py +++ b/vocode/models/agent.py @@ -4,14 +4,14 @@ from .model import TypedModel, BaseModel class AgentType(str, Enum): - BASE = "base" - LLM = "llm" - CHAT_GPT_ALPHA = "chat_gpt_alpha" - CHAT_GPT = "chat_gpt" - ECHO = "echo" - INFORMATION_RETRIEVAL = "information_retrieval" - RESTFUL_USER_IMPLEMENTED = "restful_user_implemented" - WEBSOCKET_USER_IMPLEMENTED = "websocket_user_implemented" + BASE = "agent_base" + LLM = "agent_llm" + CHAT_GPT_ALPHA = "agent_chat_gpt_alpha" + CHAT_GPT = "agent_chat_gpt" + ECHO = "agent_echo" + INFORMATION_RETRIEVAL = "agent_information_retrieval" + RESTFUL_USER_IMPLEMENTED = "agent_restful_user_implemented" + WEBSOCKET_USER_IMPLEMENTED = "agent_websocket_user_implemented" class AgentConfig(TypedModel, type=AgentType.BASE): @@ -57,9 +57,20 @@ class RESTfulUserImplementedAgentConfig(AgentConfig, type=AgentType.RESTFUL_USER class RESTfulAgentInput(BaseModel): human_input: str -class RESTfulAgentOutput(BaseModel): +class RESTfulAgentOutputType(str, Enum): + BASE = "restful_agent_base" + TEXT = "restful_agent_text" + END = "restful_agent_end" + +class RESTfulAgentOutput(TypedModel, type=RESTfulAgentOutputType.BASE): + pass + +class RESTfulAgentText(RESTfulAgentOutput, type=RESTfulAgentOutputType.TEXT): response: str +class RESTfulAgentEnd(RESTfulAgentOutput, type=RESTfulAgentOutputType.END): + pass + class WebSocketUserImplementedAgentConfig(AgentConfig, type=AgentType.WEBSOCKET_USER_IMPLEMENTED): class RouteConfig(BaseModel): url: str @@ -70,15 +81,15 @@ class WebSocketUserImplementedAgentConfig(AgentConfig, type=AgentType.WEBSOCKET_ # send_message_on_cut_off: bool = False class WebSocketAgentMessageType(str, Enum): - AGENT_BASE = 'agent_base' - AGENT_START = 'agent_start' - AGENT_TEXT = 'agent_text' - AGENT_READY = 'agent_ready' - AGENT_STOP = 'agent_stop' + BASE = 'websocket_agent_base' + START = 'websocket_agent_start' + TEXT = 'websocket_agent_text' + READY = 'websocket_agent_ready' + STOP = 'websocket_agent_stop' -class WebSocketAgentMessage(TypedModel, type=WebSocketAgentMessageType.AGENT_BASE): pass +class WebSocketAgentMessage(TypedModel, type=WebSocketAgentMessageType.BASE): pass -class AgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_TEXT): +class WebSocketAgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.TEXT): class Payload(BaseModel): text: str @@ -89,11 +100,11 @@ class AgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGE return cls(data=cls.Payload(text=text)) -class AgentStartMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_START): +class WebSocketAgentStartMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.START): pass -class AgentReadyMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_READY): +class WebSocketAgentReadyMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.READY): pass -class AgentStopMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_STOP): +class WebSocketAgentStopMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.STOP): pass diff --git a/vocode/models/synthesizer.py b/vocode/models/synthesizer.py index 6c662bd..f796e60 100644 --- a/vocode/models/synthesizer.py +++ b/vocode/models/synthesizer.py @@ -4,10 +4,10 @@ from .audio_encoding import AudioEncoding from ..output_device.base_output_device import BaseOutputDevice class SynthesizerType(str, Enum): - BASE = "base" - AZURE = "azure" - GOOGLE = "google" - ELEVEN_LABS = "eleven_labs" + BASE = "synthesizer_base" + AZURE = "synthesizer_azure" + GOOGLE = "synthesizer_google" + ELEVEN_LABS = "synthesizer_eleven_labs" class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE): sampling_rate: int diff --git a/vocode/models/telephony.py b/vocode/models/telephony.py index 645e165..cc5bfd9 100644 --- a/vocode/models/telephony.py +++ b/vocode/models/telephony.py @@ -1,14 +1,11 @@ -from pydantic import BaseModel +from vocode.models.model import BaseModel from vocode.models.agent import AgentConfig, InformationRetrievalAgentConfig - class CallEntity(BaseModel): phone_number: str - descriptor: str - class CreateCallRequest(BaseModel): recipient: CallEntity caller: CallEntity - agent_config: InformationRetrievalAgentConfig # TODO switch to AgentConfig + agent_config: AgentConfig # TODO add IVR/etc. diff --git a/vocode/models/transcriber.py b/vocode/models/transcriber.py index 9b684df..508aa13 100644 --- a/vocode/models/transcriber.py +++ b/vocode/models/transcriber.py @@ -6,10 +6,10 @@ from ..input_device.base_input_device import BaseInputDevice class TranscriberType(str, Enum): - BASE = "base" - DEEPGRAM = "deepgram" - GOOGLE = "google" - ASSEMBLY_AI = "assembly_ai" + BASE = "transcriber_base" + DEEPGRAM = "transcriber_deepgram" + GOOGLE = "transcriber_google" + ASSEMBLY_AI = "transcriber_assembly_ai" class EndpointingConfig(BaseModel): diff --git a/vocode/models/websocket.py b/vocode/models/websocket.py index c446426..b88d116 100644 --- a/vocode/models/websocket.py +++ b/vocode/models/websocket.py @@ -6,11 +6,11 @@ from .agent import AgentConfig from .synthesizer import SynthesizerConfig class WebSocketMessageType(str, Enum): - BASE = 'base' - START = 'start' - AUDIO = 'audio' - READY = 'ready' - STOP = 'stop' + BASE = 'websocket_base' + START = 'websocket_start' + AUDIO = 'websocket_audio' + READY = 'websocket_ready' + STOP = 'websocket_stop' class WebSocketMessage(TypedModel, type=WebSocketMessageType.BASE): pass diff --git a/vocode/outbound_call.py b/vocode/outbound_call.py new file mode 100644 index 0000000..21b5278 --- /dev/null +++ b/vocode/outbound_call.py @@ -0,0 +1,27 @@ +from .models.telephony import CallEntity, CreateCallRequest +import requests +from . import api_key, BASE_URL + +VOCODE_OUTBOUND_CALL_URL = f"https://{BASE_URL}/create_outbound_call" + +class OutboundCall: + + def __init__(self, recipient: CallEntity, caller: CallEntity, agent_config): + self.recipient = recipient + self.caller = caller + self.agent_config = agent_config + + def start(self): + return requests.post( + VOCODE_OUTBOUND_CALL_URL, + headers={ + "Authorization": f"Bearer {api_key}" + }, + json=CreateCallRequest( + recipient=self.recipient, + caller=self.caller, + agent_config=self.agent_config + ).dict() + ) + + \ No newline at end of file diff --git a/vocode/user_implemented_agent/base_agent.py b/vocode/user_implemented_agent/base_agent.py index 904687e..3a08a4f 100644 --- a/vocode/user_implemented_agent/base_agent.py +++ b/vocode/user_implemented_agent/base_agent.py @@ -5,9 +5,6 @@ class BaseAgent(): def __init__(self): self.app = FastAPI() - - async def respond(self, human_input) -> str: - raise NotImplementedError def run(self, host="localhost", port=3000): uvicorn.run(self.app, host=host, port=port) \ No newline at end of file diff --git a/vocode/user_implemented_agent/restful_agent.py b/vocode/user_implemented_agent/restful_agent.py index 385f28d..9d346a0 100644 --- a/vocode/user_implemented_agent/restful_agent.py +++ b/vocode/user_implemented_agent/restful_agent.py @@ -1,6 +1,7 @@ from .base_agent import BaseAgent -from ..models.agent import RESTfulAgentInput, RESTfulAgentOutput +from ..models.agent import RESTfulAgentInput, RESTfulAgentOutput, RESTfulAgentText, RESTfulAgentEnd from pydantic import BaseModel +from typing import Union from fastapi import APIRouter class RESTfulAgent(BaseAgent): @@ -9,7 +10,10 @@ class RESTfulAgent(BaseAgent): super().__init__() self.app.post("/respond")(self.respond_rest) - async def respond_rest(self, request: RESTfulAgentInput) -> RESTfulAgentOutput: - response = await self.respond(request.human_input) - return RESTfulAgentOutput(response=response) + async def respond(self, human_input) -> RESTfulAgentOutput: + raise NotImplementedError + + async def respond_rest(self, request: RESTfulAgentInput) -> Union[RESTfulAgentText, RESTfulAgentEnd]: + response = await self.respond(request.human_input) + return response diff --git a/vocode/user_implemented_agent/websocket_agent.py b/vocode/user_implemented_agent/websocket_agent.py index 05fe0cf..d191d86 100644 --- a/vocode/user_implemented_agent/websocket_agent.py +++ b/vocode/user_implemented_agent/websocket_agent.py @@ -1,8 +1,16 @@ from .base_agent import BaseAgent from pydantic import BaseModel import typing +from typing import Union from fastapi import APIRouter, WebSocket -from ..models.agent import AgentStartMessage, AgentReadyMessage, AgentTextMessage, WebSocketAgentMessage, WebSocketAgentMessageType +from ..models.agent import ( + WebSocketAgentStartMessage, + WebSocketAgentReadyMessage, + WebSocketAgentTextMessage, + WebSocketAgentStopMessage, + WebSocketAgentMessage, + WebSocketAgentMessageType +) class WebSocketAgent(BaseAgent): @@ -10,16 +18,19 @@ class WebSocketAgent(BaseAgent): super().__init__() self.app.websocket("/respond")(self.respond_websocket) + async def respond(self, human_input) -> Union[WebSocketAgentTextMessage, WebSocketAgentStopMessage]: + raise NotImplementedError + async def respond_websocket(self, websocket: WebSocket): await websocket.accept() - AgentStartMessage.parse_obj(await websocket.receive_json()) - await websocket.send_text(AgentReadyMessage().json()) + WebSocketAgentStartMessage.parse_obj(await websocket.receive_json()) + await websocket.send_text(WebSocketAgentReadyMessage().json()) while True: - message = WebSocketAgentMessage.parse_obj(await websocket.receive_json()) - if message.type == WebSocketAgentMessageType.AGENT_STOP: + input_message = WebSocketAgentMessage.parse_obj(await websocket.receive_json()) + if input_message.type == WebSocketAgentMessageType.STOP: break - text_message = typing.cast(AgentTextMessage, message) - response = await self.respond(text_message.data.text) - await websocket.send_text(AgentTextMessage.from_text(response).json()) + text_message = typing.cast(WebSocketAgentTextMessage, input_message) + output_response = await self.respond(text_message.data.text) + await websocket.send_text(output_response.json()) await websocket.close()