monster commit

2023-03-03 18:24:56 -08:00 · 2023-03-03 18:24:56 -08:00 · de6d76c955
commit de6d76c955
parent 89d4dc2370
14 changed files with 155 additions and 66 deletions
--- a/simple_conversation.py
+++ b/simple_conversation.py
@@ -20,9 +20,11 @@ if __name__ == "__main__":
        input_device=microphone_input,
        output_device=speaker_output,
        transcriber_config=DeepgramTranscriberConfig.from_input_device(microphone_input),
-        agent_config=ChatGPTAgentConfig(
+        agent_config=WebSocketUserImplementedAgentConfig(
            initial_message="Hello!",
-            prompt_preamble="Vocode is an SDK that allows developers to create voice bots like this one in less than 10 lines of code. The AI is explaining to the human what Vocode is."
+            respond=WebSocketUserImplementedAgentConfig.RouteConfig(
+                url="ws://localhost:3001/respond"
+            )
        ),
        synthesizer_config=AzureSynthesizerConfig.from_output_device(speaker_output)
    )
--- a/simple_outbound_call.py
+++ b/simple_outbound_call.py
@@ -0,0 +1,19 @@
+from vocode.outbound_call import OutboundCall
+from vocode.models.telephony import CallEntity
+from vocode.models.agent import EchoAgentConfig, WebSocketUserImplementedAgentConfig
+
+if __name__ == '__main__':
+    call = OutboundCall(
+        recipient=CallEntity(
+            phone_number="+14088926228",
+        ),
+        caller=CallEntity(
+            phone_number="+14086600744",
+        ),
+        agent_config=WebSocketUserImplementedAgentConfig(
+            respond=WebSocketUserImplementedAgentConfig.RouteConfig(
+                url="ws://localhost:3001/respond"
+            )
+        )
+    )
+    call.start()
--- a/user_implemented_agent.py
+++ b/user_implemented_agent.py
@@ -1,12 +1,27 @@
 from vocode.user_implemented_agent.restful_agent import RESTfulAgent
+from vocode.models.agent import RESTfulAgentOutput, RESTfulAgentText, RESTfulAgentEnd, WebSocketAgentMessage, WebSocketAgentTextMessage, WebSocketAgentStopMessage
 from vocode.user_implemented_agent.websocket_agent import WebSocketAgent 

-class EchoAgent(WebSocketAgent):
+class TestRESTfulAgent(RESTfulAgent):

-    async def respond(self, input: str) -> str:
+    async def respond(self, input: str) -> RESTfulAgentOutput:
        print(input)
-        return ''.join(i + j for i, j in zip(input, ' ' * len(input)))
+        if "bye" in input:
+            return RESTfulAgentEnd()
+        else:
+            spelt = ''.join(i + j for i, j in zip(input, ' ' * len(input)))
+            return RESTfulAgentText(response=spelt)
    
+class TestWebSocketAgent(WebSocketAgent):
+
+    async def respond(self, input: str) -> WebSocketAgentMessage:
+        print(input)
+        if "bye" in input:
+            return WebSocketAgentStopMessage()
+        else:
+            spelt = ''.join(i + j for i, j in zip(input, ' ' * len(input)))
+            return WebSocketAgentTextMessage.from_text(spelt)
+        
 if __name__ == "__main__":
-    agent = EchoAgent()
-    agent.run()
+    agent = TestWebSocketAgent()
+    agent.run(port=3001)
--- a/vocode/__init__.py
+++ b/vocode/__init__.py
@@ -3,4 +3,5 @@ from dotenv import load_dotenv

 load_dotenv()

-api_key = os.getenv("VOCODE_API_KEY")
+api_key = os.getenv("VOCODE_API_KEY")
+BASE_URL = "api.vocode.dev"
--- a/vocode/conversation.py
+++ b/vocode/conversation.py
@@ -1,4 +1,6 @@
 import websockets
+from websockets.exceptions import ConnectionClosedOK
+from websockets.client import WebSocketClientProtocol
 import asyncio
 from dotenv import load_dotenv
 import os
@@ -14,10 +16,9 @@ from .models.transcriber import TranscriberConfig
 from .models.agent import AgentConfig
 from .models.synthesizer import SynthesizerConfig
 from .models.websocket import ReadyMessage, AudioMessage, StartMessage, StopMessage
-from . import api_key
-
-VOCODE_WEBSOCKET_URL = f"wss://api.vocode.dev/conversation"
+from . import api_key, BASE_URL

+VOCODE_WEBSOCKET_URL = f"wss://{BASE_URL}/conversation"

 class Conversation:
    def __init__(
@@ -62,7 +63,7 @@ class Conversation:
    async def start(self):
        async with websockets.connect(f"{VOCODE_WEBSOCKET_URL}?key={api_key}") as ws:

-            async def sender(ws):
+            async def sender(ws: WebSocketClientProtocol):
                start_message = StartMessage(
                    transcriber_config=self.transcriber_config,
                    agent_config=self.agent_config,
@@ -74,11 +75,15 @@ class Conversation:
                while self.active:
                    data = self.input_device.get_audio()
                    if data:
-                        await ws.send(AudioMessage.from_bytes(data).json())
+                        try:
+                            await ws.send(AudioMessage.from_bytes(data).json())
+                        except ConnectionClosedOK:
+                            self.deactivate()
+                            return
                        await asyncio.sleep(0)
                await ws.send(StopMessage().json())

-            async def receiver(ws):
+            async def receiver(ws: WebSocketClientProtocol):
                ReadyMessage.parse_raw(await ws.recv())
                self.receiver_ready = True
                async for msg in ws:
--- a/vocode/models/agent.py
+++ b/vocode/models/agent.py
@@ -4,14 +4,14 @@ from .model import TypedModel, BaseModel


 class AgentType(str, Enum):
-    BASE = "base"
-    LLM = "llm"
-    CHAT_GPT_ALPHA = "chat_gpt_alpha"
-    CHAT_GPT = "chat_gpt"
-    ECHO = "echo"
-    INFORMATION_RETRIEVAL = "information_retrieval"
-    RESTFUL_USER_IMPLEMENTED = "restful_user_implemented"
-    WEBSOCKET_USER_IMPLEMENTED = "websocket_user_implemented"
+    BASE = "agent_base"
+    LLM = "agent_llm"
+    CHAT_GPT_ALPHA = "agent_chat_gpt_alpha"
+    CHAT_GPT = "agent_chat_gpt"
+    ECHO = "agent_echo"
+    INFORMATION_RETRIEVAL = "agent_information_retrieval"
+    RESTFUL_USER_IMPLEMENTED = "agent_restful_user_implemented"
+    WEBSOCKET_USER_IMPLEMENTED = "agent_websocket_user_implemented"


 class AgentConfig(TypedModel, type=AgentType.BASE):
@@ -57,9 +57,20 @@ class RESTfulUserImplementedAgentConfig(AgentConfig, type=AgentType.RESTFUL_USER
 class RESTfulAgentInput(BaseModel):
    human_input: str

-class RESTfulAgentOutput(BaseModel):
+class RESTfulAgentOutputType(str, Enum):
+    BASE = "restful_agent_base"
+    TEXT = "restful_agent_text"
+    END = "restful_agent_end"
+
+class RESTfulAgentOutput(TypedModel, type=RESTfulAgentOutputType.BASE):
+    pass
+
+class RESTfulAgentText(RESTfulAgentOutput, type=RESTfulAgentOutputType.TEXT):
    response: str

+class RESTfulAgentEnd(RESTfulAgentOutput, type=RESTfulAgentOutputType.END):
+    pass
+
 class WebSocketUserImplementedAgentConfig(AgentConfig, type=AgentType.WEBSOCKET_USER_IMPLEMENTED):
    class RouteConfig(BaseModel):
        url: str
@@ -70,15 +81,15 @@ class WebSocketUserImplementedAgentConfig(AgentConfig, type=AgentType.WEBSOCKET_
    # send_message_on_cut_off: bool = False

 class WebSocketAgentMessageType(str, Enum):
-    AGENT_BASE = 'agent_base'
-    AGENT_START = 'agent_start'
-    AGENT_TEXT = 'agent_text'
-    AGENT_READY = 'agent_ready'
-    AGENT_STOP = 'agent_stop'
+    BASE = 'websocket_agent_base'
+    START = 'websocket_agent_start'
+    TEXT = 'websocket_agent_text'
+    READY = 'websocket_agent_ready'
+    STOP = 'websocket_agent_stop'

-class WebSocketAgentMessage(TypedModel, type=WebSocketAgentMessageType.AGENT_BASE): pass
+class WebSocketAgentMessage(TypedModel, type=WebSocketAgentMessageType.BASE): pass

-class AgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_TEXT):
+class WebSocketAgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.TEXT):
    class Payload(BaseModel):
        text: str

@@ -89,11 +100,11 @@ class AgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGE
        return cls(data=cls.Payload(text=text))


-class AgentStartMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_START):
+class WebSocketAgentStartMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.START):
    pass

-class AgentReadyMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_READY):
+class WebSocketAgentReadyMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.READY):
    pass

-class AgentStopMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_STOP):
+class WebSocketAgentStopMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.STOP):
    pass
--- a/vocode/models/synthesizer.py
+++ b/vocode/models/synthesizer.py
@@ -4,10 +4,10 @@ from .audio_encoding import AudioEncoding
 from ..output_device.base_output_device import BaseOutputDevice

 class SynthesizerType(str, Enum):
-    BASE = "base"
-    AZURE = "azure"
-    GOOGLE = "google"
-    ELEVEN_LABS = "eleven_labs"
+    BASE = "synthesizer_base"
+    AZURE = "synthesizer_azure"
+    GOOGLE = "synthesizer_google"
+    ELEVEN_LABS = "synthesizer_eleven_labs"

 class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE):
    sampling_rate: int
--- a/vocode/models/telephony.py
+++ b/vocode/models/telephony.py
@@ -1,14 +1,11 @@
-from pydantic import BaseModel
+from vocode.models.model import BaseModel
 from vocode.models.agent import AgentConfig, InformationRetrievalAgentConfig

-
 class CallEntity(BaseModel):
    phone_number: str
-    descriptor: str
-

 class CreateCallRequest(BaseModel):
    recipient: CallEntity
    caller: CallEntity
-    agent_config: InformationRetrievalAgentConfig  # TODO switch to AgentConfig
+    agent_config: AgentConfig
    # TODO add IVR/etc.
--- a/vocode/models/transcriber.py
+++ b/vocode/models/transcriber.py
@@ -6,10 +6,10 @@ from ..input_device.base_input_device import BaseInputDevice


 class TranscriberType(str, Enum):
-    BASE = "base"
-    DEEPGRAM = "deepgram"
-    GOOGLE = "google"
-    ASSEMBLY_AI = "assembly_ai"
+    BASE = "transcriber_base"
+    DEEPGRAM = "transcriber_deepgram"
+    GOOGLE = "transcriber_google"
+    ASSEMBLY_AI = "transcriber_assembly_ai"


 class EndpointingConfig(BaseModel):
--- a/vocode/models/websocket.py
+++ b/vocode/models/websocket.py
@@ -6,11 +6,11 @@ from .agent import AgentConfig
 from .synthesizer import SynthesizerConfig

 class WebSocketMessageType(str, Enum):
-    BASE = 'base'
-    START = 'start'
-    AUDIO = 'audio'
-    READY = 'ready'
-    STOP = 'stop'
+    BASE = 'websocket_base'
+    START = 'websocket_start'
+    AUDIO = 'websocket_audio'
+    READY = 'websocket_ready'
+    STOP = 'websocket_stop'

 class WebSocketMessage(TypedModel, type=WebSocketMessageType.BASE): pass

--- a/vocode/outbound_call.py
+++ b/vocode/outbound_call.py
@@ -0,0 +1,27 @@
+from .models.telephony import CallEntity, CreateCallRequest
+import requests
+from . import api_key, BASE_URL
+
+VOCODE_OUTBOUND_CALL_URL = f"https://{BASE_URL}/create_outbound_call"
+
+class OutboundCall:
+
+    def __init__(self, recipient: CallEntity, caller: CallEntity, agent_config):
+        self.recipient = recipient
+        self.caller = caller
+        self.agent_config = agent_config
+
+    def start(self):
+        return requests.post(
+            VOCODE_OUTBOUND_CALL_URL,
+            headers={
+                "Authorization": f"Bearer {api_key}"
+            },
+            json=CreateCallRequest(
+                recipient=self.recipient,
+                caller=self.caller,
+                agent_config=self.agent_config
+            ).dict()
+        )
+
+    
--- a/vocode/user_implemented_agent/base_agent.py
+++ b/vocode/user_implemented_agent/base_agent.py
@@ -5,9 +5,6 @@ class BaseAgent():

    def __init__(self):
        self.app = FastAPI()
-
-    async def respond(self, human_input) -> str:
-        raise NotImplementedError
    
    def run(self, host="localhost", port=3000):
        uvicorn.run(self.app, host=host, port=port)
--- a/vocode/user_implemented_agent/restful_agent.py
+++ b/vocode/user_implemented_agent/restful_agent.py
@@ -1,6 +1,7 @@
 from .base_agent import BaseAgent
-from ..models.agent import RESTfulAgentInput, RESTfulAgentOutput
+from ..models.agent import RESTfulAgentInput, RESTfulAgentOutput, RESTfulAgentText, RESTfulAgentEnd
 from pydantic import BaseModel
+from typing import Union
 from fastapi import APIRouter

 class RESTfulAgent(BaseAgent):
@@ -9,7 +10,10 @@ class RESTfulAgent(BaseAgent):
        super().__init__()
        self.app.post("/respond")(self.respond_rest)

-    async def respond_rest(self, request: RESTfulAgentInput) -> RESTfulAgentOutput:
-        response = await self.respond(request.human_input)
-        return RESTfulAgentOutput(response=response)
+    async def respond(self, human_input) -> RESTfulAgentOutput:
+        raise NotImplementedError
+
+    async def respond_rest(self, request: RESTfulAgentInput) -> Union[RESTfulAgentText, RESTfulAgentEnd]:
+        response = await self.respond(request.human_input)
+        return response

--- a/vocode/user_implemented_agent/websocket_agent.py
+++ b/vocode/user_implemented_agent/websocket_agent.py
@@ -1,8 +1,16 @@
 from .base_agent import BaseAgent
 from pydantic import BaseModel
 import typing
+from typing import Union
 from fastapi import APIRouter, WebSocket
-from ..models.agent import AgentStartMessage, AgentReadyMessage, AgentTextMessage, WebSocketAgentMessage, WebSocketAgentMessageType
+from ..models.agent import (
+    WebSocketAgentStartMessage, 
+    WebSocketAgentReadyMessage, 
+    WebSocketAgentTextMessage, 
+    WebSocketAgentStopMessage, 
+    WebSocketAgentMessage, 
+    WebSocketAgentMessageType
+)

 class WebSocketAgent(BaseAgent):
        
@@ -10,16 +18,19 @@ class WebSocketAgent(BaseAgent):
        super().__init__()
        self.app.websocket("/respond")(self.respond_websocket)

+    async def respond(self, human_input) -> Union[WebSocketAgentTextMessage, WebSocketAgentStopMessage]:
+        raise NotImplementedError
+
    async def respond_websocket(self, websocket: WebSocket):
        await websocket.accept()
-        AgentStartMessage.parse_obj(await websocket.receive_json())
-        await websocket.send_text(AgentReadyMessage().json())
+        WebSocketAgentStartMessage.parse_obj(await websocket.receive_json())
+        await websocket.send_text(WebSocketAgentReadyMessage().json())
        while True:
-            message = WebSocketAgentMessage.parse_obj(await websocket.receive_json())
-            if message.type == WebSocketAgentMessageType.AGENT_STOP:
+            input_message = WebSocketAgentMessage.parse_obj(await websocket.receive_json())
+            if input_message.type == WebSocketAgentMessageType.STOP:
                break
-            text_message = typing.cast(AgentTextMessage, message)
-            response = await self.respond(text_message.data.text)
-            await websocket.send_text(AgentTextMessage.from_text(response).json())
+            text_message = typing.cast(WebSocketAgentTextMessage, input_message)
+            output_response = await self.respond(text_message.data.text)
+            await websocket.send_text(output_response.json())
        await websocket.close()