monster commit

2023-03-03 18:24:56 -08:00 · 2023-03-03 18:24:56 -08:00 · de6d76c955
commit de6d76c955
parent 89d4dc2370
14 changed files with 155 additions and 66 deletions
--- a/vocode/__init__.py
+++ b/vocode/__init__.py
@@ -3,4 +3,5 @@ from dotenv import load_dotenv

 load_dotenv()

-api_key = os.getenv("VOCODE_API_KEY")
+api_key = os.getenv("VOCODE_API_KEY")
+BASE_URL = "api.vocode.dev"
--- a/vocode/conversation.py
+++ b/vocode/conversation.py
@@ -1,4 +1,6 @@
 import websockets
+from websockets.exceptions import ConnectionClosedOK
+from websockets.client import WebSocketClientProtocol
 import asyncio
 from dotenv import load_dotenv
 import os
@@ -14,10 +16,9 @@ from .models.transcriber import TranscriberConfig
 from .models.agent import AgentConfig
 from .models.synthesizer import SynthesizerConfig
 from .models.websocket import ReadyMessage, AudioMessage, StartMessage, StopMessage
-from . import api_key
-
-VOCODE_WEBSOCKET_URL = f"wss://api.vocode.dev/conversation"
+from . import api_key, BASE_URL

+VOCODE_WEBSOCKET_URL = f"wss://{BASE_URL}/conversation"

 class Conversation:
    def __init__(
@@ -62,7 +63,7 @@ class Conversation:
    async def start(self):
        async with websockets.connect(f"{VOCODE_WEBSOCKET_URL}?key={api_key}") as ws:

-            async def sender(ws):
+            async def sender(ws: WebSocketClientProtocol):
                start_message = StartMessage(
                    transcriber_config=self.transcriber_config,
                    agent_config=self.agent_config,
@@ -74,11 +75,15 @@ class Conversation:
                while self.active:
                    data = self.input_device.get_audio()
                    if data:
-                        await ws.send(AudioMessage.from_bytes(data).json())
+                        try:
+                            await ws.send(AudioMessage.from_bytes(data).json())
+                        except ConnectionClosedOK:
+                            self.deactivate()
+                            return
                        await asyncio.sleep(0)
                await ws.send(StopMessage().json())

-            async def receiver(ws):
+            async def receiver(ws: WebSocketClientProtocol):
                ReadyMessage.parse_raw(await ws.recv())
                self.receiver_ready = True
                async for msg in ws:
--- a/vocode/models/agent.py
+++ b/vocode/models/agent.py
@@ -4,14 +4,14 @@ from .model import TypedModel, BaseModel


 class AgentType(str, Enum):
-    BASE = "base"
-    LLM = "llm"
-    CHAT_GPT_ALPHA = "chat_gpt_alpha"
-    CHAT_GPT = "chat_gpt"
-    ECHO = "echo"
-    INFORMATION_RETRIEVAL = "information_retrieval"
-    RESTFUL_USER_IMPLEMENTED = "restful_user_implemented"
-    WEBSOCKET_USER_IMPLEMENTED = "websocket_user_implemented"
+    BASE = "agent_base"
+    LLM = "agent_llm"
+    CHAT_GPT_ALPHA = "agent_chat_gpt_alpha"
+    CHAT_GPT = "agent_chat_gpt"
+    ECHO = "agent_echo"
+    INFORMATION_RETRIEVAL = "agent_information_retrieval"
+    RESTFUL_USER_IMPLEMENTED = "agent_restful_user_implemented"
+    WEBSOCKET_USER_IMPLEMENTED = "agent_websocket_user_implemented"


 class AgentConfig(TypedModel, type=AgentType.BASE):
@@ -57,9 +57,20 @@ class RESTfulUserImplementedAgentConfig(AgentConfig, type=AgentType.RESTFUL_USER
 class RESTfulAgentInput(BaseModel):
    human_input: str

-class RESTfulAgentOutput(BaseModel):
+class RESTfulAgentOutputType(str, Enum):
+    BASE = "restful_agent_base"
+    TEXT = "restful_agent_text"
+    END = "restful_agent_end"
+
+class RESTfulAgentOutput(TypedModel, type=RESTfulAgentOutputType.BASE):
+    pass
+
+class RESTfulAgentText(RESTfulAgentOutput, type=RESTfulAgentOutputType.TEXT):
    response: str

+class RESTfulAgentEnd(RESTfulAgentOutput, type=RESTfulAgentOutputType.END):
+    pass
+
 class WebSocketUserImplementedAgentConfig(AgentConfig, type=AgentType.WEBSOCKET_USER_IMPLEMENTED):
    class RouteConfig(BaseModel):
        url: str
@@ -70,15 +81,15 @@ class WebSocketUserImplementedAgentConfig(AgentConfig, type=AgentType.WEBSOCKET_
    # send_message_on_cut_off: bool = False

 class WebSocketAgentMessageType(str, Enum):
-    AGENT_BASE = 'agent_base'
-    AGENT_START = 'agent_start'
-    AGENT_TEXT = 'agent_text'
-    AGENT_READY = 'agent_ready'
-    AGENT_STOP = 'agent_stop'
+    BASE = 'websocket_agent_base'
+    START = 'websocket_agent_start'
+    TEXT = 'websocket_agent_text'
+    READY = 'websocket_agent_ready'
+    STOP = 'websocket_agent_stop'

-class WebSocketAgentMessage(TypedModel, type=WebSocketAgentMessageType.AGENT_BASE): pass
+class WebSocketAgentMessage(TypedModel, type=WebSocketAgentMessageType.BASE): pass

-class AgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_TEXT):
+class WebSocketAgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.TEXT):
    class Payload(BaseModel):
        text: str

@@ -89,11 +100,11 @@ class AgentTextMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGE
        return cls(data=cls.Payload(text=text))


-class AgentStartMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_START):
+class WebSocketAgentStartMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.START):
    pass

-class AgentReadyMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_READY):
+class WebSocketAgentReadyMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.READY):
    pass

-class AgentStopMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.AGENT_STOP):
+class WebSocketAgentStopMessage(WebSocketAgentMessage, type=WebSocketAgentMessageType.STOP):
    pass
--- a/vocode/models/synthesizer.py
+++ b/vocode/models/synthesizer.py
@@ -4,10 +4,10 @@ from .audio_encoding import AudioEncoding
 from ..output_device.base_output_device import BaseOutputDevice

 class SynthesizerType(str, Enum):
-    BASE = "base"
-    AZURE = "azure"
-    GOOGLE = "google"
-    ELEVEN_LABS = "eleven_labs"
+    BASE = "synthesizer_base"
+    AZURE = "synthesizer_azure"
+    GOOGLE = "synthesizer_google"
+    ELEVEN_LABS = "synthesizer_eleven_labs"

 class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE):
    sampling_rate: int
--- a/vocode/models/telephony.py
+++ b/vocode/models/telephony.py
@@ -1,14 +1,11 @@
-from pydantic import BaseModel
+from vocode.models.model import BaseModel
 from vocode.models.agent import AgentConfig, InformationRetrievalAgentConfig

-
 class CallEntity(BaseModel):
    phone_number: str
-    descriptor: str
-

 class CreateCallRequest(BaseModel):
    recipient: CallEntity
    caller: CallEntity
-    agent_config: InformationRetrievalAgentConfig  # TODO switch to AgentConfig
+    agent_config: AgentConfig
    # TODO add IVR/etc.
--- a/vocode/models/transcriber.py
+++ b/vocode/models/transcriber.py
@@ -6,10 +6,10 @@ from ..input_device.base_input_device import BaseInputDevice


 class TranscriberType(str, Enum):
-    BASE = "base"
-    DEEPGRAM = "deepgram"
-    GOOGLE = "google"
-    ASSEMBLY_AI = "assembly_ai"
+    BASE = "transcriber_base"
+    DEEPGRAM = "transcriber_deepgram"
+    GOOGLE = "transcriber_google"
+    ASSEMBLY_AI = "transcriber_assembly_ai"


 class EndpointingConfig(BaseModel):
--- a/vocode/models/websocket.py
+++ b/vocode/models/websocket.py
@@ -6,11 +6,11 @@ from .agent import AgentConfig
 from .synthesizer import SynthesizerConfig

 class WebSocketMessageType(str, Enum):
-    BASE = 'base'
-    START = 'start'
-    AUDIO = 'audio'
-    READY = 'ready'
-    STOP = 'stop'
+    BASE = 'websocket_base'
+    START = 'websocket_start'
+    AUDIO = 'websocket_audio'
+    READY = 'websocket_ready'
+    STOP = 'websocket_stop'

 class WebSocketMessage(TypedModel, type=WebSocketMessageType.BASE): pass

--- a/vocode/outbound_call.py
+++ b/vocode/outbound_call.py
@@ -0,0 +1,27 @@
+from .models.telephony import CallEntity, CreateCallRequest
+import requests
+from . import api_key, BASE_URL
+
+VOCODE_OUTBOUND_CALL_URL = f"https://{BASE_URL}/create_outbound_call"
+
+class OutboundCall:
+
+    def __init__(self, recipient: CallEntity, caller: CallEntity, agent_config):
+        self.recipient = recipient
+        self.caller = caller
+        self.agent_config = agent_config
+
+    def start(self):
+        return requests.post(
+            VOCODE_OUTBOUND_CALL_URL,
+            headers={
+                "Authorization": f"Bearer {api_key}"
+            },
+            json=CreateCallRequest(
+                recipient=self.recipient,
+                caller=self.caller,
+                agent_config=self.agent_config
+            ).dict()
+        )
+
+    
--- a/vocode/user_implemented_agent/base_agent.py
+++ b/vocode/user_implemented_agent/base_agent.py
@@ -5,9 +5,6 @@ class BaseAgent():

    def __init__(self):
        self.app = FastAPI()
-
-    async def respond(self, human_input) -> str:
-        raise NotImplementedError
    
    def run(self, host="localhost", port=3000):
        uvicorn.run(self.app, host=host, port=port)
--- a/vocode/user_implemented_agent/restful_agent.py
+++ b/vocode/user_implemented_agent/restful_agent.py
@@ -1,6 +1,7 @@
 from .base_agent import BaseAgent
-from ..models.agent import RESTfulAgentInput, RESTfulAgentOutput
+from ..models.agent import RESTfulAgentInput, RESTfulAgentOutput, RESTfulAgentText, RESTfulAgentEnd
 from pydantic import BaseModel
+from typing import Union
 from fastapi import APIRouter

 class RESTfulAgent(BaseAgent):
@@ -9,7 +10,10 @@ class RESTfulAgent(BaseAgent):
        super().__init__()
        self.app.post("/respond")(self.respond_rest)

-    async def respond_rest(self, request: RESTfulAgentInput) -> RESTfulAgentOutput:
-        response = await self.respond(request.human_input)
-        return RESTfulAgentOutput(response=response)
+    async def respond(self, human_input) -> RESTfulAgentOutput:
+        raise NotImplementedError
+
+    async def respond_rest(self, request: RESTfulAgentInput) -> Union[RESTfulAgentText, RESTfulAgentEnd]:
+        response = await self.respond(request.human_input)
+        return response

--- a/vocode/user_implemented_agent/websocket_agent.py
+++ b/vocode/user_implemented_agent/websocket_agent.py
@@ -1,8 +1,16 @@
 from .base_agent import BaseAgent
 from pydantic import BaseModel
 import typing
+from typing import Union
 from fastapi import APIRouter, WebSocket
-from ..models.agent import AgentStartMessage, AgentReadyMessage, AgentTextMessage, WebSocketAgentMessage, WebSocketAgentMessageType
+from ..models.agent import (
+    WebSocketAgentStartMessage, 
+    WebSocketAgentReadyMessage, 
+    WebSocketAgentTextMessage, 
+    WebSocketAgentStopMessage, 
+    WebSocketAgentMessage, 
+    WebSocketAgentMessageType
+)

 class WebSocketAgent(BaseAgent):
        
@@ -10,16 +18,19 @@ class WebSocketAgent(BaseAgent):
        super().__init__()
        self.app.websocket("/respond")(self.respond_websocket)

+    async def respond(self, human_input) -> Union[WebSocketAgentTextMessage, WebSocketAgentStopMessage]:
+        raise NotImplementedError
+
    async def respond_websocket(self, websocket: WebSocket):
        await websocket.accept()
-        AgentStartMessage.parse_obj(await websocket.receive_json())
-        await websocket.send_text(AgentReadyMessage().json())
+        WebSocketAgentStartMessage.parse_obj(await websocket.receive_json())
+        await websocket.send_text(WebSocketAgentReadyMessage().json())
        while True:
-            message = WebSocketAgentMessage.parse_obj(await websocket.receive_json())
-            if message.type == WebSocketAgentMessageType.AGENT_STOP:
+            input_message = WebSocketAgentMessage.parse_obj(await websocket.receive_json())
+            if input_message.type == WebSocketAgentMessageType.STOP:
                break
-            text_message = typing.cast(AgentTextMessage, message)
-            response = await self.respond(text_message.data.text)
-            await websocket.send_text(AgentTextMessage.from_text(response).json())
+            text_message = typing.cast(WebSocketAgentTextMessage, input_message)
+            output_response = await self.respond(text_message.data.text)
+            await websocket.send_text(output_response.json())
        await websocket.close()