add voice configs + transcriber configs

2023-03-12 00:29:12 -08:00 · 2023-03-12 00:29:12 -08:00 · 1807fbef0d
commit 1807fbef0d
parent 68a51e7131
6 changed files with 62 additions and 12 deletions
--- a/vocode/input_device/telephone_input.py
+++ b/vocode/input_device/telephone_input.py
@@ -0,0 +1,9 @@
+from vocode.input_device.base_input_device import BaseInputDevice
+from vocode.models.audio_encoding import AudioEncoding
+
+
+class TelephoneInput(BaseInputDevice):
+    def __init__(self):
+        super().__init__(
+            sampling_rate=8000, audio_encoding=AudioEncoding.MULAW, chunk_size=160
+        )
--- a/vocode/models/agent.py
+++ b/vocode/models/agent.py
@@ -1,9 +1,11 @@
-from typing import Optional
+from typing import Optional, Union
 from enum import Enum

 from vocode.models.message import BaseMessage
 from .model import TypedModel, BaseModel

+FILLER_AUDIO_DEFAULT_SILENCE_THRESHOLD_SECONDS = 0.5
+

 class AgentType(str, Enum):
    BASE = "agent_base"
@@ -16,11 +18,16 @@ class AgentType(str, Enum):
    WEBSOCKET_USER_IMPLEMENTED = "agent_websocket_user_implemented"


+class FillerAudioConfig(BaseModel):
+    silence_threshold_seconds: float = FILLER_AUDIO_DEFAULT_SILENCE_THRESHOLD_SECONDS
+
+
 class AgentConfig(TypedModel, type=AgentType.BASE):
    initial_message: Optional[BaseMessage] = None
    generate_responses: bool = True
    allowed_idle_time_seconds: Optional[float] = None
    end_conversation_on_goodbye: bool = False
+    send_filler_audio: Union[bool, FillerAudioConfig] = False


 class LLMAgentConfig(AgentConfig, type=AgentType.LLM):
--- a/vocode/models/telephony.py
+++ b/vocode/models/telephony.py
@@ -2,6 +2,7 @@ from typing import Optional
 from vocode.models.model import BaseModel
 from vocode.models.agent import AgentConfig
 from vocode.models.synthesizer import SynthesizerConfig
+from vocode.models.transcriber import TranscriberConfig


 class CallEntity(BaseModel):
@@ -9,7 +10,9 @@ class CallEntity(BaseModel):


 class CreateInboundCall(BaseModel):
+    transcriber_config: Optional[TranscriberConfig] = None
    agent_config: AgentConfig
+    synthesizer_config: Optional[SynthesizerConfig] = None
    twilio_sid: str
    conversation_id: Optional[str] = None

@@ -17,6 +20,7 @@ class CreateInboundCall(BaseModel):
 class CreateOutboundCall(BaseModel):
    recipient: CallEntity
    caller: CallEntity
+    transcriber_config: Optional[TranscriberConfig] = None
    agent_config: AgentConfig
    synthesizer_config: Optional[SynthesizerConfig] = None
    conversation_id: Optional[str] = None
--- a/vocode/output_device/telephone_output.py
+++ b/vocode/output_device/telephone_output.py
@@ -0,0 +1,7 @@
+from .base_output_device import BaseOutputDevice
+from ..models.audio_encoding import AudioEncoding
+
+
+class TelephoneOutput(BaseOutputDevice):
+    def __init__(self):
+        super().__init__(sampling_rate=8000, audio_encoding=AudioEncoding.MULAW)
--- a/vocode/telephony/inbound_call_server.py
+++ b/vocode/telephony/inbound_call_server.py
@@ -2,6 +2,9 @@ from fastapi import FastAPI, Response, Form
 from typing import Optional
 import requests
 import uvicorn
+from vocode.models.synthesizer import SynthesizerConfig
+
+from vocode.models.transcriber import TranscriberConfig
 from .. import api_key, BASE_URL

 from ..models.agent import AgentConfig
@@ -12,9 +15,15 @@ VOCODE_INBOUND_CALL_URL = f"https://{BASE_URL}/create_inbound_call"

 class InboundCallServer:
    def __init__(
-        self, agent_config: AgentConfig, response_on_rate_limit: Optional[str] = None
+        self,
+        agent_config: AgentConfig,
+        transcriber_config: Optional[TranscriberConfig] = None,
+        synthesizer_config: Optional[SynthesizerConfig] = None,
+        response_on_rate_limit: Optional[str] = None,
    ):
        self.agent_config = agent_config
+        self.transcriber_config = transcriber_config
+        self.synthesizer_config = synthesizer_config
        self.app = FastAPI()
        self.app.post("/vocode")(self.handle_call)
        self.response_on_rate_limit = (
@@ -27,7 +36,10 @@ class InboundCallServer:
            VOCODE_INBOUND_CALL_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            json=CreateInboundCall(
-                agent_config=self.agent_config, twilio_sid=twilio_sid
+                agent_config=self.agent_config,
+                twilio_sid=twilio_sid,
+                transcriber_config=self.transcriber_config,
+                synthesizer_config=self.synthesizer_config,
            ).dict(),
        )
        if response.status_code == 429:
--- a/vocode/telephony/outbound_call.py
+++ b/vocode/telephony/outbound_call.py
@@ -1,27 +1,38 @@
+from typing import Optional
+from vocode.models.agent import AgentConfig
+from vocode.models.synthesizer import SynthesizerConfig
+from vocode.models.transcriber import TranscriberConfig
 from ..models.telephony import CallEntity, CreateOutboundCall
 import requests
 from .. import api_key, BASE_URL

 VOCODE_OUTBOUND_CALL_URL = f"https://{BASE_URL}/create_outbound_call"

-class OutboundCall:

-    def __init__(self, recipient: CallEntity, caller: CallEntity, agent_config):
+class OutboundCall:
+    def __init__(
+        self,
+        recipient: CallEntity,
+        caller: CallEntity,
+        agent_config: AgentConfig,
+        transcriber_config: Optional[TranscriberConfig] = None,
+        synthesizer_config: Optional[SynthesizerConfig] = None,
+    ):
        self.recipient = recipient
        self.caller = caller
        self.agent_config = agent_config
+        self.transcriber_config = transcriber_config
+        self.synthesizer_config = synthesizer_config

    def start(self):
        return requests.post(
            VOCODE_OUTBOUND_CALL_URL,
-            headers={
-                "Authorization": f"Bearer {api_key}"
-            },
+            headers={"Authorization": f"Bearer {api_key}"},
            json=CreateOutboundCall(
                recipient=self.recipient,
                caller=self.caller,
-                agent_config=self.agent_config
-            ).dict()
+                agent_config=self.agent_config,
+                transcriber_config=self.transcriber_config,
+                synthesizer_config=self.synthesizer_config,
+            ).dict(),
        )
-
-