python SDK

2023-02-24 10:47:17 -08:00 · 2023-02-24 10:47:17 -08:00 · 6dc9fceeb5
commit 6dc9fceeb5
18 changed files with 482 additions and 0 deletions
--- a/vocode/models/agent.py
+++ b/vocode/models/agent.py
@ -0,0 +1,37 @@
+from typing import Optional
+from enum import Enum
+from .model import TypedModel
+
+
+# String-valued tags for the agent configuration classes in this module. Each
+# member is passed as the ``type=`` class keyword of one config class below.
+# Because the enum mixes in ``str``, members compare equal to their plain
+# string values.
+class AgentType(str, Enum):
+    BASE = "base"
+    LLM = "llm"
+    CHAT_GPT = "chat_gpt"
+    ECHO = "echo"
+    INFORMATION_RETRIEVAL = "information_retrieval"
+
+
+# Base agent configuration, registered with TypedModel under AgentType.BASE.
+# The other agent config classes in this module inherit from it.
+class AgentConfig(TypedModel, type=AgentType.BASE):
+    # Optional; defaults to None when not supplied.
+    initial_message: Optional[str] = None
+
+
+# Agent configuration registered under AgentType.LLM; extends AgentConfig.
+class LLMAgentConfig(AgentConfig, type=AgentType.LLM):
+    # Required: no default is declared.
+    prompt_preamble: str
+    # Optional; defaults to None.
+    expected_first_prompt: Optional[str] = None
+
+# Agent configuration registered under AgentType.CHAT_GPT; extends AgentConfig.
+# Declares the same two fields as LLMAgentConfig but is a separate class.
+class ChatGPTAgentConfig(AgentConfig, type=AgentType.CHAT_GPT):
+    # Required: no default is declared.
+    prompt_preamble: str
+    # Optional; defaults to None.
+    expected_first_prompt: Optional[str] = None
+
+# Agent configuration registered under AgentType.INFORMATION_RETRIEVAL; extends
+# AgentConfig. All four fields below are required (none declares a default).
+class InformationRetrievalAgentConfig(
+    AgentConfig, type=AgentType.INFORMATION_RETRIEVAL
+):
+    recipient_descriptor: str
+    caller_descriptor: str
+    goal_description: str
+    # List of strings. NOTE(review): presumably the names of the items the
+    # agent should collect -- confirm against the consumer of this config.
+    fields: list[str]
+    # TODO: add fields for IVR, voicemail
+
+
+# Agent configuration registered under AgentType.ECHO; adds no fields of its
+# own beyond those inherited from AgentConfig.
+class EchoAgentConfig(AgentConfig, type=AgentType.ECHO):
+    pass
--- a/vocode/models/audio_encoding.py
+++ b/vocode/models/audio_encoding.py
@ -0,0 +1,5 @@
+from enum import Enum
+
+# String-valued names of the audio encodings referenced by the config models.
+# Because the enum mixes in ``str``, members compare equal to their plain
+# string values.
+class AudioEncoding(str, Enum):
+    LINEAR16 = "linear16"
+    MULAW = "mulaw"
--- a/vocode/models/model.py
+++ b/vocode/models/model.py
@ -0,0 +1,51 @@
+import pydantic
+
+class BaseModel(pydantic.BaseModel):
+
+    def __init__(self, **data):
+        for key, value in data.items():
+            if isinstance(value, dict):
+                data[key] = self.parse_obj(value)
+        super().__init__(**data)
+
+# Adapted from https://github.com/pydantic/pydantic/discussions/3091
+class TypedModel(BaseModel):
+
+    _subtypes_ = []
+
+    def __init_subclass__(cls, type=None):
+        cls._subtypes_.append([type, cls])
+
+    @classmethod
+    def get_cls(_cls, type):
+        for t, cls in _cls._subtypes_:
+            if t == type:
+                return cls
+        raise ValueError(f'Unknown type {type}')
+    
+    @classmethod
+    def get_type(_cls, cls_name):
+        for t, cls in _cls._subtypes_:
+            if cls.__name__ == cls_name:
+                return t
+        raise ValueError(f'Unknown class {cls_name}')
+    
+    @classmethod
+    def parse_obj(cls, obj):
+        data_type = obj.get('type')
+        if data_type is None:
+            raise ValueError(f'type is required for {cls.__name__}')
+    
+        sub = cls.get_cls(data_type)
+        if sub is None:
+            raise ValueError(f'Unknown type {data_type}')
+        return sub(**obj)
+
+    def _iter(self, **kwargs):
+        yield 'type', self.get_type(self.__class__.__name__)
+        yield from super()._iter(**kwargs)
+
+    @property
+    def type(self):
+        return self.get_type(self.__class__.__name__)
+
--- a/vocode/models/synthesizer.py
+++ b/vocode/models/synthesizer.py
@ -0,0 +1,27 @@
+from enum import Enum
+from .model import TypedModel
+from .audio_encoding import AudioEncoding
+from ..output_device.base_output_device import BaseOutputDevice
+
+# String-valued tags for the synthesizer configuration classes in this module.
+# Each member is passed as the ``type=`` class keyword of one config class below.
+class SynthesizerType(str, Enum):
+    BASE = "base"
+    AZURE = "azure"
+    GOOGLE = "google"
+    ELEVEN_LABS = "eleven_labs"
+
+class SynthesizerConfig(TypedModel, type=SynthesizerType.BASE):
+    sampling_rate: int
+    audio_encoding: AudioEncoding
+
+    @classmethod
+    def from_output_device(cls, output_device: BaseOutputDevice):
+        return cls(sampling_rate=output_device.sampling_rate, audio_encoding=output_device.audio_encoding)
+
+# Synthesizer configuration registered under SynthesizerType.AZURE; adds no
+# fields beyond those inherited from SynthesizerConfig.
+class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE):
+    pass
+
+# Synthesizer configuration registered under SynthesizerType.GOOGLE; adds no
+# fields beyond those inherited from SynthesizerConfig.
+class GoogleSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.GOOGLE):
+    pass
+
+# Synthesizer configuration registered under SynthesizerType.ELEVEN_LABS; adds
+# no fields beyond those inherited from SynthesizerConfig.
+class ElevenLabsSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.ELEVEN_LABS):
+    pass
--- a/vocode/models/telephony.py
+++ b/vocode/models/telephony.py
@ -0,0 +1,14 @@
+from pydantic import BaseModel
+from vocode.models.agent import AgentConfig, InformationRetrievalAgentConfig
+
+
+# One party of a call. Built directly on pydantic's BaseModel (imported at the
+# top of this module), not on the project's TypedModel, so it has no type tag.
+class CallEntity(BaseModel):
+    # Both fields are required strings. NOTE(review): the expected phone number
+    # format is not visible here -- confirm with the API that consumes it.
+    phone_number: str
+    descriptor: str
+
+
+# Request model holding the two call parties and the agent configuration.
+# Built directly on pydantic's BaseModel; all three fields are required.
+class CreateCallRequest(BaseModel):
+    recipient: CallEntity
+    caller: CallEntity
+    # Currently restricted to the information-retrieval agent config.
+    agent_config: InformationRetrievalAgentConfig  # TODO switch to AgentConfig
+    # TODO add IVR/etc.
--- a/vocode/models/transcriber.py
+++ b/vocode/models/transcriber.py
@ -0,0 +1,31 @@
+from enum import Enum
+from typing import Optional
+from .audio_encoding import AudioEncoding
+from .model import TypedModel
+from ..input_device.base_input_device import BaseInputDevice
+
+# String-valued tags for the transcriber configuration classes in this module.
+# Each member is passed as the ``type=`` class keyword of one config class below.
+class TranscriberType(str, Enum):
+    BASE = "base"
+    DEEPGRAM = "deepgram"
+    GOOGLE = "google"
+
+class TranscriberConfig(TypedModel, type=TranscriberType.BASE):
+    sampling_rate: int
+    audio_encoding: AudioEncoding
+    chunk_size: int
+
+    @classmethod
+    def from_input_device(cls, input_device: BaseInputDevice):
+        return cls(
+            sampling_rate=input_device.sampling_rate,
+            audio_encoding=input_device.audio_encoding,
+            chunk_size=input_device.chunk_size)
+
+# Transcriber configuration registered under TranscriberType.DEEPGRAM; extends
+# TranscriberConfig with three optional settings.
+class DeepgramTranscriberConfig(TranscriberConfig, type=TranscriberType.DEEPGRAM):
+    # Optional; defaults to None.
+    model: Optional[str] = None
+    # Defaults to False.
+    should_warmup_model: bool = False
+    # Optional; defaults to None.
+    version: Optional[str] = None
+
+# Transcriber configuration registered under TranscriberType.GOOGLE; extends
+# TranscriberConfig with two optional settings.
+class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE):
+    # Optional; defaults to None.
+    model: Optional[str] = None
+    # Defaults to False.
+    should_warmup_model: bool = False
--- a/vocode/models/websocket.py
+++ b/vocode/models/websocket.py
@ -0,0 +1,36 @@
+import base64
+from enum import Enum
+from .model import TypedModel
+from .transcriber import TranscriberConfig
+from .agent import AgentConfig
+from .synthesizer import SynthesizerConfig
+
+# String-valued tags for the websocket message classes in this module. Each
+# member is passed as the ``type=`` class keyword of one message class below.
+class WebSocketMessageType(str, Enum):
+    BASE = 'base'
+    START = 'start'
+    AUDIO = 'audio'
+    READY = 'ready'
+    STOP = 'stop'
+
+class WebSocketMessage(TypedModel, type=WebSocketMessageType.BASE): pass
+
+class AudioMessage(WebSocketMessage, type=WebSocketMessageType.AUDIO):
+    data: str
+
+    @classmethod
+    def from_bytes(cls, chunk: bytes):
+        return cls(data=base64.b64encode(chunk).decode('utf-8'))
+
+    def get_bytes(self) -> bytes:
+        return base64.b64decode(self.data)
+
+# Message registered under WebSocketMessageType.START. It bundles one
+# transcriber, one agent and one synthesizer configuration; all three fields
+# are required (none declares a default).
+class StartMessage(WebSocketMessage, type=WebSocketMessageType.START):
+    transcriber_config: TranscriberConfig
+    agent_config: AgentConfig
+    synthesizer_config: SynthesizerConfig
+
+# Message registered under WebSocketMessageType.READY; declares no fields.
+class ReadyMessage(WebSocketMessage, type=WebSocketMessageType.READY):
+    pass
+
+# Message registered under WebSocketMessageType.STOP; declares no fields.
+class StopMessage(WebSocketMessage, type=WebSocketMessageType.STOP):
+    pass