diff --git a/vocode/conversation.py b/vocode/conversation.py index fa22d77..9fc2fa0 100644 --- a/vocode/conversation.py +++ b/vocode/conversation.py @@ -16,17 +16,17 @@ from .models.synthesizer import SynthesizerConfig from .models.websocket import ReadyMessage, AudioMessage, StartMessage, StopMessage from . import api_key -VOCODE_WEBSOCKET_URL = f'wss://api.vocode.dev/conversation' +VOCODE_WEBSOCKET_URL = "wss://api.vocode.dev/conversation" + class Conversation: - def __init__( self, - input_device: BaseInputDevice, - output_device: BaseOutputDevice, - transcriber_config: TranscriberConfig, + input_device: BaseInputDevice, + output_device: BaseOutputDevice, + transcriber_config: TranscriberConfig, agent_config: AgentConfig, - synthesizer_config: SynthesizerConfig + synthesizer_config: SynthesizerConfig, ): self.input_device = input_device self.output_device = output_device @@ -43,7 +43,7 @@ class Conversation: while not self.receiver_ready: await asyncio.sleep(0.1) return True - def deactivate(self): self.active = False @@ -55,16 +55,18 @@ class Conversation: await self.output_device.send_async(audio) except queue.Empty: continue + loop = asyncio.new_event_loop() loop.run_until_complete(run()) - async def start(self): async with websockets.connect(f"{VOCODE_WEBSOCKET_URL}?key={api_key}") as ws: + async def sender(ws): start_message = StartMessage( - transcriber_config=self.transcriber_config, - agent_config=self.agent_config, - synthesizer_config=self.synthesizer_config + transcriber_config=self.transcriber_config, + agent_config=self.agent_config, + synthesizer_config=self.synthesizer_config, ) await ws.send(start_message.json()) await self.wait_for_ready() @@ -83,8 +85,6 @@ class Conversation: audio_message = AudioMessage.parse_raw(msg) self.output_audio_queue.put_nowait(audio_message.get_bytes()) - output_thread = threading.Thread(target=self.play_audio) output_thread.start() return await asyncio.gather(sender(ws), receiver(ws)) - diff --git 
a/vocode/models/transcriber.py b/vocode/models/transcriber.py index 190a1f3..9b684df 100644 --- a/vocode/models/transcriber.py +++ b/vocode/models/transcriber.py @@ -1,35 +1,51 @@ from enum import Enum from typing import Optional from .audio_encoding import AudioEncoding -from .model import TypedModel +from .model import BaseModel, TypedModel from ..input_device.base_input_device import BaseInputDevice + class TranscriberType(str, Enum): BASE = "base" DEEPGRAM = "deepgram" GOOGLE = "google" ASSEMBLY_AI = "assembly_ai" + +class EndpointingConfig(BaseModel): + time_cutoff_seconds: Optional[float] = None + + class TranscriberConfig(TypedModel, type=TranscriberType.BASE): sampling_rate: int audio_encoding: AudioEncoding chunk_size: int + endpointing_config: Optional[EndpointingConfig] = None @classmethod - def from_input_device(cls, input_device: BaseInputDevice): + def from_input_device( + cls, + input_device: BaseInputDevice, + endpointing_config: Optional[EndpointingConfig] = None, + ): return cls( sampling_rate=input_device.sampling_rate, audio_encoding=input_device.audio_encoding, - chunk_size=input_device.chunk_size) + chunk_size=input_device.chunk_size, + endpointing_config=endpointing_config, + ) + class DeepgramTranscriberConfig(TranscriberConfig, type=TranscriberType.DEEPGRAM): model: Optional[str] = None should_warmup_model: bool = False version: Optional[str] = None + class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE): model: Optional[str] = None should_warmup_model: bool = False + class AssemblyAITranscriberConfig(TranscriberConfig, type=TranscriberType.ASSEMBLY_AI): - should_warmup_model: bool = False \ No newline at end of file + should_warmup_model: bool = False