From 61607ad0a495e1136ce4c9306f7db89a5be514d7 Mon Sep 17 00:00:00 2001 From: Kian Date: Thu, 2 Mar 2023 16:18:11 -0800 Subject: [PATCH 1/4] lint files --- vocode/models/transcriber.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/vocode/models/transcriber.py b/vocode/models/transcriber.py index 190a1f3..87a0fe7 100644 --- a/vocode/models/transcriber.py +++ b/vocode/models/transcriber.py @@ -4,12 +4,14 @@ from .audio_encoding import AudioEncoding from .model import TypedModel from ..input_device.base_input_device import BaseInputDevice + class TranscriberType(str, Enum): BASE = "base" DEEPGRAM = "deepgram" GOOGLE = "google" ASSEMBLY_AI = "assembly_ai" + class TranscriberConfig(TypedModel, type=TranscriberType.BASE): sampling_rate: int audio_encoding: AudioEncoding @@ -20,16 +22,20 @@ class TranscriberConfig(TypedModel, type=TranscriberType.BASE): return cls( sampling_rate=input_device.sampling_rate, audio_encoding=input_device.audio_encoding, - chunk_size=input_device.chunk_size) + chunk_size=input_device.chunk_size, + ) + class DeepgramTranscriberConfig(TranscriberConfig, type=TranscriberType.DEEPGRAM): model: Optional[str] = None should_warmup_model: bool = False version: Optional[str] = None + class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE): model: Optional[str] = None should_warmup_model: bool = False + class AssemblyAITranscriberConfig(TranscriberConfig, type=TranscriberType.ASSEMBLY_AI): - should_warmup_model: bool = False \ No newline at end of file + should_warmup_model: bool = False From 274c1e3acd1b7d0f3ce73d02a0a3c27ee89114fb Mon Sep 17 00:00:00 2001 From: Kian Date: Fri, 3 Mar 2023 10:49:07 -0800 Subject: [PATCH 2/4] update models --- vocode/models/transcriber.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/vocode/models/transcriber.py b/vocode/models/transcriber.py index 87a0fe7..3523b16 100644 --- a/vocode/models/transcriber.py +++ b/vocode/models/transcriber.py @@ -12,17 +12,31 @@ class TranscriberType(str, Enum): ASSEMBLY_AI = "assembly_ai" +class EndpointingType(str, Enum): + BASE = "base" + + +class EndpointingConfig(TypedModel, type=EndpointingType.BASE): + time_cutoff: int + + class TranscriberConfig(TypedModel, type=TranscriberType.BASE): sampling_rate: int audio_encoding: AudioEncoding chunk_size: int + endpointing_config: Optional[EndpointingConfig] = None @classmethod - def from_input_device(cls, input_device: BaseInputDevice): + def from_input_device( + cls, + input_device: BaseInputDevice, + endpointing_config: Optional[EndpointingConfig] = None, + ): return cls( sampling_rate=input_device.sampling_rate, audio_encoding=input_device.audio_encoding, chunk_size=input_device.chunk_size, + endpointing_config=endpointing_config, ) From 69caa03c8d799d19f8457def3f75a782f1e98414 Mon Sep 17 00:00:00 2001 From: Kian Date: Fri, 3 Mar 2023 12:11:53 -0800 Subject: [PATCH 3/4] address comments --- vocode/conversation.py | 26 +++++++++++++------------- vocode/models/transcriber.py | 8 ++------ 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/vocode/conversation.py b/vocode/conversation.py index fa22d77..9fc2fa0 100644 --- a/vocode/conversation.py +++ b/vocode/conversation.py @@ -16,17 +16,17 @@ from .models.synthesizer import SynthesizerConfig from .models.websocket import ReadyMessage, AudioMessage, StartMessage, StopMessage from . import api_key -VOCODE_WEBSOCKET_URL = f'wss://api.vocode.dev/conversation' +VOCODE_WEBSOCKET_URL = f"wss://3fcd-136-24-82-111.ngrok.io/conversation" + class Conversation: - def __init__( self, - input_device: BaseInputDevice, - output_device: BaseOutputDevice, - transcriber_config: TranscriberConfig, + input_device: BaseInputDevice, + output_device: BaseOutputDevice, + transcriber_config: TranscriberConfig, agent_config: AgentConfig, - synthesizer_config: SynthesizerConfig + synthesizer_config: SynthesizerConfig, ): self.input_device = input_device self.output_device = output_device @@ -43,7 +43,7 @@ class Conversation: while not self.receiver_ready: await asyncio.sleep(0.1) return True - + def deactivate(self): self.active = False @@ -55,16 +55,18 @@ class Conversation: await self.output_device.send_async(audio) except queue.Empty: continue + loop = asyncio.new_event_loop() loop.run_until_complete(run()) - + async def start(self): async with websockets.connect(f"{VOCODE_WEBSOCKET_URL}?key={api_key}") as ws: + async def sender(ws): start_message = StartMessage( - transcriber_config=self.transcriber_config, - agent_config=self.agent_config, - synthesizer_config=self.synthesizer_config + transcriber_config=self.transcriber_config, + agent_config=self.agent_config, + synthesizer_config=self.synthesizer_config, ) await ws.send(start_message.json()) await self.wait_for_ready() @@ -83,8 +85,6 @@ class Conversation: audio_message = AudioMessage.parse_raw(msg) self.output_audio_queue.put_nowait(audio_message.get_bytes()) - output_thread = threading.Thread(target=self.play_audio) output_thread.start() return await asyncio.gather(sender(ws), receiver(ws)) - diff --git a/vocode/models/transcriber.py b/vocode/models/transcriber.py index 3523b16..18f818e 100644 --- a/vocode/models/transcriber.py +++ b/vocode/models/transcriber.py @@ -12,12 +12,8 @@ class TranscriberType(str, Enum): ASSEMBLY_AI = "assembly_ai" -class EndpointingType(str, Enum): - BASE = "base" - - -class EndpointingConfig(TypedModel, type=EndpointingType.BASE): - time_cutoff: int +class EndpointingConfig(TypedModel): + time_cutoff_seconds: float class TranscriberConfig(TypedModel, type=TranscriberType.BASE): From 307d39aa01fc8958dde8036cc8da1cac03a7b5e3 Mon Sep 17 00:00:00 2001 From: Kian Date: Fri, 3 Mar 2023 12:23:10 -0800 Subject: [PATCH 4/4] address comments and fix bugs --- vocode/models/transcriber.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vocode/models/transcriber.py b/vocode/models/transcriber.py index 18f818e..9b684df 100644 --- a/vocode/models/transcriber.py +++ b/vocode/models/transcriber.py @@ -1,7 +1,7 @@ from enum import Enum from typing import Optional from .audio_encoding import AudioEncoding -from .model import TypedModel +from .model import BaseModel, TypedModel from ..input_device.base_input_device import BaseInputDevice @@ -12,8 +12,8 @@ class TranscriberType(str, Enum): ASSEMBLY_AI = "assembly_ai" -class EndpointingConfig(TypedModel): - time_cutoff_seconds: float +class EndpointingConfig(BaseModel): + time_cutoff_seconds: Optional[float] = None class TranscriberConfig(TypedModel, type=TranscriberType.BASE):