Merge pull request #3 from vocodedev/kian/endpointing

Kian/endpointing
This commit is contained in:
Kian Hooshmand 2023-03-03 12:24:09 -08:00 committed by GitHub
commit 3dd23214ea
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 17 deletions

View file

@ -16,17 +16,17 @@ from .models.synthesizer import SynthesizerConfig
from .models.websocket import ReadyMessage, AudioMessage, StartMessage, StopMessage
from . import api_key
# Production websocket endpoint for conversation streaming.
# NOTE(review): a temporary developer ngrok tunnel URL
# ("wss://3fcd-136-24-82-111.ngrok.io/conversation") was committed on the
# following line, silently overriding this production endpoint. Removed here —
# point at a local tunnel via configuration/environment, never in source.
# (Also dropped the f-string prefix: the literal has no placeholders.)
VOCODE_WEBSOCKET_URL = "wss://api.vocode.dev/conversation"
class Conversation:
def __init__(
self,
input_device: BaseInputDevice,
output_device: BaseOutputDevice,
transcriber_config: TranscriberConfig,
input_device: BaseInputDevice,
output_device: BaseOutputDevice,
transcriber_config: TranscriberConfig,
agent_config: AgentConfig,
synthesizer_config: SynthesizerConfig
synthesizer_config: SynthesizerConfig,
):
self.input_device = input_device
self.output_device = output_device
@ -43,7 +43,7 @@ class Conversation:
while not self.receiver_ready:
await asyncio.sleep(0.1)
return True
def deactivate(self):
    """Mark this conversation as no longer active.

    Sets the ``active`` flag to False; the streaming loops are expected to
    observe this flag and wind down (exact shutdown path not visible here).
    """
    self.active = False
@ -55,16 +55,18 @@ class Conversation:
await self.output_device.send_async(audio)
except queue.Empty:
continue
loop = asyncio.new_event_loop()
loop.run_until_complete(run())
async def start(self):
async with websockets.connect(f"{VOCODE_WEBSOCKET_URL}?key={api_key}") as ws:
async def sender(ws):
start_message = StartMessage(
transcriber_config=self.transcriber_config,
agent_config=self.agent_config,
synthesizer_config=self.synthesizer_config
transcriber_config=self.transcriber_config,
agent_config=self.agent_config,
synthesizer_config=self.synthesizer_config,
)
await ws.send(start_message.json())
await self.wait_for_ready()
@ -83,8 +85,6 @@ class Conversation:
audio_message = AudioMessage.parse_raw(msg)
self.output_audio_queue.put_nowait(audio_message.get_bytes())
output_thread = threading.Thread(target=self.play_audio)
output_thread.start()
return await asyncio.gather(sender(ws), receiver(ws))

View file

@ -1,35 +1,51 @@
from enum import Enum
from typing import Optional
from .audio_encoding import AudioEncoding
from .model import TypedModel
from .model import BaseModel, TypedModel
from ..input_device.base_input_device import BaseInputDevice
class TranscriberType(str, Enum):
    """Discriminator tag identifying which transcriber backend a config targets.

    Subclasses of ``str`` so members serialize as their plain string values.
    """

    BASE = "base"
    DEEPGRAM = "deepgram"
    GOOGLE = "google"
    ASSEMBLY_AI = "assembly_ai"
class EndpointingConfig(BaseModel):
    """Settings controlling when the transcriber treats an utterance as ended."""

    # Presumably the number of seconds after which speech is cut off;
    # None disables the time-based cutoff. TODO(review): confirm exact
    # semantics against the server-side endpointing implementation.
    time_cutoff_seconds: Optional[float] = None
class TranscriberConfig(TypedModel, type=TranscriberType.BASE):
    """Base configuration shared by all transcriber backends.

    NOTE(review): the diff extraction left both the pre- and post-change
    versions of ``from_input_device`` interleaved (two ``def`` lines, duplicate
    ``chunk_size=`` keywords). Reconstructed the post-change version below.
    """

    sampling_rate: int
    audio_encoding: AudioEncoding
    chunk_size: int
    # Optional endpointing behavior; None means no endpointing is configured.
    endpointing_config: Optional[EndpointingConfig] = None

    @classmethod
    def from_input_device(
        cls,
        input_device: BaseInputDevice,
        endpointing_config: Optional[EndpointingConfig] = None,
    ):
        """Build a config mirroring an input device's audio parameters.

        Args:
            input_device: source of sampling rate, encoding, and chunk size.
            endpointing_config: optional endpointing settings to attach;
                defaults to None (no endpointing).
        """
        return cls(
            sampling_rate=input_device.sampling_rate,
            audio_encoding=input_device.audio_encoding,
            chunk_size=input_device.chunk_size,
            endpointing_config=endpointing_config,
        )
class DeepgramTranscriberConfig(TranscriberConfig, type=TranscriberType.DEEPGRAM):
    """Transcriber settings specific to the Deepgram backend."""

    # Deepgram model name; None lets the backend pick its default.
    model: Optional[str] = None
    # Whether to warm the model up before use. TODO(review): confirm what
    # warmup entails server-side — not visible from this file.
    should_warmup_model: bool = False
    # Deepgram model version; None lets the backend pick its default.
    version: Optional[str] = None
class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE):
    """Transcriber settings specific to the Google Speech backend."""

    # Google speech model name; None lets the backend pick its default.
    model: Optional[str] = None
    # Whether to warm the model up before use. TODO(review): confirm semantics.
    should_warmup_model: bool = False
class AssemblyAITranscriberConfig(TranscriberConfig, type=TranscriberType.ASSEMBLY_AI):
    """Transcriber settings specific to the AssemblyAI backend.

    NOTE(review): the diff residue showed ``should_warmup_model`` twice
    (identical old/new lines); deduplicated — a repeated class attribute is
    harmless but is leftover noise, not intent.
    """

    # Whether to warm the model up before use. TODO(review): confirm semantics.
    should_warmup_model: bool = False