Add AssemblyAI integration
This commit is contained in:
parent
a773179946
commit
f39af91505
4 changed files with 14 additions and 8 deletions
|
|
@ -11,7 +11,7 @@ def _get_device_prompt(device_infos: list[dict]) -> str:
|
||||||
Choice: """.format(
|
Choice: """.format(
|
||||||
"\n".join(f"{index}: {device['name']}" for index, device in enumerate(device_infos)))
|
"\n".join(f"{index}: {device['name']}" for index, device in enumerate(device_infos)))
|
||||||
|
|
||||||
def create_microphone_input_and_speaker_output(use_first_available_device=False) -> tuple[MicrophoneInput, SpeakerOutput]:
|
def create_microphone_input_and_speaker_output(use_first_available_device=False, mic_sampling_rate=None) -> tuple[MicrophoneInput, SpeakerOutput]:
|
||||||
pa = pyaudio.PyAudio()
|
pa = pyaudio.PyAudio()
|
||||||
num_devices = pa.get_device_count()
|
num_devices = pa.get_device_count()
|
||||||
devices = list(map(pa.get_device_info_by_index, range(num_devices)))
|
devices = list(map(pa.get_device_info_by_index, range(num_devices)))
|
||||||
|
|
@ -24,7 +24,7 @@ def create_microphone_input_and_speaker_output(use_first_available_device=False)
|
||||||
input_device_info = input_device_infos[int(input(_get_device_prompt(input_device_infos)))]
|
input_device_info = input_device_infos[int(input(_get_device_prompt(input_device_infos)))]
|
||||||
output_device_info = output_device_infos[int(input(_get_device_prompt(output_device_infos)))]
|
output_device_info = output_device_infos[int(input(_get_device_prompt(output_device_infos)))]
|
||||||
logger.info("Using microphone input device: %s", input_device_info['name'])
|
logger.info("Using microphone input device: %s", input_device_info['name'])
|
||||||
microphone_input = MicrophoneInput(pa, input_device_info)
|
microphone_input = MicrophoneInput(pa, input_device_info, sampling_rate=mic_sampling_rate)
|
||||||
logger.info("Using speaker output device: %s", output_device_info['name'])
|
logger.info("Using speaker output device: %s", output_device_info['name'])
|
||||||
speaker_output = SpeakerOutput(pa, output_device_info)
|
speaker_output = SpeakerOutput(pa, output_device_info)
|
||||||
return microphone_input, speaker_output
|
return microphone_input, speaker_output
|
||||||
|
|
@ -10,10 +10,10 @@ class MicrophoneInput(BaseInputDevice):
|
||||||
DEFAULT_SAMPLING_RATE = 44100
|
DEFAULT_SAMPLING_RATE = 44100
|
||||||
DEFAULT_CHUNK_SIZE = 2048
|
DEFAULT_CHUNK_SIZE = 2048
|
||||||
|
|
||||||
def __init__(self, pa: pyaudio.PyAudio, device_info: dict, chunk_size: int = DEFAULT_CHUNK_SIZE):
|
def __init__(self, pa: pyaudio.PyAudio, device_info: dict, sampling_rate: int = None, chunk_size: int = DEFAULT_CHUNK_SIZE):
|
||||||
self.device_info = device_info
|
self.device_info = device_info
|
||||||
sampling_rate = int(self.device_info.get('defaultSampleRate', self.DEFAULT_SAMPLING_RATE))
|
sampling_rate = sampling_rate or (self.device_info.get('defaultSampleRate', self.DEFAULT_SAMPLING_RATE))
|
||||||
super().__init__(sampling_rate, AudioEncoding.LINEAR16, chunk_size)
|
super().__init__(int(sampling_rate), AudioEncoding.LINEAR16, chunk_size)
|
||||||
self.pa = pa
|
self.pa = pa
|
||||||
self.stream = pa.open(
|
self.stream = pa.open(
|
||||||
format=pyaudio.paInt16,
|
format=pyaudio.paInt16,
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ class TranscriberType(str, Enum):
|
||||||
BASE = "base"
|
BASE = "base"
|
||||||
DEEPGRAM = "deepgram"
|
DEEPGRAM = "deepgram"
|
||||||
GOOGLE = "google"
|
GOOGLE = "google"
|
||||||
|
ASSEMBLY_AI = "assembly_ai"
|
||||||
|
|
||||||
class TranscriberConfig(TypedModel, type=TranscriberType.BASE):
|
class TranscriberConfig(TypedModel, type=TranscriberType.BASE):
|
||||||
sampling_rate: int
|
sampling_rate: int
|
||||||
|
|
@ -28,4 +29,9 @@ class DeepgramTranscriberConfig(TranscriberConfig, type=TranscriberType.DEEPGRAM
|
||||||
|
|
||||||
class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE):
|
class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE):
|
||||||
model: Optional[str] = None
|
model: Optional[str] = None
|
||||||
should_warmup_model: bool = False
|
should_warmup_model: bool = False
|
||||||
|
|
||||||
|
class AssemblyAITranscriberConfig(TranscriberConfig, type=TranscriberType.ASSEMBLY_AI):
|
||||||
|
model: Optional[str] = None
|
||||||
|
should_warmup_model: bool = False
|
||||||
|
version: Optional[str] = None
|
||||||
|
|
@ -7,9 +7,9 @@ class SpeakerOutput(BaseOutputDevice):
|
||||||
|
|
||||||
DEFAULT_SAMPLING_RATE = 44100
|
DEFAULT_SAMPLING_RATE = 44100
|
||||||
|
|
||||||
def __init__(self, pa: pyaudio.PyAudio, device_info: dict, audio_encoding: AudioEncoding = AudioEncoding.LINEAR16):
|
def __init__(self, pa: pyaudio.PyAudio, device_info: dict, sampling_rate: int = None, audio_encoding: AudioEncoding = AudioEncoding.LINEAR16):
|
||||||
self.device_info = device_info
|
self.device_info = device_info
|
||||||
sampling_rate = int(self.device_info.get('defaultSampleRate', self.DEFAULT_SAMPLING_RATE))
|
sampling_rate = sampling_rate or int(self.device_info.get('defaultSampleRate', self.DEFAULT_SAMPLING_RATE))
|
||||||
super().__init__(sampling_rate, audio_encoding)
|
super().__init__(sampling_rate, audio_encoding)
|
||||||
self.pa = pa
|
self.pa = pa
|
||||||
self.stream = self.pa.open(
|
self.stream = self.pa.open(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue