remove pyq goodbye model and rime synthesizer and fix environment loading
This commit is contained in:
parent
a93bfc1ec9
commit
1dc7bc74c3
28 changed files with 143 additions and 285 deletions
|
|
@ -1,4 +1,3 @@
|
|||
import os
|
||||
import random
|
||||
import time
|
||||
from langchain.prompts import (
|
||||
|
|
@ -16,23 +15,20 @@ import openai
|
|||
import json
|
||||
from typing import Generator, Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from typing import Generator
|
||||
import logging
|
||||
from vocode import getenv
|
||||
|
||||
from vocode.streaming.agent.base_agent import BaseAgent
|
||||
from vocode.streaming.models.agent import ChatGPTAgentConfig
|
||||
from vocode.streaming.utils.sse_client import SSEClient
|
||||
from vocode.streaming.agent.utils import stream_llm_response
|
||||
|
||||
load_dotenv()
|
||||
|
||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
class ChatGPTAgent(BaseAgent):
|
||||
def __init__(self, agent_config: ChatGPTAgentConfig, logger: logging.Logger = None):
|
||||
super().__init__(agent_config)
|
||||
openai.api_key = getenv("OPENAI_API_KEY")
|
||||
self.agent_config = agent_config
|
||||
self.logger = logger or logging.getLogger(__name__)
|
||||
self.logger.setLevel(logging.DEBUG)
|
||||
|
|
@ -112,7 +108,7 @@ class ChatGPTAgent(BaseAgent):
|
|||
"https://api.openai.com/v1/chat/completions",
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
|
||||
"Authorization": f"Bearer {getenv('OPENAI_API_KEY')}",
|
||||
},
|
||||
json={
|
||||
"model": self.agent_config.model_name,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import re
|
||||
from typing import Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from langchain import OpenAI
|
||||
from langchain.llms import OpenAIChat
|
||||
from typing import Generator
|
||||
|
|
@ -11,8 +10,6 @@ from vocode.streaming.agent.base_agent import BaseAgent
|
|||
from vocode.streaming.agent.utils import stream_llm_response
|
||||
from vocode.streaming.models.agent import LLMAgentConfig
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class LLMAgent(BaseAgent):
|
||||
SENTENCE_ENDINGS = [".", "!", "?"]
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
|
|||
from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
|
||||
from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
|
||||
from vocode.streaming.synthesizer.google_synthesizer import GoogleSynthesizer
|
||||
from vocode.streaming.synthesizer.rime_synthesizer import RimeSynthesizer
|
||||
from vocode.streaming.transcriber.assembly_ai_transcriber import AssemblyAITranscriber
|
||||
from vocode.streaming.transcriber.base_transcriber import BaseTranscriber
|
||||
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
|
||||
|
|
@ -48,11 +47,6 @@ def create_synthesizer(synthesizer_config: SynthesizerConfig) -> BaseSynthesizer
|
|||
elif synthesizer_config.type == SynthesizerType.AZURE:
|
||||
return AzureSynthesizer(synthesizer_config)
|
||||
elif synthesizer_config.type == SynthesizerType.ELEVEN_LABS:
|
||||
kwargs = {}
|
||||
if synthesizer_config.voice_id:
|
||||
kwargs["voice_id"] = synthesizer_config.voice_id
|
||||
return ElevenLabsSynthesizer(synthesizer_config, **kwargs)
|
||||
elif synthesizer_config.type == SynthesizerType.RIME:
|
||||
return RimeSynthesizer(synthesizer_config)
|
||||
return ElevenLabsSynthesizer(synthesizer_config)
|
||||
else:
|
||||
raise Exception("Invalid synthesizer config")
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ import websockets
|
|||
from websockets.exceptions import ConnectionClosedOK
|
||||
from websockets.client import WebSocketClientProtocol
|
||||
import asyncio
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
import logging
|
||||
import threading
|
||||
import queue
|
||||
|
|
@ -22,8 +20,6 @@ from vocode.streaming.models.websocket import (
|
|||
StopMessage,
|
||||
)
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class HostedStreamingConversation:
|
||||
def __init__(
|
||||
|
|
|
|||
|
|
@ -8,15 +8,18 @@ import time
|
|||
import secrets
|
||||
import random
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from vocode.streaming.agent.bot_sentiment_analyser import (
|
||||
BotSentiment,
|
||||
BotSentimentAnalyser,
|
||||
)
|
||||
from vocode.streaming.agent.information_retrieval_agent import InformationRetrievalAgent
|
||||
from vocode.streaming.factory import (
|
||||
create_agent,
|
||||
create_synthesizer,
|
||||
create_transcriber,
|
||||
)
|
||||
from vocode.streaming.models.message import BaseMessage
|
||||
from vocode.streaming.output_device.base_output_device import BaseOutputDevice
|
||||
from vocode.streaming.synthesizer.rime_synthesizer import RimeSynthesizer
|
||||
from vocode.streaming.transcriber.assembly_ai_transcriber import AssemblyAITranscriber
|
||||
from vocode.streaming.utils.goodbye_model import GoodbyeModel
|
||||
from vocode.streaming.utils.transcript import Transcript
|
||||
|
|
@ -48,9 +51,6 @@ from vocode.streaming.synthesizer.base_synthesizer import (
|
|||
SynthesisResult,
|
||||
FillerAudio,
|
||||
)
|
||||
from vocode.streaming.synthesizer.google_synthesizer import GoogleSynthesizer
|
||||
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
|
||||
from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
|
||||
from vocode.streaming.utils import (
|
||||
create_conversation_id,
|
||||
create_loop_in_thread,
|
||||
|
|
@ -60,19 +60,15 @@ from vocode.streaming.transcriber.base_transcriber import (
|
|||
Transcription,
|
||||
BaseTranscriber,
|
||||
)
|
||||
from vocode.streaming.transcriber.google_transcriber import GoogleTranscriber
|
||||
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class StreamingConversation:
|
||||
def __init__(
|
||||
self,
|
||||
output_device: BaseOutputDevice,
|
||||
transcriber: BaseTranscriber,
|
||||
agent: BaseAgent,
|
||||
synthesizer: BaseSynthesizer,
|
||||
transcriber_config: TranscriberConfig,
|
||||
agent_config: AgentConfig,
|
||||
synthesizer_config: SynthesizerConfig,
|
||||
conversation_id: str = None,
|
||||
per_chunk_allowance_seconds: int = PER_CHUNK_ALLOWANCE_SECONDS,
|
||||
logger: Optional[logging.Logger] = None,
|
||||
|
|
@ -80,11 +76,11 @@ class StreamingConversation:
|
|||
self.id = conversation_id or create_conversation_id()
|
||||
self.logger = logger or logging.getLogger(__name__)
|
||||
self.output_device = output_device
|
||||
self.transcriber = transcriber
|
||||
self.transcriber = create_transcriber(transcriber_config)
|
||||
self.transcriber.set_on_response(self.on_transcription_response)
|
||||
self.transcriber_task = None
|
||||
self.agent = agent
|
||||
self.synthesizer = synthesizer
|
||||
self.agent = create_agent(agent_config)
|
||||
self.synthesizer = create_synthesizer(synthesizer_config)
|
||||
self.synthesizer_event_loop = asyncio.new_event_loop()
|
||||
self.synthesizer_thread = threading.Thread(
|
||||
name="synthesizer",
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import re
|
|||
from typing import Any, Optional
|
||||
from xml.etree import ElementTree
|
||||
import azure.cognitiveservices.speech as speechsdk
|
||||
from dotenv import load_dotenv
|
||||
from vocode import getenv
|
||||
|
||||
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
|
||||
from vocode.streaming.models.message import BaseMessage, SSMLMessage
|
||||
|
|
@ -20,7 +20,6 @@ from vocode.streaming.synthesizer.base_synthesizer import (
|
|||
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
|
||||
from vocode.streaming.models.audio_encoding import AudioEncoding
|
||||
|
||||
load_dotenv()
|
||||
|
||||
NAMESPACES = {
|
||||
"mstts": "https://www.w3.org/2001/mstts",
|
||||
|
|
@ -59,8 +58,8 @@ class AzureSynthesizer(BaseSynthesizer):
|
|||
self.synthesizer_config = synthesizer_config
|
||||
# Instantiates a client
|
||||
speech_config = speechsdk.SpeechConfig(
|
||||
subscription=os.environ.get("AZURE_SPEECH_KEY"),
|
||||
region=os.environ.get("AZURE_SPEECH_REGION"),
|
||||
subscription=getenv("AZURE_SPEECH_KEY"),
|
||||
region=getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
if self.synthesizer_config.audio_encoding == AudioEncoding.LINEAR16:
|
||||
if self.synthesizer_config.sampling_rate == 44100:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
from typing import Any, Optional
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import requests
|
||||
from vocode import getenv
|
||||
|
||||
from vocode.streaming.synthesizer.base_synthesizer import (
|
||||
BaseSynthesizer,
|
||||
|
|
@ -11,9 +10,7 @@ from vocode.streaming.models.synthesizer import ElevenLabsSynthesizerConfig
|
|||
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
|
||||
from vocode.streaming.models.message import BaseMessage
|
||||
|
||||
load_dotenv()
|
||||
|
||||
ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY")
|
||||
ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
|
||||
ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
|
||||
OBAMA_VOICE_ID = "vLITIS0SH2an5iQGxw5C"
|
||||
|
|
@ -22,7 +19,7 @@ OBAMA_VOICE_ID = "vLITIS0SH2an5iQGxw5C"
|
|||
class ElevenLabsSynthesizer(BaseSynthesizer):
|
||||
def __init__(self, config: ElevenLabsSynthesizerConfig):
|
||||
super().__init__(config)
|
||||
self.api_key = config.api_key
|
||||
self.api_key = getenv("ELEVEN_LABS_API_KEY")
|
||||
self.voice_id = config.voice_id or ADAM_VOICE_ID
|
||||
self.words_per_minute = 150
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import io
|
|||
import wave
|
||||
from typing import Any, Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from google.cloud import texttospeech_v1beta1 as tts
|
||||
|
||||
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
|
||||
|
|
@ -16,8 +15,6 @@ from vocode.streaming.models.synthesizer import GoogleSynthesizerConfig
|
|||
from vocode.streaming.models.audio_encoding import AudioEncoding
|
||||
from vocode.streaming.utils import convert_wav
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class GoogleSynthesizer(BaseSynthesizer):
|
||||
OFFSET_SECONDS = 0.5
|
||||
|
|
|
|||
|
|
@ -1,78 +0,0 @@
|
|||
import audioop
|
||||
import base64
|
||||
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
|
||||
from vocode.streaming.models.audio_encoding import AudioEncoding
|
||||
|
||||
from vocode.streaming.models.message import BaseMessage
|
||||
|
||||
from .base_synthesizer import BaseSynthesizer, SynthesisResult, encode_as_wav
|
||||
from typing import Any, Optional
|
||||
import os
|
||||
import io
|
||||
import wave
|
||||
from dotenv import load_dotenv
|
||||
import requests
|
||||
|
||||
from ..utils import convert_linear_audio, convert_wav
|
||||
from ..models.synthesizer import ElevenLabsSynthesizerConfig, RimeSynthesizerConfig
|
||||
|
||||
load_dotenv()
|
||||
|
||||
RIME_API_KEY = os.getenv("RIME_API_KEY")
|
||||
RIME_BASE_URL = os.getenv("RIME_BASE_URL")
|
||||
|
||||
|
||||
class RimeSynthesizer(BaseSynthesizer):
    """Synthesizer that fetches speech audio from the Rime HTTP endpoint.

    The endpoint URL and bearer token are taken from the module-level
    ``RIME_BASE_URL`` and ``RIME_API_KEY`` values.
    """

    def __init__(self, config: RimeSynthesizerConfig):
        """Run the base-class setup and remember the speaker named in *config*."""
        super().__init__(config)
        self.speaker = config.speaker

    def create_speech(
        self,
        message: BaseMessage,
        chunk_size: int,
        bot_sentiment: Optional[BotSentiment] = None,
    ) -> SynthesisResult:
        """Request speech for *message* and return it as chunked audio.

        Args:
            message: Message whose ``text`` is sent for synthesis.
            chunk_size: Maximum number of bytes carried by each yielded chunk.
            bot_sentiment: Not used by this synthesizer.

        Returns:
            A ``SynthesisResult`` built from the chunk generator and a callable
            that maps a number of seconds to a message cutoff.

        Raises:
            ValueError: If the configured audio encoding is neither LINEAR16
                nor MULAW.
            RuntimeError: If the HTTP request fails or the response carries no
                audio payload.
        """
        # Resolve the target encoding before doing any network I/O so that an
        # unsupported configuration fails fast with a clear message instead of
        # an UnboundLocalError after the request has already been made.
        configured_encoding = self.synthesizer_config.audio_encoding
        if configured_encoding == AudioEncoding.LINEAR16:
            output_encoding = AudioEncoding.LINEAR16
        elif configured_encoding == AudioEncoding.MULAW:
            output_encoding = AudioEncoding.MULAW
        else:
            raise ValueError(
                f"Unsupported audio encoding for Rime synthesizer: {configured_encoding}"
            )

        url = RIME_BASE_URL
        headers = {"Authorization": f"Bearer {RIME_API_KEY}"}
        body = {"inputs": {"text": message.text, "speaker": self.speaker}}
        response = requests.post(url, headers=headers, json=body)

        # Explicit checks instead of `assert`: assertions are removed when
        # Python runs with -O, which would let a failed request fall through.
        if not response.ok:
            raise RuntimeError(
                f"Rime synthesis request failed ({response.status_code}): {response.text}"
            )
        data = response.json().get("data")
        if not data:
            raise RuntimeError("Rime synthesis response contained no audio data")

        # The payload is base64-encoded audio; wrap it as a file-like object
        # for convert_wav.
        audio_file = io.BytesIO(base64.b64decode(data))
        output_bytes = convert_wav(
            audio_file,
            output_sample_rate=self.synthesizer_config.sampling_rate,
            output_encoding=output_encoding,
        )

        def chunk_generator(audio, chunk_transform=lambda x: x):
            total = len(audio)
            for start in range(0, total, chunk_size):
                end = start + chunk_size
                # NOTE(review): the second ChunkResult argument appears to be
                # the "final chunk" flag -- confirm against ChunkResult.
                # Comparing the slice end with the total length also flags the
                # final chunk when the audio length is an exact multiple of
                # chunk_size, which `len(chunk) != chunk_size` never did.
                yield SynthesisResult.ChunkResult(
                    chunk_transform(audio[start:end]), end >= total
                )

        if self.synthesizer_config.should_encode_as_wav:
            output_generator = chunk_generator(
                output_bytes, chunk_transform=encode_as_wav
            )
        else:
            output_generator = chunk_generator(output_bytes)

        return SynthesisResult(
            output_generator,
            lambda seconds: self.get_message_cutoff_from_total_response_length(
                message, seconds, len(output_bytes)
            ),
        )
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
from redis import Redis
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from enum import Enum
|
|||
import json
|
||||
import logging
|
||||
from typing import Optional
|
||||
from vocode import getenv
|
||||
from vocode.streaming.agent.base_agent import BaseAgent
|
||||
from vocode.streaming.factory import (
|
||||
create_agent,
|
||||
|
|
@ -42,38 +43,36 @@ class Call(StreamingConversation):
|
|||
self,
|
||||
base_url: str,
|
||||
config_manager: BaseConfigManager,
|
||||
agent: BaseAgent,
|
||||
twilio_config: TwilioConfig,
|
||||
transcriber: Optional[BaseTranscriber] = None,
|
||||
synthesizer: Optional[BaseSynthesizer] = None,
|
||||
twilio_sid=None,
|
||||
agent_config: BaseAgent,
|
||||
transcriber_config: Optional[BaseTranscriber] = None,
|
||||
synthesizer_config: Optional[BaseSynthesizer] = None,
|
||||
twilio_config: Optional[TwilioConfig] = None,
|
||||
twilio_sid: Optional[str] = None,
|
||||
conversation_id: Optional[str] = None,
|
||||
logger: Optional[logging.Logger] = None,
|
||||
):
|
||||
self.base_url = base_url
|
||||
self.config_manager = config_manager
|
||||
self.output_device = TwilioOutputDevice()
|
||||
self.twilio_config = twilio_config
|
||||
self.twilio_config = twilio_config or TwilioConfig(
|
||||
account_sid=getenv("TWILIO_ACCOUNT_SID"),
|
||||
auth_token=getenv("TWILIO_AUTH_TOKEN"),
|
||||
)
|
||||
self.twilio_client = create_twilio_client(twilio_config)
|
||||
super().__init__(
|
||||
self.output_device,
|
||||
transcriber
|
||||
or DeepgramTranscriber(
|
||||
DeepgramTranscriberConfig(
|
||||
sampling_rate=8000,
|
||||
audio_encoding=AudioEncoding.MULAW,
|
||||
chunk_size=self.CHUNK_SIZE,
|
||||
model="voicemail",
|
||||
endpointing_config=PunctuationEndpointingConfig(),
|
||||
),
|
||||
logger=logger,
|
||||
transcriber_config
|
||||
or DeepgramTranscriberConfig(
|
||||
sampling_rate=8000,
|
||||
audio_encoding=AudioEncoding.MULAW,
|
||||
chunk_size=self.CHUNK_SIZE,
|
||||
model="voicemail",
|
||||
endpointing_config=PunctuationEndpointingConfig(),
|
||||
),
|
||||
agent,
|
||||
synthesizer
|
||||
or AzureSynthesizer(
|
||||
AzureSynthesizerConfig(
|
||||
sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
|
||||
)
|
||||
agent_config,
|
||||
synthesizer_config
|
||||
or AzureSynthesizerConfig(
|
||||
sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
|
||||
),
|
||||
conversation_id=conversation_id,
|
||||
per_chunk_allowance_seconds=0.01,
|
||||
|
|
@ -94,9 +93,9 @@ class Call(StreamingConversation):
|
|||
base_url=base_url,
|
||||
logger=logger,
|
||||
config_manager=config_manager,
|
||||
agent=create_agent(call_config.agent_config),
|
||||
transcriber=create_transcriber(call_config.transcriber_config),
|
||||
synthesizer=create_synthesizer(call_config.synthesizer_config),
|
||||
agent_config=call_config.agent_config,
|
||||
transcriber_config=call_config.transcriber_config,
|
||||
synthesizer_config=call_config.synthesizer_config,
|
||||
twilio_config=call_config.twilio_config,
|
||||
twilio_sid=call_config.twilio_sid,
|
||||
conversation_id=conversation_id,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import logging
|
||||
from typing import Optional
|
||||
from twilio.rest import Client
|
||||
from vocode import getenv
|
||||
|
||||
from vocode.streaming.models.agent import AgentConfig
|
||||
from vocode.streaming.models.synthesizer import (
|
||||
|
|
@ -33,7 +33,7 @@ class OutboundCall:
|
|||
from_phone: str,
|
||||
config_manager: BaseConfigManager,
|
||||
agent_config: AgentConfig,
|
||||
twilio_config: TwilioConfig,
|
||||
twilio_config: Optional[TwilioConfig] = None,
|
||||
transcriber_config: Optional[TranscriberConfig] = None,
|
||||
synthesizer_config: Optional[SynthesizerConfig] = None,
|
||||
conversation_id: Optional[str] = None,
|
||||
|
|
@ -56,7 +56,10 @@ class OutboundCall:
|
|||
)
|
||||
self.conversation_id = conversation_id or create_conversation_id()
|
||||
self.logger = logger
|
||||
self.twilio_config = twilio_config
|
||||
self.twilio_config = twilio_config or TwilioConfig(
|
||||
account_sid=getenv("TWILIO_ACCOUNT_SID"),
|
||||
auth_token=getenv("TWILIO_AUTH_TOKEN"),
|
||||
)
|
||||
self.twilio_client = create_twilio_client(twilio_config)
|
||||
self.twilio_sid = None
|
||||
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@ class ZoomDialIn(OutboundCall):
|
|||
zoom_meeting_password: Optional[str],
|
||||
from_phone: str,
|
||||
config_manager: BaseConfigManager,
|
||||
twilio_config: TwilioConfig,
|
||||
agent_config: AgentConfig,
|
||||
transcriber_config: TranscriberConfig,
|
||||
synthesizer_config: SynthesizerConfig,
|
||||
twilio_config: Optional[TwilioConfig] = None,
|
||||
conversation_id: Optional[str] = None,
|
||||
logger: Optional[logging.Logger] = None,
|
||||
):
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@ from vocode.streaming.models.telephony import (
|
|||
EndOutboundCall,
|
||||
TwilioConfig,
|
||||
)
|
||||
from twilio.rest import Client
|
||||
|
||||
from vocode.streaming.telephony.conversation.call import Call
|
||||
from vocode.streaming.telephony.templates import Templater
|
||||
|
|
@ -45,7 +44,7 @@ from vocode.streaming.utils import create_conversation_id
|
|||
class InboundCallConfig(BaseModel):
|
||||
url: str
|
||||
agent_config: AgentConfig
|
||||
twilio_config: TwilioConfig
|
||||
twilio_config: Optional[TwilioConfig] = None
|
||||
transcriber_config: Optional[TranscriberConfig] = None
|
||||
synthesizer_config: Optional[SynthesizerConfig] = None
|
||||
|
||||
|
|
@ -92,7 +91,7 @@ class TelephonyServer:
|
|||
def create_inbound_route(
|
||||
self,
|
||||
agent_config: AgentConfig,
|
||||
twilio_config: TwilioConfig,
|
||||
twilio_config: Optional[TwilioConfig] = None,
|
||||
transcriber_config: Optional[TranscriberConfig] = None,
|
||||
synthesizer_config: Optional[SynthesizerConfig] = None,
|
||||
):
|
||||
|
|
|
|||
|
|
@ -1,12 +1,8 @@
|
|||
import os
|
||||
from typing import Optional
|
||||
from dotenv import load_dotenv
|
||||
from twilio.rest import Client
|
||||
|
||||
from vocode.streaming.models.telephony import TwilioConfig
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def create_twilio_client(twilio_config: TwilioConfig):
    """Return a Twilio ``Client`` built from the credentials in *twilio_config*.

    Args:
        twilio_config: Object exposing ``account_sid`` and ``auth_token``.

    Returns:
        A ``Client`` constructed with those two values, in that order.
    """
    account_sid = twilio_config.account_sid
    auth_token = twilio_config.auth_token
    return Client(account_sid, auth_token)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import websockets
|
||||
from urllib.parse import urlencode
|
||||
from vocode import getenv
|
||||
|
||||
from vocode.streaming.models.transcriber import AssemblyAITranscriberConfig
|
||||
from vocode.streaming.models.websocket import AudioMessage
|
||||
|
|
@ -14,9 +13,7 @@ from vocode.streaming.transcriber.base_transcriber import (
|
|||
)
|
||||
from vocode.streaming.models.audio_encoding import AudioEncoding
|
||||
|
||||
load_dotenv()
|
||||
|
||||
ASSEMBLY_AI_API_KEY = os.environ.get("ASSEMBLY_AI_API_KEY")
|
||||
ASSEMBLY_AI_URL = "wss://api.assemblyai.com/v2/realtime/ws"
|
||||
|
||||
|
||||
|
|
@ -27,6 +24,7 @@ class AssemblyAITranscriber(BaseTranscriber):
|
|||
logger: logging.Logger = None,
|
||||
):
|
||||
super().__init__(transcriber_config)
|
||||
self.api_key = getenv("ASSEMBLY_AI_API_KEY")
|
||||
self._ended = False
|
||||
self.is_ready = False
|
||||
self.logger = logger or logging.getLogger(__name__)
|
||||
|
|
@ -61,7 +59,7 @@ class AssemblyAITranscriber(BaseTranscriber):
|
|||
|
||||
async with websockets.connect(
|
||||
URL,
|
||||
extra_headers=(("Authorization", ASSEMBLY_AI_API_KEY),),
|
||||
extra_headers=(("Authorization", self.api_key),),
|
||||
ping_interval=5,
|
||||
ping_timeout=20,
|
||||
) as ws:
|
||||
|
|
|
|||
|
|
@ -1,11 +1,8 @@
|
|||
from dotenv import load_dotenv
|
||||
from typing import Callable, Optional, Awaitable
|
||||
|
||||
from vocode.streaming.utils import convert_wav
|
||||
from vocode.streaming.models.transcriber import EndpointingConfig, TranscriberConfig
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class Transcription:
|
||||
def __init__(
|
||||
|
|
|
|||
|
|
@ -1,12 +1,11 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import websockets
|
||||
from websockets.client import WebSocketClientProtocol
|
||||
import audioop
|
||||
from urllib.parse import urlencode
|
||||
from vocode import getenv
|
||||
|
||||
from vocode.streaming.transcriber.base_transcriber import (
|
||||
BaseTranscriber,
|
||||
|
|
@ -19,9 +18,7 @@ from vocode.streaming.models.transcriber import (
|
|||
)
|
||||
from vocode.streaming.models.audio_encoding import AudioEncoding
|
||||
|
||||
load_dotenv()
|
||||
|
||||
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
|
||||
PUNCTUATION_TERMINATORS = [".", "!", "?"]
|
||||
NUM_RESTARTS = 5
|
||||
|
||||
|
|
@ -33,6 +30,7 @@ class DeepgramTranscriber(BaseTranscriber):
|
|||
logger: logging.Logger = None,
|
||||
):
|
||||
super().__init__(transcriber_config)
|
||||
self.api_key = getenv("DEEPGRAM_API_KEY")
|
||||
self.transcriber_config = transcriber_config
|
||||
self._ended = False
|
||||
self.warmed_up = False
|
||||
|
|
@ -155,7 +153,7 @@ class DeepgramTranscriber(BaseTranscriber):
|
|||
return data["duration"]
|
||||
|
||||
async def process(self, warmup=True):
|
||||
extra_headers = {"Authorization": f"Token {DEEPGRAM_API_KEY}"}
|
||||
extra_headers = {"Authorization": f"Token {self.api_key}"}
|
||||
self.audio_queue = asyncio.Queue()
|
||||
|
||||
async with websockets.connect(
|
||||
|
|
|
|||
|
|
@ -1,19 +1,12 @@
|
|||
import os
|
||||
import asyncio
|
||||
import openai
|
||||
from dotenv import load_dotenv
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
load_dotenv()
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
from vocode import getenv
|
||||
|
||||
|
||||
PLATFORM = "pyq" if os.getenv("USE_PYQ_EMBEDDINGS", "false") == "true" else "openai"
|
||||
SIMILARITY_THRESHOLD = 0.9
|
||||
SIMILARITY_THRESHOLD_PYQ = 0.7
|
||||
EMBEDDING_SIZE = 1536
|
||||
PYQ_EMBEDDING_SIZE = 768
|
||||
GOODBYE_PHRASES = [
|
||||
"bye",
|
||||
"goodbye",
|
||||
|
|
@ -24,7 +17,6 @@ GOODBYE_PHRASES = [
|
|||
"have a good day",
|
||||
"have a good night",
|
||||
]
|
||||
PYQ_API_URL = "https://embeddings.pyqai.com"
|
||||
|
||||
|
||||
class GoodbyeModel:
|
||||
|
|
@ -34,12 +26,10 @@ class GoodbyeModel:
|
|||
os.path.dirname(__file__), "goodbye_embeddings"
|
||||
),
|
||||
):
|
||||
openai.api_key = getenv("OPENAI_API_KEY")
|
||||
self.goodbye_embeddings = self.load_or_create_embeddings(
|
||||
f"{embeddings_cache_path}/goodbye_embeddings.npy"
|
||||
)
|
||||
self.goodbye_embeddings_pyq = self.load_or_create_embeddings(
|
||||
f"{embeddings_cache_path}/goodbye_embeddings_pyq.npy"
|
||||
)
|
||||
|
||||
def load_or_create_embeddings(self, path):
|
||||
if os.path.exists(path):
|
||||
|
|
@ -49,50 +39,33 @@ class GoodbyeModel:
|
|||
np.save(path, embeddings)
|
||||
return embeddings
|
||||
|
||||
def create_embeddings(self, platform=PLATFORM):
|
||||
def create_embeddings(self):
|
||||
print("Creating embeddings...")
|
||||
size = EMBEDDING_SIZE if platform == "openai" else PYQ_EMBEDDING_SIZE
|
||||
size = EMBEDDING_SIZE
|
||||
embeddings = np.empty((size, len(GOODBYE_PHRASES)))
|
||||
for i, goodbye_phrase in enumerate(GOODBYE_PHRASES):
|
||||
embeddings[:, i] = self.create_embedding(goodbye_phrase, platform=platform)
|
||||
embeddings[:, i] = self.create_embedding(goodbye_phrase)
|
||||
return embeddings
|
||||
|
||||
async def is_goodbye(self, text: str, platform=PLATFORM) -> bool:
|
||||
async def is_goodbye(self, text: str) -> bool:
|
||||
if "bye" in text.lower():
|
||||
return True
|
||||
embedding = self.create_embedding(text.strip().lower(), platform=platform)
|
||||
goodbye_embeddings = (
|
||||
self.goodbye_embeddings
|
||||
if platform == "openai"
|
||||
else self.goodbye_embeddings_pyq
|
||||
)
|
||||
threshold = (
|
||||
SIMILARITY_THRESHOLD if platform == "openai" else SIMILARITY_THRESHOLD_PYQ
|
||||
)
|
||||
similarity_results = embedding @ goodbye_embeddings
|
||||
return np.max(similarity_results) > threshold
|
||||
embedding = self.create_embedding(text.strip().lower())
|
||||
similarity_results = embedding @ self.goodbye_embeddings
|
||||
return np.max(similarity_results) > SIMILARITY_THRESHOLD
|
||||
|
||||
def create_embedding(self, text, platform=PLATFORM) -> np.array:
|
||||
if platform == "openai":
|
||||
return np.array(
|
||||
openai.Embedding.create(input=text, model="text-embedding-ada-002")[
|
||||
"data"
|
||||
][0]["embedding"]
|
||||
)
|
||||
elif platform == "pyq":
|
||||
return np.array(
|
||||
requests.post(
|
||||
PYQ_API_URL,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": os.getenv("PYQ_API_KEY"),
|
||||
},
|
||||
json={"input_sequence": [text], "account_id": "400"},
|
||||
).json()["response"][0]
|
||||
)
|
||||
def create_embedding(self, text) -> np.array:
|
||||
return np.array(
|
||||
openai.Embedding.create(input=text, model="text-embedding-ada-002")["data"][
|
||||
0
|
||||
]["embedding"]
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
async def main():
|
||||
model = GoodbyeModel()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue