Remove pyq goodbye model and Rime synthesizer; fix environment loading

This commit is contained in:
Ajay Raj 2023-03-28 10:20:36 -07:00
commit 1dc7bc74c3
28 changed files with 143 additions and 285 deletions

View file

@ -2,14 +2,15 @@ import asyncio
import logging
import signal
from dotenv import load_dotenv
import os
load_dotenv()
from vocode.streaming.hosted_streaming_conversation import HostedStreamingConversation
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.helpers import create_microphone_input_and_speaker_output
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
GoogleTranscriberConfig,
)
from vocode.streaming.models.agent import (
ChatGPTAgentConfig,
@ -23,10 +24,6 @@ from vocode.streaming.models.agent import (
)
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
import vocode
load_dotenv()
vocode.api_key = os.getenv("VOCODE_API_KEY")
logging.basicConfig()
logging.root.setLevel(logging.INFO)
@ -41,7 +38,8 @@ if __name__ == "__main__":
input_device=microphone_input,
output_device=speaker_output,
transcriber_config=DeepgramTranscriberConfig.from_input_device(
microphone_input
microphone_input,
endpointing_config=PunctuationEndpointingConfig(),
),
agent_config=ChatGPTAgentConfig(
initial_message=BaseMessage(text="Hello!"),

View file

@ -2,7 +2,9 @@ import asyncio
import logging
import signal
from dotenv import load_dotenv
import os
load_dotenv()
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.helpers import create_microphone_input_and_speaker_output
@ -31,8 +33,6 @@ import vocode
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
load_dotenv()
vocode.api_key = os.getenv("VOCODE_API_KEY")
logging.basicConfig()
logger = logging.getLogger(__name__)
@ -46,23 +46,17 @@ async def main():
conversation = StreamingConversation(
output_device=speaker_output,
transcriber=DeepgramTranscriber(
DeepgramTranscriberConfig.from_input_device(
microphone_input, endpointing_config=PunctuationEndpointingConfig()
)
transcriber=DeepgramTranscriberConfig.from_input_device(
microphone_input, endpointing_config=PunctuationEndpointingConfig()
),
agent=ChatGPTAgent(
ChatGPTAgentConfig(
initial_message=BaseMessage(text="What up"),
prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
agent=ChatGPTAgentConfig(
initial_message=BaseMessage(text="What up"),
prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
generate_responses=True,
cut_off_response=CutOffResponse(),
)
),
synthesizer=AzureSynthesizer(
AzureSynthesizerConfig.from_output_device(speaker_output),
generate_responses=True,
cut_off_response=CutOffResponse(),
),
synthesizer=AzureSynthesizerConfig.from_output_device(speaker_output),
logger=logger,
)
await conversation.start()

View file

@ -1,7 +1,7 @@
import logging
from fastapi import FastAPI
import os
from dotenv import load_dotenv
from vocode import getenv
load_dotenv()
@ -34,13 +34,12 @@ telephony_server = TelephonyServer(
url="/inbound_call",
agent_config=ChatGPTAgentConfig(
initial_message=BaseMessage(text="What up"),
prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
prompt_preamble="Have a pleasant conversation about life",
generate_responses=True,
),
twilio_config=TwilioConfig(
account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
account_sid=getenv("TWILIO_ACCOUNT_SID"),
auth_token=getenv("TWILIO_AUTH_TOKEN"),
),
)
],
@ -49,21 +48,22 @@ hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus,
app.include_router(telephony_server.get_router())
# outbound_call = OutboundCall(
# base_url=BASE_URL,
# to_phone="+14088926228",
# from_phone="+14086600744",
# config_manager=config_manager,
# agent_config=ChatGPTAgentConfig(
# initial_message=BaseMessage(text="What up"),
# prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
# hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
# generate_responses=True,
# ),
# twilio_config=TwilioConfig(
# account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
# auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
# ),
# logger=logger,
# )
# outbound_call.start()
outbound_call = OutboundCall(
base_url=BASE_URL,
to_phone="+14088926228",
from_phone="+14086600744",
config_manager=config_manager,
agent_config=ChatGPTAgentConfig(
initial_message=BaseMessage(text="What up"),
prompt_preamble="Have a pleasant conversation about life",
generate_responses=True,
),
twilio_config=TwilioConfig(
account_sid=getenv("TWILIO_ACCOUNT_SID"),
auth_token=getenv("TWILIO_AUTH_TOKEN"),
),
logger=logger,
)
input("Press enter to start call...")
outbound_call.start()

View file

@ -1,6 +1,6 @@
import logging
from dotenv import load_dotenv
import os
from vocode import getenv
from vocode.helpers import create_microphone_input_and_speaker_output
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
@ -25,15 +25,15 @@ if __name__ == "__main__":
conversation = TurnBasedConversation(
input_device=microphone_input,
output_device=speaker_output,
transcriber=WhisperTranscriber(api_key=os.getenv("OPENAI_API_KEY")),
transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
agent=ChatGPTAgent(
system_prompt="The AI is having a pleasant conversation about life",
initial_message="Hello!",
api_key=os.getenv("OPENAI_API_KEY"),
api_key=getenv("OPENAI_API_KEY"),
),
synthesizer=ElevenLabsSynthesizer(
voice_id=ADAM_VOICE_ID,
api_key=os.getenv("ELEVEN_LABS_API_KEY"),
api_key=getenv("ELEVEN_LABS_API_KEY"),
),
logger=logger,
)

View file

@ -1,7 +1,17 @@
import os
from dotenv import load_dotenv
load_dotenv()
api_key = os.getenv("VOCODE_API_KEY")
base_url = os.getenv("VOCODE_BASE_URL", "api.vocode.dev")
environment = {}
def setenv(**kwargs):
for key, value in kwargs.items():
environment[key] = value
def getenv(key, default=None):
return environment.get(key) or os.getenv(key, default)
api_key = getenv("VOCODE_API_KEY")
base_url = getenv("VOCODE_BASE_URL", "api.vocode.dev")

View file

@ -1,4 +1,3 @@
import os
import random
import time
from langchain.prompts import (
@ -16,23 +15,20 @@ import openai
import json
from typing import Generator, Optional
from dotenv import load_dotenv
from typing import Generator
import logging
from vocode import getenv
from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig
from vocode.streaming.utils.sse_client import SSEClient
from vocode.streaming.agent.utils import stream_llm_response
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")
class ChatGPTAgent(BaseAgent):
def __init__(self, agent_config: ChatGPTAgentConfig, logger: logging.Logger = None):
super().__init__(agent_config)
openai.api_key = getenv("OPENAI_API_KEY")
self.agent_config = agent_config
self.logger = logger or logging.getLogger(__name__)
self.logger.setLevel(logging.DEBUG)
@ -112,7 +108,7 @@ class ChatGPTAgent(BaseAgent):
"https://api.openai.com/v1/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
"Authorization": f"Bearer {getenv('OPENAI_API_KEY')}",
},
json={
"model": self.agent_config.model_name,

View file

@ -1,7 +1,6 @@
import re
from typing import Optional
from dotenv import load_dotenv
from langchain import OpenAI
from langchain.llms import OpenAIChat
from typing import Generator
@ -11,8 +10,6 @@ from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.agent.utils import stream_llm_response
from vocode.streaming.models.agent import LLMAgentConfig
load_dotenv()
class LLMAgent(BaseAgent):
SENTENCE_ENDINGS = [".", "!", "?"]

View file

@ -10,7 +10,6 @@ from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
from vocode.streaming.synthesizer.google_synthesizer import GoogleSynthesizer
from vocode.streaming.synthesizer.rime_synthesizer import RimeSynthesizer
from vocode.streaming.transcriber.assembly_ai_transcriber import AssemblyAITranscriber
from vocode.streaming.transcriber.base_transcriber import BaseTranscriber
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
@ -48,11 +47,6 @@ def create_synthesizer(synthesizer_config: SynthesizerConfig) -> BaseSynthesizer
elif synthesizer_config.type == SynthesizerType.AZURE:
return AzureSynthesizer(synthesizer_config)
elif synthesizer_config.type == SynthesizerType.ELEVEN_LABS:
kwargs = {}
if synthesizer_config.voice_id:
kwargs["voice_id"] = synthesizer_config.voice_id
return ElevenLabsSynthesizer(synthesizer_config, **kwargs)
elif synthesizer_config.type == SynthesizerType.RIME:
return RimeSynthesizer(synthesizer_config)
return ElevenLabsSynthesizer(synthesizer_config)
else:
raise Exception("Invalid synthesizer config")

View file

@ -2,8 +2,6 @@ import websockets
from websockets.exceptions import ConnectionClosedOK
from websockets.client import WebSocketClientProtocol
import asyncio
from dotenv import load_dotenv
import os
import logging
import threading
import queue
@ -22,8 +20,6 @@ from vocode.streaming.models.websocket import (
StopMessage,
)
load_dotenv()
class HostedStreamingConversation:
def __init__(

View file

@ -8,15 +8,18 @@ import time
import secrets
import random
from dotenv import load_dotenv
from vocode.streaming.agent.bot_sentiment_analyser import (
BotSentiment,
BotSentimentAnalyser,
)
from vocode.streaming.agent.information_retrieval_agent import InformationRetrievalAgent
from vocode.streaming.factory import (
create_agent,
create_synthesizer,
create_transcriber,
)
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.output_device.base_output_device import BaseOutputDevice
from vocode.streaming.synthesizer.rime_synthesizer import RimeSynthesizer
from vocode.streaming.transcriber.assembly_ai_transcriber import AssemblyAITranscriber
from vocode.streaming.utils.goodbye_model import GoodbyeModel
from vocode.streaming.utils.transcript import Transcript
@ -48,9 +51,6 @@ from vocode.streaming.synthesizer.base_synthesizer import (
SynthesisResult,
FillerAudio,
)
from vocode.streaming.synthesizer.google_synthesizer import GoogleSynthesizer
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
from vocode.streaming.utils import (
create_conversation_id,
create_loop_in_thread,
@ -60,19 +60,15 @@ from vocode.streaming.transcriber.base_transcriber import (
Transcription,
BaseTranscriber,
)
from vocode.streaming.transcriber.google_transcriber import GoogleTranscriber
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
load_dotenv()
class StreamingConversation:
def __init__(
self,
output_device: BaseOutputDevice,
transcriber: BaseTranscriber,
agent: BaseAgent,
synthesizer: BaseSynthesizer,
transcriber_config: TranscriberConfig,
agent_config: AgentConfig,
synthesizer_config: SynthesizerConfig,
conversation_id: str = None,
per_chunk_allowance_seconds: int = PER_CHUNK_ALLOWANCE_SECONDS,
logger: Optional[logging.Logger] = None,
@ -80,11 +76,11 @@ class StreamingConversation:
self.id = conversation_id or create_conversation_id()
self.logger = logger or logging.getLogger(__name__)
self.output_device = output_device
self.transcriber = transcriber
self.transcriber = create_transcriber(transcriber_config)
self.transcriber.set_on_response(self.on_transcription_response)
self.transcriber_task = None
self.agent = agent
self.synthesizer = synthesizer
self.agent = create_agent(agent_config)
self.synthesizer = create_synthesizer(synthesizer_config)
self.synthesizer_event_loop = asyncio.new_event_loop()
self.synthesizer_thread = threading.Thread(
name="synthesizer",

View file

@ -4,7 +4,7 @@ import re
from typing import Any, Optional
from xml.etree import ElementTree
import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv
from vocode import getenv
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
from vocode.streaming.models.message import BaseMessage, SSMLMessage
@ -20,7 +20,6 @@ from vocode.streaming.synthesizer.base_synthesizer import (
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
from vocode.streaming.models.audio_encoding import AudioEncoding
load_dotenv()
NAMESPACES = {
"mstts": "https://www.w3.org/2001/mstts",
@ -59,8 +58,8 @@ class AzureSynthesizer(BaseSynthesizer):
self.synthesizer_config = synthesizer_config
# Instantiates a client
speech_config = speechsdk.SpeechConfig(
subscription=os.environ.get("AZURE_SPEECH_KEY"),
region=os.environ.get("AZURE_SPEECH_REGION"),
subscription=getenv("AZURE_SPEECH_KEY"),
region=getenv("AZURE_SPEECH_REGION"),
)
if self.synthesizer_config.audio_encoding == AudioEncoding.LINEAR16:
if self.synthesizer_config.sampling_rate == 44100:

View file

@ -1,7 +1,6 @@
from typing import Any, Optional
import os
from dotenv import load_dotenv
import requests
from vocode import getenv
from vocode.streaming.synthesizer.base_synthesizer import (
BaseSynthesizer,
@ -11,9 +10,7 @@ from vocode.streaming.models.synthesizer import ElevenLabsSynthesizerConfig
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
from vocode.streaming.models.message import BaseMessage
load_dotenv()
ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY")
ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
OBAMA_VOICE_ID = "vLITIS0SH2an5iQGxw5C"
@ -22,7 +19,7 @@ OBAMA_VOICE_ID = "vLITIS0SH2an5iQGxw5C"
class ElevenLabsSynthesizer(BaseSynthesizer):
def __init__(self, config: ElevenLabsSynthesizerConfig):
super().__init__(config)
self.api_key = config.api_key
self.api_key = getenv("ELEVEN_LABS_API_KEY")
self.voice_id = config.voice_id or ADAM_VOICE_ID
self.words_per_minute = 150

View file

@ -2,7 +2,6 @@ import io
import wave
from typing import Any, Optional
from dotenv import load_dotenv
from google.cloud import texttospeech_v1beta1 as tts
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
@ -16,8 +15,6 @@ from vocode.streaming.models.synthesizer import GoogleSynthesizerConfig
from vocode.streaming.models.audio_encoding import AudioEncoding
from vocode.streaming.utils import convert_wav
load_dotenv()
class GoogleSynthesizer(BaseSynthesizer):
OFFSET_SECONDS = 0.5

View file

@ -1,78 +0,0 @@
import audioop
import base64
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
from vocode.streaming.models.audio_encoding import AudioEncoding
from vocode.streaming.models.message import BaseMessage
from .base_synthesizer import BaseSynthesizer, SynthesisResult, encode_as_wav
from typing import Any, Optional
import os
import io
import wave
from dotenv import load_dotenv
import requests
from ..utils import convert_linear_audio, convert_wav
from ..models.synthesizer import ElevenLabsSynthesizerConfig, RimeSynthesizerConfig
load_dotenv()
RIME_API_KEY = os.getenv("RIME_API_KEY")
RIME_BASE_URL = os.getenv("RIME_BASE_URL")
class RimeSynthesizer(BaseSynthesizer):
def __init__(self, config: RimeSynthesizerConfig):
super().__init__(config)
self.speaker = config.speaker
def create_speech(
self,
message: BaseMessage,
chunk_size: int,
bot_sentiment: Optional[BotSentiment] = None,
) -> SynthesisResult:
url = RIME_BASE_URL
headers = {"Authorization": f"Bearer {RIME_API_KEY}"}
body = {"inputs": {"text": message.text, "speaker": self.speaker}}
response = requests.post(url, headers=headers, json=body)
def chunk_generator(audio, chunk_transform=lambda x: x):
for i in range(0, len(audio), chunk_size):
chunk = audio[i : i + chunk_size]
yield SynthesisResult.ChunkResult(
chunk_transform(chunk), len(chunk) != chunk_size
)
assert response.ok, response.text
data = response.json().get("data")
assert data
audio_file = io.BytesIO(base64.b64decode(data))
if self.synthesizer_config.audio_encoding == AudioEncoding.LINEAR16:
output_bytes = convert_wav(
audio_file,
output_sample_rate=self.synthesizer_config.sampling_rate,
output_encoding=AudioEncoding.LINEAR16,
)
elif self.synthesizer_config.audio_encoding == AudioEncoding.MULAW:
output_bytes = convert_wav(
audio_file,
output_sample_rate=self.synthesizer_config.sampling_rate,
output_encoding=AudioEncoding.MULAW,
)
if self.synthesizer_config.should_encode_as_wav:
output_generator = chunk_generator(
output_bytes, chunk_transform=encode_as_wav
)
else:
output_generator = chunk_generator(output_bytes)
return SynthesisResult(
output_generator,
lambda seconds: self.get_message_cutoff_from_total_response_length(
message, seconds, len(output_bytes)
),
)

View file

@ -1,5 +1,4 @@
import logging
import os
from typing import Optional
from redis import Redis

View file

@ -4,6 +4,7 @@ from enum import Enum
import json
import logging
from typing import Optional
from vocode import getenv
from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.factory import (
create_agent,
@ -42,38 +43,36 @@ class Call(StreamingConversation):
self,
base_url: str,
config_manager: BaseConfigManager,
agent: BaseAgent,
twilio_config: TwilioConfig,
transcriber: Optional[BaseTranscriber] = None,
synthesizer: Optional[BaseSynthesizer] = None,
twilio_sid=None,
agent_config: BaseAgent,
transcriber_config: Optional[BaseTranscriber] = None,
synthesizer_config: Optional[BaseSynthesizer] = None,
twilio_config: Optional[TwilioConfig] = None,
twilio_sid: Optional[str] = None,
conversation_id: Optional[str] = None,
logger: Optional[logging.Logger] = None,
):
self.base_url = base_url
self.config_manager = config_manager
self.output_device = TwilioOutputDevice()
self.twilio_config = twilio_config
self.twilio_config = twilio_config or TwilioConfig(
account_sid=getenv("TWILIO_ACCOUNT_SID"),
auth_token=getenv("TWILIO_AUTH_TOKEN"),
)
self.twilio_client = create_twilio_client(twilio_config)
super().__init__(
self.output_device,
transcriber
or DeepgramTranscriber(
DeepgramTranscriberConfig(
sampling_rate=8000,
audio_encoding=AudioEncoding.MULAW,
chunk_size=self.CHUNK_SIZE,
model="voicemail",
endpointing_config=PunctuationEndpointingConfig(),
),
logger=logger,
transcriber_config
or DeepgramTranscriberConfig(
sampling_rate=8000,
audio_encoding=AudioEncoding.MULAW,
chunk_size=self.CHUNK_SIZE,
model="voicemail",
endpointing_config=PunctuationEndpointingConfig(),
),
agent,
synthesizer
or AzureSynthesizer(
AzureSynthesizerConfig(
sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
)
agent_config,
synthesizer_config
or AzureSynthesizerConfig(
sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
),
conversation_id=conversation_id,
per_chunk_allowance_seconds=0.01,
@ -94,9 +93,9 @@ class Call(StreamingConversation):
base_url=base_url,
logger=logger,
config_manager=config_manager,
agent=create_agent(call_config.agent_config),
transcriber=create_transcriber(call_config.transcriber_config),
synthesizer=create_synthesizer(call_config.synthesizer_config),
agent_config=call_config.agent_config,
transcriber_config=call_config.transcriber_config,
synthesizer_config=call_config.synthesizer_config,
twilio_config=call_config.twilio_config,
twilio_sid=call_config.twilio_sid,
conversation_id=conversation_id,

View file

@ -1,6 +1,6 @@
import logging
from typing import Optional
from twilio.rest import Client
from vocode import getenv
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import (
@ -33,7 +33,7 @@ class OutboundCall:
from_phone: str,
config_manager: BaseConfigManager,
agent_config: AgentConfig,
twilio_config: TwilioConfig,
twilio_config: Optional[TwilioConfig] = None,
transcriber_config: Optional[TranscriberConfig] = None,
synthesizer_config: Optional[SynthesizerConfig] = None,
conversation_id: Optional[str] = None,
@ -56,7 +56,10 @@ class OutboundCall:
)
self.conversation_id = conversation_id or create_conversation_id()
self.logger = logger
self.twilio_config = twilio_config
self.twilio_config = twilio_config or TwilioConfig(
account_sid=getenv("TWILIO_ACCOUNT_SID"),
auth_token=getenv("TWILIO_AUTH_TOKEN"),
)
self.twilio_client = create_twilio_client(twilio_config)
self.twilio_sid = None

View file

@ -24,10 +24,10 @@ class ZoomDialIn(OutboundCall):
zoom_meeting_password: Optional[str],
from_phone: str,
config_manager: BaseConfigManager,
twilio_config: TwilioConfig,
agent_config: AgentConfig,
transcriber_config: TranscriberConfig,
synthesizer_config: SynthesizerConfig,
twilio_config: Optional[TwilioConfig] = None,
conversation_id: Optional[str] = None,
logger: Optional[logging.Logger] = None,
):

View file

@ -34,7 +34,6 @@ from vocode.streaming.models.telephony import (
EndOutboundCall,
TwilioConfig,
)
from twilio.rest import Client
from vocode.streaming.telephony.conversation.call import Call
from vocode.streaming.telephony.templates import Templater
@ -45,7 +44,7 @@ from vocode.streaming.utils import create_conversation_id
class InboundCallConfig(BaseModel):
url: str
agent_config: AgentConfig
twilio_config: TwilioConfig
twilio_config: Optional[TwilioConfig] = None
transcriber_config: Optional[TranscriberConfig] = None
synthesizer_config: Optional[SynthesizerConfig] = None
@ -92,7 +91,7 @@ class TelephonyServer:
def create_inbound_route(
self,
agent_config: AgentConfig,
twilio_config: TwilioConfig,
twilio_config: Optional[TwilioConfig] = None,
transcriber_config: Optional[TranscriberConfig] = None,
synthesizer_config: Optional[SynthesizerConfig] = None,
):

View file

@ -1,12 +1,8 @@
import os
from typing import Optional
from dotenv import load_dotenv
from twilio.rest import Client
from vocode.streaming.models.telephony import TwilioConfig
load_dotenv()
def create_twilio_client(twilio_config: TwilioConfig):
return Client(twilio_config.account_sid, twilio_config.auth_token)

View file

@ -1,10 +1,9 @@
import asyncio
import json
import logging
import os
from dotenv import load_dotenv
import websockets
from urllib.parse import urlencode
from vocode import getenv
from vocode.streaming.models.transcriber import AssemblyAITranscriberConfig
from vocode.streaming.models.websocket import AudioMessage
@ -14,9 +13,7 @@ from vocode.streaming.transcriber.base_transcriber import (
)
from vocode.streaming.models.audio_encoding import AudioEncoding
load_dotenv()
ASSEMBLY_AI_API_KEY = os.environ.get("ASSEMBLY_AI_API_KEY")
ASSEMBLY_AI_URL = "wss://api.assemblyai.com/v2/realtime/ws"
@ -27,6 +24,7 @@ class AssemblyAITranscriber(BaseTranscriber):
logger: logging.Logger = None,
):
super().__init__(transcriber_config)
self.api_key = getenv("ASSEMBLY_AI_API_KEY")
self._ended = False
self.is_ready = False
self.logger = logger or logging.getLogger(__name__)
@ -61,7 +59,7 @@ class AssemblyAITranscriber(BaseTranscriber):
async with websockets.connect(
URL,
extra_headers=(("Authorization", ASSEMBLY_AI_API_KEY),),
extra_headers=(("Authorization", self.api_key),),
ping_interval=5,
ping_timeout=20,
) as ws:

View file

@ -1,11 +1,8 @@
from dotenv import load_dotenv
from typing import Callable, Optional, Awaitable
from vocode.streaming.utils import convert_wav
from vocode.streaming.models.transcriber import EndpointingConfig, TranscriberConfig
load_dotenv()
class Transcription:
def __init__(

View file

@ -1,12 +1,11 @@
import asyncio
import json
import logging
import os
from dotenv import load_dotenv
import websockets
from websockets.client import WebSocketClientProtocol
import audioop
from urllib.parse import urlencode
from vocode import getenv
from vocode.streaming.transcriber.base_transcriber import (
BaseTranscriber,
@ -19,9 +18,7 @@ from vocode.streaming.models.transcriber import (
)
from vocode.streaming.models.audio_encoding import AudioEncoding
load_dotenv()
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
PUNCTUATION_TERMINATORS = [".", "!", "?"]
NUM_RESTARTS = 5
@ -33,6 +30,7 @@ class DeepgramTranscriber(BaseTranscriber):
logger: logging.Logger = None,
):
super().__init__(transcriber_config)
self.api_key = getenv("DEEPGRAM_API_KEY")
self.transcriber_config = transcriber_config
self._ended = False
self.warmed_up = False
@ -155,7 +153,7 @@ class DeepgramTranscriber(BaseTranscriber):
return data["duration"]
async def process(self, warmup=True):
extra_headers = {"Authorization": f"Token {DEEPGRAM_API_KEY}"}
extra_headers = {"Authorization": f"Token {self.api_key}"}
self.audio_queue = asyncio.Queue()
async with websockets.connect(

View file

@ -1,19 +1,12 @@
import os
import asyncio
import openai
from dotenv import load_dotenv
import numpy as np
import requests
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
from vocode import getenv
PLATFORM = "pyq" if os.getenv("USE_PYQ_EMBEDDINGS", "false") == "true" else "openai"
SIMILARITY_THRESHOLD = 0.9
SIMILARITY_THRESHOLD_PYQ = 0.7
EMBEDDING_SIZE = 1536
PYQ_EMBEDDING_SIZE = 768
GOODBYE_PHRASES = [
"bye",
"goodbye",
@ -24,7 +17,6 @@ GOODBYE_PHRASES = [
"have a good day",
"have a good night",
]
PYQ_API_URL = "https://embeddings.pyqai.com"
class GoodbyeModel:
@ -34,12 +26,10 @@ class GoodbyeModel:
os.path.dirname(__file__), "goodbye_embeddings"
),
):
openai.api_key = getenv("OPENAI_API_KEY")
self.goodbye_embeddings = self.load_or_create_embeddings(
f"{embeddings_cache_path}/goodbye_embeddings.npy"
)
self.goodbye_embeddings_pyq = self.load_or_create_embeddings(
f"{embeddings_cache_path}/goodbye_embeddings_pyq.npy"
)
def load_or_create_embeddings(self, path):
if os.path.exists(path):
@ -49,50 +39,33 @@ class GoodbyeModel:
np.save(path, embeddings)
return embeddings
def create_embeddings(self, platform=PLATFORM):
def create_embeddings(self):
print("Creating embeddings...")
size = EMBEDDING_SIZE if platform == "openai" else PYQ_EMBEDDING_SIZE
size = EMBEDDING_SIZE
embeddings = np.empty((size, len(GOODBYE_PHRASES)))
for i, goodbye_phrase in enumerate(GOODBYE_PHRASES):
embeddings[:, i] = self.create_embedding(goodbye_phrase, platform=platform)
embeddings[:, i] = self.create_embedding(goodbye_phrase)
return embeddings
async def is_goodbye(self, text: str, platform=PLATFORM) -> bool:
async def is_goodbye(self, text: str) -> bool:
if "bye" in text.lower():
return True
embedding = self.create_embedding(text.strip().lower(), platform=platform)
goodbye_embeddings = (
self.goodbye_embeddings
if platform == "openai"
else self.goodbye_embeddings_pyq
)
threshold = (
SIMILARITY_THRESHOLD if platform == "openai" else SIMILARITY_THRESHOLD_PYQ
)
similarity_results = embedding @ goodbye_embeddings
return np.max(similarity_results) > threshold
embedding = self.create_embedding(text.strip().lower())
similarity_results = embedding @ self.goodbye_embeddings
return np.max(similarity_results) > SIMILARITY_THRESHOLD
def create_embedding(self, text, platform=PLATFORM) -> np.array:
if platform == "openai":
return np.array(
openai.Embedding.create(input=text, model="text-embedding-ada-002")[
"data"
][0]["embedding"]
)
elif platform == "pyq":
return np.array(
requests.post(
PYQ_API_URL,
headers={
"Content-Type": "application/json",
"Authorization": os.getenv("PYQ_API_KEY"),
},
json={"input_sequence": [text], "account_id": "400"},
).json()["response"][0]
)
def create_embedding(self, text) -> np.array:
return np.array(
openai.Embedding.create(input=text, model="text-embedding-ada-002")["data"][
0
]["embedding"]
)
if __name__ == "__main__":
from dotenv import load_dotenv
load_dotenv()
async def main():
model = GoodbyeModel()

View file

@ -1,4 +1,3 @@
import os
from typing import Optional
import openai
from langchain.prompts import (
@ -10,6 +9,7 @@ from langchain.prompts import (
from langchain.chains import ConversationChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from vocode import getenv
from vocode.turn_based.agent.base_agent import BaseAgent
@ -25,7 +25,7 @@ class ChatGPTAgent(BaseAgent):
max_tokens: int = 100,
):
super().__init__(initial_message=initial_message)
openai.api_key = os.getenv("OPENAI_API_KET", api_key)
openai.api_key = getenv("OPENAI_API_KET", api_key)
if not openai.api_key:
raise ValueError("OpenAI API key not provided")
self.prompt = ChatPromptTemplate.from_messages(

View file

@ -1,7 +1,7 @@
import os
from typing import Optional
import azure.cognitiveservices.speech as speechsdk
from pydub import AudioSegment
from vocode import getenv
from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer
@ -15,8 +15,8 @@ class AzureSynthesizer(BaseSynthesizer):
):
self.sampling_rate = sampling_rate
speech_config = speechsdk.SpeechConfig(
subscription=os.getenv("AZURE_SPEECH_KEY", api_key),
region=os.getenv("AZURE_SPEECH_REGION", region),
subscription=getenv("AZURE_SPEECH_KEY", api_key),
region=getenv("AZURE_SPEECH_REGION", region),
)
if self.sampling_rate == 44100:
speech_config.set_speech_synthesis_output_format(

View file

@ -1,8 +1,8 @@
import io
import os
from typing import Optional
from pydub import AudioSegment
import requests
from vocode import getenv
from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer
ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
@ -11,7 +11,7 @@ ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
class ElevenLabsSynthesizer(BaseSynthesizer):
def __init__(self, voice_id: str, api_key: Optional[str] = None):
self.voice_id = voice_id
self.api_key = os.getenv("ELEVEN_LABS_API_KEY", api_key)
self.api_key = getenv("ELEVEN_LABS_API_KEY", api_key)
def synthesize(self, text: str) -> AudioSegment:
url = ELEVEN_LABS_BASE_URL + f"text-to-speech/{self.voice_id}"

View file

@ -1,15 +1,15 @@
from typing import Optional
from pydub import AudioSegment
import io
import os
import openai
from vocode import getenv
from vocode.turn_based.transcriber.base_transcriber import BaseTranscriber
class WhisperTranscriber(BaseTranscriber):
def __init__(self, api_key: Optional[str] = None):
openai.api_key = os.getenv("OPENAI_API_KEY", api_key)
openai.api_key = getenv("OPENAI_API_KEY", api_key)
if not openai.api_key:
raise ValueError("OpenAI API key not provided")