Remove pyq goodbye model and Rime synthesizer; fix environment loading

This commit is contained in:
Ajay Raj 2023-03-28 10:20:36 -07:00
commit 1dc7bc74c3
28 changed files with 143 additions and 285 deletions

View file

@ -2,14 +2,15 @@ import asyncio
import logging
import signal
from dotenv import load_dotenv
import os
load_dotenv()
from vocode.streaming.hosted_streaming_conversation import HostedStreamingConversation
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.helpers import create_microphone_input_and_speaker_output
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
GoogleTranscriberConfig,
)
from vocode.streaming.models.agent import (
ChatGPTAgentConfig,
@ -23,10 +24,6 @@ from vocode.streaming.models.agent import (
)
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
import vocode
load_dotenv()
vocode.api_key = os.getenv("VOCODE_API_KEY")
logging.basicConfig()
logging.root.setLevel(logging.INFO)
@ -41,7 +38,8 @@ if __name__ == "__main__":
input_device=microphone_input,
output_device=speaker_output,
transcriber_config=DeepgramTranscriberConfig.from_input_device(
microphone_input
microphone_input,
endpointing_config=PunctuationEndpointingConfig(),
),
agent_config=ChatGPTAgentConfig(
initial_message=BaseMessage(text="Hello!"),

View file

@ -2,7 +2,9 @@ import asyncio
import logging
import signal
from dotenv import load_dotenv
import os
load_dotenv()
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.helpers import create_microphone_input_and_speaker_output
@ -31,8 +33,6 @@ import vocode
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
load_dotenv()
vocode.api_key = os.getenv("VOCODE_API_KEY")
logging.basicConfig()
logger = logging.getLogger(__name__)
@ -46,23 +46,17 @@ async def main():
conversation = StreamingConversation(
output_device=speaker_output,
transcriber=DeepgramTranscriber(
DeepgramTranscriberConfig.from_input_device(
microphone_input, endpointing_config=PunctuationEndpointingConfig()
)
transcriber=DeepgramTranscriberConfig.from_input_device(
microphone_input, endpointing_config=PunctuationEndpointingConfig()
),
agent=ChatGPTAgent(
ChatGPTAgentConfig(
initial_message=BaseMessage(text="What up"),
prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
agent=ChatGPTAgentConfig(
initial_message=BaseMessage(text="What up"),
prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
generate_responses=True,
cut_off_response=CutOffResponse(),
)
),
synthesizer=AzureSynthesizer(
AzureSynthesizerConfig.from_output_device(speaker_output),
generate_responses=True,
cut_off_response=CutOffResponse(),
),
synthesizer=AzureSynthesizerConfig.from_output_device(speaker_output),
logger=logger,
)
await conversation.start()

View file

@ -1,7 +1,7 @@
import logging
from fastapi import FastAPI
import os
from dotenv import load_dotenv
from vocode import getenv
load_dotenv()
@ -34,13 +34,12 @@ telephony_server = TelephonyServer(
url="/inbound_call",
agent_config=ChatGPTAgentConfig(
initial_message=BaseMessage(text="What up"),
prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
prompt_preamble="Have a pleasant conversation about life",
generate_responses=True,
),
twilio_config=TwilioConfig(
account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
account_sid=getenv("TWILIO_ACCOUNT_SID"),
auth_token=getenv("TWILIO_AUTH_TOKEN"),
),
)
],
@ -49,21 +48,22 @@ hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus,
app.include_router(telephony_server.get_router())
# outbound_call = OutboundCall(
# base_url=BASE_URL,
# to_phone="+14088926228",
# from_phone="+14086600744",
# config_manager=config_manager,
# agent_config=ChatGPTAgentConfig(
# initial_message=BaseMessage(text="What up"),
# prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
# hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
# generate_responses=True,
# ),
# twilio_config=TwilioConfig(
# account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
# auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
# ),
# logger=logger,
# )
# outbound_call.start()
outbound_call = OutboundCall(
base_url=BASE_URL,
to_phone="+14088926228",
from_phone="+14086600744",
config_manager=config_manager,
agent_config=ChatGPTAgentConfig(
initial_message=BaseMessage(text="What up"),
prompt_preamble="Have a pleasant conversation about life",
generate_responses=True,
),
twilio_config=TwilioConfig(
account_sid=getenv("TWILIO_ACCOUNT_SID"),
auth_token=getenv("TWILIO_AUTH_TOKEN"),
),
logger=logger,
)
input("Press enter to start call...")
outbound_call.start()

View file

@ -1,6 +1,6 @@
import logging
from dotenv import load_dotenv
import os
from vocode import getenv
from vocode.helpers import create_microphone_input_and_speaker_output
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
@ -25,15 +25,15 @@ if __name__ == "__main__":
conversation = TurnBasedConversation(
input_device=microphone_input,
output_device=speaker_output,
transcriber=WhisperTranscriber(api_key=os.getenv("OPENAI_API_KEY")),
transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
agent=ChatGPTAgent(
system_prompt="The AI is having a pleasant conversation about life",
initial_message="Hello!",
api_key=os.getenv("OPENAI_API_KEY"),
api_key=getenv("OPENAI_API_KEY"),
),
synthesizer=ElevenLabsSynthesizer(
voice_id=ADAM_VOICE_ID,
api_key=os.getenv("ELEVEN_LABS_API_KEY"),
api_key=getenv("ELEVEN_LABS_API_KEY"),
),
logger=logger,
)

View file

@ -1,7 +1,17 @@
import os
from dotenv import load_dotenv
load_dotenv()
api_key = os.getenv("VOCODE_API_KEY")
base_url = os.getenv("VOCODE_BASE_URL", "api.vocode.dev")
environment = {}
def setenv(**kwargs):
for key, value in kwargs.items():
environment[key] = value
def getenv(key, default=None):
return environment.get(key) or os.getenv(key, default)
api_key = getenv("VOCODE_API_KEY")
base_url = getenv("VOCODE_BASE_URL", "api.vocode.dev")

View file

@ -1,4 +1,3 @@
import os
import random
import time
from langchain.prompts import (
@ -16,23 +15,20 @@ import openai
import json
from typing import Generator, Optional
from dotenv import load_dotenv
from typing import Generator
import logging
from vocode import getenv
from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig
from vocode.streaming.utils.sse_client import SSEClient
from vocode.streaming.agent.utils import stream_llm_response
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")
class ChatGPTAgent(BaseAgent):
def __init__(self, agent_config: ChatGPTAgentConfig, logger: logging.Logger = None):
super().__init__(agent_config)
openai.api_key = getenv("OPENAI_API_KEY")
self.agent_config = agent_config
self.logger = logger or logging.getLogger(__name__)
self.logger.setLevel(logging.DEBUG)
@ -112,7 +108,7 @@ class ChatGPTAgent(BaseAgent):
"https://api.openai.com/v1/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
"Authorization": f"Bearer {getenv('OPENAI_API_KEY')}",
},
json={
"model": self.agent_config.model_name,

View file

@ -1,7 +1,6 @@
import re
from typing import Optional
from dotenv import load_dotenv
from langchain import OpenAI
from langchain.llms import OpenAIChat
from typing import Generator
@ -11,8 +10,6 @@ from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.agent.utils import stream_llm_response
from vocode.streaming.models.agent import LLMAgentConfig
load_dotenv()
class LLMAgent(BaseAgent):
SENTENCE_ENDINGS = [".", "!", "?"]

View file

@ -10,7 +10,6 @@ from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
from vocode.streaming.synthesizer.google_synthesizer import GoogleSynthesizer
from vocode.streaming.synthesizer.rime_synthesizer import RimeSynthesizer
from vocode.streaming.transcriber.assembly_ai_transcriber import AssemblyAITranscriber
from vocode.streaming.transcriber.base_transcriber import BaseTranscriber
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
@ -48,11 +47,6 @@ def create_synthesizer(synthesizer_config: SynthesizerConfig) -> BaseSynthesizer
elif synthesizer_config.type == SynthesizerType.AZURE:
return AzureSynthesizer(synthesizer_config)
elif synthesizer_config.type == SynthesizerType.ELEVEN_LABS:
kwargs = {}
if synthesizer_config.voice_id:
kwargs["voice_id"] = synthesizer_config.voice_id
return ElevenLabsSynthesizer(synthesizer_config, **kwargs)
elif synthesizer_config.type == SynthesizerType.RIME:
return RimeSynthesizer(synthesizer_config)
return ElevenLabsSynthesizer(synthesizer_config)
else:
raise Exception("Invalid synthesizer config")

View file

@ -2,8 +2,6 @@ import websockets
from websockets.exceptions import ConnectionClosedOK
from websockets.client import WebSocketClientProtocol
import asyncio
from dotenv import load_dotenv
import os
import logging
import threading
import queue
@ -22,8 +20,6 @@ from vocode.streaming.models.websocket import (
StopMessage,
)
load_dotenv()
class HostedStreamingConversation:
def __init__(

View file

@ -8,15 +8,18 @@ import time
import secrets
import random
from dotenv import load_dotenv
from vocode.streaming.agent.bot_sentiment_analyser import (
BotSentiment,
BotSentimentAnalyser,
)
from vocode.streaming.agent.information_retrieval_agent import InformationRetrievalAgent
from vocode.streaming.factory import (
create_agent,
create_synthesizer,
create_transcriber,
)
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.output_device.base_output_device import BaseOutputDevice
from vocode.streaming.synthesizer.rime_synthesizer import RimeSynthesizer
from vocode.streaming.transcriber.assembly_ai_transcriber import AssemblyAITranscriber
from vocode.streaming.utils.goodbye_model import GoodbyeModel
from vocode.streaming.utils.transcript import Transcript
@ -48,9 +51,6 @@ from vocode.streaming.synthesizer.base_synthesizer import (
SynthesisResult,
FillerAudio,
)
from vocode.streaming.synthesizer.google_synthesizer import GoogleSynthesizer
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
from vocode.streaming.utils import (
create_conversation_id,
create_loop_in_thread,
@ -60,19 +60,15 @@ from vocode.streaming.transcriber.base_transcriber import (
Transcription,
BaseTranscriber,
)
from vocode.streaming.transcriber.google_transcriber import GoogleTranscriber
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
load_dotenv()
class StreamingConversation:
def __init__(
self,
output_device: BaseOutputDevice,
transcriber: BaseTranscriber,
agent: BaseAgent,
synthesizer: BaseSynthesizer,
transcriber_config: TranscriberConfig,
agent_config: AgentConfig,
synthesizer_config: SynthesizerConfig,
conversation_id: str = None,
per_chunk_allowance_seconds: int = PER_CHUNK_ALLOWANCE_SECONDS,
logger: Optional[logging.Logger] = None,
@ -80,11 +76,11 @@ class StreamingConversation:
self.id = conversation_id or create_conversation_id()
self.logger = logger or logging.getLogger(__name__)
self.output_device = output_device
self.transcriber = transcriber
self.transcriber = create_transcriber(transcriber_config)
self.transcriber.set_on_response(self.on_transcription_response)
self.transcriber_task = None
self.agent = agent
self.synthesizer = synthesizer
self.agent = create_agent(agent_config)
self.synthesizer = create_synthesizer(synthesizer_config)
self.synthesizer_event_loop = asyncio.new_event_loop()
self.synthesizer_thread = threading.Thread(
name="synthesizer",

View file

@ -4,7 +4,7 @@ import re
from typing import Any, Optional
from xml.etree import ElementTree
import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv
from vocode import getenv
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
from vocode.streaming.models.message import BaseMessage, SSMLMessage
@ -20,7 +20,6 @@ from vocode.streaming.synthesizer.base_synthesizer import (
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
from vocode.streaming.models.audio_encoding import AudioEncoding
load_dotenv()
NAMESPACES = {
"mstts": "https://www.w3.org/2001/mstts",
@ -59,8 +58,8 @@ class AzureSynthesizer(BaseSynthesizer):
self.synthesizer_config = synthesizer_config
# Instantiates a client
speech_config = speechsdk.SpeechConfig(
subscription=os.environ.get("AZURE_SPEECH_KEY"),
region=os.environ.get("AZURE_SPEECH_REGION"),
subscription=getenv("AZURE_SPEECH_KEY"),
region=getenv("AZURE_SPEECH_REGION"),
)
if self.synthesizer_config.audio_encoding == AudioEncoding.LINEAR16:
if self.synthesizer_config.sampling_rate == 44100:

View file

@ -1,7 +1,6 @@
from typing import Any, Optional
import os
from dotenv import load_dotenv
import requests
from vocode import getenv
from vocode.streaming.synthesizer.base_synthesizer import (
BaseSynthesizer,
@ -11,9 +10,7 @@ from vocode.streaming.models.synthesizer import ElevenLabsSynthesizerConfig
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
from vocode.streaming.models.message import BaseMessage
load_dotenv()
ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY")
ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
OBAMA_VOICE_ID = "vLITIS0SH2an5iQGxw5C"
@ -22,7 +19,7 @@ OBAMA_VOICE_ID = "vLITIS0SH2an5iQGxw5C"
class ElevenLabsSynthesizer(BaseSynthesizer):
def __init__(self, config: ElevenLabsSynthesizerConfig):
super().__init__(config)
self.api_key = config.api_key
self.api_key = getenv("ELEVEN_LABS_API_KEY")
self.voice_id = config.voice_id or ADAM_VOICE_ID
self.words_per_minute = 150

View file

@ -2,7 +2,6 @@ import io
import wave
from typing import Any, Optional
from dotenv import load_dotenv
from google.cloud import texttospeech_v1beta1 as tts
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
@ -16,8 +15,6 @@ from vocode.streaming.models.synthesizer import GoogleSynthesizerConfig
from vocode.streaming.models.audio_encoding import AudioEncoding
from vocode.streaming.utils import convert_wav
load_dotenv()
class GoogleSynthesizer(BaseSynthesizer):
OFFSET_SECONDS = 0.5

View file

@ -1,78 +0,0 @@
import audioop
import base64
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
from vocode.streaming.models.audio_encoding import AudioEncoding
from vocode.streaming.models.message import BaseMessage
from .base_synthesizer import BaseSynthesizer, SynthesisResult, encode_as_wav
from typing import Any, Optional
import os
import io
import wave
from dotenv import load_dotenv
import requests
from ..utils import convert_linear_audio, convert_wav
from ..models.synthesizer import ElevenLabsSynthesizerConfig, RimeSynthesizerConfig
load_dotenv()
RIME_API_KEY = os.getenv("RIME_API_KEY")
RIME_BASE_URL = os.getenv("RIME_BASE_URL")
class RimeSynthesizer(BaseSynthesizer):
def __init__(self, config: RimeSynthesizerConfig):
super().__init__(config)
self.speaker = config.speaker
def create_speech(
self,
message: BaseMessage,
chunk_size: int,
bot_sentiment: Optional[BotSentiment] = None,
) -> SynthesisResult:
url = RIME_BASE_URL
headers = {"Authorization": f"Bearer {RIME_API_KEY}"}
body = {"inputs": {"text": message.text, "speaker": self.speaker}}
response = requests.post(url, headers=headers, json=body)
def chunk_generator(audio, chunk_transform=lambda x: x):
for i in range(0, len(audio), chunk_size):
chunk = audio[i : i + chunk_size]
yield SynthesisResult.ChunkResult(
chunk_transform(chunk), len(chunk) != chunk_size
)
assert response.ok, response.text
data = response.json().get("data")
assert data
audio_file = io.BytesIO(base64.b64decode(data))
if self.synthesizer_config.audio_encoding == AudioEncoding.LINEAR16:
output_bytes = convert_wav(
audio_file,
output_sample_rate=self.synthesizer_config.sampling_rate,
output_encoding=AudioEncoding.LINEAR16,
)
elif self.synthesizer_config.audio_encoding == AudioEncoding.MULAW:
output_bytes = convert_wav(
audio_file,
output_sample_rate=self.synthesizer_config.sampling_rate,
output_encoding=AudioEncoding.MULAW,
)
if self.synthesizer_config.should_encode_as_wav:
output_generator = chunk_generator(
output_bytes, chunk_transform=encode_as_wav
)
else:
output_generator = chunk_generator(output_bytes)
return SynthesisResult(
output_generator,
lambda seconds: self.get_message_cutoff_from_total_response_length(
message, seconds, len(output_bytes)
),
)

View file

@ -1,5 +1,4 @@
import logging
import os
from typing import Optional
from redis import Redis

View file

@ -4,6 +4,7 @@ from enum import Enum
import json
import logging
from typing import Optional
from vocode import getenv
from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.factory import (
create_agent,
@ -42,38 +43,36 @@ class Call(StreamingConversation):
self,
base_url: str,
config_manager: BaseConfigManager,
agent: BaseAgent,
twilio_config: TwilioConfig,
transcriber: Optional[BaseTranscriber] = None,
synthesizer: Optional[BaseSynthesizer] = None,
twilio_sid=None,
agent_config: BaseAgent,
transcriber_config: Optional[BaseTranscriber] = None,
synthesizer_config: Optional[BaseSynthesizer] = None,
twilio_config: Optional[TwilioConfig] = None,
twilio_sid: Optional[str] = None,
conversation_id: Optional[str] = None,
logger: Optional[logging.Logger] = None,
):
self.base_url = base_url
self.config_manager = config_manager
self.output_device = TwilioOutputDevice()
self.twilio_config = twilio_config
self.twilio_config = twilio_config or TwilioConfig(
account_sid=getenv("TWILIO_ACCOUNT_SID"),
auth_token=getenv("TWILIO_AUTH_TOKEN"),
)
self.twilio_client = create_twilio_client(twilio_config)
super().__init__(
self.output_device,
transcriber
or DeepgramTranscriber(
DeepgramTranscriberConfig(
sampling_rate=8000,
audio_encoding=AudioEncoding.MULAW,
chunk_size=self.CHUNK_SIZE,
model="voicemail",
endpointing_config=PunctuationEndpointingConfig(),
),
logger=logger,
transcriber_config
or DeepgramTranscriberConfig(
sampling_rate=8000,
audio_encoding=AudioEncoding.MULAW,
chunk_size=self.CHUNK_SIZE,
model="voicemail",
endpointing_config=PunctuationEndpointingConfig(),
),
agent,
synthesizer
or AzureSynthesizer(
AzureSynthesizerConfig(
sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
)
agent_config,
synthesizer_config
or AzureSynthesizerConfig(
sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
),
conversation_id=conversation_id,
per_chunk_allowance_seconds=0.01,
@ -94,9 +93,9 @@ class Call(StreamingConversation):
base_url=base_url,
logger=logger,
config_manager=config_manager,
agent=create_agent(call_config.agent_config),
transcriber=create_transcriber(call_config.transcriber_config),
synthesizer=create_synthesizer(call_config.synthesizer_config),
agent_config=call_config.agent_config,
transcriber_config=call_config.transcriber_config,
synthesizer_config=call_config.synthesizer_config,
twilio_config=call_config.twilio_config,
twilio_sid=call_config.twilio_sid,
conversation_id=conversation_id,

View file

@ -1,6 +1,6 @@
import logging
from typing import Optional
from twilio.rest import Client
from vocode import getenv
from vocode.streaming.models.agent import AgentConfig
from vocode.streaming.models.synthesizer import (
@ -33,7 +33,7 @@ class OutboundCall:
from_phone: str,
config_manager: BaseConfigManager,
agent_config: AgentConfig,
twilio_config: TwilioConfig,
twilio_config: Optional[TwilioConfig] = None,
transcriber_config: Optional[TranscriberConfig] = None,
synthesizer_config: Optional[SynthesizerConfig] = None,
conversation_id: Optional[str] = None,
@ -56,7 +56,10 @@ class OutboundCall:
)
self.conversation_id = conversation_id or create_conversation_id()
self.logger = logger
self.twilio_config = twilio_config
self.twilio_config = twilio_config or TwilioConfig(
account_sid=getenv("TWILIO_ACCOUNT_SID"),
auth_token=getenv("TWILIO_AUTH_TOKEN"),
)
self.twilio_client = create_twilio_client(twilio_config)
self.twilio_sid = None

View file

@ -24,10 +24,10 @@ class ZoomDialIn(OutboundCall):
zoom_meeting_password: Optional[str],
from_phone: str,
config_manager: BaseConfigManager,
twilio_config: TwilioConfig,
agent_config: AgentConfig,
transcriber_config: TranscriberConfig,
synthesizer_config: SynthesizerConfig,
twilio_config: Optional[TwilioConfig] = None,
conversation_id: Optional[str] = None,
logger: Optional[logging.Logger] = None,
):

View file

@ -34,7 +34,6 @@ from vocode.streaming.models.telephony import (
EndOutboundCall,
TwilioConfig,
)
from twilio.rest import Client
from vocode.streaming.telephony.conversation.call import Call
from vocode.streaming.telephony.templates import Templater
@ -45,7 +44,7 @@ from vocode.streaming.utils import create_conversation_id
class InboundCallConfig(BaseModel):
url: str
agent_config: AgentConfig
twilio_config: TwilioConfig
twilio_config: Optional[TwilioConfig] = None
transcriber_config: Optional[TranscriberConfig] = None
synthesizer_config: Optional[SynthesizerConfig] = None
@ -92,7 +91,7 @@ class TelephonyServer:
def create_inbound_route(
self,
agent_config: AgentConfig,
twilio_config: TwilioConfig,
twilio_config: Optional[TwilioConfig] = None,
transcriber_config: Optional[TranscriberConfig] = None,
synthesizer_config: Optional[SynthesizerConfig] = None,
):

View file

@ -1,12 +1,8 @@
import os
from typing import Optional
from dotenv import load_dotenv
from twilio.rest import Client
from vocode.streaming.models.telephony import TwilioConfig
load_dotenv()
def create_twilio_client(twilio_config: TwilioConfig):
return Client(twilio_config.account_sid, twilio_config.auth_token)

View file

@ -1,10 +1,9 @@
import asyncio
import json
import logging
import os
from dotenv import load_dotenv
import websockets
from urllib.parse import urlencode
from vocode import getenv
from vocode.streaming.models.transcriber import AssemblyAITranscriberConfig
from vocode.streaming.models.websocket import AudioMessage
@ -14,9 +13,7 @@ from vocode.streaming.transcriber.base_transcriber import (
)
from vocode.streaming.models.audio_encoding import AudioEncoding
load_dotenv()
ASSEMBLY_AI_API_KEY = os.environ.get("ASSEMBLY_AI_API_KEY")
ASSEMBLY_AI_URL = "wss://api.assemblyai.com/v2/realtime/ws"
@ -27,6 +24,7 @@ class AssemblyAITranscriber(BaseTranscriber):
logger: logging.Logger = None,
):
super().__init__(transcriber_config)
self.api_key = getenv("ASSEMBLY_AI_API_KEY")
self._ended = False
self.is_ready = False
self.logger = logger or logging.getLogger(__name__)
@ -61,7 +59,7 @@ class AssemblyAITranscriber(BaseTranscriber):
async with websockets.connect(
URL,
extra_headers=(("Authorization", ASSEMBLY_AI_API_KEY),),
extra_headers=(("Authorization", self.api_key),),
ping_interval=5,
ping_timeout=20,
) as ws:

View file

@ -1,11 +1,8 @@
from dotenv import load_dotenv
from typing import Callable, Optional, Awaitable
from vocode.streaming.utils import convert_wav
from vocode.streaming.models.transcriber import EndpointingConfig, TranscriberConfig
load_dotenv()
class Transcription:
def __init__(

View file

@ -1,12 +1,11 @@
import asyncio
import json
import logging
import os
from dotenv import load_dotenv
import websockets
from websockets.client import WebSocketClientProtocol
import audioop
from urllib.parse import urlencode
from vocode import getenv
from vocode.streaming.transcriber.base_transcriber import (
BaseTranscriber,
@ -19,9 +18,7 @@ from vocode.streaming.models.transcriber import (
)
from vocode.streaming.models.audio_encoding import AudioEncoding
load_dotenv()
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
PUNCTUATION_TERMINATORS = [".", "!", "?"]
NUM_RESTARTS = 5
@ -33,6 +30,7 @@ class DeepgramTranscriber(BaseTranscriber):
logger: logging.Logger = None,
):
super().__init__(transcriber_config)
self.api_key = getenv("DEEPGRAM_API_KEY")
self.transcriber_config = transcriber_config
self._ended = False
self.warmed_up = False
@ -155,7 +153,7 @@ class DeepgramTranscriber(BaseTranscriber):
return data["duration"]
async def process(self, warmup=True):
extra_headers = {"Authorization": f"Token {DEEPGRAM_API_KEY}"}
extra_headers = {"Authorization": f"Token {self.api_key}"}
self.audio_queue = asyncio.Queue()
async with websockets.connect(

View file

@ -1,19 +1,12 @@
import os
import asyncio
import openai
from dotenv import load_dotenv
import numpy as np
import requests
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
from vocode import getenv
PLATFORM = "pyq" if os.getenv("USE_PYQ_EMBEDDINGS", "false") == "true" else "openai"
SIMILARITY_THRESHOLD = 0.9
SIMILARITY_THRESHOLD_PYQ = 0.7
EMBEDDING_SIZE = 1536
PYQ_EMBEDDING_SIZE = 768
GOODBYE_PHRASES = [
"bye",
"goodbye",
@ -24,7 +17,6 @@ GOODBYE_PHRASES = [
"have a good day",
"have a good night",
]
PYQ_API_URL = "https://embeddings.pyqai.com"
class GoodbyeModel:
@ -34,12 +26,10 @@ class GoodbyeModel:
os.path.dirname(__file__), "goodbye_embeddings"
),
):
openai.api_key = getenv("OPENAI_API_KEY")
self.goodbye_embeddings = self.load_or_create_embeddings(
f"{embeddings_cache_path}/goodbye_embeddings.npy"
)
self.goodbye_embeddings_pyq = self.load_or_create_embeddings(
f"{embeddings_cache_path}/goodbye_embeddings_pyq.npy"
)
def load_or_create_embeddings(self, path):
if os.path.exists(path):
@ -49,50 +39,33 @@ class GoodbyeModel:
np.save(path, embeddings)
return embeddings
def create_embeddings(self, platform=PLATFORM):
def create_embeddings(self):
print("Creating embeddings...")
size = EMBEDDING_SIZE if platform == "openai" else PYQ_EMBEDDING_SIZE
size = EMBEDDING_SIZE
embeddings = np.empty((size, len(GOODBYE_PHRASES)))
for i, goodbye_phrase in enumerate(GOODBYE_PHRASES):
embeddings[:, i] = self.create_embedding(goodbye_phrase, platform=platform)
embeddings[:, i] = self.create_embedding(goodbye_phrase)
return embeddings
async def is_goodbye(self, text: str, platform=PLATFORM) -> bool:
async def is_goodbye(self, text: str) -> bool:
if "bye" in text.lower():
return True
embedding = self.create_embedding(text.strip().lower(), platform=platform)
goodbye_embeddings = (
self.goodbye_embeddings
if platform == "openai"
else self.goodbye_embeddings_pyq
)
threshold = (
SIMILARITY_THRESHOLD if platform == "openai" else SIMILARITY_THRESHOLD_PYQ
)
similarity_results = embedding @ goodbye_embeddings
return np.max(similarity_results) > threshold
embedding = self.create_embedding(text.strip().lower())
similarity_results = embedding @ self.goodbye_embeddings
return np.max(similarity_results) > SIMILARITY_THRESHOLD
def create_embedding(self, text, platform=PLATFORM) -> np.array:
if platform == "openai":
return np.array(
openai.Embedding.create(input=text, model="text-embedding-ada-002")[
"data"
][0]["embedding"]
)
elif platform == "pyq":
return np.array(
requests.post(
PYQ_API_URL,
headers={
"Content-Type": "application/json",
"Authorization": os.getenv("PYQ_API_KEY"),
},
json={"input_sequence": [text], "account_id": "400"},
).json()["response"][0]
)
def create_embedding(self, text) -> np.array:
return np.array(
openai.Embedding.create(input=text, model="text-embedding-ada-002")["data"][
0
]["embedding"]
)
if __name__ == "__main__":
from dotenv import load_dotenv
load_dotenv()
async def main():
model = GoodbyeModel()

View file

@ -1,4 +1,3 @@
import os
from typing import Optional
import openai
from langchain.prompts import (
@ -10,6 +9,7 @@ from langchain.prompts import (
from langchain.chains import ConversationChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from vocode import getenv
from vocode.turn_based.agent.base_agent import BaseAgent
@ -25,7 +25,7 @@ class ChatGPTAgent(BaseAgent):
max_tokens: int = 100,
):
super().__init__(initial_message=initial_message)
openai.api_key = os.getenv("OPENAI_API_KET", api_key)
openai.api_key = getenv("OPENAI_API_KET", api_key)
if not openai.api_key:
raise ValueError("OpenAI API key not provided")
self.prompt = ChatPromptTemplate.from_messages(

View file

@ -1,7 +1,7 @@
import os
from typing import Optional
import azure.cognitiveservices.speech as speechsdk
from pydub import AudioSegment
from vocode import getenv
from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer
@ -15,8 +15,8 @@ class AzureSynthesizer(BaseSynthesizer):
):
self.sampling_rate = sampling_rate
speech_config = speechsdk.SpeechConfig(
subscription=os.getenv("AZURE_SPEECH_KEY", api_key),
region=os.getenv("AZURE_SPEECH_REGION", region),
subscription=getenv("AZURE_SPEECH_KEY", api_key),
region=getenv("AZURE_SPEECH_REGION", region),
)
if self.sampling_rate == 44100:
speech_config.set_speech_synthesis_output_format(

View file

@ -1,8 +1,8 @@
import io
import os
from typing import Optional
from pydub import AudioSegment
import requests
from vocode import getenv
from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer
ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
@ -11,7 +11,7 @@ ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
class ElevenLabsSynthesizer(BaseSynthesizer):
def __init__(self, voice_id: str, api_key: Optional[str] = None):
self.voice_id = voice_id
self.api_key = os.getenv("ELEVEN_LABS_API_KEY", api_key)
self.api_key = getenv("ELEVEN_LABS_API_KEY", api_key)
def synthesize(self, text: str) -> AudioSegment:
url = ELEVEN_LABS_BASE_URL + f"text-to-speech/{self.voice_id}"

View file

@ -1,15 +1,15 @@
from typing import Optional
from pydub import AudioSegment
import io
import os
import openai
from vocode import getenv
from vocode.turn_based.transcriber.base_transcriber import BaseTranscriber
class WhisperTranscriber(BaseTranscriber):
def __init__(self, api_key: Optional[str] = None):
openai.api_key = os.getenv("OPENAI_API_KEY", api_key)
openai.api_key = getenv("OPENAI_API_KEY", api_key)
if not openai.api_key:
raise ValueError("OpenAI API key not provided")