From 1dc7bc74c3299f45d0ec2b5428d6ab8bf0ea7123 Mon Sep 17 00:00:00 2001 From: Ajay Raj Date: Tue, 28 Mar 2023 10:20:36 -0700 Subject: [PATCH] remove pyq goodbye model and rime synthesizer and fix environment loading --- examples/hosted_streaming_conversation.py | 12 ++- examples/streaming_conversation.py | 28 +++---- examples/telephony_app.py | 46 +++++------ examples/turn_based_conversation.py | 8 +- vocode/__init__.py | 18 ++++- vocode/streaming/agent/chat_gpt_agent.py | 10 +-- vocode/streaming/agent/llm_agent.py | 3 - vocode/streaming/factory.py | 8 +- .../hosted_streaming_conversation.py | 4 - vocode/streaming/streaming_conversation.py | 26 +++---- .../synthesizer/azure_synthesizer.py | 7 +- .../synthesizer/eleven_labs_synthesizer.py | 7 +- .../synthesizer/google_synthesizer.py | 3 - .../streaming/synthesizer/rime_synthesizer.py | 78 ------------------- .../config_manager/base_config_manager.py | 1 - .../streaming/telephony/conversation/call.py | 49 ++++++------ .../telephony/conversation/outbound_call.py | 9 ++- .../telephony/conversation/zoom_dial_in.py | 2 +- vocode/streaming/telephony/server/base.py | 5 +- vocode/streaming/telephony/twilio.py | 4 - .../transcriber/assembly_ai_transcriber.py | 8 +- .../streaming/transcriber/base_transcriber.py | 3 - .../transcriber/deepgram_transcriber.py | 8 +- vocode/streaming/utils/goodbye_model.py | 63 +++++---------- vocode/turn_based/agent/chat_gpt_agent.py | 4 +- .../synthesizer/azure_synthesizer.py | 6 +- .../synthesizer/eleven_labs_synthesizer.py | 4 +- .../transcriber/whisper_transcriber.py | 4 +- 28 files changed, 143 insertions(+), 285 deletions(-) delete mode 100644 vocode/streaming/synthesizer/rime_synthesizer.py diff --git a/examples/hosted_streaming_conversation.py b/examples/hosted_streaming_conversation.py index 59152f0..4a119bc 100644 --- a/examples/hosted_streaming_conversation.py +++ b/examples/hosted_streaming_conversation.py @@ -2,14 +2,15 @@ import asyncio import logging import signal from dotenv 
import load_dotenv -import os + +load_dotenv() + from vocode.streaming.hosted_streaming_conversation import HostedStreamingConversation from vocode.streaming.streaming_conversation import StreamingConversation from vocode.helpers import create_microphone_input_and_speaker_output from vocode.streaming.models.transcriber import ( DeepgramTranscriberConfig, PunctuationEndpointingConfig, - GoogleTranscriberConfig, ) from vocode.streaming.models.agent import ( ChatGPTAgentConfig, @@ -23,10 +24,6 @@ from vocode.streaming.models.agent import ( ) from vocode.streaming.models.message import BaseMessage from vocode.streaming.models.synthesizer import AzureSynthesizerConfig -import vocode - -load_dotenv() -vocode.api_key = os.getenv("VOCODE_API_KEY") logging.basicConfig() logging.root.setLevel(logging.INFO) @@ -41,7 +38,8 @@ if __name__ == "__main__": input_device=microphone_input, output_device=speaker_output, transcriber_config=DeepgramTranscriberConfig.from_input_device( - microphone_input + microphone_input, + endpointing_config=PunctuationEndpointingConfig(), ), agent_config=ChatGPTAgentConfig( initial_message=BaseMessage(text="Hello!"), diff --git a/examples/streaming_conversation.py b/examples/streaming_conversation.py index 60d2d90..c0be5be 100644 --- a/examples/streaming_conversation.py +++ b/examples/streaming_conversation.py @@ -2,7 +2,9 @@ import asyncio import logging import signal from dotenv import load_dotenv -import os + +load_dotenv() + from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent from vocode.streaming.streaming_conversation import StreamingConversation from vocode.helpers import create_microphone_input_and_speaker_output @@ -31,8 +33,6 @@ import vocode from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber -load_dotenv() -vocode.api_key = os.getenv("VOCODE_API_KEY") logging.basicConfig() logger = logging.getLogger(__name__) @@ -46,23 
+46,17 @@ async def main(): conversation = StreamingConversation( output_device=speaker_output, - transcriber=DeepgramTranscriber( - DeepgramTranscriberConfig.from_input_device( - microphone_input, endpointing_config=PunctuationEndpointingConfig() - ) + transcriber=DeepgramTranscriberConfig.from_input_device( + microphone_input, endpointing_config=PunctuationEndpointingConfig() ), - agent=ChatGPTAgent( - ChatGPTAgentConfig( - initial_message=BaseMessage(text="What up"), - prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like + agent=ChatGPTAgentConfig( + initial_message=BaseMessage(text="What up"), + prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""", - generate_responses=True, - cut_off_response=CutOffResponse(), - ) - ), - synthesizer=AzureSynthesizer( - AzureSynthesizerConfig.from_output_device(speaker_output), + generate_responses=True, + cut_off_response=CutOffResponse(), ), + synthesizer=AzureSynthesizerConfig.from_output_device(speaker_output), logger=logger, ) await conversation.start() diff --git a/examples/telephony_app.py b/examples/telephony_app.py index 8162cdb..86e81c2 100644 --- a/examples/telephony_app.py +++ b/examples/telephony_app.py @@ -1,7 +1,7 @@ import logging from fastapi import FastAPI -import os from dotenv import load_dotenv +from vocode import getenv load_dotenv() @@ -34,13 +34,12 @@ telephony_server = TelephonyServer( url="/inbound_call", agent_config=ChatGPTAgentConfig( initial_message=BaseMessage(text="What up"), - prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. 
Be super chill, use slang like -hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""", + prompt_preamble="Have a pleasant conversation about life", generate_responses=True, ), twilio_config=TwilioConfig( - account_sid=os.getenv("TWILIO_ACCOUNT_SID"), - auth_token=os.getenv("TWILIO_AUTH_TOKEN"), + account_sid=getenv("TWILIO_ACCOUNT_SID"), + auth_token=getenv("TWILIO_AUTH_TOKEN"), ), ) ], @@ -49,21 +48,22 @@ hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, app.include_router(telephony_server.get_router()) -# outbound_call = OutboundCall( -# base_url=BASE_URL, -# to_phone="+14088926228", -# from_phone="+14086600744", -# config_manager=config_manager, -# agent_config=ChatGPTAgentConfig( -# initial_message=BaseMessage(text="What up"), -# prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like -# hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""", -# generate_responses=True, -# ), -# twilio_config=TwilioConfig( -# account_sid=os.getenv("TWILIO_ACCOUNT_SID"), -# auth_token=os.getenv("TWILIO_AUTH_TOKEN"), -# ), -# logger=logger, -# ) -# outbound_call.start() +outbound_call = OutboundCall( + base_url=BASE_URL, + to_phone="+14088926228", + from_phone="+14086600744", + config_manager=config_manager, + agent_config=ChatGPTAgentConfig( + initial_message=BaseMessage(text="What up"), + prompt_preamble="Have a pleasant conversation about life", + generate_responses=True, + ), + twilio_config=TwilioConfig( + account_sid=getenv("TWILIO_ACCOUNT_SID"), + auth_token=getenv("TWILIO_AUTH_TOKEN"), + ), + logger=logger, +) + +input("Press enter to start call...") +outbound_call.start() diff --git a/examples/turn_based_conversation.py b/examples/turn_based_conversation.py index 9681aaf..4bd5fce 100644 --- 
a/examples/turn_based_conversation.py +++ b/examples/turn_based_conversation.py @@ -1,6 +1,6 @@ import logging from dotenv import load_dotenv -import os +from vocode import getenv from vocode.helpers import create_microphone_input_and_speaker_output from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer @@ -25,15 +25,15 @@ if __name__ == "__main__": conversation = TurnBasedConversation( input_device=microphone_input, output_device=speaker_output, - transcriber=WhisperTranscriber(api_key=os.getenv("OPENAI_API_KEY")), + transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")), agent=ChatGPTAgent( system_prompt="The AI is having a pleasant conversation about life", initial_message="Hello!", - api_key=os.getenv("OPENAI_API_KEY"), + api_key=getenv("OPENAI_API_KEY"), ), synthesizer=ElevenLabsSynthesizer( voice_id=ADAM_VOICE_ID, - api_key=os.getenv("ELEVEN_LABS_API_KEY"), + api_key=getenv("ELEVEN_LABS_API_KEY"), ), logger=logger, ) diff --git a/vocode/__init__.py b/vocode/__init__.py index c5da2cc..b2c8af7 100644 --- a/vocode/__init__.py +++ b/vocode/__init__.py @@ -1,7 +1,17 @@ import os -from dotenv import load_dotenv -load_dotenv() -api_key = os.getenv("VOCODE_API_KEY") -base_url = os.getenv("VOCODE_BASE_URL", "api.vocode.dev") +environment = {} + + +def setenv(**kwargs): + for key, value in kwargs.items(): + environment[key] = value + + +def getenv(key, default=None): + return environment.get(key) or os.getenv(key, default) + + +api_key = getenv("VOCODE_API_KEY") +base_url = getenv("VOCODE_BASE_URL", "api.vocode.dev") diff --git a/vocode/streaming/agent/chat_gpt_agent.py b/vocode/streaming/agent/chat_gpt_agent.py index 0a240e8..623350b 100644 --- a/vocode/streaming/agent/chat_gpt_agent.py +++ b/vocode/streaming/agent/chat_gpt_agent.py @@ -1,4 +1,3 @@ -import os import random import time from langchain.prompts import ( @@ -16,23 +15,20 @@ import openai import json from typing 
import Generator, Optional -from dotenv import load_dotenv from typing import Generator import logging +from vocode import getenv from vocode.streaming.agent.base_agent import BaseAgent from vocode.streaming.models.agent import ChatGPTAgentConfig from vocode.streaming.utils.sse_client import SSEClient from vocode.streaming.agent.utils import stream_llm_response -load_dotenv() - -openai.api_key = os.environ.get("OPENAI_API_KEY") - class ChatGPTAgent(BaseAgent): def __init__(self, agent_config: ChatGPTAgentConfig, logger: logging.Logger = None): super().__init__(agent_config) + openai.api_key = getenv("OPENAI_API_KEY") self.agent_config = agent_config self.logger = logger or logging.getLogger(__name__) self.logger.setLevel(logging.DEBUG) @@ -112,7 +108,7 @@ class ChatGPTAgent(BaseAgent): "https://api.openai.com/v1/chat/completions", headers={ "Content-Type": "application/json", - "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}", + "Authorization": f"Bearer {getenv('OPENAI_API_KEY')}", }, json={ "model": self.agent_config.model_name, diff --git a/vocode/streaming/agent/llm_agent.py b/vocode/streaming/agent/llm_agent.py index e605507..9810239 100644 --- a/vocode/streaming/agent/llm_agent.py +++ b/vocode/streaming/agent/llm_agent.py @@ -1,7 +1,6 @@ import re from typing import Optional -from dotenv import load_dotenv from langchain import OpenAI from langchain.llms import OpenAIChat from typing import Generator @@ -11,8 +10,6 @@ from vocode.streaming.agent.base_agent import BaseAgent from vocode.streaming.agent.utils import stream_llm_response from vocode.streaming.models.agent import LLMAgentConfig -load_dotenv() - class LLMAgent(BaseAgent): SENTENCE_ENDINGS = [".", "!", "?"] diff --git a/vocode/streaming/factory.py b/vocode/streaming/factory.py index 063c46b..9838b7d 100644 --- a/vocode/streaming/factory.py +++ b/vocode/streaming/factory.py @@ -10,7 +10,6 @@ from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer from 
vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer from vocode.streaming.synthesizer.google_synthesizer import GoogleSynthesizer -from vocode.streaming.synthesizer.rime_synthesizer import RimeSynthesizer from vocode.streaming.transcriber.assembly_ai_transcriber import AssemblyAITranscriber from vocode.streaming.transcriber.base_transcriber import BaseTranscriber from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber @@ -48,11 +47,6 @@ def create_synthesizer(synthesizer_config: SynthesizerConfig) -> BaseSynthesizer elif synthesizer_config.type == SynthesizerType.AZURE: return AzureSynthesizer(synthesizer_config) elif synthesizer_config.type == SynthesizerType.ELEVEN_LABS: - kwargs = {} - if synthesizer_config.voice_id: - kwargs["voice_id"] = synthesizer_config.voice_id - return ElevenLabsSynthesizer(synthesizer_config, **kwargs) - elif synthesizer_config.type == SynthesizerType.RIME: - return RimeSynthesizer(synthesizer_config) + return ElevenLabsSynthesizer(synthesizer_config) else: raise Exception("Invalid synthesizer config") diff --git a/vocode/streaming/hosted_streaming_conversation.py b/vocode/streaming/hosted_streaming_conversation.py index 657e40d..ccdc78f 100644 --- a/vocode/streaming/hosted_streaming_conversation.py +++ b/vocode/streaming/hosted_streaming_conversation.py @@ -2,8 +2,6 @@ import websockets from websockets.exceptions import ConnectionClosedOK from websockets.client import WebSocketClientProtocol import asyncio -from dotenv import load_dotenv -import os import logging import threading import queue @@ -22,8 +20,6 @@ from vocode.streaming.models.websocket import ( StopMessage, ) -load_dotenv() - class HostedStreamingConversation: def __init__( diff --git a/vocode/streaming/streaming_conversation.py b/vocode/streaming/streaming_conversation.py index 89ad708..ce08a21 100644 --- 
a/vocode/streaming/streaming_conversation.py +++ b/vocode/streaming/streaming_conversation.py @@ -8,15 +8,18 @@ import time import secrets import random -from dotenv import load_dotenv from vocode.streaming.agent.bot_sentiment_analyser import ( BotSentiment, BotSentimentAnalyser, ) from vocode.streaming.agent.information_retrieval_agent import InformationRetrievalAgent +from vocode.streaming.factory import ( + create_agent, + create_synthesizer, + create_transcriber, +) from vocode.streaming.models.message import BaseMessage from vocode.streaming.output_device.base_output_device import BaseOutputDevice -from vocode.streaming.synthesizer.rime_synthesizer import RimeSynthesizer from vocode.streaming.transcriber.assembly_ai_transcriber import AssemblyAITranscriber from vocode.streaming.utils.goodbye_model import GoodbyeModel from vocode.streaming.utils.transcript import Transcript @@ -48,9 +51,6 @@ from vocode.streaming.synthesizer.base_synthesizer import ( SynthesisResult, FillerAudio, ) -from vocode.streaming.synthesizer.google_synthesizer import GoogleSynthesizer -from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer -from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer from vocode.streaming.utils import ( create_conversation_id, create_loop_in_thread, @@ -60,19 +60,15 @@ from vocode.streaming.transcriber.base_transcriber import ( Transcription, BaseTranscriber, ) -from vocode.streaming.transcriber.google_transcriber import GoogleTranscriber -from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber - -load_dotenv() class StreamingConversation: def __init__( self, output_device: BaseOutputDevice, - transcriber: BaseTranscriber, - agent: BaseAgent, - synthesizer: BaseSynthesizer, + transcriber_config: TranscriberConfig, + agent_config: AgentConfig, + synthesizer_config: SynthesizerConfig, conversation_id: str = None, per_chunk_allowance_seconds: int = PER_CHUNK_ALLOWANCE_SECONDS, logger: 
Optional[logging.Logger] = None, @@ -80,11 +76,11 @@ class StreamingConversation: self.id = conversation_id or create_conversation_id() self.logger = logger or logging.getLogger(__name__) self.output_device = output_device - self.transcriber = transcriber + self.transcriber = create_transcriber(transcriber_config) self.transcriber.set_on_response(self.on_transcription_response) self.transcriber_task = None - self.agent = agent - self.synthesizer = synthesizer + self.agent = create_agent(agent_config) + self.synthesizer = create_synthesizer(synthesizer_config) self.synthesizer_event_loop = asyncio.new_event_loop() self.synthesizer_thread = threading.Thread( name="synthesizer", diff --git a/vocode/streaming/synthesizer/azure_synthesizer.py b/vocode/streaming/synthesizer/azure_synthesizer.py index a8f603f..5237479 100644 --- a/vocode/streaming/synthesizer/azure_synthesizer.py +++ b/vocode/streaming/synthesizer/azure_synthesizer.py @@ -4,7 +4,7 @@ import re from typing import Any, Optional from xml.etree import ElementTree import azure.cognitiveservices.speech as speechsdk -from dotenv import load_dotenv +from vocode import getenv from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment from vocode.streaming.models.message import BaseMessage, SSMLMessage @@ -20,7 +20,6 @@ from vocode.streaming.synthesizer.base_synthesizer import ( from vocode.streaming.models.synthesizer import AzureSynthesizerConfig from vocode.streaming.models.audio_encoding import AudioEncoding -load_dotenv() NAMESPACES = { "mstts": "https://www.w3.org/2001/mstts", @@ -59,8 +58,8 @@ class AzureSynthesizer(BaseSynthesizer): self.synthesizer_config = synthesizer_config # Instantiates a client speech_config = speechsdk.SpeechConfig( - subscription=os.environ.get("AZURE_SPEECH_KEY"), - region=os.environ.get("AZURE_SPEECH_REGION"), + subscription=getenv("AZURE_SPEECH_KEY"), + region=getenv("AZURE_SPEECH_REGION"), ) if self.synthesizer_config.audio_encoding == AudioEncoding.LINEAR16: if 
self.synthesizer_config.sampling_rate == 44100: diff --git a/vocode/streaming/synthesizer/eleven_labs_synthesizer.py b/vocode/streaming/synthesizer/eleven_labs_synthesizer.py index 7663c58..610232a 100644 --- a/vocode/streaming/synthesizer/eleven_labs_synthesizer.py +++ b/vocode/streaming/synthesizer/eleven_labs_synthesizer.py @@ -1,7 +1,6 @@ from typing import Any, Optional -import os -from dotenv import load_dotenv import requests +from vocode import getenv from vocode.streaming.synthesizer.base_synthesizer import ( BaseSynthesizer, @@ -11,9 +10,7 @@ from vocode.streaming.models.synthesizer import ElevenLabsSynthesizerConfig from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment from vocode.streaming.models.message import BaseMessage -load_dotenv() -ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY") ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/" ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB" OBAMA_VOICE_ID = "vLITIS0SH2an5iQGxw5C" @@ -22,7 +19,7 @@ OBAMA_VOICE_ID = "vLITIS0SH2an5iQGxw5C" class ElevenLabsSynthesizer(BaseSynthesizer): def __init__(self, config: ElevenLabsSynthesizerConfig): super().__init__(config) - self.api_key = config.api_key + self.api_key = getenv("ELEVEN_LABS_API_KEY") self.voice_id = config.voice_id or ADAM_VOICE_ID self.words_per_minute = 150 diff --git a/vocode/streaming/synthesizer/google_synthesizer.py b/vocode/streaming/synthesizer/google_synthesizer.py index 6af1f41..cecde94 100644 --- a/vocode/streaming/synthesizer/google_synthesizer.py +++ b/vocode/streaming/synthesizer/google_synthesizer.py @@ -2,7 +2,6 @@ import io import wave from typing import Any, Optional -from dotenv import load_dotenv from google.cloud import texttospeech_v1beta1 as tts from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment @@ -16,8 +15,6 @@ from vocode.streaming.models.synthesizer import GoogleSynthesizerConfig from vocode.streaming.models.audio_encoding import AudioEncoding from vocode.streaming.utils import 
convert_wav -load_dotenv() - class GoogleSynthesizer(BaseSynthesizer): OFFSET_SECONDS = 0.5 diff --git a/vocode/streaming/synthesizer/rime_synthesizer.py b/vocode/streaming/synthesizer/rime_synthesizer.py deleted file mode 100644 index 74d2f0b..0000000 --- a/vocode/streaming/synthesizer/rime_synthesizer.py +++ /dev/null @@ -1,78 +0,0 @@ -import audioop -import base64 -from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment -from vocode.streaming.models.audio_encoding import AudioEncoding - -from vocode.streaming.models.message import BaseMessage - -from .base_synthesizer import BaseSynthesizer, SynthesisResult, encode_as_wav -from typing import Any, Optional -import os -import io -import wave -from dotenv import load_dotenv -import requests - -from ..utils import convert_linear_audio, convert_wav -from ..models.synthesizer import ElevenLabsSynthesizerConfig, RimeSynthesizerConfig - -load_dotenv() - -RIME_API_KEY = os.getenv("RIME_API_KEY") -RIME_BASE_URL = os.getenv("RIME_BASE_URL") - - -class RimeSynthesizer(BaseSynthesizer): - def __init__(self, config: RimeSynthesizerConfig): - super().__init__(config) - self.speaker = config.speaker - - def create_speech( - self, - message: BaseMessage, - chunk_size: int, - bot_sentiment: Optional[BotSentiment] = None, - ) -> SynthesisResult: - url = RIME_BASE_URL - headers = {"Authorization": f"Bearer {RIME_API_KEY}"} - body = {"inputs": {"text": message.text, "speaker": self.speaker}} - response = requests.post(url, headers=headers, json=body) - - def chunk_generator(audio, chunk_transform=lambda x: x): - for i in range(0, len(audio), chunk_size): - chunk = audio[i : i + chunk_size] - yield SynthesisResult.ChunkResult( - chunk_transform(chunk), len(chunk) != chunk_size - ) - - assert response.ok, response.text - data = response.json().get("data") - assert data - - audio_file = io.BytesIO(base64.b64decode(data)) - - if self.synthesizer_config.audio_encoding == AudioEncoding.LINEAR16: - output_bytes = 
convert_wav( - audio_file, - output_sample_rate=self.synthesizer_config.sampling_rate, - output_encoding=AudioEncoding.LINEAR16, - ) - elif self.synthesizer_config.audio_encoding == AudioEncoding.MULAW: - output_bytes = convert_wav( - audio_file, - output_sample_rate=self.synthesizer_config.sampling_rate, - output_encoding=AudioEncoding.MULAW, - ) - - if self.synthesizer_config.should_encode_as_wav: - output_generator = chunk_generator( - output_bytes, chunk_transform=encode_as_wav - ) - else: - output_generator = chunk_generator(output_bytes) - return SynthesisResult( - output_generator, - lambda seconds: self.get_message_cutoff_from_total_response_length( - message, seconds, len(output_bytes) - ), - ) diff --git a/vocode/streaming/telephony/config_manager/base_config_manager.py b/vocode/streaming/telephony/config_manager/base_config_manager.py index e74025e..22f3702 100644 --- a/vocode/streaming/telephony/config_manager/base_config_manager.py +++ b/vocode/streaming/telephony/config_manager/base_config_manager.py @@ -1,5 +1,4 @@ import logging -import os from typing import Optional from redis import Redis diff --git a/vocode/streaming/telephony/conversation/call.py b/vocode/streaming/telephony/conversation/call.py index f2fae8f..8426e0f 100644 --- a/vocode/streaming/telephony/conversation/call.py +++ b/vocode/streaming/telephony/conversation/call.py @@ -4,6 +4,7 @@ from enum import Enum import json import logging from typing import Optional +from vocode import getenv from vocode.streaming.agent.base_agent import BaseAgent from vocode.streaming.factory import ( create_agent, @@ -42,38 +43,36 @@ class Call(StreamingConversation): self, base_url: str, config_manager: BaseConfigManager, - agent: BaseAgent, - twilio_config: TwilioConfig, - transcriber: Optional[BaseTranscriber] = None, - synthesizer: Optional[BaseSynthesizer] = None, - twilio_sid=None, + agent_config: BaseAgent, + transcriber_config: Optional[BaseTranscriber] = None, + synthesizer_config: 
Optional[BaseSynthesizer] = None, + twilio_config: Optional[TwilioConfig] = None, + twilio_sid: Optional[str] = None, conversation_id: Optional[str] = None, logger: Optional[logging.Logger] = None, ): self.base_url = base_url self.config_manager = config_manager self.output_device = TwilioOutputDevice() - self.twilio_config = twilio_config + self.twilio_config = twilio_config or TwilioConfig( + account_sid=getenv("TWILIO_ACCOUNT_SID"), + auth_token=getenv("TWILIO_AUTH_TOKEN"), + ) self.twilio_client = create_twilio_client(twilio_config) super().__init__( self.output_device, - transcriber - or DeepgramTranscriber( - DeepgramTranscriberConfig( - sampling_rate=8000, - audio_encoding=AudioEncoding.MULAW, - chunk_size=self.CHUNK_SIZE, - model="voicemail", - endpointing_config=PunctuationEndpointingConfig(), - ), - logger=logger, + transcriber_config + or DeepgramTranscriberConfig( + sampling_rate=8000, + audio_encoding=AudioEncoding.MULAW, + chunk_size=self.CHUNK_SIZE, + model="voicemail", + endpointing_config=PunctuationEndpointingConfig(), ), - agent, - synthesizer - or AzureSynthesizer( - AzureSynthesizerConfig( - sampling_rate=8000, audio_encoding=AudioEncoding.MULAW - ) + agent_config, + synthesizer_config + or AzureSynthesizerConfig( + sampling_rate=8000, audio_encoding=AudioEncoding.MULAW ), conversation_id=conversation_id, per_chunk_allowance_seconds=0.01, @@ -94,9 +93,9 @@ class Call(StreamingConversation): base_url=base_url, logger=logger, config_manager=config_manager, - agent=create_agent(call_config.agent_config), - transcriber=create_transcriber(call_config.transcriber_config), - synthesizer=create_synthesizer(call_config.synthesizer_config), + agent_config=call_config.agent_config, + transcriber_config=call_config.transcriber_config, + synthesizer_config=call_config.synthesizer_config, twilio_config=call_config.twilio_config, twilio_sid=call_config.twilio_sid, conversation_id=conversation_id, diff --git 
a/vocode/streaming/telephony/conversation/outbound_call.py b/vocode/streaming/telephony/conversation/outbound_call.py index 7d24ad3..557fa30 100644 --- a/vocode/streaming/telephony/conversation/outbound_call.py +++ b/vocode/streaming/telephony/conversation/outbound_call.py @@ -1,6 +1,6 @@ import logging from typing import Optional -from twilio.rest import Client +from vocode import getenv from vocode.streaming.models.agent import AgentConfig from vocode.streaming.models.synthesizer import ( @@ -33,7 +33,7 @@ class OutboundCall: from_phone: str, config_manager: BaseConfigManager, agent_config: AgentConfig, - twilio_config: TwilioConfig, + twilio_config: Optional[TwilioConfig] = None, transcriber_config: Optional[TranscriberConfig] = None, synthesizer_config: Optional[SynthesizerConfig] = None, conversation_id: Optional[str] = None, @@ -56,7 +56,10 @@ class OutboundCall: ) self.conversation_id = conversation_id or create_conversation_id() self.logger = logger - self.twilio_config = twilio_config + self.twilio_config = twilio_config or TwilioConfig( + account_sid=getenv("TWILIO_ACCOUNT_SID"), + auth_token=getenv("TWILIO_AUTH_TOKEN"), + ) self.twilio_client = create_twilio_client(twilio_config) self.twilio_sid = None diff --git a/vocode/streaming/telephony/conversation/zoom_dial_in.py b/vocode/streaming/telephony/conversation/zoom_dial_in.py index aa6766d..d57f1d6 100644 --- a/vocode/streaming/telephony/conversation/zoom_dial_in.py +++ b/vocode/streaming/telephony/conversation/zoom_dial_in.py @@ -24,10 +24,10 @@ class ZoomDialIn(OutboundCall): zoom_meeting_password: Optional[str], from_phone: str, config_manager: BaseConfigManager, - twilio_config: TwilioConfig, agent_config: AgentConfig, transcriber_config: TranscriberConfig, synthesizer_config: SynthesizerConfig, + twilio_config: Optional[TwilioConfig] = None, conversation_id: Optional[str] = None, logger: Optional[logging.Logger] = None, ): diff --git a/vocode/streaming/telephony/server/base.py 
b/vocode/streaming/telephony/server/base.py index ed10892..8be5688 100644 --- a/vocode/streaming/telephony/server/base.py +++ b/vocode/streaming/telephony/server/base.py @@ -34,7 +34,6 @@ from vocode.streaming.models.telephony import ( EndOutboundCall, TwilioConfig, ) -from twilio.rest import Client from vocode.streaming.telephony.conversation.call import Call from vocode.streaming.telephony.templates import Templater @@ -45,7 +44,7 @@ from vocode.streaming.utils import create_conversation_id class InboundCallConfig(BaseModel): url: str agent_config: AgentConfig - twilio_config: TwilioConfig + twilio_config: Optional[TwilioConfig] = None transcriber_config: Optional[TranscriberConfig] = None synthesizer_config: Optional[SynthesizerConfig] = None @@ -92,7 +91,7 @@ class TelephonyServer: def create_inbound_route( self, agent_config: AgentConfig, - twilio_config: TwilioConfig, + twilio_config: Optional[TwilioConfig] = None, transcriber_config: Optional[TranscriberConfig] = None, synthesizer_config: Optional[SynthesizerConfig] = None, ): diff --git a/vocode/streaming/telephony/twilio.py b/vocode/streaming/telephony/twilio.py index 37acbe0..9fe7765 100644 --- a/vocode/streaming/telephony/twilio.py +++ b/vocode/streaming/telephony/twilio.py @@ -1,12 +1,8 @@ -import os from typing import Optional -from dotenv import load_dotenv from twilio.rest import Client from vocode.streaming.models.telephony import TwilioConfig -load_dotenv() - def create_twilio_client(twilio_config: TwilioConfig): return Client(twilio_config.account_sid, twilio_config.auth_token) diff --git a/vocode/streaming/transcriber/assembly_ai_transcriber.py b/vocode/streaming/transcriber/assembly_ai_transcriber.py index 3389d9d..6780a8e 100644 --- a/vocode/streaming/transcriber/assembly_ai_transcriber.py +++ b/vocode/streaming/transcriber/assembly_ai_transcriber.py @@ -1,10 +1,9 @@ import asyncio import json import logging -import os -from dotenv import load_dotenv import websockets from urllib.parse import 
urlencode +from vocode import getenv from vocode.streaming.models.transcriber import AssemblyAITranscriberConfig from vocode.streaming.models.websocket import AudioMessage @@ -14,9 +13,7 @@ from vocode.streaming.transcriber.base_transcriber import ( ) from vocode.streaming.models.audio_encoding import AudioEncoding -load_dotenv() -ASSEMBLY_AI_API_KEY = os.environ.get("ASSEMBLY_AI_API_KEY") ASSEMBLY_AI_URL = "wss://api.assemblyai.com/v2/realtime/ws" @@ -27,6 +24,7 @@ class AssemblyAITranscriber(BaseTranscriber): logger: logging.Logger = None, ): super().__init__(transcriber_config) + self.api_key = getenv("ASSEMBLY_AI_API_KEY") self._ended = False self.is_ready = False self.logger = logger or logging.getLogger(__name__) @@ -61,7 +59,7 @@ class AssemblyAITranscriber(BaseTranscriber): async with websockets.connect( URL, - extra_headers=(("Authorization", ASSEMBLY_AI_API_KEY),), + extra_headers=(("Authorization", self.api_key),), ping_interval=5, ping_timeout=20, ) as ws: diff --git a/vocode/streaming/transcriber/base_transcriber.py b/vocode/streaming/transcriber/base_transcriber.py index 7c9aa0b..2e42713 100644 --- a/vocode/streaming/transcriber/base_transcriber.py +++ b/vocode/streaming/transcriber/base_transcriber.py @@ -1,11 +1,8 @@ -from dotenv import load_dotenv from typing import Callable, Optional, Awaitable from vocode.streaming.utils import convert_wav from vocode.streaming.models.transcriber import EndpointingConfig, TranscriberConfig -load_dotenv() - class Transcription: def __init__( diff --git a/vocode/streaming/transcriber/deepgram_transcriber.py b/vocode/streaming/transcriber/deepgram_transcriber.py index 2ff2387..c475b60 100644 --- a/vocode/streaming/transcriber/deepgram_transcriber.py +++ b/vocode/streaming/transcriber/deepgram_transcriber.py @@ -1,12 +1,11 @@ import asyncio import json import logging -import os -from dotenv import load_dotenv import websockets from websockets.client import WebSocketClientProtocol import audioop from urllib.parse 
import urlencode +from vocode import getenv from vocode.streaming.transcriber.base_transcriber import ( BaseTranscriber, @@ -19,9 +18,7 @@ from vocode.streaming.models.transcriber import ( ) from vocode.streaming.models.audio_encoding import AudioEncoding -load_dotenv() -DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY") PUNCTUATION_TERMINATORS = [".", "!", "?"] NUM_RESTARTS = 5 @@ -33,6 +30,7 @@ class DeepgramTranscriber(BaseTranscriber): logger: logging.Logger = None, ): super().__init__(transcriber_config) + self.api_key = getenv("DEEPGRAM_API_KEY") self.transcriber_config = transcriber_config self._ended = False self.warmed_up = False @@ -155,7 +153,7 @@ class DeepgramTranscriber(BaseTranscriber): return data["duration"] async def process(self, warmup=True): - extra_headers = {"Authorization": f"Token {DEEPGRAM_API_KEY}"} + extra_headers = {"Authorization": f"Token {self.api_key}"} self.audio_queue = asyncio.Queue() async with websockets.connect( diff --git a/vocode/streaming/utils/goodbye_model.py b/vocode/streaming/utils/goodbye_model.py index 6ad95cf..e15b051 100644 --- a/vocode/streaming/utils/goodbye_model.py +++ b/vocode/streaming/utils/goodbye_model.py @@ -1,19 +1,13 @@ import os import asyncio import openai -from dotenv import load_dotenv import numpy as np import requests -load_dotenv() -openai.api_key = os.getenv("OPENAI_API_KEY") +from vocode import getenv - -PLATFORM = "pyq" if os.getenv("USE_PYQ_EMBEDDINGS", "false") == "true" else "openai" SIMILARITY_THRESHOLD = 0.9 -SIMILARITY_THRESHOLD_PYQ = 0.7 EMBEDDING_SIZE = 1536 -PYQ_EMBEDDING_SIZE = 768 GOODBYE_PHRASES = [ "bye", "goodbye", @@ -24,7 +17,6 @@ GOODBYE_PHRASES = [ "have a good day", "have a good night", ] -PYQ_API_URL = "https://embeddings.pyqai.com" class GoodbyeModel: @@ -34,12 +26,10 @@ class GoodbyeModel: os.path.dirname(__file__), "goodbye_embeddings" ), ): + openai.api_key = getenv("OPENAI_API_KEY") self.goodbye_embeddings = self.load_or_create_embeddings(
f"{embeddings_cache_path}/goodbye_embeddings.npy" ) - self.goodbye_embeddings_pyq = self.load_or_create_embeddings( - f"{embeddings_cache_path}/goodbye_embeddings_pyq.npy" - ) def load_or_create_embeddings(self, path): if os.path.exists(path): @@ -49,50 +39,33 @@ class GoodbyeModel: np.save(path, embeddings) return embeddings - def create_embeddings(self, platform=PLATFORM): + def create_embeddings(self): print("Creating embeddings...") - size = EMBEDDING_SIZE if platform == "openai" else PYQ_EMBEDDING_SIZE + size = EMBEDDING_SIZE embeddings = np.empty((size, len(GOODBYE_PHRASES))) for i, goodbye_phrase in enumerate(GOODBYE_PHRASES): - embeddings[:, i] = self.create_embedding(goodbye_phrase, platform=platform) + embeddings[:, i] = self.create_embedding(goodbye_phrase) return embeddings - async def is_goodbye(self, text: str, platform=PLATFORM) -> bool: + async def is_goodbye(self, text: str) -> bool: if "bye" in text.lower(): return True - embedding = self.create_embedding(text.strip().lower(), platform=platform) - goodbye_embeddings = ( - self.goodbye_embeddings - if platform == "openai" - else self.goodbye_embeddings_pyq - ) - threshold = ( - SIMILARITY_THRESHOLD if platform == "openai" else SIMILARITY_THRESHOLD_PYQ - ) - similarity_results = embedding @ goodbye_embeddings - return np.max(similarity_results) > threshold + embedding = self.create_embedding(text.strip().lower()) + similarity_results = embedding @ self.goodbye_embeddings + return np.max(similarity_results) > SIMILARITY_THRESHOLD - def create_embedding(self, text, platform=PLATFORM) -> np.array: - if platform == "openai": - return np.array( - openai.Embedding.create(input=text, model="text-embedding-ada-002")[ - "data" - ][0]["embedding"] - ) - elif platform == "pyq": - return np.array( - requests.post( - PYQ_API_URL, - headers={ - "Content-Type": "application/json", - "Authorization": os.getenv("PYQ_API_KEY"), - }, - json={"input_sequence": [text], "account_id": "400"}, - ).json()["response"][0] - ) 
+ def create_embedding(self, text) -> np.array: + return np.array( + openai.Embedding.create(input=text, model="text-embedding-ada-002")["data"][ + 0 + ]["embedding"] + ) if __name__ == "__main__": + from dotenv import load_dotenv + + load_dotenv() async def main(): model = GoodbyeModel() diff --git a/vocode/turn_based/agent/chat_gpt_agent.py b/vocode/turn_based/agent/chat_gpt_agent.py index 693a4fa..187311d 100644 --- a/vocode/turn_based/agent/chat_gpt_agent.py +++ b/vocode/turn_based/agent/chat_gpt_agent.py @@ -1,4 +1,3 @@ -import os from typing import Optional import openai from langchain.prompts import ( @@ -10,6 +9,7 @@ from langchain.prompts import ( from langchain.chains import ConversationChain from langchain.chat_models import ChatOpenAI from langchain.memory import ConversationBufferMemory +from vocode import getenv from vocode.turn_based.agent.base_agent import BaseAgent @@ -25,7 +25,7 @@ class ChatGPTAgent(BaseAgent): max_tokens: int = 100, ): super().__init__(initial_message=initial_message) - openai.api_key = os.getenv("OPENAI_API_KET", api_key) + openai.api_key = getenv("OPENAI_API_KEY", api_key) if not openai.api_key: raise ValueError("OpenAI API key not provided") self.prompt = ChatPromptTemplate.from_messages( diff --git a/vocode/turn_based/synthesizer/azure_synthesizer.py b/vocode/turn_based/synthesizer/azure_synthesizer.py index c88a663..f893913 100644 --- a/vocode/turn_based/synthesizer/azure_synthesizer.py +++ b/vocode/turn_based/synthesizer/azure_synthesizer.py @@ -1,7 +1,7 @@ -import os from typing import Optional import azure.cognitiveservices.speech as speechsdk from pydub import AudioSegment +from vocode import getenv from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer @@ -15,8 +15,8 @@ class AzureSynthesizer(BaseSynthesizer): ): self.sampling_rate = sampling_rate speech_config = speechsdk.SpeechConfig( - subscription=os.getenv("AZURE_SPEECH_KEY", api_key), - region=os.getenv("AZURE_SPEECH_REGION", region), +
subscription=getenv("AZURE_SPEECH_KEY", api_key), + region=getenv("AZURE_SPEECH_REGION", region), ) if self.sampling_rate == 44100: speech_config.set_speech_synthesis_output_format( diff --git a/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py b/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py index 018b895..d819d28 100644 --- a/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py +++ b/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py @@ -1,8 +1,8 @@ import io -import os from typing import Optional from pydub import AudioSegment import requests +from vocode import getenv from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/" @@ -11,7 +11,7 @@ ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/" class ElevenLabsSynthesizer(BaseSynthesizer): def __init__(self, voice_id: str, api_key: Optional[str] = None): self.voice_id = voice_id - self.api_key = os.getenv("ELEVEN_LABS_API_KEY", api_key) + self.api_key = getenv("ELEVEN_LABS_API_KEY", api_key) def synthesize(self, text: str) -> AudioSegment: url = ELEVEN_LABS_BASE_URL + f"text-to-speech/{self.voice_id}" diff --git a/vocode/turn_based/transcriber/whisper_transcriber.py b/vocode/turn_based/transcriber/whisper_transcriber.py index 24c59d0..7c2d1cb 100644 --- a/vocode/turn_based/transcriber/whisper_transcriber.py +++ b/vocode/turn_based/transcriber/whisper_transcriber.py @@ -1,15 +1,15 @@ from typing import Optional from pydub import AudioSegment import io -import os import openai +from vocode import getenv from vocode.turn_based.transcriber.base_transcriber import BaseTranscriber class WhisperTranscriber(BaseTranscriber): def __init__(self, api_key: Optional[str] = None): - openai.api_key = os.getenv("OPENAI_API_KEY", api_key) + openai.api_key = getenv("OPENAI_API_KEY", api_key) if not openai.api_key: raise ValueError("OpenAI API key not provided")