From a9424ffaeb8e5b33926255f4f844c4a78c1cef8c Mon Sep 17 00:00:00 2001 From: Ajay Raj Date: Tue, 28 Mar 2023 12:39:49 -0700 Subject: [PATCH] streaming conversation takes instances not configs --- examples/streaming_conversation.py | 22 ++++++++++------ examples/telephony_app.py | 2 +- vocode/streaming/agent/chat_gpt_agent.py | 13 ++++++---- vocode/streaming/streaming_conversation.py | 13 +++++----- .../streaming/telephony/conversation/call.py | 26 +++++++------------ 5 files changed, 40 insertions(+), 36 deletions(-) diff --git a/examples/streaming_conversation.py b/examples/streaming_conversation.py index 662fce3..7fee974 100644 --- a/examples/streaming_conversation.py +++ b/examples/streaming_conversation.py @@ -46,17 +46,23 @@ async def main(): conversation = StreamingConversation( output_device=speaker_output, - transcriber_config=DeepgramTranscriberConfig.from_input_device( - microphone_input, endpointing_config=PunctuationEndpointingConfig() + transcriber=DeepgramTranscriber( + DeepgramTranscriberConfig.from_input_device( + microphone_input, endpointing_config=PunctuationEndpointingConfig() + ) ), - agent_config=ChatGPTAgentConfig( - initial_message=BaseMessage(text="What up"), - prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like + agent=ChatGPTAgent( + ChatGPTAgentConfig( + initial_message=BaseMessage(text="What up"), + prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. 
Be super chill, use slang like hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""", - generate_responses=True, - cut_off_response=CutOffResponse(), + generate_responses=True, + cut_off_response=CutOffResponse(), + ) + ), + synthesizer=AzureSynthesizer( + AzureSynthesizerConfig.from_output_device(speaker_output) ), - synthesizer_config=AzureSynthesizerConfig.from_output_device(speaker_output), logger=logger, ) await conversation.start() diff --git a/examples/telephony_app.py b/examples/telephony_app.py index 1af1b39..8cc0f8e 100644 --- a/examples/telephony_app.py +++ b/examples/telephony_app.py @@ -6,7 +6,7 @@ from vocode import getenv load_dotenv() from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent -from vocode.streaming.models.agent import ChatGPTAgentConfig +from vocode.streaming.models.agent import ChatGPTAgentConfig, EchoAgentConfig from vocode.streaming.models.message import BaseMessage from vocode.streaming.models.telephony import TwilioConfig from vocode.streaming.telephony.config_manager.redis_config_manager import ( diff --git a/vocode/streaming/agent/chat_gpt_agent.py b/vocode/streaming/agent/chat_gpt_agent.py index d9c3603..c458586 100644 --- a/vocode/streaming/agent/chat_gpt_agent.py +++ b/vocode/streaming/agent/chat_gpt_agent.py @@ -142,14 +142,17 @@ class ChatGPTAgent(BaseAgent): if __name__ == "__main__": + from dotenv import load_dotenv + + load_dotenv() + agent = ChatGPTAgent( ChatGPTAgentConfig( - model_name="gpt-4", prompt_preamble="The assistant is having a pleasant conversation about life. 
If the user hasn't completed their thought, the assistant responds with 'PASS'", ) ) while True: - # response = agent.respond(input("Human: "))[0] - # print(f"AI: {response}") - for response in agent.generate_response(input("Human: ")): - print(f"AI: {response}") + response = agent.respond(input("Human: "))[0] + print(f"AI: {response}") + # for response in agent.generate_response(input("Human: ")): + # print(f"AI: {response}") diff --git a/vocode/streaming/streaming_conversation.py b/vocode/streaming/streaming_conversation.py index ce08a21..3dcf572 100644 --- a/vocode/streaming/streaming_conversation.py +++ b/vocode/streaming/streaming_conversation.py @@ -1,5 +1,6 @@ import asyncio from asyncio import Future +from typing import Union import queue from typing import Callable, Awaitable, Optional, Any import logging @@ -66,9 +67,9 @@ class StreamingConversation: def __init__( self, output_device: BaseOutputDevice, - transcriber_config: TranscriberConfig, - agent_config: AgentConfig, - synthesizer_config: SynthesizerConfig, + transcriber: BaseTranscriber, + agent: BaseAgent, + synthesizer: BaseSynthesizer, conversation_id: str = None, per_chunk_allowance_seconds: int = PER_CHUNK_ALLOWANCE_SECONDS, logger: Optional[logging.Logger] = None, @@ -76,11 +77,11 @@ class StreamingConversation: self.id = conversation_id or create_conversation_id() self.logger = logger or logging.getLogger(__name__) self.output_device = output_device - self.transcriber = create_transcriber(transcriber_config) + self.transcriber = transcriber self.transcriber.set_on_response(self.on_transcription_response) self.transcriber_task = None - self.agent = create_agent(agent_config) - self.synthesizer = create_synthesizer(synthesizer_config) + self.agent = agent + self.synthesizer = synthesizer self.synthesizer_event_loop = asyncio.new_event_loop() self.synthesizer_thread = threading.Thread( name="synthesizer", diff --git a/vocode/streaming/telephony/conversation/call.py 
b/vocode/streaming/telephony/conversation/call.py index 8426e0f..fd2ba85 100644 --- a/vocode/streaming/telephony/conversation/call.py +++ b/vocode/streaming/telephony/conversation/call.py @@ -11,22 +11,26 @@ from vocode.streaming.factory import ( create_synthesizer, create_transcriber, ) +from vocode.streaming.models.agent import AgentConfig from vocode.streaming.streaming_conversation import StreamingConversation from vocode.streaming.models.telephony import CallConfig, TwilioConfig from vocode.streaming.output_device.twilio_output_device import TwilioOutputDevice from vocode.streaming.models.synthesizer import ( AzureSynthesizerConfig, + SynthesizerConfig, ) from vocode.streaming.models.transcriber import ( DeepgramTranscriberConfig, PunctuationEndpointingConfig, + TranscriberConfig, ) from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer from vocode.streaming.telephony.config_manager.base_config_manager import ( BaseConfigManager, ) +from vocode.streaming.telephony.constants import DEFAULT_SAMPLING_RATE from vocode.streaming.telephony.twilio import create_twilio_client from vocode.streaming.models.audio_encoding import AudioEncoding from vocode.streaming.streaming_conversation import StreamingConversation @@ -43,9 +47,9 @@ class Call(StreamingConversation): self, base_url: str, config_manager: BaseConfigManager, - agent_config: BaseAgent, - transcriber_config: Optional[BaseTranscriber] = None, - synthesizer_config: Optional[BaseSynthesizer] = None, + agent_config: AgentConfig, + transcriber_config: TranscriberConfig, + synthesizer_config: SynthesizerConfig, twilio_config: Optional[TwilioConfig] = None, twilio_sid: Optional[str] = None, conversation_id: Optional[str] = None, @@ -61,19 +65,9 @@ class Call(StreamingConversation): self.twilio_client = create_twilio_client(twilio_config) super().__init__( self.output_device, - transcriber_config - or 
DeepgramTranscriberConfig( - sampling_rate=8000, - audio_encoding=AudioEncoding.MULAW, - chunk_size=self.CHUNK_SIZE, - model="voicemail", - endpointing_config=PunctuationEndpointingConfig(), - ), - agent_config, - synthesizer_config - or AzureSynthesizerConfig( - sampling_rate=8000, audio_encoding=AudioEncoding.MULAW - ), + create_transcriber(transcriber_config), + create_agent(agent_config), + create_synthesizer(synthesizer_config), conversation_id=conversation_id, per_chunk_allowance_seconds=0.01, logger=logger,