streaming conversation takes instances not configs

2023-03-28 12:39:49 -07:00 · 2023-03-28 12:39:49 -07:00 · a9424ffaeb
commit a9424ffaeb
parent 609de1f4a4
5 changed files with 40 additions and 36 deletions
--- a/examples/streaming_conversation.py
+++ b/examples/streaming_conversation.py
@@ -46,17 +46,23 @@ async def main():
    conversation = StreamingConversation(
        output_device=speaker_output,
-        transcriber_config=DeepgramTranscriberConfig.from_input_device(
+        transcriber=DeepgramTranscriber(
-            microphone_input, endpointing_config=PunctuationEndpointingConfig()
+            DeepgramTranscriberConfig.from_input_device(
                microphone_input, endpointing_config=PunctuationEndpointingConfig()
            )
        ),
-        agent_config=ChatGPTAgentConfig(
+        agent=ChatGPTAgent(
-            initial_message=BaseMessage(text="What up"),
+            ChatGPTAgentConfig(
-            prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
+                initial_message=BaseMessage(text="What up"),
                prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
 hella, down,     fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
-            generate_responses=True,
+                generate_responses=True,
-            cut_off_response=CutOffResponse(),
+                cut_off_response=CutOffResponse(),
            )
        ),
+        synthesizer=AzureSynthesizer(
+            AzureSynthesizerConfig.from_output_device(speaker_output)
+        ),
-        synthesizer_config=AzureSynthesizerConfig.from_output_device(speaker_output),
        logger=logger,
    )
    await conversation.start()
--- a/examples/telephony_app.py
+++ b/examples/telephony_app.py
@@ -6,7 +6,7 @@ from vocode import getenv
 load_dotenv()
 from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
-from vocode.streaming.models.agent import ChatGPTAgentConfig
+from vocode.streaming.models.agent import ChatGPTAgentConfig, EchoAgentConfig
 from vocode.streaming.models.message import BaseMessage
 from vocode.streaming.models.telephony import TwilioConfig
 from vocode.streaming.telephony.config_manager.redis_config_manager import (
--- a/vocode/streaming/agent/chat_gpt_agent.py
+++ b/vocode/streaming/agent/chat_gpt_agent.py
@@ -142,14 +142,17 @@ class ChatGPTAgent(BaseAgent):
 if __name__ == "__main__":
    from dotenv import load_dotenv
    load_dotenv()
    agent = ChatGPTAgent(
        ChatGPTAgentConfig(
            model_name="gpt-4",
            prompt_preamble="The assistant is having a pleasant conversation about life. If the user hasn't completed their thought, the assistant responds with 'PASS'",
        )
    )
    while True:
-        # response = agent.respond(input("Human: "))[0]
+        response = agent.respond(input("Human: "))[0]
-        # print(f"AI: {response}")
+        print(f"AI: {response}")
-        for response in agent.generate_response(input("Human: ")):
+        # for response in agent.generate_response(input("Human: ")):
-            print(f"AI: {response}")
+        #     print(f"AI: {response}")
--- a/vocode/streaming/streaming_conversation.py
+++ b/vocode/streaming/streaming_conversation.py
@@ -1,5 +1,6 @@
 import asyncio
 from asyncio import Future
+from ctypes import Union
 import queue
 from typing import Callable, Awaitable, Optional, Any
 import logging
@@ -66,9 +67,9 @@ class StreamingConversation:
    def __init__(
        self,
        output_device: BaseOutputDevice,
-        transcriber_config: TranscriberConfig,
+        transcriber: BaseTranscriber,
-        agent_config: AgentConfig,
+        agent: BaseAgent,
-        synthesizer_config: SynthesizerConfig,
+        synthesizer: BaseSynthesizer,
        conversation_id: str = None,
        per_chunk_allowance_seconds: int = PER_CHUNK_ALLOWANCE_SECONDS,
        logger: Optional[logging.Logger] = None,
@@ -76,11 +77,11 @@ class StreamingConversation:
        self.id = conversation_id or create_conversation_id()
        self.logger = logger or logging.getLogger(__name__)
        self.output_device = output_device
-        self.transcriber = create_transcriber(transcriber_config)
+        self.transcriber = transcriber
        self.transcriber.set_on_response(self.on_transcription_response)
        self.transcriber_task = None
-        self.agent = create_agent(agent_config)
+        self.agent = agent
-        self.synthesizer = create_synthesizer(synthesizer_config)
+        self.synthesizer = synthesizer
        self.synthesizer_event_loop = asyncio.new_event_loop()
        self.synthesizer_thread = threading.Thread(
            name="synthesizer",
--- a/vocode/streaming/telephony/conversation/call.py
+++ b/vocode/streaming/telephony/conversation/call.py
@@ -11,22 +11,26 @@ from vocode.streaming.factory import (
    create_synthesizer,
    create_transcriber,
 )
 from vocode.streaming.models.agent import AgentConfig
 from vocode.streaming.streaming_conversation import StreamingConversation
 from vocode.streaming.models.telephony import CallConfig, TwilioConfig
 from vocode.streaming.output_device.twilio_output_device import TwilioOutputDevice
 from vocode.streaming.models.synthesizer import (
    AzureSynthesizerConfig,
    SynthesizerConfig,
 )
 from vocode.streaming.models.transcriber import (
    DeepgramTranscriberConfig,
    PunctuationEndpointingConfig,
    TranscriberConfig,
 )
 from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
 from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
 from vocode.streaming.telephony.config_manager.base_config_manager import (
    BaseConfigManager,
 )
 from vocode.streaming.telephony.constants import DEFAULT_SAMPLING_RATE
 from vocode.streaming.telephony.twilio import create_twilio_client
 from vocode.streaming.models.audio_encoding import AudioEncoding
 from vocode.streaming.streaming_conversation import StreamingConversation
@@ -43,9 +47,9 @@ class Call(StreamingConversation):
        self,
        base_url: str,
        config_manager: BaseConfigManager,
-        agent_config: BaseAgent,
+        agent_config: AgentConfig,
-        transcriber_config: Optional[BaseTranscriber] = None,
+        transcriber_config: TranscriberConfig,
-        synthesizer_config: Optional[BaseSynthesizer] = None,
+        synthesizer_config: SynthesizerConfig,
        twilio_config: Optional[TwilioConfig] = None,
        twilio_sid: Optional[str] = None,
        conversation_id: Optional[str] = None,
@@ -61,19 +65,9 @@ class Call(StreamingConversation):
        self.twilio_client = create_twilio_client(twilio_config)
        super().__init__(
            self.output_device,
-            transcriber_config
+            create_transcriber(transcriber_config),
-            or DeepgramTranscriberConfig(
+            create_agent(agent_config),
-                sampling_rate=8000,
+            create_synthesizer(synthesizer_config),
-                audio_encoding=AudioEncoding.MULAW,
-                chunk_size=self.CHUNK_SIZE,
-                model="voicemail",
-                endpointing_config=PunctuationEndpointingConfig(),
-            ),
-            agent_config,
-            synthesizer_config
-            or AzureSynthesizerConfig(
-                sampling_rate=8000, audio_encoding=AudioEncoding.MULAW
-            ),
            conversation_id=conversation_id,
            per_chunk_allowance_seconds=0.01,
            logger=logger,