From a9424ffaeb8e5b33926255f4f844c4a78c1cef8c Mon Sep 17 00:00:00 2001 From: Ajay Raj Date: Tue, 28 Mar 2023 12:39:49 -0700 Subject: [PATCH] streaming conversation takes instances not configs --- examples/streaming_conversation.py | 22 ++++++++++------ examples/telephony_app.py | 2 +- vocode/streaming/agent/chat_gpt_agent.py | 13 ++++++---- vocode/streaming/streaming_conversation.py | 13 +++++----- .../streaming/telephony/conversation/call.py | 26 +++++++------------ 5 files changed, 40 insertions(+), 36 deletions(-) diff --git a/examples/streaming_conversation.py b/examples/streaming_conversation.py index 662fce3..7fee974 100644 --- a/examples/streaming_conversation.py +++ b/examples/streaming_conversation.py @@ -46,17 +46,23 @@ async def main(): conversation = StreamingConversation( output_device=speaker_output, - transcriber_config=DeepgramTranscriberConfig.from_input_device( - microphone_input, endpointing_config=PunctuationEndpointingConfig() + transcriber=DeepgramTranscriber( + DeepgramTranscriberConfig.from_input_device( + microphone_input, endpointing_config=PunctuationEndpointingConfig() + ) ), - agent_config=ChatGPTAgentConfig( - initial_message=BaseMessage(text="What up"), - prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like + agent=ChatGPTAgent( + ChatGPTAgentConfig( + initial_message=BaseMessage(text="What up"), + prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. 
Be super chill, use slang like hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""", - generate_responses=True, - cut_off_response=CutOffResponse(), + generate_responses=True, + cut_off_response=CutOffResponse(), + ) + ), + synthesizer=AzureSynthesizer( + AzureSynthesizerConfig.from_output_device(speaker_output) ), - synthesizer_config=AzureSynthesizerConfig.from_output_device(speaker_output), logger=logger, ) await conversation.start() diff --git a/examples/telephony_app.py b/examples/telephony_app.py index 1af1b39..8cc0f8e 100644 --- a/examples/telephony_app.py +++ b/examples/telephony_app.py @@ -6,7 +6,7 @@ from vocode import getenv load_dotenv() from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent -from vocode.streaming.models.agent import ChatGPTAgentConfig +from vocode.streaming.models.agent import ChatGPTAgentConfig, EchoAgentConfig from vocode.streaming.models.message import BaseMessage from vocode.streaming.models.telephony import TwilioConfig from vocode.streaming.telephony.config_manager.redis_config_manager import ( diff --git a/vocode/streaming/agent/chat_gpt_agent.py b/vocode/streaming/agent/chat_gpt_agent.py index d9c3603..c458586 100644 --- a/vocode/streaming/agent/chat_gpt_agent.py +++ b/vocode/streaming/agent/chat_gpt_agent.py @@ -142,14 +142,17 @@ class ChatGPTAgent(BaseAgent): if __name__ == "__main__": + from dotenv import load_dotenv + + load_dotenv() + agent = ChatGPTAgent( ChatGPTAgentConfig( - model_name="gpt-4", prompt_preamble="The assistant is having a pleasant conversation about life. 
If the user hasn't completed their thought, the assistant responds with 'PASS'", ) ) while True: - # response = agent.respond(input("Human: "))[0] - # print(f"AI: {response}") - for response in agent.generate_response(input("Human: ")): - print(f"AI: {response}") + response = agent.respond(input("Human: "))[0] + print(f"AI: {response}") + # for response in agent.generate_response(input("Human: ")): + # print(f"AI: {response}") diff --git a/vocode/streaming/streaming_conversation.py b/vocode/streaming/streaming_conversation.py index ce08a21..3dcf572 100644 --- a/vocode/streaming/streaming_conversation.py +++ b/vocode/streaming/streaming_conversation.py @@ -1,5 +1,6 @@ import asyncio from asyncio import Future +from typing import Union import queue from typing import Callable, Awaitable, Optional, Any import logging @@ -66,9 +67,9 @@ class StreamingConversation: def __init__( self, output_device: BaseOutputDevice, - transcriber_config: TranscriberConfig, - agent_config: AgentConfig, - synthesizer_config: SynthesizerConfig, + transcriber: BaseTranscriber, + agent: BaseAgent, + synthesizer: BaseSynthesizer, conversation_id: str = None, per_chunk_allowance_seconds: int = PER_CHUNK_ALLOWANCE_SECONDS, logger: Optional[logging.Logger] = None, @@ -76,11 +77,11 @@ class StreamingConversation: self.id = conversation_id or create_conversation_id() self.logger = logger or logging.getLogger(__name__) self.output_device = output_device - self.transcriber = create_transcriber(transcriber_config) + self.transcriber = transcriber self.transcriber.set_on_response(self.on_transcription_response) self.transcriber_task = None - self.agent = create_agent(agent_config) - self.synthesizer = create_synthesizer(synthesizer_config) + self.agent = agent + self.synthesizer = synthesizer self.synthesizer_event_loop = asyncio.new_event_loop() self.synthesizer_thread = threading.Thread( name="synthesizer", diff --git a/vocode/streaming/telephony/conversation/call.py 
b/vocode/streaming/telephony/conversation/call.py index 8426e0f..fd2ba85 100644 --- a/vocode/streaming/telephony/conversation/call.py +++ b/vocode/streaming/telephony/conversation/call.py @@ -11,22 +11,26 @@ from vocode.streaming.factory import ( create_synthesizer, create_transcriber, ) +from vocode.streaming.models.agent import AgentConfig from vocode.streaming.streaming_conversation import StreamingConversation from vocode.streaming.models.telephony import CallConfig, TwilioConfig from vocode.streaming.output_device.twilio_output_device import TwilioOutputDevice from vocode.streaming.models.synthesizer import ( AzureSynthesizerConfig, + SynthesizerConfig, ) from vocode.streaming.models.transcriber import ( DeepgramTranscriberConfig, PunctuationEndpointingConfig, + TranscriberConfig, ) from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer from vocode.streaming.telephony.config_manager.base_config_manager import ( BaseConfigManager, ) +from vocode.streaming.telephony.constants import DEFAULT_SAMPLING_RATE from vocode.streaming.telephony.twilio import create_twilio_client from vocode.streaming.models.audio_encoding import AudioEncoding from vocode.streaming.streaming_conversation import StreamingConversation @@ -43,9 +47,9 @@ class Call(StreamingConversation): self, base_url: str, config_manager: BaseConfigManager, - agent_config: BaseAgent, - transcriber_config: Optional[BaseTranscriber] = None, - synthesizer_config: Optional[BaseSynthesizer] = None, + agent_config: AgentConfig, + transcriber_config: TranscriberConfig, + synthesizer_config: SynthesizerConfig, twilio_config: Optional[TwilioConfig] = None, twilio_sid: Optional[str] = None, conversation_id: Optional[str] = None, @@ -61,19 +65,9 @@ class Call(StreamingConversation): self.twilio_client = create_twilio_client(twilio_config) super().__init__( self.output_device, - transcriber_config - or 
DeepgramTranscriberConfig( - sampling_rate=8000, - audio_encoding=AudioEncoding.MULAW, - chunk_size=self.CHUNK_SIZE, - model="voicemail", - endpointing_config=PunctuationEndpointingConfig(), - ), - agent_config, - synthesizer_config - or AzureSynthesizerConfig( - sampling_rate=8000, audio_encoding=AudioEncoding.MULAW - ), + create_transcriber(transcriber_config), + create_agent(agent_config), + create_synthesizer(synthesizer_config), conversation_id=conversation_id, per_chunk_allowance_seconds=0.01, logger=logger,