diff --git a/simple_turn_based_conversation.py b/simple_turn_based_conversation.py index 5f6e1a1..67856b6 100644 --- a/simple_turn_based_conversation.py +++ b/simple_turn_based_conversation.py @@ -8,9 +8,11 @@ from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber from vocode.turn_based.turn_based_conversation import TurnBasedConversation -load_dotenv() -vocode.api_key = os.getenv("VOCODE_API_KEY") +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +load_dotenv() if __name__ == "__main__": microphone_input, speaker_output = create_microphone_input_and_speaker_output( @@ -26,8 +28,14 @@ if __name__ == "__main__": initial_message="Hello!", ), synthesizer=AzureSynthesizer(sampling_rate=speaker_output.sampling_rate), + logger=logger, ) + print("Starting conversation. Press Ctrl+C to exit.") while True: - conversation.start_speech() - input("Press enter to end speech") - conversation.end_speech_and_respond() + try: + input("Press enter to start recording...") + conversation.start_speech() + input("Press enter to end recording...") + conversation.end_speech_and_respond() + except KeyboardInterrupt: + break diff --git a/vocode/turn_based/turn_based_conversation.py b/vocode/turn_based/turn_based_conversation.py index 13b9b8b..e14f37f 100644 --- a/vocode/turn_based/turn_based_conversation.py +++ b/vocode/turn_based/turn_based_conversation.py @@ -1,3 +1,4 @@ +import logging from vocode.turn_based.agent.base_agent import BaseAgent from vocode.turn_based.input_device.base_input_device import ( BaseInputDevice, @@ -15,6 +16,7 @@ class TurnBasedConversation: agent: BaseAgent, synthesizer: BaseSynthesizer, output_device: BaseOutputDevice, + logger: logging.Logger = None, ): self.input_device = input_device self.transcriber = transcriber @@ -22,6 +24,7 @@ class TurnBasedConversation: self.synthesizer = synthesizer self.output_device = output_device self.maybe_play_initial_message() + self.logger = logger or logging.getLogger(__name__) def maybe_play_initial_message(self): if self.agent.initial_message: @@ -34,5 +37,7 @@ class TurnBasedConversation: def end_speech_and_respond(self): human_input = self.transcriber.transcribe(self.input_device.end_listening()) + self.logger.info(f"Transcription: {human_input}") agent_response = self.agent.respond(human_input) + self.logger.info(f"Agent response: {agent_response}") self.output_device.send_audio(self.synthesizer.synthesize(agent_response))