open source

2023-03-28 00:15:34 -07:00 · 2023-03-28 00:15:34 -07:00 · a93bfc1ec9
commit a93bfc1ec9
parent 70b6e17c69
61 changed files with 4013 additions and 126 deletions
--- a/examples/hosted_inbound_call_server.py
+++ b/examples/hosted_inbound_call_server.py
@ -0,0 +1,6 @@
+from vocode.streaming.telephony.hosted.inbound_call_server import InboundCallServer
+from vocode.streaming.models.agent import EchoAgentConfig
+
+if __name__ == "__main__":
+    # Imported locally so the fix is self-contained: the other agent configs in
+    # these examples pass `initial_message` as a BaseMessage, not a bare string.
+    from vocode.streaming.models.message import BaseMessage
+
+    # Inbound call server backed by an echo agent whose initial message is "hello!".
+    server = InboundCallServer(
+        agent_config=EchoAgentConfig(initial_message=BaseMessage(text="hello!"))
+    )
+    # Serve on local port 3001.
+    server.run(port=3001)
--- a/examples/hosted_outbound_call.py
+++ b/examples/hosted_outbound_call.py
@ -0,0 +1,35 @@
+from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
+from vocode.streaming.output_device.telephone_output import TelephoneOutput
+from vocode.streaming.telephony.hosted.outbound_call import OutboundCall
+from vocode.streaming.models.telephony import CallEntity
+from vocode.streaming.models.agent import (
+    EchoAgentConfig,
+    ChatGPTAgentConfig,
+    WebSocketUserImplementedAgentConfig,
+)
+from vocode.streaming.models.message import BaseMessage
+from vocode.streaming.telephony.hosted.zoom_dial_in import ZoomDialIn
+
+if __name__ == "__main__":
+    # Both ends of the call; replace the placeholders before running.
+    zoom_line = CallEntity(phone_number="<your zoom phone number>")
+    own_line = CallEntity(
+        phone_number="<your phone number>",
+    )
+
+    # ChatGPT-backed agent settings used for the call.
+    gpt_settings = ChatGPTAgentConfig(
+        initial_message=BaseMessage(text="the quick fox jumped over the lazy dog "),
+        prompt_preamble="respond two sentences at a time",
+        generate_responses=True,
+        end_conversation_on_goodbye=True,
+        send_filler_audio=True,
+        allowed_idle_time_seconds=30,
+    )
+
+    # Azure synthesizer settings derived from a telephone output device.
+    voice_settings = AzureSynthesizerConfig.from_output_device(
+        output_device=TelephoneOutput(), voice_name="en-US-JennyNeural"
+    )
+
+    call = ZoomDialIn(
+        recipient=zoom_line,
+        caller=own_line,
+        zoom_meeting_id="<your zoom meeting id>",
+        zoom_meeting_password="<your zoom meeting password>",
+        agent_config=gpt_settings,
+        synthesizer_config=voice_settings,
+    )
+    call.start()
+    # Block until the user presses enter, then end the call.
+    input("Press enter to end the call...")
+    call.end()
--- a/examples/hosted_streaming_conversation.py
+++ b/examples/hosted_streaming_conversation.py
@ -0,0 +1,55 @@
+import asyncio
+import logging
+import signal
+from dotenv import load_dotenv
+import os
+from vocode.streaming.hosted_streaming_conversation import HostedStreamingConversation
+from vocode.streaming.streaming_conversation import StreamingConversation
+from vocode.helpers import create_microphone_input_and_speaker_output
+from vocode.streaming.models.transcriber import (
+    DeepgramTranscriberConfig,
+    PunctuationEndpointingConfig,
+    GoogleTranscriberConfig,
+)
+from vocode.streaming.models.agent import (
+    ChatGPTAgentConfig,
+    CutOffResponse,
+    FillerAudioConfig,
+    RESTfulUserImplementedAgentConfig,
+    WebSocketUserImplementedAgentConfig,
+    EchoAgentConfig,
+    LLMAgentConfig,
+    ChatGPTAgentConfig,
+)
+from vocode.streaming.models.message import BaseMessage
+from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
+import vocode
+
+# Load variables from a .env file first so the API key lookup below can see them.
+load_dotenv()
+vocode.api_key = os.getenv("VOCODE_API_KEY")
+
+# Configure default logging and set the root logger's level to INFO.
+logging.basicConfig()
+logging.root.setLevel(logging.INFO)
+
+
+if __name__ == "__main__":
+    microphone_input, speaker_output = create_microphone_input_and_speaker_output(
+        streaming=True, use_default_devices=False
+    )
+
+    # Build each component's configuration up front, then hand them to the
+    # hosted conversation.
+    transcriber_settings = DeepgramTranscriberConfig.from_input_device(
+        microphone_input
+    )
+    agent_settings = ChatGPTAgentConfig(
+        initial_message=BaseMessage(text="Hello!"),
+        prompt_preamble="The AI is having a pleasant conversation about life",
+        generate_responses=True,
+        cut_off_response=CutOffResponse(),
+    )
+    synthesizer_settings = AzureSynthesizerConfig.from_output_device(speaker_output)
+
+    conversation = HostedStreamingConversation(
+        input_device=microphone_input,
+        output_device=speaker_output,
+        transcriber_config=transcriber_settings,
+        agent_config=agent_settings,
+        synthesizer_config=synthesizer_settings,
+    )
+    # On Ctrl+C (SIGINT), deactivate the conversation.
+    signal.signal(signal.SIGINT, lambda _signum, _frame: conversation.deactivate())
+    asyncio.run(conversation.start())
--- a/examples/streaming_conversation.py
+++ b/examples/streaming_conversation.py
@ -0,0 +1,79 @@
+import asyncio
+import logging
+import signal
+from dotenv import load_dotenv
+import os
+from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
+from vocode.streaming.streaming_conversation import StreamingConversation
+from vocode.helpers import create_microphone_input_and_speaker_output
+from vocode.streaming.models.transcriber import (
+    DeepgramTranscriberConfig,
+    PunctuationEndpointingConfig,
+    GoogleTranscriberConfig,
+)
+from vocode.streaming.models.agent import (
+    ChatGPTAgentConfig,
+    CutOffResponse,
+    FillerAudioConfig,
+    RESTfulUserImplementedAgentConfig,
+    WebSocketUserImplementedAgentConfig,
+    EchoAgentConfig,
+    LLMAgentConfig,
+    ChatGPTAgentConfig,
+)
+from vocode.streaming.models.message import BaseMessage
+from vocode.streaming.models.synthesizer import (
+    AzureSynthesizerConfig,
+    GoogleSynthesizerConfig,
+    RimeSynthesizerConfig,
+)
+import vocode
+from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
+from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
+
+# Load variables from a .env file first so the API key lookup below can see them.
+load_dotenv()
+vocode.api_key = os.getenv("VOCODE_API_KEY")
+
+# Module logger at DEBUG level; it is passed to the conversation in main().
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+
+async def main():
+    """Run a microphone/speaker conversation until it stops being active.
+
+    Builds a Deepgram transcriber, a ChatGPT agent and an Azure synthesizer,
+    starts a StreamingConversation with them, and then forwards microphone
+    audio to the conversation for as long as `conversation.is_active()` holds.
+    """
+    microphone_input, speaker_output = create_microphone_input_and_speaker_output(
+        streaming=True, use_default_devices=False
+    )
+
+    deepgram = DeepgramTranscriber(
+        DeepgramTranscriberConfig.from_input_device(
+            microphone_input, endpointing_config=PunctuationEndpointingConfig()
+        )
+    )
+    gpt_agent = ChatGPTAgent(
+        ChatGPTAgentConfig(
+            initial_message=BaseMessage(text="What up"),
+            prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
hella, down,     fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
+            generate_responses=True,
+            cut_off_response=CutOffResponse(),
+        )
+    )
+    azure_voice = AzureSynthesizer(
+        AzureSynthesizerConfig.from_output_device(speaker_output),
+    )
+
+    conversation = StreamingConversation(
+        output_device=speaker_output,
+        transcriber=deepgram,
+        agent=gpt_agent,
+        synthesizer=azure_voice,
+        logger=logger,
+    )
+    await conversation.start()
+    print("Conversation started, press Ctrl+C to end")
+    # On Ctrl+C (SIGINT), terminate the conversation.
+    signal.signal(signal.SIGINT, lambda _signum, _frame: conversation.terminate())
+    while conversation.is_active():
+        audio_chunk = microphone_input.get_audio()
+        if audio_chunk:
+            conversation.receive_audio(audio_chunk)
+        # Yield to the event loop on every pass so other tasks can run.
+        await asyncio.sleep(0)
+
+
+# Script entry point: run the async main() on a fresh event loop.
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/examples/telephony_app.py
+++ b/examples/telephony_app.py
@ -0,0 +1,69 @@
+import logging
+from fastapi import FastAPI
+import os
+from dotenv import load_dotenv
+
+# Load .env before the vocode imports below.
+# NOTE(review): presumably some of those modules read environment variables at
+# import time, which would explain the ordering — confirm.
+load_dotenv()
+
+from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
+from vocode.streaming.models.agent import ChatGPTAgentConfig
+from vocode.streaming.models.message import BaseMessage
+from vocode.streaming.models.telephony import TwilioConfig
+from vocode.streaming.telephony.config_manager.redis_config_manager import (
+    RedisConfigManager,
+)
+from vocode.streaming.telephony.conversation.outbound_call import OutboundCall
+
+from vocode.streaming.telephony.server.base import InboundCallConfig, TelephonyServer
+
+# FastAPI application; docs_url=None turns off the interactive docs page.
+app = FastAPI(docs_url=None)
+
+# Module logger at DEBUG level; it is handed to the telephony server below.
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+# Redis-backed config manager, passed to the telephony server.
+config_manager = RedisConfigManager()
+
+# NOTE(review): hard-coded ngrok hostname — replace with the host of your own
+# deployment or tunnel before running.
+BASE_URL = "59b8e140372d.ngrok.app"
+
+# Telephony server with one inbound call config at "/inbound_call", using a
+# ChatGPT agent config. Twilio credentials are read from the environment.
+telephony_server = TelephonyServer(
+    base_url=BASE_URL,
+    config_manager=config_manager,
+    inbound_call_configs=[
+        InboundCallConfig(
+            url="/inbound_call",
+            agent_config=ChatGPTAgentConfig(
+                initial_message=BaseMessage(text="What up"),
+                prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
hella, down,     fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
+                generate_responses=True,
+            ),
+            twilio_config=TwilioConfig(
+                account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
+                auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
+            ),
+        )
+    ],
+    logger=logger,
+)
+
+# Mount the telephony server's router on the FastAPI app.
+app.include_router(telephony_server.get_router())
+
+# outbound_call = OutboundCall(
+#     base_url=BASE_URL,
+#     to_phone="+14088926228",
+#     from_phone="+14086600744",
+#     config_manager=config_manager,
+#     agent_config=ChatGPTAgentConfig(
+#         initial_message=BaseMessage(text="What up"),
+#         prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
+# hella, down,     fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
+#         generate_responses=True,
+#     ),
+#     twilio_config=TwilioConfig(
+#         account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
+#         auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
+#     ),
+#     logger=logger,
+# )
+# outbound_call.start()
--- a/examples/turn_based_conversation.py
+++ b/examples/turn_based_conversation.py
@ -0,0 +1,48 @@
+import logging
+from dotenv import load_dotenv
+import os
+from vocode.helpers import create_microphone_input_and_speaker_output
+from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
+from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
+from vocode.turn_based.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
+from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
+from vocode.turn_based.turn_based_conversation import TurnBasedConversation
+
+# Module logger at INFO level; it is passed to the conversation below.
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+# Load variables from a .env file so the os.getenv() lookups below can see them.
+load_dotenv()
+
+# Voice id passed to the ElevenLabs synthesizer below.
+# See https://api.elevenlabs.io/v1/voices
+ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
+
+if __name__ == "__main__":
+    microphone_input, speaker_output = create_microphone_input_and_speaker_output(
+        streaming=False, use_default_devices=False
+    )
+
+    # Build the three pipeline stages first; API keys come from the environment.
+    whisper = WhisperTranscriber(api_key=os.getenv("OPENAI_API_KEY"))
+    gpt_agent = ChatGPTAgent(
+        system_prompt="The AI is having a pleasant conversation about life",
+        initial_message="Hello!",
+        api_key=os.getenv("OPENAI_API_KEY"),
+    )
+    eleven_labs_voice = ElevenLabsSynthesizer(
+        voice_id=ADAM_VOICE_ID,
+        api_key=os.getenv("ELEVEN_LABS_API_KEY"),
+    )
+
+    conversation = TurnBasedConversation(
+        input_device=microphone_input,
+        output_device=speaker_output,
+        transcriber=whisper,
+        agent=gpt_agent,
+        synthesizer=eleven_labs_voice,
+        logger=logger,
+    )
+    print("Starting conversation. Press Ctrl+C to exit.")
+    # One turn per pass: record between two enter presses, then respond.
+    # Ctrl+C during a turn leaves the loop.
+    while True:
+        try:
+            input("Press enter to start recording...")
+            conversation.start_speech()
+            input("Press enter to end recording...")
+            conversation.end_speech_and_respond()
+        except KeyboardInterrupt:
+            break
--- a/examples/user_implemented_agent.py
+++ b/examples/user_implemented_agent.py
@ -0,0 +1,48 @@
+from typing import AsyncGenerator
+from vocode.streaming.user_implemented_agent.restful_agent import RESTfulAgent
+from vocode.streaming.models.agent import (
+    RESTfulAgentOutput,
+    RESTfulAgentText,
+    RESTfulAgentEnd,
+    WebSocketAgentMessage,
+    WebSocketAgentTextEndMessage,
+    WebSocketAgentTextMessage,
+    WebSocketAgentStopMessage,
+)
+from vocode.streaming.user_implemented_agent.websocket_agent import WebSocketAgent
+
+
+class TestRESTfulAgent(RESTfulAgent):
+    """Example REST agent: spells the input back, or ends when it contains "bye"."""
+
+    async def respond(self, input: str, conversation_id: str) -> RESTfulAgentOutput:
+        """Return the input with a space after every character.
+
+        Returns a RESTfulAgentEnd instead when "bye" occurs anywhere in the input.
+        """
+        print(input, conversation_id)
+        if "bye" not in input:
+            # e.g. "hi" -> "h i "
+            spaced_out = "".join(char + " " for char in input)
+            return RESTfulAgentText(response=spaced_out)
+        return RESTfulAgentEnd()
+
+
+class TestWebSocketAgent(WebSocketAgent):
+    """Example WebSocket agent with a one-shot and a streaming response method."""
+
+    async def respond(self, input: str, conversation_id: str) -> WebSocketAgentMessage:
+        """Return the input with a space after every character.
+
+        Returns a stop message instead when "bye" occurs anywhere in the input.
+        """
+        print(input, conversation_id)
+        if "bye" not in input:
+            # e.g. "hi" -> "h i "
+            spaced_out = "".join(char + " " for char in input)
+            return WebSocketAgentTextMessage.from_text(spaced_out)
+        return WebSocketAgentStopMessage()
+
+    async def generate_response(
+        self, input: str, conversation_id: str
+    ) -> AsyncGenerator[WebSocketAgentMessage, None]:
+        """Yield the input one whitespace-separated word at a time.
+
+        A text-end message is always yielded last. When the input contains
+        "bye", no words are yielded and only the text-end message is sent.
+        """
+        # NOTE(review): respond() answers "bye" with a stop message, while this
+        # method only sends a text-end message — confirm that is intended.
+        print(input, conversation_id)
+        if "bye" not in input:
+            for token in input.split():
+                yield WebSocketAgentTextMessage.from_text(token)
+        yield WebSocketAgentTextEndMessage()
+
+
+# Script entry point: run the WebSocket example agent on local port 3001 with
+# generate_responses enabled.
+if __name__ == "__main__":
+    agent = TestWebSocketAgent(generate_responses=True)
+    agent.run(port=3001)
+    agent.run(port=3001)