open source

This commit is contained in:
Ajay Raj 2023-03-28 00:15:34 -07:00
commit a93bfc1ec9
61 changed files with 4013 additions and 126 deletions

View file

@ -0,0 +1,6 @@
from vocode.streaming.telephony.hosted.inbound_call_server import InboundCallServer
from vocode.streaming.models.agent import EchoAgentConfig
if __name__ == "__main__":
    # Imported locally so this entry point stays self-contained.
    from vocode.streaming.models.message import BaseMessage

    # Serve inbound calls with an echo-agent configuration on port 3001.
    # initial_message is wrapped in BaseMessage rather than passed as a bare
    # string, matching how the other streaming agent configs are built.
    server = InboundCallServer(
        agent_config=EchoAgentConfig(initial_message=BaseMessage(text="hello!"))
    )
    server.run(port=3001)

View file

@ -0,0 +1,35 @@
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
from vocode.streaming.output_device.telephone_output import TelephoneOutput
from vocode.streaming.telephony.hosted.outbound_call import OutboundCall
from vocode.streaming.models.telephony import CallEntity
from vocode.streaming.models.agent import (
EchoAgentConfig,
ChatGPTAgentConfig,
WebSocketUserImplementedAgentConfig,
)
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.telephony.hosted.zoom_dial_in import ZoomDialIn
if __name__ == "__main__":
    # Describe the Zoom dial-in call: who is dialled, who is calling, which
    # meeting to join, and the agent / synthesizer configuration to use.
    call = ZoomDialIn(
        recipient=CallEntity(phone_number="<your zoom phone number>"),
        caller=CallEntity(
            phone_number="<your phone number>",
        ),
        zoom_meeting_id="<your zoom meeting id>",
        zoom_meeting_password="<your zoom meeting password>",
        agent_config=ChatGPTAgentConfig(
            initial_message=BaseMessage(text="the quick fox jumped over the lazy dog "),
            prompt_preamble="respond two sentences at a time",
            generate_responses=True,
            end_conversation_on_goodbye=True,
            send_filler_audio=True,
            allowed_idle_time_seconds=30,
        ),
        synthesizer_config=AzureSynthesizerConfig.from_output_device(
            output_device=TelephoneOutput(), voice_name="en-US-JennyNeural"
        ),
    )
    call.start()
    try:
        input("Press enter to end the call...")
    finally:
        # input() raises KeyboardInterrupt on Ctrl+C and EOFError when stdin
        # is closed; end the call in those cases too so a started call is
        # never left without a matching end().
        call.end()

View file

@ -0,0 +1,55 @@
import asyncio
import logging
import signal
from dotenv import load_dotenv
import os
from vocode.streaming.hosted_streaming_conversation import HostedStreamingConversation
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.helpers import create_microphone_input_and_speaker_output
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
GoogleTranscriberConfig,
)
from vocode.streaming.models.agent import (
ChatGPTAgentConfig,
CutOffResponse,
FillerAudioConfig,
RESTfulUserImplementedAgentConfig,
WebSocketUserImplementedAgentConfig,
EchoAgentConfig,
LLMAgentConfig,
ChatGPTAgentConfig,
)
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
import vocode
# Load .env values first so VOCODE_API_KEY can be supplied from that file.
load_dotenv()
vocode.api_key = os.environ.get("VOCODE_API_KEY")

# Configure logging and set the root logger to INFO.
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)

if __name__ == "__main__":
    mic, speaker = create_microphone_input_and_speaker_output(
        streaming=True, use_default_devices=False
    )

    # Build each configuration up front, in the order the conversation
    # receives them.
    transcriber_config = DeepgramTranscriberConfig.from_input_device(mic)
    agent_config = ChatGPTAgentConfig(
        initial_message=BaseMessage(text="Hello!"),
        prompt_preamble="The AI is having a pleasant conversation about life",
        generate_responses=True,
        cut_off_response=CutOffResponse(),
    )
    synthesizer_config = AzureSynthesizerConfig.from_output_device(speaker)

    conversation = HostedStreamingConversation(
        input_device=mic,
        output_device=speaker,
        transcriber_config=transcriber_config,
        agent_config=agent_config,
        synthesizer_config=synthesizer_config,
    )

    def _deactivate_on_sigint(_signum, _frame):
        """SIGINT handler: deactivate the running conversation."""
        conversation.deactivate()

    signal.signal(signal.SIGINT, _deactivate_on_sigint)
    asyncio.run(conversation.start())

View file

@ -0,0 +1,79 @@
import asyncio
import logging
import signal
from dotenv import load_dotenv
import os
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.helpers import create_microphone_input_and_speaker_output
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
GoogleTranscriberConfig,
)
from vocode.streaming.models.agent import (
ChatGPTAgentConfig,
CutOffResponse,
FillerAudioConfig,
RESTfulUserImplementedAgentConfig,
WebSocketUserImplementedAgentConfig,
EchoAgentConfig,
LLMAgentConfig,
ChatGPTAgentConfig,
)
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.synthesizer import (
AzureSynthesizerConfig,
GoogleSynthesizerConfig,
RimeSynthesizerConfig,
)
import vocode
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
# Load .env values first so VOCODE_API_KEY can be supplied from that file.
load_dotenv()
vocode.api_key = os.environ.get("VOCODE_API_KEY")

# Module logger at DEBUG; main() hands it to the conversation.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
async def main():
    """Run a StreamingConversation fed from the local microphone.

    Builds a Deepgram transcriber, a ChatGPT agent and an Azure synthesizer,
    starts the conversation, installs a SIGINT handler that terminates it,
    and then forwards microphone audio for as long as the conversation
    reports itself active.
    """
    mic, speaker = create_microphone_input_and_speaker_output(
        streaming=True, use_default_devices=False
    )

    # Components are created in the same order the conversation receives them.
    transcriber = DeepgramTranscriber(
        DeepgramTranscriberConfig.from_input_device(
            mic, endpointing_config=PunctuationEndpointingConfig()
        )
    )
    agent = ChatGPTAgent(
        ChatGPTAgentConfig(
            initial_message=BaseMessage(text="What up"),
            prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
            generate_responses=True,
            cut_off_response=CutOffResponse(),
        )
    )
    synthesizer = AzureSynthesizer(
        AzureSynthesizerConfig.from_output_device(speaker),
    )

    conversation = StreamingConversation(
        output_device=speaker,
        transcriber=transcriber,
        agent=agent,
        synthesizer=synthesizer,
        logger=logger,
    )
    await conversation.start()
    print("Conversation started, press Ctrl+C to end")

    def _terminate_on_sigint(_signum, _frame):
        """SIGINT handler: terminate the running conversation."""
        conversation.terminate()

    signal.signal(signal.SIGINT, _terminate_on_sigint)

    # Forward whatever audio the microphone has, then yield to the event loop
    # so the conversation's own tasks can run between polls.
    while conversation.is_active():
        audio = mic.get_audio()
        if audio:
            conversation.receive_audio(audio)
        await asyncio.sleep(0)


if __name__ == "__main__":
    asyncio.run(main())

69
examples/telephony_app.py Normal file
View file

@ -0,0 +1,69 @@
import logging
from fastapi import FastAPI
import os
from dotenv import load_dotenv
load_dotenv()
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.telephony import TwilioConfig
from vocode.streaming.telephony.config_manager.redis_config_manager import (
RedisConfigManager,
)
from vocode.streaming.telephony.conversation.outbound_call import OutboundCall
from vocode.streaming.telephony.server.base import InboundCallConfig, TelephonyServer
# FastAPI application that hosts the telephony routes (docs endpoint disabled).
app = FastAPI(docs_url=None)

logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

config_manager = RedisConfigManager()

# Host name handed to TelephonyServer as base_url. It is read from the
# BASE_URL environment variable (load_dotenv() has already run above) so the
# address can change without editing this file; the previous hard-coded value
# remains the fallback.
BASE_URL = os.getenv("BASE_URL", "59b8e140372d.ngrok.app")

# One inbound route, /inbound_call, answered by a ChatGPT agent and using
# Twilio credentials taken from the environment.
telephony_server = TelephonyServer(
    base_url=BASE_URL,
    config_manager=config_manager,
    inbound_call_configs=[
        InboundCallConfig(
            url="/inbound_call",
            agent_config=ChatGPTAgentConfig(
                initial_message=BaseMessage(text="What up"),
                prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
                generate_responses=True,
            ),
            twilio_config=TwilioConfig(
                account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
                auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
            ),
        )
    ],
    logger=logger,
)

# Expose the telephony server's routes on the FastAPI app.
app.include_router(telephony_server.get_router())
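# Optional: uncomment the block below to also place an outbound call when this
# module is loaded; replace to_phone / from_phone with your own numbers.
# outbound_call = OutboundCall(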
# base_url=BASE_URL,
# to_phone="+14088926228",
# from_phone="+14086600744",
# config_manager=config_manager,
# agent_config=ChatGPTAgentConfig(
# initial_message=BaseMessage(text="What up"),
# prompt_preamble="""You are a helpful gen Z AI assistant. You use slang like um, but, and like a LOT. All of your responses are 10 words or less. Be super chill, use slang like
# hella, down, fire, totally, but like, slay, vibing, queen, go off, bet, sus, simp, cap, big yikes, main character, dank""",
# generate_responses=True,
# ),
# twilio_config=TwilioConfig(
# account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
# auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
# ),
# logger=logger,
# )
# outbound_call.start()

View file

@ -0,0 +1,48 @@
import logging
from dotenv import load_dotenv
import os
from vocode.helpers import create_microphone_input_and_speaker_output
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.turn_based.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
from vocode.turn_based.turn_based_conversation import TurnBasedConversation
# Module logger at INFO; it is passed to the conversation below.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

load_dotenv()

# ElevenLabs voice id given to the synthesizer below.
# See https://api.elevenlabs.io/v1/voices
ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"

if __name__ == "__main__":
    mic, speaker = create_microphone_input_and_speaker_output(
        streaming=False, use_default_devices=False
    )

    # Components are created in the same order the conversation receives them.
    transcriber = WhisperTranscriber(api_key=os.environ.get("OPENAI_API_KEY"))
    agent = ChatGPTAgent(
        system_prompt="The AI is having a pleasant conversation about life",
        initial_message="Hello!",
        api_key=os.environ.get("OPENAI_API_KEY"),
    )
    synthesizer = ElevenLabsSynthesizer(
        voice_id=ADAM_VOICE_ID,
        api_key=os.environ.get("ELEVEN_LABS_API_KEY"),
    )
    conversation = TurnBasedConversation(
        input_device=mic,
        output_device=speaker,
        transcriber=transcriber,
        agent=agent,
        synthesizer=synthesizer,
        logger=logger,
    )

    print("Starting conversation. Press Ctrl+C to exit.")
    try:
        # Each pass is one turn: record between two Enter presses, then respond.
        while True:
            input("Press enter to start recording...")
            conversation.start_speech()
            input("Press enter to end recording...")
            conversation.end_speech_and_respond()
    except KeyboardInterrupt:
        # Ctrl+C at any point in a turn ends the loop and the script.
        pass

View file

@ -0,0 +1,48 @@
from typing import AsyncGenerator
from vocode.streaming.user_implemented_agent.restful_agent import RESTfulAgent
from vocode.streaming.models.agent import (
RESTfulAgentOutput,
RESTfulAgentText,
RESTfulAgentEnd,
WebSocketAgentMessage,
WebSocketAgentTextEndMessage,
WebSocketAgentTextMessage,
WebSocketAgentStopMessage,
)
from vocode.streaming.user_implemented_agent.websocket_agent import WebSocketAgent
class TestRESTfulAgent(RESTfulAgent):
    """Example REST agent that spells the caller's input back out."""

    async def respond(self, input: str, conversation_id: str) -> RESTfulAgentOutput:
        """Produce the reply for a single input.

        Prints the input and conversation id. An input containing "bye" gets
        a RESTfulAgentEnd; any other input gets a RESTfulAgentText whose
        response is the input with a space appended after every character.
        """
        print(input, conversation_id)
        if "bye" not in input:
            spaced_out = "".join(f"{char} " for char in input)
            return RESTfulAgentText(response=spaced_out)
        return RESTfulAgentEnd()
class TestWebSocketAgent(WebSocketAgent):
    """Example WebSocket agent with a one-shot and a streaming reply path."""

    async def respond(self, input: str, conversation_id: str) -> WebSocketAgentMessage:
        """Produce one reply message for a single input.

        Prints the input and conversation id. An input containing "bye" gets
        a WebSocketAgentStopMessage; any other input gets a text message
        holding the input with a space appended after every character.
        """
        print(input, conversation_id)
        if "bye" not in input:
            spaced_out = "".join(f"{char} " for char in input)
            return WebSocketAgentTextMessage.from_text(spaced_out)
        return WebSocketAgentStopMessage()

    async def generate_response(
        self, input: str, conversation_id: str
    ) -> AsyncGenerator[WebSocketAgentMessage, None]:
        """Stream the reply for a single input, one message per word.

        Prints the input and conversation id. Unless the input contains
        "bye", each whitespace-separated word is yielded as its own text
        message. A WebSocketAgentTextEndMessage is yielded last in both cases.
        """
        # NOTE(review): unlike respond(), the "bye" path here yields a text-end
        # message rather than WebSocketAgentStopMessage - confirm this is intended.
        print(input, conversation_id)
        if "bye" not in input:
            for token in input.split():
                yield WebSocketAgentTextMessage.from_text(token)
        yield WebSocketAgentTextEndMessage()
if __name__ == "__main__":
    # Run the example WebSocket agent, with generate_responses enabled, on port 3001.
    TestWebSocketAgent(generate_responses=True).run(port=3001)