open source
This commit is contained in:
parent 70b6e17c69
commit a93bfc1ec9
61 changed files with 4013 additions and 126 deletions
vocode/streaming/agent/base_agent.py (Normal file, 44 lines added)
@@ -0,0 +1,44 @@
import random
from typing import Generator, Optional
from vocode.streaming.models.agent import (
    AgentConfig,
    ChatGPTAgentConfig,
    LLMAgentConfig,
)


class BaseAgent:
    def __init__(self, agent_config: AgentConfig):
        self.agent_config = agent_config

    def get_agent_config(self) -> AgentConfig:
        return self.agent_config

    def start(self):
        pass

    def respond(
        self, human_input, is_interrupt: bool = False
    ) -> tuple[Optional[str], bool]:
        raise NotImplementedError

    def generate_response(
        self, human_input, is_interrupt: bool = False
    ) -> Generator[str, None, None]:
        """Returns a generator that yields a sentence at a time."""
        raise NotImplementedError

    def update_last_bot_message_on_cut_off(self, message: str):
        """Updates the last bot message in the conversation history when the human cuts off the bot's response."""
        pass

    def get_cut_off_response(self) -> Optional[str]:
        assert isinstance(self.agent_config, LLMAgentConfig) or isinstance(
            self.agent_config, ChatGPTAgentConfig
        )
        on_cut_off_messages = self.agent_config.cut_off_response.messages
        if on_cut_off_messages:
            return random.choice(on_cut_off_messages).text

    def terminate(self):
        pass
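For orientation, every concrete agent in this commit subclasses BaseAgent and overrides respond and/or generate_response. A minimal sketch, assuming the vocode package above is importable; the UppercaseAgent class and its behavior are invented for illustration:

from typing import Optional

from vocode.streaming.agent.base_agent import BaseAgent


class UppercaseAgent(BaseAgent):
    """Toy agent that shouts the caller's words back (illustration only)."""

    def respond(
        self, human_input, is_interrupt: bool = False
    ) -> tuple[Optional[str], bool]:
        # Matches the (response, flag) tuple shape defined by BaseAgent.respond;
        # the concrete agents in this commit always return False for the second element.
        return human_input.upper(), False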
vocode/streaming/agent/bot_sentiment_analyser.py (Normal file, 50 lines added)
@@ -0,0 +1,50 @@
from typing import Optional
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from pydantic import BaseModel

TEMPLATE = """
Read the following conversation classify the final emotion of the Bot as one of [{emotions}].
Output the degree of emotion as a value between 0 and 1 in the format EMOTION,DEGREE: ex. {example_emotion},0.5

<start>
{{transcript}}
<end>
"""


class BotSentiment(BaseModel):
    emotion: Optional[str] = None
    degree: float = 0.0


class BotSentimentAnalyser:
    def __init__(self, emotions: list[str], model_name: str = "text-davinci-003"):
        self.model_name = model_name
        self.llm = OpenAI(
            model_name=self.model_name,
        )
        assert len(emotions) > 0
        self.emotions = [e.lower() for e in emotions]
        self.prompt = PromptTemplate(
            input_variables=["transcript"],
            template=TEMPLATE.format(
                emotions=",".join(self.emotions), example_emotion=self.emotions[0]
            ),
        )

    def analyse(self, transcript: str) -> BotSentiment:
        prompt = self.prompt.format(transcript=transcript)
        response = self.llm(prompt).strip()
        tokens = response.split(",")
        if len(tokens) != 2:
            return BotSentiment(emotion=None, degree=0.0)
        emotion, degree = tokens
        emotion = emotion.strip().lower()
        if emotion.lower() not in self.emotions:
            return BotSentiment(emotion=None, degree=0.0)
        try:
            degree = float(degree.strip())
        except ValueError:
            return BotSentiment(emotion=emotion, degree=0.5)
        return BotSentiment(emotion=emotion, degree=degree)
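A hypothetical invocation of the analyser, assuming OPENAI_API_KEY is set in the environment (the call goes out to the text-davinci-003 completion endpoint); the transcript and emotion list below are made up:

from vocode.streaming.agent.bot_sentiment_analyser import BotSentimentAnalyser

analyser = BotSentimentAnalyser(emotions=["happy", "sad", "angry"])
sentiment = analyser.analyse(
    "Human: My flight just got cancelled.\nBot: Oh no, I'm so sorry to hear that."
)
# Prints e.g. "sad 0.8"; emotion stays None (degree 0.0) whenever the completion
# doesn't parse as the expected "EMOTION,DEGREE" pair.
print(sentiment.emotion, sentiment.degree)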
vocode/streaming/agent/chat_gpt_agent.py (Normal file, 158 lines added)
@@ -0,0 +1,158 @@
import os
import random
import time
from langchain.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains import ConversationChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAIChat
from langchain.memory import ConversationBufferMemory
from langchain.schema import ChatMessage, AIMessage
import openai
import json
from typing import Generator, Optional

from dotenv import load_dotenv
from typing import Generator
import logging

from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig
from vocode.streaming.utils.sse_client import SSEClient
from vocode.streaming.agent.utils import stream_llm_response

load_dotenv()

openai.api_key = os.environ.get("OPENAI_API_KEY")


class ChatGPTAgent(BaseAgent):
    def __init__(self, agent_config: ChatGPTAgentConfig, logger: logging.Logger = None):
        super().__init__(agent_config)
        self.agent_config = agent_config
        self.logger = logger or logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        self.prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessagePromptTemplate.from_template(agent_config.prompt_preamble),
                MessagesPlaceholder(variable_name="history"),
                HumanMessagePromptTemplate.from_template("{input}"),
            ]
        )
        self.memory = ConversationBufferMemory(return_messages=True)
        if agent_config.initial_message:
            if (
                agent_config.generate_responses
            ):  # we use ChatMessages for memory when we generate responses
                self.memory.chat_memory.messages.append(
                    ChatMessage(
                        content=agent_config.initial_message.text, role="assistant"
                    )
                )
            else:
                self.memory.chat_memory.add_ai_message(
                    agent_config.initial_message.text
                )
        self.llm = ChatOpenAI(
            model_name=self.agent_config.model_name,
            temperature=self.agent_config.temperature,
            max_tokens=self.agent_config.max_tokens,
        )
        self.conversation = ConversationChain(
            memory=self.memory, prompt=self.prompt, llm=self.llm
        )
        self.first_response = (
            self.create_first_response(agent_config.expected_first_prompt)
            if agent_config.expected_first_prompt
            else None
        )
        self.is_first_response = True

    def create_first_response(self, first_prompt):
        return self.conversation.predict(input=first_prompt)

    def respond(self, human_input, is_interrupt: bool = False) -> tuple[str, bool]:
        if is_interrupt and self.agent_config.cut_off_response:
            cut_off_response = self.get_cut_off_response()
            self.memory.chat_memory.add_user_message(human_input)
            self.memory.chat_memory.add_ai_message(cut_off_response)
            return cut_off_response, False
        self.logger.debug("LLM responding to human input")
        if self.is_first_response and self.first_response:
            self.logger.debug("First response is cached")
            self.is_first_response = False
            text = self.first_response
        else:
            text = self.conversation.predict(input=human_input)
        self.logger.debug(f"LLM response: {text}")
        return text, False

    def generate_response(
        self, human_input, is_interrupt: bool = False
    ) -> Generator[str, None, None]:
        self.memory.chat_memory.messages.append(
            ChatMessage(role="user", content=human_input)
        )
        if is_interrupt and self.agent_config.cut_off_response:
            cut_off_response = self.get_cut_off_response()
            self.memory.chat_memory.messages.append(
                ChatMessage(role="assistant", content=cut_off_response)
            )
            yield cut_off_response
            return
        prompt_messages = [
            ChatMessage(role="system", content=self.agent_config.prompt_preamble)
        ] + self.memory.chat_memory.messages
        messages = SSEClient(
            "POST",
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
            },
            json={
                "model": self.agent_config.model_name,
                "messages": [
                    prompt_message.dict(include={"content": True, "role": True})
                    for prompt_message in prompt_messages
                ],
                "max_tokens": 256,
                "temperature": 1.0,
                "stream": True,
            },
        )
        bot_memory_message = ChatMessage(role="assistant", content="")
        self.memory.chat_memory.messages.append(bot_memory_message)
        for message in stream_llm_response(
            map(lambda event: json.loads(event.data), messages),
            get_text=lambda choice: choice.get("delta", {}).get("content"),
        ):
            bot_memory_message.content = f"{bot_memory_message.content} {message}"
            yield message

    def update_last_bot_message_on_cut_off(self, message: str):
        for memory_message in self.memory.chat_memory.messages[::-1]:
            if (
                isinstance(memory_message, ChatMessage)
                and memory_message.role == "assistant"
            ) or isinstance(memory_message, AIMessage):
                memory_message.content = message
                return


if __name__ == "__main__":
    agent = ChatGPTAgent(
        ChatGPTAgentConfig(
            model_name="gpt-4",
            prompt_preamble="The assistant is having a pleasant conversation about life. If the user hasn't completed their thought, the assistant responds with 'PASS'",
        )
    )
    while True:
        # response = agent.respond(input("Human: "))[0]
        # print(f"AI: {response}")
        for response in agent.generate_response(input("Human: ")):
            print(f"AI: {response}")
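A sketch of consuming the streaming path, assuming OPENAI_API_KEY is set; the prompt text, the model_name value, and the generate_responses=True flag are illustrative choices, not defaults confirmed by this diff:

from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig

agent = ChatGPTAgent(
    ChatGPTAgentConfig(
        model_name="gpt-3.5-turbo",  # illustrative value
        prompt_preamble="You are a concise, friendly phone assistant.",
        generate_responses=True,  # keep memory as raw ChatMessages for streaming
    )
)
# Each yielded chunk is a sentence-sized piece assembled by stream_llm_response.
for sentence in agent.generate_response("Hi, can you hear me okay?"):
    print(sentence)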
vocode/streaming/agent/echo_agent.py (Normal file, 13 lines added)
@@ -0,0 +1,13 @@
from typing import Generator
from vocode.streaming.agent.base_agent import BaseAgent


class EchoAgent(BaseAgent):
    def respond(self, human_input, is_interrupt: bool = False) -> tuple[str, bool]:
        return human_input, False

    def generate_response(self, human_input, is_interrupt: bool = False) -> Generator:
        yield human_input

    def update_last_bot_message_on_cut_off(self, message: str):
        pass
vocode/streaming/agent/information_retrieval_agent.py (Normal file, 32 lines added)
@@ -0,0 +1,32 @@
import logging
from typing import List

from langchain import OpenAI
from vocode.streaming.agent.llm_agent import LLMAgent
from ..models.agent import InformationRetrievalAgentConfig, LLMAgentConfig


class InformationRetrievalAgent(LLMAgent):
    def __init__(
        self,
        agent_config: InformationRetrievalAgentConfig,
        logger: logging.Logger,
    ):
        # super().__init__(agent_config, logger)
        prompt_preamble = f"""
        The AI is a friendly phone bot built for information retrieval. It understands IVR navigation and chooses which numbers to press based on the intended goal and the options provided.
        Once it reaches the human, it verifies the identity of the person it is trying to reach and states its purpose. If it needs to be transferred, then the AI asks to speak to the intended recipient of the phone call.

        Here is the context for the call:
        Intended goal: { agent_config.goal_description }
        Intended recipient: { agent_config.recipient_descriptor }
        Information to be collected: { agent_config.fields }
        Information to provide to the person who answers the phone: this is a robot calling on behalf of { agent_config.caller_descriptor }

        The AI begins the call by introducing itself and who it represents.
        """
        agent_config = LLMAgentConfig(
            prompt_preamble=prompt_preamble,
        )
        super().__init__(agent_config, logger=logger)
        self.llm = OpenAI(model_name="text-davinci-003", temperature=1)
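A hypothetical construction of this agent; the config field names mirror the ones interpolated into the prompt_preamble above, but the values are invented and InformationRetrievalAgentConfig may require further fields not shown in this diff:

import logging

from vocode.streaming.agent.information_retrieval_agent import InformationRetrievalAgent
from vocode.streaming.models.agent import InformationRetrievalAgentConfig

agent = InformationRetrievalAgent(
    InformationRetrievalAgentConfig(
        goal_description="confirm the patient's next appointment",
        recipient_descriptor="the scheduling desk at the clinic",
        fields=["appointment date", "appointment time"],
        caller_descriptor="Acme Health",
    ),
    logger=logging.getLogger(__name__),
)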
vocode/streaming/agent/llm_agent.py (Normal file, 139 lines added)
@@ -0,0 +1,139 @@
import re
from typing import Optional

from dotenv import load_dotenv
from langchain import OpenAI
from langchain.llms import OpenAIChat
from typing import Generator
import logging

from vocode.streaming.agent.base_agent import BaseAgent
from vocode.streaming.agent.utils import stream_llm_response
from vocode.streaming.models.agent import LLMAgentConfig

load_dotenv()


class LLMAgent(BaseAgent):
    SENTENCE_ENDINGS = [".", "!", "?"]

    DEFAULT_PROMPT_TEMPLATE = "{history}\nHuman: {human_input}\nAI:"

    def __init__(
        self,
        agent_config: LLMAgentConfig,
        logger: logging.Logger = None,
        sender="AI",
        recipient="Human",
    ):
        super().__init__(agent_config)
        self.agent_config = agent_config
        self.prompt_template = (
            f"{agent_config.prompt_preamble}\n\n{self.DEFAULT_PROMPT_TEMPLATE}"
        )
        self.initial_bot_message = (
            agent_config.initial_message.text if agent_config.initial_message else None
        )
        self.logger = logger or logging.getLogger(__name__)
        self.sender = sender
        self.recipient = recipient
        self.memory = (
            [f"AI: {agent_config.initial_message.text}"]
            if agent_config.initial_message
            else []
        )
        self.llm = OpenAI(
            model_name=self.agent_config.model_name,
            temperature=self.agent_config.temperature,
            max_tokens=self.agent_config.max_tokens,
        )
        self.stop_tokens = [f"{recipient}:"]
        self.first_response = (
            self.llm(
                self.prompt_template.format(
                    history="", human_input=agent_config.expected_first_prompt
                ),
                stop=self.stop_tokens,
            ).strip()
            if agent_config.expected_first_prompt
            else None
        )
        self.is_first_response = True

    def create_prompt(self, human_input):
        history = "\n".join(self.memory[-5:])
        return self.prompt_template.format(history=history, human_input=human_input)

    def get_memory_entry(self, human_input, response):
        return f"{self.recipient}: {human_input}\n{self.sender}: {response}"

    def respond(self, human_input, is_interrupt: bool = False) -> tuple[str, bool]:
        if is_interrupt and self.agent_config.cut_off_response:
            cut_off_response = self.get_cut_off_response()
            self.memory.append(self.get_memory_entry(human_input, cut_off_response))
            return cut_off_response, False
        self.logger.debug("LLM responding to human input")
        if self.is_first_response and self.first_response:
            self.logger.debug("First response is cached")
            self.is_first_response = False
            response = self.first_response
        else:
            response = self.llm(self.create_prompt(human_input), stop=self.stop_tokens)
            response = response.replace(f"{self.sender}:", "")
        self.memory.append(self.get_memory_entry(human_input, response))
        self.logger.debug(f"LLM response: {response}")
        return response, False

    def generate_response(self, human_input, is_interrupt: bool = False) -> Generator:
        self.logger.debug("LLM generating response to human input")
        if is_interrupt and self.agent_config.cut_off_response:
            cut_off_response = self.get_cut_off_response()
            self.memory.append(self.get_memory_entry(human_input, cut_off_response))
            yield cut_off_response
            return
        self.memory.append(self.get_memory_entry(human_input, ""))
        if self.is_first_response and self.first_response:
            self.logger.debug("First response is cached")
            self.is_first_response = False
            sentences = [self.first_response]
        else:
            self.logger.debug("Creating LLM prompt")
            prompt = self.create_prompt(human_input)
            self.logger.debug("Streaming LLM response")
            sentences = stream_llm_response(
                map(
                    lambda resp: resp.to_dict(),
                    self.llm.stream(prompt, stop=self.stop_tokens),
                )
            )
        response_buffer = ""
        for sentence in sentences:
            sentence = sentence.replace(f"{self.sender}:", "")
            sentence = re.sub(r"^\s+(.*)", r" \1", sentence)
            response_buffer += sentence
            self.memory[-1] = self.get_memory_entry(human_input, response_buffer)
            yield sentence

    def update_last_bot_message_on_cut_off(self, message: str):
        last_message = self.memory[-1]
        new_last_message = (
            last_message.split("\n", 1)[0] + f"\n{self.sender}: {message}"
        )
        self.memory[-1] = new_last_message


if __name__ == "__main__":
    chat_responder = LLMAgent(
        LLMAgentConfig(
            prompt_preamble="""
The AI is having a pleasant conversation about life. If the human hasn't completed their thought, the AI responds with 'PASS'

{history}
Human: {human_input}
AI:""",
        )
    )
    while True:
        # response = chat_responder.respond(input("Human: "))[0]
        for response in chat_responder.generate_response(input("Human: ")):
            print(f"AI: {response}")
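To make the rolling-prompt behavior concrete, the snippet below reproduces what create_prompt assembles, traced by hand with plain strings; it is not part of the commit, and the preamble and memory entries are invented:

# Mirrors LLMAgent.create_prompt: the last five memory entries become {history}.
DEFAULT_PROMPT_TEMPLATE = "{history}\nHuman: {human_input}\nAI:"
prompt_preamble = "The AI is a helpful scheduling assistant."
prompt_template = f"{prompt_preamble}\n\n{DEFAULT_PROMPT_TEMPLATE}"

# self.memory holds one "Human: ...\nAI: ..." entry per exchange.
memory = [
    "Human: Hi there\nAI: Hello! How can I help?",
    "Human: I'd like to book a table\nAI: Sure, for how many people?",
]
history = "\n".join(memory[-5:])
print(prompt_template.format(history=history, human_input="Four people, please"))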
vocode/streaming/agent/utils.py (Normal file, 25 lines added)
@@ -0,0 +1,25 @@
from typing import Generator

SENTENCE_ENDINGS = [".", "!", "?"]


def stream_llm_response(
    gen, get_text=lambda choice: choice.get("text"), sentence_endings=SENTENCE_ENDINGS
) -> Generator:
    buffer = ""
    for response in gen:
        choices = response.get("choices", [])
        if len(choices) == 0:
            break
        choice = choices[0]
        if choice["finish_reason"]:
            break
        token = get_text(choice)
        if not token:
            continue
        buffer += token
        if any(token.endswith(ending) for ending in sentence_endings):
            yield buffer.strip()
            buffer = ""
    if buffer.strip():
        yield buffer
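stream_llm_response can be exercised without any API call by feeding it dicts shaped like OpenAI completion chunks; the chunks below are fabricated to show the sentence-boundary batching:

from vocode.streaming.agent.utils import stream_llm_response

fake_stream = [
    {"choices": [{"text": "Hello", "finish_reason": None}]},
    {"choices": [{"text": " there.", "finish_reason": None}]},
    {"choices": [{"text": " How are", "finish_reason": None}]},
    {"choices": [{"text": " you?", "finish_reason": None}]},
    {"choices": [{"text": "", "finish_reason": "stop"}]},
]
# Tokens are buffered until one ends with ".", "!", or "?", then flushed as a sentence.
print(list(stream_llm_response(iter(fake_stream))))
# -> ['Hello there.', 'How are you?']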