# vocode-python/vocode/streaming/utils/goodbye_model.py
#
# 80 lines
# 2.2 KiB
# Python

import os
import asyncio
from typing import Optional
import openai
import numpy as np
import requests
from vocode import getenv
# A text is treated as a goodbye when its best similarity score against the
# reference phrase embeddings is strictly greater than this value.
SIMILARITY_THRESHOLD = 0.9

# Number of rows in the reference embedding matrix; each phrase embedding
# occupies one column of that length.
EMBEDDING_SIZE = 1536

# Reference farewell phrases; one embedding is computed per entry.
GOODBYE_PHRASES = [
    "bye",
    "goodbye",
    "see you",
    "see you later",
    "talk to you later",
    "talk to you soon",
    "have a good day",
    "have a good night",
]
class GoodbyeModel:
    """Decide whether a piece of text is a farewell.

    A text counts as a goodbye when it contains the substring ``"bye"`` or
    when the dot product of its OpenAI embedding with the embedding of any
    phrase in ``GOODBYE_PHRASES`` exceeds ``SIMILARITY_THRESHOLD``.
    """

    def __init__(
        self,
        embeddings_cache_path=os.path.join(
            os.path.dirname(__file__), "goodbye_embeddings"
        ),
        openai_api_key: Optional[str] = None,
    ):
        """Configure the OpenAI key and load (or build) the phrase embeddings.

        Args:
            embeddings_cache_path: Directory that holds (or will hold) the
                ``goodbye_embeddings.npy`` cache file.
            openai_api_key: API key to use; when omitted, ``OPENAI_API_KEY``
                is looked up through the imported ``getenv`` helper.

        Raises:
            ValueError: If no API key is available from either source.
        """
        # NOTE: the key is stored on the ``openai`` module itself, so it is
        # shared by everything in the process that uses that module.
        openai.api_key = openai_api_key or getenv("OPENAI_API_KEY")
        if not openai.api_key:
            raise ValueError("OPENAI_API_KEY must be set in environment or passed in")
        self.goodbye_embeddings = self.load_or_create_embeddings(
            os.path.join(embeddings_cache_path, "goodbye_embeddings.npy")
        )

    def load_or_create_embeddings(self, path):
        """Return the embeddings cached at ``path``, creating the cache if absent.

        Args:
            path: Location of the ``.npy`` cache file.

        Returns:
            The embedding matrix, either loaded from ``path`` or freshly
            computed and then written to ``path``.
        """
        if os.path.exists(path):
            return np.load(path)

        embeddings = self.create_embeddings()

        # The cache directory may not exist yet (e.g. on a fresh install).
        cache_dir = os.path.dirname(path)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)

        # Save through a file handle: given a filename, ``np.save`` appends
        # ".npy" when it is missing, which would make the ``exists`` check
        # above miss the cache on every later call.
        with open(path, "wb") as cache_file:
            np.save(cache_file, embeddings)
        return embeddings

    def create_embeddings(self):
        """Embed every phrase in ``GOODBYE_PHRASES``.

        Returns:
            An array of shape ``(EMBEDDING_SIZE, len(GOODBYE_PHRASES))`` with
            one phrase embedding per column.
        """
        print("Creating embeddings...")
        embeddings = np.empty((EMBEDDING_SIZE, len(GOODBYE_PHRASES)))
        for column, goodbye_phrase in enumerate(GOODBYE_PHRASES):
            # The assignment raises if an embedding is not EMBEDDING_SIZE long.
            embeddings[:, column] = self.create_embedding(goodbye_phrase)
        return embeddings

    async def is_goodbye(self, text: str) -> bool:
        """Report whether ``text`` reads as a goodbye.

        Args:
            text: The utterance to classify.

        Returns:
            ``True`` if ``text`` contains "bye" (case-insensitive) or its
            embedding scores above ``SIMILARITY_THRESHOLD`` against any
            reference phrase; ``False`` otherwise, including for empty or
            whitespace-only text.
        """
        normalized = text.strip().lower()
        if "bye" in normalized:
            return True
        if not normalized:
            # Nothing to embed; skip the API round trip entirely.
            return False

        # ``create_embedding`` performs a blocking HTTP request, so run it in
        # the default executor instead of stalling the event loop.
        loop = asyncio.get_running_loop()
        embedding = await loop.run_in_executor(
            None, self.create_embedding, normalized
        )
        similarity_results = embedding @ self.goodbye_embeddings
        # Convert from ``np.bool_`` so the annotated return type holds.
        return bool(np.max(similarity_results) > SIMILARITY_THRESHOLD)

    def create_embedding(self, text) -> np.ndarray:
        """Request the ``text-embedding-ada-002`` embedding of ``text``.

        Args:
            text: The string to embed.

        Returns:
            The embedding of ``text`` as a one-dimensional array.
        """
        response = openai.Embedding.create(
            input=text, model="text-embedding-ada-002"
        )
        return np.array(response["data"][0]["embedding"])
if __name__ == "__main__":
    from dotenv import load_dotenv

    # Load environment variables via python-dotenv before building the model.
    load_dotenv()

    async def main():
        """Prompt for text in an endless loop and print each goodbye verdict."""
        model = GoodbyeModel()
        while True:
            user_text = input("Text: ")
            verdict = await model.is_goodbye(user_text)
            print(verdict)

    asyncio.run(main())