vocode-python/vocode/streaming/utils/goodbye_model.py
2023-03-28 10:29:42 -07:00

76 lines
2 KiB
Python

import os
import asyncio
import openai
import numpy as np
import requests
from vocode import getenv
# Score an utterance must exceed, when its embedding is multiplied against the
# stored goodbye-phrase embeddings, to be classified as a goodbye.
SIMILARITY_THRESHOLD = 0.9

# Length of one embedding vector; sizes the rows of the phrase-embedding matrix.
# NOTE(review): presumably the output size of "text-embedding-ada-002" - confirm.
EMBEDDING_SIZE = 1536

# Reference phrases whose embeddings every utterance is compared against.
GOODBYE_PHRASES = [
    "bye",
    "goodbye",
    "see you",
    "see you later",
    "talk to you later",
    "talk to you soon",
    "have a good day",
    "have a good night",
]
class GoodbyeModel:
    """Decide whether an utterance is a goodbye.

    An utterance counts as a goodbye when it contains the substring ``"bye"``
    or when the product of its embedding with the cached ``GOODBYE_PHRASES``
    embeddings has a maximum above ``SIMILARITY_THRESHOLD``.
    """

    def __init__(
        self,
        embeddings_cache_path=os.path.join(
            os.path.dirname(__file__), "goodbye_embeddings"
        ),
    ):
        """Set the OpenAI API key and load (or build) the phrase embeddings.

        Args:
            embeddings_cache_path: Directory that holds (or will hold) the
                ``goodbye_embeddings.npy`` cache file.
        """
        # Module-level side effect: configures the shared openai client.
        openai.api_key = getenv("OPENAI_API_KEY")
        self.goodbye_embeddings = self.load_or_create_embeddings(
            os.path.join(embeddings_cache_path, "goodbye_embeddings.npy")
        )

    def load_or_create_embeddings(self, path):
        """Return the embeddings stored at ``path``, creating them if absent.

        Args:
            path: Location of the ``.npy`` cache file. It should end in
                ``.npy``; ``np.save`` appends that extension otherwise, and
                the existence check would then never find the file.

        Returns:
            The embedding matrix, either loaded from disk or freshly created
            and saved.
        """
        if os.path.exists(path):
            return np.load(path)

        embeddings = self.create_embeddings()
        # np.save does not create missing parent directories, so make sure the
        # cache directory exists before writing (skip for bare filenames).
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        np.save(path, embeddings)
        return embeddings

    def create_embeddings(self):
        """Embed every phrase in ``GOODBYE_PHRASES``.

        Returns:
            An array of shape ``(EMBEDDING_SIZE, len(GOODBYE_PHRASES))`` with
            one phrase embedding per column.
        """
        print("Creating embeddings...")
        embeddings = np.empty((EMBEDDING_SIZE, len(GOODBYE_PHRASES)))
        for column, goodbye_phrase in enumerate(GOODBYE_PHRASES):
            # Column assignment raises if an embedding has an unexpected length.
            embeddings[:, column] = self.create_embedding(goodbye_phrase)
        return embeddings

    async def is_goodbye(self, text: str) -> bool:
        """Return True when ``text`` looks like a goodbye.

        Args:
            text: The utterance to classify.

        Returns:
            ``False`` for blank input, ``True`` if the text contains ``"bye"``
            or is similar enough to a known goodbye phrase, else ``False``.
        """
        normalized = text.strip().lower()
        if not normalized:
            # Nothing to classify; avoids sending an empty string to the API.
            return False
        if "bye" in normalized:
            return True

        # create_embedding performs a blocking HTTP request; run it in the
        # default executor so the event loop stays responsive meanwhile.
        loop = asyncio.get_running_loop()
        embedding = await loop.run_in_executor(
            None, self.create_embedding, normalized
        )
        similarity_results = embedding @ self.goodbye_embeddings
        # Convert numpy's bool_ to a plain bool to honour the annotation.
        return bool(np.max(similarity_results) > SIMILARITY_THRESHOLD)

    def create_embedding(self, text) -> np.ndarray:
        """Request the "text-embedding-ada-002" embedding of ``text``.

        Args:
            text: The string to embed.

        Returns:
            The embedding from the first item of the API response, as a numpy
            array.
        """
        response = openai.Embedding.create(
            input=text, model="text-embedding-ada-002"
        )
        return np.array(response["data"][0]["embedding"])
if __name__ == "__main__":
    # Manual smoke test: classify lines typed on stdin until interrupted.
    from dotenv import load_dotenv

    # Pull environment variables from a local .env file before the model
    # reads its configuration.
    load_dotenv()

    async def main():
        """Prompt for text forever and print the goodbye verdict for each line."""
        goodbye_model = GoodbyeModel()
        while True:
            user_text = input("Text: ")
            verdict = await goodbye_model.is_goodbye(user_text)
            print(verdict)

    asyncio.run(main())