integrate eleven labs

This commit is contained in:
Ajay Raj 2023-03-20 20:37:39 -07:00
commit 97c7a4f8ac
3 changed files with 32 additions and 5 deletions

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "vocode" name = "vocode"
version = "0.1.48" version = "0.1.49"
description = "The all-in-one voice SDK" description = "The all-in-one voice SDK"
authors = ["Ajay Raj <ajay@vocode.dev>"] authors = ["Ajay Raj <ajay@vocode.dev>"]
license = "MIT License" license = "MIT License"

View file

@ -4,6 +4,7 @@ import os
from vocode.helpers import create_microphone_input_and_speaker_output from vocode.helpers import create_microphone_input_and_speaker_output
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.turn_based.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
from vocode.turn_based.turn_based_conversation import TurnBasedConversation from vocode.turn_based.turn_based_conversation import TurnBasedConversation
@ -13,6 +14,9 @@ logger.setLevel(logging.INFO)
load_dotenv() load_dotenv()
# See https://api.elevenlabs.io/v1/voices
ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
if __name__ == "__main__": if __name__ == "__main__":
microphone_input, speaker_output = create_microphone_input_and_speaker_output( microphone_input, speaker_output = create_microphone_input_and_speaker_output(
streaming=False, use_default_devices=False streaming=False, use_default_devices=False
@ -27,10 +31,9 @@ if __name__ == "__main__":
initial_message="Hello!", initial_message="Hello!",
api_key=os.getenv("OPENAI_API_KEY"), api_key=os.getenv("OPENAI_API_KEY"),
), ),
synthesizer=AzureSynthesizer( synthesizer=ElevenLabsSynthesizer(
sampling_rate=speaker_output.sampling_rate, voice_id=ADAM_VOICE_ID,
api_key=os.getenv("AZURE_SPEECH_KEY"), api_key=os.getenv("ELEVEN_LABS_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
), ),
logger=logger, logger=logger,
) )

View file

@ -0,0 +1,24 @@
import io
import os
from typing import Optional
from pydub import AudioSegment
import requests
from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer
ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
class ElevenLabsSynthesizer(BaseSynthesizer):
    """Synthesizer that turns text into audio via the ElevenLabs HTTP API.

    Each call to ``synthesize`` issues one POST request to the
    ``text-to-speech/<voice_id>`` endpoint under ``ELEVEN_LABS_BASE_URL`` and
    decodes the response body as MP3.
    """

    def __init__(self, voice_id: str, api_key: Optional[str] = None):
        """Store the voice to use and resolve the API key.

        Args:
            voice_id: Identifier of the ElevenLabs voice; it is interpolated
                into the request URL.
            api_key: API key sent in the ``xi-api-key`` header. When omitted
                (``None``), the ``ELEVEN_LABS_API_KEY`` environment variable
                is used instead.
        """
        self.voice_id = voice_id
        # An explicitly passed key must take precedence; the environment
        # variable is only a fallback for callers that did not supply one.
        self.api_key = (
            api_key if api_key is not None else os.getenv("ELEVEN_LABS_API_KEY")
        )

    def synthesize(self, text: str) -> AudioSegment:
        """Synthesize ``text`` and return the resulting audio.

        Args:
            text: The text to be spoken.

        Returns:
            An ``AudioSegment`` decoded from the MP3 bytes in the response.

        Raises:
            AssertionError: If the HTTP response is not OK; the message is the
                response body text.
        """
        url = ELEVEN_LABS_BASE_URL + f"text-to-speech/{self.voice_id}"
        headers = {"xi-api-key": self.api_key, "voice_id": self.voice_id}
        body = {
            "text": text,
        }
        response = requests.post(url, headers=headers, json=body)
        # Raise explicitly rather than via `assert`, which is removed when
        # Python runs with -O and would let an error payload reach the decoder.
        if not response.ok:
            raise AssertionError(response.text)
        return AudioSegment.from_mp3(io.BytesIO(response.content))