From 97c7a4f8acf702a1060717da175037a50039cfff Mon Sep 17 00:00:00 2001
From: Ajay Raj
Date: Mon, 20 Mar 2023 20:37:39 -0700
Subject: [PATCH] integrate eleven labs

---
 pyproject.toml                                |  2 +-
 simple_turn_based_conversation.py             | 11 ++--
 .../synthesizer/eleven_labs_synthesizer.py    | 48 +++++++++++++++++++
 3 files changed, 56 insertions(+), 5 deletions(-)
 create mode 100644 vocode/turn_based/synthesizer/eleven_labs_synthesizer.py

diff --git a/pyproject.toml b/pyproject.toml
index 213969d..c99b6ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "vocode"
-version = "0.1.48"
+version = "0.1.49"
 description = "The all-in-one voice SDK"
 authors = ["Ajay Raj "]
 license = "MIT License"
diff --git a/simple_turn_based_conversation.py b/simple_turn_based_conversation.py
index 1cadb6c..9681aaf 100644
--- a/simple_turn_based_conversation.py
+++ b/simple_turn_based_conversation.py
@@ -4,6 +4,7 @@ import os
 from vocode.helpers import create_microphone_input_and_speaker_output
 from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
 from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
+from vocode.turn_based.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
 from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
 from vocode.turn_based.turn_based_conversation import TurnBasedConversation
 
@@ -13,6 +14,9 @@ logger.setLevel(logging.INFO)
 
 load_dotenv()
 
+# See https://api.elevenlabs.io/v1/voices
+ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
+
 if __name__ == "__main__":
     microphone_input, speaker_output = create_microphone_input_and_speaker_output(
         streaming=False, use_default_devices=False
@@ -27,10 +31,9 @@ if __name__ == "__main__":
             initial_message="Hello!",
             api_key=os.getenv("OPENAI_API_KEY"),
         ),
-        synthesizer=AzureSynthesizer(
-            sampling_rate=speaker_output.sampling_rate,
-            api_key=os.getenv("AZURE_SPEECH_KEY"),
-            region=os.getenv("AZURE_SPEECH_REGION"),
+        synthesizer=ElevenLabsSynthesizer(
+            voice_id=ADAM_VOICE_ID,
+            api_key=os.getenv("ELEVEN_LABS_API_KEY"),
         ),
         logger=logger,
     )
diff --git a/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py b/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py
new file mode 100644
index 0000000..018b895
--- /dev/null
+++ b/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py
@@ -0,0 +1,48 @@
+import io
+import os
+from typing import Optional
+from pydub import AudioSegment
+import requests
+from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer
+
+ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
+# Upper bound, in seconds, on how long one synthesis request may block.
+ELEVEN_LABS_REQUEST_TIMEOUT_SECONDS = 60
+
+
+class ElevenLabsSynthesizer(BaseSynthesizer):
+    """Turn-based synthesizer backed by the Eleven Labs text-to-speech API."""
+
+    def __init__(self, voice_id: str, api_key: Optional[str] = None):
+        """Store the voice ID and resolve the API key.
+
+        An explicitly passed ``api_key`` takes precedence; the
+        ``ELEVEN_LABS_API_KEY`` environment variable is only a fallback.
+        """
+        self.voice_id = voice_id
+        self.api_key = api_key or os.getenv("ELEVEN_LABS_API_KEY")
+
+    def synthesize(self, text: str) -> AudioSegment:
+        """Synthesize ``text`` and return the decoded MP3 response as audio.
+
+        Raises:
+            requests.HTTPError: if the API answers with an error status.
+        """
+        url = ELEVEN_LABS_BASE_URL + f"text-to-speech/{self.voice_id}"
+        # The voice is selected by the URL path; only the key goes in headers.
+        headers = {"xi-api-key": self.api_key}
+        body = {"text": text}
+        response = requests.post(
+            url,
+            headers=headers,
+            json=body,
+            timeout=ELEVEN_LABS_REQUEST_TIMEOUT_SECONDS,
+        )
+        if not response.ok:
+            # Raise explicitly: an ``assert`` is stripped under ``python -O``.
+            raise requests.HTTPError(
+                f"Eleven Labs request failed ({response.status_code}): "
+                f"{response.text}",
+                response=response,
+            )
+        return AudioSegment.from_mp3(io.BytesIO(response.content))