integrate eleven labs

2023-03-20 20:37:39 -07:00 · 2023-03-20 20:37:39 -07:00 · 97c7a4f8ac
commit 97c7a4f8ac
parent a01e3b2fb8
3 changed files with 32 additions and 5 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "vocode"
-version = "0.1.48"
+version = "0.1.49"
 description = "The all-in-one voice SDK"
 authors = ["Ajay Raj <ajay@vocode.dev>"]
 license = "MIT License"
--- a/simple_turn_based_conversation.py
+++ b/simple_turn_based_conversation.py
@@ -4,6 +4,7 @@ import os
 from vocode.helpers import create_microphone_input_and_speaker_output
 from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
 from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
 from vocode.turn_based.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
 from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
 from vocode.turn_based.turn_based_conversation import TurnBasedConversation
@@ -13,6 +14,9 @@ logger.setLevel(logging.INFO)
 load_dotenv()
 # See https://api.elevenlabs.io/v1/voices
 ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
 if __name__ == "__main__":
    microphone_input, speaker_output = create_microphone_input_and_speaker_output(
        streaming=False, use_default_devices=False
@@ -27,10 +31,9 @@ if __name__ == "__main__":
            initial_message="Hello!",
            api_key=os.getenv("OPENAI_API_KEY"),
        ),
-        synthesizer=AzureSynthesizer(
+        synthesizer=ElevenLabsSynthesizer(
-            sampling_rate=speaker_output.sampling_rate,
+            voice_id=ADAM_VOICE_ID,
-            api_key=os.getenv("AZURE_SPEECH_KEY"),
+            api_key=os.getenv("ELEVEN_LABS_API_KEY"),
-            region=os.getenv("AZURE_SPEECH_REGION"),
        ),
        logger=logger,
    )
--- a/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py
+++ b/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py
@@ -0,0 +1,24 @@
 import io
 import os
 from typing import Optional
 from pydub import AudioSegment
 import requests
 from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer
 ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
 class ElevenLabsSynthesizer(BaseSynthesizer):
     """Turn-based synthesizer backed by the Eleven Labs text-to-speech HTTP API.
 
     Each ``synthesize`` call sends one POST request and decodes the response
     body as MP3 audio.
     """
 
     def __init__(self, voice_id: str, api_key: Optional[str] = None):
         """Store the voice to use and resolve the API key.
 
         Args:
             voice_id: Eleven Labs voice identifier; interpolated into the
                 request URL and also sent as a ``voice_id`` header.
             api_key: Fallback API key, used only when the
                 ``ELEVEN_LABS_API_KEY`` environment variable is not set.
         """
         self.voice_id = voice_id
         # The environment variable wins over the explicit argument: ``api_key``
         # is only the default handed to ``os.getenv``.
         self.api_key = os.getenv("ELEVEN_LABS_API_KEY", api_key)
 
     def synthesize(self, text: str) -> AudioSegment:
         """Request speech for ``text`` and return it as an ``AudioSegment``.
 
         Args:
             text: The text placed in the JSON request body.
 
         Returns:
             The response body parsed with ``AudioSegment.from_mp3``.
 
         Raises:
             AssertionError: If the HTTP response is not OK; the exception
                 message is the response text.
         """
         url = ELEVEN_LABS_BASE_URL + f"text-to-speech/{self.voice_id}"
         headers = {"xi-api-key": self.api_key, "voice_id": self.voice_id}
         body = {
             "text": text,
         }
         response = requests.post(url, headers=headers, json=body)
         # Raised explicitly rather than via ``assert`` so the check still runs
         # under ``python -O``; AssertionError is kept so existing callers see
         # the same exception type and message as before.
         if not response.ok:
             raise AssertionError(response.text)
         return AudioSegment.from_mp3(io.BytesIO(response.content))