From 97c7a4f8acf702a1060717da175037a50039cfff Mon Sep 17 00:00:00 2001
From: Ajay Raj <ajay.n.raj@gmail.com>
Date: Mon, 20 Mar 2023 20:37:39 -0700
Subject: [PATCH] integrate eleven labs

---
 pyproject.toml                                |  2 +-
 simple_turn_based_conversation.py             | 11 +++++----
 .../synthesizer/eleven_labs_synthesizer.py    | 24 +++++++++++++++++++
 3 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 vocode/turn_based/synthesizer/eleven_labs_synthesizer.py

diff --git a/pyproject.toml b/pyproject.toml
index 213969d..c99b6ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "vocode"
-version = "0.1.48"
+version = "0.1.49"
 description = "The all-in-one voice SDK"
 authors = ["Ajay Raj <ajay@vocode.dev>"]
 license = "MIT License"
diff --git a/simple_turn_based_conversation.py b/simple_turn_based_conversation.py
index 1cadb6c..9681aaf 100644
--- a/simple_turn_based_conversation.py
+++ b/simple_turn_based_conversation.py
@@ -4,6 +4,7 @@ import os
 from vocode.helpers import create_microphone_input_and_speaker_output
 from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
 from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
+from vocode.turn_based.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
 from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
 from vocode.turn_based.turn_based_conversation import TurnBasedConversation
 
@@ -13,6 +14,9 @@ logger.setLevel(logging.INFO)
 
 load_dotenv()
 
+# Voice IDs are listed at https://api.elevenlabs.io/v1/voices
+ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
+
 if __name__ == "__main__":
     microphone_input, speaker_output = create_microphone_input_and_speaker_output(
         streaming=False, use_default_devices=False
@@ -27,10 +31,9 @@ if __name__ == "__main__":
             initial_message="Hello!",
             api_key=os.getenv("OPENAI_API_KEY"),
         ),
-        synthesizer=AzureSynthesizer(
-            sampling_rate=speaker_output.sampling_rate,
-            api_key=os.getenv("AZURE_SPEECH_KEY"),
-            region=os.getenv("AZURE_SPEECH_REGION"),
+        synthesizer=ElevenLabsSynthesizer(
+            voice_id=ADAM_VOICE_ID,
+            api_key=os.getenv("ELEVEN_LABS_API_KEY"),
         ),
         logger=logger,
     )
diff --git a/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py b/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py
new file mode 100644
index 0000000..018b895
--- /dev/null
+++ b/vocode/turn_based/synthesizer/eleven_labs_synthesizer.py
@@ -0,0 +1,55 @@
+import io
+import os
+from typing import Optional
+from pydub import AudioSegment
+import requests
+from vocode.turn_based.synthesizer.base_synthesizer import BaseSynthesizer
+
+ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1/"
+# Seconds to wait for the ElevenLabs server to respond before giving up.
+ELEVEN_LABS_REQUEST_TIMEOUT_SECONDS = 60
+
+
+class ElevenLabsSynthesizer(BaseSynthesizer):
+    """Turn-based synthesizer backed by the ElevenLabs text-to-speech HTTP API."""
+
+    def __init__(self, voice_id: str, api_key: Optional[str] = None):
+        """Store the voice and API key used by every synthesis request.
+
+        Args:
+            voice_id: ElevenLabs voice identifier, embedded in the request URL.
+            api_key: Fallback key, used only when the ELEVEN_LABS_API_KEY
+                environment variable is unset (the variable takes precedence).
+        """
+        self.voice_id = voice_id
+        self.api_key = os.getenv("ELEVEN_LABS_API_KEY", api_key)
+
+    def synthesize(self, text: str) -> AudioSegment:
+        """Convert ``text`` to speech using the configured voice.
+
+        Args:
+            text: The text to speak.
+
+        Returns:
+            The response body decoded from MP3 into an ``AudioSegment``.
+
+        Raises:
+            requests.HTTPError: If the API answers with an error status.
+            requests.Timeout: If the server does not respond in time.
+        """
+        url = ELEVEN_LABS_BASE_URL + f"text-to-speech/{self.voice_id}"
+        headers = {"xi-api-key": self.api_key, "voice_id": self.voice_id}
+        response = requests.post(
+            url,
+            headers=headers,
+            json={"text": text},
+            timeout=ELEVEN_LABS_REQUEST_TIMEOUT_SECONDS,
+        )
+        # Raise explicitly instead of using `assert`: assertions are stripped
+        # under `python -O`, which would pass an error payload to the decoder.
+        if not response.ok:
+            raise requests.HTTPError(
+                f"ElevenLabs request failed ({response.status_code}): {response.text}",
+                response=response,
+            )
+        return AudioSegment.from_mp3(io.BytesIO(response.content))