feat: conditionally render voice button based on voice mode state (#8561)

* chore: update webrtcvad dependency and add optional audio support
  - Updated webrtcvad version requirement to >=1.9.4 in pyproject.toml and uv.lock.
  - Moved webrtcvad to optional dependencies under the new 'audio' category in both pyproject.toml files for better modularity.
  - Ensured compatibility with existing dependencies while enhancing documentation for optional audio features.
* fix: handle optional import of voice_mode_router in API initialization
  - Updated the API router initialization to conditionally include the voice_mode_router if it is available, improving modularity and preventing import errors.
  - Adjusted the import statement for voice_mode_router to handle ImportError gracefully, ensuring the application remains robust in its absence.
* feat: add voice mode flag to API configuration
  - Introduced a new function `get_voice_mode_enabled` to check for the availability of the `webrtcvad` library, enhancing the API's capability to support voice mode features.
  - Updated the `ConfigResponse` schema to include the `voice_mode_enabled` flag, allowing clients to query the status of voice mode support.
  - Integrated the voice mode feature flag into the `get_config` endpoint response, improving the configurability of the API.
* test: skip voice mode tests if webrtcvad is not installed
  - Added a conditional import for the webrtcvad library in the voice mode test suite.
  - Implemented a pytest marker to skip tests when webrtcvad is unavailable, enhancing test robustness and preventing unnecessary failures.
* feat: add voice mode state management to flows manager store
  - Introduced `voiceModeEnabled` state and `setVoiceModeEnabled` action in the flows manager store to manage the voice mode feature.
  - Updated the `ConfigResponse` interface and `useGetConfig` hook to integrate the new voice mode state, allowing for dynamic configuration updates.
  - Enhanced the API's configurability by ensuring the voice mode state is properly set based on the API response.
* feat: conditionally render voice button based on voice mode state
  - Integrated `voiceModeEnabled` from the flows manager store to control the visibility of the VoiceButton component.
  - The VoiceButton will now return null if voice mode is not enabled, enhancing the user interface by preventing unnecessary rendering.
* fix: ensure boolean conversion for voice mode state in config hook
  - Updated the `setVoiceModeEnabled` function in the `useGetConfig` hook to explicitly convert the `voice_mode_enabled` value to a boolean, ensuring consistent handling of the voice mode state based on API responses.
* refactor: conditionally import voice_mode_router in API initialization
  - Updated the API router to conditionally include the voice_mode_router based on its availability, enhancing modularity and preventing import errors.
  - Adjusted the __init__.py file to dynamically add voice_mode_router to the __all__ list if the import is successful, improving the API's robustness.
* chore: revert mcp version update
* refactor: reorder router initialization and add audio flag to backend install task
* feat: add audio module to backend installation command in VS Code tasks
* ci: add audio extras to uv sync in frontend tests
2025-06-17 12:23:12 -03:00 · 2025-06-17 12:23:12 -03:00 · 4d3eae63ac
commit 4d3eae63ac
parent 1950116916
15 changed files with 73 additions and 24 deletions
--- a/.github/workflows/typescript_test.yml
+++ b/.github/workflows/typescript_test.yml
@ -220,12 +220,12 @@ jobs:

          echo "Total tests to run: $TEST_COUNT"

-          # Calculate optimal shard count - 1 shard per 5 tests, min 1, max 25
+          # Calculate optimal shard count - 1 shard per 5 tests, min 1, max 40
          SHARD_COUNT=$(( (TEST_COUNT + 4) / 5 ))
          if [ $SHARD_COUNT -lt 1 ]; then
            SHARD_COUNT=1
-          elif [ $SHARD_COUNT -gt 25 ]; then
-            SHARD_COUNT=25
+          elif [ $SHARD_COUNT -gt 40 ]; then
+            SHARD_COUNT=40
          fi

          # Create the matrix combinations string
@ -301,7 +301,7 @@ jobs:
          prune-cache: false

      - name: Install Python Dependencies
-        run: uv sync
+        run: uv sync --extra audio

      - name: Configure Environment Variables
        run: |
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@ -48,7 +48,7 @@
    {
      "label": "Install Backend",
      "type": "shell",
-      "command": "make install_backend"
+      "command": "make install_backend EXTRA_ARGS='--extra audio'"
    }
  ]
 }
--- a/pyproject.toml
+++ b/pyproject.toml
@ -108,7 +108,6 @@ dependencies = [
    "crewai==0.102.0",
    "mcp>=0.9.1",
    "uv>=0.5.7",
-    "webrtcvad>=2.0.10",
    "scipy>=1.14.1",
    "ag2>=0.1.0",
    "scrapegraph-py>=1.12.0",
@ -191,6 +190,10 @@ Repository = "https://github.com/langflow-ai/langflow"
 Documentation = "https://docs.langflow.org"

 [project.optional-dependencies]
+audio = [
+    "webrtcvad>=2.0.10",
+]
+
 couchbase = [
    "couchbase>=4.2.1"
 ]
@ -214,7 +217,6 @@ nv-ingest = [
 postgresql = [
  "sqlalchemy[postgresql_psycopg2binary]",
    "sqlalchemy[postgresql_psycopg]",
-
 ]

 [project.scripts]
--- a/src/backend/base/langflow/api/router.py
+++ b/src/backend/base/langflow/api/router.py
@ -18,15 +18,10 @@ from langflow.api.v1 import (
    users_router,
    validate_router,
    variables_router,
-    voice_mode_router,
 )
 from langflow.api.v2 import files_router as files_router_v2
 from langflow.api.v2 import mcp_router as mcp_router_v2

-router = APIRouter(
-    prefix="/api",
-)
-
 router_v1 = APIRouter(
    prefix="/v1",
 )
@ -49,12 +44,21 @@ router_v1.include_router(monitor_router)
 router_v1.include_router(folders_router)
 router_v1.include_router(projects_router)
 router_v1.include_router(starter_projects_router)
-router_v1.include_router(voice_mode_router)
 router_v1.include_router(mcp_router)
 router_v1.include_router(mcp_projects_router)

 router_v2.include_router(files_router_v2)
 router_v2.include_router(mcp_router_v2)

+try:
+    from langflow.api.v1.voice_mode import router as voice_mode_router
+
+    router_v1.include_router(voice_mode_router)
+except ImportError:
+    pass
+
+router = APIRouter(
+    prefix="/api",
+)
 router.include_router(router_v1)
 router.include_router(router_v2)
--- a/src/backend/base/langflow/api/utils.py
+++ b/src/backend/base/langflow/api/utils.py
@ -378,3 +378,11 @@ async def verify_public_flow_and_get_user(flow_id: uuid.UUID, client_id: str | N
        raise HTTPException(status_code=403, detail=msg)

    return user, new_flow_id
+
+
+def get_voice_mode_enabled() -> bool:
+    try:
+        import webrtcvad  # noqa: F401
+    except ImportError:
+        return False
+    return True
--- a/src/backend/base/langflow/api/v1/__init__.py
+++ b/src/backend/base/langflow/api/v1/__init__.py
@ -14,7 +14,6 @@ from langflow.api.v1.store import router as store_router
 from langflow.api.v1.users import router as users_router
 from langflow.api.v1.validate import router as validate_router
 from langflow.api.v1.variable import router as variables_router
-from langflow.api.v1.voice_mode import router as voice_mode_router

 __all__ = [
    "api_key_router",
@ -33,5 +32,11 @@ __all__ = [
    "users_router",
    "validate_router",
    "variables_router",
-    "voice_mode_router",
 ]
+
+try:
+    from langflow.api.v1.voice_mode import router as voice_mode_router
+
+    __all__ += ["voice_mode_router"]
+except ImportError:
+    pass
--- a/src/backend/base/langflow/api/v1/endpoints.py
+++ b/src/backend/base/langflow/api/v1/endpoints.py
@ -14,7 +14,7 @@ from fastapi.responses import StreamingResponse
 from loguru import logger
 from sqlmodel import select

-from langflow.api.utils import CurrentActiveUser, DbSession, parse_value
+from langflow.api.utils import CurrentActiveUser, DbSession, get_voice_mode_enabled, parse_value
 from langflow.api.v1.schemas import (
    ConfigResponse,
    CustomComponentRequest,
@ -752,6 +752,7 @@ async def get_config():
        return {
            "feature_flags": FEATURE_FLAGS,
            **settings_service.settings.model_dump(),
+            "voice_mode_enabled": get_voice_mode_enabled(),
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
--- a/src/backend/base/langflow/api/v1/schemas.py
+++ b/src/backend/base/langflow/api/v1/schemas.py
@ -388,6 +388,7 @@ class ConfigResponse(BaseModel):
    public_flow_cleanup_interval: int
    public_flow_expiration: int
    event_delivery: Literal["polling", "streaming", "direct"]
+    voice_mode_enabled: bool


 class CancelFlowResponse(BaseModel):
--- a/src/backend/base/pyproject.toml
+++ b/src/backend/base/pyproject.toml
@ -84,7 +84,6 @@ dependencies = [
    "sqlalchemy[aiosqlite]>=2.0.38,<3.0.0",
    'elevenlabs==1.58.1; python_version == "3.12"',
    'elevenlabs>=1.52.0; python_version != "3.12"',
-    "webrtcvad>=2.0.10",
    "scipy>=1.15.2",
    "ibm-watsonx-ai>=1.3.1",
    "langchain-ibm>=0.3.8",
@ -232,6 +231,9 @@ Documentation = "https://docs.langflow.org"

 # Optional dependencies for uv
 [project.optional-dependencies]
+audio = [
+    "webrtcvad>=2.0.10",
+]

 postgresql = [
  "sqlalchemy[postgresql_psycopg2binary]",
--- a/src/backend/tests/unit/test_voice_mode.py
+++ b/src/backend/tests/unit/test_voice_mode.py
@ -1,6 +1,11 @@
 import numpy as np
 import pytest
-import webrtcvad
+
+try:
+    import webrtcvad
+except ImportError:
+    pytestmark = pytest.mark.skip(reason="webrtcvad is not installed. Skipping voice mode tests.")
+
 from langflow.utils.voice_utils import (
    BYTES_PER_16K_FRAME,
    BYTES_PER_24K_FRAME,
--- a/src/frontend/src/controllers/API/queries/config/use-get-config.ts
+++ b/src/frontend/src/controllers/API/queries/config/use-get-config.ts
@ -21,6 +21,7 @@ export interface ConfigResponse {
  webhook_polling_interval: number;
  serialization_max_items_length: number;
  event_delivery: EventDeliveryType;
+  voice_mode_enabled: boolean;
 }

 export const useGetConfig: useQueryFunctionType<undefined, ConfigResponse> = (
@ -44,6 +45,9 @@ export const useGetConfig: useQueryFunctionType<undefined, ConfigResponse> = (
    (state) => state.setWebhookPollingInterval,
  );
  const setEventDelivery = useUtilityStore((state) => state.setEventDelivery);
+  const setVoiceModeEnabled = useFlowsManagerStore(
+    (state) => state.setVoiceModeEnabled,
+  );
  const { query } = UseRequestProcessor();

  const getConfigFn = async () => {
@ -65,6 +69,7 @@ export const useGetConfig: useQueryFunctionType<undefined, ConfigResponse> = (
        data.webhook_polling_interval ?? DEFAULT_POLLING_INTERVAL,
      );
      setEventDelivery(data.event_delivery ?? EventDeliveryType.POLLING);
+      setVoiceModeEnabled(Boolean(data.voice_mode_enabled));
    }
    return data;
  };
--- a/src/frontend/src/modals/IOModal/components/chatView/chatInput/components/voice-assistant/components/voice-button.tsx
+++ b/src/frontend/src/modals/IOModal/components/chatView/chatInput/components/voice-assistant/components/voice-button.tsx
@ -1,6 +1,7 @@
 import ForwardedIconComponent from "@/components/common/genericIconComponent";
 import { Button } from "@/components/ui/button";
 import { ICON_STROKE_WIDTH } from "@/constants/constants";
+import useFlowsManagerStore from "@/stores/flowsManagerStore";
 import { useVoiceStore } from "@/stores/voiceStore";

 interface VoiceButtonProps {
@ -11,6 +12,13 @@ const VoiceButton = ({ toggleRecording }: VoiceButtonProps) => {
  const setNewSessionCloseVoiceAssistant = useVoiceStore(
    (state) => state.setNewSessionCloseVoiceAssistant,
  );
+  const voiceModeEnabled = useFlowsManagerStore(
+    (state) => state.voiceModeEnabled,
+  );
+
+  if (!voiceModeEnabled) {
+    return null;
+  }

  return (
    <>
--- a/src/frontend/src/stores/flowsManagerStore.ts
+++ b/src/frontend/src/stores/flowsManagerStore.ts
@ -17,6 +17,8 @@ const past = {};
 const future = {};

 const useFlowsManagerStore = create<FlowsManagerStoreType>((set, get) => ({
+  voiceModeEnabled: false,
+  setVoiceModeEnabled: (voiceModeEnabled: boolean) => set({ voiceModeEnabled }),
  IOModalOpen: false,
  setIOModalOpen: (IOModalOpen: boolean) => {
    set({ IOModalOpen });
--- a/src/frontend/src/types/zustand/flowsManager/index.ts
+++ b/src/frontend/src/types/zustand/flowsManager/index.ts
@ -29,6 +29,8 @@ export type FlowsManagerStoreType = {
  IOModalOpen: boolean;
  setIOModalOpen: (IOModalOpen: boolean) => void;
  resetStore: () => void;
+  voiceModeEnabled: boolean;
+  setVoiceModeEnabled: (voiceModeEnabled: boolean) => void;
 };

 export type UseUndoRedoOptions = {
--- a/uv.lock
+++ b/uv.lock
@ -4746,7 +4746,6 @@ dependencies = [
    { name = "upstash-vector" },
    { name = "uv" },
    { name = "weaviate-client" },
-    { name = "webrtcvad" },
    { name = "wikipedia" },
    { name = "wolframalpha" },
    { name = "yfinance" },
@ -4755,6 +4754,9 @@ dependencies = [
 ]

 [package.optional-dependencies]
+audio = [
+    { name = "webrtcvad" },
+]
 cassio = [
    { name = "cassio" },
 ]
@ -4940,14 +4942,14 @@ requires-dist = [
    { name = "upstash-vector", specifier = "==0.6.0" },
    { name = "uv", specifier = ">=0.5.7" },
    { name = "weaviate-client", specifier = "==4.10.2" },
-    { name = "webrtcvad", specifier = ">=2.0.10" },
+    { name = "webrtcvad", marker = "extra == 'audio'", specifier = ">=2.0.10" },
    { name = "wikipedia", specifier = "==1.4.0" },
    { name = "wolframalpha", specifier = "==5.1.3" },
    { name = "yfinance", specifier = "==0.2.50" },
    { name = "youtube-transcript-api", specifier = "==0.6.3" },
    { name = "zep-python", specifier = "==2.0.2" },
 ]
-provides-extras = ["couchbase", "cassio", "local", "clickhouse-connect", "nv-ingest", "postgresql"]
+provides-extras = ["audio", "couchbase", "cassio", "local", "clickhouse-connect", "nv-ingest", "postgresql"]

 [package.metadata.requires-dev]
 dev = [
@ -5073,7 +5075,6 @@ dependencies = [
    { name = "uncurl" },
    { name = "uvicorn" },
    { name = "validators" },
-    { name = "webrtcvad" },
 ]

 [package.optional-dependencies]
@ -5082,6 +5083,9 @@ all = [
    { name = "llama-cpp-python" },
    { name = "sentence-transformers" },
 ]
+audio = [
+    { name = "webrtcvad" },
+]
 local = [
    { name = "ctransformers" },
    { name = "llama-cpp-python" },
@ -5210,9 +5214,9 @@ requires-dist = [
    { name = "uncurl", specifier = ">=0.0.11,<1.0.0" },
    { name = "uvicorn", specifier = ">=0.30.0,<1.0.0" },
    { name = "validators", specifier = ">=0.34.0" },
-    { name = "webrtcvad", specifier = ">=2.0.10" },
+    { name = "webrtcvad", marker = "extra == 'audio'", specifier = ">=2.0.10" },
 ]
-provides-extras = ["postgresql", "local", "all"]
+provides-extras = ["audio", "postgresql", "local", "all"]

 [package.metadata.requires-dev]
 dev = [