diff --git a/.cursor/settings.json b/.cursor/settings.json
index ab4c98c2a..c2a49269e 100644
--- a/.cursor/settings.json
+++ b/.cursor/settings.json
@@ -1,3 +1,3 @@
{
- "biome.configurationPath": "src/frontend/biome.json"
+ "biome.configurationPath": "src/frontend/biome.json"
}
\ No newline at end of file
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..a6f10e0b2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,12 @@
+src/frontend/node_modules
+src/frontend/build
+src/frontend/coverage
+src/frontend/test-results
+src/frontend/playwright-report
+src/frontend/.dspy_cache
+**/.DS_Store
+**/__pycache__
+**/*.pyc
+**/.pytest_cache
+**/.venv
+**/.env
\ No newline at end of file
diff --git a/.env.example b/.env.example
index 7b3c76e9b..e4e9713b4 100644
--- a/.env.example
+++ b/.env.example
@@ -79,12 +79,16 @@ LANGFLOW_REMOVE_API_KEYS=
# LANGFLOW_REDIS_CACHE_EXPIRE (default: 3600)
LANGFLOW_CACHE_TYPE=
-# Set AUTO_LOGIN to false if you want to disable auto login
+# Set LANGFLOW_AUTO_LOGIN to false if you want to disable auto login
# and use the login form to login. LANGFLOW_SUPERUSER and LANGFLOW_SUPERUSER_PASSWORD
# must be set if AUTO_LOGIN is set to false
# Values: true, false
LANGFLOW_AUTO_LOGIN=
+# Set LANGFLOW_ENABLE_SUPERUSER_CLI to false to disable
+# superuser creation via the CLI
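+# Values: true, false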
+LANGFLOW_ENABLE_SUPERUSER_CLI=
+
# Superuser username
# Example: LANGFLOW_SUPERUSER=admin
LANGFLOW_SUPERUSER=
@@ -97,6 +101,11 @@ LANGFLOW_SUPERUSER_PASSWORD=
# Values: true, false
LANGFLOW_STORE_ENVIRONMENT_VARIABLES=
+# Whether to enable the MCP Composer feature in MCP projects
+# Values: true, false
+# Default: true
+LANGFLOW_FEATURE_MCP_COMPOSER=
+
# STORE_URL
# Example: LANGFLOW_STORE_URL=https://api.langflow.store
# LANGFLOW_STORE_URL=
@@ -111,4 +120,4 @@ LANGFLOW_STORE_ENVIRONMENT_VARIABLES=
# Value must finish with slash /
#BACKEND_URL=http://localhost:7860/
-BACKEND_URL=
\ No newline at end of file
+BACKEND_URL=
diff --git a/.github/changes-filter.yaml b/.github/changes-filter.yaml
index b21ef891e..02a639abd 100644
--- a/.github/changes-filter.yaml
+++ b/.github/changes-filter.yaml
@@ -7,6 +7,7 @@ python:
- "src/backend/base/pyproject.toml"
- "src/backend/base/uv.lock"
- "**/python_test.yml"
+ - ".github/workflows/ci.yml"
components-changes:
- "src/backend/base/langflow/components/**"
starter-projects-changes:
@@ -17,6 +18,7 @@ frontend:
- "src/frontend/**"
- "**/typescript_test.yml"
- "**/jest_test.yml"
+ - ".github/workflows/ci.yml"
docs:
- "docs/**"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 07f81087c..17b2fc53c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -38,7 +38,7 @@ on:
type: string
default: "['3.10']"
pull_request:
- types: [synchronize, labeled]
+ types: [opened, synchronize, labeled]
merge_group:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -120,7 +120,7 @@ jobs:
name: Should Run CI
runs-on: ubuntu-latest
outputs:
- should-run-ci: ${{ (contains( github.event.pull_request.labels.*.name, 'lgtm') && github.event.pull_request.draft == false) || (github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call' || github.event_name == 'merge_group') }}
+ should-run-ci: ${{ (github.event.pull_request.draft == false) || (github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call' || github.event_name == 'merge_group') }}
should-run-tests: ${{ !contains(github.event.pull_request.labels.*.name, 'fast-track') || github.event_name == 'workflow_call' || github.event_name == 'workflow_dispatch' || github.event_name == 'merge_group' }}
steps:
# Do anything just to make the job run
@@ -208,7 +208,7 @@ jobs:
test-templates:
needs: [path-filter, set-ci-condition]
name: Test Starter Templates
- if: ${{ needs.path-filter.outputs.starter-projects == 'true' && needs.set-ci-condition.outputs.should-run-tests == 'true' }}
+ if: ${{ (needs.path-filter.outputs.python == 'true' || needs.path-filter.outputs.frontend == 'true') && needs.set-ci-condition.outputs.should-run-tests == 'true' }}
runs-on: ubuntu-latest
steps:
- name: Checkout code
diff --git a/.github/workflows/cross-platform-test.yml b/.github/workflows/cross-platform-test.yml
index 81158d734..b072fb41f 100644
--- a/.github/workflows/cross-platform-test.yml
+++ b/.github/workflows/cross-platform-test.yml
@@ -256,7 +256,8 @@ jobs:
- name: Test CLI help command (Windows)
if: matrix.os == 'windows'
run: |
- test-env\Scripts\python.exe -m langflow --help
+ call test-env\Scripts\activate.bat
+ python -m langflow --help
shell: cmd
- name: Test CLI help command (Unix)
@@ -508,7 +509,8 @@ jobs:
- name: Test CLI help command (Windows)
if: matrix.os == 'windows'
run: |
- test-env\Scripts\python.exe -m langflow --help
+ call test-env\Scripts\activate.bat
+ python -m langflow --help
shell: cmd
- name: Test CLI help command (Unix)
diff --git a/README.md b/README.md
index 1529de885..0edc01745 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,10 @@
[](https://deepwiki.com/langflow-ai/langflow)
> [!CAUTION]
-> Users must update to Langflow >= 1.3 to protect against [CVE-2025-3248](https://nvd.nist.gov/vuln/detail/CVE-2025-3248).
+> - Users must update to Langflow >= 1.3 to protect against [CVE-2025-3248](https://nvd.nist.gov/vuln/detail/CVE-2025-3248)
+> - Users must update to Langflow >= 1.5.1 to protect against [CVE-2025-57760](https://github.com/langflow-ai/langflow/security/advisories/GHSA-4gv9-mp8m-592r)
+>
+> For security information, see our [Security Policy](./SECURITY.md) and [Security Advisories](https://github.com/langflow-ai/langflow/security/advisories).
[Langflow](https://langflow.org) is a powerful tool for building and deploying AI-powered agents and workflows. It provides developers with both a visual authoring experience and built-in API and MCP servers that turn every workflow into a tool that can be integrated into applications built on any framework or stack. Langflow comes with batteries included and supports all major LLMs, vector databases and a growing library of AI tools.
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 000000000..f18f02a6b
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,125 @@
+# Releasing Langflow
+
+Langflow follows a **release-when-ready** cadence, with each cycle typically lasting 4–6 weeks depending on QA and stabilization needs.
+
+## Goals
+
+* Keep `main` fast-moving for everyday work while ensuring stable release builds when features mature.
+* Provide an isolated branch for QA and last-minute fixes (the release candidate, RC).
+* Preserve a linear, readable history wherever possible.
+* Ensure released code is extensively tested before publication.
+* Minimize time to resolution of critical bugs.
+
+## Process Overview
+
+### 1. OSS QA
+
+Create an OSS release candidate (RC) branch containing `langflow` and any associated PyPI packages (e.g. `lfx`).
+During this period:
+
+* QA is performed manually.
+* Bug fixes are merged into the RC branch.
+* New features continue development on `main`.
+
+This step usually lasts about a week.
+
+### 2. Desktop QA
+
+Once OSS QA and bugfixing are complete, create a Desktop release candidate.
+
+* The Desktop RC is based on the final OSS RC.
+* Manual QA is performed.
+* Bug fixes are merged into the Desktop RC.
+* New features continue on `main`.
+
+This step also usually lasts about a week.
+
+### 3. Release
+
+After QA and bugfixing are complete for both OSS and Desktop:
+
+* Final releases are cut from their respective RC branches.
+* Release timing is coordinated with Langflow’s DevRel team.
+* For at least 24 hours after release, Discord, GitHub, and other support channels should be monitored for critical bug reports.
+
+## Branch Model
+
+| Branch | Purpose | Merge Policy |
+| --------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------ |
+| **`main`** | Integration branch. All feature PRs target this by default. | **Squash & Merge** (linear history) |
+| **`release-X.Y.Z`** (e.g. `release-1.4.3`) | Temporary RC branch. Active only for the release cycle. Accepts QA and blocking-bug PRs labeled `type:release`. | **Squash & Merge** within the branch. Rebased onto **`main`** before final merge. |
+
+## Release Steps
+
+### 1. Cut Release Candidate
+
+```sh
+git checkout main && git pull # Ensure local main is up to date
+git checkout -b release-X.Y.Z # Create new release candidate branch
+git push -u origin release-X.Y.Z # Push RC branch to remote
+```
+
+### 2. Apply a Bugfix to RC
+
+1. Create a feature branch as usual.
+2. Open a GitHub PR targeting `release-X.Y.Z`.
+3. Review and approve as normal.
+4. Merge into the RC branch after review.
+
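+A sketch of the equivalent commands, assuming a hypothetical fix branch named `fix/some-bug`:
+
+```sh
+git checkout release-X.Y.Z && git pull   # Start from the up-to-date RC branch
+git checkout -b fix/some-bug             # Create the bugfix branch
+git push -u origin fix/some-bug          # Push it, then open a PR targeting release-X.Y.Z
+```
+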
+### 3. Final Release
+
+```sh
+git checkout release-X.Y.Z && git pull # Ensure RC branch is up to date
+git tag vX.Y.Z # Create final release tag
+git push origin vX.Y.Z # Push tag to remote
+```
+
+### 4. Merge RC Back into Main
+
+```sh
+git checkout main
+git merge --ff-only release-X.Y.Z # Fast-forward main to include RC changes
+```
+
+## Merge Strategy
+
+1. **Squash & Merge** everywhere for atomic commits and clean history.
+
+2. While RC is open, periodically re-sync with main:
+
+ ```sh
+ git checkout release-X.Y.Z
+ git fetch origin
+ git rebase origin/main
+ ```
+
+ *This resolves conflicts early while keeping history linear.*
+
+3. Final merge back must be fast-forward only. If not possible, rebase the RC onto `main` before merging.
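+
+   A sketch of the fallback if the fast-forward merge is rejected:
+
+   ```sh
+   git checkout release-X.Y.Z
+   git fetch origin
+   git rebase origin/main                # Rebase the RC onto the latest main
+   git checkout main
+   git merge --ff-only release-X.Y.Z     # Retry the fast-forward merge
+   ```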
+
+## Versioning & Tags
+
+* Follows [Semantic Versioning](https://semver.org): `MAJOR.MINOR.PATCH`.
+* RC tags use `-rc.N`, e.g. `v1.8.0-rc.1`.
+
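+For example, to cut and publish the first release candidate tag of a cycle:
+
+```sh
+git checkout release-X.Y.Z && git pull   # Start from the up-to-date RC branch
+git tag vX.Y.Z-rc.1                      # First RC tag, following the -rc.N convention
+git push origin vX.Y.Z-rc.1              # Push the RC tag to remote
+```
+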
+## Roles
+
+| Role | Responsibility |
+| --------------------------------------- | ----------------------------------------------------------------- |
+| **Release Captain** (rotates per cycle) | Owns timeline, branch cut, tagging, merge-back. |
+| **PR Author** | Ensures tests pass; flags PR with `type:release` if needed in RC. |
+| **CI** | Blocks merges on failing tests or missing labels. |
+
+## FAQ
+
+### Do we ever merge main into the RC?
+
+No. Always rebase the RC onto `main` to preserve linear history.
+
+### Can we automate branch deletion?
+
+Not yet — merge-back and cleanup are manual.
+
+### How flexible is the timeline?
+
+Very flexible. QA and stabilization phases can be extended as needed for quality.
\ No newline at end of file
diff --git a/SECURITY.md b/SECURITY.md
index 275df1f23..930c0f86c 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -48,7 +48,15 @@ Langflow allows users to define and run **custom code components** through endpo
This means an attacker could send malicious code to the endpoint and have it executed on the server—leading to full system compromise, including data theft, remote shell access, or lateral movement within the network.
-To address, upgrade to >= 1.3.0.
+**CVE**: [CVE-2025-3248](https://nvd.nist.gov/vuln/detail/CVE-2025-3248)
+**Fixed in**: Langflow >= 1.3.0
+
+### Privilege Escalation via CLI Superuser Creation (Fixed in 1.5.1)
+
+A privilege escalation vulnerability exists in Langflow containers where an authenticated user with RCE access can invoke the internal CLI command `langflow superuser` to create a new administrative user. This results in full superuser access, even if the user initially registered through the UI as a regular (non-admin) account.
+
+**CVE**: [CVE-2025-57760](https://github.com/langflow-ai/langflow/security/advisories/GHSA-4gv9-mp8m-592r)
+**Fixed in**: Langflow >= 1.5.1
### No API key required if running Langflow with `LANGFLOW_AUTO_LOGIN=true` and `LANGFLOW_SKIP_AUTH_AUTO_LOGIN=true`
@@ -59,4 +67,36 @@ Setting `LANGFLOW_SKIP_AUTH_AUTO_LOGIN=true` and `LANGFLOW_AUTO_LOGIN=true` skip
`LANGFLOW_SKIP_AUTH_AUTO_LOGIN=true` is the default behavior, so users do not need to change existing workflows in 1.5. To update your workflows to require authentication, set `LANGFLOW_SKIP_AUTH_AUTO_LOGIN=false`.
-For more information, see [API keys and authentication](https://docs.langflow.org/api-keys-and-authentication).
\ No newline at end of file
+For more information, see [API keys and authentication](https://docs.langflow.org/api-keys-and-authentication).
+
+## Security Configuration Guidelines
+
+### Superuser Creation Security
+
+The `langflow superuser` CLI command can present a privilege escalation risk if not properly secured.
+
+#### Security Measures
+
+1. **Authentication Required in Production**
+ - When `LANGFLOW_AUTO_LOGIN=false`, superuser creation requires authentication
+ - Use `--auth-token` parameter with a valid superuser API key or JWT token
+
+2. **Disable CLI Superuser Creation**
+ - Set `LANGFLOW_ENABLE_SUPERUSER_CLI=false` to disable the command entirely
+ - Strongly recommended for production environments
+
+3. **Secure AUTO_LOGIN Setting**
+ - Default is `true` for <=1.5. This may change in a future release.
+ - When `true`, creates default superuser `langflow/langflow` - **ONLY USE IN DEVELOPMENT**
+
+#### Production Security Configuration
+
+```bash
+# Recommended production settings
+export LANGFLOW_AUTO_LOGIN=false
+export LANGFLOW_ENABLE_SUPERUSER_CLI=false
+export LANGFLOW_SUPERUSER=""
+export LANGFLOW_SUPERUSER_PASSWORD=""
+export LANGFLOW_DATABASE_URL="" # e.g. "postgresql+psycopg://langflow:secure_pass@db.internal:5432/langflow"
+export LANGFLOW_SECRET_KEY="your-strong-random-secret-key"
+```
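+
+If CLI superuser creation must remain enabled in a controlled environment, authenticate the command explicitly rather than relying on implicit access. A minimal sketch, assuming you have a valid superuser API key or JWT (exact prompts and additional options may vary by version):
+
+```bash
+# Assumes LANGFLOW_ENABLE_SUPERUSER_CLI has not been set to false,
+# and that $SUPERUSER_TOKEN holds a valid superuser API key or JWT.
+langflow superuser --auth-token "$SUPERUSER_TOKEN"
+```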
diff --git a/docs/css/custom.css b/docs/css/custom.css
index d93edc20e..1760ead4a 100644
--- a/docs/css/custom.css
+++ b/docs/css/custom.css
@@ -4,10 +4,9 @@
* work well for content-centric websites.
*/
:root {
- --ifm-background-color: var(--token-primary-bg-c);
- --ifm-color-primary: hsla(330, 81%, 60%, 1);
--ifm-navbar-link-hover-color: initial;
--ifm-navbar-padding-vertical: 0;
+ --ifm-global-radius: 16px;
--ifm-navbar-item-padding-vertical: 0;
--ifm-font-family-base: Inter, -apple-system, BlinkMacSystemFont, Helvetica,
Arial, sans-serif, "Apple Color Emoji", "Segoe UI emoji";
@@ -15,6 +14,26 @@
"Liberation Mono", Menlo, Courier, monospace;
}
+/* Light theme - Pure white background */
+html[data-theme="light"] {
+ --ifm-color-primary: hsla(333, 71%, 51%, 1); /* Slightly darker pink for light theme */
+ --ifm-background-color: var(--ifm-color-white);
+ --ifm-background-surface-color: var(--ifm-color-white);
+}
+
+/* Dark theme - Pure black background */
+html[data-theme="dark"] {
+ --ifm-color-primary: hsla(329, 86%, 70%, 1); /* Lighter pink for dark theme */
+ --ifm-background-color: var(--ifm-color-black);
+ --ifm-background-surface-color: var(--ifm-color-black);
+}
+
+/* Override the Infima navbar styles from docs/node_modules/infima/dist/css/default/default.css */
+.navbar {
+ box-shadow: none !important;
+ border-bottom: 1px solid var(--ifm-toc-border-color);
+}
+
.theme-doc-sidebar-item-category.menu__list-item:not(:first-child) {
margin-top: 0.5rem !important;
}
@@ -54,6 +73,34 @@ p {
text-align: start;
}
+/* Tabs Styling */
+.tabs-container {
+ border: 1px solid var(--ifm-color-emphasis-300);
+ border-radius: var(--ifm-global-radius);
+ padding: 1rem;
+ margin-bottom: 1rem;
+}
+
+.tabs {
+ margin-bottom: 1rem;
+}
+
+.tabs__item {
+ border: none;
+ border-bottom: 1px solid var(--ifm-color-emphasis-200);
+ margin-right: 0rem;
+ padding-bottom: 0.5rem;
+ border-radius: 0;
+}
+
+.tabs__item:hover {
+ background-color: var(--ifm-hover-overlay);
+}
+
+.tabs__item--active {
+ border-bottom-color: var(--ifm-tabs-color-active);
+}
+
/* apply */
#hero-apply {
z-index: -1;
@@ -80,21 +127,19 @@ p {
);
}
-/**
- * Hero component title overrides to match other heading styles
- */
+/* Hero component title overrides to match other heading styles */
.hero-title {
color: rgb(28, 30, 33);
font-family: var(--ifm-heading-font-family);
}
h1 {
- font-size: 26px;
+ font-size: 30px;
}
h2 {
- font-size: 22px;
+ font-size: 25px;
}
h3 {
- font-size: 18px;
+ font-size: 22px;
}
body {
@@ -154,7 +199,6 @@ body {
}
/* Discord */
-
.header-discord-link {
margin-right: 0.5rem;
}
@@ -279,8 +323,10 @@ body {
/* Footer Styles */
.footer {
- padding: 8px 0;
- background-color: var(--ifm-navbar-background-color);
+ /* padding: 8px 0; */
+ padding: 1rem 0 0;
+ background-color: var(--ifm-background-color);
+ border-top: 1px solid var(--ifm-color-emphasis-200);
}
[data-theme="light"] .footer {
@@ -317,11 +363,16 @@ body {
}
.footer .container {
- padding: 0 5rem;
+ padding: 0 1.25rem;
display: flex;
- justify-content: space-between;
+ justify-content: flex-start;
align-items: center;
flex-direction: row-reverse;
+ max-width: 100%;
+}
+
+.footer__title {
+ margin-bottom: 0;
}
/* Sidebar Styles */
@@ -416,3 +467,21 @@ body {
padding: 0;
}
+/* DocSearch Input Styling - Simple border override */
+.DocSearch-Button {
+ border: 1px solid var(--ifm-color-emphasis-300) !important;
+ border-radius: 6px !important;
+ background: var(--ifm-color-content-inverse) !important;
+ color: var(--ifm-color-emphasis-500) !important;
+}
+
+.DocSearch-Button:hover,
+.DocSearch-Button:focus {
+ border-color: var(--ifm-color-primary) !important;
+ box-shadow: 0 0 0 1px var(--ifm-color-primary) !important;
+}
+
+.DocSearch-Search-Icon {
+ color: var(--ifm-color-emphasis-500) !important;
+ width: 16px !important;
+ height: 16px !important;
+}
\ No newline at end of file
diff --git a/docs/docs/API-Reference/api-files.mdx b/docs/docs/API-Reference/api-files.mdx
index 05e01f5e9..8b472f48e 100644
--- a/docs/docs/API-Reference/api-files.mdx
+++ b/docs/docs/API-Reference/api-files.mdx
@@ -419,4 +419,8 @@ curl -X DELETE \
## Create upload file (Deprecated)
-This endpoint is deprecated. Use the `/files` endpoints instead.
\ No newline at end of file
+This endpoint is deprecated. Use the `/files` endpoints instead.
+
+## See also
+
+* [Manage files](/concepts-file-management)
\ No newline at end of file
diff --git a/docs/docs/API-Reference/api-monitor.mdx b/docs/docs/API-Reference/api-monitor.mdx
index f0839349a..89f697303 100644
--- a/docs/docs/API-Reference/api-monitor.mdx
+++ b/docs/docs/API-Reference/api-monitor.mdx
@@ -6,9 +6,26 @@ slug: /api-monitor
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-Use the `/monitor` endpoint to monitor and modify messages passed between Langflow components, vertex builds, and transactions.
+The `/monitor` endpoints are for internal Langflow functionality, primarily related to running flows in the **Playground**, storing chat history, and generating flow logs.
-## Get Vertex builds
+This information is primarily for those who are building custom components or contributing to the Langflow codebase in a way that requires calling or understanding these endpoints.
+
+For typical application development with Langflow, there are more appropriate options for monitoring, debugging, and memory management.
+For more information, see the following:
+
+* [Logs](/logging): Langflow log storage locations, customization options, and where to view logs in the visual editor
+* [Test flows in the Playground](/concepts-playground): Run flows and inspect message history
+* [Memory management options](/memory): Langflow storage locations and options, including the database, cache, and chat history
+
+## Vertex builds
+
+The Vertex build endpoints (`/monitor/builds`) are exclusively for **Playground** functionality.
+
+When you run a flow in the **Playground**, Langflow calls the `/build/$FLOW_ID/flow` endpoint in [chat.py](https://github.com/langflow-ai/langflow/blob/main/src/backend/base/langflow/api/v1/chat.py#L143). This call retrieves the flow data, builds a graph, and executes the graph. As each component (or node) is executed, the `build_vertex` function calls `build_and_run`, which may call the individual components' `def_build` method, if it exists. If a component doesn't have a `def_build` function, the build still returns a component.
+
+The `build` function allows components to execute logic at runtime. For example, the [**Recursive Character Text Splitter** component](https://github.com/langflow-ai/langflow/blob/main/src/backend/base/langflow/components/langchain_utilities/recursive_character.py) is a child of the `LCTextSplitterComponent` class. When text needs to be processed, the parent class's `build` method is called, which creates a `RecursiveCharacterTextSplitter` object and uses it to split the text according to the defined parameters. The split text is then passed on to the next component. This all occurs when the component is built.
+
+### Get Vertex builds
Retrieve Vertex builds for a specific flow.
@@ -384,7 +401,7 @@ curl -X GET \
-## Delete Vertex builds
+### Delete Vertex builds
Delete Vertex builds for a specific flow.
@@ -404,7 +421,12 @@ curl -X DELETE \
-## Get messages
+## Messages endpoints
+
+The `/monitor/messages` endpoints store, retrieve, edit, and delete records in the message table in [`langflow.db`](/memory).
+Typically, these are called implicitly when running flows that produce message history, or when inspecting and modifying **Playground** memories.
+
+### Get messages
Retrieve a list of all messages:
@@ -466,7 +488,7 @@ curl -X GET \
-## Delete messages
+### Delete messages
Delete specific messages by their IDs.
@@ -490,7 +512,7 @@ curl -v -X DELETE \
-## Update message
+### Update message
Update a specific message by its ID.
@@ -540,7 +562,7 @@ curl -X PUT \
-## Update session ID
+### Update session ID
Update the session ID for messages.
@@ -591,7 +613,7 @@ curl -X PATCH \
-## Delete messages by session
+### Delete messages by session
Delete all messages for a specific session.
@@ -614,6 +636,7 @@ HTTP/1.1 204 No Content
## Get transactions
Retrieve all transactions, which are interactions between components, for a specific flow.
+This information is also available in [flow logs](/logging).
```bash
curl -X GET \
@@ -651,4 +674,5 @@ curl -X GET \
## See also
+- [Use voice mode](/concepts-voice-mode)
- [Session ID](/session-id)
\ No newline at end of file
diff --git a/docs/docs/API-Reference/api-reference-api-examples.mdx b/docs/docs/API-Reference/api-reference-api-examples.mdx
index f9c7aeaae..a6290f2c1 100644
--- a/docs/docs/API-Reference/api-reference-api-examples.mdx
+++ b/docs/docs/API-Reference/api-reference-api-examples.mdx
@@ -186,9 +186,198 @@ curl -X GET \
-H "x-api-key: $LANGFLOW_API_KEY"
```
+## Available endpoints
+
+Because you can run Langflow as either an IDE (frontend and backend) or a runtime (headless, backend-only), it serves endpoints that support frontend and backend operations.
+Many endpoints are for orchestration between the frontend and backend, reading and writing to the Langflow database, or enabling frontend functionality, like the **Playground**.
+Unless you are contributing to the Langflow codebase, you won't directly call most of the Langflow endpoints.
+
+For application development, the most commonly used endpoints are the `/run` and `/webhook` [flow trigger endpoints](/api-flows-run).
+For some use cases, you might use some other endpoints, such as the `/files` endpoints to use files in flows.
+
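+For example, the following request runs a flow through the `/run` endpoint. This is a sketch that assumes `$LANGFLOW_SERVER_URL`, `$FLOW_ID`, and `$LANGFLOW_API_KEY` are set as in the other examples in these docs:
+
+```bash
+curl -X POST \
+  "http://$LANGFLOW_SERVER_URL/api/v1/run/$FLOW_ID" \
+  -H "Content-Type: application/json" \
+  -H "x-api-key: $LANGFLOW_API_KEY" \
+  -d '{"input_value": "Hello, Langflow!", "output_type": "chat", "input_type": "chat"}'
+```
+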
+To help you explore the available endpoints, the following lists are sorted by primary use case, although some endpoints might support multiple use cases.
+
+
+
+
+The following endpoints are useful for developing applications with Langflow and administering Langflow deployments with one or more users.
+You will most often use the flow trigger endpoints.
+Other endpoints are helpful for specific use cases, such as administration and flow management in runtime deployments that don't have a visual editor.
+
+* [Flow trigger endpoints](/api-flows-run):
+ * POST `/v1/run/{flow_id_or_name}`: Run a flow.
+ * POST `/v1/run/advanced/{flow_id}`: Advanced run with explicit `inputs`, `outputs`, `tweaks`, and optional `session_id`.
+ * POST `/v1/webhook/{flow_id_or_name}`: Trigger a flow via webhook payload.
+
+* Deployment details:
+ * GET `/v1/version`: Return Langflow version. See [Get version](/api-reference-api-examples#get-version).
+ * GET `/v1/config`: Return deployment configuration. See [Get configuration](/api-reference-api-examples#get-configuration).
+
+* [Projects endpoints](/api-projects):
+ * POST `/v1/projects/`: Create a project.
+ * GET `/v1/projects/`: List projects.
+ * GET `/v1/projects/{project_id}`: Read a project (with paginated flows support).
+ * PATCH `/v1/projects/{project_id}`: Update project info and membership.
+ * DELETE `/v1/projects/{project_id}`: Delete a project.
+ * GET `/v1/projects/download/{project_id}`: Export all flows in a project as ZIP.
+ * POST `/v1/projects/upload/`: Import a project ZIP (creates project and flows).
+ * GET `/v1/starter-projects/`: Return a list of templates.
+
+* [Files endpoints](/api-files):
+ * Files (v1)
+ * POST `/v1/files/upload/{flow_id}`: Upload a file to a specific flow.
+ * GET `/v1/files/download/{flow_id}/{file_name}`: Download a file from a flow.
+ * GET `/v1/files/images/{flow_id}/{file_name}`: Stream an image from a flow.
+ * GET `/v1/files/profile_pictures/{folder_name}/{file_name}`: Get a profile picture asset.
+ * GET `/v1/files/profile_pictures/list`: List available profile picture assets.
+ * GET `/v1/files/list/{flow_id}`: List files for a flow.
+ * DELETE `/v1/files/delete/{flow_id}/{file_name}`: Delete a file from a flow.
+ * Files (v2)
+ * POST `/v2/files` (alias `/v2/files/`): Upload a file owned by the current user.
+ * GET `/v2/files` (alias `/v2/files/`): List files owned by the current user.
+ * DELETE `/v2/files/batch/`: Delete multiple files by IDs.
+ * POST `/v2/files/batch/`: Download multiple files as a ZIP by IDs.
+ * GET `/v2/files/{file_id}`: Download a file by ID (or return raw content internally).
+ * PUT `/v2/files/{file_id}`: Edit a file name by ID.
+ * DELETE `/v2/files/{file_id}`: Delete a file by ID.
+ * DELETE `/v2/files` (alias `/v2/files/`): Delete all files for the current user.
+
+* [API keys and authentication](/api-keys-and-authentication):
+ * GET `/v1/api_key/`: List API keys for the current user.
+ * POST `/v1/api_key/`: Create a new API key.
+ * DELETE `/v1/api_key/{api_key_id}`: Delete an API key.
+ * POST `/v1/api_key/store`: Save an encrypted Store API key (cookie set).
+
+* [Flow management endpoints](/api-flows):
+ * POST `/v1/flows/`: Create a flow.
+ * GET `/v1/flows/`: List flows (supports pagination and filters).
+ * GET `/v1/flows/{flow_id}`: Read a flow by ID.
+ * GET `/v1/flows/public_flow/{flow_id}`: Read a public flow by ID.
+ * PATCH `/v1/flows/{flow_id}`: Update a flow.
+ * DELETE `/v1/flows/{flow_id}`: Delete a flow.
+ * POST `/v1/flows/batch/`: Create multiple flows.
+ * POST `/v1/flows/upload/`: Import flows from a JSON file.
+ * DELETE `/v1/flows/`: Delete multiple flows by IDs.
+ * POST `/v1/flows/download/`: Export flows to a ZIP file.
+ * GET `/v1/flows/basic_examples/`: List basic example flows.
+
+* [Users endpoints](/api-users):
+ * POST `/v1/users/`: Add a user (superuser required when auth enabled).
+ * GET `/v1/users/whoami`: Return the current authenticated user.
+ * GET `/v1/users/`: List all users (superuser required).
+ * PATCH `/v1/users/{user_id}`: Update a user (with role checks).
+ * PATCH `/v1/users/{user_id}/reset-password`: Reset own password.
+ * DELETE `/v1/users/{user_id}`: Delete a user (cannot delete yourself).
+
+
+
+
+You might use these endpoints when developing custom Langflow components for your own use or to share with the Langflow community:
+
+* Develop custom components:
+ * GET `/v1/all`: Return all available Langflow component types. See [Get all components](/api-reference-api-examples#get-all-components).
+ * POST `/v1/custom_component`: Build a custom component from code and return its node.
+ * POST `/v1/custom_component/update`: Update an existing custom component's build config and outputs.
+ * POST `/v1/validate/code`: Validate a Python code snippet for a custom component.
+
+* Langflow Store:
+ * GET `/v1/store/check/`: Return whether the Store feature is enabled.
+ * GET `/v1/store/check/api_key`: Check if a Store API key exists and is valid.
+ * POST `/v1/store/components/`: Share a component to the Store.
+ * PATCH `/v1/store/components/{component_id}`: Update a shared component.
+ * GET `/v1/store/components/`: List available Store components (filters supported).
+ * GET `/v1/store/components/{component_id}`: Download a component from the Store.
+ * GET `/v1/store/tags`: List Store tags.
+ * GET `/v1/store/users/likes`: List components liked by the current user.
+ * POST `/v1/store/users/likes/{component_id}`: Like a component.
+
+
+
+
+The following endpoints are for managing Langflow MCP servers, both Langflow-hosted MCP servers and external MCP server connections:
+
+* **MCP (global)**:
+ * HEAD `/v1/mcp/sse`: Health check for MCP SSE.
+ * GET `/v1/mcp/sse`: Open SSE stream for MCP server events.
+ * POST `/v1/mcp/`: Post messages to the MCP server.
+
+* **MCP (project-specific)**:
+ * GET `/v1/mcp/project/{project_id}`: List MCP-enabled tools and project auth settings.
+ * HEAD `/v1/mcp/project/{project_id}/sse`: Health check for project SSE.
+ * GET `/v1/mcp/project/{project_id}/sse`: Open project-scoped MCP SSE.
+ * POST `/v1/mcp/project/{project_id}`: Post messages to project MCP server.
+ * POST `/v1/mcp/project/{project_id}/` (trailing slash): Same as above.
+ * PATCH `/v1/mcp/project/{project_id}`: Update MCP settings for flows and project auth settings.
+ * POST `/v1/mcp/project/{project_id}/install`: Install MCP client config for Cursor/Windsurf/Claude (local only).
+ * GET `/v1/mcp/project/{project_id}/installed`: Check which clients have MCP config installed.
+
+
+
+
+The following endpoints are most often used when you're contributing to the Langflow codebase and need to understand or call endpoints that support frontend-to-backend orchestration or other internal functionality.
+
+* Base (metadata):
+ * GET `/v1/all`: Return all available Langflow component types. See [Get all components](/api-reference-api-examples#get-all-components).
+ * GET `/v1/version`: Return Langflow version. See [Get version](/api-reference-api-examples#get-version).
+ * GET `/v1/config`: Return deployment configuration. See [Get configuration](/api-reference-api-examples#get-configuration).
+ * GET `/v1/starter-projects/`: Return a list of templates.
+
+* [Build endpoints](/api-build) (internal editor support):
+ * POST `/v1/build/{flow_id}/flow`: Start a flow build and return a job ID.
+ * GET `/v1/build/{job_id}/events`: Stream or fetch build events.
+ * POST `/v1/build/{job_id}/cancel`: Cancel a build job.
+ * POST `/v1/build_public_tmp/{flow_id}/flow`: Build a public flow without auth.
+ * POST `/v1/validate/prompt`: Validate a prompt payload.
+
+* [API keys and authentication](/api-keys-and-authentication):
+ * POST `/v1/login`: Login and set tokens as cookies.
+ * GET `/v1/auto_login`: Auto-login (if enabled) and set tokens.
+ * POST `/v1/refresh`: Refresh tokens using refresh cookie.
+ * POST `/v1/logout`: Logout and clear cookies.
+
+* [Monitor endpoints](/api-monitor):
+ * GET `/v1/monitor/builds`: Get vertex builds for a flow.
+ * DELETE `/v1/monitor/builds`: Delete vertex builds for a flow.
+ * GET `/v1/monitor/messages/sessions`: List message session IDs (auth required).
+ * GET `/v1/monitor/messages`: List messages with optional filters.
+ * DELETE `/v1/monitor/messages`: Delete messages by IDs (auth required).
+ * PUT `/v1/monitor/messages/{message_id}`: Update a message.
+ * PATCH `/v1/monitor/messages/session/{old_session_id}`: Change a session ID for all messages in that session.
+ * DELETE `/v1/monitor/messages/session/{session_id}`: Delete messages by session.
+ * GET `/v1/monitor/transactions`: List transactions for a flow (paginated).
+
+* Variables:
+ * POST `/v1/variables/`: Create a variable, such as an API key, for the user.
+ * GET `/v1/variables/`: List variables for the user.
+ * PATCH `/v1/variables/{variable_id}`: Update a variable.
+ * DELETE `/v1/variables/{variable_id}`: Delete a variable.
+
+* [Use voice mode](/concepts-voice-mode):
+ * WS `/v1/voice/ws/flow_as_tool/{flow_id}`: Bi-directional voice session exposing the flow as a tool.
+ * WS `/v1/voice/ws/flow_as_tool/{flow_id}/{session_id}`: Same as above with explicit session ID.
+ * WS `/v1/voice/ws/flow_tts/{flow_id}`: Voice-to-text session that runs a flow and returns TTS.
+ * WS `/v1/voice/ws/flow_tts/{flow_id}/{session_id}`: Same as above with explicit session ID.
+ * GET `/v1/voice/elevenlabs/voice_ids`: List available ElevenLabs voice IDs for the user.
+
+
+
+
+The following endpoints are deprecated:
+
+* POST `/v1/predict/{flow_id}`: Use [`/v1/run/{flow_id}`](/api-flows-run) instead.
+* POST `/v1/process/{flow_id}`: Use [`/v1/run/{flow_id}`](/api-flows-run) instead.
+* GET `/v1/task/{task_id}`: Deprecated functionality.
+* POST `/v1/upload/{flow_id}`: Use [`/files`](/api-files) instead.
+* POST `/v1/build/{flow_id}/vertices`: Replaced by [`/monitor/builds`](/api-monitor).
+* POST `/v1/build/{flow_id}/vertices/{vertex_id}`: Replaced by [`/monitor/builds`](/api-monitor).
+* GET `/v1/build/{flow_id}/{vertex_id}/stream`: Replaced by [`/monitor/builds`](/api-monitor).
+
+
+
+
## Next steps
-- Use the Langflow API to [run a flow](/api-flows-run).
-- Use the Langflow API to [upload files](/api-files).
-- Use the Langflow API to [get flow logs](/api-logs).
-- Explore all endpoints in the [Langflow API specification](/api).
\ No newline at end of file
+* Use the Langflow API to [run a flow](/api-flows-run).
+* Use the Langflow API to [upload files](/api-files).
+* Use the Langflow API to [get flow logs](/api-logs).
+* Explore all endpoints in the [Langflow API specification](/api).
\ No newline at end of file
diff --git a/docs/docs/Components/bundles-nvidia.mdx b/docs/docs/Components/bundles-nvidia.mdx
index cbc5369f7..af16b55a9 100644
--- a/docs/docs/Components/bundles-nvidia.mdx
+++ b/docs/docs/Components/bundles-nvidia.mdx
@@ -44,6 +44,12 @@ For more information about using embedding model components in flows, see [**Emb
| temperature | Float | Input parameter. The model temperature for embedding generation. Default: `0.1`. |
| embeddings | Embeddings | Output parameter. An `NVIDIAEmbeddings` instance for generating embeddings. |
+:::tip Tokenization considerations
+Be aware of your embedding model's chunk size limit.
+Tokenization errors can occur if your text chunks are too large.
+For more information, see [Tokenization errors due to chunk size](/components-processing#chunk-size).
+:::
+
## NVIDIA Rerank
This component finds and reranks documents using the NVIDIA API.
@@ -53,7 +59,7 @@ This component finds and reranks documents using the NVIDIA API.
This component uses the NVIDIA `nv-ingest` microservice for data ingestion, processing, and extraction of text files.
For more information, see [Integrate NVIDIA Retriever Extraction with Langflow](/integrations-nvidia-ingest).
-## NVIDIA System-Assist
+## NVIDIA G-Assist
This component requires a specific system environment.
For information about this component, see [Integrate NVIDIA G-Assist with Langflow](/integrations-nvidia-g-assist).
\ No newline at end of file
diff --git a/docs/docs/Components/bundles-perplexity.mdx b/docs/docs/Components/bundles-perplexity.mdx
index fceaac1c3..e9aa6054c 100644
--- a/docs/docs/Components/bundles-perplexity.mdx
+++ b/docs/docs/Components/bundles-perplexity.mdx
@@ -29,9 +29,8 @@ You can toggle parameters through the
-Loop example
+### Loop example
In the following example, the **Loop** component iterates over a CSV file until there are no rows left to process.
In this case, the **Item** port passes each row to a **Type Convert** component, which converts the row into a `Message` object and passes it to a **Structured Output** component to be processed into structured data that is then passed back to the **Loop** component's **Looping** port.
@@ -145,7 +154,13 @@ After processing all rows, the **Loop** component loads the aggregated list of s
For more examples of the **Loop** component, try the **Research Translation Loop** template in Langflow, or see the video tutorial [Mastering the Loop Component & Agentic RAG in Langflow](https://www.youtube.com/watch?v=9Wx7WODSKTo).
:::
-
+### Conditional looping
+
+The **If-Else** component isn't compatible with the **Loop** component.
+If you need conditional loop events, redesign your flow to process conditions before the loop.
+For example, if you are looping over a `DataFrame`, you could use multiple [**DataFrame Operations** components](/components-processing#dataframe-operations) to conditionally filter data, and then run separate loops on each set of filtered data.
+
+
## Notify and Listen
diff --git a/docs/docs/Components/components-models.mdx b/docs/docs/Components/components-models.mdx
index dcad38225..24cffa26b 100644
--- a/docs/docs/Components/components-models.mdx
+++ b/docs/docs/Components/components-models.mdx
@@ -7,8 +7,8 @@ import Icon from "@site/src/components/icon";
**Language Model** components in Langflow generate text using a specified Large Language Model (LLM).
-Langflow includes a **Language Model** core component that has built-in support for many LLMs, as well as an interface to connect any [additional **Language Model** component](#additional-language-model-components).
-The built-in LLMs are appropriate for most text-based language model use cases in Langflow.
+Langflow includes a **Language Model** core component that has built-in support for many LLMs.
+Alternatively, you can use any [additional language model](#additional-language-models) in place of the core **Language Model** component.
## Use Language Model components in a flow
@@ -18,19 +18,20 @@ These components accept inputs like chat messages, files, and instructions in or
The flow must include [**Chat Input and Output** components](/components-io#chat-io) to allow chat-based interactions with the LLM.
However, you can also use the **Language Model** component for actions that don't emit chat output directly, such as the **Smart Function** component.
-The following example uses the **Language Model** core component and a built-in LLM to create a chatbot flow similar to the **Basic Prompting** template.
-The example focuses on using the built-in models, but it also indicates where you can integrate another model.
+The following example uses the **Language Model** core component to create a chatbot flow similar to the **Basic Prompting** template.
+It also explains how you can replace the core component with another LLM.
1. Add the **Language Model** component to your flow.
2. In the **OpenAI API Key** field, enter your OpenAI API key.
This example uses the default OpenAI model and a built-in Anthropic model to compare responses from different providers.
-
If you want to use a different provider, edit the **Model Provider**, **Model Name**, and **API Key** fields accordingly.
- If you want to use provider or model that isn't built-in to the **Language Model** core component, see [Additional Language Model components](#additional-language-model-components) to learn how to connect a **Custom** model provider to the **Language Model** component.
+ :::tip My preferred provider or model isn't listed
+ If you want to use a provider or model that isn't built into the **Language Model** core component, you can replace this component with another compatible component, as explained in [Additional language models](#additional-language-models).
Then, you can continue following these steps to build your flow.
+ :::
3. In the [component's header menu](/concepts-components#component-menus), click **Controls**, enable the **System Message** parameter, and then click **Close**.
@@ -118,24 +119,27 @@ This is a specific data type that is only required by certain components, such a
With this configuration, the **Language Model** component is meant to support an action completed by another component, rather than producing a text response for a standard chat-based interaction.
For an example, the **Smart Function** component uses an LLM to create a function from natural language input.
-## Additional Language Model components
+## Additional language models
-If your provider or model isn't supported by the **Language Model** core component, additional single-provider **Language Model** components are available in the [**Bundles**](/components-bundle-components) section of the **Components** menu.
+If your provider or model isn't supported by the **Language Model** core component, additional provider-specific models are available in the [**Bundles**](/components-bundle-components) section of the **Components** menu.
-You can use bundled components directly in your flows or you can connect them to other components that accept a [`LanguageModel`](/data-types#languagemodel) input, such as the **Language Model** and **Agent** components.
+You can use these provider-specific components directly in your flows in the same place that you would use the **Language Model** core component.
+Or, you can connect them to other components that accept a [`LanguageModel`](/data-types#languagemodel) input, such as the **Smart Function** and **Agent** components.
-For example, to connect bundled components to the **Language Model** core component, do the following:
+For example, to connect a provider-specific component to the **Agent** component, do the following:
-1. In the **Language Model** component, set **Model Provider** to **Custom**.
+1. In the **Components** menu, search for your preferred model provider, and then add the provider's LLM component to your flow.
+The component may not have `model` in the name.
+For example, Azure OpenAI LLMs are in the [**Azure OpenAI** component](/bundles-azure#azure-openai).
- The field name changes to **Language Model** and the input port changes to a `LanguageModel` port.
+2. Configure the LLM component as needed to connect to your preferred model.
-2. Add a compatible bundled component to your flow, such as the [**Vertex AI** component for text generation](/bundles-vertexai).
-
-3. Change the bundled component's output type to `LanguageModel`.
-To do this, click **Model Response** near the component's output port, and then select **Language Model**.
+3. Change the LLM component's output type from **Model Response** to **Language Model**.
+The output port changes to a `LanguageModel` port.
For more information, see [Language Model output types](#language-model-output-types).
-4. Connect the bundled component's output to the **Language Model** component's `LanguageModel` input port.
+4. Add an **Agent** component to the flow, and then set **Model Provider** to **Custom**.
+The **Model Provider** field changes to a **Language Model** field with a `LanguageModel` port.
- The bundled component now provides the LLM configuration for the component that it is connected to, and you can continue building your flow as needed.
\ No newline at end of file
+5. Connect the LLM component's output to the **Agent** component's **Language Model** input.
+The **Agent** component now inherits the LLM settings from the connected LLM component instead of using any of the built-in models.
\ No newline at end of file
diff --git a/docs/docs/Components/components-processing.mdx b/docs/docs/Components/components-processing.mdx
index ca2c1ec43..b6e76a9b4 100644
--- a/docs/docs/Components/components-processing.mdx
+++ b/docs/docs/Components/components-processing.mdx
@@ -4,6 +4,8 @@ slug: /components-processing
---
import Icon from "@site/src/components/icon";
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
Langflow's **Processing** components process and transform data within a flow.
They have many uses, including:
@@ -11,7 +13,7 @@ They have many uses, including:
* Feed instructions and context to your LLMs and agents with the [**Prompt Template** component](#prompt-template).
* Extract content from larger chunks of data with a [**Parser** component](#parser).
* Filter data with natural language with the [**Smart Function** component](#smart-function).
-* Save data to your local machine with the [**Save To File** component](#save-to-file).
+* Save data to your local machine with the [**Save File** component](#save-file).
* Transform data into a different data type with the [**Type Convert** component](#type-convert) to pass it between incompatible components.
## Prompt Template
@@ -21,8 +23,7 @@ See [Prompt Template](/components-prompts).
## Batch Run
The **Batch Run** component runs a language model over _each row of one text column_ in a [`DataFrame`](/data-types#dataframe), and then returns a new `DataFrame` with the original text and an LLM response.
-
-The response contains the following columns:
+The output contains the following columns:
* `text_input`: The original text from the input `DataFrame`
* `model_response`: The model's response for each input
@@ -31,7 +32,7 @@ The response contains the following columns:
### Use the Batch Run component in a flow
-If you pass this output to a [**Parser** component](/components-processing#parser), you can use variables in the parsing template to reference these keys, such as `{text_input}` and `{model_response}`.
+If you pass the **Batch Run** output to a [**Parser** component](/components-processing#parser), you can use variables in the parsing template to reference these keys, such as `{text_input}` and `{model_response}`.
This is demonstrated in the following example.

@@ -57,7 +58,7 @@ For example, `Create a business card for each name.`
record_number: {batch_index}, name: {text_input}, summary: {model_response}
```
-7. To test the processing, click the **Parser** component, and then click **Run component**, and then click **Inspect output** to view the final `DataFrame`.
+7. To test the processing, click the **Parser** component, click **Run component**, and then click **Inspect output** to view the final `DataFrame`.
You can also connect a **Chat Output** component to the **Parser** component if you want to see the output in the **Playground**.
@@ -78,26 +79,41 @@ You can toggle parameters through the **Add more**.
+3. Under **Select Keys**, add keys for `name`, `username`, and `email`.
+Click **Add more** to add a field for each key.
-4. Connect a **Chat Output** component.
+ For this example, assume that the webhook will receive consistent payloads that always contain `name`, `username`, and `email` keys.
+ The **Select Keys** operation extracts the value of these keys from each incoming payload.
+4. Optional: If you want to view the output in the **Playground**, connect the **Data Operations** component's output to a **Chat Output** component.

-5. To test the flow, send the following request to your flow's webhook endpoint, and then open the **Playground** to see the resulting output from processing the payload.
+5. To test the flow, send the following request to your flow's webhook endpoint.
+For more information about the webhook endpoint, see [Trigger flows with webhooks](/webhook).
```bash
curl -X POST "http://$LANGFLOW_SERVER_URL/api/v1/webhook/$FLOW_ID" \
@@ -128,15 +144,19 @@ For this example, select the **Select Keys** operation to extract specific user
}'
```
+6. To view the `Data` resulting from the **Select Keys** operation, do one of the following:
+
+ * If you attached a **Chat Output** component, open the **Playground** to see the result as a chat message.
+ * Click **Inspect output** on the **Data Operations** component.
+
### Data Operations parameters
-Some **Data Operations** component input parameters are hidden by default in the visual editor.
-You can toggle parameters through the **Controls** in the [component's header menu](/concepts-components#component-menus).
+Many **Data Operations** component input parameters are conditional based on the selected **Operation** (`operation`).
| Name | Display Name | Info |
|------|--------------|------|
| data | Data | Input parameter. The `Data` object to operate on. |
-| operations | Operations | Input parameter. The operation to perform on the data. See [Data Operations operations](#data-operations-operations) |
+| operation | Operation | Input parameter. The operation to perform on the data. See [Available data operations](#available-data-operations). |
| select_keys_input | Select Keys | Input parameter. A list of keys to select from the data. |
| filter_key | Filter Key | Input parameter. The key to filter by. |
| operator | Comparison Operator | Input parameter. The operator to apply for comparing values. |
@@ -144,9 +164,8 @@ You can toggle parameters through the **Run component**.
-6. To inspect the filtered data, in the **Smart Filter** component, click **Inspect output**.
-The result is a structured DataFrame.
+1. Create a new flow or use an existing flow.
-```text
-id | name | company | username | email | address | zip
----|------------------|----------------------|-----------------|------------------------------------|-------------------|-------
-1 | Emily Johnson | ABC Corporation | emily_johnson | emily.johnson@abccorporation.com | 123 Main St | 12345
-2 | Michael Williams | XYZ Corp | michael_williams| michael.williams@xyzcorp.com | 456 Elm Ave | 67890
-```
+
+ API response extraction flow example
-7. Add **DataFrame Operations** and **Chat Output** components to the flow.
-8. In the **DataFrame Operations** component, in the **Operation** field, select **Filter**.
-9. To apply a filter, in the **Column Name** field, enter a column to filter on. This example filters by `name`.
-10. Click **Playground**, and then click **Run Flow**.
-The flow extracts the values from the `name` column.
+ The following example flow uses five components to extract `Data` from an API response, transform it to a `DataFrame`, and then perform further processing on the tabular data using a **DataFrame Operations** component.
+ The sixth component, **Chat Output**, is optional in this example.
+ It only serves as a convenient way for you to view the final output in the **Playground**, rather than inspecting the component logs.
-```text
-name
-Emily Johnson
-Michael Williams
-John Smith
-...
-```
+ 
-### Operations
+ If you want to use this example to test the **DataFrame Operations** component, do the following:
-This component can perform the following operations on Pandas [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html).
+ 1. Create a flow with the following components:
-| Operation | Required Inputs | Info |
-|-----------|----------------|-------------|
-| Add Column | new_column_name, new_column_value | Adds a new column with a constant value. |
-| Drop Column | column_name | Removes a specified column. |
-| Filter | column_name, filter_value | Filters rows based on column value. |
-| Head | num_rows | Returns first `n` rows. |
-| Rename Column | column_name, new_column_name | Renames an existing column. |
-| Replace Value | column_name, replace_value, replacement_value | Replaces values in a column. |
-| Select Columns | columns_to_select | Selects specific columns. |
-| Sort | column_name, ascending | Sorts DataFrame by column. |
-| Tail | num_rows | Returns last `n` rows. |
+ * **API Request**
+ * **Language Model**
+ * **Smart Function**
+ * **Type Convert**
-
-Parameters
+ 2. Configure the [**Smart Function** component](#smart-function) and its dependencies:
-**Inputs**
+ * **API Request**: Configure the [**API Request** component](/components-data#api-request) to get JSON data from an endpoint of your choice, and then connect the **API Response** output to the **Smart Function** component's **Data** input.
+ * **Language Model**: Select your preferred provider and model, and then enter a valid API key.
+ Change the output to **Language Model**, and then connect the `LanguageModel` output to the **Smart Function** component's **Language Model** input.
+ * **Smart Function**: In the **Instructions** field, enter natural language instructions to extract data from the API response.
+ Your instructions depend on the response content and desired outcome.
+ For example, if the response contains a large `result` field, you might provide instructions like `explode the result field out into a Data object`.
-| Name | Display Name | Info |
-|------|--------------|------|
-| df | DataFrame | The input DataFrame to operate on. |
-| operation | Operation | The DataFrame operation to perform. Options include Add Column, Drop Column, Filter, Head, Rename Column, Replace Value, Select Columns, Sort, and Tail. |
-| column_name | Column Name | The column name to use for the operation. |
-| filter_value | Filter Value | The value to filter rows by. |
-| ascending | Sort Ascending | Whether to sort in ascending order. |
-| new_column_name | New Column Name | The new column name when renaming or adding a column. |
-| new_column_value | New Column Value | The value to populate the new column with. |
-| columns_to_select | Columns to Select | A list of column names to select. |
-| num_rows | Number of Rows | The number of rows to return for head/tail operations. The default is 5. |
-| replace_value | Value to Replace | The value to replace in the column. |
-| replacement_value | Replacement Value | The value to replace with. |
+ 3. Convert the **Smart Function** component's `Data` output to `DataFrame`:
-**Outputs**
+ 1. Connect the **Filtered Data** output to the **Type Convert** component's **Data** input.
+ 2. Set the **Type Convert** component's **Output Type** to **DataFrame**.
-| Name | Display Name | Info |
-|------|--------------|------|
-| output | DataFrame | The resulting DataFrame after the operation. |
+ Now the flow is ready for you to add the **DataFrame Operations** component.
-
+
-## LLM router
+2. Add a **DataFrame Operations** component to the flow, and then connect the `DataFrame` output from another component to the **DataFrame** input.
-This component routes requests to the most appropriate LLM based on [OpenRouter](https://openrouter.ai/docs/quickstart) model specifications.
+ All operations in the **DataFrame Operations** component require at least one `DataFrame` input from another component.
+ If a component doesn't produce `DataFrame` output, you can use another component, such as the **Type Convert** component, to reformat the data before passing it to the **DataFrame Operations** component.
+ Alternatively, you could consider using a component that is designed to process the original data type, such as the **Parser** or **Data Operations** components.
-The judge LLM analyzed your input message to understand the evaluation context, and then selects the most appropriate model from your LLM pool.
+ If you are following along with the example flow, connect the **Type Convert** component's **DataFrame Output** port to the **DataFrame** input.
-The selected model processes your input and returns the response.
+3. In the **Operation** field, select the operation you want to perform on the incoming `DataFrame`.
+For example, the **Filter** operation filters the rows based on a specified column and value.
-To use the **LLM Router** component in a flow, do the following:
+ :::tip
+ You can select only one operation.
+ If you need to perform multiple operations on the data, you can chain multiple **DataFrame Operations** components together to execute each operation in sequence.
+ For more complex multi-step operations, like dramatic schema changes or pivots, consider using an LLM-powered component, like the **Structured Output** or **Smart Function** component, as a replacement or preparation for the **DataFrame Operations** component.
+ :::
-1. Connect multiple **Language Model** components to the **LLM Router**'s **Language Models** input.
+ If you're following along with the example flow, select any operation that you want to apply to the data that was extracted by the **Smart Function** component.
+ To view the contents of the incoming `DataFrame`, click **Run component** on the **Type Convert** component, and then **Inspect output**.
+ If the `DataFrame` seems malformed, click **Inspect output** on each upstream component to determine where the error occurs, and then modify your flow's configuration as needed.
+ For example, if the **Smart Function** component didn't extract the expected fields, modify your instructions or verify that the given fields are present in the **API Response** output.
-2. Connect a **Judge LLM** component to the **Judge LLM** input.
+4. Configure the operation's parameters.
+The specific parameters depend on the selected operation.
+For example, if you select the **Filter** operation, you must define a filter condition using the **Column Name**, **Filter Value**, and **Filter Operator** parameters.
+For more information, see [DataFrame Operations parameters](#dataframe-operations-parameters).
-3. Connect **Chat Input** and **Chat Output** components to the **LLM Router**.
-The flow looks like this:
+5. To test the flow, click **Run component** on the **DataFrame Operations** component, and then click **Inspect output** to view the new `DataFrame` created from the **Filter** operation.
+
+ If you want to view the output in the **Playground**, connect the **DataFrame Operations** component's output to a **Chat Output** component, rerun the **DataFrame Operations** component, and then click **Playground**.
+
+For another example, see [Conditional looping](/components-logic#conditional-looping).
+
+### DataFrame Operations parameters
+
+Most **DataFrame Operations** parameters are conditional because they only apply to specific operations.
+
+The only permanent parameters are **DataFrame** (`df`), which is the `DataFrame` input, and **Operation** (`operation`), which is the operation to perform on the `DataFrame`.
+Once you select an operation, the conditional parameters for that operation appear on the **DataFrame Operations** component.
+
+
+
+
+The **Add Column** operation allows you to add a new column to the `DataFrame` with a constant value.
+
+The parameters are **New Column Name** (`new_column_name`) and **New Column Value** (`new_column_value`).
+
+
+
+
+The **Drop Column** operation allows you to remove a column from the `DataFrame`, specified by **Column Name** (`column_name`).
+
+
+
+
+The **Filter** operation allows you to filter the `DataFrame` based on a specified condition.
+The output is a `DataFrame` containing only the rows that matched the filter condition.
+
+Provide the following parameters:
+
+* **Column Name** (`column_name`): The name of the column to filter on.
+* **Filter Value** (`filter_value`): The value to filter on.
+* **Filter Operator** (`filter_operator`): The operator to use for filtering, one of `equals` (default), `not equals`, `contains`, `starts with`, `ends with`, `greater than`, or `less than`.
+
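+Conceptually, the **Filter** operation works like a pandas row filter. The following sketch is illustrative only, not the component's implementation; it assumes a pandas-style `DataFrame` and shows the `equals` and `contains` operators:
+
+```python
+import pandas as pd
+
+df = pd.DataFrame({
+    "Name": ["Alex Cruz", "Kalani Smith", "Noam Johnson"],
+    "Role": ["Developer", "Designer", "Manager"],
+})
+
+equals_match = df[df["Role"] == "Developer"]           # Filter Operator: equals
+contains_match = df[df["Name"].str.contains("Smith")]  # Filter Operator: contains
+
+print(equals_match)
+print(contains_match)
+```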
+
+
+
+The **Head** operation allows you to retrieve the first `n` rows of the `DataFrame`, where `n` is set in **Number of Rows** (`num_rows`).
+The default is `5`.
+
+The output is a `DataFrame` containing only the selected rows.
+
+
+
+
+The **Rename Column** operation allows you to rename an existing column in the `DataFrame`.
+
+The parameters are **Column Name** (`column_name`), which is the current name, and **New Column Name** (`new_column_name`).
+
+
+
+
+The **Replace Value** operation allows you to replace values in a specific column of the `DataFrame`.
+This operation replaces a target value with a new value.
+All cells matching the target value are replaced with the new value in the new `DataFrame` output.
+
+Provide the following parameters:
+
+* **Column Name** (`column_name`): The name of the column to modify.
+* **Value to Replace** (`replace_value`): The value that you want to replace.
+* **Replacement Value** (`replacement_value`): The new value to use.
+
+
+
+
+The **Select Columns** operation allows you to select one or more specific columns from the `DataFrame`.
+
+Provide a list of column names in **Columns to Select** (`columns_to_select`).
+In the visual editor, click **Add More** to add multiple fields, and then enter one column name in each field.
+
+The output is a `DataFrame` containing only the specified columns.
+
+
+
+
+The **Sort** operation allows you to sort the `DataFrame` on a specific column in ascending or descending order.
+
+Provide the following parameters:
+
+* **Column Name** (`column_name`): The name of the column to sort on.
+* **Sort Ascending** (`ascending`): Whether to sort in ascending order. If disabled (false), the column is sorted in descending order. Default: Enabled (true)
+
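+Conceptually, this matches a pandas sort. The following sketch is illustrative only, not the component's implementation:
+
+```python
+import pandas as pd
+
+df = pd.DataFrame({"name": ["Charlie Lastname", "Bobby Othername"], "age": [28, 25]})
+
+print(df.sort_values("age", ascending=True))   # Sort Ascending enabled (default)
+print(df.sort_values("age", ascending=False))  # Sort Ascending disabled
+```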
+
+
+
+The **Tail** operation allows you to retrieve the last `n` rows of the `DataFrame`, where `n` is set in **Number of Rows** (`num_rows`).
+The default is `5`.
+
+The output is a `DataFrame` containing only the selected rows.
+
+
+
+
+The **Drop Duplicates** operation removes rows from the `DataFrame` by identifying all duplicate values within a single column.
+
+The only parameter is the **Column Name** (`column_name`).
+
+When the flow runs, all rows with duplicate values in the given column are removed.
+The output is a `DataFrame` containing all columns from the original `DataFrame`, but only rows with non-duplicate values.
+
+
+
+
+## LLM Router
+
+The **LLM Router** component routes requests to the most appropriate LLM based on [OpenRouter](https://openrouter.ai/docs/quickstart) model specifications.
+
+To use the component in a flow, you connect multiple **Language Model** components to the **LLM Router** component.
+One model is the judge LLM that analyzes input messages to understand the evaluation context, selects the most appropriate model from the other attached LLMs, and then routes the input to the selected model.
+The selected model processes the input, and then returns the generated response.
+
+The following example flow has three **Language Model** components.
+One is the judge LLM, and the other two are in the LLM pool for request routing.
+The **Chat Input** and **Chat Output** components create a seamless chat interaction where you send a message and receive a response without any user awareness of the underlying routing.

-4. In the **LLM Router** component, set your **Optimization** preference:
- - **Quality**: Prioritizes the highest quality response.
- - **Speed**: Prioritizes the fastest response time.
- - **Cost**: Prioritizes the most cost-effective option.
- - **Balanced**: Strikes a balance between quality, speed, and cost.
+### LLM Router parameters
-5. Run the flow.
-Your input is the task that the LLM router evaluates the models against, such as `Write a story about horses` or `How do I parse data objects out of JSON?`.
-6. In the **LLM Router** component, select the **Model Selection Decision** output to view the router's reasoning.
+Some **LLM Router** component input parameters are hidden by default in the visual editor.
+You can toggle parameters through the **Controls** in the [component's header menu](/concepts-components#component-menus).
+
+| Name | Display Name | Info |
+|------|--------------|------|
+| `models` | **Language Models** | Input parameter. Connect [`LanguageModel`](/data-types#languagemodel) output from multiple [**Language Model** components](/components-models) to create a pool of models. The `judge_llm` selects models from this pool when routing requests. The first model you connect is the default model if there is a problem with model selection or routing. |
+| `input_value` | **Input** | Input parameter. The incoming query to be routed to the model selected by the judge LLM. |
+| `judge_llm` | **Judge LLM** | Input parameter. Connect `LanguageModel` output from _one_ **Language Model** component to serve as the judge LLM for request routing. |
+| `optimization` | **Optimization** | Input parameter. Set a preferred characteristic for model selection by the judge LLM. The options are `quality` (highest response quality), `speed` (fastest response time), `cost` (most cost-effective model), or `balanced` (equal weight for quality, speed, and cost). Default: `balanced` |
+| `use_openrouter_specs` | **Use OpenRouter Specs** | Input parameter. Whether to fetch model specifications from the OpenRouter API. If false, only the model name is provided to the judge LLM. Default: Enabled (true) |
+| `timeout` | **API Timeout** | Input parameter. Set a timeout duration in seconds for API requests made by the router. Default: `10` |
+| `fallback_to_first` | **Fallback to First Model** | Input parameter. Whether to use the first LLM in `models` as a backup if routing fails to reach the selected model. Default: Enabled (true) |
+
+### LLM Router outputs
+
+The **LLM Router** component provides three output options.
+You can set the desired output type near the component's output port.
+
+* **Output**: A `Message` containing the response to the original query as generated by the selected LLM.
+Use this output for regular chat interactions.
+
+* **Selected Model Info**: A `Data` object containing information about the selected model, such as its name and version.
+
+* **Routing Decision**: A `Message` containing the judge model's reasoning for selecting a particular model, including input query length and number of models considered.
+For example:
```text
Model Selection Decision:
@@ -292,51 +418,67 @@ Your input is the task that the LLM router evaluates the models against, such as
- Specifications Source: OpenRouter API
```
+ This is useful for debugging if you feel the judge model isn't selecting the best model.
+
+## Parser {#parser}
+
+The **Parser** component extracts text from structured data (`DataFrame` or `Data`) using a template or direct stringification.
+The output is a `Message` containing the parsed text.
+
+This is a versatile component for data extraction and manipulation in your flows.
+For examples of **Parser** components in flows, see the following:
+
+* [**Batch Run** component](#batch-run)
+* [**Structured Output** component](#structured-output)
+* **Financial Report Parser** template
+* [**Vector Store** components](/components-vector-stores)
+* [Trigger flows with webhooks](/webhook)
+* [Create a vector RAG chatbot](/chat-with-rag)
+
+
+
+### Parsing modes
+
+The **Parser** component has two modes: **Parser** and **Stringify**.
+
+
+
+
+In **Parser** mode, you create a template for text output that can include literal strings and variables for extracted keys.
+
+Use curly braces to define variables anywhere in the template.
+Variables must match keys in the `DataFrame` or `Data` input, such as column names.
+For example, `{name}` extracts the value of a `name` key.
+For more information about the content and structure of `DataFrame` and `Data` objects, see [Langflow data types](/data-types).
+
+When the flow runs, the **Parser** component iterates over the input, producing a `Message` for each parsed item.
+For example, parsing a `DataFrame` creates a `Message` for each row, populated with the unique values from that row.
+
-Parameters
+Employee summary template
-**Inputs**
+This example template extracts employee data into a natural language summary about an employee's hire date and current role:
-| Name | Display Name | Info |
-|------|--------------|------|
-| models | Language Models | A list of LLMs to route between. |
-| input_value | Input | The input message to be routed. |
-| judge_llm | Judge LLM | The LLM that evaluates and selects the most appropriate model. |
-| optimization | Optimization | The optimization preference between quality, speed, cost, or balanced. |
+```text
+{employee_first_name} {employee_last_name} was hired on {start_date}.
+Their current position is {job_title} ({grade}).
+```
-**Outputs**
+The resulting `Message` output replaces the variables with the corresponding extracted values.
+For example:
-| Name | Display Name | Info |
-|------|--------------|------|
-| output | Output | The response from the selected model. |
-| selected_model | Selected Model | The name of the chosen model. |
+```text
+Renlo Kai was hired on 11-July-2017.
+Their current position is Software Engineer (Principal).
+```
-## Parser
+
+Employee profile template
-This component formats `DataFrame` or `Data` objects into text using templates, with an option to convert inputs directly to strings using `stringify`.
+This example template uses Markdown syntax and extracted employee data to create an employee profile:
-To use this component, create variables for values in the `template` the same way you would in a [**Prompt Template** component](/components-prompts). For `DataFrames`, use column names, for example `Name: {Name}`. For `Data` objects, use `{text}`.
-
-To use the **Parser** component with a **Structured Output** component, do the following:
-
-1. Connect a **Structured Output** component's **DataFrame** output to the **Parser** component's **DataFrame** input.
-2. Connect the **File** component to the **Structured Output** component's **Message** input.
-3. Connect the **OpenAI** component's **Language Model** output to the **Structured Output** component's **Language Model** input.
-
-The flow looks like this:
-
-
-
-4. In the **Structured Output** component, click **Open Table**.
-This opens a pane for structuring your table.
-The table contains the rows **Name**, **Description**, **Type**, and **Multiple**.
-5. Create a table that maps to the data you're loading from the **File** loader.
-For example, to create a table for employees, you might have the rows `id`, `name`, and `email`, all of type `string`.
-6. In the **Template** field of the **Parser** component, enter a template for parsing the **Structured Output** component's DataFrame output into structured text.
-Create variables for values in the `template` using curly braces.
-For example, to present a table of employees in Markdown:
```text
# Employee Profile
## Personal Information
@@ -344,40 +486,84 @@ For example, to present a table of employees in Markdown:
- **ID:** {id}
- **Email:** {email}
```
-7. To run the flow, in the **Parser** component, click **Run component**.
-8. To view your parsed text, in the **Parser** component, click **Inspect output**.
-9. Optionally, connect a **Chat Output** component, and open the **Playground** to see the output.
-For an additional example of using the **Parser** component to format a DataFrame from a **Structured Output** component, see the **Market Research** template flow.
-
-
-Parameters
-
-**Inputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| mode | Mode | The tab selection between "Parser" and "Stringify" modes. "Stringify" converts input to a string instead of using a template. |
-| pattern | Template | The template for formatting using variables in curly brackets. For DataFrames, use column names, such as `Name: {Name}`. For Data objects, use `{text}`. |
-| input_data | Data or DataFrame | The input to parse. Accepts either a DataFrame or Data object. |
-| sep | Separator | The string used to separate rows or items. The default is a newline. |
-| clean_data | Clean Data | When stringify is enabled, this option cleans data by removing empty rows and lines. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| parsed_text | Parsed Text | The resulting formatted text as a [Message](/data-types#message) object. |
+When the flow runs, the **Parser** component iterates over each row of the `DataFrame`, populating the template's variables with the appropriate extracted values.
+The resulting text for each row is output as a [`Message`](/data-types#message).
+
+The following parameters are available in **Parser** mode.
+To view and edit all available parameters, click **Controls** in the [component's header menu](/concepts-components#component-menus).
+
+| Name | Display Name | Info |
+|------|--------------|------|
+| input_data | Data or DataFrame | Input parameter. The `Data` or `DataFrame` input to parse. |
+| pattern | Template | Input parameter. The formatting template using plaintext and variables for keys (`{KEY_NAME}`). See the preceding examples for more information. |
+| sep | Separator | Input parameter. A string defining the separator for rows or lines. Default: `\n` (new line). |
+| clean_data | Clean Data | Input parameter. Whether to remove empty rows and lines in each cell or key of the `DataFrame` or `Data` input. Default: Enabled (true) |
+
+
+
+
+Use **Stringify** mode to convert the entire input directly to text.
+This mode doesn't support templates or key selection.
+
+The following parameters are available in **Stringify** mode.
+To view and edit all available parameters, click **Controls** in the [component's header menu](/concepts-components#component-menus).
+
+| Name | Display Name | Info |
+|------|--------------|------|
+| input_data | Data or DataFrame | Input parameter. The `Data` or `DataFrame` input to parse. |
+| sep | Separator | Input parameter. A string defining the separator for rows or lines. Default: `\n` (new line). |
+| clean_data | Clean Data | Input parameter. Whether to remove empty rows and lines in each cell or key of the `DataFrame` or `Data` input. Default: Enabled (true) |
+
+
+
+
+### Test and troubleshoot parsed text
+
+To test the **Parser** component, click **Run component**, and then click **Inspect output** to see the `Message` output with the parsed text.
+You can also connect a **Chat Output** component if you want to view the output in the **Playground**.
+
+If the `Message` output from the **Parser** component has empty or unexpected values, there might be a mapping error between the input and the parsing mode, the input has empty values, or the input isn't suitable for plaintext extraction.
+
+For example, assume you use the following template to parse a `DataFrame`:
+
+```text
+{employee_first_name} {employee_last_name} is a {job_title} ({grade}).
+```
+
+The following `Message` could result from parsing a row where `employee_first_name` was empty and `grade` was `null`:
+
+```text
+ Smith is a Software Engineer (null).
+```
+
+To troubleshoot missing or unexpected values, you can do the following:
+
+* Make sure the variables in your template map to keys in the incoming `Data` or `DataFrame`.
+To see the data being passed directly to the **Parser** component, click **Inspect output** on the component that is sending data to the **Parser** component.
+
+* Check the source data for missing or incorrect values.
+There are several ways you can address these inconsistencies:
+
+ * Rectify the source data directly.
+ * Use other components to amend or filter anomalies before passing the data to the **Parser** component.
+ There are many components you can use for this depending on your goal, such as the **Data Operations**, **Structured Output**, and **Smart Function** components.
+ * Enable the **Parser** component's **Clean Data** parameter to skip empty rows or lines.
+
## Python Interpreter
This component allows you to execute Python code with imported packages.
+The **Python Interpreter** component can only import packages that are already installed in your Langflow environment.
+If you encounter an `ImportError` when trying to use a package, you need to install it first.
+
+To install custom packages, see [Install custom dependencies](/install-custom-dependencies).
+
### Use the Python Interpreter in a flow
-1. To use this component in a flow,in the **Global Imports** field, add the packages you want to import as a comma-separated list, such as `math,pandas`.
+1. To use this component in a flow, in the **Global Imports** field, add the packages you want to import as a comma-separated list, such as `math,pandas`.
At least one import is required.
2. In the **Python Code** field, enter the Python code you want to execute. Use `print()` to see the output.
3. Optional: Enable **Tool Mode**, and then connect the **Python Interpreter** component to an **Agent** component as a tool.
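+
+For example, with `math,pandas` in the **Global Imports** field, the **Python Code** field could contain a short script like the following sketch. The import statements are included here only so the snippet runs on its own; in the component, the modules listed in **Global Imports** are expected to be available instead:
+
+```python
+import math    # In the component, these come from the Global Imports field
+import pandas  # (math,pandas) rather than from import statements.
+
+df = pandas.DataFrame({"radius": [1.0, 2.0, 3.0]})
+df["area"] = df["radius"].apply(lambda r: math.pi * r ** 2)
+print(df)  # Use print() so the result appears in the component's output.
+```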
@@ -446,322 +632,385 @@ If you don't include the package imports in the chat, the agent can still create
| python_code | Code | Input parameter. The Python code to execute. Only modules specified in Global Imports can be used. |
| results | Data | Output parameter. The output of the executed Python code, including any printed results or errors. |
-## Save to File
+## Save File
-This component saves `DataFrame`, `Data`, or `Message` [data types](/data-types) to various file formats.
+The **Save File** component creates a file containing data produced by another component.
+Several file formats are supported, and you can store files in [Langflow storage](/memory) or the local file system.
-1. To use this component in a flow, connect a component that outputs [DataFrames, Data, or Messages](/data-types#data) to the **Save to File** component's input.
-The following example connects a **Webhook** component to two **Save to File** components to demonstrate the different outputs.
+To configure the **Save File** component and use it in a flow, do the following:
-
+1. Connect [`DataFrame`](/data-types#dataframe), [`Data`](/data-types#data), or [`Message`](/data-types#message) output from another component to the **Save File** component's **Input** port.
-2. In the **Save to File** component's **Input Type** field, select the expected input type.
-This example expects **Data** from the **Webhook**.
-3. In the **File Format** field, select the file type for your saved file.
-This example uses `.md` in one **Save to File** component, and `.xlsx` in another.
-4. In the **File Path** field, enter the path for your saved file.
-This example uses `./output/employees.xlsx` and `./output/employees.md` to save the files in a directory relative to where Langflow is running.
-The component accepts both relative and absolute paths, and creates any necessary directories if they don't exist.
-:::tip
-If the `file_format` and `file_path` extension don't align, the component appends the specified format to the file.
-For example, if the `file_format` is `csv`, and the `file_path` is `./output/test.txt`, then the file is saved as `./output/test.txt.csv` so the file isn't corrupted.
-:::
-5. Send a POST request to the **Webhook** containing your JSON data.
-Replace `YOUR_FLOW_ID` with your flow ID.
-This example uses the default Langflow server address.
-```text
-curl -X POST "http://127.0.0.1:7860/api/v1/webhook/YOUR_FLOW_ID" \
--H 'Content-Type: application/json' \
--H 'x-api-key: LANGFLOW_API_KEY' \
--d '{
- "Name": ["Alex Cruz", "Kalani Smith", "Noam Johnson"],
- "Role": ["Developer", "Designer", "Manager"],
- "Department": ["Engineering", "Design", "Management"]
-}'
-```
-6. In your local filesystem, open the `outputs` directory.
-You should see two files created from the data you've sent: one in `.xlsx` for structured spreadsheets, and one in Markdown.
-```text
-| Name | Role | Department |
-|:-------------|:----------|:-------------|
-| Alex Cruz | Developer | Engineering |
-| Kalani Smith | Designer | Design |
-| Noam Johnson | Manager | Management |
-```
+ You can connect the same output to multiple **Save File** components if you want to create multiple files, save the data in different file formats, or save files to multiple locations.
-### File input format options
+2. In **File Name**, enter a file name and an optional path.
-For `DataFrame` and `Data` inputs, the component can create:
- - `csv`
- - `excel`
- - `json`
- - `markdown`
- - `pdf`
+ The **File Name** parameter controls where the file is saved.
+ It can contain a file name or an entire file path:
-For `Message` inputs, the component can create:
- - `txt`
- - `json`
- - `markdown`
- - `pdf`
+ * **Default location**: If you only provide a file name, then the file is stored in `.langflow/data`.
-
-Parameters
+ * **Subdirectory**: To store files in subdirectories, add the path to the **File Name** parameter.
+ For example, `subdirectory/my_file` creates `my_file` in `.langflow/data/subdirectory`.
+ If a given subdirectory doesn't already exist, Langflow automatically creates it.
-**Inputs**
+ * **Absolute or relative path**: To store files elsewhere in your `.langflow` installation or the local file system, provide the absolute or relative path to the desired location.
+ For example, `~/Desktop/my_file` saves `my_file` to the desktop.
-| Name | Display Name | Info |
-|------|--------------|------|
-| input_text | Input Text | The text to analyze and extract patterns from. |
-| pattern | Regex Pattern | The regular expression pattern to match in the text. |
-| input_type | Input Type | The type of input to save. |
-| df | DataFrame | The DataFrame to save. |
-| data | Data | The Data object to save. |
-| message | Message | The Message to save. |
-| file_format | File Format | The file format to save the input in. |
-| file_path | File Path | The full file path including filename and extension. |
+ Don't include an extension in the file name.
+ If you do, the extension is treated as part of the file name; it has no impact on the **File Format** parameter.
-**Outputs**
+3. In the [component's header menu](/concepts-components#component-menus), click **Controls**, select the desired file format, and then click **Close**.
-| Name | Display Name | Info |
-|------|--------------|------|
-| data | Data | A list of extracted matches as Data objects. |
-| text | Message | The extracted matches formatted as a Message object. |
-| confirmation | Confirmation | The confirmation message after saving the file. |
+ The available **File Format** options depend on the input data type:
-
+ * `DataFrame` can be saved to CSV (default), Excel (requires `openpyxl` [custom dependency](/install-custom-dependencies)), JSON (fallback default), or Markdown.
+
+ * `Data` can be saved to CSV, Excel (requires `openpyxl` [custom dependency](/install-custom-dependencies)), JSON (default), or Markdown.
+
+ * `Message` can be saved to TXT, JSON (default), or Markdown.
+
+ :::important Overwrites allowed
+ If you have multiple **Save File** components, in one or more flows, with the same file name, path, and extension, the file contains the data from the most recent run only.
+ Langflow doesn't block overwrites if a matching file already exists.
+ To avoid unintended overwrites, use unique file names and paths.
+ :::
+
+4. To test the **Save File** component, click **Run component**, and then click **Inspect output** to get the filepath where the file was saved.
+
+ The component's literal output is a `Message` containing the original data type, the file name and extension, and the absolute filepath to the file based on the **File Name** parameter.
+ For example:
+
+ ```text
+ DataFrame saved successfully as 'my_file.csv' at /Users/user.name/.langflow/data/my_file.csv
+ ```
+
+ If the **File Name** contains a subdirectory or other non-default path, this is reflected in the `Message` output.
+ For example, a CSV file with the file name `~/Desktop/my_file` could produce the following output:
+
+ ```text
+ DataFrame saved successfully as '/Users/user.name/Desktop/my_file.csv' at /Users/user.name/Desktop/my_file.csv
+ ```
+
+
+5. Optional: If you want to use the saved file in a flow, you must use an API call or another component to retrieve the file from the given filepath.
## Smart Function
In Langflow version 1.5, this component was renamed from **Lambda Filter** to **Smart Function**.
-This component uses an LLM to generate a function for filtering or transforming structured data.
+The **Smart Function** component uses an LLM to generate a Lambda function to filter or transform structured data based on natural language instructions.
+You must connect this component to a [**Language Model** component](/components-models), which is used to generate a function based on the natural language instructions you provide in the **Instructions** parameter.
+The component runs the generated function against the data input, and then outputs the results as [`Data`](/data-types#data).
-To use the **Smart Function** component, you must connect it to a [**Language Model** component](/components-models), which the **Smart Filter** component uses to generate a function based on the natural language instructions in the **Instructions** field.
+:::tip
+Provide brief, clear instructions, focusing on the desired outcome or specific actions, such as `Filter the data to only include items where the 'status' is 'active'`.
+One sentence or less is preferred because end punctuation, like periods, can cause errors or unexpected behavior.
-This example gets JSON data from the `https://jsonplaceholder.typicode.com/users` API endpoint.
-The **Instructions** field in the **Smart Function** component specifies the task `extract emails`.
-The connected LLM creates a filter based on the instructions, and successfully extracts a list of email addresses from the JSON data.
+If you need to provide more detailed instructions that aren't directly relevant to the Lambda function, you can input them in the **Language Model** component's **Input** field or through a **Prompt Template** component.
+:::
-
+The following example uses the **API Request** component to pass JSON data from the `https://jsonplaceholder.typicode.com/users` endpoint to the **Smart Function** component.
+Then, the **Smart Function** component passes the data and the instruction `extract emails` to the attached **Language Model** component.
+From there, the LLM generates a filter function that extracts email addresses from the JSON data, returning the filtered data as chat output.
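+
+The generated function isn't shown in the flow, but it might resemble the following sketch. This is illustrative only; the actual function depends on the model, the data, and your instructions:
+
+```python
+# Sample records shaped like the jsonplaceholder /users response (abbreviated).
+users = [
+    {"id": 1, "name": "Leanne Graham", "email": "Sincere@april.biz"},
+    {"id": 2, "name": "Ervin Howell", "email": "Shanna@melissa.tv"},
+]
+
+# A Lambda function an LLM might generate for the instruction `extract emails`.
+extract_emails = lambda data: [item["email"] for item in data]
+
+print(extract_emails(users))  # ['Sincere@april.biz', 'Shanna@melissa.tv']
+```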
-
-Parameters
+
-**Inputs**
+### Smart Function parameters
+
+Some **Smart Function** component input parameters are hidden by default in the visual editor.
+You can toggle parameters through the **Controls** in the [component's header menu](/concepts-components#component-menus).
| Name | Display Name | Info |
|------|--------------|------|
-| data | Data | The structured data to filter or transform using a Lambda function. |
-| llm | Language Model | The connection port for a **Language Model** component. |
-| filter_instruction | Instructions | The natural language instructions for how to filter or transform the data using a Lambda function, such as `Filter the data to only include items where the 'status' is 'active'`. |
-| sample_size | Sample Size | For large datasets, the number of characters to sample from the dataset head and tail. |
-| max_size | Max Size | The number of characters for the data to be considered "large", which triggers sampling by the `sample_size` value. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| filtered_data | Filtered Data | The filtered or transformed `Data` object. |
-| dataframe | DataFrame | The filtered data as a `DataFrame`. |
-
-
+| data | Data | Input parameter. The structured data to filter or transform using a Lambda function. |
+| llm | Language Model | Input parameter. Connect [`LanguageModel`](/data-types#languagemodel) output from a **Language Model** component. |
+| filter_instruction | Instructions | Input parameter. The natural language instructions for how to filter or transform the data. The LLM uses these instructions to create a Lambda function. |
+| sample_size | Sample Size | Input parameter. For large datasets, the number of characters to sample from the dataset head and tail. Only applied if the dataset meets or exceeds `max_size`. Default: `1000`. |
+| max_size | Max Size | Input parameter. The number of characters for the dataset to be considered large, which triggers sampling by the `sample_size` value. Default: `30000`. |
## Split Text
-This component splits text into chunks based on specified criteria. It's ideal for chunking data to be tokenized and embedded into vector databases.
+The **Split Text** component splits data into chunks based on parameters like chunk size and separator.
+It is often used to chunk data to be tokenized and embedded into vector databases.
+For examples, see [Use Vector Store components in a flow](/components-vector-stores#use-vector-store-components-in-a-flow), [Use Embedding Model components in a flow](/components-embedding-models#use-embedding-model-components-in-a-flow), and [Create a Vector RAG chatbot](/chat-with-rag).
-The **Split Text** component outputs **Chunks** or **DataFrame**.
-The **Chunks** output returns a list of individual text chunks.
-The **DataFrame** output returns a structured data format, with additional `text` and `metadata` columns applied.
+
-1. To use this component in a flow, connect a component that outputs [Data or DataFrame](/data-types#data) to the **Split Text** component's **Data** port.
-This example uses the **URL** component, which is fetching JSON placeholder data.
+The component accepts `Message`, `Data`, or `DataFrame`, and then outputs either **Chunks** or **DataFrame**.
+The **Chunks** output returns a list of [`Data`](/data-types#data) objects containing individual text chunks.
+The **DataFrame** output returns the list of chunks as a structured [`DataFrame`](/data-types#dataframe) with additional `text` and `metadata` columns.
-
+### Split Text parameters
-2. In the **Split Text** component, define your data splitting parameters.
+The **Split Text** component's parameters control how the text is split into chunks, specifically the `chunk_size`, `chunk_overlap`, and `separator` parameters.
-This example splits incoming JSON data at the separator `},`, so each chunk contains one JSON object.
+To test the chunking behavior, add a **Text Input** or **File** component with some sample data to chunk, click **Run component** on the **Split Text** component, and then click **Inspect output** to view the list of chunks and their metadata. The **text** column contains the actual text chunks created from your chunking settings.
+If the chunks aren't split as you expect, adjust the parameters, rerun the component, and then inspect the new output.
-The order of precedence is **Separator**, then **Chunk Size**, and then **Chunk Overlap**.
-If any segment after separator splitting is longer than `chunk_size`, it is split again to fit within `chunk_size`.
-
-After `chunk_size`, **Chunk Overlap** is applied between chunks to maintain context.
-
-3. Connect a **Chat Output** component to the **Split Text** component's **DataFrame** output to view its output.
-4. Click **Playground**, and then click **Run Flow**.
-The output contains a table of JSON objects split at `},`.
-```text
-{
-"userId": 1,
-"id": 1,
-"title": "Introduction to Artificial Intelligence",
-"body": "Learn the basics of Artificial Intelligence and its applications in various industries.",
-"link": "https://example.com/article1",
-"comment_count": 8
-},
-{
-"userId": 2,
-"id": 2,
-"title": "Web Development with React",
-"body": "Build modern web applications using React.js and explore its powerful features.",
-"link": "https://example.com/article2",
-"comment_count": 12
-},
-```
-5. Clear the **Separator** field, and then run the flow again.
-Instead of JSON objects, the output contains 50-character lines of text with 10 characters of overlap.
-```text
-First chunk: "title": "Introduction to Artificial Intelligence""
-Second chunk: "elligence", "body": "Learn the basics of Artif"
-Third chunk: "s of Artificial Intelligence and its applications"
-```
-
-
-Parameters
-
-**Inputs**
+Some **Split Text** component input parameters are hidden by default in the visual editor.
+You can toggle parameters through the **Controls** in the [component's header menu](/concepts-components#component-menus).
| Name | Display Name | Info |
|------|--------------|------|
-| data_inputs | Input Documents | The data to split. The component accepts [Data](/data-types#data) or [DataFrame](/data-types#dataframe) objects. |
-| chunk_overlap | Chunk Overlap | The number of characters to overlap between chunks. Default: `200`. |
-| chunk_size | Chunk Size | The maximum number of characters in each chunk. Default: `1000`. |
-| separator | Separator | The character to split on. Default: `newline`. |
-| text_key | Text Key | The key to use for the text column. Default: `text`. |
+| data_inputs | Input | Input parameter. The data to split. Input must be in `Message`, `Data`, or `DataFrame` format. |
+| chunk_overlap | Chunk Overlap | Input parameter. The number of characters to overlap between chunks. This helps maintain context across chunks. When a separator is encountered, the overlap is applied at the point of the separator so that the subsequent chunk contains the last _n_ characters of the preceding chunk. Default: `200`. |
+| chunk_size | Chunk Size | Input parameter. The target length for each chunk after splitting. The data is first split by separator, and then chunks smaller than the `chunk_size` are merged up to this limit. However, if the initial separator split produces any chunks larger than the `chunk_size`, those chunks are neither further subdivided nor combined with any smaller chunks; they are output as-is even though they exceed the `chunk_size`. Default: `1000`. See [Tokenization errors due to chunk size](#chunk-size) for important considerations. |
+| separator | Separator | Input parameter. A string defining a character to split on, such as `\n` to split on new line characters, `\n\n` to split at paragraph breaks, or `},` to split at the end of JSON objects. You can directly provide the separator string, or pass a separator string from another component as `Message` input. |
+| text_key | Text Key | Input parameter. The key to use for the text column that is extracted from the input and then split. Default: `text`. |
+| keep_separator | Keep Separator | Input parameter. Select how to handle separators in output chunks. If False, separators are omitted from output chunks. Options include `False` (remove separators), `True` (keep separators in chunks without preference for placement), `Start` (place separators at the beginning of chunks), or `End` (place separators at the end of chunks). Default: `False`. |
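+
+The following standalone sketch illustrates the split-then-merge behavior described for `separator` and `chunk_size`. It is illustrative only, not the component's implementation, and it omits the overlap and **Keep Separator** behavior:
+
+```python
+def split_then_merge(text: str, separator: str = "\n", chunk_size: int = 1000) -> list[str]:
+    """Split on the separator, then merge adjacent pieces up to chunk_size.
+    Pieces that already exceed chunk_size are emitted as-is, not subdivided."""
+    chunks: list[str] = []
+    current = ""
+    for piece in text.split(separator):
+        if len(piece) >= chunk_size:
+            if current:
+                chunks.append(current)
+                current = ""
+            chunks.append(piece)
+        elif len(current) + len(separator) + len(piece) <= chunk_size:
+            current = f"{current}{separator}{piece}" if current else piece
+        else:
+            chunks.append(current)
+            current = piece
+    if current:
+        chunks.append(current)
+    return chunks
+
+print(split_then_merge("a,bb,ccc,dddd", separator=",", chunk_size=5))
+# ['a,bb', 'ccc', 'dddd']
+```
+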
-**Outputs**
+### Tokenization errors due to chunk size {#chunk-size}
-| Name | Display Name | Info |
-|------|--------------|------|
-| chunks | Chunks | A list of split text chunks as [Data](/data-types#data) objects. |
-| dataframe | DataFrame | A list of split text chunks as [DataFrame](/data-types#dataframe) objects. |
-
-
+When using **Split Text** with embedding models (especially NVIDIA models like `nvidia/nv-embed-v1`), you may need to use smaller chunk sizes (`500` or less) even though the model supports larger token limits.
+The **Split Text** component doesn't always enforce the exact chunk size you set, and individual chunks may exceed your specified limit.
+If you encounter tokenization errors, modify your text splitting strategy by reducing the chunk size, changing the overlap length, or using a more common separator.
+Then, test your configuration by running the flow and inspecting the component's output.
### Other text splitters
See [LangChain text splitter components](/bundles-langchain#text-splitters).
-## Structured output
+## Structured Output
-This component transforms LLM responses into structured data formats.
+The **Structured Output** component uses an LLM to transform any input into structured data (`Data` or `DataFrame`) using natural language formatting instructions and an output schema definition.
+For example, you can extract specific details from documents, like email messages or scientific papers.
-In this example from the **Financial Support Parser** template, the **Structured Output** component transforms unstructured financial reports into structured data.
+### Use the Structured Output component in a flow
-
+To use the **Structured Output** component in a flow, do the following:
-The connected LLM model is prompted by the **Structured Output** component's `Format Instructions` parameter to extract structured output from the unstructured text. `Format Instructions` is utilized as the system prompt for the **Structured Output** component.
+1. Provide an **Input Message**, which is the source material from which you want to extract structured data.
+This can come from practically any component, but it is typically a **Chat Input**, **File**, or other component that provides some unstructured or semi-structured input.
-In the **Structured Output** component, click the **Open table** button to view the `Output Schema` table.
-The `Output Schema` parameter defines the structure and data types for the model's output using a table with the following fields:
+ :::tip
+ Not all source material has to become structured output.
+ The power of the **Structured Output** component is that you can specify the information you want to extract, even if that data isn't explicitly labeled or an exact keyword match.
+ Then, the LLM can use your instructions to analyze the source material, extract the relevant data, and format it according to your specifications.
+ Any irrelevant source material isn't included in the structured output.
+ :::
-* **Name**: The name of the output field.
-* **Description**: The purpose of the output field.
-* **Type**: The data type of the output field. The available types are `str`, `int`, `float`, `bool`, `list`, or `dict`. The default is `text`.
-* **Multiple (deprecated)**: Always true to allow multiple output values for a single field.
+2. Define **Format Instructions** and an **Output Schema** to specify the data to extract from the source material and how to structure it in the final `Data` or `DataFrame` output.
-The **Parse DataFrame** component parses the structured output into a template for orderly presentation in chat output. The template receives the values from the `output_schema` table with curly braces.
+   The instructions are a prompt that tells the LLM what data to extract, how to format it, how to handle exceptions, and any other instructions relevant to preparing the structured data.
-For example, the template `EBITDA: {EBITDA} , Net Income: {NET_INCOME} , GROSS_PROFIT: {GROSS_PROFIT}` presents the extracted values in the **Playground** as `EBITDA: 900 million , Net Income: 500 million , GROSS_PROFIT: 1.2 billion`.
+ The schema is a table that defines the fields (keys) and data types to organize the data extracted by the LLM into a structured `Data` or `DataFrame` object.
+   For more information, see [Output Schema options](#output-schema-options).
+
+3. Attach a [**Language Model** component](/components-models) that is set to emit [`LanguageModel`](/data-types#languagemodel) output.
+
+ The **Language Model** component uses the **Input Message** and **Format Instructions** from the **Structured Output** component to extract specific pieces of data from the input text.
+ The output schema is applied to the model's response to produce the final `Data` or `DataFrame` structured object.
+
+4. Optional: Connect the structured output to downstream components that use the extracted data for further processing, such as the **Parser** or **Data Operations** components.
+
+
-Parameters
+Structured Output example: Financial Report Parser template
-**Inputs**
+The **Financial Report Parser** template provides an example of how the **Structured Output** component can be used to extract structured data from unstructured text.
-| Name | Type | Description |
-|------|------|-------------|
-| llm | LanguageModel | The language model to use to generate the structured output. |
-| input_value | String | The input message to the language model. |
-| system_prompt | String | The instructions to the language model for formatting the output. |
-| schema_name | String | The name for the output data schema. |
-| output_schema | Table | The structure and data types for the model's output. |
-| multiple | Boolean | [Deprecated] Always true. |
+The template's **Structured Output** component has the following configuration:
-**Outputs**
+* The **Input Message** comes from a **Chat Input** component that is preloaded with quotes from sample financial reports.
-| Name | Type | Description |
-|------|------|-------------|
-| structured_output | Data | The structured output is a Data object based on the defined schema. |
+* The **Format Instructions** are as follows:
+
+ ```text
+ You are an AI that extracts structured JSON objects from unstructured text.
+ Use a predefined schema with expected types (str, int, float, bool, dict).
+ Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all.
+ Fill missing or ambiguous values with defaults: null for missing values.
+ Remove exact duplicates but keep variations that have different field values.
+ Always return valid JSON in the expected format, never throw errors.
+ If multiple objects can be extracted, return them all in the structured format.
+ ```
+
+* The **Output Schema** includes keys for `EBITDA`, `NET_INCOME`, and `GROSS_PROFIT`.
+
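+With this configuration, the structured output might resemble the following `Data` contents (values shown are illustrative):
+
+```json
+{
+  "EBITDA": "900 million",
+  "NET_INCOME": "500 million",
+  "GROSS_PROFIT": "1.2 billion"
+}
+```
+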
+The structured `Data` object is passed to a **Parser** component that produces a text string by mapping the schema keys to variables in the parsing template:
+
+```text
+EBITDA: {EBITDA} , Net Income: {NET_INCOME} , GROSS_PROFIT: {GROSS_PROFIT}
+```
+
+When printed to the **Playground**, the resulting `Message` replaces the variables with the actual values extracted by the **Structured Output** component. For example:
+
+```text
+EBITDA: 900 million , Net Income: 500 million , GROSS_PROFIT: 1.2 billion
+```
-## Type convert
+### Structured Output parameters
-This component converts data types between different formats. It can transform data between [`Data`](/data-types#data), [`DataFrame`](/data-types#dataframe), and [`Message`](/data-types#message) objects.
+Some **Structured Output** component input parameters are hidden by default in the visual editor.
+You can toggle parameters through the **Controls** in the [component's header menu](/concepts-components#component-menus).
+
+| Name | Type | Description |
+|------|------|-------------|
+| Language Model (`llm`) | `LanguageModel` | Input parameter. The [`LanguageModel`](/data-types#languagemodel) output from a **Language Model** component that defines the LLM to use to analyze, extract, and prepare the structured output. |
+| Input Message (`input_value`) | String | Input parameter. The input message containing source material for extraction. |
+| Format Instructions (`system_prompt`) | String | Input parameter. The instructions to the language model for extracting and formatting the output. |
+| Schema Name (`schema_name`) | String | Input parameter. An optional title for the **Output Schema**. |
+| Output Schema (`output_schema`)| Table | Input parameter. A table describing the schema of the desired structured output, ultimately determining the content of the `Data` or `DataFrame` output. See [Output Schema options](#output-schema-options). |
+| Structured Output (`structured_output`) | `Data` or `DataFrame` | Output parameter. The final structured output produced by the component. Near the component's output port, you can select the output data type as either **Structured Output Data** or **Structured Output DataFrame**. The specific content and structure of the output depends on the input parameters. |
+
+#### Output Schema options {#output-schema-options}
+
+After the LLM extracts the relevant data from the **Input Message** and **Format Instructions**, the data is organized according to the **Output Schema**.
+
+The schema is a table that defines the fields (keys) and data types for the final `Data` or `DataFrame` output from the **Structured Output** component.
+
+The default schema is a single `field` string.
+
+To add a key to the schema, click **Add a new row**, and then edit each column to define the schema:
+
+* **Name**: The name of the output field. Typically a specific key for which you want to extract a value.
+
+ You can reference these keys as variables in downstream components, such as a **Parser** component's template.
+ For example, the schema key `NET_INCOME` could be referenced by the variable `{NET_INCOME}`.
+
+* **Description**: An optional metadata description of the field's contents and purpose.
+
+* **Type**: The data type of the value stored in the field.
+Supported types are `str` (default), `int`, `float`, `bool`, and `dict`.
+
+* **As List**: Enable this setting if you want the field to contain a list of values rather than a single value.
+
+For simple schemas, you might only extract a few `str` or `int` fields.
+For more complex schemas with lists and dictionaries, it might help to refer to the `Data` and `DataFrame` structures and attributes, as described in [Langflow data types](/data-types).
+You can also emit a rough `Data` or `DataFrame`, and then use downstream components for further refinement, such as a **Data Operations** component.
+
+## Type Convert
+
+The **Type Convert** component converts data from one type to another.
+It supports `Data`, `DataFrame`, and `Message` data types.
+
+
+
+
+A `Data` object is a structured object that contains a primary `text` key and other key-value pairs:
+
+```json
+"data": {
+ "text": "User Profile",
+ "name": "Charlie Lastname",
+ "age": 28,
+ "email": "charlie.lastname@example.com"
+},
+```
+
+The larger context associated with a component's `data` dictionary also identifies which key is the primary `text_key`, and it can provide an optional default value if the primary key isn't specified.
+For example:
-* **Data**: A structured object that contains both text and metadata.
```json
{
- "text": "User Profile",
+ "text_key": "text",
"data": {
- "name": "John Smith",
- "age": 30,
- "email": "john@example.com"
- }
+ "text": "User Profile",
+ "name": "Charlie Lastname",
+ "age": 28,
+ "email": "charlie.lastname@example.com"
+ },
+ "default_value": ""
}
```
-* **DataFrame**: A tabular data structure with rows and columns.
-Keys are columns, and each dictionary (a collection of key-value pairs) in the list is a row.
+
+
+
+
+A `DataFrame` is an array that represents a tabular data structure with rows and columns.
+
+It consists of a list (array) of dictionary objects, where each dictionary represents a row.
+Each key in the dictionaries corresponds to a column name.
+For example, the following `DataFrame` contains two rows with columns for `name`, `age`, and `email`:
+
```json
[
{
- "name": "John Smith",
- "age": 30,
- "email": "john@example.com"
+ "name": "Charlie Lastname",
+ "age": 28,
+ "email": "charlie.lastname@example.com"
},
{
- "name": "Jane Doe",
+ "name": "Bobby Othername",
"age": 25,
- "email": "jane@example.com"
+ "email": "bobby.othername@example.com"
}
]
```
-* **Message**: A string, such as`"Name: John Smith, Age: 30, Email: john@example.com"`.
-To use this component in a flow, do the following:
+
+
-1. Add the **Web Search** component to the **Basic Prompting** template. In the **Search Query** field, enter a query, such as `environmental news`.
-2. Connect the **Web Search** component's output to a component that accepts the `DataFrame` input.
-This example uses a **Prompt Template** component to give the chatbot context, so you must convert the **Web Search** component's `DataFrame` output to a `Message` type.
-3. Connect a **Type Convert** component to convert the `DataFrame` to a `Message`.
-4. In the **Type Convert** component, in the **Output Type** field, select **Message**.
+A `Message` is primarily for passing a `text` string, such as `"Name: Charlie Lastname, Age: 28, Email: charlie.lastname@example.com"`.
+However, the entire `Message` object can include metadata about the message, particularly when used as chat input or output.
- 
+
+
-5. In the **Language Model** component, in the **OpenAI API Key** field, add your OpenAI API key.
-6. Click **Playground**, and then ask about `latest news`.
+For more information, see [Langflow data types](/data-types).
-The search results are returned to the **Playground** as a message.
+### Use the Type Convert component in a flow
-Result:
-```text
-Latest news
-AI
-gpt-4o-mini
-Here are some of the latest news articles related to the environment:
-Ozone Pollution and Global Warming: A recent study highlights that ozone pollution is a significant global environmental concern, threatening human health and crop production while exacerbating global warming. Read more
-...
-```
+The **Type Convert** component is typically used to transform data into a format required by a downstream component.
+For example, if a component outputs a `Message`, but the following component requires `Data`, then you can use the **Type Convert** component to reformat the `Message` as `Data` before passing it to the downstream component.
-
-Parameters
+The following example uses the **Type Convert** component to convert the `DataFrame` output from a **Web Search** component into `Message` data that is passed as text input for an LLM:
-**Inputs**
+1. Create a flow based on the **Basic prompting** template.
+
+2. Add a **Web Search** component to the flow, and then enter a search query, such as `environmental news`.
+
+3. In the **Prompt Template** component, replace the contents of the **Template** field with the following text:
+
+ ```text
+ Answer the user's question using the {context}
+ ```
+
+ The curly braces define a [prompt variable](/components-prompts#define-variables-in-prompts) that becomes an input field on the **Prompt Template** component.
+ In this example, you will use the **context** field to pass the search results into the template, as explained in the next steps.
+
+4. Add a **Type Convert** component to the flow, and then set the **Output Type** to **Message**.
+
+ Because the **Web Search** component's `DataFrame` output is incompatible with the **context** variable's `Message` input, you must use the **Type Convert** component to change the `DataFrame` to a `Message` in order to pass the search results to the **Prompt Template** component.
+
+5. Connect the additional components to the rest of the flow:
+
+ * Connect the **Web Search** component's output to the **Type Convert** component's input.
+ * Connect the **Type Convert** component's output to the **Prompt Template** component's **context** input.
+
+ 
+
+6. In the **Language Model** component, add your OpenAI API key.
+
+ If you want to use a different provider or model, edit the **Model Provider**, **Model Name**, and **API Key** fields accordingly.
+
+7. Click **Playground**, and then ask something relevant to your search query, such as `latest news` or `what's the latest research on the environment?`.
+
+
+ Result
+
+   The LLM uses the search results context, your chat message, and its built-in training data to respond to your question.
+ For example:
+
+ ```text
+ Here are some of the latest news articles related to the environment:
+ Ozone Pollution and Global Warming: A recent study highlights that ozone pollution is a significant global environmental concern, threatening human health and crop production while exacerbating global warming. Read more
+ ...
+ ```
+
+
+
+### Type Convert parameters
| Name | Display Name | Info |
|------|--------------|------|
-| input_data | Input Data | The data to convert. Accepts Data, DataFrame, or Message objects. |
-| output_type | Output Type | The desired output type. Options: Data, DataFrame, or Message. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| output | Output | The converted data in the specified format. |
-
-
+| input_data | Input Data | Input parameter. The data to convert. Accepts `Data`, `DataFrame`, or `Message` input. |
+| output_type | Output Type | Input parameter. The desired output type, as one of **Data**, **DataFrame**, or **Message**. |
+| output | Output | Output parameter. The converted data in the specified format. The output port changes depending on the selected **Output Type**. |
## Legacy Processing components
diff --git a/docs/docs/Components/components-vector-stores.mdx b/docs/docs/Components/components-vector-stores.mdx
index 701163091..09719649f 100644
--- a/docs/docs/Components/components-vector-stores.mdx
+++ b/docs/docs/Components/components-vector-stores.mdx
@@ -4,121 +4,455 @@ slug: /components-vector-stores
---
import Icon from "@site/src/components/icon";
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
-Langflow's **Vector Store** components connect to your vector databases or create in-memory vector stores for storing and retrieving vector data in flows.
+Langflow's **Vector Store** components are used to read and write vector data, including embedding storage, vector search, Graph RAG traversals, and specialized provider-specific search, such as OpenSearch, Elasticsearch, and Vectara.
-Vector databases and **Vector Store** components are specifically designed for storing and retrieving vector data, such as embeddings generated by language models. They are used to perform similarity searches, enabling applications like chatbots to retrieve relevant context from large datasets.
+These components are critical for vector search applications, such as Retrieval Augmented Generation (RAG) chatbots that need to retrieve relevant context from large datasets.
+
+Most of these components connect to a specific vector database provider, but some components support multiple providers or platforms.
+For example, the **Cassandra** vector store component can connect to self-managed Apache Cassandra-based clusters as well as Astra DB, which is a managed Cassandra DBaaS.
Other types of storage, like traditional structured databases and chat memory, are handled through other components like the [**SQL Database** component](/components-data#sql-database) or the [**Message History** component](/components-helpers#message-history).
-## Use a Vector Store component in a flow
+## Use Vector Store components in a flow
:::tip
-For an tutorial using **Vector Store** components in a flow, see [Create a vector RAG chatbot](/chat-with-rag).
+For a tutorial using **Vector Store** components in a flow, see [Create a vector RAG chatbot](/chat-with-rag).
:::
-This example uses the **Chroma DB** vector store component. Your **Vector Store** component's parameters and authentication may be different, but the document ingestion workflow is the same. A document is loaded from a local machine and chunked. The **Vector Store** component generates embeddings with the connected [**Embedding Model** component](/components-embedding-models), and stores them in the connected vector database.
-This vector data can then be retrieved for workloads like Retrieval Augmented Generation (RAG).
+The following steps introduce the use of **Vector Store** components in a flow, including configuration details, how the components work when you run a flow, why you might need multiple **Vector Store** components in one flow, and useful supporting components, such as **Embedding Model** and **Parser** components.
-
+1. Create a flow with the **Vector Store RAG** template.
-The user's chat input is embedded and compared to the vectors embedded during document ingestion for a similarity search.
-The results are output from the **Vector Store** component as a [`Data`](/data-types#data) object and parsed into text.
-This text fills the `{context}` variable in the **Prompt Template** component, which informs the **OpenAI** language model component's responses.
+ This template has two subflows.
+ The **Load Data** subflow loads embeddings and content into a vector database, and the **Retriever** subflow runs a vector search to retrieve relevant context based on a user's query.
-
+2. Configure the database connection for both [**Astra DB** components](#astra-db), or replace them with another pair of **Vector Store** components of your choice.
+Make sure the components connect to the same vector store, and that the component in the **Retriever** subflow is able to run a similarity search.
-### Configure vector store parameters
+ The parameters you set in each **Vector Store** component depend on the component's role in your flow.
+ In this example, the **Load Data** subflow _writes_ to the vector store, whereas the **Retriever** subflow _reads_ from the vector store.
+   Therefore, search-related parameters are only relevant to the **Vector Store** component in the **Retriever** subflow.
-Most **Vector Store** components have the same utility within a flow, but each provider can offer different parameters and functionality.
-Inspect a component's parameters to learn more about the inputs it accepts and how to configure it.
+ For information about specific configuration parameters, see the section of this page for your chosen **Vector Store** component and [Hidden parameters](#hidden-parameters).
+
+3. To configure the embedding model, do one of the following:
+
+ * **Use an OpenAI model**: In both **OpenAI Embeddings** components, enter your OpenAI API key.
+ You can use the default model or select a different OpenAI embedding model.
+
+   * **Use another provider**: Replace the **OpenAI Embeddings** components with another pair of [**Embedding Model** components](/components-embedding-models) of your choice, and then configure the parameters and credentials accordingly.
+
+ * **Use Astra DB vectorize**: If you are using an Astra DB vector store that has a vectorize integration, you can remove both **OpenAI Embeddings** components.
+   If you do this, the vectorize integration automatically generates embeddings from the **Ingest Data** input (in the **Load Data** subflow) and the **Search Query** input (in the **Retriever** subflow).
+
+ :::tip
+ If your vector store already contains embeddings, make sure your **Embedding Model** components use the same model as your previous embeddings.
+ Mixing embedding models in the same vector store can produce inaccurate search results.
+ :::
+
+4. Recommended: In the [**Split Text** component](/components-processing#split-text), optimize the chunking settings for your embedding model.
+For example, if your embedding model has a token limit of 512, then the **Chunk Size** parameter must not exceed that limit.
+
+ Additionally, because the **Retriever** subflow passes the chat input directly to the **Vector Store** component for vector search, make sure that your chat input string doesn't exceed your embedding model's limits.
+ For this example, you can enter a query that is within the limits; however, in a production environment, you might need to implement additional checks or preprocessing steps to ensure compliance.
+   For example, use additional components to prepare the chat input before running the vector search, or enforce chat input limits in your application code, as shown in the sketch after these steps.
+
+5. In the **Language Model** component, enter your OpenAI API key, or select a different provider and model to use for the chat portion of the flow.
+
+6. Run the **Load Data** subflow to populate your vector store.
+In the **File** component, select one or more files, and then click **Run component** on the **Vector Store** component in the **Load Data** subflow.
+
+ The **Load Data** subflow loads files from your local machine, chunks them, generates embeddings for the chunks, and then stores the chunks and their embeddings in the vector database.
+
+ 
+
+ The **Load Data** subflow is separate from the **Retriever** subflow because you probably won't run it every time you use the chat.
+ You can run the **Load Data** subflow as needed to preload or update the data in your vector store.
+ Then, your chat interactions only use the components that are necessary for chat.
+
+ If your vector store already contains data that you want to use for vector search, then you don't need to run the **Load Data** subflow.
+
+7. Open the **Playground** and start chatting to run the **Retriever** subflow.
+
+ The **Retriever** subflow generates an embedding from chat input, runs a vector search to retrieve similar content from your vector store, parses the search results into supplemental context for the LLM, and then uses the LLM to generate a natural language response to your query.
+ The LLM uses the vector search results along with its internal training data and tools, such as basic web search and datetime information, to produce the response.
+
+ 
+
+ To avoid passing the entire block of raw search results to the LLM, the **Parser** component extracts `text` strings from the search results `Data` object, and then passes them to the **Prompt Template** component in `Message` format.
+ From there, the strings and other template content are compiled into natural language instructions for the LLM.
+
+ You can use other components for this transformation, such as the **Data Operations** component, depending on how you want to use the search results.
+
+ To view the raw search results, click **Inspect output** on the **Vector Store** component after running the **Retriever** subflow.
+
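+For example, the following is a minimal sketch of a chat input length check that your application could run before sending a query to the flow, as mentioned in step 4. It assumes an OpenAI-style tokenizer from the `tiktoken` package and a hypothetical 512-token embedding limit; adjust both for your actual embedding model.
+
+```python
+import tiktoken
+
+# Hypothetical limit; check your embedding model's documentation.
+MAX_EMBEDDING_TOKENS = 512
+
+# cl100k_base is the tokenizer used by recent OpenAI embedding models.
+encoding = tiktoken.get_encoding("cl100k_base")
+
+
+def validate_query(query: str) -> str:
+    """Reject chat input that exceeds the embedding model's token limit."""
+    token_count = len(encoding.encode(query))
+    if token_count > MAX_EMBEDDING_TOKENS:
+        raise ValueError(
+            f"Query is {token_count} tokens; the embedding model accepts "
+            f"at most {MAX_EMBEDDING_TOKENS} tokens."
+        )
+    return query
+
+
+validate_query("What's the latest research on the environment?")
+```
+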
+### Hidden parameters
+
+You can inspect a **Vector Store** component's parameters to learn more about the inputs it accepts, the features it supports, and how to configure it.
Many input parameters for **Vector Store** components are hidden by default in the visual editor.
You can toggle parameters through the **Controls** in each [component's header menu](/concepts-components#component-menus).
-For details about a specific provider's parameters, see the provider's documentation.
+Some parameters are conditional, and they are only available after you set other parameters or select specific options for other parameters.
+Conditional parameters may not be visible on the **Controls** pane until you set the required dependencies.
+However, all parameters are always listed in a [component's code](/concepts-components#component-code).
-## Astra DB
+For information about a specific component's parameters, see the provider's documentation and the component details.
-This component implements an [Astra DB Serverless vector store](https://docs.datastax.com/en/astra-db-serverless/databases/create-database.html) with search capabilities.
+### Search results output
+
+If you use a **Vector Store** component to query your vector store, it produces search results that you can pass to downstream components in your flow as a list of [`Data`](/data-types#data) objects or a tabular [`DataFrame`](/data-types#dataframe).
+If both types are supported, you can set the format near the component's output port in the visual editor.
+
+The exception to this pattern is the **Vectara RAG** component, which outputs only an `answer` string in [`Message`](/data-types#message) format.
+
+### Vector store instances
+
+Because Langflow is based on LangChain, **Vector Store** components use an instance of [LangChain vector store](https://python.langchain.com/docs/integrations/vectorstores/) to drive the underlying vector search functions.
+In the component code, this is often instantiated as `vector_store`, but some components use a different name, such as the provider name.
+
+For the **Cassandra Graph** and **Astra DB Graph** components, `vector_store` is an instance of [LangChain graph vector store](https://python.langchain.com/api_reference/community/graph_vectorstores.html).
+
+These instances are provider-specific and configured according to the component's parameters.
+For example, the **Redis** component creates an instance of [`RedisVectorStore`](https://python.langchain.com/docs/integrations/vectorstores/redis/) based on the component's parameters, such as the connection string, index name, and schema.
+
+Some LangChain classes don't expose all possible options as component parameters.
+Depending on the provider, these options might use default values or allow modification through environment variables, if they are supported in Langflow.
+For information about specific options, see the LangChain API reference and provider documentation.
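+
+For illustration, the following is a minimal standalone sketch of this pattern, using the LangChain `Chroma` class with parameters that correspond to the **Chroma DB** component's collection name, persist directory, and embedding function. This is not the component's actual code; it assumes the `langchain-chroma` and `langchain-openai` packages and an `OPENAI_API_KEY` environment variable.
+
+```python
+from langchain_chroma import Chroma
+from langchain_openai import OpenAIEmbeddings
+
+# Component-style parameters mapped onto the underlying LangChain instance.
+vector_store = Chroma(
+    collection_name="langflow",
+    embedding_function=OpenAIEmbeddings(model="text-embedding-3-small"),
+    persist_directory="./chroma",  # Omit for an ephemeral, in-memory store.
+)
+
+# Writes and reads both go through the same vector store instance.
+vector_store.add_texts(["Ozone pollution is a significant global concern."])
+results = vector_store.similarity_search("latest environmental research", k=4)
+```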
-Parameters
+Vector Store Connection ports
-**Inputs**
+The **Astra DB** and **OpenSearch** components have an additional **Vector Store Connection** output.
+This output can only connect to a `VectorStore` input port, and it was intended for use with dedicated Graph RAG components.
-| Name | Display Name | Info |
-|------|--------------|------|
-| token | Astra DB Application Token | An Astra application token with permission to access your vector database. Once the connection is verified, additional fields are populated with your existing databases and collections. |
-| environment | Environment | The environment for the Astra DB API Endpoint. For example, `dev` or `prod`. |
-| database_name | Database | The name of the database that you want this component to connect to, or select **New Database** to create a new database. To create a new database, you must provide the database details, and then wait for the database to initialize. |
-| api_endpoint | Astra DB API Endpoint | The API endpoint for the Astra DB instance. This supersedes the database selection. |
-| collection_name | Collection | The name of the collection that you want to use with this flow, or click **New Collection** to create a new collection. |
-| keyspace | Keyspace | An optional keyspace within Astra DB to use for the collection. |
-| embedding_choice | Embedding Model or Astra Vectorize | Choose an embedding model or use Astra vectorize. If the collection has a vectorize integration, **Astra Vectorize** can be selected automatically. |
-| embedding_model | Embedding Model | Specify the embedding model. Not required if the embedding choice is **Astra Vectorize** because the component automatically uses the integrated model. |
-| number_of_results | Number of Search Results | The number of search results to return. Default:`4`. |
-| search_type | Search Type | The search type to use. The options are `Similarity`, `Similarity with score threshold`, and `MMR (Max Marginal Relevance)`. |
-| search_score_threshold | Search Score Threshold | The minimum similarity score threshold for search results when using the `Similarity with score threshold` option. |
-| advanced_search_filter | Search Metadata Filter | An optional dictionary of filters to apply to the search query. |
-| autodetect_collection | Autodetect Collection | Boolean flag to determine whether to autodetect the collection. |
-| content_field | Content Field | A field to use as the text content field for the vector store. |
-| deletion_field | Deletion Based On Field | When provided, documents in the target collection with metadata field values matching the input metadata field value are deleted before new data is loaded. |
-| ignore_invalid_documents | Ignore Invalid Documents | Boolean flag to determine whether to ignore invalid documents at runtime. |
-| astradb_vectorstore_kwargs | AstraDBVectorStore Parameters | An optional dictionary of additional parameters for the AstraDBVectorStore. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| vector_store | Vector Store | The Astra DB vector store instance configured with the specified parameters. |
-| search_results | Search Results | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+The only non-legacy component that accepts this input is the **Graph RAG** component, which was meant as a Graph RAG extension to the **Astra DB** component.
+Alternatively, you can use the **Astra DB Graph** component, which includes both the vector store connection and Graph RAG functionality in one component.
+OpenSearch instances support graph traversal through built-in RAG functionality and plugins.
-### Generate embeddings
+## Apache Cassandra
-The **Astra DB** component offers two methods for generating embeddings.
+The **Cassandra** and **Cassandra Graph** components can be used with Cassandra clusters that support vector search, including Astra DB.
-* ***Embedding Model**: Use your own embedding model by connecting an [**Embedding Model** component](/components-embedding-models) in Langflow.
+For more information, see the following:
-* **Astra Vectorize**: Use Astra DB's built-in embedding generation service. When creating a new collection, choose the embeddings provider and models, including NVIDIA's `NV-Embed-QA` model hosted by DataStax.
-For more information, see the [Astra DB Serverless documentation](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html).
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Vector search in Cassandra](https://cassandra.apache.org/doc/latest/cassandra/vector-search/overview.html)
- :::important
- With vectorize, the embedding model you choose when you create a collection cannot be changed later.
- :::
+### Cassandra
-For an example of using the **Astra DB** component with an embedding model, see the **Vector Store RAG** template.
+Use the **Cassandra** component to read or write to a Cassandra vector store using a `CassandraVectorStore` instance.
-### Hybrid search
+
+Cassandra parameters
-The **Astra DB** component includes Astra DB's [hybrid search](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html) feature through the Astra DB Data API.
+| Name | Type | Description |
+|------|------|-------------|
+| database_ref | String | Input parameter. Contact points for the database or an Astra database ID. |
+| username | String | Input parameter. Username for the database. Leave empty for Astra DB. |
+| token | SecretString | Input parameter. User password for the database or an Astra application token. |
+| keyspace | String | Input parameter. The name of the keyspace containing the vector store specified in **Table Name** (`table_name`). |
+| table_name | String | Input parameter. The name of the table or collection that is the vector store. |
+| ttl_seconds | Integer | Input parameter. Time-to-live for added texts, if supported by the cluster. Only relevant for writes. |
+| batch_size | Integer | Input parameter. The number of records to process in a single batch. |
+| setup_mode | String | Input parameter. Configuration mode for setting up a Cassandra table. |
+| cluster_kwargs | Dict | Input parameter. Additional keyword arguments for a Cassandra cluster. |
+| search_query | String | Input parameter. Query string for similarity search. Only relevant for reads. |
+| ingest_data | Data | Input parameter. Data to be loaded into the vector store as raw chunks and embeddings. Only relevant for writes. |
+| embedding | Embeddings | Input parameter. Embedding function to use. |
+| number_of_results | Integer | Input parameter. Number of results to return in search. Only relevant for reads. |
+| search_type | String | Input parameter. Type of search to perform. Only relevant for reads. |
+| search_score_threshold | Float | Input parameter. Minimum similarity score for search results. Only relevant for reads. |
+| search_filter | Dict | Input parameter. An optional dictionary of metadata search filters to apply in addition to vector search. Only relevant for reads. |
+| body_search | String | Input parameter. Document textual search terms. Only relevant for reads. |
+| enable_body_search | Boolean | Input parameter. Flag to enable body search. Only relevant for reads. |
-Hybrid search performs a vector similarity search and a lexical search, compares the results of both searches, and then returns the most relevant results overall.
+
-To use hybrid search through the **Astra DB** component, you must [create a collection with that supports hybrid search](https://docs.datastax.com/en/astra-db-serverless/api-reference/collection-methods/create-collection.html#example-hybrid).
+### Cassandra Graph
-The following **Astra DB** component parameters are used for hybrid search:
+The **Cassandra Graph** component uses a `CassandraGraphVectorStore` instance for graph traversal and graph-based document retrieval in a compatible Cassandra cluster.
+It also supports writing to the vector store.
-* **Search Query**: The query to use for vector search.
-* **Lexical Terms**: A comma-separated string of keywords, like `features, data, attributes, characteristics`.
-* **Reranker**: The re-ranker model to use for hybrid search, such as `nvidia/llama-3.2-nv.reranker`.
+
+Cassandra Graph parameters
+
+| Name | Display Name | Info |
+|------|--------------|------|
+| database_ref | Contact Points / Astra Database ID | Input parameter. The contact points for the database or an Astra database ID. Required. |
+| username | Username | Input parameter. The username for the database. Leave empty for Astra DB. |
+| token | Password / Astra DB Token | Input parameter. The user password for the database or an Astra application token. Required. |
+| keyspace | Keyspace | Input parameter. The name of the keyspace containing the vector store specified in **Table Name** (`table_name`). Required. |
+| table_name | Table Name | Input parameter. The name of the table or collection that is the vector store. Required. |
+| setup_mode | Setup Mode | Input parameter. The configuration mode for setting up the Cassandra table. The options are `Sync` (default) or `Off`. |
+| cluster_kwargs | Cluster arguments | Input parameter. An optional dictionary of additional keyword arguments for the Cassandra cluster. |
+| search_query | Search Query | Input parameter. The query string for similarity search. Only relevant for reads. |
+| ingest_data | Ingest Data | Input parameter. Data to be loaded into the vector store as raw chunks and embeddings. Only relevant for writes. |
+| embedding | Embedding | Input parameter. The embedding model to use. |
+| number_of_results | Number of Results | Input parameter. The number of results to return in similarity search. Only relevant for reads. Default: 4. |
+| search_type | Search Type | Input parameter. The search type to use. The options are `Traversal` (default), `MMR Traversal`, `Similarity`, `Similarity with score threshold`, or `MMR (Max Marginal Relevance)`. |
+| depth | Depth of traversal | Input parameter. The maximum depth of edges to traverse. Only relevant if **Search Type** (`search_type`) is `Traversal` or `MMR Traversal`. Default: 1. |
+| search_score_threshold | Search Score Threshold | Input parameter. The minimum similarity score threshold for search results. Only relevant for reads using the `Similarity with score threshold` search type. |
+| search_filter | Search Metadata Filter | Input parameter. An optional dictionary of metadata search filters to apply in addition to graph traversal and similarity search. |
+
+
+
+## Chroma
+
+The **Chroma DB** and **Local DB** components read and write to Chroma vector stores using an instance of `Chroma` vector store.
+They support remote or in-memory instances, with or without persistence.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Chroma documentation](https://docs.trychroma.com/)
+
+### Chroma DB
+
+You can use the **Chroma DB** component to read and write to a Chroma database in local storage or a remote Chroma server with options for persistence and caching.
+When writing, the component can create a new database or collection at the specified location.
+
+:::tip
+An ephemeral (non-persistent) local Chroma vector store is helpful for testing vector search flows where you don't need to retain the database.
+:::
+
+The following example flow uses one **Chroma DB** component for both reads and writes:
+
+* When writing, it splits `Data` from a [**URL** component](/components-data#url) into chunks, computes embeddings with the attached **Embedding Model** component, and then loads the chunks and embeddings into the Chroma vector store.
+To trigger writes, click **Run component** on the **Chroma DB** component.
+
+* When reading, it uses chat input to perform a similarity search on the vector store, and then prints the search results to the chat.
+To trigger reads, open the **Playground** and enter a chat message.
+
+After running the flow once, you can click **Inspect Output** on each component to understand how the data was transformed as it passed from component to component.
+
+
+
+
+Chroma DB parameters
+
+| Name | Type | Description |
+|------|------|-------------|
+| **Collection Name** (`collection_name`) | String | Input parameter. The name of your Chroma vector store collection. Default: `langflow`. |
+| **Persist Directory** (`persist_directory`) | String | Input parameter. To persist the Chroma database, enter a relative or absolute path to a directory to store the `chroma.sqlite3` file. Leave empty for an ephemeral database. When reading or writing to an existing persistent database, specify the path to the persistent directory. |
+| **Ingest Data** (`ingest_data`) | Data or DataFrame | Input parameter. `Data` or `DataFrame` input containing the records to write to the vector store. Only relevant for writes. |
+| **Search Query** (`search_query`) | String | Input parameter. The query to use for vector search. Only relevant for reads. |
+| **Cache Vector Store** (`cache_vector_store`) | Boolean | Input parameter. If true, the component caches the vector store in memory for faster reads. Default: Enabled (true). |
+| **Embedding** (`embedding`) | Embeddings | Input parameter. The embedding function to use for the vector store. By default, Chroma DB uses its built-in embeddings model, or you can attach an **Embedding Model** component to use a different provider or model. |
+| **CORS Allow Origins** (`chroma_server_cors_allow_origins`) | String | Input parameter. The CORS allow origins for the Chroma server. |
+| **Chroma Server Host** (`chroma_server_host`) | String | Input parameter. The host for the Chroma server. |
+| **Chroma Server HTTP Port** (`chroma_server_http_port`) | Integer | Input parameter. The HTTP port for the Chroma server. |
+| **Chroma Server gRPC Port** (`chroma_server_grpc_port`) | Integer | Input parameter. The gRPC port for the Chroma server. |
+| **Chroma Server SSL Enabled** (`chroma_server_ssl_enabled`) | Boolean | Input parameter. Enable SSL for the Chroma server. |
+| **Allow Duplicates** (`allow_duplicates`) | Boolean | Input parameter. If true (default), writes don't check for existing duplicates in the collection, allowing you to store multiple copies of the same content. If false, writes won't add documents that match existing documents already present in the collection; the duplicate check can search the entire collection or only the number of records specified in **Limit** (`limit`). Only relevant for writes. |
+| **Search Type** (`search_type`) | String | Input parameter. The type of search to perform, either `Similarity` or `MMR`. Only relevant for reads. |
+| **Number of Results** (`number_of_results`) | Integer | Input parameter. The number of search results to return. Default: `10`. Only relevant for reads. |
+| **Limit** (`limit`) | Integer | Input parameter. Limit the number of records to compare when **Allow Duplicates** is false. This can help improve performance when writing to large collections, but it can result in some duplicate records. Only relevant for writes. |
+
+
+
+### Local DB
+
+The **Local DB** component reads and writes to a local, persistent Chroma vector store intended for use with Langflow.
+It has separate modes for reads and writes, automatic collection management, and default persistence in your Langflow cache directory.
+
+
+
+Set the **Mode** parameter to reflect the operation you want the component to perform, and then configure the other parameters accordingly.
+Some parameters are only available for one mode.
+
+
+
+
+To create or write to your local Chroma vector store, use **Ingest** mode.
+
+The following parameters are available in **Ingest** mode:
+
+| Name | Type | Description |
+|------|------|-------------|
+| **Name Your Collection** (`collection_name`) | String | Input parameter. The name for your Chroma vector store collection. Default: `langflow`. Only available in **Ingest** mode. |
+| **Persist Directory** (`persist_directory`) | String | Input parameter. The base directory where you want to create and persist the vector store. If you use the **Local DB** component in multiple flows or to create multiple collections, collections are stored at `$PERSISTENT_DIRECTORY/vector_stores/$COLLECTION_NAME`. If not specified, the default location is your Langflow cache directory (`LANGFLOW_CONFIG_DIR`). For more information, see [Memory management options](/memory). |
+| **Embedding** (`embedding`) | Embeddings | Input parameter. The embedding function to use for the vector store. |
+| **Allow Duplicates** (`allow_duplicates`) | Boolean | Input parameter. If true (default), writes don't check for existing duplicates in the collection, allowing you to store multiple copies of the same content. If false, writes won't add documents that match existing documents already present in the collection; the duplicate check can search the entire collection or only the number of records specified in **Limit** (`limit`). Only available in **Ingest** mode. |
+| **Ingest Data** (`ingest_data`) | Data or DataFrame | Input parameter. The records to write to the collection. Records are embedded and indexed for semantic search. Only available in **Ingest** mode. |
+| **Limit** (`limit`) | Integer | Input parameter. Limit the number of records to compare when **Allow Duplicates** is false. This can help improve performance when writing to large collections, but it can result in some duplicate records. Only available in **Ingest** mode. |
+
+
+
+
+To read from your local Chroma vector store, use **Retrieve** mode.
+
+The following parameters are available in **Retrieve** mode:
+
+| Name | Type | Description |
+|------|------|-------------|
+| **Persist Directory** (`persist_directory`) | String | Input parameter. The base directory where you want to create and persist the vector store. If you use the **Local DB** component in multiple flows or to create multiple collections, collections are stored at `$PERSISTENT_DIRECTORY/vector_stores/$COLLECTION_NAME`. If not specified, the default location is your Langflow cache directory (`LANGFLOW_CONFIG_DIR`). For more information, see [Memory management options](/memory). |
+| **Existing Collections** (`existing_collections`) | String | Input parameter. Select a previously-created collection to search. Only available in **Retrieve** mode. |
+| **Embedding** (`embedding`) | Embeddings | Input parameter. The embedding function to use for the vector store. |
+| **Search Type** (`search_type`) | String | Input parameter. The type of search to perform, either `Similarity` or `MMR`. Only available in **Retrieve** mode. |
+| **Search Query** (`search_query`) | String | Input parameter. Enter a query for similarity search. Only available in **Retrieve** mode. |
+| **Number of Results** (`number_of_results`) | Integer | Input parameter. Number of search results to return. Default: 10. Only available in **Retrieve** mode. |
+
+
+
+
+## Clickhouse
+
+The **Clickhouse** component reads and writes to a Clickhouse vector store using an instance of `Clickhouse` vector store.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Clickhouse Documentation](https://clickhouse.com/docs/en/intro)
+
+
+Clickhouse parameters
+
+| Name | Display Name | Info |
+|------|--------------|------|
+| host | hostname | Input parameter. The Clickhouse server hostname. Required. Default: `localhost`. |
+| port | port | Input parameter. The Clickhouse server port. Required. Default: `8123`. |
+| database | database | Input parameter. The Clickhouse database name. Required. |
+| table | Table name | Input parameter. The Clickhouse table name. Required. |
+| username | Username | Input parameter. Clickhouse username for authentication. Required. |
+| password | Password | Input parameter. Clickhouse password for authentication. Required. |
+| index_type | index_type | Input parameter. Type of the index, either `annoy` (default) or `vector_similarity`. |
+| metric | metric | Input parameter. Metric to compute distance for similarity search. The options are `angular` (default), `euclidean`, `manhattan`, `hamming`, `dot`. |
+| secure | Use HTTPS/TLS | Input parameter. If true, enables HTTPS/TLS for the Clickhouse server and overrides inferred values for interface or port arguments. Default: false. |
+| index_param | Param of the index | Input parameter. Index parameters. Default: `100,'L2Distance'`. |
+| index_query_params | index query params | Input parameter. Additional index query parameters. |
+| search_query | Search Query | Input parameter. The query string for similarity search. Only relevant for reads. |
+| ingest_data | Ingest Data | Input parameter. The records to load into the vector store. |
+| cache_vector_store | Cache Vector Store | Input parameter. If true, the component caches the vector store in memory for faster reads. Default: Enabled (true). |
+| embedding | Embedding | Input parameter. The embedding model to use. |
+| number_of_results | Number of Results | Input parameter. The number of search results to return. Default: `4`. Only relevant for reads. |
+| score_threshold | Score threshold | Input parameter. The threshold for similarity score comparison. Default: Unset (no threshold). Only relevant for reads. |
+
+
+
+## Couchbase
+
+The **Couchbase** component reads and writes to a Couchbase vector store using an instance of `CouchbaseSearchVectorStore`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Couchbase documentation](https://docs.couchbase.com/home/index.html)
+
+
+Couchbase parameters
+
+| Name | Type | Description |
+|------|------|-------------|
+| couchbase_connection_string | SecretString | Input parameter. Couchbase Cluster connection string. Required. |
+| couchbase_username | String | Input parameter. Couchbase username for authentication. Required. |
+| couchbase_password | SecretString | Input parameter. Couchbase password for authentication. Required. |
+| bucket_name | String | Input parameter. Name of the Couchbase bucket. Required. |
+| scope_name | String | Input parameter. Name of the Couchbase scope. Required. |
+| collection_name | String | Input parameter. Name of the Couchbase collection. Required. |
+| index_name | String | Input parameter. Name of the Couchbase index. Required. |
+| ingest_data | Data | Input parameter. The records to load into the vector store. Only relevant for writes. |
+| search_query | String | Input parameter. The query string for vector search. Only relevant for reads. |
+| cache_vector_store | Boolean | Input parameter. If true, the component caches the vector store in memory for faster reads. Default: Enabled (true). |
+| embedding | Embeddings | Input parameter. The embedding function to use for the vector store. |
+| number_of_results | Integer | Input parameter. Maximum number of search results to return. Default: 4. Only relevant for reads. |
+
+
+
+## DataStax
+
+The following components support DataStax vector stores.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Astra DB Serverless documentation](https://docs.datastax.com/en/astra-db-serverless/index.html)
+* [Hyper-Converged Database (HCD) documentation](https://docs.datastax.com/en/hyper-converged-database/1.2/get-started/get-started-hcd.html)
+
+### Astra DB
+
+The **Astra DB** component reads and writes to Astra DB Serverless databases, using an instance of `AstraDBVectorStore` to call the Data API and DevOps API.
+
+:::important
+It is recommended that you create any databases, keyspaces, and collections you need before configuring the **Astra DB** component.
+
+You can create new databases and collections through this component, but this is only possible in the Langflow visual editor, not at runtime, and you must wait while the database or collection initializes before proceeding with flow configuration.
+Additionally, not all database and collection configuration options are available through the **Astra DB** component, such as hybrid search options, PCU groups, vectorize integration management, and multi-region deployments.
+:::
+
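+For example, the following is a minimal sketch of creating a basic vector collection with the Data API before you configure the component. The keyspace, collection name, and vector dimension are placeholders, and the request shape is an assumption based on the Data API's `createCollection` command; see the [Astra DB Serverless documentation](https://docs.datastax.com/en/astra-db-serverless/api-reference/collection-methods/create-collection.html) for the authoritative format and additional options, such as vectorize and hybrid search settings.
+
+```python
+import os
+
+import requests
+
+# Placeholder values; replace with your database's API endpoint and an
+# application token with permission to create collections.
+endpoint = os.environ["ASTRA_DB_API_ENDPOINT"]
+token = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
+
+response = requests.post(
+    f"{endpoint}/api/json/v1/default_keyspace",
+    headers={"Token": token, "Content-Type": "application/json"},
+    json={
+        "createCollection": {
+            "name": "langflow_docs",
+            # The dimension must match your embedding model's output size.
+            "options": {"vector": {"dimension": 1536, "metric": "cosine"}},
+        }
+    },
+    timeout=30,
+)
+response.raise_for_status()
+print(response.json())
+```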
+
+Astra DB parameters
+
+| Name | Display Name | Info |
+|------|--------------|------|
+| token | Astra DB Application Token | Input parameter. An Astra application token with permission to access your vector database. Once the connection is verified, additional fields are populated with your existing databases and collections. If you want to create a database through this component, the application token must have Organization Administrator permissions. |
+| environment | Environment | Input parameter. The environment for the Astra DB API endpoint. Always use `prod`. |
+| database_name | Database | Input parameter. The name of the database that you want this component to connect to. Or, you can select **New Database** to create a new database, and then wait for the database to initialize. |
+| keyspace | Keyspace | Input parameter. The keyspace in your database that contains the collection specified in `collection_name`. Default: `default_keyspace`. |
+| collection_name | Collection | Input parameter. The name of the collection that you want to use with this flow. Or, select **New Collection** to create a new collection with limited configuration options. To ensure your collection is configured with the correct embedding provider and search capabilities, it is recommended to create the collection in the Astra Portal or with the Data API *before* configuring this component. For more information, see [Manage collections in Astra DB Serverless](https://docs.datastax.com/en/astra-db-serverless/databases/manage-collections.html). |
+| embedding_model | Embedding Model | Input parameter. Attach an [**Embedding Model** component](/components-embedding-models) to generate embeddings. Only available if the specified collection doesn't have a [vectorize integration](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html). If a vectorize integration exists, the component automatically uses the collection's integrated model. |
+| ingest_data | Ingest Data | Input parameter. The documents to load into the specified collection. |
+| search_query | Search Query | Input parameter. The query string for vector search. |
+| cache_vector_store | Cache Vector Store | Input parameter. Whether to cache the vector store in Langflow memory for faster reads. Default: Enabled (true). |
+| search_method | Search Method | Input parameter. The search method to use, either `Hybrid Search` or `Vector Search`. Your collection must be configured to support the chosen option, and the default depends on what your collection supports. All collections in Astra DB Serverless (Vector) databases support vector search, but hybrid search requires that you set specific collection settings when creating the collection. These options are only available when creating a collection programmatically. For more information, see [Ways to find data in Astra DB Serverless](https://docs.datastax.com/en/astra-db-serverless/databases/about-search.html) and [Create a collection that supports hybrid search](https://docs.datastax.com/en/astra-db-serverless/api-reference/collection-methods/create-collection.html#example-hybrid). |
+| reranker | Reranker | Input parameter. The re-ranker model to use for hybrid search, depending on the collection configuration. **This parameter shows the default reranker even if the selected collection doesn't support hybrid search.** To verify if a collection supports hybrid search, [get collection metadata](https://docs.datastax.com/en/astra-db-serverless/api-reference/collection-methods/list-collection-metadata.html), and then check that `lexical` and `rerank` both have `"enabled": true`. |
+| lexical_terms | Lexical Terms | Input parameter. A space-separated string of keywords for hybrid search, like `features, data, attributes, characteristics`. This parameter is only available if the collection supports hybrid search. For more information, see the following **Hybrid search example**. |
+| number_of_results | Number of Search Results | Input parameter. The number of search results to return. Default: 4. |
+| search_type | Search Type | Input parameter. The search type to use, one of `Similarity` (default), `Similarity with score threshold`, or `MMR (Max Marginal Relevance)`. |
+| search_score_threshold | Search Score Threshold | Input parameter. The minimum similarity score threshold for vector search results with the `Similarity with score threshold` search type. Default: 0. |
+| advanced_search_filter | Search Metadata Filter | Input parameter. An optional dictionary of metadata filters to apply in addition to vector or hybrid search. |
+| autodetect_collection | Autodetect Collection | Input parameter. Whether to automatically fetch a list of available collections after providing an application token and API endpoint. |
+| content_field | Content Field | Input parameter. For writes, this parameter specifies the name of the field in the documents that contains text strings for which you want to generate embeddings. |
+| deletion_field | Deletion Based On Field | Input parameter. When provided, documents in the target collection with metadata field values matching the input metadata field value are deleted before new records are loaded. Use this setting for writes with upserts (overwrites). |
+| ignore_invalid_documents | Ignore Invalid Documents | Input parameter. Whether to ignore invalid documents during writes. If disabled (false), then an error is raised for invalid documents. Default: Enabled (true). |
+| astradb_vectorstore_kwargs | AstraDBVectorStore Parameters | Input parameter. An optional dictionary of additional parameters for the `AstraDBVectorStore` instance. For more information, see [Vector store instances](#vector-store-instances). |
+
+
Hybrid search example
+The **Astra DB** component supports the Data API's [hybrid search](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html) feature.
+Hybrid search performs a vector similarity search and a lexical search, compares the results of both searches, and then returns the most relevant results overall.
+
To use hybrid search through the **Astra DB** component, do the following:
-1. Create a flow based on the **Hybrid Search RAG** template.
-2. In the **OpenAI** component, add your OpenAI API key.
-3. In the **Astra DB** vector store component, add your **Astra DB Application Token**.
-4. In the **Database** field, select your database.
-5. In the **Collection** field, select or create a collection with hybrid search capabilities enabled.
-6. In the **Playground**, enter a question about your data, such as `What are the features of my data?`
+1. Use the Data API to [create a collection that supports hybrid search](https://docs.datastax.com/en/astra-db-serverless/api-reference/collection-methods/create-collection.html#example-hybrid) if you haven't already created one.
- Your query is sent to the **OpenAI** and **Astra DB** components.
- The **OpenAI** component contains a prompt for creating the lexical query from your input:
+ Although you can create a collection through the **Astra DB** component, you have more control and insight into the collection settings when using the Data API for this operation.
+
+2. Create a flow based on the **Hybrid Search RAG** template, which includes an **Astra DB** component that is pre-configured for hybrid search.
+3. In the **Language Model** components, add your OpenAI API key.
+4. Delete the **Language Model** component that is connected to the **Structured Output** component's **Input Message** port, and then connect the **Chat Input** component to that port.
+5. Configure the **Astra DB** vector store component:
+
+ 1. Enter your Astra DB application token.
+ 2. In the **Database** field, select your database.
+ 3. In the **Collection** field, select your collection with hybrid search enabled.
+
+ Once you select a collection that supports hybrid search, the other parameters automatically update to allow hybrid search options.
+
+6. In the [component's header menu](/concepts-components#component-menus), click **Controls**, find the **Lexical Terms** field, enable the **Show** toggle, and then click **Close**.
+
+7. Connect the first **Parser** component's **Parsed Text** output to the **Astra DB** component's **Lexical Terms** input.
+This input only appears after connecting a collection that supports hybrid search with reranking.
+
+8. Click the **Structured Output** component to expose the [component's header menu](/concepts-components#component-menus), click **Controls**, find the **Format Instructions** row, click **Expand**, and then replace the prompt with the following text:
```text
You are a database query planner that takes a user's requests, and then converts to a search against the subject matter in question.
@@ -128,808 +462,522 @@ To use hybrid search through the **Astra DB** component, do the following:
Avoid common keywords associated with the user's subject matter.
```
-7. To view the keywords and questions the **OpenAI** component generates from your collection, in the **OpenAI** component, click **Inspect Output**.
+9. Click **Finish Editing**, and then click **Close** to save your changes to the component.
- ```
- 1. Keywords: features, data, attributes, characteristics
- 2. Question: What characteristics can be identified in my data?
- ```
+10. Open the **Playground**, and then enter a natural language question that you would ask about your database.
-8. To view the [DataFrame](/data-types#dataframe) generated from the **OpenAI** component's response, in the **Structured Output** component, click **Inspect output**.
+ In this example, your input is sent to both the **Astra DB** and **Structured Output** components:
- The DataFrame is passed to a **Parser** component, which parses the contents of the **Keywords** column into a string.
+ * The input sent directly to the **Astra DB** component's **Search Query** port is used as a string for similarity search.
+ An embedding is generated from the query string using the collection's Astra DB vectorize integration.
- This string of comma-separated words is passed to the **Lexical Terms** port of the **Astra DB** component.
- Note that the **Search Query** port of the **Astra DB** component is connected to the **Chat Input** component.
- The search query is vectorized, and both the **Search Query** and **Lexical Terms** content are sent to the reranker at the `find_and_rerank` endpoint.
- The reranker compares the vector search results against the string of terms from the lexical search.
- The highest-ranked results of your hybrid search are returned to the **Playground**.
+ * The input sent to the **Structured Output** component is processed by the **Structured Output**, **Language Model**, and **Parser** components to extract space-separated `keywords` used for the lexical search portion of the hybrid search.
+
+   The complete hybrid search query is executed against your database using the Data API's `find_and_rerank` command, as sketched in the example after these steps.
+ The API's response is output as a `DataFrame` that is transformed into a text string `Message` by another **Parser** component.
+ Finally, the **Chat Output** component prints the `Message` response to the **Playground**.
+
+11. Optional: Exit the **Playground**, and then click **Inspect Output** on each individual component to understand how lexical keywords were constructed and view the raw response from the Data API.
+This is helpful for debugging flows where a certain component isn't receiving input as expected from another component.
+
+ * **Structured Output component**: The output is the `Data` object produced by applying the output schema to the LLM's response to the input message and format instructions.
+ The following example is based on the aforementioned instructions for keyword extraction:
+
+ ```
+ 1. Keywords: features, data, attributes, characteristics
+ 2. Question: What characteristics can be identified in my data?
+ ```
+
+ * **Parser component**: The output is the string of keywords extracted from the structured output `Data`, and then used as lexical terms for the hybrid search.
+
+ * **Astra DB component**: The output is the `DataFrame` containing the results of the hybrid search as returned by the Data API.
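+
+For reference, the following is a rough sketch of what that hybrid search call might look like as a direct Data API request outside of Langflow. The endpoint path, command name, and field names are assumptions based on the behavior described above; verify them against the [Astra DB hybrid search documentation](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html) before relying on them.
+
+```python
+import os
+
+import requests
+
+# Placeholder values; replace with your database's endpoint, keyspace,
+# collection name, and application token.
+endpoint = os.environ["ASTRA_DB_API_ENDPOINT"]
+token = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
+
+response = requests.post(
+    f"{endpoint}/api/json/v1/default_keyspace/my_collection",
+    headers={"Token": token, "Content-Type": "application/json"},
+    json={
+        "findAndRerank": {
+            "sort": {
+                "$hybrid": {
+                    # Embedded by the collection's vectorize integration for
+                    # the vector search portion of the hybrid search.
+                    "$vectorize": "What are the features of my data?",
+                    # Keywords for the lexical search portion.
+                    "$lexical": "features data attributes characteristics",
+                }
+            },
+            "options": {"limit": 4},
+        }
+    },
+    timeout=30,
+)
+response.raise_for_status()
+print(response.json())
+```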
-## Astra DB Graph
+### Astra DB Graph
-This component implements a vector store using Astra DB with graph capabilities.
-For more information, see the [Astra DB Serverless documentation](https://docs.datastax.com/en/astra-db-serverless/tutorials/graph-rag.html).
+The **Astra DB Graph** component uses an `AstraDBGraphVectorStore` instance for graph traversal and graph-based document retrieval in an Astra DB collection. It also supports writing to the vector store.
+For more information, see [Build a Graph RAG system with LangChain and GraphRetriever](https://docs.datastax.com/en/astra-db-serverless/tutorials/graph-rag.html).
-Parameters
-
-**Inputs**
+Astra DB Graph parameters
| Name | Display Name | Info |
|------|--------------|------|
-| collection_name | Collection Name | The name of the collection within Astra DB where the vectors are stored. Required. |
-| token | Astra DB Application Token | Authentication token for accessing Astra DB. Required. |
-| api_endpoint | API Endpoint | API endpoint URL for the Astra DB service. Required. |
-| search_input | Search Input | Query string for similarity search. |
-| ingest_data | Ingest Data | Data to be ingested into the vector store. |
-| keyspace | Keyspace | Optional keyspace within Astra DB to use for the collection. |
-| embedding | Embedding Model | Embedding model to use. |
-| metric | Metric | Distance metric for vector comparisons. The options are "cosine", "euclidean", "dot_product". |
-| setup_mode | Setup Mode | Configuration mode for setting up the vector store. The options are "Sync", "Async", "Off". |
-| pre_delete_collection | Pre Delete Collection | Boolean flag to determine whether to delete the collection before creating a new one. |
-| number_of_results | Number of Results | Number of results to return in similarity search. Default: 4. |
-| search_type | Search Type | Search type to use. The options are "Similarity", "Graph Traversal", "Hybrid". |
-| traversal_depth | Traversal Depth | Maximum depth for graph traversal searches. Default: 1. |
-| search_score_threshold | Search Score Threshold | Minimum similarity score threshold for search results. |
-| search_filter | Search Metadata Filter | Optional dictionary of filters to apply to the search query. |
+| token | Astra DB Application Token | Input parameter. An Astra application token with permission to access your vector database. Once the connection is verified, additional fields are populated with your existing databases and collections. If you want to create a database through this component, the application token must have Organization Administrator permissions. |
+| api_endpoint | API Endpoint | Input parameter. Your database's API endpoint. |
+| keyspace | Keyspace | Input parameter. The keyspace in your database that contains the collection specified in `collection_name`. Default: `default_keyspace`. |
+| collection_name | Collection | Input parameter. The name of the collection that you want to use with this flow. For write operations, if a matching collection doesn't exist, a new one is created. |
+| metadata_incoming_links_key | Metadata Incoming Links Key | Input parameter. The metadata key for the incoming links in the vector store. |
+| ingest_data | Ingest Data | Input parameter. Records to load into the vector store. Only relevant for writes. |
+| search_input | Search Query | Input parameter. Query string for similarity search. Only relevant for reads. |
+| cache_vector_store | Cache Vector Store | Input parameter. Whether to cache the vector store in Langflow memory for faster reads. Default: Enabled (true). |
+| embedding_model | Embedding Model | Input parameter. Attach an [**Embedding Model** component](/components-embedding-models) to generate embeddings. If the collection has a [vectorize integration](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html), don't attach an **Embedding Model** component. |
+| metric | Metric | Input parameter. The metric to use for similarity search calculations, either `cosine` (default), `dot_product`, or `euclidean`. This is a collection setting. |
+| batch_size | Batch Size | Input parameter. Optional number of records to process in a single batch. |
+| bulk_insert_batch_concurrency | Bulk Insert Batch Concurrency | Input parameter. Optional concurrency level for bulk write operations. |
+| bulk_insert_overwrite_concurrency | Bulk Insert Overwrite Concurrency | Input parameter. Optional concurrency level for bulk write operations that allow upserts (overwriting existing records). |
+| bulk_delete_concurrency | Bulk Delete Concurrency | Input parameter. Optional concurrency level for bulk delete operations. |
+| setup_mode | Setup Mode | Input parameter. Configuration mode for setting up the vector store, either `Sync` (default) or `Off`. |
+| pre_delete_collection | Pre Delete Collection | Input parameter. Whether to delete the collection before creating a new one. Default: Disabled (false). |
+| metadata_indexing_include | Metadata Indexing Include | Input parameter. A list of metadata fields to index if you want to enable [selective indexing](https://docs.datastax.com/en/astra-db-serverless/api-reference/collection-indexes.html) *only* when creating a collection. Doesn't apply to existing collections. Only one `*_indexing_*` parameter can be set per collection. If all `*_indexing_*` parameters are unset, then all fields are indexed (default indexing). |
+| metadata_indexing_exclude | Metadata Indexing Exclude | Input parameter. A list of metadata fields to exclude from indexing if you want to enable selective indexing *only* when creating a collection. Doesn't apply to existing collections. Only one `*_indexing_*` parameter can be set per collection. If all `*_indexing_*` parameters are unset, then all fields are indexed (default indexing). |
+| collection_indexing_policy | Collection Indexing Policy | Input parameter. A dictionary to define the indexing policy if you want to enable selective indexing *only* when creating a collection. Doesn't apply to existing collections. Only one `*_indexing_*` parameter can be set per collection. If all `*_indexing_*` parameters are unset, then all fields are indexed (default indexing). The `collection_indexing_policy` dictionary is used when you need to set indexing on subfields or a complex indexing definition that isn't compatible as a list. |
+| number_of_results | Number of Results | Input parameter. Number of search results to return. Default: 4. Only relevant for reads. |
+| search_type | Search Type | Input parameter. Search type to use, one of `Similarity`, `Similarity with score threshold`, `MMR (Max Marginal Relevance)`, `Graph Traversal`, or `MMR (Max Marginal Relevance) Graph Traversal` (default). Only relevant for reads. |
+| search_score_threshold | Search Score Threshold | Input parameter. Minimum similarity score threshold for search results if the `search_type` is `Similarity with score threshold`. Default: 0. |
+| search_filter | Search Metadata Filter | Input parameter. Optional dictionary of metadata filters to apply in addition to vector search. |
-**Outputs**
+
+
+### Graph RAG
+
+The **Graph RAG** component uses an instance of [`GraphRetriever`](https://datastax.github.io/graph-rag/reference/langchain_graph_retriever/) for Graph RAG traversal, enabling graph-based document retrieval in an Astra DB vector store. A standalone sketch of this pattern follows the parameters table below.
+For more information, see the [DataStax Graph RAG documentation](https://datastax.github.io/graph-rag/).
+
+:::tip
+This component was meant as a Graph RAG extension for the **Astra DB** vector store component.
+However, the **Astra DB Graph** component includes both the vector store connection and Graph RAG functionality.
+:::
+
+
+Graph RAG parameters
| Name | Display Name | Info |
|------|--------------|------|
-| vector_store | Vector Store | The Graph RAG vector store instance configured with the specified parameters. |
-| search_results | Search Results | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| embedding_model | Embedding Model | Input parameter. Specify the embedding model to use. Not required if the connected vector store has a [vectorize integration](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html). |
+| vector_store | Vector Store Connection | Input parameter. A [`vector_store`](#vector-store-instances) instance inherited from an **Astra DB** component's **Vector Store Connection** output. |
+| edge_definition | Edge Definition | Input parameter. [Edge definition](https://datastax.github.io/graph-rag/reference/graph_retriever/edges/) for the graph traversal. |
+| strategy | Traversal Strategies | Input parameter. The strategy to use for graph traversal. Strategy options are dynamically loaded from available strategies. |
+| search_query | Search Query | Input parameter. The query to search for in the vector store. |
+| graphrag_strategy_kwargs | Strategy Parameters | Input parameter. Optional dictionary of additional parameters for the [retrieval strategy](https://datastax.github.io/graph-rag/reference/graph_retriever/strategies/). |
+| search_results | **Search Results** or **DataFrame** | Output parameter. The results of the graph-based document retrieval as a list of [`Data`](/data-types#data) objects or as a tabular [`DataFrame`](/data-types#dataframe). You can set the desired output type near the component's output port. |
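+
+For illustration, the following is a minimal standalone sketch of the `GraphRetriever` pattern described above. The collection name and the `keywords` metadata field are hypothetical, and the exact strategy arguments depend on your installed `graph-retriever` version; see the DataStax Graph RAG documentation for current examples. The sketch assumes the `langchain-astradb`, `langchain-graph-retriever`, and `langchain-openai` packages.
+
+```python
+import os
+
+from graph_retriever.strategies import Eager
+from langchain_astradb import AstraDBVectorStore
+from langchain_graph_retriever import GraphRetriever
+from langchain_openai import OpenAIEmbeddings
+
+# Hypothetical collection; replace with your own Astra DB details.
+vector_store = AstraDBVectorStore(
+    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
+    collection_name="langflow_docs",
+    api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
+    token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
+)
+
+# Traverse between documents that share values in the "keywords" metadata field.
+retriever = GraphRetriever(
+    store=vector_store,
+    edges=[("keywords", "keywords")],
+    strategy=Eager(start_k=1, max_depth=2),
+)
+
+documents = retriever.invoke("What are the features of my data?")
+```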
-## Cassandra
+### Hyper-Converged Database (HCD)
-This component creates a Cassandra vector store with search capabilities.
-For more information, see the [Cassandra documentation](https://cassandra.apache.org/doc/latest/cassandra/vector-search/overview.html).
+The **Hyper-Converged Database (HCD)** component uses your cluster's Data API server to read and write to an HCD vector store.
+Because the underlying functions call the Data API, which originated from Astra DB, the component uses an instance of `AstraDBVectorStore`.
+
+
+
+For more information about using the Data API with an HCD deployment, see [Get started with the Data API in HCD 1.2](https://docs.datastax.com/en/hyper-converged-database/1.2/api-reference/dataapiclient.html).
-Parameters
-
-**Inputs**
-
-| Name | Type | Description |
-|------|------|-------------|
-| database_ref | String | Contact points for the database or Astra DB database ID. |
-| username | String | Username for the database (leave empty for Astra DB). |
-| token | SecretString | User password for the database or Astra DB token. |
-| keyspace | String | Table or keyspace. |
-| table_name | String | Name of the table or Astra DB collection. |
-| ttl_seconds | Integer | Time-to-live for added texts. |
-| batch_size | Integer | Number of data to process in a single batch. |
-| setup_mode | String | Configuration mode for setting up the Cassandra table. |
-| cluster_kwargs | Dict | Additional keyword arguments for the Cassandra cluster. |
-| search_query | String | Query for similarity search. |
-| ingest_data | Data | Data to be ingested into the vector store. |
-| embedding | Embeddings | Embedding function to use. |
-| number_of_results | Integer | Number of results to return in search. |
-| search_type | String | Type of search to perform. |
-| search_score_threshold | Float | Minimum similarity score for search results. |
-| search_filter | Dict | Metadata filters for search query. |
-| body_search | String | Document textual search terms. |
-| enable_body_search | Boolean | Flag to enable body search. |
-
-**Outputs**
-
-| Name | Type | Description |
-|------|------|-------------|
-| vector_store | Cassandra | The Cassandra vector store instance configured with the specified parameters. |
-| search_results | List[Data] | The results of the similarity search as a list of `Data` objects. |
-
-
-
-## Cassandra Graph
-
-This component implements a Cassandra Graph vector store with search capabilities.
-
-
-Parameters
-
-**Inputs**
+HCD parameters
| Name | Display Name | Info |
|------|--------------|------|
-| database_ref | Contact Points / Astra Database ID | The contact points for the database or Astra DB database ID. Required. |
-| username | Username | The username for the database. Leave this field empty for Astra DB. |
-| token | Password / Astra DB Token | The user password for the database or Astra DB token. Required. |
-| keyspace | Keyspace | The table or keyspace. Required. |
-| table_name | Table Name | The name of the table or Astra DB collection where vectors are stored. Required. |
-| setup_mode | Setup Mode | The configuration mode for setting up the Cassandra table. The options are "Sync" or "Off". Default: "Sync". |
-| cluster_kwargs | Cluster arguments | An optional dictionary of additional keyword arguments for the Cassandra cluster. |
-| search_query | Search Query | The query string for similarity search. |
-| ingest_data | Ingest Data | The list of data to be ingested into the vector store. |
-| embedding | Embedding | The embedding model to use. |
-| number_of_results | Number of Results | The number of results to return in similarity search. Default: 4. |
-| search_type | Search Type | The search type to use. The options are "Traversal", "MMR traversal", "Similarity", "Similarity with score threshold", or "MMR (Max Marginal Relevance)". Default: "Traversal". |
-| depth | Depth of traversal | The maximum depth of edges to traverse. Used for "Traversal" or "MMR traversal" search types. Default: 1. |
-| search_score_threshold | Search Score Threshold | The minimum similarity score threshold for search results. Used for "Similarity with score threshold" search types. |
-| search_filter | Search Metadata Filter | An optional dictionary of filters to apply to the search query. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| vector_store | Vector Store | The Cassandra Graph vector store instance configured with the specified parameters. |
-| search_results | Search Results | The results of the similarity search as a list of [Data](/data-types#data) objects. |
-
-
-
-## Chroma DB
-
-The **Chroma DB** component creates an ephemeral, Chroma vector database with search capabilities that you can use for experimentation and vector storage.
-For more information, see the [Chroma documentation](https://docs.trychroma.com/).
-
-
-Chroma DB sample flow
-
-1. To use this component in a flow, connect it to a component that outputs `Data` or `DataFrame`.
-
- This example splits text from a [**URL** component](/components-data#url), and then computes embeddings with the connected **OpenAI Embeddings** component. Chroma DB computes embeddings by default, but you can connect your own embeddings model, as seen in this example.
-
- 
-
-2. In the **Chroma DB** component, in the **Collection** field, enter a name for your embeddings collection.
-3. Optional: To persist the Chroma database, in the **Persist** field, enter a directory to store the `chroma.sqlite3` file.
-This example uses `./chroma-db` to create a directory relative to where Langflow is running.
-4. To load data and embeddings into your Chroma database, in the **Chroma DB** component, click **Run component**.
-
- :::tip
- When loading duplicate documents, enable the **Allow Duplicates** option in Chroma DB if you want to store multiple copies of the same content, or disable it to automatically deduplicate your data.
- :::
-
-5. To view the split data, in the **Split Text** component, click **Inspect Output**.
-6. To query your loaded data, open the **Playground** and query your database.
-Your input is converted to vector data and compared to the stored vectors in a vector similarity search.
-
-
-
-
-Parameters
-
-**Inputs**
-
-| Name | Type | Description |
-|------------------------------|---------------|--------------------------------------------------|
-| collection_name | String | The name of the Chroma collection. Default: "langflow". |
-| persist_directory | String | The directory to persist the Chroma database. |
-| search_query | String | The query to search for in the vector store. |
-| ingest_data | Data | The data to ingest into the vector store (list of `Data` objects). |
-| embedding | Embeddings | The embedding function to use for the vector store. |
-| chroma_server_cors_allow_origins | String | The CORS allow origins for the Chroma server. |
-| chroma_server_host | String | The host for the Chroma server. |
-| chroma_server_http_port | Integer | The HTTP port for the Chroma server. |
-| chroma_server_grpc_port | Integer | The gRPC port for the Chroma server. |
-| chroma_server_ssl_enabled | Boolean | Enable SSL for the Chroma server. |
-| allow_duplicates | Boolean | Allow duplicate documents in the vector store. |
-| search_type | String | The type of search to perform: "Similarity" or "MMR". |
-| number_of_results | Integer | The number of results to return from the search. Default: `10`. |
-| limit | Integer | The limit of the number of records to compare when `Allow Duplicates` is false. |
-
-**Outputs**
-
-| Name | Type | Description |
-|----------------|---------------|--------------------------------|
-| vector_store | Chroma | The Chroma vector store instance. |
-| search_results | List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
-
-
-
-## Clickhouse
-
-This component implements a Clickhouse vector store with search capabilities.
-For more information, see the [Clickhouse Documentation](https://clickhouse.com/docs/en/intro).
-
-
-Parameters
-
-**Inputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| host | hostname | The Clickhouse server hostname. Required. Default: "localhost". |
-| port | port | The Clickhouse server port. Required. Default: 8123. |
-| database | database | The Clickhouse database name. Required. |
-| table | Table name | The Clickhouse table name. Required. |
-| username | The ClickHouse user name. | Username for authentication. Required. |
-| password | The password for username. | Password for authentication. Required. |
-| index_type | index_type | Type of the index. The options are "annoy" and "vector_similarity". Default: "annoy". |
-| metric | metric | Metric to compute distance. The options are "angular", "euclidean", "manhattan", "hamming", "dot". Default: "angular". |
-| secure | Use https/TLS | Overrides inferred values from the interface or port arguments. Default: false. |
-| index_param | Param of the index | Index parameters. Default: "'L2Distance',100". |
-| index_query_params | index query params | Additional index query parameters. |
-| search_query | Search Query | The query string for similarity search. |
-| ingest_data | Ingest Data | The data to be ingested into the vector store. |
-| embedding | Embedding | The embedding model to use. |
-| number_of_results | Number of Results | The number of results to return in similarity search. Default: 4. |
-| score_threshold | Score threshold | The threshold for similarity scores. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| vector_store | Vector Store | The Clickhouse vector store. |
-| search_results | Search Results | The results of the similarity search as a list of Data objects. |
-
-
-
-## Couchbase
-
-This component creates a Couchbase vector store with search capabilities.
-For more information, see the [Couchbase documentation](https://docs.couchbase.com/home/index.html).
-
-
-Parameters
-
-**Inputs**
-
-| Name | Type | Description |
-|-------------------------|---------------|--------------------------------------------------|
-| couchbase_connection_string | SecretString | Couchbase Cluster connection string. Required. |
-| couchbase_username | String | Couchbase username. Required. |
-| couchbase_password | SecretString | Couchbase password. Required. |
-| bucket_name | String | Name of the Couchbase bucket. Required. |
-| scope_name | String | Name of the Couchbase scope. Required. |
-| collection_name | String | Name of the Couchbase collection. Required. |
-| index_name | String | Name of the Couchbase index. Required. |
-| search_query | String | The query to search for in the vector store. |
-| ingest_data | Data | The list of data to ingest into the vector store. |
-| embedding | Embeddings | The embedding function to use for the vector store. |
-| number_of_results | Integer | Number of results to return from the search. Default: 4. |
-
-**Outputs**
-
-| Name | Type | Description |
-|----------------|------------------------|--------------------------------|
-| vector_store | CouchbaseVectorStore | A Couchbase vector store instance configured with the specified parameters. |
+| collection_name | Collection Name | Input parameter. The name of a vector store collection in HCD. For write operations, if the collection doesn't exist, then a new one is created. Required. |
+| username | HCD Username | Input parameter. Username for authenticating to your HCD deployment. Default: `hcd-superuser`. Required. |
+| password | HCD Password | Input parameter. Password for authenticating to your HCD deployment. Required. |
+| api_endpoint | HCD API Endpoint | Input parameter. Your deployment's HCD Data API endpoint, formatted as `http[s]://**CLUSTER_HOST**:**GATEWAY_PORT**`, where `CLUSTER_HOST` is the IP address of any node in your cluster and `GATEWAY_PORT` is the port number of your API gateway service. For example, `http://192.0.2.250:8181`. Required. |
+| ingest_data | Ingest Data | Input parameter. Records to load into the vector store. Only relevant for writes. |
+| search_input | Search Input | Input parameter. Query string for similarity search. Only relevant for reads. |
+| namespace | Namespace | Input parameter. The namespace in HCD that contains or will contain the collection specified in `collection_name`. Default: `default_namespace`. |
+| ca_certificate | CA Certificate | Input parameter. Optional CA certificate for TLS connections to HCD. |
+| metric | Metric | Input parameter. The metric to use for similarity search calculations, either `cosine`, `dot_product`, or `euclidean`. This is a collection setting. If calling an existing collection, leave unset to use the collection's metric. If a write operation creates a new collection, specify the desired similarity metric setting. |
+| batch_size | Batch Size | Input parameter. Optional number of records to process in a single batch. |
+| bulk_insert_batch_concurrency | Bulk Insert Batch Concurrency | Input parameter. Optional concurrency level for bulk write operations. |
+| bulk_insert_overwrite_concurrency | Bulk Insert Overwrite Concurrency | Input parameter. Optional concurrency level for bulk write operations that allow upserts (overwriting existing records). |
+| bulk_delete_concurrency | Bulk Delete Concurrency | Input parameter. Optional concurrency level for bulk delete operations. |
+| setup_mode | Setup Mode | Input parameter. Configuration mode for setting up the vector store, either `Sync` (default), `Async`, or `Off`. |
+| pre_delete_collection | Pre Delete Collection | Input parameter. Whether to delete the collection before creating a new one. |
+| metadata_indexing_include | Metadata Indexing Include | Input parameter. A list of metadata fields to index if you want to enable [selective indexing](https://docs.datastax.com/en/hyper-converged-database/1.2/api-reference/collection-indexes.html) *only* when creating a collection. Doesn't apply to existing collections. Only one `*_indexing_*` parameter can be set per collection. If all `*_indexing_*` parameters are unset, then all fields are indexed (default indexing). |
+| metadata_indexing_exclude | Metadata Indexing Exclude | Input parameter. A list of metadata fields to exclude from indexing if you want to enable selective indexing *only* when creating a collection. Doesn't apply to existing collections. Only one `*_indexing_*` parameter can be set per collection. If all `*_indexing_*` parameters are unset, then all fields are indexed (default indexing). |
+| collection_indexing_policy | Collection Indexing Policy | Input parameter. A dictionary to define the indexing policy if you want to enable selective indexing *only* when creating a collection. Doesn't apply to existing collections. Only one `*_indexing_*` parameter can be set per collection. If all `*_indexing_*` parameters are unset, then all fields are indexed (default indexing). The `collection_indexing_policy` dictionary is used when you need to set indexing on subfields or a complex indexing definition that isn't compatible as a list. |
+| embedding | Embedding or Astra Vectorize | Input parameter. The embedding model to use, provided by connecting an **Embedding Model** component. This component doesn't support additional vectorize authentication headers, so it isn't possible to use a vectorize integration with this component, even if you have enabled one on an existing HCD collection. |
+| number_of_results | Number of Results | Input parameter. Number of search results to return. Default: 4. Only relevant to reads. |
+| search_type | Search Type | Input parameter. Search type to use, either `Similarity` (default), `Similarity with score threshold`, or `MMR (Max Marginal Relevance)`. Only relevant to reads. |
+| search_score_threshold | Search Score Threshold | Input parameter. Minimum similarity score threshold for search results if the `search_type` is `Similarity with score threshold`. Default: 0. |
+| search_filter | Search Metadata Filter | Input parameter. Optional dictionary of metadata filters to apply in addition to vector search. |
## Elasticsearch
-This component creates an Elasticsearch vector store with search capabilities.
-For more information, see the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html).
+The **Elasticsearch** component reads and writes to an Elasticsearch instance using `ElasticsearchStore`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html)
-Parameters
-
-**Inputs**
+Elasticsearch parameters
| Name | Type | Description |
|------|------|-------------|
-| es_url | String | Elasticsearch server URL. |
-| es_user | String | Username for Elasticsearch authentication. |
-| es_password | SecretString | Password for Elasticsearch authentication. |
-| index_name | String | Name of the Elasticsearch index. |
-| strategy | String | Strategy for vector search. The options are "approximate_k_nearest_neighbors" or "script_scoring". |
-| distance_strategy | String | Strategy for distance calculation. The options are "COSINE", "EUCLIDEAN_DISTANCE", or "DOT_PRODUCT". |
-| search_query | String | Query for similarity search. |
-| ingest_data | Data | Data to be ingested into the vector store. |
-| embedding | Embeddings | Embedding function to use. |
-| number_of_results | Integer | Number of results to return in search. Default: `4`. |
-
-**Outputs**
-
-| Name | Type | Description |
-|------|------|-------------|
-| vector_store | ElasticsearchStore | The Elasticsearch vector store instance. |
-| search_results | List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| es_url | String | Input parameter. Elasticsearch server URL. |
+| es_user | String | Input parameter. Username for Elasticsearch authentication. |
+| es_password | SecretString | Input parameter. Password for Elasticsearch authentication. |
+| index_name | String | Input parameter. Name of the Elasticsearch index. |
+| strategy | String | Input parameter. Strategy for vector search, either `approximate_k_nearest_neighbors` or `script_scoring`. |
+| distance_strategy | String | Input parameter. Strategy for distance calculation, either `COSINE`, `EUCLIDEAN_DISTANCE`, or `DOT_PRODUCT`. |
+| search_query | String | Input parameter. Query string for similarity search. |
+| ingest_data | Data | Input parameter. Records to load into the vector store. |
+| embedding | Embeddings | Input parameter. The embedding model to use. |
+| number_of_results | Integer | Input parameter. Number of search results to return. Default: 4. |
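+
+Under the hood, these parameters map onto the LangChain `ElasticsearchStore` class. The following minimal sketch, assuming the `langchain-elasticsearch` and `langchain-openai` packages and placeholder connection values, shows roughly how the same read and write operations look outside of Langflow:
+
+```python
+from langchain_elasticsearch import ElasticsearchStore
+from langchain_openai import OpenAIEmbeddings
+
+# Placeholder connection values; replace with your own deployment details.
+store = ElasticsearchStore(
+    es_url="http://localhost:9200",
+    es_user="elastic",
+    es_password="YOUR_PASSWORD",
+    index_name="langflow_docs",
+    embedding=OpenAIEmbeddings(),
+)
+
+# Write: embed and index a few texts.
+store.add_texts(["Langflow is a visual flow builder."])
+
+# Read: vector similarity search, equivalent to the component's read path.
+results = store.similarity_search("What is Langflow?", k=4)
+```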
## FAISS
-This component creates a FAISS vector store with search capabilities.
-For more information, see the [FAISS documentation](https://faiss.ai/index.html).
+The **FAISS** component provides access to the Facebook AI Similarity Search (FAISS) library through an instance of the `FAISS` vector store.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [FAISS documentation](https://faiss.ai/index.html)
-Parameters
-
-**Inputs**
+FAISS parameters
| Name | Type | Description |
|---------------------------|---------------|--------------------------------------------------|
-| index_name | String | The name of the FAISS index. Default: "langflow_index". |
-| persist_directory | String | Path to save the FAISS index. It is relative to where Langflow is running. |
-| search_query | String | The query to search for in the vector store. |
-| ingest_data | Data | The list of data to ingest into the vector store. |
-| allow_dangerous_deserialization | Boolean | Set to True to allow loading pickle files from untrusted sources. Default: True. |
-| embedding | Embeddings | The embedding function to use for the vector store. |
-| number_of_results | Integer | Number of results to return from the search. Default: 4. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| vector_store | Vector Store | The FAISS vector store instance configured with the specified parameters. |
-| search_results | Search Results | The results of the similarity search as a list of [Data](/data-types#data) objects. |
-
-
-
-
-## Graph RAG
-
-This component performs Graph RAG traversal in a vector store, enabling graph-based document retrieval.
-For more information, see the [Graph RAG documentation](https://datastax.github.io/graph-rag/).
-
-For an example flow, see the **Graph RAG** template in Langflow.
-
-
-Parameters
-
-**Inputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| embedding_model | Embedding Model | Specify the embedding model. This isn't required for collections embedded with [Astra vectorize](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html). |
-| vector_store | Vector Store Connection | Connection to the vector store. |
-| edge_definition | Edge Definition | Edge definition for the graph traversal. For more information, see the [GraphRAG documentation](https://datastax.github.io/graph-rag/reference/graph_retriever/edges/). |
-| strategy | Traversal Strategies | The strategy to use for graph traversal. Strategy options are dynamically loaded from available strategies. |
-| search_query | Search Query | The query to search for in the vector store. |
-| graphrag_strategy_kwargs | Strategy Parameters | Optional dictionary of additional parameters for the retrieval strategy. For more information, see the [strategy documentation](https://datastax.github.io/graph-rag/reference/graph_retriever/strategies/). |
-
-**Outputs**
-
-| Name | Type | Description |
-|------|------|-------------|
-| search_results | List[Data] | Results of the graph-based document retrieval as a list of [Data](/data-types#data) objects. |
-
-
-
-## Hyper-Converged Database (HCD)
-
-This component implements a vector store using HCD.
-
-To use the HCD vector store, add your deployment's collection name, username, password, and HCD Data API endpoint.
-The endpoint must be formatted like `http[s]://**DOMAIN_NAME** or **IP_ADDRESS**[:port]`, for example, `http://192.0.2.250:8181`.
-
-Replace **DOMAIN_NAME** or **IP_ADDRESS** with the domain name or IP address of your HCD Data API connection.
-
-To use the HCD vector store for embeddings ingestion, connect it to an embeddings model and a file loader.
-
-
-
-
-Parameters
-
-**Inputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| collection_name | Collection Name | The name of the collection within HCD where the vectors will be stored. Required. |
-| username | HCD Username | Authentication username for accessing HCD. Default is "hcd-superuser". Required. |
-| password | HCD Password | Authentication password for accessing HCD. Required. |
-| api_endpoint | HCD API Endpoint | API endpoint URL for the HCD service. Required. |
-| search_input | Search Input | Query string for similarity search. |
-| ingest_data | Ingest Data | Data to be ingested into the vector store. |
-| namespace | Namespace | Optional namespace within HCD to use for the collection. Default is "default_namespace". |
-| ca_certificate | CA Certificate | Optional CA certificate for TLS connections to HCD. |
-| metric | Metric | Optional distance metric for vector comparisons. Options are "cosine", "dot_product", "euclidean". |
-| batch_size | Batch Size | Optional number of data to process in a single batch. |
-| bulk_insert_batch_concurrency | Bulk Insert Batch Concurrency | Optional concurrency level for bulk insert operations. |
-| bulk_insert_overwrite_concurrency | Bulk Insert Overwrite Concurrency | Optional concurrency level for bulk insert operations that overwrite existing data. |
-| bulk_delete_concurrency | Bulk Delete Concurrency | Optional concurrency level for bulk delete operations. |
-| setup_mode | Setup Mode | Configuration mode for setting up the vector store. Options are "Sync", "Async", "Off". Default is "Sync". |
-| pre_delete_collection | Pre Delete Collection | Boolean flag to determine whether to delete the collection before creating a new one. |
-| metadata_indexing_include | Metadata Indexing Include | Optional list of metadata fields to include in the indexing. |
-| embedding | Embedding or Astra Vectorize | Allows either an embedding model or an Astra Vectorize configuration. |
-| metadata_indexing_exclude | Metadata Indexing Exclude | Optional list of metadata fields to exclude from the indexing. |
-| collection_indexing_policy | Collection Indexing Policy | Optional dictionary defining the indexing policy for the collection. |
-| number_of_results | Number of Results | Number of results to return in similarity search. Default is 4. |
-| search_type | Search Type | Search type to use. Options are "Similarity", "Similarity with score threshold", "MMR (Max Marginal Relevance)". Default is "Similarity". |
-| search_score_threshold | Search Score Threshold | Minimum similarity score threshold for search results. Default is 0. |
-| search_filter | Search Metadata Filter | Optional dictionary of filters to apply to the search query. |
-
-**Outputs**
-
-| Name | Type | Description |
-|---------------|--------------|-------------------------------------------|
-| vector_store | HyperConvergedDatabaseVectorStore | The HCD vector store instance. |
-| search_results| List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
-
-
-
-## Local DB
-
-The **Local DB** component is Langflow's enhanced version of Chroma DB.
-
-The component adds a user-friendly interface with two modes (Ingest and Retrieve), automatic collection management, and built-in persistence in Langflow's cache directory.
-
-Local DB includes **Ingest** and **Retrieve** modes.
-
-The **Ingest** mode works similarly to [ChromaDB](#chroma-db), and persists your database to the Langflow cache directory. The Langflow cache directory location is specified in the `LANGFLOW_CONFIG_DIR` environment variable. For more information, see [Flow storage and logs](/concepts-flows#flow-storage-and-logs).
-
-The **Retrieve** mode can query your **Chroma DB** collections.
-
-
-
-For more information, see the [Chroma documentation](https://docs.trychroma.com/).
-
-
-Parameters
-
-**Inputs**
-
-| Name | Type | Description |
-|------|------|-------------|
-| collection_name | String | The name of the Chroma collection. Default: "langflow". |
-| persist_directory | String | Custom base directory to save the vector store. Collections are stored under `$DIRECTORY/vector_stores/$COLLECTION_NAME`. If not specified, it uses your system's cache folder. |
-| existing_collections | String | Select a previously created collection to search through its stored data. |
-| embedding | Embeddings | The embedding function to use for the vector store. |
-| allow_duplicates | Boolean | If false, the component won't add documents that are already in the vector store. |
-| search_type | String | Type of search to perform: "Similarity" or "MMR". |
-| ingest_data | Data/DataFrame | Data to store. It is embedded and indexed for semantic search. |
-| search_query | String | Enter text to search for similar content in the selected collection. |
-| number_of_results | Integer | Number of results to return. Default: 10. |
-| limit | Integer | Limit the number of records to compare when Allow Duplicates is False. |
-
-**Outputs**
-
-| Name | Type | Description |
-|------|------|-------------|
-| vector_store | Chroma | A local Chroma vector store instance configured with the specified parameters. |
-| search_results | List[Data](/data-types#data) | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| index_name | String | Input parameter. The name of the FAISS index. Default: "langflow_index". |
+| persist_directory | String | Input parameter. Path to save the FAISS index. It is relative to where Langflow is running. |
+| search_query | String | Input parameter. The query to search for in the vector store. |
+| ingest_data | Data | Input parameter. The list of data to ingest into the vector store. |
+| allow_dangerous_deserialization | Boolean | Input parameter. Set to True to allow loading pickle files from untrusted sources. Default: True. |
+| embedding | Embeddings | Input parameter. The embedding function to use for the vector store. |
+| number_of_results | Integer | Input parameter. Number of results to return from the search. Default: 4. |
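+
+For reference, these parameters correspond to standard LangChain `FAISS` usage. A minimal sketch, assuming the `langchain-community`, `faiss-cpu`, and `langchain-openai` packages and a hypothetical `./faiss_index` directory:
+
+```python
+from langchain_community.vectorstores import FAISS
+from langchain_core.documents import Document
+from langchain_openai import OpenAIEmbeddings
+
+embedding = OpenAIEmbeddings()
+
+# Write: build an index from documents and persist it to disk.
+docs = [Document(page_content="Langflow is a visual flow builder.")]
+store = FAISS.from_documents(docs, embedding)
+store.save_local("./faiss_index")
+
+# Read: reload the index and run a similarity search.
+# allow_dangerous_deserialization mirrors the component parameter above;
+# only enable it for index files you created yourself.
+store = FAISS.load_local(
+    "./faiss_index", embedding, allow_dangerous_deserialization=True
+)
+results = store.similarity_search("What is Langflow?", k=4)
+```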
## Milvus
-This component creates a Milvus vector store with search capabilities.
-For more information, see the [Milvus documentation](https://milvus.io/docs).
+The **Milvus** component reads and writes to Milvus vector stores using an instance of `Milvus` vector store.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Milvus documentation](https://milvus.io/docs)
-Parameters
-
-**Inputs**
+Milvus parameters
| Name | Type | Description |
|-------------------------|---------------|--------------------------------------------------|
-| collection_name | String | Name of the Milvus collection. |
-| collection_description | String | Description of the Milvus collection. |
-| uri | String | Connection URI for Milvus. |
-| password | SecretString | Password for Milvus. |
-| username | SecretString | Username for Milvus. |
-| batch_size | Integer | Number of data to process in a single batch. |
-| search_query | String | Query for similarity search. |
-| ingest_data | Data | Data to be ingested into the vector store. |
-| embedding | Embeddings | Embedding function to use. |
-| number_of_results | Integer | Number of results to return in search. |
-| search_type | String | Type of search to perform. |
-| search_score_threshold | Float | Minimum similarity score for search results. |
-| search_filter | Dict | Metadata filters for search query. |
-| setup_mode | String | Configuration mode for setting up the vector store. |
-| vector_dimensions | Integer | Number of dimensions of the vectors. |
-| pre_delete_collection | Boolean | Whether to delete the collection before creating a new one. |
-
-**Outputs**
-
-| Name | Type | Description |
-|----------------|------------------------|--------------------------------|
-| vector_store | Milvus | A Milvus vector store instance configured with the specified parameters. |
+| collection_name | String | Input parameter. Name of the Milvus collection. |
+| collection_description | String | Input parameter. Description of the Milvus collection. |
+| uri | String | Input parameter. Connection URI for Milvus. |
+| password | SecretString | Input parameter. Password for Milvus. |
+| username | SecretString | Input parameter. Username for Milvus. |
+| batch_size | Integer | Input parameter. Number of records to process in a single batch. |
+| search_query | String | Input parameter. Query for similarity search. |
+| ingest_data | Data | Input parameter. Data to be ingested into the vector store. |
+| embedding | Embeddings | Input parameter. Embedding function to use. |
+| number_of_results | Integer | Input parameter. Number of results to return in search. |
+| search_type | String | Input parameter. Type of search to perform. |
+| search_score_threshold | Float | Input parameter. Minimum similarity score for search results. |
+| search_filter | Dict | Input parameter. Metadata filters for search query. |
+| setup_mode | String | Input parameter. Configuration mode for setting up the vector store. |
+| vector_dimensions | Integer | Input parameter. Number of dimensions of the vectors. |
+| pre_delete_collection | Boolean | Input parameter. Whether to delete the collection before creating a new one. |
## MongoDB Atlas
-This component creates a MongoDB Atlas vector store with search capabilities.
-For more information, see the [MongoDB Atlas documentation](https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/vector-search-quick-start/).
+The **MongoDB Atlas** component reads and writes to MongoDB Atlas vector stores using an instance of `MongoDBAtlasVectorSearch`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [MongoDB Atlas documentation](https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/vector-search-quick-start/)
-Parameters
+MongoDB Atlas parameters
-**Inputs**
| Name | Type | Description |
| ------------------------- | ------------ | ----------------------------------------- |
-| mongodb_atlas_cluster_uri | SecretString | The connection URI for your MongoDB Atlas cluster. Required. |
-| enable_mtls | Boolean | Enable mutual TLS authentication. Default: false. |
-| mongodb_atlas_client_cert | SecretString | Client certificate combined with private key for mTLS authentication. Required if mTLS is enabled. |
-| db_name | String | The name of the database to use. Required. |
-| collection_name | String | The name of the collection to use. Required. |
-| index_name | String | The name of the Atlas Search index, it should be a Vector Search. Required. |
-| insert_mode | String | How to insert new documents into the collection. The options are "append" or "overwrite". Default: "append". |
-| embedding | Embeddings | The embedding model to use. |
-| number_of_results | Integer | Number of results to return in similarity search. Default: 4. |
-| index_field | String | The field to index. Default: "embedding". |
-| filter_field | String | The field to filter the index. |
-| number_dimensions | Integer | Embedding context length. Default: 1536. |
-| similarity | String | The method used to measure similarity between vectors. The options are "cosine", "euclidean", or "dotProduct". Default: "cosine". |
-| quantization | String | Quantization reduces memory costs by converting 32-bit floats to smaller data types. The options are "scalar" or "binary". |
-
-**Outputs**
-
-| Name | Type | Description |
-| ------------- | ---------------------- | ----------------------------------------- |
-| vector_store | MongoDBAtlasVectorSearch| The MongoDB Atlas vector store instance. |
-| search_results| List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| mongodb_atlas_cluster_uri | SecretString | Input parameter. The connection URI for your MongoDB Atlas cluster. Required. |
+| enable_mtls | Boolean | Input parameter. Enable mutual TLS authentication. Default: false. |
+| mongodb_atlas_client_cert | SecretString | Input parameter. Client certificate combined with private key for mTLS authentication. Required if mTLS is enabled. |
+| db_name | String | Input parameter. The name of the database to use. Required. |
+| collection_name | String | Input parameter. The name of the collection to use. Required. |
+| index_name | String | Input parameter. The name of the Atlas Search index. It must be a vector search index. Required. |
+| insert_mode | String | Input parameter. How to insert new documents into the collection. The options are "append" or "overwrite". Default: "append". |
+| embedding | Embeddings | Input parameter. The embedding model to use. |
+| number_of_results | Integer | Input parameter. Number of results to return in similarity search. Default: 4. |
+| index_field | String | Input parameter. The field to index. Default: "embedding". |
+| filter_field | String | Input parameter. The field to filter the index. |
+| number_dimensions | Integer | Input parameter. Embedding context length. Default: 1536. |
+| similarity | String | Input parameter. The method used to measure similarity between vectors. The options are "cosine", "euclidean", or "dotProduct". Default: "cosine". |
+| quantization | String | Input parameter. Quantization reduces memory costs by converting 32-bit floats to smaller data types. The options are "scalar" or "binary". |
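+
+Under the hood, the component relies on the LangChain `MongoDBAtlasVectorSearch` class. The following minimal sketch, assuming the `langchain-mongodb` and `langchain-openai` packages and placeholder cluster, namespace, and index values, shows roughly how the same read and write operations look outside of Langflow:
+
+```python
+from langchain_mongodb import MongoDBAtlasVectorSearch
+from langchain_openai import OpenAIEmbeddings
+
+# Placeholder values; replace with your own cluster URI, namespace, and index name.
+store = MongoDBAtlasVectorSearch.from_connection_string(
+    "mongodb+srv://USER:PASSWORD@cluster0.example.mongodb.net",
+    namespace="my_db.my_collection",
+    embedding=OpenAIEmbeddings(),
+    index_name="vector_index",
+)
+
+# Write: embed and insert a few texts.
+store.add_texts(["Langflow is a visual flow builder."])
+
+# Read: vector similarity search against the Atlas Vector Search index.
+results = store.similarity_search("What is Langflow?", k=4)
+```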
## OpenSearch
-This component creates an OpenSearch vector store with search capabilities
-For more information, see [OpenSearch documentation](https://opensearch.org/platform/search/vector-database.html).
+The **OpenSearch** component reads and writes to OpenSearch instances using `OpenSearchVectorSearch`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [OpenSearch documentation](https://opensearch.org/platform/search/vector-database.html)
-Parameters
+OpenSearch parameters
-**Inputs**
-| Name | Type | Description |
-|------------------------|--------------|------------------------------------------------------------------------------------------------------------------------|
-| opensearch_url | String | URL for OpenSearch cluster, such as `https://192.168.1.1:9200`. |
-| index_name | String | The index name where the vectors are stored in OpenSearch cluster. |
-| search_input | String | Enter a search query. Leave empty to retrieve all documents or if hybrid search is being used. |
-| ingest_data | Data | The data to be ingested into the vector store. |
-| embedding | Embeddings | The embedding function to use. |
-| search_type | String | The options are "similarity", "similarity_score_threshold", "mmr". |
-| number_of_results | Integer | The number of results to return in search. |
-| search_score_threshold | Float | The minimum similarity score threshold for search results. |
-| username | String | The username for the opensource cluster. |
-| password | SecretString | The password for the opensource cluster. |
-| use_ssl | Boolean | Use SSL. |
-| verify_certs | Boolean | Verify certificates. |
-| hybrid_search_query | String | Provide a custom hybrid search query in JSON format. This allows you to combine vector similarity and keyword matching. |
-
-**Outputs**
-
-| Name | Type | Description |
-| ------------- |------------------------|---------------------------------------------|
-| vector_store | OpenSearchVectorSearch | OpenSearch vector store instance |
-| search_results| List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| Name | Type | Description |
+|------|------|-------------|
+| opensearch_url | String | Input parameter. URL for OpenSearch cluster, such as `https://192.168.1.1:9200`. |
+| index_name | String | Input parameter. The name of the index where vectors are stored in the OpenSearch cluster. |
+| search_input | String | Input parameter. Enter a search query. Leave empty to retrieve all documents or if hybrid search is being used. |
+| ingest_data | Data | Input parameter. The data to be ingested into the vector store. |
+| embedding | Embeddings | Input parameter. The embedding function to use. |
+| search_type | String | Input parameter. The search type to use: "similarity", "similarity_score_threshold", or "mmr". |
+| number_of_results | Integer | Input parameter. The number of results to return in search. |
+| search_score_threshold | Float | Input parameter. The minimum similarity score threshold for search results. |
+| username | String | Input parameter. The username for the OpenSearch cluster. |
+| password | SecretString | Input parameter. The password for the OpenSearch cluster. |
+| use_ssl | Boolean | Input parameter. Use SSL. |
+| verify_certs | Boolean | Input parameter. Verify certificates. |
+| hybrid_search_query | String | Input parameter. Provide a custom hybrid search query in JSON format. This allows you to combine vector similarity and keyword matching. |
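+
+For reference, these parameters map onto the LangChain `OpenSearchVectorSearch` class. A minimal sketch, assuming the `langchain-community`, `opensearch-py`, and `langchain-openai` packages and placeholder cluster credentials:
+
+```python
+from langchain_community.vectorstores import OpenSearchVectorSearch
+from langchain_openai import OpenAIEmbeddings
+
+# Placeholder values; replace with your own cluster URL and credentials.
+store = OpenSearchVectorSearch(
+    opensearch_url="https://192.168.1.1:9200",
+    index_name="langflow_docs",
+    embedding_function=OpenAIEmbeddings(),
+    http_auth=("admin", "YOUR_PASSWORD"),
+    use_ssl=True,
+    verify_certs=False,
+)
+
+# Write: embed and index a few texts.
+store.add_texts(["Langflow is a visual flow builder."])
+
+# Read: vector similarity search against the index.
+results = store.similarity_search("What is Langflow?", k=4)
+```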
## PGVector
-This component creates a PGVector vector store with search capabilities.
-For more information, see the [PGVector documentation](https://github.com/pgvector/pgvector).
+The **PGVector** component reads and writes to PostgreSQL vector stores using an instance of `PGVector`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [PGVector documentation](https://github.com/pgvector/pgvector)
-Parameters
-
-**Inputs**
+PGVector parameters
| Name | Type | Description |
| --------------- | ------------ | ----------------------------------------- |
-| pg_server_url | SecretString | The PostgreSQL server connection string. |
-| collection_name | String | The table name for the vector store. |
-| search_query | String | The query for similarity search. |
-| ingest_data | Data | The data to be ingested into the vector store. |
-| embedding | Embeddings | The embedding function to use. |
-| number_of_results | Integer | The number of results to return in search. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| vector_store | Vector Store | The PGVector vector store instance configured with the specified parameters. |
-| search_results | Search Results | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| pg_server_url | SecretString | Input parameter. The PostgreSQL server connection string. |
+| collection_name | String | Input parameter. The table name for the vector store. |
+| search_query | String | Input parameter. The query for similarity search. |
+| ingest_data | Data | Input parameter. The data to be ingested into the vector store. |
+| embedding | Embeddings | Input parameter. The embedding function to use. |
+| number_of_results | Integer | Input parameter. The number of results to return in search. |
## Pinecone
-This component creates a Pinecone vector store with search capabilities.
-For more information, see the [Pinecone documentation](https://docs.pinecone.io/home).
+The **Pinecone** component reads and writes to Pinecone vector stores using an instance of `PineconeVectorStore`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Pinecone documentation](https://docs.pinecone.io/home)
-Parameters
-
-**Inputs**
+Pinecone parameters
| Name | Type | Description |
| ----------------- | ------------ | ----------------------------------------- |
-| index_name | String | The name of the Pinecone index. |
-| namespace | String | The namespace for the index. |
-| distance_strategy | String | The strategy for calculating distance between vectors. |
-| pinecone_api_key | SecretString | The API key for Pinecone. |
-| text_key | String | The key in the record to use as text. |
-| search_query | String | The query for similarity search. |
-| ingest_data | Data | The data to be ingested into the vector store. |
-| embedding | Embeddings | The embedding function to use. |
-| number_of_results | Integer | The number of results to return in search. |
-
-**Outputs**
-
-| Name | Display Name | Info |
-|------|--------------|------|
-| vector_store | Vector Store | The Pinecone vector store instance configured with the specified parameters. |
-| search_results | Search Results | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| index_name | String | Input parameter. The name of the Pinecone index. |
+| namespace | String | Input parameter. The namespace for the index. |
+| distance_strategy | String | Input parameter. The strategy for calculating distance between vectors. |
+| pinecone_api_key | SecretString | Input parameter. The API key for Pinecone. |
+| text_key | String | Input parameter. The key in the record to use as text. |
+| search_query | String | Input parameter. The query for similarity search. |
+| ingest_data | Data | Input parameter. The data to be ingested into the vector store. |
+| embedding | Embeddings | Input parameter. The embedding function to use. |
+| number_of_results | Integer | Input parameter. The number of results to return in search. |
## Qdrant
-This component creates a Qdrant vector store with search capabilities.
-For more information, see the [Qdrant documentation](https://qdrant.tech/documentation/).
+The **Qdrant** component reads and writes to Qdrant vector stores using an instance of `QdrantVectorStore`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Qdrant documentation](https://qdrant.tech/documentation/)
-Parameters
-
-**Inputs**
+Qdrant parameters
| Name | Type | Description |
| -------------------- | ------------ | ----------------------------------------- |
-| collection_name | String | The name of the Qdrant collection. |
-| host | String | The Qdrant server host. |
-| port | Integer | The Qdrant server port. |
-| grpc_port | Integer | The Qdrant gRPC port. |
-| api_key | SecretString | The API key for Qdrant. |
-| prefix | String | The prefix for Qdrant. |
-| timeout | Integer | The timeout for Qdrant operations. |
-| path | String | The path for Qdrant. |
-| url | String | The URL for Qdrant. |
-| distance_func | String | The distance function for vector similarity. |
-| content_payload_key | String | The content payload key. |
-| metadata_payload_key | String | The metadata payload key. |
-| search_query | String | The query for similarity search. |
-| ingest_data | Data | The data to be ingested into the vector store. |
-| embedding | Embeddings | The embedding function to use. |
-| number_of_results | Integer | The number of results to return in search. |
-
-**Outputs**
-
-| Name | Type | Description |
-| ------------- | -------- | ----------------------------------------- |
-| vector_store | Qdrant | A Qdrant vector store instance. |
-| search_results| List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| collection_name | String | Input parameter. The name of the Qdrant collection. |
+| host | String | Input parameter. The Qdrant server host. |
+| port | Integer | Input parameter. The Qdrant server port. |
+| grpc_port | Integer | Input parameter. The Qdrant gRPC port. |
+| api_key | SecretString | Input parameter. The API key for Qdrant. |
+| prefix | String | Input parameter. The prefix for Qdrant. |
+| timeout | Integer | Input parameter. The timeout for Qdrant operations. |
+| path | String | Input parameter. The path for Qdrant. |
+| url | String | Input parameter. The URL for Qdrant. |
+| distance_func | String | Input parameter. The distance function for vector similarity. |
+| content_payload_key | String | Input parameter. The content payload key. |
+| metadata_payload_key | String | Input parameter. The metadata payload key. |
+| search_query | String | Input parameter. The query for similarity search. |
+| ingest_data | Data | Input parameter. The data to be ingested into the vector store. |
+| embedding | Embeddings | Input parameter. The embedding function to use. |
+| number_of_results | Integer | Input parameter. The number of results to return in search. |
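+
+For reference, these parameters correspond to standard LangChain `QdrantVectorStore` usage. A minimal sketch, assuming the `langchain-qdrant` and `langchain-openai` packages and a Qdrant server at a placeholder URL:
+
+```python
+from langchain_core.documents import Document
+from langchain_openai import OpenAIEmbeddings
+from langchain_qdrant import QdrantVectorStore
+
+docs = [Document(page_content="Langflow is a visual flow builder.")]
+
+# Write: create (or reuse) a collection and index the documents.
+store = QdrantVectorStore.from_documents(
+    docs,
+    embedding=OpenAIEmbeddings(),
+    url="http://localhost:6333",
+    collection_name="langflow_docs",
+)
+
+# Read: vector similarity search against the same collection.
+results = store.similarity_search("What is Langflow?", k=4)
+```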
## Redis
-This component creates a Redis vector store with search capabilities.
-For more information, see the [Redis documentation](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/).
+The **Redis** component reads and writes to Redis vector stores using an instance of `Redis` vector store.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Redis documentation](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/)
-Parameters
-
-**Inputs**
+Redis parameters
| Name | Type | Description |
| ----------------- | ------------ | ----------------------------------------- |
-| redis_server_url | SecretString | The Redis server connection string. |
-| redis_index_name | String | The name of the Redis index. |
-| code | String | The custom code for Redis (advanced). |
-| schema | String | The schema for Redis index. |
-| search_query | String | The query for similarity search. |
-| ingest_data | Data | The data to be ingested into the vector store. |
-| number_of_results | Integer | The number of results to return in search. |
-| embedding | Embeddings | The embedding function to use. |
-
-**Outputs**
-
-| Name | Type | Description |
-| ------------- | -------- | ----------------------------------------- |
-| vector_store | Redis | Redis vector store instance |
-| search_results| List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| redis_server_url | SecretString | Input parameter. The Redis server connection string. |
+| redis_index_name | String | Input parameter. The name of the Redis index. |
+| code | String | Input parameter. The custom code for Redis (advanced). |
+| schema | String | Input parameter. The schema for the Redis index. |
+| search_query | String | Input parameter. The query for similarity search. |
+| ingest_data | Data | Input parameter. The data to be ingested into the vector store. |
+| number_of_results | Integer | Input parameter. The number of results to return in search. |
+| embedding | Embeddings | Input parameter. The embedding function to use. |
## Supabase
-This component creates a connection to a Supabase vector store with search capabilities.
-For more information, see the [Supabase documentation](https://supabase.com/docs/guides/ai).
+The **Supabase** component reads and writes to Supabase vector stores using an instance of `SupabaseVectorStore`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Supabase documentation](https://supabase.com/docs/guides/ai)
-Parameters
-
-**Inputs**
+Supabase parameters
| Name | Type | Description |
| ------------------- | ------------ | ----------------------------------------- |
-| supabase_url | String | The URL of the Supabase instance. |
-| supabase_service_key| SecretString | The service key for Supabase authentication. |
-| table_name | String | The name of the table in Supabase. |
-| query_name | String | The name of the query to use. |
-| search_query | String | The query for similarity search. |
-| ingest_data | Data | The data to be ingested into the vector store. |
-| embedding | Embeddings | The embedding function to use. |
-| number_of_results | Integer | The number of results to return in search. |
-
-**Outputs**
-
-| Name | Type | Description |
-| ------------- | ------------------ | ----------------------------------------- |
-| vector_store | SupabaseVectorStore | A Supabase vector store instance. |
-| search_results| List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| supabase_url | String | Input parameter. The URL of the Supabase instance. |
+| supabase_service_key| SecretString | Input parameter. The service key for Supabase authentication. |
+| table_name | String | Input parameter. The name of the table in Supabase. |
+| query_name | String | Input parameter. The name of the query to use. |
+| search_query | String | Input parameter. The query for similarity search. |
+| ingest_data | Data | Input parameter. The data to be ingested into the vector store. |
+| embedding | Embeddings | Input parameter. The embedding function to use. |
+| number_of_results | Integer | Input parameter. The number of results to return in search. |
## Upstash
-This component creates an Upstash vector store with search capabilities.
-For more information, see the [Upstash documentation](https://upstash.com/docs/introduction).
+The **Upstash** component reads and writes to Upstash vector stores using an instance of `UpstashVectorStore`.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Upstash documentation](https://upstash.com/docs/introduction)
-Parameters
-
-**Inputs**
+Upstash parameters
| Name | Type | Description |
| --------------- | ------------ | ----------------------------------------- |
-| index_url | String | The URL of the Upstash index. |
-| index_token | SecretString | The token for the Upstash index. |
-| text_key | String | The key in the record to use as text. |
-| namespace | String | The namespace for the index. |
-| search_query | String | The query for similarity search. |
-| metadata_filter | String | Filter documents by metadata. |
-| ingest_data | Data | The data to be ingested into the vector store. |
-| embedding | Embeddings | The embedding function to use. |
-| number_of_results | Integer | The number of results to return in search. |
-
-**Outputs**
-
-| Name | Type | Description |
-| ------------- | ---------------- | ----------------------------------------- |
-| vector_store | UpstashVectorStore| An Upstash vector store instance. |
-| search_results| List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| index_url | String | Input parameter. The URL of the Upstash index. |
+| index_token | SecretString | Input parameter. The token for the Upstash index. |
+| text_key | String | Input parameter. The key in the record to use as text. |
+| namespace | String | Input parameter. The namespace for the index. |
+| search_query | String | Input parameter. The query for similarity search. |
+| metadata_filter | String | Input parameter. Filter documents by metadata. |
+| ingest_data | Data | Input parameter. The data to be ingested into the vector store. |
+| embedding | Embeddings | Input parameter. The embedding function to use. |
+| number_of_results | Integer | Input parameter. The number of results to return in search. |
-## Vectara
+## Vectara Platform
-This component creates a Vectara vector store with search capabilities.
-For more information, see the [Vectara documentation](https://docs.vectara.com/docs/).
+The **Vectara** and **Vectara RAG** components support Vectara vector store, search, and RAG functionality using instances of the `Vectara` vector store.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Vector store instances](#vector-store-instances)
+* [Vectara documentation](https://docs.vectara.com/docs/)
+
+### Vectara
+
+The **Vectara** component reads and writes to Vectara vector stores, and then produces [search results output](#search-results-output).
-Parameters
-
-**Inputs**
+Vectara parameters
| Name | Type | Description |
| ---------------- | ------------ | ----------------------------------------- |
-| vectara_customer_id | String | The Vectara customer ID. |
-| vectara_corpus_id | String | The Vectara corpus ID. |
-| vectara_api_key | SecretString | The Vectara API key. |
-| embedding | Embeddings | The embedding function to use (optional). |
-| ingest_data | List[Document/Data] | The data to be ingested into the vector store. |
-| search_query | String | The query for similarity search. |
-| number_of_results | Integer | The number of results to return in search. |
-
-**Outputs**
-
-| Name | Type | Description |
-| ------------- | ----------------- | ----------------------------------------- |
-| vector_store | VectaraVectorStore | Vectara vector store instance. |
-| search_results| List[Data] | The results of the similarity search as a list of [Data](/data-types#data) objects. |
+| vectara_customer_id | String | Input parameter. The Vectara customer ID. |
+| vectara_corpus_id | String | Input parameter. The Vectara corpus ID. |
+| vectara_api_key | SecretString | Input parameter. The Vectara API key. |
+| embedding | Embeddings | Input parameter. The embedding function to use (optional). |
+| ingest_data | List[Document/Data] | Input parameter. The data to be ingested into the vector store. |
+| search_query | String | Input parameter. The query for similarity search. |
+| number_of_results | Integer | Input parameter. The number of results to return in search. |
-## Vectara RAG
+### Vectara RAG
-This component enabled Vectara's full end-to-end RAG capabilities with reranking options.
-For more information, see the [Vectara documentation](https://docs.vectara.com/docs/).
+This component enables Vectara's full end-to-end RAG capabilities with reranking options.
+
+This component uses a `Vectara` vector store to execute the vector search and reranking functions, and then outputs an **Answer** string in [`Message`](/data-types#message) format.
## Weaviate
-This component facilitates a Weaviate vector store setup, optimizing text and document indexing and retrieval.
-For more information, see the [Weaviate Documentation](https://weaviate.io/developers/weaviate).
+The **Weaviate** component reads and writes to Weaviate vector stores using an instance of `Weaviate` vector store.
+
+For more information, see the following:
+
+* [Hidden parameters](#hidden-parameters)
+* [Search results output](#search-results-output)
+* [Vector store instances](#vector-store-instances)
+* [Weaviate Documentation](https://weaviate.io/developers/weaviate)
-Parameters
-
-**Inputs**
+Weaviate parameters
| Name | Type | Description |
|---------------|--------------|-------------------------------------------|
-| weaviate_url | String | The default instance URL. |
-| search_by_text| Boolean | Indicates whether to search by text. |
-| api_key | SecretString | The optional API key for authentication. |
-| index_name | String | The optional index name. |
-| text_key | String | The default text extraction key. |
-| input | Document | The document or record. |
-| embedding | Embeddings | The embedding model used. |
-| attributes | List[String] | Optional additional attributes. |
-
-**Outputs**
-
-| Name | Type | Description |
-|--------------|------------------|-------------------------------|
-| vector_store | WeaviateVectorStore | The Weaviate vector store instance. |
+| weaviate_url | String | Input parameter. The default instance URL. |
+| search_by_text| Boolean | Input parameter. Indicates whether to search by text. |
+| api_key | SecretString | Input parameter. The optional API key for authentication. |
+| index_name | String | Input parameter. The optional index name. |
+| text_key | String | Input parameter. The default text extraction key. |
+| input | Document | Input parameter. The document or record. |
+| embedding | Embeddings | Input parameter. The embedding model used. |
+| attributes | List[String] | Input parameter. Optional additional attributes. |
\ No newline at end of file
diff --git a/docs/docs/Concepts/concepts-file-management.mdx b/docs/docs/Concepts/concepts-file-management.mdx
index 584c1f3c1..ca304408f 100644
--- a/docs/docs/Concepts/concepts-file-management.mdx
+++ b/docs/docs/Concepts/concepts-file-management.mdx
@@ -54,7 +54,34 @@ For more information about the **File** component and other data loading compone
### Load files at runtime
You can use preloaded files in your flows, and you can load files at runtime, if your flow accepts file input.
-For an example, see [Create a chatbot that can ingest files](/chat-with-files).
+To enable file input in your flow, do the following:
+1. Add a [**File** component](/components-data#file) to your flow.
+2. Click **Share**, select **API access**, and then click **Input Schema** to add [`tweaks`](/concepts-publish#input-schema) to the request payload in the flow's automatically generated code snippets.
+3. Expand the **File** section, find the **Files** row, and then enable **Expose Input** to allow the parameter to be set at runtime through the Langflow API.
+4. Close the **Input Schema** pane to return to the **API access** pane.
+The payload in each code snippet now includes `tweaks`, your **File** component's ID, and the `path` key that you enabled in **Input Schema**:
+
+   ```json
+   "tweaks": {
+     "File-qYD5w": {
+       "path": []
+     }
+   }
+   ```
+
+5. When you run this flow programmatically, your script must upload a file to Langflow file management, and then pass the returned `file_path` to the `path` tweak in the `/run` request:
+
+   ```json
+   "tweaks": {
+     "FILE_COMPONENT_ID": {
+       "path": [ "file_path" ]
+     }
+   }
+   ```
+
+   For a complete example, see [Create a chatbot that can ingest files](/chat-with-files) and [Files endpoints](/api-files).
+
+   If you want to upload multiple files, pass multiple `file_path` values in the `path` array, such as `[ "path1", "path2" ]`.
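+
+The following minimal Python sketch shows this end-to-end pattern: it uploads a local file to Langflow file management and then passes the returned path to the **File** component through the `path` tweak. It assumes the file management upload endpoint (`/api/v2/files`) and run endpoint (`/api/v1/run/{flow_id}`) described in [Files endpoints](/api-files); the placeholder values (`YOUR_LANGFLOW_API_KEY`, `YOUR_FLOW_ID`, `FILE_COMPONENT_ID`) must be replaced with values from your own flow:
+
+```python
+import requests
+
+LANGFLOW_URL = "http://localhost:7860"  # Default local Langflow server; adjust as needed.
+API_KEY = "YOUR_LANGFLOW_API_KEY"
+FLOW_ID = "YOUR_FLOW_ID"
+FILE_COMPONENT_ID = "FILE_COMPONENT_ID"  # Copy the component ID from your flow, such as "File-qYD5w".
+
+# Upload the file to Langflow file management.
+with open("example.pdf", "rb") as f:
+    upload = requests.post(
+        f"{LANGFLOW_URL}/api/v2/files",
+        headers={"x-api-key": API_KEY},
+        files={"file": f},
+        timeout=30,
+    )
+upload.raise_for_status()
+file_path = upload.json()["path"]  # Server-side path returned by file management.
+
+# Run the flow, passing the uploaded file's path to the File component.
+response = requests.post(
+    f"{LANGFLOW_URL}/api/v1/run/{FLOW_ID}",
+    headers={"x-api-key": API_KEY},
+    json={
+        "input_value": "Summarize this file.",
+        "input_type": "chat",
+        "output_type": "chat",
+        "tweaks": {FILE_COMPONENT_ID: {"path": [file_path]}},
+    },
+    timeout=120,
+)
+response.raise_for_status()
+print(response.json())
+```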
## Upload images
diff --git a/docs/docs/Concepts/concepts-playground.mdx b/docs/docs/Concepts/concepts-playground.mdx
index 69dbfd5c0..b69f8cae8 100644
--- a/docs/docs/Concepts/concepts-playground.mdx
+++ b/docs/docs/Concepts/concepts-playground.mdx
@@ -29,14 +29,7 @@ For flows that require another type of input, such as a webhook event, file uplo

-
-Playground mechanics
-
-When you run a flow in the **Playground**, Langflow calls the `/build/$FLOW_ID/flow` endpoint in [chat.py](https://github.com/langflow-ai/langflow/blob/main/src/backend/base/langflow/api/v1/chat.py#L143). This call retrieves the flow data, builds a graph, and executes the graph. As each component (or node) is executed, the `build_vertex` function calls `build_and_run`, which may call the individual components' `def_build` method, if it exists. If a component doesn't have a `def_build` function, the build still returns a component.
-
-The `build` function allows components to execute logic at runtime. For example, the [**Recursive Character Text Splitter** component](https://github.com/langflow-ai/langflow/blob/main/src/backend/base/langflow/components/langchain_utilities/recursive_character.py) is a child of the `LCTextSplitterComponent` class. When text needs to be processed, the parent class's `build` method is called, which creates a `RecursiveCharacterTextSplitter` object and uses it to split the text according to the defined parameters. The split text is then passed on to the next component. This all occurs when the component is built.
-
-
+For technical details about how the **Playground** works, see [Monitor endpoints](/api-monitor).
### Review agent logic
diff --git a/docs/docs/Concepts/data-types.mdx b/docs/docs/Concepts/data-types.mdx
index 430d2057a..9bc346290 100644
--- a/docs/docs/Concepts/data-types.mdx
+++ b/docs/docs/Concepts/data-types.mdx
@@ -39,39 +39,33 @@ The schema is defined in [`data.py`](https://github.com/langflow-ai/langflow/blo
The following attributes are available:
-- `data`: A dictionary that stores key-value pairs.
+- `data`: The `Data` object's core dictionary, stored in the `.data` attribute. Each key is a field name, and the values can be any supported data type.
- `text_key`: The key in `data` that is considered the primary text value.
- `default_value`: Fallback if `text_key` is missing. The default `text_key` is `"text"`.
-### Data structure
-
-A `Data` object stores key-value pairs within the `.data` attribute, where each key is a field name and its value can be any supported data type. `text_key` tells Langflow which key in the data dictionary is the primary text value for that object.
-
```python
data_obj = Data(
- text_key="text", # Field 1
- data={ # Field 2 (the actual dict)
+ text_key="text",
+ data={
"text": "Hello world",
"name": "Charlie",
"age": 28
},
- default_value="" # Field 3
+ default_value=""
)
```
`Data` objects can be serialized to JSON, created from JSON, or created from other dictionary data.
However, the resulting `Data` object is a structured object with validation and methods, not a plain dictionary.
-
-For example, when serialized into JSON, the previous example becomes the following JSON object:
+For example, when serialized into JSON, the previous Python example becomes the following JSON object:
```json
{
"text_key": "text",
"data": {
- "text": "User Profile",
- "name": "Charlie Lastname",
- "age": 28,
- "email": "charlie.lastname@example.com"
+ "text": "Hello world",
+ "name": "Charlie",
+ "age": 28
},
"default_value": ""
}
@@ -263,7 +257,7 @@ Hover over the port to see the accepted or produced data types.
In Langflow, you can use **Inspect output** to view the output of individual components.
This can help you learn about the different data type and debug problems with invalid or malformed inputs and output.
-The following example shows how to inspect the output of a **Type Convert** component, which can convert `Message`, `Data`, or `DataFrame` input into `Message`, `Data`, or `DataFrame` output:
+The following example shows how to inspect the output of a [**Type Convert** component](/components-processing#type-convert), which can convert data from one type to another:
1. Create a flow, and then connect a **Chat Input** component to a **Type Convert** component.
@@ -344,6 +338,7 @@ The following example shows how to inspect the output of a **Type Convert** comp
## See also
+- [**Processing** components](/components-processing)
- [Custom components](/components-custom-components)
- [Pydantic Models](https://docs.pydantic.dev/latest/api/base_model/)
- [pandas.DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html)
\ No newline at end of file
diff --git a/docs/docs/Configuration/api-keys-and-authentication.mdx b/docs/docs/Configuration/api-keys-and-authentication.mdx
index 5553ae711..ca6c63020 100644
--- a/docs/docs/Configuration/api-keys-and-authentication.mdx
+++ b/docs/docs/Configuration/api-keys-and-authentication.mdx
@@ -317,7 +317,7 @@ Additionally, you must sign in as a superuser to manage users and [create a Lang
uv run langflow run --env-file .env
```
- Starting Langflow with an `.env` file automatically authenticates you as the superuser set in `LANGFLOW_SUPERUSER` and `LANGFLOW_SUPERUSER_PASSWORD`.
+ Starting Langflow with a `.env` file automatically authenticates you as the superuser set in `LANGFLOW_SUPERUSER` and `LANGFLOW_SUPERUSER_PASSWORD`.
If you don't explicitly set these variables, the default values are `langflow` and `langflow` for system auto-login.
6. Verify the server is running. The default location is `http://localhost:7860`.
diff --git a/docs/docs/Configuration/configuration-cli.mdx b/docs/docs/Configuration/configuration-cli.mdx
index a751e2f4b..211429003 100644
--- a/docs/docs/Configuration/configuration-cli.mdx
+++ b/docs/docs/Configuration/configuration-cli.mdx
@@ -230,7 +230,7 @@ Use this mode to previews the changes that would be made to the database schema
-### langflow run
+### langflow run {#langflow-run}
Starts the Langflow server.
diff --git a/docs/docs/Configuration/environment-variables.mdx b/docs/docs/Configuration/environment-variables.mdx
index d4bf13414..c8b71c5ba 100644
--- a/docs/docs/Configuration/environment-variables.mdx
+++ b/docs/docs/Configuration/environment-variables.mdx
@@ -16,12 +16,16 @@ You can set Langflow environment variables in your terminal, in `.env`, and with
If an environment variable is set in multiple places, the following hierarchy applies:
-1. Langflow CLI options override `.env` and terminal variables.
-2. `.env` overrides terminal variables.
-3. Terminal variables are used only if the variable isn't set in `.env` or Langflow CLI options.
+1. Langflow CLI options override all other sources.
+2. The `.env` file overrides system environment variables.
+3. System environment variables are used only if not set elsewhere.
+
+ When running a Langflow Docker image, the `-e` flag sets system environment variables.
+
+For example:
+- If you set `LANGFLOW_PORT=8080` in your system environment and `LANGFLOW_PORT=7860` in `.env`, Langflow uses `7860` from `.env`.
+- If you run `langflow run --port 9000` with `LANGFLOW_PORT=7860` in `.env`, Langflow uses `9000` from the CLI option.
-For example, if you set `LANGFLOW_PORT` in `.env` and your terminal, then Langflow uses the value from `.env`.
-Similarly, if you run a Langflow CLI command with `--port`, Langflow uses that port number instead of the `LANGFLOW_PORT` in `.env`.
## Configure environment variables
@@ -151,7 +155,7 @@ The following table lists the environment variables supported by Langflow.
| `LANGFLOW_AUTO_SAVING_INTERVAL` | Integer | `1000` | Set the interval for flow auto-saving in milliseconds. |
| `LANGFLOW_BACKEND_ONLY` | Boolean | False | Run only the Langflow backend service (no frontend). |
| `LANGFLOW_BUNDLE_URLS` | List[String] | `[]` | A list of URLs from which to load component bundles and flows. Supports GitHub URLs. If LANGFLOW_AUTO_LOGIN is enabled, flows from these bundles are loaded into the database. |
-| `LANGFLOW_CACHE_TYPE` | String | `async` | Set the cache type for Langflow. Possible values: `async`, `redis`, `memory`, `disk`. If you set the type to `redis`, then you must also set the following environment variables: `LANGFLOW_REDIS_HOST`, `LANGFLOW_REDIS_PORT`, `LANGFLOW_REDIS_DB`, and `LANGFLOW_REDIS_CACHE_EXPIRE`. |
+| `LANGFLOW_CACHE_TYPE` | String | `async` | Set the cache type for Langflow. Possible values: `async`, `redis`, `memory`, `disk`. If you set the type to `redis`, then you must also set the following environment variables: `LANGFLOW_REDIS_HOST`, `LANGFLOW_REDIS_PORT`, `LANGFLOW_REDIS_DB`, and `LANGFLOW_REDIS_CACHE_EXPIRE`. See also [`langflow run`](/configuration-cli#langflow-run). |
| `LANGFLOW_COMPONENTS_PATH` | String | Not set | Path to the directory containing custom components. |
| `LANGFLOW_CONFIG_DIR` | String | Varies | Set the Langflow configuration directory where files, logs, and the Langflow database are stored. Default path depends on your installation. See [Flow storage and logs](/concepts-flows#flow-storage-and-logs). |
| `LANGFLOW_DATABASE_URL` | String | Not set | Set the database URL for Langflow. If not provided, Langflow uses a SQLite database. |
@@ -163,14 +167,14 @@ The following table lists the environment variables supported by Langflow.
| `LANGFLOW_DISABLE_TRACK_APIKEY_USAGE` | Boolean | False | Whether to track API key usage. If true, disables tracking of API key usage (`total_uses` and `last_used_at`) to avoid database contention under high concurrency. |
| `LANGFLOW_ENABLE_SUPERUSER_CLI` | Boolean | True | Allow creation of superusers with the Langflow CLI command [`langflow superuser`](./configuration-cli.mdx#langflow-superuser). Recommended to be disabled (false) in production for security reasons. |
| `LANGFLOW_FALLBACK_TO_ENV_VAR` | Boolean | True | If enabled, [global variables](/configuration-global-variables) set in your Langflow **Settings** can use an environment variable with the same name if Langflow can't retrieve the variable value from the global variables. |
-| `LANGFLOW_FRONTEND_PATH` | String | `./frontend` | Path to the frontend directory containing build files. This is for development purposes only. See [`--frontend-path`](./configuration-cli.mdx#run-frontend-path). |
-| `LANGFLOW_HEALTH_CHECK_MAX_RETRIES` | Integer | `5` | Set the maximum number of retries for the health check. See [`--health-check-max-retries`](./configuration-cli.mdx#run-health-check-max-retries). |
-| `LANGFLOW_HOST` | String | `localhost` | The host on which the Langflow server will run. See [`--host`](./configuration-cli.mdx#run-host). |
-| `LANGFLOW_LANGCHAIN_CACHE` | String | `InMemoryCache` | Type of cache to use. Possible values: `InMemoryCache`, `SQLiteCache`. See [`--cache`](./configuration-cli.mdx#run-cache). |
+| `LANGFLOW_FRONTEND_PATH` | String | `./frontend` | Path to the frontend directory containing build files. This is for development purposes only. See [`langflow run`](/configuration-cli#langflow-run). |
+| `LANGFLOW_HEALTH_CHECK_MAX_RETRIES` | Integer | `5` | Set the maximum number of retries for the health check. See [`langflow run`](/configuration-cli#langflow-run). |
+| `LANGFLOW_HOST` | String | `localhost` | The host on which the Langflow server will run. See [`langflow run`](/configuration-cli#langflow-run). |
+| `LANGFLOW_LANGCHAIN_CACHE` | String | `InMemoryCache` | Type of cache storage to use, separate from `LANGFLOW_CACHE_TYPE`. Possible values: `InMemoryCache`, `SQLiteCache`. |
| `LANGFLOW_LOG_LEVEL` | String | `INFO` | Set the logging level for Langflow. Possible values: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`. |
| `LANGFLOW_LOG_FILE` | String | Not set | Path to the log file. If this option isn't set, logs are written to stdout. |
| `LANGFLOW_LOG_RETRIEVER_BUFFER_SIZE` | Integer | `10000` | Set the buffer size for log retrieval. Only used if `LANGFLOW_ENABLE_LOG_RETRIEVAL` is enabled. |
-| `LANGFLOW_MAX_FILE_SIZE_UPLOAD` | Integer | `100` | Set the maximum file size for the upload in megabytes. See [`--max-file-size-upload`](./configuration-cli.mdx#run-max-file-size-upload). |
+| `LANGFLOW_MAX_FILE_SIZE_UPLOAD` | Integer | `100` | Set the maximum file size for the upload in megabytes. See [`langflow run`](/configuration-cli#langflow-run). |
| `LANGFLOW_MAX_ITEMS_LENGTH` | Integer | `100` | Maximum number of items to store and display in the visual editor. Lists longer than this will be truncated when displayed in the visual editor. Doesn't affect data passed between components nor outputs. |
| `LANGFLOW_MAX_TEXT_LENGTH` | Integer | `1000` | Maximum number of characters to store and display in the visual editor. Responses longer than this will be truncated when displayed in the visual editor. Doesn't truncate responses between components nor outputs. |
| `LANGFLOW_MCP_SERVER_ENABLED` | Boolean | True | If this option is set to False, Langflow doesn't enable the MCP server. |
diff --git a/docs/docs/Develop/logging.mdx b/docs/docs/Develop/logging.mdx
index e69956fdc..f73ec3008 100644
--- a/docs/docs/Develop/logging.mdx
+++ b/docs/docs/Develop/logging.mdx
@@ -4,6 +4,8 @@ slug: /logging
---
import Icon from "@site/src/components/icon";
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
This page provides information about Langflow logs, including logs for individual flows and the Langflow application itself.
@@ -94,6 +96,45 @@ When debugging issues with the format or content of a flow's output, it can help
To view the output produced by a single component during the most recent run, click **Inspect output** on the component in the visual editor.
+## Access Langflow Desktop logs {#desktop-logs}
+
+If you encounter issues with Langflow Desktop, you might need to access startup logs for debugging.
+Follow the steps for your operating system.
+
+
+<Tabs>
+<TabItem value="macos" label="macOS" default>
+
+1. Open Terminal and run:
+ ```bash
+ cd ~/Library/Logs/com.Langflow
+ ```
+
+2. To open the folder and view the log files, run the command:
+ ```bash
+ open .
+ ```
+
+3. Locate the `langflow.log` file.
+
+
+</TabItem>
+<TabItem value="windows" label="Windows">
+
+1. Open the Command Prompt (CMD), and then run the following command:
+ ```cmd
+ cd %LOCALAPPDATA%\com.langflow\logs
+ ```
+
+2. To open the folder and view the log files, run the command:
+ ```cmd
+ start .
+ ```
+
+3. Locate the `langflow.log` file.
+
+
+</TabItem>
+</Tabs>
+
+You can use the log file to investigate the issue on your own, add context to a [GitHub Issue](/contributing-github-issues), or send it to [support](/luna-for-langflow) for debugging assistance.
+
+The log file is created only when Langflow Desktop runs. If you don't see a log file, start Langflow Desktop, and then check again.
## See also
* [Memory management options](/memory)
diff --git a/docs/docs/Get-Started/get-started-installation.mdx b/docs/docs/Get-Started/get-started-installation.mdx
index 917a4ed13..198d5387a 100644
--- a/docs/docs/Get-Started/get-started-installation.mdx
+++ b/docs/docs/Get-Started/get-started-installation.mdx
@@ -26,6 +26,8 @@ However, some features aren't available for Langflow Desktop, such as the **Shar
+Langflow Desktop requires macOS 13 or later.
+
1. Navigate to [Langflow Desktop](https://www.langflow.org/desktop).
2. Click **Download Langflow**, enter your contact information, and then click **Download**.
3. Mount and install the Langflow application.
diff --git a/docs/docs/Integrations/Nvidia/integrations-nvidia-ingest.mdx b/docs/docs/Integrations/Nvidia/integrations-nvidia-ingest.mdx
index 98d025a2e..1db9d0a7e 100644
--- a/docs/docs/Integrations/Nvidia/integrations-nvidia-ingest.mdx
+++ b/docs/docs/Integrations/Nvidia/integrations-nvidia-ingest.mdx
@@ -78,7 +78,7 @@ For more information, see the [NV-Ingest documentation](https://nvidia.github.io
| extract_infographics | Extract Infographics | Extract infographics from document. Default: false. |
| text_depth | Text Depth | The level at which text is extracted. Options: 'document', 'page', 'block', 'line', 'span'. Default: `page`. |
| split_text | Split Text | Split text into smaller chunks. Default: true. |
-| chunk_size | Chunk Size | The number of tokens per chunk. Default: `500`. |
+| chunk_size | Chunk Size | The number of tokens per chunk. Default: `500`. Make sure the chunk size is compatible with your embedding model. For more information, see [Tokenization errors due to chunk size](/components-processing#chunk-size). |
| chunk_overlap | Chunk Overlap | Number of tokens to overlap from previous chunk. Default: `150`. |
| filter_images | Filter Images | Filter images (see advanced options for filtering criteria). Default: false. |
| min_image_size | Minimum Image Size Filter | Minimum image width/length in pixels. Default: `128`. |
diff --git a/docs/docs/Support/luna-for-langflow.mdx b/docs/docs/Support/luna-for-langflow.mdx
index 93c4c6ba0..0b7274e2f 100644
--- a/docs/docs/Support/luna-for-langflow.mdx
+++ b/docs/docs/Support/luna-for-langflow.mdx
@@ -1,20 +1,14 @@
---
-title: Enterprise support for Langflow
+title: IBM Elite Support for Langflow
slug: /luna-for-langflow
---
-With **Luna for Langflow** support, you can develop and deploy Langflow applications with confidence.
+IBM Elite Support offers enterprise support for Langflow to help you develop and deploy Langflow applications with confidence.
-Luna is a subscription to the Langflow expertise at DataStax. It's meant for Langflow users who want all the benefits of running their own open-source deployments, as well as the peace of mind that comes with having direct access to the team that has authored the majority of the Langflow code.
+IBM Elite Support for Langflow is a subscription to the Langflow expertise at IBM.
+It's meant for Langflow users who want all the benefits of running their own open-source deployments, as well as the peace of mind that comes with having direct access to the team that has authored the majority of the Langflow code.
-Luna subscribers can get help with general-purpose and technical questions for their open-source Langflow deployments.
-If an issue is encountered, DataStax is there to help.
+IBM Elite Support subscribers can get help with general-purpose and technical questions for their open-source Langflow deployments.
+If you encounter an issue, IBM is there to help.
-:::info
-As of May 2025, Luna for Langflow support covers Langflow versions 1.4.x.
-
-Subscribers must run a supported Python version to receive support.
-Supported versions are `>=3.10, <3.14`, which includes all version from 3.10 through 3.13.x, but not 3.14.
-:::
-
-To subscribe or learn more, see [Luna for Langflow](https://www.datastax.com/products/luna-langflow).
\ No newline at end of file
+To subscribe or learn more, see [IBM Elite Support for Langflow](https://ibm.com/docs/esfl?topic=elite-support-langflow-specification).
\ No newline at end of file
diff --git a/docs/docs/Support/release-notes.mdx b/docs/docs/Support/release-notes.mdx
index a23ffc08a..f75f7e168 100644
--- a/docs/docs/Support/release-notes.mdx
+++ b/docs/docs/Support/release-notes.mdx
@@ -61,7 +61,7 @@ For all changes, see the [Changelog](https://github.com/langflow-ai/langflow/rel
The [**Language Model** component](/components-models) and [**Embedding Model** component](/components-embedding-models) are now core components for your LLM and embeddings flows. They support multiple models and model providers, and allow you to experiment with different models without swapping out single-provider components.
Find them in the **Components** menu in the **Models** category.
- The single-provider components are still available for your flows in the **Components** menu in the [**Bundles**](/components-bundle-components) section, and you can connect them to the **Language Model** and **Embedding Model** components with the **Custom** provider option.
+ The single-provider components are still available for your flows in the **Components** menu in the [**Bundles**](/components-bundle-components) section, and you can use them to replace the **Language Model** and **Embedding Model** core components, or connect them to the **Agent** component with the **Custom** provider option.
- MCP server one-click installation
@@ -107,7 +107,7 @@ For all changes, see the [Changelog](https://github.com/langflow-ai/langflow/rel
- Enhanced file and flow management system with improved bulk capabilities.
- Added the **BigQuery** component
- Added the **Twelve Labs** bundle
-- Added the **NVIDIA System Assistant** component
+- Added the **NVIDIA G-Assist** component
### Deprecations
diff --git a/docs/docs/Support/troubleshooting.mdx b/docs/docs/Support/troubleshooting.mdx
index cfadf7cf6..86084eb7c 100644
--- a/docs/docs/Support/troubleshooting.mdx
+++ b/docs/docs/Support/troubleshooting.mdx
@@ -124,6 +124,21 @@ There are two possible reasons for this error:
Environment variables set in your terminal aren't automatically available to GUI-based applications like Langflow Desktop when launched through the Finder or the Start Menu.
To set environment variables for Langflow Desktop, see [Set environment variables for Langflow Desktop](/environment-variables#set-environment-variables-for-langflow-desktop).
+### Access Langflow Desktop startup logs
+
+If you encounter issues with Langflow Desktop, you might need to [access Langflow Desktop startup logs](/logging#desktop-logs) for debugging.
+
+### User not found or inactive when running multiple flows
+
+When running multiple local Langflow OSS instances on different ports, such as `localhost:7860` and `localhost:7861`, you might see authentication errors in the logs.
+For example:
+
+```text
+[07/22/25 10:57:07] INFO 2025-07-22 10:57:07 - INFO - utils - User not found or inactive.
+```
+
+To resolve this error, use separate browser instances or browser profiles to access each Langflow instance.
+
### Package is not installed
In Langflow OSS, you can follow the error message's instructions to install the missing dependency.
@@ -146,13 +161,22 @@ The following error can occur during Langflow upgrades when the new version can'
To resolve this error, clear the cache by deleting the contents of your Langflow cache folder.
The filepath depends on your operating system, installation type, and configuration options.
-For more information and default filepaths, see [Memory management options](/memory#flow-storage-and-logs).
+For more information and default filepaths, see [Memory management options](/memory).
:::important
Clearing the cache erases your settings.
If you want to retain your settings files, create a backup of those files before clearing the cache folder.
:::
+### Langflow Desktop says it is running the latest version, but it is actually behind
+
+If you are running Langflow Desktop version 1.4.2 or earlier, the UI might incorrectly report that you are on the latest version when a newer version is available.
+
+This happens because the automatic update feature in the UI was introduced in version 1.4.2.
+Earlier versions can't automatically detect or apply updates.
+
+To resolve this issue, uninstall Langflow Desktop, and then [download and install the latest version of Langflow Desktop](https://langflow.org/desktop).
+
## Langflow uninstall issues
The following issues can occur when uninstalling Langflow.
@@ -174,12 +198,41 @@ To fully remove a Langflow Desktop macOS installation, you must also delete `~/.
- [Use MCP Inspector to test and debug flows](/mcp-server#test-and-debug-flows)
- [Troubleshooting MCP server](/mcp-server#troubleshooting-mcp-server)
+## Token length limit errors in Embedding Model components
+
+Token length errors can happen if your chunking strategy doesn't align with your embedding model's tokenization limits.
+For more information, see [Tokenization errors due to chunk size](/components-processing#chunk-size).
+
## Custom components and integrations issues
For troubleshooting advice for a third-party integration, see the information about that integration in the Langflow documentation and the provider's documentation.
If you are building a custom component, see [Error handling and logging for custom Python components](/components-custom-components#error-handling-and-logging).
+### Custom components not appearing in the visual editor
+
+If your custom components are not appearing in the Langflow visual editor, try the following troubleshooting steps:
+
+1. Ensure your components follow the [required directory structure](/components-custom-components#directory-structure-requirements).
+ ```
+ /your/custom/components/path/ # Base directory set by LANGFLOW_COMPONENTS_PATH
+ └── category_name/ # Required category subfolder that determines menu name
+ ├── __init__.py # Required
+ └── custom_component.py # Component file
+ ```
+
+2. Verify each category directory includes an `__init__.py` file.
+This is required for Python to recognize the directory as a module.
+
+3. If you're using the `LANGFLOW_COMPONENTS_PATH` environment variable and components aren't loading, try the `--components-path` command line argument instead:
+
+ ```bash
+ uv run langflow run --components-path /path/to/your/custom/components
+ ```
+
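+To rule out problems with the component file itself, you can test with a minimal component. The following sketch follows the general custom component pattern from [Custom components](/components-custom-components); the category folder, file name, and class name are illustrative only:
+
+```python
+# custom_component.py, placed inside a category folder that also contains __init__.py.
+from langflow.custom import Component
+from langflow.io import MessageTextInput, Output
+from langflow.schema import Data
+
+
+class HelloWorldComponent(Component):
+    display_name = "Hello World"
+    description = "Minimal component for verifying that custom components load."
+
+    inputs = [
+        MessageTextInput(name="input_value", display_name="Input", value="World"),
+    ]
+    outputs = [
+        Output(display_name="Output", name="output", method="build_output"),
+    ]
+
+    def build_output(self) -> Data:
+        # Return a Data object so the result is visible with Inspect output.
+        data = Data(data={"text": f"Hello, {self.input_value}!"})
+        self.status = data
+        return data
+```
+
+If this minimal component appears in the visual editor but your own components don't, compare your files against this structure and check the Langflow logs for import errors.
+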
+If you continue to experience issues, please [report them on GitHub](https://github.com/langflow-ai/langflow/issues) with details about your directory structure and component setup.
+
## See also
- [Langflow GitHub Issues and Discussions](/contributing-github-issues)
diff --git a/docs/docs/Tutorials/chat-with-files.mdx b/docs/docs/Tutorials/chat-with-files.mdx
index b9bb2cccb..3400446f0 100644
--- a/docs/docs/Tutorials/chat-with-files.mdx
+++ b/docs/docs/Tutorials/chat-with-files.mdx
@@ -31,8 +31,10 @@ The following steps modify the **Basic Prompting** template to accept file input
2. In the **Language Model** component, enter your OpenAI API key.
If you want to use a different provider or model, edit the **Model Provider**, **Model Name**, and **API Key** fields accordingly.
+
3. To verify that your API key is valid, click **Playground**, and then ask the LLM a question.
The LLM should respond according to the specifications in the **Prompt Template** component's **Template** field.
+
4. Exit the **Playground**, and then modify the **Prompt Template** component to accept file input in addition to chat input.
To do this, edit the **Template** field, and then replace the default prompt with the following text:
diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js
index c80d673b3..fc19cf33f 100644
--- a/docs/docusaurus.config.js
+++ b/docs/docusaurus.config.js
@@ -400,13 +400,6 @@ const config = {
},
},
footer: {
- logo: {
- alt: "Langflow",
- src: "img/lf-docs-light.svg",
- srcDark: "img/lf-docs-dark.svg",
- width: 160,
- height: 40,
- },
links: [
{
title: null,
diff --git a/docs/openapi.json b/docs/openapi.json
index 86d67bd9f..cdb16b6cf 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -2,7 +2,7 @@
"openapi": "3.1.0",
"info": {
"title": "Langflow",
- "version": "1.5.0.post1"
+ "version": "1.5.0.post2"
},
"paths": {
"/api/v1/build/{flow_id}/vertices": {
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 689b4032b..1b9ffa880 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -471,7 +471,7 @@ module.exports = {
{
type: "doc",
id: "Support/luna-for-langflow",
- label: "Enterprise support",
+ label: "IBM Elite Support for Langflow",
},
],
},
diff --git a/docs/src/theme/Footer.js b/docs/src/theme/Footer.js
index aed2e940b..2a8b4a5e9 100644
--- a/docs/src/theme/Footer.js
+++ b/docs/src/theme/Footer.js
@@ -24,8 +24,8 @@ export default function FooterWrapper(props) {
onMouseLeave={() => setIsHovered(false)}
style={{
position: 'fixed',
- right: '20px',
- bottom: '20px',
+ right: '21px',
+ bottom: '21px',
zIndex: 100,
display: 'flex',
alignItems: 'center',
diff --git a/docs/static/img/conditional-looping.png b/docs/static/img/conditional-looping.png
new file mode 100644
index 000000000..3f3efadab
Binary files /dev/null and b/docs/static/img/conditional-looping.png differ
diff --git a/pyproject.toml b/pyproject.toml
index 57e0f45d6..52460dfff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "langflow"
-version = "1.5.0.post1"
+version = "1.6.0"
description = "A Python package with a built-in web application"
requires-python = ">=3.10,<3.14"
license = "MIT"
@@ -31,7 +31,7 @@ dependencies = [
"faiss-cpu==1.9.0.post1",
"types-cachetools==5.5.0.20240820",
"pymongo==4.10.1",
- "supabase==2.6.0",
+ "supabase>=2.6.0,<3.0.0",
"certifi>=2023.11.17,<2025.0.0",
"certifi==2024.8.30",
'fastavro==1.9.7; python_version < "3.13"',
@@ -52,7 +52,7 @@ dependencies = [
"dspy-ai==2.5.41",
"datasets>2.14.7",
"assemblyai==0.35.1",
- "litellm==1.60.2",
+ "litellm>=1.60.2,<2.0.0",
"chromadb==0.5.23",
"zep-python==2.0.2",
"youtube-transcript-api==0.6.3",
@@ -61,13 +61,13 @@ dependencies = [
"GitPython==3.1.43",
"kubernetes==31.0.0",
"json_repair==0.30.3",
- "langwatch==0.1.16",
+ "langwatch>=0.2.11,<0.3.0",
"langsmith>=0.3.42,<1.0.0",
"yfinance==0.2.50",
"wolframalpha==5.1.3",
- "astra-assistants[tools]~=2.2.12",
- "composio-langchain==0.7.15",
- "composio-core==0.7.15",
+ "astra-assistants[tools]>=2.2.13,<3.0.0",
+ "composio==0.8.5",
+ "composio-langchain==0.8.5",
"spider-client==0.1.24",
"nltk==3.9.1",
"lark==1.2.2",
@@ -77,10 +77,11 @@ dependencies = [
"opensearch-py==2.8.0",
"langchain-google-genai==2.0.6",
"langchain-cohere==0.3.3",
+ "langchain-huggingface==0.3.1",
"langchain-anthropic==0.3.14",
"langchain-astradb~=0.6.0",
"langchain-openai>=0.2.12",
- "langchain-google-vertexai==2.0.7",
+ "langchain-google-vertexai>=2.0.7,<3.0.0",
"langchain-groq==0.2.1",
"langchain-pinecone>=0.2.8",
"langchain-mistralai==0.2.3",
@@ -112,8 +113,6 @@ dependencies = [
"pydantic-ai>=0.0.19",
"smolagents>=1.8.0",
"apify-client>=1.8.1",
- "pylint>=3.3.4",
- "ruff>=0.9.7",
"langchain-graph-retriever==0.6.1",
"graph-retriever==0.6.1",
"ibm-watsonx-ai>=1.3.1",
@@ -126,6 +125,10 @@ dependencies = [
"docling_core>=2.36.1",
"filelock>=3.18.0",
"jigsawstack==0.2.7",
+ "structlog>=25.4.0",
+ "aiosqlite==0.21.0",
+ "fastparquet>=2024.11.0",
+ "traceloop-sdk>=0.43.1",
]
[dependency-groups]
@@ -134,8 +137,8 @@ dev = [
"types-redis>=4.6.0.5",
"ipykernel>=6.29.0",
"mypy>=1.11.0",
- "ruff>=0.9.7,<0.10",
- "httpx>=0.27.0",
+ "ruff>=0.12.7",
+ "httpx>=0.28.1",
"pytest>=8.2.0",
"types-requests>=2.32.0",
"requests>=2.32.0",
@@ -176,6 +179,7 @@ dev = [
"pytest-timeout>=2.3.1",
"pyyaml>=6.0.2",
"pyleak>=0.1.14",
+ "docling>=2.36.1"
]
[tool.uv.sources]
@@ -195,6 +199,9 @@ Documentation = "https://docs.langflow.org"
[project.optional-dependencies]
docling = [
"docling>=2.36.1",
+ "tesserocr>=2.8.0",
+ "rapidocr-onnxruntime>=1.4.4",
+ "ocrmac>=1.0.0; sys_platform == 'darwin'",
]
audio = [
@@ -217,8 +224,12 @@ clickhouse-connect = [
]
nv-ingest = [
- "nv-ingest-api==2025.4.22.dev20250422",
- "nv-ingest-client==2025.4.22.dev20250422",
+ # NOTE: These must be removed in order to run `uv lock --upgrade` or `uv sync --upgrade`
+ # due to incompatibility with <3.12 and how uv handles lockfile creation.
+ # If upgrading, ensure `uv lock` and `uv sync` are run after upgrade with these,
+ # and that afterward, the lockfile contains nv-ingest.
+ "nv-ingest-api==25.6.2,<26.0.0 ; python_version >= '3.12'",
+ "nv-ingest-client==25.6.3,<26.0.0 ; python_version >= '3.12'",
]
postgresql = [
@@ -254,6 +265,7 @@ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
markers = ["async_test", "api_key_required", "no_blockbuster", "benchmark"]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
+addopts = "-p no:benchmark"
[tool.coverage.run]
command_line = """
@@ -296,7 +308,9 @@ ignore = [
"TD002", # Missing author in TODO
"TD003", # Missing issue link in TODO
"TRY301", # A bit too harsh (Abstract `raise` to an inner function)
-
+ "PLC0415", # Inline imports
+ "D10", # Missing docstrings
+ "PLW1641", # Object does not implement `__hash__` method (mutable objects shouldn't be hashable)
# Rules that are TODOs
"ANN",
]
@@ -306,6 +320,7 @@ external = ["RUF027"]
[tool.ruff.lint.per-file-ignores]
"scripts/*" = ["D1", "INP", "T201"]
+"src/backend/base/langflow/alembic/versions/*" = ["INP001", "D415", "PGH003"]
"src/backend/tests/*" = [
"D1",
"PLR2004",
diff --git a/scripts/windows/build_and_run.bat b/scripts/windows/build_and_run.bat
index 249322e68..26f79867e 100644
--- a/scripts/windows/build_and_run.bat
+++ b/scripts/windows/build_and_run.bat
@@ -1,14 +1,14 @@
@echo off
echo Starting Langflow build and run process...
-REM Check if .env file exists and set env file parameter
-set "ENV_FILE_PARAM="
+REM Check if .env file exists and set env file flag
+set "USE_ENV_FILE="
REM Get the script directory and resolve project root
for %%I in ("%~dp0..\..") do set "PROJECT_ROOT=%%~fI"
set "ENV_PATH=%PROJECT_ROOT%\.env"
if exist "%ENV_PATH%" (
echo Found .env file at: %ENV_PATH%
- set "ENV_FILE_PARAM=--env-file \"%ENV_PATH%\""
+ set "USE_ENV_FILE=1"
) else (
echo .env file not found at: %ENV_PATH%
echo Langflow will use default configuration
@@ -85,8 +85,8 @@ echo Step 4: Running Langflow...
echo.
echo Attention: Wait until uvicorn is running before opening the browser
echo.
-if defined ENV_FILE_PARAM (
- uv run langflow run %ENV_FILE_PARAM%
+if defined USE_ENV_FILE (
+ uv run --env-file "%ENV_PATH%" langflow run
) else (
uv run langflow run
)
diff --git a/scripts/windows/build_and_run.ps1 b/scripts/windows/build_and_run.ps1
index e0ab1c90c..164592e8f 100644
--- a/scripts/windows/build_and_run.ps1
+++ b/scripts/windows/build_and_run.ps1
@@ -87,7 +87,7 @@ Write-Host "`nStep 4: Running Langflow..." -ForegroundColor Yellow
Write-Host "`nAttention: Wait until uvicorn is running before opening the browser" -ForegroundColor Red
try {
if ($useEnvFile) {
- & uv run langflow run --env-file $envPath
+ & uv run --env-file $envPath langflow run
} else {
& uv run langflow run
}
diff --git a/src/backend/base/langflow/__main__.py b/src/backend/base/langflow/__main__.py
index 470c133af..804c19134 100644
--- a/src/backend/base/langflow/__main__.py
+++ b/src/backend/base/langflow/__main__.py
@@ -15,7 +15,9 @@ import click
import httpx
import typer
from dotenv import load_dotenv
+from fastapi import HTTPException
from httpx import HTTPError
+from jose import JWTError
from multiprocess import cpu_count
from multiprocess.context import Process
from packaging import version as pkg_version
@@ -29,9 +31,9 @@ from langflow.cli.progress import create_langflow_progress
from langflow.initial_setup.setup import get_or_create_default_folder
from langflow.logging.logger import configure, logger
from langflow.main import setup_app
-from langflow.services.database.utils import session_getter
+from langflow.services.auth.utils import check_key, get_current_user_by_jwt
from langflow.services.deps import get_db_service, get_settings_service, session_scope
-from langflow.services.settings.constants import DEFAULT_SUPERUSER
+from langflow.services.settings.constants import DEFAULT_SUPERUSER, DEFAULT_SUPERUSER_PASSWORD
from langflow.services.utils import initialize_services
from langflow.utils.version import fetch_latest_version, get_version_info
from langflow.utils.version import is_pre_release as langflow_is_pre_release
@@ -160,7 +162,7 @@ def wait_for_server_ready(host, port, protocol) -> None:
except HTTPError:
time.sleep(1)
except Exception: # noqa: BLE001
- logger.opt(exception=True).debug("Error while waiting for the server to become ready.")
+ logger.debug("Error while waiting for the server to become ready.", exc_info=True)
time.sleep(1)
@@ -632,41 +634,138 @@ def print_banner(host: str, port: int, protocol: str) -> None:
@app.command()
def superuser(
- username: str = typer.Option(..., prompt=True, help="Username for the superuser."),
- password: str = typer.Option(..., prompt=True, hide_input=True, help="Password for the superuser."),
+ username: str = typer.Option(
+ None, help="Username for the superuser. Defaults to 'langflow' when AUTO_LOGIN is enabled."
+ ),
+ password: str = typer.Option(
+ None, help="Password for the superuser. Defaults to 'langflow' when AUTO_LOGIN is enabled."
+ ),
log_level: str = typer.Option("error", help="Logging level.", envvar="LANGFLOW_LOG_LEVEL"),
+ auth_token: str = typer.Option(
+ None, help="Authentication token of existing superuser.", envvar="LANGFLOW_SUPERUSER_TOKEN"
+ ),
) -> None:
- """Create a superuser."""
+ """Create a superuser.
+
+ When AUTO_LOGIN is enabled, uses default credentials.
+ In production mode, requires authentication.
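+
+    Example (production mode, assuming an existing superuser token or API key):
+        uv run langflow superuser --username admin --auth-token "$LANGFLOW_SUPERUSER_TOKEN"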
+ """
configure(log_level=log_level)
- db_service = get_db_service()
- async def _create_superuser():
- await initialize_services()
- async with session_getter(db_service) as session:
- from langflow.services.auth.utils import create_super_user
+ asyncio.run(_create_superuser(username, password, auth_token))
- if await create_super_user(db=session, username=username, password=password):
- # Verify that the superuser was created
- from langflow.services.database.models.user.model import User
- stmt = select(User).where(User.username == username)
- user: User = (await session.exec(stmt)).first()
- if user is None or not user.is_superuser:
- typer.echo("Superuser creation failed.")
- return
- # Now create the first folder for the user
- result = await get_or_create_default_folder(session, user.id)
- if result:
- typer.echo("Default folder created successfully.")
- else:
- msg = "Could not create default folder."
- raise RuntimeError(msg)
- typer.echo("Superuser created successfully.")
+async def _create_superuser(username: str | None, password: str | None, auth_token: str | None) -> None:
+ """Create a superuser."""
+ await initialize_services()
- else:
+ settings_service = get_settings_service()
+ # Check if superuser creation via CLI is enabled
+ if not settings_service.auth_settings.ENABLE_SUPERUSER_CLI:
+ typer.echo("Error: Superuser creation via CLI is disabled.")
+ typer.echo("Set LANGFLOW_ENABLE_SUPERUSER_CLI=true to enable this feature.")
+ raise typer.Exit(1)
+
+ if settings_service.auth_settings.AUTO_LOGIN:
+ # Force default credentials for AUTO_LOGIN mode
+ username = DEFAULT_SUPERUSER
+ password = DEFAULT_SUPERUSER_PASSWORD
+ else:
+ # Production mode - prompt for credentials if not provided
+ if not username:
+ username = typer.prompt("Username")
+ if not password:
+ password = typer.prompt("Password", hide_input=True)
+
+ from langflow.services.database.models.user.crud import get_all_superusers
+
+ existing_superusers = []
+ async with session_scope() as session:
+ # Note that the default superuser is created by the initialize_services() function,
+ # but leaving this check here in case we change that behavior
+ existing_superusers = await get_all_superusers(session)
+ is_first_setup = len(existing_superusers) == 0
+
+ # If AUTO_LOGIN is true, only allow default superuser creation
+ if settings_service.auth_settings.AUTO_LOGIN:
+ if not is_first_setup:
+ typer.echo("Error: Cannot create additional superusers when AUTO_LOGIN is enabled.")
+ typer.echo("AUTO_LOGIN mode is for development with only the default superuser.")
+ typer.echo("To create additional superusers:")
+ typer.echo("1. Set LANGFLOW_AUTO_LOGIN=false")
+ typer.echo("2. Run this command again with --auth-token")
+ raise typer.Exit(1)
+
+ typer.echo(f"AUTO_LOGIN enabled. Creating default superuser '{username}'...")
+ typer.echo(f"Note: Default credentials are {DEFAULT_SUPERUSER}/{DEFAULT_SUPERUSER_PASSWORD}")
+ # AUTO_LOGIN is false - production mode
+ elif is_first_setup:
+ typer.echo("No superusers found. Creating first superuser...")
+ else:
+ # Authentication is required in production mode
+ if not auth_token:
+ typer.echo("Error: Creating a superuser requires authentication.")
+ typer.echo("Please provide --auth-token with a valid superuser API key or JWT token.")
+ typer.echo("To get a token, use: `uv run langflow api_key`")
+ raise typer.Exit(1)
+
+ # Validate the auth token
+ try:
+ auth_user = None
+ async with session_scope() as session:
+ # Try JWT first
+ user = None
+ try:
+ user = await get_current_user_by_jwt(auth_token, session)
+ except (JWTError, HTTPException):
+ # Try API key
+ api_key_result = await check_key(session, auth_token)
+ if api_key_result and hasattr(api_key_result, "is_superuser"):
+ user = api_key_result
+ auth_user = user
+
+ if not auth_user or not auth_user.is_superuser:
+ typer.echo(
+ "Error: Invalid token or insufficient privileges. Only superusers can create other superusers."
+ )
+ raise typer.Exit(1)
+ except typer.Exit:
+ raise # Re-raise typer.Exit without wrapping
+ except Exception as e: # noqa: BLE001
+ typer.echo(f"Error: Authentication failed - {e!s}")
+ raise typer.Exit(1) from None
+
+ # Auth complete, create the superuser
+ async with session_scope() as session:
+ from langflow.services.auth.utils import create_super_user
+
+ if await create_super_user(db=session, username=username, password=password):
+ # Verify that the superuser was created
+ from langflow.services.database.models.user.model import User
+
+ stmt = select(User).where(User.username == username)
+ created_user: User = (await session.exec(stmt)).first()
+ if created_user is None or not created_user.is_superuser:
typer.echo("Superuser creation failed.")
+ return
+ # Now create the first folder for the user
+ result = await get_or_create_default_folder(session, created_user.id)
+ if result:
+ typer.echo("Default folder created successfully.")
+ else:
+ msg = "Could not create default folder."
+ raise RuntimeError(msg)
- asyncio.run(_create_superuser())
+ # Log the superuser creation for audit purposes
+ logger.warning(
+ f"SECURITY AUDIT: New superuser '{username}' created via CLI command"
+ + (" by authenticated user" if auth_token else " (first-time setup)")
+ )
+ typer.echo("Superuser created successfully.")
+
+ else:
+ logger.error(f"SECURITY AUDIT: Failed attempt to create superuser '{username}' via CLI")
+ typer.echo("Superuser creation failed.")
# command to copy the langflow database from the cache to the current directory
@@ -749,6 +848,7 @@ def api_key(
settings_service = get_settings_service()
auth_settings = settings_service.auth_settings
if not auth_settings.AUTO_LOGIN:
+ # TODO: Allow non-auto-login users to create API keys via CLI
typer.echo("Auto login is disabled. API keys cannot be created through the CLI.")
return None
diff --git a/src/backend/base/langflow/alembic/versions/0882f9657f22_encrypt_existing_mcp_auth_settings_.py b/src/backend/base/langflow/alembic/versions/0882f9657f22_encrypt_existing_mcp_auth_settings_.py
new file mode 100644
index 000000000..1c0e09896
--- /dev/null
+++ b/src/backend/base/langflow/alembic/versions/0882f9657f22_encrypt_existing_mcp_auth_settings_.py
@@ -0,0 +1,122 @@
+"""Encrypt existing MCP auth_settings credentials
+
+Revision ID: 0882f9657f22
+Revises: 1cb603706752
+Create Date: 2025-08-21 20:11:26.504681
+
+"""
+import json
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel
+from sqlalchemy.engine.reflection import Inspector
+from langflow.utils import migration
+
+
+# revision identifiers, used by Alembic.
+revision: str = '0882f9657f22'
+down_revision: Union[str, None] = '1cb603706752'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+ """Encrypt sensitive fields in existing auth_settings data."""
+ conn = op.get_bind()
+
+ # Import encryption utilities
+ try:
+ from langflow.services.auth.mcp_encryption import encrypt_auth_settings
+
+ # Check if the folder table exists
+ inspector = sa.inspect(conn)
+ if 'folder' not in inspector.get_table_names():
+ return
+
+ # Query all folders with auth_settings
+ result = conn.execute(
+ sa.text("SELECT id, auth_settings FROM folder WHERE auth_settings IS NOT NULL")
+ )
+
+ # Encrypt auth_settings for each folder
+ for row in result:
+ folder_id = row.id
+ auth_settings = row.auth_settings
+
+ if auth_settings:
+ try:
+ # Parse JSON if it's a string
+ if isinstance(auth_settings, str):
+ auth_settings_dict = json.loads(auth_settings)
+ else:
+ auth_settings_dict = auth_settings
+
+ # Encrypt sensitive fields
+ encrypted_settings = encrypt_auth_settings(auth_settings_dict)
+
+ # Update the record with encrypted data
+ if encrypted_settings:
+ conn.execute(
+ sa.text("UPDATE folder SET auth_settings = :auth_settings WHERE id = :id"),
+ {"auth_settings": json.dumps(encrypted_settings), "id": folder_id}
+ )
+ except Exception as e:
+ # Log the error but continue with other records
+ print(f"Warning: Failed to encrypt auth_settings for folder {folder_id}: {e}")
+
+ except ImportError as e:
+ # If encryption utilities are not available, skip the migration
+ print(f"Warning: Encryption utilities not available, skipping encryption migration: {e}")
+
+
+def downgrade() -> None:
+ """Decrypt sensitive fields in auth_settings data (for rollback)."""
+ conn = op.get_bind()
+
+ # Import decryption utilities
+ try:
+ from langflow.services.auth.mcp_encryption import decrypt_auth_settings
+
+ # Check if the folder table exists
+ inspector = sa.inspect(conn)
+ if 'folder' not in inspector.get_table_names():
+ return
+
+ # Query all folders with auth_settings
+ result = conn.execute(
+ sa.text("SELECT id, auth_settings FROM folder WHERE auth_settings IS NOT NULL")
+ )
+
+ # Decrypt auth_settings for each folder
+ for row in result:
+ folder_id = row.id
+ auth_settings = row.auth_settings
+
+ if auth_settings:
+ try:
+ # Parse JSON if it's a string
+ if isinstance(auth_settings, str):
+ auth_settings_dict = json.loads(auth_settings)
+ else:
+ auth_settings_dict = auth_settings
+
+ # Decrypt sensitive fields
+ decrypted_settings = decrypt_auth_settings(auth_settings_dict)
+
+ # Update the record with decrypted data
+ if decrypted_settings:
+ conn.execute(
+ sa.text("UPDATE folder SET auth_settings = :auth_settings WHERE id = :id"),
+ {"auth_settings": json.dumps(decrypted_settings), "id": folder_id}
+ )
+ except Exception as e:
+ # Log the error but continue with other records
+ print(f"Warning: Failed to decrypt auth_settings for folder {folder_id}: {e}")
+
+ except ImportError as e:
+ # If decryption utilities are not available, skip the migration
+ print(f"Warning: Decryption utilities not available, skipping decryption migration: {e}")
diff --git a/src/backend/base/langflow/alembic/versions/1cb603706752_modify_uniqueness_constraint_on_file_.py b/src/backend/base/langflow/alembic/versions/1cb603706752_modify_uniqueness_constraint_on_file_.py
new file mode 100644
index 000000000..057777e4f
--- /dev/null
+++ b/src/backend/base/langflow/alembic/versions/1cb603706752_modify_uniqueness_constraint_on_file_.py
@@ -0,0 +1,279 @@
+"""Modify uniqueness constraint on file names
+
+Revision ID: 1cb603706752
+Revises: 3162e83e485f
+Create Date: 2025-07-24 07:02:14.896583
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import time
+from typing import Sequence, Union, Iterable, Optional, Set, Tuple
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import inspect
+
+# revision identifiers, used by Alembic.
+revision: str = "1cb603706752"
+down_revision: Union[str, None] = "3162e83e485f"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+logger = logging.getLogger(__name__)
+
+# Behavior constants
+DUPLICATE_SUFFIX_START = 2 # first suffix to use, e.g., "name_2.ext"
+BATCH_SIZE = 1000 # Process duplicates in batches for large datasets
+
+
+def _get_unique_constraints_by_columns(
+ inspector, table: str, expected_cols: Iterable[str]
+) -> Optional[str]:
+ """Return the name of a unique constraint that matches the exact set of expected columns."""
+ expected = set(expected_cols)
+ for c in inspector.get_unique_constraints(table):
+ cols = set(c.get("column_names") or [])
+ if cols == expected:
+ return c.get("name")
+ return None
+
+
+def _split_base_ext(name: str) -> Tuple[str, str]:
+ """Split a filename into (base, ext) where ext does not include the leading dot; ext may be ''."""
+ if "." in name:
+ base, ext = name.rsplit(".", 1)
+ return base, ext
+ return name, ""
+
+
+def _escape_like(s: str) -> str:
+ # escape backslash first, then SQL LIKE wildcards
+ return s.replace("\\", "\\\\").replace("%", r"\%").replace("_", r"\_")
+
+
+def _like_for_suffixes(base: str, ext: str) -> str:
+ eb = _escape_like(base)
+ if ext:
+ ex = ext.replace("%", r"\%").replace("_", r"\_")
+ return f"{eb}\\_%." + ex # literal underscore
+ else:
+ return f"{eb}\\_%"
+
+
+def _next_available_name(conn, user_id: str, base_name: str) -> str:
+ """
+ Compute the next available non-conflicting name for a given user.
+ Handles names with or without extensions and existing _N suffixes.
+ """
+ base, ext = _split_base_ext(base_name)
+
+ # Load all sibling names once
+ rows = conn.execute(
+ sa.text("""
+ SELECT name
+ FROM file
+ WHERE user_id = :uid
+ AND (name = :base_name OR name LIKE :like ESCAPE '\\')
+ """),
+ {"uid": user_id, "base_name": base_name, "like": _like_for_suffixes(base, ext)},
+ ).scalars().all()
+
+ taken: Set[str] = set(rows)
+
+ # Pattern to detect base_N(.ext) and capture N
+ if ext:
+ rx = re.compile(rf"^{re.escape(base)}_(\d+)\.{re.escape(ext)}$")
+ else:
+ rx = re.compile(rf"^{re.escape(base)}_(\d+)$")
+
+ max_n = 1
+ for n in rows:
+ m = rx.match(n)
+ if m:
+ max_n = max(max_n, int(m.group(1)))
+
+ n = max(max_n + 1, DUPLICATE_SUFFIX_START)
+ while True:
+ candidate = f"{base}_{n}.{ext}" if ext else f"{base}_{n}"
+ if candidate not in taken:
+ return candidate
+ n += 1
+
+
+def _handle_duplicates_before_upgrade(conn) -> None:
+ """
+ Ensure (user_id, name) is unique by renaming older duplicates before adding the composite unique constraint.
+ Keeps the most recently updated/created/id-highest record; renames the rest with _N suffix.
+ """
+ logger.info("Scanning for duplicate file names per user...")
+ duplicates = conn.execute(
+ sa.text(
+ """
+ SELECT user_id, name, COUNT(*) AS cnt
+ FROM file
+ GROUP BY user_id, name
+ HAVING COUNT(*) > 1
+ """
+ )
+ ).fetchall()
+
+ if not duplicates:
+ logger.info("No duplicates found.")
+ return
+
+ logger.info("Found %d duplicate sets. Resolving...", len(duplicates))
+
+ # Add progress indicator for large datasets
+ if len(duplicates) > 100:
+ logger.info("Large number of duplicates detected. This may take several minutes...")
+
+ # Wrap in a nested transaction so we fail cleanly on any error
+ with conn.begin_nested():
+ # Process duplicates in batches for better performance on large datasets
+ for batch_start in range(0, len(duplicates), BATCH_SIZE):
+ batch_end = min(batch_start + BATCH_SIZE, len(duplicates))
+ batch = duplicates[batch_start:batch_end]
+
+ if len(duplicates) > BATCH_SIZE:
+ logger.info("Processing batch %d-%d of %d duplicate sets...",
+ batch_start + 1, batch_end, len(duplicates))
+
+ for user_id, name, cnt in batch:
+ logger.debug("Resolving duplicates for user=%s, name=%r (count=%s)", user_id, name, cnt)
+
+ file_ids = conn.execute(
+ sa.text(
+ """
+ SELECT id
+ FROM file
+ WHERE user_id = :uid AND name = :name
+ ORDER BY updated_at DESC, created_at DESC, id DESC
+ """
+ ),
+ {"uid": user_id, "name": name},
+ ).scalars().all()
+
+ # Keep the first (most recent), rename the rest
+ for file_id in file_ids[1:]:
+ new_name = _next_available_name(conn, user_id, name)
+ conn.execute(
+ sa.text("UPDATE file SET name = :new_name WHERE id = :fid"),
+ {"new_name": new_name, "fid": file_id},
+ )
+ logger.debug("Renamed id=%s: %r -> %r", file_id, name, new_name)
+
+ # Progress update for large batches
+ if len(duplicates) > BATCH_SIZE and batch_end < len(duplicates):
+ logger.info("Completed %d of %d duplicate sets (%.1f%%)",
+ batch_end, len(duplicates), (batch_end / len(duplicates)) * 100)
+
+ logger.info("Duplicate resolution completed.")
+
+
+def upgrade() -> None:
+ start_time = time.time()
+ logger.info("Starting upgrade: adding composite unique (name, user_id) on file")
+
+ conn = op.get_bind()
+ inspector = inspect(conn)
+
+ # 1) Resolve pre-existing duplicates so the new unique can be created
+ duplicate_start = time.time()
+ _handle_duplicates_before_upgrade(conn)
+ duplicate_duration = time.time() - duplicate_start
+
+ if duplicate_duration > 1.0: # Only log if it took more than 1 second
+ logger.info("Duplicate resolution completed in %.2f seconds", duplicate_duration)
+
+ # 2) Detect existing single-column unique on name (if any)
+ inspector = inspect(conn) # refresh inspector
+ single_name_uc = _get_unique_constraints_by_columns(inspector, "file", {"name"})
+ composite_uc = _get_unique_constraints_by_columns(inspector, "file", {"name", "user_id"})
+
+    # 3) Use a unified, reflection-based batch_alter_table for both Postgres and SQLite.
+    #    recreate="always" forces a copy-and-move table rebuild, which SQLite requires for constraint changes.
+ constraint_start = time.time()
+ with op.batch_alter_table("file", recreate="always") as batch_op:
+ # Drop old single-column unique if present
+ if single_name_uc:
+ logger.info("Dropping existing single-column unique: %s", single_name_uc)
+ batch_op.drop_constraint(single_name_uc, type_="unique")
+
+ # Create composite unique if not already present
+ if not composite_uc:
+ logger.info("Creating composite unique: file_name_user_id_key on (name, user_id)")
+ batch_op.create_unique_constraint("file_name_user_id_key", ["name", "user_id"])
+ else:
+ logger.info("Composite unique already present: %s", composite_uc)
+
+ constraint_duration = time.time() - constraint_start
+ if constraint_duration > 1.0: # Only log if it took more than 1 second
+ logger.info("Constraint operations completed in %.2f seconds", constraint_duration)
+
+ total_duration = time.time() - start_time
+ logger.info("Upgrade completed successfully in %.2f seconds", total_duration)
+
+
+def downgrade() -> None:
+ start_time = time.time()
+ logger.info("Starting downgrade: reverting to single-column unique on (name)")
+
+ conn = op.get_bind()
+ inspector = inspect(conn)
+
+ # 1) Ensure no cross-user duplicates on name (since we'll enforce global uniqueness on name)
+ logger.info("Checking for cross-user duplicate names prior to downgrade...")
+ validation_start = time.time()
+
+ dup_names = conn.execute(
+ sa.text(
+ """
+ SELECT name, COUNT(*) AS cnt
+ FROM file
+ GROUP BY name
+ HAVING COUNT(*) > 1
+ """
+ )
+ ).fetchall()
+
+ validation_duration = time.time() - validation_start
+ if validation_duration > 1.0: # Only log if it took more than 1 second
+ logger.info("Validation completed in %.2f seconds", validation_duration)
+
+ if dup_names:
+ examples = [row[0] for row in dup_names[:10]]
+ raise RuntimeError(
+ "Downgrade aborted: duplicate names exist across users. "
+ f"Examples: {examples}{'...' if len(dup_names) > 10 else ''}. "
+ "Rename conflicting files before downgrading."
+ )
+
+ # 2) Detect constraints
+ inspector = inspect(conn) # refresh
+ composite_uc = _get_unique_constraints_by_columns(inspector, "file", {"name", "user_id"})
+ single_name_uc = _get_unique_constraints_by_columns(inspector, "file", {"name"})
+
+ # 3) Perform alteration using batch with reflect to preserve other objects
+ constraint_start = time.time()
+ with op.batch_alter_table("file", recreate="always") as batch_op:
+ if composite_uc:
+ logger.info("Dropping composite unique: %s", composite_uc)
+ batch_op.drop_constraint(composite_uc, type_="unique")
+ else:
+ logger.info("No composite unique found to drop.")
+
+ if not single_name_uc:
+ logger.info("Creating single-column unique: file_name_key on (name)")
+ batch_op.create_unique_constraint("file_name_key", ["name"])
+ else:
+ logger.info("Single-column unique already present: %s", single_name_uc)
+
+ constraint_duration = time.time() - constraint_start
+ if constraint_duration > 1.0: # Only log if it took more than 1 second
+ logger.info("Constraint operations completed in %.2f seconds", constraint_duration)
+
+ total_duration = time.time() - start_time
+ logger.info("Downgrade completed successfully in %.2f seconds", total_duration)
diff --git a/src/backend/base/langflow/alembic/versions/4e5980a44eaa_fix_date_times_again.py b/src/backend/base/langflow/alembic/versions/4e5980a44eaa_fix_date_times_again.py
index 089949e30..f5e52926b 100644
--- a/src/backend/base/langflow/alembic/versions/4e5980a44eaa_fix_date_times_again.py
+++ b/src/backend/base/langflow/alembic/versions/4e5980a44eaa_fix_date_times_again.py
@@ -6,19 +6,19 @@ Create Date: 2024-04-12 18:11:06.454037
"""
-from typing import Sequence, Union
+from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
-from loguru import logger
from sqlalchemy.dialects import postgresql
-from sqlalchemy.engine.reflection import Inspector
+
+from langflow.logging.logger import logger
# revision identifiers, used by Alembic.
revision: str = "4e5980a44eaa"
-down_revision: Union[str, None] = "79e675cb6752"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | None = "79e675cb6752"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
@@ -37,11 +37,10 @@ def upgrade() -> None:
type_=sa.DateTime(timezone=True),
existing_nullable=False,
)
+ elif created_at_column is None:
+ logger.warning("Column 'created_at' not found in table 'apikey'")
else:
- if created_at_column is None:
- logger.warning("Column 'created_at' not found in table 'apikey'")
- else:
- logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'apikey'")
+ logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'apikey'")
if "variable" in table_names:
columns = inspector.get_columns("variable")
created_at_column = next((column for column in columns if column["name"] == "created_at"), None)
@@ -54,11 +53,10 @@ def upgrade() -> None:
type_=sa.DateTime(timezone=True),
existing_nullable=True,
)
+ elif created_at_column is None:
+ logger.warning("Column 'created_at' not found in table 'variable'")
else:
- if created_at_column is None:
- logger.warning("Column 'created_at' not found in table 'variable'")
- else:
- logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'variable'")
+ logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'variable'")
if updated_at_column is not None and isinstance(updated_at_column["type"], postgresql.TIMESTAMP):
batch_op.alter_column(
"updated_at",
@@ -66,11 +64,10 @@ def upgrade() -> None:
type_=sa.DateTime(timezone=True),
existing_nullable=True,
)
+ elif updated_at_column is None:
+ logger.warning("Column 'updated_at' not found in table 'variable'")
else:
- if updated_at_column is None:
- logger.warning("Column 'updated_at' not found in table 'variable'")
- else:
- logger.warning(f"Column 'updated_at' has type {updated_at_column['type']} in table 'variable'")
+ logger.warning(f"Column 'updated_at' has type {updated_at_column['type']} in table 'variable'")
# ### end Alembic commands ###
@@ -92,11 +89,10 @@ def downgrade() -> None:
type_=postgresql.TIMESTAMP(),
existing_nullable=True,
)
+ elif updated_at_column is None:
+ logger.warning("Column 'updated_at' not found in table 'variable'")
else:
- if updated_at_column is None:
- logger.warning("Column 'updated_at' not found in table 'variable'")
- else:
- logger.warning(f"Column 'updated_at' has type {updated_at_column['type']} in table 'variable'")
+ logger.warning(f"Column 'updated_at' has type {updated_at_column['type']} in table 'variable'")
if created_at_column is not None and isinstance(created_at_column["type"], sa.DateTime):
batch_op.alter_column(
"created_at",
@@ -104,11 +100,10 @@ def downgrade() -> None:
type_=postgresql.TIMESTAMP(),
existing_nullable=True,
)
+ elif created_at_column is None:
+ logger.warning("Column 'created_at' not found in table 'variable'")
else:
- if created_at_column is None:
- logger.warning("Column 'created_at' not found in table 'variable'")
- else:
- logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'variable'")
+ logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'variable'")
if "apikey" in table_names:
columns = inspector.get_columns("apikey")
@@ -121,10 +116,9 @@ def downgrade() -> None:
type_=postgresql.TIMESTAMP(),
existing_nullable=False,
)
+ elif created_at_column is None:
+ logger.warning("Column 'created_at' not found in table 'apikey'")
else:
- if created_at_column is None:
- logger.warning("Column 'created_at' not found in table 'apikey'")
- else:
- logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'apikey'")
+ logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'apikey'")
# ### end Alembic commands ###
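The hunks above and in the migrations that follow mostly flatten nested `if/else` branches into `elif`; the underlying pattern is the same throughout: reflect the column, convert it only if it still carries the legacy type, otherwise log a warning. A hedged sketch of that pattern, using stdlib logging and an illustrative table name:

```python
import logging

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

logger = logging.getLogger(__name__)


def upgrade() -> None:
    conn = op.get_bind()
    inspector = sa.inspect(conn)
    columns = inspector.get_columns("example")  # illustrative table name
    created_at = next((c for c in columns if c["name"] == "created_at"), None)

    with op.batch_alter_table("example") as batch_op:
        if created_at is not None and isinstance(created_at["type"], postgresql.TIMESTAMP):
            # Convert only when the column still has the legacy naive TIMESTAMP type.
            batch_op.alter_column(
                "created_at",
                existing_type=postgresql.TIMESTAMP(),
                type_=sa.DateTime(timezone=True),
                existing_nullable=False,
            )
        elif created_at is None:
            logger.warning("Column 'created_at' not found in table 'example'")
        else:
            logger.warning("Column 'created_at' already has type %s", created_at["type"])
```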
diff --git a/src/backend/base/langflow/alembic/versions/58b28437a398_modify_nullable.py b/src/backend/base/langflow/alembic/versions/58b28437a398_modify_nullable.py
index 564f778fc..4d9b1825b 100644
--- a/src/backend/base/langflow/alembic/versions/58b28437a398_modify_nullable.py
+++ b/src/backend/base/langflow/alembic/versions/58b28437a398_modify_nullable.py
@@ -6,16 +6,16 @@ Create Date: 2024-04-13 10:57:23.061709
"""
-from typing import Sequence, Union
+from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
-from loguru import logger
-from sqlalchemy.engine.reflection import Inspector
-down_revision: Union[str, None] = "4e5980a44eaa"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+from langflow.logging.logger import logger
+
+down_revision: str | None = "4e5980a44eaa"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
# Revision identifiers, used by Alembic.
revision = "58b28437a398"
diff --git a/src/backend/base/langflow/alembic/versions/79e675cb6752_change_datetime_type.py b/src/backend/base/langflow/alembic/versions/79e675cb6752_change_datetime_type.py
index b71706c22..4c1619cbb 100644
--- a/src/backend/base/langflow/alembic/versions/79e675cb6752_change_datetime_type.py
+++ b/src/backend/base/langflow/alembic/versions/79e675cb6752_change_datetime_type.py
@@ -6,19 +6,19 @@ Create Date: 2024-04-11 19:23:10.697335
"""
-from typing import Sequence, Union
+from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
-from loguru import logger
from sqlalchemy.dialects import postgresql
-from sqlalchemy.engine.reflection import Inspector
+
+from langflow.logging.logger import logger
# revision identifiers, used by Alembic.
revision: str = "79e675cb6752"
-down_revision: Union[str, None] = "e3bc869fa272"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | None = "e3bc869fa272"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
@@ -37,11 +37,10 @@ def upgrade() -> None:
type_=sa.DateTime(timezone=True),
existing_nullable=False,
)
+ elif created_at_column is None:
+ logger.warning("Column 'created_at' not found in table 'apikey'")
else:
- if created_at_column is None:
- logger.warning("Column 'created_at' not found in table 'apikey'")
- else:
- logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'apikey'")
+ logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'apikey'")
if "variable" in table_names:
columns = inspector.get_columns("variable")
created_at_column = next((column for column in columns if column["name"] == "created_at"), None)
@@ -54,11 +53,10 @@ def upgrade() -> None:
type_=sa.DateTime(timezone=True),
existing_nullable=True,
)
+ elif created_at_column is None:
+ logger.warning("Column 'created_at' not found in table 'variable'")
else:
- if created_at_column is None:
- logger.warning("Column 'created_at' not found in table 'variable'")
- else:
- logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'variable'")
+ logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'variable'")
if updated_at_column is not None and isinstance(updated_at_column["type"], postgresql.TIMESTAMP):
batch_op.alter_column(
"updated_at",
@@ -66,11 +64,10 @@ def upgrade() -> None:
type_=sa.DateTime(timezone=True),
existing_nullable=True,
)
+ elif updated_at_column is None:
+ logger.warning("Column 'updated_at' not found in table 'variable'")
else:
- if updated_at_column is None:
- logger.warning("Column 'updated_at' not found in table 'variable'")
- else:
- logger.warning(f"Column 'updated_at' has type {updated_at_column['type']} in table 'variable'")
+ logger.warning(f"Column 'updated_at' has type {updated_at_column['type']} in table 'variable'")
# ### end Alembic commands ###
@@ -92,11 +89,10 @@ def downgrade() -> None:
type_=postgresql.TIMESTAMP(),
existing_nullable=True,
)
+ elif updated_at_column is None:
+ logger.warning("Column 'updated_at' not found in table 'variable'")
else:
- if updated_at_column is None:
- logger.warning("Column 'updated_at' not found in table 'variable'")
- else:
- logger.warning(f"Column 'updated_at' has type {updated_at_column['type']} in table 'variable'")
+ logger.warning(f"Column 'updated_at' has type {updated_at_column['type']} in table 'variable'")
if created_at_column is not None and isinstance(created_at_column["type"], sa.DateTime):
batch_op.alter_column(
"created_at",
@@ -104,11 +100,10 @@ def downgrade() -> None:
type_=postgresql.TIMESTAMP(),
existing_nullable=True,
)
+ elif created_at_column is None:
+ logger.warning("Column 'created_at' not found in table 'variable'")
else:
- if created_at_column is None:
- logger.warning("Column 'created_at' not found in table 'variable'")
- else:
- logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'variable'")
+ logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'variable'")
if "apikey" in table_names:
columns = inspector.get_columns("apikey")
@@ -121,10 +116,9 @@ def downgrade() -> None:
type_=postgresql.TIMESTAMP(),
existing_nullable=False,
)
+ elif created_at_column is None:
+ logger.warning("Column 'created_at' not found in table 'apikey'")
else:
- if created_at_column is None:
- logger.warning("Column 'created_at' not found in table 'apikey'")
- else:
- logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'apikey'")
+ logger.warning(f"Column 'created_at' has type {created_at_column['type']} in table 'apikey'")
# ### end Alembic commands ###
diff --git a/src/backend/base/langflow/alembic/versions/b2fa308044b5_add_unique_constraints.py b/src/backend/base/langflow/alembic/versions/b2fa308044b5_add_unique_constraints.py
index 8aae1acf9..a1575eeff 100644
--- a/src/backend/base/langflow/alembic/versions/b2fa308044b5_add_unique_constraints.py
+++ b/src/backend/base/langflow/alembic/versions/b2fa308044b5_add_unique_constraints.py
@@ -1,4 +1,4 @@
-"""Add unique constraints
+"""Add unique constraints.
Revision ID: b2fa308044b5
Revises: 0b8757876a7c
@@ -6,25 +6,25 @@ Create Date: 2024-01-26 13:31:14.797548
"""
-from typing import Sequence, Union
+from collections.abc import Sequence
import sqlalchemy as sa
import sqlmodel
from alembic import op
-from loguru import logger # noqa
-from sqlalchemy.engine.reflection import Inspector
+
+from langflow.logging.logger import logger
# revision identifiers, used by Alembic.
revision: str = "b2fa308044b5"
-down_revision: Union[str, None] = "0b8757876a7c"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | None = "0b8757876a7c"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
conn = op.get_bind()
- inspector = sa.inspect(conn) # type: ignore
+ inspector = sa.inspect(conn)
tables = inspector.get_table_names()
# ### commands auto generated by Alembic - please adjust! ###
try:
@@ -53,14 +53,13 @@ def upgrade() -> None:
if "fk_flow_user_id_user" not in constraint_names:
batch_op.create_foreign_key("fk_flow_user_id_user", "user", ["user_id"], ["id"])
- except Exception as e:
+ except Exception as e: # noqa: BLE001
logger.exception(f"Error during upgrade: {e}")
- pass
def downgrade() -> None:
conn = op.get_bind()
- inspector = sa.inspect(conn) # type: ignore
+ inspector = sa.inspect(conn)
try:
# Re-create the dropped table 'flowstyle' if it was previously dropped in upgrade
if "flowstyle" not in inspector.get_table_names():
@@ -97,6 +96,6 @@ def downgrade() -> None:
if "fk_flow_user_id_user" in constraint_names:
batch_op.drop_constraint("fk_flow_user_id_user", type_="foreignkey")
- except Exception as e:
+ except Exception as e: # noqa: BLE001
# It's generally a good idea to log the exception or handle it in a way other than a bare pass
- print(f"Error during downgrade: {e}")
+ logger.exception(f"Error during downgrade: {e}")
diff --git a/src/backend/base/langflow/api/build.py b/src/backend/base/langflow/api/build.py
index 21031e414..a35980c8f 100644
--- a/src/backend/base/langflow/api/build.py
+++ b/src/backend/base/langflow/api/build.py
@@ -6,7 +6,6 @@ import uuid
from collections.abc import AsyncIterator
from fastapi import BackgroundTasks, HTTPException, Response
-from loguru import logger
from sqlmodel import select
from langflow.api.disconnect import DisconnectHandlerStreamingResponse
@@ -20,16 +19,12 @@ from langflow.api.utils import (
get_top_level_vertices,
parse_exception,
)
-from langflow.api.v1.schemas import (
- FlowDataRequest,
- InputValueRequest,
- ResultDataResponse,
- VertexBuildResponse,
-)
+from langflow.api.v1.schemas import FlowDataRequest, InputValueRequest, ResultDataResponse, VertexBuildResponse
from langflow.events.event_manager import EventManager
from langflow.exceptions.component import ComponentBuildError
from langflow.graph.graph.base import Graph
from langflow.graph.utils import log_vertex_build
+from langflow.logging.logger import logger
from langflow.schema.message import ErrorMessage
from langflow.schema.schema import OutputValue
from langflow.services.database.models.flow.model import Flow
@@ -75,7 +70,7 @@ async def start_flow_build(
)
queue_service.start_job(job_id, task_coro)
except Exception as e:
- logger.exception("Failed to create queue and start task")
+ await logger.aexception("Failed to create queue and start task")
raise HTTPException(status_code=500, detail=str(e)) from e
return job_id
@@ -91,7 +86,7 @@ async def get_flow_events_response(
main_queue, event_manager, event_task, _ = queue_service.get_queue_data(job_id)
if event_delivery in (EventDeliveryType.STREAMING, EventDeliveryType.DIRECT):
if event_task is None:
- logger.error(f"No event task found for job {job_id}")
+ await logger.aerror(f"No event task found for job {job_id}")
raise HTTPException(status_code=404, detail="No event task found for job")
return await create_flow_response(
queue=main_queue,
@@ -130,19 +125,19 @@ async def get_flow_events_response(
content = "\n".join([event for event in events if event is not None])
return Response(content=content, media_type="application/x-ndjson")
except asyncio.CancelledError as exc:
- logger.info(f"Event polling was cancelled for job {job_id}")
+ await logger.ainfo(f"Event polling was cancelled for job {job_id}")
raise HTTPException(status_code=499, detail="Event polling was cancelled") from exc
except asyncio.TimeoutError:
- logger.warning(f"Timeout while waiting for events for job {job_id}")
+ await logger.awarning(f"Timeout while waiting for events for job {job_id}")
return Response(content="", media_type="application/x-ndjson") # Return empty response instead of error
except JobQueueNotFoundError as exc:
- logger.error(f"Job not found: {job_id}. Error: {exc!s}")
+ await logger.aerror(f"Job not found: {job_id}. Error: {exc!s}")
raise HTTPException(status_code=404, detail=f"Job not found: {exc!s}") from exc
except Exception as exc:
if isinstance(exc, HTTPException):
raise
- logger.exception(f"Unexpected error processing flow events for job {job_id}")
+ await logger.aexception(f"Unexpected error processing flow events for job {job_id}")
raise HTTPException(status_code=500, detail=f"Unexpected error: {exc!s}") from exc
@@ -161,9 +156,9 @@ async def create_flow_response(
break
get_time = time.time()
yield value.decode("utf-8")
- logger.debug(f"Event {event_id} consumed in {get_time - put_time:.4f}s")
+ await logger.adebug(f"Event {event_id} consumed in {get_time - put_time:.4f}s")
except Exception as exc: # noqa: BLE001
- logger.exception(f"Error consuming event: {exc}")
+ await logger.aexception(f"Error consuming event: {exc}")
break
def on_disconnect() -> None:
@@ -233,7 +228,7 @@ async def generate_flow_events(
if "stream or streaming set to True" in str(exc):
raise HTTPException(status_code=400, detail=str(exc)) from exc
- logger.exception("Error checking build status")
+ await logger.aexception("Error checking build status")
raise HTTPException(status_code=500, detail=str(exc)) from exc
return first_layer, vertices_to_run, graph
@@ -317,7 +312,7 @@ async def generate_flow_events(
tb = exc.formatted_traceback
else:
tb = traceback.format_exc()
- logger.exception("Error building Component")
+ await logger.aexception("Error building Component")
params = format_exception_message(exc)
message = {"errorMessage": params, "stackTrace": tb}
valid = False
@@ -390,7 +385,7 @@ async def generate_flow_events(
component_error_message=str(exc),
),
)
- logger.exception("Error building Component")
+ await logger.aexception("Error building Component")
message = parse_exception(exc)
raise HTTPException(status_code=500, detail=message) from exc
@@ -411,7 +406,7 @@ async def generate_flow_events(
try:
vertex_build_response: VertexBuildResponse = await _build_vertex(vertex_id, graph, event_manager)
except asyncio.CancelledError as exc:
- logger.error(f"Build cancelled: {exc}")
+ await logger.aerror(f"Build cancelled: {exc}")
raise
# send built event or error event
@@ -459,7 +454,7 @@ async def generate_flow_events(
background_tasks.add_task(graph.end_all_traces_in_context())
raise
except Exception as e:
- logger.error(f"Error building vertices: {e}")
+ await logger.aerror(f"Error building vertices: {e}")
custom_component = graph.get_vertex(vertex_id).custom_component
trace_name = getattr(custom_component, "trace_name", None)
error_message = ErrorMessage(
@@ -499,11 +494,11 @@ async def cancel_flow_build(
_, _, event_task, _ = queue_service.get_queue_data(job_id)
if event_task is None:
- logger.warning(f"No event task found for job_id {job_id}")
+ await logger.awarning(f"No event task found for job_id {job_id}")
return True # Nothing to cancel is still a success
if event_task.done():
- logger.info(f"Task for job_id {job_id} is already completed")
+ await logger.ainfo(f"Task for job_id {job_id} is already completed")
return True # Nothing to cancel is still a success
# Store the task reference to check status after cleanup
@@ -515,18 +510,18 @@ async def cancel_flow_build(
except asyncio.CancelledError:
# Check if the task was actually cancelled
if task_before_cleanup.cancelled():
- logger.info(f"Successfully cancelled flow build for job_id {job_id} (CancelledError caught)")
+ await logger.ainfo(f"Successfully cancelled flow build for job_id {job_id} (CancelledError caught)")
return True
# If the task wasn't cancelled, re-raise the exception
- logger.error(f"CancelledError caught but task for job_id {job_id} was not cancelled")
+ await logger.aerror(f"CancelledError caught but task for job_id {job_id} was not cancelled")
raise
# If no exception was raised, verify that the task was actually cancelled
# The task should be done (cancelled) after cleanup
if task_before_cleanup.cancelled():
- logger.info(f"Successfully cancelled flow build for job_id {job_id}")
+ await logger.ainfo(f"Successfully cancelled flow build for job_id {job_id}")
return True
# If we get here, the task wasn't cancelled properly
- logger.error(f"Failed to cancel flow build for job_id {job_id}, task is still running")
+ await logger.aerror(f"Failed to cancel flow build for job_id {job_id}, task is still running")
return False
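This file (and most of the API files below) swaps the module-level loguru logger for `langflow.logging.logger` and switches to the awaitable `a*` variants inside coroutines, presumably so log I/O does not block the event loop. A minimal sketch of the pattern, assuming only the `a*` methods that appear in this diff:

```python
from langflow.logging.logger import logger


async def process_job(job_id: str) -> None:
    await logger.adebug(f"Starting job {job_id}")
    try:
        ...  # do the actual work
    except Exception:
        # Awaitable counterpart of logger.exception(); records the active traceback.
        await logger.aexception(f"Unexpected error processing job {job_id}")
        raise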
diff --git a/src/backend/base/langflow/api/health_check_router.py b/src/backend/base/langflow/api/health_check_router.py
index 6c5316cd9..02c4c387b 100644
--- a/src/backend/base/langflow/api/health_check_router.py
+++ b/src/backend/base/langflow/api/health_check_router.py
@@ -1,11 +1,11 @@
import uuid
from fastapi import APIRouter, HTTPException, status
-from loguru import logger
from pydantic import BaseModel
from sqlmodel import select
from langflow.api.utils import DbSession
+from langflow.logging.logger import logger
from langflow.services.database.models.flow.model import Flow
from langflow.services.deps import get_chat_service
@@ -49,7 +49,7 @@ async def health_check(
(await session.exec(stmt)).first()
response.db = "ok"
except Exception: # noqa: BLE001
- logger.exception("Error checking database")
+ await logger.aexception("Error checking database")
try:
chat = get_chat_service()
@@ -57,7 +57,7 @@ async def health_check(
await chat.get_cache("health_check")
response.chat = "ok"
except Exception: # noqa: BLE001
- logger.exception("Error checking chat service")
+ await logger.aexception("Error checking chat service")
if response.has_error():
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=response.model_dump())
diff --git a/src/backend/base/langflow/api/router.py b/src/backend/base/langflow/api/router.py
index df7b2aebe..41e505e4f 100644
--- a/src/backend/base/langflow/api/router.py
+++ b/src/backend/base/langflow/api/router.py
@@ -8,10 +8,12 @@ from langflow.api.v1 import (
files_router,
flows_router,
folders_router,
+ knowledge_bases_router,
login_router,
mcp_projects_router,
mcp_router,
monitor_router,
+ openai_responses_router,
projects_router,
starter_projects_router,
store_router,
@@ -45,9 +47,11 @@ router_v1.include_router(monitor_router)
router_v1.include_router(folders_router)
router_v1.include_router(projects_router)
router_v1.include_router(starter_projects_router)
+router_v1.include_router(knowledge_bases_router)
router_v1.include_router(mcp_router)
router_v1.include_router(voice_mode_router)
router_v1.include_router(mcp_projects_router)
+router_v1.include_router(openai_responses_router)
router_v2.include_router(files_router_v2)
router_v2.include_router(mcp_router_v2)
diff --git a/src/backend/base/langflow/api/utils.py b/src/backend/base/langflow/api/utils.py
index 755613b75..0982ee02c 100644
--- a/src/backend/base/langflow/api/utils.py
+++ b/src/backend/base/langflow/api/utils.py
@@ -8,11 +8,11 @@ from typing import TYPE_CHECKING, Annotated, Any
from fastapi import Depends, HTTPException, Query
from fastapi_pagination import Params
-from loguru import logger
from sqlalchemy import delete
from sqlmodel.ext.asyncio.session import AsyncSession
from langflow.graph.graph.base import Graph
+from langflow.logging.logger import logger
from langflow.services.auth.utils import get_current_active_user, get_current_active_user_mcp
from langflow.services.database.models.flow.model import Flow
from langflow.services.database.models.message.model import MessageTable
@@ -119,7 +119,7 @@ async def check_langflow_version(component: StoreComponentCreate) -> None:
if langflow_version is None:
raise HTTPException(status_code=500, detail="Unable to verify the latest version of Langflow")
if langflow_version != component.last_tested_version:
- logger.warning(
+ await logger.awarning(
f"Your version of Langflow ({component.last_tested_version}) is outdated. "
f"Please update to the latest version ({langflow_version}) and try again."
)
@@ -371,7 +371,7 @@ async def verify_public_flow_and_get_user(flow_id: uuid.UUID, client_id: str | N
user = await get_user_by_flow_id_or_endpoint_name(str(flow_id))
except Exception as exc:
- logger.exception(f"Error getting user for public flow {flow_id}")
+ await logger.aexception(f"Error getting user for public flow {flow_id}")
raise HTTPException(status_code=403, detail="Flow is not accessible") from exc
if not user:
diff --git a/src/backend/base/langflow/api/v1/__init__.py b/src/backend/base/langflow/api/v1/__init__.py
index ad276df48..96ba29d16 100644
--- a/src/backend/base/langflow/api/v1/__init__.py
+++ b/src/backend/base/langflow/api/v1/__init__.py
@@ -4,10 +4,12 @@ from langflow.api.v1.endpoints import router as endpoints_router
from langflow.api.v1.files import router as files_router
from langflow.api.v1.flows import router as flows_router
from langflow.api.v1.folders import router as folders_router
+from langflow.api.v1.knowledge_bases import router as knowledge_bases_router
from langflow.api.v1.login import router as login_router
from langflow.api.v1.mcp import router as mcp_router
from langflow.api.v1.mcp_projects import router as mcp_projects_router
from langflow.api.v1.monitor import router as monitor_router
+from langflow.api.v1.openai_responses import router as openai_responses_router
from langflow.api.v1.projects import router as projects_router
from langflow.api.v1.starter_projects import router as starter_projects_router
from langflow.api.v1.store import router as store_router
@@ -23,10 +25,12 @@ __all__ = [
"files_router",
"flows_router",
"folders_router",
+ "knowledge_bases_router",
"login_router",
"mcp_projects_router",
"mcp_router",
"monitor_router",
+ "openai_responses_router",
"projects_router",
"starter_projects_router",
"store_router",
diff --git a/src/backend/base/langflow/api/v1/callback.py b/src/backend/base/langflow/api/v1/callback.py
index 527241a64..2459bf5d6 100644
--- a/src/backend/base/langflow/api/v1/callback.py
+++ b/src/backend/base/langflow/api/v1/callback.py
@@ -5,10 +5,10 @@ from uuid import UUID
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks.base import AsyncCallbackHandler
-from loguru import logger
from typing_extensions import override
from langflow.api.v1.schemas import ChatResponse, PromptResponse
+from langflow.logging.logger import logger
from langflow.services.deps import get_chat_service, get_socket_service
from langflow.utils.util import remove_ansi_escape_codes
@@ -78,7 +78,7 @@ class AsyncStreamingLLMCallbackHandleSIO(AsyncCallbackHandler):
for resp in resps:
await self.socketio_service.emit_token(to=self.sid, data=resp.model_dump())
except Exception: # noqa: BLE001
- logger.exception("Error sending response")
+ await logger.aexception("Error sending response")
async def on_tool_error(
self,
diff --git a/src/backend/base/langflow/api/v1/chat.py b/src/backend/base/langflow/api/v1/chat.py
index f1b617944..693901961 100644
--- a/src/backend/base/langflow/api/v1/chat.py
+++ b/src/backend/base/langflow/api/v1/chat.py
@@ -6,23 +6,10 @@ import traceback
import uuid
from typing import TYPE_CHECKING, Annotated
-from fastapi import (
- APIRouter,
- BackgroundTasks,
- Body,
- Depends,
- HTTPException,
- Request,
- status,
-)
+from fastapi import APIRouter, BackgroundTasks, Body, Depends, HTTPException, Request, status
from fastapi.responses import StreamingResponse
-from loguru import logger
-from langflow.api.build import (
- cancel_flow_build,
- get_flow_events_response,
- start_flow_build,
-)
+from langflow.api.build import cancel_flow_build, get_flow_events_response, start_flow_build
from langflow.api.limited_background_tasks import LimitVertexBuildBackgroundTasks
from langflow.api.utils import (
CurrentActiveUser,
@@ -48,6 +35,7 @@ from langflow.api.v1.schemas import (
from langflow.exceptions.component import ComponentBuildError
from langflow.graph.graph.base import Graph
from langflow.graph.utils import log_vertex_build
+from langflow.logging.logger import logger
from langflow.schema.schema import OutputValue
from langflow.services.cache.utils import CacheMiss
from langflow.services.chat.service import ChatService
@@ -135,7 +123,7 @@ async def retrieve_vertices_order(
)
if "stream or streaming set to True" in str(exc):
raise HTTPException(status_code=400, detail=str(exc)) from exc
- logger.exception("Error checking build status")
+ await logger.aexception("Error checking build status")
raise HTTPException(status_code=500, detail=str(exc)) from exc
@@ -239,17 +227,17 @@ async def cancel_build(
return CancelFlowResponse(success=False, message="Failed to cancel flow build")
except asyncio.CancelledError:
# If CancelledError reaches here, it means the task was not successfully cancelled
- logger.error(f"Failed to cancel flow build for job_id {job_id} (CancelledError caught)")
+ await logger.aerror(f"Failed to cancel flow build for job_id {job_id} (CancelledError caught)")
return CancelFlowResponse(success=False, message="Failed to cancel flow build")
except ValueError as exc:
# Job not found
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
except JobQueueNotFoundError as exc:
- logger.error(f"Job not found: {job_id}. Error: {exc!s}")
+ await logger.aerror(f"Job not found: {job_id}. Error: {exc!s}")
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Job not found: {exc!s}") from exc
except Exception as exc:
# Any other unexpected error
- logger.exception(f"Error cancelling flow build for job_id {job_id}: {exc}")
+ await logger.aexception(f"Error cancelling flow build for job_id {job_id}: {exc}")
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
@@ -297,7 +285,7 @@ async def build_vertex(
cache = await chat_service.get_cache(flow_id_str)
if isinstance(cache, CacheMiss):
# If there's no cache
- logger.warning(f"No cache found for {flow_id_str}. Building graph starting at {vertex_id}")
+ await logger.awarning(f"No cache found for {flow_id_str}. Building graph starting at {vertex_id}")
graph = await build_graph_from_db(
flow_id=flow_id,
session=await anext(get_session()),
@@ -331,7 +319,7 @@ async def build_vertex(
tb = exc.formatted_traceback
else:
tb = traceback.format_exc()
- logger.exception("Error building Component")
+ await logger.aexception("Error building Component")
params = format_exception_message(exc)
message = {"errorMessage": params, "stackTrace": tb}
valid = False
@@ -408,7 +396,7 @@ async def build_vertex(
component_error_message=str(exc),
),
)
- logger.exception("Error building Component")
+ await logger.aexception("Error building Component")
message = parse_exception(exc)
raise HTTPException(status_code=500, detail=message) from exc
@@ -421,14 +409,14 @@ async def _stream_vertex(flow_id: str, vertex_id: str, chat_service: ChatService
try:
cache = await chat_service.get_cache(flow_id)
except Exception as exc: # noqa: BLE001
- logger.exception("Error building Component")
+ await logger.aexception("Error building Component")
yield str(StreamData(event="error", data={"error": str(exc)}))
return
if isinstance(cache, CacheMiss):
# If there's no cache
msg = f"No cache found for {flow_id}."
- logger.error(msg)
+ await logger.aerror(msg)
yield str(StreamData(event="error", data={"error": msg}))
return
else:
@@ -437,13 +425,13 @@ async def _stream_vertex(flow_id: str, vertex_id: str, chat_service: ChatService
try:
vertex: InterfaceVertex = graph.get_vertex(vertex_id)
except Exception as exc: # noqa: BLE001
- logger.exception("Error building Component")
+ await logger.aexception("Error building Component")
yield str(StreamData(event="error", data={"error": str(exc)}))
return
if not hasattr(vertex, "stream"):
msg = f"Vertex {vertex_id} does not support streaming"
- logger.error(msg)
+ await logger.aerror(msg)
yield str(StreamData(event="error", data={"error": msg}))
return
@@ -460,7 +448,7 @@ async def _stream_vertex(flow_id: str, vertex_id: str, chat_service: ChatService
yield str(stream_data)
elif not vertex.frozen or not vertex.built:
- logger.debug(f"Streaming vertex {vertex_id}")
+ await logger.adebug(f"Streaming vertex {vertex_id}")
stream_data = StreamData(
event="message",
data={"message": f"Streaming vertex {vertex_id}"},
@@ -474,7 +462,7 @@ async def _stream_vertex(flow_id: str, vertex_id: str, chat_service: ChatService
)
yield str(stream_data)
except Exception as exc: # noqa: BLE001
- logger.exception("Error building Component")
+ await logger.aexception("Error building Component")
exc_message = parse_exception(exc)
if exc_message == "The message must be an iterator or an async iterator.":
exc_message = "This stream has already been closed."
@@ -487,11 +475,11 @@ async def _stream_vertex(flow_id: str, vertex_id: str, chat_service: ChatService
yield str(stream_data)
else:
msg = f"No result found for vertex {vertex_id}"
- logger.error(msg)
+ await logger.aerror(msg)
yield str(StreamData(event="error", data={"error": msg}))
return
finally:
- logger.debug("Closing stream")
+ await logger.adebug("Closing stream")
if graph:
await chat_service.set_cache(flow_id, graph)
yield str(StreamData(event="close", data={"message": "Stream closed"}))
@@ -625,7 +613,7 @@ async def build_public_tmp(
flow_name=flow_name or f"{client_id}_{flow_id}",
)
except Exception as exc:
- logger.exception("Error building public flow")
+ await logger.aexception("Error building public flow")
if isinstance(exc, HTTPException):
raise
raise HTTPException(status_code=500, detail=str(exc)) from exc
diff --git a/src/backend/base/langflow/api/v1/endpoints.py b/src/backend/base/langflow/api/v1/endpoints.py
index 769474327..9ddc7a02a 100644
--- a/src/backend/base/langflow/api/v1/endpoints.py
+++ b/src/backend/base/langflow/api/v1/endpoints.py
@@ -11,7 +11,6 @@ import sqlalchemy as sa
from fastapi import APIRouter, BackgroundTasks, Body, Depends, HTTPException, Request, UploadFile, status
from fastapi.encoders import jsonable_encoder
from fastapi.responses import StreamingResponse
-from loguru import logger
from sqlmodel import select
from langflow.api.utils import CurrentActiveUser, DbSession, parse_value
@@ -39,11 +38,11 @@ from langflow.exceptions.serialization import SerializationError
from langflow.graph.graph.base import Graph
from langflow.graph.schema import RunOutputs
from langflow.helpers.flow import get_flow_by_id_or_endpoint_name
-from langflow.helpers.user import get_user_by_flow_id_or_endpoint_name
from langflow.interface.initialize.loading import update_params_with_load_from_db_fields
+from langflow.logging.logger import logger
from langflow.processing.process import process_tweaks, run_graph_internal
from langflow.schema.graph import Tweaks
-from langflow.services.auth.utils import api_key_security, get_current_active_user
+from langflow.services.auth.utils import api_key_security, get_current_active_user, get_webhook_user
from langflow.services.cache.utils import save_uploaded_file
from langflow.services.database.models.flow.model import Flow, FlowRead
from langflow.services.database.models.flow.utils import get_all_webhook_components_in_flow
@@ -116,6 +115,7 @@ async def simple_run_flow(
stream: bool = False,
api_key_user: User | None = None,
event_manager: EventManager | None = None,
+ context: dict | None = None,
):
validate_input_and_tweaks(input_request)
try:
@@ -127,7 +127,9 @@ async def simple_run_flow(
raise ValueError(msg)
graph_data = flow.data.copy()
graph_data = process_tweaks(graph_data, input_request.tweaks or {}, stream=stream)
- graph = Graph.from_payload(graph_data, flow_id=flow_id_str, user_id=str(user_id), flow_name=flow.name)
+ graph = Graph.from_payload(
+ graph_data, flow_id=flow_id_str, user_id=str(user_id), flow_name=flow.name, context=context
+ )
inputs = None
if input_request.input_value is not None:
inputs = [
@@ -184,7 +186,7 @@ async def simple_run_flow_task(
)
except Exception: # noqa: BLE001
- logger.exception(f"Error running flow {flow.id} task")
+ await logger.aexception(f"Error running flow {flow.id} task")
async def consume_and_yield(queue: asyncio.Queue, client_consumed_queue: asyncio.Queue) -> AsyncGenerator:
@@ -215,7 +217,7 @@ async def consume_and_yield(queue: asyncio.Queue, client_consumed_queue: asyncio
yield value
get_time_yield = time.time()
client_consumed_queue.put_nowait(event_id)
- logger.debug(
+ await logger.adebug(
f"consumed event {event_id} "
f"(time in queue, {get_time - put_time:.4f}, "
f"client {get_time_yield - get_time:.4f})"
@@ -228,6 +230,7 @@ async def run_flow_generator(
api_key_user: User | None,
event_manager: EventManager,
client_consumed_queue: asyncio.Queue,
+ context: dict | None = None,
) -> None:
"""Executes a flow asynchronously and manages event streaming to the client.
@@ -240,6 +243,7 @@ async def run_flow_generator(
api_key_user (User | None): Optional authenticated user running the flow
event_manager (EventManager): Manages the streaming of events to the client
client_consumed_queue (asyncio.Queue): Tracks client consumption of events
+ context (dict | None): Optional context to pass to the flow
Events Generated:
- "add_message": Sent when new messages are added during flow execution
@@ -260,11 +264,12 @@ async def run_flow_generator(
stream=True,
api_key_user=api_key_user,
event_manager=event_manager,
+ context=context,
)
event_manager.on_end(data={"result": result.model_dump()})
await client_consumed_queue.get()
except (ValueError, InvalidChatInputError, SerializationError) as e:
- logger.error(f"Error running flow: {e}")
+ await logger.aerror(f"Error running flow: {e}")
event_manager.on_error(data={"error": str(e)})
finally:
await event_manager.queue.put((None, None, time.time))
@@ -331,7 +336,7 @@ async def simplified_run_flow(
)
async def on_disconnect() -> None:
- logger.debug("Client disconnected, closing tasks")
+ await logger.adebug("Client disconnected, closing tasks")
main_task.cancel()
return StreamingResponse(
@@ -393,16 +398,16 @@ async def simplified_run_flow(
@router.post("/webhook/{flow_id_or_name}", response_model=dict, status_code=HTTPStatus.ACCEPTED) # noqa: RUF100, FAST003
async def webhook_run_flow(
+ flow_id_or_name: str,
flow: Annotated[Flow, Depends(get_flow_by_id_or_endpoint_name)],
- user: Annotated[User, Depends(get_user_by_flow_id_or_endpoint_name)],
request: Request,
background_tasks: BackgroundTasks,
):
"""Run a flow using a webhook request.
Args:
- flow (Flow, optional): The flow to be executed. Defaults to Depends(get_flow_by_id).
- user (User): The flow user.
+ flow_id_or_name (str): The flow ID or endpoint name.
+ flow (Flow): The flow to be executed.
request (Request): The incoming HTTP request.
background_tasks (BackgroundTasks): The background tasks manager.
@@ -414,8 +419,12 @@ async def webhook_run_flow(
"""
telemetry_service = get_telemetry_service()
start_time = time.perf_counter()
- logger.debug("Received webhook request")
+ await logger.adebug("Received webhook request")
error_msg = ""
+
+ # Get the appropriate user for webhook execution based on auth settings
+ webhook_user = await get_webhook_user(flow_id_or_name, request)
+
try:
try:
data = await request.body()
@@ -442,12 +451,12 @@ async def webhook_run_flow(
session_id=None,
)
- logger.debug("Starting background task")
+ await logger.adebug("Starting background task")
background_tasks.add_task(
simple_run_flow_task,
flow=flow,
input_request=input_request,
- api_key_user=user,
+ api_key_user=webhook_user,
)
except Exception as exc:
error_msg = str(exc)
@@ -553,7 +562,7 @@ async def experimental_run_flow(
except sa.exc.StatementError as exc:
# StatementError('(builtins.ValueError) badly formed hexadecimal UUID string')
if "badly formed hexadecimal UUID string" in str(exc):
- logger.error(f"Flow ID {flow_id_str} is not a valid UUID")
+ await logger.aerror(f"Flow ID {flow_id_str} is not a valid UUID")
# This means the Flow ID is not a valid UUID which means it can't find the flow
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
@@ -600,7 +609,7 @@ async def experimental_run_flow(
async def process(_flow_id) -> None:
"""Endpoint to process an input with a given flow_id."""
# Raise a depreciation warning
- logger.warning(
+ await logger.awarning(
"The /process endpoint is deprecated and will be removed in a future version. Please use /run instead."
)
raise HTTPException(
@@ -643,7 +652,7 @@ async def create_upload_file(
file_path=file_path,
)
except Exception as exc:
- logger.exception("Error saving file")
+ await logger.aexception("Error saving file")
raise HTTPException(status_code=500, detail=str(exc)) from exc
@@ -724,7 +733,7 @@ async def custom_component_update(
field_value=code_request.field_value,
field_name=code_request.field,
)
- if "code" not in updated_build_config:
+ if "code" not in updated_build_config or not updated_build_config.get("code", {}).get("value"):
updated_build_config = add_code_field_to_build_config(updated_build_config, code_request.code)
component_node["template"] = updated_build_config
@@ -756,7 +765,7 @@ async def get_config() -> ConfigResponse:
"""
try:
settings_service: SettingsService = get_settings_service()
- return ConfigResponse.from_settings(settings_service.settings)
+ return ConfigResponse.from_settings(settings_service.settings, settings_service.auth_settings)
except Exception as exc:
raise HTTPException(status_code=500, detail=str(exc)) from exc
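Two behavioural changes in this file are easy to miss among the logging swaps: webhook runs now resolve their user via `get_webhook_user(flow_id_or_name, request)` instead of a dependency, and a `context` dict is threaded through `simple_run_flow` into `Graph.from_payload`. A hedged caller-side sketch of the latter; the request model name and keyword names are assumptions based on the call sites in this hunk:

```python
from langflow.api.v1.endpoints import simple_run_flow
from langflow.api.v1.schemas import SimplifiedAPIRequest  # assumed request model


async def run_flow_with_context(flow, user):
    input_request = SimplifiedAPIRequest(input_value="Hello", input_type="chat", output_type="chat")
    return await simple_run_flow(
        flow=flow,
        input_request=input_request,
        api_key_user=user,
        # Arbitrary per-run data; forwarded to Graph.from_payload(..., context=context).
        context={"request_id": "abc-123"},
    )
```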
diff --git a/src/backend/base/langflow/api/v1/flows.py b/src/backend/base/langflow/api/v1/flows.py
index a51334fbc..04faa6898 100644
--- a/src/backend/base/langflow/api/v1/flows.py
+++ b/src/backend/base/langflow/api/v1/flows.py
@@ -55,7 +55,7 @@ async def _save_flow_to_fs(flow: Flow) -> None:
try:
await f.write(flow.model_dump_json())
except OSError:
- logger.exception("Failed to write flow %s to path %s", flow.name, flow.fs_path)
+ await logger.aexception("Failed to write flow %s to path %s", flow.name, flow.fs_path)
async def _new_flow(
diff --git a/src/backend/base/langflow/api/v1/knowledge_bases.py b/src/backend/base/langflow/api/v1/knowledge_bases.py
new file mode 100644
index 000000000..d2375b9b1
--- /dev/null
+++ b/src/backend/base/langflow/api/v1/knowledge_bases.py
@@ -0,0 +1,444 @@
+import json
+import shutil
+from http import HTTPStatus
+from pathlib import Path
+
+import pandas as pd
+from fastapi import APIRouter, HTTPException
+from langchain_chroma import Chroma
+from pydantic import BaseModel
+
+from langflow.api.utils import CurrentActiveUser
+from langflow.logging import logger
+from langflow.services.deps import get_settings_service
+
+router = APIRouter(tags=["Knowledge Bases"], prefix="/knowledge_bases")
+
+
+settings = get_settings_service().settings
+knowledge_directory = settings.knowledge_bases_dir
+if not knowledge_directory:
+ msg = "Knowledge bases directory is not set in the settings."
+ raise ValueError(msg)
+KNOWLEDGE_BASES_DIR = Path(knowledge_directory).expanduser()
+
+
+class KnowledgeBaseInfo(BaseModel):
+ id: str
+ name: str
+ embedding_provider: str | None = "Unknown"
+ embedding_model: str | None = "Unknown"
+ size: int = 0
+ words: int = 0
+ characters: int = 0
+ chunks: int = 0
+ avg_chunk_size: float = 0.0
+
+
+class BulkDeleteRequest(BaseModel):
+ kb_names: list[str]
+
+
+def get_kb_root_path() -> Path:
+ """Get the knowledge bases root path."""
+ return KNOWLEDGE_BASES_DIR
+
+
+def get_directory_size(path: Path) -> int:
+ """Calculate the total size of all files in a directory."""
+ total_size = 0
+ try:
+ for file_path in path.rglob("*"):
+ if file_path.is_file():
+ total_size += file_path.stat().st_size
+ except (OSError, PermissionError):
+ pass
+ return total_size
+
+
+def detect_embedding_provider(kb_path: Path) -> str:
+ """Detect the embedding provider from config files and directory structure."""
+ # Provider patterns to check for
+ provider_patterns = {
+ "OpenAI": ["openai", "text-embedding-ada", "text-embedding-3"],
+ "HuggingFace": ["sentence-transformers", "huggingface", "bert-"],
+ "Cohere": ["cohere", "embed-english", "embed-multilingual"],
+ "Google": ["palm", "gecko", "google"],
+ "Chroma": ["chroma"],
+ }
+
+ # Check JSON config files for provider information
+ for config_file in kb_path.glob("*.json"):
+ try:
+ with config_file.open("r", encoding="utf-8") as f:
+ config_data = json.load(f)
+ if not isinstance(config_data, dict):
+ continue
+
+ config_str = json.dumps(config_data).lower()
+
+ # Check for explicit provider fields first
+ provider_fields = ["embedding_provider", "provider", "embedding_model_provider"]
+ for field in provider_fields:
+ if field in config_data:
+ provider_value = str(config_data[field]).lower()
+ for provider, patterns in provider_patterns.items():
+ if any(pattern in provider_value for pattern in patterns):
+ return provider
+
+ # Check for model name patterns
+ for provider, patterns in provider_patterns.items():
+ if any(pattern in config_str for pattern in patterns):
+ return provider
+
+ except (OSError, json.JSONDecodeError) as _:
+ logger.exception("Error reading config file '%s'", config_file)
+ continue
+
+ # Fallback to directory structure
+ if (kb_path / "chroma").exists():
+ return "Chroma"
+ if (kb_path / "vectors.npy").exists():
+ return "Local"
+
+ return "Unknown"
+
+
+def detect_embedding_model(kb_path: Path) -> str:
+ """Detect the embedding model from config files."""
+ # First check the embedding metadata file (most accurate)
+ metadata_file = kb_path / "embedding_metadata.json"
+ if metadata_file.exists():
+ try:
+ with metadata_file.open("r", encoding="utf-8") as f:
+ metadata = json.load(f)
+ if isinstance(metadata, dict) and "embedding_model" in metadata:
+ # Check for embedding model field
+ model_value = str(metadata.get("embedding_model", "unknown"))
+ if model_value and model_value.lower() != "unknown":
+ return model_value
+ except (OSError, json.JSONDecodeError) as _:
+ logger.exception("Error reading embedding metadata file '%s'", metadata_file)
+
+ # Check other JSON config files for model information
+ for config_file in kb_path.glob("*.json"):
+ # Skip the embedding metadata file since we already checked it
+ if config_file.name == "embedding_metadata.json":
+ continue
+
+ try:
+ with config_file.open("r", encoding="utf-8") as f:
+ config_data = json.load(f)
+ if not isinstance(config_data, dict):
+ continue
+
+ # Check for explicit model fields first and return the actual model name
+ model_fields = ["embedding_model", "model", "embedding_model_name", "model_name"]
+ for field in model_fields:
+ if field in config_data:
+ model_value = str(config_data[field])
+ if model_value and model_value.lower() != "unknown":
+ return model_value
+
+ # Check for OpenAI specific model names
+ if "openai" in json.dumps(config_data).lower():
+ openai_models = ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]
+ config_str = json.dumps(config_data).lower()
+ for model in openai_models:
+ if model in config_str:
+ return model
+
+ # Check for HuggingFace model names (usually in model field)
+ if "model" in config_data:
+ model_name = str(config_data["model"])
+ # Common HuggingFace embedding models
+ hf_patterns = ["sentence-transformers", "all-MiniLM", "all-mpnet", "multi-qa"]
+ if any(pattern in model_name for pattern in hf_patterns):
+ return model_name
+
+ except (OSError, json.JSONDecodeError) as _:
+ logger.exception("Error reading config file '%s'", config_file)
+ continue
+
+ return "Unknown"
+
+
+def get_text_columns(df: pd.DataFrame, schema_data: list | None = None) -> list[str]:
+ """Get the text columns to analyze for word/character counts."""
+ # First try schema-defined text columns
+ if schema_data:
+ text_columns = [
+ col["column_name"]
+ for col in schema_data
+ if col.get("vectorize", False) and col.get("data_type") == "string"
+ ]
+ if text_columns:
+ return [col for col in text_columns if col in df.columns]
+
+ # Fallback to common text column names
+ common_names = ["text", "content", "document", "chunk"]
+ text_columns = [col for col in df.columns if col.lower() in common_names]
+ if text_columns:
+ return text_columns
+
+ # Last resort: all string columns
+ return [col for col in df.columns if df[col].dtype == "object"]
+
+
+def calculate_text_metrics(df: pd.DataFrame, text_columns: list[str]) -> tuple[int, int]:
+ """Calculate total words and characters from text columns."""
+ total_words = 0
+ total_characters = 0
+
+ for col in text_columns:
+ if col not in df.columns:
+ continue
+
+        text_series = df[col].fillna("").astype(str)  # fill NaNs before casting so they do not count as the word "nan"
+ total_characters += text_series.str.len().sum()
+ total_words += text_series.str.split().str.len().sum()
+
+ return int(total_words), int(total_characters)
+
+
+def get_kb_metadata(kb_path: Path) -> dict:
+ """Extract metadata from a knowledge base directory."""
+ metadata: dict[str, float | int | str] = {
+ "chunks": 0,
+ "words": 0,
+ "characters": 0,
+ "avg_chunk_size": 0.0,
+ "embedding_provider": "Unknown",
+ "embedding_model": "Unknown",
+ }
+
+ try:
+ # First check embedding metadata file for accurate provider and model info
+ metadata_file = kb_path / "embedding_metadata.json"
+ if metadata_file.exists():
+ try:
+ with metadata_file.open("r", encoding="utf-8") as f:
+ embedding_metadata = json.load(f)
+ if isinstance(embedding_metadata, dict):
+ if "embedding_provider" in embedding_metadata:
+ metadata["embedding_provider"] = embedding_metadata["embedding_provider"]
+ if "embedding_model" in embedding_metadata:
+ metadata["embedding_model"] = embedding_metadata["embedding_model"]
+ except (OSError, json.JSONDecodeError) as _:
+ logger.exception("Error reading embedding metadata file '%s'", metadata_file)
+
+ # Fallback to detection if not found in metadata file
+ if metadata["embedding_provider"] == "Unknown":
+ metadata["embedding_provider"] = detect_embedding_provider(kb_path)
+ if metadata["embedding_model"] == "Unknown":
+ metadata["embedding_model"] = detect_embedding_model(kb_path)
+
+ # Read schema for text column information
+ schema_data = None
+ schema_file = kb_path / "schema.json"
+ if schema_file.exists():
+ try:
+ with schema_file.open("r", encoding="utf-8") as f:
+ schema_data = json.load(f)
+ if not isinstance(schema_data, list):
+ schema_data = None
+ except (ValueError, TypeError, OSError) as _:
+ logger.exception("Error reading schema file '%s'", schema_file)
+
+ # Create vector store
+ chroma = Chroma(
+ persist_directory=str(kb_path),
+ collection_name=kb_path.name,
+ )
+
+ # Access the raw collection
+ collection = chroma._collection
+
+ # Fetch all documents and metadata
+ results = collection.get(include=["documents", "metadatas"])
+
+ # Convert to pandas DataFrame
+ source_chunks = pd.DataFrame(
+ {
+ "document": results["documents"],
+ "metadata": results["metadatas"],
+ }
+ )
+
+ # Process the source data for metadata
+ try:
+ metadata["chunks"] = len(source_chunks)
+
+ # Get text columns and calculate metrics
+ text_columns = get_text_columns(source_chunks, schema_data)
+ if text_columns:
+ words, characters = calculate_text_metrics(source_chunks, text_columns)
+ metadata["words"] = words
+ metadata["characters"] = characters
+
+ # Calculate average chunk size
+ if int(metadata["chunks"]) > 0:
+ metadata["avg_chunk_size"] = round(int(characters) / int(metadata["chunks"]), 1)
+
+ except (OSError, ValueError, TypeError) as _:
+ logger.exception("Error processing Chroma DB '%s'", kb_path.name)
+
+ except (OSError, ValueError, TypeError) as _:
+ logger.exception("Error processing knowledge base directory '%s'", kb_path)
+
+ return metadata
+
+
+@router.get("", status_code=HTTPStatus.OK)
+@router.get("/", status_code=HTTPStatus.OK)
+async def list_knowledge_bases(current_user: CurrentActiveUser) -> list[KnowledgeBaseInfo]:
+ """List all available knowledge bases."""
+ try:
+ kb_root_path = get_kb_root_path()
+ kb_user = current_user.username
+ kb_path = kb_root_path / kb_user
+
+ if not kb_path.exists():
+ return []
+
+ knowledge_bases = []
+
+ for kb_dir in kb_path.iterdir():
+ if not kb_dir.is_dir() or kb_dir.name.startswith("."):
+ continue
+
+ try:
+ # Get size of the directory
+ size = get_directory_size(kb_dir)
+
+ # Get metadata from KB files
+ metadata = get_kb_metadata(kb_dir)
+
+ kb_info = KnowledgeBaseInfo(
+ id=kb_dir.name,
+ name=kb_dir.name.replace("_", " ").replace("-", " ").title(),
+ embedding_provider=metadata["embedding_provider"],
+ embedding_model=metadata["embedding_model"],
+ size=size,
+ words=metadata["words"],
+ characters=metadata["characters"],
+ chunks=metadata["chunks"],
+ avg_chunk_size=metadata["avg_chunk_size"],
+ )
+
+ knowledge_bases.append(kb_info)
+
+ except OSError as _:
+ # Log the exception and skip directories that can't be read
+ await logger.aexception("Error reading knowledge base directory '%s'", kb_dir)
+ continue
+
+ # Sort by name alphabetically
+ knowledge_bases.sort(key=lambda x: x.name)
+
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=f"Error listing knowledge bases: {e!s}") from e
+ else:
+ return knowledge_bases
+
+
+@router.get("/{kb_name}", status_code=HTTPStatus.OK)
+async def get_knowledge_base(kb_name: str, current_user: CurrentActiveUser) -> KnowledgeBaseInfo:
+ """Get detailed information about a specific knowledge base."""
+ try:
+ kb_root_path = get_kb_root_path()
+ kb_user = current_user.username
+ kb_path = kb_root_path / kb_user / kb_name
+
+ if not kb_path.exists() or not kb_path.is_dir():
+ raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")
+
+ # Get size of the directory
+ size = get_directory_size(kb_path)
+
+ # Get metadata from KB files
+ metadata = get_kb_metadata(kb_path)
+
+ return KnowledgeBaseInfo(
+ id=kb_name,
+ name=kb_name.replace("_", " ").replace("-", " ").title(),
+ embedding_provider=metadata["embedding_provider"],
+ embedding_model=metadata["embedding_model"],
+ size=size,
+ words=metadata["words"],
+ characters=metadata["characters"],
+ chunks=metadata["chunks"],
+ avg_chunk_size=metadata["avg_chunk_size"],
+ )
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=f"Error getting knowledge base '{kb_name}': {e!s}") from e
+
+
+@router.delete("/{kb_name}", status_code=HTTPStatus.OK)
+async def delete_knowledge_base(kb_name: str, current_user: CurrentActiveUser) -> dict[str, str]:
+ """Delete a specific knowledge base."""
+ try:
+ kb_root_path = get_kb_root_path()
+ kb_user = current_user.username
+ kb_path = kb_root_path / kb_user / kb_name
+
+ if not kb_path.exists() or not kb_path.is_dir():
+ raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")
+
+ # Delete the entire knowledge base directory
+ shutil.rmtree(kb_path)
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=f"Error deleting knowledge base '{kb_name}': {e!s}") from e
+ else:
+ return {"message": f"Knowledge base '{kb_name}' deleted successfully"}
+
+
+@router.delete("", status_code=HTTPStatus.OK)
+@router.delete("/", status_code=HTTPStatus.OK)
+async def delete_knowledge_bases_bulk(request: BulkDeleteRequest, current_user: CurrentActiveUser) -> dict[str, object]:
+ """Delete multiple knowledge bases."""
+ try:
+ kb_root_path = get_kb_root_path()
+ kb_user = current_user.username
+ kb_user_path = kb_root_path / kb_user
+ deleted_count = 0
+ not_found_kbs = []
+
+ for kb_name in request.kb_names:
+ kb_path = kb_user_path / kb_name
+
+ if not kb_path.exists() or not kb_path.is_dir():
+ not_found_kbs.append(kb_name)
+ continue
+
+ try:
+ # Delete the entire knowledge base directory
+ shutil.rmtree(kb_path)
+ deleted_count += 1
+            except OSError as e:  # PermissionError is a subclass of OSError
+ await logger.aexception("Error deleting knowledge base '%s': %s", kb_name, e)
+ # Continue with other deletions even if one fails
+
+ if not_found_kbs and deleted_count == 0:
+ raise HTTPException(status_code=404, detail=f"Knowledge bases not found: {', '.join(not_found_kbs)}")
+
+ result = {
+ "message": f"Successfully deleted {deleted_count} knowledge base(s)",
+ "deleted_count": deleted_count,
+ }
+
+ if not_found_kbs:
+ result["not_found"] = ", ".join(not_found_kbs)
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=f"Error deleting knowledge bases: {e!s}") from e
+ else:
+ return result
diff --git a/src/backend/base/langflow/api/v1/mcp.py b/src/backend/base/langflow/api/v1/mcp.py
index 742eccc64..7d6b5d55e 100644
--- a/src/backend/base/langflow/api/v1/mcp.py
+++ b/src/backend/base/langflow/api/v1/mcp.py
@@ -4,7 +4,6 @@ import pydantic
from anyio import BrokenResourceError
from fastapi import APIRouter, HTTPException, Request, Response
from fastapi.responses import HTMLResponse, StreamingResponse
-from loguru import logger
from mcp import types
from mcp.server import NotificationOptions, Server
from mcp.server.sse import SseServerTransport
@@ -18,6 +17,7 @@ from langflow.api.v1.mcp_utils import (
handle_mcp_errors,
handle_read_resource,
)
+from langflow.logging.logger import logger
from langflow.services.deps import get_settings_service
router = APIRouter(prefix="/mcp", tags=["mcp"])
@@ -83,22 +83,22 @@ async def im_alive():
@router.get("/sse", response_class=StreamingResponse)
async def handle_sse(request: Request, current_user: CurrentActiveMCPUser):
msg = f"Starting SSE connection, server name: {server.name}"
- logger.info(msg)
+ await logger.ainfo(msg)
token = current_user_ctx.set(current_user)
try:
async with sse.connect_sse(request.scope, request.receive, request._send) as streams:
try:
msg = "Starting SSE connection"
- logger.debug(msg)
+ await logger.adebug(msg)
msg = f"Stream types: read={type(streams[0])}, write={type(streams[1])}"
- logger.debug(msg)
+ await logger.adebug(msg)
notification_options = NotificationOptions(
prompts_changed=True, resources_changed=True, tools_changed=True
)
init_options = server.create_initialization_options(notification_options)
msg = f"Initialization options: {init_options}"
- logger.debug(msg)
+ await logger.adebug(msg)
try:
await server.run(streams[0], streams[1], init_options)
@@ -106,20 +106,20 @@ async def handle_sse(request: Request, current_user: CurrentActiveMCPUser):
validation_error = find_validation_error(exc)
if validation_error:
msg = "Validation error in MCP:" + str(validation_error)
- logger.debug(msg)
+ await logger.adebug(msg)
else:
msg = f"Error in MCP: {exc!s}"
- logger.debug(msg)
+ await logger.adebug(msg)
return
except BrokenResourceError:
# Handle gracefully when client disconnects
- logger.info("Client disconnected from SSE connection")
+ await logger.ainfo("Client disconnected from SSE connection")
except asyncio.CancelledError:
- logger.info("SSE connection was cancelled")
+ await logger.ainfo("SSE connection was cancelled")
raise
except Exception as e:
msg = f"Error in MCP: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise
finally:
current_user_ctx.reset(token)
@@ -130,8 +130,8 @@ async def handle_messages(request: Request):
try:
await sse.handle_post_message(request.scope, request.receive, request._send)
except (BrokenResourceError, BrokenPipeError) as e:
- logger.info("MCP Server disconnected")
+ await logger.ainfo("MCP Server disconnected")
raise HTTPException(status_code=404, detail=f"MCP Server disconnected, error: {e}") from e
except Exception as e:
- logger.error(f"Internal server error: {e}")
+ await logger.aerror(f"Internal server error: {e}")
raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
diff --git a/src/backend/base/langflow/api/v1/mcp_projects.py b/src/backend/base/langflow/api/v1/mcp_projects.py
index d85049cc9..2a8150ccb 100644
--- a/src/backend/base/langflow/api/v1/mcp_projects.py
+++ b/src/backend/base/langflow/api/v1/mcp_projects.py
@@ -1,6 +1,5 @@
import asyncio
import json
-import logging
import os
import platform
from asyncio.subprocess import create_subprocess_exec
@@ -9,10 +8,11 @@ from datetime import datetime, timezone
from ipaddress import ip_address
from pathlib import Path
from subprocess import CalledProcessError
+from typing import Annotated, Any
from uuid import UUID
from anyio import BrokenResourceError
-from fastapi import APIRouter, HTTPException, Request, Response
+from fastapi import APIRouter, Depends, HTTPException, Request, Response
from fastapi.responses import HTMLResponse
from mcp import types
from mcp.server import NotificationOptions, Server
@@ -30,6 +30,7 @@ from langflow.api.v1.mcp_utils import (
handle_read_resource,
)
from langflow.api.v1.schemas import (
+ AuthSettings,
MCPInstallRequest,
MCPProjectResponse,
MCPProjectUpdateRequest,
@@ -37,14 +38,113 @@ from langflow.api.v1.schemas import (
)
from langflow.base.mcp.constants import MAX_MCP_SERVER_NAME_LENGTH
from langflow.base.mcp.util import sanitize_mcp_name
+from langflow.logging import logger
+from langflow.services.auth.mcp_encryption import decrypt_auth_settings, encrypt_auth_settings
from langflow.services.database.models import Flow, Folder
+from langflow.services.database.models.api_key.crud import check_key, create_api_key
+from langflow.services.database.models.api_key.model import ApiKeyCreate
+from langflow.services.database.models.user.model import User
from langflow.services.deps import get_settings_service, session_scope
from langflow.services.settings.feature_flags import FEATURE_FLAGS
-logger = logging.getLogger(__name__)
-
router = APIRouter(prefix="/mcp/project", tags=["mcp_projects"])
+
+async def verify_project_auth(
+ project_id: UUID,
+ query_param: str | None = None,
+ header_param: str | None = None,
+) -> User:
+ """Custom authentication for MCP project endpoints when API key is required.
+
+    This is only used when MCP composer is enabled and the project requires API key auth.
+ """
+ async with session_scope() as session:
+ # First, get the project to check its auth settings
+ project = (await session.exec(select(Folder).where(Folder.id == project_id))).first()
+
+ if not project:
+ raise HTTPException(status_code=404, detail="Project not found")
+
+ # For MCP composer enabled, only use API key
+ api_key = query_param or header_param
+ if not api_key:
+ raise HTTPException(
+ status_code=401,
+ detail="API key required for this project. Provide x-api-key header or query parameter.",
+ )
+
+ # Validate the API key
+ user = await check_key(session, api_key)
+ if not user:
+ raise HTTPException(status_code=401, detail="Invalid API key")
+
+ # Verify user has access to the project
+ project_access = (
+ await session.exec(select(Folder).where(Folder.id == project_id, Folder.user_id == user.id))
+ ).first()
+
+ if not project_access:
+ raise HTTPException(status_code=403, detail="Access denied to this project")
+
+ return user
+
+
+# Smart authentication dependency that chooses method based on project settings
+async def verify_project_auth_conditional(
+ project_id: UUID,
+ request: Request,
+) -> User:
+ """Choose authentication method based on project settings.
+
+ - MCP Composer enabled + API key auth: Only allow API keys
+ - All other cases: Use standard MCP auth (JWT + API keys)
+ """
+ async with session_scope() as session:
+ # Get project to check auth settings
+ project = (await session.exec(select(Folder).where(Folder.id == project_id))).first()
+
+ if not project:
+ raise HTTPException(status_code=404, detail="Project not found")
+
+ # Check if this project requires API key only authentication
+ if FEATURE_FLAGS.mcp_composer and project.auth_settings:
+ auth_settings = AuthSettings(**project.auth_settings)
+ if auth_settings.auth_type == "apikey":
+ # For MCP composer projects with API key auth, use custom API key validation
+ api_key_header_value = request.headers.get("x-api-key")
+ api_key_query_value = request.query_params.get("x-api-key")
+ return await verify_project_auth(project_id, api_key_query_value, api_key_header_value)
+
+ # For all other cases, use standard MCP authentication (allows JWT + API keys)
+ # Extract token
+ token: str | None = None
+ auth_header = request.headers.get("authorization")
+ if auth_header and auth_header.startswith("Bearer "):
+ token = auth_header[7:]
+
+ # Extract API keys
+ api_key_query_value = request.query_params.get("x-api-key")
+ api_key_header_value = request.headers.get("x-api-key")
+
+ # Call the MCP auth function directly
+ from langflow.services.auth.utils import get_current_user_mcp
+
+ user = await get_current_user_mcp(
+ token=token or "", query_param=api_key_query_value, header_param=api_key_header_value, db=session
+ )
+
+ # Verify project access
+ project_access = (
+ await session.exec(select(Folder).where(Folder.id == project_id, Folder.user_id == user.id))
+ ).first()
+
+ if not project_access:
+ raise HTTPException(status_code=404, detail="Project not found")
+
+ return user
+
+
# Create project-specific context variable
current_project_ctx: ContextVar[UUID | None] = ContextVar("current_project_ctx", default=None)
@@ -106,7 +206,7 @@ async def list_project_tools(
)
try:
tool = MCPSettings(
- id=str(flow.id),
+ id=flow.id,
action_name=name,
action_description=description,
mcp_enabled=flow.mcp_enabled,
@@ -117,26 +217,28 @@ async def list_project_tools(
tools.append(tool)
except Exception as e: # noqa: BLE001
msg = f"Error in listing project tools: {e!s} from flow: {name}"
- logger.warning(msg)
+ await logger.awarning(msg)
continue
- # Get project-level auth settings
+ # Get project-level auth settings and decrypt sensitive fields
auth_settings = None
if project.auth_settings:
from langflow.api.v1.schemas import AuthSettings
- auth_settings = AuthSettings(**project.auth_settings)
+ # Decrypt sensitive fields before returning
+ decrypted_settings = decrypt_auth_settings(project.auth_settings)
+ auth_settings = AuthSettings(**decrypted_settings) if decrypted_settings else None
except Exception as e:
msg = f"Error listing project tools: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise HTTPException(status_code=500, detail=str(e)) from e
return MCPProjectResponse(tools=tools, auth_settings=auth_settings)
@router.head("/{project_id}/sse", response_class=HTMLResponse, include_in_schema=False)
-async def im_alive():
+async def im_alive(project_id: str): # noqa: ARG001
return Response()
@@ -144,22 +246,13 @@ async def im_alive():
async def handle_project_sse(
project_id: UUID,
request: Request,
- current_user: CurrentActiveMCPUser,
+ current_user: Annotated[User, Depends(verify_project_auth_conditional)],
):
"""Handle SSE connections for a specific project."""
- # Verify project exists and user has access
- async with session_scope() as session:
- project = (
- await session.exec(select(Folder).where(Folder.id == project_id, Folder.user_id == current_user.id))
- ).first()
-
- if not project:
- raise HTTPException(status_code=404, detail="Project not found")
-
# Get project-specific SSE transport and MCP server
sse = get_project_sse(project_id)
project_server = get_project_mcp_server(project_id)
- logger.debug("Project MCP server name: %s", project_server.server.name)
+ await logger.adebug("Project MCP server name: %s", project_server.server.name)
# Set context variables
user_token = current_user_ctx.set(current_user)
@@ -168,7 +261,7 @@ async def handle_project_sse(
try:
async with sse.connect_sse(request.scope, request.receive, request._send) as streams:
try:
- logger.debug("Starting SSE connection for project %s", project_id)
+ await logger.adebug("Starting SSE connection for project %s", project_id)
notification_options = NotificationOptions(
prompts_changed=True, resources_changed=True, tools_changed=True
@@ -177,15 +270,15 @@ async def handle_project_sse(
try:
await project_server.server.run(streams[0], streams[1], init_options)
- except Exception:
- logger.exception("Error in project MCP")
+ except Exception: # noqa: BLE001
+ await logger.aexception("Error in project MCP")
except BrokenResourceError:
- logger.info("Client disconnected from project SSE connection")
+ await logger.ainfo("Client disconnected from project SSE connection")
except asyncio.CancelledError:
- logger.info("Project SSE connection was cancelled")
+ await logger.ainfo("Project SSE connection was cancelled")
raise
except Exception:
- logger.exception("Error in project MCP")
+ await logger.aexception("Error in project MCP")
raise
finally:
current_user_ctx.reset(user_token)
@@ -195,17 +288,12 @@ async def handle_project_sse(
@router.post("/{project_id}")
-async def handle_project_messages(project_id: UUID, request: Request, current_user: CurrentActiveMCPUser):
+async def handle_project_messages(
+ project_id: UUID,
+ request: Request,
+ current_user: Annotated[User, Depends(verify_project_auth_conditional)],
+):
"""Handle POST messages for a project-specific MCP server."""
- # Verify project exists and user has access
- async with session_scope() as session:
- project = (
- await session.exec(select(Folder).where(Folder.id == project_id, Folder.user_id == current_user.id))
- ).first()
-
- if not project:
- raise HTTPException(status_code=404, detail="Project not found")
-
# Set context variables
user_token = current_user_ctx.set(current_user)
project_token = current_project_ctx.set(project_id)
@@ -214,7 +302,7 @@ async def handle_project_messages(project_id: UUID, request: Request, current_us
sse = get_project_sse(project_id)
await sse.handle_post_message(request.scope, request.receive, request._send)
except BrokenResourceError as e:
- logger.info("Project MCP Server disconnected for project %s", project_id)
+ await logger.ainfo("Project MCP Server disconnected for project %s", project_id)
raise HTTPException(status_code=404, detail=f"Project MCP Server disconnected, error: {e}") from e
finally:
current_user_ctx.reset(user_token)
@@ -222,7 +310,11 @@ async def handle_project_messages(project_id: UUID, request: Request, current_us
@router.post("/{project_id}/")
-async def handle_project_messages_with_slash(project_id: UUID, request: Request, current_user: CurrentActiveMCPUser):
+async def handle_project_messages_with_slash(
+ project_id: UUID,
+ request: Request,
+ current_user: Annotated[User, Depends(verify_project_auth_conditional)],
+):
"""Handle POST messages for a project-specific MCP server with trailing slash."""
# Call the original handler
return await handle_project_messages(project_id, request, current_user)
@@ -249,11 +341,33 @@ async def update_project_mcp_settings(
if not project:
raise HTTPException(status_code=404, detail="Project not found")
- # Update project-level auth settings
- if request.auth_settings:
- project.auth_settings = request.auth_settings.model_dump(mode="json")
- else:
- project.auth_settings = None
+ # Update project-level auth settings with encryption
+ if "auth_settings" in request.model_fields_set:
+ if request.auth_settings is None:
+ # Explicitly set to None - clear auth settings
+ project.auth_settings = None
+ else:
+ # Use python mode to get raw values without SecretStr masking
+ auth_model = request.auth_settings
+ auth_dict = auth_model.model_dump(mode="python", exclude_none=True)
+
+ # Extract actual secret values before encryption
+ from pydantic import SecretStr
+
+ # Handle api_key if it's a SecretStr
+ api_key_val = getattr(auth_model, "api_key", None)
+ if isinstance(api_key_val, SecretStr):
+ auth_dict["api_key"] = api_key_val.get_secret_value()
+
+ # Handle oauth_client_secret if it's a SecretStr
+ client_secret_val = getattr(auth_model, "oauth_client_secret", None)
+ if isinstance(client_secret_val, SecretStr):
+ auth_dict["oauth_client_secret"] = client_secret_val.get_secret_value()
+
+ # Encrypt and store
+ encrypted_settings = encrypt_auth_settings(auth_dict)
+ project.auth_settings = encrypted_settings
+
session.add(project)
# Query flows in the project
@@ -280,7 +394,7 @@ async def update_project_mcp_settings(
except Exception as e:
msg = f"Error updating project MCP settings: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise HTTPException(status_code=500, detail=str(e)) from e
@@ -348,6 +462,7 @@ async def install_mcp_config(
if not is_local_ip(client_ip):
raise HTTPException(status_code=500, detail="MCP configuration can only be installed from a local connection")
+ removed_servers: list[str] = [] # Track removed servers for reinstallation
try:
# Verify project exists and user has access
async with session_scope() as session:
@@ -358,6 +473,28 @@ async def install_mcp_config(
if not project:
raise HTTPException(status_code=404, detail="Project not found")
+ # Check if project requires API key authentication and generate if needed
+ generated_api_key = None
+
+ # Determine if we need to generate an API key based on feature flag
+ should_generate_api_key = False
+ if not FEATURE_FLAGS.mcp_composer:
+ # When MCP_COMPOSER is disabled, only generate API key if autologin is disabled
+ # (matches frontend !isAutoLogin check)
+ settings_service = get_settings_service()
+ should_generate_api_key = not settings_service.auth_settings.AUTO_LOGIN
+ elif project.auth_settings:
+ # When MCP_COMPOSER is enabled, only generate if auth_type is "apikey"
+                auth_settings = AuthSettings(**project.auth_settings)
+ should_generate_api_key = auth_settings.auth_type == "apikey"
+
+ if should_generate_api_key:
+ # Generate API key with specific name format
+ api_key_name = f"MCP Project {project.name} - {body.client}"
+ api_key_create = ApiKeyCreate(name=api_key_name)
+ unmasked_api_key = await create_api_key(session, api_key_create, current_user.id)
+ generated_api_key = unmasked_api_key.api_key
+
# Get settings service to build the SSE URL
settings_service = get_settings_service()
host = getattr(settings_service.settings, "host", "localhost")
@@ -368,13 +505,12 @@ async def install_mcp_config(
# Determine command and args based on operating system
os_type = platform.system()
command = "uvx"
- mcp_tool = "mcp-composer" if FEATURE_FLAGS.mcp_composer else "mcp-proxy"
# Check if running on WSL (will appear as Linux but with Microsoft in release info)
is_wsl = os_type == "Linux" and "microsoft" in platform.uname().release.lower()
if is_wsl:
- logger.debug("WSL detected, using Windows-specific configuration")
+ await logger.adebug("WSL detected, using Windows-specific configuration")
# If we're in WSL and the host is localhost, we might need to adjust the URL
# so Windows applications can reach the WSL service
@@ -389,66 +525,62 @@ async def install_mcp_config(
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
- stdout, stderr = await proc.communicate()
+ stdout, _ = await proc.communicate()
if proc.returncode == 0 and stdout.strip():
wsl_ip = stdout.decode().strip().split()[0] # Get first IP address
- logger.debug("Using WSL IP for external access: %s", wsl_ip)
+ await logger.adebug("Using WSL IP for external access: %s", wsl_ip)
# Replace the localhost with the WSL IP in the URL
sse_url = sse_url.replace(f"http://{host}:{port}", f"http://{wsl_ip}:{port}")
except OSError as e:
- logger.warning("Failed to get WSL IP address: %s. Using default URL.", str(e))
+ await logger.awarning("Failed to get WSL IP address: %s. Using default URL.", str(e))
- # Configure args based on the MCP tool
- oauth_env = None
+ # Base args
+ args = ["mcp-composer"] if FEATURE_FLAGS.mcp_composer else ["mcp-proxy"]
+
+ # Add authentication args based on MCP_COMPOSER feature flag and auth settings
+ if not FEATURE_FLAGS.mcp_composer:
+ # When MCP_COMPOSER is disabled, only use headers format if API key was generated
+ # (when autologin is disabled)
+ if generated_api_key:
+ args.extend(["--headers", "x-api-key", generated_api_key])
+ elif project.auth_settings:
+ # Decrypt sensitive fields before using them
+ decrypted_settings = decrypt_auth_settings(project.auth_settings)
+ auth_settings = AuthSettings(**decrypted_settings) if decrypted_settings else AuthSettings()
+ args.extend(["--auth_type", auth_settings.auth_type])
+
+                # When MCP_COMPOSER is enabled, only add headers if auth_type is "apikey"
+                # (reuse the decrypted settings from above rather than re-reading the encrypted ones)
+ if auth_settings.auth_type == "apikey" and generated_api_key:
+ args.extend(["--headers", "x-api-key", generated_api_key])
+ # If no auth_settings or auth_type is "none", don't add any auth headers
+
+ # Add the SSE URL
if FEATURE_FLAGS.mcp_composer:
- args = [mcp_tool, "--sse-url", sse_url]
-
- # Check for auth settings and add auth parameters
- if project.auth_settings:
- from langflow.api.v1.schemas import AuthSettings
-
- auth_settings = AuthSettings(**project.auth_settings)
- args.extend(["--auth_type", auth_settings.auth_type])
-
- oauth_env = {
- "OAUTH_HOST": auth_settings.oauth_host,
- "OAUTH_PORT": auth_settings.oauth_port,
- "OAUTH_SERVER_URL": auth_settings.oauth_server_url,
- "OAUTH_CALLBACK_PATH": auth_settings.oauth_callback_path,
- "OAUTH_CLIENT_ID": auth_settings.oauth_client_id,
- "OAUTH_CLIENT_SECRET": auth_settings.oauth_client_secret,
- "OAUTH_AUTH_URL": auth_settings.oauth_auth_url,
- "OAUTH_TOKEN_URL": auth_settings.oauth_token_url,
- "OAUTH_MCP_SCOPE": auth_settings.oauth_mcp_scope,
- "OAUTH_PROVIDER_SCOPE": auth_settings.oauth_provider_scope,
- }
+ args.extend(["--sse-url", sse_url])
else:
- args = [mcp_tool, sse_url]
+ args.append(sse_url)
if os_type == "Windows":
command = "cmd"
args = ["/c", "uvx", *args]
- logger.debug("Windows detected, using cmd command")
+ await logger.adebug("Windows detected, using cmd command")
name = project.name
# Create the MCP configuration
- server_config = {
+ server_config: dict[str, Any] = {
"command": command,
"args": args,
}
- # Add environment variables if mcp-composer feature flag is enabled and auth settings exist
- if FEATURE_FLAGS.mcp_composer and oauth_env is not None:
- server_config["env"] = oauth_env # type: ignore[assignment]
-
mcp_config = {
"mcpServers": {f"lf-{sanitize_mcp_name(name)[: (MAX_MCP_SERVER_NAME_LENGTH - 4)]}": server_config}
}
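+    # Illustrative shape of the generated entry (project name, key, port, and URL are hypothetical):
+    #   {"mcpServers": {"lf-my-project": {"command": "uvx",
+    #       "args": ["mcp-proxy", "--headers", "x-api-key", "<generated-key>",
+    #                "http://localhost:7860/api/v1/mcp/project/<project-id>/sse"]}}}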
server_name = f"lf-{sanitize_mcp_name(name)[: (MAX_MCP_SERVER_NAME_LENGTH - 4)]}"
- logger.debug("Installing MCP config for project: %s (server name: %s)", project.name, server_name)
+ await logger.adebug("Installing MCP config for project: %s (server name: %s)", project.name, server_name)
# Determine the config file path based on the client and OS
if body.client.lower() == "cursor":
@@ -500,7 +632,7 @@ async def install_mcp_config(
status_code=400, detail="Windows C: drive not mounted at /mnt/c in WSL"
)
except (OSError, CalledProcessError) as e:
- logger.warning("Failed to determine Windows user path in WSL: %s", str(e))
+ await logger.awarning("Failed to determine Windows user path in WSL: %s", str(e))
raise HTTPException(
status_code=400, detail=f"Could not determine Windows Claude config path in WSL: {e!s}"
) from e
@@ -525,9 +657,18 @@ async def install_mcp_config(
# If file exists but is invalid JSON, start fresh
existing_config = {"mcpServers": {}}
- # Merge new config with existing config
+ # Ensure mcpServers section exists
if "mcpServers" not in existing_config:
existing_config["mcpServers"] = {}
+
+ # Remove any existing servers with the same SSE URL (for reinstalling)
+ project_sse_url = await get_project_sse_url(project_id)
+ existing_config, removed_servers = remove_server_by_sse_url(existing_config, project_sse_url)
+
+ if removed_servers:
+ logger.info("Removed existing MCP servers with same SSE URL for reinstall: %s", removed_servers)
+
+ # Merge new config with existing config
existing_config["mcpServers"].update(mcp_config["mcpServers"])
# Write the updated config
@@ -536,11 +677,17 @@ async def install_mcp_config(
except Exception as e:
msg = f"Error installing MCP configuration: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise HTTPException(status_code=500, detail=str(e)) from e
else:
- message = f"Successfully installed MCP configuration for {body.client}"
- logger.info(message)
+ action = "reinstalled" if removed_servers else "installed"
+ message = f"Successfully {action} MCP configuration for {body.client}"
+ if removed_servers:
+ message += f" (replaced existing servers: {', '.join(removed_servers)})"
+ if generated_api_key:
+ auth_type = "API key" if FEATURE_FLAGS.mcp_composer else "legacy API key"
+ message += f" with {auth_type} authentication (key name: 'MCP Project {project.name} - {body.client}')"
+ await logger.ainfo(message)
return {"message": message}
@@ -560,12 +707,11 @@ async def check_installed_mcp_servers(
if not project:
raise HTTPException(status_code=404, detail="Project not found")
- # Project server name pattern (must match the logic in install function)
- name = project.name
- project_server_name = f"lf-{sanitize_mcp_name(name)[: (MAX_MCP_SERVER_NAME_LENGTH - 4)]}"
+ # Generate the SSE URL for this project
+ project_sse_url = await get_project_sse_url(project_id)
- logger.debug(
- "Checking for installed MCP servers for project: %s (server name: %s)", project.name, project_server_name
+ await logger.adebug(
+ "Checking for installed MCP servers for project: %s (SSE URL: %s)", project.name, project_sse_url
)
# Check configurations for different clients
@@ -573,43 +719,45 @@ async def check_installed_mcp_servers(
# Check Cursor configuration
cursor_config_path = Path.home() / ".cursor" / "mcp.json"
- logger.debug("Checking Cursor config at: %s (exists: %s)", cursor_config_path, cursor_config_path.exists())
+ await logger.adebug(
+ "Checking Cursor config at: %s (exists: %s)", cursor_config_path, cursor_config_path.exists()
+ )
if cursor_config_path.exists():
try:
with cursor_config_path.open("r") as f:
cursor_config = json.load(f)
- if "mcpServers" in cursor_config and project_server_name in cursor_config["mcpServers"]:
- logger.debug("Found Cursor config for project server: %s", project_server_name)
+ if config_contains_sse_url(cursor_config, project_sse_url):
+ await logger.adebug("Found Cursor config with matching SSE URL: %s", project_sse_url)
results.append("cursor")
else:
- logger.debug(
- "Cursor config exists but no entry for server: %s (available servers: %s)",
- project_server_name,
+ await logger.adebug(
+ "Cursor config exists but no server with SSE URL: %s (available servers: %s)",
+ project_sse_url,
list(cursor_config.get("mcpServers", {}).keys()),
)
except json.JSONDecodeError:
- logger.warning("Failed to parse Cursor config JSON at: %s", cursor_config_path)
+ await logger.awarning("Failed to parse Cursor config JSON at: %s", cursor_config_path)
# Check Windsurf configuration
windsurf_config_path = Path.home() / ".codeium" / "windsurf" / "mcp_config.json"
- logger.debug(
+ await logger.adebug(
"Checking Windsurf config at: %s (exists: %s)", windsurf_config_path, windsurf_config_path.exists()
)
if windsurf_config_path.exists():
try:
with windsurf_config_path.open("r") as f:
windsurf_config = json.load(f)
- if "mcpServers" in windsurf_config and project_server_name in windsurf_config["mcpServers"]:
- logger.debug("Found Windsurf config for project server: %s", project_server_name)
+ if config_contains_sse_url(windsurf_config, project_sse_url):
+ await logger.adebug("Found Windsurf config with matching SSE URL: %s", project_sse_url)
results.append("windsurf")
else:
- logger.debug(
- "Windsurf config exists but no entry for server: %s (available servers: %s)",
- project_server_name,
+ await logger.adebug(
+ "Windsurf config exists but no server with SSE URL: %s (available servers: %s)",
+ project_sse_url,
list(windsurf_config.get("mcpServers", {}).keys()),
)
except json.JSONDecodeError:
- logger.warning("Failed to parse Windsurf config JSON at: %s", windsurf_config_path)
+ await logger.awarning("Failed to parse Windsurf config JSON at: %s", windsurf_config_path)
# Check Claude configuration
claude_config_path = None
@@ -654,7 +802,7 @@ async def check_installed_mcp_servers(
user_dirs[0] / "AppData" / "Roaming" / "Claude" / "claude_desktop_config.json"
)
except (OSError, CalledProcessError) as e:
- logger.warning(
+ await logger.awarning(
"Failed to determine Windows user path in WSL for checking Claude config: %s", str(e)
)
# Don't set claude_config_path, so it will be skipped
@@ -663,31 +811,168 @@ async def check_installed_mcp_servers(
claude_config_path = Path(os.environ["APPDATA"]) / "Claude" / "claude_desktop_config.json"
if claude_config_path and claude_config_path.exists():
- logger.debug("Checking Claude config at: %s", claude_config_path)
+ await logger.adebug("Checking Claude config at: %s", claude_config_path)
try:
with claude_config_path.open("r") as f:
claude_config = json.load(f)
- if "mcpServers" in claude_config and project_server_name in claude_config["mcpServers"]:
- logger.debug("Found Claude config for project server: %s", project_server_name)
+ if config_contains_sse_url(claude_config, project_sse_url):
+ await logger.adebug("Found Claude config with matching SSE URL: %s", project_sse_url)
results.append("claude")
else:
- logger.debug(
- "Claude config exists but no entry for server: %s (available servers: %s)",
- project_server_name,
+ await logger.adebug(
+ "Claude config exists but no server with SSE URL: %s (available servers: %s)",
+ project_sse_url,
list(claude_config.get("mcpServers", {}).keys()),
)
except json.JSONDecodeError:
- logger.warning("Failed to parse Claude config JSON at: %s", claude_config_path)
+ await logger.awarning("Failed to parse Claude config JSON at: %s", claude_config_path)
else:
- logger.debug("Claude config path not found or doesn't exist: %s", claude_config_path)
+ await logger.adebug("Claude config path not found or doesn't exist: %s", claude_config_path)
except Exception as e:
msg = f"Error checking MCP configuration: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise HTTPException(status_code=500, detail=str(e)) from e
return results
+def config_contains_sse_url(config_data: dict, sse_url: str) -> bool:
+ """Check if any MCP server in the config uses the specified SSE URL."""
+ mcp_servers = config_data.get("mcpServers", {})
+ for server_name, server_config in mcp_servers.items():
+ args = server_config.get("args", [])
+ # The SSE URL is typically the last argument in mcp-proxy configurations
+ if args and args[-1] == sse_url:
+ logger.debug("Found matching SSE URL in server: %s", server_name)
+ return True
+ return False
+
+
+async def get_project_sse_url(project_id: UUID) -> str:
+ """Generate the SSE URL for a project, including WSL handling."""
+ # Get settings service to build the SSE URL
+ settings_service = get_settings_service()
+ host = getattr(settings_service.settings, "host", "localhost")
+ port = getattr(settings_service.settings, "port", 3000)
+ base_url = f"http://{host}:{port}".rstrip("/")
+ project_sse_url = f"{base_url}/api/v1/mcp/project/{project_id}/sse"
+
+ # Handle WSL case - must match the logic in install function
+ os_type = platform.system()
+ is_wsl = os_type == "Linux" and "microsoft" in platform.uname().release.lower()
+
+ if is_wsl and host in {"localhost", "127.0.0.1"}:
+ try:
+ proc = await create_subprocess_exec(
+ "/usr/bin/hostname",
+ "-I",
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE,
+ )
+ stdout, stderr = await proc.communicate()
+
+ if proc.returncode == 0 and stdout.strip():
+ wsl_ip = stdout.decode().strip().split()[0] # Get first IP address
+ logger.debug("Using WSL IP for external access: %s", wsl_ip)
+ # Replace the localhost with the WSL IP in the URL
+ project_sse_url = project_sse_url.replace(f"http://{host}:{port}", f"http://{wsl_ip}:{port}")
+ except OSError as e:
+ logger.warning("Failed to get WSL IP address: %s. Using default URL.", str(e))
+
+ return project_sse_url
+
+
+async def get_config_path(client: str) -> Path:
+ """Get the configuration file path for a given client and operating system."""
+ os_type = platform.system()
+ is_wsl = os_type == "Linux" and "microsoft" in platform.uname().release.lower()
+
+ if client.lower() == "cursor":
+ return Path.home() / ".cursor" / "mcp.json"
+ if client.lower() == "windsurf":
+ return Path.home() / ".codeium" / "windsurf" / "mcp_config.json"
+ if client.lower() == "claude":
+ if os_type == "Darwin": # macOS
+ return Path.home() / "Library" / "Application Support" / "Claude" / "claude_desktop_config.json"
+ if os_type == "Windows" or is_wsl: # Windows or WSL (Claude runs on Windows host)
+ if is_wsl:
+ # In WSL, we need to access the Windows APPDATA directory
+ try:
+ # First try to get the Windows username
+ proc = await create_subprocess_exec(
+ "/mnt/c/Windows/System32/cmd.exe",
+ "/c",
+ "echo %USERNAME%",
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE,
+ )
+ stdout, stderr = await proc.communicate()
+
+ if proc.returncode == 0 and stdout.strip():
+ windows_username = stdout.decode().strip()
+ return Path(
+ f"/mnt/c/Users/{windows_username}/AppData/Roaming/Claude/claude_desktop_config.json"
+ )
+
+ # Fallback: try to find the Windows user directory
+ users_dir = Path("/mnt/c/Users")
+ if users_dir.exists():
+ # Get the first non-system user directory
+ user_dirs = [
+ d
+ for d in users_dir.iterdir()
+ if d.is_dir() and not d.name.startswith(("Default", "Public", "All Users"))
+ ]
+ if user_dirs:
+ return user_dirs[0] / "AppData" / "Roaming" / "Claude" / "claude_desktop_config.json"
+
+ if not Path("/mnt/c").exists():
+ msg = "Windows C: drive not mounted at /mnt/c in WSL"
+ raise ValueError(msg)
+
+ msg = "Could not find valid Windows user directory in WSL"
+ raise ValueError(msg)
+ except (OSError, CalledProcessError) as e:
+ logger.warning("Failed to determine Windows user path in WSL: %s", str(e))
+ msg = f"Could not determine Windows Claude config path in WSL: {e!s}"
+ raise ValueError(msg) from e
+ # Regular Windows
+ return Path(os.environ["APPDATA"]) / "Claude" / "claude_desktop_config.json"
+
+ msg = "Unsupported operating system for Claude configuration"
+ raise ValueError(msg)
+
+ msg = "Unsupported client"
+ raise ValueError(msg)
+
+
+def remove_server_by_sse_url(config_data: dict, sse_url: str) -> tuple[dict, list[str]]:
+ """Remove any MCP servers that use the specified SSE URL from config data.
+
+ Returns:
+ tuple: (updated_config, list_of_removed_server_names)
+ """
+ if "mcpServers" not in config_data:
+ return config_data, []
+
+ removed_servers: list[str] = []
+ servers_to_remove: list[str] = []
+
+ # Find servers to remove
+ for server_name, server_config in config_data["mcpServers"].items():
+ args = server_config.get("args", [])
+ if args and args[-1] == sse_url:
+ servers_to_remove.append(server_name)
+
+ # Remove the servers
+ for server_name in servers_to_remove:
+ del config_data["mcpServers"][server_name]
+ removed_servers.append(server_name)
+ logger.debug("Removed existing server with matching SSE URL: %s", server_name)
+
+ return config_data, removed_servers
+
+
# Project-specific MCP server instance for handling project-specific tools
class ProjectMCPServer:
def __init__(self, project_id: UUID):
@@ -750,11 +1035,11 @@ async def init_mcp_servers():
try:
get_project_sse(project.id)
get_project_mcp_server(project.id)
- except Exception as e:
+ except Exception as e: # noqa: BLE001
msg = f"Failed to initialize MCP server for project {project.id}: {e}"
- logger.exception(msg)
+ await logger.aexception(msg)
# Continue to next project even if this one fails
- except Exception as e:
+ except Exception as e: # noqa: BLE001
msg = f"Failed to initialize MCP servers: {e}"
- logger.exception(msg)
+ await logger.aexception(msg)
diff --git a/src/backend/base/langflow/api/v1/mcp_utils.py b/src/backend/base/langflow/api/v1/mcp_utils.py
index 6dc0ec110..ae8e05ccb 100644
--- a/src/backend/base/langflow/api/v1/mcp_utils.py
+++ b/src/backend/base/langflow/api/v1/mcp_utils.py
@@ -12,7 +12,6 @@ from typing import Any, ParamSpec, TypeVar
from urllib.parse import quote, unquote, urlparse
from uuid import uuid4
-from loguru import logger
from mcp import types
from sqlmodel import select
@@ -21,6 +20,7 @@ from langflow.api.v1.schemas import SimplifiedAPIRequest
from langflow.base.mcp.constants import MAX_MCP_TOOL_NAME_LENGTH
from langflow.base.mcp.util import get_flow_snake_case, get_unique_name, sanitize_mcp_name
from langflow.helpers.flow import json_schema_from_flow
+from langflow.logging.logger import logger
from langflow.schema.message import Message
from langflow.services.database.models import Flow
from langflow.services.database.models.user.model import User
@@ -43,7 +43,7 @@ def handle_mcp_errors(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[
return await func(*args, **kwargs)
except Exception as e:
msg = f"Error in {func.__name__}: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise
return wrapper
@@ -108,11 +108,11 @@ async def handle_list_resources(project_id=None):
resources.append(resource)
except FileNotFoundError as e:
msg = f"Error listing files for flow {flow.id}: {e}"
- logger.debug(msg)
+ await logger.adebug(msg)
continue
except Exception as e:
msg = f"Error in listing resources: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise
return resources
@@ -150,7 +150,7 @@ async def handle_read_resource(uri: str) -> bytes:
return base64.b64encode(content)
except Exception as e:
msg = f"Error reading resource {uri}: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise
@@ -271,7 +271,7 @@ async def handle_call_tool(
return await with_db_session(execute_tool)
except Exception as e:
msg = f"Error executing tool {name}: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise
@@ -339,10 +339,10 @@ async def handle_list_tools(project_id=None, *, mcp_enabled_only=False):
existing_names.add(name)
except Exception as e: # noqa: BLE001
msg = f"Error in listing tools: {e!s} from flow: {base_name}"
- logger.warning(msg)
+ await logger.awarning(msg)
continue
except Exception as e:
msg = f"Error in listing tools: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise
return tools
diff --git a/src/backend/base/langflow/api/v1/openai_responses.py b/src/backend/base/langflow/api/v1/openai_responses.py
new file mode 100644
index 000000000..ca0c280b3
--- /dev/null
+++ b/src/backend/base/langflow/api/v1/openai_responses.py
@@ -0,0 +1,545 @@
+import asyncio
+import json
+import time
+import uuid
+from collections.abc import AsyncGenerator
+from typing import Annotated, Any
+
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request
+from fastapi.responses import StreamingResponse
+from loguru import logger
+
+from langflow.api.v1.endpoints import consume_and_yield, run_flow_generator, simple_run_flow
+from langflow.api.v1.schemas import SimplifiedAPIRequest
+from langflow.events.event_manager import create_stream_tokens_event_manager
+from langflow.helpers.flow import get_flow_by_id_or_endpoint_name
+from langflow.schema.content_types import ToolContent
+from langflow.schema.openai_responses_schemas import (
+ OpenAIErrorResponse,
+ OpenAIResponsesRequest,
+ OpenAIResponsesResponse,
+ OpenAIResponsesStreamChunk,
+ create_openai_error,
+)
+from langflow.services.auth.utils import api_key_security
+from langflow.services.database.models.flow.model import FlowRead
+from langflow.services.database.models.user.model import UserRead
+from langflow.services.deps import get_telemetry_service
+from langflow.services.telemetry.schema import RunPayload
+from langflow.services.telemetry.service import TelemetryService
+
+router = APIRouter(tags=["OpenAI Responses API"])
+
+
+def has_chat_input(flow_data: dict | None) -> bool:
+ """Check if the flow has a chat input component."""
+ if not flow_data or "nodes" not in flow_data:
+ return False
+
+ return any(node.get("data", {}).get("type") in ["ChatInput", "Chat Input"] for node in flow_data["nodes"])
+
+
+def has_chat_output(flow_data: dict | None) -> bool:
+ """Check if the flow has a chat input component."""
+ if not flow_data or "nodes" not in flow_data:
+ return False
+
+ return any(node.get("data", {}).get("type") in ["ChatOutput", "Chat Output"] for node in flow_data["nodes"])
+
+
+async def run_flow_for_openai_responses(
+ flow: FlowRead,
+ request: OpenAIResponsesRequest,
+ api_key_user: UserRead,
+ *,
+ stream: bool = False,
+ variables: dict[str, str] | None = None,
+) -> OpenAIResponsesResponse | StreamingResponse:
+ """Run a flow for OpenAI Responses API compatibility."""
+ # Check if flow has chat input
+ if not has_chat_input(flow.data):
+ msg = "Flow must have a ChatInput component to be compatible with OpenAI Responses API"
+ raise ValueError(msg)
+
+ if not has_chat_output(flow.data):
+ msg = "Flow must have a ChatOutput component to be compatible with OpenAI Responses API"
+ raise ValueError(msg)
+
+ # Use previous_response_id as session_id for conversation continuity
+ # If no previous_response_id, create a new session_id
+ session_id = request.previous_response_id or str(uuid.uuid4())
+
+ # Store header variables in context for global variable override
+ context = {}
+ if variables:
+ context["request_variables"] = variables
+ logger.debug(f"Added request variables to context: {variables}")
+
+ # Convert OpenAI request to SimplifiedAPIRequest
+ # Note: We're moving away from tweaks to a context-based approach
+ simplified_request = SimplifiedAPIRequest(
+ input_value=request.input,
+ input_type="chat", # Use chat input type for better compatibility
+ output_type="chat", # Use chat output type for better compatibility
+ tweaks={}, # Empty tweaks, using context instead
+ session_id=session_id,
+ )
+
+ # Context will be passed separately to simple_run_flow
+
+ logger.debug(f"SimplifiedAPIRequest created with context: {context}")
+
+ # Use session_id as response_id for OpenAI compatibility
+ response_id = session_id
+ created_timestamp = int(time.time())
+
+ if stream:
+ # Handle streaming response
+ asyncio_queue: asyncio.Queue = asyncio.Queue()
+ asyncio_queue_client_consumed: asyncio.Queue = asyncio.Queue()
+ event_manager = create_stream_tokens_event_manager(queue=asyncio_queue)
+
+ async def openai_stream_generator() -> AsyncGenerator[str, None]:
+ """Convert Langflow events to OpenAI Responses API streaming format."""
+ main_task = asyncio.create_task(
+ run_flow_generator(
+ flow=flow,
+ input_request=simplified_request,
+ api_key_user=api_key_user,
+ event_manager=event_manager,
+ client_consumed_queue=asyncio_queue_client_consumed,
+ context=context,
+ )
+ )
+
+ try:
+ # Send initial chunk to establish connection
+ initial_chunk = OpenAIResponsesStreamChunk(
+ id=response_id,
+ created=created_timestamp,
+ model=request.model,
+ delta={"content": ""},
+ )
+ yield f"data: {initial_chunk.model_dump_json()}\n\n"
+
+ tool_call_counter = 0
+ processed_tools = set() # Track processed tool calls to avoid duplicates
+ previous_content = "" # Track content already sent to calculate deltas
+
+ async for event_data in consume_and_yield(asyncio_queue, asyncio_queue_client_consumed):
+ if event_data is None:
+ break
+
+ content = ""
+
+ # Parse byte string events as JSON
+ if isinstance(event_data, bytes):
+ try:
+ event_str = event_data.decode("utf-8")
+ parsed_event = json.loads(event_str)
+
+ if isinstance(parsed_event, dict):
+ event_type = parsed_event.get("event")
+ data = parsed_event.get("data", {})
+
+ # Handle add_message events
+ if event_type == "add_message":
+ sender_name = data.get("sender_name", "")
+ text = data.get("text", "")
+ sender = data.get("sender", "")
+ content_blocks = data.get("content_blocks", [])
+
+ # Look for Agent Steps in content_blocks
+ for block in content_blocks:
+ if block.get("title") == "Agent Steps":
+ contents = block.get("contents", [])
+ for step in contents:
+ # Look for tool_use type items
+ if step.get("type") == "tool_use":
+ tool_name = step.get("name", "")
+ tool_input = step.get("tool_input", {})
+ tool_output = step.get("output")
+
+ # Only emit tool calls with explicit tool names and
+ # meaningful arguments
+ if tool_name and tool_input is not None and tool_output is not None:
+ # Create unique identifier for this tool call
+ tool_signature = (
+ f"{tool_name}:{hash(str(sorted(tool_input.items())))}"
+ )
+
+ # Skip if we've already processed this tool call
+ if tool_signature in processed_tools:
+ continue
+
+ processed_tools.add(tool_signature)
+ tool_call_counter += 1
+ call_id = f"call_{tool_call_counter}"
+ tool_id = f"fc_{tool_call_counter}"
+ tool_call_event = {
+ "type": "response.output_item.added",
+ "item": {
+ "id": tool_id,
+ "type": "function_call", # OpenAI uses "function_call"
+ "status": "in_progress", # OpenAI includes status
+ "name": tool_name,
+ "arguments": "", # Start with empty, build via deltas
+ "call_id": call_id,
+ },
+ }
+ yield (
+ f"event: response.output_item.added\n"
+ f"data: {json.dumps(tool_call_event)}\n\n"
+ )
+
+ # Send function call arguments as delta events (like OpenAI)
+ arguments_str = json.dumps(tool_input)
+ arg_delta_event = {
+ "type": "response.function_call_arguments.delta",
+ "delta": arguments_str,
+ "item_id": tool_id,
+ "output_index": 0,
+ }
+ yield (
+ f"event: response.function_call_arguments.delta\n"
+ f"data: {json.dumps(arg_delta_event)}\n\n"
+ )
+
+ # Send function call arguments done event
+ arg_done_event = {
+ "type": "response.function_call_arguments.done",
+ "arguments": arguments_str,
+ "item_id": tool_id,
+ "output_index": 0,
+ }
+ yield (
+ f"event: response.function_call_arguments.done\n"
+ f"data: {json.dumps(arg_done_event)}\n\n"
+ )
+
+ # If there's output, send completion event
+ if tool_output is not None:
+ # Check if include parameter requests tool_call.results
+ include_results = (
+ request.include
+ and "tool_call.results" in request.include
+ )
+
+ if include_results:
+ # Format with detailed results
+ tool_done_event = {
+ "type": "response.output_item.done",
+ "item": {
+ "id": f"{tool_name}_{tool_id}",
+ "inputs": tool_input, # Raw inputs as-is
+ "status": "completed",
+ "type": "tool_call",
+ "tool_name": f"{tool_name}",
+ "results": tool_output, # Raw output as-is
+ },
+ "output_index": 0,
+ "sequence_number": tool_call_counter + 5,
+ }
+ else:
+ # Regular function call format
+ tool_done_event = {
+ "type": "response.output_item.done",
+ "item": {
+ "id": tool_id,
+ "type": "function_call", # Match OpenAI format
+ "status": "completed",
+ "arguments": arguments_str,
+ "call_id": call_id,
+ "name": tool_name,
+ },
+ }
+
+ yield (
+ f"event: response.output_item.done\n"
+ f"data: {json.dumps(tool_done_event)}\n\n"
+ )
+
+ # Extract text content for streaming (only AI responses)
+ if (
+ sender in ["Machine", "AI", "Agent"]
+ and text != request.input
+ and sender_name == "Agent"
+ ):
+ # Calculate delta: only send newly generated content
+ if text.startswith(previous_content):
+ content = text[len(previous_content) :]
+ previous_content = text
+ else:
+ # If text doesn't start with previous content, send full text
+ # This handles cases where the content might be reset
+ content = text
+ previous_content = text
+
+ except (json.JSONDecodeError, UnicodeDecodeError):
+ continue
+
+ # Only send chunks with actual content
+ if content:
+ chunk = OpenAIResponsesStreamChunk(
+ id=response_id,
+ created=created_timestamp,
+ model=request.model,
+ delta={"content": content},
+ )
+ yield f"data: {chunk.model_dump_json()}\n\n"
+
+ # Send final completion chunk
+ final_chunk = OpenAIResponsesStreamChunk(
+ id=response_id,
+ created=created_timestamp,
+ model=request.model,
+ delta={},
+ status="completed",
+ )
+ yield f"data: {final_chunk.model_dump_json()}\n\n"
+ yield "data: [DONE]\n\n"
+
+ except Exception as e: # noqa: BLE001
+ logger.error(f"Error in stream generator: {e}")
+ error_response = create_openai_error(
+ message=str(e),
+ type_="processing_error",
+ )
+ yield f"data: {error_response}\n\n"
+ finally:
+ if not main_task.done():
+ main_task.cancel()
+
+ return StreamingResponse(
+ openai_stream_generator(),
+ media_type="text/event-stream",
+ headers={
+ "Cache-Control": "no-cache",
+ "Connection": "keep-alive",
+ "Access-Control-Allow-Origin": "*",
+ },
+ )
+
+ # Handle non-streaming response
+ result = await simple_run_flow(
+ flow=flow,
+ input_request=simplified_request,
+ stream=False,
+ api_key_user=api_key_user,
+ context=context,
+ )
+
+ # Extract output text and tool calls from result
+ output_text = ""
+ tool_calls: list[dict[str, Any]] = []
+
+ if result.outputs:
+ for run_output in result.outputs:
+ if run_output and run_output.outputs:
+ for component_output in run_output.outputs:
+ if component_output:
+ # Handle messages (final chat outputs)
+ if hasattr(component_output, "messages") and component_output.messages:
+ for msg in component_output.messages:
+ if hasattr(msg, "message"):
+ output_text = msg.message
+ break
+ # Handle results
+ if not output_text and hasattr(component_output, "results") and component_output.results:
+ for value in component_output.results.values():
+ if hasattr(value, "get_text"):
+ output_text = value.get_text()
+ break
+ if isinstance(value, str):
+ output_text = value
+ break
+
+ if hasattr(component_output, "results") and component_output.results:
+                            message_result = component_output.results.get("message")
+                            for blocks in getattr(message_result, "content_blocks", None) or []:
+ tool_calls.extend(
+ {
+ "name": content.name,
+ "input": content.tool_input,
+ "output": content.output,
+ }
+ for content in blocks.contents
+ if isinstance(content, ToolContent)
+ )
+ if output_text:
+ break
+ if output_text:
+ break
+
+ # Build output array
+ output_items = []
+
+ # Add tool calls if includes parameter requests them
+ include_results = request.include and "tool_call.results" in request.include
+
+ tool_call_id_counter = 1
+ for tool_call in tool_calls:
+ if include_results:
+ # Format as detailed tool call with results (like file_search_call in sample)
+ tool_call_item = {
+ "id": f"{tool_call['name']}_{tool_call_id_counter}",
+ "queries": list(tool_call["input"].values())
+ if isinstance(tool_call["input"], dict)
+ else [str(tool_call["input"])],
+ "status": "completed",
+ "tool_name": f"{tool_call['name']}",
+ "type": "tool_call",
+ "results": tool_call["output"] if tool_call["output"] is not None else [],
+ }
+ else:
+ # Format as basic function call
+ tool_call_item = {
+ "id": f"fc_{tool_call_id_counter}",
+ "type": "function_call",
+ "status": "completed",
+ "name": tool_call["name"],
+ "arguments": json.dumps(tool_call["input"]) if tool_call["input"] is not None else "{}",
+ }
+
+ output_items.append(tool_call_item)
+ tool_call_id_counter += 1
+
+ # Add the message output
+ output_message = {
+ "type": "message",
+ "id": f"msg_{response_id}",
+ "status": "completed",
+ "role": "assistant",
+ "content": [{"type": "output_text", "text": output_text, "annotations": []}],
+ }
+ output_items.append(output_message)
+
+ return OpenAIResponsesResponse(
+ id=response_id,
+ created_at=created_timestamp,
+ model=request.model,
+ output=output_items,
+ previous_response_id=request.previous_response_id,
+ )
+
+
+@router.post("/responses", response_model=None)
+async def create_response(
+ request: OpenAIResponsesRequest,
+ background_tasks: BackgroundTasks,
+ api_key_user: Annotated[UserRead, Depends(api_key_security)],
+ telemetry_service: Annotated[TelemetryService, Depends(get_telemetry_service)],
+ http_request: Request,
+) -> OpenAIResponsesResponse | StreamingResponse | OpenAIErrorResponse:
+ """Create a response using OpenAI Responses API format.
+
+ This endpoint accepts a flow_id in the model parameter and processes
+ the input through the specified Langflow flow.
+
+ Args:
+ request: OpenAI Responses API request with model (flow_id) and input
+ background_tasks: FastAPI background task manager
+ api_key_user: Authenticated user from API key
+ http_request: The incoming HTTP request
+ telemetry_service: Telemetry service for logging
+
+ Returns:
+ OpenAI-compatible response or streaming response
+
+ Raises:
+ HTTPException: For validation errors or flow execution issues
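+
+    Example (illustrative; assumes the router is mounted under /api/v1, with placeholder ids and keys):
+        POST /api/v1/responses
+        headers: x-api-key: <langflow-api-key>
+                 X-LANGFLOW-GLOBAL-VAR-OPENAI_API_KEY: <provider-key>
+        body: {"model": "<flow-id-or-endpoint-name>", "input": "Hello", "stream": false}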
+ """
+ start_time = time.perf_counter()
+
+ # Extract global variables from X-LANGFLOW-GLOBAL-VAR-* headers
+ variables = {}
+ header_prefix = "x-langflow-global-var-"
+
+ logger.debug(f"All headers received: {list(http_request.headers.keys())}")
+ logger.debug(f"Looking for headers starting with: {header_prefix}")
+
+ for header_name, header_value in http_request.headers.items():
+ header_lower = header_name.lower()
+ logger.debug(f"Checking header: '{header_lower}' (original: '{header_name}')")
+ if header_lower.startswith(header_prefix):
+ # Extract variable name from header (remove prefix) and convert to uppercase
+ var_name_lower = header_lower[len(header_prefix) :]
+ var_name = var_name_lower.upper() # Default to uppercase
+
+            variables[var_name] = header_value
+            # Log only the variable name; header values may contain secrets.
+            logger.debug(f"Found global variable '{var_name}' (converted to uppercase from header: {header_name})")
+
+    logger.debug(f"Extracted global variables from headers: {list(variables.keys())}")
+
+ # Validate tools parameter - error out if tools are provided
+ if request.tools is not None:
+ error_response = create_openai_error(
+ message="Tools are not supported yet",
+ type_="invalid_request_error",
+ code="tools_not_supported",
+ )
+ return OpenAIErrorResponse(error=error_response["error"])
+
+ # Get flow using the model field (which contains flow_id)
+ try:
+ flow = await get_flow_by_id_or_endpoint_name(request.model, str(api_key_user.id))
+ except HTTPException:
+ flow = None
+
+ if flow is None:
+ error_response = create_openai_error(
+ message=f"Flow with id '{request.model}' not found",
+ type_="invalid_request_error",
+ code="flow_not_found",
+ )
+ return OpenAIErrorResponse(error=error_response["error"])
+
+ try:
+ # Process the request
+ result = await run_flow_for_openai_responses(
+ flow=flow,
+ request=request,
+ api_key_user=api_key_user,
+ stream=request.stream,
+ variables=variables,
+ )
+
+ # Log telemetry for successful completion
+ if not request.stream: # Only log for non-streaming responses
+ end_time = time.perf_counter()
+ background_tasks.add_task(
+ telemetry_service.log_package_run,
+ RunPayload(
+ run_is_webhook=False,
+ run_seconds=int(end_time - start_time),
+ run_success=True,
+ run_error_message="",
+ ),
+ )
+
+ except Exception as exc: # noqa: BLE001
+ logger.error(f"Error processing OpenAI Responses request: {exc}")
+
+ # Log telemetry for failed completion
+ background_tasks.add_task(
+ telemetry_service.log_package_run,
+ RunPayload(
+ run_is_webhook=False,
+ run_seconds=int(time.perf_counter() - start_time),
+ run_success=False,
+ run_error_message=str(exc),
+ ),
+ )
+
+ # Return OpenAI-compatible error
+ error_response = create_openai_error(
+ message=str(exc),
+ type_="processing_error",
+ )
+ return OpenAIErrorResponse(error=error_response["error"])
+ return result
diff --git a/src/backend/base/langflow/api/v1/schemas.py b/src/backend/base/langflow/api/v1/schemas.py
index bd55ddf82..fcf9e1e1f 100644
--- a/src/backend/base/langflow/api/v1/schemas.py
+++ b/src/backend/base/langflow/api/v1/schemas.py
@@ -407,13 +407,15 @@ class ConfigResponse(BaseModel):
public_flow_cleanup_interval: int
public_flow_expiration: int
event_delivery: Literal["polling", "streaming", "direct"]
+ webhook_auth_enable: bool
@classmethod
- def from_settings(cls, settings: Settings) -> "ConfigResponse":
- """Create a ConfigResponse instance using values from a Settings object and global feature flags.
+ def from_settings(cls, settings: Settings, auth_settings) -> "ConfigResponse":
+ """Create a ConfigResponse instance using values from a Settings object and AuthSettings.
Parameters:
settings (Settings): The Settings object containing configuration values.
+ auth_settings: The AuthSettings object containing authentication configuration values.
Returns:
ConfigResponse: An instance populated with configuration and feature flag values.
@@ -431,6 +433,7 @@ class ConfigResponse(BaseModel):
public_flow_cleanup_interval=settings.public_flow_cleanup_interval,
public_flow_expiration=settings.public_flow_expiration,
event_delivery=settings.event_delivery,
+ webhook_auth_enable=auth_settings.WEBHOOK_AUTH_ENABLE,
)
@@ -444,18 +447,13 @@ class CancelFlowResponse(BaseModel):
class AuthSettings(BaseModel):
"""Model representing authentication settings for MCP."""
- auth_type: Literal["none", "apikey", "basic", "bearer", "iam", "oauth"] = "none"
- api_key: SecretStr | None = None
- username: str | None = None
- password: SecretStr | None = None
- bearer_token: SecretStr | None = None
- iam_endpoint: str | None = None
+ auth_type: Literal["none", "apikey", "oauth"] = "none"
oauth_host: str | None = None
oauth_port: str | None = None
oauth_server_url: str | None = None
oauth_callback_path: str | None = None
oauth_client_id: str | None = None
- oauth_client_secret: str | None = None
+ oauth_client_secret: SecretStr | None = None
oauth_auth_url: str | None = None
oauth_token_url: str | None = None
oauth_mcp_scope: str | None = None
diff --git a/src/backend/base/langflow/api/v1/store.py b/src/backend/base/langflow/api/v1/store.py
index 23023da78..39b1bea7b 100644
--- a/src/backend/base/langflow/api/v1/store.py
+++ b/src/backend/base/langflow/api/v1/store.py
@@ -2,9 +2,9 @@ from typing import Annotated
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query
-from loguru import logger
from langflow.api.utils import CurrentActiveUser, check_langflow_version
+from langflow.logging.logger import logger
from langflow.services.auth import utils as auth_utils
from langflow.services.deps import get_settings_service, get_store_service
from langflow.services.store.exceptions import CustomError
diff --git a/src/backend/base/langflow/api/v1/validate.py b/src/backend/base/langflow/api/v1/validate.py
index 1bc8219ab..a7b829a09 100644
--- a/src/backend/base/langflow/api/v1/validate.py
+++ b/src/backend/base/langflow/api/v1/validate.py
@@ -1,9 +1,9 @@
from fastapi import APIRouter, HTTPException
-from loguru import logger
from langflow.api.utils import CurrentActiveUser
from langflow.api.v1.base import Code, CodeValidationResponse, PromptValidationResponse, ValidatePromptRequest
from langflow.base.prompts.api_utils import process_prompt_template
+from langflow.logging.logger import logger
from langflow.utils.validate import validate_code
# build router
@@ -19,7 +19,7 @@ async def post_validate_code(code: Code, _current_user: CurrentActiveUser) -> Co
function=errors.get("function", {}),
)
except Exception as e:
- logger.opt(exception=True).debug("Error validating code")
+ logger.debug("Error validating code", exc_info=True)
raise HTTPException(status_code=500, detail=str(e)) from e
diff --git a/src/backend/base/langflow/api/v1/voice_mode.py b/src/backend/base/langflow/api/v1/voice_mode.py
index 429b94800..ec4a07611 100644
--- a/src/backend/base/langflow/api/v1/voice_mode.py
+++ b/src/backend/base/langflow/api/v1/voice_mode.py
@@ -33,11 +33,7 @@ from langflow.services.database.models.flow.model import Flow
from langflow.services.database.models.message.model import MessageTable
from langflow.services.database.models.user.model import User
from langflow.services.deps import get_variable_service, session_scope
-from langflow.utils.voice_utils import (
- BYTES_PER_24K_FRAME,
- VAD_SAMPLE_RATE_16K,
- resample_24k_to_16k,
-)
+from langflow.utils.voice_utils import BYTES_PER_24K_FRAME, VAD_SAMPLE_RATE_16K, resample_24k_to_16k
router = APIRouter(prefix="/voice", tags=["Voice"])
@@ -121,8 +117,8 @@ async def authenticate_and_get_openai_key(session: DbSession, user: User, websoc
)
return None, None
except Exception as e: # noqa: BLE001
- logger.error(f"Error with API key: {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error with API key: {e}")
+ await logger.aerror(traceback.format_exc())
return None, None
return user, openai_key
@@ -185,13 +181,13 @@ class ElevenLabsClientManager:
session=session,
)
except (InvalidToken, ValueError) as e:
- logger.error(f"Error with ElevenLabs API key: {e}")
+ await logger.aerror(f"Error with ElevenLabs API key: {e}")
cls._api_key = os.getenv("ELEVENLABS_API_KEY", "")
if not cls._api_key:
- logger.error("ElevenLabs API key not found")
+ await logger.aerror("ElevenLabs API key not found")
return None
except (KeyError, AttributeError, sqlalchemy.exc.SQLAlchemyError) as e:
- logger.error(f"Exception getting ElevenLabs API key: {e}")
+ await logger.aerror(f"Exception getting ElevenLabs API key: {e}")
return None
if cls._api_key:
@@ -310,25 +306,25 @@ async def process_message_queue(queue_key, session):
try:
await aadd_messagetables([message], session)
- logger.debug(f"Added message to DB: {message.text[:30]}...")
+ await logger.adebug(f"Added message to DB: {message.text[:30]}...")
except ValueError as e:
- logger.error(f"Error saving message to database (ValueError): {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database (ValueError): {e}")
+ await logger.aerror(traceback.format_exc())
except sqlalchemy.exc.SQLAlchemyError as e:
- logger.error(f"Error saving message to database (SQLAlchemyError): {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database (SQLAlchemyError): {e}")
+ await logger.aerror(traceback.format_exc())
except (KeyError, AttributeError, TypeError) as e:
# More specific exceptions instead of blind Exception
- logger.error(f"Error saving message to database: {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database: {e}")
+ await logger.aerror(traceback.format_exc())
finally:
message_queues[queue_key].task_done()
if message_queues[queue_key].empty():
break
except Exception as e: # noqa: BLE001
- logger.debug(f"Message queue processor for {queue_key} was cancelled: {e}")
- logger.error(traceback.format_exc())
+ await logger.adebug(f"Message queue processor for {queue_key} was cancelled: {e}")
+ await logger.aerror(traceback.format_exc())
class SendQueues:
@@ -369,7 +365,7 @@ class SendQueues:
logger.trace("OPENAI BLOCKING")
# log_event(msg, DIRECTION_TO_OPENAI)
except Exception: # noqa: BLE001
- logger.error(traceback.format_exc())
+ await logger.aerror(traceback.format_exc())
def client_send(self, payload):
try:
@@ -387,7 +383,7 @@ class SendQueues:
self.log_event(msg, LF_TO_CLIENT)
await self.client_ws.send_text(json.dumps(msg))
except Exception: # noqa: BLE001
- logger.error(traceback.format_exc())
+ await logger.aerror(traceback.format_exc())
async def close(self):
self.openai_send_q.put_nowait(None)
@@ -462,7 +458,7 @@ async def handle_function_call(
create_response()
except json.JSONDecodeError as e:
trace = traceback.format_exc()
- logger.error(f"JSON decode error: {e!s}\ntrace: {trace}")
+ await logger.aerror(f"JSON decode error: {e!s}\ntrace: {trace}")
function_output = {
"type": "conversation.item.create",
"item": {
@@ -474,7 +470,7 @@ async def handle_function_call(
msg_handler.openai_send(function_output)
except ValueError as e:
trace = traceback.format_exc()
- logger.error(f"Value error: {e!s}\ntrace: {trace}")
+ await logger.aerror(f"Value error: {e!s}\ntrace: {trace}")
function_output = {
"type": "conversation.item.create",
"item": {
@@ -486,7 +482,7 @@ async def handle_function_call(
msg_handler.openai_send(function_output)
except (ConnectionError, websockets.exceptions.WebSocketException) as e:
trace = traceback.format_exc()
- logger.error(f"Connection error: {e!s}\ntrace: {trace}")
+ await logger.aerror(f"Connection error: {e!s}\ntrace: {trace}")
function_output = {
"type": "conversation.item.create",
"item": {
@@ -497,8 +493,8 @@ async def handle_function_call(
}
msg_handler.openai_send(function_output)
except (KeyError, AttributeError, TypeError) as e:
- logger.error(f"Error executing flow: {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error executing flow: {e}")
+ await logger.aerror(traceback.format_exc())
function_output = {
"type": "conversation.item.create",
"item": {
@@ -751,7 +747,7 @@ async def flow_as_tool_websocket(
except Exception as e: # noqa: BLE001
err_msg = {"error": f"Failed to load flow: {e!s}"}
await client_websocket.send_json(err_msg)
- logger.error(f"Failed to load flow: {e}")
+ await logger.aerror(f"Failed to load flow: {e}")
return
url = "wss://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview"
@@ -800,7 +796,7 @@ async def flow_as_tool_websocket(
msg_handler.openai_send({"type": "response.cancel"})
bot_speaking_flag[0] = False
except Exception as e: # noqa: BLE001
- logger.error(f"[ERROR] VAD processing failed (ValueError): {e}")
+ await logger.aerror(f"[ERROR] VAD processing failed (ValueError): {e}")
continue
if has_speech:
last_speech_time = datetime.now(tz=timezone.utc)
@@ -856,7 +852,7 @@ async def flow_as_tool_websocket(
return new_session
class Response:
- def __init__(self, response_id: str, use_elevenlabs: bool | None = None):
+ def __init__(self, response_id: str, *, use_elevenlabs: bool | None = None):
if use_elevenlabs is None:
use_elevenlabs = False
self.response_id = response_id
@@ -925,7 +921,7 @@ async def flow_as_tool_websocket(
# client_send_event_from_thread(event, main_loop)
msg_handler.client_send(event)
except Exception: # noqa: BLE001
- logger.error(traceback.format_exc())
+ await logger.aerror(traceback.format_exc())
async def forward_to_openai() -> None:
nonlocal openai_realtime_session
@@ -954,10 +950,10 @@ async def flow_as_tool_websocket(
msg_handler.openai_send(msg)
num_audio_samples = 0
elif msg.get("type") == "langflow.voice_mode.config":
- logger.info(f"langflow.voice_mode.config {msg}")
+ await logger.ainfo(f"langflow.voice_mode.config {msg}")
voice_config.progress_enabled = msg.get("progress_enabled", True)
elif msg.get("type") == "langflow.elevenlabs.config":
- logger.info(f"langflow.elevenlabs.config {msg}")
+ await logger.ainfo(f"langflow.elevenlabs.config {msg}")
voice_config.use_elevenlabs = msg["enabled"]
voice_config.elevenlabs_voice = msg.get("voice_id", voice_config.elevenlabs_voice)
@@ -997,7 +993,7 @@ async def flow_as_tool_websocket(
if do_forward:
msg_handler.client_send(event)
if event_type == "response.created":
- responses[response_id] = Response(response_id, voice_config.use_elevenlabs)
+ responses[response_id] = Response(response_id, use_elevenlabs=voice_config.use_elevenlabs)
if function_call:
if function_call.is_prog_enabled and not function_call.prog_rsp_id:
function_call.prog_rsp_id = response_id
@@ -1021,12 +1017,12 @@ async def flow_as_tool_websocket(
message_text = event.get("text", "")
await add_message_to_db(message_text, session, flow_id, session_id, "Machine", "AI")
except ValueError as err:
- logger.error(f"Error saving message to database (ValueError): {err}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database (ValueError): {err}")
+ await logger.aerror(traceback.format_exc())
except (KeyError, AttributeError, TypeError) as err:
# Replace blind Exception with specific exceptions
- logger.error(f"Error saving message to database: {err}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database: {err}")
+ await logger.aerror(traceback.format_exc())
elif event_type == "response.output_item.added":
bot_speaking_flag[0] = True
@@ -1050,12 +1046,12 @@ async def flow_as_tool_websocket(
if transcript and transcript.strip():
await add_message_to_db(transcript, session, flow_id, session_id, "Machine", "AI")
except ValueError as err:
- logger.error(f"Error saving message to database (ValueError): {err}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database (ValueError): {err}")
+ await logger.aerror(traceback.format_exc())
except (KeyError, AttributeError, TypeError) as err:
# Replace blind Exception with specific exceptions
- logger.error(f"Error saving message to database: {err}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database: {err}")
+ await logger.aerror(traceback.format_exc())
bot_speaking_flag[0] = False
elif event_type == "response.done":
msg_handler.openai_unblock()
@@ -1080,12 +1076,12 @@ async def flow_as_tool_websocket(
if message_text and message_text.strip():
await add_message_to_db(message_text, session, flow_id, session_id, "User", "User")
except ValueError as e:
- logger.error(f"Error saving message to database (ValueError): {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database (ValueError): {e}")
+ await logger.aerror(traceback.format_exc())
except (KeyError, AttributeError, TypeError) as e:
# Replace blind Exception with specific exceptions
- logger.error(f"Error saving message to database: {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error saving message to database: {e}")
+ await logger.aerror(traceback.format_exc())
elif event_type == "error":
pass
@@ -1104,12 +1100,12 @@ async def flow_as_tool_websocket(
# Check for exceptions in results
for result in results:
if isinstance(result, Exception):
- logger.error("WS loop failed:", exc_info=result)
- logger.error(traceback.format_exc())
+ await logger.aerror("WS loop failed:", exc_info=result)
+ await logger.aerror(traceback.format_exc())
except Exception as e: # noqa: BLE001
# Handle any other exceptions
- logger.error(f"WS loop failed: {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"WS loop failed: {e}")
+ await logger.aerror(traceback.format_exc())
finally:
# shared cleanup for writers & sockets
async def close():
@@ -1119,8 +1115,8 @@ async def flow_as_tool_websocket(
await close()
except Exception as e: # noqa: BLE001
- logger.error(f"Unexpected error: {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Unexpected error: {e}")
+ await logger.aerror(traceback.format_exc())
finally:
# Make sure to clean up the task
if vad_task and not vad_task.done():
@@ -1232,16 +1228,16 @@ async def flow_tts_websocket(
elif event.get("type") == "input_audio_buffer.commit":
openai_send(event)
elif event.get("type") == "langflow.elevenlabs.config":
- logger.info(f"langflow.elevenlabs.config {event}")
+ await logger.ainfo(f"langflow.elevenlabs.config {event}")
tts_config.use_elevenlabs = event["enabled"]
tts_config.elevenlabs_voice = event.get("voice_id", tts_config.elevenlabs_voice)
elif event.get("type") == "voice.settings":
# Store the voice setting
if event.get("voice"):
tts_config.openai_voice = event.get("voice")
- logger.info(f"Updated OpenAI voice to: {tts_config.openai_voice}")
+ await logger.ainfo(f"Updated OpenAI voice to: {tts_config.openai_voice}")
except Exception as e: # noqa: BLE001
- logger.error(f"Error in WebSocket communication: {e}")
+ await logger.aerror(f"Error in WebSocket communication: {e}")
async def forward_to_client() -> None:
try:
@@ -1312,7 +1308,7 @@ async def flow_tts_websocket(
audio_event = {"type": "response.audio.delta", "delta": base64_audio}
client_send(audio_event)
except Exception as e: # noqa: BLE001
- logger.error(f"Error in WebSocket communication: {e}")
+ await logger.aerror(f"Error in WebSocket communication: {e}")
try:
# Create tasks and gather them for concurrent execution
@@ -1321,13 +1317,13 @@ async def flow_tts_websocket(
await asyncio.gather(task1, task2)
except Exception as exc: # noqa: BLE001
# handle any exceptions from any task
- logger.error("WS loop failed:", exc_info=exc)
+ await logger.aerror("WS loop failed:", exc_info=exc)
finally:
# shared cleanup for writers & sockets
await close()
except Exception as e: # noqa: BLE001
- logger.error(f"Unexpected error: {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Unexpected error: {e}")
+ await logger.aerror(traceback.format_exc())
def extract_transcript(json_data):
@@ -1367,13 +1363,13 @@ async def get_elevenlabs_voice_ids(
for voice in voices
]
except ValueError as e:
- logger.error(f"Error fetching ElevenLabs voices (ValueError): {e}")
+ await logger.aerror(f"Error fetching ElevenLabs voices (ValueError): {e}")
return {"error": str(e)}
except requests.RequestException as e:
- logger.error(f"Error fetching ElevenLabs voices (RequestException): {e}")
+ await logger.aerror(f"Error fetching ElevenLabs voices (RequestException): {e}")
return {"error": str(e)}
except (KeyError, AttributeError, TypeError) as e:
# More specific exceptions instead of blind Exception
- logger.error(f"Error fetching ElevenLabs voices: {e}")
- logger.error(traceback.format_exc())
+ await logger.aerror(f"Error fetching ElevenLabs voices: {e}")
+ await logger.aerror(traceback.format_exc())
return {"error": str(e)}
diff --git a/src/backend/base/langflow/api/v2/files.py b/src/backend/base/langflow/api/v2/files.py
index 3777a5da6..afbab6151 100644
--- a/src/backend/base/langflow/api/v2/files.py
+++ b/src/backend/base/langflow/api/v2/files.py
@@ -11,11 +11,11 @@ from zoneinfo import ZoneInfo
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
from fastapi.responses import StreamingResponse
-from loguru import logger
from sqlmodel import col, select
from langflow.api.schemas import UploadFileResponse
from langflow.api.utils import CurrentActiveUser, DbSession
+from langflow.logging.logger import logger
from langflow.services.database.models.file.model import File as UserFile
from langflow.services.deps import get_settings_service, get_storage_service
from langflow.services.storage.service import StorageService
@@ -123,7 +123,9 @@ async def upload_user_file(
unique_filename = new_filename
else:
# For normal files, ensure unique name by appending a count if necessary
- stmt = select(UserFile).where(col(UserFile.name).like(f"{root_filename}%"))
+ stmt = select(UserFile).where(
+ col(UserFile.name).like(f"{root_filename}%"), UserFile.user_id == current_user.id
+ )
existing_files = await session.exec(stmt)
files = existing_files.all() # Fetch all matching records
@@ -486,7 +488,7 @@ async def delete_file(
raise
except Exception as e:
# Log and return a generic server error
- logger.error("Error deleting file %s: %s", file_id, e)
+ await logger.aerror("Error deleting file %s: %s", file_id, e)
raise HTTPException(status_code=500, detail=f"Error deleting file: {e}") from e
return {"detail": f"File {file_to_delete.name} deleted successfully"}
diff --git a/src/backend/base/langflow/api/v2/mcp.py b/src/backend/base/langflow/api/v2/mcp.py
index 84e18bbdf..4a00cf8ad 100644
--- a/src/backend/base/langflow/api/v2/mcp.py
+++ b/src/backend/base/langflow/api/v2/mcp.py
@@ -115,6 +115,7 @@ async def get_servers(
session: DbSession,
storage_service=Depends(get_storage_service),
settings_service=Depends(get_settings_service),
+ *,
action_count: bool | None = None,
):
"""Get the list of available servers."""
@@ -140,27 +141,27 @@ async def get_servers(
server_info["error"] = "No tools found"
except ValueError as e:
# Configuration validation errors, invalid URLs, etc.
- logger.error(f"Configuration error for server {server_name}: {e}")
+ await logger.aerror(f"Configuration error for server {server_name}: {e}")
server_info["error"] = f"Configuration error: {e}"
except ConnectionError as e:
# Network connection and timeout issues
- logger.error(f"Connection error for server {server_name}: {e}")
+ await logger.aerror(f"Connection error for server {server_name}: {e}")
server_info["error"] = f"Connection failed: {e}"
except (TimeoutError, asyncio.TimeoutError) as e:
# Timeout errors
- logger.error(f"Timeout error for server {server_name}: {e}")
+ await logger.aerror(f"Timeout error for server {server_name}: {e}")
server_info["error"] = "Timeout when checking server tools"
except OSError as e:
# System-level errors (process execution, file access)
- logger.error(f"System error for server {server_name}: {e}")
+ await logger.aerror(f"System error for server {server_name}: {e}")
server_info["error"] = f"System error: {e}"
except (KeyError, TypeError) as e:
# Data parsing and access errors
- logger.error(f"Data error for server {server_name}: {e}")
+ await logger.aerror(f"Data error for server {server_name}: {e}")
server_info["error"] = f"Configuration data error: {e}"
except (RuntimeError, ProcessLookupError, PermissionError) as e:
# Runtime and process-related errors
- logger.error(f"Runtime error for server {server_name}: {e}")
+ await logger.aerror(f"Runtime error for server {server_name}: {e}")
server_info["error"] = f"Runtime error: {e}"
except Exception as e: # noqa: BLE001
# Generic catch-all for other exceptions (including ExceptionGroup)
@@ -168,15 +169,15 @@ async def get_servers(
# Extract the first underlying exception for a more meaningful error message
underlying_error = e.exceptions[0]
if hasattr(underlying_error, "exceptions"):
- logger.error(
+ await logger.aerror(
f"Error checking server {server_name}: {underlying_error}, {underlying_error.exceptions}"
)
underlying_error = underlying_error.exceptions[0]
else:
- logger.exception(f"Error checking server {server_name}: {underlying_error}")
+ await logger.aexception(f"Error checking server {server_name}: {underlying_error}")
server_info["error"] = f"Error loading server: {underlying_error}"
else:
- logger.exception(f"Error checking server {server_name}: {e}")
+ await logger.aexception(f"Error checking server {server_name}: {e}")
server_info["error"] = f"Error loading server: {e}"
return server_info
diff --git a/src/backend/base/langflow/base/composio/composio_base.py b/src/backend/base/langflow/base/composio/composio_base.py
index c1bf2d546..0a7b2cb31 100644
--- a/src/backend/base/langflow/base/composio/composio_base.py
+++ b/src/backend/base/langflow/base/composio/composio_base.py
@@ -1,31 +1,69 @@
+import copy
import re
-from abc import abstractmethod
from typing import Any
-from composio.client.collections import AppAuthScheme
-from composio.client.exceptions import NoItemsFound
-from composio.exceptions import ApiKeyError
-from composio_langchain import ComposioToolSet
+from composio import Composio
+from composio_langchain import LangchainProvider
from langchain_core.tools import Tool
+from langflow.base.mcp.util import create_input_schema_from_json_schema
from langflow.custom.custom_component.component import Component
from langflow.inputs.inputs import (
AuthInput,
+ FileInput,
+ InputTypes,
MessageTextInput,
SecretStrInput,
SortableListInput,
)
from langflow.io import Output
+from langflow.io.schema import flatten_schema, schema_to_langflow_inputs
from langflow.logging import logger
from langflow.schema.data import Data
from langflow.schema.dataframe import DataFrame
from langflow.schema.message import Message
+def _patch_graph_clean_null_input_types() -> None:
+ """Monkey-patch Graph._create_vertex to clean legacy templates."""
+ try:
+ from langflow.graph.graph.base import Graph
+
+ original_create_vertex = Graph._create_vertex
+
+ def _create_vertex_with_cleanup(self, frontend_data):
+ try:
+ node_id: str | None = frontend_data.get("id") if isinstance(frontend_data, dict) else None
+ if node_id and "Composio" in node_id:
+ template = frontend_data.get("data", {}).get("node", {}).get("template", {})
+ if isinstance(template, dict):
+ for field_cfg in template.values():
+ if isinstance(field_cfg, dict) and field_cfg.get("input_types") is None:
+ field_cfg["input_types"] = []
+ except (AttributeError, TypeError, KeyError) as e:
+ logger.debug(f"Composio template cleanup encountered error: {e}")
+
+ return original_create_vertex(self, frontend_data)
+
+ # Patch only once
+ if getattr(Graph, "_composio_patch_applied", False) is False:
+ Graph._create_vertex = _create_vertex_with_cleanup # type: ignore[method-assign]
+ Graph._composio_patch_applied = True # type: ignore[attr-defined]
+ logger.debug("Applied Composio template cleanup patch to Graph._create_vertex")
+
+ except (AttributeError, TypeError) as e:
+ logger.debug(f"Failed to apply Composio Graph patch: {e}")
+
+
+# Apply the patch at import time
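+# (guarded by Graph._composio_patch_applied above, so repeated imports are a no-op)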
+_patch_graph_clean_null_input_types()
+
+
class ComposioBaseComponent(Component):
"""Base class for Composio components with common functionality."""
- # Common inputs that all Composio components will need
+ default_tools_limit: int = 5
+
_base_inputs = [
MessageTextInput(
name="entity_id",
@@ -45,9 +83,10 @@ class ComposioBaseComponent(Component):
name="auth_link",
value="",
auth_tooltip="Please insert a valid Composio API Key.",
+ show=False,
),
SortableListInput(
- name="action",
+ name="action_button",
display_name="Action",
placeholder="Select action",
options=[],
@@ -60,30 +99,51 @@ class ComposioBaseComponent(Component):
limit=1,
),
]
- _all_fields: set[str] = set()
- _bool_variables: set[str] = set()
- _actions_data: dict[str, dict[str, Any]] = {}
- _default_tools: set[str] = set()
- _display_to_key_map: dict[str, str] = {}
- _key_to_display_map: dict[str, str] = {}
- _sanitized_names: dict[str, str] = {}
+
_name_sanitizer = re.compile(r"[^a-zA-Z0-9_-]")
+ # Class-level caches
+ _actions_cache: dict[str, dict[str, Any]] = {}
+ _action_schema_cache: dict[str, dict[str, Any]] = {}
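+    # Both caches are keyed by toolkit slug (e.g. "gmail") and are shared by every
+    # component instance in the process; _populate_actions_data deep-copies entries
+    # out of them so per-instance mutations do not leak back into the cache.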
+
outputs = [
Output(name="dataFrame", display_name="DataFrame", method="as_dataframe"),
]
+ inputs = list(_base_inputs)
+
+ def __init__(self, **kwargs):
+ """Initialize instance variables to prevent shared state between components."""
+ super().__init__(**kwargs)
+ self._all_fields: set[str] = set()
+ self._bool_variables: set[str] = set()
+ self._actions_data: dict[str, dict[str, Any]] = {}
+ self._default_tools: set[str] = set()
+ self._display_to_key_map: dict[str, str] = {}
+ self._key_to_display_map: dict[str, str] = {}
+ self._sanitized_names: dict[str, str] = {}
+ self._action_schemas: dict[str, Any] = {}
+
def as_message(self) -> Message:
result = self.execute_action()
+ if result is None:
+ return Message(text="Action execution returned no result")
return Message(text=str(result))
def as_dataframe(self) -> DataFrame:
result = self.execute_action()
- # If the result is a dict, pandas will raise ValueError: If using all scalar values, you must pass an index
- # So we need to make sure the result is a list of dicts
+
if isinstance(result, dict):
result = [result]
- return DataFrame(result)
+        # Build the DataFrame; rename a 'data' column (if present) so attribute-style
+        # column access does not expose a conflicting 'data' attribute.
+ result_dataframe = DataFrame(result)
+ if hasattr(result_dataframe, "columns"):
+ try:
+ if "data" in result_dataframe.columns:
+ result_dataframe = result_dataframe.rename(columns={"data": "_data"})
+ except (AttributeError, TypeError, ValueError, KeyError) as e:
+ logger.debug(f"Failed to rename 'data' column: {e}")
+ return result_dataframe
def as_data(self) -> Data:
result = self.execute_action()
@@ -115,13 +175,13 @@ class ComposioBaseComponent(Component):
return set()
return set(self._actions_data[action_key]["action_fields"]) if action_key in self._actions_data else set()
- def _build_wrapper(self) -> ComposioToolSet:
- """Build the Composio toolset wrapper."""
+ def _build_wrapper(self) -> Composio:
+ """Build the Composio wrapper."""
try:
if not self.api_key:
msg = "Composio API Key is required"
raise ValueError(msg)
- return ComposioToolSet(api_key=self.api_key)
+ return Composio(api_key=self.api_key, provider=LangchainProvider())
except ValueError as e:
logger.error(f"Error building Composio wrapper: {e}")
@@ -157,128 +217,949 @@ class ComposioBaseComponent(Component):
else:
build_config[field]["value"] = ""
+ def _populate_actions_data(self):
+ """Fetch the list of actions for the toolkit and build helper maps."""
+ if self._actions_data:
+ return
+
+ # Try to load from the class-level cache
+ toolkit_slug = self.app_name.lower()
+ if toolkit_slug in self.__class__._actions_cache:
+ # Deep-copy so that any mutation on this instance does not affect the
+ # cached master copy.
+ self._actions_data = copy.deepcopy(self.__class__._actions_cache[toolkit_slug])
+ self._action_schemas = copy.deepcopy(self.__class__._action_schema_cache.get(toolkit_slug, {}))
+ logger.debug(f"Loaded actions for {toolkit_slug} from in-process cache")
+ return
+
+ api_key = getattr(self, "api_key", None)
+ if not api_key:
+ logger.warning("API key is missing. Cannot populate actions data.")
+ return
+
+ try:
+ composio = self._build_wrapper()
+ toolkit_slug = self.app_name.lower()
+
+ raw_tools = composio.tools.get_raw_composio_tools(toolkits=[toolkit_slug], limit=999)
+
+ if not raw_tools:
+ msg = f"Toolkit '{toolkit_slug}' not found or has no available tools"
+ raise ValueError(msg)
+
+ for raw_tool in raw_tools:
+ try:
+ # Convert raw_tool to dict-like structure
+ tool_dict = raw_tool.__dict__ if hasattr(raw_tool, "__dict__") else raw_tool
+
+ if not tool_dict:
+ logger.warning(f"Tool is None or empty: {raw_tool}")
+ continue
+
+ action_key = tool_dict.get("slug")
+ if not action_key:
+ logger.warning(f"Action key (slug) is missing in tool: {tool_dict}")
+ continue
+
+ # Human-friendly display name
+ display_name = tool_dict.get("name") or tool_dict.get("display_name")
+ if not display_name:
+ # Better fallback: convert GMAIL_SEND_EMAIL to "Send Email"
+ # Remove app prefix and convert to title case
+ clean_name = action_key
+ clean_name = clean_name.removeprefix(f"{self.app_name.upper()}_")
+ # Convert underscores to spaces and title case
+ display_name = clean_name.replace("_", " ").title()
+
+ # Build list of parameter names and track bool fields
+ parameters_schema = tool_dict.get("input_parameters", {})
+ if parameters_schema is None:
+ logger.warning(f"Parameters schema is None for action key: {action_key}")
+ # Still add the action but with empty fields
+ self._action_schemas[action_key] = tool_dict
+ self._actions_data[action_key] = {
+ "display_name": display_name,
+ "action_fields": [],
+ "file_upload_fields": set(),
+ }
+ continue
+
+ try:
+ # Special handling for unusual schema structures
+ if not isinstance(parameters_schema, dict):
+ # Try to convert if it's a model object
+ if hasattr(parameters_schema, "model_dump"):
+ parameters_schema = parameters_schema.model_dump()
+ elif hasattr(parameters_schema, "__dict__"):
+ parameters_schema = parameters_schema.__dict__
+ else:
+ logger.warning(f"Cannot process parameters schema for {action_key}, skipping")
+ self._action_schemas[action_key] = tool_dict
+ self._actions_data[action_key] = {
+ "display_name": display_name,
+ "action_fields": [],
+ "file_upload_fields": set(),
+ }
+ continue
+
+ # Validate parameters_schema has required structure before flattening
+ if not parameters_schema.get("properties") and not parameters_schema.get("$defs"):
+ # Create a minimal valid schema to avoid errors
+ parameters_schema = {"type": "object", "properties": {}}
+
+ # Sanitize the schema before passing to flatten_schema
+ # Handle case where 'required' is explicitly None (causes "'NoneType' object is not iterable")
+ if parameters_schema.get("required") is None:
+ parameters_schema = parameters_schema.copy() # Don't modify the original
+ parameters_schema["required"] = []
+
+ try:
+ # Preserve original descriptions before flattening to restore if lost
+ original_descriptions = {}
+ original_props = parameters_schema.get("properties", {})
+ for prop_name, prop_schema in original_props.items():
+ if isinstance(prop_schema, dict) and "description" in prop_schema:
+ original_descriptions[prop_name] = prop_schema["description"]
+
+ flat_schema = flatten_schema(parameters_schema)
+
+ # Restore lost descriptions in flattened schema
+ if flat_schema and isinstance(flat_schema, dict) and "properties" in flat_schema:
+ flat_props = flat_schema["properties"]
+ for field_name, field_schema in flat_props.items():
+ # Check if this field lost its description during flattening
+ if isinstance(field_schema, dict) and "description" not in field_schema:
+ # Try to find the original description
+ # Handle array fields like bcc[0] -> bcc
+ base_field_name = field_name.replace("[0]", "")
+ if base_field_name in original_descriptions:
+ field_schema["description"] = original_descriptions[base_field_name]
+ elif field_name in original_descriptions:
+ field_schema["description"] = original_descriptions[field_name]
+ except (KeyError, TypeError, ValueError):
+ self._action_schemas[action_key] = tool_dict
+ self._actions_data[action_key] = {
+ "display_name": display_name,
+ "action_fields": [],
+ "file_upload_fields": set(),
+ }
+ continue
+
+ if flat_schema is None:
+ logger.warning(f"Flat schema is None for action key: {action_key}")
+ # Still add the action but with empty fields so the UI doesn't break
+ self._action_schemas[action_key] = tool_dict
+ self._actions_data[action_key] = {
+ "display_name": display_name,
+ "action_fields": [],
+ "file_upload_fields": set(),
+ }
+ continue
+
+ # Extract field names and detect file upload fields during parsing
+ raw_action_fields = list(flat_schema.get("properties", {}).keys())
+ action_fields = []
+ attachment_related_found = False
+ file_upload_fields = set()
+
+ # Check original schema properties for file_uploadable fields
+ original_props = parameters_schema.get("properties", {})
+ for field_name, field_schema in original_props.items():
+ if isinstance(field_schema, dict):
+ clean_field_name = field_name.replace("[0]", "")
+ # Check direct file_uploadable attribute
+ if field_schema.get("file_uploadable") is True:
+ file_upload_fields.add(clean_field_name)
+
+ # Check anyOf structures (like OUTLOOK_OUTLOOK_SEND_EMAIL)
+ if "anyOf" in field_schema:
+ for any_of_item in field_schema["anyOf"]:
+ if isinstance(any_of_item, dict) and any_of_item.get("file_uploadable") is True:
+ file_upload_fields.add(clean_field_name)
+
+ for field in raw_action_fields:
+ clean_field = field.replace("[0]", "")
+ # Check if this field is attachment-related
+ if clean_field.lower().startswith("attachment."):
+ attachment_related_found = True
+ continue # Skip individual attachment fields
+
+ # Handle conflicting field names - rename user_id to avoid conflicts with entity_id
+ if clean_field == "user_id":
+ clean_field = f"{self.app_name}_user_id"
+ elif clean_field == "status":
+ clean_field = f"{self.app_name}_status"
+
+ action_fields.append(clean_field)
+
+ # Add consolidated attachment field if we found attachment-related fields
+ if attachment_related_found:
+ action_fields.append("attachment")
+ file_upload_fields.add("attachment") # Attachment fields are also file upload fields
+
+ # Track boolean parameters so we can coerce them later
+ properties = flat_schema.get("properties", {})
+ if properties:
+ for p_name, p_schema in properties.items():
+ if isinstance(p_schema, dict) and p_schema.get("type") == "boolean":
+ # Use cleaned field name for boolean tracking
+ clean_field_name = p_name.replace("[0]", "")
+ self._bool_variables.add(clean_field_name)
+
+ self._action_schemas[action_key] = tool_dict
+ self._actions_data[action_key] = {
+ "display_name": display_name,
+ "action_fields": action_fields,
+ "file_upload_fields": file_upload_fields,
+ }
+
+ except (KeyError, TypeError, ValueError) as flatten_error:
+ logger.error(f"flatten_schema failed for {action_key}: {flatten_error}")
+ self._action_schemas[action_key] = tool_dict
+ self._actions_data[action_key] = {
+ "display_name": display_name,
+ "action_fields": [],
+ "file_upload_fields": set(),
+ }
+ continue
+
+ except ValueError as e:
+ logger.warning(f"Failed processing Composio tool for action {raw_tool}: {e}")
+
+ # Helper look-ups used elsewhere
+ self._all_fields = {f for d in self._actions_data.values() for f in d["action_fields"]}
+ self._build_action_maps()
+
+ # Cache actions for this toolkit so subsequent component instances
+ # can reuse them without hitting the Composio API again.
+ self.__class__._actions_cache[toolkit_slug] = copy.deepcopy(self._actions_data)
+ self.__class__._action_schema_cache[toolkit_slug] = copy.deepcopy(self._action_schemas)
+
+ except ValueError as e:
+ logger.debug(f"Could not populate Composio actions for {self.app_name}: {e}")
+
+ def _validate_schema_inputs(self, action_key: str) -> list[InputTypes]:
+ """Convert the JSON schema for *action_key* into Langflow input objects."""
+ # Skip validation for default/placeholder values
+ if action_key in ("disabled", "placeholder", ""):
+ logger.debug(f"Skipping schema validation for placeholder value: {action_key}")
+ return []
+
+ schema_dict = self._action_schemas.get(action_key)
+ if not schema_dict:
+ logger.warning(f"No schema found for action key: {action_key}")
+ return []
+
+ try:
+ parameters_schema = schema_dict.get("input_parameters", {})
+ if parameters_schema is None:
+ logger.warning(f"Parameters schema is None for action key: {action_key}")
+ return []
+
+ # Check if parameters_schema has the expected structure
+ if not isinstance(parameters_schema, dict):
+ logger.warning(
+ f"Parameters schema is not a dict for action key: {action_key}, got: {type(parameters_schema)}"
+ )
+ return []
+
+ # Validate parameters_schema has required structure before flattening
+ if not parameters_schema.get("properties") and not parameters_schema.get("$defs"):
+ # Create a minimal valid schema to avoid errors
+ parameters_schema = {"type": "object", "properties": {}}
+
+ # Sanitize the schema before passing to flatten_schema
+ # Handle case where 'required' is explicitly None (causes "'NoneType' object is not iterable")
+ if parameters_schema.get("required") is None:
+ parameters_schema = parameters_schema.copy() # Don't modify the original
+ parameters_schema["required"] = []
+
+ try:
+ # Preserve original descriptions before flattening to restore if lost
+ original_descriptions = {}
+ original_props = parameters_schema.get("properties", {})
+ for prop_name, prop_schema in original_props.items():
+ if isinstance(prop_schema, dict) and "description" in prop_schema:
+ original_descriptions[prop_name] = prop_schema["description"]
+
+ flat_schema = flatten_schema(parameters_schema)
+
+ # Restore lost descriptions in flattened schema
+ if flat_schema and isinstance(flat_schema, dict) and "properties" in flat_schema:
+ flat_props = flat_schema["properties"]
+ for field_name, field_schema in flat_props.items():
+ # Check if this field lost its description during flattening
+ if isinstance(field_schema, dict) and "description" not in field_schema:
+ # Try to find the original description
+ # Handle array fields like bcc[0] -> bcc
+ base_field_name = field_name.replace("[0]", "")
+ if base_field_name in original_descriptions:
+ field_schema["description"] = original_descriptions[base_field_name]
+ elif field_name in original_descriptions:
+ field_schema["description"] = original_descriptions[field_name]
+ except (KeyError, TypeError, ValueError) as flatten_error:
+ logger.error(f"flatten_schema failed for {action_key}: {flatten_error}")
+ return []
+
+ if flat_schema is None:
+ logger.warning(f"Flat schema is None for action key: {action_key}")
+ return []
+
+ # Additional check for flat_schema structure
+ if not isinstance(flat_schema, dict):
+ logger.warning(f"Flat schema is not a dict for action key: {action_key}, got: {type(flat_schema)}")
+ return []
+
+ # Ensure flat_schema has the expected structure for create_input_schema_from_json_schema
+ if flat_schema.get("type") != "object":
+ logger.warning(f"Flat schema for {action_key} is not of type 'object', got: {flat_schema.get('type')}")
+ # Fix the schema type if it's missing
+ flat_schema["type"] = "object"
+
+ if "properties" not in flat_schema:
+ flat_schema["properties"] = {}
+
+ # Clean up field names - remove [0] suffixes from array fields
+ cleaned_properties = {}
+ attachment_related_fields = set() # Track fields that are attachment-related
+
+ for field_name, field_schema in flat_schema.get("properties", {}).items():
+ # Remove [0] suffix from field names (e.g., "bcc[0]" -> "bcc", "cc[0]" -> "cc")
+ clean_field_name = field_name.replace("[0]", "")
+
+ # Check if this field is attachment-related (contains "attachment." prefix)
+ if clean_field_name.lower().startswith("attachment."):
+ attachment_related_fields.add(clean_field_name)
+ # Don't add individual attachment sub-fields to the schema
+ continue
+
+ # Handle conflicting field names - rename user_id to avoid conflicts with entity_id
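+                # Illustrative: for an app_name of "gmail" this becomes "gmail_user_id",
+                # so it no longer collides with the component-level entity_id input.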
+ if clean_field_name == "user_id":
+ clean_field_name = f"{self.app_name}_user_id"
+                    # Prefix the description so the renamed field is still recognizable
+ field_schema_copy = field_schema.copy()
+ field_schema_copy["description"] = (
+ f"User ID for {self.app_name.title()}: " + field_schema["description"]
+ )
+ elif clean_field_name == "status":
+ clean_field_name = f"{self.app_name}_status"
+                    # Prefix the description so the renamed field is still recognizable
+ field_schema_copy = field_schema.copy()
+ field_schema_copy["description"] = (
+ f"Status for {self.app_name.title()}: " + field_schema["description"]
+ )
+ else:
+ # Use the original field schema for all other fields
+ field_schema_copy = field_schema
+
+ # Preserve the full schema information, not just the type
+ cleaned_properties[clean_field_name] = field_schema_copy
+
+ # If we found attachment-related fields, add a single "attachment" field
+ if attachment_related_fields:
+ # Create a generic attachment field schema
+ attachment_schema = {
+ "type": "string",
+ "description": "File attachment for the email",
+ "title": "Attachment",
+ }
+ cleaned_properties["attachment"] = attachment_schema
+
+ # Update the flat schema with cleaned field names
+ flat_schema["properties"] = cleaned_properties
+
+ # Also update required fields to match cleaned names
+ if flat_schema.get("required"):
+ cleaned_required = [field.replace("[0]", "") for field in flat_schema["required"]]
+ flat_schema["required"] = cleaned_required
+
+ input_schema = create_input_schema_from_json_schema(flat_schema)
+ if input_schema is None:
+ logger.warning(f"Input schema is None for action key: {action_key}")
+ return []
+
+ # Additional safety check before calling schema_to_langflow_inputs
+ if not hasattr(input_schema, "model_fields"):
+ logger.warning(f"Input schema for {action_key} does not have model_fields attribute")
+ return []
+
+ if input_schema.model_fields is None:
+ logger.warning(f"Input schema model_fields is None for {action_key}")
+ return []
+
+ result = schema_to_langflow_inputs(input_schema)
+
+ # Process inputs to handle attachment fields and set advanced status
+ if result:
+ processed_inputs = []
+ required_fields_set = set(flat_schema.get("required", []))
+
+ # Get file upload fields from stored action data
+ file_upload_fields = self._actions_data.get(action_key, {}).get("file_upload_fields", set())
+ if attachment_related_fields: # If we consolidated attachment fields
+ file_upload_fields = file_upload_fields | {"attachment"}
+
+ for inp in result:
+ if hasattr(inp, "name") and inp.name is not None:
+ # Check if this specific field is a file upload field
+ if inp.name.lower() in file_upload_fields or inp.name.lower() == "attachment":
+ # Replace with FileInput for file upload fields
+ file_input = FileInput(
+ name=inp.name,
+ display_name=getattr(inp, "display_name", inp.name.replace("_", " ").title()),
+ required=inp.name in required_fields_set,
+ advanced=inp.name not in required_fields_set,
+ info=getattr(inp, "info", "Upload file for this field"),
+ show=True,
+ file_types=[
+ "csv",
+ "txt",
+ "doc",
+ "docx",
+ "xls",
+ "xlsx",
+ "pdf",
+ "png",
+ "jpg",
+ "jpeg",
+ "gif",
+ "zip",
+ "rar",
+ "ppt",
+ "pptx",
+ ],
+ )
+ processed_inputs.append(file_input)
+ else:
+ # Ensure proper display_name and info are set for regular fields
+ if not hasattr(inp, "display_name") or not inp.display_name:
+ inp.display_name = inp.name.replace("_", " ").title()
+
+ # Preserve description from schema if available
+ field_schema = flat_schema.get("properties", {}).get(inp.name, {})
+ schema_description = field_schema.get("description")
+ current_info = getattr(inp, "info", None)
+
+ # Use schema description if available, otherwise keep current info or create from name
+ if schema_description:
+ inp.info = schema_description
+ elif not current_info:
+ # Fallback: create a basic description from the field name if no description exists
+ inp.info = f"{inp.name.replace('_', ' ').title()} field"
+
+ # Set advanced status for non-file-upload fields
+ if inp.name not in required_fields_set:
+ inp.advanced = True
+
+ # Skip entity_id being mapped to user_id parameter
+ if inp.name == "user_id" and getattr(self, "entity_id", None) == getattr(
+ inp, "value", None
+ ):
+ continue
+
+ processed_inputs.append(inp)
+ else:
+ processed_inputs.append(inp)
+
+ return processed_inputs
+ return result # noqa: TRY300
+ except ValueError as e:
+ logger.warning(f"Error generating inputs for {action_key}: {e}")
+ return []
+
+ def _get_inputs_for_all_actions(self) -> dict[str, list[InputTypes]]:
+ """Return a mapping action_key → list[InputTypes] for every action."""
+ result: dict[str, list[InputTypes]] = {}
+ for key in self._actions_data:
+ result[key] = self._validate_schema_inputs(key)
+ return result
+
+ def _remove_inputs_from_build_config(self, build_config: dict, keep_for_action: str) -> None:
+ """Remove parameter UI fields that belong to other actions."""
+ protected_keys = {"code", "entity_id", "api_key", "auth_link", "action_button", "tool_mode"}
+
+ for action_key, lf_inputs in self._get_inputs_for_all_actions().items():
+ if action_key == keep_for_action:
+ continue
+ for inp in lf_inputs:
+ if inp.name is not None and inp.name not in protected_keys:
+ build_config.pop(inp.name, None)
+
+ def _update_action_config(self, build_config: dict, selected_value: Any) -> None:
+ """Add or update parameter input fields for the chosen action."""
+ if not selected_value:
+ return
+
+ # The UI passes either a list with dict [{name: display_name}] OR the raw key
+ if isinstance(selected_value, list) and selected_value:
+ display_name = selected_value[0]["name"]
+ else:
+ display_name = selected_value
+
+ action_key = self.desanitize_action_name(display_name)
+
+ # Skip validation for default/placeholder values
+ if action_key in ("disabled", "placeholder", ""):
+ logger.debug(f"Skipping action config update for placeholder value: {action_key}")
+ return
+
+ lf_inputs = self._validate_schema_inputs(action_key)
+
+ # First remove inputs belonging to other actions
+ self._remove_inputs_from_build_config(build_config, action_key)
+
+ # Add / update the inputs for this action
+ for inp in lf_inputs:
+ if inp.name is not None:
+ inp_dict = inp.to_dict() if hasattr(inp, "to_dict") else inp.__dict__.copy()
+
+ # Ensure input_types is always a list
+ if not isinstance(inp_dict.get("input_types"), list):
+ inp_dict["input_types"] = []
+
+ inp_dict.setdefault("show", True) # visible once action selected
+ # Preserve previously entered value if user already filled something
+ if inp.name in build_config:
+ existing_val = build_config[inp.name].get("value")
+ inp_dict.setdefault("value", existing_val)
+ build_config[inp.name] = inp_dict
+
+ # Ensure _all_fields includes new ones
+ self._all_fields.update({i.name for i in lf_inputs if i.name is not None})
+
+ def _is_tool_mode_enabled(self) -> bool:
+ """Check if tool_mode is currently enabled."""
+ return getattr(self, "tool_mode", False)
+
+ def _set_action_visibility(self, build_config: dict, *, force_show: bool | None = None) -> None:
+ """Set action field visibility based on tool_mode state or forced value."""
+ if force_show is not None:
+ build_config["action_button"]["show"] = force_show
+ else:
+ # When tool_mode is enabled, hide action field
+ build_config["action_button"]["show"] = not self._is_tool_mode_enabled()
+
+ def create_new_auth_config(self, app_name: str) -> str:
+ """Create a new auth config for the given app name."""
+ composio = self._build_wrapper()
+ auth_config = composio.auth_configs.create(toolkit=app_name, options={"type": "use_composio_managed_auth"})
+ return auth_config.id
+
+ def _initiate_connection(self, app_name: str) -> tuple[str, str]:
+ """Initiate OAuth connection and return (redirect_url, connection_id)."""
+ try:
+ composio = self._build_wrapper()
+
+ auth_configs = composio.auth_configs.list(toolkit_slug=app_name)
+ if len(auth_configs.items) == 0:
+ auth_config_id = self.create_new_auth_config(app_name)
+ else:
+ auth_config_id = None
+ for auth_config in auth_configs.items:
+ if auth_config.auth_scheme == "OAUTH2":
+ auth_config_id = auth_config.id
+
+                if auth_config_id is None:
+                    # No OAUTH2 config found; fall back to the first available config
+                    auth_config_id = auth_configs.items[0].id
+
+ connection_request = composio.connected_accounts.initiate(
+ user_id=self.entity_id, auth_config_id=auth_config_id
+ )
+
+ redirect_url = getattr(connection_request, "redirect_url", None)
+ connection_id = getattr(connection_request, "id", None)
+
+ if not redirect_url or not redirect_url.startswith(("http://", "https://")):
+ msg = "Invalid redirect URL received from Composio"
+ raise ValueError(msg)
+
+ if not connection_id:
+ msg = "No connection ID received from Composio"
+ raise ValueError(msg)
+
+ logger.info(f"OAuth connection initiated for {app_name}: {redirect_url} (ID: {connection_id})")
+ return redirect_url, connection_id # noqa: TRY300
+
+ except Exception as e:
+ logger.error(f"Error initiating connection for {app_name}: {e}")
+ msg = f"Failed to initiate OAuth connection: {e}"
+ raise ValueError(msg) from e
+
+ def _check_connection_status_by_id(self, connection_id: str) -> str | None:
+ """Check status of a specific connection by ID. Returns status or None if not found."""
+ try:
+ composio = self._build_wrapper()
+ connection = composio.connected_accounts.get(nanoid=connection_id)
+ status = getattr(connection, "status", None)
+ logger.info(f"Connection {connection_id} status: {status}")
+ except (ValueError, ConnectionError) as e:
+ logger.error(f"Error checking connection {connection_id}: {e}")
+ return None
+ else:
+ return status
+
+ def _find_active_connection_for_app(self, app_name: str) -> tuple[str, str] | None:
+ """Find any ACTIVE connection for this app/user. Returns (connection_id, status) or None."""
+ try:
+ composio = self._build_wrapper()
+ connection_list = composio.connected_accounts.list(
+ user_ids=[self.entity_id], toolkit_slugs=[app_name.lower()]
+ )
+
+ if connection_list and hasattr(connection_list, "items") and connection_list.items:
+ for connection in connection_list.items:
+ connection_id = getattr(connection, "id", None)
+ connection_status = getattr(connection, "status", None)
+ if connection_status == "ACTIVE" and connection_id:
+ logger.info(f"Found existing ACTIVE connection for {app_name}: {connection_id}")
+ return connection_id, connection_status
+
+ except (ValueError, ConnectionError) as e:
+ logger.error(f"Error finding active connection for {app_name}: {e}")
+ return None
+ else:
+ return None
+
+ def _disconnect_specific_connection(self, connection_id: str) -> None:
+ """Disconnect a specific Composio connection by ID."""
+ try:
+ composio = self._build_wrapper()
+ composio.connected_accounts.delete(nanoid=connection_id)
+ logger.info(f"✅ Disconnected specific connection: {connection_id}")
+
+ except Exception as e:
+ logger.error(f"Error disconnecting connection {connection_id}: {e}")
+ msg = f"Failed to disconnect connection {connection_id}: {e}"
+ raise ValueError(msg) from e
+
def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
- """Optimized build config updates."""
+ """Update build config for auth and action selection."""
+ # Clean any legacy None values that may still be present
+ for _fconfig in build_config.values():
+ if isinstance(_fconfig, dict) and _fconfig.get("input_types") is None:
+ _fconfig["input_types"] = []
+
+        # Check every place tool_mode could be stored (instance attribute and build_config)
+ instance_tool_mode = getattr(self, "tool_mode", False) if hasattr(self, "tool_mode") else False
+
+ # Check build_config for tool_mode in multiple possible structures
+ build_config_tool_mode = False
+ if "tool_mode" in build_config:
+ tool_mode_config = build_config["tool_mode"]
+ if isinstance(tool_mode_config, dict):
+ build_config_tool_mode = tool_mode_config.get("value", False)
+ else:
+ build_config_tool_mode = bool(tool_mode_config)
+
+ # If this is a tool_mode change, update BOTH instance variable AND build_config
if field_name == "tool_mode":
- build_config["action"]["show"] = not field_value
+ self.tool_mode = field_value
+ instance_tool_mode = field_value
+ # CRITICAL: Store tool_mode state in build_config so it persists
+ if "tool_mode" not in build_config:
+ build_config["tool_mode"] = {}
+ if isinstance(build_config["tool_mode"], dict):
+ build_config["tool_mode"]["value"] = field_value
+ build_config_tool_mode = field_value
+
+ # Current tool_mode is True if ANY source indicates it's enabled
+ current_tool_mode = instance_tool_mode or build_config_tool_mode or (field_name == "tool_mode" and field_value)
+
+ # CRITICAL: Ensure dynamic action metadata is available whenever we have an API key
+ # This must happen BEFORE any early returns to ensure tools are always loaded
+ api_key_available = hasattr(self, "api_key") and self.api_key
+
+ # Check if we need to populate actions - but also check cache availability
+ actions_available = bool(self._actions_data)
+ toolkit_slug = getattr(self, "app_name", "").lower()
+ cached_actions_available = toolkit_slug in self.__class__._actions_cache
+
+ should_populate = False
+
+ if (field_name == "api_key" and field_value) or (
+ api_key_available and not actions_available and not cached_actions_available
+ ):
+ should_populate = True
+ elif api_key_available and not actions_available and cached_actions_available:
+ self._populate_actions_data()
+
+ if should_populate:
+ logger.info(f"Populating actions data for {getattr(self, 'app_name', 'unknown')}...")
+ self._populate_actions_data()
+ logger.info(f"Actions populated: {len(self._actions_data)} actions found")
+
+ # CRITICAL: Set action options if we have actions (either from fresh population or cache)
+ if self._actions_data:
+ self._build_action_maps()
+ build_config["action_button"]["options"] = [
+ {"name": self.sanitize_action_name(action), "metadata": action} for action in self._actions_data
+ ]
+ logger.info(f"Action options set in build_config: {len(build_config['action_button']['options'])} options")
+ else:
+ build_config["action_button"]["options"] = []
+ logger.warning("No actions found, setting empty options")
+
+ # clear stored connection_id when api_key is changed
+ if field_name == "api_key" and field_value:
+ stored_connection_before = build_config.get("auth_link", {}).get("connection_id")
+ if "auth_link" in build_config and "connection_id" in build_config["auth_link"]:
+ build_config["auth_link"].pop("connection_id", None)
+ build_config["auth_link"]["value"] = "connect"
+ build_config["auth_link"]["auth_tooltip"] = "Connect"
+ logger.info(f"Cleared stored connection_id '{stored_connection_before}' due to API key change")
+ else:
+ logger.info("DEBUG: EARLY No stored connection_id to clear on API key change")
+
+ # Handle disconnect operations when tool mode is enabled
+ if field_name == "auth_link" and field_value == "disconnect":
+ try:
+ # Get the specific connection ID that's currently being used
+ stored_connection_id = build_config.get("auth_link", {}).get("connection_id")
+ if stored_connection_id:
+ self._disconnect_specific_connection(stored_connection_id)
+ else:
+ # No connection ID stored - nothing to disconnect
+ logger.warning("No connection ID found to disconnect")
+ build_config["auth_link"]["value"] = "connect"
+ build_config["auth_link"]["auth_tooltip"] = "Connect"
+ return build_config
+ except (ValueError, ConnectionError) as e:
+ logger.error(f"Error disconnecting: {e}")
+ build_config["auth_link"]["value"] = "error"
+ build_config["auth_link"]["auth_tooltip"] = f"Disconnect failed: {e!s}"
+ return build_config
+ else:
+ build_config["auth_link"]["value"] = "connect"
+ build_config["auth_link"]["auth_tooltip"] = "Connect"
+ build_config["auth_link"].pop("connection_id", None) # Clear stored connection ID
+ build_config["action_button"]["helper_text"] = "Please connect before selecting actions."
+ build_config["action_button"]["helper_text_metadata"] = {"variant": "destructive"}
+ return build_config
+
+ # Handle connection initiation when tool mode is enabled
+ if field_name == "auth_link" and isinstance(field_value, dict):
+ try:
+ toolkit_slug = self.app_name.lower()
+
+ # First check if we already have an ACTIVE connection
+ existing_active = self._find_active_connection_for_app(self.app_name)
+ if existing_active:
+ connection_id, _ = existing_active
+ build_config["auth_link"]["value"] = "validated"
+ build_config["auth_link"]["auth_tooltip"] = "Disconnect"
+ build_config["auth_link"]["connection_id"] = connection_id
+ build_config["action_button"]["helper_text"] = ""
+ build_config["action_button"]["helper_text_metadata"] = {}
+ logger.info(f"Using existing ACTIVE connection {connection_id} for {toolkit_slug}")
+ return build_config
+
+ # Check if we have a stored connection ID with INITIATED status
+ stored_connection_id = build_config.get("auth_link", {}).get("connection_id")
+ if stored_connection_id:
+ # Check status of existing connection
+ status = self._check_connection_status_by_id(stored_connection_id)
+ if status == "INITIATED":
+ # Get redirect URL from stored connection
+ try:
+ composio = self._build_wrapper()
+ connection = composio.connected_accounts.get(nanoid=stored_connection_id)
+ state = getattr(connection, "state", None)
+ if state and hasattr(state, "val"):
+ redirect_url = getattr(state.val, "redirect_url", None)
+ if redirect_url:
+ build_config["auth_link"]["value"] = redirect_url
+ logger.info(f"Reusing existing OAuth URL for {toolkit_slug}: {redirect_url}")
+ return build_config
+ except (AttributeError, ValueError, ConnectionError) as e:
+ logger.debug(f"Could not retrieve connection {stored_connection_id}: {e}")
+ # Continue to create new connection below
+
+ # Create new OAuth connection ONLY if we truly have no usable connection yet
+ if existing_active is None and not (stored_connection_id and status in ("ACTIVE", "INITIATED")):
+ try:
+ redirect_url, connection_id = self._initiate_connection(toolkit_slug)
+ build_config["auth_link"]["value"] = redirect_url
+ build_config["auth_link"]["connection_id"] = connection_id # Store connection ID
+ logger.info(f"New OAuth URL created for {toolkit_slug}: {redirect_url}")
+ except (ValueError, ConnectionError) as e:
+ logger.error(f"Error creating OAuth connection: {e}")
+ build_config["auth_link"]["value"] = "connect"
+ build_config["auth_link"]["auth_tooltip"] = f"Error: {e!s}"
+ else:
+ return build_config
+ else:
+ # We already have a usable connection; no new OAuth request
+ build_config["auth_link"]["auth_tooltip"] = "Disconnect"
+
+ except (ValueError, ConnectionError) as e:
+ logger.error(f"Error in connection initiation: {e}")
+ build_config["auth_link"]["value"] = "connect"
+ build_config["auth_link"]["auth_tooltip"] = f"Error: {e!s}"
+ build_config["action_button"]["helper_text"] = "Please connect before selecting actions."
+ build_config["action_button"]["helper_text_metadata"] = {"variant": "destructive"}
+ return build_config
+
+ # Check for ACTIVE connections and update status accordingly (tool mode)
+ if hasattr(self, "api_key") and self.api_key:
+ stored_connection_id = build_config.get("auth_link", {}).get("connection_id")
+ active_connection_id = None
+
+ # First try to check stored connection ID
+ if stored_connection_id:
+ status = self._check_connection_status_by_id(stored_connection_id)
+ if status == "ACTIVE":
+ active_connection_id = stored_connection_id
+
+ # If no stored connection or stored connection is not ACTIVE, find any ACTIVE connection
+ if not active_connection_id:
+ active_connection = self._find_active_connection_for_app(self.app_name)
+ if active_connection:
+ active_connection_id, _ = active_connection
+ # Store the found active connection ID for future use
+ if "auth_link" not in build_config:
+ build_config["auth_link"] = {}
+ build_config["auth_link"]["connection_id"] = active_connection_id
+
+ if active_connection_id:
+ # Show validated connection status
+ build_config["auth_link"]["value"] = "validated"
+ build_config["auth_link"]["auth_tooltip"] = "Disconnect"
+ build_config["action_button"]["helper_text"] = ""
+ build_config["action_button"]["helper_text_metadata"] = {}
+ else:
+ build_config["auth_link"]["value"] = "connect"
+ build_config["auth_link"]["auth_tooltip"] = "Connect"
+ build_config["action_button"]["helper_text"] = "Please connect before selecting actions."
+ build_config["action_button"]["helper_text_metadata"] = {"variant": "destructive"}
+
+ # CRITICAL: If tool_mode is enabled from ANY source, immediately hide action field and return
+ if current_tool_mode:
+ build_config["action_button"]["show"] = False
+
+ # CRITICAL: Hide ALL action parameter fields when tool mode is enabled
for field in self._all_fields:
- build_config[field]["show"] = False
+ if field in build_config:
+ build_config[field]["show"] = False
+
+ # Also hide any other action-related fields that might be in build_config
+ for field_name_in_config in build_config: # noqa: PLC0206
+ # Skip base fields like api_key, tool_mode, action, etc.
+ if (
+ field_name_in_config not in ["api_key", "tool_mode", "action_button", "auth_link", "entity_id"]
+ and isinstance(build_config[field_name_in_config], dict)
+ and "show" in build_config[field_name_in_config]
+ ):
+ build_config[field_name_in_config]["show"] = False
+
+ # ENSURE tool_mode state is preserved in build_config for future calls
+ if "tool_mode" not in build_config:
+ build_config["tool_mode"] = {"value": True}
+ elif isinstance(build_config["tool_mode"], dict):
+ build_config["tool_mode"]["value"] = True
+ # Don't proceed with any other logic that might override this
return build_config
- if field_name == "action":
+ if field_name == "tool_mode":
+ if field_value is True:
+ build_config["action_button"]["show"] = False # Hide action field when tool mode is enabled
+ for field in self._all_fields:
+ build_config[field]["show"] = False # Update show status for all fields based on tool mode
+ elif field_value is False:
+ build_config["action_button"]["show"] = True # Show action field when tool mode is disabled
+ for field in self._all_fields:
+ build_config[field]["show"] = True # Update show status for all fields based on tool mode
+ return build_config
+
+ if field_name == "action_button":
+ self._update_action_config(build_config, field_value)
+ # Keep the existing show/hide behaviour
self.show_hide_fields(build_config, field_value)
- if build_config["auth_link"]["value"] == "validated":
- return build_config
+ return build_config
+
+ # Handle API key removal
if field_name == "api_key" and len(field_value) == 0:
build_config["auth_link"]["value"] = ""
build_config["auth_link"]["auth_tooltip"] = "Please provide a valid Composio API Key."
- build_config["action"]["options"] = []
- build_config["action"]["helper_text"] = "Please connect before selecting actions."
- build_config["action"]["helper_text_metadata"] = {"variant": "destructive"}
+ build_config["action_button"]["options"] = []
+ build_config["action_button"]["helper_text"] = "Please connect before selecting actions."
+ build_config["action_button"]["helper_text_metadata"] = {"variant": "destructive"}
+ build_config["auth_link"].pop("connection_id", None)
return build_config
+
+ # Only proceed with connection logic if we have an API key
if not hasattr(self, "api_key") or not self.api_key:
return build_config
- # Build the action maps before using them
+ # CRITICAL: If tool_mode is enabled (check both instance and build_config), skip all connection logic
+ if current_tool_mode:
+ build_config["action_button"]["show"] = False
+ return build_config
+
+ # Update action options only if tool_mode is disabled
self._build_action_maps()
+ # Only set options if they haven't been set already during action population
+ if "options" not in build_config.get("action_button", {}) or not build_config["action_button"]["options"]:
+ build_config["action_button"]["options"] = [
+ {"name": self.sanitize_action_name(action), "metadata": action} for action in self._actions_data
+ ]
+ logger.debug("Setting action options from main logic path")
+ else:
+ logger.debug("Action options already set, skipping duplicate setting")
+ # Only set show=True if tool_mode is not enabled
+ if not current_tool_mode:
+ build_config["action_button"]["show"] = True
- # Update the action options
- build_config["action"]["options"] = [
- {
- "name": self.sanitize_action_name(action),
- "metadata": action,
- }
- for action in self._actions_data
- ]
+ stored_connection_id = build_config.get("auth_link", {}).get("connection_id")
+ active_connection_id = None
- try:
- toolset = self._build_wrapper()
- entity = toolset.client.get_entity(id=self.entity_id)
+ if stored_connection_id:
+ status = self._check_connection_status_by_id(stored_connection_id)
+ if status == "ACTIVE":
+ active_connection_id = stored_connection_id
- try:
- entity.get_connection(app=self.app_name)
- build_config["auth_link"]["value"] = "validated"
- build_config["auth_link"]["auth_tooltip"] = "Disconnect"
- build_config["action"]["helper_text"] = None
- build_config["action"]["helper_text_metadata"] = {}
- except NoItemsFound:
- auth_scheme = self._get_auth_scheme(self.app_name)
- if auth_scheme and auth_scheme.auth_mode == "OAUTH2":
- try:
- build_config["auth_link"]["value"] = self._initiate_default_connection(entity, self.app_name)
- build_config["auth_link"]["auth_tooltip"] = "Connect"
- except (ValueError, ConnectionError, ApiKeyError) as e:
- build_config["auth_link"]["value"] = "disabled"
- build_config["auth_link"]["auth_tooltip"] = f"Error: {e!s}"
- logger.error(f"Error checking auth status: {e}")
+ if not active_connection_id:
+ active_connection = self._find_active_connection_for_app(self.app_name)
+ if active_connection:
+ active_connection_id, _ = active_connection
+ if "auth_link" not in build_config:
+ build_config["auth_link"] = {}
+ build_config["auth_link"]["connection_id"] = active_connection_id
- except (ValueError, ConnectionError) as e:
- build_config["auth_link"]["value"] = "error"
- build_config["auth_link"]["auth_tooltip"] = f"Error: {e!s}"
- logger.error(f"Error checking auth status: {e}")
- except ApiKeyError as e:
- build_config["auth_link"]["value"] = ""
- build_config["auth_link"]["auth_tooltip"] = "Please provide a valid Composio API Key."
- build_config["action"]["options"] = []
- build_config["action"]["value"] = ""
- build_config["action"]["helper_text"] = "Please connect before selecting actions."
- build_config["action"]["helper_text_metadata"] = {"variant": "destructive"}
- logger.error(f"Error checking auth status: {e}")
-
- # Handle disconnection
- if field_name == "auth_link" and field_value == "disconnect":
- try:
- for field in self._all_fields:
- build_config[field]["show"] = False
- toolset = self._build_wrapper()
- entity = toolset.client.get_entity(id=self.entity_id)
- self.disconnect_connection(entity, self.app_name)
- build_config["auth_link"]["value"] = self._initiate_default_connection(entity, self.app_name)
+ if active_connection_id:
+ build_config["auth_link"]["value"] = "validated"
+ build_config["auth_link"]["auth_tooltip"] = "Disconnect"
+ build_config["action_button"]["helper_text"] = ""
+ build_config["action_button"]["helper_text_metadata"] = {}
+ elif stored_connection_id:
+ status = self._check_connection_status_by_id(stored_connection_id)
+ if status == "INITIATED":
+ current_value = build_config.get("auth_link", {}).get("value")
+ if not current_value or current_value == "connect":
+ build_config["auth_link"]["value"] = "connect"
build_config["auth_link"]["auth_tooltip"] = "Connect"
- build_config["action"]["helper_text"] = "Please connect before selecting actions."
- build_config["action"]["helper_text_metadata"] = {
- "variant": "destructive",
- }
- build_config["action"]["options"] = []
- build_config["action"]["value"] = ""
- except (ValueError, ConnectionError, ApiKeyError) as e:
- build_config["auth_link"]["value"] = "error"
- build_config["auth_link"]["auth_tooltip"] = f"Failed to disconnect from the app: {e}"
- logger.error(f"Error disconnecting: {e}")
- if field_name == "auth_link" and field_value == "validated":
- build_config["action"]["helper_text"] = ""
- build_config["action"]["helper_text_metadata"] = {"icon": "Check", "variant": "success"}
+ build_config["action_button"]["helper_text"] = "Please connect before selecting actions."
+ build_config["action_button"]["helper_text_metadata"] = {"variant": "destructive"}
+ else:
+ # Connection not found or other status
+ build_config["auth_link"]["value"] = "connect"
+ build_config["auth_link"]["auth_tooltip"] = "Connect"
+ build_config["action_button"]["helper_text"] = "Please connect before selecting actions."
+ build_config["action_button"]["helper_text_metadata"] = {"variant": "destructive"}
+ else:
+ build_config["auth_link"]["value"] = "connect"
+ build_config["auth_link"]["auth_tooltip"] = "Connect"
+ build_config["action_button"]["helper_text"] = "Please connect before selecting actions."
+ build_config["action_button"]["helper_text_metadata"] = {"variant": "destructive"}
+
+ if self._is_tool_mode_enabled():
+ build_config["action_button"]["show"] = False
return build_config
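The `_is_tool_mode_enabled()` helper referenced above is not shown in this hunk. A minimal sketch of what it could look like, assuming it only needs to reflect the instance-level flag that `update_build_config` mirrors from `build_config`:

    def _is_tool_mode_enabled(self) -> bool:
        # Hypothetical sketch: update_build_config keeps self.tool_mode in sync with
        # build_config["tool_mode"]["value"], so the instance attribute is enough here.
        return bool(getattr(self, "tool_mode", False))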
- def _get_auth_scheme(self, app_name: str) -> AppAuthScheme:
- """Get the primary auth scheme for an app."""
- toolset = self._build_wrapper()
- try:
- return toolset.get_auth_scheme_for_app(app=app_name.lower())
- except (ValueError, ConnectionError, NoItemsFound):
- logger.exception(f"Error getting auth scheme for {app_name}")
- return None
+ def configure_tools(self, composio: Composio, limit: int | None = None) -> list[Tool]:
+ if limit is None:
+ limit = 999
- def _initiate_default_connection(self, entity: Any, app: str) -> str:
- connection = entity.initiate_connection(app_name=app, use_composio_auth=True, force_new_integration=True)
- return connection.redirectUrl
-
- def disconnect_connection(self, entity: Any, app: str) -> None:
- """Disconnect a Composio connection."""
- try:
- # Get the connection first
- connection = entity.get_connection(app=app)
- # Delete the connection using the integrations collection
- entity.client.integrations.remove(id=connection.integrationId)
- except Exception as e:
- logger.error(f"Error disconnecting from {app}: {e}")
- msg = f"Failed to disconnect from {app}: {e}"
- raise ValueError(msg) from e
-
- def configure_tools(self, toolset: ComposioToolSet) -> list[Tool]:
- tools = toolset.get_tools(actions=self._actions_data.keys())
- logger.info(f"Tools: {tools}")
+ tools = composio.tools.get(user_id=self.entity_id, toolkits=[self.app_name.lower()], limit=limit)
configured_tools = []
for tool in tools:
# Set the sanitized name
@@ -293,20 +1174,124 @@ class ComposioBaseComponent(Component):
async def _get_tools(self) -> list[Tool]:
"""Get tools with cached results and optimized name sanitization."""
- toolset = self._build_wrapper()
+ composio = self._build_wrapper()
self.set_default_tools()
- return self.configure_tools(toolset)
+ return self.configure_tools(composio)
@property
def enabled_tools(self):
- if not hasattr(self, "action") or not self.action or not isinstance(self.action, list):
+ """Return tag names for actions of this app that should be exposed to the agent.
+
+ If default tools are set via set_default_tools(), returns those.
+ Otherwise, returns only the first few tools (limited by default_tools_limit)
+ to prevent overwhelming the agent. Subclasses can override this behavior.
+
+ """
+ if not self._actions_data:
+ self._populate_actions_data()
+
+ if hasattr(self, "_default_tools") and self._default_tools:
return list(self._default_tools)
- return list(self._default_tools.union(action["name"].replace(" ", "-") for action in self.action))
- @abstractmethod
- def execute_action(self) -> list[dict]:
- """Execute action and return response as Message."""
+ all_tools = list(self._actions_data.keys())
+ limit = getattr(self, "default_tools_limit", 5)
+ return all_tools[:limit]
+
+ def execute_action(self):
+ """Execute the selected Composio tool."""
+ composio = self._build_wrapper()
+ self._populate_actions_data()
+ self._build_action_maps()
+
+ display_name = (
+ self.action_button[0]["name"]
+ if isinstance(getattr(self, "action_button", None), list) and self.action_button
+ else self.action_button
+ )
+ action_key = self._display_to_key_map.get(display_name)
+
+ if not action_key:
+ msg = f"Invalid action: {display_name}"
+ raise ValueError(msg)
+
+ try:
+ arguments: dict[str, Any] = {}
+ param_fields = self._actions_data.get(action_key, {}).get("action_fields", [])
+
+ schema_dict = self._action_schemas.get(action_key, {})
+ parameters_schema = schema_dict.get("input_parameters", {})
+ schema_properties = parameters_schema.get("properties", {}) if parameters_schema else {}
+ # Handle case where 'required' field is None (causes "'NoneType' object is not iterable")
+ required_list = parameters_schema.get("required", []) if parameters_schema else []
+ required_fields = set(required_list) if required_list is not None else set()
+
+ for field in param_fields:
+ if not hasattr(self, field):
+ continue
+ value = getattr(self, field)
+
+ # Skip None, empty strings, and empty lists
+ if value is None or value == "" or (isinstance(value, list) and len(value) == 0):
+ continue
+
+ # For optional fields, be more strict about including them
+ # Only include if the user has explicitly provided a meaningful value
+ if field not in required_fields:
+ # Get the default value from the schema
+ field_schema = schema_properties.get(field, {})
+ schema_default = field_schema.get("default")
+
+ # Skip if the current value matches the schema default
+ if value == schema_default:
+ continue
+
+ # Convert comma-separated to list for array parameters (heuristic)
+ prop_schema = schema_properties.get(field, {})
+ if prop_schema.get("type") == "array" and isinstance(value, str):
+ value = [item.strip() for item in value.split(",")]
+
+ if field in self._bool_variables:
+ value = bool(value)
+
+ # Handle renamed fields - map back to original names for API execution
+ final_field_name = field
+ if field.endswith("_user_id") and field.startswith(self.app_name):
+ final_field_name = "user_id"
+ elif field.endswith("_status") and field.startswith(self.app_name):
+ final_field_name = "status"
+
+ arguments[final_field_name] = value
+
+ # Execute using new SDK
+ result = composio.tools.execute(
+ slug=action_key,
+ arguments=arguments,
+ user_id=self.entity_id,
+ )
+
+ if isinstance(result, dict) and "successful" in result:
+ if result["successful"]:
+ raw_data = result.get("data", result)
+ return self._apply_post_processor(action_key, raw_data)
+ error_msg = result.get("error", "Tool execution failed")
+ raise ValueError(error_msg)
+
+ except ValueError as e:
+ logger.error(f"Failed to execute {action_key}: {e}")
+ raise
+
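In isolation, the comma-separated-to-list heuristic used when building `arguments` behaves like this (illustrative values only):

    prop_schema = {"type": "array"}
    value = "alice@example.com, bob@example.com"
    if prop_schema.get("type") == "array" and isinstance(value, str):
        value = [item.strip() for item in value.split(",")]
    # value is now ["alice@example.com", "bob@example.com"]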
+ def _apply_post_processor(self, action_key: str, raw_data: Any) -> Any:
+ """Apply post-processor for the given action if defined."""
+ if hasattr(self, "post_processors") and isinstance(self.post_processors, dict):
+ processor_func = self.post_processors.get(action_key)
+ if processor_func and callable(processor_func):
+ try:
+ return processor_func(raw_data)
+ except (TypeError, ValueError, KeyError) as e:
+ logger.error(f"Error in post-processor for {action_key}: {e} (Exception type: {type(e).__name__})")
+ return raw_data
+
+ return raw_data
- @abstractmethod
def set_default_tools(self):
"""Set the default tools."""
diff --git a/src/backend/base/langflow/base/data/docling_utils.py b/src/backend/base/langflow/base/data/docling_utils.py
index 1d19ff252..eafdf961c 100644
--- a/src/backend/base/langflow/base/data/docling_utils.py
+++ b/src/backend/base/langflow/base/data/docling_utils.py
@@ -1,4 +1,10 @@
+import signal
+import sys
+import traceback
+from contextlib import suppress
+
from docling_core.types.doc import DoclingDocument
+from loguru import logger
from langflow.schema.data import Data
from langflow.schema.dataframe import DataFrame
@@ -49,3 +55,191 @@ def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_ke
msg = f"Invalid input type in collection: {e}"
raise TypeError(msg) from e
return documents
+
+
+def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
+ """Worker function for processing files with Docling in a separate process."""
+ # Signal handling for graceful shutdown
+ shutdown_requested = False
+
+ def signal_handler(signum: int, frame) -> None: # noqa: ARG001
+ """Handle shutdown signals gracefully."""
+ nonlocal shutdown_requested
+ signal_names: dict[int, str] = {signal.SIGTERM: "SIGTERM", signal.SIGINT: "SIGINT"}
+ signal_name = signal_names.get(signum, f"signal {signum}")
+
+ logger.debug(f"Docling worker received {signal_name}, initiating graceful shutdown...")
+ shutdown_requested = True
+
+ # Send shutdown notification to parent process
+ with suppress(Exception):
+ queue.put({"error": f"Worker interrupted by {signal_name}", "shutdown": True})
+
+ # Exit gracefully
+ sys.exit(0)
+
+ def check_shutdown() -> None:
+ """Check if shutdown was requested and exit if so."""
+ if shutdown_requested:
+ logger.info("Shutdown requested, exiting worker...")
+
+ with suppress(Exception):
+ queue.put({"error": "Worker shutdown requested", "shutdown": True})
+
+ sys.exit(0)
+
+ # Register signal handlers early
+ try:
+ signal.signal(signal.SIGTERM, signal_handler)
+ signal.signal(signal.SIGINT, signal_handler)
+ logger.debug("Signal handlers registered for graceful shutdown")
+ except (OSError, ValueError) as e:
+ # Some signals might not be available on all platforms
+        logger.warning(f"Could not register signal handlers: {e}")
+
+ # Check for shutdown before heavy imports
+ check_shutdown()
+
+ try:
+ from docling.datamodel.base_models import ConversionStatus, InputFormat
+ from docling.datamodel.pipeline_options import OcrOptions, PdfPipelineOptions, VlmPipelineOptions
+ from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
+ from docling.models.factories import get_ocr_factory
+ from docling.pipeline.vlm_pipeline import VlmPipeline
+
+ # Check for shutdown after imports
+ check_shutdown()
+ logger.debug("Docling dependencies loaded successfully")
+
+ except ModuleNotFoundError:
+ msg = (
+ "Docling is an optional dependency of Langflow. "
+ "Install with `uv pip install 'langflow[docling]'` "
+ "or refer to the documentation"
+ )
+ queue.put({"error": msg})
+ return
+ except ImportError as e:
+ # A different import failed (e.g., a transitive dependency); preserve details.
+ queue.put({"error": f"Failed to import a Docling dependency: {e}"})
+ return
+ except KeyboardInterrupt:
+ logger.warning("KeyboardInterrupt during imports, exiting...")
+ queue.put({"error": "Worker interrupted during imports", "shutdown": True})
+ return
+
+ # Configure the standard PDF pipeline
+ def _get_standard_opts() -> PdfPipelineOptions:
+ check_shutdown() # Check before heavy operations
+
+ pipeline_options = PdfPipelineOptions()
+ pipeline_options.do_ocr = ocr_engine != ""
+ if pipeline_options.do_ocr:
+ ocr_factory = get_ocr_factory(
+ allow_external_plugins=False,
+ )
+
+ ocr_options: OcrOptions = ocr_factory.create_options(
+ kind=ocr_engine,
+ )
+ pipeline_options.ocr_options = ocr_options
+ return pipeline_options
+
+ # Configure the VLM pipeline
+ def _get_vlm_opts() -> VlmPipelineOptions:
+ check_shutdown() # Check before heavy operations
+ return VlmPipelineOptions()
+
+ # Configure the main format options and create the DocumentConverter()
+ def _get_converter() -> DocumentConverter:
+ check_shutdown() # Check before heavy operations
+
+ if pipeline == "standard":
+ pdf_format_option = PdfFormatOption(
+ pipeline_options=_get_standard_opts(),
+ )
+ elif pipeline == "vlm":
+ pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
+ else:
+ msg = f"Unknown pipeline: {pipeline!r}"
+ raise ValueError(msg)
+
+ format_options: dict[InputFormat, FormatOption] = {
+ InputFormat.PDF: pdf_format_option,
+ InputFormat.IMAGE: pdf_format_option,
+ }
+
+ return DocumentConverter(format_options=format_options)
+
+ try:
+ # Check for shutdown before creating converter (can be slow)
+ check_shutdown()
+ logger.info(f"Initializing {pipeline} pipeline with OCR: {ocr_engine or 'disabled'}")
+
+ converter = _get_converter()
+
+ # Check for shutdown before processing files
+ check_shutdown()
+ logger.info(f"Starting to process {len(file_paths)} files...")
+
+ # Process files with periodic shutdown checks
+ results = []
+ for i, file_path in enumerate(file_paths):
+ # Check for shutdown before processing each file
+ check_shutdown()
+
+ logger.debug(f"Processing file {i + 1}/{len(file_paths)}: {file_path}")
+
+ try:
+ # Process single file (we can't easily interrupt convert_all)
+ single_result = converter.convert_all([file_path])
+ results.extend(single_result)
+
+ # Check for shutdown after each file
+ check_shutdown()
+
+ except (OSError, ValueError, RuntimeError, ImportError) as file_error:
+ # Handle specific file processing errors
+ logger.error(f"Error processing file {file_path}: {file_error}")
+ # Continue with other files, but check for shutdown
+ check_shutdown()
+ except Exception as file_error: # noqa: BLE001
+ # Catch any other unexpected errors to prevent worker crash
+ logger.error(f"Unexpected error processing file {file_path}: {file_error}")
+ # Continue with other files, but check for shutdown
+ check_shutdown()
+
+ # Final shutdown check before sending results
+ check_shutdown()
+
+ # Process the results while maintaining the original structure
+ processed_data = [
+ {"document": res.document, "file_path": str(res.input.file), "status": res.status.name}
+ if res.status == ConversionStatus.SUCCESS
+ else None
+ for res in results
+ ]
+
+ logger.info(f"Successfully processed {len([d for d in processed_data if d])} files")
+ queue.put(processed_data)
+
+ except KeyboardInterrupt:
+ logger.warning("KeyboardInterrupt during processing, exiting gracefully...")
+ queue.put({"error": "Worker interrupted during processing", "shutdown": True})
+ return
+ except Exception as e: # noqa: BLE001
+ if shutdown_requested:
+ logger.exception("Exception occurred during shutdown, exiting...")
+ return
+
+ # Send any processing error to the main process with traceback
+ error_info = {"error": str(e), "traceback": traceback.format_exc()}
+ logger.error(f"Error in worker: {error_info}")
+ queue.put(error_info)
+ finally:
+ logger.info("Docling worker finishing...")
+ # Ensure we don't leave any hanging processes
+ if shutdown_requested:
+ logger.debug("Worker shutdown completed")
+ else:
+ logger.debug("Worker completed normally")
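For context, one way a parent process could drive `docling_worker` — a sketch using only the standard-library multiprocessing API, not part of this patch:

    import multiprocessing

    def run_docling_in_subprocess(file_paths, pipeline="standard", ocr_engine=""):
        """Illustrative parent-side driver for docling_worker (not from this patch)."""
        ctx = multiprocessing.get_context("spawn")
        queue = ctx.Queue()
        proc = ctx.Process(target=docling_worker, args=(file_paths, queue, pipeline, ocr_engine))
        proc.start()
        try:
            # The worker puts either a list of per-file dicts or {"error": ..., "shutdown": ...}.
            result = queue.get(timeout=600)
        finally:
            proc.join(timeout=30)
            if proc.is_alive():
                proc.terminate()  # SIGTERM is handled gracefully by the worker
        if isinstance(result, dict) and "error" in result:
            raise RuntimeError(result["error"])
        return result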
diff --git a/src/backend/base/langflow/base/embeddings/aiml_embeddings.py b/src/backend/base/langflow/base/embeddings/aiml_embeddings.py
index de908e756..793151faf 100644
--- a/src/backend/base/langflow/base/embeddings/aiml_embeddings.py
+++ b/src/backend/base/langflow/base/embeddings/aiml_embeddings.py
@@ -2,10 +2,10 @@ import concurrent.futures
import json
import httpx
-from loguru import logger
from pydantic import BaseModel, SecretStr
from langflow.field_typing import Embeddings
+from langflow.logging.logger import logger
class AIMLEmbeddingsImpl(BaseModel, Embeddings):
diff --git a/src/backend/base/langflow/base/flow_processing/utils.py b/src/backend/base/langflow/base/flow_processing/utils.py
index 320053168..f88a7650e 100644
--- a/src/backend/base/langflow/base/flow_processing/utils.py
+++ b/src/backend/base/langflow/base/flow_processing/utils.py
@@ -1,6 +1,5 @@
-from loguru import logger
-
from langflow.graph.schema import ResultData, RunOutputs
+from langflow.logging.logger import logger
from langflow.schema.data import Data
from langflow.schema.message import Message
diff --git a/src/backend/base/langflow/base/io/__init__.py b/src/backend/base/langflow/base/io/__init__.py
index dc9fd4c06..e69de29bb 100644
--- a/src/backend/base/langflow/base/io/__init__.py
+++ b/src/backend/base/langflow/base/io/__init__.py
@@ -1 +0,0 @@
-# noqa: A005
diff --git a/src/backend/tests/unit/components/bundles/composio/__init__.py b/src/backend/base/langflow/base/knowledge_bases/__init__.py
similarity index 100%
rename from src/backend/tests/unit/components/bundles/composio/__init__.py
rename to src/backend/base/langflow/base/knowledge_bases/__init__.py
diff --git a/src/backend/base/langflow/base/knowledge_bases/knowledge_base_utils.py b/src/backend/base/langflow/base/knowledge_bases/knowledge_base_utils.py
new file mode 100644
index 000000000..f58ede2df
--- /dev/null
+++ b/src/backend/base/langflow/base/knowledge_bases/knowledge_base_utils.py
@@ -0,0 +1,137 @@
+import math
+from collections import Counter
+from pathlib import Path
+from uuid import UUID
+
+from langflow.services.database.models.user.crud import get_user_by_id
+from langflow.services.deps import session_scope
+
+
+def compute_tfidf(documents: list[str], query_terms: list[str]) -> list[float]:
+ """Compute TF-IDF scores for query terms across a collection of documents.
+
+ Args:
+ documents: List of document strings
+ query_terms: List of query terms to score
+
+ Returns:
+ List of TF-IDF scores for each document
+ """
+ # Tokenize documents (simple whitespace splitting)
+ tokenized_docs = [doc.lower().split() for doc in documents]
+ n_docs = len(documents)
+
+ # Calculate document frequency for each term
+ document_frequencies = {}
+ for term in query_terms:
+ document_frequencies[term] = sum(1 for doc in tokenized_docs if term.lower() in doc)
+
+ scores = []
+
+ for doc_tokens in tokenized_docs:
+ doc_score = 0.0
+ doc_length = len(doc_tokens)
+ term_counts = Counter(doc_tokens)
+
+ for term in query_terms:
+ term_lower = term.lower()
+
+ # Term frequency (TF)
+ tf = term_counts[term_lower] / doc_length if doc_length > 0 else 0
+
+ # Inverse document frequency (IDF)
+ idf = math.log(n_docs / document_frequencies[term]) if document_frequencies[term] > 0 else 0
+
+ # TF-IDF score
+ doc_score += tf * idf
+
+ scores.append(doc_score)
+
+ return scores
+
+
+def compute_bm25(documents: list[str], query_terms: list[str], k1: float = 1.2, b: float = 0.75) -> list[float]:
+ """Compute BM25 scores for query terms across a collection of documents.
+
+ Args:
+ documents: List of document strings
+ query_terms: List of query terms to score
+ k1: Controls term frequency scaling (default: 1.2)
+ b: Controls document length normalization (default: 0.75)
+
+ Returns:
+ List of BM25 scores for each document
+ """
+ # Tokenize documents
+ tokenized_docs = [doc.lower().split() for doc in documents]
+ n_docs = len(documents)
+
+ # Calculate average document length
+ avg_doc_length = sum(len(doc) for doc in tokenized_docs) / n_docs if n_docs > 0 else 0
+
+ # Handle edge case where all documents are empty
+ if avg_doc_length == 0:
+ return [0.0] * n_docs
+
+ # Calculate document frequency for each term
+ document_frequencies = {}
+ for term in query_terms:
+ document_frequencies[term] = sum(1 for doc in tokenized_docs if term.lower() in doc)
+
+ scores = []
+
+ for doc_tokens in tokenized_docs:
+ doc_score = 0.0
+ doc_length = len(doc_tokens)
+ term_counts = Counter(doc_tokens)
+
+ for term in query_terms:
+ term_lower = term.lower()
+
+ # Term frequency in document
+ tf = term_counts[term_lower]
+
+ # Inverse document frequency (IDF)
+            # Use a simplified IDF, log(N / df), rather than the probabilistic BM25 IDF;
+            # it stays non-negative since df <= N
+ idf = math.log(n_docs / document_frequencies[term]) if document_frequencies[term] > 0 else 0
+
+ # BM25 score calculation
+ numerator = tf * (k1 + 1)
+ denominator = tf + k1 * (1 - b + b * (doc_length / avg_doc_length))
+
+ # Handle division by zero when tf=0 and k1=0
+ term_score = 0 if denominator == 0 else idf * (numerator / denominator)
+
+ doc_score += term_score
+
+ scores.append(doc_score)
+
+ return scores
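A quick usage sketch for the two scorers above on a toy corpus (illustrative only; scores are relative, higher means a better match):

    docs = [
        "the cat sat on the mat",
        "dogs chase cats in the yard",
        "the mat is red",
    ]
    query = ["cat", "mat"]
    tfidf_scores = compute_tfidf(docs, query)
    bm25_scores = compute_bm25(docs, query, k1=1.2, b=0.75)
    ranked = sorted(range(len(docs)), key=lambda i: bm25_scores[i], reverse=True)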
+
+
+async def get_knowledge_bases(kb_root: Path, user_id: UUID | str) -> list[str]:
+    """Retrieve the knowledge bases available to a user.
+
+    Args:
+        kb_root: Root directory containing per-user knowledge base folders.
+        user_id: ID of the user whose knowledge bases should be listed.
+
+    Returns:
+        A list of knowledge base names.
+    """
+ if not kb_root.exists():
+ return []
+
+ # Get the current user
+ async with session_scope() as db:
+ if not user_id:
+ msg = "User ID is required for fetching knowledge bases."
+ raise ValueError(msg)
+ user_id = UUID(user_id) if isinstance(user_id, str) else user_id
+ current_user = await get_user_by_id(db, user_id)
+ if not current_user:
+ msg = f"User with ID {user_id} not found."
+ raise ValueError(msg)
+ kb_user = current_user.username
+ kb_path = kb_root / kb_user
+
+ if not kb_path.exists():
+ return []
+
+ return [str(d.name) for d in kb_path.iterdir() if not d.name.startswith(".") and d.is_dir()]
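A minimal async usage sketch for the helper above; the root path and user ID are placeholders, and the user must exist in the database for the lookup to succeed:

    import asyncio
    from pathlib import Path

    async def list_kbs_example(user_id: str) -> list[str]:
        kb_root = Path("/data/knowledge_bases")  # placeholder root directory
        return await get_knowledge_bases(kb_root, user_id)

    # asyncio.run(list_kbs_example("<existing-user-uuid>"))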
diff --git a/src/backend/base/langflow/base/langwatch/utils.py b/src/backend/base/langflow/base/langwatch/utils.py
index c4aac71f2..857e4ee3b 100644
--- a/src/backend/base/langflow/base/langwatch/utils.py
+++ b/src/backend/base/langflow/base/langwatch/utils.py
@@ -2,7 +2,8 @@ from functools import lru_cache
from typing import Any
import httpx
-from loguru import logger
+
+from langflow.logging.logger import logger
@lru_cache(maxsize=1)
diff --git a/src/backend/base/langflow/base/mcp/util.py b/src/backend/base/langflow/base/mcp/util.py
index 07f493134..7b9638925 100644
--- a/src/backend/base/langflow/base/mcp/util.py
+++ b/src/backend/base/langflow/base/mcp/util.py
@@ -15,12 +15,12 @@ import httpx
from anyio import ClosedResourceError
from httpx import codes as httpx_codes
from langchain_core.tools import StructuredTool
-from loguru import logger
from mcp import ClientSession
from mcp.shared.exceptions import McpError
from pydantic import BaseModel, Field, create_model
from sqlmodel import select
+from langflow.logging.logger import logger
from langflow.services.database.models.flow.model import Flow
from langflow.services.deps import get_settings_service
@@ -214,7 +214,7 @@ def create_tool_coroutine(tool_name: str, arg_schema: type[BaseModel], client) -
try:
return await client.run_tool(tool_name, arguments=validated.model_dump())
except Exception as e:
- logger.error(f"Tool '{tool_name}' execution failed: {e}")
+ await logger.aerror(f"Tool '{tool_name}' execution failed: {e}")
# Re-raise with more context
msg = f"Tool '{tool_name}' execution failed: {e}"
raise ValueError(msg) from e
@@ -264,7 +264,7 @@ def get_unique_name(base_name, max_length, existing_names):
i += 1
-async def get_flow_snake_case(flow_name: str, user_id: str, session, is_action: bool | None = None) -> Flow | None:
+async def get_flow_snake_case(flow_name: str, user_id: str, session, *, is_action: bool | None = None) -> Flow | None:
uuid_user_id = UUID(user_id) if isinstance(user_id, str) else user_id
stmt = select(Flow).where(Flow.user_id == uuid_user_id).where(Flow.is_component == False) # noqa: E712
flows = (await session.exec(stmt)).all()
@@ -506,7 +506,7 @@ class MCPSessionManager:
break
except (RuntimeError, KeyError, ClosedResourceError, ValueError, asyncio.TimeoutError) as e:
# Handle common recoverable errors without stopping the cleanup loop
- logger.warning(f"Error in periodic cleanup: {e}")
+ await logger.awarning(f"Error in periodic cleanup: {e}")
async def _cleanup_idle_sessions(self):
"""Clean up sessions that have been idle for too long."""
@@ -523,7 +523,7 @@ class MCPSessionManager:
# Clean up idle sessions
for session_id in sessions_to_remove:
- logger.info(f"Cleaning up idle session {session_id} for server {server_key}")
+ await logger.ainfo(f"Cleaning up idle session {session_id} for server {server_key}")
await self._cleanup_session_by_id(server_key, session_id)
# Remove server entry if no sessions left
@@ -561,7 +561,7 @@ class MCPSessionManager:
# Use a shorter timeout for the connectivity test to fail fast
response = await asyncio.wait_for(session.list_tools(), timeout=3.0)
except (asyncio.TimeoutError, ConnectionError, OSError, ValueError) as e:
- logger.debug(f"Session connectivity test failed (standard error): {e}")
+ await logger.adebug(f"Session connectivity test failed (standard error): {e}")
return False
except Exception as e:
# Handle MCP-specific errors that might not be in the standard list
@@ -574,27 +574,27 @@ class MCPSessionManager:
or "Transport closed" in error_str
or "Stream closed" in error_str
):
- logger.debug(f"Session connectivity test failed (MCP connection error): {e}")
+ await logger.adebug(f"Session connectivity test failed (MCP connection error): {e}")
return False
# Re-raise unexpected errors
- logger.warning(f"Unexpected error in connectivity test: {e}")
+ await logger.awarning(f"Unexpected error in connectivity test: {e}")
raise
else:
# Validate that we got a meaningful response
if response is None:
- logger.debug("Session connectivity test failed: received None response")
+ await logger.adebug("Session connectivity test failed: received None response")
return False
try:
# Check if we can access the tools list (even if empty)
tools = getattr(response, "tools", None)
if tools is None:
- logger.debug("Session connectivity test failed: no tools attribute in response")
+ await logger.adebug("Session connectivity test failed: no tools attribute in response")
return False
except (AttributeError, TypeError) as e:
- logger.debug(f"Session connectivity test failed while validating response: {e}")
+ await logger.adebug(f"Session connectivity test failed while validating response: {e}")
return False
else:
- logger.debug(f"Session connectivity test passed: found {len(tools)} tools")
+ await logger.adebug(f"Session connectivity test passed: found {len(tools)} tools")
return True
async def get_session(self, context_id: str, connection_params, transport_type: str):
@@ -625,32 +625,32 @@ class MCPSessionManager:
# Quick health check
if await self._validate_session_connectivity(session):
- logger.debug(f"Reusing existing session {session_id} for server {server_key}")
+ await logger.adebug(f"Reusing existing session {session_id} for server {server_key}")
# record mapping & bump ref-count for backwards compatibility
self._context_to_session[context_id] = (server_key, session_id)
self._session_refcount[(server_key, session_id)] = (
self._session_refcount.get((server_key, session_id), 0) + 1
)
return session
- logger.info(f"Session {session_id} for server {server_key} failed health check, cleaning up")
+ await logger.ainfo(f"Session {session_id} for server {server_key} failed health check, cleaning up")
await self._cleanup_session_by_id(server_key, session_id)
else:
# Task is done, clean up
- logger.info(f"Session {session_id} for server {server_key} task is done, cleaning up")
+ await logger.ainfo(f"Session {session_id} for server {server_key} task is done, cleaning up")
await self._cleanup_session_by_id(server_key, session_id)
# Check if we've reached the maximum number of sessions for this server
if len(sessions) >= MAX_SESSIONS_PER_SERVER:
# Remove the oldest session
oldest_session_id = min(sessions.keys(), key=lambda x: sessions[x]["last_used"])
- logger.info(
+ await logger.ainfo(
f"Maximum sessions reached for server {server_key}, removing oldest session {oldest_session_id}"
)
await self._cleanup_session_by_id(server_key, oldest_session_id)
# Create new session
session_id = f"{server_key}_{len(sessions)}"
- logger.info(f"Creating new session {session_id} for server {server_key}")
+ await logger.ainfo(f"Creating new session {session_id} for server {server_key}")
if transport_type == "stdio":
session, task = await self._create_stdio_session(session_id, connection_params)
@@ -700,7 +700,7 @@ class MCPSessionManager:
try:
await event.wait()
except asyncio.CancelledError:
- logger.info(f"Session {session_id} is shutting down")
+ await logger.ainfo(f"Session {session_id} is shutting down")
except Exception as e: # noqa: BLE001
if not session_future.done():
session_future.set_exception(e)
@@ -723,7 +723,7 @@ class MCPSessionManager:
await task
self._background_tasks.discard(task)
msg = f"Timeout waiting for STDIO session {session_id} to initialize"
- logger.error(msg)
+ await logger.aerror(msg)
raise ValueError(msg) from timeout_err
return session, task
@@ -759,7 +759,7 @@ class MCPSessionManager:
try:
await event.wait()
except asyncio.CancelledError:
- logger.info(f"Session {session_id} is shutting down")
+ await logger.ainfo(f"Session {session_id} is shutting down")
except Exception as e: # noqa: BLE001
if not session_future.done():
session_future.set_exception(e)
@@ -782,7 +782,7 @@ class MCPSessionManager:
await task
self._background_tasks.discard(task)
msg = f"Timeout waiting for SSE session {session_id} to initialize"
- logger.error(msg)
+ await logger.aerror(msg)
raise ValueError(msg) from timeout_err
return session, task
@@ -813,9 +813,9 @@ class MCPSessionManager:
if hasattr(session, "aclose"):
try:
await session.aclose()
- logger.debug("Successfully closed session %s using aclose()", session_id)
+ await logger.adebug("Successfully closed session %s using aclose()", session_id)
except Exception as e: # noqa: BLE001
- logger.debug("Error closing session %s with aclose(): %s", session_id, e)
+ await logger.adebug("Error closing session %s with aclose(): %s", session_id, e)
# If no aclose, try regular close method
elif hasattr(session, "close"):
@@ -824,18 +824,20 @@ class MCPSessionManager:
if inspect.iscoroutinefunction(session.close):
# It's an async method
await session.close()
- logger.debug("Successfully closed session %s using async close()", session_id)
+ await logger.adebug("Successfully closed session %s using async close()", session_id)
else:
# Try calling it and check if result is awaitable
close_result = session.close()
if inspect.isawaitable(close_result):
await close_result
- logger.debug("Successfully closed session %s using awaitable close()", session_id)
+ await logger.adebug(
+ "Successfully closed session %s using awaitable close()", session_id
+ )
else:
# It's a synchronous close
- logger.debug("Successfully closed session %s using sync close()", session_id)
+ await logger.adebug("Successfully closed session %s using sync close()", session_id)
except Exception as e: # noqa: BLE001
- logger.debug("Error closing session %s with close(): %s", session_id, e)
+ await logger.adebug("Error closing session %s with close(): %s", session_id, e)
# Cancel the background task which will properly close the session
if "task" in session_info:
@@ -845,9 +847,9 @@ class MCPSessionManager:
try:
await task
except asyncio.CancelledError:
- logger.info(f"Cancelled task for session {session_id}")
+ await logger.ainfo(f"Cancelled task for session {session_id}")
except Exception as e: # noqa: BLE001
- logger.warning(f"Error cleaning up session {session_id}: {e}")
+ await logger.awarning(f"Error cleaning up session {session_id}: {e}")
finally:
# Remove from sessions dict
del sessions[session_id]
@@ -900,7 +902,7 @@ class MCPSessionManager:
"""
mapping = self._context_to_session.get(context_id)
if not mapping:
- logger.debug(f"No session mapping found for context_id {context_id}")
+ await logger.adebug(f"No session mapping found for context_id {context_id}")
return
server_key, session_id = mapping
@@ -1031,7 +1033,7 @@ class MCPStdioClient:
for attempt in range(max_retries):
try:
- logger.debug(f"Attempting to run tool '{tool_name}' (attempt {attempt + 1}/{max_retries})")
+ await logger.adebug(f"Attempting to run tool '{tool_name}' (attempt {attempt + 1}/{max_retries})")
# Get or create persistent session
session = await self._get_or_create_session()
@@ -1041,7 +1043,7 @@ class MCPStdioClient:
)
except Exception as e:
current_error_type = type(e).__name__
- logger.warning(f"Tool '{tool_name}' failed on attempt {attempt + 1}: {current_error_type} - {e}")
+ await logger.awarning(f"Tool '{tool_name}' failed on attempt {attempt + 1}: {current_error_type} - {e}")
# Import specific MCP error types for detection
try:
@@ -1056,14 +1058,14 @@ class MCPStdioClient:
# If we're getting the same error type repeatedly, don't retry
if last_error_type == current_error_type and attempt > 0:
- logger.error(f"Repeated {current_error_type} error for tool '{tool_name}', not retrying")
+ await logger.aerror(f"Repeated {current_error_type} error for tool '{tool_name}', not retrying")
break
last_error_type = current_error_type
# If it's a connection error (ClosedResourceError or MCP connection closed) and we have retries left
if (is_closed_resource_error or is_mcp_connection_error) and attempt < max_retries - 1:
- logger.warning(
+ await logger.awarning(
f"MCP session connection issue for tool '{tool_name}', retrying with fresh session..."
)
# Clean up the dead session
@@ -1076,7 +1078,7 @@ class MCPStdioClient:
# If it's a timeout error and we have retries left, try once more
if is_timeout_error and attempt < max_retries - 1:
- logger.warning(f"Tool '{tool_name}' timed out, retrying...")
+ await logger.awarning(f"Tool '{tool_name}' timed out, retrying...")
# Don't clean up session for timeouts, might just be a slow response
await asyncio.sleep(1.0)
continue
@@ -1089,7 +1091,7 @@ class MCPStdioClient:
or is_timeout_error
):
msg = f"Failed to run tool '{tool_name}' after {attempt + 1} attempts: {e}"
- logger.error(msg)
+ await logger.aerror(msg)
# Clean up failed session from cache
if self._session_context and self._component_cache:
cache_key = f"mcp_session_stdio_{self._session_context}"
@@ -1099,12 +1101,12 @@ class MCPStdioClient:
# Re-raise unexpected errors
raise
else:
- logger.debug(f"Tool '{tool_name}' completed successfully")
+ await logger.adebug(f"Tool '{tool_name}' completed successfully")
return result
# This should never be reached due to the exception handling above
msg = f"Failed to run tool '{tool_name}': Maximum retries exceeded with repeated {last_error_type} errors"
- logger.error(msg)
+ await logger.aerror(msg)
raise ValueError(msg)
async def disconnect(self):
@@ -1213,7 +1215,7 @@ class MCPSseClient:
return response.headers.get("Location", url)
# Don't treat 404 as an error here - let the main connection handle it
except (httpx.RequestError, httpx.HTTPError) as e:
- logger.warning(f"Error checking redirects: {e}")
+ await logger.awarning(f"Error checking redirects: {e}")
return url
async def _connect_to_server(
@@ -1336,7 +1338,7 @@ class MCPSseClient:
for attempt in range(max_retries):
try:
- logger.debug(f"Attempting to run tool '{tool_name}' (attempt {attempt + 1}/{max_retries})")
+ await logger.adebug(f"Attempting to run tool '{tool_name}' (attempt {attempt + 1}/{max_retries})")
# Get or create persistent session
session = await self._get_or_create_session()
@@ -1349,7 +1351,7 @@ class MCPSseClient:
)
except Exception as e:
current_error_type = type(e).__name__
- logger.warning(f"Tool '{tool_name}' failed on attempt {attempt + 1}: {current_error_type} - {e}")
+ await logger.awarning(f"Tool '{tool_name}' failed on attempt {attempt + 1}: {current_error_type} - {e}")
# Import specific MCP error types for detection
try:
@@ -1367,14 +1369,14 @@ class MCPSseClient:
# If we're getting the same error type repeatedly, don't retry
if last_error_type == current_error_type and attempt > 0:
- logger.error(f"Repeated {current_error_type} error for tool '{tool_name}', not retrying")
+ await logger.aerror(f"Repeated {current_error_type} error for tool '{tool_name}', not retrying")
break
last_error_type = current_error_type
# If it's a connection error (ClosedResourceError or MCP connection closed) and we have retries left
if (is_closed_resource_error or is_mcp_connection_error) and attempt < max_retries - 1:
- logger.warning(
+ await logger.awarning(
f"MCP session connection issue for tool '{tool_name}', retrying with fresh session..."
)
# Clean up the dead session
@@ -1387,7 +1389,7 @@ class MCPSseClient:
# If it's a timeout error and we have retries left, try once more
if is_timeout_error and attempt < max_retries - 1:
- logger.warning(f"Tool '{tool_name}' timed out, retrying...")
+ await logger.awarning(f"Tool '{tool_name}' timed out, retrying...")
# Don't clean up session for timeouts, might just be a slow response
await asyncio.sleep(1.0)
continue
@@ -1400,7 +1402,7 @@ class MCPSseClient:
or is_timeout_error
):
msg = f"Failed to run tool '{tool_name}' after {attempt + 1} attempts: {e}"
- logger.error(msg)
+ await logger.aerror(msg)
# Clean up failed session from cache
if self._session_context and self._component_cache:
cache_key = f"mcp_session_sse_{self._session_context}"
@@ -1410,12 +1412,12 @@ class MCPSseClient:
# Re-raise unexpected errors
raise
else:
- logger.debug(f"Tool '{tool_name}' completed successfully")
+ await logger.adebug(f"Tool '{tool_name}' completed successfully")
return result
# This should never be reached due to the exception handling above
msg = f"Failed to run tool '{tool_name}': Maximum retries exceeded with repeated {last_error_type} errors"
- logger.error(msg)
+ await logger.aerror(msg)
raise ValueError(msg)
async def disconnect(self):
diff --git a/src/backend/base/langflow/base/models/model.py b/src/backend/base/langflow/base/models/model.py
index f8a13037b..f3fd5b9af 100644
--- a/src/backend/base/langflow/base/models/model.py
+++ b/src/backend/base/langflow/base/models/model.py
@@ -252,7 +252,7 @@ class LCModelComponent(Component):
if stream:
lf_message, result = await self._handle_stream(runnable, inputs)
else:
- message = runnable.invoke(inputs)
+ message = await runnable.ainvoke(inputs)
result = message.content if hasattr(message, "content") else message
if isinstance(message, AIMessage):
status_message = self.build_status_message(message)
@@ -288,7 +288,7 @@ class LCModelComponent(Component):
else:
session_id = None
model_message = Message(
- text=runnable.stream(inputs),
+ text=runnable.astream(inputs),
sender=MESSAGE_SENDER_AI,
sender_name="AI",
properties={"icon": self.icon, "state": "partial"},
@@ -298,7 +298,7 @@ class LCModelComponent(Component):
lf_message = await self.send_message(model_message)
result = lf_message.text
else:
- message = runnable.invoke(inputs)
+ message = await runnable.ainvoke(inputs)
result = message.content if hasattr(message, "content") else message
return lf_message, result
diff --git a/src/backend/base/langflow/base/models/openai_constants.py b/src/backend/base/langflow/base/models/openai_constants.py
index 29017790b..8eb028e72 100644
--- a/src/backend/base/langflow/base/models/openai_constants.py
+++ b/src/backend/base/langflow/base/models/openai_constants.py
@@ -2,6 +2,35 @@ from .model_metadata import create_model_metadata
# Unified model metadata - single source of truth
OPENAI_MODELS_DETAILED = [
+ # GPT-5 Series
+ create_model_metadata(
+ provider="OpenAI",
+ name="gpt-5",
+ icon="OpenAI",
+ tool_calling=True,
+ reasoning=True,
+ ),
+ create_model_metadata(
+ provider="OpenAI",
+ name="gpt-5-mini",
+ icon="OpenAI",
+ tool_calling=True,
+ reasoning=True,
+ ),
+ create_model_metadata(
+ provider="OpenAI",
+ name="gpt-5-nano",
+ icon="OpenAI",
+ tool_calling=True,
+ reasoning=True,
+ ),
+ create_model_metadata(
+ provider="OpenAI",
+ name="gpt-5-chat-latest",
+ icon="OpenAI",
+ tool_calling=False,
+ reasoning=True,
+ ),
# Regular OpenAI Models
create_model_metadata(provider="OpenAI", name="gpt-4o-mini", icon="OpenAI", tool_calling=True),
create_model_metadata(provider="OpenAI", name="gpt-4o", icon="OpenAI", tool_calling=True),
diff --git a/src/backend/base/langflow/base/prompts/api_utils.py b/src/backend/base/langflow/base/prompts/api_utils.py
index 150c740ce..c07a3c5ac 100644
--- a/src/backend/base/langflow/base/prompts/api_utils.py
+++ b/src/backend/base/langflow/base/prompts/api_utils.py
@@ -3,10 +3,10 @@ from typing import Any
from fastapi import HTTPException
from langchain_core.prompts import PromptTemplate
-from loguru import logger
from langflow.inputs.inputs import DefaultPromptField
from langflow.interface.utils import extract_input_variables_from_prompt
+from langflow.logging.logger import logger
_INVALID_CHARACTERS = {
" ",
diff --git a/src/backend/base/langflow/base/tools/flow_tool.py b/src/backend/base/langflow/base/tools/flow_tool.py
index 53a43c666..c661af42c 100644
--- a/src/backend/base/langflow/base/tools/flow_tool.py
+++ b/src/backend/base/langflow/base/tools/flow_tool.py
@@ -3,13 +3,13 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Any
from langchain_core.tools import BaseTool, ToolException
-from loguru import logger
from typing_extensions import override
from langflow.base.flow_processing.utils import build_data_from_result_data, format_flow_output_data
from langflow.graph.graph.base import Graph # cannot be a part of TYPE_CHECKING # noqa: TC001
from langflow.graph.vertex.base import Vertex # cannot be a part of TYPE_CHECKING # noqa: TC001
from langflow.helpers.flow import build_schema_from_inputs, get_arg_names, get_flow_inputs, run_flow
+from langflow.logging.logger import logger
from langflow.utils.async_helpers import run_until_complete
if TYPE_CHECKING:
@@ -109,7 +109,7 @@ class FlowTool(BaseTool):
try:
run_id = self.graph.run_id if hasattr(self, "graph") and self.graph else None
except Exception: # noqa: BLE001
- logger.opt(exception=True).warning("Failed to set run_id")
+ logger.warning("Failed to set run_id", exc_info=True)
run_id = None
run_outputs = await run_flow(
tweaks={key: {"input_value": value} for key, value in tweaks.items()},
diff --git a/src/backend/base/langflow/base/tools/run_flow.py b/src/backend/base/langflow/base/tools/run_flow.py
index f05c6b6f5..d7fdf3aba 100644
--- a/src/backend/base/langflow/base/tools/run_flow.py
+++ b/src/backend/base/langflow/base/tools/run_flow.py
@@ -1,7 +1,6 @@
from abc import abstractmethod
from typing import TYPE_CHECKING
-from loguru import logger
from typing_extensions import override
from langflow.custom.custom_component.component import Component, _get_component_toolkit
@@ -9,11 +8,8 @@ from langflow.field_typing import Tool
from langflow.graph.graph.base import Graph
from langflow.graph.vertex.base import Vertex
from langflow.helpers.flow import get_flow_inputs
-from langflow.inputs.inputs import (
- DropdownInput,
- InputTypes,
- MessageInput,
-)
+from langflow.inputs.inputs import DropdownInput, InputTypes, MessageInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
from langflow.schema.dataframe import DataFrame
from langflow.schema.dotdict import dotdict
diff --git a/src/backend/base/langflow/components/FAISS/__init__.py b/src/backend/base/langflow/components/FAISS/__init__.py
new file mode 100644
index 000000000..cb7246bbd
--- /dev/null
+++ b/src/backend/base/langflow/components/FAISS/__init__.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from langflow.components._importing import import_mod
+
+if TYPE_CHECKING:
+ from .faiss import FaissVectorStoreComponent
+
+_dynamic_imports = {
+ "FaissVectorStoreComponent": "faiss",
+}
+
+__all__ = [
+ "FaissVectorStoreComponent",
+]
+
+
+def __getattr__(attr_name: str) -> Any:
+ """Lazily import FAISS components on attribute access."""
+ if attr_name not in _dynamic_imports:
+ msg = f"module '{__name__}' has no attribute '{attr_name}'"
+ raise AttributeError(msg)
+ try:
+ result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
+ except (ModuleNotFoundError, ImportError, AttributeError) as e:
+ msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
+ raise AttributeError(msg) from e
+ globals()[attr_name] = result
+ return result
+
+
+def __dir__() -> list[str]:
+ return list(__all__)
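For illustration, the lazy-import pattern above only loads the `faiss` submodule when the attribute is first accessed:

    from langflow.components import FAISS

    # First access triggers import_mod() and caches the class in the package globals;
    # subsequent accesses resolve directly from globals().
    component_cls = FAISS.FaissVectorStoreComponent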
diff --git a/src/backend/base/langflow/components/vectorstores/faiss.py b/src/backend/base/langflow/components/FAISS/faiss.py
similarity index 100%
rename from src/backend/base/langflow/components/vectorstores/faiss.py
rename to src/backend/base/langflow/components/FAISS/faiss.py
diff --git a/src/backend/base/langflow/components/Notion/add_content_to_page.py b/src/backend/base/langflow/components/Notion/add_content_to_page.py
index ac9b7a98c..86bb25e20 100644
--- a/src/backend/base/langflow/components/Notion/add_content_to_page.py
+++ b/src/backend/base/langflow/components/Notion/add_content_to_page.py
@@ -4,13 +4,13 @@ from typing import Any
import requests
from bs4 import BeautifulSoup
from langchain.tools import StructuredTool
-from loguru import logger
from markdown import markdown
from pydantic import BaseModel, Field
from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs.inputs import MultilineInput, SecretStrInput, StrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
MIN_ROWS_IN_TABLE = 3
@@ -84,7 +84,7 @@ class AddContentToPage(LCToolComponent):
error_message += f" Status code: {e.response.status_code}, Response: {e.response.text}"
return error_message
except Exception as e: # noqa: BLE001
- logger.opt(exception=True).debug("Error adding content to Notion page")
+ logger.debug("Error adding content to Notion page", exc_info=True)
return f"Error: An unexpected error occurred while adding content to Notion page. {e}"
def process_node(self, node):
diff --git a/src/backend/base/langflow/components/Notion/list_database_properties.py b/src/backend/base/langflow/components/Notion/list_database_properties.py
index 4c2961481..3ad9244d7 100644
--- a/src/backend/base/langflow/components/Notion/list_database_properties.py
+++ b/src/backend/base/langflow/components/Notion/list_database_properties.py
@@ -1,11 +1,11 @@
import requests
from langchain.tools import StructuredTool
-from loguru import logger
from pydantic import BaseModel, Field
from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs.inputs import SecretStrInput, StrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
@@ -64,5 +64,5 @@ class NotionDatabaseProperties(LCToolComponent):
except ValueError as e:
return f"Error parsing Notion API response: {e}"
except Exception as e: # noqa: BLE001
- logger.opt(exception=True).debug("Error fetching Notion database properties")
+ logger.debug("Error fetching Notion database properties", exc_info=True)
return f"An unexpected error occurred: {e}"
diff --git a/src/backend/base/langflow/components/Notion/list_pages.py b/src/backend/base/langflow/components/Notion/list_pages.py
index b7691b86b..413358dd2 100644
--- a/src/backend/base/langflow/components/Notion/list_pages.py
+++ b/src/backend/base/langflow/components/Notion/list_pages.py
@@ -3,12 +3,12 @@ from typing import Any
import requests
from langchain.tools import StructuredTool
-from loguru import logger
from pydantic import BaseModel, Field
from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs.inputs import MultilineInput, SecretStrInput, StrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
@@ -118,5 +118,5 @@ class NotionListPages(LCToolComponent):
except KeyError:
return "Unexpected response format from Notion API"
except Exception as e: # noqa: BLE001
- logger.opt(exception=True).debug("Error querying Notion database")
+ logger.debug("Error querying Notion database", exc_info=True)
return f"An unexpected error occurred: {e}"
diff --git a/src/backend/base/langflow/components/Notion/page_content_viewer.py b/src/backend/base/langflow/components/Notion/page_content_viewer.py
index c1287b773..664526a6b 100644
--- a/src/backend/base/langflow/components/Notion/page_content_viewer.py
+++ b/src/backend/base/langflow/components/Notion/page_content_viewer.py
@@ -1,11 +1,11 @@
import requests
from langchain.tools import StructuredTool
-from loguru import logger
from pydantic import BaseModel, Field
from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs.inputs import SecretStrInput, StrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
@@ -65,7 +65,7 @@ class NotionPageContent(LCToolComponent):
error_message += f" Status code: {e.response.status_code}, Response: {e.response.text}"
return error_message
except Exception as e: # noqa: BLE001
- logger.opt(exception=True).debug("Error retrieving Notion page content")
+ logger.debug("Error retrieving Notion page content", exc_info=True)
return f"Error: An unexpected error occurred while retrieving Notion page content. {e}"
def parse_blocks(self, blocks: list) -> str:
diff --git a/src/backend/base/langflow/components/Notion/update_page_property.py b/src/backend/base/langflow/components/Notion/update_page_property.py
index 15a4a8228..749a3559e 100644
--- a/src/backend/base/langflow/components/Notion/update_page_property.py
+++ b/src/backend/base/langflow/components/Notion/update_page_property.py
@@ -3,12 +3,12 @@ from typing import Any
import requests
from langchain.tools import StructuredTool
-from loguru import logger
from pydantic import BaseModel, Field
from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs.inputs import MultilineInput, SecretStrInput, StrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
diff --git a/src/backend/base/langflow/components/agentql/agentql_api.py b/src/backend/base/langflow/components/agentql/agentql_api.py
index 578c5e95d..adf99b8b3 100644
--- a/src/backend/base/langflow/components/agentql/agentql_api.py
+++ b/src/backend/base/langflow/components/agentql/agentql_api.py
@@ -1,17 +1,9 @@
import httpx
-from loguru import logger
from langflow.custom.custom_component.component import Component
from langflow.field_typing.range_spec import RangeSpec
-from langflow.io import (
- BoolInput,
- DropdownInput,
- IntInput,
- MessageTextInput,
- MultilineInput,
- Output,
- SecretStrInput,
-)
+from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, MultilineInput, Output, SecretStrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
diff --git a/src/backend/base/langflow/components/agents/agent.py b/src/backend/base/langflow/components/agents/agent.py
index e7b3db35d..d29e427a9 100644
--- a/src/backend/base/langflow/components/agents/agent.py
+++ b/src/backend/base/langflow/components/agents/agent.py
@@ -2,6 +2,7 @@ import json
import re
from langchain_core.tools import StructuredTool
+from pydantic import ValidationError
from langflow.base.agents.agent import LCToolsAgentComponent
from langflow.base.agents.events import ExceptionWithMessageError
@@ -19,11 +20,13 @@ from langflow.components.langchain_utilities.tool_calling import ToolCallingAgen
from langflow.custom.custom_component.component import _get_component_toolkit
from langflow.custom.utils import update_component_build_config
from langflow.field_typing import Tool
-from langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output
+from langflow.helpers.base_model import build_model_from_schema
+from langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput
from langflow.logging import logger
from langflow.schema.data import Data
from langflow.schema.dotdict import dotdict
from langflow.schema.message import Message
+from langflow.schema.table import EditMode
def set_advanced_true(component_input):
@@ -78,6 +81,67 @@ class AgentComponent(ToolCallingAgentComponent):
advanced=True,
show=True,
),
+ MultilineInput(
+ name="format_instructions",
+ display_name="Output Format Instructions",
+ info="Generic template for structured output formatting. Used only with the Structured Response output.",
+ value=(
+ "You are an AI that extracts structured JSON objects from unstructured text. "
+ "Use a predefined schema with expected types (str, int, float, bool, dict). "
+ "Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. "
+ "Fill missing or ambiguous values with defaults: null for missing values. "
+ "Remove exact duplicates but keep variations that have different field values. "
+ "Always return valid JSON in the expected format, never throw errors. "
+ "If multiple objects can be extracted, return them all in the structured format."
+ ),
+ advanced=True,
+ ),
+ TableInput(
+ name="output_schema",
+ display_name="Output Schema",
+ info=(
+ "Schema Validation: Define the structure and data types for structured output. "
+ "No validation is applied if no output schema is provided."
+ ),
+ advanced=True,
+ required=False,
+ value=[],
+ table_schema=[
+ {
+ "name": "name",
+ "display_name": "Name",
+ "type": "str",
+ "description": "Specify the name of the output field.",
+ "default": "field",
+ "edit_mode": EditMode.INLINE,
+ },
+ {
+ "name": "description",
+ "display_name": "Description",
+ "type": "str",
+ "description": "Describe the purpose of the output field.",
+ "default": "description of field",
+ "edit_mode": EditMode.POPOVER,
+ },
+ {
+ "name": "type",
+ "display_name": "Type",
+ "type": "str",
+ "edit_mode": EditMode.INLINE,
+ "description": ("Indicate the data type of the output field (e.g., str, int, float, bool, dict)."),
+ "options": ["str", "int", "float", "bool", "dict"],
+ "default": "str",
+ },
+ {
+ "name": "multiple",
+ "display_name": "As List",
+ "type": "boolean",
+ "description": "Set to True if this output field should be a list of the specified type.",
+ "default": "False",
+ "edit_mode": EditMode.INLINE,
+ },
+ ],
+ ),
*LCToolsAgentComponent._base_inputs,
# removed memory inputs from agent component
# *memory_inputs,
@@ -94,31 +158,33 @@ class AgentComponent(ToolCallingAgentComponent):
Output(name="structured_response", display_name="Structured Response", method="json_response", tool_mode=False),
]
+ async def get_agent_requirements(self):
+ """Gather the model, chat history, and tools required to run the agent."""
+ llm_model, display_name = await self.get_llm()
+ if llm_model is None:
+ msg = "No language model selected. Please choose a model to proceed."
+ raise ValueError(msg)
+ self.model_name = get_model_name(llm_model, display_name=display_name)
+
+ # Get memory data
+ self.chat_history = await self.get_memory_data()
+ if isinstance(self.chat_history, Message):
+ self.chat_history = [self.chat_history]
+
+ # Add current date tool if enabled
+ if self.add_current_date_tool:
+ if not isinstance(self.tools, list): # type: ignore[has-type]
+ self.tools = []
+ current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)
+ if not isinstance(current_date_tool, StructuredTool):
+ msg = "CurrentDateComponent must be converted to a StructuredTool"
+ raise TypeError(msg)
+ self.tools.append(current_date_tool)
+ return llm_model, self.chat_history, self.tools
+
async def message_response(self) -> Message:
try:
- # Get LLM model and validate
- llm_model, display_name = self.get_llm()
- if llm_model is None:
- msg = "No language model selected. Please choose a model to proceed."
- raise ValueError(msg)
- self.model_name = get_model_name(llm_model, display_name=display_name)
-
- # Get memory data
- self.chat_history = await self.get_memory_data()
- if isinstance(self.chat_history, Message):
- self.chat_history = [self.chat_history]
-
- # Add current date tool if enabled
- if self.add_current_date_tool:
- if not isinstance(self.tools, list): # type: ignore[has-type]
- self.tools = []
- current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)
- if not isinstance(current_date_tool, StructuredTool):
- msg = "CurrentDateComponent must be converted to a StructuredTool"
- raise TypeError(msg)
- self.tools.append(current_date_tool)
- # note the tools are not required to run the agent, hence the validation removed.
-
+ llm_model, self.chat_history, self.tools = await self.get_agent_requirements()
# Set up and run agent
self.set(
llm=llm_model,
@@ -132,52 +198,180 @@ class AgentComponent(ToolCallingAgentComponent):
# Store result for potential JSON output
self._agent_result = result
- # return result
except (ValueError, TypeError, KeyError) as e:
- logger.error(f"{type(e).__name__}: {e!s}")
+ await logger.aerror(f"{type(e).__name__}: {e!s}")
raise
except ExceptionWithMessageError as e:
- logger.error(f"ExceptionWithMessageError occurred: {e}")
+ await logger.aerror(f"ExceptionWithMessageError occurred: {e}")
raise
+ # Broad catch so unexpected errors are logged before being re-raised, not swallowed
except Exception as e:
- logger.error(f"Unexpected error: {e!s}")
+ await logger.aerror(f"Unexpected error: {e!s}")
raise
else:
return result
- async def json_response(self) -> Data:
- """Convert agent response to structured JSON Data output."""
- # Run the regular message response first to get the result
- if not hasattr(self, "_agent_result"):
- await self.message_response()
+ def _preprocess_schema(self, schema):
+ """Preprocess schema to ensure correct data types for build_model_from_schema."""
+ processed_schema = []
+ for field in schema:
+ processed_field = {
+ "name": str(field.get("name", "field")),
+ "type": str(field.get("type", "str")),
+ "description": str(field.get("description", "")),
+ "multiple": field.get("multiple", False),
+ }
+ # Ensure multiple is handled correctly
+ if isinstance(processed_field["multiple"], str):
+ processed_field["multiple"] = processed_field["multiple"].lower() in ["true", "1", "t", "y", "yes"]
+ processed_schema.append(processed_field)
+ return processed_schema
- result = self._agent_result
+ async def build_structured_output_base(self, content: str):
+ """Build structured output with optional BaseModel validation."""
+ json_pattern = r"\{.*\}"
+ schema_error_msg = "Try setting an output schema"
- # Extract content from result
- if hasattr(result, "content"):
- content = result.content
- elif hasattr(result, "text"):
- content = result.text
- else:
- content = str(result)
-
- # Try to parse as JSON
+ # Try to parse content as JSON first
+ json_data = None
try:
json_data = json.loads(content)
- return Data(data=json_data)
except json.JSONDecodeError:
- # If it's not valid JSON, try to extract JSON from the content
- json_match = re.search(r"\{.*\}", content, re.DOTALL)
+ json_match = re.search(json_pattern, content, re.DOTALL)
if json_match:
try:
json_data = json.loads(json_match.group())
- return Data(data=json_data)
except json.JSONDecodeError:
- pass
+ return {"content": content, "error": schema_error_msg}
+ else:
+ return {"content": content, "error": schema_error_msg}
- # If we can't extract JSON, return the raw content as data
- return Data(data={"content": content, "error": "Could not parse as JSON"})
+ # If no output schema provided, return parsed JSON without validation
+ if not hasattr(self, "output_schema") or not self.output_schema or len(self.output_schema) == 0:
+ return json_data
+
+ # Use BaseModel validation with schema
+ try:
+ processed_schema = self._preprocess_schema(self.output_schema)
+ output_model = build_model_from_schema(processed_schema)
+
+ # Validate against the schema
+ if isinstance(json_data, list):
+ # Multiple objects
+ validated_objects = []
+ for item in json_data:
+ try:
+ validated_obj = output_model.model_validate(item)
+ validated_objects.append(validated_obj.model_dump())
+ except ValidationError as e:
+ await logger.aerror(f"Validation error for item: {e}")
+ # Include invalid items with error info
+ validated_objects.append({"data": item, "validation_error": str(e)})
+ return validated_objects
+
+ # Single object
+ try:
+ validated_obj = output_model.model_validate(json_data)
+ return [validated_obj.model_dump()] # Return as list for consistency
+ except ValidationError as e:
+ await logger.aerror(f"Validation error: {e}")
+ return [{"data": json_data, "validation_error": str(e)}]
+
+ except (TypeError, ValueError) as e:
+ await logger.aerror(f"Error building structured output: {e}")
+ # Fallback to parsed JSON without validation
+ return json_data
+
+ async def json_response(self) -> Data:
+ """Convert agent response to structured JSON Data output with schema validation."""
+ # Always use the structured chat agent in JSON response mode for better JSON formatting
+ try:
+ system_components = []
+
+ # 1. Agent Instructions (system_prompt)
+ agent_instructions = getattr(self, "system_prompt", "") or ""
+ if agent_instructions:
+ system_components.append(f"{agent_instructions}")
+
+ # 2. Format Instructions
+ format_instructions = getattr(self, "format_instructions", "") or ""
+ if format_instructions:
+ system_components.append(f"Format instructions: {format_instructions}")
+
+ # 3. Schema Information from BaseModel
+ if hasattr(self, "output_schema") and self.output_schema and len(self.output_schema) > 0:
+ try:
+ processed_schema = self._preprocess_schema(self.output_schema)
+ output_model = build_model_from_schema(processed_schema)
+ schema_dict = output_model.model_json_schema()
+ schema_info = (
+ "You are given some text that may include format instructions, "
+ "explanations, or other content alongside a JSON schema.\n\n"
+ "Your task:\n"
+ "- Extract only the JSON schema.\n"
+ "- Return it as valid JSON.\n"
+ "- Do not include format instructions, explanations, or extra text.\n\n"
+ "Input:\n"
+ f"{json.dumps(schema_dict, indent=2)}\n\n"
+ "Output (only JSON schema):"
+ )
+ system_components.append(schema_info)
+ except (ValidationError, ValueError, TypeError, KeyError) as e:
+ await logger.aerror(f"Could not build schema for prompt: {e}", exc_info=True)
+
+ # Combine all components
+ combined_instructions = "\n\n".join(system_components) if system_components else ""
+ llm_model, self.chat_history, self.tools = await self.get_agent_requirements()
+ self.set(
+ llm=llm_model,
+ tools=self.tools or [],
+ chat_history=self.chat_history,
+ input_value=self.input_value,
+ system_prompt=combined_instructions,
+ )
+
+ # Create and run structured chat agent
+ try:
+ structured_agent = self.create_agent_runnable()
+ except (NotImplementedError, ValueError, TypeError) as e:
+ await logger.aerror(f"Error with structured chat agent: {e}")
+ raise
+ try:
+ result = await self.run_agent(structured_agent)
+ except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:
+ await logger.aerror(f"Error with structured agent result: {e}")
+ raise
+ # Extract content from structured agent result
+ if hasattr(result, "content"):
+ content = result.content
+ elif hasattr(result, "text"):
+ content = result.text
+ else:
+ content = str(result)
+
+ except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:
+ await logger.aerror(f"Error with structured chat agent: {e}")
+ # No structured agent result available; return the error as Data
+ content_str = "No content returned from agent"
+ return Data(data={"content": content_str, "error": str(e)})
+
+ # Process with structured output validation
+ try:
+ structured_output = await self.build_structured_output_base(content)
+
+ # Handle different output formats
+ if isinstance(structured_output, list) and structured_output:
+ if len(structured_output) == 1:
+ return Data(data=structured_output[0])
+ return Data(data={"results": structured_output})
+ if isinstance(structured_output, dict):
+ return Data(data=structured_output)
+ return Data(data={"content": content})
+
+ except (ValueError, TypeError) as e:
+ await logger.aerror(f"Error in structured output processing: {e}")
+ return Data(data={"content": content, "error": str(e)})
async def get_memory_data(self):
# TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.
@@ -190,7 +384,7 @@ class AgentComponent(ToolCallingAgentComponent):
message for message in messages if getattr(message, "id", None) != getattr(self.input_value, "id", None)
]
- def get_llm(self):
+ async def get_llm(self):
if not isinstance(self.agent_llm, str):
return self.agent_llm, None
@@ -207,8 +401,8 @@ class AgentComponent(ToolCallingAgentComponent):
return self._build_llm_model(component_class, inputs, prefix), display_name
- except Exception as e:
- logger.error(f"Error building {self.agent_llm} language model: {e!s}")
+ except (AttributeError, ValueError, TypeError, RuntimeError) as e:
+ await logger.aerror(f"Error building {self.agent_llm} language model: {e!s}")
msg = f"Failed to initialize language model: {e!s}"
raise ValueError(msg) from e
@@ -289,6 +483,7 @@ class AgentComponent(ToolCallingAgentComponent):
build_config.update(fields_to_add)
# Reset input types for agent_llm
build_config["agent_llm"]["input_types"] = []
+ build_config["agent_llm"]["display_name"] = "Model Provider"
elif field_value == "Custom":
# Delete all provider fields
self.delete_fields(build_config, ALL_PROVIDER_FIELDS)
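The new json_response path above validates the extracted JSON against a pydantic model built from the Output Schema table via build_model_from_schema. A rough standalone sketch of that flow, using pydantic's create_model as a stand-in, since build_model_from_schema's implementation is not part of this diff:

    from pydantic import ValidationError, create_model

    # One row per Output Schema table entry, after _preprocess_schema normalization.
    schema = [
        {"name": "title", "type": "str", "description": "Document title", "multiple": False},
        {"name": "tags", "type": "str", "description": "Tags", "multiple": True},
    ]
    type_map = {"str": str, "int": int, "float": float, "bool": bool, "dict": dict}

    # "multiple" wraps the field type in a list; Ellipsis marks the field as required.
    fields = {
        row["name"]: (list[type_map[row["type"]]] if row["multiple"] else type_map[row["type"]], ...)
        for row in schema
    }
    OutputModel = create_model("OutputModel", **fields)

    try:
        validated = OutputModel.model_validate({"title": "hello", "tags": ["a", "b"]}).model_dump()
    except ValidationError as e:
        validated = {"data": {"title": "hello", "tags": ["a", "b"]}, "validation_error": str(e)}
    print(validated)

As in build_structured_output_base above, items that fail validation are kept but annotated with the validation error rather than discarded.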
diff --git a/src/backend/base/langflow/components/agents/mcp_component.py b/src/backend/base/langflow/components/agents/mcp_component.py
index fe3c2c69e..bdb5a99e5 100644
--- a/src/backend/base/langflow/components/agents/mcp_component.py
+++ b/src/backend/base/langflow/components/agents/mcp_component.py
@@ -21,11 +21,10 @@ from langflow.io.schema import flatten_schema, schema_to_langflow_inputs
from langflow.logging import logger
from langflow.schema.dataframe import DataFrame
from langflow.schema.message import Message
-from langflow.services.auth.utils import create_user_longterm_token
# Import get_server from the backend API
from langflow.services.database.models.user.crud import get_user_by_id
-from langflow.services.deps import get_session, get_settings_service, get_storage_service
+from langflow.services.deps import get_settings_service, get_storage_service, session_scope
class MCPToolsComponent(ComponentWithCache):
@@ -118,12 +117,12 @@ class MCPToolsComponent(ComponentWithCache):
schema_inputs = schema_to_langflow_inputs(input_schema)
if not schema_inputs:
msg = f"No input parameters defined for tool '{tool_obj.name}'"
- logger.warning(msg)
+ await logger.awarning(msg)
return []
except Exception as e:
msg = f"Error validating schema inputs: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise ValueError(msg) from e
else:
return schema_inputs
@@ -154,9 +153,11 @@ class MCPToolsComponent(ComponentWithCache):
return self.tools, {"name": server_name, "config": server_config_from_value}
try:
- async for db in get_session():
- user_id, _ = await create_user_longterm_token(db)
- current_user = await get_user_by_id(db, user_id)
+ async with session_scope() as db:
+ if not self.user_id:
+ msg = "User ID is required for fetching MCP tools."
+ raise ValueError(msg)
+ current_user = await get_user_by_id(db, self.user_id)
# Try to get server config from DB/API
server_config = await get_server(
@@ -167,47 +168,48 @@ class MCPToolsComponent(ComponentWithCache):
settings_service=get_settings_service(),
)
- # If get_server returns empty but we have a config, use it
- if not server_config and server_config_from_value:
- server_config = server_config_from_value
+ # If get_server returns empty but we have a config, use it
+ if not server_config and server_config_from_value:
+ server_config = server_config_from_value
- if not server_config:
- self.tools = []
- return [], {"name": server_name, "config": server_config}
+ if not server_config:
+ self.tools = []
+ return [], {"name": server_name, "config": server_config}
- _, tool_list, tool_cache = await update_tools(
- server_name=server_name,
- server_config=server_config,
- mcp_stdio_client=self.stdio_client,
- mcp_sse_client=self.sse_client,
- )
+ _, tool_list, tool_cache = await update_tools(
+ server_name=server_name,
+ server_config=server_config,
+ mcp_stdio_client=self.stdio_client,
+ mcp_sse_client=self.sse_client,
+ )
- self.tool_names = [tool.name for tool in tool_list if hasattr(tool, "name")]
- self._tool_cache = tool_cache
- self.tools = tool_list
- # Cache the result using shared cache
- cache_data = {
- "tools": tool_list,
- "tool_names": self.tool_names,
- "tool_cache": tool_cache,
- "config": server_config,
- }
+ self.tool_names = [tool.name for tool in tool_list if hasattr(tool, "name")]
+ self._tool_cache = tool_cache
+ self.tools = tool_list
+ # Cache the result using shared cache
+ cache_data = {
+ "tools": tool_list,
+ "tool_names": self.tool_names,
+ "tool_cache": tool_cache,
+ "config": server_config,
+ }
- # Safely update the servers cache
- current_servers_cache = safe_cache_get(self._shared_component_cache, "servers", {})
- if isinstance(current_servers_cache, dict):
- current_servers_cache[server_name] = cache_data
- safe_cache_set(self._shared_component_cache, "servers", current_servers_cache)
+ # Safely update the servers cache
+ current_servers_cache = safe_cache_get(self._shared_component_cache, "servers", {})
+ if isinstance(current_servers_cache, dict):
+ current_servers_cache[server_name] = cache_data
+ safe_cache_set(self._shared_component_cache, "servers", current_servers_cache)
- return tool_list, {"name": server_name, "config": server_config}
except (TimeoutError, asyncio.TimeoutError) as e:
msg = f"Timeout updating tool list: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise TimeoutError(msg) from e
except Exception as e:
msg = f"Error updating tool list: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise ValueError(msg) from e
+ else:
+ return tool_list, {"name": server_name, "config": server_config}
async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
"""Toggle the visibility of connection-specific fields based on the selected mode."""
@@ -221,7 +223,7 @@ class MCPToolsComponent(ComponentWithCache):
build_config["tool"]["placeholder"] = "Select a tool"
except (TimeoutError, asyncio.TimeoutError) as e:
msg = f"Timeout updating tool list: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
if not build_config["tools_metadata"]["show"]:
build_config["tool"]["show"] = True
build_config["tool"]["options"] = []
@@ -247,7 +249,7 @@ class MCPToolsComponent(ComponentWithCache):
break
if tool_obj is None:
msg = f"Tool {field_value} not found in available tools: {self.tools}"
- logger.warning(msg)
+ await logger.awarning(msg)
return build_config
await self._update_tool_config(build_config, field_value)
except Exception as e:
@@ -331,7 +333,7 @@ class MCPToolsComponent(ComponentWithCache):
except Exception as e:
msg = f"Error in update_build_config: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise ValueError(msg) from e
else:
return build_config
@@ -384,7 +386,7 @@ class MCPToolsComponent(ComponentWithCache):
msg = f"Tool {tool_name} not found in available tools: {self.tools}"
self.remove_non_default_keys(build_config)
build_config["tool"]["value"] = ""
- logger.warning(msg)
+ await logger.awarning(msg)
return
try:
@@ -402,14 +404,14 @@ class MCPToolsComponent(ComponentWithCache):
self.schema_inputs = await self._validate_schema_inputs(tool_obj)
if not self.schema_inputs:
msg = f"No input parameters to configure for tool '{tool_name}'"
- logger.info(msg)
+ await logger.ainfo(msg)
return
# Add new inputs to build config
for schema_input in self.schema_inputs:
if not schema_input or not hasattr(schema_input, "name"):
msg = "Invalid schema input detected, skipping"
- logger.warning(msg)
+ await logger.awarning(msg)
continue
try:
@@ -426,16 +428,16 @@ class MCPToolsComponent(ComponentWithCache):
except (AttributeError, KeyError, TypeError) as e:
msg = f"Error processing schema input {schema_input}: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
continue
except ValueError as e:
msg = f"Schema validation error for tool {tool_name}: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
self.schema_inputs = []
return
except (AttributeError, KeyError, TypeError) as e:
msg = f"Error updating tool config: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise ValueError(msg) from e
async def build_output(self) -> DataFrame:
@@ -472,7 +474,7 @@ class MCPToolsComponent(ComponentWithCache):
return DataFrame(data=[{"error": "You must select a tool"}])
except Exception as e:
msg = f"Error in build_output: {e!s}"
- logger.exception(msg)
+ await logger.aexception(msg)
raise ValueError(msg) from e
def _get_session_context(self) -> str | None:
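The MCP component above moves from iterating the get_session dependency to an async with session_scope() block and requires an explicit user_id instead of minting a long-term token. A standalone sketch of the context-manager shape session_scope presumably has (commit on success, rollback on error, always close); langflow's real implementation is not shown in this diff, and DummySession is a placeholder:

    import asyncio
    from contextlib import asynccontextmanager


    class DummySession:
        """Placeholder for an async SQLAlchemy session; only what the sketch needs."""

        async def commit(self):
            print("commit")

        async def rollback(self):
            print("rollback")

        async def close(self):
            print("close")


    @asynccontextmanager
    async def session_scope():
        session = DummySession()
        try:
            yield session
            await session.commit()
        except Exception:
            await session.rollback()
            raise
        finally:
            await session.close()


    async def main():
        # The component code above does all its DB work inside one such scope
        # and raises ValueError when self.user_id is missing.
        async with session_scope() as db:
            print("using", db)


    asyncio.run(main())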
diff --git a/src/backend/base/langflow/components/anthropic/anthropic.py b/src/backend/base/langflow/components/anthropic/anthropic.py
index f0a5144df..2cd427da0 100644
--- a/src/backend/base/langflow/components/anthropic/anthropic.py
+++ b/src/backend/base/langflow/components/anthropic/anthropic.py
@@ -1,7 +1,6 @@
from typing import Any, cast
import requests
-from loguru import logger
from pydantic import ValidationError
from langflow.base.models.anthropic_constants import (
@@ -14,6 +13,7 @@ from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.field_typing.range_spec import RangeSpec
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput, SliderInput
+from langflow.logging.logger import logger
from langflow.schema.dotdict import dotdict
@@ -101,7 +101,7 @@ class AnthropicModelComponent(LCModelComponent):
return output
- def get_models(self, tool_model_enabled: bool | None = None) -> list[str]:
+ def get_models(self, *, tool_model_enabled: bool | None = None) -> list[str]:
try:
import anthropic
@@ -129,7 +129,7 @@ class AnthropicModelComponent(LCModelComponent):
model_with_tool = ChatAnthropic(
model=model, # Use the current model being checked
anthropic_api_key=self.api_key,
- anthropic_api_url=cast(str, self.base_url) or DEFAULT_ANTHROPIC_API_URL,
+ anthropic_api_url=cast("str", self.base_url) or DEFAULT_ANTHROPIC_API_URL,
)
if (
@@ -177,8 +177,9 @@ class AnthropicModelComponent(LCModelComponent):
except (ImportError, ValueError, requests.exceptions.RequestException) as e:
logger.exception(f"Error getting model names: {e}")
ids = ANTHROPIC_MODELS
+ build_config.setdefault("model_name", {})
build_config["model_name"]["options"] = ids
- build_config["model_name"]["value"] = ids[0]
+ build_config["model_name"].setdefault("value", ids[0])
build_config["model_name"]["combobox"] = True
except Exception as e:
msg = f"Error getting model names: {e}"
diff --git a/src/backend/base/langflow/components/assemblyai/assemblyai_get_subtitles.py b/src/backend/base/langflow/components/assemblyai/assemblyai_get_subtitles.py
index 3d477497f..8d9c4caee 100644
--- a/src/backend/base/langflow/components/assemblyai/assemblyai_get_subtitles.py
+++ b/src/backend/base/langflow/components/assemblyai/assemblyai_get_subtitles.py
@@ -1,8 +1,8 @@
import assemblyai as aai
-from loguru import logger
from langflow.custom.custom_component.component import Component
from langflow.io import DataInput, DropdownInput, IntInput, Output, SecretStrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
@@ -58,7 +58,7 @@ class AssemblyAIGetSubtitles(Component):
transcript = aai.Transcript.get_by_id(transcript_id)
except Exception as e: # noqa: BLE001
error = f"Getting transcription failed: {e}"
- logger.opt(exception=True).debug(error)
+ logger.debug(error, exc_info=True)
self.status = error
return Data(data={"error": error})
diff --git a/src/backend/base/langflow/components/assemblyai/assemblyai_lemur.py b/src/backend/base/langflow/components/assemblyai/assemblyai_lemur.py
index ec5bbed5a..94152e2d0 100644
--- a/src/backend/base/langflow/components/assemblyai/assemblyai_lemur.py
+++ b/src/backend/base/langflow/components/assemblyai/assemblyai_lemur.py
@@ -1,8 +1,8 @@
import assemblyai as aai
-from loguru import logger
from langflow.custom.custom_component.component import Component
from langflow.io import DataInput, DropdownInput, FloatInput, IntInput, MultilineInput, Output, SecretStrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
@@ -131,7 +131,7 @@ class AssemblyAILeMUR(Component):
try:
response = self.perform_lemur_action(transcript_group, self.endpoint)
except Exception as e: # noqa: BLE001
- logger.opt(exception=True).debug("Error running LeMUR")
+ logger.debug("Error running LeMUR", exc_info=True)
error = f"An Error happened: {e}"
self.status = error
return Data(data={"error": error})
diff --git a/src/backend/base/langflow/components/assemblyai/assemblyai_list_transcripts.py b/src/backend/base/langflow/components/assemblyai/assemblyai_list_transcripts.py
index a9c101b0a..eb9033163 100644
--- a/src/backend/base/langflow/components/assemblyai/assemblyai_list_transcripts.py
+++ b/src/backend/base/langflow/components/assemblyai/assemblyai_list_transcripts.py
@@ -1,8 +1,8 @@
import assemblyai as aai
-from loguru import logger
from langflow.custom.custom_component.component import Component
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
@@ -86,7 +86,7 @@ class AssemblyAIListTranscripts(Component):
transcripts = convert_page_to_data_list(page)
except Exception as e: # noqa: BLE001
- logger.opt(exception=True).debug("Error listing transcripts")
+ logger.debug("Error listing transcripts", exc_info=True)
error_data = Data(data={"error": f"An error occurred: {e}"})
self.status = [error_data]
return [error_data]
diff --git a/src/backend/base/langflow/components/assemblyai/assemblyai_poll_transcript.py b/src/backend/base/langflow/components/assemblyai/assemblyai_poll_transcript.py
index e3795f849..38982402b 100644
--- a/src/backend/base/langflow/components/assemblyai/assemblyai_poll_transcript.py
+++ b/src/backend/base/langflow/components/assemblyai/assemblyai_poll_transcript.py
@@ -1,9 +1,9 @@
import assemblyai as aai
-from loguru import logger
from langflow.custom.custom_component.component import Component
from langflow.field_typing.range_spec import RangeSpec
from langflow.io import DataInput, FloatInput, Output, SecretStrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
@@ -54,7 +54,7 @@ class AssemblyAITranscriptionJobPoller(Component):
transcript = aai.Transcript.get_by_id(self.transcript_id.data["transcript_id"])
except Exception as e: # noqa: BLE001
error = f"Getting transcription failed: {e}"
- logger.opt(exception=True).debug(error)
+ logger.debug(error, exc_info=True)
self.status = error
return Data(data={"error": error})
diff --git a/src/backend/base/langflow/components/assemblyai/assemblyai_start_transcript.py b/src/backend/base/langflow/components/assemblyai/assemblyai_start_transcript.py
index 36da3e3cc..470c2cd46 100644
--- a/src/backend/base/langflow/components/assemblyai/assemblyai_start_transcript.py
+++ b/src/backend/base/langflow/components/assemblyai/assemblyai_start_transcript.py
@@ -1,10 +1,10 @@
from pathlib import Path
import assemblyai as aai
-from loguru import logger
from langflow.custom.custom_component.component import Component
from langflow.io import BoolInput, DropdownInput, FileInput, MessageTextInput, Output, SecretStrInput
+from langflow.logging.logger import logger
from langflow.schema.data import Data
@@ -176,7 +176,7 @@ class AssemblyAITranscriptionJobCreator(Component):
try:
transcript = aai.Transcriber().submit(audio, config=config)
except Exception as e: # noqa: BLE001
- logger.opt(exception=True).debug("Error submitting transcription job")
+ logger.debug("Error submitting transcription job", exc_info=True)
self.status = f"An error occurred: {e}"
return Data(data={"error": f"An error occurred: {e}"})
diff --git a/src/backend/base/langflow/components/cassandra/__init__.py b/src/backend/base/langflow/components/cassandra/__init__.py
new file mode 100644
index 000000000..5a07a6d13
--- /dev/null
+++ b/src/backend/base/langflow/components/cassandra/__init__.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from langflow.components._importing import import_mod
+
+if TYPE_CHECKING:
+ from .cassandra import CassandraVectorStoreComponent
+ from .cassandra_chat import CassandraChatMemory
+ from .cassandra_graph import CassandraGraphVectorStoreComponent
+
+_dynamic_imports = {
+ "CassandraVectorStoreComponent": "cassandra",
+ "CassandraGraphVectorStoreComponent": "cassandra_graph",
+ "CassandraChatMemory": "cassandra_chat",
+}
+
+__all__ = [
+ "CassandraChatMemory",
+ "CassandraGraphVectorStoreComponent",
+ "CassandraVectorStoreComponent",
+]
+
+
+def __getattr__(attr_name: str) -> Any:
+ """Lazily import Cassandra components on attribute access."""
+ if attr_name not in _dynamic_imports:
+ msg = f"module '{__name__}' has no attribute '{attr_name}'"
+ raise AttributeError(msg)
+ try:
+ result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
+ except (ModuleNotFoundError, ImportError, AttributeError) as e:
+ msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
+ raise AttributeError(msg) from e
+ globals()[attr_name] = result
+ return result
+
+
+def __dir__() -> list[str]:
+ return list(__all__)
diff --git a/src/backend/base/langflow/components/vectorstores/cassandra.py b/src/backend/base/langflow/components/cassandra/cassandra.py
similarity index 100%
rename from src/backend/base/langflow/components/vectorstores/cassandra.py
rename to src/backend/base/langflow/components/cassandra/cassandra.py
diff --git a/src/backend/base/langflow/components/datastax/cassandra.py b/src/backend/base/langflow/components/cassandra/cassandra_chat.py
similarity index 100%
rename from src/backend/base/langflow/components/datastax/cassandra.py
rename to src/backend/base/langflow/components/cassandra/cassandra_chat.py
diff --git a/src/backend/base/langflow/components/vectorstores/cassandra_graph.py b/src/backend/base/langflow/components/cassandra/cassandra_graph.py
similarity index 100%
rename from src/backend/base/langflow/components/vectorstores/cassandra_graph.py
rename to src/backend/base/langflow/components/cassandra/cassandra_graph.py
diff --git a/src/backend/base/langflow/components/chroma/__init__.py b/src/backend/base/langflow/components/chroma/__init__.py
new file mode 100644
index 000000000..2bd5f2324
--- /dev/null
+++ b/src/backend/base/langflow/components/chroma/__init__.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from langflow.components._importing import import_mod
+
+if TYPE_CHECKING:
+ from .chroma import ChromaVectorStoreComponent
+
+_dynamic_imports = {
+ "ChromaVectorStoreComponent": "chroma",
+}
+
+__all__ = [
+ "ChromaVectorStoreComponent",
+]
+
+
+def __getattr__(attr_name: str) -> Any:
+ """Lazily import Chroma components on attribute access."""
+ if attr_name not in _dynamic_imports:
+ msg = f"module '{__name__}' has no attribute '{attr_name}'"
+ raise AttributeError(msg)
+ try:
+ result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
+ except (ModuleNotFoundError, ImportError, AttributeError) as e:
+ msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
+ raise AttributeError(msg) from e
+ globals()[attr_name] = result
+ return result
+
+
+def __dir__() -> list[str]:
+ return list(__all__)
diff --git a/src/backend/base/langflow/components/vectorstores/chroma.py b/src/backend/base/langflow/components/chroma/chroma.py
similarity index 100%
rename from src/backend/base/langflow/components/vectorstores/chroma.py
rename to src/backend/base/langflow/components/chroma/chroma.py
diff --git a/src/backend/base/langflow/components/clickhouse/__init__.py b/src/backend/base/langflow/components/clickhouse/__init__.py
new file mode 100644
index 000000000..2245a7d39
--- /dev/null
+++ b/src/backend/base/langflow/components/clickhouse/__init__.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from langflow.components._importing import import_mod
+
+if TYPE_CHECKING:
+ from .clickhouse import ClickhouseVectorStoreComponent
+
+_dynamic_imports = {
+ "ClickhouseVectorStoreComponent": "clickhouse",
+}
+
+__all__ = [
+ "ClickhouseVectorStoreComponent",
+]
+
+
+def __getattr__(attr_name: str) -> Any:
+ """Lazily import ClickHouse components on attribute access."""
+ if attr_name not in _dynamic_imports:
+ msg = f"module '{__name__}' has no attribute '{attr_name}'"
+ raise AttributeError(msg)
+ try:
+ result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
+ except (ModuleNotFoundError, ImportError, AttributeError) as e:
+ msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
+ raise AttributeError(msg) from e
+ globals()[attr_name] = result
+ return result
+
+
+def __dir__() -> list[str]:
+ return list(__all__)
diff --git a/src/backend/base/langflow/components/vectorstores/clickhouse.py b/src/backend/base/langflow/components/clickhouse/clickhouse.py
similarity index 100%
rename from src/backend/base/langflow/components/vectorstores/clickhouse.py
rename to src/backend/base/langflow/components/clickhouse/clickhouse.py
diff --git a/src/backend/base/langflow/components/composio/__init__.py b/src/backend/base/langflow/components/composio/__init__.py
index d9afb88aa..ca8b64314 100644
--- a/src/backend/base/langflow/components/composio/__init__.py
+++ b/src/backend/base/langflow/components/composio/__init__.py
@@ -9,16 +9,32 @@ if TYPE_CHECKING:
from .github_composio import ComposioGitHubAPIComponent
from .gmail_composio import ComposioGmailAPIComponent
from .googlecalendar_composio import ComposioGoogleCalendarAPIComponent
+ from .googlemeet_composio import ComposioGooglemeetAPIComponent
+ from .googletasks_composio import ComposioGoogleTasksAPIComponent
+ from .linear_composio import ComposioLinearAPIComponent
from .outlook_composio import ComposioOutlookAPIComponent
+ from .reddit_composio import ComposioRedditAPIComponent
from .slack_composio import ComposioSlackAPIComponent
+ from .slackbot_composio import ComposioSlackbotAPIComponent
+ from .supabase_composio import ComposioSupabaseAPIComponent
+ from .todoist_composio import ComposioTodoistAPIComponent
+ from .youtube_composio import ComposioYoutubeAPIComponent
_dynamic_imports = {
"ComposioAPIComponent": "composio_api",
"ComposioGitHubAPIComponent": "github_composio",
"ComposioGmailAPIComponent": "gmail_composio",
"ComposioGoogleCalendarAPIComponent": "googlecalendar_composio",
+ "ComposioGooglemeetAPIComponent": "googlemeet_composio",
"ComposioOutlookAPIComponent": "outlook_composio",
"ComposioSlackAPIComponent": "slack_composio",
+ "ComposioGoogleTasksAPIComponent": "googletasks_composio",
+ "ComposioLinearAPIComponent": "linear_composio",
+ "ComposioRedditAPIComponent": "reddit_composio",
+ "ComposioSlackbotAPIComponent": "slackbot_composio",
+ "ComposioSupabaseAPIComponent": "supabase_composio",
+ "ComposioTodoistAPIComponent": "todoist_composio",
+ "ComposioYoutubeAPIComponent": "youtube_composio",
}
__all__ = [
@@ -26,8 +42,16 @@ __all__ = [
"ComposioGitHubAPIComponent",
"ComposioGmailAPIComponent",
"ComposioGoogleCalendarAPIComponent",
+ "ComposioGoogleTasksAPIComponent",
+ "ComposioGooglemeetAPIComponent",
+ "ComposioLinearAPIComponent",
"ComposioOutlookAPIComponent",
+ "ComposioRedditAPIComponent",
"ComposioSlackAPIComponent",
+ "ComposioSlackbotAPIComponent",
+ "ComposioSupabaseAPIComponent",
+ "ComposioTodoistAPIComponent",
+ "ComposioYoutubeAPIComponent",
]
diff --git a/src/backend/base/langflow/components/composio/composio_api.py b/src/backend/base/langflow/components/composio/composio_api.py
index a7102c320..6908e7e4a 100644
--- a/src/backend/base/langflow/components/composio/composio_api.py
+++ b/src/backend/base/langflow/components/composio/composio_api.py
@@ -2,10 +2,10 @@
from collections.abc import Sequence
from typing import Any
-from composio import Action, App
+from composio import Composio
+from composio_langchain import LangchainProvider
# Third-party imports
-from composio_langchain import ComposioToolSet
from langchain_core.tools import Tool
# Local imports
@@ -69,27 +69,7 @@ class ComposioAPIComponent(LCToolComponent):
Output(name="tools", display_name="Tools", method="build_tool"),
]
- def sanitize_action_name(self, action_name: str) -> str:
- # TODO: Maybe restore
- return action_name
-
- # We want to use title case, and replace underscores with spaces
- sanitized_name = action_name.replace("_", " ").title()
-
- # Now we want to remove everything from and including the first dot
- return sanitized_name.replace(self.tool_name.title() + " ", "")
-
- def desanitize_action_name(self, action_name: str) -> str:
- # TODO: Maybe restore
- return action_name
-
- # We want to reverse what we did above
- unsanitized_name = action_name.replace(" ", "_").upper()
-
- # Append the tool_name to it at the beginning, followed by a dot, in all CAPS
- return f"{self.tool_name.upper()}_{unsanitized_name}"
-
- def validate_tool(self, build_config: dict, field_value: Any, connected_app_names: list) -> dict:
+ def validate_tool(self, build_config: dict, field_value: Any, tool_name: str | None = None) -> dict:
# Get the index of the selected tool in the list of options
selected_tool_index = next(
(
@@ -108,35 +88,40 @@ class ComposioAPIComponent(LCToolComponent):
build_config["actions"]["helper_text"] = ""
build_config["actions"]["helper_text_metadata"] = {"icon": "Check", "variant": "success"}
- # Get the list of actions available
- all_actions = list(Action.all())
- authenticated_actions = sorted(
- [
- action
- for action in all_actions
- if action.app.lower() in list(connected_app_names) and action.app.lower() == self.tool_name.lower()
- ],
- key=lambda x: x.name,
- )
+ try:
+ composio = self._build_wrapper()
+ current_tool = tool_name or getattr(self, "tool_name", None)
+ if not current_tool:
+ self.log("No tool name available for validate_tool")
+ return build_config
+
+ toolkit_slug = current_tool.lower()
+
+ tools = composio.tools.get(user_id=self.entity_id, toolkits=[toolkit_slug])
+
+ authenticated_actions = []
+ for tool in tools:
+ if hasattr(tool, "name"):
+ action_name = tool.name
+ display_name = action_name.replace("_", " ").title()
+ authenticated_actions.append({"name": action_name, "display_name": display_name})
+ except (ValueError, ConnectionError, AttributeError) as e:
+ self.log(f"Error getting actions for {current_tool or 'unknown tool'}: {e}")
+ authenticated_actions = []
- # Return the list of action names
build_config["actions"]["options"] = [
{
- "name": self.sanitize_action_name(action.name),
+ "name": action["name"],
}
for action in authenticated_actions
]
- # Lastly, we need to show the actions field
build_config["actions"]["show"] = True
-
return build_config
def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
- # If the list of tools is not available, always update it
if field_name == "api_key" or (self.api_key and not build_config["tool_name"]["options"]):
if field_name == "api_key" and not field_value:
- # Reset the list of tools
build_config["tool_name"]["options"] = []
build_config["tool_name"]["value"] = ""
@@ -147,113 +132,94 @@ class ComposioAPIComponent(LCToolComponent):
return build_config
- # TODO: Re-enable dynamic tool list
- # Initialize the Composio ToolSet with your API key
- # toolset = ComposioToolSet(api_key=self.api_key)
-
- # Get the entity (e.g., "default" for your user)
- # entity = toolset.get_entity(self.entity_id)
-
- # Get all available apps
- # all_apps = entity.client.apps.get()
-
- # Build an object with name, icon, link
+ # Build the list of available tools
build_config["tool_name"]["options"] = [
{
- "name": app.title(), # TODO: Switch to app.name
- "icon": app, # TODO: Switch to app.name
+ "name": app.title(),
+ "icon": app,
"link": (
build_config["tool_name"]["options"][ind]["link"]
if build_config["tool_name"]["options"]
else ""
),
}
- # for app in sorted(all_apps, key=lambda x: x.name)
for ind, app in enumerate(enabled_tools)
]
return build_config
- # Handle the click of the Tool Name connect button
if field_name == "tool_name" and field_value:
- # Get the list of apps (tools) we have connected
- toolset = ComposioToolSet(api_key=self.api_key)
- connected_apps = [app for app in toolset.get_connected_accounts() if app.status == "ACTIVE"]
+ composio = self._build_wrapper()
- # Get the unique list of appName from the connected apps
- connected_app_names = [app.appName.lower() for app in connected_apps]
-
- # Clear out the list of selected actions
- build_config["actions"]["show"] = True
- build_config["actions"]["options"] = []
- build_config["actions"]["value"] = ""
-
- # Clear out any helper text
- build_config["tool_name"]["helper_text"] = ""
- build_config["tool_name"]["helper_text_metadata"] = {}
-
- # If it's a dictionary, we need to do validation
- if isinstance(field_value, dict):
- # If the current field value is a dictionary, it means the user has selected a tool
- if "validate" not in field_value:
- return build_config
-
- # Check if the selected tool is connected
- check_app = field_value["validate"].lower()
-
- # If the tool selected is NOT what we are validating, return the build config
- if check_app != self.tool_name.lower():
- # Set the helper text and helper text metadata field of the actions now
- build_config["actions"]["helper_text"] = "Please connect before selecting actions."
- build_config["actions"]["helper_text_metadata"] = {
- "icon": "OctagonAlert",
- "variant": "destructive",
- }
-
- return build_config
-
- # Check if the tool is already validated
- if check_app not in connected_app_names:
- return build_config
-
- # Validate the selected tool
- return self.validate_tool(build_config, field_value, connected_app_names)
-
- # Check if the tool is already validated
- if field_value.lower() in connected_app_names:
- return self.validate_tool(build_config, field_value, connected_app_names)
-
- # Get the entity (e.g., "default" for your user)
- entity = toolset.get_entity(id=self.entity_id)
-
- # Set the metadata for the actions
- build_config["actions"]["helper_text_metadata"] = {"icon": "OctagonAlert", "variant": "destructive"}
-
- # Get the index of the selected tool in the list of options
- selected_tool_index = next(
- (ind for ind, tool in enumerate(build_config["tool_name"]["options"]) if tool["name"] == field_value),
- None,
+ current_tool_name = (
+ field_value
+ if isinstance(field_value, str)
+ else field_value.get("validate")
+ if isinstance(field_value, dict) and "validate" in field_value
+ else getattr(self, "tool_name", None)
)
- # Initiate a GitHub connection and get the redirect URL
- try:
- connection_request = entity.initiate_connection(app_name=getattr(App, field_value.upper()))
- except Exception as _: # noqa: BLE001
- # Indicate that there was an error connecting to the tool
- build_config["tool_name"]["options"][selected_tool_index]["link"] = "error"
- build_config["tool_name"]["helper_text"] = f"Error connecting to {field_value}"
- build_config["tool_name"]["helper_text_metadata"] = {
- "icon": "OctagonAlert",
- "variant": "destructive",
- }
-
+ if not current_tool_name:
+ self.log("No tool name available for connection check")
return build_config
- # Print the direct HTTP link for authentication
- build_config["tool_name"]["options"][selected_tool_index]["link"] = connection_request.redirectUrl
+ try:
+ toolkit_slug = current_tool_name.lower()
- # Set the helper text and helper text metadata field of the actions now
- build_config["actions"]["helper_text"] = "Please connect before selecting actions."
+ connection_list = composio.connected_accounts.list(
+ user_ids=[self.entity_id], toolkit_slugs=[toolkit_slug]
+ )
+
+ # Check for active connections
+ has_active_connections = False
+ if (
+ connection_list
+ and hasattr(connection_list, "items")
+ and connection_list.items
+ and isinstance(connection_list.items, list)
+ and len(connection_list.items) > 0
+ ):
+ for connection in connection_list.items:
+ if getattr(connection, "status", None) == "ACTIVE":
+ has_active_connections = True
+ break
+
+ # Get the index of the selected tool in the list of options
+ selected_tool_index = next(
+ (
+ ind
+ for ind, tool in enumerate(build_config["tool_name"]["options"])
+ if tool["name"] == current_tool_name.title()
+ ),
+ None,
+ )
+
+ if has_active_connections:
+ # User has active connection
+ if selected_tool_index is not None:
+ build_config["tool_name"]["options"][selected_tool_index]["link"] = "validated"
+
+ # If it's a validation request, validate the tool
+ if (isinstance(field_value, dict) and "validate" in field_value) or isinstance(field_value, str):
+ return self.validate_tool(build_config, field_value, current_tool_name)
+ else:
+ # No active connection - create OAuth connection
+ try:
+ connection = composio.toolkits.authorize(user_id=self.entity_id, toolkit=toolkit_slug)
+ redirect_url = getattr(connection, "redirect_url", None)
+
+ if redirect_url and redirect_url.startswith(("http://", "https://")):
+ if selected_tool_index is not None:
+ build_config["tool_name"]["options"][selected_tool_index]["link"] = redirect_url
+ elif selected_tool_index is not None:
+ build_config["tool_name"]["options"][selected_tool_index]["link"] = "error"
+ except (ValueError, ConnectionError, AttributeError) as e:
+ self.log(f"Error creating OAuth connection: {e}")
+ if selected_tool_index is not None:
+ build_config["tool_name"]["options"][selected_tool_index]["link"] = "error"
+
+ except (ValueError, ConnectionError, AttributeError) as e:
+ self.log(f"Error checking connection status: {e}")
return build_config
@@ -263,16 +229,30 @@ class ComposioAPIComponent(LCToolComponent):
Returns:
Sequence[Tool]: List of configured Composio tools.
"""
- composio_toolset = self._build_wrapper()
- return composio_toolset.get_tools(
- actions=[self.desanitize_action_name(action["name"]) for action in self.actions]
- )
+ composio = self._build_wrapper()
+ action_names = [action["name"] for action in self.actions]
- def _build_wrapper(self) -> ComposioToolSet:
- """Build the Composio toolset wrapper.
+ # Get toolkits from action names
+ toolkits = set()
+ for action_name in action_names:
+ if "_" in action_name:
+ toolkit = action_name.split("_")[0].lower()
+ toolkits.add(toolkit)
+
+ if not toolkits:
+ return []
+
+ # Get all tools for the relevant toolkits
+ all_tools = composio.tools.get(user_id=self.entity_id, toolkits=list(toolkits))
+
+ # Filter to only the specific actions we want using list comprehension
+ return [tool for tool in all_tools if hasattr(tool, "name") and tool.name in action_names]
+
+ def _build_wrapper(self) -> Composio:
+ """Build the Composio wrapper using new SDK.
Returns:
- ComposioToolSet: The initialized toolset.
+ Composio: The initialized Composio client.
Raises:
ValueError: If the API key is not found or invalid.
@@ -281,7 +261,7 @@ class ComposioAPIComponent(LCToolComponent):
if not self.api_key:
msg = "Composio API Key is required"
raise ValueError(msg)
- return ComposioToolSet(api_key=self.api_key, entity_id=self.entity_id)
+ return Composio(api_key=self.api_key, provider=LangchainProvider())
except ValueError as e:
self.log(f"Error building Composio wrapper: {e}")
msg = "Please provide a valid Composio API Key in the component settings"
diff --git a/src/backend/base/langflow/components/composio/dropbox_compnent.py b/src/backend/base/langflow/components/composio/dropbox_compnent.py
new file mode 100644
index 000000000..cf67d7114
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/dropbox_compnent.py
@@ -0,0 +1,11 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioDropboxAPIComponent(ComposioBaseComponent):
+ display_name: str = "Dropbox"
+ icon = "Dropbox"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "dropbox"
+
+ def set_default_tools(self):
+ """Set the default tools for the Dropbox component."""
diff --git a/src/backend/base/langflow/components/composio/github_composio.py b/src/backend/base/langflow/components/composio/github_composio.py
index bf5fdfa29..9e4c13281 100644
--- a/src/backend/base/langflow/components/composio/github_composio.py
+++ b/src/backend/base/langflow/components/composio/github_composio.py
@@ -1,649 +1,11 @@
-import json
-from typing import Any
-
-from composio import Action
-
from langflow.base.composio.composio_base import ComposioBaseComponent
-from langflow.inputs import (
- BoolInput,
- IntInput,
- MessageTextInput,
-)
-from langflow.logging import logger
class ComposioGitHubAPIComponent(ComposioBaseComponent):
- """GitHub API component for interacting with GitHub services."""
-
display_name: str = "GitHub"
- description: str = "GitHub API"
icon = "Github"
documentation: str = "https://docs.composio.dev"
app_name = "github"
- # GitHub-specific actions
- _actions_data: dict = {
- "GITHUB_CREATE_A_PULL_REQUEST": {
- "display_name": "Create A Pull Request",
- "action_fields": [
- "GITHUB_CREATE_A_PULL_REQUEST_owner",
- "GITHUB_CREATE_A_PULL_REQUEST_repo",
- "GITHUB_CREATE_A_PULL_REQUEST_title",
- "GITHUB_CREATE_A_PULL_REQUEST_head",
- "GITHUB_CREATE_A_PULL_REQUEST_head_repo",
- "GITHUB_CREATE_A_PULL_REQUEST_base",
- "GITHUB_CREATE_A_PULL_REQUEST_body",
- "GITHUB_CREATE_A_PULL_REQUEST_maintainer_can_modify",
- "GITHUB_CREATE_A_PULL_REQUEST_draft",
- "GITHUB_CREATE_A_PULL_REQUEST_issue",
- ],
- },
- "GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER": {
- "display_name": "Star A Repository",
- "action_fields": [
- "GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER_owner",
- "GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER_repo",
- ],
- },
- "GITHUB_LIST_COMMITS": {
- "display_name": "List Commits",
- "action_fields": [
- "GITHUB_LIST_COMMITS_owner",
- "GITHUB_LIST_COMMITS_repo",
- "GITHUB_LIST_COMMITS_sha",
- "GITHUB_LIST_COMMITS_path",
- "GITHUB_LIST_COMMITS_author",
- "GITHUB_LIST_COMMITS_committer",
- "GITHUB_LIST_COMMITS_since",
- "GITHUB_LIST_COMMITS_until",
- "GITHUB_LIST_COMMITS_per_page",
- "GITHUB_LIST_COMMITS_page",
- ],
- },
- "GITHUB_GET_A_PULL_REQUEST": {
- "display_name": "Get A Pull Request",
- "action_fields": [
- "GITHUB_GET_A_PULL_REQUEST_owner",
- "GITHUB_GET_A_PULL_REQUEST_repo",
- "GITHUB_GET_A_PULL_REQUEST_pull_number",
- ],
- },
- "GITHUB_CREATE_AN_ISSUE": {
- "display_name": "Create An Issue",
- "action_fields": [
- "GITHUB_CREATE_AN_ISSUE_owner",
- "GITHUB_CREATE_AN_ISSUE_repo",
- "GITHUB_CREATE_AN_ISSUE_title",
- "GITHUB_CREATE_AN_ISSUE_body",
- "GITHUB_CREATE_AN_ISSUE_assignee",
- "GITHUB_CREATE_AN_ISSUE_milestone",
- "GITHUB_CREATE_AN_ISSUE_labels",
- "GITHUB_CREATE_AN_ISSUE_assignees",
- ],
- },
- "GITHUB_LIST_REPOSITORY_ISSUES": {
- "display_name": "List Repository Issues",
- "action_fields": [
- "GITHUB_LIST_REPOSITORY_ISSUES_owner",
- "GITHUB_LIST_REPOSITORY_ISSUES_repo",
- "GITHUB_LIST_REPOSITORY_ISSUES_milestone",
- "GITHUB_LIST_REPOSITORY_ISSUES_state",
- "GITHUB_LIST_REPOSITORY_ISSUES_assignee",
- "GITHUB_LIST_REPOSITORY_ISSUES_creator",
- "GITHUB_LIST_REPOSITORY_ISSUES_mentioned",
- "GITHUB_LIST_REPOSITORY_ISSUES_labels",
- "GITHUB_LIST_REPOSITORY_ISSUES_sort",
- "GITHUB_LIST_REPOSITORY_ISSUES_direction",
- "GITHUB_LIST_REPOSITORY_ISSUES_since",
- "GITHUB_LIST_REPOSITORY_ISSUES_per_page",
- "GITHUB_LIST_REPOSITORY_ISSUES_page",
- ],
- },
- "GITHUB_LIST_BRANCHES": {
- "display_name": "List Branches",
- "action_fields": [
- "GITHUB_LIST_BRANCHES_owner",
- "GITHUB_LIST_BRANCHES_repo",
- "GITHUB_LIST_BRANCHES_protected",
- "GITHUB_LIST_BRANCHES_per_page",
- "GITHUB_LIST_BRANCHES_page",
- ],
- },
- "GITHUB_LIST_PULL_REQUESTS": {
- "display_name": "List Pull Requests",
- "action_fields": [
- "GITHUB_LIST_PULL_REQUESTS_owner",
- "GITHUB_LIST_PULL_REQUESTS_repo",
- "GITHUB_LIST_PULL_REQUESTS_state",
- "GITHUB_LIST_PULL_REQUESTS_head",
- "GITHUB_LIST_PULL_REQUESTS_base",
- "GITHUB_LIST_PULL_REQUESTS_sort",
- "GITHUB_LIST_PULL_REQUESTS_direction",
- "GITHUB_LIST_PULL_REQUESTS_per_page",
- "GITHUB_LIST_PULL_REQUESTS_page",
- ],
- },
- }
-
- _all_fields = {field for action_data in _actions_data.values() for field in action_data["action_fields"]}
- _bool_variables = {
- "GITHUB_CREATE_A_PULL_REQUEST_maintainer_can_modify",
- "GITHUB_CREATE_A_PULL_REQUEST_draft",
- "GITHUB_LIST_BRANCHES_protected",
- }
-
- inputs = [
- *ComposioBaseComponent._base_inputs,
- MessageTextInput(
- name="GITHUB_CREATE_AN_ISSUE_owner",
- display_name="Owner",
- info="The account owner of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_AN_ISSUE_repo",
- display_name="Repo",
- info="The name of the repository. The name is not case sensitive. ",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_AN_ISSUE_title",
- display_name="Title",
- info="The title of the issue.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_AN_ISSUE_body",
- display_name="Body",
- info="The contents of the issue.",
- show=False,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_AN_ISSUE_assignee",
- display_name="Assignee",
- info="Login for the user that this issue should be assigned to. _NOTE: Only users with push access can set the assignee for new issues. The assignee is silently dropped otherwise. **This field is deprecated.**_ ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_AN_ISSUE_milestone",
- display_name="Milestone",
- info="Milestone",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_AN_ISSUE_labels",
- display_name="Labels",
- info="Labels to associate with this issue. _NOTE: Only users with push access can set labels for new issues. Labels are silently dropped otherwise._ ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_AN_ISSUE_assignees",
- display_name="Assignees",
- info="Logins for Users to assign to this issue. _NOTE: Only users with push access can set assignees for new issues. Assignees are silently dropped otherwise._ ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_PULL_REQUESTS_owner",
- display_name="Owner",
- info="The account owner of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_PULL_REQUESTS_repo",
- display_name="Repo",
- info="The name of the repository. The name is not case sensitive. ",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_PULL_REQUESTS_state",
- display_name="State",
- info="Either `open`, `closed`, or `all` to filter by state.",
- show=False,
- value="open",
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_PULL_REQUESTS_head",
- display_name="Head",
- info="Filter pulls by head user or head organization and branch name in the format of `user:ref-name` or `organization:ref-name`. For example: `github:new-script-format` or `octocat:test-branch`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_PULL_REQUESTS_base",
- display_name="Base",
- info="Filter pulls by base branch name. Example: `gh-pages`.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_PULL_REQUESTS_sort",
- display_name="Sort",
- info="What to sort results by. `popularity` will sort by the number of comments. `long-running` will sort by date created and will limit the results to pull requests that have been open for more than a month and have had activity within the past month. ", # noqa: E501
- show=False,
- value="created",
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_PULL_REQUESTS_direction",
- display_name="Direction",
- info="The direction of the sort. Default: `desc` when sort is `created` or sort is not specified, otherwise `asc`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- IntInput(
- name="GITHUB_LIST_PULL_REQUESTS_per_page",
- display_name="Per Page",
- info="The number of results per page (max 100)",
- show=False,
- value=1,
- advanced=True,
- ),
- IntInput(
- name="GITHUB_LIST_PULL_REQUESTS_page",
- display_name="Page",
- info="The page number of the results to fetch",
- show=False,
- value=1,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_owner",
- display_name="Owner",
- info="The account owner of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_repo",
- display_name="Repo",
- info="The name of the repository. The name is not case sensitive. ",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_title",
- display_name="Title",
- info="The title of the new pull request. Required unless `issue` is specified.",
- show=False,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_head",
- display_name="Head",
- info="The name of the branch where your changes are implemented. For cross-repository pull requests in the same network, namespace `head` with a user like this: `username:branch`. ", # noqa: E501
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_head_repo",
- display_name="Head Repo",
- info="The name of the repository where the changes in the pull request were made. This field is required for cross-repository pull requests if both repositories are owned by the same organization. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_base",
- display_name="Base",
- info="The name of the branch you want the changes pulled into. This should be an existing branch on the current repository. You cannot submit a pull request to one repository that requests a merge to a base of another repository. ", # noqa: E501
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_body",
- display_name="Body",
- info="The contents of the pull request.",
- show=False,
- ),
- BoolInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_maintainer_can_modify",
- display_name="Maintainer Can Modify",
- info="Indicates whether maintainers can modify the pull request",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_draft",
- display_name="Draft",
- info="Indicates whether the pull request is a draft",
- show=False,
- advanced=True,
- ),
- IntInput(
- name="GITHUB_CREATE_A_PULL_REQUEST_issue",
- display_name="Issue",
- info="An issue in the repository to convert to a pull request. The issue title, body, and comments will become the title, body, and comments on the new pull request. Required unless `title` is specified. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_owner",
- display_name="Owner",
- info="The account owner of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_repo",
- display_name="Repo",
- info="The name of the repository. The name is not case sensitive. ",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_milestone",
- display_name="Milestone",
- info="If an `integer` is passed, it should refer to a milestone by its `number` field. If the string `*` is passed, issues with any milestone are accepted. If the string `none` is passed, issues without milestones are returned. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_state",
- display_name="State",
- info="Indicates the state of the issues to return.",
- show=False,
- value="open",
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_assignee",
- display_name="Assignee",
- info="Can be the name of a user. Pass in `none` for issues with no assigned user, and `*` for issues assigned to any user. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_creator",
- display_name="Creator",
- info="The user that created the issue.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_mentioned",
- display_name="Mentioned",
- info="A user that's mentioned in the issue.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_labels",
- display_name="Labels",
- info="A list of comma separated label names. Example: `bug,ui,@high`",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_sort",
- display_name="Sort",
- info="What to sort results by",
- show=False,
- value="created",
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_direction",
- display_name="Direction",
- info="The direction to sort the results by",
- show=False,
- value="desc",
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_since",
- display_name="Since",
- info="Only show results that were last updated after the given time. This is a timestamp in ISO 8601 (https://en.wikipedia.org/wiki/ISO_8601) format: `YYYY-MM-DDTHH:MM:SSZ`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- IntInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_per_page",
- display_name="Per Page",
- info="The number of results per page (max 100)",
- show=False,
- value=1,
- advanced=True,
- ),
- IntInput(
- name="GITHUB_LIST_REPOSITORY_ISSUES_page",
- display_name="Page",
- info="The page number of the results to fetch",
- show=False,
- value=1,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_BRANCHES_owner",
- display_name="Owner",
- info="The account owner of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_BRANCHES_repo",
- display_name="Repo",
- info="The name of the repository. The name is not case sensitive. ",
- show=False,
- required=True,
- ),
- BoolInput(
- name="GITHUB_LIST_BRANCHES_protected",
- display_name="Protected",
- info="Setting to `true` returns only protected branches. When set to `false`, only unprotected branches are returned. Omitting this parameter returns all branches", # noqa: E501
- show=False,
- ),
- IntInput(
- name="GITHUB_LIST_BRANCHES_per_page",
- display_name="Per Page",
- info="The number of results per page (max 100)",
- show=False,
- value=30,
- advanced=True,
- ),
- IntInput(
- name="GITHUB_LIST_BRANCHES_page",
- display_name="Page",
- info="The page number of the results to fetch",
- show=False,
- value=1,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER_owner",
- display_name="Owner",
- info="The account owner of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER_repo",
- display_name="Repo",
- info="The name of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_GET_A_PULL_REQUEST_owner",
- display_name="Owner",
- info="The account owner of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_GET_A_PULL_REQUEST_repo",
- display_name="Repo",
- info="The name of the repository. The name is not case sensitive. ",
- show=False,
- required=True,
- ),
- IntInput(
- name="GITHUB_GET_A_PULL_REQUEST_pull_number",
- display_name="Pull Number",
- info="The number that identifies the pull request.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_COMMITS_owner",
- display_name="Owner",
- info="The account owner of the repository. The name is not case sensitive.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_COMMITS_repo",
- display_name="Repo",
- info="The name of the repository. The name is not case sensitive. ",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_COMMITS_sha",
- display_name="SHA",
- info="SHA or branch to start listing commits from. Default: the repository's default branch (usually `main`). ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_COMMITS_path",
- display_name="Path",
- info="Only commits containing this file path will be returned.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_COMMITS_author",
- display_name="Author",
- info="GitHub username or email address to use to filter by commit author.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_COMMITS_committer",
- display_name="Committer",
- info="GitHub username or email address to use to filter by commit committer.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_COMMITS_since",
- display_name="Since",
- info="Only show results that were last updated after the given time. This is a timestamp in ISO 8601 (https://en.wikipedia.org/wiki/ISO_8601) format: `YYYY-MM-DDTHH:MM:SSZ`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GITHUB_LIST_COMMITS_until",
- display_name="Until",
- info="Only commits before this date will be returned. This is a timestamp in ISO 8601 (https://en.wikipedia.org/wiki/ISO_8601) format: `YYYY-MM-DDTHH:MM:SSZ`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- IntInput(
- name="GITHUB_LIST_COMMITS_per_page",
- display_name="Per Page",
- info="The number of results per page (max 100)",
- show=False,
- value=1,
- advanced=True,
- ),
- IntInput(
- name="GITHUB_LIST_COMMITS_page",
- display_name="Page",
- info="The page number of the results to fetch",
- show=False,
- value=1,
- advanced=True,
- ),
- ]
-
- def execute_action(self):
- """Execute action and return response as Message."""
- toolset = self._build_wrapper()
-
- try:
- self._build_action_maps()
- # Get the display name from the action list
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else self.action
- # Use the display_to_key_map to get the action key
- action_key = self._display_to_key_map.get(display_name)
- if not action_key:
- msg = f"Invalid action: {display_name}"
- raise ValueError(msg)
-
- enum_name = getattr(Action, action_key)
- params = {}
- if action_key in self._actions_data:
- for field in self._actions_data[action_key]["action_fields"]:
- value = getattr(self, field)
-
- if value is None or value == "":
- continue
-
- if (
- field
- in [
- "GITHUB_CREATE_AN_ISSUE_labels",
- "GITHUB_CREATE_AN_ISSUE_assignees",
- "GITHUB_LIST_REPOSITORY_ISSUES_labels",
- ]
- and value
- ):
- value = [item.strip() for item in value.split(",")]
-
- if field in self._bool_variables:
- value = bool(value)
-
- param_name = field.replace(action_key + "_", "")
- params[param_name] = value
-
- result = toolset.execute_action(
- action=enum_name,
- params=params,
- )
- if not result.get("successful"):
- try:
- message_str = result.get("error", {})
- error_message = message_str.split("`")[1]
- error_msg_json = json.loads(error_message)
- except (IndexError, json.JSONDecodeError):
- return {"error": str(message_str)}
- return {
- "code": error_msg_json.get("status"),
- "message": error_msg_json.get("message"),
- "documentation_url": error_msg_json.get("documentation_url"),
- }
-
- result_data = result.get("data", [])
- if (
- len(result_data) != 1
- and not self._actions_data.get(action_key, {}).get("result_field")
- and self._actions_data.get(action_key, {}).get("get_result_field")
- ):
- msg = f"Expected a dict with a single key, got {len(result_data)} keys: {result_data.keys()}"
- raise ValueError(msg)
- if isinstance(result_data.get("details"), list):
- return result_data.get("details")
- return result_data # noqa: TRY300
- except Exception as e:
- logger.error(f"Error executing action: {e}")
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else str(self.action)
- msg = f"Failed to execute {display_name}: {e!s}"
- raise ValueError(msg) from e
-
- def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
- return super().update_build_config(build_config, field_value, field_name)
-
def set_default_tools(self):
- self._default_tools = {
- self.sanitize_action_name("GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER").replace(" ", "-"),
- self.sanitize_action_name("GITHUB_CREATE_A_PULL_REQUEST").replace(" ", "-"),
- }
+ """Set the default tools for GitHub component."""
diff --git a/src/backend/base/langflow/components/composio/gmail_composio.py b/src/backend/base/langflow/components/composio/gmail_composio.py
index f1e5213f7..76d8f505c 100644
--- a/src/backend/base/langflow/components/composio/gmail_composio.py
+++ b/src/backend/base/langflow/components/composio/gmail_composio.py
@@ -1,406 +1,38 @@
-import json
-from typing import Any
-
-from composio import Action
-
from langflow.base.composio.composio_base import ComposioBaseComponent
-from langflow.inputs.inputs import (
- BoolInput,
- FileInput,
- IntInput,
- MessageTextInput,
-)
-from langflow.logging import logger
class ComposioGmailAPIComponent(ComposioBaseComponent):
- """Gmail API component for interacting with Gmail services."""
-
display_name: str = "Gmail"
- name = "GmailAPI"
icon = "Google"
documentation: str = "https://docs.composio.dev"
app_name = "gmail"
- # Gmail-specific actions
- _actions_data: dict = {
- "GMAIL_SEND_EMAIL": {
- "display_name": "Send Email",
- "action_fields": [
- "recipient_email",
- "subject",
- "body",
- "cc",
- "bcc",
- "is_html",
- "gmail_user_id",
- "attachment",
- ],
- },
- "GMAIL_FETCH_EMAILS": {
- "display_name": "Fetch Emails",
- "action_fields": [
- "gmail_user_id",
- "max_results",
- "query",
- "page_token",
- "label_ids",
- "include_spam_trash",
- ],
- "get_result_field": True,
- "result_field": "messages",
- },
- "GMAIL_GET_PROFILE": {
- "display_name": "Get User Profile",
- "action_fields": ["gmail_user_id"],
- },
- "GMAIL_FETCH_MESSAGE_BY_MESSAGE_ID": {
- "display_name": "Get Email By ID",
- "action_fields": ["message_id", "gmail_user_id", "format"],
- "get_result_field": False,
- },
- "GMAIL_CREATE_EMAIL_DRAFT": {
- "display_name": "Create Draft Email",
- "action_fields": [
- "recipient_email",
- "subject",
- "body",
- "cc",
- "bcc",
- "is_html",
- "attachment",
- "gmail_user_id",
- ],
- },
- "GMAIL_FETCH_MESSAGE_BY_THREAD_ID": {
- "display_name": "Get Message By Thread ID",
- "action_fields": ["thread_id", "page_token", "gmail_user_id"],
- "get_result_field": False,
- },
- "GMAIL_LIST_THREADS": {
- "display_name": "List Email Threads",
- "action_fields": ["max_results", "query", "gmail_user_id", "page_token"],
- "get_result_field": True,
- "result_field": "threads",
- },
- "GMAIL_REPLY_TO_THREAD": {
- "display_name": "Reply To Thread",
- "action_fields": ["thread_id", "message_body", "recipient_email", "gmail_user_id", "cc", "bcc", "is_html"],
- },
- "GMAIL_LIST_LABELS": {
- "display_name": "List Email Labels",
- "action_fields": ["gmail_user_id"],
- "get_result_field": True,
- "result_field": "labels",
- },
- "GMAIL_CREATE_LABEL": {
- "display_name": "Create Email Label",
- "action_fields": ["label_name", "label_list_visibility", "message_list_visibility", "gmail_user_id"],
- },
- "GMAIL_GET_PEOPLE": {
- "display_name": "Get Contacts",
- "action_fields": ["resource_name", "person_fields"],
- "get_result_field": True,
- "result_field": "people_data",
- },
- "GMAIL_REMOVE_LABEL": {
- "display_name": "Delete Email Label",
- "action_fields": ["label_id", "gmail_user_id"],
- "get_result_field": False,
- },
- "GMAIL_GET_ATTACHMENT": {
- "display_name": "Get Attachment",
- "action_fields": ["message_id", "attachment_id", "file_name", "gmail_user_id"],
- },
- }
- _all_fields = {field for action_data in _actions_data.values() for field in action_data["action_fields"]}
- _bool_variables = {"is_html", "include_spam_trash"}
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
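+ # Map action keys to response post-processors; assumed to be consumed by ComposioBaseComponent after an action runs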
+ self.post_processors = {
+ "GMAIL_SEND_EMAIL": self._process_send_email_response,
+ "GMAIL_FETCH_EMAILS": self._process_fetch_emails_response,
+ }
- # Combine base inputs with Gmail-specific inputs
- inputs = [
- *ComposioBaseComponent._base_inputs,
- # Email composition fields
- MessageTextInput(
- name="recipient_email",
- display_name="Recipient Email",
- info="Email address of the recipient",
- show=False,
- required=True,
- advanced=False,
- ),
- MessageTextInput(
- name="subject",
- display_name="Subject",
- info="Subject of the email",
- show=False,
- required=True,
- advanced=False,
- ),
- MessageTextInput(
- name="body",
- display_name="Body",
- required=True,
- info="Content of the email",
- show=False,
- advanced=False,
- ),
- MessageTextInput(
- name="cc",
- display_name="CC",
- info="Email addresses to CC (Carbon Copy) in the email, separated by commas",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="bcc",
- display_name="BCC",
- info="Email addresses to BCC (Blind Carbon Copy) in the email, separated by commas",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="is_html",
- display_name="Is HTML",
- info="Specify whether the email body contains HTML content (true/false)",
- show=False,
- value=False,
- advanced=True,
- ),
- # Email retrieval and management fields
- MessageTextInput(
- name="gmail_user_id",
- display_name="User ID",
- info="The user's email address or 'me' for the authenticated user",
- show=False,
- advanced=True,
- ),
- IntInput(
- name="max_results",
- display_name="Max Results",
- required=True,
- info="Maximum number of emails to be returned",
- show=False,
- advanced=False,
- ),
- MessageTextInput(
- name="message_id",
- display_name="Message ID",
- info="The ID of the specific email message",
- show=False,
- required=True,
- advanced=False,
- ),
- MessageTextInput(
- name="thread_id",
- display_name="Thread ID",
- info="The ID of the email thread",
- show=False,
- required=True,
- advanced=False,
- ),
- MessageTextInput(
- name="query",
- display_name="Query",
- info="Search query to filter emails (e.g., 'from:someone@email.com' or 'subject:hello')",
- show=False,
- advanced=False,
- ),
- MessageTextInput(
- name="message_body",
- display_name="Message Body",
- info="The body content of the message to be sent",
- show=False,
- advanced=True,
- ),
- # Label management fields
- MessageTextInput(
- name="label_name",
- display_name="Label Name",
- info="Name of the Gmail label to create, modify, or filter by",
- show=False,
- required=True,
- advanced=False,
- ),
- MessageTextInput(
- name="label_id",
- display_name="Label ID",
- info="The ID of the Gmail label",
- show=False,
- advanced=False,
- ),
- MessageTextInput(
- name="label_ids",
- display_name="Label Ids",
- info="Comma-separated list of label IDs to filter messages",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="label_list_visibility",
- display_name="Label List Visibility",
- info="The visibility of the label in the label list in the Gmail web interface",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="message_list_visibility",
- display_name="Message List Visibility",
- info="The visibility of the label in the message list in the Gmail web interface",
- show=False,
- advanced=True,
- ),
- # Pagination and filtering
- MessageTextInput(
- name="page_token",
- display_name="Page Token",
- info="Token for retrieving the next page of results",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="include_spam_trash",
- display_name="Include messages from Spam/Trash",
- info="Include messages from SPAM and TRASH in the results",
- show=False,
- value=False,
- advanced=True,
- ),
- MessageTextInput(
- name="format",
- display_name="Format",
- info="The format to return the message in. Possible values: minimal, full, raw, metadata",
- show=False,
- advanced=True,
- ),
- # Contact management fields
- MessageTextInput(
- name="resource_name",
- display_name="Resource Name",
- info="The resource name of the person to provide information about",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="person_fields",
- display_name="Person fields",
- info="Fields to return for the person. Multiple fields can be specified by separating them with commas",
- show=False,
- advanced=True,
- ),
- # Attachment handling
- MessageTextInput(
- name="attachment_id",
- display_name="Attachment ID",
- info="Id of the attachment",
- show=False,
- required=True,
- advanced=False,
- ),
- MessageTextInput(
- name="file_name",
- display_name="File name",
- info="File name of the attachment file",
- show=False,
- required=True,
- advanced=False,
- ),
- FileInput(
- name="attachment",
- display_name="Add Attachment",
- file_types=[
- "csv",
- "txt",
- "doc",
- "docx",
- "xls",
- "xlsx",
- "pdf",
- "png",
- "jpg",
- "jpeg",
- "gif",
- "zip",
- "rar",
- "ppt",
- "pptx",
- ],
- info="Add an attachment",
- show=False,
- ),
- ]
+ def _process_send_email_response(self, raw_data):
+ """Post-processor for GMAIL_SEND_EMAIL action."""
+ if isinstance(raw_data, dict):
+ response_data = raw_data.get("response_data", raw_data)
- def execute_action(self):
- """Execute action and return response as Message."""
- toolset = self._build_wrapper()
+ return {
+ "message_id": response_data.get("id"),
+ "thread_id": response_data.get("threadId"),
+ "label_ids": response_data.get("labelIds", []),
+ }
+ return raw_data
- try:
- self._build_action_maps()
- # Get the display name from the action list
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else self.action
- # Use the display_to_key_map to get the action key
- action_key = self._display_to_key_map.get(display_name)
- if not action_key:
- msg = f"Invalid action: {display_name}"
- raise ValueError(msg)
-
- enum_name = getattr(Action, action_key)
- params = {}
- if action_key in self._actions_data:
- for field in self._actions_data[action_key]["action_fields"]:
- value = getattr(self, field)
-
- if value is None or value == "":
- continue
-
- if field in ["cc", "bcc", "label_ids"] and value:
- value = [item.strip() for item in value.split(",")]
-
- if field in self._bool_variables:
- value = bool(value)
-
- params[field] = value
-
- if params.get("gmail_user_id"):
- params["user_id"] = params.pop("gmail_user_id")
-
- result = toolset.execute_action(
- action=enum_name,
- params=params,
- )
- if not result.get("successful"):
- message_str = result.get("data", {}).get("message", "{}")
- try:
- error_data = json.loads(message_str).get("error", {})
- except json.JSONDecodeError:
- error_data = {"error": "Failed to get exact error details"}
- return {
- "code": error_data.get("code"),
- "message": error_data.get("message"),
- "errors": error_data.get("errors", []),
- "status": error_data.get("status"),
- }
-
- result_data = result.get("data", {})
- actions_data = self._actions_data.get(action_key, {})
- # If 'get_result_field' is True and 'result_field' is specified, extract the data
- # using 'result_field'. Otherwise, fall back to the entire 'data' field in the response.
- if actions_data.get("get_result_field") and actions_data.get("result_field"):
- result_data = result_data.get(actions_data.get("result_field"), result.get("data", []))
- if len(result_data) != 1 and not actions_data.get("result_field") and actions_data.get("get_result_field"):
- msg = f"Expected a dict with a single key, got {len(result_data)} keys: {result_data.keys()}"
- raise ValueError(msg)
- return result_data # noqa: TRY300
- except Exception as e:
- logger.error(f"Error executing action: {e}")
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else str(self.action)
- msg = f"Failed to execute {display_name}: {e!s}"
- raise ValueError(msg) from e
-
- def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
- return super().update_build_config(build_config, field_value, field_name)
+ def _process_fetch_emails_response(self, raw_data):
+ """Post-processor for GMAIL_FETCH_EMAILS action."""
+ if isinstance(raw_data, dict):
+ messages = raw_data.get("messages", [])
+ if messages:
+ return messages
+ return raw_data
def set_default_tools(self):
- self._default_tools = {
- "GMAIL_SEND_EMAIL",
- "GMAIL_FETCH_EMAILS",
- }
+ """Set the default tools for Gmail component."""
diff --git a/src/backend/base/langflow/components/composio/googlecalendar_composio.py b/src/backend/base/langflow/components/composio/googlecalendar_composio.py
index c2cf5a12a..265d20c25 100644
--- a/src/backend/base/langflow/components/composio/googlecalendar_composio.py
+++ b/src/backend/base/langflow/components/composio/googlecalendar_composio.py
@@ -1,787 +1,11 @@
-from typing import Any
-
-from composio import Action
-
from langflow.base.composio.composio_base import ComposioBaseComponent
-from langflow.inputs import (
- BoolInput,
- IntInput,
- MessageTextInput,
-)
-from langflow.logging import logger
class ComposioGoogleCalendarAPIComponent(ComposioBaseComponent):
- """Google Calendar API component for interacting with Google Calendar services."""
-
display_name: str = "Google Calendar"
- description: str = "Google Calendar API"
icon = "Googlecalendar"
documentation: str = "https://docs.composio.dev"
app_name = "googlecalendar"
- _actions_data: dict = {
- "GOOGLECALENDAR_UPDATE_EVENT": {
- "display_name": "Update Google Event",
- "action_fields": [
- "GOOGLECALENDAR_UPDATE_EVENT_description",
- "GOOGLECALENDAR_UPDATE_EVENT_eventType",
- "GOOGLECALENDAR_UPDATE_EVENT_create_meeting_room",
- "GOOGLECALENDAR_UPDATE_EVENT_guestsCanSeeOtherGuests",
- "GOOGLECALENDAR_UPDATE_EVENT_guestsCanInviteOthers",
- "GOOGLECALENDAR_UPDATE_EVENT_location",
- "GOOGLECALENDAR_UPDATE_EVENT_summary",
- "GOOGLECALENDAR_UPDATE_EVENT_transparency",
- "GOOGLECALENDAR_UPDATE_EVENT_visibility",
- "GOOGLECALENDAR_UPDATE_EVENT_timezone",
- "GOOGLECALENDAR_UPDATE_EVENT_recurrence",
- "GOOGLECALENDAR_UPDATE_EVENT_guests_can_modify",
- "GOOGLECALENDAR_UPDATE_EVENT_attendees",
- "GOOGLECALENDAR_UPDATE_EVENT_send_updates",
- "GOOGLECALENDAR_UPDATE_EVENT_start_datetime",
- "GOOGLECALENDAR_UPDATE_EVENT_event_duration_hour",
- "GOOGLECALENDAR_UPDATE_EVENT_event_duration_minutes",
- "GOOGLECALENDAR_UPDATE_EVENT_calendar_id",
- "GOOGLECALENDAR_UPDATE_EVENT_event_id",
- ],
- },
- "GOOGLECALENDAR_REMOVE_ATTENDEE": {
- "display_name": "Remove Attendee From Event",
- "action_fields": [
- "GOOGLECALENDAR_REMOVE_ATTENDEE_calendar_id",
- "GOOGLECALENDAR_REMOVE_ATTENDEE_event_id",
- "GOOGLECALENDAR_REMOVE_ATTENDEE_attendee_email",
- ],
- },
- "GOOGLECALENDAR_GET_CURRENT_DATE_TIME": {
- "display_name": "Get Current Date And Time",
- "action_fields": ["GOOGLECALENDAR_GET_CURRENT_DATE_TIME_timezone"],
- },
- "GOOGLECALENDAR_QUICK_ADD": {
- "display_name": "Quick Add Event",
- "action_fields": [
- "GOOGLECALENDAR_QUICK_ADD_calendar_id",
- "GOOGLECALENDAR_QUICK_ADD_text",
- "GOOGLECALENDAR_QUICK_ADD_send_updates",
- ],
- },
- "GOOGLECALENDAR_LIST_CALENDARS": {
- "display_name": "List Google Calendars",
- "action_fields": [
- "GOOGLECALENDAR_LIST_CALENDARS_max_results",
- "GOOGLECALENDAR_LIST_CALENDARS_min_access_role",
- "GOOGLECALENDAR_LIST_CALENDARS_page_token",
- "GOOGLECALENDAR_LIST_CALENDARS_show_deleted",
- "GOOGLECALENDAR_LIST_CALENDARS_show_hidden",
- "GOOGLECALENDAR_LIST_CALENDARS_sync_token",
- ],
- },
- "GOOGLECALENDAR_FIND_EVENT": {
- "display_name": "Find Event",
- "action_fields": [
- "GOOGLECALENDAR_FIND_EVENT_calendar_id",
- "GOOGLECALENDAR_FIND_EVENT_query",
- "GOOGLECALENDAR_FIND_EVENT_max_results",
- "GOOGLECALENDAR_FIND_EVENT_order_by",
- "GOOGLECALENDAR_FIND_EVENT_show_deleted",
- "GOOGLECALENDAR_FIND_EVENT_single_events",
- "GOOGLECALENDAR_FIND_EVENT_timeMax",
- "GOOGLECALENDAR_FIND_EVENT_timeMin",
- "GOOGLECALENDAR_FIND_EVENT_updated_min",
- "GOOGLECALENDAR_FIND_EVENT_event_types",
- "GOOGLECALENDAR_FIND_EVENT_page_token",
- ],
- },
- "GOOGLECALENDAR_CREATE_EVENT": {
- "display_name": "Create Event",
- "action_fields": [
- "GOOGLECALENDAR_CREATE_EVENT_description",
- "GOOGLECALENDAR_CREATE_EVENT_eventType",
- "GOOGLECALENDAR_CREATE_EVENT_create_meeting_room",
- "GOOGLECALENDAR_CREATE_EVENT_guestsCanSeeOtherGuests",
- "GOOGLECALENDAR_CREATE_EVENT_guestsCanInviteOthers",
- "GOOGLECALENDAR_CREATE_EVENT_location",
- "GOOGLECALENDAR_CREATE_EVENT_summary",
- "GOOGLECALENDAR_CREATE_EVENT_transparency",
- "GOOGLECALENDAR_CREATE_EVENT_visibility",
- "GOOGLECALENDAR_CREATE_EVENT_timezone",
- "GOOGLECALENDAR_CREATE_EVENT_recurrence",
- "GOOGLECALENDAR_CREATE_EVENT_guests_can_modify",
- "GOOGLECALENDAR_CREATE_EVENT_attendees",
- "GOOGLECALENDAR_CREATE_EVENT_send_updates",
- "GOOGLECALENDAR_CREATE_EVENT_start_datetime",
- "GOOGLECALENDAR_CREATE_EVENT_event_duration_hour",
- "GOOGLECALENDAR_CREATE_EVENT_event_duration_minutes",
- "GOOGLECALENDAR_CREATE_EVENT_calendar_id",
- ],
- },
- "GOOGLECALENDAR_FIND_FREE_SLOTS": {
- "display_name": "Find Free Slots",
- "action_fields": [
- "GOOGLECALENDAR_FIND_FREE_SLOTS_time_min",
- "GOOGLECALENDAR_FIND_FREE_SLOTS_time_max",
- "GOOGLECALENDAR_FIND_FREE_SLOTS_timezone",
- "GOOGLECALENDAR_FIND_FREE_SLOTS_group_expansion_max",
- "GOOGLECALENDAR_FIND_FREE_SLOTS_calendar_expansion_max",
- "GOOGLECALENDAR_FIND_FREE_SLOTS_items",
- ],
- },
- "GOOGLECALENDAR_PATCH_CALENDAR": {
- "display_name": "Patch Calendar",
- "action_fields": [
- "GOOGLECALENDAR_PATCH_CALENDAR_calendar_id",
- "GOOGLECALENDAR_PATCH_CALENDAR_description",
- "GOOGLECALENDAR_PATCH_CALENDAR_location",
- "GOOGLECALENDAR_PATCH_CALENDAR_summary",
- "GOOGLECALENDAR_PATCH_CALENDAR_timezone",
- ],
- },
- "GOOGLECALENDAR_GET_CALENDAR": {
- "display_name": "Fetch Google Calendar",
- "action_fields": ["GOOGLECALENDAR_GET_CALENDAR_calendar_id"],
- },
- "GOOGLECALENDAR_DELETE_EVENT": {
- "display_name": "Delete Event",
- "action_fields": ["GOOGLECALENDAR_DELETE_EVENT_calendar_id", "GOOGLECALENDAR_DELETE_EVENT_event_id"],
- },
- "GOOGLECALENDAR_DUPLICATE_CALENDAR": {
- "display_name": "Duplicate Calendar",
- "action_fields": ["GOOGLECALENDAR_DUPLICATE_CALENDAR_summary"],
- },
- }
-
- _list_variables = {
- "GOOGLECALENDAR_FIND_EVENT_event_types",
- "GOOGLECALENDAR_CREATE_EVENT_recurrence",
- "GOOGLECALENDAR_CREATE_EVENT_attendees",
- "GOOGLECALENDAR_FIND_FREE_SLOTS_items",
- "GOOGLECALENDAR_UPDATE_EVENT_recurrence",
- "GOOGLECALENDAR_UPDATE_EVENT_attendees",
- }
-
- _all_fields = {field for action_data in _actions_data.values() for field in action_data["action_fields"]}
- _bool_variables = {
- "GOOGLECALENDAR_LIST_CALENDARS_show_deleted",
- "GOOGLECALENDAR_LIST_CALENDARS_show_hidden",
- "GOOGLECALENDAR_FIND_EVENT_show_deleted",
- "GOOGLECALENDAR_FIND_EVENT_single_events",
- "GOOGLECALENDAR_CREATE_EVENT_create_meeting_room",
- "GOOGLECALENDAR_CREATE_EVENT_guestsCanSeeOtherGuests",
- "GOOGLECALENDAR_CREATE_EVENT_guestsCanInviteOthers",
- "GOOGLECALENDAR_CREATE_EVENT_guests_can_modify",
- "GOOGLECALENDAR_CREATE_EVENT_send_updates",
- "GOOGLECALENDAR_UPDATE_EVENT_create_meeting_room",
- "GOOGLECALENDAR_UPDATE_EVENT_guestsCanSeeOtherGuests",
- "GOOGLECALENDAR_UPDATE_EVENT_guestsCanInviteOthers",
- "GOOGLECALENDAR_UPDATE_EVENT_guests_can_modify",
- "GOOGLECALENDAR_UPDATE_EVENT_send_updates",
- }
-
- inputs = [
- *ComposioBaseComponent._base_inputs,
- IntInput(
- name="GOOGLECALENDAR_LIST_CALENDARS_max_results",
- display_name="Max Results",
- info="Maximum number of entries returned on one result page. The page size can never be larger than 250 entries.", # noqa: E501
- show=False,
- value=10,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_LIST_CALENDARS_min_access_role",
- display_name="Min Access Role",
- info="The minimum access role for the user in the returned entries. Accepted values are 'owner' & 'reader'",
- show=False,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_LIST_CALENDARS_page_token",
- display_name="Page Token",
- info="Token specifying which result page to return.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_LIST_CALENDARS_show_deleted",
- display_name="Show Deleted",
- info="Whether to include deleted calendar list entries in the result.",
- show=False,
- value=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_LIST_CALENDARS_show_hidden",
- display_name="Show Hidden",
- info="Whether to show hidden entries.",
- show=False,
- value=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_LIST_CALENDARS_sync_token",
- display_name="Sync Token",
- info="Token obtained from the nextSyncToken field returned on the last page of results from the previous list request.", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_EVENT_calendar_id",
- display_name="Calendar Id",
- info="Identifier of the Google Calendar. Use 'primary' for the currently logged in user's primary calendar.", # noqa: E501
- show=False,
- value="primary",
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_EVENT_query",
- display_name="Query",
- info="Search term to find events that match these terms in the event's summary, description, location, attendee's displayName, attendee's email, organizer's displayName, organizer's email, etc if needed.", # noqa: E501
- show=False,
- ),
- IntInput(
- name="GOOGLECALENDAR_FIND_EVENT_max_results",
- display_name="Max Results",
- info="Maximum number of events returned on one result page. The page size can never be larger than 2500 events. The default value is 10.", # noqa: E501
- show=False,
- value=10,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_EVENT_order_by",
- display_name="Order By",
- info="The order of the events returned in the result. Acceptable values are 'startTime' and 'updated'.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_FIND_EVENT_show_deleted",
- display_name="Show Deleted",
- info="Whether to include deleted events (with status equals 'cancelled') in the result.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_FIND_EVENT_single_events",
- display_name="Single Events",
- info="Whether to expand recurring events into instances and only return single one-off events and instances of recurring events, but not the underlying recurring events themselves.", # noqa: E501
- show=False,
- value=True,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_EVENT_timeMax",
- display_name="Timemax",
- info="Upper bound (exclusive) for an event's start time to filter by. Accepts multiple formats:, 1. ISO format with timezone (e.g., 2024-12-06T13:00:00Z), 2. Comma-separated format (e.g., 2024,12,06,13,00,00), 3. Simple datetime format (e.g., 2024-12-06 13:00:00)", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_EVENT_timeMin",
- display_name="Timemin",
- info="Lower bound (exclusive) for an event's end time to filter by. Accepts multiple formats:, 1. ISO format with timezone (e.g., 2024-12-06T13:00:00Z), 2. Comma-separated format (e.g., 2024,12,06,13,00,00), 3. Simple datetime format (e.g., 2024-12-06 13:00:00)", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_EVENT_updated_min",
- display_name="Updated Min",
- info="Lower bound for an event's last modification time to filter by. Accepts multiple formats:, 1. ISO format with timezone (e.g., 2024-12-06T13:00:00Z), 2. Comma-separated format (e.g., 2024,12,06,13,00,00), 3. Simple datetime format (e.g., 2024-12-06 13:00:00)", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_EVENT_event_types",
- display_name="Event Types",
- info="List of event types to return. Possible values are: default, outOfOffice, focusTime, workingLocation.", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_EVENT_page_token",
- display_name="Page Token",
- info="Token specifying which result page to return. Optional.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_DUPLICATE_CALENDAR_summary",
- display_name="Summary/Title",
- info="Title of the calendar to be duplicated.",
- show=False,
- value="",
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_REMOVE_ATTENDEE_calendar_id",
- display_name="Calendar Id",
- info="ID of the Google Calendar",
- show=False,
- value="primary",
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_REMOVE_ATTENDEE_event_id",
- display_name="Event Id",
- info="ID of the event",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_REMOVE_ATTENDEE_attendee_email",
- display_name="Attendee Email",
- info="Email address of the attendee to be removed",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_GET_CALENDAR_calendar_id",
- display_name="Calendar Id",
- info="The ID of the Google Calendar that needs to be fetched. Default is 'primary'.",
- show=False,
- value="primary",
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_description",
- display_name="Description",
- info="Description of the event. Can contain HTML. Optional.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_eventType",
- display_name="Event Type",
- info="Type of the event, immutable post-creation. Currently, only 'default'",
- show=False,
- value="default",
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_CREATE_EVENT_create_meeting_room",
- display_name="Create Meeting Room",
- info="If true, a Google Meet link is created and added to the event.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_CREATE_EVENT_guestsCanSeeOtherGuests",
- display_name="Guests Can See Other Guests",
- info="Whether attendees other than the organizer can see who the event's attendees are.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_CREATE_EVENT_guestsCanInviteOthers",
- display_name="Guests Can Invite Others",
- info="Whether attendees other than the organizer can invite others to the event.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_location",
- display_name="Location",
- info="Geographic location of the event as free-form text.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_summary",
- display_name="Summary/Title",
- info="Summary (title) of the event.",
- show=False,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_transparency",
- display_name="Event Transparency",
- info="'opaque' (busy) or 'transparent' (available).",
- show=False,
- value="opaque",
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_visibility",
- display_name="Event Visibility",
- info="Event visibility: 'default', 'public', 'private', or 'confidential'.",
- show=False,
- value="default",
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_timezone",
- display_name="Timezone",
- info="IANA timezone name (e.g., 'America/New_York'). Required if datetime is naive. If datetime includes timezone info (Z or offset), this field is optional and defaults to UTC.", # noqa: E501
- show=False,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_recurrence",
- display_name="Recurrence",
- info="List of RRULE, EXRULE, RDATE, EXDATE lines for recurring events.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_CREATE_EVENT_guests_can_modify",
- display_name="Guests Can Modify",
- info="If True, guests can modify the event.",
- show=False,
- value=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_attendees",
- display_name="Attendees",
- info="List of attendee emails (strings).",
- show=False,
- ),
- BoolInput(
- name="GOOGLECALENDAR_CREATE_EVENT_send_updates",
- display_name="Send Updates",
- info="Defaults to True. Whether to send updates to the attendees.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_start_datetime",
- display_name="Start Datetime",
- info="Naive date/time (YYYY-MM-DDTHH:MM:SS) with NO offsets or Z. e.g. '2025-01-16T13:00:00'",
- show=False,
- required=True,
- ),
- IntInput(
- name="GOOGLECALENDAR_CREATE_EVENT_event_duration_hour",
- display_name="Event Duration Hour",
- info="Number of hours (0-24).",
- show=False,
- value=0,
- advanced=True,
- ),
- IntInput(
- name="GOOGLECALENDAR_CREATE_EVENT_event_duration_minutes",
- display_name="Event Duration Minutes",
- info="Number of minutes (0-59).",
- show=False,
- value=30,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_CREATE_EVENT_calendar_id",
- display_name="Calendar Id",
- info="The ID of the Google Calendar. `primary` for interacting with the primary calendar.",
- show=False,
- value="primary",
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_DELETE_EVENT_calendar_id",
- display_name="Calendar Id",
- info="ID of the Google Calendar",
- show=False,
- value="primary",
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_DELETE_EVENT_event_id",
- display_name="Event Id",
- info="ID of the event to be deleted",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_FREE_SLOTS_time_min",
- display_name="Time Min",
- info="The start datetime of the interval for the query. Supports multiple formats:, 1. ISO format with timezone (e.g., 2024-12-06T13:00:00Z), 2. Comma-separated format (e.g., 2024,12,06,13,00,00), 3. Simple datetime format (e.g., 2024-12-06 13:00:00)", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_FREE_SLOTS_time_max",
- display_name="Time Max",
- info="The end datetime of the interval for the query. Supports multiple formats:, 1. ISO format with timezone (e.g., 2024-12-06T13:00:00Z), 2. Comma-separated format (e.g., 2024,12,06,13,00,00), 3. Simple datetime format (e.g., 2024-12-06 13:00:00)", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_FREE_SLOTS_timezone",
- display_name="Timezone",
- info="Time zone used in the response. Optional. The default is UTC.",
- show=False,
- value="UTC",
- advanced=True,
- ),
- IntInput(
- name="GOOGLECALENDAR_FIND_FREE_SLOTS_group_expansion_max",
- display_name="Group Expansion Max",
- info="Maximal number of calendar identifiers to be provided for a single group. Optional. An error is returned for a group with more members than this value. Maximum value is 100.", # noqa: E501
- show=False,
- value=100,
- advanced=True,
- ),
- IntInput(
- name="GOOGLECALENDAR_FIND_FREE_SLOTS_calendar_expansion_max",
- display_name="Calendar Expansion Max",
- info="Maximal number of calendars for which FreeBusy information is to be provided. Optional. Maximum value is 50.", # noqa: E501
- show=False,
- value=50,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_FIND_FREE_SLOTS_items",
- display_name="Items",
- info="List of calendars ids for which to fetch",
- show=False,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_QUICK_ADD_calendar_id",
- display_name="Calendar Id",
- info="Calendar identifier. To list calendars to retrieve calendar IDs use relevant tools. To access the primary calendar of the currently logged in user, use the 'primary' keyword.", # noqa: E501
- show=False,
- value="primary",
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_QUICK_ADD_text",
- display_name="Text",
- info="The text describing the event to be created.",
- show=False,
- value="",
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_QUICK_ADD_send_updates",
- display_name="Send Updates",
- info="Guests who should receive notifications about the creation of the new event. Accepted fields include 'all', 'none', 'externalOnly'", # noqa: E501
- show=False,
- value="none",
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_PATCH_CALENDAR_calendar_id",
- display_name="Calendar Id",
- info="The ID of the Google Calendar that needs to be updated.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_PATCH_CALENDAR_description",
- display_name="Description",
- info="Description of the calendar. Optional.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_PATCH_CALENDAR_location",
- display_name="Location",
- info="Geographic location of the calendar as free-form text.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_PATCH_CALENDAR_summary",
- display_name="Title/Summary",
- info="Title of the calendar. This field is required and cannot be left blank as per the Google Calendar API requirements.", # noqa: E501
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_PATCH_CALENDAR_timezone",
- display_name="Timezone",
- info="The time zone of the calendar. (Formatted as an IANA Time Zone Database name, e.g. 'Europe/Zurich').",
- show=False,
- advanced=True,
- ),
- IntInput(
- name="GOOGLECALENDAR_GET_CURRENT_DATE_TIME_timezone",
- display_name="Timezone",
- info="The timezone offset from UTC to retrieve current date and time, like for location of UTC+6, you give 6, for UTC -9, your give -9.", # noqa: E501
- show=False,
- value=0,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_description",
- display_name="Description",
- info="Description of the event. Can contain HTML. Optional.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_eventType",
- display_name="EventType",
- info="Type of the event, immutable post-creation. Currently, only 'default' and 'workingLocation' can be created.", # noqa: E501
- show=False,
- value="default",
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_create_meeting_room",
- display_name="Create Meeting Room",
- info="If true, a Google Meet link is created and added to the event.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_guestsCanSeeOtherGuests",
- display_name="Guests Can See Other Guests",
- info="Whether attendees other than the organizer can see who the event's attendees are.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_guestsCanInviteOthers",
- display_name="Guests Can Invite Others",
- info="Whether attendees other than the organizer can invite others to the event.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_location",
- display_name="Location",
- info="Geographic location of the event as free-form text.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_summary",
- display_name="Summary/Title",
- info="Summary (title) of the event.",
- show=False,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_transparency",
- display_name="Event Transparency",
- info="'opaque' (busy) or 'transparent' (available).",
- show=False,
- value="opaque",
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_visibility",
- display_name="Event Visibility",
- info="Event visibility: 'default', 'public', 'private', or 'confidential'.",
- show=False,
- value="default",
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_timezone",
- display_name="Timezone",
- info="IANA timezone name (e.g., 'America/New_York'). Required if datetime is naive. If datetime includes timezone info (Z or offset), this field is optional and defaults to UTC.", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_recurrence",
- display_name="Recurrence",
- info="List of RRULE, EXRULE, RDATE, EXDATE lines for recurring events.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_guests_can_modify",
- display_name="Guests Can Modify",
- info="If True, guests can modify the event.",
- show=False,
- value=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_attendees",
- display_name="Attendees",
- info="List of attendee emails (strings).",
- show=False,
- ),
- BoolInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_send_updates",
- display_name="Send Updates",
- info="Defaults to True. Whether to send updates to the attendees.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_start_datetime",
- display_name="Start Datetime",
- info="Naive date/time (YYYY-MM-DDTHH:MM:SS) with NO offsets or Z. e.g. '2025-01-16T13:00:00'",
- show=False,
- required=True,
- ),
- IntInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_event_duration_hour",
- display_name="Event Duration Hour",
- info="Number of hours (0-24).",
- show=False,
- value=0,
- advanced=True,
- ),
- IntInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_event_duration_minutes",
- display_name="Event Duration Minutes",
- info="Number of minutes (0-59).",
- show=False,
- value=30,
- advanced=True,
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_calendar_id",
- display_name="Calendar Id",
- info="ID of the Google Calendar",
- show=False,
- value="primary",
- ),
- MessageTextInput(
- name="GOOGLECALENDAR_UPDATE_EVENT_event_id",
- display_name="Event Id",
- info="ID of the event to be updated",
- show=False,
- required=True,
- ),
- ]
-
- def execute_action(self):
- """Execute action and return response as Message."""
- toolset = self._build_wrapper()
-
- try:
- self._build_action_maps()
- # Get the display name from the action list
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else self.action
- # Use the display_to_key_map to get the action key
- action_key = self._display_to_key_map.get(display_name)
- if not action_key:
- msg = f"Invalid action: {display_name}"
- raise ValueError(msg)
-
- enum_name = getattr(Action, action_key)
- params = {}
- if action_key in self._actions_data:
- for field in self._actions_data[action_key]["action_fields"]:
- value = getattr(self, field)
-
- if value is None or value == "":
- continue
-
- if field in self._list_variables and value:
- value = [item.strip() for item in value.split(",")]
-
- if field in self._bool_variables:
- value = bool(value)
-
- param_name = field.replace(action_key + "_", "")
- params[param_name] = value
-
- result = toolset.execute_action(
- action=enum_name,
- params=params,
- )
- if not result.get("successful"):
- message_str = result.get("error", {})
- return {"error": message_str}
-
- result_data = result.get("data", [])
- if (
- len(result_data) != 1
- and not self._actions_data.get(action_key, {}).get("result_field")
- and self._actions_data.get(action_key, {}).get("get_result_field")
- ):
- msg = f"Expected a dict with a single key, got {len(result_data)} keys: {result_data.keys()}"
- raise ValueError(msg)
- if action_key == "GOOGLECALENDAR_GET_CURRENT_DATE_TIME":
- return result_data
- return result_data[next(iter(result_data))]
- except Exception as e:
- logger.error(f"Error executing action: {e}")
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else str(self.action)
- msg = f"Failed to execute {display_name}: {e!s}"
- raise ValueError(msg) from e
-
- def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
- return super().update_build_config(build_config, field_value, field_name)
+ def set_default_tools(self):
+ """Set the default tools for Google Calendar component."""
diff --git a/src/backend/base/langflow/components/composio/googlemeet_composio.py b/src/backend/base/langflow/components/composio/googlemeet_composio.py
new file mode 100644
index 000000000..bf2ad0171
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/googlemeet_composio.py
@@ -0,0 +1,11 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioGooglemeetAPIComponent(ComposioBaseComponent):
+ display_name: str = "Google Meet"
+ icon = "Googlemeet"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "googlemeet"
+
+ def set_default_tools(self):
+ """Set the default tools for Google Calendar component."""
diff --git a/src/backend/base/langflow/components/composio/googletasks_composio.py b/src/backend/base/langflow/components/composio/googletasks_composio.py
new file mode 100644
index 000000000..7c380bc8b
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/googletasks_composio.py
@@ -0,0 +1,8 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioGoogleTasksAPIComponent(ComposioBaseComponent):
+ display_name: str = "Google Tasks"
+ icon = "GoogleTasks"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "googletasks"
diff --git a/src/backend/base/langflow/components/composio/linear_composio.py b/src/backend/base/langflow/components/composio/linear_composio.py
new file mode 100644
index 000000000..1aecd8005
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/linear_composio.py
@@ -0,0 +1,11 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioLinearAPIComponent(ComposioBaseComponent):
+ display_name: str = "Linear"
+ icon = "Linear"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "linear"
+
+ def set_default_tools(self):
+ """Set the default tools for Linear component."""
diff --git a/src/backend/base/langflow/components/composio/outlook_composio.py b/src/backend/base/langflow/components/composio/outlook_composio.py
index 7540b9330..7ef8f4c16 100644
--- a/src/backend/base/langflow/components/composio/outlook_composio.py
+++ b/src/backend/base/langflow/components/composio/outlook_composio.py
@@ -1,765 +1,11 @@
-import json
-from typing import Any
-
-from composio import Action
-
from langflow.base.composio.composio_base import ComposioBaseComponent
-from langflow.inputs import BoolInput, FileInput, IntInput, MessageTextInput
-from langflow.logging import logger
class ComposioOutlookAPIComponent(ComposioBaseComponent):
display_name: str = "Outlook"
- description: str = "Outlook API"
icon = "Outlook"
documentation: str = "https://docs.composio.dev"
app_name = "outlook"
- _actions_data: dict = {
- "OUTLOOK_OUTLOOK_REPLY_EMAIL": {
- "display_name": "Reply To Email",
- "action_fields": [
- "OUTLOOK_OUTLOOK_REPLY_EMAIL_user_id",
- "OUTLOOK_OUTLOOK_REPLY_EMAIL_message_id",
- "OUTLOOK_OUTLOOK_REPLY_EMAIL_comment",
- "OUTLOOK_OUTLOOK_REPLY_EMAIL_cc_emails",
- "OUTLOOK_OUTLOOK_REPLY_EMAIL_bcc_emails",
- ],
- "get_result_field": False,
- },
- "OUTLOOK_OUTLOOK_GET_PROFILE": {
- "display_name": "Get Profile",
- "action_fields": ["OUTLOOK_OUTLOOK_GET_PROFILE_user_id"],
- "get_result_field": True,
- "result_field": "response_data",
- },
- "OUTLOOK_OUTLOOK_SEND_EMAIL": {
- "display_name": "Send Email",
- "action_fields": [
- "OUTLOOK_OUTLOOK_SEND_EMAIL_user_id",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_subject",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_body",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_to_email",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_to_name",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_cc_emails",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_bcc_emails",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_is_html",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_save_to_sent_items",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_attachment",
- ],
- "get_result_field": False,
- },
- "OUTLOOK_OUTLOOK_LIST_MESSAGES": {
- "display_name": "List Messages",
- "action_fields": [
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_user_id",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_folder",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_top",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_skip",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_is_read",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_importance",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_subject",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_received_date_time_gt",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_subject_startswith",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_subject_endswith",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_subject_contains",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_received_date_time_ge",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_received_date_time_lt",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_received_date_time_le",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_from_address",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_has_attachments",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_body_preview_contains",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_sent_date_time_gt",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_sent_date_time_lt",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_categories",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_select",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_orderby",
- ],
- "get_result_field": True,
- "result_field": "value",
- },
- "OUTLOOK_OUTLOOK_LIST_EVENTS": {
- "display_name": "List Events",
- "action_fields": [
- "OUTLOOK_OUTLOOK_LIST_EVENTS_user_id",
- "OUTLOOK_OUTLOOK_LIST_EVENTS_top",
- "OUTLOOK_OUTLOOK_LIST_EVENTS_skip",
- "OUTLOOK_OUTLOOK_LIST_EVENTS_filter",
- "OUTLOOK_OUTLOOK_LIST_EVENTS_select",
- "OUTLOOK_OUTLOOK_LIST_EVENTS_orderby",
- "OUTLOOK_OUTLOOK_LIST_EVENTS_timezone",
- ],
- "get_result_field": True,
- "result_field": "value",
- },
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT": {
- "display_name": "Create Calendar Event",
- "action_fields": [
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_user_id",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_subject",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_body",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_is_html",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_start_datetime",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_end_datetime",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_time_zone",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_is_online_meeting",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_online_meeting_provider",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_attendees_info",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_location",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_show_as",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_categories",
- ],
- "get_result_field": True,
- "result_field": "response_data",
- },
- "OUTLOOK_OUTLOOK_GET_EVENT": {
- "display_name": "Get Calendar Event",
- "action_fields": ["OUTLOOK_OUTLOOK_GET_EVENT_user_id", "OUTLOOK_OUTLOOK_GET_EVENT_event_id"],
- "get_result_field": True,
- "result_field": "response_data",
- },
- "OUTLOOK_OUTLOOK_CREATE_DRAFT": {
- "display_name": "Create Email Draft",
- "action_fields": [
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_subject",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_body",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_to_recipients",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_cc_recipients",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_bcc_recipients",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_is_html",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_attachment",
- ],
- "get_result_field": True,
- "result_field": "response_data",
- },
- }
-
- _all_fields = {field for action_data in _actions_data.values() for field in action_data["action_fields"]}
-
- _bool_variables = {
- "OUTLOOK_OUTLOOK_SEND_EMAIL_is_html",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_save_to_sent_items",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_is_html",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_is_html",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_is_online_meeting",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_is_read",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_has_attachments",
- }
-
- _list_variables = {
- "OUTLOOK_OUTLOOK_LIST_EVENTS_select",
- "OUTLOOK_OUTLOOK_LIST_EVENTS_orderby",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_cc_emails",
- "OUTLOOK_OUTLOOK_SEND_EMAIL_bcc_emails",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_to_recipients",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_cc_recipients",
- "OUTLOOK_OUTLOOK_CREATE_DRAFT_bcc_recipients",
- "OUTLOOK_OUTLOOK_REPLY_EMAIL_cc_emails",
- "OUTLOOK_OUTLOOK_REPLY_EMAIL_bcc_emails",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_attendees_info",
- "OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_categories",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_categories",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_select",
- "OUTLOOK_OUTLOOK_LIST_MESSAGES_orderby",
- }
-
- inputs = [
- *ComposioBaseComponent._base_inputs,
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_EVENTS_user_id",
- display_name="User Id",
- info="The target user's email address or 'me' for the authenticated user.",
- show=False,
- value="me",
- advanced=True,
- ),
- IntInput(
- name="OUTLOOK_OUTLOOK_LIST_EVENTS_top",
- display_name="Max Results",
- info="The maximum number of events to return per request.",
- show=False,
- value=10,
- ),
- IntInput(
- name="OUTLOOK_OUTLOOK_LIST_EVENTS_skip",
- display_name="Skip",
- info="The number of events to skip before starting to collect results.",
- show=False,
- value=0,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_EVENTS_filter",
- display_name="Filter",
- info="OData query string to filter results. Example: start/dateTime ge '2024-01-01T00:00:00'",
- show=False,
- value="",
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_EVENTS_select",
- display_name="Select",
- info="List of properties to include in the response comma separated.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_EVENTS_orderby",
- display_name="Orderby",
- info="Properties to sort results by comma separated.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_EVENTS_timezone",
- display_name="Timezone",
- info="The timezone for event start and end times in the response.",
- show=False,
- value="UTC",
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_user_id",
- display_name="User Id",
- info="The user's email address or 'me' for the authenticated user.",
- show=False,
- value="me",
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_subject",
- display_name="Subject",
- info="Subject of the email",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_body",
- display_name="Body",
- info="Body content of the email. Can be plain text or HTML based on is_html flag.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_to_email",
- display_name="Recipient Email",
- info="Recipient email address",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_to_name",
- display_name="To Name",
- info="Recipient display name",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_cc_emails",
- display_name="CC",
- info="List of CC recipient email addresses comma separated",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_bcc_emails",
- display_name="BCC",
- info="List of BCC recipient email addresses comma separated",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_is_html",
- display_name="Is HTML",
- info="Set to True if the body content is HTML formatted",
- show=False,
- value=False,
- advanced=True,
- ),
- BoolInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_save_to_sent_items",
- display_name="Save To Sent Items",
- info="Whether to save the sent email to Sent Items folder.",
- show=False,
- value=True,
- advanced=True,
- ),
- FileInput(
- name="OUTLOOK_OUTLOOK_SEND_EMAIL_attachment",
- display_name="Attachment",
- file_types=[
- "csv",
- "txt",
- "doc",
- "docx",
- "xls",
- "xlsx",
- "pdf",
- "png",
- "jpg",
- "jpeg",
- "gif",
- "zip",
- "rar",
- "ppt",
- "pptx",
- ],
- info="Add an attachment",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CREATE_DRAFT_subject",
- display_name="Subject",
- info="Subject of the email",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CREATE_DRAFT_body",
- display_name="Body",
- info="Body content of the email. Can be plain text or HTML based on is_html flag",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CREATE_DRAFT_to_recipients",
- display_name="Recipient Email",
- info="List of recipient email addresses comma separated",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CREATE_DRAFT_cc_recipients",
- display_name="Cc Recipients",
- info="List of CC recipient email addresses",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CREATE_DRAFT_bcc_recipients",
- display_name="BCC",
- info="List of BCC recipient email addresses comma separated",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="OUTLOOK_OUTLOOK_CREATE_DRAFT_is_html",
- display_name="Is HTML",
- info="Set to True if the body content is HTML formatted",
- show=False,
- value=False,
- advanced=True,
- ),
- FileInput(
- name="OUTLOOK_OUTLOOK_CREATE_DRAFT_attachment",
- display_name="Attachment",
- file_types=[
- "csv",
- "txt",
- "doc",
- "docx",
- "xls",
- "xlsx",
- "pdf",
- "png",
- "jpg",
- "jpeg",
- "gif",
- "zip",
- "rar",
- "ppt",
- "pptx",
- ],
- info="Add an attachment",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_GET_PROFILE_user_id",
- display_name="User Id",
- info="The user's email address or 'me' for the authenticated user.",
- show=False,
- value="me",
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_REPLY_EMAIL_user_id",
- display_name="User Id",
- info="The user's email address or 'me' for the authenticated user.",
- show=False,
- value="me",
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_REPLY_EMAIL_message_id",
- display_name="Message Id",
- info="The ID of the message to reply to. Can be obtained from OUTLOOK_LIST_MESSAGES action.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_REPLY_EMAIL_comment",
- display_name="Comment",
- info="Comment to include in the reply. Must be plain text.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_REPLY_EMAIL_cc_emails",
- display_name="CC",
- info="List of CC recipient email addresses comma separated",
- show=False,
- value=[],
- is_list=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_REPLY_EMAIL_bcc_emails",
- display_name="BCC",
- info="List of BCC recipient email addresses comma separated",
- show=False,
- value=[],
- is_list=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_user_id",
- display_name="User Id",
- info="The user's email address or 'me' for the authenticated user.",
- show=False,
- value="me",
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_subject",
- display_name="Subject",
- info="Subject of the event. Example: 'Team Meeting'.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_body",
- display_name="Body",
- info="Body content of the event. Can be plain text or HTML.",
- show=False,
- required=True,
- ),
- BoolInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_is_html",
- display_name="Is Html",
- info="Set to True if the body content should be interpreted as HTML.",
- show=False,
- value=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_start_datetime",
- display_name="Start Datetime",
- info="Start date/time (ISO 8601). Example: '2025-01-03T10:00:00Z'.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_end_datetime",
- display_name="End Datetime",
- info="End date/time (ISO 8601). Example: '2025-01-03T11:00:00Z'.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_time_zone",
- display_name="Time Zone",
- info="Time zone (e.g., 'UTC' or 'America/Los_Angeles').",
- show=False,
- required=True,
- ),
- BoolInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_is_online_meeting",
- display_name="Is Online Meeting",
- info="Set to True to make this an online meeting and generate a Teams URL.",
- show=False,
- value=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_online_meeting_provider",
- display_name="Online Meeting Provider",
- info="The online meeting service provider. Currently only supports 'teamsForBusiness'.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_attendees_info",
- display_name="Attendees",
- info="A list of attendee information. Only email is required for each attendee., Example: [{ 'email': 'team@example.com', 'name': 'Team', 'type': 'required' }, { 'email': 'other@example.com', 'type': 'optional' }, { 'email': 'other2@example.com' }]", # noqa: E501
- show=False,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_location",
- display_name="Location",
- info="Location of the event (e.g., 'Conference Room').",
- show=False,
- value="",
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_show_as",
- display_name="Show As",
- info="Status of the event: 'free', 'tentative', 'busy', or 'oof'.",
- show=False,
- value="busy",
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_CALENDAR_CREATE_EVENT_categories",
- display_name="Categories",
- info="List of categories associated with the event comma separated.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_GET_EVENT_user_id",
- display_name="User Id",
- info="The user's email address or 'me' for the authenticated user.",
- show=False,
- value="me",
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_GET_EVENT_event_id",
- display_name="Event Id",
- info="The ID of the calendar event to retrieve.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_user_id",
- display_name="User Id",
- info="The target user's email address or 'me' for the authenticated user. For delegated access scenarios, this should be the email of the shared mailbox or delegated user.", # noqa: E501
- show=False,
- value="me",
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_folder",
- display_name="Folder",
- info="",
- show=False,
- value="inbox",
- advanced=True,
- ),
- IntInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_top",
- display_name="Max Results",
- info="The maximum number of messages to return per request. Must be a positive integer between 1 and 1000.",
- show=False,
- value=10,
- ),
- IntInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_skip",
- display_name="Skip",
- info="The number of messages to skip before starting to collect results. Use for paginated responses.",
- show=False,
- value=0,
- advanced=True,
- ),
- BoolInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_is_read",
- display_name="Is Read",
- info="Filter messages by read status. If set to False, only unread messages will be returned.",
- show=False,
- value=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_importance",
- display_name="Importance",
- info="Filter messages by importance. For example, 'high', 'normal', or 'low'.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_subject",
- display_name="Subject",
- info="Filter messages by subject (exact match).",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_received_date_time_gt",
- display_name="Received Date Time Gt",
- info="Filter messages with a receivedDateTime greater than the specified value. Example: '2023-01-01T00:00:00Z'.", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_subject_startswith",
- display_name="Subject Startswith",
- info="Filter messages where the subject starts with the specified string.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_subject_endswith",
- display_name="Subject Endswith",
- info="Filter messages where the subject ends with the specified string.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_subject_contains",
- display_name="Subject Contains",
- info="Filter messages where the subject contains the specified substring.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_received_date_time_ge",
- display_name="Received Date Time Ge",
- info="Filter messages with a receivedDateTime greater than or equal to the specified value.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_received_date_time_lt",
- display_name="Received Date Time Lt",
- info="Filter messages with a receivedDateTime less than the specified value.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_received_date_time_le",
- display_name="Received Date Time Le",
- info="Filter messages with a receivedDateTime less than or equal to the specified value.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_from_address",
- display_name="From Address",
- info="Filter messages by the sender's email address. Uses equality check on from/emailAddress/address.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_has_attachments",
- display_name="Has Attachments",
- info="Filter messages by whether they have attachments.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_body_preview_contains",
- display_name="Body Preview Contains",
- info="Filter messages where the bodyPreview contains the specified substring.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_sent_date_time_gt",
- display_name="Sent Date Time Gt",
- info="Filter messages with a sentDateTime greater than the specified value.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_sent_date_time_lt",
- display_name="Sent Date Time Lt",
- info="Filter messages with a sentDateTime less than the specified value.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_categories",
- display_name="Categories",
- info="Filter messages by categories. Matches if the message contains any of the specified categories.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_select",
- display_name="Select",
- info="A list of properties to include in the response comma separated. Common properties: 'subject', 'from', 'toRecipients', 'receivedDateTime'.", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="OUTLOOK_OUTLOOK_LIST_MESSAGES_orderby",
- display_name="Orderby",
- info="Specify properties to sort results by. For example, 'receivedDateTime desc' for newest messages first.", # noqa: E501
- show=False,
- advanced=True,
- ),
- ]
-
- def execute_action(self):
- """Execute action and return response as Message."""
- toolset = self._build_wrapper()
-
- try:
- self._build_action_maps()
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else self.action
- action_key = self._display_to_key_map.get(display_name)
- if not action_key:
- msg = f"Invalid action: {display_name}"
- raise ValueError(msg)
-
- enum_name = getattr(Action, action_key)
- params = {}
- if action_key in self._actions_data:
- for field in self._actions_data[action_key]["action_fields"]:
- value = getattr(self, field)
-
- if value is None or value == "":
- continue
-
- if field in self._list_variables and value:
- value = [item.strip() for item in value.split(",")]
-
- if field in self._bool_variables:
- value = bool(value)
-
- param_name = field.replace(action_key + "_", "")
-
- params[param_name] = value
-
- result = toolset.execute_action(
- action=enum_name,
- params=params,
- )
- if not result.get("successful"):
- error_data = result.get("data", {})
- error_message = error_data.get("message", str(result.get("error", "Unknown Error")))
-
- if isinstance(error_message, str):
- try:
- error_obj = json.loads(error_message).get("error", {})
- error_obj["status_code"] = error_data.get("status_code", 400)
- return error_obj # noqa: TRY300
- except json.JSONDecodeError:
- return {"error": error_message, "status_code": error_data.get("status_code", 400)}
-
- return error_message
-
- result_data = result.get("data", {})
- actions_data = self._actions_data.get(action_key, {})
- if actions_data.get("get_result_field") and actions_data.get("result_field"):
- response_data = result_data.get("response_data", {})
- if response_data and actions_data.get("result_field") in response_data:
- result_data = response_data.get(actions_data.get("result_field"), result.get("data", []))
- else:
- result_data = result_data.get(actions_data.get("result_field"), result.get("data", []))
- if len(result_data) != 1 and not actions_data.get("result_field") and actions_data.get("get_result_field"):
- msg = f"Expected a dict with a single key, got {len(result_data)} keys: {result_data.keys()}"
- raise ValueError(msg)
- return result_data # noqa: TRY300
- except Exception as e:
- logger.error(f"Error executing action: {e}")
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else str(self.action)
- msg = f"Failed to execute {display_name}: {e!s}"
- raise ValueError(msg) from e
-
- def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
- return super().update_build_config(build_config, field_value, field_name)
-
def set_default_tools(self):
- self._default_tools = {
- self.sanitize_action_name("OUTLOOK_OUTLOOK_SEND_EMAIL").replace(" ", "-"),
- self.sanitize_action_name("OUTLOOK_OUTLOOK_LIST_MESSAGES").replace(" ", "-"),
- }
+ """Set the default tools for Gmail component."""
diff --git a/src/backend/base/langflow/components/composio/reddit_composio.py b/src/backend/base/langflow/components/composio/reddit_composio.py
new file mode 100644
index 000000000..4fec4cd13
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/reddit_composio.py
@@ -0,0 +1,11 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioRedditAPIComponent(ComposioBaseComponent):
+ display_name: str = "Reddit"
+ icon = "Reddit"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "reddit"
+
+ def set_default_tools(self):
+ """Set the default tools for Reddit component."""
diff --git a/src/backend/base/langflow/components/composio/slack_composio.py b/src/backend/base/langflow/components/composio/slack_composio.py
index 80d45ebf3..db7b6162c 100644
--- a/src/backend/base/langflow/components/composio/slack_composio.py
+++ b/src/backend/base/langflow/components/composio/slack_composio.py
@@ -1,586 +1,11 @@
-from typing import Any
-
-from composio import Action
-
from langflow.base.composio.composio_base import ComposioBaseComponent
-from langflow.inputs import (
- BoolInput,
- IntInput,
- MessageTextInput,
-)
-from langflow.logging import logger
class ComposioSlackAPIComponent(ComposioBaseComponent):
display_name: str = "Slack"
- description: str = "Slack API"
icon = "Slack"
documentation: str = "https://docs.composio.dev"
app_name = "slack"
- _actions_data: dict = {
- "SLACK_LIST_ALL_SLACK_TEAM_USERS_WITH_PAGINATION": {
- "display_name": "List Users",
- "action_fields": [
- "SLACK_LIST_ALL_SLACK_TEAM_USERS_WITH_PAGINATION_limit",
- "SLACK_LIST_ALL_SLACK_TEAM_USERS_WITH_PAGINATION_cursor",
- "SLACK_LIST_ALL_SLACK_TEAM_USERS_WITH_PAGINATION_include_locale",
- ],
- },
- "SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS": {
- "display_name": "List Channels",
- "action_fields": [
- "SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_exclude_archived",
- "SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_types",
- "SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_limit",
- "SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_cursor",
- ],
- },
- "SLACK_UPDATES_A_SLACK_MESSAGE": {
- "display_name": "Update Slack Chat Message",
- "action_fields": [
- "SLACK_UPDATES_A_SLACK_MESSAGE_as_user",
- "SLACK_UPDATES_A_SLACK_MESSAGE_attachments",
- "SLACK_UPDATES_A_SLACK_MESSAGE_blocks",
- "SLACK_UPDATES_A_SLACK_MESSAGE_channel",
- "SLACK_UPDATES_A_SLACK_MESSAGE_link_names",
- "SLACK_UPDATES_A_SLACK_MESSAGE_parse",
- "SLACK_UPDATES_A_SLACK_MESSAGE_text",
- "SLACK_UPDATES_A_SLACK_MESSAGE_ts",
- ],
- },
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL": {
- "display_name": "Post Message To Channel",
- "action_fields": [
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_as_user",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_attachments",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_blocks",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_channel",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_icon_emoji",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_icon_url",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_link_names",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_mrkdwn",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_parse",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_reply_broadcast",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_text",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_thread_ts",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_unfurl_links",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_unfurl_media",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_username",
- ],
- },
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY": {
- "display_name": "Search Messages Endpoint",
- "action_fields": [
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_count",
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_highlight",
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_page",
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_query",
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_sort",
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_sort_dir",
- ],
- },
- "SLACK_FETCH_CONVERSATION_HISTORY": {
- "display_name": "Retrieve conversation history",
- "action_fields": [
- "SLACK_FETCH_CONVERSATION_HISTORY_channel",
- "SLACK_FETCH_CONVERSATION_HISTORY_latest",
- "SLACK_FETCH_CONVERSATION_HISTORY_oldest",
- "SLACK_FETCH_CONVERSATION_HISTORY_inclusive",
- "SLACK_FETCH_CONVERSATION_HISTORY_limit",
- "SLACK_FETCH_CONVERSATION_HISTORY_cursor",
- ],
- },
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME": {
- "display_name": "Schedule Message In Chat",
- "action_fields": [
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_as_user",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_attachments",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_blocks",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_channel",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_link_names",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_parse",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_post_at",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_reply_broadcast",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_text",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_thread_ts",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_unfurl_links",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_unfurl_media",
- ],
- },
- "SLACK_CREATE_A_REMINDER": {
- "display_name": "Add Reminder For User",
- "action_fields": [
- "SLACK_CREATE_A_REMINDER_text",
- "SLACK_CREATE_A_REMINDER_time",
- "SLACK_CREATE_A_REMINDER_user",
- ],
- },
- }
-
- _all_fields = {field for action_data in _actions_data.values() for field in action_data["action_fields"]}
- _bool_variables = {
- "SLACK_LIST_ALL_SLACK_TEAM_USERS_WITH_PAGINATION_include_locale",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_as_user",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_link_names",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_mrkdwn",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_reply_broadcast",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_unfurl_links",
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_unfurl_media",
- "SLACK_FETCH_CONVERSATION_HISTORY_inclusive",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_as_user",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_link_names",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_reply_broadcast",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_unfurl_links",
- "SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_unfurl_media",
- "SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_exclude_archived",
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_highlight",
- }
-
- inputs = [
- *ComposioBaseComponent._base_inputs,
- IntInput(
- name="SLACK_LIST_ALL_SLACK_TEAM_USERS_WITH_PAGINATION_limit",
- display_name="Limit",
- info="The maximum number of items to return. Fewer than the requested number of items may be returned, even if the end of the users list hasn't been reached. Providing no `limit` value will result in Slack attempting to deliver you the entire result set. If the collection is too large you may experience `limit_required` or HTTP 500 errors. ", # noqa: E501
- show=False,
- value=1,
- ),
- MessageTextInput(
- name="SLACK_LIST_ALL_SLACK_TEAM_USERS_WITH_PAGINATION_cursor",
- display_name="Cursor",
- info="Paginate through collections of data by setting the `cursor` parameter to a `next_cursor` attribute returned by a previous request's `response_metadata`. Default value fetches the first `page` of the collection", # noqa: E501
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_LIST_ALL_SLACK_TEAM_USERS_WITH_PAGINATION_include_locale",
- display_name="Include Locale",
- info="Set this to `true` to receive the locale for users. Defaults to `false`",
- show=False,
- ),
- BoolInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_as_user",
- display_name="As User",
- info="Pass true to post the message as the authed user, instead of as a bot. Defaults to false",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_attachments",
- display_name="Attachments",
- info="A JSON-based array of structured attachments, presented as a URL-encoded string. ",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_blocks",
- display_name="Blocks",
- info="A JSON-based array of structured blocks, presented as a URL-encoded string. ",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_channel",
- display_name="Channel",
- info="Channel, private group, or IM channel to send message to. Can be an encoded ID, or a name ",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_icon_emoji",
- display_name="Icon Emoji",
- info="Emoji to use as the icon for this message. Overrides `icon_url`. Must be used in conjunction with `as_user` set to `false`, otherwise ignored", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_icon_url",
- display_name="Icon Url",
- info="URL to an image to use as the icon for this message. Must be used in conjunction with `as_user` set to false, otherwise ignored", # noqa: E501
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_link_names",
- display_name="Link Names",
- info="Find and link channel names and usernames.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_mrkdwn",
- display_name="Mrkdwn",
- info="Disable Slack markup parsing by setting to `false`. Enabled by default.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_parse",
- display_name="Parse",
- info="Change how messages are treated. Defaults to `none` ",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_reply_broadcast",
- display_name="Reply Broadcast",
- info="Used in conjunction with `thread_ts` and indicates whether reply should be made visible to everyone in the channel or conversation. Defaults to `false`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_text",
- display_name="Text",
- info="How this field works and whether it is required depends on other fields you use in your API call",
- show=False,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_thread_ts",
- display_name="Thread Ts",
- info="Provide another message's `ts` value to make this message a reply. Avoid using a reply's `ts` value; use its parent instead. ", # noqa: E501
- show=False,
- ),
- BoolInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_unfurl_links",
- display_name="Unfurl Links",
- info="Pass true to enable unfurling of primarily text-based content.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_unfurl_media",
- display_name="Unfurl Media",
- info="Pass false to disable unfurling of media content.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL_username",
- display_name="Username",
- info="Set your bot's user name. Must be used in conjunction with `as_user` set to false, otherwise ignored",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_UPDATES_A_SLACK_MESSAGE_as_user",
- display_name="As User",
- info="Pass true to update the message as the authed user",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_UPDATES_A_SLACK_MESSAGE_attachments",
- display_name="Attachments",
- info="A JSON-based array of structured attachments, presented as a URL-encoded string. This field is required when not presenting `text`. If you don't include this field, the message's previous `attachments` will be retained. To remove previous `attachments`, include an empty array for this field. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_UPDATES_A_SLACK_MESSAGE_blocks",
- display_name="Blocks",
- info="A JSON-based array of structured blocks, presented as a URL-encoded string. If you don't include this field, the message's previous `blocks` will be retained. To remove previous `blocks`, include an empty array for this field. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_UPDATES_A_SLACK_MESSAGE_channel",
- display_name="Channel ID",
- info="Channel ID containing the message to be updated.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="SLACK_UPDATES_A_SLACK_MESSAGE_link_names",
- display_name="Link Names",
- info="Find and link channel names and usernames. Defaults to `none`. If you do not specify a value for this field, the original value set for the message will be overwritten with the default, `none`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_UPDATES_A_SLACK_MESSAGE_parse",
- display_name="Parse",
- info="Change how messages are treated. Defaults to `client`, unlike `chat.postMessage`. Accepts either `none` or `full`. If you do not specify a value for this field, the original value set for the message will be overwritten with the default, `client`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_UPDATES_A_SLACK_MESSAGE_text",
- display_name="Text",
- info="New text for the message, using the default formatting rules. It's not required when presenting `blocks` or `attachments`. ", # noqa: E501
- show=False,
- ),
- MessageTextInput(
- name="SLACK_UPDATES_A_SLACK_MESSAGE_ts",
- display_name="Ts",
- info="Timestamp of the message to be updated.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="SLACK_FETCH_CONVERSATION_HISTORY_channel",
- display_name="Channel ID",
- info="Channel ID to fetch history for.",
- show=False,
- ),
- IntInput(
- name="SLACK_FETCH_CONVERSATION_HISTORY_latest",
- display_name="Latest",
- info="End of time range of messages to include in results.",
- show=False,
- advanced=True,
- ),
- IntInput(
- name="SLACK_FETCH_CONVERSATION_HISTORY_oldest",
- display_name="Oldest",
- info="Start of time range of messages to include in results.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_FETCH_CONVERSATION_HISTORY_inclusive",
- display_name="Inclusive",
- info="Include messages with latest or oldest timestamp in results only when either timestamp is specified. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- IntInput(
- name="SLACK_FETCH_CONVERSATION_HISTORY_limit",
- display_name="Limit",
- info="The maximum number of items to return. Fewer than the requested number of items may be returned, even if the end of the users list hasn't been reached. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_FETCH_CONVERSATION_HISTORY_cursor",
- display_name="Cursor",
- info="Paginate through collections of data by setting the `cursor` parameter to a `next_cursor` attribute returned by a previous request's `response_metadata`. Default value fetches the first 'page' of the collection. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_as_user",
- display_name="As User",
- info="Pass true to post the message as the authed user, instead of as a bot. Defaults to false",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_attachments",
- display_name="Attachments",
- info="A JSON-based array of structured attachments, presented as a URL-encoded string. ",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_blocks",
- display_name="Blocks",
- info="A JSON-based array of structured blocks, presented as a URL-encoded string. ",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_channel",
- display_name="Channel",
- info="Channel, private group, or DM channel to send message to. Can be an encoded ID, or a name",
- show=False,
- required=True,
- ),
- BoolInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_link_names",
- display_name="Link Names",
- info="Find and link channel names and usernames.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_parse",
- display_name="Parse",
- info="Change how messages are treated. Defaults to `none`",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_post_at",
- display_name="Post At",
- info="Unix EPOCH timestamp of time in future to send the message.",
- show=False,
- ),
- BoolInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_reply_broadcast",
- display_name="Reply Broadcast",
- info="Used in conjunction with `thread_ts` and indicates whether reply should be made visible to everyone in the channel or conversation. Defaults to `false`. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_text",
- display_name="Text",
- info="How this field works and whether it is required depends on other fields you use in your API call",
- show=False,
- ),
- IntInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_thread_ts",
- display_name="Thread Ts",
- info="Provide another message's `ts` value to make this message a reply. Avoid using a reply's `ts` value; use its parent instead. ", # noqa: E501
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_unfurl_links",
- display_name="Unfurl Links",
- info="Pass true to enable unfurling of primarily text-based content.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_SCHEDULES_A_MESSAGE_TO_A_CHANNEL_AT_A_SPECIFIED_TIME_unfurl_media",
- display_name="Unfurl Media",
- info="Pass false to disable unfurling of media content.",
- show=False,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_exclude_archived",
- display_name="Exclude Archived",
- info="Set to `true` to exclude archived channels from the list",
- show=False,
- ),
- MessageTextInput(
- name="SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_types",
- display_name="Types",
- info="Mix and match channel types by providing a comma-separated list of any combination of `public_channel`, `private_channel`, `mpim`, `im` ", # noqa: E501
- show=False,
- ),
- IntInput(
- name="SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_limit",
- display_name="Limit",
- info="The maximum number of items to return. Fewer than the requested number of items may be returned, even if the end of the list hasn't been reached. Must be an integer no larger than 1000. ", # noqa: E501
- show=False,
- value=1,
- ),
- MessageTextInput(
- name="SLACK_LIST_ALL_SLACK_TEAM_CHANNELS_WITH_VARIOUS_FILTERS_cursor",
- display_name="Cursor",
- info="Paginate through collections of data by setting the `cursor` parameter to a `next_cursor` attribute returned by a previous request's `response_metadata`. Default value fetches the first 'page' of the collection", # noqa: E501
- show=False,
- advanced=True,
- ),
- IntInput(
- name="SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_count",
- display_name="Count",
- info="Pass the number of results you want per 'page'. Maximum of `100`.",
- show=False,
- value=1,
- advanced=True,
- ),
- BoolInput(
- name="SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_highlight",
- display_name="Highlight",
- info="Pass a value of `true` to enable query highlight markers",
- show=False,
- advanced=True,
- ),
- IntInput(
- name="SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_page",
- display_name="Page",
- info="Page",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_query",
- display_name="Query",
- info="Search query.",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_sort",
- display_name="Sort",
- info="Return matches sorted by either `score` or `timestamp`.",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY_sort_dir",
- display_name="Sort Dir",
- info="Change sort direction to ascending (`asc`) or descending (`desc`).",
- show=False,
- advanced=True,
- ),
- MessageTextInput(
- name="SLACK_CREATE_A_REMINDER_text",
- display_name="Text",
- info="The content of the reminder",
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="SLACK_CREATE_A_REMINDER_time",
- display_name="Time",
- info="When this reminder should happen: the Unix timestamp (up to five years from now), the number of seconds until the reminder (if within 24 hours), or a natural language description (Ex. 'in 15 minutes,' or 'every Thursday') ", # noqa: E501
- show=False,
- required=True,
- ),
- MessageTextInput(
- name="SLACK_CREATE_A_REMINDER_user",
- display_name="User",
- info="The user who will receive the reminder. If no user is specified, the reminder will go to user who created it. ", # noqa: E501
- show=False,
- ),
- ]
-
- def execute_action(self):
- """Execute action and return response as Message."""
- toolset = self._build_wrapper()
-
- try:
- self._build_action_maps()
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else self.action
- action_key = self._display_to_key_map.get(display_name)
- if not action_key:
- msg = f"Invalid action: {display_name}"
- raise ValueError(msg)
-
- enum_name = getattr(Action, action_key)
- params = {}
- if action_key in self._actions_data:
- for field in self._actions_data[action_key]["action_fields"]:
- value = getattr(self, field)
-
- if value is None or value == "":
- continue
-
- if field in self._bool_variables:
- value = bool(value)
-
- param_name = field.replace(action_key + "_", "")
-
- if param_name == "as_user":
- value = True
-
- params[param_name] = value
-
- result = toolset.execute_action(
- action=enum_name,
- params=params,
- )
- if not result.get("successful"):
- return {"error": result.get("error", "No response")}
-
- return result.get("data", [])
- except Exception as e:
- logger.error(f"Error executing action: {e}")
- display_name = self.action[0]["name"] if isinstance(self.action, list) and self.action else str(self.action)
- msg = f"Failed to execute {display_name}: {e!s}"
- raise ValueError(msg) from e
-
- def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
- return super().update_build_config(build_config, field_value, field_name)
-
def set_default_tools(self):
- self._default_tools = {
- "SLACK_SENDS_A_MESSAGE_TO_A_SLACK_CHANNEL",
- "SLACK_SEARCH_FOR_MESSAGES_WITH_QUERY",
- }
+ """Set the default tools for Slack component."""
diff --git a/src/backend/base/langflow/components/composio/slackbot_composio.py b/src/backend/base/langflow/components/composio/slackbot_composio.py
new file mode 100644
index 000000000..1659ab9ec
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/slackbot_composio.py
@@ -0,0 +1,11 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioSlackbotAPIComponent(ComposioBaseComponent):
+ display_name: str = "Slackbot"
+ icon = "Slack"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "slackbot"
+
+ def set_default_tools(self):
+ """Set the default tools for Slackbot component."""
diff --git a/src/backend/base/langflow/components/composio/supabase_composio.py b/src/backend/base/langflow/components/composio/supabase_composio.py
new file mode 100644
index 000000000..7f8540ee1
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/supabase_composio.py
@@ -0,0 +1,11 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioSupabaseAPIComponent(ComposioBaseComponent):
+ display_name: str = "Supabase"
+ icon = "Supabase"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "supabase"
+
+ def set_default_tools(self):
+ """Set the default tools for Supabase component."""
diff --git a/src/backend/base/langflow/components/composio/todoist_composio.py b/src/backend/base/langflow/components/composio/todoist_composio.py
new file mode 100644
index 000000000..f4b7d676d
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/todoist_composio.py
@@ -0,0 +1,11 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioTodoistAPIComponent(ComposioBaseComponent):
+ display_name: str = "Todoist"
+ icon = "Todoist"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "todoist"
+
+ def set_default_tools(self):
+ """Set the default tools for Todoist component."""
diff --git a/src/backend/base/langflow/components/composio/youtube_composio.py b/src/backend/base/langflow/components/composio/youtube_composio.py
new file mode 100644
index 000000000..3f5b1b760
--- /dev/null
+++ b/src/backend/base/langflow/components/composio/youtube_composio.py
@@ -0,0 +1,11 @@
+from langflow.base.composio.composio_base import ComposioBaseComponent
+
+
+class ComposioYoutubeAPIComponent(ComposioBaseComponent):
+ display_name: str = "Youtube"
+ icon = "Youtube"
+ documentation: str = "https://docs.composio.dev"
+ app_name = "youtube"
+
+ def set_default_tools(self):
+ """Set the default tools for Youtube component."""
diff --git a/src/backend/base/langflow/components/couchbase/__init__.py b/src/backend/base/langflow/components/couchbase/__init__.py
new file mode 100644
index 000000000..890caddeb
--- /dev/null
+++ b/src/backend/base/langflow/components/couchbase/__init__.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from langflow.components._importing import import_mod
+
+if TYPE_CHECKING:
+ from .couchbase import CouchbaseVectorStoreComponent
+
+_dynamic_imports = {
+ "CouchbaseVectorStoreComponent": "couchbase",
+}
+
+__all__ = [
+ "CouchbaseVectorStoreComponent",
+]
+
+
+def __getattr__(attr_name: str) -> Any:
+ """Lazily import Couchbase components on attribute access."""
+ if attr_name not in _dynamic_imports:
+ msg = f"module '{__name__}' has no attribute '{attr_name}'"
+ raise AttributeError(msg)
+ try:
+ result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
+ except (ModuleNotFoundError, ImportError, AttributeError) as e:
+ msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
+ raise AttributeError(msg) from e
+ globals()[attr_name] = result
+ return result
+
+
+def __dir__() -> list[str]:
+ return list(__all__)
diff --git a/src/backend/base/langflow/components/vectorstores/couchbase.py b/src/backend/base/langflow/components/couchbase/couchbase.py
similarity index 100%
rename from src/backend/base/langflow/components/vectorstores/couchbase.py
rename to src/backend/base/langflow/components/couchbase/couchbase.py
diff --git a/src/backend/base/langflow/components/data/file.py b/src/backend/base/langflow/components/data/file.py
index 9b28910dc..f52581919 100644
--- a/src/backend/base/langflow/components/data/file.py
+++ b/src/backend/base/langflow/components/data/file.py
@@ -1,29 +1,90 @@
+"""Enhanced file component with clearer structure and Docling isolation.
+
+Notes:
+-----
+- Functionality is preserved with minimal behavioral changes.
+- ALL Docling parsing/export runs in a separate OS process to prevent memory
+ growth and native library state from impacting the main Langflow process.
+- Standard text/structured parsing continues to use existing BaseFileComponent
+ utilities (and optional threading via `parallel_load_data`).
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+import textwrap
from copy import deepcopy
-from typing import Any
+from typing import TYPE_CHECKING, Any
from langflow.base.data.base_file import BaseFileComponent
from langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data
-from langflow.io import BoolInput, FileInput, IntInput, Output
+from langflow.io import (
+ BoolInput,
+ DropdownInput,
+ FileInput,
+ IntInput,
+ MessageTextInput,
+ Output,
+ StrInput,
+)
from langflow.schema.data import Data
+from langflow.schema.message import Message
+
+if TYPE_CHECKING:
+ from langflow.schema import DataFrame
class FileComponent(BaseFileComponent):
- """Handles loading and processing of individual or zipped text files.
-
- This component supports processing multiple valid files within a zip archive,
- resolving paths, validating file types, and optionally using multithreading for processing.
- """
+ """File component with optional Docling processing (isolated in a subprocess)."""
display_name = "File"
- description = "Loads content from one or more files."
+ description = "Loads content from files with optional advanced document processing and export using Docling."
documentation: str = "https://docs.langflow.org/components-data#file"
icon = "file-text"
name = "File"
- VALID_EXTENSIONS = TEXT_FILE_TYPES
+ # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.
+ VALID_EXTENSIONS = [
+ "adoc",
+ "asciidoc",
+ "asc",
+ "bmp",
+ "csv",
+ "dotx",
+ "dotm",
+ "docm",
+ "docx",
+ "htm",
+ "html",
+ "jpeg",
+ "json",
+ "md",
+ "pdf",
+ "png",
+ "potx",
+ "ppsx",
+ "pptm",
+ "potm",
+ "ppsm",
+ "pptx",
+ "tiff",
+ "txt",
+ "xls",
+ "xlsx",
+ "xhtml",
+ "xml",
+ "webp",
+ *TEXT_FILE_TYPES,
+ ]
+ # Fixed export settings used when markdown export is requested.
+ EXPORT_FORMAT = "Markdown"
+ IMAGE_MODE = "placeholder"
+
+    # ---- Inputs / Outputs (kept as close to the original as possible) ----------------
_base_inputs = deepcopy(BaseFileComponent._base_inputs)
-
for input_item in _base_inputs:
if isinstance(input_item, FileInput) and input_item.name == "path":
input_item.real_time_refresh = True
@@ -31,6 +92,59 @@ class FileComponent(BaseFileComponent):
inputs = [
*_base_inputs,
+ BoolInput(
+ name="advanced_mode",
+ display_name="Advanced Parser",
+ value=False,
+ real_time_refresh=True,
+ info=(
+ "Enable advanced document processing and export with Docling for PDFs, images, and office documents. "
+            "Available only when processing a single file."
+ ),
+ show=False,
+ ),
+ DropdownInput(
+ name="pipeline",
+ display_name="Pipeline",
+ info="Docling pipeline to use",
+ options=["standard", "vlm"],
+ value="standard",
+ advanced=True,
+ ),
+ DropdownInput(
+ name="ocr_engine",
+ display_name="OCR Engine",
+ info="OCR engine to use. Only available when pipeline is set to 'standard'.",
+ options=["", "easyocr"],
+ value="",
+ show=False,
+ advanced=True,
+ ),
+ StrInput(
+ name="md_image_placeholder",
+ display_name="Image placeholder",
+ info="Specify the image placeholder for markdown exports.",
+ value="",
+ advanced=True,
+ show=False,
+ ),
+ StrInput(
+ name="md_page_break_placeholder",
+ display_name="Page break placeholder",
+ info="Add this placeholder between pages in the markdown output.",
+ value="",
+ advanced=True,
+ show=False,
+ ),
+ MessageTextInput(
+ name="doc_key",
+ display_name="Doc Key",
+ info="The key to use for the DoclingDocument column.",
+ value="doc",
+ advanced=True,
+ show=False,
+ ),
+        # Deprecated input retained for backward compatibility.
BoolInput(
name="use_multithreading",
display_name="[Deprecated] Use Multithreading",
@@ -45,96 +159,428 @@ class FileComponent(BaseFileComponent):
info="When multiple files are being processed, the number of files to process concurrently.",
value=1,
),
+ BoolInput(
+ name="markdown",
+ display_name="Markdown Export",
+ info="Export processed documents to Markdown format. Only available when advanced mode is enabled.",
+ value=False,
+ show=False,
+ ),
]
outputs = [
Output(display_name="Raw Content", name="message", method="load_files_message"),
]
- def update_outputs(self, frontend_node: dict, field_name: str, field_value: Any) -> dict:
- """Dynamically show only the relevant output based on the number of files processed."""
+ # ------------------------------ UI helpers --------------------------------------
+
+ def _path_value(self, template: dict) -> list[str]:
+ """Return the list of currently selected file paths from the template."""
+ return template.get("path", {}).get("file_path", [])
+
+ def update_build_config(
+ self,
+ build_config: dict[str, Any],
+ field_value: Any,
+ field_name: str | None = None,
+ ) -> dict[str, Any]:
+ """Show/hide Advanced Parser and related fields based on selection context."""
if field_name == "path":
- # Add outputs based on the number of files in the path
- if len(field_value) == 0:
- return frontend_node
+ paths = self._path_value(build_config)
+ file_path = paths[0] if paths else ""
+ file_count = len(field_value) if field_value else 0
- frontend_node["outputs"] = []
+ # Advanced mode only for single (non-tabular) file
+ allow_advanced = file_count == 1 and not file_path.endswith((".csv", ".xlsx", ".parquet"))
+ build_config["advanced_mode"]["show"] = allow_advanced
+ if not allow_advanced:
+ build_config["advanced_mode"]["value"] = False
+ for f in ("pipeline", "ocr_engine", "doc_key", "md_image_placeholder", "md_page_break_placeholder"):
+ if f in build_config:
+ build_config[f]["show"] = False
- if len(field_value) == 1:
- # We need to check if the file is structured content
- file_path = frontend_node["template"]["path"]["file_path"][0]
- if file_path.endswith((".csv", ".xlsx", ".parquet")):
- frontend_node["outputs"].append(
- Output(display_name="Structured Content", name="dataframe", method="load_files_structured"),
- )
- elif file_path.endswith(".json"):
- frontend_node["outputs"].append(
- Output(display_name="Structured Content", name="json", method="load_files_json"),
- )
+ elif field_name == "advanced_mode":
+ for f in ("pipeline", "ocr_engine", "doc_key", "md_image_placeholder", "md_page_break_placeholder"):
+ if f in build_config:
+ build_config[f]["show"] = bool(field_value)
- # All files get the raw content and path outputs
+ return build_config
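+
+    # Illustrative behavior of the logic above (hedged example): selecting a single
+    # "report.pdf" sets build_config["advanced_mode"]["show"] to True, while selecting
+    # "data.csv" (or more than one file) hides the toggle, resets it to False, and
+    # hides the Docling-related fields.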
+
+ def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002
+ """Dynamically show outputs based on file count/type and advanced mode."""
+ if field_name not in ["path", "advanced_mode"]:
+ return frontend_node
+
+ template = frontend_node.get("template", {})
+ paths = self._path_value(template)
+ if not paths:
+ return frontend_node
+
+ frontend_node["outputs"] = []
+ if len(paths) == 1:
+            file_path = paths[0]
+ if file_path.endswith((".csv", ".xlsx", ".parquet")):
+ frontend_node["outputs"].append(
+ Output(display_name="Structured Content", name="dataframe", method="load_files_structured"),
+ )
+ elif file_path.endswith(".json"):
+ frontend_node["outputs"].append(
+ Output(display_name="Structured Content", name="json", method="load_files_json"),
+ )
+
+            advanced_mode = template.get("advanced_mode", {}).get("value", False)
+            if advanced_mode:
+                frontend_node["outputs"].append(
+                    Output(display_name="Structured Output", name="advanced", method="load_files_advanced"),
+                )
+                frontend_node["outputs"].append(
+                    Output(display_name="Markdown", name="markdown", method="load_files_markdown"),
+                )
+                frontend_node["outputs"].append(
+                    Output(display_name="File Path", name="path", method="load_files_path"),
+                )
+            else:
+                frontend_node["outputs"].append(
+                    Output(display_name="Raw Content", name="message", method="load_files_message"),
+                )
+                frontend_node["outputs"].append(
+                    Output(display_name="File Path", name="path", method="load_files_path"),
+                )
-            frontend_node["outputs"].append(
-                Output(display_name="Raw Content", name="message", method="load_files_message"),
-            )
-            frontend_node["outputs"].append(
-                Output(display_name="File Path", name="path", method="load_files_path"),
-            )
- else:
- # For multiple files, we only show the files output
- frontend_node["outputs"].append(
- Output(display_name="Files", name="dataframe", method="load_files"),
- )
+ else:
+ # Multiple files => DataFrame output; advanced parser disabled
+ frontend_node["outputs"].append(Output(display_name="Files", name="dataframe", method="load_files"))
return frontend_node
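+
+    # Worked example derived from the branches above: a single "paper.pdf" with
+    # advanced_mode enabled exposes Structured Output, Markdown, and File Path;
+    # with advanced_mode off it exposes Raw Content and File Path; selecting
+    # multiple files collapses everything into the single "Files" DataFrame output.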
- def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
- """Processes files either sequentially or in parallel, depending on concurrency settings.
+ # ------------------------------ Core processing ----------------------------------
- Args:
- file_list (list[BaseFileComponent.BaseFile]): List of files to process.
+ def _is_docling_compatible(self, file_path: str) -> bool:
+ """Lightweight extension gate for Docling-compatible types."""
+ docling_exts = (
+ ".adoc",
+ ".asciidoc",
+ ".asc",
+ ".bmp",
+ ".csv",
+ ".dotx",
+ ".dotm",
+ ".docm",
+ ".docx",
+ ".htm",
+ ".html",
+ ".jpeg",
+ ".json",
+ ".md",
+ ".pdf",
+ ".png",
+ ".potx",
+ ".ppsx",
+ ".pptm",
+ ".potm",
+ ".ppsm",
+ ".pptx",
+ ".tiff",
+ ".txt",
+ ".xls",
+ ".xlsx",
+ ".xhtml",
+ ".xml",
+ ".webp",
+ )
+ return file_path.lower().endswith(docling_exts)
- Returns:
- list[BaseFileComponent.BaseFile]: Updated list of files with merged data.
+ def _process_docling_in_subprocess(self, file_path: str) -> Data | None:
+ """Run Docling in a separate OS process and map the result to a Data object.
+
+ We avoid multiprocessing pickling by launching `python -c "