From 76a8584c54270e6322fbb3bfad3622be6476a466 Mon Sep 17 00:00:00 2001
From: Edwin Jose
Date: Tue, 12 Aug 2025 15:42:57 -0400
Subject: [PATCH 01/86] refactor: remove Anthropic provider test (#9369)

* Comment out Anthropic provider test and update API key usage

Temporarily commented out the Anthropic provider selection in the Research
Translation Loop integration test and switched the API key input to use
OPENAI_API_KEY. This change is made pending a way to properly test Anthropic
integration.

* Update Research Translation Loop.spec.ts
---
 src/frontend/package-lock.json                     |  1 +
 .../integrations/Research Translation Loop.spec.ts | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/frontend/package-lock.json b/src/frontend/package-lock.json
index c3717f816..f2275b480 100644
--- a/src/frontend/package-lock.json
+++ b/src/frontend/package-lock.json
@@ -1176,6 +1176,7 @@
     },
     "node_modules/@clack/prompts/node_modules/is-unicode-supported": {
       "version": "1.3.0",
+      "extraneous": true,
       "inBundle": true,
       "license": "MIT",
       "engines": {
diff --git a/src/frontend/tests/core/integrations/Research Translation Loop.spec.ts b/src/frontend/tests/core/integrations/Research Translation Loop.spec.ts
index f5ed97850..4a88e3a39 100644
--- a/src/frontend/tests/core/integrations/Research Translation Loop.spec.ts
+++ b/src/frontend/tests/core/integrations/Research Translation Loop.spec.ts
@@ -10,8 +10,8 @@ withEventDeliveryModes(
   { tag: ["@release", "@starter-projects"] },
   async ({ page }) => {
     test.skip(
-      !process?.env?.ANTHROPIC_API_KEY,
-      "ANTHROPIC_API_KEY required to run this test",
+      !process?.env?.OPENAI_API_KEY,
+      "OPENAI_API_KEY required to run this test",
     );
 
     if (!process.env.CI) {
@@ -34,14 +34,14 @@ withEventDeliveryModes(
       skipAddNewApiKeys: true,
       skipSelectGptModel: true,
     });
-
-    await page.getByTestId("dropdown_str_provider").click();
-    await page.getByTestId("Anthropic-1-option").click();
+    // TODO: Uncomment this when we have a way to test Anthropic
+    // await page.getByTestId("dropdown_str_provider").click();
+    // await page.getByTestId("Anthropic-1-option").click();
 
     await page
       .getByTestId("popover-anchor-input-api_key")
       .last()
-      .fill(process.env.ANTHROPIC_API_KEY ?? "");
+      .fill(process.env.OPENAI_API_KEY ?? "");
 
     await page.waitForSelector('[data-testid="dropdown_str_model_name"]', {
       timeout: 5000,

From 0628398fa9046e34a14dfb342989b94e97f1ea97 Mon Sep 17 00:00:00 2001
From: Edwin Jose
Date: Tue, 12 Aug 2025 16:54:40 -0400
Subject: [PATCH 02/86] feat: Add GPT-5 series models to OpenAI metadata
 (#9336)

Add GPT-5 series models to OpenAI metadata

Introduces metadata entries for the new GPT-5, GPT-5-mini, GPT-5-nano, and
GPT-5-chat-latest models in the OPENAI_MODELS_DETAILED list. This update
ensures these models are recognized and available for use in the application.
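
As a rough usage sketch (an editor's illustration, not part of this commit):
downstream code can derive capability-specific model lists from entries like
these. Plain dicts stand in for the create_model_metadata() results, whose
exact return type is not shown in this diff, and the reasoning=False value
assumed for gpt-4o is likewise an assumption:

    # Hypothetical stand-ins for the entries added above; the real list is
    # built with create_model_metadata().
    OPENAI_MODELS_DETAILED = [
        {"name": "gpt-5", "tool_calling": True, "reasoning": True},
        {"name": "gpt-5-chat-latest", "tool_calling": False, "reasoning": True},
        {"name": "gpt-4o", "tool_calling": True, "reasoning": False},  # default assumed
    ]

    # Models that can be offered where tool calling is required.
    tool_calling_models = [m["name"] for m in OPENAI_MODELS_DETAILED if m["tool_calling"]]

    # Reasoning-capable models, e.g. for a reasoning-model picker.
    reasoning_models = [m["name"] for m in OPENAI_MODELS_DETAILED if m["reasoning"]]

    print(tool_calling_models)  # ['gpt-5', 'gpt-4o']
    print(reasoning_models)  # ['gpt-5', 'gpt-5-chat-latest']

Keeping capability flags in one detailed list like this gives UI dropdowns
and agent components a single source of truth to filter against.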
---
 .../langflow/base/models/openai_constants.py | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/src/backend/base/langflow/base/models/openai_constants.py b/src/backend/base/langflow/base/models/openai_constants.py
index 29017790b..8eb028e72 100644
--- a/src/backend/base/langflow/base/models/openai_constants.py
+++ b/src/backend/base/langflow/base/models/openai_constants.py
@@ -2,6 +2,35 @@ from .model_metadata import create_model_metadata
 
 # Unified model metadata - single source of truth
 OPENAI_MODELS_DETAILED = [
+    # GPT-5 Series
+    create_model_metadata(
+        provider="OpenAI",
+        name="gpt-5",
+        icon="OpenAI",
+        tool_calling=True,
+        reasoning=True,
+    ),
+    create_model_metadata(
+        provider="OpenAI",
+        name="gpt-5-mini",
+        icon="OpenAI",
+        tool_calling=True,
+        reasoning=True,
+    ),
+    create_model_metadata(
+        provider="OpenAI",
+        name="gpt-5-nano",
+        icon="OpenAI",
+        tool_calling=True,
+        reasoning=True,
+    ),
+    create_model_metadata(
+        provider="OpenAI",
+        name="gpt-5-chat-latest",
+        icon="OpenAI",
+        tool_calling=False,
+        reasoning=True,
+    ),
     # Regular OpenAI Models
     create_model_metadata(provider="OpenAI", name="gpt-4o-mini", icon="OpenAI", tool_calling=True),
     create_model_metadata(provider="OpenAI", name="gpt-4o", icon="OpenAI", tool_calling=True),

From 4ab322290b270fb47723222eb8f54b3855b2bf11 Mon Sep 17 00:00:00 2001
From: Edwin Jose
Date: Wed, 13 Aug 2025 10:23:31 -0400
Subject: [PATCH 03/86] refactor: Remove extraneous flag from package lock
 (#9371)

Remove extraneous flag from is-unicode-supported

The 'extraneous' flag was removed from the is-unicode-supported dependency
in package-lock.json to reflect its correct status in the bundle.
---
 src/frontend/package-lock.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/frontend/package-lock.json b/src/frontend/package-lock.json
index f2275b480..c3717f816 100644
--- a/src/frontend/package-lock.json
+++ b/src/frontend/package-lock.json
@@ -1176,7 +1176,6 @@
     },
     "node_modules/@clack/prompts/node_modules/is-unicode-supported": {
       "version": "1.3.0",
-      "extraneous": true,
       "inBundle": true,
       "license": "MIT",
       "engines": {

From 181606fd803f1ab6d8ace99502c5212e21274190 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com>
Date: Wed, 13 Aug 2025 12:54:49 -0300
Subject: [PATCH 04/86] fix: add cursor state to not skip to end on input
 components (#9375)

* Add cursor handling to input component

* add cursor handling to text area component

* [autofix.ci] apply automated fixes

* Modified tests to check cursor position

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 .../components/popover/index.tsx             | 19 +++++++++++++++----
 .../components/popoverObject/index.tsx       | 11 +++++++++++
 .../components/inputComponent/index.tsx      |  9 +++++++++
 .../components/textAreaComponent/index.tsx   |  9 +++++++++
 .../tests/core/unit/inputComponent.spec.ts   | 13 +++++++++++++
 .../core/unit/textAreaModalComponent.spec.ts | 15 +++++++++++++++
 6 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/components/popover/index.tsx b/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/components/popover/index.tsx
index 4613275dd..ac1b30a5d 100644
--- a/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/components/popover/index.tsx
+++ 
b/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/components/popover/index.tsx @@ -1,7 +1,7 @@ import { PopoverAnchor } from "@radix-ui/react-popover"; -import { uniqueId } from "lodash"; + import { X } from "lucide-react"; -import { type ReactNode, useMemo, useState } from "react"; +import { type ReactNode, useEffect, useMemo, useState } from "react"; import ForwardedIconComponent from "@/components/common/genericIconComponent"; import ShadTooltip from "@/components/common/shadTooltipComponent"; import { Badge } from "@/components/ui/badge"; @@ -189,12 +189,20 @@ const CustomInputPopover = ({ hasRefreshButton, }) => { const [isFocused, setIsFocused] = useState(false); + const [cursor, setCursor] = useState(null); const memoizedOptions = useMemo(() => new Set(options), [options]); const PopoverContentInput = editNode ? PopoverContent : PopoverContentWithoutPortal; + // Restore cursor position after value changes + useEffect(() => { + if (cursor !== null && refInput.current) { + refInput.current.setSelectionRange(cursor, cursor); + } + }, [cursor, value]); + const handleRemoveOption = ( optionToRemove: string, e: React.MouseEvent, @@ -270,7 +278,7 @@ const CustomInputPopover = ({ autoComplete="off" onFocus={() => setIsFocused(true)} autoFocus={autoFocus} - id={id + uniqueId()} + id={id} ref={refInput} type={!pwdVisible && password ? "password" : "text"} onBlur={() => { @@ -292,7 +300,10 @@ const CustomInputPopover = ({ ? "" : placeholder } - onChange={(e) => onChange?.(e.target.value)} + onChange={(e) => { + setCursor(e.target.selectionStart); + onChange?.(e.target.value); + }} onKeyDown={(e) => { handleKeyDown?.(e); if (blurOnEnter && e.key === "Enter") refInput.current?.blur(); diff --git a/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/components/popoverObject/index.tsx b/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/components/popoverObject/index.tsx index 191c76085..10b44aa76 100644 --- a/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/components/popoverObject/index.tsx +++ b/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/components/popoverObject/index.tsx @@ -1,4 +1,5 @@ import { PopoverAnchor } from "@radix-ui/react-popover"; +import { useEffect, useState } from "react"; import ForwardedIconComponent from "@/components/common/genericIconComponent"; import { Command, @@ -40,11 +41,21 @@ const CustomInputPopoverObject = ({ handleKeyDown, showOptions, }) => { + const [cursor, setCursor] = useState(null); + const PopoverContentInput = editNode ? 
PopoverContent : PopoverContentWithoutPortal; + // Restore cursor position after value changes + useEffect(() => { + if (cursor !== null && refInput.current) { + refInput.current.setSelectionRange(cursor, cursor); + } + }, [cursor, value]); + const handleInputChange = (e) => { + setCursor(e.target.selectionStart); onChange && onChange(e.target.value); }; diff --git a/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/index.tsx b/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/index.tsx index 11889f12d..11ca476fd 100644 --- a/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/index.tsx +++ b/src/frontend/src/components/core/parameterRenderComponent/components/inputComponent/index.tsx @@ -45,6 +45,7 @@ export default function InputComponent({ hasRefreshButton = false, }: InputComponentType): JSX.Element { const [pwdVisible, setPwdVisible] = useState(false); + const [cursor, setCursor] = useState(null); const refInput = useRef(null); const [showOptions, setShowOptions] = useState(false); @@ -54,6 +55,13 @@ export default function InputComponent({ } }, [disabled]); + // Restore cursor position after value changes + useEffect(() => { + if (cursor !== null && refInput.current) { + refInput.current.setSelectionRange(cursor, cursor); + } + }, [cursor, value]); + function onInputLostFocus(event): void { if (onBlur) onBlur(event); } @@ -83,6 +91,7 @@ export default function InputComponent({ )} placeholder={password && editNode ? "Key" : placeholder} onChange={(e) => { + setCursor(e.target.selectionStart); if (onChangeFolderName) { return onChangeFolderName(e); } diff --git a/src/frontend/src/components/core/parameterRenderComponent/components/textAreaComponent/index.tsx b/src/frontend/src/components/core/parameterRenderComponent/components/textAreaComponent/index.tsx index 2a844ac59..07a1726af 100644 --- a/src/frontend/src/components/core/parameterRenderComponent/components/textAreaComponent/index.tsx +++ b/src/frontend/src/components/core/parameterRenderComponent/components/textAreaComponent/index.tsx @@ -74,6 +74,7 @@ export default function TextAreaComponent({ const inputRef = useRef(null); const [isFocused, setIsFocused] = useState(false); const [passwordVisible, setPasswordVisible] = useState(false); + const [cursor, setCursor] = useState(null); const isWebhook = useMemo( () => nodeInformationMetadata?.nodeType === "webhook", @@ -100,6 +101,13 @@ export default function TextAreaComponent({ } }, [isWebhook, value, nodeInformationMetadata, handleOnNewValue]); + // Restore cursor position after value changes + useEffect(() => { + if (cursor !== null && inputRef.current) { + inputRef.current.setSelectionRange(cursor, cursor); + } + }, [cursor, value]); + const getInputClassName = () => { return cn( inputClasses.base({ isFocused, password: password! 
}), @@ -111,6 +119,7 @@ export default function TextAreaComponent({ }; const handleInputChange = (e: React.ChangeEvent) => { + setCursor(e.target.selectionStart); handleOnNewValue({ value: e.target.value }); }; diff --git a/src/frontend/tests/core/unit/inputComponent.spec.ts b/src/frontend/tests/core/unit/inputComponent.spec.ts index 08d26a4f8..fb3d985c0 100644 --- a/src/frontend/tests/core/unit/inputComponent.spec.ts +++ b/src/frontend/tests/core/unit/inputComponent.spec.ts @@ -38,6 +38,19 @@ test( expect(false).toBeTruthy(); } + // Test cursor position preservation + const input = page.getByTestId("popover-anchor-input-collection_name"); + await input.click(); + await input.press("Home"); // Move cursor to start + await input.press("ArrowRight"); // Move cursor to position 1 + await input.press("ArrowRight"); // Move cursor to position 2 + await input.pressSequentially("X", { delay: 100 }); // Type at position 2 + const cursorValue = await input.inputValue(); + if (!cursorValue.startsWith("coX")) { + expect(false).toBeTruthy(); + } + await input.fill("collection_name_test_123123123!@#$&*(&%$@"); + await page.getByTestId("div-generic-node").click(); await page.getByTestId("edit-button-modal").last().click(); diff --git a/src/frontend/tests/core/unit/textAreaModalComponent.spec.ts b/src/frontend/tests/core/unit/textAreaModalComponent.spec.ts index 2621a49c0..2a6218564 100644 --- a/src/frontend/tests/core/unit/textAreaModalComponent.spec.ts +++ b/src/frontend/tests/core/unit/textAreaModalComponent.spec.ts @@ -44,6 +44,21 @@ test( "test test test test test test test test test test test !@#%*)( 123456789101010101010101111111111 !!!!!!!!!!", ); + // Test cursor position preservation + const textInput = page.getByTestId("textarea_str_text"); + await textInput.click(); + await textInput.press("Home"); // Move cursor to start + await textInput.press("ArrowRight"); // Move cursor to position 1 + await textInput.press("ArrowRight"); // Move cursor to position 2 + await textInput.pressSequentially("Y", { delay: 100 }); // Type at position 2 + const cursorValue = await textInput.inputValue(); + if (!cursorValue.startsWith("teY")) { + expect(false).toBeTruthy(); + } + await textInput.fill( + "test test test test test test test test test test test !@#%*)( 123456789101010101010101111111111 !!!!!!!!!!", + ); + await page .getByTestId("button_open_text_area_modal_textarea_str_text") .click(); From 9141005e6bd68e83d0babbe3d966de0d685f394a Mon Sep 17 00:00:00 2001 From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com> Date: Wed, 13 Aug 2025 13:20:04 -0300 Subject: [PATCH 05/86] fix: make settings nav not enter history (#9376) * remove unused location store * make sidebar component replace route in order for back button to go back to previous page before settings * added test to back button functionality * [autofix.ci] apply automated fixes --------- Co-authored-by: Mike Fortman Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../core/sidebarComponent/index.tsx | 2 +- src/frontend/src/stores/locationStore.ts | 21 --------- .../extended/features/userSettings.spec.ts | 45 +++++++++++++++++++ 3 files changed, 46 insertions(+), 22 deletions(-) delete mode 100644 src/frontend/src/stores/locationStore.ts diff --git a/src/frontend/src/components/core/sidebarComponent/index.tsx b/src/frontend/src/components/core/sidebarComponent/index.tsx index 9feae5c7f..1ba31000a 100644 --- a/src/frontend/src/components/core/sidebarComponent/index.tsx +++ 
b/src/frontend/src/components/core/sidebarComponent/index.tsx @@ -34,7 +34,7 @@ const SideBarButtonsComponent = ({ items }: SideBarButtonsComponentProps) => { {items.map((item, index) => ( - + ((set, get) => ({ - routeHistory: [], - setRouteHistory: (location) => { - const routeHistoryArray = get().routeHistory; - routeHistoryArray.push(location); - - if (routeHistoryArray?.length > 100) { - routeHistoryArray.shift(); - set({ - routeHistory: routeHistoryArray, - }); - } - - set({ - routeHistory: routeHistoryArray, - }); - }, -})); diff --git a/src/frontend/tests/extended/features/userSettings.spec.ts b/src/frontend/tests/extended/features/userSettings.spec.ts index a7cb26656..a4df73cb3 100644 --- a/src/frontend/tests/extended/features/userSettings.spec.ts +++ b/src/frontend/tests/extended/features/userSettings.spec.ts @@ -1,4 +1,5 @@ import { expect, test } from "@playwright/test"; +import { awaitBootstrapTest } from "../../utils/await-bootstrap-test"; test.beforeAll(async () => { await new Promise((resolve) => setTimeout(resolve, 7000)); @@ -200,3 +201,47 @@ test( await page.getByText(randomName).isVisible(); }, ); + +test( + "should navigate back to flow from global variables", + { tag: ["@release", "@workspace"] }, + async ({ page }) => { + await awaitBootstrapTest(page); + + await page.getByTestId("side_nav_options_all-templates").click(); + await page.getByRole("heading", { name: "Basic Prompting" }).click(); + await page.waitForSelector('[data-testid="fit_view"]', { + timeout: 100000, + }); + // Now navigate to user settings + await page.getByTestId("user-profile-settings").click(); + await page.getByTestId("menu_settings_button").click(); + + // Verify we're on the settings page + await expect(page.getByText("General").nth(2)).toBeVisible({ + timeout: 4000, + }); + + // Navigate to Global Variables + await page.getByText("Global Variables").click(); + await page.getByText("Global Variables").nth(2); + await page + .getByText("Global Variables", { exact: true }) + .nth(1) + .isVisible(); + + // Click the back button - this should take us back to the flow, not to the main settings page + await page.getByTestId("back_page_button").click(); + + // Verify we're back on the flow page, not the settings main page + await page.waitForSelector('[data-testid="sidebar-search-input"]', { + timeout: 5000, + }); + + // Additional verification that we're on the flow page + expect(page.url()).toMatch(/\/flow\//); + + // Verify we can see flow-specific elements + await expect(page.getByTestId("sidebar-search-input")).toBeVisible(); + }, +); From faff2015c428404efc948c49aab9b5268dddf2ad Mon Sep 17 00:00:00 2001 From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com> Date: Wed, 13 Aug 2025 17:14:30 -0300 Subject: [PATCH 06/86] fix: make tools be selected on mcp server after opening for the first time (#9377) * Add on grid ready handler to select required nodes after opening * [autofix.ci] apply automated fixes * Updated mcp server tab test to test if state is maintained after refresh * [autofix.ci] apply automated fixes --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Carlos Coelho <80289056+carlosrcoelho@users.noreply.github.com> --- .../components/toolsTable/index.tsx | 46 ++++++++++++------- .../extended/features/mcp-server-tab.spec.ts | 27 ++++++++++- 2 files changed, 55 insertions(+), 18 deletions(-) diff --git a/src/frontend/src/modals/toolsModal/components/toolsTable/index.tsx 
b/src/frontend/src/modals/toolsModal/components/toolsTable/index.tsx index 1302b4a33..23c7295d1 100644 --- a/src/frontend/src/modals/toolsModal/components/toolsTable/index.tsx +++ b/src/frontend/src/modals/toolsModal/components/toolsTable/index.tsx @@ -57,25 +57,31 @@ export default function ToolsTable({ setSelectedRows(filter); }, [rows, open]); - useEffect(() => { + const applyInitialSelection = () => { + if (!agGrid.current?.api) return; + const initialData = cloneDeep(rows); const filter = initialData.filter((row) => row.status === true); - if (agGrid.current) { - agGrid.current?.api?.forEachNode((node) => { - if ( - filter.some( - (row) => - (row.display_name ?? row.name) === - (node.data.display_name ?? node.data.name), - ) - ) { - node.setSelected(true); - } else { - node.setSelected(false); - } - }); - } - }, [agGrid.current]); + + agGrid.current.api.forEachNode((node) => { + if ( + filter.some( + (row) => + (row.display_name ?? row.name) === + (node.data.display_name ?? node.data.name), + ) + ) { + node.setSelected(true); + } else { + node.setSelected(false); + } + }); + }; + + // Apply initial selection when data changes and grid is ready + useEffect(() => { + applyInitialSelection(); + }, [rows, data]); useEffect(() => { if (!open) { @@ -251,6 +257,11 @@ export default function ToolsTable({ setSidebarOpen(true); }; + const handleGridReady = () => { + // Apply initial selection when grid is ready + applyInitialSelection(); + }; + const rowName = useMemo(() => { return parseString(focusedRow?.display_name || focusedRow?.name || "", [ "space_case", @@ -284,6 +295,7 @@ export default function ToolsTable({ tableOptions={tableOptions} onRowClicked={handleRowClicked} getRowId={getRowId} + onGridReady={handleGridReady} /> diff --git a/src/frontend/tests/extended/features/mcp-server-tab.spec.ts b/src/frontend/tests/extended/features/mcp-server-tab.spec.ts index 312a24b47..348f45101 100644 --- a/src/frontend/tests/extended/features/mcp-server-tab.spec.ts +++ b/src/frontend/tests/extended/features/mcp-server-tab.spec.ts @@ -85,12 +85,37 @@ test( await page.waitForTimeout(1000); } + // Verify if the state is maintained + + await page.locator('input[data-ref="eInput"]').first().click(); + + await page.waitForTimeout(1000); + + await page.reload(); + + // Navigate to MCP server tab + await page.getByTestId("mcp-btn").click({ timeout: 10000 }); + + // Verify MCP server tab is visible + await expect(page.getByTestId("mcp-server-title")).toBeVisible(); + await expect(page.getByText("Flows/Tools")).toBeVisible(); + + // Click on Edit Tools button + await page.getByTestId("button_open_actions").click(); + await page.waitForTimeout(500); + + // Verify actions modal is open + await expect(page.getByText("MCP Server Tools")).toBeVisible(); + const isCheckedAgainAgain = await page .locator('input[data-ref="eInput"]') .first() .isChecked(); - expect(isCheckedAgainAgain).toBeFalsy(); + expect(isCheckedAgainAgain).toBeTruthy(); + + await page.locator('input[data-ref="eInput"]').first().click(); + await page.waitForTimeout(1000); // Select first action let element = page.locator('input[data-ref="eInput"]').last(); From e68f6a405af4d35be9baf977da2081824696e5f7 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Wed, 13 Aug 2025 13:15:57 -0700 Subject: [PATCH 07/86] feat: Add support for Ingestion and Retrieval of Knowledge Bases (#9088) * refactor: Standardize import statements and improve code readability across components - Updated import statements to use consistent single quotes. 
- Refactored various components to enhance readability and maintainability. - Adjusted folder and file handling logic in the sidebar and file manager components. - Introduced a new tabbed interface for the files page to separate files and knowledge bases, improving user experience. * [autofix.ci] apply automated fixes * feat: Introduce new Files and Knowledge Bases page with tabbed interface - Added a new FilesPage component to manage file uploads and organization. - Implemented a tabbed interface to separate Files and Knowledge Bases for improved user experience. - Created FilesTab and KnowledgeBasesTab components for handling respective functionalities. - Refactored routing to accommodate the new structure and updated import statements for consistency. - Removed the old filesPage component to streamline the codebase. * Create knowledgebase_utils.py * Push initial ingest component * [autofix.ci] apply automated fixes * Create initial KB Ingestion component * [autofix.ci] apply automated fixes * Fix ruff check on utility functions * [autofix.ci] apply automated fixes * Some quick fixes * Update kb_ingest.py * [autofix.ci] apply automated fixes * First version of retrieval component * [autofix.ci] apply automated fixes * Update icon * Update kb_retrieval.py * [autofix.ci] apply automated fixes * Add knowledge bases feature with API integration and UI components * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Refactor imports and update routing paths for assets and main page components. Adjust tab handling in the assets page to reflect URL changes and improve user navigation experience. * [autofix.ci] apply automated fixes * Add CreateKnowledgeBaseButton, KnowledgeBaseEmptyState, and KnowledgeBaseSelectionOverlay components. Refactor KnowledgeBasesTab to utilize new components and improve UI for knowledge base management. Introduce utility functions for formatting numbers and average chunk sizes. * [autofix.ci] apply automated fixes * PoV: Add Parquet data retrieval to KBRetrievalComponent (#9097) * Add Parquet data retrieval to KBRetrievalComponent Introduces a new output to KBRetrievalComponent for returning knowledge base data by reading Parquet files. Updates dependencies to include fastparquet for Parquet support. * [autofix.ci] apply automated fixes --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> * Fix some ruff issues * [autofix.ci] apply automated fixes * feat: refactor file management and knowledge base components - Replaced the existing assetsPage with a new filesPage to better organize file management functionalities. - Introduced KnowledgePage to handle knowledge base operations, integrating KnowledgeBasesTab for displaying and managing knowledge bases. - Added various components for file and knowledge base management, including CreateKnowledgeBaseButton, KnowledgeBaseEmptyState, and drag-and-drop functionality. - Updated routing and imports to reflect the new structure and ensure consistency across the application. - Enhanced user experience with improved UI elements and state management for file selection and operations. * feat: implement delete confirmation modal for knowledge base deletion - Added a DeleteConfirmationModal component to confirm deletion actions. - Integrated the modal into the KnowledgeBasesTab for handling knowledge base deletions. - Updated column definitions to include a delete button for each knowledge base. 
- Enhanced user experience by ensuring deletion actions require confirmation. - Adjusted styles for the knowledge base table to improve checkbox visibility. * feat: enhance knowledge base metadata with embedding model detection - Added `embedding_model` field to `KnowledgeBaseInfo` for improved metadata tracking. - Implemented `detect_embedding_model` function to extract embedding model information from configuration files. - Updated `get_kb_metadata` to prioritize metadata extraction from `embedding_metadata.json`, falling back to detection if necessary. - Modified `KBIngestionComponent` to save embedding model metadata during ingestion. - Adjusted frontend components to display embedding model information in knowledge base queries and tables. * refactor: clean up tooltip and value getter comments in knowledge base columns - Removed redundant comments in the `knowledgeBaseColumns.tsx` file to enhance code clarity. - Simplified the tooltip and value getter functions for embedding model display. * [autofix.ci] apply automated fixes * refactor: simplify KnowledgeBaseSelectionOverlay component - Removed the unused onExport prop and its associated functionality. - Cleaned up code formatting for consistency and readability. - Updated success message strings to use single quotes for uniformity. * feat: implement bulk and single deletion for knowledge bases - Added `BulkDeleteRequest` model to handle bulk deletion requests. - Implemented `delete_knowledge_base` endpoint for single knowledge base deletion. - Created `delete_knowledge_bases_bulk` endpoint for deleting multiple knowledge bases at once. - Introduced `useDeleteKnowledgeBase` and `useDeleteKnowledgeBases` hooks for frontend integration. - Updated `KnowledgeBaseSelectionOverlay` and `KnowledgeBasesTab` components to utilize new deletion functionality with user feedback on success and error handling. * Initial support for vector search * feat: add KnowledgeBaseDrawer component for enhanced knowledge base details - Introduced `KnowledgeBaseDrawer` component to display detailed information about selected knowledge bases. - Integrated mock data for source files and linked flows, with a layout for displaying descriptions and embedding models. - Updated `KnowledgeBasesTab` to handle row clicks and open the drawer with relevant knowledge base data. - Enhanced `KnowledgePage` to manage drawer state and selected knowledge base, improving user interaction and experience. * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Fix ruff checks * Update knowledge_bases.py * feat: update mock data and enhance drawer functionality in KnowledgeBase components - Replaced mock data in `KnowledgeBaseDrawer` with more descriptive placeholders. - Added a reference to the drawer in `KnowledgePage` for improved click handling. - Implemented logic to close the drawer when clicking outside, except for table row clicks. - Enhanced row click handling to toggle drawer state based on current visibility. * [autofix.ci] apply automated fixes * Append scores column to rows * refactor: improve knowledge base deletion and UI components - Updated `useDeleteKnowledgeBase` and `useDeleteKnowledgeBases` to enhance parameter naming for clarity. - Removed the `CreateKnowledgeBaseButton` component and its references to streamline the UI. - Simplified the `KnowledgeBaseDrawer` and `KnowledgeBasesTab` components by removing mock data and improving state management. 
- Enhanced the `KnowledgeBaseSelectionOverlay` to better handle bulk deletions and selection states. - Refactored various components for consistent styling and improved readability. * refactor: standardize import statements and improve code readability in SideBarFoldersButtonsComponent - Updated import statements to use consistent single quotes. - Refactored various function calls and state management for improved clarity. - Enhanced folder handling logic and UI interactions for better user experience. * feat: Add encryption for API keys in KB ingest and retrieval (#9129) Add encryption for API keys in KB ingest and retrieval Introduces secure storage of embedding model API keys by encrypting them during knowledge base ingestion and decrypting them during retrieval. Refactors metadata handling to include encrypted API keys, updates retrieval to support decryption and dynamic embedder construction, and improves logging for key operations. Removes legacy embedding client code in retrieval in favor of a provider-based approach. * [autofix.ci] apply automated fixes * Fix import of auth utils * Allow appending to existing knowledge base * [autofix.ci] apply automated fixes * Update kb_ingest.py * Update kb_ingest.py * feat: enhance table component with editable Vectorize column functionality - Implemented logic to determine editability of the Vectorize column based on other row values. - Added checks to refresh grid cells upon changes to the Vectorize column. - Updated TableAutoCellRender to conditionally disable editing based on Vectorize column state. * New ingestion creation dialog * [autofix.ci] apply automated fixes * Clean up the creation process for KB * [autofix.ci] apply automated fixes * Clean up names and descriptions * Update kb_retrieval.py * chroma retrieval * [autofix.ci] apply automated fixes * Further KB cleanup * refactor: update KB ingestion component and enhance NodeDialog functionality - Restored SecretStrInput for API key in KB ingestion component. - Modified NodeDialog to handle new value format and added support for additional properties. - Introduced custom hooks for managing global variable states in InputGlobalComponent. - Improved dropdown component styling and interaction. - Cleaned up input component code for better readability and maintainability. * Hash the text as id * [autofix.ci] apply automated fixes * Update kb_retrieval.py * [autofix.ci] apply automated fixes * Make sure to write out the source parquet * Remove unneeded old code * Add ability to block duplicate ingestion chunks * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Rename retrieval component * Better refresh mechanism for the retrieve * Clean up some unused functionality * Update kb_ingest.py * Fix dropdown component logic to include checks for refresh button and dialog inputs * Test the API key before saving knowledge * [autofix.ci] apply automated fixes * Allow storing updated api keys if provided at ingest time * Add Knowledge Bases component and enhance Knowledge Base Empty State - Introduced a new JSON configuration for Knowledge Bases, defining nodes and edges for data processing. - Enhanced the KnowledgeBaseEmptyState component to include a button for creating a knowledge base template. - Updated KnowledgeBasesTab to handle template creation, integrating flow management and navigation features. 
* [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Update Knowledge Bases.json * Update Knowledge Bases configuration and enhance UI components - Updated the code hash in the Knowledge Bases JSON configuration. - Modified the KnowledgeBaseEmptyState component to change the button icon and text from "Try Knowledge Base Template" to "Create Knowledge". - Cleared the options for the knowledge base selection dropdowns to ensure they reflect the current state of available knowledge bases. * [autofix.ci] apply automated fixes * Implement feature flag for Knowledge Bases functionality - Added FEATURE_FLAGS.knowledge_bases to control the visibility of knowledge base components in the API and UI. - Updated the router to conditionally include the knowledge bases router based on the feature flag. - Modified KBIngestionComponent and KBRetrievalComponent to hide if the knowledge bases feature is disabled. - Enhanced the initial setup to skip loading knowledge base starter projects when the feature is disabled. - Updated frontend routes and sidebar components to conditionally render knowledge base options based on the feature flag. - Adjusted API queries to return an empty array if the knowledge bases feature is disabled. * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Refactor Knowledge Bases feature flag implementation - Removed the FEATURE_FLAGS.knowledge_bases flag from backend components and frontend routes. - Updated the API and UI to always include knowledge base components, simplifying the codebase. - Adjusted the frontend feature flags to set ENABLE_KNOWLEDGE_BASES to false, ensuring knowledge base features are not displayed. - Cleaned up related components and routes to reflect the removal of the feature flag, enhancing maintainability. * revert * [autofix.ci] apply automated fixes * Remove Knowledge Bases JSON configuration and clean up KnowledgeBasesTab component by eliminating unused imports and template creation functionality. * [autofix.ci] apply automated fixes * Enhance routing structure by adding admin and login routes with protected access. Refactor flow routes for improved organization and clarity. * added template back * Use chroma for stats computation * Fix ruff issue * [autofix.ci] apply automated fixes * Update Knowledge Bases.json * Update Knowledge Bases.json * Rename to just knowledge * feat: enhance Jest configuration and add new tests for Knowledge Base components - Updated jest.config.js to include a new setup file and refined test matching patterns. - Introduced jest.setup.js for mocking globals and Vite-specific syntax. - Added tests for KnowledgeBaseDrawer, KnowledgeBaseEmptyState, KnowledgeBaseSelectionOverlay, KnowledgeBasesTab, and KnowledgePage components. - Created utility functions for testing and mock data for knowledge bases. - Implemented tests for utility functions related to knowledge base formatting. * [autofix.ci] apply automated fixes * refactor: reorganize imports and clean up console log in Dropdown component - Moved and re-imported necessary dependencies for better structure. - Removed unnecessary console log statement to clean up the code. * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * feat: add success callback for knowledge base creation in NodeDialog component - Introduced a new success callback to handle knowledge base creation notifications. - Enhanced dialog closing logic with a delay for Astra database tracking. 
- Reorganized imports for better structure. * refactor: update table component to handle single-toggle columns - Renamed functions and variables to improve clarity regarding single-toggle columns (Vectorize and Identifier). - Updated logic to ensure proper editability checks for single-toggle columns. - Adjusted related components to reflect changes in column handling and rendering. * [autofix.ci] apply automated fixes * feat: Add unit tests for KBIngestionComponent (#9246) * [autofix.ci] apply automated fixes * fix: remove unnecessary drawer open state change in KnowledgePage * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Remove kb_info output from KBIngestionComponent (#9275) * [autofix.ci] apply automated fixes * Update Knowledge Bases.json * Use settings service for knowledge base directory Replaces the hardcoded knowledge base directory path with a value from the settings service. This improves configurability and centralizes directory management. * Fix knowledge bases mypy issue * test: Update file page tests for consistency and clarity - Changed expected title text from "My Files" to "Files" for accuracy. - Removed unnecessary parentheses in arrow functions for cleaner syntax. - Updated test assertions to ensure visibility checks are clear and consistent. - Improved readability by standardizing the formatting of test cases. * test: Update expected title in file upload component test for accuracy - Changed expected title text from "My Files" to "Files" to reflect the correct page title. * [autofix.ci] apply automated fixes * Fix tests on backend * Update kb_ingest.py * [autofix.ci] apply automated fixes * Switch to two templates for KB * Update names and descs * [autofix.ci] apply automated fixes * Rename templates * [autofix.ci] apply automated fixes --------- Co-authored-by: Deon Sanchez <69873175+deon-sanchez@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Edwin Jose --- pyproject.toml | 2 + src/backend/base/langflow/api/router.py | 2 + src/backend/base/langflow/api/v1/__init__.py | 2 + .../base/langflow/api/v1/knowledge_bases.py | 437 +++++++ .../base/langflow/base/data/kb_utils.py | 104 ++ .../base/langflow/components/data/__init__.py | 4 + .../langflow/components/data/kb_ingest.py | 585 +++++++++ .../langflow/components/data/kb_retrieval.py | 254 ++++ .../starter_projects/Knowledge Ingestion.json | 1052 +++++++++++++++++ .../starter_projects/Knowledge Retrieval.json | 707 +++++++++++ .../base/langflow/services/settings/base.py | 3 + src/backend/tests/unit/base/data/__init__.py | 0 .../tests/unit/base/data/test_kb_utils.py | 458 +++++++ .../unit/components/data/test_kb_ingest.py | 392 ++++++ .../unit/components/data/test_kb_retrieval.py | 368 ++++++ src/frontend/jest.config.js | 4 +- src/frontend/jest.setup.js | 38 + .../components/NodeDialogComponent/index.tsx | 103 +- .../core/dropdownComponent/index.tsx | 72 +- .../components/sideBarFolderButtons/index.tsx | 23 +- .../components/inputGlobalComponent/hooks.ts | 82 ++ .../components/inputGlobalComponent/index.tsx | 181 +-- .../components/inputGlobalComponent/types.ts | 14 + .../components/tableAutoCellRender/index.tsx | 8 +- .../components/tableComponent/index.tsx | 216 +++- .../src/controllers/API/helpers/constants.ts | 1 + .../use-delete-knowledge-base.ts | 39 + .../use-delete-knowledge-bases.ts | 38 + .../use-get-knowledge-bases.ts | 40 + .../src/customization/feature-flags.ts | 2 + 
.../modals/deleteConfirmationModal/index.tsx | 4 +- .../pages/filesPage/components/FilesTab.tsx | 446 +++++++ .../components/KnowledgeBaseDrawer.tsx | 68 ++ .../components/KnowledgeBaseEmptyState.tsx | 63 + .../KnowledgeBaseSelectionOverlay.tsx | 97 ++ .../components/KnowledgeBasesTab.tsx | 221 ++++ .../__tests__/KnowledgeBaseDrawer.test.tsx | 163 +++ .../KnowledgeBaseEmptyState.test.tsx | 105 ++ .../KnowledgeBaseSelectionOverlay.test.tsx | 173 +++ .../__tests__/KnowledgeBasesTab.test.tsx | 170 +++ .../components/__tests__/test-utils.tsx | 126 ++ .../filesPage/config/knowledgeBaseColumns.tsx | 115 ++ .../pages/MainPage/pages/filesPage/index.tsx | 441 +------ .../__tests__/knowledgeBaseUtils.test.ts | 73 ++ .../filesPage/utils/knowledgeBaseUtils.ts | 13 + .../__tests__/KnowledgePage.test.tsx | 244 ++++ .../MainPage/pages/knowledgePage/index.tsx | 143 +++ .../src/pages/MainPage/pages/main-page.tsx | 2 +- src/frontend/src/routes.tsx | 13 +- src/frontend/src/style/ag-theme-shadcn.css | 10 + .../core/unit/fileUploadComponent.spec.ts | 2 +- .../extended/features/files-page.spec.ts | 2 +- uv.lock | 157 ++- 53 files changed, 7475 insertions(+), 607 deletions(-) create mode 100644 src/backend/base/langflow/api/v1/knowledge_bases.py create mode 100644 src/backend/base/langflow/base/data/kb_utils.py create mode 100644 src/backend/base/langflow/components/data/kb_ingest.py create mode 100644 src/backend/base/langflow/components/data/kb_retrieval.py create mode 100644 src/backend/base/langflow/initial_setup/starter_projects/Knowledge Ingestion.json create mode 100644 src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json create mode 100644 src/backend/tests/unit/base/data/__init__.py create mode 100644 src/backend/tests/unit/base/data/test_kb_utils.py create mode 100644 src/backend/tests/unit/components/data/test_kb_ingest.py create mode 100644 src/backend/tests/unit/components/data/test_kb_retrieval.py create mode 100644 src/frontend/jest.setup.js create mode 100644 src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/hooks.ts create mode 100644 src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/types.ts create mode 100644 src/frontend/src/controllers/API/queries/knowledge-bases/use-delete-knowledge-base.ts create mode 100644 src/frontend/src/controllers/API/queries/knowledge-bases/use-delete-knowledge-bases.ts create mode 100644 src/frontend/src/controllers/API/queries/knowledge-bases/use-get-knowledge-bases.ts create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/FilesTab.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseDrawer.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseEmptyState.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseSelectionOverlay.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBasesTab.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseDrawer.test.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseEmptyState.test.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseSelectionOverlay.test.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBasesTab.test.tsx create mode 
100644 src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/test-utils.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/config/knowledgeBaseColumns.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/utils/__tests__/knowledgeBaseUtils.test.ts create mode 100644 src/frontend/src/pages/MainPage/pages/filesPage/utils/knowledgeBaseUtils.ts create mode 100644 src/frontend/src/pages/MainPage/pages/knowledgePage/__tests__/KnowledgePage.test.tsx create mode 100644 src/frontend/src/pages/MainPage/pages/knowledgePage/index.tsx diff --git a/pyproject.toml b/pyproject.toml index 57e0f45d6..9079c8186 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,7 @@ dependencies = [ "opensearch-py==2.8.0", "langchain-google-genai==2.0.6", "langchain-cohere==0.3.3", + "langchain-huggingface==0.3.1", "langchain-anthropic==0.3.14", "langchain-astradb~=0.6.0", "langchain-openai>=0.2.12", @@ -126,6 +127,7 @@ dependencies = [ "docling_core>=2.36.1", "filelock>=3.18.0", "jigsawstack==0.2.7", + "fastparquet>=2024.11.0", ] [dependency-groups] diff --git a/src/backend/base/langflow/api/router.py b/src/backend/base/langflow/api/router.py index df7b2aebe..731d0a3e9 100644 --- a/src/backend/base/langflow/api/router.py +++ b/src/backend/base/langflow/api/router.py @@ -8,6 +8,7 @@ from langflow.api.v1 import ( files_router, flows_router, folders_router, + knowledge_bases_router, login_router, mcp_projects_router, mcp_router, @@ -45,6 +46,7 @@ router_v1.include_router(monitor_router) router_v1.include_router(folders_router) router_v1.include_router(projects_router) router_v1.include_router(starter_projects_router) +router_v1.include_router(knowledge_bases_router) router_v1.include_router(mcp_router) router_v1.include_router(voice_mode_router) router_v1.include_router(mcp_projects_router) diff --git a/src/backend/base/langflow/api/v1/__init__.py b/src/backend/base/langflow/api/v1/__init__.py index ad276df48..9a86307c0 100644 --- a/src/backend/base/langflow/api/v1/__init__.py +++ b/src/backend/base/langflow/api/v1/__init__.py @@ -4,6 +4,7 @@ from langflow.api.v1.endpoints import router as endpoints_router from langflow.api.v1.files import router as files_router from langflow.api.v1.flows import router as flows_router from langflow.api.v1.folders import router as folders_router +from langflow.api.v1.knowledge_bases import router as knowledge_bases_router from langflow.api.v1.login import router as login_router from langflow.api.v1.mcp import router as mcp_router from langflow.api.v1.mcp_projects import router as mcp_projects_router @@ -23,6 +24,7 @@ __all__ = [ "files_router", "flows_router", "folders_router", + "knowledge_bases_router", "login_router", "mcp_projects_router", "mcp_router", diff --git a/src/backend/base/langflow/api/v1/knowledge_bases.py b/src/backend/base/langflow/api/v1/knowledge_bases.py new file mode 100644 index 000000000..138fda815 --- /dev/null +++ b/src/backend/base/langflow/api/v1/knowledge_bases.py @@ -0,0 +1,437 @@ +import json +import shutil +from http import HTTPStatus +from pathlib import Path + +import pandas as pd +from fastapi import APIRouter, HTTPException +from langchain_chroma import Chroma +from loguru import logger +from pydantic import BaseModel + +from langflow.services.deps import get_settings_service + +router = APIRouter(tags=["Knowledge Bases"], prefix="/knowledge_bases") + + +settings = get_settings_service().settings +knowledge_directory = settings.knowledge_bases_dir +if not knowledge_directory: + msg = "Knowledge 
bases directory is not set in the settings." + raise ValueError(msg) +KNOWLEDGE_BASES_DIR = Path(knowledge_directory).expanduser() + + +class KnowledgeBaseInfo(BaseModel): + id: str + name: str + embedding_provider: str | None = "Unknown" + embedding_model: str | None = "Unknown" + size: int = 0 + words: int = 0 + characters: int = 0 + chunks: int = 0 + avg_chunk_size: float = 0.0 + + +class BulkDeleteRequest(BaseModel): + kb_names: list[str] + + +def get_kb_root_path() -> Path: + """Get the knowledge bases root path.""" + return KNOWLEDGE_BASES_DIR + + +def get_directory_size(path: Path) -> int: + """Calculate the total size of all files in a directory.""" + total_size = 0 + try: + for file_path in path.rglob("*"): + if file_path.is_file(): + total_size += file_path.stat().st_size + except (OSError, PermissionError): + pass + return total_size + + +def detect_embedding_provider(kb_path: Path) -> str: + """Detect the embedding provider from config files and directory structure.""" + # Provider patterns to check for + provider_patterns = { + "OpenAI": ["openai", "text-embedding-ada", "text-embedding-3"], + "HuggingFace": ["sentence-transformers", "huggingface", "bert-"], + "Cohere": ["cohere", "embed-english", "embed-multilingual"], + "Google": ["palm", "gecko", "google"], + "Chroma": ["chroma"], + } + + # Check JSON config files for provider information + for config_file in kb_path.glob("*.json"): + try: + with config_file.open("r", encoding="utf-8") as f: + config_data = json.load(f) + if not isinstance(config_data, dict): + continue + + config_str = json.dumps(config_data).lower() + + # Check for explicit provider fields first + provider_fields = ["embedding_provider", "provider", "embedding_model_provider"] + for field in provider_fields: + if field in config_data: + provider_value = str(config_data[field]).lower() + for provider, patterns in provider_patterns.items(): + if any(pattern in provider_value for pattern in patterns): + return provider + + # Check for model name patterns + for provider, patterns in provider_patterns.items(): + if any(pattern in config_str for pattern in patterns): + return provider + + except (OSError, json.JSONDecodeError) as _: + logger.exception("Error reading config file '%s'", config_file) + continue + + # Fallback to directory structure + if (kb_path / "chroma").exists(): + return "Chroma" + if (kb_path / "vectors.npy").exists(): + return "Local" + + return "Unknown" + + +def detect_embedding_model(kb_path: Path) -> str: + """Detect the embedding model from config files.""" + # First check the embedding metadata file (most accurate) + metadata_file = kb_path / "embedding_metadata.json" + if metadata_file.exists(): + try: + with metadata_file.open("r", encoding="utf-8") as f: + metadata = json.load(f) + if isinstance(metadata, dict) and "embedding_model" in metadata: + # Check for embedding model field + model_value = str(metadata.get("embedding_model", "unknown")) + if model_value and model_value.lower() != "unknown": + return model_value + except (OSError, json.JSONDecodeError) as _: + logger.exception("Error reading embedding metadata file '%s'", metadata_file) + + # Check other JSON config files for model information + for config_file in kb_path.glob("*.json"): + # Skip the embedding metadata file since we already checked it + if config_file.name == "embedding_metadata.json": + continue + + try: + with config_file.open("r", encoding="utf-8") as f: + config_data = json.load(f) + if not isinstance(config_data, dict): + continue + + # Check for explicit 
model fields first and return the actual model name + model_fields = ["embedding_model", "model", "embedding_model_name", "model_name"] + for field in model_fields: + if field in config_data: + model_value = str(config_data[field]) + if model_value and model_value.lower() != "unknown": + return model_value + + # Check for OpenAI specific model names + if "openai" in json.dumps(config_data).lower(): + openai_models = ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"] + config_str = json.dumps(config_data).lower() + for model in openai_models: + if model in config_str: + return model + + # Check for HuggingFace model names (usually in model field) + if "model" in config_data: + model_name = str(config_data["model"]) + # Common HuggingFace embedding models + hf_patterns = ["sentence-transformers", "all-MiniLM", "all-mpnet", "multi-qa"] + if any(pattern in model_name for pattern in hf_patterns): + return model_name + + except (OSError, json.JSONDecodeError) as _: + logger.exception("Error reading config file '%s'", config_file) + continue + + return "Unknown" + + +def get_text_columns(df: pd.DataFrame, schema_data: list | None = None) -> list[str]: + """Get the text columns to analyze for word/character counts.""" + # First try schema-defined text columns + if schema_data: + text_columns = [ + col["column_name"] + for col in schema_data + if col.get("vectorize", False) and col.get("data_type") == "string" + ] + if text_columns: + return [col for col in text_columns if col in df.columns] + + # Fallback to common text column names + common_names = ["text", "content", "document", "chunk"] + text_columns = [col for col in df.columns if col.lower() in common_names] + if text_columns: + return text_columns + + # Last resort: all string columns + return [col for col in df.columns if df[col].dtype == "object"] + + +def calculate_text_metrics(df: pd.DataFrame, text_columns: list[str]) -> tuple[int, int]: + """Calculate total words and characters from text columns.""" + total_words = 0 + total_characters = 0 + + for col in text_columns: + if col not in df.columns: + continue + + text_series = df[col].astype(str).fillna("") + total_characters += text_series.str.len().sum() + total_words += text_series.str.split().str.len().sum() + + return int(total_words), int(total_characters) + + +def get_kb_metadata(kb_path: Path) -> dict: + """Extract metadata from a knowledge base directory.""" + metadata: dict[str, float | int | str] = { + "chunks": 0, + "words": 0, + "characters": 0, + "avg_chunk_size": 0.0, + "embedding_provider": "Unknown", + "embedding_model": "Unknown", + } + + try: + # First check embedding metadata file for accurate provider and model info + metadata_file = kb_path / "embedding_metadata.json" + if metadata_file.exists(): + try: + with metadata_file.open("r", encoding="utf-8") as f: + embedding_metadata = json.load(f) + if isinstance(embedding_metadata, dict): + if "embedding_provider" in embedding_metadata: + metadata["embedding_provider"] = embedding_metadata["embedding_provider"] + if "embedding_model" in embedding_metadata: + metadata["embedding_model"] = embedding_metadata["embedding_model"] + except (OSError, json.JSONDecodeError) as _: + logger.exception("Error reading embedding metadata file '%s'", metadata_file) + + # Fallback to detection if not found in metadata file + if metadata["embedding_provider"] == "Unknown": + metadata["embedding_provider"] = detect_embedding_provider(kb_path) + if metadata["embedding_model"] == "Unknown": + 
metadata["embedding_model"] = detect_embedding_model(kb_path) + + # Read schema for text column information + schema_data = None + schema_file = kb_path / "schema.json" + if schema_file.exists(): + try: + with schema_file.open("r", encoding="utf-8") as f: + schema_data = json.load(f) + if not isinstance(schema_data, list): + schema_data = None + except (ValueError, TypeError, OSError) as _: + logger.exception("Error reading schema file '%s'", schema_file) + + # Create vector store + chroma = Chroma( + persist_directory=str(kb_path), + collection_name=kb_path.name, + ) + + # Access the raw collection + collection = chroma._collection + + # Fetch all documents and metadata + results = collection.get(include=["documents", "metadatas"]) + + # Convert to pandas DataFrame + source_chunks = pd.DataFrame( + { + "document": results["documents"], + "metadata": results["metadatas"], + } + ) + + # Process the source data for metadata + try: + metadata["chunks"] = len(source_chunks) + + # Get text columns and calculate metrics + text_columns = get_text_columns(source_chunks, schema_data) + if text_columns: + words, characters = calculate_text_metrics(source_chunks, text_columns) + metadata["words"] = words + metadata["characters"] = characters + + # Calculate average chunk size + if int(metadata["chunks"]) > 0: + metadata["avg_chunk_size"] = round(int(characters) / int(metadata["chunks"]), 1) + + except (OSError, ValueError, TypeError) as _: + logger.exception("Error processing Chroma DB '%s'", kb_path.name) + + except (OSError, ValueError, TypeError) as _: + logger.exception("Error processing knowledge base directory '%s'", kb_path) + + return metadata + + +@router.get("", status_code=HTTPStatus.OK) +@router.get("/", status_code=HTTPStatus.OK) +async def list_knowledge_bases() -> list[KnowledgeBaseInfo]: + """List all available knowledge bases.""" + try: + kb_root_path = get_kb_root_path() + + if not kb_root_path.exists(): + return [] + + knowledge_bases = [] + + for kb_dir in kb_root_path.iterdir(): + if not kb_dir.is_dir() or kb_dir.name.startswith("."): + continue + + try: + # Get size of the directory + size = get_directory_size(kb_dir) + + # Get metadata from KB files + metadata = get_kb_metadata(kb_dir) + + kb_info = KnowledgeBaseInfo( + id=kb_dir.name, + name=kb_dir.name.replace("_", " ").replace("-", " ").title(), + embedding_provider=metadata["embedding_provider"], + embedding_model=metadata["embedding_model"], + size=size, + words=metadata["words"], + characters=metadata["characters"], + chunks=metadata["chunks"], + avg_chunk_size=metadata["avg_chunk_size"], + ) + + knowledge_bases.append(kb_info) + + except OSError as _: + # Log the exception and skip directories that can't be read + logger.exception("Error reading knowledge base directory '%s'", kb_dir) + continue + + # Sort by name alphabetically + knowledge_bases.sort(key=lambda x: x.name) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error listing knowledge bases: {e!s}") from e + else: + return knowledge_bases + + +@router.get("/{kb_name}", status_code=HTTPStatus.OK) +async def get_knowledge_base(kb_name: str) -> KnowledgeBaseInfo: + """Get detailed information about a specific knowledge base.""" + try: + kb_root_path = get_kb_root_path() + kb_path = kb_root_path / kb_name + + if not kb_path.exists() or not kb_path.is_dir(): + raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found") + + # Get size of the directory + size = get_directory_size(kb_path) + + # Get metadata from KB 
files
+        metadata = get_kb_metadata(kb_path)
+
+        return KnowledgeBaseInfo(
+            id=kb_name,
+            name=kb_name.replace("_", " ").replace("-", " ").title(),
+            embedding_provider=metadata["embedding_provider"],
+            embedding_model=metadata["embedding_model"],
+            size=size,
+            words=metadata["words"],
+            characters=metadata["characters"],
+            chunks=metadata["chunks"],
+            avg_chunk_size=metadata["avg_chunk_size"],
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error getting knowledge base '{kb_name}': {e!s}") from e
+
+
+@router.delete("/{kb_name}", status_code=HTTPStatus.OK)
+async def delete_knowledge_base(kb_name: str) -> dict[str, str]:
+    """Delete a specific knowledge base."""
+    try:
+        kb_root_path = get_kb_root_path()
+        kb_path = kb_root_path / kb_name
+
+        if not kb_path.exists() or not kb_path.is_dir():
+            raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")
+
+        # Delete the entire knowledge base directory
+        shutil.rmtree(kb_path)
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error deleting knowledge base '{kb_name}': {e!s}") from e
+    else:
+        return {"message": f"Knowledge base '{kb_name}' deleted successfully"}
+
+
+@router.delete("", status_code=HTTPStatus.OK)
+@router.delete("/", status_code=HTTPStatus.OK)
+async def delete_knowledge_bases_bulk(request: BulkDeleteRequest) -> dict[str, object]:
+    """Delete multiple knowledge bases."""
+    try:
+        kb_root_path = get_kb_root_path()
+        deleted_count = 0
+        not_found_kbs = []
+
+        for kb_name in request.kb_names:
+            kb_path = kb_root_path / kb_name
+
+            if not kb_path.exists() or not kb_path.is_dir():
+                not_found_kbs.append(kb_name)
+                continue
+
+            try:
+                # Delete the entire knowledge base directory
+                shutil.rmtree(kb_path)
+                deleted_count += 1
+            except OSError:
+                # PermissionError is a subclass of OSError; logger.exception records the traceback
+                logger.exception("Error deleting knowledge base '%s'", kb_name)
+                # Continue with other deletions even if one fails
+
+        if not_found_kbs and deleted_count == 0:
+            raise HTTPException(status_code=404, detail=f"Knowledge bases not found: {', '.join(not_found_kbs)}")
+
+        result = {
+            "message": f"Successfully deleted {deleted_count} knowledge base(s)",
+            "deleted_count": deleted_count,
+        }
+
+        if not_found_kbs:
+            result["not_found"] = ", ".join(not_found_kbs)
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error deleting knowledge bases: {e!s}") from e
+    else:
+        return result
diff --git a/src/backend/base/langflow/base/data/kb_utils.py b/src/backend/base/langflow/base/data/kb_utils.py
new file mode 100644
index 000000000..f453eef6f
--- /dev/null
+++ b/src/backend/base/langflow/base/data/kb_utils.py
@@ -0,0 +1,104 @@
+import math
+from collections import Counter
+
+
+def compute_tfidf(documents: list[str], query_terms: list[str]) -> list[float]:
+    """Compute TF-IDF scores for query terms across a collection of documents.
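+
+    For example (a worked case of the formula below): with documents
+    ["a b a", "b c"] and query_terms ["a"], the term "a" appears in one of the
+    two documents, so idf = ln(2/1) ≈ 0.693; the first document scores
+    tf * idf = (2/3) * 0.693 ≈ 0.46 and the second scores 0.0.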
+
+    Args:
+        documents: List of document strings
+        query_terms: List of query terms to score
+
+    Returns:
+        List of TF-IDF scores for each document
+    """
+    # Tokenize documents (simple whitespace splitting)
+    tokenized_docs = [doc.lower().split() for doc in documents]
+    n_docs = len(documents)
+
+    # Calculate document frequency for each term
+    document_frequencies = {}
+    for term in query_terms:
+        document_frequencies[term] = sum(1 for doc in tokenized_docs if term.lower() in doc)
+
+    scores = []
+
+    for doc_tokens in tokenized_docs:
+        doc_score = 0.0
+        doc_length = len(doc_tokens)
+        term_counts = Counter(doc_tokens)
+
+        for term in query_terms:
+            term_lower = term.lower()
+
+            # Term frequency (TF)
+            tf = term_counts[term_lower] / doc_length if doc_length > 0 else 0
+
+            # Inverse document frequency (IDF)
+            idf = math.log(n_docs / document_frequencies[term]) if document_frequencies[term] > 0 else 0
+
+            # TF-IDF score
+            doc_score += tf * idf
+
+        scores.append(doc_score)
+
+    return scores
+
+
+def compute_bm25(documents: list[str], query_terms: list[str], k1: float = 1.2, b: float = 0.75) -> list[float]:
+    """Compute BM25 scores for query terms across a collection of documents.
+
+    Args:
+        documents: List of document strings
+        query_terms: List of query terms to score
+        k1: Controls term frequency scaling (default: 1.2)
+        b: Controls document length normalization (default: 0.75)
+
+    Returns:
+        List of BM25 scores for each document
+    """
+    # Tokenize documents
+    tokenized_docs = [doc.lower().split() for doc in documents]
+    n_docs = len(documents)
+
+    # Calculate average document length
+    avg_doc_length = sum(len(doc) for doc in tokenized_docs) / n_docs if n_docs > 0 else 0
+
+    # Handle edge case where all documents are empty
+    if avg_doc_length == 0:
+        return [0.0] * n_docs
+
+    # Calculate document frequency for each term
+    document_frequencies = {}
+    for term in query_terms:
+        document_frequencies[term] = sum(1 for doc in tokenized_docs if term.lower() in doc)
+
+    scores = []
+
+    for doc_tokens in tokenized_docs:
+        doc_score = 0.0
+        doc_length = len(doc_tokens)
+        term_counts = Counter(doc_tokens)
+
+        for term in query_terms:
+            term_lower = term.lower()
+
+            # Term frequency in document
+            tf = term_counts[term_lower]
+
+            # Simplified non-negative IDF: log(N / df). The classic probabilistic
+            # BM25 IDF can go negative for terms that appear in most documents.
+            idf = math.log(n_docs / document_frequencies[term]) if document_frequencies[term] > 0 else 0
+
+            # BM25 score calculation
+            numerator = tf * (k1 + 1)
+            denominator = tf + k1 * (1 - b + b * (doc_length / avg_doc_length))
+
+            # Handle division by zero when tf=0 and k1=0
+            term_score = 0 if denominator == 0 else idf * (numerator / denominator)
+
+            doc_score += term_score
+
+        scores.append(doc_score)
+
+    return scores
diff --git a/src/backend/base/langflow/components/data/__init__.py b/src/backend/base/langflow/components/data/__init__.py
index 6e90f0426..4f589c37f 100644
--- a/src/backend/base/langflow/components/data/__init__.py
+++ b/src/backend/base/langflow/components/data/__init__.py
@@ -3,6 +3,8 @@ from .csv_to_data import CSVToDataComponent
 from .directory import DirectoryComponent
 from .file import FileComponent
 from .json_to_data import JSONToDataComponent
+from .kb_ingest import KBIngestionComponent
+from .kb_retrieval import KBRetrievalComponent
 from .news_search import NewsSearchComponent
 from .rss import RSSReaderComponent
 from .sql_executor import SQLComponent
@@ -16,6 +18,8 @@ __all__ = [
     "DirectoryComponent",
     "FileComponent",
     "JSONToDataComponent",
+    
"KBIngestionComponent", + "KBRetrievalComponent", "NewsSearchComponent", "RSSReaderComponent", "SQLComponent", diff --git a/src/backend/base/langflow/components/data/kb_ingest.py b/src/backend/base/langflow/components/data/kb_ingest.py new file mode 100644 index 000000000..6be2196fd --- /dev/null +++ b/src/backend/base/langflow/components/data/kb_ingest.py @@ -0,0 +1,585 @@ +from __future__ import annotations + +import hashlib +import json +import re +import uuid +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import pandas as pd +from cryptography.fernet import InvalidToken +from langchain_chroma import Chroma +from loguru import logger + +from langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES +from langflow.custom import Component +from langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput, TableInput +from langflow.schema.data import Data +from langflow.schema.dotdict import dotdict # noqa: TC001 +from langflow.schema.table import EditMode +from langflow.services.auth.utils import decrypt_api_key, encrypt_api_key +from langflow.services.deps import get_settings_service + +HUGGINGFACE_MODEL_NAMES = ["sentence-transformers/all-MiniLM-L6-v2", "sentence-transformers/all-mpnet-base-v2"] +COHERE_MODEL_NAMES = ["embed-english-v3.0", "embed-multilingual-v3.0"] + +settings = get_settings_service().settings +knowledge_directory = settings.knowledge_bases_dir +if not knowledge_directory: + msg = "Knowledge bases directory is not set in the settings." + raise ValueError(msg) +KNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser() + + +class KBIngestionComponent(Component): + """Create or append to Langflow Knowledge from a DataFrame.""" + + # ------ UI metadata --------------------------------------------------- + display_name = "Knowledge Ingestion" + description = "Create or update knowledge in Langflow." 
+ icon = "database" + name = "KBIngestion" + + @dataclass + class NewKnowledgeBaseInput: + functionality: str = "create" + fields: dict[str, dict] = field( + default_factory=lambda: { + "data": { + "node": { + "name": "create_knowledge_base", + "description": "Create new knowledge in Langflow.", + "display_name": "Create new knowledge", + "field_order": ["01_new_kb_name", "02_embedding_model", "03_api_key"], + "template": { + "01_new_kb_name": StrInput( + name="new_kb_name", + display_name="Knowledge Name", + info="Name of the new knowledge to create.", + required=True, + ), + "02_embedding_model": DropdownInput( + name="embedding_model", + display_name="Model Name", + info="Select the embedding model to use for this knowledge base.", + required=True, + options=OPENAI_EMBEDDING_MODEL_NAMES + HUGGINGFACE_MODEL_NAMES + COHERE_MODEL_NAMES, + options_metadata=[{"icon": "OpenAI"} for _ in OPENAI_EMBEDDING_MODEL_NAMES] + + [{"icon": "HuggingFace"} for _ in HUGGINGFACE_MODEL_NAMES] + + [{"icon": "Cohere"} for _ in COHERE_MODEL_NAMES], + ), + "03_api_key": SecretStrInput( + name="api_key", + display_name="API Key", + info="Provider API key for embedding model", + required=True, + load_from_db=True, + ), + }, + }, + } + } + ) + + # ------ Inputs -------------------------------------------------------- + inputs = [ + DropdownInput( + name="knowledge_base", + display_name="Knowledge", + info="Select the knowledge to load data from.", + required=True, + options=[ + str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir() + ] + if KNOWLEDGE_BASES_ROOT_PATH.exists() + else [], + refresh_button=True, + dialog_inputs=asdict(NewKnowledgeBaseInput()), + ), + DataFrameInput( + name="input_df", + display_name="Data", + info="Table with all original columns (already chunked / processed).", + required=True, + ), + TableInput( + name="column_config", + display_name="Column Configuration", + info="Configure column behavior for the knowledge base.", + required=True, + table_schema=[ + { + "name": "column_name", + "display_name": "Column Name", + "type": "str", + "description": "Name of the column in the source DataFrame", + "edit_mode": EditMode.INLINE, + }, + { + "name": "vectorize", + "display_name": "Vectorize", + "type": "boolean", + "description": "Create embeddings for this column", + "default": False, + "edit_mode": EditMode.INLINE, + }, + { + "name": "identifier", + "display_name": "Identifier", + "type": "boolean", + "description": "Use this column as unique identifier", + "default": False, + "edit_mode": EditMode.INLINE, + }, + ], + value=[ + { + "column_name": "text", + "vectorize": True, + "identifier": False, + } + ], + ), + IntInput( + name="chunk_size", + display_name="Chunk Size", + info="Batch size for processing embeddings", + advanced=True, + value=1000, + ), + SecretStrInput( + name="api_key", + display_name="Embedding Provider API Key", + info="API key for the embedding provider to generate embeddings.", + advanced=True, + required=False, + ), + BoolInput( + name="allow_duplicates", + display_name="Allow Duplicates", + info="Allow duplicate rows in the knowledge base", + advanced=True, + value=False, + ), + ] + + # ------ Outputs ------------------------------------------------------- + outputs = [Output(display_name="DataFrame", name="dataframe", method="build_kb_info")] + + # ------ Internal helpers --------------------------------------------- + def _get_kb_root(self) -> Path: + """Return the root directory for knowledge bases.""" + return 
KNOWLEDGE_BASES_ROOT_PATH
+
+    def _validate_column_config(self, df_source: pd.DataFrame) -> list[dict[str, Any]]:
+        """Validate column configuration using Structured Output patterns."""
+        if not self.column_config:
+            msg = "Column configuration cannot be empty"
+            raise ValueError(msg)
+
+        # Convert table input to list of dicts (similar to Structured Output)
+        config_list = self.column_config if isinstance(self.column_config, list) else []
+
+        # Validate column names exist in DataFrame
+        df_columns = set(df_source.columns)
+        for config in config_list:
+            col_name = config.get("column_name")
+            if col_name not in df_columns and not self.silent_errors:
+                msg = f"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}"
+                self.log(f"Warning: {msg}")
+                raise ValueError(msg)
+
+        return config_list
+
+    def _get_embedding_provider(self, embedding_model: str) -> str:
+        """Get embedding provider by matching model name to lists."""
+        if embedding_model in OPENAI_EMBEDDING_MODEL_NAMES:
+            return "OpenAI"
+        if embedding_model in HUGGINGFACE_MODEL_NAMES:
+            return "HuggingFace"
+        if embedding_model in COHERE_MODEL_NAMES:
+            return "Cohere"
+        return "Custom"
+
+    def _build_embeddings(self, embedding_model: str, api_key: str):
+        """Build embedding model using provider patterns."""
+        # Get provider by matching model name to lists
+        provider = self._get_embedding_provider(embedding_model)
+
+        # Validate provider and model
+        if provider == "OpenAI":
+            from langchain_openai import OpenAIEmbeddings
+
+            if not api_key:
+                msg = "OpenAI API key is required when using OpenAI provider"
+                raise ValueError(msg)
+            return OpenAIEmbeddings(
+                model=embedding_model,
+                api_key=api_key,
+                chunk_size=self.chunk_size,
+            )
+        if provider == "HuggingFace":
+            from langchain_huggingface import HuggingFaceEmbeddings
+
+            # HuggingFaceEmbeddings takes the checkpoint name via `model_name`
+            return HuggingFaceEmbeddings(
+                model_name=embedding_model,
+            )
+        if provider == "Cohere":
+            from langchain_cohere import CohereEmbeddings
+
+            if not api_key:
+                msg = "Cohere API key is required when using Cohere provider"
+                raise ValueError(msg)
+            return CohereEmbeddings(
+                model=embedding_model,
+                cohere_api_key=api_key,
+            )
+        if provider == "Custom":
+            # For custom embedding models, we would need additional configuration
+            msg = "Custom embedding models not yet supported"
+            raise NotImplementedError(msg)
+        msg = f"Unknown provider: {provider}"
+        raise ValueError(msg)
+
+    def _build_embedding_metadata(self, embedding_model, api_key) -> dict[str, Any]:
+        """Build embedding model metadata."""
+        # Get provider by matching model name to lists
+        embedding_provider = self._get_embedding_provider(embedding_model)
+
+        api_key_to_save = None
+        if api_key and hasattr(api_key, "get_secret_value"):
+            api_key_to_save = api_key.get_secret_value()
+        elif isinstance(api_key, str):
+            api_key_to_save = api_key
+
+        encrypted_api_key = None
+        if api_key_to_save:
+            settings_service = get_settings_service()
+            try:
+                encrypted_api_key = encrypt_api_key(api_key_to_save, settings_service=settings_service)
+            except (TypeError, ValueError) as e:
+                self.log(f"Could not encrypt API key: {e}")
+                logger.error(f"Could not encrypt API key: {e}")
+
+        return {
+            "embedding_provider": embedding_provider,
+            "embedding_model": embedding_model,
+            "api_key": encrypted_api_key,
+            "api_key_used": bool(api_key),
+            "chunk_size": self.chunk_size,
+            "created_at": datetime.now(timezone.utc).isoformat(),
+        }
+
+    def _save_embedding_metadata(self, kb_path: Path, embedding_model: str, api_key: str) -> None:
+        """Save embedding model metadata."""
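+        # Illustrative shape of the file written below (example values):
+        #   {
+        #     "embedding_provider": "OpenAI",
+        #     "embedding_model": "text-embedding-3-small",
+        #     "api_key": "<Fernet-encrypted string or null>",
+        #     "api_key_used": true,
+        #     "chunk_size": 1000,
+        #     "created_at": "2025-01-01T00:00:00+00:00"
+        #   }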
+ embedding_metadata = self._build_embedding_metadata(embedding_model, api_key) + metadata_path = kb_path / "embedding_metadata.json" + metadata_path.write_text(json.dumps(embedding_metadata, indent=2)) + + def _save_kb_files( + self, + kb_path: Path, + config_list: list[dict[str, Any]], + ) -> None: + """Save KB files using File Component storage patterns.""" + try: + # Create directory (following File Component patterns) + kb_path.mkdir(parents=True, exist_ok=True) + + # Save column configuration + # Only do this if the file doesn't exist already + cfg_path = kb_path / "schema.json" + if not cfg_path.exists(): + cfg_path.write_text(json.dumps(config_list, indent=2)) + + except Exception as e: + if not self.silent_errors: + raise + self.log(f"Error saving KB files: {e}") + + def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]: + """Build detailed column metadata.""" + metadata: dict[str, Any] = { + "total_columns": len(df_source.columns), + "mapped_columns": len(config_list), + "unmapped_columns": len(df_source.columns) - len(config_list), + "columns": [], + "summary": {"vectorized_columns": [], "identifier_columns": []}, + } + + for config in config_list: + col_name = config.get("column_name") + vectorize = config.get("vectorize") == "True" or config.get("vectorize") is True + identifier = config.get("identifier") == "True" or config.get("identifier") is True + + # Add to columns list + metadata["columns"].append( + { + "name": col_name, + "vectorize": vectorize, + "identifier": identifier, + } + ) + + # Update summary + if vectorize: + metadata["summary"]["vectorized_columns"].append(col_name) + if identifier: + metadata["summary"]["identifier_columns"].append(col_name) + + return metadata + + def _create_vector_store( + self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str + ) -> None: + """Create vector store following Local DB component pattern.""" + try: + # Set up vector store directory + base_dir = self._get_kb_root() + + vector_store_dir = base_dir / self.knowledge_base + vector_store_dir.mkdir(parents=True, exist_ok=True) + + # Create embeddings model + embedding_function = self._build_embeddings(embedding_model, api_key) + + # Convert DataFrame to Data objects (following Local DB pattern) + data_objects = self._convert_df_to_data_objects(df_source, config_list) + + # Create vector store + chroma = Chroma( + persist_directory=str(vector_store_dir), + embedding_function=embedding_function, + collection_name=self.knowledge_base, + ) + + # Convert Data objects to LangChain Documents + documents = [] + for data_obj in data_objects: + doc = data_obj.to_lc_document() + documents.append(doc) + + # Add documents to vector store + if documents: + chroma.add_documents(documents) + self.log(f"Added {len(documents)} documents to vector store '{self.knowledge_base}'") + + except Exception as e: + if not self.silent_errors: + raise + self.log(f"Error creating vector store: {e}") + + def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]: + """Convert DataFrame to Data objects for vector store.""" + data_objects: list[Data] = [] + + # Set up vector store directory + base_dir = self._get_kb_root() + + # If we don't allow duplicates, we need to get the existing hashes + chroma = Chroma( + persist_directory=str(base_dir / self.knowledge_base), + collection_name=self.knowledge_base, + ) + + # Get all documents and their metadata + 
all_docs = chroma.get() + + # Extract all _id values from metadata + id_list = [metadata.get("_id") for metadata in all_docs["metadatas"] if metadata.get("_id")] + + # Get column roles + content_cols = [] + identifier_cols = [] + + for config in config_list: + col_name = config.get("column_name") + vectorize = config.get("vectorize") == "True" or config.get("vectorize") is True + identifier = config.get("identifier") == "True" or config.get("identifier") is True + + if vectorize: + content_cols.append(col_name) + elif identifier: + identifier_cols.append(col_name) + + # Convert each row to a Data object + for _, row in df_source.iterrows(): + # Build content text from vectorized columns using list comprehension + content_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])] + + page_content = " ".join(content_parts) + + # Build metadata from NON-vectorized columns only (simple key-value pairs) + data_dict = { + "text": page_content, # Main content for vectorization + } + + # Add metadata columns as simple key-value pairs + for col in df_source.columns: + if col not in content_cols and col in row and pd.notna(row[col]): + # Convert to simple types for Chroma metadata + value = row[col] + data_dict[col] = str(value) # Convert complex types to string + + # Hash the page_content for unique ID + page_content_hash = hashlib.sha256(page_content.encode()).hexdigest() + data_dict["_id"] = page_content_hash + + # If duplicates are disallowed, and hash exists, prevent adding this row + if not self.allow_duplicates and page_content_hash in id_list: + self.log(f"Skipping duplicate row with hash {page_content_hash}") + continue + + # Create Data object - everything except "text" becomes metadata + data_obj = Data(data=data_dict) + data_objects.append(data_obj) + + return data_objects + + def is_valid_collection_name(self, name, min_length: int = 3, max_length: int = 63) -> bool: + """Validates collection name against conditions 1-3. + + 1. Contains 3-63 characters + 2. Starts and ends with alphanumeric character + 3. Contains only alphanumeric characters, underscores, or hyphens. 
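+
+        For example, "my-kb_01" satisfies all three rules, while "ab" (too
+        short) and "-kb" (leading hyphen) do not.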
+
+        Args:
+            name (str): Collection name to validate
+            min_length (int): Minimum length of the name
+            max_length (int): Maximum length of the name
+
+        Returns:
+            bool: True if valid, False otherwise
+        """
+        # Check length (condition 1)
+        if not (min_length <= len(name) <= max_length):
+            return False
+
+        # Check start/end with alphanumeric (condition 2)
+        if not (name[0].isalnum() and name[-1].isalnum()):
+            return False
+
+        # Check allowed characters (condition 3)
+        return re.match(r"^[a-zA-Z0-9_-]+$", name) is not None
+
+    # ---------------------------------------------------------------------
+    # OUTPUT METHODS
+    # ---------------------------------------------------------------------
+    def build_kb_info(self) -> Data:
+        """Main ingestion routine → returns a dict with KB metadata."""
+        try:
+            # Get source DataFrame
+            df_source: pd.DataFrame = self.input_df
+
+            # Validate column configuration (using Structured Output patterns)
+            config_list = self._validate_column_config(df_source)
+            column_metadata = self._build_column_metadata(config_list, df_source)
+
+            # Prepare KB folder (using File Component patterns)
+            kb_root = self._get_kb_root()
+            kb_path = kb_root / self.knowledge_base
+
+            # Read the embedding info from the knowledge base folder
+            metadata_path = kb_path / "embedding_metadata.json"
+            embedding_model = None
+            api_key = None
+
+            # If the API key is not provided, try to read it from the metadata file
+            if metadata_path.exists():
+                settings_service = get_settings_service()
+                metadata = json.loads(metadata_path.read_text())
+                embedding_model = metadata.get("embedding_model")
+                try:
+                    api_key = decrypt_api_key(metadata["api_key"], settings_service)
+                except (InvalidToken, TypeError, ValueError) as e:
+                    logger.error(f"Could not decrypt API key. Please provide it manually. Error: {e}")
+
+            # Fail clearly when the embedding metadata is missing or incomplete,
+            # instead of letting the calls below reference undefined values
+            if embedding_model is None:
+                msg = f"No embedding metadata found for knowledge base '{self.knowledge_base}'."
+                raise ValueError(msg)
+
+            # Check if a custom API key was provided, update metadata if so
+            if self.api_key:
+                api_key = self.api_key
+                self._save_embedding_metadata(
+                    kb_path=kb_path,
+                    embedding_model=embedding_model,
+                    api_key=api_key,
+                )
+
+            # Create vector store following Local DB component pattern
+            self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)
+
+            # Save KB files (using File Component storage patterns)
+            self._save_kb_files(kb_path, config_list)
+
+            # Build metadata response
+            meta: dict[str, Any] = {
+                "kb_id": str(uuid.uuid4()),
+                "kb_name": self.knowledge_base,
+                "rows": len(df_source),
+                "column_metadata": column_metadata,
+                "path": str(kb_path),
+                "config_columns": len(config_list),
+                "timestamp": datetime.now(tz=timezone.utc).isoformat(),
+            }
+
+            # Set status message
+            self.status = f"✅ KB **{self.knowledge_base}** saved · {len(df_source)} chunks."
+
+            return Data(data=meta)
+
+        except Exception as e:
+            if not self.silent_errors:
+                raise
+            self.log(f"Error in KB ingestion: {e}")
+            self.status = f"❌ KB ingestion failed: {e}"
+            return Data(data={"error": str(e), "kb_name": self.knowledge_base})
+
+    def _get_knowledge_bases(self) -> list[str]:
+        """Retrieve a list of available knowledge bases.
+
+        Returns:
+            A list of knowledge base names.
+ """ + # Return the list of directories in the knowledge base root path + kb_root_path = self._get_kb_root() + + if not kb_root_path.exists(): + return [] + + return [str(d.name) for d in kb_root_path.iterdir() if not d.name.startswith(".") and d.is_dir()] + + def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict: + """Update build configuration based on provider selection.""" + # Create a new knowledge base + if field_name == "knowledge_base": + if isinstance(field_value, dict) and "01_new_kb_name" in field_value: + # Validate the knowledge base name - Make sure it follows these rules: + if not self.is_valid_collection_name(field_value["01_new_kb_name"]): + msg = f"Invalid knowledge base name: {field_value['01_new_kb_name']}" + raise ValueError(msg) + + # We need to test the API Key one time against the embedding model + embed_model = self._build_embeddings( + embedding_model=field_value["02_embedding_model"], api_key=field_value["03_api_key"] + ) + + # Try to generate a dummy embedding to validate the API key + embed_model.embed_query("test") + + # Create the new knowledge base directory + kb_path = KNOWLEDGE_BASES_ROOT_PATH / field_value["01_new_kb_name"] + kb_path.mkdir(parents=True, exist_ok=True) + + # Save the embedding metadata + build_config["knowledge_base"]["value"] = field_value["01_new_kb_name"] + self._save_embedding_metadata( + kb_path=kb_path, + embedding_model=field_value["02_embedding_model"], + api_key=field_value["03_api_key"], + ) + + # Update the knowledge base options dynamically + build_config["knowledge_base"]["options"] = self._get_knowledge_bases() + if build_config["knowledge_base"]["value"] not in build_config["knowledge_base"]["options"]: + build_config["knowledge_base"]["value"] = None + + return build_config diff --git a/src/backend/base/langflow/components/data/kb_retrieval.py b/src/backend/base/langflow/components/data/kb_retrieval.py new file mode 100644 index 000000000..2356b74a3 --- /dev/null +++ b/src/backend/base/langflow/components/data/kb_retrieval.py @@ -0,0 +1,254 @@ +import json +from pathlib import Path +from typing import Any + +from cryptography.fernet import InvalidToken +from langchain_chroma import Chroma +from loguru import logger + +from langflow.custom import Component +from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput +from langflow.schema.data import Data +from langflow.schema.dataframe import DataFrame +from langflow.services.auth.utils import decrypt_api_key +from langflow.services.deps import get_settings_service + +settings = get_settings_service().settings +knowledge_directory = settings.knowledge_bases_dir +if not knowledge_directory: + msg = "Knowledge bases directory is not set in the settings." + raise ValueError(msg) +KNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser() + + +class KBRetrievalComponent(Component): + display_name = "Knowledge Retrieval" + description = "Search and retrieve data from knowledge." 
+ icon = "database" + name = "KBRetrieval" + + inputs = [ + DropdownInput( + name="knowledge_base", + display_name="Knowledge", + info="Select the knowledge to load data from.", + required=True, + options=[ + str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir() + ] + if KNOWLEDGE_BASES_ROOT_PATH.exists() + else [], + refresh_button=True, + real_time_refresh=True, + ), + SecretStrInput( + name="api_key", + display_name="Embedding Provider API Key", + info="API key for the embedding provider to generate embeddings.", + advanced=True, + required=False, + ), + MessageTextInput( + name="search_query", + display_name="Search Query", + info="Optional search query to filter knowledge base data.", + ), + IntInput( + name="top_k", + display_name="Top K Results", + info="Number of top results to return from the knowledge base.", + value=5, + advanced=True, + required=False, + ), + BoolInput( + name="include_metadata", + display_name="Include Metadata", + info="Whether to include all metadata and embeddings in the output. If false, only content is returned.", + value=True, + advanced=True, + ), + ] + + outputs = [ + Output( + name="chroma_kb_data", + display_name="Results", + method="get_chroma_kb_data", + info="Returns the data from the selected knowledge base.", + ), + ] + + def _get_knowledge_bases(self) -> list[str]: + """Retrieve a list of available knowledge bases. + + Returns: + A list of knowledge base names. + """ + if not KNOWLEDGE_BASES_ROOT_PATH.exists(): + return [] + + return [str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir()] + + def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002 + if field_name == "knowledge_base": + # Update the knowledge base options dynamically + build_config["knowledge_base"]["options"] = self._get_knowledge_bases() + + # If the selected knowledge base is not available, reset it + if build_config["knowledge_base"]["value"] not in build_config["knowledge_base"]["options"]: + build_config["knowledge_base"]["value"] = None + + return build_config + + def _get_kb_metadata(self, kb_path: Path) -> dict: + """Load and process knowledge base metadata.""" + metadata: dict[str, Any] = {} + metadata_file = kb_path / "embedding_metadata.json" + if not metadata_file.exists(): + logger.warning(f"Embedding metadata file not found at {metadata_file}") + return metadata + + try: + with metadata_file.open("r", encoding="utf-8") as f: + metadata = json.load(f) + except json.JSONDecodeError: + logger.error(f"Error decoding JSON from {metadata_file}") + return {} + + # Decrypt API key if it exists + if "api_key" in metadata and metadata.get("api_key"): + settings_service = get_settings_service() + try: + decrypted_key = decrypt_api_key(metadata["api_key"], settings_service) + metadata["api_key"] = decrypted_key + except (InvalidToken, TypeError, ValueError) as e: + logger.error(f"Could not decrypt API key. Please provide it manually. Error: {e}") + metadata["api_key"] = None + return metadata + + def _build_embeddings(self, metadata: dict): + """Build embedding model from metadata.""" + provider = metadata.get("embedding_provider") + model = metadata.get("embedding_model") + api_key = metadata.get("api_key") + chunk_size = metadata.get("chunk_size") + + # If user provided a key in the input, it overrides the stored one. 
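+        # Note: the stored key was decrypted in _get_kb_metadata and may be
+        # None if decryption failed, in which case a user-supplied key is needed.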
+        if self.api_key and self.api_key.get_secret_value():
+            api_key = self.api_key.get_secret_value()
+
+        # Handle various providers
+        if provider == "OpenAI":
+            from langchain_openai import OpenAIEmbeddings
+
+            if not api_key:
+                msg = "OpenAI API key is required. Provide it in the component's advanced settings."
+                raise ValueError(msg)
+            return OpenAIEmbeddings(
+                model=model,
+                api_key=api_key,
+                chunk_size=chunk_size,
+            )
+        if provider == "HuggingFace":
+            from langchain_huggingface import HuggingFaceEmbeddings
+
+            # HuggingFaceEmbeddings takes the checkpoint name via `model_name`
+            return HuggingFaceEmbeddings(
+                model_name=model,
+            )
+        if provider == "Cohere":
+            from langchain_cohere import CohereEmbeddings
+
+            if not api_key:
+                msg = "Cohere API key is required when using Cohere provider"
+                raise ValueError(msg)
+            return CohereEmbeddings(
+                model=model,
+                cohere_api_key=api_key,
+            )
+        if provider == "Custom":
+            # For custom embedding models, we would need additional configuration
+            msg = "Custom embedding models not yet supported"
+            raise NotImplementedError(msg)
+        # Add other providers here if they become supported in ingest
+        msg = f"Embedding provider '{provider}' is not supported for retrieval."
+        raise NotImplementedError(msg)
+
+    def get_chroma_kb_data(self) -> DataFrame:
+        """Retrieve data from the selected knowledge base by reading the Chroma collection.
+
+        Returns:
+            A DataFrame containing the data rows from the knowledge base.
+        """
+        kb_path = KNOWLEDGE_BASES_ROOT_PATH / self.knowledge_base
+
+        metadata = self._get_kb_metadata(kb_path)
+        if not metadata:
+            msg = f"Metadata not found for knowledge base: {self.knowledge_base}. Ensure it has been indexed."
+            raise ValueError(msg)
+
+        # Build the embedder for the knowledge base
+        embedding_function = self._build_embeddings(metadata)
+
+        # Load vector store
+        chroma = Chroma(
+            persist_directory=str(kb_path),
+            embedding_function=embedding_function,
+            collection_name=self.knowledge_base,
+        )
+
+        # If a search query is provided, perform a similarity search
+        if self.search_query:
+            # Use the search query to perform a similarity search
+            logger.info(f"Performing similarity search with query: {self.search_query}")
+            results = chroma.similarity_search_with_score(
+                query=self.search_query,
+                k=self.top_k,
+            )
+        else:
+            # No query: embed an empty string just to pull back top_k stored documents
+            results = chroma.similarity_search(
+                query="",
+                k=self.top_k,
+            )
+
+            # For each result, make it a tuple to match the expected output format
+            results = [(doc, 0) for doc in results]  # Assign a dummy score of 0
+
+        # If metadata is enabled, get embeddings for the results
+        id_to_embedding = {}
+        if self.include_metadata and results:
+            doc_ids = [doc[0].metadata.get("_id") for doc in results if doc[0].metadata.get("_id")]
+
+            # Only proceed if we have valid document IDs
+            if doc_ids:
+                # Access underlying client to get embeddings
+                collection = chroma._client.get_collection(name=self.knowledge_base)
+                embeddings_result = collection.get(where={"_id": {"$in": doc_ids}}, include=["embeddings", "metadatas"])
+
+                # Create a mapping from document ID to embedding
+                for i, doc_metadata in enumerate(embeddings_result.get("metadatas", [])):
+                    if doc_metadata and "_id" in doc_metadata:
+                        id_to_embedding[doc_metadata["_id"]] = embeddings_result["embeddings"][i]
+
+        # Build output data based on include_metadata setting
+        data_list = []
+        for doc in results:
+            if self.include_metadata:
+                # Include all metadata, embeddings, and content
+                kwargs = {
+                    "content": doc[0].page_content,
+                    **doc[0].metadata,
+                }
+                if self.search_query:
+                    kwargs["_score"] = -1 * doc[1]
+                kwargs["_embeddings"] = 
id_to_embedding.get(doc[0].metadata.get("_id")) + else: + # Only include content + kwargs = { + "content": doc[0].page_content, + } + + data_list.append(Data(**kwargs)) + + # Return the DataFrame containing the data + return DataFrame(data=data_list) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Ingestion.json b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Ingestion.json new file mode 100644 index 000000000..b023a135b --- /dev/null +++ b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Ingestion.json @@ -0,0 +1,1052 @@ +{ + "data": { + "edges": [ + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "URLComponent", + "id": "URLComponent-6JEUC", + "name": "page_results", + "output_types": [ + "DataFrame" + ] + }, + "targetHandle": { + "fieldName": "data_inputs", + "id": "SplitText-gvHe2", + "inputTypes": [ + "Data", + "DataFrame", + "Message" + ], + "type": "other" + } + }, + "id": "reactflow__edge-URLComponent-6JEUC{œdataTypeœ:œURLComponentœ,œidœ:œURLComponent-6JEUCœ,œnameœ:œpage_resultsœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-gvHe2{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-gvHe2œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "selected": false, + "source": "URLComponent-6JEUC", + "sourceHandle": "{œdataTypeœ: œURLComponentœ, œidœ: œURLComponent-6JEUCœ, œnameœ: œpage_resultsœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "SplitText-gvHe2", + "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-gvHe2œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "SplitText", + "id": "SplitText-gvHe2", + "name": "dataframe", + "output_types": [ + "DataFrame" + ] + }, + "targetHandle": { + "fieldName": "input_df", + "id": "KBIngestion-jj5iW", + "inputTypes": [ + "DataFrame" + ], + "type": "other" + } + }, + "id": "xy-edge__SplitText-gvHe2{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-gvHe2œ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-KBIngestion-jj5iW{œfieldNameœ:œinput_dfœ,œidœ:œKBIngestion-jj5iWœ,œinputTypesœ:[œDataFrameœ],œtypeœ:œotherœ}", + "selected": false, + "source": "SplitText-gvHe2", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-gvHe2œ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "KBIngestion-jj5iW", + "targetHandle": "{œfieldNameœ: œinput_dfœ, œidœ: œKBIngestion-jj5iWœ, œinputTypesœ: [œDataFrameœ], œtypeœ: œotherœ}" + } + ], + "nodes": [ + { + "data": { + "id": "SplitText-gvHe2", + "node": { + "base_classes": [ + "DataFrame" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Split text into chunks based on specified criteria.", + "display_name": "Split Text", + "documentation": "https://docs.langflow.org/components-processing#split-text", + "edited": false, + "field_order": [ + "data_inputs", + "chunk_overlap", + "chunk_size", + "separator", + "text_key", + "keep_separator" + ], + "frozen": false, + "icon": "scissors-line-dashed", + "legacy": false, + "lf_version": "1.5.0.post1", + "metadata": { + "code_hash": "dbf2e9d2319d", + "module": "langflow.components.processing.split_text.SplitTextComponent" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Chunks", + "group_outputs": false, + "method": "split_text", + "name": "dataframe", + "selected": "DataFrame", + "tool_mode": true, + "types": [ + 
"DataFrame" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "chunk_overlap": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Chunk Overlap", + "dynamic": false, + "info": "Number of characters to overlap between chunks.", + "list": false, + "list_add_label": "Add More", + "name": "chunk_overlap", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 0 + }, + "chunk_size": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Chunk Size", + "dynamic": false, + "info": "The maximum length of each chunk. Text is first split by separator, then chunks are merged up to this size. Individual splits larger than this won't be further divided.", + "list": false, + "list_add_label": "Add More", + "name": "chunk_size", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 100 + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n documentation: str = \"https://docs.langflow.org/components-processing#split-text\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Input\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . 
for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n elif isinstance(self.data_inputs, Message):\n self.data_inputs = [self.data_inputs.to_data()]\n return self.split_text_base()\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n" + }, + "data_inputs": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Input", + "dynamic": false, + "info": "The data with texts to split in chunks.", + "input_types": [ + "Data", + "DataFrame", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "name": "data_inputs", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "keep_separator": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Keep Separator", + "dynamic": false, + "info": "Whether to keep the separator in the output chunks and where to place it.", + "name": "keep_separator", + "options": [ + "False", + "True", 
+ "Start", + "End" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "False" + }, + "separator": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Separator", + "dynamic": false, + "info": "The character to split on. Use \\n for newline. Examples: \\n\\n for paragraphs, \\n for lines, . for sentences", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "\n" + }, + "text_key": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Text Key", + "dynamic": false, + "info": "The key to use for the text column.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "text_key", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "text" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "SplitText" + }, + "dragging": false, + "id": "SplitText-gvHe2", + "measured": { + "height": 413, + "width": 320 + }, + "position": { + "x": 620, + "y": 69.00284194946289 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "note-bpWz8", + "node": { + "description": "## Knowledge Ingestion\n\nThis flow shows the basics of the creation and ingestion of knowledge bases in Langflow. Here we use the `URL` component to dynamically fetch page data from the Langflow website, split it into chunks of 100 tokens, then ingest into a Knowledge Base.\n\n1. (Optional) Change the URL or switch to a different input data source as desired.\n2. (Optional) Adjust the Chunk Size as desired.\n3. Select or Create a new knowledge base.\n4. 
Ensure the column you wish to Vectorize is properly reflected in the Column Configuration table.", + "display_name": "", + "documentation": "", + "template": {} + }, + "type": "note" + }, + "dragging": false, + "height": 401, + "id": "note-bpWz8", + "measured": { + "height": 401, + "width": 388 + }, + "position": { + "x": -225.94224126537597, + "y": 75.97023827444744 + }, + "resizing": false, + "selected": true, + "type": "noteNode", + "width": 388 + }, + { + "data": { + "id": "URLComponent-6JEUC", + "node": { + "base_classes": [ + "DataFrame", + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Fetch content from one or more web pages, following links recursively.", + "display_name": "URL", + "documentation": "https://docs.langflow.org/components-data#url", + "edited": false, + "field_order": [ + "urls", + "max_depth", + "prevent_outside", + "use_async", + "format", + "timeout", + "headers", + "filter_text_html", + "continue_on_failure", + "check_response_status", + "autoset_encoding" + ], + "frozen": false, + "icon": "layout-template", + "legacy": false, + "lf_version": "1.5.0.post1", + "metadata": { + "code_hash": "a81817a7f244", + "module": "langflow.components.data.url.URLComponent" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Extracted Pages", + "group_outputs": false, + "method": "fetch_content", + "name": "page_results", + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Raw Content", + "group_outputs": false, + "method": "fetch_content_as_message", + "name": "raw_results", + "selected": null, + "tool_mode": false, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "autoset_encoding": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Autoset Encoding", + "dynamic": false, + "info": "If enabled, automatically sets the encoding of the request.", + "list": false, + "list_add_label": "Add More", + "name": "autoset_encoding", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "check_response_status": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Check Response Status", + "dynamic": false, + "info": "If enabled, checks the response status of the request.", + "list": false, + "list_add_label": "Add More", + "name": "check_response_status", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import re\n\nimport requests\nfrom bs4 import BeautifulSoup\nfrom langchain_community.document_loaders import RecursiveUrlLoader\nfrom loguru import logger\n\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.helpers.data import safe_convert\nfrom langflow.io import BoolInput, DropdownInput, 
IntInput, MessageTextInput, Output, SliderInput, TableInput\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.services.deps import get_settings_service\n\n# Constants\nDEFAULT_TIMEOUT = 30\nDEFAULT_MAX_DEPTH = 1\nDEFAULT_FORMAT = \"Text\"\nURL_REGEX = re.compile(\n r\"^(https?:\\/\\/)?\" r\"(www\\.)?\" r\"([a-zA-Z0-9.-]+)\" r\"(\\.[a-zA-Z]{2,})?\" r\"(:\\d+)?\" r\"(\\/[^\\s]*)?$\",\n re.IGNORECASE,\n)\n\n\nclass URLComponent(Component):\n \"\"\"A component that loads and parses content from web pages recursively.\n\n This component allows fetching content from one or more URLs, with options to:\n - Control crawl depth\n - Prevent crawling outside the root domain\n - Use async loading for better performance\n - Extract either raw HTML or clean text\n - Configure request headers and timeouts\n \"\"\"\n\n display_name = \"URL\"\n description = \"Fetch content from one or more web pages, following links recursively.\"\n documentation: str = \"https://docs.langflow.org/components-data#url\"\n icon = \"layout-template\"\n name = \"URLComponent\"\n\n inputs = [\n MessageTextInput(\n name=\"urls\",\n display_name=\"URLs\",\n info=\"Enter one or more URLs to crawl recursively, by clicking the '+' button.\",\n is_list=True,\n tool_mode=True,\n placeholder=\"Enter a URL...\",\n list_add_label=\"Add URL\",\n input_types=[],\n ),\n SliderInput(\n name=\"max_depth\",\n display_name=\"Depth\",\n info=(\n \"Controls how many 'clicks' away from the initial page the crawler will go:\\n\"\n \"- depth 1: only the initial page\\n\"\n \"- depth 2: initial page + all pages linked directly from it\\n\"\n \"- depth 3: initial page + direct links + links found on those direct link pages\\n\"\n \"Note: This is about link traversal, not URL path depth.\"\n ),\n value=DEFAULT_MAX_DEPTH,\n range_spec=RangeSpec(min=1, max=5, step=1),\n required=False,\n min_label=\" \",\n max_label=\" \",\n min_label_icon=\"None\",\n max_label_icon=\"None\",\n # slider_input=True\n ),\n BoolInput(\n name=\"prevent_outside\",\n display_name=\"Prevent Outside\",\n info=(\n \"If enabled, only crawls URLs within the same domain as the root URL. \"\n \"This helps prevent the crawler from going to external websites.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"use_async\",\n display_name=\"Use Async\",\n info=(\n \"If enabled, uses asynchronous loading which can be significantly faster \"\n \"but might use more system resources.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n DropdownInput(\n name=\"format\",\n display_name=\"Output Format\",\n info=\"Output Format. 
Use 'Text' to extract the text from the HTML or 'HTML' for the raw HTML content.\",\n options=[\"Text\", \"HTML\"],\n value=DEFAULT_FORMAT,\n advanced=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n info=\"Timeout for the request in seconds.\",\n value=DEFAULT_TIMEOUT,\n required=False,\n advanced=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": get_settings_service().settings.user_agent}],\n advanced=True,\n input_types=[\"DataFrame\"],\n ),\n BoolInput(\n name=\"filter_text_html\",\n display_name=\"Filter Text/HTML\",\n info=\"If enabled, filters out text/css content type from the results.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"continue_on_failure\",\n display_name=\"Continue on Failure\",\n info=\"If enabled, continues crawling even if some requests fail.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"check_response_status\",\n display_name=\"Check Response Status\",\n info=\"If enabled, checks the response status of the request.\",\n value=False,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"autoset_encoding\",\n display_name=\"Autoset Encoding\",\n info=\"If enabled, automatically sets the encoding of the request.\",\n value=True,\n required=False,\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Extracted Pages\", name=\"page_results\", method=\"fetch_content\"),\n Output(display_name=\"Raw Content\", name=\"raw_results\", method=\"fetch_content_as_message\", tool_mode=False),\n ]\n\n @staticmethod\n def validate_url(url: str) -> bool:\n \"\"\"Validates if the given string matches URL pattern.\n\n Args:\n url: The URL string to validate\n\n Returns:\n bool: True if the URL is valid, False otherwise\n \"\"\"\n return bool(URL_REGEX.match(url))\n\n def ensure_url(self, url: str) -> str:\n \"\"\"Ensures the given string is a valid URL.\n\n Args:\n url: The URL string to validate and normalize\n\n Returns:\n str: The normalized URL\n\n Raises:\n ValueError: If the URL is invalid\n \"\"\"\n url = url.strip()\n if not url.startswith((\"http://\", \"https://\")):\n url = \"https://\" + url\n\n if not self.validate_url(url):\n msg = f\"Invalid URL: {url}\"\n raise ValueError(msg)\n\n return url\n\n def _create_loader(self, url: str) -> RecursiveUrlLoader:\n \"\"\"Creates a RecursiveUrlLoader instance with the configured settings.\n\n Args:\n url: The URL to load\n\n Returns:\n RecursiveUrlLoader: Configured loader instance\n \"\"\"\n headers_dict = {header[\"key\"]: header[\"value\"] for header in self.headers}\n extractor = (lambda x: x) if self.format == \"HTML\" else (lambda x: BeautifulSoup(x, \"lxml\").get_text())\n\n return RecursiveUrlLoader(\n url=url,\n max_depth=self.max_depth,\n prevent_outside=self.prevent_outside,\n use_async=self.use_async,\n extractor=extractor,\n timeout=self.timeout,\n headers=headers_dict,\n check_response_status=self.check_response_status,\n continue_on_failure=self.continue_on_failure,\n base_url=url, # Add base_url to ensure consistent domain crawling\n autoset_encoding=self.autoset_encoding, # Enable automatic encoding detection\n exclude_dirs=[], # 
Allow customization of excluded directories\n link_regex=None, # Allow customization of link filtering\n )\n\n def fetch_url_contents(self) -> list[dict]:\n \"\"\"Load documents from the configured URLs.\n\n Returns:\n List[Data]: List of Data objects containing the fetched content\n\n Raises:\n ValueError: If no valid URLs are provided or if there's an error loading documents\n \"\"\"\n try:\n urls = list({self.ensure_url(url) for url in self.urls if url.strip()})\n logger.debug(f\"URLs: {urls}\")\n if not urls:\n msg = \"No valid URLs provided.\"\n raise ValueError(msg)\n\n all_docs = []\n for url in urls:\n logger.debug(f\"Loading documents from {url}\")\n\n try:\n loader = self._create_loader(url)\n docs = loader.load()\n\n if not docs:\n logger.warning(f\"No documents found for {url}\")\n continue\n\n logger.debug(f\"Found {len(docs)} documents from {url}\")\n all_docs.extend(docs)\n\n except requests.exceptions.RequestException as e:\n logger.exception(f\"Error loading documents from {url}: {e}\")\n continue\n\n if not all_docs:\n msg = \"No documents were successfully loaded from any URL\"\n raise ValueError(msg)\n\n # data = [Data(text=doc.page_content, **doc.metadata) for doc in all_docs]\n data = [\n {\n \"text\": safe_convert(doc.page_content, clean_data=True),\n \"url\": doc.metadata.get(\"source\", \"\"),\n \"title\": doc.metadata.get(\"title\", \"\"),\n \"description\": doc.metadata.get(\"description\", \"\"),\n \"content_type\": doc.metadata.get(\"content_type\", \"\"),\n \"language\": doc.metadata.get(\"language\", \"\"),\n }\n for doc in all_docs\n ]\n except Exception as e:\n error_msg = e.message if hasattr(e, \"message\") else e\n msg = f\"Error loading documents: {error_msg!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n return data\n\n def fetch_content(self) -> DataFrame:\n \"\"\"Convert the documents to a DataFrame.\"\"\"\n return DataFrame(data=self.fetch_url_contents())\n\n def fetch_content_as_message(self) -> Message:\n \"\"\"Convert the documents to a Message.\"\"\"\n url_contents = self.fetch_url_contents()\n return Message(text=\"\\n\\n\".join([x[\"text\"] for x in url_contents]), data={\"data\": url_contents})\n" + }, + "continue_on_failure": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Continue on Failure", + "dynamic": false, + "info": "If enabled, continues crawling even if some requests fail.", + "list": false, + "list_add_label": "Add More", + "name": "continue_on_failure", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "filter_text_html": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Filter Text/HTML", + "dynamic": false, + "info": "If enabled, filters out text/css content type from the results.", + "list": false, + "list_add_label": "Add More", + "name": "filter_text_html", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "format": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Output Format", + "dynamic": false, + "info": "Output Format. 
Use 'Text' to extract the text from the HTML or 'HTML' for the raw HTML content.", + "name": "format", + "options": [ + "Text", + "HTML" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Text" + }, + "headers": { + "_input_type": "TableInput", + "advanced": true, + "display_name": "Headers", + "dynamic": false, + "info": "The headers to send with the request", + "input_types": [ + "DataFrame" + ], + "is_list": true, + "list_add_label": "Add More", + "name": "headers", + "placeholder": "", + "required": false, + "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "None", + "description": "Header name", + "disable_edit": false, + "display_name": "Header", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "key", + "sortable": true, + "type": "str" + }, + { + "default": "None", + "description": "Header value", + "disable_edit": false, + "display_name": "Value", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "value", + "sortable": true, + "type": "str" + } + ] + }, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [ + { + "key": "User-Agent", + "value": "langflow" + } + ] + }, + "max_depth": { + "_input_type": "SliderInput", + "advanced": false, + "display_name": "Depth", + "dynamic": false, + "info": "Controls how many 'clicks' away from the initial page the crawler will go:\n- depth 1: only the initial page\n- depth 2: initial page + all pages linked directly from it\n- depth 3: initial page + direct links + links found on those direct link pages\nNote: This is about link traversal, not URL path depth.", + "max_label": " ", + "max_label_icon": "None", + "min_label": " ", + "min_label_icon": "None", + "name": "max_depth", + "placeholder": "", + "range_spec": { + "max": 5, + "min": 1, + "step": 1, + "step_type": "float" + }, + "required": false, + "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "title_case": false, + "tool_mode": false, + "type": "slider", + "value": 2 + }, + "prevent_outside": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Prevent Outside", + "dynamic": false, + "info": "If enabled, only crawls URLs within the same domain as the root URL. 
This helps prevent the crawler from going to external websites.", + "list": false, + "list_add_label": "Add More", + "name": "prevent_outside", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "timeout": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Timeout", + "dynamic": false, + "info": "Timeout for the request in seconds.", + "list": false, + "list_add_label": "Add More", + "name": "timeout", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 30 + }, + "urls": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "URLs", + "dynamic": false, + "info": "Enter one or more URLs to crawl recursively by clicking the '+' button.", + "input_types": [], + "list": true, + "list_add_label": "Add URL", + "load_from_db": false, + "name": "urls", + "placeholder": "Enter a URL...", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": [ + "https://langflow.org" + ] + }, + "use_async": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Use Async", + "dynamic": false, + "info": "If enabled, uses asynchronous loading, which can be significantly faster but might use more system resources.", + "list": false, + "list_add_label": "Add More", + "name": "use_async", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + } + }, + "tool_mode": false + }, + "selected_output": "page_results", + "showNode": true, + "type": "URLComponent" + }, + "dragging": false, + "id": "URLComponent-6JEUC", + "measured": { + "height": 292, + "width": 320 + }, + "position": { + "x": 238.30016557701828, + "y": 132.82375729958179 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "KBIngestion-jj5iW", + "node": { + "base_classes": [ + "Data" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Create or update knowledge in Langflow.", + "display_name": "Knowledge Ingestion", + "documentation": "", + "edited": false, + "field_order": [ + "knowledge_base", + "input_df", + "column_config", + "chunk_size", + "api_key", + "allow_duplicates" + ], + "frozen": false, + "icon": "database", + "last_updated": "2025-08-13T19:45:49.122Z", + "legacy": false, + "metadata": { + "code_hash": "11df19de541d", + "module": "langflow.components.data.kb_ingest.KBIngestionComponent" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "group_outputs": false, + "method": "build_kb_info", + "name": "dataframe", + "selected": "Data", + "tool_mode": true, + "types": [ + "Data" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "allow_duplicates": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Allow Duplicates", + "dynamic": false, + "info": "Allow duplicate rows in the knowledge base", + "list": false, + "list_add_label": "Add More", + "name": "allow_duplicates", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": 
false + }, + "api_key": { + "_input_type": "SecretStrInput", + "advanced": true, + "display_name": "Embedding Provider API Key", + "dynamic": false, + "info": "API key for the embedding provider to generate embeddings.", + "input_types": [], + "load_from_db": false, + "name": "api_key", + "password": true, + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "chunk_size": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Chunk Size", + "dynamic": false, + "info": "Batch size for processing embeddings", + "list": false, + "list_add_label": "Add More", + "name": "chunk_size", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 1000 + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from __future__ import annotations\n\nimport hashlib\nimport json\nimport re\nimport uuid\nfrom dataclasses import asdict, dataclass, field\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\n\nimport pandas as pd\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput, TableInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict # noqa: TC001\nfrom langflow.schema.table import EditMode\nfrom langflow.services.auth.utils import decrypt_api_key, encrypt_api_key\nfrom langflow.services.deps import get_settings_service\n\nHUGGINGFACE_MODEL_NAMES = [\"sentence-transformers/all-MiniLM-L6-v2\", \"sentence-transformers/all-mpnet-base-v2\"]\nCOHERE_MODEL_NAMES = [\"embed-english-v3.0\", \"embed-multilingual-v3.0\"]\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KBIngestionComponent(Component):\n \"\"\"Create or append to Langflow Knowledge from a DataFrame.\"\"\"\n\n # ------ UI metadata ---------------------------------------------------\n display_name = \"Knowledge Ingestion\"\n description = \"Create or update knowledge in Langflow.\"\n icon = \"database\"\n name = \"KBIngestion\"\n\n @dataclass\n class NewKnowledgeBaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_knowledge_base\",\n \"description\": \"Create new knowledge in Langflow.\",\n \"display_name\": \"Create new knowledge\",\n \"field_order\": [\"01_new_kb_name\", \"02_embedding_model\", \"03_api_key\"],\n \"template\": {\n \"01_new_kb_name\": StrInput(\n name=\"new_kb_name\",\n display_name=\"Knowledge Name\",\n info=\"Name of the new knowledge to create.\",\n required=True,\n ),\n \"02_embedding_model\": DropdownInput(\n name=\"embedding_model\",\n display_name=\"Model Name\",\n info=\"Select the 
embedding model to use for this knowledge base.\",\n required=True,\n options=OPENAI_EMBEDDING_MODEL_NAMES + HUGGINGFACE_MODEL_NAMES + COHERE_MODEL_NAMES,\n options_metadata=[{\"icon\": \"OpenAI\"} for _ in OPENAI_EMBEDDING_MODEL_NAMES]\n + [{\"icon\": \"HuggingFace\"} for _ in HUGGINGFACE_MODEL_NAMES]\n + [{\"icon\": \"Cohere\"} for _ in COHERE_MODEL_NAMES],\n ),\n \"03_api_key\": SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"Provider API key for embedding model\",\n required=True,\n load_from_db=True,\n ),\n },\n },\n }\n }\n )\n\n # ------ Inputs --------------------------------------------------------\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[\n str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()\n ]\n if KNOWLEDGE_BASES_ROOT_PATH.exists()\n else [],\n refresh_button=True,\n dialog_inputs=asdict(NewKnowledgeBaseInput()),\n ),\n DataFrameInput(\n name=\"input_df\",\n display_name=\"Data\",\n info=\"Table with all original columns (already chunked / processed).\",\n required=True,\n ),\n TableInput(\n name=\"column_config\",\n display_name=\"Column Configuration\",\n info=\"Configure column behavior for the knowledge base.\",\n required=True,\n table_schema=[\n {\n \"name\": \"column_name\",\n \"display_name\": \"Column Name\",\n \"type\": \"str\",\n \"description\": \"Name of the column in the source DataFrame\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"vectorize\",\n \"display_name\": \"Vectorize\",\n \"type\": \"boolean\",\n \"description\": \"Create embeddings for this column\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"identifier\",\n \"display_name\": \"Identifier\",\n \"type\": \"boolean\",\n \"description\": \"Use this column as unique identifier\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"column_name\": \"text\",\n \"vectorize\": True,\n \"identifier\": False,\n }\n ],\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"Batch size for processing embeddings\",\n advanced=True,\n value=1000,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"allow_duplicates\",\n display_name=\"Allow Duplicates\",\n info=\"Allow duplicate rows in the knowledge base\",\n advanced=True,\n value=False,\n ),\n ]\n\n # ------ Outputs -------------------------------------------------------\n outputs = [Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"build_kb_info\")]\n\n # ------ Internal helpers ---------------------------------------------\n def _get_kb_root(self) -> Path:\n \"\"\"Return the root directory for knowledge bases.\"\"\"\n return KNOWLEDGE_BASES_ROOT_PATH\n\n def _validate_column_config(self, df_source: pd.DataFrame) -> list[dict[str, Any]]:\n \"\"\"Validate column configuration using Structured Output patterns.\"\"\"\n if not self.column_config:\n msg = \"Column configuration cannot be empty\"\n raise ValueError(msg)\n\n # Convert table input to list of dicts (similar to Structured Output)\n config_list = self.column_config if isinstance(self.column_config, list) else []\n\n # Validate column names exist in DataFrame\n df_columns = set(df_source.columns)\n for config in 
config_list:\n col_name = config.get(\"column_name\")\n if col_name not in df_columns and not self.silent_errors:\n msg = f\"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}\"\n self.log(f\"Warning: {msg}\")\n raise ValueError(msg)\n\n return config_list\n\n def _get_embedding_provider(self, embedding_model: str) -> str:\n \"\"\"Get embedding provider by matching model name to lists.\"\"\"\n if embedding_model in OPENAI_EMBEDDING_MODEL_NAMES:\n return \"OpenAI\"\n if embedding_model in HUGGINGFACE_MODEL_NAMES:\n return \"HuggingFace\"\n if embedding_model in COHERE_MODEL_NAMES:\n return \"Cohere\"\n return \"Custom\"\n\n def _build_embeddings(self, embedding_model: str, api_key: str):\n \"\"\"Build embedding model using provider patterns.\"\"\"\n # Get provider by matching model name to lists\n provider = self._get_embedding_provider(embedding_model)\n\n # Validate provider and model\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=embedding_model,\n api_key=api_key,\n chunk_size=self.chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=embedding_model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=embedding_model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def _build_embedding_metadata(self, embedding_model, api_key) -> dict[str, Any]:\n \"\"\"Build embedding model metadata.\"\"\"\n # Get provider by matching model name to lists\n embedding_provider = self._get_embedding_provider(embedding_model)\n\n api_key_to_save = None\n if api_key and hasattr(api_key, \"get_secret_value\"):\n api_key_to_save = api_key.get_secret_value()\n elif isinstance(api_key, str):\n api_key_to_save = api_key\n\n encrypted_api_key = None\n if api_key_to_save:\n settings_service = get_settings_service()\n try:\n encrypted_api_key = encrypt_api_key(api_key_to_save, settings_service=settings_service)\n except (TypeError, ValueError) as e:\n self.log(f\"Could not encrypt API key: {e}\")\n logger.error(f\"Could not encrypt API key: {e}\")\n\n return {\n \"embedding_provider\": embedding_provider,\n \"embedding_model\": embedding_model,\n \"api_key\": encrypted_api_key,\n \"api_key_used\": bool(api_key),\n \"chunk_size\": self.chunk_size,\n \"created_at\": datetime.now(timezone.utc).isoformat(),\n }\n\n def _save_embedding_metadata(self, kb_path: Path, embedding_model: str, api_key: str) -> None:\n \"\"\"Save embedding model metadata.\"\"\"\n embedding_metadata = self._build_embedding_metadata(embedding_model, api_key)\n metadata_path = kb_path / \"embedding_metadata.json\"\n metadata_path.write_text(json.dumps(embedding_metadata, indent=2))\n\n def _save_kb_files(\n self,\n kb_path: Path,\n config_list: list[dict[str, Any]],\n ) -> None:\n \"\"\"Save KB files using File Component storage patterns.\"\"\"\n try:\n # Create directory (following File Component patterns)\n kb_path.mkdir(parents=True, 
exist_ok=True)\n\n # Save column configuration\n # Only do this if the file doesn't exist already\n cfg_path = kb_path / \"schema.json\"\n if not cfg_path.exists():\n cfg_path.write_text(json.dumps(config_list, indent=2))\n\n except Exception as e:\n if not self.silent_errors:\n raise\n self.log(f\"Error saving KB files: {e}\")\n\n def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:\n \"\"\"Build detailed column metadata.\"\"\"\n metadata: dict[str, Any] = {\n \"total_columns\": len(df_source.columns),\n \"mapped_columns\": len(config_list),\n \"unmapped_columns\": len(df_source.columns) - len(config_list),\n \"columns\": [],\n \"summary\": {\"vectorized_columns\": [], \"identifier_columns\": []},\n }\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n # Add to columns list\n metadata[\"columns\"].append(\n {\n \"name\": col_name,\n \"vectorize\": vectorize,\n \"identifier\": identifier,\n }\n )\n\n # Update summary\n if vectorize:\n metadata[\"summary\"][\"vectorized_columns\"].append(col_name)\n if identifier:\n metadata[\"summary\"][\"identifier_columns\"].append(col_name)\n\n return metadata\n\n def _create_vector_store(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str\n ) -> None:\n \"\"\"Create vector store following Local DB component pattern.\"\"\"\n try:\n # Set up vector store directory\n base_dir = self._get_kb_root()\n\n vector_store_dir = base_dir / self.knowledge_base\n vector_store_dir.mkdir(parents=True, exist_ok=True)\n\n # Create embeddings model\n embedding_function = self._build_embeddings(embedding_model, api_key)\n\n # Convert DataFrame to Data objects (following Local DB pattern)\n data_objects = self._convert_df_to_data_objects(df_source, config_list)\n\n # Create vector store\n chroma = Chroma(\n persist_directory=str(vector_store_dir),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # Convert Data objects to LangChain Documents\n documents = []\n for data_obj in data_objects:\n doc = data_obj.to_lc_document()\n documents.append(doc)\n\n # Add documents to vector store\n if documents:\n chroma.add_documents(documents)\n self.log(f\"Added {len(documents)} documents to vector store '{self.knowledge_base}'\")\n\n except Exception as e:\n if not self.silent_errors:\n raise\n self.log(f\"Error creating vector store: {e}\")\n\n def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:\n \"\"\"Convert DataFrame to Data objects for vector store.\"\"\"\n data_objects: list[Data] = []\n\n # Set up vector store directory\n base_dir = self._get_kb_root()\n\n # If we don't allow duplicates, we need to get the existing hashes\n chroma = Chroma(\n persist_directory=str(base_dir / self.knowledge_base),\n collection_name=self.knowledge_base,\n )\n\n # Get all documents and their metadata\n all_docs = chroma.get()\n\n # Extract all _id values from metadata\n id_list = [metadata.get(\"_id\") for metadata in all_docs[\"metadatas\"] if metadata.get(\"_id\")]\n\n # Get column roles\n content_cols = []\n identifier_cols = []\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is 
True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n if vectorize:\n content_cols.append(col_name)\n elif identifier:\n identifier_cols.append(col_name)\n\n # Convert each row to a Data object\n for _, row in df_source.iterrows():\n # Build content text from vectorized columns using list comprehension\n content_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]\n\n page_content = \" \".join(content_parts)\n\n # Build metadata from NON-vectorized columns only (simple key-value pairs)\n data_dict = {\n \"text\": page_content, # Main content for vectorization\n }\n\n # Add metadata columns as simple key-value pairs\n for col in df_source.columns:\n if col not in content_cols and col in row and pd.notna(row[col]):\n # Convert to simple types for Chroma metadata\n value = row[col]\n data_dict[col] = str(value) # Convert complex types to string\n\n # Hash the page_content for unique ID\n page_content_hash = hashlib.sha256(page_content.encode()).hexdigest()\n data_dict[\"_id\"] = page_content_hash\n\n # If duplicates are disallowed, and hash exists, prevent adding this row\n if not self.allow_duplicates and page_content_hash in id_list:\n self.log(f\"Skipping duplicate row with hash {page_content_hash}\")\n continue\n\n # Create Data object - everything except \"text\" becomes metadata\n data_obj = Data(data=data_dict)\n data_objects.append(data_obj)\n\n return data_objects\n\n def is_valid_collection_name(self, name, min_length: int = 3, max_length: int = 63) -> bool:\n \"\"\"Validates collection name against conditions 1-3.\n\n 1. Contains 3-63 characters\n 2. Starts and ends with alphanumeric character\n 3. Contains only alphanumeric characters, underscores, or hyphens.\n\n Args:\n name (str): Collection name to validate\n min_length (int): Minimum length of the name\n max_length (int): Maximum length of the name\n\n Returns:\n bool: True if valid, False otherwise\n \"\"\"\n # Check length (condition 1)\n if not (min_length <= len(name) <= max_length):\n return False\n\n # Check start/end with alphanumeric (condition 2)\n if not (name[0].isalnum() and name[-1].isalnum()):\n return False\n\n # Check allowed characters (condition 3)\n return re.match(r\"^[a-zA-Z0-9_-]+$\", name) is not None\n\n # ---------------------------------------------------------------------\n # OUTPUT METHODS\n # ---------------------------------------------------------------------\n def build_kb_info(self) -> Data:\n \"\"\"Main ingestion routine → returns a dict with KB metadata.\"\"\"\n try:\n # Get source DataFrame\n df_source: pd.DataFrame = self.input_df\n\n # Validate column configuration (using Structured Output patterns)\n config_list = self._validate_column_config(df_source)\n column_metadata = self._build_column_metadata(config_list, df_source)\n\n # Prepare KB folder (using File Component patterns)\n kb_root = self._get_kb_root()\n kb_path = kb_root / self.knowledge_base\n\n # Read the embedding info from the knowledge base folder\n metadata_path = kb_path / \"embedding_metadata.json\"\n\n # If the API key is not provided, try to read it from the metadata file\n if metadata_path.exists():\n settings_service = get_settings_service()\n metadata = json.loads(metadata_path.read_text())\n embedding_model = metadata.get(\"embedding_model\")\n try:\n api_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. 
Please provide it manually. Error: {e}\")\n\n # Check if a custom API key was provided, update metadata if so\n if self.api_key:\n api_key = self.api_key\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=embedding_model,\n api_key=api_key,\n )\n\n # Create vector store following Local DB component pattern\n self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)\n\n # Save KB files (using File Component storage patterns)\n self._save_kb_files(kb_path, config_list)\n\n # Build metadata response\n meta: dict[str, Any] = {\n \"kb_id\": str(uuid.uuid4()),\n \"kb_name\": self.knowledge_base,\n \"rows\": len(df_source),\n \"column_metadata\": column_metadata,\n \"path\": str(kb_path),\n \"config_columns\": len(config_list),\n \"timestamp\": datetime.now(tz=timezone.utc).isoformat(),\n }\n\n # Set status message\n self.status = f\"✅ KB **{self.knowledge_base}** saved · {len(df_source)} chunks.\"\n\n return Data(data=meta)\n\n except Exception as e:\n if not self.silent_errors:\n raise\n self.log(f\"Error in KB ingestion: {e}\")\n self.status = f\"❌ KB ingestion failed: {e}\"\n return Data(data={\"error\": str(e), \"kb_name\": self.knowledge_base})\n\n def _get_knowledge_bases(self) -> list[str]:\n \"\"\"Retrieve a list of available knowledge bases.\n\n Returns:\n A list of knowledge base names.\n \"\"\"\n # Return the list of directories in the knowledge base root path\n kb_root_path = self._get_kb_root()\n\n if not kb_root_path.exists():\n return []\n\n return [str(d.name) for d in kb_root_path.iterdir() if not d.name.startswith(\".\") and d.is_dir()]\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update build configuration based on provider selection.\"\"\"\n # Create a new knowledge base\n if field_name == \"knowledge_base\":\n if isinstance(field_value, dict) and \"01_new_kb_name\" in field_value:\n # Validate the knowledge base name - Make sure it follows these rules:\n if not self.is_valid_collection_name(field_value[\"01_new_kb_name\"]):\n msg = f\"Invalid knowledge base name: {field_value['01_new_kb_name']}\"\n raise ValueError(msg)\n\n # We need to test the API Key one time against the embedding model\n embed_model = self._build_embeddings(\n embedding_model=field_value[\"02_embedding_model\"], api_key=field_value[\"03_api_key\"]\n )\n\n # Try to generate a dummy embedding to validate the API key\n embed_model.embed_query(\"test\")\n\n # Create the new knowledge base directory\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / field_value[\"01_new_kb_name\"]\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save the embedding metadata\n build_config[\"knowledge_base\"][\"value\"] = field_value[\"01_new_kb_name\"]\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=field_value[\"02_embedding_model\"],\n api_key=field_value[\"03_api_key\"],\n )\n\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = self._get_knowledge_bases()\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n" + }, + "column_config": { + "_input_type": "TableInput", + "advanced": false, + "display_name": "Column Configuration", + "dynamic": false, + "info": "Configure column behavior for the knowledge base.", + "is_list": true, + "list_add_label": "Add More", + "name": "column_config", + 
"placeholder": "", + "required": true, + "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "None", + "description": "Name of the column in the source DataFrame", + "disable_edit": false, + "display_name": "Column Name", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "column_name", + "sortable": true, + "type": "str" + }, + { + "default": false, + "description": "Create embeddings for this column", + "disable_edit": false, + "display_name": "Vectorize", + "edit_mode": "inline", + "filterable": true, + "formatter": "boolean", + "hidden": false, + "name": "vectorize", + "sortable": true, + "type": "boolean" + }, + { + "default": false, + "description": "Use this column as unique identifier", + "disable_edit": false, + "display_name": "Identifier", + "edit_mode": "inline", + "filterable": true, + "formatter": "boolean", + "hidden": false, + "name": "identifier", + "sortable": true, + "type": "boolean" + } + ] + }, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [ + { + "column_name": "text", + "identifier": false, + "vectorize": true + } + ] + }, + "input_df": { + "_input_type": "DataFrameInput", + "advanced": false, + "display_name": "Data", + "dynamic": false, + "info": "Table with all original columns (already chunked / processed).", + "input_types": [ + "DataFrame" + ], + "list": false, + "list_add_label": "Add More", + "name": "input_df", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "knowledge_base": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "Create new knowledge in Langflow.", + "display_name": "Create new knowledge", + "field_order": [ + "01_new_kb_name", + "02_embedding_model", + "03_api_key" + ], + "name": "create_knowledge_base", + "template": { + "01_new_kb_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Knowledge Name", + "dynamic": false, + "info": "Name of the new knowledge to create.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_kb_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "02_embedding_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model Name", + "dynamic": false, + "info": "Select the embedding model to use for this knowledge base.", + "name": "embedding_model", + "options": [ + "text-embedding-3-small", + "text-embedding-3-large", + "text-embedding-ada-002", + "sentence-transformers/all-MiniLM-L6-v2", + "sentence-transformers/all-mpnet-base-v2", + "embed-english-v3.0", + "embed-multilingual-v3.0" + ], + "options_metadata": [ + { + "icon": "OpenAI" + }, + { + "icon": "OpenAI" + }, + { + "icon": "OpenAI" + }, + { + "icon": "HuggingFace" + }, + { + "icon": "HuggingFace" + }, + { + "icon": "Cohere" + }, + { + "icon": "Cohere" + } + ], + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "03_api_key": 
{ + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "API Key", + "dynamic": false, + "info": "Provider API key for embedding model", + "input_types": [], + "load_from_db": true, + "name": "api_key", + "password": true, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "str", + "value": "" + } + } + } + } + }, + "functionality": "create" + }, + "display_name": "Knowledge", + "dynamic": false, + "info": "Select the knowledge to load data from.", + "name": "knowledge_base", + "options": [], + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": null + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "KBIngestion" + }, + "dragging": false, + "id": "KBIngestion-jj5iW", + "measured": { + "height": 333, + "width": 320 + }, + "position": { + "x": 1000.4023842644599, + "y": 101.77068666606948 + }, + "selected": false, + "type": "genericNode" + } + ], + "viewport": { + "x": 280.03407172860966, + "y": 131.39479654897661, + "zoom": 0.9295918751284687 + } + }, + "description": "An example of creating a Knowledge Base and ingesting data into it from a web URL.", + "endpoint_name": null, + "id": "dfffa40b-547b-46ae-9c4a-6539851990bf", + "is_component": false, + "last_tested_version": "1.5.0.post1", + "name": "Knowledge Ingestion", + "tags": [] +} \ No newline at end of file diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json new file mode 100644 index 000000000..ba99538fc --- /dev/null +++ b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json @@ -0,0 +1,707 @@ +{ + "data": { + "edges": [ + { + "className": "", + "data": { + "sourceHandle": { + "dataType": "TextInput", + "id": "TextInput-Z3rM3", + "name": "text", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "search_query", + "id": "KBRetrieval-tGoBR", + "inputTypes": [ + "Message" + ], + "type": "str" + } + }, + "id": "xy-edge__TextInput-Z3rM3{œdataTypeœ:œTextInputœ,œidœ:œTextInput-Z3rM3œ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-KBRetrieval-tGoBR{œfieldNameœ:œsearch_queryœ,œidœ:œKBRetrieval-tGoBRœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "TextInput-Z3rM3", + "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-Z3rM3œ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", + "target": "KBRetrieval-tGoBR", + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œKBRetrieval-tGoBRœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + }, + { + "className": "", + "data": { + "sourceHandle": { + "dataType": "KBRetrieval", + "id": "KBRetrieval-tGoBR", + "name": "chroma_kb_data", + "output_types": [ + "DataFrame" + ] + }, + "targetHandle": { + "fieldName": "input_value", + "id": "ChatOutput-tixOe", + "inputTypes": [ + "Data", + "DataFrame", + "Message" + ], + "type": "other" + } + }, + "id": "xy-edge__KBRetrieval-tGoBR{œdataTypeœ:œKBRetrievalœ,œidœ:œKBRetrieval-tGoBRœ,œnameœ:œchroma_kb_dataœ,œoutput_typesœ:[œDataFrameœ]}-ChatOutput-tixOe{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-tixOeœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "source": "KBRetrieval-tGoBR", + "sourceHandle": "{œdataTypeœ: œKBRetrievalœ, œidœ: œKBRetrieval-tGoBRœ, œnameœ: œchroma_kb_dataœ, œoutput_typesœ: [œDataFrameœ]}", + "target": 
"ChatOutput-tixOe", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-tixOeœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" + } + ], + "nodes": [ + { + "data": { + "id": "note-YyBfz", + "node": { + "description": "## Knowledge Retrieval\n\nA stand-alone component handles the retrieval of ingested knowledge from existing knowledge bases. To retrieve knowledge:\n\n1. Select your knowledge base from the Knowledge Base dropdown. If you do not see it, choose \"Refresh List\".\n2. (Optional) Enter a Search Query to be performed against the knowledge base.\n\nNote that by default, 5 results are returned, which can be configured by clicking Controls at the top of the component.\n", + "display_name": "", + "documentation": "", + "template": {} + }, + "type": "note" + }, + "dragging": false, + "height": 384, + "id": "note-YyBfz", + "measured": { + "height": 384, + "width": 371 + }, + "position": { + "x": -215.63964109627526, + "y": -365.1224988685513 + }, + "resizing": false, + "selected": false, + "type": "noteNode", + "width": 371 + }, + { + "data": { + "id": "TextInput-Z3rM3", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Get user text inputs.", + "display_name": "Text Input", + "documentation": "https://docs.langflow.org/components-io#text-input", + "edited": false, + "field_order": [ + "input_value" + ], + "frozen": false, + "icon": "type", + "legacy": false, + "lf_version": "1.5.0.post1", + "metadata": { + "code_hash": "efdcba3771af", + "module": "langflow.components.input_output.text.TextInputComponent" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Text", + "group_outputs": false, + "method": "text_response", + "name": "text", + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langflow.base.io.text import TextComponent\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema.message import Message\n\n\nclass TextInputComponent(TextComponent):\n display_name = \"Text Input\"\n description = \"Get user text inputs.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\n icon = \"type\"\n name = \"TextInput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Text to be passed as input.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n return Message(\n text=self.input_value,\n )\n" + }, + "input_value": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Text", + "dynamic": false, + "info": "Text to be passed as input.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "input_value", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, 
+ "type": "str", + "value": "IBM Acquires DataStax" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "TextInput" + }, + "dragging": false, + "id": "TextInput-Z3rM3", + "measured": { + "height": 204, + "width": 320 + }, + "position": { + "x": 234.35280633316273, + "y": -280.9003423728733 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "ChatOutput-tixOe", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Display a chat message in the Playground.", + "display_name": "Chat Output", + "documentation": "https://docs.langflow.org/components-io#chat-output", + "edited": false, + "field_order": [ + "input_value", + "should_store_message", + "sender", + "sender_name", + "session_id", + "data_template", + "background_color", + "chat_icon", + "text_color", + "clean_data" + ], + "frozen": false, + "icon": "MessagesSquare", + "legacy": false, + "lf_version": "1.5.0.post1", + "metadata": { + "code_hash": "6f74e04e39d5", + "module": "langflow.components.input_output.chat_output.ChatOutput" + }, + "minimized": true, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Message", + "group_outputs": false, + "method": "message_response", + "name": "message", + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "background_color": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Background Color", + "dynamic": false, + "info": "The background color of the icon.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "background_color", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "chat_icon": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Icon", + "dynamic": false, + "info": "The icon of the message.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "chat_icon", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "clean_data": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Basic Clean Data", + "dynamic": false, + "info": "Whether to clean the data", + "list": false, + "list_add_label": "Add More", + "name": "clean_data", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.base.io.chat import ChatComponent\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, DropdownInput, HandleInput, 
MessageTextInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.schema.properties import Source\nfrom langflow.template.field.base import Output\nfrom langflow.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n MessageTextInput(\n name=\"background_color\",\n display_name=\"Background Color\",\n info=\"The background color of the icon.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"chat_icon\",\n display_name=\"Icon\",\n info=\"The icon of the message.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"text_color\",\n display_name=\"Text Color\",\n info=\"The text color of the name\",\n advanced=True,\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n info=\"Whether to clean the data\",\n advanced=True,\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, icon, display_name, source_id = self.get_properties_from_source_component()\n background_color = self.background_color\n text_color = self.text_color\n if self.chat_icon:\n icon = self.chat_icon\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n 
message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n message.properties.icon = icon\n message.properties.background_color = background_color\n message.properties.text_color = text_color\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n return \"\\n\".join([safe_convert(item, clean_data=self.clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + }, + "data_template": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Data Template", + "dynamic": false, + "info": "Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "data_template", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "{text}" + }, + "input_value": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Inputs", + "dynamic": false, + "info": "Message to be passed as output.", + "input_types": [ + "Data", + "DataFrame", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "name": "input_value", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "sender": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Sender Type", + "dynamic": false, + "info": "Type of sender.", + "name": "sender", + "options": [ + "Machine", + "User" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Machine" + }, + "sender_name": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Sender Name", + "dynamic": false, + "info": "Name of the sender.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "sender_name", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "AI" + }, + "session_id": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Session ID", + "dynamic": false, + "info": "The session ID of the chat. 
If empty, the current session ID parameter will be used.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "session_id", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "should_store_message": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Store Messages", + "dynamic": false, + "info": "Store the message in the history.", + "list": false, + "list_add_label": "Add More", + "name": "should_store_message", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "text_color": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Text Color", + "dynamic": false, + "info": "The text color of the name", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "text_color", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + }, + "tool_mode": false + }, + "showNode": false, + "type": "ChatOutput" + }, + "dragging": false, + "id": "ChatOutput-tixOe", + "measured": { + "height": 48, + "width": 192 + }, + "position": { + "x": 1043.5413322661916, + "y": -202.42300688367868 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "KBRetrieval-tGoBR", + "node": { + "base_classes": [ + "DataFrame" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Search and retrieve data from knowledge.", + "display_name": "Knowledge Retrieval", + "documentation": "", + "edited": false, + "field_order": [ + "knowledge_base", + "api_key", + "search_query", + "top_k", + "include_metadata" + ], + "frozen": false, + "icon": "database", + "last_updated": "2025-08-13T19:46:57.894Z", + "legacy": false, + "metadata": { + "code_hash": "f82365a0977f", + "module": "langflow.components.data.kb_retrieval.KBRetrievalComponent" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Results", + "group_outputs": false, + "method": "get_chroma_kb_data", + "name": "chroma_kb_data", + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "api_key": { + "_input_type": "SecretStrInput", + "advanced": true, + "display_name": "Embedding Provider API Key", + "dynamic": false, + "info": "API key for the embedding provider to generate embeddings.", + "input_types": [], + "load_from_db": false, + "name": "api_key", + "password": true, + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import json\nfrom pathlib import Path\nfrom typing import Any\n\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru 
import logger\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.services.auth.utils import decrypt_api_key\nfrom langflow.services.deps import get_settings_service\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KBRetrievalComponent(Component):\n display_name = \"Knowledge Retrieval\"\n description = \"Search and retrieve data from knowledge.\"\n icon = \"database\"\n name = \"KBRetrieval\"\n\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[\n str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()\n ]\n if KNOWLEDGE_BASES_ROOT_PATH.exists()\n else [],\n refresh_button=True,\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n info=\"Optional search query to filter knowledge base data.\",\n ),\n IntInput(\n name=\"top_k\",\n display_name=\"Top K Results\",\n info=\"Number of top results to return from the knowledge base.\",\n value=5,\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"include_metadata\",\n display_name=\"Include Metadata\",\n info=\"Whether to include all metadata and embeddings in the output. 
If false, only content is returned.\",\n value=True,\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"chroma_kb_data\",\n display_name=\"Results\",\n method=\"get_chroma_kb_data\",\n info=\"Returns the data from the selected knowledge base.\",\n ),\n ]\n\n def _get_knowledge_bases(self) -> list[str]:\n \"\"\"Retrieve a list of available knowledge bases.\n\n Returns:\n A list of knowledge base names.\n \"\"\"\n if not KNOWLEDGE_BASES_ROOT_PATH.exists():\n return []\n\n return [str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()]\n\n def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002\n if field_name == \"knowledge_base\":\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = self._get_knowledge_bases()\n\n # If the selected knowledge base is not available, reset it\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n\n def _get_kb_metadata(self, kb_path: Path) -> dict:\n \"\"\"Load and process knowledge base metadata.\"\"\"\n metadata: dict[str, Any] = {}\n metadata_file = kb_path / \"embedding_metadata.json\"\n if not metadata_file.exists():\n logger.warning(f\"Embedding metadata file not found at {metadata_file}\")\n return metadata\n\n try:\n with metadata_file.open(\"r\", encoding=\"utf-8\") as f:\n metadata = json.load(f)\n except json.JSONDecodeError:\n logger.error(f\"Error decoding JSON from {metadata_file}\")\n return {}\n\n # Decrypt API key if it exists\n if \"api_key\" in metadata and metadata.get(\"api_key\"):\n settings_service = get_settings_service()\n try:\n decrypted_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n metadata[\"api_key\"] = decrypted_key\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. Error: {e}\")\n metadata[\"api_key\"] = None\n return metadata\n\n def _build_embeddings(self, metadata: dict):\n \"\"\"Build embedding model from metadata.\"\"\"\n provider = metadata.get(\"embedding_provider\")\n model = metadata.get(\"embedding_model\")\n api_key = metadata.get(\"api_key\")\n chunk_size = metadata.get(\"chunk_size\")\n\n # If user provided a key in the input, it overrides the stored one.\n if self.api_key and self.api_key.get_secret_value():\n api_key = self.api_key.get_secret_value()\n\n # Handle various providers\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required. 
Provide it in the component's advanced settings.\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=model,\n api_key=api_key,\n chunk_size=chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n # Add other providers here if they become supported in ingest\n msg = f\"Embedding provider '{provider}' is not supported for retrieval.\"\n raise NotImplementedError(msg)\n\n def get_chroma_kb_data(self) -> DataFrame:\n \"\"\"Retrieve data from the selected knowledge base by reading the Chroma collection.\n\n Returns:\n A DataFrame containing the data rows from the knowledge base.\n \"\"\"\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / self.knowledge_base\n\n metadata = self._get_kb_metadata(kb_path)\n if not metadata:\n msg = f\"Metadata not found for knowledge base: {self.knowledge_base}. Ensure it has been indexed.\"\n raise ValueError(msg)\n\n # Build the embedder for the knowledge base\n embedding_function = self._build_embeddings(metadata)\n\n # Load vector store\n chroma = Chroma(\n persist_directory=str(kb_path),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # If a search query is provided, perform a scored similarity search\n if self.search_query:\n logger.info(f\"Performing similarity search with query: {self.search_query}\")\n results = chroma.similarity_search_with_score(\n query=self.search_query,\n k=self.top_k,\n )\n else:\n # No query: fetch up to top_k documents. similarity_search returns bare\n # Documents, so pair each with a dummy score of 0 to match the\n # (document, score) tuples produced by similarity_search_with_score.\n docs = chroma.similarity_search(\n query=\"\",\n k=self.top_k,\n )\n results = [(doc, 0) for doc in docs]\n\n # If metadata is enabled, get embeddings for the results\n id_to_embedding = {}\n if self.include_metadata and results:\n doc_ids = [doc[0].metadata.get(\"_id\") for doc in results if doc[0].metadata.get(\"_id\")]\n\n # Only proceed if we have valid document IDs\n if doc_ids:\n # Access underlying client to get embeddings\n collection = chroma._client.get_collection(name=self.knowledge_base)\n embeddings_result = collection.get(where={\"_id\": {\"$in\": doc_ids}}, include=[\"embeddings\", \"metadatas\"])\n\n # Create a mapping from document ID to embedding\n for i, metadata in enumerate(embeddings_result.get(\"metadatas\", [])):\n if metadata and \"_id\" in metadata:\n id_to_embedding[metadata[\"_id\"]] = embeddings_result[\"embeddings\"][i]\n\n # Build output data based on include_metadata setting\n data_list = []\n for doc in results:\n if self.include_metadata:\n # Include all metadata, embeddings, and content\n kwargs = {\n \"content\": doc[0].page_content,\n **doc[0].metadata,\n }\n if self.search_query:\n kwargs[\"_score\"] = -1 * doc[1]\n kwargs[\"_embeddings\"] = id_to_embedding.get(doc[0].metadata.get(\"_id\"))\n else:\n # Only include content\n kwargs = {\n \"content\": doc[0].page_content,\n }\n\n data_list.append(Data(**kwargs))\n\n # Return the DataFrame containing the 
data\n return DataFrame(data=data_list)\n" + }, + "include_metadata": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Include Metadata", + "dynamic": false, + "info": "Whether to include all metadata and embeddings in the output. If false, only content is returned.", + "list": false, + "list_add_label": "Add More", + "name": "include_metadata", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "knowledge_base": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Knowledge", + "dynamic": false, + "info": "Select the knowledge to load data from.", + "name": "knowledge_base", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": null + }, + "search_query": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Search Query", + "dynamic": false, + "info": "Optional search query to filter knowledge base data.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "search_query", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "top_k": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Top K Results", + "dynamic": false, + "info": "Number of top results to return from the knowledge base.", + "list": false, + "list_add_label": "Add More", + "name": "top_k", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 5 + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "KBRetrieval" + }, + "dragging": false, + "id": "KBRetrieval-tGoBR", + "measured": { + "height": 286, + "width": 320 + }, + "position": { + "x": 640.6283193600648, + "y": -313.9694258557284 + }, + "selected": false, + "type": "genericNode" + } + ], + "viewport": { + "x": 285.0464459586908, + "y": 588.7377652547386, + "zoom": 0.9833370380356916 + } + }, + "description": "An example of performing a vector search against data in a Knowledge Base to retrieve relevant documents.", + "endpoint_name": null, + "id": "670745f6-08b1-480e-bdaf-64ba74967cba", + "is_component": false, + "last_tested_version": "1.5.0.post1", + "name": "Knowledge Retrieval", + "tags": [] +} \ No newline at end of file diff --git a/src/backend/base/langflow/services/settings/base.py b/src/backend/base/langflow/services/settings/base.py index cf7668fed..7d3749b50 100644 --- a/src/backend/base/langflow/services/settings/base.py +++ b/src/backend/base/langflow/services/settings/base.py @@ -73,6 +73,9 @@ class Settings(BaseSettings): """Define if langflow database should be saved in LANGFLOW_CONFIG_DIR or in the langflow directory (i.e. 
in the package directory).""" + knowledge_bases_dir: str | None = "~/.langflow/knowledge_bases" + """The directory to store knowledge bases.""" + dev: bool = False """If True, Langflow will run in development mode.""" database_url: str | None = None diff --git a/src/backend/tests/unit/base/data/__init__.py b/src/backend/tests/unit/base/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/backend/tests/unit/base/data/test_kb_utils.py b/src/backend/tests/unit/base/data/test_kb_utils.py new file mode 100644 index 000000000..0d6b3441e --- /dev/null +++ b/src/backend/tests/unit/base/data/test_kb_utils.py @@ -0,0 +1,458 @@ +import pytest +from langflow.base.data.kb_utils import compute_bm25, compute_tfidf + + +class TestKBUtils: + """Test suite for knowledge base utility functions.""" + + # Test data for TF-IDF and BM25 tests + @pytest.fixture + def sample_documents(self): + """Sample documents for testing.""" + return ["the cat sat on the mat", "the dog ran in the park", "cats and dogs are pets", "birds fly in the sky"] + + @pytest.fixture + def query_terms(self): + """Sample query terms for testing.""" + return ["cat", "dog"] + + @pytest.fixture + def empty_documents(self): + """Empty documents for edge case testing.""" + return ["", "", ""] + + @pytest.fixture + def single_document(self): + """Single document for testing.""" + return ["hello world"] + + def test_compute_tfidf_basic(self, sample_documents, query_terms): + """Test basic TF-IDF computation.""" + scores = compute_tfidf(sample_documents, query_terms) + + # Should return a score for each document + assert len(scores) == len(sample_documents) + + # All scores should be floats + assert all(isinstance(score, float) for score in scores) + + # First document contains "cat", should have non-zero score + assert scores[0] > 0.0 + + # Second document contains "dog", should have non-zero score + assert scores[1] > 0.0 + + # Third document contains both "cats" and "dogs", but case-insensitive matching should work + # Note: "cats" != "cat" exactly, so this tests the term matching behavior + assert scores[2] >= 0.0 + + # Fourth document contains neither term, should have zero score + assert scores[3] == 0.0 + + def test_compute_tfidf_case_insensitive(self): + """Test that TF-IDF computation is case insensitive.""" + documents = ["The CAT sat", "the dog RAN", "CATS and DOGS"] + query_terms = ["cat", "DOG"] + + scores = compute_tfidf(documents, query_terms) + + # First document should match "cat" (case insensitive) + assert scores[0] > 0.0 + + # Second document should match "dog" (case insensitive) + assert scores[1] > 0.0 + + def test_compute_tfidf_empty_documents(self, empty_documents, query_terms): + """Test TF-IDF with empty documents.""" + scores = compute_tfidf(empty_documents, query_terms) + + # Should return scores for all documents + assert len(scores) == len(empty_documents) + + # All scores should be zero since documents are empty + assert all(score == 0.0 for score in scores) + + def test_compute_tfidf_empty_query_terms(self, sample_documents): + """Test TF-IDF with empty query terms.""" + scores = compute_tfidf(sample_documents, []) + + # Should return scores for all documents + assert len(scores) == len(sample_documents) + + # All scores should be zero since no query terms + assert all(score == 0.0 for score in scores) + + def test_compute_tfidf_single_document(self, single_document): + """Test TF-IDF with single document.""" + query_terms = ["hello", "world"] + scores = compute_tfidf(single_document, 
query_terms) + + assert len(scores) == 1 + # With only one document, IDF = log(1/1) = 0, so TF-IDF score is always 0 + # This is correct mathematical behavior - TF-IDF is designed to discriminate between documents + assert scores[0] == 0.0 + + def test_compute_tfidf_two_documents_positive_scores(self): + """Test TF-IDF with two documents to ensure positive scores are possible.""" + documents = ["hello world", "goodbye earth"] + query_terms = ["hello", "world"] + scores = compute_tfidf(documents, query_terms) + + assert len(scores) == 2 + # First document contains both terms, should have positive score + assert scores[0] > 0.0 + # Second document contains neither term, should have zero score + assert scores[1] == 0.0 + + def test_compute_tfidf_no_documents(self): + """Test TF-IDF with no documents.""" + scores = compute_tfidf([], ["cat", "dog"]) + + assert scores == [] + + def test_compute_tfidf_term_frequency_calculation(self): + """Test TF-IDF term frequency calculation.""" + # Documents with different term frequencies for the same term + documents = ["rare word text", "rare rare word", "other content"] + query_terms = ["rare"] + + scores = compute_tfidf(documents, query_terms) + + # "rare" appears in documents 0 and 1, but with different frequencies + # Document 1 has higher TF (2/3 vs 1/3), so should score higher + assert scores[0] > 0.0 # Contains "rare" once + assert scores[1] > scores[0] # Contains "rare" twice, should score higher + assert scores[2] == 0.0 # Doesn't contain "rare" + + def test_compute_tfidf_idf_calculation(self): + """Test TF-IDF inverse document frequency calculation.""" + # "rare" appears in only one document, "common" appears in both + documents = ["rare term", "common term", "common word"] + query_terms = ["rare", "common"] + + scores = compute_tfidf(documents, query_terms) + + # First document should have higher score due to rare term having higher IDF + assert scores[0] > scores[1] # rare term gets higher IDF + assert scores[0] > scores[2] + + def test_compute_bm25_basic(self, sample_documents, query_terms): + """Test basic BM25 computation.""" + scores = compute_bm25(sample_documents, query_terms) + + # Should return a score for each document + assert len(scores) == len(sample_documents) + + # All scores should be floats + assert all(isinstance(score, float) for score in scores) + + # First document contains "cat", should have non-zero score + assert scores[0] > 0.0 + + # Second document contains "dog", should have non-zero score + assert scores[1] > 0.0 + + # Fourth document contains neither term, should have zero score + assert scores[3] == 0.0 + + def test_compute_bm25_parameters(self, sample_documents, query_terms): + """Test BM25 with different k1 and b parameters.""" + # Test with default parameters + scores_default = compute_bm25(sample_documents, query_terms) + + # Test with different k1 + scores_k1 = compute_bm25(sample_documents, query_terms, k1=2.0) + + # Test with different b + scores_b = compute_bm25(sample_documents, query_terms, b=0.5) + + # Test with both different + scores_both = compute_bm25(sample_documents, query_terms, k1=2.0, b=0.5) + + # All should return valid scores + assert len(scores_default) == len(sample_documents) + assert len(scores_k1) == len(sample_documents) + assert len(scores_b) == len(sample_documents) + assert len(scores_both) == len(sample_documents) + + # Scores should be different with different parameters + assert scores_default != scores_k1 + assert scores_default != scores_b + + def 
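
The compute_tfidf implementation in langflow.base.data.kb_utils is not included in this patch. As a rough sketch only, consistent with the behavior the tests above pin down (plain split() tokenization, lowercasing, TF = count / length, IDF = log(N / df), so a single-document corpus always scores 0), it might look like:

import math

def compute_tfidf(documents: list[str], query_terms: list[str]) -> list[float]:
    """Score each document against the query terms with plain TF-IDF."""
    tokenized = [doc.lower().split() for doc in documents]
    n_docs = len(documents)
    scores = []
    for tokens in tokenized:
        score = 0.0
        for term in (t.lower() for t in query_terms):
            df = sum(1 for toks in tokenized if term in toks)
            if not tokens or df == 0:
                # Empty document, or term absent from the corpus: skip, which
                # also guards the IDF against a division by zero.
                continue
            tf = tokens.count(term) / len(tokens)
            idf = math.log(n_docs / df)  # log(1/1) = 0 for a single document
            score += tf * idf
        scores.append(score)
    return scores

The df guard is what the empty-document fixtures exercise: with no occurrences anywhere in the corpus, log(n_docs / df) would otherwise divide by zero.
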
test_compute_bm25_case_insensitive(self): + """Test that BM25 computation is case insensitive.""" + documents = ["The CAT sat", "the dog RAN", "CATS and DOGS"] + query_terms = ["cat", "DOG"] + + scores = compute_bm25(documents, query_terms) + + # First document should match "cat" (case insensitive) + assert scores[0] > 0.0 + + # Second document should match "dog" (case insensitive) + assert scores[1] > 0.0 + + def test_compute_bm25_empty_documents(self, empty_documents, query_terms): + """Test BM25 with empty documents.""" + scores = compute_bm25(empty_documents, query_terms) + + # Should return scores for all documents + assert len(scores) == len(empty_documents) + + # All scores should be zero since documents are empty + assert all(score == 0.0 for score in scores) + + def test_compute_bm25_empty_query_terms(self, sample_documents): + """Test BM25 with empty query terms.""" + scores = compute_bm25(sample_documents, []) + + # Should return scores for all documents + assert len(scores) == len(sample_documents) + + # All scores should be zero since no query terms + assert all(score == 0.0 for score in scores) + + def test_compute_bm25_single_document(self, single_document): + """Test BM25 with single document.""" + query_terms = ["hello", "world"] + scores = compute_bm25(single_document, query_terms) + + assert len(scores) == 1 + # With only one document, IDF = log(1/1) = 0, so BM25 score is always 0 + # This is correct mathematical behavior - both TF-IDF and BM25 are designed to discriminate between documents + assert scores[0] == 0.0 + + def test_compute_bm25_two_documents_positive_scores(self): + """Test BM25 with two documents to ensure positive scores are possible.""" + documents = ["hello world", "goodbye earth"] + query_terms = ["hello", "world"] + scores = compute_bm25(documents, query_terms) + + assert len(scores) == 2 + # First document contains both terms, should have positive score + assert scores[0] > 0.0 + # Second document contains neither term, should have zero score + assert scores[1] == 0.0 + + def test_compute_bm25_no_documents(self): + """Test BM25 with no documents.""" + scores = compute_bm25([], ["cat", "dog"]) + + assert scores == [] + + def test_compute_bm25_document_length_normalization(self): + """Test BM25 document length normalization.""" + # Test with documents where some terms appear in subset of documents + documents = [ + "cat unique1", # Short document with unique term + "cat dog bird mouse elephant tiger lion bear wolf unique2", # Long document with unique term + "other content", # Document without query terms + ] + query_terms = ["unique1", "unique2"] + + scores = compute_bm25(documents, query_terms) + + # Documents with unique terms should have positive scores + assert scores[0] > 0.0 # Contains "unique1" + assert scores[1] > 0.0 # Contains "unique2" + assert scores[2] == 0.0 # Contains neither term + + # Document length normalization affects scores + assert len(scores) == 3 + + def test_compute_bm25_term_frequency_saturation(self): + """Test BM25 term frequency saturation behavior.""" + # Test with documents where term frequencies can be meaningfully compared + documents = [ + "rare word text", # TF = 1 for "rare" + "rare rare word", # TF = 2 for "rare" + "rare rare rare rare rare word", # TF = 5 for "rare" + "other content", # No "rare" term + ] + query_terms = ["rare"] + + scores = compute_bm25(documents, query_terms) + + # Documents with the term should have positive scores + assert scores[0] > 0.0 # TF=1 + assert scores[1] > 0.0 # TF=2 + assert 
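
compute_bm25 is likewise not shown in the patch; a matching sketch under the same tokenization assumptions could look like the following. The k1 and b defaults here are guesses (the tests only ever pass them explicitly), and the simple log(N / df) IDF follows the single-document comments above rather than the classic BM25 IDF:

import math

def compute_bm25(
    documents: list[str],
    query_terms: list[str],
    k1: float = 1.5,  # assumed default; higher k1 delays term-frequency saturation
    b: float = 0.75,  # assumed default; 0 disables length normalization, 1 is full
) -> list[float]:
    """Score each document against the query terms with BM25."""
    tokenized = [doc.lower().split() for doc in documents]
    n_docs = len(documents)
    avgdl = sum(len(t) for t in tokenized) / n_docs if n_docs else 0.0
    scores = []
    for tokens in tokenized:
        score = 0.0
        for term in (t.lower() for t in query_terms):
            tf = tokens.count(term)
            df = sum(1 for toks in tokenized if term in toks)
            if tf == 0 or df == 0:
                continue  # also keeps k1=0 from producing a 0/0 below
            idf = math.log(n_docs / df)
            norm = 1 - b + b * (len(tokens) / avgdl) if avgdl else 1.0
            score += idf * (tf * (k1 + 1)) / (tf + k1 * norm)
        scores.append(score)
    return scores
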
scores[2] > 0.0 # TF=5 + assert scores[3] == 0.0 # TF=0 + + # Scores should increase with term frequency, but with diminishing returns + assert scores[1] > scores[0] # TF=2 > TF=1 + assert scores[2] > scores[1] # TF=5 > TF=2 + + # Check that increases demonstrate saturation effect + increase_1_to_2 = scores[1] - scores[0] + increase_2_to_5 = scores[2] - scores[1] + assert increase_1_to_2 > 0 + assert increase_2_to_5 > 0 + + def test_compute_bm25_idf_calculation(self): + """Test BM25 inverse document frequency calculation.""" + # "rare" appears in only one document, "common" appears in multiple + documents = ["rare term", "common term", "common word"] + query_terms = ["rare", "common"] + + scores = compute_bm25(documents, query_terms) + + # First document should have higher score due to rare term having higher IDF + assert scores[0] > scores[1] # rare term gets higher IDF + assert scores[0] > scores[2] + + def test_compute_bm25_zero_parameters(self, sample_documents, query_terms): + """Test BM25 with edge case parameters.""" + # Test with k1=0 (no term frequency scaling) + scores_k1_zero = compute_bm25(sample_documents, query_terms, k1=0.0) + assert len(scores_k1_zero) == len(sample_documents) + + # Test with b=0 (no document length normalization) + scores_b_zero = compute_bm25(sample_documents, query_terms, b=0.0) + assert len(scores_b_zero) == len(sample_documents) + + # Test with b=1 (full document length normalization) + scores_b_one = compute_bm25(sample_documents, query_terms, b=1.0) + assert len(scores_b_one) == len(sample_documents) + + def test_tfidf_vs_bm25_comparison(self, sample_documents, query_terms): + """Test that TF-IDF and BM25 produce different but related scores.""" + tfidf_scores = compute_tfidf(sample_documents, query_terms) + bm25_scores = compute_bm25(sample_documents, query_terms) + + # Both should return same number of scores + assert len(tfidf_scores) == len(bm25_scores) == len(sample_documents) + + # For documents that match, both should be positive + for i in range(len(sample_documents)): + if tfidf_scores[i] > 0: + assert bm25_scores[i] > 0, f"Document {i} has TF-IDF score but zero BM25 score" + if bm25_scores[i] > 0: + assert tfidf_scores[i] > 0, f"Document {i} has BM25 score but zero TF-IDF score" + + def test_compute_tfidf_special_characters(self): + """Test TF-IDF with documents containing special characters.""" + documents = ["hello, world!", "world... hello?", "no match here"] + query_terms = ["hello", "world"] + + scores = compute_tfidf(documents, query_terms) + + # Should handle punctuation and still match terms + assert len(scores) == 3 + # Note: Current implementation does simple split(), so punctuation stays attached + # This tests the current behavior - may need updating if tokenization improves + + def test_compute_bm25_special_characters(self): + """Test BM25 with documents containing special characters.""" + documents = ["hello, world!", "world... 
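
For reference, the diminishing increments the saturation test expects fall out of BM25's term-frequency component tf * (k1 + 1) / (tf + k1). Ignoring length normalization and taking k1 = 1.5 for illustration:

k1 = 1.5
for tf in (1, 2, 5):
    print(tf, round(tf * (k1 + 1) / (tf + k1), 2))
# 1 -> 1.0, 2 -> 1.43, 5 -> 1.92: the component approaches k1 + 1 = 2.5
# asymptotically, so each additional occurrence adds less than the last.

Note that the assertions only verify positive increments and monotonic growth with term frequency; the asymptote above is where the saturation actually shows up.
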
hello?", "no match here"] + query_terms = ["hello", "world"] + + scores = compute_bm25(documents, query_terms) + + # Should handle punctuation and still match terms + assert len(scores) == 3 + # Same tokenization behavior as TF-IDF + + def test_compute_tfidf_whitespace_handling(self): + """Test TF-IDF with various whitespace scenarios.""" + documents = [ + " hello world ", # Extra spaces + "\thello\tworld\t", # Tabs + "hello\nworld", # Newlines + "", # Empty string + ] + query_terms = ["hello", "world"] + + scores = compute_tfidf(documents, query_terms) + + assert len(scores) == 4 + # First three should have positive scores (they contain the terms) + assert scores[0] > 0.0 + assert scores[1] > 0.0 + assert scores[2] > 0.0 + # Last should be zero (empty document) + assert scores[3] == 0.0 + + def test_compute_bm25_whitespace_handling(self): + """Test BM25 with various whitespace scenarios.""" + documents = [ + " hello world ", # Extra spaces + "\thello\tworld\t", # Tabs + "hello\nworld", # Newlines + "", # Empty string + ] + query_terms = ["hello", "world"] + + scores = compute_bm25(documents, query_terms) + + assert len(scores) == 4 + # First three should have positive scores (they contain the terms) + assert scores[0] > 0.0 + assert scores[1] > 0.0 + assert scores[2] > 0.0 + # Last should be zero (empty document) + assert scores[3] == 0.0 + + def test_compute_tfidf_mathematical_properties(self): + """Test mathematical properties of TF-IDF scores.""" + documents = ["cat dog", "cat", "dog"] + query_terms = ["cat"] + + scores = compute_tfidf(documents, query_terms) + + # All scores should be non-negative + assert all(score >= 0.0 for score in scores) + + # Documents containing the term should have positive scores + assert scores[0] > 0.0 # contains "cat" + assert scores[1] > 0.0 # contains "cat" + assert scores[2] == 0.0 # doesn't contain "cat" + + def test_compute_bm25_mathematical_properties(self): + """Test mathematical properties of BM25 scores.""" + documents = ["cat dog", "cat", "dog"] + query_terms = ["cat"] + + scores = compute_bm25(documents, query_terms) + + # All scores should be non-negative + assert all(score >= 0.0 for score in scores) + + # Documents containing the term should have positive scores + assert scores[0] > 0.0 # contains "cat" + assert scores[1] > 0.0 # contains "cat" + assert scores[2] == 0.0 # doesn't contain "cat" + + def test_compute_tfidf_duplicate_terms_in_query(self): + """Test TF-IDF with duplicate terms in query.""" + documents = ["cat dog bird", "cat cat dog", "bird bird bird"] + query_terms = ["cat", "cat", "dog"] # "cat" appears twice + + scores = compute_tfidf(documents, query_terms) + + # Should handle duplicate query terms gracefully + assert len(scores) == 3 + assert all(isinstance(score, float) for score in scores) + + # First two documents should have positive scores + assert scores[0] > 0.0 + assert scores[1] > 0.0 + # Third document only contains "bird", so should have zero score + assert scores[2] == 0.0 + + def test_compute_bm25_duplicate_terms_in_query(self): + """Test BM25 with duplicate terms in query.""" + documents = ["cat dog bird", "cat cat dog", "bird bird bird"] + query_terms = ["cat", "cat", "dog"] # "cat" appears twice + + scores = compute_bm25(documents, query_terms) + + # Should handle duplicate query terms gracefully + assert len(scores) == 3 + assert all(isinstance(score, float) for score in scores) + + # First two documents should have positive scores + assert scores[0] > 0.0 + assert scores[1] > 0.0 + # Third document only 
contains "bird", so should have zero score + assert scores[2] == 0.0 diff --git a/src/backend/tests/unit/components/data/test_kb_ingest.py b/src/backend/tests/unit/components/data/test_kb_ingest.py new file mode 100644 index 000000000..aa2ba2850 --- /dev/null +++ b/src/backend/tests/unit/components/data/test_kb_ingest.py @@ -0,0 +1,392 @@ +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pandas as pd +import pytest +from langflow.components.data.kb_ingest import KBIngestionComponent +from langflow.schema.data import Data + +from tests.base import ComponentTestBaseWithoutClient + + +class TestKBIngestionComponent(ComponentTestBaseWithoutClient): + @pytest.fixture + def component_class(self): + """Return the component class to test.""" + return KBIngestionComponent + + @pytest.fixture(autouse=True) + def mock_knowledge_base_path(self, tmp_path): + """Mock the knowledge base root path directly.""" + with patch("langflow.components.data.kb_ingest.KNOWLEDGE_BASES_ROOT_PATH", tmp_path): + yield + + @pytest.fixture + def default_kwargs(self, tmp_path): + """Return default kwargs for component instantiation.""" + # Create a sample DataFrame + data_df = pd.DataFrame( + {"text": ["Sample text 1", "Sample text 2"], "title": ["Title 1", "Title 2"], "category": ["cat1", "cat2"]} + ) + + # Create column configuration + column_config = [ + {"column_name": "text", "vectorize": True, "identifier": False}, + {"column_name": "title", "vectorize": False, "identifier": False}, + {"column_name": "category", "vectorize": False, "identifier": True}, + ] + + # Create knowledge base directory + kb_name = "test_kb" + kb_path = tmp_path / kb_name + kb_path.mkdir(exist_ok=True) + + # Create embedding metadata file + metadata = { + "embedding_provider": "HuggingFace", + "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + "api_key": None, + "api_key_used": False, + "chunk_size": 1000, + "created_at": "2024-01-01T00:00:00Z", + } + (kb_path / "embedding_metadata.json").write_text(json.dumps(metadata)) + + return { + "knowledge_base": kb_name, + "input_df": data_df, + "column_config": column_config, + "chunk_size": 1000, + "kb_root_path": str(tmp_path), + "api_key": None, + "allow_duplicates": False, + "silent_errors": False, + } + + @pytest.fixture + def file_names_mapping(self): + """Return file names mapping for version testing.""" + # This is a new component, so it doesn't exist in older versions + return [] + + def test_validate_column_config_valid(self, component_class, default_kwargs): + """Test column configuration validation with valid config.""" + component = component_class(**default_kwargs) + data_df = default_kwargs["input_df"] + + config_list = component._validate_column_config(data_df) + + assert len(config_list) == 3 + assert config_list[0]["column_name"] == "text" + assert config_list[0]["vectorize"] is True + + def test_validate_column_config_invalid_column(self, component_class, default_kwargs): + """Test column configuration validation with invalid column name.""" + # Modify column config to include non-existent column + invalid_config = [{"column_name": "nonexistent", "vectorize": True, "identifier": False}] + default_kwargs["column_config"] = invalid_config + + component = component_class(**default_kwargs) + data_df = default_kwargs["input_df"] + + with pytest.raises(ValueError, match="Column 'nonexistent' not found in DataFrame"): + component._validate_column_config(data_df) + + def test_validate_column_config_silent_errors(self, component_class, 
default_kwargs): + """Test column configuration validation with silent errors enabled.""" + # Modify column config to include non-existent column + invalid_config = [{"column_name": "nonexistent", "vectorize": True, "identifier": False}] + default_kwargs["column_config"] = invalid_config + default_kwargs["silent_errors"] = True + + component = component_class(**default_kwargs) + data_df = default_kwargs["input_df"] + + # Should not raise exception with silent_errors=True + config_list = component._validate_column_config(data_df) + assert isinstance(config_list, list) + + def test_get_embedding_provider(self, component_class, default_kwargs): + """Test embedding provider detection.""" + component = component_class(**default_kwargs) + + # Test OpenAI provider + assert component._get_embedding_provider("text-embedding-ada-002") == "OpenAI" + + # Test HuggingFace provider + assert component._get_embedding_provider("sentence-transformers/all-MiniLM-L6-v2") == "HuggingFace" + + # Test Cohere provider + assert component._get_embedding_provider("embed-english-v3.0") == "Cohere" + + # Test custom provider + assert component._get_embedding_provider("custom-model") == "Custom" + + @patch("langchain_huggingface.HuggingFaceEmbeddings") + def test_build_embeddings_huggingface(self, mock_hf_embeddings, component_class, default_kwargs): + """Test building HuggingFace embeddings.""" + component = component_class(**default_kwargs) + + mock_embeddings = MagicMock() + mock_hf_embeddings.return_value = mock_embeddings + + result = component._build_embeddings("sentence-transformers/all-MiniLM-L6-v2", None) + + mock_hf_embeddings.assert_called_once_with(model="sentence-transformers/all-MiniLM-L6-v2") + assert result == mock_embeddings + + @patch("langchain_openai.OpenAIEmbeddings") + def test_build_embeddings_openai(self, mock_openai_embeddings, component_class, default_kwargs): + """Test building OpenAI embeddings.""" + component = component_class(**default_kwargs) + + mock_embeddings = MagicMock() + mock_openai_embeddings.return_value = mock_embeddings + + result = component._build_embeddings("text-embedding-ada-002", "test-api-key") + + mock_openai_embeddings.assert_called_once_with( + model="text-embedding-ada-002", api_key="test-api-key", chunk_size=1000 + ) + assert result == mock_embeddings + + def test_build_embeddings_openai_no_key(self, component_class, default_kwargs): + """Test building OpenAI embeddings without API key raises error.""" + component = component_class(**default_kwargs) + + with pytest.raises(ValueError, match="OpenAI API key is required"): + component._build_embeddings("text-embedding-ada-002", None) + + @patch("langchain_cohere.CohereEmbeddings") + def test_build_embeddings_cohere(self, mock_cohere_embeddings, component_class, default_kwargs): + """Test building Cohere embeddings.""" + component = component_class(**default_kwargs) + + mock_embeddings = MagicMock() + mock_cohere_embeddings.return_value = mock_embeddings + + result = component._build_embeddings("embed-english-v3.0", "test-api-key") + + mock_cohere_embeddings.assert_called_once_with(model="embed-english-v3.0", cohere_api_key="test-api-key") + assert result == mock_embeddings + + def test_build_embeddings_cohere_no_key(self, component_class, default_kwargs): + """Test building Cohere embeddings without API key raises error.""" + component = component_class(**default_kwargs) + + with pytest.raises(ValueError, match="Cohere API key is required"): + component._build_embeddings("embed-english-v3.0", None) + + def 
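
The body of _get_embedding_provider is not part of this excerpt; a hypothetical dispatch that would satisfy the four assertions in test_get_embedding_provider is sketched below. The real method may key on different patterns, so treat this as orientation only:

def _guess_embedding_provider(model_name: str) -> str:
    # Hypothetical stand-in for KBIngestionComponent._get_embedding_provider.
    if model_name.startswith("text-embedding-"):
        return "OpenAI"
    if "/" in model_name:  # hub-style ids such as sentence-transformers/all-MiniLM-L6-v2
        return "HuggingFace"
    if model_name.startswith("embed-"):
        return "Cohere"
    return "Custom"
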
test_build_embeddings_custom_not_supported(self, component_class, default_kwargs): + """Test building custom embeddings raises NotImplementedError.""" + component = component_class(**default_kwargs) + + with pytest.raises(NotImplementedError, match="Custom embedding models not yet supported"): + component._build_embeddings("custom-model", "test-key") + + @patch("langflow.components.data.kb_ingest.get_settings_service") + @patch("langflow.components.data.kb_ingest.encrypt_api_key") + def test_build_embedding_metadata(self, mock_encrypt, mock_get_settings, component_class, default_kwargs): + """Test building embedding metadata.""" + component = component_class(**default_kwargs) + + mock_settings = MagicMock() + mock_get_settings.return_value = mock_settings + mock_encrypt.return_value = "encrypted_key" + + metadata = component._build_embedding_metadata("sentence-transformers/all-MiniLM-L6-v2", "test-key") + + assert metadata["embedding_provider"] == "HuggingFace" + assert metadata["embedding_model"] == "sentence-transformers/all-MiniLM-L6-v2" + assert metadata["api_key"] == "encrypted_key" + assert metadata["api_key_used"] is True + assert metadata["chunk_size"] == 1000 + assert "created_at" in metadata + + def test_build_column_metadata(self, component_class, default_kwargs): + """Test building column metadata.""" + component = component_class(**default_kwargs) + data_df = default_kwargs["input_df"] + config_list = default_kwargs["column_config"] + + metadata = component._build_column_metadata(config_list, data_df) + + assert metadata["total_columns"] == 3 + assert metadata["mapped_columns"] == 3 + assert metadata["unmapped_columns"] == 0 + assert len(metadata["columns"]) == 3 + assert "text" in metadata["summary"]["vectorized_columns"] + assert "category" in metadata["summary"]["identifier_columns"] + + def test_convert_df_to_data_objects(self, component_class, default_kwargs): + """Test converting DataFrame to Data objects.""" + component = component_class(**default_kwargs) + data_df = default_kwargs["input_df"] + config_list = default_kwargs["column_config"] + + # Mock Chroma to avoid actual vector store operations + with patch("langflow.components.data.kb_ingest.Chroma") as mock_chroma: + mock_chroma_instance = MagicMock() + mock_chroma_instance.get.return_value = {"metadatas": []} + mock_chroma.return_value = mock_chroma_instance + + data_objects = component._convert_df_to_data_objects(data_df, config_list) + + assert len(data_objects) == 2 + assert all(isinstance(obj, Data) for obj in data_objects) + + # Check first data object + first_obj = data_objects[0] + assert "text" in first_obj.data + assert "title" in first_obj.data + assert "category" in first_obj.data + assert "_id" in first_obj.data + + def test_convert_df_to_data_objects_no_duplicates(self, component_class, default_kwargs): + """Test converting DataFrame to Data objects with duplicate prevention.""" + default_kwargs["allow_duplicates"] = False + component = component_class(**default_kwargs) + data_df = default_kwargs["input_df"] + config_list = default_kwargs["column_config"] + + # Mock Chroma with existing hash + with patch("langflow.components.data.kb_ingest.Chroma") as mock_chroma: + # Simulate existing document with same hash + existing_hash = "some_existing_hash" + mock_chroma_instance = MagicMock() + mock_chroma_instance.get.return_value = {"metadatas": [{"_id": existing_hash}]} + mock_chroma.return_value = mock_chroma_instance + + # Mock hashlib to return the existing hash for first row + with 
patch("langflow.components.data.kb_ingest.hashlib.sha256") as mock_hash: + mock_hash_obj = MagicMock() + mock_hash_obj.hexdigest.side_effect = [existing_hash, "different_hash"] + mock_hash.return_value = mock_hash_obj + + data_objects = component._convert_df_to_data_objects(data_df, config_list) + + # Should only return one object (second row) since first is duplicate + assert len(data_objects) == 1 + + def test_is_valid_collection_name(self, component_class, default_kwargs): + """Test collection name validation.""" + component = component_class(**default_kwargs) + + # Valid names + assert component.is_valid_collection_name("valid_name") is True + assert component.is_valid_collection_name("valid-name") is True + assert component.is_valid_collection_name("ValidName123") is True + + # Invalid names + assert component.is_valid_collection_name("ab") is False # Too short + assert component.is_valid_collection_name("a" * 64) is False # Too long + assert component.is_valid_collection_name("_invalid") is False # Starts with underscore + assert component.is_valid_collection_name("invalid_") is False # Ends with underscore + assert component.is_valid_collection_name("invalid@name") is False # Invalid character + + @patch("langflow.components.data.kb_ingest.json.loads") + @patch("langflow.components.data.kb_ingest.decrypt_api_key") + def test_build_kb_info_success(self, mock_decrypt, mock_json_loads, component_class, default_kwargs): + """Test successful KB info building.""" + component = component_class(**default_kwargs) + + # Mock metadata loading + mock_json_loads.return_value = { + "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + "api_key": "encrypted_key", + } + mock_decrypt.return_value = "decrypted_key" + + # Mock vector store creation + with patch.object(component, "_create_vector_store"), patch.object(component, "_save_kb_files"): + result = component.build_kb_info() + + assert isinstance(result, Data) + assert "kb_id" in result.data + assert "kb_name" in result.data + assert "rows" in result.data + assert result.data["rows"] == 2 + + def test_build_kb_info_with_silent_errors(self, component_class, default_kwargs): + """Test KB info building with silent errors enabled.""" + default_kwargs["silent_errors"] = True + component = component_class(**default_kwargs) + + # Remove the metadata file to cause an error + kb_path = Path(default_kwargs["kb_root_path"]) / default_kwargs["knowledge_base"] + metadata_file = kb_path / "embedding_metadata.json" + if metadata_file.exists(): + metadata_file.unlink() + + # Should not raise exception with silent_errors=True + result = component.build_kb_info() + assert isinstance(result, Data) + assert "error" in result.data + + def test_get_knowledge_bases(self, component_class, default_kwargs, tmp_path): + """Test getting list of knowledge bases.""" + component = component_class(**default_kwargs) + + # Create additional test directories + (tmp_path / "kb1").mkdir() + (tmp_path / "kb2").mkdir() + (tmp_path / ".hidden").mkdir() # Should be ignored + + kb_list = component._get_knowledge_bases() + + assert "test_kb" in kb_list + assert "kb1" in kb_list + assert "kb2" in kb_list + assert ".hidden" not in kb_list + + @patch("langflow.components.data.kb_ingest.Path.exists") + def test_get_knowledge_bases_no_path(self, mock_exists, component_class, default_kwargs): + """Test getting knowledge bases when path doesn't exist.""" + component = component_class(**default_kwargs) + mock_exists.return_value = False + + kb_list = component._get_knowledge_bases() + 
assert kb_list == [] + + def test_update_build_config_new_kb(self, component_class, default_kwargs): + """Test updating build config for new knowledge base creation.""" + component = component_class(**default_kwargs) + + build_config = {"knowledge_base": {"value": None, "options": []}} + + field_value = { + "01_new_kb_name": "new_test_kb", + "02_embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + "03_api_key": None, + } + + # Mock embedding validation + with ( + patch.object(component, "_build_embeddings") as mock_build_emb, + patch.object(component, "_save_embedding_metadata"), + patch.object(component, "_get_knowledge_bases") as mock_get_kbs, + ): + mock_embeddings = MagicMock() + mock_embeddings.embed_query.return_value = [0.1, 0.2, 0.3] + mock_build_emb.return_value = mock_embeddings + mock_get_kbs.return_value = ["new_test_kb"] + + result = component.update_build_config(build_config, field_value, "knowledge_base") + + assert result["knowledge_base"]["value"] == "new_test_kb" + assert "new_test_kb" in result["knowledge_base"]["options"] + + def test_update_build_config_invalid_kb_name(self, component_class, default_kwargs): + """Test updating build config with invalid KB name.""" + component = component_class(**default_kwargs) + + build_config = {"knowledge_base": {"value": None, "options": []}} + field_value = { + "01_new_kb_name": "invalid@name", # Invalid character + "02_embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + "03_api_key": None, + } + + with pytest.raises(ValueError, match="Invalid knowledge base name"): + component.update_build_config(build_config, field_value, "knowledge_base") diff --git a/src/backend/tests/unit/components/data/test_kb_retrieval.py b/src/backend/tests/unit/components/data/test_kb_retrieval.py new file mode 100644 index 000000000..ee72c7840 --- /dev/null +++ b/src/backend/tests/unit/components/data/test_kb_retrieval.py @@ -0,0 +1,368 @@ +import contextlib +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from langflow.components.data.kb_retrieval import KBRetrievalComponent + +from tests.base import ComponentTestBaseWithoutClient + + +class TestKBRetrievalComponent(ComponentTestBaseWithoutClient): + @pytest.fixture + def component_class(self): + """Return the component class to test.""" + return KBRetrievalComponent + + @pytest.fixture(autouse=True) + def mock_knowledge_base_path(self, tmp_path): + """Mock the knowledge base root path directly.""" + with patch("langflow.components.data.kb_retrieval.KNOWLEDGE_BASES_ROOT_PATH", tmp_path): + yield + + @pytest.fixture + def default_kwargs(self, tmp_path): + """Return default kwargs for component instantiation.""" + # Create knowledge base directory structure + kb_name = "test_kb" + kb_path = tmp_path / kb_name + kb_path.mkdir(exist_ok=True) + + # Create embedding metadata file + metadata = { + "embedding_provider": "HuggingFace", + "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + "api_key": None, + "api_key_used": False, + "chunk_size": 1000, + "created_at": "2024-01-01T00:00:00Z", + } + (kb_path / "embedding_metadata.json").write_text(json.dumps(metadata)) + + return { + "knowledge_base": kb_name, + "kb_root_path": str(tmp_path), + "api_key": None, + "search_query": "", + "top_k": 5, + "include_embeddings": True, + } + + @pytest.fixture + def file_names_mapping(self): + """Return file names mapping for version testing.""" + # This is a new component, so it doesn't exist in older versions + return [] + + def 
test_get_knowledge_bases(self, component_class, default_kwargs, tmp_path): + """Test getting list of knowledge bases.""" + component = component_class(**default_kwargs) + + # Create additional test directories + (tmp_path / "kb1").mkdir() + (tmp_path / "kb2").mkdir() + (tmp_path / ".hidden").mkdir() # Should be ignored + + kb_list = component._get_knowledge_bases() + + assert "test_kb" in kb_list + assert "kb1" in kb_list + assert "kb2" in kb_list + assert ".hidden" not in kb_list + + @patch("langflow.components.data.kb_retrieval.Path.exists") + def test_get_knowledge_bases_no_path(self, mock_exists, component_class, default_kwargs): + """Test getting knowledge bases when path doesn't exist.""" + component = component_class(**default_kwargs) + mock_exists.return_value = False + + kb_list = component._get_knowledge_bases() + assert kb_list == [] + + def test_update_build_config(self, component_class, default_kwargs, tmp_path): + """Test updating build configuration.""" + component = component_class(**default_kwargs) + + # Create additional KB directories + (tmp_path / "kb1").mkdir() + (tmp_path / "kb2").mkdir() + + build_config = {"knowledge_base": {"value": "test_kb", "options": []}} + + result = component.update_build_config(build_config, None, "knowledge_base") + + assert "test_kb" in result["knowledge_base"]["options"] + assert "kb1" in result["knowledge_base"]["options"] + assert "kb2" in result["knowledge_base"]["options"] + + def test_update_build_config_invalid_kb(self, component_class, default_kwargs): + """Test updating build config when selected KB is not available.""" + component = component_class(**default_kwargs) + + build_config = {"knowledge_base": {"value": "nonexistent_kb", "options": ["test_kb"]}} + + result = component.update_build_config(build_config, None, "knowledge_base") + + assert result["knowledge_base"]["value"] is None + + def test_get_kb_metadata_success(self, component_class, default_kwargs): + """Test successful metadata loading.""" + component = component_class(**default_kwargs) + kb_path = Path(default_kwargs["kb_root_path"]) / default_kwargs["knowledge_base"] + + with patch("langflow.components.data.kb_retrieval.decrypt_api_key") as mock_decrypt: + mock_decrypt.return_value = "decrypted_key" + + metadata = component._get_kb_metadata(kb_path) + + assert metadata["embedding_provider"] == "HuggingFace" + assert metadata["embedding_model"] == "sentence-transformers/all-MiniLM-L6-v2" + assert "chunk_size" in metadata + + def test_get_kb_metadata_no_file(self, component_class, default_kwargs, tmp_path): + """Test metadata loading when file doesn't exist.""" + component = component_class(**default_kwargs) + nonexistent_path = tmp_path / "nonexistent" + nonexistent_path.mkdir() + + metadata = component._get_kb_metadata(nonexistent_path) + + assert metadata == {} + + def test_get_kb_metadata_json_error(self, component_class, default_kwargs, tmp_path): + """Test metadata loading with invalid JSON.""" + component = component_class(**default_kwargs) + kb_path = tmp_path / "invalid_json_kb" + kb_path.mkdir() + + # Create invalid JSON file + (kb_path / "embedding_metadata.json").write_text("invalid json content") + + metadata = component._get_kb_metadata(kb_path) + + assert metadata == {} + + def test_get_kb_metadata_decrypt_error(self, component_class, default_kwargs, tmp_path): + """Test metadata loading with decryption error.""" + component = component_class(**default_kwargs) + kb_path = tmp_path / "decrypt_error_kb" + kb_path.mkdir() + + # Create metadata with 
encrypted key + metadata = { + "embedding_provider": "OpenAI", + "embedding_model": "text-embedding-ada-002", + "api_key": "encrypted_key", + "chunk_size": 1000, + } + (kb_path / "embedding_metadata.json").write_text(json.dumps(metadata)) + + with patch("langflow.components.data.kb_retrieval.decrypt_api_key") as mock_decrypt: + mock_decrypt.side_effect = ValueError("Decryption failed") + + result = component._get_kb_metadata(kb_path) + + assert result["api_key"] is None + + @patch("langchain_huggingface.HuggingFaceEmbeddings") + def test_build_embeddings_huggingface(self, mock_hf_embeddings, component_class, default_kwargs): + """Test building HuggingFace embeddings.""" + component = component_class(**default_kwargs) + + metadata = { + "embedding_provider": "HuggingFace", + "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + "chunk_size": 1000, + } + + mock_embeddings = MagicMock() + mock_hf_embeddings.return_value = mock_embeddings + + result = component._build_embeddings(metadata) + + mock_hf_embeddings.assert_called_once_with(model="sentence-transformers/all-MiniLM-L6-v2") + assert result == mock_embeddings + + @patch("langchain_openai.OpenAIEmbeddings") + def test_build_embeddings_openai(self, mock_openai_embeddings, component_class, default_kwargs): + """Test building OpenAI embeddings.""" + component = component_class(**default_kwargs) + + metadata = { + "embedding_provider": "OpenAI", + "embedding_model": "text-embedding-ada-002", + "api_key": "test-api-key", + "chunk_size": 1000, + } + + mock_embeddings = MagicMock() + mock_openai_embeddings.return_value = mock_embeddings + + result = component._build_embeddings(metadata) + + mock_openai_embeddings.assert_called_once_with( + model="text-embedding-ada-002", api_key="test-api-key", chunk_size=1000 + ) + assert result == mock_embeddings + + def test_build_embeddings_openai_no_key(self, component_class, default_kwargs): + """Test building OpenAI embeddings without API key raises error.""" + component = component_class(**default_kwargs) + + metadata = { + "embedding_provider": "OpenAI", + "embedding_model": "text-embedding-ada-002", + "api_key": None, + "chunk_size": 1000, + } + + with pytest.raises(ValueError, match="OpenAI API key is required"): + component._build_embeddings(metadata) + + @patch("langchain_cohere.CohereEmbeddings") + def test_build_embeddings_cohere(self, mock_cohere_embeddings, component_class, default_kwargs): + """Test building Cohere embeddings.""" + component = component_class(**default_kwargs) + + metadata = { + "embedding_provider": "Cohere", + "embedding_model": "embed-english-v3.0", + "api_key": "test-api-key", + "chunk_size": 1000, + } + + mock_embeddings = MagicMock() + mock_cohere_embeddings.return_value = mock_embeddings + + result = component._build_embeddings(metadata) + + mock_cohere_embeddings.assert_called_once_with(model="embed-english-v3.0", cohere_api_key="test-api-key") + assert result == mock_embeddings + + def test_build_embeddings_cohere_no_key(self, component_class, default_kwargs): + """Test building Cohere embeddings without API key raises error.""" + component = component_class(**default_kwargs) + + metadata = { + "embedding_provider": "Cohere", + "embedding_model": "embed-english-v3.0", + "api_key": None, + "chunk_size": 1000, + } + + with pytest.raises(ValueError, match="Cohere API key is required"): + component._build_embeddings(metadata) + + def test_build_embeddings_custom_not_supported(self, component_class, default_kwargs): + """Test building custom embeddings raises 
NotImplementedError.""" + component = component_class(**default_kwargs) + + metadata = {"embedding_provider": "Custom", "embedding_model": "custom-model", "api_key": "test-key"} + + with pytest.raises(NotImplementedError, match="Custom embedding models not yet supported"): + component._build_embeddings(metadata) + + def test_build_embeddings_unsupported_provider(self, component_class, default_kwargs): + """Test building embeddings with unsupported provider raises NotImplementedError.""" + component = component_class(**default_kwargs) + + metadata = {"embedding_provider": "UnsupportedProvider", "embedding_model": "some-model", "api_key": "test-key"} + + with pytest.raises(NotImplementedError, match="Embedding provider 'UnsupportedProvider' is not supported"): + component._build_embeddings(metadata) + + def test_build_embeddings_with_user_api_key(self, component_class, default_kwargs): + """Test that user-provided API key overrides stored one.""" + # Create a mock secret input + + mock_secret = MagicMock() + mock_secret.get_secret_value.return_value = "user-provided-key" + + default_kwargs["api_key"] = mock_secret + component = component_class(**default_kwargs) + + metadata = { + "embedding_provider": "OpenAI", + "embedding_model": "text-embedding-ada-002", + "api_key": "stored-key", + "chunk_size": 1000, + } + + with patch("langchain_openai.OpenAIEmbeddings") as mock_openai: + mock_embeddings = MagicMock() + mock_openai.return_value = mock_embeddings + + component._build_embeddings(metadata) + + mock_openai.assert_called_once_with( + model="text-embedding-ada-002", api_key="user-provided-key", chunk_size=1000 + ) + + def test_get_chroma_kb_data_no_metadata(self, component_class, default_kwargs, tmp_path): + """Test retrieving data when metadata is missing.""" + # Remove metadata file + kb_path = tmp_path / default_kwargs["knowledge_base"] + metadata_file = kb_path / "embedding_metadata.json" + if metadata_file.exists(): + metadata_file.unlink() + + component = component_class(**default_kwargs) + + with pytest.raises(ValueError, match="Metadata not found for knowledge base"): + component.get_chroma_kb_data() + + def test_get_chroma_kb_data_path_construction(self, component_class, default_kwargs): + """Test that get_chroma_kb_data constructs the correct paths.""" + component = component_class(**default_kwargs) + + # Test that the component correctly builds the KB path + + assert component.kb_root_path == default_kwargs["kb_root_path"] + assert component.knowledge_base == default_kwargs["knowledge_base"] + + # Test that paths are correctly expanded + expanded_path = Path(component.kb_root_path).expanduser() + assert expanded_path.exists() # tmp_path should exist + + # Verify method exists with correct parameters + assert hasattr(component, "get_chroma_kb_data") + assert hasattr(component, "search_query") + assert hasattr(component, "top_k") + assert hasattr(component, "include_embeddings") + + def test_get_chroma_kb_data_method_exists(self, component_class, default_kwargs): + """Test that get_chroma_kb_data method exists and can be called.""" + component = component_class(**default_kwargs) + + # Just verify the method exists and has the right signature + assert hasattr(component, "get_chroma_kb_data"), "Component should have get_chroma_kb_data method" + + # Mock all external calls to avoid integration issues + with ( + patch.object(component, "_get_kb_metadata") as mock_get_metadata, + patch.object(component, "_build_embeddings") as mock_build_embeddings, + patch("langchain_chroma.Chroma"), + 
): + mock_get_metadata.return_value = {"embedding_provider": "HuggingFace", "embedding_model": "test-model"} + mock_build_embeddings.return_value = MagicMock() + + # This is a unit test focused on the component's internal logic + with contextlib.suppress(Exception): + component.get_chroma_kb_data() + + # Verify internal methods were called + mock_get_metadata.assert_called_once() + mock_build_embeddings.assert_called_once() + + def test_include_embeddings_parameter(self, component_class, default_kwargs): + """Test that include_embeddings parameter is properly set.""" + # Test with embeddings enabled + default_kwargs["include_embeddings"] = True + component = component_class(**default_kwargs) + assert component.include_embeddings is True + + # Test with embeddings disabled + default_kwargs["include_embeddings"] = False + component = component_class(**default_kwargs) + assert component.include_embeddings is False diff --git a/src/frontend/jest.config.js b/src/frontend/jest.config.js index 7eabf0685..9adc9062f 100644 --- a/src/frontend/jest.config.js +++ b/src/frontend/jest.config.js @@ -7,10 +7,12 @@ module.exports = { "\\.(css|less|scss|sass)$": "identity-obj-proxy", }, setupFilesAfterEnv: ["/src/setupTests.ts"], + setupFiles: ["/jest.setup.js"], testMatch: [ - "/src/**/__tests__/**/*.{ts,tsx}", + "/src/**/__tests__/**/*.{test,spec}.{ts,tsx}", "/src/**/*.{test,spec}.{ts,tsx}", ], + testPathIgnorePatterns: ["/node_modules/", "test-utils.tsx"], transform: { "^.+\\.(ts|tsx)$": "ts-jest", }, diff --git a/src/frontend/jest.setup.js b/src/frontend/jest.setup.js new file mode 100644 index 000000000..88abf9bbc --- /dev/null +++ b/src/frontend/jest.setup.js @@ -0,0 +1,38 @@ +// Jest setup file to mock globals and Vite-specific syntax + +// Mock import.meta +global.import = { + meta: { + env: { + CI: process.env.CI || false, + NODE_ENV: "test", + MODE: "test", + DEV: false, + PROD: false, + VITE_API_URL: "http://localhost:7860", + }, + }, +}; + +// Mock crypto for Node.js environment +if (typeof global.crypto === "undefined") { + const { webcrypto } = require("crypto"); + global.crypto = webcrypto; +} + +// Mock URL if not available +if (typeof global.URL === "undefined") { + global.URL = require("url").URL; +} + +// Mock localStorage +const localStorageMock = { + getItem: jest.fn(), + setItem: jest.fn(), + removeItem: jest.fn(), + clear: jest.fn(), +}; +global.localStorage = localStorageMock; + +// Mock sessionStorage +global.sessionStorage = localStorageMock; diff --git a/src/frontend/src/CustomNodes/GenericNode/components/NodeDialogComponent/index.tsx b/src/frontend/src/CustomNodes/GenericNode/components/NodeDialogComponent/index.tsx index 1d953f2f0..874286557 100644 --- a/src/frontend/src/CustomNodes/GenericNode/components/NodeDialogComponent/index.tsx +++ b/src/frontend/src/CustomNodes/GenericNode/components/NodeDialogComponent/index.tsx @@ -1,5 +1,6 @@ import { useState } from "react"; import { mutateTemplate } from "@/CustomNodes/helpers/mutate-template"; +import type { handleOnNewValueType } from "@/CustomNodes/hooks/use-handle-new-value"; import { ParameterRenderComponent } from "@/components/core/parameterRenderComponent"; import { Button } from "@/components/ui/button"; import { @@ -26,10 +27,6 @@ interface NodeDialogProps { nodeClass: APIClassType; } -interface ValueObject { - value: string; -} - export const NodeDialog: React.FC = ({ open, onClose, @@ -44,6 +41,7 @@ export const NodeDialog: React.FC = ({ const nodes = useFlowStore((state) => state.nodes); const setNode = 
useFlowStore((state) => state.setNode); const setErrorData = useAlertStore((state) => state.setErrorData); + const setSuccessData = useAlertStore((state) => state.setSuccessData); const postTemplateValue = usePostTemplateValue({ parameterId: name, @@ -71,14 +69,41 @@ export const NodeDialog: React.FC = ({ setIsLoading(false); }; - const updateFieldValue = (value: string | ValueObject, fieldKey: string) => { - const newValue = typeof value === "object" ? value.value : value; + const updateFieldValue = ( + changes: Parameters[0], + fieldKey: string, + ) => { + // Handle both legacy string format and new object format + const newValue = + typeof changes === "object" && changes !== null ? changes.value : changes; + const targetNode = nodes.find((node) => node.id === nodeId); if (!targetNode || !name) return; + // Update the main field value targetNode.data.node.template[name].dialog_inputs.fields.data.node.template[ fieldKey ].value = newValue; + + // Handle additional properties like load_from_db for InputGlobalComponent + if (typeof changes === "object" && changes !== null) { + const fieldTemplate = + targetNode.data.node.template[name].dialog_inputs.fields.data.node + .template[fieldKey]; + + // Update load_from_db if present (for InputGlobalComponent) + if ("load_from_db" in changes) { + fieldTemplate.load_from_db = changes.load_from_db; + } + + // Handle any other properties that might be needed + Object.keys(changes).forEach((key) => { + if (key !== "value" && key in fieldTemplate) { + fieldTemplate[key] = changes[key]; + } + }); + } + setNode(nodeId, targetNode); setFieldValues((prev) => ({ ...prev, [fieldKey]: newValue })); @@ -110,6 +135,48 @@ export const NodeDialog: React.FC = ({ onClose(); }; + const handleSuccessCallback = () => { + // Check if this is a knowledge base creation + const isKnowledgeBaseCreation = + dialogNodeData?.display_name === "Create Knowledge" || + dialogNodeData?.name === "create_knowledge_base" || + (dialogNodeData?.description && + dialogNodeData.description.toLowerCase().includes("knowledge")); + + if (isKnowledgeBaseCreation) { + // Get the knowledge base name from field values + const knowledgeBaseName = + fieldValues["01_new_kb_name"] || + fieldValues["new_kb_name"] || + "Knowledge Base"; + + setSuccessData({ + title: `Knowledge Base "${knowledgeBaseName}" created successfully!`, + }); + } + + // Only close dialog after success and delay for Astra database tracking + if (nodeId.toLowerCase().includes("astra") && name === "database_name") { + const { + cloud_provider: cloudProvider, + new_database_name: databaseName, + ...otherFields + } = fieldValues; + track("Database Created", { + nodeId, + cloudProvider, + databaseName, + ...otherFields, + }); + + setTimeout(() => { + handleCloseDialog(); + }, 5000); + } else { + handleCloseDialog(); + } + }; + const handleSubmitDialog = async () => { // Validate required fields first const missingRequiredFields = Object.entries(dialogTemplate) @@ -143,27 +210,9 @@ export const NodeDialog: React.FC = ({ postTemplateValue, handleErrorData, name, - handleCloseDialog, + handleSuccessCallback, nodeClass.tool_mode, ); - - if (nodeId.toLowerCase().includes("astra") && name === "database_name") { - const { - cloud_provider: cloudProvider, - new_database_name: databaseName, - ...otherFields - } = fieldValues; - track("Database Created", { - nodeId, - cloudProvider, - databaseName, - ...otherFields, - }); - } - - setTimeout(() => { - handleCloseDialog(); - }, 5000); }; // Render @@ -198,8 +247,8 @@ export const NodeDialog: 
React.FC = ({ })} - updateFieldValue(value, fieldKey) + handleOnNewValue={(changes) => + updateFieldValue(changes, fieldKey) } name={fieldKey} nodeId={nodeId} diff --git a/src/frontend/src/components/core/dropdownComponent/index.tsx b/src/frontend/src/components/core/dropdownComponent/index.tsx index 114289103..34500cc4d 100644 --- a/src/frontend/src/components/core/dropdownComponent/index.tsx +++ b/src/frontend/src/components/core/dropdownComponent/index.tsx @@ -1,6 +1,5 @@ import { PopoverAnchor } from "@radix-ui/react-popover"; import Fuse from "fuse.js"; -import { cloneDeep } from "lodash"; import { type ChangeEvent, useEffect, useMemo, useRef, useState } from "react"; import NodeDialog from "@/CustomNodes/GenericNode/components/NodeDialogComponent"; import { mutateTemplate } from "@/CustomNodes/helpers/mutate-template"; @@ -305,7 +304,9 @@ export default function Dropdown({ disabled || (Object.keys(validOptions).length === 0 && !combobox && - !dialogInputs?.fields?.data?.node?.template) + !dialogInputs?.fields?.data?.node?.template && + !hasRefreshButton && + !dialogInputs?.fields) } variant="primary" size="xs" @@ -489,41 +490,38 @@ export default function Dropdown({ {dialogInputs && dialogInputs?.fields && ( - - - - - - + + + { if (urlWithoutPath && itemId === myCollectionId && !checkPathFiles) { @@ -354,6 +354,14 @@ const SideBarFoldersButtonsComponent = ({ }); }; + const handleFilesNavigation = () => { + _navigate("/assets/files"); + }; + + const handleKnowledgeNavigation = () => { + _navigate("/assets/knowledge-bases"); + }; + return (
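For reference, updateFieldValue in the dialog now accepts the same payload shape as handleOnNewValue: either a bare string or an object carrying value plus extra template properties such as load_from_db. A minimal sketch of that normalization, with FieldChanges standing in for Parameters<handleOnNewValueType>[0] (an assumed simplification, not the exported type):

type FieldChanges =
  | string
  | { value: string; load_from_db?: boolean; [key: string]: unknown };

// Split a changes payload into the primitive value and any extra
// properties that should be copied onto the field template.
function normalizeFieldChanges(changes: FieldChanges): {
  value: string;
  extras: Record<string, unknown>;
} {
  if (typeof changes === "object" && changes !== null) {
    const { value, ...extras } = changes;
    return { value, extras };
  }
  return { value: changes, extras: {} };
}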
{/* TODO: Remove this on cleanup */} - {ENABLE_DATASTAX_LANGFLOW && } + {ENABLE_DATASTAX_LANGFLOW && }{" "} handleFilesClick?.()} + onClick={handleKnowledgeNavigation} + size="md" + className="text-sm" + > + + Knowledge + + diff --git a/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/hooks.ts b/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/hooks.ts new file mode 100644 index 000000000..82735a55d --- /dev/null +++ b/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/hooks.ts @@ -0,0 +1,82 @@ +import { useCallback, useEffect, useMemo, useRef } from "react"; +import { useGlobalVariablesStore } from "@/stores/globalVariablesStore/globalVariables"; +import type { GlobalVariable } from "./types"; + +// Custom hook for managing global variable value existence +export const useGlobalVariableValue = ( + value: string, + globalVariables: GlobalVariable[], +) => { + return useMemo(() => { + return ( + globalVariables?.some((variable) => variable.name === value) ?? false + ); + }, [globalVariables, value]); +}; + +// Custom hook for managing unavailable fields +export const useUnavailableField = ( + displayName: string | undefined, + value: string, +) => { + const unavailableFields = useGlobalVariablesStore( + (state) => state.unavailableFields, + ); + + return useMemo(() => { + if ( + displayName && + unavailableFields && + Object.keys(unavailableFields).includes(displayName) && + value === "" + ) { + return unavailableFields[displayName]; + } + return null; + }, [unavailableFields, displayName, value]); +}; + +// Custom hook for handling initial load logic +export const useInitialLoad = ( + disabled: boolean, + loadFromDb: boolean, + globalVariables: GlobalVariable[], + valueExists: boolean, + unavailableField: string | null, + handleOnNewValue: ( + value: { value: string; load_from_db: boolean }, + options?: { skipSnapshot: boolean }, + ) => void, +) => { + const initialLoadCompleted = useRef(false); + const handleOnNewValueRef = useRef(handleOnNewValue); + + // Keep the latest handleOnNewValue reference + handleOnNewValueRef.current = handleOnNewValue; + + // Handle database loading when value doesn't exist + useEffect(() => { + if (disabled || !loadFromDb || !globalVariables.length || valueExists) { + return; + } + + handleOnNewValueRef.current( + { value: "", load_from_db: false }, + { skipSnapshot: true }, + ); + }, [disabled, loadFromDb, globalVariables.length, valueExists]); + + // Handle unavailable field initialization + useEffect(() => { + if (initialLoadCompleted.current || disabled || unavailableField === null) { + return; + } + + handleOnNewValueRef.current( + { value: unavailableField, load_from_db: true }, + { skipSnapshot: true }, + ); + + initialLoadCompleted.current = true; + }, [unavailableField, disabled]); +}; diff --git a/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/index.tsx b/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/index.tsx index 8861931a6..9ab657918 100644 --- a/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/index.tsx +++ b/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/index.tsx @@ -1,8 +1,6 @@ -import { useEffect, useMemo, useRef } from "react"; +import { useEffect } from "react"; import { useGetGlobalVariables } from "@/controllers/API/queries/variables"; import 
GeneralDeleteConfirmationModal from "@/shared/components/delete-confirmation-modal"; -import { useGlobalVariablesStore } from "@/stores/globalVariablesStore/globalVariables"; - import { cn } from "../../../../../utils/utils"; import ForwardedIconComponent from "../../../../common/genericIconComponent"; import { CommandItem } from "../../../../ui/command"; @@ -10,6 +8,12 @@ import GlobalVariableModal from "../../../GlobalVariableModal/GlobalVariableModa import { getPlaceholder } from "../../helpers/get-placeholder-disabled"; import type { InputGlobalComponentType, InputProps } from "../../types"; import InputComponent from "../inputComponent"; +import { + useGlobalVariableValue, + useInitialLoad, + useUnavailableField, +} from "./hooks"; +import type { GlobalVariable, GlobalVariableHandlers } from "./types"; export default function InputGlobalComponent({ display_name, @@ -25,70 +29,93 @@ export default function InputGlobalComponent({ hasRefreshButton = false, }: InputProps): JSX.Element { const { data: globalVariables } = useGetGlobalVariables(); - const unavailableFields = useGlobalVariablesStore( - (state) => state.unavailableFields, + + // // Safely cast the data to our typed interface + const typedGlobalVariables: GlobalVariable[] = globalVariables ?? []; + const currentValue = value ?? ""; + const isDisabled = disabled ?? false; + const loadFromDb = load_from_db ?? false; + + // // Extract complex logic into custom hooks + const valueExists = useGlobalVariableValue( + currentValue, + typedGlobalVariables, + ); + const unavailableField = useUnavailableField(display_name, currentValue); + + useInitialLoad( + isDisabled, + loadFromDb, + typedGlobalVariables, + valueExists, + unavailableField, + handleOnNewValue, ); - const initialLoadCompleted = useRef(false); - - const valueExists = useMemo(() => { - return ( - globalVariables?.some((variable) => variable.name === value) ?? 
false - ); - }, [globalVariables, value]); - - const unavailableField = useMemo(() => { - if ( - display_name && - unavailableFields && - Object.keys(unavailableFields).includes(display_name) && - value === "" - ) { - return unavailableFields[display_name]; - } - return null; - }, [unavailableFields, display_name]); - - useMemo(() => { - if (disabled) { - return; - } - - if (load_from_db && globalVariables && !valueExists) { + // Clean up when selected variable no longer exists + useEffect(() => { + if (loadFromDb && currentValue && !valueExists && !isDisabled) { handleOnNewValue( { value: "", load_from_db: false }, { skipSnapshot: true }, ); } - }, [ - globalVariables, - unavailableFields, - disabled, - load_from_db, - valueExists, - unavailableField, - value, - handleOnNewValue, - ]); + }, [loadFromDb, currentValue, valueExists, isDisabled, handleOnNewValue]); - useEffect(() => { - if (initialLoadCompleted.current || disabled || unavailableField === null) { - return; - } + // Create handlers object for better organization + const handlers: GlobalVariableHandlers = { + // Handler for deleting global variables + handleVariableDelete: (variableName: string) => { + if (value === variableName) { + handleOnNewValue({ + value: "", + load_from_db: false, + }); + } + }, - handleOnNewValue( - { value: unavailableField, load_from_db: true }, - { skipSnapshot: true }, - ); + // Handler for selecting a global variable + handleVariableSelect: (selectedValue: string) => { + handleOnNewValue({ + value: selectedValue, + load_from_db: selectedValue !== "", + }); + }, - initialLoadCompleted.current = true; - }, [unavailableField, disabled, load_from_db, value, handleOnNewValue]); + // Handler for input changes + handleInputChange: (inputValue: string, skipSnapshot?: boolean) => { + handleOnNewValue( + { value: inputValue, load_from_db: false }, + { skipSnapshot }, + ); + }, + }; - function handleDelete(key: string) { - if (value === key) { - handleOnNewValue({ value: "", load_from_db: load_from_db }); - } - } + // Render add new variable button + const renderAddVariableButton = () => ( + + + + + ); + + // Render delete button for each option + const renderDeleteButton = (option: string) => ( + handlers.handleVariableDelete(option)} + /> + ); + + // // Extract options list for better readability + const variableOptions = typedGlobalVariables.map((variable) => variable.name); + const selectedOption = loadFromDb && valueExists ? currentValue : ""; return ( variable.name) ?? []} - optionsPlaceholder={"Global Variables"} + value={currentValue} + options={variableOptions} + optionsPlaceholder="Global Variables" optionsIcon="Globe" - optionsButton={ - - - - - } - optionButton={(option) => ( - handleDelete(option)} - /> - )} - selectedOption={load_from_db && valueExists ? value : ""} - setSelectedOption={(value) => { - handleOnNewValue({ - value: value, - load_from_db: value !== "" ? 
true : false, - }); - }} - onChange={(value, skipSnapshot) => { - handleOnNewValue( - { value: value, load_from_db: false }, - { skipSnapshot }, - ); - }} + optionsButton={renderAddVariableButton()} + optionButton={renderDeleteButton} + selectedOption={selectedOption} + setSelectedOption={handlers.handleVariableSelect} + onChange={handlers.handleInputChange} isToolMode={isToolMode} hasRefreshButton={hasRefreshButton} /> diff --git a/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/types.ts b/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/types.ts new file mode 100644 index 000000000..216cbcee1 --- /dev/null +++ b/src/frontend/src/components/core/parameterRenderComponent/components/inputGlobalComponent/types.ts @@ -0,0 +1,14 @@ +export interface GlobalVariable { + name: string; + // Add other properties as needed +} + +export interface UnavailableFields { + [key: string]: string; +} + +export interface GlobalVariableHandlers { + handleVariableDelete: (variableName: string) => void; + handleVariableSelect: (selectedValue: string) => void; + handleInputChange: (inputValue: string, skipSnapshot?: boolean) => void; +} diff --git a/src/frontend/src/components/core/parameterRenderComponent/components/tableComponent/components/tableAutoCellRender/index.tsx b/src/frontend/src/components/core/parameterRenderComponent/components/tableComponent/components/tableAutoCellRender/index.tsx index 500ecca1a..f95224721 100644 --- a/src/frontend/src/components/core/parameterRenderComponent/components/tableComponent/components/tableAutoCellRender/index.tsx +++ b/src/frontend/src/components/core/parameterRenderComponent/components/tableComponent/components/tableAutoCellRender/index.tsx @@ -18,6 +18,7 @@ export default function TableAutoCellRender({ colDef, formatter, api, + ...props }: CustomCellRender) { function getCellType() { let format: string = formatter ? formatter : typeof value; @@ -92,7 +93,12 @@ export default function TableAutoCellRender({ }} editNode={true} id={"toggle" + colDef?.colId + uniqueId()} - disabled={false} + disabled={ + colDef?.cellRendererParams?.isSingleToggleColumn && + colDef?.cellRendererParams?.checkSingleToggleEditable + ? 
!colDef.cellRendererParams.checkSingleToggleEditable(props) + : false + } /> ) : ( { + const isSingleToggleRowEditable = ( + colField: string, + rowData: any, + currentRowValue: any, + ) => { + try { + // Check if this is a single-toggle column (Vectorize or Identifier) + const isSingleToggleColumn = + colField === "Vectorize" || + colField === "vectorize" || + colField === "Identifier" || + colField === "identifier"; + + if (!isSingleToggleColumn) return true; + + // Safeguard: ensure we have rowData array + if (!props.rowData || !Array.isArray(props.rowData)) { + return true; + } + + // Normalize the current value to boolean + const normalizedCurrentValue = + currentRowValue === true || + currentRowValue === "true" || + currentRowValue === 1; + + // If current row is true, always allow editing (to turn it off) + if (normalizedCurrentValue) { + return true; + } + + // If current row is false, only allow editing if no other row is true + const hasAnyTrue = props.rowData.some((row) => { + if (!row || typeof row !== "object") return false; + const value = row[colField]; + const normalizedValue = + value === true || value === "true" || value === 1; + return normalizedValue; + }); + + return !hasAnyTrue; + } catch (error) { + // Default to editable if there's an error to avoid breaking functionality + return true; + } + }; + const colDef = props.columnDefs .filter((col) => !col.hide) .map((col, index, filteredArray) => { @@ -92,10 +139,49 @@ const TableComponent = forwardRef< props.editable.every((field) => typeof field === "string") && (props.editable as Array).includes(newCol.field ?? "")) ) { - newCol = { - ...newCol, - editable: true, - }; + // Special handling for single-toggle columns (Vectorize and Identifier) + const isSingleToggleColumn = + newCol.field === "Vectorize" || + newCol.field === "vectorize" || + newCol.field === "Identifier" || + newCol.field === "identifier"; + + if (isSingleToggleColumn) { + newCol = { + ...newCol, + editable: (params) => { + const currentValue = params.data[params.colDef.field!]; + return isSingleToggleRowEditable( + newCol.field!, + params.data, + currentValue, + ); + }, + cellRendererParams: { + ...newCol.cellRendererParams, + isSingleToggleColumn: true, + singleToggleField: newCol.field, + checkSingleToggleEditable: (params) => { + try { + const fieldName = newCol.field!; + const currentValue = params?.data?.[fieldName]; + return isSingleToggleRowEditable( + fieldName, + params?.data, + currentValue, + ); + } catch (error) { + return false; + } + }, + }, + }; + } else { + newCol = { + ...newCol, + editable: true, + }; + } } if ( Array.isArray(props.editable) && @@ -109,11 +195,68 @@ const TableComponent = forwardRef< }> ).find((field) => field.field === newCol.field); if (field) { - newCol = { - ...newCol, - editable: field.editableCell, - onCellValueChanged: (e) => field.onUpdate(e), - }; + // Special handling for single-toggle columns (Vectorize and Identifier) + const isSingleToggleColumn = + newCol.field === "Vectorize" || + newCol.field === "vectorize" || + newCol.field === "Identifier" || + newCol.field === "identifier"; + + if (isSingleToggleColumn) { + newCol = { + ...newCol, + editable: (params) => { + const currentValue = params.data[params.colDef.field!]; + return ( + field.editableCell && + isSingleToggleRowEditable( + newCol.field!, + params.data, + currentValue, + ) + ); + }, + cellRendererParams: { + ...newCol.cellRendererParams, + isSingleToggleColumn: true, + singleToggleField: newCol.field, + checkSingleToggleEditable: (params) => { 
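Distilled, the editability rule for the single-toggle columns (Vectorize and Identifier) is: a row that already holds true may always be toggled off, while a false row is editable only when no other row holds true. A standalone sketch of that invariant, with Row as an assumed row shape:

type Row = Record<string, unknown>;

// Values arrive as true, "true", or 1 depending on the source, so normalize first.
const isToggledOn = (v: unknown): boolean => v === true || v === "true" || v === 1;

function canEditSingleToggle(
  rows: Row[],
  field: string,
  current: unknown,
): boolean {
  if (isToggledOn(current)) return true; // switching a row off is always allowed
  return !rows.some((row) => isToggledOn(row[field])); // at most one row may be on
}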
+ try { + const fieldName = newCol.field!; + const currentValue = params?.data?.[fieldName]; + return ( + field.editableCell && + isSingleToggleRowEditable( + fieldName, + params?.data, + currentValue, + ) + ); + } catch (error) { + return false; + } + }, + }, + onCellValueChanged: (e) => { + field.onUpdate(e); + // Refresh grid to update editable state of other cells + setTimeout(() => { + if ( + realRef.current?.api && + !realRef.current.api.isDestroyed() + ) { + realRef.current.api.refreshCells({ force: true }); + } + }, 0); + }, + }; + } else { + newCol = { + ...newCol, + editable: field.editableCell, + onCellValueChanged: (e) => field.onUpdate(e), + }; + } } } return newCol; @@ -253,6 +396,61 @@ const TableComponent = forwardRef< }} onGridReady={onGridReady} onColumnMoved={onColumnMoved} + onCellValueChanged={(e) => { + // Handle single-toggle column changes (Vectorize and Identifier) to refresh grid editability + const isSingleToggleField = + e.colDef.field === "Vectorize" || + e.colDef.field === "vectorize" || + e.colDef.field === "Identifier" || + e.colDef.field === "identifier"; + + if (isSingleToggleField) { + setTimeout(() => { + if ( + realRef.current?.api && + !realRef.current.api.isDestroyed() + ) { + // Refresh all cells with force to update cell renderer params + if (e.colDef.field) { + realRef.current.api.refreshCells({ + force: true, + columns: [e.colDef.field], + }); + } + // Also refresh all other single-toggle column cells if they exist + const allSingleToggleColumns = realRef.current.api + .getColumns() + ?.filter((col) => { + const field = col.getColDef().field; + return ( + field === "Vectorize" || + field === "vectorize" || + field === "Identifier" || + field === "identifier" + ); + }); + if ( + allSingleToggleColumns && + allSingleToggleColumns.length > 0 + ) { + const columnFields = allSingleToggleColumns + .map((col) => col.getColDef().field) + .filter((field): field is string => field !== undefined); + if (columnFields.length > 0) { + realRef.current.api.refreshCells({ + force: true, + columns: columnFields, + }); + } + } + } + }, 0); + } + // Call original onCellValueChanged if it exists + if (props.onCellValueChanged) { + props.onCellValueChanged(e); + } + }} onStateUpdated={(e) => { if (e.sources.some((source) => source.includes("column"))) { localStorage.setItem( diff --git a/src/frontend/src/controllers/API/helpers/constants.ts b/src/frontend/src/controllers/API/helpers/constants.ts index 083b198fd..265196ecb 100644 --- a/src/frontend/src/controllers/API/helpers/constants.ts +++ b/src/frontend/src/controllers/API/helpers/constants.ts @@ -29,6 +29,7 @@ export const URLs = { PUBLIC_FLOW: `flows/public_flow`, MCP: `mcp/project`, MCP_SERVERS: `mcp/servers`, + KNOWLEDGE_BASES: `knowledge_bases`, } as const; // IMPORTANT: FOLDERS endpoint now points to 'projects' for backward compatibility diff --git a/src/frontend/src/controllers/API/queries/knowledge-bases/use-delete-knowledge-base.ts b/src/frontend/src/controllers/API/queries/knowledge-bases/use-delete-knowledge-base.ts new file mode 100644 index 000000000..bf6911c2d --- /dev/null +++ b/src/frontend/src/controllers/API/queries/knowledge-bases/use-delete-knowledge-base.ts @@ -0,0 +1,39 @@ +import type { UseMutationResult } from "@tanstack/react-query"; +import type { useMutationFunctionType } from "@/types/api"; +import { api } from "../../api"; +import { getURL } from "../../helpers/constants"; +import { UseRequestProcessor } from "../../services/request-processor"; + +interface DeleteKnowledgeBaseParams { + 
kb_name: string; +} + +export const useDeleteKnowledgeBase: useMutationFunctionType< + DeleteKnowledgeBaseParams, + void +> = (params, options?) => { + const { mutate, queryClient } = UseRequestProcessor(); + + const deleteKnowledgeBaseFn = async (): Promise => { + const response = await api.delete( + `${getURL("KNOWLEDGE_BASES")}/${params.kb_name}`, + ); + return response.data; + }; + + const mutation: UseMutationResult = mutate( + ["useDeleteKnowledgeBase"], + deleteKnowledgeBaseFn, + { + onSettled: (data, error, variables, context) => { + queryClient.invalidateQueries({ + queryKey: ["useGetKnowledgeBases"], + }); + options?.onSettled?.(data, error, variables, context); + }, + ...options, + }, + ); + + return mutation; +}; diff --git a/src/frontend/src/controllers/API/queries/knowledge-bases/use-delete-knowledge-bases.ts b/src/frontend/src/controllers/API/queries/knowledge-bases/use-delete-knowledge-bases.ts new file mode 100644 index 000000000..997291590 --- /dev/null +++ b/src/frontend/src/controllers/API/queries/knowledge-bases/use-delete-knowledge-bases.ts @@ -0,0 +1,38 @@ +import type { UseMutationResult } from "@tanstack/react-query"; +import type { useMutationFunctionType } from "@/types/api"; +import { api } from "../../api"; +import { getURL } from "../../helpers/constants"; +import { UseRequestProcessor } from "../../services/request-processor"; + +interface DeleteKnowledgeBasesParams { + kb_names: string[]; +} + +export const useDeleteKnowledgeBases: useMutationFunctionType< + undefined, + DeleteKnowledgeBasesParams +> = (options?) => { + const { mutate, queryClient } = UseRequestProcessor(); + + const deleteKnowledgeBasesFn = async ( + params: DeleteKnowledgeBasesParams, + ): Promise => { + const response = await api.delete(`${getURL("KNOWLEDGE_BASES")}/`, { + data: { kb_names: params.kb_names }, + }); + return response.data; + }; + + const mutation: UseMutationResult = + mutate(["useDeleteKnowledgeBases"], deleteKnowledgeBasesFn, { + onSettled: (data, error, variables, context) => { + queryClient.invalidateQueries({ + queryKey: ["useGetKnowledgeBases"], + }); + options?.onSettled?.(data, error, variables, context); + }, + ...options, + }); + + return mutation; +}; diff --git a/src/frontend/src/controllers/API/queries/knowledge-bases/use-get-knowledge-bases.ts b/src/frontend/src/controllers/API/queries/knowledge-bases/use-get-knowledge-bases.ts new file mode 100644 index 000000000..5512769d9 --- /dev/null +++ b/src/frontend/src/controllers/API/queries/knowledge-bases/use-get-knowledge-bases.ts @@ -0,0 +1,40 @@ +import type { UseQueryResult } from "@tanstack/react-query"; +import type { useQueryFunctionType } from "@/types/api"; +import { api } from "../../api"; +import { getURL } from "../../helpers/constants"; +import { UseRequestProcessor } from "../../services/request-processor"; + +export interface KnowledgeBaseInfo { + id: string; + name: string; + embedding_provider?: string; + embedding_model?: string; + size: number; + words: number; + characters: number; + chunks: number; + avg_chunk_size: number; +} + +export const useGetKnowledgeBases: useQueryFunctionType< + undefined, + KnowledgeBaseInfo[] +> = (options?) 
=> { + const { query } = UseRequestProcessor(); + + const getKnowledgeBasesFn = async (): Promise => { + const res = await api.get(`${getURL("KNOWLEDGE_BASES")}/`); + return res.data; + }; + + const queryResult: UseQueryResult = query( + ["useGetKnowledgeBases"], + getKnowledgeBasesFn, + { + refetchOnWindowFocus: false, + ...options, + }, + ); + + return queryResult; +}; diff --git a/src/frontend/src/customization/feature-flags.ts b/src/frontend/src/customization/feature-flags.ts index 79c18b31b..2e7a10bd5 100644 --- a/src/frontend/src/customization/feature-flags.ts +++ b/src/frontend/src/customization/feature-flags.ts @@ -15,5 +15,7 @@ export const ENABLE_VOICE_ASSISTANT = true; export const ENABLE_IMAGE_ON_PLAYGROUND = false; export const ENABLE_MCP = true; export const ENABLE_MCP_NOTICE = false; +export const ENABLE_KNOWLEDGE_BASES = false; + export const ENABLE_MCP_COMPOSER = process.env.LANGFLOW_FEATURE_MCP_COMPOSER === "true"; diff --git a/src/frontend/src/modals/deleteConfirmationModal/index.tsx b/src/frontend/src/modals/deleteConfirmationModal/index.tsx index 1f7f2e803..eec16abbc 100644 --- a/src/frontend/src/modals/deleteConfirmationModal/index.tsx +++ b/src/frontend/src/modals/deleteConfirmationModal/index.tsx @@ -46,7 +46,9 @@ export default function DeleteConfirmationModal({ This will permanently delete the {description ?? "flow"} - {note ? " " + note : ""}.
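Taken together, the three knowledge-base hooks imply a small REST surface: GET knowledge_bases/ for the list, DELETE knowledge_bases/{kb_name} for a single entry, and a bulk DELETE with a JSON body of kb_names. They also differ in where parameters are supplied: useDeleteKnowledgeBase binds kb_name when the hook is created, while useDeleteKnowledgeBases takes kb_names at mutate time. A consumer sketch, with the composing hook itself being illustrative:

import { useDeleteKnowledgeBase } from "@/controllers/API/queries/knowledge-bases/use-delete-knowledge-base";
import { useDeleteKnowledgeBases } from "@/controllers/API/queries/knowledge-bases/use-delete-knowledge-bases";
import { useGetKnowledgeBases } from "@/controllers/API/queries/knowledge-bases/use-get-knowledge-bases";

// Illustrative composition showing the two parameter-passing styles side by side.
export function useKnowledgeBaseActions(kbName: string) {
  const { data: knowledgeBases } = useGetKnowledgeBases();
  const deleteOne = useDeleteKnowledgeBase({ kb_name: kbName }); // bound at hook creation
  const deleteMany = useDeleteKnowledgeBases(); // variables supplied per call

  const deleteAll = () =>
    deleteMany.mutate({ kb_names: (knowledgeBases ?? []).map((kb) => kb.id) });

  // Both mutations invalidate ["useGetKnowledgeBases"] on settle, so the list
  // refreshes without a manual refetch.
  return { knowledgeBases, deleteOne: () => deleteOne.mutate(), deleteAll };
}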

This can't be undone. + {note ? " " + note : ""}.
+
+ This can't be undone.
diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/FilesTab.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/FilesTab.tsx new file mode 100644 index 000000000..0710eb1df --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/FilesTab.tsx @@ -0,0 +1,446 @@ +import type { + ColDef, + NewValueParams, + SelectionChangedEvent, +} from "ag-grid-community"; +import type { AgGridReact } from "ag-grid-react"; +import { useEffect, useMemo, useRef, useState } from "react"; +import ForwardedIconComponent from "@/components/common/genericIconComponent"; +import ShadTooltip from "@/components/common/shadTooltipComponent"; +import CardsWrapComponent from "@/components/core/cardsWrapComponent"; +import TableComponent from "@/components/core/parameterRenderComponent/components/tableComponent"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import Loading from "@/components/ui/loading"; +import { useGetFilesV2 } from "@/controllers/API/queries/file-management"; +import { useDeleteFilesV2 } from "@/controllers/API/queries/file-management/use-delete-files"; +import { usePostRenameFileV2 } from "@/controllers/API/queries/file-management/use-put-rename-file"; +import { useCustomHandleBulkFilesDownload } from "@/customization/hooks/use-custom-handle-bulk-files-download"; +import { customPostUploadFileV2 } from "@/customization/hooks/use-custom-post-upload-file"; +import useUploadFile from "@/hooks/files/use-upload-file"; +import DeleteConfirmationModal from "@/modals/deleteConfirmationModal"; +import FilesContextMenuComponent from "@/modals/fileManagerModal/components/filesContextMenuComponent"; +import useAlertStore from "@/stores/alertStore"; +import { formatFileSize } from "@/utils/stringManipulation"; +import { FILE_ICONS } from "@/utils/styleUtils"; +import { cn } from "@/utils/utils"; +import { sortByDate } from "../../../utils/sort-flows"; +import DragWrapComponent from "./dragWrapComponent"; + +interface FilesTabProps { + quickFilterText: string; + setQuickFilterText: (text: string) => void; + selectedFiles: any[]; + setSelectedFiles: (files: any[]) => void; + quantitySelected: number; + setQuantitySelected: (quantity: number) => void; + isShiftPressed: boolean; +} + +const FilesTab = ({ + quickFilterText, + setQuickFilterText, + selectedFiles, + setSelectedFiles, + quantitySelected, + setQuantitySelected, + isShiftPressed, +}: FilesTabProps) => { + const tableRef = useRef>(null); + const { data: files } = useGetFilesV2(); + const setErrorData = useAlertStore((state) => state.setErrorData); + const setSuccessData = useAlertStore((state) => state.setSuccessData); + const [isDownloading, setIsDownloading] = useState(false); + + const { mutate: rename } = usePostRenameFileV2(); + const { mutate: deleteFiles, isPending: isDeleting } = useDeleteFilesV2(); + const { handleBulkDownload } = useCustomHandleBulkFilesDownload(); + + const handleRename = (params: NewValueParams) => { + rename({ + id: params.data.id, + name: params.newValue, + }); + }; + + const handleOpenRename = (id: string, name: string) => { + if (tableRef.current) { + tableRef.current.api.startEditingCell({ + rowIndex: files?.findIndex((file) => file.id === id) ?? 0, + colKey: "name", + }); + } + }; + + const uploadFile = useUploadFile({ multiple: true }); + + const handleUpload = async (files?: File[]) => { + try { + const filesIds = await uploadFile({ + files: files, + }); + setSuccessData({ + title: `File${filesIds.length > 1 ? 
"s" : ""} uploaded successfully`, + }); + } catch (error: any) { + setErrorData({ + title: "Error uploading file", + list: [error.message || "An error occurred while uploading the file"], + }); + } + }; + + const { mutate: uploadFileDirect } = customPostUploadFileV2(); + + useEffect(() => { + if (files) { + setQuantitySelected(0); + setSelectedFiles([]); + } + }, [files, setQuantitySelected, setSelectedFiles]); + + const handleSelectionChanged = (event: SelectionChangedEvent) => { + const selectedRows = event.api.getSelectedRows(); + setSelectedFiles(selectedRows); + if (selectedRows.length > 0) { + setQuantitySelected(selectedRows.length); + } else { + setTimeout(() => { + setQuantitySelected(0); + }, 300); + } + }; + + const colDefs: ColDef[] = [ + { + headerName: "Name", + field: "name", + flex: 2, + headerCheckboxSelection: true, + checkboxSelection: true, + editable: true, + filter: "agTextColumnFilter", + cellClass: + "cursor-text select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none", + cellRenderer: (params) => { + const type = params.data.path.split(".")[1]?.toLowerCase(); + return ( +
+ {params.data.progress !== undefined && + params.data.progress !== -1 ? ( +
+ {Math.round(params.data.progress * 100)}% +
+ ) : ( +
+ +
+ )} +
+ {params.value}.{type} +
+ {params.data.progress !== undefined && + params.data.progress === -1 ? ( + + Upload failed,{" "} + { + e.stopPropagation(); + if (params.data.file) { + uploadFileDirect({ file: params.data.file }); + } + }} + > + try again? + + + ) : ( + <> + )} +
+ ); + }, + }, + { + headerName: "Type", + field: "path", + flex: 1, + filter: "agTextColumnFilter", + editable: false, + valueFormatter: (params) => { + return params.value.split(".")[1]?.toUpperCase(); + }, + cellClass: + "text-muted-foreground cursor-text select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none", + }, + { + headerName: "Size", + field: "size", + flex: 1, + valueFormatter: (params) => { + return formatFileSize(params.value); + }, + editable: false, + cellClass: + "text-muted-foreground cursor-text select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none", + }, + { + headerName: "Modified", + field: "updated_at", + valueFormatter: (params) => { + return params.data.progress + ? "" + : new Date(params.value + "Z").toLocaleString(); + }, + editable: false, + flex: 1, + resizable: false, + cellClass: + "text-muted-foreground cursor-text select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none", + }, + { + maxWidth: 60, + editable: false, + resizable: false, + cellClass: "cursor-default", + cellRenderer: (params) => { + return ( +
+ {!params.data.progress && ( + + + + )} +
+ ); + }, + }, + ]; + + const onFileDrop = async (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + const droppedFiles = Array.from(e.dataTransfer.files); + if (droppedFiles.length > 0) { + await handleUpload(droppedFiles); + } + }; + + const handleDownload = () => { + handleBulkDownload( + selectedFiles, + setSuccessData, + setErrorData, + setIsDownloading, + ); + }; + + const handleDelete = () => { + deleteFiles( + { + ids: selectedFiles.map((file) => file.id), + }, + { + onSuccess: (data) => { + setSuccessData({ title: data.message }); + setQuantitySelected(0); + setSelectedFiles([]); + }, + onError: (error) => { + setErrorData({ + title: "Error deleting files", + list: [ + error.message || "An error occurred while deleting the files", + ], + }); + }, + }, + ); + }; + + const UploadButtonComponent = useMemo(() => { + return ( + + + + ); + }, []); + + return ( +
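The drop handler above funnels dropped files through the same handleUpload path as the upload button, so success and error toasts stay uniform. Condensed, the flow looks like this, with the toast shape as an illustrative stand-in for the alert store:

import type { DragEvent } from "react";

// Condensed drop-to-upload flow; `upload` follows the useUploadFile contract above.
async function uploadDroppedFiles(
  e: DragEvent,
  upload: (args: { files?: File[] }) => Promise<string[]>,
  toast: { success: (title: string) => void; error: (title: string) => void },
) {
  e.preventDefault();
  e.stopPropagation();
  const dropped = Array.from(e.dataTransfer.files);
  if (dropped.length === 0) return;
  try {
    const ids = await upload({ files: dropped });
    toast.success(`File${ids.length > 1 ? "s" : ""} uploaded successfully`);
  } catch (err: any) {
    toast.error(err?.message ?? "An error occurred while uploading the file");
  }
}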
+ {files && files.length !== 0 ? ( +
+
+ { + setQuickFilterText(event.target.value); + }} + /> +
+
{UploadButtonComponent}
+
+ ) : ( + <> + )} + +
+ {!files || !Array.isArray(files) ? ( +
+ +
+ ) : files.length > 0 ? ( + +
+ { + return sortByDate( + a.updated_at ?? a.created_at, + b.updated_at ?? b.created_at, + ); + })} + className={cn( + "ag-no-border group w-full", + isShiftPressed && quantitySelected > 0 && "no-select-cells", + )} + pagination + ref={tableRef} + quickFilterText={quickFilterText} + gridOptions={{ + stopEditingWhenCellsLoseFocus: true, + ensureDomOrder: true, + colResizeDefault: "shift", + }} + /> + +
0 ? "opacity-100" : "opacity-0", + )} + > +
0 + ? "pointer-events-auto" + : "pointer-events-none", + )} + > + + {quantitySelected} selected + +
+ + + 1 ? "s" : "")} + > + + +
+
+
+
+
+ ) : ( + +
+
+

No files

+

+ Upload files or import from your preferred cloud. +

+
+
+ {UploadButtonComponent} +
+
+
+ )} +
+
+ ); +}; + +export default FilesTab; diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseDrawer.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseDrawer.tsx new file mode 100644 index 000000000..3d55263f3 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseDrawer.tsx @@ -0,0 +1,68 @@ +import ForwardedIconComponent from "@/components/common/genericIconComponent"; +import { Button } from "@/components/ui/button"; +import { Separator } from "@/components/ui/separator"; +import type { KnowledgeBaseInfo } from "@/controllers/API/queries/knowledge-bases/use-get-knowledge-bases"; + +interface KnowledgeBaseDrawerProps { + isOpen: boolean; + onClose: () => void; + knowledgeBase: KnowledgeBaseInfo | null; +} + +const KnowledgeBaseDrawer = ({ + isOpen, + onClose, + knowledgeBase, +}: KnowledgeBaseDrawerProps) => { + if (!isOpen || !knowledgeBase) { + return null; + } + + return ( +
+
+

{knowledgeBase.name}

+ +
+ +
+
+
+
+ No description available. +
+
+ + + +
+ +
+
+ {knowledgeBase.embedding_model || "Unknown"} +
+
+
+ +
+

Source Files

+
+ No source files available. +
+
+ +
+

Linked Flows

+
+ No linked flows available. +
+
+
+
+
+ ); +}; + +export default KnowledgeBaseDrawer; diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseEmptyState.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseEmptyState.tsx new file mode 100644 index 000000000..076101ecd --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseEmptyState.tsx @@ -0,0 +1,63 @@ +import { useParams } from "react-router-dom"; +import ForwardedIconComponent from "@/components/common/genericIconComponent"; +import { Button } from "@/components/ui/button"; +import { useCustomNavigate } from "@/customization/hooks/use-custom-navigate"; +import { track } from "@/customization/utils/analytics"; +import useAddFlow from "@/hooks/flows/use-add-flow"; +import useFlowsManagerStore from "@/stores/flowsManagerStore"; +import { useFolderStore } from "@/stores/foldersStore"; +import { updateIds } from "@/utils/reactflowUtils"; + +const KnowledgeBaseEmptyState = () => { + const examples = useFlowsManagerStore((state) => state.examples); + const addFlow = useAddFlow(); + const navigate = useCustomNavigate(); + const { folderId } = useParams(); + const myCollectionId = useFolderStore((state) => state.myCollectionId); + + const folderIdUrl = folderId ?? myCollectionId; + + const handleCreateKnowledge = async () => { + const knowledgeBasesExample = examples.find( + (example) => example.name === "Knowledge Ingestion", + ); + + if (knowledgeBasesExample && knowledgeBasesExample.data) { + updateIds(knowledgeBasesExample.data); + addFlow({ flow: knowledgeBasesExample }).then((id) => { + navigate(`/flow/${id}/folder/${folderIdUrl}`); + }); + track("New Flow Created", { + template: `${knowledgeBasesExample.name} Template`, + }); + } + }; + + return ( +
+
+

No knowledge bases

+

+ Create your first knowledge base to get started. +

+
+
+ +
+
+ ); +}; + +export default KnowledgeBaseEmptyState; diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseSelectionOverlay.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseSelectionOverlay.tsx new file mode 100644 index 000000000..95bcc4bb2 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBaseSelectionOverlay.tsx @@ -0,0 +1,97 @@ +import ForwardedIconComponent from "@/components/common/genericIconComponent"; +import { Button } from "@/components/ui/button"; +import { useDeleteKnowledgeBases } from "@/controllers/API/queries/knowledge-bases/use-delete-knowledge-bases"; +import DeleteConfirmationModal from "@/modals/deleteConfirmationModal"; +import useAlertStore from "@/stores/alertStore"; +import { cn } from "@/utils/utils"; + +interface KnowledgeBaseSelectionOverlayProps { + selectedFiles: any[]; + quantitySelected: number; + onDelete?: () => void; + onClearSelection: () => void; +} + +const KnowledgeBaseSelectionOverlay = ({ + selectedFiles, + quantitySelected, + onDelete, + onClearSelection, +}: KnowledgeBaseSelectionOverlayProps) => { + const { setSuccessData, setErrorData } = useAlertStore((state) => ({ + setSuccessData: state.setSuccessData, + setErrorData: state.setErrorData, + })); + + const deleteMutation = useDeleteKnowledgeBases({ + onSuccess: (data) => { + setSuccessData({ + title: `${data.deleted_count} Knowledge Base(s) deleted successfully!`, + }); + onClearSelection(); + }, + onError: (error: any) => { + setErrorData({ + title: "Failed to delete knowledge bases", + list: [ + error?.response?.data?.detail || + error?.message || + "An unknown error occurred", + ], + }); + onClearSelection(); + }, + }); + + const handleBulkDelete = () => { + if (onDelete) { + onDelete(); + } else { + const knowledgeBaseIds = selectedFiles.map((file) => file.id); + if (knowledgeBaseIds.length > 0 && !deleteMutation.isPending) { + deleteMutation.mutate({ kb_names: knowledgeBaseIds }); + } + } + }; + + const isVisible = selectedFiles.length > 0; + const pluralSuffix = quantitySelected > 1 ? "s" : ""; + + return ( +
+
+ + {quantitySelected} selected + +
+ + + +
+
+
+ ); +}; + +export default KnowledgeBaseSelectionOverlay; diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBasesTab.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBasesTab.tsx new file mode 100644 index 000000000..b157004bd --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/KnowledgeBasesTab.tsx @@ -0,0 +1,221 @@ +import type { + NewValueParams, + RowClickedEvent, + SelectionChangedEvent, +} from "ag-grid-community"; +import type { AgGridReact } from "ag-grid-react"; +import { useRef, useState } from "react"; +import TableComponent from "@/components/core/parameterRenderComponent/components/tableComponent"; +import { Input } from "@/components/ui/input"; +import Loading from "@/components/ui/loading"; +import { useDeleteKnowledgeBase } from "@/controllers/API/queries/knowledge-bases/use-delete-knowledge-base"; +import { + type KnowledgeBaseInfo, + useGetKnowledgeBases, +} from "@/controllers/API/queries/knowledge-bases/use-get-knowledge-bases"; +import DeleteConfirmationModal from "@/modals/deleteConfirmationModal"; +import useAlertStore from "@/stores/alertStore"; +import { cn } from "@/utils/utils"; +import { createKnowledgeBaseColumns } from "../config/knowledgeBaseColumns"; +import KnowledgeBaseEmptyState from "./KnowledgeBaseEmptyState"; +import KnowledgeBaseSelectionOverlay from "./KnowledgeBaseSelectionOverlay"; + +interface KnowledgeBasesTabProps { + quickFilterText: string; + setQuickFilterText: (text: string) => void; + selectedFiles: any[]; + setSelectedFiles: (files: any[]) => void; + quantitySelected: number; + setQuantitySelected: (quantity: number) => void; + isShiftPressed: boolean; + onRowClick?: (knowledgeBase: KnowledgeBaseInfo) => void; +} + +const KnowledgeBasesTab = ({ + quickFilterText, + setQuickFilterText, + selectedFiles, + setSelectedFiles, + quantitySelected, + setQuantitySelected, + isShiftPressed, + onRowClick, +}: KnowledgeBasesTabProps) => { + const tableRef = useRef>(null); + const { setErrorData, setSuccessData } = useAlertStore((state) => ({ + setErrorData: state.setErrorData, + setSuccessData: state.setSuccessData, + })); + + const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false); + const [knowledgeBaseToDelete, setKnowledgeBaseToDelete] = + useState(null); + + const { data: knowledgeBases, isLoading, error } = useGetKnowledgeBases(); + + const deleteKnowledgeBaseMutation = useDeleteKnowledgeBase( + { + kb_name: knowledgeBaseToDelete?.id || "", + }, + { + onSuccess: () => { + setSuccessData({ + title: `Knowledge Base "${knowledgeBaseToDelete?.name}" deleted successfully!`, + }); + resetDeleteState(); + }, + onError: (error: any) => { + setErrorData({ + title: "Failed to delete knowledge base", + list: [ + error?.response?.data?.detail || + error?.message || + "An unknown error occurred", + ], + }); + resetDeleteState(); + }, + }, + ); + + if (error) { + setErrorData({ + title: "Failed to load knowledge bases", + list: [error?.message || "An unknown error occurred"], + }); + } + + const resetDeleteState = () => { + setKnowledgeBaseToDelete(null); + setIsDeleteModalOpen(false); + }; + + const handleRename = (params: NewValueParams) => { + setSuccessData({ + title: "Knowledge Base renamed successfully!", + }); + }; + + const handleDelete = (knowledgeBase: KnowledgeBaseInfo) => { + setKnowledgeBaseToDelete(knowledgeBase); + setIsDeleteModalOpen(true); + }; + + const confirmDelete = () => { + if (knowledgeBaseToDelete && !deleteKnowledgeBaseMutation.isPending) { 
+ deleteKnowledgeBaseMutation.mutate(); + } + }; + + const handleSelectionChange = (event: SelectionChangedEvent) => { + const selectedRows = event.api.getSelectedRows(); + setSelectedFiles(selectedRows); + if (selectedRows.length > 0) { + setQuantitySelected(selectedRows.length); + } else { + setTimeout(() => { + setQuantitySelected(0); + }, 300); + } + }; + + const clearSelection = () => { + setQuantitySelected(0); + setSelectedFiles([]); + }; + + const handleRowClick = (event: RowClickedEvent) => { + const clickedElement = event.event?.target as HTMLElement; + if (clickedElement && !clickedElement.closest("button") && onRowClick) { + onRowClick(event.data); + } + }; + + const columnDefs = createKnowledgeBaseColumns(handleRename, handleDelete); + + if (isLoading || !knowledgeBases || !Array.isArray(knowledgeBases)) { + return ( +
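One detail worth noting in the row-click handler above: the drawer only opens when the click did not originate inside a button, which keeps the per-row delete button from also triggering the drawer. The guard in isolation, using the ag-grid event type already imported here:

import type { RowClickedEvent } from "ag-grid-community";

// Open the detail drawer only for clicks that land outside embedded action buttons.
function shouldOpenDrawer(event: RowClickedEvent): boolean {
  const target = event.event?.target as HTMLElement | null;
  return !!target && !target.closest("button");
}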
+ +
+ ); + } + + if (knowledgeBases.length === 0) { + return ; + } + + return ( +
+
+
+ setQuickFilterText(event.target.value)} + /> +
+
+ +
+
+ 0 && "no-select-cells", + )} + pagination + ref={tableRef} + quickFilterText={quickFilterText} + gridOptions={{ + stopEditingWhenCellsLoseFocus: true, + ensureDomOrder: true, + colResizeDefault: "shift", + }} + /> + + +
+
+ + + <> + +
+ ); +}; + +export default KnowledgeBasesTab; diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseDrawer.test.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseDrawer.test.tsx new file mode 100644 index 000000000..a676efed8 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseDrawer.test.tsx @@ -0,0 +1,163 @@ +import { fireEvent, render, screen } from "@testing-library/react"; +import React from "react"; + +// Mock the component to avoid complex dependency chains +jest.mock("../KnowledgeBaseDrawer", () => { + const MockKnowledgeBaseDrawer = ({ isOpen, onClose, knowledgeBase }: any) => { + if (!isOpen || !knowledgeBase) { + return null; + } + + return ( +
+
+

{knowledgeBase.name}

+ +
+
+
No description available.
+
+ +
{knowledgeBase.embedding_model || "Unknown"}
+
+
+

Source Files

+
No source files available.
+
+
+

Linked Flows

+
No linked flows available.
+
+
+
+ ); + }; + MockKnowledgeBaseDrawer.displayName = "KnowledgeBaseDrawer"; + return { + __esModule: true, + default: MockKnowledgeBaseDrawer, + }; +}); + +const KnowledgeBaseDrawer = require("../KnowledgeBaseDrawer").default; + +const mockKnowledgeBase = { + id: "kb-1", + name: "Test Knowledge Base", + embedding_provider: "OpenAI", + embedding_model: "text-embedding-ada-002", + size: 1024000, + words: 50000, + characters: 250000, + chunks: 100, + avg_chunk_size: 2500, +}; + +describe("KnowledgeBaseDrawer", () => { + const mockOnClose = jest.fn(); + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it("renders nothing when isOpen is false", () => { + const { container } = render( + , + ); + + expect(container.firstChild).toBeNull(); + }); + + it("renders nothing when knowledgeBase is null", () => { + const { container } = render( + , + ); + + expect(container.firstChild).toBeNull(); + }); + + it("renders drawer when both isOpen is true and knowledgeBase is provided", () => { + render( + , + ); + + expect(screen.getByTestId("knowledge-base-drawer")).toBeInTheDocument(); + expect(screen.getByText("Test Knowledge Base")).toBeInTheDocument(); + }); + + it("calls onClose when close button is clicked", () => { + render( + , + ); + + const closeButton = screen.getByTestId("close-button"); + fireEvent.click(closeButton); + + expect(mockOnClose).toHaveBeenCalledTimes(1); + }); + + it("displays embedding model information", () => { + render( + , + ); + + expect(screen.getByText("Embedding Provider")).toBeInTheDocument(); + expect(screen.getByText("text-embedding-ada-002")).toBeInTheDocument(); + }); + + it("displays Unknown for missing embedding model", () => { + const kbWithoutModel = { + ...mockKnowledgeBase, + embedding_model: undefined, + }; + + render( + , + ); + + expect(screen.getByText("Unknown")).toBeInTheDocument(); + }); + + it("displays content sections", () => { + render( + , + ); + + expect(screen.getByText("No description available.")).toBeInTheDocument(); + expect(screen.getByText("Source Files")).toBeInTheDocument(); + expect(screen.getByText("Linked Flows")).toBeInTheDocument(); + }); +}); diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseEmptyState.test.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseEmptyState.test.tsx new file mode 100644 index 000000000..b526a6393 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseEmptyState.test.tsx @@ -0,0 +1,105 @@ +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { fireEvent, render, screen, waitFor } from "@testing-library/react"; +import React from "react"; +import { BrowserRouter } from "react-router-dom"; + +// Mock all the dependencies to avoid complex imports +jest.mock("@/stores/flowsManagerStore", () => ({ + __esModule: true, + default: jest.fn(), +})); + +jest.mock("@/hooks/flows/use-add-flow", () => ({ + __esModule: true, + default: jest.fn(), +})); + +jest.mock("@/customization/hooks/use-custom-navigate", () => ({ + useCustomNavigate: jest.fn(), +})); + +jest.mock("@/stores/foldersStore", () => ({ + useFolderStore: jest.fn(), +})); + +jest.mock("@/customization/utils/analytics", () => ({ + track: jest.fn(), +})); + +jest.mock("@/utils/reactflowUtils", () => ({ + updateIds: jest.fn(), +})); + +// Mock the component itself to test in isolation +jest.mock("../KnowledgeBaseEmptyState", () => { + const MockKnowledgeBaseEmptyState = () => ( +
+

No knowledge bases

+

Create your first knowledge base to get started.

+ +
+ ); + MockKnowledgeBaseEmptyState.displayName = "KnowledgeBaseEmptyState"; + return { + __esModule: true, + default: MockKnowledgeBaseEmptyState, + }; +}); + +const KnowledgeBaseEmptyState = require("../KnowledgeBaseEmptyState").default; + +const createTestWrapper = () => { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { retry: false }, + mutations: { retry: false }, + }, + }); + + return ({ children }: { children: React.ReactNode }) => ( + + {children} + + ); +}; + +describe("KnowledgeBaseEmptyState", () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it("renders empty state message correctly", () => { + render(, { wrapper: createTestWrapper() }); + + expect(screen.getByText("No knowledge bases")).toBeInTheDocument(); + expect( + screen.getByText("Create your first knowledge base to get started."), + ).toBeInTheDocument(); + }); + + it("renders create knowledge button", () => { + render(, { wrapper: createTestWrapper() }); + + const createButton = screen.getByTestId("create-knowledge-btn"); + expect(createButton).toBeInTheDocument(); + expect(createButton).toHaveTextContent("Create Knowledge"); + }); + + it("handles create knowledge button click", () => { + render(, { wrapper: createTestWrapper() }); + + const createButton = screen.getByTestId("create-knowledge-btn"); + fireEvent.click(createButton); + + // Since we're using a mock, we just verify the button is clickable + expect(createButton).toBeInTheDocument(); + }); + + it("renders with correct test id", () => { + render(, { wrapper: createTestWrapper() }); + + expect( + screen.getByTestId("knowledge-base-empty-state"), + ).toBeInTheDocument(); + }); +}); diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseSelectionOverlay.test.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseSelectionOverlay.test.tsx new file mode 100644 index 000000000..857580e13 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBaseSelectionOverlay.test.tsx @@ -0,0 +1,173 @@ +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { fireEvent, render, screen } from "@testing-library/react"; +import React from "react"; + +// Mock the component to avoid complex dependency chains +jest.mock("../KnowledgeBaseSelectionOverlay", () => { + const MockKnowledgeBaseSelectionOverlay = ({ + selectedFiles, + quantitySelected, + onClearSelection, + onDelete, + }: any) => { + const isVisible = selectedFiles.length > 0; + const pluralSuffix = quantitySelected > 1 ? "s" : ""; + + const handleDelete = () => { + if (onDelete) { + onDelete(); + } + }; + + return ( +
+ {quantitySelected} selected + + + + knowledge base{pluralSuffix} + +
+ ); + }; + MockKnowledgeBaseSelectionOverlay.displayName = + "KnowledgeBaseSelectionOverlay"; + return { + __esModule: true, + default: MockKnowledgeBaseSelectionOverlay, + }; +}); + +const KnowledgeBaseSelectionOverlay = + require("../KnowledgeBaseSelectionOverlay").default; + +const createTestWrapper = () => { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { retry: false }, + mutations: { retry: false }, + }, + }); + + return ({ children }: { children: React.ReactNode }) => ( + {children} + ); +}; + +const mockSelectedFiles = [ + { id: "kb-1", name: "Knowledge Base 1" }, + { id: "kb-2", name: "Knowledge Base 2" }, +]; + +describe("KnowledgeBaseSelectionOverlay", () => { + const mockOnClearSelection = jest.fn(); + const mockOnDelete = jest.fn(); + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it("renders as invisible when no files are selected", () => { + render( + , + { wrapper: createTestWrapper() }, + ); + + const overlay = screen.getByTestId("selection-overlay"); + expect(overlay).toHaveClass("opacity-0"); + }); + + it("renders as visible when files are selected", () => { + render( + , + { wrapper: createTestWrapper() }, + ); + + const overlay = screen.getByTestId("selection-overlay"); + expect(overlay).toHaveClass("opacity-100"); + }); + + it("displays correct selection count for single item", () => { + render( + , + { wrapper: createTestWrapper() }, + ); + + expect(screen.getByTestId("selection-count")).toHaveTextContent( + "1 selected", + ); + expect(screen.getByTestId("delete-description")).toHaveTextContent( + "knowledge base", + ); + }); + + it("displays correct selection count for multiple items", () => { + render( + , + { wrapper: createTestWrapper() }, + ); + + expect(screen.getByTestId("selection-count")).toHaveTextContent( + "2 selected", + ); + expect(screen.getByTestId("delete-description")).toHaveTextContent( + "knowledge bases", + ); + }); + + it("calls custom onDelete when provided", () => { + render( + , + { wrapper: createTestWrapper() }, + ); + + const deleteButton = screen.getByTestId("bulk-delete-kb-btn"); + fireEvent.click(deleteButton); + + expect(mockOnDelete).toHaveBeenCalledTimes(1); + }); + + it("calls onClearSelection when clear button is clicked", () => { + render( + , + { wrapper: createTestWrapper() }, + ); + + const clearButton = screen.getByTestId("clear-selection-btn"); + fireEvent.click(clearButton); + + expect(mockOnClearSelection).toHaveBeenCalledTimes(1); + }); +}); diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBasesTab.test.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBasesTab.test.tsx new file mode 100644 index 000000000..957390596 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/KnowledgeBasesTab.test.tsx @@ -0,0 +1,170 @@ +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { fireEvent, render, screen } from "@testing-library/react"; +import React from "react"; + +// Mock the component to avoid complex dependencies +jest.mock("../KnowledgeBasesTab", () => { + const MockKnowledgeBasesTab = ({ + quickFilterText, + setQuickFilterText, + selectedFiles, + quantitySelected, + isShiftPressed, + onRowClick, + }: any) => ( +
+ setQuickFilterText?.(e.target.value)} + /> +
+
Mock Table
+
+ {selectedFiles?.length || 0} selected +
+
+ {isShiftPressed ? "Shift pressed" : "No shift"} +
+ {onRowClick && ( + + )} +
+
+ ); + MockKnowledgeBasesTab.displayName = "KnowledgeBasesTab"; + return { + __esModule: true, + default: MockKnowledgeBasesTab, + }; +}); + +const KnowledgeBasesTab = require("../KnowledgeBasesTab").default; + +const createTestWrapper = () => { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { retry: false }, + mutations: { retry: false }, + }, + }); + + return ({ children }: { children: React.ReactNode }) => ( + {children} + ); +}; + +const defaultProps = { + quickFilterText: "", + setQuickFilterText: jest.fn(), + selectedFiles: [], + setSelectedFiles: jest.fn(), + quantitySelected: 0, + setQuantitySelected: jest.fn(), + isShiftPressed: false, + onRowClick: jest.fn(), +}; + +describe("KnowledgeBasesTab", () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it("renders search input with correct placeholder", () => { + render(, { + wrapper: createTestWrapper(), + }); + + const searchInput = screen.getByTestId("search-kb-input"); + expect(searchInput).toBeInTheDocument(); + expect(searchInput).toHaveAttribute( + "placeholder", + "Search knowledge bases...", + ); + }); + + it("handles search input changes", () => { + const mockSetQuickFilterText = jest.fn(); + render( + , + { wrapper: createTestWrapper() }, + ); + + const searchInput = screen.getByTestId("search-kb-input"); + fireEvent.change(searchInput, { target: { value: "test search" } }); + + expect(mockSetQuickFilterText).toHaveBeenCalledWith("test search"); + }); + + it("displays search value in input", () => { + render( + , + { wrapper: createTestWrapper() }, + ); + + const searchInput = screen.getByTestId( + "search-kb-input", + ) as HTMLInputElement; + expect(searchInput.value).toBe("existing search"); + }); + + it("displays selected count", () => { + const selectedFiles = [{ id: "kb-1" }, { id: "kb-2" }]; + render( + , + { wrapper: createTestWrapper() }, + ); + + expect(screen.getByTestId("selected-count")).toHaveTextContent( + "2 selected", + ); + }); + + it("displays shift key state", () => { + render(, { + wrapper: createTestWrapper(), + }); + + expect(screen.getByTestId("shift-pressed")).toHaveTextContent( + "Shift pressed", + ); + }); + + it("calls onRowClick when provided", () => { + const mockOnRowClick = jest.fn(); + render( + , + { wrapper: createTestWrapper() }, + ); + + const rowButton = screen.getByTestId("mock-row-click"); + fireEvent.click(rowButton); + + expect(mockOnRowClick).toHaveBeenCalledWith({ + id: "kb-1", + name: "Test KB", + }); + }); + + it("renders table content", () => { + render(, { + wrapper: createTestWrapper(), + }); + + expect(screen.getByTestId("table-content")).toBeInTheDocument(); + expect(screen.getByText("Mock Table")).toBeInTheDocument(); + }); +}); diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/test-utils.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/test-utils.tsx new file mode 100644 index 000000000..ddb0ae905 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/components/__tests__/test-utils.tsx @@ -0,0 +1,126 @@ +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import React from "react"; +import { BrowserRouter } from "react-router-dom"; +import type { KnowledgeBaseInfo } from "@/controllers/API/queries/knowledge-bases/use-get-knowledge-bases"; + +/** + * Creates a test wrapper with React Query and Router providers + */ +export const createTestWrapper = () => { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { retry: false }, + 
mutations: { retry: false }, + }, + }); + + return ({ children }: { children: React.ReactNode }) => ( + + {children} + + ); +}; + +/** + * Mock knowledge base data for testing + */ +export const mockKnowledgeBase: KnowledgeBaseInfo = { + id: "kb-1", + name: "Test Knowledge Base", + embedding_provider: "OpenAI", + embedding_model: "text-embedding-ada-002", + size: 1024000, + words: 50000, + characters: 250000, + chunks: 100, + avg_chunk_size: 2500, +}; + +export const mockKnowledgeBaseList: KnowledgeBaseInfo[] = [ + mockKnowledgeBase, + { + id: "kb-2", + name: "Second Knowledge Base", + embedding_provider: "Anthropic", + embedding_model: "claude-embedding", + size: 2048000, + words: 75000, + characters: 400000, + chunks: 150, + avg_chunk_size: 2666, + }, + { + id: "kb-3", + name: "Third Knowledge Base", + embedding_model: undefined, // Test case for missing embedding model + size: 512000, + words: 25000, + characters: 125000, + chunks: 50, + avg_chunk_size: 2500, + }, +]; + +/** + * Mock ForwardedIconComponent for consistent testing + */ +export const mockIconComponent = () => { + jest.mock("@/components/common/genericIconComponent", () => { + const MockedIcon = ({ + name, + ...props + }: { + name: string; + [key: string]: any; + }) => ; + MockedIcon.displayName = "ForwardedIconComponent"; + return MockedIcon; + }); +}; + +/** + * Mock TableComponent for testing components that use ag-grid + */ +export const mockTableComponent = () => { + jest.mock( + "@/components/core/parameterRenderComponent/components/tableComponent", + () => { + const MockTable = (props: any) => ( +
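These helpers are meant to be composed inside individual specs. A brief usage sketch; the rendered list is deliberately trivial so the example stays self-contained:

import { render, screen } from "@testing-library/react";
import React from "react";
import { createTestWrapper, mockKnowledgeBaseList } from "./test-utils";

// Sketch: anything rendered under the wrapper gets router + query providers.
it("makes the mock fixtures available under the shared wrapper", () => {
  render(
    <ul>
      {mockKnowledgeBaseList.map((kb) => (
        <li key={kb.id}>{kb.name}</li>
      ))}
    </ul>,
    { wrapper: createTestWrapper() },
  );
  expect(screen.getByText("Second Knowledge Base")).toBeInTheDocument();
});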
+
Mock Table
+
+ ); + MockTable.displayName = "TableComponent"; + return MockTable; + }, + ); +}; + +/** + * Common alert store mock setup + */ +export const setupAlertStoreMock = () => { + const mockSetSuccessData = jest.fn(); + const mockSetErrorData = jest.fn(); + + return { + mockSetSuccessData, + mockSetErrorData, + mockAlertStore: { + setSuccessData: mockSetSuccessData, + setErrorData: mockSetErrorData, + }, + }; +}; + +/** + * Mock react-router-dom useParams hook + */ +export const mockUseParams = ( + params: Record = {}, +) => { + jest.doMock("react-router-dom", () => ({ + ...jest.requireActual("react-router-dom"), + useParams: () => params, + })); +}; diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/config/knowledgeBaseColumns.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/config/knowledgeBaseColumns.tsx new file mode 100644 index 000000000..1cdb5e924 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/config/knowledgeBaseColumns.tsx @@ -0,0 +1,115 @@ +import type { ColDef, NewValueParams } from "ag-grid-community"; +import ForwardedIconComponent from "@/components/common/genericIconComponent"; +import { Button } from "@/components/ui/button"; +import { formatFileSize } from "@/utils/stringManipulation"; +import { + formatAverageChunkSize, + formatNumber, +} from "../utils/knowledgeBaseUtils"; + +export const createKnowledgeBaseColumns = ( + onRename?: (params: NewValueParams) => void, + onDelete?: (knowledgeBase: any) => void, +): ColDef[] => { + const baseCellClass = + "text-muted-foreground cursor-pointer select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none"; + + return [ + { + headerName: "Name", + field: "name", + flex: 2, + headerCheckboxSelection: true, + checkboxSelection: true, + editable: true, + filter: "agTextColumnFilter", + cellClass: baseCellClass, + cellRenderer: (params) => ( +
+
+
{params.value}
+
+
+ ), + }, + { + headerName: "Embedding Model", + field: "embedding_provider", + flex: 1.2, + filter: "agTextColumnFilter", + editable: false, + cellClass: baseCellClass, + tooltipValueGetter: (params) => params.data.embedding_model || "Unknown", + valueGetter: (params) => params.data.embedding_model || "Unknown", + }, + { + headerName: "Size", + field: "size", + flex: 0.8, + valueFormatter: (params) => formatFileSize(params.value), + editable: false, + cellClass: baseCellClass, + }, + { + headerName: "Words", + field: "words", + flex: 0.8, + editable: false, + cellClass: baseCellClass, + valueFormatter: (params) => formatNumber(params.value), + }, + { + headerName: "Characters", + field: "characters", + flex: 1, + editable: false, + cellClass: baseCellClass, + valueFormatter: (params) => formatNumber(params.value), + }, + { + headerName: "Chunks", + field: "chunks", + flex: 0.7, + editable: false, + cellClass: baseCellClass, + valueFormatter: (params) => formatNumber(params.value), + }, + { + headerName: "Avg Chunks", + field: "avg_chunk_size", + flex: 1, + editable: false, + cellClass: baseCellClass, + valueFormatter: (params) => formatAverageChunkSize(params.value), + }, + { + maxWidth: 60, + editable: false, + resizable: false, + cellClass: "cursor-default", + cellRenderer: (params) => { + const handleDeleteClick = () => { + if (onDelete) { + onDelete(params.data); + } + }; + + return ( +
+ +
+ ); + }, + }, + ]; +}; diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/index.tsx b/src/frontend/src/pages/MainPage/pages/filesPage/index.tsx index 4151d9e6a..963ae02ec 100644 --- a/src/frontend/src/pages/MainPage/pages/filesPage/index.tsx +++ b/src/frontend/src/pages/MainPage/pages/filesPage/index.tsx @@ -1,43 +1,13 @@ -import type { - ColDef, - NewValueParams, - SelectionChangedEvent, -} from "ag-grid-community"; -import type { AgGridReact } from "ag-grid-react"; -import { useEffect, useMemo, useRef, useState } from "react"; +import { useEffect, useState } from "react"; import ForwardedIconComponent from "@/components/common/genericIconComponent"; -import ShadTooltip from "@/components/common/shadTooltipComponent"; -import CardsWrapComponent from "@/components/core/cardsWrapComponent"; -import TableComponent from "@/components/core/parameterRenderComponent/components/tableComponent"; -import { Button } from "@/components/ui/button"; -import { Input } from "@/components/ui/input"; -import Loading from "@/components/ui/loading"; import { SidebarTrigger } from "@/components/ui/sidebar"; -import { useGetFilesV2 } from "@/controllers/API/queries/file-management"; -import { useDeleteFilesV2 } from "@/controllers/API/queries/file-management/use-delete-files"; -import { usePostRenameFileV2 } from "@/controllers/API/queries/file-management/use-put-rename-file"; -import { useCustomHandleBulkFilesDownload } from "@/customization/hooks/use-custom-handle-bulk-files-download"; -import { customPostUploadFileV2 } from "@/customization/hooks/use-custom-post-upload-file"; -import useUploadFile from "@/hooks/files/use-upload-file"; -import DeleteConfirmationModal from "@/modals/deleteConfirmationModal"; -import FilesContextMenuComponent from "@/modals/fileManagerModal/components/filesContextMenuComponent"; -import useAlertStore from "@/stores/alertStore"; -import { formatFileSize } from "@/utils/stringManipulation"; -import { FILE_ICONS } from "@/utils/styleUtils"; -import { cn } from "@/utils/utils"; -import { sortByDate } from "../../utils/sort-flows"; -import DragWrapComponent from "./components/dragWrapComponent"; +import FilesTab from "./components/FilesTab"; export const FilesPage = () => { - const tableRef = useRef>(null); - const { data: files } = useGetFilesV2(); - const setErrorData = useAlertStore((state) => state.setErrorData); - const setSuccessData = useAlertStore((state) => state.setSuccessData); - const [selectedFiles, setSelectedFiles] = useState([]); const [quantitySelected, setQuantitySelected] = useState(0); const [isShiftPressed, setIsShiftPressed] = useState(false); - const [isDownloading, setIsDownloading] = useState(false); + const [quickFilterText, setQuickFilterText] = useState(""); useEffect(() => { const handleKeyDown = (e: KeyboardEvent) => { @@ -61,260 +31,16 @@ export const FilesPage = () => { }; }, []); - const handleSelectionChanged = (event: SelectionChangedEvent) => { - const selectedRows = event.api.getSelectedRows(); - setSelectedFiles(selectedRows); - if (selectedRows.length > 0) { - setQuantitySelected(selectedRows.length); - } else { - setTimeout(() => { - setQuantitySelected(0); - }, 300); - } + const tabProps = { + quickFilterText, + setQuickFilterText, + selectedFiles, + setSelectedFiles, + quantitySelected, + setQuantitySelected, + isShiftPressed, }; - const { mutate: rename } = usePostRenameFileV2(); - - const { mutate: deleteFiles, isPending: isDeleting } = useDeleteFilesV2(); - const { handleBulkDownload } = useCustomHandleBulkFilesDownload(); - - 
const handleRename = (params: NewValueParams) => { - rename({ - id: params.data.id, - name: params.newValue, - }); - }; - - const handleOpenRename = (id: string, name: string) => { - if (tableRef.current) { - tableRef.current.api.startEditingCell({ - rowIndex: files?.findIndex((file) => file.id === id) ?? 0, - colKey: "name", - }); - } - }; - - const uploadFile = useUploadFile({ multiple: true }); - - const handleUpload = async (files?: File[]) => { - try { - const filesIds = await uploadFile({ - files: files, - }); - setSuccessData({ - title: `File${filesIds.length > 1 ? "s" : ""} uploaded successfully`, - }); - } catch (error: any) { - setErrorData({ - title: "Error uploading file", - list: [error.message || "An error occurred while uploading the file"], - }); - } - }; - - const { mutate: uploadFileDirect } = customPostUploadFileV2(); - - useEffect(() => { - if (files) { - setQuantitySelected(0); - setSelectedFiles([]); - } - }, [files]); - - const colDefs: ColDef[] = [ - { - headerName: "Name", - field: "name", - flex: 2, - headerCheckboxSelection: true, - checkboxSelection: true, - editable: true, - filter: "agTextColumnFilter", - cellClass: - "cursor-text select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none", - cellRenderer: (params) => { - const type = params.data.path.split(".")[1]?.toLowerCase(); - return ( -
- {params.data.progress !== undefined && - params.data.progress !== -1 ? ( -
- {Math.round(params.data.progress * 100)}% -
- ) : ( -
- -
- )} -
- {params.value}.{type} -
- {params.data.progress !== undefined && - params.data.progress === -1 ? ( - - Upload failed,{" "} - { - e.stopPropagation(); - if (params.data.file) { - uploadFileDirect({ file: params.data.file }); - } - }} - > - try again? - - - ) : ( - <> - )} -
- ); - }, //This column will be twice as wide as the others - }, //This column will be twice as wide as the others - { - headerName: "Type", - field: "path", - flex: 1, - filter: "agTextColumnFilter", - editable: false, - valueFormatter: (params) => { - return params.value.split(".")[1]?.toUpperCase(); - }, - cellClass: - "text-muted-foreground cursor-text select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none", - }, - { - headerName: "Size", - field: "size", - flex: 1, - valueFormatter: (params) => { - return formatFileSize(params.value); - }, - editable: false, - cellClass: - "text-muted-foreground cursor-text select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none", - }, - { - headerName: "Modified", - field: "updated_at", - valueFormatter: (params) => { - return params.data.progress - ? "" - : new Date(params.value + "Z").toLocaleString(); - }, - editable: false, - flex: 1, - resizable: false, - cellClass: - "text-muted-foreground cursor-text select-text group-[.no-select-cells]:cursor-default group-[.no-select-cells]:select-none", - }, - { - maxWidth: 60, - editable: false, - resizable: false, - cellClass: "cursor-default", - cellRenderer: (params) => { - return ( -
- {!params.data.progress && ( - - - - )} -
- ); - }, - }, - ]; - - const onFileDrop = async (e: React.DragEvent) => { - e.preventDefault; - e.stopPropagation(); - const droppedFiles = Array.from(e.dataTransfer.files); - if (droppedFiles.length > 0) { - await handleUpload(droppedFiles); - } - }; - - const handleDownload = () => { - handleBulkDownload( - selectedFiles, - setSuccessData, - setErrorData, - setIsDownloading, - ); - }; - - const handleDelete = () => { - deleteFiles( - { - ids: selectedFiles.map((file) => file.id), - }, - { - onSuccess: (data) => { - setSuccessData({ title: data.message }); - setQuantitySelected(0); - setSelectedFiles([]); - }, - onError: (error) => { - setErrorData({ - title: "Error deleting files", - list: [ - error.message || "An error occurred while deleting the files", - ], - }); - }, - }, - ); - }; - - const UploadButtonComponent = useMemo(() => { - return ( - - - - ); - }, [uploadFile]); - - const [quickFilterText, setQuickFilterText] = useState(""); - return (
{
- My Files + Files - {files && files.length !== 0 ? ( -
-
- { - setQuickFilterText(event.target.value); - }} - /> -
-
- {UploadButtonComponent} - {/* */} -
-
- ) : ( - <> - )} - -
- {!files || !Array.isArray(files) ? ( -
- -
- ) : files.length > 0 ? ( - -
- { - return sortByDate( - a.updated_at ?? a.created_at, - b.updated_at ?? b.created_at, - ); - })} - className={cn( - "ag-no-border group w-full", - isShiftPressed && - quantitySelected > 0 && - "no-select-cells", - )} - pagination - ref={tableRef} - quickFilterText={quickFilterText} - gridOptions={{ - stopEditingWhenCellsLoseFocus: true, - ensureDomOrder: true, - colResizeDefault: "shift", - }} - /> - -
0 ? "opacity-100" : "opacity-0", - )} - > -
0 - ? "pointer-events-auto" - : "pointer-events-none", - )} - > - - {quantitySelected} selected - -
- - - 1 ? "s" : "") - } - > - - -
-
-
-
-
- ) : ( - -
-
-

No files

-

- Upload files or import from your preferred cloud. -

-
-
- {UploadButtonComponent} - {/* */} -
-
-
- )} +
+
diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/utils/__tests__/knowledgeBaseUtils.test.ts b/src/frontend/src/pages/MainPage/pages/filesPage/utils/__tests__/knowledgeBaseUtils.test.ts new file mode 100644 index 000000000..addcc1a85 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/utils/__tests__/knowledgeBaseUtils.test.ts @@ -0,0 +1,73 @@ +import { formatAverageChunkSize, formatNumber } from "../knowledgeBaseUtils"; + +describe("knowledgeBaseUtils", () => { + describe("formatNumber", () => { + it("formats numbers with commas for thousands", () => { + expect(formatNumber(1000)).toBe("1,000"); + expect(formatNumber(1500)).toBe("1,500"); + expect(formatNumber(10000)).toBe("10,000"); + expect(formatNumber(100000)).toBe("100,000"); + expect(formatNumber(1000000)).toBe("1,000,000"); + }); + + it("handles numbers less than 1000 without commas", () => { + expect(formatNumber(0)).toBe("0"); + expect(formatNumber(1)).toBe("1"); + expect(formatNumber(99)).toBe("99"); + expect(formatNumber(999)).toBe("999"); + }); + + it("handles negative numbers", () => { + expect(formatNumber(-1000)).toBe("-1,000"); + expect(formatNumber(-1500)).toBe("-1,500"); + expect(formatNumber(-999)).toBe("-999"); + }); + + it("handles decimal numbers by displaying them with decimals", () => { + expect(formatNumber(1000.5)).toBe("1,000.5"); + expect(formatNumber(1999.9)).toBe("1,999.9"); + expect(formatNumber(999.1)).toBe("999.1"); + }); + + it("handles very large numbers", () => { + expect(formatNumber(1234567890)).toBe("1,234,567,890"); + expect(formatNumber(987654321)).toBe("987,654,321"); + }); + }); + + describe("formatAverageChunkSize", () => { + it("formats average chunk size by rounding and formatting", () => { + expect(formatAverageChunkSize(1000.4)).toBe("1,000"); + expect(formatAverageChunkSize(1000.6)).toBe("1,001"); + expect(formatAverageChunkSize(2500)).toBe("2,500"); + expect(formatAverageChunkSize(999.9)).toBe("1,000"); + }); + + it("handles small decimal values", () => { + expect(formatAverageChunkSize(1.2)).toBe("1"); + expect(formatAverageChunkSize(1.6)).toBe("2"); + expect(formatAverageChunkSize(0.4)).toBe("0"); + expect(formatAverageChunkSize(0.6)).toBe("1"); + }); + + it("handles zero and negative values", () => { + expect(formatAverageChunkSize(0)).toBe("0"); + expect(formatAverageChunkSize(-5.5)).toBe("-5"); + expect(formatAverageChunkSize(-1000.4)).toBe("-1,000"); + }); + + it("handles large decimal values", () => { + expect(formatAverageChunkSize(123456.7)).toBe("123,457"); + expect(formatAverageChunkSize(999999.1)).toBe("999,999"); + expect(formatAverageChunkSize(999999.9)).toBe("1,000,000"); + }); + + it("handles edge cases", () => { + expect(formatAverageChunkSize(0.5)).toBe("1"); + expect(formatAverageChunkSize(-0.5)).toBe("-0"); + expect(formatAverageChunkSize(Number.MAX_SAFE_INTEGER)).toBe( + "9,007,199,254,740,991", + ); + }); + }); +}); diff --git a/src/frontend/src/pages/MainPage/pages/filesPage/utils/knowledgeBaseUtils.ts b/src/frontend/src/pages/MainPage/pages/filesPage/utils/knowledgeBaseUtils.ts new file mode 100644 index 000000000..4a70cb282 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/filesPage/utils/knowledgeBaseUtils.ts @@ -0,0 +1,13 @@ +/** + * Helper function to format numbers with commas + */ +export const formatNumber = (num: number): string => { + return new Intl.NumberFormat().format(num); +}; + +/** + * Format average chunk size with units + */ +export const formatAverageChunkSize = (avgChunkSize: number): string => { + return 
`${formatNumber(Math.round(avgChunkSize))}`; +}; diff --git a/src/frontend/src/pages/MainPage/pages/knowledgePage/__tests__/KnowledgePage.test.tsx b/src/frontend/src/pages/MainPage/pages/knowledgePage/__tests__/KnowledgePage.test.tsx new file mode 100644 index 000000000..bed1859fd --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/knowledgePage/__tests__/KnowledgePage.test.tsx @@ -0,0 +1,244 @@ +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { fireEvent, render, screen, waitFor } from "@testing-library/react"; +import React from "react"; +import { BrowserRouter } from "react-router-dom"; + +// Mock the KnowledgePage component to test in isolation +jest.mock("../index", () => { + const MockKnowledgePage = () => { + const [isShiftPressed, setIsShiftPressed] = React.useState(false); + const [isDrawerOpen, setIsDrawerOpen] = React.useState(false); + const [selectedKnowledgeBase, setSelectedKnowledgeBase] = + React.useState(null); + + React.useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === "Shift") { + setIsShiftPressed(true); + } + }; + + const handleKeyUp = (e: KeyboardEvent) => { + if (e.key === "Shift") { + setIsShiftPressed(false); + } + }; + + window.addEventListener("keydown", handleKeyDown); + window.addEventListener("keyup", handleKeyUp); + + return () => { + window.removeEventListener("keydown", handleKeyDown); + window.removeEventListener("keyup", handleKeyUp); + }; + }, []); + + const handleRowClick = (knowledgeBase: any) => { + setSelectedKnowledgeBase(knowledgeBase); + setIsDrawerOpen(true); + }; + + const closeDrawer = () => { + setIsDrawerOpen(false); + setSelectedKnowledgeBase(null); + }; + + return ( +
+
+
+
+
+
+ + Knowledge +
+
+
+
Quick Filter:
+
Selected Files: 0
+
Quantity Selected: 0
+
Shift Pressed: {isShiftPressed ? "Yes" : "No"}
+ +
+
+
+
+
+
+ + {isDrawerOpen && ( +
+
+
Drawer Open: Yes
+
Knowledge Base: {selectedKnowledgeBase?.name || "None"}
+ +
+
+ )} + + {!isDrawerOpen && ( +
+
Drawer Open: No
+
Knowledge Base: None
+
+ )} +
+ ); + }; + MockKnowledgePage.displayName = "KnowledgePage"; + return { + KnowledgePage: MockKnowledgePage, + }; +}); + +const { KnowledgePage } = require("../index"); + +const createTestWrapper = () => { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { retry: false }, + mutations: { retry: false }, + }, + }); + + return ({ children }: { children: React.ReactNode }) => ( + + {children} + + ); +}; + +describe("KnowledgePage", () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it("renders page title correctly", () => { + render(, { wrapper: createTestWrapper() }); + + expect(screen.getByTestId("mainpage_title")).toBeInTheDocument(); + expect(screen.getByText("Knowledge")).toBeInTheDocument(); + }); + + it("renders sidebar trigger", () => { + render(, { wrapper: createTestWrapper() }); + + expect(screen.getByTestId("sidebar-trigger")).toBeInTheDocument(); + expect(screen.getByTestId("icon-PanelLeftOpen")).toBeInTheDocument(); + }); + + it("handles shift key press and release", async () => { + render(, { wrapper: createTestWrapper() }); + + // Initially shift is not pressed + expect(screen.getByText("Shift Pressed: No")).toBeInTheDocument(); + + // Simulate shift key down + fireEvent.keyDown(window, { key: "Shift" }); + + await waitFor(() => { + expect(screen.getByText("Shift Pressed: Yes")).toBeInTheDocument(); + }); + + // Simulate shift key up + fireEvent.keyUp(window, { key: "Shift" }); + + await waitFor(() => { + expect(screen.getByText("Shift Pressed: No")).toBeInTheDocument(); + }); + }); + + it("ignores non-shift key events", async () => { + render(, { wrapper: createTestWrapper() }); + + expect(screen.getByText("Shift Pressed: No")).toBeInTheDocument(); + + // Simulate other key events + fireEvent.keyDown(window, { key: "Enter" }); + fireEvent.keyUp(window, { key: "Enter" }); + + // Should still be false + expect(screen.getByText("Shift Pressed: No")).toBeInTheDocument(); + }); + + it("initializes with drawer closed", () => { + render(, { wrapper: createTestWrapper() }); + + expect(screen.getByText("Drawer Open: No")).toBeInTheDocument(); + expect(screen.getByText("Knowledge Base: None")).toBeInTheDocument(); + }); + + it("opens drawer when row is clicked", async () => { + render(, { wrapper: createTestWrapper() }); + + // Initially drawer is closed + expect(screen.getByText("Drawer Open: No")).toBeInTheDocument(); + + // Click on a row + const rowClickButton = screen.getByTestId("mock-row-click"); + fireEvent.click(rowClickButton); + + await waitFor(() => { + expect(screen.getByText("Drawer Open: Yes")).toBeInTheDocument(); + expect( + screen.getByText("Knowledge Base: Test Knowledge Base"), + ).toBeInTheDocument(); + }); + }); + + it("closes drawer when close button is clicked", async () => { + render(, { wrapper: createTestWrapper() }); + + // First open the drawer + const rowClickButton = screen.getByTestId("mock-row-click"); + fireEvent.click(rowClickButton); + + await waitFor(() => { + expect(screen.getByText("Drawer Open: Yes")).toBeInTheDocument(); + }); + + // Now close the drawer + const closeButton = screen.getByTestId("drawer-close"); + fireEvent.click(closeButton); + + await waitFor(() => { + expect(screen.getByText("Drawer Open: No")).toBeInTheDocument(); + expect(screen.getByText("Knowledge Base: None")).toBeInTheDocument(); + }); + }); + + it("adjusts layout when drawer is open", async () => { + render(, { wrapper: createTestWrapper() }); + + const contentContainer = screen.getByTestId("cards-wrapper") + .firstChild as HTMLElement; + 
+ // Initially no margin adjustment + expect(contentContainer).not.toHaveClass("mr-80"); + + // Open drawer + const rowClickButton = screen.getByTestId("mock-row-click"); + fireEvent.click(rowClickButton); + + await waitFor(() => { + expect(contentContainer).toHaveClass("mr-80"); + }); + }); +}); diff --git a/src/frontend/src/pages/MainPage/pages/knowledgePage/index.tsx b/src/frontend/src/pages/MainPage/pages/knowledgePage/index.tsx new file mode 100644 index 000000000..1c27e9786 --- /dev/null +++ b/src/frontend/src/pages/MainPage/pages/knowledgePage/index.tsx @@ -0,0 +1,143 @@ +import { useEffect, useRef, useState } from "react"; +import ForwardedIconComponent from "@/components/common/genericIconComponent"; +import { SidebarTrigger } from "@/components/ui/sidebar"; +import type { KnowledgeBaseInfo } from "@/controllers/API/queries/knowledge-bases/use-get-knowledge-bases"; +import KnowledgeBaseDrawer from "../filesPage/components/KnowledgeBaseDrawer"; +import KnowledgeBasesTab from "../filesPage/components/KnowledgeBasesTab"; + +export const KnowledgePage = () => { + const [selectedKnowledgeBases, setSelectedKnowledgeBases] = useState( + [], + ); + const [selectionCount, setSelectionCount] = useState(0); + const [isShiftPressed, setIsShiftPressed] = useState(false); + const [searchText, setSearchText] = useState(""); + const [isDrawerOpen, setIsDrawerOpen] = useState(false); + const [selectedKnowledgeBase, setSelectedKnowledgeBase] = + useState(null); + + const drawerRef = useRef(null); + + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === "Shift") { + setIsShiftPressed(true); + } + }; + + const handleKeyUp = (e: KeyboardEvent) => { + if (e.key === "Shift") { + setIsShiftPressed(false); + } + }; + + window.addEventListener("keydown", handleKeyDown); + window.addEventListener("keyup", handleKeyUp); + + return () => { + window.removeEventListener("keydown", handleKeyDown); + window.removeEventListener("keyup", handleKeyUp); + }; + }, []); + + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + isDrawerOpen && + drawerRef.current && + !drawerRef.current.contains(event.target as Node) + ) { + const clickedElement = event.target as HTMLElement; + const isTableRowClick = clickedElement.closest(".ag-row"); + + if (!isTableRowClick) { + closeDrawer(); + } + } + }; + + if (isDrawerOpen) { + document.addEventListener("mousedown", handleClickOutside); + } + + return () => { + document.removeEventListener("mousedown", handleClickOutside); + }; + }, [isDrawerOpen]); + + const handleKnowledgeBaseSelect = (knowledgeBase: KnowledgeBaseInfo) => { + if (isDrawerOpen) { + closeDrawer(); + } else { + setSelectedKnowledgeBase(knowledgeBase); + // setIsDrawerOpen(true); + } + }; + + const closeDrawer = () => { + setIsDrawerOpen(false); + setSelectedKnowledgeBase(null); + }; + + const tabProps = { + quickFilterText: searchText, + setQuickFilterText: setSearchText, + selectedFiles: selectedKnowledgeBases, + setSelectedFiles: setSelectedKnowledgeBases, + quantitySelected: selectionCount, + setQuantitySelected: setSelectionCount, + isShiftPressed, + onRowClick: handleKnowledgeBaseSelect, + }; + + return ( +
+
+
+
+
+
+
+
+ + +
+
+ Knowledge +
+
+ +
+
+
+
+
+ + {isDrawerOpen && ( +
+ +
+ )} +
+ ); +}; + +export default KnowledgePage; diff --git a/src/frontend/src/pages/MainPage/pages/main-page.tsx b/src/frontend/src/pages/MainPage/pages/main-page.tsx index a67627e34..94d9b8d36 100644 --- a/src/frontend/src/pages/MainPage/pages/main-page.tsx +++ b/src/frontend/src/pages/MainPage/pages/main-page.tsx @@ -69,7 +69,7 @@ export default function CollectionPage(): JSX.Element { setOpenDeleteFolderModal(true); }} handleFilesClick={() => { - navigate("files"); + navigate("assets"); }} /> )} diff --git a/src/frontend/src/routes.tsx b/src/frontend/src/routes.tsx index 5587eb1d1..909cbbd73 100644 --- a/src/frontend/src/routes.tsx +++ b/src/frontend/src/routes.tsx @@ -26,6 +26,7 @@ import FlowPage from "./pages/FlowPage"; import LoginPage from "./pages/LoginPage"; import FilesPage from "./pages/MainPage/pages/filesPage"; import HomePage from "./pages/MainPage/pages/homePage"; +import KnowledgePage from "./pages/MainPage/pages/knowledgePage"; import CollectionPage from "./pages/MainPage/pages/main-page"; import SettingsPage from "./pages/SettingsPage"; import ApiKeysPage from "./pages/SettingsPage/pages/ApiKeysPage"; @@ -82,7 +83,17 @@ const router = createBrowserRouter( element={} /> {ENABLE_FILE_MANAGEMENT && ( - } /> + + } + /> + } /> + } + /> + )} Date: Thu, 14 Aug 2025 11:25:57 -0300 Subject: [PATCH 08/86] fix: add langchain-chroma to langflow-base (#9396) --- src/backend/base/pyproject.toml | 1 + uv.lock | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/backend/base/pyproject.toml b/src/backend/base/pyproject.toml index 5b561dda8..0fef9889e 100644 --- a/src/backend/base/pyproject.toml +++ b/src/backend/base/pyproject.toml @@ -88,6 +88,7 @@ dependencies = [ "ibm-watsonx-ai>=1.3.1", "langchain-ibm>=0.3.8", "trustcall>=0.0.38", + "langchain-chroma>=0.1.4", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index d7155d733..b0d0f533c 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10, <3.14" resolution-markers = [ "python_full_version >= '3.13' and sys_platform == 'darwin'", @@ -5238,6 +5238,7 @@ dependencies = [ { name = "json-repair" }, { name = "jsonquerylang" }, { name = "langchain" }, + { name = "langchain-chroma" }, { name = "langchain-community" }, { name = "langchain-core" }, { name = "langchain-experimental" }, @@ -5372,6 +5373,7 @@ requires-dist = [ { name = "json-repair", specifier = ">=0.30.3" }, { name = "jsonquerylang", specifier = ">=1.1.1" }, { name = "langchain", specifier = "~=0.3.21" }, + { name = "langchain-chroma", specifier = ">=0.1.4" }, { name = "langchain-community", specifier = "~=0.3.20" }, { name = "langchain-core", specifier = "~=0.3.45" }, { name = "langchain-experimental", specifier = ">=0.3.4,<1.0.0" }, From c205e9914a7007f9c74ce31c35d997eaa95079b9 Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Thu, 14 Aug 2025 13:58:28 -0400 Subject: [PATCH 09/86] docs: clarify available API endpoints and their use cases (#9382) * available-endpoints * asterisk * structure-into-tabs-and-confirm-login-endpoint * reorg and clarify some usage --------- Co-authored-by: April M --- docs/docs/API-Reference/api-files.mdx | 6 +- docs/docs/API-Reference/api-monitor.mdx | 40 +++- .../api-reference-api-examples.mdx | 197 +++++++++++++++++- docs/docs/Concepts/concepts-playground.mdx | 9 +- 4 files changed, 231 insertions(+), 21 deletions(-) diff --git a/docs/docs/API-Reference/api-files.mdx b/docs/docs/API-Reference/api-files.mdx index 
05e01f5e9..8b472f48e 100644 --- a/docs/docs/API-Reference/api-files.mdx +++ b/docs/docs/API-Reference/api-files.mdx @@ -419,4 +419,8 @@ curl -X DELETE \ ## Create upload file (Deprecated) -This endpoint is deprecated. Use the `/files` endpoints instead. \ No newline at end of file +This endpoint is deprecated. Use the `/files` endpoints instead. + +## See also + +* [Manage files](/concepts-file-management) \ No newline at end of file diff --git a/docs/docs/API-Reference/api-monitor.mdx b/docs/docs/API-Reference/api-monitor.mdx index f0839349a..89f697303 100644 --- a/docs/docs/API-Reference/api-monitor.mdx +++ b/docs/docs/API-Reference/api-monitor.mdx @@ -6,9 +6,26 @@ slug: /api-monitor import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -Use the `/monitor` endpoint to monitor and modify messages passed between Langflow components, vertex builds, and transactions. +The `/monitor` endpoints are for internal Langflow functionality, primarily related to running flows in the **Playground**, storing chat history, and generating flow logs. -## Get Vertex builds +This information is primarily for those who are building custom components or contributing to the Langflow codebase in a way that requires calling or understanding these endpoints. + +For typical application development with Langflow, there are more appropriate options for monitoring, debugging, and memory management. +For more information, see the following: + +* [Logs](/logging): Langflow log storage locations, customization options, and where to view logs in the visual editor +* [Test flows in the Playground](/concepts-playground): Run flows and inspect message history +* [Memory management options](/memory): Langflow storage locations and options, including the database, cache, and chat history + +## Vertex builds + +The Vertex build endpoints (`/monitor/builds`) are exclusively for **Playground** functionality. + +When you run a flow in the **Playground**, Langflow calls the `/build/$FLOW_ID/flow` endpoint in [chat.py](https://github.com/langflow-ai/langflow/blob/main/src/backend/base/langflow/api/v1/chat.py#L143). This call retrieves the flow data, builds a graph, and executes the graph. As each component (or node) is executed, the `build_vertex` function calls `build_and_run`, which may call the individual components' `def_build` method, if it exists. If a component doesn't have a `def_build` function, the build still returns a component. + +The `build` function allows components to execute logic at runtime. For example, the [**Recursive Character Text Splitter** component](https://github.com/langflow-ai/langflow/blob/main/src/backend/base/langflow/components/langchain_utilities/recursive_character.py) is a child of the `LCTextSplitterComponent` class. When text needs to be processed, the parent class's `build` method is called, which creates a `RecursiveCharacterTextSplitter` object and uses it to split the text according to the defined parameters. The split text is then passed on to the next component. This all occurs when the component is built. + +### Get Vertex builds Retrieve Vertex builds for a specific flow. @@ -384,7 +401,7 @@ curl -X GET \ -## Delete Vertex builds +### Delete Vertex builds Delete Vertex builds for a specific flow. 
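+If you want to script this cleanup rather than use curl, the following minimal Python sketch fetches the stored Vertex builds for a flow and then deletes them. This is illustrative only: it assumes a local Langflow server at `http://localhost:7860`, an API key in the `LANGFLOW_API_KEY` environment variable, and a placeholder flow ID.
+
+```python
+import os
+
+import requests
+
+BASE_URL = "http://localhost:7860"  # assumption: default local Langflow address
+FLOW_ID = "your-flow-id"  # placeholder: replace with a real flow ID
+headers = {"x-api-key": os.environ["LANGFLOW_API_KEY"]}
+
+# Retrieve the stored Vertex builds for the flow.
+builds = requests.get(
+    f"{BASE_URL}/api/v1/monitor/builds",
+    params={"flow_id": FLOW_ID},
+    headers=headers,
+    timeout=10,
+)
+builds.raise_for_status()
+print(builds.json())
+
+# Delete the stored Vertex builds for the same flow.
+deleted = requests.delete(
+    f"{BASE_URL}/api/v1/monitor/builds",
+    params={"flow_id": FLOW_ID},
+    headers=headers,
+    timeout=10,
+)
+deleted.raise_for_status()  # raises if the deletion failed
+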
@@ -404,7 +421,12 @@ curl -X DELETE \ -## Get messages +## Messages endpoints + +The `/monitor/messages` endpoints store, retrieve, edit, and delete records in the message table in [`langflow.db`](/memory) +Typically, these are called implicitly when running flows that produce message history, or when inspecting and modifying **Playground** memories. + +### Get messages Retrieve a list of all messages: @@ -466,7 +488,7 @@ curl -X GET \ -## Delete messages +### Delete messages Delete specific messages by their IDs. @@ -490,7 +512,7 @@ curl -v -X DELETE \ -## Update message +### Update message Update a specific message by its ID. @@ -540,7 +562,7 @@ curl -X PUT \ -## Update session ID +### Update session ID Update the session ID for messages. @@ -591,7 +613,7 @@ curl -X PATCH \ -## Delete messages by session +### Delete messages by session Delete all messages for a specific session. @@ -614,6 +636,7 @@ HTTP/1.1 204 No Content ## Get transactions Retrieve all transactions, which are interactions between components, for a specific flow. +This information is also available in [flow logs](/logging). ```bash curl -X GET \ @@ -651,4 +674,5 @@ curl -X GET \ ## See also +- [Use voice mode](/concepts-voice-mode) - [Session ID](/session-id) \ No newline at end of file diff --git a/docs/docs/API-Reference/api-reference-api-examples.mdx b/docs/docs/API-Reference/api-reference-api-examples.mdx index f9c7aeaae..a6290f2c1 100644 --- a/docs/docs/API-Reference/api-reference-api-examples.mdx +++ b/docs/docs/API-Reference/api-reference-api-examples.mdx @@ -186,9 +186,198 @@ curl -X GET \ -H "x-api-key: $LANGFLOW_API_KEY" ``` +## Available endpoints + +Because you can run Langflow as either an IDE (frontend and backend) or a runtime (headless, backend-only), it serves endpoints that support frontend and backend operations. +Many endpoints are for orchestration between the frontend and backend, reading and writing to the Langflow database, or enabling frontend functionality, like the **Playground**. +Unless you are contributing to the Langflow codebase, you won't directly call most of the Langflow endpoints. + +For application development, the most commonly used endpoints are the `/run` and `/webhook` [flow trigger endpoints](/api-flows-run). +For some use cases, you might use some other endpoints, such as the `/files` endpoints to use files in flows. + +To help you explore the available endpoints, the following lists are sorted by primary use case, although some endpoints might support multiple use cases. + + + + +The following endpoints are useful for developing applications with Langflow and administering Langflow deployments with one or more users. +You will most often use the flow trigger endpoints. +Other endpoints are helpful for specific use cases, such as administration and flow management in runtime deployments that don't have a visual editor. + +* [Flow trigger endpoints](/api-flows-run): + * POST `/v1/run/{flow_id_or_name}`: Run a flow. + * POST `/v1/run/advanced/{flow_id}`: Advanced run with explicit `inputs`, `outputs`, `tweaks`, and optional `session_id`. + * POST `/v1/webhook/{flow_id_or_name}`: Trigger a flow via webhook payload. + +* Deployment details: + * GET `/v1/version`: Return Langflow version. See [Get version](/api-reference-api-examples#get-version). + * GET `/v1/config`: Return deployment configuration. See [Get configuration](/api-reference-api-examples#get-configuration). + +* [Projects endpoints](/api-projects): + * POST `/v1/projects/`: Create a project. 
+ * GET `/v1/projects/`: List projects. + * GET `/v1/projects/{project_id}`: Read a project (with paginated flows support). + * PATCH `/v1/projects/{project_id}`: Update project info and membership. + * DELETE `/v1/projects/{project_id}`: Delete a project. + * GET `/v1/projects/download/{project_id}`: Export all flows in a project as ZIP. + * POST `/v1/projects/upload/`: Import a project ZIP (creates project and flows). + * GET `/v1/starter-projects/`: Return a list of templates. + +* [Files endpoints](/api-files): + * Files (v1) + * POST `/v1/files/upload/{flow_id}`: Upload a file to a specific flow. + * GET `/v1/files/download/{flow_id}/{file_name}`: Download a file from a flow. + * GET `/v1/files/images/{flow_id}/{file_name}`: Stream an image from a flow. + * GET `/v1/files/profile_pictures/{folder_name}/{file_name}`: Get a profile picture asset. + * GET `/v1/files/profile_pictures/list`: List available profile picture assets. + * GET `/v1/files/list/{flow_id}`: List files for a flow. + * DELETE `/v1/files/delete/{flow_id}/{file_name}`: Delete a file from a flow. + * Files (v2) + * POST `/v2/files` (alias `/v2/files/`): Upload a file owned by the current user. + * GET `/v2/files` (alias `/v2/files/`): List files owned by the current user. + * DELETE `/v2/files/batch/`: Delete multiple files by IDs. + * POST `/v2/files/batch/`: Download multiple files as a ZIP by IDs. + * GET `/v2/files/{file_id}`: Download a file by ID (or return raw content internally). + * PUT `/v2/files/{file_id}`: Edit a file name by ID. + * DELETE `/v2/files/{file_id}`: Delete a file by ID. + * DELETE `/v2/files` (alias `/v2/files/`): Delete all files for the current user. + +* [API keys and authentication](/api-keys-and-authentication): + * GET `/v1/api_key/`: List API keys for the current user. + * POST `/v1/api_key/`: Create a new API key. + * DELETE `/v1/api_key/{api_key_id}`: Delete an API key. + * POST `/v1/api_key/store`: Save an encrypted Store API key (cookie set). + +* [Flow management endpoints](/api-flows): + * POST `/v1/flows/`: Create a flow. + * GET `/v1/flows/`: List flows (supports pagination and filters). + * GET `/v1/flows/{flow_id}`: Read a flow by ID. + * GET `/v1/flows/public_flow/{flow_id}`: Read a public flow by ID. + * PATCH `/v1/flows/{flow_id}`: Update a flow. + * DELETE `/v1/flows/{flow_id}`: Delete a flow. + * POST `/v1/flows/batch/`: Create multiple flows. + * POST `/v1/flows/upload/`: Import flows from a JSON file. + * DELETE `/v1/flows/`: Delete multiple flows by IDs. + * POST `/v1/flows/download/`: Export flows to a ZIP file. + * GET `/v1/flows/basic_examples/`: List basic example flows. + +* [Users endpoints](/api-users): + * POST `/v1/users/`: Add a user (superuser required when auth enabled). + * GET `/v1/users/whoami`: Return the current authenticated user. + * GET `/v1/users/`: List all users (superuser required). + * PATCH `/v1/users/{user_id}`: Update a user (with role checks). + * PATCH `/v1/users/{user_id}/reset-password`: Reset own password. + * DELETE `/v1/users/{user_id}`: Delete a user (cannot delete yourself). + + + + +You might use these endpoints when developing custom Langflow components for your own use or to share with the Langflow community: + +* Develop custom components: + * GET `/v1/all`: Return all available Langflow component types. See [Get all components](/api-reference-api-examples#get-all-components). + * POST `/v1/custom_component`: Build a custom component from code and return its node. 
+ * POST `/v1/custom_component/update`: Update an existing custom component's build config and outputs. + * POST `/v1/validate/code`: Validate a Python code snippet for a custom component. + +* Langflow Store: + * GET `/v1/store/check/`: Return whether the Store feature is enabled. + * GET `/v1/store/check/api_key`: Check if a Store API key exists and is valid. + * POST `/v1/store/components/`: Share a component to the Store. + * PATCH `/v1/store/components/{component_id}`: Update a shared component. + * GET `/v1/store/components/`: List available Store components (filters supported). + * GET `/v1/store/components/{component_id}`: Download a component from the Store. + * GET `/v1/store/tags`: List Store tags. + * GET `/v1/store/users/likes`: List components liked by the current user. + * POST `/v1/store/users/likes/{component_id}`: Like a component. + + + + +The following endpoints are for managing Langflow MCP servers, both Langflow-hosted MCP servers and external MCP server connections: + +* **MCP (global)**: + * HEAD `/v1/mcp/sse`: Health check for MCP SSE. + * GET `/v1/mcp/sse`: Open SSE stream for MCP server events. + * POST `/v1/mcp/`: Post messages to the MCP server. + +* **MCP (project-specific)**: + * GET `/v1/mcp/project/{project_id}`: List MCP-enabled tools and project auth settings. + * HEAD `/v1/mcp/project/{project_id}/sse`: Health check for project SSE. + * GET `/v1/mcp/project/{project_id}/sse`: Open project-scoped MCP SSE. + * POST `/v1/mcp/project/{project_id}`: Post messages to project MCP server. + * POST `/v1/mcp/project/{project_id}/` (trailing slash): Same as above. + * PATCH `/v1/mcp/project/{project_id}`: Update MCP settings for flows and project auth settings. + * POST `/v1/mcp/project/{project_id}/install`: Install MCP client config for Cursor/Windsurf/Claude (local only). + * GET `/v1/mcp/project/{project_id}/installed`: Check which clients have MCP config installed. + + + + +The following endpoints are most often used when contributing to the Langflow codebase, and you need to understand or call endpoints that support frontend-to-backend orchestration or other internal functionality. + +* Base (metadata): + * GET `/v1/all`: Return all available Langflow component types. See [Get all components](/api-reference-api-examples#get-all-components). + * GET `/v1/version`: Return Langflow version. See [Get version](/api-reference-api-examples#get-version). + * GET `/v1/config`: Return deployment configuration. See [Get configuration](/api-reference-api-examples#get-configuration). + * GET `/v1/starter-projects/`: Return a list of templates. + +* [Build endpoints](/api-build) (internal editor support): + * POST `/v1/build/{flow_id}/flow`: Start a flow build and return a job ID. + * GET `/v1/build/{job_id}/events`: Stream or fetch build events. + * POST `/v1/build/{job_id}/cancel`: Cancel a build job. + * POST `/v1/build_public_tmp/{flow_id}/flow`: Build a public flow without auth. + * POST `/v1/validate/prompt`: Validate a prompt payload. + +* [API keys and authentication](/api-keys-and-authentication): + * POST `/v1/login`: Login and set tokens as cookies. + * GET `/v1/auto_login`: Auto-login (if enabled) and set tokens. + * POST `/v1/refresh`: Refresh tokens using refresh cookie. + * POST `/v1/logout`: Logout and clear cookies. + +* [Monitor endpoints](/api-monitor): + * GET `/v1/monitor/builds`: Get vertex builds for a flow. + * DELETE `/v1/monitor/builds`: Delete vertex builds for a flow. 
+ * GET `/v1/monitor/messages/sessions`: List message session IDs (auth required). + * GET `/v1/monitor/messages`: List messages with optional filters. + * DELETE `/v1/monitor/messages`: Delete messages by IDs (auth required). + * PUT `/v1/monitor/messages/{message_id}`: Update a message. + * PATCH `/v1/monitor/messages/session/{old_session_id}`: Change a session ID for all messages in that session. + * DELETE `/v1/monitor/messages/session/{session_id}`: Delete messages by session. + * GET `/v1/monitor/transactions`: List transactions for a flow (paginated). + +* Variables: + * POST `/v1/variables/`: Create a variable, such as an API key, for the user. + * GET `/v1/variables/`: List variables for the user. + * PATCH `/v1/variables/{variable_id}`: Update a variable. + * DELETE `/v1/variables/{variable_id}`: Delete a variable. + +* [Use voice mode](/concepts-voice-mode): + * WS `/v1/voice/ws/flow_as_tool/{flow_id}`: Bi-directional voice session exposing the flow as a tool. + * WS `/v1/voice/ws/flow_as_tool/{flow_id}/{session_id}`: Same as above with explicit session ID. + * WS `/v1/voice/ws/flow_tts/{flow_id}`: Voice-to-text session that runs a flow and returns TTS. + * WS `/v1/voice/ws/flow_tts/{flow_id}/{session_id}`: Same as above with explicit session ID. + * GET `/v1/voice/elevenlabs/voice_ids`: List available ElevenLabs voice IDs for the user. + + + + +The following endpoints are deprecated: + +* POST `/v1/predict/{flow_id}`: Use [`/v1/run/{flow_id}`](/api-flows-run) instead. +* POST `/v1/process/{flow_id}`: Use [`/v1/run/{flow_id}`](/api-flows-run) instead. +* GET `/v1/task/{task_id}`: Deprecated functionality. +* POST `/v1/upload/{flow_id}`: Use [`/files`](/api-files) instead. +* POST `/v1/build/{flow_id}/vertices`: Replaced by [`/monitor/builds`](/api-monitor). +* POST `/v1/build/{flow_id}/vertices/{vertex_id}`: Replaced by [`/monitor/builds`](/api-monitor). +* GET `/v1/build/{flow_id}/{vertex_id}/stream`: Replaced by [`/monitor/builds`](/api-monitor). + + + + ## Next steps -- Use the Langflow API to [run a flow](/api-flows-run). -- Use the Langflow API to [upload files](/api-files). -- Use the Langflow API to [get flow logs](/api-logs). -- Explore all endpoints in the [Langflow API specification](/api). \ No newline at end of file +* Use the Langflow API to [run a flow](/api-flows-run). +* Use the Langflow API to [upload files](/api-files). +* Use the Langflow API to [get flow logs](/api-logs). +* Explore all endpoints in the [Langflow API specification](/api). \ No newline at end of file diff --git a/docs/docs/Concepts/concepts-playground.mdx b/docs/docs/Concepts/concepts-playground.mdx index 69dbfd5c0..b69f8cae8 100644 --- a/docs/docs/Concepts/concepts-playground.mdx +++ b/docs/docs/Concepts/concepts-playground.mdx @@ -29,14 +29,7 @@ For flows that require another type of input, such as a webhook event, file uplo ![Playground window](/img/playground.png) -
-<details>
-<summary>Playground mechanics</summary>
-
-When you run a flow in the **Playground**, Langflow calls the `/build/$FLOW_ID/flow` endpoint in [chat.py](https://github.com/langflow-ai/langflow/blob/main/src/backend/base/langflow/api/v1/chat.py#L143). This call retrieves the flow data, builds a graph, and executes the graph. As each component (or node) is executed, the `build_vertex` function calls `build_and_run`, which may call the individual components' `def_build` method, if it exists. If a component doesn't have a `def_build` function, the build still returns a component.
-
-The `build` function allows components to execute logic at runtime. For example, the [**Recursive Character Text Splitter** component](https://github.com/langflow-ai/langflow/blob/main/src/backend/base/langflow/components/langchain_utilities/recursive_character.py) is a child of the `LCTextSplitterComponent` class. When text needs to be processed, the parent class's `build` method is called, which creates a `RecursiveCharacterTextSplitter` object and uses it to split the text according to the defined parameters. The split text is then passed on to the next component. This all occurs when the component is built.
-
-</details>
+
+For technical details about how the **Playground** works, see [Monitor endpoints](/api-monitor).
 
 ### Review agent logic
 
From 8fff7cc4a4ed41eff466fd7be47656ad6ad5b38e Mon Sep 17 00:00:00 2001
From: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
Date: Thu, 14 Aug 2025 14:56:12 -0400
Subject: [PATCH 10/86] docs: clarify file upload with API snippet (#9384)

* clarify-file-upload-with-api-snippet

* style

* Apply suggestions from code review

Co-authored-by: April I. Murphy <36110273+aimurphy@users.noreply.github.com>

---------

Co-authored-by: April I. Murphy <36110273+aimurphy@users.noreply.github.com>
---
 docs/docs/Concepts/concepts-file-management.mdx | 29 ++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/docs/docs/Concepts/concepts-file-management.mdx b/docs/docs/Concepts/concepts-file-management.mdx
index 584c1f3c1..ca304408f 100644
--- a/docs/docs/Concepts/concepts-file-management.mdx
+++ b/docs/docs/Concepts/concepts-file-management.mdx
@@ -54,7 +54,34 @@ For more information about the **File** component and other data loading compone
 ### Load files at runtime
 
 You can use preloaded files in your flows, and you can load files at runtime if your flow accepts file input.
-For an example, see [Create a chatbot that can ingest files](/chat-with-files).
+To enable file input in your flow, do the following:
+1. Add a [**File** component](/components-data#file) to your flow.
+2. Click **Share**, select **API access**, and then click **Input Schema** to add [`tweaks`](/concepts-publish#input-schema) to the request payload in the flow's automatically generated code snippets.
+3. Expand the **File** section, find the **Files** row, and then enable **Expose Input** to allow the parameter to be set at runtime through the Langflow API.
+4. Close the **Input Schema** pane to return to the **API access** pane.
+The payload in each code snippet now includes `tweaks`, your **File** component's ID, and the `path` key that you enabled in **Input Schema**:
+
+    ```json
+    "tweaks": {
+      "File-qYD5w": {
+        "path": []
+      }
+    }
+    ```
+
+5. When you run this flow programmatically, your script must upload a file to Langflow file management, and then pass the returned `file_path` to the `path` tweak in the `/run` request:
+
+    ```json
+    "tweaks": {
+      "FILE_COMPONENT_ID": {
+        "path": [ "file_path" ]
+      }
+    }
+    ```
+
+    For a complete example, see [Create a chatbot that can ingest files](/chat-with-files) and [Files endpoints](/api-files).
+
+    If you want to upload multiple files, you can pass multiple `file_path` values in the `path` array, such as `[ "path1", "path2" ]`.
 
 ## Upload images
 
From ede849aaf794e3cd83228be9999f9425571c4d95 Mon Sep 17 00:00:00 2001
From: Edwin Jose
Date: Thu, 14 Aug 2025 14:59:05 -0400
Subject: [PATCH 11/86] fix: Set 'Include Metadata' as non-advanced option (#9400)

* Set 'Include Metadata' as non-advanced option

Changed the 'Include Metadata' parameter in KBRetrievalComponent from advanced to non-advanced, making it more visible in the UI.
* [autofix.ci] apply automated fixes * Update Knowledge Retrieval.json * [autofix.ci] apply automated fixes --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../langflow/components/data/kb_retrieval.py | 2 +- .../starter_projects/Knowledge Retrieval.json | 72 ++++++++++--------- 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/src/backend/base/langflow/components/data/kb_retrieval.py b/src/backend/base/langflow/components/data/kb_retrieval.py index 2356b74a3..24d5559e8 100644 --- a/src/backend/base/langflow/components/data/kb_retrieval.py +++ b/src/backend/base/langflow/components/data/kb_retrieval.py @@ -66,7 +66,7 @@ class KBRetrievalComponent(Component): display_name="Include Metadata", info="Whether to include all metadata and embeddings in the output. If false, only content is returned.", value=True, - advanced=True, + advanced=False, ), ] diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json index ba99538fc..ccd8793bc 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json @@ -2,11 +2,11 @@ "data": { "edges": [ { - "className": "", + "animated": false, "data": { "sourceHandle": { "dataType": "TextInput", - "id": "TextInput-Z3rM3", + "id": "TextInput-WyJxO", "name": "text", "output_types": [ "Message" @@ -14,25 +14,26 @@ }, "targetHandle": { "fieldName": "search_query", - "id": "KBRetrieval-tGoBR", + "id": "KBRetrieval-zz3I0", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "xy-edge__TextInput-Z3rM3{œdataTypeœ:œTextInputœ,œidœ:œTextInput-Z3rM3œ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-KBRetrieval-tGoBR{œfieldNameœ:œsearch_queryœ,œidœ:œKBRetrieval-tGoBRœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "TextInput-Z3rM3", - "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-Z3rM3œ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "KBRetrieval-tGoBR", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œKBRetrieval-tGoBRœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "xy-edge__TextInput-WyJxO{œdataTypeœ:œTextInputœ,œidœ:œTextInput-WyJxOœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-KBRetrieval-zz3I0{œfieldNameœ:œsearch_queryœ,œidœ:œKBRetrieval-zz3I0œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "TextInput-WyJxO", + "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-WyJxOœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", + "target": "KBRetrieval-zz3I0", + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œKBRetrieval-zz3I0œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { - "className": "", + "animated": false, "data": { "sourceHandle": { "dataType": "KBRetrieval", - "id": "KBRetrieval-tGoBR", + "id": "KBRetrieval-zz3I0", "name": "chroma_kb_data", "output_types": [ "DataFrame" @@ -40,7 +41,7 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "ChatOutput-tixOe", + "id": "ChatOutput-N7nxz", "inputTypes": [ "Data", "DataFrame", @@ -49,17 +50,18 @@ "type": "other" } }, - "id": "xy-edge__KBRetrieval-tGoBR{œdataTypeœ:œKBRetrievalœ,œidœ:œKBRetrieval-tGoBRœ,œnameœ:œchroma_kb_dataœ,œoutput_typesœ:[œDataFrameœ]}-ChatOutput-tixOe{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-tixOeœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", - "source": "KBRetrieval-tGoBR", - "sourceHandle": "{œdataTypeœ: 
œKBRetrievalœ, œidœ: œKBRetrieval-tGoBRœ, œnameœ: œchroma_kb_dataœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "ChatOutput-tixOe", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-tixOeœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" + "id": "xy-edge__KBRetrieval-zz3I0{œdataTypeœ:œKBRetrievalœ,œidœ:œKBRetrieval-zz3I0œ,œnameœ:œchroma_kb_dataœ,œoutput_typesœ:[œDataFrameœ]}-ChatOutput-N7nxz{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-N7nxzœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "selected": false, + "source": "KBRetrieval-zz3I0", + "sourceHandle": "{œdataTypeœ: œKBRetrievalœ, œidœ: œKBRetrieval-zz3I0œ, œnameœ: œchroma_kb_dataœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "ChatOutput-N7nxz", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-N7nxzœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" } ], "nodes": [ { "data": { - "id": "note-YyBfz", + "id": "note-f86G8", "node": { "description": "## Knowledge Retrieval\n\nA stand-alone component handles the retrieval of ingested knowledge from existing knowledge bases. To retrieve knowledge:\n\n1. Select your knowledge base from the Knowledge Base dropdown. If you do not see it, choose \"Refresh List\".\n2. (Optional) Enter a Search Query to be performed against the knowledge base.\n\nNote that by default, 5 results are returned, which can be configured by clicking Controls at the top of the component.\n", "display_name": "", @@ -70,7 +72,7 @@ }, "dragging": false, "height": 384, - "id": "note-YyBfz", + "id": "note-f86G8", "measured": { "height": 384, "width": 371 @@ -86,7 +88,7 @@ }, { "data": { - "id": "TextInput-Z3rM3", + "id": "TextInput-WyJxO", "node": { "base_classes": [ "Message" @@ -180,7 +182,7 @@ "type": "TextInput" }, "dragging": false, - "id": "TextInput-Z3rM3", + "id": "TextInput-WyJxO", "measured": { "height": 204, "width": 320 @@ -194,7 +196,7 @@ }, { "data": { - "id": "ChatOutput-tixOe", + "id": "ChatOutput-N7nxz", "node": { "base_classes": [ "Message" @@ -492,7 +494,7 @@ "type": "ChatOutput" }, "dragging": false, - "id": "ChatOutput-tixOe", + "id": "ChatOutput-N7nxz", "measured": { "height": 48, "width": 192 @@ -506,7 +508,7 @@ }, { "data": { - "id": "KBRetrieval-tGoBR", + "id": "KBRetrieval-zz3I0", "node": { "base_classes": [ "DataFrame" @@ -527,10 +529,10 @@ ], "frozen": false, "icon": "database", - "last_updated": "2025-08-13T19:46:57.894Z", + "last_updated": "2025-08-14T17:19:22.182Z", "legacy": false, "metadata": { - "code_hash": "f82365a0977f", + "code_hash": "ee2b66958f09", "module": "langflow.components.data.kb_retrieval.KBRetrievalComponent" }, "minimized": false, @@ -587,11 +589,11 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\nfrom pathlib import Path\nfrom typing import Any\n\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.services.auth.utils import decrypt_api_key\nfrom langflow.services.deps import get_settings_service\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = 
Path(knowledge_directory).expanduser()\n\n\nclass KBRetrievalComponent(Component):\n display_name = \"Knowledge Retrieval\"\n description = \"Search and retrieve data from knowledge.\"\n icon = \"database\"\n name = \"KBRetrieval\"\n\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[\n str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()\n ]\n if KNOWLEDGE_BASES_ROOT_PATH.exists()\n else [],\n refresh_button=True,\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n info=\"Optional search query to filter knowledge base data.\",\n ),\n IntInput(\n name=\"top_k\",\n display_name=\"Top K Results\",\n info=\"Number of top results to return from the knowledge base.\",\n value=5,\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"include_metadata\",\n display_name=\"Include Metadata\",\n info=\"Whether to include all metadata and embeddings in the output. If false, only content is returned.\",\n value=True,\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"chroma_kb_data\",\n display_name=\"Results\",\n method=\"get_chroma_kb_data\",\n info=\"Returns the data from the selected knowledge base.\",\n ),\n ]\n\n def _get_knowledge_bases(self) -> list[str]:\n \"\"\"Retrieve a list of available knowledge bases.\n\n Returns:\n A list of knowledge base names.\n \"\"\"\n if not KNOWLEDGE_BASES_ROOT_PATH.exists():\n return []\n\n return [str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()]\n\n def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002\n if field_name == \"knowledge_base\":\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = self._get_knowledge_bases()\n\n # If the selected knowledge base is not available, reset it\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n\n def _get_kb_metadata(self, kb_path: Path) -> dict:\n \"\"\"Load and process knowledge base metadata.\"\"\"\n metadata: dict[str, Any] = {}\n metadata_file = kb_path / \"embedding_metadata.json\"\n if not metadata_file.exists():\n logger.warning(f\"Embedding metadata file not found at {metadata_file}\")\n return metadata\n\n try:\n with metadata_file.open(\"r\", encoding=\"utf-8\") as f:\n metadata = json.load(f)\n except json.JSONDecodeError:\n logger.error(f\"Error decoding JSON from {metadata_file}\")\n return {}\n\n # Decrypt API key if it exists\n if \"api_key\" in metadata and metadata.get(\"api_key\"):\n settings_service = get_settings_service()\n try:\n decrypted_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n metadata[\"api_key\"] = decrypted_key\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. 
Error: {e}\")\n metadata[\"api_key\"] = None\n return metadata\n\n def _build_embeddings(self, metadata: dict):\n \"\"\"Build embedding model from metadata.\"\"\"\n provider = metadata.get(\"embedding_provider\")\n model = metadata.get(\"embedding_model\")\n api_key = metadata.get(\"api_key\")\n chunk_size = metadata.get(\"chunk_size\")\n\n # If user provided a key in the input, it overrides the stored one.\n if self.api_key and self.api_key.get_secret_value():\n api_key = self.api_key.get_secret_value()\n\n # Handle various providers\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required. Provide it in the component's advanced settings.\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=model,\n api_key=api_key,\n chunk_size=chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n # Add other providers here if they become supported in ingest\n msg = f\"Embedding provider '{provider}' is not supported for retrieval.\"\n raise NotImplementedError(msg)\n\n def get_chroma_kb_data(self) -> DataFrame:\n \"\"\"Retrieve data from the selected knowledge base by reading the Chroma collection.\n\n Returns:\n A DataFrame containing the data rows from the knowledge base.\n \"\"\"\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / self.knowledge_base\n\n metadata = self._get_kb_metadata(kb_path)\n if not metadata:\n msg = f\"Metadata not found for knowledge base: {self.knowledge_base}. 
Ensure it has been indexed.\"\n raise ValueError(msg)\n\n # Build the embedder for the knowledge base\n embedding_function = self._build_embeddings(metadata)\n\n # Load vector store\n chroma = Chroma(\n persist_directory=str(kb_path),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # If a search query is provided, perform a similarity search\n if self.search_query:\n # Use the search query to perform a similarity search\n logger.info(f\"Performing similarity search with query: {self.search_query}\")\n results = chroma.similarity_search_with_score(\n query=self.search_query or \"\",\n k=self.top_k,\n )\n else:\n results = chroma.similarity_search(\n query=self.search_query or \"\",\n k=self.top_k,\n )\n\n # For each result, make it a tuple to match the expected output format\n results = [(doc, 0) for doc in results] # Assign a dummy score of 0\n\n # If metadata is enabled, get embeddings for the results\n id_to_embedding = {}\n if self.include_metadata and results:\n doc_ids = [doc[0].metadata.get(\"_id\") for doc in results if doc[0].metadata.get(\"_id\")]\n\n # Only proceed if we have valid document IDs\n if doc_ids:\n # Access underlying client to get embeddings\n collection = chroma._client.get_collection(name=self.knowledge_base)\n embeddings_result = collection.get(where={\"_id\": {\"$in\": doc_ids}}, include=[\"embeddings\", \"metadatas\"])\n\n # Create a mapping from document ID to embedding\n for i, metadata in enumerate(embeddings_result.get(\"metadatas\", [])):\n if metadata and \"_id\" in metadata:\n id_to_embedding[metadata[\"_id\"]] = embeddings_result[\"embeddings\"][i]\n\n # Build output data based on include_metadata setting\n data_list = []\n for doc in results:\n if self.include_metadata:\n # Include all metadata, embeddings, and content\n kwargs = {\n \"content\": doc[0].page_content,\n **doc[0].metadata,\n }\n if self.search_query:\n kwargs[\"_score\"] = -1 * doc[1]\n kwargs[\"_embeddings\"] = id_to_embedding.get(doc[0].metadata.get(\"_id\"))\n else:\n # Only include content\n kwargs = {\n \"content\": doc[0].page_content,\n }\n\n data_list.append(Data(**kwargs))\n\n # Return the DataFrame containing the data\n return DataFrame(data=data_list)\n" + "value": "import json\nfrom pathlib import Path\nfrom typing import Any\n\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.services.auth.utils import decrypt_api_key\nfrom langflow.services.deps import get_settings_service\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KBRetrievalComponent(Component):\n display_name = \"Knowledge Retrieval\"\n description = \"Search and retrieve data from knowledge.\"\n icon = \"database\"\n name = \"KBRetrieval\"\n\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[\n str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()\n ]\n if 
KNOWLEDGE_BASES_ROOT_PATH.exists()\n else [],\n refresh_button=True,\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n info=\"Optional search query to filter knowledge base data.\",\n ),\n IntInput(\n name=\"top_k\",\n display_name=\"Top K Results\",\n info=\"Number of top results to return from the knowledge base.\",\n value=5,\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"include_metadata\",\n display_name=\"Include Metadata\",\n info=\"Whether to include all metadata and embeddings in the output. If false, only content is returned.\",\n value=True,\n advanced=False,\n ),\n ]\n\n outputs = [\n Output(\n name=\"chroma_kb_data\",\n display_name=\"Results\",\n method=\"get_chroma_kb_data\",\n info=\"Returns the data from the selected knowledge base.\",\n ),\n ]\n\n def _get_knowledge_bases(self) -> list[str]:\n \"\"\"Retrieve a list of available knowledge bases.\n\n Returns:\n A list of knowledge base names.\n \"\"\"\n if not KNOWLEDGE_BASES_ROOT_PATH.exists():\n return []\n\n return [str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(\".\") and d.is_dir()]\n\n def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002\n if field_name == \"knowledge_base\":\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = self._get_knowledge_bases()\n\n # If the selected knowledge base is not available, reset it\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n\n def _get_kb_metadata(self, kb_path: Path) -> dict:\n \"\"\"Load and process knowledge base metadata.\"\"\"\n metadata: dict[str, Any] = {}\n metadata_file = kb_path / \"embedding_metadata.json\"\n if not metadata_file.exists():\n logger.warning(f\"Embedding metadata file not found at {metadata_file}\")\n return metadata\n\n try:\n with metadata_file.open(\"r\", encoding=\"utf-8\") as f:\n metadata = json.load(f)\n except json.JSONDecodeError:\n logger.error(f\"Error decoding JSON from {metadata_file}\")\n return {}\n\n # Decrypt API key if it exists\n if \"api_key\" in metadata and metadata.get(\"api_key\"):\n settings_service = get_settings_service()\n try:\n decrypted_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n metadata[\"api_key\"] = decrypted_key\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. Error: {e}\")\n metadata[\"api_key\"] = None\n return metadata\n\n def _build_embeddings(self, metadata: dict):\n \"\"\"Build embedding model from metadata.\"\"\"\n provider = metadata.get(\"embedding_provider\")\n model = metadata.get(\"embedding_model\")\n api_key = metadata.get(\"api_key\")\n chunk_size = metadata.get(\"chunk_size\")\n\n # If user provided a key in the input, it overrides the stored one.\n if self.api_key and self.api_key.get_secret_value():\n api_key = self.api_key.get_secret_value()\n\n # Handle various providers\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required. 
Provide it in the component's advanced settings.\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=model,\n api_key=api_key,\n chunk_size=chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n # Add other providers here if they become supported in ingest\n msg = f\"Embedding provider '{provider}' is not supported for retrieval.\"\n raise NotImplementedError(msg)\n\n def get_chroma_kb_data(self) -> DataFrame:\n \"\"\"Retrieve data from the selected knowledge base by reading the Chroma collection.\n\n Returns:\n A DataFrame containing the data rows from the knowledge base.\n \"\"\"\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / self.knowledge_base\n\n metadata = self._get_kb_metadata(kb_path)\n if not metadata:\n msg = f\"Metadata not found for knowledge base: {self.knowledge_base}. Ensure it has been indexed.\"\n raise ValueError(msg)\n\n # Build the embedder for the knowledge base\n embedding_function = self._build_embeddings(metadata)\n\n # Load vector store\n chroma = Chroma(\n persist_directory=str(kb_path),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # If a search query is provided, perform a similarity search\n if self.search_query:\n # Use the search query to perform a similarity search\n logger.info(f\"Performing similarity search with query: {self.search_query}\")\n results = chroma.similarity_search_with_score(\n query=self.search_query or \"\",\n k=self.top_k,\n )\n else:\n results = chroma.similarity_search(\n query=self.search_query or \"\",\n k=self.top_k,\n )\n\n # For each result, make it a tuple to match the expected output format\n results = [(doc, 0) for doc in results] # Assign a dummy score of 0\n\n # If metadata is enabled, get embeddings for the results\n id_to_embedding = {}\n if self.include_metadata and results:\n doc_ids = [doc[0].metadata.get(\"_id\") for doc in results if doc[0].metadata.get(\"_id\")]\n\n # Only proceed if we have valid document IDs\n if doc_ids:\n # Access underlying client to get embeddings\n collection = chroma._client.get_collection(name=self.knowledge_base)\n embeddings_result = collection.get(where={\"_id\": {\"$in\": doc_ids}}, include=[\"embeddings\", \"metadatas\"])\n\n # Create a mapping from document ID to embedding\n for i, metadata in enumerate(embeddings_result.get(\"metadatas\", [])):\n if metadata and \"_id\" in metadata:\n id_to_embedding[metadata[\"_id\"]] = embeddings_result[\"embeddings\"][i]\n\n # Build output data based on include_metadata setting\n data_list = []\n for doc in results:\n if self.include_metadata:\n # Include all metadata, embeddings, and content\n kwargs = {\n \"content\": doc[0].page_content,\n **doc[0].metadata,\n }\n if self.search_query:\n kwargs[\"_score\"] = -1 * doc[1]\n kwargs[\"_embeddings\"] = id_to_embedding.get(doc[0].metadata.get(\"_id\"))\n else:\n # Only include content\n kwargs = {\n \"content\": doc[0].page_content,\n }\n\n data_list.append(Data(**kwargs))\n\n # Return the DataFrame containing the 
data\n return DataFrame(data=data_list)\n" }, "include_metadata": { "_input_type": "BoolInput", - "advanced": true, + "advanced": false, "display_name": "Include Metadata", "dynamic": false, "info": "Whether to include all metadata and embeddings in the output. If false, only content is returned.", @@ -678,28 +680,28 @@ "type": "KBRetrieval" }, "dragging": false, - "id": "KBRetrieval-tGoBR", + "id": "KBRetrieval-zz3I0", "measured": { - "height": 286, + "height": 329, "width": 320 }, "position": { - "x": 640.6283193600648, - "y": -313.9694258557284 + "x": 616.6226476085393, + "y": -343.13068334363356 }, "selected": false, "type": "genericNode" } ], "viewport": { - "x": 285.0464459586908, - "y": 588.7377652547386, - "zoom": 0.9833370380356916 + "x": 177.06633386268413, + "y": 482.8027480187026, + "zoom": 0.8999566725119924 } }, "description": "An example of performing a vector search against data in a Knowledge Base to retrieve relevant documents.", "endpoint_name": null, - "id": "670745f6-08b1-480e-bdaf-64ba74967cba", + "id": "5487ee05-73d5-4b12-9b41-bc4c3a2f9326", "is_component": false, "last_tested_version": "1.5.0.post1", "name": "Knowledge Retrieval", From c188ec113c9ca46154ad01d0eded1754cc6bef97 Mon Sep 17 00:00:00 2001 From: Jordan Frazier <122494242+jordanrfrazier@users.noreply.github.com> Date: Thu, 14 Aug 2025 16:29:35 -0400 Subject: [PATCH 12/86] fix: enforce authentication for superuser cli command (#9152) * Enforce authentication for superuser cli command * shorten security md * cleanup * use session_scope * re-add uvlock * [autofix.ci] apply automated fixes * ruff * update env example * [autofix.ci] apply automated fixes * better exception handling * [autofix.ci] apply automated fixes * update tests to not use mocks * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes * Remove old test * Catch exceptions for typer * Try output instead of stdout * Use xdist to run in serial * Separate create superuse * [autofix.ci] apply automated fixes * Ruff * [autofix.ci] apply automated fixes * lint --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .env.example | 8 +- SECURITY.md | 34 +++- src/backend/base/langflow/__main__.py | 156 ++++++++++++++---- .../services/database/models/user/crud.py | 7 + .../base/langflow/services/settings/auth.py | 15 +- src/backend/base/langflow/services/utils.py | 11 +- src/backend/tests/unit/test_cli.py | 94 ++++++++++- uv.lock | 140 ++++++++-------- 8 files changed, 353 insertions(+), 112 deletions(-) diff --git a/.env.example b/.env.example index 7b3c76e9b..9d9b283cd 100644 --- a/.env.example +++ b/.env.example @@ -79,12 +79,16 @@ LANGFLOW_REMOVE_API_KEYS= # LANGFLOW_REDIS_CACHE_EXPIRE (default: 3600) LANGFLOW_CACHE_TYPE= -# Set AUTO_LOGIN to false if you want to disable auto login +# Set LANGFLOW_AUTO_LOGIN to false if you want to disable auto login # and use the login form to login. 
LANGFLOW_SUPERUSER and LANGFLOW_SUPERUSER_PASSWORD # must be set if AUTO_LOGIN is set to false # Values: true, false LANGFLOW_AUTO_LOGIN= +# SET LANGFLOW_ENABLE_SUPERUSER_CLI to false to disable +# superuser creation via the CLI +LANGFLOW_ENABLE_SUPERUSER_CLI= + # Superuser username # Example: LANGFLOW_SUPERUSER=admin LANGFLOW_SUPERUSER= @@ -111,4 +115,4 @@ LANGFLOW_STORE_ENVIRONMENT_VARIABLES= # Value must finish with slash / #BACKEND_URL=http://localhost:7860/ -BACKEND_URL= \ No newline at end of file +BACKEND_URL= diff --git a/SECURITY.md b/SECURITY.md index 275df1f23..7c0fea934 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -59,4 +59,36 @@ Setting `LANGFLOW_SKIP_AUTH_AUTO_LOGIN=true` and `LANGFLOW_AUTO_LOGIN=true` skip `LANGFLOW_SKIP_AUTH_AUTO_LOGIN=true` is the default behavior, so users do not need to change existing workflows in 1.5. To update your workflows to require authentication, set `LANGFLOW_SKIP_AUTH_AUTO_LOGIN=false`. -For more information, see [API keys and authentication](https://docs.langflow.org/api-keys-and-authentication). \ No newline at end of file +For more information, see [API keys and authentication](https://docs.langflow.org/api-keys-and-authentication). + +## Security Configuration Guidelines + +### Superuser Creation Security + +The `langflow superuser` CLI command can present a privilege escalation risk if not properly secured. + +#### Security Measures + +1. **Authentication Required in Production** + - When `LANGFLOW_AUTO_LOGIN=false`, superuser creation requires authentication + - Use `--auth-token` parameter with a valid superuser API key or JWT token + +2. **Disable CLI Superuser Creation** + - Set `LANGFLOW_ENABLE_SUPERUSER_CLI=false` to disable the command entirely + - Strongly recommended for production environments + +3. **Secure AUTO_LOGIN Setting** + - Default is `true` for <=1.5. This may change in a future release. + - When `true`, creates default superuser `langflow/langflow` - **ONLY USE IN DEVELOPMENT** + +#### Production Security Configuration + +```bash +# Recommended production settings +export LANGFLOW_AUTO_LOGIN=false +export LANGFLOW_ENABLE_SUPERUSER_CLI=false +export LANGFLOW_SUPERUSER="" +export LANGFLOW_SUPERUSER_PASSWORD="" +export LANGFLOW_DATABASE_URL="" # e.g. 
"postgresql+psycopg://langflow:secure_pass@db.internal:5432/langflow" +export LANGFLOW_SECRET_KEY="your-strong-random-secret-key" +``` diff --git a/src/backend/base/langflow/__main__.py b/src/backend/base/langflow/__main__.py index 470c133af..20f21d6bf 100644 --- a/src/backend/base/langflow/__main__.py +++ b/src/backend/base/langflow/__main__.py @@ -15,7 +15,9 @@ import click import httpx import typer from dotenv import load_dotenv +from fastapi import HTTPException from httpx import HTTPError +from jose import JWTError from multiprocess import cpu_count from multiprocess.context import Process from packaging import version as pkg_version @@ -29,9 +31,9 @@ from langflow.cli.progress import create_langflow_progress from langflow.initial_setup.setup import get_or_create_default_folder from langflow.logging.logger import configure, logger from langflow.main import setup_app -from langflow.services.database.utils import session_getter +from langflow.services.auth.utils import check_key, get_current_user_by_jwt from langflow.services.deps import get_db_service, get_settings_service, session_scope -from langflow.services.settings.constants import DEFAULT_SUPERUSER +from langflow.services.settings.constants import DEFAULT_SUPERUSER, DEFAULT_SUPERUSER_PASSWORD from langflow.services.utils import initialize_services from langflow.utils.version import fetch_latest_version, get_version_info from langflow.utils.version import is_pre_release as langflow_is_pre_release @@ -632,41 +634,138 @@ def print_banner(host: str, port: int, protocol: str) -> None: @app.command() def superuser( - username: str = typer.Option(..., prompt=True, help="Username for the superuser."), - password: str = typer.Option(..., prompt=True, hide_input=True, help="Password for the superuser."), + username: str = typer.Option( + None, help="Username for the superuser. Defaults to 'langflow' when AUTO_LOGIN is enabled." + ), + password: str = typer.Option( + None, help="Password for the superuser. Defaults to 'langflow' when AUTO_LOGIN is enabled." + ), log_level: str = typer.Option("error", help="Logging level.", envvar="LANGFLOW_LOG_LEVEL"), + auth_token: str = typer.Option( + None, help="Authentication token of existing superuser.", envvar="LANGFLOW_SUPERUSER_TOKEN" + ), ) -> None: - """Create a superuser.""" + """Create a superuser. + + When AUTO_LOGIN is enabled, uses default credentials. + In production mode, requires authentication. + """ configure(log_level=log_level) - db_service = get_db_service() - async def _create_superuser(): - await initialize_services() - async with session_getter(db_service) as session: - from langflow.services.auth.utils import create_super_user + asyncio.run(_create_superuser(username, password, auth_token)) - if await create_super_user(db=session, username=username, password=password): - # Verify that the superuser was created - from langflow.services.database.models.user.model import User - stmt = select(User).where(User.username == username) - user: User = (await session.exec(stmt)).first() - if user is None or not user.is_superuser: - typer.echo("Superuser creation failed.") - return - # Now create the first folder for the user - result = await get_or_create_default_folder(session, user.id) - if result: - typer.echo("Default folder created successfully.") - else: - msg = "Could not create default folder." 
- raise RuntimeError(msg) - typer.echo("Superuser created successfully.") +async def _create_superuser(username: str, password: str, auth_token: str | None): + """Create a superuser.""" + await initialize_services() - else: + settings_service = get_settings_service() + # Check if superuser creation via CLI is enabled + if not settings_service.auth_settings.ENABLE_SUPERUSER_CLI: + typer.echo("Error: Superuser creation via CLI is disabled.") + typer.echo("Set LANGFLOW_ENABLE_SUPERUSER_CLI=true to enable this feature.") + raise typer.Exit(1) + + if settings_service.auth_settings.AUTO_LOGIN: + # Force default credentials for AUTO_LOGIN mode + username = DEFAULT_SUPERUSER + password = DEFAULT_SUPERUSER_PASSWORD + else: + # Production mode - prompt for credentials if not provided + if not username: + username = typer.prompt("Username") + if not password: + password = typer.prompt("Password", hide_input=True) + + from langflow.services.database.models.user.crud import get_all_superusers + + existing_superusers = [] + async with session_scope() as session: + # Note that the default superuser is created by the initialize_services() function, + # but leaving this check here in case we change that behavior + existing_superusers = await get_all_superusers(session) + is_first_setup = len(existing_superusers) == 0 + + # If AUTO_LOGIN is true, only allow default superuser creation + if settings_service.auth_settings.AUTO_LOGIN: + if not is_first_setup: + typer.echo("Error: Cannot create additional superusers when AUTO_LOGIN is enabled.") + typer.echo("AUTO_LOGIN mode is for development with only the default superuser.") + typer.echo("To create additional superusers:") + typer.echo("1. Set LANGFLOW_AUTO_LOGIN=false") + typer.echo("2. Run this command again with --auth-token") + raise typer.Exit(1) + + typer.echo(f"AUTO_LOGIN enabled. Creating default superuser '{username}'...") + typer.echo(f"Note: Default credentials are {DEFAULT_SUPERUSER}/{DEFAULT_SUPERUSER_PASSWORD}") + # AUTO_LOGIN is false - production mode + elif is_first_setup: + typer.echo("No superusers found. Creating first superuser...") + else: + # Authentication is required in production mode + if not auth_token: + typer.echo("Error: Creating a superuser requires authentication.") + typer.echo("Please provide --auth-token with a valid superuser API key or JWT token.") + typer.echo("To get a token, use: `uv run langflow api_key`") + raise typer.Exit(1) + + # Validate the auth token + try: + auth_user = None + async with session_scope() as session: + # Try JWT first + user = None + try: + user = await get_current_user_by_jwt(auth_token, session) + except (JWTError, HTTPException): + # Try API key + api_key_result = await check_key(session, auth_token) + if api_key_result and hasattr(api_key_result, "is_superuser"): + user = api_key_result + auth_user = user + + if not auth_user or not auth_user.is_superuser: + typer.echo( + "Error: Invalid token or insufficient privileges. Only superusers can create other superusers." 
+ ) + raise typer.Exit(1) + except typer.Exit: + raise # Re-raise typer.Exit without wrapping + except Exception as e: # noqa: BLE001 + typer.echo(f"Error: Authentication failed - {e!s}") + raise typer.Exit(1) from None + + # Auth complete, create the superuser + async with session_scope() as session: + from langflow.services.auth.utils import create_super_user + + if await create_super_user(db=session, username=username, password=password): + # Verify that the superuser was created + from langflow.services.database.models.user.model import User + + stmt = select(User).where(User.username == username) + created_user: User = (await session.exec(stmt)).first() + if created_user is None or not created_user.is_superuser: typer.echo("Superuser creation failed.") + return + # Now create the first folder for the user + result = await get_or_create_default_folder(session, created_user.id) + if result: + typer.echo("Default folder created successfully.") + else: + msg = "Could not create default folder." + raise RuntimeError(msg) - asyncio.run(_create_superuser()) + # Log the superuser creation for audit purposes + logger.warning( + f"SECURITY AUDIT: New superuser '{username}' created via CLI command" + + (" by authenticated user" if auth_token else " (first-time setup)") + ) + typer.echo("Superuser created successfully.") + + else: + logger.error(f"SECURITY AUDIT: Failed attempt to create superuser '{username}' via CLI") + typer.echo("Superuser creation failed.") # command to copy the langflow database from the cache to the current directory @@ -749,6 +848,7 @@ def api_key( settings_service = get_settings_service() auth_settings = settings_service.auth_settings if not auth_settings.AUTO_LOGIN: + # TODO: Allow non-auto-login users to create API keys via CLI typer.echo("Auto login is disabled. API keys cannot be created through the CLI.") return None diff --git a/src/backend/base/langflow/services/database/models/user/crud.py b/src/backend/base/langflow/services/database/models/user/crud.py index f65f1b395..85eb5fef9 100644 --- a/src/backend/base/langflow/services/database/models/user/crud.py +++ b/src/backend/base/langflow/services/database/models/user/crud.py @@ -60,3 +60,10 @@ async def update_user_last_login_at(user_id: UUID, db: AsyncSession): return await update_user(user, user_data, db) except Exception as e: # noqa: BLE001 logger.error(f"Error updating user last login at: {e!s}") + + +async def get_all_superusers(db: AsyncSession) -> list[User]: + """Get all superuser accounts from the database.""" + stmt = select(User).where(User.is_superuser == True) # noqa: E712 + result = await db.exec(stmt) + return list(result.all()) diff --git a/src/backend/base/langflow/services/settings/auth.py b/src/backend/base/langflow/services/settings/auth.py index 8ff19e725..aacb59d9a 100644 --- a/src/backend/base/langflow/services/settings/auth.py +++ b/src/backend/base/langflow/services/settings/auth.py @@ -27,12 +27,25 @@ class AuthSettings(BaseSettings): API_KEY_ALGORITHM: str = "HS256" API_V1_STR: str = "/api/v1" - AUTO_LOGIN: bool = True + AUTO_LOGIN: bool = Field( + default=True, # TODO: Set to False in v1.6 + description=( + "Enable automatic login with default credentials. " + "SECURITY WARNING: This bypasses authentication and should only be used in development environments. " + "Set to False in production." 
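[Reviewer note] The new `get_all_superusers` helper in crud.py is what the CLI uses to detect first-time setup. A hedged sketch of consuming it from any async context; the imports are taken verbatim from the patch, while `is_first_setup` is an illustrative name, not something this change adds:

```python
# Hedged sketch: mirror the first-setup check in _create_superuser above.
from langflow.services.database.models.user.crud import get_all_superusers
from langflow.services.deps import session_scope


async def is_first_setup() -> bool:
    """Return True when no superuser account exists yet."""
    async with session_scope() as session:
        # get_all_superusers() selects every User with is_superuser == True.
        return len(await get_all_superusers(session)) == 0
```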
+ ), + ) """If True, the application will attempt to log in automatically as a super user.""" skip_auth_auto_login: bool = True """If True, the application will skip authentication when AUTO_LOGIN is enabled. This will be removed in v1.6""" + ENABLE_SUPERUSER_CLI: bool = Field( + default=True, + description="Allow creation of superusers via CLI. Set to False in production for security.", + ) + """If True, allows creation of superusers via the CLI 'langflow superuser' command.""" + NEW_USER_IS_ACTIVE: bool = False SUPERUSER: str = DEFAULT_SUPERUSER SUPERUSER_PASSWORD: str = DEFAULT_SUPERUSER_PASSWORD diff --git a/src/backend/base/langflow/services/utils.py b/src/backend/base/langflow/services/utils.py index 1a533ce45..9217cb614 100644 --- a/src/backend/base/langflow/services/utils.py +++ b/src/backend/base/langflow/services/utils.py @@ -68,15 +68,20 @@ async def get_or_create_super_user(session: AsyncSession, username, password, is return await create_super_user(username, password, db=session) -async def setup_superuser(settings_service, session: AsyncSession) -> None: +async def setup_superuser(settings_service: SettingsService, session: AsyncSession) -> None: if settings_service.auth_settings.AUTO_LOGIN: logger.debug("AUTO_LOGIN is set to True. Creating default superuser.") + username = DEFAULT_SUPERUSER + password = DEFAULT_SUPERUSER_PASSWORD else: # Remove the default superuser if it exists await teardown_superuser(settings_service, session) + username = settings_service.auth_settings.SUPERUSER + password = settings_service.auth_settings.SUPERUSER_PASSWORD - username = settings_service.auth_settings.SUPERUSER - password = settings_service.auth_settings.SUPERUSER_PASSWORD + if not username or not password: + msg = "Username and password must be set" + raise ValueError(msg) is_default = (username == DEFAULT_SUPERUSER) and (password == DEFAULT_SUPERUSER_PASSWORD) diff --git a/src/backend/tests/unit/test_cli.py b/src/backend/tests/unit/test_cli.py index c174c5949..46b7ce244 100644 --- a/src/backend/tests/unit/test_cli.py +++ b/src/backend/tests/unit/test_cli.py @@ -1,9 +1,11 @@ import socket import threading import time +from unittest.mock import patch import pytest -from langflow.__main__ import app +import typer +from langflow.__main__ import _create_superuser, app from langflow.services import deps @@ -57,7 +59,89 @@ def test_components_path(runner, default_settings, tmp_path): assert str(temp_dir) in settings_service.settings.components_path -def test_superuser(runner): - result = runner.invoke(app, ["superuser"], input="admin\nadmin\n") - assert result.exit_code == 0, result.stdout - assert "Superuser created successfully." 
in result.stdout +@pytest.mark.xdist_group(name="serial-superuser-tests") +class TestSuperuserCommand: + """Deterministic tests for the superuser CLI command.""" + + @pytest.mark.asyncio + async def test_additional_superuser_requires_auth_production(self, client, active_super_user): # noqa: ARG002 + """Test additional superuser creation requires authentication in production.""" + # We already have active_super_user from the fixture, so we're not in first setup + with ( + patch("langflow.services.deps.get_settings_service") as mock_settings, + patch("langflow.__main__.get_settings_service") as mock_settings2, + ): + # Configure settings for production mode (AUTO_LOGIN=False) + mock_auth_settings = type("MockAuthSettings", (), {"AUTO_LOGIN": False, "ENABLE_SUPERUSER_CLI": True})() + mock_settings.return_value.auth_settings = mock_auth_settings + mock_settings2.return_value.auth_settings = mock_auth_settings + + # Try to create a superuser without auth - should fail + with pytest.raises(typer.Exit) as exc_info: + await _create_superuser("newuser", "newpass", None) + + assert exc_info.value.exit_code == 1 + + @pytest.mark.asyncio + async def test_additional_superuser_blocked_in_auto_login_mode(self, client, active_super_user): # noqa: ARG002 + """Test additional superuser creation blocked when AUTO_LOGIN=true.""" + # We already have active_super_user from the fixture, so we're not in first setup + with ( + patch("langflow.services.deps.get_settings_service") as mock_settings, + patch("langflow.__main__.get_settings_service") as mock_settings2, + ): + # Configure settings for AUTO_LOGIN mode + mock_auth_settings = type("MockAuthSettings", (), {"AUTO_LOGIN": True, "ENABLE_SUPERUSER_CLI": True})() + mock_settings.return_value.auth_settings = mock_auth_settings + mock_settings2.return_value.auth_settings = mock_auth_settings + + # Try to create a superuser - should fail + with pytest.raises(typer.Exit) as exc_info: + await _create_superuser("newuser", "newpass", None) + + assert exc_info.value.exit_code == 1 + + @pytest.mark.asyncio + async def test_cli_disabled_blocks_creation(self, client): # noqa: ARG002 + """Test ENABLE_SUPERUSER_CLI=false blocks superuser creation.""" + with ( + patch("langflow.services.deps.get_settings_service") as mock_settings, + patch("langflow.__main__.get_settings_service") as mock_settings2, + ): + mock_auth_settings = type("MockAuthSettings", (), {"AUTO_LOGIN": True, "ENABLE_SUPERUSER_CLI": False})() + mock_settings.return_value.auth_settings = mock_auth_settings + mock_settings2.return_value.auth_settings = mock_auth_settings + + # Try to create a superuser - should fail + with pytest.raises(typer.Exit) as exc_info: + await _create_superuser("admin", "password", None) + + assert exc_info.value.exit_code == 1 + + @pytest.mark.skip(reason="Skip -- default superuser is created by initialize_services() function") + @pytest.mark.asyncio + async def test_auto_login_forces_default_credentials(self, client): + """Test AUTO_LOGIN=true forces default credentials.""" + # Since client fixture already creates default user, we need to test in a clean DB scenario + # But that's why this test is skipped - the behavior is already handled by initialize_services + + @pytest.mark.asyncio + async def test_failed_auth_token_validation(self, client, active_super_user): # noqa: ARG002 + """Test failed superuser creation with invalid auth token.""" + # We already have active_super_user from the fixture, so we're not in first setup + with ( + 
patch("langflow.services.deps.get_settings_service") as mock_settings, + patch("langflow.__main__.get_settings_service") as mock_settings2, + patch("langflow.__main__.get_current_user_by_jwt", side_effect=Exception("Invalid token")), + patch("langflow.__main__.check_key", return_value=None), + ): + # Configure settings for production mode (AUTO_LOGIN=False) + mock_auth_settings = type("MockAuthSettings", (), {"AUTO_LOGIN": False, "ENABLE_SUPERUSER_CLI": True})() + mock_settings.return_value.auth_settings = mock_auth_settings + mock_settings2.return_value.auth_settings = mock_auth_settings + + # Try to create a superuser with invalid token - should fail + with pytest.raises(typer.Exit) as exc_info: + await _create_superuser("newuser", "newpass", "invalid-token") + + assert exc_info.value.exit_code == 1 diff --git a/uv.lock b/uv.lock index b0d0f533c..d7961afa7 100644 --- a/uv.lock +++ b/uv.lock @@ -1220,7 +1220,7 @@ wheels = [ [[package]] name = "codeflash" -version = "0.15.5" +version = "0.15.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -1250,7 +1250,7 @@ dependencies = [ { name = "unidiff" }, { name = "unittest-xml-reporting" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/23/a2/f431963f72a45b50865607b929570a7727b2e9f5cb484f706bb79203d1aa/codeflash-0.15.5.tar.gz", hash = "sha256:331df47373af93341a952320fbee0be2e882f6b5cd293f8c090a87715522e733", size = 181261, upload-time = "2025-07-14T04:37:52.297Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ff/63/157d025e8af6a7347880b5c407f49962c084e5f84c2dca56893fcce8cacc/codeflash-0.15.6.tar.gz", hash = "sha256:d0e51bd6a3c0e20fe65320651404749bce381186e6427f728b19a664ffa882b7", size = 181650, upload-time = "2025-07-24T21:09:01.826Z" } [[package]] name = "cohere" @@ -1308,14 +1308,11 @@ wheels = [ [[package]] name = "comm" -version = "0.2.2" +version = "0.2.3" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "traitlets" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/a8/fb783cb0abe2b5fded9f55e5703015cdf1c9c85b3669087c538dd15a6a86/comm-0.2.2.tar.gz", hash = "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e", size = 6210, upload-time = "2024-03-12T16:53:41.133Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/13/7d740c5849255756bc17888787313b61fd38a0a8304fc4f073dfc46122aa/comm-0.2.3.tar.gz", hash = "sha256:2dc8048c10962d55d7ad693be1e7045d891b7ce8d999c97963a5e3e99c055971", size = 6319, upload-time = "2025-07-25T14:02:04.452Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3", size = 7180, upload-time = "2024-03-12T16:53:39.226Z" }, + { url = "https://files.pythonhosted.org/packages/60/97/891a0971e1e4a8c5d2b20bbe0e524dc04548d2307fee33cdeba148fd4fc7/comm-0.2.3-py3-none-any.whl", hash = "sha256:c615d91d75f7f04f095b30d1c1711babd43bdc6419c1be9886a85f2f4e489417", size = 7294, upload-time = "2025-07-25T14:02:02.896Z" }, ] [[package]] @@ -1414,66 +1411,65 @@ wheels = [ [[package]] name = "coverage" -version = "7.9.2" +version = "7.10.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/04/b7/c0465ca253df10a9e8dae0692a4ae6e9726d245390aaef92360e1d6d3832/coverage-7.9.2.tar.gz", hash = "sha256:997024fa51e3290264ffd7492ec97d0690293ccd2b45a6cd7d82d945a4a80c8b", 
size = 813556, upload-time = "2025-07-03T10:54:15.101Z" } +sdist = { url = "https://files.pythonhosted.org/packages/87/0e/66dbd4c6a7f0758a8d18044c048779ba21fb94856e1edcf764bd5403e710/coverage-7.10.1.tar.gz", hash = "sha256:ae2b4856f29ddfe827106794f3589949a57da6f0d38ab01e24ec35107979ba57", size = 819938, upload-time = "2025-07-27T14:13:39.045Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/0d/5c2114fd776c207bd55068ae8dc1bef63ecd1b767b3389984a8e58f2b926/coverage-7.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:66283a192a14a3854b2e7f3418d7db05cdf411012ab7ff5db98ff3b181e1f912", size = 212039, upload-time = "2025-07-03T10:52:38.955Z" }, - { url = "https://files.pythonhosted.org/packages/cf/ad/dc51f40492dc2d5fcd31bb44577bc0cc8920757d6bc5d3e4293146524ef9/coverage-7.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4e01d138540ef34fcf35c1aa24d06c3de2a4cffa349e29a10056544f35cca15f", size = 212428, upload-time = "2025-07-03T10:52:41.36Z" }, - { url = "https://files.pythonhosted.org/packages/a2/a3/55cb3ff1b36f00df04439c3993d8529193cdf165a2467bf1402539070f16/coverage-7.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f22627c1fe2745ee98d3ab87679ca73a97e75ca75eb5faee48660d060875465f", size = 241534, upload-time = "2025-07-03T10:52:42.956Z" }, - { url = "https://files.pythonhosted.org/packages/eb/c9/a8410b91b6be4f6e9c2e9f0dce93749b6b40b751d7065b4410bf89cb654b/coverage-7.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b1c2d8363247b46bd51f393f86c94096e64a1cf6906803fa8d5a9d03784bdbf", size = 239408, upload-time = "2025-07-03T10:52:44.199Z" }, - { url = "https://files.pythonhosted.org/packages/ff/c4/6f3e56d467c612b9070ae71d5d3b114c0b899b5788e1ca3c93068ccb7018/coverage-7.9.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c10c882b114faf82dbd33e876d0cbd5e1d1ebc0d2a74ceef642c6152f3f4d547", size = 240552, upload-time = "2025-07-03T10:52:45.477Z" }, - { url = "https://files.pythonhosted.org/packages/fd/20/04eda789d15af1ce79bce5cc5fd64057c3a0ac08fd0576377a3096c24663/coverage-7.9.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:de3c0378bdf7066c3988d66cd5232d161e933b87103b014ab1b0b4676098fa45", size = 240464, upload-time = "2025-07-03T10:52:46.809Z" }, - { url = "https://files.pythonhosted.org/packages/a9/5a/217b32c94cc1a0b90f253514815332d08ec0812194a1ce9cca97dda1cd20/coverage-7.9.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1e2f097eae0e5991e7623958a24ced3282676c93c013dde41399ff63e230fcf2", size = 239134, upload-time = "2025-07-03T10:52:48.149Z" }, - { url = "https://files.pythonhosted.org/packages/34/73/1d019c48f413465eb5d3b6898b6279e87141c80049f7dbf73fd020138549/coverage-7.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28dc1f67e83a14e7079b6cea4d314bc8b24d1aed42d3582ff89c0295f09b181e", size = 239405, upload-time = "2025-07-03T10:52:49.687Z" }, - { url = "https://files.pythonhosted.org/packages/49/6c/a2beca7aa2595dad0c0d3f350382c381c92400efe5261e2631f734a0e3fe/coverage-7.9.2-cp310-cp310-win32.whl", hash = "sha256:bf7d773da6af9e10dbddacbf4e5cab13d06d0ed93561d44dae0188a42c65be7e", size = 214519, upload-time = "2025-07-03T10:52:51.036Z" }, - { url = "https://files.pythonhosted.org/packages/fc/c8/91e5e4a21f9a51e2c7cdd86e587ae01a4fcff06fc3fa8cde4d6f7cf68df4/coverage-7.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:0c0378ba787681ab1897f7c89b415bd56b0b2d9a47e5a3d8dc0ea55aac118d6c", size = 215400, 
upload-time = "2025-07-03T10:52:52.313Z" }, - { url = "https://files.pythonhosted.org/packages/39/40/916786453bcfafa4c788abee4ccd6f592b5b5eca0cd61a32a4e5a7ef6e02/coverage-7.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a7a56a2964a9687b6aba5b5ced6971af308ef6f79a91043c05dd4ee3ebc3e9ba", size = 212152, upload-time = "2025-07-03T10:52:53.562Z" }, - { url = "https://files.pythonhosted.org/packages/9f/66/cc13bae303284b546a030762957322bbbff1ee6b6cb8dc70a40f8a78512f/coverage-7.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:123d589f32c11d9be7fe2e66d823a236fe759b0096f5db3fb1b75b2fa414a4fa", size = 212540, upload-time = "2025-07-03T10:52:55.196Z" }, - { url = "https://files.pythonhosted.org/packages/0f/3c/d56a764b2e5a3d43257c36af4a62c379df44636817bb5f89265de4bf8bd7/coverage-7.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:333b2e0ca576a7dbd66e85ab402e35c03b0b22f525eed82681c4b866e2e2653a", size = 245097, upload-time = "2025-07-03T10:52:56.509Z" }, - { url = "https://files.pythonhosted.org/packages/b1/46/bd064ea8b3c94eb4ca5d90e34d15b806cba091ffb2b8e89a0d7066c45791/coverage-7.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:326802760da234baf9f2f85a39e4a4b5861b94f6c8d95251f699e4f73b1835dc", size = 242812, upload-time = "2025-07-03T10:52:57.842Z" }, - { url = "https://files.pythonhosted.org/packages/43/02/d91992c2b29bc7afb729463bc918ebe5f361be7f1daae93375a5759d1e28/coverage-7.9.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19e7be4cfec248df38ce40968c95d3952fbffd57b400d4b9bb580f28179556d2", size = 244617, upload-time = "2025-07-03T10:52:59.239Z" }, - { url = "https://files.pythonhosted.org/packages/b7/4f/8fadff6bf56595a16d2d6e33415841b0163ac660873ed9a4e9046194f779/coverage-7.9.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0b4a4cb73b9f2b891c1788711408ef9707666501ba23684387277ededab1097c", size = 244263, upload-time = "2025-07-03T10:53:00.601Z" }, - { url = "https://files.pythonhosted.org/packages/9b/d2/e0be7446a2bba11739edb9f9ba4eff30b30d8257370e237418eb44a14d11/coverage-7.9.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2c8937fa16c8c9fbbd9f118588756e7bcdc7e16a470766a9aef912dd3f117dbd", size = 242314, upload-time = "2025-07-03T10:53:01.932Z" }, - { url = "https://files.pythonhosted.org/packages/9d/7d/dcbac9345000121b8b57a3094c2dfcf1ccc52d8a14a40c1d4bc89f936f80/coverage-7.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:42da2280c4d30c57a9b578bafd1d4494fa6c056d4c419d9689e66d775539be74", size = 242904, upload-time = "2025-07-03T10:53:03.478Z" }, - { url = "https://files.pythonhosted.org/packages/41/58/11e8db0a0c0510cf31bbbdc8caf5d74a358b696302a45948d7c768dfd1cf/coverage-7.9.2-cp311-cp311-win32.whl", hash = "sha256:14fa8d3da147f5fdf9d298cacc18791818f3f1a9f542c8958b80c228320e90c6", size = 214553, upload-time = "2025-07-03T10:53:05.174Z" }, - { url = "https://files.pythonhosted.org/packages/3a/7d/751794ec8907a15e257136e48dc1021b1f671220ecccfd6c4eaf30802714/coverage-7.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:549cab4892fc82004f9739963163fd3aac7a7b0df430669b75b86d293d2df2a7", size = 215441, upload-time = "2025-07-03T10:53:06.472Z" }, - { url = "https://files.pythonhosted.org/packages/62/5b/34abcedf7b946c1c9e15b44f326cb5b0da852885312b30e916f674913428/coverage-7.9.2-cp311-cp311-win_arm64.whl", hash = "sha256:c2667a2b913e307f06aa4e5677f01a9746cd08e4b35e14ebcde6420a9ebb4c62", size = 213873, upload-time = 
"2025-07-03T10:53:07.699Z" }, - { url = "https://files.pythonhosted.org/packages/53/d7/7deefc6fd4f0f1d4c58051f4004e366afc9e7ab60217ac393f247a1de70a/coverage-7.9.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae9eb07f1cfacd9cfe8eaee6f4ff4b8a289a668c39c165cd0c8548484920ffc0", size = 212344, upload-time = "2025-07-03T10:53:09.3Z" }, - { url = "https://files.pythonhosted.org/packages/95/0c/ee03c95d32be4d519e6a02e601267769ce2e9a91fc8faa1b540e3626c680/coverage-7.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9ce85551f9a1119f02adc46d3014b5ee3f765deac166acf20dbb851ceb79b6f3", size = 212580, upload-time = "2025-07-03T10:53:11.52Z" }, - { url = "https://files.pythonhosted.org/packages/8b/9f/826fa4b544b27620086211b87a52ca67592622e1f3af9e0a62c87aea153a/coverage-7.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8f6389ac977c5fb322e0e38885fbbf901743f79d47f50db706e7644dcdcb6e1", size = 246383, upload-time = "2025-07-03T10:53:13.134Z" }, - { url = "https://files.pythonhosted.org/packages/7f/b3/4477aafe2a546427b58b9c540665feff874f4db651f4d3cb21b308b3a6d2/coverage-7.9.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff0d9eae8cdfcd58fe7893b88993723583a6ce4dfbfd9f29e001922544f95615", size = 243400, upload-time = "2025-07-03T10:53:14.614Z" }, - { url = "https://files.pythonhosted.org/packages/f8/c2/efffa43778490c226d9d434827702f2dfbc8041d79101a795f11cbb2cf1e/coverage-7.9.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fae939811e14e53ed8a9818dad51d434a41ee09df9305663735f2e2d2d7d959b", size = 245591, upload-time = "2025-07-03T10:53:15.872Z" }, - { url = "https://files.pythonhosted.org/packages/c6/e7/a59888e882c9a5f0192d8627a30ae57910d5d449c80229b55e7643c078c4/coverage-7.9.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:31991156251ec202c798501e0a42bbdf2169dcb0f137b1f5c0f4267f3fc68ef9", size = 245402, upload-time = "2025-07-03T10:53:17.124Z" }, - { url = "https://files.pythonhosted.org/packages/92/a5/72fcd653ae3d214927edc100ce67440ed8a0a1e3576b8d5e6d066ed239db/coverage-7.9.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d0d67963f9cbfc7c7f96d4ac74ed60ecbebd2ea6eeb51887af0f8dce205e545f", size = 243583, upload-time = "2025-07-03T10:53:18.781Z" }, - { url = "https://files.pythonhosted.org/packages/5c/f5/84e70e4df28f4a131d580d7d510aa1ffd95037293da66fd20d446090a13b/coverage-7.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49b752a2858b10580969ec6af6f090a9a440a64a301ac1528d7ca5f7ed497f4d", size = 244815, upload-time = "2025-07-03T10:53:20.168Z" }, - { url = "https://files.pythonhosted.org/packages/39/e7/d73d7cbdbd09fdcf4642655ae843ad403d9cbda55d725721965f3580a314/coverage-7.9.2-cp312-cp312-win32.whl", hash = "sha256:88d7598b8ee130f32f8a43198ee02edd16d7f77692fa056cb779616bbea1b355", size = 214719, upload-time = "2025-07-03T10:53:21.521Z" }, - { url = "https://files.pythonhosted.org/packages/9f/d6/7486dcc3474e2e6ad26a2af2db7e7c162ccd889c4c68fa14ea8ec189c9e9/coverage-7.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:9dfb070f830739ee49d7c83e4941cc767e503e4394fdecb3b54bfdac1d7662c0", size = 215509, upload-time = "2025-07-03T10:53:22.853Z" }, - { url = "https://files.pythonhosted.org/packages/b7/34/0439f1ae2593b0346164d907cdf96a529b40b7721a45fdcf8b03c95fcd90/coverage-7.9.2-cp312-cp312-win_arm64.whl", hash = "sha256:4e2c058aef613e79df00e86b6d42a641c877211384ce5bd07585ed7ba71ab31b", size = 213910, upload-time = 
"2025-07-03T10:53:24.472Z" }, - { url = "https://files.pythonhosted.org/packages/94/9d/7a8edf7acbcaa5e5c489a646226bed9591ee1c5e6a84733c0140e9ce1ae1/coverage-7.9.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:985abe7f242e0d7bba228ab01070fde1d6c8fa12f142e43debe9ed1dde686038", size = 212367, upload-time = "2025-07-03T10:53:25.811Z" }, - { url = "https://files.pythonhosted.org/packages/e8/9e/5cd6f130150712301f7e40fb5865c1bc27b97689ec57297e568d972eec3c/coverage-7.9.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c3939264a76d44fde7f213924021ed31f55ef28111a19649fec90c0f109e6d", size = 212632, upload-time = "2025-07-03T10:53:27.075Z" }, - { url = "https://files.pythonhosted.org/packages/a8/de/6287a2c2036f9fd991c61cefa8c64e57390e30c894ad3aa52fac4c1e14a8/coverage-7.9.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae5d563e970dbe04382f736ec214ef48103d1b875967c89d83c6e3f21706d5b3", size = 245793, upload-time = "2025-07-03T10:53:28.408Z" }, - { url = "https://files.pythonhosted.org/packages/06/cc/9b5a9961d8160e3cb0b558c71f8051fe08aa2dd4b502ee937225da564ed1/coverage-7.9.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdd612e59baed2a93c8843c9a7cb902260f181370f1d772f4842987535071d14", size = 243006, upload-time = "2025-07-03T10:53:29.754Z" }, - { url = "https://files.pythonhosted.org/packages/49/d9/4616b787d9f597d6443f5588619c1c9f659e1f5fc9eebf63699eb6d34b78/coverage-7.9.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:256ea87cb2a1ed992bcdfc349d8042dcea1b80436f4ddf6e246d6bee4b5d73b6", size = 244990, upload-time = "2025-07-03T10:53:31.098Z" }, - { url = "https://files.pythonhosted.org/packages/48/83/801cdc10f137b2d02b005a761661649ffa60eb173dcdaeb77f571e4dc192/coverage-7.9.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f44ae036b63c8ea432f610534a2668b0c3aee810e7037ab9d8ff6883de480f5b", size = 245157, upload-time = "2025-07-03T10:53:32.717Z" }, - { url = "https://files.pythonhosted.org/packages/c8/a4/41911ed7e9d3ceb0ffb019e7635468df7499f5cc3edca5f7dfc078e9c5ec/coverage-7.9.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:82d76ad87c932935417a19b10cfe7abb15fd3f923cfe47dbdaa74ef4e503752d", size = 243128, upload-time = "2025-07-03T10:53:34.009Z" }, - { url = "https://files.pythonhosted.org/packages/10/41/344543b71d31ac9cb00a664d5d0c9ef134a0fe87cb7d8430003b20fa0b7d/coverage-7.9.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:619317bb86de4193debc712b9e59d5cffd91dc1d178627ab2a77b9870deb2868", size = 244511, upload-time = "2025-07-03T10:53:35.434Z" }, - { url = "https://files.pythonhosted.org/packages/d5/81/3b68c77e4812105e2a060f6946ba9e6f898ddcdc0d2bfc8b4b152a9ae522/coverage-7.9.2-cp313-cp313-win32.whl", hash = "sha256:0a07757de9feb1dfafd16ab651e0f628fd7ce551604d1bf23e47e1ddca93f08a", size = 214765, upload-time = "2025-07-03T10:53:36.787Z" }, - { url = "https://files.pythonhosted.org/packages/06/a2/7fac400f6a346bb1a4004eb2a76fbff0e242cd48926a2ce37a22a6a1d917/coverage-7.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:115db3d1f4d3f35f5bb021e270edd85011934ff97c8797216b62f461dd69374b", size = 215536, upload-time = "2025-07-03T10:53:38.188Z" }, - { url = "https://files.pythonhosted.org/packages/08/47/2c6c215452b4f90d87017e61ea0fd9e0486bb734cb515e3de56e2c32075f/coverage-7.9.2-cp313-cp313-win_arm64.whl", hash = "sha256:48f82f889c80af8b2a7bb6e158d95a3fbec6a3453a1004d04e4f3b5945a02694", size = 213943, upload-time = 
"2025-07-03T10:53:39.492Z" }, - { url = "https://files.pythonhosted.org/packages/a3/46/e211e942b22d6af5e0f323faa8a9bc7c447a1cf1923b64c47523f36ed488/coverage-7.9.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:55a28954545f9d2f96870b40f6c3386a59ba8ed50caf2d949676dac3ecab99f5", size = 213088, upload-time = "2025-07-03T10:53:40.874Z" }, - { url = "https://files.pythonhosted.org/packages/d2/2f/762551f97e124442eccd907bf8b0de54348635b8866a73567eb4e6417acf/coverage-7.9.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cdef6504637731a63c133bb2e6f0f0214e2748495ec15fe42d1e219d1b133f0b", size = 213298, upload-time = "2025-07-03T10:53:42.218Z" }, - { url = "https://files.pythonhosted.org/packages/7a/b7/76d2d132b7baf7360ed69be0bcab968f151fa31abe6d067f0384439d9edb/coverage-7.9.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcd5ebe66c7a97273d5d2ddd4ad0ed2e706b39630ed4b53e713d360626c3dbb3", size = 256541, upload-time = "2025-07-03T10:53:43.823Z" }, - { url = "https://files.pythonhosted.org/packages/a0/17/392b219837d7ad47d8e5974ce5f8dc3deb9f99a53b3bd4d123602f960c81/coverage-7.9.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9303aed20872d7a3c9cb39c5d2b9bdbe44e3a9a1aecb52920f7e7495410dfab8", size = 252761, upload-time = "2025-07-03T10:53:45.19Z" }, - { url = "https://files.pythonhosted.org/packages/d5/77/4256d3577fe1b0daa8d3836a1ebe68eaa07dd2cbaf20cf5ab1115d6949d4/coverage-7.9.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc18ea9e417a04d1920a9a76fe9ebd2f43ca505b81994598482f938d5c315f46", size = 254917, upload-time = "2025-07-03T10:53:46.931Z" }, - { url = "https://files.pythonhosted.org/packages/53/99/fc1a008eef1805e1ddb123cf17af864743354479ea5129a8f838c433cc2c/coverage-7.9.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6406cff19880aaaadc932152242523e892faff224da29e241ce2fca329866584", size = 256147, upload-time = "2025-07-03T10:53:48.289Z" }, - { url = "https://files.pythonhosted.org/packages/92/c0/f63bf667e18b7f88c2bdb3160870e277c4874ced87e21426128d70aa741f/coverage-7.9.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2d0d4f6ecdf37fcc19c88fec3e2277d5dee740fb51ffdd69b9579b8c31e4232e", size = 254261, upload-time = "2025-07-03T10:53:49.99Z" }, - { url = "https://files.pythonhosted.org/packages/8c/32/37dd1c42ce3016ff8ec9e4b607650d2e34845c0585d3518b2a93b4830c1a/coverage-7.9.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c33624f50cf8de418ab2b4d6ca9eda96dc45b2c4231336bac91454520e8d1fac", size = 255099, upload-time = "2025-07-03T10:53:51.354Z" }, - { url = "https://files.pythonhosted.org/packages/da/2e/af6b86f7c95441ce82f035b3affe1cd147f727bbd92f563be35e2d585683/coverage-7.9.2-cp313-cp313t-win32.whl", hash = "sha256:1df6b76e737c6a92210eebcb2390af59a141f9e9430210595251fbaf02d46926", size = 215440, upload-time = "2025-07-03T10:53:52.808Z" }, - { url = "https://files.pythonhosted.org/packages/4d/bb/8a785d91b308867f6b2e36e41c569b367c00b70c17f54b13ac29bcd2d8c8/coverage-7.9.2-cp313-cp313t-win_amd64.whl", hash = "sha256:f5fd54310b92741ebe00d9c0d1d7b2b27463952c022da6d47c175d246a98d1bd", size = 216537, upload-time = "2025-07-03T10:53:54.273Z" }, - { url = "https://files.pythonhosted.org/packages/1d/a0/a6bffb5e0f41a47279fd45a8f3155bf193f77990ae1c30f9c224b61cacb0/coverage-7.9.2-cp313-cp313t-win_arm64.whl", hash = "sha256:c48c2375287108c887ee87d13b4070a381c6537d30e8487b24ec721bf2a781cb", size = 214398, upload-time = 
"2025-07-03T10:53:56.715Z" }, - { url = "https://files.pythonhosted.org/packages/d7/85/f8bbefac27d286386961c25515431482a425967e23d3698b75a250872924/coverage-7.9.2-pp39.pp310.pp311-none-any.whl", hash = "sha256:8a1166db2fb62473285bcb092f586e081e92656c7dfa8e9f62b4d39d7e6b5050", size = 204013, upload-time = "2025-07-03T10:54:12.084Z" }, - { url = "https://files.pythonhosted.org/packages/3c/38/bbe2e63902847cf79036ecc75550d0698af31c91c7575352eb25190d0fb3/coverage-7.9.2-py3-none-any.whl", hash = "sha256:e425cd5b00f6fc0ed7cdbd766c70be8baab4b7839e4d4fe5fac48581dd968ea4", size = 204005, upload-time = "2025-07-03T10:54:13.491Z" }, + { url = "https://files.pythonhosted.org/packages/ef/e7/0f4e35a15361337529df88151bddcac8e8f6d6fd01da94a4b7588901c2fe/coverage-7.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1c86eb388bbd609d15560e7cc0eb936c102b6f43f31cf3e58b4fd9afe28e1372", size = 214627, upload-time = "2025-07-27T14:11:01.211Z" }, + { url = "https://files.pythonhosted.org/packages/e0/fd/17872e762c408362072c936dbf3ca28c67c609a1f5af434b1355edcb7e12/coverage-7.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6b4ba0f488c1bdb6bd9ba81da50715a372119785458831c73428a8566253b86b", size = 215015, upload-time = "2025-07-27T14:11:03.988Z" }, + { url = "https://files.pythonhosted.org/packages/54/50/c9d445ba38ee5f685f03876c0f8223469e2e46c5d3599594dca972b470c8/coverage-7.10.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:083442ecf97d434f0cb3b3e3676584443182653da08b42e965326ba12d6b5f2a", size = 241995, upload-time = "2025-07-27T14:11:05.983Z" }, + { url = "https://files.pythonhosted.org/packages/cc/83/4ae6e0f60376af33de543368394d21b9ac370dc86434039062ef171eebf8/coverage-7.10.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c1a40c486041006b135759f59189385da7c66d239bad897c994e18fd1d0c128f", size = 243253, upload-time = "2025-07-27T14:11:07.424Z" }, + { url = "https://files.pythonhosted.org/packages/49/90/17a4d9ac7171be364ce8c0bb2b6da05e618ebfe1f11238ad4f26c99f5467/coverage-7.10.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3beb76e20b28046989300c4ea81bf690df84ee98ade4dc0bbbf774a28eb98440", size = 245110, upload-time = "2025-07-27T14:11:09.152Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/edc3f485d536ed417f3af2b4969582bcb5fab456241721825fa09354161e/coverage-7.10.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bc265a7945e8d08da28999ad02b544963f813a00f3ed0a7a0ce4165fd77629f8", size = 243056, upload-time = "2025-07-27T14:11:10.586Z" }, + { url = "https://files.pythonhosted.org/packages/58/2c/c4c316a57718556b8d0cc8304437741c31b54a62934e7c8c551a7915c2f4/coverage-7.10.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:47c91f32ba4ac46f1e224a7ebf3f98b4b24335bad16137737fe71a5961a0665c", size = 241731, upload-time = "2025-07-27T14:11:12.145Z" }, + { url = "https://files.pythonhosted.org/packages/f7/93/c78e144c6f086043d0d7d9237c5b880e71ac672ed2712c6f8cca5544481f/coverage-7.10.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1a108dd78ed185020f66f131c60078f3fae3f61646c28c8bb4edd3fa121fc7fc", size = 242023, upload-time = "2025-07-27T14:11:13.573Z" }, + { url = "https://files.pythonhosted.org/packages/8f/e1/34e8505ca81fc144a612e1cc79fadd4a78f42e96723875f4e9f1f470437e/coverage-7.10.1-cp310-cp310-win32.whl", hash = "sha256:7092cc82382e634075cc0255b0b69cb7cada7c1f249070ace6a95cb0f13548ef", size = 217130, upload-time = "2025-07-27T14:11:15.11Z" }, 
+ { url = "https://files.pythonhosted.org/packages/75/2b/82adfce6edffc13d804aee414e64c0469044234af9296e75f6d13f92f6a2/coverage-7.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:ac0c5bba938879c2fc0bc6c1b47311b5ad1212a9dcb8b40fe2c8110239b7faed", size = 218015, upload-time = "2025-07-27T14:11:16.836Z" }, + { url = "https://files.pythonhosted.org/packages/20/8e/ef088112bd1b26e2aa931ee186992b3e42c222c64f33e381432c8ee52aae/coverage-7.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b45e2f9d5b0b5c1977cb4feb5f594be60eb121106f8900348e29331f553a726f", size = 214747, upload-time = "2025-07-27T14:11:18.217Z" }, + { url = "https://files.pythonhosted.org/packages/2d/76/a1e46f3c6e0897758eb43af88bb3c763cb005f4950769f7b553e22aa5f89/coverage-7.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a7a4d74cb0f5e3334f9aa26af7016ddb94fb4bfa11b4a573d8e98ecba8c34f1", size = 215128, upload-time = "2025-07-27T14:11:19.706Z" }, + { url = "https://files.pythonhosted.org/packages/78/4d/903bafb371a8c887826ecc30d3977b65dfad0e1e66aa61b7e173de0828b0/coverage-7.10.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d4b0aab55ad60ead26159ff12b538c85fbab731a5e3411c642b46c3525863437", size = 245140, upload-time = "2025-07-27T14:11:21.261Z" }, + { url = "https://files.pythonhosted.org/packages/55/f1/1f8f09536f38394a8698dd08a0e9608a512eacee1d3b771e2d06397f77bf/coverage-7.10.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dcc93488c9ebd229be6ee1f0d9aad90da97b33ad7e2912f5495804d78a3cd6b7", size = 246977, upload-time = "2025-07-27T14:11:23.15Z" }, + { url = "https://files.pythonhosted.org/packages/57/cc/ed6bbc5a3bdb36ae1bca900bbbfdcb23b260ef2767a7b2dab38b92f61adf/coverage-7.10.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa309df995d020f3438407081b51ff527171cca6772b33cf8f85344b8b4b8770", size = 249140, upload-time = "2025-07-27T14:11:24.743Z" }, + { url = "https://files.pythonhosted.org/packages/10/f5/e881ade2d8e291b60fa1d93d6d736107e940144d80d21a0d4999cff3642f/coverage-7.10.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cfb8b9d8855c8608f9747602a48ab525b1d320ecf0113994f6df23160af68262", size = 246869, upload-time = "2025-07-27T14:11:26.156Z" }, + { url = "https://files.pythonhosted.org/packages/53/b9/6a5665cb8996e3cd341d184bb11e2a8edf01d8dadcf44eb1e742186cf243/coverage-7.10.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:320d86da829b012982b414c7cdda65f5d358d63f764e0e4e54b33097646f39a3", size = 244899, upload-time = "2025-07-27T14:11:27.622Z" }, + { url = "https://files.pythonhosted.org/packages/27/11/24156776709c4e25bf8a33d6bb2ece9a9067186ddac19990f6560a7f8130/coverage-7.10.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dc60ddd483c556590da1d9482a4518292eec36dd0e1e8496966759a1f282bcd0", size = 245507, upload-time = "2025-07-27T14:11:29.544Z" }, + { url = "https://files.pythonhosted.org/packages/43/db/a6f0340b7d6802a79928659c9a32bc778ea420e87a61b568d68ac36d45a8/coverage-7.10.1-cp311-cp311-win32.whl", hash = "sha256:4fcfe294f95b44e4754da5b58be750396f2b1caca8f9a0e78588e3ef85f8b8be", size = 217167, upload-time = "2025-07-27T14:11:31.349Z" }, + { url = "https://files.pythonhosted.org/packages/f5/6f/1990eb4fd05cea4cfabdf1d587a997ac5f9a8bee883443a1d519a2a848c9/coverage-7.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:efa23166da3fe2915f8ab452dde40319ac84dc357f635737174a08dbd912980c", size = 218054, upload-time = "2025-07-27T14:11:33.202Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/4d/5e061d6020251b20e9b4303bb0b7900083a1a384ec4e5db326336c1c4abd/coverage-7.10.1-cp311-cp311-win_arm64.whl", hash = "sha256:d12b15a8c3759e2bb580ffa423ae54be4f184cf23beffcbd641f4fe6e1584293", size = 216483, upload-time = "2025-07-27T14:11:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/a5/3f/b051feeb292400bd22d071fdf933b3ad389a8cef5c80c7866ed0c7414b9e/coverage-7.10.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6b7dc7f0a75a7eaa4584e5843c873c561b12602439d2351ee28c7478186c4da4", size = 214934, upload-time = "2025-07-27T14:11:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e4/a61b27d5c4c2d185bdfb0bfe9d15ab4ac4f0073032665544507429ae60eb/coverage-7.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:607f82389f0ecafc565813aa201a5cade04f897603750028dd660fb01797265e", size = 215173, upload-time = "2025-07-27T14:11:38.005Z" }, + { url = "https://files.pythonhosted.org/packages/8a/01/40a6ee05b60d02d0bc53742ad4966e39dccd450aafb48c535a64390a3552/coverage-7.10.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f7da31a1ba31f1c1d4d5044b7c5813878adae1f3af8f4052d679cc493c7328f4", size = 246190, upload-time = "2025-07-27T14:11:39.887Z" }, + { url = "https://files.pythonhosted.org/packages/11/ef/a28d64d702eb583c377255047281305dc5a5cfbfb0ee36e721f78255adb6/coverage-7.10.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:51fe93f3fe4f5d8483d51072fddc65e717a175490804e1942c975a68e04bf97a", size = 248618, upload-time = "2025-07-27T14:11:41.841Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ad/73d018bb0c8317725370c79d69b5c6e0257df84a3b9b781bda27a438a3be/coverage-7.10.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3e59d00830da411a1feef6ac828b90bbf74c9b6a8e87b8ca37964925bba76dbe", size = 250081, upload-time = "2025-07-27T14:11:43.705Z" }, + { url = "https://files.pythonhosted.org/packages/2d/dd/496adfbbb4503ebca5d5b2de8bed5ec00c0a76558ffc5b834fd404166bc9/coverage-7.10.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:924563481c27941229cb4e16eefacc35da28563e80791b3ddc5597b062a5c386", size = 247990, upload-time = "2025-07-27T14:11:45.244Z" }, + { url = "https://files.pythonhosted.org/packages/18/3c/a9331a7982facfac0d98a4a87b36ae666fe4257d0f00961a3a9ef73e015d/coverage-7.10.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ca79146ee421b259f8131f153102220b84d1a5e6fb9c8aed13b3badfd1796de6", size = 246191, upload-time = "2025-07-27T14:11:47.093Z" }, + { url = "https://files.pythonhosted.org/packages/62/0c/75345895013b83f7afe92ec595e15a9a525ede17491677ceebb2ba5c3d85/coverage-7.10.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2b225a06d227f23f386fdc0eab471506d9e644be699424814acc7d114595495f", size = 247400, upload-time = "2025-07-27T14:11:48.643Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a9/98b268cfc5619ef9df1d5d34fee408ecb1542d9fd43d467e5c2f28668cd4/coverage-7.10.1-cp312-cp312-win32.whl", hash = "sha256:5ba9a8770effec5baaaab1567be916c87d8eea0c9ad11253722d86874d885eca", size = 217338, upload-time = "2025-07-27T14:11:50.258Z" }, + { url = "https://files.pythonhosted.org/packages/fe/31/22a5440e4d1451f253c5cd69fdcead65e92ef08cd4ec237b8756dc0b20a7/coverage-7.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:9eb245a8d8dd0ad73b4062135a251ec55086fbc2c42e0eb9725a9b553fba18a3", size = 218125, upload-time = "2025-07-27T14:11:52.034Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/2b/40d9f0ce7ee839f08a43c5bfc9d05cec28aaa7c9785837247f96cbe490b9/coverage-7.10.1-cp312-cp312-win_arm64.whl", hash = "sha256:7718060dd4434cc719803a5e526838a5d66e4efa5dc46d2b25c21965a9c6fcc4", size = 216523, upload-time = "2025-07-27T14:11:53.965Z" }, + { url = "https://files.pythonhosted.org/packages/ef/72/135ff5fef09b1ffe78dbe6fcf1e16b2e564cd35faeacf3d63d60d887f12d/coverage-7.10.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ebb08d0867c5a25dffa4823377292a0ffd7aaafb218b5d4e2e106378b1061e39", size = 214960, upload-time = "2025-07-27T14:11:55.959Z" }, + { url = "https://files.pythonhosted.org/packages/b1/aa/73a5d1a6fc08ca709a8177825616aa95ee6bf34d522517c2595484a3e6c9/coverage-7.10.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f32a95a83c2e17422f67af922a89422cd24c6fa94041f083dd0bb4f6057d0bc7", size = 215220, upload-time = "2025-07-27T14:11:57.899Z" }, + { url = "https://files.pythonhosted.org/packages/8d/40/3124fdd45ed3772a42fc73ca41c091699b38a2c3bd4f9cb564162378e8b6/coverage-7.10.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c4c746d11c8aba4b9f58ca8bfc6fbfd0da4efe7960ae5540d1a1b13655ee8892", size = 245772, upload-time = "2025-07-27T14:12:00.422Z" }, + { url = "https://files.pythonhosted.org/packages/42/62/a77b254822efa8c12ad59e8039f2bc3df56dc162ebda55e1943e35ba31a5/coverage-7.10.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7f39edd52c23e5c7ed94e0e4bf088928029edf86ef10b95413e5ea670c5e92d7", size = 248116, upload-time = "2025-07-27T14:12:03.099Z" }, + { url = "https://files.pythonhosted.org/packages/1d/01/8101f062f472a3a6205b458d18ef0444a63ae5d36a8a5ed5dd0f6167f4db/coverage-7.10.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab6e19b684981d0cd968906e293d5628e89faacb27977c92f3600b201926b994", size = 249554, upload-time = "2025-07-27T14:12:04.668Z" }, + { url = "https://files.pythonhosted.org/packages/8f/7b/e51bc61573e71ff7275a4f167aecbd16cb010aefdf54bcd8b0a133391263/coverage-7.10.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5121d8cf0eacb16133501455d216bb5f99899ae2f52d394fe45d59229e6611d0", size = 247766, upload-time = "2025-07-27T14:12:06.234Z" }, + { url = "https://files.pythonhosted.org/packages/4b/71/1c96d66a51d4204a9d6d12df53c4071d87e110941a2a1fe94693192262f5/coverage-7.10.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:df1c742ca6f46a6f6cbcaef9ac694dc2cb1260d30a6a2f5c68c5f5bcfee1cfd7", size = 245735, upload-time = "2025-07-27T14:12:08.305Z" }, + { url = "https://files.pythonhosted.org/packages/13/d5/efbc2ac4d35ae2f22ef6df2ca084c60e13bd9378be68655e3268c80349ab/coverage-7.10.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:40f9a38676f9c073bf4b9194707aa1eb97dca0e22cc3766d83879d72500132c7", size = 247118, upload-time = "2025-07-27T14:12:09.903Z" }, + { url = "https://files.pythonhosted.org/packages/d1/22/073848352bec28ca65f2b6816b892fcf9a31abbef07b868487ad15dd55f1/coverage-7.10.1-cp313-cp313-win32.whl", hash = "sha256:2348631f049e884839553b9974f0821d39241c6ffb01a418efce434f7eba0fe7", size = 217381, upload-time = "2025-07-27T14:12:11.535Z" }, + { url = "https://files.pythonhosted.org/packages/b7/df/df6a0ff33b042f000089bd11b6bb034bab073e2ab64a56e78ed882cba55d/coverage-7.10.1-cp313-cp313-win_amd64.whl", hash = "sha256:4072b31361b0d6d23f750c524f694e1a417c1220a30d3ef02741eed28520c48e", size = 218152, upload-time = "2025-07-27T14:12:13.182Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/e3/5085ca849a40ed6b47cdb8f65471c2f754e19390b5a12fa8abd25cbfaa8f/coverage-7.10.1-cp313-cp313-win_arm64.whl", hash = "sha256:3e31dfb8271937cab9425f19259b1b1d1f556790e98eb266009e7a61d337b6d4", size = 216559, upload-time = "2025-07-27T14:12:14.807Z" }, + { url = "https://files.pythonhosted.org/packages/cc/93/58714efbfdeb547909feaabe1d67b2bdd59f0597060271b9c548d5efb529/coverage-7.10.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1c4f679c6b573a5257af6012f167a45be4c749c9925fd44d5178fd641ad8bf72", size = 215677, upload-time = "2025-07-27T14:12:16.68Z" }, + { url = "https://files.pythonhosted.org/packages/c0/0c/18eaa5897e7e8cb3f8c45e563e23e8a85686b4585e29d53cacb6bc9cb340/coverage-7.10.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:871ebe8143da284bd77b84a9136200bd638be253618765d21a1fce71006d94af", size = 215899, upload-time = "2025-07-27T14:12:18.758Z" }, + { url = "https://files.pythonhosted.org/packages/84/c1/9d1affacc3c75b5a184c140377701bbf14fc94619367f07a269cd9e4fed6/coverage-7.10.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:998c4751dabf7d29b30594af416e4bf5091f11f92a8d88eb1512c7ba136d1ed7", size = 257140, upload-time = "2025-07-27T14:12:20.357Z" }, + { url = "https://files.pythonhosted.org/packages/3d/0f/339bc6b8fa968c346df346068cca1f24bdea2ddfa93bb3dc2e7749730962/coverage-7.10.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:780f750a25e7749d0af6b3631759c2c14f45de209f3faaa2398312d1c7a22759", size = 259005, upload-time = "2025-07-27T14:12:22.007Z" }, + { url = "https://files.pythonhosted.org/packages/c8/22/89390864b92ea7c909079939b71baba7e5b42a76bf327c1d615bd829ba57/coverage-7.10.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:590bdba9445df4763bdbebc928d8182f094c1f3947a8dc0fc82ef014dbdd8324", size = 261143, upload-time = "2025-07-27T14:12:23.746Z" }, + { url = "https://files.pythonhosted.org/packages/2c/56/3d04d89017c0c41c7a71bd69b29699d919b6bbf2649b8b2091240b97dd6a/coverage-7.10.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b2df80cb6a2af86d300e70acb82e9b79dab2c1e6971e44b78dbfc1a1e736b53", size = 258735, upload-time = "2025-07-27T14:12:25.73Z" }, + { url = "https://files.pythonhosted.org/packages/cb/40/312252c8afa5ca781063a09d931f4b9409dc91526cd0b5a2b84143ffafa2/coverage-7.10.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d6a558c2725bfb6337bf57c1cd366c13798bfd3bfc9e3dd1f4a6f6fc95a4605f", size = 256871, upload-time = "2025-07-27T14:12:27.767Z" }, + { url = "https://files.pythonhosted.org/packages/1f/2b/564947d5dede068215aaddb9e05638aeac079685101462218229ddea9113/coverage-7.10.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e6150d167f32f2a54690e572e0a4c90296fb000a18e9b26ab81a6489e24e78dd", size = 257692, upload-time = "2025-07-27T14:12:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/93/1b/c8a867ade85cb26d802aea2209b9c2c80613b9c122baa8c8ecea6799648f/coverage-7.10.1-cp313-cp313t-win32.whl", hash = "sha256:d946a0c067aa88be4a593aad1236493313bafaa27e2a2080bfe88db827972f3c", size = 218059, upload-time = "2025-07-27T14:12:31.076Z" }, + { url = "https://files.pythonhosted.org/packages/a1/fe/cd4ab40570ae83a516bf5e754ea4388aeedd48e660e40c50b7713ed4f930/coverage-7.10.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e37c72eaccdd5ed1130c67a92ad38f5b2af66eeff7b0abe29534225db2ef7b18", size = 219150, upload-time = "2025-07-27T14:12:32.746Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/16/6e5ed5854be6d70d0c39e9cb9dd2449f2c8c34455534c32c1a508c7dbdb5/coverage-7.10.1-cp313-cp313t-win_arm64.whl", hash = "sha256:89ec0ffc215c590c732918c95cd02b55c7d0f569d76b90bb1a5e78aa340618e4", size = 217014, upload-time = "2025-07-27T14:12:34.406Z" }, + { url = "https://files.pythonhosted.org/packages/0f/64/922899cff2c0fd3496be83fa8b81230f5a8d82a2ad30f98370b133c2c83b/coverage-7.10.1-py3-none-any.whl", hash = "sha256:fa2a258aa6bf188eb9a8948f7102a83da7c430a0dce918dbd8b60ef8fcb772d7", size = 206597, upload-time = "2025-07-27T14:13:37.221Z" }, ] [package.optional-dependencies] @@ -3682,16 +3678,16 @@ wheels = [ [[package]] name = "hypothesis" -version = "6.136.3" +version = "6.136.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "sortedcontainers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/84/cd/714955a6ac2f4b786483a38f0b13e37e7efad37d23a2dc465b91a9fe2db0/hypothesis-6.136.3.tar.gz", hash = "sha256:89baa2bfc5af38f939e83b62f9f0e7e6407e81cade29cfcb3eafbc661177c2bd", size = 457758, upload-time = "2025-07-23T13:53:25.8Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/ec/83b5a33cef752a883048c1b1a3c3416abd0fdec0d11f3517ef82283873ef/hypothesis-6.136.5.tar.gz", hash = "sha256:590e9b986882d145c84490b64318d07439e81b06bd150164e197e1eb6a09bf7e", size = 457866, upload-time = "2025-07-28T04:30:17.591Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/de/d2145a3c329751b13fcfddfe119e5ec72e263c2766d1f655dedc3ed33a0d/hypothesis-6.136.3-py3-none-any.whl", hash = "sha256:88163307c625688317bc5f3c7bd88f18b4d5c7cd773c784e3c4182eed2ae1b3d", size = 524703, upload-time = "2025-07-23T13:53:22.384Z" }, + { url = "https://files.pythonhosted.org/packages/22/ce/3a7a8b22e16b767aa7861d0cf57ac12d21fef31c10cbb492a48c873dd962/hypothesis-6.136.5-py3-none-any.whl", hash = "sha256:101ff06e31f5f97c82a7829bc7682093c06b964a17779fa8c4ae9c72d7a860af", size = 524756, upload-time = "2025-07-28T04:30:13.858Z" }, ] [[package]] @@ -11038,11 +11034,11 @@ wheels = [ [[package]] name = "types-pywin32" -version = "311.0.0.20250723" +version = "311.0.0.20250728" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fa/14/3f12d6d6b4bf9699b6b2988dece12f1d7f065d21c80e4be565410028f7da/types_pywin32-311.0.0.20250723.tar.gz", hash = "sha256:95c28cae88d3a229b5793c9d2d5661c2da6f4dde8ad73ff243934e84371d3c0f", size = 328590, upload-time = "2025-07-23T03:24:08.044Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3a/b8/790e266e88b3b408b2910d7e4859c2820a16629130585fe05b833bcb5eda/types_pywin32-311.0.0.20250728.tar.gz", hash = "sha256:babee600f62c742306e4a9d4621f1cbfe44d57f904148b5cd363cced8b268d3f", size = 328445, upload-time = "2025-07-28T03:29:12.28Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/35/c5358b61cafc9394c919120d588f94ddfd2f9bcb7b486fce6466a7044904/types_pywin32-311.0.0.20250723-py3-none-any.whl", hash = "sha256:9c969fc4e4a0bdab61035c72baaef2ec64c2ada9cdcebe46af38ef5139f07434", size = 390486, upload-time = "2025-07-23T03:24:06.592Z" }, + { url = "https://files.pythonhosted.org/packages/b3/9c/e8ad1e7b0e041858320197a8fa9dcc354ac1ad859985d6a461d33bdc91fa/types_pywin32-311.0.0.20250728-py3-none-any.whl", hash = "sha256:a7a241447aecaf4623ca230c7bd664463bed1d6db0dde8329d615f076fb80761", size = 390427, upload-time = "2025-07-28T03:29:10.909Z" }, ] 
[[package]] From 3d601ffa795614435ede27044945140c99e2bfc9 Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Thu, 14 Aug 2025 16:41:15 -0400 Subject: [PATCH 13/86] chore: bump langflow version (#9402) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bug: re-add NVIDIA Remix starter template (#8994) feat: add NVIDIA Remix starter template Introduced a new JSON configuration file for the NVIDIA Remix starter project, defining nodes and edges for chat input, output, and agent interactions. This setup enhances the integration of the NVIDIA RTX Remix Toolkit REST API, providing a structured approach for building chat-based applications. Co-authored-by: Gabriel Luiz Freitas Almeida * chore(release): bump to 1.5.0.post2 * bug: re-add NVIDIA Remix starter template (#8994) feat: add NVIDIA Remix starter template Introduced a new JSON configuration file for the NVIDIA Remix starter project, defining nodes and edges for chat input, output, and agent interactions. This setup enhances the integration of the NVIDIA RTX Remix Toolkit REST API, providing a structured approach for building chat-based applications. Co-authored-by: Gabriel Luiz Freitas Almeida * Bump langflow-base version to 0.5.0.post2 Updated the langflow-base package version in pyproject.toml and uv.lock to 0.5.0.post2 for release or deployment purposes. --------- Co-authored-by: Sebastián Estévez Co-authored-by: Gabriel Luiz Freitas Almeida --- pyproject.toml | 2 +- src/backend/base/pyproject.toml | 2 +- uv.lock | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9079c8186..cfbe32656 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "langflow" -version = "1.5.0.post1" +version = "1.5.0.post2" description = "A Python package with a built-in web application" requires-python = ">=3.10,<3.14" license = "MIT" diff --git a/src/backend/base/pyproject.toml b/src/backend/base/pyproject.toml index 0fef9889e..6e406b7f0 100644 --- a/src/backend/base/pyproject.toml +++ b/src/backend/base/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "langflow-base" -version = "0.5.0.post1" +version = "0.5.0.post2" description = "A Python package with a built-in web application" requires-python = ">=3.10,<3.14" license = "MIT" diff --git a/uv.lock b/uv.lock index d7961afa7..78ea3aa2d 100644 --- a/uv.lock +++ b/uv.lock @@ -4827,7 +4827,7 @@ wheels = [ [[package]] name = "langflow" -version = "1.5.0.post1" +version = "1.5.0.post2" source = { editable = "." 
} dependencies = [ { name = "aiofile" }, @@ -5200,7 +5200,7 @@ dev = [ [[package]] name = "langflow-base" -version = "0.5.0.post1" +version = "0.5.0.post2" source = { editable = "src/backend/base" } dependencies = [ { name = "aiofile" }, From 226c71bb7ca8c4deb862b387850793cb2ee7beae Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Thu, 14 Aug 2025 14:19:42 -0700 Subject: [PATCH 14/86] fix: Allow connections to save file output (#9386) * fix: Allow connections to save file output * Update save_file.py * [autofix.ci] apply automated fixes * Change name of output --------- Co-authored-by: Edwin Jose Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../base/langflow/components/processing/save_file.py | 2 +- .../initial_setup/starter_projects/News Aggregator.json | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/backend/base/langflow/components/processing/save_file.py b/src/backend/base/langflow/components/processing/save_file.py index fc0f1de81..55526ff3a 100644 --- a/src/backend/base/langflow/components/processing/save_file.py +++ b/src/backend/base/langflow/components/processing/save_file.py @@ -53,7 +53,7 @@ class SaveToFileComponent(Component): ), ] - outputs = [Output(display_name="File Path", name="result", method="save_to_file")] + outputs = [Output(display_name="File Path", name="message", method="save_to_file")] async def save_to_file(self) -> Message: """Save the input to a file and upload it, returning a confirmation message.""" diff --git a/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json b/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json index 5f7417c8f..dff88a621 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json @@ -1208,7 +1208,7 @@ "legacy": false, "lf_version": "1.4.3", "metadata": { - "code_hash": "6f244023207e", + "code_hash": "9134859cf24d", "module": "langflow.components.processing.save_file.SaveToFileComponent" }, "minimized": false, @@ -1220,8 +1220,8 @@ "display_name": "File Path", "group_outputs": false, "method": "save_to_file", - "name": "result", - "selected": "Text", + "name": "message", + "selected": "Message", "tool_mode": true, "types": [ "Message" @@ -1248,7 +1248,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\n\nimport orjson\nimport pandas as pd\nfrom fastapi import UploadFile\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.api.v2.files import upload_user_file\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, StrInput\nfrom langflow.schema import Data, DataFrame, Message\nfrom langflow.services.auth.utils import create_user_longterm_token\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_session, get_settings_service, get_storage_service\nfrom langflow.template.field.base import Output\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Save File\"\n description = \"Save data to a local file in the selected format.\"\n documentation: str = \"https://docs.langflow.org/components-processing#save-file\"\n icon = \"save\"\n name = \"SaveToFile\"\n\n # File format options for different types\n DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n 
MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n\n inputs = [\n HandleInput(\n name=\"input\",\n display_name=\"Input\",\n info=\"The input to save.\",\n dynamic=True,\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n StrInput(\n name=\"file_name\",\n display_name=\"File Name\",\n info=\"Name file will be saved as (without extension).\",\n required=True,\n ),\n DropdownInput(\n name=\"file_format\",\n display_name=\"File Format\",\n options=list(dict.fromkeys(DATA_FORMAT_CHOICES + MESSAGE_FORMAT_CHOICES)),\n info=\"Select the file format to save the input. If not provided, the default format will be used.\",\n value=\"\",\n advanced=True,\n ),\n ]\n\n outputs = [Output(display_name=\"File Path\", name=\"result\", method=\"save_to_file\")]\n\n async def save_to_file(self) -> Message:\n \"\"\"Save the input to a file and upload it, returning a confirmation message.\"\"\"\n # Validate inputs\n if not self.file_name:\n msg = \"File name must be provided.\"\n raise ValueError(msg)\n if not self._get_input_type():\n msg = \"Input type is not set.\"\n raise ValueError(msg)\n\n # Validate file format based on input type\n file_format = self.file_format or self._get_default_format()\n allowed_formats = (\n self.MESSAGE_FORMAT_CHOICES if self._get_input_type() == \"Message\" else self.DATA_FORMAT_CHOICES\n )\n if file_format not in allowed_formats:\n msg = f\"Invalid file format '{file_format}' for {self._get_input_type()}. Allowed: {allowed_formats}\"\n raise ValueError(msg)\n\n # Prepare file path\n file_path = Path(self.file_name).expanduser()\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path = self._adjust_file_path_with_format(file_path, file_format)\n\n # Save the input to file based on type\n if self._get_input_type() == \"DataFrame\":\n confirmation = self._save_dataframe(self.input, file_path, file_format)\n elif self._get_input_type() == \"Data\":\n confirmation = self._save_data(self.input, file_path, file_format)\n elif self._get_input_type() == \"Message\":\n confirmation = await self._save_message(self.input, file_path, file_format)\n else:\n msg = f\"Unsupported input type: {self._get_input_type()}\"\n raise ValueError(msg)\n\n # Upload the saved file\n await self._upload_file(file_path)\n\n # Return the final file path and confirmation message\n final_path = Path.cwd() / file_path if not file_path.is_absolute() else file_path\n\n return Message(text=f\"{confirmation} at {final_path}\")\n\n def _get_input_type(self) -> str:\n \"\"\"Determine the input type based on the provided input.\"\"\"\n # Use exact type checking (type() is) instead of isinstance() to avoid inheritance issues.\n # Since Message inherits from Data, isinstance(message, Data) would return True for Message objects,\n # causing Message inputs to be incorrectly identified as Data type.\n if type(self.input) is DataFrame:\n return \"DataFrame\"\n if type(self.input) is Message:\n return \"Message\"\n if type(self.input) is Data:\n return \"Data\"\n msg = f\"Unsupported input type: {type(self.input)}\"\n raise ValueError(msg)\n\n def _get_default_format(self) -> str:\n \"\"\"Return the default file format based on input type.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return \"csv\"\n if self._get_input_type() == \"Data\":\n return \"json\"\n if self._get_input_type() == \"Message\":\n return \"json\"\n return \"json\" # Fallback\n\n def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path:\n \"\"\"Adjust 
the file path to include the correct extension.\"\"\"\n file_extension = path.suffix.lower().lstrip(\".\")\n if fmt == \"excel\":\n return Path(f\"{path}.xlsx\").expanduser() if file_extension not in [\"xlsx\", \"xls\"] else path\n return Path(f\"{path}.{fmt}\").expanduser() if file_extension != fmt else path\n\n async def _upload_file(self, file_path: Path) -> None:\n \"\"\"Upload the saved file using the upload_user_file service.\"\"\"\n if not file_path.exists():\n msg = f\"File not found: {file_path}\"\n raise FileNotFoundError(msg)\n\n with file_path.open(\"rb\") as f:\n async for db in get_session():\n user_id, _ = await create_user_longterm_token(db)\n current_user = await get_user_by_id(db, user_id)\n\n await upload_user_file(\n file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),\n session=db,\n current_user=current_user,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n )\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n \"\"\"Save a DataFrame to the specified file format.\"\"\"\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n path.write_text(dataframe.to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported DataFrame format: {fmt}\"\n raise ValueError(msg)\n return f\"DataFrame saved successfully as '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n \"\"\"Save a Data object to the specified file format.\"\"\"\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(path, index=False)\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n path.write_text(\n orjson.dumps(jsonable_encoder(data.data), option=orjson.OPT_INDENT_2).decode(\"utf-8\"), encoding=\"utf-8\"\n )\n elif fmt == \"markdown\":\n path.write_text(pd.DataFrame(data.data).to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(msg)\n return f\"Data saved successfully as '{path}'\"\n\n async def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n \"\"\"Save a Message to the specified file format, handling async iterators.\"\"\"\n content = \"\"\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n async for item in message.text:\n content += str(item) + \" \"\n content = content.strip()\n elif isinstance(message.text, Iterator):\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n if fmt == \"txt\":\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n path.write_text(json.dumps({\"message\": content}, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(f\"**Message:**\\n\\n{content}\", encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(msg)\n return f\"Message saved successfully as '{path}'\"\n" + "value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\n\nimport orjson\nimport pandas as pd\nfrom fastapi import UploadFile\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.api.v2.files import upload_user_file\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, 
StrInput\nfrom langflow.schema import Data, DataFrame, Message\nfrom langflow.services.auth.utils import create_user_longterm_token\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_session, get_settings_service, get_storage_service\nfrom langflow.template.field.base import Output\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Save File\"\n description = \"Save data to a local file in the selected format.\"\n documentation: str = \"https://docs.langflow.org/components-processing#save-file\"\n icon = \"save\"\n name = \"SaveToFile\"\n\n # File format options for different types\n DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n\n inputs = [\n HandleInput(\n name=\"input\",\n display_name=\"Input\",\n info=\"The input to save.\",\n dynamic=True,\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n StrInput(\n name=\"file_name\",\n display_name=\"File Name\",\n info=\"Name file will be saved as (without extension).\",\n required=True,\n ),\n DropdownInput(\n name=\"file_format\",\n display_name=\"File Format\",\n options=list(dict.fromkeys(DATA_FORMAT_CHOICES + MESSAGE_FORMAT_CHOICES)),\n info=\"Select the file format to save the input. If not provided, the default format will be used.\",\n value=\"\",\n advanced=True,\n ),\n ]\n\n outputs = [Output(display_name=\"File Path\", name=\"message\", method=\"save_to_file\")]\n\n async def save_to_file(self) -> Message:\n \"\"\"Save the input to a file and upload it, returning a confirmation message.\"\"\"\n # Validate inputs\n if not self.file_name:\n msg = \"File name must be provided.\"\n raise ValueError(msg)\n if not self._get_input_type():\n msg = \"Input type is not set.\"\n raise ValueError(msg)\n\n # Validate file format based on input type\n file_format = self.file_format or self._get_default_format()\n allowed_formats = (\n self.MESSAGE_FORMAT_CHOICES if self._get_input_type() == \"Message\" else self.DATA_FORMAT_CHOICES\n )\n if file_format not in allowed_formats:\n msg = f\"Invalid file format '{file_format}' for {self._get_input_type()}. 
Allowed: {allowed_formats}\"\n raise ValueError(msg)\n\n # Prepare file path\n file_path = Path(self.file_name).expanduser()\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path = self._adjust_file_path_with_format(file_path, file_format)\n\n # Save the input to file based on type\n if self._get_input_type() == \"DataFrame\":\n confirmation = self._save_dataframe(self.input, file_path, file_format)\n elif self._get_input_type() == \"Data\":\n confirmation = self._save_data(self.input, file_path, file_format)\n elif self._get_input_type() == \"Message\":\n confirmation = await self._save_message(self.input, file_path, file_format)\n else:\n msg = f\"Unsupported input type: {self._get_input_type()}\"\n raise ValueError(msg)\n\n # Upload the saved file\n await self._upload_file(file_path)\n\n # Return the final file path and confirmation message\n final_path = Path.cwd() / file_path if not file_path.is_absolute() else file_path\n\n return Message(text=f\"{confirmation} at {final_path}\")\n\n def _get_input_type(self) -> str:\n \"\"\"Determine the input type based on the provided input.\"\"\"\n # Use exact type checking (type() is) instead of isinstance() to avoid inheritance issues.\n # Since Message inherits from Data, isinstance(message, Data) would return True for Message objects,\n # causing Message inputs to be incorrectly identified as Data type.\n if type(self.input) is DataFrame:\n return \"DataFrame\"\n if type(self.input) is Message:\n return \"Message\"\n if type(self.input) is Data:\n return \"Data\"\n msg = f\"Unsupported input type: {type(self.input)}\"\n raise ValueError(msg)\n\n def _get_default_format(self) -> str:\n \"\"\"Return the default file format based on input type.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return \"csv\"\n if self._get_input_type() == \"Data\":\n return \"json\"\n if self._get_input_type() == \"Message\":\n return \"json\"\n return \"json\" # Fallback\n\n def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path:\n \"\"\"Adjust the file path to include the correct extension.\"\"\"\n file_extension = path.suffix.lower().lstrip(\".\")\n if fmt == \"excel\":\n return Path(f\"{path}.xlsx\").expanduser() if file_extension not in [\"xlsx\", \"xls\"] else path\n return Path(f\"{path}.{fmt}\").expanduser() if file_extension != fmt else path\n\n async def _upload_file(self, file_path: Path) -> None:\n \"\"\"Upload the saved file using the upload_user_file service.\"\"\"\n if not file_path.exists():\n msg = f\"File not found: {file_path}\"\n raise FileNotFoundError(msg)\n\n with file_path.open(\"rb\") as f:\n async for db in get_session():\n user_id, _ = await create_user_longterm_token(db)\n current_user = await get_user_by_id(db, user_id)\n\n await upload_user_file(\n file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),\n session=db,\n current_user=current_user,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n )\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n \"\"\"Save a DataFrame to the specified file format.\"\"\"\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n path.write_text(dataframe.to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported DataFrame format: 
{fmt}\"\n raise ValueError(msg)\n return f\"DataFrame saved successfully as '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n \"\"\"Save a Data object to the specified file format.\"\"\"\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(path, index=False)\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n path.write_text(\n orjson.dumps(jsonable_encoder(data.data), option=orjson.OPT_INDENT_2).decode(\"utf-8\"), encoding=\"utf-8\"\n )\n elif fmt == \"markdown\":\n path.write_text(pd.DataFrame(data.data).to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(msg)\n return f\"Data saved successfully as '{path}'\"\n\n async def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n \"\"\"Save a Message to the specified file format, handling async iterators.\"\"\"\n content = \"\"\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n async for item in message.text:\n content += str(item) + \" \"\n content = content.strip()\n elif isinstance(message.text, Iterator):\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n if fmt == \"txt\":\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n path.write_text(json.dumps({\"message\": content}, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(f\"**Message:**\\n\\n{content}\", encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(msg)\n return f\"Message saved successfully as '{path}'\"\n" }, "file_format": { "_input_type": "DropdownInput", From 18e2ff2d2d3d464e09fab81b83549167e47e2228 Mon Sep 17 00:00:00 2001 From: Jordan Frazier <122494242+jordanrfrazier@users.noreply.github.com> Date: Mon, 18 Aug 2025 08:25:13 -0400 Subject: [PATCH 15/86] fix: fallback to env var correctly when using tweaks (#9422) * Fallback to env var correctly when using tweaks * Add unit test * [autofix.ci] apply automated fixes * refactor: improve docstrings for parameter update tests --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Gabriel Luiz Freitas Almeida --- .../langflow/interface/initialize/loading.py | 4 +- src/backend/tests/unit/interface/__init__.py | 1 + .../unit/interface/initialize/__init__.py | 1 + .../unit/interface/initialize/test_loading.py | 225 ++++++++++++++++++ 4 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 src/backend/tests/unit/interface/__init__.py create mode 100644 src/backend/tests/unit/interface/initialize/__init__.py create mode 100644 src/backend/tests/unit/interface/initialize/test_loading.py diff --git a/src/backend/base/langflow/interface/initialize/loading.py b/src/backend/base/langflow/interface/initialize/loading.py index 5a25f8a75..a706a05df 100644 --- a/src/backend/base/langflow/interface/initialize/loading.py +++ b/src/backend/base/langflow/interface/initialize/loading.py @@ -122,7 +122,9 @@ async def update_params_with_load_from_db_fields( try: key = await custom_component.get_variable(name=params[field], field=field, session=session) except ValueError as e: - if any(reason in str(e) for reason in ["User id is not set", "variable not found."]): + if "User id is not set" in str(e): + raise + if "variable not found." 
in str(e) and not fallback_to_env_vars: raise logger.debug(str(e)) key = None diff --git a/src/backend/tests/unit/interface/__init__.py b/src/backend/tests/unit/interface/__init__.py new file mode 100644 index 000000000..0ad414113 --- /dev/null +++ b/src/backend/tests/unit/interface/__init__.py @@ -0,0 +1 @@ +# Unit tests for interface module diff --git a/src/backend/tests/unit/interface/initialize/__init__.py b/src/backend/tests/unit/interface/initialize/__init__.py new file mode 100644 index 000000000..6ce4cd81e --- /dev/null +++ b/src/backend/tests/unit/interface/initialize/__init__.py @@ -0,0 +1 @@ +# Unit tests for initialize module diff --git a/src/backend/tests/unit/interface/initialize/test_loading.py b/src/backend/tests/unit/interface/initialize/test_loading.py new file mode 100644 index 000000000..7a0860308 --- /dev/null +++ b/src/backend/tests/unit/interface/initialize/test_loading.py @@ -0,0 +1,225 @@ +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from langflow.interface.initialize.loading import update_params_with_load_from_db_fields + + +@pytest.mark.asyncio +async def test_update_params_fallback_to_env_when_variable_not_found(): + """Test that when a variable is not found in database and fallback_to_env_vars is True. + + It falls back to environment variables. + """ + # Set up environment variable + os.environ["TEST_API_KEY"] = "test-secret-key-123" + + # Create mock custom component + custom_component = MagicMock() + custom_component.get_variable = AsyncMock(side_effect=ValueError("TEST_API_KEY variable not found.")) + + # Set up params with a field that should load from db + params = {"api_key": "TEST_API_KEY"} + load_from_db_fields = ["api_key"] + + # Call the function with fallback enabled + with patch("langflow.interface.initialize.loading.session_scope") as mock_session_scope: + mock_session_scope.return_value.__aenter__.return_value = MagicMock() + + result = await update_params_with_load_from_db_fields( + custom_component, params, load_from_db_fields, fallback_to_env_vars=True + ) + + # Should have fallen back to environment variable + assert result["api_key"] == "test-secret-key-123" + + # Clean up + del os.environ["TEST_API_KEY"] + + +@pytest.mark.asyncio +async def test_update_params_raises_when_variable_not_found_and_no_fallback(): + """Test that when a variable is not found and fallback_to_env_vars is False. + + It raises the error. + """ + # Create mock custom component + custom_component = MagicMock() + custom_component.get_variable = AsyncMock(side_effect=ValueError("TEST_API_KEY variable not found.")) + + # Set up params + params = {"api_key": "TEST_API_KEY"} + load_from_db_fields = ["api_key"] + + # Call the function with fallback disabled + with patch("langflow.interface.initialize.loading.session_scope") as mock_session_scope: + mock_session_scope.return_value.__aenter__.return_value = MagicMock() + + with pytest.raises(ValueError, match="TEST_API_KEY variable not found"): + await update_params_with_load_from_db_fields( + custom_component, params, load_from_db_fields, fallback_to_env_vars=False + ) + + +@pytest.mark.asyncio +async def test_update_params_uses_database_variable_when_found(): + """Test that when a variable is found in database, it uses that value. + + It doesn't check environment variables. 
+ """ + # Set up environment variable (should not be used) + os.environ["TEST_API_KEY"] = "env-value" + + # Create mock custom component + custom_component = MagicMock() + custom_component.get_variable = AsyncMock(return_value="db-value") + + # Set up params + params = {"api_key": "TEST_API_KEY"} + load_from_db_fields = ["api_key"] + + # Call the function + with patch("langflow.interface.initialize.loading.session_scope") as mock_session_scope: + mock_session_scope.return_value.__aenter__.return_value = MagicMock() + + result = await update_params_with_load_from_db_fields( + custom_component, params, load_from_db_fields, fallback_to_env_vars=True + ) + + # Should use database value, not environment value + assert result["api_key"] == "db-value" + + # Clean up + del os.environ["TEST_API_KEY"] + + +@pytest.mark.asyncio +async def test_update_params_sets_none_when_no_env_var_and_fallback_enabled(): + """Test that when variable not found in db and env var doesn't exist. + + The field is set to None. + """ + # Make sure env var doesn't exist + if "NONEXISTENT_KEY" in os.environ: + del os.environ["NONEXISTENT_KEY"] + + # Create mock custom component + custom_component = MagicMock() + custom_component.get_variable = AsyncMock(side_effect=ValueError("NONEXISTENT_KEY variable not found.")) + + # Set up params + params = {"api_key": "NONEXISTENT_KEY"} + load_from_db_fields = ["api_key"] + + # Call the function with fallback enabled + with patch("langflow.interface.initialize.loading.session_scope") as mock_session_scope: + mock_session_scope.return_value.__aenter__.return_value = MagicMock() + + result = await update_params_with_load_from_db_fields( + custom_component, params, load_from_db_fields, fallback_to_env_vars=True + ) + + # Should be set to None + assert result["api_key"] is None + + +@pytest.mark.asyncio +async def test_update_params_raises_on_user_id_not_set(): + """Test that 'User id is not set' error is always raised regardless of fallback setting.""" + # Create mock custom component + custom_component = MagicMock() + custom_component.get_variable = AsyncMock(side_effect=ValueError("User id is not set")) + + # Set up params + params = {"api_key": "SOME_KEY"} + load_from_db_fields = ["api_key"] + + # Should raise with fallback enabled + with patch("langflow.interface.initialize.loading.session_scope") as mock_session_scope: + mock_session_scope.return_value.__aenter__.return_value = MagicMock() + + with pytest.raises(ValueError, match="User id is not set"): + await update_params_with_load_from_db_fields( + custom_component, params, load_from_db_fields, fallback_to_env_vars=True + ) + + # Should also raise with fallback disabled + with patch("langflow.interface.initialize.loading.session_scope") as mock_session_scope: + mock_session_scope.return_value.__aenter__.return_value = MagicMock() + + with pytest.raises(ValueError, match="User id is not set"): + await update_params_with_load_from_db_fields( + custom_component, params, load_from_db_fields, fallback_to_env_vars=False + ) + + +@pytest.mark.asyncio +async def test_update_params_skips_empty_fields(): + """Test that empty or None fields in params are skipped.""" + # Create mock custom component + custom_component = MagicMock() + custom_component.get_variable = AsyncMock(return_value="some-value") + + # Set up params with empty and None values + params = {"api_key": "", "another_key": None, "valid_key": "VALID_KEY"} + load_from_db_fields = ["api_key", "another_key", "valid_key"] + + # Call the function + with 
patch("langflow.interface.initialize.loading.session_scope") as mock_session_scope: + mock_session_scope.return_value.__aenter__.return_value = MagicMock() + + result = await update_params_with_load_from_db_fields( + custom_component, params, load_from_db_fields, fallback_to_env_vars=True + ) + + # Only valid_key should have been processed + assert result["api_key"] == "" + assert result["another_key"] is None + assert result["valid_key"] == "some-value" + + # get_variable should only be called once for valid_key + custom_component.get_variable.assert_called_once_with( + name="VALID_KEY", field="valid_key", session=mock_session_scope.return_value.__aenter__.return_value + ) + + +@pytest.mark.asyncio +async def test_update_params_handles_multiple_fields(): + """Test that multiple fields are processed correctly with mixed results.""" + # Set up environment variables + os.environ["ENV_KEY"] = "env-value" + + # Create mock custom component + custom_component = MagicMock() + + # Set up different responses for different fields + async def mock_get_variable(name, **_kwargs): + if name == "DB_KEY": + return "db-value" + if name == "ENV_KEY": + error_msg = "ENV_KEY variable not found." + raise ValueError(error_msg) + error_msg = f"{name} variable not found." + raise ValueError(error_msg) + + custom_component.get_variable = AsyncMock(side_effect=mock_get_variable) + + # Set up params + params = {"field1": "DB_KEY", "field2": "ENV_KEY", "field3": "MISSING_KEY"} + load_from_db_fields = ["field1", "field2", "field3"] + + # Call the function + with patch("langflow.interface.initialize.loading.session_scope") as mock_session_scope: + mock_session_scope.return_value.__aenter__.return_value = MagicMock() + + result = await update_params_with_load_from_db_fields( + custom_component, params, load_from_db_fields, fallback_to_env_vars=True + ) + + # Check results + assert result["field1"] == "db-value" # From database + assert result["field2"] == "env-value" # From environment + assert result["field3"] is None # Not found anywhere + + # Clean up + del os.environ["ENV_KEY"] From c8ac453601b7accad047fa41ae891299d3e6440c Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Mon, 18 Aug 2025 09:47:27 -0300 Subject: [PATCH 16/86] chore: update schema version and format JSON structure in biome.json (#9289) --- src/frontend/biome.json | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/frontend/biome.json b/src/frontend/biome.json index ff59e3d25..6b3c6a520 100644 --- a/src/frontend/biome.json +++ b/src/frontend/biome.json @@ -1,13 +1,19 @@ { - "$schema": "https://biomejs.dev/schemas/2.1.1/schema.json", + "$schema": "https://biomejs.dev/schemas/2.1.2/schema.json", "vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": true, "defaultBranch": "main" }, - "files": { "ignoreUnknown": false }, - "formatter": { "enabled": true, "indentStyle": "space", "indentWidth": 2 }, + "files": { + "ignoreUnknown": false + }, + "formatter": { + "enabled": true, + "indentStyle": "space", + "indentWidth": 2 + }, "linter": { "domains": { "test": "recommended" @@ -15,7 +21,9 @@ "enabled": true, "rules": { "recommended": false, - "a11y": { "noStaticElementInteractions": "warn" }, + "a11y": { + "noStaticElementInteractions": "warn" + }, "complexity": { "noAdjacentSpacesInRegex": "error", "noExtraBooleanCast": "error", @@ -82,9 +90,17 @@ } } }, - "javascript": { "formatter": { "quoteStyle": "double" } }, + "javascript": { + "formatter": { + "quoteStyle": "double" + } + }, 
"assist": { "enabled": true, - "actions": { "source": { "organizeImports": "on" } } + "actions": { + "source": { + "organizeImports": "on" + } + } } } From 93517fa3b68b2725f3da9de119553e303e972e00 Mon Sep 17 00:00:00 2001 From: psy42a Date: Mon, 18 Aug 2025 23:11:12 +1000 Subject: [PATCH 17/86] fix: mask Component Secrets in trace logs (#8204) Co-authored-by: psy42a <17905361+psy42a@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Gabriel Luiz Freitas Almeida --- .../base/langflow/services/tracing/service.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/backend/base/langflow/services/tracing/service.py b/src/backend/base/langflow/services/tracing/service.py index 03d00ca71..fdf4e8a05 100644 --- a/src/backend/base/langflow/services/tracing/service.py +++ b/src/backend/base/langflow/services/tracing/service.py @@ -275,10 +275,19 @@ class TracingService(Service): @staticmethod def _cleanup_inputs(inputs: dict[str, Any]): inputs = inputs.copy() - for key in inputs: - if "api_key" in key: - inputs[key] = "*****" # avoid logging api_keys for security reasons - return inputs + sensitive_keywords = {"api_key", "password", "server_url"} + + def _mask(obj: Any): + if isinstance(obj, dict): + return { + k: "*****" if any(word in k.lower() for word in sensitive_keywords) else _mask(v) + for k, v in obj.items() + } + if isinstance(obj, list): + return [_mask(i) for i in obj] + return obj + + return _mask(inputs) def _start_component_traces( self, @@ -344,6 +353,7 @@ class TracingService(Service): if component._vertex: trace_id = component._vertex.id trace_type = component.trace_type + inputs = self._cleanup_inputs(inputs) component_trace_context = ComponentTraceContext( trace_id, trace_name, trace_type, component._vertex, inputs, metadata ) From e0816e58a2081053838dcfe097706d2b0c3e075c Mon Sep 17 00:00:00 2001 From: "April I. Murphy" <36110273+aimurphy@users.noreply.github.com> Date: Mon, 18 Aug 2025 06:29:47 -0700 Subject: [PATCH 18/86] docs: Correct the instructions for additional language models that aren't built-in to the primary Language Model component. (#9406) * fix language about bundled models * Apply suggestions from code review --------- Co-authored-by: Mendon Kissling <59585235+mendonk@users.noreply.github.com> --- .../components-embedding-models.mdx | 24 ++++------- docs/docs/Components/components-models.mdx | 40 ++++++++++--------- docs/docs/Support/release-notes.mdx | 2 +- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/docs/docs/Components/components-embedding-models.mdx b/docs/docs/Components/components-embedding-models.mdx index 49f49d435..f3c9525c6 100644 --- a/docs/docs/Components/components-embedding-models.mdx +++ b/docs/docs/Components/components-embedding-models.mdx @@ -8,9 +8,7 @@ import Icon from "@site/src/components/icon"; **Embedding Model** components in Langflow generate text embeddings using a specified Large Language Model (LLM). Langflow includes an **Embedding Model** core component that has built-in support for some LLMs. -Alternatively, you can use any [additional **Embedding Model** component](#additional-embedding-model-components) in place of the core **Embedding Model** component. - -The built-in LLMs are appropriate for most text-based embedding model use cases in Langflow. +Alternatively, you can use [additional embedding models](#additional-embedding-model-components) in place of the core **Embedding Model** component. 
## Use Embedding Model components in a flow @@ -21,22 +19,16 @@ This flow loads a text file, splits the text into chunks, generates embeddings f ![A semantic search flow that uses Embedding Model, File, Split Text, Chroma DB, Chat Input, and Chat Output components](/img/component-embedding-models-add-chat.png) -:::tip -This example uses the **Embedding Model** core component. - -To use another model, you can replace the **Embedding Model** core component with any [additional **Embedding Model** component](#additional-embedding-model-components) in these steps. -However, your component might have different parameters than the **Embedding Model** core component. -::: - 1. Create a flow, add a **File** component, and then select a file containing text data, such as a PDF, that you can use to test the flow. 2. Add an **Embedding Model** component, and then provide a valid OpenAI API key. +You can enter component API keys directly or use Langflow global variables to reference your API keys. - By default, the **Embedding Model** component uses an OpenAI model. - If you want to use a different model, edit the **Model Name**, and **API Key** fields accordingly. - Or, see [Additional Embedding Model components](#additional-embedding-model-components) for other components that you can use in place of the **Embedding Model** core component. + :::tip + If your preferred embedding model provider or model isn't supported by the **Embedding Model** core component, you can use [additional embedding models](#additional-embedding-model-components) in place of the core component. - You can enter component API keys directly or use Langflow global variables to reference your API keys. + Search the **Components** menu for your preferred provider to find additional embedding models, such as the [**Hugging Face Embeddings Inference** component](/bundles-huggingface#hugging-face-embeddings-inference). + ::: 3. Add a [**Split Text** component](/components-processing#split-text) to your flow. This component splits text input into smaller chunks to be processed into embeddings. @@ -76,9 +68,9 @@ You can toggle parameters through the **Controls**, enable the **System Message** parameter, and then click **Close**. @@ -118,24 +119,27 @@ This is a specific data type that is only required by certain components, such a With this configuration, the **Language Model** component is meant to support an action completed by another component, rather than producing a text response for a standard chat-based interaction. For an example, the **Smart Function** component uses an LLM to create a function from natural language input. -## Additional Language Model components +## Additional language models -If your provider or model isn't supported by the **Language Model** core component, additional single-provider **Language Model** components are available in the [**Bundles**](/components-bundle-components) section of the **Components** menu. +If your provider or model isn't supported by the **Language Model** core component, additional provider-specific models are available in the [**Bundles**](/components-bundle-components) section of the **Components** menu. -You can use bundled components directly in your flows or you can connect them to other components that accept a [`LanguageModel`](/data-types#languagemodel) input, such as the **Language Model** and **Agent** components. +You can use these provider-specific components directly in your flows in the same place that you would use the **Language Model** core component. 
+Or, you can connect them to other components that accept a [`LanguageModel`](/data-types#languagemodel) input, such as the **Smart Function** and **Agent** components.
 
-For example, to connect bundled components to the **Language Model** core component, do the following:
+For example, to connect a provider-specific component to the **Agent** component, do the following:
 
-1. In the **Language Model** component, set **Model Provider** to **Custom**.
+1. In the **Components** menu, search for your preferred model provider, and then add the provider's LLM component to your flow.
+The component may not have `model` in the name.
+For example, Azure OpenAI LLMs are in the [**Azure OpenAI** component](/bundles-azure#azure-openai).
 
-   The field name changes to **Language Model** and the input port changes to a `LanguageModel` port.
+2. Configure the LLM component as needed to connect to your preferred model.
 
-2. Add a compatible bundled component to your flow, such as the [**Vertex AI** component for text generation](/bundles-vertexai).
-
-3. Change the bundled component's output type to `LanguageModel`.
-To do this, click **Model Response** near the component's output port, and then select **Language Model**.
+3. Change the LLM component's output type from **Model Response** to **Language Model**.
+The output port changes to a `LanguageModel` port.
 For more information, see [Language Model output types](#language-model-output-types).
 
-4. Connect the bundled component's output to the **Language Model** component's `LanguageModel` input port.
+4. Add an **Agent** component to the flow, and then set **Model Provider** to **Custom**.
+The **Model Provider** field changes to a **Language Model** field with a `LanguageModel` port.
 
-   The bundled component now provides the LLM configuration for the component that it is connected to, and you can continue building your flow as needed.
\ No newline at end of file
+5. Connect the LLM component's output to the **Agent** component's **Language Model** input.
+The **Agent** component now inherits the LLM settings from the connected LLM component instead of using any of the built-in models.
\ No newline at end of file
diff --git a/docs/docs/Support/release-notes.mdx b/docs/docs/Support/release-notes.mdx
index a23ffc08a..8faed6160 100644
--- a/docs/docs/Support/release-notes.mdx
+++ b/docs/docs/Support/release-notes.mdx
@@ -61,7 +61,7 @@ For all changes, see the [Changelog](https://github.com/langflow-ai/langflow/rel
 The [**Language Model** component](/components-models) and [**Embedding Model** component](/components-embedding-models) are now core components for your LLM and embeddings flows.
 They support multiple models and model providers, and allow you to experiment with different models without swapping out single-provider components.
 Find them in the **Components** menu in the **Models** category.
 
-  The single-provider components are still available for your flows in the **Components** menu in the [**Bundles**](/components-bundle-components) section, and you can connect them to the **Language Model** and **Embedding Model** components with the **Custom** provider option.
+  The single-provider components are still available for your flows in the **Components** menu in the [**Bundles**](/components-bundle-components) section, and you can use them to replace the **Language Model** and **Embedding Model** core components, or connect them to the **Agent** component with the **Custom** provider option.
 
 - MCP server one-click installation
 

From cfb29134bb72f85911864aa766b4ce0753168852 Mon Sep 17 00:00:00 2001
From: "April I. Murphy" <36110273+aimurphy@users.noreply.github.com>
Date: Mon, 18 Aug 2025 06:46:47 -0700
Subject: [PATCH 19/86] docs: Update, refresh, and expand Vector Store and Processing component documentation (#9407)

* fix anchors

* type convert and structured output components

* vector store intro and flow example

* reorg some vector search components by provider

* still on vector stores

* vector store example and outputs

* finish vector store page

* corrections to astra db vector store

* start split text component

* save file and smart function

* llm router

* parser

* still on dataframe

* finish datafram ops

* remove-extra-kv-pair-and-clarify-serialization-from-python

---------

Co-authored-by: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
---
 docs/docs/Components/components-agents.mdx    |    2 +-
 docs/docs/Components/components-data.mdx      |    2 +-
 docs/docs/Components/components-logic.mdx     |   27 +-
 .../docs/Components/components-processing.mdx | 1027 +++++++-----
 .../Components/components-vector-stores.mdx   | 1484 +++++++++--------
 docs/docs/Concepts/data-types.mdx             |   25 +-
 .../api-keys-and-authentication.mdx           |    2 +-
 docs/docs/Configuration/configuration-cli.mdx |    2 +-
 .../Configuration/environment-variables.mdx   |   12 +-
 docs/docs/Support/troubleshooting.mdx         |    2 +-
 docs/docs/Tutorials/chat-with-files.mdx       |    2 +
 docs/static/img/conditional-looping.png       |  Bin 0 -> 194185 bytes
 12 files changed, 1442 insertions(+), 1145 deletions(-)
 create mode 100644 docs/static/img/conditional-looping.png

diff --git a/docs/docs/Components/components-agents.mdx b/docs/docs/Components/components-agents.mdx
index b6c96e010..47e4bc81b 100644
--- a/docs/docs/Components/components-agents.mdx
+++ b/docs/docs/Components/components-agents.mdx
@@ -43,7 +43,7 @@ For examples of flows using the **Agent** and **MCP Tools** components, see the
 
 The **Agent** component is the primary agent actor in your agent flows.
 This component uses an LLM integration to respond to input, such as a chat message or file upload.
-The agent can use the tools already available in the base LLM model as well as additional tools that you connect to the **Agent** component's **Tools** port.
+The agent can use the tools already available in the base LLM as well as additional tools that you connect to the **Agent** component's **Tools** port.
 You can connect any Langflow component as a tool, including other **Agent** components and MCP servers through the [**MCP Tools** component](#mcp-connection).
 
 For more information about using this component, see [Use Langflow agents](/agents).

diff --git a/docs/docs/Components/components-data.mdx b/docs/docs/Components/components-data.mdx
index 3bc8b7a16..82e272275 100644
--- a/docs/docs/Components/components-data.mdx
+++ b/docs/docs/Components/components-data.mdx
@@ -396,7 +396,7 @@ There are two settings that control the output of the **URL** component at diffe
 
 When used as a standard component in a flow, the **URL** component must be connected to a component that accepts the selected output data type (`DataFrame` or `Message`).
 You can connect the **URL** component directly to a compatible component, or you can use a [**Type Convert** component](/components-processing#type-convert) to convert the output to another type before passing the data to other components if the data types aren't directly compatible.
-Processing components, like the **Type Convert** component, are useful with the **URL** component because it can extract a large amount of data from the crawled pages.
+**Processing** components like the **Type Convert** component are useful with the **URL** component because it can extract a large amount of data from the crawled pages.
 For example, if you only want to pass specific fields to other components, you can use a [**Parser** component](/components-processing#parser) to extract only that data from the crawled pages before passing the data to other components.
 
 When used in **Tool Mode** with an **Agent** component, the **URL** component can be connected directly to the **Agent** component's **Tools** port without converting the data.

diff --git a/docs/docs/Components/components-logic.mdx b/docs/docs/Components/components-logic.mdx
index 6c0389561..61e451ec5 100644
--- a/docs/docs/Components/components-logic.mdx
+++ b/docs/docs/Components/components-logic.mdx
@@ -33,7 +33,10 @@ The following example uses the **If-Else** component to check incoming chat mess
 
 1. Add an **If-Else** component to your flow, and then configure it as follows:
 
-   * **Text Input**: Connect the **Text Input** port to a **Chat Input** component.
+   * **Text Input**: Connect the **Text Input** port to a **Chat Input** component or another `Message` input.
+
+     If your input isn't in `Message` format, you can use another component to transform it, such as the [**Type Convert** component](/components-processing#type-convert) or [**Parser** component](/components-processing#parser).
+     If your input isn't appropriate for `Message` format, consider using another component for conditional routing, such as the [**Data Operations** component](/components-processing#data-operations).
 
    * **Match Text**: Enter `.*(urgent|warning|caution).*` so the component looks for these values in incoming input.
 
     The regex match is case sensitive, so if you need to look for all permutations of `warning`, enter `warning|Warning|WARNING`.
@@ -96,7 +99,10 @@ You can toggle parameters through the <Icon name="SlidersHorizontal" aria-hidden="true"/> **Controls** in the [component's header menu](/concepts-components#component-menus).
 
-Loop example
+### Loop example
 
 In the following example, the **Loop** component iterates over a CSV file until there are no rows left to process.
 In this case, the **Item** port passes each row to a **Type Convert** component, which converts the row into a `Message` object and passes it to a **Structured Output** component to be processed into structured data that is then passed back to the **Loop** component's **Looping** port.
@@ -145,7 +154,13 @@ After processing all rows, the **Loop** component loads the aggregated list of s
 
 For more examples of the **Loop** component, try the **Research Translation Loop** template in Langflow, or see the video tutorial [Mastering the Loop Component & Agentic RAG in Langflow](https://www.youtube.com/watch?v=9Wx7WODSKTo).
 :::
-
+### Conditional looping
+
+The **If-Else** component isn't compatible with the **Loop** component.
+If you need conditional loop events, redesign your flow to process conditions before the loop.
+For example, if you are looping over a `DataFrame`, you could use multiple [**DataFrame Operations** components](/components-processing#dataframe-operations) to conditionally filter data, and then run separate loops on each set of filtered data.
+
+![A flow with conditional looping.](/img/conditional-looping.png)
 
 ## Notify and Listen
 
diff --git a/docs/docs/Components/components-processing.mdx b/docs/docs/Components/components-processing.mdx
index ca2c1ec43..9855077fe 100644
--- a/docs/docs/Components/components-processing.mdx
+++ b/docs/docs/Components/components-processing.mdx
@@ -4,6 +4,8 @@ slug: /components-processing
 ---
 import Icon from "@site/src/components/icon";
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 
 Langflow's **Processing** components process and transform data within a flow.
 They have many uses, including:
@@ -11,7 +13,7 @@ They have many uses, including:
 * Feed instructions and context to your LLMs and agents with the [**Prompt Template** component](#prompt-template).
 * Extract content from larger chunks of data with a [**Parser** component](#parser).
 * Filter data with natural language with the [**Smart Function** component](#smart-function).
-* Save data to your local machine with the [**Save To File** component](#save-to-file).
+* Save data to your local machine with the [**Save File** component](#save-file).
 * Transform data into a different data type with the [**Type Convert** component](#type-convert) to pass it between incompatible components.
 
 ## Prompt Template
 
 See [Prompt Template](/components-prompts).
 
 ## Batch Run
 
 The **Batch Run** component runs a language model over _each row of one text column_ in a [`DataFrame`](/data-types#dataframe), and then returns a new `DataFrame` with the original text and an LLM response.
-
-The response contains the following columns:
+The output contains the following columns:
 
 * `text_input`: The original text from the input `DataFrame`
 * `model_response`: The model's response for each input
 * `batch_index`: The processing order, with a zero-based index
 * `metadata` (optional): Additional information about the processing
 
 ### Use the Batch Run component in a flow
 
-If you pass this output to a [**Parser** component](/components-processing#parser), you can use variables in the parsing template to reference these keys, such as `{text_input}` and `{model_response}`.
+If you pass the **Batch Run** output to a [**Parser** component](/components-processing#parser), you can use variables in the parsing template to reference these keys, such as `{text_input}` and `{model_response}`.
+This is demonstrated in the following example.
 
 ![A batch run component connected to OpenAI and a Parser](/img/component-batch-run.png)
 
 For example, `Create a business card for each name.`
 
    record_number: {batch_index}, name: {text_input}, summary: {model_response}
    ```
 
-7. To test the processing, click the **Parser** component, and then click **Run component**, and then click **Inspect output** to view the final `DataFrame`.
+7. To test the processing, click the **Parser** component, click **Run component**, and then click **Inspect output** to view the final `DataFrame`.
 
 You can also connect a **Chat Output** component to the **Parser** component if you want to see the output in the **Playground**.
 
@@ -78,26 +79,41 @@ You can toggle parameters through the
 
-3. Under **Select Keys**, click **Add more**.
+3. Under **Select Keys**, add keys for `name`, `username`, and `email`.
+Click **Add more** to add a field for each key.
 
-4. Connect a **Chat Output** component.
+   For this example, assume that the webhook will receive consistent payloads that always contain `name`, `username`, and `email` keys.
+   The **Select Keys** operation extracts the value of these keys from each incoming payload.
+
+4. Optional: If you want to view the output in the **Playground**, connect the **Data Operations** component's output to a **Chat Output** component.
 
   ![A flow with Webhook, Data Operations, and Chat Output components](/img/component-data-operations-select-key.png)
 
-5. To test the flow, send the following request to your flow's webhook endpoint, and then open the **Playground** to see the resulting output from processing the payload.
+5. To test the flow, send the following request to your flow's webhook endpoint.
+For more information about the webhook endpoint, see [Trigger flows with webhooks](/webhook).
 
   ```bash
   curl -X POST "http://$LANGFLOW_SERVER_URL/api/v1/webhook/$FLOW_ID" \
@@ -128,15 +144,19 @@ For this example, select the **Select Keys** operation to extract specific user
   }'
   ```
 
+6. To view the `Data` resulting from the **Select Keys** operation, do one of the following:
+
+   * If you attached a **Chat Output** component, open the **Playground** to see the result as a chat message.
+   * Click