feat: Add Cleanlab's AI Reliability Bundle to Langflow (#8049)

* Add Cleanlab bundle * Fix icon logic and add support for light/dark mode dynamic logo * Remove stuff * Modify components and add documentation * [autofix.ci] apply automated fixes * Add sidebar code * [autofix.ci] apply automated fixes * Update docs/sidebars.js Co-authored-by: Mendon Kissling <59585235+mendonk@users.noreply.github.com> * Update docs/docs/Integrations/Cleanlab/integrations-cleanlab.md Co-authored-by: Mendon Kissling <59585235+mendonk@users.noreply.github.com> * Update docs/docs/Integrations/Cleanlab/integrations-cleanlab.md Co-authored-by: Mendon Kissling <59585235+mendonk@users.noreply.github.com> * Update docs/docs/Integrations/Cleanlab/integrations-cleanlab.md Co-authored-by: Mendon Kissling <59585235+mendonk@users.noreply.github.com> * copy edits * update samples * style check fixes * [autofix.ci] apply automated fixes * style check fix 2 --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Co-authored-by: Mike Fortman <michael.fortman@datastax.com>
2025-06-04 18:56:08 -04:00 · 2025-06-04 18:56:08 -04:00 · 087a22c246
commit 087a22c246
parent 06f7279977
20 changed files with 9380 additions and 5422 deletions
--- a/src/backend/base/langflow/components/cleanlab/__init__.py
+++ b/src/backend/base/langflow/components/cleanlab/__init__.py
@ -0,0 +1,5 @@
+from .cleanlab_evaluator import CleanlabEvaluator
+from .cleanlab_rag_evaluator import CleanlabRAGEvaluator
+from .cleanlab_remediator import CleanlabRemediator
+
+# Names exported by the Cleanlab component package (the three classes imported above).
+__all__ = ["CleanlabEvaluator", "CleanlabRAGEvaluator", "CleanlabRemediator"]
--- a/src/backend/base/langflow/components/cleanlab/cleanlab_evaluator.py
+++ b/src/backend/base/langflow/components/cleanlab/cleanlab_evaluator.py
@ -0,0 +1,145 @@
+from cleanlab_tlm import TLM
+
+from langflow.custom import Component
+from langflow.io import (
+    DropdownInput,
+    MessageTextInput,
+    Output,
+    SecretStrInput,
+)
+from langflow.schema.message import Message
+
+
+class CleanlabEvaluator(Component):
+    """A component that evaluates the trustworthiness of LLM responses using Cleanlab.
+
+    This component takes a prompt and response pair, along with optional system instructions,
+    and uses Cleanlab's evaluation algorithms to generate a trust score and explanation.
+
+    Inputs:
+        - system_prompt (MessageTextInput): Optional system-level instructions prepended to the user prompt.
+        - prompt (MessageTextInput): The user's prompt or query sent to the LLM.
+        - response (MessageTextInput): The response generated by the LLM to be evaluated. This should come from the
+          LLM component, i.e. OpenAI, Gemini, etc.
+        - api_key (SecretStrInput): Your Cleanlab API key.
+        - model (DropdownInput): The model used by Cleanlab to evaluate the response (can differ from the
+          generation model).
+        - quality_preset (DropdownInput): Tradeoff setting for accuracy vs. speed and cost. Higher presets are
+          slower but more accurate.
+
+    Outputs:
+        - response_passthrough (Message): The original response, passed through for downstream use.
+        - score (number): A float between 0 and 1 indicating Cleanlab's trustworthiness score for the response.
+        - explanation (Message): A textual explanation of why the response received its score.
+
+    This component works well in conjunction with the CleanlabRemediator to create a complete trust evaluation
+    and remediation pipeline.
+
+    More details on the evaluation metrics can be found here: https://help.cleanlab.ai/tlm/tutorials/tlm/
+    """
+
+    display_name = "Cleanlab Evaluator"
+    description = "Evaluates any LLM response using Cleanlab and outputs trust score and explanation."
+    icon = "Cleanlab"
+    name = "CleanlabEvaluator"
+
+    inputs = [
+        MessageTextInput(
+            name="system_prompt",
+            display_name="System Message",
+            info="System-level instructions prepended to the user query.",
+            value="",
+        ),
+        MessageTextInput(
+            name="prompt",
+            display_name="Prompt",
+            info="The user's query to the model.",
+            required=True,
+        ),
+        MessageTextInput(
+            name="response",
+            display_name="Response",
+            info="The response to the user's query.",
+            required=True,
+        ),
+        SecretStrInput(
+            name="api_key",
+            display_name="Cleanlab API Key",
+            info="Your Cleanlab API key.",
+            required=True,
+        ),
+        DropdownInput(
+            name="model",
+            display_name="Cleanlab Evaluation Model",
+            options=[
+                "gpt-4.1",
+                "gpt-4.1-mini",
+                "gpt-4.1-nano",
+                "o4-mini",
+                "o3",
+                "gpt-4.5-preview",
+                "gpt-4o-mini",
+                "gpt-4o",
+                "o3-mini",
+                "o1",
+                "o1-mini",
+                "gpt-4",
+                "gpt-3.5-turbo-16k",
+                "claude-3.7-sonnet",
+                "claude-3.5-sonnet-v2",
+                "claude-3.5-sonnet",
+                "claude-3.5-haiku",
+                "claude-3-haiku",
+                "nova-micro",
+                "nova-lite",
+                "nova-pro",
+            ],
+            info="The model Cleanlab uses to evaluate the response. This does NOT need to be the same model that "
+            "generated the response.",
+            value="gpt-4o-mini",
+            required=True,
+            advanced=True,
+        ),
+        DropdownInput(
+            name="quality_preset",
+            display_name="Quality Preset",
+            options=["base", "low", "medium", "high", "best"],
+            value="medium",
+            info="This determines the accuracy, latency, and cost of the evaluation. Higher quality is generally "
+            "slower but more accurate.",
+            required=True,
+            advanced=True,
+        ),
+    ]
+
+    outputs = [
+        Output(display_name="Response", name="response_passthrough", method="pass_response", types=["Message"]),
+        Output(display_name="Trust Score", name="score", method="get_score", types=["number"]),
+        Output(display_name="Explanation", name="explanation", method="get_explanation", types=["Message"]),
+    ]
+
+    def _evaluate_once(self):
+        if not hasattr(self, "_cached_result"):
+            full_prompt = f"{self.system_prompt}\n\n{self.prompt}" if self.system_prompt else self.prompt
+            tlm = TLM(
+                api_key=self.api_key,
+                options={"log": ["explanation"], "model": self.model},
+                quality_preset=self.quality_preset,
+            )
+            self._cached_result = tlm.get_trustworthiness_score(full_prompt, self.response)
+        return self._cached_result
+
+    def get_score(self) -> float:
+        result = self._evaluate_once()
+        score = result.get("trustworthiness_score", 0.0)
+        self.status = f"Trust score: {score:.2f}"
+        return score
+
+    def get_explanation(self) -> Message:
+        result = self._evaluate_once()
+        explanation = result.get("log", {}).get("explanation", "No explanation returned.")
+        return Message(text=explanation)
+
+    def pass_response(self) -> Message:
+        self.status = "Passing through response."
+        return Message(text=self.response)
--- a/src/backend/base/langflow/components/cleanlab/cleanlab_rag_evaluator.py
+++ b/src/backend/base/langflow/components/cleanlab/cleanlab_rag_evaluator.py
@ -0,0 +1,254 @@
+from cleanlab_tlm import TrustworthyRAG, get_default_evals
+
+from langflow.custom import Component
+from langflow.io import (
+    BoolInput,
+    DropdownInput,
+    MessageTextInput,
+    Output,
+    SecretStrInput,
+)
+from langflow.schema.message import Message
+
+
+class CleanlabRAGEvaluator(Component):
+    """A component that evaluates the quality of RAG (Retrieval-Augmented Generation) outputs using Cleanlab.
+
+    This component takes a query, retrieved context, and generated response from a RAG pipeline,
+    and uses Cleanlab's evaluation algorithms to assess various aspects of the RAG system's performance.
+
+    The component can evaluate:
+    - Overall trustworthiness of the LLM generated response
+    - Context sufficiency (whether the retrieved context contains information needed to answer the query)
+    - Response groundedness (whether the response is supported directly by the context)
+    - Response helpfulness (whether the response effectively addresses the user's query)
+    - Query ease (whether the user query seems easy for an AI system to properly handle, useful to diagnose
+      queries that are: complex, vague, tricky, or disgruntled-sounding)
+
+    Outputs:
+        - Trust Score: A score between 0-1 corresponding to the trustworthiness of the response. A higher score
+          indicates a higher confidence that the response is correct/good.
+        - Explanation: An LLM generated explanation of the trustworthiness assessment
+        - Other Evals: Additional evaluation metrics for selected evaluation types in the "Controls" tab
+        - Evaluation Summary: A comprehensive summary of context, query, response, and selected evaluation results
+
+    This component works well in conjunction with the CleanlabRemediator to create a complete trust evaluation
+    and remediation pipeline.
+
+    More details on the evaluation metrics can be found here: https://help.cleanlab.ai/tlm/use-cases/tlm_rag/
+    """
+
+    display_name = "Cleanlab RAG Evaluator"
+    description = "Evaluates context, query, and response from a RAG pipeline using Cleanlab and outputs trust metrics."
+    icon = "Cleanlab"
+    name = "CleanlabRAGEvaluator"
+
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="Cleanlab API Key",
+            info="Your Cleanlab API key.",
+            required=True,
+        ),
+        DropdownInput(
+            name="model",
+            display_name="Cleanlab Evaluation Model",
+            options=[
+                "gpt-4.1",
+                "gpt-4.1-mini",
+                "gpt-4.1-nano",
+                "o4-mini",
+                "o3",
+                "gpt-4.5-preview",
+                "gpt-4o-mini",
+                "gpt-4o",
+                "o3-mini",
+                "o1",
+                "o1-mini",
+                "gpt-4",
+                "gpt-3.5-turbo-16k",
+                "claude-3.7-sonnet",
+                "claude-3.5-sonnet-v2",
+                "claude-3.5-sonnet",
+                "claude-3.5-haiku",
+                "claude-3-haiku",
+                "nova-micro",
+                "nova-lite",
+                "nova-pro",
+            ],
+            info="The model Cleanlab uses to evaluate the context, query, and response. This does NOT need to be "
+            "the same model that generated the response.",
+            value="gpt-4o-mini",
+            required=True,
+            advanced=True,
+        ),
+        DropdownInput(
+            name="quality_preset",
+            display_name="Quality Preset",
+            options=["base", "low", "medium"],
+            value="medium",
+            info="This determines the accuracy, latency, and cost of the evaluation. Higher quality is generally "
+            "slower but more accurate.",
+            required=True,
+            advanced=True,
+        ),
+        MessageTextInput(
+            name="context",
+            display_name="Context",
+            info="The context retrieved for the given query.",
+            required=True,
+        ),
+        MessageTextInput(
+            name="query",
+            display_name="Query",
+            info="The user's query.",
+            required=True,
+        ),
+        MessageTextInput(
+            name="response",
+            display_name="Response",
+            info="The response generated by the LLM.",
+            required=True,
+        ),
+        BoolInput(
+            name="run_context_sufficiency",
+            display_name="Run Context Sufficiency",
+            value=False,
+            advanced=True,
+        ),
+        BoolInput(
+            name="run_response_groundedness",
+            display_name="Run Response Groundedness",
+            value=False,
+            advanced=True,
+        ),
+        BoolInput(
+            name="run_response_helpfulness",
+            display_name="Run Response Helpfulness",
+            value=False,
+            advanced=True,
+        ),
+        BoolInput(
+            name="run_query_ease",
+            display_name="Run Query Ease",
+            value=False,
+            advanced=True,
+        ),
+    ]
+
+    outputs = [
+        Output(display_name="Response", name="response_passthrough", method="pass_response", types=["Message"]),
+        Output(display_name="Trust Score", name="trust_score", method="get_trust_score", types=["number"]),
+        Output(display_name="Explanation", name="trust_explanation", method="get_trust_explanation", types=["Message"]),
+        Output(display_name="Other Evals", name="other_scores", method="get_other_scores", types=["Data"]),
+        Output(
+            display_name="Evaluation Summary",
+            name="evaluation_summary",
+            method="get_evaluation_summary",
+            types=["Message"],
+        ),
+    ]
+
+    def _evaluate_once(self):
+        if not hasattr(self, "_cached_result"):
+            try:
+                self.status = "Configuring selected evals..."
+                default_evals = get_default_evals()
+                enabled_names = []
+                if self.run_context_sufficiency:
+                    enabled_names.append("context_sufficiency")
+                if self.run_response_groundedness:
+                    enabled_names.append("response_groundedness")
+                if self.run_response_helpfulness:
+                    enabled_names.append("response_helpfulness")
+                if self.run_query_ease:
+                    enabled_names.append("query_ease")
+
+                selected_evals = [e for e in default_evals if e.name in enabled_names]
+
+                validator = TrustworthyRAG(
+                    api_key=self.api_key,
+                    quality_preset=self.quality_preset,
+                    options={"log": ["explanation"], "model": self.model},
+                    evals=selected_evals,
+                )
+
+                self.status = f"Running evals: {[e.name for e in selected_evals]}"
+                self._cached_result = validator.score(
+                    query=self.query,
+                    context=self.context,
+                    response=self.response,
+                )
+                self.status = "Evaluation complete."
+
+            except Exception as e:  # noqa: BLE001
+                self.status = f"Evaluation failed: {e!s}"
+                self._cached_result = {}
+        return self._cached_result
+
+    def pass_response(self) -> Message:
+        self.status = "Passing through response."
+        return Message(text=self.response)
+
+    def get_trust_score(self) -> float:
+        score = self._evaluate_once().get("trustworthiness", {}).get("score", 0.0)
+        self.status = f"Trust Score: {score:.3f}"
+        return score
+
+    def get_trust_explanation(self) -> Message:
+        explanation = self._evaluate_once().get("trustworthiness", {}).get("log", {}).get("explanation", "")
+        self.status = "Trust explanation extracted."
+        return Message(text=explanation)
+
+    def get_other_scores(self) -> dict:
+        result = self._evaluate_once()
+
+        selected = {
+            "context_sufficiency": self.run_context_sufficiency,
+            "response_groundedness": self.run_response_groundedness,
+            "response_helpfulness": self.run_response_helpfulness,
+            "query_ease": self.run_query_ease,
+        }
+
+        filtered_scores = {key: result[key]["score"] for key, include in selected.items() if include and key in result}
+
+        self.status = f"{len(filtered_scores)} other evals returned."
+        return filtered_scores
+
+    def get_evaluation_summary(self) -> Message:
+        result = self._evaluate_once()
+
+        query_text = self.query.strip()
+        context_text = self.context.strip()
+        response_text = self.response.strip()
+
+        trust = result.get("trustworthiness", {}).get("score", 0.0)
+        trust_exp = result.get("trustworthiness", {}).get("log", {}).get("explanation", "")
+
+        selected = {
+            "context_sufficiency": self.run_context_sufficiency,
+            "response_groundedness": self.run_response_groundedness,
+            "response_helpfulness": self.run_response_helpfulness,
+            "query_ease": self.run_query_ease,
+        }
+
+        other_scores = {key: result[key]["score"] for key, include in selected.items() if include and key in result}
+
+        metrics = f"Trustworthiness: {trust:.3f}"
+        if trust_exp:
+            metrics += f"\nExplanation: {trust_exp}"
+        if other_scores:
+            metrics += "\n" + "\n".join(f"{k.replace('_', ' ').title()}: {v:.3f}" for k, v in other_scores.items())
+
+        summary = (
+            f"Query:\n{query_text}\n"
+            "-----\n"
+            f"Context:\n{context_text}\n"
+            "-----\n"
+            f"Response:\n{response_text}\n"
+            "------------------------------\n"
+            f"{metrics}"
+        )
+
+        self.status = "Evaluation summary built."
+        return Message(text=summary)
--- a/src/backend/base/langflow/components/cleanlab/cleanlab_remediator.py
+++ b/src/backend/base/langflow/components/cleanlab/cleanlab_remediator.py
@ -0,0 +1,131 @@
+from langflow.custom import Component
+from langflow.field_typing.range_spec import RangeSpec
+from langflow.io import BoolInput, FloatInput, HandleInput, MessageTextInput, Output, PromptInput
+from langflow.schema.message import Message
+
+
+class CleanlabRemediator(Component):
+    """Remediates potentially untrustworthy LLM responses based on trust scores computed by the Cleanlab Evaluator.
+
+    This component takes a response and its associated trust score,
+    and applies remediation strategies based on configurable thresholds and settings.
+
+    Inputs:
+        - response (MessageTextInput): The original LLM-generated response to be evaluated and possibly remediated.
+          The CleanlabEvaluator passes this response through.
+        - score (HandleInput): The trust score output from CleanlabEvaluator (expected to be a float between 0 and 1).
+        - explanation (MessageTextInput): Optional textual explanation for the trust score, to be included in the
+          output.
+        - threshold (Input[float]): Minimum trust score required to accept the response. If the score is lower, the
+          response is remediated.
+        - show_untrustworthy_response (BoolInput): If true, returns the original response with a warning; if false,
+          returns fallback text.
+        - untrustworthy_warning_text (PromptInput): Text warning to append to responses deemed untrustworthy (when
+          showing them).
+        - fallback_text (PromptInput): Replacement message returned if the response is untrustworthy and should be
+          hidden.
+
+    Outputs:
+        - remediated_response (Message): Either:
+            • the original response,
+            • the original response with appended warning, or
+            • the fallback response,
+          depending on the trust score and configuration.
+
+    This component is typically used downstream of CleanlabEvaluator or CleanlabRagValidator
+    to take appropriate action on low-trust responses and inform users accordingly.
+    """
+
+    display_name = "Cleanlab Remediator"
+    description = (
+        "Remediates an untrustworthy response based on trust score from the Cleanlab Evaluator, "
+        "score threshold, and message handling settings."
+    )
+    icon = "Cleanlab"
+    name = "CleanlabRemediator"
+
+    inputs = [
+        MessageTextInput(
+            name="response",
+            display_name="Response",
+            info="The response to the user's query.",
+            required=True,
+        ),
+        HandleInput(
+            name="score",
+            display_name="Trust Score",
+            info="The trustworthiness score output from the Cleanlab Evaluator.",
+            input_types=["number"],
+            required=True,
+        ),
+        MessageTextInput(
+            name="explanation",
+            display_name="Explanation",
+            info="The explanation from the Cleanlab Evaluator.",
+            required=False,
+        ),
+        FloatInput(
+            name="threshold",
+            display_name="Threshold",
+            field_type="float",
+            value=0.7,
+            range_spec=RangeSpec(min=0.0, max=1.0, step=0.05),
+            info="Minimum score required to show the response unmodified. Reponses with scores above this threshold "
+            "are considered trustworthy. Reponses with scores below this threshold are considered untrustworthy and "
+            "will be remediated based on the settings below.",
+            required=True,
+            show=True,
+        ),
+        BoolInput(
+            name="show_untrustworthy_response",
+            display_name="Show Untrustworthy Response",
+            info="If enabled, and the trust score is below the threshold, the original response is shown with the "
+            "added warning. If disabled, and the trust score is below the threshold, the fallback answer is returned.",
+            value=True,
+        ),
+        PromptInput(
+            name="untrustworthy_warning_text",
+            display_name="Warning for Untrustworthy Response",
+            info="Warning to append to the response if Show Untrustworthy Response is enabled and trust score is "
+            "below the threshold.",
+            value="⚠️ WARNING: The following response is potentially untrustworthy.",
+        ),
+        PromptInput(
+            name="fallback_text",
+            display_name="Fallback Answer",
+            info="Response returned if the trust score is below the threshold and 'Show Untrustworthy Response' is "
+            "disabled.",
+            value="Based on the available information, I cannot provide a complete answer to this question.",
+        ),
+    ]
+
+    outputs = [
+        Output(
+            display_name="Remediated Message",
+            name="remediated_response",
+            method="remediate_response",
+            types=["Message"],
+        ),
+    ]
+
+    def remediate_response(self) -> Message:
+        if self.score >= self.threshold:
+            self.status = f"Score {self.score:.2f} ≥ threshold {self.threshold:.2f} → accepted"
+            return Message(
+                text=f"{self.response}\n\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n**Trust Score:** {self.score:.2f}"
+            )
+
+        self.status = f"Score {self.score:.2f} < threshold {self.threshold:.2f} → flagged"
+
+        if self.show_untrustworthy_response:
+            parts = [
+                self.response,
+                "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
+                f"**{self.untrustworthy_warning_text.strip()}**",
+                f"**Trust Score:** {self.score:.2f}",
+            ]
+            if self.explanation:
+                parts.append(f"**Explanation:** {self.explanation}")
+            return Message(text="\n\n".join(parts))
+
+        return Message(text=self.fallback_text)
--- a/src/backend/base/langflow/initial_setup/starter_projects/Pokédex
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Pokédex
--- a/src/frontend/package-lock.json
+++ b/src/frontend/package-lock.json
--- a/src/frontend/src/icons/Cleanlab/Cleanlab.jsx
+++ b/src/frontend/src/icons/Cleanlab/Cleanlab.jsx
@ -0,0 +1,28 @@
+import { stringToBool } from "@/utils/utils";
+import { forwardRef } from "react";
+
+const SvgCleanlab = (props) => {
+  const isDark = stringToBool(props.isdark);
+
+  return (
+    <svg
+      width={697}
+      height={697}
+      viewBox="0 0 697 697"
+      fill="none"
+      xmlns="http://www.w3.org/2000/svg"
+      {...props}
+    >
+      <path
+        d="M341.847921,185.264125c-13.42293,23.249376-12.347471,50.877388,0.401709,72.451244,3.225931,5.459143,3.345505,12.207007,0.174798,17.69869l-48.852472,84.61514c-3.18275,5.512656-9.1167,8.767205-15.481774,8.699138-12.415908-.13284-25.026497,2.950045-36.559278,9.6085-34.331853,19.821505-46.094999,63.721679-26.273494,98.053532,19.821549,34.33193,63.721743,46.094757,98.053597,26.273252,11.532781-6.658454,20.508022-16.037949,26.600933-26.85686,3.123545-5.546426,8.909087-9.058026,15.274517-9.058117l97.705052.000012c6.314731.000014,12.107535,3.432422,15.203594,8.93605,12.65789,22.501254,37.082709,37.493928,64.94011,36.600417,38.349672-1.230173,69.281722-33.108828,69.419913-71.478015.143192-39.764396-32.048611-72.044287-71.779567-72.044476-26.846015-.00009-50.235009,14.745435-62.54359,36.573359-3.114983,5.523677-8.899051,9.00096-15.239989,9.000931l-95.950864.000159c-15.201841-.000006-24.703043-16.45657-17.102128-29.621754l47.975569-83.095807c3.157398-5.468647,9.026328-8.769166,15.340538-8.698443,25.815666.288662,51.012155-13.367473,64.167032-37.93976,17.644134-32.958183,6.647958-74.587984-24.980446-94.51786-34.663711-21.84271-80.199374-10.350384-100.49376,24.800671Z
+         M323.785578,413.729145c14.784192,25.606971,6.010559,58.350615-19.596489,73.134851s-58.350647,6.01068-73.134927-19.596445-6.010559-58.350615,19.596489-73.134851c25.607048-14.784236,58.350647-6.01068,73.134927,19.596445Z"
+        fill={isDark ? "#FFFFFF" : "#000000"}
+      />
+      <path
+        d="M572.283355,593.964596c-47.042646,32.305387-103.837611,51.438401-165.06516,52.07923-164.731918,1.724141-300.426141-132.22655-300.738501-296.967195-.312099-164.602718,133.029516-298.13692,297.559776-298.13692,62.422076,0,120.354912,19.221075,168.202634,52.06751,1.932802,1.326827,4.559313-.028862,4.559337-2.373257l.000546-53.16979c.000011-1.048757-.562682-2.02097-1.475604-2.537173C525.033846,16.489584,466.971795.188457,405.117629.001612,212.700173-.579629,55.540912,156.084111,55.54226,348.502446c.001349,192.470068,156.029578,348.497554,348.499958,348.497554,62.256594,0,120.700306-16.324561,171.284182-44.926817.912763-.516115,1.475336-1.488098,1.475331-2.536674l-.000234-53.172985c-.00001-2.322378-2.603713-3.713615-4.518142-2.398928Z"
+        fill={isDark ? "#FFFFFF" : "#000000"}
+      />
+    </svg>
+  );
+};
+
+export default SvgCleanlab;
--- a/src/frontend/src/icons/Cleanlab/index.tsx
+++ b/src/frontend/src/icons/Cleanlab/index.tsx
@ -0,0 +1,11 @@
+import { useDarkStore } from "@/stores/darkStore";
+import React, { forwardRef } from "react";
+import SvgCleanlab from "./Cleanlab";
+
+/**
+ * Cleanlab icon that follows the app theme: reads the `dark` flag from the
+ * dark-mode store and hands it to the SVG as the string prop `isdark`,
+ * together with the forwarded ref and any caller props.
+ */
+export const CleanlabIcon = forwardRef<
+  SVGSVGElement,
+  React.PropsWithChildren<{}>
+>((props, ref) => {
+  const darkModeEnabled = useDarkStore((store) => store.dark);
+  const isdark = darkModeEnabled.toString();
+
+  return <SvgCleanlab ref={ref} isdark={isdark} {...props} />;
+});
--- a/src/frontend/src/icons/lazyIconImports.ts
+++ b/src/frontend/src/icons/lazyIconImports.ts
@ -42,6 +42,8 @@ export const lazyIconsMapping = {
    import("@/icons/Cassandra").then((mod) => ({ default: mod.CassandraIcon })),
  Chroma: () =>
    import("@/icons/ChromaIcon").then((mod) => ({ default: mod.ChromaIcon })),
+  Cleanlab: () =>
+    import("@/icons/Cleanlab").then((mod) => ({ default: mod.CleanlabIcon })),
  Clickhouse: () =>
    import("@/icons/Clickhouse").then((mod) => ({
      default: mod.ClickhouseIcon,
--- a/src/frontend/src/utils/styleUtils.ts
+++ b/src/frontend/src/utils/styleUtils.ts
@ -281,6 +281,7 @@ export const SIDEBAR_BUNDLES = [
    name: "homeassistant",
    icon: "HomeAssistant",
  },
+  { display_name: "Cleanlab", name: "cleanlab", icon: "Cleanlab" },
  { display_name: "Search", name: "search", icon: "Search" },
  { display_name: "Tavily", name: "tavily", icon: "TavilyIcon" },
 ];