Merge remote-tracking branch 'origin/main' into dev

2023-09-13 21:32:51 -03:00 · 2023-09-13 21:32:51 -03:00 · c6be7cdecd
commit c6be7cdecd
parent 8121dc0a8b a35e6a6556
25 changed files with 740 additions and 515 deletions
--- a/src/backend/langflow/components/agents/OpenAIConversationalAgent.py
+++ b/src/backend/langflow/components/agents/OpenAIConversationalAgent.py
@ -42,8 +42,8 @@ class ConversationalAgent(CustomComponent):
        self,
        model_name: str,
        openai_api_key: str,
-        openai_api_base: str,
        tools: Tool,
+        openai_api_base: Optional[str] = None,
        memory: Optional[BaseMemory] = None,
        system_message: Optional[SystemMessagePromptTemplate] = None,
        max_token_limit: int = 2000,
--- a/src/backend/langflow/components/llms/HuggingFaceEndpoints.py
+++ b/src/backend/langflow/components/llms/HuggingFaceEndpoints.py
@ -0,0 +1,42 @@
+from typing import Optional
+from langflow import CustomComponent
+from langchain.llms import HuggingFaceEndpoint
+from langchain.llms.base import BaseLLM
+
+
+class HuggingFaceEndpointsComponent(CustomComponent):
+    """Custom component that builds a LangChain ``HuggingFaceEndpoint`` LLM."""
+
+    display_name: str = "Hugging Face Inference API"
+    description: str = "LLM model from Hugging Face Inference API."
+
+    def build_config(self):
+        """Return the per-field UI configuration for the arguments of ``build``."""
+        return {
+            "endpoint_url": {"display_name": "Endpoint URL", "password": True},
+            "task": {
+                "display_name": "Task",
+                "type": "select",
+                "options": ["text2text-generation", "text-generation", "summarization"],
+            },
+            "huggingfacehub_api_token": {"display_name": "API token", "password": True},
+            "model_kwargs": {
+                "display_name": "Model Keyword Arguments",
+                "field_type": "code",
+            },
+            "code": {"show": False},
+        }
+
+    def build(
+        self,
+        endpoint_url: str,
+        task="text2text-generation",
+        huggingfacehub_api_token: Optional[str] = None,
+        model_kwargs: Optional[dict] = None,
+    ) -> BaseLLM:
+        """Create the ``HuggingFaceEndpoint`` LLM.
+
+        Args:
+            endpoint_url: URL of the inference endpoint.
+            task: Task name forwarded to the endpoint wrapper.
+            huggingfacehub_api_token: API token, if any.
+            model_kwargs: Extra keyword arguments for the model; ``None`` is
+                treated as an empty dict.
+
+        Returns:
+            The constructed ``HuggingFaceEndpoint`` instance.
+
+        Raises:
+            ValueError: If constructing the endpoint wrapper fails; the
+                original exception is chained as the cause.
+        """
+        try:
+            output = HuggingFaceEndpoint(
+                endpoint_url=endpoint_url,
+                task=task,
+                huggingfacehub_api_token=huggingfacehub_api_token,
+                # Forward the user-supplied kwargs; they used to be accepted
+                # by build() and then silently dropped.
+                model_kwargs=model_kwargs or {},
+            )
+        except Exception as e:
+            raise ValueError("Could not connect to HuggingFace Endpoints API.") from e
+        return output
--- a/src/backend/langflow/components/llms/init.py
+++ b/src/backend/langflow/components/llms/init.py
--- a/src/backend/langflow/components/retrievers/MetalRetriever.py
+++ b/src/backend/langflow/components/retrievers/MetalRetriever.py
@ -0,0 +1,28 @@
+from typing import Optional
+from langflow import CustomComponent
+from langchain.retrievers import MetalRetriever
+from langchain.schema import BaseRetriever
+from metal_sdk.metal import Metal  # type: ignore
+
+
+class MetalRetrieverComponent(CustomComponent):
+    """Custom component that wraps a ``Metal`` client in a ``MetalRetriever``."""
+
+    display_name: str = "Metal Retriever"
+    description: str = "Retriever that uses the Metal API."
+
+    def build_config(self):
+        """Return the per-field UI configuration for the arguments of ``build``."""
+        secret = {"password": True}
+        return {
+            "api_key": {"display_name": "API Key", **secret},
+            "client_id": {"display_name": "Client ID", **secret},
+            "index_id": {"display_name": "Index ID"},
+            "params": {"display_name": "Parameters", "field_type": "code"},
+            "code": {"show": False},
+        }
+
+    def build(
+        self, api_key: str, client_id: str, index_id: str, params: Optional[dict] = None
+    ) -> BaseRetriever:
+        """Build the retriever.
+
+        Raises ``ValueError`` (chained to the original error) when the
+        ``Metal`` client cannot be constructed. A missing or empty ``params``
+        is passed on as an empty dict.
+        """
+        try:
+            client = Metal(api_key=api_key, client_id=client_id, index_id=index_id)
+        except Exception as exc:
+            raise ValueError("Could not connect to Metal API.") from exc
+        retriever_params = params if params else {}
+        return MetalRetriever(client=client, params=retriever_params)
--- a/src/backend/langflow/components/retrievers/init.py
+++ b/src/backend/langflow/components/retrievers/init.py
--- a/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py
+++ b/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py
@ -0,0 +1,82 @@
+from typing import Optional
+from langflow import CustomComponent
+from langchain.text_splitter import Language
+from langchain.schema import Document
+from langflow.utils.util import build_loader_repr_from_documents
+
+
+class LanguageRecursiveTextSplitterComponent(CustomComponent):
+    """Custom component that splits documents with language-aware separators."""
+
+    display_name: str = "Language Recursive Text Splitter"
+    description: str = "Split text into chunks of a specified length based on language."
+    documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter"
+
+    def build_config(self):
+        """Return the per-field UI configuration for the arguments of ``build``."""
+        options = [x.value for x in Language]
+        return {
+            "documents": {
+                "display_name": "Documents",
+                "info": "The documents to split.",
+            },
+            "separator_type": {
+                "display_name": "Separator Type",
+                "info": "The type of separator to use.",
+                "field_type": "str",
+                "options": options,
+                # Must be one of ``options``; the enum values are lower-case,
+                # so the previous "Python" default was not a valid choice.
+                "value": "python",
+            },
+            # NOTE(review): build() takes no "separators" argument; confirm
+            # whether this entry is still needed.
+            "separators": {
+                "display_name": "Separators",
+                "info": "The characters to split on.",
+                "is_list": True,
+            },
+            "chunk_size": {
+                "display_name": "Chunk Size",
+                "info": "The maximum length of each chunk.",
+                "field_type": "int",
+                "value": 1000,
+            },
+            "chunk_overlap": {
+                "display_name": "Chunk Overlap",
+                "info": "The amount of overlap between chunks.",
+                "field_type": "int",
+                "value": 200,
+            },
+            "code": {"show": False},
+        }
+
+    def build(
+        self,
+        documents: list[Document],
+        chunk_size: Optional[int] = 1000,
+        chunk_overlap: Optional[int] = 200,
+        separator_type: Optional[str] = "python",
+    ) -> list[Document]:
+        """
+        Split documents into chunks using the separators of a language.
+
+        Args:
+            documents (list[Document]): The documents to split.
+            chunk_size (int): The maximum length of each chunk.
+            chunk_overlap (int): The amount of overlap between chunks.
+            separator_type (str): A ``Language`` value such as "python". A
+                missing value falls back to "python"; a value that only
+                differs by letter case (e.g. "Python") is accepted.
+
+        Returns:
+            list[Document]: The resulting chunks.
+
+        Raises:
+            ValueError: If ``separator_type`` is not a known ``Language``.
+        """
+        from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+        # Make sure chunk_size and chunk_overlap are ints
+        if isinstance(chunk_size, str):
+            chunk_size = int(chunk_size)
+        if isinstance(chunk_overlap, str):
+            chunk_overlap = int(chunk_overlap)
+
+        separator_type = separator_type or "python"
+        try:
+            language = Language(separator_type)
+        except ValueError:
+            # Enum lookup is by value and case-sensitive; tolerate inputs
+            # such as the former "Python" default.
+            language = Language(separator_type.lower())
+
+        splitter = RecursiveCharacterTextSplitter.from_language(
+            language=language,
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+        )
+
+        docs = splitter.split_documents(documents)
+        self.repr_value = build_loader_repr_from_documents(docs)
+        return docs
--- a/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py
+++ b/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py
@ -0,0 +1,79 @@
+from typing import Optional
+from langflow import CustomComponent
+from langchain.schema import Document
+
+
+class RecursiveCharacterTextSplitterComponent(CustomComponent):
+    """Custom component that splits documents with ``RecursiveCharacterTextSplitter``."""
+
+    display_name: str = "Recursive Character Text Splitter"
+    description: str = "Split text into chunks of a specified length."
+    documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
+
+    def build_config(self):
+        """Return the per-field UI configuration for the arguments of ``build``."""
+        return {
+            "documents": {
+                "display_name": "Documents",
+                "info": "The documents to split.",
+            },
+            "separators": {
+                "display_name": "Separators",
+                "info": 'The characters to split on.\nIf left empty defaults to ["\\n\\n", "\\n", " ", ""].',
+                "is_list": True,
+            },
+            "chunk_size": {
+                "display_name": "Chunk Size",
+                "info": "The maximum length of each chunk.",
+                "field_type": "int",
+                "value": 1000,
+            },
+            "chunk_overlap": {
+                "display_name": "Chunk Overlap",
+                "info": "The amount of overlap between chunks.",
+                "field_type": "int",
+                "value": 200,
+            },
+            "code": {"show": False},
+        }
+
+    def build(
+        self,
+        documents: list[Document],
+        separators: Optional[list[str]] = None,
+        chunk_size: Optional[int] = 1000,
+        chunk_overlap: Optional[int] = 200,
+    ) -> list[Document]:
+        """
+        Split documents into chunks of a specified length.
+
+        Args:
+            documents (list[Document]): The documents to split.
+            separators (list[str]): The characters to split on. Escape
+                sequences written out literally (backslash + "n", ...) are
+                converted to the characters they denote. An empty string is
+                treated like ``None``.
+            chunk_size (int): The maximum length of each chunk.
+            chunk_overlap (int): The amount of overlap between chunks.
+
+        Returns:
+            list[Document]: The resulting chunks.
+        """
+        from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+        def unescape(separator: str) -> str:
+            # "unicode-escape" decodes bytes as Latin-1, so the default UTF-8
+            # encode mangles non-ASCII separators. Encoding to Latin-1 with
+            # "backslashreplace" keeps every character intact.
+            try:
+                return separator.encode("latin-1", "backslashreplace").decode(
+                    "unicode-escape"
+                )
+            except UnicodeDecodeError:
+                # Not a valid escape (e.g. a lone trailing backslash):
+                # use the separator exactly as typed.
+                return separator
+
+        if separators == "":
+            separators = None
+        elif separators:
+            separators = [unescape(x) for x in separators]
+
+        # Make sure chunk_size and chunk_overlap are ints
+        if isinstance(chunk_size, str):
+            chunk_size = int(chunk_size)
+        if isinstance(chunk_overlap, str):
+            chunk_overlap = int(chunk_overlap)
+        splitter = RecursiveCharacterTextSplitter(
+            separators=separators,
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+        )
+
+        docs = splitter.split_documents(documents)
+        # repr_value is set to the separators that were actually used.
+        self.repr_value = separators
+        return docs
--- a/src/backend/langflow/components/textsplitters/init.py
+++ b/src/backend/langflow/components/textsplitters/init.py
--- a/src/backend/langflow/config.yaml
+++ b/src/backend/langflow/config.yaml
@ -171,8 +171,6 @@ prompts:
 textsplitters:
  CharacterTextSplitter:
    documentation: "https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/character_text_splitter"
-  RecursiveCharacterTextSplitter:
-    documentation: "https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/recursive_text_splitter"
 toolkits:
  OpenAPIToolkit:
    documentation: ""
--- a/src/backend/langflow/interface/types.py
+++ b/src/backend/langflow/interface/types.py
@ -5,6 +5,7 @@ from langflow.api.utils import merge_nested_dicts_with_renaming
 from langflow.interface.agents.base import agent_creator
 from langflow.interface.chains.base import chain_creator
 from langflow.interface.custom.constants import CUSTOM_COMPONENT_SUPPORTED_TYPES
+from langflow.interface.custom.utils import extract_inner_type
 from langflow.interface.document_loaders.base import documentloader_creator
 from langflow.interface.embeddings.base import embedding_creator
 from langflow.interface.importing.utils import get_function_custom
@ -84,6 +85,8 @@ def build_langchain_types_dict():  # sourcery skip: dict-assign-update-to-union


 def process_type(field_type: str):
+    """Normalise a field type string.
+
+    Types starting with "list"/"List" are delegated to ``extract_inner_type``;
+    "Prompt" becomes "prompt"; anything else is returned unchanged.
+    """
+    if field_type.startswith(("list", "List")):
+        return extract_inner_type(field_type)
    return "prompt" if field_type == "Prompt" else field_type


@ -100,6 +103,7 @@ def add_new_custom_field(
    # if it is, update the value
    display_name = field_config.pop("display_name", field_name)
    field_type = field_config.pop("field_type", field_type)
+    field_contains_list = "list" in field_type.lower()
    field_type = process_type(field_type)
    field_value = field_config.pop("value", field_value)
    field_advanced = field_config.pop("advanced", False)
@ -110,7 +114,9 @@ def add_new_custom_field(
    # If options is a list, then it's a dropdown
    # If options is None, then it's a list of strings
    is_list = isinstance(field_config.get("options"), list)
-    field_config["is_list"] = is_list or field_config.get("is_list", False)
+    field_config["is_list"] = (
+        is_list or field_config.get("is_list", False) or field_contains_list
+    )

    if "name" in field_config:
        warnings.warn(
@ -172,7 +178,7 @@ def extract_type_from_optional(field_type):
    Returns:
    str: The extracted type, or an empty string if no type was found.
    """
-    match = re.search(r"\[(.*?)\]", field_type)
+    match = re.search(r"\[(.*?)\]$", field_type)
    return match[1] if match else None


--- a/src/backend/langflow/main.py
+++ b/src/backend/langflow/main.py
@ -11,6 +11,7 @@ from langflow.api import router
 from langflow.interface.utils import setup_llm_caching
 from langflow.services.database.utils import initialize_database
 from langflow.services.manager import initialize_services, teardown_services
+from langflow.services.plugins.langfuse import LangfuseInstance
 from langflow.utils.logger import configure


@ -41,6 +42,8 @@ def create_app():
    app.on_event("startup")(initialize_database)
    app.on_event("startup")(setup_llm_caching)
    app.on_event("shutdown")(teardown_services)
+    app.on_event("startup")(LangfuseInstance.update)
+    app.on_event("shutdown")(LangfuseInstance.teardown)
    return app


--- a/src/backend/langflow/processing/base.py
+++ b/src/backend/langflow/processing/base.py
@ -1,4 +1,4 @@
-from typing import Union
+from typing import List, Union, TYPE_CHECKING
 from langflow.api.v1.callback import (
    AsyncStreamingLLMCallbackHandler,
    StreamingLLMCallbackHandler,
@ -6,6 +6,52 @@ from langflow.api.v1.callback import (
 from langflow.processing.process import fix_memory_inputs, format_actions
 from loguru import logger
 from langchain.agents.agent import AgentExecutor
+from langchain.callbacks.base import BaseCallbackHandler
+
+if TYPE_CHECKING:
+    from langfuse.callback import CallbackHandler  # type: ignore
+
+
+def setup_callbacks(sync, trace_id, **kwargs):
+    """Build the callback list for a langchain object.
+
+    The list always starts with a streaming handler (sync or async depending
+    on ``sync``) built from ``kwargs``; a langfuse handler for ``trace_id``
+    is appended when one is available.
+    """
+    handler_cls = (
+        StreamingLLMCallbackHandler if sync else AsyncStreamingLLMCallbackHandler
+    )
+    callbacks = [handler_cls(**kwargs)]
+
+    langfuse_callback = get_langfuse_callback(trace_id=trace_id)
+    if langfuse_callback:
+        logger.debug("Langfuse callback loaded")
+        callbacks.append(langfuse_callback)
+    return callbacks
+
+
+def get_langfuse_callback(trace_id):
+    """Return a langfuse callback handler for ``trace_id``, or ``None``.
+
+    This is best-effort: when no Langfuse instance is available, or when
+    anything fails while importing langfuse or creating the trace, the
+    problem is logged and ``None`` is returned so the caller can carry on
+    without tracing.
+    """
+    logger.debug("Initializing langfuse callback")
+    try:
+        # Imports and instance lookup sit inside the try so that a missing
+        # or broken langfuse installation cannot break the calling request.
+        from langflow.services.plugins.langfuse import LangfuseInstance
+
+        client = LangfuseInstance.get()
+        if not client:
+            return None
+
+        logger.debug("Langfuse credentials found")
+        from langfuse.callback import CreateTrace
+
+        trace = client.trace(CreateTrace(id=trace_id))
+        return trace.getNewHandler()
+    except Exception as exc:
+        logger.error(f"Error initializing langfuse callback: {exc}")
+
+    return None
+
+
+def flush_langfuse_callback_if_present(
+    callbacks: List[Union[BaseCallbackHandler, "CallbackHandler"]]
+):
+    """
+    Flush the first callback that exposes a ``langfuse`` attribute, if any.
+
+    Only the first match is flushed; callbacks without that attribute are
+    left untouched.
+    """
+    langfuse_callback = next(
+        (candidate for candidate in callbacks if hasattr(candidate, "langfuse")),
+        None,
+    )
+    if langfuse_callback is not None:
+        langfuse_callback.langfuse.flush()


 async def get_result_and_steps(langchain_object, inputs: Union[dict, str], **kwargs):
@ -27,13 +73,18 @@ async def get_result_and_steps(langchain_object, inputs: Union[dict, str], **kwa
            logger.error(f"Error fixing memory inputs: {exc}")

        try:
-            async_callbacks = [AsyncStreamingLLMCallbackHandler(**kwargs)]
-            output = await langchain_object.acall(inputs, callbacks=async_callbacks)
+            trace_id = kwargs.pop("session_id", None)
+            callbacks = setup_callbacks(sync=False, trace_id=trace_id, **kwargs)
+            output = await langchain_object.acall(inputs, callbacks=callbacks)
        except Exception as exc:
            # make the error message more informative
            logger.debug(f"Error: {str(exc)}")
-            sync_callbacks = [StreamingLLMCallbackHandler(**kwargs)]
-            output = langchain_object(inputs, callbacks=sync_callbacks)
+            trace_id = kwargs.pop("session_id", None)
+            callbacks = setup_callbacks(sync=True, trace_id=trace_id, **kwargs)
+            output = langchain_object(inputs, callbacks=callbacks)
+
+        # if langfuse callback is present, run callback.langfuse.flush()
+        flush_langfuse_callback_if_present(callbacks)

        intermediate_steps = (
            output.get("intermediate_steps", []) if isinstance(output, dict) else []
--- a/src/backend/langflow/processing/process.py
+++ b/src/backend/langflow/processing/process.py
@ -11,6 +11,7 @@ from langflow.graph import Graph
 from langchain.chains.base import Chain
 from langchain.vectorstores.base import VectorStore
 from typing import Any, Dict, List, Optional, Tuple, Union
+from langchain.schema import Document


 def fix_memory_inputs(langchain_object):
@ -142,6 +143,8 @@ def generate_result(langchain_object: Union[Chain, VectorStore], inputs: dict):
        logger.debug("Generated result and thought")
    elif isinstance(langchain_object, VectorStore):
        result = langchain_object.search(**inputs)
+    elif isinstance(langchain_object, Document):
+        result = langchain_object.dict()
    else:
        raise ValueError(
            f"Unknown langchain_object type: {type(langchain_object).__name__}"
--- a/src/backend/langflow/services/chat/manager.py
+++ b/src/backend/langflow/services/chat/manager.py
@ -1,4 +1,5 @@
 from collections import defaultdict
+import uuid
 from fastapi import WebSocket, status
 from langflow.api.v1.schemas import ChatMessage, ChatResponse, FileResponse
 from langflow.services.base import Service
@ -49,6 +50,7 @@ class ChatManager(Service):

    def __init__(self):
        self.active_connections: Dict[str, WebSocket] = {}
+        self.connection_ids: Dict[str, str] = {}
        self.chat_history = ChatHistory()
        self.cache_manager = service_manager.get(ServiceType.CACHE_MANAGER)
        self.cache_manager.attach(self.update)
@ -93,9 +95,13 @@ class ChatManager(Service):

    async def connect(self, client_id: str, websocket: WebSocket):
+        """Register ``websocket`` as the active connection for ``client_id``.
+
+        Also stores a connection id of the form ``"<client_id>-<uuid4>"`` in
+        ``self.connection_ids`` under the same key.
+        """
        self.active_connections[client_id] = websocket
+        # This is to avoid having multiple clients with the same id
+        #! Temporary solution
+        self.connection_ids[client_id] = f"{client_id}-{uuid.uuid4()}"

    def disconnect(self, client_id: str):
+        """Drop the websocket and the connection id stored for ``client_id``.
+
+        Unknown ids are ignored: both ``pop`` calls use a ``None`` default.
+        """
        self.active_connections.pop(client_id, None)
+        self.connection_ids.pop(client_id, None)

    async def send_message(self, client_id: str, message: str):
        websocket = self.active_connections[client_id]
@ -137,6 +143,7 @@ class ChatManager(Service):
                langchain_object=langchain_object,
                chat_inputs=chat_inputs,
                websocket=self.active_connections[client_id],
+                session_id=self.connection_ids[client_id],
            )
        except Exception as e:
            # Log stack trace
--- a/src/backend/langflow/services/chat/utils.py
+++ b/src/backend/langflow/services/chat/utils.py
@ -9,6 +9,7 @@ async def process_graph(
    langchain_object,
    chat_inputs: ChatMessage,
    websocket: WebSocket,
+    session_id: str,
 ):
    langchain_object = try_setting_streaming_options(langchain_object, websocket)
    logger.debug("Loaded langchain object")
@ -27,7 +28,10 @@ async def process_graph(

        logger.debug("Generating result and thought")
        result, intermediate_steps = await get_result_and_steps(
-            langchain_object, chat_inputs.message, websocket=websocket
+            langchain_object,
+            chat_inputs.message,
+            websocket=websocket,
+            session_id=session_id,
        )
        logger.debug("Generated result and intermediate_steps")
        return result, intermediate_steps
--- a/src/backend/langflow/services/plugins/init.py
+++ b/src/backend/langflow/services/plugins/init.py
--- a/src/backend/langflow/services/plugins/langfuse.py
+++ b/src/backend/langflow/services/plugins/langfuse.py
@ -0,0 +1,44 @@
+from langflow.utils.logger import logger
+
+### Temporary implementation
+# This will be replaced by a plugin system once merged into 0.5.0
+
+
+class LangfuseInstance:
+    """Class-level holder for a single, lazily created ``Langfuse`` client."""
+
+    # The shared client; stays None while no credentials are configured.
+    _instance = None
+
+    @classmethod
+    def get(cls):
+        """Return the shared client, trying to create it first if unset.
+
+        Returns ``None`` when no credentials are configured.
+        """
+        logger.debug("Getting Langfuse instance")
+        if cls._instance is None:
+            cls.create()
+        return cls._instance
+
+    @classmethod
+    def create(cls):
+        """Create the client from settings, or reset it to ``None``.
+
+        A client is only built when both the public and the secret key are
+        set. ``LANGFUSE_HOST`` is forwarded when it is set.
+        """
+        logger.debug("Creating Langfuse instance")
+        from langflow.settings import settings
+
+        if not (settings.LANGFUSE_PUBLIC_KEY and settings.LANGFUSE_SECRET_KEY):
+            logger.debug("No Langfuse credentials found")
+            cls._instance = None
+            return
+
+        logger.debug("Langfuse credentials found")
+        # Import only once credentials are known to exist, so langfuse is
+        # not needed when tracing is not configured.
+        from langfuse import Langfuse  # type: ignore
+
+        client_kwargs = {
+            "public_key": settings.LANGFUSE_PUBLIC_KEY,
+            "secret_key": settings.LANGFUSE_SECRET_KEY,
+        }
+        # Pass the host only when configured so the SDK default still
+        # applies otherwise.
+        host = getattr(settings, "LANGFUSE_HOST", None)
+        if host:
+            client_kwargs["host"] = host
+        cls._instance = Langfuse(**client_kwargs)
+
+    @classmethod
+    def update(cls):
+        """Discard the current client and create a new one from settings."""
+        logger.debug("Updating Langfuse instance")
+        cls._instance = None
+        cls.create()
+
+    @classmethod
+    def teardown(cls):
+        """Flush the client, if any, and clear it even if flushing fails."""
+        logger.debug("Tearing down Langfuse instance")
+        try:
+            if cls._instance is not None:
+                cls._instance.flush()
+        finally:
+            cls._instance = None
--- a/src/backend/langflow/services/settings/base.py
+++ b/src/backend/langflow/services/settings/base.py
@ -41,6 +41,10 @@ class Settings(BaseSettings):
    REMOVE_API_KEYS: bool = False
    COMPONENTS_PATH: List[str] = []

+    LANGFUSE_SECRET_KEY: Optional[str] = None
+    LANGFUSE_PUBLIC_KEY: Optional[str] = None
+    LANGFUSE_HOST: Optional[str] = None
+
    @validator("CONFIG_DIR", pre=True, allow_reuse=True)
    def set_langflow_dir(cls, value):
        if not value: