Merge remote-tracking branch 'origin/main' into dev

This commit is contained in:
Gabriel Luiz Freitas Almeida 2023-09-13 21:32:51 -03:00
commit c6be7cdecd
25 changed files with 740 additions and 515 deletions

View file

@ -42,8 +42,8 @@ class ConversationalAgent(CustomComponent):
self,
model_name: str,
openai_api_key: str,
openai_api_base: str,
tools: Tool,
openai_api_base: Optional[str] = None,
memory: Optional[BaseMemory] = None,
system_message: Optional[SystemMessagePromptTemplate] = None,
max_token_limit: int = 2000,

View file

@ -0,0 +1,42 @@
from typing import Optional
from langflow import CustomComponent
from langchain.llms import HuggingFaceEndpoint
from langchain.llms.base import BaseLLM
class HuggingFaceEndpointsComponent(CustomComponent):
display_name: str = "Hugging Face Inference API"
description: str = "LLM model from Hugging Face Inference API."
def build_config(self):
return {
"endpoint_url": {"display_name": "Endpoint URL", "password": True},
"task": {
"display_name": "Task",
"type": "select",
"options": ["text2text-generation", "text-generation", "summarization"],
},
"huggingfacehub_api_token": {"display_name": "API token", "password": True},
"model_kwargs": {
"display_name": "Model Keyword Arguments",
"field_type": "code",
},
"code": {"show": False},
}
def build(
self,
endpoint_url: str,
task="text2text-generation",
huggingfacehub_api_token: Optional[str] = None,
model_kwargs: Optional[dict] = None,
) -> BaseLLM:
try:
output = HuggingFaceEndpoint(
endpoint_url=endpoint_url,
task=task,
huggingfacehub_api_token=huggingfacehub_api_token,
)
except Exception as e:
raise ValueError("Could not connect to HuggingFace Endpoints API.") from e
return output

View file

@ -0,0 +1,28 @@
from typing import Optional
from langflow import CustomComponent
from langchain.retrievers import MetalRetriever
from langchain.schema import BaseRetriever
from metal_sdk.metal import Metal # type: ignore
class MetalRetrieverComponent(CustomComponent):
display_name: str = "Metal Retriever"
description: str = "Retriever that uses the Metal API."
def build_config(self):
return {
"api_key": {"display_name": "API Key", "password": True},
"client_id": {"display_name": "Client ID", "password": True},
"index_id": {"display_name": "Index ID"},
"params": {"display_name": "Parameters", "field_type": "code"},
"code": {"show": False},
}
def build(
self, api_key: str, client_id: str, index_id: str, params: Optional[dict] = None
) -> BaseRetriever:
try:
metal = Metal(api_key=api_key, client_id=client_id, index_id=index_id)
except Exception as e:
raise ValueError("Could not connect to Metal API.") from e
return MetalRetriever(client=metal, params=params or {})

View file

@ -0,0 +1,82 @@
from typing import Optional
from langflow import CustomComponent
from langchain.text_splitter import Language
from langchain.schema import Document
from langflow.utils.util import build_loader_repr_from_documents
class LanguageRecursiveTextSplitterComponent(CustomComponent):
display_name: str = "Language Recursive Text Splitter"
description: str = "Split text into chunks of a specified length based on language."
documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter"
def build_config(self):
options = [x.value for x in Language]
return {
"documents": {
"display_name": "Documents",
"info": "The documents to split.",
},
"separator_type": {
"display_name": "Separator Type",
"info": "The type of separator to use.",
"field_type": "str",
"options": options,
"value": "Python",
},
"separators": {
"display_name": "Separators",
"info": "The characters to split on.",
"is_list": True,
},
"chunk_size": {
"display_name": "Chunk Size",
"info": "The maximum length of each chunk.",
"field_type": "int",
"value": 1000,
},
"chunk_overlap": {
"display_name": "Chunk Overlap",
"info": "The amount of overlap between chunks.",
"field_type": "int",
"value": 200,
},
"code": {"show": False},
}
def build(
self,
documents: list[Document],
chunk_size: Optional[int] = 1000,
chunk_overlap: Optional[int] = 200,
separator_type: Optional[str] = "Python",
) -> list[Document]:
"""
Split text into chunks of a specified length.
Args:
separators (list[str]): The characters to split on.
chunk_size (int): The maximum length of each chunk.
chunk_overlap (int): The amount of overlap between chunks.
length_function (function): The function to use to calculate the length of the text.
Returns:
list[str]: The chunks of text.
"""
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Make sure chunk_size and chunk_overlap are ints
if isinstance(chunk_size, str):
chunk_size = int(chunk_size)
if isinstance(chunk_overlap, str):
chunk_overlap = int(chunk_overlap)
splitter = RecursiveCharacterTextSplitter.from_language(
language=Language(separator_type),
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
docs = splitter.split_documents(documents)
self.repr_value = build_loader_repr_from_documents(docs)
return docs

View file

@ -0,0 +1,79 @@
from typing import Optional
from langflow import CustomComponent
from langchain.schema import Document
class RecursiveCharacterTextSplitterComponent(CustomComponent):
display_name: str = "Recursive Character Text Splitter"
description: str = "Split text into chunks of a specified length."
documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
def build_config(self):
return {
"documents": {
"display_name": "Documents",
"info": "The documents to split.",
},
"separators": {
"display_name": "Separators",
"info": 'The characters to split on.\nIf left empty defaults to ["\\n\\n", "\\n", " ", ""].',
"is_list": True,
},
"chunk_size": {
"display_name": "Chunk Size",
"info": "The maximum length of each chunk.",
"field_type": "int",
"value": 1000,
},
"chunk_overlap": {
"display_name": "Chunk Overlap",
"info": "The amount of overlap between chunks.",
"field_type": "int",
"value": 200,
},
"code": {"show": False},
}
def build(
self,
documents: list[Document],
separators: Optional[list[str]] = None,
chunk_size: Optional[int] = 1000,
chunk_overlap: Optional[int] = 200,
) -> list[Document]:
"""
Split text into chunks of a specified length.
Args:
separators (list[str]): The characters to split on.
chunk_size (int): The maximum length of each chunk.
chunk_overlap (int): The amount of overlap between chunks.
length_function (function): The function to use to calculate the length of the text.
Returns:
list[str]: The chunks of text.
"""
from langchain.text_splitter import RecursiveCharacterTextSplitter
if separators == "":
separators = None
elif separators:
# check if the separators list has escaped characters
# if there are escaped characters, unescape them
separators = [x.encode().decode("unicode-escape") for x in separators]
# Make sure chunk_size and chunk_overlap are ints
if isinstance(chunk_size, str):
chunk_size = int(chunk_size)
if isinstance(chunk_overlap, str):
chunk_overlap = int(chunk_overlap)
splitter = RecursiveCharacterTextSplitter(
separators=separators,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
docs = splitter.split_documents(documents)
# self.repr_value = build_loader_repr_from_documents(docs)
self.repr_value = separators
return docs

View file

@ -171,8 +171,6 @@ prompts:
textsplitters:
CharacterTextSplitter:
documentation: "https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/character_text_splitter"
RecursiveCharacterTextSplitter:
documentation: "https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/recursive_text_splitter"
toolkits:
OpenAPIToolkit:
documentation: ""

View file

@ -5,6 +5,7 @@ from langflow.api.utils import merge_nested_dicts_with_renaming
from langflow.interface.agents.base import agent_creator
from langflow.interface.chains.base import chain_creator
from langflow.interface.custom.constants import CUSTOM_COMPONENT_SUPPORTED_TYPES
from langflow.interface.custom.utils import extract_inner_type
from langflow.interface.document_loaders.base import documentloader_creator
from langflow.interface.embeddings.base import embedding_creator
from langflow.interface.importing.utils import get_function_custom
@ -84,6 +85,8 @@ def build_langchain_types_dict(): # sourcery skip: dict-assign-update-to-union
def process_type(field_type: str):
if field_type.startswith("list") or field_type.startswith("List"):
return extract_inner_type(field_type)
return "prompt" if field_type == "Prompt" else field_type
@ -100,6 +103,7 @@ def add_new_custom_field(
# if it is, update the value
display_name = field_config.pop("display_name", field_name)
field_type = field_config.pop("field_type", field_type)
field_contains_list = "list" in field_type.lower()
field_type = process_type(field_type)
field_value = field_config.pop("value", field_value)
field_advanced = field_config.pop("advanced", False)
@ -110,7 +114,9 @@ def add_new_custom_field(
# If options is a list, then it's a dropdown
# If options is None, then it's a list of strings
is_list = isinstance(field_config.get("options"), list)
field_config["is_list"] = is_list or field_config.get("is_list", False)
field_config["is_list"] = (
is_list or field_config.get("is_list", False) or field_contains_list
)
if "name" in field_config:
warnings.warn(
@ -172,7 +178,7 @@ def extract_type_from_optional(field_type):
Returns:
str: The extracted type, or an empty string if no type was found.
"""
match = re.search(r"\[(.*?)\]", field_type)
match = re.search(r"\[(.*?)\]$", field_type)
return match[1] if match else None

View file

@ -11,6 +11,7 @@ from langflow.api import router
from langflow.interface.utils import setup_llm_caching
from langflow.services.database.utils import initialize_database
from langflow.services.manager import initialize_services, teardown_services
from langflow.services.plugins.langfuse import LangfuseInstance
from langflow.utils.logger import configure
@ -41,6 +42,8 @@ def create_app():
app.on_event("startup")(initialize_database)
app.on_event("startup")(setup_llm_caching)
app.on_event("shutdown")(teardown_services)
app.on_event("startup")(LangfuseInstance.update)
app.on_event("shutdown")(LangfuseInstance.teardown)
return app

View file

@ -1,4 +1,4 @@
from typing import Union
from typing import List, Union, TYPE_CHECKING
from langflow.api.v1.callback import (
AsyncStreamingLLMCallbackHandler,
StreamingLLMCallbackHandler,
@ -6,6 +6,52 @@ from langflow.api.v1.callback import (
from langflow.processing.process import fix_memory_inputs, format_actions
from loguru import logger
from langchain.agents.agent import AgentExecutor
from langchain.callbacks.base import BaseCallbackHandler
if TYPE_CHECKING:
from langfuse.callback import CallbackHandler # type: ignore
def setup_callbacks(sync, trace_id, **kwargs):
"""Setup callbacks for langchain object"""
callbacks = []
if sync:
callbacks.append(StreamingLLMCallbackHandler(**kwargs))
else:
callbacks.append(AsyncStreamingLLMCallbackHandler(**kwargs))
if langfuse_callback := get_langfuse_callback(trace_id=trace_id):
logger.debug("Langfuse callback loaded")
callbacks.append(langfuse_callback)
return callbacks
def get_langfuse_callback(trace_id):
from langflow.services.plugins.langfuse import LangfuseInstance
from langfuse.callback import CreateTrace
logger.debug("Initializing langfuse callback")
if langfuse := LangfuseInstance.get():
logger.debug("Langfuse credentials found")
try:
trace = langfuse.trace(CreateTrace(id=trace_id))
return trace.getNewHandler()
except Exception as exc:
logger.error(f"Error initializing langfuse callback: {exc}")
return None
def flush_langfuse_callback_if_present(
callbacks: List[Union[BaseCallbackHandler, "CallbackHandler"]]
):
"""
If langfuse callback is present, run callback.langfuse.flush()
"""
for callback in callbacks:
if hasattr(callback, "langfuse"):
callback.langfuse.flush()
break
async def get_result_and_steps(langchain_object, inputs: Union[dict, str], **kwargs):
@ -27,13 +73,18 @@ async def get_result_and_steps(langchain_object, inputs: Union[dict, str], **kwa
logger.error(f"Error fixing memory inputs: {exc}")
try:
async_callbacks = [AsyncStreamingLLMCallbackHandler(**kwargs)]
output = await langchain_object.acall(inputs, callbacks=async_callbacks)
trace_id = kwargs.pop("session_id", None)
callbacks = setup_callbacks(sync=False, trace_id=trace_id, **kwargs)
output = await langchain_object.acall(inputs, callbacks=callbacks)
except Exception as exc:
# make the error message more informative
logger.debug(f"Error: {str(exc)}")
sync_callbacks = [StreamingLLMCallbackHandler(**kwargs)]
output = langchain_object(inputs, callbacks=sync_callbacks)
trace_id = kwargs.pop("session_id", None)
callbacks = setup_callbacks(sync=True, trace_id=trace_id, **kwargs)
output = langchain_object(inputs, callbacks=callbacks)
# if langfuse callback is present, run callback.langfuse.flush()
flush_langfuse_callback_if_present(callbacks)
intermediate_steps = (
output.get("intermediate_steps", []) if isinstance(output, dict) else []

View file

@ -11,6 +11,7 @@ from langflow.graph import Graph
from langchain.chains.base import Chain
from langchain.vectorstores.base import VectorStore
from typing import Any, Dict, List, Optional, Tuple, Union
from langchain.schema import Document
def fix_memory_inputs(langchain_object):
@ -142,6 +143,8 @@ def generate_result(langchain_object: Union[Chain, VectorStore], inputs: dict):
logger.debug("Generated result and thought")
elif isinstance(langchain_object, VectorStore):
result = langchain_object.search(**inputs)
elif isinstance(langchain_object, Document):
result = langchain_object.dict()
else:
raise ValueError(
f"Unknown langchain_object type: {type(langchain_object).__name__}"

View file

@ -1,4 +1,5 @@
from collections import defaultdict
import uuid
from fastapi import WebSocket, status
from langflow.api.v1.schemas import ChatMessage, ChatResponse, FileResponse
from langflow.services.base import Service
@ -49,6 +50,7 @@ class ChatManager(Service):
def __init__(self):
self.active_connections: Dict[str, WebSocket] = {}
self.connection_ids: Dict[str, str] = {}
self.chat_history = ChatHistory()
self.cache_manager = service_manager.get(ServiceType.CACHE_MANAGER)
self.cache_manager.attach(self.update)
@ -93,9 +95,13 @@ class ChatManager(Service):
async def connect(self, client_id: str, websocket: WebSocket):
self.active_connections[client_id] = websocket
# This is to avoid having multiple clients with the same id
#! Temporary solution
self.connection_ids[client_id] = f"{client_id}-{uuid.uuid4()}"
def disconnect(self, client_id: str):
self.active_connections.pop(client_id, None)
self.connection_ids.pop(client_id, None)
async def send_message(self, client_id: str, message: str):
websocket = self.active_connections[client_id]
@ -137,6 +143,7 @@ class ChatManager(Service):
langchain_object=langchain_object,
chat_inputs=chat_inputs,
websocket=self.active_connections[client_id],
session_id=self.connection_ids[client_id],
)
except Exception as e:
# Log stack trace

View file

@ -9,6 +9,7 @@ async def process_graph(
langchain_object,
chat_inputs: ChatMessage,
websocket: WebSocket,
session_id: str,
):
langchain_object = try_setting_streaming_options(langchain_object, websocket)
logger.debug("Loaded langchain object")
@ -27,7 +28,10 @@ async def process_graph(
logger.debug("Generating result and thought")
result, intermediate_steps = await get_result_and_steps(
langchain_object, chat_inputs.message, websocket=websocket
langchain_object,
chat_inputs.message,
websocket=websocket,
session_id=session_id,
)
logger.debug("Generated result and intermediate_steps")
return result, intermediate_steps

View file

@ -0,0 +1,44 @@
from langflow.utils.logger import logger
### Temporary implementation
# This will be replaced by a plugin system once merged into 0.5.0
class LangfuseInstance:
_instance = None
@classmethod
def get(cls):
logger.debug("Getting Langfuse instance")
if cls._instance is None:
cls.create()
return cls._instance
@classmethod
def create(cls):
logger.debug("Creating Langfuse instance")
from langflow.settings import settings
from langfuse import Langfuse # type: ignore
if settings.LANGFUSE_PUBLIC_KEY and settings.LANGFUSE_SECRET_KEY:
logger.debug("Langfuse credentials found")
cls._instance = Langfuse(
public_key=settings.LANGFUSE_PUBLIC_KEY,
secret_key=settings.LANGFUSE_SECRET_KEY,
)
else:
logger.debug("No Langfuse credentials found")
cls._instance = None
@classmethod
def update(cls):
logger.debug("Updating Langfuse instance")
cls._instance = None
cls.create()
@classmethod
def teardown(cls):
logger.debug("Tearing down Langfuse instance")
if cls._instance is not None:
cls._instance.flush()
cls._instance = None

View file

@ -41,6 +41,10 @@ class Settings(BaseSettings):
REMOVE_API_KEYS: bool = False
COMPONENTS_PATH: List[str] = []
LANGFUSE_SECRET_KEY: Optional[str] = None
LANGFUSE_PUBLIC_KEY: Optional[str] = None
LANGFUSE_HOST: Optional[str] = None
@validator("CONFIG_DIR", pre=True, allow_reuse=True)
def set_langflow_dir(cls, value):
if not value: