Merge remote-tracking branch 'origin/cz/mergeAll' into fix/minor_ui_adjustments

This commit is contained in:
Lucas Oliveira 2024-06-10 16:08:18 -03:00
commit 8b209c2b1b
43 changed files with 173 additions and 180 deletions

View file

@ -110,7 +110,7 @@ async def download_profile_picture(
extension = file_name.split(".")[-1]
config_dir = get_storage_service().settings_service.settings.config_dir
config_path = Path(config_dir)
folder_path = config_path / 'profile_pictures' / folder_name
folder_path = config_path / "profile_pictures" / folder_name
content_type = build_content_type_from_extension(extension)
file_content = await storage_service.get_file(flow_id=folder_path, file_name=file_name)
return StreamingResponse(BytesIO(file_content), media_type=content_type)
@ -140,7 +140,6 @@ async def list_profile_pictures(storage_service: StorageService = Depends(get_st
raise HTTPException(status_code=500, detail=str(e))
@router.get("/list/{flow_id}")
async def list_files(
flow_id: UUID = Depends(get_flow_id), storage_service: StorageService = Depends(get_storage_service)

View file

@ -15,7 +15,7 @@ def chroma_collection_to_records(collection_dict: dict):
for i, doc in enumerate(collection_dict["documents"]):
record_dict = {
"id": collection_dict["ids"][i],
"document": doc,
"text": doc,
}
if "metadatas" in collection_dict:
for key, value in collection_dict["metadatas"][i].items():

View file

@ -4,7 +4,7 @@ from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_core.vectorstores import VectorStore
from langflow.custom import CustomComponent
from langflow.field_typing import BaseLanguageModel
from langflow.field_typing import BaseLanguageModel, Text
from langflow.schema import Record
from langflow.schema.message import Message
@ -14,25 +14,54 @@ class SelfQueryRetrieverComponent(CustomComponent):
description: str = "Retriever that uses a vector store and an LLM to generate the vector store queries."
icon = "LangChain"
def build_config(self):
# Returns a dict with one entry per component input. The keys (query,
# vectorstore, attribute_infos, document_content_description, llm) match the
# parameter names of build() as added in this diff.
# Each entry carries a "display_name" and an "info" string; only "query"
# additionally declares "input_types".
# NOTE(review): how the framework consumes these keys is not visible here;
# presumably they drive how the inputs are labelled in the UI. Confirm.
return {
# "query" lists both "Message" and "Text" as input types; build() handles a
# Message (uses its .text) and a plain str, and raises ValueError otherwise.
"query": {
"display_name": "Query",
"input_types": ["Message", "Text"],
"info": "Query to be passed as input.",
},
"vectorstore": {
"display_name": "Vector Store",
"info": "Vector Store to be passed as input.",
},
# Labelled "Metadata Field Info"; build() constructs one AttributeInfo from
# each supplied record's data before handing them to the retriever.
"attribute_infos": {
"display_name": "Metadata Field Info",
"info": "Metadata Field Info to be passed as input.",
},
# Passed by build() to SelfQueryRetriever.from_llm as document_contents.
"document_content_description": {
"display_name": "Document Content Description",
"info": "Document Content Description to be passed as input.",
},
"llm": {
"display_name": "LLM",
"info": "LLM to be passed as input.",
},
}
def build(
self,
query: Message,
vectorstore: VectorStore,
metadata_field_info: list[AttributeInfo],
document_content_description: str,
attribute_infos: list[Record],
document_content_description: Text,
llm: BaseLanguageModel,
) -> Record:
metadata_field_info = [i[0] for i in metadata_field_info]
metadata_field_infos = [AttributeInfo(**record.data) for record in attribute_infos]
self_query_retriever = SelfQueryRetriever.from_llm(
llm,
vectorstore,
document_content_description,
metadata_field_info,
llm=llm,
vectorstore=vectorstore,
document_contents=document_content_description,
metadata_field_info=metadata_field_infos,
enable_limit=True,
)
input_text = query.text
if isinstance(query, Message):
input_text = query.text
elif isinstance(query, str):
input_text = query
else:
raise ValueError(f"Query type {type(query)} not supported.")
documents = self_query_retriever.invoke(input=input_text)
records = [Record.from_document(document) for document in documents]
self.status = records

View file

@ -1,3 +1,4 @@
from copy import deepcopy
from typing import List, Optional, Union
import chromadb
@ -6,6 +7,7 @@ from langchain_chroma import Chroma
from langchain_core.embeddings import Embeddings
from langchain_core.retrievers import BaseRetriever
from langchain_core.vectorstores import VectorStore
from langflow.base.vectorstores.utils import chroma_collection_to_records
from langflow.custom import CustomComponent
from langflow.schema import Record
@ -48,6 +50,11 @@ class ChromaComponent(CustomComponent):
"display_name": "Server SSL Enabled",
"advanced": True,
},
"allow_duplicates": {
"display_name": "Allow Duplicates",
"advanced": True,
"info": "If false, will not add documents that are already in the Vector Store.",
},
}
def build(
@ -61,6 +68,7 @@ class ChromaComponent(CustomComponent):
chroma_server_host: Optional[str] = None,
chroma_server_http_port: Optional[int] = None,
chroma_server_grpc_port: Optional[int] = None,
allow_duplicates: bool = False,
) -> Union[VectorStore, BaseRetriever]:
"""
Builds the Vector Store or BaseRetriever object.
@ -75,6 +83,7 @@ class ChromaComponent(CustomComponent):
- chroma_server_host (Optional[str]): The host for the Chroma server.
- chroma_server_http_port (Optional[int]): The HTTP port for the Chroma server.
- chroma_server_grpc_port (Optional[int]): The gRPC port for the Chroma server.
- allow_duplicates (bool): Whether to allow duplicates in the Vector Store.
Returns:
- Union[VectorStore, BaseRetriever]: The Vector Store or BaseRetriever object.
@ -93,35 +102,34 @@ class ChromaComponent(CustomComponent):
)
client = chromadb.HttpClient(settings=chroma_settings)
# If documents, then we need to create a Chroma instance using .from_documents
# Check index_directory and expand it if it is a relative path
if index_directory is not None:
index_directory = self.resolve_path(index_directory)
chroma = Chroma(
persist_directory=index_directory,
client=client,
embedding_function=embedding,
collection_name=collection_name,
)
if allow_duplicates:
stored_records = []
else:
stored_records = chroma_collection_to_records(chroma.get())
_stored_documents_without_id = []
for record in deepcopy(stored_records):
del record.id
_stored_documents_without_id.append(record)
documents = []
for _input in inputs or []:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
if _input not in _stored_documents_without_id:
documents.append(_input.to_lc_document())
else:
documents.append(_input)
if documents is not None and embedding is not None:
if len(documents) == 0:
raise ValueError("If documents are provided, there must be at least one document.")
chroma = Chroma.from_documents(
documents=documents, # type: ignore
persist_directory=index_directory,
collection_name=collection_name,
embedding=embedding,
client=client,
)
else:
chroma = Chroma(
persist_directory=index_directory,
client=client,
embedding_function=embedding,
)
raise ValueError("Inputs must be a Record objects.")
store = chroma.get()
self.status = chroma_collection_to_records(store)
if documents and embedding is not None:
chroma.add_documents(documents)
self.status = stored_records
return chroma

View file

@ -20,4 +20,4 @@ def generate_unique_folder_name(folder_name, user_id, session):
# If a folder with the name already exists, append (n) to the name and increment n
folder_name = f"{original_name} ({n})"
n += 1
n += 1