Remove "documents" configuration from vector store components
This commit is contained in:
parent
ac8f2b7070
commit
dce9901222
18 changed files with 183 additions and 78 deletions
|
|
@ -1,8 +1,9 @@
|
|||
from typing import List
|
||||
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
from langchain_core.documents.base import Document
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class CharacterTextSplitterComponent(CustomComponent):
|
||||
|
|
@ -11,7 +12,7 @@ class CharacterTextSplitterComponent(CustomComponent):
|
|||
|
||||
def build_config(self):
|
||||
return {
|
||||
"documents": {"display_name": "Documents"},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"chunk_overlap": {"display_name": "Chunk Overlap", "default": 200},
|
||||
"chunk_size": {"display_name": "Chunk Size", "default": 1000},
|
||||
"separator": {"display_name": "Separator", "default": "\n"},
|
||||
|
|
@ -19,17 +20,24 @@ class CharacterTextSplitterComponent(CustomComponent):
|
|||
|
||||
def build(
|
||||
self,
|
||||
documents: List[Document],
|
||||
inputs: List[Record],
|
||||
chunk_overlap: int = 200,
|
||||
chunk_size: int = 1000,
|
||||
separator: str = "\n",
|
||||
) -> List[Document]:
|
||||
) -> List[Record]:
|
||||
# separator may come escaped from the frontend
|
||||
separator = separator.encode().decode("unicode_escape")
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
docs = CharacterTextSplitter(
|
||||
chunk_overlap=chunk_overlap,
|
||||
chunk_size=chunk_size,
|
||||
separator=separator,
|
||||
).split_documents(documents)
|
||||
self.status = docs
|
||||
return docs
|
||||
records = self.to_records(docs)
|
||||
self.status = records
|
||||
return records
|
||||
|
|
|
|||
|
|
@ -1,23 +1,22 @@
|
|||
from typing import Optional
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain.text_splitter import Language
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class LanguageRecursiveTextSplitterComponent(CustomComponent):
|
||||
display_name: str = "Language Recursive Text Splitter"
|
||||
description: str = "Split text into chunks of a specified length based on language."
|
||||
documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter"
|
||||
documentation: str = (
|
||||
"https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter"
|
||||
)
|
||||
|
||||
def build_config(self):
|
||||
options = [x.value for x in Language]
|
||||
return {
|
||||
"documents": {
|
||||
"display_name": "Documents",
|
||||
"info": "The documents to split.",
|
||||
},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"separator_type": {
|
||||
"display_name": "Separator Type",
|
||||
"info": "The type of separator to use.",
|
||||
|
|
@ -47,11 +46,11 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent):
|
|||
|
||||
def build(
|
||||
self,
|
||||
documents: list[Document],
|
||||
inputs: List[Record],
|
||||
chunk_size: Optional[int] = 1000,
|
||||
chunk_overlap: Optional[int] = 200,
|
||||
separator_type: str = "Python",
|
||||
) -> list[Document]:
|
||||
) -> list[Record]:
|
||||
"""
|
||||
Split text into chunks of a specified length.
|
||||
|
||||
|
|
@ -77,6 +76,12 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent):
|
|||
chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap,
|
||||
)
|
||||
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
docs = splitter.split_documents(documents)
|
||||
return docs
|
||||
records = self.to_records(docs)
|
||||
return records
|
||||
|
|
|
|||
|
|
@ -1,22 +1,26 @@
|
|||
from typing import Optional
|
||||
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.schema import Record
|
||||
from langflow.utils.util import build_loader_repr_from_documents
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
|
||||
class RecursiveCharacterTextSplitterComponent(CustomComponent):
|
||||
display_name: str = "Recursive Character Text Splitter"
|
||||
description: str = "Split text into chunks of a specified length."
|
||||
documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
|
||||
documentation: str = (
|
||||
"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
|
||||
)
|
||||
|
||||
def build_config(self):
|
||||
return {
|
||||
"documents": {
|
||||
"display_name": "Documents",
|
||||
"info": "The documents to split.",
|
||||
"inputs": {
|
||||
"display_name": "Input",
|
||||
"info": "The texts to split.",
|
||||
"input_types": ["Document", "Record"],
|
||||
},
|
||||
"separators": {
|
||||
"display_name": "Separators",
|
||||
|
|
@ -40,11 +44,11 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent):
|
|||
|
||||
def build(
|
||||
self,
|
||||
documents: list[Document],
|
||||
inputs: list[Document],
|
||||
separators: Optional[list[str]] = None,
|
||||
chunk_size: Optional[int] = 1000,
|
||||
chunk_overlap: Optional[int] = 200,
|
||||
) -> list[Document]:
|
||||
) -> list[Record]:
|
||||
"""
|
||||
Split text into chunks of a specified length.
|
||||
|
||||
|
|
@ -75,7 +79,12 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent):
|
|||
chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap,
|
||||
)
|
||||
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
docs = splitter.split_documents(documents)
|
||||
self.repr_value = build_loader_repr_from_documents(docs)
|
||||
return docs
|
||||
return self.to_records(docs)
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ class ChromaComponent(CustomComponent):
|
|||
"collection_name": {"display_name": "Collection Name", "value": "langflow"},
|
||||
"index_directory": {"display_name": "Persist Directory"},
|
||||
"code": {"advanced": True, "display_name": "Code"},
|
||||
"documents": {"display_name": "Documents", "is_list": True},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"chroma_server_cors_allow_origins": {
|
||||
"display_name": "Server CORS Allow Origins",
|
||||
|
|
@ -84,7 +84,8 @@ class ChromaComponent(CustomComponent):
|
|||
|
||||
if chroma_server_host is not None:
|
||||
chroma_settings = chromadb.config.Settings(
|
||||
chroma_server_cors_allow_origins=chroma_server_cors_allow_origins or None,
|
||||
chroma_server_cors_allow_origins=chroma_server_cors_allow_origins
|
||||
or None,
|
||||
chroma_server_host=chroma_server_host,
|
||||
chroma_server_port=chroma_server_port or None,
|
||||
chroma_server_grpc_port=chroma_server_grpc_port or None,
|
||||
|
|
@ -99,7 +100,9 @@ class ChromaComponent(CustomComponent):
|
|||
|
||||
if documents is not None and embedding is not None:
|
||||
if len(documents) == 0:
|
||||
raise ValueError("If documents are provided, there must be at least one document.")
|
||||
raise ValueError(
|
||||
"If documents are provided, there must be at least one document."
|
||||
)
|
||||
chroma = Chroma.from_documents(
|
||||
documents=documents, # type: ignore
|
||||
persist_directory=index_directory,
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ class ChromaSearchComponent(LCVectorStoreComponent):
|
|||
# "persist": {"display_name": "Persist"},
|
||||
"index_directory": {"display_name": "Index Directory"},
|
||||
"code": {"show": False, "display_name": "Code"},
|
||||
"documents": {"display_name": "Documents", "is_list": True},
|
||||
"embedding": {
|
||||
"display_name": "Embedding",
|
||||
"info": "Embedding model to vectorize inputs (make sure to use same as index)",
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ from langchain_community.vectorstores import VectorStore
|
|||
from langchain_community.vectorstores.faiss import FAISS
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.field_typing import Document, Embeddings
|
||||
from langflow.field_typing import Embeddings
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class FAISSComponent(CustomComponent):
|
||||
|
|
@ -15,7 +16,7 @@ class FAISSComponent(CustomComponent):
|
|||
|
||||
def build_config(self):
|
||||
return {
|
||||
"documents": {"display_name": "Documents"},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"folder_path": {
|
||||
"display_name": "Folder Path",
|
||||
|
|
@ -27,10 +28,16 @@ class FAISSComponent(CustomComponent):
|
|||
def build(
|
||||
self,
|
||||
embedding: Embeddings,
|
||||
documents: List[Document],
|
||||
inputs: List[Record],
|
||||
folder_path: str,
|
||||
index_name: str = "langflow_index",
|
||||
) -> Union[VectorStore, FAISS, BaseRetriever]:
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
vector_store = FAISS.from_documents(documents=documents, embedding=embedding)
|
||||
if not folder_path:
|
||||
raise ValueError("Folder path is required to save the FAISS index.")
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ class FAISSSearchComponent(LCVectorStoreComponent):
|
|||
|
||||
def build_config(self):
|
||||
return {
|
||||
"documents": {"display_name": "Documents"},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"folder_path": {
|
||||
"display_name": "Folder Path",
|
||||
|
|
|
|||
|
|
@ -3,17 +3,20 @@ from typing import List, Optional
|
|||
from langchain_community.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.field_typing import Document, Embeddings, NestedDict
|
||||
from langflow.field_typing import Embeddings, NestedDict
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class MongoDBAtlasComponent(CustomComponent):
|
||||
display_name = "MongoDB Atlas"
|
||||
description = "Construct a `MongoDB Atlas Vector Search` vector store from raw documents."
|
||||
description = (
|
||||
"Construct a `MongoDB Atlas Vector Search` vector store from raw documents."
|
||||
)
|
||||
icon = "MongoDB"
|
||||
|
||||
def build_config(self):
|
||||
return {
|
||||
"documents": {"display_name": "Documents"},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"collection_name": {"display_name": "Collection Name"},
|
||||
"db_name": {"display_name": "Database Name"},
|
||||
|
|
@ -25,7 +28,7 @@ class MongoDBAtlasComponent(CustomComponent):
|
|||
def build(
|
||||
self,
|
||||
embedding: Embeddings,
|
||||
documents: List[Document],
|
||||
inputs: List[Record],
|
||||
collection_name: str = "",
|
||||
db_name: str = "",
|
||||
index_name: str = "",
|
||||
|
|
@ -36,12 +39,20 @@ class MongoDBAtlasComponent(CustomComponent):
|
|||
try:
|
||||
from pymongo import MongoClient
|
||||
except ImportError:
|
||||
raise ImportError("Please install pymongo to use MongoDB Atlas Vector Store")
|
||||
raise ImportError(
|
||||
"Please install pymongo to use MongoDB Atlas Vector Store"
|
||||
)
|
||||
try:
|
||||
mongo_client: MongoClient = MongoClient(mongodb_atlas_cluster_uri)
|
||||
collection = mongo_client[db_name][collection_name]
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to connect to MongoDB Atlas: {e}")
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
if documents:
|
||||
vector_store = MongoDBAtlasVectorSearch.from_documents(
|
||||
documents=documents,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ from langchain_community.vectorstores import VectorStore
|
|||
from langchain_community.vectorstores.pinecone import Pinecone
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.field_typing import Document, Embeddings
|
||||
from langflow.field_typing import Embeddings
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class PineconeComponent(CustomComponent):
|
||||
|
|
@ -17,7 +18,7 @@ class PineconeComponent(CustomComponent):
|
|||
|
||||
def build_config(self):
|
||||
return {
|
||||
"documents": {"display_name": "Documents"},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"index_name": {"display_name": "Index Name"},
|
||||
"namespace": {"display_name": "Namespace"},
|
||||
|
|
@ -44,7 +45,7 @@ class PineconeComponent(CustomComponent):
|
|||
self,
|
||||
embedding: Embeddings,
|
||||
pinecone_env: str,
|
||||
documents: List[Document],
|
||||
inputs: List[Record],
|
||||
text_key: str = "text",
|
||||
pool_threads: int = 4,
|
||||
index_name: Optional[str] = None,
|
||||
|
|
@ -59,6 +60,12 @@ class PineconeComponent(CustomComponent):
|
|||
pinecone.init(api_key=pinecone_api_key, environment=pinecone_env) # type: ignore
|
||||
if not index_name:
|
||||
raise ValueError("Index Name is required.")
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
if documents:
|
||||
return Pinecone.from_documents(
|
||||
documents=documents,
|
||||
|
|
|
|||
|
|
@ -3,8 +3,10 @@ from typing import Optional, Union
|
|||
from langchain.schema import BaseRetriever
|
||||
from langchain_community.vectorstores import VectorStore
|
||||
from langchain_community.vectorstores.qdrant import Qdrant
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.field_typing import Document, Embeddings, NestedDict
|
||||
from langflow.field_typing import Embeddings, NestedDict
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class QdrantComponent(CustomComponent):
|
||||
|
|
@ -14,17 +16,23 @@ class QdrantComponent(CustomComponent):
|
|||
|
||||
def build_config(self):
|
||||
return {
|
||||
"documents": {"display_name": "Documents"},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"api_key": {"display_name": "API Key", "password": True, "advanced": True},
|
||||
"collection_name": {"display_name": "Collection Name"},
|
||||
"content_payload_key": {"display_name": "Content Payload Key", "advanced": True},
|
||||
"content_payload_key": {
|
||||
"display_name": "Content Payload Key",
|
||||
"advanced": True,
|
||||
},
|
||||
"distance_func": {"display_name": "Distance Function", "advanced": True},
|
||||
"grpc_port": {"display_name": "gRPC Port", "advanced": True},
|
||||
"host": {"display_name": "Host", "advanced": True},
|
||||
"https": {"display_name": "HTTPS", "advanced": True},
|
||||
"location": {"display_name": "Location", "advanced": True},
|
||||
"metadata_payload_key": {"display_name": "Metadata Payload Key", "advanced": True},
|
||||
"metadata_payload_key": {
|
||||
"display_name": "Metadata Payload Key",
|
||||
"advanced": True,
|
||||
},
|
||||
"path": {"display_name": "Path", "advanced": True},
|
||||
"port": {"display_name": "Port", "advanced": True},
|
||||
"prefer_grpc": {"display_name": "Prefer gRPC", "advanced": True},
|
||||
|
|
@ -38,7 +46,7 @@ class QdrantComponent(CustomComponent):
|
|||
self,
|
||||
embedding: Embeddings,
|
||||
collection_name: str,
|
||||
documents: Optional[Document] = None,
|
||||
inputs: Optional[Record] = None,
|
||||
api_key: Optional[str] = None,
|
||||
content_payload_key: str = "page_content",
|
||||
distance_func: str = "Cosine",
|
||||
|
|
@ -55,6 +63,12 @@ class QdrantComponent(CustomComponent):
|
|||
timeout: Optional[int] = None,
|
||||
url: Optional[str] = None,
|
||||
) -> Union[VectorStore, Qdrant, BaseRetriever]:
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
if documents is None:
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
|
|
|
|||
|
|
@ -3,9 +3,10 @@ from typing import Optional, Union
|
|||
from langchain.embeddings.base import Embeddings
|
||||
from langchain_community.vectorstores import VectorStore
|
||||
from langchain_community.vectorstores.redis import Redis
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class RedisComponent(CustomComponent):
|
||||
|
|
@ -28,7 +29,7 @@ class RedisComponent(CustomComponent):
|
|||
return {
|
||||
"index_name": {"display_name": "Index Name", "value": "your_index"},
|
||||
"code": {"show": False, "display_name": "Code"},
|
||||
"documents": {"display_name": "Documents", "is_list": True},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"schema": {"display_name": "Schema", "file_types": [".yaml"]},
|
||||
"redis_server_url": {
|
||||
|
|
@ -44,7 +45,7 @@ class RedisComponent(CustomComponent):
|
|||
redis_server_url: str,
|
||||
redis_index_name: str,
|
||||
schema: Optional[str] = None,
|
||||
documents: Optional[Document] = None,
|
||||
inputs: Optional[Record] = None,
|
||||
) -> Union[VectorStore, BaseRetriever]:
|
||||
"""
|
||||
Builds the Vector Store or BaseRetriever object.
|
||||
|
|
@ -58,9 +59,17 @@ class RedisComponent(CustomComponent):
|
|||
Returns:
|
||||
- VectorStore: The Vector Store object.
|
||||
"""
|
||||
if documents is None:
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
if not documents:
|
||||
if schema is None:
|
||||
raise ValueError("If no documents are provided, a schema must be provided.")
|
||||
raise ValueError(
|
||||
"If no documents are provided, a schema must be provided."
|
||||
)
|
||||
redis_vs = Redis.from_existing_index(
|
||||
embedding=embedding,
|
||||
index_name=redis_index_name,
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class RedisSearchComponent(RedisComponent, LCVectorStoreComponent):
|
|||
"input_value": {"display_name": "Input"},
|
||||
"index_name": {"display_name": "Index Name", "value": "your_index"},
|
||||
"code": {"show": False, "display_name": "Code"},
|
||||
"documents": {"display_name": "Documents", "is_list": True},
|
||||
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"schema": {"display_name": "Schema", "file_types": [".yaml"]},
|
||||
"redis_server_url": {
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@ from typing import List, Union
|
|||
from langchain.schema import BaseRetriever
|
||||
from langchain_community.vectorstores import VectorStore
|
||||
from langchain_community.vectorstores.supabase import SupabaseVectorStore
|
||||
from langflow import CustomComponent
|
||||
from langflow.field_typing import Document, Embeddings, NestedDict
|
||||
from supabase.client import Client, create_client
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.field_typing import Embeddings, NestedDict
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class SupabaseComponent(CustomComponent):
|
||||
display_name = "Supabase"
|
||||
|
|
@ -14,7 +16,7 @@ class SupabaseComponent(CustomComponent):
|
|||
|
||||
def build_config(self):
|
||||
return {
|
||||
"documents": {"display_name": "Documents"},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"query_name": {"display_name": "Query Name"},
|
||||
"search_kwargs": {"display_name": "Search Kwargs", "advanced": True},
|
||||
|
|
@ -26,14 +28,22 @@ class SupabaseComponent(CustomComponent):
|
|||
def build(
|
||||
self,
|
||||
embedding: Embeddings,
|
||||
documents: List[Document],
|
||||
inputs: List[Record],
|
||||
query_name: str = "",
|
||||
search_kwargs: NestedDict = {},
|
||||
supabase_service_key: str = "",
|
||||
supabase_url: str = "",
|
||||
table_name: str = "",
|
||||
) -> Union[VectorStore, SupabaseVectorStore, BaseRetriever]:
|
||||
supabase: Client = create_client(supabase_url, supabase_key=supabase_service_key)
|
||||
supabase: Client = create_client(
|
||||
supabase_url, supabase_key=supabase_service_key
|
||||
)
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
return SupabaseVectorStore.from_documents(
|
||||
documents=documents,
|
||||
embedding=embedding,
|
||||
|
|
|
|||
|
|
@ -8,13 +8,16 @@ from langchain_community.vectorstores.vectara import Vectara
|
|||
from langchain_core.vectorstores import VectorStore
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.field_typing import BaseRetriever, Document
|
||||
from langflow.field_typing import BaseRetriever
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class VectaraComponent(CustomComponent):
|
||||
display_name: str = "Vectara"
|
||||
description: str = "Implementation of Vector Store using Vectara"
|
||||
documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara"
|
||||
documentation = (
|
||||
"https://python.langchain.com/docs/integrations/vectorstores/vectara"
|
||||
)
|
||||
beta = True
|
||||
icon = "Vectara"
|
||||
field_config = {
|
||||
|
|
@ -28,8 +31,9 @@ class VectaraComponent(CustomComponent):
|
|||
"display_name": "Vectara API Key",
|
||||
"password": True,
|
||||
},
|
||||
"documents": {
|
||||
"display_name": "Documents",
|
||||
"inputs": {
|
||||
"display_name": "Input",
|
||||
"input_types": ["Document", "Record"],
|
||||
"info": "If provided, will be upserted to corpus (optional)",
|
||||
},
|
||||
"files_url": {
|
||||
|
|
@ -44,11 +48,18 @@ class VectaraComponent(CustomComponent):
|
|||
vectara_corpus_id: str,
|
||||
vectara_api_key: str,
|
||||
files_url: Optional[List[str]] = None,
|
||||
documents: Optional[Document] = None,
|
||||
inputs: Optional[Record] = None,
|
||||
) -> Union[VectorStore, BaseRetriever]:
|
||||
source = "Langflow"
|
||||
|
||||
if documents is not None:
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
|
||||
if documents:
|
||||
return Vectara.from_documents(
|
||||
documents=documents, # type: ignore
|
||||
embedding=FakeEmbeddings(size=768),
|
||||
|
|
|
|||
|
|
@ -33,10 +33,6 @@ class VectaraSearchComponent(VectaraComponent, LCVectorStoreComponent):
|
|||
"display_name": "Vectara API Key",
|
||||
"password": True,
|
||||
},
|
||||
"documents": {
|
||||
"display_name": "Documents",
|
||||
"info": "If provided, will be upserted to corpus (optional)",
|
||||
},
|
||||
"files_url": {
|
||||
"display_name": "Files Url",
|
||||
"info": "Make vectara object using url of files (optional)",
|
||||
|
|
|
|||
|
|
@ -2,16 +2,19 @@ from typing import Optional, Union
|
|||
|
||||
import weaviate # type: ignore
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
from langchain.schema import BaseRetriever
|
||||
from langchain_community.vectorstores import VectorStore, Weaviate
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class WeaviateVectorStoreComponent(CustomComponent):
|
||||
display_name: str = "Weaviate"
|
||||
description: str = "Implementation of Vector Store using Weaviate"
|
||||
documentation = "https://python.langchain.com/docs/integrations/vectorstores/weaviate"
|
||||
documentation = (
|
||||
"https://python.langchain.com/docs/integrations/vectorstores/weaviate"
|
||||
)
|
||||
beta = True
|
||||
field_config = {
|
||||
"url": {"display_name": "Weaviate URL", "value": "http://localhost:8080"},
|
||||
|
|
@ -30,7 +33,7 @@ class WeaviateVectorStoreComponent(CustomComponent):
|
|||
"advanced": True,
|
||||
"value": "text",
|
||||
},
|
||||
"documents": {"display_name": "Documents", "is_list": True},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"attributes": {
|
||||
"display_name": "Attributes",
|
||||
|
|
@ -55,7 +58,7 @@ class WeaviateVectorStoreComponent(CustomComponent):
|
|||
index_name: Optional[str] = None,
|
||||
text_key: str = "text",
|
||||
embedding: Optional[Embeddings] = None,
|
||||
documents: Optional[Document] = None,
|
||||
inputs: Optional[Record] = None,
|
||||
attributes: Optional[list] = None,
|
||||
) -> Union[VectorStore, BaseRetriever]:
|
||||
if api_key:
|
||||
|
|
@ -78,8 +81,14 @@ class WeaviateVectorStoreComponent(CustomComponent):
|
|||
return pascal_case_word
|
||||
|
||||
index_name = _to_pascal_case(index_name) if index_name else None
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
|
||||
if documents is not None and embedding is not None:
|
||||
if documents and embedding is not None:
|
||||
return Weaviate.from_documents(
|
||||
client=client,
|
||||
index_name=index_name,
|
||||
|
|
|
|||
|
|
@ -39,7 +39,6 @@ class WeaviateSearchVectorStore(WeaviateVectorStoreComponent, LCVectorStoreCompo
|
|||
"advanced": True,
|
||||
"value": "text",
|
||||
},
|
||||
"documents": {"display_name": "Documents", "is_list": True},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"attributes": {
|
||||
"display_name": "Attributes",
|
||||
|
|
|
|||
|
|
@ -3,9 +3,10 @@ from typing import Optional, Union
|
|||
from langchain.embeddings.base import Embeddings
|
||||
from langchain_community.vectorstores import VectorStore
|
||||
from langchain_community.vectorstores.pgvector import PGVector
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
|
||||
from langflow import CustomComponent
|
||||
from langflow.schema.schema import Record
|
||||
|
||||
|
||||
class PGVectorComponent(CustomComponent):
|
||||
|
|
@ -15,7 +16,9 @@ class PGVectorComponent(CustomComponent):
|
|||
|
||||
display_name: str = "PGVector"
|
||||
description: str = "Implementation of Vector Store using PostgreSQL"
|
||||
documentation = "https://python.langchain.com/docs/integrations/vectorstores/pgvector"
|
||||
documentation = (
|
||||
"https://python.langchain.com/docs/integrations/vectorstores/pgvector"
|
||||
)
|
||||
|
||||
def build_config(self):
|
||||
"""
|
||||
|
|
@ -26,7 +29,7 @@ class PGVectorComponent(CustomComponent):
|
|||
"""
|
||||
return {
|
||||
"code": {"show": False},
|
||||
"documents": {"display_name": "Documents", "is_list": True},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"pg_server_url": {
|
||||
"display_name": "PostgreSQL Server Connection String",
|
||||
|
|
@ -40,7 +43,7 @@ class PGVectorComponent(CustomComponent):
|
|||
embedding: Embeddings,
|
||||
pg_server_url: str,
|
||||
collection_name: str,
|
||||
documents: Optional[Document] = None,
|
||||
inputs: Optional[Record] = None,
|
||||
) -> Union[VectorStore, BaseRetriever]:
|
||||
"""
|
||||
Builds the Vector Store or BaseRetriever object.
|
||||
|
|
@ -55,6 +58,12 @@ class PGVectorComponent(CustomComponent):
|
|||
- VectorStore: The Vector Store object.
|
||||
"""
|
||||
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
try:
|
||||
if documents is None:
|
||||
vector_store = PGVector.from_existing_index(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue