Update text splitter and vector stores

This commit is contained in:
anovazzi1 2024-01-16 15:50:19 -03:00
commit ef4a195e7a
3 changed files with 11 additions and 23 deletions

View file

@@ -1,6 +1,6 @@
from langflow import CustomComponent
from langchain.text_splitter import TextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.documents.base import Document
from typing import List
@@ -23,10 +23,9 @@ class CharacterTextSplitterComponent(CustomComponent):
chunk_overlap: int = 200,
chunk_size: int = 1000,
separator: str = "\n",
) -> TextSplitter:
return TextSplitter(
documents=documents,
) -> List[Document]:
return CharacterTextSplitter(
chunk_overlap=chunk_overlap,
chunk_size=chunk_size,
separator=separator,
)
).split_documents(documents)

View file

@@ -3,7 +3,7 @@ from typing import List, Optional, Union
import chromadb # type: ignore
from langchain.embeddings.base import Embeddings
from langchain.schema import BaseRetriever, Document
from langchain.vectorstores import Chroma
from langchain.vectorstores.chroma import Chroma
from langchain.vectorstores.base import VectorStore
from langflow import CustomComponent

View file

@@ -1,11 +1,12 @@
from langflow import CustomComponent
from langchain.vectorstores import FAISS
from typing import Optional, List
from langchain_community.vectorstores.faiss import FAISS
from typing import Optional, List, Union
from langchain.schema import BaseRetriever
from langchain.vectorstores.base import VectorStore
from langflow.field_typing import (
Document,
Embeddings,
NestedDict,
)
class FAISSComponent(CustomComponent):
@@ -17,23 +18,11 @@ class FAISSComponent(CustomComponent):
return {
"documents": {"display_name": "Documents"},
"embedding": {"display_name": "Embedding"},
"folder_path": {"display_name": "Local Path"},
"index_name": {"display_name": "Index Name"},
"search_kwargs": {"display_name": "Search Kwargs", "advanced": True},
}
def build(
self,
embedding: Embeddings,
documents: Optional[List[Document]] = None,
folder_path: str = "",
index_name: str = "",
search_kwargs: Optional[NestedDict] = None,
) -> FAISS:
return FAISS(
embedding=embedding,
documents=documents,
folder_path=folder_path,
index_name=index_name,
search_kwargs=search_kwargs or {},
)
) -> Union[VectorStore,FAISS,BaseRetriever]:
return FAISS.from_documents(documents=documents,embedding=embedding)