From fda2f17a9f42e69411793aff5e7c2504999b7b51 Mon Sep 17 00:00:00 2001 From: Cristhian Zanforlin Lousa Date: Wed, 12 Feb 2025 15:32:08 -0300 Subject: [PATCH] fix: Improve path handling and type annotations in FaissVectorStoreComponent (#6081) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📝 (faiss.py): import Path and List modules for better type hinting and file path handling 🐛 (faiss.py): fix issue with building vector store when persist_directory is not provided 🐛 (faiss.py): fix issue with loading FAISS index when index file does not exist 📝 (faiss.py): add type hints for search_documents method parameters and return value 📝 (faiss.py): remove unnecessary logging statements from search_documents method * [autofix.ci] apply automated fixes * 📝 (faiss.py): add 'required' flag to the 'Persist Directory' input field to ensure it is mandatory for the user to provide a value * 🔧 (faiss.py): refactor build_vector_store method to handle persist_directory more efficiently 🔧 (faiss.py): refactor search_documents method to handle persist_directory more efficiently * [autofix.ci] apply automated fixes * 🔧 (faiss.py): refactor get_persist_directory method to return resolved persist directory path or current directory if not set ♻️ (faiss.py): refactor build_vector_store and search_documents methods to use get_persist_directory method for path resolution * ♻️ (faiss.py): refactor resolve_path method to be static and return a string instead of Path object for consistency and clarity --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../langflow/components/vectorstores/faiss.py | 60 ++++++++++--------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/src/backend/base/langflow/components/vectorstores/faiss.py b/src/backend/base/langflow/components/vectorstores/faiss.py index a002121aa..fd1b9c360 100644 --- a/src/backend/base/langflow/components/vectorstores/faiss.py +++ b/src/backend/base/langflow/components/vectorstores/faiss.py @@ -1,3 +1,5 @@ +from pathlib import Path + from langchain_community.vectorstores import FAISS from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store @@ -44,16 +46,30 @@ class FaissVectorStoreComponent(LCVectorStoreComponent): ), ] + @staticmethod + def resolve_path(path: str) -> str: + """Resolve the path relative to the Langflow root. + + Args: + path: The path to resolve + Returns: + str: The resolved path as a string + """ + return str(Path(path).resolve()) + + def get_persist_directory(self) -> Path: + """Returns the resolved persist directory path or the current directory if not set.""" + if self.persist_directory: + return Path(self.resolve_path(self.persist_directory)) + return Path() + @check_cached_vector_store def build_vector_store(self) -> FAISS: """Builds the FAISS object.""" - if not self.persist_directory: - msg = "Folder path is required to save the FAISS index." - raise ValueError(msg) - path = self.resolve_path(self.persist_directory) + path = self.get_persist_directory() + path.mkdir(parents=True, exist_ok=True) documents = [] - for _input in self.ingest_data or []: if isinstance(_input, Data): documents.append(_input.to_lc_document()) @@ -62,41 +78,31 @@ class FaissVectorStoreComponent(LCVectorStoreComponent): faiss = FAISS.from_documents(documents=documents, embedding=self.embedding) faiss.save_local(str(path), self.index_name) - return faiss def search_documents(self) -> list[Data]: """Search for documents in the FAISS vector store.""" - if not self.persist_directory: - msg = "Folder path is required to load the FAISS index." - raise ValueError(msg) - path = self.resolve_path(self.persist_directory) + path = self.get_persist_directory() + index_path = path / f"{self.index_name}.faiss" - vector_store = FAISS.load_local( - folder_path=path, - embeddings=self.embedding, - index_name=self.index_name, - allow_dangerous_deserialization=self.allow_dangerous_deserialization, - ) + if not index_path.exists(): + vector_store = self.build_vector_store() + else: + vector_store = FAISS.load_local( + folder_path=str(path), + embeddings=self.embedding, + index_name=self.index_name, + allow_dangerous_deserialization=self.allow_dangerous_deserialization, + ) if not vector_store: msg = "Failed to load the FAISS index." raise ValueError(msg) - self.log(f"Search input: {self.search_query}") - self.log(f"Number of results: {self.number_of_results}") - if self.search_query and isinstance(self.search_query, str) and self.search_query.strip(): docs = vector_store.similarity_search( query=self.search_query, k=self.number_of_results, ) - - self.log(f"Retrieved documents: {len(docs)}") - - data = docs_to_data(docs) - self.log(f"Converted documents to data: {len(data)}") - self.log(data) - return data # Return the search results data - self.log("No search input provided. Skipping search.") + return docs_to_data(docs) return []