From b8141dca7dc8dd206e11d90b212ce3fa5118618c Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Thu, 22 Jun 2023 13:45:04 -0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=80=20feat(loading.py):=20move=20loadi?= =?UTF-8?q?ng=20into=20initialize=20module=20=F0=9F=93=9D=20docs(initializ?= =?UTF-8?q?e):=20add=20empty=20=5F=5Finit=5F=5F.py=20file=20to=20initializ?= =?UTF-8?q?e=20directory=20The=20initialize=20directory=20was=20added=20to?= =?UTF-8?q?=20the=20project,=20but=20it=20was=20missing=20an=20=5F=5Finit?= =?UTF-8?q?=5F=5F.py=20file.=20This=20file=20is=20necessary=20to=20make=20?= =?UTF-8?q?the=20directory=20a=20package=20and=20allow=20importing=20modul?= =?UTF-8?q?es=20from=20it.=20An=20empty=20=5F=5Finit=5F=5F.py=20file=20was?= =?UTF-8?q?=20added=20to=20the=20directory=20to=20fix=20this=20issue.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../langflow/interface/initialize/__init__.py | 0 .../interface/{ => initialize}/loading.py | 45 ++++++++++++------- 2 files changed, 29 insertions(+), 16 deletions(-) create mode 100644 src/backend/langflow/interface/initialize/__init__.py rename src/backend/langflow/interface/{ => initialize}/loading.py (93%) diff --git a/src/backend/langflow/interface/initialize/__init__.py b/src/backend/langflow/interface/initialize/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/backend/langflow/interface/loading.py b/src/backend/langflow/interface/initialize/loading.py similarity index 93% rename from src/backend/langflow/interface/loading.py rename to src/backend/langflow/interface/initialize/loading.py index 260104323..9567d27c7 100644 --- a/src/backend/langflow/interface/loading.py +++ b/src/backend/langflow/interface/initialize/loading.py @@ -17,6 +17,11 @@ from langchain.base_language import BaseLanguageModel from langchain.callbacks.base import BaseCallbackManager from langchain.chains.loading import load_chain_from_config from langchain.llms.loading import load_llm_from_config +from langflow.interface.initialize.vector_store import ( + initialize_chroma, + initialize_pinecone, + initialize_qdrant, +) from pydantic import ValidationError from langflow.interface.custom_lists import CUSTOM_NODES @@ -153,30 +158,38 @@ def instantiate_embedding(class_object, params): def instantiate_vectorstore(class_object, params): - if len(params.get("documents", [])) == 0: - raise ValueError( - "The source you provided did not load correctly or was empty." - "This may cause an error in the vectorstore." - ) + # could be documents or texts + if class_object.__name__ == "Pinecone": + return initialize_pinecone(class_object, params) # Chroma requires all metadata values to not be None if class_object.__name__ == "Chroma": - persist = params.pop("persist", False) - for doc in params["documents"]: - if doc.metadata is None: - doc.metadata = {} - for key, value in doc.metadata.items(): - if value is None: - doc.metadata[key] = "" - vector_store = class_object.from_documents(**params) - if persist: - vector_store.persist() + return initialize_chroma(class_object, params) + + if class_object.__name__ == "Qdrant": + return initialize_qdrant(class_object, params) else: + if "texts" in params: + params["documents"] = params.pop("texts") + vector_store = class_object.from_documents(**params) return vector_store def instantiate_documentloader(class_object, params): - return class_object(**params).load() + metadata = params.pop("metadata", None) + docs = class_object(**params).load() + if metadata: + if isinstance(metadata, str): + try: + metadata = json.loads(metadata) + except json.JSONDecodeError as exc: + raise ValueError( + "The metadata you provided is not a valid JSON string." + ) from exc + + for doc in docs: + doc.metadata = metadata + return docs def instantiate_textsplitter(class_object, params):