From 7557c7979c2366f3ba53b8406557e73bf5da72b6 Mon Sep 17 00:00:00 2001
From: ogabrielluiz
Date: Mon, 10 Jun 2024 12:02:53 -0300
Subject: [PATCH] refactor: add utility function for converting chroma
 collection to records

---
Note: chroma_collection_to_records was revised to tolerate missing/None
metadata; the utils.py blob id on the index line below predates that revision.

 .../langflow/base/vectorstores/__init__.py    |  0
 .../base/langflow/base/vectorstores/utils.py  | 34 +++++++++++++++++++
 .../components/vectorstores/Chroma.py         |  5 +++-
 3 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 src/backend/base/langflow/base/vectorstores/__init__.py
 create mode 100644 src/backend/base/langflow/base/vectorstores/utils.py

diff --git a/src/backend/base/langflow/base/vectorstores/__init__.py b/src/backend/base/langflow/base/vectorstores/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/backend/base/langflow/base/vectorstores/utils.py b/src/backend/base/langflow/base/vectorstores/utils.py
new file mode 100644
index 000000000..e8d93ff37
--- /dev/null
+++ b/src/backend/base/langflow/base/vectorstores/utils.py
@@ -0,0 +1,34 @@
+from langflow.schema import Record
+
+
+def chroma_collection_to_records(collection_dict: dict):
+    """
+    Converts a Chroma collection dictionary into a list of records.
+
+    Each record holds the document's ``id`` and text (``document``) plus every
+    key/value pair of that document's metadata, when it has any.
+
+    Args:
+        collection_dict (dict): Result of a Chroma ``get()`` call, with parallel
+            ``"ids"`` and ``"documents"`` lists. ``"metadatas"`` may be missing,
+            ``None``, or contain ``None`` entries.
+
+    Returns:
+        list: A list of records, where each record represents a document in the collection.
+    """
+    # Chroma reports a field it did not return as None, so fall back to empty
+    # sequences rather than iterating or indexing None.
+    documents = collection_dict["documents"] or []
+    metadatas = collection_dict.get("metadatas") or []
+    records = []
+    for i, doc in enumerate(documents):
+        record_dict = {
+            "id": collection_dict["ids"][i],
+            "document": doc,
+        }
+        # A document stored without metadata has a None entry.
+        metadata = metadatas[i] if i < len(metadatas) else None
+        if metadata:
+            record_dict.update(metadata)
+        records.append(Record(**record_dict))
+    return records
diff --git a/src/backend/base/langflow/components/vectorstores/Chroma.py b/src/backend/base/langflow/components/vectorstores/Chroma.py
index 5742aad7b..b7cfad7d4 100644
--- a/src/backend/base/langflow/components/vectorstores/Chroma.py
+++ b/src/backend/base/langflow/components/vectorstores/Chroma.py
@@ -6,7 +6,7 @@ from langchain_chroma import Chroma
 from langchain_core.embeddings import Embeddings
 from langchain_core.retrievers import BaseRetriever
 from langchain_core.vectorstores import VectorStore
-
+from langflow.base.vectorstores.utils import chroma_collection_to_records
 from langflow.custom import CustomComponent
 from langflow.schema import Record
 
@@ -121,4 +121,7 @@ class ChromaComponent(CustomComponent):
                 client=client,
                 embedding_function=embedding,
             )
+
+        store = chroma.get()
+        self.status = chroma_collection_to_records(store)
         return chroma