Refactor vector stores creator to make implementing others easier (#365)

This commit is contained in:
Gabriel Luiz Freitas Almeida 2023-05-26 22:50:26 -03:00 committed by GitHub
commit 6f94fa60bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 124 additions and 50 deletions

19
poetry.lock generated
View file

@ -1337,23 +1337,6 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4
[package.extras]
grpc = ["grpcio (>=1.44.0,<2.0.0dev)"]
[[package]]
name = "gptcache"
version = "0.1.26"
description = "GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, similar to how Redis works for traditional applications."
category = "main"
optional = false
python-versions = ">=3.8.1"
files = [
{file = "gptcache-0.1.26-py3-none-any.whl", hash = "sha256:878c7741ffadbb9766211768936880bd9a660cefa1ed5f3d3746eaf9db9e014c"},
{file = "gptcache-0.1.26.tar.gz", hash = "sha256:f960a56fa6e6b0cf7c2151892714e6089ca206b067bee821ead4c1661e478f7c"},
]
[package.dependencies]
cachetools = "*"
numpy = "*"
requests = "*"
[[package]]
name = "greenlet"
version = "2.0.2"
@ -5503,4 +5486,4 @@ deploy = ["langchain-serve"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
content-hash = "0ab60d05d829739ee29dd813743d302353765846cefa71b64ade9778039f0c7c"
content-hash = "71a272bd7d914de11a801ecdfeaffe7d7ea63340852341c84eb1889e3e19f987"

View file

@ -50,7 +50,6 @@ pyarrow = "^11.0.0"
websockets = "^11.0.2"
tiktoken = "^0.3.3"
wikipedia = "^1.4.0"
gptcache = "^0.1.23"
langchain-serve = { version = "^0.0.33", optional = true }
[tool.poetry.group.dev.dependencies]

View file

@ -180,7 +180,13 @@ class Node:
elif isinstance(value, list) and all(
isinstance(node, Node) for node in value
):
self.params[key] = [node.build() for node in value] # type: ignore
self.params[key] = []
for node in value:
built = node.build()
if isinstance(built, list):
self.params[key].extend(built)
else:
self.params[key].append(built)
# Get the class from LANGCHAIN_TYPES_DICT
# and instantiate it with the params

View file

@ -60,11 +60,6 @@ embedding_type_to_cls_dict: dict[str, Any] = {
for embedding_name in embeddings.__all__
}
## Vector Stores
vectorstores_type_to_cls_dict: dict[str, Any] = {
vectorstore_name: import_class(f"langchain.vectorstores.{vectorstore_name}")
for vectorstore_name in vectorstores.__all__
}
## Document Loaders
documentloaders_type_to_cls_dict: dict[str, Any] = {

View file

@ -1,43 +1,41 @@
from typing import Dict, List, Optional
from typing import Any, Dict, List, Optional, Type
from langchain import vectorstores
from langflow.interface.base import LangChainTypeCreator
from langflow.interface.custom_lists import vectorstores_type_to_cls_dict
from langflow.interface.importing.utils import import_class
from langflow.settings import settings
from langflow.template.nodes import VectorStoreFrontendNode
from langflow.utils.logger import logger
from langflow.utils.util import build_template_from_class
from langflow.utils.util import build_template_from_method
class VectorstoreCreator(LangChainTypeCreator):
type_name: str = "vectorstores"
@property
def frontend_node_class(self) -> Type[VectorStoreFrontendNode]:
return VectorStoreFrontendNode
@property
def type_to_loader_dict(self) -> Dict:
return vectorstores_type_to_cls_dict
if self.type_dict is None:
self.type_dict: dict[str, Any] = {
vectorstore_name: import_class(
f"langchain.vectorstores.{vectorstore_name}"
)
for vectorstore_name in vectorstores.__all__
}
return self.type_dict
def get_signature(self, name: str) -> Optional[Dict]:
"""Get the signature of a vector store."""
try:
signature = build_template_from_class(name, vectorstores_type_to_cls_dict)
# TODO: Use FrontendendNode class to build the signature
signature["template"] = {
"documents": {
"type": "TextSplitter",
"required": True,
"show": True,
"name": "documents",
"display_name": "Text Splitter",
},
"embedding": {
"type": "Embeddings",
"required": True,
"show": True,
"name": "embedding",
"display_name": "Embedding",
},
}
return signature
return build_template_from_method(
name,
type_to_cls_dict=self.type_to_loader_dict,
method_name="from_texts",
)
except ValueError as exc:
raise ValueError(f"Vector Store {name} not found") from exc
except AttributeError as exc:

View file

@ -628,3 +628,32 @@ class EmbeddingFrontendNode(FrontendNode):
FrontendNode.format_field(field, name)
if field.name == "headers":
field.show = False
class VectorStoreFrontendNode(FrontendNode):
    """Frontend node with field formatting specific to vector stores."""

    @staticmethod
    def format_field(field: TemplateField, name: Optional[str] = None) -> None:
        """Format ``field`` in place for display on a vector store node.

        The base ``FrontendNode.format_field`` runs first; afterwards a few
        well-known fields get their attributes overridden:

        * ``texts`` is renamed to ``documents`` and typed as ``TextSplitter``.
        * any field whose name contains ``embedding`` is renamed to
          ``embedding`` and typed as ``Embeddings``.
        * ``n_dim`` is shown as an advanced field.
        * ``work_dir`` is shown as a regular (non-advanced) field.

        Args:
            field: The template field to adjust; it is mutated.
            name: Forwarded unchanged to ``FrontendNode.format_field``.
        """
        FrontendNode.format_field(field, name)

        field_name = field.name
        if field_name == "texts":
            overrides = {
                "name": "documents",
                "field_type": "TextSplitter",
                "display_name": "Text Splitter",
                "required": True,
                "show": True,
                "advanced": False,
            }
        elif "embedding" in field_name:
            # for backwards compatibility
            overrides = {
                "name": "embedding",
                "required": True,
                "show": True,
                "advanced": False,
                "display_name": "Embedding",
                "field_type": "Embeddings",
            }
        elif field_name == "n_dim":
            overrides = {"show": True, "advanced": True}
        elif field_name == "work_dir":
            overrides = {"show": True, "advanced": False}
        else:
            # Every other field keeps the base formatting untouched.
            return

        for attribute, value in overrides.items():
            setattr(field, attribute, value)

View file

@ -160,6 +160,70 @@ def build_template_from_class(
}
def build_template_from_method(
    class_name: str,
    method_name: str,
    type_to_cls_dict: Dict,
    add_function: bool = False,
) -> Dict:
    """Build a template description from the signature of a class method.

    The class is looked up by ``__name__`` among the values of
    ``type_to_cls_dict``; the parameters of its ``method_name`` attribute
    become the template fields.

    Args:
        class_name: ``__name__`` of the class to look for.
        method_name: Name of the method whose signature and docstring are
            inspected.
        type_to_cls_dict: Mapping of type key to class. The key of the
            matching entry is stored under ``"_type"`` in the template.
        add_function: When true, ``"function"`` is appended to the base
            classes so the output may also be treated as a function.

    Returns:
        A dict with the keys ``"template"`` (the formatted parameter
        description), ``"description"`` (the short description parsed from
        the method docstring, or ``""``) and ``"base_classes"``.

    Raises:
        ValueError: If no class named ``class_name`` is present in
            ``type_to_cls_dict`` or that class has no ``method_name``.
    """
    # Single pass: find the first entry whose class name matches.
    match = next(
        (
            (type_key, candidate)
            for type_key, candidate in type_to_cls_dict.items()
            if candidate.__name__ == class_name
        ),
        None,
    )
    if match is None:
        raise ValueError(f"{class_name} not found.")
    _type, _class = match

    # Check if the method exists in this class
    if not hasattr(_class, method_name):
        raise ValueError(f"Method {method_name} not found in class {class_name}")
    method = getattr(_class, method_name)

    # Parse the docstring to obtain the short description
    docs = parse(method.__doc__)

    # ``Parameter.empty`` is a sentinel: compare by identity so that default
    # values overloading ``==``/``!=`` (e.g. array-like objects) cannot break
    # or distort the check.
    empty = inspect.Parameter.empty
    variables = {"_type": _type}
    for param_name, param in inspect.signature(method).parameters.items():
        has_default = param.default is not empty
        variables[param_name] = {
            "default": param.default if has_default else None,
            "type": param.annotation if param.annotation is not empty else None,
            "required": not has_default,
        }

    base_classes = get_base_classes(_class)
    # Adding function to base classes to allow the output to be a function
    if add_function:
        base_classes.append("function")

    return {
        "template": format_dict(variables, class_name),
        "description": docs.short_description or "",
        "base_classes": base_classes,
    }
def get_base_classes(cls):
"""Get the base classes of a class.
These are used to determine the output of the nodes.