feat: add csv output parser (#3483)

* Adds the CSV Output Parser

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Jordan Frazier 2024-09-27 09:36:04 -07:00 committed by GitHub
commit 8e50f2fdbb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 544 additions and 93 deletions

View file

@ -4,6 +4,7 @@ from abc import abstractmethod
from langchain_core.language_models.llms import LLM
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
from langchain_core.output_parsers import BaseOutputParser
from langflow.base.constants import STREAM_INFO_TEXT
from langflow.custom import Component
@ -19,6 +20,9 @@ class LCModelComponent(Component):
description: str = "Model Description"
trace_type = "llm"
# Optional output parser to pass to the runnable. Subclasses may allow the user to input an `output_parser`
output_parser: BaseOutputParser | None = None
_base_inputs: list[InputTypes] = [
MessageInput(name="input_value", display_name="Input"),
MessageTextInput(
@ -162,6 +166,9 @@ class LCModelComponent(Component):
messages.append(SystemMessage(content=system_message))
inputs: list | dict = messages or {}
try:
if self.output_parser is not None:
runnable = runnable | self.output_parser
runnable = runnable.with_config( # type: ignore
{
"run_name": self.display_name,

View file

@ -220,20 +220,4 @@ def process_prompt_template(
# Update the input variables field in the template
update_input_variables_field(input_variables, frontend_node_template)
# Optional: cleanup fields based on specific conditions
cleanup_prompt_template_fields(input_variables, frontend_node_template)
return input_variables
def cleanup_prompt_template_fields(input_variables, template):
    """Remove stale fields from *template* when it holds exactly one prompt field.

    Scans the frontend-node template for fields typed ``"prompt"``. If there is
    exactly one, every dict-valued field that is neither typed ``"code"`` nor
    referenced by ``input_variables`` (or the prompt field itself) is deleted
    in place. Non-dict entries are always left untouched.

    Args:
        input_variables: Variable names still referenced by the prompt template.
        template: Mapping of field name -> field definition; mutated in place.
    """
    prompt_fields = [
        name
        for name, field in template.items()
        if isinstance(field, dict) and field.get("type") == "prompt"
    ]
    # Cleanup only applies to the single-prompt-field layout.
    if len(prompt_fields) != 1:
        return
    # Keep list concatenation (not a set) to mirror the original membership test.
    keep = list(input_variables) + prompt_fields
    for name in list(template):  # snapshot keys: we delete while iterating
        field = template.get(name, {})
        if isinstance(field, dict) and field.get("type") != "code" and name not in keep:
            del template[name]

View file

@ -31,6 +31,7 @@ __all__ = [
"link_extractors",
"memories",
"outputs",
"output_parsers",
"retrievers",
"textsplitters",
"toolkits",

View file

@ -13,6 +13,7 @@ from langflow.inputs import (
SecretStrInput,
StrInput,
)
from langflow.inputs.inputs import HandleInput
class AIMLModelComponent(LCModelComponent):
@ -59,6 +60,13 @@ class AIMLModelComponent(LCModelComponent):
advanced=True,
value=1,
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -1,6 +1,7 @@
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs import MessageTextInput, SecretStrInput
from langflow.inputs.inputs import HandleInput
from langflow.io import DictInput, DropdownInput
@ -55,6 +56,13 @@ class AmazonBedrockComponent(LCModelComponent):
MessageTextInput(name="region_name", display_name="Region Name", value="us-east-1"),
DictInput(name="model_kwargs", display_name="Model Kwargs", advanced=True, is_list=True),
MessageTextInput(name="endpoint_url", display_name="Endpoint URL", advanced=True),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -2,6 +2,7 @@ from pydantic.v1 import SecretStr
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs.inputs import HandleInput
from langflow.io import DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput
@ -49,6 +50,13 @@ class AnthropicModelComponent(LCModelComponent):
info="Prefill text to guide the model's response.",
advanced=True,
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -3,6 +3,7 @@ from langchain_openai import AzureChatOpenAI
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs import MessageTextInput
from langflow.inputs.inputs import HandleInput
from langflow.io import DropdownInput, FloatInput, IntInput, SecretStrInput
@ -48,6 +49,13 @@ class AzureChatOpenAIComponent(LCModelComponent):
advanced=True,
info="The maximum number of tokens to generate. Set to 0 for unlimited tokens.",
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -3,6 +3,7 @@ from pydantic.v1 import SecretStr
from langflow.base.models.model import LCModelComponent
from langflow.field_typing.constants import LanguageModel
from langflow.inputs.inputs import HandleInput
from langflow.io import DropdownInput, FloatInput, MessageTextInput, SecretStrInput
@ -67,6 +68,13 @@ class QianfanChatEndpointComponent(LCModelComponent):
display_name="Endpoint",
info="Endpoint of the Qianfan LLM, required if custom model used.",
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -3,6 +3,7 @@ from pydantic.v1 import SecretStr
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs.inputs import HandleInput
from langflow.io import FloatInput, SecretStrInput
@ -22,6 +23,13 @@ class CohereComponent(LCModelComponent):
value="COHERE_API_KEY",
),
FloatInput(name="temperature", display_name="Temperature", value=0.75),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -3,6 +3,7 @@ from pydantic.v1 import SecretStr
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs import DropdownInput, FloatInput, IntInput, SecretStrInput
from langflow.inputs.inputs import HandleInput
class GoogleGenerativeAIComponent(LCModelComponent):
@ -48,6 +49,13 @@ class GoogleGenerativeAIComponent(LCModelComponent):
info="Decode using top-k sampling: consider the set of top_k most probable tokens. Must be positive.",
advanced=True,
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -4,6 +4,7 @@ from pydantic.v1 import SecretStr
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs.inputs import HandleInput
from langflow.io import DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput
@ -51,6 +52,13 @@ class GroqModel(LCModelComponent):
options=[],
refresh_button=True,
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def get_models(self) -> list[str]:

View file

@ -6,7 +6,8 @@ from tenacity import retry, stop_after_attempt, wait_fixed
# TODO: langchain_community.llms.huggingface_endpoint is deprecated. Need to update to langchain_huggingface, but it has a dependency on langchain_core 0.3.0
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.io import DictInput, DropdownInput, FloatInput, IntInput, SecretStrInput, StrInput
from langflow.inputs.inputs import HandleInput
from langflow.io import DictInput, DropdownInput, IntInput, SecretStrInput, StrInput
class HuggingFaceEndpointsComponent(LCModelComponent):
@ -37,41 +38,12 @@ class HuggingFaceEndpointsComponent(LCModelComponent):
SecretStrInput(name="huggingfacehub_api_token", display_name="API Token", password=True),
DictInput(name="model_kwargs", display_name="Model Keyword Arguments", advanced=True),
IntInput(name="retry_attempts", display_name="Retry Attempts", value=1, advanced=True),
IntInput(
name="max_new_tokens", display_name="Max New Tokens", value=512, info="Maximum number of generated tokens"
),
IntInput(
name="top_k",
display_name="Top K",
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
info="The number of highest probability vocabulary tokens to keep for top-k-filtering",
),
FloatInput(
name="top_p",
display_name="Top P",
value=0.95,
advanced=True,
info="If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation",
),
FloatInput(
name="typical_p",
display_name="Typical P",
value=0.95,
advanced=True,
info="Typical Decoding mass.",
),
FloatInput(
name="temperature",
display_name="Temperature",
value=0.8,
advanced=True,
info="The value used to module the logits distribution",
),
FloatInput(
name="repetition_penalty",
display_name="Repetition Penalty",
advanced=True,
info="The parameter for repetition penalty. 1.0 means no penalty.",
input_types=["OutputParser"],
),
]

View file

@ -4,6 +4,7 @@ from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.field_typing.range_spec import RangeSpec
from langflow.inputs import DropdownInput, FloatInput, IntInput, SecretStrInput
from langflow.inputs.inputs import HandleInput
class MaritalkModelComponent(LCModelComponent):
@ -33,6 +34,13 @@ class MaritalkModelComponent(LCModelComponent):
advanced=False,
),
FloatInput(name="temperature", display_name="Temperature", value=0.1, range_spec=RangeSpec(min=0, max=1)),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -3,6 +3,7 @@ from pydantic.v1 import SecretStr
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs.inputs import HandleInput
from langflow.io import BoolInput, DropdownInput, FloatInput, IntInput, SecretStrInput, StrInput
@ -55,6 +56,13 @@ class MistralAIModelComponent(LCModelComponent):
FloatInput(name="top_p", display_name="Top P", advanced=True, value=1),
IntInput(name="random_seed", display_name="Random Seed", value=1, advanced=True),
BoolInput(name="safe_mode", display_name="Safe Mode", advanced=True),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -3,6 +3,7 @@ from typing import Any
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs import DropdownInput, FloatInput, IntInput, SecretStrInput, StrInput
from langflow.inputs.inputs import HandleInput
from langflow.schema.dotdict import dotdict
@ -47,6 +48,13 @@ class NVIDIAModelComponent(LCModelComponent):
advanced=True,
value=1,
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):

View file

@ -6,6 +6,7 @@ from langchain_community.chat_models import ChatOllama
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs.inputs import HandleInput
from langflow.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, StrInput
@ -204,6 +205,13 @@ class ChatOllamaComponent(LCModelComponent):
info="Template to use for generating text.",
advanced=True,
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -17,6 +17,7 @@ from langflow.inputs import (
SecretStrInput,
StrInput,
)
from langflow.inputs.inputs import HandleInput
class OpenAIModelComponent(LCModelComponent):
@ -75,6 +76,13 @@ class OpenAIModelComponent(LCModelComponent):
advanced=True,
value=1,
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -3,6 +3,7 @@ from pydantic.v1 import SecretStr
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs.inputs import HandleInput
from langflow.io import DropdownInput, FloatInput, IntInput, SecretStrInput
@ -59,6 +60,13 @@ class PerplexityComponent(LCModelComponent):
info="Decode using top-k sampling: consider the set of top_k most probable tokens. Must be positive.",
advanced=True,
),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel: # type: ignore[type-var]

View file

@ -3,6 +3,7 @@ from typing import cast
from langflow.base.models.model import LCModelComponent
from langflow.field_typing import LanguageModel
from langflow.inputs import MessageTextInput
from langflow.inputs.inputs import HandleInput
from langflow.io import BoolInput, FileInput, FloatInput, IntInput, StrInput
@ -28,6 +29,13 @@ class ChatVertexAIComponent(LCModelComponent):
IntInput(name="top_k", display_name="Top K", advanced=True),
FloatInput(name="top_p", display_name="Top P", value=0.95, advanced=True),
BoolInput(name="verbose", display_name="Verbose", value=False, advanced=True),
HandleInput(
name="output_parser",
display_name="Output Parser",
info="The parser to use to parse the output of the model",
advanced=True,
input_types=["OutputParser"],
),
]
def build_model(self) -> LanguageModel:

View file

@ -0,0 +1,31 @@
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langflow.custom.custom_component.component import Component
from langflow.field_typing.constants import OutputParser
from langflow.io import Output
from langflow.schema.message import Message
class CSVOutputParserComponent(Component):
    """Expose LangChain's comma-separated-list parser as a Langflow component.

    Provides two outputs: the parser object itself (wired into a model's
    ``output_parser`` input) and the parser's format instructions (embedded
    in a prompt template so the LLM knows to answer as a comma list).
    """

    display_name = "CSV Output Parser"
    description = "Use with an LLM to return a comma separated list."
    icon = "type"
    name = "CSVOutputParser"

    # The parser is configuration-free, so the component needs no inputs.
    inputs = []

    outputs = [
        Output(
            display_name="Format Instructions",
            name="format_instructions",
            info="Pass to a prompt template to include formatting instructions for LLM responses.",
            method="format_instructions",
        ),
        Output(display_name="Output Parser", name="output_parser", method="build_parser"),
    ]

    def build_parser(self) -> OutputParser:
        """Return a fresh LangChain comma-separated-list parser instance."""
        return CommaSeparatedListOutputParser()

    def format_instructions(self) -> Message:
        """Wrap the parser's formatting guidance in a Message for prompt use."""
        instructions = CommaSeparatedListOutputParser().get_format_instructions()
        return Message(text=instructions)

View file

@ -0,0 +1,3 @@
from .CSVOutputParser import CSVOutputParserComponent
__all__ = ["CSVOutputParserComponent"]

View file

@ -11,7 +11,7 @@ from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseLanguageModel, BaseLLM
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.memory import BaseMemory
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.output_parsers import BaseLLMOutputParser, BaseOutputParser
from langchain_core.prompts import BasePromptTemplate, ChatPromptTemplate, PromptTemplate
from langchain_core.retrievers import BaseRetriever
from langchain_core.tools import BaseTool, Tool
@ -29,6 +29,11 @@ Retriever = TypeVar(
BaseRetriever,
VectorStoreRetriever,
)
OutputParser = TypeVar(
"OutputParser",
BaseOutputParser,
BaseLLMOutputParser,
)
class Object:

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -235,7 +235,8 @@ class Message(Data):
if contents:
message = HumanMessage(content=[{"type": "text", "text": text}] + contents)
prompt_template = ChatPromptTemplate(messages=[message]) # type: ignore
prompt_template = ChatPromptTemplate.from_messages([message]) # type: ignore
instance.prompt = jsonable_encoder(prompt_template.to_json())
instance.messages = instance.prompt.get("kwargs", {}).get("messages", [])
return instance

View file

@ -56,9 +56,11 @@ class Template(BaseModel):
_input = Input(**value)
data["fields"].append(_input)
# Handles components with no inputs
# Necessary for components with no inputs(?)
if "fields" not in data:
data["fields"] = []
return cls(**data)
# For backwards compatibility

View file

@ -0,0 +1,40 @@
import os
import pytest
from langflow.components.models.OpenAIModel import OpenAIModelComponent
from langflow.components.output_parsers.CSVOutputParser import CSVOutputParserComponent
from langflow.components.prompts.Prompt import PromptComponent
from tests.integration.utils import ComponentInputHandle, run_single_component
@pytest.mark.asyncio
@pytest.mark.api_key_required
async def test_csv_output_parser_openai():
    """End-to-end: prompt + OpenAI model + CSV parser yields a comma list.

    Wires the parser component's format instructions into the prompt and its
    parser object into the model, then checks the parsed model output.
    Requires OPENAI_API_KEY in the environment (live API call).
    """
    # Both handles come from the same parser component; it takes no inputs.
    instructions_handle = ComponentInputHandle(
        clazz=CSVOutputParserComponent,
        inputs={},
        output_name="format_instructions",
    )
    parser_handle = ComponentInputHandle(
        clazz=CSVOutputParserComponent,
        inputs={},
        output_name="output_parser",
    )
    prompt_handle = ComponentInputHandle(
        clazz=PromptComponent,
        inputs={
            "template": "List the first five positive integers.\n\n{format_instructions}",
            "format_instructions": instructions_handle,
        },
        output_name="prompt",
    )
    outputs = await run_single_component(
        OpenAIModelComponent,
        inputs={
            "api_key": os.environ["OPENAI_API_KEY"],
            "output_parser": parser_handle,
            "input_value": prompt_handle,
        },
    )
    # CommaSeparatedListOutputParser re-joins the model's list with ", ".
    assert outputs["text_output"] == "1, 2, 3, 4, 5"