From a03e21018a7fa1a99d2a38e6019c5f1338c4cf43 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Thu, 26 Jun 2025 13:34:35 -0300 Subject: [PATCH] refactor(chunk_documents): move optional imports inside method with error handling (#8750) fix: handle ImportError for optional chunkers and tokenizers in ChunkDoclingDocumentComponent --- .../docling/chunk_docling_document.py | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/backend/base/langflow/components/docling/chunk_docling_document.py b/src/backend/base/langflow/components/docling/chunk_docling_document.py index ae920021e..1ffadc35d 100644 --- a/src/backend/base/langflow/components/docling/chunk_docling_document.py +++ b/src/backend/base/langflow/components/docling/chunk_docling_document.py @@ -3,9 +3,6 @@ import json import tiktoken from docling_core.transforms.chunker import BaseChunker, DocMeta from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker -from docling_core.transforms.chunker.hybrid_chunker import HybridChunker -from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer -from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer from langflow.base.data.docling_utils import extract_docling_documents from langflow.custom import Component @@ -122,13 +119,38 @@ class ChunkDoclingDocumentComponent(Component): chunker: BaseChunker if self.chunker == "HybridChunker": + try: + from docling_core.transforms.chunker.hybrid_chunker import HybridChunker + except ImportError as e: + msg = ( + "HybridChunker is not installed. 
Please install it with `uv pip install docling-core[chunking]` "
+                    "or `uv pip install transformers`"
+                )
+                raise ImportError(msg) from e
             max_tokens: int | None = self.max_tokens if self.max_tokens else None
             if self.provider == "Hugging Face":
+                try:
+                    from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
+                except ImportError as e:
+                    msg = (
+                        "HuggingFaceTokenizer is not installed."
+                        " Please install it with `uv pip install docling-core[chunking]`"
+                    )
+                    raise ImportError(msg) from e
                 tokenizer = HuggingFaceTokenizer.from_pretrained(
                     model_name=self.hf_model_name,
                     max_tokens=max_tokens,
                 )
             elif self.provider == "OpenAI":
+                try:
+                    from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
+                except ImportError as e:
+                    msg = (
+                        "OpenAITokenizer is not installed."
+                        " Please install it with `uv pip install docling-core[chunking]`"
+                        " or `uv pip install transformers`"
+                    )
+                    raise ImportError(msg) from e
                 if max_tokens is None:
                     max_tokens = 128 * 1024  # context window length required for OpenAI tokenizers
                 tokenizer = OpenAITokenizer(