refactor(chunk_documents): move optional imports inside method with error handling (#8750)

fix: handle ImportError for optional chunkers and tokenizers in ChunkDoclingDocumentComponent
This commit is contained in:
Gabriel Luiz Freitas Almeida 2025-06-26 13:34:35 -03:00 committed by GitHub
commit a03e21018a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -3,9 +3,6 @@ import json
import tiktoken
from docling_core.transforms.chunker import BaseChunker, DocMeta
from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
from langflow.base.data.docling_utils import extract_docling_documents
from langflow.custom import Component
@@ -122,13 +119,38 @@ class ChunkDoclingDocumentComponent(Component):
chunker: BaseChunker
if self.chunker == "HybridChunker":
try:
from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
except ImportError as e:
msg = (
"HybridChunker is not installed. Please install it with `uv pip install docling-core[chunking] "
"or `uv pip install transformers`"
)
raise ImportError(msg) from e
max_tokens: int | None = self.max_tokens if self.max_tokens else None
if self.provider == "Hugging Face":
try:
from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
except ImportError as e:
msg = (
"HuggingFaceTokenizer is not installed."
" Please install it with `uv pip install docling-core[chunking]`"
)
raise ImportError(msg) from e
tokenizer = HuggingFaceTokenizer.from_pretrained(
model_name=self.hf_model_name,
max_tokens=max_tokens,
)
elif self.provider == "OpenAI":
try:
from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
except ImportError as e:
msg = (
"OpenAITokenizer is not installed."
" Please install it with `uv pip install docling-core[chunking]`"
" or `uv pip install transformers`"
)
raise ImportError(msg) from e
if max_tokens is None:
max_tokens = 128 * 1024 # context window length required for OpenAI tokenizers
tokenizer = OpenAITokenizer(