refactor(chunk_documents): move optional imports inside method with error handling (#8750)
fix: handle ImportError for optional chunkers and tokenizers in ChunkDoclingDocumentComponent
This commit is contained in:
parent
ba192428f9
commit
a03e21018a
1 changed file with 25 additions and 3 deletions
|
|
@ -3,9 +3,6 @@ import json
|
|||
import tiktoken
|
||||
from docling_core.transforms.chunker import BaseChunker, DocMeta
|
||||
from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
|
||||
from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
|
||||
from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
|
||||
from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
|
||||
|
||||
from langflow.base.data.docling_utils import extract_docling_documents
|
||||
from langflow.custom import Component
|
||||
|
|
@ -122,13 +119,38 @@ class ChunkDoclingDocumentComponent(Component):
|
|||
|
||||
chunker: BaseChunker
|
||||
if self.chunker == "HybridChunker":
|
||||
try:
|
||||
from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
|
||||
except ImportError as e:
|
||||
msg = (
|
||||
"HybridChunker is not installed. Please install it with `uv pip install docling-core[chunking] "
|
||||
"or `uv pip install transformers`"
|
||||
)
|
||||
raise ImportError(msg) from e
|
||||
max_tokens: int | None = self.max_tokens if self.max_tokens else None
|
||||
if self.provider == "Hugging Face":
|
||||
try:
|
||||
from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
|
||||
except ImportError as e:
|
||||
msg = (
|
||||
"HuggingFaceTokenizer is not installed."
|
||||
" Please install it with `uv pip install docling-core[chunking]`"
|
||||
)
|
||||
raise ImportError(msg) from e
|
||||
tokenizer = HuggingFaceTokenizer.from_pretrained(
|
||||
model_name=self.hf_model_name,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
elif self.provider == "OpenAI":
|
||||
try:
|
||||
from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
|
||||
except ImportError as e:
|
||||
msg = (
|
||||
"OpenAITokenizer is not installed."
|
||||
" Please install it with `uv pip install docling-core[chunking]`"
|
||||
" or `uv pip install transformers`"
|
||||
)
|
||||
raise ImportError(msg) from e
|
||||
if max_tokens is None:
|
||||
max_tokens = 128 * 1024 # context window length required for OpenAI tokenizers
|
||||
tokenizer = OpenAITokenizer(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue