feat: use mypy for all type checks (#10921)
This commit is contained in:
parent
c91e8b1737
commit
56e15d09a9
584 changed files with 3975 additions and 2826 deletions
|
|
@ -26,8 +26,8 @@ class EnhanceRecursiveCharacterTextSplitter(RecursiveCharacterTextSplitter):
|
|||
def from_encoder(
|
||||
cls: type[TS],
|
||||
embedding_model_instance: Optional[ModelInstance],
|
||||
allowed_special: Union[Literal[all], Set[str]] = set(),
|
||||
disallowed_special: Union[Literal[all], Collection[str]] = "all",
|
||||
allowed_special: Union[Literal["all"], Set[str]] = set(), # noqa: UP037
|
||||
disallowed_special: Union[Literal["all"], Collection[str]] = "all", # noqa: UP037
|
||||
**kwargs: Any,
|
||||
):
|
||||
def _token_encoder(text: str) -> int:
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
|
|||
texts, metadatas = [], []
|
||||
for doc in documents:
|
||||
texts.append(doc.page_content)
|
||||
metadatas.append(doc.metadata)
|
||||
metadatas.append(doc.metadata or {})
|
||||
return self.create_documents(texts, metadatas=metadatas)
|
||||
|
||||
def _join_docs(self, docs: list[str], separator: str) -> Optional[str]:
|
||||
|
|
@ -143,7 +143,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
|
|||
def from_huggingface_tokenizer(cls, tokenizer: Any, **kwargs: Any) -> TextSplitter:
|
||||
"""Text splitter that uses HuggingFace tokenizer to count length."""
|
||||
try:
|
||||
from transformers import PreTrainedTokenizerBase
|
||||
from transformers import PreTrainedTokenizerBase # type: ignore
|
||||
|
||||
if not isinstance(tokenizer, PreTrainedTokenizerBase):
|
||||
raise ValueError("Tokenizer received was not an instance of PreTrainedTokenizerBase")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue