Feat/firecrawl data source (#5232)
Co-authored-by: Nicolas <nicolascamara29@gmail.com> Co-authored-by: chenhe <guchenhe@gmail.com> Co-authored-by: takatost <takatost@gmail.com>
This commit is contained in:
parent
918ebe1620
commit
ba5f8afaa8
36 changed files with 1174 additions and 64 deletions
|
|
@ -4,3 +4,4 @@ from enum import Enum
|
|||
class DatasourceType(Enum):
|
||||
FILE = "upload_file"
|
||||
NOTION = "notion_import"
|
||||
WEBSITE = "website_crawl"
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from models.dataset import Document
|
||||
|
|
@ -19,14 +21,33 @@ class NotionInfo(BaseModel):
|
|||
super().__init__(**data)
|
||||
|
||||
|
||||
class WebsiteInfo(BaseModel):
|
||||
"""
|
||||
website import info.
|
||||
"""
|
||||
provider: str
|
||||
job_id: str
|
||||
url: str
|
||||
mode: str
|
||||
tenant_id: str
|
||||
only_main_content: bool = False
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
def __init__(self, **data) -> None:
|
||||
super().__init__(**data)
|
||||
|
||||
|
||||
class ExtractSetting(BaseModel):
|
||||
"""
|
||||
Model class for provider response.
|
||||
"""
|
||||
datasource_type: str
|
||||
upload_file: UploadFile = None
|
||||
notion_info: NotionInfo = None
|
||||
document_model: str = None
|
||||
upload_file: Optional[UploadFile]
|
||||
notion_info: Optional[NotionInfo]
|
||||
website_info: Optional[WebsiteInfo]
|
||||
document_model: Optional[str]
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
def __init__(self, **data) -> None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue