feat(website-crawl): add jina reader as additional alternative for website crawling (#8761)

This commit is contained in:
Zhaofeng Miao 2024-09-30 09:57:19 +08:00 committed by GitHub
commit 369e1e6f58
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
38 changed files with 927 additions and 75 deletions

View file

@ -23,7 +23,7 @@ import type {
SegmentsResponse,
createDocumentResponse,
} from '@/models/datasets'
import type { CommonResponse, DataSourceNotionWorkspace } from '@/models/common'
import { type CommonResponse, type DataSourceNotionWorkspace, DataSourceProvider } from '@/models/common'
import type {
ApiKeysListResponse,
CreateApiKeyResponse,
@ -253,7 +253,7 @@ export const createFirecrawlTask: Fetcher<CommonResponse, Record<string, any>> =
return post<CommonResponse>('website/crawl', {
body: {
...body,
provider: 'firecrawl',
provider: DataSourceProvider.fireCrawl,
},
})
}
@ -261,7 +261,26 @@ export const createFirecrawlTask: Fetcher<CommonResponse, Record<string, any>> =
/**
 * Queries the status of a website-crawl job handled by the Firecrawl provider.
 *
 * Issues a GET request to `website/crawl/status/{jobId}` with the provider
 * passed as a query parameter.
 *
 * @param jobId - Identifier of the crawl job; interpolated into the request path.
 * @returns Whatever `get<CommonResponse>` returns for this request.
 */
export const checkFirecrawlTaskStatus: Fetcher<CommonResponse, string> = (jobId: string) => {
  return get<CommonResponse>(`website/crawl/status/${jobId}`, {
    params: {
      // Exactly one `provider` key: a duplicated key in an object literal is a
      // compile error, and at runtime only the last occurrence would survive.
      provider: DataSourceProvider.fireCrawl,
    },
  }, {
    // NOTE(review): presumably suppresses the request helper's default error
    // notification — confirm against the `get` implementation.
    silent: true,
  })
}
/**
 * Starts a website-crawl task using the Jina Reader provider.
 *
 * Posts to `website/crawl` with the caller's fields plus a `provider` field.
 *
 * @param body - Crawl options supplied by the caller; copied into the request body.
 * @returns Whatever `post<CommonResponse>` returns for this request.
 */
export const createJinaReaderTask: Fetcher<CommonResponse, Record<string, any>> = (body) => {
  // `provider` is spread last so it overrides any `provider` supplied in `body`.
  const payload = { ...body, provider: DataSourceProvider.jinaReader }
  return post<CommonResponse>('website/crawl', { body: payload })
}
export const checkJinaReaderTaskStatus: Fetcher<CommonResponse, string> = (jobId: string) => {
return get<CommonResponse>(`website/crawl/status/${jobId}`, {
params: {
provider: 'jinareader',
},
}, {
silent: true,