Make Gemini compatible
This commit is contained in:
parent
b80254418b
commit
1d234d6c0e
1 changed file with 24 additions and 8 deletions
|
|
@ -1,29 +1,33 @@
|
||||||
import officeParser from 'officeparser';
|
import officeParser from 'officeparser';
|
||||||
|
|
||||||
// MIME types that Gemini can handle natively (send as-is)
|
|
||||||
export const GEMINI_NATIVE_TYPES = [
|
export const GEMINI_NATIVE_TYPES = [
|
||||||
'application/pdf',
|
'application/pdf',
|
||||||
'text/plain',
|
'text/plain',
|
||||||
'text/markdown',
|
'text/markdown',
|
||||||
'text/csv',
|
'text/csv',
|
||||||
'text/html',
|
'text/html',
|
||||||
|
'text/css',
|
||||||
|
'text/javascript',
|
||||||
|
'image/jpeg',
|
||||||
|
'image/png',
|
||||||
|
'image/gif',
|
||||||
|
'image/webp',
|
||||||
|
'image/heic',
|
||||||
|
'image/heif',
|
||||||
|
];
|
||||||
|
|
||||||
|
export const TYPES_TO_CONVERT_TO_PLAIN_TEXT = [
|
||||||
'text/xml',
|
'text/xml',
|
||||||
'application/xml',
|
'application/xml',
|
||||||
'application/json',
|
'application/json',
|
||||||
'text/javascript',
|
|
||||||
'application/javascript',
|
'application/javascript',
|
||||||
'text/x-python',
|
'text/x-python',
|
||||||
'text/x-java-source',
|
'text/x-java-source',
|
||||||
'text/x-c',
|
'text/x-c',
|
||||||
'text/x-c++',
|
'text/x-c++',
|
||||||
'text/x-typescript',
|
'text/x-typescript',
|
||||||
'text/css',
|
|
||||||
'text/yaml',
|
'text/yaml',
|
||||||
'application/x-yaml',
|
'application/x-yaml',
|
||||||
'image/jpeg',
|
|
||||||
'image/png',
|
|
||||||
'image/gif',
|
|
||||||
'image/webp'
|
|
||||||
];
|
];
|
||||||
|
|
||||||
// MIME types that officeparser can extract text from
|
// MIME types that officeparser can extract text from
|
||||||
|
|
@ -45,7 +49,7 @@ export const OCR_CAPABLE_TYPES = [
|
||||||
'image/webp'
|
'image/webp'
|
||||||
];
|
];
|
||||||
|
|
||||||
export const SUPPORTED_TYPES = [...GEMINI_NATIVE_TYPES, ...OFFICEPARSER_TYPES];
|
export const SUPPORTED_TYPES = [...GEMINI_NATIVE_TYPES, ...OFFICEPARSER_TYPES, ...TYPES_TO_CONVERT_TO_PLAIN_TEXT];
|
||||||
|
|
||||||
const EXTENSION_TO_MIME: Record<string, string> = {
|
const EXTENSION_TO_MIME: Record<string, string> = {
|
||||||
'.txt': 'text/plain',
|
'.txt': 'text/plain',
|
||||||
|
|
@ -118,6 +122,10 @@ export function needsOfficeParser(mimeType: string): boolean {
|
||||||
return OFFICEPARSER_TYPES.includes(mimeType);
|
return OFFICEPARSER_TYPES.includes(mimeType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function needsPlainTextConversion(mimeType: string): boolean {
|
||||||
|
return TYPES_TO_CONVERT_TO_PLAIN_TEXT.includes(mimeType);
|
||||||
|
}
|
||||||
|
|
||||||
async function extractWithOfficeParser(buffer: Buffer, useOcr: boolean = false): Promise<string> {
|
async function extractWithOfficeParser(buffer: Buffer, useOcr: boolean = false): Promise<string> {
|
||||||
const config = useOcr ? {
|
const config = useOcr ? {
|
||||||
extractAttachments: true,
|
extractAttachments: true,
|
||||||
|
|
@ -177,5 +185,13 @@ export async function processDocument(
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (needsPlainTextConversion(mimeType)) {
|
||||||
|
return {
|
||||||
|
type: 'text',
|
||||||
|
content: buffer.toString('utf-8'),
|
||||||
|
mimeType: 'text/plain'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
throw new Error(`No extraction handler for: ${mimeType}`);
|
throw new Error(`No extraction handler for: ${mimeType}`);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue