Make Gemini compatible

This commit is contained in:
Joey Yakimowich-Payne 2026-01-19 15:02:28 -07:00
commit 1d234d6c0e
No known key found for this signature in database
GPG key ID: DDF6AF5B21B407D4

View file

@ -1,29 +1,33 @@
import officeParser from 'officeparser';
/**
 * MIME types that Gemini can handle natively; files of these types are
 * sent as-is.
 */
export const GEMINI_NATIVE_TYPES: string[] = [
  // Documents and text-based formats
  'application/pdf',
  'text/plain',
  'text/markdown',
  'text/csv',
  'text/html',
  'text/css',
  'text/javascript',

  // Image formats
  'image/jpeg',
  'image/png',
  'image/gif',
  'image/webp',
  'image/heic',
  'image/heif',
];
/**
 * MIME types whose bytes are decoded as UTF-8 and handed on as `text/plain`
 * (see the plain-text branch of `processDocument`).
 *
 * Only textual formats belong here: decoding binary data such as images as
 * UTF-8 yields garbage. Types already listed in `GEMINI_NATIVE_TYPES`
 * (images, `text/css`, `text/javascript`) are intentionally not repeated, so
 * each MIME type maps to exactly one handling strategy.
 */
export const TYPES_TO_CONVERT_TO_PLAIN_TEXT: string[] = [
  // Structured data / markup
  'text/xml',
  'application/xml',
  'application/json',
  'text/yaml',
  'application/x-yaml',

  // Source code
  'application/javascript',
  'text/x-python',
  'text/x-java-source',
  'text/x-c',
  'text/x-c++',
  'text/x-typescript',
];
// MIME types that officeparser can extract text from
@ -45,7 +49,7 @@ export const OCR_CAPABLE_TYPES = [
'image/webp'
];
/**
 * Union of the three handler lists: native Gemini types, officeparser types,
 * and types converted to plain text.
 *
 * Declared once (a second `export const` with the same name is a
 * redeclaration error). The lists are merged through a `Set` so a MIME type
 * present in more than one list appears only once; first-occurrence order is
 * preserved.
 */
export const SUPPORTED_TYPES = [
  ...new Set([
    ...GEMINI_NATIVE_TYPES,
    ...OFFICEPARSER_TYPES,
    ...TYPES_TO_CONVERT_TO_PLAIN_TEXT,
  ]),
];
const EXTENSION_TO_MIME: Record<string, string> = {
'.txt': 'text/plain',
@ -118,6 +122,10 @@ export function needsOfficeParser(mimeType: string): boolean {
return OFFICEPARSER_TYPES.includes(mimeType);
}
/**
 * Reports whether a MIME type is listed in `TYPES_TO_CONVERT_TO_PLAIN_TEXT`.
 *
 * @param mimeType - MIME type string to look up; the comparison is exact.
 * @returns `true` when the type is in the list, otherwise `false`.
 */
export function needsPlainTextConversion(mimeType: string): boolean {
  const isListed = TYPES_TO_CONVERT_TO_PLAIN_TEXT.some(
    (candidate) => candidate === mimeType,
  );
  return isListed;
}
async function extractWithOfficeParser(buffer: Buffer, useOcr: boolean = false): Promise<string> {
const config = useOcr ? {
extractAttachments: true,
@ -177,5 +185,13 @@ export async function processDocument(
};
}
if (needsPlainTextConversion(mimeType)) {
return {
type: 'text',
content: buffer.toString('utf-8'),
mimeType: 'text/plain'
};
}
throw new Error(`No extraction handler for: ${mimeType}`);
}