Make gemini compatible

This commit is contained in:
Joey Yakimowich-Payne 2026-01-19 15:02:28 -07:00
commit 1d234d6c0e
No known key found for this signature in database
GPG key ID: DDF6AF5B21B407D4

View file

@ -1,29 +1,33 @@
import officeParser from 'officeparser'; import officeParser from 'officeparser';
// MIME types that Gemini can handle natively (send as-is)
export const GEMINI_NATIVE_TYPES = [ export const GEMINI_NATIVE_TYPES = [
'application/pdf', 'application/pdf',
'text/plain', 'text/plain',
'text/markdown', 'text/markdown',
'text/csv', 'text/csv',
'text/html', 'text/html',
'text/css',
'text/javascript',
'image/jpeg',
'image/png',
'image/gif',
'image/webp',
'image/heic',
'image/heif',
];
export const TYPES_TO_CONVERT_TO_PLAIN_TEXT = [
'text/xml', 'text/xml',
'application/xml', 'application/xml',
'application/json', 'application/json',
'text/javascript',
'application/javascript', 'application/javascript',
'text/x-python', 'text/x-python',
'text/x-java-source', 'text/x-java-source',
'text/x-c', 'text/x-c',
'text/x-c++', 'text/x-c++',
'text/x-typescript', 'text/x-typescript',
'text/css',
'text/yaml', 'text/yaml',
'application/x-yaml', 'application/x-yaml',
'image/jpeg',
'image/png',
'image/gif',
'image/webp'
]; ];
// MIME types that officeparser can extract text from // MIME types that officeparser can extract text from
@ -45,7 +49,7 @@ export const OCR_CAPABLE_TYPES = [
'image/webp' 'image/webp'
]; ];
export const SUPPORTED_TYPES = [...GEMINI_NATIVE_TYPES, ...OFFICEPARSER_TYPES]; export const SUPPORTED_TYPES = [...GEMINI_NATIVE_TYPES, ...OFFICEPARSER_TYPES, ...TYPES_TO_CONVERT_TO_PLAIN_TEXT];
const EXTENSION_TO_MIME: Record<string, string> = { const EXTENSION_TO_MIME: Record<string, string> = {
'.txt': 'text/plain', '.txt': 'text/plain',
@ -118,6 +122,10 @@ export function needsOfficeParser(mimeType: string): boolean {
return OFFICEPARSER_TYPES.includes(mimeType); return OFFICEPARSER_TYPES.includes(mimeType);
} }
/**
 * Reports whether the given MIME type appears in
 * `TYPES_TO_CONVERT_TO_PLAIN_TEXT`.
 *
 * @param mimeType - MIME type string to look up (compared by exact match).
 * @returns `true` when the type is listed, `false` otherwise.
 */
export function needsPlainTextConversion(mimeType: string): boolean {
  // Exact, case-sensitive comparison against each listed MIME type.
  const isListed = TYPES_TO_CONVERT_TO_PLAIN_TEXT.some(
    (candidate) => candidate === mimeType
  );
  return isListed;
}
async function extractWithOfficeParser(buffer: Buffer, useOcr: boolean = false): Promise<string> { async function extractWithOfficeParser(buffer: Buffer, useOcr: boolean = false): Promise<string> {
const config = useOcr ? { const config = useOcr ? {
extractAttachments: true, extractAttachments: true,
@ -177,5 +185,13 @@ export async function processDocument(
}; };
} }
if (needsPlainTextConversion(mimeType)) {
return {
type: 'text',
content: buffer.toString('utf-8'),
mimeType: 'text/plain'
};
}
throw new Error(`No extraction handler for: ${mimeType}`); throw new Error(`No extraction handler for: ${mimeType}`);
} }