From 1d234d6c0ea09456dc25db19ff24587e7fb20708 Mon Sep 17 00:00:00 2001
From: Joey Yakimowich-Payne
Date: Mon, 19 Jan 2026 15:02:28 -0700
Subject: [PATCH] Make gemini compatible

---
NOTE(review): recovered from a copy whose line breaks and indentation were
collapsed. Hunk line counts below were re-checked, but the following are
reconstructions to confirm against the target file:
  * 2-space indentation and blank context lines (if context fails to match,
    try `git apply --ignore-whitespace`);
  * the generic arguments in the context lines `Record<string, string>` and
    `Promise<string>`, which had been stripped;
  * the line split of the `const config = useOcr ?` context in the third hunk.
The author's e-mail address is missing from the From: header.
Review changes relative to the original patch: the GEMINI_NATIVE_TYPES
comment is kept instead of deleted, and the plain-text conversion list and
branch gain one-line comments (so the post-image hash on the index line is
stale).

 server/src/services/documentParser.ts | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/server/src/services/documentParser.ts b/server/src/services/documentParser.ts
index 6616866..76d148f 100644
--- a/server/src/services/documentParser.ts
+++ b/server/src/services/documentParser.ts
@@ -1,29 +1,35 @@
 import officeParser from 'officeparser';
 
 // MIME types that Gemini can handle natively (send as-is)
 export const GEMINI_NATIVE_TYPES = [
   'application/pdf',
   'text/plain',
   'text/markdown',
   'text/csv',
   'text/html',
+  'text/css',
+  'text/javascript',
+  'image/jpeg',
+  'image/png',
+  'image/gif',
+  'image/webp',
+  'image/heic',
+  'image/heif',
+];
+
+// MIME types that are decoded as UTF-8 and sent as text/plain
+export const TYPES_TO_CONVERT_TO_PLAIN_TEXT = [
   'text/xml',
   'application/xml',
   'application/json',
-  'text/javascript',
   'application/javascript',
   'text/x-python',
   'text/x-java-source',
   'text/x-c',
   'text/x-c++',
   'text/x-typescript',
-  'text/css',
   'text/yaml',
   'application/x-yaml',
-  'image/jpeg',
-  'image/png',
-  'image/gif',
-  'image/webp'
 ];
 
 // MIME types that officeparser can extract text from
@@ -45,7 +51,7 @@ export const OCR_CAPABLE_TYPES = [
   'image/webp'
 ];
 
-export const SUPPORTED_TYPES = [...GEMINI_NATIVE_TYPES, ...OFFICEPARSER_TYPES];
+export const SUPPORTED_TYPES = [...GEMINI_NATIVE_TYPES, ...OFFICEPARSER_TYPES, ...TYPES_TO_CONVERT_TO_PLAIN_TEXT];
 
 const EXTENSION_TO_MIME: Record<string, string> = {
   '.txt': 'text/plain',
@@ -118,6 +124,10 @@ export function needsOfficeParser(mimeType: string): boolean {
   return OFFICEPARSER_TYPES.includes(mimeType);
 }
 
+export function needsPlainTextConversion(mimeType: string): boolean {
+  return TYPES_TO_CONVERT_TO_PLAIN_TEXT.includes(mimeType);
+}
+
 async function extractWithOfficeParser(buffer: Buffer, useOcr: boolean = false): Promise<string> {
   const config = useOcr ?
     { extractAttachments: true,
@@ -177,5 +187,14 @@ export async function processDocument(
     };
   }
 
+  // Decode as UTF-8 and relabel as text/plain.
+  if (needsPlainTextConversion(mimeType)) {
+    return {
+      type: 'text',
+      content: buffer.toString('utf-8'),
+      mimeType: 'text/plain'
+    };
+  }
+
   throw new Error(`No extraction handler for: ${mimeType}`);
 }