From 1d234d6c0ea09456dc25db19ff24587e7fb20708 Mon Sep 17 00:00:00 2001
From: Joey Yakimowich-Payne <jyapayne@pm.me>
Date: Mon, 19 Jan 2026 15:02:28 -0700
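Subject: [PATCH] Make document parser MIME handling Gemini-compatible

Limit GEMINI_NATIVE_TYPES to PDF, plain text, Markdown, CSV, HTML, CSS,
JavaScript and image types (adding image/heic and image/heif). Move the
XML, JSON, YAML and source-code MIME types into a new
TYPES_TO_CONVERT_TO_PLAIN_TEXT list, which processDocument decodes as
UTF-8 and returns as text/plain; SUPPORTED_TYPES includes the new list.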

---
 server/src/services/documentParser.ts | 32 ++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/server/src/services/documentParser.ts b/server/src/services/documentParser.ts
index 6616866..76d148f 100644
--- a/server/src/services/documentParser.ts
+++ b/server/src/services/documentParser.ts
@@ -1,29 +1,33 @@
 import officeParser from 'officeparser';
 
-// MIME types that Gemini can handle natively (send as-is)
 export const GEMINI_NATIVE_TYPES = [
   'application/pdf',
   'text/plain',
   'text/markdown',
   'text/csv',
   'text/html',
+  'text/css',
+  'text/javascript',
+  'image/jpeg',
+  'image/png',
+  'image/gif',
+  'image/webp',
+  'image/heic',
+  'image/heif',
+];
+
+export const TYPES_TO_CONVERT_TO_PLAIN_TEXT = [
   'text/xml',
   'application/xml',
   'application/json',
-  'text/javascript',
   'application/javascript',
   'text/x-python',
   'text/x-java-source',
   'text/x-c',
   'text/x-c++',
   'text/x-typescript',
-  'text/css',
   'text/yaml',
   'application/x-yaml',
-  'image/jpeg',
-  'image/png',
-  'image/gif',
-  'image/webp'
 ];
 
 // MIME types that officeparser can extract text from
@@ -45,7 +49,7 @@ export const OCR_CAPABLE_TYPES = [
   'image/webp'
 ];
 
-export const SUPPORTED_TYPES = [...GEMINI_NATIVE_TYPES, ...OFFICEPARSER_TYPES];
+export const SUPPORTED_TYPES = [...GEMINI_NATIVE_TYPES, ...OFFICEPARSER_TYPES, ...TYPES_TO_CONVERT_TO_PLAIN_TEXT];
 
 const EXTENSION_TO_MIME: Record<string, string> = {
   '.txt': 'text/plain',
@@ -118,6 +122,10 @@ export function needsOfficeParser(mimeType: string): boolean {
   return OFFICEPARSER_TYPES.includes(mimeType);
 }
 
+export function needsPlainTextConversion(mimeType: string): boolean {
+  return TYPES_TO_CONVERT_TO_PLAIN_TEXT.includes(mimeType);
+}
+
 async function extractWithOfficeParser(buffer: Buffer, useOcr: boolean = false): Promise<string> {
   const config = useOcr ? {
     extractAttachments: true,
@@ -177,5 +185,13 @@ export async function processDocument(
     };
   }
 
+  if (needsPlainTextConversion(mimeType)) {
+    return {
+      type: 'text',
+      content: buffer.toString('utf-8'),
+      mimeType: 'text/plain'
+    };
+  }
+
   throw new Error(`No extraction handler for: ${mimeType}`);
 }