Fix stuff
This commit is contained in:
parent
e301d9d1eb
commit
255497837b
4 changed files with 22 additions and 36 deletions
|
|
@ -2,7 +2,10 @@ import officeParser from 'officeparser';
|
|||
import WordExtractor from 'word-extractor';
|
||||
import * as XLSX from 'xlsx';
|
||||
import PPT from 'ppt';
|
||||
import CFB from 'cfb';
|
||||
import { writeFileSync, unlinkSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { randomUUID } from 'crypto';
|
||||
|
||||
export const GEMINI_NATIVE_TYPES = [
|
||||
'application/pdf',
|
||||
|
|
@ -197,10 +200,20 @@ function extractWithSheetJS(buffer: Buffer): string {
|
|||
}
|
||||
|
||||
function extractWithSheetJSPPT(buffer: Buffer): string {
|
||||
const cfb = CFB.read(buffer, { type: 'buffer' });
|
||||
const pres = PPT.parse_pptcfb(cfb);
|
||||
const textArray = PPT.utils.to_text(pres);
|
||||
return textArray.join('\n\n');
|
||||
// PPT library requires file path due to CFB API compatibility issues
|
||||
const tempPath = join(tmpdir(), `ppt-${randomUUID()}.ppt`);
|
||||
try {
|
||||
writeFileSync(tempPath, buffer);
|
||||
const pres = PPT.readFile(tempPath);
|
||||
const textArray = PPT.utils.to_text(pres);
|
||||
return textArray.join('\n\n');
|
||||
} finally {
|
||||
try {
|
||||
unlinkSync(tempPath);
|
||||
} catch {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function processDocument(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue