Security audit #1
This commit is contained in:
parent
68bc591150
commit
cd04d34b23
8 changed files with 131 additions and 56 deletions
|
|
@ -5,6 +5,7 @@ import jwksClient from 'jwks-rsa';
|
|||
const OIDC_ISSUER = process.env.OIDC_ISSUER || 'http://localhost:9000/application/o/kaboot/';
|
||||
const OIDC_JWKS_URI = process.env.OIDC_JWKS_URI || 'http://localhost:9000/application/o/kaboot/jwks/';
|
||||
const OIDC_INTERNAL_JWKS_URI = process.env.OIDC_INTERNAL_JWKS_URI || OIDC_JWKS_URI;
|
||||
const OIDC_AUDIENCE = process.env.OIDC_AUDIENCE || process.env.OIDC_CLIENT_ID;
|
||||
|
||||
const client = jwksClient({
|
||||
jwksUri: OIDC_INTERNAL_JWKS_URI,
|
||||
|
|
@ -55,17 +56,22 @@ export function requireAuth(
|
|||
|
||||
const token = authHeader.slice(7);
|
||||
|
||||
const verifyOptions: jwt.VerifyOptions = {
|
||||
issuer: OIDC_ISSUER,
|
||||
algorithms: ['RS256'],
|
||||
};
|
||||
if (OIDC_AUDIENCE) {
|
||||
verifyOptions.audience = OIDC_AUDIENCE;
|
||||
}
|
||||
|
||||
jwt.verify(
|
||||
token,
|
||||
getSigningKey,
|
||||
{
|
||||
issuer: OIDC_ISSUER,
|
||||
algorithms: ['RS256'],
|
||||
},
|
||||
verifyOptions,
|
||||
(err, decoded) => {
|
||||
if (err) {
|
||||
console.error('Token verification failed:', err.message);
|
||||
res.status(401).json({ error: 'Invalid token', details: err.message });
|
||||
res.status(401).json({ error: 'Invalid token' });
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,52 @@
|
|||
import { Router } from 'express';
|
||||
import { Router, Response, NextFunction } from 'express';
|
||||
import multer from 'multer';
|
||||
import rateLimit from 'express-rate-limit';
|
||||
import { processDocument, SUPPORTED_TYPES, normalizeMimeType } from '../services/documentParser.js';
|
||||
import { requireAuth, AuthenticatedRequest } from '../middleware/auth.js';
|
||||
import { getSubscriptionStatus } from '../services/stripe.js';
|
||||
|
||||
const router = Router();
|
||||
|
||||
const isDev = process.env.NODE_ENV !== 'production';
|
||||
const isTest = process.env.NODE_ENV === 'test';
|
||||
|
||||
/**
 * Rate limiter applied to uploads from users without paid access.
 *
 * Allows 10 uploads per 15-minute window (50 when NODE_ENV is anything other
 * than 'production') and is skipped entirely when NODE_ENV is 'test'.
 */
const freeUploadLimiter = rateLimit({
  // Bucket requests per authenticated subject; fall back to the client IP and
  // finally to a single shared 'unknown' bucket when neither is available.
  keyGenerator(req: AuthenticatedRequest) {
    return req.user?.sub || req.ip || 'unknown';
  },
  skip() {
    return isTest;
  },
  message: { error: 'Too many uploads, please try again later.' },
  legacyHeaders: false,
  standardHeaders: true,
  max: isDev ? 50 : 10,
  windowMs: 15 * 60 * 1000, // 15 minutes
});
|
||||
|
||||
/**
 * Rate limiter applied to uploads from users with paid access.
 *
 * Allows 100 uploads per 5-minute window (200 when NODE_ENV is anything other
 * than 'production') and is skipped entirely when NODE_ENV is 'test'.
 */
const paidUploadLimiter = rateLimit({
  // Bucket requests per authenticated subject; fall back to the client IP and
  // finally to a single shared 'unknown' bucket when neither is available.
  keyGenerator(req: AuthenticatedRequest) {
    return req.user?.sub || req.ip || 'unknown';
  },
  skip() {
    return isTest;
  },
  message: { error: 'Too many uploads, please try again later.' },
  legacyHeaders: false,
  standardHeaders: true,
  max: isDev ? 200 : 100,
  windowMs: 5 * 60 * 1000, // 5 minutes
});
|
||||
|
||||
/**
 * Express middleware that sends the request through the paid or the free
 * upload rate limiter.
 *
 * A request counts as "paid" when the user belongs to the 'kaboot-ai-access'
 * group or when the subscription lookup reports status 'active'. Requests
 * without `req.user` always use the free limiter.
 *
 * NOTE(review): this reads `.status` directly off the return value of
 * getSubscriptionStatus, so it assumes that call is synchronous. If it
 * returns a Promise the subscription check can never match — confirm.
 *
 * @param req  Request, expected to carry `user` once authentication has run.
 * @param res  Response handed through to the selected limiter.
 * @param next Continuation handed through to the selected limiter.
 * @returns Whatever the selected limiter returns.
 */
function tieredUploadLimiter(req: AuthenticatedRequest, res: Response, next: NextFunction) {
  const user = req.user;
  const inAccessGroup = (user?.groups || []).includes('kaboot-ai-access');
  // Looked up for every authenticated user, even when group access already
  // qualifies them as paid.
  const subscription = user ? getSubscriptionStatus(user.sub) : null;

  const limiter =
    inAccessGroup || subscription?.status === 'active' ? paidUploadLimiter : freeUploadLimiter;

  return limiter(req, res, next);
}
|
||||
|
||||
let activeUploads = 0;
|
||||
const MAX_CONCURRENT_UPLOADS = 5;
|
||||
|
||||
router.use(requireAuth);
|
||||
router.use(tieredUploadLimiter);
|
||||
|
||||
const storage = multer.memoryStorage();
|
||||
|
||||
|
|
@ -26,8 +66,15 @@ const upload = multer({
|
|||
});
|
||||
|
||||
router.post('/', upload.single('document'), async (req: AuthenticatedRequest, res) => {
|
||||
if (activeUploads >= MAX_CONCURRENT_UPLOADS) {
|
||||
return res.status(503).json({ error: 'Server busy processing uploads. Please try again shortly.' });
|
||||
}
|
||||
|
||||
activeUploads++;
|
||||
|
||||
try {
|
||||
if (!req.file) {
  // Do not decrement activeUploads here. This early return sits inside the
  // `try`, so the handler's `finally` block already releases the slot on the
  // way out. Decrementing in both places lowered the counter by two for every
  // file-less request, driving it negative and letting more than
  // MAX_CONCURRENT_UPLOADS uploads run at once.
  return res.status(400).json({ error: 'No file uploaded' });
}
|
||||
|
||||
|
|
@ -65,6 +112,8 @@ router.post('/', upload.single('document'), async (req: AuthenticatedRequest, re
|
|||
res.status(500).json({
|
||||
error: error instanceof Error ? error.message : 'Failed to process document'
|
||||
});
|
||||
} finally {
|
||||
activeUploads--;
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -2,11 +2,13 @@ import officeParser from 'officeparser';
|
|||
import WordExtractor from 'word-extractor';
|
||||
import * as XLSX from 'xlsx';
|
||||
import { execSync } from 'child_process';
|
||||
import { writeFileSync, readFileSync, unlinkSync, existsSync } from 'fs';
|
||||
import { writeFileSync, readFileSync, unlinkSync, existsSync, mkdirSync, rmdirSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { randomUUID } from 'crypto';
|
||||
|
||||
const PROCESSING_TIMEOUT_MS = 30000;
|
||||
|
||||
export const GEMINI_NATIVE_TYPES = [
|
||||
'application/pdf',
|
||||
'text/plain',
|
||||
|
|
@ -207,62 +209,60 @@ const LEGACY_TO_MODERN: Record<string, string> = {
|
|||
};
|
||||
|
||||
/**
 * Extracts text from a legacy Office document by converting it with
 * LibreOffice and parsing the converted file with officeparser.
 *
 * The upload is written into a freshly created per-call directory under the
 * OS temp dir (directory mode 0o700, file mode 0o600). The input file, the
 * converted file and the directory are removed again on every exit path.
 *
 * NOTE(review): `extension` is interpolated into a shell command line via
 * `inputPath`. This assumes callers only pass extensions from a fixed,
 * trusted set — confirm, or switch to an argument-array spawn.
 * NOTE(review): execSync blocks the event loop for up to
 * PROCESSING_TIMEOUT_MS while the conversion runs.
 *
 * @param buffer    Raw bytes of the uploaded document.
 * @param extension Extension of the source file, used verbatim as the suffix
 *                  of the temp input file and as the LEGACY_TO_MODERN key.
 * @param useOcr    When true, officeparser is asked to OCR attachments and any
 *                  OCR text is appended to the result.
 * @returns The extracted text.
 * @throws Error when the conversion times out, LibreOffice is missing, the
 *         conversion fails, or no output file is produced.
 */
async function extractWithLibreOffice(buffer: Buffer, extension: string, useOcr: boolean = false): Promise<string> {
  const tempId = randomUUID();
  const privateTempDir = join(tmpdir(), `kaboot-${tempId}`);
  const inputPath = join(privateTempDir, `input${extension}`);
  const modernExt = LEGACY_TO_MODERN[extension] || 'pdf';
  const outputPath = join(privateTempDir, `input.${modernExt}`);

  // Best-effort removal; each step is independent so one failure does not
  // prevent the others from running.
  const cleanup = () => {
    try { unlinkSync(inputPath); } catch { /* ignore */ }
    try { unlinkSync(outputPath); } catch { /* ignore */ }
    try { rmdirSync(privateTempDir); } catch { /* ignore */ }
  };

  try {
    // Write input file into a directory only this process's user can access
    mkdirSync(privateTempDir, { mode: 0o700 });
    writeFileSync(inputPath, buffer, { mode: 0o600 });

    // Convert to modern format using LibreOffice
    try {
      execSync(
        `libreoffice --headless --convert-to ${modernExt} --outdir "${privateTempDir}" "${inputPath}"`,
        { timeout: PROCESSING_TIMEOUT_MS, stdio: 'pipe', maxBuffer: 10 * 1024 * 1024 }
      );
    } catch (execError) {
      // LibreOffice not available or conversion failed
      const error = execError as Error & { code?: string; killed?: boolean };
      // A timeout from the synchronous child_process API surfaces as
      // code 'ETIMEDOUT'; `killed` is only populated by the async API, so it
      // is kept purely as a secondary check.
      if (error.killed || error.code === 'ETIMEDOUT') {
        throw new Error('Document conversion timed out. Try a smaller file.');
      }
      if (error.code === 'ENOENT' || error.message?.includes('not found')) {
        throw new Error(
          `Legacy ${extension} files require LibreOffice for text extraction. ` +
          `Please convert to .${modernExt} format or ensure LibreOffice is installed.`
        );
      }
      // Deliberately generic: the underlying message is not passed on to the caller.
      throw new Error('Document conversion failed. The file may be corrupted.');
    }

    if (!existsSync(outputPath)) {
      throw new Error('Document conversion produced no output.');
    }

    // Read the converted file and extract text using officeparser (with OCR if enabled)
    const convertedBuffer = readFileSync(outputPath);
    const config = useOcr ? { extractAttachments: true, ocr: true, ocrLanguage: 'eng' } : {};
    const ast = await officeParser.parseOffice(convertedBuffer, config);
    let text = ast.toText();

    // Include OCR text from attachments if available
    if (useOcr && ast.attachments) {
      for (const attachment of ast.attachments) {
        if (attachment.ocrText) {
          text += '\n' + attachment.ocrText;
        }
      }
    }

    return text;
  } finally {
    cleanup();
  }
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue