Sandboxing

This commit is contained in:
Joey Yakimowich-Payne 2026-02-03 08:24:48 -07:00
commit 70df689701
7 changed files with 324 additions and 34 deletions

View file

@ -85,6 +85,29 @@ services:
networks: networks:
- kaboot-network - kaboot-network
# Isolated document-conversion service; the backend reaches it through SANDBOX_URL.
kaboot-sandbox:
build:
context: ./server/sandbox
dockerfile: Dockerfile
restart: unless-stopped
# Hardening: read-only root filesystem, no privilege escalation, all capabilities dropped.
read_only: true
security_opt:
- no-new-privileges:true
cap_drop:
- ALL
environment:
# Cap on simultaneous conversions inside one container (read by convert.js).
MAX_CONCURRENT: "2"
# The only writable mount declared here: in-memory scratch space for conversion files.
tmpfs:
- /tmp:size=200M,mode=1777
deploy:
# Number of sandbox containers; override with SANDBOX_REPLICAS.
replicas: ${SANDBOX_REPLICAS:-4}
resources:
limits:
cpus: '1'
memory: 512M
networks:
- kaboot-network
kaboot-backend: kaboot-backend:
build: build:
context: ./server context: ./server
@ -105,10 +128,14 @@ services:
STRIPE_WEBHOOK_SECRET: ${STRIPE_WEBHOOK_SECRET:-} STRIPE_WEBHOOK_SECRET: ${STRIPE_WEBHOOK_SECRET:-}
STRIPE_PRICE_ID_MONTHLY: ${STRIPE_PRICE_ID_MONTHLY:-} STRIPE_PRICE_ID_MONTHLY: ${STRIPE_PRICE_ID_MONTHLY:-}
STRIPE_PRICE_ID_YEARLY: ${STRIPE_PRICE_ID_YEARLY:-} STRIPE_PRICE_ID_YEARLY: ${STRIPE_PRICE_ID_YEARLY:-}
SANDBOX_URL: http://kaboot-sandbox:3002
USE_SANDBOX: "true"
volumes: volumes:
- kaboot-data:/data - kaboot-data:/data
tmpfs: tmpfs:
- /tmp:size=100M - /tmp:size=100M
depends_on:
- kaboot-sandbox
networks: networks:
- kaboot-network - kaboot-network

View file

@ -97,6 +97,33 @@ services:
networks: networks:
- kaboot-network - kaboot-network
# ═══════════════════════════════════════════════════════════════════════════
# KABOOT - Document Conversion Sandbox (isolated LibreOffice)
# ═══════════════════════════════════════════════════════════════════════════
kaboot-sandbox:
build:
context: ./server/sandbox
dockerfile: Dockerfile
restart: unless-stopped
# Hardening: read-only root filesystem, no privilege escalation, all capabilities dropped.
read_only: true
security_opt:
- no-new-privileges:true
cap_drop:
- ALL
environment:
# Cap on simultaneous conversions inside one container (read by convert.js).
MAX_CONCURRENT: "2"
# The only writable mount declared here: in-memory scratch space for conversion files.
tmpfs:
- /tmp:size=200M,mode=1777
deploy:
# Number of sandbox containers; override with SANDBOX_REPLICAS.
replicas: ${SANDBOX_REPLICAS:-2}
resources:
limits:
cpus: '1'
memory: 512M
networks:
- kaboot-network
# ═══════════════════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════════════════
# KABOOT - Application Backend # KABOOT - Application Backend
# ═══════════════════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════════════════
@ -118,6 +145,8 @@ services:
CORS_ORIGIN: http://localhost:${KABOOT_FRONTEND_PORT:-5173},http://${KABOOT_HOST:-localhost}:${KABOOT_FRONTEND_PORT:-5173} CORS_ORIGIN: http://localhost:${KABOOT_FRONTEND_PORT:-5173},http://${KABOOT_HOST:-localhost}:${KABOOT_FRONTEND_PORT:-5173}
LOG_REQUESTS: ${LOG_REQUESTS:-true} LOG_REQUESTS: ${LOG_REQUESTS:-true}
GEMINI_API_KEY: ${GEMINI_API_KEY:-} GEMINI_API_KEY: ${GEMINI_API_KEY:-}
SANDBOX_URL: http://kaboot-sandbox:3002
USE_SANDBOX: "true"
volumes: volumes:
- ./data:/data - ./data:/data
tmpfs: tmpfs:
@ -126,6 +155,7 @@ services:
- "${KABOOT_BACKEND_PORT:-3001}:3001" - "${KABOOT_BACKEND_PORT:-3001}:3001"
depends_on: depends_on:
- authentik-server - authentik-server
- kaboot-sandbox
networks: networks:
- kaboot-network - kaboot-network

View file

@ -2,8 +2,7 @@ FROM node:22-alpine
WORKDIR /app WORKDIR /app
# Build dependencies + LibreOffice for legacy Office format conversion RUN apk add --no-cache python3 make g++ curl
RUN apk add --no-cache python3 make g++ libreoffice curl
COPY package*.json ./ COPY package*.json ./
RUN npm install RUN npm install

23
server/sandbox/Dockerfile Normal file
View file

@ -0,0 +1,23 @@
# Image for the document-conversion sandbox: Node 22 on Alpine plus LibreOffice,
# running convert.js as an unprivileged user.
FROM node:22-alpine
# Install LibreOffice (does the conversions) and curl (used by HEALTHCHECK below),
# create the non-root `sandbox` user/group (uid/gid 1000), and give it ownership
# of the app directory and the conversion scratch directory.
RUN apk add --no-cache libreoffice curl \
&& addgroup -g 1000 sandbox \
&& adduser -u 1000 -G sandbox -s /bin/sh -D sandbox \
&& mkdir -p /app /tmp/convert \
&& chown -R sandbox:sandbox /app /tmp/convert
WORKDIR /app
# Copy the manifest and install before copying the source so the dependency
# layer is not rebuilt when only convert.js changes.
COPY --chown=sandbox:sandbox package.json ./
RUN npm install --omit=dev
COPY --chown=sandbox:sandbox convert.js ./
# Everything from here on, including the server process, runs as `sandbox`.
USER sandbox
EXPOSE 3002
# Probe the /health endpoint served by convert.js.
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
CMD curl -f http://localhost:3002/health || exit 1
CMD ["node", "convert.js"]

168
server/sandbox/convert.js Normal file
View file

@ -0,0 +1,168 @@
import { createServer } from 'http';
import { exec } from 'child_process';
import { writeFile, readFile, unlink, mkdir, readdir, rm } from 'fs/promises';
import { existsSync } from 'fs';
import { join } from 'path';
import { randomUUID } from 'crypto';
/**
 * Parse a positive integer from an environment-style string.
 *
 * @param raw      The raw value (may be undefined or empty).
 * @param fallback Value returned when `raw` is missing, non-numeric, zero or negative.
 * @returns The parsed integer when it is >= 1, otherwise `fallback`.
 */
function parsePositiveInt(raw, fallback) {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Port the HTTP server listens on.
const PORT = process.env.PORT || 3002;
// Parent directory for per-job scratch directories.
const TEMP_DIR = '/tmp/convert';
// Maximum time a single LibreOffice run may take, in milliseconds.
const TIMEOUT_MS = 30000;
// Largest accepted upload, in bytes (50 MiB).
const MAX_FILE_SIZE = 50 * 1024 * 1024;
// Maximum simultaneous conversions. A NaN limit would make every
// `activeJobs >= MAX_CONCURRENT` comparison false (no limit at all) and a
// limit of 0 would queue every request forever, so invalid values fall back to 3.
const MAX_CONCURRENT = parsePositiveInt(process.env.MAX_CONCURRENT, 3);
// Whitelist of accepted input extensions and the format each is converted to.
// Built on a null prototype so inherited keys such as "constructor" or
// "toString" never look like valid extensions.
const EXTENSION_TO_OUTPUT = Object.freeze(
  Object.assign(Object.create(null), {
    '.ppt': 'pptx',
    '.doc': 'docx',
    '.xls': 'xlsx',
  })
);
// Conversions currently holding a slot.
let activeJobs = 0;
// Requests waiting for a slot.
let queuedJobs = 0;
/**
 * Best-effort removal of a job's scratch directory and everything inside it.
 *
 * `unlink` cannot remove a directory, so the directory itself (and any nested
 * entries) used to be left behind after every job. `rm` with `recursive`
 * removes the whole tree, and `force` makes a missing path a no-op.
 *
 * @param dir Absolute path of the directory to delete.
 * @returns Resolves once removal has been attempted; never rejects.
 */
async function cleanup(dir) {
  // Failures are deliberately ignored: cleanup must not mask the conversion result.
  await rm(dir, { recursive: true, force: true }).catch(() => {});
}
/**
 * Run a headless LibreOffice conversion and wait for it to finish.
 *
 * @param inputPath Path of the file to convert.
 * @param outputDir Directory LibreOffice writes the converted file into.
 * @param outputExt Target format passed to `--convert-to` (e.g. "pptx").
 * @returns Resolves when the command exits successfully. Rejects with the
 *          exec error on failure, or with `Error('timeout')` when the run
 *          exceeds TIMEOUT_MS.
 */
function runLibreOffice(inputPath, outputDir, outputExt) {
  return new Promise((resolve, reject) => {
    let timedOut = false;
    let child;

    const timer = setTimeout(() => {
      timedOut = true;
      // Stop the spawned process instead of leaving it running after we give up.
      // NOTE(review): exec runs the command through a shell, so descendants of
      // that shell may outlive this signal — confirm whether a process-group
      // kill is needed.
      child.kill('SIGKILL');
      reject(new Error('timeout'));
    }, TIMEOUT_MS);

    child = exec(
      `libreoffice --headless --convert-to ${outputExt} --outdir "${outputDir}" "${inputPath}"`,
      { maxBuffer: 10 * 1024 * 1024 },
      (error) => {
        clearTimeout(timer);
        // The promise was already rejected by the timeout handler.
        if (timedOut) return;
        if (error) reject(error);
        else resolve();
      }
    );
  });
}
/**
 * Convert one uploaded document inside its own scratch directory.
 *
 * @param buffer Raw bytes of the uploaded file.
 * @param ext    Source extension including the dot; must be a key of EXTENSION_TO_OUTPUT.
 * @returns The converted file's bytes.
 * @throws Error('no_output') when LibreOffice exits without producing the
 *         expected file; any error from runLibreOffice or the filesystem calls.
 */
async function processConversion(buffer, ext) {
  const targetFormat = EXTENSION_TO_OUTPUT[ext];
  const workDir = join(TEMP_DIR, randomUUID());
  const sourceFile = join(workDir, `input${ext}`);
  // LibreOffice keeps the input's base name and swaps the extension.
  const convertedFile = join(workDir, `input.${targetFormat}`);

  try {
    // Owner-only permissions on both the directory and the uploaded file.
    await mkdir(workDir, { mode: 0o700 });
    await writeFile(sourceFile, buffer, { mode: 0o600 });
    await runLibreOffice(sourceFile, workDir, targetFormat);

    if (existsSync(convertedFile)) {
      return await readFile(convertedFile);
    }
    throw new Error('no_output');
  } finally {
    // Runs on success and on failure.
    await cleanup(workDir);
  }
}
/**
 * Handle `POST /convert?ext=<.ppt|.doc|.xls>`: wait for a free conversion
 * slot, read the request body, convert it and send the result back.
 *
 * Responses:
 *   200 converted bytes (application/octet-stream)
 *   400 missing or unsupported `ext` parameter
 *   413 body larger than MAX_FILE_SIZE
 *   500 conversion failed, timed out, or produced no output
 *   503 too many requests already queued
 *
 * @param req Incoming HTTP request.
 * @param res HTTP response to write to.
 */
async function handleConvert(req, res) {
  let holdsSlot = false;
  let bodyComplete = false;
  let clientGone = false;

  // Give the slot back exactly once, however the request ends.
  const releaseSlot = () => {
    if (!holdsSlot) return;
    holdsSlot = false;
    activeJobs--;
  };

  const sendJson = (status, payload) => {
    res.writeHead(status, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(payload));
  };

  // Stream errors need no separate handling; the 'close' listener does the cleanup.
  req.on('error', () => {});
  // If the request closes before 'end' was seen, the body will never complete.
  // Without this the slot would stay occupied forever.
  req.on('close', () => {
    if (bodyComplete) return;
    clientGone = true;
    releaseSlot();
  });

  if (activeJobs >= MAX_CONCURRENT) {
    queuedJobs++;
    if (queuedJobs > MAX_CONCURRENT * 2) {
      queuedJobs--;
      sendJson(503, { error: 'Server busy, try again later' });
      return;
    }
    // Poll until a slot frees up, or stop waiting once the client has left.
    await new Promise((resolve) => {
      const check = setInterval(() => {
        if (clientGone || activeJobs < MAX_CONCURRENT) {
          clearInterval(check);
          queuedJobs--;
          resolve();
        }
      }, 100);
    });
    if (clientGone) return;
  }

  activeJobs++;
  holdsSlot = true;

  const chunks = [];
  let size = 0;

  req.on('data', (chunk) => {
    size += chunk.length;
    if (size <= MAX_FILE_SIZE) {
      chunks.push(chunk);
    } else {
      // The upload will be rejected; drop what was buffered so far.
      chunks.length = 0;
    }
  });

  req.on('end', async () => {
    bodyComplete = true;
    try {
      if (size > MAX_FILE_SIZE) {
        sendJson(413, { error: 'File too large' });
        return;
      }

      const url = new URL(req.url, `http://localhost:${PORT}`);
      const ext = url.searchParams.get('ext');
      // Own-property check: a plain truthiness test would accept inherited
      // keys such as "constructor".
      if (!ext || !Object.prototype.hasOwnProperty.call(EXTENSION_TO_OUTPUT, ext)) {
        sendJson(400, { error: 'Invalid or missing extension parameter' });
        return;
      }

      const converted = await processConversion(Buffer.concat(chunks), ext);
      res.writeHead(200, {
        'Content-Type': 'application/octet-stream',
        'Content-Length': converted.length,
      });
      res.end(converted);
    } catch (err) {
      // A second writeHead would throw once headers are out; just drop the connection.
      if (res.headersSent) {
        res.destroy();
        return;
      }
      const reason = err instanceof Error ? err.message : '';
      const message = reason === 'timeout'
        ? 'Conversion timed out'
        : reason === 'no_output'
          ? 'Conversion produced no output'
          : 'Conversion failed';
      sendJson(500, { error: message });
    } finally {
      releaseSlot();
    }
  });
}
// HTTP entry point: GET /health reports load, POST /convert... runs a
// conversion, anything else is a 404.
const server = createServer((req, res) => {
  const { method, url } = req;
  const jsonHeaders = { 'Content-Type': 'application/json' };

  const isHealthCheck = method === 'GET' && url === '/health';
  if (isHealthCheck) {
    const report = { status: 'ok', active: activeJobs, queued: queuedJobs };
    res.writeHead(200, jsonHeaders);
    res.end(JSON.stringify(report));
    return;
  }

  const isConvert = method === 'POST' && url?.startsWith('/convert');
  if (isConvert) {
    handleConvert(req, res);
    return;
  }

  res.writeHead(404, jsonHeaders);
  res.end(JSON.stringify({ error: 'Not found' }));
});
// Create the scratch root on boot if it is not there yet.
const scratchRootMissing = !existsSync(TEMP_DIR);
if (scratchRootMissing) {
  await mkdir(TEMP_DIR, { recursive: true, mode: 0o700 });
}

// Listen on all interfaces and log the effective settings once the socket is bound.
const logListening = () => {
  console.log(`Sandbox running on port ${PORT} (max ${MAX_CONCURRENT} concurrent)`);
};
server.listen(PORT, '0.0.0.0', logListening);

View file

@ -0,0 +1,9 @@
{
"name": "kaboot-sandbox",
"version": "1.0.0",
"type": "module",
"private": true,
"scripts": {
"start": "node convert.js"
}
}

View file

@ -8,6 +8,8 @@ import { join } from 'path';
import { randomUUID } from 'crypto'; import { randomUUID } from 'crypto';
const PROCESSING_TIMEOUT_MS = 30000; const PROCESSING_TIMEOUT_MS = 30000;
const SANDBOX_URL = process.env.SANDBOX_URL || 'http://localhost:3002';
const USE_SANDBOX = process.env.USE_SANDBOX !== 'false';
export const GEMINI_NATIVE_TYPES = [ export const GEMINI_NATIVE_TYPES = [
'application/pdf', 'application/pdf',
@ -201,14 +203,41 @@ function extractWithSheetJS(buffer: Buffer): string {
return textParts.join('\n\n'); return textParts.join('\n\n');
} }
// Map legacy extensions to their modern equivalents for LibreOffice conversion
const LEGACY_TO_MODERN: Record<string, string> = { const LEGACY_TO_MODERN: Record<string, string> = {
'.ppt': 'pptx', '.ppt': 'pptx',
'.doc': 'docx', '.doc': 'docx',
'.xls': 'xlsx', '.xls': 'xlsx',
}; };
async function extractWithLibreOffice(buffer: Buffer, extension: string, useOcr: boolean = false): Promise<string> { async function convertViaSandbox(buffer: Buffer, extension: string): Promise<Buffer> {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), PROCESSING_TIMEOUT_MS);
try {
const response = await fetch(`${SANDBOX_URL}/convert?ext=${extension}`, {
method: 'POST',
body: buffer,
signal: controller.signal,
headers: { 'Content-Type': 'application/octet-stream' },
});
if (!response.ok) {
const error = await response.json().catch(() => ({ error: 'Conversion failed' }));
throw new Error(error.error || 'Document conversion failed');
}
return Buffer.from(await response.arrayBuffer());
} catch (err) {
if (err instanceof Error && err.name === 'AbortError') {
throw new Error('Document conversion timed out. Try a smaller file.');
}
throw err;
} finally {
clearTimeout(timeout);
}
}
async function convertLocally(buffer: Buffer, extension: string): Promise<Buffer> {
const tempId = randomUUID(); const tempId = randomUUID();
const privateTempDir = join(tmpdir(), `kaboot-${tempId}`); const privateTempDir = join(tmpdir(), `kaboot-${tempId}`);
const inputPath = join(privateTempDir, `input${extension}`); const inputPath = join(privateTempDir, `input${extension}`);
@ -225,47 +254,52 @@ async function extractWithLibreOffice(buffer: Buffer, extension: string, useOcr:
mkdirSync(privateTempDir, { mode: 0o700 }); mkdirSync(privateTempDir, { mode: 0o700 });
writeFileSync(inputPath, buffer, { mode: 0o600 }); writeFileSync(inputPath, buffer, { mode: 0o600 });
try { execSync(
execSync( `libreoffice --headless --convert-to ${modernExt} --outdir "${privateTempDir}" "${inputPath}"`,
`libreoffice --headless --convert-to ${modernExt} --outdir "${privateTempDir}" "${inputPath}"`, { timeout: PROCESSING_TIMEOUT_MS, stdio: 'pipe', maxBuffer: 10 * 1024 * 1024 }
{ timeout: PROCESSING_TIMEOUT_MS, stdio: 'pipe', maxBuffer: 10 * 1024 * 1024 } );
);
} catch (execError) {
const error = execError as Error & { code?: string; killed?: boolean };
if (error.killed) {
throw new Error('Document conversion timed out. Try a smaller file.');
}
if (error.code === 'ENOENT' || error.message?.includes('not found')) {
throw new Error(
`Legacy ${extension} files require LibreOffice for text extraction. ` +
`Please convert to .${modernExt} format or ensure LibreOffice is installed.`
);
}
throw new Error('Document conversion failed. The file may be corrupted.');
}
if (!existsSync(outputPath)) { if (!existsSync(outputPath)) {
throw new Error('Document conversion produced no output.'); throw new Error('Document conversion produced no output.');
} }
const convertedBuffer = readFileSync(outputPath); return readFileSync(outputPath);
const config = useOcr ? { extractAttachments: true, ocr: true, ocrLanguage: 'eng' } : {}; } catch (execError) {
const ast = await officeParser.parseOffice(convertedBuffer, config); const error = execError as Error & { code?: string; killed?: boolean };
let text = ast.toText(); if (error.killed) {
throw new Error('Document conversion timed out. Try a smaller file.');
if (useOcr && ast.attachments) {
for (const attachment of ast.attachments) {
if (attachment.ocrText) {
text += '\n' + attachment.ocrText;
}
}
} }
return text; if (error.code === 'ENOENT' || error.message?.includes('not found')) {
throw new Error(
`Legacy ${extension} files require LibreOffice for text extraction. ` +
`Please convert to .${LEGACY_TO_MODERN[extension]} format or ensure LibreOffice is installed.`
);
}
throw new Error('Document conversion failed. The file may be corrupted.');
} finally { } finally {
cleanup(); cleanup();
} }
} }
/**
 * Extract text from a legacy Office document by first converting it to its
 * modern format and then parsing the converted file.
 *
 * @param buffer    Raw bytes of the legacy document.
 * @param extension Source extension including the dot (e.g. ".ppt").
 * @param useOcr    When true, attachments are extracted with OCR and their
 *                  text is appended to the document text.
 * @returns The document text, followed by any OCR text, one entry per line.
 */
async function extractWithLibreOffice(buffer: Buffer, extension: string, useOcr: boolean = false): Promise<string> {
  // Conversion runs in the sandbox service or in-process, depending on USE_SANDBOX.
  let modernDocument: Buffer;
  if (USE_SANDBOX) {
    modernDocument = await convertViaSandbox(buffer, extension);
  } else {
    modernDocument = await convertLocally(buffer, extension);
  }

  const parserOptions = useOcr ? { extractAttachments: true, ocr: true, ocrLanguage: 'eng' } : {};
  const ast = await officeParser.parseOffice(modernDocument, parserOptions);
  const pieces = [ast.toText()];

  if (useOcr && ast.attachments) {
    for (const attachment of ast.attachments) {
      // Attachments without recognised text are skipped.
      if (attachment.ocrText) {
        pieces.push(attachment.ocrText);
      }
    }
  }

  return pieces.join('\n');
}
export async function processDocument( export async function processDocument(
buffer: Buffer, buffer: Buffer,
mimeType: string, mimeType: string,