import { createFlow, parse, extract } from '@doclo/flows';
import { createVLMProvider } from '@doclo/providers-llm';
import { createOCRProvider } from '@doclo/providers-datalab';
import { readFileSync } from 'fs';
// Types
/**
 * Structured fields the `extract` step is asked to produce for one financial
 * document.
 *
 * Optionality mirrors `financialSchema.required`: only `documentType`,
 * `amount` and `currency` are mandatory there, so every other field may be
 * missing from an extraction and has to be checked before use.
 */
interface FinancialExtraction {
  /** Kind of document; the allowed values match the schema's `enum`. */
  documentType: 'invoice' | 'receipt' | 'wire_transfer' | 'check';
  /** Transaction amount. */
  amount: number;
  /** 3-letter ISO currency code. */
  currency: string;
  /** Transaction date in YYYY-MM-DD format, when one was extracted. */
  date?: string;
  /** Reference, invoice, or transaction number, when one was extracted. */
  referenceNumber?: string;
  /** Payment parties; the schema requires both names whenever this is present. */
  parties?: {
    /** Entity making the payment. */
    payer: string;
    /** Entity receiving the payment. */
    payee: string;
  };
}
// Schema
/** Builds the schema node for a string field that carries only a description. */
const describedString = (description: string): { type: string; description: string } => ({
  type: 'string',
  description,
});

/**
 * Schema handed to the `extract` step: lists the fields to pull from a
 * financial document and which of them are mandatory.
 */
const financialSchema = {
  type: 'object',
  properties: {
    documentType: {
      type: 'string',
      enum: ['invoice', 'receipt', 'wire_transfer', 'check'],
      description: 'Type of financial document',
    },
    amount: {
      type: 'number',
      description: 'Transaction amount with full decimal precision',
    },
    currency: describedString('3-letter ISO currency code'),
    date: describedString('Transaction date in YYYY-MM-DD format'),
    referenceNumber: describedString('Reference, invoice, or transaction number'),
    parties: {
      type: 'object',
      properties: {
        payer: describedString('Entity making payment'),
        payee: describedString('Entity receiving payment'),
      },
      // Both names are mandatory whenever a `parties` object is emitted.
      required: ['payer', 'payee'],
    },
  },
  // Only these three top-level fields are mandatory; the rest may be absent.
  required: ['amount', 'currency', 'documentType'],
};
// Providers
/**
 * OCR provider pointed at the Datalab Marker endpoint; it backs the flow's
 * `parse` step.
 */
const ocrProvider = createOCRProvider({
  endpoint: 'https://www.datalab.to/api/v1/marker',
  // The `!` only satisfies the compiler: if DATALAB_API_KEY is unset the
  // provider is still constructed, with `undefined` as its key.
  apiKey: process.env.DATALAB_API_KEY!,
});
/**
 * Gemini 2.5 Pro reached via OpenRouter; one of the three providers passed to
 * the `extract` step.
 */
const geminiProvider = createVLMProvider({
  provider: 'google',
  model: 'google/gemini-2.5-pro',
  // Compile-time assertion only; an unset OPENROUTER_API_KEY arrives as `undefined`.
  apiKey: process.env.OPENROUTER_API_KEY!,
  via: 'openrouter',
});
/**
 * Claude Sonnet 4.5 reached via OpenRouter; one of the three providers passed
 * to the `extract` step.
 */
const claudeProvider = createVLMProvider({
  provider: 'anthropic',
  model: 'anthropic/claude-sonnet-4.5',
  // Compile-time assertion only; an unset OPENROUTER_API_KEY arrives as `undefined`.
  apiKey: process.env.OPENROUTER_API_KEY!,
  via: 'openrouter',
});
/**
 * GPT-4.1 reached via OpenRouter; one of the three providers passed to the
 * `extract` step.
 */
const gptProvider = createVLMProvider({
  provider: 'openai',
  model: 'openai/gpt-4.1',
  // Compile-time assertion only; an unset OPENROUTER_API_KEY arrives as `undefined`.
  apiKey: process.env.OPENROUTER_API_KEY!,
  via: 'openrouter',
});
// Build flow with multi-provider consensus
/** Formatting rules sent to every extraction provider, one bullet per line. */
const extractionRules = [
  '- Amount must have exact decimal precision (e.g., 1234.56)',
  '- Currency must be a valid 3-letter ISO code',
  '- Date must be in YYYY-MM-DD format',
  '- Reference number should include any prefixes (INV-, REF-, etc.)',
];

/**
 * Two-step flow: `parse` with the OCR provider, then `extract` with the three
 * VLM providers, configured with `strategy: 'majority'` and `threshold: 0.66`.
 *
 * The consensus callback logs the overall agreement and warns when it falls
 * below 0.8, so those extractions can be routed to a human.
 */
const financialFlow = createFlow({
  observability: {
    onConsensusComplete: (consensus) => {
      const agreement = consensus.agreement;
      const percent = (agreement * 100).toFixed(0);
      console.log(`Consensus: ${percent}% agreement`);

      // Flag low-agreement extractions for review
      const needsReview = agreement < 0.8;
      if (needsReview) {
        console.warn('Low agreement - flagging for human review');
      }
    },
  },
})
  .step('parse', parse({ provider: ocrProvider }))
  .step(
    'extract',
    extract<FinancialExtraction>({
      providers: [geminiProvider, claudeProvider, gptProvider],
      schema: financialSchema,
      // NOTE(review): presumably supplies both the parsed representation and
      // the original document to the providers — confirm against library docs.
      inputMode: 'ir+source',
      consensus: {
        strategy: 'majority',
        threshold: 0.66,
      },
      // Leading and trailing newlines are part of the instruction text.
      additionalInstructions: `\n${extractionRules.join('\n')}\n`,
    }),
  )
  .build();
// Process document
/**
 * Runs `financialFlow` on one file and prints the extracted fields, the
 * per-field agreement (when present) and the total cost to the console.
 *
 * @param filePath - Path of the file to read synchronously. Its bytes are
 *   always submitted as a data URL with the `application/pdf` MIME type.
 * @returns The flow's `output` for the document.
 * @throws Anything thrown by `readFileSync` or rejected by `financialFlow.run`.
 */
async function validateFinancialDocument(filePath: string) {
  const encoded = readFileSync(filePath).toString('base64');
  const result = await financialFlow.run({
    base64: `data:application/pdf;base64,${encoded}`,
  });
  const extraction = result.output;

  console.log('\n--- Financial Document Validation ---');
  // Values stay separate arguments so console.log formats them itself.
  console.log('Type:', extraction.documentType);
  console.log('Amount:', extraction.currency, extraction.amount);
  console.log('Date:', extraction.date);
  console.log('Reference:', extraction.referenceNumber);
  console.log('Payer:', extraction.parties?.payer);
  console.log('Payee:', extraction.parties?.payee);

  console.log('\n--- Confidence ---');
  // NOTE(review): `agreement` is not declared on FinancialExtraction;
  // presumably the consensus step attaches it to the output — confirm
  // against the @doclo/flows typings.
  const fieldAgreement = extraction.agreement;
  if (fieldAgreement) {
    Object.entries(fieldAgreement).forEach(([field, info]) => {
      let status: string;
      if (info.uncertain) {
        status = '⚠️';
      } else {
        status = '✓';
      }
      const percent = (info.agreement * 100).toFixed(0);
      console.log(`${status} ${field}: ${percent}%`);
    });
  }

  console.log('\n--- Cost ---');
  const totalCost = result.aggregated.totalCostUSD.toFixed(4);
  console.log('Total: $' + totalCost);

  return extraction;
}
// Script entry point: process the sample document. A failure is logged and is
// also reflected in the exit status, so shells and CI jobs can detect it
// (logging alone would still leave the process exiting with status 0).
validateFinancialDocument('./financial-doc.pdf').catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});