// Option reference for markerOCRProvider, written as a signature sketch:
// each entry is `name: type`, and `?` marks an option that may be omitted.
// This is illustrative notation, not a literal call that compiles as-is.
markerOCRProvider({
apiKey: string, // Required: API key
endpoint?: string, // Optional: custom endpoint (default: datalab API)
// Processing mode
mode?: 'fast' | 'balanced' | 'high_accuracy', // Must be one of these three string literals
// Page selection
maxPages?: number, // Process only the first N pages
pageRange?: string, // Specific pages as comma-separated page numbers and ranges, e.g., "0,2-4,10"
// Output options
// NOTE(review): force_ocr is the only snake_case key in this list; confirm the
// exact spelling against the provider implementation before relying on it.
force_ocr?: boolean, // Force OCR (default: true)
extractImages?: boolean, // Extract images (default: true)
paginate?: boolean, // Add page delimiters
formatLines?: boolean, // Format lines in output
// Language
langs?: string[], // OCR languages as an array of codes, e.g., ['en', 'de']
// Processing
stripExistingOCR?: boolean, // Redo OCR from scratch
// Polling (both fields are optional, as is the group itself)
// NOTE(review): with the documented defaults this is presumably about
// 60 x 2000 ms = 2 minutes of waiting in total — confirm against the implementation.
polling?: {
maxAttempts?: number, // Max polling attempts (default: 60)
pollingInterval?: number // Interval between polling attempts, in ms (default: 2000)
}
})