AI Workflow Automation: Email Triage, Document Processing, and Code Review Bots
·12 min read
AI automation delivers ROI fastest when applied to high-volume, repetitive knowledge work. This guide covers concrete, production-ready implementations for the most common workflow automation targets.
Email triage and response drafting
/** Structured triage verdict for a single support email, as produced by `triageEmail`. */
interface EmailTriage {
// Routing bucket. The triage prompt asks for a draft reply only for 'billing' and 'technical'.
category: 'billing' | 'technical' | 'sales' | 'spam' | 'other';
urgency: 'high' | 'medium' | 'low';
sentiment: 'positive' | 'negative' | 'neutral';
// Model-written summary of the email.
summary: string;
// Optional: may be absent when no reply was drafted.
draftReply?: string;
// When true, `processInbox` notifies a human agent about this email.
escalate: boolean;
tags: string[];
}
/**
 * Classifies one inbound support email with `gpt-4o-mini` and, for billing and
 * technical emails, asks the model to draft a reply.
 *
 * The request uses OpenAI Structured Outputs in strict mode. Strict mode requires
 * every key in `properties` to also appear in `required`, so the optional
 * `draftReply` is declared as a required, nullable string; a `null` is dropped
 * again before returning so the result still matches `EmailTriage.draftReply?`.
 *
 * NOTE: the email text is untrusted and is interpolated verbatim into the prompt,
 * so `escalate` and `draftReply` should be treated as advisory model output.
 *
 * @param email - Sender, subject and body of the message to triage.
 * @returns The parsed triage verdict.
 * @throws Error when the model returns no choice, refuses, is cut off by
 *   `max_tokens` (incomplete JSON) or returns empty content; `SyntaxError` when
 *   the content is not valid JSON.
 */
async function triageEmail(email: { subject: string; body: string; from: string }): Promise<EmailTriage> {
  const systemPrompt = [
    'You are an email triage assistant for Acme Corp support.',
    'Classify each email and draft a reply only for billing and technical categories.',
    // Needed because draftReply is now a required (nullable) key in the schema.
    'Set draftReply to null for every other category.',
    'Escalate immediately for: legal threats, data breach reports, enterprise contract issues.',
  ].join('\n');

  const userPrompt = [
    'Email:',
    `From: ${email.from}`,
    `Subject: ${email.subject}`,
    `Body: ${email.body}`,
  ].join('\n');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userPrompt },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: 'EmailTriage',
        strict: true,
        schema: {
          type: 'object',
          properties: {
            category: { type: 'string', enum: ['billing', 'technical', 'sales', 'spam', 'other'] },
            urgency: { type: 'string', enum: ['high', 'medium', 'low'] },
            sentiment: { type: 'string', enum: ['positive', 'negative', 'neutral'] },
            summary: { type: 'string' },
            // Optional fields are expressed as "required but nullable" in strict mode.
            draftReply: { type: ['string', 'null'] },
            escalate: { type: 'boolean' },
            tags: { type: 'array', items: { type: 'string' } },
          },
          required: ['category', 'urgency', 'sentiment', 'summary', 'draftReply', 'escalate', 'tags'],
          additionalProperties: false,
        },
      },
    },
    max_tokens: 500,
  });

  const choice = response.choices[0];
  if (!choice) {
    throw new Error('triageEmail: the model returned no choices');
  }
  if (choice.finish_reason === 'length') {
    // Hitting max_tokens leaves the JSON cut off mid-document.
    throw new Error('triageEmail: response was truncated at max_tokens; JSON is incomplete');
  }
  if (choice.message.refusal) {
    throw new Error(`triageEmail: the model refused the request: ${choice.message.refusal}`);
  }
  const content = choice.message.content;
  if (!content) {
    // Without this guard JSON.parse(null) would quietly return null.
    throw new Error('triageEmail: the model returned empty content');
  }

  const { draftReply, ...triage } = JSON.parse(content) as Omit<EmailTriage, 'draftReply'> & {
    draftReply: string | null;
  };
  return draftReply == null ? triage : { ...triage, draftReply };
}
// Process inbox batch
/**
 * Triages every email concurrently, stores each verdict and notifies a human
 * agent for emails the model marked `escalate`.
 *
 * Failures are isolated per email: a rejected triage call, or an error while
 * saving/notifying, is logged and the loop moves on, so one bad email can
 * neither disappear silently nor stop the rest of the batch.
 *
 * @param emails - Emails to triage; `emails[i].id` is used as the storage key.
 */
async function processInbox(emails: Email[]): Promise<void> {
  const results = await Promise.allSettled(emails.map(triageEmail));
  for (const [i, result] of results.entries()) {
    const email = emails[i];
    if (result.status === 'rejected') {
      // Previously dropped without a trace: the email was neither saved nor escalated.
      console.error(`Triage failed for email ${email.id}:`, result.reason);
      continue;
    }
    try {
      await saveTriageResult(email.id, result.value);
      if (result.value.escalate) await notifyHumanAgent(email);
    } catch (error: unknown) {
      // Keep going so the remaining emails in the batch are still handled.
      console.error(`Post-triage handling failed for email ${email.id}:`, error);
    }
  }
}
Invoice / document extraction pipeline
import pdf2pic from 'pdf2pic';
import { z } from 'zod';
/**
 * Zod schema for the invoice data requested from the model. `extractInvoice`
 * uses it twice: converted to JSON Schema for the request, and to validate the
 * parsed response.
 */
const InvoiceSchema = z.object({
  invoiceNumber: z.string(),
  vendor: z.string(),
  // ISO calendar date, YYYY-MM-DD. The digit classes need their backslashes:
  // a bare `d{4}` matches the letter "d" four times and rejects every real date.
  date: z.string().regex(/^\d{4}-\d{2}-\d{2}$/),
  // NOTE(review): OpenAI strict mode requires every property to be listed in
  // `required`; confirm how `toJsonSchema` renders this optional field.
  dueDate: z.string().optional(),
  // Exactly three characters (presumably an ISO 4217 code — confirm).
  currency: z.string().length(3),
  subtotal: z.number(),
  tax: z.number(),
  total: z.number(),
  lineItems: z.array(
    z.object({
      description: z.string(),
      quantity: z.number(),
      unitPrice: z.number(),
      amount: z.number(),
    }),
  ),
});
/**
 * Renders a PDF invoice to PNG page images, sends them to `gpt-4o` and returns
 * the extracted data after validating it against `InvoiceSchema`.
 *
 * @param pdfPath - Path of the PDF file to read.
 * @returns The invoice fields, validated by `InvoiceSchema.parse`.
 * @throws Error when no pages were rendered, or when the model returns no
 *   choice, refuses, is truncated or returns empty content; `SyntaxError` for
 *   invalid JSON; a Zod validation error when the JSON does not match the schema.
 */
async function extractInvoice(pdfPath: string): Promise<z.infer<typeof InvoiceSchema>> {
  // Convert PDF to images (-1 asks pdf2pic for every page).
  const converter = pdf2pic.fromPath(pdfPath, { format: 'png', width: 2000 });
  const pages = await converter.bulk(-1, { responseType: 'base64' });
  if (pages.length === 0) {
    // Avoid paying for a request that contains no invoice at all.
    throw new Error(`extractInvoice: no pages could be rendered from ${pdfPath}`);
  }

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          ...pages.map((p) => ({
            type: 'image_url' as const,
            image_url: { url: `data:image/png;base64,${p.base64}`, detail: 'high' as const },
          })),
          { type: 'text', text: 'Extract all invoice data from these pages.' },
        ],
      },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: { name: 'Invoice', strict: true, schema: toJsonSchema(InvoiceSchema) },
    },
  });

  const choice = response.choices[0];
  if (!choice) {
    throw new Error('extractInvoice: the model returned no choices');
  }
  if (choice.finish_reason === 'length') {
    throw new Error('extractInvoice: response was truncated; JSON is incomplete');
  }
  if (choice.message.refusal) {
    throw new Error(`extractInvoice: the model refused the request: ${choice.message.refusal}`);
  }
  const content = choice.message.content;
  if (!content) {
    // JSON.parse(null) yields null, which would surface as a misleading schema error.
    throw new Error('extractInvoice: the model returned empty content');
  }

  return InvoiceSchema.parse(JSON.parse(content));
}
// Batch processing with queue
/**
 * Runs `extractInvoice` over the given paths in consecutive groups of
 * `CONCURRENCY`, awaiting `sleep(1000)` between groups (not after the last one).
 *
 * @param invoicePaths - PDF paths to process, in order.
 * @returns One settled result per path, in input order; failures are reported
 *   as rejected entries rather than thrown.
 */
async function processInvoiceQueue(invoicePaths: string[]) {
  const CONCURRENCY = 5;
  const results = [];
  let cursor = 0;
  while (cursor < invoicePaths.length) {
    const next = cursor + CONCURRENCY;
    const settled = await Promise.allSettled(invoicePaths.slice(cursor, next).map(extractInvoice));
    results.push(...settled);
    // Respect rate limits
    if (next < invoicePaths.length) await sleep(1000);
    cursor = next;
  }
  return results;
}
Automated code review bot
// GitHub Actions or webhook handler
/** One review finding returned by `reviewPullRequest`, tied to a file and line. */
interface CodeReviewComment {
file: string;
line: number;
// How serious the finding is. Note there is no 'style' member here.
severity: 'error' | 'warning' | 'suggestion';
// What kind of issue it is; 'style' exists only on this field.
category: 'security' | 'performance' | 'logic' | 'style';
comment: string;
suggestion?: string; // code fix
}
/**
 * Asks `gpt-4o` to review a pull-request diff for security, logic and
 * performance problems and returns its findings, excluding style comments.
 *
 * Only the first 8000 characters of the diff are sent; when the diff is longer
 * the prompt says so, so the model does not mistake the cut-off for broken code.
 *
 * The schema is strict-mode compliant: every property is listed in `required`,
 * and the optional `suggestion` is a nullable string that is dropped from the
 * returned comment when the model sends `null`.
 *
 * @param diff - Unified diff text of the pull request.
 * @param prContext - PR title and description, included in the prompt.
 * @returns Review comments whose category is not 'style'.
 * @throws Error when the model returns no choice, refuses, is cut off by
 *   `max_tokens` or returns empty content; `SyntaxError` for invalid JSON.
 */
async function reviewPullRequest(diff: string, prContext: { title: string; description: string }): Promise<CodeReviewComment[]> {
  type RawComment = Omit<CodeReviewComment, 'suggestion'> & { suggestion: string | null };

  const MAX_DIFF_CHARS = 8000;
  const diffExcerpt =
    diff.length > MAX_DIFF_CHARS
      ? `${diff.slice(0, MAX_DIFF_CHARS)}\n[diff truncated: only the first ${MAX_DIFF_CHARS} characters are shown]`
      : diff;

  const systemPrompt = [
    'You are a senior code reviewer. Focus ONLY on:',
    '- Security vulnerabilities (SQL injection, XSS, hardcoded secrets, exposed PII)',
    '- Logic bugs (off-by-one, null pointer, race conditions)',
    '- Performance issues (N+1 queries, missing indexes, inefficient loops)',
    'Skip: style, formatting, naming conventions.',
    'Be specific and actionable. Include code suggestions.',
  ].join('\n');

  const userPrompt = [
    `PR: ${prContext.title}`,
    prContext.description ? `Description: ${prContext.description}` : '',
    'Diff:',
    diffExcerpt,
  ].join('\n');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userPrompt },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: 'CodeReview',
        strict: true,
        schema: {
          type: 'object',
          properties: {
            comments: {
              type: 'array',
              items: {
                type: 'object',
                properties: {
                  file: { type: 'string' },
                  // Line numbers are whole numbers.
                  line: { type: 'integer' },
                  severity: { type: 'string', enum: ['error', 'warning', 'suggestion'] },
                  category: { type: 'string', enum: ['security', 'performance', 'logic', 'style'] },
                  comment: { type: 'string' },
                  // Optional fields are expressed as "required but nullable" in strict mode.
                  suggestion: { type: ['string', 'null'] },
                },
                required: ['file', 'line', 'severity', 'category', 'comment', 'suggestion'],
                additionalProperties: false,
              },
            },
          },
          required: ['comments'],
          additionalProperties: false,
        },
      },
    },
    max_tokens: 2000,
  });

  const choice = response.choices[0];
  if (!choice) {
    throw new Error('reviewPullRequest: the model returned no choices');
  }
  if (choice.finish_reason === 'length') {
    throw new Error('reviewPullRequest: response was truncated at max_tokens; JSON is incomplete');
  }
  if (choice.message.refusal) {
    throw new Error(`reviewPullRequest: the model refused the request: ${choice.message.refusal}`);
  }
  const content = choice.message.content;
  if (!content) {
    throw new Error('reviewPullRequest: the model returned empty content');
  }

  const { comments } = JSON.parse(content) as { comments: RawComment[] };
  return comments
    // 'style' is a category, not a severity; filtering on severity never removed anything.
    .filter((c) => c.category !== 'style') // only meaningful issues
    .map(({ suggestion, ...rest }): CodeReviewComment => (suggestion == null ? rest : { ...rest, suggestion }));
}
Document classification pipeline
// Classify and route incoming documents automatically
// Document kinds the classifier may return; each one is a key of the `processors` routing table.
type DocType = 'invoice' | 'contract' | 'receipt' | 'report' | 'other';
/**
 * Asks `gpt-4o-mini` to classify a document from the first 500 characters of
 * its text and returns the document type with a confidence score.
 *
 * The model reply is plain JSON-mode output (no schema enforcement), so it is
 * validated here: an unknown `type`, or a `confidence` that is not a finite
 * number, yields `{ type: 'other', confidence: 0 }` so callers that gate on
 * confidence send the document to manual review. Confidence is clamped to [0, 1].
 *
 * @param firstPageText - Document text; only the first 500 characters are sent.
 * @returns The validated classification.
 * @throws Error when the model returns no content; `SyntaxError` when the
 *   content is not valid JSON (for example when cut off by `max_tokens`).
 */
async function classifyDocument(firstPageText: string): Promise<{ type: DocType; confidence: number }> {
  const knownTypes: readonly DocType[] = ['invoice', 'contract', 'receipt', 'report', 'other'];
  const unusable = { type: 'other' as const, confidence: 0 };

  const prompt = [
    'Classify this document as: invoice, contract, receipt, report, or other.',
    'Reply with JSON: {"type": "...", "confidence": 0.0-1.0}',
    'Document excerpt:',
    firstPageText.slice(0, 500),
  ].join('\n');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [{ role: 'user', content: prompt }],
    response_format: { type: 'json_object' },
    max_tokens: 30,
    temperature: 0,
  });

  const content = response.choices[0]?.message.content;
  if (!content) {
    // Without this guard JSON.parse(null) would quietly return null.
    throw new Error('classifyDocument: the model returned empty content');
  }

  const parsed: unknown = JSON.parse(content);
  if (typeof parsed !== 'object' || parsed === null) return unusable;

  const { type, confidence } = parsed as { type?: unknown; confidence?: unknown };
  const docType = knownTypes.find((candidate) => candidate === type);
  if (docType === undefined || typeof confidence !== 'number' || !Number.isFinite(confidence)) {
    return unusable;
  }
  return { type: docType, confidence: Math.min(1, Math.max(0, confidence)) };
}
// Route to different processors
// Dispatch table used by `ingestDocument`: one handler per DocType, each called with the file path.
const processors: Record<DocType, (path: string) => Promise<void>> = {
invoice: processInvoice,
contract: processContract,
receipt: processReceipt,
report: processReport,
// Same function `ingestDocument` calls with a reason string; here it receives only the path.
other: flagForManualReview,
};
/**
 * Extracts a document's text, classifies it and hands the file to the matching
 * processor. Anything that cannot be routed with confidence goes to manual review.
 *
 * The checks fail closed: a confidence that is missing, not a number, NaN or
 * below 0.8 is treated as low confidence, and a type with no own entry in
 * `processors` is flagged instead of crashing on an undefined handler.
 *
 * @param filePath - Path of the document to ingest.
 */
async function ingestDocument(filePath: string) {
  const text = await extractText(filePath);
  const { type, confidence } = await classifyDocument(text);

  // `confidence < 0.8` is false for undefined/NaN, which would let bad output through.
  const isConfident = typeof confidence === 'number' && confidence >= 0.8;
  if (!isConfident) {
    await flagForManualReview(filePath, `Low classification confidence: ${confidence}`);
    return;
  }

  // Own-property check so inherited keys such as "constructor" never resolve to a handler.
  const processor = Object.prototype.hasOwnProperty.call(processors, type) ? processors[type] : undefined;
  if (!processor) {
    await flagForManualReview(filePath, `Unknown document type: ${String(type)}`);
    return;
  }

  await processor(filePath);
}
Workflow automation checklist
- Use the Batch API for any non-realtime workload — 50% cost saving.
- Always validate extracted data with Zod before writing to databases.
- Add a human-review queue for low-confidence outputs (<0.8).
- Log every automation run with input hash, model, latency, and outcome.
- Set `max_tokens` limits — document extraction can be verbose.
- Implement idempotency keys to prevent duplicate processing on retries.
Takeaway
Email triage and document extraction are the highest-ROI automation targets — they require minimal fine-tuning and deliver immediate time savings. Always gate automated actions on confidence scores and route low-confidence results to human review before sending to downstream systems.