AI Workflow Automation: Email Triage, Document Processing, and Code Review Bots

AI automation delivers ROI fastest when applied to high-volume, repetitive knowledge work. This guide covers concrete, production-ready implementations for the most common workflow automation targets.

Email triage and response drafting

/**
 * Structured outcome of triaging one inbound email.
 */
interface EmailTriage {
  /** Bucket the email was classified into. */
  category: 'billing' | 'technical' | 'sales' | 'spam' | 'other';

  /** How quickly the email should be handled. */
  urgency: 'high' | 'medium' | 'low';

  /** Overall tone of the message. */
  sentiment: 'positive' | 'negative' | 'neutral';

  /** Short description of what the email is about. */
  summary: string;

  /** Proposed reply text; absent when no reply was drafted. */
  draftReply?: string;

  /** True when the email should be handed to a human agent. */
  escalate: boolean;

  /** Free-form labels attached to the email. */
  tags: string[];
}

/**
 * Classifies a single email with the chat-completions API and returns the
 * structured triage result.
 *
 * The response is constrained by a strict JSON schema. Strict mode requires
 * every property to appear in `required`, so the optional `draftReply` is
 * modelled as a nullable string in the schema and converted back to an absent
 * property on the returned object.
 *
 * NOTE(review): `email.body` is untrusted text that is interpolated into the
 * prompt; treat `escalate` and `draftReply` as suggestions, not as decisions
 * that are safe to act on without review.
 *
 * @param email - Sender, subject and body of the message to triage.
 * @returns The parsed triage result; `draftReply` is omitted when the model
 *   returned `null` for it.
 * @throws Error when the completion has no choices, is refused, is cut off by
 *   the token limit, or carries no content.
 */
async function triageEmail(email: { subject: string; body: string; from: string }): Promise<EmailTriage> {
  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [
      {
        role: 'system',
        content: `You are an email triage assistant for Acme Corp support.
Classify each email and draft a reply only for billing and technical categories.
Escalate immediately for: legal threats, data breach reports, enterprise contract issues.`,
      },
      {
        role: 'user',
        content: `Email:
From: ${email.from}
Subject: ${email.subject}
Body: ${email.body}`,
      },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: 'EmailTriage',
        strict: true,
        schema: {
          type: 'object',
          properties: {
            category:    { type: 'string', enum: ['billing', 'technical', 'sales', 'spam', 'other'] },
            urgency:     { type: 'string', enum: ['high', 'medium', 'low'] },
            sentiment:   { type: 'string', enum: ['positive', 'negative', 'neutral'] },
            summary:     { type: 'string' },
            // Nullable instead of optional: strict schemas cannot omit properties.
            draftReply:  {
              type: ['string', 'null'],
              description: 'Draft reply text, or null when no reply should be drafted.',
            },
            escalate:    { type: 'boolean' },
            tags:        { type: 'array', items: { type: 'string' } },
          },
          required: ['category', 'urgency', 'sentiment', 'summary', 'draftReply', 'escalate', 'tags'],
          additionalProperties: false,
        },
      },
    },
    max_tokens: 500,
  });

  const choice = response.choices[0];
  if (!choice) {
    throw new Error('Email triage failed: completion contained no choices');
  }
  // A completion cut off at max_tokens is not valid JSON; fail with a clear message
  // instead of a SyntaxError from JSON.parse.
  if (choice.finish_reason === 'length') {
    throw new Error('Email triage failed: completion was truncated by max_tokens');
  }
  if (choice.message.refusal) {
    throw new Error(`Email triage refused by model: ${choice.message.refusal}`);
  }
  const content = choice.message.content;
  if (!content) {
    throw new Error('Email triage failed: completion contained no content');
  }

  const { draftReply, ...triage } = JSON.parse(content) as Omit<EmailTriage, 'draftReply'> & {
    draftReply: string | null;
  };
  // Map the schema's null back to "property absent" to honour the EmailTriage contract.
  return draftReply == null ? triage : { ...triage, draftReply };
}

// Process inbox batch
/**
 * Triages every email in the batch concurrently, then walks the results in
 * input order: successful results are saved and, when flagged, escalated to a
 * human agent; failed triage calls are logged so they are not lost silently.
 *
 * All triage requests are started at once, so the caller is responsible for
 * keeping the batch size within the API's rate limits.
 *
 * @param emails - Emails to triage; each must expose an `id`.
 */
async function processInbox(emails: Email[]): Promise<void> {
  const results = await Promise.allSettled(emails.map((email) => triageEmail(email)));

  for (const [i, result] of results.entries()) {
    const email = emails[i];
    // results and emails have the same length; this guard only satisfies
    // index-access checking.
    if (email === undefined) continue;

    if (result.status === 'rejected') {
      // Surface the failure instead of dropping the email without a trace.
      console.error(`Triage failed for email ${String(email.id)}:`, result.reason);
      continue;
    }

    await saveTriageResult(email.id, result.value);
    if (result.value.escalate) await notifyHumanAgent(email);
  }
}

Invoice / document extraction pipeline

import pdf2pic from 'pdf2pic';
import { z } from 'zod';

/**
 * Zod schema for the data extracted from one invoice. Used both to describe
 * the expected model output and to validate it after parsing.
 */
const InvoiceSchema = z.object({
  invoiceNumber: z.string(),
  vendor:        z.string(),
  // Four digits, two digits, two digits, hyphen-separated (e.g. "2024-01-31").
  // Checks the shape only, not that the month/day are real calendar values.
  date:          z.string().regex(/^\d{4}-\d{2}-\d{2}$/),
  // NOTE(review): strict structured-output schemas require every property to be
  // present; confirm the JSON-schema conversion emits this as nullable rather
  // than omitting it from `required`.
  dueDate:       z.string().optional(),
  // Exactly three characters — presumably an ISO 4217 code such as "USD"; the
  // schema itself only enforces the length.
  currency:      z.string().length(3),
  subtotal:      z.number(),
  tax:           z.number(),
  total:         z.number(),
  lineItems:     z.array(z.object({
    description: z.string(),
    quantity:    z.number(),
    unitPrice:   z.number(),
    amount:      z.number(),
  })),
});

/**
 * Renders every page of a PDF to a PNG image, sends the images to the vision
 * model, and returns the invoice data validated against `InvoiceSchema`.
 *
 * @param pdfPath - Path of the PDF file to convert and extract from.
 * @returns The extracted invoice, validated by `InvoiceSchema.parse`.
 * @throws Error when no page could be rendered, or when the completion is
 *   missing, refused, truncated or empty.
 * @throws ZodError (from `InvoiceSchema.parse`) when the returned JSON does not
 *   match the schema.
 */
async function extractInvoice(pdfPath: string): Promise<z.infer<typeof InvoiceSchema>> {
  // Convert PDF to images (-1 = all pages), returned as base64 strings.
  const converter = pdf2pic.fromPath(pdfPath, { format: 'png', width: 2000 });
  const pages = await converter.bulk(-1, { responseType: 'base64' });

  // Skip pages without image data rather than sending "base64,undefined".
  const images = pages.flatMap(p =>
    p.base64
      ? [{
          type: 'image_url' as const,
          image_url: { url: `data:image/png;base64,${p.base64}`, detail: 'high' as const },
        }]
      : [],
  );
  if (images.length === 0) {
    // Without any image the model could only invent an invoice.
    throw new Error(`Invoice extraction failed: no pages could be rendered from ${pdfPath}`);
  }

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [{
      role: 'user',
      content: [
        ...images,
        { type: 'text' as const, text: 'Extract all invoice data from these pages.' },
      ],
    }],
    response_format: {
      type: 'json_schema',
      json_schema: { name: 'Invoice', strict: true, schema: toJsonSchema(InvoiceSchema) },
    },
  });

  const choice = response.choices[0];
  if (!choice) {
    throw new Error(`Invoice extraction failed for ${pdfPath}: completion contained no choices`);
  }
  if (choice.finish_reason === 'length') {
    throw new Error(`Invoice extraction failed for ${pdfPath}: completion was truncated`);
  }
  if (choice.message.refusal) {
    throw new Error(`Invoice extraction refused for ${pdfPath}: ${choice.message.refusal}`);
  }
  const content = choice.message.content;
  if (!content) {
    throw new Error(`Invoice extraction failed for ${pdfPath}: completion contained no content`);
  }

  // Validate at runtime; the model output is not trusted to match the schema.
  return InvoiceSchema.parse(JSON.parse(content));
}

// Batch processing with queue
/**
 * Extracts invoices in fixed-size batches: each batch runs concurrently, the
 * next batch starts only after every extraction in the current one has
 * settled, and a pause is inserted between batches to respect rate limits.
 *
 * @param invoicePaths - PDF paths to process.
 * @param options - Optional tuning knobs.
 * @param options.concurrency - Extractions per batch (default 5, must be a
 *   positive integer).
 * @param options.delayMs - Pause between batches in milliseconds (default
 *   1000; 0 disables the pause).
 * @returns One settled result per input path, in input order. Failures are
 *   reported as `rejected` entries rather than thrown.
 * @throws RangeError when `concurrency` is not a positive integer.
 */
async function processInvoiceQueue(
  invoicePaths: string[],
  options: { concurrency?: number; delayMs?: number } = {},
) {
  const { concurrency = 5, delayMs = 1000 } = options;
  if (!Number.isInteger(concurrency) || concurrency < 1) {
    // A step of 0 (or NaN) would never advance the loop below.
    throw new RangeError(`concurrency must be a positive integer, got ${concurrency}`);
  }

  const results: PromiseSettledResult<Awaited<ReturnType<typeof extractInvoice>>>[] = [];

  for (let start = 0; start < invoicePaths.length; start += concurrency) {
    const batch = invoicePaths.slice(start, start + concurrency);
    const batchResults = await Promise.allSettled(batch.map((path) => extractInvoice(path)));
    results.push(...batchResults);

    // Respect rate limits; no pause is needed after the final batch.
    const hasMore = start + concurrency < invoicePaths.length;
    if (hasMore && delayMs > 0) await sleep(delayMs);
  }
  return results;
}

Automated code review bot

// GitHub Actions or webhook handler
/**
 * One finding produced by the automated reviewer for a pull request.
 */
interface CodeReviewComment {
  /** File the finding refers to. */
  file: string;

  /** Line number the finding refers to. */
  line: number;

  /** How serious the finding is. */
  severity: 'error' | 'warning' | 'suggestion';

  /** Kind of problem being reported. */
  category: 'security' | 'performance' | 'logic' | 'style';

  /** Explanation of the finding. */
  comment: string;

  /** Proposed code fix, when one is offered. */
  suggestion?: string;
}

/**
 * Asks the model to review a pull-request diff and returns the findings that
 * are not purely stylistic.
 *
 * Only the first 8000 characters of the diff are sent; when the diff is longer
 * the prompt says so, so the model does not report the cut-off itself as a bug.
 * The response is constrained by a strict JSON schema, which requires every
 * property to be listed in `required`; the optional `suggestion` is therefore
 * nullable in the schema and dropped from the returned object when null.
 *
 * @param diff - Unified diff text of the pull request.
 * @param prContext - Title and (possibly empty) description of the pull request.
 * @returns Review comments whose category is not `'style'`.
 * @throws Error when the completion has no choices, is refused, is truncated
 *   by the token limit, or carries no content.
 */
async function reviewPullRequest(diff: string, prContext: { title: string; description: string }): Promise<CodeReviewComment[]> {
  const MAX_DIFF_CHARS = 8000;
  const diffForPrompt =
    diff.length > MAX_DIFF_CHARS
      ? `${diff.slice(0, MAX_DIFF_CHARS)}\n[diff truncated]`
      : diff;

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: `You are a senior code reviewer. Focus ONLY on:
- Security vulnerabilities (SQL injection, XSS, hardcoded secrets, exposed PII)
- Logic bugs (off-by-one, null pointer, race conditions)
- Performance issues (N+1 queries, missing indexes, inefficient loops)

Skip: style, formatting, naming conventions.
Be specific and actionable. Include code suggestions.`,
      },
      {
        role: 'user',
        content: `PR: ${prContext.title}
${prContext.description ? `Description: ${prContext.description}` : ''}

Diff:
${diffForPrompt}`,
      },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: 'CodeReview',
        strict: true,
        schema: {
          type: 'object',
          properties: {
            comments: {
              type: 'array',
              items: {
                type: 'object',
                properties: {
                  file:        { type: 'string' },
                  line:        { type: 'integer' },
                  severity:    { type: 'string', enum: ['error', 'warning', 'suggestion'] },
                  category:    { type: 'string', enum: ['security', 'performance', 'logic', 'style'] },
                  comment:     { type: 'string' },
                  // Nullable instead of optional: strict schemas cannot omit properties.
                  suggestion:  {
                    type: ['string', 'null'],
                    description: 'Suggested code fix, or null when there is none.',
                  },
                },
                required: ['file', 'line', 'severity', 'category', 'comment', 'suggestion'],
                additionalProperties: false,
              },
            },
          },
          required: ['comments'],
          additionalProperties: false,
        },
      },
    },
    max_tokens: 2000,
  });

  const choice = response.choices[0];
  if (!choice) {
    throw new Error('Code review failed: completion contained no choices');
  }
  // Output cut off at max_tokens is not valid JSON; report that explicitly.
  if (choice.finish_reason === 'length') {
    throw new Error('Code review failed: completion was truncated by max_tokens');
  }
  if (choice.message.refusal) {
    throw new Error(`Code review refused by model: ${choice.message.refusal}`);
  }
  const content = choice.message.content;
  if (!content) {
    throw new Error('Code review failed: completion contained no content');
  }

  const { comments } = JSON.parse(content) as {
    comments: (Omit<CodeReviewComment, 'suggestion'> & { suggestion: string | null })[];
  };

  return comments
    // 'style' is a category, not a severity; filtering on severity never matched.
    .filter(c => c.category !== 'style')
    // Map the schema's null back to "property absent".
    .map(({ suggestion, ...rest }): CodeReviewComment =>
      suggestion == null ? rest : { ...rest, suggestion },
    );
}

Document classification pipeline

// Classify and route incoming documents automatically
/** Document categories used for classification and for routing to a processor. */
type DocType =
  | 'invoice'
  | 'contract'
  | 'receipt'
  | 'report'
  | 'other';

/**
 * Classifies a document from the first 500 characters of its text.
 *
 * The model's reply is free-form JSON (`json_object` mode does not enforce a
 * schema), so it is validated here: anything that is not a known document type
 * with a confidence in [0, 1] is reported as `{ type: 'other', confidence: 0 }`.
 * A zero confidence lets callers that gate on confidence send the document to
 * manual review instead of acting on malformed output.
 *
 * @param firstPageText - Text of the document; only the first 500 characters
 *   are sent to the model.
 * @returns The validated document type and confidence.
 */
async function classifyDocument(firstPageText: string): Promise<{ type: DocType; confidence: number }> {
  const DOC_TYPES: readonly DocType[] = ['invoice', 'contract', 'receipt', 'report', 'other'];
  const fallback: { type: DocType; confidence: number } = { type: 'other', confidence: 0 };

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [{
      role: 'user',
      content: `Classify this document as: invoice, contract, receipt, report, or other.
Reply with JSON: {"type": "...", "confidence": 0.0-1.0}

Document excerpt:
${firstPageText.slice(0, 500)}`,
    }],
    response_format: { type: 'json_object' },
    max_tokens: 30,
    temperature: 0,
  });

  const content = response.choices[0]?.message.content;
  if (!content) {
    console.warn('classifyDocument: completion contained no content');
    return fallback;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    // Typically a reply cut off by max_tokens.
    console.warn('classifyDocument: completion was not valid JSON');
    return fallback;
  }
  if (typeof parsed !== 'object' || parsed === null) {
    console.warn('classifyDocument: completion was not a JSON object');
    return fallback;
  }

  const { type, confidence } = parsed as { type?: unknown; confidence?: unknown };
  const knownType = DOC_TYPES.find(candidate => candidate === type);
  const confidenceOk =
    typeof confidence === 'number' && confidence >= 0 && confidence <= 1;

  if (knownType === undefined || !confidenceOk) {
    console.warn('classifyDocument: unexpected classification payload', parsed);
    return fallback;
  }
  return { type: knownType, confidence };
}

// Route to different processors
/** Shape shared by every entry in the routing table: takes a file path, resolves when done. */
type DocumentProcessor = (path: string) => Promise<void>;

/** Routing table: one processor per document type. */
const processors: Record<DocType, DocumentProcessor> = {
  invoice: processInvoice,
  contract: processContract,
  receipt: processReceipt,
  report: processReport,
  // Unclassifiable documents go to a person.
  other: flagForManualReview,
};

/**
 * Extracts a document's text, classifies it, and hands the file to the
 * processor registered for its type. Documents are flagged for manual review
 * instead when the classification confidence is below 0.8 (or is not a usable
 * number), or when the returned type has no registered processor.
 *
 * @param filePath - Path of the document to ingest.
 */
async function ingestDocument(filePath: string): Promise<void> {
  const text = await extractText(filePath);
  const { type, confidence } = await classifyDocument(text);

  // Written as !(x >= t) so that NaN or a missing confidence is treated as low
  // confidence; `x < t` would let those through.
  if (!(confidence >= 0.8)) {
    await flagForManualReview(filePath, `Low classification confidence: ${confidence}`);
    return;
  }

  // Own-property check: a plain lookup would also find inherited members such
  // as `toString` if the classifier returned an unexpected string.
  if (!Object.prototype.hasOwnProperty.call(processors, type)) {
    await flagForManualReview(filePath, `Unrecognized document type: ${String(type)}`);
    return;
  }

  await processors[type](filePath);
}

Workflow automation checklist

Use the Batch API for any non-realtime workload — 50% cost saving.
Always validate extracted data with Zod before writing to databases.
Add a human-review queue for low-confidence outputs (<0.8).
Log every automation run with input hash, model, latency, and outcome.
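Set max_tokens limits — document extraction can be verbose — and check finish_reason on every response: output cut off at the limit is not valid JSON and must be retried or rejected, not parsed.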
Implement idempotency keys to prevent duplicate processing on retries.

Takeaway

Email triage and document extraction are the highest-ROI automation targets — they require minimal fine-tuning and deliver immediate time savings. Always gate automated actions on confidence scores and route low-confidence results to human review before sending to downstream systems.