AI Agents and Automation
Specification for AI-powered agents, automation workflows, and intelligent processing
Table of Contents
- Overview
- Agent Architecture
- Classification Agents
- Extraction Agents
- Generation Agents
- Monitoring Agents
- Learning Agents
- Orchestration
- Prompt Engineering
- Cost Optimization
Overview
Agent Philosophy
Agents in the platform follow a tiered approach:
- Rules First - Fast, free, deterministic
- Vectors Second - Fast, cheap, semantic
- LLM Third - Slow, expensive, intelligent
Each agent is designed to minimize LLM calls while maximizing accuracy.
Agent Types
| Agent | Purpose | Trigger | Cost |
|---|---|---|---|
| Classifier | Categorize articles | New article | Low-High |
| Extractor | Extract entities/claims | New article | Medium |
| Summarizer | Generate summaries | New article | Medium |
| Clusterer | Group into stories | New article | Low |
| Matcher | Match to profiles | New article | Low |
| Briefer | Generate briefings | Scheduled | High |
| Answerer | Q&A over articles | API request | High |
| Reviewer | Quality control | Low confidence | Medium |
| Learner | Improve models | Feedback | Low |
Agent Architecture
Base Agent Interface
// src/agents/base.ts
// Shared service bindings handed to every agent (Cloudflare Workers bindings).
interface AgentContext {
  db: D1Database;            // D1 relational store (articles, taxonomy, feedback)
  vectorize: VectorizeIndex; // vector index used for semantic lookups
  ai: Ai;                    // Workers AI binding (embeddings, NER, LLM chat)
  kv: KVNamespace;           // KV store for budgets, thresholds and caches
  costTracker: CostTracker;  // per-call cost accounting (see BaseAgent.trackCost)
}
// Generic envelope for agent input payloads.
interface AgentInput<T> {
  data: T;                              // agent-specific payload
  options?: Record<string, any>;        // free-form per-call options
  priority?: 'high' | 'normal' | 'low'; // scheduling hint; not read by the agents shown here — TODO confirm consumers
}
// Generic envelope for agent results, with provenance and cost telemetry.
interface AgentOutput<T> {
  result: T;                                     // agent-specific result
  confidence: number;                            // 0..1 confidence in the result
  method: 'rules' | 'vector' | 'llm' | 'hybrid'; // which tier(s) produced the result
  costMicros: number;                            // cost in micro-dollars (1e6 micros = $1, see CostManager)
  latencyMs: number;                             // wall-clock processing time
  metadata?: Record<string, any>;                // extras, e.g. { needsReview: true }
}
/**
 * Common base for all agents: holds the shared bindings, attributes costs to
 * the agent, and implements the budget/confidence gate that decides whether
 * an LLM call is warranted.
 */
abstract class BaseAgent<TInput, TOutput> {
  constructor(protected ctx: AgentContext) {}

  /** Unique registry key for this agent. */
  abstract name: string;
  /** Implementation version of the agent. */
  abstract version: string;
  /** Run the agent on one input and return a result envelope. */
  abstract process(input: AgentInput<TInput>): Promise<AgentOutput<TOutput>>;

  /** Record the cost of a single external call, attributed to this agent. */
  protected async trackCost(service: string, operation: string, costMicros: number) {
    const entry = {
      service,
      operation,
      cost_micros: costMicros,
      agent: this.name,
    };
    await this.ctx.costTracker.track(entry);
  }

  /**
   * Escalate to the LLM tier only when (a) the daily LLM budget is not yet
   * exhausted and (b) the vector-tier confidence falls below the configured
   * threshold (default 0.7).
   */
  protected async shouldUseLLM(vectorConfidence: number): Promise<boolean> {
    // Budget and spend are independent reads; fetch them together.
    const [dailyBudget, dailySpent] = await Promise.all([
      this.ctx.kv.get('llm_daily_budget'),
      this.ctx.kv.get('llm_daily_spent'),
    ]);
    const budgetExhausted =
      !!dailySpent && !!dailyBudget && parseInt(dailySpent) >= parseInt(dailyBudget);
    if (budgetExhausted) {
      return false; // Budget exhausted
    }
    // Fall back to 0.7 when no threshold has been configured.
    const rawThreshold = (await this.ctx.kv.get('llm_confidence_threshold')) || '0.7';
    return vectorConfidence < parseFloat(rawThreshold);
  }
}
Agent Registry
// src/agents/registry.ts
const agents = new Map<string, BaseAgent<any, any>>();
export function registerAgent(agent: BaseAgent<any, any>) {
agents.set(agent.name, agent);
}
export function getAgent<T extends BaseAgent<any, any>>(name: string): T {
const agent = agents.get(name);
if (!agent) {
throw new Error(`Agent not found: ${name}`);
}
return agent as T;
}
// Register all agents on startup
export function initializeAgents(ctx: AgentContext) {
registerAgent(new ClassifierAgent(ctx));
registerAgent(new EntityExtractorAgent(ctx));
registerAgent(new SummarizerAgent(ctx));
registerAgent(new ClusterAgent(ctx));
registerAgent(new MatcherAgent(ctx));
registerAgent(new BrieferAgent(ctx));
registerAgent(new AnswererAgent(ctx));
}
Classification Agents
Topic Classifier
Assigns topic labels from taxonomy to articles.
// src/agents/classifier.ts
// Input for the topic classifier: article text plus identifiers.
interface ClassifierInput {
  articleId: string;
  headline: string;
  body: string;
  source?: string; // publisher name; not read by ClassifierAgent itself — TODO confirm consumers
}

// Multi-facet classification result. LabelScore is declared elsewhere;
// usage below implies the shape { id, label, confidence }.
interface ClassifierOutput {
  topics: LabelScore[];
  industries: LabelScore[];
  tactics: LabelScore[];
  risks: LabelScore[];
  sentiment: number;      // -1..1; 0 from rule/vector stages, which don't score sentiment
  politicalLean?: number; // -1..1, only populated by the LLM stage
}
/**
 * Tiered topic/industry/tactic/risk classifier.
 *
 * Escalation: rules (free) → vector similarity (embedding cost) → LLM, each
 * stage short-circuiting when its confidence threshold is met (0.8 for rules,
 * 0.75 for vector). Later stages are merged with earlier ones ('hybrid').
 */
class ClassifierAgent extends BaseAgent<ClassifierInput, ClassifierOutput> {
  name = 'classifier';
  version = '1.0.0';

  async process(input: AgentInput<ClassifierInput>): Promise<AgentOutput<ClassifierOutput>> {
    const startTime = Date.now();
    let method: 'rules' | 'vector' | 'llm' | 'hybrid' = 'rules';
    let totalCost = 0;
    const { headline, body } = input.data;
    const text = `${headline}\n\n${body}`;
    // Stage 1: Rule-based classification (free, fast)
    const ruleResults = await this.classifyByRules(text);
    if (ruleResults.confidence >= 0.8) {
      return {
        result: ruleResults.output,
        confidence: ruleResults.confidence,
        method: 'rules',
        costMicros: 0,
        latencyMs: Date.now() - startTime,
      };
    }
    // Stage 2: Vector similarity (cheap, fast)
    const embedding = await this.getEmbedding(text);
    totalCost += 20; // Embedding cost
    const vectorResults = await this.classifyByVector(embedding);
    method = 'vector';
    if (vectorResults.confidence >= 0.75) {
      // Merge rule and vector results
      const merged = this.mergeResults(ruleResults.output, vectorResults.output);
      return {
        result: merged,
        confidence: vectorResults.confidence,
        method: 'hybrid',
        costMicros: totalCost,
        latencyMs: Date.now() - startTime,
      };
    }
    // Stage 3: LLM classification (expensive, slow) — gated by the daily
    // budget and confidence threshold in BaseAgent.shouldUseLLM
    if (await this.shouldUseLLM(vectorResults.confidence)) {
      const llmResults = await this.classifyByLLM(headline, body);
      totalCost += llmResults.cost;
      method = 'llm';
      const merged = this.mergeResults(
        ruleResults.output,
        vectorResults.output,
        llmResults.output
      );
      return {
        result: merged,
        confidence: Math.max(vectorResults.confidence, llmResults.confidence),
        method: 'hybrid',
        costMicros: totalCost,
        latencyMs: Date.now() - startTime,
      };
    }
    // Fallback to vector results with flag for review
    return {
      result: vectorResults.output,
      confidence: vectorResults.confidence,
      method,
      costMicros: totalCost,
      latencyMs: Date.now() - startTime,
      metadata: { needsReview: true },
    };
  }

  /**
   * Stage 1: keyword-pattern matching against active taxonomy labels.
   * Each regex hit adds 0.1 to a label's score (capped at 0.95); overall
   * confidence is the best single label score, 0 when nothing matched.
   */
  private async classifyByRules(text: string): Promise<{
    output: ClassifierOutput;
    confidence: number;
  }> {
    const labels = await this.ctx.db.prepare(
      'SELECT * FROM taxonomy_labels WHERE is_active = 1'
    ).all();
    const matches: Record<string, LabelScore[]> = {
      topics: [],
      industries: [],
      tactics: [],
      risks: [],
    };
    for (const label of labels.results) {
      // keyword_patterns is stored as a JSON array of regex source strings
      const patterns = JSON.parse(label.keyword_patterns || '[]');
      let matchScore = 0;
      for (const pattern of patterns) {
        const regex = new RegExp(pattern, 'gi');
        const matchCount = (text.match(regex) || []).length;
        matchScore += matchCount * 0.1;
      }
      if (matchScore > 0) {
        // NOTE(review): naive 's' pluralization — only regular plurals map
        // onto the bucket keys (e.g. 'industry' would yield 'industrys' and
        // be silently dropped). Confirm the stored category values.
        const category = label.category + 's'; // topics, industries, etc.
        if (matches[category]) {
          matches[category].push({
            id: label.id,
            label: label.label,
            confidence: Math.min(matchScore, 0.95),
          });
        }
      }
    }
    // Sort by confidence and take top results
    for (const category of Object.keys(matches)) {
      matches[category] = matches[category]
        .sort((a, b) => b.confidence - a.confidence)
        .slice(0, 5);
    }
    // Overall confidence = best single label score across all buckets
    const maxConfidence = Math.max(
      ...Object.values(matches).flatMap(m => m.map(l => l.confidence)),
      0
    );
    return {
      output: {
        topics: matches.topics,
        industries: matches.industries,
        tactics: matches.tactics,
        risks: matches.risks,
        sentiment: 0, // Rules don't detect sentiment
      },
      confidence: maxConfidence,
    };
  }

  /**
   * Stage 2: nearest-neighbour lookup against taxonomy label embeddings.
   * Confidence is the similarity score of the top match.
   */
  private async classifyByVector(embedding: number[]): Promise<{
    output: ClassifierOutput;
    confidence: number;
  }> {
    const results = await this.ctx.vectorize.query(embedding, {
      topK: 20,
      namespace: 'taxonomy',
    });
    const matches: Record<string, LabelScore[]> = {
      topics: [],
      industries: [],
      tactics: [],
      risks: [],
    };
    for (const match of results.matches) {
      // NOTE(review): when metadata is missing this yields the key
      // 'undefineds', which the bucket check below silently drops — confirm
      // intended. Same pluralization caveat as classifyByRules.
      const category = match.metadata?.category + 's';
      if (matches[category]) {
        matches[category].push({
          id: match.id,
          label: match.metadata?.label,
          confidence: match.score,
        });
      }
    }
    const maxConfidence = results.matches[0]?.score || 0;
    return {
      output: {
        topics: matches.topics.slice(0, 5),
        industries: matches.industries.slice(0, 3),
        tactics: matches.tactics.slice(0, 2),
        risks: matches.risks.slice(0, 3),
        sentiment: 0,
      },
      confidence: maxConfidence,
    };
  }

  /**
   * Stage 3: prompt the LLM with the taxonomy and the truncated article.
   * Returns empty buckets with confidence 0 when the model's JSON cannot be
   * parsed; the flat cost estimate is charged either way.
   */
  private async classifyByLLM(headline: string, body: string): Promise<{
    output: ClassifierOutput;
    confidence: number;
    cost: number;
  }> {
    const taxonomy = await this.getTaxonomyLabels();
    const prompt = `Classify this news article.
Available labels:
Topics: ${taxonomy.topics.join(', ')}
Industries: ${taxonomy.industries.join(', ')}
Tactics: ${taxonomy.tactics.join(', ')}
Risks: ${taxonomy.risks.join(', ')}
Article headline: ${headline}
Article body (truncated):
${body.slice(0, 2000)}
Respond with JSON only:
{
"topics": [{"label": "...", "confidence": 0.0-1.0}],
"industries": [{"label": "...", "confidence": 0.0-1.0}],
"tactics": [{"label": "...", "confidence": 0.0-1.0}],
"risks": [{"label": "...", "confidence": 0.0-1.0}],
"sentiment": -1.0 to 1.0,
"political_lean": -1.0 to 1.0 or null
}`;
    const response = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
      messages: [
        { role: 'system', content: 'You are a news classifier. Respond only with valid JSON.' },
        { role: 'user', content: prompt },
      ],
      max_tokens: 500,
    });
    try {
      const parsed = JSON.parse(response.response);
      return {
        output: {
          // NOTE(review): LLM items carry {label, confidence} but no id;
          // mergeResults later re-derives ids from the label slug.
          topics: parsed.topics || [],
          industries: parsed.industries || [],
          tactics: parsed.tactics || [],
          risks: parsed.risks || [],
          sentiment: parsed.sentiment || 0,
          politicalLean: parsed.political_lean,
        },
        confidence: 0.85, // LLM outputs are generally high confidence
        cost: 50, // Estimate for Workers AI
      };
    } catch (e) {
      // Unparseable model output: return an empty classification, keep cost
      return {
        output: {
          topics: [],
          industries: [],
          tactics: [],
          risks: [],
          sentiment: 0,
        },
        confidence: 0,
        cost: 50,
      };
    }
  }

  /**
   * Weighted union of per-stage label lists; ids are re-derived from label
   * slugs. NOTE(review): weights are positional (rules, vector, llm) — with
   * only two inputs they sum to 0.6, systematically deflating merged
   * confidences. Confirm that is intended.
   */
  private mergeResults(...results: ClassifierOutput[]): ClassifierOutput {
    // Weighted merge of multiple classification results
    const merged: ClassifierOutput = {
      topics: [],
      industries: [],
      tactics: [],
      risks: [],
      sentiment: 0,
    };
    const weights = [0.3, 0.3, 0.4]; // rules, vector, llm
    // Merge each category
    for (const category of ['topics', 'industries', 'tactics', 'risks'] as const) {
      const labelScores = new Map<string, { label: string; totalScore: number }>();
      results.forEach((result, i) => {
        const weight = weights[i] || 0.33;
        for (const item of result[category]) {
          const existing = labelScores.get(item.label) || { label: item.label, totalScore: 0 };
          existing.totalScore += item.confidence * weight;
          labelScores.set(item.label, existing);
        }
      });
      merged[category] = Array.from(labelScores.values())
        .map(({ label, totalScore }) => ({
          id: label.toLowerCase().replace(/\s+/g, '-'),
          label,
          confidence: Math.min(totalScore, 1),
        }))
        .sort((a, b) => b.confidence - a.confidence)
        .slice(0, 5);
    }
    // Average sentiment across the stages that produced a non-zero score
    const sentiments = results.map(r => r.sentiment).filter(s => s !== 0);
    merged.sentiment = sentiments.length > 0
      ? sentiments.reduce((a, b) => a + b, 0) / sentiments.length
      : 0;
    return merged;
  }

  /** Embed up to the first 8k characters of text with the BGE base model. */
  private async getEmbedding(text: string): Promise<number[]> {
    const result = await this.ctx.ai.run('@cf/baai/bge-base-en-v1.5', {
      text: [text.slice(0, 8000)],
    });
    return result.data[0];
  }

  /** Active taxonomy labels grouped into the four plural bucket keys. */
  private async getTaxonomyLabels(): Promise<Record<string, string[]>> {
    const labels = await this.ctx.db.prepare(
      'SELECT category, label FROM taxonomy_labels WHERE is_active = 1'
    ).all();
    const grouped: Record<string, string[]> = {
      topics: [],
      industries: [],
      tactics: [],
      risks: [],
    };
    for (const row of labels.results) {
      const key = row.category + 's'; // same pluralization caveat as above
      if (grouped[key]) {
        grouped[key].push(row.label);
      }
    }
    return grouped;
  }
}
Sentiment Analyzer
Specialized agent for sentiment analysis.
// src/agents/sentiment.ts
/**
 * Rule-based sentiment scorer. Counts positive and negative keyword hits and
 * maps them to a score in [-1, 1]; never calls the LLM, so cost is zero.
 */
class SentimentAgent extends BaseAgent<{ text: string }, number> {
  name = 'sentiment';
  version = '1.0.0';

  private positivePatterns = [
    /\b(breakthrough|triumph|exceeds|soars|surges)\b/gi,
    /\b(exceptional|outstanding|remarkable)\b/gi,
    /\b(growth|gains|success|wins|profit)\b/gi,
  ];
  private negativePatterns = [
    /\b(crisis|disaster|collapse|scandal)\b/gi,
    /\b(fraud|criminal|devastating)\b/gi,
    /\b(decline|drop|fall|plunge|slump)\b/gi,
  ];

  async process(input: AgentInput<{ text: string }>): Promise<AgentOutput<number>> {
    const begunAt = Date.now();
    const { text } = input.data;

    // Each hit moves the raw score by ±0.3; total hits drive the confidence.
    let rawScore = 0;
    let totalHits = 0;
    const polarities = [
      [1, this.positivePatterns],
      [-1, this.negativePatterns],
    ] as const;
    for (const [sign, patterns] of polarities) {
      for (const re of patterns) {
        const hits = (text.match(re) || []).length;
        rawScore += sign * (hits * 0.3);
        totalHits += hits;
      }
    }

    // Clamp to [-1, 1]; confidence grows with hit count, capped at 0.9.
    const clamped = Math.min(1, Math.max(-1, rawScore));
    const certainty = Math.min(totalHits * 0.2, 0.9);

    return {
      result: clamped,
      confidence: certainty,
      method: 'rules',
      costMicros: 0,
      latencyMs: Date.now() - begunAt,
    };
  }
}
Extraction Agents
Entity Extractor
Extracts named entities from article text.
// src/agents/entity-extractor.ts
// A deduplicated named entity extracted from article text.
interface Entity {
  name: string;                                              // normalized surface form
  type: 'person' | 'org' | 'location' | 'product' | 'event'; // 'event' is never produced by mapEntityType — TODO confirm producer
  mentions: number;                                          // occurrence count in the analyzed text
  sentiment?: number;                                        // not populated by EntityExtractorAgent — TODO confirm producer
  isPrimary: boolean;                                        // most-mentioned entity (and a close runner-up)
}
/**
 * Named-entity extractor backed by the Workers AI DistilBERT NER model.
 * Deduplicates mentions by normalized surface form and flags the most
 * frequently mentioned entity (plus a close runner-up) as primary.
 */
class EntityExtractorAgent extends BaseAgent<{ text: string }, Entity[]> {
  name = 'entity-extractor';
  version = '1.0.0';

  async process(input: AgentInput<{ text: string }>): Promise<AgentOutput<Entity[]>> {
    const begunAt = Date.now();
    const { text } = input.data;

    // Run NER over the first 5k characters only.
    const nerResult = await this.ctx.ai.run('@cf/huggingface/distilbert-ner', {
      text: text.slice(0, 5000),
    });

    // Deduplicate by normalized name; the first occurrence fixes the type.
    const byName = new Map<string, Entity>();
    for (const hit of nerResult) {
      const key = this.normalizeEntityName(hit.word);
      const seen = byName.get(key);
      if (seen) {
        seen.mentions++;
      } else {
        byName.set(key, {
          name: key,
          type: this.mapEntityType(hit.entity_group),
          mentions: 1,
          isPrimary: false,
        });
      }
    }

    // Most-mentioned first; the leader is primary, as is a runner-up with at
    // least 70% of the leader's mention count.
    const ranked = [...byName.values()].sort((a, b) => b.mentions - a.mentions);
    const [leader, runnerUp] = ranked;
    if (leader) {
      leader.isPrimary = true;
      if (runnerUp && runnerUp.mentions >= leader.mentions * 0.7) {
        runnerUp.isPrimary = true;
      }
    }

    return {
      result: ranked.slice(0, 20),
      confidence: 0.8,
      method: 'llm', // NER model counts as LLM
      costMicros: 20,
      latencyMs: Date.now() - begunAt,
    };
  }

  /** Strip a leading article and normalize curly apostrophes. */
  private normalizeEntityName(name: string): string {
    return name
      .replace(/^(the|a|an)\s+/i, '')
      .replace(/['']/g, "'")
      .trim();
  }

  /** Map the model's coarse NER tags onto our entity types ('org' default). */
  private mapEntityType(nerType: string): Entity['type'] {
    const mapping: Record<string, Entity['type']> = {
      'PER': 'person',
      'ORG': 'org',
      'LOC': 'location',
      'MISC': 'product',
    };
    return mapping[nerType] || 'org';
  }
}
Location Extractor
Extracts and geocodes locations from text.
// src/agents/location-extractor.ts
// A location mention resolved against the location database.
interface LocationMatch {
  locationId: string;   // id of the matched location record
  name: string;         // canonical name (falls back to the raw mention)
  type: string;         // e.g. 'city' | 'state' | 'country', or 'unknown'
  mentionCount: number; // occurrences of this exact surface form in the text
  isPrimary: boolean;   // true for the most-mentioned location
  confidence: number;   // vector similarity score of the match
}
/**
 * Location extractor: NER (LOC spans) → vector match against the location
 * database. Only matches scoring above 0.8 are kept; the most-mentioned
 * location is flagged primary.
 */
class LocationExtractorAgent extends BaseAgent<{ text: string }, LocationMatch[]> {
  name = 'location-extractor';
  version = '1.0.0';

  async process(input: AgentInput<{ text: string }>): Promise<AgentOutput<LocationMatch[]>> {
    const startTime = Date.now();
    const { text } = input.data;
    // First, get location entities from NER (first 5k characters only)
    const nerResult = await this.ctx.ai.run('@cf/huggingface/distilbert-ner', {
      text: text.slice(0, 5000),
    });
    const locationMentions = nerResult
      .filter((e: any) => e.entity_group === 'LOC')
      .map((e: any) => e.word);
    // Match each unique mention against our location database via vector
    // search. NOTE(review): embeddings are fetched sequentially per mention,
    // and costMicros below is a flat 30 regardless of mention count —
    // confirm both are acceptable.
    const matches: LocationMatch[] = [];
    for (const mention of [...new Set(locationMentions)]) {
      const embedding = await this.getEmbedding(mention);
      const results = await this.ctx.vectorize.query(embedding, {
        topK: 3,
        namespace: 'locations',
        filter: { type: { $in: ['city', 'state', 'country'] } },
      });
      if (results.matches[0]?.score > 0.8) {
        const match = results.matches[0];
        // Frequency of this exact surface form in the raw mention list
        const mentionCount = locationMentions.filter(m => m === mention).length;
        matches.push({
          locationId: match.id,
          name: match.metadata?.name || mention,
          type: match.metadata?.type || 'unknown',
          mentionCount,
          isPrimary: false,
          confidence: match.score,
        });
      }
    }
    // Sort by mention count and mark the most-mentioned as primary
    matches.sort((a, b) => b.mentionCount - a.mentionCount);
    if (matches.length > 0) {
      matches[0].isPrimary = true;
    }
    return {
      result: matches,
      confidence: matches[0]?.confidence || 0,
      method: 'hybrid',
      costMicros: 30,
      latencyMs: Date.now() - startTime,
    };
  }

  /** Embed a single mention string with the BGE base model. */
  private async getEmbedding(text: string): Promise<number[]> {
    const result = await this.ctx.ai.run('@cf/baai/bge-base-en-v1.5', {
      text: [text],
    });
    return result.data[0];
  }
}
Generation Agents
Summarizer Agent
Generates article summaries.
// src/agents/summarizer.ts
/**
 * Article summarizer. Short articles (<500 chars) are summarized for free by
 * taking the first paragraph; longer articles go through the LLM.
 */
class SummarizerAgent extends BaseAgent<{ headline: string; body: string }, string> {
  name = 'summarizer';
  version = '1.0.0';

  async process(input: AgentInput<{ headline: string; body: string }>): Promise<AgentOutput<string>> {
    const begunAt = Date.now();
    const { headline, body } = input.data;

    // Cheap path: for short articles the first paragraph is the summary.
    if (body.length < 500) {
      const [firstPara] = body.split('\n\n');
      return {
        result: firstPara,
        confidence: 0.9,
        method: 'rules',
        costMicros: 0,
        latencyMs: Date.now() - begunAt,
      };
    }

    // LLM path: summarize the first 3k characters.
    const prompt = `Summarize this news article in 2-3 sentences. Be concise and factual.
Headline: ${headline}
Article:
${body.slice(0, 3000)}
Summary:`;
    const response = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
      messages: [
        { role: 'user', content: prompt },
      ],
      max_tokens: 150,
    });

    return {
      result: response.response.trim(),
      confidence: 0.85,
      method: 'llm',
      costMicros: 50,
      latencyMs: Date.now() - begunAt,
    };
  }
}
Briefing Agent
Generates daily/weekly briefings.
// src/agents/briefer.ts
// Input for briefing generation: pre-matched articles for one profile.
interface BriefingInput {
  profileId: string;
  period: 'daily' | 'weekly';
  articles: Array<{
    id: string;
    headline: string;
    summary: string;
    topics: string[]; // ordered; topics[0] is treated as the primary topic
    relevanceScore: number;
  }>;
}

// Generated briefing: an overall summary plus one section per topic.
interface BriefingOutput {
  title: string;
  summary: string; // executive summary spanning all sections
  sections: Array<{
    title: string;        // topic name
    content: string;      // LLM-written section summary
    articleIds: string[]; // articles the section covers (max 5)
  }>;
}
/**
 * Briefing generator: groups supplied articles by primary topic, writes one
 * LLM summary per topic (topics with fewer than 2 articles are skipped),
 * then an overall executive summary. Cost: ~50 micros per LLM call.
 */
class BrieferAgent extends BaseAgent<BriefingInput, BriefingOutput> {
  name = 'briefer';
  version = '1.0.0';

  async process(input: AgentInput<BriefingInput>): Promise<AgentOutput<BriefingOutput>> {
    const startTime = Date.now();
    const { period, articles } = input.data;
    // Group articles by their first (primary) topic
    const topicGroups = new Map<string, typeof articles>();
    for (const article of articles) {
      const primaryTopic = article.topics[0] || 'General';
      if (!topicGroups.has(primaryTopic)) {
        topicGroups.set(primaryTopic, []);
      }
      topicGroups.get(primaryTopic)!.push(article);
    }
    // Generate one section per topic, max 5 articles each.
    // NOTE(review): LLM calls run sequentially — consider batching if
    // briefing latency matters.
    const sections: BriefingOutput['sections'] = [];
    let totalCost = 0;
    for (const [topic, topicArticles] of topicGroups) {
      if (topicArticles.length < 2) continue; // skip thin topics
      const prompt = `Write a brief 2-3 sentence summary of these ${topic} news stories:
${topicArticles.slice(0, 5).map(a => `- ${a.headline}: ${a.summary}`).join('\n')}
Summary:`;
      const response = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
        messages: [{ role: 'user', content: prompt }],
        max_tokens: 150,
      });
      totalCost += 50;
      sections.push({
        title: topic,
        content: response.response.trim(),
        articleIds: topicArticles.slice(0, 5).map(a => a.id),
      });
    }
    // Generate the overall executive summary from the section content
    const overallPrompt = `Write a 2-sentence executive summary of this ${period} news briefing:
Topics covered:
${sections.map(s => `- ${s.title}: ${s.content.slice(0, 100)}...`).join('\n')}
Executive Summary:`;
    const overallResponse = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
      messages: [{ role: 'user', content: overallPrompt }],
      max_tokens: 100,
    });
    totalCost += 50;
    return {
      result: {
        title: `${period === 'daily' ? 'Daily' : 'Weekly'} Briefing`,
        summary: overallResponse.response.trim(),
        sections,
      },
      confidence: 0.85,
      method: 'llm',
      costMicros: totalCost,
      latencyMs: Date.now() - startTime,
    };
  }
}
Q&A Agent
Answers questions about articles using RAG.
// src/agents/answerer.ts
// A question to answer over recently ingested articles.
interface QAInput {
  question: string;
  profileId?: string;   // NOTE(review): accepted but not used by AnswererAgent — confirm intent
  contextDays?: number; // recency window for context articles (default 7)
}

// The answer plus the articles it was grounded in.
interface QAOutput {
  answer: string;
  sources: Array<{ id: string; headline: string; relevance: number }>;
  confidence: number; // top vector match score
}
/**
 * Retrieval-augmented Q&A: embed the question, retrieve similar articles
 * from Vectorize, fetch their content from D1 (restricted to the recency
 * window), and ask the LLM to answer from that context only.
 */
class AnswererAgent extends BaseAgent<QAInput, QAOutput> {
  name = 'answerer';
  version = '1.0.0';

  async process(input: AgentInput<QAInput>): Promise<AgentOutput<QAOutput>> {
    const startTime = Date.now();
    const { question, profileId, contextDays = 7 } = input.data;
    // 1. Embed the question
    const questionEmbedding = await this.getEmbedding(question);
    // 2. Find semantically relevant articles
    const results = await this.ctx.vectorize.query(questionEmbedding, {
      topK: 10,
      namespace: 'articles',
    });
    const articleIds = results.matches.map(m => m.id);
    // Guard: no matches would produce invalid SQL (`IN ()` is a syntax
    // error) and a pointless LLM call — answer the honest way instead.
    if (articleIds.length === 0) {
      return {
        result: {
          answer: 'I cannot determine this from the available articles.',
          sources: [],
          confidence: 0,
        },
        confidence: 0,
        method: 'llm',
        costMicros: 20, // embedding only
        latencyMs: Date.now() - startTime,
      };
    }
    // 3. Fetch article content. contextDays is bound as a parameter instead
    //    of being interpolated into the SQL string.
    const articles = await this.ctx.db.prepare(`
SELECT id, headline, summary, body_text
FROM articles
WHERE id IN (${articleIds.map(() => '?').join(',')})
AND published_at > datetime('now', '-' || ? || ' days')
`).bind(...articleIds, contextDays).all();
    // 4. Build context from up to 5 of the fetched articles
    const context = articles.results
      .slice(0, 5)
      .map(a => `[${a.headline}]\n${a.summary || a.body_text?.slice(0, 500)}`)
      .join('\n\n---\n\n');
    // 5. Generate answer
    const prompt = `Based on these recent news articles, answer the following question.
Context:
${context}
Question: ${question}
Provide a direct, factual answer based only on the information in the articles. If the answer cannot be determined from the context, say so.
Answer:`;
    const response = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
      messages: [
        { role: 'system', content: 'You are a news analyst. Answer questions based on provided context.' },
        { role: 'user', content: prompt },
      ],
      max_tokens: 300,
    });
    // Fix: resolve source headlines by article id. The SQL result is
    // date-filtered and carries no ordering guarantee, so the previous
    // positional alignment with the vector matches attributed wrong
    // headlines (or 'Unknown') to sources.
    const headlineById = new Map(articles.results.map(a => [a.id, a.headline]));
    return {
      result: {
        answer: response.response.trim(),
        sources: results.matches.slice(0, 5).map(m => ({
          id: m.id,
          headline: headlineById.get(m.id) || 'Unknown',
          relevance: m.score,
        })),
        confidence: results.matches[0]?.score || 0,
      },
      confidence: results.matches[0]?.score || 0,
      method: 'llm',
      costMicros: 100,
      latencyMs: Date.now() - startTime,
    };
  }

  /** Embed the question text with the BGE base model. */
  private async getEmbedding(text: string): Promise<number[]> {
    const result = await this.ctx.ai.run('@cf/baai/bge-base-en-v1.5', {
      text: [text],
    });
    return result.data[0];
  }
}
Monitoring Agents
Trend Detection Agent
Identifies emerging trends from article velocity.
// src/agents/trend-detector.ts
// An emerging-trend signal derived from keyword velocity data.
interface TrendSignal {
  type: 'keyword_spike' | 'entity_surge' | 'topic_emergence'; // only 'keyword_spike' is produced by TrendDetectorAgent today
  target: string;        // the keyword/entity/topic concerned
  baselineValue: number; // count in the preceding window
  currentValue: number;  // count in the current window
  deltaPct: number;      // percent change vs baseline
  velocity: number;      // articles per hour in the current window
  confidence: number;    // min(relative delta, 1)
}
/**
 * Keyword-spike detector: compares keyword article counts in the last
 * `windowHours` against the preceding equal-length window and reports
 * keywords that grew by more than 50%. Pure SQL + arithmetic; no LLM cost.
 */
class TrendDetectorAgent extends BaseAgent<{ windowHours: number }, TrendSignal[]> {
  name = 'trend-detector';
  version = '1.0.0';

  async process(input: AgentInput<{ windowHours: number }>): Promise<AgentOutput<TrendSignal[]>> {
    const startTime = Date.now();
    const { windowHours } = input.data;
    // Current-window keyword counts (needs >5 articles to be considered).
    // NOTE(review): windowHours is interpolated into the SQL string — safe
    // only while callers always pass a number; prefer a bound parameter.
    const velocityData = await this.ctx.db.prepare(`
SELECT
keyword,
SUM(article_count) as current_count,
AVG(article_count) as avg_count
FROM keyword_velocity
WHERE hour > datetime('now', '-${windowHours} hours')
GROUP BY keyword
HAVING current_count > 5
ORDER BY current_count DESC
LIMIT 100
`).all();
    // Baseline: the immediately preceding window of the same length
    const baselineData = await this.ctx.db.prepare(`
SELECT
keyword,
SUM(article_count) as baseline_count
FROM keyword_velocity
WHERE hour BETWEEN datetime('now', '-${windowHours * 2} hours')
AND datetime('now', '-${windowHours} hours')
GROUP BY keyword
`).all();
    const baselineMap = new Map(
      baselineData.results.map(r => [r.keyword, r.baseline_count])
    );
    // Flag keywords whose count rose >50% vs baseline (falling back to the
    // keyword's own in-window average when it has no baseline row)
    const signals: TrendSignal[] = [];
    for (const row of velocityData.results) {
      const baseline = baselineMap.get(row.keyword) || row.avg_count;
      const delta = baseline > 0 ? (row.current_count - baseline) / baseline : 0;
      if (delta > 0.5) { // 50% increase
        signals.push({
          type: 'keyword_spike',
          target: row.keyword,
          baselineValue: baseline,
          currentValue: row.current_count,
          deltaPct: delta * 100,
          velocity: row.current_count / windowHours, // articles per hour
          confidence: Math.min(delta, 1),
        });
      }
    }
    return {
      result: signals.sort((a, b) => b.deltaPct - a.deltaPct).slice(0, 20),
      confidence: 0.8,
      method: 'rules',
      costMicros: 0,
      latencyMs: Date.now() - startTime,
    };
  }
}
Learning Agents
Feedback Learner
Incorporates user feedback to improve classification.
// src/agents/feedback-learner.ts
// A classification correction for one article.
interface FeedbackInput {
  articleId: string;
  originalLabels: Record<string, string[]>;  // labels the system assigned
  correctedLabels: Record<string, string[]>; // labels after correction
  feedbackType: 'customer' | 'admin' | 'heuristic'; // admin corrections are applied immediately
}
/**
 * Records classification corrections, queues taxonomy centroid
 * recalibration once enough feedback accumulates, and applies admin
 * corrections to the article immediately.
 */
class FeedbackLearnerAgent extends BaseAgent<FeedbackInput, { updated: boolean }> {
  name = 'feedback-learner';
  version = '1.0.0';

  async process(input: AgentInput<FeedbackInput>): Promise<AgentOutput<{ updated: boolean }>> {
    const startTime = Date.now();
    const { articleId, originalLabels, correctedLabels, feedbackType } = input.data;
    // 1. Store feedback for later analysis / centroid recalculation
    await this.ctx.db.prepare(`
INSERT INTO classification_feedback
(id, article_id, original_labels, corrected_labels, reviewer, incorporated)
VALUES (?, ?, ?, ?, ?, 0)
`).bind(
      crypto.randomUUID(),
      articleId,
      JSON.stringify(originalLabels),
      JSON.stringify(correctedLabels),
      feedbackType
    ).run();
    // 2. Once 100+ corrections are pending, flag the (async) centroid
    //    recalculation job via KV. Fix: guard against first() returning a
    //    null row before dereferencing .count.
    const feedbackCount = await this.ctx.db.prepare(`
SELECT COUNT(*) as count FROM classification_feedback
WHERE incorporated = 0
`).first();
    if ((feedbackCount?.count ?? 0) >= 100) {
      // Trigger centroid recalculation (async)
      await this.ctx.kv.put('taxonomy_recalc_needed', 'true');
    }
    // 3. Admin corrections are trusted — apply them to the article right away
    if (feedbackType === 'admin') {
      await this.updateArticleClassification(articleId, correctedLabels);
    }
    return {
      result: { updated: feedbackType === 'admin' },
      confidence: 1,
      method: 'rules',
      costMicros: 0,
      latencyMs: Date.now() - startTime,
    };
  }

  /** Overwrite the stored classification with human-corrected labels. */
  private async updateArticleClassification(
    articleId: string,
    labels: Record<string, string[]>
  ) {
    await this.ctx.db.prepare(`
UPDATE article_classifications
SET topics = ?, industries = ?, tactics = ?, risks = ?,
classified_by = 'human', needs_review = 0
WHERE article_id = ?
`).bind(
      JSON.stringify(labels.topics || []),
      JSON.stringify(labels.industries || []),
      JSON.stringify(labels.tactics || []),
      JSON.stringify(labels.risks || []),
      articleId
    ).run();
  }
}
Orchestration
Agent Pipeline
// src/agents/pipeline.ts
// Declarative pipeline definition: an ordered list of agent stages.
interface PipelineConfig {
  stages: Array<{
    agent: string;                                       // registry name (resolved via getAgent)
    input: (ctx: PipelineContext) => any;                // build the agent's input from shared context
    output: (ctx: PipelineContext, result: any) => void; // write the agent's result back into context
    condition?: (ctx: PipelineContext) => boolean;       // stage runs only when this returns true
  }>;
}

// Mutable state threaded through one pipeline run.
interface PipelineContext {
  articleId: string;
  article: Article;             // Article type is declared elsewhere
  results: Record<string, any>; // full AgentOutput per stage, keyed by agent name
  totalCost: number;            // accumulated costMicros across stages
}
/**
 * Drive one article through a sequence of agents. Each stage builds its
 * input from the shared context, runs its agent, and writes the result
 * back; stages whose condition returns false are skipped. Costs accumulate
 * across stages, and each stage's full output envelope is retained.
 */
async function runPipeline(
  config: PipelineConfig,
  initialContext: Partial<PipelineContext>,
  agentCtx: AgentContext
): Promise<PipelineContext> {
  const pipelineCtx: PipelineContext = {
    articleId: initialContext.articleId!,
    article: initialContext.article!,
    results: {},
    totalCost: 0,
  };
  for (const stage of config.stages) {
    // Conditional stages are skipped without cost.
    const skip = stage.condition ? !stage.condition(pipelineCtx) : false;
    if (skip) {
      continue;
    }
    const agent = getAgent(stage.agent);
    const stageInput = stage.input(pipelineCtx);
    const stageOutput = await agent.process({ data: stageInput });
    stage.output(pipelineCtx, stageOutput.result);
    pipelineCtx.totalCost += stageOutput.costMicros;
    pipelineCtx.results[stage.agent] = stageOutput;
  }
  return pipelineCtx;
}
// Article processing pipeline
const articlePipeline: PipelineConfig = {
stages: [
{
agent: 'summarizer',
input: (ctx) => ({ headline: ctx.article.headline, body: ctx.article.body_text }),
output: (ctx, result) => { ctx.article.summary = result; },
},
{
agent: 'entity-extractor',
input: (ctx) => ({ text: ctx.article.body_text }),
output: (ctx, result) => { ctx.results.entities = result; },
},
{
agent: 'location-extractor',
input: (ctx) => ({ text: ctx.article.body_text }),
output: (ctx, result) => { ctx.results.locations = result; },
},
{
agent: 'classifier',
input: (ctx) => ({
articleId: ctx.articleId,
headline: ctx.article.headline,
body: ctx.article.body_text,
}),
output: (ctx, result) => { ctx.results.classification = result; },
},
{
agent: 'sentiment',
input: (ctx) => ({ text: ctx.article.body_text }),
output: (ctx, result) => { ctx.results.sentiment = result; },
},
],
};
Prompt Engineering
Prompt Templates
// src/agents/prompts.ts
// Centralized prompt templates, keyed by task.
// NOTE(review): ClassifierAgent, SummarizerAgent, AnswererAgent and
// BrieferAgent currently build their prompts inline rather than using these
// templates — confirm which copy is canonical before editing either.
export const PROMPTS = {
  classification: {
    system: `You are a news classifier. Analyze articles and assign relevant labels from the provided taxonomy. Be precise and only assign labels with high confidence.`,
    // Flat taxonomy variant (single label list, unlike the agent's 4 buckets)
    user: (headline: string, body: string, taxonomy: string[]) => `
Classify this news article using ONLY labels from this taxonomy:
${taxonomy.join(', ')}
Headline: ${headline}
Article (truncated):
${body.slice(0, 2500)}
Return JSON with this structure:
{
"labels": [{"label": "...", "confidence": 0.0-1.0}],
"reasoning": "brief explanation"
}`,
  },
  summary: {
    system: `You are a news summarizer. Create concise, factual summaries.`,
    user: (headline: string, body: string) => `
Summarize this article in 2-3 sentences. Focus on the key facts.
Headline: ${headline}
Article:
${body.slice(0, 3000)}
Summary:`,
  },
  qa: {
    system: `You are a news analyst. Answer questions based only on the provided context. If the answer cannot be determined, say "I cannot determine this from the available articles."`,
    user: (question: string, context: string) => `
Context:
${context}
Question: ${question}
Answer:`,
  },
  briefing: {
    system: `You are a news briefing writer. Create executive-style summaries.`,
    // Per-topic section prompt; articles are pre-rendered one-line strings
    sectionPrompt: (topic: string, articles: string[]) => `
Write a 2-3 sentence summary of these ${topic} news stories:
${articles.map((a, i) => `${i + 1}. ${a}`).join('\n')}
Summary:`,
  },
};
Cost Optimization
Budget Management
// src/agents/cost-manager.ts
/**
 * Tracks daily per-service spend in KV and gates expensive operations
 * against a configurable daily budget (default 100,000,000 micros = $100).
 */
class CostManager {
  constructor(
    private db: D1Database,
    private kv: KVNamespace
  ) {}

  /** Whether `service` may still spend today, and how many micros remain. */
  async checkBudget(service: string): Promise<{ allowed: boolean; remaining: number }> {
    const budgetKey = `budget:${service}:daily`;
    const spentKey = `spent:${service}:${this.getTodayKey()}`;
    const [budgetRaw, spentRaw] = await Promise.all([
      this.kv.get(budgetKey),
      this.kv.get(spentKey),
    ]);
    const budgetMicros = budgetRaw ? parseInt(budgetRaw) : 100000000; // $100 default
    const spentMicros = spentRaw ? parseInt(spentRaw) : 0;
    const remaining = budgetMicros - spentMicros;
    return { allowed: remaining > 0, remaining };
  }

  /** Add `costMicros` to today's spend counter (entry expires after 2 days). */
  async recordSpend(service: string, costMicros: number): Promise<void> {
    const spentKey = `spent:${service}:${this.getTodayKey()}`;
    const previous = await this.kv.get(spentKey);
    const updated = (previous ? parseInt(previous) : 0) + costMicros;
    await this.kv.put(spentKey, String(updated), {
      expirationTtl: 86400 * 2, // 2 days
    });
  }

  /** Run the expensive path only while at least 1000 micros remain. */
  async getOptimalMethod(
    service: string,
    options: { cheap: () => Promise<any>; expensive: () => Promise<any> }
  ): Promise<any> {
    const { allowed, remaining } = await this.checkBudget(service);
    const useCheap = !allowed || remaining < 1000;
    return useCheap ? options.cheap() : options.expensive();
  }

  /** YYYY-MM-DD key for today (UTC, via toISOString). */
  private getTodayKey(): string {
    return new Date().toISOString().split('T')[0];
  }
}
Caching Strategy
// src/agents/cache.ts
/**
 * KV-backed memoization for agent computations, plus a long-lived cache for
 * text embeddings keyed by a SHA-256 prefix of the text.
 */
class AgentCache {
  constructor(private kv: KVNamespace) {}

  /** Return the cached JSON value for `key`, or compute, store and return it. */
  async getOrCompute<T>(
    key: string,
    compute: () => Promise<T>,
    ttlSeconds: number = 3600
  ): Promise<T> {
    const hit = await this.kv.get(key);
    if (hit) {
      return JSON.parse(hit);
    }
    const fresh = await compute();
    await this.kv.put(key, JSON.stringify(fresh), {
      expirationTtl: ttlSeconds,
    });
    return fresh;
  }

  /** Cache an embedding for 30 days (embeddings are stable per text). */
  async cacheEmbedding(text: string, embedding: number[]): Promise<void> {
    const digest = await this.hashText(text);
    await this.kv.put(`emb:${digest}`, JSON.stringify(embedding), {
      expirationTtl: 86400 * 30, // 30 days
    });
  }

  /** Look up a previously cached embedding, or null on a miss. */
  async getEmbedding(text: string): Promise<number[] | null> {
    const digest = await this.hashText(text);
    const hit = await this.kv.get(`emb:${digest}`);
    return hit ? JSON.parse(hit) : null;
  }

  /** 16-hex-char SHA-256 prefix of the first 1000 characters of `text`. */
  private async hashText(text: string): Promise<string> {
    const bytes = new TextEncoder().encode(text.slice(0, 1000));
    const digest = await crypto.subtle.digest('SHA-256', bytes);
    return Array.from(new Uint8Array(digest), b => b.toString(16).padStart(2, '0'))
      .join('')
      .slice(0, 16);
  }
}
Last updated: 2024-01-15