Skip to main content

AI Agents and Automation

Specification for AI-powered agents, automation workflows, and intelligent processing


Table of Contents

  1. Overview
  2. Agent Architecture
  3. Classification Agents
  4. Extraction Agents
  5. Generation Agents
  6. Monitoring Agents
  7. Learning Agents
  8. Orchestration
  9. Prompt Engineering
  10. Cost Optimization

Overview

Agent Philosophy

Agents in the platform follow a tiered approach:

  1. Rules First - Fast, free, deterministic
  2. Vectors Second - Fast, cheap, semantic
  3. LLM Third - Slow, expensive, intelligent

Each agent is designed to minimize LLM calls while maximizing accuracy.

Agent Types

| Agent      | Purpose                 | Trigger        | Cost     |
| ---------- | ----------------------- | -------------- | -------- |
| Classifier | Categorize articles     | New article    | Low-High |
| Extractor  | Extract entities/claims | New article    | Medium   |
| Summarizer | Generate summaries      | New article    | Medium   |
| Clusterer  | Group into stories      | New article    | Low      |
| Matcher    | Match to profiles       | New article    | Low      |
| Briefer    | Generate briefings      | Scheduled      | High     |
| Answerer   | Q&A over articles       | API request    | High     |
| Reviewer   | Quality control         | Low confidence | Medium   |
| Learner    | Improve models          | Feedback       | Low      |

Agent Architecture

Base Agent Interface

// src/agents/base.ts

// Shared environment handed to every agent: storage, vector search,
// model inference, and cost accounting (Cloudflare Workers bindings).
interface AgentContext {
// D1 relational database binding.
db: D1Database;
// Vectorize index for semantic similarity queries.
vectorize: VectorizeIndex;
// Workers AI binding for embeddings, LLM, and NER inference.
ai: Ai;
// KV namespace used for budgets, thresholds, and caches.
kv: KVNamespace;
// Records per-service/per-agent spend; used by BaseAgent.trackCost.
costTracker: CostTracker;
}

// Generic envelope for an agent invocation's payload.
interface AgentInput<T> {
// Agent-specific payload.
data: T;
// Free-form tuning flags; interpretation is agent-specific.
options?: Record<string, any>;
// Scheduling hint; not read by any agent in this file — TODO confirm use.
priority?: 'high' | 'normal' | 'low';
}

// Generic envelope for an agent's result plus execution metadata.
interface AgentOutput<T> {
result: T;
// 0..1 self-reported confidence in `result`.
confidence: number;
// Which processing tier produced the result.
method: 'rules' | 'vector' | 'llm' | 'hybrid';
// Estimated invocation cost in micro-dollars.
costMicros: number;
latencyMs: number;
// Agent-specific extras (e.g. { needsReview: true }).
metadata?: Record<string, any>;
}

/**
 * Common base for all agents: holds the shared context and provides
 * cost-tracking and LLM-gating helpers for subclasses.
 */
abstract class BaseAgent<TInput, TOutput> {
  constructor(protected ctx: AgentContext) {}

  /** Unique registry name of the agent. */
  abstract name: string;
  /** Semantic version of the agent implementation. */
  abstract version: string;

  /** Run the agent on one input, reporting result plus execution metadata. */
  abstract process(input: AgentInput<TInput>): Promise<AgentOutput<TOutput>>;

  /** Record one billable operation against this agent's name. */
  protected async trackCost(service: string, operation: string, costMicros: number) {
    const entry = {
      service,
      operation,
      cost_micros: costMicros,
      agent: this.name,
    };
    await this.ctx.costTracker.track(entry);
  }

  /**
   * Decide whether escalating to the LLM tier is worthwhile: never when the
   * daily budget is exhausted, otherwise only when the vector-tier
   * confidence falls below the configured threshold (default 0.7).
   */
  protected async shouldUseLLM(vectorConfidence: number): Promise<boolean> {
    const budget = await this.ctx.kv.get('llm_daily_budget');
    const spent = await this.ctx.kv.get('llm_daily_spent');

    const budgetExhausted =
      Boolean(spent) && Boolean(budget) && parseInt(spent!) >= parseInt(budget!);
    if (budgetExhausted) {
      return false; // Budget exhausted
    }

    const threshold = (await this.ctx.kv.get('llm_confidence_threshold')) || '0.7';
    return vectorConfidence < parseFloat(threshold);
  }
}

Agent Registry

// src/agents/registry.ts

/** Process-wide registry mapping agent name -> agent instance. */
const agents = new Map<string, BaseAgent<any, any>>();

/** Add (or replace) an agent under its declared name. */
export function registerAgent(agent: BaseAgent<any, any>) {
  agents.set(agent.name, agent);
}

/** Look up a registered agent by name; throws if it was never registered. */
export function getAgent<T extends BaseAgent<any, any>>(name: string): T {
  const found = agents.get(name);
  if (found === undefined) {
    throw new Error(`Agent not found: ${name}`);
  }
  return found as T;
}

/** Instantiate and register every agent once at startup. */
export function initializeAgents(ctx: AgentContext) {
  const all = [
    new ClassifierAgent(ctx),
    new EntityExtractorAgent(ctx),
    new SummarizerAgent(ctx),
    new ClusterAgent(ctx),
    new MatcherAgent(ctx),
    new BrieferAgent(ctx),
    new AnswererAgent(ctx),
  ];
  for (const agent of all) {
    registerAgent(agent);
  }
}

Classification Agents

Topic Classifier

Assigns topic labels from taxonomy to articles.

// src/agents/classifier.ts

// Input to the topic classifier: one article's text plus identifiers.
interface ClassifierInput {
articleId: string;
headline: string;
body: string;
// Publisher/source name; not read by ClassifierAgent — TODO confirm intent.
source?: string;
}

// Multi-facet classification result. LabelScore is declared elsewhere;
// presumably { id, label, confidence } — verify against its definition.
interface ClassifierOutput {
topics: LabelScore[];
industries: LabelScore[];
tactics: LabelScore[];
risks: LabelScore[];
// -1..1 overall sentiment (0 when not detected).
sentiment: number;
// -1..1 political lean; only populated by the LLM tier.
politicalLean?: number;
}

/**
 * Tiered article classifier. Escalates through three tiers — keyword rules
 * (free), vector similarity against the taxonomy namespace (cheap), and an
 * LLM (expensive) — stopping as soon as a tier is confident enough, and
 * merging tier outputs when more than one ran.
 */
class ClassifierAgent extends BaseAgent<ClassifierInput, ClassifierOutput> {
  name = 'classifier';
  version = '1.0.0';

  // Maps a taxonomy_labels.category value onto its result bucket.
  // FIX: the previous naive `category + 's'` pluralization produced
  // "industrys" for the industry category (and "topicss" etc. if categories
  // were stored plural), so those labels silently never reached their
  // bucket. Accepts singular or plural category values.
  private static readonly CATEGORY_BUCKETS: Record<
    string,
    'topics' | 'industries' | 'tactics' | 'risks'
  > = {
    topic: 'topics',
    topics: 'topics',
    industry: 'industries',
    industries: 'industries',
    tactic: 'tactics',
    tactics: 'tactics',
    risk: 'risks',
    risks: 'risks',
  };

  /** Resolve a raw category value to its output bucket, or undefined. */
  private bucketFor(category: unknown): 'topics' | 'industries' | 'tactics' | 'risks' | undefined {
    return ClassifierAgent.CATEGORY_BUCKETS[String(category)];
  }

  async process(input: AgentInput<ClassifierInput>): Promise<AgentOutput<ClassifierOutput>> {
    const startTime = Date.now();
    let method: 'rules' | 'vector' | 'llm' | 'hybrid' = 'rules';
    let totalCost = 0;

    const { headline, body } = input.data;
    const text = `${headline}\n\n${body}`;

    // Stage 1: keyword rules — free and fast; accept outright at >= 0.8.
    const ruleResults = await this.classifyByRules(text);

    if (ruleResults.confidence >= 0.8) {
      return {
        result: ruleResults.output,
        confidence: ruleResults.confidence,
        method: 'rules',
        costMicros: 0,
        latencyMs: Date.now() - startTime,
      };
    }

    // Stage 2: vector similarity — one embedding plus an index query.
    const embedding = await this.getEmbedding(text);
    totalCost += 20; // Embedding cost

    const vectorResults = await this.classifyByVector(embedding);
    method = 'vector';

    if (vectorResults.confidence >= 0.75) {
      // Confident enough: merge rule and vector results and stop here.
      const merged = this.mergeResults(ruleResults.output, vectorResults.output);
      return {
        result: merged,
        confidence: vectorResults.confidence,
        method: 'hybrid',
        costMicros: totalCost,
        latencyMs: Date.now() - startTime,
      };
    }

    // Stage 3: LLM — only when the budget/threshold gate allows it.
    if (await this.shouldUseLLM(vectorResults.confidence)) {
      const llmResults = await this.classifyByLLM(headline, body);
      totalCost += llmResults.cost;
      method = 'llm';

      const merged = this.mergeResults(
        ruleResults.output,
        vectorResults.output,
        llmResults.output
      );

      return {
        result: merged,
        confidence: Math.max(vectorResults.confidence, llmResults.confidence),
        method: 'hybrid',
        costMicros: totalCost,
        latencyMs: Date.now() - startTime,
      };
    }

    // All tiers inconclusive (or the LLM was gated off): return the vector
    // result flagged for human review.
    return {
      result: vectorResults.output,
      confidence: vectorResults.confidence,
      method,
      costMicros: totalCost,
      latencyMs: Date.now() - startTime,
      metadata: { needsReview: true },
    };
  }

  /**
   * Tier 1: match each active taxonomy label's keyword regexes against the
   * text. Each regex hit adds 0.1 to the label's score (capped at 0.95).
   * Overall confidence is the best single label score.
   */
  private async classifyByRules(text: string): Promise<{
    output: ClassifierOutput;
    confidence: number;
  }> {
    const labels = await this.ctx.db.prepare(
      'SELECT * FROM taxonomy_labels WHERE is_active = 1'
    ).all();

    const matches: Record<string, LabelScore[]> = {
      topics: [],
      industries: [],
      tactics: [],
      risks: [],
    };

    for (const label of labels.results) {
      // keyword_patterns is a JSON array of regex source strings.
      // NOTE(review): patterns come from the DB and are compiled verbatim —
      // a pathological pattern is a ReDoS risk; confirm they are
      // admin-curated only.
      const patterns = JSON.parse(label.keyword_patterns || '[]');
      let matchScore = 0;

      for (const pattern of patterns) {
        const regex = new RegExp(pattern, 'gi');
        const matchCount = (text.match(regex) || []).length;
        matchScore += matchCount * 0.1;
      }

      if (matchScore > 0) {
        const bucket = this.bucketFor(label.category);
        if (bucket) {
          matches[bucket].push({
            id: label.id,
            label: label.label,
            confidence: Math.min(matchScore, 0.95),
          });
        }
      }
    }

    // Keep only the five strongest labels per bucket.
    for (const category of Object.keys(matches)) {
      matches[category] = matches[category]
        .sort((a, b) => b.confidence - a.confidence)
        .slice(0, 5);
    }

    const maxConfidence = Math.max(
      ...Object.values(matches).flatMap(m => m.map(l => l.confidence)),
      0
    );

    return {
      output: {
        topics: matches.topics,
        industries: matches.industries,
        tactics: matches.tactics,
        risks: matches.risks,
        sentiment: 0, // Rules don't detect sentiment
      },
      confidence: maxConfidence,
    };
  }

  /**
   * Tier 2: nearest-neighbor lookup of the article embedding against the
   * taxonomy namespace; match scores double as label confidences. Overall
   * confidence is the top match's score.
   */
  private async classifyByVector(embedding: number[]): Promise<{
    output: ClassifierOutput;
    confidence: number;
  }> {
    const results = await this.ctx.vectorize.query(embedding, {
      topK: 20,
      namespace: 'taxonomy',
    });

    const matches: Record<string, LabelScore[]> = {
      topics: [],
      industries: [],
      tactics: [],
      risks: [],
    };

    for (const match of results.matches) {
      // FIX: same bucket-name repair as classifyByRules (was metadata
      // category + 's').
      const bucket = this.bucketFor(match.metadata?.category);
      if (bucket) {
        matches[bucket].push({
          id: match.id,
          label: match.metadata?.label,
          confidence: match.score,
        });
      }
    }

    const maxConfidence = results.matches[0]?.score || 0;

    return {
      output: {
        topics: matches.topics.slice(0, 5),
        industries: matches.industries.slice(0, 3),
        tactics: matches.tactics.slice(0, 2),
        risks: matches.risks.slice(0, 3),
        sentiment: 0,
      },
      confidence: maxConfidence,
    };
  }

  /**
   * Tier 3: ask the LLM to classify against the full label lists, expecting
   * a strict-JSON reply. Unparseable output degrades to an empty result
   * with zero confidence (the cost is still incurred).
   */
  private async classifyByLLM(headline: string, body: string): Promise<{
    output: ClassifierOutput;
    confidence: number;
    cost: number;
  }> {
    const taxonomy = await this.getTaxonomyLabels();

    const prompt = `Classify this news article.

Available labels:
Topics: ${taxonomy.topics.join(', ')}
Industries: ${taxonomy.industries.join(', ')}
Tactics: ${taxonomy.tactics.join(', ')}
Risks: ${taxonomy.risks.join(', ')}

Article headline: ${headline}

Article body (truncated):
${body.slice(0, 2000)}

Respond with JSON only:
{
"topics": [{"label": "...", "confidence": 0.0-1.0}],
"industries": [{"label": "...", "confidence": 0.0-1.0}],
"tactics": [{"label": "...", "confidence": 0.0-1.0}],
"risks": [{"label": "...", "confidence": 0.0-1.0}],
"sentiment": -1.0 to 1.0,
"political_lean": -1.0 to 1.0 or null
}`;

    const response = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
      messages: [
        { role: 'system', content: 'You are a news classifier. Respond only with valid JSON.' },
        { role: 'user', content: prompt },
      ],
      max_tokens: 500,
    });

    try {
      const parsed = JSON.parse(response.response);
      return {
        output: {
          topics: parsed.topics || [],
          industries: parsed.industries || [],
          tactics: parsed.tactics || [],
          risks: parsed.risks || [],
          sentiment: parsed.sentiment || 0,
          politicalLean: parsed.political_lean,
        },
        confidence: 0.85, // LLM outputs are generally high confidence
        cost: 50, // Estimate for Workers AI
      };
    } catch {
      // Model ignored the JSON-only instruction; surface an empty result.
      return {
        output: {
          topics: [],
          industries: [],
          tactics: [],
          risks: [],
          sentiment: 0,
        },
        confidence: 0,
        cost: 50,
      };
    }
  }

  /**
   * Weighted union of per-tier outputs: label confidences are combined as
   * weight-scaled sums (rules 0.3, vector 0.3, LLM 0.4), then each bucket
   * keeps its top five. Sentiment is the mean of the non-zero tier values.
   */
  private mergeResults(...results: ClassifierOutput[]): ClassifierOutput {
    const merged: ClassifierOutput = {
      topics: [],
      industries: [],
      tactics: [],
      risks: [],
      sentiment: 0,
    };

    const weights = [0.3, 0.3, 0.4]; // rules, vector, llm

    for (const category of ['topics', 'industries', 'tactics', 'risks'] as const) {
      const labelScores = new Map<string, { label: string; totalScore: number }>();

      results.forEach((result, i) => {
        const weight = weights[i] || 0.33;
        for (const item of result[category]) {
          const existing = labelScores.get(item.label) || { label: item.label, totalScore: 0 };
          existing.totalScore += item.confidence * weight;
          labelScores.set(item.label, existing);
        }
      });

      merged[category] = Array.from(labelScores.values())
        .map(({ label, totalScore }) => ({
          // Slug ids are re-derived from the label text here; they may not
          // match the taxonomy row ids — TODO confirm downstream usage.
          id: label.toLowerCase().replace(/\s+/g, '-'),
          label,
          confidence: Math.min(totalScore, 1),
        }))
        .sort((a, b) => b.confidence - a.confidence)
        .slice(0, 5);
    }

    // Average sentiment across tiers that reported one.
    const sentiments = results.map(r => r.sentiment).filter(s => s !== 0);
    merged.sentiment = sentiments.length > 0
      ? sentiments.reduce((a, b) => a + b, 0) / sentiments.length
      : 0;

    return merged;
  }

  /** Embed up to the first 8k chars of text with bge-base. */
  private async getEmbedding(text: string): Promise<number[]> {
    const result = await this.ctx.ai.run('@cf/baai/bge-base-en-v1.5', {
      text: [text.slice(0, 8000)],
    });
    return result.data[0];
  }

  /** Active taxonomy labels grouped into the four category buckets. */
  private async getTaxonomyLabels(): Promise<Record<string, string[]>> {
    const labels = await this.ctx.db.prepare(
      'SELECT category, label FROM taxonomy_labels WHERE is_active = 1'
    ).all();

    const grouped: Record<string, string[]> = {
      topics: [],
      industries: [],
      tactics: [],
      risks: [],
    };

    for (const row of labels.results) {
      // FIX: same bucket-name repair as classifyByRules.
      const bucket = this.bucketFor(row.category);
      if (bucket) {
        grouped[bucket].push(row.label);
      }
    }

    return grouped;
  }
}

Sentiment Analyzer

Specialized agent for sentiment analysis.

// src/agents/sentiment.ts

/**
 * Fast lexicon-based sentiment scorer: counts hits from positive and
 * negative keyword patterns. Pure rules — no model calls, zero cost.
 */
class SentimentAgent extends BaseAgent<{ text: string }, number> {
  name = 'sentiment';
  version = '1.0.0';

  private positivePatterns = [
    /\b(breakthrough|triumph|exceeds|soars|surges)\b/gi,
    /\b(exceptional|outstanding|remarkable)\b/gi,
    /\b(growth|gains|success|wins|profit)\b/gi,
  ];

  private negativePatterns = [
    /\b(crisis|disaster|collapse|scandal)\b/gi,
    /\b(fraud|criminal|devastating)\b/gi,
    /\b(decline|drop|fall|plunge|slump)\b/gi,
  ];

  async process(input: AgentInput<{ text: string }>): Promise<AgentOutput<number>> {
    const begin = Date.now();
    const { text } = input.data;

    // Count total hits across a pattern list.
    const countHits = (patterns: RegExp[]) =>
      patterns.reduce((total, re) => total + (text.match(re) || []).length, 0);

    const positives = countHits(this.positivePatterns);
    const negatives = countHits(this.negativePatterns);

    // Each hit moves the raw score by 0.3 in its direction.
    const raw = positives * 0.3 - negatives * 0.3;
    const matchCount = positives + negatives;

    // Clamp score to [-1, 1]; confidence grows with hit count, capped at 0.9.
    const normalized = Math.max(-1, Math.min(1, raw));
    const confidence = Math.min(matchCount * 0.2, 0.9);

    return {
      result: normalized,
      confidence,
      method: 'rules',
      costMicros: 0,
      latencyMs: Date.now() - begin,
    };
  }
}

Extraction Agents

Entity Extractor

Extracts named entities from article text.

// src/agents/entity-extractor.ts

// Named entity surfaced by the NER model, deduplicated by normalized name.
interface Entity {
name: string;
type: 'person' | 'org' | 'location' | 'product' | 'event';
// Times the entity appeared in the analyzed text prefix.
mentions: number;
// Not set by the extractor; presumably populated elsewhere — TODO confirm.
sentiment?: number;
// True for the one or two most-mentioned entities.
isPrimary: boolean;
}

/**
 * Extracts named entities: runs a Workers AI NER model over (at most) the
 * first 5k characters, deduplicates mentions by normalized surface form,
 * and flags the one or two most-mentioned entities as primary.
 */
class EntityExtractorAgent extends BaseAgent<{ text: string }, Entity[]> {
  name = 'entity-extractor';
  version = '1.0.0';

  async process(input: AgentInput<{ text: string }>): Promise<AgentOutput<Entity[]>> {
    const startTime = Date.now();
    const { text } = input.data;

    // NER over a bounded prefix to cap model cost/latency.
    // NOTE(review): assumes the model returns an iterable of
    // { word, entity_group } records — confirm against the binding's types.
    const nerResult = await this.ctx.ai.run('@cf/huggingface/distilbert-ner', {
      text: text.slice(0, 5000),
    });

    // Group and count mentions by normalized name. The entity type is taken
    // from the first mention; later mentions only bump the count.
    const entityMap = new Map<string, Entity>();

    for (const entity of nerResult) {
      const normalized = this.normalizeEntityName(entity.word);
      const type = this.mapEntityType(entity.entity_group);

      const existing = entityMap.get(normalized);
      if (existing) {
        existing.mentions++;
      } else {
        entityMap.set(normalized, {
          name: normalized,
          type,
          mentions: 1,
          isPrimary: false,
        });
      }
    }

    // Most-mentioned first; mark the leader primary, and the runner-up too
    // when it has at least 70% of the leader's mentions.
    const entities = Array.from(entityMap.values())
      .sort((a, b) => b.mentions - a.mentions);

    if (entities.length > 0) {
      entities[0].isPrimary = true;
      if (entities.length > 1 && entities[1].mentions >= entities[0].mentions * 0.7) {
        entities[1].isPrimary = true;
      }
    }

    return {
      result: entities.slice(0, 20),
      confidence: 0.8,
      method: 'llm', // NER model counts as LLM
      costMicros: 20,
      latencyMs: Date.now() - startTime,
    };
  }

  /**
   * Normalize an entity's surface form: strip a leading article and fold
   * curly apostrophes to straight ones.
   * FIX: the previous character class held two straight apostrophes
   * (`/['']/g`), making the replace a no-op — evidently garbled curly
   * quotes; restored as explicit Unicode escapes.
   */
  private normalizeEntityName(name: string): string {
    return name
      .replace(/^(the|a|an)\s+/i, '')
      .replace(/[\u2018\u2019]/g, "'")
      .trim();
  }

  /** Map the NER model's tag set onto our entity type union. */
  private mapEntityType(nerType: string): Entity['type'] {
    const mapping: Record<string, Entity['type']> = {
      'PER': 'person',
      'ORG': 'org',
      'LOC': 'location',
      'MISC': 'product',
    };
    // Default to 'org' for unrecognized tags.
    return mapping[nerType] || 'org';
  }
}

Location Extractor

Extracts and geocodes locations from text.

// src/agents/location-extractor.ts

// A location mention resolved against the locations vector index.
interface LocationMatch {
// Id of the matched row in the location index.
locationId: string;
name: string;
// Granularity from index metadata ('city' | 'state' | 'country', or
// 'unknown' when metadata is missing).
type: string;
// Times this mention appeared in the analyzed text prefix.
mentionCount: number;
// True for the most-mentioned location.
isPrimary: boolean;
// Vector match score (> 0.8 required for inclusion).
confidence: number;
}

/**
 * NER -> vector lookup pipeline: pulls LOC entities from the text, resolves
 * each unique mention against the location index, and keeps only confident
 * (score > 0.8) resolutions. The most-mentioned match is marked primary.
 */
class LocationExtractorAgent extends BaseAgent<{ text: string }, LocationMatch[]> {
  name = 'location-extractor';
  version = '1.0.0';

  async process(input: AgentInput<{ text: string }>): Promise<AgentOutput<LocationMatch[]>> {
    const startTime = Date.now();
    const { text } = input.data;

    // NER over a bounded prefix to cap model cost/latency.
    const nerResult = await this.ctx.ai.run('@cf/huggingface/distilbert-ner', {
      text: text.slice(0, 5000),
    });

    const locationMentions = nerResult
      .filter((e: any) => e.entity_group === 'LOC')
      .map((e: any) => e.word);

    // FIX: resolve unique mentions in parallel — previously a sequential
    // await-in-loop, paying one embedding + one query round-trip per
    // mention back-to-back. Promise.all preserves mention order.
    const uniqueMentions = [...new Set(locationMentions)];
    const resolved = await Promise.all(
      uniqueMentions.map(async (mention) => {
        const embedding = await this.getEmbedding(mention);
        const results = await this.ctx.vectorize.query(embedding, {
          topK: 3,
          namespace: 'locations',
          filter: { type: { $in: ['city', 'state', 'country'] } },
        });

        const best = results.matches[0];
        if (!best || !(best.score > 0.8)) {
          return null; // no confident resolution for this mention
        }

        const match: LocationMatch = {
          locationId: best.id,
          name: best.metadata?.name || mention,
          type: best.metadata?.type || 'unknown',
          mentionCount: locationMentions.filter(m => m === mention).length,
          isPrimary: false,
          confidence: best.score,
        };
        return match;
      })
    );

    const matches = resolved.filter((m): m is LocationMatch => m !== null);

    // Most-mentioned location is considered the article's primary location.
    matches.sort((a, b) => b.mentionCount - a.mentionCount);
    if (matches.length > 0) {
      matches[0].isPrimary = true;
    }

    return {
      result: matches,
      confidence: matches[0]?.confidence || 0,
      method: 'hybrid',
      // Flat estimate; does not scale with the number of embeddings issued.
      costMicros: 30,
      latencyMs: Date.now() - startTime,
    };
  }

  /** Embed a short string with the standard bge-base model. */
  private async getEmbedding(text: string): Promise<number[]> {
    const result = await this.ctx.ai.run('@cf/baai/bge-base-en-v1.5', {
      text: [text],
    });
    return result.data[0];
  }
}

Generation Agents

Summarizer Agent

Generates article summaries.

// src/agents/summarizer.ts

/**
 * Produces a short article summary. Short articles are summarized for free
 * by reusing their opening paragraph; longer ones go to the LLM.
 */
class SummarizerAgent extends BaseAgent<{ headline: string; body: string }, string> {
  name = 'summarizer';
  version = '1.0.0';

  async process(input: AgentInput<{ headline: string; body: string }>): Promise<AgentOutput<string>> {
    const begin = Date.now();
    const { headline, body } = input.data;

    // Cheap path: below 500 chars the opening paragraph is the summary.
    if (body.length < 500) {
      const [firstPara] = body.split('\n\n');
      return {
        result: firstPara,
        confidence: 0.9,
        method: 'rules',
        costMicros: 0,
        latencyMs: Date.now() - begin,
      };
    }

    // LLM path for longer articles; body is truncated to bound token usage.
    const prompt = `Summarize this news article in 2-3 sentences. Be concise and factual.

Headline: ${headline}

Article:
${body.slice(0, 3000)}

Summary:`;

    const response = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
      messages: [
        { role: 'user', content: prompt },
      ],
      max_tokens: 150,
    });

    return {
      result: response.response.trim(),
      confidence: 0.85,
      method: 'llm',
      costMicros: 50,
      latencyMs: Date.now() - begin,
    };
  }
}

Briefing Agent

Generates daily/weekly briefings.

// src/agents/briefer.ts

// Inputs for briefing generation: the target profile, cadence, and the
// pre-scored articles to cover.
interface BriefingInput {
profileId: string;
period: 'daily' | 'weekly';
articles: Array<{
id: string;
headline: string;
summary: string;
// Topic labels; [0] is used as the grouping key.
topics: string[];
// Match score against the profile — presumably 0..1, TODO confirm.
relevanceScore: number;
}>;
}

// A rendered briefing: headline summary plus one section per topic.
interface BriefingOutput {
title: string;
// Executive summary across all sections.
summary: string;
sections: Array<{
title: string;
content: string;
// Articles cited by this section.
articleIds: string[];
}>;
}

/**
 * Builds a periodic briefing: groups articles by primary topic, writes an
 * LLM summary per topic (topics with fewer than 2 articles are skipped),
 * then an executive summary across the sections.
 */
class BrieferAgent extends BaseAgent<BriefingInput, BriefingOutput> {
  name = 'briefer';
  version = '1.0.0';

  async process(input: AgentInput<BriefingInput>): Promise<AgentOutput<BriefingOutput>> {
    const startTime = Date.now();
    const { period, articles } = input.data;

    // Group articles under their first (primary) topic label.
    const topicGroups = new Map<string, typeof articles>();
    for (const article of articles) {
      const primaryTopic = article.topics[0] || 'General';
      if (!topicGroups.has(primaryTopic)) {
        topicGroups.set(primaryTopic, []);
      }
      topicGroups.get(primaryTopic)!.push(article);
    }

    // One LLM summary per topic with at least two articles; at most the
    // first five articles of a topic are cited.
    const sections: BriefingOutput['sections'] = [];
    let totalCost = 0;
    const title = `${period === 'daily' ? 'Daily' : 'Weekly'} Briefing`;

    for (const [topic, topicArticles] of topicGroups) {
      if (topicArticles.length < 2) continue;

      const prompt = `Write a brief 2-3 sentence summary of these ${topic} news stories:

${topicArticles.slice(0, 5).map(a => `- ${a.headline}: ${a.summary}`).join('\n')}

Summary:`;

      const response = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
        messages: [{ role: 'user', content: prompt }],
        max_tokens: 150,
      });

      totalCost += 50;

      sections.push({
        title: topic,
        content: response.response.trim(),
        articleIds: topicArticles.slice(0, 5).map(a => a.id),
      });
    }

    // FIX: previously the executive-summary LLM call was made even when no
    // sections were produced, paying for a summary of an empty topic list.
    if (sections.length === 0) {
      return {
        result: { title, summary: '', sections },
        confidence: 1, // trivially correct: nothing to summarize
        method: 'rules',
        costMicros: totalCost,
        latencyMs: Date.now() - startTime,
      };
    }

    // Overall executive summary across the generated sections.
    const overallPrompt = `Write a 2-sentence executive summary of this ${period} news briefing:

Topics covered:
${sections.map(s => `- ${s.title}: ${s.content.slice(0, 100)}...`).join('\n')}

Executive Summary:`;

    const overallResponse = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
      messages: [{ role: 'user', content: overallPrompt }],
      max_tokens: 100,
    });

    totalCost += 50;

    return {
      result: {
        title,
        summary: overallResponse.response.trim(),
        sections,
      },
      confidence: 0.85,
      method: 'llm',
      costMicros: totalCost,
      latencyMs: Date.now() - startTime,
    };
  }
}

Q&A Agent

Answers questions about articles using RAG.

// src/agents/answerer.ts

// A question plus optional scoping for retrieval.
interface QAInput {
question: string;
// Not read by AnswererAgent — TODO confirm intended profile filtering.
profileId?: string;
// Recency window in days for retrieved articles (default 7).
contextDays?: number;
}

// Generated answer with its supporting articles.
interface QAOutput {
answer: string;
// Top retrieved articles with their vector relevance scores.
sources: Array<{ id: string; headline: string; relevance: number }>;
// Mirrors the best retrieval score.
confidence: number;
}

/**
 * RAG question answering: embed the question, retrieve candidate articles
 * from the vector index, hydrate them from D1 within a recency window, and
 * ask the LLM to answer strictly from that context.
 */
class AnswererAgent extends BaseAgent<QAInput, QAOutput> {
  name = 'answerer';
  version = '1.0.0';

  async process(input: AgentInput<QAInput>): Promise<AgentOutput<QAOutput>> {
    const startTime = Date.now();
    const { question, contextDays = 7 } = input.data;

    // 1. Embed the question.
    const questionEmbedding = await this.getEmbedding(question);

    // 2. Find relevant articles by similarity.
    const results = await this.ctx.vectorize.query(questionEmbedding, {
      topK: 10,
      namespace: 'articles',
    });

    const articleIds = results.matches.map(m => m.id);

    // FIX: with zero matches the old code built `WHERE id IN ()`, which is
    // a SQL syntax error. Short-circuit with an honest "don't know" answer.
    if (articleIds.length === 0) {
      return {
        result: {
          answer: 'I cannot determine this from the available articles.',
          sources: [],
          confidence: 0,
        },
        confidence: 0,
        method: 'llm',
        costMicros: 20, // embedding only
        latencyMs: Date.now() - startTime,
      };
    }

    // 3. Hydrate article content, restricted to the recency window.
    // The date cutoff is bound as a parameter rather than interpolated.
    const placeholders = articleIds.map(() => '?').join(',');
    const articles = await this.ctx.db.prepare(`
SELECT id, headline, summary, body_text
FROM articles
WHERE id IN (${placeholders})
AND published_at > datetime('now', ?)
`).bind(...articleIds, `-${contextDays} days`).all();

    // FIX: sources used to pair matches[i] with articles.results[i], but
    // SQL row order is unrelated to vector ranking (and rows may be
    // filtered out by the date window). Join by id instead.
    const headlineById = new Map(
      articles.results.map(a => [a.id, a.headline])
    );

    // 4. Build the LLM context from the top hydrated articles.
    const context = articles.results
      .slice(0, 5)
      .map(a => `[${a.headline}]\n${a.summary || a.body_text?.slice(0, 500)}`)
      .join('\n\n---\n\n');

    // 5. Generate the answer.
    const prompt = `Based on these recent news articles, answer the following question.

Context:
${context}

Question: ${question}

Provide a direct, factual answer based only on the information in the articles. If the answer cannot be determined from the context, say so.

Answer:`;

    const response = await this.ctx.ai.run('@cf/meta/llama-3-8b-instruct', {
      messages: [
        { role: 'system', content: 'You are a news analyst. Answer questions based on provided context.' },
        { role: 'user', content: prompt },
      ],
      max_tokens: 300,
    });

    return {
      result: {
        answer: response.response.trim(),
        sources: results.matches.slice(0, 5).map(m => ({
          id: m.id,
          headline: headlineById.get(m.id) || 'Unknown',
          relevance: m.score,
        })),
        confidence: results.matches[0]?.score || 0,
      },
      confidence: results.matches[0]?.score || 0,
      method: 'llm',
      costMicros: 100,
      latencyMs: Date.now() - startTime,
    };
  }

  /** Embed a short string with the standard bge-base model. */
  private async getEmbedding(text: string): Promise<number[]> {
    const result = await this.ctx.ai.run('@cf/baai/bge-base-en-v1.5', {
      text: [text],
    });
    return result.data[0];
  }
}

Monitoring Agents

Trend Detection Agent

Identifies emerging trends from article velocity.

// src/agents/trend-detector.ts

// One detected trend: a measured jump of `target` over its baseline.
interface TrendSignal {
type: 'keyword_spike' | 'entity_surge' | 'topic_emergence';
// The keyword/entity/topic that spiked.
target: string;
// Count in the previous window (or fallback average).
baselineValue: number;
// Count in the current window.
currentValue: number;
// Growth over baseline, in percent.
deltaPct: number;
// Articles per hour in the current window.
velocity: number;
// min(delta, 1): capped growth ratio used as confidence.
confidence: number;
}

/**
 * Detects keyword spikes by comparing the current window's keyword counts
 * against the immediately preceding window of equal length. Pure SQL +
 * arithmetic — no model calls, zero cost.
 */
class TrendDetectorAgent extends BaseAgent<{ windowHours: number }, TrendSignal[]> {
  name = 'trend-detector';
  version = '1.0.0';

  async process(input: AgentInput<{ windowHours: number }>): Promise<AgentOutput<TrendSignal[]>> {
    const begin = Date.now();
    const { windowHours } = input.data;

    // Keyword counts inside the current window (top 100 by volume).
    const velocityData = await this.ctx.db.prepare(`
SELECT
keyword,
SUM(article_count) as current_count,
AVG(article_count) as avg_count
FROM keyword_velocity
WHERE hour > datetime('now', '-${windowHours} hours')
GROUP BY keyword
HAVING current_count > 5
ORDER BY current_count DESC
LIMIT 100
`).all();

    // Counts for the preceding window of equal length (the baseline).
    const baselineData = await this.ctx.db.prepare(`
SELECT
keyword,
SUM(article_count) as baseline_count
FROM keyword_velocity
WHERE hour BETWEEN datetime('now', '-${windowHours * 2} hours')
AND datetime('now', '-${windowHours} hours')
GROUP BY keyword
`).all();

    const baselineMap = new Map(
      baselineData.results.map(r => [r.keyword, r.baseline_count])
    );

    // Emit a spike signal for every keyword growing more than 50% over its
    // baseline, falling back to its in-window average when the keyword has
    // no baseline rows.
    const signals: TrendSignal[] = velocityData.results.flatMap(row => {
      const baseline = baselineMap.get(row.keyword) || row.avg_count;
      const delta = baseline > 0 ? (row.current_count - baseline) / baseline : 0;
      if (delta <= 0.5) {
        return [];
      }
      return [{
        type: 'keyword_spike' as const,
        target: row.keyword,
        baselineValue: baseline,
        currentValue: row.current_count,
        deltaPct: delta * 100,
        velocity: row.current_count / windowHours,
        confidence: Math.min(delta, 1),
      }];
    });

    return {
      result: signals.sort((a, b) => b.deltaPct - a.deltaPct).slice(0, 20),
      confidence: 0.8,
      method: 'rules',
      costMicros: 0,
      latencyMs: Date.now() - begin,
    };
  }
}

Learning Agents

Feedback Learner

Incorporates user feedback to improve classification.

// src/agents/feedback-learner.ts

// A single classification correction to be stored and learned from.
interface FeedbackInput {
articleId: string;
// Labels as originally assigned, keyed by category.
originalLabels: Record<string, string[]>;
// Labels after correction, keyed by category.
correctedLabels: Record<string, string[]>;
// Who produced the correction; 'admin' corrections are applied immediately.
feedbackType: 'customer' | 'admin' | 'heuristic';
}

/**
 * Stores classification corrections, flags taxonomy centroid
 * recalculation once enough un-incorporated feedback accumulates, and
 * applies admin corrections to the article immediately.
 */
class FeedbackLearnerAgent extends BaseAgent<FeedbackInput, { updated: boolean }> {
  name = 'feedback-learner';
  version = '1.0.0';

  async process(input: AgentInput<FeedbackInput>): Promise<AgentOutput<{ updated: boolean }>> {
    const startTime = Date.now();
    const { articleId, originalLabels, correctedLabels, feedbackType } = input.data;

    // 1. Persist the raw feedback so the batch learner can analyze it
    // later (feedbackType lands in the `reviewer` column).
    await this.ctx.db.prepare(`
INSERT INTO classification_feedback
(id, article_id, original_labels, corrected_labels, reviewer, incorporated)
VALUES (?, ?, ?, ?, ?, 0)
`).bind(
      crypto.randomUUID(),
      articleId,
      JSON.stringify(originalLabels),
      JSON.stringify(correctedLabels),
      feedbackType
    ).run();

    // 2. Once 100+ un-incorporated corrections pile up, flag the (async)
    // taxonomy centroid recalculation job via KV.
    const feedbackCount = await this.ctx.db.prepare(`
SELECT COUNT(*) as count FROM classification_feedback
WHERE incorporated = 0
`).first();

    // FIX: first() can resolve to null; the previous unguarded
    // `feedbackCount.count` access would throw in that case.
    if ((feedbackCount?.count ?? 0) >= 100) {
      await this.ctx.kv.put('taxonomy_recalc_needed', 'true');
    }

    // 3. Admin corrections are authoritative — apply them right away.
    if (feedbackType === 'admin') {
      await this.updateArticleClassification(articleId, correctedLabels);
    }

    return {
      result: { updated: feedbackType === 'admin' },
      confidence: 1,
      method: 'rules',
      costMicros: 0,
      latencyMs: Date.now() - startTime,
    };
  }

  /** Overwrite the stored classification with human-corrected labels. */
  private async updateArticleClassification(
    articleId: string,
    labels: Record<string, string[]>
  ) {
    await this.ctx.db.prepare(`
UPDATE article_classifications
SET topics = ?, industries = ?, tactics = ?, risks = ?,
classified_by = 'human', needs_review = 0
WHERE article_id = ?
`).bind(
      JSON.stringify(labels.topics || []),
      JSON.stringify(labels.industries || []),
      JSON.stringify(labels.tactics || []),
      JSON.stringify(labels.risks || []),
      articleId
    ).run();
  }
}

Orchestration

Agent Pipeline

// src/agents/pipeline.ts

// Declarative multi-agent pipeline: ordered stages, each naming a
// registered agent plus adapters that wire the shared context to agent I/O.
interface PipelineConfig {
stages: Array<{
// Registry name passed to getAgent().
agent: string;
// Builds the agent's input payload from the running context.
input: (ctx: PipelineContext) => any;
// Persists the agent's result back onto the context.
output: (ctx: PipelineContext, result: any) => void;
// Optional gate; a falsy return skips the stage.
condition?: (ctx: PipelineContext) => boolean;
}>;
}

// Mutable state threaded through the pipeline stages.
interface PipelineContext {
articleId: string;
// Article type is declared elsewhere in the project.
article: Article;
// Raw AgentOutput of each completed stage, keyed by agent name.
results: Record<string, any>;
// Sum of costMicros across executed stages.
totalCost: number;
}

/**
 * Runs a sequence of agents over a shared mutable context. Stages may be
 * skipped via their optional `condition`; each stage's cost is accumulated
 * and its raw output recorded under the agent's name.
 *
 * @throws Error when the initial context is missing articleId or article.
 */
async function runPipeline(
  config: PipelineConfig,
  initialContext: Partial<PipelineContext>,
  agentCtx: AgentContext
): Promise<PipelineContext> {
  // FIX: the previous code used non-null assertions (`!`) and would let an
  // undefined articleId/article flow into every stage, failing late with a
  // confusing error; validate up front instead.
  if (initialContext.articleId == null || initialContext.article == null) {
    throw new Error('runPipeline: initialContext requires articleId and article');
  }

  const ctx: PipelineContext = {
    articleId: initialContext.articleId,
    article: initialContext.article,
    results: {},
    totalCost: 0,
  };

  for (const stage of config.stages) {
    // Optional per-stage gate.
    if (stage.condition && !stage.condition(ctx)) {
      continue;
    }

    const agent = getAgent(stage.agent);
    const input = stage.input(ctx);
    const output = await agent.process({ data: input });

    // Let the stage persist its result, then record cost and raw output.
    stage.output(ctx, output.result);
    ctx.totalCost += output.costMicros;
    ctx.results[stage.agent] = output;
  }

  return ctx;
}

// Article processing pipeline
// Standard enrichment pipeline for a newly ingested article. The
// summarizer runs first and writes article.summary in place; the remaining
// stages only read the article and stash results on ctx.results.
const articlePipeline: PipelineConfig = {
stages: [
// 1. Summarize; the result is stored directly on the article.
{
agent: 'summarizer',
input: (ctx) => ({ headline: ctx.article.headline, body: ctx.article.body_text }),
output: (ctx, result) => { ctx.article.summary = result; },
},
// 2. Named entities from the body.
{
agent: 'entity-extractor',
input: (ctx) => ({ text: ctx.article.body_text }),
output: (ctx, result) => { ctx.results.entities = result; },
},
// 3. Geographic mentions resolved against the location index.
{
agent: 'location-extractor',
input: (ctx) => ({ text: ctx.article.body_text }),
output: (ctx, result) => { ctx.results.locations = result; },
},
// 4. Taxonomy classification (topics/industries/tactics/risks).
{
agent: 'classifier',
input: (ctx) => ({
articleId: ctx.articleId,
headline: ctx.article.headline,
body: ctx.article.body_text,
}),
output: (ctx, result) => { ctx.results.classification = result; },
},
// 5. Cheap rule-based sentiment score.
{
agent: 'sentiment',
input: (ctx) => ({ text: ctx.article.body_text }),
output: (ctx, result) => { ctx.results.sentiment = result; },
},
],
};

Prompt Engineering

Prompt Templates

// src/agents/prompts.ts

// Central prompt library so wording is versioned in one place. Builder
// functions truncate article bodies to bound token usage; the truncation
// limits differ per use case.
export const PROMPTS = {
// Taxonomy classification; expects a strict-JSON reply.
classification: {
system: `You are a news classifier. Analyze articles and assign relevant labels from the provided taxonomy. Be precise and only assign labels with high confidence.`,

// Body truncated to 2500 chars.
user: (headline: string, body: string, taxonomy: string[]) => `
Classify this news article using ONLY labels from this taxonomy:
${taxonomy.join(', ')}

Headline: ${headline}

Article (truncated):
${body.slice(0, 2500)}

Return JSON with this structure:
{
"labels": [{"label": "...", "confidence": 0.0-1.0}],
"reasoning": "brief explanation"
}`,
},

// 2-3 sentence article summary; body truncated to 3000 chars.
summary: {
system: `You are a news summarizer. Create concise, factual summaries.`,

user: (headline: string, body: string) => `
Summarize this article in 2-3 sentences. Focus on the key facts.

Headline: ${headline}

Article:
${body.slice(0, 3000)}

Summary:`,
},

// Context-grounded Q&A; the system prompt forbids answering beyond context.
qa: {
system: `You are a news analyst. Answer questions based only on the provided context. If the answer cannot be determined, say "I cannot determine this from the available articles."`,

user: (question: string, context: string) => `
Context:
${context}

Question: ${question}

Answer:`,
},

// Briefing generation; one prompt per topic section.
briefing: {
system: `You are a news briefing writer. Create executive-style summaries.`,

sectionPrompt: (topic: string, articles: string[]) => `
Write a 2-3 sentence summary of these ${topic} news stories:

${articles.map((a, i) => `${i + 1}. ${a}`).join('\n')}

Summary:`,
},
};

Cost Optimization

Budget Management

// src/agents/cost-manager.ts

/**
 * Daily budget bookkeeping in KV. Budgets and spend are stored in
 * micro-dollars under `budget:<service>:daily` and
 * `spent:<service>:<YYYY-MM-DD>` keys.
 *
 * NOTE(review): recordSpend is a read-modify-write on KV and is not
 * atomic; concurrent workers can under-count spend — confirm whether that
 * is acceptable or the counter should move to a Durable Object.
 */
class CostManager {
  constructor(
    private db: D1Database,
    private kv: KVNamespace
  ) {}

  /**
   * Report whether `service` may spend more today and how much budget
   * remains (micro-dollars; budget defaults to $100/day when unset).
   * `remaining` can be negative if spend overshot the budget.
   */
  async checkBudget(service: string): Promise<{ allowed: boolean; remaining: number }> {
    const budgetKey = `budget:${service}:daily`;
    const spentKey = `spent:${service}:${this.getTodayKey()}`;

    const [budget, spent] = await Promise.all([
      this.kv.get(budgetKey),
      this.kv.get(spentKey),
    ]);

    const budgetMicros = budget ? parseInt(budget, 10) : 100000000; // $100 default
    const spentMicros = spent ? parseInt(spent, 10) : 0;
    const remaining = budgetMicros - spentMicros;

    return {
      allowed: remaining > 0,
      remaining,
    };
  }

  /** Add `costMicros` to today's spend counter for `service`. */
  async recordSpend(service: string, costMicros: number): Promise<void> {
    const spentKey = `spent:${service}:${this.getTodayKey()}`;
    const current = await this.kv.get(spentKey);
    const newSpent = (current ? parseInt(current, 10) : 0) + costMicros;

    // Keep the counter around past midnight so late writes still land.
    await this.kv.put(spentKey, String(newSpent), {
      expirationTtl: 86400 * 2, // 2 days
    });
  }

  /**
   * Run the expensive strategy while the budget allows it with at least
   * 1000 micro-dollars of headroom; otherwise fall back to the cheap one.
   */
  async getOptimalMethod(
    service: string,
    options: { cheap: () => Promise<any>; expensive: () => Promise<any> }
  ): Promise<any> {
    const { allowed, remaining } = await this.checkBudget(service);

    if (!allowed || remaining < 1000) {
      return options.cheap();
    }

    return options.expensive();
  }

  /** UTC date key (YYYY-MM-DD) scoping spend counters to one day. */
  private getTodayKey(): string {
    return new Date().toISOString().split('T')[0];
  }
}

Caching Strategy

// src/agents/cache.ts

/**
 * Thin KV-backed cache for agent computations, with a dedicated long-TTL
 * store for text embeddings keyed by a truncated SHA-256 of the text.
 */
class AgentCache {
  constructor(private kv: KVNamespace) {}

  /**
   * Return the cached value under `key` if present; otherwise run
   * `compute`, store its JSON for `ttlSeconds`, and return the result.
   */
  async getOrCompute<T>(
    key: string,
    compute: () => Promise<T>,
    ttlSeconds: number = 3600
  ): Promise<T> {
    const hit = await this.kv.get(key);
    if (hit) {
      return JSON.parse(hit);
    }

    const fresh = await compute();
    await this.kv.put(key, JSON.stringify(fresh), { expirationTtl: ttlSeconds });
    return fresh;
  }

  /** Store an embedding for 30 days (embeddings are stable per text). */
  async cacheEmbedding(text: string, embedding: number[]): Promise<void> {
    const digest = await this.hashText(text);
    await this.kv.put(`emb:${digest}`, JSON.stringify(embedding), {
      expirationTtl: 86400 * 30, // 30 days
    });
  }

  /** Fetch a previously cached embedding, or null on miss. */
  async getEmbedding(text: string): Promise<number[] | null> {
    const digest = await this.hashText(text);
    const hit = await this.kv.get(`emb:${digest}`);
    return hit ? JSON.parse(hit) : null;
  }

  /**
   * Short cache key: SHA-256 over the first 1000 chars of the text,
   * truncated to 16 hex chars. Texts sharing a 1000-char prefix collide
   * by design (cost/precision tradeoff).
   */
  private async hashText(text: string): Promise<string> {
    const bytes = new TextEncoder().encode(text.slice(0, 1000));
    const digest = await crypto.subtle.digest('SHA-256', bytes);
    const hex: string[] = [];
    for (const b of new Uint8Array(digest)) {
      hex.push(b.toString(16).padStart(2, '0'));
    }
    return hex.join('').slice(0, 16);
  }
}

Last updated: 2024-01-15