Files
cloud-orchestrator/src/handlers/recommend.ts
kappa f6c35067f9 feat: add available_regions to recommendations
- Add AvailableRegion interface in types.ts
- Show all regions where the same server spec is available
- Helps users see regional options (e.g., Tokyo and Osaka for japan)
- Sorted by price, excludes current region

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 19:41:42 +09:00

1182 lines
46 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* POST /api/recommend - AI-powered server recommendation handler
*/
import type {
Env,
RecommendRequest,
Server,
BenchmarkData,
VPSBenchmark,
TechSpec,
BandwidthEstimate,
RecommendationResult,
BenchmarkReference,
AIRecommendationResponse,
AvailableRegion
} from '../types';
import { i18n, LIMITS } from '../config';
import {
jsonResponse,
validateRecommendRequest,
generateCacheKey,
estimateBandwidth,
calculateBandwidthInfo,
escapeLikePattern,
isValidServer,
isValidBenchmarkData,
isValidVPSBenchmark,
isValidTechSpec,
isValidAIRecommendation,
sanitizeForAIPrompt,
DEFAULT_REGION_FILTER_SQL,
buildFlexibleRegionConditions
} from '../utils';
/**
 * Handles `POST /api/recommend`.
 *
 * Steps, in order:
 *  1. Reject oversized bodies (declared Content-Length first, then measured size).
 *  2. Parse and validate the JSON body; client mistakes are answered with 400.
 *  3. Serve a cached response when `env.CACHE` is configured and has a hit.
 *  4. Derive minimum memory / vCPU from the matched tech specs and estimate bandwidth.
 *  5. Query candidate servers and VPS benchmarks, then ask the AI helper to rank them.
 *  6. Cache successful (non-empty) responses on a best-effort basis.
 *
 * @param request - Incoming HTTP request whose body is a JSON `RecommendRequest`.
 * @param env - Worker bindings (`DB`, optional `CACHE`, `OPENAI_API_KEY`, ...).
 * @param corsHeaders - Headers forwarded to every `jsonResponse` call.
 * @returns 200 with recommendations (or an empty list when nothing matches),
 *          400 for invalid input, 413 for oversized bodies, 500 for unexpected failures.
 */
export async function handleRecommend(
  request: Request,
  env: Env,
  corsHeaders: Record<string, string>
): Promise<Response> {
  const requestId = crypto.randomUUID();

  try {
    // Cheap pre-check on the declared size to reject large payloads early
    const contentLength = request.headers.get('Content-Length');
    if (contentLength && parseInt(contentLength, 10) > LIMITS.MAX_REQUEST_BODY_BYTES) {
      return jsonResponse(
        { error: 'Request body too large', max_size: '10KB' },
        413,
        corsHeaders
      );
    }

    // The header may be missing or inaccurate, so also measure the real UTF-8 size
    const bodyText = await request.text();
    const actualBodySize = new TextEncoder().encode(bodyText).length;
    if (actualBodySize > LIMITS.MAX_REQUEST_BODY_BYTES) {
      return jsonResponse(
        { error: 'Request body too large', max_size: '10KB', actual_size: actualBodySize },
        413,
        corsHeaders
      );
    }

    // Malformed JSON is a client error: answer 400 here instead of letting the
    // SyntaxError reach the outer catch and surface as a 500.
    let parsedBody: unknown;
    try {
      parsedBody = JSON.parse(bodyText);
    } catch {
      return jsonResponse(
        { error: 'Invalid JSON in request body', request_id: requestId },
        400,
        corsHeaders
      );
    }

    // `null` and primitives are valid JSON but cannot be a request object;
    // property access on `null` below would otherwise throw.
    if (parsedBody === null || typeof parsedBody !== 'object') {
      return jsonResponse(
        { error: 'Request body must be a JSON object', request_id: requestId },
        400,
        corsHeaders
      );
    }

    const body = parsedBody as RecommendRequest;
    const lang = body.lang || 'en';

    const validationError = validateRecommendRequest(body, lang);
    if (validationError) {
      return jsonResponse(validationError, 400, corsHeaders);
    }

    console.log('[Recommend] Request summary:', {
      tech_stack_count: body.tech_stack.length,
      expected_users: body.expected_users,
      use_case_length: body.use_case.length,
      traffic_pattern: body.traffic_pattern,
      has_region_pref: !!body.region_preference,
      has_budget: !!body.budget_limit,
      has_provider_filter: !!body.provider_filter,
      lang: lang,
    });

    // Generate cache key
    const cacheKey = generateCacheKey(body);
    console.log('[Recommend] Cache key:', cacheKey);

    // Check cache (optional - may not be configured)
    if (env.CACHE) {
      const cached = await env.CACHE.get(cacheKey);
      if (cached) {
        console.log('[Recommend] Cache hit');
        return jsonResponse(
          { ...JSON.parse(cached), cached: true },
          200,
          corsHeaders
        );
      }
    }
    console.log('[Recommend] Cache miss or disabled');

    // Phase 1: Execute independent queries in parallel.
    // Benchmark data is optional: a failure degrades to an empty list.
    const [techSpecs, benchmarkDataAll] = await Promise.all([
      queryTechSpecs(env.DB, body.tech_stack),
      queryBenchmarkData(env.DB, body.tech_stack).catch((err: unknown) => {
        const message = err instanceof Error ? err.message : String(err);
        console.warn('[Recommend] Benchmark data unavailable:', message);
        return [] as BenchmarkData[];
      }),
    ]);
    console.log('[Recommend] Tech specs matched:', techSpecs.length);
    console.log('[Recommend] Benchmark data points (initial):', benchmarkDataAll.length);

    // Calculate minimum memory with proper aggregation
    // Memory-intensive services: sum their memory requirements
    // Non-memory-intensive services: 256MB overhead each
    const memoryIntensiveSpecs = techSpecs.filter(s => s.is_memory_intensive);
    const otherSpecs = techSpecs.filter(s => !s.is_memory_intensive);
    let minMemoryMb: number | undefined;
    if (memoryIntensiveSpecs.length > 0 || otherSpecs.length > 0) {
      // Sum memory-intensive requirements
      const memoryIntensiveSum = memoryIntensiveSpecs.reduce((sum, s) => sum + s.min_memory_mb, 0);
      // Add 256MB overhead per non-memory-intensive service
      const otherOverhead = otherSpecs.length * 256;
      minMemoryMb = memoryIntensiveSum + otherOverhead;
      console.log(`[Recommend] Memory calculation: ${memoryIntensiveSpecs.length} memory-intensive (${(memoryIntensiveSum/1024).toFixed(1)}GB) + ${otherSpecs.length} other services (${(otherOverhead/1024).toFixed(1)}GB) = ${(minMemoryMb/1024).toFixed(1)}GB total`);
    }

    // Calculate minimum vCPU with category-based weighting
    // Different tech categories have different bottleneck characteristics
    let minVcpu: number | undefined;

    // DB workload multiplier based on use_case.
    // Lower multiplier = heavier workload = more resources needed.
    // The patterns are tested in order, so the first matching tier wins.
    const getDbWorkloadMultiplier = (useCase: string): { multiplier: number; type: string } => {
      const lowerUseCase = useCase.toLowerCase();
      // Heavy DB workloads (analytics, big data, reporting) - multiplier 0.3x
      // Note: use \blog(s|ging)?\b to match "log", "logs", "logging" but NOT "blog"
      if (/analytics|warehouse|reporting|dashboard|\bbi\b|olap|\blog(s|ging)?\b|metric|monitoring|time.?series|대시보드|분석|리포트|로그/.test(lowerUseCase)) {
        return { multiplier: 0.3, type: 'heavy (analytics/reporting)' };
      }
      // Medium-heavy DB workloads (e-commerce, ERP, CRM, social) - multiplier 0.5x
      if (/e.?commerce|shop|store|cart|order|payment|erp|crm|inventory|social|community|forum|게시판|쇼핑몰|주문|결제|커뮤니티/.test(lowerUseCase)) {
        return { multiplier: 0.5, type: 'medium-heavy (transactional)' };
      }
      // Medium DB workloads (API, SaaS, app backend) - multiplier 0.7x
      if (/api|saas|backend|service|app|mobile|플랫폼|서비스|앱/.test(lowerUseCase)) {
        return { multiplier: 0.7, type: 'medium (API/SaaS)' };
      }
      // Light DB workloads (blog, landing, portfolio, docs) - multiplier 1.0x (use default)
      if (/blog|landing|portfolio|doc|wiki|static|personal|홈페이지|블로그|포트폴리오|문서/.test(lowerUseCase)) {
        return { multiplier: 1.0, type: 'light (content/read-heavy)' };
      }
      // Default: medium workload
      return { multiplier: 0.7, type: 'default (medium)' };
    };

    const dbWorkload = getDbWorkloadMultiplier(body.use_case);
    console.log(`[Recommend] DB workload inferred from use_case: ${dbWorkload.type} (multiplier: ${dbWorkload.multiplier})`);

    if (techSpecs.length > 0) {
      // Relative weight of each tech category when estimating the CPU bottleneck
      const categoryWeights: Record<string, number> = {
        'web_server': 0.1, // nginx, apache: reverse proxy uses minimal resources
        'runtime': 1.0, // nodejs, php, python: actual computation
        'database': 1.0, // mysql, postgresql, mongodb: major bottleneck
        'cache': 0.5, // redis, memcached: supporting role
        'search': 0.8, // elasticsearch: CPU-intensive but not always primary
        'container': 0.3, // docker: orchestration overhead
        'messaging': 0.5, // rabbitmq, kafka: I/O bound
        'default': 0.7 // unknown categories
      };

      // Calculate weighted vCPU requirements per category
      const categoryRequirements = new Map<string, number>();
      for (const spec of techSpecs) {
        const category = spec.category || 'default';
        const weight = categoryWeights[category] || categoryWeights['default'];

        // Apply DB workload multiplier for database category
        // Lower multiplier = heavier workload = higher resource needs (lower vcpu_per_users)
        let effectiveVcpuPerUsers = spec.vcpu_per_users;
        if (category === 'database') {
          effectiveVcpuPerUsers = Math.max(1, Math.floor(spec.vcpu_per_users * dbWorkload.multiplier));
        }

        const vcpuNeeded = Math.ceil(body.expected_users / effectiveVcpuPerUsers);
        const weightedVcpu = vcpuNeeded * weight;
        const existing = categoryRequirements.get(category) || 0;
        // Take max within same category (not additive)
        categoryRequirements.set(category, Math.max(existing, weightedVcpu));

        const dbNote = category === 'database' ? ` (adjusted for ${dbWorkload.type})` : '';
        console.log(`[Recommend] ${spec.name} (${category}): ${vcpuNeeded} vCPU × ${weight} weight = ${weightedVcpu.toFixed(1)} weighted vCPU${dbNote}`);
      }

      // Find bottleneck: use MAX across categories, not SUM
      // Request flow (web_server → runtime → database) means the slowest component is the bottleneck
      // SUM would over-provision since components process the SAME requests sequentially
      let maxWeightedVcpu = 0;
      let bottleneckCategory = '';
      for (const [category, vcpu] of categoryRequirements) {
        console.log(`[Recommend] Category '${category}': ${vcpu.toFixed(1)} weighted vCPU`);
        if (vcpu > maxWeightedVcpu) {
          maxWeightedVcpu = vcpu;
          bottleneckCategory = category;
        }
      }

      minVcpu = Math.max(Math.ceil(maxWeightedVcpu), 1); // At least 1 vCPU
      console.log(`[Recommend] Bottleneck: '${bottleneckCategory}' with ${maxWeightedVcpu.toFixed(1)} weighted vCPU → ${minVcpu} vCPU (for ${body.expected_users} users)`);
    }

    // Calculate bandwidth estimate for provider filtering
    const bandwidthEstimate = estimateBandwidth(body.expected_users, body.use_case, body.traffic_pattern);
    console.log(`[Recommend] Bandwidth estimate: ${bandwidthEstimate.monthly_tb >= 1 ? bandwidthEstimate.monthly_tb + ' TB' : bandwidthEstimate.monthly_gb + ' GB'}/month (${bandwidthEstimate.category})`);

    // Estimate specs for VPS benchmark query (doesn't need exact candidates)
    const estimatedCores = minVcpu || 2;
    const estimatedMemory = minMemoryMb ? Math.ceil(minMemoryMb / 1024) : 4;
    const defaultProviders = bandwidthEstimate.category === 'very_heavy' ? ['Linode'] : ['Linode', 'Vultr'];

    // Phase 2: Query candidate servers and VPS benchmarks in parallel.
    // VPS benchmarks are optional: a failure degrades to an empty list.
    const [candidates, vpsBenchmarks] = await Promise.all([
      queryCandidateServers(env.DB, body, minMemoryMb, minVcpu, bandwidthEstimate, lang),
      queryVPSBenchmarksBatch(env.DB, estimatedCores, estimatedMemory, defaultProviders).catch((err: unknown) => {
        const message = err instanceof Error ? err.message : String(err);
        console.warn('[Recommend] VPS benchmarks unavailable:', message);
        return [] as VPSBenchmark[];
      }),
    ]);
    console.log('[Recommend] Candidate servers:', candidates.length);
    console.log('[Recommend] VPS benchmark data points:', vpsBenchmarks.length);

    // No matching servers is reported as a 200 with an empty list, not as a failure
    if (candidates.length === 0) {
      return jsonResponse(
        {
          error: 'No servers found matching your requirements',
          recommendations: [],
          request_id: requestId,
        },
        200,
        corsHeaders
      );
    }

    // Use initially fetched benchmark data (already filtered by tech stack)
    const benchmarkData = benchmarkDataAll;

    // Ask the AI helper to analyze and rank the candidates (techSpecs already queried above)
    const aiResult = await getAIRecommendations(
      env,
      env.OPENAI_API_KEY,
      body,
      candidates,
      benchmarkData,
      vpsBenchmarks,
      techSpecs,
      bandwidthEstimate,
      lang
    );
    console.log('[Recommend] Generated recommendations:', aiResult.recommendations.length);

    const response = {
      recommendations: aiResult.recommendations,
      infrastructure_tips: aiResult.infrastructure_tips || [],
      bandwidth_estimate: {
        monthly_tb: bandwidthEstimate.monthly_tb,
        monthly_gb: bandwidthEstimate.monthly_gb,
        daily_gb: bandwidthEstimate.daily_gb,
        category: bandwidthEstimate.category,
        description: bandwidthEstimate.description,
        active_ratio: bandwidthEstimate.active_ratio,
        calculation_note: `Based on ${body.expected_users} concurrent users with ${Math.round(bandwidthEstimate.active_ratio * 100)}% active ratio`,
      },
      total_candidates: candidates.length,
      cached: false,
    };

    // Cache result only if we have recommendations (don't cache empty/failed results).
    // Caching is best-effort: a failed write must not discard a computed response.
    if (env.CACHE && response.recommendations && response.recommendations.length > 0) {
      try {
        await env.CACHE.put(cacheKey, JSON.stringify(response), {
          expirationTtl: 300, // 5 minutes
        });
      } catch (cacheError: unknown) {
        const message = cacheError instanceof Error ? cacheError.message : String(cacheError);
        console.warn('[Recommend] Failed to cache result:', message);
      }
    }

    return jsonResponse(response, 200, corsHeaders);
  } catch (error) {
    console.error('[Recommend] Error:', error);
    console.error('[Recommend] Error stack:', error instanceof Error ? error.stack : 'No stack');
    console.error('[Recommend] Error details:', error instanceof Error ? error.message : 'Unknown error');
    // Internal details stay in the logs; the client only gets a correlation id
    return jsonResponse(
      {
        error: 'Failed to generate recommendations',
        request_id: requestId,
      },
      500,
      corsHeaders
    );
  }
}
/**
 * Loads up to 50 candidate servers (one row per instance type and region)
 * that satisfy the request's budget, resource, region and provider constraints.
 *
 * @param db - D1 database handle.
 * @param req - Validated recommendation request (budget, region and provider filters are read).
 * @param minMemoryMb - Optional lower bound on instance memory in MB.
 * @param minVcpu - Optional lower bound on vCPU count.
 * @param bandwidthEstimate - Optional estimate; heavy/very_heavy categories sort Linode first.
 * @param lang - `'ko'` selects the KRW price column, anything else the retail column.
 * @returns Rows that pass `isValidServer`, each tagged with the selected `currency`.
 * @throws Error when the database reports the query as unsuccessful.
 */
async function queryCandidateServers(
  db: D1Database,
  req: RecommendRequest,
  minMemoryMb?: number,
  minVcpu?: number,
  bandwidthEstimate?: BandwidthEstimate,
  lang: string = 'en'
): Promise<Server[]> {
  // Select price column based on language
  // Korean → monthly_price_krw (KRW), Others → monthly_price_retail
  const priceColumn = lang === 'ko' ? 'pr.monthly_price_krw' : 'pr.monthly_price_retail';
  const currency = lang === 'ko' ? 'KRW' : 'USD';

  // Only Linode and Vultr are considered. The restriction is documented here
  // rather than as an inline SQL `--` comment, which would swallow any clause
  // appended on the same line.
  let query = `
    SELECT
      it.id,
      p.display_name as provider_name,
      it.instance_id,
      it.instance_name,
      it.vcpu,
      it.memory_mb,
      ROUND(it.memory_mb / 1024.0, 1) as memory_gb,
      it.storage_gb,
      it.network_speed_gbps,
      it.instance_family,
      it.gpu_count,
      it.gpu_type,
      MIN(${priceColumn}) as monthly_price,
      r.region_name as region_name,
      r.region_code as region_code,
      r.country_code as country_code
    FROM instance_types it
    JOIN providers p ON it.provider_id = p.id
    JOIN pricing pr ON pr.instance_type_id = it.id
    JOIN regions r ON pr.region_id = r.id
    WHERE LOWER(p.name) IN ('linode', 'vultr')
  `;
  const params: (string | number)[] = [];

  if (req.budget_limit) {
    // Use same price column as display for budget filtering
    query += ` AND ${priceColumn} <= ?`;
    params.push(req.budget_limit);
  }

  // Filter by minimum memory requirement (from tech specs)
  if (minMemoryMb && minMemoryMb > 0) {
    query += ` AND it.memory_mb >= ?`;
    params.push(minMemoryMb);
    console.log(`[Candidates] Filtering by minimum memory: ${minMemoryMb}MB (${(minMemoryMb/1024).toFixed(1)}GB)`);
  }

  // Filter by minimum vCPU requirement (from expected users + tech specs)
  if (minVcpu && minVcpu > 0) {
    query += ` AND it.vcpu >= ?`;
    params.push(minVcpu);
    console.log(`[Candidates] Filtering by minimum vCPU: ${minVcpu}`);
  }

  // Bandwidth only influences ordering (below), never filtering, so a region
  // without Linode still yields results. These branches are informational logs.
  if (bandwidthEstimate?.category === 'very_heavy') {
    console.log(`[Candidates] Very heavy bandwidth (${bandwidthEstimate.monthly_tb}TB/month): Linode strongly preferred, all providers included`);
  } else if (bandwidthEstimate?.category === 'heavy') {
    console.log(`[Candidates] Heavy bandwidth (${bandwidthEstimate.monthly_tb}TB/month): Linode preferred`);
  }

  // Flexible region matching: region_code, region_name, or country_code
  if (req.region_preference && req.region_preference.length > 0) {
    const { conditions, params: regionParams } = buildFlexibleRegionConditions(req.region_preference);
    query += ` AND (${conditions.join(' OR ')})`;
    params.push(...regionParams);
  } else {
    // No region specified → fall back to the default region filter
    query += ` AND ${DEFAULT_REGION_FILTER_SQL}`;
  }

  // Filter by provider if specified. Matching is case-insensitive so it agrees
  // with the LOWER(p.name) restriction above ("LINODE" and "linode" both match).
  if (req.provider_filter && req.provider_filter.length > 0) {
    const wantedProviders = req.provider_filter.map(name => name.toLowerCase());
    const placeholders = wantedProviders.map(() => '?').join(',');
    query += ` AND (LOWER(p.name) IN (${placeholders}) OR LOWER(p.display_name) IN (${placeholders}))`;
    // The placeholder list appears twice in the clause, so bind the values twice
    params.push(...wantedProviders, ...wantedProviders);
  }

  // Group by instance + region to show each server per region
  // For heavy/very_heavy bandwidth, list Linode first, then order by price
  const isHighBandwidth = bandwidthEstimate?.category === 'heavy' || bandwidthEstimate?.category === 'very_heavy';
  const orderByClause = isHighBandwidth
    ? `ORDER BY CASE WHEN LOWER(p.name) = 'linode' THEN 0 ELSE 1 END, monthly_price ASC`
    : `ORDER BY monthly_price ASC`;
  query += ` GROUP BY it.id, r.id ${orderByClause} LIMIT 50`;

  const result = await db.prepare(query).bind(...params).all();
  if (!result.success) {
    throw new Error('Failed to query candidate servers');
  }

  // Add currency to each result and validate with type guard
  const serversWithCurrency = (result.results as unknown[]).map(server => {
    if (typeof server === 'object' && server !== null) {
      return { ...server, currency };
    }
    return server;
  });
  const validServers = serversWithCurrency.filter(isValidServer);

  const invalidCount = result.results.length - validServers.length;
  if (invalidCount > 0) {
    console.warn(`[Candidates] Filtered out ${invalidCount} invalid server records`);
  }
  return validServers;
}
/**
 * Query relevant benchmark data for tech stack.
 *
 * Each tech name is mapped to zero or more benchmark types; the general CPU
 * benchmark `pts-compress-7zip` is always included. At most 50 rows are
 * returned, ordered by percentile and then score (both descending).
 *
 * @param db - D1 database handle.
 * @param techStack - User-supplied technology names (matched case-insensitively, trimmed).
 * @param coreCount - Optional core count; when given, only processors with an
 *                    unknown core count or one in `[coreCount - 2, coreCount + 4]`
 *                    (lower bound clamped to 1) are kept.
 * @returns Rows that pass `isValidBenchmarkData`; an empty list if the query is unsuccessful.
 */
async function queryBenchmarkData(
  db: D1Database,
  techStack: string[],
  coreCount?: number
): Promise<BenchmarkData[]> {
  // Map tech stack to relevant benchmark types.
  // A Map (not a plain object) is used because the keys come from user input:
  // an object lookup of e.g. "constructor" would return an inherited
  // non-array value and make the iteration below throw.
  const techToBenchmark = new Map<string, readonly string[]>([
    ['node.js', ['pts-node-octane', 'pts-node-express-loadtest']],
    ['nodejs', ['pts-node-octane', 'pts-node-express-loadtest']],
    ['express', ['pts-node-express-loadtest']],
    ['nginx', ['pts-nginx']],
    ['apache', ['pts-apache']],
    ['php', ['pts-phpbench']],
    ['redis', ['pts-redis']],
    ['mysql', ['pts-mysqlslap']],
    ['postgresql', ['pts-mysqlslap']], // Use MySQL benchmark as proxy
    ['docker', ['pts-compress-7zip', 'pts-postmark']], // CPU + I/O for containers
    ['mongodb', ['pts-postmark']], // I/O intensive
    ['python', ['pts-coremark', 'pts-compress-7zip']],
    ['java', ['pts-coremark', 'pts-compress-7zip']],
    ['go', ['pts-coremark', 'pts-compress-7zip']],
    ['rust', ['pts-coremark', 'pts-compress-7zip']],
  ]);

  // Find relevant benchmark types
  const relevantBenchmarks = new Set<string>();
  for (const tech of techStack) {
    const benchmarks = techToBenchmark.get(tech.toLowerCase().trim()) ?? [];
    for (const benchmark of benchmarks) {
      relevantBenchmarks.add(benchmark);
    }
  }
  // Always include general CPU benchmark, so the set is never empty
  relevantBenchmarks.add('pts-compress-7zip');

  const benchmarkNames = Array.from(relevantBenchmarks);
  const placeholders = benchmarkNames.map(() => '?').join(',');

  // Query benchmark data, optionally filtering by core count
  let query = `
    SELECT
      p.id,
      p.name as processor_name,
      bt.name as benchmark_name,
      bt.category,
      br.score,
      br.percentile,
      p.cores
    FROM benchmark_results br
    JOIN processors p ON br.processor_id = p.id
    JOIN benchmark_types bt ON br.benchmark_type_id = bt.id
    WHERE bt.name IN (${placeholders})
  `;
  const params: (string | number)[] = [...benchmarkNames];

  // If we know core count, filter to similar processors
  if (coreCount && coreCount > 0) {
    query += ` AND (p.cores IS NULL OR (p.cores >= ? AND p.cores <= ?))`;
    params.push(Math.max(1, coreCount - 2), coreCount + 4);
  }
  query += ` ORDER BY br.percentile DESC, br.score DESC LIMIT 50`;

  const result = await db.prepare(query).bind(...params).all();
  if (!result.success) {
    console.warn('[Benchmark] Query failed');
    return [];
  }

  // Validate each result with type guard
  return (result.results as unknown[]).filter(isValidBenchmarkData);
}
/**
 * Builds a benchmark reference for a server with the given vCPU count.
 *
 * Only entries whose core count is unknown (`null`) or lies within
 * `[vcpu - 2, vcpu + 4]` are considered. Among those, the processor with the
 * most entries is chosen (ties go to the one seen first).
 *
 * @param benchmarks - Benchmark rows to choose from.
 * @param vcpu - vCPU count of the server being described.
 * @returns The chosen processor and its benchmark entries, or `undefined`
 *          when nothing suitable exists.
 */
function getBenchmarkReference(
  benchmarks: BenchmarkData[],
  vcpu: number
): BenchmarkReference | undefined {
  const lowestCores = vcpu - 2;
  const highestCores = vcpu + 4;

  // Bucket the comparable entries by processor, preserving first-seen order
  const grouped = new Map<string, BenchmarkData[]>();
  for (const entry of benchmarks) {
    const comparable =
      entry.cores === null || (entry.cores >= lowestCores && entry.cores <= highestCores);
    if (!comparable) {
      continue;
    }
    const bucket = grouped.get(entry.processor_name);
    if (bucket) {
      bucket.push(entry);
    } else {
      grouped.set(entry.processor_name, [entry]);
    }
  }

  // Pick the processor with the largest bucket; strict ">" keeps the earliest on ties
  let winnerName = '';
  let winnerRows: BenchmarkData[] = [];
  grouped.forEach((rows, processorName) => {
    if (rows.length > winnerRows.length) {
      winnerName = processorName;
      winnerRows = rows;
    }
  });

  // Covers both "no comparable entries" and a winner with an empty name
  if (!winnerName) {
    return undefined;
  }

  return {
    processor_name: winnerName,
    benchmarks: winnerRows.map(({ benchmark_name, category, score, percentile }) => ({
      name: benchmark_name,
      category,
      score,
      percentile,
    })),
  };
}
/**
 * Fetches VPS benchmark rows in one query.
 *
 * A row is selected when its provider or plan name contains one of the first
 * three given provider names (case-insensitive), OR when its specs fall inside
 * the window `vcpu - 1 .. vcpu + 2` / `memoryGb - 2 .. memoryGb + 4` (lower
 * bounds clamped to 1). At most 30 rows are returned, best single-core score first.
 *
 * @param db - D1 database handle.
 * @param vcpu - Target vCPU count.
 * @param memoryGb - Target memory in GB.
 * @param providers - Provider names; only the first three are used.
 * @returns Rows that pass `isValidVPSBenchmark`; an empty list if the query is unsuccessful.
 */
async function queryVPSBenchmarksBatch(
  db: D1Database,
  vcpu: number,
  memoryGb: number,
  providers: string[]
): Promise<VPSBenchmark[]> {
  // One escaped LIKE pattern per provider, capped at three providers
  const likePatterns = providers
    .slice(0, 3)
    .map(name => `%${escapeLikePattern(name.toLowerCase())}%`);

  const providerMatch = `(LOWER(provider_name) LIKE ? ESCAPE '\\' OR LOWER(plan_name) LIKE ? ESCAPE '\\')`;
  const providerClause =
    likePatterns.length > 0
      ? `(${likePatterns.map(() => providerMatch).join(' OR ')})`
      : '1=0';

  // Each provider clause has two placeholders, so every pattern is bound twice,
  // followed by the four spec-window bounds.
  const bindings: (string | number)[] = [];
  likePatterns.forEach(pattern => {
    bindings.push(pattern, pattern);
  });
  bindings.push(
    Math.max(1, vcpu - 1),
    vcpu + 2,
    Math.max(1, memoryGb - 2),
    memoryGb + 4
  );

  // Provider matching OR spec matching
  const sql = [
    '',
    'SELECT * FROM vps_benchmarks',
    `WHERE ${providerClause}`,
    'OR (vcpu >= ? AND vcpu <= ? AND memory_gb >= ? AND memory_gb <= ?)',
    'ORDER BY gb6_single_normalized DESC',
    'LIMIT 30',
    '',
  ].join('\n');

  const outcome = await db.prepare(sql).bind(...bindings).all();
  if (!outcome.success) {
    console.warn('[VPSBenchmarksBatch] Query failed');
    return [];
  }

  // Keep only rows that pass the type guard
  return (outcome.results as unknown[]).filter(isValidVPSBenchmark);
}
/**
 * Renders the first five VPS benchmark rows as a bullet list for the AI prompt.
 *
 * Rows whose `geekbench_version` starts with "5." are tagged `[GB5→6]` to
 * flag that their scores were converted to the Geekbench 6 scale.
 *
 * @param benchmarks - VPS benchmark rows, already in the desired order.
 * @returns A heading plus one line per row, or an empty string for no rows.
 */
function formatVPSBenchmarkSummary(benchmarks: VPSBenchmark[]): string {
  if (benchmarks.length === 0) {
    return '';
  }

  const rows = benchmarks.slice(0, 5).map(entry => {
    const convertedTag = entry.geekbench_version?.startsWith('5.') ? ' [GB5→6]' : '';
    return `- ${entry.plan_name} (${entry.country_code}): Single=${entry.gb6_single_normalized}, Multi=${entry.gb6_multi_normalized}${convertedTag}, $${entry.monthly_price_usd}/mo, Perf/$=${entry.performance_per_dollar.toFixed(1)}`;
  });

  return ['Real VPS performance data (Geekbench 6 normalized):', ...rows].join('\n');
}
/**
 * Renders CPU benchmark rows for the AI prompt, one section per benchmark type.
 *
 * Each section has a `### name (category)` heading, up to three rows in their
 * incoming order, and a trailing blank line.
 *
 * @param benchmarks - Benchmark rows; the category of the first row of each
 *                     type is used for that section's heading.
 * @returns The formatted sections, or an empty string for no rows.
 */
function formatBenchmarkSummary(benchmarks: BenchmarkData[]): string {
  if (benchmarks.length === 0) {
    return '';
  }

  // Bucket rows by benchmark type, preserving first-seen order
  const sections = new Map<string, BenchmarkData[]>();
  benchmarks.forEach(row => {
    const bucket = sections.get(row.benchmark_name);
    if (bucket) {
      bucket.push(row);
    } else {
      sections.set(row.benchmark_name, [row]);
    }
  });

  const output: string[] = [];
  sections.forEach((rows, benchmarkName) => {
    // Only the first three rows of each type are shown
    const leaders = rows
      .slice(0, 3)
      .map(row => {
        const coreSuffix = row.cores ? ` (${row.cores} cores)` : '';
        return `${row.processor_name}${coreSuffix}: ${row.score} (${row.percentile}th percentile)`;
      })
      .join('\n');
    output.push(`### ${benchmarkName} (${rows[0].category})`, leaders, '');
  });

  return output.join('\n');
}
/**
 * Query tech stack specifications from database.
 *
 * Each requested technology is matched case-insensitively against the spec's
 * `name`, or against a quoted entry inside the `aliases` column.
 *
 * @param db - D1 database handle.
 * @param techStack - User-supplied technology names.
 * @returns Rows that pass `isValidTechSpec`, ordered by category then name.
 *          Returns an empty list for empty input, an unsuccessful query, or
 *          a thrown database error (which is logged, not rethrown).
 */
async function queryTechSpecs(
  db: D1Database,
  techStack: string[]
): Promise<TechSpec[]> {
  if (!techStack || techStack.length === 0) {
    return [];
  }

  // Normalize user input; blank entries cannot match a spec and are dropped
  const normalizedStack = techStack
    .map(t => t.toLowerCase().trim())
    .filter(t => t.length > 0);
  if (normalizedStack.length === 0) {
    return [];
  }

  // Build query that matches both name and aliases (case-insensitive).
  // The alias pattern embeds user input, so LIKE wildcards (% and _) are
  // escaped; otherwise an input such as "%" would match every spec that has aliases.
  const conditions: string[] = [];
  const params: string[] = [];
  for (const tech of normalizedStack) {
    conditions.push(`(LOWER(name) = ? OR LOWER(aliases) LIKE ? ESCAPE '\\')`);
    params.push(tech, `%"${escapeLikePattern(tech)}"%`);
  }

  const query = `
    SELECT
      id, name, category,
      vcpu_per_users, vcpu_per_users_max,
      min_memory_mb, max_memory_mb,
      description, aliases,
      is_memory_intensive, is_cpu_intensive
    FROM tech_specs
    WHERE ${conditions.join(' OR ')}
    ORDER BY category, name
  `;

  try {
    const result = await db.prepare(query).bind(...params).all();
    if (!result.success) {
      console.warn('[TechSpecs] Query failed');
      return [];
    }

    // Validate each result with type guard
    const validSpecs = (result.results as unknown[]).filter(isValidTechSpec);
    console.log(`[TechSpecs] Found ${validSpecs.length} specs for: ${normalizedStack.join(', ')}`);
    return validSpecs;
  } catch (error) {
    // Tech specs are treated as optional: log and continue without them
    console.error('[TechSpecs] Error:', error);
    return [];
  }
}
/**
 * Format tech specs for AI prompt.
 *
 * Produces one guideline line per spec (users per vCPU and minimum RAM, plus
 * memory/CPU-intensive warnings) and, when any spec is memory-intensive, a
 * closing CRITICAL note naming the largest of those minimum RAM requirements.
 *
 * @param techSpecs - Matched tech specs; may be empty.
 * @returns The prompt fragment. For no specs, a generic default guideline.
 */
function formatTechSpecsForPrompt(techSpecs: TechSpec[]): string {
  if (!techSpecs || techSpecs.length === 0) {
    return 'Tech stack resource guidelines:\n- Default: 1 vCPU per 100-300 users, 1-2GB RAM';
  }

  // MB → GB with one decimal, dropping a trailing ".0" (512 → "0.5", 4096 → "4").
  // Used for every figure so the per-spec lines and the CRITICAL note agree.
  const formatGb = (megabytes: number): string =>
    (megabytes / 1024).toFixed(1).replace('.0', '');

  const lines = ['Tech stack resource guidelines (MUST follow minimum RAM requirements):'];
  for (const spec of techSpecs) {
    const vcpuRange = spec.vcpu_per_users_max
      ? `${spec.vcpu_per_users}-${spec.vcpu_per_users_max}`
      : `${spec.vcpu_per_users}`;
    const minMemoryGB = formatGb(spec.min_memory_mb);

    let line = `- ${spec.name}: 1 vCPU per ${vcpuRange} users, MINIMUM ${minMemoryGB}GB RAM`;

    // Add warnings for special requirements
    const warnings: string[] = [];
    if (spec.is_memory_intensive) warnings.push('⚠️ MEMORY-INTENSIVE: must have at least ' + minMemoryGB + 'GB RAM');
    if (spec.is_cpu_intensive) warnings.push('⚠️ CPU-INTENSIVE');
    if (warnings.length > 0) {
      line += ` [${warnings.join(', ')}]`;
    }
    lines.push(line);
  }

  // Add explicit warning for memory-intensive apps
  const memoryIntensive = techSpecs.filter(s => s.is_memory_intensive);
  if (memoryIntensive.length > 0) {
    const maxMinMemory = Math.max(...memoryIntensive.map(s => s.min_memory_mb));
    lines.push('');
    // Rounding to whole GB here would print "1GB" for 512MB and "0GB" for 256MB
    lines.push(`⚠️ CRITICAL: This tech stack includes memory-intensive apps. Servers with less than ${formatGb(maxMinMemory)}GB RAM will NOT work properly!`);
  }

  return lines.join('\n');
}
/**
* Get AI-powered recommendations using OpenAI GPT-4o-mini
*/
async function getAIRecommendations(
env: Env,
apiKey: string,
req: RecommendRequest,
candidates: Server[],
benchmarkData: BenchmarkData[],
vpsBenchmarks: VPSBenchmark[],
techSpecs: TechSpec[],
bandwidthEstimate: BandwidthEstimate,
lang: string = 'en'
): Promise<{ recommendations: RecommendationResult[]; infrastructure_tips?: string[] }> {
// Validate API key before making any API calls
if (!apiKey || !apiKey.trim()) {
console.error('[AI] OPENAI_API_KEY is not configured or empty');
throw new Error('OPENAI_API_KEY not configured. Please set the secret via: wrangler secret put OPENAI_API_KEY');
}
if (!apiKey.startsWith('sk-')) {
console.error('[AI] OPENAI_API_KEY has invalid format (should start with sk-)');
throw new Error('Invalid OPENAI_API_KEY format');
}
console.log('[AI] API key validated (format: sk-***)');
// Build dynamic tech specs prompt from database
const techSpecsPrompt = formatTechSpecsForPrompt(techSpecs);
// Ensure lang is valid
const validLang = ['en', 'zh', 'ja', 'ko'].includes(lang) ? lang : 'en';
const languageInstruction = i18n[validLang].aiLanguageInstruction;
// Build system prompt with benchmark awareness
const systemPrompt = `You are a cloud infrastructure expert focused on COST-EFFECTIVE solutions. Your goal is to recommend the SMALLEST and CHEAPEST server that can handle the user's requirements.
CRITICAL RULES:
1. NEVER over-provision. Recommend the minimum specs needed.
2. Cost efficiency is the PRIMARY factor - cheaper is better if it meets requirements.
3. A 1-2 vCPU server can handle 100-500 concurrent users for most web workloads.
4. Nginx/reverse proxy needs very little resources - 1 vCPU can handle 1000+ req/sec.
5. Provide 3 options: Budget (cheapest viable), Balanced (some headroom), Premium (growth ready).
BANDWIDTH CONSIDERATIONS (VERY IMPORTANT):
- Estimated monthly bandwidth is provided based on concurrent users and use case.
- TOTAL COST = Base server price + Bandwidth overage charges
- Provider bandwidth allowances:
* Linode: 1TB (1GB plan) to 20TB (192GB plan) included free, $0.005/GB overage
* Vultr: 1TB-10TB depending on plan, $0.01/GB overage (2x Linode rate)
* DigitalOcean: 1TB-12TB depending on plan, $0.01/GB overage
- For bandwidth >1TB/month: Linode is often cheaper despite higher base price
- For bandwidth >3TB/month: Linode is STRONGLY preferred (overage savings significant)
- Always mention bandwidth implications in cost_efficiency analysis
${techSpecsPrompt}
Use REAL BENCHMARK DATA to validate capacity estimates.
${languageInstruction}`;
// Build user prompt with requirements and candidates
console.log('[AI] Bandwidth estimate:', bandwidthEstimate);
// Detect high-traffic based on bandwidth estimate (more accurate than keyword matching)
const isHighTraffic = bandwidthEstimate.category === 'heavy' || bandwidthEstimate.category === 'very_heavy';
// Format benchmark data for the prompt
const benchmarkSummary = formatBenchmarkSummary(benchmarkData);
const vpsBenchmarkSummary = formatVPSBenchmarkSummary(vpsBenchmarks);
// Pre-filter candidates to reduce AI prompt size and cost
// Sort by price and limit to top 15 most affordable options
const topCandidates = candidates
.sort((a, b) => a.monthly_price - b.monthly_price)
.slice(0, 15);
console.log(`[AI] Filtered ${candidates.length} candidates to ${topCandidates.length} for AI analysis`);
// Sanitize user inputs to prevent prompt injection
const sanitizedTechStack = req.tech_stack.map(t => sanitizeForAIPrompt(t, 50)).join(', ');
const sanitizedUseCase = sanitizeForAIPrompt(req.use_case, 200);
const userPrompt = `Analyze these server options and recommend the top 3 best matches.
## User Requirements
- Tech Stack: ${sanitizedTechStack}
- Expected Concurrent Users: ${req.expected_users} ${req.traffic_pattern === 'spiky' ? '(with traffic spikes)' : req.traffic_pattern === 'growing' ? '(growing user base)' : '(steady traffic)'}
- **Estimated DAU (Daily Active Users)**: ${bandwidthEstimate.estimated_dau_min.toLocaleString()}-${bandwidthEstimate.estimated_dau_max.toLocaleString()}명 (동시 접속 ${req.expected_users}명 기준)
- Use Case: ${sanitizedUseCase}
- Traffic Pattern: ${req.traffic_pattern || 'steady'}
- **Estimated Monthly Bandwidth**: ${bandwidthEstimate.monthly_tb >= 1 ? `${bandwidthEstimate.monthly_tb} TB` : `${bandwidthEstimate.monthly_gb} GB`} (${bandwidthEstimate.category})
${isHighTraffic ? `- ⚠️ HIGH BANDWIDTH WORKLOAD (${bandwidthEstimate.monthly_tb} TB/month): MUST recommend Linode over Vultr. Linode includes 1-6TB/month transfer vs Vultr overage charges ($0.01/GB). Bandwidth cost savings > base price difference.` : ''}
${req.region_preference ? `- Region Preference: ${req.region_preference.join(', ')}` : ''}
${req.budget_limit ? `- Budget Limit: $${req.budget_limit}/month` : ''}
## Real VPS Benchmark Data (Geekbench 6 normalized - actual VPS tests)
${vpsBenchmarkSummary || 'No similar VPS benchmark data available.'}
## CPU Benchmark Reference (from Phoronix Test Suite)
${benchmarkSummary || 'No relevant CPU benchmark data available.'}
## Available Servers (IMPORTANT: Use the server_id value, NOT the list number!)
${topCandidates.map((s) => `
[server_id=${s.id}] ${s.provider_name} - ${s.instance_name}${s.instance_family ? ` (${s.instance_family})` : ''}
Instance: ${s.instance_id}
vCPU: ${s.vcpu} | Memory: ${s.memory_gb} GB | Storage: ${s.storage_gb} GB
Network: ${s.network_speed_gbps ? `${s.network_speed_gbps} Gbps` : 'N/A'}${s.gpu_count > 0 ? ` | GPU: ${s.gpu_count}x ${s.gpu_type || 'Unknown'}` : ' | GPU: None'}
Price: ${s.currency === 'KRW' ? '₩' : '$'}${s.currency === 'KRW' ? Math.round(s.monthly_price).toLocaleString() : s.monthly_price.toFixed(2)}/month (${s.currency}) | Region: ${s.region_name} (${s.region_code})
`).join('\n')}
Return ONLY a valid JSON object (no markdown, no code blocks) with this exact structure:
{
"recommendations": [
{
"server_id": 2045, // Use the actual server_id from [server_id=XXXX] above, NOT list position!
"score": 95,
"analysis": {
"tech_fit": "Why this server fits the tech stack",
"capacity": "MUST mention: '동시 접속 X명 요청 (DAU A-B명), 최대 동시 Y명까지 처리 가능' format",
"cost_efficiency": "MUST include: base price + bandwidth cost estimate. Example: '$5/month + ~$X bandwidth = ~$Y total'",
"scalability": "Scalability potential including bandwidth headroom"
},
"estimated_capacity": {
"max_concurrent_users": 7500,
"requests_per_second": 1000
}
}
],
"infrastructure_tips": [
"Practical tip 1",
"Practical tip 2"
]
}
Provide exactly 3 recommendations:
1. BUDGET option: Cheapest TOTAL cost (base + bandwidth) that can handle the load (highest score if viable)
2. BALANCED option: Some headroom for traffic spikes
3. PREMIUM option: Ready for 2-3x growth
SCORING (100 points total):
- Total Cost Efficiency (40%): Base price + estimated bandwidth overage. Lower total = higher score.
- Capacity Fit (30%): Can it handle the concurrent users and bandwidth?
- Scalability (30%): Room for growth in CPU, memory, AND bandwidth allowance.
The option with the LOWEST TOTAL MONTHLY COST (including bandwidth) should have the HIGHEST score.`;
// Use AI Gateway if configured (bypasses regional restrictions like HKG)
// AI Gateway URL format: https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_name}/openai
const useAIGateway = !!env.AI_GATEWAY_URL;
const apiEndpoint = useAIGateway
? `${env.AI_GATEWAY_URL}/chat/completions`
: 'https://api.openai.com/v1/chat/completions';
console.log(`[AI] Sending request to ${useAIGateway ? 'AI Gateway → ' : ''}OpenAI GPT-4o-mini`);
if (useAIGateway) {
console.log('[AI] Using Cloudflare AI Gateway to bypass regional restrictions');
}
// Create AbortController with 30 second timeout
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 30000);
try {
const openaiResponse = await fetch(apiEndpoint, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${apiKey}`,
},
body: JSON.stringify({
model: 'gpt-4o-mini',
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt },
],
max_tokens: 2000,
temperature: 0.3,
}),
signal: controller.signal,
});
clearTimeout(timeoutId);
if (!openaiResponse.ok) {
const errorText = await openaiResponse.text();
// Parse error details for better debugging
let errorDetails = '';
try {
const errorObj = JSON.parse(errorText);
errorDetails = errorObj?.error?.message || errorObj?.error?.type || '';
} catch {
errorDetails = errorText.slice(0, 200);
}
// Sanitize API keys from error messages
const sanitized = errorDetails.replace(/sk-[a-zA-Z0-9-_]+/g, 'sk-***');
// Enhanced logging for specific error codes
if (openaiResponse.status === 403) {
const isRegionalBlock = errorDetails.includes('Country') || errorDetails.includes('region') || errorDetails.includes('territory');
if (isRegionalBlock && !useAIGateway) {
console.error('[AI] ❌ REGIONAL BLOCK (403) - OpenAI blocked this region');
console.error('[AI] Worker is running in a blocked region (e.g., HKG)');
console.error('[AI] FIX: Set AI_GATEWAY_URL secret to use Cloudflare AI Gateway');
console.error('[AI] 1. Create AI Gateway: https://dash.cloudflare.com → AI → AI Gateway');
console.error('[AI] 2. Run: wrangler secret put AI_GATEWAY_URL');
console.error('[AI] 3. Enter: https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_name}/openai');
} else {
console.error('[AI] ❌ AUTH FAILED (403) - Possible causes:');
console.error('[AI] 1. Invalid or expired OPENAI_API_KEY');
console.error('[AI] 2. API key not properly set in Cloudflare secrets');
console.error('[AI] 3. Account billing issue or quota exceeded');
}
console.error('[AI] Error details:', sanitized);
} else if (openaiResponse.status === 429) {
console.error('[AI] ⚠️ RATE LIMITED (429) - Too many requests');
console.error('[AI] Error details:', sanitized);
} else if (openaiResponse.status === 401) {
console.error('[AI] ❌ UNAUTHORIZED (401) - API key invalid');
console.error('[AI] Error details:', sanitized);
} else {
console.error('[AI] OpenAI API error:', openaiResponse.status, sanitized);
}
throw new Error(`OpenAI API error: ${openaiResponse.status}`);
}
const openaiResult = await openaiResponse.json() as {
choices: Array<{ message: { content: string } }>;
};
const response = openaiResult.choices[0]?.message?.content || '';
console.log('[AI] Response received from OpenAI, length:', response.length);
console.log('[AI] Raw response preview:', response.substring(0, 500));
// Parse AI response
const aiResult = parseAIResponse(response);
console.log('[AI] Parsed recommendations count:', aiResult.recommendations.length);
// Pre-index VPS benchmarks by provider for O(1) lookups
const vpsByProvider = new Map<string, VPSBenchmark[]>();
for (const vps of vpsBenchmarks) {
const providerKey = vps.provider_name.toLowerCase();
const existing = vpsByProvider.get(providerKey) || [];
existing.push(vps);
vpsByProvider.set(providerKey, existing);
}
// Map AI recommendations to full results
const results: RecommendationResult[] = [];
for (const aiRec of aiResult.recommendations) {
// Handle both string and number server_id from AI
const serverId = Number(aiRec.server_id);
const server = candidates.find((s) => s.id === serverId);
if (!server) {
console.warn('[AI] Server not found:', aiRec.server_id);
continue;
}
// Get benchmark reference for this server's CPU count
const benchmarkRef = getBenchmarkReference(benchmarkData, server.vcpu);
// Find matching VPS benchmark using pre-indexed data
const providerName = server.provider_name.toLowerCase();
let matchingVPS: VPSBenchmark | undefined;
// Try to find from indexed provider benchmarks
for (const [providerKey, benchmarks] of vpsByProvider.entries()) {
if (providerKey.includes(providerName) || providerName.includes(providerKey)) {
// First try exact or close vCPU match
matchingVPS = benchmarks.find(
(v) => v.vcpu === server.vcpu || (v.vcpu >= server.vcpu - 1 && v.vcpu <= server.vcpu + 1)
);
// Fallback to any from this provider
if (!matchingVPS && benchmarks.length > 0) {
matchingVPS = benchmarks[0];
}
if (matchingVPS) break;
}
}
// Final fallback: similar specs from any provider
if (!matchingVPS) {
matchingVPS = vpsBenchmarks.find(
(v) => v.vcpu === server.vcpu || (v.vcpu >= server.vcpu - 1 && v.vcpu <= server.vcpu + 1)
);
}
// Calculate bandwidth info for this server
const bandwidthInfo = calculateBandwidthInfo(server, bandwidthEstimate);
// Find all available regions for the same server spec
const availableRegions: AvailableRegion[] = candidates
.filter(c =>
c.provider_name === server.provider_name &&
c.instance_id === server.instance_id &&
c.region_code !== server.region_code // Exclude current region
)
.map(c => ({
region_name: c.region_name,
region_code: c.region_code,
monthly_price: c.monthly_price
}))
.sort((a, b) => a.monthly_price - b.monthly_price);
results.push({
server: server,
score: aiRec.score,
analysis: aiRec.analysis,
estimated_capacity: aiRec.estimated_capacity,
bandwidth_info: bandwidthInfo,
benchmark_reference: benchmarkRef,
vps_benchmark_reference: matchingVPS
? {
plan_name: matchingVPS.plan_name,
geekbench_single: matchingVPS.geekbench_single,
geekbench_multi: matchingVPS.geekbench_multi,
monthly_price_usd: matchingVPS.monthly_price_usd,
performance_per_dollar: matchingVPS.performance_per_dollar,
}
: undefined,
available_regions: availableRegions.length > 0 ? availableRegions : undefined,
});
if (results.length >= 3) break;
}
return {
recommendations: results,
infrastructure_tips: aiResult.infrastructure_tips,
};
} catch (error) {
clearTimeout(timeoutId);
// Handle timeout specifically
if (error instanceof Error && error.name === 'AbortError') {
console.error('[AI] Request timed out after 30 seconds');
throw new Error('AI request timed out - please try again');
}
console.error('[AI] Error:', error);
throw new Error(`AI processing failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
 * Locate the first brace-balanced `{...}` span in `text`.
 *
 * Braces that appear inside JSON string literals (including strings containing
 * escaped quotes) are ignored, so prose that follows the object — even prose
 * that itself contains `}` — is not captured.
 *
 * @param text - Raw text that may contain a JSON object surrounded by other content.
 * @returns The substring from the first `{` to its matching `}`, or `null` when
 *          there is no `{` or the braces never balance (e.g. truncated output).
 */
function extractBalancedJsonObject(text: string): string | null {
  const start = text.indexOf('{');
  if (start === -1) {
    return null;
  }

  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      // Inside a string literal only an unescaped quote is significant.
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) {
        return text.slice(start, i + 1);
      }
    }
  }

  return null;
}

/**
 * Parse an AI response and extract the recommendation JSON.
 *
 * Accepted input shapes:
 * - a plain string,
 * - an object with a string `response` field,
 * - an object with `result.response` as a string,
 * - an object with `choices[0].message.content` as a string.
 *
 * Markdown code fences are stripped, the first JSON object in the text is
 * parsed, and each entry of `recommendations` is filtered through
 * `isValidAIRecommendation`. Non-string entries of `infrastructure_tips`
 * are dropped.
 *
 * @param response - Raw AI output in one of the shapes listed above.
 * @returns The validated recommendations and the infrastructure tips
 *          (an empty array when the field is missing or not an array).
 * @throws Error with a `Failed to parse AI response: ...` message when the
 *         shape is unrecognised, no JSON object is present, the JSON is
 *         malformed, `recommendations` is not an array, or every
 *         recommendation fails validation.
 */
function parseAIResponse(response: unknown): AIRecommendationResponse {
  try {
    // Handle different response formats
    let content: string;

    if (typeof response === 'string') {
      content = response;
    } else if (typeof response === 'object' && response !== null) {
      // Type guard for response object with different structures
      const resp = response as Record<string, unknown>;

      if (typeof resp.response === 'string') {
        content = resp.response;
      } else if (typeof resp.result === 'object' && resp.result !== null) {
        const result = resp.result as Record<string, unknown>;
        if (typeof result.response === 'string') {
          content = result.response;
        } else {
          throw new Error('Unexpected AI response format');
        }
      } else if (Array.isArray(resp.choices) && resp.choices.length > 0) {
        const choice = resp.choices[0] as Record<string, unknown>;
        const message = choice?.message as Record<string, unknown>;
        if (typeof message?.content === 'string') {
          content = message.content;
        } else {
          throw new Error('Unexpected AI response format');
        }
      } else {
        console.error('[AI] Unexpected response format:', response);
        throw new Error('Unexpected AI response format');
      }
    } else {
      console.error('[AI] Unexpected response format:', response);
      throw new Error('Unexpected AI response format');
    }

    // Remove markdown code blocks if present
    content = content.replace(/```json\s*/g, '').replace(/```\s*/g, '').trim();

    // Prefer the first balanced object so trailing text containing `}` cannot
    // corrupt the extracted JSON. The greedy match is kept as a fallback so
    // unbalanced input still surfaces the JSON.parse error as before.
    const jsonText = extractBalancedJsonObject(content) ?? content.match(/\{[\s\S]*\}/)?.[0];
    if (!jsonText) {
      throw new Error('No JSON found in AI response');
    }

    // jsonText always starts with `{`, so a successful parse yields a plain object.
    const payload = JSON.parse(jsonText) as Record<string, unknown>;

    const rawRecommendations = payload.recommendations;
    if (!Array.isArray(rawRecommendations)) {
      throw new Error('Invalid recommendations structure');
    }

    // Validate each recommendation with type guard
    const validRecommendations = rawRecommendations.filter(isValidAIRecommendation);
    if (validRecommendations.length === 0 && rawRecommendations.length > 0) {
      console.warn('[AI] All recommendations failed validation, raw:', JSON.stringify(rawRecommendations[0]).slice(0, 200));
      throw new Error('AI recommendations failed validation');
    }

    // Model output is untrusted: keep only string tips.
    // NOTE(review): assumes infrastructure_tips is declared as string[] — confirm in types.ts.
    const rawTips = payload.infrastructure_tips;
    const infrastructureTips = Array.isArray(rawTips)
      ? rawTips.filter((tip): tip is string => typeof tip === 'string')
      : [];

    return {
      recommendations: validRecommendations,
      infrastructure_tips: infrastructureTips,
    } as AIRecommendationResponse;
  } catch (error) {
    console.error('[AI] Parse error:', error);
    console.error('[AI] Response was:', response);
    throw new Error(`Failed to parse AI response: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}