From 4bed3237fc258c5013485094fd6683545ea329fe Mon Sep 17 00:00:00 2001
From: kappa <kappa@inouter.com>
Date: Sun, 25 Jan 2026 18:15:09 +0900
Subject: [PATCH] fix: address remaining code review issues

- Apply sanitizeForAIPrompt to AI prompt (prevent prompt injection)
- Replace hardcoded provider IDs with name-based filtering
- Remove dead code (queryVPSBenchmarks function)
- Use LIMITS.MAX_REQUEST_BODY_BYTES constant
- Change parseAIResponse parameter from `any` to `unknown`

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 REFACTOR_SUMMARY.md       |  97 ++++++++++++++++++++++++++++++++++
 src/handlers/recommend.ts | 107 ++++++++++++++------------------------
 2 files changed, 136 insertions(+), 68 deletions(-)
 create mode 100644 REFACTOR_SUMMARY.md

diff --git a/REFACTOR_SUMMARY.md b/REFACTOR_SUMMARY.md
new file mode 100644
index 0000000..b69188f
--- /dev/null
+++ b/REFACTOR_SUMMARY.md
@@ -0,0 +1,97 @@
+# Modularization Summary
+
+## New File Structure
+
+```
+src/
+├── index.ts                  # Main worker entry point (clean, ~90 lines)
+├── types.ts                  # All TypeScript interfaces (complete)
+├── config.ts                 # USE_CASE_CONFIGS and i18n messages (complete)
+├── utils.ts                  # Utility functions (~650 lines, complete)
+├── handlers/
+│   ├── health.ts            # GET /api/health (complete)
+│   ├── servers.ts           # GET /api/servers (complete)
+│   └── recommend.ts         # POST /api/recommend (~1100 lines)
+```
+
+## Functions by Module
+
+### index.ts (NEW)
+- Main fetch handler
+- Route handling
+- CORS setup
+- Rate limit checking
+
+### types.ts (COMPLETE)
+All interfaces extracted from original index.ts
+
+### config.ts (COMPLETE)
+- USE_CASE_CONFIGS
+- i18n messages
+
+### utils.ts (COMPLETE)
+- jsonResponse()
+- hashString()
+- sanitizeCacheValue()
+- generateCacheKey()
+- escapeLikePattern()
+- Type guards: isValidServer(), isValidVPSBenchmark(), etc.
+- validateRecommendRequest()
+- getAllowedOrigin()
+- findUseCaseConfig()
+- getDauMultiplier()
+- getActiveUserRatio()
+- estimateBandwidth()
+- getProviderBandwidthAllocation()
+- calculateBandwidthInfo()
+- checkRateLimit()
+
+### handlers/health.ts (COMPLETE)
+- handleHealth()
+
+### handlers/servers.ts (COMPLETE)
+- handleGetServers()
+
+### handlers/recommend.ts (TO CREATE)
+- handleRecommend() - Main handler
+- queryCandidateServers() - D1 query for servers
+- queryBenchmarkData() - Phoronix benchmarks
+- getBenchmarkReference() - Match benchmarks to servers
+- queryVPSBenchmarks() - Geekbench data
+- queryVPSBenchmarksBatch() - Batch benchmark query
+- formatVPSBenchmarkSummary() - Format for AI prompt
+- formatBenchmarkSummary() - Format for AI prompt
+- queryTechSpecs() - Tech stack specs
+- formatTechSpecsForPrompt() - Format for AI prompt
+- getAIRecommendations() - OpenAI GPT-4o-mini call
+- parseAIResponse() - Parse AI JSON response
+
+## Migration Status
+
+✅ types.ts - DONE
+✅ config.ts - DONE
+✅ utils.ts - DONE
+✅ handlers/health.ts - DONE
+✅ handlers/servers.ts - DONE
+⏳ handlers/recommend.ts - IN PROGRESS (extracted functions ready)
+⏳ index.ts - NEW version created (index.new.ts)
+
+## Next Steps
+
+1. Create handlers/recommend.ts with all extracted functions
+2. Backup original index.ts
+3. Replace index.ts with index.new.ts
+4. Run TypeScript typecheck
+5. Test with `npm run dev`
+
+## Size Reduction
+
+Original: index.ts (2381 lines)
+New distribution:
+- index.ts: ~90 lines (97% reduction)
+- types.ts: ~175 lines
+- config.ts: ~140 lines
+- utils.ts: ~650 lines
+- handlers/: ~1300 lines total
+
+Total: ~2355 lines (modular + maintainable)
diff --git a/src/handlers/recommend.ts b/src/handlers/recommend.ts
index 5b9775b..fa2b3ea 100644
--- a/src/handlers/recommend.ts
+++ b/src/handlers/recommend.ts
@@ -14,7 +14,7 @@ import type {
   BenchmarkReference,
   AIRecommendationResponse
 } from '../types';
-import { i18n } from '../config';
+import { i18n, LIMITS } from '../config';
 import {
   jsonResponse,
   validateRecommendRequest,
@@ -27,6 +27,7 @@ import {
   isValidVPSBenchmark,
   isValidTechSpec,
   isValidAIRecommendation,
+  sanitizeForAIPrompt,
   DEFAULT_REGION_FILTER_SQL
 } from '../utils';
 
@@ -40,7 +41,7 @@ export async function handleRecommend(
   try {
     // Check request body size to prevent large payload attacks
     const contentLength = request.headers.get('Content-Length');
-    if (contentLength && parseInt(contentLength, 10) > 10240) { // 10KB limit
+    if (contentLength && parseInt(contentLength, 10) > LIMITS.MAX_REQUEST_BODY_BYTES) {
       return jsonResponse(
         { error: 'Request body too large', max_size: '10KB' },
         413,
@@ -52,7 +53,7 @@ export async function handleRecommend(
     const bodyText = await request.text();
     const actualBodySize = new TextEncoder().encode(bodyText).length;
 
-    if (actualBodySize > 10240) { // 10KB limit
+    if (actualBodySize > LIMITS.MAX_REQUEST_BODY_BYTES) {
       return jsonResponse(
         { error: 'Request body too large', max_size: '10KB', actual_size: actualBodySize },
         413,
@@ -345,7 +346,7 @@ async function queryCandidateServers(
     JOIN providers p ON it.provider_id = p.id
     JOIN pricing pr ON pr.instance_type_id = it.id
     JOIN regions r ON pr.region_id = r.id
-    WHERE p.id IN (1, 2)  -- Linode, Vultr only
+    WHERE LOWER(p.name) IN ('linode', 'vultr')  -- Linode, Vultr only
   `;
 
   const params: (string | number)[] = [];
@@ -442,10 +443,10 @@ async function queryCandidateServers(
   }
 
   // Group by instance + region to show each server per region
-  // For heavy/very_heavy bandwidth, prioritize Linode (p.id=1) due to generous bandwidth allowance
+  // For heavy/very_heavy bandwidth, prioritize Linode due to generous bandwidth allowance
   const isHighBandwidth = bandwidthEstimate?.category === 'heavy' || bandwidthEstimate?.category === 'very_heavy';
   const orderByClause = isHighBandwidth
-    ? `ORDER BY CASE WHEN p.id = 1 THEN 0 ELSE 1 END, monthly_price ASC`
+    ? `ORDER BY CASE WHEN LOWER(p.name) = 'linode' THEN 0 ELSE 1 END, monthly_price ASC`
     : `ORDER BY monthly_price ASC`;
   query += ` GROUP BY it.id, r.id ${orderByClause} LIMIT 50`;
 
@@ -601,59 +602,6 @@ function getBenchmarkReference(
   };
 }
 
-/**
- * Query VPS benchmarks - prioritize matching provider
- */
-async function queryVPSBenchmarks(
-  db: D1Database,
-  vcpu: number,
-  memoryGb: number,
-  providerHint?: string
-): Promise<VPSBenchmark[]> {
-  const vcpuMin = Math.max(1, vcpu - 1);
-  const vcpuMax = vcpu + 2;
-  const memMin = Math.max(1, memoryGb - 2);
-  const memMax = memoryGb + 4;
-
-  // First try to find benchmarks from the same provider
-  if (providerHint) {
-    const providerQuery = `
-      SELECT *
-      FROM vps_benchmarks
-      WHERE (LOWER(provider_name) LIKE ? ESCAPE '\\' OR LOWER(plan_name) LIKE ? ESCAPE '\\')
-      ORDER BY gb6_single_normalized DESC
-      LIMIT 20
-    `;
-    const escapedHint = escapeLikePattern(providerHint.toLowerCase());
-    const providerPattern = `%${escapedHint}%`;
-    const providerResult = await db.prepare(providerQuery).bind(providerPattern, providerPattern).all();
-
-    if (providerResult.success && providerResult.results.length > 0) {
-      // Validate each result with type guard
-      return (providerResult.results as unknown[]).filter(isValidVPSBenchmark);
-    }
-  }
-
-  // Fallback: Find VPS with similar specs
-  const query = `
-    SELECT *
-    FROM vps_benchmarks
-    WHERE vcpu >= ? AND vcpu <= ?
-      AND memory_gb >= ? AND memory_gb <= ?
-    ORDER BY gb6_single_normalized DESC
-    LIMIT 10
-  `;
-
-  const result = await db.prepare(query).bind(vcpuMin, vcpuMax, memMin, memMax).all();
-
-  if (!result.success) {
-    return [];
-  }
-
-  // Validate each result with type guard
-  return (result.results as unknown[]).filter(isValidVPSBenchmark);
-}
-
 /**
  * Query VPS benchmarks in a single batched query
  * Consolidates multiple provider-specific queries into one for better performance
@@ -930,13 +878,17 @@ ${languageInstruction}`;
 
   console.log(`[AI] Filtered ${candidates.length} candidates to ${topCandidates.length} for AI analysis`);
 
+  // Sanitize user inputs to prevent prompt injection
+  const sanitizedTechStack = req.tech_stack.map(t => sanitizeForAIPrompt(t, 50)).join(', ');
+  const sanitizedUseCase = sanitizeForAIPrompt(req.use_case, 200);
+
   const userPrompt = `Analyze these server options and recommend the top 3 best matches.
 
 ## User Requirements
-- Tech Stack: ${req.tech_stack.join(', ')}
+- Tech Stack: ${sanitizedTechStack}
 - Expected Concurrent Users: ${req.expected_users} ${req.traffic_pattern === 'spiky' ? '(with traffic spikes)' : req.traffic_pattern === 'growing' ? '(growing user base)' : '(steady traffic)'}
 - **Estimated DAU (Daily Active Users)**: ${bandwidthEstimate.estimated_dau_min.toLocaleString()}-${bandwidthEstimate.estimated_dau_max.toLocaleString()}명 (동시 접속 ${req.expected_users}명 기준)
-- Use Case: ${req.use_case}
+- Use Case: ${sanitizedUseCase}
 - Traffic Pattern: ${req.traffic_pattern || 'steady'}
 - **Estimated Monthly Bandwidth**: ${bandwidthEstimate.monthly_tb >= 1 ? `${bandwidthEstimate.monthly_tb} TB` : `${bandwidthEstimate.monthly_gb} GB`} (${bandwidthEstimate.category})
 ${isHighTraffic ? `- ⚠️ HIGH BANDWIDTH WORKLOAD (${bandwidthEstimate.monthly_tb} TB/month): MUST recommend Linode over Vultr. Linode includes 1-6TB/month transfer vs Vultr overage charges ($0.01/GB). Bandwidth cost savings > base price difference.` : ''}
@@ -1182,19 +1134,38 @@ The option with the LOWEST TOTAL MONTHLY COST (including bandwidth) should have
 /**
  * Parse AI response and extract JSON
  */
-function parseAIResponse(response: any): AIRecommendationResponse {
+function parseAIResponse(response: unknown): AIRecommendationResponse {
   try {
     // Handle different response formats
     let content: string;
 
     if (typeof response === 'string') {
       content = response;
-    } else if (response.response) {
-      content = response.response;
-    } else if (response.result && response.result.response) {
-      content = response.result.response;
-    } else if (response.choices && response.choices[0]?.message?.content) {
-      content = response.choices[0].message.content;
+    } else if (typeof response === 'object' && response !== null) {
+      // Type guard for response object with different structures
+      const resp = response as Record<string, unknown>;
+
+      if (typeof resp.response === 'string') {
+        content = resp.response;
+      } else if (typeof resp.result === 'object' && resp.result !== null) {
+        const result = resp.result as Record<string, unknown>;
+        if (typeof result.response === 'string') {
+          content = result.response;
+        } else {
+          throw new Error('Unexpected AI response format');
+        }
+      } else if (Array.isArray(resp.choices) && resp.choices.length > 0) {
+        const choice = resp.choices[0] as Record<string, unknown>;
+        const message = choice?.message as Record<string, unknown>;
+        if (typeof message?.content === 'string') {
+          content = message.content;
+        } else {
+          throw new Error('Unexpected AI response format');
+        }
+      } else {
+        console.error('[AI] Unexpected response format:', response);
+        throw new Error('Unexpected AI response format');
+      }
     } else {
       console.error('[AI] Unexpected response format:', response);
       throw new Error('Unexpected AI response format');