From f7046f4c66adaf22105c7aca6b68fc7844145716 Mon Sep 17 00:00:00 2001 From: kappa Date: Wed, 11 Feb 2026 18:09:13 +0900 Subject: [PATCH] Add RAG semantic search and proactive event notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement hybrid knowledge search using Cloudflare Vectorize + Workers AI embeddings (bge-base-en-v1.5, 768d) merged with existing D1 LIKE queries, with graceful degradation when Vectorize is unavailable. Add admin API endpoints for batch/single article indexing. Add 4 proactive notification cron jobs: server status changes, deposit confirmation/rejection alerts, pending payment reminders (1h+), and bank deposit matching notifications — all with DB-column-based deduplication. Co-Authored-By: Claude Opus 4.6 --- schema.sql | 24 ++++ src/index.ts | 12 +- src/routes/api.ts | 64 +++++++++ src/services/cron-jobs.ts | 247 ++++++++++++++++++++++++++++++++++- src/services/embedding.ts | 245 ++++++++++++++++++++++++++++++++++ src/services/notification.ts | 17 +++ src/tools/knowledge-tool.ts | 183 +++++++++++++++++++++----- src/types.ts | 4 + wrangler.toml | 6 + 9 files changed, 768 insertions(+), 34 deletions(-) create mode 100644 src/services/embedding.ts diff --git a/schema.sql b/schema.sql index 8d4a054..1141e6d 100644 --- a/schema.sql +++ b/schema.sql @@ -52,6 +52,8 @@ CREATE TABLE IF NOT EXISTS transactions ( reference_id INTEGER, confirmed_by INTEGER, confirmed_at DATETIME, + status_notified_at DATETIME, + reminder_sent_at DATETIME, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (user_id) REFERENCES users(id), FOREIGN KEY (confirmed_by) REFERENCES users(id) @@ -68,6 +70,7 @@ CREATE TABLE IF NOT EXISTS bank_notifications ( transaction_time DATETIME, raw_message TEXT, matched_transaction_id INTEGER, + match_notified_at DATETIME, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (matched_transaction_id) REFERENCES transactions(id) ); @@ -107,6 +110,7 @@ CREATE TABLE IF NOT EXISTS servers ( provisioned_at DATETIME, terminated_at DATETIME, expires_at DATETIME, + last_notified_status TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (user_id) REFERENCES users(id) @@ -194,6 +198,8 @@ CREATE TABLE IF NOT EXISTS knowledge_articles ( tags TEXT, language TEXT DEFAULT 'ko', is_active INTEGER DEFAULT 1, + embedding_indexed INTEGER DEFAULT 0, + embedding_indexed_at DATETIME, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); @@ -335,3 +341,21 @@ CREATE INDEX IF NOT EXISTS idx_onboarding_expires ON onboarding_sessions(expires CREATE INDEX IF NOT EXISTS idx_troubleshoot_expires ON troubleshoot_sessions(expires_at); CREATE INDEX IF NOT EXISTS idx_asset_expires ON asset_sessions(expires_at); CREATE INDEX IF NOT EXISTS idx_billing_expires ON billing_sessions(expires_at); + +---------------------------------------------------------------------- +-- Notification Tracking Columns (Migration) +---------------------------------------------------------------------- + +-- servers: 상태 변경 알림 추적 +-- ALTER TABLE servers ADD COLUMN last_notified_status TEXT; + +-- transactions: 입금 승인/거부 알림 + 리마인더 추적 +-- ALTER TABLE transactions ADD COLUMN status_notified_at DATETIME; +-- ALTER TABLE transactions ADD COLUMN reminder_sent_at DATETIME; + +-- bank_notifications: 매칭 알림 추적 +-- ALTER TABLE bank_notifications ADD COLUMN match_notified_at DATETIME; + +-- Knowledge articles: 임베딩 인덱싱 추적 +-- ALTER TABLE knowledge_articles ADD COLUMN embedding_indexed INTEGER DEFAULT 0; +-- ALTER TABLE knowledge_articles ADD COLUMN embedding_indexed_at DATETIME; diff --git a/src/index.ts b/src/index.ts index ac616eb..9663634 100644 --- a/src/index.ts +++ b/src/index.ts @@ -124,6 +124,10 @@ export default { archiveOldConversations, cleanupStaleOrders, monitoringCheck, + notifyServerStatusChanges, + notifyTransactionStatusChanges, + sendPaymentReminders, + notifyBankMatches, } = await import('./services/cron-jobs'); try { @@ -135,14 +139,18 @@ export default { await cleanupExpiredSessions(env); break; - // Every 5 minutes: stale session/order cleanup + // Every 5 minutes: stale cleanup + proactive notifications case '*/5 * * * *': await cleanupStaleOrders(env); + await notifyServerStatusChanges(env); + await notifyTransactionStatusChanges(env); + await notifyBankMatches(env); break; - // Every hour: monitoring checks + // Every hour: monitoring checks + payment reminders case '0 * * * *': await monitoringCheck(env); + await sendPaymentReminders(env); break; default: diff --git a/src/routes/api.ts b/src/routes/api.ts index 0b35c28..c73cc26 100644 --- a/src/routes/api.ts +++ b/src/routes/api.ts @@ -2,6 +2,7 @@ import { Hono } from 'hono'; import type { Env, User, Transaction } from '../types'; import { timingSafeEqual } from '../security'; import { getPendingActions } from '../services/pending-actions'; +import { indexArticle, batchIndexArticles } from '../services/embedding'; import { sendMessage } from '../telegram'; import { createLogger } from '../utils/logger'; @@ -171,4 +172,67 @@ api.post('/broadcast', async (c) => { } }); +// POST /api/knowledge/index - Batch index all knowledge articles +api.post('/knowledge/index', async (c) => { + try { + if (!c.env.VECTORIZE) { + return c.json({ error: 'VECTORIZE binding not configured' }, 400); + } + + const body = await c.req.json<{ force_reindex?: boolean }>().catch(() => ({ force_reindex: false })); + const result = await batchIndexArticles(c.env, { + forceReindex: body.force_reindex ?? false, + }); + + logger.info('Knowledge batch indexing completed', { + indexed: result.indexed, + failed: result.failed, + }); + return c.json(result); + } catch (error) { + logger.error('Knowledge batch indexing failed', error as Error); + return c.json({ error: 'Internal error' }, 500); + } +}); + +// POST /api/knowledge/:id/index - Index a single knowledge article +api.post('/knowledge/:id/index', async (c) => { + try { + if (!c.env.VECTORIZE) { + return c.json({ error: 'VECTORIZE binding not configured' }, 400); + } + + const articleId = parseInt(c.req.param('id')); + if (isNaN(articleId)) { + return c.json({ error: 'Invalid article ID' }, 400); + } + + const article = await c.env.DB + .prepare( + `SELECT id, category, title, content, tags + FROM knowledge_articles WHERE id = ? AND is_active = 1` + ) + .bind(articleId) + .first<{ id: number; category: string; title: string; content: string; tags: string | null }>(); + + if (!article) { + return c.json({ error: 'Article not found' }, 404); + } + + await indexArticle( + c.env, + article.id, + article.title, + article.content, + article.category, + article.tags ?? undefined + ); + + return c.json({ success: true, articleId: article.id }); + } catch (error) { + logger.error('Knowledge article indexing failed', error as Error); + return c.json({ error: 'Internal error' }, 500); + } +}); + export { api as apiRouter }; diff --git a/src/services/cron-jobs.ts b/src/services/cron-jobs.ts index 6b1e170..4427dd2 100644 --- a/src/services/cron-jobs.ts +++ b/src/services/cron-jobs.ts @@ -10,7 +10,7 @@ import { createLogger } from '../utils/logger'; import { sendMessage } from '../telegram'; -import { notifyAdmins } from './notification'; +import { notifyAdmins, notifyUser } from './notification'; import type { Env } from '../types'; const logger = createLogger('cron-jobs'); @@ -294,6 +294,251 @@ export async function monitoringCheck(env: Env): Promise { } } +/** + * Notify users when their server status changes. + * Checks servers where status != last_notified_status. + */ +export async function notifyServerStatusChanges(env: Env): Promise { + try { + const db = env.DB; + + const changed = await db + .prepare( + `SELECT s.id, s.label, s.ip_address, s.status, s.last_notified_status, u.telegram_id + FROM servers s + JOIN users u ON s.user_id = u.id + WHERE u.is_blocked = 0 + AND s.status != 'pending' + AND (s.last_notified_status IS NULL OR s.status != s.last_notified_status) + LIMIT 50` + ) + .all<{ + id: number; + label: string | null; + ip_address: string | null; + status: string; + last_notified_status: string | null; + telegram_id: string; + }>(); + + let notified = 0; + for (const s of changed.results) { + const name = s.label ?? s.ip_address ?? `서버 #${s.id}`; + const statusEmoji: Record = { + running: '🟢', + stopped: '🔴', + provisioning: '🟡', + terminated: '⚫', + failed: '🔴', + }; + const emoji = statusEmoji[s.status] ?? '🔵'; + + const sent = await notifyUser( + env, + s.telegram_id, + `${emoji} 서버 상태 변경\n\n` + + `서버: ${name}\n` + + `상태: ${s.last_notified_status ?? '(초기)'} → ${s.status}` + ); + + if (sent) { + await db + .prepare('UPDATE servers SET last_notified_status = ? WHERE id = ?') + .bind(s.status, s.id) + .run(); + notified++; + } + } + + if (notified > 0) { + logger.info('Server status notifications sent', { notified }); + } + } catch (error) { + logger.error('Server status notification job failed', error as Error); + } +} + +/** + * Notify users when their deposit transactions are confirmed or rejected. + */ +export async function notifyTransactionStatusChanges(env: Env): Promise { + try { + const db = env.DB; + + const changed = await db + .prepare( + `SELECT t.id, t.amount, t.status, t.description, u.telegram_id + FROM transactions t + JOIN users u ON t.user_id = u.id + WHERE u.is_blocked = 0 + AND t.type = 'deposit' + AND t.status IN ('confirmed', 'rejected') + AND t.status_notified_at IS NULL + LIMIT 50` + ) + .all<{ + id: number; + amount: number; + status: string; + description: string | null; + telegram_id: string; + }>(); + + let notified = 0; + for (const t of changed.results) { + const isConfirmed = t.status === 'confirmed'; + const emoji = isConfirmed ? '✅' : '❌'; + const statusText = isConfirmed ? '승인' : '거부'; + + const message = [ + `${emoji} 입금 ${statusText}`, + '', + `거래 #${t.id}`, + `금액: ${t.amount.toLocaleString()}원`, + `상태: ${statusText}`, + ]; + if (t.description) { + message.push(`사유: ${t.description}`); + } + + const sent = await notifyUser(env, t.telegram_id, message.join('\n')); + + if (sent) { + await db + .prepare('UPDATE transactions SET status_notified_at = CURRENT_TIMESTAMP WHERE id = ?') + .bind(t.id) + .run(); + notified++; + } + } + + if (notified > 0) { + logger.info('Transaction status notifications sent', { notified }); + } + } catch (error) { + logger.error('Transaction status notification job failed', error as Error); + } +} + +/** + * Send reminders for deposits pending more than 1 hour. + */ +export async function sendPaymentReminders(env: Env): Promise { + try { + const db = env.DB; + + const pending = await db + .prepare( + `SELECT t.id, t.amount, t.depositor_name, t.created_at, u.telegram_id + FROM transactions t + JOIN users u ON t.user_id = u.id + WHERE u.is_blocked = 0 + AND t.type = 'deposit' + AND t.status = 'pending' + AND t.created_at < datetime('now', '-1 hours') + AND t.reminder_sent_at IS NULL + LIMIT 30` + ) + .all<{ + id: number; + amount: number; + depositor_name: string | null; + created_at: string; + telegram_id: string; + }>(); + + let notified = 0; + for (const t of pending.results) { + const sent = await notifyUser( + env, + t.telegram_id, + `⏰ 입금 대기 알림\n\n` + + `거래 #${t.id}\n` + + `금액: ${t.amount.toLocaleString()}원\n` + + `입금자: ${t.depositor_name ?? '미입력'}\n\n` + + `1시간 이상 대기 중입니다. 입금을 완료해주시거나, 취소가 필요하시면 알려주세요.` + ); + + if (sent) { + await db + .prepare('UPDATE transactions SET reminder_sent_at = CURRENT_TIMESTAMP WHERE id = ?') + .bind(t.id) + .run(); + notified++; + } + } + + if (notified > 0) { + logger.info('Payment reminders sent', { notified }); + } + } catch (error) { + logger.error('Payment reminder job failed', error as Error); + } +} + +/** + * Notify users and admins when bank deposits are matched to transactions. + */ +export async function notifyBankMatches(env: Env): Promise { + try { + const db = env.DB; + + const matched = await db + .prepare( + `SELECT bn.id as notification_id, bn.amount, bn.depositor_name, bn.bank_name, + bn.matched_transaction_id, t.user_id, u.telegram_id + FROM bank_notifications bn + JOIN transactions t ON bn.matched_transaction_id = t.id + JOIN users u ON t.user_id = u.id + WHERE bn.matched_transaction_id IS NOT NULL + AND bn.match_notified_at IS NULL + AND u.is_blocked = 0 + LIMIT 30` + ) + .all<{ + notification_id: number; + amount: number; + depositor_name: string; + bank_name: string | null; + matched_transaction_id: number; + user_id: number; + telegram_id: string; + }>(); + + let notified = 0; + for (const m of matched.results) { + const userMessage = + `🏦 입금 매칭 완료\n\n` + + `입금자: ${m.depositor_name}\n` + + `금액: ${m.amount.toLocaleString()}원\n` + + (m.bank_name ? `은행: ${m.bank_name}\n` : '') + + `거래 #${m.matched_transaction_id}에 매칭되었습니다.`; + + const sent = await notifyUser(env, m.telegram_id, userMessage); + + // Also notify admins + await notifyAdmins( + env, + `🏦 입금 매칭: ${m.depositor_name} ${m.amount.toLocaleString()}원 → 거래 #${m.matched_transaction_id}` + ); + + if (sent) { + await db + .prepare('UPDATE bank_notifications SET match_notified_at = CURRENT_TIMESTAMP WHERE id = ?') + .bind(m.notification_id) + .run(); + notified++; + } + } + + if (notified > 0) { + logger.info('Bank match notifications sent', { notified }); + } + } catch (error) { + logger.error('Bank match notification job failed', error as Error); + } +} + // Legacy aliases for backward compatibility export { sendExpiryNotifications as notifyExpiringAssets }; export { cleanupStaleOrders as cleanupStalePending }; diff --git a/src/services/embedding.ts b/src/services/embedding.ts new file mode 100644 index 0000000..d3df1a3 --- /dev/null +++ b/src/services/embedding.ts @@ -0,0 +1,245 @@ +/** + * Embedding Service - RAG 시맨틱 검색 + * + * Workers AI (@cf/baai/bge-base-en-v1.5) + Cloudflare Vectorize + * - 768차원 임베딩 생성 + * - Vectorize 인덱싱 및 검색 + */ + +import { createLogger } from '../utils/logger'; +import type { Env, KnowledgeArticle } from '../types'; + +const logger = createLogger('embedding'); + +const EMBEDDING_MODEL = '@cf/baai/bge-base-en-v1.5' as const; +const BATCH_SIZE = 50; +const VECTOR_NAMESPACE = 'knowledge'; + +interface SemanticSearchResult { + id: number; + score: number; + category?: string; + title?: string; +} + +interface IndexResult { + indexed: number; + failed: number; + errors: string[]; +} + +/** + * Generate embedding for a single text using Workers AI. + */ +export async function generateEmbedding(ai: Ai, text: string): Promise { + const result = await ai.run(EMBEDDING_MODEL, { + text: [text], + }); + + const data = result as { data: number[][] }; + if (!data.data?.[0]) { + throw new Error('Failed to generate embedding: empty result'); + } + + return data.data[0]; +} + +/** + * Generate embeddings for multiple texts in batches of BATCH_SIZE. + */ +export async function generateEmbeddings( + ai: Ai, + texts: string[] +): Promise { + const allEmbeddings: number[][] = []; + + for (let i = 0; i < texts.length; i += BATCH_SIZE) { + const batch = texts.slice(i, i + BATCH_SIZE); + const result = await ai.run(EMBEDDING_MODEL, { + text: batch, + }); + + const data = result as { data: number[][] }; + if (!data.data) { + throw new Error(`Failed to generate embeddings for batch starting at index ${i}`); + } + + allEmbeddings.push(...data.data); + } + + return allEmbeddings; +} + +/** + * Build the text representation of an article for embedding. + */ +function buildArticleText(title: string, content: string, category?: string, tags?: string): string { + const parts = []; + if (category) parts.push(`[${category}]`); + parts.push(title); + parts.push(content); + if (tags) parts.push(`Tags: ${tags}`); + return parts.join('\n'); +} + +/** + * Index a single article into Vectorize. + */ +export async function indexArticle( + env: Env, + articleId: number, + title: string, + content: string, + category?: string, + tags?: string +): Promise { + if (!env.VECTORIZE) { + throw new Error('VECTORIZE binding not configured'); + } + + const text = buildArticleText(title, content, category, tags); + const embedding = await generateEmbedding(env.AI, text); + + const vector: VectorizeVector = { + id: `${VECTOR_NAMESPACE}:${articleId}`, + values: embedding, + metadata: { + articleId, + category: category ?? '', + title, + }, + }; + + await env.VECTORIZE.upsert([vector]); + + // Mark as indexed in DB + await env.DB + .prepare( + `UPDATE knowledge_articles + SET embedding_indexed = 1, embedding_indexed_at = CURRENT_TIMESTAMP + WHERE id = ?` + ) + .bind(articleId) + .run(); + + logger.info('Article indexed', { articleId, title }); +} + +/** + * Batch index all unindexed (or all if forceReindex) articles. + */ +export async function batchIndexArticles( + env: Env, + options?: { forceReindex?: boolean } +): Promise { + if (!env.VECTORIZE) { + throw new Error('VECTORIZE binding not configured'); + } + + const forceReindex = options?.forceReindex ?? false; + + const query = forceReindex + ? `SELECT id, category, title, content, tags FROM knowledge_articles WHERE is_active = 1` + : `SELECT id, category, title, content, tags FROM knowledge_articles WHERE is_active = 1 AND embedding_indexed = 0`; + + const articles = await env.DB + .prepare(query) + .all>(); + + if (articles.results.length === 0) { + return { indexed: 0, failed: 0, errors: [] }; + } + + const result: IndexResult = { indexed: 0, failed: 0, errors: [] }; + + // Process in batches + for (let i = 0; i < articles.results.length; i += BATCH_SIZE) { + const batch = articles.results.slice(i, i + BATCH_SIZE); + + try { + const texts = batch.map((a) => + buildArticleText(a.title, a.content, a.category, a.tags ?? undefined) + ); + + const embeddings = await generateEmbeddings(env.AI, texts); + + const vectors: VectorizeVector[] = batch.map((a, idx) => ({ + id: `${VECTOR_NAMESPACE}:${a.id}`, + values: embeddings[idx], + metadata: { + articleId: a.id, + category: a.category, + title: a.title, + }, + })); + + await env.VECTORIZE.upsert(vectors); + + // Mark batch as indexed + const ids = batch.map((a) => a.id); + await env.DB + .prepare( + `UPDATE knowledge_articles + SET embedding_indexed = 1, embedding_indexed_at = CURRENT_TIMESTAMP + WHERE id IN (${ids.map(() => '?').join(',')})` + ) + .bind(...ids) + .run(); + + result.indexed += batch.length; + } catch (error) { + const errMsg = error instanceof Error ? error.message : String(error); + result.failed += batch.length; + result.errors.push(`Batch ${i}-${i + batch.length}: ${errMsg}`); + logger.error('Batch indexing failed', error as Error, { + batchStart: i, + batchSize: batch.length, + }); + } + } + + logger.info('Batch indexing completed', { + total: articles.results.length, + indexed: result.indexed, + failed: result.failed, + }); + + return result; +} + +/** + * Semantic search using Vectorize. + */ +export async function semanticSearch( + env: Env, + query: string, + options?: { topK?: number; category?: string } +): Promise { + if (!env.VECTORIZE) { + return []; + } + + const topK = options?.topK ?? 10; + + const queryEmbedding = await generateEmbedding(env.AI, query); + + const filter: VectorizeVectorMetadataFilter = {}; + if (options?.category) { + filter.category = options.category; + } + + const results = await env.VECTORIZE.query(queryEmbedding, { + topK, + returnMetadata: 'all', + ...(options?.category ? { filter } : {}), + }); + + return results.matches.map((match) => ({ + id: typeof match.metadata?.articleId === 'number' + ? match.metadata.articleId + : parseInt(String(match.id).split(':')[1] ?? '0'), + score: match.score, + category: match.metadata?.category as string | undefined, + title: match.metadata?.title as string | undefined, + })); +} diff --git a/src/services/notification.ts b/src/services/notification.ts index e44db1f..76f3bfe 100644 --- a/src/services/notification.ts +++ b/src/services/notification.ts @@ -37,6 +37,23 @@ export async function notifyAdmins(env: Env, message: string): Promise { } } +/** + * Send a notification to a specific user by telegram_id. + */ +export async function notifyUser( + env: Env, + telegramId: string, + message: string +): Promise { + try { + await sendTelegramMessage(env.BOT_TOKEN, telegramId, message); + return true; + } catch (error) { + logger.error('User notification failed', error as Error, { telegramId }); + return false; + } +} + async function sendTelegramMessage( botToken: string, chatId: string, diff --git a/src/tools/knowledge-tool.ts b/src/tools/knowledge-tool.ts index 9d3957b..52743ed 100644 --- a/src/tools/knowledge-tool.ts +++ b/src/tools/knowledge-tool.ts @@ -1,4 +1,5 @@ import { createLogger } from '../utils/logger'; +import { semanticSearch } from '../services/embedding'; import type { Env, ToolDefinition, KnowledgeArticle } from '../types'; const logger = createLogger('knowledge-tool'); @@ -26,57 +27,177 @@ export const searchKnowledgeTool: ToolDefinition = { }, }; +type ArticleRow = Pick; + +interface ScoredArticle extends ArticleRow { + score: number; +} + +const KEYWORD_WEIGHT = 0.6; +const SEMANTIC_WEIGHT = 0.7; +const BOTH_BOOST = 0.15; + +/** + * Keyword search using D1 LIKE queries. + */ +async function keywordSearch( + db: D1Database, + query: string, + category?: string +): Promise { + const searchTerm = `%${query}%`; + let result; + + if (category) { + result = await db + .prepare( + `SELECT id, category, title, content, tags + FROM knowledge_articles + WHERE is_active = 1 AND category = ? + AND (title LIKE ? OR content LIKE ?) + ORDER BY updated_at DESC + LIMIT 10` + ) + .bind(category, searchTerm, searchTerm) + .all(); + } else { + result = await db + .prepare( + `SELECT id, category, title, content, tags + FROM knowledge_articles + WHERE is_active = 1 + AND (title LIKE ? OR content LIKE ?) + ORDER BY updated_at DESC + LIMIT 10` + ) + .bind(searchTerm, searchTerm) + .all(); + } + + // Score: title match gets higher score than content-only match + return result.results.map((a) => { + const titleMatch = a.title.toLowerCase().includes(query.toLowerCase()); + return { + ...a, + score: titleMatch ? KEYWORD_WEIGHT : KEYWORD_WEIGHT * 0.8, + }; + }); +} + +/** + * Merge keyword and semantic search results. + * Articles found by both methods get a score boost. + */ +function mergeResults( + keywordResults: ScoredArticle[], + semanticResults: { id: number; score: number }[], + allArticles: Map +): ScoredArticle[] { + const mergedMap = new Map(); + + // Add keyword scores + for (const r of keywordResults) { + mergedMap.set(r.id, r.score); + allArticles.set(r.id, r); + } + + // Add semantic scores + for (const r of semanticResults) { + const existing = mergedMap.get(r.id); + if (existing !== undefined) { + // Found in both: boost + mergedMap.set(r.id, existing + r.score * SEMANTIC_WEIGHT + BOTH_BOOST); + } else { + mergedMap.set(r.id, r.score * SEMANTIC_WEIGHT); + } + } + + // Sort by score descending and take top 5 + const sorted = [...mergedMap.entries()] + .sort((a, b) => b[1] - a[1]) + .slice(0, 5); + + return sorted + .map(([id, score]) => { + const article = allArticles.get(id); + if (!article) return null; + return { ...article, score }; + }) + .filter((a): a is ScoredArticle => a !== null); +} + export async function executeSearchKnowledge( args: { query: string; category?: string }, - _env?: Env, + env?: Env, _userId?: string, db?: D1Database ): Promise { try { if (!db) return '데이터베이스 연결 정보가 없습니다.'; - const searchTerm = `%${args.query}%`; - let result; + // Keyword search (always available) + const keywordResults = await keywordSearch(db, args.query, args.category); - if (args.category) { - result = await db - .prepare( - `SELECT id, category, title, content, tags - FROM knowledge_articles - WHERE is_active = 1 AND category = ? - AND (title LIKE ? OR content LIKE ?) - ORDER BY updated_at DESC - LIMIT 5` - ) - .bind(args.category, searchTerm, searchTerm) - .all>(); - } else { - result = await db - .prepare( - `SELECT id, category, title, content, tags - FROM knowledge_articles - WHERE is_active = 1 - AND (title LIKE ? OR content LIKE ?) - ORDER BY updated_at DESC - LIMIT 5` - ) - .bind(searchTerm, searchTerm) - .all>(); + // Semantic search (when VECTORIZE is available) + let semanticResults: { id: number; score: number }[] = []; + if (env?.VECTORIZE) { + try { + semanticResults = await semanticSearch(env, args.query, { + topK: 10, + category: args.category, + }); + } catch (error) { + logger.error('Semantic search failed, using keyword only', error as Error); + } } - if (result.results.length === 0) { + // If we have semantic results, fetch their full article data + const allArticles = new Map(); + for (const a of keywordResults) { + allArticles.set(a.id, a); + } + + if (semanticResults.length > 0) { + // Fetch articles for semantic results not already in keyword results + const missingIds = semanticResults + .map((r) => r.id) + .filter((id) => !allArticles.has(id)); + + if (missingIds.length > 0) { + const placeholders = missingIds.map(() => '?').join(','); + const fetched = await db + .prepare( + `SELECT id, category, title, content, tags + FROM knowledge_articles + WHERE id IN (${placeholders}) AND is_active = 1` + ) + .bind(...missingIds) + .all(); + + for (const a of fetched.results) { + allArticles.set(a.id, a); + } + } + } + + // Merge results + const finalResults = semanticResults.length > 0 + ? mergeResults(keywordResults, semanticResults, allArticles) + : keywordResults.slice(0, 5).map((a) => ({ ...a })); + + if (finalResults.length === 0) { return `"${args.query}"에 대한 검색 결과가 없습니다.`; } - const articles = result.results.map((a) => { + const articles = finalResults.map((a) => { const tags = a.tags ? ` [${a.tags}]` : ''; - // Truncate content for display const preview = a.content.length > 200 ? a.content.substring(0, 200) + '...' : a.content; return `[${a.category}] ${a.title}${tags}\n${preview}`; }); - return `검색 결과 (${result.results.length}건):\n\n${articles.join('\n\n---\n\n')}`; + const searchMode = semanticResults.length > 0 ? '하이브리드' : '키워드'; + return `검색 결과 (${finalResults.length}건, ${searchMode} 검색):\n\n${articles.join('\n\n---\n\n')}`; } catch (error) { logger.error('Knowledge search error', error as Error, { query: args.query }); return '지식 베이스 검색 중 오류가 발생했습니다.'; diff --git a/src/types.ts b/src/types.ts index 109b9c6..11e055a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -27,6 +27,8 @@ export interface Env { CACHE_KV: KVNamespace; // R2 R2_BUCKET: R2Bucket; + // Vectorize + VECTORIZE?: VectorizeIndex; } // ============================================ @@ -296,6 +298,8 @@ export interface KnowledgeArticle { tags: string | null; language: string; is_active: number; + embedding_indexed: number; + embedding_indexed_at: string | null; created_at: string; updated_at: string; } diff --git a/wrangler.toml b/wrangler.toml index d7783c1..9dcddbd 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -45,6 +45,12 @@ bucket_name = "ai-support-assets" binding = "CLOUD_ORCHESTRATOR" service = "cloud-orchestrator" +# Vectorize (RAG semantic search) +# Create index: npx wrangler vectorize create knowledge-embeddings --dimensions=768 --metric=cosine +[[vectorize]] +binding = "VECTORIZE" +index_name = "knowledge-embeddings" + # Cron Triggers: # - 매일 자정(KST): 만료 알림 + 데이터 아카이빙 + 정합성 검증 # - 매 5분: pending 상태 자동 정리