Add RAG semantic search and proactive event notifications

Implement hybrid knowledge search using Cloudflare Vectorize + Workers AI
embeddings (bge-base-en-v1.5, 768d) merged with existing D1 LIKE queries,
with graceful degradation when Vectorize is unavailable. Add admin API
endpoints for batch/single article indexing.

Add 4 proactive notification cron jobs: server status changes, deposit
confirmation/rejection alerts, pending payment reminders (1h+), and bank
deposit matching notifications — all with DB-column-based deduplication.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
kappa
2026-02-11 18:09:13 +09:00
parent 1d6b64c9e4
commit f7046f4c66
9 changed files with 768 additions and 34 deletions

View File

@@ -52,6 +52,8 @@ CREATE TABLE IF NOT EXISTS transactions (
reference_id INTEGER,
confirmed_by INTEGER,
confirmed_at DATETIME,
status_notified_at DATETIME,
reminder_sent_at DATETIME,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id),
FOREIGN KEY (confirmed_by) REFERENCES users(id)
@@ -68,6 +70,7 @@ CREATE TABLE IF NOT EXISTS bank_notifications (
transaction_time DATETIME,
raw_message TEXT,
matched_transaction_id INTEGER,
match_notified_at DATETIME,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (matched_transaction_id) REFERENCES transactions(id)
);
@@ -107,6 +110,7 @@ CREATE TABLE IF NOT EXISTS servers (
provisioned_at DATETIME,
terminated_at DATETIME,
expires_at DATETIME,
last_notified_status TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id)
@@ -194,6 +198,8 @@ CREATE TABLE IF NOT EXISTS knowledge_articles (
tags TEXT,
language TEXT DEFAULT 'ko',
is_active INTEGER DEFAULT 1,
embedding_indexed INTEGER DEFAULT 0,
embedding_indexed_at DATETIME,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
@@ -335,3 +341,21 @@ CREATE INDEX IF NOT EXISTS idx_onboarding_expires ON onboarding_sessions(expires
CREATE INDEX IF NOT EXISTS idx_troubleshoot_expires ON troubleshoot_sessions(expires_at);
CREATE INDEX IF NOT EXISTS idx_asset_expires ON asset_sessions(expires_at);
CREATE INDEX IF NOT EXISTS idx_billing_expires ON billing_sessions(expires_at);
----------------------------------------------------------------------
-- Notification Tracking Columns (Migration)
----------------------------------------------------------------------
-- For databases created before this change, apply the ALTERs below by hand;
-- fresh databases already get these columns from the CREATE TABLE statements
-- above, so the statements are kept commented out here as a reference.
-- servers: track the last status the owner was notified about
-- ALTER TABLE servers ADD COLUMN last_notified_status TEXT;
-- transactions: deposit confirm/reject notification + pending-reminder tracking
-- ALTER TABLE transactions ADD COLUMN status_notified_at DATETIME;
-- ALTER TABLE transactions ADD COLUMN reminder_sent_at DATETIME;
-- bank_notifications: deposit-match notification tracking
-- ALTER TABLE bank_notifications ADD COLUMN match_notified_at DATETIME;
-- knowledge_articles: embedding/indexing status tracking
-- ALTER TABLE knowledge_articles ADD COLUMN embedding_indexed INTEGER DEFAULT 0;
-- ALTER TABLE knowledge_articles ADD COLUMN embedding_indexed_at DATETIME;

View File

@@ -124,6 +124,10 @@ export default {
archiveOldConversations,
cleanupStaleOrders,
monitoringCheck,
notifyServerStatusChanges,
notifyTransactionStatusChanges,
sendPaymentReminders,
notifyBankMatches,
} = await import('./services/cron-jobs');
try {
@@ -135,14 +139,18 @@ export default {
await cleanupExpiredSessions(env);
break;
// Every 5 minutes: stale session/order cleanup
// Every 5 minutes: stale cleanup + proactive notifications
case '*/5 * * * *':
await cleanupStaleOrders(env);
await notifyServerStatusChanges(env);
await notifyTransactionStatusChanges(env);
await notifyBankMatches(env);
break;
// Every hour: monitoring checks
// Every hour: monitoring checks + payment reminders
case '0 * * * *':
await monitoringCheck(env);
await sendPaymentReminders(env);
break;
default:

View File

@@ -2,6 +2,7 @@ import { Hono } from 'hono';
import type { Env, User, Transaction } from '../types';
import { timingSafeEqual } from '../security';
import { getPendingActions } from '../services/pending-actions';
import { indexArticle, batchIndexArticles } from '../services/embedding';
import { sendMessage } from '../telegram';
import { createLogger } from '../utils/logger';
@@ -171,4 +172,67 @@ api.post('/broadcast', async (c) => {
}
});
// POST /api/knowledge/index - Batch index all knowledge articles.
// Body (optional): { force_reindex: boolean } — when true, re-embeds every
// active article instead of only the unindexed ones.
api.post('/knowledge/index', async (c) => {
  try {
    if (!c.env.VECTORIZE) {
      return c.json({ error: 'VECTORIZE binding not configured' }, 400);
    }
    // Missing/invalid JSON body is treated as "no force reindex".
    const body = await c.req
      .json<{ force_reindex?: boolean }>()
      .catch(() => ({ force_reindex: false }));
    const forceReindex = body.force_reindex ?? false;
    const result = await batchIndexArticles(c.env, { forceReindex });
    const { indexed, failed } = result;
    logger.info('Knowledge batch indexing completed', { indexed, failed });
    return c.json(result);
  } catch (error) {
    logger.error('Knowledge batch indexing failed', error as Error);
    return c.json({ error: 'Internal error' }, 500);
  }
});
// POST /api/knowledge/:id/index - Index a single knowledge article.
// Looks up the active article, generates its embedding, and upserts it into
// Vectorize via indexArticle (which also marks the row as indexed).
api.post('/knowledge/:id/index', async (c) => {
  try {
    if (!c.env.VECTORIZE) {
      return c.json({ error: 'VECTORIZE binding not configured' }, 400);
    }
    // Strict validation: parseInt() silently accepts values like "12abc",
    // so require the whole path param to be digits.
    const rawId = c.req.param('id');
    const articleId = Number(rawId);
    if (!/^\d+$/.test(rawId) || !Number.isSafeInteger(articleId)) {
      return c.json({ error: 'Invalid article ID' }, 400);
    }
    const article = await c.env.DB
      .prepare(
        `SELECT id, category, title, content, tags
         FROM knowledge_articles WHERE id = ? AND is_active = 1`
      )
      .bind(articleId)
      .first<{ id: number; category: string; title: string; content: string; tags: string | null }>();
    if (!article) {
      return c.json({ error: 'Article not found' }, 404);
    }
    await indexArticle(
      c.env,
      article.id,
      article.title,
      article.content,
      article.category,
      article.tags ?? undefined
    );
    return c.json({ success: true, articleId: article.id });
  } catch (error) {
    logger.error('Knowledge article indexing failed', error as Error);
    return c.json({ error: 'Internal error' }, 500);
  }
});
export { api as apiRouter };

View File

@@ -10,7 +10,7 @@
import { createLogger } from '../utils/logger';
import { sendMessage } from '../telegram';
import { notifyAdmins } from './notification';
import { notifyAdmins, notifyUser } from './notification';
import type { Env } from '../types';
const logger = createLogger('cron-jobs');
@@ -294,6 +294,251 @@ export async function monitoringCheck(env: Env): Promise<void> {
}
}
/**
 * Notify users when their server status changes.
 *
 * Scans servers whose current status differs from `last_notified_status`
 * (excluding 'pending' servers and blocked users), sends the owner a Telegram
 * message, and records the new status only after a successful send so failed
 * notifications retry on the next cron run. Batched at 50 rows per run.
 */
export async function notifyServerStatusChanges(env: Env): Promise<void> {
  // Status → emoji lookup; hoisted out of the per-server loop (loop-invariant,
  // previously rebuilt on every iteration).
  const statusEmoji: Record<string, string> = {
    running: '🟢',
    stopped: '🔴',
    provisioning: '🟡',
    terminated: '⚫',
    failed: '🔴',
  };
  try {
    const db = env.DB;
    const changed = await db
      .prepare(
        `SELECT s.id, s.label, s.ip_address, s.status, s.last_notified_status, u.telegram_id
         FROM servers s
         JOIN users u ON s.user_id = u.id
         WHERE u.is_blocked = 0
           AND s.status != 'pending'
           AND (s.last_notified_status IS NULL OR s.status != s.last_notified_status)
         LIMIT 50`
      )
      .all<{
        id: number;
        label: string | null;
        ip_address: string | null;
        status: string;
        last_notified_status: string | null;
        telegram_id: string;
      }>();
    let notified = 0;
    for (const s of changed.results) {
      // Best available display name: label > IP > numeric id.
      const name = s.label ?? s.ip_address ?? `서버 #${s.id}`;
      const emoji = statusEmoji[s.status] ?? '🔵';
      const sent = await notifyUser(
        env,
        s.telegram_id,
        `${emoji} <b>서버 상태 변경</b>\n\n` +
          `서버: ${name}\n` +
          `상태: ${s.last_notified_status ?? '(초기)'} → <b>${s.status}</b>`
      );
      if (sent) {
        // Dedup marker is written only on success so the change re-notifies later.
        await db
          .prepare('UPDATE servers SET last_notified_status = ? WHERE id = ?')
          .bind(s.status, s.id)
          .run();
        notified++;
      }
    }
    if (notified > 0) {
      logger.info('Server status notifications sent', { notified });
    }
  } catch (error) {
    // Cron jobs must never throw; log and let the next tick retry.
    logger.error('Server status notification job failed', error as Error);
  }
}
/**
 * Notify users when their deposit transactions are confirmed or rejected.
 *
 * Picks up to 50 deposit rows in a terminal status ('confirmed'/'rejected')
 * that have not yet been announced (status_notified_at IS NULL), messages the
 * owner, and stamps the dedup column only after a successful send.
 */
export async function notifyTransactionStatusChanges(env: Env): Promise<void> {
  try {
    const rows = await env.DB
      .prepare(
        `SELECT t.id, t.amount, t.status, t.description, u.telegram_id
         FROM transactions t
         JOIN users u ON t.user_id = u.id
         WHERE u.is_blocked = 0
           AND t.type = 'deposit'
           AND t.status IN ('confirmed', 'rejected')
           AND t.status_notified_at IS NULL
         LIMIT 50`
      )
      .all<{
        id: number;
        amount: number;
        status: string;
        description: string | null;
        telegram_id: string;
      }>();
    let sentCount = 0;
    for (const tx of rows.results) {
      const confirmed = tx.status === 'confirmed';
      const statusText = confirmed ? '승인' : '거부';
      // Optional "사유" line is appended only when a description exists.
      const lines = [
        `${confirmed ? '✅' : '❌'} <b>입금 ${statusText}</b>`,
        '',
        `거래 #${tx.id}`,
        `금액: ${tx.amount.toLocaleString()}`,
        `상태: <b>${statusText}</b>`,
        ...(tx.description ? [`사유: ${tx.description}`] : []),
      ];
      if (await notifyUser(env, tx.telegram_id, lines.join('\n'))) {
        await env.DB
          .prepare('UPDATE transactions SET status_notified_at = CURRENT_TIMESTAMP WHERE id = ?')
          .bind(tx.id)
          .run();
        sentCount++;
      }
    }
    if (sentCount > 0) {
      logger.info('Transaction status notifications sent', { notified: sentCount });
    }
  } catch (error) {
    logger.error('Transaction status notification job failed', error as Error);
  }
}
/**
 * Send reminders for deposits pending more than 1 hour.
 *
 * Each pending deposit gets at most one reminder (reminder_sent_at IS NULL);
 * the marker is written only after a successful Telegram send. Batched at 30
 * rows per hourly run.
 */
export async function sendPaymentReminders(env: Env): Promise<void> {
  try {
    const rows = await env.DB
      .prepare(
        `SELECT t.id, t.amount, t.depositor_name, t.created_at, u.telegram_id
         FROM transactions t
         JOIN users u ON t.user_id = u.id
         WHERE u.is_blocked = 0
           AND t.type = 'deposit'
           AND t.status = 'pending'
           AND t.created_at < datetime('now', '-1 hours')
           AND t.reminder_sent_at IS NULL
         LIMIT 30`
      )
      .all<{
        id: number;
        amount: number;
        depositor_name: string | null;
        created_at: string;
        telegram_id: string;
      }>();
    let sentCount = 0;
    for (const tx of rows.results) {
      const reminder = [
        '⏰ <b>입금 대기 알림</b>',
        '',
        `거래 #${tx.id}`,
        `금액: ${tx.amount.toLocaleString()}`,
        `입금자: ${tx.depositor_name ?? '미입력'}`,
        '',
        '1시간 이상 대기 중입니다. 입금을 완료해주시거나, 취소가 필요하시면 알려주세요.',
      ].join('\n');
      const delivered = await notifyUser(env, tx.telegram_id, reminder);
      if (delivered) {
        await env.DB
          .prepare('UPDATE transactions SET reminder_sent_at = CURRENT_TIMESTAMP WHERE id = ?')
          .bind(tx.id)
          .run();
        sentCount++;
      }
    }
    if (sentCount > 0) {
      logger.info('Payment reminders sent', { notified: sentCount });
    }
  } catch (error) {
    logger.error('Payment reminder job failed', error as Error);
  }
}
/**
 * Notify users and admins when bank deposits are matched to transactions.
 *
 * Picks up to 30 matched bank notifications not yet announced
 * (match_notified_at IS NULL), messages the transaction owner, and — only on
 * a successful user send — stamps the dedup column and notifies admins.
 *
 * Fix: previously notifyAdmins fired unconditionally on every pass; when the
 * user send kept failing, the row stayed unmarked and admins were re-notified
 * on every 5-minute cron run. Admin notification is now gated on the same
 * success/dedup path as the user notification.
 */
export async function notifyBankMatches(env: Env): Promise<void> {
  try {
    const db = env.DB;
    const matched = await db
      .prepare(
        `SELECT bn.id as notification_id, bn.amount, bn.depositor_name, bn.bank_name,
                bn.matched_transaction_id, t.user_id, u.telegram_id
         FROM bank_notifications bn
         JOIN transactions t ON bn.matched_transaction_id = t.id
         JOIN users u ON t.user_id = u.id
         WHERE bn.matched_transaction_id IS NOT NULL
           AND bn.match_notified_at IS NULL
           AND u.is_blocked = 0
         LIMIT 30`
      )
      .all<{
        notification_id: number;
        amount: number;
        depositor_name: string;
        bank_name: string | null;
        matched_transaction_id: number;
        user_id: number;
        telegram_id: string;
      }>();
    let notified = 0;
    for (const m of matched.results) {
      const userMessage =
        `🏦 <b>입금 매칭 완료</b>\n\n` +
        `입금자: ${m.depositor_name}\n` +
        `금액: ${m.amount.toLocaleString()}\n` +
        (m.bank_name ? `은행: ${m.bank_name}\n` : '') +
        `거래 #${m.matched_transaction_id}에 매칭되었습니다.`;
      const sent = await notifyUser(env, m.telegram_id, userMessage);
      if (sent) {
        await db
          .prepare('UPDATE bank_notifications SET match_notified_at = CURRENT_TIMESTAMP WHERE id = ?')
          .bind(m.notification_id)
          .run();
        // Admin notification happens once per match, after dedup is recorded.
        await notifyAdmins(
          env,
          `🏦 입금 매칭: ${m.depositor_name} ${m.amount.toLocaleString()}원 → 거래 #${m.matched_transaction_id}`
        );
        notified++;
      }
    }
    if (notified > 0) {
      logger.info('Bank match notifications sent', { notified });
    }
  } catch (error) {
    logger.error('Bank match notification job failed', error as Error);
  }
}
// Legacy aliases for backward compatibility — older call sites still import
// these names; keep the re-exports until all callers migrate.
export { sendExpiryNotifications as notifyExpiringAssets };
export { cleanupStaleOrders as cleanupStalePending };

245
src/services/embedding.ts Normal file
View File

@@ -0,0 +1,245 @@
/**
 * Embedding Service — RAG semantic search.
 *
 * Workers AI (@cf/baai/bge-base-en-v1.5) + Cloudflare Vectorize:
 * - generates 768-dimension embeddings
 * - indexes and queries them in Vectorize
 */
import { createLogger } from '../utils/logger';
import type { Env, KnowledgeArticle } from '../types';
const logger = createLogger('embedding');
// Workers AI embedding model (768-dimension output).
const EMBEDDING_MODEL = '@cf/baai/bge-base-en-v1.5' as const;
// Max texts per AI call and per Vectorize upsert batch.
const BATCH_SIZE = 50;
// Prefix for vector ids: "knowledge:<articleId>".
const VECTOR_NAMESPACE = 'knowledge';
// One hit from semanticSearch; category/title come from vector metadata
// and may be absent.
interface SemanticSearchResult {
  id: number;
  score: number;
  category?: string;
  title?: string;
}
// Outcome of a batch indexing run: counts plus per-batch error messages.
interface IndexResult {
  indexed: number;
  failed: number;
  errors: string[];
}
/**
 * Generate a single embedding vector for one text via Workers AI.
 *
 * @throws Error when the model returns no vector for the input.
 */
export async function generateEmbedding(ai: Ai, text: string): Promise<number[]> {
  const response = (await ai.run(EMBEDDING_MODEL, { text: [text] })) as {
    data: number[][];
  };
  const vector = response.data?.[0];
  if (!vector) {
    throw new Error('Failed to generate embedding: empty result');
  }
  return vector;
}
/**
 * Generate embeddings for multiple texts, calling Workers AI in chunks of
 * BATCH_SIZE. Results are concatenated in input order.
 *
 * @throws Error when any chunk returns no data.
 */
export async function generateEmbeddings(
  ai: Ai,
  texts: string[]
): Promise<number[][]> {
  const embeddings: number[][] = [];
  let offset = 0;
  while (offset < texts.length) {
    const chunk = texts.slice(offset, offset + BATCH_SIZE);
    const response = (await ai.run(EMBEDDING_MODEL, { text: chunk })) as {
      data: number[][];
    };
    if (!response.data) {
      throw new Error(`Failed to generate embeddings for batch starting at index ${offset}`);
    }
    embeddings.push(...response.data);
    offset += BATCH_SIZE;
  }
  return embeddings;
}
/**
 * Compose the text fed to the embedding model for one article:
 * an optional "[category]" line, then title, content, and an optional
 * "Tags: …" line, joined by newlines.
 */
function buildArticleText(title: string, content: string, category?: string, tags?: string): string {
  const segments: Array<string | undefined> = [
    category ? `[${category}]` : undefined,
    title,
    content,
    tags ? `Tags: ${tags}` : undefined,
  ];
  return segments.filter((s): s is string => s !== undefined).join('\n');
}
/**
 * Index a single article into Vectorize and mark it as indexed in D1.
 *
 * The vector id is "<VECTOR_NAMESPACE>:<articleId>" so re-indexing the same
 * article upserts (overwrites) its existing vector.
 *
 * @throws Error when the VECTORIZE binding is missing or embedding fails.
 */
export async function indexArticle(
  env: Env,
  articleId: number,
  title: string,
  content: string,
  category?: string,
  tags?: string
): Promise<void> {
  if (!env.VECTORIZE) {
    throw new Error('VECTORIZE binding not configured');
  }
  const embedding = await generateEmbedding(
    env.AI,
    buildArticleText(title, content, category, tags)
  );
  await env.VECTORIZE.upsert([
    {
      id: `${VECTOR_NAMESPACE}:${articleId}`,
      values: embedding,
      metadata: {
        articleId,
        category: category ?? '',
        title,
      },
    } as VectorizeVector,
  ]);
  // Record indexing state so batch runs can skip this article.
  await env.DB
    .prepare(
      `UPDATE knowledge_articles
       SET embedding_indexed = 1, embedding_indexed_at = CURRENT_TIMESTAMP
       WHERE id = ?`
    )
    .bind(articleId)
    .run();
  logger.info('Article indexed', { articleId, title });
}
/**
 * Batch index all unindexed (or all if forceReindex) articles.
 *
 * Processes active articles in chunks of BATCH_SIZE: embeds each chunk via
 * Workers AI, upserts the vectors into Vectorize, then marks the chunk's rows
 * as indexed in D1. A failing chunk is counted in `failed`/`errors` and does
 * not stop later chunks.
 *
 * @param env - Worker bindings (DB, AI, VECTORIZE required here)
 * @param options.forceReindex - when true, re-embed every active article,
 *   ignoring the embedding_indexed flag
 * @returns counts of indexed/failed articles plus per-chunk error messages
 * @throws Error when the VECTORIZE binding is missing
 */
export async function batchIndexArticles(
  env: Env,
  options?: { forceReindex?: boolean }
): Promise<IndexResult> {
  if (!env.VECTORIZE) {
    throw new Error('VECTORIZE binding not configured');
  }
  const forceReindex = options?.forceReindex ?? false;
  // Without forceReindex, only rows not yet marked indexed are selected.
  const query = forceReindex
    ? `SELECT id, category, title, content, tags FROM knowledge_articles WHERE is_active = 1`
    : `SELECT id, category, title, content, tags FROM knowledge_articles WHERE is_active = 1 AND embedding_indexed = 0`;
  const articles = await env.DB
    .prepare(query)
    .all<Pick<KnowledgeArticle, 'id' | 'category' | 'title' | 'content' | 'tags'>>();
  if (articles.results.length === 0) {
    return { indexed: 0, failed: 0, errors: [] };
  }
  const result: IndexResult = { indexed: 0, failed: 0, errors: [] };
  // Process in batches; each batch succeeds or fails as a unit.
  for (let i = 0; i < articles.results.length; i += BATCH_SIZE) {
    const batch = articles.results.slice(i, i + BATCH_SIZE);
    try {
      const texts = batch.map((a) =>
        buildArticleText(a.title, a.content, a.category, a.tags ?? undefined)
      );
      // embeddings[idx] corresponds to batch[idx] (order preserved by the AI call).
      const embeddings = await generateEmbeddings(env.AI, texts);
      const vectors: VectorizeVector[] = batch.map((a, idx) => ({
        id: `${VECTOR_NAMESPACE}:${a.id}`,
        values: embeddings[idx],
        metadata: {
          articleId: a.id,
          category: a.category,
          title: a.title,
        },
      }));
      await env.VECTORIZE.upsert(vectors);
      // Mark batch as indexed — dynamic IN(?,?,…) placeholder list sized to the batch.
      const ids = batch.map((a) => a.id);
      await env.DB
        .prepare(
          `UPDATE knowledge_articles
           SET embedding_indexed = 1, embedding_indexed_at = CURRENT_TIMESTAMP
           WHERE id IN (${ids.map(() => '?').join(',')})`
        )
        .bind(...ids)
        .run();
      result.indexed += batch.length;
    } catch (error) {
      // Whole batch counted as failed; continue with the next batch.
      const errMsg = error instanceof Error ? error.message : String(error);
      result.failed += batch.length;
      result.errors.push(`Batch ${i}-${i + batch.length}: ${errMsg}`);
      logger.error('Batch indexing failed', error as Error, {
        batchStart: i,
        batchSize: batch.length,
      });
    }
  }
  logger.info('Batch indexing completed', {
    total: articles.results.length,
    indexed: result.indexed,
    failed: result.failed,
  });
  return result;
}
/**
 * Semantic search against the Vectorize knowledge index.
 *
 * Returns an empty list when the VECTORIZE binding is absent so callers can
 * degrade gracefully to keyword-only search. Optional category filtering is
 * applied via vector metadata.
 */
export async function semanticSearch(
  env: Env,
  query: string,
  options?: { topK?: number; category?: string }
): Promise<SemanticSearchResult[]> {
  if (!env.VECTORIZE) {
    return [];
  }
  const embedding = await generateEmbedding(env.AI, query);
  const category = options?.category;
  const response = await env.VECTORIZE.query(embedding, {
    topK: options?.topK ?? 10,
    returnMetadata: 'all',
    ...(category ? { filter: { category } as VectorizeVectorMetadataFilter } : {}),
  });
  return response.matches.map((match) => {
    // Prefer the articleId stored in metadata; fall back to parsing the
    // "namespace:id" vector id.
    const metaId = match.metadata?.articleId;
    const parsedId = parseInt(String(match.id).split(':')[1] ?? '0');
    return {
      id: typeof metaId === 'number' ? metaId : parsedId,
      score: match.score,
      category: match.metadata?.category as string | undefined,
      title: match.metadata?.title as string | undefined,
    };
  });
}

View File

@@ -37,6 +37,23 @@ export async function notifyAdmins(env: Env, message: string): Promise<void> {
}
}
/**
 * Send a notification to a specific user by telegram_id.
 *
 * @returns true when the Telegram send succeeded, false on any failure
 *          (failures are logged, never thrown, so cron jobs can keep going).
 */
export async function notifyUser(
  env: Env,
  telegramId: string,
  message: string
): Promise<boolean> {
  try {
    await sendTelegramMessage(env.BOT_TOKEN, telegramId, message);
  } catch (error) {
    logger.error('User notification failed', error as Error, { telegramId });
    return false;
  }
  return true;
}
async function sendTelegramMessage(
botToken: string,
chatId: string,

View File

@@ -1,4 +1,5 @@
import { createLogger } from '../utils/logger';
import { semanticSearch } from '../services/embedding';
import type { Env, ToolDefinition, KnowledgeArticle } from '../types';
const logger = createLogger('knowledge-tool');
@@ -26,57 +27,177 @@ export const searchKnowledgeTool: ToolDefinition = {
},
};
// Minimal article projection used by both search paths.
type ArticleRow = Pick<KnowledgeArticle, 'id' | 'category' | 'title' | 'content' | 'tags'>;
// An article annotated with a relevance score from keyword/semantic merging.
interface ScoredArticle extends ArticleRow {
  score: number;
}
// Hybrid scoring weights: keyword hits score KEYWORD_WEIGHT (title match) or
// 80% of it (content-only); semantic scores are scaled by SEMANTIC_WEIGHT;
// articles found by BOTH methods get BOTH_BOOST on top of the sum.
const KEYWORD_WEIGHT = 0.6;
const SEMANTIC_WEIGHT = 0.7;
const BOTH_BOOST = 0.15;
/**
 * Keyword search over active knowledge articles using D1 LIKE queries.
 *
 * Returns up to 10 rows, each scored KEYWORD_WEIGHT when the title matches
 * the query (case-insensitive) or 80% of that for content-only matches.
 */
async function keywordSearch(
  db: D1Database,
  query: string,
  category?: string
): Promise<ScoredArticle[]> {
  const pattern = `%${query}%`;
  // Two prepared statements: with and without the category restriction.
  const statement = category
    ? db
        .prepare(
          `SELECT id, category, title, content, tags
           FROM knowledge_articles
           WHERE is_active = 1 AND category = ?
           AND (title LIKE ? OR content LIKE ?)
           ORDER BY updated_at DESC
           LIMIT 10`
        )
        .bind(category, pattern, pattern)
    : db
        .prepare(
          `SELECT id, category, title, content, tags
           FROM knowledge_articles
           WHERE is_active = 1
           AND (title LIKE ? OR content LIKE ?)
           ORDER BY updated_at DESC
           LIMIT 10`
        )
        .bind(pattern, pattern);
  const rows = await statement.all<ArticleRow>();
  const loweredQuery = query.toLowerCase();
  // Title matches outrank content-only matches.
  return rows.results.map((article) => ({
    ...article,
    score: article.title.toLowerCase().includes(loweredQuery)
      ? KEYWORD_WEIGHT
      : KEYWORD_WEIGHT * 0.8,
  }));
}
/**
 * Merge keyword and semantic search results into a top-5 scored list.
 *
 * Keyword scores are used as-is; semantic scores are scaled by
 * SEMANTIC_WEIGHT; an article found by both methods gets the sum plus
 * BOTH_BOOST. Articles missing from `allArticles` are dropped.
 */
function mergeResults(
  keywordResults: ScoredArticle[],
  semanticResults: { id: number; score: number }[],
  allArticles: Map<number, ArticleRow>
): ScoredArticle[] {
  const scores = new Map<number, number>();
  // Seed with keyword scores and make sure each article is resolvable later.
  keywordResults.forEach((article) => {
    scores.set(article.id, article.score);
    allArticles.set(article.id, article);
  });
  // Fold in semantic scores, boosting overlap between the two methods.
  for (const { id, score } of semanticResults) {
    const weighted = score * SEMANTIC_WEIGHT;
    const prior = scores.get(id);
    scores.set(id, prior === undefined ? weighted : prior + weighted + BOTH_BOOST);
  }
  // Highest score first, capped at 5 results.
  const top = [...scores.entries()].sort(([, a], [, b]) => b - a).slice(0, 5);
  const merged: ScoredArticle[] = [];
  for (const [id, score] of top) {
    const article = allArticles.get(id);
    if (article) {
      merged.push({ ...article, score });
    }
  }
  return merged;
}
export async function executeSearchKnowledge(
args: { query: string; category?: string },
_env?: Env,
env?: Env,
_userId?: string,
db?: D1Database
): Promise<string> {
try {
if (!db) return '데이터베이스 연결 정보가 없습니다.';
const searchTerm = `%${args.query}%`;
let result;
// Keyword search (always available)
const keywordResults = await keywordSearch(db, args.query, args.category);
if (args.category) {
result = await db
.prepare(
`SELECT id, category, title, content, tags
FROM knowledge_articles
WHERE is_active = 1 AND category = ?
AND (title LIKE ? OR content LIKE ?)
ORDER BY updated_at DESC
LIMIT 5`
)
.bind(args.category, searchTerm, searchTerm)
.all<Pick<KnowledgeArticle, 'id' | 'category' | 'title' | 'content' | 'tags'>>();
} else {
result = await db
.prepare(
`SELECT id, category, title, content, tags
FROM knowledge_articles
WHERE is_active = 1
AND (title LIKE ? OR content LIKE ?)
ORDER BY updated_at DESC
LIMIT 5`
)
.bind(searchTerm, searchTerm)
.all<Pick<KnowledgeArticle, 'id' | 'category' | 'title' | 'content' | 'tags'>>();
// Semantic search (when VECTORIZE is available)
let semanticResults: { id: number; score: number }[] = [];
if (env?.VECTORIZE) {
try {
semanticResults = await semanticSearch(env, args.query, {
topK: 10,
category: args.category,
});
} catch (error) {
logger.error('Semantic search failed, using keyword only', error as Error);
}
}
if (result.results.length === 0) {
// If we have semantic results, fetch their full article data
const allArticles = new Map<number, ArticleRow>();
for (const a of keywordResults) {
allArticles.set(a.id, a);
}
if (semanticResults.length > 0) {
// Fetch articles for semantic results not already in keyword results
const missingIds = semanticResults
.map((r) => r.id)
.filter((id) => !allArticles.has(id));
if (missingIds.length > 0) {
const placeholders = missingIds.map(() => '?').join(',');
const fetched = await db
.prepare(
`SELECT id, category, title, content, tags
FROM knowledge_articles
WHERE id IN (${placeholders}) AND is_active = 1`
)
.bind(...missingIds)
.all<ArticleRow>();
for (const a of fetched.results) {
allArticles.set(a.id, a);
}
}
}
// Merge results
const finalResults = semanticResults.length > 0
? mergeResults(keywordResults, semanticResults, allArticles)
: keywordResults.slice(0, 5).map((a) => ({ ...a }));
if (finalResults.length === 0) {
return `"${args.query}"에 대한 검색 결과가 없습니다.`;
}
const articles = result.results.map((a) => {
const articles = finalResults.map((a) => {
const tags = a.tags ? ` [${a.tags}]` : '';
// Truncate content for display
const preview =
a.content.length > 200 ? a.content.substring(0, 200) + '...' : a.content;
return `[${a.category}] ${a.title}${tags}\n${preview}`;
});
return `검색 결과 (${result.results.length}건):\n\n${articles.join('\n\n---\n\n')}`;
const searchMode = semanticResults.length > 0 ? '하이브리드' : '키워드';
return `검색 결과 (${finalResults.length}건, ${searchMode} 검색):\n\n${articles.join('\n\n---\n\n')}`;
} catch (error) {
logger.error('Knowledge search error', error as Error, { query: args.query });
return '지식 베이스 검색 중 오류가 발생했습니다.';

View File

@@ -27,6 +27,8 @@ export interface Env {
CACHE_KV: KVNamespace;
// R2
R2_BUCKET: R2Bucket;
// Vectorize
VECTORIZE?: VectorizeIndex;
}
// ============================================
@@ -296,6 +298,8 @@ export interface KnowledgeArticle {
tags: string | null;
language: string;
is_active: number;
embedding_indexed: number;
embedding_indexed_at: string | null;
created_at: string;
updated_at: string;
}

View File

@@ -45,6 +45,12 @@ bucket_name = "ai-support-assets"
binding = "CLOUD_ORCHESTRATOR"
service = "cloud-orchestrator"
# Vectorize (RAG semantic search)
# Create index: npx wrangler vectorize create knowledge-embeddings --dimensions=768 --metric=cosine
[[vectorize]]
binding = "VECTORIZE"
index_name = "knowledge-embeddings"
# Cron Triggers:
# - 매일 자정(KST): 만료 알림 + 데이터 아카이빙 + 정합성 검증
# - 매 5분: pending 상태 자동 정리