Add RAG semantic search and proactive event notifications
Implement hybrid knowledge search using Cloudflare Vectorize + Workers AI embeddings (bge-base-en-v1.5, 768d) merged with existing D1 LIKE queries, with graceful degradation when Vectorize is unavailable. Add admin API endpoints for batch/single article indexing. Add 4 proactive notification cron jobs: server status changes, deposit confirmation/rejection alerts, pending payment reminders (1h+), and bank deposit matching notifications — all with DB-column-based deduplication. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
24
schema.sql
24
schema.sql
@@ -52,6 +52,8 @@ CREATE TABLE IF NOT EXISTS transactions (
|
||||
reference_id INTEGER,
|
||||
confirmed_by INTEGER,
|
||||
confirmed_at DATETIME,
|
||||
status_notified_at DATETIME,
|
||||
reminder_sent_at DATETIME,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (user_id) REFERENCES users(id),
|
||||
FOREIGN KEY (confirmed_by) REFERENCES users(id)
|
||||
@@ -68,6 +70,7 @@ CREATE TABLE IF NOT EXISTS bank_notifications (
|
||||
transaction_time DATETIME,
|
||||
raw_message TEXT,
|
||||
matched_transaction_id INTEGER,
|
||||
match_notified_at DATETIME,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (matched_transaction_id) REFERENCES transactions(id)
|
||||
);
|
||||
@@ -107,6 +110,7 @@ CREATE TABLE IF NOT EXISTS servers (
|
||||
provisioned_at DATETIME,
|
||||
terminated_at DATETIME,
|
||||
expires_at DATETIME,
|
||||
last_notified_status TEXT,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (user_id) REFERENCES users(id)
|
||||
@@ -194,6 +198,8 @@ CREATE TABLE IF NOT EXISTS knowledge_articles (
|
||||
tags TEXT,
|
||||
language TEXT DEFAULT 'ko',
|
||||
is_active INTEGER DEFAULT 1,
|
||||
embedding_indexed INTEGER DEFAULT 0,
|
||||
embedding_indexed_at DATETIME,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
@@ -335,3 +341,21 @@ CREATE INDEX IF NOT EXISTS idx_onboarding_expires ON onboarding_sessions(expires
|
||||
CREATE INDEX IF NOT EXISTS idx_troubleshoot_expires ON troubleshoot_sessions(expires_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_asset_expires ON asset_sessions(expires_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_billing_expires ON billing_sessions(expires_at);
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Notification Tracking Columns (Migration)
|
||||
----------------------------------------------------------------------
|
||||
|
||||
-- servers: 상태 변경 알림 추적
|
||||
-- ALTER TABLE servers ADD COLUMN last_notified_status TEXT;
|
||||
|
||||
-- transactions: 입금 승인/거부 알림 + 리마인더 추적
|
||||
-- ALTER TABLE transactions ADD COLUMN status_notified_at DATETIME;
|
||||
-- ALTER TABLE transactions ADD COLUMN reminder_sent_at DATETIME;
|
||||
|
||||
-- bank_notifications: 매칭 알림 추적
|
||||
-- ALTER TABLE bank_notifications ADD COLUMN match_notified_at DATETIME;
|
||||
|
||||
-- Knowledge articles: 임베딩 인덱싱 추적
|
||||
-- ALTER TABLE knowledge_articles ADD COLUMN embedding_indexed INTEGER DEFAULT 0;
|
||||
-- ALTER TABLE knowledge_articles ADD COLUMN embedding_indexed_at DATETIME;
|
||||
|
||||
12
src/index.ts
12
src/index.ts
@@ -124,6 +124,10 @@ export default {
|
||||
archiveOldConversations,
|
||||
cleanupStaleOrders,
|
||||
monitoringCheck,
|
||||
notifyServerStatusChanges,
|
||||
notifyTransactionStatusChanges,
|
||||
sendPaymentReminders,
|
||||
notifyBankMatches,
|
||||
} = await import('./services/cron-jobs');
|
||||
|
||||
try {
|
||||
@@ -135,14 +139,18 @@ export default {
|
||||
await cleanupExpiredSessions(env);
|
||||
break;
|
||||
|
||||
// Every 5 minutes: stale session/order cleanup
|
||||
// Every 5 minutes: stale cleanup + proactive notifications
|
||||
case '*/5 * * * *':
|
||||
await cleanupStaleOrders(env);
|
||||
await notifyServerStatusChanges(env);
|
||||
await notifyTransactionStatusChanges(env);
|
||||
await notifyBankMatches(env);
|
||||
break;
|
||||
|
||||
// Every hour: monitoring checks
|
||||
// Every hour: monitoring checks + payment reminders
|
||||
case '0 * * * *':
|
||||
await monitoringCheck(env);
|
||||
await sendPaymentReminders(env);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
||||
@@ -2,6 +2,7 @@ import { Hono } from 'hono';
|
||||
import type { Env, User, Transaction } from '../types';
|
||||
import { timingSafeEqual } from '../security';
|
||||
import { getPendingActions } from '../services/pending-actions';
|
||||
import { indexArticle, batchIndexArticles } from '../services/embedding';
|
||||
import { sendMessage } from '../telegram';
|
||||
import { createLogger } from '../utils/logger';
|
||||
|
||||
@@ -171,4 +172,67 @@ api.post('/broadcast', async (c) => {
|
||||
}
|
||||
});
|
||||
|
||||
// POST /api/knowledge/index - Batch index all knowledge articles
|
||||
api.post('/knowledge/index', async (c) => {
|
||||
try {
|
||||
if (!c.env.VECTORIZE) {
|
||||
return c.json({ error: 'VECTORIZE binding not configured' }, 400);
|
||||
}
|
||||
|
||||
const body = await c.req.json<{ force_reindex?: boolean }>().catch(() => ({ force_reindex: false }));
|
||||
const result = await batchIndexArticles(c.env, {
|
||||
forceReindex: body.force_reindex ?? false,
|
||||
});
|
||||
|
||||
logger.info('Knowledge batch indexing completed', {
|
||||
indexed: result.indexed,
|
||||
failed: result.failed,
|
||||
});
|
||||
return c.json(result);
|
||||
} catch (error) {
|
||||
logger.error('Knowledge batch indexing failed', error as Error);
|
||||
return c.json({ error: 'Internal error' }, 500);
|
||||
}
|
||||
});
|
||||
|
||||
// POST /api/knowledge/:id/index - Index a single knowledge article
|
||||
api.post('/knowledge/:id/index', async (c) => {
|
||||
try {
|
||||
if (!c.env.VECTORIZE) {
|
||||
return c.json({ error: 'VECTORIZE binding not configured' }, 400);
|
||||
}
|
||||
|
||||
const articleId = parseInt(c.req.param('id'));
|
||||
if (isNaN(articleId)) {
|
||||
return c.json({ error: 'Invalid article ID' }, 400);
|
||||
}
|
||||
|
||||
const article = await c.env.DB
|
||||
.prepare(
|
||||
`SELECT id, category, title, content, tags
|
||||
FROM knowledge_articles WHERE id = ? AND is_active = 1`
|
||||
)
|
||||
.bind(articleId)
|
||||
.first<{ id: number; category: string; title: string; content: string; tags: string | null }>();
|
||||
|
||||
if (!article) {
|
||||
return c.json({ error: 'Article not found' }, 404);
|
||||
}
|
||||
|
||||
await indexArticle(
|
||||
c.env,
|
||||
article.id,
|
||||
article.title,
|
||||
article.content,
|
||||
article.category,
|
||||
article.tags ?? undefined
|
||||
);
|
||||
|
||||
return c.json({ success: true, articleId: article.id });
|
||||
} catch (error) {
|
||||
logger.error('Knowledge article indexing failed', error as Error);
|
||||
return c.json({ error: 'Internal error' }, 500);
|
||||
}
|
||||
});
|
||||
|
||||
export { api as apiRouter };
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
import { createLogger } from '../utils/logger';
|
||||
import { sendMessage } from '../telegram';
|
||||
import { notifyAdmins } from './notification';
|
||||
import { notifyAdmins, notifyUser } from './notification';
|
||||
import type { Env } from '../types';
|
||||
|
||||
const logger = createLogger('cron-jobs');
|
||||
@@ -294,6 +294,251 @@ export async function monitoringCheck(env: Env): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify users when their server status changes.
|
||||
* Checks servers where status != last_notified_status.
|
||||
*/
|
||||
export async function notifyServerStatusChanges(env: Env): Promise<void> {
|
||||
try {
|
||||
const db = env.DB;
|
||||
|
||||
const changed = await db
|
||||
.prepare(
|
||||
`SELECT s.id, s.label, s.ip_address, s.status, s.last_notified_status, u.telegram_id
|
||||
FROM servers s
|
||||
JOIN users u ON s.user_id = u.id
|
||||
WHERE u.is_blocked = 0
|
||||
AND s.status != 'pending'
|
||||
AND (s.last_notified_status IS NULL OR s.status != s.last_notified_status)
|
||||
LIMIT 50`
|
||||
)
|
||||
.all<{
|
||||
id: number;
|
||||
label: string | null;
|
||||
ip_address: string | null;
|
||||
status: string;
|
||||
last_notified_status: string | null;
|
||||
telegram_id: string;
|
||||
}>();
|
||||
|
||||
let notified = 0;
|
||||
for (const s of changed.results) {
|
||||
const name = s.label ?? s.ip_address ?? `서버 #${s.id}`;
|
||||
const statusEmoji: Record<string, string> = {
|
||||
running: '🟢',
|
||||
stopped: '🔴',
|
||||
provisioning: '🟡',
|
||||
terminated: '⚫',
|
||||
failed: '🔴',
|
||||
};
|
||||
const emoji = statusEmoji[s.status] ?? '🔵';
|
||||
|
||||
const sent = await notifyUser(
|
||||
env,
|
||||
s.telegram_id,
|
||||
`${emoji} <b>서버 상태 변경</b>\n\n` +
|
||||
`서버: ${name}\n` +
|
||||
`상태: ${s.last_notified_status ?? '(초기)'} → <b>${s.status}</b>`
|
||||
);
|
||||
|
||||
if (sent) {
|
||||
await db
|
||||
.prepare('UPDATE servers SET last_notified_status = ? WHERE id = ?')
|
||||
.bind(s.status, s.id)
|
||||
.run();
|
||||
notified++;
|
||||
}
|
||||
}
|
||||
|
||||
if (notified > 0) {
|
||||
logger.info('Server status notifications sent', { notified });
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Server status notification job failed', error as Error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify users when their deposit transactions are confirmed or rejected.
|
||||
*/
|
||||
export async function notifyTransactionStatusChanges(env: Env): Promise<void> {
|
||||
try {
|
||||
const db = env.DB;
|
||||
|
||||
const changed = await db
|
||||
.prepare(
|
||||
`SELECT t.id, t.amount, t.status, t.description, u.telegram_id
|
||||
FROM transactions t
|
||||
JOIN users u ON t.user_id = u.id
|
||||
WHERE u.is_blocked = 0
|
||||
AND t.type = 'deposit'
|
||||
AND t.status IN ('confirmed', 'rejected')
|
||||
AND t.status_notified_at IS NULL
|
||||
LIMIT 50`
|
||||
)
|
||||
.all<{
|
||||
id: number;
|
||||
amount: number;
|
||||
status: string;
|
||||
description: string | null;
|
||||
telegram_id: string;
|
||||
}>();
|
||||
|
||||
let notified = 0;
|
||||
for (const t of changed.results) {
|
||||
const isConfirmed = t.status === 'confirmed';
|
||||
const emoji = isConfirmed ? '✅' : '❌';
|
||||
const statusText = isConfirmed ? '승인' : '거부';
|
||||
|
||||
const message = [
|
||||
`${emoji} <b>입금 ${statusText}</b>`,
|
||||
'',
|
||||
`거래 #${t.id}`,
|
||||
`금액: ${t.amount.toLocaleString()}원`,
|
||||
`상태: <b>${statusText}</b>`,
|
||||
];
|
||||
if (t.description) {
|
||||
message.push(`사유: ${t.description}`);
|
||||
}
|
||||
|
||||
const sent = await notifyUser(env, t.telegram_id, message.join('\n'));
|
||||
|
||||
if (sent) {
|
||||
await db
|
||||
.prepare('UPDATE transactions SET status_notified_at = CURRENT_TIMESTAMP WHERE id = ?')
|
||||
.bind(t.id)
|
||||
.run();
|
||||
notified++;
|
||||
}
|
||||
}
|
||||
|
||||
if (notified > 0) {
|
||||
logger.info('Transaction status notifications sent', { notified });
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Transaction status notification job failed', error as Error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send reminders for deposits pending more than 1 hour.
|
||||
*/
|
||||
export async function sendPaymentReminders(env: Env): Promise<void> {
|
||||
try {
|
||||
const db = env.DB;
|
||||
|
||||
const pending = await db
|
||||
.prepare(
|
||||
`SELECT t.id, t.amount, t.depositor_name, t.created_at, u.telegram_id
|
||||
FROM transactions t
|
||||
JOIN users u ON t.user_id = u.id
|
||||
WHERE u.is_blocked = 0
|
||||
AND t.type = 'deposit'
|
||||
AND t.status = 'pending'
|
||||
AND t.created_at < datetime('now', '-1 hours')
|
||||
AND t.reminder_sent_at IS NULL
|
||||
LIMIT 30`
|
||||
)
|
||||
.all<{
|
||||
id: number;
|
||||
amount: number;
|
||||
depositor_name: string | null;
|
||||
created_at: string;
|
||||
telegram_id: string;
|
||||
}>();
|
||||
|
||||
let notified = 0;
|
||||
for (const t of pending.results) {
|
||||
const sent = await notifyUser(
|
||||
env,
|
||||
t.telegram_id,
|
||||
`⏰ <b>입금 대기 알림</b>\n\n` +
|
||||
`거래 #${t.id}\n` +
|
||||
`금액: ${t.amount.toLocaleString()}원\n` +
|
||||
`입금자: ${t.depositor_name ?? '미입력'}\n\n` +
|
||||
`1시간 이상 대기 중입니다. 입금을 완료해주시거나, 취소가 필요하시면 알려주세요.`
|
||||
);
|
||||
|
||||
if (sent) {
|
||||
await db
|
||||
.prepare('UPDATE transactions SET reminder_sent_at = CURRENT_TIMESTAMP WHERE id = ?')
|
||||
.bind(t.id)
|
||||
.run();
|
||||
notified++;
|
||||
}
|
||||
}
|
||||
|
||||
if (notified > 0) {
|
||||
logger.info('Payment reminders sent', { notified });
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Payment reminder job failed', error as Error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify users and admins when bank deposits are matched to transactions.
|
||||
*/
|
||||
export async function notifyBankMatches(env: Env): Promise<void> {
|
||||
try {
|
||||
const db = env.DB;
|
||||
|
||||
const matched = await db
|
||||
.prepare(
|
||||
`SELECT bn.id as notification_id, bn.amount, bn.depositor_name, bn.bank_name,
|
||||
bn.matched_transaction_id, t.user_id, u.telegram_id
|
||||
FROM bank_notifications bn
|
||||
JOIN transactions t ON bn.matched_transaction_id = t.id
|
||||
JOIN users u ON t.user_id = u.id
|
||||
WHERE bn.matched_transaction_id IS NOT NULL
|
||||
AND bn.match_notified_at IS NULL
|
||||
AND u.is_blocked = 0
|
||||
LIMIT 30`
|
||||
)
|
||||
.all<{
|
||||
notification_id: number;
|
||||
amount: number;
|
||||
depositor_name: string;
|
||||
bank_name: string | null;
|
||||
matched_transaction_id: number;
|
||||
user_id: number;
|
||||
telegram_id: string;
|
||||
}>();
|
||||
|
||||
let notified = 0;
|
||||
for (const m of matched.results) {
|
||||
const userMessage =
|
||||
`🏦 <b>입금 매칭 완료</b>\n\n` +
|
||||
`입금자: ${m.depositor_name}\n` +
|
||||
`금액: ${m.amount.toLocaleString()}원\n` +
|
||||
(m.bank_name ? `은행: ${m.bank_name}\n` : '') +
|
||||
`거래 #${m.matched_transaction_id}에 매칭되었습니다.`;
|
||||
|
||||
const sent = await notifyUser(env, m.telegram_id, userMessage);
|
||||
|
||||
// Also notify admins
|
||||
await notifyAdmins(
|
||||
env,
|
||||
`🏦 입금 매칭: ${m.depositor_name} ${m.amount.toLocaleString()}원 → 거래 #${m.matched_transaction_id}`
|
||||
);
|
||||
|
||||
if (sent) {
|
||||
await db
|
||||
.prepare('UPDATE bank_notifications SET match_notified_at = CURRENT_TIMESTAMP WHERE id = ?')
|
||||
.bind(m.notification_id)
|
||||
.run();
|
||||
notified++;
|
||||
}
|
||||
}
|
||||
|
||||
if (notified > 0) {
|
||||
logger.info('Bank match notifications sent', { notified });
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Bank match notification job failed', error as Error);
|
||||
}
|
||||
}
|
||||
|
||||
// Legacy aliases for backward compatibility
|
||||
export { sendExpiryNotifications as notifyExpiringAssets };
|
||||
export { cleanupStaleOrders as cleanupStalePending };
|
||||
|
||||
245
src/services/embedding.ts
Normal file
245
src/services/embedding.ts
Normal file
@@ -0,0 +1,245 @@
|
||||
/**
|
||||
* Embedding Service - RAG 시맨틱 검색
|
||||
*
|
||||
* Workers AI (@cf/baai/bge-base-en-v1.5) + Cloudflare Vectorize
|
||||
* - 768차원 임베딩 생성
|
||||
* - Vectorize 인덱싱 및 검색
|
||||
*/
|
||||
|
||||
import { createLogger } from '../utils/logger';
|
||||
import type { Env, KnowledgeArticle } from '../types';
|
||||
|
||||
const logger = createLogger('embedding');
|
||||
|
||||
const EMBEDDING_MODEL = '@cf/baai/bge-base-en-v1.5' as const;
|
||||
const BATCH_SIZE = 50;
|
||||
const VECTOR_NAMESPACE = 'knowledge';
|
||||
|
||||
interface SemanticSearchResult {
|
||||
id: number;
|
||||
score: number;
|
||||
category?: string;
|
||||
title?: string;
|
||||
}
|
||||
|
||||
interface IndexResult {
|
||||
indexed: number;
|
||||
failed: number;
|
||||
errors: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embedding for a single text using Workers AI.
|
||||
*/
|
||||
export async function generateEmbedding(ai: Ai, text: string): Promise<number[]> {
|
||||
const result = await ai.run(EMBEDDING_MODEL, {
|
||||
text: [text],
|
||||
});
|
||||
|
||||
const data = result as { data: number[][] };
|
||||
if (!data.data?.[0]) {
|
||||
throw new Error('Failed to generate embedding: empty result');
|
||||
}
|
||||
|
||||
return data.data[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings for multiple texts in batches of BATCH_SIZE.
|
||||
*/
|
||||
export async function generateEmbeddings(
|
||||
ai: Ai,
|
||||
texts: string[]
|
||||
): Promise<number[][]> {
|
||||
const allEmbeddings: number[][] = [];
|
||||
|
||||
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
||||
const batch = texts.slice(i, i + BATCH_SIZE);
|
||||
const result = await ai.run(EMBEDDING_MODEL, {
|
||||
text: batch,
|
||||
});
|
||||
|
||||
const data = result as { data: number[][] };
|
||||
if (!data.data) {
|
||||
throw new Error(`Failed to generate embeddings for batch starting at index ${i}`);
|
||||
}
|
||||
|
||||
allEmbeddings.push(...data.data);
|
||||
}
|
||||
|
||||
return allEmbeddings;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the text representation of an article for embedding.
|
||||
*/
|
||||
function buildArticleText(title: string, content: string, category?: string, tags?: string): string {
|
||||
const parts = [];
|
||||
if (category) parts.push(`[${category}]`);
|
||||
parts.push(title);
|
||||
parts.push(content);
|
||||
if (tags) parts.push(`Tags: ${tags}`);
|
||||
return parts.join('\n');
|
||||
}
|
||||
|
||||
/**
|
||||
* Index a single article into Vectorize.
|
||||
*/
|
||||
export async function indexArticle(
|
||||
env: Env,
|
||||
articleId: number,
|
||||
title: string,
|
||||
content: string,
|
||||
category?: string,
|
||||
tags?: string
|
||||
): Promise<void> {
|
||||
if (!env.VECTORIZE) {
|
||||
throw new Error('VECTORIZE binding not configured');
|
||||
}
|
||||
|
||||
const text = buildArticleText(title, content, category, tags);
|
||||
const embedding = await generateEmbedding(env.AI, text);
|
||||
|
||||
const vector: VectorizeVector = {
|
||||
id: `${VECTOR_NAMESPACE}:${articleId}`,
|
||||
values: embedding,
|
||||
metadata: {
|
||||
articleId,
|
||||
category: category ?? '',
|
||||
title,
|
||||
},
|
||||
};
|
||||
|
||||
await env.VECTORIZE.upsert([vector]);
|
||||
|
||||
// Mark as indexed in DB
|
||||
await env.DB
|
||||
.prepare(
|
||||
`UPDATE knowledge_articles
|
||||
SET embedding_indexed = 1, embedding_indexed_at = CURRENT_TIMESTAMP
|
||||
WHERE id = ?`
|
||||
)
|
||||
.bind(articleId)
|
||||
.run();
|
||||
|
||||
logger.info('Article indexed', { articleId, title });
|
||||
}
|
||||
|
||||
/**
 * Batch index all unindexed (or all if forceReindex) articles.
 *
 * Workflow per batch of BATCH_SIZE articles: build the combined text for
 * each article, embed all texts in one Workers AI call, upsert the vectors
 * into Vectorize, then mark the batch as indexed in D1 with a single
 * UPDATE … IN (…) statement. A failing batch is recorded in the result and
 * does not stop subsequent batches.
 *
 * @param env - Worker bindings; requires VECTORIZE, AI and DB.
 * @param options - forceReindex: when true, re-embed every active article
 *   instead of only those with embedding_indexed = 0.
 * @returns counts of indexed/failed articles plus per-batch error messages.
 * @throws Error when the VECTORIZE binding is not configured.
 */
export async function batchIndexArticles(
  env: Env,
  options?: { forceReindex?: boolean }
): Promise<IndexResult> {
  if (!env.VECTORIZE) {
    throw new Error('VECTORIZE binding not configured');
  }

  const forceReindex = options?.forceReindex ?? false;

  // forceReindex widens the selection to every active article; otherwise
  // only rows not yet marked as indexed are picked up.
  const query = forceReindex
    ? `SELECT id, category, title, content, tags FROM knowledge_articles WHERE is_active = 1`
    : `SELECT id, category, title, content, tags FROM knowledge_articles WHERE is_active = 1 AND embedding_indexed = 0`;

  const articles = await env.DB
    .prepare(query)
    .all<Pick<KnowledgeArticle, 'id' | 'category' | 'title' | 'content' | 'tags'>>();

  // Nothing to do — return an empty result without touching AI/Vectorize.
  if (articles.results.length === 0) {
    return { indexed: 0, failed: 0, errors: [] };
  }

  const result: IndexResult = { indexed: 0, failed: 0, errors: [] };

  // Process in batches
  for (let i = 0; i < articles.results.length; i += BATCH_SIZE) {
    const batch = articles.results.slice(i, i + BATCH_SIZE);

    try {
      const texts = batch.map((a) =>
        buildArticleText(a.title, a.content, a.category, a.tags ?? undefined)
      );

      // One embeddings call per batch; order matches `batch` so index idx
      // below lines vectors up with their articles.
      const embeddings = await generateEmbeddings(env.AI, texts);

      const vectors: VectorizeVector[] = batch.map((a, idx) => ({
        id: `${VECTOR_NAMESPACE}:${a.id}`,
        values: embeddings[idx],
        metadata: {
          articleId: a.id,
          category: a.category,
          title: a.title,
        },
      }));

      await env.VECTORIZE.upsert(vectors);

      // Mark batch as indexed
      const ids = batch.map((a) => a.id);
      await env.DB
        .prepare(
          `UPDATE knowledge_articles
           SET embedding_indexed = 1, embedding_indexed_at = CURRENT_TIMESTAMP
           WHERE id IN (${ids.map(() => '?').join(',')})`
        )
        .bind(...ids)
        .run();

      result.indexed += batch.length;
    } catch (error) {
      // A failed batch is counted and logged but does not abort the run;
      // its rows stay unindexed and will be retried on the next invocation.
      const errMsg = error instanceof Error ? error.message : String(error);
      result.failed += batch.length;
      result.errors.push(`Batch ${i}-${i + batch.length}: ${errMsg}`);
      logger.error('Batch indexing failed', error as Error, {
        batchStart: i,
        batchSize: batch.length,
      });
    }
  }

  logger.info('Batch indexing completed', {
    total: articles.results.length,
    indexed: result.indexed,
    failed: result.failed,
  });

  return result;
}
|
||||
|
||||
/**
|
||||
* Semantic search using Vectorize.
|
||||
*/
|
||||
export async function semanticSearch(
|
||||
env: Env,
|
||||
query: string,
|
||||
options?: { topK?: number; category?: string }
|
||||
): Promise<SemanticSearchResult[]> {
|
||||
if (!env.VECTORIZE) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const topK = options?.topK ?? 10;
|
||||
|
||||
const queryEmbedding = await generateEmbedding(env.AI, query);
|
||||
|
||||
const filter: VectorizeVectorMetadataFilter = {};
|
||||
if (options?.category) {
|
||||
filter.category = options.category;
|
||||
}
|
||||
|
||||
const results = await env.VECTORIZE.query(queryEmbedding, {
|
||||
topK,
|
||||
returnMetadata: 'all',
|
||||
...(options?.category ? { filter } : {}),
|
||||
});
|
||||
|
||||
return results.matches.map((match) => ({
|
||||
id: typeof match.metadata?.articleId === 'number'
|
||||
? match.metadata.articleId
|
||||
: parseInt(String(match.id).split(':')[1] ?? '0'),
|
||||
score: match.score,
|
||||
category: match.metadata?.category as string | undefined,
|
||||
title: match.metadata?.title as string | undefined,
|
||||
}));
|
||||
}
|
||||
@@ -37,6 +37,23 @@ export async function notifyAdmins(env: Env, message: string): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a notification to a specific user by telegram_id.
|
||||
*/
|
||||
export async function notifyUser(
|
||||
env: Env,
|
||||
telegramId: string,
|
||||
message: string
|
||||
): Promise<boolean> {
|
||||
try {
|
||||
await sendTelegramMessage(env.BOT_TOKEN, telegramId, message);
|
||||
return true;
|
||||
} catch (error) {
|
||||
logger.error('User notification failed', error as Error, { telegramId });
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async function sendTelegramMessage(
|
||||
botToken: string,
|
||||
chatId: string,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { createLogger } from '../utils/logger';
|
||||
import { semanticSearch } from '../services/embedding';
|
||||
import type { Env, ToolDefinition, KnowledgeArticle } from '../types';
|
||||
|
||||
const logger = createLogger('knowledge-tool');
|
||||
@@ -26,57 +27,177 @@ export const searchKnowledgeTool: ToolDefinition = {
|
||||
},
|
||||
};
|
||||
|
||||
type ArticleRow = Pick<KnowledgeArticle, 'id' | 'category' | 'title' | 'content' | 'tags'>;
|
||||
|
||||
interface ScoredArticle extends ArticleRow {
|
||||
score: number;
|
||||
}
|
||||
|
||||
const KEYWORD_WEIGHT = 0.6;
|
||||
const SEMANTIC_WEIGHT = 0.7;
|
||||
const BOTH_BOOST = 0.15;
|
||||
|
||||
/**
|
||||
* Keyword search using D1 LIKE queries.
|
||||
*/
|
||||
async function keywordSearch(
|
||||
db: D1Database,
|
||||
query: string,
|
||||
category?: string
|
||||
): Promise<ScoredArticle[]> {
|
||||
const searchTerm = `%${query}%`;
|
||||
let result;
|
||||
|
||||
if (category) {
|
||||
result = await db
|
||||
.prepare(
|
||||
`SELECT id, category, title, content, tags
|
||||
FROM knowledge_articles
|
||||
WHERE is_active = 1 AND category = ?
|
||||
AND (title LIKE ? OR content LIKE ?)
|
||||
ORDER BY updated_at DESC
|
||||
LIMIT 10`
|
||||
)
|
||||
.bind(category, searchTerm, searchTerm)
|
||||
.all<ArticleRow>();
|
||||
} else {
|
||||
result = await db
|
||||
.prepare(
|
||||
`SELECT id, category, title, content, tags
|
||||
FROM knowledge_articles
|
||||
WHERE is_active = 1
|
||||
AND (title LIKE ? OR content LIKE ?)
|
||||
ORDER BY updated_at DESC
|
||||
LIMIT 10`
|
||||
)
|
||||
.bind(searchTerm, searchTerm)
|
||||
.all<ArticleRow>();
|
||||
}
|
||||
|
||||
// Score: title match gets higher score than content-only match
|
||||
return result.results.map((a) => {
|
||||
const titleMatch = a.title.toLowerCase().includes(query.toLowerCase());
|
||||
return {
|
||||
...a,
|
||||
score: titleMatch ? KEYWORD_WEIGHT : KEYWORD_WEIGHT * 0.8,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge keyword and semantic search results.
|
||||
* Articles found by both methods get a score boost.
|
||||
*/
|
||||
function mergeResults(
|
||||
keywordResults: ScoredArticle[],
|
||||
semanticResults: { id: number; score: number }[],
|
||||
allArticles: Map<number, ArticleRow>
|
||||
): ScoredArticle[] {
|
||||
const mergedMap = new Map<number, number>();
|
||||
|
||||
// Add keyword scores
|
||||
for (const r of keywordResults) {
|
||||
mergedMap.set(r.id, r.score);
|
||||
allArticles.set(r.id, r);
|
||||
}
|
||||
|
||||
// Add semantic scores
|
||||
for (const r of semanticResults) {
|
||||
const existing = mergedMap.get(r.id);
|
||||
if (existing !== undefined) {
|
||||
// Found in both: boost
|
||||
mergedMap.set(r.id, existing + r.score * SEMANTIC_WEIGHT + BOTH_BOOST);
|
||||
} else {
|
||||
mergedMap.set(r.id, r.score * SEMANTIC_WEIGHT);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by score descending and take top 5
|
||||
const sorted = [...mergedMap.entries()]
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, 5);
|
||||
|
||||
return sorted
|
||||
.map(([id, score]) => {
|
||||
const article = allArticles.get(id);
|
||||
if (!article) return null;
|
||||
return { ...article, score };
|
||||
})
|
||||
.filter((a): a is ScoredArticle => a !== null);
|
||||
}
|
||||
|
||||
export async function executeSearchKnowledge(
|
||||
args: { query: string; category?: string },
|
||||
_env?: Env,
|
||||
env?: Env,
|
||||
_userId?: string,
|
||||
db?: D1Database
|
||||
): Promise<string> {
|
||||
try {
|
||||
if (!db) return '데이터베이스 연결 정보가 없습니다.';
|
||||
|
||||
const searchTerm = `%${args.query}%`;
|
||||
let result;
|
||||
// Keyword search (always available)
|
||||
const keywordResults = await keywordSearch(db, args.query, args.category);
|
||||
|
||||
if (args.category) {
|
||||
result = await db
|
||||
.prepare(
|
||||
`SELECT id, category, title, content, tags
|
||||
FROM knowledge_articles
|
||||
WHERE is_active = 1 AND category = ?
|
||||
AND (title LIKE ? OR content LIKE ?)
|
||||
ORDER BY updated_at DESC
|
||||
LIMIT 5`
|
||||
)
|
||||
.bind(args.category, searchTerm, searchTerm)
|
||||
.all<Pick<KnowledgeArticle, 'id' | 'category' | 'title' | 'content' | 'tags'>>();
|
||||
} else {
|
||||
result = await db
|
||||
.prepare(
|
||||
`SELECT id, category, title, content, tags
|
||||
FROM knowledge_articles
|
||||
WHERE is_active = 1
|
||||
AND (title LIKE ? OR content LIKE ?)
|
||||
ORDER BY updated_at DESC
|
||||
LIMIT 5`
|
||||
)
|
||||
.bind(searchTerm, searchTerm)
|
||||
.all<Pick<KnowledgeArticle, 'id' | 'category' | 'title' | 'content' | 'tags'>>();
|
||||
// Semantic search (when VECTORIZE is available)
|
||||
let semanticResults: { id: number; score: number }[] = [];
|
||||
if (env?.VECTORIZE) {
|
||||
try {
|
||||
semanticResults = await semanticSearch(env, args.query, {
|
||||
topK: 10,
|
||||
category: args.category,
|
||||
});
|
||||
} catch (error) {
|
||||
logger.error('Semantic search failed, using keyword only', error as Error);
|
||||
}
|
||||
}
|
||||
|
||||
if (result.results.length === 0) {
|
||||
// If we have semantic results, fetch their full article data
|
||||
const allArticles = new Map<number, ArticleRow>();
|
||||
for (const a of keywordResults) {
|
||||
allArticles.set(a.id, a);
|
||||
}
|
||||
|
||||
if (semanticResults.length > 0) {
|
||||
// Fetch articles for semantic results not already in keyword results
|
||||
const missingIds = semanticResults
|
||||
.map((r) => r.id)
|
||||
.filter((id) => !allArticles.has(id));
|
||||
|
||||
if (missingIds.length > 0) {
|
||||
const placeholders = missingIds.map(() => '?').join(',');
|
||||
const fetched = await db
|
||||
.prepare(
|
||||
`SELECT id, category, title, content, tags
|
||||
FROM knowledge_articles
|
||||
WHERE id IN (${placeholders}) AND is_active = 1`
|
||||
)
|
||||
.bind(...missingIds)
|
||||
.all<ArticleRow>();
|
||||
|
||||
for (const a of fetched.results) {
|
||||
allArticles.set(a.id, a);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Merge results
|
||||
const finalResults = semanticResults.length > 0
|
||||
? mergeResults(keywordResults, semanticResults, allArticles)
|
||||
: keywordResults.slice(0, 5).map((a) => ({ ...a }));
|
||||
|
||||
if (finalResults.length === 0) {
|
||||
return `"${args.query}"에 대한 검색 결과가 없습니다.`;
|
||||
}
|
||||
|
||||
const articles = result.results.map((a) => {
|
||||
const articles = finalResults.map((a) => {
|
||||
const tags = a.tags ? ` [${a.tags}]` : '';
|
||||
// Truncate content for display
|
||||
const preview =
|
||||
a.content.length > 200 ? a.content.substring(0, 200) + '...' : a.content;
|
||||
return `[${a.category}] ${a.title}${tags}\n${preview}`;
|
||||
});
|
||||
|
||||
return `검색 결과 (${result.results.length}건):\n\n${articles.join('\n\n---\n\n')}`;
|
||||
const searchMode = semanticResults.length > 0 ? '하이브리드' : '키워드';
|
||||
return `검색 결과 (${finalResults.length}건, ${searchMode} 검색):\n\n${articles.join('\n\n---\n\n')}`;
|
||||
} catch (error) {
|
||||
logger.error('Knowledge search error', error as Error, { query: args.query });
|
||||
return '지식 베이스 검색 중 오류가 발생했습니다.';
|
||||
|
||||
@@ -27,6 +27,8 @@ export interface Env {
|
||||
CACHE_KV: KVNamespace;
|
||||
// R2
|
||||
R2_BUCKET: R2Bucket;
|
||||
// Vectorize
|
||||
VECTORIZE?: VectorizeIndex;
|
||||
}
|
||||
|
||||
// ============================================
|
||||
@@ -296,6 +298,8 @@ export interface KnowledgeArticle {
|
||||
tags: string | null;
|
||||
language: string;
|
||||
is_active: number;
|
||||
embedding_indexed: number;
|
||||
embedding_indexed_at: string | null;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
}
|
||||
|
||||
@@ -45,6 +45,12 @@ bucket_name = "ai-support-assets"
|
||||
binding = "CLOUD_ORCHESTRATOR"
|
||||
service = "cloud-orchestrator"
|
||||
|
||||
# Vectorize (RAG semantic search)
|
||||
# Create index: npx wrangler vectorize create knowledge-embeddings --dimensions=768 --metric=cosine
|
||||
[[vectorize]]
|
||||
binding = "VECTORIZE"
|
||||
index_name = "knowledge-embeddings"
|
||||
|
||||
# Cron Triggers:
|
||||
# - 매일 자정(KST): 만료 알림 + 데이터 아카이빙 + 정합성 검증
|
||||
# - 매 5분: pending 상태 자동 정리
|
||||
|
||||
Reference in New Issue
Block a user