/**
 * Troubleshoot Agent - system troubleshooting expert
 *
 * Features:
 * - Conversational problem diagnosis and resolution
 * - Session-based information gathering (D1)
 * - Category-specific solutions
 * - Looks up current fixes through the Brave Search / Context7 tools
 *
 * Manual Test:
 * 1. User: "서버가 502 에러가 나요" (i.e. "my server returns a 502 error")
 * 2. Expected: Category detection → 1-2 questions → Solution
 * 3. User: "해결됐어요" (i.e. "it is fixed")
 * 4. Expected: Session deleted
 */
import type { Env, TroubleshootSession, TroubleshootSessionStatus } from '../types';
import { createLogger } from '../utils/logger';
import { executeSearchWeb, executeLookupDocs } from '../tools/search-tool';

const logger = createLogger('troubleshoot-agent');

// D1 Session Management
const TROUBLESHOOT_SESSION_TTL_MS = 60 * 60 * 1000; // 1 hour
const MAX_MESSAGES = 20; // maximum number of messages kept per session

// In-band markers exchanged between the model, the AI caller and the main handler.
const PASSTHROUGH_MARKER = '__PASSTHROUGH__';
const SESSION_END_MARKER = '__SESSION_END__';
const SESSION_END_TAG = '[세션 종료]';

/**
 * Loads the troubleshooting session of a user from D1.
 *
 * Only rows whose `expires_at` is still in the future are returned. Any failure
 * (query error or unparsable JSON columns) is logged and reported as `null`.
 *
 * @param db - D1 Database
 * @param userId - Telegram User ID
 * @returns The session, or `null` when none exists, it has expired, or the lookup failed
 */
export async function getTroubleshootSession(
  db: D1Database,
  userId: string
): Promise<TroubleshootSession | null> {
  try {
    const now = Date.now();
    const result = await db
      .prepare('SELECT * FROM troubleshoot_sessions WHERE user_id = ? AND expires_at > ?')
      .bind(userId, now)
      .first<{
        user_id: string;
        status: string;
        collected_info: string | null;
        messages: string | null;
        created_at: number;
        updated_at: number;
        expires_at: number;
      }>();

    if (!result) {
      logger.info('트러블슈팅 세션 없음', { userId });
      return null;
    }

    // collected_info / messages are stored as JSON text; NULL columns map to empty values.
    const session: TroubleshootSession = {
      user_id: result.user_id,
      status: result.status as TroubleshootSessionStatus,
      collected_info: result.collected_info ? JSON.parse(result.collected_info) : {},
      messages: result.messages ? JSON.parse(result.messages) : [],
      created_at: result.created_at,
      updated_at: result.updated_at,
      expires_at: result.expires_at,
    };

    logger.info('트러블슈팅 세션 조회 성공', { userId, status: session.status });
    return session;
  } catch (error) {
    logger.error('트러블슈팅 세션 조회 실패', error as Error, { userId });
    return null;
  }
}

/**
 * Persists a troubleshooting session (insert, or update on `user_id` conflict).
 *
 * `updated_at` is set to the current time and `expires_at` is pushed out by the
 * session TTL on every save; `created_at` is left untouched on update.
 *
 * @param db - D1 Database
 * @param session - Session to store
 * @throws Rethrows the underlying error after logging it
 */
export async function saveTroubleshootSession(
  db: D1Database,
  session: TroubleshootSession
): Promise<void> {
  try {
    const now = Date.now();
    const expiresAt = now + TROUBLESHOOT_SESSION_TTL_MS;

    await db
      .prepare(`
        INSERT INTO troubleshoot_sessions
          (user_id, status, collected_info, messages, created_at, updated_at, expires_at)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(user_id) DO UPDATE SET
          status = excluded.status,
          collected_info = excluded.collected_info,
          messages = excluded.messages,
          updated_at = excluded.updated_at,
          expires_at = excluded.expires_at
      `)
      .bind(
        session.user_id,
        session.status,
        JSON.stringify(session.collected_info || {}),
        JSON.stringify(session.messages || []),
        session.created_at || now,
        now,
        expiresAt
      )
      .run();

    logger.info('트러블슈팅 세션 저장 성공', { userId: session.user_id, status: session.status });
  } catch (error) {
    logger.error('트러블슈팅 세션 저장 실패', error as Error, { userId: session.user_id });
    throw error;
  }
}

/**
 * Deletes the troubleshooting session of a user.
 *
 * @param db - D1 Database
 * @param userId - Telegram User ID
 * @throws Rethrows the underlying error after logging it
 */
export async function deleteTroubleshootSession(
  db: D1Database,
  userId: string
): Promise<void> {
  try {
    await db
      .prepare('DELETE FROM troubleshoot_sessions WHERE user_id = ?')
      .bind(userId)
      .run();
    logger.info('트러블슈팅 세션 삭제 성공', { userId });
  } catch (error) {
    logger.error('트러블슈팅 세션 삭제 실패', error as Error, { userId });
    throw error;
  }
}

/**
 * Builds a fresh in-memory troubleshooting session (nothing is written to D1).
 *
 * @param userId - Telegram User ID
 * @param status - Initial session status
 * @returns A new session with empty collected info and message history
 */
export function createTroubleshootSession(
  userId: string,
  status: TroubleshootSessionStatus = 'gathering'
): TroubleshootSession {
  const now = Date.now();
  return {
    user_id: userId,
    status,
    collected_info: {},
    messages: [],
    created_at: now,
    updated_at: now,
    expires_at: now + TROUBLESHOOT_SESSION_TTL_MS,
  };
}

/**
 * Tells whether a session is past its expiry time.
 *
 * @param session - Session to check
 * @returns true if expired, false otherwise
 */
export function isSessionExpired(session: TroubleshootSession): boolean {
  return session.expires_at < Date.now();
}

/**
 * Appends a message to the session history, mutating the session in place.
 *
 * When the history grows beyond MAX_MESSAGES only the most recent
 * MAX_MESSAGES entries are kept.
 *
 * @param session - Session to update
 * @param role - Message role ('user' | 'assistant')
 * @param content - Message text
 */
export function addMessageToSession(
  session: TroubleshootSession,
  role: 'user' | 'assistant',
  content: string
): void {
  session.messages.push({ role, content });

  // Cap the history length by dropping the oldest entries.
  if (session.messages.length > MAX_MESSAGES) {
    session.messages = session.messages.slice(-MAX_MESSAGES);
    logger.warn('세션 메시지 최대 개수 초과, 오래된 메시지 제거', {
      userId: session.user_id,
      maxMessages: MAX_MESSAGES,
    });
  }
}

/**
 * Checks whether the user has an active troubleshooting session (used for routing).
 *
 * @param db - D1 Database
 * @param userId - Telegram User ID
 * @returns true if active session exists, false otherwise
 */
export async function hasTroubleshootSession(db: D1Database, userId: string): Promise<boolean> {
  const session = await getTroubleshootSession(db, userId);
  return session !== null && !isSessionExpired(session);
}

// Troubleshoot Expert System Prompt
const TROUBLESHOOT_EXPERT_PROMPT = `당신은 20년 경력의 시니어 DevOps/SRE 엔지니어입니다.

## 전문성 (20년 경력)
- Linux/Windows Server 관리 마스터 (Apache, Nginx, IIS 등)
- 네트워크 트러블슈팅 전문가 (DNS, SSL, 방화벽, CDN)
- 애플리케이션 배포 및 디버깅 경험 수천 건
- 모든 주요 기술 스택 경험 (Node.js, Python, PHP, Java, Ruby, Go 등)
- 데이터베이스 성능 튜닝 (MySQL, PostgreSQL, MongoDB, Redis)
- 컨테이너/오케스트레이션 (Docker, Kubernetes)
- 클라우드 인프라 마스터 (Anvil 전문)

## 성격
- 차분하고 체계적인 문제 해결
- 비기술자도 이해하기 쉽게 설명
- 단계별로 명확한 지시
- 긴급 상황에서도 침착함 유지
- 따뜻하고 격려하는 어조

## 금지 사항 (절대 위반 금지)
- AWS, GCP, Azure, DigitalOcean, Vultr, Linode 등 다른 클라우드 서비스 추천 금지
- 경쟁사 서비스 언급 금지
- 우리 서비스(Anvil)만 언급

## 문제 카테고리
1. **Server/Infrastructure**: 서버 느림, 502/503 에러, 메모리 부족, CPU 과부하, 디스크 풀
2. **Domain/DNS**: 도메인 연결 안됨, SSL 인증서 오류, DNS 전파 지연, 네임서버 문제
3. **Code/Deploy**: 배포 실패, 빌드 에러, 의존성 충돌, 환경변수 누락
4. **Network**: 연결 끊김, 타임아웃, CORS 오류, 방화벽 차단
5. **Database**: 쿼리 느림, 연결 풀 고갈, 데드락, 인덱스 누락

## 도구 사용 가이드 (적극 활용)
- 에러 메시지, Stack trace 언급 시 → **반드시** search_solution 호출
- 특정 프레임워크/라이브러리 문제 → lookup_docs 호출하여 공식 가이드 확인
- 도구 결과를 자연스럽게 해결책에 포함 (예: "공식 문서에 따르면...", "최근 Stack Overflow 답변을 보니...")
- 검색 쿼리는 영문으로 (더 많은 결과)

## 대화 흐름
1. **문제 청취**: 사용자 증상 경청, 카테고리 자동 분류
2. **정보 수집**: 1-2개 질문으로 환경/에러 메시지 확인
3. **진단**: 수집된 정보 기반 원인 분석 (도구 활용)
4. **해결**: 단계별 명확한 솔루션 제시 (명령어 포함)
5. **확인**: 해결 여부 확인, 필요시 추가 지원

## 핵심 규칙 (반드시 준수)
- 에러 메시지가 명확하면 즉시 진단/해결 제시
- 정보가 애매하면 최대 2개 질문
- 해결책은 구체적이고 실행 가능한 명령어/코드 포함
- 해결 후 "해결되셨나요?" 확인
- 해결 안되면 추가 조치 또는 상위 엔지니어 에스컬레이션 제안

## 특수 지시
- 트러블슈팅과 무관한 메시지가 들어오면 반드시 "__PASSTHROUGH__"만 응답
- 문제 해결이 완료되면 "__SESSION_END__"를 응답 끝에 추가`;

// Troubleshoot Tools for Function Calling
const TROUBLESHOOT_TOOLS = [
  {
    type: 'function' as const,
    function: {
      name: 'search_solution',
      description:
        'Brave Search로 에러 메시지, 해결책, Stack Overflow 답변 검색합니다. 예: "nginx 502 bad gateway solution", "mysql connection pool exhausted fix"',
      parameters: {
        type: 'object',
        properties: {
          query: {
            type: 'string',
            description: '검색 쿼리 (에러 메시지, 기술 스택 포함, 영문 권장)',
          },
        },
        required: ['query'],
      },
    },
  },
  {
    type: 'function' as const,
    function: {
      name: 'lookup_docs',
      description:
        '프레임워크/라이브러리 공식 문서에서 트러블슈팅 가이드, 에러 코드, 디버깅 방법을 조회합니다.',
      parameters: {
        type: 'object',
        properties: {
          library: {
            type: 'string',
            description: '라이브러리/프레임워크 이름 (예: nginx, nodejs, mysql, docker)',
          },
          topic: {
            type: 'string',
            description: '조회할 주제 (예: troubleshooting, error codes, debugging, common issues)',
          },
        },
        required: ['library', 'topic'],
      },
    },
  },
];

/**
 * Dispatches a tool call requested by the model to the matching search helper.
 *
 * @param toolName - Tool name as declared in TROUBLESHOOT_TOOLS
 * @param args - Parsed tool arguments supplied by the model
 * @param env - Environment
 * @returns The tool output, or an "unknown tool" message for unrecognised names
 */
async function executeTroubleshootTool(
  toolName: string,
  args: Record<string, unknown>,
  env: Env
): Promise<string> {
  logger.info('도구 실행', { toolName, args });

  switch (toolName) {
    case 'search_solution': {
      const result = await executeSearchWeb({ query: args.query as string }, env);
      return result;
    }
    case 'lookup_docs': {
      // The tool schema calls the field "topic"; the docs helper expects it as "query".
      const result = await executeLookupDocs(
        {
          library: args.library as string,
          query: args.topic as string,
        },
        env
      );
      return result;
    }
    default:
      return `알 수 없는 도구: ${toolName}`;
  }
}

// OpenAI API response types
interface OpenAIToolCall {
  id: string;
  type: 'function';
  function: {
    name: string;
    arguments: string;
  };
}

interface OpenAIMessage {
  role: 'assistant';
  content: string | null;
  tool_calls?: OpenAIToolCall[];
}

interface OpenAIAPIResponse {
  choices: Array<{
    message: OpenAIMessage;
    finish_reason: string;
  }>;
}

// Shape of the messages sent in the chat completion request.
interface ChatRequestMessage {
  role: string;
  content: string | null;
  tool_calls?: OpenAIToolCall[];
  tool_call_id?: string;
  name?: string;
}

/**
 * Runs one tool call and always yields a string that can be sent back as the
 * tool result. Malformed arguments or a failing tool are logged and reported
 * to the model as text, so one bad tool call does not abort the consultation.
 */
async function runToolCall(toolCall: OpenAIToolCall, env: Env): Promise<string> {
  const toolName = toolCall.function.name;
  try {
    const parsed: unknown = JSON.parse(toolCall.function.arguments);
    const args: Record<string, unknown> =
      typeof parsed === 'object' && parsed !== null ? (parsed as Record<string, unknown>) : {};
    return await executeTroubleshootTool(toolName, args, env);
  } catch (toolError) {
    logger.error('도구 실행 실패', toolError as Error, { toolName });
    const reason = toolError instanceof Error ? toolError.message : String(toolError);
    return `도구 실행 실패 (${toolName}): ${reason}`;
  }
}

/**
 * Calls the Troubleshoot Expert model (with function calling support).
 *
 * `session.messages` is sent as prior history and `userMessage` is appended as
 * the newest turn, so the caller must NOT have added `userMessage` to the
 * session yet (otherwise the model receives it twice).
 *
 * Tool calls requested by the model are executed and fed back until the tool
 * budget (MAX_TOOL_CALLS) is used up; after that one last request is made with
 * tool calling disabled so the model answers from the results gathered so far.
 *
 * @param session - Session providing history and collected info
 * @param userMessage - Current user message
 * @param env - Environment
 * @returns `response` is either exactly "__PASSTHROUGH__", or the answer text,
 *          suffixed with "[세션 종료]" when the model signalled the session end
 * @throws When the API key is missing, the API responds with an error, or the
 *         response carries no choices
 */
async function callTroubleshootExpertAI(
  session: TroubleshootSession,
  userMessage: string,
  env: Env
): Promise<{ response: string; toolCalls?: Array<{ name: string; arguments: Record<string, unknown> }> }> {
  if (!env.OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY not configured');
  }

  const { getOpenAIUrl } = await import('../utils/api-urls');

  // Build conversation history
  const conversationHistory = session.messages.map(m => ({
    role: m.role === 'user' ? ('user' as const) : ('assistant' as const),
    content: m.content,
  }));

  const systemPrompt = `${TROUBLESHOOT_EXPERT_PROMPT}

## 현재 수집된 정보
${JSON.stringify(session.collected_info, null, 2)}`;

  try {
    const messages: ChatRequestMessage[] = [
      { role: 'system', content: systemPrompt },
      ...conversationHistory,
      { role: 'user', content: userMessage },
    ];

    const MAX_TOOL_CALLS = 3;
    let toolCallCount = 0;

    // Terminates: every tool round raises toolCallCount by at least one, and the
    // first request issued with tools disabled always returns.
    for (;;) {
      const toolsAllowed = toolCallCount < MAX_TOOL_CALLS;
      if (!toolsAllowed) {
        logger.warn('최대 도구 호출 횟수 도달', { toolCallCount });
      }

      const requestBody = {
        model: 'gpt-4o-mini',
        messages,
        tools: TROUBLESHOOT_TOOLS,
        // 'none' forces a plain text answer once the tool budget is spent.
        tool_choice: toolsAllowed ? 'auto' : 'none',
        max_tokens: 1500,
        temperature: 0.5,
      };

      const response = await fetch(getOpenAIUrl(env), {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${env.OPENAI_API_KEY}`,
        },
        body: JSON.stringify(requestBody),
      });

      if (!response.ok) {
        const error = await response.text();
        throw new Error(`OpenAI API error: ${response.status} - ${error}`);
      }

      const data = (await response.json()) as OpenAIAPIResponse;
      const assistantMessage = data.choices?.[0]?.message;
      if (!assistantMessage) {
        throw new Error('OpenAI API error: response contained no choices');
      }

      const requestedTools = assistantMessage.tool_calls ?? [];

      // Check if AI wants to call tools
      if (toolsAllowed && requestedTools.length > 0) {
        logger.info('도구 호출 요청', {
          tools: requestedTools.map(tc => tc.function.name),
        });

        // Add assistant message with tool calls
        messages.push({
          role: 'assistant',
          content: assistantMessage.content,
          tool_calls: requestedTools,
        });

        // Every tool call must be answered by a tool message carrying its id.
        for (const toolCall of requestedTools) {
          const result = await runToolCall(toolCall, env);
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: toolCall.function.name,
            content: result,
          });
          toolCallCount++;
        }

        // Continue loop to get AI's response with tool results
        continue;
      }

      // No tool calls - return final response
      const aiResponse = assistantMessage.content || '';
      logger.info('AI 응답', { response: aiResponse.slice(0, 200) });

      // Budget exhausted and still nothing to say: fall back to a fixed notice.
      if (!toolsAllowed && aiResponse.trim() === '') {
        return {
          response: '수집한 정보를 바탕으로 해결책을 제시해드리겠습니다.',
        };
      }

      // Check for special markers
      if (aiResponse.includes(PASSTHROUGH_MARKER)) {
        return { response: PASSTHROUGH_MARKER };
      }

      // Check for session end marker; strip every occurrence, not just the first.
      const sessionEnd = aiResponse.includes(SESSION_END_MARKER);
      const cleanResponse = aiResponse.split(SESSION_END_MARKER).join('').trim();

      return {
        response: sessionEnd ? `${cleanResponse}\n\n${SESSION_END_TAG}` : cleanResponse,
      };
    }
  } catch (error) {
    logger.error('Troubleshoot Expert AI 호출 실패', error as Error);
    throw error;
  }
}

/**
 * Handles one turn of a troubleshooting consultation (main entry point).
 *
 * Loads or creates the user's session, asks the expert model for a reply and
 * then either passes the message through, ends (deletes) the session, or
 * records the turn and saves the session. Errors are logged and turned into a
 * user-facing apology instead of being thrown.
 *
 * @param db - D1 Database
 * @param userId - Telegram User ID
 * @param userMessage - User message
 * @param env - Environment
 * @returns The reply for the user, or "__PASSTHROUGH__" when the message is
 *          not troubleshooting related
 */
export async function processTroubleshootConsultation(
  db: D1Database,
  userId: string,
  userMessage: string,
  env: Env
): Promise<string> {
  const startTime = Date.now();
  logger.info('트러블슈팅 상담 시작', { userId, message: userMessage.substring(0, 100) });

  try {
    // 1. Check for existing session, 2. create a new one if none exists
    const session =
      (await getTroubleshootSession(db, userId)) ?? createTroubleshootSession(userId, 'gathering');

    // 3. Call AI. The session still holds only PRIOR turns here; the AI caller
    //    appends userMessage itself, so adding it first would send it twice.
    const aiResult = await callTroubleshootExpertAI(session, userMessage, env);

    // 4. Handle __PASSTHROUGH__ - not troubleshoot related
    if (aiResult.response.includes(PASSTHROUGH_MARKER)) {
      logger.info('트러블슈팅 상담 패스스루', { userId });
      // Don't save session if passthrough
      return PASSTHROUGH_MARKER;
    }

    // 5. Handle __SESSION_END__ - session complete
    if (aiResult.response.includes(SESSION_END_TAG)) {
      logger.info('트러블슈팅 상담 세션 종료', { userId });
      await deleteTroubleshootSession(db, userId);
      return aiResult.response.replace(SESSION_END_TAG, '').trim();
    }

    // 6. Record the completed turn (user message, then assistant reply) and save
    addMessageToSession(session, 'user', userMessage);
    addMessageToSession(session, 'assistant', aiResult.response);

    // Update session status based on response content (simple heuristic)
    if (aiResult.response.includes('원인') || aiResult.response.includes('분석')) {
      session.status = 'diagnosing';
    } else if (aiResult.response.includes('해결') || aiResult.response.includes('방법')) {
      session.status = 'suggesting';
    }

    session.updated_at = Date.now();
    await saveTroubleshootSession(db, session);

    logger.info('트러블슈팅 상담 완료', {
      userId,
      duration: Date.now() - startTime,
      status: session.status,
    });

    return aiResult.response;
  } catch (error) {
    logger.error('트러블슈팅 상담 오류', error as Error, { userId });
    return '죄송합니다. 트러블슈팅 상담 중 오류가 발생했습니다. 잠시 후 다시 시도해주세요.';
  }
}