// telegram-bot-workers/src/agents/troubleshoot-agent.ts

/**
 * Troubleshoot Agent - 시스템 트러블슈팅 전문가
 *
 * 기능:
 * - 대화형 문제 진단 및 해결
 * - 세션 기반 정보 수집 (D1)
 * - 카테고리별 전문 솔루션 제공
 * - Brave Search / Context7 도구로 최신 해결책 검색
 *
 * Manual Test:
 * 1. User: "서버가 502 에러가 나요"
 * 2. Expected: Category detection → 1-2 questions → Solution
 * 3. User: "해결됐어요"
 * 4. Expected: Session deleted
 */

import type { Env, TroubleshootSession, TroubleshootSessionStatus } from '../types';
import { createLogger } from '../utils/logger';
import { executeSearchWeb, executeLookupDocs } from '../tools/search-tool';

// Module-level logger, created with the 'troubleshoot-agent' label.
const logger = createLogger('troubleshoot-agent');

// D1 session management
// Session lifetime in milliseconds. saveTroubleshootSession recomputes
// expires_at as "now + TTL" on every save, so each save extends the session.
const TROUBLESHOOT_SESSION_TTL_MS = 60 * 60 * 1000; // 1 hour
// Upper bound on stored messages; addMessageToSession keeps only the newest ones.
const MAX_MESSAGES = 20; // maximum number of messages per session

/**
 * Loads the active troubleshooting session of a user from D1.
 *
 * Only rows whose `expires_at` is later than the current time are matched.
 * The JSON-encoded `collected_info` and `messages` columns are decoded, using
 * `{}` / `[]` when a column is empty. Any failure (query or JSON decoding) is
 * logged and reported to the caller as "no session".
 *
 * @param db - D1 database handle
 * @param userId - Telegram user ID
 * @returns The stored session, or null when there is none, it has expired,
 *          or the lookup failed
 */
export async function getTroubleshootSession(
  db: D1Database,
  userId: string
): Promise<TroubleshootSession | null> {
  // Raw column layout of a troubleshoot_sessions row as read by this function.
  type SessionRow = {
    user_id: string;
    status: string;
    collected_info: string | null;
    messages: string | null;
    created_at: number;
    updated_at: number;
    expires_at: number;
  };

  try {
    const lookup = db
      .prepare('SELECT * FROM troubleshoot_sessions WHERE user_id = ? AND expires_at > ?')
      .bind(userId, Date.now());
    const row = await lookup.first<SessionRow>();

    if (!row) {
      logger.info('트러블슈팅 세션 없음', { userId });
      return null;
    }

    const { collected_info: rawInfo, messages: rawMessages } = row;
    const session: TroubleshootSession = {
      user_id: row.user_id,
      status: row.status as TroubleshootSessionStatus,
      // Empty/NULL columns fall back to an empty object / empty list.
      collected_info: rawInfo ? JSON.parse(rawInfo) : {},
      messages: rawMessages ? JSON.parse(rawMessages) : [],
      created_at: row.created_at,
      updated_at: row.updated_at,
      expires_at: row.expires_at,
    };

    logger.info('트러블슈팅 세션 조회 성공', { userId, status: session.status });
    return session;
  } catch (error) {
    // Lookup problems are deliberately not propagated; callers see "no session".
    logger.error('트러블슈팅 세션 조회 실패', error as Error, { userId });
    return null;
  }
}

/**
 * Persists a troubleshooting session (insert, or update when the user already has a row).
 *
 * `updated_at` is written as the current time and `expires_at` as
 * "now + TROUBLESHOOT_SESSION_TTL_MS", regardless of the values carried by
 * `session`. `created_at` is only written on insert; a falsy `created_at`
 * is replaced by the current time.
 *
 * @param db - D1 database handle
 * @param session - Session to store
 * @throws Re-throws any error raised by the database call after logging it
 */
export async function saveTroubleshootSession(
  db: D1Database,
  session: TroubleshootSession
): Promise<void> {
  try {
    const savedAt = Date.now();

    // Upsert keyed on user_id: the conflict branch leaves created_at untouched.
    const upsertSql = `
      INSERT INTO troubleshoot_sessions
        (user_id, status, collected_info, messages, created_at, updated_at, expires_at)
      VALUES (?, ?, ?, ?, ?, ?, ?)
      ON CONFLICT(user_id) DO UPDATE SET
        status = excluded.status,
        collected_info = excluded.collected_info,
        messages = excluded.messages,
        updated_at = excluded.updated_at,
        expires_at = excluded.expires_at
    `;

    // Values in the same order as the column list above.
    const boundValues = [
      session.user_id,
      session.status,
      JSON.stringify(session.collected_info || {}),
      JSON.stringify(session.messages || []),
      session.created_at || savedAt,
      savedAt,
      savedAt + TROUBLESHOOT_SESSION_TTL_MS,
    ];

    await db.prepare(upsertSql).bind(...boundValues).run();

    logger.info('트러블슈팅 세션 저장 성공', { userId: session.user_id, status: session.status });
  } catch (error) {
    logger.error('트러블슈팅 세션 저장 실패', error as Error, { userId: session.user_id });
    throw error;
  }
}

/**
 * Removes a user's troubleshooting session row from D1.
 *
 * @param db - D1 database handle
 * @param userId - Telegram user ID
 * @throws Re-throws any error raised by the database call after logging it
 */
export async function deleteTroubleshootSession(
  db: D1Database,
  userId: string
): Promise<void> {
  try {
    const removal = db.prepare('DELETE FROM troubleshoot_sessions WHERE user_id = ?');
    await removal.bind(userId).run();
    logger.info('트러블슈팅 세션 삭제 성공', { userId });
  } catch (error) {
    logger.error('트러블슈팅 세션 삭제 실패', error as Error, { userId });
    throw error;
  }
}

/**
 * Builds a new, empty in-memory troubleshooting session (nothing is written to D1).
 *
 * All three timestamps derive from a single clock reading: `created_at` and
 * `updated_at` are "now", `expires_at` is "now + TROUBLESHOOT_SESSION_TTL_MS".
 *
 * @param userId - Telegram user ID
 * @param status - Initial session status (defaults to 'gathering')
 * @returns A fresh TroubleshootSession with no collected info and no messages
 */
export function createTroubleshootSession(
  userId: string,
  status: TroubleshootSessionStatus = 'gathering'
): TroubleshootSession {
  const createdAt = Date.now();
  const freshSession: TroubleshootSession = {
    user_id: userId,
    status,
    collected_info: {},
    messages: [],
    created_at: createdAt,
    updated_at: createdAt,
    expires_at: createdAt + TROUBLESHOOT_SESSION_TTL_MS,
  };
  return freshSession;
}

/**
 * Reports whether a session's lifetime has elapsed.
 *
 * A session counts as expired as soon as the current time reaches
 * `expires_at`. The boundary is inclusive so this check agrees with the
 * lookup in getTroubleshootSession, which only selects rows with
 * `expires_at > now`.
 *
 * @param session - Session to check
 * @returns true if expired, false otherwise
 */
export function isSessionExpired(session: TroubleshootSession): boolean {
  return session.expires_at <= Date.now();
}

/**
 * Appends a message to the session's in-memory history (nothing is written to D1).
 *
 * When the history grows beyond MAX_MESSAGES, `session.messages` is replaced
 * by a new array holding only the most recent MAX_MESSAGES entries and a
 * warning is logged.
 *
 * @param session - Session to mutate
 * @param role - Message author ('user' | 'assistant')
 * @param content - Message text
 */
export function addMessageToSession(
  session: TroubleshootSession,
  role: 'user' | 'assistant',
  content: string
): void {
  const history = session.messages;
  history.push({ role, content });

  // Number of entries beyond the cap; nothing to trim while it is not positive.
  const overflow = history.length - MAX_MESSAGES;
  if (overflow <= 0) {
    return;
  }

  // Drop the oldest entries by keeping everything from index `overflow` onwards.
  session.messages = history.slice(overflow);
  logger.warn('세션 메시지 최대 개수 초과, 오래된 메시지 제거', {
    userId: session.user_id,
    maxMessages: MAX_MESSAGES,
  });
}

/**
 * Tells whether the user currently has a live troubleshooting session (used for routing).
 *
 * Delegates to getTroubleshootSession and then re-checks the returned
 * session with isSessionExpired.
 *
 * @param db - D1 database handle
 * @param userId - Telegram user ID
 * @returns true if an active session exists, false otherwise
 */
export async function hasTroubleshootSession(db: D1Database, userId: string): Promise<boolean> {
  const activeSession = await getTroubleshootSession(db, userId);
  if (activeSession === null) {
    return false;
  }
  return !isSessionExpired(activeSession);
}

// System prompt for the troubleshooting expert persona (the prompt text itself is Korean).
// callTroubleshootExpertAI appends the session's collected_info to it before each request.
// The prompt also defines the two control markers that the calling code looks for in replies:
//   "__PASSTHROUGH__" - the only reply allowed for messages unrelated to troubleshooting
//   "__SESSION_END__" - appended to the reply once the problem is resolved
// NOTE(review): the tool-usage section names search_solution / lookup_docs; keep these in
// sync with the tool names declared in TROUBLESHOOT_TOOLS.
const TROUBLESHOOT_EXPERT_PROMPT = `당신은 20년 경력의 시니어 DevOps/SRE 엔지니어입니다.

## 전문성 (20년 경력)
- Linux/Windows Server 관리 마스터 (Apache, Nginx, IIS 등)
- 네트워크 트러블슈팅 전문가 (DNS, SSL, 방화벽, CDN)
- 애플리케이션 배포 및 디버깅 경험 수천 건
- 모든 주요 기술 스택 경험 (Node.js, Python, PHP, Java, Ruby, Go 등)
- 데이터베이스 성능 튜닝 (MySQL, PostgreSQL, MongoDB, Redis)
- 컨테이너/오케스트레이션 (Docker, Kubernetes)
- 클라우드 인프라 마스터 (Anvil 전문)

## 성격
- 차분하고 체계적인 문제 해결
- 비기술자도 이해하기 쉽게 설명
- 단계별로 명확한 지시
- 긴급 상황에서도 침착함 유지
- 따뜻하고 격려하는 어조

## 금지 사항 (절대 위반 금지)
- AWS, GCP, Azure, DigitalOcean, Vultr, Linode 등 다른 클라우드 서비스 추천 금지
- 경쟁사 서비스 언급 금지
- 우리 서비스(Anvil)만 언급

## 문제 카테고리
1. **Server/Infrastructure**: 서버 느림, 502/503 에러, 메모리 부족, CPU 과부하, 디스크 풀
2. **Domain/DNS**: 도메인 연결 안됨, SSL 인증서 오류, DNS 전파 지연, 네임서버 문제
3. **Code/Deploy**: 배포 실패, 빌드 에러, 의존성 충돌, 환경변수 누락
4. **Network**: 연결 끊김, 타임아웃, CORS 오류, 방화벽 차단
5. **Database**: 쿼리 느림, 연결 풀 고갈, 데드락, 인덱스 누락

## 도구 사용 가이드 (적극 활용)
- 에러 메시지, Stack trace 언급 시 → **반드시** search_solution 호출
- 특정 프레임워크/라이브러리 문제 → lookup_docs 호출하여 공식 가이드 확인
- 도구 결과를 자연스럽게 해결책에 포함 (예: "공식 문서에 따르면...", "최근 Stack Overflow 답변을 보니...")
- 검색 쿼리는 영문으로 (더 많은 결과)

## 대화 흐름
1. **문제 청취**: 사용자 증상 경청, 카테고리 자동 분류
2. **정보 수집**: 1-2개 질문으로 환경/에러 메시지 확인
3. **진단**: 수집된 정보 기반 원인 분석 (도구 활용)
4. **해결**: 단계별 명확한 솔루션 제시 (명령어 포함)
5. **확인**: 해결 여부 확인, 필요시 추가 지원

## 핵심 규칙 (반드시 준수)
- 에러 메시지가 명확하면 즉시 진단/해결 제시
- 정보가 애매하면 최대 2개 질문
- 해결책은 구체적이고 실행 가능한 명령어/코드 포함
- 해결 후 "해결되셨나요?" 확인
- 해결 안되면 추가 조치 또는 상위 엔지니어 에스컬레이션 제안

## 특수 지시
- 트러블슈팅과 무관한 메시지가 들어오면 반드시 "__PASSTHROUGH__"만 응답
- 문제 해결이 완료되면 "__SESSION_END__"를 응답 끝에 추가`;

// Function-calling tool definitions sent to the model with every request.
// Each entry is declared on its own and then collected into TROUBLESHOOT_TOOLS;
// the order (search_solution, lookup_docs) is the order sent to the API.

// Web search for error messages and fixes; takes a single required `query` string.
const SEARCH_SOLUTION_TOOL = {
  type: 'function' as const,
  function: {
    name: 'search_solution',
    description: 'Brave Search로 에러 메시지, 해결책, Stack Overflow 답변 검색합니다. 예: "nginx 502 bad gateway solution", "mysql connection pool exhausted fix"',
    parameters: {
      type: 'object',
      properties: {
        query: {
          type: 'string',
          description: '검색 쿼리 (에러 메시지, 기술 스택 포함, 영문 권장)',
        },
      },
      required: ['query'],
    },
  },
};

// Official-documentation lookup; requires both `library` and `topic`.
const LOOKUP_DOCS_TOOL = {
  type: 'function' as const,
  function: {
    name: 'lookup_docs',
    description: '프레임워크/라이브러리 공식 문서에서 트러블슈팅 가이드, 에러 코드, 디버깅 방법을 조회합니다.',
    parameters: {
      type: 'object',
      properties: {
        library: {
          type: 'string',
          description: '라이브러리/프레임워크 이름 (예: nginx, nodejs, mysql, docker)',
        },
        topic: {
          type: 'string',
          description: '조회할 주제 (예: troubleshooting, error codes, debugging, common issues)',
        },
      },
      required: ['library', 'topic'],
    },
  },
};

const TROUBLESHOOT_TOOLS = [SEARCH_SOLUTION_TOOL, LOOKUP_DOCS_TOOL];

/**
 * Runs a tool requested by the model and returns its textual result.
 *
 * `search_solution` is forwarded to executeSearchWeb; `lookup_docs` is
 * forwarded to executeLookupDocs, with the tool's `topic` argument passed as
 * that helper's `query`. The arguments come from model-generated JSON, so
 * each required argument is checked to be a non-empty string first. A missing
 * or malformed argument, like an unknown tool name, yields an explanatory
 * string for the model instead of calling the helper with `undefined`.
 *
 * @param toolName - Tool name as requested by the model
 * @param args - Parsed tool arguments (untrusted)
 * @param env - Worker environment, passed through to the search helpers
 * @returns The tool output, or an error description the model can react to
 */
async function executeTroubleshootTool(
  toolName: string,
  args: Record<string, unknown>,
  env: Env
): Promise<string> {
  logger.info('도구 실행', { toolName, args });

  // Returns the argument when it is a non-empty string, otherwise null.
  const readString = (key: string): string | null => {
    const value = args[key];
    return typeof value === 'string' && value.trim() !== '' ? value : null;
  };

  // Builds the message handed back to the model when required arguments are unusable.
  const invalidArguments = (required: string[]): string => {
    logger.warn('도구 인자 오류', { toolName, required });
    return `도구 인자 오류: ${toolName} 호출에는 비어 있지 않은 문자열 인자(${required.join(', ')})가 필요합니다.`;
  };

  switch (toolName) {
    case 'search_solution': {
      const query = readString('query');
      if (query === null) {
        return invalidArguments(['query']);
      }
      return await executeSearchWeb({ query }, env);
    }
    case 'lookup_docs': {
      const library = readString('library');
      const topic = readString('topic');
      if (library === null || topic === null) {
        return invalidArguments(['library', 'topic']);
      }
      return await executeLookupDocs({ library, query: topic }, env);
    }
    default:
      return `알 수 없는 도구: ${toolName}`;
  }
}

// OpenAI API response types (limited to the fields this module reads)

/** A single tool invocation requested by the model inside an assistant message. */
interface OpenAIToolCall {
  // Echoed back as `tool_call_id` on the matching tool-result message.
  id: string;
  type: 'function';
  function: {
    // Tool name; passed to executeTroubleshootTool.
    name: string;
    // JSON-encoded arguments; decoded with JSON.parse before the tool runs.
    arguments: string;
  };
}

/** The assistant message of a completion choice. */
interface OpenAIMessage {
  role: 'assistant';
  // Declared nullable; the caller substitutes '' when it is null.
  content: string | null;
  tool_calls?: OpenAIToolCall[];
}

/** Top-level completion response; only the first choice is used by this module. */
interface OpenAIAPIResponse {
  choices: Array<{
    message: OpenAIMessage;
    finish_reason: string;
  }>;
}

/**
 * Calls the troubleshooting expert model (function calling enabled) and
 * returns its final, post-processed reply.
 *
 * Flow:
 * 1. Build the request from the system prompt (plus the session's collected
 *    info), the session history and the current user message.
 * 2. While the model asks for tools and the tool budget is not used up, run
 *    every requested tool and feed its output back as a `tool` message.
 * 3. Once the budget is used up, ask again with `tool_choice: 'none'` so the
 *    model has to answer from the tool results gathered so far.
 *
 * Reply post-processing:
 * - a reply containing "__PASSTHROUGH__" is collapsed to exactly "__PASSTHROUGH__";
 * - every "__SESSION_END__" marker is stripped and "\n\n[세션 종료]" is
 *   appended instead, which is the marker processTroubleshootConsultation checks.
 *
 * @param session - Session whose messages and collected_info feed the prompt
 * @param userMessage - Current user message; it is not appended again when it
 *                      already is the last entry of `session.messages`
 * @param env - Worker environment; must provide OPENAI_API_KEY
 * @returns `{ response }`. `toolCalls` is part of the declared shape but is
 *          never populated by this function.
 * @throws Error when OPENAI_API_KEY is missing, the API answers with a
 *         non-OK status, or the response carries no choices
 */
async function callTroubleshootExpertAI(
  session: TroubleshootSession,
  userMessage: string,
  env: Env
): Promise<{ response: string; toolCalls?: Array<{ name: string; arguments: Record<string, unknown> }> }> {
  if (!env.OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY not configured');
  }

  const { getOpenAIUrl } = await import('../utils/api-urls');

  const PASSTHROUGH_MARKER = '__PASSTHROUGH__';
  const SESSION_END_MARKER = '__SESSION_END__';
  // Budget of executed tool calls; all calls of one assistant turn are always
  // answered, so the count may overshoot within a single turn.
  const MAX_TOOL_CALLS = 3;

  // Build conversation history
  const conversationHistory = session.messages.map(m => ({
    role: m.role === 'user' ? 'user' as const : 'assistant' as const,
    content: m.content,
  }));

  // The caller may already have appended the current user message to the
  // session; sending it a second time would duplicate it in the prompt.
  const lastStored = session.messages.length > 0
    ? session.messages[session.messages.length - 1]
    : undefined;
  const userMessageInHistory =
    lastStored !== undefined &&
    lastStored.role === 'user' &&
    lastStored.content === userMessage;

  const systemPrompt = `${TROUBLESHOOT_EXPERT_PROMPT}

## 현재 수집된 정보
${JSON.stringify(session.collected_info, null, 2)}`;

  try {
    const messages: Array<{
      role: string;
      content: string | null;
      tool_calls?: OpenAIToolCall[];
      tool_call_id?: string;
      name?: string
    }> = [
      { role: 'system', content: systemPrompt },
      ...conversationHistory,
    ];
    if (!userMessageInHistory) {
      messages.push({ role: 'user', content: userMessage });
    }

    let toolCallCount = 0;

    // Every tool round executes at least one call, so after at most
    // MAX_TOOL_CALLS rounds tools are disabled and the next round is final.
    for (let round = 0; round <= MAX_TOOL_CALLS; round++) {
      const toolsAllowed = toolCallCount < MAX_TOOL_CALLS;

      const requestBody = {
        model: 'gpt-4o-mini',
        messages,
        tools: TROUBLESHOOT_TOOLS,
        // 'none' forces a textual answer once the tool budget is spent.
        tool_choice: toolsAllowed ? 'auto' : 'none',
        max_tokens: 1500,
        temperature: 0.5,
      };

      const response = await fetch(getOpenAIUrl(env), {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${env.OPENAI_API_KEY}`,
        },
        body: JSON.stringify(requestBody),
      });

      if (!response.ok) {
        const error = await response.text();
        throw new Error(`OpenAI API error: ${response.status} - ${error}`);
      }

      const data = await response.json() as OpenAIAPIResponse;
      const firstChoice = Array.isArray(data.choices) ? data.choices[0] : undefined;
      if (!firstChoice || !firstChoice.message) {
        throw new Error('OpenAI API error: response contained no choices');
      }
      const assistantMessage = firstChoice.message;
      const requestedTools = assistantMessage.tool_calls || [];

      // Check if AI wants to call tools
      if (toolsAllowed && requestedTools.length > 0) {
        logger.info('도구 호출 요청', {
          tools: requestedTools.map(tc => tc.function.name),
        });

        // Add assistant message with tool calls
        messages.push({
          role: 'assistant',
          content: assistantMessage.content,
          tool_calls: requestedTools,
        });

        // Execute each tool and add results. Every tool call must receive a
        // tool message, so failures are reported as the tool's output rather
        // than aborting the whole consultation.
        for (const toolCall of requestedTools) {
          let result: string;
          try {
            const parsed: unknown = JSON.parse(toolCall.function.arguments);
            const args: Record<string, unknown> =
              parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)
                ? parsed as Record<string, unknown>
                : {};
            result = await executeTroubleshootTool(toolCall.function.name, args, env);
          } catch (toolError) {
            const reason = toolError instanceof Error ? toolError.message : String(toolError);
            logger.warn('도구 실행 실패', { tool: toolCall.function.name, reason });
            result = `도구 실행 실패: ${reason}`;
          }

          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: toolCall.function.name,
            content: result,
          });

          toolCallCount++;
        }

        // Continue loop to get AI's response with tool results
        continue;
      }

      // No tool calls (or tools disabled) - this is the final response
      const aiResponse = assistantMessage.content || '';
      logger.info('AI 응답', { response: aiResponse.slice(0, 200) });

      if (!toolsAllowed && aiResponse.trim() === '') {
        // The forced final answer came back empty; fall back to the fixed notice.
        logger.warn('최대 도구 호출 횟수 도달', { toolCallCount });
        return {
          response: '수집한 정보를 바탕으로 해결책을 제시해드리겠습니다.',
        };
      }

      // Check for special markers
      if (aiResponse.includes(PASSTHROUGH_MARKER)) {
        return { response: PASSTHROUGH_MARKER };
      }

      // Check for session end marker; strip every occurrence, not just the first
      const sessionEnd = aiResponse.includes(SESSION_END_MARKER);
      const cleanResponse = aiResponse.split(SESSION_END_MARKER).join('').trim();

      return {
        response: sessionEnd ? `${cleanResponse}\n\n[세션 종료]` : cleanResponse,
      };
    }

    // Safety net: not expected to be reached, because the last round runs
    // with tools disabled and always returns.
    logger.warn('최대 도구 호출 횟수 도달', { toolCallCount });
    return {
      response: '수집한 정보를 바탕으로 해결책을 제시해드리겠습니다.',
    };
  } catch (error) {
    logger.error('Troubleshoot Expert AI 호출 실패', error as Error);
    throw error;
  }
}

/**
 * Handles one turn of a troubleshooting consultation (main entry point).
 *
 * Steps: load the user's session (or start a new 'gathering' one), record the
 * user message, ask the expert model, then act on the reply:
 * - contains "__PASSTHROUGH__": return exactly "__PASSTHROUGH__" without saving;
 * - contains "[세션 종료]": delete the session and return the reply with that
 *   marker removed;
 * - otherwise: record the reply, adjust the status by keyword, save the
 *   session and return the reply.
 *
 * Errors are not propagated: any failure is logged and a fixed apology
 * message is returned instead.
 *
 * @param db - D1 database handle
 * @param userId - Telegram user ID
 * @param userMessage - Incoming user message
 * @param env - Worker environment
 * @returns The text to send back to the user, or "__PASSTHROUGH__"
 */
export async function processTroubleshootConsultation(
  db: D1Database,
  userId: string,
  userMessage: string,
  env: Env
): Promise<string> {
  const startedAt = Date.now();
  logger.info('트러블슈팅 상담 시작', { userId, message: userMessage.substring(0, 100) });

  try {
    // 1-2. Reuse the stored session, or start a new one when there is none
    const existing = await getTroubleshootSession(db, userId);
    const session = existing ? existing : createTroubleshootSession(userId, 'gathering');

    // 3. Record the user message
    addMessageToSession(session, 'user', userMessage);

    // 4. Ask the expert model
    const { response: reply } = await callTroubleshootExpertAI(session, userMessage, env);

    // 5. Not a troubleshooting message: hand control back, session is not saved
    if (reply.includes('__PASSTHROUGH__')) {
      logger.info('트러블슈팅 상담 패스스루', { userId });
      return '__PASSTHROUGH__';
    }

    // 6. Consultation finished: drop the session and hide the marker from the user
    if (reply.includes('[세션 종료]')) {
      logger.info('트러블슈팅 상담 세션 종료', { userId });
      await deleteTroubleshootSession(db, userId);
      return reply.replace('[세션 종료]', '').trim();
    }

    // 7. Record the assistant reply
    addMessageToSession(session, 'assistant', reply);

    // Simple keyword heuristic; the diagnosing keywords take precedence and
    // the status is left unchanged when no keyword matches.
    const replyMentions = (...keywords: string[]): boolean =>
      keywords.some((keyword) => reply.includes(keyword));
    if (replyMentions('원인', '분석')) {
      session.status = 'diagnosing';
    } else if (replyMentions('해결', '방법')) {
      session.status = 'suggesting';
    }

    session.updated_at = Date.now();
    await saveTroubleshootSession(db, session);

    logger.info('트러블슈팅 상담 완료', {
      userId,
      duration: Date.now() - startedAt,
      status: session.status
    });

    return reply;
  } catch (error) {
    logger.error('트러블슈팅 상담 오류', error as Error, { userId });
    return '죄송합니다. 트러블슈팅 상담 중 오류가 발생했습니다. 잠시 후 다시 시도해주세요.';
  }
}