feat(phase-5-2): 에러 복구 전략 구현
Phase 5-2 완료: 재시도 로직, 서킷 브레이커, 관리자 알림

생성된 파일:
- src/utils/retry.ts (지수 백오프 재시도)
- src/utils/circuit-breaker.ts (서킷 브레이커 패턴)
- src/services/notification.ts (관리자 알림)
- src/services/__test__/notification.test.ts (테스트 가이드)

수정된 파일:
- src/openai-service.ts (Circuit Breaker + Retry 적용)
- src/tools/search-tool.ts (4개 API 재시도)
- src/tools/domain-tool.ts (11개 API 재시도)
- CLAUDE.md (알림 시스템 문서 추가)

주요 기능:
- 지수 백오프: 1초 → 2초 → 4초 (Jitter ±20%)
- Circuit Breaker: 3회 실패 시 30초 차단 (OpenAI)
- 재시도: 총 15개 외부 API 호출에 적용
- 알림: 3가지 유형 (Circuit Breaker, Retry, API Error)
- Rate Limiting: 같은 알림 1시간 1회

검증:
- ✅ TypeScript 컴파일 성공
- ✅ Wrangler 로컬 빌드 성공
- ✅ 프로덕션 배포 완료 (Version: c4a1a8e9)
This commit is contained in:
248
src/utils/circuit-breaker.ts
Normal file
248
src/utils/circuit-breaker.ts
Normal file
@@ -0,0 +1,248 @@
|
||||
/**
 * Circuit Breaker pattern implementation
 *
 * Prevents cascading failures by temporarily blocking requests
 * to a failing service, giving it time to recover.
 *
 * @example
 * ```typescript
 * const breaker = new CircuitBreaker({ failureThreshold: 5 });
 *
 * try {
 *   const result = await breaker.execute(async () => {
 *     return await fetch('https://api.example.com');
 *   });
 * } catch (error) {
 *   if (error instanceof CircuitBreakerError) {
 *     console.log('Circuit is open, service unavailable');
 *   }
 * }
 * ```
 */
|
||||
|
||||
/**
 * States a circuit breaker can be in.
 *
 * The members are string-valued, so a state can be logged or serialized
 * as-is.
 */
export enum CircuitState {
  /** Circuit is closed - requests pass through normally */
  CLOSED = 'CLOSED',
  /** Circuit is open - all requests are immediately rejected */
  OPEN = 'OPEN',
  /** Circuit is half-open - trial traffic is allowed to test whether the service has recovered */
  HALF_OPEN = 'HALF_OPEN',
}
|
||||
|
||||
/**
 * Configuration options for a circuit breaker.
 *
 * Every field is optional; an omitted field falls back to the default
 * noted on it.
 */
export interface CircuitBreakerOptions {
  /**
   * Number of failures recorded within the monitoring window that opens
   * the circuit (default: 5). Failures do not have to be consecutive:
   * a success while the circuit is closed does not clear earlier failures.
   */
  failureThreshold?: number;
  /** Time in ms the circuit stays open before a recovery attempt is allowed (default: 60000) */
  resetTimeoutMs?: number;
  /**
   * Length in ms of the sliding window used to count failures; older
   * failures are discarded and no longer count toward the threshold
   * (default: 120000).
   */
  monitoringWindowMs?: number;
}
|
||||
|
||||
/**
 * Error thrown when the circuit breaker rejects a request without
 * running it.
 *
 * `state` holds the circuit state at the moment of rejection, so callers
 * can tell a breaker rejection apart from a failure of the wrapped
 * operation and react accordingly.
 */
export class CircuitBreakerError extends Error {
  /**
   * @param message - Human-readable reason for the rejection
   * @param state - Circuit state at the time the request was rejected
   */
  constructor(
    message: string,
    public readonly state: CircuitState
  ) {
    super(message);
    // When compiled to ES5, `super(message)` returns a plain Error and the
    // subclass prototype is lost, which breaks `instanceof CircuitBreakerError`.
    // Restoring the prototype is a no-op on ES2015+ targets.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'CircuitBreakerError';
  }
}
|
||||
|
||||
/**
 * A single recorded failure: when it happened and what went wrong.
 */
interface FailureRecord {
  /** Epoch time in ms (as returned by `Date.now()`) at which the failure was recorded */
  timestamp: number;
  /** The error that caused the operation to fail */
  error: Error;
}
|
||||
|
||||
/**
 * Circuit Breaker implementation
 *
 * Wraps calls to an unreliable operation and tracks their failures.
 *
 * - CLOSED: calls run normally. Each failure is recorded with a timestamp;
 *   once the number of failures inside the monitoring window reaches the
 *   failure threshold, the circuit opens.
 * - OPEN: calls are rejected immediately with a `CircuitBreakerError`
 *   until the reset timeout has elapsed.
 * - HALF_OPEN: a single probe call is let through. Its success closes the
 *   circuit; its failure re-opens it. Other calls arriving while the probe
 *   is still pending are rejected with a `CircuitBreakerError`.
 *
 * State transitions are evaluated lazily, when `execute` is called; no
 * timers are used.
 */
export class CircuitBreaker {
  private state: CircuitState = CircuitState.CLOSED;
  private failures: FailureRecord[] = [];
  private openedAt: number | null = null;
  private successCount = 0;
  private failureCount = 0;
  /**
   * Start time (ms) of the HALF_OPEN probe currently awaiting its result,
   * or null when no probe is pending.
   */
  private probeStartedAt: number | null = null;

  private readonly failureThreshold: number;
  private readonly resetTimeoutMs: number;
  private readonly monitoringWindowMs: number;

  /**
   * @param options - Optional thresholds and timeouts; omitted fields
   *   default to 5 failures, a 60000 ms reset timeout and a 120000 ms
   *   monitoring window.
   */
  constructor(options?: CircuitBreakerOptions) {
    this.failureThreshold = options?.failureThreshold ?? 5;
    this.resetTimeoutMs = options?.resetTimeoutMs ?? 60000;
    this.monitoringWindowMs = options?.monitoringWindowMs ?? 120000;

    console.log('[CircuitBreaker] Initialized', {
      failureThreshold: this.failureThreshold,
      resetTimeoutMs: this.resetTimeoutMs,
      monitoringWindowMs: this.monitoringWindowMs,
    });
  }

  /**
   * Get current circuit state
   *
   * The value is the state as of the last `execute`/`reset` call; an
   * elapsed reset timeout is only applied on the next `execute`.
   */
  getState(): CircuitState {
    return this.state;
  }

  /**
   * Get circuit statistics
   *
   * @returns A snapshot with the current state, the lifetime success and
   *   failure counters (cleared by `reset`), the number of failures that
   *   are still inside the monitoring window, and the time the circuit was
   *   last opened (null when it is not open).
   */
  getStats(): {
    state: CircuitState;
    successCount: number;
    failureCount: number;
    recentFailures: number;
    openedAt: number | null;
  } {
    // Stored records are only pruned when a new failure arrives, so count
    // against the window here instead of reporting expired entries.
    const cutoff = Date.now() - this.monitoringWindowMs;
    const recentFailures = this.failures.filter(
      record => record.timestamp > cutoff
    ).length;

    return {
      state: this.state,
      successCount: this.successCount,
      failureCount: this.failureCount,
      recentFailures,
      openedAt: this.openedAt,
    };
  }

  /**
   * Manually reset the circuit to closed state
   *
   * Clears recorded failures, counters and any pending probe marker.
   */
  reset(): void {
    console.log('[CircuitBreaker] Manual reset');
    this.state = CircuitState.CLOSED;
    this.failures = [];
    this.openedAt = null;
    this.probeStartedAt = null;
    this.successCount = 0;
    this.failureCount = 0;
  }

  /**
   * Remove old failure records outside monitoring window
   */
  private cleanupOldFailures(): void {
    const now = Date.now();
    const cutoff = now - this.monitoringWindowMs;

    this.failures = this.failures.filter(
      record => record.timestamp > cutoff
    );
  }

  /**
   * Check if circuit should transition to half-open state
   *
   * Only acts when the circuit is OPEN and at least `resetTimeoutMs` has
   * passed since it was opened.
   */
  private checkResetTimeout(): void {
    if (this.state === CircuitState.OPEN && this.openedAt !== null) {
      const now = Date.now();
      const elapsed = now - this.openedAt;

      if (elapsed >= this.resetTimeoutMs) {
        console.log('[CircuitBreaker] Reset timeout reached, transitioning to HALF_OPEN');
        this.state = CircuitState.HALF_OPEN;
      }
    }
  }

  /**
   * Record a successful operation
   *
   * A success while HALF_OPEN closes the circuit and discards the recorded
   * failures. A success while CLOSED only bumps the counter; it does not
   * clear failures already inside the monitoring window.
   */
  private onSuccess(): void {
    this.successCount++;

    if (this.state === CircuitState.HALF_OPEN) {
      console.log('[CircuitBreaker] Half-open test succeeded, closing circuit');
      this.state = CircuitState.CLOSED;
      this.failures = [];
      this.openedAt = null;
      this.probeStartedAt = null;
    }
  }

  /**
   * Record a failed operation
   *
   * @param error - The error the operation failed with
   */
  private onFailure(error: Error): void {
    this.failureCount++;

    const now = Date.now();
    this.failures.push({ timestamp: now, error });

    // Clean up old failures
    this.cleanupOldFailures();

    // If in half-open state, one failure reopens the circuit
    if (this.state === CircuitState.HALF_OPEN) {
      console.log('[CircuitBreaker] Half-open test failed, reopening circuit');
      this.state = CircuitState.OPEN;
      this.openedAt = now;
      this.probeStartedAt = null;
      return;
    }

    // Check if we should open the circuit
    if (this.state === CircuitState.CLOSED) {
      if (this.failures.length >= this.failureThreshold) {
        console.log(
          `[CircuitBreaker] Failure threshold (${this.failureThreshold}) exceeded, opening circuit`
        );
        this.state = CircuitState.OPEN;
        this.openedAt = now;
      }
    }
  }

  /**
   * Execute a function through the circuit breaker
   *
   * @param fn - Async function to execute
   * @returns Promise resolving to the function's result
   * @throws CircuitBreakerError if the circuit is open, or if it is
   *   half-open and the single probe request is still pending
   * @throws The error `fn` failed with; a thrown value that is not an
   *   `Error` instance is wrapped in a new `Error` built from its string form
   */
  async execute<T>(fn: () => Promise<T>): Promise<T> {
    // Check if we should transition to half-open
    this.checkResetTimeout();

    // If circuit is open, reject immediately
    if (this.state === CircuitState.OPEN) {
      const error = new CircuitBreakerError(
        'Circuit breaker is open - service unavailable',
        this.state
      );
      console.log('[CircuitBreaker] Request blocked - circuit is OPEN');
      throw error;
    }

    // In half-open state only one probe may be pending at a time. The
    // marker expires after resetTimeoutMs so that a probe which never
    // settles cannot leave the breaker rejecting requests forever.
    let probeToken: number | null = null;
    if (this.state === CircuitState.HALF_OPEN) {
      const now = Date.now();
      const probePending =
        this.probeStartedAt !== null &&
        now - this.probeStartedAt < this.resetTimeoutMs;

      if (probePending) {
        console.log('[CircuitBreaker] Request blocked - half-open test already in progress');
        throw new CircuitBreakerError(
          'Circuit breaker is half-open - recovery test in progress',
          this.state
        );
      }

      this.probeStartedAt = now;
      probeToken = now;
    }

    try {
      // Execute the function
      const result = await fn();

      // Record success
      this.onSuccess();

      return result;
    } catch (error) {
      // Record failure; non-Error throwables are normalized to Error
      const err = error instanceof Error ? error : new Error(String(error));
      this.onFailure(err);

      // Log failure
      console.error(
        `[CircuitBreaker] Operation failed (${this.failures.length}/${this.failureThreshold} failures):`,
        err.message
      );

      // Re-throw the (normalized) error
      throw err;
    } finally {
      // Release the probe marker only if it still belongs to this call;
      // a later probe may have replaced an expired one.
      if (probeToken !== null && this.probeStartedAt === probeToken) {
        this.probeStartedAt = null;
      }
    }
  }
}
|
||||
Reference in New Issue
Block a user