fix: Improve error handling and reliability

4. Pool exhaustion - explicit error
   - Add NoAvailablePoolError exception class
   - find_available_pool() now raises instead of returning None
   - haproxy_add_domain() catches it and returns a user-friendly error

5. haproxy_add_server - disk-first pattern
   - Save to config FIRST, then update HAProxy
   - If the HAProxy update fails, roll back the config automatically
   - Prevents inconsistency between disk and runtime

6. Wildcard removal - log failures
   - Changed silent pass to logger.warning()
   - Failures now visible in logs for debugging
   - Does not block domain removal operation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
kaffa
2026-02-01 13:54:24 +00:00
parent b8fb4e7f4a
commit 4e7d0a8969

View File

@@ -90,6 +90,11 @@ class HaproxyError(Exception):
pass
class NoAvailablePoolError(HaproxyError):
    """Raised when every pool backend is already in use.

    Subclasses HaproxyError, so handlers written for HaproxyError
    also catch this exception.
    """
# CSV field indices for HAProxy stats (show stat command)
class StatField:
"""HAProxy CSV stat field indices."""
@@ -358,11 +363,14 @@ def get_map_contents() -> List[Tuple[str, str]]:
return entries
def find_available_pool() -> Optional[str]:
def find_available_pool() -> str:
"""Find first unused pool from pool_1 to pool_{POOL_COUNT}.
Returns:
Pool name (e.g., 'pool_1') if available, None if all pools are used
Pool name (e.g., 'pool_1') if available
Raises:
NoAvailablePoolError: If all pools are in use
"""
used_pools: Set[str] = set()
for domain, backend in get_map_contents():
@@ -373,7 +381,8 @@ def find_available_pool() -> Optional[str]:
pool_name = f"pool_{i}"
if pool_name not in used_pools:
return pool_name
return None
raise NoAvailablePoolError(f"All {POOL_COUNT} pool backends are in use")
def get_domain_backend(domain: str) -> Optional[str]:
@@ -746,9 +755,10 @@ def haproxy_add_domain(domain: str, ip: str = "", http_port: int = 80) -> str:
return f"Error: Domain {domain} already exists (mapped to {existing_backend})"
# Find available pool
try:
pool = find_available_pool()
if not pool:
return f"Error: No available pools (all {POOL_COUNT} pools are in use)"
except NoAvailablePoolError as e:
return f"Error: {e}"
try:
# Save to disk first (atomic write for persistence)
@@ -816,7 +826,10 @@ def haproxy_remove_domain(domain: str) -> str:
# Clear map entries via Runtime API (immediate effect)
haproxy_cmd(f"del map {MAP_FILE_CONTAINER} {domain}")
try:
haproxy_cmd(f"del map {MAP_FILE_CONTAINER} .{domain}")
except HaproxyError as e:
logger.warning("Failed to remove wildcard entry for %s: %s", domain, e)
# Disable all servers in the pool (reset to 0.0.0.0:0)
for slot in range(1, MAX_SLOTS + 1):
@@ -921,6 +934,10 @@ def haproxy_add_server(domain: str, slot: int, ip: str, http_port: int = 80) ->
try:
backend, server_prefix = get_backend_and_prefix(domain)
# Save to persistent config FIRST (disk-first pattern)
add_server_to_config(domain, slot, ip, http_port)
try:
results = []
for suffix, port in get_server_suffixes(http_port):
server = f"{server_prefix}{suffix}_{slot}"
@@ -928,11 +945,12 @@ def haproxy_add_server(domain: str, slot: int, ip: str, http_port: int = 80) ->
haproxy_cmd_checked(f"set server {backend}/{server} state ready")
results.append(f"{server}{ip}:{port}")
# Save to persistent config
add_server_to_config(domain, slot, ip, http_port)
return f"Added to {domain} ({backend}) slot {slot}:\n" + "\n".join(results)
except (HaproxyError, ValueError, IOError) as e:
except HaproxyError as e:
# Rollback config on HAProxy failure
remove_server_from_config(domain, slot)
return f"Error: {e}"
except (ValueError, IOError) as e:
return f"Error: {e}"