fix: Improve error handling and reliability

4. Pool exhaustion - explicit error
   - Add NoAvailablePoolError exception class
   - find_available_pool() now raises instead of returning None
   - haproxy_add_domain() catches and returns user-friendly error

5. haproxy_add_server - disk-first pattern
   - Save to config FIRST, then update HAProxy
   - If HAProxy update fails, rollback config automatically
   - Prevents inconsistency between disk and runtime

6. Wildcard removal - log failures
   - Changed silent pass to logger.warning() when deleting the wildcard (.domain) map entry fails
   - Failures now visible in logs for debugging
   - Does not block domain removal operation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
kaffa
2026-02-01 13:54:24 +00:00
parent b8fb4e7f4a
commit 4e7d0a8969

View File

@@ -90,6 +90,11 @@ class HaproxyError(Exception):
pass pass
class NoAvailablePoolError(HaproxyError):
    """Raised when every pool backend is already in use.

    Subclasses HaproxyError, so handlers that catch HaproxyError also
    catch this exception.
    """
# CSV field indices for HAProxy stats (show stat command) # CSV field indices for HAProxy stats (show stat command)
class StatField: class StatField:
"""HAProxy CSV stat field indices.""" """HAProxy CSV stat field indices."""
@@ -358,11 +363,14 @@ def get_map_contents() -> List[Tuple[str, str]]:
return entries return entries
def find_available_pool() -> Optional[str]: def find_available_pool() -> str:
"""Find first unused pool from pool_1 to pool_{POOL_COUNT}. """Find first unused pool from pool_1 to pool_{POOL_COUNT}.
Returns: Returns:
Pool name (e.g., 'pool_1') if available, None if all pools are used Pool name (e.g., 'pool_1') if available
Raises:
NoAvailablePoolError: If all pools are in use
""" """
used_pools: Set[str] = set() used_pools: Set[str] = set()
for domain, backend in get_map_contents(): for domain, backend in get_map_contents():
@@ -373,7 +381,8 @@ def find_available_pool() -> Optional[str]:
pool_name = f"pool_{i}" pool_name = f"pool_{i}"
if pool_name not in used_pools: if pool_name not in used_pools:
return pool_name return pool_name
return None
raise NoAvailablePoolError(f"All {POOL_COUNT} pool backends are in use")
def get_domain_backend(domain: str) -> Optional[str]: def get_domain_backend(domain: str) -> Optional[str]:
@@ -746,9 +755,10 @@ def haproxy_add_domain(domain: str, ip: str = "", http_port: int = 80) -> str:
return f"Error: Domain {domain} already exists (mapped to {existing_backend})" return f"Error: Domain {domain} already exists (mapped to {existing_backend})"
# Find available pool # Find available pool
pool = find_available_pool() try:
if not pool: pool = find_available_pool()
return f"Error: No available pools (all {POOL_COUNT} pools are in use)" except NoAvailablePoolError as e:
return f"Error: {e}"
try: try:
# Save to disk first (atomic write for persistence) # Save to disk first (atomic write for persistence)
@@ -816,7 +826,10 @@ def haproxy_remove_domain(domain: str) -> str:
# Clear map entries via Runtime API (immediate effect) # Clear map entries via Runtime API (immediate effect)
haproxy_cmd(f"del map {MAP_FILE_CONTAINER} {domain}") haproxy_cmd(f"del map {MAP_FILE_CONTAINER} {domain}")
haproxy_cmd(f"del map {MAP_FILE_CONTAINER} .{domain}") try:
haproxy_cmd(f"del map {MAP_FILE_CONTAINER} .{domain}")
except HaproxyError as e:
logger.warning("Failed to remove wildcard entry for %s: %s", domain, e)
# Disable all servers in the pool (reset to 0.0.0.0:0) # Disable all servers in the pool (reset to 0.0.0.0:0)
for slot in range(1, MAX_SLOTS + 1): for slot in range(1, MAX_SLOTS + 1):
@@ -921,18 +934,23 @@ def haproxy_add_server(domain: str, slot: int, ip: str, http_port: int = 80) ->
try: try:
backend, server_prefix = get_backend_and_prefix(domain) backend, server_prefix = get_backend_and_prefix(domain)
results = [] # Save to persistent config FIRST (disk-first pattern)
for suffix, port in get_server_suffixes(http_port):
server = f"{server_prefix}{suffix}_{slot}"
haproxy_cmd_checked(f"set server {backend}/{server} addr {ip} port {port}")
haproxy_cmd_checked(f"set server {backend}/{server} state ready")
results.append(f"{server}{ip}:{port}")
# Save to persistent config
add_server_to_config(domain, slot, ip, http_port) add_server_to_config(domain, slot, ip, http_port)
return f"Added to {domain} ({backend}) slot {slot}:\n" + "\n".join(results) try:
except (HaproxyError, ValueError, IOError) as e: results = []
for suffix, port in get_server_suffixes(http_port):
server = f"{server_prefix}{suffix}_{slot}"
haproxy_cmd_checked(f"set server {backend}/{server} addr {ip} port {port}")
haproxy_cmd_checked(f"set server {backend}/{server} state ready")
results.append(f"{server}{ip}:{port}")
return f"Added to {domain} ({backend}) slot {slot}:\n" + "\n".join(results)
except HaproxyError as e:
# Rollback config on HAProxy failure
remove_server_from_config(domain, slot)
return f"Error: {e}"
except (ValueError, IOError) as e:
return f"Error: {e}" return f"Error: {e}"