fix: Improve consistency and add rollback support

1. haproxy_add_servers - disk-first pattern
   - Save ALL servers to config FIRST
   - Then update HAProxy
   - Roll back all on unexpected error
   - Roll back failed slots individually

2. remove_domain_from_config - file locking
   - Add fcntl.LOCK_EX for consistency with other config ops
   - Prevents race conditions during concurrent access

3. haproxy_add_domain - rollback on HAProxy failure
   - Wrap HAProxy map update in try/except
   - Roll back map file if HAProxy command fails
   - Roll back server config if server setup fails

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
kaffa
2026-02-01 14:03:31 +00:00
parent ab5b4aa648
commit 18d0126b15

View File

@@ -561,15 +561,21 @@ def remove_server_from_config(domain: str, slot: int) -> None:
def remove_domain_from_config(domain: str) -> None:
    """Remove domain from persistent config with file locking.

    Takes an exclusive ``fcntl`` lock on ``f"{SERVERS_FILE}.lock"``, loads the
    servers config, deletes the entry for ``domain`` if one is present, and
    saves the config back. The lock is released in a ``finally`` clause, so
    it is dropped even if loading or saving raises.

    Args:
        domain: Domain name to remove
    """
    lock_path = f"{SERVERS_FILE}.lock"
    with open(lock_path, 'w') as lock_file:
        # NOTE(review): assumes load_servers_config() / save_servers_config()
        # do not take this same lock through their own file descriptor --
        # flock treats separate descriptors independently, so that would
        # block forever. Confirm against those helpers.
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
        try:
            config = load_servers_config()
            if domain in config:
                del config[domain]
                # NOTE(review): saving only when an entry was removed is
                # reconstructed from a flattened view -- confirm.
                save_servers_config(config)
        finally:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
def get_server_suffixes(http_port: int) -> List[Tuple[str, int]]:
@@ -788,28 +794,43 @@ def haproxy_add_domain(domain: str, ip: str = "", http_port: int = 80) -> str:
entries = get_map_contents()
entries.append((domain, pool))
entries.append((f".{domain}", pool))
try:
save_map_file(entries)
except IOError as e:
return f"Error: Failed to save map file: {e}"
# Update HAProxy map via Runtime API (immediate effect)
# Then update HAProxy map via Runtime API
try:
haproxy_cmd(f"add map {MAP_FILE_CONTAINER} {domain} {pool}")
haproxy_cmd(f"add map {MAP_FILE_CONTAINER} .{domain} {pool}")
except HaproxyError as e:
# Rollback: remove the domain we just added from entries and re-save
rollback_entries = [(d, b) for d, b in entries if d != domain and d != f".{domain}"]
try:
save_map_file(rollback_entries)
except IOError:
logger.error("Failed to rollback map file after HAProxy error")
return f"Error: Failed to update HAProxy map: {e}"
# If IP provided, add server to slot 1
if ip:
# Save server config to disk first
add_server_to_config(domain, 1, ip, http_port)
try:
for suffix, port in get_server_suffixes(http_port):
server = f"{pool}{suffix}_1"
haproxy_cmd(f"set server {pool}/{server} addr {ip} port {port}")
haproxy_cmd(f"set server {pool}/{server} state ready")
except HaproxyError as e:
# Rollback server config on failure
remove_server_from_config(domain, 1)
return f"Domain {domain} added to {pool} but server config failed: {e}"
return f"Domain {domain} added to {pool} with server {ip}:{http_port}"
return f"Domain {domain} added to {pool} (no servers configured)"
except IOError as e:
return f"Error: Failed to update map file: {e}"
except HaproxyError as e:
return f"Error: {e}"
@@ -1072,22 +1093,38 @@ def haproxy_add_servers(domain: str, servers: str) -> str:
except ValueError as e:
return f"Error: {e}"
# Add all servers
# Save ALL servers to config FIRST (disk-first pattern)
for server_config in validated_servers:
slot = server_config["slot"]
ip = server_config["ip"]
http_port = server_config["http_port"]
add_server_to_config(domain, slot, ip, http_port)
# Then update HAProxy
added = []
errors = []
for srv in validated_servers:
slot = srv["slot"]
ip = srv["ip"]
http_port = srv["http_port"]
failed_slots = []
try:
server = configure_server_slot(backend, server_prefix, slot, ip, http_port)
# Save to persistent config
add_server_to_config(domain, slot, ip, http_port)
for server_config in validated_servers:
slot = server_config["slot"]
ip = server_config["ip"]
http_port = server_config["http_port"]
try:
configure_server_slot(backend, server_prefix, slot, ip, http_port)
added.append(f"slot {slot}: {ip}:{http_port}")
except (HaproxyError, IOError) as e:
except HaproxyError as e:
failed_slots.append(slot)
errors.append(f"slot {slot}: {e}")
except Exception as e:
# Rollback all saved configs on unexpected error
for server_config in validated_servers:
remove_server_from_config(domain, server_config["slot"])
return f"Error: {e}"
# Rollback failed slots from config
for slot in failed_slots:
remove_server_from_config(domain, slot)
# Build result message
result_parts = []