fix: HAProxy batch commands and improve routing/subdomain handling

- Fix haproxy_cmd_batch to send each command on a separate connection
  (HAProxy Runtime API only processes the first command on a single connection)
- HTTP frontend now routes to backends instead of redirecting to HTTPS
- Add subdomain detection to avoid duplicate wildcard entries
- Add reload verification with retry logic
- Optimize SSL: TLS 1.3 ciphersuites, extended session lifetime
- Add CPU steal monitoring script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 00:55:24 +09:00
parent 95aecccb03
commit 46c86b62f2
5 changed files with 81 additions and 149 deletions
--- a/conf/haproxy.cfg
+++ b/conf/haproxy.cfg
@@ -9,14 +9,16 @@ global
    cpu-map auto:1/1-2 0-1
    tune.ssl.default-dh-param 2048
    tune.ssl.cachesize 100000
-    tune.ssl.lifetime 600
+    tune.ssl.lifetime 3600
    tune.bufsize 32768
    tune.maxrewrite 8192
    tune.http.maxhdr 128

    # SSL optimization
-    ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256
-    ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets
+    # Prefer ECDSA (about 10x faster than RSA) and take advantage of AES-GCM hardware acceleration
+    ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
+    ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384
+    ssl-default-bind-options ssl-min-ver TLSv1.2

    # Runtime API socket for dynamic updates
    stats socket /var/run/haproxy/haproxy.sock mode 666 level admin expose-fd listeners
@@ -48,14 +50,18 @@ frontend stats
    stats refresh 10s
    stats admin if TRUE

-# HTTP Frontend - redirect to HTTPS
+# HTTP Frontend - forward to backend (same as HTTPS)
 frontend http_front
    bind *:80
-    # ACME challenge for certbot
-    acl is_acme path_beg /.well-known/acme-challenge/
-    use_backend acme_backend if is_acme
-    # Redirect to HTTPS
-    http-request redirect scheme https unless is_acme
+    # ACME challenge for certbot (unused - using DNS-01)
+    # acl is_acme path_beg /.well-known/acme-challenge/
+    # use_backend acme_backend if is_acme
+    # http-request redirect scheme https unless is_acme
+
+    # Map-based dynamic routing (same as HTTPS)
+    use_backend %[req.hdr(host),lower,map_dom(/usr/local/etc/haproxy/domains.map)] if { req.hdr(host),lower,map_dom(/usr/local/etc/haproxy/domains.map) -m found }
+
+    default_backend default_backend

 # HTTPS Frontend
 frontend https_front
--- a/haproxy_mcp/haproxy_client.py
+++ b/haproxy_mcp/haproxy_client.py
@@ -106,10 +106,11 @@ def _check_response_for_errors(response: str) -> None:


 def haproxy_cmd_batch(commands: list[str]) -> list[str]:
-    """Send multiple commands to HAProxy in a single connection.
+    """Send multiple commands to HAProxy.

-    This is more efficient than multiple haproxy_cmd calls as it reuses
-    the same TCP connection for all commands.
+    Note: HAProxy Runtime API only processes the first command when multiple
+    commands are sent on a single connection. This function sends each command
+    on a separate connection to ensure all commands are executed.

    Args:
        commands: List of HAProxy commands to execute
@@ -126,64 +127,16 @@ def haproxy_cmd_batch(commands: list[str]) -> list[str]:
    if len(commands) == 1:
        return [haproxy_cmd_checked(commands[0])]

-    # Send all commands separated by newlines
-    combined = "\n".join(commands)
-    try:
-        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-            s.settimeout(SOCKET_TIMEOUT)
-            s.connect(HAPROXY_SOCKET)
-            s.sendall(f"{combined}\n".encode())
-            s.shutdown(socket.SHUT_WR)
+    # Send each command on separate connection (HAProxy limitation)
+    responses = []
+    for cmd in commands:
+        try:
+            resp = haproxy_cmd_checked(cmd)
+            responses.append(resp)
+        except HaproxyError:
+            raise

-            # Set socket to non-blocking for select-based recv loop
-            s.setblocking(False)
-            response = b""
-            start_time = time.time()
-
-            while True:
-                elapsed = time.time() - start_time
-                if elapsed >= SOCKET_RECV_TIMEOUT:
-                    raise HaproxyError(f"Response timeout after {SOCKET_RECV_TIMEOUT} seconds")
-
-                remaining = SOCKET_RECV_TIMEOUT - elapsed
-                ready, _, _ = select.select([s], [], [], min(remaining, 1.0))
-
-                if ready:
-                    data = s.recv(8192)
-                    if not data:
-                        break
-                    response += data
-                    if len(response) > MAX_RESPONSE_SIZE:
-                        raise HaproxyError(f"Response exceeded {MAX_RESPONSE_SIZE} bytes limit")
-
-            full_response = response.decode().strip()
-
-            # Split responses - HAProxy separates responses with empty lines
-            # For commands that return nothing, we get empty strings
-            responses = full_response.split("\n\n") if full_response else [""] * len(commands)
-
-            # If we got fewer responses than commands, pad with empty strings
-            while len(responses) < len(commands):
-                responses.append("")
-
-            # Check each response for errors
-            for i, resp in enumerate(responses):
-                resp = resp.strip()
-                _check_response_for_errors(resp)
-                responses[i] = resp
-
-            return responses
-
-    except socket.timeout:
-        raise HaproxyError("Connection timeout")
-    except ConnectionRefusedError:
-        raise HaproxyError("Connection refused - HAProxy not running?")
-    except UnicodeDecodeError:
-        raise HaproxyError("Invalid UTF-8 in response")
-    except HaproxyError:
-        raise
-    except Exception as e:
-        raise HaproxyError(str(e)) from e
+    return responses


 def reload_haproxy() -> tuple[bool, str]:
--- a/haproxy_mcp/tools/configuration.py
+++ b/haproxy_mcp/tools/configuration.py
@@ -1,6 +1,5 @@
 """Configuration management tools for HAProxy MCP Server."""

-import fcntl
 import subprocess
 import time

@@ -9,12 +8,9 @@ from ..config import (
    HAPROXY_CONTAINER,
    SUBPROCESS_TIMEOUT,
    STARTUP_RETRY_COUNT,
-    StateField,
-    STATE_MIN_COLUMNS,
    logger,
 )
 from ..exceptions import HaproxyError
-from ..validation import validate_ip, validate_port, validate_backend_name
 from ..haproxy_client import haproxy_cmd, haproxy_cmd_batch, reload_haproxy
 from ..file_ops import (
    atomic_write_file,
@@ -76,7 +72,7 @@ def restore_servers_from_config() -> int:
    if not commands:
        return 0

-    # Execute all commands in single batch
+    # Execute all commands
    try:
        haproxy_cmd_batch(commands)
        return len(server_info_list)
@@ -141,6 +137,20 @@ def register_config_tools(mcp):
        if not success:
            return msg

+        # Wait for HAProxy to fully reload (new process takes over)
+        # USR2 signal spawns new process but old one may still be serving
+        time.sleep(2)
+
+        # Verify HAProxy is responding
+        for _ in range(STARTUP_RETRY_COUNT):
+            try:
+                haproxy_cmd("show info")
+                break
+            except HaproxyError:
+                time.sleep(0.5)
+        else:
+            return "HAProxy reloaded but not responding after reload"
+
        # Restore servers from config after reload
        try:
            restored = restore_servers_from_config()
@@ -191,82 +201,17 @@ def register_config_tools(mcp):
    def haproxy_restore_state() -> str:
        """Restore server state from disk.

-        Uses batched commands for efficiency.
+        Reads server configuration from servers.json and restores to HAProxy.

        Returns:
            Summary of restored servers or error description
        """
        try:
-            with open(STATE_FILE, "r", encoding="utf-8") as f:
-                try:
-                    fcntl.flock(f.fileno(), fcntl.LOCK_SH)
-                except OSError:
-                    pass  # Continue without lock if not supported
-                try:
-                    state = f.read()
-                finally:
-                    try:
-                        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
-                    except OSError:
-                        pass
-
-            # Build batch of all commands
-            commands: list[str] = []
-            server_info_list: list[tuple[str, str]] = []
-            skipped = 0
-
-            for line in state.split("\n"):
-                parts = line.split()
-                if len(parts) >= STATE_MIN_COLUMNS and not line.startswith("#"):
-                    backend = parts[StateField.BE_NAME]
-                    server = parts[StateField.SRV_NAME]
-                    addr = parts[StateField.SRV_ADDR]
-                    port = parts[StateField.SRV_PORT]
-
-                    # Skip disabled servers
-                    if addr == "0.0.0.0":
-                        continue
-
-                    # Validate names from state file to prevent injection
-                    if not validate_backend_name(backend) or not validate_backend_name(server):
-                        skipped += 1
-                        continue
-
-                    # Validate IP and port
-                    if not validate_ip(addr) or not validate_port(port):
-                        skipped += 1
-                        continue
-
-                    commands.append(f"set server {backend}/{server} addr {addr} port {port}")
-                    commands.append(f"set server {backend}/{server} state ready")
-                    server_info_list.append((backend, server))
-
-            if not commands:
-                result = "No servers to restore"
-                if skipped:
-                    result += f", {skipped} entries skipped due to validation"
-                return result
-
-            # Execute all commands in single batch
-            try:
-                haproxy_cmd_batch(commands)
-                restored = len(server_info_list)
-            except HaproxyError:
-                # Fallback: try individual pairs
-                restored = 0
-                for i in range(0, len(commands), 2):
-                    try:
-                        haproxy_cmd_batch([commands[i], commands[i + 1]])
-                        restored += 1
-                    except HaproxyError as e:
-                        backend, server = server_info_list[i // 2]
-                        logger.warning("Failed to restore %s/%s: %s", backend, server, e)
-
-            result = f"Server state restored ({restored} servers)"
-            if skipped:
-                result += f", {skipped} entries skipped due to validation"
-            return result
-        except FileNotFoundError:
-            return "Error: No saved state found"
+            restored = restore_servers_from_config()
+            if restored == 0:
+                return "No servers to restore"
+            return f"Server state restored ({restored} servers)"
        except HaproxyError as e:
            return f"Error: {e}"
+        except (OSError, ValueError) as e:
+            return f"Error: {e}"
--- a/haproxy_mcp/tools/domains.py
+++ b/haproxy_mcp/tools/domains.py
@@ -159,9 +159,13 @@ def register_domain_tools(mcp):

                # Find available pool (using cached entries)
                used_pools: set[str] = set()
-                for _, backend in entries:
+                registered_domains: set[str] = set()
+                for entry_domain, backend in entries:
                    if backend.startswith("pool_"):
                        used_pools.add(backend)
+                    # Collect non-wildcard domains for subdomain check
+                    if not entry_domain.startswith("."):
+                        registered_domains.add(entry_domain)

                pool = None
                for i in range(1, POOL_COUNT + 1):
@@ -172,10 +176,24 @@ def register_domain_tools(mcp):
                if not pool:
                    return f"Error: All {POOL_COUNT} pool backends are in use"

+                # Check if this is a subdomain of an existing domain
+                # e.g., vault.anvil.it.com is subdomain if anvil.it.com exists
+                is_subdomain = False
+                parent_domain = None
+                parts = domain.split(".")
+                for i in range(1, len(parts)):
+                    candidate = ".".join(parts[i:])
+                    if candidate in registered_domains:
+                        is_subdomain = True
+                        parent_domain = candidate
+                        break
+
                try:
                    # Save to disk first (atomic write for persistence)
                    entries.append((domain, pool))
-                    entries.append((f".{domain}", pool))
+                    # Only add wildcard for root domains, not subdomains
+                    if not is_subdomain:
+                        entries.append((f".{domain}", pool))
                    try:
                        save_map_file(entries)
                    except IOError as e:
@@ -184,7 +202,8 @@ def register_domain_tools(mcp):
                    # Then update HAProxy map via Runtime API
                    try:
                        haproxy_cmd(f"add map {MAP_FILE_CONTAINER} {domain} {pool}")
-                        haproxy_cmd(f"add map {MAP_FILE_CONTAINER} .{domain} {pool}")
+                        if not is_subdomain:
+                            haproxy_cmd(f"add map {MAP_FILE_CONTAINER} .{domain} {pool}")
                    except HaproxyError as e:
                        # Rollback: remove the domain we just added from entries and re-save
                        rollback_entries = [(d, b) for d, b in entries if d != domain and d != f".{domain}"]
@@ -209,8 +228,12 @@ def register_domain_tools(mcp):
                            return f"Domain {domain} added to {pool} but server config failed: {e}"

                        result = f"Domain {domain} added to {pool} with server {ip}:{http_port}"
+                        if is_subdomain:
+                            result += f" (subdomain of {parent_domain}, no wildcard)"
                    else:
                        result = f"Domain {domain} added to {pool} (no servers configured)"
+                        if is_subdomain:
+                            result += f" (subdomain of {parent_domain}, no wildcard)"

                    # Check certificate coverage
                    cert_covered, cert_info = check_certificate_coverage(domain)
--- a/scripts/check-steal.sh
+++ b/scripts/check-steal.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+STEAL=$(vmstat 1 2 | tail -1 | awk '{print $17}')
+if [ "$STEAL" -gt 5 ]; then
+    echo "$(date): CPU steal high: ${STEAL}%" >> /var/log/cpu-steal.log
+fi