Add infra-tool: infrastructure registry with Incus container deployment

Service registry & discovery system that aggregates infrastructure metadata
from Incus, K8s, APISIX, and BunnyCDN into NocoDB. Includes FastAPI HTTP API,
systemd timer for 15-min auto-sync, and dual-mode collectors (REST API for
container deployment, CLI/SSH fallback for local use). Deployed to jp1:infra-tool
with Tailscale socket proxy for host network visibility.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
kappa
2026-03-03 09:13:43 +09:00
commit 5e59261f63
20 changed files with 1962 additions and 0 deletions

0
collectors/__init__.py Normal file
View File

101
collectors/apisix.py Normal file
View File

@@ -0,0 +1,101 @@
"""Collect APISIX routes and upstreams via Admin API."""
from __future__ import annotations
import json
from datetime import datetime, timezone
import requests
import config
def _get(path: str) -> dict:
    """GET an APISIX Admin API endpoint and return the parsed JSON body.

    Args:
        path: Admin API path beginning with "/", e.g. "/routes".

    Raises:
        requests.HTTPError: on non-2xx responses.
    """
    url = f"{config.APISIX_ADMIN_URL}/apisix/admin{path}"
    # timeout keeps the periodic sync job from hanging forever on a
    # stalled gateway (matches the 30s timeout used by the Incus collector)
    resp = requests.get(
        url,
        headers={"X-API-KEY": config.apisix_admin_key()},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()
def _upstream_map() -> dict[str, dict]:
    """Return a mapping of upstream ID (stringified) to its upstream object."""
    entries = _get("/upstreams").get("list", [])
    # Each list entry may wrap the real object under "value" (etcd envelope).
    unwrapped = (entry.get("value", entry) for entry in entries)
    return {str(up.get("id", "")): up for up in unwrapped}
def collect_routes() -> list[dict]:
    """Return list of route records for NocoDB infra_routes."""
    data = _get("/routes")
    upstreams = _upstream_map()
    now = datetime.now(timezone.utc).isoformat()
    records = []
    for entry in data.get("list", []):
        route = entry.get("value", entry)
        rid = str(route.get("id", ""))
        label = route.get("name", rid)

        # Normalize host(s): APISIX routes may use "host" (str) or "hosts" (list).
        raw_hosts = route.get("hosts", route.get("host", []))
        host_list = [raw_hosts] if isinstance(raw_hosts, str) else raw_hosts
        host = ", ".join(host_list) if host_list else ""

        # Normalize URI(s): may be "uri" (str) or "uris" (list).
        uri = route.get("uri", route.get("uris", ""))
        if isinstance(uri, list):
            uri = ", ".join(uri)

        # Resolve upstream nodes: an inline upstream wins over an upstream_id ref.
        nodes = {}
        inline = route.get("upstream", {})
        if inline:
            nodes = inline.get("nodes", {})
        elif route.get("upstream_id"):
            nodes = upstreams.get(str(route["upstream_id"]), {}).get("nodes", {})
        if isinstance(nodes, list):
            # list form: [{"host": ..., "port": ..., "weight": ...}, ...]
            nodes = {
                f'{n["host"]}:{n.get("port",80)}': n.get("weight", 1)
                for n in nodes
            }

        records.append({
            "Title": label or rid,
            "host": host,
            "uri": uri,
            "upstream_nodes": json.dumps(nodes) if nodes else "",
            "plugins": ", ".join(route.get("plugins", {}).keys()),
            "last_synced": now,
        })
    return records
def collect_services() -> list[dict]:
    """Derive infra_services entries from APISIX routes (gateway layer)."""
    now = datetime.now(timezone.utc).isoformat()
    services = []
    for route in collect_routes():
        # Routes without a host cannot be keyed by domain — skip them.
        if not route["host"]:
            continue
        primary = route["host"].split(",")[0].strip()
        services.append({
            "Title": primary,
            "display_name": route["Title"],
            "domain": primary,
            "source": "apisix",
            "layer": "gateway",
            "status": "up",
            "upstream_ip": route["upstream_nodes"],
            "cluster": "apisix-osaka",
            "last_seen": now,
        })
    return services

64
collectors/bunnycdn.py Normal file
View File

@@ -0,0 +1,64 @@
"""Collect BunnyCDN pull zones via REST API."""
from __future__ import annotations
import json
from datetime import datetime, timezone
import requests
import config
def _get(path: str) -> list | dict:
    """GET a bunny.net API endpoint and return the parsed JSON body.

    Args:
        path: API path beginning with "/", e.g. "/pullzone".

    Raises:
        requests.HTTPError: on non-2xx responses.
    """
    url = f"https://api.bunny.net{path}"
    # timeout keeps the periodic sync job from hanging forever on a stalled API
    resp = requests.get(url, headers={"AccessKey": config.bunny_api_key()}, timeout=30)
    resp.raise_for_status()
    return resp.json()
def collect_zones() -> list[dict]:
    """Return list of CDN zone records for NocoDB infra_cdn_zones."""
    now = datetime.now(timezone.utc).isoformat()
    records = []
    for zone in _get("/pullzone"):
        custom_hosts = [h.get("Value", "") for h in zone.get("Hostnames", [])]
        records.append({
            "Title": zone.get("Name", ""),
            "zone_id": zone.get("Id", 0),
            "origin_url": zone.get("OriginUrl", ""),
            "hostnames": json.dumps(custom_hosts),
            "last_synced": now,
        })
    return records
def collect_services() -> list[dict]:
    """Derive infra_services entries from BunnyCDN zones (cdn layer)."""
    now = datetime.now(timezone.utc).isoformat()
    entries = []
    for zone in collect_zones():
        names = json.loads(zone["hostnames"]) if zone["hostnames"] else []
        for host in names:
            # Skip empties and Bunny's own *.b-cdn.net default hostnames —
            # only customer-facing custom domains belong in the registry.
            if not host or host.endswith(".b-cdn.net"):
                continue
            entries.append({
                "Title": f"cdn:{host}",
                "display_name": f"CDN {zone['Title']}",
                "domain": host,
                "source": "bunnycdn",
                "layer": "cdn",
                "status": "up",
                "upstream_ip": zone["origin_url"],
                "cluster": "bunnycdn",
                "last_seen": now,
            })
    return entries

120
collectors/incus.py Normal file
View File

@@ -0,0 +1,120 @@
"""Collect Incus containers via REST API or local CLI (auto-detected)."""
from __future__ import annotations
import json
import subprocess
from datetime import datetime, timezone
import requests
import urllib3
import config
# Suppress InsecureRequestWarning when using self-signed Incus certs.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Used by CLI mode only; REST mode reads from config.INCUS_REMOTES.
# Maps remote name -> list of Incus projects to enumerate on that remote.
REMOTES = {
    "jp1": ["monitoring", "db", "default"],
    "kr1": ["default", "inbest", "karakeep", "security"],
}
def _extract_ipv4(instance: dict) -> str:
"""Return the first global IPv4 address found in instance network state."""
net = (instance.get("state") or {}).get("network") or {}
for iface in net.values():
for addr in iface.get("addresses", []):
if addr.get("family") == "inet" and addr.get("scope") == "global":
return addr["address"]
return ""
def _collect_rest() -> list[dict]:
    """Collect containers from all Incus remotes via REST API with TLS client certs.

    Iterates every remote/project pair in config.INCUS_REMOTES; a failure
    for one pair is logged and skipped so the remaining pairs still sync.
    """
    results: list[dict] = []
    now = datetime.now(timezone.utc).isoformat()
    cert = (config.incus_cert(), config.incus_key())
    for remote, remote_cfg in config.INCUS_REMOTES.items():
        base_url = remote_cfg["url"]
        for project in remote_cfg["projects"]:
            url = f"{base_url}/1.0/instances?project={project}&recursion=1"
            try:
                # verify=False: Incus serves a self-signed cert; trust comes
                # from the TLS client certificate, not server identity.
                resp = requests.get(url, cert=cert, verify=False, timeout=30)
                if not resp.ok:
                    # requests.Response has no `status_text` attribute; the HTTP
                    # reason phrase is `.reason` (the old hasattr probe was dead code).
                    print(
                        f"[incus] REST request failed for {remote}/{project}: "
                        f"{resp.status_code} {resp.reason}"
                    )
                    continue
                data = resp.json()
                containers = data.get("metadata") or []
            except requests.RequestException as exc:
                print(f"[incus] REST request error for {remote}/{project}: {exc}")
                continue
            except (ValueError, KeyError) as exc:
                print(f"[incus] REST response parse error for {remote}/{project}: {exc}")
                continue
            for c in containers:
                name = c.get("name", "")
                results.append({
                    "Title": f"{remote}/{project}/{name}",
                    "name": name,
                    "remote": remote,
                    "project": project,
                    "status": c.get("status", "Unknown"),
                    "ipv4": _extract_ipv4(c),
                    "last_synced": now,
                })
    return results
def _collect_cli() -> list[dict]:
    """Collect containers from all Incus remotes via local CLI subprocess.

    Any remote/project whose `incus list` invocation fails, times out, or
    returns unparseable JSON is silently skipped (best-effort sync).
    """
    now = datetime.now(timezone.utc).isoformat()
    records: list[dict] = []
    for remote, projects in REMOTES.items():
        for project in projects:
            cmd = [
                "incus", "list", f"{remote}:",
                f"--project={project}", "--format=json",
            ]
            try:
                proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
            except (subprocess.TimeoutExpired, FileNotFoundError):
                continue
            if proc.returncode != 0:
                continue
            try:
                containers = json.loads(proc.stdout)
            except json.JSONDecodeError:
                continue
            for inst in containers:
                inst_name = inst.get("name", "")
                records.append({
                    "Title": f"{remote}/{project}/{inst_name}",
                    "name": inst_name,
                    "remote": remote,
                    "project": project,
                    "status": inst.get("status", "Unknown"),
                    "ipv4": _extract_ipv4(inst),
                    "last_synced": now,
                })
    return records
def collect() -> list[dict]:
    """Return list of container records for NocoDB infra_containers.

    Uses REST API when TLS client certificates are available,
    falls back to local CLI otherwise.
    """
    use_rest = config.incus_certs_available()
    mode = "REST API mode" if use_rest else "CLI mode (certs not available)"
    print(f"[incus] Using {mode}")
    return _collect_rest() if use_rest else _collect_cli()

165
collectors/k8s.py Normal file
View File

@@ -0,0 +1,165 @@
"""Collect K8s services via K8s REST API or SSH → incus exec → kubectl.
Mode selection (auto-detected at call time):
- K8s REST API: config.K8S_API_SERVER is set (non-empty)
- SSH fallback: config.K8S_API_SERVER is empty (original behavior)
"""
from __future__ import annotations
import json
import subprocess
from datetime import datetime, timezone
import urllib3
import config
# ---------------------------------------------------------------------------
# Internal data fetchers
# ---------------------------------------------------------------------------
def _get_services_data() -> dict | None:
    """Return parsed JSON from `kubectl get svc -A -o json`, via API or SSH."""
    if not config.K8S_API_SERVER:
        # Original SSH transport, kept as the fallback.
        return _kubectl_json_ssh("kubectl get svc -A -o json")
    return _kubectl_json_api("/api/v1/services")
def _get_pods_data() -> dict | None:
    """Return parsed JSON from `kubectl get pods -A -o json`, via API or SSH."""
    if not config.K8S_API_SERVER:
        # Original SSH transport, kept as the fallback.
        return _kubectl_json_ssh("kubectl get pods -A -o json")
    return _kubectl_json_api("/api/v1/pods")
# ---------------------------------------------------------------------------
# Transport implementations
# ---------------------------------------------------------------------------
def _kubectl_json_api(path: str) -> dict | None:
    """Call the K8s REST API and return parsed JSON, or None on any failure.

    Uses Bearer token auth. TLS verification uses config.K8S_CA_CERT when
    provided; disables verification (with suppressed warnings) otherwise.
    """
    try:
        import requests  # imported lazily — only needed in API mode
    except ImportError:
        print("[k8s] 'requests' package not installed; cannot use K8s REST API mode")
        return None

    verify: str | bool = config.K8S_CA_CERT if config.K8S_CA_CERT else False
    if not verify:
        # Self-signed cluster certs: silence the per-request InsecureRequestWarning.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    endpoint = f"{config.K8S_API_SERVER.rstrip('/')}{path}"
    bearer = config.k8s_token()
    auth_headers = {"Authorization": f"Bearer {bearer}"} if bearer else {}

    try:
        resp = requests.get(endpoint, headers=auth_headers, verify=verify, timeout=30)
    except requests.exceptions.RequestException as exc:
        print(f"[k8s] REST API request failed for {path}: {exc}")
        return None
    if resp.status_code != 200:
        print(f"[k8s] API returned {resp.status_code} for {path}")
        return None
    try:
        return resp.json()
    except ValueError as exc:
        print(f"[k8s] Failed to parse JSON from {path}: {exc}")
        return None
def _kubectl_json_ssh(
    cmd: str,
    host: str = "incus-jp1",
    container: str = "k8s",
) -> dict | None:
    """Run kubectl via SSH → incus exec and return parsed JSON.

    This is the original transport, preserved for backward compatibility.
    Returns None on any SSH failure, timeout, or unparseable output.
    """
    remote_cmd = f"incus exec {container} -- {cmd}"
    try:
        proc = subprocess.run(
            ["ssh", host, remote_cmd],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None
    if proc.returncode != 0:
        return None
    try:
        return json.loads(proc.stdout)
    except json.JSONDecodeError:
        return None
# ---------------------------------------------------------------------------
# Public collectors
# ---------------------------------------------------------------------------
def collect_services() -> list[dict]:
    """Return list of K8s service records for NocoDB infra_services."""
    data = _get_services_data()
    if not data:
        return []
    now = datetime.now(timezone.utc).isoformat()
    records = []
    for item in data.get("items", []):
        meta = item.get("metadata", {})
        spec = item.get("spec", {})
        svc_name = meta.get("name", "")
        namespace = meta.get("namespace", "")
        # Skip the cluster-internal API service and everything in kube-system.
        if svc_name == "kubernetes" or namespace == "kube-system":
            continue
        cluster_ip = spec.get("clusterIP", "")
        ports = spec.get("ports", [])
        if ports and cluster_ip:
            upstream = f"{cluster_ip}:{ports[0]['port']}"
        else:
            upstream = cluster_ip
        records.append({
            "Title": f"k8s:{namespace}/{svc_name}",
            "display_name": svc_name,
            "domain": "",
            "source": "k8s",
            "layer": "backend",
            "status": "up",
            "upstream_ip": upstream,
            "upstream_host": svc_name,
            "namespace": namespace,
            "cluster": "jp1/k8s",
            "last_seen": now,
        })
    return records
def collect_pods_status() -> dict[str, str]:
    """Return {svc_name: phase} derived from pod names.

    Pod suffix (the last two hyphen-separated segments) is stripped to
    approximate the owning service name. When several pods map to the
    same base name, the last one seen wins.
    """
    data = _get_pods_data()
    if not data:
        return {}
    phases: dict[str, str] = {}
    for pod in data.get("items", []):
        pod_name = pod.get("metadata", {}).get("name", "")
        phase = pod.get("status", {}).get("phase", "Unknown")
        parts = pod_name.split("-")
        # Drop the two generated suffix segments only when they exist.
        base = "-".join(parts[:-2]) if len(parts) >= 3 else pod_name
        if base:
            phases[base] = phase
    return phases