Initial commit: cert-manager API server

FastAPI-based SSL certificate automation server.
- Google Public CA wildcard cert issuance via certbot
- Cloudflare DNS-01 challenge with auto EAB key generation
- APISIX multi-instance deployment with domain-instance mapping
- Vault integration for all secrets
- Bearer token auth, retry logic, Discord DM alerts
- Auto-renewal scheduler (daily 03:00 UTC)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
kappa
2026-02-28 17:39:14 +09:00
commit 1cd1f0cfc2
12 changed files with 782 additions and 0 deletions

0
app/__init__.py Normal file
View File

43
app/alert.py Normal file
View File

@@ -0,0 +1,43 @@
import logging
import httpx
logger = logging.getLogger(__name__)
DISCORD_API = "https://discord.com/api/v10"
async def send_discord_dm(bot_token: str, user_id: str, message: str) -> bool:
    """Send ``message`` to a Discord user as a direct message.

    Two Discord API calls are made with the bot token: one to open (or fetch)
    the DM channel for ``user_id`` and one to post the message into it.

    Returns:
        True when both calls succeed. False when either credential is empty
        or when any step raises; the error is logged, never propagated.
    """
    if not (bot_token and user_id):
        logger.warning("Discord credentials not configured, skipping alert")
        return False

    auth_headers = {
        "Authorization": f"Bot {bot_token}",
        "Content-Type": "application/json",
    }
    try:
        async with httpx.AsyncClient(timeout=15) as session:
            # Step 1: open the DM channel with the recipient.
            channel_resp = await session.post(
                f"{DISCORD_API}/users/@me/channels",
                headers=auth_headers,
                json={"recipient_id": user_id},
            )
            channel_resp.raise_for_status()
            dm_channel = channel_resp.json()["id"]

            # Step 2: post the message into that channel.
            message_resp = await session.post(
                f"{DISCORD_API}/channels/{dm_channel}/messages",
                headers=auth_headers,
                json={"content": message},
            )
            message_resp.raise_for_status()
            logger.info("Discord alert sent to user %s", user_id)
            return True
    except Exception as exc:
        # Alerts are best-effort: report the problem and tell the caller.
        logger.error("Failed to send Discord alert: %s", exc)
        return False

71
app/apisix.py Normal file
View File

@@ -0,0 +1,71 @@
import hashlib
import logging
from pathlib import Path
import httpx
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from .config import AppConfig, ApisixInstance
logger = logging.getLogger(__name__)
def _ssl_id(domain: str) -> str:
"""도메인 기반의 안정적인 SSL ID 생성."""
return hashlib.md5(domain.encode()).hexdigest()[:16]
@retry(
    retry=retry_if_exception_type((httpx.ConnectError, httpx.TimeoutException)),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    stop=stop_after_attempt(3),
    reraise=True,
)
async def _put_ssl(client: httpx.AsyncClient, url: str, payload: dict, headers: dict):
    """PUT ``payload`` as JSON to ``url`` and return the response.

    Connection errors and timeouts are retried (three attempts in total, with
    exponential back-off); the last such error is re-raised. An HTTP error
    status raises via ``raise_for_status`` and is not part of the retried
    exception types.
    """
    response = await client.put(url, headers=headers, json=payload)
    response.raise_for_status()
    return response
async def deploy_certificate(
    domain: str,
    config: AppConfig,
    instances: list[ApisixInstance] | None = None,
) -> list[dict]:
    """Upload the stored certificate for ``domain`` to APISIX instances.

    Args:
        domain: Domain whose ``fullchain.pem`` / ``privkey.pem`` are read from
            ``<certbot_config_dir>/live/<domain>``.
        config: Application configuration.
        instances: Instances to deploy to; when None or empty, every instance
            in ``config.apisix_instances`` is used.

    Returns:
        One dict per target instance with ``instance`` and ``success`` keys,
        plus ``error`` on failure. A failure on one instance does not stop
        deployment to the others.
    """
    targets = instances if instances else config.apisix_instances

    cert_dir = Path(config.certbot_config_dir, "live", domain)
    fullchain = cert_dir / "fullchain.pem"
    privkey = cert_dir / "privkey.pem"
    if not (fullchain.exists() and privkey.exists()):
        return [
            {"instance": target.name, "success": False, "error": "Certificate files not found"}
            for target in targets
        ]

    # The same SSL object covers the wildcard and the bare domain.
    body = {
        "cert": fullchain.read_text(),
        "key": privkey.read_text(),
        "snis": [f"*.{domain}", domain],
    }
    resource_path = f"/apisix/admin/ssls/{_ssl_id(domain)}"

    outcomes = []
    async with httpx.AsyncClient(timeout=30) as client:
        for target in targets:
            try:
                await _put_ssl(
                    client,
                    f"{target.admin_url}{resource_path}",
                    body,
                    {"X-API-KEY": target.admin_key},
                )
                logger.info("Deployed %s to %s", domain, target.name)
                outcomes.append({"instance": target.name, "success": True})
            except Exception as exc:
                logger.error("Failed to deploy %s to %s: %s", domain, target.name, exc)
                outcomes.append({"instance": target.name, "success": False, "error": str(exc)})
    return outcomes

129
app/certbot.py Normal file
View File

@@ -0,0 +1,129 @@
import asyncio
import logging
import os
import shutil
import subprocess
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from .config import AppConfig
from .google_eab import create_eab_key
logger = logging.getLogger(__name__)
async def issue_certificate(domain: str, config: AppConfig) -> dict:
    """Issue a wildcard certificate for ``domain`` with certbot.

    Each domain gets its own certbot config/work/logs directories so that
    concurrent runs for different domains do not share a certbot lock. After a
    successful run the PEM files are copied into the shared
    ``<certbot_config_dir>/live/<domain>`` directory.

    Args:
        domain: Base domain; the certificate covers ``*.<domain>`` and
            ``<domain>``.
        config: Application configuration (Cloudflare token, ACME server,
            GCP credentials, certbot directories).

    Returns:
        ``{"domain": ..., "success": True}`` on success, otherwise
        ``{"domain": ..., "success": False, "error": <message>}``.
    """
    # A fresh EAB key is created for every run. create_eab_key performs
    # blocking HTTP calls (with retries), so run it in a worker thread to keep
    # the event loop responsive.
    try:
        eab = await asyncio.to_thread(
            create_eab_key, config.gcp_service_account_json, config.gcp_project
        )
    except Exception as e:
        logger.error("Failed to create EAB key: %s", e)
        return {"domain": domain, "success": False, "error": f"EAB key creation failed: {e}"}

    # Per-domain config/work/logs directories (fully separate locks).
    config_dir = f"{config.certbot_config_dir}/{domain}"
    work_dir = f"{config.certbot_work_dir}/{domain}"
    logs_dir = f"{config.certbot_logs_dir}/{domain}"
    for directory in (config_dir, work_dir, logs_dir):
        os.makedirs(directory, exist_ok=True)

    # Temporary Cloudflare credentials file. It is created before the ``try``
    # so the ``finally`` below removes it even if writing the token fails.
    cred_file = tempfile.NamedTemporaryFile(mode="w", suffix=".ini", delete=False)
    credentials_path = cred_file.name
    try:
        with cred_file:
            cred_file.write(f"dns_cloudflare_api_token = {config.cloudflare_api_token}\n")
        os.chmod(credentials_path, 0o600)

        cmd = [
            "certbot", "certonly",
            "--dns-cloudflare",
            "--dns-cloudflare-credentials", credentials_path,
            "--dns-cloudflare-propagation-seconds", str(config.dns_propagation_seconds),
            "--server", config.google_acme_server,
            "-d", f"*.{domain}",
            "-d", domain,
            "--email", config.certbot_email,
            "--eab-kid", eab["key_id"],
            "--eab-hmac-key", eab["b64_mac_key"],
            "--agree-tos",
            "--non-interactive",
            "--config-dir", config_dir,
            "--work-dir", work_dir,
            "--logs-dir", logs_dir,
        ]
        logger.info("Running certbot for %s", domain)
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            # Decode leniently: an undecodable byte must not mask the real error.
            error_msg = (
                stderr.decode(errors="replace").strip()
                or stdout.decode(errors="replace").strip()
            )
            logger.error("certbot failed for %s: %s", domain, error_msg)
            return {"domain": domain, "success": False, "error": error_msg}

        # Copy the certificate into the shared directory.
        src = Path(config_dir) / "live" / domain
        if not src.exists():
            # certbot exited 0 but the expected lineage is missing; reporting
            # success here would let callers deploy a stale or absent cert.
            error_msg = f"certbot succeeded but {src} was not created"
            logger.error("certbot failed for %s: %s", domain, error_msg)
            return {"domain": domain, "success": False, "error": error_msg}

        dst = Path(config.certbot_config_dir) / "live" / domain
        dst.mkdir(parents=True, exist_ok=True)
        for pem_name in ("fullchain.pem", "privkey.pem", "chain.pem", "cert.pem"):
            src_file = src / pem_name
            if src_file.exists():
                # Follow the symlink and copy the real file.
                shutil.copy2(str(src_file.resolve()), str(dst / pem_name))

        logger.info("Certificate issued for %s", domain)
        return {"domain": domain, "success": True}
    finally:
        os.unlink(credentials_path)
def get_certificate_info(domain: str, config: AppConfig) -> dict | None:
    """Return file paths and expiry details for the stored certificate.

    The expiry is obtained by running ``openssl x509 -enddate`` on
    ``<certbot_config_dir>/live/<domain>/fullchain.pem``.

    Returns:
        A dict with ``domain``, ``cert_path``, ``key_path``, ``expiry``
        (ISO 8601, UTC) and ``days_remaining``; or None when the certificate
        or key file is missing, openssl exits non-zero, or its output cannot
        be parsed.
    """
    live_dir = Path(config.certbot_config_dir) / "live" / domain
    cert_path = live_dir / "fullchain.pem"
    key_path = live_dir / "privkey.pem"
    if not cert_path.exists() or not key_path.exists():
        return None

    result = subprocess.run(
        ["openssl", "x509", "-enddate", "-noout", "-in", str(cert_path)],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        return None

    # The date is whatever follows the first "=" in the output line.
    raw_line = result.stdout.strip()
    _, separator, date_str = raw_line.partition("=")
    try:
        if not separator:
            raise ValueError("missing '=' in openssl output")
        expiry = datetime.strptime(date_str, "%b %d %H:%M:%S %Y %Z").replace(tzinfo=timezone.utc)
    except ValueError:
        # One unparsable certificate must not break listing of all the others.
        logger.error("Could not parse certificate expiry for %s from openssl output %r", domain, raw_line)
        return None

    return {
        "domain": domain,
        "cert_path": str(cert_path),
        "key_path": str(key_path),
        "expiry": expiry.isoformat(),
        "days_remaining": (expiry - datetime.now(timezone.utc)).days,
    }
def list_certificates(config: AppConfig) -> list[dict]:
    """Return info dicts for every certificate under the shared live directory.

    Sub-directories of ``<certbot_config_dir>/live`` are visited in sorted
    order; names starting with "." are skipped, as are entries for which
    ``get_certificate_info`` returns nothing. An absent live directory yields
    an empty list.
    """
    live_root = Path(config.certbot_config_dir) / "live"
    if not live_root.exists():
        return []

    found = []
    for child in sorted(live_root.iterdir()):
        if child.name.startswith(".") or not child.is_dir():
            continue
        details = get_certificate_info(child.name, config)
        if details:
            found.append(details)
    return found

37
app/cloudflare.py Normal file
View File

@@ -0,0 +1,37 @@
import httpx
from .config import AppConfig
async def list_domains(config: AppConfig) -> list[dict]:
    """Fetch all zones visible to the configured Cloudflare API token.

    Pages through the Cloudflare zones endpoint 50 zones at a time until the
    page number reaches ``result_info.total_pages`` (treated as 1 when absent).

    Returns:
        A list of ``{"id", "name", "status"}`` dicts, one per zone.

    Raises:
        httpx.HTTPStatusError: when a page request returns an error status.
    """
    auth_headers = {
        "Authorization": f"Bearer {config.cloudflare_api_token}",
        "Content-Type": "application/json",
    }
    collected = []
    page_no = 1
    has_more = True
    async with httpx.AsyncClient(timeout=30) as client:
        while has_more:
            resp = await client.get(
                "https://api.cloudflare.com/client/v4/zones",
                headers=auth_headers,
                params={"page": page_no, "per_page": 50},
            )
            resp.raise_for_status()
            body = resp.json()
            collected.extend(
                {"id": zone["id"], "name": zone["name"], "status": zone["status"]}
                for zone in body["result"]
            )
            total_pages = body.get("result_info", {}).get("total_pages", 1)
            has_more = page_no < total_pages
            page_no += 1
    return collected

144
app/config.py Normal file
View File

@@ -0,0 +1,144 @@
import json
import logging
import os
import re
from dataclasses import dataclass, field
import httpx
logger = logging.getLogger(__name__)
@dataclass
class ApisixInstance:
    """Connection settings for one APISIX Admin API endpoint."""

    # Label used in logs, deployment results and instance selection.
    name: str
    # Base URL of the Admin API; the SSL resource path is appended to it.
    admin_url: str
    # Value sent in the X-API-KEY header.
    admin_key: str
@dataclass
class AppConfig:
    """Runtime configuration assembled by ``load_config``."""

    # --- Required settings -------------------------------------------------
    # Cloudflare API token (zone listing and the certbot DNS plugin).
    cloudflare_api_token: str
    # ACME directory URL passed to certbot via --server.
    google_acme_server: str
    # Contact address passed to certbot via --email.
    certbot_email: str
    # Passed to certbot as --dns-cloudflare-propagation-seconds.
    dns_propagation_seconds: int
    # GCP project used when requesting EAB keys.
    gcp_project: str
    # Service-account key as a JSON string.
    gcp_service_account_json: str
    # APISIX instances certificates can be deployed to.
    apisix_instances: list[ApisixInstance]

    # --- Optional settings -------------------------------------------------
    # Bearer token clients must present; empty means "not configured".
    api_token: str = ""
    # Discord alerting credentials; alerts are skipped when either is empty.
    discord_bot_token: str = ""
    discord_alert_user_id: str = ""
    # Domain pattern -> names of the APISIX instances that should receive it.
    domain_instance_map: dict[str, list[str]] = field(default_factory=dict)
    # Base directories for certbot state.
    certbot_config_dir: str = "/data/certbot/config"
    certbot_work_dir: str = "/data/certbot/work"
    certbot_logs_dir: str = "/data/certbot/logs"
# --- Domain validation ---
_DOMAIN_RE = re.compile(
r"^(?:\*\.)?(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}$"
)
def validate_domain(domain: str) -> str | None:
"""도메인 유효성 검사. 유효하면 None, 아니면 에러 메시지."""
if not domain or len(domain) > 253:
return "Invalid domain length"
if ".." in domain or "/" in domain or "\\" in domain:
return "Invalid characters in domain"
if not _DOMAIN_RE.match(domain):
return "Invalid domain format"
return None
# --- Vault ---
def _vault_read(path: str) -> dict | None:
"""Vault KV v2에서 시크릿 읽기. 실패 시 None 반환."""
addr = os.environ.get("VAULT_ADDR", "")
token = os.environ.get("VAULT_TOKEN", "")
if not addr or not token:
return None
try:
resp = httpx.get(
f"{addr}/v1/secret/data/{path}",
headers={"X-Vault-Token": token},
timeout=10,
)
if resp.status_code == 200:
return resp.json()["data"]["data"]
if resp.status_code == 403:
logger.error("Vault token expired or invalid for %s (403)", path)
except Exception as e:
logger.warning("Vault read failed for %s: %s", path, e)
return None
def load_config(path: str = "/data/config/config.json") -> AppConfig:
    """Build the application configuration from the JSON file and Vault.

    Secrets are looked up in Vault first and fall back to the JSON file (and,
    for the Cloudflare and API tokens, to environment variables) when Vault
    does not provide them.

    Args:
        path: Location of the JSON configuration file.

    Returns:
        A populated ``AppConfig``.

    Raises:
        OSError: when the config file, or a service-account key file it
            points to, cannot be opened.
        json.JSONDecodeError: when the config file is not valid JSON.
    """
    with open(path, encoding="utf-8") as config_file:
        raw = json.load(config_file)

    # --- Load secrets from Vault ---
    vault_cf = _vault_read("cloudflare")
    vault_apisix = _vault_read("infra/apisix")
    vault_sa = _vault_read("google/ca/service-account")
    vault_cm = _vault_read("infra/cert-manager")
    vault_discord = _vault_read("discord/bot")

    # Cloudflare token: Vault -> config -> env
    cf_token = (
        (vault_cf or {}).get("api_token")
        or raw.get("cloudflare_api_token")
        or os.environ.get("CLOUDFLARE_API_TOKEN", "")
    )

    # GCP service account: Vault -> config (file path or JSON string)
    sa_json = (vault_sa or {}).get("service_account_json", "")
    if not sa_json:
        sa_json = raw.get("gcp_service_account_json", "")
        # Anything that does not look like a JSON object is treated as a file
        # path. Leading whitespace is ignored so an indented JSON string is
        # not mistaken for a path.
        if sa_json and not sa_json.lstrip().startswith("{"):
            with open(sa_json, encoding="utf-8") as sa_file:
                sa_json = sa_file.read()

    # APISIX instances: a single admin_key from Vault overrides per-instance keys
    apisix_admin_key = (vault_apisix or {}).get("admin_key", "")
    instances = [
        ApisixInstance(
            name=inst["name"],
            admin_url=inst["admin_url"],
            admin_key=apisix_admin_key or inst.get("admin_key", ""),
        )
        for inst in raw.get("apisix_instances", [])
    ]

    # API token: Vault -> config -> env
    api_token = (
        (vault_cm or {}).get("api_token")
        or raw.get("api_token")
        or os.environ.get("CERT_MANAGER_API_TOKEN", "")
    )

    # Discord: Vault -> config
    discord_bot_token = (vault_discord or {}).get("bot_token") or raw.get("discord_bot_token", "")
    discord_alert_user_id = (vault_discord or {}).get("alert_user_id") or raw.get("discord_alert_user_id", "")

    # Domain-instance mapping
    domain_instance_map = raw.get("domain_instance_map", {})

    # Report Vault as connected when any secret was read, not only Cloudflare's.
    vault_reads = (vault_cf, vault_apisix, vault_sa, vault_cm, vault_discord)
    vault_status = "connected" if any(vault_reads) else "unavailable, using fallback"
    logger.info("Config loaded (vault: %s)", vault_status)

    return AppConfig(
        cloudflare_api_token=cf_token,
        google_acme_server=raw.get("google_acme_server", "https://dv.acme-v02.api.pki.goog/directory"),
        certbot_email=raw.get("certbot_email", ""),
        dns_propagation_seconds=raw.get("dns_propagation_seconds", 30),
        gcp_project=raw.get("gcp_project", ""),
        gcp_service_account_json=sa_json,
        apisix_instances=instances,
        api_token=api_token,
        discord_bot_token=discord_bot_token,
        discord_alert_user_id=discord_alert_user_id,
        domain_instance_map=domain_instance_map,
    )

52
app/google_eab.py Normal file
View File

@@ -0,0 +1,52 @@
import json
import logging
import httpx
from google.auth.transport.requests import Request
from google.oauth2 import service_account
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
logger = logging.getLogger(__name__)
SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
PUBLIC_CA_API = "https://publicca.googleapis.com/v1"
def _get_credentials(sa_json: str) -> service_account.Credentials:
    """Build service-account credentials, scoped to ``SCOPES``, from a JSON key string."""
    key_info = json.loads(sa_json)
    credentials = service_account.Credentials.from_service_account_info(
        key_info,
        scopes=SCOPES,
    )
    return credentials
@retry(
    retry=retry_if_exception_type((httpx.ConnectError, httpx.TimeoutException)),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    stop=stop_after_attempt(3),
    reraise=True,
)
def _request_eab_key(token: str, project: str) -> dict:
    """POST to the Public CA API to create an external account key.

    Connection errors and timeouts are retried (three attempts in total, with
    exponential back-off); an HTTP error status raises immediately.

    Returns:
        The decoded JSON response body.
    """
    endpoint = f"{PUBLIC_CA_API}/projects/{project}/locations/global/externalAccountKeys"
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    reply = httpx.post(endpoint, headers=request_headers, json={}, timeout=30)
    reply.raise_for_status()
    return reply.json()
def create_eab_key(sa_json: str, project: str) -> dict:
    """Create a new EAB key through the Google Public CA API.

    Args:
        sa_json: Service-account key as a JSON string.
        project: GCP project the key is created under.

    Returns:
        ``{"key_id": ..., "b64_mac_key": ...}`` taken from the API response.
    """
    credentials = _get_credentials(sa_json)
    # Refresh so that ``credentials.token`` holds an access token.
    credentials.refresh(Request())

    payload = _request_eab_key(credentials.token, project)
    logger.info("Created new EAB key: %s", payload.get("keyId"))

    key_id = payload["keyId"]
    mac_key = payload["b64MacKey"]
    return {"key_id": key_id, "b64_mac_key": mac_key}

238
app/main.py Normal file
View File

@@ -0,0 +1,238 @@
import asyncio
import logging
from contextlib import asynccontextmanager
from datetime import datetime, timezone
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from fastapi import FastAPI, HTTPException, Request, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import BaseModel
from . import apisix, certbot, cloudflare
from .alert import send_discord_dm
from .config import AppConfig, load_config, validate_domain
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(name)s: %(message)s",
)
logger = logging.getLogger(__name__)
config: AppConfig = None # type: ignore
scheduler = AsyncIOScheduler()
security = HTTPBearer()
# --- Auth ---
async def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)) -> str:
    """Check the request's bearer token against the configured API token.

    Returns:
        The presented token when it matches.

    Raises:
        HTTPException: 500 when no API token is configured, 401 when the
            presented token does not match.
    """
    # Local import so this block brings its own dependency into scope.
    import secrets

    if not config.api_token:
        raise HTTPException(status_code=500, detail="API token not configured")

    # Constant-time comparison: a plain != can leak, through timing, how much
    # of the token was correct. Compare bytes so non-ASCII input is rejected
    # with 401 rather than raising TypeError.
    presented = credentials.credentials.encode()
    expected = config.api_token.encode()
    if not secrets.compare_digest(presented, expected):
        raise HTTPException(status_code=401, detail="Invalid token")
    return credentials.credentials
# --- Request/Response models ---
class DomainRequest(BaseModel):
    # Request body shared by the issue and deploy endpoints.

    # Domain to operate on; endpoints run it through validate_domain.
    domain: str
    # Optional APISIX instance names to target; the deploy endpoint reads it.
    instances: list[str] | None = None
class SyncResponse(BaseModel):
    # Response model holding a list of per-domain result dicts.
    # NOTE(review): not referenced by any endpoint in the visible code.
    results: list[dict]
# --- Alert helper ---
async def _alert(message: str):
    """Send ``message`` as a Discord DM; a failure is logged and ignored."""
    try:
        await send_discord_dm(
            bot_token=config.discord_bot_token,
            user_id=config.discord_alert_user_id,
            message=message,
        )
    except Exception as exc:
        # Alerting must never break the operation that triggered it.
        logger.warning("Alert send failed: %s", exc)
# --- Scheduled task ---
async def auto_renew():
    """Renew certificates expiring within 30 days and redeploy them to APISIX.

    The configuration is reloaded first (to cope with Vault token expiry).
    Renewal failures, and deployment failures after a successful renewal, are
    collected and sent as one Discord alert at the end of the run.
    """
    global config
    # Reload settings (handles Vault token expiry).
    try:
        config = load_config()
    except Exception as e:
        logger.error("Config reload failed: %s", e)
        await _alert(f"[cert-manager] Config reload failed: {e}")
        return

    logger.info("Starting auto-renewal check")
    certs = certbot.list_certificates(config)
    failures = []
    for cert in certs:
        if cert["days_remaining"] > 30:
            continue
        domain = cert["domain"]
        logger.info("Renewing %s (expires in %d days)", domain, cert["days_remaining"])
        result = await certbot.issue_certificate(domain, config)
        if not result["success"]:
            error = result.get("error", "Unknown")
            logger.error("Renewal failed for %s: %s", domain, error)
            failures.append(f"{domain}: {error}")
            continue

        # Pick the target instances according to domain_instance_map.
        target_instances = _resolve_instances(domain)
        deploy_results = await apisix.deploy_certificate(domain, config, instances=target_instances)
        # A renewed certificate that never reaches APISIX is still an outage
        # risk, so failed deployments are reported too.
        for outcome in deploy_results:
            if not outcome["success"]:
                failures.append(
                    f"{domain}: deploy to {outcome['instance']} failed: "
                    f"{outcome.get('error', 'Unknown')}"
                )

    if failures:
        msg = "[cert-manager] Renewal failures:\n" + "\n".join(failures)
        await _alert(msg)
    logger.info("Auto-renewal check completed")
def _resolve_instances(domain: str) -> list | None:
    """Look up the APISIX instances mapped to ``domain``.

    Returns:
        The configured instances whose names are listed for the first
        matching pattern that yields at least one instance, or None (meaning
        "all instances") when there is no mapping or nothing matches.
    """
    mapping = config.domain_instance_map
    if not mapping:
        return None  # no mapping configured -> all instances

    for pattern, names in mapping.items():
        # NOTE: lstrip removes any leading "*" and "." characters, not the
        # literal "*." prefix.
        subdomain_suffix = "." + pattern.lstrip("*.")
        if domain != pattern and not domain.endswith(subdomain_suffix):
            continue
        selected = [inst for inst in config.apisix_instances if inst.name in names]
        if selected:
            return selected
    return None  # not covered by the mapping -> all instances
# --- Lifespan ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the configuration and run the renewal scheduler for the app's lifetime.

    The ``auto_renew`` job is scheduled daily at 03:00 UTC. The scheduler is
    shut down when the application stops, including when it stops because of
    an exception.
    """
    global config
    config = load_config()
    # Pin the trigger to UTC; without an explicit timezone the cron fields
    # are evaluated in the host's local timezone.
    scheduler.add_job(
        auto_renew,
        CronTrigger(hour=3, minute=0, timezone="UTC"),
        id="auto_renew",
    )
    scheduler.start()
    logger.info("Scheduler started")
    try:
        yield
    finally:
        scheduler.shutdown()
app = FastAPI(title="cert-manager", lifespan=lifespan)
# --- Endpoints ---
@app.get("/health")
async def health():
    """Report that the service is up, with the current UTC timestamp."""
    current_time = datetime.now(timezone.utc)
    return {"status": "ok", "timestamp": current_time.isoformat()}
@app.get("/domains")
async def get_domains(token: str = Security(verify_token)):
    """List Cloudflare zones; any lookup failure is returned as HTTP 502."""
    try:
        zones = await cloudflare.list_domains(config)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=str(exc))
    return {"domains": zones, "count": len(zones)}
@app.get("/certificates")
async def get_certificates(token: str = Security(verify_token)):
    """List the stored certificates together with their count."""
    found = certbot.list_certificates(config)
    total = len(found)
    return {"certificates": found, "count": total}
@app.get("/certificates/{domain}")
async def get_certificate_detail(domain: str, token: str = Security(verify_token)):
    """Return certificate info for a domain plus the PEM contents.

    Responds 400 for an invalid domain and 404 when no certificate is stored.
    The response includes the private key.
    """
    problem = validate_domain(domain)
    if problem:
        raise HTTPException(status_code=400, detail=problem)

    info = certbot.get_certificate_info(domain, config)
    if not info:
        raise HTTPException(status_code=404, detail=f"Certificate not found for {domain}")

    from pathlib import Path

    certificate_text = Path(info["cert_path"]).read_text()
    private_key_text = Path(info["key_path"]).read_text()

    response = dict(info)
    response["cert"] = certificate_text
    response["key"] = private_key_text
    return response
@app.post("/certificates/issue")
async def issue_certificate(req: DomainRequest, token: str = Security(verify_token)):
    """Issue a certificate for the requested domain.

    Responds 400 for an invalid domain and 500 when issuance fails.
    """
    problem = validate_domain(req.domain)
    if problem:
        raise HTTPException(status_code=400, detail=problem)

    outcome = await certbot.issue_certificate(req.domain, config)
    if outcome["success"]:
        return outcome
    raise HTTPException(status_code=500, detail=outcome.get("error", "Unknown error"))
@app.post("/certificates/deploy")
async def deploy_certificate(req: DomainRequest, token: str = Security(verify_token)):
    """Deploy the stored certificate for a domain to APISIX.

    When ``req.instances`` is given, only configured instances with those
    names are targeted (400 if none match). Responds 500 only when every
    deployment failed.
    """
    problem = validate_domain(req.domain)
    if problem:
        raise HTTPException(status_code=400, detail=problem)

    selected = None
    if req.instances:
        selected = [inst for inst in config.apisix_instances if inst.name in req.instances]
        if not selected:
            raise HTTPException(status_code=400, detail=f"Unknown instances: {req.instances}")

    results = await apisix.deploy_certificate(req.domain, config, instances=selected)

    failed_count = sum(1 for item in results if not item["success"])
    if failed_count and failed_count == len(results):
        raise HTTPException(status_code=500, detail="All deployments failed")
    return {"domain": req.domain, "results": results}
async def _sync_one(domain: str) -> dict:
    """Issue and then deploy the certificate for a single domain.

    Returns:
        ``{"domain", "issue", "deploy"}``; ``deploy`` is None when issuance
        failed and deployment was therefore skipped.
    """
    logger.info("Syncing %s", domain)
    issued = await certbot.issue_certificate(domain, config)

    deployed = None
    if issued["success"]:
        targets = _resolve_instances(domain)
        deployed = await apisix.deploy_certificate(domain, config, instances=targets)
    return {"domain": domain, "issue": issued, "deploy": deployed}
@app.post("/certificates/sync")
async def sync_all(token: str = Security(verify_token)):
    """Look up every domain, then issue and deploy with at most 3 running in parallel.

    Responds 502 when the Cloudflare lookup fails. Per-domain problems
    (exceptions, failed issuance, failed deployments) do not abort the run;
    they are included in the response and sent as one Discord alert.
    """
    # Same contract as GET /domains: an upstream failure is a 502.
    try:
        domains = await cloudflare.list_domains(config)
    except Exception as e:
        raise HTTPException(status_code=502, detail=str(e)) from e

    sem = asyncio.Semaphore(3)

    async def _limited(domain: str):
        async with sem:
            return await _sync_one(domain)

    names = [zone["name"] for zone in domains]
    results = await asyncio.gather(
        *(_limited(name) for name in names),
        return_exceptions=True,
    )

    final = []
    failures = []
    for domain, outcome in zip(names, results):
        # gather() may hand back BaseException instances (e.g. cancellation),
        # so test for the base class before treating the result as a dict.
        if isinstance(outcome, BaseException):
            final.append({"domain": domain, "error": str(outcome)})
            failures.append(f"{domain}: {outcome}")
            continue

        final.append(outcome)
        # _sync_one reports failures inside its result instead of raising, so
        # they have to be picked out here to be alerted.
        issue_result = outcome["issue"]
        if not issue_result["success"]:
            failures.append(f"{domain}: {issue_result.get('error', 'Unknown')}")
            continue
        failures.extend(
            f"{domain}: deploy to {item['instance']} failed: {item.get('error', 'Unknown')}"
            for item in (outcome["deploy"] or [])
            if not item["success"]
        )

    if failures:
        await _alert("[cert-manager] Sync failures:\n" + "\n".join(failures))
    return {"results": final, "total": len(final)}