Add time-aware traffic logger and auto-retrain system

- Log traffic features with timestamps to CSV every 5s
- Add hour_sin/hour_cos time features (15 → 17 feature vector)
- Auto-retrain from traffic log at configurable interval (default 24h)
- Detect old 15-feature models and switch to learning mode
- SIGUSR1 now retrains from traffic log first, falls back to collect mode
- Add CLI: `ai traffic` (time-bucketed summary), `ai log` (recent entries)
- Add config keys: traffic_log_file, retention_days, retrain_window

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
kaffa
2026-02-07 10:14:07 +09:00
parent 667c6eac81
commit 11c1ab0134
3 changed files with 337 additions and 10 deletions

View File

@@ -801,6 +801,157 @@ cmd_ai_retrain() {
fi
}
# Print a time-of-day traffic summary for the last 24 hours.
#
# Resolves the traffic log path from ai.traffic_log_file in $CONFIG_FILE
# (default: /var/lib/xdp-defense/traffic_log.csv), groups the samples of the
# last 24h into four 6-hour periods and prints average/peak packets and
# average bytes per period, the total sample count, and an estimate of the
# time left until the next retrain (model file mtime + ai.retrain_interval).
#
# Takes no arguments. Exits the script with status 1 if the log file is missing.
cmd_ai_traffic() {
    local default_log="/var/lib/xdp-defense/traffic_log.csv"
    local log_file

    # The config path travels through argv rather than being spliced into the
    # Python source, so a path containing quotes cannot break or inject code.
    # Any failure (missing yaml module, unreadable config) selects the default.
    log_file=$(python3 -c "
import sys, yaml
with open(sys.argv[1]) as f:
    cfg = yaml.safe_load(f) or {}
print((cfg.get('ai') or {}).get('traffic_log_file') or sys.argv[2])
" "$CONFIG_FILE" "$default_log" 2>/dev/null || echo "$default_log")

    if [ ! -f "$log_file" ]; then
        log_err "Traffic log not found: $log_file"
        exit 1
    fi

    python3 -c "
import csv, sys
from datetime import datetime, timedelta

log_file, config_file = sys.argv[1], sys.argv[2]
cutoff = datetime.now() - timedelta(hours=24)


def fmt_bytes(b):
    # Human-readable byte count using 1024-based units
    if b >= 1024*1024:
        return f'{b/1024/1024:.1f}MB'
    elif b >= 1024:
        return f'{b/1024:.1f}KB'
    return f'{b:.0f}B'


# Buckets: 0-6, 6-12, 12-18, 18-24
buckets = {0: [], 1: [], 2: [], 3: []}
total_samples = 0

with open(log_file, 'r', newline='') as f:
    reader = csv.reader(f)
    header = next(reader, None)
    if header is None:
        print('Traffic log is empty')
        sys.exit(0)
    for row in reader:
        try:
            ts = datetime.fromisoformat(row[0])
            if ts < cutoff:
                continue
            hour = float(row[1])
            bucket = min(int(hour // 6), 3)
            # features: row[2]=hour_sin, row[3]=hour_cos, row[4]=total_packets, row[5]=total_bytes, ...
            pps = float(row[4])
            bps = float(row[5])
            buckets[bucket].append((pps, bps))
            total_samples += 1
        except (ValueError, IndexError, KeyError):
            # Malformed row: bad timestamp/number, too few columns, or a
            # negative hour that maps to no bucket. Skip it.
            continue

labels = ['00:00-06:00', '06:00-12:00', '12:00-18:00', '18:00-24:00']
print()
print('\033[1m=== Traffic Summary (last 24h) ===\033[0m')
print(f'{\"Period\":>15} {\"Avg PPS\":>10} {\"Peak PPS\":>10} {\"Avg BPS\":>12} {\"Samples\":>8}')
print(f'{\"-\"*15} {\"-\"*10} {\"-\"*10} {\"-\"*12} {\"-\"*8}')
for i, label in enumerate(labels):
    data = buckets[i]
    if not data:
        print(f'{label:>15} {\"--\":>10} {\"--\":>10} {\"--\":>12} {0:>8}')
        continue
    pps_list = [d[0] for d in data]
    bps_list = [d[1] for d in data]
    avg_pps = sum(pps_list) / len(pps_list)
    peak_pps = max(pps_list)
    avg_bps = sum(bps_list) / len(bps_list)
    print(f'{label:>15} {avg_pps:>10.0f} {peak_pps:>10.0f} {fmt_bytes(avg_bps):>12} {len(data):>8}')

hours = total_samples * 5 / 3600  # 5s intervals
print(f'Total: {total_samples} samples ({hours:.1f}h)')

# Show next retrain time. Best effort: a missing yaml module or an unreadable
# config must not turn the summary printed above into a traceback.
try:
    import os, time, yaml
    with open(config_file) as f:
        cfg = yaml.safe_load(f) or {}
    ai_cfg = cfg.get('ai') or {}
    retrain_interval = ai_cfg.get('retrain_interval', 86400)
    model_file = ai_cfg.get('model_file', '/var/lib/xdp-defense/ai_model.pkl')
    if os.path.exists(model_file):
        mtime = os.path.getmtime(model_file)
        next_retrain = mtime + retrain_interval - time.time()
        if next_retrain > 0:
            h = int(next_retrain // 3600)
            m = int((next_retrain % 3600) // 60)
            print(f'Next retrain: {h}h {m}m')
        else:
            print('Next retrain: imminent')
    else:
        print('Next retrain: model not yet trained')
except Exception as e:
    print(f'Next retrain: unknown ({e})', file=sys.stderr)
print()
" "$log_file" "$CONFIG_FILE"
}
# Print the most recent entries of the traffic log as a table.
#
# Arguments:
#   $1  number of entries to show; anything that is not a non-negative
#       integer (including no argument) falls back to 20
#
# Resolves the log path from ai.traffic_log_file in $CONFIG_FILE (default:
# /var/lib/xdp-defense/traffic_log.csv). Exits the script with status 1 if
# the log file is missing.
cmd_ai_log() {
    local n=${1:-20}
    [[ "$n" =~ ^[0-9]+$ ]] || n=20

    local default_log="/var/lib/xdp-defense/traffic_log.csv"
    local log_file

    # The config path travels through argv rather than being spliced into the
    # Python source, so a path containing quotes cannot break or inject code.
    # Any failure (missing yaml module, unreadable config) selects the default.
    log_file=$(python3 -c "
import sys, yaml
with open(sys.argv[1]) as f:
    cfg = yaml.safe_load(f) or {}
print((cfg.get('ai') or {}).get('traffic_log_file') or sys.argv[2])
" "$CONFIG_FILE" "$default_log" 2>/dev/null || echo "$default_log")

    if [ ! -f "$log_file" ]; then
        log_err "Traffic log not found: $log_file"
        exit 1
    fi

    python3 -c "
import csv, sys
from collections import deque

log_file = sys.argv[1]
# Clamp so an absurdly large N is still a valid deque bound
n = min(int(sys.argv[2]), sys.maxsize)

# Keep only the last n rows while streaming. Unlike slicing with rows[-n:],
# this shows nothing for n=0 (a -0 slice returns every row) and never holds
# the whole log in memory.
display = deque(maxlen=n)
total = 0
with open(log_file, 'r', newline='') as f:
    reader = csv.reader(f)
    header = next(reader, None)
    if header is None:
        print('Traffic log is empty')
        sys.exit(0)
    for row in reader:
        display.append(row)
        total += 1

print()
print('\033[1m=== Recent Traffic Log ===\033[0m')
print(f'{\"Timestamp\":>22} {\"Hour\":>6} {\"PPS\":>10} {\"Bytes\":>12} {\"SYN%\":>6} {\"UDP%\":>6} {\"ICMP%\":>6}')
print(f'{\"-\"*22} {\"-\"*6} {\"-\"*10} {\"-\"*12} {\"-\"*6} {\"-\"*6} {\"-\"*6}')
for row in display:
    try:
        ts = row[0][:19]  # trim microseconds
        hour = float(row[1])
        pkts = float(row[4])
        bts = float(row[5])
        # Columns 14-16 are shown as SYN/UDP/ICMP percentages; rows too
        # short to carry them show 0 instead of being dropped.
        syn_r = float(row[14]) * 100 if len(row) > 14 else 0
        udp_r = float(row[15]) * 100 if len(row) > 15 else 0
        icmp_r = float(row[16]) * 100 if len(row) > 16 else 0
        print(f'{ts:>22} {hour:>6.1f} {pkts:>10.0f} {bts:>12.0f} {syn_r:>5.1f}% {udp_r:>5.1f}% {icmp_r:>5.1f}%')
    except (ValueError, IndexError):
        # Malformed row: skip it
        continue
print(f'Showing {len(display)} of {total} entries')
print()
" "$log_file" "$n"
}
# ==================== GeoIP ====================
cmd_geoip() {
@@ -923,6 +1074,8 @@ DDoS:
AI:
ai status Show AI model status
ai retrain Trigger AI model retrain
ai traffic Show time-of-day traffic summary (last 24h)
ai log [N] Show recent N traffic log entries (default 20)
Daemon:
daemon start Start defense daemon (background)
@@ -1026,6 +1179,8 @@ case "${1:-help}" in
case "${2:-status}" in
status) cmd_ai_status ;;
retrain) cmd_ai_retrain ;;
traffic) cmd_ai_traffic ;;
log) cmd_ai_log "$3" ;;
*) cmd_ai_status ;;
esac
;;