Replace CSV traffic log with SQLite for better performance

- traffic_log.csv → traffic_log.db (SQLite with indexed timestamp)
- INSERT instead of CSV append, DELETE instead of file rewrite
- CLI queries use SQL (GROUP BY for traffic, LIMIT for log)
- retrain_from_log() uses read-only connection with time range query
- Config key: traffic_log_file → traffic_log_db

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
kaffa
2026-02-07 10:30:10 +09:00
parent 11c1ab0134
commit 3d1e353b1a
3 changed files with 147 additions and 132 deletions

View File

@@ -802,47 +802,41 @@ cmd_ai_retrain() {
} }
cmd_ai_traffic() { cmd_ai_traffic() {
local log_file local db_file
log_file=$(python3 -c " db_file=$(python3 -c "
import yaml import yaml
with open('$CONFIG_FILE') as f: with open('$CONFIG_FILE') as f:
cfg = yaml.safe_load(f) cfg = yaml.safe_load(f)
print(cfg.get('ai',{}).get('traffic_log_file', '/var/lib/xdp-defense/traffic_log.csv')) print(cfg.get('ai',{}).get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db'))
" 2>/dev/null || echo "/var/lib/xdp-defense/traffic_log.csv") " 2>/dev/null || echo "/var/lib/xdp-defense/traffic_log.db")
[ ! -f "$log_file" ] && { log_err "Traffic log not found: $log_file"; exit 1; } [ ! -f "$db_file" ] && { log_err "Traffic log not found: $db_file"; exit 1; }
python3 -c " python3 -c "
import csv, sys import sqlite3, sys
from datetime import datetime, timedelta from datetime import datetime, timedelta
log_file = sys.argv[1] db_file = sys.argv[1]
cutoff = datetime.now() - timedelta(hours=24) cutoff = (datetime.now() - timedelta(hours=24)).isoformat()
conn = sqlite3.connect(db_file)
cur = conn.cursor()
# Buckets: 0-6, 6-12, 12-18, 18-24 # Buckets: 0-6, 6-12, 12-18, 18-24
buckets = {0: [], 1: [], 2: [], 3: []} buckets = {0: [], 1: [], 2: [], 3: []}
total_samples = 0 total_samples = 0
with open(log_file, 'r') as f: cur.execute('SELECT hour, total_packets, total_bytes FROM traffic_samples WHERE timestamp >= ?', (cutoff,))
reader = csv.reader(f) for row in cur.fetchall():
header = next(reader, None) try:
if header is None: hour = float(row[0])
print('Traffic log is empty') bucket = min(int(hour // 6), 3)
sys.exit(0) pps = float(row[1])
for row in reader: bps = float(row[2])
try: buckets[bucket].append((pps, bps))
ts = datetime.fromisoformat(row[0]) total_samples += 1
if ts < cutoff: except (ValueError, TypeError):
continue continue
hour = float(row[1])
bucket = min(int(hour // 6), 3)
# features: row[2]=hour_sin, row[3]=hour_cos, row[4]=total_packets, row[5]=total_bytes, ...
pps = float(row[4])
bps = float(row[5])
buckets[bucket].append((pps, bps))
total_samples += 1
except (ValueError, IndexError):
continue
labels = ['00:00-06:00', '06:00-12:00', '12:00-18:00', '18:00-24:00'] labels = ['00:00-06:00', '06:00-12:00', '12:00-18:00', '18:00-24:00']
print() print()
@@ -873,6 +867,8 @@ for i, label in enumerate(labels):
hours = total_samples * 5 / 3600 # 5s intervals hours = total_samples * 5 / 3600 # 5s intervals
print(f'Total: {total_samples} samples ({hours:.1f}h)') print(f'Total: {total_samples} samples ({hours:.1f}h)')
conn.close()
# Show next retrain time # Show next retrain time
import yaml, os, time import yaml, os, time
try: try:
@@ -894,62 +890,67 @@ try:
except: except:
pass pass
print() print()
" "$log_file" " "$db_file"
} }
cmd_ai_log() { cmd_ai_log() {
local n=${1:-20} local n=${1:-20}
[[ "$n" =~ ^[0-9]+$ ]] || n=20 [[ "$n" =~ ^[0-9]+$ ]] || n=20
local log_file local db_file
log_file=$(python3 -c " db_file=$(python3 -c "
import yaml import yaml
with open('$CONFIG_FILE') as f: with open('$CONFIG_FILE') as f:
cfg = yaml.safe_load(f) cfg = yaml.safe_load(f)
print(cfg.get('ai',{}).get('traffic_log_file', '/var/lib/xdp-defense/traffic_log.csv')) print(cfg.get('ai',{}).get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db'))
" 2>/dev/null || echo "/var/lib/xdp-defense/traffic_log.csv") " 2>/dev/null || echo "/var/lib/xdp-defense/traffic_log.db")
[ ! -f "$log_file" ] && { log_err "Traffic log not found: $log_file"; exit 1; } [ ! -f "$db_file" ] && { log_err "Traffic log not found: $db_file"; exit 1; }
python3 -c " python3 -c "
import csv, sys import sqlite3, sys
log_file = sys.argv[1] db_file = sys.argv[1]
n = int(sys.argv[2]) n = int(sys.argv[2])
rows = [] conn = sqlite3.connect(db_file)
with open(log_file, 'r') as f: cur = conn.cursor()
reader = csv.reader(f)
header = next(reader, None) cur.execute('SELECT COUNT(*) FROM traffic_samples')
if header is None: total_count = cur.fetchone()[0]
print('Traffic log is empty')
sys.exit(0) if total_count == 0:
for row in reader: print('Traffic log is empty')
rows.append(row) conn.close()
sys.exit(0)
cur.execute('SELECT timestamp, hour, total_packets, total_bytes, syn_ratio, udp_ratio, icmp_ratio FROM traffic_samples ORDER BY id DESC LIMIT ?', (n,))
rows = cur.fetchall()
rows.reverse()
conn.close()
# Show last N rows
display = rows[-n:]
print() print()
print('\033[1m=== Recent Traffic Log ===\033[0m') print('\033[1m=== Recent Traffic Log ===\033[0m')
print(f'{\"Timestamp\":>22} {\"Hour\":>6} {\"PPS\":>10} {\"Bytes\":>12} {\"SYN%\":>6} {\"UDP%\":>6} {\"ICMP%\":>6}') print(f'{\"Timestamp\":>22} {\"Hour\":>6} {\"PPS\":>10} {\"Bytes\":>12} {\"SYN%\":>6} {\"UDP%\":>6} {\"ICMP%\":>6}')
print(f'{\"-\"*22} {\"-\"*6} {\"-\"*10} {\"-\"*12} {\"-\"*6} {\"-\"*6} {\"-\"*6}') print(f'{\"-\"*22} {\"-\"*6} {\"-\"*10} {\"-\"*12} {\"-\"*6} {\"-\"*6} {\"-\"*6}')
for row in display: for row in rows:
try: try:
ts = row[0][:19] # trim microseconds ts = str(row[0])[:19] # trim microseconds
hour = float(row[1]) hour = float(row[1])
pkts = float(row[4]) pkts = float(row[2])
bts = float(row[5]) bts = float(row[3])
syn_r = float(row[14]) * 100 if len(row) > 14 else 0 syn_r = float(row[4]) * 100 if row[4] is not None else 0
udp_r = float(row[15]) * 100 if len(row) > 15 else 0 udp_r = float(row[5]) * 100 if row[5] is not None else 0
icmp_r = float(row[16]) * 100 if len(row) > 16 else 0 icmp_r = float(row[6]) * 100 if row[6] is not None else 0
print(f'{ts:>22} {hour:>6.1f} {pkts:>10.0f} {bts:>12.0f} {syn_r:>5.1f}% {udp_r:>5.1f}% {icmp_r:>5.1f}%') print(f'{ts:>22} {hour:>6.1f} {pkts:>10.0f} {bts:>12.0f} {syn_r:>5.1f}% {udp_r:>5.1f}% {icmp_r:>5.1f}%')
except (ValueError, IndexError): except (ValueError, TypeError):
continue continue
print(f'Showing {len(display)} of {len(rows)} entries') print(f'Showing {len(rows)} of {total_count} entries')
print() print()
" "$log_file" "$n" " "$db_file" "$n"
} }
# ==================== GeoIP ==================== # ==================== GeoIP ====================

View File

@@ -71,5 +71,5 @@ ai:
training_data_file: /var/lib/xdp-defense/training_data.csv training_data_file: /var/lib/xdp-defense/training_data.csv
# Traffic logging # Traffic logging
traffic_log_file: /var/lib/xdp-defense/traffic_log.csv traffic_log_db: /var/lib/xdp-defense/traffic_log.db
traffic_log_retention_days: 7 # days to keep traffic log data traffic_log_retention_days: 7 # days to keep traffic log data

View File

@@ -22,6 +22,7 @@ import logging
import logging.handlers import logging.handlers
import csv import csv
import pickle import pickle
import sqlite3
from collections import defaultdict from collections import defaultdict
from datetime import datetime, timedelta from datetime import datetime, timedelta
@@ -81,7 +82,7 @@ DEFAULT_CONFIG = {
'min_packets_for_sample': 20, 'min_packets_for_sample': 20,
'model_file': '/var/lib/xdp-defense/ai_model.pkl', 'model_file': '/var/lib/xdp-defense/ai_model.pkl',
'training_data_file': '/var/lib/xdp-defense/training_data.csv', 'training_data_file': '/var/lib/xdp-defense/training_data.csv',
'traffic_log_file': '/var/lib/xdp-defense/traffic_log.csv', 'traffic_log_db': '/var/lib/xdp-defense/traffic_log.db',
'traffic_log_retention_days': 7, 'traffic_log_retention_days': 7,
'retrain_interval': 86400, 'retrain_interval': 86400,
'retrain_window': 86400, 'retrain_window': 86400,
@@ -331,36 +332,30 @@ class AIDetector:
log.error("AI prediction error: %s", e) log.error("AI prediction error: %s", e)
return False, 0.0 return False, 0.0
def retrain_from_log(self): def retrain_from_log(self, db_path=None):
"""Retrain the model from traffic_log.csv data.""" """Retrain the model from traffic_log.db data."""
log_file = self.cfg.get('traffic_log_file', '/var/lib/xdp-defense/traffic_log.csv') if db_path is None:
if not os.path.exists(log_file): db_path = self.cfg.get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db')
log.warning("Traffic log not found: %s", log_file) if not os.path.exists(db_path):
log.warning("Traffic log DB not found: %s", db_path)
return False return False
retrain_window = self.cfg.get('retrain_window', 86400) retrain_window = self.cfg.get('retrain_window', 86400)
cutoff = datetime.now() - timedelta(seconds=retrain_window) cutoff = (datetime.now() - timedelta(seconds=retrain_window)).isoformat()
conn = None
try: try:
samples = [] conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
with open(log_file, 'r', newline='') as f: cur = conn.execute(
reader = csv.reader(f) 'SELECT hour_sin, hour_cos, total_packets, total_bytes, '
header = next(reader, None) 'tcp_syn_count, tcp_other_count, udp_count, icmp_count, '
if header is None: 'other_proto_count, unique_ips_approx, small_pkt_count, '
log.warning("Traffic log is empty") 'large_pkt_count, syn_ratio, udp_ratio, icmp_ratio, '
return False 'small_pkt_ratio, avg_pkt_size '
'FROM traffic_samples WHERE timestamp >= ? ORDER BY timestamp',
# Feature columns: skip timestamp and hour (first 2), take remaining 17 (cutoff,)
for row in reader: )
try: samples = [list(row) for row in cur.fetchall()]
ts = datetime.fromisoformat(row[0])
if ts < cutoff:
continue
features = [float(v) for v in row[2:]] # skip timestamp, hour
if len(features) == 17:
samples.append(features)
except (ValueError, IndexError):
continue
if len(samples) < 10: if len(samples) < 10:
log.warning("Not enough recent samples for retrain (%d)", len(samples)) log.warning("Not enough recent samples for retrain (%d)", len(samples))
@@ -375,6 +370,9 @@ class AIDetector:
except Exception as e: except Exception as e:
log.error("retrain_from_log failed: %s", e) log.error("retrain_from_log failed: %s", e)
return False return False
finally:
if conn:
conn.close()
# ==================== ProfileManager ==================== # ==================== ProfileManager ====================
@@ -488,6 +486,8 @@ class DDoSDaemon:
self._last_retrain_time = self._get_model_mtime() self._last_retrain_time = self._get_model_mtime()
self._last_log_cleanup = time.time() self._last_log_cleanup = time.time()
self._init_traffic_db()
level = self.cfg['general'].get('log_level', 'info').upper() level = self.cfg['general'].get('log_level', 'info').upper()
log.setLevel(getattr(logging, level, logging.INFO)) log.setLevel(getattr(logging, level, logging.INFO))
@@ -550,69 +550,82 @@ class DDoSDaemon:
def _handle_sigusr1(self, signum, frame): def _handle_sigusr1(self, signum, frame):
log.info("SIGUSR1 received, triggering retrain from traffic log...") log.info("SIGUSR1 received, triggering retrain from traffic log...")
if self.ai_detector.retrain_from_log(): db_path = self.cfg['ai'].get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db')
if self.ai_detector.retrain_from_log(db_path):
self._last_retrain_time = time.time() self._last_retrain_time = time.time()
log.info("SIGUSR1 retrain completed successfully") log.info("SIGUSR1 retrain completed successfully")
else: else:
log.warning("SIGUSR1 retrain failed (falling back to collect mode)") log.warning("SIGUSR1 retrain failed (falling back to collect mode)")
self.ai_detector.request_retrain() self.ai_detector.request_retrain()
# ---- Traffic Logging ---- # ---- Traffic Logging (SQLite) ----
TRAFFIC_CSV_HEADER = [ def _init_traffic_db(self):
'timestamp', 'hour', """Initialize SQLite database for traffic logging."""
'hour_sin', 'hour_cos', db_path = self.cfg['ai'].get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db')
'total_packets', 'total_bytes', 'tcp_syn_count', 'tcp_other_count', os.makedirs(os.path.dirname(db_path), exist_ok=True)
'udp_count', 'icmp_count', 'other_proto_count', 'unique_ips_approx', self._traffic_db = sqlite3.connect(db_path, check_same_thread=False)
'small_pkt_count', 'large_pkt_count', self._traffic_db.execute(
'syn_ratio', 'udp_ratio', 'icmp_ratio', 'small_pkt_ratio', 'avg_pkt_size' 'CREATE TABLE IF NOT EXISTS traffic_samples ('
] ' id INTEGER PRIMARY KEY AUTOINCREMENT,'
' timestamp TEXT NOT NULL,'
' hour REAL NOT NULL,'
' hour_sin REAL NOT NULL,'
' hour_cos REAL NOT NULL,'
' total_packets REAL NOT NULL,'
' total_bytes REAL NOT NULL,'
' tcp_syn_count REAL NOT NULL,'
' tcp_other_count REAL NOT NULL,'
' udp_count REAL NOT NULL,'
' icmp_count REAL NOT NULL,'
' other_proto_count REAL NOT NULL,'
' unique_ips_approx REAL NOT NULL,'
' small_pkt_count REAL NOT NULL,'
' large_pkt_count REAL NOT NULL,'
' syn_ratio REAL NOT NULL,'
' udp_ratio REAL NOT NULL,'
' icmp_ratio REAL NOT NULL,'
' small_pkt_ratio REAL NOT NULL,'
' avg_pkt_size REAL NOT NULL'
')'
)
self._traffic_db.execute(
'CREATE INDEX IF NOT EXISTS idx_timestamp ON traffic_samples(timestamp)'
)
self._traffic_db.commit()
log.info("Traffic log DB initialized: %s", db_path)
def _log_traffic(self, now, hour, features): def _log_traffic(self, now, hour, features):
"""Append one row to traffic_log.csv.""" """Insert one row into traffic_samples table."""
log_file = self.cfg['ai'].get('traffic_log_file', '/var/lib/xdp-defense/traffic_log.csv')
try: try:
write_header = not os.path.exists(log_file) or os.path.getsize(log_file) == 0 self._traffic_db.execute(
os.makedirs(os.path.dirname(log_file), exist_ok=True) 'INSERT INTO traffic_samples ('
with open(log_file, 'a', newline='') as f: ' timestamp, hour, hour_sin, hour_cos,'
writer = csv.writer(f) ' total_packets, total_bytes, tcp_syn_count, tcp_other_count,'
if write_header: ' udp_count, icmp_count, other_proto_count, unique_ips_approx,'
writer.writerow(self.TRAFFIC_CSV_HEADER) ' small_pkt_count, large_pkt_count,'
row = [now.isoformat(), f'{hour:.4f}'] + [f'{v:.6f}' for v in features] ' syn_ratio, udp_ratio, icmp_ratio, small_pkt_ratio, avg_pkt_size'
writer.writerow(row) ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
(now.isoformat(), hour, *features)
)
self._traffic_db.commit()
except Exception as e: except Exception as e:
log.error("Failed to write traffic log: %s", e) log.error("Failed to write traffic log: %s", e)
def _cleanup_traffic_log(self): def _cleanup_traffic_log(self):
"""Remove entries older than retention_days from traffic_log.csv.""" """Remove entries older than retention_days from traffic_samples."""
log_file = self.cfg['ai'].get('traffic_log_file', '/var/lib/xdp-defense/traffic_log.csv')
retention_days = self.cfg['ai'].get('traffic_log_retention_days', 7) retention_days = self.cfg['ai'].get('traffic_log_retention_days', 7)
cutoff = datetime.now() - timedelta(days=retention_days) cutoff = (datetime.now() - timedelta(days=retention_days)).isoformat()
if not os.path.exists(log_file):
return
try: try:
kept = [] cur = self._traffic_db.execute(
header = None 'DELETE FROM traffic_samples WHERE timestamp < ?', (cutoff,)
with open(log_file, 'r', newline='') as f: )
reader = csv.reader(f) deleted = cur.rowcount
header = next(reader, None) self._traffic_db.commit()
for row in reader: if deleted > 1000:
try: self._traffic_db.execute('VACUUM')
ts = datetime.fromisoformat(row[0]) log.info("Traffic log cleanup: deleted %d rows (retention=%dd)", deleted, retention_days)
if ts >= cutoff:
kept.append(row)
except (ValueError, IndexError):
kept.append(row) # keep unparseable rows
with open(log_file, 'w', newline='') as f:
writer = csv.writer(f)
if header:
writer.writerow(header)
writer.writerows(kept)
log.info("Traffic log cleanup: kept %d rows (retention=%dd)", len(kept), retention_days)
except Exception as e: except Exception as e:
log.error("Traffic log cleanup failed: %s", e) log.error("Traffic log cleanup failed: %s", e)
@@ -740,7 +753,7 @@ class DDoSDaemon:
hour_cos = math.cos(2 * math.pi * hour / 24) hour_cos = math.cos(2 * math.pi * hour / 24)
deltas_with_time = [hour_sin, hour_cos] + deltas # 17 features deltas_with_time = [hour_sin, hour_cos] + deltas # 17 features
# Log to traffic CSV # Log to traffic DB
self._log_traffic(now, hour, deltas_with_time) self._log_traffic(now, hour, deltas_with_time)
# Periodic log file cleanup (once per day) # Periodic log file cleanup (once per day)
@@ -758,7 +771,8 @@ class DDoSDaemon:
retrain_interval = self.cfg['ai'].get('retrain_interval', 86400) retrain_interval = self.cfg['ai'].get('retrain_interval', 86400)
if time.time() - self._last_retrain_time >= retrain_interval: if time.time() - self._last_retrain_time >= retrain_interval:
log.info("Auto-retrain triggered (interval=%ds)", retrain_interval) log.info("Auto-retrain triggered (interval=%ds)", retrain_interval)
if self.ai_detector.retrain_from_log(): db_path = self.cfg['ai'].get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db')
if self.ai_detector.retrain_from_log(db_path):
self._last_retrain_time = time.time() self._last_retrain_time = time.time()
log.info("Auto-retrain completed successfully") log.info("Auto-retrain completed successfully")
else: else: