Replace CSV traffic log with SQLite for better performance

- traffic_log.csv → traffic_log.db (SQLite with indexed timestamp)
- INSERT instead of CSV append, DELETE instead of file rewrite
- CLI queries use SQL (GROUP BY for traffic, LIMIT for log)
- retrain_from_log() uses read-only connection with time range query
- Config key: traffic_log_file → traffic_log_db

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
kaffa
2026-02-07 10:30:10 +09:00
parent 11c1ab0134
commit 3d1e353b1a
3 changed files with 147 additions and 132 deletions

View File

@@ -22,6 +22,7 @@ import logging
import logging.handlers
import csv
import pickle
import sqlite3
from collections import defaultdict
from datetime import datetime, timedelta
@@ -81,7 +82,7 @@ DEFAULT_CONFIG = {
'min_packets_for_sample': 20,
'model_file': '/var/lib/xdp-defense/ai_model.pkl',
'training_data_file': '/var/lib/xdp-defense/training_data.csv',
'traffic_log_file': '/var/lib/xdp-defense/traffic_log.csv',
'traffic_log_db': '/var/lib/xdp-defense/traffic_log.db',
'traffic_log_retention_days': 7,
'retrain_interval': 86400,
'retrain_window': 86400,
@@ -331,36 +332,30 @@ class AIDetector:
log.error("AI prediction error: %s", e)
return False, 0.0
def retrain_from_log(self):
"""Retrain the model from traffic_log.csv data."""
log_file = self.cfg.get('traffic_log_file', '/var/lib/xdp-defense/traffic_log.csv')
if not os.path.exists(log_file):
log.warning("Traffic log not found: %s", log_file)
def retrain_from_log(self, db_path=None):
"""Retrain the model from traffic_log.db data."""
if db_path is None:
db_path = self.cfg.get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db')
if not os.path.exists(db_path):
log.warning("Traffic log DB not found: %s", db_path)
return False
retrain_window = self.cfg.get('retrain_window', 86400)
cutoff = datetime.now() - timedelta(seconds=retrain_window)
cutoff = (datetime.now() - timedelta(seconds=retrain_window)).isoformat()
conn = None
try:
samples = []
with open(log_file, 'r', newline='') as f:
reader = csv.reader(f)
header = next(reader, None)
if header is None:
log.warning("Traffic log is empty")
return False
# Feature columns: skip timestamp and hour (first 2), take remaining 17
for row in reader:
try:
ts = datetime.fromisoformat(row[0])
if ts < cutoff:
continue
features = [float(v) for v in row[2:]] # skip timestamp, hour
if len(features) == 17:
samples.append(features)
except (ValueError, IndexError):
continue
conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
cur = conn.execute(
'SELECT hour_sin, hour_cos, total_packets, total_bytes, '
'tcp_syn_count, tcp_other_count, udp_count, icmp_count, '
'other_proto_count, unique_ips_approx, small_pkt_count, '
'large_pkt_count, syn_ratio, udp_ratio, icmp_ratio, '
'small_pkt_ratio, avg_pkt_size '
'FROM traffic_samples WHERE timestamp >= ? ORDER BY timestamp',
(cutoff,)
)
samples = [list(row) for row in cur.fetchall()]
if len(samples) < 10:
log.warning("Not enough recent samples for retrain (%d)", len(samples))
@@ -375,6 +370,9 @@ class AIDetector:
except Exception as e:
log.error("retrain_from_log failed: %s", e)
return False
finally:
if conn:
conn.close()
# ==================== ProfileManager ====================
@@ -488,6 +486,8 @@ class DDoSDaemon:
self._last_retrain_time = self._get_model_mtime()
self._last_log_cleanup = time.time()
self._init_traffic_db()
level = self.cfg['general'].get('log_level', 'info').upper()
log.setLevel(getattr(logging, level, logging.INFO))
@@ -550,69 +550,82 @@ class DDoSDaemon:
def _handle_sigusr1(self, signum, frame):
    """Handle SIGUSR1: retrain the AI model from the SQLite traffic log.

    On success the auto-retrain timer is reset so the periodic retrain
    does not fire again immediately.  On failure (DB missing, too few
    samples, ...) we fall back to collect mode via request_retrain().
    """
    log.info("SIGUSR1 received, triggering retrain from traffic log...")
    db_path = self.cfg['ai'].get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db')
    if self.ai_detector.retrain_from_log(db_path):
        self._last_retrain_time = time.time()
        log.info("SIGUSR1 retrain completed successfully")
    else:
        log.warning("SIGUSR1 retrain failed (falling back to collect mode)")
        self.ai_detector.request_retrain()
# ---- Traffic Logging ----
# ---- Traffic Logging (SQLite) ----
TRAFFIC_CSV_HEADER = [
'timestamp', 'hour',
'hour_sin', 'hour_cos',
'total_packets', 'total_bytes', 'tcp_syn_count', 'tcp_other_count',
'udp_count', 'icmp_count', 'other_proto_count', 'unique_ips_approx',
'small_pkt_count', 'large_pkt_count',
'syn_ratio', 'udp_ratio', 'icmp_ratio', 'small_pkt_ratio', 'avg_pkt_size'
]
def _init_traffic_db(self):
    """Open (creating if necessary) the SQLite traffic-log database.

    The open connection is kept on ``self._traffic_db``.  One row is
    stored per traffic sample; the index on ``timestamp`` supports the
    range queries used by retraining and retention cleanup.
    """
    db_path = self.cfg['ai'].get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db')
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    # check_same_thread=False: the connection may be touched from more
    # than one thread (e.g. signal-driven retrain).  NOTE(review):
    # confirm writes are serialized by the caller.
    conn = sqlite3.connect(db_path, check_same_thread=False)
    schema_sql = (
        'CREATE TABLE IF NOT EXISTS traffic_samples ('
        ' id INTEGER PRIMARY KEY AUTOINCREMENT,'
        ' timestamp TEXT NOT NULL,'
        ' hour REAL NOT NULL,'
        ' hour_sin REAL NOT NULL,'
        ' hour_cos REAL NOT NULL,'
        ' total_packets REAL NOT NULL,'
        ' total_bytes REAL NOT NULL,'
        ' tcp_syn_count REAL NOT NULL,'
        ' tcp_other_count REAL NOT NULL,'
        ' udp_count REAL NOT NULL,'
        ' icmp_count REAL NOT NULL,'
        ' other_proto_count REAL NOT NULL,'
        ' unique_ips_approx REAL NOT NULL,'
        ' small_pkt_count REAL NOT NULL,'
        ' large_pkt_count REAL NOT NULL,'
        ' syn_ratio REAL NOT NULL,'
        ' udp_ratio REAL NOT NULL,'
        ' icmp_ratio REAL NOT NULL,'
        ' small_pkt_ratio REAL NOT NULL,'
        ' avg_pkt_size REAL NOT NULL'
        ')'
    )
    index_sql = 'CREATE INDEX IF NOT EXISTS idx_timestamp ON traffic_samples(timestamp)'
    conn.execute(schema_sql)
    conn.execute(index_sql)
    conn.commit()
    self._traffic_db = conn
    log.info("Traffic log DB initialized: %s", db_path)
def _log_traffic(self, now, hour, features):
    """Insert one traffic sample into the traffic_samples table.

    Args:
        now: datetime of the sample; stored as an ISO-8601 string so
            lexicographic comparison matches chronological order.
        hour: fractional hour-of-day of the sample.
        features: 17 numeric features (hour_sin, hour_cos, then the
            counters and ratios) in the column order of the INSERT.

    Errors are logged, never raised — losing a single sample must not
    take down the daemon's main loop.
    """
    try:
        self._traffic_db.execute(
            'INSERT INTO traffic_samples ('
            ' timestamp, hour, hour_sin, hour_cos,'
            ' total_packets, total_bytes, tcp_syn_count, tcp_other_count,'
            ' udp_count, icmp_count, other_proto_count, unique_ips_approx,'
            ' small_pkt_count, large_pkt_count,'
            ' syn_ratio, udp_ratio, icmp_ratio, small_pkt_ratio, avg_pkt_size'
            ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
            (now.isoformat(), hour, *features)
        )
        self._traffic_db.commit()
    except Exception as e:
        log.error("Failed to write traffic log: %s", e)
def _cleanup_traffic_log(self):
    """Delete traffic_samples rows older than the retention window.

    The cutoff is compared as an ISO-8601 string, which sorts the same
    as chronological order, so the indexed ``timestamp < ?`` range scan
    is correct.  VACUUM (which rewrites the whole DB file) is only run
    after a large purge to reclaim disk space.
    """
    retention_days = self.cfg['ai'].get('traffic_log_retention_days', 7)
    cutoff = (datetime.now() - timedelta(days=retention_days)).isoformat()
    try:
        cur = self._traffic_db.execute(
            'DELETE FROM traffic_samples WHERE timestamp < ?', (cutoff,)
        )
        # Cursor.rowcount is the number of rows removed by the DELETE.
        deleted = cur.rowcount
        self._traffic_db.commit()
        if deleted > 1000:
            # VACUUM must run outside a transaction; the commit() above
            # guarantees that.
            self._traffic_db.execute('VACUUM')
        log.info("Traffic log cleanup: deleted %d rows (retention=%dd)", deleted, retention_days)
    except Exception as e:
        log.error("Traffic log cleanup failed: %s", e)
@@ -740,7 +753,7 @@ class DDoSDaemon:
hour_cos = math.cos(2 * math.pi * hour / 24)
deltas_with_time = [hour_sin, hour_cos] + deltas # 17 features
# Log to traffic CSV
# Log to traffic DB
self._log_traffic(now, hour, deltas_with_time)
# Periodic log file cleanup (once per day)
@@ -758,7 +771,8 @@ class DDoSDaemon:
retrain_interval = self.cfg['ai'].get('retrain_interval', 86400)
if time.time() - self._last_retrain_time >= retrain_interval:
log.info("Auto-retrain triggered (interval=%ds)", retrain_interval)
if self.ai_detector.retrain_from_log():
db_path = self.cfg['ai'].get('traffic_log_db', '/var/lib/xdp-defense/traffic_log.db')
if self.ai_detector.retrain_from_log(db_path):
self._last_retrain_time = time.time()
log.info("Auto-retrain completed successfully")
else: