Files
xdp-defense/bpf/xdp_ddos.c
kaffa 667c6eac81 Fix 12 code review issues (4 MEDIUM + 8 LOW)
MEDIUM:
- M1: Whitelist direct IP/CIDR additions now persist to direct.txt
- M2: get_map_id() uses 5s TTL cache (single bpftool call for all maps)
- M3: IPv6 extension header parsing in xdp_ddos.c (hop-by-hop/routing/frag/dst)
- M4: Shell injection prevention - sanitize_input() + sys.argv[] for all Python calls

LOW:
- L1: Remove redundant self.running (uses _stop_event only)
- L2: Remove unused config values (rate_limit_after, cooldown_multiplier, retrain_interval)
- L3: Thread poll intervals reloaded on SIGHUP
- L4: batch_map_operation counts only successfully written entries
- L5: Clarify unique_ips_approx comment (per-packet counter)
- L6: Document LRU_HASH multi-CPU race condition as acceptable
- L7: Download Cloudflare IPv6 ranges in whitelist preset
- L8: Fix file handle leak in xdp_country.py list_countries()

Also: SIGHUP now preserves EWMA/violation state, daemon skips whitelisted
IPs in EWMA/AI escalation, deep copy for default config, IHL validation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-07 09:23:41 +09:00

478 lines
14 KiB
C

// SPDX-License-Identifier: GPL-2.0
// XDP DDoS Defense - Adaptive rate limiting with traffic feature collection
// Per-IP rate counters, automatic blocking with expiry, AI feature aggregation
// Part of xdp-defense: chained via libxdp dispatcher (priority 20)
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <xdp/xdp_helpers.h>
// VLAN header (802.1Q / 802.1ad); layout matches the on-wire tag that
// follows the Ethernet header.
struct vlan_hdr {
    __be16 h_vlan_TCI;                 // priority/DEI/VLAN-ID (network order)
    __be16 h_vlan_encapsulated_proto;  // EtherType of the inner payload
};
// LPM trie keys for shared whitelist maps. Layout must match the keys used
// by the userspace tooling and the sibling xdp_blocker program that pins
// these maps: prefixlen first, then the address, as required by
// BPF_MAP_TYPE_LPM_TRIE.
struct ipv4_lpm_key {
    __u32 prefixlen;  // number of significant leading bits (1..32)
    __u32 addr;       // IPv4 address in network byte order
};
struct ipv6_lpm_key {
    __u32 prefixlen;  // number of significant leading bits (1..128)
    __u8 addr[16];    // IPv6 address bytes in network order
};
// Rate counter entry per source IP (value of rate_counter_v4/v6).
struct rate_entry {
    __u64 packets;    // packets seen in the current window
    __u64 bytes;      // bytes seen in the current window
    __u64 last_seen;  // ktime_ns of the start of the current window
};
// Rate configuration (written by the userspace daemon into rate_config[0]).
struct rate_cfg {
    __u64 pps_threshold; // packets per second threshold (0 disables rate limiting)
    __u64 bps_threshold; // bytes per second threshold (0 = disabled)
    __u64 window_ns;     // time window in nanoseconds (0 -> default 1s in code)
};
// Blocked IP entry with expiry (value of blocked_ips_v4/v6).
struct block_entry {
    __u64 expire_ns;  // ktime_ns when block expires (0 = permanent)
    __u64 blocked_at; // ktime_ns when blocked (set by userspace)
    __u64 drop_count; // packets dropped while blocked (incremented in-kernel)
};
// Traffic features for AI analysis (per-CPU; the daemon sums across CPUs).
struct traffic_features {
    __u64 total_packets;
    __u64 total_bytes;
    __u64 tcp_syn_count;     // TCP segments with SYN set
    __u64 tcp_other_count;   // TCP segments without SYN
    __u64 udp_count;
    __u64 icmp_count;        // ICMPv4 + ICMPv6 combined
    __u64 other_proto_count;
    __u64 unique_ips_approx; // per-packet counter (not truly unique, used as relative indicator)
    __u64 small_pkt_count;   // packets < 100 bytes
    __u64 large_pkt_count;   // packets > 1400 bytes
};
// ==================== BPF Maps ====================
// Shared whitelist maps (pinned by xdp_blocker via LIBBPF_PIN_BY_NAME and
// reused here, so both programs consult the same whitelist). LPM tries
// require BPF_F_NO_PREALLOC.
struct {
    __uint(type, BPF_MAP_TYPE_LPM_TRIE);
    __type(key, struct ipv4_lpm_key);
    __type(value, __u64);
    __uint(max_entries, 4096);
    __uint(map_flags, BPF_F_NO_PREALLOC);
    __uint(pinning, LIBBPF_PIN_BY_NAME);
} whitelist_v4 SEC(".maps");
struct {
    __uint(type, BPF_MAP_TYPE_LPM_TRIE);
    __type(key, struct ipv6_lpm_key);
    __type(value, __u64);
    __uint(max_entries, 4096);
    __uint(map_flags, BPF_F_NO_PREALLOC);
    __uint(pinning, LIBBPF_PIN_BY_NAME);
} whitelist_v6 SEC(".maps");
// Per-IPv4 rate counters. LRU_HASH so the busiest sources stay resident and
// idle entries are evicted automatically when the map fills.
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __type(key, __u32); // IPv4 address
    __type(value, struct rate_entry);
    __uint(max_entries, 65536);
} rate_counter_v4 SEC(".maps");
// Per-IPv6 rate counters
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __type(key, struct in6_addr); // IPv6 address
    __type(value, struct rate_entry);
    __uint(max_entries, 32768);
} rate_counter_v6 SEC(".maps");
// Rate configuration (single struct rate_cfg at index 0, set by the daemon)
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __type(key, __u32);
    __type(value, struct rate_cfg);
    __uint(max_entries, 1);
} rate_config SEC(".maps");
// Blocked IPv4 addresses (populated by the daemon, expired lazily in-kernel)
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __type(key, __u32);
    __type(value, struct block_entry);
    __uint(max_entries, 16384);
} blocked_ips_v4 SEC(".maps");
// Blocked IPv6 addresses
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __type(key, struct in6_addr);
    __type(value, struct block_entry);
    __uint(max_entries, 8192);
} blocked_ips_v6 SEC(".maps");
// Global statistics: 0=passed, 1=dropped_blocked, 2=dropped_rate, 3=total, 4=errors
// Per-CPU so increments need no atomics; the daemon sums across CPUs.
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type(key, __u32);
    __type(value, __u64);
    __uint(max_entries, 5);
} global_stats SEC(".maps");
// Traffic features for AI (single struct at index 0, per-CPU)
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type(key, __u32);
    __type(value, struct traffic_features);
    __uint(max_entries, 1);
} traffic_feature SEC(".maps");
// ==================== Helpers ====================
// Bump one slot of the per-CPU global_stats array (see index legend at the
// map definition). A lookup miss (out-of-range index) is silently ignored.
static __always_inline void inc_stat(__u32 idx) {
    __u64 *counter = bpf_map_lookup_elem(&global_stats, &idx);
    if (!counter)
        return;
    *counter += 1;
}
// Fold one packet into the per-CPU traffic_features record consumed by the
// userspace AI daemon. Called for every parsed IP packet, including
// whitelisted sources, so the feature stream reflects total traffic.
static __always_inline void update_features(__u64 pkt_len, __u8 proto, __u8 tcp_flags) {
    __u32 zero = 0;
    struct traffic_features *feat = bpf_map_lookup_elem(&traffic_feature, &zero);
    if (!feat)
        return;

    feat->total_packets++;
    feat->total_bytes += pkt_len;
    // Per-packet counter, not a distinct-IP count; the daemon treats it as a
    // relative activity indicator only.
    feat->unique_ips_approx++;

    if (pkt_len < 100)
        feat->small_pkt_count++;
    else if (pkt_len > 1400)
        feat->large_pkt_count++;

    if (proto == IPPROTO_TCP) {
        if (tcp_flags & 0x02) // SYN bit
            feat->tcp_syn_count++;
        else
            feat->tcp_other_count++;
    } else if (proto == IPPROTO_UDP) {
        feat->udp_count++;
    } else if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
        feat->icmp_count++;
    } else {
        feat->other_proto_count++;
    }
}
// Return 1 if the IPv4 source is currently blocked (counting the drop),
// 0 otherwise. Expired entries are removed lazily on first packet after
// expiry; expire_ns == 0 means a permanent block.
static __always_inline int check_blocked_v4(__u32 ip, __u64 now) {
    struct block_entry *entry = bpf_map_lookup_elem(&blocked_ips_v4, &ip);
    if (!entry)
        return 0;
    int expired = (entry->expire_ns != 0) && (now > entry->expire_ns);
    if (expired) {
        bpf_map_delete_elem(&blocked_ips_v4, &ip);
        return 0;
    }
    entry->drop_count++;
    return 1;
}
// IPv6 counterpart of check_blocked_v4: 1 = blocked (drop counted),
// 0 = not blocked or block just expired (entry removed lazily).
static __always_inline int check_blocked_v6(struct in6_addr *ip, __u64 now) {
    struct block_entry *entry = bpf_map_lookup_elem(&blocked_ips_v6, ip);
    if (!entry)
        return 0;
    int expired = (entry->expire_ns != 0) && (now > entry->expire_ns);
    if (expired) {
        bpf_map_delete_elem(&blocked_ips_v6, ip);
        return 0;
    }
    entry->drop_count++;
    return 1;
}
// Fixed-window rate check for an IPv4 source: returns 1 when the configured
// pps (or optional bps) threshold is exceeded inside the current window,
// 0 otherwise. A pps_threshold of 0 disables the check entirely.
// Note: LRU_HASH value updates on multi-CPU are racy (no per-entry lock), so
// counters may be slightly inaccurate under high concurrency. Acceptable for
// rate limiting, where approximate enforcement is sufficient.
static __always_inline int rate_check_v4(__u32 ip, __u64 now, __u64 pkt_len) {
    __u32 zero = 0;
    struct rate_cfg *cfg = bpf_map_lookup_elem(&rate_config, &zero);
    if (!cfg || cfg->pps_threshold == 0)
        return 0; // rate limiting disabled

    __u64 window = cfg->window_ns ? cfg->window_ns : 1000000000ULL; // default 1s

    struct rate_entry *cur = bpf_map_lookup_elem(&rate_counter_v4, &ip);
    if (!cur) {
        // First packet from this source (or its entry was LRU-evicted):
        // open a fresh window seeded with this packet.
        struct rate_entry fresh = {
            .packets = 1,
            .bytes = pkt_len,
            .last_seen = now,
        };
        bpf_map_update_elem(&rate_counter_v4, &ip, &fresh, BPF_ANY);
        return 0;
    }

    if (now - cur->last_seen >= window) {
        // Window elapsed: restart counting from this packet.
        cur->packets = 1;
        cur->bytes = pkt_len;
        cur->last_seen = now;
        return 0;
    }

    // Still inside the window: accumulate and compare against thresholds.
    cur->packets++;
    cur->bytes += pkt_len;
    if (cur->packets > cfg->pps_threshold)
        return 1;
    if (cfg->bps_threshold && cur->bytes > cfg->bps_threshold)
        return 1;
    return 0;
}
// Fixed-window rate check for an IPv6 source; mirrors rate_check_v4 but
// keys the counter map by the full 128-bit address. Returns 1 when the
// configured threshold is exceeded, 0 otherwise (or when disabled).
static __always_inline int rate_check_v6(struct in6_addr *ip, __u64 now, __u64 pkt_len) {
    __u32 zero = 0;
    struct rate_cfg *cfg = bpf_map_lookup_elem(&rate_config, &zero);
    if (!cfg || cfg->pps_threshold == 0)
        return 0; // rate limiting disabled

    __u64 window = cfg->window_ns ? cfg->window_ns : 1000000000ULL; // default 1s

    struct rate_entry *cur = bpf_map_lookup_elem(&rate_counter_v6, ip);
    if (!cur) {
        // No live entry: seed a new window with this packet.
        struct rate_entry fresh = {
            .packets = 1,
            .bytes = pkt_len,
            .last_seen = now,
        };
        bpf_map_update_elem(&rate_counter_v6, ip, &fresh, BPF_ANY);
        return 0;
    }

    if (now - cur->last_seen >= window) {
        // Window elapsed: restart counting from this packet.
        cur->packets = 1;
        cur->bytes = pkt_len;
        cur->last_seen = now;
        return 0;
    }

    cur->packets++;
    cur->bytes += pkt_len;
    if (cur->packets > cfg->pps_threshold)
        return 1;
    if (cfg->bps_threshold && cur->bytes > cfg->bps_threshold)
        return 1;
    return 0;
}
// ==================== Main XDP Program ====================
// Per-packet pipeline: parse L2 (up to two stacked VLAN tags) -> parse L3
// (IPv4 or IPv6 incl. extension headers) -> update AI feature counters ->
// whitelist bypass -> block-list drop -> per-source rate-limit drop.
// Every parse failure increments the error stat (index 4) and PASSES the
// packet: the program fails open rather than dropping unparseable traffic.
SEC("xdp")
int xdp_ddos(struct xdp_md *ctx) {
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;
    __u64 now = bpf_ktime_get_ns();
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end) {
        inc_stat(4);
        return XDP_PASS;
    }
    __u16 eth_proto = bpf_ntohs(eth->h_proto);
    __u64 pkt_len = data_end - data; // full frame length as seen by XDP
    void *l3_hdr = (void *)(eth + 1);
    // Handle VLAN tags (802.1Q and QinQ)
    if (eth_proto == ETH_P_8021Q || eth_proto == ETH_P_8021AD) {
        struct vlan_hdr *vhdr = l3_hdr;
        if ((void *)(vhdr + 1) > data_end) {
            inc_stat(4);
            return XDP_PASS;
        }
        eth_proto = bpf_ntohs(vhdr->h_vlan_encapsulated_proto);
        l3_hdr = (void *)(vhdr + 1);
        // Handle QinQ (double VLAN)
        if (eth_proto == ETH_P_8021Q || eth_proto == ETH_P_8021AD) {
            vhdr = l3_hdr;
            if ((void *)(vhdr + 1) > data_end) {
                inc_stat(4);
                return XDP_PASS;
            }
            eth_proto = bpf_ntohs(vhdr->h_vlan_encapsulated_proto);
            l3_hdr = (void *)(vhdr + 1);
        }
    }
    // Increment total counter (stat index 3) once L2 parsing succeeded
    inc_stat(3);
    // Handle IPv4
    if (eth_proto == ETH_P_IP) {
        struct iphdr *iph = l3_hdr;
        if ((void *)(iph + 1) > data_end) {
            inc_stat(4);
            return XDP_PASS;
        }
        __u32 saddr = iph->saddr; // network byte order, matches map keys
        __u8 proto = iph->protocol;
        __u8 tcp_flags = 0;
        // Validate IHL (minimum 5 = 20 bytes)
        if (iph->ihl < 5) {
            inc_stat(4);
            return XDP_PASS;
        }
        // Extract TCP flags if applicable. Byte 13 of the TCP header holds
        // the flag bits; a failed bounds check just leaves tcp_flags = 0.
        // NOTE(review): for a non-first IPv4 fragment these bytes are
        // payload, not a TCP header — presumably acceptable for coarse
        // feature stats; confirm.
        if (proto == IPPROTO_TCP) {
            struct tcphdr *tcph = l3_hdr + (iph->ihl * 4);
            if ((void *)(tcph + 1) <= data_end) {
                tcp_flags = ((__u8 *)tcph)[13];
            }
        }
        // Update traffic features (always, even for whitelisted)
        update_features(pkt_len, proto, tcp_flags);
        // Whitelist check - bypass rate limiting but collect stats.
        // LPM lookup keys always use the full prefix length (32).
        struct ipv4_lpm_key wl_key = {.prefixlen = 32, .addr = saddr};
        if (bpf_map_lookup_elem(&whitelist_v4, &wl_key)) {
            inc_stat(0); // passed
            return XDP_PASS;
        }
        // Check blocked list
        if (check_blocked_v4(saddr, now)) {
            inc_stat(1); // dropped: blocked
            return XDP_DROP;
        }
        // Rate check
        if (rate_check_v4(saddr, now, pkt_len)) {
            inc_stat(2); // dropped: rate exceeded
            return XDP_DROP;
        }
        inc_stat(0);
        return XDP_PASS;
    }
    // Handle IPv6
    else if (eth_proto == ETH_P_IPV6) {
        struct ipv6hdr *ip6h = l3_hdr;
        if ((void *)(ip6h + 1) > data_end) {
            inc_stat(4);
            return XDP_PASS;
        }
        struct in6_addr saddr = ip6h->saddr;
        __u8 proto = ip6h->nexthdr;
        __u8 tcp_flags = 0;
        void *next_hdr = (void *)(ip6h + 1);
        // Skip known IPv6 extension headers (up to 4 to stay within verifier
        // limits). Each header's first byte is the next-header value; a
        // bounds-check failure exits the loop and the current proto is used
        // as-is for feature accounting.
        #pragma unroll
        for (int i = 0; i < 4; i++) {
            if (proto != IPPROTO_HOPOPTS && proto != IPPROTO_ROUTING &&
                proto != IPPROTO_DSTOPTS && proto != IPPROTO_FRAGMENT)
                break;
            if (proto == IPPROTO_FRAGMENT) {
                // Fragment header is fixed 8 bytes
                // NOTE(review): non-first fragments carry payload after this
                // header, so the TCP-flag read below may see payload bytes —
                // presumably acceptable for coarse stats; confirm.
                if (next_hdr + 8 > data_end)
                    break;
                proto = *(__u8 *)next_hdr;
                next_hdr += 8;
            } else {
                // Other extension headers: length in 2nd byte (units of 8 octets, +8)
                if (next_hdr + 2 > data_end)
                    break;
                __u8 ext_len = *((__u8 *)next_hdr + 1);
                __u32 hdr_len = (((__u32)ext_len) + 1) * 8;
                if (next_hdr + hdr_len > data_end)
                    break;
                proto = *(__u8 *)next_hdr;
                next_hdr += hdr_len;
            }
        }
        if (proto == IPPROTO_TCP) {
            struct tcphdr *tcph = next_hdr;
            if ((void *)(tcph + 1) <= data_end) {
                tcp_flags = ((__u8 *)tcph)[13]; // TCP flag byte
            }
        }
        // Update traffic features (always, even for whitelisted)
        update_features(pkt_len, proto, tcp_flags);
        // Whitelist check - bypass rate limiting but collect stats
        struct ipv6_lpm_key wl_key = {.prefixlen = 128};
        __builtin_memcpy(wl_key.addr, &saddr, 16);
        if (bpf_map_lookup_elem(&whitelist_v6, &wl_key)) {
            inc_stat(0); // passed
            return XDP_PASS;
        }
        // Check blocked list
        if (check_blocked_v6(&saddr, now)) {
            inc_stat(1); // dropped: blocked
            return XDP_DROP;
        }
        // Rate check
        if (rate_check_v6(&saddr, now, pkt_len)) {
            inc_stat(2); // dropped: rate exceeded
            return XDP_DROP;
        }
        inc_stat(0);
        return XDP_PASS;
    }
    // Non-IP traffic: pass through
    inc_stat(0);
    return XDP_PASS;
}
// GPL license declaration required for the BPF helpers used above.
char _license[] SEC("license") = "GPL";
// libxdp multi-program dispatcher metadata: run this program at priority 20
// and continue to the next program in the chain only when it returns
// XDP_PASS (chain_call_actions bit for XDP_PASS set to 1).
struct {
    __uint(priority, 20);
    __uint(XDP_PASS, 1);
} XDP_RUN_CONFIG(xdp_ddos);