需改ebpf程序探测内核,测试性能,验证想法,更新笔记。
This commit is contained in:
BIN
ebpf/c/replica
Executable file
BIN
ebpf/c/replica
Executable file
Binary file not shown.
@@ -189,6 +189,7 @@ static void* reader_thread_func(void *arg)
|
||||
uint64_t last = __atomic_load_n(&g_shm.hdr->last_seq, __ATOMIC_ACQUIRE);
|
||||
if (local_seq > last) {
|
||||
// 没有新数据,短暂休眠避免空转
|
||||
usleep(500);
|
||||
continue;
|
||||
}
|
||||
if (read_off+ sizeof(replica_rec_hdr_t) >= g_shm.hdr->capacity) {
|
||||
|
||||
39
ebpf/leak_detect/complete.bpf
Normal file
39
ebpf/leak_detect/complete.bpf
Normal file
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bpftrace
/*
 * complete.bpf - count calls to and returns from __completed_cmd in the
 * kvstore binary at /home/lian/share/9.1-kvstore/kvstore.
 *
 * Maps:
 *   @enter  number of times __completed_cmd was entered (uprobe)
 *   @exit   number of times __completed_cmd returned    (uretprobe)
 *
 * Both counters are reported every 5 seconds and once more when the
 * script ends.
 */

BEGIN
{
    printf("开始统计 kvstore 进程的 __completed_cmd 调用次数...\n");
    printf("每 5 秒打印一次统计,Ctrl-C 退出\n\n");

    // Counters start at zero.
    @enter = 0;
    @exit = 0;
}

// Periodic report: timestamp followed by the two running totals.
interval:s:5
{
    time("%H:%M:%S");
    printf(" __completed_enter_cmd 调用次数: %10d\n", @enter);
    printf(" __completed_exit_cmd 调用次数: %10d\n", @exit);

    // Optional: uncomment the lines below to reset the counters after
    // every report.
    // clear(@enter);
    // clear(@exit);
}

// Function entry. The original script incremented @exit here, which made
// the report label the two counters backwards.
uprobe:/home/lian/share/9.1-kvstore/kvstore:__completed_cmd
{
    @enter++;
}

// Function return. The original script incremented @enter here.
uretprobe:/home/lian/share/9.1-kvstore/kvstore:__completed_cmd
{
    @exit++;
}

// Final summary. The original printed only the header line; the totals are
// now printed in the same format the sibling scripts use.
END
{
    printf("\n最终统计:\n");
    printf("__completed_cmd enter: %d 次\n", @enter);
    printf("__completed_cmd exit : %d 次\n", @exit);
}
|
||||
39
ebpf/leak_detect/tcp_probe.bpf
Normal file
39
ebpf/leak_detect/tcp_probe.bpf
Normal file
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bpftrace
/*
 * tcp_probe.bpf - count entries to and returns from the kernel function
 * tcp_rcv_established.
 *
 * Maps:
 *   @enter  incremented by the kprobe    (function entry)
 *   @exit   incremented by the kretprobe (function return)
 *
 * NOTE(review): neither probe carries a predicate, so the counters cover
 * every caller on the system even though the banner mentions the kvstore
 * process - confirm whether a filter was intended.
 */

BEGIN {
    printf("开始统计 kvstore 进程的 tcp_rcv_established 调用次数...\n");
    printf("每 5 秒打印一次统计,Ctrl-C 退出\n\n");

    // Counters start at zero.
    @enter = 0;
    @exit = 0;
}

// Every 5 seconds: print a timestamp and both running totals.
interval:s:5 {
    time("%H:%M:%S");
    printf(" tcp_rcv_established 调用次数: %10d\n", @enter);
    printf(" tcp_rcv_established ret 调用次数: %10d\n", @exit);

    // Optional: uncomment to reset the counters after every report.
    // clear(@enter);
    // clear(@exit);
}

// Function entry.
kprobe:tcp_rcv_established {
    @enter++;
}

// Function return.
kretprobe:tcp_rcv_established {
    @exit++;
}

// Final summary printed when the script ends.
END {
    printf("\n最终统计:\n");
    printf("tcp_rcv_established : %d 次\n", @enter);
    printf("tcp_rcv_established ret: %d 次\n", @exit);
}
|
||||
41
ebpf/leak_detect/tracepoint.bpf
Normal file
41
ebpf/leak_detect/tracepoint.bpf
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bpftrace
/*
 * tracepoint.bpf - count recvfrom system calls issued by tasks whose comm
 * is "kvstore".
 *
 * Maps:
 *   @enter  incremented on syscalls:sys_enter_recvfrom
 *   @exit   incremented on syscalls:sys_exit_recvfrom
 *
 * Both counters are reported every 5 seconds and once more at exit.
 */

BEGIN {
    printf("开始统计 kvstore 进程的 recvfrom 调用次数...\n");
    printf("每 5 秒打印一次统计,Ctrl-C 退出\n\n");

    // Counters start at zero.
    @enter = 0;
    @exit = 0;
}

// Every 5 seconds: print a timestamp and both running totals.
interval:s:5 {
    time("%H:%M:%S");
    printf(" sys_enter_recvfrom 调用次数: %10d\n", @enter);
    printf(" sys_exit_recvfrom 调用次数: %10d\n", @exit);

    // Optional: uncomment to reset the counters after every report.
    // clear(@enter);
    // clear(@exit);
}

// Syscall entry, restricted to the kvstore process by name.
tracepoint:syscalls:sys_enter_recvfrom /comm == "kvstore"/ {
    @enter++;
}

// Syscall exit, same filter.
tracepoint:syscalls:sys_exit_recvfrom /comm == "kvstore"/ {
    @exit++;
}

// Final summary printed when the script ends.
END {
    printf("\n最终统计:\n");
    printf("sys_enter_recvfrom: %d 次\n", @enter);
    printf("sys_exit_recvfrom : %d 次\n", @exit);
}
|
||||
@@ -1,12 +1,12 @@
|
||||
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
OUTPUT := .output
|
||||
CLANG ?= clang
|
||||
LIBBPF_SRC := $(abspath ../../libbpf/src)
|
||||
BPFTOOL_SRC := $(abspath ../../bpftool/src)
|
||||
LIBBPF_SRC := $(abspath ../../libbpf-bootstrap/libbpf/src)
|
||||
BPFTOOL_SRC := $(abspath ../../libbpf-bootstrap/bpftool/src)
|
||||
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
|
||||
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
|
||||
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
|
||||
LIBBLAZESYM_SRC := $(abspath ../../blazesym/)
|
||||
LIBBLAZESYM_SRC := $(abspath ../../libbpf-bootstrap/blazesym/)
|
||||
LIBBLAZESYM_INC := $(abspath $(LIBBLAZESYM_SRC)/capi/include)
|
||||
LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym_c.a)
|
||||
ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
|
||||
@@ -16,11 +16,11 @@ ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
|
||||
| sed 's/mips.*/mips/' \
|
||||
| sed 's/riscv64/riscv/' \
|
||||
| sed 's/loongarch64/loongarch/')
|
||||
VMLINUX := ../../vmlinux.h/include/$(ARCH)/vmlinux.h
|
||||
VMLINUX := ../../libbpf-bootstrap/vmlinux.h/include/$(ARCH)/vmlinux.h
|
||||
# Use our own libbpf API headers and Linux UAPI headers distributed with
|
||||
# libbpf to avoid dependency on system-wide headers, which could be missing or
|
||||
# outdated
|
||||
INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(LIBBLAZESYM_INC)
|
||||
INCLUDES := -I$(OUTPUT) -I../../libbpf-bootstrap/libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(LIBBLAZESYM_INC)
|
||||
CFLAGS := -g -Wall
|
||||
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
|
||||
|
||||
|
||||
BIN
ebpf/old.c/replica
Executable file
BIN
ebpf/old.c/replica
Executable file
Binary file not shown.
@@ -1,80 +1,133 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_endian.h>
|
||||
|
||||
#include "replica.h"
|
||||
|
||||
char LICENSE[] SEC("license") = "Dual BSD/GPL";
|
||||
|
||||
#define FLAG_SSYNC_HAPPENED 0
|
||||
#define TARGET_PORT 8888
|
||||
|
||||
/* ================= BPF Maps ================= */
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(int));
|
||||
} events SEC(".maps");
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, __u32);
|
||||
__type(value, __u32);
|
||||
__uint(max_entries, 1);
|
||||
} flags SEC(".maps");
|
||||
|
||||
/* __completed_cmd(const uint8_t *cmd, size_t len, unsigned long long seq); */
|
||||
SEC("uprobe//home/lian/share/9.1-kvstore/kvstore:__completed_cmd")
|
||||
int BPF_KPROBE(handle_completed_cmd,
|
||||
const __u8 *cmd, size_t len, __u64 seq)
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_RINGBUF);
|
||||
__uint(max_entries, 1 << 26); // 64MB
|
||||
} rb SEC(".maps");
|
||||
|
||||
/* ================= Helper Functions ================= */
|
||||
|
||||
// 无需 process filter,改用 socket port filter
|
||||
|
||||
/* ================= Kernel Hooks (TCP Layer) ================= */
|
||||
|
||||
/*
|
||||
* 使用 kprobe 挂载 tcp_rcv_established
|
||||
* 此时 skb 包含完整的 TCP 包(Header + Payload),数据在内核态。
|
||||
*/
|
||||
SEC("kprobe/tcp_rcv_established")
|
||||
int BPF_KPROBE(trace_tcp_rcv, struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct replica_event evt = {};
|
||||
__u32 copy_len;
|
||||
// 1. 检查 SSYNC 标志是否已开启 (只在全量同步后开始抓包)
|
||||
__u32 flag_key = FLAG_SSYNC_HAPPENED;
|
||||
__u32 *flag_val = bpf_map_lookup_elem(&flags, &flag_key);
|
||||
if (!flag_val || *flag_val == 0)
|
||||
return 0;
|
||||
|
||||
evt.type = EVENT_COMPLETED_CMD;
|
||||
evt.complete.seq = seq;
|
||||
// 2. 过滤端口 8888
|
||||
// sk->sk_num 存储的是 Host Byte Order 的本地端口
|
||||
__u16 lport = BPF_CORE_READ(sk, __sk_common.skc_num);
|
||||
if (lport != TARGET_PORT)
|
||||
return 0;
|
||||
|
||||
copy_len = len;
|
||||
if (copy_len > MAX_CMD_LEN)
|
||||
copy_len = MAX_CMD_LEN;
|
||||
// 3. 计算数据长度
|
||||
// 在 tcp_rcv_established 中,skb->len 是 (TCP Header + Data) 的长度
|
||||
// skb->data 指向 TCP Header 的起始位置
|
||||
unsigned int skb_len = BPF_CORE_READ(skb, len);
|
||||
|
||||
// 读取 TCP Header 长度 (doff 字段,单位是 4字节)
|
||||
// 需要读取 skb->data 指向的内存的前几个字节来获取 doff
|
||||
unsigned char *skb_data = BPF_CORE_READ(skb, data);
|
||||
|
||||
// 读取 TCP Header 的第 12 个字节 (包含 Data Offset)
|
||||
// Offset 12: Data Offset (4 bits) | Reserved (3 bits) | NS (1 bit)
|
||||
unsigned char doff_byte;
|
||||
if (bpf_probe_read_kernel(&doff_byte, 1, skb_data + 12) < 0)
|
||||
return 0;
|
||||
|
||||
unsigned int tcp_hdr_len = (doff_byte >> 4) * 4;
|
||||
|
||||
// 计算 Payload 长度
|
||||
if (skb_len <= tcp_hdr_len)
|
||||
return 0; // 只有 ACK 没有数据,或者是控制包
|
||||
|
||||
evt.complete.len = copy_len;
|
||||
unsigned int payload_len = skb_len - tcp_hdr_len;
|
||||
|
||||
if (cmd)
|
||||
bpf_probe_read_user(evt.complete.cmd, copy_len, cmd);
|
||||
// 4. 准备 RingBuffer 数据
|
||||
struct replica_event *e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
|
||||
if (!e)
|
||||
return 0;
|
||||
|
||||
bpf_perf_event_output(ctx, &events,
|
||||
BPF_F_CURRENT_CPU,
|
||||
&evt, sizeof(evt));
|
||||
e->type = EVENT_COMPLETED_CMD;
|
||||
|
||||
// 截断超长数据
|
||||
if (payload_len > MAX_CMD_LEN)
|
||||
e->complete.len = MAX_CMD_LEN;
|
||||
else
|
||||
e->complete.len = payload_len;
|
||||
|
||||
// 5. 核心修改:使用 bpf_probe_read_kernel 读取数据
|
||||
// 数据起始位置 = skb->data + tcp_hdr_len
|
||||
if (bpf_probe_read_kernel(&e->complete.cmd[0], e->complete.len, skb_data + tcp_hdr_len) < 0) {
|
||||
bpf_ringbuf_discard(e, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
bpf_ringbuf_submit(e, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* __ssync(const uint8_t *ip, uint32_t ip_len, int port, unsigned long long seq); */
|
||||
/* ================= Uprobe Hooks================= */
|
||||
|
||||
SEC("uprobe//home/lian/share/9.1-kvstore/kvstore:__ssync")
|
||||
int BPF_KPROBE(handle_ssync,
|
||||
const __u8 *ip, __u32 ip_len, int port, __u64 seq)
|
||||
const __u8 *ip, __u32 ip_len, int port, __u64 seq_unused)
|
||||
{
|
||||
struct replica_event evt = {};
|
||||
__u32 key = FLAG_SSYNC_HAPPENED;
|
||||
__u32 val = 1;
|
||||
bpf_map_update_elem(&flags, &key, &val, BPF_ANY);
|
||||
|
||||
evt.type = EVENT_SSYNC;
|
||||
evt.sync.seq = seq;
|
||||
evt.sync.port = port;
|
||||
struct replica_event *e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
|
||||
if (!e) return 0;
|
||||
|
||||
e->type = EVENT_SSYNC;
|
||||
e->sync.port = port;
|
||||
|
||||
__u32 copy_len = ip_len;
|
||||
if (copy_len > sizeof(evt.sync.ip))
|
||||
copy_len = sizeof(evt.sync.ip);
|
||||
if (copy_len > sizeof(e->sync.ip)) copy_len = sizeof(e->sync.ip);
|
||||
if (ip) bpf_probe_read_user(e->sync.ip, copy_len, ip);
|
||||
|
||||
if (ip)
|
||||
bpf_probe_read_user(evt.sync.ip, copy_len, ip);
|
||||
|
||||
bpf_perf_event_output(ctx, &events,
|
||||
BPF_F_CURRENT_CPU,
|
||||
&evt, sizeof(evt));
|
||||
bpf_ringbuf_submit(e, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* __sready(void); */
|
||||
SEC("uprobe//home/lian/share/9.1-kvstore/kvstore:__sready")
|
||||
int BPF_KPROBE(handle_sready)
|
||||
{
|
||||
struct replica_event evt = {};
|
||||
struct replica_event *e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
|
||||
if (!e) return 0;
|
||||
|
||||
evt.type = EVENT_SREADY;
|
||||
|
||||
bpf_perf_event_output(ctx, &events,
|
||||
BPF_F_CURRENT_CPU,
|
||||
&evt, sizeof(evt));
|
||||
e->type = EVENT_SREADY;
|
||||
bpf_ringbuf_submit(e, 0);
|
||||
return 0;
|
||||
}
|
||||
@@ -1,5 +1,3 @@
|
||||
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
@@ -12,6 +10,7 @@
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "replica.h"
|
||||
|
||||
@@ -20,10 +19,9 @@
|
||||
typedef enum {
|
||||
OFFLINE = 0,
|
||||
ONLINE = 1,
|
||||
}replica_state_e ;
|
||||
} replica_state_e;
|
||||
|
||||
struct cmd_node {
|
||||
__u64 seq;
|
||||
__u32 len;
|
||||
uint8_t *cmd;
|
||||
struct cmd_node *next;
|
||||
@@ -32,7 +30,7 @@ struct cmd_node {
|
||||
struct pending_queue {
|
||||
struct cmd_node *head;
|
||||
struct cmd_node *tail;
|
||||
int count;
|
||||
int count;
|
||||
};
|
||||
|
||||
/* ================= 全局状态 ================= */
|
||||
@@ -43,7 +41,6 @@ static int epollfd = -1;
|
||||
|
||||
static char peer_ip[MAX_IP_LEN];
|
||||
static int peer_port;
|
||||
static __u64 peer_seq;
|
||||
|
||||
static struct pending_queue pending = {
|
||||
.head = NULL,
|
||||
@@ -66,7 +63,7 @@ static void pending_free()
|
||||
q->count = 0;
|
||||
}
|
||||
|
||||
static void pending_push(__u64 seq, __u32 len, const uint8_t *cmd)
|
||||
static void pending_push(__u32 len, const uint8_t *cmd)
|
||||
{
|
||||
struct cmd_node *node = malloc(sizeof(*node));
|
||||
if (!node)
|
||||
@@ -79,7 +76,6 @@ static void pending_push(__u64 seq, __u32 len, const uint8_t *cmd)
|
||||
}
|
||||
|
||||
memcpy(node->cmd, cmd, len);
|
||||
node->seq = seq;
|
||||
node->len = len;
|
||||
node->next = NULL;
|
||||
|
||||
@@ -93,72 +89,66 @@ static void pending_push(__u64 seq, __u32 len, const uint8_t *cmd)
|
||||
pending.count++;
|
||||
}
|
||||
|
||||
static void pending_gc(__u64 min_seq)
|
||||
{
|
||||
struct cmd_node *cur = pending.head;
|
||||
|
||||
int n = pending.count;
|
||||
while (cur && cur->seq < min_seq) {
|
||||
struct cmd_node *tmp = cur;
|
||||
cur = cur->next;
|
||||
|
||||
free(tmp->cmd);
|
||||
free(tmp);
|
||||
pending.count--;
|
||||
}
|
||||
|
||||
DEBUGLOG("gc:%d\n", n-pending.count);
|
||||
|
||||
pending.head = cur;
|
||||
if (!cur)
|
||||
pending.tail = NULL;
|
||||
}
|
||||
|
||||
static long long int sendn = 0;
|
||||
static void pending_send_all(void)
|
||||
{
|
||||
struct cmd_node *cur = pending.head;
|
||||
while (cur) {
|
||||
int rt = send(sockfd, cur->cmd, cur->len, 0);
|
||||
int need_out = 0;
|
||||
int sent_count = 0;
|
||||
const int MAX_BATCH = 100; // 批量发送上限,避免阻塞过久
|
||||
|
||||
if(rt == (int)cur->len){
|
||||
while (cur && sent_count < MAX_BATCH) {
|
||||
// 使用 MSG_MORE 合并多个小包
|
||||
int flags = (cur->next && sent_count < MAX_BATCH - 1) ? MSG_MORE : 0;
|
||||
int rt = send(sockfd, cur->cmd, cur->len, flags);
|
||||
|
||||
if (rt == (int)cur->len) {
|
||||
sendn += rt;
|
||||
printf("%s\n", cur->cmd);
|
||||
struct cmd_node *tmp = cur;
|
||||
cur = cur->next;
|
||||
|
||||
free(tmp->cmd);
|
||||
free(tmp);
|
||||
pending.count--;
|
||||
}else{
|
||||
DEBUGLOG("error\n");
|
||||
// 失败:不移动 cur,直接 break
|
||||
if (rt < 0) {
|
||||
pending.head = cur;
|
||||
sent_count++;
|
||||
} else if (rt > 0) {
|
||||
sendn += rt;
|
||||
memmove(cur->cmd, cur->cmd + rt, cur->len - rt);
|
||||
cur->len -= rt;
|
||||
need_out = 1;
|
||||
break;
|
||||
} else {
|
||||
if (errno == EAGAIN || errno == EWOULDBLOCK) {
|
||||
need_out = 1;
|
||||
break;
|
||||
} else {
|
||||
perror("send failed");
|
||||
if (errno == ECONNRESET || errno == EPIPE) {
|
||||
state = OFFLINE;
|
||||
if (sockfd >= 0) {
|
||||
close(sockfd);
|
||||
sockfd = -1;
|
||||
DEBUGLOG("connect closed\n");
|
||||
}
|
||||
} else if (rt == 0) {
|
||||
fprintf(stderr, "send returned 0 (peer closed?)\n");
|
||||
} else {
|
||||
fprintf(stderr, "partial send: %d/%u\n", rt, cur->len);
|
||||
}
|
||||
|
||||
state = OFFLINE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
DEBUGLOG("sendn :%lld\n", sendn);
|
||||
|
||||
pending.head = cur;
|
||||
if(!cur)
|
||||
pending.tail = NULL;
|
||||
if (!cur) pending.tail = NULL;
|
||||
|
||||
if (sockfd >= 0 && state == ONLINE) {
|
||||
struct epoll_event ev = {0};
|
||||
ev.data.fd = sockfd;
|
||||
ev.events = EPOLLIN;
|
||||
if (need_out || pending.head) {
|
||||
ev.events |= EPOLLOUT;
|
||||
}
|
||||
epoll_ctl(epollfd, EPOLL_CTL_MOD, sockfd, &ev);
|
||||
}
|
||||
}
|
||||
|
||||
/* ================= 网络逻辑 ================= */
|
||||
static void try_connect(void)
|
||||
{
|
||||
if(sockfd > 0){
|
||||
if (sockfd > 0) {
|
||||
close(sockfd);
|
||||
sockfd = -1;
|
||||
}
|
||||
@@ -170,14 +160,14 @@ static void try_connect(void)
|
||||
addr.sin_port = htons(peer_port);
|
||||
inet_pton(AF_INET, peer_ip, &addr.sin_addr);
|
||||
|
||||
for(i = 0;i < 10; ++ i){
|
||||
for (i = 0; i < 10; ++i) {
|
||||
sockfd = socket(AF_INET, SOCK_STREAM, 0);
|
||||
if (sockfd < 0) {
|
||||
perror("socket");
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUGLOG("connect try %d...\n", i + 1);
|
||||
DEBUGLOG("connect try %d... %s:%d\n", i + 1, peer_ip, peer_port);
|
||||
if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
|
||||
DEBUGLOG("connect success: %s:%d\n", peer_ip, peer_port);
|
||||
|
||||
@@ -190,7 +180,10 @@ static void try_connect(void)
|
||||
epoll_ctl(epollfd, EPOLL_CTL_ADD, sockfd, &ev);
|
||||
|
||||
state = ONLINE;
|
||||
pending_send_all();
|
||||
if (pending.head) {
|
||||
ev.events = EPOLLIN | EPOLLOUT;
|
||||
epoll_ctl(epollfd, EPOLL_CTL_MOD, sockfd, &ev);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -234,17 +227,10 @@ static void handle_socket_readable(void)
|
||||
static void handle_socket_writable(void)
|
||||
{
|
||||
pending_send_all();
|
||||
if (!pending.head) {
|
||||
struct epoll_event ev;
|
||||
ev.events = EPOLLIN; // 只监听读
|
||||
ev.data.fd = sockfd;
|
||||
epoll_ctl(epollfd, EPOLL_CTL_MOD, sockfd, &ev);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ================= perf buffer 回调 ================= */
|
||||
static void handle_event(void *ctx, int cpu, void *data, __u32 size)
|
||||
/* ================= ring buffer 回调 ================= */
|
||||
static int handle_event(void *ctx, void *data, size_t size)
|
||||
{
|
||||
struct replica_event *evt = data;
|
||||
switch (evt->type) {
|
||||
@@ -252,20 +238,18 @@ static void handle_event(void *ctx, int cpu, void *data, __u32 size)
|
||||
case EVENT_SSYNC:
|
||||
strncpy(peer_ip, evt->sync.ip, sizeof(peer_ip));
|
||||
peer_port = evt->sync.port;
|
||||
peer_seq = evt->sync.seq;
|
||||
DEBUGLOG("SSYNC [seq:%lld], [%s:%d]\n", peer_seq, peer_ip, peer_port);
|
||||
|
||||
DEBUGLOG("SSYNC [%s:%d]\n", peer_ip, peer_port);
|
||||
state = OFFLINE;
|
||||
pending_gc(peer_seq);
|
||||
break;
|
||||
|
||||
case EVENT_COMPLETED_CMD:
|
||||
// DEBUGLOG("CMD [seq:%lld], cmd:\n[\n%s]\n", evt->complete.seq, evt->complete.cmd);
|
||||
pending_push(evt->complete.seq,
|
||||
evt->complete.len,
|
||||
evt->complete.cmd);
|
||||
// 这里收到的可能是半个命令,或者是多个命令的粘包
|
||||
// 但对于转发器来说,只是字节流,直接 push 即可
|
||||
if (evt->complete.len > 0) {
|
||||
pending_push(evt->complete.len, evt->complete.cmd);
|
||||
}
|
||||
|
||||
if (state == ONLINE && sockfd >= 0) {
|
||||
if (state == ONLINE && sockfd >= 0 && pending.head) {
|
||||
struct epoll_event ev;
|
||||
ev.events = EPOLLIN | EPOLLOUT;
|
||||
ev.data.fd = sockfd;
|
||||
@@ -274,82 +258,80 @@ static void handle_event(void *ctx, int cpu, void *data, __u32 size)
|
||||
break;
|
||||
|
||||
case EVENT_SREADY:
|
||||
DEBUGLOG("SREADY \n");
|
||||
DEBUGLOG("SREADY\n");
|
||||
if (state == OFFLINE)
|
||||
try_connect();
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ================= main ================= */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct replica_bpf *skel;
|
||||
struct perf_buffer *pb = NULL;
|
||||
int err;
|
||||
struct replica_bpf *skel;
|
||||
struct ring_buffer *rb = NULL;
|
||||
int err;
|
||||
|
||||
/* Open BPF application */
|
||||
skel = replica_bpf__open();
|
||||
if (!skel) {
|
||||
fprintf(stderr, "Failed to open BPF skeleton\n");
|
||||
return 1;
|
||||
}
|
||||
// 提高 rlimit 以允许加载 BPF
|
||||
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
||||
setrlimit(RLIMIT_MEMLOCK, &r);
|
||||
|
||||
/* Load & verify BPF programs */
|
||||
err = replica_bpf__load(skel);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed to load and verify BPF skeleton\n");
|
||||
goto cleanup;
|
||||
}
|
||||
skel = replica_bpf__open();
|
||||
if (!skel) {
|
||||
fprintf(stderr, "Failed to open BPF skeleton\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Attach tracepoint handler */
|
||||
err = replica_bpf__attach(skel);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed to attach BPF skeleton\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
printf("Successfully started! \n");
|
||||
|
||||
|
||||
pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 8,
|
||||
handle_event, NULL, NULL, NULL);
|
||||
|
||||
if(!pb){
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
epollfd = epoll_create1(0);
|
||||
if (epollfd < 0) {
|
||||
fprintf(stderr, "epoll_create1 failed\n");
|
||||
err = replica_bpf__load(skel);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed to load BPF skeleton\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
struct epoll_event events[10];
|
||||
err = replica_bpf__attach(skel);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed to attach BPF skeleton\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
perf_buffer__poll(pb, 1000); // 处理事件
|
||||
printf("Successfully started! Monitoring TCP port 8888 (Kernel Side)...\n");
|
||||
|
||||
rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL);
|
||||
if (!rb) {
|
||||
fprintf(stderr, "Failed to create ring buffer\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
epollfd = epoll_create1(0);
|
||||
// ... (主循环保持不变) ...
|
||||
|
||||
// 主循环建议:
|
||||
while (1) {
|
||||
struct epoll_event events[10];
|
||||
|
||||
if(OFFLINE) continue;
|
||||
// 既然追求性能,Polling 依然是必要的
|
||||
// 10ms 的延迟对于 RingBuffer 消费是可以接受的
|
||||
int poll_timeout = (state == ONLINE) ? 10 : 100;
|
||||
|
||||
ring_buffer__poll(rb, poll_timeout);
|
||||
|
||||
if (state == OFFLINE) continue;
|
||||
|
||||
int nfds = epoll_wait(epollfd, events, 10, 0);
|
||||
for (int i = 0; i < nfds; i++) {
|
||||
if (events[i].data.fd == sockfd) {
|
||||
if (events[i].events & EPOLLIN) {
|
||||
handle_socket_readable(); // 快速消费接收数据
|
||||
}
|
||||
if (events[i].events & EPOLLOUT) {
|
||||
handle_socket_writable(); // 发送数据
|
||||
}
|
||||
if (events[i].events & EPOLLIN) handle_socket_readable();
|
||||
if (events[i].events & EPOLLOUT) handle_socket_writable();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
perf_buffer__free(pb);
|
||||
|
||||
cleanup:
|
||||
pending_free();
|
||||
if (sockfd >= 0) close(sockfd);
|
||||
replica_bpf__destroy(skel);
|
||||
return -err;
|
||||
}
|
||||
// ... (清理代码保持不变) ...
|
||||
if (rb) ring_buffer__free(rb);
|
||||
pending_free();
|
||||
if (sockfd >= 0) close(sockfd);
|
||||
if (epollfd >= 0) close(epollfd);
|
||||
replica_bpf__destroy(skel);
|
||||
return -err;
|
||||
}
|
||||
@@ -1,24 +1,21 @@
|
||||
#ifndef __REPLICA_H__
|
||||
#define __REPLICA_H__
|
||||
|
||||
|
||||
#define MAX_CMD_LEN 256
|
||||
#define MAX_CMD_LEN 4096
|
||||
#define MAX_IP_LEN 64
|
||||
|
||||
enum event_type {
|
||||
EVENT_COMPLETED_CMD,
|
||||
EVENT_SSYNC,
|
||||
EVENT_SREADY,
|
||||
EVENT_COMPLETED_CMD = 1,
|
||||
EVENT_SSYNC = 2,
|
||||
EVENT_SREADY = 3,
|
||||
};
|
||||
|
||||
struct complete_cmd_evt {
|
||||
__u64 seq;
|
||||
__u32 len;
|
||||
__u8 cmd[MAX_CMD_LEN];
|
||||
};
|
||||
|
||||
struct sync_evt {
|
||||
__u64 seq;
|
||||
char ip[MAX_IP_LEN];
|
||||
__s32 port;
|
||||
};
|
||||
@@ -33,5 +30,4 @@ struct replica_event {
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user