主从同步性能优化,主从同步性能测试。
This commit is contained in:
@@ -130,7 +130,7 @@ $(BZS_APPS): $(LIBBLAZESYM_OBJ)
|
||||
# Build application binary
|
||||
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
|
||||
$(call msg,BINARY,$@)
|
||||
$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@
|
||||
$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -lrt -lpthread -o $@
|
||||
|
||||
# delete failed targets
|
||||
.DELETE_ON_ERROR:
|
||||
|
||||
@@ -15,66 +15,46 @@ struct {
|
||||
__uint(value_size, sizeof(int));
|
||||
} events SEC(".maps");
|
||||
|
||||
/* __completed_cmd(const uint8_t *cmd, size_t len, unsigned long long seq); */
|
||||
SEC("uprobe//home/lian/share/9.1-kvstore/kvstore:__completed_cmd")
|
||||
int BPF_KPROBE(handle_completed_cmd,
|
||||
const __u8 *cmd, size_t len, __u64 seq)
|
||||
{
|
||||
struct replica_event evt = {};
|
||||
__u32 copy_len;
|
||||
// 1) notify: __replica_notify(seq, off, len)
|
||||
// SEC("uprobe//home/lian/share/9.1-kvstore/kvstore:__replica_notify")
|
||||
// int BPF_KPROBE(handle_replica_notify, __u64 seq, __u32 off, __u32 len)
|
||||
// {
|
||||
// struct replica_event evt = {};
|
||||
// evt.type = EVENT_CMD_META;
|
||||
// evt.meta.seq = seq;
|
||||
// evt.meta.off = off;
|
||||
// evt.meta.len = len;
|
||||
|
||||
evt.type = EVENT_COMPLETED_CMD;
|
||||
evt.complete.seq = seq;
|
||||
// bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &evt, sizeof(evt));
|
||||
// return 0;
|
||||
// }
|
||||
|
||||
copy_len = len;
|
||||
if (copy_len > MAX_CMD_LEN)
|
||||
copy_len = MAX_CMD_LEN;
|
||||
|
||||
evt.complete.len = copy_len;
|
||||
|
||||
if (cmd)
|
||||
bpf_probe_read_user(evt.complete.cmd, copy_len, cmd);
|
||||
|
||||
bpf_perf_event_output(ctx, &events,
|
||||
BPF_F_CURRENT_CPU,
|
||||
&evt, sizeof(evt));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* __ssync(const uint8_t *ip, uint32_t ip_len, int port, unsigned long long seq); */
|
||||
// 2) ssync: __ssync(ip, ip_len, port, seq)
|
||||
SEC("uprobe//home/lian/share/9.1-kvstore/kvstore:__ssync")
|
||||
int BPF_KPROBE(handle_ssync,
|
||||
const __u8 *ip, __u32 ip_len, int port, __u64 seq)
|
||||
int BPF_KPROBE(handle_ssync, const __u8 *ip, __u32 ip_len, int port, __u64 seq)
|
||||
{
|
||||
struct replica_event evt = {};
|
||||
|
||||
evt.type = EVENT_SSYNC;
|
||||
evt.sync.seq = seq;
|
||||
evt.sync.port = port;
|
||||
|
||||
__u32 copy_len = ip_len;
|
||||
if (copy_len > sizeof(evt.sync.ip))
|
||||
copy_len = sizeof(evt.sync.ip);
|
||||
if (copy_len > MAX_IP_LEN) copy_len = MAX_IP_LEN;
|
||||
evt.sync.ip_len = copy_len;
|
||||
|
||||
if (ip)
|
||||
bpf_probe_read_user(evt.sync.ip, copy_len, ip);
|
||||
|
||||
bpf_perf_event_output(ctx, &events,
|
||||
BPF_F_CURRENT_CPU,
|
||||
&evt, sizeof(evt));
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &evt, sizeof(evt));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* __sready(void); */
|
||||
// 3) sready: __sready()
|
||||
SEC("uprobe//home/lian/share/9.1-kvstore/kvstore:__sready")
|
||||
int BPF_KPROBE(handle_sready)
|
||||
{
|
||||
struct replica_event evt = {};
|
||||
|
||||
evt.type = EVENT_SREADY;
|
||||
|
||||
bpf_perf_event_output(ctx, &events,
|
||||
BPF_F_CURRENT_CPU,
|
||||
&evt, sizeof(evt));
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &evt, sizeof(evt));
|
||||
return 0;
|
||||
}
|
||||
559
ebpf/c/replica.c
559
ebpf/c/replica.c
@@ -10,202 +10,474 @@
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "replica_shm.h"
|
||||
#include "replica.h"
|
||||
|
||||
typedef enum {
|
||||
OFFLINE = 0,
|
||||
ONLINE = 1,
|
||||
}replica_state_e ;
|
||||
#define DEBUGLOG(...) fprintf(stderr, __VA_ARGS__)
|
||||
|
||||
struct cmd_node {
|
||||
__u64 seq;
|
||||
__u32 len;
|
||||
uint8_t *cmd;
|
||||
struct cmd_node *next;
|
||||
};
|
||||
/* ============================================================ */
|
||||
#define REPLICA_SHM_MAGIC 0x52504C43u /* 'RPLC' */
|
||||
#define REPLICA_SHM_VER 1
|
||||
|
||||
/*
 * Round x up to the next multiple of 8. Values that are already a multiple
 * of 8 are returned unchanged; the addition wraps modulo 2^64.
 */
static inline uint64_t align8_u64(uint64_t x)
{
    const uint64_t low_bits = 7u;

    return (x + low_bits) & ~low_bits;
}
|
||||
|
||||
/*
 * Open the POSIX shared-memory object `name` and map `total_size` bytes of it
 * read/write and shared.
 *
 * s          - handle to fill in; it is zeroed first and stays zeroed on failure.
 * name       - shm object name passed to shm_open().
 * total_size - bytes to map (header + data area); must be at least
 *              sizeof(replica_shm_hdr_t) + 4096.
 * create     - non-zero: create the object if needed, resize it to total_size
 *              and (re)initialise the header. Zero: attach to an existing
 *              object.
 *
 * Returns 0 on success, or a negative errno value:
 *   -EINVAL  bad arguments, the existing object is smaller than total_size,
 *            or its header advertises more data than this mapping covers;
 *   -errno   from shm_open(), ftruncate(), fstat() or mmap().
 */
int replica_shm_open(replica_shm_t *s, const char *name, size_t total_size, int create)
{
    if (!s || !name || total_size < (sizeof(replica_shm_hdr_t) + 4096)) return -EINVAL;
    memset(s, 0, sizeof(*s));

    int flags = O_RDWR;
    if (create) flags |= O_CREAT;

    int fd = shm_open(name, flags, 0666);
    if (fd < 0) return -errno;

    if (create) {
        if (ftruncate(fd, (off_t)total_size) != 0) {
            int e = -errno; close(fd); return e;
        }
    } else {
        /*
         * Attaching: the object must already be at least total_size bytes.
         * mmap() succeeds on a shorter object, but touching pages past its
         * end raises SIGBUS, so reject that case here.
         */
        struct stat st;
        if (fstat(fd, &st) != 0) {
            int e = -errno; close(fd); return e;
        }
        if (st.st_size < 0 || (uint64_t)st.st_size < (uint64_t)total_size) {
            close(fd);
            return -EINVAL;
        }
    }

    void *p = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (p == MAP_FAILED) {
        int e = -errno; close(fd); return e;
    }

    replica_shm_hdr_t *hdr = (replica_shm_hdr_t *)p;
    const uint64_t data_bytes = (uint64_t)total_size - sizeof(replica_shm_hdr_t);

    if (create || hdr->magic != REPLICA_SHM_MAGIC) {
        /*
         * Initialise the header.
         * NOTE(review): this also runs when attaching (create == 0) to an
         * object whose magic does not match, which rewrites a header that
         * another process may own. Kept for compatibility; confirm that this
         * is intended. The version field is not checked either.
         */
        memset(hdr, 0, sizeof(*hdr));
        hdr->magic     = REPLICA_SHM_MAGIC;
        hdr->version   = REPLICA_SHM_VER;
        hdr->capacity  = data_bytes;
        hdr->write_off = 0;
        hdr->last_seq  = 0;
    } else if (hdr->capacity > data_bytes) {
        /*
         * The existing header describes a data area larger than what was
         * mapped here; offsets validated against that capacity could point
         * outside the mapping.
         */
        munmap(p, total_size);
        close(fd);
        return -EINVAL;
    }

    s->fd = fd;
    s->map_size = total_size;
    s->hdr = hdr;
    s->data = (uint8_t *)p + sizeof(replica_shm_hdr_t);

    return 0;
}
|
||||
|
||||
/*
 * Copy the record header located `off` bytes into the data area to *out_hdr.
 * Nothing in the handle or the shared memory is modified.
 *
 * Returns 0 on success, -EINVAL if an argument is NULL, the handle is not
 * mapped, or the header would extend past hdr->capacity.
 */
int replica_shm_peek(replica_shm_t *s, uint32_t off, replica_rec_hdr_t *out_hdr)
{
    if (s == NULL || out_hdr == NULL)
        return -EINVAL;
    if (s->hdr == NULL || s->data == NULL)
        return -EINVAL;

    /* Widen before adding so that off + header size cannot wrap. */
    const uint64_t hdr_end = (uint64_t)off + sizeof(replica_rec_hdr_t);
    if (hdr_end > s->hdr->capacity)
        return -EINVAL;

    memcpy(out_hdr, s->data + off, sizeof(*out_hdr));
    return 0;
}
|
||||
|
||||
/*
 * Unmap the region and close the descriptor held by `s`, then zero the
 * handle. A NULL handle is ignored.
 */
void replica_shm_close(replica_shm_t *s)
{
    if (s == NULL)
        return;

    if (s->hdr != NULL && s->map_size != 0)
        munmap(s->hdr, s->map_size);

    /* A zeroed handle has fd == 0, so descriptor 0 is never closed here. */
    if (s->fd > 0)
        close(s->fd);

    memset(s, 0, sizeof(*s));
}
|
||||
|
||||
/* ================================================================================*/
|
||||
|
||||
struct pending_queue {
|
||||
struct cmd_node *head;
|
||||
struct cmd_node *tail;
|
||||
int count;
|
||||
};
|
||||
|
||||
/* ================= 全局状态 ================= */
|
||||
|
||||
static replica_state_e state = OFFLINE;
|
||||
static int sockfd = -1;
|
||||
#define DEBUGLOG(...) fprintf(stderr, __VA_ARGS__)
|
||||
|
||||
static replica_shm_t g_shm;
|
||||
static int g_sockfd = -1;
|
||||
|
||||
static char peer_ip[MAX_IP_LEN];
|
||||
static int peer_port;
|
||||
static __u64 peer_seq;
|
||||
static int peer_port = 0;
|
||||
static uint64_t SYNC_SEQ = 0;
|
||||
|
||||
static struct pending_queue pending = {
|
||||
.head = NULL,
|
||||
.tail = NULL,
|
||||
.count = 0,
|
||||
static uint64_t local_seq = 0;
|
||||
static uint32_t read_off = 0;
|
||||
|
||||
static pthread_t reader_thread;
|
||||
static pthread_t sender_thread;
|
||||
static volatile int should_stop = 0;
|
||||
|
||||
/* ================= ================= */
|
||||
/* One buffer queued for transmission; nodes form a singly linked list. */
struct send_node {
    uint8_t *data;           /* heap buffer, freed together with the node */
    uint32_t len;            /* total number of bytes in data */
    uint32_t sent;           /* bytes already accepted by send() */
    struct send_node *next;  /* following node, NULL at the tail */
};
|
||||
|
||||
/* ================= pending 队列操作 ================= */
|
||||
static void pending_free()
|
||||
/*
 * Send queue shared between the code that pushes buffers and the sender
 * thread. `lock` guards head/tail/count; `not_empty` is signalled on push.
 */
static struct {
    struct send_node *head;     /* oldest queued node, NULL when empty */
    struct send_node *tail;     /* newest queued node, NULL when empty */
    int count;                  /* number of queued nodes */
    pthread_mutex_t lock;
    pthread_cond_t not_empty;
} sendq = {
    .head = NULL,
    .tail = NULL,
    .count = 0,
    .lock = PTHREAD_MUTEX_INITIALIZER,
    .not_empty = PTHREAD_COND_INITIALIZER,
};
|
||||
|
||||
static void sendq_free_all(void)
|
||||
{
|
||||
struct pending_queue *q = &pending;
|
||||
struct cmd_node *cur = q->head;
|
||||
while (cur) {
|
||||
struct cmd_node *tmp = cur;
|
||||
cur = cur->next;
|
||||
free(tmp->cmd);
|
||||
free(tmp);
|
||||
pthread_mutex_lock(&sendq.lock);
|
||||
struct send_node *c = sendq.head;
|
||||
while (c) {
|
||||
struct send_node *n = c->next;
|
||||
free(c->data);
|
||||
free(c);
|
||||
c = n;
|
||||
}
|
||||
q->head = q->tail = NULL;
|
||||
q->count = 0;
|
||||
sendq.head = sendq.tail = NULL;
|
||||
sendq.count = 0;
|
||||
pthread_mutex_unlock(&sendq.lock);
|
||||
}
|
||||
|
||||
static void pending_push(__u64 seq, __u32 len, const uint8_t *cmd)
|
||||
{
|
||||
struct cmd_node *node = malloc(sizeof(*node));
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
node->cmd = malloc(len);
|
||||
if (!node->cmd) {
|
||||
free(node);
|
||||
static void sendq_push(uint8_t *data, uint32_t len)
|
||||
{
|
||||
struct send_node *n = (struct send_node *)malloc(sizeof(*n));
|
||||
if (!n) {
|
||||
free(data);
|
||||
return;
|
||||
}
|
||||
n->data = data;
|
||||
n->len = len;
|
||||
n->sent = 0;
|
||||
n->next = NULL;
|
||||
|
||||
memcpy(node->cmd, cmd, len);
|
||||
node->seq = seq;
|
||||
node->len = len;
|
||||
node->next = NULL;
|
||||
pthread_mutex_lock(&sendq.lock);
|
||||
|
||||
if (!pending.tail) {
|
||||
pending.head = pending.tail = node;
|
||||
if (!sendq.tail) {
|
||||
sendq.head = sendq.tail = n;
|
||||
} else {
|
||||
pending.tail->next = node;
|
||||
pending.tail = node;
|
||||
sendq.tail->next = n;
|
||||
sendq.tail = n;
|
||||
}
|
||||
|
||||
pending.count++;
|
||||
sendq.count++;
|
||||
pthread_cond_signal(&sendq.not_empty);
|
||||
pthread_mutex_unlock(&sendq.lock);
|
||||
}
|
||||
|
||||
static void pending_gc(__u64 min_seq)
|
||||
static void sendq_pop(void)
|
||||
{
|
||||
struct cmd_node *cur = pending.head;
|
||||
|
||||
int n = pending.count;
|
||||
while (cur && cur->seq < min_seq) {
|
||||
struct cmd_node *tmp = cur;
|
||||
cur = cur->next;
|
||||
|
||||
free(tmp->cmd);
|
||||
free(tmp);
|
||||
pending.count--;
|
||||
}
|
||||
|
||||
printf("gc:%d\n", n-pending.count);
|
||||
|
||||
pending.head = cur;
|
||||
if (!cur)
|
||||
pending.tail = NULL;
|
||||
if (!sendq.head) return;
|
||||
struct send_node *n = sendq.head;
|
||||
sendq.head = n->next;
|
||||
if (!sendq.head) sendq.tail = NULL;
|
||||
free(n->data);
|
||||
free(n);
|
||||
sendq.count--;
|
||||
}
|
||||
|
||||
static void pending_send_one(struct cmd_node *node){
|
||||
int rt = send(sockfd, node->cmd, node->len, 0);
|
||||
printf("send seq:%lld, rt=%d\n", node->seq, rt);
|
||||
}
|
||||
|
||||
static void pending_send_all(void)
|
||||
/* ================= Reader 线程:读共享内存 ================= */
|
||||
static void* reader_thread_func(void *arg)
|
||||
{
|
||||
struct cmd_node *cur = pending.head;
|
||||
|
||||
while (cur) {
|
||||
pending_send_one(cur);
|
||||
cur = cur->next;
|
||||
(void)arg;
|
||||
|
||||
DEBUGLOG("Reader thread started\n");
|
||||
|
||||
while (!should_stop) {
|
||||
replica_rec_hdr_t h;
|
||||
|
||||
uint64_t last = __atomic_load_n(&g_shm.hdr->last_seq, __ATOMIC_ACQUIRE);
|
||||
if (local_seq > last) {
|
||||
// 没有新数据,短暂休眠避免空转
|
||||
continue;
|
||||
}
|
||||
if (read_off+ sizeof(replica_rec_hdr_t) >= g_shm.hdr->capacity) {
|
||||
DEBUGLOG("Reader: read_off overflow, reset\n");
|
||||
// read_off = 0;
|
||||
break;
|
||||
// continue;
|
||||
}
|
||||
|
||||
if (replica_shm_peek(&g_shm, read_off, &h) != 0) {
|
||||
DEBUGLOG("Reader: peek failed at %u\n", read_off);
|
||||
break;
|
||||
// continue;
|
||||
}
|
||||
|
||||
// 检测 wrap
|
||||
if (h.len == 0) {
|
||||
DEBUGLOG("Reader: wrap at offset %u\n", read_off);
|
||||
read_off = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 跳过 SYNC_SEQ 之前的
|
||||
if (h.seq < SYNC_SEQ) {
|
||||
uint64_t step = align8_u64((uint64_t)sizeof(replica_rec_hdr_t) + (uint64_t)h.len);
|
||||
if (read_off + step > g_shm.hdr->capacity) {
|
||||
read_off = 0;
|
||||
} else {
|
||||
read_off += (uint32_t)step;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// 序列号检查
|
||||
if (h.seq != local_seq) {
|
||||
DEBUGLOG("Reader: seq mismatch! h.seq=%lu, local_seq=%lu, off=%u\n",
|
||||
h.seq, local_seq, read_off);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 读取数据
|
||||
uint8_t *buf = (uint8_t *)malloc(h.len);
|
||||
if (!buf) {
|
||||
DEBUGLOG("Reader: malloc failed\n");
|
||||
usleep(1000);
|
||||
continue;
|
||||
}
|
||||
|
||||
memcpy(buf, g_shm.data + read_off + sizeof(replica_rec_hdr_t), h.len);
|
||||
sendq_push(buf, h.len);
|
||||
|
||||
uint64_t step = align8_u64((uint64_t)sizeof(replica_rec_hdr_t) + (uint64_t)h.len);
|
||||
if (read_off + step > g_shm.hdr->capacity) {
|
||||
read_off = 0;
|
||||
} else {
|
||||
read_off += (uint32_t)step;
|
||||
}
|
||||
|
||||
local_seq++;
|
||||
}
|
||||
|
||||
DEBUGLOG("Reader thread stopped\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* ================= Sender 线程:发送数据 ================= */
|
||||
static void* sender_thread_func(void *arg)
|
||||
{
|
||||
(void)arg;
|
||||
|
||||
DEBUGLOG("Sender thread started\n");
|
||||
|
||||
int epfd = epoll_create1(0);
|
||||
if (epfd < 0) {
|
||||
perror("epoll_create1");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct epoll_event ev;
|
||||
memset(&ev, 0, sizeof(ev));
|
||||
ev.events = EPOLLIN | EPOLLOUT;
|
||||
ev.data.fd = g_sockfd;
|
||||
|
||||
if (epoll_ctl(epfd, EPOLL_CTL_ADD, g_sockfd, &ev) != 0) {
|
||||
perror("epoll_ctl ADD");
|
||||
close(epfd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
while (!should_stop && g_sockfd >= 0) {
|
||||
struct epoll_event events[4];
|
||||
int nfds = epoll_wait(epfd, events, 4, 100); // 100ms timeout
|
||||
|
||||
if (nfds < 0) {
|
||||
if (errno == EINTR) continue;
|
||||
perror("epoll_wait");
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = 0; i < nfds; i++) {
|
||||
if (events[i].data.fd != g_sockfd)
|
||||
continue;
|
||||
|
||||
if (events[i].events & (EPOLLERR | EPOLLHUP)) {
|
||||
DEBUGLOG("Sender: EPOLLERR/EPOLLHUP\n");
|
||||
close(g_sockfd);
|
||||
g_sockfd = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (events[i].events & EPOLLIN) {
|
||||
char buf[4096];
|
||||
recv(g_sockfd, buf, sizeof(buf), 0);
|
||||
}
|
||||
|
||||
if (events[i].events & EPOLLOUT) {
|
||||
pthread_mutex_lock(&sendq.lock);
|
||||
|
||||
while (sendq.head) {
|
||||
struct send_node *n = sendq.head;
|
||||
|
||||
pthread_mutex_unlock(&sendq.lock);
|
||||
|
||||
int nbytes = send(g_sockfd, n->data + n->sent,
|
||||
(int)(n->len - n->sent), MSG_NOSIGNAL);
|
||||
|
||||
pthread_mutex_lock(&sendq.lock);
|
||||
|
||||
if (nbytes > 0) {
|
||||
n->sent += (uint32_t)nbytes;
|
||||
if (n->sent == n->len) {
|
||||
sendq_pop();
|
||||
continue;
|
||||
}
|
||||
// partial send
|
||||
break;
|
||||
}
|
||||
|
||||
if (nbytes < 0) {
|
||||
if (errno == EAGAIN || errno == EWOULDBLOCK) {
|
||||
break;
|
||||
}
|
||||
DEBUGLOG("Sender: send error errno=%d\n", errno);
|
||||
pthread_mutex_unlock(&sendq.lock);
|
||||
close(g_sockfd);
|
||||
g_sockfd = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
// nbytes == 0
|
||||
DEBUGLOG("Sender: send returned 0\n");
|
||||
pthread_mutex_unlock(&sendq.lock);
|
||||
close(g_sockfd);
|
||||
g_sockfd = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sendq.lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
close(epfd);
|
||||
DEBUGLOG("Sender thread stopped\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* ================= 网络逻辑 ================= */
|
||||
static void try_connect(void)
|
||||
static int connect_peer(void)
|
||||
{
|
||||
if(sockfd > 0){
|
||||
close(sockfd);
|
||||
sockfd = -1;
|
||||
if (peer_port <= 0 || peer_ip[0] == '\0')
|
||||
return -1;
|
||||
|
||||
if (g_sockfd >= 0) {
|
||||
close(g_sockfd);
|
||||
g_sockfd = -1;
|
||||
}
|
||||
|
||||
struct sockaddr_in addr = {};
|
||||
int i = 0;
|
||||
|
||||
addr.sin_family = AF_INET;
|
||||
addr.sin_port = htons(peer_port);
|
||||
inet_pton(AF_INET, peer_ip, &addr.sin_addr);
|
||||
|
||||
for(i = 0;i < 10; ++ i){
|
||||
sockfd = socket(AF_INET, SOCK_STREAM, 0);
|
||||
if (sockfd < 0) {
|
||||
perror("socket");
|
||||
return;
|
||||
}
|
||||
|
||||
printf("connect try %d...\n", i + 1);
|
||||
if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
|
||||
printf("connect success: %s:%d\n", peer_ip, peer_port);
|
||||
state = ONLINE;
|
||||
pending_send_all();
|
||||
return;
|
||||
}
|
||||
|
||||
perror("connect");
|
||||
close(sockfd);
|
||||
sockfd = -1;
|
||||
|
||||
sleep(1);
|
||||
int fd = socket(AF_INET, SOCK_STREAM, 0);
|
||||
if (fd < 0) {
|
||||
perror("socket");
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("connect failed after 10 retries\n");
|
||||
|
||||
struct sockaddr_in a;
|
||||
memset(&a, 0, sizeof(a));
|
||||
a.sin_family = AF_INET;
|
||||
a.sin_port = htons(peer_port);
|
||||
if (inet_pton(AF_INET, peer_ip, &a.sin_addr) != 1) {
|
||||
DEBUGLOG("inet_pton failed for ip=%s\n", peer_ip);
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (connect(fd, (struct sockaddr *)&a, sizeof(a)) != 0) {
|
||||
// 这里可以重试;按你的要求先简单返回失败
|
||||
// perror("connect");
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// non-blocking(配合 epoll)
|
||||
int flags = fcntl(fd, F_GETFL, 0);
|
||||
if (flags >= 0) fcntl(fd, F_SETFL, flags | O_NONBLOCK);
|
||||
|
||||
g_sockfd = fd;
|
||||
DEBUGLOG("connect ok %s:%d\n", peer_ip, peer_port);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ================= perf buffer 回调 ================= */
|
||||
static void handle_event(void *ctx, int cpu, void *data, __u32 size)
|
||||
{
|
||||
struct replica_event *evt = data;
|
||||
(void)ctx; (void)cpu;
|
||||
if (size < sizeof(struct replica_event)) return;
|
||||
|
||||
switch (evt->type) {
|
||||
struct replica_event *e = (struct replica_event*)data;
|
||||
|
||||
case EVENT_SSYNC:
|
||||
strncpy(peer_ip, evt->sync.ip, sizeof(peer_ip));
|
||||
peer_port = evt->sync.port;
|
||||
peer_seq = evt->sync.seq;
|
||||
printf("SSYNC [seq:%lld], [%s:%d]\n", peer_seq, peer_ip, peer_port);
|
||||
if (e->type == EVENT_SSYNC) {
|
||||
memset(peer_ip, 0, sizeof(peer_ip));
|
||||
memcpy(peer_ip, e->sync.ip, e->sync.ip_len);
|
||||
peer_port = e->sync.port;
|
||||
SYNC_SEQ = e->sync.seq;
|
||||
|
||||
state = OFFLINE;
|
||||
pending_gc(peer_seq);
|
||||
break;
|
||||
local_seq = SYNC_SEQ;
|
||||
read_off = 0;
|
||||
|
||||
case EVENT_COMPLETED_CMD:
|
||||
// printf("CMD [seq:%lld], cmd:\n[\n%s]\n", evt->complete.seq, evt->complete.cmd);
|
||||
pending_push(evt->complete.seq,
|
||||
evt->complete.len,
|
||||
evt->complete.cmd);
|
||||
DEBUGLOG("SSYNC: peer=%s:%d SYNC_SEQ=%llu\n",
|
||||
peer_ip, peer_port, (unsigned long long)SYNC_SEQ);
|
||||
|
||||
if (state == ONLINE && pending.tail) {
|
||||
struct cmd_node *n = pending.tail;
|
||||
pending_send_one(n);
|
||||
// 停止旧线程
|
||||
should_stop = 1;
|
||||
if (reader_thread) {
|
||||
pthread_join(reader_thread, NULL);
|
||||
reader_thread = 0;
|
||||
}
|
||||
break;
|
||||
if (sender_thread) {
|
||||
pthread_join(sender_thread, NULL);
|
||||
sender_thread = 0;
|
||||
}
|
||||
|
||||
if (g_sockfd >= 0) {
|
||||
close(g_sockfd);
|
||||
g_sockfd = -1;
|
||||
}
|
||||
sendq_free_all();
|
||||
return;
|
||||
}
|
||||
|
||||
case EVENT_SREADY:
|
||||
printf("SREADY \n");
|
||||
if (state == OFFLINE)
|
||||
try_connect();
|
||||
break;
|
||||
if (e->type == EVENT_SREADY) {
|
||||
DEBUGLOG("SREADY\n");
|
||||
|
||||
if (connect_peer() != 0) {
|
||||
DEBUGLOG("connect_peer failed\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// 启动双线程
|
||||
should_stop = 0;
|
||||
|
||||
if (pthread_create(&reader_thread, NULL, reader_thread_func, NULL) != 0) {
|
||||
perror("pthread_create reader");
|
||||
return;
|
||||
}
|
||||
|
||||
if (pthread_create(&sender_thread, NULL, sender_thread_func, NULL) != 0) {
|
||||
perror("pthread_create sender");
|
||||
pthread_cancel(reader_thread);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUGLOG("Reader and Sender threads started\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* ================= main ================= */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
|
||||
int rc = replica_shm_open(&g_shm, REPLICA_SHM_NAME, REPLICA_SHM_SIZE, 0);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "replica_shm_open failed rc=%d (did you create it in kvstore?)\n", rc);
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct replica_bpf *skel;
|
||||
struct perf_buffer *pb = NULL;
|
||||
int err;
|
||||
@@ -231,8 +503,7 @@ int main(int argc, char **argv)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
printf("Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe` "
|
||||
"to see output of the BPF programs.\n");
|
||||
printf("Successfully started! \n");
|
||||
|
||||
|
||||
pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 8,
|
||||
@@ -244,13 +515,19 @@ int main(int argc, char **argv)
|
||||
|
||||
while (1) {
|
||||
perf_buffer__poll(pb, 1000); // 处理事件
|
||||
|
||||
|
||||
}
|
||||
|
||||
perf_buffer__free(pb);
|
||||
|
||||
cleanup:
|
||||
pending_free();
|
||||
if (sockfd >= 0) close(sockfd);
|
||||
should_stop = 1;
|
||||
if (reader_thread) pthread_join(reader_thread, NULL);
|
||||
if (sender_thread) pthread_join(sender_thread, NULL);
|
||||
if (g_sockfd >= 0) close(g_sockfd);
|
||||
replica_shm_close(&g_shm);
|
||||
sendq_free_all();
|
||||
replica_bpf__destroy(skel);
|
||||
return -err;
|
||||
}
|
||||
|
||||
@@ -2,34 +2,25 @@
|
||||
#define __REPLICA_H__
|
||||
|
||||
|
||||
#define MAX_CMD_LEN 256
|
||||
#define MAX_IP_LEN 64
|
||||
|
||||
enum event_type {
|
||||
EVENT_COMPLETED_CMD,
|
||||
EVENT_SSYNC,
|
||||
EVENT_SREADY,
|
||||
};
|
||||
#define MAX_IP_LEN 64
|
||||
|
||||
struct complete_cmd_evt {
|
||||
__u64 seq;
|
||||
__u32 len;
|
||||
__u8 cmd[MAX_CMD_LEN];
|
||||
};
|
||||
|
||||
struct sync_evt {
|
||||
__u64 seq;
|
||||
char ip[MAX_IP_LEN];
|
||||
__s32 port;
|
||||
enum {
|
||||
EVENT_SSYNC = 1,
|
||||
EVENT_SREADY = 2,
|
||||
};
|
||||
|
||||
struct replica_event {
|
||||
__u32 type;
|
||||
__u32 _pad;
|
||||
uint32_t type;
|
||||
uint32_t _pad;
|
||||
|
||||
union {
|
||||
struct complete_cmd_evt complete;
|
||||
struct sync_evt sync;
|
||||
struct {
|
||||
uint64_t seq; // SYNC_SEQ:从这个 seq 开始增量
|
||||
int32_t port;
|
||||
uint32_t ip_len;
|
||||
char ip[MAX_IP_LEN];
|
||||
} sync;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
59
ebpf/c/replica_shm.h
Normal file
59
ebpf/c/replica_shm.h
Normal file
@@ -0,0 +1,59 @@
|
||||
#ifndef __REPLICA_SHM_H__
|
||||
#define __REPLICA_SHM_H__
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#ifndef REPLICA_SHM_NAME
|
||||
#define REPLICA_SHM_NAME "/kvs_replica_shm"
|
||||
#endif
|
||||
|
||||
#ifndef REPLICA_SHM_SIZE
|
||||
// 256 MB by default; tune as needed
|
||||
#define REPLICA_SHM_SIZE (256u * 1024u * 1024u)
|
||||
#endif
|
||||
|
||||
/* Header stored in front of every record inside the shm data area. */
typedef struct {
    uint64_t seq;       /* monotonically increasing sequence number */
    uint32_t len;       /* payload size in bytes */
    uint32_t flags;     /* reserved: compression, record type, ... */
    uint32_t crc32;     /* optional checksum; 0 means "not checked" */
    uint32_t reserved;  /* padding for alignment */
    /* uint8_t payload[len] follows immediately */
} __attribute__((packed)) replica_rec_hdr_t;
|
||||
|
||||
/* Metadata placed at the very start of the shared-memory region. */
typedef struct {
    uint32_t magic;
    uint32_t version;
    uint64_t capacity;   /* size of the data area in bytes */
    uint64_t write_off;  /* producer write offset (0..capacity-1) */
    uint64_t last_seq;   /* most recent seq written by the producer */
    uint8_t  _pad[64];   /* cache-line padding */
    /* data[capacity] follows immediately */
} __attribute__((packed)) replica_shm_hdr_t;
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
size_t map_size;
|
||||
replica_shm_hdr_t *hdr;
|
||||
uint8_t *data;
|
||||
} replica_shm_t;
|
||||
|
||||
// kvstore: 初始化(create/open + mmap)
|
||||
int replica_shm_open(replica_shm_t *s, const char *name, size_t total_size, int create);
|
||||
|
||||
// kvstore: append 一条记录,返回 off(相对 data 起始),用于 notify
|
||||
// 单写者设计:无需锁。返回 0 成功,<0 失败(空间不足或参数错误)
|
||||
int replica_shm_append(replica_shm_t *s, uint64_t seq, const void *buf, uint32_t len, uint32_t *out_off);
|
||||
|
||||
// replicator: 读取记录头(不移动游标),你也可以直接 memcpy payload
|
||||
// off 是 data 内偏移
|
||||
int replica_shm_peek(replica_shm_t *s, uint32_t off, replica_rec_hdr_t *out_hdr);
|
||||
|
||||
// 关闭
|
||||
void replica_shm_close(replica_shm_t *s);
|
||||
|
||||
|
||||
extern replica_shm_t g_rep_shm;
|
||||
#endif
|
||||
Reference in New Issue
Block a user