// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2020 Facebook */ #include #include #include #include #include #include #include "replica.skel.h" #include #include #include #include #include #include "replica.h" #define DEBUGLOG printf typedef enum { OFFLINE = 0, ONLINE = 1, }replica_state_e ; struct cmd_node { __u64 seq; __u32 len; uint8_t *cmd; struct cmd_node *next; }; struct pending_queue { struct cmd_node *head; struct cmd_node *tail; int count; }; /* ================= 全局状态 ================= */ static replica_state_e state = OFFLINE; static int sockfd = -1; static int epollfd = -1; static char peer_ip[MAX_IP_LEN]; static int peer_port; static __u64 peer_seq; static struct pending_queue pending = { .head = NULL, .tail = NULL, .count = 0, }; /* ================= pending 队列操作 ================= */ static void pending_free() { struct pending_queue *q = &pending; struct cmd_node *cur = q->head; while (cur) { struct cmd_node *tmp = cur; cur = cur->next; free(tmp->cmd); free(tmp); } q->head = q->tail = NULL; q->count = 0; } static void pending_push(__u64 seq, __u32 len, const uint8_t *cmd) { struct cmd_node *node = malloc(sizeof(*node)); if (!node) return; node->cmd = malloc(len); if (!node->cmd) { free(node); return; } memcpy(node->cmd, cmd, len); node->seq = seq; node->len = len; node->next = NULL; if (!pending.tail) { pending.head = pending.tail = node; } else { pending.tail->next = node; pending.tail = node; } pending.count++; } static void pending_gc(__u64 min_seq) { struct cmd_node *cur = pending.head; int n = pending.count; while (cur && cur->seq < min_seq) { struct cmd_node *tmp = cur; cur = cur->next; free(tmp->cmd); free(tmp); pending.count--; } DEBUGLOG("gc:%d\n", n-pending.count); pending.head = cur; if (!cur) pending.tail = NULL; } static void pending_send_all(void) { struct cmd_node *cur = pending.head; while (cur) { int rt = send(sockfd, cur->cmd, cur->len, 0); if(rt == (int)cur->len){ struct cmd_node *tmp = cur; cur = cur->next; free(tmp->cmd); free(tmp); pending.count--; }else{ DEBUGLOG("error\n"); // 失败:不移动 cur,直接 break if (rt < 0) { perror("send failed"); if (errno == ECONNRESET || errno == EPIPE) { state = OFFLINE; if (sockfd >= 0) { close(sockfd); sockfd = -1; DEBUGLOG("connect closed\n"); } } else if (rt == 0) { fprintf(stderr, "send returned 0 (peer closed?)\n"); } else { fprintf(stderr, "partial send: %d/%u\n", rt, cur->len); } break; } } } pending.head = cur; if(!cur) pending.tail = NULL; } /* ================= 网络逻辑 ================= */ static void try_connect(void) { if(sockfd > 0){ close(sockfd); sockfd = -1; } struct sockaddr_in addr = {}; int i = 0; addr.sin_family = AF_INET; addr.sin_port = htons(peer_port); inet_pton(AF_INET, peer_ip, &addr.sin_addr); for(i = 0;i < 10; ++ i){ sockfd = socket(AF_INET, SOCK_STREAM, 0); if (sockfd < 0) { perror("socket"); return; } DEBUGLOG("connect try %d...\n", i + 1); if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == 0) { DEBUGLOG("connect success: %s:%d\n", peer_ip, peer_port); int flags = fcntl(sockfd, F_GETFL, 0); fcntl(sockfd, F_SETFL, flags | O_NONBLOCK); struct epoll_event ev; ev.events = EPOLLIN; ev.data.fd = sockfd; epoll_ctl(epollfd, EPOLL_CTL_ADD, sockfd, &ev); state = ONLINE; pending_send_all(); return; } perror("connect"); close(sockfd); sockfd = -1; sleep(1); } DEBUGLOG("connect failed after 10 retries\n"); } static void handle_socket_readable(void) { char buf[65536]; while (1) { int n = recv(sockfd, buf, sizeof(buf), MSG_DONTWAIT); if (n > 0) { continue; } else if (n == 0) { state = OFFLINE; epoll_ctl(epollfd, EPOLL_CTL_DEL, sockfd, NULL); close(sockfd); sockfd = -1; DEBUGLOG("connection closed\n"); break; } else { if (errno == EAGAIN || errno == EWOULDBLOCK) { break; } perror("recv"); state = OFFLINE; epoll_ctl(epollfd, EPOLL_CTL_DEL, sockfd, NULL); close(sockfd); sockfd = -1; break; } } } static void handle_socket_writable(void) { pending_send_all(); if (!pending.head) { struct epoll_event ev; ev.events = EPOLLIN; // 只监听读 ev.data.fd = sockfd; epoll_ctl(epollfd, EPOLL_CTL_MOD, sockfd, &ev); } } /* ================= perf buffer 回调 ================= */ static void handle_event(void *ctx, int cpu, void *data, __u32 size) { struct replica_event *evt = data; switch (evt->type) { case EVENT_SSYNC: strncpy(peer_ip, evt->sync.ip, sizeof(peer_ip)); peer_port = evt->sync.port; peer_seq = evt->sync.seq; DEBUGLOG("SSYNC [seq:%lld], [%s:%d]\n", peer_seq, peer_ip, peer_port); state = OFFLINE; pending_gc(peer_seq); break; case EVENT_COMPLETED_CMD: // DEBUGLOG("CMD [seq:%lld], cmd:\n[\n%s]\n", evt->complete.seq, evt->complete.cmd); pending_push(evt->complete.seq, evt->complete.len, evt->complete.cmd); if (state == ONLINE && sockfd >= 0) { struct epoll_event ev; ev.events = EPOLLIN | EPOLLOUT; ev.data.fd = sockfd; epoll_ctl(epollfd, EPOLL_CTL_MOD, sockfd, &ev); } break; case EVENT_SREADY: DEBUGLOG("SREADY \n"); if (state == OFFLINE) try_connect(); break; } } /* ================= main ================= */ int main(int argc, char **argv) { struct replica_bpf *skel; struct perf_buffer *pb = NULL; int err; /* Open BPF application */ skel = replica_bpf__open(); if (!skel) { fprintf(stderr, "Failed to open BPF skeleton\n"); return 1; } /* Load & verify BPF programs */ err = replica_bpf__load(skel); if (err) { fprintf(stderr, "Failed to load and verify BPF skeleton\n"); goto cleanup; } /* Attach tracepoint handler */ err = replica_bpf__attach(skel); if (err) { fprintf(stderr, "Failed to attach BPF skeleton\n"); goto cleanup; } printf("Successfully started! \n"); pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 8, handle_event, NULL, NULL, NULL); if(!pb){ goto cleanup; } epollfd = epoll_create1(0); if (epollfd < 0) { fprintf(stderr, "epoll_create1 failed\n"); goto cleanup; } while (1) { struct epoll_event events[10]; perf_buffer__poll(pb, 1000); // 处理事件 if(OFFLINE) continue; int nfds = epoll_wait(epollfd, events, 10, 0); for (int i = 0; i < nfds; i++) { if (events[i].data.fd == sockfd) { if (events[i].events & EPOLLIN) { handle_socket_readable(); // 快速消费接收数据 } if (events[i].events & EPOLLOUT) { handle_socket_writable(); // 发送数据 } } } } perf_buffer__free(pb); cleanup: pending_free(); if (sockfd >= 0) close(sockfd); replica_bpf__destroy(skel); return -err; }