356 lines
8.2 KiB
C
356 lines
8.2 KiB
C
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||
/* Copyright (c) 2020 Facebook */
|
||
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/epoll.h>
#include <sys/resource.h>
#include <sys/socket.h>

#include <bpf/libbpf.h>

#include "replica.skel.h"
#include "replica.h"
|
||
|
||
#define DEBUGLOG printf
|
||
|
||
/* Connection state of this process toward the peer. */
typedef enum {
	OFFLINE = 0,	/* not connected; commands are only queued */
	ONLINE  = 1,	/* connected; queued commands may be sent */
} replica_state_e;
|
||
|
||
/* One queued command: a singly linked list node owning a heap copy of the bytes. */
struct cmd_node {
	__u64            seq;	/* sequence number of the command */
	__u32            len;	/* number of valid bytes in cmd */
	uint8_t         *cmd;	/* heap buffer holding the command bytes */
	struct cmd_node *next;	/* following node, or NULL at the tail */
};
|
||
|
||
/* FIFO of commands waiting to be sent, kept as a singly linked list. */
struct pending_queue {
	struct cmd_node *head;	/* oldest queued command, NULL when empty */
	struct cmd_node *tail;	/* newest queued command, NULL when empty */
	int              count;	/* number of nodes currently linked */
};
|
||
|
||
/* ================= Global state ================= */
|
||
|
||
static replica_state_e state = OFFLINE;
|
||
static int sockfd = -1;
|
||
static int epollfd = -1;
|
||
|
||
static char peer_ip[MAX_IP_LEN];
|
||
static int peer_port;
|
||
static __u64 peer_seq;
|
||
|
||
static struct pending_queue pending = {
|
||
.head = NULL,
|
||
.tail = NULL,
|
||
.count = 0,
|
||
};
|
||
|
||
/* ================= pending queue operations ================= */
|
||
static void pending_free()
|
||
{
|
||
struct pending_queue *q = &pending;
|
||
struct cmd_node *cur = q->head;
|
||
while (cur) {
|
||
struct cmd_node *tmp = cur;
|
||
cur = cur->next;
|
||
free(tmp->cmd);
|
||
free(tmp);
|
||
}
|
||
q->head = q->tail = NULL;
|
||
q->count = 0;
|
||
}
|
||
|
||
static void pending_push(__u64 seq, __u32 len, const uint8_t *cmd)
|
||
{
|
||
struct cmd_node *node = malloc(sizeof(*node));
|
||
if (!node)
|
||
return;
|
||
|
||
node->cmd = malloc(len);
|
||
if (!node->cmd) {
|
||
free(node);
|
||
return;
|
||
}
|
||
|
||
memcpy(node->cmd, cmd, len);
|
||
node->seq = seq;
|
||
node->len = len;
|
||
node->next = NULL;
|
||
|
||
if (!pending.tail) {
|
||
pending.head = pending.tail = node;
|
||
} else {
|
||
pending.tail->next = node;
|
||
pending.tail = node;
|
||
}
|
||
|
||
pending.count++;
|
||
}
|
||
|
||
static void pending_gc(__u64 min_seq)
|
||
{
|
||
struct cmd_node *cur = pending.head;
|
||
|
||
int n = pending.count;
|
||
while (cur && cur->seq < min_seq) {
|
||
struct cmd_node *tmp = cur;
|
||
cur = cur->next;
|
||
|
||
free(tmp->cmd);
|
||
free(tmp);
|
||
pending.count--;
|
||
}
|
||
|
||
DEBUGLOG("gc:%d\n", n-pending.count);
|
||
|
||
pending.head = cur;
|
||
if (!cur)
|
||
pending.tail = NULL;
|
||
}
|
||
|
||
static void pending_send_all(void)
|
||
{
|
||
struct cmd_node *cur = pending.head;
|
||
while (cur) {
|
||
int rt = send(sockfd, cur->cmd, cur->len, 0);
|
||
|
||
if(rt == (int)cur->len){
|
||
struct cmd_node *tmp = cur;
|
||
cur = cur->next;
|
||
|
||
free(tmp->cmd);
|
||
free(tmp);
|
||
pending.count--;
|
||
}else{
|
||
DEBUGLOG("error\n");
|
||
// 失败:不移动 cur,直接 break
|
||
if (rt < 0) {
|
||
perror("send failed");
|
||
if (errno == ECONNRESET || errno == EPIPE) {
|
||
state = OFFLINE;
|
||
if (sockfd >= 0) {
|
||
close(sockfd);
|
||
sockfd = -1;
|
||
DEBUGLOG("connect closed\n");
|
||
}
|
||
} else if (rt == 0) {
|
||
fprintf(stderr, "send returned 0 (peer closed?)\n");
|
||
} else {
|
||
fprintf(stderr, "partial send: %d/%u\n", rt, cur->len);
|
||
}
|
||
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
pending.head = cur;
|
||
if(!cur)
|
||
pending.tail = NULL;
|
||
}
|
||
|
||
/* ================= Network logic ================= */
|
||
static void try_connect(void)
|
||
{
|
||
if(sockfd > 0){
|
||
close(sockfd);
|
||
sockfd = -1;
|
||
}
|
||
|
||
struct sockaddr_in addr = {};
|
||
int i = 0;
|
||
|
||
addr.sin_family = AF_INET;
|
||
addr.sin_port = htons(peer_port);
|
||
inet_pton(AF_INET, peer_ip, &addr.sin_addr);
|
||
|
||
for(i = 0;i < 10; ++ i){
|
||
sockfd = socket(AF_INET, SOCK_STREAM, 0);
|
||
if (sockfd < 0) {
|
||
perror("socket");
|
||
return;
|
||
}
|
||
|
||
DEBUGLOG("connect try %d...\n", i + 1);
|
||
if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
|
||
DEBUGLOG("connect success: %s:%d\n", peer_ip, peer_port);
|
||
|
||
int flags = fcntl(sockfd, F_GETFL, 0);
|
||
fcntl(sockfd, F_SETFL, flags | O_NONBLOCK);
|
||
|
||
struct epoll_event ev;
|
||
ev.events = EPOLLIN;
|
||
ev.data.fd = sockfd;
|
||
epoll_ctl(epollfd, EPOLL_CTL_ADD, sockfd, &ev);
|
||
|
||
state = ONLINE;
|
||
pending_send_all();
|
||
return;
|
||
}
|
||
|
||
perror("connect");
|
||
close(sockfd);
|
||
sockfd = -1;
|
||
sleep(1);
|
||
}
|
||
|
||
DEBUGLOG("connect failed after 10 retries\n");
|
||
}
|
||
|
||
static void handle_socket_readable(void)
|
||
{
|
||
char buf[65536];
|
||
while (1) {
|
||
int n = recv(sockfd, buf, sizeof(buf), MSG_DONTWAIT);
|
||
if (n > 0) {
|
||
continue;
|
||
} else if (n == 0) {
|
||
state = OFFLINE;
|
||
epoll_ctl(epollfd, EPOLL_CTL_DEL, sockfd, NULL);
|
||
close(sockfd);
|
||
sockfd = -1;
|
||
DEBUGLOG("connection closed\n");
|
||
break;
|
||
} else {
|
||
if (errno == EAGAIN || errno == EWOULDBLOCK) {
|
||
break;
|
||
}
|
||
perror("recv");
|
||
state = OFFLINE;
|
||
epoll_ctl(epollfd, EPOLL_CTL_DEL, sockfd, NULL);
|
||
close(sockfd);
|
||
sockfd = -1;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
static void handle_socket_writable(void)
|
||
{
|
||
pending_send_all();
|
||
if (!pending.head) {
|
||
struct epoll_event ev;
|
||
ev.events = EPOLLIN; // 只监听读
|
||
ev.data.fd = sockfd;
|
||
epoll_ctl(epollfd, EPOLL_CTL_MOD, sockfd, &ev);
|
||
}
|
||
}
|
||
|
||
|
||
/* ================= perf buffer callback ================= */
|
||
static void handle_event(void *ctx, int cpu, void *data, __u32 size)
|
||
{
|
||
struct replica_event *evt = data;
|
||
switch (evt->type) {
|
||
|
||
case EVENT_SSYNC:
|
||
strncpy(peer_ip, evt->sync.ip, sizeof(peer_ip));
|
||
peer_port = evt->sync.port;
|
||
peer_seq = evt->sync.seq;
|
||
DEBUGLOG("SSYNC [seq:%lld], [%s:%d]\n", peer_seq, peer_ip, peer_port);
|
||
|
||
state = OFFLINE;
|
||
pending_gc(peer_seq);
|
||
break;
|
||
|
||
case EVENT_COMPLETED_CMD:
|
||
// DEBUGLOG("CMD [seq:%lld], cmd:\n[\n%s]\n", evt->complete.seq, evt->complete.cmd);
|
||
pending_push(evt->complete.seq,
|
||
evt->complete.len,
|
||
evt->complete.cmd);
|
||
|
||
if (state == ONLINE && sockfd >= 0) {
|
||
struct epoll_event ev;
|
||
ev.events = EPOLLIN | EPOLLOUT;
|
||
ev.data.fd = sockfd;
|
||
epoll_ctl(epollfd, EPOLL_CTL_MOD, sockfd, &ev);
|
||
}
|
||
break;
|
||
|
||
case EVENT_SREADY:
|
||
DEBUGLOG("SREADY \n");
|
||
if (state == OFFLINE)
|
||
try_connect();
|
||
break;
|
||
}
|
||
}
|
||
|
||
/* ================= main ================= */
|
||
int main(int argc, char **argv)
|
||
{
|
||
struct replica_bpf *skel;
|
||
struct perf_buffer *pb = NULL;
|
||
int err;
|
||
|
||
/* Open BPF application */
|
||
skel = replica_bpf__open();
|
||
if (!skel) {
|
||
fprintf(stderr, "Failed to open BPF skeleton\n");
|
||
return 1;
|
||
}
|
||
|
||
/* Load & verify BPF programs */
|
||
err = replica_bpf__load(skel);
|
||
if (err) {
|
||
fprintf(stderr, "Failed to load and verify BPF skeleton\n");
|
||
goto cleanup;
|
||
}
|
||
|
||
/* Attach tracepoint handler */
|
||
err = replica_bpf__attach(skel);
|
||
if (err) {
|
||
fprintf(stderr, "Failed to attach BPF skeleton\n");
|
||
goto cleanup;
|
||
}
|
||
|
||
printf("Successfully started! \n");
|
||
|
||
|
||
pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 8,
|
||
handle_event, NULL, NULL, NULL);
|
||
|
||
if(!pb){
|
||
goto cleanup;
|
||
}
|
||
|
||
epollfd = epoll_create1(0);
|
||
if (epollfd < 0) {
|
||
fprintf(stderr, "epoll_create1 failed\n");
|
||
goto cleanup;
|
||
}
|
||
|
||
while (1) {
|
||
struct epoll_event events[10];
|
||
|
||
perf_buffer__poll(pb, 1000); // 处理事件
|
||
|
||
if(OFFLINE) continue;
|
||
|
||
int nfds = epoll_wait(epollfd, events, 10, 0);
|
||
for (int i = 0; i < nfds; i++) {
|
||
if (events[i].data.fd == sockfd) {
|
||
if (events[i].events & EPOLLIN) {
|
||
handle_socket_readable(); // 快速消费接收数据
|
||
}
|
||
if (events[i].events & EPOLLOUT) {
|
||
handle_socket_writable(); // 发送数据
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
perf_buffer__free(pb);
|
||
|
||
cleanup:
|
||
pending_free();
|
||
if (sockfd >= 0) close(sockfd);
|
||
replica_bpf__destroy(skel);
|
||
return -err;
|
||
}
|