/*
 * ldb/reactor.c
 * (file listing metadata: 616 lines, 13 KiB, C)
 */

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/timerfd.h>
#include <unistd.h>
#include "diskuring/diskuring.h"
#include "server.h"
/* Number of slots in conn_list; conn_fd_valid() rejects fds at or above this value. */
#define CONNECTION_SIZE 65536
/* Number of consecutive ports, starting at the base port, that reactor_start() tries to listen on. */
#define MAX_PORTS 20
/* Size in bytes of the stack buffer recv_cb() passes to each recv() call. */
#define RECV_BATCH_BYTES 4096
/* Second argument given to chain_buffer_init() for both per-connection buffers (presumably the chunk size - confirm against chain_buffer's API). */
#define CHAIN_BUFFER_CHUNK 4096
/* recv_cb() closes a connection rather than let its read buffer grow beyond this many bytes. */
#define MAX_CONN_READ_BYTES (32u * 1024u * 1024u)
/* In the KV-store path, recv_cb() closes a connection whose write buffer exceeds this many bytes. */
#define MAX_CONN_WRITE_BYTES (32u * 1024u * 1024u)
#if ENABLE_KVSTORE
/* Signature of the handler called by kvs_request() for a connection. */
typedef int (*msg_handler)(struct conn *conn);

/* Handler installed by reactor_start(); NULL until one is installed. */
static msg_handler kvs_handler;

/* io_uring context defined in another translation unit. */
extern iouring_ctx_t global_uring_ctx;

/*
 * Hand the connection to the installed handler.
 *
 * Returns the handler's return value, or -1 when no handler is installed.
 */
int kvs_request(struct conn *c) {
    if (!kvs_handler) {
        return -1;
    }
    return kvs_handler(c);
}

/* Response hook for the KV-store path: ignores its argument and returns 0. */
int kvs_response(struct conn *c) {
    (void)c;
    return 0;
}
#endif
/* Event callbacks; recv_cb and send_cb are referenced by event_register() before their definitions. */
int accept_cb(int fd);
int recv_cb(int fd);
int send_cb(int fd);
/* epoll instance used by every epoll_ctl/epoll_wait call in this file; -1 while none is open. */
static int epfd = -1;
/* eventfd written by sync_wakeup(); -1 while not initialised. */
static int wakeup_fd = -1;
/* timerfd created by init_timer_fd(); -1 while not initialised. */
static int timer_fd = -1;
/* Timestamp set by reactor_start() and refreshed by accept_cb(). */
static struct timeval begin;
/* Connection table indexed directly by file descriptor number. */
static struct conn conn_list[CONNECTION_SIZE];
/* Return 1 when fd can be used as an index into conn_list, 0 otherwise. */
static int conn_fd_valid(int fd) {
    if (fd < 0) {
        return 0;
    }
    return fd < CONNECTION_SIZE;
}
/*
 * Make sure O_NONBLOCK is set on fd.
 *
 * Returns -1 if the current flags cannot be read, 0 if the flag is already
 * set, and otherwise the result of the fcntl(F_SETFL) call.
 */
static int set_nonblocking(int fd) {
    const int current = fcntl(fd, F_GETFL, 0);

    if (current < 0) {
        return -1;
    }
    if (current & O_NONBLOCK) {
        return 0;
    }
    return fcntl(fd, F_SETFL, current | O_NONBLOCK);
}
/*
 * Reset the conn_list slot for fd: reset both chain buffers, zero the
 * structure, and mark the slot unused (fd = -1). Does nothing when fd is
 * outside the table.
 */
static void conn_clear_slot(int fd) {
    if (conn_fd_valid(fd)) {
        struct conn *slot = &conn_list[fd];

        chain_buffer_reset(&slot->rbuf);
        chain_buffer_reset(&slot->wbuf);
        memset(slot, 0, sizeof(*slot));
        slot->fd = -1;
    }
}
/*
 * Tear down the connection on fd: remove it from the epoll set, close the
 * descriptor, and clear its conn_list slot. Does nothing when fd is outside
 * the table.
 */
static void close_conn(int fd) {
    if (conn_fd_valid(fd)) {
        /* The result of the DEL is ignored; the descriptor is closed next anyway. */
        epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL);
        close(fd);
        conn_clear_slot(fd);
    }
}
/*
 * Register (is_add != 0) or modify (is_add == 0) the epoll interest set
 * for fd.
 *
 * event is the epoll event mask; the fd itself is stored as the event's
 * user data. Returns 0 on success and -1 if epoll_ctl fails.
 */
static int set_event(int fd, int event, int is_add) {
    struct epoll_event ee;
    int rc;

    memset(&ee, 0, sizeof(ee));
    ee.data.fd = fd;
    ee.events = (uint32_t)event;

    if (is_add) {
        rc = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ee);
    } else {
        rc = epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ee);
    }
    return rc < 0 ? -1 : 0;
}
/*
 * Re-arm the epoll mask for fd: always EPOLLIN, plus EPOLLOUT while the
 * connection's write buffer is non-empty.
 *
 * Returns -1 when fd is outside the table, otherwise set_event()'s result.
 */
static int update_conn_events(int fd) {
    int mask;

    if (!conn_fd_valid(fd)) {
        return -1;
    }

    if (chain_buffer_len(&conn_list[fd].wbuf) > 0) {
        mask = EPOLLIN | EPOLLOUT;
    } else {
        mask = EPOLLIN;
    }
    return set_event(fd, mask, 0);
}
/*
 * Take ownership of fd as a client connection: make it non-blocking,
 * initialise its conn_list slot and buffers, install recv_cb/send_cb and
 * add it to the epoll set with the given event mask.
 *
 * Returns 0 on success. On any failure the descriptor is closed (when it is
 * non-negative) and -1 is returned.
 */
int event_register(int fd, int event) {
    struct conn *slot;

    /* set_nonblocking() is only attempted for fds that fit in the table. */
    if (!conn_fd_valid(fd) || set_nonblocking(fd) < 0) {
        if (fd >= 0) {
            close(fd);
        }
        return -1;
    }

    conn_clear_slot(fd);
    slot = &conn_list[fd];
    slot->fd = fd;
    slot->r_action.recv_callback = recv_cb;
    slot->send_callback = send_cb;
    slot->is_stop = 0;
    chain_buffer_init(&slot->rbuf, CHAIN_BUFFER_CHUNK);
    chain_buffer_init(&slot->wbuf, CHAIN_BUFFER_CHUNK);

    if (set_event(fd, event, 1) == 0) {
        return 0;
    }

    /* Registration failed: undo everything, including closing the fd. */
    close_conn(fd);
    return -1;
}
/*
 * Accept every pending connection on the listening socket fd and register
 * each one for EPOLLIN via event_register().
 *
 * Returns 0 once the accept queue is drained (EAGAIN/EWOULDBLOCK) and -1 on
 * an unexpected accept4() error, which is also logged.
 */
int accept_cb(int fd) {
    for (;;) {
        struct sockaddr_in clientaddr;
        socklen_t len = sizeof(clientaddr);
        int clientfd = accept4(fd, (struct sockaddr *)&clientaddr, &len,
                               SOCK_NONBLOCK | SOCK_CLOEXEC);

        if (clientfd < 0) {
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                return 0;
            }
            /*
             * EINTR: interrupted by a signal. ECONNABORTED: the peer reset
             * the connection while it was still queued. Neither says
             * anything about the listener, so keep draining the queue
             * instead of abandoning the remaining pending connections.
             */
            if (errno == EINTR || errno == ECONNABORTED) {
                continue;
            }
            printf("accept errno: %d --> %s\n", errno, strerror(errno));
            return -1;
        }

        if (!conn_fd_valid(clientfd)) {
            printf("drop client fd=%d, out of conn_list range\n", clientfd);
            close(clientfd);
            continue;
        }

        /* event_register() closes clientfd itself when it fails. */
        if (event_register(clientfd, EPOLLIN) < 0) {
            continue;
        }

        /* Refresh the reference timestamp on every fd that is a multiple of 1000. */
        if ((clientfd % 1000) == 0) {
            gettimeofday(&begin, NULL);
        }
    }
}
/*
 * Read callback for client connections.
 *
 * Drains the socket into the connection's read buffer until recv() reports
 * EAGAIN/EWOULDBLOCK, then hands the connection to the protocol handler
 * selected at build time and re-arms the epoll mask.
 *
 * Returns -1 when fd is outside the table, the number of bytes read in this
 * call on success, and 0 when nothing was read or the connection was closed
 * (peer shutdown, I/O error, buffer limit exceeded, handler failure).
 */
int recv_cb(int fd) {
    uint8_t chunk[RECV_BATCH_BYTES];
    struct conn *conn;
    int total = 0;

    if (!conn_fd_valid(fd)) {
        return -1;
    }
    conn = &conn_list[fd];

    for (;;) {
        ssize_t got = recv(fd, chunk, sizeof(chunk), 0);
        size_t buffered;

        if (got == 0) {
            /* Orderly shutdown by the peer. */
            goto drop;
        }
        if (got < 0) {
            if (errno == EINTR) {
                continue;
            }
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                break;
            }
            printf("recv fd=%d errno=%d, %s\n", fd, errno, strerror(errno));
            goto drop;
        }

        /* Refuse to grow the read buffer past MAX_CONN_READ_BYTES. */
        buffered = chain_buffer_len(&conn->rbuf);
        if (buffered > MAX_CONN_READ_BYTES - (size_t)got) {
            printf("fd=%d read buffer overflow, close connection\n", fd);
            goto drop;
        }
        if (chain_buffer_append(&conn->rbuf, chunk, (size_t)got) < 0) {
            printf("fd=%d append read buffer failed: %s\n", fd, strerror(errno));
            goto drop;
        }
        total += (int)got;
    }

    if (total <= 0) {
        return 0;
    }

#if ENABLE_HTTP
    http_request(conn);
#elif ENABLE_WEBSOCKET
    ws_request(conn);
#elif ENABLE_KVSTORE
    {
        /* The handler reports how many buffered bytes it consumed. */
        int consumed = kvs_request(conn);
        size_t readable = chain_buffer_len(&conn->rbuf);

        if (consumed < 0) {
            goto drop;
        }
        if ((size_t)consumed > readable) {
            printf("fd=%d invalid consumed=%d readable=%zu\n", fd, consumed, readable);
            goto drop;
        }
        if (consumed > 0) {
            chain_buffer_drain(&conn->rbuf, (size_t)consumed);
        }
        if (chain_buffer_len(&conn->wbuf) > MAX_CONN_WRITE_BYTES) {
            printf("fd=%d write buffer overflow, close connection\n", fd);
            goto drop;
        }
    }
#endif

    if (update_conn_events(fd) < 0) {
        goto drop;
    }
    return total;

drop:
    close_conn(fd);
    return 0;
}
/*
 * Write callback for client connections.
 *
 * Runs the build-time response hook, then flushes the connection's write
 * buffer to the socket until it is empty or the socket would block, and
 * re-arms the epoll mask.
 *
 * Returns -1 when fd is outside the table, the number of bytes sent on
 * success, and 0 when the connection was closed because of an error.
 */
int send_cb(int fd) {
    struct conn *conn;
    int sent_total = 0;

    if (!conn_fd_valid(fd)) {
        return -1;
    }
    conn = &conn_list[fd];

#if ENABLE_HTTP
    http_response(conn);
#elif ENABLE_WEBSOCKET
    ws_response(conn);
#elif ENABLE_KVSTORE
    kvs_response(conn);
#endif

    while (chain_buffer_len(&conn->wbuf) > 0) {
        ssize_t n = chain_buffer_send_fd(&conn->wbuf, fd, MSG_NOSIGNAL);

        if (n > 0) {
            sent_total += (int)n;
        } else if (n == 0 || errno == EAGAIN || errno == EWOULDBLOCK) {
            /* Nothing more can be written right now. */
            break;
        } else if (errno != EINTR) {
            printf("send fd=%d errno=%d %s\n", fd, errno, strerror(errno));
            close_conn(fd);
            return 0;
        }
        /* EINTR: fall through and retry. */
    }

    if (update_conn_events(fd) < 0) {
        close_conn(fd);
        return 0;
    }
    return sent_total;
}
/*
 * Read callback for the wakeup eventfd.
 *
 * Reads the descriptor until a read no longer returns a full 8-byte value
 * (retrying on EINTR), then calls cleanup_finished_iouring_tasks() on the
 * global io_uring context. Always returns 0.
 */
int handle_wakeup_fd_cb(int fd) {
    uint64_t counter;
    ssize_t n;

    do {
        n = read(fd, &counter, sizeof(counter));
    } while (n == (ssize_t)sizeof(counter) || (n < 0 && errno == EINTR));

    cleanup_finished_iouring_tasks(&global_uring_ctx);
    return 0;
}
/*
 * Create the non-blocking wakeup eventfd, give it a conn_list slot whose
 * read callback is handle_wakeup_fd_cb, and add it to the epoll set.
 *
 * Returns the new descriptor, or -1 on failure (nothing is left open).
 */
int init_wakeup_fd(void) {
    struct conn *slot;
    int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);

    if (efd < 0) {
        printf("eventfd failed: errno=%d %s\n", errno, strerror(errno));
        return -1;
    }
    if (!conn_fd_valid(efd)) {
        close(efd);
        return -1;
    }

    conn_clear_slot(efd);
    slot = &conn_list[efd];
    slot->fd = efd;
    slot->r_action.recv_callback = handle_wakeup_fd_cb;

    if (set_event(efd, EPOLLIN, 1) < 0) {
        close_conn(efd);
        efd = -1;
    }
    return efd;
}
/*
 * Signal the reactor by adding 1 to the wakeup eventfd.
 *
 * Does nothing when the wakeup fd is not initialised. The write is retried
 * only on EINTR; any other outcome, success or failure, ends the call.
 */
void sync_wakeup() {
    const uint64_t one = 1;
    ssize_t n;

    if (wakeup_fd < 0) {
        return;
    }

    do {
        n = write(wakeup_fd, &one, sizeof(one));
    } while (n < 0 && errno == EINTR);
}
/*
 * Read callback for the timer fd.
 *
 * Reads 8-byte values from fd until a read no longer returns a full value
 * (retrying on EINTR) and discards them. Always returns 0.
 */
int handle_timer_fd_cb(int fd) {
    uint64_t expirations;
    ssize_t n;

    do {
        n = read(fd, &expirations, sizeof(expirations));
    } while (n == (ssize_t)sizeof(expirations) || (n < 0 && errno == EINTR));

    return 0;
}
/*
 * Create a non-blocking CLOCK_MONOTONIC timerfd whose first expiry and
 * repeat interval are both 100 ms, give it a conn_list slot whose read
 * callback is handle_timer_fd_cb, and add it to the epoll set.
 *
 * Returns the new descriptor, or -1 on failure (nothing is left open).
 */
int init_timer_fd(void) {
    struct itimerspec spec;
    struct conn *slot;
    int tfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);

    if (tfd < 0) {
        printf("timerfd_create failed: errno=%d %s\n", errno, strerror(errno));
        return -1;
    }

    memset(&spec, 0, sizeof(spec));
    spec.it_value.tv_nsec = 100 * 1000 * 1000;
    spec.it_interval.tv_nsec = 100 * 1000 * 1000;

    /* The timer is only armed for descriptors that fit in conn_list. */
    if (!conn_fd_valid(tfd) || timerfd_settime(tfd, 0, &spec, NULL) < 0) {
        close(tfd);
        return -1;
    }

    conn_clear_slot(tfd);
    slot = &conn_list[tfd];
    slot->fd = tfd;
    slot->r_action.recv_callback = handle_timer_fd_cb;

    if (set_event(tfd, EPOLLIN, 1) < 0) {
        close_conn(tfd);
        tfd = -1;
    }
    return tfd;
}
/*
 * Create a non-blocking, close-on-exec IPv4 TCP socket with SO_REUSEADDR,
 * bind it to INADDR_ANY:port and start listening with a backlog of 128.
 *
 * Returns the listening descriptor, or -1 on failure (the socket is closed;
 * only a bind failure is logged).
 */
int r_init_server(unsigned short port) {
    struct sockaddr_in addr;
    int reuse = 1;
    int lfd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);

    if (lfd < 0) {
        return -1;
    }
    if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        goto fail;
    }

    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(port);
    addr.sin_addr.s_addr = htonl(INADDR_ANY);

    if (bind(lfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        printf("bind failed on port %u: %s\n", port, strerror(errno));
        goto fail;
    }
    if (listen(lfd, 128) < 0) {
        goto fail;
    }
    return lfd;

fail:
    close(lfd);
    return -1;
}
/*
 * Run the reactor: set up epoll, the wakeup eventfd, the timerfd and up to
 * MAX_PORTS listening sockets on consecutive ports starting at `port`, then
 * dispatch events until epoll_wait() fails with an error other than EINTR.
 *
 * handler is installed as the KV-store request handler and must not be NULL.
 *
 * Returns -1 if handler is NULL or setup fails (everything opened so far is
 * closed), and 0 after the event loop ends and the listeners, timer fd,
 * wakeup fd and epoll instance have been closed. Client connections still
 * open at that point are not closed here.
 */
int reactor_start(unsigned short port, msg_handler handler) {
    int listen_fds[MAX_PORTS];
    int listen_count = 0;
    int i;

    if (!handler) {
        return -1;
    }

    /* Mark every slot unused before any descriptor is registered. */
    for (i = 0; i < CONNECTION_SIZE; i++) {
        conn_list[i].fd = -1;
    }
    kvs_handler = handler;

    epfd = epoll_create1(EPOLL_CLOEXEC);
    if (epfd < 0) {
        return -1;
    }

    wakeup_fd = init_wakeup_fd();
    if (wakeup_fd < 0) {
        close(epfd);
        epfd = -1;
        return -1;
    }

    timer_fd = init_timer_fd();
    if (timer_fd < 0) {
        close_conn(wakeup_fd);
        close(epfd);
        wakeup_fd = -1;
        epfd = -1;
        return -1;
    }

    /* Ports that fail to bind are skipped; one listener is enough to run. */
    for (i = 0; i < MAX_PORTS; i++) {
        int sockfd;
        struct conn *c;

        /*
         * Stop at the top of the port range. Without this check the cast
         * below would wrap to port 0 (an ephemeral port picked by the
         * kernel) and then to low-numbered ports.
         */
        if ((unsigned int)port + (unsigned int)i > 65535u) {
            break;
        }

        sockfd = r_init_server((unsigned short)(port + i));
        if (sockfd < 0) {
            continue;
        }
        if (!conn_fd_valid(sockfd)) {
            close(sockfd);
            continue;
        }
        conn_clear_slot(sockfd);
        c = &conn_list[sockfd];
        c->fd = sockfd;
        c->r_action.recv_callback = accept_cb;
        c->is_stop = 0;
        if (set_event(sockfd, EPOLLIN, 1) < 0) {
            close_conn(sockfd);
            continue;
        }
        listen_fds[listen_count++] = sockfd;
    }

    if (listen_count == 0) {
        close_conn(timer_fd);
        close_conn(wakeup_fd);
        close(epfd);
        timer_fd = -1;
        wakeup_fd = -1;
        epfd = -1;
        return -1;
    }

    gettimeofday(&begin, NULL);

    while (1) {
        struct epoll_event events[1024];
        int nready = epoll_wait(epfd, events, 1024, -1);

        if (nready < 0) {
            if (errno == EINTR) {
                continue;
            }
            break;
        }

        for (i = 0; i < nready; i++) {
            int connfd = events[i].data.fd;
            uint32_t ev = events[i].events;

            if (!conn_fd_valid(connfd)) {
                continue;
            }

            /* Error/hang-up closes client connections only (those using recv_cb). */
            if ((ev & (EPOLLERR | EPOLLHUP | EPOLLRDHUP)) &&
                conn_list[connfd].r_action.recv_callback == recv_cb) {
                close_conn(connfd);
                continue;
            }

            if ((ev & EPOLLIN) && conn_list[connfd].r_action.recv_callback) {
                conn_list[connfd].r_action.recv_callback(connfd);
            }

            /* The read callback may have closed the connection; skip the write then. */
            if (!conn_fd_valid(connfd) || conn_list[connfd].fd < 0) {
                continue;
            }

            if ((ev & EPOLLOUT) && conn_list[connfd].send_callback) {
                conn_list[connfd].send_callback(connfd);
            }
        }
    }

    for (i = 0; i < listen_count; i++) {
        close_conn(listen_fds[i]);
    }
    if (timer_fd >= 0) {
        close_conn(timer_fd);
    }
    if (wakeup_fd >= 0) {
        close_conn(wakeup_fd);
    }
    if (epfd >= 0) {
        close(epfd);
    }
    timer_fd = -1;
    wakeup_fd = -1;
    epfd = -1;
    return 0;
}