#define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include "diskuring/diskuring.h" #include "server.h" #define CONNECTION_SIZE 65536 #define MAX_PORTS 20 #define RECV_BATCH_BYTES 4096 #define CHAIN_BUFFER_CHUNK 4096 #define MAX_CONN_READ_BYTES (32u * 1024u * 1024u) #define MAX_CONN_WRITE_BYTES (32u * 1024u * 1024u) #if ENABLE_KVSTORE typedef int (*msg_handler)(struct conn *conn); static msg_handler kvs_handler; extern iouring_ctx_t global_uring_ctx; int kvs_request(struct conn *c) { return kvs_handler ? kvs_handler(c) : -1; } int kvs_response(struct conn *c) { (void)c; return 0; } #endif int accept_cb(int fd); int recv_cb(int fd); int send_cb(int fd); static int epfd = -1; static int wakeup_fd = -1; static int timer_fd = -1; static struct timeval begin; static struct conn conn_list[CONNECTION_SIZE]; static int conn_fd_valid(int fd) { return fd >= 0 && fd < CONNECTION_SIZE; } static int set_nonblocking(int fd) { int flags = fcntl(fd, F_GETFL, 0); if (flags < 0) { return -1; } if ((flags & O_NONBLOCK) != 0) { return 0; } return fcntl(fd, F_SETFL, flags | O_NONBLOCK); } static void conn_clear_slot(int fd) { struct conn *c; if (!conn_fd_valid(fd)) { return; } c = &conn_list[fd]; chain_buffer_reset(&c->rbuf); chain_buffer_reset(&c->wbuf); memset(c, 0, sizeof(*c)); c->fd = -1; } static void close_conn(int fd) { if (!conn_fd_valid(fd)) { return; } epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL); close(fd); conn_clear_slot(fd); } static int set_event(int fd, int event, int is_add) { struct epoll_event ev; int op = is_add ? EPOLL_CTL_ADD : EPOLL_CTL_MOD; memset(&ev, 0, sizeof(ev)); ev.events = (uint32_t)event; ev.data.fd = fd; if (epoll_ctl(epfd, op, fd, &ev) < 0) { return -1; } return 0; } static int update_conn_events(int fd) { struct conn *c; int events = EPOLLIN; if (!conn_fd_valid(fd)) { return -1; } c = &conn_list[fd]; if (chain_buffer_len(&c->wbuf) > 0) { events |= EPOLLOUT; } return set_event(fd, events, 0); } int event_register(int fd, int event) { struct conn *c; if (!conn_fd_valid(fd)) { if (fd >= 0) { close(fd); } return -1; } if (set_nonblocking(fd) < 0) { close(fd); return -1; } conn_clear_slot(fd); c = &conn_list[fd]; c->fd = fd; c->r_action.recv_callback = recv_cb; c->send_callback = send_cb; c->is_stop = 0; chain_buffer_init(&c->rbuf, CHAIN_BUFFER_CHUNK); chain_buffer_init(&c->wbuf, CHAIN_BUFFER_CHUNK); if (set_event(fd, event, 1) < 0) { close_conn(fd); return -1; } return 0; } int accept_cb(int fd) { while (1) { struct sockaddr_in clientaddr; socklen_t len = sizeof(clientaddr); int clientfd = accept4(fd, (struct sockaddr *)&clientaddr, &len, SOCK_NONBLOCK | SOCK_CLOEXEC); if (clientfd < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) { return 0; } if (errno == EINTR) { continue; } printf("accept errno: %d --> %s\n", errno, strerror(errno)); return -1; } if (!conn_fd_valid(clientfd)) { printf("drop client fd=%d, out of conn_list range\n", clientfd); close(clientfd); continue; } if (event_register(clientfd, EPOLLIN) < 0) { continue; } if ((clientfd % 1000) == 0) { struct timeval current; int time_used; gettimeofday(¤t, NULL); time_used = (int)((current.tv_sec - begin.tv_sec) * 1000 + (current.tv_usec - begin.tv_usec) / 1000); begin = current; (void)time_used; } } } int recv_cb(int fd) { struct conn *c; int total = 0; if (!conn_fd_valid(fd)) { return -1; } c = &conn_list[fd]; while (1) { uint8_t tmp[RECV_BATCH_BYTES]; ssize_t n = recv(fd, tmp, sizeof(tmp), 0); if (n > 0) { size_t cur_len = chain_buffer_len(&c->rbuf); if (cur_len > MAX_CONN_READ_BYTES - (size_t)n) { printf("fd=%d read buffer overflow, close connection\n", fd); close_conn(fd); return 0; } if (chain_buffer_append(&c->rbuf, tmp, (size_t)n) < 0) { printf("fd=%d append read buffer failed: %s\n", fd, strerror(errno)); close_conn(fd); return 0; } total += (int)n; continue; } if (n == 0) { close_conn(fd); return 0; } if (errno == EINTR) { continue; } if (errno == EAGAIN || errno == EWOULDBLOCK) { break; } printf("recv fd=%d errno=%d, %s\n", fd, errno, strerror(errno)); close_conn(fd); return 0; } if (total <= 0) { return 0; } #if ENABLE_HTTP http_request(c); #elif ENABLE_WEBSOCKET ws_request(c); #elif ENABLE_KVSTORE { int consumed = kvs_request(c); size_t readable = chain_buffer_len(&c->rbuf); if (consumed < 0) { close_conn(fd); return 0; } if ((size_t)consumed > readable) { printf("fd=%d invalid consumed=%d readable=%zu\n", fd, consumed, readable); close_conn(fd); return 0; } if (consumed > 0) { chain_buffer_drain(&c->rbuf, (size_t)consumed); } if (chain_buffer_len(&c->wbuf) > MAX_CONN_WRITE_BYTES) { printf("fd=%d write buffer overflow, close connection\n", fd); close_conn(fd); return 0; } } #endif if (update_conn_events(fd) < 0) { close_conn(fd); return 0; } return total; } int send_cb(int fd) { struct conn *c; int sent_total = 0; if (!conn_fd_valid(fd)) { return -1; } c = &conn_list[fd]; #if ENABLE_HTTP http_response(c); #elif ENABLE_WEBSOCKET ws_response(c); #elif ENABLE_KVSTORE kvs_response(c); #endif while (chain_buffer_len(&c->wbuf) > 0) { ssize_t n = chain_buffer_send_fd(&c->wbuf, fd, MSG_NOSIGNAL); if (n > 0) { sent_total += (int)n; continue; } if (n == 0) { break; } if (errno == EINTR) { continue; } if (errno == EAGAIN || errno == EWOULDBLOCK) { break; } printf("send fd=%d errno=%d %s\n", fd, errno, strerror(errno)); close_conn(fd); return 0; } if (update_conn_events(fd) < 0) { close_conn(fd); return 0; } return sent_total; } int handle_wakeup_fd_cb(int fd) { uint64_t v; while (1) { ssize_t n = read(fd, &v, sizeof(v)); if (n == (ssize_t)sizeof(v)) { continue; } if (n < 0 && errno == EINTR) { continue; } if (n < 0 && errno == EAGAIN) { break; } break; } cleanup_finished_iouring_tasks(&global_uring_ctx); return 0; } int init_wakeup_fd(void) { int wfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); struct conn *c; if (wfd < 0) { printf("eventfd failed: errno=%d %s\n", errno, strerror(errno)); return -1; } if (!conn_fd_valid(wfd)) { close(wfd); return -1; } conn_clear_slot(wfd); c = &conn_list[wfd]; c->fd = wfd; c->r_action.recv_callback = handle_wakeup_fd_cb; if (set_event(wfd, EPOLLIN, 1) < 0) { close_conn(wfd); return -1; } return wfd; } void sync_wakeup() { uint64_t one = 1; ssize_t n; if (wakeup_fd < 0) { return; } while (1) { n = write(wakeup_fd, &one, sizeof(one)); if (n == (ssize_t)sizeof(one)) { return; } if (n < 0 && errno == EINTR) { continue; } if (n < 0 && errno == EAGAIN) { return; } return; } } int handle_timer_fd_cb(int fd) { uint64_t v; while (1) { ssize_t n = read(fd, &v, sizeof(v)); if (n == (ssize_t)sizeof(v)) { continue; } if (n < 0 && errno == EINTR) { continue; } if (n < 0 && errno == EAGAIN) { break; } break; } return 0; } int init_timer_fd(void) { int tfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC); struct itimerspec its; struct conn *c; if (tfd < 0) { printf("timerfd_create failed: errno=%d %s\n", errno, strerror(errno)); return -1; } if (!conn_fd_valid(tfd)) { close(tfd); return -1; } memset(&its, 0, sizeof(its)); its.it_interval.tv_nsec = 100 * 1000 * 1000; its.it_value.tv_nsec = 100 * 1000 * 1000; if (timerfd_settime(tfd, 0, &its, NULL) < 0) { close(tfd); return -1; } conn_clear_slot(tfd); c = &conn_list[tfd]; c->fd = tfd; c->r_action.recv_callback = handle_timer_fd_cb; if (set_event(tfd, EPOLLIN, 1) < 0) { close_conn(tfd); return -1; } return tfd; } int r_init_server(unsigned short port) { int sockfd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0); int opt = 1; struct sockaddr_in servaddr; if (sockfd < 0) { return -1; } if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) { close(sockfd); return -1; } memset(&servaddr, 0, sizeof(servaddr)); servaddr.sin_family = AF_INET; servaddr.sin_addr.s_addr = htonl(INADDR_ANY); servaddr.sin_port = htons(port); if (bind(sockfd, (struct sockaddr *)&servaddr, sizeof(servaddr)) < 0) { printf("bind failed on port %u: %s\n", port, strerror(errno)); close(sockfd); return -1; } if (listen(sockfd, 128) < 0) { close(sockfd); return -1; } return sockfd; } int reactor_start(unsigned short port, msg_handler handler) { int listen_fds[MAX_PORTS]; int listen_count = 0; int i; if (!handler) { return -1; } for (i = 0; i < CONNECTION_SIZE; i++) { conn_list[i].fd = -1; } kvs_handler = handler; epfd = epoll_create1(EPOLL_CLOEXEC); if (epfd < 0) { return -1; } wakeup_fd = init_wakeup_fd(); if (wakeup_fd < 0) { close(epfd); epfd = -1; return -1; } timer_fd = init_timer_fd(); if (timer_fd < 0) { close_conn(wakeup_fd); close(epfd); wakeup_fd = -1; epfd = -1; return -1; } for (i = 0; i < MAX_PORTS; i++) { int sockfd = r_init_server((unsigned short)(port + i)); struct conn *c; if (sockfd < 0) { continue; } if (!conn_fd_valid(sockfd)) { close(sockfd); continue; } conn_clear_slot(sockfd); c = &conn_list[sockfd]; c->fd = sockfd; c->r_action.recv_callback = accept_cb; c->is_stop = 0; if (set_event(sockfd, EPOLLIN, 1) < 0) { close_conn(sockfd); continue; } listen_fds[listen_count++] = sockfd; } if (listen_count == 0) { close_conn(timer_fd); close_conn(wakeup_fd); close(epfd); timer_fd = -1; wakeup_fd = -1; epfd = -1; return -1; } gettimeofday(&begin, NULL); while (1) { struct epoll_event events[1024]; int nready = epoll_wait(epfd, events, 1024, -1); if (nready < 0) { if (errno == EINTR) { continue; } break; } for (i = 0; i < nready; i++) { int connfd = events[i].data.fd; uint32_t ev = events[i].events; if (!conn_fd_valid(connfd)) { continue; } if ((ev & (EPOLLERR | EPOLLHUP | EPOLLRDHUP)) && conn_list[connfd].r_action.recv_callback == recv_cb) { close_conn(connfd); continue; } if ((ev & EPOLLIN) && conn_list[connfd].r_action.recv_callback) { conn_list[connfd].r_action.recv_callback(connfd); } if (!conn_fd_valid(connfd) || conn_list[connfd].fd < 0) { continue; } if ((ev & EPOLLOUT) && conn_list[connfd].send_callback) { conn_list[connfd].send_callback(connfd); } } } for (i = 0; i < listen_count; i++) { close_conn(listen_fds[i]); } if (timer_fd >= 0) { close_conn(timer_fd); } if (wakeup_fd >= 0) { close_conn(wakeup_fd); } if (epfd >= 0) { close(epfd); } timer_fd = -1; wakeup_fd = -1; epfd = -1; return 0; }