主从同步性能优化,主从同步性能测试。

This commit is contained in:
1iaan
2026-02-01 16:49:50 +00:00
parent 003566b69a
commit 6d1a50cf88
31 changed files with 2119 additions and 400 deletions

View File

@@ -67,53 +67,30 @@ static void queue_push(iouring_ctx_t *ctx, task_t *t)
pthread_mutex_unlock(&ctx->q_m);
}
/*
 * Splice an already linked chain of tasks (list_head .. list_tail) onto the
 * front of the context's queue, then signal the queue's condition variable.
 * The whole operation runs under the queue mutex.
 */
static void queue_push_front(iouring_ctx_t *ctx, task_t *list_head, task_t *list_tail)
{
    pthread_mutex_lock(&ctx->q_m);

    task_t *previous_head = ctx->q_head;
    list_tail->next = previous_head;
    ctx->q_head = list_head;

    /* No tail recorded yet, so the chain's last node becomes the tail. */
    if (ctx->q_tail == NULL) {
        ctx->q_tail = list_tail;
    }

    pthread_cond_signal(&ctx->q_cv);
    pthread_mutex_unlock(&ctx->q_m);
}
static task_t *queue_pop_all(iouring_ctx_t *ctx)
static task_t *queue_pop(iouring_ctx_t *ctx)
{
pthread_mutex_lock(&ctx->q_m);
task_t *list = ctx->q_head;
ctx->q_head = ctx->q_tail = NULL;
task_t *t = ctx->q_head;
if (t) {
ctx->q_head = t->next;
if (!ctx->q_head) {
ctx->q_tail = NULL;
}
t->next = NULL;
}
pthread_mutex_unlock(&ctx->q_m);
return list;
return t;
}
static task_t *queue_pop_n(iouring_ctx_t *ctx, int n)
static void queue_push_front(iouring_ctx_t *ctx, task_t *t)
{
if (n <= 0)
return NULL;
pthread_mutex_lock(&ctx->q_m);
task_t *head = ctx->q_head;
if (!head) {
pthread_mutex_unlock(&ctx->q_m);
return NULL;
t->next = ctx->q_head;
ctx->q_head = t;
if (!ctx->q_tail) {
ctx->q_tail = t;
}
task_t *curr = head;
task_t *prev = NULL;
int count = 0;
while (curr && count < n) {
prev = curr;
curr = curr->next;
count++;
}
ctx->q_head = curr;
if (!curr) {
// 队列被取空
ctx->q_tail = NULL;
}
prev->next = NULL;
pthread_mutex_unlock(&ctx->q_m);
return head;
}
extern void sync_wakeup();
@@ -126,25 +103,26 @@ static void *worker_main(void *arg)
{
int cq_count = 0;
// ========== 1. 疯狂收割 CQE(必须优先做,释放 in_flight 额度)==========
// 使用 while 而不是 if确保把 CQ 薅干净
// ========== 1. 收割 CQE ==========
// 检查溢出
if (*ctx->ring.sq.kflags & IORING_SQ_CQ_OVERFLOW) {
fprintf(stderr, "FATAL: CQ overflow detected! Backpressure broken!\n");
abort();
}
while (true) {
struct io_uring_cqe *cqe;
unsigned head;
io_uring_for_each_cqe(&ctx->ring, head, cqe) {
task_t *done = (task_t *)(uintptr_t)cqe->user_data;
// 先减计数(必须在处理前减,否则可能瞬间突破上限)
task_t *done = (task_t *)(uintptr_t)cqe->user_data;
atomic_fetch_sub(&ctx->in_flight, 1);
task_finish(done, cqe->res);
if (cqe->res < 0) {
fprintf(stderr, "write fail: fd=%d res=%d\n", done->fd, cqe->res);
fprintf(stderr, "write fail: fd=%d res=%d, offset=%ld\n", done->fd, cqe->res, done->off);
}
// 加入销毁队列
pthread_mutex_lock(&g_destroy_queue.lock);
done->next = g_destroy_queue.head;
g_destroy_queue.head = done;
@@ -159,99 +137,87 @@ static void *worker_main(void *arg)
sync_wakeup();
}
// 如果这次没收满,说明 CQ 空了,退出收割循环
if (cq_count == 0) break;
cq_count = 0; // 重置继续薅(可能有新的完成了)
cq_count = 0;
}
// 检查溢出(保险起见,虽然有了背压不该再溢出)
if (*ctx->ring.sq.kflags & IORING_SQ_CQ_OVERFLOW) {
fprintf(stderr, "FATAL: CQ overflow detected! Backpressure broken!\n");
abort(); // 直接崩溃,说明逻辑有 bug
}
// ========== 2. 计算还能提交多少 ==========
int current_in_flight = atomic_load(&ctx->in_flight);
int available_slots = ctx->max_in_flight - current_in_flight;
if (available_slots <= 0) {
// 满了!不能取新任务,必须等待 CQE忙等或阻塞等
// 方案 B阻塞等 CQE推荐
struct io_uring_cqe *cqe;
int ret = io_uring_wait_cqe(&ctx->ring, &cqe);
if (ret == 0 && !ctx->stop) {
// 收到一个 CQE回循环开头处理
continue;
}
continue;
}
// ========== 3. 从任务队列取任务(只取 available_slots 个)==========
task_t *task_list = queue_pop_n(ctx, available_slots);
if (!task_list) {
if (!ctx->stop && atomic_load(&ctx->in_flight) > 0) {
int ret = io_uring_submit_and_wait(&ctx->ring, 1);
continue;
}
// 没任务,等待条件变量
pthread_mutex_lock(&ctx->q_m);
while (ctx->q_head == NULL && !ctx->stop) {
pthread_cond_wait(&ctx->q_cv, &ctx->q_m);
}
pthread_mutex_unlock(&ctx->q_m);
continue;
}
// ========== 4. 准备 SQE受限于 available_slots==========
// ========== 2. 批量准备 SQE ==========
int batch_count = 0;
task_t *curr = task_list;
task_t *prev = NULL;
task_t *submitted_head = task_list; // 记录这次实际要提交的部分
task_t *remaining_head = NULL; // 装不下的部分
while (curr && batch_count < available_slots) {
while (true) {
int current_in_flight = atomic_load(&ctx->in_flight);
if (current_in_flight >= ctx->max_in_flight) {
break; // 满了,停止取任务
}
task_t *t = queue_pop(ctx);
if (!t) break;
struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
if (!sqe) {
// SQ 满了(这种情况在控制 inflight 后很少见,但保险起见)
queue_push_front(ctx, t);
break;
}
io_uring_prep_writev(sqe, curr->fd, curr->iovs, curr->iovcnt, curr->off);
sqe->user_data = (uint64_t)(uintptr_t)curr;
io_uring_prep_writev(sqe, t->fd, t->iovs, t->iovcnt, t->off);
sqe->user_data = (uint64_t)(uintptr_t)t;
batch_count++;
prev = curr;
curr = curr->next;
}
// 断开链表:已准备的 和 未准备的
if (prev) {
prev->next = NULL; // 已提交的部分结尾
}
remaining_head = curr; // 剩下的部分(如果有)
// ========== 5. 提交并增加计数 ==========
// ========== 3. 提交 ==========
if (batch_count > 0) {
int submitted = io_uring_submit(&ctx->ring);
if (submitted != batch_count) {
fprintf(stderr, "CRITICAL: prep %d but submit %d\n", batch_count, submitted);
// 这种情况很严重,说明 ring 损坏了,建议 abort
abort();
}
atomic_fetch_add(&ctx->in_flight, submitted);
push_to_sqe += submitted;
atomic_fetch_add(&ctx->in_flight, submitted);
continue;
}
// ========== 6. 把没提交的任务塞回队列头部(保持顺序)==========
if (remaining_head) {
task_t *tail = remaining_head;
while (tail->next) tail = tail->next;
// ========== 4. 没事做就等待 ==========
if (batch_count == 0) {
int inflight = atomic_load(&ctx->in_flight);
if (inflight > 0) {
// 有任务在飞等一个CQE
continue;
} else {
// 真没事了,等新任务
pthread_mutex_lock(&ctx->q_m);
while (ctx->q_head == NULL && !ctx->stop) {
pthread_cond_wait(&ctx->q_cv, &ctx->q_m);
}
pthread_mutex_unlock(&ctx->q_m);
}
}
}
queue_push_front(ctx, remaining_head, tail);
printf("Shutdown: draining remaining CQEs...\n");
int final_cq = 0;
struct io_uring_cqe *cqe;
unsigned head;
while (atomic_load(&ctx->in_flight) > 0) {
io_uring_for_each_cqe(&ctx->ring, head, cqe) {
task_t *done = (task_t *)(uintptr_t)cqe->user_data;
atomic_fetch_sub(&ctx->in_flight, 1);
task_finish(done, cqe->res);
pthread_mutex_lock(&g_destroy_queue.lock);
done->next = g_destroy_queue.head;
g_destroy_queue.head = done;
pthread_mutex_unlock(&g_destroy_queue.lock);
get_from_cqe++;
final_cq++;
}
if (final_cq > 0) {
io_uring_cq_advance(&ctx->ring, final_cq);
final_cq = 0;
}
// 如果还有 inflight等一下
if (atomic_load(&ctx->in_flight) > 0) {
io_uring_submit_and_wait(&ctx->ring, 1);
}
}
@@ -260,12 +226,6 @@ static void *worker_main(void *arg)
return NULL;
}
/*
 * Register the single descriptor `fd` with the ring through
 * io_uring_register_files() and hand back that call's return value.
 */
int iouring_register_fd(iouring_ctx_t *ctx, int fd)
{
    int table[1];

    table[0] = fd;
    return io_uring_register_files(&ctx->ring, table, 1);
}
int iouring_init(iouring_ctx_t *ctx, unsigned entries)
{
memset(ctx, 0, sizeof(*ctx));
@@ -352,8 +312,17 @@ task_t* submit_write(iouring_ctx_t *ctx, int fd, void **bufs, size_t *lens, int
return t;
}
/*
 * Report whether the context looks idle.
 *
 * Returns 1 when the task queue is empty (checked under q_m) and the
 * in_flight counter reads zero, 0 otherwise. The two reads happen one after
 * the other; they are not a single atomic snapshot.
 */
int uring_task_complete(iouring_ctx_t *ctx)
{
    pthread_mutex_lock(&ctx->q_m);
    int queue_empty = (ctx->q_head == NULL);
    pthread_mutex_unlock(&ctx->q_m);

    int in_flight_now = atomic_load(&ctx->in_flight);
    if (in_flight_now != 0) {
        return 0;
    }
    return queue_empty;
}
// 主线程定期调用此函数清理
void cleanup_finished_iouring_tasks() {
void cleanup_finished_iouring_tasks(iouring_ctx_t *ctx) {
pthread_mutex_lock(&g_destroy_queue.lock);
task_t *list = g_destroy_queue.head;
g_destroy_queue.head = NULL;
@@ -366,8 +335,6 @@ void cleanup_finished_iouring_tasks() {
task_destroy(list); // 在主线程执行销毁
list = next;
}
// printf("clean: %d\n\n", cnt);
// mp_print();
release_cnt += cnt;
// printf("push:%lld, sqe:%lld, cqe:%lld, rls:%lld\n", push_to_queue, push_to_sqe, get_from_cqe, release_cnt);
}

View File

@@ -0,0 +1,199 @@
#include "diskuring.h"
#include "memory/alloc_dispatch.h"
#include <poll.h>
#include <sys/eventfd.h>
/**
 * Put a freshly allocated task into a known, empty state.
 *
 * Clears the completion fields (done, res), the list link (next) and the
 * buffer description (iovs, iovcnt). op, fd and off are left for the caller
 * to fill in.
 *
 * @param t  task to initialise; must not be NULL.
 */
void task_init(task_t *t)
{
    t->done = 0;
    t->res = 0;
    t->next = NULL;
    /* task_destroy() reads iovs/iovcnt, so they must never hold garbage,
     * even if the task is destroyed before any buffer is attached. */
    t->iovs = NULL;
    t->iovcnt = 0;
}
/**
 * Record the outcome of a task.
 *
 * Stores the result code first and only then raises the done flag.
 *
 * @param t    task being completed.
 * @param res  result code to store (callers here pass cqe->res).
 */
void task_finish(task_t *t, int res)
{
    t->res = res;
    t->done = 1;
}
/**
 * Release a task and everything it owns.
 *
 * Frees every non-NULL iov_base in iovs[0 .. iovcnt), then the iovec array
 * itself, then the task. Passing NULL is a no-op, mirroring free(NULL).
 *
 * @param t  task to release, or NULL.
 */
void task_destroy(task_t *t)
{
    if (!t) {
        return;
    }

    if (t->iovs) {
        for (int i = 0; i < t->iovcnt; i++) {
            if (t->iovs[i].iov_base) {
                kvs_free(t->iovs[i].iov_base);
            }
        }
        kvs_free(t->iovs);
    }
    kvs_free(t);
}
/**
 * Zero the context and create its io_uring instance.
 *
 * The parameter block is passed in zeroed, i.e. no setup flags are requested
 * and the CQ size is whatever the kernel chooses for `entries`. The chosen CQ
 * size is printed to stdout for diagnostics.
 *
 * @param ctx      context to initialise; its previous contents are discarded.
 * @param entries  requested number of submission queue entries.
 * @return 0 on success; on failure the negative error code returned by
 *         io_uring_queue_init_params() (also reported on stderr).
 */
int iouring_init(iouring_ctx_t *ctx, unsigned entries)
{
    memset(ctx, 0, sizeof(*ctx));

    struct io_uring_params params;
    memset(&params, 0, sizeof(params));

    int ret = io_uring_queue_init_params(entries, &ctx->ring, &params);
    if (ret < 0) {
        fprintf(stderr, "io_uring_queue_init_params failed: %d (%s)\n",
                ret, strerror(-ret));
        return ret;
    }

    /* The setup call fills in the ring sizes it actually granted, so read the
     * CQ size from params rather than from the ring's internal pointers. */
    printf("Kernel CQ size: %u\n", params.cq_entries);

    return 0;
}
/**
 * Tear down the context's ring and release tasks that were never submitted.
 *
 * Tasks still on the pending list (head .. tail) have not been handed an SQE,
 * so nothing else references them and they are destroyed here. Tasks that were
 * already submitted but not yet harvested are not tracked by the context and
 * are not released by this function.
 *
 * @param ctx  context previously set up with iouring_init().
 */
void iouring_shutdown(iouring_ctx_t *ctx)
{
    task_t *cur = ctx->head;
    while (cur) {
        task_t *following = cur->next;
        task_destroy(cur);
        cur = following;
    }
    ctx->head = NULL;
    ctx->tail = NULL;

    io_uring_queue_exit(&ctx->ring);
}
/**
 * Reap every completion currently visible in the CQ.
 *
 * For each CQE the owning task (carried in user_data) is marked finished with
 * the CQE's result, a failure is reported on stderr when the result is
 * negative, and the task is destroyed on the spot. The original note explains
 * that this path is single-threaded, so no deferred-destroy queue is used.
 * After the walk the CQ head is advanced by the number of entries consumed.
 *
 * Aborts the process if the ring reports a CQ overflow.
 */
void harvest_cqes(iouring_ctx_t *ctx)
{
    struct io_uring_cqe *entry;
    unsigned ring_pos;
    int reaped = 0;

    /* Walk all ready CQEs without advancing the head yet. */
    io_uring_for_each_cqe(&ctx->ring, ring_pos, entry) {
        task_t *finished = (task_t *)(uintptr_t)entry->user_data;
        int result = entry->res;

        task_finish(finished, result);
        if (result < 0) {
            fprintf(stderr, "write fail: fd=%d res=%d\n", finished->fd, result);
        }

        task_destroy(finished);
        reaped += 1;
    }

    /* Hand the consumed slots back to the kernel in one step. */
    if (reaped > 0) {
        io_uring_cq_advance(&ctx->ring, reaped);
    }

    /* Safety net: an overflowed CQ is treated as fatal. */
    if ((*ctx->ring.sq.kflags & IORING_SQ_CQ_OVERFLOW) != 0) {
        fprintf(stderr, "FATAL: CQ overflow detected!\n");
        abort();
    }
}
/**
 * Queue a vectored write and push as many queued writes as the SQ accepts.
 *
 * The `count` buffers are copied into task-owned memory, so the caller's
 * buffers may be reused as soon as this returns. The new task is appended to
 * the context's pending list; completions already available are harvested
 * first, then pending tasks are turned into SQEs in FIFO order until the list
 * is empty or the SQ has no free slot. Tasks that do not fit stay on the
 * pending list for a later submit_write()/iouring_tick() call.
 *
 * Ownership: the task belongs to the context. harvest_cqes() destroys it once
 * its completion is reaped, so the returned pointer must not be freed by the
 * caller and must not be dereferenced after any later call into this module.
 *
 * @param ctx    ring context.
 * @param fd     file descriptor to write to.
 * @param bufs   array of `count` source buffers.
 * @param lens   array of `count` buffer lengths in bytes.
 * @param count  number of buffers; must be > 0.
 * @param off    file offset for the write.
 * @return the queued task, or NULL on invalid arguments or allocation failure
 *         (nothing is queued in that case).
 */
task_t* submit_write(iouring_ctx_t *ctx, int fd, void **bufs, size_t *lens, int count, off_t off){
    if (!bufs || !lens || count <= 0) {
        return NULL;
    }

    task_t *t = (task_t *)kvs_malloc(sizeof(task_t));
    if (!t) {
        return NULL;
    }
    task_init(t);
    t->op = TASK_WRITE;
    t->fd = fd;
    t->off = off;
    t->iovcnt = 0;

    t->iovs = (struct iovec *)kvs_malloc(sizeof(struct iovec) * count);
    if (!t->iovs) {
        kvs_free(t);
        return NULL;
    }

    for (int i = 0; i < count; ++i) {
        size_t len = lens[i];
        void *buf = NULL;

        /* A NULL source with a non-zero length cannot be copied. */
        if (bufs[i] || len == 0) {
            buf = kvs_malloc(len);
        }
        if (!buf) {
            /* iovcnt counts the buffers attached so far, so task_destroy()
             * releases exactly those plus the array and the task. */
            task_destroy(t);
            return NULL;
        }
        if (len > 0) {
            memcpy(buf, bufs[i], len);
        }
        t->iovs[i].iov_base = buf;
        t->iovs[i].iov_len = len;
        t->iovcnt = i + 1;
    }

    /* Free up CQ space (and finished tasks) before adding more work. */
    harvest_cqes(ctx);

    if (!ctx->head) {
        ctx->head = t;
        ctx->tail = t;
    } else {
        ctx->tail->next = t;
        ctx->tail = t;
    }

    int prepared = 0;
    while (ctx->head) {
        /* Reserve the SQE first: a task is only unlinked once it is certain
         * to be submitted, so a full SQ never drops a queued write. */
        struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
        if (!sqe) {
            break;
        }

        task_t *cur = ctx->head;
        ctx->head = cur->next;
        if (!ctx->head) {
            ctx->tail = NULL;
        }
        cur->next = NULL;

        io_uring_prep_writev(sqe, cur->fd, cur->iovs, cur->iovcnt, cur->off);
        sqe->user_data = (uint64_t)(uintptr_t)cur;
        prepared++;
    }

    if (prepared > 0) {
        int ret = io_uring_submit(&ctx->ring);
        if (ret < 0) {
            fprintf(stderr, "io_uring_submit failed: %d (%s)\n", ret, strerror(-ret));
        }
    }

    return t;
}
/**
 * Periodic pump: reap finished writes, then submit queued ones.
 *
 * Harvests all available completions, then moves tasks from the pending list
 * into SQEs in FIFO order until the list is empty or the SQ has no free slot.
 * Anything that does not fit stays queued for the next call. A failing
 * io_uring_submit() is reported on stderr.
 *
 * @param ctx  ring context.
 */
void iouring_tick(iouring_ctx_t *ctx) {
    harvest_cqes(ctx);

    int prepared = 0;
    while (ctx->head) {
        /* Take the SQE before unlinking so a full SQ leaves the list intact. */
        struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
        if (!sqe) {
            break;
        }

        task_t *cur = ctx->head;
        ctx->head = cur->next;
        if (!ctx->head) {
            ctx->tail = NULL;
        }
        cur->next = NULL;

        io_uring_prep_writev(sqe, cur->fd, cur->iovs, cur->iovcnt, cur->off);
        sqe->user_data = (uint64_t)(uintptr_t)cur;
        prepared++;
    }

    if (prepared > 0) {
        int ret = io_uring_submit(&ctx->ring);
        if (ret < 0) {
            fprintf(stderr, "io_uring_submit failed: %d (%s)\n", ret, strerror(-ret));
        }
    }
}

View File

@@ -18,7 +18,6 @@ typedef struct task {
int fd;
off_t off;
int refcount;
int res; // cqe->res
int done; // 0/1
@@ -49,7 +48,6 @@ typedef struct {
pthread_mutex_t lock;
} destroy_queue_t;
int iouring_register_fd(iouring_ctx_t *ctx, int fd);
void task_init(task_t *t);
void task_finish(task_t *t, int res);
@@ -61,6 +59,7 @@ int iouring_init(iouring_ctx_t *ctx, unsigned entries);
void iouring_shutdown(iouring_ctx_t *ctx);
task_t* submit_write(iouring_ctx_t *ctx, int fd, void **bufs, size_t *lens, int count, off_t off);
int uring_task_complete(iouring_ctx_t *ctx);
void cleanup_finished_iouring_tasks();

View File

@@ -0,0 +1,50 @@
#ifndef __DISK_IOURING_H__
#define __DISK_IOURING_H__
#include <liburing.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
#define BATCH_SIZE 256
// Kind of I/O operation a task describes.
typedef enum { TASK_READ, TASK_WRITE } task_op_t;
// One I/O request: target, buffers, completion status and a link for the
// context's singly linked pending list.
typedef struct task {
task_op_t op; // operation kind
int fd; // file descriptor the request targets
off_t off; // file offset handed to the request
int res; // cqe->res
int done; // 0/1
struct iovec *iovs; // iovec array
int iovcnt; // number of iovec entries
struct task *next; // next task in the list, or NULL
} task_t;
// Ring context: the io_uring instance plus a FIFO list of tasks waiting for
// a free submission queue entry.
typedef struct {
struct io_uring ring; // the io_uring instance
int pending_count; // NOTE(review): not referenced by the code visible here; confirm it is still needed
task_t *head; // first waiting task, NULL when the list is empty
task_t *tail; // last waiting task, NULL when the list is empty
} iouring_ctx_t;
// Reset a task's bookkeeping fields before it is used.
void task_init(task_t *t);
// Store the result code in the task and mark it done.
void task_finish(task_t *t, int res);
// Free the task's buffers, its iovec array and the task itself.
void task_destroy(task_t *t);
// Zero the context and create its ring; returns 0 on success, a negative
// error code on failure.
int iouring_init(iouring_ctx_t *ctx, unsigned entries);
// Tear down the context's ring.
void iouring_shutdown(iouring_ctx_t *ctx);
// Copy the given buffers into a new write task, queue it and submit as much
// queued work as the ring accepts. Returns the task, or NULL on failure.
task_t* submit_write(iouring_ctx_t *ctx, int fd, void **bufs, size_t *lens, int count, off_t off);
// Harvest finished writes and submit tasks that are still queued.
void iouring_tick(iouring_ctx_t *ctx);
// Shared ring context; only declared here, the definition lives in another file.
extern iouring_ctx_t global_uring_ctx;
#endif