落盘机制修改

This commit is contained in:
2026-03-06 11:54:30 +00:00
parent c4e9bedd0a
commit 2e6baf0efe
14 changed files with 1302 additions and 443 deletions

467
kvstore.c
View File

@@ -14,12 +14,12 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <errno.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <libxml/parser.h>
#include <limits.h>
#include <pthread.h>
#include <errno.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <libxml/parser.h>
#include <limits.h>
#define TIME_COLLECT 0
@@ -41,147 +41,147 @@ void __completed_cmd(const uint8_t *cmd, size_t len, unsigned long long seq){
}
#include <sys/time.h>
#define TIME_SUB_MS(tv1, tv2) ((tv1.tv_sec - tv2.tv_sec) * 1000 + (tv1.tv_usec - tv2.tv_usec) / 1000)
#define TIME_SUB_US(tv1, tv2) ((tv1.tv_sec - tv2.tv_sec) * 1000000 + (tv1.tv_usec - tv2.tv_usec))
/*
 * Overflow-checked addition of two size_t values.
 *
 * On success stores a + b in *out and returns 0.
 * Returns -1, leaving *out untouched, when out is NULL or when the sum
 * would not fit in a size_t.
 */
static int checked_size_add(size_t a, size_t b, size_t *out) {
    if (out == NULL) {
        return -1;
    }
    /* The sum overflows exactly when b exceeds the headroom left above a. */
    if (b > SIZE_MAX - a) {
        return -1;
    }
    *out = a + b;
    return 0;
}
/*
 * Compute the number of bytes needed to encode *v, and store it in *out_len.
 *
 * Sizes by value type:
 *   - simple string / error: 1 type byte + bulk.len + 2 (CRLF)
 *   - integer:               1 type byte + decimal digits of i64 + 2 (CRLF)
 *   - nil:                   5 bytes ("$-1\r\n")
 *   - bulk string:           1 type byte + decimal digits of bulk.len + 2
 *                            + bulk.len + 2
 *
 * Returns 0 on success. Returns -1 without writing *out_len when an argument
 * is NULL, the type is not one of the above, a non-empty bulk string has a
 * NULL pointer, snprintf reports no output, or the total overflows size_t.
 */
static int resp_value_encoded_len(const resp_value_t *v, size_t *out_len) {
    size_t total = 0;

    if (v == NULL || out_len == NULL) {
        return -1;
    }

    if (v->type == RESP_T_SIMPLE_STR || v->type == RESP_T_ERROR) {
        /* type byte + text */
        if (checked_size_add(1, (size_t)v->bulk.len, &total) < 0) {
            return -1;
        }
        /* terminating CRLF */
        if (checked_size_add(total, 2, &total) < 0) {
            return -1;
        }
    } else if (v->type == RESP_T_INTEGER) {
        /* Format into a scratch buffer only to learn the digit count. */
        char digits[64];
        int ndigits = snprintf(digits, sizeof(digits), "%lld", (long long)v->i64);

        if (ndigits <= 0) {
            return -1;
        }
        /* type byte + digits */
        if (checked_size_add(1, (size_t)ndigits, &total) < 0) {
            return -1;
        }
        /* terminating CRLF */
        if (checked_size_add(total, 2, &total) < 0) {
            return -1;
        }
    } else if (v->type == RESP_T_NIL) {
        total = 5; /* "$-1\r\n" */
    } else if (v->type == RESP_T_BULK_STR) {
        char digits[32];
        int ndigits;

        /* A non-empty payload must have backing storage. */
        if (v->bulk.len > 0 && v->bulk.ptr == NULL) {
            return -1;
        }
        ndigits = snprintf(digits, sizeof(digits), "%u", (unsigned)v->bulk.len);
        if (ndigits <= 0) {
            return -1;
        }
        /* '$' + length digits */
        if (checked_size_add(1, (size_t)ndigits, &total) < 0) {
            return -1;
        }
        /* CRLF after the length header */
        if (checked_size_add(total, 2, &total) < 0) {
            return -1;
        }
        /* payload bytes */
        if (checked_size_add(total, (size_t)v->bulk.len, &total) < 0) {
            return -1;
        }
        /* trailing CRLF */
        if (checked_size_add(total, 2, &total) < 0) {
            return -1;
        }
    } else {
        return -1;
    }

    *out_len = total;
    return 0;
}
/*
 * Append the first *out_len bytes of buf to conn->wbuf via
 * chain_buffer_append, then reset *out_len to 0.
 *
 * Returns 0 on success, including when *out_len is already 0 (nothing is
 * appended). Returns -1 when any argument is NULL or the append fails; on an
 * append failure *out_len is left unchanged.
 */
static int flush_pending_response(struct conn *conn, uint8_t *buf, size_t *out_len) {
    size_t pending;

    if (conn == NULL || buf == NULL || out_len == NULL) {
        return -1;
    }

    pending = *out_len;
    if (pending > 0) {
        if (chain_buffer_append(&conn->wbuf, buf, pending) < 0) {
            return -1;
        }
        *out_len = 0;
    }
    return 0;
}
/*
 * Report whether the command name (argv[0]) is one of
 * SET/DEL/MOD, RSET/RDEL/RMOD or HSET/HDEL/HMOD.
 *
 * Names are matched with ascii_casecmp (presumably ASCII case-insensitive;
 * confirm against its definition).
 *
 * Returns 1 on a match. Returns 0 otherwise, including when cmd is NULL,
 * has no arguments, or its first argument is NULL or empty.
 */
static int is_update_cmd(const resp_cmd_t *cmd) {
    static const char *const update_names[] = {
        "SET",  "DEL",  "MOD",
        "RSET", "RDEL", "RMOD",
        "HSET", "HDEL", "HMOD",
    };
    const resp_slice_t *name;
    size_t i;

    if (cmd == NULL || cmd->argc == 0) {
        return 0;
    }
    name = &cmd->argv[0];
    if (name->ptr == NULL || name->len == 0) {
        return 0;
    }

    /* Scan in table order and stop at the first matching name. */
    for (i = 0; i < sizeof(update_names) / sizeof(update_names[0]); i++) {
        if (ascii_casecmp(name->ptr, name->len, update_names[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
int kvs_protocol(struct conn* conn){
#if TIME_COLLECT == 1
struct timeval func_start;
gettimeofday(&func_start, NULL);
long total_oplog_us = 0;
#include <sys/time.h>
#define TIME_SUB_MS(tv1, tv2) ((tv1.tv_sec - tv2.tv_sec) * 1000 + (tv1.tv_usec - tv2.tv_usec) / 1000)
#define TIME_SUB_US(tv1, tv2) ((tv1.tv_sec - tv2.tv_sec) * 1000000 + (tv1.tv_usec - tv2.tv_usec))
/*
 * Overflow-checked addition of two size_t values.
 *
 * On success stores a + b in *out and returns 0.
 * Returns -1, leaving *out untouched, when out is NULL or when the sum
 * would not fit in a size_t.
 */
static int checked_size_add(size_t a, size_t b, size_t *out) {
    if (out == NULL) {
        return -1;
    }
    /* The sum overflows exactly when b exceeds the headroom left above a. */
    if (b > SIZE_MAX - a) {
        return -1;
    }
    *out = a + b;
    return 0;
}
/*
 * Compute the number of bytes needed to encode *v, and store it in *out_len.
 *
 * Sizes by value type:
 *   - simple string / error: 1 type byte + bulk.len + 2 (CRLF)
 *   - integer:               1 type byte + decimal digits of i64 + 2 (CRLF)
 *   - nil:                   5 bytes ("$-1\r\n")
 *   - bulk string:           1 type byte + decimal digits of bulk.len + 2
 *                            + bulk.len + 2
 *
 * Returns 0 on success. Returns -1 without writing *out_len when an argument
 * is NULL, the type is not one of the above, a non-empty bulk string has a
 * NULL pointer, snprintf reports no output, or the total overflows size_t.
 */
static int resp_value_encoded_len(const resp_value_t *v, size_t *out_len) {
    size_t total = 0;

    if (v == NULL || out_len == NULL) {
        return -1;
    }

    if (v->type == RESP_T_SIMPLE_STR || v->type == RESP_T_ERROR) {
        /* type byte + text */
        if (checked_size_add(1, (size_t)v->bulk.len, &total) < 0) {
            return -1;
        }
        /* terminating CRLF */
        if (checked_size_add(total, 2, &total) < 0) {
            return -1;
        }
    } else if (v->type == RESP_T_INTEGER) {
        /* Format into a scratch buffer only to learn the digit count. */
        char digits[64];
        int ndigits = snprintf(digits, sizeof(digits), "%lld", (long long)v->i64);

        if (ndigits <= 0) {
            return -1;
        }
        /* type byte + digits */
        if (checked_size_add(1, (size_t)ndigits, &total) < 0) {
            return -1;
        }
        /* terminating CRLF */
        if (checked_size_add(total, 2, &total) < 0) {
            return -1;
        }
    } else if (v->type == RESP_T_NIL) {
        total = 5; /* "$-1\r\n" */
    } else if (v->type == RESP_T_BULK_STR) {
        char digits[32];
        int ndigits;

        /* A non-empty payload must have backing storage. */
        if (v->bulk.len > 0 && v->bulk.ptr == NULL) {
            return -1;
        }
        ndigits = snprintf(digits, sizeof(digits), "%u", (unsigned)v->bulk.len);
        if (ndigits <= 0) {
            return -1;
        }
        /* '$' + length digits */
        if (checked_size_add(1, (size_t)ndigits, &total) < 0) {
            return -1;
        }
        /* CRLF after the length header */
        if (checked_size_add(total, 2, &total) < 0) {
            return -1;
        }
        /* payload bytes */
        if (checked_size_add(total, (size_t)v->bulk.len, &total) < 0) {
            return -1;
        }
        /* trailing CRLF */
        if (checked_size_add(total, 2, &total) < 0) {
            return -1;
        }
    } else {
        return -1;
    }

    *out_len = total;
    return 0;
}
/*
 * Append the first *out_len bytes of buf to conn->wbuf via
 * chain_buffer_append, then reset *out_len to 0.
 *
 * Returns 0 on success, including when *out_len is already 0 (nothing is
 * appended). Returns -1 when any argument is NULL or the append fails; on an
 * append failure *out_len is left unchanged.
 */
static int flush_pending_response(struct conn *conn, uint8_t *buf, size_t *out_len) {
    size_t pending;

    if (conn == NULL || buf == NULL || out_len == NULL) {
        return -1;
    }

    pending = *out_len;
    if (pending > 0) {
        if (chain_buffer_append(&conn->wbuf, buf, pending) < 0) {
            return -1;
        }
        *out_len = 0;
    }
    return 0;
}
/*
 * Report whether the command name (argv[0]) is one of
 * SET/DEL/MOD, RSET/RDEL/RMOD or HSET/HDEL/HMOD.
 *
 * Names are matched with ascii_casecmp (presumably ASCII case-insensitive;
 * confirm against its definition).
 *
 * Returns 1 on a match. Returns 0 otherwise, including when cmd is NULL,
 * has no arguments, or its first argument is NULL or empty.
 */
static int is_update_cmd(const resp_cmd_t *cmd) {
    static const char *const update_names[] = {
        "SET",  "DEL",  "MOD",
        "RSET", "RDEL", "RMOD",
        "HSET", "HDEL", "HMOD",
    };
    const resp_slice_t *name;
    size_t i;

    if (cmd == NULL || cmd->argc == 0) {
        return 0;
    }
    name = &cmd->argv[0];
    if (name->ptr == NULL || name->len == 0) {
        return 0;
    }

    /* Scan in table order and stop at the first matching name. */
    for (i = 0; i < sizeof(update_names) / sizeof(update_names[0]); i++) {
        if (ascii_casecmp(name->ptr, name->len, update_names[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
int kvs_protocol(struct conn* conn){
#if TIME_COLLECT == 1
struct timeval func_start;
gettimeofday(&func_start, NULL);
long total_oplog_us = 0;
#endif
if (!conn) return -1;
size_t request_size = 0;
const uint8_t *request = chain_buffer_linearize(&conn->rbuf, &request_size);
if (!request || request_size == 0) return 0;
if (request_size > (size_t)INT_MAX) return -1;
int request_length = (int)request_size;
uint8_t response[KVS_MAX_RESPONSE];
int consumed = 0;
size_t out_len = 0;
while(consumed < request_length ){
const uint8_t *p = request+consumed;
int remain = request_length - consumed;
if (!conn) return -1;
size_t request_size = 0;
const uint8_t *request = chain_buffer_linearize(&conn->rbuf, &request_size);
if (!request || request_size == 0) return 0;
if (request_size > (size_t)INT_MAX) return -1;
int request_length = (int)request_size;
uint8_t response[KVS_MAX_RESPONSE];
int consumed = 0;
size_t out_len = 0;
while(consumed < request_length ){
const uint8_t *p = request+consumed;
int remain = request_length - consumed;
resp_cmd_t cmd;
memset(&cmd, 0, sizeof(cmd));
int len = resp_parse_one_cmd(p, remain, &cmd);
if(len < 0){
/* 协议错误:直接返回 */
return -1;
}
int len = resp_parse_one_cmd(p, remain, &cmd);
if(len < 0){
/* 协议错误:直接返回 */
return -1;
}
else if(len == 0){
// 半包
break;
@@ -192,11 +192,6 @@ int kvs_protocol(struct conn* conn){
int dr = resp_dispatch(&cmd, &val);
// if(global_cfg.persistence == PERSIST_INCREMENTAL){
// kvs_oplog_append(p, len, global_oplog_fd);
// }
/*
* 语义建议:
* - resp_dispatch() 即使返回 -1比如 unknown command / wrong argc
@@ -207,69 +202,22 @@ int kvs_protocol(struct conn* conn){
struct timeval oplog_start, oplog_end;
gettimeofday(&oplog_start, NULL);
#endif
// if(dr < 0){
// if (val.type != RESP_T_SIMPLE_STR &&
// val.type != RESP_T_ERROR &&
// val.type != RESP_T_INTEGER &&
// val.type != RESP_T_BULK_STR &&
// val.type != RESP_T_NIL) {
// val = resp_error("ERR dispatch failed");
// }
// } else {
// // persist into oplog
// /* 执行成功:在这里保存到日志中(只记录更新类命令) */
// if (cmd.argc > 0 && cmd.argv[0].ptr) {
// /* 更新类命令SET/DEL/MOD/RSET/RDEL/RMOD/HSET/HDEL/HMOD/SAVE */
// const resp_slice_t *c0 = &cmd.argv[0];
// int is_update = 0;
// if (c0->ptr && c0->len) {
// if (ascii_casecmp(c0->ptr, c0->len, "SET") == 0 ||
// ascii_casecmp(c0->ptr, c0->len, "DEL") == 0 ||
// ascii_casecmp(c0->ptr, c0->len, "MOD") == 0 ||
// ascii_casecmp(c0->ptr, c0->len, "RSET") == 0 ||
// ascii_casecmp(c0->ptr, c0->len, "RDEL") == 0 ||
// ascii_casecmp(c0->ptr, c0->len, "RMOD") == 0 ||
// ascii_casecmp(c0->ptr, c0->len, "HSET") == 0 ||
// ascii_casecmp(c0->ptr, c0->len, "HDEL") == 0 ||
// ascii_casecmp(c0->ptr, c0->len, "HMOD") == 0) {
// is_update = 1;
// }
// }
// if (is_update) {
// if(global_cfg.persistence == PERSIST_INCREMENTAL){
// kvs_oplog_append(p, len, global_oplog_fd);
// }
int need_persist = is_update_cmd(&cmd);
// // __completed_cmd(p, len, global_seq);
// // global_seq ++;
// if (global_cfg.replica_mode == REPLICA_ENABLE) {
// uint32_t off = 0;
// int ar = replica_shm_append(&g_rep_shm, global_seq, p, (uint32_t)len, &off);
// if (ar == 0) {
// // __replica_notify(global_seq, off, (uint32_t)len);
// global_seq++;
// } else {
// // shm 满或异常:你可以选择降级(比如直接跳过复制,或阻塞/丢弃)
// // 为了不影响主路径,这里先打印并跳过
// fprintf(stderr, "replica_shm_append failed %d\n", ar);
// }
// }
// }
// }
// }
int need_persist = is_update_cmd(&cmd);
if(global_cfg.persistence == PERSIST_INCREMENTAL && need_persist){
kvs_oplog_append(p, len, global_oplog_fd);
}
if(global_cfg.persistence == PERSIST_INCREMENTAL && need_persist){
int ar = kvs_oplog_buffer_append(p, (size_t)len, global_oplog_fd);
if (ar < 0) {
return -1;
}
if (ar == KVS_OPLOG_BUF_FULL && kvs_oplog_flush(global_oplog_fd, 0) < 0) {
return -1;
}
}
// __completed_cmd(p, len, global_seq);
// global_seq ++;
if (global_cfg.replica_mode == REPLICA_ENABLE && need_persist) {
if (global_cfg.replica_mode == REPLICA_ENABLE && need_persist) {
uint32_t off = 0;
int ar = replica_shm_append(&g_rep_shm, global_seq, p, (uint32_t)len, &off);
if (ar == 0) {
@@ -286,63 +234,69 @@ int kvs_protocol(struct conn* conn){
gettimeofday(&oplog_end, NULL);
total_oplog_us += (oplog_end.tv_sec - oplog_start.tv_sec) * 1000000 +
(oplog_end.tv_usec - oplog_start.tv_usec);
#endif
/* 构建响应 */
int resp_len = resp_build_value(&val, response + out_len, sizeof(response) - out_len);
if (resp_len < 0) {
/* 当前批次剩余空间不够,先把已拼好的刷到发送队列再重试 */
if (flush_pending_response(conn, response, &out_len) < 0) {
return -1;
}
resp_len = resp_build_value(&val, response, sizeof(response));
if (resp_len < 0) {
size_t resp_need = 0;
uint8_t *resp_heap = NULL;
if (resp_value_encoded_len(&val, &resp_need) < 0) {
return -1;
}
resp_heap = (uint8_t *)malloc(resp_need);
if (!resp_heap) {
return -1;
}
resp_len = resp_build_value(&val, resp_heap, resp_need);
if (resp_len < 0 ||
chain_buffer_append(&conn->wbuf, resp_heap, (size_t)resp_len) < 0) {
free(resp_heap);
return -1;
}
free(resp_heap);
resp_len = 0;
}
}
out_len += (size_t)resp_len;
__completed_cmd(request, consumed, 0);
consumed += len;
}
#endif
#if TIME_COLLECT == 1
/* 构建响应 */
int resp_len = resp_build_value(&val, response + out_len, sizeof(response) - out_len);
if (resp_len < 0) {
/* 当前批次剩余空间不够,先把已拼好的刷到发送队列再重试 */
if (flush_pending_response(conn, response, &out_len) < 0) {
return -1;
}
resp_len = resp_build_value(&val, response, sizeof(response));
if (resp_len < 0) {
size_t resp_need = 0;
uint8_t *resp_heap = NULL;
if (resp_value_encoded_len(&val, &resp_need) < 0) {
return -1;
}
resp_heap = (uint8_t *)kvs_malloc(resp_need);
if (!resp_heap) {
return -1;
}
resp_len = resp_build_value(&val, resp_heap, resp_need);
if (resp_len < 0 ||
chain_buffer_append(&conn->wbuf, resp_heap, (size_t)resp_len) < 0) {
free(resp_heap);
return -1;
}
free(resp_heap);
resp_len = 0;
}
}
out_len += (size_t)resp_len;
// __completed_cmd(request, consumed, 0);
consumed += len;
}
if (global_cfg.persistence == PERSIST_INCREMENTAL) {
if (kvs_oplog_flush(global_oplog_fd, 1) < 0) {
return -1;
}
}
#if TIME_COLLECT == 1
struct timeval func_end;
gettimeofday(&func_end, NULL);
long func_us = (func_end.tv_sec - func_start.tv_sec) * 1000000 +
(func_end.tv_usec - func_start.tv_usec);
fprintf(stderr, "kvs_protocol: total %ld us, oplog %ld us\n", func_us, total_oplog_us);
#endif
if (flush_pending_response(conn, response, &out_len) < 0) {
return -1;
}
return consumed;
}
fprintf(stderr, "kvs_protocol: total %ld us, oplog %ld us\n", func_us, total_oplog_us);
#endif
if (flush_pending_response(conn, response, &out_len) < 0) {
return -1;
}
return consumed;
}
@@ -461,6 +415,7 @@ int init_config(AppConfig *cfg){
printf("Persistence : %s\n", persistence_to_string(cfg->persistence));
printf("|—— Persist-dir : %s\n", cfg->persist_dir);
printf("|—— Persist-oplog : %s\n", cfg->oplog_file);
printf("|—— Oplog-sync : %s\n", oplog_sync_mode_to_string(cfg->oplog_sync_mode));
printf("|—— Persist-array : %s\n", cfg->array_file);
printf("|—— Persist-rbtree : %s\n", cfg->rbtree_file);
printf("|—— Persist-hash : %s\n", cfg->hash_file);