#include "kvstore.h"
#include "kvs_rw_tools.h"
#include "memory/alloc_dispatch.h"
#include "diskuring/diskuring.h"

/* ============================================================================
 * Memory layout
 * ============================================================================
 * Every node lives in ONE contiguous allocation:
 *
 *   +----------------- fixed header -----------------+---- dynamic data ----+
 *   | color | right | left | parent | key_len | value_len |  key  | value  |
 *   +------------ sizeof(rbtree_node_fixed) ---------+ key_len + value_len -+
 *
 *   total size = sizeof(rbtree_node_fixed) + key_len + value_len
 *
 * The header size is whatever sizeof(rbtree_node_fixed) evaluates to on the
 * target (field widths plus compiler padding); never hard-code it.
 *
 * Because key and value bytes are embedded in the node, a node can never
 * change size in place: changing a value's length means allocating a new
 * node and relinking it into the tree.
 * ============================================================================
 */

/* Total allocation size needed for a node with the given payload lengths. */
static inline size_t rbtree_node_size(uint32_t key_len, uint32_t value_len) {
    return sizeof(rbtree_node_fixed) + key_len + value_len;
}

/* Pointer to the key bytes embedded in `node`; NULL for a NULL node or an empty key. */
static inline uint8_t* rbtree_node_get_key(rbtree_node *node) {
    if (!node || node->key_len == 0) return NULL;
    return (uint8_t *)node + sizeof(rbtree_node_fixed);
}

/* Pointer to the value bytes embedded in `node`; NULL for a NULL node or an empty value. */
static inline uint8_t* rbtree_node_get_value(rbtree_node *node) {
    if (!node || node->value_len == 0) return NULL;
    return (uint8_t *)node + sizeof(rbtree_node_fixed) + node->key_len;
}

/*
 * Three-way comparison of two binary keys.
 *
 * The common prefix is compared with memcmp(); if it is identical, the
 * shorter key orders first.
 *
 * Returns <0 if a < b, 0 if equal, >0 if a > b.
 */
int kvs_keycmp(const uint8_t *a, uint32_t alen,
               const uint8_t *b, uint32_t blen) {
    uint32_t min = (alen < blen) ? alen : blen;

    if (min > 0) {
        int r = memcmp(a, b, min);
        if (r != 0) return r;
    }

    /* Identical prefix: the shorter key is the smaller one. */
    if (alen < blen) return -1;
    if (alen > blen) return 1;
    return 0;
}

/* Leftmost (minimum) node of the subtree rooted at `x`. */
rbtree_node *rbtree_mini(rbtree *T, rbtree_node *x) {
    while (x->left != T->nil) {
        x = x->left;
    }
    return x;
}

/* Rightmost (maximum) node of the subtree rooted at `x`. */
rbtree_node *rbtree_maxi(rbtree *T, rbtree_node *x) {
    while (x->right != T->nil) {
        x = x->right;
    }
    return x;
}

/* In-order successor of `x`, or T->nil when `x` is the maximum. */
rbtree_node *rbtree_successor(rbtree *T, rbtree_node *x) {
    rbtree_node *y = x->parent;

    if (x->right != T->nil) {
        return rbtree_mini(T, x->right);
    }

    /* Climb until we leave a left subtree. */
    while ((y != T->nil) && (x == y->right)) {
        x = y;
        y = y->parent;
    }
    return y;
}

/* Left rotation around `x`: x's right child `y` becomes the subtree root. */
void rbtree_left_rotate(rbtree *T, rbtree_node *x) {
    rbtree_node *y = x->right;

    /* y's left subtree becomes x's right subtree. */
    x->right = y->left;
    if (y->left != T->nil) {
        y->left->parent = x;
    }

    /* Hook y where x used to hang. */
    y->parent = x->parent;
    if (x->parent == T->nil) {
        T->root = y;
    } else if (x == x->parent->left) {
        x->parent->left = y;
    } else {
        x->parent->right = y;
    }

    /* x becomes y's left child. */
    y->left = x;
    x->parent = y;
}

/* Right rotation around `y`: y's left child `x` becomes the subtree root. */
void rbtree_right_rotate(rbtree *T, rbtree_node *y) {
    rbtree_node *x = y->left;

    /* x's right subtree becomes y's left subtree. */
    y->left = x->right;
    if (x->right != T->nil) {
        x->right->parent = y;
    }

    /* Hook x where y used to hang. */
    x->parent = y->parent;
    if (y->parent == T->nil) {
        T->root = x;
    } else if (y == y->parent->right) {
        y->parent->right = x;
    } else {
        y->parent->left = x;
    }

    /* y becomes x's right child. */
    x->right = y;
    y->parent = x;
}

/*
 * Restore the red-black properties after inserting the RED node `z`.
 * The loop runs while z's parent is RED (a red-red violation).
 */
void rbtree_insert_fixup(rbtree *T, rbtree_node *z) {
    while (z->parent->color == RED) {
        if (z->parent == z->parent->parent->left) {
            rbtree_node *y = z->parent->parent->right;   /* uncle */
            if (y->color == RED) {
                /* Red uncle: recolor and continue from the grandparent. */
                z->parent->color = BLACK;
                y->color = BLACK;
                z->parent->parent->color = RED;
                z = z->parent->parent;
            } else {
                if (z == z->parent->right) {
                    /* Inner child: rotate it to the outside first. */
                    z = z->parent;
                    rbtree_left_rotate(T, z);
                }
                z->parent->color = BLACK;
                z->parent->parent->color = RED;
                rbtree_right_rotate(T, z->parent->parent);
            }
        } else {
            /* Mirror image of the branch above. */
            rbtree_node *y = z->parent->parent->left;    /* uncle */
            if (y->color == RED) {
                z->parent->color = BLACK;
                y->color = BLACK;
                z->parent->parent->color = RED;
                z = z->parent->parent;
            } else {
                if (z == z->parent->left) {
                    z = z->parent;
                    rbtree_right_rotate(T, z);
                }
                z->parent->color = BLACK;
                z->parent->parent->color = RED;
                rbtree_left_rotate(T, z->parent->parent);
            }
        }
    }

    T->root->color = BLACK;
}

/*
 * Link the already-populated node `z` (key_len/value_len and payload set by
 * the caller) into the tree.
 *
 * Returns 0 on success, -1 if an equal key is already present; in that case
 * the tree is untouched and the caller still owns `z`.
 */
int rbtree_insert(rbtree *T, rbtree_node *z) {
    rbtree_node *parent = T->nil;
    rbtree_node *cur = T->root;
    uint8_t *zkey = rbtree_node_get_key(z);
    int c = 0;   /* result of the comparison against `parent` */

    while (cur != T->nil) {
        parent = cur;
        c = kvs_keycmp(zkey, z->key_len, rbtree_node_get_key(cur), cur->key_len);
        if (c < 0) {
            cur = cur->left;
        } else if (c > 0) {
            cur = cur->right;
        } else {
            return -1;   /* duplicate key */
        }
    }

    z->parent = parent;
    if (parent == T->nil) {
        T->root = z;
    } else if (c < 0) {
        parent->left = z;
    } else {
        parent->right = z;
    }

    z->left = T->nil;
    z->right = T->nil;
    z->color = RED;

    rbtree_insert_fixup(T, z);
    return 0;
}

/*
 * Restore the red-black properties after a BLACK node was unlinked.
 * `x` is the node that moved into the vacated position; it may be the nil
 * sentinel, in which case its parent pointer must have been set by the caller.
 */
void rbtree_delete_fixup(rbtree *T, rbtree_node *x) {
    while ((x != T->root) && (x->color == BLACK)) {
        if (x == x->parent->left) {
            rbtree_node *w = x->parent->right;   /* sibling */
            if (w->color == RED) {
                /* Red sibling: rotate to obtain a black sibling. */
                w->color = BLACK;
                x->parent->color = RED;
                rbtree_left_rotate(T, x->parent);
                w = x->parent->right;
            }

            if ((w->left->color == BLACK) && (w->right->color == BLACK)) {
                /* Both nephews black: push the extra black up. */
                w->color = RED;
                x = x->parent;
            } else {
                if (w->right->color == BLACK) {
                    /* Far nephew black: rotate the near one outward. */
                    w->left->color = BLACK;
                    w->color = RED;
                    rbtree_right_rotate(T, w);
                    w = x->parent->right;
                }

                /* Far nephew red: final rotation resolves the deficit. */
                w->color = x->parent->color;
                x->parent->color = BLACK;
                w->right->color = BLACK;
                rbtree_left_rotate(T, x->parent);
                x = T->root;
            }
        } else {
            /* Mirror image of the branch above. */
            rbtree_node *w = x->parent->left;    /* sibling */
            if (w->color == RED) {
                w->color = BLACK;
                x->parent->color = RED;
                rbtree_right_rotate(T, x->parent);
                w = x->parent->left;
            }

            if ((w->left->color == BLACK) && (w->right->color == BLACK)) {
                w->color = RED;
                x = x->parent;
            } else {
                if (w->left->color == BLACK) {
                    w->right->color = BLACK;
                    w->color = RED;
                    rbtree_left_rotate(T, w);
                    w = x->parent->left;
                }

                w->color = x->parent->color;
                x->parent->color = BLACK;
                w->left->color = BLACK;
                rbtree_right_rotate(T, x->parent);
                x = T->root;
            }
        }
    }

    x->color = BLACK;
}

/*
 * Unlink node `z` from the tree and rebalance.
 *
 * Returns the node that is no longer part of the tree -- always `z` -- which
 * the caller is expected to release with kvs_free().
 *
 * Key and value bytes are embedded in each node and nodes have different
 * sizes, so payloads cannot be copied from the successor into `z`. When `z`
 * has two children, its in-order successor is instead spliced out of its own
 * position and relinked into z's position, inheriting z's color.
 */
rbtree_node *rbtree_delete(rbtree *T, rbtree_node *z) {
    /* y: the node physically spliced out of its current position. */
    rbtree_node *y = ((z->left == T->nil) || (z->right == T->nil))
                         ? z
                         : rbtree_successor(T, z);
    /* x: y's only child, or the nil sentinel if y is a leaf. */
    rbtree_node *x = (y->left != T->nil) ? y->left : y->right;
    /* Color vacated at y's old position; decides whether a fixup is needed. */
    int vacated_color = y->color;

    /* Splice y out. x->parent is written even when x is nil: the fixup
     * relies on the sentinel's parent pointer. */
    x->parent = y->parent;
    if (y->parent == T->nil) {
        T->root = x;
    } else if (y == y->parent->left) {
        y->parent->left = x;
    } else {
        y->parent->right = x;
    }

    if (y != z) {
        /* If y was z's direct child, x currently hangs off z; after the
         * relink below its parent is y (matters when x is the sentinel). */
        if (x->parent == z) {
            x->parent = y;
        }

        /* Move y into z's position. z's child pointers already reflect
         * the splice performed above. */
        y->parent = z->parent;
        y->left = z->left;
        y->right = z->right;
        y->color = z->color;

        if (z->parent == T->nil) {
            T->root = y;
        } else if (z == z->parent->left) {
            z->parent->left = y;
        } else {
            z->parent->right = y;
        }

        if (y->left != T->nil) {
            y->left->parent = y;
        }
        if (y->right != T->nil) {
            y->right->parent = y;
        }
    }

    if (vacated_color == BLACK) {
        rbtree_delete_fixup(T, x);
    }

    return z;
}

/* Find the node whose key equals `key`; returns T->nil when absent. */
rbtree_node *rbtree_search(rbtree *T, const uint8_t *key, uint32_t keylen) {
    rbtree_node *node = T->root;

    while (node != T->nil) {
        uint8_t *node_key = rbtree_node_get_key(node);
        int c = kvs_keycmp(key, keylen, node_key, node->key_len);

        if (c < 0) {
            node = node->left;
        } else if (c > 0) {
            node = node->right;
        } else {
            return node;
        }
    }
    return T->nil;
}

/* In-order debug dump of the subtree rooted at `node` to stdout. */
void rbtree_traversal(rbtree *T, rbtree_node *node) {
    if (node != T->nil) {
        rbtree_traversal(T, node->left);

        /* Embedded keys are not NUL-terminated: bound the print by key_len. */
        uint8_t *key = rbtree_node_get_key(node);
        printf("key:%.*s, color:%d\n",
               (int)node->key_len, key ? (const char *)key : "", node->color);

        rbtree_traversal(T, node->right);
    }
}

typedef struct _rbtree kvs_rbtree_t;

kvs_rbtree_t global_rbtree;

/*
 * Replace `old_node` with a newly allocated node that carries the same key,
 * color and links but a value of `value_len` bytes copied from `value`.
 * The old node is freed on success.
 *
 * Returns 0 on success, -2 if allocation fails (tree left untouched).
 */
static int rbtree_replace_value(rbtree *T, rbtree_node *old_node,
                                const void *value, uint32_t value_len) {
    size_t new_size = rbtree_node_size(old_node->key_len, value_len);
    rbtree_node *fresh = (rbtree_node *)kvs_malloc(new_size);
    if (!fresh) return -2;

    /* Clone the header, substituting the new value length. */
    fresh->color = old_node->color;
    fresh->left = old_node->left;
    fresh->right = old_node->right;
    fresh->parent = old_node->parent;
    fresh->key_len = old_node->key_len;
    fresh->value_len = value_len;

    /* Copy the payload. */
    if (old_node->key_len > 0) {
        memcpy(rbtree_node_get_key(fresh), rbtree_node_get_key(old_node),
               old_node->key_len);
    }
    if (value_len > 0) {
        memcpy(rbtree_node_get_value(fresh), value, value_len);
    }

    /* Redirect the parent's child link (or the root). */
    if (old_node->parent == T->nil) {
        T->root = fresh;
    } else if (old_node->parent->left == old_node) {
        old_node->parent->left = fresh;
    } else {
        old_node->parent->right = fresh;
    }

    /* Redirect the children's parent links. */
    if (fresh->left != T->nil) {
        fresh->left->parent = fresh;
    }
    if (fresh->right != T->nil) {
        fresh->right->parent = fresh;
    }

    kvs_free(old_node);
    return 0;
}

/*
 * Initialize an empty tree by allocating its nil sentinel.
 *
 * Returns 0 on success, 1 if `inst` is NULL, 2 if the sentinel cannot be
 * allocated.
 */
int kvs_rbtree_create(kvs_rbtree_t *inst) {
    if (inst == NULL) return 1;

    /* The sentinel carries no payload, so only the fixed header is allocated. */
    inst->nil = (rbtree_node*)kvs_malloc(sizeof(rbtree_node_fixed));
    if (!inst->nil) return 2;

    inst->nil->color = BLACK;
    inst->nil->left = inst->nil->right = inst->nil->parent = inst->nil;
    inst->nil->key_len = 0;
    inst->nil->value_len = 0;

    inst->root = inst->nil;
    return 0;
}

/*
 * Free every node and the sentinel. Safe to call on a NULL instance or on a
 * tree whose sentinel is NULL (never created / already destroyed).
 */
void kvs_rbtree_destroy(kvs_rbtree_t *inst) {
    if (inst == NULL || inst->nil == NULL) return;

    while (inst->root != inst->nil) {
        rbtree_node *mini = rbtree_mini(inst, inst->root);
        /* Key and value are embedded, so one free releases everything. */
        kvs_free(rbtree_delete(inst, mini));
    }

    kvs_free(inst->nil);
    inst->nil = NULL;
    inst->root = NULL;
}

/*
 * Insert `key` -> `value`, or overwrite the value if the key already exists.
 *
 * @return 0 on success (inserted or overwritten);
 *         -1 on invalid arguments (NULL inst/key/value or empty key);
 *         -2 on allocation or insertion failure.
 */
int kvs_rbtree_set(kvs_rbtree_t *inst, const void *key, uint32_t key_len,
                   const void *value, uint32_t value_len) {
    /* Empty keys are rejected here because get/del/mod/exist reject them and
     * load treats a zero key length as a corrupt record. */
    if (!inst || !key || key_len == 0 || !value) return -1;

    /* 1. Existing key: overwrite. */
    rbtree_node *existing = rbtree_search(inst, (const uint8_t*)key, key_len);
    if (existing != inst->nil) {
        /* The key matched, so the node size changes only with value_len. */
        if (existing->value_len != value_len) {
            return rbtree_replace_value(inst, existing, value, value_len);
        }

        /* Same size: update the value bytes in place. */
        if (value_len > 0) {
            memcpy(rbtree_node_get_value(existing), value, value_len);
        }
        return 0;
    }

    /* 2. New key: build a node and insert it. */
    size_t node_size = rbtree_node_size(key_len, value_len);
    rbtree_node *node = (rbtree_node*)kvs_malloc(node_size);
    if (!node) return -2;

    memset(node, 0, node_size);
    node->key_len = key_len;
    node->value_len = value_len;

    memcpy(rbtree_node_get_key(node), key, key_len);
    if (value_len > 0) {
        memcpy(rbtree_node_get_value(node), value, value_len);
    }

    if (rbtree_insert(inst, node) < 0) {
        kvs_free(node);
        return -2;
    }

    return 0;
}

/*
 * Look up `key`.
 *
 * @return pointer to the value bytes stored inside the tree node (not a
 *         copy), with the length written to *out_valuelen; NULL if the
 *         arguments are invalid or the key is absent. Note that a key stored
 *         with an empty value also yields NULL, with *out_valuelen set to 0.
 */
void* kvs_rbtree_get(kvs_rbtree_t *inst, const void *key, uint32_t key_len,
                     uint32_t *out_valuelen) {
    if (!inst || !key || key_len == 0 || !out_valuelen) return NULL;

    rbtree_node *node = rbtree_search(inst, (const uint8_t *)key, key_len);
    if (!node || node == inst->nil) return NULL;

    *out_valuelen = node->value_len;
    return (void*)rbtree_node_get_value(node);
}

/*
 * Delete `key`.
 *
 * @return <0 on invalid arguments; 0 on success; >0 if the key does not exist.
 */
int kvs_rbtree_del(rbtree *inst, const void *key, uint32_t key_len) {
    if (!inst || !key || key_len == 0) return -1;

    rbtree_node *node = rbtree_search(inst, (const uint8_t *)key, key_len);
    if (!node || node == inst->nil) return 1;

    /* Key and value are embedded, so one free releases everything. */
    kvs_free(rbtree_delete(inst, node));
    return 0;
}

/*
 * Modify the value of an existing key.
 *
 * @return -1 on invalid arguments; -2 on allocation failure; 0 on success;
 *         >0 if the key does not exist.
 */
int kvs_rbtree_mod(kvs_rbtree_t *inst, const void *key, uint32_t key_len,
                   const void *value, uint32_t value_len) {
    if (!inst || !key || key_len == 0 || !value) return -1;

    rbtree_node *node = rbtree_search(inst, (const uint8_t *)key, key_len);
    if (!node || node == inst->nil) return 1;

    /* Same length: overwrite the value bytes in place. */
    if (node->value_len == value_len) {
        if (value_len > 0) {
            memcpy(rbtree_node_get_value(node), value, value_len);
        }
        return 0;
    }

    /* Different length: the node must be reallocated and relinked. */
    return rbtree_replace_value(inst, node, value, value_len);
}

/*
 * Test whether `key` is present.
 *
 * @return 0 if it exists; 1 if it does not; -1 on invalid arguments.
 */
int kvs_rbtree_exist(kvs_rbtree_t *inst, const void *key, uint32_t key_len) {
    if (!inst || !key || key_len == 0) return -1;

    rbtree_node *node = rbtree_search(inst, (const uint8_t*)key, key_len);
    if (!node || node == inst->nil) return 1;
    return 0;
}

/*
 * Write the subtree rooted at `node` to `fd` in key order.
 *
 * Record format: key_len (4 bytes, network order), value_len (4 bytes,
 * network order), key bytes, value bytes. *current_off is advanced by the
 * size of every record submitted.
 *
 * Returns 0 on success, -1 on invalid arguments, -4 if a write could not be
 * submitted.
 */
static int kvs_rbtree_save_node(iouring_ctx_t *uring, int fd, off_t *current_off,
                                kvs_rbtree_t *inst, rbtree_node *node) {
    if (!current_off || !inst || !node) return -1;
    if (node == inst->nil) return 0;

    int rc = kvs_rbtree_save_node(uring, fd, current_off, inst, node->left);
    if (rc < 0) return rc;

    uint32_t klen = htonl(node->key_len);
    uint32_t vlen = htonl(node->value_len);

    void *bufs[4];
    size_t lens[4];
    int count = 0;

    bufs[count] = &klen;
    lens[count] = sizeof(klen);
    count++;

    bufs[count] = &vlen;
    lens[count] = sizeof(vlen);
    count++;

    uint8_t *node_key = rbtree_node_get_key(node);
    if (node->key_len > 0) {
        bufs[count] = node_key;
        lens[count] = node->key_len;
        count++;
    }

    uint8_t *node_val = rbtree_node_get_value(node);
    if (node->value_len > 0) {
        bufs[count] = node_val;
        lens[count] = node->value_len;
        count++;
    }

    size_t total = 0;
    for (int i = 0; i < count; i++) total += lens[i];

    /* NOTE(review): klen/vlen and the bufs/lens arrays live on this stack
     * frame. This is only safe if submit_write() copies the data before
     * returning -- confirm against diskuring. */
    task_t *t = submit_write(uring, fd, bufs, lens, count, *current_off);
    cleanup_finished_iouring_tasks(uring);

    if (!t) {
        perror("task init failed");
        return -4;
    }

    *current_off += (off_t) total;

    rc = kvs_rbtree_save_node(uring, fd, current_off, inst, node->right);
    if (rc < 0) return rc;

    return 0;
}

/*
 * Snapshot the whole tree to `filename` (created or truncated).
 *
 * Waits for all outstanding io_uring tasks before closing the file.
 *
 * @return 0 on success; -1 on invalid arguments; -2 if the file cannot be
 *         opened; otherwise the negative code from the node writer.
 */
int kvs_rbtree_save(iouring_ctx_t *uring, kvs_rbtree_t *inst, const char* filename) {
    if (!uring || !inst || !filename) return -1;

    int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) return -2;

    off_t current_off = 0;
    int rc = kvs_rbtree_save_node(uring, fd, &current_off, inst, inst->root);

    /* Drain pending writes even on error so none outlive the descriptor. */
    while (!uring_task_complete(uring)) {
        usleep(1000);
        cleanup_finished_iouring_tasks(uring);
    }

    close(fd);
    return rc;
}

/*
 * Load records written by kvs_rbtree_save() from `filename` into `inst`.
 *
 * Records already inserted stay in the tree if a later record fails.
 *
 * @return 0 on success (end of file reached at a record boundary);
 *         -1 on invalid arguments; -2 if the file cannot be opened;
 *         -3 on read error, truncated record or zero-length key;
 *         -4 on allocation failure; -5 if a key is already present.
 */
int kvs_rbtree_load(kvs_rbtree_t *inst, const char* filename) {
    if (!inst || !filename) return -1;

    FILE *fp = fopen(filename, "rb");
    if (!fp) return -2;

    int rc = 0;
    for (;;) {
        /* A clean end of file at a record boundary is the normal exit.
         * Peek one byte so it can be told apart from a truncated record.
         * (Assumes kvs_read_file() reads through the same FILE stream.) */
        int probe = fgetc(fp);
        if (probe == EOF) {
            if (ferror(fp)) rc = -3;
            break;
        }
        if (ungetc(probe, fp) == EOF) {
            rc = -3;
            break;
        }

        uint32_t klen_n = 0, vlen_n = 0;
        if (kvs_read_file(fp, &klen_n, 4) < 0 ||
            kvs_read_file(fp, &vlen_n, 4) < 0) {
            rc = -3;
            break;
        }

        uint32_t klen = ntohl(klen_n);
        uint32_t vlen = ntohl(vlen_n);
        if (klen == 0) {
            rc = -3;
            break;
        }

        /* One allocation holds the header, the key and the value. */
        size_t node_size = rbtree_node_size(klen, vlen);
        rbtree_node *node = (rbtree_node*)kvs_malloc(node_size);
        if (!node) {
            rc = -4;
            break;
        }

        memset(node, 0, node_size);
        node->key_len = klen;
        node->value_len = vlen;

        if (kvs_read_file(fp, rbtree_node_get_key(node), (size_t)klen) < 0) {
            kvs_free(node);
            rc = -3;
            break;
        }

        if (vlen > 0 &&
            kvs_read_file(fp, rbtree_node_get_value(node), (size_t)vlen) < 0) {
            kvs_free(node);
            rc = -3;
            break;
        }

        /* Insert the node directly instead of going through
         * kvs_rbtree_set(), which would allocate and copy it again. */
        if (rbtree_insert(inst, node) < 0) {
            kvs_free(node);
            rc = -5;
            break;
        }
    }

    fclose(fp);
    return rc;
}