#include "zvfs.h"

/* Debug logging is compiled out in this file. */
#undef SPDK_DEBUGLOG
#define SPDK_DEBUGLOG(...) do {} while(0)

struct spdk_thread *global_thread = NULL;
const char *json_file = "/home/lian/share/10.1-spdk/zvfs/zvfs.json";

/* Name of the bdev the blobstore is created on. */
static const char *const zvfs_bdev_name = "Malloc0";

/* Alignment requested for every DMA buffer allocation. */
enum { ZVFS_DMA_BUF_ALIGN = 0x1000 };

/* Status handed to json_app_load_done() by the most recent JSON config load. */
static int zvfs_json_load_rc = 0;

// mount
void zvfs_do_mount(void *arg);
void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx);
void zvfs_spdk_bs_load_cb(void *arg, struct spdk_blob_store *bs, int bserrno);
void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno);

// create
void zvfs_do_create(void *arg);
void zvfs_spdk_bs_create_blob_cb(void *arg, spdk_blob_id blobid, int bserrno);
void zvfs_spdk_bs_open_blob_cb(void *arg, struct spdk_blob *blb, int bserrno);
void zvfs_spdk_blob_resize_cb(void *arg, int bserrno);
void zvfs_spdk_blob_sync_cb(void *arg, int bserrno);

// open
void zvfs_do_open(void *arg);
void zvfs_spdk_bs_open_blob_cb2(void *arg, struct spdk_blob *blb, int bserrno);

// read
void zvfs_do_read(void *arg);
void zvfs_spdk_blob_read_cb(void *arg, int bserrno);

// write
void zvfs_do_write(void *arg);
void zvfs_do_write_io(zvfs_file_t *file);
void zvfs_spdk_blob_write_preread_cb(void *arg, int bserrno);
void zvfs_spdk_blob_write_resize_cb(void *arg, int bserrno);
void zvfs_spdk_blob_write_sync_cb(void *arg, int bserrno);
void zvfs_spdk_blob_write_cb(void *arg, int bserrno);

// close
void zvfs_do_close(void *arg);
void zvfs_spdk_blob_close_cb(void *arg, int bserrno);

// delete
void zvfs_do_delete(void *arg);
void zvfs_spdk_blob_delete_cb(void *arg, int bserrno);

// setup
void zvfs_json_load_fn(void *arg);
void json_app_load_done(int rc, void *ctx);

// unmount
void zvfs_do_umount(void *arg);
void zvfs_spdk_bs_unload_cb(void *arg, int bserrno);

/* ================================================================== */
/* HELPER                                                             */
/* ================================================================== */

/*
 * Number of clusters needed to hold bytes [0, end_byte), rounded up.
 * Written as quotient + remainder test so the rounding cannot overflow.
 */
static uint64_t zvfs_need_clusters(zvfs_t *fs, uint64_t end_byte)
{
    uint64_t cluster_size = spdk_bs_get_cluster_size(fs->bs);

    return end_byte / cluster_size + (end_byte % cluster_size != 0 ? 1 : 0);
}

/*
 * Compute the io-unit range touched by the pending I/O described by
 * file->current_offset and file->io_count.
 *
 *   out_lba       first io unit touched
 *   out_page_off  byte offset of current_offset inside that first io unit
 *   out_lba_count number of io units touched, rounded up
 */
static void calc_lba_range(zvfs_file_t *file, uint64_t *out_lba,
                           uint64_t *out_page_off, uint64_t *out_lba_count)
{
    uint64_t io_unit = file->fs->io_unit_size;
    uint64_t off = file->current_offset;
    uint64_t cnt = file->io_count;

    *out_lba = off / io_unit;
    *out_page_off = off % io_unit;
    *out_lba_count = (*out_page_off + cnt + io_unit - 1) / io_unit;
}

/* Free the handle's DMA buffer (if any) and reset its bookkeeping. */
static void release_dma_buf(zvfs_file_t *file)
{
    spdk_free(file->dma_buf);
    file->dma_buf = NULL;
    file->dma_buf_size = 0;
}

/*
 * Make sure file->dma_buf holds at least need_bytes.
 * An existing buffer that is large enough is reused; otherwise it is freed
 * and a new one is allocated (old contents are not preserved).
 * Returns 0 on success, -1 when the allocation fails (dma_buf is then NULL).
 */
static int ensure_dma_buf(zvfs_file_t *file, uint64_t need_bytes)
{
    if (file->dma_buf && file->dma_buf_size >= need_bytes) {
        return 0;
    }

    release_dma_buf(file);

    file->dma_buf = spdk_malloc(need_bytes, ZVFS_DMA_BUF_ALIGN, NULL,
                                SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    if (!file->dma_buf) {
        return -1;
    }
    file->dma_buf_size = need_bytes;
    return 0;
}

/* True when the handle points at a mounted blobstore. */
static bool zvfs_file_has_store(const zvfs_file_t *file)
{
    return file->fs != NULL && file->fs->bs != NULL;
}

/* True when the handle has an open blob and an I/O channel to use it with. */
static bool zvfs_file_is_open(const zvfs_file_t *file)
{
    return zvfs_file_has_store(file) && file->fs->channel != NULL && file->blob != NULL;
}

// waiter
/*
 * Send start_fn(ctx) to `thread`, then poll the thread until *finished
 * becomes true or WAITER_MAX_TIME poll iterations have elapsed.
 * Returns true when *finished was set, false on timeout or when the message
 * could not be queued.
 */
bool waiter(struct spdk_thread *thread, spdk_msg_fn start_fn, void *ctx, bool *finished)
{
    if (thread == NULL || finished == NULL) {
        return false;
    }

    /* If the message was not queued nothing will ever set *finished. */
    if (spdk_thread_send_msg(thread, start_fn, ctx) != 0) {
        return false;
    }

    int waiter_count = 0;
    do {
        spdk_thread_poll(thread, 0, 0);
        waiter_count++;
    } while (!(*finished) && waiter_count < WAITER_MAX_TIME);

    /* The loop only exits unfinished when the poll budget ran out. */
    return *finished;
}

/* ================================================================== */
/* MOUNT                                                              */
/* ================================================================== */

/*
 * Shared success tail of the load and init callbacks: record the blobstore
 * and its io unit size, allocate the I/O channel, and finish the mount.
 * When the channel cannot be allocated fs->channel stays NULL, which
 * zvfs_mount() reports as a failure.
 */
static void zvfs_mount_finish(zvfs_t *fs, struct spdk_blob_store *bs)
{
    uint64_t io_unit_size = spdk_bs_get_io_unit_size(bs);

    SPDK_DEBUGLOG("io_unit_size : %"PRIu64"\n", io_unit_size);
    fs->io_unit_size = io_unit_size;
    fs->bs = bs;
    fs->channel = spdk_bs_alloc_io_channel(fs->bs);
    if (fs->channel == NULL) {
        SPDK_ERRLOG("alloc io channel failed\n");
    }
    fs->finished = true;
}

/*
 * Mount entry point (runs on the SPDK thread): create the bs_dev and try to
 * load an existing blobstore from it. zvfs_spdk_bs_load_cb falls back to
 * initializing a fresh blobstore when the load fails.
 */
void zvfs_do_mount(void *arg)
{
    zvfs_t *fs = (zvfs_t *)arg;
    struct spdk_bs_dev *bs_dev = NULL;

    int rc = spdk_bdev_create_bs_dev_ext(zvfs_bdev_name, zvfs_spdk_bdev_event_cb, NULL, &bs_dev);
    if (rc != 0) {
        /* Must not fall through: bs_dev is NULL and cannot be loaded. */
        SPDK_ERRLOG("create bs_dev failed: %d\n", rc);
        spdk_app_stop(-1);
        fs->finished = true;
        return;
    }

    fs->bs_dev = bs_dev;
    spdk_bs_load(bs_dev, NULL, zvfs_spdk_bs_load_cb, fs);
}

/*
 * spdk_bs_load completion. On failure the device is treated as new: the
 * bs_dev is created again and a fresh blobstore is initialized on it.
 */
void zvfs_spdk_bs_load_cb(void *arg, struct spdk_blob_store *bs, int bserrno)
{
    zvfs_t *fs = (zvfs_t *)arg;

    if (bserrno != 0) {
        SPDK_DEBUGLOG("load failed, new device, re-create bs_dev and init\n");

        struct spdk_bs_dev *bs_dev = NULL;
        int rc = spdk_bdev_create_bs_dev_ext(zvfs_bdev_name, zvfs_spdk_bdev_event_cb, NULL, &bs_dev);
        if (rc != 0) {
            SPDK_ERRLOG("re-create bs_dev failed\n");
            spdk_app_stop(-1);
            fs->finished = true;
            return;
        }

        fs->bs_dev = bs_dev;
        spdk_bs_init(fs->bs_dev, NULL, zvfs_spdk_bs_init_cb, fs);
        return;
    }

    zvfs_mount_finish(fs, bs);
}

/* spdk_bs_init completion: finish the mount, or fail it when init failed. */
void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno)
{
    zvfs_t *fs = (zvfs_t *)arg;

    if (bserrno != 0) {
        /* bs is not usable here; querying it would dereference garbage. */
        SPDK_ERRLOG("bs init failed: %d\n", bserrno);
        fs->finished = true;
        return;
    }

    zvfs_mount_finish(fs, bs);
}

/* bdev event callback required by spdk_bdev_create_bs_dev_ext; events are ignored. */
void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
    (void)type;
    (void)bdev;
    (void)event_ctx;
}

/* ================================================================== */
/* CREATE                                                             */
/* ================================================================== */

/*
 * Create chain:
 *   zvfs_do_create -> create blob -> open blob -> resize to 1 cluster
 *                  -> sync metadata -> allocate DMA buffer
 * Success leaves file->blob and file->dma_buf set; any failure leaves at
 * least one of them NULL, which zvfs_create() reports.
 */
void zvfs_do_create(void *arg)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    /* Start from a clean handle so stale state cannot look like success. */
    file->blob = NULL;
    release_dma_buf(file);

    if (!zvfs_file_has_store(file)) {
        SPDK_ERRLOG("create: blobstore not mounted\n");
        file->finished = true;
        return;
    }

    spdk_bs_create_blob(file->fs->bs, zvfs_spdk_bs_create_blob_cb, file);
}

/* Blob created: remember its id and open it. */
void zvfs_spdk_bs_create_blob_cb(void *arg, spdk_blob_id blobid, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    file->blob_id = blobid;
    if (bserrno) {
        SPDK_ERRLOG("create blob error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    SPDK_DEBUGLOG("create blobid : %"PRIu64"\n", blobid);
    spdk_bs_open_blob(file->fs->bs, blobid, zvfs_spdk_bs_open_blob_cb, file);
}

/* Blob opened (create path): give it its first cluster. */
void zvfs_spdk_bs_open_blob_cb(void *arg, struct spdk_blob *blb, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("load blob error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    file->blob = blb;

    uint64_t free_cluster = spdk_bs_free_cluster_count(file->fs->bs);
    if (free_cluster == 0) {
        SPDK_ERRLOG("no free cluster: %d\n", bserrno);
        file->finished = true;
        return;
    }

    spdk_blob_resize(blb, 1, zvfs_spdk_blob_resize_cb, file);
}

/* Initial resize done: mirror the cluster count into the dirent and sync metadata. */
void zvfs_spdk_blob_resize_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("create resize error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    uint64_t total = spdk_blob_get_num_clusters(file->blob);
    SPDK_DEBUGLOG("resize blob :%"PRIu64"\n", total);

    if (file->dirent) {
        file->dirent->allocated_clusters = total;
    }

    spdk_blob_sync_md(file->blob, zvfs_spdk_blob_sync_cb, file);
}

/* Metadata synced: allocate the handle's DMA buffer and finish the create. */
void zvfs_spdk_blob_sync_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("create sync error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    if (ensure_dma_buf(file, BUFFER_SIZE) != 0) {
        SPDK_ERRLOG("spdk_malloc failed\n");
    }
    file->finished = true;
}

/* ================================================================== */
/* OPEN                                                               */
/* ================================================================== */

/* Open the blob identified by file->blob_id (runs on the SPDK thread). */
void zvfs_do_open(void *arg)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    /* Start from a clean handle so stale state cannot look like success. */
    file->blob = NULL;
    release_dma_buf(file);

    if (!zvfs_file_has_store(file)) {
        SPDK_ERRLOG("open: blobstore not mounted\n");
        file->finished = true;
        return;
    }

    spdk_bs_open_blob(file->fs->bs, file->blob_id, zvfs_spdk_bs_open_blob_cb2, file);
}

/* Blob opened (open path): store the handle and allocate the DMA buffer. */
void zvfs_spdk_bs_open_blob_cb2(void *arg, struct spdk_blob *blb, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("load blob error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    file->blob = blb;

    if (ensure_dma_buf(file, BUFFER_SIZE) != 0) {
        SPDK_ERRLOG("spdk_malloc failed\n");
    }
    file->finished = true;
}

/* ================================================================== */
/* READ                                                               */
/* ================================================================== */

/*
 * Read file->io_count bytes starting at file->current_offset into dma_buf.
 * The request is truncated at dirent->file_size (0 when there is no dirent).
 * dma_buf receives whole io units starting at the io-unit boundary;
 * zvfs_read() copies the requested slice out of it.
 * file->actual_io_count is the number of bytes that will be delivered and is
 * 0 at EOF or on any failure.
 */
void zvfs_do_read(void *arg)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    file->actual_io_count = 0;

    if (!zvfs_file_is_open(file)) {
        SPDK_ERRLOG("read: file is not open\n");
        file->io_count = 0;
        file->finished = true;
        return;
    }

    uint64_t io_unit = file->fs->io_unit_size;
    uint64_t offset = file->current_offset;
    uint64_t file_sz = file->dirent ? file->dirent->file_size : 0;

    /* EOF check; a zero-length request has nothing to transfer either. */
    if (offset >= file_sz || file->io_count == 0) {
        SPDK_DEBUGLOG("read: EOF\n");
        file->io_count = 0;
        file->finished = true;
        return;
    }

    /* Truncate at end of file (subtraction form cannot overflow). */
    if (file->io_count > file_sz - offset) {
        file->io_count = file_sz - offset;
    }
    file->actual_io_count = file->io_count;

    uint64_t lba, page_off, lba_count;
    calc_lba_range(file, &lba, &page_off, &lba_count);

    uint64_t buf_need = lba_count * io_unit;
    if (ensure_dma_buf(file, buf_need) != 0) {
        SPDK_ERRLOG("ensure_dma_buf failed\n");
        file->actual_io_count = 0;
        file->finished = true;
        return;
    }

    spdk_blob_io_read(file->blob, file->fs->channel, file->dma_buf,
                      lba, lba_count, zvfs_spdk_blob_read_cb, file);
}

/* Read completion: advance the offset on success, report 0 bytes on error. */
void zvfs_spdk_blob_read_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("blob_read error: %d\n", bserrno);
        file->io_count = 0;
        /* zvfs_read() keys off actual_io_count; it must not stay non-zero. */
        file->actual_io_count = 0;
        file->finished = true;
        return;
    }

    file->current_offset += file->io_count;
    SPDK_DEBUGLOG("read complete, new offset=%" PRIu64 "\n", file->current_offset);
    file->finished = true;
}

/* ================================================================== */
/* WRITE                                                              */
/* ================================================================== */

/**
 * Write callback chain (read-modify-write on whole io units):
 *
 * zvfs_do_write
 *   ├─(blob must grow)─→ spdk_blob_resize
 *   │       └─→ zvfs_spdk_blob_write_resize_cb
 *   │               └─→ spdk_blob_sync_md
 *   │                       └─→ zvfs_spdk_blob_write_sync_cb
 *   │                               └─→ pre-read
 *   └─(large enough)──→ pre-read
 *
 * pre-read (spdk_blob_io_read of the covered io units)
 *   └─→ zvfs_spdk_blob_write_preread_cb   (patch user data into dma_buf)
 *           └─→ zvfs_do_write_io
 *                   └─→ zvfs_spdk_blob_write_cb
 *
 * The blob is grown BEFORE the pre-read so the pre-read range always lies
 * inside the blob; a pre-read error is therefore a real error and aborts the
 * write instead of letting stale buffer contents reach the disk.
 */

/* Issue the pre-read of every io unit the pending write touches. */
static void zvfs_write_preread(zvfs_file_t *file)
{
    uint64_t lba, page_off, lba_count;

    calc_lba_range(file, &lba, &page_off, &lba_count);
    spdk_blob_io_read(file->blob, file->fs->channel, file->dma_buf,
                      lba, lba_count, zvfs_spdk_blob_write_preread_cb, file);
}

/*
 * Step 1: size the DMA buffer and grow the blob if the write ends beyond the
 * clusters it currently owns, then start the pre-read.
 * The user data is referenced through file->write_staging_buf and
 * file->io_count, both set by zvfs_write().
 */
void zvfs_do_write(void *arg)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (!zvfs_file_is_open(file)) {
        SPDK_ERRLOG("write: file is not open\n");
        file->finished = true;
        return;
    }

    /* Nothing to transfer; avoid issuing zero-length I/O. */
    if (file->io_count == 0) {
        file->finished = true;
        return;
    }

    uint64_t io_unit = file->fs->io_unit_size;
    uint64_t lba, page_off, lba_count;
    calc_lba_range(file, &lba, &page_off, &lba_count);

    uint64_t buf_need = lba_count * io_unit;
    if (ensure_dma_buf(file, buf_need) != 0) {
        SPDK_ERRLOG("ensure_dma_buf failed\n");
        file->finished = true;
        return;
    }

    uint64_t end_byte = file->current_offset + file->io_count;
    uint64_t need_clusters = zvfs_need_clusters(file->fs, end_byte);
    /*
     * Ask the blob itself: a stale or zeroed dirent->allocated_clusters
     * could otherwise make the resize below shrink the blob.
     */
    uint64_t cur_clusters = spdk_blob_get_num_clusters(file->blob);

    if (need_clusters > cur_clusters) {
        uint64_t free_clusters = spdk_bs_free_cluster_count(file->fs->bs);
        if (need_clusters - cur_clusters > free_clusters) {
            SPDK_ERRLOG("no free clusters\n");
            file->finished = true;
            return;
        }
        spdk_blob_resize(file->blob, need_clusters, zvfs_spdk_blob_write_resize_cb, file);
        return;
    }

    zvfs_write_preread(file);
}

/* Step 2a: resize done -> persist the new size. */
void zvfs_spdk_blob_write_resize_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("write resize error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    spdk_blob_sync_md(file->blob, zvfs_spdk_blob_write_sync_cb, file);
}

/* Step 2b: metadata synced -> update the dirent and start the pre-read. */
void zvfs_spdk_blob_write_sync_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("write sync error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    if (file->dirent) {
        file->dirent->allocated_clusters = (uint32_t)spdk_blob_get_num_clusters(file->blob);
    }

    zvfs_write_preread(file);
}

/* Step 3: pre-read done -> overlay the user data at its offset inside dma_buf. */
void zvfs_spdk_blob_write_preread_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        /* dma_buf does not hold the on-disk bytes; writing it back would corrupt them. */
        SPDK_ERRLOG("write preread error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    uint64_t page_off = file->current_offset % file->fs->io_unit_size;
    memcpy((uint8_t *)file->dma_buf + page_off, file->write_staging_buf, file->io_count);

    zvfs_do_write_io(file);
}

/* Step 4: write the patched whole io units back to the blob. */
void zvfs_do_write_io(zvfs_file_t *file)
{
    uint64_t lba, page_off, lba_count;

    calc_lba_range(file, &lba, &page_off, &lba_count);
    spdk_blob_io_write(file->blob, file->fs->channel, file->dma_buf,
                       lba, lba_count, zvfs_spdk_blob_write_cb, file);
}

/*
 * Step 5: write completion. On success the offset advances by io_count and
 * dirent->file_size grows if the write extended the file. On failure the
 * offset is left untouched, which is how zvfs_write() detects the error.
 */
void zvfs_spdk_blob_write_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("blob_write error: %d\n", bserrno);
        file->finished = true;
        return;
    }

    uint64_t new_end = file->current_offset + file->io_count;
    if (file->dirent && new_end > file->dirent->file_size) {
        file->dirent->file_size = new_end;
    }
    file->current_offset = new_end;

    SPDK_DEBUGLOG("write complete, new offset=%" PRIu64 "\n", file->current_offset);
    file->finished = true;
}

/* ================================================================== */
/* CLOSE                                                              */
/* ================================================================== */

/* Close the blob; a handle without an open blob is just reset. */
void zvfs_do_close(void *arg)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (file->blob == NULL) {
        release_dma_buf(file);
        file->current_offset = 0;
        file->finished = true;
        return;
    }

    spdk_blob_close(file->blob, zvfs_spdk_blob_close_cb, file);
}

/*
 * Close completion: release the DMA buffer and rewind the offset.
 * file->blob is cleared only when the close succeeded, so zvfs_close() can
 * report the outcome and no dangling blob pointer is left behind.
 */
void zvfs_spdk_blob_close_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("blob_close error: %d\n", bserrno);
    } else {
        file->blob = NULL;
    }

    release_dma_buf(file);
    file->current_offset = 0;
    file->finished = true;
}

/* ================================================================== */
/* DELETE                                                             */
/* ================================================================== */

/* Delete the blob identified by file->blob_id. */
void zvfs_do_delete(void *arg)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (!zvfs_file_has_store(file)) {
        SPDK_ERRLOG("delete: blobstore not mounted\n");
        file->finished = true;
        return;
    }

    spdk_bs_delete_blob(file->fs->bs, file->blob_id, zvfs_spdk_blob_delete_cb, file);
}

/* Delete completion: errors are logged only. */
void zvfs_spdk_blob_delete_cb(void *arg, int bserrno)
{
    zvfs_file_t *file = (zvfs_file_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("blob_delete error: %d\n", bserrno);
    }
    file->finished = true;
}

/* ================================================================== */
/* UNMOUNT                                                            */
/* ================================================================== */

/*
 * Release the I/O channel and unload the blobstore.
 * Finishes immediately when nothing is mounted, so unmounting twice (or
 * after a failed mount) does not spin until the waiter times out.
 */
void zvfs_do_umount(void *arg)
{
    zvfs_t *fs = (zvfs_t *)arg;

    if (fs->bs == NULL) {
        fs->finished = true;
        return;
    }

    if (fs->channel) {
        spdk_bs_free_io_channel(fs->channel);
        fs->channel = NULL; /* freed: must not be reused */
    }

    spdk_bs_unload(fs->bs, zvfs_spdk_bs_unload_cb, fs);
}

/* Unload completion: drop the blobstore pointers only when the unload succeeded. */
void zvfs_spdk_bs_unload_cb(void *arg, int bserrno)
{
    zvfs_t *fs = (zvfs_t *)arg;

    if (bserrno) {
        SPDK_ERRLOG("bs unload error: %d\n", bserrno);
    } else {
        fs->bs = NULL;
        fs->bs_dev = NULL;
    }
    fs->finished = true;
}

// setup
// zvfs.json
/*
 * Initialize the SPDK environment, logging and thread library, create the
 * global SPDK thread, and load the subsystem configuration from json_file.
 * Returns 0 on success, -1 on any failure (including a failed or timed-out
 * configuration load).
 */
int zvfs_env_setup(void)
{
    /*
     * Static so the completion callback never writes to a dead stack slot if
     * it fires after a waiter timeout.
     */
    static bool done;

    struct spdk_env_opts opts;
    spdk_env_opts_init(&opts);
    opts.name = "zvfs";

    if (spdk_env_init(&opts) != 0) {
        return -1;
    }

    spdk_log_set_print_level(SPDK_LOG_NOTICE);
    spdk_log_set_level(SPDK_LOG_NOTICE);
    spdk_log_open(NULL);

    if (spdk_thread_lib_init(NULL, 0) != 0) {
        SPDK_ERRLOG("spdk_thread_lib_init failed\n");
        return -1;
    }

    global_thread = spdk_thread_create("global", NULL);
    if (global_thread == NULL) {
        SPDK_ERRLOG("spdk_thread_create failed\n");
        return -1;
    }
    spdk_set_thread(global_thread);

    done = false;
    zvfs_json_load_rc = 0;
    if (!waiter(global_thread, zvfs_json_load_fn, &done, &done)) {
        SPDK_ERRLOG("json config load did not complete\n");
        return -1;
    }
    if (zvfs_json_load_rc != 0) {
        SPDK_ERRLOG("json config load failed: %d\n", zvfs_json_load_rc);
        return -1;
    }

    SPDK_DEBUGLOG("zvfs_env_setup complete\n");
    return 0;
}

/* Start the JSON subsystem load; `arg` is forwarded to json_app_load_done. */
void zvfs_json_load_fn(void *arg)
{
    spdk_subsystem_init_from_json_config(json_file, SPDK_DEFAULT_RPC_ADDR,
                                         json_app_load_done, arg, true);
}

/* JSON load completion: record the status and set the bool that `ctx` points to. */
void json_app_load_done(int rc, void *ctx)
{
    bool *done = ctx;

    zvfs_json_load_rc = rc;
    *done = true;
    SPDK_DEBUGLOG("json_app_load_done\n");
}

// filesystem
// load
/* Mount the blobstore. Returns non-zero on success, 0 on failure or timeout. */
int zvfs_mount(struct zvfs_s *fs)
{
    fs->bs = NULL;
    fs->channel = NULL;
    fs->finished = false;

    bool ok = waiter(global_thread, zvfs_do_mount, fs, &fs->finished);
    SPDK_DEBUGLOG("mount finished\n");
    return ok && fs->bs != NULL && fs->channel != NULL;
}

// unload
/* Unmount the blobstore. Returns non-zero on success, 0 on failure or timeout. */
int zvfs_umount(struct zvfs_s *fs)
{
    fs->finished = false;

    bool ok = waiter(global_thread, zvfs_do_umount, fs, &fs->finished);
    SPDK_DEBUGLOG("umount finished\n");
    return ok && fs->bs == NULL;
}

// file
// create
/*
 * Create and open a new blob for `file` (file->fs must be mounted).
 * Returns non-zero on success, 0 on failure or timeout. After a failure
 * file->blob may still be set; zvfs_close() releases it.
 */
int zvfs_create(struct zvfs_file_s *file)
{
    file->finished = false;

    bool ok = waiter(global_thread, zvfs_do_create, file, &file->finished);
    SPDK_DEBUGLOG("create finished\n");
    return ok && file->blob != NULL && file->dma_buf != NULL;
}

// open
/*
 * Open the blob named by file->blob_id.
 * Returns non-zero on success, 0 on failure or timeout.
 */
int zvfs_open(struct zvfs_file_s *file)
{
    file->finished = false;

    bool ok = waiter(global_thread, zvfs_do_open, file, &file->finished);
    SPDK_DEBUGLOG("open finished\n");
    return ok && file->blob != NULL && file->dma_buf != NULL;
}

// read
/*
 * Read up to `count` bytes at file->current_offset into `buffer`.
 * Returns the number of bytes copied, or -1 on timeout, I/O error, or when
 * nothing could be read (including end of file).
 */
int zvfs_read(struct zvfs_file_s *file, uint8_t *buffer, size_t count)
{
    if (buffer == NULL) {
        return -1;
    }

    file->io_count = count;
    file->actual_io_count = 0;
    file->finished = false;

    bool ok = waiter(global_thread, zvfs_do_read, file, &file->finished);
    if (!ok || file->actual_io_count == 0) {
        return -1;
    }

    /*
     * dma_buf holds whole io units starting at an io-unit boundary.
     * The read callback already advanced current_offset by actual_io_count,
     * so the offset the read started at is current_offset - actual_io_count,
     * and its remainder modulo the io unit size locates the data in dma_buf.
     */
    uint64_t pre_offset = file->current_offset - file->actual_io_count;
    uint64_t page_off = pre_offset % file->fs->io_unit_size;

    memcpy(buffer, (uint8_t *)file->dma_buf + page_off, file->actual_io_count);

    SPDK_DEBUGLOG("read finished\n");
    return (int)file->actual_io_count;
}

// write
/*
 * Write `count` bytes from `buffer` at file->current_offset.
 * `buffer` must stay valid until this call returns.
 * Returns `count` on success, -1 on timeout or when the write failed.
 */
int zvfs_write(struct zvfs_file_s *file, const uint8_t *buffer, size_t count)
{
    if (buffer == NULL && count > 0) {
        return -1;
    }

    uint64_t start_offset = file->current_offset;

    file->io_count = count;
    file->write_staging_buf = buffer;
    file->finished = false;

    bool ok = waiter(global_thread, zvfs_do_write, file, &file->finished);
    SPDK_DEBUGLOG("write finished\n");

    /* The write callback advances the offset only when the data was written. */
    if (!ok || file->current_offset != start_offset + count) {
        return -1;
    }
    return (int)count;
}

// close
/* Close the file. Returns non-zero on success, 0 on failure or timeout. */
int zvfs_close(struct zvfs_file_s *file)
{
    file->finished = false;

    bool ok = waiter(global_thread, zvfs_do_close, file, &file->finished);
    SPDK_DEBUGLOG("close finished\n");
    return ok && file->blob == NULL;
}

// delete
/*
 * Delete the blob named by file->blob_id.
 * Returns non-zero when the operation completed (errors are logged by the
 * callback), 0 on timeout.
 */
int zvfs_delete(struct zvfs_file_s *file)
{
    file->finished = false;

    bool ok = waiter(global_thread, zvfs_do_delete, file, &file->finished);
    SPDK_DEBUGLOG("delete finished\n");
    return ok;
}

/* Smoke test: mount, create a file, write twice, read back, close, delete, unmount. */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    int rc = -1;
    int n = -1;
    zvfs_t *fs = NULL;
    zvfs_file_t *file = NULL;
    zvfs_dirent_t *dirent = NULL;
    const char *buffer = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const char *buffer2 = "abcdefghijklmnopqrstuvwxyz";
    uint8_t rbuffer[BUFFER_SIZE] = {0};

    if (zvfs_env_setup()) {
        return -1;
    }
    SPDK_NOTICELOG("zvfs_env_setup success\n");

    SPDK_NOTICELOG("\n\n zvfs mount start \n\n");
    fs = calloc(1, sizeof(*fs));
    if (fs == NULL) {
        SPDK_ERRLOG("calloc failed\n");
        return -1;
    }
    if (!zvfs_mount(fs)) {
        SPDK_ERRLOG("zvfs_mount failed\n");
        goto out_umount;
    }

    SPDK_NOTICELOG("\n\n zvfs open start \n\n");
    file = calloc(1, sizeof(*file));
    dirent = calloc(1, sizeof(*dirent));
    if (file == NULL || dirent == NULL) {
        SPDK_ERRLOG("calloc failed\n");
        goto out_umount;
    }
    file->fs = fs;
    file->dirent = dirent;
    if (!zvfs_create(file)) {
        SPDK_ERRLOG("zvfs_create failed\n");
        goto out_close;
    }

    SPDK_NOTICELOG("\n\n zvfs write start \n\n");
    if (zvfs_write(file, (const uint8_t *)buffer, strlen(buffer)) < 0 ||
        zvfs_write(file, (const uint8_t *)buffer2, strlen(buffer2)) < 0) {
        SPDK_ERRLOG("zvfs_write failed\n");
        goto out_close;
    }

    SPDK_NOTICELOG("\n\n zvfs read start \n\n");
    file->current_offset = 0;
    /* Leave the last byte untouched so rbuffer is always NUL-terminated. */
    n = zvfs_read(file, rbuffer, sizeof(rbuffer) - 1);
    SPDK_NOTICELOG("READ BUFFER:%d, %s\n", n, (const char *)rbuffer);
    if (n >= 0) {
        rc = 0;
    }

out_close:
    SPDK_NOTICELOG("\n\n zvfs close start \n\n");
    zvfs_close(file);

    SPDK_NOTICELOG("\n\n zvfs delete start \n\n");
    zvfs_delete(file);

out_umount:
    free(dirent);
    free(file);

    SPDK_NOTICELOG("\n\n zvfs umount start \n\n");
    zvfs_umount(fs);
    free(fs);

    return rc;
}