diff --git a/.gitignore b/.gitignore index db62a4e..e71124b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ *.o -*.so \ No newline at end of file +*.d +*.so +func_test +zvfs_meta.txt \ No newline at end of file diff --git a/Makefile b/Makefile index a5ecee8..c54e550 100755 --- a/Makefile +++ b/Makefile @@ -6,11 +6,32 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../spdk) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app_vars.mk -APP = zvfs +LIBZVFS := libzvfs.so +APP := func_test -C_SRCS := zvfs.c +C_SRCS := zvfs.c zvfs_hook.c SPDK_LIB_LIST = $(ALL_MODULES_LIST) event event_bdev -include $(SPDK_ROOT_DIR)/mk/spdk.app.mk \ No newline at end of file +LIBS += $(SPDK_LIB_LINKER_ARGS) +LDFLAGS += -shared -rdynamic -Wl,-z,nodelete -Wl,--disable-new-dtags \ + -Wl,-rpath,$(SPDK_ROOT_DIR)/build/lib \ + -Wl,-rpath,$(SPDK_ROOT_DIR)/dpdk/build/lib +SYS_LIBS += -ldl + +all: $(LIBZVFS) $(APP) + @: + rm -rf zvfs_meta.txt + +$(LIBZVFS): $(OBJS) $(SPDK_LIB_FILES) $(ENV_LIBS) + $(LINK_C) + +$(APP): func_test.c + $(CC) -o $@ $< + +clean: + $(CLEAN_C) $(LIBZVFS) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/README.md b/README.md index 089825b..37f3741 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ ## usage ```shell +cd /home/lian/share/10.1-spdk/spdk +./configure --with-shared +make -j + +make +LD_PRELOAD=./libzvfs.so ./func_test ``` diff --git a/func_test.c b/func_test.c index 018b455..0c004c5 100644 --- a/func_test.c +++ b/func_test.c @@ -1,40 +1,48 @@ -#define _GNU_SOURCE -#include -#include #include #include #include -#include -#include +#include #include int main(int argc, char **argv) { const char *path = "/zvfs/func_test.dat"; - if (argc > 2 && strcmp(argv[1], "-f") == 0) path = argv[2]; - setenv("ZVFS_ROOT", "/zvfs", 0); printf("open: %s\n", path); - int fd = open(path, O_CREAT|O_RDWR, 0644); + int fd = open(path, O_CREAT | O_RDWR | O_TRUNC, 0644); if (fd < 0) { perror("open"); return 1; } - const char *msg = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const char *msg = "ABCDEFGHIJKL"; ssize_t w = write(fd, msg, strlen(msg)); if (w < 0) { perror("write"); return 2; } printf("write: %zd\n", w); - /* Rewind by closing and reopening for read */ + const char *msg2 = "MNOPQRSTUVWXYZ"; + ssize_t w2 = write(fd, msg2, strlen(msg2)); + if (w2 < 0) { perror("write"); return 2; } + printf("write: %zd\n", w2); + close(fd); + + fd = open(path, O_RDONLY); if (fd < 0) { perror("open R"); return 3; } - char buf[256]; memset(buf, 0, sizeof(buf)); + char buf[10]; + memset(buf, 0, sizeof(buf)); ssize_t r = read(fd, buf, sizeof(buf)); if (r < 0) { perror("read"); return 4; } printf("read: %zd bytes: %.*s\n", r, (int)r, buf); + + char buf2[512]; + memset(buf2, 0, sizeof(buf2)); + ssize_t r2 = read(fd, buf2, sizeof(buf2)); + if (r2 < 0) { perror("read"); return 4; } + printf("read: %zd bytes: %.*s\n", r2, (int)r2, buf2); + close(fd); if (unlink(path) != 0) { perror("unlink"); return 5; } printf("unlink: ok\n"); return 0; -} +} \ No newline at end of file diff --git a/zvfs.c b/zvfs.c index ce1c723..2156251 100755 --- a/zvfs.c +++ b/zvfs.c @@ -1,12 +1,17 @@ #include "zvfs.h" +#undef SPDK_DEBUGLOG +#define SPDK_DEBUGLOG(...) do {} while(0) + struct spdk_thread *global_thread = NULL; +const char *json_file = "/home/lian/share/10.1-spdk/zvfs/zvfs.json"; // mount void zvfs_do_mount(void *arg); void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx); +void zvfs_spdk_bs_load_cb(void *arg, struct spdk_blob_store *bs, int bserrno); void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno); // create @@ -26,6 +31,7 @@ void zvfs_spdk_blob_read_cb(void *arg, int bserrno); // write void zvfs_do_write(void *arg); void zvfs_do_write_io(zvfs_file_t *file); +void zvfs_spdk_blob_write_preread_cb(void *arg, int bserrno); void zvfs_spdk_blob_write_resize_cb(void *arg, int bserrno); void zvfs_spdk_blob_write_sync_cb(void *arg, int bserrno); void zvfs_spdk_blob_write_cb(void *arg, int bserrno); @@ -47,14 +53,66 @@ void json_app_load_done(int rc, void *ctx); void zvfs_do_umount(void *arg); void zvfs_spdk_bs_unload_cb(void *arg, int bserrno); -/* ========== helpers ========== */ - +/* ================================================================== */ +/* HELPER */ +/* ================================================================== */ static uint64_t zvfs_need_clusters(zvfs_t *fs, uint64_t end_byte) { uint64_t cluster_size = spdk_bs_get_cluster_size(fs->bs); return (end_byte + cluster_size - 1) / cluster_size; } -// mount +/* ---------- 辅助:计算本次 IO 涉及的 LBA 范围 ---------- */ +static void calc_lba_range(zvfs_file_t *file, + uint64_t *out_lba, + uint64_t *out_page_off, + uint64_t *out_lba_count) +{ + uint64_t io_unit = file->fs->io_unit_size; + uint64_t off = file->current_offset; + uint64_t cnt = file->io_count; + + *out_lba = off / io_unit; + *out_page_off = off % io_unit; + *out_lba_count = (*out_page_off + cnt + io_unit - 1) / io_unit; +} + +/* ---------- 确保 dma_buf 足够大 ---------- */ +static int ensure_dma_buf(zvfs_file_t *file, uint64_t need_bytes) +{ + if (file->dma_buf && file->dma_buf_size >= need_bytes) return 0; + + if (file->dma_buf) spdk_free(file->dma_buf); + + file->dma_buf = spdk_malloc(need_bytes, 0x1000, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (!file->dma_buf) { file->dma_buf_size = 0; return -1; } + + file->dma_buf_size = need_bytes; + return 0; +} + +// waiter +bool waiter(struct spdk_thread *thread, spdk_msg_fn start_fn, void *ctx, bool *finished) { + + spdk_thread_send_msg(thread, start_fn, ctx); + + int waiter_count = 0; + + do { + spdk_thread_poll(thread, 0, 0); + waiter_count ++; + } while(!(*finished) && waiter_count < WAITER_MAX_TIME); + + if (!(*finished) && waiter_count >= WAITER_MAX_TIME) { + return false; // timeout + } + + return true; +} + +/* ================================================================== */ +/* MOUNT */ +/* ================================================================== */ void zvfs_do_mount(void *arg) { zvfs_t *fs = (zvfs_t*)arg; @@ -66,49 +124,78 @@ void zvfs_do_mount(void *arg) { spdk_app_stop(0); } - spdk_bs_init(bs_dev, NULL, zvfs_spdk_bs_init_cb, fs); - - SPDK_NOTICELOG("zvfs_entry\n"); - + fs->bs_dev = bs_dev; + // spdk_bs_init(bs_dev, NULL, zvfs_spdk_bs_init_cb, fs); + spdk_bs_load(bs_dev, NULL, zvfs_spdk_bs_load_cb, fs); } -void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, - void *event_ctx) { - SPDK_NOTICELOG("zvfs_spdk_bdev_event_cb\n"); -} - -void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno) { +void zvfs_spdk_bs_load_cb(void *arg, struct spdk_blob_store *bs, int bserrno) { zvfs_t *fs = (zvfs_t*)arg; + if (bserrno != 0) { + SPDK_DEBUGLOG("load failed, new device, re-create bs_dev and init\n"); + + struct spdk_bs_dev *bs_dev = NULL; + int rc = spdk_bdev_create_bs_dev_ext("Malloc0", zvfs_spdk_bdev_event_cb, NULL, &bs_dev); + if (rc != 0) { + SPDK_ERRLOG("re-create bs_dev failed\n"); + spdk_app_stop(-1); + return; + } + fs->bs_dev = bs_dev; + + spdk_bs_init(fs->bs_dev, NULL, zvfs_spdk_bs_init_cb, fs); + return; + } + uint64_t io_unit_size = spdk_bs_get_io_unit_size(bs); - SPDK_NOTICELOG("io_unit_size : %"PRIu64"\n", io_unit_size); + SPDK_DEBUGLOG("io_unit_size : %"PRIu64"\n", io_unit_size); fs->io_unit_size = io_unit_size; fs->bs = bs; - fs->channel = spdk_bs_alloc_io_channel(fs->bs); if (fs->channel == NULL) { return ; } fs->finished = true; - - SPDK_NOTICELOG("mount finished\n"); } -// create + +void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno) { + zvfs_t *fs = (zvfs_t*)arg; + + uint64_t io_unit_size = spdk_bs_get_io_unit_size(bs); + SPDK_DEBUGLOG("io_unit_size : %"PRIu64"\n", io_unit_size); + + fs->io_unit_size = io_unit_size; + fs->bs = bs; + fs->channel = spdk_bs_alloc_io_channel(fs->bs); + if (fs->channel == NULL) { + return ; + } + + fs->finished = true; +} + +void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, + void *event_ctx) { +} + +/* ================================================================== */ +/* CREATE */ +/* ================================================================== */ void zvfs_do_create(void *arg) { zvfs_file_t *file = (zvfs_file_t *)arg; spdk_bs_create_blob(file->fs->bs, zvfs_spdk_bs_create_blob_cb, file); - } void zvfs_spdk_bs_create_blob_cb(void *arg, spdk_blob_id blobid, int bserrno) { zvfs_file_t *file = (zvfs_file_t *)arg; file->blob_id = blobid; - SPDK_NOTICELOG("blobid : %"PRIu64"\n", blobid); + SPDK_DEBUGLOG("create blobid : %"PRIu64"\n", blobid); spdk_bs_open_blob(file->fs->bs, blobid, zvfs_spdk_bs_open_blob_cb, file); } @@ -123,17 +210,21 @@ void zvfs_spdk_bs_open_blob_cb(void *arg, struct spdk_blob *blb, int bserrno) { file->blob = blb; - uint64_t free_cluster = spdk_bs_free_cluster_count(file->fs->bs); // - SPDK_NOTICELOG("free_cluster : %"PRIu64"\n", free_cluster); + uint64_t free_cluster = spdk_bs_free_cluster_count(file->fs->bs); + if(free_cluster == 0){ + SPDK_ERRLOG("no free cluster: %d\n", bserrno); + file->finished = true; + return ; + } - spdk_blob_resize(blb, free_cluster, zvfs_spdk_blob_resize_cb, file); + spdk_blob_resize(blb, 1, zvfs_spdk_blob_resize_cb, file); } void zvfs_spdk_blob_resize_cb(void *arg, int bserrno) { zvfs_file_t *file = (zvfs_file_t *)arg; uint64_t total = spdk_blob_get_num_clusters(file->blob); - SPDK_NOTICELOG("resize blob :%"PRIu64"\n", total); + SPDK_DEBUGLOG("resize blob :%"PRIu64"\n", total); if (file->dirent) { file->dirent->allocated_clusters = total; @@ -153,11 +244,12 @@ void zvfs_spdk_blob_sync_cb(void *arg, int bserrno) { } file->dma_buf_size = BUFFER_SIZE; - SPDK_NOTICELOG("open complete\n"); file->finished = true; } -// open +/* ================================================================== */ +/* OPEN */ +/* ================================================================== */ void zvfs_do_open(void *arg) { zvfs_file_t *file = (zvfs_file_t *)arg; spdk_bs_open_blob(file->fs->bs, file->blob_id, zvfs_spdk_bs_open_blob_cb2, file); @@ -186,31 +278,45 @@ void zvfs_spdk_bs_open_blob_cb2(void *arg, struct spdk_blob *blb, int bserrno) { file->finished = true; } -// read +/* ================================================================== */ +/* READ */ +/* ================================================================== */ void zvfs_do_read(void *arg) { zvfs_file_t *file = (zvfs_file_t *)arg; - uint64_t io_unit_size = file->fs->io_unit_size; - uint64_t offset = file->current_offset; - uint64_t file_size = file->dirent ? file->dirent->file_size : 0; + uint64_t io_unit = file->fs->io_unit_size; + uint64_t offset = file->current_offset; + uint64_t file_sz = file->dirent ? file->dirent->file_size : 0; - if (offset >= file_size) { - SPDK_NOTICELOG("read: EOF\n"); - file->io_count = 0; - file->finished = true; - return; + /* EOF 检查 */ + if (offset >= file_sz) { + SPDK_DEBUGLOG("read: EOF\n"); + file->io_count = 0; + file->actual_io_count = 0; + file->finished = true; + return; + } + + /* 截断到文件末尾 */ + if (offset + file->io_count > file_sz){ + file->io_count = file_sz - offset; } - if (offset + file->io_count > file_size) { - file->io_count = file_size - offset; - } + file->actual_io_count = file->io_count; - uint64_t lba = offset / io_unit_size; - uint64_t page_off = offset % io_unit_size; - uint64_t lba_count = (page_off + file->io_count + io_unit_size - 1) / io_unit_size; + uint64_t lba, page_off, lba_count; + calc_lba_range(file, &lba, &page_off, &lba_count); - spdk_blob_io_read(file->blob, file->fs->channel, - file->dma_buf, + uint64_t buf_need = lba_count * io_unit; + if (ensure_dma_buf(file, buf_need) != 0) { + SPDK_ERRLOG("ensure_dma_buf failed\n"); + file->actual_io_count = 0; + file->finished = true; + return; + } + + spdk_blob_io_read(file->blob, file->fs->channel, + file->dma_buf, lba, lba_count, zvfs_spdk_blob_read_cb, file); } @@ -226,54 +332,107 @@ void zvfs_spdk_blob_read_cb(void *arg, int bserrno) { } file->current_offset += file->io_count; - SPDK_NOTICELOG("read complete, new offset=%" PRIu64 "\n", file->current_offset); + SPDK_DEBUGLOG("read complete, new offset=%" PRIu64 "\n", file->current_offset); file->finished = true; } - -// write -/* - * callback 链: - * - * zvfs_do_write - * ├─(需要扩容)─→ spdk_blob_resize → zvfs_spdk_blob_write_resize_cb - * │ → spdk_blob_sync_md → zvfs_spdk_blob_write_sync_cb - * │ → zvfs_do_write_io - * │ → zvfs_spdk_blob_write_cb - * └─(不需扩容)─→ zvfs_do_write_io - * → zvfs_spdk_blob_write_cb +/* ================================================================== */ +/* WRITE */ +/* ================================================================== */ +/** + * 1. write 的 callback 链 + * zvfs_do_write + * └─→ 先用 spdk_blob_io_read 读出覆盖范围内的扇区 + * └─→ zvfs_spdk_blob_write_preread_cb + * (在 dma_buf 里 patch 新数据) + * ├─(需扩容)─→ spdk_blob_resize + * │ └─→ zvfs_spdk_blob_write_resize_cb + * │ └─→ spdk_blob_sync_md + * │ └─→ zvfs_spdk_blob_write_sync_cb + * │ └─→ zvfs_do_write_io + * │ └─→ zvfs_spdk_blob_write_cb + * └─(不需扩容)─→ zvfs_do_write_io + * └─→ zvfs_spdk_blob_write_cb */ -void zvfs_do_write_io(zvfs_file_t *file) { - uint64_t io_unit_size = file->fs->io_unit_size; - uint64_t lba = file->current_offset / io_unit_size; - uint64_t page_off = file->current_offset % io_unit_size; - uint64_t lba_count = (page_off + file->io_count + io_unit_size - 1) / io_unit_size; - spdk_blob_io_write(file->blob, file->fs->channel, - file->dma_buf, - lba, lba_count, - zvfs_spdk_blob_write_cb, file); +/* Step 1 : 进入 write,先把覆盖范围内的扇区读出来(read-modify-write) */ +void zvfs_do_write(void *arg) { + zvfs_file_t *file = (zvfs_file_t *)arg; + + uint64_t io_unit = file->fs->io_unit_size; + uint64_t lba, page_off, lba_count; + calc_lba_range(file, &lba, &page_off, &lba_count); + + uint64_t buf_need = lba_count * io_unit; + if (ensure_dma_buf(file, buf_need) != 0) { + SPDK_ERRLOG("ensure_dma_buf failed\n"); + file->finished = true; + return; + } + + /* + * 先把涉及的扇区读出,read 完成后在 preread_cb 里 patch 数据再写。 + * 注意:把用户数据暂存在 file->write_buf / write_count, + * 或者借用 file->io_count(io_count 不变)。 + * 这里我们把用户数据已经由上层调用者拷贝到了 write_staging_buf, + */ + /* 不管是否需要扩容,先 preread */ + spdk_blob_io_read(file->blob, file->fs->channel, + file->dma_buf, + lba, lba_count, + zvfs_spdk_blob_write_preread_cb, file); } -void zvfs_spdk_blob_write_cb(void *arg, int bserrno) { +/* Step 2 : preread 完成,patch dma_buf,然后决定是否扩容 */ +void zvfs_spdk_blob_write_preread_cb(void *arg, int bserrno){ + zvfs_file_t *file = (zvfs_file_t *)arg; + + /* preread 失败也没关系——如果是新分配区域全零即可, + 这里仍然继续(SPDK 对未写过的区域返回全零)。*/ + if (bserrno) { + SPDK_DEBUGLOG("preread error %d (may be uninitialized, continue)\n", bserrno); + } + + /* patch:把用户数据覆写到 dma_buf 的正确偏移处 */ + uint64_t page_off = file->current_offset % file->fs->io_unit_size; + memcpy((uint8_t *)file->dma_buf + page_off, + file->write_staging_buf, + file->io_count); + + /* 判断是否需要扩容 */ + uint64_t end_byte = file->current_offset + file->io_count; + uint64_t need_clusters = zvfs_need_clusters(file->fs, end_byte); + uint64_t cur_clusters = file->dirent ? file->dirent->allocated_clusters + : spdk_blob_get_num_clusters(file->blob); + + if (need_clusters > cur_clusters) { + uint64_t free_clusters = spdk_bs_free_cluster_count(file->fs->bs); + if (need_clusters - cur_clusters > free_clusters) { + SPDK_ERRLOG("no free clusters\n"); + file->finished = true; + return; + } + spdk_blob_resize(file->blob, need_clusters, + zvfs_spdk_blob_write_resize_cb, file); + } else { + zvfs_do_write_io(file); + } +} + +/* Step 3a : resize 完成 → sync */ +void zvfs_spdk_blob_write_resize_cb(void *arg, int bserrno) { zvfs_file_t *file = (zvfs_file_t *)arg; if (bserrno) { - SPDK_ERRLOG("blob_write error: %d\n", bserrno); + SPDK_ERRLOG("write resize error: %d\n", bserrno); file->finished = true; return; } - uint64_t new_end = file->current_offset + file->io_count; - if (file->dirent && new_end > file->dirent->file_size) { - file->dirent->file_size = new_end; - } - file->current_offset = new_end; - - SPDK_NOTICELOG("write complete, new offset=%" PRIu64 "\n", file->current_offset); - file->finished = true; + spdk_blob_sync_md(file->blob, zvfs_spdk_blob_write_sync_cb, file); } +/* Step 3b : sync 完成 → 真正写 */ void zvfs_spdk_blob_write_sync_cb(void *arg, int bserrno) { zvfs_file_t *file = (zvfs_file_t *)arg; @@ -291,43 +450,43 @@ void zvfs_spdk_blob_write_sync_cb(void *arg, int bserrno) { zvfs_do_write_io(file); } -void zvfs_spdk_blob_write_resize_cb(void *arg, int bserrno) { +/* Step 4 : 实际写入(dma_buf 已经是 patch 后的整扇区数据) */ +void zvfs_do_write_io(zvfs_file_t *file) { + uint64_t io_unit_size = file->fs->io_unit_size; + uint64_t lba = file->current_offset / io_unit_size; + uint64_t page_off = file->current_offset % io_unit_size; + uint64_t lba_count = (page_off + file->io_count + io_unit_size - 1) / io_unit_size; + + spdk_blob_io_write(file->blob, file->fs->channel, + file->dma_buf, + lba, lba_count, + zvfs_spdk_blob_write_cb, file); +} + +/* Step 5 : 写完成 */ +void zvfs_spdk_blob_write_cb(void *arg, int bserrno) { zvfs_file_t *file = (zvfs_file_t *)arg; if (bserrno) { - SPDK_ERRLOG("write resize error: %d\n", bserrno); + SPDK_ERRLOG("blob_write error: %d\n", bserrno); file->finished = true; return; } - spdk_blob_sync_md(file->blob, zvfs_spdk_blob_write_sync_cb, file); -} - -void zvfs_do_write(void *arg) { - zvfs_file_t *file = (zvfs_file_t *)arg; - - uint64_t end_byte = file->current_offset + file->io_count; - uint64_t need_clusters = zvfs_need_clusters(file->fs, end_byte); - uint64_t cur_clusters = file->dirent ? file->dirent->allocated_clusters - : spdk_blob_get_num_clusters(file->blob); - - SPDK_NOTICELOG("endbyte:%ld, needcluster:%ld, curcluster:%ld\n", end_byte, need_clusters, cur_clusters); - if (need_clusters > cur_clusters) { - uint64_t free_clusters = spdk_bs_free_cluster_count(file->fs->bs); - SPDK_NOTICELOG("free_cluster : %"PRIu64"\n", free_clusters); - if (need_clusters - cur_clusters > free_clusters) { - SPDK_ERRLOG("no free clusters\n"); - file->finished = true; - return; - } - spdk_blob_resize(file->blob, need_clusters, - zvfs_spdk_blob_write_resize_cb, file); - } else { - zvfs_do_write_io(file); + uint64_t new_end = file->current_offset + file->io_count; + if (file->dirent && new_end > file->dirent->file_size) { + file->dirent->file_size = new_end; } + file->current_offset = new_end; + + SPDK_DEBUGLOG("write complete, new offset=%" PRIu64 "\n", file->current_offset); + file->finished = true; } -// close + +/* ================================================================== */ +/* CLOSE */ +/* ================================================================== */ void zvfs_do_close(void *arg) { zvfs_file_t *file = (zvfs_file_t *)arg; spdk_blob_close(file->blob, zvfs_spdk_blob_close_cb, file); @@ -343,11 +502,12 @@ void zvfs_spdk_blob_close_cb(void *arg, int bserrno) { file->dma_buf = NULL; file->current_offset = 0; - SPDK_NOTICELOG("close complete\n"); file->finished = true; } -// delete +/* ================================================================== */ +/* DELETE */ +/* ================================================================== */ void zvfs_do_delete(void *arg) { zvfs_file_t *file = (zvfs_file_t *)arg; spdk_bs_delete_blob(file->fs->bs, file->blob_id, zvfs_spdk_blob_delete_cb, file); @@ -360,11 +520,12 @@ void zvfs_spdk_blob_delete_cb(void *arg, int bserrno) { SPDK_ERRLOG("blob_delete error: %d\n", bserrno); } - SPDK_NOTICELOG("delete complete\n"); file->finished = true; } -// unmount +/* ================================================================== */ +/* UNMOUNT */ +/* ================================================================== */ void zvfs_do_umount(void *arg) { zvfs_t *fs = (zvfs_t *)arg; @@ -384,50 +545,36 @@ void zvfs_spdk_bs_unload_cb(void *arg, int bserrno) { fs->finished = true; } -// waiter -bool waiter(struct spdk_thread *thread, spdk_msg_fn start_fn, void *ctx, bool *finished) { - - spdk_thread_send_msg(thread, start_fn, ctx); - - int waiter_count = 0; - - do { - spdk_thread_poll(thread, 0, 0); - waiter_count ++; - } while(!(*finished) && waiter_count < WAITER_MAX_TIME); - - while (spdk_thread_poll(thread, 0, 0) > 0) {} - - if (!(*finished) && waiter_count >= WAITER_MAX_TIME) { - return false; // timeout - } - - return true; -} - // setup // zvfs.json int zvfs_env_setup(void) { struct spdk_env_opts opts; spdk_env_opts_init(&opts); + opts.name = "zvfs"; - if (0 != spdk_env_init(&opts)) { - return -1; - } + int rc = spdk_env_init(&opts); + if (rc != 0) { + return -1; + } spdk_log_set_print_level(SPDK_LOG_NOTICE); spdk_log_set_level(SPDK_LOG_NOTICE); spdk_log_open(NULL); - spdk_thread_lib_init(NULL, 0); + int rc2 = spdk_thread_lib_init(NULL, 0); + if (rc2 != 0) { + SPDK_ERRLOG("spdk_thread_lib_init failed\n"); + return -1; + } + global_thread = spdk_thread_create("global", NULL); spdk_set_thread(global_thread); bool done = false; waiter(global_thread, zvfs_json_load_fn, &done, &done); - SPDK_NOTICELOG("json_app_load_done complete\n"); + SPDK_DEBUGLOG("zvfs_env_setup complete\n"); return 0; } @@ -441,8 +588,7 @@ void zvfs_json_load_fn(void *arg) { void json_app_load_done(int rc, void *ctx) { bool *done = ctx; *done = true; - - SPDK_NOTICELOG("json_app_load_done\n"); + SPDK_DEBUGLOG("json_app_load_done\n"); } @@ -453,53 +599,82 @@ void json_app_load_done(int rc, void *ctx) { // load int zvfs_mount(struct zvfs_s *fs) { fs->finished = false; - return waiter(global_thread, zvfs_do_mount, fs, &fs->finished); + bool ok = waiter(global_thread, zvfs_do_mount, fs, &fs->finished); + SPDK_DEBUGLOG("mount finished\n"); + return ok; } // unload int zvfs_umount(struct zvfs_s *fs) { fs->finished = false; - return waiter(global_thread, zvfs_do_umount, fs, &fs->finished); + bool ok = waiter(global_thread, zvfs_do_umount, fs, &fs->finished); + SPDK_DEBUGLOG("umount finished\n"); + return ok; } // file // create int zvfs_create(struct zvfs_file_s *file) { file->finished = false; - return waiter(global_thread, zvfs_do_create, file, &file->finished); + bool ok = waiter(global_thread, zvfs_do_create, file, &file->finished); + SPDK_DEBUGLOG("create finished\n"); + return ok; } // open int zvfs_open(struct zvfs_file_s *file) { file->finished = false; - return waiter(global_thread, zvfs_do_open, file, &file->finished); + bool ok = waiter(global_thread, zvfs_do_open, file, &file->finished); + SPDK_DEBUGLOG("open finished\n"); + return ok; } // read int zvfs_read(struct zvfs_file_s *file, uint8_t *buffer, size_t count) { file->io_count = count; + file->actual_io_count = 0; file->finished = false; bool ok = waiter(global_thread, zvfs_do_read, file, &file->finished); - uint64_t page_off = (file->current_offset - file->io_count) % file->fs->io_unit_size; - memcpy(buffer, (uint8_t *)file->dma_buf + page_off, file->io_count); + if (!ok || file->actual_io_count == 0) return -1; + + /* + * dma_buf 里存的是从 LBA 边界开始的整扇区数据, + * page_off 是 current_offset(读之前)相对于 LBA 边界的字节偏移。 + * + * current_offset 在 read_cb 里已经 += actual_io_count, + * 所以读之前的 offset = current_offset - actual_io_count。 + */ - return ok ? (int)file->io_count : -1; + uint64_t pre_offset = file->current_offset - file->actual_io_count; + uint64_t page_off = pre_offset % file->fs->io_unit_size; + + memcpy(buffer, + (uint8_t *)file->dma_buf + page_off, + file->actual_io_count); + + SPDK_DEBUGLOG("read finished\n"); + return (int)file->actual_io_count; } // write int zvfs_write(struct zvfs_file_s *file, const uint8_t *buffer, size_t count) { file->io_count = count; + file->write_staging_buf = buffer; file->finished = false; - memcpy(file->dma_buf, buffer, count); bool ok = waiter(global_thread, zvfs_do_write, file, &file->finished); + SPDK_DEBUGLOG("write finished\n"); return ok ? (int)count : -1; } // close int zvfs_close(struct zvfs_file_s *file) { file->finished = false; - return waiter(global_thread, zvfs_do_close, file, &file->finished); + bool ok = waiter(global_thread, zvfs_do_close, file, &file->finished); + SPDK_DEBUGLOG("close finished\n"); + return ok; } // delete int zvfs_delete(struct zvfs_file_s *file) { file->finished = false; - return waiter(global_thread, zvfs_do_delete, file, &file->finished); + bool ok = waiter(global_thread, zvfs_do_delete, file, &file->finished); + SPDK_DEBUGLOG("delete finished\n"); + return ok; } int main(int argc, char *argv[]) { @@ -526,7 +701,7 @@ int main(int argc, char *argv[]) { char *buffer = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; zvfs_write(file, buffer, strlen(buffer)); char *buffer2 = "abcdefghijklmnopqrstuvwxyz"; - zvfs_write(file, buffer, strlen(buffer2)); + zvfs_write(file, buffer2, strlen(buffer2)); SPDK_NOTICELOG("\n\n zvfs read start \n\n"); diff --git a/zvfs.h b/zvfs.h index 1ccab33..b141d6b 100644 --- a/zvfs.h +++ b/zvfs.h @@ -13,7 +13,7 @@ #define ZVFS_MAX_FD 64 #define BUFFER_SIZE (1024*8) -static const char *json_file = "/home/lian/share/10.1-spdk/zvfs/zvfs.json"; +extern const char *json_file; extern struct spdk_thread *global_thread; static const int WAITER_MAX_TIME = 100000; @@ -29,6 +29,7 @@ typedef struct { /* 文件系统全局结构 */ typedef struct zvfs_s { + struct spdk_bs_dev *bs_dev; struct spdk_blob_store *bs; struct spdk_io_channel *channel; struct spdk_blob *super_blob; // 承载目录日志的blob @@ -39,7 +40,7 @@ typedef struct zvfs_s { uint32_t dirent_count; // 当前有效项数 /* 伪FD表 */ - struct zvfs_file *fd_table[ZVFS_MAX_FD]; // // e.g., #define ZVFS_MAX_FD 64 + struct zvfs_file_s *fd_table[ZVFS_MAX_FD]; // // e.g., #define ZVFS_MAX_FD 64 int fd_base; // 伪FD起始值,如1000 int openfd_count; @@ -66,6 +67,9 @@ typedef struct zvfs_file_s { void *dma_buf; uint64_t dma_buf_size; + size_t actual_io_count; + const uint8_t *write_staging_buf; + size_t io_count; bool finished; } zvfs_file_t; @@ -83,9 +87,10 @@ int zvfs_close(struct zvfs_file_s *file); int zvfs_delete(struct zvfs_file_s *file); /* POSIX hook API(zvfs_hook.c 实现) */ -int zvfs_open_hook(const char *path, int flags, ...); -ssize_t zvfs_read_hook(int fd, void *buf, size_t count); -ssize_t zvfs_write_hook(int fd, const void *buf, size_t count); -int zvfs_close_hook(int fd); +int open(const char *path, int flags, ...); +ssize_t read(int fd, void *buf, size_t count); +ssize_t write(int fd, const void *buf, size_t count); +int close(int fd); +int unlink(const char *name); #endif \ No newline at end of file diff --git a/zvfs_hook.c b/zvfs_hook.c index a1e95fd..86fc660 100644 --- a/zvfs_hook.c +++ b/zvfs_hook.c @@ -1,5 +1,8 @@ -#include "zvfs.h" +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include #include #include #include @@ -8,6 +11,7 @@ #include #include #include +#include "zvfs.h" /* ------------------------------------------------------------------ */ /* 全局状态 */ @@ -15,13 +19,42 @@ static zvfs_t *g_fs = NULL; /* 全局文件系统,NULL 表示未初始化 */ static bool g_mounted = false; +static bool g_env_init = false; + /* 元数据文件路径 */ static const char *META_FILE = "/home/lian/share/10.1-spdk/zvfs/zvfs_meta.txt"; /* 伪 fd 起始值,避免和真实 fd 冲突 */ -#define FD_BASE 1000 +#define FD_BASE 10000 +/* 只拦截以 /zvfs 开头的路径 */ +#define ZVFS_PATH_PREFIX "/zvfs" + +static int (*real_open_fn) (const char*, int, ...) = NULL; +static ssize_t (*real_read_fn) (int, void*, size_t) = NULL; +static ssize_t (*real_write_fn)(int, const void*, size_t) = NULL; +static int (*real_close_fn)(int) = NULL; +static int (*real_unlink_fn)(const char *name) = NULL; + +__attribute__((constructor)) +static void zvfs_preload_init(void) { + real_open_fn = dlsym(RTLD_NEXT, "open"); + real_read_fn = dlsym(RTLD_NEXT, "read"); + real_write_fn = dlsym(RTLD_NEXT, "write"); + real_close_fn = dlsym(RTLD_NEXT, "close"); + real_unlink_fn= dlsym(RTLD_NEXT, "unlink"); +} + +/* 判断路径是否由我们接管 */ +static inline bool is_zvfs_path(const char *path) { + return path && strncmp(path, ZVFS_PATH_PREFIX, sizeof(ZVFS_PATH_PREFIX) - 1) == 0; +} + +/* 判断 fd 是否是我们的伪 fd */ +static inline bool is_zvfs_fd(int fd) { + return fd >= FD_BASE && fd < FD_BASE + ZVFS_MAX_FD; +} /* ------------------------------------------------------------------ */ /* 元数据文件 I/O */ @@ -36,7 +69,7 @@ static const char *META_FILE = "/home/lian/share/10.1-spdk/zvfs/zvfs_meta.txt"; */ static int meta_load(zvfs_t *fs) { - int fd = open(META_FILE, O_RDONLY); + int fd = real_open_fn(META_FILE, O_RDONLY); if (fd < 0) { /* 文件不存在,当作空目录 */ fs->dirent_count = 0; @@ -45,7 +78,7 @@ static int meta_load(zvfs_t *fs) { /* 一次性读进来 */ char buf[4096] = {0}; - ssize_t n = read(fd, buf, sizeof(buf) - 1); + ssize_t n = real_read_fn(fd, buf, sizeof(buf) - 1); close(fd); if (n <= 0) return 0; @@ -65,7 +98,7 @@ static int meta_load(zvfs_t *fs) { zvfs_dirent_t *d = calloc(1, sizeof(zvfs_dirent_t)); if (!d) break; - int ret = sscanf(line, "%255s %"PRIu64" %"PRIu64" %"PRIu32, + int ret = sscanf(line, "%255s %"PRIu64" %"PRIu64" %"PRIu64, d->filename, &d->blob_id, &d->file_size, @@ -85,7 +118,7 @@ static int meta_load(zvfs_t *fs) { } static int meta_save(zvfs_t *fs) { - int fd = open(META_FILE, O_WRONLY | O_CREAT | O_TRUNC, 0644); + int fd = real_open_fn (META_FILE, O_WRONLY | O_CREAT | O_TRUNC, 0644); if (fd < 0) return -1; for (uint32_t i = 0; i < fs->dirent_count; i++) { @@ -93,12 +126,12 @@ static int meta_save(zvfs_t *fs) { if (!d || !d->is_valid) continue; char line[512]; - int len = snprintf(line, sizeof(line), "%s %"PRIu64" %"PRIu64" %"PRIu32"\n", + int len = snprintf(line, sizeof(line), "%s %"PRIu64" %"PRIu64" %"PRIu64"\n", d->filename, d->blob_id, d->file_size, d->allocated_clusters); - write(fd, line, len); + real_write_fn(fd, line, len); } - close(fd); + real_close_fn(fd); return 0; } @@ -106,6 +139,13 @@ static int meta_save(zvfs_t *fs) { /* ------------------------------------------------------------------ */ /* 初始化(第一次 open 时调用) */ /* ------------------------------------------------------------------ */ +// 退出的时候调用 save 和 unmount +static void zvfs_atexit(void) { + if (!g_mounted || !g_fs) return; + SPDK_NOTICELOG("umount\n"); + meta_save(g_fs); + zvfs_umount(g_fs); +} static int zvfs_ensure_mounted(void) { if (g_mounted) return 0; @@ -123,10 +163,13 @@ static int zvfs_ensure_mounted(void) { } /* 初始化 SPDK 环境并 mount */ - if (zvfs_env_setup() != 0) { - free(g_fs); - g_fs = NULL; - return -1; + if (!g_env_init) { + if( zvfs_env_setup() != 0) { + free(g_fs); + g_fs = NULL; + return -1; + } + g_env_init = true; } if (!zvfs_mount(g_fs)) { @@ -136,6 +179,8 @@ static int zvfs_ensure_mounted(void) { } g_mounted = true; + atexit(zvfs_atexit); + SPDK_NOTICELOG("mount\n"); return 0; } @@ -164,6 +209,7 @@ static zvfs_dirent_t *dirent_alloc(const char *filename) { d->open_count = 0; d->file_size = 0; d->allocated_clusters = 0; + d->blob_id = 0; g_fs->dirents[g_fs->dirent_count++] = d; return d; @@ -202,11 +248,25 @@ static zvfs_file_t *fd_lookup(int pseudo_fd) { return g_fs->fd_table[idx]; } +/* ------------------------------------------------------------------ */ +/* POSIX hook */ +/* ------------------------------------------------------------------ */ + /* ------------------------------------------------------------------ */ /* POSIX hook: open */ /* ------------------------------------------------------------------ */ -int zvfs_open_hook(const char *path, int flags, ...) { +int open(const char *path, int flags, ...) { + if (!is_zvfs_path(path)) { + mode_t mode = 0; + if (flags & O_CREAT) { + va_list ap; + va_start(ap, flags); + mode = va_arg(ap, mode_t); + va_end(ap); + } + return real_open_fn(path, flags, mode); + } /* 确保 fs 已经 mount */ if (zvfs_ensure_mounted() != 0) { @@ -229,6 +289,7 @@ int zvfs_open_hook(const char *path, int flags, ...) { errno = ENOMEM; return -1; } + meta_save(g_fs); } /* 创建 file 句柄 */ @@ -246,13 +307,14 @@ int zvfs_open_hook(const char *path, int flags, ...) { int ok; if (dirent->blob_id == 0) { /* 新文件:create blob,open 时不 resize,write 时按需扩容 */ - file->blob_id = 0; - ok = zvfs_create(file); /* 内部 create → open → (no resize) → alloc dma_buf */ + ok = zvfs_create(file); /* 内部 create → open → resize → alloc dma_buf */ + SPDK_DEBUGLOG("create: %ld\n", file->blob_id); /* 把新分配的 blob_id 写回 dirent */ dirent->blob_id = file->blob_id; } else { /* 已有文件:直接 open 已有 blob */ file->blob_id = dirent->blob_id; + SPDK_DEBUGLOG("open: %ld\n", file->blob_id); ok = zvfs_open(file); } @@ -279,7 +341,11 @@ int zvfs_open_hook(const char *path, int flags, ...) { /* ------------------------------------------------------------------ */ /* POSIX hook: read */ /* ------------------------------------------------------------------ */ -ssize_t zvfs_read_hook(int fd, void *buf, size_t count) { +ssize_t read(int fd, void *buf, size_t count) { + if (!is_zvfs_fd(fd)) { + return real_read_fn(fd, buf, count); + } + zvfs_file_t *file = fd_lookup(fd); if (!file) { errno = EBADF; @@ -299,7 +365,11 @@ ssize_t zvfs_read_hook(int fd, void *buf, size_t count) { /* POSIX hook: write */ /* ------------------------------------------------------------------ */ -ssize_t zvfs_write_hook(int fd, const void *buf, size_t count) { +ssize_t write(int fd, const void *buf, size_t count) { + if (!is_zvfs_fd(fd)) { + return real_write_fn(fd, buf, count); + } + zvfs_file_t *file = fd_lookup(fd); if (!file) { errno = EBADF; @@ -314,7 +384,11 @@ ssize_t zvfs_write_hook(int fd, const void *buf, size_t count) { /* POSIX hook: close */ /* ------------------------------------------------------------------ */ -int zvfs_close_hook(int fd) { +int close(int fd) { + if (!is_zvfs_fd(fd)) { + return real_close_fn(fd); + } + zvfs_file_t *file = fd_lookup(fd); if (!file) { errno = EBADF; @@ -334,25 +408,64 @@ int zvfs_close_hook(int fd) { dirent->open_count--; if(dirent->open_count == 0 && !dirent->is_valid){ zvfs_delete(file); + /* 从 dirents 数组中移除 */ + for (uint32_t i = 0; i < g_fs->dirent_count; i++) { + if (g_fs->dirents[i] == dirent) { + free(dirent); + g_fs->dirents[i] = g_fs->dirents[--g_fs->dirent_count]; + g_fs->dirents[g_fs->dirent_count] = NULL; + break; + } + } + meta_save(g_fs); } } free(file); + return 0; +} - /* 如果没有任何打开的文件了,保存元数据并 unmount */ - if (g_fs->openfd_count == 0) { - meta_save(g_fs); - zvfs_umount(g_fs); +/* ------------------------------------------------------------------ */ +/* POSIX hook: unlink */ +/* ------------------------------------------------------------------ */ +int unlink(const char *name) { + if (!is_zvfs_path(name)) { + return real_unlink_fn(name); + } - /* 释放所有 dirent */ + if (zvfs_ensure_mounted() != 0) { + errno = EIO; + return -1; + } + + zvfs_dirent_t *dirent = dirent_find(name); + if (!dirent) { + errno = ENOENT; + return -1; + } + + if (dirent->open_count > 0) { + /* 还有人打开着,延迟删除:标记无效,等最后一次 close 时再 delete blob */ + dirent->is_valid = false; + } else { + /* 没人打开,直接删除 blob */ + zvfs_file_t tmp = {0}; + tmp.fs = g_fs; + tmp.dirent = dirent; + tmp.blob_id = dirent->blob_id; + zvfs_delete(&tmp); + + /* 从 dirents 数组中移除 */ for (uint32_t i = 0; i < g_fs->dirent_count; i++) { - free(g_fs->dirents[i]); - g_fs->dirents[i] = NULL; + if (g_fs->dirents[i] == dirent) { + free(dirent); + g_fs->dirents[i] = g_fs->dirents[--g_fs->dirent_count]; + g_fs->dirents[g_fs->dirent_count] = NULL; + break; + } } - free(g_fs); - g_fs = NULL; - g_mounted = false; + meta_save(g_fs); } return 0;