zvfs: 完成open/close/read/write/unlink的hook动态库代码编写、编译与简单功能测试。

This commit is contained in:
2026-02-23 16:01:55 +00:00
parent 31dc307d0b
commit 6f8f2148c3
7 changed files with 527 additions and 196 deletions

3
.gitignore vendored
View File

@@ -1,2 +1,5 @@
*.o
*.d
*.so
func_test
zvfs_meta.txt

View File

@@ -6,11 +6,32 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../spdk)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk
include $(SPDK_ROOT_DIR)/mk/spdk.app_vars.mk
APP = zvfs
LIBZVFS := libzvfs.so
APP := func_test
C_SRCS := zvfs.c
C_SRCS := zvfs.c zvfs_hook.c
SPDK_LIB_LIST = $(ALL_MODULES_LIST) event event_bdev
include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
LIBS += $(SPDK_LIB_LINKER_ARGS)
LDFLAGS += -shared -rdynamic -Wl,-z,nodelete -Wl,--disable-new-dtags \
-Wl,-rpath,$(SPDK_ROOT_DIR)/build/lib \
-Wl,-rpath,$(SPDK_ROOT_DIR)/dpdk/build/lib
SYS_LIBS += -ldl
all: $(LIBZVFS) $(APP)
@:
rm -rf zvfs_meta.txt
$(LIBZVFS): $(OBJS) $(SPDK_LIB_FILES) $(ENV_LIBS)
$(LINK_C)
$(APP): func_test.c
$(CC) -o $@ $<
clean:
$(CLEAN_C) $(LIBZVFS) $(APP)
include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk

View File

@@ -1,6 +1,12 @@
## usage
```shell
cd /home/lian/share/10.1-spdk/spdk
./configure --with-shared
make -j
make
LD_PRELOAD=./libzvfs.so ./func_test
```

View File

@@ -1,37 +1,45 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
int main(int argc, char **argv)
{
const char *path = "/zvfs/func_test.dat";
if (argc > 2 && strcmp(argv[1], "-f") == 0) path = argv[2];
setenv("ZVFS_ROOT", "/zvfs", 0);
printf("open: %s\n", path);
int fd = open(path, O_CREAT|O_RDWR, 0644);
int fd = open(path, O_CREAT | O_RDWR | O_TRUNC, 0644);
if (fd < 0) { perror("open"); return 1; }
const char *msg = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const char *msg = "ABCDEFGHIJKL";
ssize_t w = write(fd, msg, strlen(msg));
if (w < 0) { perror("write"); return 2; }
printf("write: %zd\n", w);
/* Rewind by closing and reopening for read */
const char *msg2 = "MNOPQRSTUVWXYZ";
ssize_t w2 = write(fd, msg2, strlen(msg2));
if (w2 < 0) { perror("write"); return 2; }
printf("write: %zd\n", w2);
close(fd);
fd = open(path, O_RDONLY);
if (fd < 0) { perror("open R"); return 3; }
char buf[256]; memset(buf, 0, sizeof(buf));
char buf[10];
memset(buf, 0, sizeof(buf));
ssize_t r = read(fd, buf, sizeof(buf));
if (r < 0) { perror("read"); return 4; }
printf("read: %zd bytes: %.*s\n", r, (int)r, buf);
char buf2[512];
memset(buf2, 0, sizeof(buf2));
ssize_t r2 = read(fd, buf2, sizeof(buf2));
if (r2 < 0) { perror("read"); return 4; }
printf("read: %zd bytes: %.*s\n", r2, (int)r2, buf2);
close(fd);
if (unlink(path) != 0) { perror("unlink"); return 5; }

447
zvfs.c
View File

@@ -1,12 +1,17 @@
#include "zvfs.h"
#undef SPDK_DEBUGLOG
#define SPDK_DEBUGLOG(...) do {} while(0)
struct spdk_thread *global_thread = NULL;
const char *json_file = "/home/lian/share/10.1-spdk/zvfs/zvfs.json";
// mount
void zvfs_do_mount(void *arg);
void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx);
void zvfs_spdk_bs_load_cb(void *arg, struct spdk_blob_store *bs, int bserrno);
void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno);
// create
@@ -26,6 +31,7 @@ void zvfs_spdk_blob_read_cb(void *arg, int bserrno);
// write
void zvfs_do_write(void *arg);
void zvfs_do_write_io(zvfs_file_t *file);
void zvfs_spdk_blob_write_preread_cb(void *arg, int bserrno);
void zvfs_spdk_blob_write_resize_cb(void *arg, int bserrno);
void zvfs_spdk_blob_write_sync_cb(void *arg, int bserrno);
void zvfs_spdk_blob_write_cb(void *arg, int bserrno);
@@ -47,14 +53,66 @@ void json_app_load_done(int rc, void *ctx);
void zvfs_do_umount(void *arg);
void zvfs_spdk_bs_unload_cb(void *arg, int bserrno);
/* ========== helpers ========== */
/* ================================================================== */
/* HELPER */
/* ================================================================== */
/*
 * Return how many blobstore clusters are required to hold a file whose
 * last byte ends at end_byte (ceiling division by the cluster size of
 * fs->bs).
 */
static uint64_t zvfs_need_clusters(zvfs_t *fs, uint64_t end_byte) {
    const uint64_t csz = spdk_bs_get_cluster_size(fs->bs);
    const uint64_t rounded_up = end_byte + csz - 1;

    return rounded_up / csz;
}
// mount
/*
 * Helper: compute the io-unit (LBA) range touched by the pending I/O.
 *
 * Reads file->current_offset and file->io_count and reports:
 *   out_lba       - index of the first io unit covered,
 *   out_page_off  - byte offset of the I/O inside that first io unit,
 *   out_lba_count - number of io units needed to cover the whole I/O.
 */
static void calc_lba_range(zvfs_file_t *file,
                           uint64_t *out_lba,
                           uint64_t *out_page_off,
                           uint64_t *out_lba_count)
{
    const uint64_t unit  = file->fs->io_unit_size;
    const uint64_t start = file->current_offset;
    const uint64_t head  = start % unit;
    const uint64_t span  = head + file->io_count;

    *out_lba       = start / unit;
    *out_page_off  = head;
    /* Round the covered byte span up to whole io units. */
    *out_lba_count = (span + unit - 1) / unit;
}
/*
 * Make sure file->dma_buf can hold at least need_bytes.
 *
 * An existing buffer that is already large enough is kept. Otherwise the
 * old buffer (if any) is released and a new 4 KiB-aligned DMA buffer of
 * need_bytes is allocated; the previous contents are not carried over.
 *
 * Returns 0 on success, -1 if the allocation failed (dma_buf is then NULL
 * and dma_buf_size is 0).
 */
static int ensure_dma_buf(zvfs_file_t *file, uint64_t need_bytes)
{
    if (file->dma_buf != NULL) {
        if (file->dma_buf_size >= need_bytes) {
            return 0;
        }
        spdk_free(file->dma_buf);
    }

    void *fresh = spdk_malloc(need_bytes, 0x1000, NULL,
                              SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    file->dma_buf = fresh;
    if (fresh == NULL) {
        file->dma_buf_size = 0;
        return -1;
    }

    file->dma_buf_size = need_bytes;
    return 0;
}
/*
 * Synchronous bridge over the SPDK message loop.
 *
 * Posts start_fn(ctx) to `thread`, then polls the thread until either
 * *finished becomes true or WAITER_MAX_TIME poll iterations have run.
 *
 * Returns true when *finished was observed set, false on timeout.
 */
bool waiter(struct spdk_thread *thread, spdk_msg_fn start_fn, void *ctx, bool *finished) {
    int polls = 0;

    spdk_thread_send_msg(thread, start_fn, ctx);

    for (;;) {
        spdk_thread_poll(thread, 0, 0);
        polls++;

        if (*finished) {
            return true;
        }
        if (polls >= WAITER_MAX_TIME) {
            return false; /* timeout */
        }
    }
}
/* ================================================================== */
/* MOUNT */
/* ================================================================== */
void zvfs_do_mount(void *arg) {
zvfs_t *fs = (zvfs_t*)arg;
@@ -66,49 +124,78 @@ void zvfs_do_mount(void *arg) {
spdk_app_stop(0);
}
spdk_bs_init(bs_dev, NULL, zvfs_spdk_bs_init_cb, fs);
SPDK_NOTICELOG("zvfs_entry\n");
fs->bs_dev = bs_dev;
// spdk_bs_init(bs_dev, NULL, zvfs_spdk_bs_init_cb, fs);
spdk_bs_load(bs_dev, NULL, zvfs_spdk_bs_load_cb, fs);
}
void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
void *event_ctx) {
SPDK_NOTICELOG("zvfs_spdk_bdev_event_cb\n");
}
void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno) {
void zvfs_spdk_bs_load_cb(void *arg, struct spdk_blob_store *bs, int bserrno) {
zvfs_t *fs = (zvfs_t*)arg;
if (bserrno != 0) {
SPDK_DEBUGLOG("load failed, new device, re-create bs_dev and init\n");
struct spdk_bs_dev *bs_dev = NULL;
int rc = spdk_bdev_create_bs_dev_ext("Malloc0", zvfs_spdk_bdev_event_cb, NULL, &bs_dev);
if (rc != 0) {
SPDK_ERRLOG("re-create bs_dev failed\n");
spdk_app_stop(-1);
return;
}
fs->bs_dev = bs_dev;
spdk_bs_init(fs->bs_dev, NULL, zvfs_spdk_bs_init_cb, fs);
return;
}
uint64_t io_unit_size = spdk_bs_get_io_unit_size(bs);
SPDK_NOTICELOG("io_unit_size : %"PRIu64"\n", io_unit_size);
SPDK_DEBUGLOG("io_unit_size : %"PRIu64"\n", io_unit_size);
fs->io_unit_size = io_unit_size;
fs->bs = bs;
fs->channel = spdk_bs_alloc_io_channel(fs->bs);
if (fs->channel == NULL) {
return ;
}
fs->finished = true;
SPDK_NOTICELOG("mount finished\n");
}
// create
/*
 * Completion callback for spdk_bs_init().
 *
 * arg     - the zvfs_t being mounted.
 * bs      - the freshly initialised blobstore (only meaningful on success).
 * bserrno - 0 on success, non-zero on failure.
 *
 * On success the io unit size, blobstore handle and an I/O channel are
 * stored in `fs` and fs->finished is set so the waiting caller can proceed.
 * On any failure fs->finished is left false, so the polling waiter() runs
 * into its timeout and the mount is reported as failed (the same way the
 * channel-allocation failure is signalled).
 */
void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno) {
    zvfs_t *fs = (zvfs_t *)arg;

    /* Never touch `bs` when initialisation failed: it is not a valid handle. */
    if (bserrno != 0 || bs == NULL) {
        SPDK_ERRLOG("bs_init failed: %d\n", bserrno);
        return;
    }

    uint64_t io_unit_size = spdk_bs_get_io_unit_size(bs);
    SPDK_DEBUGLOG("io_unit_size : %"PRIu64"\n", io_unit_size);

    fs->io_unit_size = io_unit_size;
    fs->bs = bs;
    fs->channel = spdk_bs_alloc_io_channel(fs->bs);
    if (fs->channel == NULL) {
        return;
    }

    fs->finished = true;
}
/*
 * Bdev event callback registered via spdk_bdev_create_bs_dev_ext().
 * The body is empty: bdev events are currently ignored.
 */
void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
void *event_ctx) {
}
/* ================================================================== */
/* CREATE */
/* ================================================================== */
/*
 * Message handler that starts the create chain: asks the blobstore to
 * create a new blob; zvfs_spdk_bs_create_blob_cb continues from there.
 * arg is the zvfs_file_t being created.
 */
void zvfs_do_create(void *arg) {
    zvfs_file_t *f = arg;
    struct spdk_blob_store *store = f->fs->bs;

    spdk_bs_create_blob(store, zvfs_spdk_bs_create_blob_cb, f);
}
void zvfs_spdk_bs_create_blob_cb(void *arg, spdk_blob_id blobid, int bserrno) {
zvfs_file_t *file = (zvfs_file_t *)arg;
file->blob_id = blobid;
SPDK_NOTICELOG("blobid : %"PRIu64"\n", blobid);
SPDK_DEBUGLOG("create blobid : %"PRIu64"\n", blobid);
spdk_bs_open_blob(file->fs->bs, blobid, zvfs_spdk_bs_open_blob_cb, file);
}
@@ -123,17 +210,21 @@ void zvfs_spdk_bs_open_blob_cb(void *arg, struct spdk_blob *blb, int bserrno) {
file->blob = blb;
uint64_t free_cluster = spdk_bs_free_cluster_count(file->fs->bs); //
SPDK_NOTICELOG("free_cluster : %"PRIu64"\n", free_cluster);
uint64_t free_cluster = spdk_bs_free_cluster_count(file->fs->bs);
if(free_cluster == 0){
SPDK_ERRLOG("no free cluster: %d\n", bserrno);
file->finished = true;
return ;
}
spdk_blob_resize(blb, free_cluster, zvfs_spdk_blob_resize_cb, file);
spdk_blob_resize(blb, 1, zvfs_spdk_blob_resize_cb, file);
}
void zvfs_spdk_blob_resize_cb(void *arg, int bserrno) {
zvfs_file_t *file = (zvfs_file_t *)arg;
uint64_t total = spdk_blob_get_num_clusters(file->blob);
SPDK_NOTICELOG("resize blob :%"PRIu64"\n", total);
SPDK_DEBUGLOG("resize blob :%"PRIu64"\n", total);
if (file->dirent) {
file->dirent->allocated_clusters = total;
@@ -153,11 +244,12 @@ void zvfs_spdk_blob_sync_cb(void *arg, int bserrno) {
}
file->dma_buf_size = BUFFER_SIZE;
SPDK_NOTICELOG("open complete\n");
file->finished = true;
}
// open
/* ================================================================== */
/* OPEN */
/* ================================================================== */
void zvfs_do_open(void *arg) {
zvfs_file_t *file = (zvfs_file_t *)arg;
spdk_bs_open_blob(file->fs->bs, file->blob_id, zvfs_spdk_bs_open_blob_cb2, file);
@@ -186,28 +278,42 @@ void zvfs_spdk_bs_open_blob_cb2(void *arg, struct spdk_blob *blb, int bserrno) {
file->finished = true;
}
// read
/* ================================================================== */
/* READ */
/* ================================================================== */
void zvfs_do_read(void *arg) {
zvfs_file_t *file = (zvfs_file_t *)arg;
uint64_t io_unit_size = file->fs->io_unit_size;
uint64_t io_unit = file->fs->io_unit_size;
uint64_t offset = file->current_offset;
uint64_t file_size = file->dirent ? file->dirent->file_size : 0;
uint64_t file_sz = file->dirent ? file->dirent->file_size : 0;
if (offset >= file_size) {
SPDK_NOTICELOG("read: EOF\n");
/* EOF 检查 */
if (offset >= file_sz) {
SPDK_DEBUGLOG("read: EOF\n");
file->io_count = 0;
file->actual_io_count = 0;
file->finished = true;
return;
}
if (offset + file->io_count > file_size) {
file->io_count = file_size - offset;
/* 截断到文件末尾 */
if (offset + file->io_count > file_sz){
file->io_count = file_sz - offset;
}
uint64_t lba = offset / io_unit_size;
uint64_t page_off = offset % io_unit_size;
uint64_t lba_count = (page_off + file->io_count + io_unit_size - 1) / io_unit_size;
file->actual_io_count = file->io_count;
uint64_t lba, page_off, lba_count;
calc_lba_range(file, &lba, &page_off, &lba_count);
uint64_t buf_need = lba_count * io_unit;
if (ensure_dma_buf(file, buf_need) != 0) {
SPDK_ERRLOG("ensure_dma_buf failed\n");
file->actual_io_count = 0;
file->finished = true;
return;
}
spdk_blob_io_read(file->blob, file->fs->channel,
file->dma_buf,
@@ -226,54 +332,107 @@ void zvfs_spdk_blob_read_cb(void *arg, int bserrno) {
}
file->current_offset += file->io_count;
SPDK_NOTICELOG("read complete, new offset=%" PRIu64 "\n", file->current_offset);
SPDK_DEBUGLOG("read complete, new offset=%" PRIu64 "\n", file->current_offset);
file->finished = true;
}
// write
/*
* callback 链:
*
/* ================================================================== */
/* WRITE */
/* ================================================================== */
/**
* 1. write 的 callback 链
* zvfs_do_write
* ├─(需要扩容)─→ spdk_blob_resize → zvfs_spdk_blob_write_resize_cb
* │ → spdk_blob_sync_md → zvfs_spdk_blob_write_sync_cb
* → zvfs_do_write_io
* │ → zvfs_spdk_blob_write_cb
* └─→ 先用 spdk_blob_io_read 读出覆盖范围内的扇区
* └─→ zvfs_spdk_blob_write_preread_cb
* (在 dma_buf 里 patch 新数据)
* ├─(需扩容)─→ spdk_blob_resize
* │ └─→ zvfs_spdk_blob_write_resize_cb
* │ └─→ spdk_blob_sync_md
* │ └─→ zvfs_spdk_blob_write_sync_cb
* │ └─→ zvfs_do_write_io
* │ └─→ zvfs_spdk_blob_write_cb
* └─(不需扩容)─→ zvfs_do_write_io
* → zvfs_spdk_blob_write_cb
* └─→ zvfs_spdk_blob_write_cb
*/
void zvfs_do_write_io(zvfs_file_t *file) {
uint64_t io_unit_size = file->fs->io_unit_size;
uint64_t lba = file->current_offset / io_unit_size;
uint64_t page_off = file->current_offset % io_unit_size;
uint64_t lba_count = (page_off + file->io_count + io_unit_size - 1) / io_unit_size;
spdk_blob_io_write(file->blob, file->fs->channel,
file->dma_buf,
lba, lba_count,
zvfs_spdk_blob_write_cb, file);
}
void zvfs_spdk_blob_write_cb(void *arg, int bserrno) {
/* Step 1 : enter write; first read back the sectors covered by this write (read-modify-write) */
void zvfs_do_write(void *arg) {
zvfs_file_t *file = (zvfs_file_t *)arg;
if (bserrno) {
SPDK_ERRLOG("blob_write error: %d\n", bserrno);
uint64_t io_unit = file->fs->io_unit_size;
uint64_t lba, page_off, lba_count;
calc_lba_range(file, &lba, &page_off, &lba_count);
uint64_t buf_need = lba_count * io_unit;
if (ensure_dma_buf(file, buf_need) != 0) {
SPDK_ERRLOG("ensure_dma_buf failed\n");
file->finished = true;
return;
}
uint64_t new_end = file->current_offset + file->io_count;
if (file->dirent && new_end > file->dirent->file_size) {
file->dirent->file_size = new_end;
}
file->current_offset = new_end;
SPDK_NOTICELOG("write complete, new offset=%" PRIu64 "\n", file->current_offset);
file->finished = true;
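/*
 * Read the affected sectors first; once the read completes, preread_cb
 * patches the user data into the buffer and then writes it back.
 * Note: the user data could be staged in file->write_buf / write_count,
 * or file->io_count could be reused (io_count stays unchanged).
 * Here the upper-level caller has already placed the user data in
 * write_staging_buf.
 */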
/* 不管是否需要扩容,先 preread */
spdk_blob_io_read(file->blob, file->fs->channel,
file->dma_buf,
lba, lba_count,
zvfs_spdk_blob_write_preread_cb, file);
}
/*
 * Step 2 : the preread has completed. Patch the user data into dma_buf,
 * then decide whether the blob must grow before the actual write.
 *
 * arg     - the zvfs_file_t whose write is in flight.
 * bserrno - result of the preread; a non-zero value is only logged.
 *
 * Continues with spdk_blob_resize() (-> zvfs_spdk_blob_write_resize_cb)
 * when more clusters are needed, otherwise calls zvfs_do_write_io()
 * directly. If the blobstore does not have enough free clusters the
 * operation ends here with file->finished set and nothing written.
 */
void zvfs_spdk_blob_write_preread_cb(void *arg, int bserrno){
zvfs_file_t *file = (zvfs_file_t *)arg;
/* A failed preread is tolerated: the original author's assumption is that
a newly allocated region may simply be all zeros (SPDK returns zeros for
regions that were never written), so processing continues regardless.
NOTE(review): on failure the bytes of dma_buf outside the patched range
keep whatever the buffer held before and are written back as-is —
confirm this is acceptable. */
if (bserrno) {
SPDK_DEBUGLOG("preread error %d (may be uninitialized, continue)\n", bserrno);
}
/* patch: overlay the user data onto dma_buf at the in-unit byte offset */
uint64_t page_off = file->current_offset % file->fs->io_unit_size;
memcpy((uint8_t *)file->dma_buf + page_off,
file->write_staging_buf,
file->io_count);
/* decide whether the blob has to be grown */
uint64_t end_byte = file->current_offset + file->io_count;
uint64_t need_clusters = zvfs_need_clusters(file->fs, end_byte);
/* prefer the cluster count cached in the dirent; fall back to asking the blob */
uint64_t cur_clusters = file->dirent ? file->dirent->allocated_clusters
: spdk_blob_get_num_clusters(file->blob);
if (need_clusters > cur_clusters) {
uint64_t free_clusters = spdk_bs_free_cluster_count(file->fs->bs);
/* not enough free clusters for the growth: give up without writing */
if (need_clusters - cur_clusters > free_clusters) {
SPDK_ERRLOG("no free clusters\n");
file->finished = true;
return;
}
spdk_blob_resize(file->blob, need_clusters,
zvfs_spdk_blob_write_resize_cb, file);
} else {
zvfs_do_write_io(file);
}
}
/*
 * Step 3a : the resize has completed -> sync the blob metadata.
 *
 * On a resize error the failure is logged and the operation is ended by
 * setting file->finished; otherwise the chain continues in
 * zvfs_spdk_blob_write_sync_cb.
 */
void zvfs_spdk_blob_write_resize_cb(void *arg, int bserrno) {
    zvfs_file_t *f = (zvfs_file_t *)arg;

    if (bserrno == 0) {
        spdk_blob_sync_md(f->blob, zvfs_spdk_blob_write_sync_cb, f);
        return;
    }

    SPDK_ERRLOG("write resize error: %d\n", bserrno);
    f->finished = true;
}
/* Step 3b : sync 完成 → 真正写 */
void zvfs_spdk_blob_write_sync_cb(void *arg, int bserrno) {
zvfs_file_t *file = (zvfs_file_t *)arg;
@@ -291,43 +450,43 @@ void zvfs_spdk_blob_write_sync_cb(void *arg, int bserrno) {
zvfs_do_write_io(file);
}
void zvfs_spdk_blob_write_resize_cb(void *arg, int bserrno) {
/*
 * Step 4 : issue the actual write. dma_buf already holds the patched,
 * whole-io-unit data, so the io units covering
 * [current_offset, current_offset + io_count) are written in one request;
 * completion is reported to zvfs_spdk_blob_write_cb.
 */
void zvfs_do_write_io(zvfs_file_t *file) {
    const uint64_t unit      = file->fs->io_unit_size;
    const uint64_t first_lba = file->current_offset / unit;
    const uint64_t head      = file->current_offset % unit;
    /* Round the covered byte span up to whole io units. */
    const uint64_t n_lba     = (head + file->io_count + unit - 1) / unit;

    spdk_blob_io_write(file->blob, file->fs->channel,
                       file->dma_buf,
                       first_lba, n_lba,
                       zvfs_spdk_blob_write_cb, file);
}
/* Step 5 : 写完成 */
void zvfs_spdk_blob_write_cb(void *arg, int bserrno) {
zvfs_file_t *file = (zvfs_file_t *)arg;
if (bserrno) {
SPDK_ERRLOG("write resize error: %d\n", bserrno);
SPDK_ERRLOG("blob_write error: %d\n", bserrno);
file->finished = true;
return;
}
spdk_blob_sync_md(file->blob, zvfs_spdk_blob_write_sync_cb, file);
}
uint64_t new_end = file->current_offset + file->io_count;
if (file->dirent && new_end > file->dirent->file_size) {
file->dirent->file_size = new_end;
}
file->current_offset = new_end;
void zvfs_do_write(void *arg) {
zvfs_file_t *file = (zvfs_file_t *)arg;
uint64_t end_byte = file->current_offset + file->io_count;
uint64_t need_clusters = zvfs_need_clusters(file->fs, end_byte);
uint64_t cur_clusters = file->dirent ? file->dirent->allocated_clusters
: spdk_blob_get_num_clusters(file->blob);
SPDK_NOTICELOG("endbyte:%ld, needcluster:%ld, curcluster:%ld\n", end_byte, need_clusters, cur_clusters);
if (need_clusters > cur_clusters) {
uint64_t free_clusters = spdk_bs_free_cluster_count(file->fs->bs);
SPDK_NOTICELOG("free_cluster : %"PRIu64"\n", free_clusters);
if (need_clusters - cur_clusters > free_clusters) {
SPDK_ERRLOG("no free clusters\n");
SPDK_DEBUGLOG("write complete, new offset=%" PRIu64 "\n", file->current_offset);
file->finished = true;
return;
}
spdk_blob_resize(file->blob, need_clusters,
zvfs_spdk_blob_write_resize_cb, file);
} else {
zvfs_do_write_io(file);
}
}
// close
/* ================================================================== */
/* CLOSE */
/* ================================================================== */
void zvfs_do_close(void *arg) {
zvfs_file_t *file = (zvfs_file_t *)arg;
spdk_blob_close(file->blob, zvfs_spdk_blob_close_cb, file);
@@ -343,11 +502,12 @@ void zvfs_spdk_blob_close_cb(void *arg, int bserrno) {
file->dma_buf = NULL;
file->current_offset = 0;
SPDK_NOTICELOG("close complete\n");
file->finished = true;
}
// delete
/* ================================================================== */
/* DELETE */
/* ================================================================== */
void zvfs_do_delete(void *arg) {
zvfs_file_t *file = (zvfs_file_t *)arg;
spdk_bs_delete_blob(file->fs->bs, file->blob_id, zvfs_spdk_blob_delete_cb, file);
@@ -360,11 +520,12 @@ void zvfs_spdk_blob_delete_cb(void *arg, int bserrno) {
SPDK_ERRLOG("blob_delete error: %d\n", bserrno);
}
SPDK_NOTICELOG("delete complete\n");
file->finished = true;
}
// unmount
/* ================================================================== */
/* UNMOUNT */
/* ================================================================== */
void zvfs_do_umount(void *arg) {
zvfs_t *fs = (zvfs_t *)arg;
@@ -384,35 +545,16 @@ void zvfs_spdk_bs_unload_cb(void *arg, int bserrno) {
fs->finished = true;
}
// waiter
bool waiter(struct spdk_thread *thread, spdk_msg_fn start_fn, void *ctx, bool *finished) {
spdk_thread_send_msg(thread, start_fn, ctx);
int waiter_count = 0;
do {
spdk_thread_poll(thread, 0, 0);
waiter_count ++;
} while(!(*finished) && waiter_count < WAITER_MAX_TIME);
while (spdk_thread_poll(thread, 0, 0) > 0) {}
if (!(*finished) && waiter_count >= WAITER_MAX_TIME) {
return false; // timeout
}
return true;
}
// setup
// zvfs.json
int zvfs_env_setup(void) {
struct spdk_env_opts opts;
spdk_env_opts_init(&opts);
opts.name = "zvfs";
if (0 != spdk_env_init(&opts)) {
int rc = spdk_env_init(&opts);
if (rc != 0) {
return -1;
}
@@ -420,14 +562,19 @@ int zvfs_env_setup(void) {
spdk_log_set_level(SPDK_LOG_NOTICE);
spdk_log_open(NULL);
spdk_thread_lib_init(NULL, 0);
int rc2 = spdk_thread_lib_init(NULL, 0);
if (rc2 != 0) {
SPDK_ERRLOG("spdk_thread_lib_init failed\n");
return -1;
}
global_thread = spdk_thread_create("global", NULL);
spdk_set_thread(global_thread);
bool done = false;
waiter(global_thread, zvfs_json_load_fn, &done, &done);
SPDK_NOTICELOG("json_app_load_done complete\n");
SPDK_DEBUGLOG("zvfs_env_setup complete\n");
return 0;
}
@@ -441,8 +588,7 @@ void zvfs_json_load_fn(void *arg) {
void json_app_load_done(int rc, void *ctx) {
bool *done = ctx;
*done = true;
SPDK_NOTICELOG("json_app_load_done\n");
SPDK_DEBUGLOG("json_app_load_done\n");
}
@@ -453,53 +599,82 @@ void json_app_load_done(int rc, void *ctx) {
// load
int zvfs_mount(struct zvfs_s *fs) {
fs->finished = false;
return waiter(global_thread, zvfs_do_mount, fs, &fs->finished);
bool ok = waiter(global_thread, zvfs_do_mount, fs, &fs->finished);
SPDK_DEBUGLOG("mount finished\n");
return ok;
}
// unload
int zvfs_umount(struct zvfs_s *fs) {
fs->finished = false;
return waiter(global_thread, zvfs_do_umount, fs, &fs->finished);
bool ok = waiter(global_thread, zvfs_do_umount, fs, &fs->finished);
SPDK_DEBUGLOG("umount finished\n");
return ok;
}
// file
// create
int zvfs_create(struct zvfs_file_s *file) {
file->finished = false;
return waiter(global_thread, zvfs_do_create, file, &file->finished);
bool ok = waiter(global_thread, zvfs_do_create, file, &file->finished);
SPDK_DEBUGLOG("create finished\n");
return ok;
}
// open
int zvfs_open(struct zvfs_file_s *file) {
file->finished = false;
return waiter(global_thread, zvfs_do_open, file, &file->finished);
bool ok = waiter(global_thread, zvfs_do_open, file, &file->finished);
SPDK_DEBUGLOG("open finished\n");
return ok;
}
// read
int zvfs_read(struct zvfs_file_s *file, uint8_t *buffer, size_t count) {
file->io_count = count;
file->actual_io_count = 0;
file->finished = false;
bool ok = waiter(global_thread, zvfs_do_read, file, &file->finished);
uint64_t page_off = (file->current_offset - file->io_count) % file->fs->io_unit_size;
memcpy(buffer, (uint8_t *)file->dma_buf + page_off, file->io_count);
if (!ok || file->actual_io_count == 0) return -1;
return ok ? (int)file->io_count : -1;
/*
* dma_buf 里存的是从 LBA 边界开始的整扇区数据,
* page_off 是 current_offset读之前相对于 LBA 边界的字节偏移。
*
* current_offset 在 read_cb 里已经 += actual_io_count
* 所以读之前的 offset = current_offset - actual_io_count。
*/
uint64_t pre_offset = file->current_offset - file->actual_io_count;
uint64_t page_off = pre_offset % file->fs->io_unit_size;
memcpy(buffer,
(uint8_t *)file->dma_buf + page_off,
file->actual_io_count);
SPDK_DEBUGLOG("read finished\n");
return (int)file->actual_io_count;
}
// write
int zvfs_write(struct zvfs_file_s *file, const uint8_t *buffer, size_t count) {
file->io_count = count;
file->write_staging_buf = buffer;
file->finished = false;
memcpy(file->dma_buf, buffer, count);
bool ok = waiter(global_thread, zvfs_do_write, file, &file->finished);
SPDK_DEBUGLOG("write finished\n");
return ok ? (int)count : -1;
}
// close
int zvfs_close(struct zvfs_file_s *file) {
file->finished = false;
return waiter(global_thread, zvfs_do_close, file, &file->finished);
bool ok = waiter(global_thread, zvfs_do_close, file, &file->finished);
SPDK_DEBUGLOG("close finished\n");
return ok;
}
// delete
int zvfs_delete(struct zvfs_file_s *file) {
file->finished = false;
return waiter(global_thread, zvfs_do_delete, file, &file->finished);
bool ok = waiter(global_thread, zvfs_do_delete, file, &file->finished);
SPDK_DEBUGLOG("delete finished\n");
return ok;
}
int main(int argc, char *argv[]) {
@@ -526,7 +701,7 @@ int main(int argc, char *argv[]) {
char *buffer = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
zvfs_write(file, buffer, strlen(buffer));
char *buffer2 = "abcdefghijklmnopqrstuvwxyz";
zvfs_write(file, buffer, strlen(buffer2));
zvfs_write(file, buffer2, strlen(buffer2));
SPDK_NOTICELOG("\n\n zvfs read start \n\n");

17
zvfs.h
View File

@@ -13,7 +13,7 @@
#define ZVFS_MAX_FD 64
#define BUFFER_SIZE (1024*8)
static const char *json_file = "/home/lian/share/10.1-spdk/zvfs/zvfs.json";
extern const char *json_file;
extern struct spdk_thread *global_thread;
static const int WAITER_MAX_TIME = 100000;
@@ -29,6 +29,7 @@ typedef struct {
/* 文件系统全局结构 */
typedef struct zvfs_s {
struct spdk_bs_dev *bs_dev;
struct spdk_blob_store *bs;
struct spdk_io_channel *channel;
struct spdk_blob *super_blob; // 承载目录日志的blob
@@ -39,7 +40,7 @@ typedef struct zvfs_s {
uint32_t dirent_count; // 当前有效项数
/* 伪FD表 */
struct zvfs_file *fd_table[ZVFS_MAX_FD]; // // e.g., #define ZVFS_MAX_FD 64
struct zvfs_file_s *fd_table[ZVFS_MAX_FD]; // // e.g., #define ZVFS_MAX_FD 64
int fd_base; // 伪FD起始值如1000
int openfd_count;
@@ -66,6 +67,9 @@ typedef struct zvfs_file_s {
void *dma_buf;
uint64_t dma_buf_size;
size_t actual_io_count;
const uint8_t *write_staging_buf;
size_t io_count;
bool finished;
} zvfs_file_t;
@@ -83,9 +87,10 @@ int zvfs_close(struct zvfs_file_s *file);
int zvfs_delete(struct zvfs_file_s *file);
/* POSIX hook APIzvfs_hook.c 实现) */
int zvfs_open_hook(const char *path, int flags, ...);
ssize_t zvfs_read_hook(int fd, void *buf, size_t count);
ssize_t zvfs_write_hook(int fd, const void *buf, size_t count);
int zvfs_close_hook(int fd);
int open(const char *path, int flags, ...);
ssize_t read(int fd, void *buf, size_t count);
ssize_t write(int fd, const void *buf, size_t count);
int close(int fd);
int unlink(const char *name);
#endif

View File

@@ -1,5 +1,8 @@
#include "zvfs.h"
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <dlfcn.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -8,6 +11,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include "zvfs.h"
/* ------------------------------------------------------------------ */
/* 全局状态 */
@@ -15,13 +19,42 @@
static zvfs_t *g_fs = NULL; /* 全局文件系统NULL 表示未初始化 */
static bool g_mounted = false;
static bool g_env_init = false;
/* 元数据文件路径 */
static const char *META_FILE = "/home/lian/share/10.1-spdk/zvfs/zvfs_meta.txt";
/* 伪 fd 起始值,避免和真实 fd 冲突 */
#define FD_BASE 1000
#define FD_BASE 10000
/* 只拦截以 /zvfs 开头的路径 */
#define ZVFS_PATH_PREFIX "/zvfs"
static int (*real_open_fn) (const char*, int, ...) = NULL;
static ssize_t (*real_read_fn) (int, void*, size_t) = NULL;
static ssize_t (*real_write_fn)(int, const void*, size_t) = NULL;
static int (*real_close_fn)(int) = NULL;
static int (*real_unlink_fn)(const char *name) = NULL;
/*
 * Library constructor: look up the next definitions of open/read/write/
 * close/unlink in symbol search order (dlsym with RTLD_NEXT) and cache them
 * in the real_*_fn pointers, so the hooks in this file can forward calls
 * they do not handle themselves.
 *
 * The dlsym() results are not checked here.
 * NOTE(review): if one of the hooked functions is called before this
 * constructor has run, the corresponding real_*_fn is still NULL — confirm
 * whether that can happen in the intended LD_PRELOAD setup.
 */
__attribute__((constructor))
static void zvfs_preload_init(void) {
real_open_fn = dlsym(RTLD_NEXT, "open");
real_read_fn = dlsym(RTLD_NEXT, "read");
real_write_fn = dlsym(RTLD_NEXT, "write");
real_close_fn = dlsym(RTLD_NEXT, "close");
real_unlink_fn= dlsym(RTLD_NEXT, "unlink");
}
/*
 * Decide whether `path` is handled by zvfs.
 *
 * A path is claimed only when it is exactly ZVFS_PATH_PREFIX or lies below
 * it, i.e. the prefix is followed by '/' or by the end of the string.
 * A bare prefix comparison would also claim unrelated paths that merely
 * start with the same characters (e.g. "/zvfs_meta.txt" or "/zvfsfoo/x").
 *
 * Returns false for NULL.
 */
static inline bool is_zvfs_path(const char *path) {
    const size_t prefix_len = sizeof(ZVFS_PATH_PREFIX) - 1;

    if (path == NULL) {
        return false;
    }
    if (strncmp(path, ZVFS_PATH_PREFIX, prefix_len) != 0) {
        return false;
    }

    /* Require a path-component boundary right after the prefix. */
    return path[prefix_len] == '/' || path[prefix_len] == '\0';
}
/*
 * Decide whether `fd` is one of our pseudo file descriptors, i.e. lies in
 * the half-open range [FD_BASE, FD_BASE + ZVFS_MAX_FD).
 */
static inline bool is_zvfs_fd(int fd) {
    if (fd < FD_BASE) {
        return false;
    }
    /* fd >= FD_BASE here, so the subtraction cannot go negative. */
    return fd - FD_BASE < ZVFS_MAX_FD;
}
/* ------------------------------------------------------------------ */
/* 元数据文件 I/O */
@@ -36,7 +69,7 @@ static const char *META_FILE = "/home/lian/share/10.1-spdk/zvfs/zvfs_meta.txt";
*/
static int meta_load(zvfs_t *fs) {
int fd = open(META_FILE, O_RDONLY);
int fd = real_open_fn(META_FILE, O_RDONLY);
if (fd < 0) {
/* 文件不存在,当作空目录 */
fs->dirent_count = 0;
@@ -45,7 +78,7 @@ static int meta_load(zvfs_t *fs) {
/* 一次性读进来 */
char buf[4096] = {0};
ssize_t n = read(fd, buf, sizeof(buf) - 1);
ssize_t n = real_read_fn(fd, buf, sizeof(buf) - 1);
close(fd);
if (n <= 0) return 0;
@@ -65,7 +98,7 @@ static int meta_load(zvfs_t *fs) {
zvfs_dirent_t *d = calloc(1, sizeof(zvfs_dirent_t));
if (!d) break;
int ret = sscanf(line, "%255s %"PRIu64" %"PRIu64" %"PRIu32,
int ret = sscanf(line, "%255s %"PRIu64" %"PRIu64" %"PRIu64,
d->filename,
&d->blob_id,
&d->file_size,
@@ -85,7 +118,7 @@ static int meta_load(zvfs_t *fs) {
}
static int meta_save(zvfs_t *fs) {
int fd = open(META_FILE, O_WRONLY | O_CREAT | O_TRUNC, 0644);
int fd = real_open_fn (META_FILE, O_WRONLY | O_CREAT | O_TRUNC, 0644);
if (fd < 0) return -1;
for (uint32_t i = 0; i < fs->dirent_count; i++) {
@@ -93,12 +126,12 @@ static int meta_save(zvfs_t *fs) {
if (!d || !d->is_valid) continue;
char line[512];
int len = snprintf(line, sizeof(line), "%s %"PRIu64" %"PRIu64" %"PRIu32"\n",
int len = snprintf(line, sizeof(line), "%s %"PRIu64" %"PRIu64" %"PRIu64"\n",
d->filename, d->blob_id, d->file_size, d->allocated_clusters);
write(fd, line, len);
real_write_fn(fd, line, len);
}
close(fd);
real_close_fn(fd);
return 0;
}
@@ -106,6 +139,13 @@ static int meta_save(zvfs_t *fs) {
/* ------------------------------------------------------------------ */
/* 初始化(第一次 open 时调用) */
/* ------------------------------------------------------------------ */
/*
 * Exit handler: when the file system is mounted, save the metadata and
 * unmount. Does nothing if zvfs was never mounted or g_fs is NULL.
 */
static void zvfs_atexit(void) {
    if (g_mounted && g_fs) {
        SPDK_NOTICELOG("umount\n");
        meta_save(g_fs);
        zvfs_umount(g_fs);
    }
}
static int zvfs_ensure_mounted(void) {
if (g_mounted) return 0;
@@ -123,11 +163,14 @@ static int zvfs_ensure_mounted(void) {
}
/* 初始化 SPDK 环境并 mount */
if (zvfs_env_setup() != 0) {
if (!g_env_init) {
if( zvfs_env_setup() != 0) {
free(g_fs);
g_fs = NULL;
return -1;
}
g_env_init = true;
}
if (!zvfs_mount(g_fs)) {
free(g_fs);
@@ -136,6 +179,8 @@ static int zvfs_ensure_mounted(void) {
}
g_mounted = true;
atexit(zvfs_atexit);
SPDK_NOTICELOG("mount\n");
return 0;
}
@@ -164,6 +209,7 @@ static zvfs_dirent_t *dirent_alloc(const char *filename) {
d->open_count = 0;
d->file_size = 0;
d->allocated_clusters = 0;
d->blob_id = 0;
g_fs->dirents[g_fs->dirent_count++] = d;
return d;
@@ -202,11 +248,25 @@ static zvfs_file_t *fd_lookup(int pseudo_fd) {
return g_fs->fd_table[idx];
}
/* ------------------------------------------------------------------ */
/* POSIX hook */
/* ------------------------------------------------------------------ */
/* ------------------------------------------------------------------ */
/* POSIX hook: open */
/* ------------------------------------------------------------------ */
int zvfs_open_hook(const char *path, int flags, ...) {
int open(const char *path, int flags, ...) {
if (!is_zvfs_path(path)) {
mode_t mode = 0;
if (flags & O_CREAT) {
va_list ap;
va_start(ap, flags);
mode = va_arg(ap, mode_t);
va_end(ap);
}
return real_open_fn(path, flags, mode);
}
/* 确保 fs 已经 mount */
if (zvfs_ensure_mounted() != 0) {
@@ -229,6 +289,7 @@ int zvfs_open_hook(const char *path, int flags, ...) {
errno = ENOMEM;
return -1;
}
meta_save(g_fs);
}
/* 创建 file 句柄 */
@@ -246,13 +307,14 @@ int zvfs_open_hook(const char *path, int flags, ...) {
int ok;
if (dirent->blob_id == 0) {
/* 新文件create blobopen 时不 resizewrite 时按需扩容 */
file->blob_id = 0;
ok = zvfs_create(file); /* 内部 create → open → (no resize) → alloc dma_buf */
ok = zvfs_create(file); /* 内部 create → open → resize → alloc dma_buf */
SPDK_DEBUGLOG("create: %ld\n", file->blob_id);
/* 把新分配的 blob_id 写回 dirent */
dirent->blob_id = file->blob_id;
} else {
/* 已有文件:直接 open 已有 blob */
file->blob_id = dirent->blob_id;
SPDK_DEBUGLOG("open: %ld\n", file->blob_id);
ok = zvfs_open(file);
}
@@ -279,7 +341,11 @@ int zvfs_open_hook(const char *path, int flags, ...) {
/* ------------------------------------------------------------------ */
/* POSIX hook: read */
/* ------------------------------------------------------------------ */
ssize_t zvfs_read_hook(int fd, void *buf, size_t count) {
ssize_t read(int fd, void *buf, size_t count) {
if (!is_zvfs_fd(fd)) {
return real_read_fn(fd, buf, count);
}
zvfs_file_t *file = fd_lookup(fd);
if (!file) {
errno = EBADF;
@@ -299,7 +365,11 @@ ssize_t zvfs_read_hook(int fd, void *buf, size_t count) {
/* POSIX hook: write */
/* ------------------------------------------------------------------ */
ssize_t zvfs_write_hook(int fd, const void *buf, size_t count) {
ssize_t write(int fd, const void *buf, size_t count) {
if (!is_zvfs_fd(fd)) {
return real_write_fn(fd, buf, count);
}
zvfs_file_t *file = fd_lookup(fd);
if (!file) {
errno = EBADF;
@@ -314,7 +384,11 @@ ssize_t zvfs_write_hook(int fd, const void *buf, size_t count) {
/* POSIX hook: close */
/* ------------------------------------------------------------------ */
int zvfs_close_hook(int fd) {
int close(int fd) {
if (!is_zvfs_fd(fd)) {
return real_close_fn(fd);
}
zvfs_file_t *file = fd_lookup(fd);
if (!file) {
errno = EBADF;
@@ -334,25 +408,64 @@ int zvfs_close_hook(int fd) {
dirent->open_count--;
if(dirent->open_count == 0 && !dirent->is_valid){
zvfs_delete(file);
/* 从 dirents 数组中移除 */
for (uint32_t i = 0; i < g_fs->dirent_count; i++) {
if (g_fs->dirents[i] == dirent) {
free(dirent);
g_fs->dirents[i] = g_fs->dirents[--g_fs->dirent_count];
g_fs->dirents[g_fs->dirent_count] = NULL;
break;
}
}
meta_save(g_fs);
}
}
free(file);
return 0;
}
/* 如果没有任何打开的文件了,保存元数据并 unmount */
if (g_fs->openfd_count == 0) {
meta_save(g_fs);
zvfs_umount(g_fs);
/* 释放所有 dirent */
for (uint32_t i = 0; i < g_fs->dirent_count; i++) {
free(g_fs->dirents[i]);
g_fs->dirents[i] = NULL;
/* ------------------------------------------------------------------ */
/* POSIX hook: unlink */
/* ------------------------------------------------------------------ */
int unlink(const char *name) {
if (!is_zvfs_path(name)) {
return real_unlink_fn(name);
}
free(g_fs);
g_fs = NULL;
g_mounted = false;
if (zvfs_ensure_mounted() != 0) {
errno = EIO;
return -1;
}
zvfs_dirent_t *dirent = dirent_find(name);
if (!dirent) {
errno = ENOENT;
return -1;
}
if (dirent->open_count > 0) {
/* 还有人打开着,延迟删除:标记无效,等最后一次 close 时再 delete blob */
dirent->is_valid = false;
} else {
/* 没人打开,直接删除 blob */
zvfs_file_t tmp = {0};
tmp.fs = g_fs;
tmp.dirent = dirent;
tmp.blob_id = dirent->blob_id;
zvfs_delete(&tmp);
/* 从 dirents 数组中移除 */
for (uint32_t i = 0; i < g_fs->dirent_count; i++) {
if (g_fs->dirents[i] == dirent) {
free(dirent);
g_fs->dirents[i] = g_fs->dirents[--g_fs->dirent_count];
g_fs->dirents[g_fs->dirent_count] = NULL;
break;
}
}
meta_save(g_fs);
}
return 0;