diff --git a/README.md b/README.md index 37f3741..2315a60 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,263 @@ cd /home/lian/share/10.1-spdk/spdk make -j make + +# sometimes dd if=/dev/zero of=/dev/nvme0n1 bs=1M count=10 LD_PRELOAD=./libzvfs.so ./func_test ``` +## 测试 +```shell +cd /home/lian/share/10.1-spdk/spdk + +export LD_LIBRARY_PATH=/home/lian/share/10.1-spdk/spdk/build/lib:/home/lian/share/10.1-spdk/spdk/dpdk/build/lib:$LD_LIBRARY_PATH +export PATH=/home/lian/share/10.1-spdk/spdk/build/bin:$PATH + +./build/bin/spdk_nvme_perf \ + -r 'trtype:PCIe traddr:0000:03:00.0' \ + -q 1 -o 4096 -w randwrite -t 5 + +root@ubuntu:/home/lian/share/10.1-spdk/spdk# ./build/bin/spdk_nvme_perf -r 'trtype:PCIe traddr:0000:03:00.0' -q 1 -o 4096 -w randwrite -t 5 +Initializing NVMe Controllers +Attached to NVMe Controller at 0000:03:00.0 [15ad:07f0] +Associating PCIE (0000:03:00.0) NSID 1 with lcore 0 +Initialization complete. Launching workers. +======================================================== + Latency(us) +Device Information : IOPS MiB/s Average min max +PCIE (0000:03:00.0) NSID 1 from core 0: 22097.20 86.32 45.21 21.93 1639.58 +======================================================== +Total : 22097.20 86.32 45.21 21.93 1639.58 + + +./build/bin/spdk_nvme_perf \ + -r 'trtype:PCIe traddr:0000:03:00.0' \ + -q 32 -o 4096 -w randwrite -t 5 + + +root@ubuntu:/home/lian/share/10.1-spdk/spdk# ./build/bin/spdk_nvme_perf -r 'trtype:PCIe traddr:0000:03:00.0' -q 32 -o 4096 -w randwrite -t 5 +Initializing NVMe Controllers +Attached to NVMe Controller at 0000:03:00.0 [15ad:07f0] +Associating PCIE (0000:03:00.0) NSID 1 with lcore 0 +Initialization complete. Launching workers. 
+======================================================== + Latency(us) +Device Information : IOPS MiB/s Average min max +PCIE (0000:03:00.0) NSID 1 from core 0: 80122.94 312.98 399.36 36.31 2225.64 +======================================================== +Total : 80122.94 312.98 399.36 36.31 2225.64 + + +./build/bin/spdk_nvme_perf \ + -r 'trtype:PCIe traddr:0000:03:00.0' \ + -q 1 -o 131072 -w write -t 5 + +root@ubuntu:/home/lian/share/10.1-spdk/spdk# export LD_LIBRARY_PATH=/home/lian/share/10.1-spdk/spdk/build/lib:/home/lian/share/10.1-spdk/spdk/dpdk/build/lib:$LD_LIBRARY_PATH +root@ubuntu:/home/lian/share/10.1-spdk/spdk# export PATH=/home/lian/share/10.1-spdk/spdk/build/bin:$PATH +root@ubuntu:/home/lian/share/10.1-spdk/spdk# ./build/bin/spdk_nvme_perf -r 'trtype:PCIe traddr:0000:03:00.0' -q 1 -o 131072 -w write -t 5 +Initializing NVMe Controllers +Attached to NVMe Controller at 0000:03:00.0 [15ad:07f0] +Associating PCIE (0000:03:00.0) NSID 1 with lcore 0 +Initialization complete. Launching workers. +======================================================== + Latency(us) +Device Information : IOPS MiB/s Average min max +PCIE (0000:03:00.0) NSID 1 from core 0: 14746.80 1843.35 67.79 40.16 4324.96 +======================================================== +Total : 14746.80 1843.35 67.79 40.16 4324.96 + + +./build/bin/spdk_nvme_perf \ + -r 'trtype:PCIe traddr:0000:03:00.0' \ + -q 32 -o 131072 -w write -t 5 + +root@ubuntu:/home/lian/share/10.1-spdk/spdk# ./build/bin/spdk_nvme_perf -r 'trtype:PCIe traddr:0000:03:00.0' -q 32 -o 131072 -w write -t 5 +Initializing NVMe Controllers +Attached to NVMe Controller at 0000:03:00.0 [15ad:07f0] +Associating PCIE (0000:03:00.0) NSID 1 with lcore 0 +Initialization complete. Launching workers. 
+======================================================== + Latency(us) +Device Information : IOPS MiB/s Average min max +PCIE (0000:03:00.0) NSID 1 from core 0: 21997.40 2749.68 1455.09 96.64 26152.13 +======================================================== +Total : 21997.40 2749.68 1455.09 96.64 26152.13 +``` +### 系统调用 +#### no O_DIRECT 小块 + +```shell +root@ubuntu:/home/lian/share/10.1-spdk/zvfs# ./func_test + +=== test_single_file_perf === +Path : /tmp/test.dat +IO size : 4 KB +Max file: 2048 MB +Duration: 10 sec + +WRITE: + total : 12668.9 MB + time : 10.003 sec + IOPS : 324211 ops/sec + BW : 1266.45 MB/s + +READ: + total : 7664.5 MB + time : 10.000 sec + IOPS : 196210 ops/sec + BW : 766.44 MB/s + +=== all tests PASSED === +``` +#### no O_DIRECT 大块 + +```shell +root@ubuntu:/home/lian/share/10.1-spdk/zvfs# ./func_test + +=== test_single_file_perf === +Path : /tmp/test.dat +IO size : 128 KB +Max file: 2048 MB +Duration: 10 sec + +WRITE: + total : 14609.5 MB + time : 10.000 sec + IOPS : 11688 ops/sec + BW : 1460.95 MB/s + +READ: + total : 8138.6 MB + time : 10.000 sec + IOPS : 6511 ops/sec + BW : 813.85 MB/s + +=== all tests PASSED === +``` + +#### O_DIRECT 小块 +```shell +root@ubuntu:/home/lian/share/10.1-spdk/zvfs# ./func_test + +=== test_single_file_perf === +Path : /tmp/test.dat +IO size : 4 KB +Max file: 2048 MB +Duration: 10 sec + +WRITE: + total : 434.5 MB + time : 10.000 sec + IOPS : 11122 ops/sec + BW : 43.45 MB/s + +READ: + total : 373.8 MB + time : 10.000 sec + IOPS : 9568 ops/sec + BW : 37.38 MB/s + +=== all tests PASSED === +``` +#### O_DIRECT 大块 +```shell +root@ubuntu:/home/lian/share/10.1-spdk/zvfs# ./func_test + +=== test_single_file_perf === +Path : /tmp/test.dat +IO size : 128 KB +Max file: 2048 MB +Duration: 10 sec + +WRITE: + total : 7245.4 MB + time : 10.000 sec + IOPS : 5796 ops/sec + BW : 724.53 MB/s + +READ: + total : 9006.5 MB + time : 10.000 sec + IOPS : 7205 ops/sec + BW : 900.64 MB/s + +=== all tests PASSED === +``` + +### SPDK +#### 非对齐 
+```shell +root@ubuntu:/home/lian/share/10.1-spdk/zvfs# LD_PRELOAD=./libzvfs.so ./func_test /zvfs + +=== test_single_file_perf === +Path : /zvfs/file.dat +IO size : 128 KB +Max file: 2048 MB +Duration: 10 sec + +WRITE: + total : 10304.0 MB + time : 10.000 sec + IOPS : 8243 ops/sec + BW : 1030.40 MB/s + +READ: + total : 17788.5 MB + time : 10.000 sec + IOPS : 14231 ops/sec + BW : 1778.85 MB/s + +=== all tests PASSED === +``` +#### 全对齐大块 +```shell +root@ubuntu:/home/lian/share/10.1-spdk/zvfs# LD_PRELOAD=./libzvfs.so ./func_test /zvfs + +=== test_single_file_perf === +Path : /zvfs/file.dat +IO size : 128 KB +Max file: 2048 MB +Duration: 10 sec + +WRITE: + total : 16624.4 MB + time : 10.000 sec + IOPS : 13299 ops/sec + BW : 1662.43 MB/s + +READ: + total : 16430.8 MB + time : 10.000 sec + IOPS : 13145 ops/sec + BW : 1643.07 MB/s + +=== all tests PASSED === +``` + +#### 全对齐小块 +```shell +root@ubuntu:/home/lian/share/10.1-spdk/zvfs# LD_PRELOAD=./libzvfs.so ./func_test /zvfs + +=== test_single_file_perf === +Path : /zvfs/file.dat +IO size : 4 KB +Max file: 2048 MB +Duration: 10 sec + +WRITE: + total : 944.5 MB + time : 10.000 sec + IOPS : 24179 ops/sec + BW : 94.45 MB/s + +READ: + total : 982.8 MB + time : 10.000 sec + IOPS : 25159 ops/sec + BW : 98.28 MB/s + +=== all tests PASSED === +``` ## SPDK 1. 
/*
 * Seconds elapsed from `a` to `b` (b - a). The nanosecond fields supply the
 * fractional part; the result is negative when `b` precedes `a`.
 */
static double time_diff_sec(struct timespec a, struct timespec b)
{
    return (double)(b.tv_sec - a.tv_sec) +
           (double)(b.tv_nsec - a.tv_nsec) / 1000000000.0;
}

/* Print one result section ("WRITE" / "READ") of the benchmark report. */
static void perf_report(const char *label, size_t ops, double bytes, double sec)
{
    double mb = bytes / (1024.0 * 1024.0);

    printf("\n%s:\n", label);
    printf(" total : %.1f MB\n", mb);
    printf(" time : %.3f sec\n", sec);
    printf(" IOPS : %.0f ops/sec\n", (double)ops / sec);
    printf(" BW : %.2f MB/s\n", mb / sec);
}

/*
 * Sequentially write `io_size`-byte blocks to `fd` for `test_sec` seconds,
 * rewinding to offset 0 every `max_count` blocks so the file never grows
 * past max_count * io_size. Returns 0 on success, -1 on any I/O error.
 */
static int perf_write_phase(int fd, const char *buf, size_t io_size,
                            size_t max_count, int test_sec,
                            size_t *ops, double *elapsed)
{
    struct timespec start, now, end;
    size_t count = 0;
    size_t pos = 0; /* current write position, in blocks */

    clock_gettime(CLOCK_MONOTONIC, &start);
    do {
        if (pos >= max_count) {
            if (lseek(fd, 0, SEEK_SET) < 0) {
                perror("lseek");
                return -1;
            }
            pos = 0;
        }

        ssize_t w = write(fd, buf, io_size);
        if (w < 0) {
            perror("write");
            return -1;
        }
        if ((size_t)w != io_size) {
            /* errno is not meaningful for a short write, so do not perror. */
            fprintf(stderr, "write: short write (%zd of %zu bytes)\n",
                    w, io_size);
            return -1;
        }

        count++;
        pos++;
        clock_gettime(CLOCK_MONOTONIC, &now);
    } while (time_diff_sec(start, now) < test_sec);
    clock_gettime(CLOCK_MONOTONIC, &end);

    *ops = count;
    *elapsed = time_diff_sec(start, end);
    return 0;
}

/*
 * Sequentially read `io_size`-byte blocks from `fd` for `test_sec` seconds,
 * rewinding on EOF. Returns 0 on success, -1 on any I/O error.
 *
 * The clock is sampled on every iteration, including the rewind path, so the
 * loop always terminates: previously a failing read (or an empty file)
 * re-evaluated the deadline against a stale timestamp and spun forever.
 * `bytes` accumulates what read() actually returned, so short reads are not
 * over-counted.
 */
static int perf_read_phase(int fd, char *buf, size_t io_size, int test_sec,
                           size_t *ops, double *bytes, double *elapsed)
{
    struct timespec start, now, end;
    size_t count = 0;
    double total = 0.0;

    clock_gettime(CLOCK_MONOTONIC, &start);
    do {
        ssize_t r = read(fd, buf, io_size);
        if (r < 0) {
            perror("read");
            return -1;
        }
        if (r == 0) {
            /* EOF: wrap around to the start of the file. */
            if (lseek(fd, 0, SEEK_SET) < 0) {
                perror("lseek");
                return -1;
            }
        } else {
            count++;
            total += (double)r;
        }
        clock_gettime(CLOCK_MONOTONIC, &now);
    } while (time_diff_sec(start, now) < test_sec);
    clock_gettime(CLOCK_MONOTONIC, &end);

    *ops = count;
    *bytes = total;
    *elapsed = time_diff_sec(start, end);
    return 0;
}

/*
 * Single-file sequential throughput benchmark.
 *
 * Writes fixed-size blocks to `path` for `test_sec` seconds (wrapping at
 * 2 GB), then re-opens the file and reads it back for the same duration,
 * printing total volume, IOPS and bandwidth for each phase. The file is
 * removed before returning, on success and on failure.
 *
 * Returns 0 on success; 1 = allocation or open-for-write failed,
 * 2 = write phase failed, 3 = open-for-read failed, 4 = read phase failed.
 */
static int test_single_file_perf(const char *path)
{
    const size_t io_size = 4096; /* use 128 * 1024 for the large-block run */
    const size_t max_size = 2ULL * 1024 * 1024 * 1024; /* wrap after 2 GB */
    const size_t max_count = max_size / io_size;
    const int test_sec = 10;
    const int direct = 0; /* OR in O_DIRECT here to bypass the page cache */

    printf("\n=== test_single_file_perf ===\n");
    printf("Path : %s\n", path);
    printf("IO size : %zu KB\n", io_size / 1024);
    printf("Max file: %zu MB\n", max_size / 1024 / 1024);
    printf("Duration: %d sec\n", test_sec);

    /* Start from a fresh file; a missing file is expected and harmless. */
    unlink(path);

    /* 4 KiB alignment keeps the buffer usable with O_DIRECT. */
    char *buf = aligned_alloc(4096, io_size);
    if (!buf) {
        perror("aligned_alloc");
        return 1;
    }
    memset(buf, 'A', io_size);

    /* ================= WRITE ================= */
    int fd = open(path, O_CREAT | O_RDWR | direct, 0644);
    if (fd < 0) {
        perror("open write");
        free(buf);
        return 1;
    }

    size_t wcount = 0;
    double wsec = 0.0;
    if (perf_write_phase(fd, buf, io_size, max_count, test_sec,
                         &wcount, &wsec) != 0) {
        close(fd);
        unlink(path);
        free(buf);
        return 2;
    }
    close(fd);
    perf_report("WRITE", wcount, (double)wcount * (double)io_size, wsec);

    /* ================= READ ================= */
    fd = open(path, O_RDONLY | direct);
    if (fd < 0) {
        perror("open read");
        unlink(path);
        free(buf);
        return 3;
    }

    size_t rcount = 0;
    double rbytes = 0.0;
    double rsec = 0.0;
    if (perf_read_phase(fd, buf, io_size, test_sec,
                        &rcount, &rbytes, &rsec) != 0) {
        close(fd);
        unlink(path);
        free(buf);
        return 4;
    }
    close(fd);
    perf_report("READ", rcount, rbytes, rsec);

    unlink(path);
    free(buf);
    return 0;
}
/* ------------------------------------------------------------------ */
/* Test 3: one file opened through two descriptors (one reader, one   */
/*         writer) at the same time                                    */
/* ------------------------------------------------------------------ */
/*
 * Creates `path` with "0123456789", then opens it twice (O_WRONLY and
 * O_RDONLY). Overwrites the first 5 bytes through the writer and checks that
 * the reader sees "HELLO56789"; appends "!!!" through the writer and checks
 * that the reader sees it at offset 10.
 *
 * Every exit path after the file has been created closes both descriptors
 * and unlinks the file.
 *
 * Returns 0 on success; 1-6 identify the failing open/write step (same
 * numbering as before), 7 = lseek failed, 8 = read failed,
 * 9 = data read back did not match the expected content.
 */
static int test_dual_open_same_file(const char *path)
{
    int rc = 0;
    int fd_w = -1;
    int fd_r = -1;
    char buf[32] = {0};
    ssize_t r;

    printf("\n=== test_dual_open_same_file ===\n");

    /* Create the file and write the initial content. */
    int fd_init = open(path, O_CREAT | O_RDWR | O_TRUNC, 0644);
    if (fd_init < 0) {
        perror("open init");
        return 1;
    }
    const char *init = "0123456789";
    if (write(fd_init, init, 10) != 10) {
        perror("write init");
        close(fd_init);
        rc = 2;
        goto out;
    }
    close(fd_init);

    /* Open the same file through a write handle and a read handle. */
    fd_w = open(path, O_WRONLY);
    if (fd_w < 0) {
        perror("open W");
        rc = 3;
        goto out;
    }

    fd_r = open(path, O_RDONLY);
    if (fd_r < 0) {
        perror("open R");
        rc = 4;
        goto out;
    }

    printf("fd_w=%d fd_r=%d\n", fd_w, fd_r);

    /* Overwrite the first 5 bytes through the write handle. */
    if (write(fd_w, "HELLO", 5) != 5) {
        perror("write");
        rc = 5;
        goto out;
    }
    printf("write via fd_w: HELLO (overwrite first 5 bytes)\n");

    /* Read the whole file back through the read handle. */
    if (lseek(fd_r, 0, SEEK_SET) < 0) {
        perror("lseek R");
        rc = 7;
        goto out;
    }
    r = read(fd_r, buf, sizeof(buf));
    if (r < 0) {
        perror("read");
        rc = 8;
        goto out;
    }
    printf("read via fd_r: %zd bytes: %.*s (expect: HELLO56789)\n",
           r, (int)r, buf);
    if (r != 10 || memcmp(buf, "HELLO56789", 10) != 0) {
        printf("FAIL: overwrite not visible through fd_r!\n");
        rc = 9;
        goto out;
    }

    /* Append through the write handle. */
    if (lseek(fd_w, 0, SEEK_END) < 0) {
        perror("lseek W");
        rc = 7;
        goto out;
    }
    if (write(fd_w, "!!!", 3) != 3) {
        perror("write append");
        rc = 6;
        goto out;
    }
    printf("write append via fd_w: !!!\n");

    /* Read the appended tail through the read handle. */
    if (lseek(fd_r, 10, SEEK_SET) < 0) {
        perror("lseek R");
        rc = 7;
        goto out;
    }
    memset(buf, 0, sizeof(buf));
    r = read(fd_r, buf, sizeof(buf));
    if (r < 0) {
        perror("read");
        rc = 8;
        goto out;
    }
    printf("read appended via fd_r: %zd bytes: %.*s (expect: !!!)\n",
           r, (int)r, buf);
    if (r != 3 || memcmp(buf, "!!!", 3) != 0) {
        printf("FAIL: appended data not visible through fd_r!\n");
        rc = 9;
        goto out;
    }

out:
    if (fd_w >= 0) {
        close(fd_w);
    }
    if (fd_r >= 0) {
        close(fd_r);
    }
    unlink(path);
    return rc;
}
/*
 * Create (or open) `path` and write a short greeting into it, leaving the
 * file in place for a later test_read_delete_file() run.
 *
 * Returns 0 on success; 1 = open failed, 2 = write failed or was short,
 * 3 = close failed.
 */
static int test_write_file(const char *path)
{
    printf("\n=== test_write_file ===\n");

    int fd = open(path, O_CREAT | O_RDWR, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    printf("open: %s fd=%d\n", path, fd);

    const char *msg = "Hello, zvfs!";
    const size_t len = strlen(msg);
    ssize_t w = write(fd, msg, len);
    if (w < 0) {
        perror("write");
        close(fd);
        return 2;
    }
    if ((size_t)w != len) {
        /* A partial write would otherwise be reported as success. */
        fprintf(stderr, "write: short write (%zd of %zu bytes)\n", w, len);
        close(fd);
        return 2;
    }
    printf("write: %zd bytes: %s\n", w, msg);

    if (close(fd) != 0) {
        perror("close");
        return 3;
    }
    printf("close: ok\n");
    return 0;
}

/*
 * Open `path` read-only, print up to 256 bytes of its content, then delete
 * the file.
 *
 * Returns 0 on success; 1 = open failed, 2 = read failed, 3 = unlink failed,
 * 4 = close failed.
 */
static int test_read_delete_file(const char *path)
{
    printf("\n=== test_read_delete_file ===\n");

    int fd = open(path, O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    printf("open: %s fd=%d\n", path, fd);

    char buf[256] = {0};
    ssize_t r = read(fd, buf, sizeof(buf));
    if (r < 0) {
        perror("read");
        close(fd);
        return 2;
    }
    printf("read: %zd bytes: %.*s\n", r, (int)r, buf);

    if (close(fd) != 0) {
        perror("close");
        return 4;
    }
    printf("close: ok\n");

    if (unlink(path) != 0) {
        perror("unlink");
        return 3;
    }
    printf("unlink: ok\n");

    return 0;
}

/*
 * Format "<dir>/<stem><suffix>.dat" into `dst`.
 * Returns 0 on success, -1 if the result did not fit in `cap` bytes.
 */
static int make_test_path(char *dst, size_t cap, const char *dir,
                          const char *stem, const char *suffix)
{
    int n = snprintf(dst, cap, "%s/%s%s.dat", dir, stem, suffix);

    return (n < 0 || (size_t)n >= cap) ? -1 : 0;
}

/* ------------------------------------------------------------------ */
/* main                                                                */
/* ------------------------------------------------------------------ */
/*
 * Usage: func_test [dir [mode]]
 *
 *   dir   directory for the test files; files are named file*.dat inside
 *         it. Without it the tests use /tmp/test*.dat.
 *   mode  0 = only write the test file and leave it in place,
 *         1 = only read the test file back and delete it.
 *         Running mode 0 and mode 1 as two separate processes checks that
 *         data survives across runs.
 *
 * Without `mode` the throughput benchmark is run. Returns 0 on success and
 * non-zero on failure or bad arguments.
 */
int main(int argc, char **argv)
{
    int rc = 0;
    char path[256];
    char path_a[256];
    char path_b[256];

    const char *dir = "/tmp";
    const char *stem = "test";
    if (argc >= 2) {
        dir = argv[1];
        stem = "file";
    }

    /* path_b previously reused the "_a" name, making both paths identical. */
    if (make_test_path(path, sizeof(path), dir, stem, "") != 0 ||
        make_test_path(path_a, sizeof(path_a), dir, stem, "_a") != 0 ||
        make_test_path(path_b, sizeof(path_b), dir, stem, "_b") != 0) {
        fprintf(stderr, "%s: directory path too long: %s\n", argv[0], dir);
        return 1;
    }

    if (argc == 3) {
        char *end = NULL;
        long choose = strtol(argv[2], &end, 10);

        if (end == argv[2] || *end != '\0' || (choose != 0 && choose != 1)) {
            fprintf(stderr, "usage: %s [dir [0|1]]\n", argv[0]);
            return 1;
        }
        return (choose == 0) ? test_write_file(path)
                             : test_read_delete_file(path);
    }

    /*
     * The functional suites are currently disabled; re-enable as needed:
     *   rc |= test_basic(path);
     *   rc |= test_lseek(path);
     *   rc |= test_dual_open_same_file(path);
     *   rc |= test_two_files(path_a, path_b);
     */
    (void)path_a;
    (void)path_b;

    rc |= test_single_file_perf(path);

    printf("\n=== all tests %s ===\n", rc == 0 ? "PASSED" : "FAILED");
    return rc;
}
do {} while(0) +#define ZVFS_BDEV "Nvme0n1" +#ifndef ZVFS_BDEV +#define ZVFS_BDEV "Malloc0" +#endif struct spdk_thread *global_thread = NULL; const char *json_file = "/home/lian/share/10.1-spdk/zvfs/zvfs.json"; @@ -114,18 +118,29 @@ bool waiter(struct spdk_thread *thread, spdk_msg_fn start_fn, void *ctx, bool *f /* MOUNT */ /* ================================================================== */ void zvfs_do_mount(void *arg) { - zvfs_t *fs = (zvfs_t*)arg; - struct spdk_bs_dev *bs_dev = NULL; - int rc = spdk_bdev_create_bs_dev_ext("Malloc0", zvfs_spdk_bdev_event_cb, NULL, &bs_dev); + // SPDK_DEBUGLOG("=== Listing ALL bdevs after JSON load ===\n"); + // struct spdk_bdev *bdev = spdk_bdev_first(); + // while (bdev) { + // SPDK_DEBUGLOG("Found bdev: [%s] product: %s\n", + // spdk_bdev_get_name(bdev), + // spdk_bdev_get_product_name(bdev)); + // bdev = spdk_bdev_next(bdev); + // } + // SPDK_DEBUGLOG("---------------------------------\n"); + // SPDK_DEBUGLOG("Trying to open: %s\n", ZVFS_BDEV); + + int rc = spdk_bdev_create_bs_dev_ext(ZVFS_BDEV, zvfs_spdk_bdev_event_cb, NULL, &bs_dev); if (rc != 0) { - spdk_app_stop(0); + SPDK_ERRLOG("=== bdev_open FAILED rc=%d (probably still not registered) ===\n", rc); + fs->finished = true; + spdk_app_stop(-1); + return; } fs->bs_dev = bs_dev; - // spdk_bs_init(bs_dev, NULL, zvfs_spdk_bs_init_cb, fs); spdk_bs_load(bs_dev, NULL, zvfs_spdk_bs_load_cb, fs); } @@ -136,7 +151,7 @@ void zvfs_spdk_bs_load_cb(void *arg, struct spdk_blob_store *bs, int bserrno) { SPDK_DEBUGLOG("load failed, new device, re-create bs_dev and init\n"); struct spdk_bs_dev *bs_dev = NULL; - int rc = spdk_bdev_create_bs_dev_ext("Malloc0", zvfs_spdk_bdev_event_cb, NULL, &bs_dev); + int rc = spdk_bdev_create_bs_dev_ext(ZVFS_BDEV, zvfs_spdk_bdev_event_cb, NULL, &bs_dev); if (rc != 0) { SPDK_ERRLOG("re-create bs_dev failed\n"); spdk_app_stop(-1); @@ -150,7 +165,8 @@ void zvfs_spdk_bs_load_cb(void *arg, struct spdk_blob_store *bs, int bserrno) { uint64_t io_unit_size = 
spdk_bs_get_io_unit_size(bs); SPDK_DEBUGLOG("io_unit_size : %"PRIu64"\n", io_unit_size); - + SPDK_NOTICELOG("io_unit_size=%lu\n", io_unit_size); + fs->io_unit_size = io_unit_size; fs->bs = bs; fs->channel = spdk_bs_alloc_io_channel(fs->bs); @@ -167,6 +183,7 @@ void zvfs_spdk_bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno) { uint64_t io_unit_size = spdk_bs_get_io_unit_size(bs); SPDK_DEBUGLOG("io_unit_size : %"PRIu64"\n", io_unit_size); + SPDK_NOTICELOG("io_unit_size=%lu\n", io_unit_size); fs->io_unit_size = io_unit_size; fs->bs = bs; @@ -370,17 +387,38 @@ void zvfs_do_write(void *arg) { return; } - /* - * 先把涉及的扇区读出,read 完成后在 preread_cb 里 patch 数据再写。 - * 注意:把用户数据暂存在 file->write_buf / write_count, - * 或者借用 file->io_count(io_count 不变)。 - * 这里我们把用户数据已经由上层调用者拷贝到了 write_staging_buf, - */ - /* 不管是否需要扩容,先 preread */ - spdk_blob_io_read(file->blob, file->fs->channel, - file->dma_buf, - lba, lba_count, - zvfs_spdk_blob_write_preread_cb, file); + file->aligned = (file->current_offset % io_unit == 0) && + (file->io_count % io_unit == 0); + + // static uint64_t aligned_count = 0; + // static uint64_t unaligned_count = 0; + // if (aligned) { + // aligned_count++; + // } else { + // unaligned_count++; + // } + // if ((aligned_count + unaligned_count) % 1000 == 0) { + // printf("aligned=%lu unaligned=%lu\n", aligned_count, unaligned_count); + // } + + if (file->aligned) { + /* 直接把用户数据拷到 dma_buf,跳过 preread */ + memcpy(file->dma_buf, file->write_staging_buf, file->io_count); + /* 直接进 preread_cb 的后半段逻辑(扩容判断+写) */ + zvfs_spdk_blob_write_preread_cb(file, 0); + } else { + /* + * 先把涉及的扇区读出,read 完成后在 preread_cb 里 patch 数据再写。 + * 注意:把用户数据暂存在 file->write_buf / write_count, + * 或者借用 file->io_count(io_count 不变)。 + * 这里我们把用户数据已经由上层调用者拷贝到了 write_staging_buf, + */ + /* 不管是否需要扩容,先 preread */ + spdk_blob_io_read(file->blob, file->fs->channel, + file->dma_buf, + lba, lba_count, + zvfs_spdk_blob_write_preread_cb, file); + } } /* Step 2 : preread 完成,patch dma_buf,然后决定是否扩容 */ @@ -393,11 
+431,15 @@ void zvfs_spdk_blob_write_preread_cb(void *arg, int bserrno){ SPDK_DEBUGLOG("preread error %d (may be uninitialized, continue)\n", bserrno); } - /* patch:把用户数据覆写到 dma_buf 的正确偏移处 */ - uint64_t page_off = file->current_offset % file->fs->io_unit_size; - memcpy((uint8_t *)file->dma_buf + page_off, - file->write_staging_buf, - file->io_count); + /* 只有非对齐情况才需要 patch,对齐情况下数据已经在 dma_buf 里了(do_write 里拷好的)*/ + uint64_t io_unit = file->fs->io_unit_size; + + if (!file->aligned) { + uint64_t page_off = file->current_offset % io_unit; + memcpy((uint8_t *)file->dma_buf + page_off, + file->write_staging_buf, + file->io_count); + } /* 判断是否需要扩容 */ uint64_t end_byte = file->current_offset + file->io_count; @@ -547,7 +589,6 @@ void zvfs_spdk_bs_unload_cb(void *arg, int bserrno) { // setup // zvfs.json - int zvfs_env_setup(void) { struct spdk_env_opts opts; spdk_env_opts_init(&opts); @@ -572,10 +613,26 @@ int zvfs_env_setup(void) { spdk_set_thread(global_thread); bool done = false; - waiter(global_thread, zvfs_json_load_fn, &done, &done); - SPDK_DEBUGLOG("zvfs_env_setup complete\n"); + int retry = 0; + while (retry < 200) { // 最多等 20 秒 + spdk_thread_poll(global_thread, 0, 0); + if (spdk_bdev_get_by_name(ZVFS_BDEV) != NULL) { + SPDK_DEBUGLOG("bdev %s ready!\n", ZVFS_BDEV); + break; + } + usleep(100 * 1000); // 100ms + retry++; + } + + if (spdk_bdev_get_by_name(ZVFS_BDEV) == NULL) { + SPDK_ERRLOG("bdev %s not found after 20s timeout!\n", ZVFS_BDEV); + return -1; + } + + + SPDK_DEBUGLOG("zvfs_env_setup complete\n"); return 0; } @@ -586,9 +643,12 @@ void zvfs_json_load_fn(void *arg) { } void json_app_load_done(int rc, void *ctx) { - bool *done = ctx; - *done = true; - SPDK_DEBUGLOG("json_app_load_done\n"); + bool *done = ctx; + if (rc != 0) { + SPDK_ERRLOG("JSON config load FAILED! 
rc=%d\n", rc); + } + // 不要 sleep!直接标记完成,让外部 waiter 去轮询 + *done = true; } @@ -600,14 +660,14 @@ void json_app_load_done(int rc, void *ctx) { int zvfs_mount(struct zvfs_s *fs) { fs->finished = false; bool ok = waiter(global_thread, zvfs_do_mount, fs, &fs->finished); - SPDK_DEBUGLOG("mount finished\n"); + if(!ok) SPDK_ERRLOG("mount result: ok=%d\n", ok); return ok; } // unload int zvfs_umount(struct zvfs_s *fs) { fs->finished = false; bool ok = waiter(global_thread, zvfs_do_umount, fs, &fs->finished); - SPDK_DEBUGLOG("umount finished\n"); + if(!ok) SPDK_ERRLOG("umount result: ok=%d\n", ok); return ok; } // file @@ -615,14 +675,14 @@ int zvfs_umount(struct zvfs_s *fs) { int zvfs_create(struct zvfs_file_s *file) { file->finished = false; bool ok = waiter(global_thread, zvfs_do_create, file, &file->finished); - SPDK_DEBUGLOG("create finished\n"); + if(!ok) SPDK_ERRLOG("create result: ok=%d\n", ok); return ok; } // open int zvfs_open(struct zvfs_file_s *file) { file->finished = false; bool ok = waiter(global_thread, zvfs_do_open, file, &file->finished); - SPDK_DEBUGLOG("open finished\n"); + if(!ok) SPDK_ERRLOG("open result: ok=%d\n", ok); return ok; } // read @@ -632,6 +692,7 @@ int zvfs_read(struct zvfs_file_s *file, uint8_t *buffer, size_t count) { file->finished = false; bool ok = waiter(global_thread, zvfs_do_read, file, &file->finished); + if(!ok) SPDK_ERRLOG("read result: ok=%d\n", ok); if (!ok || file->actual_io_count == 0) return -1; /* @@ -649,7 +710,6 @@ int zvfs_read(struct zvfs_file_s *file, uint8_t *buffer, size_t count) { (uint8_t *)file->dma_buf + page_off, file->actual_io_count); - SPDK_DEBUGLOG("read finished\n"); return (int)file->actual_io_count; } // write @@ -659,21 +719,21 @@ int zvfs_write(struct zvfs_file_s *file, const uint8_t *buffer, size_t count) { file->finished = false; bool ok = waiter(global_thread, zvfs_do_write, file, &file->finished); - SPDK_DEBUGLOG("write finished\n"); + if(!ok) SPDK_ERRLOG("write result: ok=%d\n", ok); return ok ? 
(int)count : -1; } // close int zvfs_close(struct zvfs_file_s *file) { file->finished = false; bool ok = waiter(global_thread, zvfs_do_close, file, &file->finished); - SPDK_DEBUGLOG("close finished\n"); + if(!ok) SPDK_ERRLOG("close result: ok=%d\n", ok); return ok; } // delete int zvfs_delete(struct zvfs_file_s *file) { file->finished = false; bool ok = waiter(global_thread, zvfs_do_delete, file, &file->finished); - SPDK_DEBUGLOG("delete finished\n"); + if(!ok) SPDK_ERRLOG("delete result: ok=%d\n", ok); return ok; } diff --git a/zvfs.h b/zvfs.h index b141d6b..d805180 100644 --- a/zvfs.h +++ b/zvfs.h @@ -15,7 +15,7 @@ extern const char *json_file; extern struct spdk_thread *global_thread; -static const int WAITER_MAX_TIME = 100000; +static const int WAITER_MAX_TIME = 10000000; /* 目录项(内存中的目录) */ typedef struct { @@ -69,6 +69,7 @@ typedef struct zvfs_file_s { size_t actual_io_count; const uint8_t *write_staging_buf; + int aligned; size_t io_count; bool finished; @@ -92,5 +93,6 @@ ssize_t read(int fd, void *buf, size_t count); ssize_t write(int fd, const void *buf, size_t count); int close(int fd); int unlink(const char *name); +off_t lseek(int fd, off_t offset, int whence); #endif \ No newline at end of file diff --git a/zvfs.json b/zvfs.json index 10ded9d..c942d16 100644 --- a/zvfs.json +++ b/zvfs.json @@ -4,14 +4,14 @@ "subsystem": "bdev", "config": [ { - "method": "bdev_malloc_create", + "method": "bdev_nvme_attach_controller", "params": { - "name": "Malloc0", - "num_blocks": 32768, - "block_size": 512 + "name": "Nvme0", + "trtype": "PCIe", + "traddr": "0000:03:00.0" } } ] } ] -} +} \ No newline at end of file diff --git a/zvfs.old/zvfs.json b/zvfs.old/zvfs.json index 10ded9d..9f788d7 100644 --- a/zvfs.old/zvfs.json +++ b/zvfs.old/zvfs.json @@ -7,7 +7,7 @@ "method": "bdev_malloc_create", "params": { "name": "Malloc0", - "num_blocks": 32768, + "num_blocks": 655360, "block_size": 512 } } diff --git a/zvfs_hook.c b/zvfs_hook.c index 86fc660..7607b95 100644 --- 
a/zvfs_hook.c +++ b/zvfs_hook.c @@ -36,6 +36,7 @@ static ssize_t (*real_read_fn) (int, void*, size_t) = NULL; static ssize_t (*real_write_fn)(int, const void*, size_t) = NULL; static int (*real_close_fn)(int) = NULL; static int (*real_unlink_fn)(const char *name) = NULL; +static off_t (*real_lseek_fn)(int fd, off_t offset, int whence) = NULL; __attribute__((constructor)) static void zvfs_preload_init(void) { @@ -44,6 +45,7 @@ static void zvfs_preload_init(void) { real_write_fn = dlsym(RTLD_NEXT, "write"); real_close_fn = dlsym(RTLD_NEXT, "close"); real_unlink_fn= dlsym(RTLD_NEXT, "unlink"); + real_lseek_fn = dlsym(RTLD_NEXT, "lseek"); } /* 判断路径是否由我们接管 */ @@ -173,6 +175,7 @@ static int zvfs_ensure_mounted(void) { } if (!zvfs_mount(g_fs)) { + zvfs_umount(g_fs); free(g_fs); g_fs = NULL; return -1; @@ -255,7 +258,12 @@ static zvfs_file_t *fd_lookup(int pseudo_fd) { /* ------------------------------------------------------------------ */ /* POSIX hook: open */ /* ------------------------------------------------------------------ */ - +/** + * O_RDONLY + * O_WRONLY + * O_RDWR + * O_CREAT + */ int open(const char *path, int flags, ...) { if (!is_zvfs_path(path)) { mode_t mode = 0; @@ -469,4 +477,49 @@ int unlink(const char *name) { } return 0; +} + +/* ------------------------------------------------------------------ */ +/* POSIX hook: unlink */ +/* ------------------------------------------------------------------ */ +/** + * SEEK_SET + * SEEK_CUR + * SEEK_END + */ +off_t lseek(int fd, off_t offset, int whence){ + if (!is_zvfs_fd(fd)) { + return real_lseek_fn(fd, offset, whence); + } + + zvfs_file_t *file = fd_lookup(fd); + if (!file) { errno = EBADF; return -1; } + + off_t new_offset; + uint64_t file_size = file->dirent ? 
file->dirent->file_size : 0; + + switch (whence) + { + case SEEK_SET: + new_offset = offset; + break; + case SEEK_CUR: + new_offset = (off_t)file->current_offset + offset; + break; + case SEEK_END: + new_offset = (off_t)file_size + offset; + break; + + default: + errno = EINVAL; + return -1; + } + + if (new_offset < 0) { + errno = EINVAL; + return -1; + } + + file->current_offset = (uint64_t)new_offset; + return new_offset; } \ No newline at end of file