fio & pgbench
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -4,3 +4,4 @@
|
||||
|
||||
codex/
|
||||
tests/bin
|
||||
zvfs_daemon
|
||||
146
README.md
146
README.md
@@ -179,6 +179,152 @@ LD_PRELOAD=./src/libzvfs.so ZVFS_TEST_ROOT=/zvfs ./tests/bin/hook_api_test
|
||||
- 相对同机 `O_DIRECT`,顺序写吞吐可有约 `2.2x~2.3x` 提升。
|
||||
- 非对齐写因 RMW 开销,吞吐明显下降。
|
||||
|
||||
### 5.4 fio
|
||||
```shell
|
||||
root@ubuntu20:/home/lian/try/zvfs# fio ./fio_script/psync.fio
|
||||
test: (g=0): rw=randwrite, bs=(R) 16.0KiB-16.0KiB, (W) 16.0KiB-16.0KiB, (T) 16.0KiB-16.0KiB, ioengine=psync, iodepth=64
|
||||
fio-3.16
|
||||
Starting 1 thread
|
||||
Jobs: 1 (f=1): [w(1)][100.0%][w=53.0MiB/s][w=3455 IOPS][eta 00m:00s]
|
||||
test: (groupid=0, jobs=1): err= 0: pid=23035: Fri Mar 13 13:25:32 2026
|
||||
Description : ["variable bs"]
|
||||
write: IOPS=3644, BW=56.9MiB/s (59.7MB/s)(570MiB/10001msec); 0 zone resets
|
||||
clat (usec): min=127, max=3496, avg=272.07, stdev=91.62
|
||||
lat (usec): min=128, max=3497, avg=272.41, stdev=91.69
|
||||
clat percentiles (usec):
|
||||
| 1.00th=[ 155], 5.00th=[ 169], 10.00th=[ 184], 20.00th=[ 202],
|
||||
| 30.00th=[ 225], 40.00th=[ 249], 50.00th=[ 262], 60.00th=[ 277],
|
||||
| 70.00th=[ 293], 80.00th=[ 322], 90.00th=[ 371], 95.00th=[ 420],
|
||||
| 99.00th=[ 545], 99.50th=[ 611], 99.90th=[ 881], 99.95th=[ 1467],
|
||||
| 99.99th=[ 2409]
|
||||
bw ( KiB/s): min=49376, max=70387, per=99.97%, avg=58295.30, stdev=6828.37, samples=20
|
||||
iops : min= 3086, max= 4399, avg=3643.40, stdev=426.71, samples=20
|
||||
lat (usec) : 250=41.44%, 500=56.78%, 750=1.59%, 1000=0.10%
|
||||
lat (msec) : 2=0.07%, 4=0.01%
|
||||
cpu : usr=1.82%, sys=37.19%, ctx=54169, majf=0, minf=0
|
||||
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
|
||||
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
|
||||
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
|
||||
issued rwts: total=0,36448,0,0 short=0,0,0,0 dropped=0,0,0,0
|
||||
latency : target=0, window=0, percentile=100.00%, depth=64
|
||||
|
||||
Run status group 0 (all jobs):
|
||||
WRITE: bw=56.9MiB/s (59.7MB/s), 56.9MiB/s-56.9MiB/s (59.7MB/s-59.7MB/s), io=570MiB (597MB), run=10001-10001msec
|
||||
|
||||
Disk stats (read/write):
|
||||
dm-0: ios=122/36085, merge=0/0, ticks=32/7748, in_queue=7780, util=99.13%, aggrios=109/36455, aggrmerge=13/12, aggrticks=28/7260, aggrin_queue=0, aggrutil=98.95%
|
||||
sda: ios=109/36455, merge=13/12, ticks=28/7260, in_queue=0, util=98.95%
|
||||
root@ubuntu20:/home/lian/try/zvfs# LD_PRELOAD=/home/lian/try/zvfs/src/libzvfs.so fio ./fio_script/zvfs.fio
|
||||
test: (g=0): rw=randwrite, bs=(R) 16.0KiB-16.0KiB, (W) 16.0KiB-16.0KiB, (T) 16.0KiB-16.0KiB, ioengine=psync, iodepth=64
|
||||
fio-3.16
|
||||
Starting 1 thread
|
||||
Jobs: 1 (f=1): [w(1)][100.0%][w=10.2MiB/s][w=650 IOPS][eta 00m:00s]
|
||||
test: (groupid=0, jobs=1): err= 0: pid=23891: Fri Mar 13 13:26:54 2026
|
||||
Description : ["variable bs"]
|
||||
write: IOPS=521, BW=8345KiB/s (8545kB/s)(81.5MiB/10001msec); 0 zone resets
|
||||
clat (usec): min=529, max=52465, avg=1909.16, stdev=1181.14
|
||||
lat (usec): min=530, max=52467, avg=1909.72, stdev=1181.21
|
||||
clat percentiles (usec):
|
||||
| 1.00th=[ 734], 5.00th=[ 922], 10.00th=[ 1037], 20.00th=[ 1237],
|
||||
| 30.00th=[ 1418], 40.00th=[ 1500], 50.00th=[ 1614], 60.00th=[ 1860],
|
||||
| 70.00th=[ 2024], 80.00th=[ 2311], 90.00th=[ 3130], 95.00th=[ 3982],
|
||||
| 99.00th=[ 5669], 99.50th=[ 5932], 99.90th=[ 6456], 99.95th=[ 6849],
|
||||
| 99.99th=[52691]
|
||||
bw ( KiB/s): min= 4704, max=11200, per=99.95%, avg=8339.75, stdev=2577.19, samples=20
|
||||
iops : min= 294, max= 700, avg=521.10, stdev=161.03, samples=20
|
||||
lat (usec) : 750=1.23%, 1000=7.07%
|
||||
lat (msec) : 2=60.10%, 4=26.84%, 10=4.74%, 100=0.02%
|
||||
cpu : usr=0.00%, sys=3.84%, ctx=5461, majf=0, minf=7
|
||||
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
|
||||
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
|
||||
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
|
||||
issued rwts: total=0,5216,0,0 short=0,0,0,0 dropped=0,0,0,0
|
||||
latency : target=0, window=0, percentile=100.00%, depth=64
|
||||
|
||||
Run status group 0 (all jobs):
|
||||
WRITE: bw=8345KiB/s (8545kB/s), 8345KiB/s-8345KiB/s (8545kB/s-8545kB/s), io=81.5MiB (85.5MB), run=10001-10001msec
|
||||
|
||||
Disk stats (read/write):
|
||||
dm-0: ios=39/7287, merge=0/0, ticks=12/7888, in_queue=7900, util=1.72%, aggrios=39/478, aggrmerge=0/6818, aggrticks=12/352, aggrin_queue=0, aggrutil=1.70%
|
||||
sda: ios=39/478, merge=0/6818, ticks=12/352, in_queue=0, util=1.70%
|
||||
```
|
||||
|
||||
#### psync
|
||||
```shell
|
||||
root@ubuntu20:/home/lian/try/zvfs# fio ./fio_script/psync.fio
|
||||
test: (g=0): rw=randwrite, bs=(R) 16.0KiB-16.0KiB, (W) 16.0KiB-16.0KiB, (T) 16.0KiB-16.0KiB, ioengine=psync, iodepth=64
|
||||
fio-3.16
|
||||
Starting 1 thread
|
||||
Jobs: 1 (f=1): [w(1)][100.0%][w=53.0MiB/s][w=3455 IOPS][eta 00m:00s]
|
||||
test: (groupid=0, jobs=1): err= 0: pid=23035: Fri Mar 13 13:25:32 2026
|
||||
Description : ["variable bs"]
|
||||
write: IOPS=3644, BW=56.9MiB/s (59.7MB/s)(570MiB/10001msec); 0 zone resets
|
||||
clat (usec): min=127, max=3496, avg=272.07, stdev=91.62
|
||||
lat (usec): min=128, max=3497, avg=272.41, stdev=91.69
|
||||
clat percentiles (usec):
|
||||
| 1.00th=[ 155], 5.00th=[ 169], 10.00th=[ 184], 20.00th=[ 202],
|
||||
| 30.00th=[ 225], 40.00th=[ 249], 50.00th=[ 262], 60.00th=[ 277],
|
||||
| 70.00th=[ 293], 80.00th=[ 322], 90.00th=[ 371], 95.00th=[ 420],
|
||||
| 99.00th=[ 545], 99.50th=[ 611], 99.90th=[ 881], 99.95th=[ 1467],
|
||||
| 99.99th=[ 2409]
|
||||
bw ( KiB/s): min=49376, max=70387, per=99.97%, avg=58295.30, stdev=6828.37, samples=20
|
||||
iops : min= 3086, max= 4399, avg=3643.40, stdev=426.71, samples=20
|
||||
lat (usec) : 250=41.44%, 500=56.78%, 750=1.59%, 1000=0.10%
|
||||
lat (msec) : 2=0.07%, 4=0.01%
|
||||
cpu : usr=1.82%, sys=37.19%, ctx=54169, majf=0, minf=0
|
||||
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
|
||||
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
|
||||
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
|
||||
issued rwts: total=0,36448,0,0 short=0,0,0,0 dropped=0,0,0,0
|
||||
latency : target=0, window=0, percentile=100.00%, depth=64
|
||||
|
||||
Run status group 0 (all jobs):
|
||||
WRITE: bw=56.9MiB/s (59.7MB/s), 56.9MiB/s-56.9MiB/s (59.7MB/s-59.7MB/s), io=570MiB (597MB), run=10001-10001msec
|
||||
|
||||
Disk stats (read/write):
|
||||
dm-0: ios=122/36085, merge=0/0, ticks=32/7748, in_queue=7780, util=99.13%, aggrios=109/36455, aggrmerge=13/12, aggrticks=28/7260, aggrin_queue=0, aggrutil=98.95%
|
||||
sda: ios=109/36455, merge=13/12, ticks=28/7260, in_queue=0, util=98.95%
|
||||
```
|
||||
|
||||
### 5.5 pgbench
|
||||
```shell
|
||||
root@ubuntu20:/home/lian/try/zvfs# ./scripts/run_pgbench_no_mmap.sh
|
||||
当前配置:
|
||||
host=127.0.0.1 port=5432 db=benchdb
|
||||
scale=1 clients=1 threads=1 time=15s preload=1
|
||||
init_jobs=1 init_steps=dtg skip_init=0
|
||||
|
||||
[1/2] pgbench 初始化(-i)
|
||||
some of the specified options cannot be used in initialization (-i) mode
|
||||
root@ubuntu20:/home/lian/try/zvfs# ./scripts/run_pgbench_no_mmap.sh
|
||||
当前配置:
|
||||
host=127.0.0.1 port=5432 db=benchdb
|
||||
scale=1 clients=1 threads=1 time=15s preload=1
|
||||
init_jobs=1 init_steps=dtg skip_init=0
|
||||
|
||||
[1/2] pgbench 初始化(-i)
|
||||
dropping old tables...
|
||||
NOTICE: table "pgbench_accounts" does not exist, skipping
|
||||
NOTICE: table "pgbench_branches" does not exist, skipping
|
||||
NOTICE: table "pgbench_history" does not exist, skipping
|
||||
NOTICE: table "pgbench_tellers" does not exist, skipping
|
||||
creating tables...
|
||||
generating data...
|
||||
100000 of 100000 tuples (100%) done (elapsed 0.02 s, remaining 0.00 s)
|
||||
done.
|
||||
[2/2] pgbench 压测(-T)
|
||||
starting vacuum...end.
|
||||
transaction type: <builtin: TPC-B (sort of)>
|
||||
scaling factor: 1
|
||||
query mode: simple
|
||||
number of clients: 1
|
||||
number of threads: 1
|
||||
duration: 15 s
|
||||
number of transactions actually processed: 1381
|
||||
latency average = 10.869 ms
|
||||
tps = 92.003503 (including connections establishing)
|
||||
tps = 92.206743 (excluding connections establishing)
|
||||
```
|
||||
---
|
||||
|
||||
## 6. 关键工程难点与踩坑复盘(重点)
|
||||
|
||||
Binary file not shown.
20
fio_script/zvfs.fio
Normal file
20
fio_script/zvfs.fio
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
[global]
|
||||
thread=1
|
||||
group_reporting=1
|
||||
direct=1
|
||||
verify=0
|
||||
time_based=1
|
||||
runtime=10
|
||||
bs=16K
|
||||
size=16384
|
||||
iodepth=64
|
||||
rw=randwrite
|
||||
filename=/zvfs/fio/zvfsfio
|
||||
ioengine=psync
|
||||
|
||||
[test]
|
||||
stonewall
|
||||
description="variable bs"
|
||||
bs=16K
|
||||
|
||||
@@ -12,9 +12,13 @@ set -euo pipefail
|
||||
PG_HOST="127.0.0.1"
|
||||
PG_PORT="5432"
|
||||
PG_DB="benchdb"
|
||||
PG_SCALE="10"
|
||||
PG_TIME="20"
|
||||
PG_CLIENTS="2"
|
||||
PG_SCALE="1"
|
||||
PG_TIME="15"
|
||||
PG_CLIENTS="1"
|
||||
PG_THREADS="1"
|
||||
PG_INIT_JOBS="1"
|
||||
PG_INIT_STEPS="dtg"
|
||||
PG_SKIP_INIT="0"
|
||||
PG_SUPERUSER="postgres"
|
||||
USE_LD_PRELOAD="1"
|
||||
LD_PRELOAD_PATH="/home/lian/try/zvfs/src/libzvfs.so"
|
||||
@@ -41,13 +45,19 @@ run_pg_cmd() {
|
||||
|
||||
echo "当前配置:"
|
||||
echo " host=${PG_HOST} port=${PG_PORT} db=${PG_DB}"
|
||||
echo " scale=${PG_SCALE} clients=${PG_CLIENTS} time=${PG_TIME}s preload=${USE_LD_PRELOAD}"
|
||||
echo " scale=${PG_SCALE} clients=${PG_CLIENTS} threads=${PG_THREADS} time=${PG_TIME}s preload=${USE_LD_PRELOAD}"
|
||||
echo " init_jobs=${PG_INIT_JOBS} init_steps=${PG_INIT_STEPS} skip_init=${PG_SKIP_INIT}"
|
||||
echo
|
||||
|
||||
echo "[1/2] pgbench 初始化(-i)"
|
||||
run_pg_cmd "${PG_BIN_DIR}/pgbench" \
|
||||
-h "${PG_HOST}" -p "${PG_PORT}" -i -s "${PG_SCALE}" "${PG_DB}"
|
||||
if [[ "${PG_SKIP_INIT}" != "1" ]]; then
|
||||
echo "[1/2] pgbench 初始化(-i)"
|
||||
run_pg_cmd "${PG_BIN_DIR}/pgbench" \
|
||||
-h "${PG_HOST}" -p "${PG_PORT}" -i \
|
||||
-s "${PG_SCALE}" -I "${PG_INIT_STEPS}" "${PG_DB}"
|
||||
else
|
||||
echo "[1/2] 跳过初始化(PG_SKIP_INIT=1)"
|
||||
fi
|
||||
|
||||
echo "[2/2] pgbench 压测(-T)"
|
||||
run_pg_cmd "${PG_BIN_DIR}/pgbench" \
|
||||
-h "${PG_HOST}" -p "${PG_PORT}" -c "${PG_CLIENTS}" -T "${PG_TIME}" "${PG_DB}"
|
||||
-h "${PG_HOST}" -p "${PG_PORT}" -c "${PG_CLIENTS}" -j "${PG_THREADS}" -T "${PG_TIME}" "${PG_DB}"
|
||||
|
||||
@@ -185,7 +185,7 @@ static void on_read(struct zvfs_conn *c, void *ctx)
|
||||
break; /* 等待更多数据 */
|
||||
}
|
||||
|
||||
printf("[req][%s]\n", cast_opcode2string(req->opcode));
|
||||
// printf("[req][%s]\n", cast_opcode2string(req->opcode));
|
||||
req->conn = c;
|
||||
offset += consumed;
|
||||
|
||||
|
||||
@@ -30,6 +30,17 @@ static uint64_t now_mono_ms(void) {
|
||||
return (uint64_t)ts.tv_sec * 1000ULL + (uint64_t)ts.tv_nsec / 1000000ULL;
|
||||
}
|
||||
|
||||
/*
 * Completion callback for "fire-and-forget" spdk_blob_close() calls on
 * error paths. spdk_blob_close() requires a non-NULL completion callback,
 * so this one exists purely to satisfy that contract: it ignores its
 * context argument and only logs when the close itself reports an error.
 *
 * arg      - unused callback context.
 * bserrno  - close result; 0 means success.
 */
static void blob_close_noop_cb(void *arg, int bserrno) {
    (void)arg;

    /* Successful close: nothing to report. */
    if (bserrno == 0) {
        return;
    }

    SPDK_ERRLOG("blob_close_noop_cb: close failed: %d\n", bserrno);
}
|
||||
|
||||
/** ===========================================================
|
||||
* 内部辅助:错误路径统一 push resp 并释放 req
|
||||
* 仅用于无法构造正常 resp 的错误情形
|
||||
@@ -509,7 +520,7 @@ static void create_resize_cb(void *arg, int bserrno) {
|
||||
struct create_chain_ctx *cctx = arg;
|
||||
if (bserrno != 0) {
|
||||
SPDK_ERRLOG("create resize failed: %d\n", bserrno);
|
||||
spdk_blob_close(cctx->blob, NULL, NULL);
|
||||
spdk_blob_close(cctx->blob, blob_close_noop_cb, NULL);
|
||||
push_err_resp(cctx->req, bserrno);
|
||||
free(cctx);
|
||||
return;
|
||||
@@ -521,7 +532,7 @@ static void create_sync_cb(void *arg, int bserrno) {
|
||||
struct create_chain_ctx *cctx = arg;
|
||||
if (bserrno != 0) {
|
||||
SPDK_ERRLOG("create sync_md failed: %d\n", bserrno);
|
||||
spdk_blob_close(cctx->blob, NULL, NULL);
|
||||
spdk_blob_close(cctx->blob, blob_close_noop_cb, NULL);
|
||||
push_err_resp(cctx->req, bserrno);
|
||||
free(cctx);
|
||||
return;
|
||||
@@ -530,7 +541,7 @@ static void create_sync_cb(void *arg, int bserrno) {
|
||||
/* 构造 handle */
|
||||
struct zvfs_blob_handle *handle = calloc(1, sizeof(*handle));
|
||||
if (!handle) {
|
||||
spdk_blob_close(cctx->blob, NULL, NULL);
|
||||
spdk_blob_close(cctx->blob, blob_close_noop_cb, NULL);
|
||||
push_err_resp(cctx->req, -ENOMEM);
|
||||
free(cctx);
|
||||
return;
|
||||
@@ -541,7 +552,7 @@ static void create_sync_cb(void *arg, int bserrno) {
|
||||
atomic_init(&handle->ref_count, 1);
|
||||
handle->dma_buf = spdk_dma_malloc(ZVFS_DMA_BUF_SIZE, g_engine.io_unit_size, NULL);
|
||||
if (!handle->dma_buf) {
|
||||
spdk_blob_close(cctx->blob, NULL, NULL);
|
||||
spdk_blob_close(cctx->blob, blob_close_noop_cb, NULL);
|
||||
free(handle);
|
||||
push_err_resp(cctx->req, -ENOMEM);
|
||||
free(cctx);
|
||||
@@ -552,7 +563,7 @@ static void create_sync_cb(void *arg, int bserrno) {
|
||||
struct zvfs_resp *resp = calloc(1, sizeof(*resp));
|
||||
if (!resp) {
|
||||
spdk_dma_free(handle->dma_buf);
|
||||
spdk_blob_close(cctx->blob, NULL, NULL);
|
||||
spdk_blob_close(cctx->blob, blob_close_noop_cb, NULL);
|
||||
free(handle);
|
||||
push_err_resp(cctx->req, -ENOMEM);
|
||||
free(cctx);
|
||||
@@ -566,7 +577,7 @@ static void create_sync_cb(void *arg, int bserrno) {
|
||||
zvfs_handle_id_t handle_id;
|
||||
if (engine_cache_insert(handle, &handle_id) != 0) {
|
||||
spdk_dma_free(handle->dma_buf);
|
||||
spdk_blob_close(cctx->blob, NULL, NULL);
|
||||
spdk_blob_close(cctx->blob, blob_close_noop_cb, NULL);
|
||||
free(handle);
|
||||
push_err_resp(cctx->req, -ENOMEM);
|
||||
free(cctx);
|
||||
@@ -609,7 +620,7 @@ static void blob_open_done_cb(void *arg, struct spdk_blob *blob, int bserrno) {
|
||||
atomic_init(&handle->ref_count, 1);
|
||||
handle->dma_buf = spdk_dma_malloc(ZVFS_DMA_BUF_SIZE, g_engine.io_unit_size, NULL);
|
||||
if (!handle->dma_buf) {
|
||||
spdk_blob_close(blob, NULL, NULL);
|
||||
spdk_blob_close(blob, blob_close_noop_cb, NULL);
|
||||
free(handle);
|
||||
push_err_resp(octx->req, -ENOMEM);
|
||||
free(octx);
|
||||
@@ -619,7 +630,7 @@ static void blob_open_done_cb(void *arg, struct spdk_blob *blob, int bserrno) {
|
||||
struct zvfs_resp *resp = calloc(1, sizeof(*resp));
|
||||
if (!resp) {
|
||||
spdk_dma_free(handle->dma_buf);
|
||||
spdk_blob_close(blob, NULL, NULL);
|
||||
spdk_blob_close(blob, blob_close_noop_cb, NULL);
|
||||
free(handle);
|
||||
push_err_resp(octx->req, -ENOMEM);
|
||||
free(octx);
|
||||
@@ -632,7 +643,7 @@ static void blob_open_done_cb(void *arg, struct spdk_blob *blob, int bserrno) {
|
||||
zvfs_handle_id_t handle_id;
|
||||
if (engine_cache_insert(handle, &handle_id) != 0) {
|
||||
spdk_dma_free(handle->dma_buf);
|
||||
spdk_blob_close(blob, NULL, NULL);
|
||||
spdk_blob_close(blob, blob_close_noop_cb, NULL);
|
||||
free(handle);
|
||||
push_err_resp(octx->req, -ENOMEM);
|
||||
free(octx);
|
||||
|
||||
Binary file not shown.
@@ -17,6 +17,31 @@
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Report whether a fcntl() command carries a third argument.
 *
 * Returns 0 for the query-style commands listed below, which are issued
 * as fcntl(fd, cmd) with nothing after cmd, and 1 for every other command.
 * The fcntl hooks use the result to decide whether a value may be pulled
 * from the va_list before forwarding the call.
 *
 * Every command other than the baseline F_GETFD / F_GETFL pair is wrapped
 * in #ifdef because its visibility depends on the platform and on the
 * feature-test macros in effect when <fcntl.h> is included.
 */
static int
zvfs_fcntl_cmd_has_arg(int cmd)
{
    switch (cmd) {
    case F_GETFD:
    case F_GETFL:
#ifdef F_GETOWN
    case F_GETOWN:
#endif
#ifdef F_GETSIG
    case F_GETSIG:
#endif
#ifdef F_GETLEASE
    case F_GETLEASE:
#endif
#ifdef F_GETPIPE_SZ
    case F_GETPIPE_SZ:
#endif
#ifdef F_GET_SEALS
    case F_GET_SEALS:
#endif
        /* Pure getters: no third argument is passed by the caller. */
        return 0;
    default:
        /* Anything else is assumed to take an integer or pointer argument. */
        return 1;
    }
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:fcntl 核心逻辑(已确认是 zvfs fd) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
@@ -98,13 +123,11 @@ zvfs_fcntl_impl(int fd, int cmd, va_list ap)
|
||||
|
||||
/* ---- 其他 cmd:透传给内核(同时维护真实 fd 状态)-------------- */
|
||||
default: {
|
||||
/*
|
||||
* 取出可变参数作为 void* 透传。
|
||||
* 大多数 fcntl cmd 的第三个参数是 long 或指针,
|
||||
* 用 void* 接收足够覆盖所有平台(64-bit)。
|
||||
*/
|
||||
void *arg = va_arg(ap, void *);
|
||||
return real_fcntl(fd, cmd, arg);
|
||||
if (zvfs_fcntl_cmd_has_arg(cmd)) {
|
||||
long arg = va_arg(ap, long);
|
||||
return real_fcntl(fd, cmd, arg);
|
||||
}
|
||||
return real_fcntl(fd, cmd);
|
||||
}
|
||||
|
||||
} /* switch */
|
||||
@@ -124,13 +147,12 @@ fcntl(int fd, int cmd, ...)
|
||||
|
||||
int ret;
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(fd)) {
|
||||
/*
|
||||
* 非 zvfs fd:透传。
|
||||
* va_list 转发需要用 vfprintf 风格,但 fcntl 没有标准的
|
||||
* va_list 版本。用 void* 提取第三参数再透传。
|
||||
*/
|
||||
void *arg = va_arg(ap, void *);
|
||||
ret = real_fcntl(fd, cmd, arg);
|
||||
if (zvfs_fcntl_cmd_has_arg(cmd)) {
|
||||
long arg = va_arg(ap, long);
|
||||
ret = real_fcntl(fd, cmd, arg);
|
||||
} else {
|
||||
ret = real_fcntl(fd, cmd);
|
||||
}
|
||||
va_end(ap);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
@@ -147,30 +169,29 @@ fcntl(int fd, int cmd, ...)
|
||||
int
|
||||
fcntl64(int fd, int cmd, ...)
|
||||
{
|
||||
/*
|
||||
* fcntl64 是 glibc 在 32-bit 系统上的 large-file 变体,
|
||||
* 语义与 fcntl 相同,直接转发。
|
||||
*/
|
||||
ZVFS_HOOK_ENTER();
|
||||
va_list ap;
|
||||
va_start(ap, cmd);
|
||||
void *arg = va_arg(ap, void *);
|
||||
va_end(ap);
|
||||
|
||||
ZVFS_HOOK_ENTER();
|
||||
int ret;
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(fd)) {
|
||||
ret = real_fcntl64 ? real_fcntl64(fd, cmd, arg)
|
||||
: real_fcntl(fd, cmd, arg);
|
||||
if (zvfs_fcntl_cmd_has_arg(cmd)) {
|
||||
long arg = va_arg(ap, long);
|
||||
ret = real_fcntl64 ? real_fcntl64(fd, cmd, arg)
|
||||
: real_fcntl(fd, cmd, arg);
|
||||
} else {
|
||||
ret = real_fcntl64 ? real_fcntl64(fd, cmd)
|
||||
: real_fcntl(fd, cmd);
|
||||
}
|
||||
va_end(ap);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
va_list ap2;
|
||||
va_start(ap2, cmd);
|
||||
ret = zvfs_fcntl_impl(fd, cmd, ap2);
|
||||
va_end(ap2);
|
||||
ret = zvfs_fcntl_impl(fd, cmd, ap);
|
||||
va_end(ap);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
|
||||
@@ -887,6 +887,10 @@ int __libc_openat64(int dirfd, const char *path, int flags, ...)
|
||||
|
||||
int __open_2(const char *path, int flags)
|
||||
{
|
||||
if ((flags & O_CREAT) || ((flags & O_TMPFILE) == O_TMPFILE)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
zvfs_debug_open_log(path, NULL,
|
||||
"__open_2 called path=%s flags=0x%x",
|
||||
zvfs_dbg_str(path), flags);
|
||||
@@ -895,11 +899,19 @@ int __open_2(const char *path, int flags)
|
||||
|
||||
int __open64_2(const char *path, int flags)
|
||||
{
|
||||
if ((flags & O_CREAT) || ((flags & O_TMPFILE) == O_TMPFILE)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
return open64(path, flags);
|
||||
}
|
||||
|
||||
int __openat_2(int dirfd, const char *path, int flags)
|
||||
{
|
||||
if ((flags & O_CREAT) || ((flags & O_TMPFILE) == O_TMPFILE)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
zvfs_debug_open_log(path, NULL,
|
||||
"__openat_2 called dirfd=%d path=%s flags=0x%x",
|
||||
dirfd, zvfs_dbg_str(path), flags);
|
||||
@@ -908,6 +920,10 @@ int __openat_2(int dirfd, const char *path, int flags)
|
||||
|
||||
int __openat64_2(int dirfd, const char *path, int flags)
|
||||
{
|
||||
if ((flags & O_CREAT) || ((flags & O_TMPFILE) == O_TMPFILE)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
return openat64(dirfd, path, flags);
|
||||
}
|
||||
|
||||
|
||||
@@ -382,6 +382,8 @@ zvfs_vfscanf_passthrough(FILE *stream, const char *format, va_list ap, int use_i
|
||||
return vfscanf(stream, format, ap);
|
||||
}
|
||||
|
||||
#if 1
|
||||
/* NOTE: the fscanf hook is currently compiled in (#if 1). Switch the guard to 0 to disable it; the hook has caused db_bench to crash under LD_PRELOAD. */
|
||||
static int
|
||||
zvfs_vfscanf_impl(FILE *stream, const char *format, va_list ap, int use_isoc99)
|
||||
{
|
||||
@@ -503,6 +505,7 @@ int fscanf(FILE *stream, const char *format, ...)
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* read */
|
||||
|
||||
@@ -16,12 +16,15 @@
|
||||
|
||||
struct ipc_client_ctx {
|
||||
int fd;
|
||||
uint8_t rx_buf[ZVFS_IPC_BUF_SIZE];
|
||||
uint8_t *rx_buf;
|
||||
uint8_t *tx_buf;
|
||||
size_t rx_len;
|
||||
};
|
||||
|
||||
static __thread struct ipc_client_ctx g_ipc_tls = {
|
||||
.fd = -1,
|
||||
.rx_buf = NULL,
|
||||
.tx_buf = NULL,
|
||||
.rx_len = 0,
|
||||
};
|
||||
|
||||
@@ -47,6 +50,24 @@ static void ipc_close_conn(struct ipc_client_ctx *ctx) {
|
||||
ctx->rx_len = 0;
|
||||
}
|
||||
|
||||
static int ipc_ensure_buffers(struct ipc_client_ctx *ctx) {
|
||||
if (!ctx->rx_buf) {
|
||||
ctx->rx_buf = (uint8_t *)malloc(ZVFS_IPC_BUF_SIZE);
|
||||
if (!ctx->rx_buf) {
|
||||
errno = ENOMEM;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (!ctx->tx_buf) {
|
||||
ctx->tx_buf = (uint8_t *)malloc(ZVFS_IPC_BUF_SIZE);
|
||||
if (!ctx->tx_buf) {
|
||||
errno = ENOMEM;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ipc_connect(struct ipc_client_ctx *ctx) {
|
||||
int fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
if (fd < 0) {
|
||||
@@ -113,12 +134,12 @@ static int try_pop_resp(struct ipc_client_ctx *ctx, struct zvfs_resp *resp) {
|
||||
|
||||
static int read_into_rx(struct ipc_client_ctx *ctx) {
|
||||
while (1) {
|
||||
if (ctx->rx_len >= sizeof(ctx->rx_buf)) {
|
||||
if (ctx->rx_len >= ZVFS_IPC_BUF_SIZE) {
|
||||
errno = EOVERFLOW;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ssize_t n = read(ctx->fd, ctx->rx_buf + ctx->rx_len, sizeof(ctx->rx_buf) - ctx->rx_len);
|
||||
ssize_t n = read(ctx->fd, ctx->rx_buf + ctx->rx_len, ZVFS_IPC_BUF_SIZE - ctx->rx_len);
|
||||
if (n > 0) {
|
||||
ctx->rx_len += (size_t)n;
|
||||
return 0;
|
||||
@@ -152,7 +173,7 @@ static int recv_one_resp(struct ipc_client_ctx *ctx, struct zvfs_resp *resp_out)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ctx->rx_len == sizeof(ctx->rx_buf)) {
|
||||
if (ctx->rx_len == ZVFS_IPC_BUF_SIZE) {
|
||||
struct zvfs_resp probe;
|
||||
memset(&probe, 0, sizeof(probe));
|
||||
if (zvfs_deserialize_resp(ctx->rx_buf, ctx->rx_len, &probe) == 0) {
|
||||
@@ -183,22 +204,24 @@ static int set_errno_by_status(int status) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint8_t tx[ZVFS_IPC_BUF_SIZE];
|
||||
|
||||
static int ipc_do_req(struct zvfs_req *req, struct zvfs_resp *resp_out) {
|
||||
struct ipc_client_ctx *ctx = &g_ipc_tls;
|
||||
|
||||
if (ipc_ensure_buffers(ctx) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ipc_ensure_connected(ctx) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t tx_len = zvfs_serialize_req(req, tx, sizeof(tx));
|
||||
size_t tx_len = zvfs_serialize_req(req, ctx->tx_buf, ZVFS_IPC_BUF_SIZE);
|
||||
if (tx_len == 0) {
|
||||
errno = EMSGSIZE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (write_all(ctx->fd, tx, tx_len) != 0) {
|
||||
if (write_all(ctx->fd, ctx->tx_buf, tx_len) != 0) {
|
||||
ipc_close_conn(ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"method": "bdev_malloc_create",
|
||||
"params": {
|
||||
"name": "Malloc0",
|
||||
"num_blocks": 1048576,
|
||||
"num_blocks": 524288,
|
||||
"block_size": 512
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user