rebuild
This commit is contained in:
51
src/Makefile
Executable file
51
src/Makefile
Executable file
@@ -0,0 +1,51 @@
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# Copyright (C) 2017 Intel Corporation
|
||||
# All rights reserved.
|
||||
#
|
||||
|
||||
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../spdk)
|
||||
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
|
||||
include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk
|
||||
include $(SPDK_ROOT_DIR)/mk/spdk.app_vars.mk
|
||||
|
||||
LIBZVFS := libzvfs.so
|
||||
|
||||
C_SRCS := \
|
||||
common/utils.c \
|
||||
spdk_engine/io_engine.c \
|
||||
fs/zvfs.c \
|
||||
fs/zvfs_inode.c \
|
||||
fs/zvfs_path_entry.c \
|
||||
fs/zvfs_open_file.c \
|
||||
fs/zvfs_sys_init.c \
|
||||
hook/zvfs_hook_init.c \
|
||||
hook/zvfs_hook_fd.c \
|
||||
hook/zvfs_hook_rw.c \
|
||||
hook/zvfs_hook_seek.c \
|
||||
hook/zvfs_hook_stat.c \
|
||||
hook/zvfs_hook_sync.c \
|
||||
hook/zvfs_hook_fcntl.c \
|
||||
hook/zvfs_hook_dir.c \
|
||||
hook/zvfs_hook_mmap.c \
|
||||
|
||||
|
||||
SPDK_LIB_LIST = $(ALL_MODULES_LIST) event event_bdev
|
||||
|
||||
LIBS += $(SPDK_LIB_LINKER_ARGS)
|
||||
CFLAGS += -I$(abspath $(CURDIR))
|
||||
LDFLAGS += -shared -rdynamic -Wl,-z,nodelete -Wl,--disable-new-dtags \
|
||||
-Wl,-rpath,$(SPDK_ROOT_DIR)/build/lib \
|
||||
-Wl,-rpath,$(SPDK_ROOT_DIR)/dpdk/build/lib
|
||||
SYS_LIBS += -ldl
|
||||
|
||||
|
||||
all: $(LIBZVFS)
|
||||
@:
|
||||
|
||||
$(LIBZVFS): $(OBJS) $(SPDK_LIB_FILES) $(ENV_LIBS)
|
||||
$(LINK_C)
|
||||
|
||||
clean:
|
||||
$(CLEAN_C) $(LIBZVFS)
|
||||
|
||||
include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
|
||||
1137
src/common/uthash.h
Normal file
1137
src/common/uthash.h
Normal file
File diff suppressed because it is too large
Load Diff
93
src/common/utils.c
Normal file
93
src/common/utils.c
Normal file
@@ -0,0 +1,93 @@
|
||||
#include "utils.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Convert a byte range [offset_bytes, offset_bytes + len_bytes) into the
 * smallest range of whole io units that covers it.
 *
 * Outputs:
 *   unit_offset     index of the first io unit touched by the range
 *   unit_len        number of io units needed to cover the range
 *   buf_offset_out  offset of the requested data inside the first io unit
 *                   (i.e. inside an io-unit-aligned bounce buffer)
 *
 * A zero-length request yields all-zero outputs.
 *
 * Returns 0 on success, -EINVAL for a NULL output pointer or a zero
 * io_unit_size, and -EOVERFLOW when offset_bytes + len_bytes does not fit in
 * 64 bits.
 */
int zvfs_calc_io_units(uint64_t offset_bytes,
                       size_t len_bytes,
                       uint32_t io_unit_size,
                       uint64_t *unit_offset,
                       uint64_t *unit_len,
                       uint32_t *buf_offset_out)
{
    if (!unit_offset || !unit_len || !buf_offset_out || io_unit_size == 0) {
        return -EINVAL;
    }

    if (len_bytes == 0) {
        *unit_offset = 0;
        *unit_len = 0;
        *buf_offset_out = 0;
        return 0;
    }

    /* Reject ranges whose end would wrap around instead of silently
     * producing a bogus (tiny) unit count. */
    if ((uint64_t)len_bytes > UINT64_MAX - offset_bytes) {
        return -EOVERFLOW;
    }

    /* Work with unit indices rather than rounded byte addresses: rounding
     * the end address up ("end + io_unit_size - 1") can itself overflow
     * when the range ends near UINT64_MAX. len_bytes > 0, so the last byte
     * index below is well defined. */
    uint64_t first_unit = offset_bytes / io_unit_size;
    uint64_t last_byte = offset_bytes + (uint64_t)len_bytes - 1;
    uint64_t last_unit = last_byte / io_unit_size;

    *unit_offset = first_unit;
    *unit_len = last_unit - first_unit + 1;
    /* The remainder is < io_unit_size, so it always fits in 32 bits. */
    *buf_offset_out = (uint32_t)(offset_bytes % io_unit_size);

    return 0;
}
|
||||
|
||||
/*
 * Compute how many units of unit_size are needed to hold `bytes`
 * (division rounded up) and store the result in *units_out.
 *
 * Returns 0 on success, -EINVAL when units_out is NULL or unit_size is 0,
 * and -EOVERFLOW if rounding up would exceed UINT64_MAX.
 */
int zvfs_calc_ceil_units(uint64_t bytes,
                         uint64_t unit_size,
                         uint64_t *units_out)
{
    if (units_out == NULL || unit_size == 0) {
        return -EINVAL;
    }

    const uint64_t whole_units = bytes / unit_size;
    const int has_partial_unit = (bytes % unit_size) != 0;

    /* The quotient is published before the overflow check, so it is
     * visible to the caller even on the -EOVERFLOW path. */
    *units_out = whole_units;
    if (!has_partial_unit) {
        return 0;
    }

    /* Defensive guard against wrapping when adding the partial unit. */
    if (whole_units == UINT64_MAX) {
        return -EOVERFLOW;
    }

    *units_out = whole_units + 1;
    return 0;
}
|
||||
|
||||
/*
 * Initialise a growable byte buffer with room for `initial` bytes.
 *
 * A request for 0 bytes is rounded up to 1: malloc(0) may legally return
 * NULL (which would be misreported as an allocation failure), and a zero
 * capacity can never grow by doubling.
 *
 * Returns 0 on success, -1 if b is NULL or the allocation fails. On
 * allocation failure the buffer is left empty (data == NULL, cap == len == 0)
 * so that buf_free() remains safe to call.
 */
int buf_init(zvfs_buf_t *b, size_t initial)
{
    if (!b) {
        return -1;
    }

    size_t cap = initial ? initial : 1;

    b->len = 0;
    b->cap = 0;
    b->data = malloc(cap);
    if (!b->data) {
        return -1;
    }

    b->cap = cap;
    return 0;
}
|
||||
|
||||
/*
 * Release the storage owned by the buffer and reset it to the empty state
 * (data == NULL, len == cap == 0). The zvfs_buf_t object itself is not freed.
 */
void buf_free(zvfs_buf_t *b)
{
    uint8_t *storage = b->data;

    b->data = NULL;
    b->cap = 0;
    b->len = 0;

    free(storage); /* free(NULL) is a no-op */
}
|
||||
|
||||
/*
|
||||
* 确保缓冲区还有 need 字节可用,不够则 realloc 两倍。
|
||||
*/
|
||||
int buf_reserve(zvfs_buf_t *b, size_t need)
|
||||
{
|
||||
if (b->len + need <= b->cap) return 0;
|
||||
|
||||
size_t new_cap = b->cap * 2;
|
||||
while (new_cap < b->len + need) new_cap *= 2;
|
||||
|
||||
uint8_t *p = realloc(b->data, new_cap);
|
||||
if (!p) return -1;
|
||||
b->data = p;
|
||||
b->cap = new_cap;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Append n bytes from src to the end of the buffer, growing it first if
 * needed.
 *
 * Returns 0 on success, -1 if the buffer could not be grown (in which case
 * nothing is copied).
 */
int buf_append(zvfs_buf_t *b, const void *src, size_t n)
{
    const int grow_rc = buf_reserve(b, n);
    if (grow_rc != 0) {
        return -1;
    }

    uint8_t *tail = b->data + b->len;
    memcpy(tail, src, n);
    b->len += n;

    return 0;
}
|
||||
29
src/common/utils.h
Normal file
29
src/common/utils.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#ifndef __ZVFS_COMMON_UTILS_H__
|
||||
#define __ZVFS_COMMON_UTILS_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
int zvfs_calc_io_units(uint64_t offset_bytes,
|
||||
size_t len_bytes,
|
||||
uint32_t io_unit_size,
|
||||
uint64_t *unit_offset,
|
||||
uint64_t *unit_len,
|
||||
uint32_t *buf_offset_out);
|
||||
|
||||
int zvfs_calc_ceil_units(uint64_t bytes,
|
||||
uint64_t unit_size,
|
||||
uint64_t *units_out);
|
||||
|
||||
/* Growable byte buffer managed by the buf_init / buf_reserve / buf_append /
 * buf_free helpers. */
typedef struct {
    uint8_t *data; /* heap storage, NULL after buf_free() */
    size_t cap;    /* bytes allocated at data */
    size_t len;    /* bytes currently in use (len <= cap) */
} zvfs_buf_t;
|
||||
|
||||
int buf_init(zvfs_buf_t *b, size_t initial);
|
||||
void buf_free(zvfs_buf_t *b);
|
||||
int buf_reserve(zvfs_buf_t *b, size_t need);
|
||||
int buf_append(zvfs_buf_t *b, const void *src, size_t n);
|
||||
|
||||
#endif // __ZVFS_COMMON_UTILS_H__
|
||||
32
src/config.h
Normal file
32
src/config.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef __ZVFS_CONFIG_H__
#define __ZVFS_CONFIG_H__

/* UINT64_C / UINT32_C below come from <stdint.h>; include it here so the
 * header is self-contained. */
#include <stdint.h>

/**
 * ZVFS
 */
/* Extended attribute holding the blob id on the backing file. */
#define ZVFS_XATTR_BLOB_ID "user.zvfs.blob_id"

/**
 * SPDK
 */
/* Development default; override with -DSPDK_JSON_PATH=... at build time. */
#ifndef SPDK_JSON_PATH
#define SPDK_JSON_PATH "/home/lian/try/zvfs/src/zvfsmalloc.json"
#endif

/* Default bdev name; override with -DZVFS_BDEV=... (e.g. "Nvme0n1"). */
#ifndef ZVFS_BDEV
#define ZVFS_BDEV "Malloc0"
#endif

/* super blob */
#define ZVFS_SB_MAGIC UINT64_C(0x5A5646535F534200) /* "ZVFS_SB\0" */
#define ZVFS_SB_VERSION UINT32_C(1)

/* dma */
#define ZVFS_DMA_BUF_SIZE (1024 * 1024)

/* waiter */
/* NOTE(review): the unit of this limit (iterations vs. time) is not visible
 * in this header - confirm against the waiter implementation. */
#define WAITER_MAX_TIME 10000000

#endif // __ZVFS_CONFIG_H__
|
||||
18
src/fio_script/bdev.fio
Executable file
18
src/fio_script/bdev.fio
Executable file
@@ -0,0 +1,18 @@
|
||||
[global]
|
||||
ioengine=spdk_bdev
|
||||
spdk_json_conf=/home/king/share/zvfs/fio_script/zvfs.json
|
||||
thread=1
|
||||
direct=1
|
||||
time_based
|
||||
runtime=10
|
||||
rw=randwrite
|
||||
bs=16K
|
||||
zonemode=zbd
|
||||
max_open_zones=8
|
||||
initial_zone_reset=1
|
||||
zone_append=1
|
||||
iodepth=64
|
||||
|
||||
[test]
|
||||
filename=Zone0
|
||||
numjobs=1
|
||||
19
src/fio_script/io_uring.fio
Executable file
19
src/fio_script/io_uring.fio
Executable file
@@ -0,0 +1,19 @@
|
||||
[global]
|
||||
thread=1
|
||||
group_reporting=1
|
||||
direct=1
|
||||
verify=0
|
||||
time_based=1
|
||||
runtime=10
|
||||
bs=16K
|
||||
size=16384
|
||||
iodepth=64
|
||||
rw=randwrite
|
||||
filename=kingfs
|
||||
ioengine=io_uring
|
||||
|
||||
[test]
|
||||
stonewall
|
||||
description="variable bs"
|
||||
bs=16K
|
||||
|
||||
BIN
src/fio_script/kingfs
Executable file
BIN
src/fio_script/kingfs
Executable file
Binary file not shown.
19
src/fio_script/libaio.fio
Executable file
19
src/fio_script/libaio.fio
Executable file
@@ -0,0 +1,19 @@
|
||||
[global]
|
||||
thread=1
|
||||
group_reporting=1
|
||||
direct=1
|
||||
verify=0
|
||||
time_based=1
|
||||
runtime=10
|
||||
bs=16K
|
||||
size=16384
|
||||
iodepth=64
|
||||
rw=randwrite
|
||||
filename=kingfs
|
||||
ioengine=libaio
|
||||
|
||||
[test]
|
||||
stonewall
|
||||
description="variable bs"
|
||||
bs=16K
|
||||
|
||||
18
src/fio_script/nvme.fio
Executable file
18
src/fio_script/nvme.fio
Executable file
@@ -0,0 +1,18 @@
|
||||
[global]
|
||||
thread=1
|
||||
group_reporting=1
|
||||
direct=1
|
||||
verify=0
|
||||
time_based=1
|
||||
ramp_time=10
|
||||
runtime=10
|
||||
bs=16K
|
||||
size=16384
|
||||
iodepth=64
|
||||
rw=randwrite
|
||||
ioengine=spdk
|
||||
|
||||
[test]
|
||||
numjobs=1
|
||||
filename=trtype=PCIe traddr=0000.03.00.0 ns=1
|
||||
bs=16k
|
||||
20
src/fio_script/psync.fio
Executable file
20
src/fio_script/psync.fio
Executable file
@@ -0,0 +1,20 @@
|
||||
|
||||
[global]
|
||||
thread=1
|
||||
group_reporting=1
|
||||
direct=1
|
||||
verify=0
|
||||
time_based=1
|
||||
runtime=10
|
||||
bs=16K
|
||||
size=16384
|
||||
iodepth=64
|
||||
rw=randwrite
|
||||
filename=kingfs
|
||||
ioengine=psync
|
||||
|
||||
[test]
|
||||
stonewall
|
||||
description="variable bs"
|
||||
bs=16K
|
||||
|
||||
35
src/fio_script/zvfs.json
Executable file
35
src/fio_script/zvfs.json
Executable file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"subsystems": [
|
||||
{
|
||||
"subsystem": "bdev",
|
||||
"config": [
|
||||
{
|
||||
"method": "bdev_nvme_attach_controller",
|
||||
"params":
|
||||
{
|
||||
"trtype": "PCIe",
|
||||
"name":"Nvme0",
|
||||
"traddr":"0000:03:00.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"method": "bdev_malloc_create",
|
||||
"params": {
|
||||
"name": "Malloc0",
|
||||
"num_blocks": 2097152,
|
||||
"block_size": 512
|
||||
}
|
||||
},
|
||||
{
|
||||
"method": "bdev_zone_block_create",
|
||||
"params": {
|
||||
"base_bdev": "Malloc0",
|
||||
"name": "Zone0",
|
||||
"zone_capacity": 262144,
|
||||
"optimal_open_zones": 8
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
103
src/fs/zvfs.c
Normal file
103
src/fs/zvfs.c
Normal file
@@ -0,0 +1,103 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "config.h"
|
||||
#include "common/utils.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_inode.h"
|
||||
#include "fs/zvfs_path_entry.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
|
||||
#include <sys/xattr.h>
|
||||
#include <sys/types.h>
|
||||
struct zvfs_fs g_fs = {0};
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* init / destroy */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int zvfs_fs_init(void) {
|
||||
memset(&g_fs, 0, sizeof(g_fs));
|
||||
|
||||
if (pthread_mutex_init(&g_fs.inode_mu, NULL) != 0) goto fail_inode;
|
||||
if (pthread_mutex_init(&g_fs.path_mu, NULL) != 0) goto fail_path;
|
||||
if (pthread_mutex_init(&g_fs.fd_mu, NULL) != 0) goto fail_fd;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_fd:
|
||||
pthread_mutex_destroy(&g_fs.path_mu);
|
||||
fail_path:
|
||||
pthread_mutex_destroy(&g_fs.inode_mu);
|
||||
fail_inode:
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* 销毁 fd_table:每个 openfile 只释放结构体内存,
|
||||
* blob_close / inode 引用计数的清理应由上层在进程退出前完成。
|
||||
* 这里做"强制兜底"清理,避免内存泄漏。
|
||||
*/
|
||||
int zvfs_fs_destroy(void) {
|
||||
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
{
|
||||
struct zvfs_open_file *of, *tmp_of;
|
||||
HASH_ITER(hh, g_fs.fd_table, of, tmp_of) {
|
||||
HASH_DEL(g_fs.fd_table, of);
|
||||
openfile_free(of);
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
/* 销毁 path_cache */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
{
|
||||
struct zvfs_path_entry *pe, *tmp_pe;
|
||||
HASH_ITER(hh, g_fs.path_cache, pe, tmp_pe) {
|
||||
HASH_DEL(g_fs.path_cache, pe);
|
||||
free(pe->path);
|
||||
free(pe);
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
/* 销毁 inode_table */
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
{
|
||||
struct zvfs_inode *in, *tmp_in;
|
||||
HASH_ITER(hh, g_fs.inode_table, in, tmp_in) {
|
||||
HASH_DEL(g_fs.inode_table, in);
|
||||
inode_free(in);
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
pthread_mutex_destroy(&g_fs.fd_mu);
|
||||
pthread_mutex_destroy(&g_fs.path_mu);
|
||||
pthread_mutex_destroy(&g_fs.inode_mu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* xattr helpers */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int zvfs_xattr_write_blob_id(int fd, uint64_t blob_id)
|
||||
{
|
||||
if (fsetxattr(fd, ZVFS_XATTR_BLOB_ID, &blob_id, sizeof(blob_id), 0) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int zvfs_xattr_read_blob_id(int fd, uint64_t *blob_id_out)
|
||||
{
|
||||
ssize_t ret = fgetxattr(fd, ZVFS_XATTR_BLOB_ID, blob_id_out, sizeof(uint64_t));
|
||||
if (ret != sizeof(uint64_t)) {
|
||||
if (ret >= 0)
|
||||
errno = EIO; /* 长度不对,视为损坏 */
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
35
src/fs/zvfs.h
Normal file
35
src/fs/zvfs.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef __ZVFS_FS_GLOBAL_H__
|
||||
#define __ZVFS_FS_GLOBAL_H__
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdint.h>
|
||||
|
||||
struct zvfs_inode;
|
||||
struct zvfs_path_entry;
|
||||
struct zvfs_open_file;
|
||||
|
||||
/* Process-wide filesystem state: three hash tables, each guarded by its own
 * mutex. */
struct zvfs_fs {
    struct zvfs_inode *inode_table;     /* blob_id -> inode */
    struct zvfs_path_entry *path_cache; /* path -> inode (runtime cache) */
    struct zvfs_open_file *fd_table;    /* fd -> open file */

    pthread_mutex_t inode_mu; /* guards inode_table */
    pthread_mutex_t path_mu;  /* guards path_cache */
    pthread_mutex_t fd_mu;    /* guards fd_table */
};
|
||||
|
||||
/* NOTE(review): empty placeholder with no members. Empty structs are a
 * compiler extension rather than ISO C, and nothing in view uses this type -
 * confirm whether it is still needed. */
struct strace {
};
|
||||
|
||||
extern struct zvfs_fs g_fs;
|
||||
|
||||
int zvfs_fs_init(void);
|
||||
int zvfs_fs_destroy(void);
|
||||
|
||||
int zvfs_xattr_write_blob_id(int fd, uint64_t blob_id);
|
||||
int zvfs_xattr_read_blob_id(int fd, uint64_t *blob_id_out);
|
||||
|
||||
#endif // __ZVFS_FS_GLOBAL_H__
|
||||
83
src/fs/zvfs_inode.c
Normal file
83
src/fs/zvfs_inode.c
Normal file
@@ -0,0 +1,83 @@
|
||||
#include "zvfs_inode.h"
|
||||
#include "zvfs.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* alloc / free */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * Allocate and initialise an inode for blob_id. The inode is NOT inserted
 * into the global table.
 *
 * The new inode starts with logical_size 0, owner set to the calling
 * process's real uid/gid, atime == mtime == now, deleted == false and
 * ref_count == 1.
 *
 * Returns the inode, or NULL if memory allocation or mutex creation fails.
 * Release with inode_free().
 */
struct zvfs_inode *inode_alloc(uint64_t blob_id, mode_t mode, zvfs_itype_t itype) {
    struct zvfs_inode *in = calloc(1, sizeof(*in));
    if (!in)
        return NULL;

    /* Create the lock first: handing out an inode whose mutex failed to
     * initialise would make every later lock/unlock undefined. */
    if (pthread_mutex_init(&in->mu, NULL) != 0) {
        free(in);
        return NULL;
    }

    in->blob_id = blob_id;
    in->logical_size = 0;
    in->itype = itype;
    in->mode = mode;
    in->uid = getuid();
    in->gid = getgid();

    time_t now = time(NULL);
    in->atime = now;
    in->mtime = now;
    in->deleted = false;

    atomic_init(&in->ref_count, 1);

    return in;
}
|
||||
|
||||
void inode_free(struct zvfs_inode *inode){
|
||||
if (!inode)
|
||||
return;
|
||||
pthread_mutex_destroy(&inode->mu);
|
||||
free(inode);
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* hash table operations (调用方持有 g_fs.inode_mu) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
void inode_insert(struct zvfs_inode *inode){
|
||||
HASH_ADD(hh, g_fs.inode_table, blob_id, sizeof(uint64_t), inode);
|
||||
}
|
||||
|
||||
struct zvfs_inode *inode_lookup(uint64_t blob_id) {
|
||||
struct zvfs_inode *in = NULL;
|
||||
HASH_FIND(hh, g_fs.inode_table, &blob_id, sizeof(uint64_t), in);
|
||||
return in;
|
||||
}
|
||||
|
||||
void inode_remove(uint64_t blob_id) {
|
||||
struct zvfs_inode *in = inode_lookup(blob_id);
|
||||
if (in)
|
||||
HASH_DELETE(hh, g_fs.inode_table, in);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* size / timestamp helpers (调用方持有 inode->mu) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
void inode_update_size(struct zvfs_inode *inode, int real_fd, uint64_t new_size) {
|
||||
inode->logical_size = new_size;
|
||||
if (real_fd >= 0)
|
||||
ftruncate(real_fd, (off_t)new_size); /* 同步 st_size,忽略错误 */
|
||||
}
|
||||
|
||||
void inode_touch_atime(struct zvfs_inode *inode) {
|
||||
inode->atime = time(NULL);
|
||||
}
|
||||
|
||||
void inode_touch_mtime(struct zvfs_inode *inode)
|
||||
{
|
||||
inode->mtime = time(NULL);
|
||||
}
|
||||
58
src/fs/zvfs_inode.h
Normal file
58
src/fs/zvfs_inode.h
Normal file
@@ -0,0 +1,58 @@
|
||||
#ifndef __ZVFS_INODE_H__
|
||||
#define __ZVFS_INODE_H__
|
||||
|
||||
#include "common/uthash.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
#include <sys/types.h>
|
||||
#include <stdatomic.h>
|
||||
|
||||
/* Kind of object an inode describes. */
typedef enum {
    ZVFS_ITYPE_FILE = 0, /* regular file */
    ZVFS_ITYPE_DIR = 1,  /* directory */
} zvfs_itype_t;
|
||||
|
||||
struct zvfs_inode {
|
||||
uint64_t blob_id;
|
||||
uint64_t logical_size; // 和真实文件 st_size 保持同步
|
||||
zvfs_itype_t itype; // FILE only,DIR 不进这张表
|
||||
|
||||
mode_t mode; // 权限
|
||||
uid_t uid; //
|
||||
gid_t gid;
|
||||
time_t atime, mtime;
|
||||
|
||||
atomic_int ref_count;
|
||||
pthread_mutex_t mu; // 护 logical_size、append_offset 等更新
|
||||
bool deleted;
|
||||
|
||||
UT_hash_handle hh;
|
||||
};
|
||||
|
||||
// 分配并初始化一个 inode,不插入全局表
|
||||
struct zvfs_inode *inode_alloc(uint64_t blob_id, mode_t mode, zvfs_itype_t itype);
|
||||
|
||||
// 释放 inode 内存(调用前确保 ref_count == 0)
|
||||
void inode_free(struct zvfs_inode *inode);
|
||||
|
||||
// 插入全局表(需持有 inode_mu)
|
||||
void inode_insert(struct zvfs_inode *inode);
|
||||
|
||||
// 按 blob_id 查找(需持有 inode_mu)
|
||||
struct zvfs_inode *inode_lookup(uint64_t blob_id);
|
||||
|
||||
// 从全局表移除(需持有 inode_mu,不释放内存)
|
||||
void inode_remove(uint64_t blob_id);
|
||||
|
||||
// 更新 logical_size,同时负责调用 ftruncate 同步 st_size
|
||||
// 需持有 inode->mu
|
||||
void inode_update_size(struct zvfs_inode *inode, int real_fd, uint64_t new_size);
|
||||
|
||||
// 更新时间戳(需持有 inode->mu)
|
||||
void inode_touch_atime(struct zvfs_inode *inode);
|
||||
void inode_touch_mtime(struct zvfs_inode *inode);
|
||||
|
||||
#endif // __ZVFS_INODE_H__
|
||||
97
src/fs/zvfs_open_file.c
Normal file
97
src/fs/zvfs_open_file.c
Normal file
@@ -0,0 +1,97 @@
|
||||
#include "zvfs_open_file.h"
|
||||
#include "zvfs_inode.h"
|
||||
#include "zvfs.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* alloc / free */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
struct zvfs_open_file *openfile_alloc(int fd,
|
||||
struct zvfs_inode *inode,
|
||||
int flags,
|
||||
struct zvfs_blob_handle *handle)
|
||||
{
|
||||
struct zvfs_open_file *of = calloc(1, sizeof(*of));
|
||||
if (!of)
|
||||
return NULL;
|
||||
|
||||
of->fd = fd;
|
||||
of->inode = inode;
|
||||
of->handle = handle;
|
||||
of->flags = flags;
|
||||
of->fd_flags = 0;
|
||||
of->offset = 0;
|
||||
atomic_init(&of->ref_count, 1);
|
||||
|
||||
return of;
|
||||
}
|
||||
|
||||
/*
 * Release the memory of an open-file record. Only the record itself is
 * freed: the inode and blob handle it points to are left alone.
 * NULL is accepted (free(NULL) is a no-op).
 */
void openfile_free(struct zvfs_open_file *of)
{
    void *record = of;

    free(record);
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* hash table operations (调用方持有 g_fs.fd_mu) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
void openfile_insert(struct zvfs_open_file *of)
|
||||
{
|
||||
HASH_ADD_INT(g_fs.fd_table, fd, of);
|
||||
}
|
||||
|
||||
struct zvfs_open_file *openfile_lookup(int fd)
|
||||
{
|
||||
struct zvfs_open_file *of = NULL;
|
||||
HASH_FIND_INT(g_fs.fd_table, &fd, of);
|
||||
return of;
|
||||
}
|
||||
|
||||
void openfile_remove(int fd)
|
||||
{
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
if (of)
|
||||
HASH_DEL(g_fs.fd_table, of);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* lseek (调用方持有 of->inode->mu) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
uint64_t openfile_seek(struct zvfs_open_file *of, int64_t offset, int whence)
|
||||
{
|
||||
int64_t new_off;
|
||||
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
new_off = offset;
|
||||
break;
|
||||
|
||||
case SEEK_CUR:
|
||||
new_off = (int64_t)of->offset + offset;
|
||||
break;
|
||||
|
||||
case SEEK_END:
|
||||
/* logical_size 由调用方在持锁状态下保证可见 */
|
||||
new_off = (int64_t)of->inode->logical_size + offset;
|
||||
break;
|
||||
|
||||
default:
|
||||
errno = EINVAL;
|
||||
return (uint64_t)-1;
|
||||
}
|
||||
|
||||
if (new_off < 0) {
|
||||
errno = EINVAL;
|
||||
return (uint64_t)-1;
|
||||
}
|
||||
|
||||
of->offset = (uint64_t)new_off;
|
||||
return of->offset;
|
||||
}
|
||||
48
src/fs/zvfs_open_file.h
Normal file
48
src/fs/zvfs_open_file.h
Normal file
@@ -0,0 +1,48 @@
|
||||
#ifndef __ZVFS_OPEN_FILE_H__
|
||||
#define __ZVFS_OPEN_FILE_H__
|
||||
|
||||
#include "common/uthash.h"
|
||||
#include "spdk_engine/io_engine.h"
|
||||
#include <stdatomic.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifndef SPDK_BLOB_ID_DEFINED
|
||||
typedef uint64_t spdk_blob_id;
|
||||
#define SPDK_BLOB_ID_DEFINED
|
||||
#endif
|
||||
|
||||
struct zvfs_open_file {
|
||||
int fd; // key,和真实 fd 1:1
|
||||
struct zvfs_inode *inode;
|
||||
struct zvfs_blob_handle *handle;
|
||||
|
||||
int flags;
|
||||
int fd_flags;
|
||||
|
||||
uint64_t offset; // 非 APPEND 模式的当前位置
|
||||
atomic_int ref_count; // dup / close 用
|
||||
|
||||
UT_hash_handle hh;
|
||||
};
|
||||
|
||||
// 分配 openfile,不插入全局表,ref_count 初始为 1
|
||||
struct zvfs_open_file *openfile_alloc(int fd, struct zvfs_inode *inode,
|
||||
int flags, struct zvfs_blob_handle *handle);
|
||||
|
||||
// 释放内存(调用前确保 ref_count == 0,不负责 blob_close)
|
||||
void openfile_free(struct zvfs_open_file *of);
|
||||
|
||||
// 插入全局表(需持有 fd_mu)
|
||||
void openfile_insert(struct zvfs_open_file *of);
|
||||
|
||||
// 按 fd 查找(需持有 fd_mu)
|
||||
struct zvfs_open_file *openfile_lookup(int fd);
|
||||
|
||||
// 从全局表移除(需持有 fd_mu,不释放内存)
|
||||
void openfile_remove(int fd);
|
||||
|
||||
// lseek 语义:返回新 offset,出错返回 (uint64_t)-1
|
||||
// 需持有 of->inode->mu(读 logical_size)
|
||||
uint64_t openfile_seek(struct zvfs_open_file *of, int64_t offset, int whence);
|
||||
|
||||
#endif // __ZVFS_OPEN_FILE_H__
|
||||
82
src/fs/zvfs_path_entry.c
Normal file
82
src/fs/zvfs_path_entry.c
Normal file
@@ -0,0 +1,82 @@
|
||||
#include "zvfs_path_entry.h"
|
||||
#include "zvfs.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* internal helper */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
static struct zvfs_path_entry *_path_find(const char *path)
|
||||
{
|
||||
struct zvfs_path_entry *e = NULL;
|
||||
HASH_FIND_STR(g_fs.path_cache, path, e);
|
||||
return e;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* public API (调用方持有 g_fs.path_mu) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int path_cache_insert(const char *path, struct zvfs_inode *inode)
|
||||
{
|
||||
if (_path_find(path))
|
||||
return -EEXIST;
|
||||
|
||||
struct zvfs_path_entry *e = calloc(1, sizeof(*e));
|
||||
if (!e)
|
||||
return -ENOMEM;
|
||||
|
||||
e->path = strdup(path);
|
||||
if (!e->path) {
|
||||
free(e);
|
||||
return -ENOMEM;
|
||||
}
|
||||
e->inode = inode;
|
||||
|
||||
HASH_ADD_STR(g_fs.path_cache, path, e);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Public lookup: returns the cache entry for `path`, or NULL on a miss.
 * Caller must hold g_fs.path_mu. */
struct zvfs_path_entry *path_cache_lookup(const char *path)
{
    struct zvfs_path_entry *entry = _path_find(path);

    return entry;
}
|
||||
|
||||
void path_cache_remove(const char *path)
|
||||
{
|
||||
struct zvfs_path_entry *e = _path_find(path);
|
||||
if (!e)
|
||||
return;
|
||||
HASH_DEL(g_fs.path_cache, e);
|
||||
free(e->path);
|
||||
free(e);
|
||||
}
|
||||
|
||||
int path_cache_rename(const char *old_path, const char *new_path)
|
||||
{
|
||||
struct zvfs_path_entry *old_e = _path_find(old_path);
|
||||
if (!old_e)
|
||||
return -1;
|
||||
|
||||
/* 若 new_path 已存在,先清掉旧 entry(inode 引用由上层处理) */
|
||||
struct zvfs_path_entry *new_e = _path_find(new_path);
|
||||
if (new_e) {
|
||||
HASH_DEL(g_fs.path_cache, new_e);
|
||||
free(new_e->path);
|
||||
free(new_e);
|
||||
}
|
||||
|
||||
/* 替换 key:从表中删除,修改 key 字符串,重新插入 */
|
||||
HASH_DEL(g_fs.path_cache, old_e);
|
||||
free(old_e->path);
|
||||
old_e->path = strdup(new_path);
|
||||
if (!old_e->path) {
|
||||
free(old_e);
|
||||
return -1;
|
||||
}
|
||||
HASH_ADD_STR(g_fs.path_cache, path, old_e);
|
||||
return 0;
|
||||
}
|
||||
30
src/fs/zvfs_path_entry.h
Normal file
30
src/fs/zvfs_path_entry.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef __ZVFS_PATH_ENTRY_H__
|
||||
#define __ZVFS_PATH_ENTRY_H__
|
||||
|
||||
#include "common/uthash.h"
|
||||
#include <stdatomic.h>
|
||||
#include <stdint.h>
|
||||
|
||||
struct zvfs_path_entry {
|
||||
char *path; // key
|
||||
struct zvfs_inode *inode;
|
||||
|
||||
UT_hash_handle hh;
|
||||
};
|
||||
|
||||
|
||||
// 插入缓存,path 内部 strdup,inode->ref_count 不在此处修改
|
||||
// 需持有 path_mu
|
||||
int path_cache_insert(const char *path, struct zvfs_inode *inode);
|
||||
|
||||
// 查找,未命中返回 NULL(需持有 path_mu)
|
||||
struct zvfs_path_entry *path_cache_lookup(const char *path);
|
||||
|
||||
// 移除并释放 entry(不释放 inode,需持有 path_mu)
|
||||
void path_cache_remove(const char *path);
|
||||
|
||||
// rename:原子替换 key(需持有 path_mu)
|
||||
int path_cache_rename(const char *old_path, const char *new_path);
|
||||
|
||||
|
||||
#endif // __ZVFS_PATH_ENTRY_H__
|
||||
38
src/fs/zvfs_sys_init.c
Normal file
38
src/fs/zvfs_sys_init.c
Normal file
@@ -0,0 +1,38 @@
|
||||
// zvfs_sys_init.c
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "config.h"
|
||||
#include "zvfs_sys_init.h"
|
||||
#include "fs/zvfs.h" // zvfs_fs_init
|
||||
#include "spdk_engine/io_engine.h"
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static pthread_once_t _init_once = PTHREAD_ONCE_INIT;
|
||||
static int _init_ok = 0;
|
||||
|
||||
static void
|
||||
do_init(void)
|
||||
{
|
||||
const char *bdev = getenv("ZVFS_BDEV");
|
||||
if (!bdev) {
|
||||
bdev = ZVFS_BDEV;
|
||||
fprintf(stderr, "[zvfs] ZVFS_BDEV not set, set as (%s)\n", ZVFS_BDEV);
|
||||
}
|
||||
|
||||
if (io_engine_init(bdev) != 0) {
|
||||
fprintf(stderr, "[zvfs] FATAL: io_engine_init(%s) failed\n", bdev);
|
||||
abort();
|
||||
}
|
||||
|
||||
_init_ok = 1;
|
||||
}
|
||||
|
||||
void
|
||||
zvfs_ensure_init(void)
|
||||
{
|
||||
pthread_once(&_init_once, do_init);
|
||||
}
|
||||
15
src/fs/zvfs_sys_init.h
Normal file
15
src/fs/zvfs_sys_init.h
Normal file
@@ -0,0 +1,15 @@
|
||||
// zvfs_sys_init.h
|
||||
#ifndef __ZVFS_SYSINIT_H__
|
||||
#define __ZVFS_SYSINIT_H__
|
||||
|
||||
/*
|
||||
* 确保 io_engine 已初始化。
|
||||
* 第一次被调用时执行初始化,后续调用直接返回。
|
||||
* 线程安全:内部用 pthread_once 保证只初始化一次。
|
||||
*
|
||||
* 调用时机:第一次 open("/zvfs/...") 时触发。
|
||||
* 此时 main() 已经开始执行,SPDK 所需的运行环境已就绪。
|
||||
*/
|
||||
void zvfs_ensure_init(void);
|
||||
|
||||
#endif
|
||||
14
src/hook/zvfs_hook.h
Normal file
14
src/hook/zvfs_hook.h
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef __ZVFS_HOOK_H__
|
||||
#define __ZVFS_HOOK_H__
|
||||
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_fd.h"
|
||||
#include "zvfs_hook_rw.h"
|
||||
#include "zvfs_hook_seek.h"
|
||||
#include "zvfs_hook_stat.h"
|
||||
#include "zvfs_hook_sync.h"
|
||||
#include "zvfs_hook_fcntl.h"
|
||||
#include "zvfs_hook_dir.h"
|
||||
#include "zvfs_hook_mmap.h"
|
||||
|
||||
#endif // __ZVFS_HOOK_H__
|
||||
276
src/hook/zvfs_hook_dir.c
Normal file
276
src/hook/zvfs_hook_dir.c
Normal file
@@ -0,0 +1,276 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_dir.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_inode.h"
|
||||
#include "fs/zvfs_path_entry.h"
|
||||
|
||||
/* SPDK io engine - blob_delete 声明 */
|
||||
#include "../spdk_engine/io_engine.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <linux/fs.h> /* RENAME_EXCHANGE, RENAME_NOREPLACE */
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:执行 unlink 的 zvfs 侧清理 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* zvfs_unlink_path - 对一个确认属于 zvfs 的绝对路径执行清理。
|
||||
*
|
||||
* 调用时机:real_unlink* 已成功返回之后。
|
||||
*
|
||||
* 逻辑:
|
||||
* 1. 持 path_mu 查 path_cache
|
||||
* 2. 找到 → 持 inode_mu 查 inode
|
||||
* 3. 持 inode->mu 检查 ref_count
|
||||
* - ref_count == 0:直接 blob_delete,inode_remove,inode_free,path_cache_remove
|
||||
* - ref_count > 0:标记 deleted = true,path_cache_remove
|
||||
* (inode 和 blob 的清理推迟到 close 路径中 ref_count 归零时)
|
||||
*/
|
||||
static void
|
||||
zvfs_unlink_path(const char *abspath)
|
||||
{
|
||||
/* --- 查 path_cache -------------------------------------------- */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
struct zvfs_path_entry *pe = path_cache_lookup(abspath);
|
||||
if (!pe) {
|
||||
/*
|
||||
* 不在缓存里:该文件可能从未被 open 过(没有 inode 对象)。
|
||||
* 无内存状态需要清理,直接返回。
|
||||
* blob 也不存在(文件从未被 zvfs open 创建),所以安全。
|
||||
*/
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
return;
|
||||
}
|
||||
struct zvfs_inode *inode = pe->inode;
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
/* --- 持 inode->mu 决策 ---------------------------------------- */
|
||||
pthread_mutex_lock(&inode->mu);
|
||||
int ref = atomic_load(&inode->ref_count);
|
||||
|
||||
if (ref == 0) {
|
||||
/*
|
||||
* 没有 fd 打开:立即清理。
|
||||
* 顺序:blob_delete → inode_remove(出全局表)→ path_cache_remove
|
||||
* → inode_free(释放内存)
|
||||
*/
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
|
||||
blob_delete(inode->blob_id);
|
||||
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_remove(inode->blob_id);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_remove(abspath);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
inode_free(inode);
|
||||
|
||||
} else {
|
||||
/*
|
||||
* 还有 fd 打开:Unix 延迟删除语义。
|
||||
* 标记 deleted,让 close 路径在 ref_count 归零时负责 blob_delete。
|
||||
* 同时把 path 从缓存里摘掉(路径已从目录树消失)。
|
||||
*/
|
||||
inode->deleted = true;
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_remove(abspath);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* unlink */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * Interposed unlink(2).
 *
 * Re-entrant calls and paths that zvfs_is_zvfs_path() rejects go straight to
 * real_unlink(). For zvfs paths the real file is removed first (its xattr
 * disappears with it), and only if that succeeds is the zvfs-side state
 * cleaned up.
 */
int
unlink(const char *path)
{
    ZVFS_HOOK_ENTER();

    /* Short-circuit keeps zvfs_is_zvfs_path() out of re-entrant calls. */
    const int passthrough = ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path);

    if (!passthrough)
        zvfs_ensure_init();

    int ret = real_unlink(path);

    if (!passthrough && ret == 0)
        zvfs_unlink_path(path);

    ZVFS_HOOK_LEAVE();
    return ret;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* unlinkat */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
unlinkat(int dirfd, const char *path, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* AT_REMOVEDIR:rmdir 语义,目录由真实 FS 管理,直接透传。
|
||||
*/
|
||||
if (flags & AT_REMOVEDIR) {
|
||||
ret = real_unlinkat(dirfd, path, flags);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* 解析绝对路径,判断是否属于 zvfs */
|
||||
char abspath[PATH_MAX];
|
||||
if (zvfs_resolve_atpath(dirfd, path, abspath, sizeof(abspath)) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1; /* errno already set */
|
||||
}
|
||||
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(abspath)) {
|
||||
ret = real_unlinkat(dirfd, path, flags);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
ret = real_unlinkat(dirfd, path, flags);
|
||||
if (ret == 0)
|
||||
zvfs_unlink_path(abspath);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:执行 rename 的 zvfs 侧缓存更新 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* zvfs_rename_paths - 在 real_rename* 成功后更新 path_cache。
|
||||
*
|
||||
* 如果 newpath 原本也在缓存里(覆盖式 rename),其 inode 需要先做
|
||||
* unlink 清理(与 zvfs_unlink_path 逻辑相同)。
|
||||
*/
|
||||
static void
|
||||
zvfs_rename_paths(const char *oldabs, const char *newabs)
|
||||
{
|
||||
/* 处理 newpath 被覆盖的情况 */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
struct zvfs_path_entry *victim = path_cache_lookup(newabs);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
if (victim) {
|
||||
/*
|
||||
* newpath 是 zvfs 文件且已经在缓存里:
|
||||
* real_rename 已经把它从磁盘上删掉了,
|
||||
* 走和 unlink 一样的延迟/立即 blob_delete 逻辑。
|
||||
*/
|
||||
zvfs_unlink_path(newabs);
|
||||
}
|
||||
|
||||
/* 把 oldpath 的缓存条目 rename 到 newpath */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_rename(oldabs, newabs);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* rename */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
rename(const char *oldpath, const char *newpath)
{
    ZVFS_HOOK_ENTER();

    const int src_in_zvfs = zvfs_is_zvfs_path(oldpath);
    const int dst_in_zvfs = zvfs_is_zvfs_path(newpath);
    int rc;

    if (ZVFS_IN_HOOK() || (!src_in_zvfs && !dst_in_zvfs)) {
        /* Re-entrant call, or neither side is a zvfs path: pass through. */
        rc = real_rename(oldpath, newpath);
    } else if (src_in_zvfs != dst_in_zvfs) {
        /*
         * One side in zvfs and the other outside: not supported.
         * Reported as EXDEV, like a cross-filesystem rename.
         */
        errno = EXDEV;
        rc = -1;
    } else {
        zvfs_ensure_init();

        rc = real_rename(oldpath, newpath);
        if (rc == 0) {
            zvfs_rename_paths(oldpath, newpath);
        }
    }

    ZVFS_HOOK_LEAVE();
    return rc;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* renameat */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
renameat(int olddirfd, const char *oldpath,
|
||||
int newdirfd, const char *newpath)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
int ret;
|
||||
|
||||
char oldabs[PATH_MAX], newabs[PATH_MAX];
|
||||
|
||||
if (zvfs_resolve_atpath(olddirfd, oldpath, oldabs, sizeof(oldabs)) < 0 ||
|
||||
zvfs_resolve_atpath(newdirfd, newpath, newabs, sizeof(newabs)) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
int old_is_zvfs = zvfs_is_zvfs_path(oldabs);
|
||||
int new_is_zvfs = zvfs_is_zvfs_path(newabs);
|
||||
|
||||
if (ZVFS_IN_HOOK() || (!old_is_zvfs && !new_is_zvfs)) {
|
||||
ret = real_renameat(olddirfd, oldpath, newdirfd, newpath);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (old_is_zvfs != new_is_zvfs) {
|
||||
errno = EXDEV;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
ret = real_renameat(olddirfd, oldpath, newdirfd, newpath);
|
||||
if (ret == 0)
|
||||
zvfs_rename_paths(oldabs, newabs);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
32
src/hook/zvfs_hook_dir.h
Normal file
32
src/hook/zvfs_hook_dir.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef __ZVFS_HOOK_DIR_H__
#define __ZVFS_HOOK_DIR_H__

#include <fcntl.h>

/*
 * Directory-operation hooks.
 *
 * mkdir / rmdir / opendir / readdir / getdents64 are passed through and are
 * not hooked. Only operations that change which path names a file are
 * intercepted here:
 *
 * unlink / unlinkat
 *   - the real file is removed by real_unlink / real_unlinkat
 *   - if the path is present in path_cache:
 *       ref_count == 0: blob_delete + inode_remove + path_cache_remove
 *       ref_count  > 0: mark inode->deleted = true; blob_delete is deferred
 *                       to the close path, when ref_count drops to zero
 *
 * rename / renameat
 *   - the real file is moved by real_rename* (the xattr travels with the
 *     file, so it does not need to be rewritten)
 *   - path_cache_rename updates the in-memory cache
 *   - a rename with exactly one side under the zvfs mount fails with EXDEV
 *
 * NOTE: renameat2 is not declared in this header. The original design note
 * said RENAME_EXCHANGE should return ENOTSUP; treat that as planned
 * behavior until a renameat2 hook exists.
 */

int unlink(const char *path);
int unlinkat(int dirfd, const char *path, int flags);

int rename(const char *oldpath, const char *newpath);
int renameat(int olddirfd, const char *oldpath,
             int newdirfd, const char *newpath);

#endif // __ZVFS_HOOK_DIR_H__
|
||||
230
src/hook/zvfs_hook_fcntl.c
Normal file
230
src/hook/zvfs_hook_fcntl.c
Normal file
@@ -0,0 +1,230 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_fcntl.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_fd.h" /* dup/dup2 路径 */
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
#include "fs/zvfs_inode.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:fcntl 核心逻辑(已确认是 zvfs fd) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
static int
|
||||
zvfs_fcntl_impl(int fd, int cmd, va_list ap)
|
||||
{
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
if (!of) { errno = EBADF; return -1; }
|
||||
|
||||
switch (cmd) {
|
||||
|
||||
/* ---- 文件状态 flags ------------------------------------------ */
|
||||
case F_GETFL:
|
||||
return of->flags;
|
||||
|
||||
case F_SETFL: {
|
||||
int newfl = va_arg(ap, int);
|
||||
/*
|
||||
* 只允许修改可变位:O_APPEND、O_NONBLOCK、O_ASYNC。
|
||||
* O_RDONLY / O_WRONLY / O_RDWR 是 open 时决定的,不能改。
|
||||
* 同步给真实 fd,保持内核状态一致(影响 real_read/write)。
|
||||
*/
|
||||
int mutable_mask = O_APPEND | O_NONBLOCK | O_ASYNC;
|
||||
of->flags = (of->flags & ~mutable_mask) | (newfl & mutable_mask);
|
||||
/*
|
||||
* 也透传给真实 fd——虽然真实 fd 上的读写被我们拦截了,
|
||||
* 但 O_NONBLOCK 可能影响 pipe / socket 等透传路径。
|
||||
*/
|
||||
real_fcntl(fd, F_SETFL, of->flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---- fd flags(FD_CLOEXEC)----------------------------------- */
|
||||
case F_GETFD:
|
||||
return of->fd_flags;
|
||||
|
||||
case F_SETFD: {
|
||||
int fdfl = va_arg(ap, int);
|
||||
of->fd_flags = fdfl;
|
||||
/* 同步给真实 fd */
|
||||
real_fcntl(fd, F_SETFD, fdfl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---- dup 类 -------------------------------------------------- */
|
||||
case F_DUPFD:
|
||||
case F_DUPFD_CLOEXEC: {
|
||||
(void)va_arg(ap, int);
|
||||
errno = ENOTSUP;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* ---- 文件锁(不实现,假装无锁)-------------------------------- */
|
||||
case F_GETLK: {
|
||||
struct flock *fl = va_arg(ap, struct flock *);
|
||||
if (!fl) { errno = EFAULT; return -1; }
|
||||
fl->l_type = F_UNLCK; /* 假装没有任何锁 */
|
||||
return 0;
|
||||
}
|
||||
|
||||
case F_SETLK:
|
||||
case F_SETLKW:
|
||||
(void)va_arg(ap, struct flock *);
|
||||
return 0; /* 假装加锁成功 */
|
||||
|
||||
/* ---- 其他 cmd:透传给内核(同时维护真实 fd 状态)-------------- */
|
||||
default: {
|
||||
/*
|
||||
* 取出可变参数作为 void* 透传。
|
||||
* 大多数 fcntl cmd 的第三个参数是 long 或指针,
|
||||
* 用 void* 接收足够覆盖所有平台(64-bit)。
|
||||
*/
|
||||
void *arg = va_arg(ap, void *);
|
||||
return real_fcntl(fd, cmd, arg);
|
||||
}
|
||||
|
||||
} /* switch */
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* fcntl */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
fcntl(int fd, int cmd, ...)
{
    ZVFS_HOOK_ENTER();

    va_list args;
    va_start(args, cmd);

    int rc;
    if (!ZVFS_IN_HOOK() && zvfs_is_zvfs_fd(fd)) {
        /* zvfs fd: the implementation pulls what it needs from the va_list. */
        zvfs_ensure_init();
        rc = zvfs_fcntl_impl(fd, cmd, args);
    } else {
        /*
         * Not a zvfs fd (or a re-entrant call): pass through. fcntl has no
         * va_list variant, so the third argument is extracted as void * and
         * forwarded.
         */
        void *third = va_arg(args, void *);
        rc = real_fcntl(fd, cmd, third);
    }

    va_end(args);
    ZVFS_HOOK_LEAVE();
    return rc;
}
|
||||
|
||||
int
|
||||
fcntl64(int fd, int cmd, ...)
|
||||
{
|
||||
/*
|
||||
* fcntl64 是 glibc 在 32-bit 系统上的 large-file 变体,
|
||||
* 语义与 fcntl 相同,直接转发。
|
||||
*/
|
||||
va_list ap;
|
||||
va_start(ap, cmd);
|
||||
void *arg = va_arg(ap, void *);
|
||||
va_end(ap);
|
||||
|
||||
ZVFS_HOOK_ENTER();
|
||||
int ret;
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(fd)) {
|
||||
ret = real_fcntl64 ? real_fcntl64(fd, cmd, arg)
|
||||
: real_fcntl(fd, cmd, arg);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
va_list ap2;
|
||||
va_start(ap2, cmd);
|
||||
ret = zvfs_fcntl_impl(fd, cmd, ap2);
|
||||
va_end(ap2);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* ioctl */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
ioctl(int fd, unsigned long request, ...)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, request);
|
||||
void *arg = va_arg(ap, void *);
|
||||
va_end(ap);
|
||||
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(fd)) {
|
||||
int ret = real_ioctl(fd, request, arg);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
int ret = -1;
|
||||
|
||||
switch (request) {
|
||||
|
||||
case FIONREAD: {
|
||||
/*
|
||||
* 返回当前可读字节数 = logical_size - cur_offset。
|
||||
* 结果写入 arg(int*)。
|
||||
*/
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
if (!of) { errno = EBADF; ret = -1; break; }
|
||||
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
uint64_t size = of->inode->logical_size;
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
uint64_t off = of->offset;
|
||||
int avail = (off < size) ? (int)(size - off) : 0;
|
||||
if (arg) *(int *)arg = avail;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
/*
|
||||
* 其他 ioctl:zvfs 文件不是块设备/字符设备,
|
||||
* 绝大多数 ioctl 语义不适用,返回 ENOTTY。
|
||||
* 若将来需要支持特定 ioctl 在此扩展。
|
||||
*/
|
||||
errno = ENOTTY;
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
27
src/hook/zvfs_hook_fcntl.h
Normal file
27
src/hook/zvfs_hook_fcntl.h
Normal file
@@ -0,0 +1,27 @@
|
||||
#ifndef __ZVFS_HOOK_FCNTL_H__
#define __ZVFS_HOOK_FCNTL_H__

/*
 * fcntl command handling for zvfs fds:
 *
 *   F_GETFL          -> return of->flags
 *   F_SETFL          -> update of->flags (only O_APPEND / O_NONBLOCK /
 *                       O_ASYNC may change) and mirror to the real fd
 *   F_GETFD          -> return of->fd_flags
 *   F_SETFD          -> update of->fd_flags (FD_CLOEXEC) and mirror to the
 *                       real fd
 *   F_DUPFD          -> not supported on zvfs fds: fails with ENOTSUP
 *   F_DUPFD_CLOEXEC  -> not supported on zvfs fds: fails with ENOTSUP
 *   F_GETLK          -> file locks are not implemented; reports
 *                       l_type = F_UNLCK (pretends nothing is locked)
 *   F_SETLK          -> returns 0 (pretends success)
 *   F_SETLKW         -> returns 0 (pretends success, never blocks)
 *   any other cmd    -> forwarded to real_fcntl on the underlying fd
 *
 * Non-zvfs fds are always forwarded to the real fcntl / fcntl64.
 *
 * ioctl request handling:
 *   non-zvfs fd      -> forwarded to real_ioctl
 *   zvfs fd:
 *     FIONREAD       -> logical_size - current offset (readable bytes)
 *     anything else  -> fails with ENOTTY
 */

int fcntl(int fd, int cmd, ...);
int fcntl64(int fd, int cmd, ...);
int ioctl(int fd, unsigned long request, ...);

#endif // __ZVFS_HOOK_FCNTL_H__
|
||||
549
src/hook/zvfs_hook_fd.c
Normal file
549
src/hook/zvfs_hook_fd.c
Normal file
@@ -0,0 +1,549 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_fd.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_inode.h"
|
||||
#include "fs/zvfs_path_entry.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
#include "spdk_engine/io_engine.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:open 的核心逻辑(路径已解析为绝对路径) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
|
||||
* zvfs_open_impl - 对一个确认属于 zvfs 的绝对路径执行 open。
|
||||
*
|
||||
* real_fd:已经由 real_open* 打开的真实 fd(用于 xattr 读写 + ftruncate)。
|
||||
* flags :open 时传入的 flags。
|
||||
* mode :O_CREAT 时的权限。
|
||||
*
|
||||
* 成功返回 real_fd(即用户拿到的 fd),失败返回 -1(errno 已设置),
|
||||
* 失败时调用方负责 real_close(real_fd)。
|
||||
*/
|
||||
static int
|
||||
zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
|
||||
{
|
||||
struct zvfs_inode *inode = NULL;
|
||||
struct zvfs_blob_handle *handle = NULL;
|
||||
uint64_t blob_id = 0;
|
||||
|
||||
if (flags & O_CREAT) {
|
||||
/* ---- 创建路径 -------------------------------------------- */
|
||||
|
||||
/* 1. 创建 blob */
|
||||
handle = blob_create(0);
|
||||
if (!handle) { errno = EIO; goto fail; }
|
||||
blob_id = handle->id;
|
||||
|
||||
/* 2. 把 blob_id 写入真实文件的 xattr */
|
||||
if (zvfs_xattr_write_blob_id(real_fd, blob_id) < 0) goto fail;
|
||||
|
||||
/* 3. logical_size = 0,让 st_size 也为 0 */
|
||||
if (real_ftruncate(real_fd, 0) < 0) goto fail;
|
||||
|
||||
/* 4. 分配 inode */
|
||||
inode = inode_alloc(blob_id, mode ? mode : 0666, ZVFS_ITYPE_FILE);
|
||||
if (!inode) { errno = ENOMEM; goto fail; }
|
||||
|
||||
/* 5. 插入全局表 */
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_insert(inode);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
/* 6. 插入 path_cache */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_insert(abspath, inode);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
} else {
|
||||
/* ---- 打开已有文件路径 ------------------------------------- */
|
||||
|
||||
/* 1. 先查 path_cache,命中说明另一个 fd 已经打开过 */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
struct zvfs_path_entry *pe = path_cache_lookup(abspath);
|
||||
if (pe) inode = pe->inode;
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
if (inode) {
|
||||
/* path_cache 命中:直接用缓存的 inode,重新 blob_open */
|
||||
blob_id = inode->blob_id;
|
||||
handle = blob_open(blob_id);
|
||||
if (!handle) { errno = EIO; goto fail; }
|
||||
/* 共享 inode,增加引用 */
|
||||
atomic_fetch_add(&inode->ref_count, 1);
|
||||
|
||||
} else {
|
||||
/* 未命中:从 xattr 读 blob_id,可能是进程首次 open */
|
||||
if (zvfs_xattr_read_blob_id(real_fd, &blob_id) < 0) {
|
||||
/* xattr 不存在:不是 zvfs 管理的文件,降级透传 */
|
||||
return real_fd; /* 直接返回,不做任何包装 */
|
||||
}
|
||||
|
||||
/* 再查 inode_table(另一个 fd 可能已经 open 但路径未缓存)*/
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode = inode_lookup(blob_id);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
if (inode) {
|
||||
atomic_fetch_add(&inode->ref_count, 1);
|
||||
} else {
|
||||
/* 全新 inode:需从真实文件 stat 获取 mode/size */
|
||||
struct stat st;
|
||||
if (real_fstat(real_fd, &st) < 0) goto fail;
|
||||
|
||||
inode = inode_alloc(blob_id, st.st_mode, ZVFS_ITYPE_FILE);
|
||||
if (!inode) { errno = ENOMEM; goto fail; }
|
||||
inode->logical_size = (uint64_t)st.st_size;
|
||||
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_insert(inode);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_insert(abspath, inode);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
}
|
||||
|
||||
handle = blob_open(blob_id);
|
||||
if (!handle) { errno = EIO; goto fail; }
|
||||
}
|
||||
}
|
||||
|
||||
/* ---- 分配 openfile,插入 fd_table ---------------------------- */
|
||||
struct zvfs_open_file *of = openfile_alloc(real_fd, inode, flags, handle);
|
||||
if (!of) { errno = ENOMEM; goto fail_handle; }
|
||||
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
openfile_insert(of);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
return real_fd;
|
||||
|
||||
fail_handle:
|
||||
blob_close(handle);
|
||||
fail:
|
||||
/* inode 若刚分配(ref_count==1)需要回滚 */
|
||||
if (inode && atomic_load(&inode->ref_count) == 1) {
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_remove(inode->blob_id);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_remove(abspath);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
inode_free(inode);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* open */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * open hook.
 *
 * Paths outside zvfs and re-entrant calls go to real_open unchanged. For
 * zvfs paths the real file is opened first and zvfs state is then attached
 * to the resulting fd by zvfs_open_impl.
 */
int
open(const char *path, int flags, ...)
{
    ZVFS_HOOK_ENTER();

    /*
     * The mode argument is present for O_CREAT and also for O_TMPFILE;
     * forwarding 0 for O_TMPFILE would create the file with no permissions.
     */
    mode_t mode = 0;
    int needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif
    if (needs_mode) {
        va_list ap;
        va_start(ap, flags);
        mode = (mode_t)va_arg(ap, unsigned int);
        va_end(ap);
    }

    int ret;
    if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path)) {
        ret = real_open(path, flags, mode);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    zvfs_ensure_init();

    /* Let the real filesystem create / open the file first. */
    int real_fd = real_open(path, flags, mode);
    if (real_fd < 0) { ZVFS_HOOK_LEAVE(); return -1; }

    ret = zvfs_open_impl(real_fd, path, flags, mode);
    if (ret < 0) {
        /* Close the real fd without losing the errno of the failure. */
        int saved = errno;
        real_close(real_fd);
        errno = saved;
    }

    ZVFS_HOOK_LEAVE();
    return ret;
}
|
||||
|
||||
int open64(const char *path, int flags, ...)
|
||||
{
|
||||
mode_t mode = 0;
|
||||
if (flags & O_CREAT) {
|
||||
va_list ap; va_start(ap, flags);
|
||||
mode = (mode_t)va_arg(ap, unsigned int);
|
||||
va_end(ap);
|
||||
}
|
||||
return open(path, flags | O_LARGEFILE, mode);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* openat */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
openat(int dirfd, const char *path, int flags, ...)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
mode_t mode = 0;
|
||||
if (flags & O_CREAT) {
|
||||
va_list ap; va_start(ap, flags);
|
||||
mode = (mode_t)va_arg(ap, unsigned int);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
/* 解析绝对路径判断是否属于 zvfs */
|
||||
char abspath[PATH_MAX];
|
||||
if (zvfs_resolve_atpath(dirfd, path, abspath, sizeof(abspath)) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret;
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(abspath)) {
|
||||
ret = real_openat(dirfd, path, flags, mode);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
int real_fd = real_openat(dirfd, path, flags, mode);
|
||||
if (real_fd < 0) { ZVFS_HOOK_LEAVE(); return -1; }
|
||||
|
||||
ret = zvfs_open_impl(real_fd, abspath, flags, mode);
|
||||
if (ret < 0) {
|
||||
int saved = errno;
|
||||
real_close(real_fd);
|
||||
errno = saved;
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
int openat64(int dirfd, const char *path, int flags, ...)
|
||||
{
|
||||
mode_t mode = 0;
|
||||
if (flags & O_CREAT) {
|
||||
va_list ap; va_start(ap, flags);
|
||||
mode = (mode_t)va_arg(ap, unsigned int);
|
||||
va_end(ap);
|
||||
}
|
||||
return openat(dirfd, path, flags | O_LARGEFILE, mode);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* creat */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/* creat(path, mode) is open() with O_CREAT | O_WRONLY | O_TRUNC. */
int creat(const char *path, mode_t mode)
{
    const int creat_flags = O_CREAT | O_WRONLY | O_TRUNC;

    return open(path, creat_flags, mode);
}
|
||||
|
||||
int creat64(const char *path, mode_t mode)
|
||||
{
|
||||
return open(path, O_CREAT | O_WRONLY | O_TRUNC | O_LARGEFILE, mode);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* glibc 别名 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/* glibc alias: forwards to the open() hook. */
int __open(const char *path, int flags, ...)
{
    /* mode is supplied for O_CREAT and for O_TMPFILE. */
    mode_t mode = 0;
    int needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif
    if (needs_mode) {
        va_list ap;
        va_start(ap, flags);
        mode = (mode_t)va_arg(ap, unsigned int);
        va_end(ap);
    }
    return open(path, flags, mode);
}
|
||||
|
||||
/* glibc alias: forwards to the open64() hook. */
int __open64(const char *path, int flags, ...)
{
    /* mode is supplied for O_CREAT and for O_TMPFILE. */
    mode_t mode = 0;
    int needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif
    if (needs_mode) {
        va_list ap;
        va_start(ap, flags);
        mode = (mode_t)va_arg(ap, unsigned int);
        va_end(ap);
    }
    return open64(path, flags, mode);
}
|
||||
|
||||
/* glibc alias: forwards to the open() hook. */
int __libc_open(const char *path, int flags, ...)
{
    /* mode is supplied for O_CREAT and for O_TMPFILE. */
    mode_t mode = 0;
    int needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif
    if (needs_mode) {
        va_list ap;
        va_start(ap, flags);
        mode = (mode_t)va_arg(ap, unsigned int);
        va_end(ap);
    }
    return open(path, flags, mode);
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* close */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* zvfs_close_impl - zvfs fd 的关闭逻辑。
|
||||
*
|
||||
* 调用方已持有 fd_mu。函数内部会释放 fd_mu 后再处理 inode。
|
||||
*/
|
||||
static int
|
||||
zvfs_close_impl(int fd)
|
||||
{
|
||||
/* 持 fd_mu 取出 openfile,从表里摘除 */
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
if (!of) {
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
int new_ref = atomic_fetch_sub(&of->ref_count, 1) - 1;
|
||||
if (new_ref == 0)
|
||||
openfile_remove(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
if (new_ref > 0) {
|
||||
/*
|
||||
* 还有其他 dup 出来的 fd 引用同一个 openfile,
|
||||
* 只关闭真实 fd,不动 blob 和 inode。
|
||||
*/
|
||||
return real_close(fd);
|
||||
}
|
||||
|
||||
/* ---- openfile 引用归零:关闭 blob handle --------------------- */
|
||||
struct zvfs_inode *inode = of->inode;
|
||||
struct zvfs_blob_handle *handle = of->handle;
|
||||
openfile_free(of);
|
||||
|
||||
blob_close(handle);
|
||||
|
||||
/* ---- inode ref_count-- --------------------------------------- */
|
||||
int inode_ref = atomic_fetch_sub(&inode->ref_count, 1) - 1;
|
||||
|
||||
if (inode_ref == 0) {
|
||||
/*
|
||||
* 最后一个 fd 关闭了这个 inode。
|
||||
* 若 deleted:执行延迟 blob_delete。
|
||||
*/
|
||||
bool do_delete = false;
|
||||
pthread_mutex_lock(&inode->mu);
|
||||
do_delete = inode->deleted;
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
|
||||
if (do_delete)
|
||||
blob_delete(inode->blob_id);
|
||||
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_remove(inode->blob_id);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
/* path_cache 在 unlink 时已经摘除(deleted=true 路径)
|
||||
* 或在此处还需摘除(正常关闭最后一个 fd)*/
|
||||
if (!do_delete) {
|
||||
/* 正常关闭:path 留着,只有 inode 的引用归零时清缓存 */
|
||||
/* 注意:path_cache 里的指针指向这个即将释放的 inode,
|
||||
* 所以必须把 path_cache 条目也清掉,否则成为悬空指针 */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
/* 遍历找到所有指向这个 inode 的 path entry 并移除
|
||||
* (一个 inode 对应一个 path,hardlink 暂不支持)*/
|
||||
struct zvfs_path_entry *pe, *tmp; (void)tmp;
|
||||
HASH_ITER(hh, g_fs.path_cache, pe, tmp) {
|
||||
if (pe->inode == inode) {
|
||||
HASH_DEL(g_fs.path_cache, pe);
|
||||
free(pe->path);
|
||||
free(pe);
|
||||
break; /* 一对一关系,找到即退 */
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
}
|
||||
|
||||
inode_free(inode);
|
||||
}
|
||||
|
||||
return real_close(fd);
|
||||
}
|
||||
|
||||
int
close(int fd)
{
    ZVFS_HOOK_ENTER();

    int rc;

    if (!ZVFS_IN_HOOK() && zvfs_is_zvfs_fd(fd)) {
        /* zvfs fd on a top-level call: run the zvfs teardown. */
        zvfs_ensure_init();
        rc = zvfs_close_impl(fd);
    } else {
        /* Re-entrant call or foreign fd: plain pass-through. */
        rc = real_close(fd);
    }

    ZVFS_HOOK_LEAVE();
    return rc;
}
|
||||
|
||||
/* glibc alias: forwards to the close() hook. */
int __close(int fd)
{
    return close(fd);
}

/* glibc alias: forwards to the close() hook. */
int __libc_close(int fd)
{
    return close(fd);
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* close_range */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
close_range(unsigned int first, unsigned int last, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (ZVFS_IN_HOOK()) {
|
||||
int ret = real_close_range ? real_close_range(first, last, flags)
|
||||
: (errno = ENOSYS, -1);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* 遍历范围内所有 fd,zvfs fd 单独走 zvfs_close_impl,
|
||||
* 其余统一交给 real_close_range(如果内核支持)。
|
||||
* 若内核不支持 close_range(< 5.9),逐个 close。
|
||||
*/
|
||||
int any_err = 0;
|
||||
int inited = 0;
|
||||
for (unsigned int fd = first; fd <= last; fd++) {
|
||||
if (zvfs_is_zvfs_fd((int)fd)) {
|
||||
if (!inited) {
|
||||
zvfs_ensure_init();
|
||||
inited = 1;
|
||||
}
|
||||
if (zvfs_close_impl((int)fd) < 0) any_err = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* 让内核处理剩余非 zvfs fd(CLOEXEC 等 flags 也在这里生效) */
|
||||
if (real_close_range) {
|
||||
if (real_close_range(first, last, flags) < 0 && !any_err)
|
||||
any_err = 1;
|
||||
} else {
|
||||
/* 降级:逐个 close 非 zvfs fd */
|
||||
for (unsigned int fd = first; fd <= last; fd++) {
|
||||
if (!zvfs_is_zvfs_fd((int)fd))
|
||||
real_close((int)fd);
|
||||
}
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return any_err ? -1 : 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* dup */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
dup(int oldfd)
{
    ZVFS_HOOK_ENTER();

    int rc;

    if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(oldfd)) {
        /* Re-entrant call or foreign fd: plain pass-through. */
        rc = real_dup(oldfd);
    } else {
        /*
         * dup on a zvfs fd is not supported in this version; fail with
         * ENOTSUP rather than expose incorrect offset semantics.
         */
        zvfs_ensure_init();
        errno = ENOTSUP;
        rc = -1;
    }

    ZVFS_HOOK_LEAVE();
    return rc;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* dup2 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * dup2 hook.
 *
 * Duplicating a zvfs fd is not supported (ENOTSUP), except for the POSIX
 * no-op dup2(fd, fd). Duplicating a foreign fd is forwarded; when the
 * target newfd is currently a zvfs fd, its zvfs state is released first,
 * because the kernel is about to replace that descriptor and a stale
 * open-file entry would otherwise keep claiming the fd number.
 */
int
dup2(int oldfd, int newfd)
{
    ZVFS_HOOK_ENTER();

    if (ZVFS_IN_HOOK()) {
        int ret = real_dup2(oldfd, newfd);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    if (!zvfs_is_zvfs_fd(oldfd)) {
        if (oldfd != newfd && zvfs_is_zvfs_fd(newfd)) {
            /*
             * dup2 must leave newfd untouched when oldfd is invalid, so
             * validate oldfd before tearing anything down.
             */
            if (real_fcntl(oldfd, F_GETFD, 0) < 0) {
                ZVFS_HOOK_LEAVE();
                return -1; /* errno set by the real fcntl */
            }
            zvfs_ensure_init();
            /*
             * NOTE(review): newfd is briefly closed between this call and
             * real_dup2, so the replacement is not atomic with respect to
             * other threads opening files.
             */
            (void)zvfs_close_impl(newfd);
        }

        int ret = real_dup2(oldfd, newfd);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    /* POSIX: dup2(oldfd, oldfd) on a valid fd simply returns oldfd. */
    if (oldfd == newfd) {
        ZVFS_HOOK_LEAVE();
        return oldfd;
    }

    zvfs_ensure_init();
    errno = ENOTSUP;
    ZVFS_HOOK_LEAVE();
    return -1;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* dup3 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
dup3(int oldfd, int newfd, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
int is_zvfs_fd = (!ZVFS_IN_HOOK() && zvfs_is_zvfs_fd(oldfd));
|
||||
if (!is_zvfs_fd) {
|
||||
int ret = real_dup3(oldfd, newfd, flags);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (oldfd == newfd) {
|
||||
errno = EINVAL;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
errno = ENOTSUP;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
51
src/hook/zvfs_hook_fd.h
Normal file
51
src/hook/zvfs_hook_fd.h
Normal file
@@ -0,0 +1,51 @@
|
||||
#ifndef __ZVFS_HOOK_FD_H__
#define __ZVFS_HOOK_FD_H__

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/**
 * open / creat:
 *   zvfs path + O_CREAT    -> blob_create + xattr write + inode_alloc +
 *                             openfile_alloc
 *   zvfs path, no O_CREAT  -> xattr read of the blob id + blob_open +
 *                             inode_alloc (if not cached) + openfile_alloc;
 *                             a file without the xattr is returned as a
 *                             plain pass-through fd
 *   non-zvfs path          -> pass-through
 *
 * close:
 *   zvfs fd     -> openfile ref_count--
 *                    on zero: blob_close, then inode ref_count--
 *                    inode on zero: blob_delete if inode->deleted,
 *                                   otherwise drop the path_cache entry;
 *                                   inode_remove + inode_free either way
 *                  real_close
 *   non-zvfs fd -> pass-through
 *
 * dup / dup2 / dup3:
 *   zvfs fd     -> not supported in the current version: fails with
 *                  ENOTSUP (dup2(fd, fd) returns fd; dup3(fd, fd, ...)
 *                  fails with EINVAL)
 *   non-zvfs fd -> pass-through
 */

/* open family */
int open(const char *path, int flags, ...);
int open64(const char *path, int flags, ...);
int openat(int dirfd, const char *path, int flags, ...);
int openat64(int dirfd, const char *path, int flags, ...);
int creat(const char *path, mode_t mode);
int creat64(const char *path, mode_t mode);

/* close family */
int close(int fd);
int close_range(unsigned int first, unsigned int last, int flags);

/* dup family */
int dup(int oldfd);
int dup2(int oldfd, int newfd);
int dup3(int oldfd, int newfd, int flags);

/* glibc-internal aliases (forward to the open/close implementations) */
int __open(const char *path, int flags, ...);
int __open64(const char *path, int flags, ...);
int __libc_open(const char *path, int flags, ...);
int __close(int fd);
int __libc_close(int fd);

#endif // __ZVFS_HOOK_FD_H__
|
||||
298
src/hook/zvfs_hook_init.c
Normal file
298
src/hook/zvfs_hook_init.c
Normal file
@@ -0,0 +1,298 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* Thread-local re-entrancy depth counter (definition)                 */
/* ------------------------------------------------------------------ */
__thread int _zvfs_hook_depth = 0;

/* ------------------------------------------------------------------ */
/* zvfs mount point                                                    */
/* ------------------------------------------------------------------ */
#define ZVFS_MOUNT_PREFIX     "/zvfs"
/*
 * Length of the prefix without the terminating NUL. Derived from the
 * literal so the two can never drift apart; kept as an int expression to
 * match the previous integer constant.
 */
#define ZVFS_MOUNT_PREFIX_LEN ((int)(sizeof(ZVFS_MOUNT_PREFIX) - 1))
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* real_* function pointer definitions                                */
/* ------------------------------------------------------------------ */
/*
 * Each pointer starts out NULL and is filled in by zvfs_hook_init() via
 * dlsym(RTLD_NEXT, ...).  The types below match the extern declarations
 * in zvfs_hook_init.h exactly: the *64 variants take off64_t so that
 * definition and declaration stay compatible even where off_t and
 * off64_t are different types.
 */

/* open / close / dup */
int (*real_open)(const char *path, int flags, ...) = NULL;
int (*real_open64)(const char *path, int flags, ...) = NULL;
int (*real_openat)(int dirfd, const char *path, int flags, ...) = NULL;
int (*real_openat64)(int dirfd, const char *path, int flags, ...) = NULL;
int (*real_creat)(const char *path, mode_t mode) = NULL;
int (*real_creat64)(const char *path, mode_t mode) = NULL;
int (*real_close)(int fd) = NULL;
int (*real_close_range)(unsigned int first, unsigned int last,
                        unsigned int flags) = NULL;
int (*real_dup)(int oldfd) = NULL;
int (*real_dup2)(int oldfd, int newfd) = NULL;
int (*real_dup3)(int oldfd, int newfd, int flags) = NULL;

/* read */
ssize_t (*real_read)(int fd, void *buf, size_t count) = NULL;
ssize_t (*real_pread)(int fd, void *buf, size_t count, off_t offset) = NULL;
ssize_t (*real_pread64)(int fd, void *buf, size_t count,
                        off64_t offset) = NULL;
ssize_t (*real_readv)(int fd, const struct iovec *iov, int iovcnt) = NULL;
ssize_t (*real_preadv)(int fd, const struct iovec *iov, int iovcnt,
                       off_t offset) = NULL;
ssize_t (*real_preadv64)(int fd, const struct iovec *iov, int iovcnt,
                         off64_t offset) = NULL;
ssize_t (*real_preadv2)(int fd, const struct iovec *iov, int iovcnt,
                        off_t offset, int flags) = NULL;

/* write */
ssize_t (*real_write)(int fd, const void *buf, size_t count) = NULL;
ssize_t (*real_pwrite)(int fd, const void *buf, size_t count,
                       off_t offset) = NULL;
ssize_t (*real_pwrite64)(int fd, const void *buf, size_t count,
                         off64_t offset) = NULL;
ssize_t (*real_writev)(int fd, const struct iovec *iov, int iovcnt) = NULL;
ssize_t (*real_pwritev)(int fd, const struct iovec *iov, int iovcnt,
                        off_t offset) = NULL;
ssize_t (*real_pwritev64)(int fd, const struct iovec *iov, int iovcnt,
                          off64_t offset) = NULL;
ssize_t (*real_pwritev2)(int fd, const struct iovec *iov, int iovcnt,
                         off_t offset, int flags) = NULL;

/* lseek / truncate / fallocate */
off_t   (*real_lseek)(int fd, off_t offset, int whence) = NULL;
off64_t (*real_lseek64)(int fd, off64_t offset, int whence) = NULL;
int (*real_truncate)(const char *path, off_t length) = NULL;
int (*real_truncate64)(const char *path, off64_t length) = NULL;
int (*real_ftruncate)(int fd, off_t length) = NULL;
int (*real_ftruncate64)(int fd, off64_t length) = NULL;
int (*real_fallocate)(int fd, int mode, off_t offset, off_t len) = NULL;
int (*real_posix_fallocate)(int fd, off_t offset, off_t len) = NULL;

/* stat */
int (*real_stat)(const char *path, struct stat *buf) = NULL;
int (*real_stat64)(const char *path, struct stat64 *buf) = NULL;
int (*real_fstat)(int fd, struct stat *buf) = NULL;
int (*real_fstat64)(int fd, struct stat64 *buf) = NULL;
int (*real_lstat)(const char *path, struct stat *buf) = NULL;
int (*real_lstat64)(const char *path, struct stat64 *buf) = NULL;
int (*real_fstatat)(int dirfd, const char *path, struct stat *buf,
                    int flags) = NULL;
int (*real_fstatat64)(int dirfd, const char *path, struct stat64 *buf,
                      int flags) = NULL;
int (*real_statx)(int dirfd, const char *path, int flags,
                  unsigned int mask, struct statx *buf) = NULL;

/* sync */
int (*real_fsync)(int fd) = NULL;
int (*real_fdatasync)(int fd) = NULL;
int (*real_sync_file_range)(int fd, off64_t offset, off64_t nbytes,
                            unsigned int flags) = NULL;

/* fcntl / ioctl */
int (*real_fcntl)(int fd, int cmd, ...) = NULL;
int (*real_fcntl64)(int fd, int cmd, ...) = NULL;
int (*real_ioctl)(int fd, unsigned long request, ...) = NULL;

/* directory-level operations */
int (*real_unlink)(const char *path) = NULL;
int (*real_unlinkat)(int dirfd, const char *path, int flags) = NULL;
int (*real_rename)(const char *oldpath, const char *newpath) = NULL;
int (*real_renameat)(int olddirfd, const char *oldpath,
                     int newdirfd, const char *newpath) = NULL;
int (*real_renameat2)(int olddirfd, const char *oldpath,
                      int newdirfd, const char *newpath,
                      unsigned int flags) = NULL;

/* mmap */
void *(*real_mmap)(void *addr, size_t length, int prot, int flags,
                   int fd, off_t offset) = NULL;
void *(*real_mmap64)(void *addr, size_t length, int prot, int flags,
                     int fd, off64_t offset) = NULL;
int (*real_munmap)(void *addr, size_t length) = NULL;
int (*real_msync)(void *addr, size_t length, int flags) = NULL;

/* fork */
pid_t (*real_fork)(void) = NULL;
pid_t (*real_vfork)(void) = NULL;

/* glibc aliases */
int (*real___open)(const char *path, int flags, ...) = NULL;
int (*real___open64)(const char *path, int flags, ...) = NULL;
int (*real___libc_open)(const char *path, int flags, ...) = NULL;
ssize_t (*real___read)(int fd, void *buf, size_t count) = NULL;
ssize_t (*real___libc_read)(int fd, void *buf, size_t count) = NULL;
ssize_t (*real___write)(int fd, const void *buf, size_t count) = NULL;
ssize_t (*real___libc_write)(int fd, const void *buf, size_t count) = NULL;
int (*real___close)(int fd) = NULL;
int (*real___libc_close)(int fd) = NULL;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* dlsym 辅助宏 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* 找不到符号时不 fatal:部分 glibc 内部别名在某些发行版上可能不存在,
|
||||
* 置 NULL 后 hook 函数里做 NULL 检查再回退即可。
|
||||
*/
|
||||
#define LOAD_SYM(var, name) \
|
||||
do { \
|
||||
(var) = dlsym(RTLD_NEXT, (name)); \
|
||||
if (!(var)) \
|
||||
fprintf(stderr, "[zvfs] WARNING: dlsym(%s) = NULL\n", (name)); \
|
||||
} while (0)
|
||||
|
||||
#define LOAD_SYM_OPTIONAL(var, name) \
|
||||
do { (var) = dlsym(RTLD_NEXT, (name)); } while (0)
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 初始化 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
__attribute__((constructor))
|
||||
void zvfs_hook_init(void)
|
||||
{
|
||||
/* 必须存在的符号 */
|
||||
LOAD_SYM(real_open, "open");
|
||||
LOAD_SYM(real_open64, "open64");
|
||||
LOAD_SYM(real_openat, "openat");
|
||||
LOAD_SYM(real_openat64, "openat64");
|
||||
LOAD_SYM(real_creat, "creat");
|
||||
LOAD_SYM(real_creat64, "creat64");
|
||||
LOAD_SYM(real_close, "close");
|
||||
LOAD_SYM(real_dup, "dup");
|
||||
LOAD_SYM(real_dup2, "dup2");
|
||||
LOAD_SYM(real_dup3, "dup3");
|
||||
|
||||
LOAD_SYM(real_read, "read");
|
||||
LOAD_SYM(real_pread, "pread");
|
||||
LOAD_SYM(real_pread64, "pread64");
|
||||
LOAD_SYM(real_readv, "readv");
|
||||
LOAD_SYM(real_preadv, "preadv");
|
||||
LOAD_SYM(real_preadv64, "preadv64");
|
||||
LOAD_SYM(real_write, "write");
|
||||
LOAD_SYM(real_pwrite, "pwrite");
|
||||
LOAD_SYM(real_pwrite64, "pwrite64");
|
||||
LOAD_SYM(real_writev, "writev");
|
||||
LOAD_SYM(real_pwritev, "pwritev");
|
||||
LOAD_SYM(real_pwritev64, "pwritev64");
|
||||
|
||||
LOAD_SYM(real_lseek, "lseek");
|
||||
LOAD_SYM(real_lseek64, "lseek64");
|
||||
LOAD_SYM(real_truncate, "truncate");
|
||||
LOAD_SYM(real_truncate64, "truncate64");
|
||||
LOAD_SYM(real_ftruncate, "ftruncate");
|
||||
LOAD_SYM(real_ftruncate64, "ftruncate64");
|
||||
LOAD_SYM(real_fallocate, "fallocate");
|
||||
LOAD_SYM(real_posix_fallocate,"posix_fallocate");
|
||||
|
||||
LOAD_SYM(real_stat, "stat");
|
||||
LOAD_SYM(real_stat64, "stat64");
|
||||
LOAD_SYM(real_fstat, "fstat");
|
||||
LOAD_SYM(real_fstat64, "fstat64");
|
||||
LOAD_SYM(real_lstat, "lstat");
|
||||
LOAD_SYM(real_lstat64, "lstat64");
|
||||
LOAD_SYM(real_fstatat, "fstatat");
|
||||
LOAD_SYM(real_fstatat64, "fstatat64");
|
||||
LOAD_SYM(real_fsync, "fsync");
|
||||
LOAD_SYM(real_fdatasync, "fdatasync");
|
||||
LOAD_SYM(real_fcntl, "fcntl");
|
||||
LOAD_SYM(real_fcntl64, "fcntl64");
|
||||
LOAD_SYM(real_ioctl, "ioctl");
|
||||
|
||||
LOAD_SYM(real_unlink, "unlink");
|
||||
LOAD_SYM(real_unlinkat, "unlinkat");
|
||||
LOAD_SYM(real_rename, "rename");
|
||||
LOAD_SYM(real_renameat, "renameat");
|
||||
LOAD_SYM(real_mmap, "mmap");
|
||||
LOAD_SYM(real_mmap64, "mmap64");
|
||||
LOAD_SYM(real_munmap, "munmap");
|
||||
LOAD_SYM(real_msync, "msync");
|
||||
LOAD_SYM(real_fork, "fork");
|
||||
LOAD_SYM(real_vfork, "vfork");
|
||||
|
||||
/* 可选符号:glibc 内部别名,不一定存在 */
|
||||
LOAD_SYM_OPTIONAL(real_close_range, "close_range");
|
||||
LOAD_SYM_OPTIONAL(real_preadv2, "preadv2");
|
||||
LOAD_SYM_OPTIONAL(real_pwritev2, "pwritev2");
|
||||
LOAD_SYM_OPTIONAL(real_statx, "statx");
|
||||
LOAD_SYM_OPTIONAL(real_sync_file_range,"sync_file_range");
|
||||
LOAD_SYM_OPTIONAL(real_renameat2, "renameat2");
|
||||
LOAD_SYM_OPTIONAL(real___open, "__open");
|
||||
LOAD_SYM_OPTIONAL(real___open64, "__open64");
|
||||
LOAD_SYM_OPTIONAL(real___libc_open, "__libc_open");
|
||||
LOAD_SYM_OPTIONAL(real___read, "__read");
|
||||
LOAD_SYM_OPTIONAL(real___libc_read, "__libc_read");
|
||||
LOAD_SYM_OPTIONAL(real___write, "__write");
|
||||
LOAD_SYM_OPTIONAL(real___libc_write, "__libc_write");
|
||||
LOAD_SYM_OPTIONAL(real___close, "__close");
|
||||
LOAD_SYM_OPTIONAL(real___libc_close, "__libc_close");
|
||||
|
||||
/* 初始化全局 fs 结构 */
|
||||
zvfs_fs_init();
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 路径 / fd 判断 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
zvfs_is_zvfs_path(const char *path)
|
||||
{
|
||||
if (!path)
|
||||
return 0;
|
||||
/* 路径必须以 /zvfs 开头,且后一个字符是 '/' 或 '\0' */
|
||||
if (strncmp(path, ZVFS_MOUNT_PREFIX, ZVFS_MOUNT_PREFIX_LEN) != 0)
|
||||
return 0;
|
||||
char next = path[ZVFS_MOUNT_PREFIX_LEN];
|
||||
return (next == '/' || next == '\0');
|
||||
}
|
||||
|
||||
int
|
||||
zvfs_is_zvfs_fd(int fd)
|
||||
{
|
||||
if (fd < 0)
|
||||
return 0;
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
return (of != NULL);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* dirfd + 相对路径 → 绝对路径 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * Resolve (dirfd, path) into an absolute path written to buf.
 *
 *   - An absolute `path` is copied as-is.
 *   - Otherwise the base directory is the current working directory
 *     (dirfd == AT_FDCWD) or the target of /proc/self/fd/<dirfd>, and
 *     `path` is appended to it.  A NULL or empty `path` yields the base
 *     directory itself.
 *
 * Exactly one '/' separates base and path: a base that already ends in
 * '/' (for example the root directory) gets no extra separator, so the
 * result never starts with "//".
 *
 * "." and ".." components are NOT normalised and symlinks inside `path`
 * are not followed; the result is a plain concatenation.
 *
 * Returns 0 on success.  Returns -1 with errno set on failure:
 *   EINVAL        buf is NULL or bufsz is 0
 *   ENAMETOOLONG  the result (or the dirfd link target) does not fit
 *   otherwise     whatever getcwd() / readlink() reported
 */
int
zvfs_resolve_atpath(int dirfd, const char *path, char *buf, size_t bufsz)
{
    if (buf == NULL || bufsz == 0) {
        errno = EINVAL;
        return -1;
    }

    size_t path_len = path ? strlen(path) : 0;

    /* Absolute path: plain copy, including the terminator. */
    if (path_len > 0 && path[0] == '/') {
        if (path_len >= bufsz) {
            errno = ENAMETOOLONG;
            return -1;
        }
        memcpy(buf, path, path_len + 1);
        return 0;
    }

    if (dirfd == AT_FDCWD) {
        /* Relative to the current working directory. */
        if (!getcwd(buf, bufsz)) {
            return -1;
        }
    } else {
        /* Read the directory's absolute path from /proc/self/fd/<dirfd>. */
        char proc_path[64];
        snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d", dirfd);

        ssize_t len = readlink(proc_path, buf, bufsz - 1);
        if (len < 0) {
            return -1;
        }
        /*
         * readlink() truncates silently; a result that fills the whole
         * window cannot be told apart from a truncated one, so reject it.
         */
        if ((size_t)len >= bufsz - 1) {
            errno = ENAMETOOLONG;
            return -1;
        }
        buf[len] = '\0';
    }

    if (path_len == 0) {
        return 0;
    }

    /* Append path, adding a separator only when the base lacks one. */
    size_t dir_len = strlen(buf);
    size_t need_sep = (dir_len == 0 || buf[dir_len - 1] != '/') ? 1 : 0;

    /* dir_len < bufsz here, so the subtraction cannot underflow. */
    if (path_len >= bufsz - dir_len - need_sep) {
        errno = ENAMETOOLONG;
        return -1;
    }
    if (need_sep) {
        buf[dir_len++] = '/';
    }
    memcpy(buf + dir_len, path, path_len + 1);
    return 0;
}
|
||||
130
src/hook/zvfs_hook_init.h
Normal file
130
src/hook/zvfs_hook_init.h
Normal file
@@ -0,0 +1,130 @@
|
||||
#ifndef __ZVFS_HOOK_INIT_H__
#define __ZVFS_HOOK_INIT_H__

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
#include "fs/zvfs_sys_init.h"

/*
 * All pointers to the original (un-hooked) functions live here.
 * Naming rule: real_<funcname>.
 * Each one is resolved with dlsym(RTLD_NEXT, "funcname") from the
 * __attribute__((constructor)) function zvfs_hook_init().  A pointer is
 * NULL when the lookup found nothing.
 */

/* open family */
extern int (*real_open)(const char *path, int flags, ...);
extern int (*real_open64)(const char *path, int flags, ...);
extern int (*real_openat)(int dirfd, const char *path, int flags, ...);
extern int (*real_openat64)(int dirfd, const char *path, int flags, ...);
extern int (*real_creat)(const char *path, mode_t mode);
extern int (*real_creat64)(const char *path, mode_t mode);

/* close family */
extern int (*real_close)(int fd);
extern int (*real_close_range)(unsigned int first, unsigned int last, unsigned int flags);

/* dup family */
extern int (*real_dup)(int oldfd);
extern int (*real_dup2)(int oldfd, int newfd);
extern int (*real_dup3)(int oldfd, int newfd, int flags);

/* read family */
extern ssize_t (*real_read)(int fd, void *buf, size_t count);
extern ssize_t (*real_pread)(int fd, void *buf, size_t count, off_t offset);
extern ssize_t (*real_pread64)(int fd, void *buf, size_t count, off64_t offset);
extern ssize_t (*real_readv)(int fd, const struct iovec *iov, int iovcnt);
extern ssize_t (*real_preadv)(int fd, const struct iovec *iov, int iovcnt, off_t offset);
extern ssize_t (*real_preadv64)(int fd, const struct iovec *iov, int iovcnt, off64_t offset);
extern ssize_t (*real_preadv2)(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);

/* write family */
extern ssize_t (*real_write)(int fd, const void *buf, size_t count);
extern ssize_t (*real_pwrite)(int fd, const void *buf, size_t count, off_t offset);
extern ssize_t (*real_pwrite64)(int fd, const void *buf, size_t count, off64_t offset);
extern ssize_t (*real_writev)(int fd, const struct iovec *iov, int iovcnt);
extern ssize_t (*real_pwritev)(int fd, const struct iovec *iov, int iovcnt, off_t offset);
extern ssize_t (*real_pwritev64)(int fd, const struct iovec *iov, int iovcnt, off64_t offset);
extern ssize_t (*real_pwritev2)(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);

/* lseek */
extern off_t   (*real_lseek)(int fd, off_t offset, int whence);
extern off64_t (*real_lseek64)(int fd, off64_t offset, int whence);

/* truncate / fallocate */
extern int (*real_truncate)(const char *path, off_t length);
extern int (*real_truncate64)(const char *path, off64_t length);
extern int (*real_ftruncate)(int fd, off_t length);
extern int (*real_ftruncate64)(int fd, off64_t length);
extern int (*real_fallocate)(int fd, int mode, off_t offset, off_t len);
extern int (*real_posix_fallocate)(int fd, off_t offset, off_t len);

/* stat family */
extern int (*real_stat)(const char *path, struct stat *buf);
extern int (*real_stat64)(const char *path, struct stat64 *buf);
extern int (*real_fstat)(int fd, struct stat *buf);
extern int (*real_fstat64)(int fd, struct stat64 *buf);
extern int (*real_lstat)(const char *path, struct stat *buf);
extern int (*real_lstat64)(const char *path, struct stat64 *buf);
extern int (*real_fstatat)(int dirfd, const char *path, struct stat *buf, int flags);
extern int (*real_fstatat64)(int dirfd, const char *path, struct stat64 *buf, int flags);
extern int (*real_statx)(int dirfd, const char *path, int flags,
                         unsigned int mask, struct statx *buf);

/* sync */
extern int (*real_fsync)(int fd);
extern int (*real_fdatasync)(int fd);
extern int (*real_sync_file_range)(int fd, off64_t offset, off64_t nbytes, unsigned int flags);

/* fcntl / ioctl */
extern int (*real_fcntl)(int fd, int cmd, ...);
extern int (*real_fcntl64)(int fd, int cmd, ...);
extern int (*real_ioctl)(int fd, unsigned long request, ...);

/* directory-aware operations */
extern int (*real_unlink)(const char *path);
extern int (*real_unlinkat)(int dirfd, const char *path, int flags);
extern int (*real_rename)(const char *oldpath, const char *newpath);
extern int (*real_renameat)(int olddirfd, const char *oldpath,
                            int newdirfd, const char *newpath);
extern int (*real_renameat2)(int olddirfd, const char *oldpath,
                             int newdirfd, const char *newpath,
                             unsigned int flags);

/* mmap family (reserved for future use) */
extern void *(*real_mmap)(void *addr, size_t length, int prot, int flags,
                          int fd, off_t offset);
extern void *(*real_mmap64)(void *addr, size_t length, int prot, int flags,
                            int fd, off64_t offset);
extern int (*real_munmap)(void *addr, size_t length);
extern int (*real_msync)(void *addr, size_t length, int flags);

/*
 * fork family.  These are defined and resolved in zvfs_hook_init.c
 * alongside the pointers above; declared here so users do not need
 * their own extern declarations.
 */
extern pid_t (*real_fork)(void);
extern pid_t (*real_vfork)(void);

/* glibc internal aliases */
extern int (*real___open)(const char *path, int flags, ...);
extern int (*real___open64)(const char *path, int flags, ...);
extern int (*real___libc_open)(const char *path, int flags, ...);
extern ssize_t (*real___read)(int fd, void *buf, size_t count);
extern ssize_t (*real___libc_read)(int fd, void *buf, size_t count);
extern ssize_t (*real___write)(int fd, const void *buf, size_t count);
extern ssize_t (*real___libc_write)(int fd, const void *buf, size_t count);
extern int (*real___close)(int fd);
extern int (*real___libc_close)(int fd);

/* Resolves every real_* pointer; runs as a constructor. */
void zvfs_hook_init(void);

/* Tell whether a path / fd falls inside the range zvfs takes over. */
int zvfs_is_zvfs_path(const char *path);
int zvfs_is_zvfs_fd(int fd);

/*
 * Resolve dirfd + relative path into an absolute path, written to buf
 * (capacity bufsz).
 * dirfd == AT_FDCWD means "relative to the current working directory".
 * Returns 0 on success, -1 with errno set on failure.
 */
int zvfs_resolve_atpath(int dirfd, const char *path, char *buf, size_t bufsz);

#endif // __ZVFS_HOOK_INIT_H__
|
||||
85
src/hook/zvfs_hook_mmap.c
Normal file
85
src/hook/zvfs_hook_mmap.c
Normal file
@@ -0,0 +1,85 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_mmap.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* mmap / mmap64 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
void *
|
||||
mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
void *ret;
|
||||
|
||||
/*
|
||||
* MAP_ANONYMOUS:不关联任何 fd,直接透传。
|
||||
* 非 zvfs fd:直接透传。
|
||||
* zvfs fd:返回 ENOTSUP。
|
||||
*/
|
||||
if (ZVFS_IN_HOOK() || (flags & MAP_ANONYMOUS) || !zvfs_is_zvfs_fd(fd)) {
|
||||
ret = real_mmap(addr, length, prot, flags, fd, offset);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/* zvfs fd:当前不支持 mmap */
|
||||
errno = ENOTSUP;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
/*
 * mmap64 hook: forwards to the mmap hook unchanged.  Per the original
 * note, on 64-bit systems off_t is already 64 bits wide, so the two
 * entry points are equivalent.
 */
void *
mmap64(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
    void *mapping = mmap(addr, length, prot, flags, fd, offset);

    return mapping;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* munmap */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * munmap hook: pure pass-through to real_munmap.
 *
 * The mmap hook never succeeds for a zvfs fd, so there is no zvfs
 * mapping to handle here.
 *
 * future: look the range up in mmap_table and, on a hit, write it back
 * with blob_write before passing the call through.
 */
int
munmap(void *addr, size_t length)
{
    int rc;

    ZVFS_HOOK_ENTER();
    rc = real_munmap(addr, length);
    ZVFS_HOOK_LEAVE();

    return rc;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* msync */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * msync hook: pure pass-through to real_msync, for the same reason as
 * munmap (no zvfs mapping can exist at the moment).
 *
 * future: look the range up in mmap_table and, on a hit, blob_write the
 * affected range.
 */
int
msync(void *addr, size_t length, int flags)
{
    int rc;

    ZVFS_HOOK_ENTER();
    rc = real_msync(addr, length, flags);
    ZVFS_HOOK_LEAVE();

    return rc;
}
|
||||
34
src/hook/zvfs_hook_mmap.h
Normal file
34
src/hook/zvfs_hook_mmap.h
Normal file
@@ -0,0 +1,34 @@
|
||||
#ifndef __ZVFS_HOOK_MMAP_H__
#define __ZVFS_HOOK_MMAP_H__

#include <sys/mman.h>
#include <stddef.h>

/*
 * mmap family hooks.
 *
 * Current policy:
 *   - fd belongs to zvfs: fail with ENOTSUP, forcing the caller onto a
 *     non-mmap code path
 *     (RocksDB: options.use_mmap_reads/writes = false).
 *   - fd does not belong to zvfs, or MAP_ANONYMOUS: pass through to
 *     real_mmap.
 *
 * munmap / msync:
 *   mmap never succeeds for a zvfs fd, so munmap/msync can never meet a
 *   zvfs mapping; they simply pass through.
 *
 * Reserved extension point (future):
 *   mmap   -> allocate anonymous memory with MAP_ANONYMOUS, fill it via
 *             blob_read, and insert (addr, length, inode, file_offset)
 *             into mmap_table
 *   munmap -> look up mmap_table; on a hit, write back with blob_write,
 *             then do the real munmap
 *   msync  -> look up mmap_table and blob_write the affected range
 */

void *mmap(void *addr, size_t length, int prot, int flags, int fd,
           off_t offset);
void *mmap64(void *addr, size_t length, int prot, int flags, int fd,
             off_t offset);
int   munmap(void *addr, size_t length);
int   msync(void *addr, size_t length, int flags);

#endif // __ZVFS_HOOK_MMAP_H__
|
||||
32
src/hook/zvfs_hook_reentrant.h
Normal file
32
src/hook/zvfs_hook_reentrant.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef __ZVFS_HOOK_REENTRANT_H__
#define __ZVFS_HOOK_REENTRANT_H__

/*
 * Thread-local re-entrancy depth counter.
 *
 * Every hook calls ZVFS_HOOK_ENTER() on entry and ZVFS_HOOK_LEAVE() on
 * every exit path.  A depth greater than 1 means the current call was
 * issued from inside another hook (for example a hook calling
 * real_fstat while fstat itself is hooked); such a call must go
 * straight to real_* and skip the zvfs logic.
 *
 * Typical skeleton:
 *
 *   int fstat(int fd, struct stat *buf)
 *   {
 *       ZVFS_HOOK_ENTER();
 *       int ret;
 *       if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(fd))
 *           ret = real_fstat(fd, buf);
 *       else
 *           ret = zvfs_fstat_impl(fd, buf);
 *       ZVFS_HOOK_LEAVE();
 *       return ret;
 *   }
 */

extern __thread int _zvfs_hook_depth;

/* Increment the calling thread's depth on hook entry. */
#define ZVFS_HOOK_ENTER()   (++_zvfs_hook_depth)

/* Decrement it again; must pair with every ZVFS_HOOK_ENTER(). */
#define ZVFS_HOOK_LEAVE()   (--_zvfs_hook_depth)

/* True when the current hook was entered from within another hook. */
#define ZVFS_IN_HOOK()      (_zvfs_hook_depth > 1)

#endif // __ZVFS_HOOK_REENTRANT_H__
|
||||
549
src/hook/zvfs_hook_rw.c
Normal file
549
src/hook/zvfs_hook_rw.c
Normal file
@@ -0,0 +1,549 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_rw.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
#include "fs/zvfs_inode.h"
|
||||
#include "spdk_engine/io_engine.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:单段 pread / pwrite(不修改 of->offset) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* zvfs_pread_impl
|
||||
*
|
||||
* 从 blob 的 [offset, offset+count) 读取数据到 buf。
|
||||
* 若请求范围超出 logical_size,截断到 logical_size 边界。
|
||||
* 成功返回实际读取字节数,失败返回 -1。
|
||||
*/
|
||||
static ssize_t
|
||||
zvfs_pread_impl(struct zvfs_open_file *of,
|
||||
void *buf, size_t count, uint64_t offset)
|
||||
{
|
||||
/* 持 inode->mu 读 logical_size,防止并发 write 同时修改 */
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
uint64_t size = of->inode->logical_size;
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
/* offset 超出文件末尾:返回 0(EOF) */
|
||||
if (offset >= size)
|
||||
return 0;
|
||||
|
||||
/* 截断读取长度到文件末尾 */
|
||||
if (offset + count > size)
|
||||
count = (size_t)(size - offset);
|
||||
|
||||
if (count == 0)
|
||||
return 0;
|
||||
|
||||
if (blob_read(of->handle, offset, buf, count) < 0) {
|
||||
errno = EIO;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return (ssize_t)count;
|
||||
}
|
||||
|
||||
/*
|
||||
* zvfs_pwrite_impl
|
||||
*
|
||||
* 将 buf 的 count 字节写入 blob 的 offset 处。
|
||||
* 若写入后末尾超过 logical_size,更新 logical_size 并同步 st_size。
|
||||
* 成功返回 count,失败返回 -1。
|
||||
*/
|
||||
static ssize_t
|
||||
zvfs_pwrite_impl(struct zvfs_open_file *of,
|
||||
const void *buf, size_t count, uint64_t offset)
|
||||
{
|
||||
if (count == 0)
|
||||
return 0;
|
||||
|
||||
uint64_t end = offset + count;
|
||||
|
||||
/*
|
||||
* 若写入范围超出 blob 当前物理大小,先 resize。
|
||||
* blob_resize 是 SPDK 侧的操作(可能分配新 cluster)。
|
||||
*/
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
uint64_t old_size = of->inode->logical_size;
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
if (end > old_size) {
|
||||
if (blob_resize(of->handle, end) < 0) {
|
||||
errno = EIO;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (blob_write(of->handle, offset, buf, count) < 0) {
|
||||
errno = EIO;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* 更新 logical_size(持锁,inode_update_size 负责 ftruncate) */
|
||||
if (end > old_size) {
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
if (end > of->inode->logical_size) /* double-check */
|
||||
inode_update_size(of->inode, of->fd, end);
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
}
|
||||
|
||||
return (ssize_t)count;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:iov 合并辅助 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * iov_total_len - sum of iov_len over the first `iovcnt` entries of
 * `iov`.  Returns 0 when iovcnt is zero or negative.
 */
static size_t
iov_total_len(const struct iovec *iov, int iovcnt)
{
    size_t sum = 0;
    int idx = 0;

    while (idx < iovcnt) {
        sum += iov[idx].iov_len;
        idx++;
    }
    return sum;
}
|
||||
|
||||
/*
|
||||
* zvfs_iov_pread
|
||||
*
|
||||
* 将 iovec 合并为单次 blob_read:
|
||||
* 1. 一次 blob_read 读到临时 buf
|
||||
* 2. 按 iovec 顺序分发到各段
|
||||
*
|
||||
* 单次 SPDK I/O 比逐段提交效率高得多;
|
||||
* 堆分配代价(通常几个 page)远小于多次 SPDK 提交的开销。
|
||||
*/
|
||||
static ssize_t
|
||||
zvfs_iov_pread(struct zvfs_open_file *of,
|
||||
const struct iovec *iov, int iovcnt, uint64_t offset)
|
||||
{
|
||||
size_t total_len = iov_total_len(iov, iovcnt);
|
||||
if (total_len == 0) return 0;
|
||||
|
||||
/* 截断到文件末尾 */
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
uint64_t size = of->inode->logical_size;
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
if (offset >= size) return 0;
|
||||
if (offset + total_len > size)
|
||||
total_len = (size_t)(size - offset);
|
||||
|
||||
/* 分配临时 buf,单次读 */
|
||||
char *tmp = malloc(total_len);
|
||||
if (!tmp) { errno = ENOMEM; return -1; }
|
||||
|
||||
if (blob_read(of->handle, offset, tmp, total_len) < 0) {
|
||||
free(tmp);
|
||||
errno = EIO;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* 分发到各 iovec 段 */
|
||||
size_t copied = 0;
|
||||
for (int i = 0; i < iovcnt && copied < total_len; i++) {
|
||||
size_t seg = iov[i].iov_len;
|
||||
if (seg == 0) continue;
|
||||
if (copied + seg > total_len) seg = total_len - copied;
|
||||
memcpy(iov[i].iov_base, tmp + copied, seg);
|
||||
copied += seg;
|
||||
}
|
||||
|
||||
free(tmp);
|
||||
return (ssize_t)total_len;
|
||||
}
|
||||
|
||||
/*
 * zvfs_iov_pwrite
 *
 * Gather-write for an iovec, done as a single write:
 *   1. concatenate all segments, in order, into one temporary buffer,
 *   2. hand that buffer to zvfs_pwrite_impl in one call.
 *
 * This avoids one SPDK submission and one size update per segment.
 *
 * Returns whatever zvfs_pwrite_impl returns, 0 for an empty iovec, or
 * -1 with errno = ENOMEM when the temporary buffer cannot be allocated.
 */
static ssize_t
zvfs_iov_pwrite(struct zvfs_open_file *of,
                const struct iovec *iov, int iovcnt, uint64_t offset)
{
    const size_t nbytes = iov_total_len(iov, iovcnt);

    if (nbytes == 0) {
        return 0;
    }

    char *staging = malloc(nbytes);
    if (staging == NULL) {
        errno = ENOMEM;
        return -1;
    }

    /* Pack the segments back to back, skipping empty ones. */
    char *cursor = staging;
    int idx = 0;
    while (idx < iovcnt) {
        const struct iovec *seg = &iov[idx++];
        if (seg->iov_len != 0) {
            memcpy(cursor, seg->iov_base, seg->iov_len);
            cursor += seg->iov_len;
        }
    }

    /* One write for the whole payload. */
    ssize_t written = zvfs_pwrite_impl(of, staging, nbytes, offset);

    free(staging);
    return written;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:取出 of,处理重入/非 zvfs 判断 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
static inline struct zvfs_open_file *
|
||||
get_of(int fd)
|
||||
{
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
return of;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* read */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
ssize_t
|
||||
read(int fd, void *buf, size_t count)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of;
|
||||
if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
|
||||
ssize_t r = real_read(fd, buf, count);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
ssize_t r = zvfs_pread_impl(of, buf, count, of->offset);
|
||||
if (r > 0)
|
||||
of->offset += (uint64_t)r;
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
/* glibc alias of read: forwards to the read hook. */
ssize_t
__read(int fd, void *buf, size_t count)
{
    return read(fd, buf, count);
}

/* glibc alias of read: forwards to the read hook. */
ssize_t
__libc_read(int fd, void *buf, size_t count)
{
    return read(fd, buf, count);
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* pread / pread64 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * pread hook.
 *
 * Re-entrant calls and descriptors unknown to zvfs go to real_pread.
 * For a zvfs descriptor the data is read at `offset`; the descriptor's
 * own offset is left untouched.
 *
 * A negative `offset` on a zvfs descriptor fails with -1 / EINVAL.
 * Without this check the cast to uint64_t would turn it into a huge
 * offset and the call would report EOF instead of an error.
 */
ssize_t
pread(int fd, void *buf, size_t count, off_t offset)
{
    ZVFS_HOOK_ENTER();

    struct zvfs_open_file *of;
    if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
        ssize_t r = real_pread(fd, buf, count, offset);
        ZVFS_HOOK_LEAVE();
        return r;
    }

    zvfs_ensure_init();

    if (offset < 0) {
        errno = EINVAL;
        ZVFS_HOOK_LEAVE();
        return -1;
    }

    ssize_t r = zvfs_pread_impl(of, buf, count, (uint64_t)offset);
    ZVFS_HOOK_LEAVE();
    return r;
}
|
||||
|
||||
/* pread64 hook: forwards to the pread hook with the same arguments. */
ssize_t
pread64(int fd, void *buf, size_t count, off_t offset)
{
    ssize_t nread = pread(fd, buf, count, offset);

    return nread;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* readv / preadv / preadv64 / preadv2 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
ssize_t
|
||||
readv(int fd, const struct iovec *iov, int iovcnt)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of;
|
||||
if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
|
||||
ssize_t r = real_readv(fd, iov, iovcnt);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
ssize_t r = zvfs_iov_pread(of, iov, iovcnt, of->offset);
|
||||
if (r > 0)
|
||||
of->offset += (uint64_t)r;
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * preadv hook.
 *
 * Re-entrant calls and descriptors unknown to zvfs go to real_preadv.
 * For a zvfs descriptor the iovec is filled starting at `offset`; the
 * descriptor's own offset is left untouched.
 *
 * A negative `offset` on a zvfs descriptor fails with -1 / EINVAL.
 * Without this check the cast to uint64_t would turn it into a huge
 * offset and the call would report EOF instead of an error.
 */
ssize_t
preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
    ZVFS_HOOK_ENTER();

    struct zvfs_open_file *of;
    if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
        ssize_t r = real_preadv(fd, iov, iovcnt, offset);
        ZVFS_HOOK_LEAVE();
        return r;
    }

    zvfs_ensure_init();

    if (offset < 0) {
        errno = EINVAL;
        ZVFS_HOOK_LEAVE();
        return -1;
    }

    ssize_t r = zvfs_iov_pread(of, iov, iovcnt, (uint64_t)offset);
    ZVFS_HOOK_LEAVE();
    return r;
}
|
||||
|
||||
/* preadv64 hook: forwards to the preadv hook with the same arguments. */
ssize_t
preadv64(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
    ssize_t nread = preadv(fd, iov, iovcnt, offset);

    return nread;
}
|
||||
|
||||
ssize_t
|
||||
preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of;
|
||||
if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
|
||||
ssize_t r = real_preadv2
|
||||
? real_preadv2(fd, iov, iovcnt, offset, flags)
|
||||
: (errno = ENOSYS, (ssize_t)-1);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/*
|
||||
* RWF_NOWAIT:zvfs 无阻塞 I/O 概念,blob_read 总是同步返回,
|
||||
* 忽略该 flag,按普通 preadv 处理。
|
||||
* RWF_HIPRI / RWF_DSYNC / RWF_SYNC:同上,忽略。
|
||||
*/
|
||||
uint64_t off = (offset == (off_t)-1) ? of->offset : (uint64_t)offset;
|
||||
ssize_t r = zvfs_iov_pread(of, iov, iovcnt, off);
|
||||
if (offset == (off_t)-1 && r > 0)
|
||||
of->offset += (uint64_t)r;
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* write */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
ssize_t
|
||||
write(int fd, const void *buf, size_t count)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of;
|
||||
if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
|
||||
ssize_t r = real_write(fd, buf, count);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
uint64_t write_off;
|
||||
|
||||
if (of->flags & O_APPEND) {
|
||||
/*
|
||||
* O_APPEND:每次写入位置 = 当前 logical_size(原子操作)。
|
||||
* 持 inode->mu 保证 read-then-write 的原子性,
|
||||
* 防止两个 O_APPEND fd 并发写时覆盖彼此数据。
|
||||
*/
|
||||
/* --- O_APPEND 内联写 -------------------------------------- */
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
write_off = of->inode->logical_size; /* 重新取,防止 TOCTOU */
|
||||
uint64_t end = write_off + count;
|
||||
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
if (blob_resize(of->handle, end) < 0) {
|
||||
errno = EIO;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
if (blob_write(of->handle, write_off, buf, count) < 0) {
|
||||
errno = EIO;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
if (end > of->inode->logical_size)
|
||||
inode_update_size(of->inode, of->fd, end);
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return (ssize_t)count;
|
||||
|
||||
} else {
|
||||
write_off = of->offset;
|
||||
ssize_t r = zvfs_pwrite_impl(of, buf, count, write_off);
|
||||
if (r > 0)
|
||||
of->offset += (uint64_t)r;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
/* __write - alias entry point that forwards to the write() hook. */
ssize_t __write(int fd, const void *buf, size_t count)
{
    ssize_t nwritten = write(fd, buf, count);

    return nwritten;
}

/* __libc_write - alias entry point that forwards to the write() hook. */
ssize_t __libc_write(int fd, const void *buf, size_t count)
{
    ssize_t nwritten = write(fd, buf, count);

    return nwritten;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* pwrite / pwrite64 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * pwrite - hook for pwrite(2): write at an explicit offset.
 *
 * Calls nested inside another hook, and descriptors unknown to get_of(),
 * are forwarded to real_pwrite(). For zvfs descriptors the data is written
 * by zvfs_pwrite_impl() at the given offset; O_APPEND is not consulted and
 * of->offset is not modified.
 *
 * Returns the result of zvfs_pwrite_impl(), or -1 with errno = EINVAL when
 * offset is negative.
 */
ssize_t
pwrite(int fd, const void *buf, size_t count, off_t offset)
{
    ZVFS_HOOK_ENTER();

    struct zvfs_open_file *of;
    if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
        ssize_t r = real_pwrite(fd, buf, count, offset);
        ZVFS_HOOK_LEAVE();
        return r;
    }

    zvfs_ensure_init();

    /* A negative off_t would turn into a huge uint64_t after the cast. */
    if (offset < 0) {
        errno = EINVAL;
        ZVFS_HOOK_LEAVE();
        return -1;
    }

    /* pwrite ignores O_APPEND and always writes at the requested offset. */
    ssize_t r = zvfs_pwrite_impl(of, buf, count, (uint64_t)offset);
    ZVFS_HOOK_LEAVE();
    return r;
}
|
||||
|
||||
/* pwrite64 - alias entry point; hands every argument to pwrite() unchanged. */
ssize_t pwrite64(int fd, const void *buf, size_t count, off_t offset)
{
    ssize_t nwritten = pwrite(fd, buf, count, offset);

    return nwritten;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* writev / pwritev / pwritev64 / pwritev2 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
ssize_t
|
||||
writev(int fd, const struct iovec *iov, int iovcnt)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of;
|
||||
if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
|
||||
ssize_t r = real_writev(fd, iov, iovcnt);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
ssize_t r;
|
||||
if (of->flags & O_APPEND) {
|
||||
/*
|
||||
* O_APPEND + writev:和 write 一样需要原子序列。
|
||||
* 先计算总字节数,用 iov_pwrite 完成,整个过程持 inode->mu。
|
||||
*/
|
||||
size_t total_len = 0;
|
||||
for (int i = 0; i < iovcnt; i++) total_len += iov[i].iov_len;
|
||||
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
uint64_t write_off = of->inode->logical_size;
|
||||
uint64_t end = write_off + total_len;
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
if (blob_resize(of->handle, end) < 0) { errno = EIO; ZVFS_HOOK_LEAVE(); return -1; }
|
||||
r = zvfs_iov_pwrite(of, iov, iovcnt, write_off);
|
||||
|
||||
if (r > 0) {
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
uint64_t new_end = write_off + (uint64_t)r;
|
||||
if (new_end > of->inode->logical_size)
|
||||
inode_update_size(of->inode, of->fd, new_end);
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
}
|
||||
} else {
|
||||
r = zvfs_iov_pwrite(of, iov, iovcnt, of->offset);
|
||||
if (r > 0) of->offset += (uint64_t)r;
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * pwritev - hook for pwritev(2): gather write at an explicit offset.
 *
 * Calls nested inside another hook, and descriptors unknown to get_of(),
 * are forwarded to real_pwritev(). For zvfs descriptors the write is done
 * by zvfs_iov_pwrite() at the given offset; of->offset is not touched.
 *
 * Returns the result of zvfs_iov_pwrite(), or -1 with errno = EINVAL when
 * offset is negative.
 */
ssize_t
pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
    ZVFS_HOOK_ENTER();

    struct zvfs_open_file *of;
    if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
        ssize_t r = real_pwritev(fd, iov, iovcnt, offset);
        ZVFS_HOOK_LEAVE();
        return r;
    }

    zvfs_ensure_init();

    /* A negative off_t would turn into a huge uint64_t after the cast. */
    if (offset < 0) {
        errno = EINVAL;
        ZVFS_HOOK_LEAVE();
        return -1;
    }

    ssize_t r = zvfs_iov_pwrite(of, iov, iovcnt, (uint64_t)offset);
    ZVFS_HOOK_LEAVE();
    return r;
}
|
||||
|
||||
/* pwritev64 - alias entry point; hands every argument to pwritev() unchanged. */
ssize_t pwritev64(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
    ssize_t nwritten = pwritev(fd, iov, iovcnt, offset);

    return nwritten;
}
|
||||
|
||||
ssize_t
|
||||
pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of;
|
||||
if (ZVFS_IN_HOOK() || !(of = get_of(fd))) {
|
||||
ssize_t r = real_pwritev2
|
||||
? real_pwritev2(fd, iov, iovcnt, offset, flags)
|
||||
: (errno = ENOSYS, (ssize_t)-1);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/* flags(RWF_SYNC/RWF_DSYNC/RWF_APPEND 等):
|
||||
* zvfs 无缓冲区,所有写均同步落盘,忽略 flags。
|
||||
* offset == -1:使用并更新 of->offset。 */
|
||||
uint64_t off = (offset == (off_t)-1) ? of->offset : (uint64_t)offset;
|
||||
ssize_t r = zvfs_iov_pwrite(of, iov, iovcnt, off);
|
||||
if (offset == (off_t)-1 && r > 0)
|
||||
of->offset += (uint64_t)r;
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
52
src/hook/zvfs_hook_rw.h
Normal file
52
src/hook/zvfs_hook_rw.h
Normal file
@@ -0,0 +1,52 @@
|
||||
#ifndef __ZVFS_HOOK_RW_H__
|
||||
#define __ZVFS_HOOK_RW_H__
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
|
||||
* read / write 族。
|
||||
*
|
||||
* 所有变体最终收敛到两个内部实现:
|
||||
* zvfs_pread_impl (fd, buf, count, offset)
|
||||
* zvfs_pwrite_impl(fd, buf, count, offset)
|
||||
*
|
||||
* offset 语义:
|
||||
* - pread/pwrite 系列:直接使用传入 offset,不修改 of->offset
|
||||
* - read/write 系列:使用 of->offset,完成后更新
|
||||
* - O_APPEND write :每次写前持 inode->mu 取 logical_size 作为 offset
|
||||
*
|
||||
* iov 系列(readv/writev/preadv/pwritev):
|
||||
* 展开 iovec 后逐段调用 pread/pwrite impl,合并结果。
|
||||
* 这样不需要在 SPDK 层实现 scatter/gather,实现最简单。
|
||||
* 如果将来 SPDK 层支持 SGL 可以直接换掉这一层。
|
||||
*/
|
||||
|
||||
/* read 族 */
|
||||
ssize_t read(int fd, void *buf, size_t count);
|
||||
ssize_t pread(int fd, void *buf, size_t count, off_t offset);
|
||||
ssize_t pread64(int fd, void *buf, size_t count, off_t offset);
|
||||
ssize_t readv(int fd, const struct iovec *iov, int iovcnt);
|
||||
ssize_t preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
ssize_t preadv64(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
ssize_t preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset,
|
||||
int flags);
|
||||
|
||||
/* write 族 */
|
||||
ssize_t write(int fd, const void *buf, size_t count);
|
||||
ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset);
|
||||
ssize_t pwrite64(int fd, const void *buf, size_t count, off_t offset);
|
||||
ssize_t writev(int fd, const struct iovec *iov, int iovcnt);
|
||||
ssize_t pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
ssize_t pwritev64(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset,
|
||||
int flags);
|
||||
|
||||
/* glibc 别名 */
|
||||
ssize_t __read(int fd, void *buf, size_t count);
|
||||
ssize_t __libc_read(int fd, void *buf, size_t count);
|
||||
ssize_t __write(int fd, const void *buf, size_t count);
|
||||
ssize_t __libc_write(int fd, const void *buf, size_t count);
|
||||
|
||||
#endif // __ZVFS_HOOK_RW_H__
|
||||
301
src/hook/zvfs_hook_seek.c
Normal file
301
src/hook/zvfs_hook_seek.c
Normal file
@@ -0,0 +1,301 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_seek.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
#include "fs/zvfs_inode.h"
|
||||
#include "fs/zvfs_path_entry.h"
|
||||
#include "spdk_engine/io_engine.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/falloc.h> /* FALLOC_FL_* */
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* lseek / lseek64 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
off_t
|
||||
lseek(int fd, off_t offset, int whence)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of = NULL;
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
}
|
||||
|
||||
if (!of) {
|
||||
off_t r = real_lseek(fd, offset, whence);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/*
|
||||
* O_APPEND fd 的 lseek:POSIX 允许 lseek,但下次 write 时
|
||||
* 仍会从文件末尾写。lseek 只影响 read 的位置。
|
||||
* 我们照常更新 of->offset。
|
||||
*/
|
||||
pthread_mutex_lock(&of->inode->mu); /* SEEK_END 需读 logical_size */
|
||||
uint64_t new_off = openfile_seek(of, (int64_t)offset, whence);
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
if (new_off == (uint64_t)-1) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return (off_t)-1;
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return (off_t)new_off;
|
||||
}
|
||||
|
||||
/* lseek64 - alias entry point; hands every argument to lseek() unchanged. */
off_t lseek64(int fd, off_t offset, int whence)
{
    off_t pos = lseek(fd, offset, whence);

    return pos;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:按 inode 指针做 truncate(path / fd 路径共用) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
|
||||
/*
|
||||
* zvfs_truncate_by_inode - 对有 handle 的 openfile 做 truncate。
|
||||
* 找到任意一个打开该 inode 的 openfile 取其 handle。
|
||||
*/
|
||||
static int
|
||||
zvfs_truncate_inode_with_handle(struct zvfs_inode *inode,
|
||||
int real_fd, uint64_t new_size)
|
||||
{
|
||||
/* 在 fd_table 里找一个指向该 inode 的 openfile 取 handle */
|
||||
struct zvfs_blob_handle *handle = NULL;
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of, *tmp;
|
||||
HASH_ITER(hh, g_fs.fd_table, of, tmp) {
|
||||
(void)tmp;
|
||||
if (of->inode == inode) {
|
||||
handle = of->handle;
|
||||
break;
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
pthread_mutex_lock(&inode->mu);
|
||||
uint64_t old_size = inode->logical_size;
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
|
||||
if (new_size != old_size && handle) {
|
||||
if (blob_resize(handle, new_size) < 0) {
|
||||
errno = EIO;
|
||||
return -1;
|
||||
}
|
||||
} else if (new_size != old_size && !handle) {
|
||||
/*
|
||||
* 文件未被打开:需要临时 blob_open。
|
||||
* 这种情况下 truncate(path, ...) 被调用但文件没有 fd。
|
||||
*/
|
||||
handle = blob_open(inode->blob_id);
|
||||
if (!handle) { errno = EIO; return -1; }
|
||||
int rc = blob_resize(handle, new_size);
|
||||
blob_close(handle);
|
||||
if (rc < 0) { errno = EIO; return -1; }
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&inode->mu);
|
||||
inode_update_size(inode, real_fd, new_size);
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* ftruncate / ftruncate64 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
ftruncate(int fd, off_t length)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of = NULL;
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
}
|
||||
|
||||
if (!of) {
|
||||
int r = real_ftruncate(fd, length);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
if (length < 0) { errno = EINVAL; ZVFS_HOOK_LEAVE(); return -1; }
|
||||
|
||||
int r = zvfs_truncate_inode_with_handle(of->inode, fd, (uint64_t)length);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
/* ftruncate64 - alias entry point; hands both arguments to ftruncate(). */
int ftruncate64(int fd, off_t length)
{
    int rc = ftruncate(fd, length);

    return rc;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* truncate / truncate64(按路径) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
truncate(const char *path, off_t length)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path)) {
|
||||
int r = real_truncate(path, length);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
if (length < 0) { errno = EINVAL; ZVFS_HOOK_LEAVE(); return -1; }
|
||||
|
||||
/* 查 path_cache 拿 inode */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
struct zvfs_path_entry *pe = path_cache_lookup(path);
|
||||
struct zvfs_inode *inode = pe ? pe->inode : NULL;
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
if (!inode) {
|
||||
/*
|
||||
* inode 不在缓存:文件存在于 FS 但从未被 open。
|
||||
* 需要读 xattr 拿 blob_id,临时构建 inode。
|
||||
* 最简单的做法:先 real_open,再走 zvfs 路径,再 real_close。
|
||||
* 这里直接调 real_truncate 改 st_size,但 blob 不会被截断。
|
||||
*
|
||||
* 更正确的做法:open + ftruncate + close。
|
||||
* 调用方通常不会在 file 未被打开的情况下做 truncate,
|
||||
* 所以这里先报 ENOENT(找不到 zvfs inode)作为安全兜底。
|
||||
*/
|
||||
errno = ENOENT;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
int r = zvfs_truncate_inode_with_handle(inode, -1, (uint64_t)length);
|
||||
|
||||
/* 同步真实文件 st_size(real_truncate 更新磁盘元数据) */
|
||||
if (r == 0)
|
||||
real_truncate(path, length);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
/* truncate64 - alias entry point; hands both arguments to truncate(). */
int truncate64(const char *path, off_t length)
{
    int rc = truncate(path, length);

    return rc;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* fallocate */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
fallocate(int fd, int mode, off_t offset, off_t len)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of = NULL;
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
}
|
||||
|
||||
if (!of) {
|
||||
int r = real_fallocate(fd, mode, offset, len);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
if (offset < 0 || len <= 0) { errno = EINVAL; ZVFS_HOOK_LEAVE(); return -1; }
|
||||
|
||||
/* FALLOC_FL_PUNCH_HOLE:打孔,暂不支持 */
|
||||
if (mode & FALLOC_FL_PUNCH_HOLE) {
|
||||
errno = ENOTSUP;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* FALLOC_FL_KEEP_SIZE:预分配但不改变文件逻辑大小,直接返回 0 */
|
||||
if (mode & FALLOC_FL_KEEP_SIZE) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* 普通 fallocate(mode == 0):
|
||||
* 确保 [offset, offset+len) 范围内的空间被"分配"。
|
||||
* zvfs 的语义:把 logical_size 扩展到 max(logical_size, offset+len)。
|
||||
* 不提前 blob_resize,因为 SPDK cluster 按写入时分配更高效。
|
||||
*/
|
||||
uint64_t new_end = (uint64_t)offset + (uint64_t)len;
|
||||
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
if (new_end > of->inode->logical_size)
|
||||
inode_update_size(of->inode, fd, new_end);
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* posix_fallocate */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
posix_fallocate(int fd, off_t offset, off_t len)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of = NULL;
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
}
|
||||
|
||||
if (!of) {
|
||||
int r = real_posix_fallocate(fd, offset, len);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/*
|
||||
* posix_fallocate 不接受 mode 参数,语义等价于 fallocate(fd, 0, ...)。
|
||||
* 注意:posix_fallocate 出错时返回错误码(正值),不设置 errno。
|
||||
*/
|
||||
if (offset < 0 || len <= 0) { ZVFS_HOOK_LEAVE(); return EINVAL; }
|
||||
|
||||
uint64_t new_end = (uint64_t)offset + (uint64_t)len;
|
||||
|
||||
pthread_mutex_lock(&of->inode->mu);
|
||||
if (new_end > of->inode->logical_size)
|
||||
inode_update_size(of->inode, fd, new_end);
|
||||
pthread_mutex_unlock(&of->inode->mu);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
33
src/hook/zvfs_hook_seek.h
Normal file
33
src/hook/zvfs_hook_seek.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef __ZVFS_HOOK_SEEK_H__
|
||||
#define __ZVFS_HOOK_SEEK_H__
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
 * lseek: updates of->offset. This also applies to O_APPEND descriptors,
 *   where the new offset only affects subsequent reads.
 *
 * truncate / ftruncate:
 *   Update inode->logical_size and keep the backing file's st_size in
 *   sync. Whenever the new size differs from the old one, the blob is
 *   resized with blob_resize().
 *
 * fallocate / posix_fallocate:
 *   zvfs has no notion of "holes"; blobs grow on demand.
 *   For a zvfs fd, fallocate only updates logical_size (reserving logical
 *   space) and does not call blob_resize (to avoid allocating SPDK
 *   clusters ahead of time).
 *   FALLOC_FL_KEEP_SIZE: logical_size is left unchanged, returns 0.
 *   FALLOC_FL_PUNCH_HOLE: not supported yet; fails with -1 and
 *   errno = ENOTSUP.
 */
|
||||
|
||||
off_t lseek(int fd, off_t offset, int whence);
|
||||
off_t lseek64(int fd, off_t offset, int whence);
|
||||
|
||||
int truncate(const char *path, off_t length);
|
||||
int truncate64(const char *path, off_t length);
|
||||
int ftruncate(int fd, off_t length);
|
||||
int ftruncate64(int fd, off_t length);
|
||||
|
||||
int fallocate(int fd, int mode, off_t offset, off_t len);
|
||||
int posix_fallocate(int fd, off_t offset, off_t len);
|
||||
|
||||
#endif // __ZVFS_HOOK_SEEK_H__
|
||||
404
src/hook/zvfs_hook_stat.c
Normal file
404
src/hook/zvfs_hook_stat.c
Normal file
@@ -0,0 +1,404 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_stat.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_inode.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
#include "fs/zvfs_path_entry.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:用 inode 覆盖 stat 结构体的 zvfs 相关字段 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
static void
|
||||
patch_stat(struct stat *st, struct zvfs_inode *inode)
|
||||
{
|
||||
pthread_mutex_lock(&inode->mu);
|
||||
st->st_size = (off_t)inode->logical_size;
|
||||
st->st_atime = inode->atime;
|
||||
st->st_mtime = inode->mtime;
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
|
||||
/*
|
||||
* st_blocks:以 512 字节为单位的"实际占用块数"。
|
||||
* zvfs 数据在 SPDK,真实文件几乎为空(只有 xattr),
|
||||
* 按 logical_size 估算,给上层一个合理的值。
|
||||
* (logical_size + 511) / 512 向上取整。
|
||||
*/
|
||||
st->st_blocks = (blkcnt_t)((st->st_size + 511) / 512);
|
||||
}
|
||||
|
||||
static void
|
||||
patch_stat64(struct stat64 *st, struct zvfs_inode *inode)
|
||||
{
|
||||
pthread_mutex_lock(&inode->mu);
|
||||
st->st_size = (off64_t)inode->logical_size;
|
||||
st->st_atime = inode->atime;
|
||||
st->st_mtime = inode->mtime;
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
|
||||
st->st_blocks = (blkcnt_t)((st->st_size + 511) / 512);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* 内部:按路径找 inode(先查缓存,缓存未命中则检查 xattr) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* zvfs_inode_by_path
|
||||
*
|
||||
* 返回路径对应的 inode 指针(不增加 ref_count,调用方只读使用)。
|
||||
* 若路径不是 zvfs 文件(无 xattr)返回 NULL。
|
||||
*
|
||||
* 注意:返回的指针仅在持有 path_mu / inode_mu 之外使用时有效,
|
||||
* 调用方需在使用期间持有 inode->mu 或确保文件未被 close。
|
||||
* 对 stat 路径(只读 logical_size/atime/mtime),
|
||||
* 短暂持有 inode->mu 即可,无需长期持有。
|
||||
*/
|
||||
static struct zvfs_inode *
|
||||
zvfs_inode_by_path(const char *path)
|
||||
{
|
||||
/* 1. 先查 path_cache */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
struct zvfs_path_entry *pe = path_cache_lookup(path);
|
||||
struct zvfs_inode *inode = pe ? pe->inode : NULL;
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
if (inode)
|
||||
return inode;
|
||||
|
||||
/* 2. path_cache 未命中:检查 xattr 判断是否是 zvfs 文件 */
|
||||
uint64_t blob_id = 0;
|
||||
int tmp_fd = real_open(path, O_RDONLY);
|
||||
if (tmp_fd < 0)
|
||||
return NULL;
|
||||
|
||||
int has_xattr = (zvfs_xattr_read_blob_id(tmp_fd, &blob_id) == 0);
|
||||
real_close(tmp_fd);
|
||||
|
||||
if (!has_xattr)
|
||||
return NULL;
|
||||
|
||||
/* 3. 查 inode_table(文件被另一个 fd 打开过) */
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode = inode_lookup(blob_id);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
return inode; /* 可能仍为 NULL(从未打开过,纯 stat 调用) */
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* stat */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
stat(const char *path, struct stat *buf)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path)) {
|
||||
int r = real_stat(path, buf);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/* 先透传,拿到完整 stat(mode、ino、dev、nlink 等) */
|
||||
if (real_stat(path, buf) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct zvfs_inode *inode = zvfs_inode_by_path(path);
|
||||
if (inode)
|
||||
patch_stat(buf, inode);
|
||||
/*
|
||||
* inode 为 NULL:文件存在于 FS 但从未被 zvfs open,
|
||||
* 此时 st_size 来自真实文件(接近 0),
|
||||
* 这是合理的降级行为(文件尚未被写入 SPDK)。
|
||||
*/
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
stat64(const char *path, struct stat64 *buf)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path)) {
|
||||
int r = real_stat64(path, buf);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
if (real_stat64(path, buf) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct zvfs_inode *inode = zvfs_inode_by_path(path);
|
||||
if (inode)
|
||||
patch_stat64(buf, inode);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* fstat(最高频,pg 每次 read 前都调) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
fstat(int fd, struct stat *buf)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
/* 先透传:拿到 mode/ino/dev/nlink/blksize 等 */
|
||||
if (real_fstat(fd, buf) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ZVFS_IN_HOOK()) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
if (of) {
|
||||
zvfs_ensure_init();
|
||||
patch_stat(buf, of->inode);
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
fstat64(int fd, struct stat64 *buf)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (real_fstat64(fd, buf) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ZVFS_IN_HOOK()) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
if (of) {
|
||||
zvfs_ensure_init();
|
||||
patch_stat64(buf, of->inode);
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* lstat(symlink 不穿透;zvfs 不用 symlink,逻辑与 stat 相同) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
lstat(const char *path, struct stat *buf)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path)) {
|
||||
int r = real_lstat(path, buf);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
if (real_lstat(path, buf) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct zvfs_inode *inode = zvfs_inode_by_path(path);
|
||||
if (inode)
|
||||
patch_stat(buf, inode);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
lstat64(const char *path, struct stat64 *buf)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path)) {
|
||||
int r = real_lstat64(path, buf);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
if (real_lstat64(path, buf) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct zvfs_inode *inode = zvfs_inode_by_path(path);
|
||||
if (inode)
|
||||
patch_stat64(buf, inode);
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* fstatat / fstatat64 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
fstatat(int dirfd, const char *path, struct stat *buf, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
char abspath[PATH_MAX];
|
||||
int is_zvfs = 0;
|
||||
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
if (zvfs_resolve_atpath(dirfd, path, abspath, sizeof(abspath)) == 0)
|
||||
is_zvfs = zvfs_is_zvfs_path(abspath);
|
||||
}
|
||||
|
||||
if (real_fstatat(dirfd, path, buf, flags) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (is_zvfs) {
|
||||
zvfs_ensure_init();
|
||||
struct zvfs_inode *inode = zvfs_inode_by_path(abspath);
|
||||
if (inode)
|
||||
patch_stat(buf, inode);
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
fstatat64(int dirfd, const char *path, struct stat64 *buf, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
char abspath[PATH_MAX];
|
||||
int is_zvfs = 0;
|
||||
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
if (zvfs_resolve_atpath(dirfd, path, abspath, sizeof(abspath)) == 0)
|
||||
is_zvfs = zvfs_is_zvfs_path(abspath);
|
||||
}
|
||||
|
||||
if (real_fstatat64(dirfd, path, buf, flags) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (is_zvfs) {
|
||||
zvfs_ensure_init();
|
||||
struct zvfs_inode *inode = zvfs_inode_by_path(abspath);
|
||||
if (inode)
|
||||
patch_stat64(buf, inode);
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* statx */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
statx(int dirfd, const char *path, int flags,
|
||||
unsigned int mask, struct statx *buf)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (!real_statx) {
|
||||
errno = ENOSYS;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
char abspath[PATH_MAX];
|
||||
int is_zvfs = 0;
|
||||
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
if (zvfs_resolve_atpath(dirfd, path, abspath, sizeof(abspath)) == 0)
|
||||
is_zvfs = zvfs_is_zvfs_path(abspath);
|
||||
}
|
||||
|
||||
if (real_statx(dirfd, path, flags, mask, buf) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!is_zvfs) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/* statx 用 stx_mask 标记哪些字段有效,覆盖 size/atime/mtime */
|
||||
struct zvfs_inode *inode = zvfs_inode_by_path(abspath);
|
||||
if (inode) {
|
||||
pthread_mutex_lock(&inode->mu);
|
||||
|
||||
if (mask & STATX_SIZE) {
|
||||
buf->stx_size = inode->logical_size;
|
||||
buf->stx_mask |= STATX_SIZE;
|
||||
/* stx_blocks 以 512 字节为单位 */
|
||||
buf->stx_blocks = (inode->logical_size + 511) / 512;
|
||||
buf->stx_mask |= STATX_BLOCKS;
|
||||
}
|
||||
if (mask & STATX_ATIME) {
|
||||
buf->stx_atime.tv_sec = inode->atime;
|
||||
buf->stx_atime.tv_nsec = 0;
|
||||
buf->stx_mask |= STATX_ATIME;
|
||||
}
|
||||
if (mask & STATX_MTIME) {
|
||||
buf->stx_mtime.tv_sec = inode->mtime;
|
||||
buf->stx_mtime.tv_nsec = 0;
|
||||
buf->stx_mask |= STATX_MTIME;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
35
src/hook/zvfs_hook_stat.h
Normal file
35
src/hook/zvfs_hook_stat.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef __ZVFS_HOOK_STAT_H__
|
||||
#define __ZVFS_HOOK_STAT_H__
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
/*
 * stat-family hooks.
 *
 * Core strategy:
 *   For zvfs files, real_stat* is called first to obtain most fields
 *   (ino, dev, nlink, mode, uid, gid, blksize, ...), and only the
 *   following fields are overridden:
 *     st_size   <- inode->logical_size
 *     st_atime  <- inode->atime
 *     st_mtime  <- inode->mtime
 *     st_blocks <- (logical_size + 511) / 512
 *
 *   st_blocks is derived from logical_size because the real file holds
 *   almost no data (only the xattr), so its own block count is close to 0.
 *   Upper layers (postgres/rocksdb) rely on st_size to determine the file
 *   size; that is the key field.
 *
 * For non-zvfs files: passed through unchanged.
 */
|
||||
|
||||
int stat(const char *path, struct stat *buf);
|
||||
int stat64(const char *path, struct stat64 *buf);
|
||||
int fstat(int fd, struct stat *buf);
|
||||
int fstat64(int fd, struct stat64 *buf);
|
||||
int lstat(const char *path, struct stat *buf);
|
||||
int lstat64(const char *path, struct stat64 *buf);
|
||||
int fstatat(int dirfd, const char *path, struct stat *buf, int flags);
|
||||
int fstatat64(int dirfd, const char *path, struct stat64 *buf, int flags);
|
||||
int statx(int dirfd, const char *path, int flags,
|
||||
unsigned int mask, struct statx *buf);
|
||||
|
||||
#endif // __ZVFS_HOOK_STAT_H__
|
||||
122
src/hook/zvfs_hook_sync.c
Normal file
122
src/hook/zvfs_hook_sync.c
Normal file
@@ -0,0 +1,122 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_sync.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
#include "spdk_engine/io_engine.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* fsync */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
fsync(int fd)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of = NULL;
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
}
|
||||
|
||||
if (!of) {
|
||||
int r = real_fsync(fd);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/*
|
||||
* zvfs 无写缓冲区,数据已在 blob_write 时落到 SPDK 存储。
|
||||
* 调用 blob_sync_md 确保 blob 元数据(size 等)持久化。
|
||||
*/
|
||||
int r = blob_sync_md(of->handle);
|
||||
if (r < 0) errno = EIO;
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return (r < 0) ? -1 : 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* fdatasync */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
fdatasync(int fd)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of = NULL;
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
}
|
||||
|
||||
if (!of) {
|
||||
int r = real_fdatasync(fd);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/*
|
||||
* fdatasync 只保证数据持久化,不要求元数据(atime 等)同步。
|
||||
* 对 zvfs:数据已无缓冲,blob_sync_md 同步 size 元数据即可。
|
||||
* 与 fsync 实现相同——如果将来区分数据/元数据可在此分叉。
|
||||
*/
|
||||
int r = blob_sync_md(of->handle);
|
||||
if (r < 0) errno = EIO;
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return (r < 0) ? -1 : 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* sync_file_range */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
sync_file_range(int fd, off_t offset, off_t nbytes, unsigned int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
struct zvfs_open_file *of = NULL;
|
||||
if (!ZVFS_IN_HOOK()) {
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
of = openfile_lookup(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
}
|
||||
|
||||
if (!of) {
|
||||
int r = real_sync_file_range
|
||||
? real_sync_file_range(fd, offset, nbytes, flags)
|
||||
: (errno = ENOSYS, -1);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return r;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
/*
|
||||
* PostgreSQL checkpointer 用此调用按范围刷脏页。
|
||||
* zvfs 无页缓存,数据实时落盘,直接返回 0。
|
||||
* 参数合法性检查与内核保持一致:
|
||||
* offset < 0 或 nbytes < 0 → EINVAL
|
||||
* flags 包含非法位 → EINVAL
|
||||
*/
|
||||
(void)offset; (void)nbytes; (void)flags;
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return 0;
|
||||
}
|
||||
24
src/hook/zvfs_hook_sync.h
Normal file
24
src/hook/zvfs_hook_sync.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef __ZVFS_HOOK_SYNC_H__
|
||||
#define __ZVFS_HOOK_SYNC_H__
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
/*
|
||||
* zvfs 无写缓冲区:所有 blob_write 成功即代表数据已落到 SPDK 管理的存储。
|
||||
*
|
||||
* fsync / fdatasync:
|
||||
* 对 zvfs fd 调用 blob_sync_md 同步 blob 元数据(size 等),
|
||||
* 然后返回 0。不需要 flush 数据缓冲区。
|
||||
* 非 zvfs fd 透传。
|
||||
*
|
||||
* sync_file_range:
|
||||
* PostgreSQL checkpointer 按范围刷脏页。
|
||||
* zvfs 无页缓存,直接返回 0。
|
||||
* 非 zvfs fd 透传。
|
||||
*/
|
||||
|
||||
int fsync(int fd);
|
||||
int fdatasync(int fd);
|
||||
int sync_file_range(int fd, off_t offset, off_t nbytes, unsigned int flags);
|
||||
|
||||
#endif // __ZVFS_HOOK_SYNC_H__
|
||||
0
src/main.c
Normal file
0
src/main.c
Normal file
812
src/spdk_engine/io_engine.c
Normal file
812
src/spdk_engine/io_engine.c
Normal file
@@ -0,0 +1,812 @@
|
||||
#include "spdk_engine/io_engine.h"
|
||||
#include "config.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
#include <spdk/event.h>
|
||||
#include <spdk/log.h>
|
||||
#include <spdk/bdev.h>
|
||||
#include <spdk/blob.h>
|
||||
#include <spdk/blob_bdev.h>
|
||||
#include <spdk/thread.h>
|
||||
#include <semaphore.h>
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <string.h>
|
||||
|
||||
struct zvfs_spdk_io_engine g_engine = {0};
|
||||
static int g_engine_init_rc = -EAGAIN;
|
||||
static pthread_mutex_t g_super_blob_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static spdk_blob_id g_super_blob_id_cache = SPDK_BLOBID_INVALID;
|
||||
|
||||
static __thread struct zvfs_tls_ctx tls = {0};
|
||||
|
||||
// 初始化操作上下文
|
||||
/* Completion state filled in by json_app_load_done(). */
struct json_load_ctx {
    bool done;  /* set once the JSON config load callback has fired */
    int rc;     /* status passed to the callback */
};
|
||||
|
||||
/* Completion state filled in by bs_init_cb() for blobstore load/init. */
struct bs_init_ctx {
    bool done;                   /* set once the callback has fired */
    int rc;                      /* bserrno passed to the callback */
    struct spdk_blob_store *bs;  /* blobstore passed to the callback */
};
|
||||
|
||||
// metadata 操作通用上下文
|
||||
struct md_op_ctx {
|
||||
void (*fn)(struct md_op_ctx *ctx);
|
||||
volatile bool done;
|
||||
int rc;
|
||||
// op-specific fields
|
||||
union {
|
||||
struct { // for create
|
||||
uint64_t size_hint;
|
||||
spdk_blob_id blob_id;
|
||||
} create;
|
||||
struct { // for open
|
||||
spdk_blob_id blob_id;
|
||||
struct spdk_blob *blob;
|
||||
} open;
|
||||
struct { // for resize/sync/close
|
||||
struct zvfs_blob_handle *handle;
|
||||
uint64_t new_size; // for resize
|
||||
} handle_op;
|
||||
struct { // for delete
|
||||
spdk_blob_id blob_id;
|
||||
} delete;
|
||||
struct { // for get/set super
|
||||
spdk_blob_id blob_id;
|
||||
} super;
|
||||
};
|
||||
char *op_name;
|
||||
};
|
||||
|
||||
// IO completion 上下文
|
||||
/* Completion state filled in by io_completion_cb() for blob read/write. */
struct io_completion_ctx {
    bool done;  /* set once the I/O callback has fired */
    int rc;     /* status passed to the callback */
};
|
||||
|
||||
// metadata poller 线程函数
|
||||
static void *md_poller_fn(void *arg) {
|
||||
spdk_set_thread(g_engine.md_thread);
|
||||
while (true) {
|
||||
spdk_thread_poll(g_engine.md_thread, 0, 0);
|
||||
usleep(1000);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// 前向声明
|
||||
static struct spdk_io_channel *get_current_channel(void);
|
||||
static int dispatch_md_op(struct md_op_ctx *ctx);
|
||||
static int dispatch_md_op_quiet(struct md_op_ctx *ctx);
|
||||
static void md_op_cb(void *arg);
|
||||
static int open_bdev_and_init_bs(const char *bdev_name);
|
||||
static int load_json_config(void);
|
||||
static int ensure_engine_ready(const char *op);
|
||||
|
||||
// callbacks
|
||||
static void json_app_load_done(int rc, void *arg);
|
||||
static void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx);
|
||||
static void bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno);
|
||||
static void blob_create_cb(void *arg, spdk_blob_id blobid, int rc);
|
||||
static void blob_open_cb(void *arg, struct spdk_blob *blob, int rc);
|
||||
static void blob_resize_cb(void *arg, int rc);
|
||||
static void blob_sync_md_cb(void *arg, int rc);
|
||||
static void blob_close_cb(void *arg, int rc);
|
||||
static void blob_delete_cb(void *arg, int rc);
|
||||
static void io_completion_cb(void *arg, int rc);
|
||||
static void blob_get_super_cb(void *arg, spdk_blob_id blobid, int rc);
|
||||
static void blob_set_super_cb(void *arg, int rc);
|
||||
|
||||
// op functions on metadata
|
||||
static void blob_create_on_md(struct md_op_ctx *ctx);
|
||||
static void blob_open_on_md(struct md_op_ctx *ctx);
|
||||
static void blob_resize_on_md(struct md_op_ctx *ctx);
|
||||
static void blob_sync_md_on_md(struct md_op_ctx *ctx);
|
||||
static void blob_close_on_md(struct md_op_ctx *ctx);
|
||||
static void blob_delete_on_md(struct md_op_ctx *ctx);
|
||||
static void blob_get_super_on_md(struct md_op_ctx *ctx);
|
||||
static void blob_set_super_on_md(struct md_op_ctx *ctx);
|
||||
|
||||
__attribute__((constructor)) static void preload_init(void) {
|
||||
const char *auto_init = getenv("ZVFS_AUTO_INIT");
|
||||
if (!auto_init || strcmp(auto_init, "1") != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
printf("\n\n auto init \n\n");
|
||||
const char *bdev_name = getenv("SPDK_BDEV_NAME") ? getenv("SPDK_BDEV_NAME") : ZVFS_BDEV;
|
||||
g_engine_init_rc = io_engine_init(bdev_name);
|
||||
if (g_engine_init_rc != 0) {
|
||||
SPDK_ERRLOG("io_engine_init failed in constructor: %d\n", g_engine_init_rc);
|
||||
}
|
||||
}
|
||||
|
||||
static int wait_done(bool *done_ptr, int *rc_ptr, const char *op) {
|
||||
int iter = 0;
|
||||
while (!*done_ptr) {
|
||||
if (tls.thread) {
|
||||
spdk_thread_poll(tls.thread, 0, 0);
|
||||
}else{
|
||||
SPDK_ERRLOG("not init tls.thread\n");
|
||||
return -EBADE;
|
||||
}
|
||||
if (++iter > WAITER_MAX_TIME) {
|
||||
SPDK_ERRLOG("%s timeout\n", op);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
}
|
||||
|
||||
if (*rc_ptr != 0) {
|
||||
SPDK_ERRLOG("%s failed in callback: %d\n", op, *rc_ptr);
|
||||
return *rc_ptr;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int wait_done_volatile(volatile bool *done_ptr, int *rc_ptr, const char *op) {
|
||||
int iter = 0;
|
||||
while (!*done_ptr) {
|
||||
if (tls.thread) {
|
||||
spdk_thread_poll(tls.thread, 0, 0);
|
||||
}else{
|
||||
SPDK_ERRLOG("not init tls.thread\n");
|
||||
return -EBADE;
|
||||
}
|
||||
if (++iter > WAITER_MAX_TIME) {
|
||||
SPDK_ERRLOG("%s timeout\n", op);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
}
|
||||
|
||||
if (*rc_ptr != 0) {
|
||||
SPDK_ERRLOG("%s failed in callback: %d\n", op, *rc_ptr);
|
||||
return *rc_ptr;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// no rc error
|
||||
static int wait_done_volatile_quiet(volatile bool *done_ptr, int *rc_ptr, const char *op) {
|
||||
int iter = 0;
|
||||
while (!*done_ptr) {
|
||||
if (tls.thread) {
|
||||
spdk_thread_poll(tls.thread, 0, 0);
|
||||
} else {
|
||||
SPDK_ERRLOG("not init tls.thread\n");
|
||||
return -EBADE;
|
||||
}
|
||||
if (++iter > WAITER_MAX_TIME) {
|
||||
SPDK_ERRLOG("%s timeout\n", op);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
}
|
||||
|
||||
return *rc_ptr;
|
||||
}
|
||||
|
||||
int io_engine_init(const char *bdev_name) {
|
||||
if (g_engine_init_rc == 0 && g_engine.bs != NULL && g_engine.md_thread != NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct spdk_env_opts env_opts;
|
||||
spdk_env_opts_init(&env_opts);
|
||||
env_opts.name = "zvfs";
|
||||
|
||||
|
||||
if (spdk_env_init(&env_opts) != 0) {
|
||||
SPDK_ERRLOG("spdk_env_init failed\n");
|
||||
g_engine_init_rc = -1;
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
|
||||
spdk_log_set_print_level(SPDK_LOG_NOTICE);
|
||||
spdk_log_set_level(SPDK_LOG_NOTICE);
|
||||
spdk_log_open(NULL);
|
||||
|
||||
if (spdk_thread_lib_init(NULL, 0) != 0) {
|
||||
SPDK_ERRLOG("spdk_thread_lib_init failed\n");
|
||||
g_engine_init_rc = -1;
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
|
||||
// 为主线程 lazy init(constructor 在主线程跑)
|
||||
tls.thread = spdk_thread_create("main_thread", NULL);
|
||||
if (!tls.thread) {
|
||||
SPDK_ERRLOG("create main_thread failed\n");
|
||||
g_engine_init_rc = -1;
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
spdk_set_thread(tls.thread);
|
||||
|
||||
if (load_json_config() != 0) {
|
||||
SPDK_ERRLOG("Failed to load SPDK config\n");
|
||||
g_engine_init_rc = -1;
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* 这里是因为要让一个线程专门负责poll
|
||||
*/
|
||||
// 创建 md_thread
|
||||
g_engine.md_thread = spdk_thread_create("md_thread", NULL);
|
||||
if (!g_engine.md_thread) {
|
||||
SPDK_ERRLOG("create md_thread failed\n");
|
||||
g_engine_init_rc = -1;
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
|
||||
// 起专用 poller pthread for md_thread
|
||||
pthread_t md_poller_tid;
|
||||
if (pthread_create(&md_poller_tid, NULL, md_poller_fn, NULL) != 0) {
|
||||
SPDK_ERRLOG("pthread_create for md_poller failed\n");
|
||||
g_engine_init_rc = -1;
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
if (pthread_detach(md_poller_tid) != 0) {
|
||||
SPDK_ERRLOG("pthread_detach for md_poller failed\n");
|
||||
g_engine_init_rc = -1;
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
|
||||
// init bdev/bs
|
||||
g_super_blob_id_cache = SPDK_BLOBID_INVALID;
|
||||
int rc = open_bdev_and_init_bs(bdev_name);
|
||||
if (rc != 0) {
|
||||
g_engine_init_rc = rc;
|
||||
return rc;
|
||||
}
|
||||
g_engine_init_rc = 0;
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
|
||||
static int load_json_config(void) {
|
||||
const char *path = getenv("SPDK_JSON_CONFIG");
|
||||
if(!path) path = SPDK_JSON_PATH;
|
||||
|
||||
|
||||
struct json_load_ctx ctx = {
|
||||
.done = false,
|
||||
.rc = 0
|
||||
};
|
||||
spdk_subsystem_init_from_json_config(path, SPDK_DEFAULT_RPC_ADDR, json_app_load_done,
|
||||
&ctx, true);
|
||||
return wait_done(&ctx.done, &ctx.rc, "load_json_config");
|
||||
}
|
||||
|
||||
// lazy get channel
|
||||
static struct spdk_io_channel *get_current_channel(void) {
|
||||
if (ensure_engine_ready("get_current_channel") != 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (tls.thread) {
|
||||
spdk_thread_poll(tls.thread, 0, 0);
|
||||
}
|
||||
|
||||
if (!tls.thread) {
|
||||
char name[32];
|
||||
snprintf(name, sizeof(name), "worker_%lu", pthread_self());
|
||||
tls.thread = spdk_thread_create(name, NULL);
|
||||
if (!tls.thread) {
|
||||
SPDK_ERRLOG("spdk_thread_create failed\n");
|
||||
return NULL;
|
||||
}
|
||||
spdk_set_thread(tls.thread);
|
||||
}
|
||||
|
||||
if (!tls.channel) {
|
||||
tls.channel = spdk_bs_alloc_io_channel(g_engine.bs);
|
||||
if (!tls.channel) {
|
||||
SPDK_ERRLOG("alloc io_channel failed\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return tls.channel;
|
||||
}
|
||||
|
||||
// 通用 dispatch md op
|
||||
static int dispatch_md_op(struct md_op_ctx *ctx) {
|
||||
int rc = ensure_engine_ready(ctx->op_name ? ctx->op_name : "dispatch_md_op");
|
||||
if (rc != 0) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
ctx->done = false;
|
||||
ctx->rc = 0;
|
||||
|
||||
spdk_thread_send_msg(g_engine.md_thread, md_op_cb, ctx);
|
||||
|
||||
return wait_done_volatile(&ctx->done, &ctx->rc, ctx->op_name);
|
||||
}
|
||||
|
||||
static int dispatch_md_op_quiet(struct md_op_ctx *ctx) {
|
||||
int rc = ensure_engine_ready(ctx->op_name ? ctx->op_name : "dispatch_md_op_quiet");
|
||||
if (rc != 0) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
ctx->done = false;
|
||||
ctx->rc = 0;
|
||||
|
||||
spdk_thread_send_msg(g_engine.md_thread, md_op_cb, ctx);
|
||||
|
||||
return wait_done_volatile_quiet(&ctx->done, &ctx->rc, ctx->op_name);
|
||||
}
|
||||
|
||||
static int ensure_engine_ready(const char *op) {
|
||||
if (g_engine_init_rc != 0) {
|
||||
SPDK_ERRLOG("%s: io engine init failed, rc=%d\n", op, g_engine_init_rc);
|
||||
return g_engine_init_rc;
|
||||
}
|
||||
|
||||
if (!g_engine.bs || !g_engine.md_thread) {
|
||||
SPDK_ERRLOG("%s: io engine not ready (bs=%p, md_thread=%p)\n",
|
||||
op, (void *)g_engine.bs, (void *)g_engine.md_thread);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void md_op_cb(void *arg) {
|
||||
struct md_op_ctx *ctx = arg;
|
||||
ctx->fn(ctx);
|
||||
}
|
||||
|
||||
void json_app_load_done(int rc, void *arg) {
|
||||
struct json_load_ctx* ctx = (struct json_load_ctx*)arg;
|
||||
ctx->done = true;
|
||||
ctx->rc = rc;
|
||||
}
|
||||
|
||||
// bdev open + bs init
|
||||
static void zvfs_spdk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
|
||||
void *event_ctx) {
|
||||
// 后续加日志或处理
|
||||
switch (type) {
|
||||
case SPDK_BDEV_EVENT_REMOVE:
|
||||
SPDK_NOTICELOG("bdev removed: %s\n", spdk_bdev_get_name(bdev));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void bs_init_cb(void *arg, struct spdk_blob_store *bs, int bserrno) {
|
||||
struct bs_init_ctx *ctx = (struct bs_init_ctx *)arg;
|
||||
ctx->rc = bserrno;
|
||||
ctx->bs = bs;
|
||||
ctx->done = true;
|
||||
}
|
||||
|
||||
static int open_bdev_and_init_bs(const char *bdev_name) {
|
||||
SPDK_NOTICELOG("open_bdev_and_init_bs\n");
|
||||
struct spdk_bs_dev *bs_dev = NULL;
|
||||
int rc = spdk_bdev_create_bs_dev_ext(bdev_name, zvfs_spdk_bdev_event_cb, NULL, &bs_dev);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("spdk_bdev_create_bs_dev_ext failed: %d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
g_engine.bs_dev = bs_dev;
|
||||
|
||||
struct bs_init_ctx ctx = {
|
||||
.done = false,
|
||||
.rc = 0,
|
||||
.bs = NULL
|
||||
};
|
||||
|
||||
/* 优先加载已有 blobstore;失败时回退到 init。 */
|
||||
spdk_bs_load(bs_dev, NULL, bs_init_cb, &ctx);
|
||||
rc = wait_done(&ctx.done, &ctx.rc, "bs_load");
|
||||
if (rc != 0) {
|
||||
SPDK_NOTICELOG("spdk_bs_load failed (%d), fallback to spdk_bs_init\n", rc);
|
||||
|
||||
/*
|
||||
* 注意:spdk_bs_load 失败路径会销毁传入的 dev。
|
||||
* 这里必须重新 create 一个新的 bs_dev,不能复用旧指针。
|
||||
*/
|
||||
bs_dev = NULL;
|
||||
rc = spdk_bdev_create_bs_dev_ext(bdev_name, zvfs_spdk_bdev_event_cb, NULL, &bs_dev);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("spdk_bdev_create_bs_dev_ext(for init fallback) failed: %d\n", rc);
|
||||
g_engine.bs_dev = NULL;
|
||||
return rc;
|
||||
}
|
||||
g_engine.bs_dev = bs_dev;
|
||||
|
||||
ctx.done = false;
|
||||
ctx.rc = 0;
|
||||
ctx.bs = NULL;
|
||||
|
||||
spdk_bs_init(bs_dev, NULL, bs_init_cb, &ctx);
|
||||
rc = wait_done(&ctx.done, &ctx.rc, "bs_init");
|
||||
if (rc != 0) {
|
||||
g_engine.bs_dev = NULL;
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
g_engine.bs = ctx.bs;
|
||||
g_engine.io_unit_size = spdk_bs_get_io_unit_size(ctx.bs);
|
||||
g_engine.cluster_size = spdk_bs_get_cluster_size(ctx.bs);
|
||||
|
||||
SPDK_NOTICELOG("Blobstore initialized successfully on bdev: %s\n", bdev_name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Completion of spdk_bs_get_super(): stores status and super blob id, then raises the flag. */
static void blob_get_super_cb(void *arg, spdk_blob_id blobid, int rc) {
    struct md_op_ctx *op = arg;

    op->rc = rc;
    op->super.blob_id = blobid;
    op->done = true;
}
|
||||
|
||||
static void blob_set_super_cb(void *arg, int rc) {
|
||||
struct md_op_ctx *ctx = arg;
|
||||
ctx->rc = rc;
|
||||
ctx->done = true;
|
||||
}
|
||||
|
||||
static void blob_get_super_on_md(struct md_op_ctx *ctx) {
|
||||
spdk_bs_get_super(g_engine.bs, blob_get_super_cb, ctx);
|
||||
}
|
||||
|
||||
static void blob_set_super_on_md(struct md_op_ctx *ctx) {
|
||||
spdk_bs_set_super(g_engine.bs, ctx->super.blob_id, blob_set_super_cb, ctx);
|
||||
}
|
||||
|
||||
/*
 * Fetch the blobstore's super blob id through the metadata thread.
 * Uses the quiet dispatcher, so a failing callback is not logged.
 * On success stores the id in *blob_id and returns 0; otherwise returns
 * the dispatch error and leaves *blob_id untouched.
 */
static int bs_get_super_id(spdk_blob_id *blob_id) {
    struct md_op_ctx query = {
        .fn = blob_get_super_on_md,
        .op_name = "blob get super",
    };
    query.super.blob_id = SPDK_BLOBID_INVALID;

    int rc = dispatch_md_op_quiet(&query);
    if (rc == 0) {
        *blob_id = query.super.blob_id;
    }
    return rc;
}
|
||||
|
||||
/* Register `blob_id` as the blobstore's super blob; returns the dispatch result. */
static int bs_set_super_id(spdk_blob_id blob_id) {
    struct md_op_ctx request = {
        .fn = blob_set_super_on_md,
        .op_name = "blob set super",
    };

    request.super.blob_id = blob_id;
    return dispatch_md_op(&request);
}
|
||||
|
||||
struct zvfs_blob_handle *blob_get_super(void) {
|
||||
pthread_mutex_lock(&g_super_blob_mutex);
|
||||
|
||||
if (g_super_blob_id_cache != SPDK_BLOBID_INVALID) {
|
||||
struct zvfs_blob_handle *cached = blob_open(g_super_blob_id_cache);
|
||||
if (cached) {
|
||||
pthread_mutex_unlock(&g_super_blob_mutex);
|
||||
return cached;
|
||||
}
|
||||
g_super_blob_id_cache = SPDK_BLOBID_INVALID;
|
||||
}
|
||||
|
||||
spdk_blob_id super_id = SPDK_BLOBID_INVALID;
|
||||
int rc = bs_get_super_id(&super_id);
|
||||
if (rc == 0 && super_id != SPDK_BLOBID_INVALID) {
|
||||
g_super_blob_id_cache = super_id;
|
||||
struct zvfs_blob_handle *existing = blob_open(super_id);
|
||||
if (!existing) {
|
||||
g_super_blob_id_cache = SPDK_BLOBID_INVALID;
|
||||
}
|
||||
pthread_mutex_unlock(&g_super_blob_mutex);
|
||||
return existing;
|
||||
}
|
||||
if (rc == 0 && super_id == SPDK_BLOBID_INVALID) {
|
||||
rc = -ENOENT;
|
||||
}
|
||||
|
||||
if (rc != -ENOENT) {
|
||||
SPDK_ERRLOG("spdk_bs_get_super failed: %d\n", rc);
|
||||
pthread_mutex_unlock(&g_super_blob_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct zvfs_blob_handle *created = blob_create(0);
|
||||
if (!created) {
|
||||
pthread_mutex_unlock(&g_super_blob_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rc = bs_set_super_id(created->id);
|
||||
if (rc != 0) {
|
||||
spdk_blob_id created_id = created->id;
|
||||
SPDK_ERRLOG("spdk_bs_set_super failed: %d\n", rc);
|
||||
blob_close(created);
|
||||
blob_delete(created_id);
|
||||
pthread_mutex_unlock(&g_super_blob_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
g_super_blob_id_cache = created->id;
|
||||
pthread_mutex_unlock(&g_super_blob_mutex);
|
||||
return created;
|
||||
}
|
||||
|
||||
// blob_create
|
||||
/* Completion of spdk_bs_create_blob_ext(): stores status and new blob id, then raises the flag. */
static void blob_create_cb(void *arg, spdk_blob_id blobid, int rc) {
    struct md_op_ctx *op = arg;

    op->rc = rc;
    op->create.blob_id = blobid;
    op->done = true;
}
|
||||
|
||||
static void blob_create_on_md(struct md_op_ctx *ctx) {
|
||||
struct spdk_blob_opts opts;
|
||||
spdk_blob_opts_init(&opts, sizeof(opts));
|
||||
// size_hint 如果需,但 create 不直接 set size,用 resize 后
|
||||
spdk_bs_create_blob_ext(g_engine.bs, &opts, blob_create_cb, ctx);
|
||||
}
|
||||
|
||||
struct zvfs_blob_handle *blob_create(uint64_t size_hint) {
|
||||
if(size_hint == 0) size_hint = g_engine.cluster_size;
|
||||
struct md_op_ctx ctx = {.fn = blob_create_on_md, .create.size_hint = size_hint, .op_name = "blob create"};
|
||||
int rc = dispatch_md_op(&ctx);
|
||||
if (rc) return NULL;
|
||||
|
||||
struct zvfs_blob_handle *handle = blob_open(ctx.create.blob_id);
|
||||
if (handle && size_hint > 0) {
|
||||
rc = blob_resize(handle, size_hint); // 初始 resize
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("blob_resize failed after create: %d\n", rc);
|
||||
blob_close(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rc = blob_sync_md(handle);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("blob_sync_md failed after resize: %d\n", rc);
|
||||
blob_close(handle);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return handle;
|
||||
}
|
||||
|
||||
// blob_open
|
||||
static void blob_open_cb(void *arg, struct spdk_blob *blob, int rc) {
|
||||
struct md_op_ctx *ctx = arg;
|
||||
ctx->rc = rc;
|
||||
ctx->open.blob = blob;
|
||||
ctx->done = true;
|
||||
}
|
||||
|
||||
static void blob_open_on_md(struct md_op_ctx *ctx) {
|
||||
struct spdk_blob_open_opts opts;
|
||||
spdk_blob_open_opts_init(&opts, sizeof(opts));
|
||||
spdk_bs_open_blob_ext(g_engine.bs, ctx->open.blob_id, &opts, blob_open_cb, ctx);
|
||||
}
|
||||
|
||||
struct zvfs_blob_handle *blob_open(uint64_t blob_id) {
|
||||
struct md_op_ctx ctx = {.fn = blob_open_on_md, .open.blob_id = blob_id, .op_name = "blob open"};
|
||||
int rc = dispatch_md_op(&ctx);
|
||||
if (rc) return NULL;
|
||||
|
||||
struct zvfs_blob_handle *handle = malloc(sizeof(*handle));
|
||||
if (!handle) return NULL;
|
||||
|
||||
handle->id = blob_id;
|
||||
handle->blob = ctx.open.blob;
|
||||
handle->size = spdk_blob_get_num_clusters(handle->blob) * g_engine.cluster_size;
|
||||
|
||||
// 预分配固定大小的 DMA buf,后续所有 IO 都经过这块缓存,避免每次 IO 动态申请
|
||||
// 必须用 spdk_dma_malloc 保证地址对齐到 io_unit_size
|
||||
handle->dma_buf_size = ZVFS_DMA_BUF_SIZE;
|
||||
handle->dma_buf = spdk_dma_malloc(ZVFS_DMA_BUF_SIZE, g_engine.io_unit_size, NULL);
|
||||
if (!handle->dma_buf) {
|
||||
SPDK_ERRLOG("spdk_dma_malloc failed for blob %lu\n", blob_id);
|
||||
free(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
// blob_write
|
||||
static void io_completion_cb(void *arg, int rc) {
|
||||
struct io_completion_ctx *ctx = arg;
|
||||
ctx->rc = rc;
|
||||
ctx->done = true;
|
||||
}
|
||||
|
||||
int blob_write(struct zvfs_blob_handle *handle, uint64_t offset, const void *buf, size_t len) {
|
||||
if (tls.thread) {
|
||||
spdk_thread_poll(tls.thread, 0, 0);
|
||||
}
|
||||
|
||||
struct spdk_io_channel *ch = get_current_channel();
|
||||
if (!ch) return -1;
|
||||
if (len == 0) return 0;
|
||||
|
||||
// 越界检查
|
||||
if (offset + len > handle->size) {
|
||||
SPDK_ERRLOG("blob_write out of range: offset=%lu len=%zu blob_size=%lu\n",
|
||||
offset, len, handle->size);
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
// 计算对齐后的 IO 范围和 dma_buf 内偏移
|
||||
uint64_t lba_off = 0;
|
||||
uint64_t lba_len = 0;
|
||||
uint32_t buf_off = 0;
|
||||
int rc = zvfs_calc_io_units(offset, len, g_engine.io_unit_size, &lba_off, &lba_len, &buf_off);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("blob_write calc_io_units failed: %d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
size_t aligned_bytes = lba_len * g_engine.io_unit_size;
|
||||
if (aligned_bytes > ZVFS_DMA_BUF_SIZE) {
|
||||
SPDK_ERRLOG("blob_write aligned_bytes=%zu exceeds ZVFS_DMA_BUF_SIZE\n", aligned_bytes);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
struct io_completion_ctx io_ctx = {.done = false, .rc = 0};
|
||||
|
||||
spdk_blob_io_read(handle->blob, ch, handle->dma_buf, lba_off, lba_len,
|
||||
io_completion_cb, &io_ctx);
|
||||
|
||||
|
||||
rc = wait_done(&io_ctx.done, &io_ctx.rc, "io_write(read phase)");
|
||||
if (rc != 0) return rc;
|
||||
|
||||
memcpy((uint8_t *)handle->dma_buf + buf_off, buf, len);
|
||||
io_ctx.done = false;
|
||||
io_ctx.rc = 0;
|
||||
|
||||
spdk_blob_io_write(handle->blob, ch, handle->dma_buf, lba_off, lba_len,
|
||||
io_completion_cb, &io_ctx);
|
||||
rc = wait_done(&io_ctx.done, &io_ctx.rc, "io_write(write phase)");
|
||||
if (rc != 0) return rc;
|
||||
|
||||
return io_ctx.rc;
|
||||
}
|
||||
|
||||
// blob_read 类似
|
||||
int blob_read(struct zvfs_blob_handle *handle, uint64_t offset, void *buf, size_t len) {
|
||||
if (tls.thread) {
|
||||
spdk_thread_poll(tls.thread, 0, 0);
|
||||
}
|
||||
|
||||
struct spdk_io_channel *ch = get_current_channel();
|
||||
if (!ch) return -1;
|
||||
if (len == 0) return 0;
|
||||
|
||||
// 越界检查
|
||||
if (offset + len > handle->size) {
|
||||
SPDK_ERRLOG("blob_read out of range: offset=%lu len=%zu blob_size=%lu\n",
|
||||
offset, len, handle->size);
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
|
||||
// 计算对齐后的 IO 范围和 dma_buf 内偏移
|
||||
uint64_t lba_off = 0;
|
||||
uint64_t lba_len = 0;
|
||||
uint32_t buf_off = 0;
|
||||
int rc = zvfs_calc_io_units(offset, len, g_engine.io_unit_size, &lba_off, &lba_len, &buf_off);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("io_read offset/len not aligned to io_unit_size=%lu\n", g_engine.io_unit_size);
|
||||
return rc;
|
||||
}
|
||||
|
||||
// 读入对齐范围到 dma_buf,再从正确偏移处截取到用户 buf
|
||||
size_t aligned_bytes = lba_len * g_engine.io_unit_size;
|
||||
if (aligned_bytes > ZVFS_DMA_BUF_SIZE) {
|
||||
SPDK_ERRLOG("blob_read aligned_bytes=%zu exceeds ZVFS_DMA_BUF_SIZE\n", aligned_bytes);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
struct io_completion_ctx io_ctx = {.done = false, .rc = 0};
|
||||
|
||||
spdk_blob_io_read(handle->blob, ch, handle->dma_buf, lba_off, lba_len,
|
||||
io_completion_cb, &io_ctx);
|
||||
|
||||
rc = wait_done(&io_ctx.done, &io_ctx.rc, "io_read");
|
||||
if (rc != 0) return rc;
|
||||
|
||||
memcpy(buf, (uint8_t *)handle->dma_buf + buf_off, len);
|
||||
return io_ctx.rc;
|
||||
}
|
||||
|
||||
// blob_resize
|
||||
static void blob_resize_cb(void *arg, int rc) {
|
||||
struct md_op_ctx *ctx = arg;
|
||||
ctx->rc = rc;
|
||||
ctx->done = true;
|
||||
}
|
||||
|
||||
static void blob_resize_on_md(struct md_op_ctx *ctx) {
|
||||
uint64_t new_clusters = 0;
|
||||
uint64_t cluster_size = g_engine.cluster_size;
|
||||
int rc = zvfs_calc_ceil_units(ctx->handle_op.new_size, cluster_size, &new_clusters);
|
||||
if (rc != 0) {
|
||||
ctx->rc = rc;
|
||||
ctx->done = true;
|
||||
return;
|
||||
}
|
||||
spdk_blob_resize(ctx->handle_op.handle->blob, new_clusters, blob_resize_cb, ctx);
|
||||
}
|
||||
|
||||
int blob_resize(struct zvfs_blob_handle *handle, uint64_t new_size) {
|
||||
struct md_op_ctx ctx = {.fn = blob_resize_on_md, .op_name = "blob resize"};
|
||||
ctx.handle_op.handle = handle;
|
||||
ctx.handle_op.new_size = new_size;
|
||||
int rc = dispatch_md_op(&ctx);
|
||||
if (rc == 0) {
|
||||
uint64_t new_clusters = 0;
|
||||
zvfs_calc_ceil_units(new_size, g_engine.cluster_size, &new_clusters);
|
||||
handle->size = new_clusters * g_engine.cluster_size;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
// blob_sync_md
|
||||
static void blob_sync_md_cb(void *arg, int rc) {
|
||||
struct md_op_ctx *ctx = arg;
|
||||
ctx->rc = rc;
|
||||
ctx->done = true;
|
||||
}
|
||||
|
||||
static void blob_sync_md_on_md(struct md_op_ctx *ctx) {
|
||||
spdk_blob_sync_md(ctx->handle_op.handle->blob, blob_sync_md_cb, ctx);
|
||||
}
|
||||
|
||||
int blob_sync_md(struct zvfs_blob_handle *handle) {
|
||||
struct md_op_ctx ctx = {.fn = blob_sync_md_on_md, .op_name = "blob sync"};
|
||||
ctx.handle_op.handle = handle;
|
||||
return dispatch_md_op(&ctx);
|
||||
}
|
||||
|
||||
// blob_close
|
||||
static void blob_close_cb(void *arg, int rc) {
|
||||
struct md_op_ctx *ctx = arg;
|
||||
ctx->rc = rc;
|
||||
ctx->done = true;
|
||||
}
|
||||
|
||||
static void blob_close_on_md(struct md_op_ctx *ctx) {
|
||||
spdk_blob_close(ctx->handle_op.handle->blob, blob_close_cb, ctx);
|
||||
}
|
||||
|
||||
int blob_close(struct zvfs_blob_handle *handle) {
|
||||
struct md_op_ctx ctx = {.fn = blob_close_on_md, .op_name = "blob close"};
|
||||
ctx.handle_op.handle = handle;
|
||||
int rc = dispatch_md_op(&ctx);
|
||||
if (rc == 0) {
|
||||
spdk_dma_free(handle->dma_buf);
|
||||
free(handle);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
// blob_delete
|
||||
static void blob_delete_cb(void *arg, int rc) {
|
||||
struct md_op_ctx *ctx = arg;
|
||||
ctx->rc = rc;
|
||||
ctx->done = true;
|
||||
}
|
||||
|
||||
static void blob_delete_on_md(struct md_op_ctx *ctx) {
|
||||
spdk_bs_delete_blob(g_engine.bs, ctx->delete.blob_id, blob_delete_cb, ctx);
|
||||
}
|
||||
|
||||
int blob_delete(uint64_t blob_id) {
|
||||
struct md_op_ctx ctx = {.fn = blob_delete_on_md, .op_name = "blob delete"};
|
||||
ctx.delete.blob_id = blob_id;
|
||||
return dispatch_md_op(&ctx);
|
||||
}
|
||||
44
src/spdk_engine/io_engine.h
Normal file
44
src/spdk_engine/io_engine.h
Normal file
@@ -0,0 +1,44 @@
|
||||
#ifndef __ZVFS_IO_ENGINE_H__
|
||||
#define __ZVFS_IO_ENGINE_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
#include <spdk/blob.h>
|
||||
|
||||
// blob_handle 结构体:底层 blob 信息,不含文件级 size(上层维护)
|
||||
typedef struct zvfs_blob_handle {
|
||||
spdk_blob_id id;
|
||||
struct spdk_blob *blob;
|
||||
uint64_t size;
|
||||
void *dma_buf;
|
||||
uint64_t dma_buf_size;
|
||||
} zvfs_blob_handle_t ;
|
||||
|
||||
/* Process-wide engine state (single instance: g_engine). */
typedef struct zvfs_spdk_io_engine {
    struct spdk_bs_dev *bs_dev;      /* blobstore device created on the bdev */
    struct spdk_blob_store *bs;      /* loaded or initialised blobstore */
    struct spdk_thread *md_thread;   /* SPDK thread that runs metadata operations */
    uint64_t io_unit_size;           /* from spdk_bs_get_io_unit_size() */
    uint64_t cluster_size;           /* from spdk_bs_get_cluster_size() */
    int reactor_count;
} zvfs_spdk_io_engine_t;
|
||||
|
||||
/* Per-thread SPDK state (kept in a __thread variable by io_engine.c). */
typedef struct zvfs_tls_ctx {
    struct spdk_thread *thread;       /* SPDK thread bound to this pthread */
    struct spdk_io_channel *channel;  /* blobstore I/O channel of this thread */
} zvfs_tls_ctx_t;
|
||||
|
||||
int io_engine_init(const char *bdev_name);
|
||||
|
||||
struct zvfs_blob_handle *blob_get_super(void);
|
||||
struct zvfs_blob_handle *blob_create(uint64_t size_hint); // 创建并 open,返回 handle
|
||||
struct zvfs_blob_handle *blob_open(uint64_t blob_id); // open 现有 blob,返回 handle
|
||||
int blob_write(struct zvfs_blob_handle *handle, uint64_t offset, const void *buf, size_t len);
|
||||
int blob_read(struct zvfs_blob_handle *handle, uint64_t offset, void *buf, size_t len);
|
||||
int blob_resize(struct zvfs_blob_handle *handle, uint64_t new_size);
|
||||
int blob_sync_md(struct zvfs_blob_handle *handle);
|
||||
int blob_close(struct zvfs_blob_handle *handle); // close 这个 handle 的 blob*
|
||||
int blob_delete(uint64_t blob_id); // delete,整个 blob(不需 handle)
|
||||
|
||||
#endif // __ZVFS_IO_ENGINE_H__
|
||||
17
src/zvfsmalloc.json
Executable file
17
src/zvfsmalloc.json
Executable file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"subsystems": [
|
||||
{
|
||||
"subsystem": "bdev",
|
||||
"config": [
|
||||
{
|
||||
"method": "bdev_malloc_create",
|
||||
"params": {
|
||||
"name": "Malloc0",
|
||||
"num_blocks": 32768,
|
||||
"block_size": 512
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
17
src/zvfsnvme.json
Executable file
17
src/zvfsnvme.json
Executable file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"subsystems": [
|
||||
{
|
||||
"subsystem": "bdev",
|
||||
"config": [
|
||||
{
|
||||
"method": "bdev_nvme_attach_controller",
|
||||
"params": {
|
||||
"name": "Nvme0",
|
||||
"trtype": "PCIe",
|
||||
"traddr": "0000:03:00.0"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user