rebuild
This commit is contained in:
549
src/hook/zvfs_hook_fd.c
Normal file
549
src/hook/zvfs_hook_fd.c
Normal file
@@ -0,0 +1,549 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "zvfs_hook_fd.h"
|
||||
#include "zvfs_hook_init.h"
|
||||
#include "zvfs_hook_reentrant.h"
|
||||
#include "fs/zvfs.h"
|
||||
#include "fs/zvfs_inode.h"
|
||||
#include "fs/zvfs_path_entry.h"
|
||||
#include "fs/zvfs_open_file.h"
|
||||
#include "spdk_engine/io_engine.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Internal: core open logic (path already resolved to an absolute path) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
|
||||
* zvfs_open_impl - 对一个确认属于 zvfs 的绝对路径执行 open。
|
||||
*
|
||||
* real_fd:已经由 real_open* 打开的真实 fd(用于 xattr 读写 + ftruncate)。
|
||||
* flags :open 时传入的 flags。
|
||||
* mode :O_CREAT 时的权限。
|
||||
*
|
||||
* 成功返回 real_fd(即用户拿到的 fd),失败返回 -1(errno 已设置),
|
||||
* 失败时调用方负责 real_close(real_fd)。
|
||||
*/
|
||||
static int
|
||||
zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
|
||||
{
|
||||
struct zvfs_inode *inode = NULL;
|
||||
struct zvfs_blob_handle *handle = NULL;
|
||||
uint64_t blob_id = 0;
|
||||
|
||||
if (flags & O_CREAT) {
|
||||
/* ---- 创建路径 -------------------------------------------- */
|
||||
|
||||
/* 1. 创建 blob */
|
||||
handle = blob_create(0);
|
||||
if (!handle) { errno = EIO; goto fail; }
|
||||
blob_id = handle->id;
|
||||
|
||||
/* 2. 把 blob_id 写入真实文件的 xattr */
|
||||
if (zvfs_xattr_write_blob_id(real_fd, blob_id) < 0) goto fail;
|
||||
|
||||
/* 3. logical_size = 0,让 st_size 也为 0 */
|
||||
if (real_ftruncate(real_fd, 0) < 0) goto fail;
|
||||
|
||||
/* 4. 分配 inode */
|
||||
inode = inode_alloc(blob_id, mode ? mode : 0666, ZVFS_ITYPE_FILE);
|
||||
if (!inode) { errno = ENOMEM; goto fail; }
|
||||
|
||||
/* 5. 插入全局表 */
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_insert(inode);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
/* 6. 插入 path_cache */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_insert(abspath, inode);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
} else {
|
||||
/* ---- 打开已有文件路径 ------------------------------------- */
|
||||
|
||||
/* 1. 先查 path_cache,命中说明另一个 fd 已经打开过 */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
struct zvfs_path_entry *pe = path_cache_lookup(abspath);
|
||||
if (pe) inode = pe->inode;
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
|
||||
if (inode) {
|
||||
/* path_cache 命中:直接用缓存的 inode,重新 blob_open */
|
||||
blob_id = inode->blob_id;
|
||||
handle = blob_open(blob_id);
|
||||
if (!handle) { errno = EIO; goto fail; }
|
||||
/* 共享 inode,增加引用 */
|
||||
atomic_fetch_add(&inode->ref_count, 1);
|
||||
|
||||
} else {
|
||||
/* 未命中:从 xattr 读 blob_id,可能是进程首次 open */
|
||||
if (zvfs_xattr_read_blob_id(real_fd, &blob_id) < 0) {
|
||||
/* xattr 不存在:不是 zvfs 管理的文件,降级透传 */
|
||||
return real_fd; /* 直接返回,不做任何包装 */
|
||||
}
|
||||
|
||||
/* 再查 inode_table(另一个 fd 可能已经 open 但路径未缓存)*/
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode = inode_lookup(blob_id);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
if (inode) {
|
||||
atomic_fetch_add(&inode->ref_count, 1);
|
||||
} else {
|
||||
/* 全新 inode:需从真实文件 stat 获取 mode/size */
|
||||
struct stat st;
|
||||
if (real_fstat(real_fd, &st) < 0) goto fail;
|
||||
|
||||
inode = inode_alloc(blob_id, st.st_mode, ZVFS_ITYPE_FILE);
|
||||
if (!inode) { errno = ENOMEM; goto fail; }
|
||||
inode->logical_size = (uint64_t)st.st_size;
|
||||
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_insert(inode);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_insert(abspath, inode);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
}
|
||||
|
||||
handle = blob_open(blob_id);
|
||||
if (!handle) { errno = EIO; goto fail; }
|
||||
}
|
||||
}
|
||||
|
||||
/* ---- 分配 openfile,插入 fd_table ---------------------------- */
|
||||
struct zvfs_open_file *of = openfile_alloc(real_fd, inode, flags, handle);
|
||||
if (!of) { errno = ENOMEM; goto fail_handle; }
|
||||
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
openfile_insert(of);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
return real_fd;
|
||||
|
||||
fail_handle:
|
||||
blob_close(handle);
|
||||
fail:
|
||||
/* inode 若刚分配(ref_count==1)需要回滚 */
|
||||
if (inode && atomic_load(&inode->ref_count) == 1) {
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_remove(inode->blob_id);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
path_cache_remove(abspath);
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
inode_free(inode);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* open */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * open - hooked open(2).
 *
 * Paths outside zvfs, and calls made while already inside a hook, go
 * straight to real_open(). For zvfs paths the real file is opened first
 * and zvfs_open_impl() then attaches the zvfs state to the resulting fd.
 * Returns the fd, or -1 with errno set.
 */
int
open(const char *path, int flags, ...)
{
    ZVFS_HOOK_ENTER();

    /* The variadic mode is present for O_CREAT and also for O_TMPFILE. */
    int needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif

    mode_t mode = 0;
    if (needs_mode) {
        va_list ap;
        va_start(ap, flags);
        /* mode_t is promoted when passed through "..." */
        mode = (mode_t)va_arg(ap, unsigned int);
        va_end(ap);
    }

    int ret;
    if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path)) {
        ret = real_open(path, flags, mode);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    zvfs_ensure_init();

    /* Let the real filesystem create / open the file first. */
    int real_fd = real_open(path, flags, mode);
    if (real_fd < 0) { ZVFS_HOOK_LEAVE(); return -1; }

    /*
     * NOTE(review): path is passed on unresolved, whereas openat() passes a
     * resolved absolute path - confirm path_cache keys stay consistent for
     * relative paths.
     */
    ret = zvfs_open_impl(real_fd, path, flags, mode);
    if (ret < 0) {
        /* keep the errno from zvfs_open_impl across the cleanup close */
        int saved = errno;
        real_close(real_fd);
        errno = saved;
    }

    ZVFS_HOOK_LEAVE();
    return ret;
}
|
||||
|
||||
int open64(const char *path, int flags, ...)
|
||||
{
|
||||
mode_t mode = 0;
|
||||
if (flags & O_CREAT) {
|
||||
va_list ap; va_start(ap, flags);
|
||||
mode = (mode_t)va_arg(ap, unsigned int);
|
||||
va_end(ap);
|
||||
}
|
||||
return open(path, flags | O_LARGEFILE, mode);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* openat */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
openat(int dirfd, const char *path, int flags, ...)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
mode_t mode = 0;
|
||||
if (flags & O_CREAT) {
|
||||
va_list ap; va_start(ap, flags);
|
||||
mode = (mode_t)va_arg(ap, unsigned int);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
/* 解析绝对路径判断是否属于 zvfs */
|
||||
char abspath[PATH_MAX];
|
||||
if (zvfs_resolve_atpath(dirfd, path, abspath, sizeof(abspath)) < 0) {
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret;
|
||||
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(abspath)) {
|
||||
ret = real_openat(dirfd, path, flags, mode);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
|
||||
int real_fd = real_openat(dirfd, path, flags, mode);
|
||||
if (real_fd < 0) { ZVFS_HOOK_LEAVE(); return -1; }
|
||||
|
||||
ret = zvfs_open_impl(real_fd, abspath, flags, mode);
|
||||
if (ret < 0) {
|
||||
int saved = errno;
|
||||
real_close(real_fd);
|
||||
errno = saved;
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
int openat64(int dirfd, const char *path, int flags, ...)
|
||||
{
|
||||
mode_t mode = 0;
|
||||
if (flags & O_CREAT) {
|
||||
va_list ap; va_start(ap, flags);
|
||||
mode = (mode_t)va_arg(ap, unsigned int);
|
||||
va_end(ap);
|
||||
}
|
||||
return openat(dirfd, path, flags | O_LARGEFILE, mode);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* creat */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/* creat - expressed as the hooked open() with create/write-only/truncate. */
int
creat(const char *path, mode_t mode)
{
    const int creat_flags = O_CREAT | O_WRONLY | O_TRUNC;

    return open(path, creat_flags, mode);
}
|
||||
|
||||
int creat64(const char *path, mode_t mode)
|
||||
{
|
||||
return open(path, O_CREAT | O_WRONLY | O_TRUNC | O_LARGEFILE, mode);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* glibc aliases */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/* __open - glibc alias; forwards to the hooked open(). */
int __open(const char *path, int flags, ...)
{
    /* The variadic mode is present for O_CREAT and also for O_TMPFILE. */
    int needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif

    mode_t mode = 0;
    if (needs_mode) {
        va_list ap;
        va_start(ap, flags);
        mode = (mode_t)va_arg(ap, unsigned int);
        va_end(ap);
    }
    return open(path, flags, mode);
}
|
||||
|
||||
/* __open64 - glibc alias; forwards to the hooked open64(). */
int __open64(const char *path, int flags, ...)
{
    /* The variadic mode is present for O_CREAT and also for O_TMPFILE. */
    int needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif

    mode_t mode = 0;
    if (needs_mode) {
        va_list ap;
        va_start(ap, flags);
        mode = (mode_t)va_arg(ap, unsigned int);
        va_end(ap);
    }
    return open64(path, flags, mode);
}
|
||||
|
||||
/* __libc_open - glibc alias; forwards to the hooked open(). */
int __libc_open(const char *path, int flags, ...)
{
    /* The variadic mode is present for O_CREAT and also for O_TMPFILE. */
    int needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif

    mode_t mode = 0;
    if (needs_mode) {
        va_list ap;
        va_start(ap, flags);
        mode = (mode_t)va_arg(ap, unsigned int);
        va_end(ap);
    }
    return open(path, flags, mode);
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* close */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* zvfs_close_impl - zvfs fd 的关闭逻辑。
|
||||
*
|
||||
* 调用方已持有 fd_mu。函数内部会释放 fd_mu 后再处理 inode。
|
||||
*/
|
||||
static int
|
||||
zvfs_close_impl(int fd)
|
||||
{
|
||||
/* 持 fd_mu 取出 openfile,从表里摘除 */
|
||||
pthread_mutex_lock(&g_fs.fd_mu);
|
||||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||||
if (!of) {
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
int new_ref = atomic_fetch_sub(&of->ref_count, 1) - 1;
|
||||
if (new_ref == 0)
|
||||
openfile_remove(fd);
|
||||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||||
|
||||
if (new_ref > 0) {
|
||||
/*
|
||||
* 还有其他 dup 出来的 fd 引用同一个 openfile,
|
||||
* 只关闭真实 fd,不动 blob 和 inode。
|
||||
*/
|
||||
return real_close(fd);
|
||||
}
|
||||
|
||||
/* ---- openfile 引用归零:关闭 blob handle --------------------- */
|
||||
struct zvfs_inode *inode = of->inode;
|
||||
struct zvfs_blob_handle *handle = of->handle;
|
||||
openfile_free(of);
|
||||
|
||||
blob_close(handle);
|
||||
|
||||
/* ---- inode ref_count-- --------------------------------------- */
|
||||
int inode_ref = atomic_fetch_sub(&inode->ref_count, 1) - 1;
|
||||
|
||||
if (inode_ref == 0) {
|
||||
/*
|
||||
* 最后一个 fd 关闭了这个 inode。
|
||||
* 若 deleted:执行延迟 blob_delete。
|
||||
*/
|
||||
bool do_delete = false;
|
||||
pthread_mutex_lock(&inode->mu);
|
||||
do_delete = inode->deleted;
|
||||
pthread_mutex_unlock(&inode->mu);
|
||||
|
||||
if (do_delete)
|
||||
blob_delete(inode->blob_id);
|
||||
|
||||
pthread_mutex_lock(&g_fs.inode_mu);
|
||||
inode_remove(inode->blob_id);
|
||||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||||
|
||||
/* path_cache 在 unlink 时已经摘除(deleted=true 路径)
|
||||
* 或在此处还需摘除(正常关闭最后一个 fd)*/
|
||||
if (!do_delete) {
|
||||
/* 正常关闭:path 留着,只有 inode 的引用归零时清缓存 */
|
||||
/* 注意:path_cache 里的指针指向这个即将释放的 inode,
|
||||
* 所以必须把 path_cache 条目也清掉,否则成为悬空指针 */
|
||||
pthread_mutex_lock(&g_fs.path_mu);
|
||||
/* 遍历找到所有指向这个 inode 的 path entry 并移除
|
||||
* (一个 inode 对应一个 path,hardlink 暂不支持)*/
|
||||
struct zvfs_path_entry *pe, *tmp; (void)tmp;
|
||||
HASH_ITER(hh, g_fs.path_cache, pe, tmp) {
|
||||
if (pe->inode == inode) {
|
||||
HASH_DEL(g_fs.path_cache, pe);
|
||||
free(pe->path);
|
||||
free(pe);
|
||||
break; /* 一对一关系,找到即退 */
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&g_fs.path_mu);
|
||||
}
|
||||
|
||||
inode_free(inode);
|
||||
}
|
||||
|
||||
return real_close(fd);
|
||||
}
|
||||
|
||||
/*
 * close - hooked close(2).
 *
 * Descriptors that zvfs does not track, and calls made while already
 * inside a hook, go straight to real_close(); tracked descriptors are
 * handled by zvfs_close_impl().
 */
int
close(int fd)
{
    ZVFS_HOOK_ENTER();

    int rc;

    if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(fd)) {
        rc = real_close(fd);
    } else {
        zvfs_ensure_init();
        rc = zvfs_close_impl(fd);
    }

    ZVFS_HOOK_LEAVE();
    return rc;
}
|
||||
|
||||
/* __close - glibc alias; forwards to the hooked close(). */
int
__close(int fd)
{
    return close(fd);
}
|
||||
/* __libc_close - glibc alias; forwards to the hooked close(). */
int
__libc_close(int fd)
{
    return close(fd);
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* close_range */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
close_range(unsigned int first, unsigned int last, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
if (ZVFS_IN_HOOK()) {
|
||||
int ret = real_close_range ? real_close_range(first, last, flags)
|
||||
: (errno = ENOSYS, -1);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* 遍历范围内所有 fd,zvfs fd 单独走 zvfs_close_impl,
|
||||
* 其余统一交给 real_close_range(如果内核支持)。
|
||||
* 若内核不支持 close_range(< 5.9),逐个 close。
|
||||
*/
|
||||
int any_err = 0;
|
||||
int inited = 0;
|
||||
for (unsigned int fd = first; fd <= last; fd++) {
|
||||
if (zvfs_is_zvfs_fd((int)fd)) {
|
||||
if (!inited) {
|
||||
zvfs_ensure_init();
|
||||
inited = 1;
|
||||
}
|
||||
if (zvfs_close_impl((int)fd) < 0) any_err = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* 让内核处理剩余非 zvfs fd(CLOEXEC 等 flags 也在这里生效) */
|
||||
if (real_close_range) {
|
||||
if (real_close_range(first, last, flags) < 0 && !any_err)
|
||||
any_err = 1;
|
||||
} else {
|
||||
/* 降级:逐个 close 非 zvfs fd */
|
||||
for (unsigned int fd = first; fd <= last; fd++) {
|
||||
if (!zvfs_is_zvfs_fd((int)fd))
|
||||
real_close((int)fd);
|
||||
}
|
||||
}
|
||||
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return any_err ? -1 : 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* dup */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * dup - hooked dup(2).
 *
 * Descriptors that zvfs does not track, and calls made while already
 * inside a hook, go to real_dup(). Duplicating a zvfs descriptor is not
 * supported in this version and fails with ENOTSUP rather than exposing
 * incorrect file-offset semantics.
 */
int
dup(int oldfd)
{
    ZVFS_HOOK_ENTER();

    int rc;

    if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(oldfd)) {
        rc = real_dup(oldfd);
    } else {
        zvfs_ensure_init();
        errno = ENOTSUP;
        rc = -1;
    }

    ZVFS_HOOK_LEAVE();
    return rc;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* dup2 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * dup2 - hooked dup2(2).
 *
 * Duplicating FROM a zvfs descriptor is not supported (ENOTSUP), except
 * for dup2(fd, fd), which returns fd. Duplicating a non-zvfs descriptor
 * ONTO a zvfs descriptor first tears down the zvfs state of newfd, because
 * the kernel silently replaces newfd and the fd table would otherwise keep
 * a stale record for it.
 */
int
dup2(int oldfd, int newfd)
{
    ZVFS_HOOK_ENTER();

    int ret;

    if (ZVFS_IN_HOOK()) {
        ret = real_dup2(oldfd, newfd);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    if (!zvfs_is_zvfs_fd(oldfd)) {
        /*
         * dup2(oldfd, oldfd) has no side effects and succeeds only for a
         * valid oldfd. It is used as a probe so that newfd is not torn
         * down when the real dup2 would fail with EBADF.
         */
        if (zvfs_is_zvfs_fd(newfd) && real_dup2(oldfd, oldfd) == oldfd) {
            zvfs_ensure_init();
            /* dup2 closes newfd silently; its close errors are not reported */
            (void)zvfs_close_impl(newfd);
        }
        ret = real_dup2(oldfd, newfd);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    /* POSIX: dup2(oldfd, oldfd) on a valid fd just returns oldfd. */
    if (oldfd == newfd) {
        ZVFS_HOOK_LEAVE();
        return oldfd;
    }

    zvfs_ensure_init();
    errno = ENOTSUP;
    ZVFS_HOOK_LEAVE();
    return -1;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* dup3 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int
|
||||
dup3(int oldfd, int newfd, int flags)
|
||||
{
|
||||
ZVFS_HOOK_ENTER();
|
||||
|
||||
int is_zvfs_fd = (!ZVFS_IN_HOOK() && zvfs_is_zvfs_fd(oldfd));
|
||||
if (!is_zvfs_fd) {
|
||||
int ret = real_dup3(oldfd, newfd, flags);
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (oldfd == newfd) {
|
||||
errno = EINVAL;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
|
||||
zvfs_ensure_init();
|
||||
errno = ENOTSUP;
|
||||
ZVFS_HOOK_LEAVE();
|
||||
return -1;
|
||||
}
|
||||
Reference in New Issue
Block a user