1558 lines
45 KiB
C
1558 lines
45 KiB
C
#ifndef _GNU_SOURCE
|
||
#define _GNU_SOURCE
|
||
#endif
|
||
#include "zvfs_hook_fd.h"
|
||
#include "zvfs_hook_init.h"
|
||
#include "zvfs_hook_reentrant.h"
|
||
#include "fs/zvfs.h"
|
||
#include "fs/zvfs_inode.h"
|
||
#include "fs/zvfs_path_entry.h"
|
||
#include "fs/zvfs_open_file.h"
|
||
#include "spdk_engine/io_engine.h"
|
||
|
||
#include <stdarg.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <errno.h>
|
||
#include <unistd.h>
|
||
#include <limits.h>
|
||
#include <pthread.h>
|
||
#include <stdio.h>
|
||
|
||
#define zvfs_debug_open_log(...) ((void)0)
|
||
|
||
/* Close-path helper; implemented in the second half of this file. */
|
||
static int zvfs_detach_fd_mapping(int fd, int do_sync_md);
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* Internal: path classification helpers */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
/**
|
||
* openat 到达符号链接之后跳转到 /zvfs 下,导致捕获不了。
|
||
*
|
||
* 1. 判断路径是不是 /zvfs
|
||
* 2. 判断readpath是不是 /zvfs
|
||
* 3. 如果O_CREATE并且目标不存在,realpath什么也拿不到。先解析父路径,再拼接看是不是落在 /zvfs
|
||
*/
|
||
static int
|
||
zvfs_classify_path(const char *abspath, int may_create,
|
||
char *normalized_out, size_t out_size)
|
||
{
|
||
char resolved[PATH_MAX];
|
||
char tmp[PATH_MAX];
|
||
char parent[PATH_MAX];
|
||
char candidate[PATH_MAX];
|
||
const char *name;
|
||
char *slash;
|
||
int n;
|
||
|
||
if (!abspath || !normalized_out || out_size == 0) {
|
||
return 0;
|
||
}
|
||
|
||
strncpy(normalized_out, abspath, out_size);
|
||
normalized_out[out_size - 1] = '\0';
|
||
|
||
if (zvfs_is_zvfs_path(abspath)) {
|
||
return 1;
|
||
}
|
||
|
||
if (realpath(abspath, resolved) != NULL) {
|
||
if (zvfs_is_zvfs_path(resolved)) {
|
||
strncpy(normalized_out, resolved, out_size);
|
||
normalized_out[out_size - 1] = '\0';
|
||
return 1;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
if (!may_create) {
|
||
return 0;
|
||
}
|
||
|
||
strncpy(tmp, abspath, sizeof(tmp));
|
||
tmp[sizeof(tmp) - 1] = '\0';
|
||
slash = strrchr(tmp, '/');
|
||
if (!slash) {
|
||
return 0;
|
||
}
|
||
|
||
name = slash + 1;
|
||
if (*name == '\0') {
|
||
return 0;
|
||
}
|
||
|
||
if (slash == tmp) {
|
||
strcpy(parent, "/");
|
||
} else {
|
||
*slash = '\0';
|
||
strncpy(parent, tmp, sizeof(parent));
|
||
parent[sizeof(parent) - 1] = '\0';
|
||
}
|
||
|
||
if (realpath(parent, resolved) == NULL) {
|
||
return 0;
|
||
}
|
||
|
||
n = snprintf(candidate, sizeof(candidate), "%s/%s", resolved, name);
|
||
if (n <= 0 || (size_t)n >= sizeof(candidate)) {
|
||
return 0;
|
||
}
|
||
|
||
if (!zvfs_is_zvfs_path(candidate)) {
|
||
return 0;
|
||
}
|
||
|
||
strncpy(normalized_out, candidate, out_size);
|
||
normalized_out[out_size - 1] = '\0';
|
||
return 1;
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* Internal: fopen mode parsing */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
static int
|
||
zvfs_parse_fopen_mode(const char *mode, int extra_flags, int *flags_out, mode_t *create_mode_out)
|
||
{
|
||
int flags = 0;
|
||
int plus = 0;
|
||
int excl = 0;
|
||
int cloexec = 0;
|
||
const char *p;
|
||
|
||
if (!mode || !*mode || !flags_out || !create_mode_out) {
|
||
errno = EINVAL;
|
||
return -1;
|
||
}
|
||
|
||
for (p = mode + 1; *p && *p != ','; ++p) {
|
||
if (*p == '+') plus = 1;
|
||
else if (*p == 'x') excl = 1;
|
||
else if (*p == 'e') cloexec = 1;
|
||
}
|
||
|
||
switch (mode[0]) {
|
||
case 'r':
|
||
flags = plus ? O_RDWR : O_RDONLY;
|
||
break;
|
||
case 'w':
|
||
flags = (plus ? O_RDWR : O_WRONLY) | O_CREAT | O_TRUNC;
|
||
break;
|
||
case 'a':
|
||
flags = (plus ? O_RDWR : O_WRONLY) | O_CREAT | O_APPEND;
|
||
break;
|
||
default:
|
||
errno = EINVAL;
|
||
return -1;
|
||
}
|
||
|
||
if (excl) {
|
||
flags |= O_EXCL;
|
||
}
|
||
if (cloexec) {
|
||
flags |= O_CLOEXEC;
|
||
}
|
||
flags |= extra_flags;
|
||
|
||
*flags_out = flags;
|
||
*create_mode_out = 0666;
|
||
return 0;
|
||
}
|
||
|
||
/*
 * zvfs_sanitize_fdopen_mode - reduce an fopen() mode string to the subset
 * handed to fdopen().
 *
 * Keeps the first character of @mode and, if present before the end of the
 * string or the first ',', the modifiers 'b' and '+', always emitted in that
 * order. Every other modifier is dropped. A NULL or empty @mode yields "r".
 *
 * @mode: fopen-style mode string, may be NULL.
 * @out:  4-byte buffer receiving the NUL-terminated result (at most three
 *        characters: base, 'b', '+').
 */
static void
zvfs_sanitize_fdopen_mode(const char *mode, char out[4])
{
    const char *c;
    char *w = out;
    int has_plus = 0;
    int has_binary = 0;

    /* Fallback used when there is nothing to parse. */
    out[0] = 'r';
    out[1] = '\0';

    if (mode == NULL || mode[0] == '\0') {
        return;
    }

    for (c = &mode[1]; *c != '\0' && *c != ','; c++) {
        switch (*c) {
        case '+':
            has_plus = 1;
            break;
        case 'b':
            has_binary = 1;
            break;
        default:
            break;
        }
    }

    /* At most three characters are written, so the terminator fits. */
    *w++ = mode[0];
    if (has_binary) {
        *w++ = 'b';
    }
    if (has_plus) {
        *w++ = '+';
    }
    *w = '\0';
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* Internal: core open logic (path already resolved to an absolute path) */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
/**
|
||
* zvfs_open_impl - 对一个确认属于 zvfs 的绝对路径执行 open。
|
||
*
|
||
* real_fd:已经由 real_open* 打开的真实 fd(用于 xattr 读写 + ftruncate)。
|
||
* flags :open 时传入的 flags。
|
||
* mode :O_CREAT 时的权限。
|
||
*
|
||
* 成功返回 real_fd(即用户拿到的 fd),失败返回 -1(errno 已设置),
|
||
* 失败时调用方负责 real_close(real_fd)。
|
||
*/
|
||
static int
|
||
zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
|
||
{
|
||
struct zvfs_inode *inode = NULL;
|
||
uint64_t blob_id = 0;
|
||
uint64_t handle_id = 0;
|
||
int create_new = 0;
|
||
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open_impl enter real_fd=%d path=%s flags=0x%x mode=%#o",
|
||
real_fd, zvfs_dbg_str(abspath), flags, (unsigned)mode);
|
||
|
||
if (flags & O_CREAT) {
|
||
/*
|
||
* O_CREAT does not imply the file is newly created.
|
||
* fio, for example, may open an existing file with O_CREAT again
|
||
* during the worker phase. Only create a new blob when the backing
|
||
* file does not already carry a ZVFS blob_id xattr.
|
||
*/
|
||
if (zvfs_xattr_read_blob_id(real_fd, &blob_id) == 0) {
|
||
create_new = 0;
|
||
} else if (errno == ENODATA
|
||
#ifdef ENOATTR
|
||
|| errno == ENOATTR
|
||
#endif
|
||
) {
|
||
create_new = 1;
|
||
blob_id = 0;
|
||
} else {
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open_impl xattr probe fail errno=%d(%s)",
|
||
errno, strerror(errno));
|
||
goto fail;
|
||
}
|
||
}
|
||
|
||
if (create_new) {
|
||
/* ---- 创建路径 -------------------------------------------- */
|
||
|
||
/* 1. 创建 blob */
|
||
if (blob_create(0, flags, &blob_id, &handle_id) != 0) {
|
||
int saved = errno;
|
||
if (saved == 0) saved = EIO;
|
||
fprintf(stderr,
|
||
"[zvfs] create blob failed path=%s flags=0x%x errno=%d(%s)\n",
|
||
abspath, flags, saved, strerror(saved));
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"create branch blob_create fail errno=%d(%s)",
|
||
saved, strerror(saved));
|
||
errno = saved;
|
||
goto fail;
|
||
}
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"create branch blob_create ok blob_id=%lu handle_id=%lu",
|
||
(unsigned long)blob_id, (unsigned long)handle_id);
|
||
|
||
/* 2. 把 blob_id 写入真实文件的 xattr */
|
||
if (zvfs_xattr_write_blob_id(real_fd, blob_id) < 0) {
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"create branch xattr_write fail errno=%d(%s)",
|
||
errno, strerror(errno));
|
||
goto fail;
|
||
}
|
||
zvfs_debug_open_log(abspath, NULL, "create branch xattr_write ok");
|
||
|
||
/* 3. logical_size = 0,让 st_size 也为 0 */
|
||
if (real_ftruncate(real_fd, 0) < 0) {
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"create branch real_ftruncate(0) fail errno=%d(%s)",
|
||
errno, strerror(errno));
|
||
goto fail;
|
||
}
|
||
zvfs_debug_open_log(abspath, NULL, "create branch real_ftruncate(0) ok");
|
||
|
||
/* 4. 分配 inode */
|
||
inode = inode_alloc(blob_id, mode ? mode : 0666, ZVFS_ITYPE_FILE);
|
||
if (!inode) {
|
||
errno = ENOMEM;
|
||
zvfs_debug_open_log(abspath, NULL, "create branch inode_alloc fail ENOMEM");
|
||
goto fail;
|
||
}
|
||
|
||
/* 5. 插入全局表 */
|
||
pthread_mutex_lock(&g_fs.inode_mu);
|
||
inode_insert(inode);
|
||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||
|
||
/* 6. 插入 path_cache */
|
||
pthread_mutex_lock(&g_fs.path_mu);
|
||
path_cache_insert(abspath, inode);
|
||
pthread_mutex_unlock(&g_fs.path_mu);
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"create branch inode/path_cache inserted logical_size=%lu",
|
||
(unsigned long)inode->logical_size);
|
||
|
||
} else {
|
||
/* ---- 打开已有文件路径 ------------------------------------- */
|
||
|
||
/* 1. 先查 path_cache,命中说明另一个 fd 已经打开过 */
|
||
pthread_mutex_lock(&g_fs.path_mu);
|
||
struct zvfs_path_entry *pe = path_cache_lookup(abspath);
|
||
if (pe) inode = pe->inode;
|
||
pthread_mutex_unlock(&g_fs.path_mu);
|
||
|
||
if (inode) {
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing path_cache hit inode_blob_id=%lu",
|
||
(unsigned long)inode->blob_id);
|
||
/* path_cache 命中:直接用缓存的 inode,重新 blob_open */
|
||
blob_id = inode->blob_id;
|
||
if (blob_open(blob_id, flags, &handle_id) != 0) {
|
||
if (errno == 0) errno = EIO;
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing path_cache-hit blob_open fail errno=%d(%s)",
|
||
errno, strerror(errno));
|
||
goto fail;
|
||
}
|
||
/* 共享 inode,增加引用 */
|
||
atomic_fetch_add(&inode->ref_count, 1);
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing path_cache-hit blob_open ok handle_id=%lu",
|
||
(unsigned long)handle_id);
|
||
|
||
} else {
|
||
zvfs_debug_open_log(abspath, NULL, "open existing path_cache miss");
|
||
/* 未命中:从 xattr 读 blob_id,可能是进程首次 open */
|
||
if (zvfs_xattr_read_blob_id(real_fd, &blob_id) < 0) {
|
||
/* xattr 不存在:不是 zvfs 管理的文件,降级透传 */
|
||
return real_fd; /* 直接返回,不做任何包装 */
|
||
}
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing xattr_read ok blob_id=%lu",
|
||
(unsigned long)blob_id);
|
||
|
||
/* 再查 inode_table(另一个 fd 可能已经 open 但路径未缓存)*/
|
||
pthread_mutex_lock(&g_fs.inode_mu);
|
||
inode = inode_lookup(blob_id);
|
||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||
|
||
if (inode) {
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing inode_table hit blob_id=%lu",
|
||
(unsigned long)blob_id);
|
||
if (blob_open(blob_id, flags, &handle_id) != 0) {
|
||
if (errno == 0) errno = EIO;
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing inode_table-hit blob_open fail errno=%d(%s)",
|
||
errno, strerror(errno));
|
||
goto fail;
|
||
}
|
||
atomic_fetch_add(&inode->ref_count, 1);
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing inode_table-hit blob_open ok handle_id=%lu",
|
||
(unsigned long)handle_id);
|
||
} else {
|
||
/* 全新 inode:需从真实文件 stat 获取 mode/size */
|
||
struct stat st;
|
||
if (zvfs_real_fstat(real_fd, &st) < 0) {
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing fstat fail errno=%d(%s)",
|
||
errno, strerror(errno));
|
||
goto fail;
|
||
}
|
||
|
||
inode = inode_alloc(blob_id, st.st_mode, ZVFS_ITYPE_FILE);
|
||
if (!inode) {
|
||
errno = ENOMEM;
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing inode_alloc fail ENOMEM");
|
||
goto fail;
|
||
}
|
||
inode->logical_size = (uint64_t)st.st_size;
|
||
|
||
pthread_mutex_lock(&g_fs.inode_mu);
|
||
inode_insert(inode);
|
||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||
|
||
pthread_mutex_lock(&g_fs.path_mu);
|
||
path_cache_insert(abspath, inode);
|
||
pthread_mutex_unlock(&g_fs.path_mu);
|
||
if (blob_open(blob_id, flags, &handle_id) != 0) {
|
||
if (errno == 0) errno = EIO;
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing new-inode blob_open fail errno=%d(%s)",
|
||
errno, strerror(errno));
|
||
goto fail;
|
||
}
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open existing new-inode ready handle_id=%lu logical_size=%lu",
|
||
(unsigned long)handle_id,
|
||
(unsigned long)inode->logical_size);
|
||
}
|
||
}
|
||
}
|
||
|
||
/* ---- 分配 openfile,插入 fd_table ---------------------------- */
|
||
struct zvfs_open_file *of = openfile_alloc(real_fd, inode, handle_id);
|
||
if (!of) { errno = ENOMEM; goto fail_handle; }
|
||
|
||
pthread_mutex_lock(&g_fs.fd_mu);
|
||
openfile_insert(of);
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open_impl success real_fd=%d handle_id=%lu inode_blob_id=%lu",
|
||
real_fd,
|
||
(unsigned long)handle_id,
|
||
(unsigned long)(inode ? inode->blob_id : 0));
|
||
|
||
return real_fd;
|
||
|
||
fail_handle:
|
||
if (handle_id != 0) {
|
||
blob_close(handle_id);
|
||
}
|
||
fail:
|
||
zvfs_debug_open_log(abspath, NULL,
|
||
"open_impl fail errno=%d(%s) real_fd=%d",
|
||
errno, strerror(errno), real_fd);
|
||
/* inode 若刚分配(ref_count==1)需要回滚 */
|
||
if (inode && atomic_load(&inode->ref_count) == 1) {
|
||
pthread_mutex_lock(&g_fs.inode_mu);
|
||
inode_remove(inode->blob_id);
|
||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||
pthread_mutex_lock(&g_fs.path_mu);
|
||
path_cache_remove(abspath);
|
||
pthread_mutex_unlock(&g_fs.path_mu);
|
||
inode_free(inode);
|
||
}
|
||
return -1;
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* open */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
int
|
||
open(const char *path, int flags, ...)
|
||
{
|
||
ZVFS_HOOK_ENTER();
|
||
|
||
char abspath[PATH_MAX];
|
||
char normpath[PATH_MAX];
|
||
abspath[0] = '\0';
|
||
normpath[0] = '\0';
|
||
int is_zvfs_path = 0;
|
||
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap;
|
||
va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
|
||
if (zvfs_resolve_atpath(AT_FDCWD, path, abspath, sizeof(abspath)) == 0) {
|
||
is_zvfs_path = zvfs_classify_path(abspath, (flags & O_CREAT) != 0,
|
||
normpath, sizeof(normpath));
|
||
zvfs_debug_open_log(path, abspath,
|
||
"open resolve ok path=%s abspath=%s norm=%s flags=0x%x is_zvfs=%d",
|
||
zvfs_dbg_str(path), zvfs_dbg_str(abspath),
|
||
zvfs_dbg_str(normpath), flags, is_zvfs_path);
|
||
} else {
|
||
zvfs_debug_open_log(path, NULL,
|
||
"open resolve fail path=%s flags=0x%x errno=%d(%s)",
|
||
zvfs_dbg_str(path), flags, errno, strerror(errno));
|
||
}
|
||
|
||
int ret;
|
||
if (ZVFS_IN_HOOK() || !is_zvfs_path) {
|
||
zvfs_debug_open_log(path, abspath,
|
||
"open passthrough reason=%s path=%s flags=0x%x",
|
||
ZVFS_IN_HOOK() ? "reentrant" : "non-zvfs",
|
||
zvfs_dbg_str(path), flags);
|
||
ret = real_open(path, flags, mode);
|
||
zvfs_debug_open_log(path, abspath,
|
||
"open passthrough ret=%d errno=%d(%s)",
|
||
ret, (ret < 0) ? errno : 0, (ret < 0) ? strerror(errno) : "OK");
|
||
ZVFS_HOOK_LEAVE();
|
||
return ret;
|
||
}
|
||
|
||
zvfs_ensure_init();
|
||
|
||
/* 先让真实 FS 创建 / 打开文件(获得 real_fd) */
|
||
int real_fd = real_open(path, flags, mode);
|
||
if (real_fd < 0) {
|
||
zvfs_debug_open_log(path, abspath,
|
||
"open real_open fail path=%s flags=0x%x errno=%d(%s)",
|
||
zvfs_dbg_str(path), flags, errno, strerror(errno));
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
zvfs_debug_open_log(path, abspath,
|
||
"open real_open ok real_fd=%d path=%s norm=%s",
|
||
real_fd, zvfs_dbg_str(path), zvfs_dbg_str(normpath));
|
||
|
||
ret = zvfs_open_impl(real_fd, normpath, flags, mode);
|
||
if (ret < 0) {
|
||
int saved = errno;
|
||
real_close(real_fd);
|
||
errno = saved;
|
||
zvfs_debug_open_log(path, abspath,
|
||
"open zvfs_open_impl fail real_fd=%d errno=%d(%s)",
|
||
real_fd, saved, strerror(saved));
|
||
} else {
|
||
zvfs_debug_open_log(path, abspath,
|
||
"open zvfs_open_impl success fd=%d", ret);
|
||
}
|
||
|
||
ZVFS_HOOK_LEAVE();
|
||
return ret;
|
||
}
|
||
|
||
int open64(const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return open(path, flags | O_LARGEFILE, mode);
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* openat */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
int
|
||
openat(int dirfd, const char *path, int flags, ...)
|
||
{
|
||
ZVFS_HOOK_ENTER();
|
||
|
||
char normpath[PATH_MAX];
|
||
char abspath[PATH_MAX];
|
||
normpath[0] = '\0';
|
||
abspath[0] = '\0';
|
||
int is_zvfs_path = 0;
|
||
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
|
||
/* 解析绝对路径判断是否属于 zvfs */
|
||
if (zvfs_resolve_atpath(dirfd, path, abspath, sizeof(abspath)) < 0) {
|
||
zvfs_debug_open_log(path, NULL,
|
||
"openat resolve fail dirfd=%d path=%s flags=0x%x errno=%d(%s)",
|
||
dirfd, zvfs_dbg_str(path), flags, errno, strerror(errno));
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
is_zvfs_path = zvfs_classify_path(abspath, (flags & O_CREAT) != 0,
|
||
normpath, sizeof(normpath));
|
||
zvfs_debug_open_log(path, abspath,
|
||
"openat resolve ok dirfd=%d path=%s abspath=%s norm=%s flags=0x%x is_zvfs=%d",
|
||
dirfd, zvfs_dbg_str(path), zvfs_dbg_str(abspath),
|
||
zvfs_dbg_str(normpath), flags, is_zvfs_path);
|
||
|
||
int ret;
|
||
if (ZVFS_IN_HOOK() || !is_zvfs_path) {
|
||
zvfs_debug_open_log(path, abspath,
|
||
"openat passthrough reason=%s dirfd=%d path=%s flags=0x%x",
|
||
ZVFS_IN_HOOK() ? "reentrant" : "non-zvfs",
|
||
dirfd, zvfs_dbg_str(path), flags);
|
||
ret = real_openat(dirfd, path, flags, mode);
|
||
zvfs_debug_open_log(path, abspath,
|
||
"openat passthrough ret=%d errno=%d(%s)",
|
||
ret, (ret < 0) ? errno : 0, (ret < 0) ? strerror(errno) : "OK");
|
||
ZVFS_HOOK_LEAVE();
|
||
return ret;
|
||
}
|
||
|
||
zvfs_ensure_init();
|
||
|
||
int real_fd = real_openat(dirfd, path, flags, mode);
|
||
if (real_fd < 0) {
|
||
zvfs_debug_open_log(path, abspath,
|
||
"openat real_openat fail dirfd=%d path=%s flags=0x%x errno=%d(%s)",
|
||
dirfd, zvfs_dbg_str(path), flags, errno, strerror(errno));
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
zvfs_debug_open_log(path, abspath,
|
||
"openat real_openat ok real_fd=%d dirfd=%d path=%s norm=%s",
|
||
real_fd, dirfd, zvfs_dbg_str(path), zvfs_dbg_str(normpath));
|
||
|
||
ret = zvfs_open_impl(real_fd, normpath, flags, mode);
|
||
if (ret < 0) {
|
||
int saved = errno;
|
||
real_close(real_fd);
|
||
errno = saved;
|
||
zvfs_debug_open_log(path, abspath,
|
||
"openat zvfs_open_impl fail real_fd=%d errno=%d(%s)",
|
||
real_fd, saved, strerror(saved));
|
||
} else {
|
||
zvfs_debug_open_log(path, abspath,
|
||
"openat zvfs_open_impl success fd=%d", ret);
|
||
}
|
||
|
||
ZVFS_HOOK_LEAVE();
|
||
return ret;
|
||
}
|
||
|
||
int openat64(int dirfd, const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return openat(dirfd, path, flags | O_LARGEFILE, mode);
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* fopen / fopen64 */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
static FILE *
|
||
zvfs_fopen_common(const char *path, const char *mode, int extra_open_flags, int use_fopen64)
|
||
{
|
||
char abspath[PATH_MAX];
|
||
char normpath[PATH_MAX];
|
||
char fdopen_mode[4];
|
||
int is_zvfs_path = 0;
|
||
int flags = 0;
|
||
mode_t create_mode = 0666;
|
||
int real_fd = -1;
|
||
FILE *fp = NULL;
|
||
|
||
if (zvfs_parse_fopen_mode(mode, extra_open_flags, &flags, &create_mode) != 0) {
|
||
if (use_fopen64 && real_fopen64) return real_fopen64(path, mode);
|
||
if (real_fopen) return real_fopen(path, mode);
|
||
errno = ENOSYS;
|
||
return NULL;
|
||
}
|
||
|
||
if (zvfs_resolve_atpath(AT_FDCWD, path, abspath, sizeof(abspath)) == 0) {
|
||
is_zvfs_path = zvfs_classify_path(abspath, (flags & O_CREAT) != 0,
|
||
normpath, sizeof(normpath));
|
||
zvfs_debug_open_log(path, abspath,
|
||
"fopen resolve ok path=%s mode=%s norm=%s flags=0x%x is_zvfs=%d",
|
||
zvfs_dbg_str(path), zvfs_dbg_str(mode),
|
||
zvfs_dbg_str(normpath), flags, is_zvfs_path);
|
||
} else {
|
||
zvfs_debug_open_log(path, NULL,
|
||
"fopen resolve fail path=%s mode=%s errno=%d(%s)",
|
||
zvfs_dbg_str(path), zvfs_dbg_str(mode), errno, strerror(errno));
|
||
}
|
||
|
||
if (ZVFS_IN_HOOK() || !is_zvfs_path) {
|
||
if (use_fopen64 && real_fopen64) return real_fopen64(path, mode);
|
||
if (real_fopen) return real_fopen(path, mode);
|
||
errno = ENOSYS;
|
||
return NULL;
|
||
}
|
||
|
||
zvfs_ensure_init();
|
||
|
||
real_fd = real_open(path, flags, create_mode);
|
||
if (real_fd < 0) {
|
||
return NULL;
|
||
}
|
||
|
||
if (zvfs_open_impl(real_fd, normpath, flags, create_mode) < 0) {
|
||
int saved = errno;
|
||
real_close(real_fd);
|
||
errno = saved;
|
||
return NULL;
|
||
}
|
||
zvfs_debug_open_log(path, normpath,
|
||
"fopen mapped-after-open_impl fd=%d mapped=%d",
|
||
real_fd, zvfs_debug_has_fd_mapping(real_fd));
|
||
|
||
zvfs_sanitize_fdopen_mode(mode, fdopen_mode);
|
||
if (real_fdopen) {
|
||
fp = real_fdopen(real_fd, fdopen_mode);
|
||
} else {
|
||
fp = fdopen(real_fd, fdopen_mode);
|
||
}
|
||
if (!fp) {
|
||
int saved = errno;
|
||
close(real_fd);
|
||
errno = saved;
|
||
return NULL;
|
||
}
|
||
zvfs_debug_open_log(path, normpath,
|
||
"fopen mapped-after-fdopen fd=%d mapped=%d",
|
||
real_fd, zvfs_debug_has_fd_mapping(real_fd));
|
||
return fp;
|
||
}
|
||
|
||
/*
 * fopen - hooked fopen(3): brackets zvfs_fopen_common() with the hook
 * enter/leave markers, requesting no extra open flags and the non-64 real
 * fopen on passthrough.
 */
FILE *
fopen(const char *path, const char *mode)
{
    FILE *stream;

    ZVFS_HOOK_ENTER();
    stream = zvfs_fopen_common(path, mode, 0, 0);
    ZVFS_HOOK_LEAVE();

    return stream;
}
|
||
|
||
FILE *
|
||
fopen64(const char *path, const char *mode)
|
||
{
|
||
ZVFS_HOOK_ENTER();
|
||
FILE *fp = zvfs_fopen_common(path, mode, O_LARGEFILE, 1);
|
||
ZVFS_HOOK_LEAVE();
|
||
return fp;
|
||
}
|
||
|
||
int
|
||
fclose(FILE *stream)
|
||
{
|
||
ZVFS_HOOK_ENTER();
|
||
|
||
int ret;
|
||
int ret_errno = 0;
|
||
int bk_rc = 0;
|
||
int bk_errno = 0;
|
||
int fd = -1;
|
||
int need_bookkeeping = 0;
|
||
|
||
if (!stream) {
|
||
errno = EINVAL;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
if (!ZVFS_IN_HOOK()) {
|
||
fd = fileno(stream);
|
||
if (fd >= 0 && zvfs_is_zvfs_fd(fd)) {
|
||
need_bookkeeping = 1;
|
||
}
|
||
}
|
||
|
||
if (!real_fclose) {
|
||
errno = ENOSYS;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
if (ZVFS_IN_HOOK() || !need_bookkeeping) {
|
||
ret = real_fclose(stream);
|
||
ZVFS_HOOK_LEAVE();
|
||
return ret;
|
||
}
|
||
|
||
zvfs_ensure_init();
|
||
|
||
ret = real_fclose(stream);
|
||
if (ret < 0) {
|
||
ret_errno = errno;
|
||
}
|
||
|
||
/*
|
||
* 无论 real_fclose 是否报错,都尝试回收 zvfs bookkeeping。
|
||
* 某些 libc 实现即使返回 EOF,也可能已经关闭了底层 fd。
|
||
*/
|
||
if (zvfs_detach_fd_mapping(fd, 1) < 0) {
|
||
bk_rc = -1;
|
||
bk_errno = errno;
|
||
}
|
||
|
||
if (ret < 0) {
|
||
errno = ret_errno;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
if (bk_rc < 0) {
|
||
errno = bk_errno;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
ZVFS_HOOK_LEAVE();
|
||
return 0;
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* creat */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
/* creat - hooked creat(2): open() with O_CREAT | O_WRONLY | O_TRUNC. */
int creat(const char *path, mode_t mode)
{
    const int creat_flags = O_WRONLY | O_TRUNC | O_CREAT;

    return open(path, creat_flags, mode);
}
|
||
|
||
int creat64(const char *path, mode_t mode)
|
||
{
|
||
return open(path, O_CREAT | O_WRONLY | O_TRUNC | O_LARGEFILE, mode);
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* glibc aliases */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
int __open(const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return open(path, flags, mode);
|
||
}
|
||
|
||
int __open64(const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return open64(path, flags, mode);
|
||
}
|
||
|
||
int __openat(int dirfd, const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return openat(dirfd, path, flags, mode);
|
||
}
|
||
|
||
int __openat64(int dirfd, const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return openat64(dirfd, path, flags, mode);
|
||
}
|
||
|
||
int __libc_open(const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return open(path, flags, mode);
|
||
}
|
||
|
||
int __libc_open64(const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return open64(path, flags, mode);
|
||
}
|
||
|
||
int __libc_openat(int dirfd, const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return openat(dirfd, path, flags, mode);
|
||
}
|
||
|
||
int __libc_openat64(int dirfd, const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return openat64(dirfd, path, flags, mode);
|
||
}
|
||
|
||
int __open_2(const char *path, int flags)
|
||
{
|
||
if ((flags & O_CREAT) || ((flags & O_TMPFILE) == O_TMPFILE)) {
|
||
errno = EINVAL;
|
||
return -1;
|
||
}
|
||
zvfs_debug_open_log(path, NULL,
|
||
"__open_2 called path=%s flags=0x%x",
|
||
zvfs_dbg_str(path), flags);
|
||
return open(path, flags);
|
||
}
|
||
|
||
int __open64_2(const char *path, int flags)
|
||
{
|
||
if ((flags & O_CREAT) || ((flags & O_TMPFILE) == O_TMPFILE)) {
|
||
errno = EINVAL;
|
||
return -1;
|
||
}
|
||
return open64(path, flags);
|
||
}
|
||
|
||
int __openat_2(int dirfd, const char *path, int flags)
|
||
{
|
||
if ((flags & O_CREAT) || ((flags & O_TMPFILE) == O_TMPFILE)) {
|
||
errno = EINVAL;
|
||
return -1;
|
||
}
|
||
zvfs_debug_open_log(path, NULL,
|
||
"__openat_2 called dirfd=%d path=%s flags=0x%x",
|
||
dirfd, zvfs_dbg_str(path), flags);
|
||
return openat(dirfd, path, flags);
|
||
}
|
||
|
||
int __openat64_2(int dirfd, const char *path, int flags)
|
||
{
|
||
if ((flags & O_CREAT) || ((flags & O_TMPFILE) == O_TMPFILE)) {
|
||
errno = EINVAL;
|
||
return -1;
|
||
}
|
||
return openat64(dirfd, path, flags);
|
||
}
|
||
|
||
int __open_nocancel(const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return open(path, flags, mode);
|
||
}
|
||
|
||
int __open64_nocancel(const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return open64(path, flags, mode);
|
||
}
|
||
|
||
int __openat_nocancel(int dirfd, const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return openat(dirfd, path, flags, mode);
|
||
}
|
||
|
||
int __openat64_nocancel(int dirfd, const char *path, int flags, ...)
|
||
{
|
||
mode_t mode = 0;
|
||
if (flags & O_CREAT) {
|
||
va_list ap; va_start(ap, flags);
|
||
mode = (mode_t)va_arg(ap, unsigned int);
|
||
va_end(ap);
|
||
}
|
||
return openat64(dirfd, path, flags, mode);
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* close */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
/*
|
||
* zvfs_release_openfile - 释放一个 openfile 对应的 zvfs 资源。
|
||
* 这里只处理 zvfs bookkeeping,不做 real_close(fd)。
|
||
*/
|
||
static int
|
||
zvfs_release_openfile(struct zvfs_open_file *of, int do_sync_md)
|
||
{
|
||
int saved_errno = 0;
|
||
struct zvfs_inode *inode = of->inode;
|
||
uint64_t handle_id = of->handle_id;
|
||
openfile_free(of);
|
||
|
||
if (do_sync_md && handle_id != 0 && blob_sync_md(handle_id) < 0) {
|
||
saved_errno = (errno != 0) ? errno : EIO;
|
||
}
|
||
if (handle_id != 0 && blob_close(handle_id) < 0 && saved_errno == 0) {
|
||
saved_errno = (errno != 0) ? errno : EIO;
|
||
}
|
||
|
||
/* ---- inode ref_count-- --------------------------------------- */
|
||
int inode_ref = atomic_fetch_sub(&inode->ref_count, 1) - 1;
|
||
|
||
if (inode_ref == 0) {
|
||
/*
|
||
* 最后一个 fd 关闭了这个 inode。
|
||
* 若 deleted:执行延迟 blob_delete。
|
||
*/
|
||
bool do_delete = false;
|
||
pthread_mutex_lock(&inode->mu);
|
||
do_delete = inode->deleted;
|
||
pthread_mutex_unlock(&inode->mu);
|
||
|
||
if (do_delete && blob_delete(inode->blob_id) < 0 && saved_errno == 0)
|
||
saved_errno = (errno != 0) ? errno : EIO;
|
||
|
||
pthread_mutex_lock(&g_fs.inode_mu);
|
||
inode_remove(inode->blob_id);
|
||
pthread_mutex_unlock(&g_fs.inode_mu);
|
||
|
||
/* path_cache 在 unlink 时已经摘除(deleted=true 路径)
|
||
* 或在此处还需摘除(正常关闭最后一个 fd)*/
|
||
if (!do_delete) {
|
||
/* 正常关闭:path 留着,只有 inode 的引用归零时清缓存 */
|
||
/* 注意:path_cache 里的指针指向这个即将释放的 inode,
|
||
* 所以必须把 path_cache 条目也清掉,否则成为悬空指针 */
|
||
pthread_mutex_lock(&g_fs.path_mu);
|
||
/* 遍历找到所有指向这个 inode 的 path entry 并移除
|
||
* (一个 inode 对应一个 path,hardlink 暂不支持)*/
|
||
struct zvfs_path_entry *pe, *tmp; (void)tmp;
|
||
HASH_ITER(hh, g_fs.path_cache, pe, tmp) {
|
||
if (pe->inode == inode) {
|
||
HASH_DEL(g_fs.path_cache, pe);
|
||
free(pe->path);
|
||
free(pe);
|
||
break; /* 一对一关系,找到即退 */
|
||
}
|
||
}
|
||
pthread_mutex_unlock(&g_fs.path_mu);
|
||
}
|
||
|
||
inode_free(inode);
|
||
}
|
||
|
||
if (saved_errno != 0) {
|
||
errno = saved_errno;
|
||
return -1;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
/*
|
||
* zvfs_detach_fd_mapping - 仅摘除 fd -> openfile 映射并释放 zvfs 资源。
|
||
* 不调用 real_close(fd),用于 dup2/dup3 中 newfd 旧值清理。
|
||
*/
|
||
static int
|
||
zvfs_detach_fd_mapping(int fd, int do_sync_md)
|
||
{
|
||
pthread_mutex_lock(&g_fs.fd_mu);
|
||
struct zvfs_open_file *of = openfile_lookup(fd);
|
||
if (!of) {
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
errno = EBADF;
|
||
return -1;
|
||
}
|
||
openfile_remove(fd);
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
|
||
return zvfs_release_openfile(of, do_sync_md);
|
||
}
|
||
|
||
/*
 * zvfs_close_impl - the zvfs branch of close(fd): bookkeeping first, then
 * real_close(fd).
 *
 * Returns 0 when both steps succeed. If real_close() fails, -1 is returned
 * with its errno; otherwise a bookkeeping failure is reported as -1 with the
 * errno recorded from zvfs_detach_fd_mapping().
 */
static int
zvfs_close_impl(int fd)
{
    int detach_errno = 0;
    int detach_rc = zvfs_detach_fd_mapping(fd, 1);

    if (detach_rc < 0) {
        detach_errno = errno;
    }

    /* The real fd is closed regardless of how the bookkeeping went. */
    if (real_close(fd) < 0) {
        return -1;
    }

    if (detach_rc >= 0) {
        return 0;
    }

    errno = detach_errno;
    return -1;
}
|
||
|
||
/*
 * close - hooked close(2).
 * Calls made while already inside a hook, and fds not tracked by zvfs, go
 * straight to real_close(); tracked fds go through zvfs_close_impl().
 */
int
close(int fd)
{
    ZVFS_HOOK_ENTER();

    int ret;

    /* The fd table is only consulted when this is not a reentrant call. */
    if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(fd)) {
        ret = real_close(fd);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    zvfs_ensure_init();

    ret = zvfs_close_impl(fd);
    ZVFS_HOOK_LEAVE();
    return ret;
}
|
||
|
||
/* __close - glibc alias of close(); forwards to the hooked close(). */
int __close(int fd)
{
    return close(fd);
}
|
||
/* __libc_close - glibc alias of close(); forwards to the hooked close(). */
int __libc_close(int fd)
{
    return close(fd);
}
|
||
/* __close_nocancel - glibc alias of close(); forwards to the hooked close(). */
int __close_nocancel(int fd)
{
    return close(fd);
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* dup helper */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
int
|
||
zvfs_dup_attach_newfd(int oldfd, int newfd, int new_fd_flags)
|
||
{
|
||
struct zvfs_open_file *old_of, *new_of;
|
||
int fd_flags;
|
||
int rc;
|
||
int saved;
|
||
|
||
if (oldfd < 0 || newfd < 0) {
|
||
errno = EBADF;
|
||
return -1;
|
||
}
|
||
|
||
pthread_mutex_lock(&g_fs.fd_mu);
|
||
old_of = openfile_lookup(oldfd);
|
||
if (!old_of) {
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
errno = EBADF;
|
||
return -1;
|
||
}
|
||
if (openfile_lookup(newfd) != NULL) {
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
errno = EEXIST;
|
||
return -1;
|
||
}
|
||
|
||
rc = blob_add_ref(old_of->handle_id, 1);
|
||
if (rc != 0) {
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
return -1;
|
||
}
|
||
|
||
new_of = openfile_alloc(newfd, old_of->inode, old_of->handle_id);
|
||
if (!new_of) {
|
||
saved = (errno != 0) ? errno : ENOMEM;
|
||
(void)blob_close(old_of->handle_id);
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
errno = saved;
|
||
return -1;
|
||
}
|
||
|
||
fd_flags = (new_fd_flags >= 0) ? new_fd_flags : old_of->fd_flags;
|
||
new_of->fd_flags = fd_flags;
|
||
|
||
atomic_fetch_add(&old_of->inode->ref_count, 1);
|
||
openfile_insert(new_of);
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
return 0;
|
||
}
|
||
|
||
/*
 * zvfs_add_ref_batch_or_fallback - add references to a set of blob handles.
 *
 * Tries blob_add_ref_batch() first. If the batch call fails, falls back to
 * one blob_add_ref() per handle.
 *
 * handle_ids  array of `count` handle ids.
 * ref_deltas  array of `count` reference increments, parallel to handle_ids.
 * count       number of entries; 0 is a no-op.
 *
 * Returns 0 when every handle received its references, -1 otherwise. On a
 * failure in the per-handle fallback, references already added by earlier
 * iterations are dropped again with blob_close() (one call per reference,
 * the same pairing the rest of this file uses to undo blob_add_ref()), so the
 * caller never has to clean up a partially applied set. errno from the
 * failing blob_add_ref() is preserved across that rollback.
 *
 * NOTE(review): like the original code, this assumes a failed
 * blob_add_ref_batch() applied nothing - confirm against its implementation.
 */
static int
zvfs_add_ref_batch_or_fallback(const uint64_t *handle_ids,
                               const uint32_t *ref_deltas,
                               uint32_t count)
{
    if (count == 0) {
        return 0;
    }

    if (blob_add_ref_batch(handle_ids, ref_deltas, count) == 0) {
        return 0;
    }

    for (uint32_t i = 0; i < count; i++) {
        if (blob_add_ref(handle_ids[i], ref_deltas[i]) == 0) {
            continue;
        }

        /* Entry i failed: undo entries [0, i) so nothing is leaked. */
        int saved = errno;
        for (uint32_t j = 0; j < i; j++) {
            for (uint32_t k = 0; k < ref_deltas[j]; k++) {
                (void)blob_close(handle_ids[j]);
            }
        }
        errno = saved;
        return -1;
    }

    return 0;
}
|
||
|
||
/*
 * zvfs_rollback_added_refs - call blob_close() once for every non-zero
 * handle id in handle_ids[0..count). Return values of blob_close() are
 * ignored. Used by fork() to undo the references taken before a failed
 * real_fork().
 */
static void
zvfs_rollback_added_refs(const uint64_t *handle_ids, uint32_t count)
{
    uint32_t idx = 0;

    while (idx < count) {
        uint64_t handle = handle_ids[idx++];

        if (handle == 0) {
            continue; /* zero ids are skipped */
        }
        (void)blob_close(handle);
    }
}
|
||
|
||
static int
|
||
zvfs_snapshot_fd_handles(uint64_t **handle_ids_out,
|
||
uint32_t **ref_deltas_out,
|
||
uint32_t *count_out)
|
||
{
|
||
struct zvfs_open_file *of, *tmp;
|
||
uint32_t i = 0;
|
||
uint32_t count;
|
||
uint64_t *handle_ids = NULL;
|
||
uint32_t *ref_deltas = NULL;
|
||
|
||
*handle_ids_out = NULL;
|
||
*ref_deltas_out = NULL;
|
||
*count_out = 0;
|
||
|
||
pthread_mutex_lock(&g_fs.fd_mu);
|
||
count = (uint32_t)HASH_COUNT(g_fs.fd_table);
|
||
if (count == 0) {
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
return 0;
|
||
}
|
||
|
||
handle_ids = calloc(count, sizeof(*handle_ids));
|
||
ref_deltas = calloc(count, sizeof(*ref_deltas));
|
||
if (!handle_ids || !ref_deltas) {
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
free(handle_ids);
|
||
free(ref_deltas);
|
||
errno = ENOMEM;
|
||
return -1;
|
||
}
|
||
|
||
HASH_ITER(hh, g_fs.fd_table, of, tmp) {
|
||
if (i >= count)
|
||
break;
|
||
handle_ids[i] = of->handle_id;
|
||
ref_deltas[i] = 1;
|
||
i++;
|
||
}
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
|
||
*handle_ids_out = handle_ids;
|
||
*ref_deltas_out = ref_deltas;
|
||
*count_out = i;
|
||
return 0;
|
||
}
|
||
|
||
static int
|
||
zvfs_snapshot_fds_in_range(unsigned int first, unsigned int last,
|
||
int **fds_out, uint32_t *count_out)
|
||
{
|
||
struct zvfs_open_file *of, *tmp;
|
||
uint32_t cap;
|
||
uint32_t n = 0;
|
||
int *fds = NULL;
|
||
|
||
*fds_out = NULL;
|
||
*count_out = 0;
|
||
|
||
pthread_mutex_lock(&g_fs.fd_mu);
|
||
cap = (uint32_t)HASH_COUNT(g_fs.fd_table);
|
||
if (cap == 0) {
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
return 0;
|
||
}
|
||
|
||
fds = calloc(cap, sizeof(*fds));
|
||
if (!fds) {
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
errno = ENOMEM;
|
||
return -1;
|
||
}
|
||
|
||
HASH_ITER(hh, g_fs.fd_table, of, tmp) {
|
||
if (of->fd < 0) {
|
||
continue;
|
||
}
|
||
if ((unsigned int)of->fd < first || (unsigned int)of->fd > last) {
|
||
continue;
|
||
}
|
||
fds[n++] = of->fd;
|
||
}
|
||
pthread_mutex_unlock(&g_fs.fd_mu);
|
||
|
||
*fds_out = fds;
|
||
*count_out = n;
|
||
return 0;
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* close_range */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
int
|
||
close_range(unsigned int first, unsigned int last, int flags)
|
||
{
|
||
ZVFS_HOOK_ENTER();
|
||
|
||
if (ZVFS_IN_HOOK()) {
|
||
int ret = real_close_range ? real_close_range(first, last, flags)
|
||
: (errno = ENOSYS, -1);
|
||
ZVFS_HOOK_LEAVE();
|
||
return ret;
|
||
}
|
||
|
||
if (first > last) {
|
||
errno = EINVAL;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
/*
|
||
* 只快照当前 zvfs fd_table 中命中的 fd,避免对 [first,last] 做
|
||
* 全范围扫描(last=UINT_MAX 时会非常慢,且旧逻辑存在回绕风险)。
|
||
*/
|
||
int any_err = 0;
|
||
int inited = 0;
|
||
int *zvfs_fds = NULL;
|
||
uint32_t zvfs_fd_count = 0;
|
||
if (zvfs_snapshot_fds_in_range(first, last, &zvfs_fds, &zvfs_fd_count) < 0) {
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
for (uint32_t i = 0; i < zvfs_fd_count; i++) {
|
||
if (!inited) {
|
||
zvfs_ensure_init();
|
||
inited = 1;
|
||
}
|
||
if (zvfs_close_impl(zvfs_fds[i]) < 0) {
|
||
any_err = 1;
|
||
}
|
||
}
|
||
free(zvfs_fds);
|
||
|
||
/* 让内核处理剩余非 zvfs fd(CLOEXEC 等 flags 也在这里生效) */
|
||
if (real_close_range) {
|
||
if (real_close_range(first, last, flags) < 0 && !any_err)
|
||
any_err = 1;
|
||
} else {
|
||
/* 降级:逐个 close 非 zvfs fd(按 open-max 做上界截断) */
|
||
unsigned int upper = last;
|
||
long open_max = sysconf(_SC_OPEN_MAX);
|
||
if (open_max > 0 && upper >= (unsigned int)open_max) {
|
||
upper = (unsigned int)open_max - 1;
|
||
}
|
||
|
||
for (unsigned int fd = first; fd <= upper; fd++) {
|
||
if (!zvfs_is_zvfs_fd((int)fd))
|
||
real_close((int)fd);
|
||
if (fd == upper)
|
||
break;
|
||
}
|
||
}
|
||
|
||
ZVFS_HOOK_LEAVE();
|
||
return any_err ? -1 : 0;
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
/* dup                                                                 */
/* ------------------------------------------------------------------ */

/*
 * dup - hooked dup().
 *
 * Re-entrant calls and descriptors not tracked by zvfs go straight to
 * real_dup(). For a tracked descriptor the kernel duplicate is created first
 * and then registered in the zvfs table with fd flags 0; if registration
 * fails the kernel duplicate is closed again and -1 is returned with the
 * registration's errno.
 */
int
dup(int oldfd)
{
    ZVFS_HOOK_ENTER();

    if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_fd(oldfd)) {
        int passthrough = real_dup(oldfd);
        ZVFS_HOOK_LEAVE();
        return passthrough;
    }

    zvfs_ensure_init();

    int result = -1;
    int kernel_fd = real_dup(oldfd);

    if (kernel_fd >= 0) {
        if (zvfs_dup_attach_newfd(oldfd, kernel_fd, 0) < 0) {
            /* Keep the registration error visible across the cleanup. */
            int attach_errno = errno;
            (void)real_close(kernel_fd);
            errno = attach_errno;
        } else {
            result = kernel_fd;
        }
    }

    ZVFS_HOOK_LEAVE();
    return result;
}
|
||
|
||
/* ------------------------------------------------------------------ */
/* dup2                                                                */
/* ------------------------------------------------------------------ */

/*
 * dup2 - hooked dup2().
 *
 * zvfs gets involved when either side is a zvfs descriptor:
 *
 *   - oldfd is zvfs: after the kernel duplicate succeeds, newfd is
 *     registered as a duplicate of oldfd (fd flags 0).
 *   - newfd was zvfs: its previous mapping is detached once the kernel has
 *     replaced the descriptor. This also applies when oldfd is NOT a zvfs
 *     descriptor; otherwise newfd would keep a stale zvfs mapping while the
 *     kernel descriptor already refers to something else.
 *
 * The detach of the previous newfd is silent, matching dup2's documented
 * behaviour of not reporting errors from closing newfd.
 * zvfs_detach_fd_mapping() removes the table entry before it can fail, so
 * the table is consistent either way.
 *
 * Re-entrant calls and calls where neither descriptor is zvfs are passed to
 * real_dup2(). dup2(fd, fd) on a zvfs descriptor returns fd.
 *
 * Returns newfd on success, -1 with errno set on failure. If registering the
 * duplicate fails, the new kernel descriptor is closed again.
 */
int
dup2(int oldfd, int newfd)
{
    ZVFS_HOOK_ENTER();

    if (ZVFS_IN_HOOK()) {
        int ret = real_dup2(oldfd, newfd);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    int old_is_zvfs = zvfs_is_zvfs_fd(oldfd);
    int newfd_was_zvfs = (oldfd != newfd) && zvfs_is_zvfs_fd(newfd);

    if (!old_is_zvfs && !newfd_was_zvfs) {
        int ret = real_dup2(oldfd, newfd);
        ZVFS_HOOK_LEAVE();
        return ret;
    }

    /* POSIX: dup2(oldfd, oldfd) on a valid descriptor just returns it. */
    if (oldfd == newfd) {
        ZVFS_HOOK_LEAVE();
        return oldfd;
    }

    zvfs_ensure_init();

    int ret = real_dup2(oldfd, newfd);
    if (ret < 0) {
        ZVFS_HOOK_LEAVE();
        return -1;
    }

    if (newfd_was_zvfs) {
        /* The kernel already replaced newfd; drop the old mapping silently. */
        int saved = errno;
        (void)zvfs_detach_fd_mapping(newfd, 1);
        errno = saved;
    }

    if (old_is_zvfs && zvfs_dup_attach_newfd(oldfd, newfd, 0) < 0) {
        int saved = errno;
        (void)real_close(newfd);
        errno = saved;
        ZVFS_HOOK_LEAVE();
        return -1;
    }

    ZVFS_HOOK_LEAVE();
    return ret;
}
|
||
|
||
/* ------------------------------------------------------------------ */
|
||
/* dup3 */
|
||
/* ------------------------------------------------------------------ */
|
||
|
||
int
|
||
dup3(int oldfd, int newfd, int flags)
|
||
{
|
||
ZVFS_HOOK_ENTER();
|
||
|
||
int is_zvfs_fd = (!ZVFS_IN_HOOK() && zvfs_is_zvfs_fd(oldfd));
|
||
if (!is_zvfs_fd) {
|
||
int ret = real_dup3(oldfd, newfd, flags);
|
||
ZVFS_HOOK_LEAVE();
|
||
return ret;
|
||
}
|
||
|
||
if (oldfd == newfd) {
|
||
errno = EINVAL;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
if ((flags & ~O_CLOEXEC) != 0) {
|
||
errno = EINVAL;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
zvfs_ensure_init();
|
||
int newfd_was_zvfs = zvfs_is_zvfs_fd(newfd);
|
||
|
||
int ret = real_dup3(oldfd, newfd, flags);
|
||
if (ret < 0) {
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
if (newfd_was_zvfs && zvfs_detach_fd_mapping(newfd, 1) < 0) {
|
||
int saved = errno;
|
||
(void)real_close(newfd);
|
||
errno = saved;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
int fd_flags = (flags & O_CLOEXEC) ? FD_CLOEXEC : 0;
|
||
if (zvfs_dup_attach_newfd(oldfd, newfd, fd_flags) < 0) {
|
||
int saved = errno;
|
||
(void)real_close(newfd);
|
||
errno = saved;
|
||
ZVFS_HOOK_LEAVE();
|
||
return -1;
|
||
}
|
||
|
||
ZVFS_HOOK_LEAVE();
|
||
return ret;
|
||
}
|
||
|
||
/* ------------------------------------------------------------------ */
/* fork                                                                */
/* ------------------------------------------------------------------ */

/*
 * fork - hooked fork().
 *
 * Re-entrant calls go straight to real_fork(). Otherwise the handle ids of
 * all descriptors in the zvfs table are snapshotted and each receives one
 * additional reference before real_fork() is called. If real_fork() fails,
 * those references are rolled back.
 *
 * Returns what real_fork() returns on success; -1 with errno set when the
 * snapshot, the reference bump or real_fork() itself fails.
 */
pid_t
fork(void)
{
    ZVFS_HOOK_ENTER();

    if (ZVFS_IN_HOOK()) {
        pid_t passthrough = real_fork();
        ZVFS_HOOK_LEAVE();
        return passthrough;
    }

    uint64_t *ids = NULL;
    uint32_t *deltas = NULL;
    uint32_t tracked = 0;
    pid_t pid = -1;
    int failure_errno = 0;

    if (zvfs_snapshot_fd_handles(&ids, &deltas, &tracked) < 0) {
        ZVFS_HOOK_LEAVE();
        return -1;
    }

    if (tracked > 0) {
        zvfs_ensure_init();
        if (zvfs_add_ref_batch_or_fallback(ids, deltas, tracked) < 0) {
            failure_errno = errno;
            goto cleanup;
        }
    }

    pid = real_fork();
    if (pid < 0) {
        failure_errno = errno;
        if (tracked > 0) {
            /* No child was created: give back the references taken above. */
            zvfs_rollback_added_refs(ids, tracked);
        }
        pid = -1;
    }

cleanup:
    /* Runs in the parent and, after a successful fork, in the child too. */
    free(ids);
    free(deltas);
    if (pid < 0) {
        errno = failure_errno;
    }
    ZVFS_HOOK_LEAVE();
    return pid;
}
|