postgres hook 测试成功

This commit is contained in:
2026-03-13 01:59:05 +00:00
parent a153ca5040
commit 544f532bf5
53 changed files with 5964 additions and 1674 deletions

View File

@@ -19,6 +19,91 @@
#include <pthread.h>
#include <stdio.h>
/* ------------------------------------------------------------------ */
/* 内部:路径判定辅助 */
/* ------------------------------------------------------------------ */
/**
 * zvfs_classify_path - decide whether an absolute path ends up under /zvfs.
 *
 * A plain prefix test on the caller's path is not enough: open/openat may
 * reach a symbolic link that jumps into /zvfs, and the hook would then fail
 * to capture the file.  The path is accepted when any of these holds:
 *
 *   1. abspath itself satisfies zvfs_is_zvfs_path();
 *   2. realpath(abspath) satisfies zvfs_is_zvfs_path();
 *   3. may_create is set and realpath(abspath) failed (with O_CREAT the
 *      target may not exist yet, so realpath() yields nothing): the parent
 *      directory is resolved instead, the final component is appended, and
 *      the joined path satisfies zvfs_is_zvfs_path().
 *
 * @param abspath         absolute path to classify
 * @param may_create      non-zero when the caller may create the target
 * @param normalized_out  receives the path that matched: abspath for case 1,
 *                        the resolved path for cases 2 and 3.  When 0 is
 *                        returned with valid arguments it holds a copy of
 *                        abspath.  Copies are truncated to fit out_size and
 *                        always NUL-terminated.
 * @param out_size        capacity of normalized_out in bytes
 * @return 1 if the path is classified as a zvfs path, 0 otherwise (including
 *         invalid arguments and any resolution failure)
 */
static int
zvfs_classify_path(const char *abspath, int may_create,
                   char *normalized_out, size_t out_size)
{
    char resolved[PATH_MAX];
    char parent[PATH_MAX];
    char candidate[PATH_MAX];
    const char *dir;
    const char *name;
    char *slash;
    size_t len;
    int n;

    if (!abspath || !normalized_out || out_size == 0) {
        return 0;
    }

    /* Default output: the caller's own path. */
    (void)snprintf(normalized_out, out_size, "%s", abspath);

    if (zvfs_is_zvfs_path(abspath)) {
        return 1;
    }

    if (realpath(abspath, resolved) != NULL) {
        if (!zvfs_is_zvfs_path(resolved)) {
            return 0;
        }
        (void)snprintf(normalized_out, out_size, "%s", resolved);
        return 1;
    }

    if (!may_create) {
        return 0;
    }

    /*
     * Work on a private copy so the last component can be split off.
     * Refuse paths that do not fit: resolving a truncated copy would
     * classify a different file than the one being opened.
     */
    len = strlen(abspath);
    if (len >= sizeof(parent)) {
        return 0;
    }
    memcpy(parent, abspath, len + 1);

    slash = strrchr(parent, '/');
    if (!slash) {
        return 0;
    }
    name = slash + 1;
    if (*name == '\0') {
        /* Trailing slash: there is no final component to create. */
        return 0;
    }

    if (slash == parent) {
        dir = "/";
    } else {
        *slash = '\0';          /* name still points past the cut */
        dir = parent;
    }

    if (realpath(dir, resolved) == NULL) {
        return 0;
    }

    /*
     * realpath() reports the root directory as "/"; joining with another
     * '/' would produce "//name", which is not the canonical spelling.
     */
    if (strcmp(resolved, "/") == 0) {
        n = snprintf(candidate, sizeof(candidate), "/%s", name);
    } else {
        n = snprintf(candidate, sizeof(candidate), "%s/%s", resolved, name);
    }
    if (n <= 0 || (size_t)n >= sizeof(candidate)) {
        return 0;
    }

    if (!zvfs_is_zvfs_path(candidate)) {
        return 0;
    }

    (void)snprintf(normalized_out, out_size, "%s", candidate);
    return 1;
}
/* ------------------------------------------------------------------ */
/* 内部：open 的核心逻辑（路径已解析为绝对路径） */
/* ------------------------------------------------------------------ */
@@ -36,16 +121,15 @@
static int
zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
{
struct zvfs_inode *inode = NULL;
struct zvfs_blob_handle *handle = NULL;
uint64_t blob_id = 0;
struct zvfs_inode *inode = NULL;
uint64_t blob_id = 0;
uint64_t handle_id = 0;
if (flags & O_CREAT) {
/* ---- 创建路径 -------------------------------------------- */
/* 1. 创建 blob */
handle = blob_create(0);
if (!handle) {
if (blob_create(0, &blob_id, &handle_id) != 0) {
int saved = errno;
if (saved == 0) saved = EIO;
fprintf(stderr,
@@ -54,7 +138,6 @@ zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
errno = saved;
goto fail;
}
blob_id = handle->id;
/* 2. 把 blob_id 写入真实文件的 xattr */
if (zvfs_xattr_write_blob_id(real_fd, blob_id) < 0) goto fail;
@@ -88,8 +171,10 @@ zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
if (inode) {
/* path_cache 命中:直接用缓存的 inode重新 blob_open */
blob_id = inode->blob_id;
handle = blob_open(blob_id);
if (!handle) { if (errno == 0) errno = EIO; goto fail; }
if (blob_open(blob_id, &handle_id) != 0) {
if (errno == 0) errno = EIO;
goto fail;
}
/* 共享 inode增加引用 */
atomic_fetch_add(&inode->ref_count, 1);
@@ -106,6 +191,10 @@ zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
pthread_mutex_unlock(&g_fs.inode_mu);
if (inode) {
if (blob_open(blob_id, &handle_id) != 0) {
if (errno == 0) errno = EIO;
goto fail;
}
atomic_fetch_add(&inode->ref_count, 1);
} else {
/* 全新 inode需从真实文件 stat 获取 mode/size */
@@ -123,15 +212,16 @@ zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
pthread_mutex_lock(&g_fs.path_mu);
path_cache_insert(abspath, inode);
pthread_mutex_unlock(&g_fs.path_mu);
if (blob_open(blob_id, &handle_id) != 0) {
if (errno == 0) errno = EIO;
goto fail;
}
}
handle = blob_open(blob_id);
if (!handle) { if (errno == 0) errno = EIO; goto fail; }
}
}
/* ---- 分配 openfile插入 fd_table ---------------------------- */
struct zvfs_open_file *of = openfile_alloc(real_fd, inode, flags, handle);
struct zvfs_open_file *of = openfile_alloc(real_fd, inode, flags, handle_id);
if (!of) { errno = ENOMEM; goto fail_handle; }
pthread_mutex_lock(&g_fs.fd_mu);
@@ -141,7 +231,9 @@ zvfs_open_impl(int real_fd, const char *abspath, int flags, mode_t mode)
return real_fd;
fail_handle:
blob_close(handle);
if (handle_id != 0) {
blob_close(handle_id);
}
fail:
/* inode 若刚分配ref_count==1需要回滚 */
if (inode && atomic_load(&inode->ref_count) == 1) {
@@ -165,6 +257,10 @@ open(const char *path, int flags, ...)
{
ZVFS_HOOK_ENTER();
char abspath[PATH_MAX];
char normpath[PATH_MAX];
int is_zvfs_path = 0;
mode_t mode = 0;
if (flags & O_CREAT) {
va_list ap;
@@ -173,8 +269,13 @@ open(const char *path, int flags, ...)
va_end(ap);
}
if (zvfs_resolve_atpath(AT_FDCWD, path, abspath, sizeof(abspath)) == 0) {
is_zvfs_path = zvfs_classify_path(abspath, (flags & O_CREAT) != 0,
normpath, sizeof(normpath));
}
int ret;
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(path)) {
if (ZVFS_IN_HOOK() || !is_zvfs_path) {
ret = real_open(path, flags, mode);
ZVFS_HOOK_LEAVE();
return ret;
@@ -186,7 +287,7 @@ open(const char *path, int flags, ...)
int real_fd = real_open(path, flags, mode);
if (real_fd < 0) { ZVFS_HOOK_LEAVE(); return -1; }
ret = zvfs_open_impl(real_fd, path, flags, mode);
ret = zvfs_open_impl(real_fd, normpath, flags, mode);
if (ret < 0) {
int saved = errno;
real_close(real_fd);
@@ -217,6 +318,9 @@ openat(int dirfd, const char *path, int flags, ...)
{
ZVFS_HOOK_ENTER();
char normpath[PATH_MAX];
int is_zvfs_path = 0;
mode_t mode = 0;
if (flags & O_CREAT) {
va_list ap; va_start(ap, flags);
@@ -230,9 +334,11 @@ openat(int dirfd, const char *path, int flags, ...)
ZVFS_HOOK_LEAVE();
return -1;
}
is_zvfs_path = zvfs_classify_path(abspath, (flags & O_CREAT) != 0,
normpath, sizeof(normpath));
int ret;
if (ZVFS_IN_HOOK() || !zvfs_is_zvfs_path(abspath)) {
if (ZVFS_IN_HOOK() || !is_zvfs_path) {
ret = real_openat(dirfd, path, flags, mode);
ZVFS_HOOK_LEAVE();
return ret;
@@ -243,7 +349,7 @@ openat(int dirfd, const char *path, int flags, ...)
int real_fd = real_openat(dirfd, path, flags, mode);
if (real_fd < 0) { ZVFS_HOOK_LEAVE(); return -1; }
ret = zvfs_open_impl(real_fd, abspath, flags, mode);
ret = zvfs_open_impl(real_fd, normpath, flags, mode);
if (ret < 0) {
int saved = errno;
real_close(real_fd);
@@ -321,43 +427,23 @@ int __libc_open(const char *path, int flags, ...)
/* ------------------------------------------------------------------ */
/*
* zvfs_close_impl - zvfs fd 的关闭逻辑
*
* 调用方已持有 fd_mu。函数内部会释放 fd_mu 后再处理 inode。
* zvfs_release_openfile - 释放一个 openfile 对应的 zvfs 资源
* 这里只处理 zvfs bookkeeping不做 real_close(fd)。
*/
static int
zvfs_close_impl(int fd)
zvfs_release_openfile(struct zvfs_open_file *of, int do_sync_md)
{
/* 持 fd_mu 取出 openfile从表里摘除 */
pthread_mutex_lock(&g_fs.fd_mu);
struct zvfs_open_file *of = openfile_lookup(fd);
if (!of) {
pthread_mutex_unlock(&g_fs.fd_mu);
errno = EBADF;
return -1;
}
int new_ref = atomic_fetch_sub(&of->ref_count, 1) - 1;
if (new_ref == 0)
openfile_remove(fd);
pthread_mutex_unlock(&g_fs.fd_mu);
if (new_ref > 0) {
/*
* 还有其他 dup 出来的 fd 引用同一个 openfile
* 只关闭真实 fd不动 blob 和 inode。
*/
return real_close(fd);
}
/* ---- openfile 引用归零:先刷 metadata再关闭 blob handle ------ */
struct zvfs_inode *inode = of->inode;
struct zvfs_blob_handle *handle = of->handle;
int sync_failed = 0;
int saved_errno = 0;
struct zvfs_inode *inode = of->inode;
uint64_t handle_id = of->handle_id;
openfile_free(of);
if (blob_sync_md(handle) < 0)
sync_failed = 1;
blob_close(handle);
if (do_sync_md && handle_id != 0 && blob_sync_md(handle_id) < 0) {
saved_errno = (errno != 0) ? errno : EIO;
}
if (handle_id != 0 && blob_close(handle_id) < 0 && saved_errno == 0) {
saved_errno = (errno != 0) ? errno : EIO;
}
/* ---- inode ref_count-- --------------------------------------- */
int inode_ref = atomic_fetch_sub(&inode->ref_count, 1) - 1;
@@ -372,8 +458,8 @@ zvfs_close_impl(int fd)
do_delete = inode->deleted;
pthread_mutex_unlock(&inode->mu);
if (do_delete)
blob_delete(inode->blob_id);
if (do_delete && blob_delete(inode->blob_id) < 0 && saved_errno == 0)
saved_errno = (errno != 0) ? errno : EIO;
pthread_mutex_lock(&g_fs.inode_mu);
inode_remove(inode->blob_id);
@@ -403,13 +489,52 @@ zvfs_close_impl(int fd)
inode_free(inode);
}
if (saved_errno != 0) {
errno = saved_errno;
return -1;
}
return 0;
}
/*
 * zvfs_detach_fd_mapping - unlink fd from the fd -> openfile table and
 * release the zvfs resources of the openfile that was mapped there.
 *
 * real_close(fd) is NOT called here.  dup2/dup3 use this to clean up
 * whatever newfd referred to before it was replaced.
 *
 * Returns the result of zvfs_release_openfile(), or -1 with errno = EBADF
 * when fd has no entry in the table.
 */
static int
zvfs_detach_fd_mapping(int fd, int do_sync_md)
{
    struct zvfs_open_file *entry;

    pthread_mutex_lock(&g_fs.fd_mu);
    entry = openfile_lookup(fd);
    if (entry != NULL) {
        openfile_remove(fd);
    }
    pthread_mutex_unlock(&g_fs.fd_mu);

    if (entry == NULL) {
        errno = EBADF;
        return -1;
    }

    /* fd_mu is no longer held while the openfile is released. */
    return zvfs_release_openfile(entry, do_sync_md);
}
/*
 * zvfs_close_impl - the zvfs path of close(fd):
 * do the zvfs bookkeeping first, then real_close(fd).
 *
 * The bookkeeping result is remembered so that real_close(fd) is still
 * attempted when bookkeeping fails.  A real_close() failure returns -1
 * with the errno left by real_close(); otherwise a bookkeeping failure
 * returns -1 with the errno captured right after the bookkeeping call.
 */
static int
zvfs_close_impl(int fd)
{
/* Detach the fd mapping and release zvfs resources, with metadata sync. */
int bk_rc = zvfs_detach_fd_mapping(fd, 1);
/* Capture errno now; real_close() below may overwrite it. */
int bk_errno = (bk_rc < 0) ? errno : 0;
int rc = real_close(fd);
if (rc < 0)
return -1;
/*
 * NOTE(review): `sync_failed` is not declared anywhere in this function;
 * the next two lines look like leftovers of the previous implementation
 * (removed-side diff lines) - confirm and drop them if so.
 */
if (sync_failed) {
errno = EIO;
if (bk_rc < 0) {
errno = bk_errno;
return -1;
}
return 0;
}
@@ -436,6 +561,180 @@ close(int fd)
int __close(int fd) { return close(fd); }
int __libc_close(int fd) { return close(fd); }
/* ------------------------------------------------------------------ */
/* dup helper */
/* ------------------------------------------------------------------ */
/*
 * zvfs_dup_attach_newfd - register newfd in the zvfs fd table as a duplicate
 * of oldfd.
 *
 * Only zvfs bookkeeping is done here; no descriptor is created or closed.
 * Under fd_mu the function:
 *   - takes one extra reference on oldfd's blob handle (blob_add_ref),
 *   - allocates an openfile for newfd that uses the same inode and handle_id,
 *   - copies oldfd's current offset into the new openfile,
 *   - increments the inode's ref_count and inserts the new openfile.
 *
 * NOTE(review): the offset is copied, not shared, so the two descriptors
 * advance independently afterwards; POSIX dup() descriptors share one file
 * offset - confirm this is the intended semantics.
 *
 * @param oldfd         descriptor already tracked by zvfs
 * @param newfd         descriptor to attach; must not be tracked yet
 * @param new_fd_flags  fd flags to store for newfd; a negative value means
 *                      "inherit oldfd's fd_flags"
 * @return 0 on success; -1 with errno set:
 *         EBADF  - a descriptor is negative or oldfd is not tracked,
 *         EEXIST - newfd is already tracked,
 *         otherwise the errno left by blob_add_ref()/openfile_alloc(), or
 *         EIO/ENOMEM respectively when they left errno at 0.
 */
int
zvfs_dup_attach_newfd(int oldfd, int newfd, int new_fd_flags)
{
    struct zvfs_open_file *old_of, *new_of;
    int saved;

    if (oldfd < 0 || newfd < 0) {
        errno = EBADF;
        return -1;
    }

    pthread_mutex_lock(&g_fs.fd_mu);

    old_of = openfile_lookup(oldfd);
    if (!old_of) {
        saved = EBADF;
        goto fail_unlock;
    }
    if (openfile_lookup(newfd) != NULL) {
        saved = EEXIST;
        goto fail_unlock;
    }

    if (blob_add_ref(old_of->handle_id, 1) != 0) {
        /* Never report failure with errno == 0; callers propagate errno. */
        saved = (errno != 0) ? errno : EIO;
        goto fail_unlock;
    }

    new_of = openfile_alloc(newfd, old_of->inode, old_of->flags,
                            old_of->handle_id);
    if (!new_of) {
        saved = (errno != 0) ? errno : ENOMEM;
        /* Give back the reference taken just above. */
        (void)blob_close(old_of->handle_id);
        goto fail_unlock;
    }

    new_of->offset = old_of->offset;
    new_of->fd_flags = (new_fd_flags >= 0) ? new_fd_flags : old_of->fd_flags;

    atomic_fetch_add(&old_of->inode->ref_count, 1);
    openfile_insert(new_of);

    pthread_mutex_unlock(&g_fs.fd_mu);
    return 0;

fail_unlock:
    pthread_mutex_unlock(&g_fs.fd_mu);
    errno = saved;      /* set last so the unlock cannot disturb it */
    return -1;
}
/*
 * zvfs_add_ref_batch_or_fallback - add ref_deltas[i] references to each
 * handle_ids[i], preferring one batched call.
 *
 * blob_add_ref_batch() is tried first.  If it reports failure, the handles
 * are processed one at a time with blob_add_ref().  Should one of those
 * calls fail, the references already taken by earlier iterations of the
 * fallback loop are released again (one blob_close() per reference, zero
 * handles skipped), so the caller never has to clean up a half-applied
 * request.
 *
 * NOTE(review): this assumes a failed blob_add_ref_batch() applied none of
 * its deltas; if the batch call can fail after a partial update, the
 * fallback would add those references a second time - confirm against the
 * blob layer.
 *
 * @return 0 on success (including count == 0); -1 on failure with errno set
 *         to the errno left by blob_add_ref(), or EIO if that was 0.
 */
static int
zvfs_add_ref_batch_or_fallback(const uint64_t *handle_ids,
                               const uint32_t *ref_deltas,
                               uint32_t count)
{
    uint32_t i;

    if (count == 0)
        return 0;

    if (blob_add_ref_batch(handle_ids, ref_deltas, count) == 0)
        return 0;

    for (i = 0; i < count; i++) {
        if (blob_add_ref(handle_ids[i], ref_deltas[i]) != 0) {
            int saved = (errno != 0) ? errno : EIO;

            /* Undo entries [0, i) that the fallback already applied. */
            while (i > 0) {
                uint32_t k;

                i--;
                if (handle_ids[i] == 0)
                    continue;
                for (k = 0; k < ref_deltas[i]; k++)
                    (void)blob_close(handle_ids[i]);
            }

            errno = saved;
            return -1;
        }
    }

    return 0;
}
/*
 * zvfs_rollback_added_refs - call blob_close() once for every non-zero
 * entry of handle_ids[0..count), in array order.  Results of blob_close()
 * are ignored.
 */
static void
zvfs_rollback_added_refs(const uint64_t *handle_ids, uint32_t count)
{
    uint32_t idx = 0;

    while (idx < count) {
        uint64_t handle = handle_ids[idx++];

        if (handle == 0)
            continue;
        (void)blob_close(handle);
    }
}
/*
 * zvfs_snapshot_fd_handles - copy the handle_id of every openfile in
 * g_fs.fd_table into a freshly allocated array, paired with a parallel
 * array of reference deltas that are all 1.
 *
 * The table is read while holding fd_mu.  On success the caller owns both
 * arrays and must free() them; with an empty table both outputs are NULL
 * and *count_out is 0.
 *
 * @return 0 on success, -1 with errno = ENOMEM when an allocation fails
 *         (outputs are then left as NULL / 0).
 */
static int
zvfs_snapshot_fd_handles(uint64_t **handle_ids_out,
                         uint32_t **ref_deltas_out,
                         uint32_t *count_out)
{
    struct zvfs_open_file *entry, *next;
    uint64_t *ids = NULL;
    uint32_t *deltas = NULL;
    uint32_t capacity;
    uint32_t filled = 0;
    int alloc_failed = 0;

    *handle_ids_out = NULL;
    *ref_deltas_out = NULL;
    *count_out = 0;

    pthread_mutex_lock(&g_fs.fd_mu);

    capacity = (uint32_t)HASH_COUNT(g_fs.fd_table);
    if (capacity != 0) {
        ids = calloc(capacity, sizeof(*ids));
        deltas = calloc(capacity, sizeof(*deltas));

        if (ids != NULL && deltas != NULL) {
            HASH_ITER(hh, g_fs.fd_table, entry, next) {
                /* Never write past the arrays sized above. */
                if (filled >= capacity)
                    break;
                ids[filled] = entry->handle_id;
                deltas[filled] = 1;
                filled++;
            }
        } else {
            alloc_failed = 1;
        }
    }

    pthread_mutex_unlock(&g_fs.fd_mu);

    if (alloc_failed) {
        free(ids);
        free(deltas);
        errno = ENOMEM;
        return -1;
    }

    /* For an empty table these are still NULL / NULL / 0. */
    *handle_ids_out = ids;
    *ref_deltas_out = deltas;
    *count_out = filled;
    return 0;
}
/*
 * zvfs_snapshot_fds_in_range - collect the fd of every openfile in
 * g_fs.fd_table whose fd is non-negative and lies in [first, last].
 *
 * The table is read while holding fd_mu.  The result array is sized for
 * the whole table, so it may be larger than *count_out; it can be non-NULL
 * even when no fd matched.  The caller owns it and must free() it.  With
 * an empty table *fds_out is NULL and *count_out is 0.
 *
 * @return 0 on success, -1 with errno = ENOMEM when the allocation fails.
 */
static int
zvfs_snapshot_fds_in_range(unsigned int first, unsigned int last,
                           int **fds_out, uint32_t *count_out)
{
    struct zvfs_open_file *entry, *next;
    int *list = NULL;
    uint32_t capacity;
    uint32_t found = 0;
    int alloc_failed = 0;

    *fds_out = NULL;
    *count_out = 0;

    pthread_mutex_lock(&g_fs.fd_mu);

    capacity = (uint32_t)HASH_COUNT(g_fs.fd_table);
    if (capacity != 0) {
        list = calloc(capacity, sizeof(*list));
        if (list == NULL) {
            alloc_failed = 1;
        } else {
            HASH_ITER(hh, g_fs.fd_table, entry, next) {
                if (entry->fd >= 0 &&
                    (unsigned int)entry->fd >= first &&
                    (unsigned int)entry->fd <= last) {
                    list[found++] = entry->fd;
                }
            }
        }
    }

    pthread_mutex_unlock(&g_fs.fd_mu);

    if (alloc_failed) {
        errno = ENOMEM;
        return -1;
    }

    *fds_out = list;
    *count_out = found;
    return 0;
}
/* ------------------------------------------------------------------ */
/* close_range */
/* ------------------------------------------------------------------ */
@@ -452,32 +751,53 @@ close_range(unsigned int first, unsigned int last, int flags)
return ret;
}
if (first > last) {
errno = EINVAL;
ZVFS_HOOK_LEAVE();
return -1;
}
/*
* 遍历范围内所有 fdzvfs fd 单独走 zvfs_close_impl
* 其余统一交给 real_close_range如果内核支持)。
* 若内核不支持 close_range< 5.9),逐个 close。
* 只快照当前 zvfs fd_table 中命中的 fd避免对 [first,last] 做
* 全范围扫描last=UINT_MAX 时会非常慢,且旧逻辑存在回绕风险)。
*/
int any_err = 0;
int inited = 0;
for (unsigned int fd = first; fd <= last; fd++) {
if (zvfs_is_zvfs_fd((int)fd)) {
if (!inited) {
zvfs_ensure_init();
inited = 1;
}
if (zvfs_close_impl((int)fd) < 0) any_err = 1;
int *zvfs_fds = NULL;
uint32_t zvfs_fd_count = 0;
if (zvfs_snapshot_fds_in_range(first, last, &zvfs_fds, &zvfs_fd_count) < 0) {
ZVFS_HOOK_LEAVE();
return -1;
}
for (uint32_t i = 0; i < zvfs_fd_count; i++) {
if (!inited) {
zvfs_ensure_init();
inited = 1;
}
if (zvfs_close_impl(zvfs_fds[i]) < 0) {
any_err = 1;
}
}
free(zvfs_fds);
/* 让内核处理剩余非 zvfs fdCLOEXEC 等 flags 也在这里生效) */
if (real_close_range) {
if (real_close_range(first, last, flags) < 0 && !any_err)
any_err = 1;
} else {
/* 降级:逐个 close 非 zvfs fd */
for (unsigned int fd = first; fd <= last; fd++) {
/* 降级:逐个 close 非 zvfs fd(按 open-max 做上界截断) */
unsigned int upper = last;
long open_max = sysconf(_SC_OPEN_MAX);
if (open_max > 0 && upper >= (unsigned int)open_max) {
upper = (unsigned int)open_max - 1;
}
for (unsigned int fd = first; fd <= upper; fd++) {
if (!zvfs_is_zvfs_fd((int)fd))
real_close((int)fd);
if (fd == upper)
break;
}
}
@@ -501,14 +821,24 @@ dup(int oldfd)
return ret;
}
/*
* 当前版本不支持在 zvfs fd 上做 dup。
* 先明确返回 ENOTSUP避免暴露错误的 offset 语义。
*/
zvfs_ensure_init();
errno = ENOTSUP;
int newfd = real_dup(oldfd);
if (newfd < 0) {
ZVFS_HOOK_LEAVE();
return -1;
}
if (zvfs_dup_attach_newfd(oldfd, newfd, 0) < 0) {
int saved = errno;
(void)real_close(newfd);
errno = saved;
ZVFS_HOOK_LEAVE();
return -1;
}
ZVFS_HOOK_LEAVE();
return -1;
return newfd;
}
/* ------------------------------------------------------------------ */
@@ -534,9 +864,32 @@ dup2(int oldfd, int newfd)
}
zvfs_ensure_init();
errno = ENOTSUP;
int newfd_was_zvfs = zvfs_is_zvfs_fd(newfd);
int ret = real_dup2(oldfd, newfd);
if (ret < 0) {
ZVFS_HOOK_LEAVE();
return -1;
}
if (newfd_was_zvfs && zvfs_detach_fd_mapping(newfd, 1) < 0) {
int saved = errno;
(void)real_close(newfd);
errno = saved;
ZVFS_HOOK_LEAVE();
return -1;
}
if (zvfs_dup_attach_newfd(oldfd, newfd, 0) < 0) {
int saved = errno;
(void)real_close(newfd);
errno = saved;
ZVFS_HOOK_LEAVE();
return -1;
}
ZVFS_HOOK_LEAVE();
return -1;
return ret;
}
/* ------------------------------------------------------------------ */
@@ -561,8 +914,92 @@ dup3(int oldfd, int newfd, int flags)
return -1;
}
if ((flags & ~O_CLOEXEC) != 0) {
errno = EINVAL;
ZVFS_HOOK_LEAVE();
return -1;
}
zvfs_ensure_init();
errno = ENOTSUP;
int newfd_was_zvfs = zvfs_is_zvfs_fd(newfd);
int ret = real_dup3(oldfd, newfd, flags);
if (ret < 0) {
ZVFS_HOOK_LEAVE();
return -1;
}
if (newfd_was_zvfs && zvfs_detach_fd_mapping(newfd, 1) < 0) {
int saved = errno;
(void)real_close(newfd);
errno = saved;
ZVFS_HOOK_LEAVE();
return -1;
}
int fd_flags = (flags & O_CLOEXEC) ? FD_CLOEXEC : 0;
if (zvfs_dup_attach_newfd(oldfd, newfd, fd_flags) < 0) {
int saved = errno;
(void)real_close(newfd);
errno = saved;
ZVFS_HOOK_LEAVE();
return -1;
}
ZVFS_HOOK_LEAVE();
return -1;
return ret;
}
/* ------------------------------------------------------------------ */
/* fork */
/* ------------------------------------------------------------------ */
/*
 * fork - hooked fork().
 *
 * When already inside a hook, this forwards straight to real_fork().
 * Otherwise it snapshots the blob handle of every zvfs openfile, adds one
 * reference per snapshot entry, and only then calls real_fork().
 * (Presumably the extra references back the descriptors the child
 * inherits - confirm against the blob layer.)
 *
 * If real_fork() fails, the references added here are released again via
 * zvfs_rollback_added_refs().  If the snapshot or the reference update
 * fails, real_fork() is not called at all.
 *
 * @return the value of real_fork() on success; -1 with errno preserved
 *         from the failing step otherwise.
 */
pid_t
fork(void)
{
    ZVFS_HOOK_ENTER();

    if (ZVFS_IN_HOOK()) {
        pid_t passthrough = real_fork();
        ZVFS_HOOK_LEAVE();
        return passthrough;
    }

    uint64_t *handle_ids = NULL;
    uint32_t *ref_deltas = NULL;
    uint32_t count = 0;
    pid_t child;
    int saved;

    if (zvfs_snapshot_fd_handles(&handle_ids, &ref_deltas, &count) < 0) {
        ZVFS_HOOK_LEAVE();
        return -1;
    }

    if (count > 0) {
        zvfs_ensure_init();
        if (zvfs_add_ref_batch_or_fallback(handle_ids, ref_deltas, count) < 0) {
            saved = errno;
            goto fail;
        }
    }

    child = real_fork();
    if (child >= 0) {
        free(handle_ids);
        free(ref_deltas);
        ZVFS_HOOK_LEAVE();
        return child;
    }

    /* real_fork() failed: give back the references taken above. */
    saved = errno;
    if (count > 0)
        zvfs_rollback_added_refs(handle_ids, count);

fail:
    free(handle_ids);
    free(ref_deltas);
    errno = saved;
    ZVFS_HOOK_LEAVE();
    return -1;
}