This commit is contained in:
blue-lemon0104
2026-04-07 13:35:22 +08:00
commit 0120fa9ce3
1530 changed files with 424864 additions and 0 deletions

27
pg_include/storage/backendid.h Executable file
View File

@@ -0,0 +1,27 @@
/*-------------------------------------------------------------------------
*
* backendid.h
* POSTGRES backend id communication definitions
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/backendid.h
*
*-------------------------------------------------------------------------
*/
#ifndef BACKENDID_H
#define BACKENDID_H
/* ----------------
* -cim 8/17/90
* ----------------
*/
typedef int BackendId; /* unique currently active backend identifier */
#define InvalidBackendId (-1)
extern PGDLLIMPORT BackendId MyBackendId; /* backend id of this backend */
#endif /* BACKENDID_H */

170
pg_include/storage/barrier.h Executable file
View File

@@ -0,0 +1,170 @@
/*-------------------------------------------------------------------------
*
* barrier.h
* Memory barrier operations.
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/barrier.h
*
*-------------------------------------------------------------------------
*/
#ifndef BARRIER_H
#define BARRIER_H
#include "storage/s_lock.h"
extern slock_t dummy_spinlock;
/*
* A compiler barrier need not (and preferably should not) emit any actual
* machine code, but must act as an optimization fence: the compiler must not
* reorder loads or stores to main memory around the barrier. However, the
* CPU may still reorder loads or stores at runtime, if the architecture's
* memory model permits this.
*
* A memory barrier must act as a compiler barrier, and in addition must
* guarantee that all loads and stores issued prior to the barrier are
* completed before any loads or stores issued after the barrier. Unless
* loads and stores are totally ordered (which is not the case on most
* architectures) this requires issuing some sort of memory fencing
* instruction.
*
* A read barrier must act as a compiler barrier, and in addition must
* guarantee that any loads issued prior to the barrier are completed before
* any loads issued after the barrier. Similarly, a write barrier acts
* as a compiler barrier, and also orders stores. Read and write barriers
* are thus weaker than a full memory barrier, but stronger than a compiler
* barrier. In practice, on machines with strong memory ordering, read and
* write barriers may require nothing more than a compiler barrier.
*
* For an introduction to using memory barriers within the PostgreSQL backend,
* see src/backend/storage/lmgr/README.barrier
*/
#if defined(DISABLE_BARRIERS)
/*
* Fall through to the spinlock-based implementation.
*/
#elif defined(__INTEL_COMPILER)
/*
* icc defines __GNUC__, but doesn't support gcc's inline asm syntax
*/
#define pg_memory_barrier() _mm_mfence()
#define pg_compiler_barrier() __memory_barrier()
#elif defined(__GNUC__)
/* This works on any architecture, since it's only talking to GCC itself. */
#define pg_compiler_barrier() __asm__ __volatile__("" : : : "memory")
#if defined(__i386__)
/*
* i386 does not allow loads to be reordered with other loads, or stores to be
* reordered with other stores, but a load can be performed before a subsequent
* store.
*
* "lock; addl" has worked for longer than "mfence".
*/
#define pg_memory_barrier() \
__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
#define pg_read_barrier() pg_compiler_barrier()
#define pg_write_barrier() pg_compiler_barrier()
#elif defined(__x86_64__) /* 64 bit x86 */
/*
* x86_64 has similar ordering characteristics to i386.
*
* Technically, some x86-ish chips support uncached memory access and/or
* special instructions that are weakly ordered. In those cases we'd need
* the read and write barriers to be lfence and sfence. But since we don't
* do those things, a compiler barrier should be enough.
*/
#define pg_memory_barrier() \
__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory")
#define pg_read_barrier() pg_compiler_barrier()
#define pg_write_barrier() pg_compiler_barrier()
#elif defined(__ia64__) || defined(__ia64)
/*
* Itanium is weakly ordered, so read and write barriers require a full
* fence.
*/
#define pg_memory_barrier() __asm__ __volatile__ ("mf" : : : "memory")
#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
/*
* lwsync orders loads with respect to each other, and similarly with stores.
* But a load can be performed before a subsequent store, so sync must be used
* for a full memory barrier.
*/
#define pg_memory_barrier() __asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier() __asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier() __asm__ __volatile__ ("lwsync" : : : "memory")
#elif defined(__alpha) || defined(__alpha__) /* Alpha */
/*
* Unlike all other known architectures, Alpha allows dependent reads to be
* reordered, but we don't currently find it necessary to provide a conditional
* read barrier to cover that case. We might need to add that later.
*/
#define pg_memory_barrier() __asm__ __volatile__ ("mb" : : : "memory")
#define pg_read_barrier() __asm__ __volatile__ ("rmb" : : : "memory")
#define pg_write_barrier() __asm__ __volatile__ ("wmb" : : : "memory")
#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
/*
* If we're on GCC 4.1.0 or higher, we should be able to get a memory
* barrier out of this compiler built-in. But we prefer to rely on our
* own definitions where possible, and use this only as a fallback.
*/
#define pg_memory_barrier() __sync_synchronize()
#endif
#elif defined(__ia64__) || defined(__ia64)
#define pg_compiler_barrier() _Asm_sched_fence()
#define pg_memory_barrier() _Asm_mf()
#elif defined(WIN32_ONLY_COMPILER)
/* Should work on both MSVC and Borland. */
#include <intrin.h>
#pragma intrinsic(_ReadWriteBarrier)
#define pg_compiler_barrier() _ReadWriteBarrier()
#define pg_memory_barrier() MemoryBarrier()
#endif
/*
* If we have no memory barrier implementation for this architecture, we
* fall back to acquiring and releasing a spinlock. This might, in turn,
* fall back to the semaphore-based spinlock implementation, which will be
* amazingly slow.
*
* It's not self-evident that every possible legal implementation of a
* spinlock acquire-and-release would be equivalent to a full memory barrier.
* For example, I'm not sure that Itanium's acq and rel add up to a full
* fence. But all of our actual implementations seem OK in this regard.
*/
#if !defined(pg_memory_barrier)
#define pg_memory_barrier(x) \
do { S_LOCK(&dummy_spinlock); S_UNLOCK(&dummy_spinlock); } while (0)
#endif
/*
* If read or write barriers are undefined, we upgrade them to full memory
* barriers.
*
* If a compiler barrier is unavailable, you probably don't want a full
* memory barrier instead, so if you have a use case for a compiler barrier,
* you'd better use #ifdef.
*/
#if !defined(pg_read_barrier)
#define pg_read_barrier() pg_memory_barrier()
#endif
#if !defined(pg_write_barrier)
#define pg_write_barrier() pg_memory_barrier()
#endif
#endif /* BARRIER_H */

121
pg_include/storage/block.h Executable file
View File

@@ -0,0 +1,121 @@
/*-------------------------------------------------------------------------
*
* block.h
* POSTGRES disk block definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/block.h
*
*-------------------------------------------------------------------------
*/
#ifndef BLOCK_H
#define BLOCK_H
/*
* BlockNumber:
*
* each data file (heap or index) is divided into postgres disk blocks
* (which may be thought of as the unit of i/o -- a postgres buffer
* contains exactly one disk block). the blocks are numbered
* sequentially, 0 to 0xFFFFFFFE.
*
* InvalidBlockNumber is the same thing as P_NEW in buf.h.
*
* the access methods, the buffer manager and the storage manager are
* more or less the only pieces of code that should be accessing disk
* blocks directly.
*/
typedef uint32 BlockNumber;
#define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF)
#define MaxBlockNumber ((BlockNumber) 0xFFFFFFFE)
/*
* BlockId:
*
* this is a storage type for BlockNumber. in other words, this type
* is used for on-disk structures (e.g., in HeapTupleData) whereas
* BlockNumber is the type on which calculations are performed (e.g.,
* in access method code).
*
* there doesn't appear to be any reason to have separate types except
* for the fact that BlockIds can be SHORTALIGN'd (and therefore any
* structures that contains them, such as ItemPointerData, can also be
* SHORTALIGN'd). this is an important consideration for reducing the
* space requirements of the line pointer (ItemIdData) array on each
* page and the header of each heap or index tuple, so it doesn't seem
* wise to change this without good reason.
*/
typedef struct BlockIdData
{
uint16 bi_hi;
uint16 bi_lo;
} BlockIdData;
typedef BlockIdData *BlockId; /* block identifier */
/* ----------------
* support macros
* ----------------
*/
/*
* BlockNumberIsValid
* True iff blockNumber is valid.
*/
#define BlockNumberIsValid(blockNumber) \
((bool) ((BlockNumber) (blockNumber) != InvalidBlockNumber))
/*
* BlockIdIsValid
* True iff the block identifier is valid.
*/
#define BlockIdIsValid(blockId) \
((bool) PointerIsValid(blockId))
/*
* BlockIdSet
* Sets a block identifier to the specified value.
*/
#define BlockIdSet(blockId, blockNumber) \
( \
AssertMacro(PointerIsValid(blockId)), \
(blockId)->bi_hi = (blockNumber) >> 16, \
(blockId)->bi_lo = (blockNumber) & 0xffff \
)
/*
* BlockIdCopy
* Copy a block identifier.
*/
#define BlockIdCopy(toBlockId, fromBlockId) \
( \
AssertMacro(PointerIsValid(toBlockId)), \
AssertMacro(PointerIsValid(fromBlockId)), \
(toBlockId)->bi_hi = (fromBlockId)->bi_hi, \
(toBlockId)->bi_lo = (fromBlockId)->bi_lo \
)
/*
* BlockIdEquals
* Check for block number equality.
*/
#define BlockIdEquals(blockId1, blockId2) \
((blockId1)->bi_hi == (blockId2)->bi_hi && \
(blockId1)->bi_lo == (blockId2)->bi_lo)
/*
* BlockIdGetBlockNumber
* Retrieve the block number from a block identifier.
*/
#define BlockIdGetBlockNumber(blockId) \
( \
AssertMacro(BlockIdIsValid(blockId)), \
(BlockNumber) (((blockId)->bi_hi << 16) | ((uint16) (blockId)->bi_lo)) \
)
#endif /* BLOCK_H */

46
pg_include/storage/buf.h Executable file
View File

@@ -0,0 +1,46 @@
/*-------------------------------------------------------------------------
*
* buf.h
* Basic buffer manager data types.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/buf.h
*
*-------------------------------------------------------------------------
*/
#ifndef BUF_H
#define BUF_H
/*
* Buffer identifiers.
*
* Zero is invalid, positive is the index of a shared buffer (1..NBuffers),
* negative is the index of a local buffer (-1 .. -NLocBuffer).
*/
typedef int Buffer;
#define InvalidBuffer 0
/*
* BufferIsInvalid
* True iff the buffer is invalid.
*/
#define BufferIsInvalid(buffer) ((buffer) == InvalidBuffer)
/*
* BufferIsLocal
* True iff the buffer is local (not visible to other backends).
*/
#define BufferIsLocal(buffer) ((buffer) < 0)
/*
* Buffer access strategy objects.
*
* BufferAccessStrategyData is private to freelist.c
*/
typedef struct BufferAccessStrategyData *BufferAccessStrategy;
#endif /* BUF_H */

View File

@@ -0,0 +1,216 @@
/*-------------------------------------------------------------------------
*
* buf_internals.h
* Internal definitions for buffer manager and the buffer replacement
* strategy.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/buf_internals.h
*
*-------------------------------------------------------------------------
*/
#ifndef BUFMGR_INTERNALS_H
#define BUFMGR_INTERNALS_H
#include "storage/buf.h"
#include "storage/latch.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "utils/relcache.h"
/*
* Flags for buffer descriptors
*
* Note: TAG_VALID essentially means that there is a buffer hashtable
* entry associated with the buffer's tag.
*/
#define BM_DIRTY (1 << 0) /* data needs writing */
#define BM_VALID (1 << 1) /* data is valid */
#define BM_TAG_VALID (1 << 2) /* tag is assigned */
#define BM_IO_IN_PROGRESS (1 << 3) /* read or write in progress */
#define BM_IO_ERROR (1 << 4) /* previous I/O failed */
#define BM_JUST_DIRTIED (1 << 5) /* dirtied since write started */
#define BM_PIN_COUNT_WAITER (1 << 6) /* have waiter for sole pin */
#define BM_CHECKPOINT_NEEDED (1 << 7) /* must write for checkpoint */
#define BM_PERMANENT (1 << 8) /* permanent relation (not
* unlogged) */
typedef bits16 BufFlags;
/*
* The maximum allowed value of usage_count represents a tradeoff between
* accuracy and speed of the clock-sweep buffer management algorithm. A
* large value (comparable to NBuffers) would approximate LRU semantics.
* But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of
* clock sweeps to find a free buffer, so in practice we don't want the
* value to be very large.
*/
#define BM_MAX_USAGE_COUNT 5
/*
* Buffer tag identifies which disk block the buffer contains.
*
* Note: the BufferTag data must be sufficient to determine where to write the
* block, without reference to pg_class or pg_tablespace entries. It's
* possible that the backend flushing the buffer doesn't even believe the
* relation is visible yet (its xact may have started before the xact that
* created the rel). The storage manager must be able to cope anyway.
*
* Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
* to be fixed to zero them, since this struct is used as a hash key.
*/
typedef struct buftag
{
RelFileNode rnode; /* physical relation identifier */
ForkNumber forkNum;
BlockNumber blockNum; /* blknum relative to begin of reln */
} BufferTag;
#define CLEAR_BUFFERTAG(a) \
( \
(a).rnode.spcNode = InvalidOid, \
(a).rnode.dbNode = InvalidOid, \
(a).rnode.relNode = InvalidOid, \
(a).forkNum = InvalidForkNumber, \
(a).blockNum = InvalidBlockNumber \
)
#define INIT_BUFFERTAG(a,xx_rnode,xx_forkNum,xx_blockNum) \
( \
(a).rnode = (xx_rnode), \
(a).forkNum = (xx_forkNum), \
(a).blockNum = (xx_blockNum) \
)
#define BUFFERTAGS_EQUAL(a,b) \
( \
RelFileNodeEquals((a).rnode, (b).rnode) && \
(a).blockNum == (b).blockNum && \
(a).forkNum == (b).forkNum \
)
/*
* The shared buffer mapping table is partitioned to reduce contention.
* To determine which partition lock a given tag requires, compute the tag's
* hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
* NB: NUM_BUFFER_PARTITIONS must be a power of 2!
*/
#define BufTableHashPartition(hashcode) \
((hashcode) % NUM_BUFFER_PARTITIONS)
#define BufMappingPartitionLock(hashcode) \
((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode)))
/*
* BufferDesc -- shared descriptor/state data for a single shared buffer.
*
* Note: buf_hdr_lock must be held to examine or change the tag, flags,
* usage_count, refcount, or wait_backend_pid fields. buf_id field never
* changes after initialization, so does not need locking. freeNext is
* protected by the BufFreelistLock not buf_hdr_lock. The LWLocks can take
* care of themselves. The buf_hdr_lock is *not* used to control access to
* the data in the buffer!
*
* An exception is that if we have the buffer pinned, its tag can't change
* underneath us, so we can examine the tag without locking the spinlock.
* Also, in places we do one-time reads of the flags without bothering to
* lock the spinlock; this is generally for situations where we don't expect
* the flag bit being tested to be changing.
*
* We can't physically remove items from a disk page if another backend has
* the buffer pinned. Hence, a backend may need to wait for all other pins
* to go away. This is signaled by storing its own PID into
* wait_backend_pid and setting flag bit BM_PIN_COUNT_WAITER. At present,
* there can be only one such waiter per buffer.
*
* We use this same struct for local buffer headers, but the lock fields
* are not used and not all of the flag bits are useful either.
*/
typedef struct sbufdesc
{
BufferTag tag; /* ID of page contained in buffer */
BufFlags flags; /* see bit definitions above */
uint16 usage_count; /* usage counter for clock sweep code */
unsigned refcount; /* # of backends holding pins on buffer */
int wait_backend_pid; /* backend PID of pin-count waiter */
slock_t buf_hdr_lock; /* protects the above fields */
int buf_id; /* buffer's index number (from 0) */
int freeNext; /* link in freelist chain */
LWLockId io_in_progress_lock; /* to wait for I/O to complete */
LWLockId content_lock; /* to lock access to buffer contents */
} BufferDesc;
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
/*
* The freeNext field is either the index of the next freelist entry,
* or one of these special values:
*/
#define FREENEXT_END_OF_LIST (-1)
#define FREENEXT_NOT_IN_LIST (-2)
/*
* Macros for acquiring/releasing a shared buffer header's spinlock.
* Do not apply these to local buffers!
*
* Note: as a general coding rule, if you are using these then you probably
* need to be using a volatile-qualified pointer to the buffer header, to
* ensure that the compiler doesn't rearrange accesses to the header to
* occur before or after the spinlock is acquired/released.
*/
#define LockBufHdr(bufHdr) SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
#define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock)
/* in buf_init.c */
extern PGDLLIMPORT BufferDesc *BufferDescriptors;
/* in localbuf.c */
extern BufferDesc *LocalBufferDescriptors;
/*
* Internal routines: only called by bufmgr
*/
/* freelist.c */
extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
bool *lock_held);
extern void StrategyFreeBuffer(volatile BufferDesc *buf);
extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
volatile BufferDesc *buf);
extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
extern void StrategyNotifyBgWriter(Latch *bgwriterLatch);
extern Size StrategyShmemSize(void);
extern void StrategyInitialize(bool init);
/* buf_table.c */
extern Size BufTableShmemSize(int size);
extern void InitBufTable(int size);
extern uint32 BufTableHashCode(BufferTag *tagPtr);
extern int BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
/* localbuf.c */
extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum);
extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum, bool *foundPtr);
extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
BlockNumber firstDelBlock);
extern void DropRelFileNodeAllLocalBuffers(RelFileNode rnode);
extern void AtEOXact_LocalBuffers(bool isCommit);
#endif /* BUFMGR_INTERNALS_H */

45
pg_include/storage/buffile.h Executable file
View File

@@ -0,0 +1,45 @@
/*-------------------------------------------------------------------------
*
* buffile.h
* Management of large buffered files, primarily temporary files.
*
* The BufFile routines provide a partial replacement for stdio atop
* virtual file descriptors managed by fd.c. Currently they only support
* buffered access to a virtual file, without any of stdio's formatting
* features. That's enough for immediate needs, but the set of facilities
* could be expanded if necessary.
*
* BufFile also supports working with temporary files that exceed the OS
* file size limit and/or the largest offset representable in an int.
* It might be better to split that out as a separately accessible module,
* but currently we have no need for oversize temp files without buffered
* access.
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/buffile.h
*
*-------------------------------------------------------------------------
*/
#ifndef BUFFILE_H
#define BUFFILE_H
/* BufFile is an opaque type whose details are not known outside buffile.c. */
typedef struct BufFile BufFile;
/*
* prototypes for functions in buffile.c
*/
extern BufFile *BufFileCreateTemp(bool interXact);
extern void BufFileClose(BufFile *file);
extern size_t BufFileRead(BufFile *file, void *ptr, size_t size);
extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size);
extern int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence);
extern void BufFileTell(BufFile *file, int *fileno, off_t *offset);
extern int BufFileSeekBlock(BufFile *file, long blknum);
#endif /* BUFFILE_H */

226
pg_include/storage/bufmgr.h Executable file
View File

@@ -0,0 +1,226 @@
/*-------------------------------------------------------------------------
*
* bufmgr.h
* POSTGRES buffer manager definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/bufmgr.h
*
*-------------------------------------------------------------------------
*/
#ifndef BUFMGR_H
#define BUFMGR_H
#include "storage/block.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "storage/relfilenode.h"
#include "utils/relcache.h"
typedef void *Block;
/* Possible arguments for GetAccessStrategy() */
typedef enum BufferAccessStrategyType
{
BAS_NORMAL, /* Normal random access */
BAS_BULKREAD, /* Large read-only scan (hint bit updates are
* ok) */
BAS_BULKWRITE, /* Large multi-block write (e.g. COPY IN) */
BAS_VACUUM /* VACUUM */
} BufferAccessStrategyType;
/* Possible modes for ReadBufferExtended() */
typedef enum
{
RBM_NORMAL, /* Normal read */
RBM_ZERO, /* Don't read from disk, caller will
* initialize */
RBM_ZERO_ON_ERROR /* Read, but return an all-zeros page on error */
} ReadBufferMode;
/* in globals.c ... this duplicates miscadmin.h */
extern PGDLLIMPORT int NBuffers;
/* in bufmgr.c */
extern bool zero_damaged_pages;
extern int bgwriter_lru_maxpages;
extern double bgwriter_lru_multiplier;
extern bool track_io_timing;
extern int target_prefetch_pages;
/* in buf_init.c */
extern PGDLLIMPORT char *BufferBlocks;
extern PGDLLIMPORT int32 *PrivateRefCount;
/* in localbuf.c */
extern PGDLLIMPORT int NLocBuffer;
extern PGDLLIMPORT Block *LocalBufferBlockPointers;
extern PGDLLIMPORT int32 *LocalRefCount;
/* special block number for ReadBuffer() */
#define P_NEW InvalidBlockNumber /* grow the file to get a new page */
/*
* Buffer content lock modes (mode argument for LockBuffer())
*/
#define BUFFER_LOCK_UNLOCK 0
#define BUFFER_LOCK_SHARE 1
#define BUFFER_LOCK_EXCLUSIVE 2
/*
* These routines are beaten on quite heavily, hence the macroization.
*/
/*
* BufferIsValid
* True iff the given buffer number is valid (either as a shared
* or local buffer).
*
* Note: For a long time this was defined the same as BufferIsPinned,
* that is it would say False if you didn't hold a pin on the buffer.
* I believe this was bogus and served only to mask logic errors.
* Code should always know whether it has a buffer reference,
* independently of the pin state.
*
* Note: For a further long time this was not quite the inverse of the
* BufferIsInvalid() macro, in that it also did sanity checks to verify
* that the buffer number was in range. Most likely, this macro was
* originally intended only to be used in assertions, but its use has
* since expanded quite a bit, and the overhead of making those checks
* even in non-assert-enabled builds can be significant. Thus, we've
* now demoted the range checks to assertions within the macro itself.
*/
#define BufferIsValid(bufnum) \
( \
AssertMacro((bufnum) <= NBuffers && (bufnum) >= -NLocBuffer), \
(bufnum) != InvalidBuffer \
)
/*
* BufferIsPinned
* True iff the buffer is pinned (also checks for valid buffer number).
*
* NOTE: what we check here is that *this* backend holds a pin on
* the buffer. We do not care whether some other backend does.
*/
#define BufferIsPinned(bufnum) \
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(PrivateRefCount[(bufnum) - 1] > 0) \
)
/*
* BufferGetBlock
* Returns a reference to a disk page image associated with a buffer.
*
* Note:
* Assumes buffer is valid.
*/
#define BufferGetBlock(buffer) \
( \
AssertMacro(BufferIsValid(buffer)), \
BufferIsLocal(buffer) ? \
LocalBufferBlockPointers[-(buffer) - 1] \
: \
(Block) (BufferBlocks + ((Size) ((buffer) - 1)) * BLCKSZ) \
)
/*
* BufferGetPageSize
* Returns the page size within a buffer.
*
* Notes:
* Assumes buffer is valid.
*
* The buffer can be a raw disk block and need not contain a valid
* (formatted) disk page.
*/
/* XXX should dig out of buffer descriptor */
#define BufferGetPageSize(buffer) \
( \
AssertMacro(BufferIsValid(buffer)), \
(Size)BLCKSZ \
)
/*
* BufferGetPage
* Returns the page associated with a buffer.
*/
#define BufferGetPage(buffer) ((Page)BufferGetBlock(buffer))
/*
* prototypes for functions in bufmgr.c
*/
extern void PrefetchBuffer(Relation reln, ForkNumber forkNum,
BlockNumber blockNum);
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
BufferAccessStrategy strategy);
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
extern void IncrBufferRefCount(Buffer buffer);
extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
BlockNumber blockNum);
extern void InitBufferPool(void);
extern void InitBufferPoolAccess(void);
extern void InitBufferPoolBackend(void);
extern void AtEOXact_Buffers(bool isCommit);
extern void PrintBufferLeakWarning(Buffer buffer);
extern void CheckPointBuffers(int flags);
extern BlockNumber BufferGetBlockNumber(Buffer buffer);
extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
ForkNumber forkNum);
extern void FlushRelationBuffers(Relation rel);
extern void FlushDatabaseBuffers(Oid dbid);
extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
ForkNumber forkNum, BlockNumber firstDelBlock);
extern void DropRelFileNodeAllBuffers(RelFileNodeBackend rnode);
extern void DropDatabaseBuffers(Oid dbid);
#define RelationGetNumberOfBlocks(reln) \
RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)
extern bool BufferIsPermanent(Buffer buffer);
#ifdef NOT_USED
extern void PrintPinnedBufs(void);
#endif
extern Size BufferShmemSize(void);
extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
ForkNumber *forknum, BlockNumber *blknum);
extern void SetBufferCommitInfoNeedsSave(Buffer buffer);
extern void UnlockBuffers(void);
extern void LockBuffer(Buffer buffer, int mode);
extern bool ConditionalLockBuffer(Buffer buffer);
extern void LockBufferForCleanup(Buffer buffer);
extern bool ConditionalLockBufferForCleanup(Buffer buffer);
extern bool HoldingBufferPinThatDelaysRecovery(void);
extern void AbortBufferIO(void);
extern void BufmgrCommit(void);
extern bool BgBufferSync(void);
extern void AtProcExit_LocalBuffers(void);
/* in freelist.c */
extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
extern void FreeAccessStrategy(BufferAccessStrategy strategy);
#endif

385
pg_include/storage/bufpage.h Executable file
View File

@@ -0,0 +1,385 @@
/*-------------------------------------------------------------------------
*
* bufpage.h
* Standard POSTGRES buffer page definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/bufpage.h
*
*-------------------------------------------------------------------------
*/
#ifndef BUFPAGE_H
#define BUFPAGE_H
#include "access/xlogdefs.h"
#include "storage/item.h"
#include "storage/off.h"
/*
* A postgres disk page is an abstraction layered on top of a postgres
* disk block (which is simply a unit of i/o, see block.h).
*
* specifically, while a disk block can be unformatted, a postgres
* disk page is always a slotted page of the form:
*
* +----------------+---------------------------------+
* | PageHeaderData | linp1 linp2 linp3 ... |
* +-----------+----+---------------------------------+
* | ... linpN | |
* +-----------+--------------------------------------+
* | ^ pd_lower |
* | |
* | v pd_upper |
* +-------------+------------------------------------+
* | | tupleN ... |
* +-------------+------------------+-----------------+
* | ... tuple3 tuple2 tuple1 | "special space" |
* +--------------------------------+-----------------+
* ^ pd_special
*
* a page is full when nothing can be added between pd_lower and
* pd_upper.
*
* all blocks written out by an access method must be disk pages.
*
* EXCEPTIONS:
*
* obviously, a page is not formatted before it is initialized by
* a call to PageInit.
*
* NOTES:
*
* linp1..N form an ItemId array. ItemPointers point into this array
* rather than pointing directly to a tuple. Note that OffsetNumbers
* conventionally start at 1, not 0.
*
* tuple1..N are added "backwards" on the page. because a tuple's
* ItemPointer points to its ItemId entry rather than its actual
* byte-offset position, tuples can be physically shuffled on a page
* whenever the need arises.
*
* AM-generic per-page information is kept in PageHeaderData.
*
* AM-specific per-page data (if any) is kept in the area marked "special
* space"; each AM has an "opaque" structure defined somewhere that is
* stored as the page trailer. an access method should always
* initialize its pages with PageInit and then set its own opaque
* fields.
*/
typedef Pointer Page;
/*
* location (byte offset) within a page.
*
* note that this is actually limited to 2^15 because we have limited
* ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
*/
typedef uint16 LocationIndex;
/*
* disk page organization
*
* space management information generic to any page
*
* pd_lsn - identifies xlog record for last change to this page.
* pd_tli - ditto.
* pd_flags - flag bits.
* pd_lower - offset to start of free space.
* pd_upper - offset to end of free space.
* pd_special - offset to start of special space.
* pd_pagesize_version - size in bytes and page layout version number.
* pd_prune_xid - oldest XID among potentially prunable tuples on page.
*
* The LSN is used by the buffer manager to enforce the basic rule of WAL:
* "thou shalt write xlog before data". A dirty buffer cannot be dumped
* to disk until xlog has been flushed at least as far as the page's LSN.
* We also store the 16 least significant bits of the TLI for identification
* purposes (it is not clear that this is actually necessary, but it seems
* like a good idea).
*
* pd_prune_xid is a hint field that helps determine whether pruning will be
* useful. It is currently unused in index pages.
*
* The page version number and page size are packed together into a single
* uint16 field. This is for historical reasons: before PostgreSQL 7.3,
* there was no concept of a page version number, and doing it this way
* lets us pretend that pre-7.3 databases have page version number zero.
* We constrain page sizes to be multiples of 256, leaving the low eight
* bits available for a version number.
*
* Minimum possible page size is perhaps 64B to fit page header, opaque space
* and a minimal tuple; of course, in reality you want it much bigger, so
* the constraint on pagesize mod 256 is not an important restriction.
* On the high end, we can only support pages up to 32KB because lp_off/lp_len
* are 15 bits.
*/
typedef struct PageHeaderData
{
/* XXX LSN is member of *any* block, not only page-organized ones */
XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
* record for last change to this page */
uint16 pd_tli; /* least significant bits of the TimeLineID
* containing the LSN */
uint16 pd_flags; /* flag bits, see below */
LocationIndex pd_lower; /* offset to start of free space */
LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */
uint16 pd_pagesize_version;
TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
ItemIdData pd_linp[1]; /* beginning of line pointer array */
} PageHeaderData;
typedef PageHeaderData *PageHeader;
/*
* pd_flags contains the following flag bits. Undefined bits are initialized
* to zero and may be used in the future.
*
* PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
* pd_lower. This should be considered a hint rather than the truth, since
* changes to it are not WAL-logged.
*
* PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
* page for its new tuple version; this suggests that a prune is needed.
* Again, this is just a hint.
*/
#define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
#define PD_PAGE_FULL 0x0002 /* not enough free space for new
* tuple? */
#define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
* everyone */
#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
/*
* Page layout version number 0 is for pre-7.3 Postgres releases.
* Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
* Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
* Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
* Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
* added the pd_flags field (by stealing some bits from pd_tli),
* as well as adding the pd_prune_xid field (which enlarges the header).
*/
#define PG_PAGE_LAYOUT_VERSION 4
/* ----------------------------------------------------------------
* page support macros
* ----------------------------------------------------------------
*/
/*
* PageIsValid
* True iff page is valid.
*/
#define PageIsValid(page) PointerIsValid(page)
/*
* line pointer(s) do not count as part of header
*/
#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
/*
* PageIsEmpty
* returns true iff no itemid has been allocated on the page
*/
#define PageIsEmpty(page) \
(((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData)
/*
* PageIsNew
* returns true iff page has not been initialized (by PageInit)
*/
#define PageIsNew(page) (((PageHeader) (page))->pd_upper == 0)
/*
* PageGetItemId
* Returns an item identifier of a page.
*/
#define PageGetItemId(page, offsetNumber) \
((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1]))
/*
* PageGetContents
* To be used in case the page does not contain item pointers.
*
* Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
* Now it is. Beware of old code that might think the offset to the contents
* is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
*/
#define PageGetContents(page) \
((char *) (page) + MAXALIGN(SizeOfPageHeaderData))
/* ----------------
* macros to access page size info
* ----------------
*/
/*
* PageSizeIsValid
* True iff the page size is valid.
*/
#define PageSizeIsValid(pageSize) ((pageSize) == BLCKSZ)
/*
* PageGetPageSize
* Returns the page size of a page.
*
* this can only be called on a formatted page (unlike
* BufferGetPageSize, which can be called on an unformatted page).
* however, it can be called on a page that is not stored in a buffer.
*/
#define PageGetPageSize(page) \
((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00))
/*
* PageGetPageLayoutVersion
* Returns the page layout version of a page.
*/
#define PageGetPageLayoutVersion(page) \
(((PageHeader) (page))->pd_pagesize_version & 0x00FF)
/*
* PageSetPageSizeAndVersion
* Sets the page size and page layout version number of a page.
*
* We could support setting these two values separately, but there's
* no real need for it at the moment.
*/
#define PageSetPageSizeAndVersion(page, size, version) \
( \
AssertMacro(((size) & 0xFF00) == (size)), \
AssertMacro(((version) & 0x00FF) == (version)), \
((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
)
/* ----------------
* page special data macros
* ----------------
*/
/*
* PageGetSpecialSize
* Returns size of special space on a page.
*/
#define PageGetSpecialSize(page) \
((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special))
/*
* PageGetSpecialPointer
* Returns pointer to special space on a page.
*/
#define PageGetSpecialPointer(page) \
( \
AssertMacro(PageIsValid(page)), \
(char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \
)
/*
* PageGetItem
* Retrieves an item on the given page.
*
* Note:
* This does not change the status of any of the resources passed.
* The semantics may change in the future.
*/
#define PageGetItem(page, itemId) \
( \
AssertMacro(PageIsValid(page)), \
AssertMacro(ItemIdHasStorage(itemId)), \
(Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \
)
/*
* PageGetMaxOffsetNumber
* Returns the maximum offset number used by the given page.
* Since offset numbers are 1-based, this is also the number
* of items on the page.
*
* NOTE: if the page is not initialized (pd_lower == 0), we must
* return zero to ensure sane behavior. Accept double evaluation
* of the argument so that we can ensure this.
*/
#define PageGetMaxOffsetNumber(page) \
(((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData ? 0 : \
((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
/ sizeof(ItemIdData)))
/*
* Additional macros for access to page headers
*/
#define PageGetLSN(page) \
(((PageHeader) (page))->pd_lsn)
#define PageSetLSN(page, lsn) \
(((PageHeader) (page))->pd_lsn = (lsn))
/* NOTE: only the 16 least significant bits are stored */
#define PageGetTLI(page) \
(((PageHeader) (page))->pd_tli)
#define PageSetTLI(page, tli) \
(((PageHeader) (page))->pd_tli = (uint16) (tli))
#define PageHasFreeLinePointers(page) \
(((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES)
#define PageSetHasFreeLinePointers(page) \
(((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES)
#define PageClearHasFreeLinePointers(page) \
(((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)
#define PageIsFull(page) \
(((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
#define PageSetFull(page) \
(((PageHeader) (page))->pd_flags |= PD_PAGE_FULL)
#define PageClearFull(page) \
(((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)
#define PageIsAllVisible(page) \
(((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE)
#define PageSetAllVisible(page) \
(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
#define PageClearAllVisible(page) \
(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
#define PageIsPrunable(page, oldestxmin) \
( \
AssertMacro(TransactionIdIsNormal(oldestxmin)), \
TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \
TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \
)
#define PageSetPrunable(page, xid) \
do { \
Assert(TransactionIdIsNormal(xid)); \
if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
((PageHeader) (page))->pd_prune_xid = (xid); \
} while (0)
#define PageClearPrunable(page) \
(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
/* ----------------------------------------------------------------
* extern declarations
* ----------------------------------------------------------------
*/
extern void PageInit(Page page, Size pageSize, Size specialSize);
extern bool PageHeaderIsValid(PageHeader page);
extern OffsetNumber PageAddItem(Page page, Item item, Size size,
OffsetNumber offsetNumber, bool overwrite, bool is_heap);
extern Page PageGetTempPage(Page page);
extern Page PageGetTempPageCopy(Page page);
extern Page PageGetTempPageCopySpecial(Page page);
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
extern void PageRepairFragmentation(Page page);
extern Size PageGetFreeSpace(Page page);
extern Size PageGetExactFreeSpace(Page page);
extern Size PageGetHeapFreeSpace(Page page);
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
#endif /* BUFPAGE_H */

19
pg_include/storage/copydir.h Executable file
View File

@@ -0,0 +1,19 @@
/*-------------------------------------------------------------------------
*
* copydir.h
* Copy a directory.
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/copydir.h
*
*-------------------------------------------------------------------------
*/
#ifndef COPYDIR_H
#define COPYDIR_H
extern void copydir(char *fromdir, char *todir, bool recurse);
extern void copy_file(char *fromfile, char *tofile);
#endif /* COPYDIR_H */

112
pg_include/storage/fd.h Executable file
View File

@@ -0,0 +1,112 @@
/*-------------------------------------------------------------------------
*
* fd.h
* Virtual file descriptor definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/fd.h
*
*-------------------------------------------------------------------------
*/
/*
* calls:
*
* File {Close, Read, Write, Seek, Tell, Sync}
* {File Name Open, Allocate, Free} File
*
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
* Use them for all file activity...
*
* File fd;
* fd = FilePathOpenFile("foo", O_RDONLY, 0600);
*
* AllocateFile();
* FreeFile();
*
* Use AllocateFile, not fopen, if you need a stdio file (FILE*); then
* use FreeFile, not fclose, to close it. AVOID using stdio for files
* that you intend to hold open for any length of time, since there is
* no way for them to share kernel file descriptors with other files.
*
* Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate
* open directories (DIR*).
*/
#ifndef FD_H
#define FD_H
#include <dirent.h>
/*
* FileSeek uses the standard UNIX lseek(2) flags.
*/
typedef char *FileName;
typedef int File;
/* GUC parameter */
extern int max_files_per_process;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()
*/
extern int max_safe_fds;
/*
* prototypes for functions in fd.c
*/
/* Operations on virtual Files --- equivalent to Unix kernel file ops */
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file);
extern int FilePrefetch(File file, off_t offset, int amount);
extern int FileRead(File file, char *buffer, int amount);
extern int FileWrite(File file, char *buffer, int amount);
extern int FileSync(File file);
extern off_t FileSeek(File file, off_t offset, int whence);
extern int FileTruncate(File file, off_t offset);
extern char *FilePathName(File file);
/* Operations that allow use of regular stdio --- USE WITH CAUTION */
extern FILE *AllocateFile(const char *name, const char *mode);
extern int FreeFile(FILE *file);
/* Operations to allow use of the <dirent.h> library routines */
extern DIR *AllocateDir(const char *dirname);
extern struct dirent *ReadDir(DIR *dir, const char *dirname);
extern int FreeDir(DIR *dir);
/* If you've really really gotta have a plain kernel FD, use this */
extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
/* Miscellaneous support routines */
extern void InitFileAccess(void);
extern void set_max_safe_fds(void);
extern void closeAllVfds(void);
extern void SetTempTablespaces(Oid *tableSpaces, int numSpaces);
extern bool TempTablespacesAreSet(void);
extern Oid GetNextTempTableSpace(void);
extern void AtEOXact_Files(void);
extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
SubTransactionId parentSubid);
extern void RemovePgTempFiles(void);
extern int pg_fsync(int fd);
extern int pg_fsync_no_writethrough(int fd);
extern int pg_fsync_writethrough(int fd);
extern int pg_fdatasync(int fd);
extern int pg_flush_data(int fd, off_t offset, off_t amount);
/* Filename components for OpenTemporaryFile */
#define PG_TEMP_FILES_DIR "pgsql_tmp"
#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
#endif /* FD_H */

36
pg_include/storage/freespace.h Executable file
View File

@@ -0,0 +1,36 @@
/*-------------------------------------------------------------------------
*
* freespace.h
* POSTGRES free space map for quickly finding free space in relations
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/freespace.h
*
*-------------------------------------------------------------------------
*/
#ifndef FREESPACE_H_
#define FREESPACE_H_
#include "storage/block.h"
#include "storage/relfilenode.h"
#include "utils/relcache.h"
/* prototypes for public functions in freespace.c */
extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
BlockNumber oldPage,
Size oldSpaceAvail,
Size spaceNeeded);
extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
Size spaceAvail);
extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
Size spaceAvail);
extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
extern void FreeSpaceMapVacuum(Relation rel);
#endif /* FREESPACE_H_ */

View File

@@ -0,0 +1,72 @@
/*-------------------------------------------------------------------------
*
* fsm_internal.h
* internal functions for free space map
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/fsm_internals.h
*
*-------------------------------------------------------------------------
*/
#ifndef FSM_INTERNALS_H
#define FSM_INTERNALS_H
#include "storage/buf.h"
#include "storage/bufpage.h"
/*
* Structure of a FSM page. See src/backend/storage/freespace/README for
* details.
*/
typedef struct
{
/*
* fsm_search_avail() tries to spread the load of multiple backends by
* returning different pages to different backends in a round-robin
* fashion. fp_next_slot points to the next slot to be returned (assuming
* there's enough space on it for the request). It's defined as an int,
* because it's updated without an exclusive lock. uint16 would be more
* appropriate, but int is more likely to be atomically
* fetchable/storable.
*/
int fp_next_slot;
/*
* fp_nodes contains the binary tree, stored in array. The first
* NonLeafNodesPerPage elements are upper nodes, and the following
* LeafNodesPerPage elements are leaf nodes. Unused nodes are zero.
*/
uint8 fp_nodes[1];
} FSMPageData;
typedef FSMPageData *FSMPage;
/*
* Number of non-leaf and leaf nodes, and nodes in total, on an FSM page.
* These definitions are internal to fsmpage.c.
*/
#define NodesPerPage (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
offsetof(FSMPageData, fp_nodes))
#define NonLeafNodesPerPage (BLCKSZ / 2 - 1)
#define LeafNodesPerPage (NodesPerPage - NonLeafNodesPerPage)
/*
* Number of FSM "slots" on a FSM page. This is what should be used
* outside fsmpage.c.
*/
#define SlotsPerFSMPage LeafNodesPerPage
/* Prototypes for functions in fsmpage.c */
extern int fsm_search_avail(Buffer buf, uint8 min_cat, bool advancenext,
bool exclusive_lock_held);
extern uint8 fsm_get_avail(Page page, int slot);
extern uint8 fsm_get_max_avail(Page page);
extern bool fsm_set_avail(Page page, int slot, uint8 value);
extern bool fsm_truncate_avail(Page page, int nslots);
extern bool fsm_rebuild_page(Page page);
#endif /* FSM_INTERNALS_H */

26
pg_include/storage/indexfsm.h Executable file
View File

@@ -0,0 +1,26 @@
/*-------------------------------------------------------------------------
*
* indexfsm.h
* POSTGRES free space map for quickly finding an unused page in index
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/indexfsm.h
*
*-------------------------------------------------------------------------
*/
#ifndef INDEXFSM_H_
#define INDEXFSM_H_
#include "storage/block.h"
#include "utils/relcache.h"
extern BlockNumber GetFreeIndexPage(Relation rel);
extern void RecordFreeIndexPage(Relation rel, BlockNumber page);
extern void RecordUsedIndexPage(Relation rel, BlockNumber page);
extern void IndexFreeSpaceMapVacuum(Relation rel);
#endif /* INDEXFSM_H_ */

79
pg_include/storage/ipc.h Executable file
View File

@@ -0,0 +1,79 @@
/*-------------------------------------------------------------------------
*
* ipc.h
* POSTGRES inter-process communication definitions.
*
* This file is misnamed, as it no longer has much of anything directly
* to do with IPC. The functionality here is concerned with managing
* exit-time cleanup for either a postmaster or a backend.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/ipc.h
*
*-------------------------------------------------------------------------
*/
#ifndef IPC_H
#define IPC_H
typedef void (*pg_on_exit_callback) (int code, Datum arg);
typedef void (*shmem_startup_hook_type) (void);
/*----------
* API for handling cleanup that must occur during either ereport(ERROR)
* or ereport(FATAL) exits from a block of code. (Typical examples are
* undoing transient changes to shared-memory state.)
*
* PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg);
* {
* ... code that might throw ereport(ERROR) or ereport(FATAL) ...
* }
* PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg);
*
* where the cleanup code is in a function declared per pg_on_exit_callback.
* The Datum value "arg" can carry any information the cleanup function
* needs.
*
* This construct ensures that cleanup_function() will be called during
* either ERROR or FATAL exits. It will not be called on successful
* exit from the controlled code. (If you want it to happen then too,
* call the function yourself from just after the construct.)
*
* Note: the macro arguments are multiply evaluated, so avoid side-effects.
*----------
*/
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg) \
do { \
on_shmem_exit(cleanup_function, arg); \
PG_TRY()
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg) \
cancel_shmem_exit(cleanup_function, arg); \
PG_CATCH(); \
{ \
cancel_shmem_exit(cleanup_function, arg); \
cleanup_function (0, arg); \
PG_RE_THROW(); \
} \
PG_END_TRY(); \
} while (0)
/* ipc.c */
extern bool proc_exit_inprogress;
extern void proc_exit(int code);
extern void shmem_exit(int code);
extern void on_proc_exit(pg_on_exit_callback function, Datum arg);
extern void on_shmem_exit(pg_on_exit_callback function, Datum arg);
extern void cancel_shmem_exit(pg_on_exit_callback function, Datum arg);
extern void on_exit_reset(void);
/* ipci.c */
extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook;
extern void CreateSharedMemoryAndSemaphores(bool makePrivate, int port);
#endif /* IPC_H */

19
pg_include/storage/item.h Executable file
View File

@@ -0,0 +1,19 @@
/*-------------------------------------------------------------------------
*
* item.h
* POSTGRES disk item definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/item.h
*
*-------------------------------------------------------------------------
*/
#ifndef ITEM_H
#define ITEM_H
typedef Pointer Item;
#endif /* ITEM_H */

183
pg_include/storage/itemid.h Executable file
View File

@@ -0,0 +1,183 @@
/*-------------------------------------------------------------------------
*
* itemid.h
* Standard POSTGRES buffer page item identifier definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/itemid.h
*
*-------------------------------------------------------------------------
*/
#ifndef ITEMID_H
#define ITEMID_H
/*
* An item pointer (also called line pointer) on a buffer page
*
* In some cases an item pointer is "in use" but does not have any associated
* storage on the page. By convention, lp_len == 0 in every item pointer
* that does not have storage, independently of its lp_flags state.
*/
typedef struct ItemIdData
{
unsigned lp_off:15, /* offset to tuple (from start of page) */
lp_flags:2, /* state of item pointer, see below */
lp_len:15; /* byte length of tuple */
} ItemIdData;
typedef ItemIdData *ItemId;
/*
* lp_flags has these possible states. An UNUSED line pointer is available
* for immediate re-use, the other states are not.
*/
#define LP_UNUSED 0 /* unused (should always have lp_len=0) */
#define LP_NORMAL 1 /* used (should always have lp_len>0) */
#define LP_REDIRECT 2 /* HOT redirect (should have lp_len=0) */
#define LP_DEAD 3 /* dead, may or may not have storage */
/*
* Item offsets and lengths are represented by these types when
* they're not actually stored in an ItemIdData.
*/
typedef uint16 ItemOffset;
typedef uint16 ItemLength;
/* ----------------
* support macros
* ----------------
*/
/*
* ItemIdGetLength
*/
#define ItemIdGetLength(itemId) \
((itemId)->lp_len)
/*
* ItemIdGetOffset
*/
#define ItemIdGetOffset(itemId) \
((itemId)->lp_off)
/*
* ItemIdGetFlags
*/
#define ItemIdGetFlags(itemId) \
((itemId)->lp_flags)
/*
* ItemIdGetRedirect
* In a REDIRECT pointer, lp_off holds the link to the next item pointer
*/
#define ItemIdGetRedirect(itemId) \
((itemId)->lp_off)
/*
* ItemIdIsValid
* True iff item identifier is valid.
* This is a pretty weak test, probably useful only in Asserts.
*/
#define ItemIdIsValid(itemId) PointerIsValid(itemId)
/*
* ItemIdIsUsed
* True iff item identifier is in use.
*/
#define ItemIdIsUsed(itemId) \
((itemId)->lp_flags != LP_UNUSED)
/*
* ItemIdIsNormal
* True iff item identifier is in state NORMAL.
*/
#define ItemIdIsNormal(itemId) \
((itemId)->lp_flags == LP_NORMAL)
/*
* ItemIdIsRedirected
* True iff item identifier is in state REDIRECT.
*/
#define ItemIdIsRedirected(itemId) \
((itemId)->lp_flags == LP_REDIRECT)
/*
* ItemIdIsDead
* True iff item identifier is in state DEAD.
*/
#define ItemIdIsDead(itemId) \
((itemId)->lp_flags == LP_DEAD)
/*
* ItemIdHasStorage
* True iff item identifier has associated storage.
*/
#define ItemIdHasStorage(itemId) \
((itemId)->lp_len != 0)
/*
* ItemIdSetUnused
* Set the item identifier to be UNUSED, with no storage.
* Beware of multiple evaluations of itemId!
*/
#define ItemIdSetUnused(itemId) \
( \
(itemId)->lp_flags = LP_UNUSED, \
(itemId)->lp_off = 0, \
(itemId)->lp_len = 0 \
)
/*
* ItemIdSetNormal
* Set the item identifier to be NORMAL, with the specified storage.
* Beware of multiple evaluations of itemId!
*/
#define ItemIdSetNormal(itemId, off, len) \
( \
(itemId)->lp_flags = LP_NORMAL, \
(itemId)->lp_off = (off), \
(itemId)->lp_len = (len) \
)
/*
* ItemIdSetRedirect
* Set the item identifier to be REDIRECT, with the specified link.
* Beware of multiple evaluations of itemId!
*/
#define ItemIdSetRedirect(itemId, link) \
( \
(itemId)->lp_flags = LP_REDIRECT, \
(itemId)->lp_off = (link), \
(itemId)->lp_len = 0 \
)
/*
* ItemIdSetDead
* Set the item identifier to be DEAD, with no storage.
* Beware of multiple evaluations of itemId!
*/
#define ItemIdSetDead(itemId) \
( \
(itemId)->lp_flags = LP_DEAD, \
(itemId)->lp_off = 0, \
(itemId)->lp_len = 0 \
)
/*
* ItemIdMarkDead
* Set the item identifier to be DEAD, keeping its existing storage.
*
* Note: in indexes, this is used as if it were a hint-bit mechanism;
* we trust that multiple processors can do this in parallel and get
* the same result.
*/
#define ItemIdMarkDead(itemId) \
( \
(itemId)->lp_flags = LP_DEAD \
)
#endif /* ITEMID_H */

146
pg_include/storage/itemptr.h Executable file
View File

@@ -0,0 +1,146 @@
/*-------------------------------------------------------------------------
*
* itemptr.h
* POSTGRES disk item pointer definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/itemptr.h
*
*-------------------------------------------------------------------------
*/
#ifndef ITEMPTR_H
#define ITEMPTR_H
#include "storage/block.h"
#include "storage/off.h"
/*
* ItemPointer:
*
* This is a pointer to an item within a disk page of a known file
* (for example, a cross-link from an index to its parent table).
* blkid tells us which block, posid tells us which entry in the linp
* (ItemIdData) array we want.
*
* Note: because there is an item pointer in each tuple header and index
* tuple header on disk, it's very important not to waste space with
* structure padding bytes. The struct is designed to be six bytes long
* (it contains three int16 fields) but a few compilers will pad it to
* eight bytes unless coerced. We apply appropriate persuasion where
* possible, and to cope with unpersuadable compilers, we try to use
* "SizeOfIptrData" rather than "sizeof(ItemPointerData)" when computing
* on-disk sizes.
*/
typedef struct ItemPointerData
{
BlockIdData ip_blkid;
OffsetNumber ip_posid;
}
#ifdef __arm__
__attribute__((packed)) /* Appropriate whack upside the head for ARM */
#endif
ItemPointerData;
#define SizeOfIptrData \
(offsetof(ItemPointerData, ip_posid) + sizeof(OffsetNumber))
typedef ItemPointerData *ItemPointer;
/* ----------------
* support macros
* ----------------
*/
/*
* ItemPointerIsValid
* True iff the disk item pointer is not NULL.
*/
#define ItemPointerIsValid(pointer) \
((bool) (PointerIsValid(pointer) && ((pointer)->ip_posid != 0)))
/*
* ItemPointerGetBlockNumber
* Returns the block number of a disk item pointer.
*/
#define ItemPointerGetBlockNumber(pointer) \
( \
AssertMacro(ItemPointerIsValid(pointer)), \
BlockIdGetBlockNumber(&(pointer)->ip_blkid) \
)
/*
* ItemPointerGetOffsetNumber
* Returns the offset number of a disk item pointer.
*/
#define ItemPointerGetOffsetNumber(pointer) \
( \
AssertMacro(ItemPointerIsValid(pointer)), \
(pointer)->ip_posid \
)
/*
* ItemPointerSet
* Sets a disk item pointer to the specified block and offset.
*/
#define ItemPointerSet(pointer, blockNumber, offNum) \
( \
AssertMacro(PointerIsValid(pointer)), \
BlockIdSet(&((pointer)->ip_blkid), blockNumber), \
(pointer)->ip_posid = offNum \
)
/*
* ItemPointerSetBlockNumber
* Sets a disk item pointer to the specified block.
*/
#define ItemPointerSetBlockNumber(pointer, blockNumber) \
( \
AssertMacro(PointerIsValid(pointer)), \
BlockIdSet(&((pointer)->ip_blkid), blockNumber) \
)
/*
* ItemPointerSetOffsetNumber
* Sets a disk item pointer to the specified offset.
*/
#define ItemPointerSetOffsetNumber(pointer, offsetNumber) \
( \
AssertMacro(PointerIsValid(pointer)), \
(pointer)->ip_posid = (offsetNumber) \
)
/*
* ItemPointerCopy
* Copies the contents of one disk item pointer to another.
*/
#define ItemPointerCopy(fromPointer, toPointer) \
( \
AssertMacro(PointerIsValid(toPointer)), \
AssertMacro(PointerIsValid(fromPointer)), \
*(toPointer) = *(fromPointer) \
)
/*
* ItemPointerSetInvalid
* Sets a disk item pointer to be invalid.
*/
#define ItemPointerSetInvalid(pointer) \
( \
AssertMacro(PointerIsValid(pointer)), \
BlockIdSet(&((pointer)->ip_blkid), InvalidBlockNumber), \
(pointer)->ip_posid = InvalidOffsetNumber \
)
/* ----------------
* externs
* ----------------
*/
extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2);
extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2);
#endif /* ITEMPTR_H */

View File

@@ -0,0 +1,83 @@
/*-------------------------------------------------------------------------
*
* large_object.h
* Declarations for PostgreSQL large objects. POSTGRES 4.2 supported
* zillions of large objects (internal, external, jaquith, inversion).
* Now we only support inversion.
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/large_object.h
*
*-------------------------------------------------------------------------
*/
#ifndef LARGE_OBJECT_H
#define LARGE_OBJECT_H
#include "utils/snapshot.h"
/*----------
* Data about a currently-open large object.
*
* id is the logical OID of the large object
* snapshot is the snapshot to use for read/write operations
* subid is the subtransaction that opened the desc (or currently owns it)
* offset is the current seek offset within the LO
* flags contains some flag bits
*
* NOTE: before 7.1, we also had to store references to the separate table
* and index of a specific large object. Now they all live in pg_largeobject
* and are accessed via a common relation descriptor.
*----------
*/
typedef struct LargeObjectDesc
{
Oid id; /* LO's identifier */
Snapshot snapshot; /* snapshot to use */
SubTransactionId subid; /* owning subtransaction ID */
uint32 offset; /* current seek pointer */
int flags; /* locking info, etc */
/* flag bits: */
#define IFS_RDLOCK (1 << 0)
#define IFS_WRLOCK (1 << 1)
} LargeObjectDesc;
/*
* Each "page" (tuple) of a large object can hold this much data
*
* We could set this as high as BLCKSZ less some overhead, but it seems
* better to make it a smaller value, so that not as much space is used
* up when a page-tuple is updated. Note that the value is deliberately
* chosen large enough to trigger the tuple toaster, so that we will
* attempt to compress page tuples in-line. (But they won't be moved off
* unless the user creates a toast-table for pg_largeobject...)
*
* Also, it seems to be a smart move to make the page size be a power of 2,
* since clients will often be written to send data in power-of-2 blocks.
* This avoids unnecessary tuple updates caused by partial-page writes.
*/
#define LOBLKSIZE (BLCKSZ / 4)
/*
* Function definitions...
*/
/* inversion stuff in inv_api.c */
extern void close_lo_relation(bool isCommit);
extern Oid inv_create(Oid lobjId);
extern LargeObjectDesc *inv_open(Oid lobjId, int flags, MemoryContext mcxt);
extern void inv_close(LargeObjectDesc *obj_desc);
extern int inv_drop(Oid lobjId);
extern int inv_seek(LargeObjectDesc *obj_desc, int offset, int whence);
extern int inv_tell(LargeObjectDesc *obj_desc);
extern int inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes);
extern int inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes);
extern void inv_truncate(LargeObjectDesc *obj_desc, int len);
#endif /* LARGE_OBJECT_H */

139
pg_include/storage/latch.h Executable file
View File

@@ -0,0 +1,139 @@
/*-------------------------------------------------------------------------
*
* latch.h
* Routines for interprocess latches
*
* A latch is a boolean variable, with operations that let processes sleep
* until it is set. A latch can be set from another process, or a signal
* handler within the same process.
*
* The latch interface is a reliable replacement for the common pattern of
* using pg_usleep() or select() to wait until a signal arrives, where the
* signal handler sets a flag variable. Because on some platforms an
* incoming signal doesn't interrupt sleep, and even on platforms where it
* does there is a race condition if the signal arrives just before
* entering the sleep, the common pattern must periodically wake up and
* poll the flag variable. The pselect() system call was invented to solve
* this problem, but it is not portable enough. Latches are designed to
* overcome these limitations, allowing you to sleep without polling and
* ensuring quick response to signals from other processes.
*
* There are two kinds of latches: local and shared. A local latch is
* initialized by InitLatch, and can only be set from the same process.
* A local latch can be used to wait for a signal to arrive, by calling
* SetLatch in the signal handler. A shared latch resides in shared memory,
* and must be initialized at postmaster startup by InitSharedLatch. Before
* a shared latch can be waited on, it must be associated with a process
* with OwnLatch. Only the process owning the latch can wait on it, but any
* process can set it.
*
* There are three basic operations on a latch:
*
* SetLatch - Sets the latch
* ResetLatch - Clears the latch, allowing it to be set again
* WaitLatch - Waits for the latch to become set
*
* WaitLatch includes a provision for timeouts (which should be avoided
* when possible, as they incur extra overhead) and a provision for
* postmaster child processes to wake up immediately on postmaster death.
* See unix_latch.c for detailed specifications for the exported functions.
*
* The correct pattern to wait for event(s) is:
*
* for (;;)
* {
* ResetLatch();
* if (work to do)
* Do Stuff();
* WaitLatch();
* }
*
* It's important to reset the latch *before* checking if there's work to
* do. Otherwise, if someone sets the latch between the check and the
* ResetLatch call, you will miss it and Wait will incorrectly block.
*
* To wake up the waiter, you must first set a global flag or something
* else that the wait loop tests in the "if (work to do)" part, and call
* SetLatch *after* that. SetLatch is designed to return quickly if the
* latch is already set.
*
* Presently, when using a shared latch for interprocess signalling, the
* flag variable(s) set by senders and inspected by the wait loop must
* be protected by spinlocks or LWLocks, else it is possible to miss events
* on machines with weak memory ordering (such as PPC). This restriction
* will be lifted in future by inserting suitable memory barriers into
* SetLatch and ResetLatch.
*
* On some platforms, signals will not interrupt the latch wait primitive
* by themselves. Therefore, it is critical that any signal handler that
* is meant to terminate a WaitLatch wait calls SetLatch.
*
* Note that use of the process latch (PGPROC.procLatch) is generally better
* than an ad-hoc shared latch for signaling auxiliary processes. This is
* because generic signal handlers will call SetLatch on the process latch
* only, so using any latch other than the process latch effectively precludes
* use of any generic handler.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/latch.h
*
*-------------------------------------------------------------------------
*/
#ifndef LATCH_H
#define LATCH_H
#include <signal.h>
/*
* Latch structure should be treated as opaque and only accessed through
* the public functions. It is defined here to allow embedding Latches as
* part of bigger structs.
*/
typedef struct
{
sig_atomic_t is_set;
bool is_shared;
int owner_pid;
#ifdef WIN32
HANDLE event;
#endif
} Latch;
/* Bitmasks for events that may wake-up WaitLatch() clients */
#define WL_LATCH_SET (1 << 0)
#define WL_SOCKET_READABLE (1 << 1)
#define WL_SOCKET_WRITEABLE (1 << 2)
#define WL_TIMEOUT (1 << 3)
#define WL_POSTMASTER_DEATH (1 << 4)
/*
* prototypes for functions in latch.c
*/
extern void InitializeLatchSupport(void);
extern void InitLatch(volatile Latch *latch);
extern void InitSharedLatch(volatile Latch *latch);
extern void OwnLatch(volatile Latch *latch);
extern void DisownLatch(volatile Latch *latch);
extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout);
extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents,
pgsocket sock, long timeout);
extern void SetLatch(volatile Latch *latch);
extern void ResetLatch(volatile Latch *latch);
/* beware of memory ordering issues if you use this macro! */
#define TestLatch(latch) (((volatile Latch *) (latch))->is_set)
/*
* Unix implementation uses SIGUSR1 for inter-process signaling.
* Win32 doesn't need this.
*/
#ifndef WIN32
extern void latch_sigusr1_handler(void);
#else
#define latch_sigusr1_handler() ((void) 0)
#endif
#endif /* LATCH_H */

80
pg_include/storage/lmgr.h Executable file
View File

@@ -0,0 +1,80 @@
/*-------------------------------------------------------------------------
*
* lmgr.h
* POSTGRES lock manager definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/lmgr.h
*
*-------------------------------------------------------------------------
*/
#ifndef LMGR_H
#define LMGR_H
#include "lib/stringinfo.h"
#include "storage/itemptr.h"
#include "storage/lock.h"
#include "utils/rel.h"
extern void RelationInitLockInfo(Relation relation);
/* Lock a relation */
extern void LockRelationOid(Oid relid, LOCKMODE lockmode);
extern bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode);
extern void UnlockRelationId(LockRelId *relid, LOCKMODE lockmode);
extern void UnlockRelationOid(Oid relid, LOCKMODE lockmode);
extern void LockRelation(Relation relation, LOCKMODE lockmode);
extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode);
extern void UnlockRelation(Relation relation, LOCKMODE lockmode);
extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode);
extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
/* Lock a relation for extension */
extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode);
extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode);
/* Lock a page (currently only used within indexes) */
extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
/* Lock a tuple (see heap_lock_tuple before assuming you understand this) */
extern void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode);
extern bool ConditionalLockTuple(Relation relation, ItemPointer tid,
LOCKMODE lockmode);
extern void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode);
/* Lock an XID (used to wait for a transaction to finish) */
extern void XactLockTableInsert(TransactionId xid);
extern void XactLockTableDelete(TransactionId xid);
extern void XactLockTableWait(TransactionId xid);
extern bool ConditionalXactLockTableWait(TransactionId xid);
/* Lock a general object (other than a relation) of the current database */
extern void LockDatabaseObject(Oid classid, Oid objid, uint16 objsubid,
LOCKMODE lockmode);
extern void UnlockDatabaseObject(Oid classid, Oid objid, uint16 objsubid,
LOCKMODE lockmode);
/* Lock a shared-across-databases object (other than a relation) */
extern void LockSharedObject(Oid classid, Oid objid, uint16 objsubid,
LOCKMODE lockmode);
extern void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid,
LOCKMODE lockmode);
extern void LockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid,
LOCKMODE lockmode);
extern void UnlockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid,
LOCKMODE lockmode);
/* Describe a locktag for error messages */
extern void DescribeLockTag(StringInfo buf, const LOCKTAG *tag);
#endif /* LMGR_H */

552
pg_include/storage/lock.h Executable file
View File

@@ -0,0 +1,552 @@
/*-------------------------------------------------------------------------
*
* lock.h
* POSTGRES low-level lock mechanism
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/lock.h
*
*-------------------------------------------------------------------------
*/
#ifndef LOCK_H_
#define LOCK_H_
#include "storage/backendid.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
/* struct PGPROC is declared in proc.h, but must forward-reference it */
typedef struct PGPROC PGPROC;
typedef struct PROC_QUEUE
{
SHM_QUEUE links; /* head of list of PGPROC objects */
int size; /* number of entries in list */
} PROC_QUEUE;
/* GUC variables */
extern int max_locks_per_xact;
#ifdef LOCK_DEBUG
extern int Trace_lock_oidmin;
extern bool Trace_locks;
extern bool Trace_userlocks;
extern int Trace_lock_table;
extern bool Debug_deadlocks;
#endif /* LOCK_DEBUG */
/*
* Top-level transactions are identified by VirtualTransactionIDs comprising
* the BackendId of the backend running the xact, plus a locally-assigned
* LocalTransactionId. These are guaranteed unique over the short term,
* but will be reused after a database restart; hence they should never
* be stored on disk.
*
* Note that struct VirtualTransactionId can not be assumed to be atomically
* assignable as a whole. However, type LocalTransactionId is assumed to
* be atomically assignable, and the backend ID doesn't change often enough
* to be a problem, so we can fetch or assign the two fields separately.
* We deliberately refrain from using the struct within PGPROC, to prevent
* coding errors from trying to use struct assignment with it; instead use
* GET_VXID_FROM_PGPROC().
*/
typedef struct
{
BackendId backendId; /* determined at backend startup */
LocalTransactionId localTransactionId; /* backend-local transaction
* id */
} VirtualTransactionId;
#define InvalidLocalTransactionId 0
#define LocalTransactionIdIsValid(lxid) ((lxid) != InvalidLocalTransactionId)
#define VirtualTransactionIdIsValid(vxid) \
(((vxid).backendId != InvalidBackendId) && \
LocalTransactionIdIsValid((vxid).localTransactionId))
#define VirtualTransactionIdEquals(vxid1, vxid2) \
((vxid1).backendId == (vxid2).backendId && \
(vxid1).localTransactionId == (vxid2).localTransactionId)
#define SetInvalidVirtualTransactionId(vxid) \
((vxid).backendId = InvalidBackendId, \
(vxid).localTransactionId = InvalidLocalTransactionId)
#define GET_VXID_FROM_PGPROC(vxid, proc) \
((vxid).backendId = (proc).backendId, \
(vxid).localTransactionId = (proc).lxid)
/*
* LOCKMODE is an integer (1..N) indicating a lock type. LOCKMASK is a bit
* mask indicating a set of held or requested lock types (the bit 1<<mode
* corresponds to a particular lock mode).
*/
typedef int LOCKMASK;
typedef int LOCKMODE;
/* MAX_LOCKMODES cannot be larger than the # of bits in LOCKMASK */
#define MAX_LOCKMODES 10
#define LOCKBIT_ON(lockmode) (1 << (lockmode))
#define LOCKBIT_OFF(lockmode) (~(1 << (lockmode)))
/*
* This data structure defines the locking semantics associated with a
* "lock method". The semantics specify the meaning of each lock mode
* (by defining which lock modes it conflicts with).
* All of this data is constant and is kept in const tables.
*
* numLockModes -- number of lock modes (READ,WRITE,etc) that
* are defined in this lock method. Must be less than MAX_LOCKMODES.
*
* conflictTab -- this is an array of bitmasks showing lock
* mode conflicts. conflictTab[i] is a mask with the j-th bit
* turned on if lock modes i and j conflict. Lock modes are
* numbered 1..numLockModes; conflictTab[0] is unused.
*
* lockModeNames -- ID strings for debug printouts.
*
* trace_flag -- pointer to GUC trace flag for this lock method. (The
* GUC variable is not constant, but we use "const" here to denote that
* it can't be changed through this reference.)
*/
typedef struct LockMethodData
{
int numLockModes;
const LOCKMASK *conflictTab;
const char *const * lockModeNames;
const bool *trace_flag;
} LockMethodData;
typedef const LockMethodData *LockMethod;
/*
* Lock methods are identified by LOCKMETHODID. (Despite the declaration as
* uint16, we are constrained to 256 lockmethods by the layout of LOCKTAG.)
*/
typedef uint16 LOCKMETHODID;
/* These identify the known lock methods */
#define DEFAULT_LOCKMETHOD 1
#define USER_LOCKMETHOD 2
/*
* These are the valid values of type LOCKMODE for all the standard lock
* methods (both DEFAULT and USER).
*/
/* NoLock is not a lock mode, but a flag value meaning "don't get a lock" */
#define NoLock 0
#define AccessShareLock 1 /* SELECT */
#define RowShareLock 2 /* SELECT FOR UPDATE/FOR SHARE */
#define RowExclusiveLock 3 /* INSERT, UPDATE, DELETE */
#define ShareUpdateExclusiveLock 4 /* VACUUM (non-FULL),ANALYZE, CREATE
* INDEX CONCURRENTLY */
#define ShareLock 5 /* CREATE INDEX (WITHOUT CONCURRENTLY) */
#define ShareRowExclusiveLock 6 /* like EXCLUSIVE MODE, but allows ROW
* SHARE */
#define ExclusiveLock 7 /* blocks ROW SHARE/SELECT...FOR
* UPDATE */
#define AccessExclusiveLock 8 /* ALTER TABLE, DROP TABLE, VACUUM
* FULL, and unqualified LOCK TABLE */
/*
* LOCKTAG is the key information needed to look up a LOCK item in the
* lock hashtable. A LOCKTAG value uniquely identifies a lockable object.
*
* The LockTagType enum defines the different kinds of objects we can lock.
* We can handle up to 256 different LockTagTypes.
*/
typedef enum LockTagType
{
LOCKTAG_RELATION, /* whole relation */
/* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */
LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */
/* same ID info as RELATION */
LOCKTAG_PAGE, /* one page of a relation */
/* ID info for a page is RELATION info + BlockNumber */
LOCKTAG_TUPLE, /* one physical tuple */
/* ID info for a tuple is PAGE info + OffsetNumber */
LOCKTAG_TRANSACTION, /* transaction (for waiting for xact done) */
/* ID info for a transaction is its TransactionId */
LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */
/* ID info for a virtual transaction is its VirtualTransactionId */
LOCKTAG_OBJECT, /* non-relation database object */
/* ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID */
/*
* Note: object ID has same representation as in pg_depend and
* pg_description, but notice that we are constraining SUBID to 16 bits.
* Also, we use DB OID = 0 for shared objects such as tablespaces.
*/
LOCKTAG_USERLOCK, /* reserved for old contrib/userlock code */
LOCKTAG_ADVISORY /* advisory user locks */
} LockTagType;
#define LOCKTAG_LAST_TYPE LOCKTAG_ADVISORY
/*
* The LOCKTAG struct is defined with malice aforethought to fit into 16
* bytes with no padding. Note that this would need adjustment if we were
* to widen Oid, BlockNumber, or TransactionId to more than 32 bits.
*
* We include lockmethodid in the locktag so that a single hash table in
* shared memory can store locks of different lockmethods.
*/
typedef struct LOCKTAG
{
uint32 locktag_field1; /* a 32-bit ID field */
uint32 locktag_field2; /* a 32-bit ID field */
uint32 locktag_field3; /* a 32-bit ID field */
uint16 locktag_field4; /* a 16-bit ID field */
uint8 locktag_type; /* see enum LockTagType */
uint8 locktag_lockmethodid; /* lockmethod indicator */
} LOCKTAG;
/*
* These macros define how we map logical IDs of lockable objects into
* the physical fields of LOCKTAG. Use these to set up LOCKTAG values,
* rather than accessing the fields directly. Note multiple eval of target!
*/
#define SET_LOCKTAG_RELATION(locktag,dboid,reloid) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = 0, \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_RELATION, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
#define SET_LOCKTAG_RELATION_EXTEND(locktag,dboid,reloid) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = 0, \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = (blocknum), \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_PAGE, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
#define SET_LOCKTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = (blocknum), \
(locktag).locktag_field4 = (offnum), \
(locktag).locktag_type = LOCKTAG_TUPLE, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
#define SET_LOCKTAG_TRANSACTION(locktag,xid) \
((locktag).locktag_field1 = (xid), \
(locktag).locktag_field2 = 0, \
(locktag).locktag_field3 = 0, \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_TRANSACTION, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \
((locktag).locktag_field1 = (vxid).backendId, \
(locktag).locktag_field2 = (vxid).localTransactionId, \
(locktag).locktag_field3 = 0, \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
#define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (classoid), \
(locktag).locktag_field3 = (objoid), \
(locktag).locktag_field4 = (objsubid), \
(locktag).locktag_type = LOCKTAG_OBJECT, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
#define SET_LOCKTAG_ADVISORY(locktag,id1,id2,id3,id4) \
((locktag).locktag_field1 = (id1), \
(locktag).locktag_field2 = (id2), \
(locktag).locktag_field3 = (id3), \
(locktag).locktag_field4 = (id4), \
(locktag).locktag_type = LOCKTAG_ADVISORY, \
(locktag).locktag_lockmethodid = USER_LOCKMETHOD)
/*
* Per-locked-object lock information:
*
* tag -- uniquely identifies the object being locked
* grantMask -- bitmask for all lock types currently granted on this object.
* waitMask -- bitmask for all lock types currently awaited on this object.
* procLocks -- list of PROCLOCK objects for this lock.
* waitProcs -- queue of processes waiting for this lock.
* requested -- count of each lock type currently requested on the lock
* (includes requests already granted!!).
* nRequested -- total requested locks of all types.
* granted -- count of each lock type currently granted on the lock.
* nGranted -- total granted locks of all types.
*
* Note: these counts count 1 for each backend. Internally to a backend,
* there may be multiple grabs on a particular lock, but this is not reflected
* into shared memory.
*/
typedef struct LOCK
{
/* hash key */
LOCKTAG tag; /* unique identifier of lockable object */
/* data */
LOCKMASK grantMask; /* bitmask for lock types already granted */
LOCKMASK waitMask; /* bitmask for lock types awaited */
SHM_QUEUE procLocks; /* list of PROCLOCK objects assoc. with lock */
PROC_QUEUE waitProcs; /* list of PGPROC objects waiting on lock */
int requested[MAX_LOCKMODES]; /* counts of requested locks */
int nRequested; /* total of requested[] array */
int granted[MAX_LOCKMODES]; /* counts of granted locks */
int nGranted; /* total of granted[] array */
} LOCK;
#define LOCK_LOCKMETHOD(lock) ((LOCKMETHODID) (lock).tag.locktag_lockmethodid)
/*
* We may have several different backends holding or awaiting locks
* on the same lockable object. We need to store some per-holder/waiter
* information for each such holder (or would-be holder). This is kept in
* a PROCLOCK struct.
*
* PROCLOCKTAG is the key information needed to look up a PROCLOCK item in the
* proclock hashtable. A PROCLOCKTAG value uniquely identifies the combination
* of a lockable object and a holder/waiter for that object. (We can use
* pointers here because the PROCLOCKTAG need only be unique for the lifespan
* of the PROCLOCK, and it will never outlive the lock or the proc.)
*
* Internally to a backend, it is possible for the same lock to be held
* for different purposes: the backend tracks transaction locks separately
* from session locks. However, this is not reflected in the shared-memory
* state: we only track which backend(s) hold the lock. This is OK since a
* backend can never block itself.
*
* The holdMask field shows the already-granted locks represented by this
* proclock. Note that there will be a proclock object, possibly with
* zero holdMask, for any lock that the process is currently waiting on.
* Otherwise, proclock objects whose holdMasks are zero are recycled
* as soon as convenient.
*
* releaseMask is workspace for LockReleaseAll(): it shows the locks due
* to be released during the current call. This must only be examined or
* set by the backend owning the PROCLOCK.
*
* Each PROCLOCK object is linked into lists for both the associated LOCK
* object and the owning PGPROC object. Note that the PROCLOCK is entered
* into these lists as soon as it is created, even if no lock has yet been
* granted. A PGPROC that is waiting for a lock to be granted will also be
* linked into the lock's waitProcs queue.
*/
typedef struct PROCLOCKTAG
{
/* NB: we assume this struct contains no padding! */
LOCK *myLock; /* link to per-lockable-object information */
PGPROC *myProc; /* link to PGPROC of owning backend */
} PROCLOCKTAG;
typedef struct PROCLOCK
{
/* tag */
PROCLOCKTAG tag; /* unique identifier of proclock object */
/* data */
LOCKMASK holdMask; /* bitmask for lock types currently held */
LOCKMASK releaseMask; /* bitmask for lock types to be released */
SHM_QUEUE lockLink; /* list link in LOCK's list of proclocks */
SHM_QUEUE procLink; /* list link in PGPROC's list of proclocks */
} PROCLOCK;
#define PROCLOCK_LOCKMETHOD(proclock) \
LOCK_LOCKMETHOD(*((proclock).tag.myLock))
/*
* Each backend also maintains a local hash table with information about each
* lock it is currently interested in. In particular the local table counts
* the number of times that lock has been acquired. This allows multiple
* requests for the same lock to be executed without additional accesses to
* shared memory. We also track the number of lock acquisitions per
* ResourceOwner, so that we can release just those locks belonging to a
* particular ResourceOwner.
*/
typedef struct LOCALLOCKTAG
{
LOCKTAG lock; /* identifies the lockable object */
LOCKMODE mode; /* lock mode for this table entry */
} LOCALLOCKTAG;
typedef struct LOCALLOCKOWNER
{
/*
* Note: if owner is NULL then the lock is held on behalf of the session;
* otherwise it is held on behalf of my current transaction.
*
* Must use a forward struct reference to avoid circularity.
*/
struct ResourceOwnerData *owner;
int64 nLocks; /* # of times held by this owner */
} LOCALLOCKOWNER;
typedef struct LOCALLOCK
{
/* tag */
LOCALLOCKTAG tag; /* unique identifier of locallock entry */
/* data */
LOCK *lock; /* associated LOCK object in shared mem */
PROCLOCK *proclock; /* associated PROCLOCK object in shmem */
uint32 hashcode; /* copy of LOCKTAG's hash value */
int64 nLocks; /* total number of times lock is held */
int numLockOwners; /* # of relevant ResourceOwners */
int maxLockOwners; /* allocated size of array */
bool holdsStrongLockCount; /* bumped FastPathStrongRelatonLocks? */
LOCALLOCKOWNER *lockOwners; /* dynamically resizable array */
} LOCALLOCK;
#define LOCALLOCK_LOCKMETHOD(llock) ((llock).tag.lock.locktag_lockmethodid)
/*
* These structures hold information passed from lmgr internals to the lock
* listing user-level functions (in lockfuncs.c).
*/
typedef struct LockInstanceData
{
LOCKTAG locktag; /* locked object */
LOCKMASK holdMask; /* locks held by this PGPROC */
LOCKMODE waitLockMode; /* lock awaited by this PGPROC, if any */
BackendId backend; /* backend ID of this PGPROC */
LocalTransactionId lxid; /* local transaction ID of this PGPROC */
int pid; /* pid of this PGPROC */
bool fastpath; /* taken via fastpath? */
} LockInstanceData;
typedef struct LockData
{
int nelements; /* The length of the array */
LockInstanceData *locks;
} LockData;
/* Result codes for LockAcquire() */
typedef enum
{
LOCKACQUIRE_NOT_AVAIL, /* lock not available, and dontWait=true */
LOCKACQUIRE_OK, /* lock successfully acquired */
LOCKACQUIRE_ALREADY_HELD /* incremented count for lock already held */
} LockAcquireResult;
/* Deadlock states identified by DeadLockCheck() */
typedef enum
{
DS_NOT_YET_CHECKED, /* no deadlock check has run yet */
DS_NO_DEADLOCK, /* no deadlock detected */
DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */
DS_HARD_DEADLOCK, /* deadlock, no way out but ERROR */
DS_BLOCKED_BY_AUTOVACUUM /* no deadlock; queue blocked by autovacuum
* worker */
} DeadLockState;
/*
* The lockmgr's shared hash tables are partitioned to reduce contention.
* To determine which partition a given locktag belongs to, compute the tag's
* hash code with LockTagHashCode(), then apply one of these macros.
* NB: NUM_LOCK_PARTITIONS must be a power of 2!
*/
#define LockHashPartition(hashcode) \
((hashcode) % NUM_LOCK_PARTITIONS)
#define LockHashPartitionLock(hashcode) \
((LWLockId) (FirstLockMgrLock + LockHashPartition(hashcode)))
/*
* function prototypes
*/
extern void InitLocks(void);
extern LockMethod GetLocksMethodTable(const LOCK *lock);
extern uint32 LockTagHashCode(const LOCKTAG *locktag);
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
LOCKMODE lockmode,
bool sessionLock,
bool dontWait);
extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag,
LOCKMODE lockmode,
bool sessionLock,
bool dontWait,
bool report_memory_error);
extern void AbortStrongLockAcquire(void);
extern bool LockRelease(const LOCKTAG *locktag,
LOCKMODE lockmode, bool sessionLock);
extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
extern void LockReleaseSession(LOCKMETHODID lockmethodid);
extern void LockReleaseCurrentOwner(void);
extern void LockReassignCurrentOwner(void);
extern bool LockHasWaiters(const LOCKTAG *locktag,
LOCKMODE lockmode, bool sessionLock);
extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
LOCKMODE lockmode);
extern void AtPrepare_Locks(void);
extern void PostPrepare_Locks(TransactionId xid);
extern int LockCheckConflicts(LockMethod lockMethodTable,
LOCKMODE lockmode,
LOCK *lock, PROCLOCK *proclock, PGPROC *proc);
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
extern void GrantAwaitedLock(void);
extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode);
extern Size LockShmemSize(void);
extern LockData *GetLockStatusData(void);
extern void ReportLockTableError(bool report);
typedef struct xl_standby_lock
{
TransactionId xid; /* xid of holder of AccessExclusiveLock */
Oid dbOid;
Oid relOid;
} xl_standby_lock;
extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks);
extern const char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode);
extern void lock_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern void lock_twophase_postabort(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern void lock_twophase_standby_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern DeadLockState DeadLockCheck(PGPROC *proc);
extern PGPROC *GetBlockingAutoVacuumPgproc(void);
extern void DeadLockReport(void);
extern void RememberSimpleDeadLock(PGPROC *proc1,
LOCKMODE lockmode,
LOCK *lock,
PGPROC *proc2);
extern void InitDeadLockChecking(void);
#ifdef LOCK_DEBUG
extern void DumpLocks(PGPROC *proc);
extern void DumpAllLocks(void);
#endif
/* Lock a VXID (used to wait for a transaction to finish) */
extern void VirtualXactLockTableInsert(VirtualTransactionId vxid);
extern void VirtualXactLockTableCleanup(void);
extern bool VirtualXactLock(VirtualTransactionId vxid, bool wait);
#endif /* LOCK_H */

122
pg_include/storage/lwlock.h Executable file
View File

@@ -0,0 +1,122 @@
/*-------------------------------------------------------------------------
*
* lwlock.h
* Lightweight lock manager
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/lwlock.h
*
*-------------------------------------------------------------------------
*/
#ifndef LWLOCK_H
#define LWLOCK_H
/*
* It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS
* here, but we need them to set up enum LWLockId correctly, and having
* this file include lock.h or bufmgr.h would be backwards.
*/
/* Number of partitions of the shared buffer mapping hashtable */
#define NUM_BUFFER_PARTITIONS 16
/* Number of partitions the shared lock tables are divided into */
#define LOG2_NUM_LOCK_PARTITIONS 4
#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS)
/* Number of partitions the shared predicate lock tables are divided into */
#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4
#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
/*
* We have a number of predefined LWLocks, plus a bunch of LWLocks that are
* dynamically assigned (e.g., for shared buffers). The LWLock structures
* live in shared memory (since they contain shared data) and are identified
* by values of this enumerated type. We abuse the notion of an enum somewhat
* by allowing values not listed in the enum declaration to be assigned.
* The extra value MaxDynamicLWLock is there to keep the compiler from
* deciding that the enum can be represented as char or short ...
*
* If you remove a lock, please replace it with a placeholder. This retains
* the lock numbering, which is helpful for DTrace and other external
* debugging scripts.
*/
typedef enum LWLockId
{
BufFreelistLock,
ShmemIndexLock,
OidGenLock,
XidGenLock,
ProcArrayLock,
SInvalReadLock,
SInvalWriteLock,
WALInsertLock,
WALWriteLock,
ControlFileLock,
CheckpointLock,
CLogControlLock,
SubtransControlLock,
MultiXactGenLock,
MultiXactOffsetControlLock,
MultiXactMemberControlLock,
RelCacheInitLock,
CheckpointerCommLock,
TwoPhaseStateLock,
TablespaceCreateLock,
BtreeVacuumLock,
AddinShmemInitLock,
AutovacuumLock,
AutovacuumScheduleLock,
SyncScanLock,
RelationMappingLock,
AsyncCtlLock,
AsyncQueueLock,
SerializableXactHashLock,
SerializableFinishedListLock,
SerializablePredicateLockListLock,
OldSerXidLock,
SyncRepLock,
/* Individual lock IDs end here */
FirstBufMappingLock,
FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS,
/* must be last except for MaxDynamicLWLock: */
NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS,
MaxDynamicLWLock = 1000000000
} LWLockId;
typedef enum LWLockMode
{
LW_EXCLUSIVE,
LW_SHARED,
LW_WAIT_UNTIL_FREE /* A special mode used in PGPROC->lwlockMode,
* when waiting for lock to become free. Not
* to be used as LWLockAcquire argument */
} LWLockMode;
#ifdef LOCK_DEBUG
extern bool Trace_lwlocks;
#endif
extern LWLockId LWLockAssign(void);
extern void LWLockAcquire(LWLockId lockid, LWLockMode mode);
extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode);
extern bool LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode);
extern void LWLockRelease(LWLockId lockid);
extern void LWLockReleaseAll(void);
extern bool LWLockHeldByMe(LWLockId lockid);
extern int NumLWLocks(void);
extern Size LWLockShmemSize(void);
extern void CreateLWLocks(void);
extern void RequestAddinLWLocks(int n);
#endif /* LWLOCK_H */

58
pg_include/storage/off.h Executable file
View File

@@ -0,0 +1,58 @@
/*-------------------------------------------------------------------------
*
* off.h
* POSTGRES disk "offset" definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/off.h
*
*-------------------------------------------------------------------------
*/
#ifndef OFF_H
#define OFF_H
#include "storage/itemid.h"
/*
* OffsetNumber:
*
* this is a 1-based index into the linp (ItemIdData) array in the
* header of each disk page.
*/
typedef uint16 OffsetNumber;
#define InvalidOffsetNumber ((OffsetNumber) 0)
#define FirstOffsetNumber ((OffsetNumber) 1)
#define MaxOffsetNumber ((OffsetNumber) (BLCKSZ / sizeof(ItemIdData)))
#define OffsetNumberMask (0xffff) /* valid uint16 bits */
/* ----------------
* support macros
* ----------------
*/
/*
* OffsetNumberIsValid
* True iff the offset number is valid.
*/
#define OffsetNumberIsValid(offsetNumber) \
((bool) ((offsetNumber != InvalidOffsetNumber) && \
(offsetNumber <= MaxOffsetNumber)))
/*
* OffsetNumberNext
* OffsetNumberPrev
* Increments/decrements the argument. These macros look pointless
* but they help us disambiguate the different manipulations on
* OffsetNumbers (e.g., sometimes we subtract one from an
* OffsetNumber to move back, and sometimes we do so to form a
* real C array index).
*/
#define OffsetNumberNext(offsetNumber) \
((OffsetNumber) (1 + (offsetNumber)))
#define OffsetNumberPrev(offsetNumber) \
((OffsetNumber) (-1 + (offsetNumber)))
#endif /* OFF_H */

83
pg_include/storage/pg_sema.h Executable file
View File

@@ -0,0 +1,83 @@
/*-------------------------------------------------------------------------
*
* pg_sema.h
* Platform-independent API for semaphores.
*
* PostgreSQL requires counting semaphores (the kind that keep track of
* multiple unlock operations, and will allow an equal number of subsequent
* lock operations before blocking). The underlying implementation is
* not the same on every platform. This file defines the API that must
* be provided by each port.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/pg_sema.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_SEMA_H
#define PG_SEMA_H
/*
* PGSemaphoreData and pointer type PGSemaphore are the data structure
* representing an individual semaphore. The contents of PGSemaphoreData
* vary across implementations and must never be touched by platform-
* independent code. PGSemaphoreData structures are always allocated
* in shared memory (to support implementations where the data changes during
* lock/unlock).
*
* pg_config.h must define exactly one of the USE_xxx_SEMAPHORES symbols.
*/
#ifdef USE_NAMED_POSIX_SEMAPHORES
#include <semaphore.h>
typedef sem_t *PGSemaphoreData;
#endif
#ifdef USE_UNNAMED_POSIX_SEMAPHORES
#include <semaphore.h>
typedef sem_t PGSemaphoreData;
#endif
#ifdef USE_SYSV_SEMAPHORES
typedef struct PGSemaphoreData
{
int semId; /* semaphore set identifier */
int semNum; /* semaphore number within set */
} PGSemaphoreData;
#endif
#ifdef USE_WIN32_SEMAPHORES
typedef HANDLE PGSemaphoreData;
#endif
typedef PGSemaphoreData *PGSemaphore;
/* Module initialization (called during postmaster start or shmem reinit) */
extern void PGReserveSemaphores(int maxSemas, int port);
/* Initialize a PGSemaphore structure to represent a sema with count 1 */
extern void PGSemaphoreCreate(PGSemaphore sema);
/* Reset a previously-initialized PGSemaphore to have count 0 */
extern void PGSemaphoreReset(PGSemaphore sema);
/* Lock a semaphore (decrement count), blocking if count would be < 0 */
extern void PGSemaphoreLock(PGSemaphore sema, bool interruptOK);
/* Unlock a semaphore (increment count) */
extern void PGSemaphoreUnlock(PGSemaphore sema);
/* Lock a semaphore only if able to do so without blocking */
extern bool PGSemaphoreTryLock(PGSemaphore sema);
#endif /* PG_SEMA_H */

58
pg_include/storage/pg_shmem.h Executable file
View File

@@ -0,0 +1,58 @@
/*-------------------------------------------------------------------------
*
* pg_shmem.h
* Platform-independent API for shared memory support.
*
* Every port is expected to support shared memory with approximately
* SysV-ish semantics; in particular, a memory block is not anonymous
* but has an ID, and we must be able to tell whether there are any
* remaining processes attached to a block of a specified ID.
*
* To simplify life for the SysV implementation, the ID is assumed to
* consist of two unsigned long values (these are key and ID in SysV
* terms). Other platforms may ignore the second value if they need
* only one ID number.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/pg_shmem.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_SHMEM_H
#define PG_SHMEM_H
typedef struct PGShmemHeader /* standard header for all Postgres shmem */
{
int32 magic; /* magic # to identify Postgres segments */
#define PGShmemMagic 679834894
pid_t creatorPID; /* PID of creating process */
Size totalsize; /* total size of segment */
Size freeoffset; /* offset to first free space */
void *index; /* pointer to ShmemIndex table */
#ifndef WIN32 /* Windows doesn't have useful inode#s */
dev_t device; /* device data directory is on */
ino_t inode; /* inode number of data directory */
#endif
} PGShmemHeader;
#ifdef EXEC_BACKEND
#ifndef WIN32
extern unsigned long UsedShmemSegID;
#else
extern HANDLE UsedShmemSegID;
#endif
extern void *UsedShmemSegAddr;
extern void PGSharedMemoryReAttach(void);
#endif
extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate,
int port);
extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2);
extern void PGSharedMemoryDetach(void);
#endif /* PG_SHMEM_H */

55
pg_include/storage/pmsignal.h Executable file
View File

@@ -0,0 +1,55 @@
/*-------------------------------------------------------------------------
*
* pmsignal.h
* routines for signaling the postmaster from its child processes
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/pmsignal.h
*
*-------------------------------------------------------------------------
*/
#ifndef PMSIGNAL_H
#define PMSIGNAL_H
/*
* Reasons for signaling the postmaster. We can cope with simultaneous
* signals for different reasons. If the same reason is signaled multiple
* times in quick succession, however, the postmaster is likely to observe
* only one notification of it. This is okay for the present uses.
*/
typedef enum
{
PMSIGNAL_RECOVERY_STARTED, /* recovery has started */
PMSIGNAL_BEGIN_HOT_STANDBY, /* begin Hot Standby */
PMSIGNAL_WAKEN_ARCHIVER, /* send a NOTIFY signal to xlog archiver */
PMSIGNAL_ROTATE_LOGFILE, /* send SIGUSR1 to syslogger to rotate logfile */
PMSIGNAL_START_AUTOVAC_LAUNCHER, /* start an autovacuum launcher */
PMSIGNAL_START_AUTOVAC_WORKER, /* start an autovacuum worker */
PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */
PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */
NUM_PMSIGNALS /* Must be last value of enum! */
} PMSignalReason;
/* PMSignalData is an opaque struct, details known only within pmsignal.c */
typedef struct PMSignalData PMSignalData;
/*
* prototypes for functions in pmsignal.c
*/
extern Size PMSignalShmemSize(void);
extern void PMSignalShmemInit(void);
extern void SendPostmasterSignal(PMSignalReason reason);
extern bool CheckPostmasterSignal(PMSignalReason reason);
extern int AssignPostmasterChildSlot(void);
extern bool ReleasePostmasterChildSlot(int slot);
extern bool IsPostmasterChildWalSender(int slot);
extern void MarkPostmasterChildActive(void);
extern void MarkPostmasterChildInactive(void);
extern void MarkPostmasterChildWalSender(void);
extern bool PostmasterIsAlive(void);
#endif /* PMSIGNAL_H */

64
pg_include/storage/pos.h Executable file
View File

@@ -0,0 +1,64 @@
/*-------------------------------------------------------------------------
*
* pos.h
* POSTGRES "position" definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/pos.h
*
*-------------------------------------------------------------------------
*/
#ifndef POS_H
#define POS_H
/*
* a 'position' used to be <pagenumber, offset> in postgres. this has
* been changed to just <offset> as the notion of having multiple pages
* within a block has been removed.
*
* the 'offset' abstraction is somewhat confusing. it is NOT a byte
* offset within the page; instead, it is an offset into the line
* pointer array contained on every page that store (heap or index)
* tuples.
*/
typedef bits16 PositionIdData;
typedef PositionIdData *PositionId;
/* ----------------
* support macros
* ----------------
*/
/*
* PositionIdIsValid
* True iff the position identifier is valid.
*/
#define PositionIdIsValid(positionId) \
PointerIsValid(positionId)
/*
* PositionIdSetInvalid
* Make an invalid position.
*/
#define PositionIdSetInvalid(positionId) \
*(positionId) = (bits16) 0
/*
* PositionIdSet
* Sets a position identifier to the specified value.
*/
#define PositionIdSet(positionId, offsetNumber) \
*(positionId) = (offsetNumber)
/*
* PositionIdGetOffsetNumber
* Retrieve the offset number from a position identifier.
*/
#define PositionIdGetOffsetNumber(positionId) \
((OffsetNumber) *(positionId))
#endif /* POS_H */

73
pg_include/storage/predicate.h Executable file
View File

@@ -0,0 +1,73 @@
/*-------------------------------------------------------------------------
*
* predicate.h
* POSTGRES public predicate locking definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/predicate.h
*
*-------------------------------------------------------------------------
*/
#ifndef PREDICATE_H
#define PREDICATE_H
#include "utils/relcache.h"
#include "utils/snapshot.h"
/*
* GUC variables
*/
extern int max_predicate_locks_per_xact;
/* Number of SLRU buffers to use for predicate locking */
#define NUM_OLDSERXID_BUFFERS 16
/*
* function prototypes
*/
/* housekeeping for shared memory predicate lock structures */
extern void InitPredicateLocks(void);
extern Size PredicateLockShmemSize(void);
extern void CheckPointPredicate(void);
/* predicate lock reporting */
extern bool PageIsPredicateLocked(Relation relation, BlockNumber blkno);
/* predicate lock maintenance */
extern Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot);
extern void SetSerializableTransactionSnapshot(Snapshot snapshot,
TransactionId sourcexid);
extern void RegisterPredicateLockingXid(TransactionId xid);
extern void PredicateLockRelation(Relation relation, Snapshot snapshot);
extern void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot);
extern void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot);
extern void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
extern void TransferPredicateLocksToHeapRelation(Relation relation);
extern void ReleasePredicateLocks(bool isCommit);
/* conflict detection (may also trigger rollback) */
extern void CheckForSerializableConflictOut(bool valid, Relation relation, HeapTuple tuple,
Buffer buffer, Snapshot snapshot);
extern void CheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer);
extern void CheckTableForSerializableConflictIn(Relation relation);
/* final rollback checking */
extern void PreCommit_CheckForSerializationFailure(void);
/* two-phase commit support */
extern void AtPrepare_PredicateLocks(void);
extern void PostPrepare_PredicateLocks(TransactionId xid);
extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit);
extern void predicatelock_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
#endif /* PREDICATE_H */

View File

@@ -0,0 +1,490 @@
/*-------------------------------------------------------------------------
*
* predicate_internals.h
* POSTGRES internal predicate locking definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/predicate_internals.h
*
*-------------------------------------------------------------------------
*/
#ifndef PREDICATE_INTERNALS_H
#define PREDICATE_INTERNALS_H
#include "storage/lock.h"
/*
* Commit number.
*/
typedef uint64 SerCommitSeqNo;
/*
* Reserved commit sequence numbers:
* - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
* used as a SerCommitSeqNo, even an invalid one
* - InvalidSerCommitSeqNo is used to indicate a transaction that
* hasn't committed yet, so use a number greater than all valid
* ones to make comparison do the expected thing
* - RecoverySerCommitSeqNo is used to refer to transactions that
* happened before a crash/recovery, since we restart the sequence
* at that point. It's earlier than all normal sequence numbers,
* and is only used by recovered prepared transactions
*/
#define InvalidSerCommitSeqNo ((SerCommitSeqNo) UINT64CONST(0xFFFFFFFFFFFFFFFF))
#define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
#define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
/*
* The SERIALIZABLEXACT struct contains information needed for each
* serializable database transaction to support SSI techniques.
*
* A home-grown list is maintained in shared memory to manage these.
* An entry is used when the serializable transaction acquires a snapshot.
* Unless the transaction is rolled back, this entry must generally remain
* until all concurrent transactions have completed. (There are special
* optimizations for READ ONLY transactions which often allow them to be
* cleaned up earlier.) A transaction which is rolled back is cleaned up
* as soon as possible.
*
* Eligibility for cleanup of committed transactions is generally determined
* by comparing the transaction's finishedBefore field to
* SerializableGlobalXmin.
*/
typedef struct SERIALIZABLEXACT
{
VirtualTransactionId vxid; /* The executing process always has one of
* these. */
/*
* We use two numbers to track the order that transactions commit. Before
* commit, a transaction is marked as prepared, and prepareSeqNo is set.
* Shortly after commit, it's marked as committed, and commitSeqNo is set.
* This doesn't give a strict commit order, but these two values together
* are good enough for us, as we can always err on the safe side and
* assume that there's a conflict, if we can't be sure of the exact
* ordering of two commits.
*
* Note that a transaction is marked as prepared for a short period during
* commit processing, even if two-phase commit is not used. But with
* two-phase commit, a transaction can stay in prepared state for some
* time.
*/
SerCommitSeqNo prepareSeqNo;
SerCommitSeqNo commitSeqNo;
/* these values are not both interesting at the same time */
union
{
SerCommitSeqNo earliestOutConflictCommit; /* when committed with
* conflict out */
SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
* no conflict out */
} SeqNo;
SHM_QUEUE outConflicts; /* list of write transactions whose data we
* couldn't read. */
SHM_QUEUE inConflicts; /* list of read transactions which couldn't
* see our write. */
SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
SHM_QUEUE finishedLink; /* list link in
* FinishedSerializableTransactions */
/*
* for r/o transactions: list of concurrent r/w transactions that we could
* potentially have conflicts with, and vice versa for r/w transactions
*/
SHM_QUEUE possibleUnsafeConflicts;
TransactionId topXid; /* top level xid for the transaction, if one
* exists; else invalid */
TransactionId finishedBefore; /* invalid means still running; else
* the struct expires when no
* serializable xids are before this. */
TransactionId xmin; /* the transaction's snapshot xmin */
uint32 flags; /* OR'd combination of values defined below */
int pid; /* pid of associated process */
} SERIALIZABLEXACT;
#define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
#define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
#define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
#define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
/*
* The following flag actually means that the flagged transaction has a
* conflict out *to a transaction which committed ahead of it*. It's hard
* to get that into a name of a reasonable length.
*/
#define SXACT_FLAG_CONFLICT_OUT 0x00000010
#define SXACT_FLAG_READ_ONLY 0x00000020
#define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
#define SXACT_FLAG_RO_SAFE 0x00000080
#define SXACT_FLAG_RO_UNSAFE 0x00000100
#define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
/*
* The following types are used to provide an ad hoc list for holding
* SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
* access these by key -- there are direct pointers to these objects where
* needed. If a shared memory list is created, these types can probably be
* eliminated in favor of using the general solution.
*/
typedef struct PredXactListElementData
{
SHM_QUEUE link;
SERIALIZABLEXACT sxact;
} PredXactListElementData;
typedef struct PredXactListElementData *PredXactListElement;
#define PredXactListElementDataSize \
((Size)MAXALIGN(sizeof(PredXactListElementData)))
typedef struct PredXactListData
{
SHM_QUEUE availableList;
SHM_QUEUE activeList;
/*
* These global variables are maintained when registering and cleaning up
* serializable transactions. They must be global across all backends,
* but are not needed outside the predicate.c source file. Protected by
* SerializableXactHashLock.
*/
TransactionId SxactGlobalXmin; /* global xmin for active serializable
* transactions */
int SxactGlobalXminCount; /* how many active serializable
* transactions have this xmin */
int WritableSxactCount; /* how many non-read-only serializable
* transactions are active */
SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
* increasing number for
* commits of serializable
* transactions */
/* Protected by SerializableXactHashLock. */
SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks
* and inConflicts for
* committed transactions
* through this seq no */
/* Protected by SerializableFinishedListLock. */
SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
* seq no */
SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
PredXactListElement element;
} PredXactListData;
typedef struct PredXactListData *PredXactList;
#define PredXactListDataSize \
((Size)MAXALIGN(sizeof(PredXactListData)))
/*
* The following types are used to provide lists of rw-conflicts between
* pairs of transactions. Since exactly the same information is needed,
* they are also used to record possible unsafe transaction relationships
* for purposes of identifying safe snapshots for read-only transactions.
*
* When a RWConflictData is not in use to record either type of relationship
* between a pair of transactions, it is kept on an "available" list. The
* outLink field is used for maintaining that list.
*/
typedef struct RWConflictData
{
SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
SERIALIZABLEXACT *sxactOut;
SERIALIZABLEXACT *sxactIn;
} RWConflictData;
typedef struct RWConflictData *RWConflict;
#define RWConflictDataSize \
((Size)MAXALIGN(sizeof(RWConflictData)))
typedef struct RWConflictPoolHeaderData
{
SHM_QUEUE availableList;
RWConflict element;
} RWConflictPoolHeaderData;
typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;
#define RWConflictPoolHeaderDataSize \
((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
/*
* The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
* transaction or any of its subtransactions.
*/
typedef struct SERIALIZABLEXIDTAG
{
TransactionId xid;
} SERIALIZABLEXIDTAG;
/*
* The SERIALIZABLEXID struct provides a link from a TransactionId for a
* serializable transaction to the related SERIALIZABLEXACT record, even if
* the transaction has completed and its connection has been closed.
*
* These are created as new top level transaction IDs are first assigned to
* transactions which are participating in predicate locking. This may
* never happen for a particular transaction if it doesn't write anything.
* They are removed with their related serializable transaction objects.
*
* The SubTransGetTopmostTransaction method is used where necessary to get
* from an XID which might be from a subtransaction to the top level XID.
*/
typedef struct SERIALIZABLEXID
{
/* hash key */
SERIALIZABLEXIDTAG tag;
/* data */
SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
} SERIALIZABLEXID;
/*
* The PREDICATELOCKTARGETTAG struct identifies a database object which can
* be the target of predicate locks.
*
* Note that the hash function being used doesn't properly respect tag
* length -- it will go to a four byte boundary past the end of the tag.
* If you change this struct, make sure any slack space is initialized,
* so that any random bytes in the middle or at the end are not included
* in the hash.
*
* TODO SSI: If we always use the same fields for the same type of value, we
* should rename these. Holding off until it's clear there are no exceptions.
* Since indexes are relations with blocks and tuples, it's looking likely that
* the rename will be possible. If not, we may need to divide the last field
* and use part of it for a target type, so that we know how to interpret the
* data..
*/
typedef struct PREDICATELOCKTARGETTAG
{
uint32 locktag_field1; /* a 32-bit ID field */
uint32 locktag_field2; /* a 32-bit ID field */
uint32 locktag_field3; /* a 32-bit ID field */
uint32 locktag_field4; /* a 32-bit ID field */
uint32 locktag_field5; /* a 32-bit ID field */
} PREDICATELOCKTARGETTAG;
/*
* The PREDICATELOCKTARGET struct represents a database object on which there
* are predicate locks.
*
* A hash list of these objects is maintained in shared memory. An entry is
* added when a predicate lock is requested on an object which doesn't
* already have one. An entry is removed when the last lock is removed from
* its list.
*
* Because a particular target might become obsolete, due to update to a new
* version, before the reading transaction is obsolete, we need some way to
* prevent errors from reuse of a tuple ID. Rather than attempting to clean
* up the targets as the related tuples are pruned or vacuumed, we check the
* xmin on access. This should be far less costly.
*/
typedef struct PREDICATELOCKTARGET
{
/* hash key */
PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
/* data */
SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
* predicate lock target */
} PREDICATELOCKTARGET;
/*
* The PREDICATELOCKTAG struct identifies an individual predicate lock.
*
* It is the combination of predicate lock target (which is a lockable
* object) and a serializable transaction which has acquired a lock on that
* target.
*/
typedef struct PREDICATELOCKTAG
{
PREDICATELOCKTARGET *myTarget;
SERIALIZABLEXACT *myXact;
} PREDICATELOCKTAG;
/*
* The PREDICATELOCK struct represents an individual lock.
*
* An entry can be created here when the related database object is read, or
* by promotion of multiple finer-grained targets. All entries related to a
* serializable transaction are removed when that serializable transaction is
* cleaned up. Entries can also be removed when they are combined into a
* single coarser-grained lock entry.
*/
typedef struct PREDICATELOCK
{
/* hash key */
PREDICATELOCKTAG tag; /* unique identifier of lock */
/* data */
SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
* predicate locks */
SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
* predicate locks */
SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
} PREDICATELOCK;
/*
* The LOCALPREDICATELOCK struct represents a local copy of data which is
* also present in the PREDICATELOCK table, organized for fast access without
* needing to acquire a LWLock. It is strictly for optimization.
*
* Each serializable transaction creates its own local hash table to hold a
* collection of these. This information is used to determine when a number
* of fine-grained locks should be promoted to a single coarser-grained lock.
* The information is maintained more-or-less in parallel to the
* PREDICATELOCK data, but because this data is not protected by locks and is
* only used in an optimization heuristic, it is allowed to drift in a few
* corner cases where maintaining exact data would be expensive.
*
* The hash table is created when the serializable transaction acquires its
* snapshot, and its memory is released upon completion of the transaction.
*/
typedef struct LOCALPREDICATELOCK
{
/* hash key */
PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
/* data */
bool held; /* is lock held, or just its children? */
int childLocks; /* number of child locks currently held */
} LOCALPREDICATELOCK;
/*
* The types of predicate locks which can be acquired.
*/
typedef enum PredicateLockTargetType
{
PREDLOCKTAG_RELATION,
PREDLOCKTAG_PAGE,
PREDLOCKTAG_TUPLE
/* TODO SSI: Other types may be needed for index locking */
} PredicateLockTargetType;
/*
* This structure is used to quickly capture a copy of all predicate
* locks. This is currently used only by the pg_lock_status function,
* which in turn is used by the pg_locks view.
*/
typedef struct PredicateLockData
{
int nelements;
PREDICATELOCKTARGETTAG *locktags;
SERIALIZABLEXACT *xacts;
} PredicateLockData;
/*
* These macros define how we map logical IDs of lockable objects into the
* physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
* rather than accessing the fields directly. Note multiple eval of target!
*/
#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = InvalidBlockNumber, \
(locktag).locktag_field4 = InvalidOffsetNumber, \
(locktag).locktag_field5 = InvalidTransactionId)
#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = (blocknum), \
(locktag).locktag_field4 = InvalidOffsetNumber, \
(locktag).locktag_field5 = InvalidTransactionId)
#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum,xmin) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = (blocknum), \
(locktag).locktag_field4 = (offnum), \
(locktag).locktag_field5 = (xmin))
#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
((Oid) (locktag).locktag_field1)
#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
((Oid) (locktag).locktag_field2)
#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
((BlockNumber) (locktag).locktag_field3)
#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
((OffsetNumber) (locktag).locktag_field4)
#define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) \
((TransactionId) (locktag).locktag_field5)
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
(((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
PREDLOCKTAG_RELATION))
/*
* Two-phase commit statefile records. There are two types: for each
* transaction, we generate one per-transaction record and a variable
* number of per-predicate-lock records.
*/
typedef enum TwoPhasePredicateRecordType
{
TWOPHASEPREDICATERECORD_XACT,
TWOPHASEPREDICATERECORD_LOCK
} TwoPhasePredicateRecordType;
/*
* Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
* much is needed because most of it not meaningful for a recovered
* prepared transaction.
*
* In particular, we do not record the in and out conflict lists for a
* prepared transaction because the associated SERIALIZABLEXACTs will
* not be available after recovery. Instead, we simply record the
* existence of each type of conflict by setting the transaction's
* summary conflict in/out flag.
*/
typedef struct TwoPhasePredicateXactRecord
{
TransactionId xmin;
uint32 flags;
} TwoPhasePredicateXactRecord;
/* Per-lock state */
typedef struct TwoPhasePredicateLockRecord
{
PREDICATELOCKTARGETTAG target;
} TwoPhasePredicateLockRecord;
typedef struct TwoPhasePredicateRecord
{
TwoPhasePredicateRecordType type;
union
{
TwoPhasePredicateXactRecord xactRecord;
TwoPhasePredicateLockRecord lockRecord;
} data;
} TwoPhasePredicateRecord;
/*
* Define a macro to use for an "empty" SERIALIZABLEXACT reference.
*/
#define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
/*
* Function definitions for functions needing awareness of predicate
* locking internals.
*/
extern PredicateLockData *GetPredicateLockStatusData(void);
#endif /* PREDICATE_INTERNALS_H */

264
pg_include/storage/proc.h Executable file
View File

@@ -0,0 +1,264 @@
/*-------------------------------------------------------------------------
*
* proc.h
* per-process shared memory data structures
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/proc.h
*
*-------------------------------------------------------------------------
*/
#ifndef _PROC_H_
#define _PROC_H_
#include "access/xlog.h"
#include "datatype/timestamp.h"
#include "storage/latch.h"
#include "storage/lock.h"
#include "storage/pg_sema.h"
/*
* Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
* for non-aborted subtransactions of its current top transaction. These
* have to be treated as running XIDs by other backends.
*
* We also keep track of whether the cache overflowed (ie, the transaction has
* generated at least one subtransaction that didn't fit in the cache).
* If none of the caches have overflowed, we can assume that an XID that's not
* listed anywhere in the PGPROC array is not a running transaction. Else we
* have to look at pg_subtrans.
*/
#define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */
struct XidCache
{
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS];
};
/* Flags for PGXACT->vacuumFlags */
#define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */
#define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */
#define PROC_IN_ANALYZE 0x04 /* currently running analyze */
#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
/* flags reset at EOXact */
#define PROC_VACUUM_STATE_MASK (0x0E)
/*
* We allow a small number of "weak" relation locks (AccesShareLock,
* RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
* rather than the main lock table. This eases contention on the lock
* manager LWLocks. See storage/lmgr/README for additional details.
*/
#define FP_LOCK_SLOTS_PER_BACKEND 16
/*
* Each backend has a PGPROC struct in shared memory. There is also a list of
* currently-unused PGPROC structs that will be reallocated to new backends.
*
* links: list link for any list the PGPROC is in. When waiting for a lock,
* the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC
* is linked into ProcGlobal's freeProcs list.
*
* Note: twophase.c also sets up a dummy PGPROC struct for each currently
* prepared transaction. These PGPROCs appear in the ProcArray data structure
* so that the prepared transactions appear to be still running and are
* correctly shown as holding locks. A prepared transaction PGPROC can be
* distinguished from a real one at need by the fact that it has pid == 0.
* The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
* but its myProcLocks[] lists are valid.
*/
struct PGPROC
{
/* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */
SHM_QUEUE links; /* list link if process is in a list */
PGSemaphoreData sem; /* ONE semaphore to sleep on */
int waitStatus; /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */
Latch procLatch; /* generic latch for process */
LocalTransactionId lxid; /* local id of top-level transaction currently
* being executed by this proc, if running;
* else InvalidLocalTransactionId */
int pid; /* Backend's process ID; 0 if prepared xact */
int pgprocno;
/* These fields are zero while a backend is still starting up: */
BackendId backendId; /* This backend's backend ID (if assigned) */
Oid databaseId; /* OID of database this backend is using */
Oid roleId; /* OID of role using this backend */
/*
* While in hot standby mode, shows that a conflict signal has been sent
* for the current transaction. Set/cleared while holding ProcArrayLock,
* though not required. Accessed without lock, if needed.
*/
bool recoveryConflictPending;
/* Info about LWLock the process is currently waiting for, if any. */
bool lwWaiting; /* true if waiting for an LW lock */
uint8 lwWaitMode; /* lwlock mode being waited for */
struct PGPROC *lwWaitLink; /* next waiter for same LW lock */
/* Info about lock the process is currently waiting for, if any. */
/* waitLock and waitProcLock are NULL if not currently waiting. */
LOCK *waitLock; /* Lock object we're sleeping on ... */
PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */
LOCKMODE waitLockMode; /* type of lock we're waiting for */
LOCKMASK heldLocks; /* bitmask for lock types already held on this
* lock object by this backend */
/*
* Info to allow us to wait for synchronous replication, if needed.
* waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
* syncRepState must not be touched except by owning process or WALSender.
* syncRepLinks used only while holding SyncRepLock.
*/
XLogRecPtr waitLSN; /* waiting for this LSN or higher */
int syncRepState; /* wait state for sync rep */
SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */
/*
* All PROCLOCK objects for locks held or awaited by this backend are
* linked into one of these lists, according to the partition number of
* their lock.
*/
SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
struct XidCache subxids; /* cache for subtransaction XIDs */
/* Per-backend LWLock. Protects fields below. */
LWLockId backendLock; /* protects the fields below */
/* Lock manager data, recording fast-path locks taken by this backend. */
uint64 fpLockBits; /* lock modes held for each fast-path slot */
Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */
bool fpVXIDLock; /* are we holding a fast-path VXID lock? */
LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID
* lock */
};
/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
extern PGDLLIMPORT PGPROC *MyProc;
extern PGDLLIMPORT struct PGXACT *MyPgXact;
/*
* Prior to PostgreSQL 9.2, the fields below were stored as part of the
* PGPROC. However, benchmarking revealed that packing these particular
* members into a separate array as tightly as possible sped up GetSnapshotData
* considerably on systems with many CPU cores, by reducing the number of
* cache lines needing to be fetched. Thus, think very carefully before adding
* anything else here.
*/
typedef struct PGXACT
{
TransactionId xid; /* id of top-level transaction currently being
* executed by this proc, if running and XID
* is assigned; else InvalidTransactionId */
TransactionId xmin; /* minimal running XID as it was when we were
* starting our xact, excluding LAZY VACUUM:
* vacuum must not remove tuples deleted by
* xid >= xmin ! */
uint8 vacuumFlags; /* vacuum-related flags, see above */
bool overflowed;
bool inCommit; /* true if within commit critical section */
uint8 nxids;
} PGXACT;
/*
* There is one ProcGlobal struct for the whole database cluster.
*/
typedef struct PROC_HDR
{
/* Array of PGPROC structures (not including dummies for prepared txns) */
PGPROC *allProcs;
/* Array of PGXACT structures (not including dummies for prepared txns) */
PGXACT *allPgXact;
/* Length of allProcs array */
uint32 allProcCount;
/* Head of list of free PGPROC structures */
PGPROC *freeProcs;
/* Head of list of autovacuum's free PGPROC structures */
PGPROC *autovacFreeProcs;
/* WALWriter process's latch */
Latch *walwriterLatch;
/* Checkpointer process's latch */
Latch *checkpointerLatch;
/* Current shared estimate of appropriate spins_per_delay value */
int spins_per_delay;
/* The proc of the Startup process, since not in ProcArray */
PGPROC *startupProc;
int startupProcPid;
/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
int startupBufferPinWaitBufId;
} PROC_HDR;
extern PROC_HDR *ProcGlobal;
extern PGPROC *PreparedXactProcs;
/*
* We set aside some extra PGPROC structures for auxiliary processes,
* ie things that aren't full-fledged backends but need shmem access.
*
* Background writer, checkpointer and WAL writer run during normal operation.
* Startup process and WAL receiver also consume 2 slots, but WAL writer is
* launched only after startup has exited, so we only need 4 slots.
*/
#define NUM_AUXILIARY_PROCS 4
/* configurable options */
extern int DeadlockTimeout;
extern int StatementTimeout;
extern bool log_lock_waits;
extern volatile bool cancel_from_timeout;
/*
* Function Prototypes
*/
extern int ProcGlobalSemas(void);
extern Size ProcGlobalShmemSize(void);
extern void InitProcGlobal(void);
extern void InitProcess(void);
extern void InitProcessPhase2(void);
extern void InitAuxiliaryProcess(void);
extern void PublishStartupProcessInformation(void);
extern void SetStartupBufferPinWaitBufId(int bufid);
extern int GetStartupBufferPinWaitBufId(void);
extern bool HaveNFreeProcs(int n);
extern void ProcReleaseLocks(bool isCommit);
extern void ProcQueueInit(PROC_QUEUE *queue);
extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
extern bool IsWaitingForLock(void);
extern void LockErrorCleanup(void);
extern void ProcWaitForSignal(void);
extern void ProcSendSignal(int pid);
extern bool enable_sig_alarm(int delayms, bool is_statement_timeout);
extern bool disable_sig_alarm(bool is_statement_timeout);
extern void handle_sig_alarm(SIGNAL_ARGS);
extern bool enable_standby_sig_alarm(TimestampTz now,
TimestampTz fin_time, bool deadlock_only);
extern bool disable_standby_sig_alarm(void);
extern void handle_standby_sig_alarm(SIGNAL_ARGS);
#endif /* PROC_H */

79
pg_include/storage/procarray.h Executable file
View File

@@ -0,0 +1,79 @@
/*-------------------------------------------------------------------------
*
* procarray.h
* POSTGRES process array definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/procarray.h
*
*-------------------------------------------------------------------------
*/
#ifndef PROCARRAY_H
#define PROCARRAY_H
#include "storage/standby.h"
#include "utils/snapshot.h"
extern Size ProcArrayShmemSize(void);
extern void CreateSharedProcArray(void);
extern void ProcArrayAdd(PGPROC *proc);
extern void ProcArrayRemove(PGPROC *proc, TransactionId latestXid);
extern void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid);
extern void ProcArrayClearTransaction(PGPROC *proc);
extern void ProcArrayApplyRecoveryInfo(RunningTransactions running);
extern void ProcArrayApplyXidAssignment(TransactionId topxid,
int nsubxids, TransactionId *subxids);
extern void RecordKnownAssignedTransactionIds(TransactionId xid);
extern void ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
int nsubxids, TransactionId *subxids,
TransactionId max_xid);
extern void ExpireAllKnownAssignedTransactionIds(void);
extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid);
extern int GetMaxSnapshotXidCount(void);
extern int GetMaxSnapshotSubxidCount(void);
extern Snapshot GetSnapshotData(Snapshot snapshot);
extern bool ProcArrayInstallImportedXmin(TransactionId xmin,
TransactionId sourcexid);
extern RunningTransactions GetRunningTransactionData(void);
extern bool TransactionIdIsInProgress(TransactionId xid);
extern bool TransactionIdIsActive(TransactionId xid);
extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum);
extern TransactionId GetOldestActiveTransactionId(void);
extern int GetTransactionsInCommit(TransactionId **xids_p);
extern bool HaveTransactionsInCommit(TransactionId *xids, int nxids);
extern PGPROC *BackendPidGetProc(int pid);
extern int BackendXidGetPid(TransactionId xid);
extern bool IsBackendPid(int pid);
extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
bool excludeXmin0, bool allDbs, int excludeVacuum,
int *nvxids);
extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid);
extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode);
extern bool MinimumActiveBackends(int min);
extern int CountDBBackends(Oid databaseid);
extern void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending);
extern int CountUserBackends(Oid roleid);
extern bool CountOtherDBBackends(Oid databaseId,
int *nbackends, int *nprepared);
extern void XidCacheRemoveRunningXids(TransactionId xid,
int nxids, const TransactionId *xids,
TransactionId latestXid);
#endif /* PROCARRAY_H */

58
pg_include/storage/procsignal.h Executable file
View File

@@ -0,0 +1,58 @@
/*-------------------------------------------------------------------------
*
* procsignal.h
* Routines for interprocess signalling
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/procsignal.h
*
*-------------------------------------------------------------------------
*/
#ifndef PROCSIGNAL_H
#define PROCSIGNAL_H
#include "storage/backendid.h"
/*
* Reasons for signalling a Postgres child process (a backend or an auxiliary
* process, like checkpointer). We can cope with concurrent signals for different
* reasons. However, if the same reason is signaled multiple times in quick
* succession, the process is likely to observe only one notification of it.
* This is okay for the present uses.
*
* Also, because of race conditions, it's important that all the signals be
* defined so that no harm is done if a process mistakenly receives one.
*/
typedef enum
{
PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */
PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */
/* Recovery conflict reasons */
PROCSIG_RECOVERY_CONFLICT_DATABASE,
PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
PROCSIG_RECOVERY_CONFLICT_LOCK,
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
NUM_PROCSIGNALS /* Must be last! */
} ProcSignalReason;
/*
* prototypes for functions in procsignal.c
*/
extern Size ProcSignalShmemSize(void);
extern void ProcSignalShmemInit(void);
extern void ProcSignalInit(int pss_idx);
extern int SendProcSignal(pid_t pid, ProcSignalReason reason,
BackendId backendId);
extern void procsignal_sigusr1_handler(SIGNAL_ARGS);
#endif /* PROCSIGNAL_H */

23
pg_include/storage/reinit.h Executable file
View File

@@ -0,0 +1,23 @@
/*-------------------------------------------------------------------------
*
* reinit.h
* Reinitialization of unlogged relations
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/fd.h
*
*-------------------------------------------------------------------------
*/
#ifndef REINIT_H
#define REINIT_H
extern void ResetUnloggedRelations(int op);
#define UNLOGGED_RELATION_CLEANUP 0x0001
#define UNLOGGED_RELATION_INIT 0x0002
#endif /* REINIT_H */

119
pg_include/storage/relfilenode.h Executable file
View File

@@ -0,0 +1,119 @@
/*-------------------------------------------------------------------------
*
* relfilenode.h
* Physical access information for relations.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/relfilenode.h
*
*-------------------------------------------------------------------------
*/
#ifndef RELFILENODE_H
#define RELFILENODE_H
#include "storage/backendid.h"
/*
* The physical storage of a relation consists of one or more forks. The
* main fork is always created, but in addition to that there can be
* additional forks for storing various metadata. ForkNumber is used when
* we need to refer to a specific fork in a relation.
*/
typedef enum ForkNumber
{
InvalidForkNumber = -1,
MAIN_FORKNUM = 0,
FSM_FORKNUM,
VISIBILITYMAP_FORKNUM,
INIT_FORKNUM
/*
* NOTE: if you add a new fork, change MAX_FORKNUM below and update the
* forkNames array in catalog.c
*/
} ForkNumber;
#define MAX_FORKNUM INIT_FORKNUM
/*
* RelFileNode must provide all that we need to know to physically access
* a relation, with the exception of the backend ID, which can be provided
* separately. Note, however, that a "physical" relation is comprised of
* multiple files on the filesystem, as each fork is stored as a separate
* file, and each fork can be divided into multiple segments. See md.c.
*
* spcNode identifies the tablespace of the relation. It corresponds to
* pg_tablespace.oid.
*
* dbNode identifies the database of the relation. It is zero for
* "shared" relations (those common to all databases of a cluster).
* Nonzero dbNode values correspond to pg_database.oid.
*
* relNode identifies the specific relation. relNode corresponds to
* pg_class.relfilenode (NOT pg_class.oid, because we need to be able
* to assign new physical files to relations in some situations).
* Notice that relNode is only unique within a particular database.
*
* Note: spcNode must be GLOBALTABLESPACE_OID if and only if dbNode is
* zero. We support shared relations only in the "global" tablespace.
*
* Note: in pg_class we allow reltablespace == 0 to denote that the
* relation is stored in its database's "default" tablespace (as
* identified by pg_database.dattablespace). However this shorthand
* is NOT allowed in RelFileNode structs --- the real tablespace ID
* must be supplied when setting spcNode.
*
* Note: in pg_class, relfilenode can be zero to denote that the relation
* is a "mapped" relation, whose current true filenode number is available
* from relmapper.c. Again, this case is NOT allowed in RelFileNodes.
*
* Note: various places use RelFileNode in hashtable keys. Therefore,
* there *must not* be any unused padding bytes in this struct. That
* should be safe as long as all the fields are of type Oid.
*/
typedef struct RelFileNode
{
Oid spcNode; /* tablespace */
Oid dbNode; /* database */
Oid relNode; /* relation */
} RelFileNode;
/*
* Augmenting a relfilenode with the backend ID provides all the information
* we need to locate the physical storage. The backend ID is InvalidBackendId
* for regular relations (those accessible to more than one backend), or the
* owning backend's ID for backend-local relations. Backend-local relations
* are always transient and removed in case of a database crash; they are
* never WAL-logged or fsync'd.
*/
typedef struct RelFileNodeBackend
{
RelFileNode node;
BackendId backend;
} RelFileNodeBackend;
#define RelFileNodeBackendIsTemp(rnode) \
((rnode).backend != InvalidBackendId)
/*
* Note: RelFileNodeEquals and RelFileNodeBackendEquals compare relNode first
* since that is most likely to be different in two unequal RelFileNodes. It
* is probably redundant to compare spcNode if the other fields are found equal,
* but do it anyway to be sure. Likewise for checking the backend ID in
* RelFileNodeBackendEquals.
*/
#define RelFileNodeEquals(node1, node2) \
((node1).relNode == (node2).relNode && \
(node1).dbNode == (node2).dbNode && \
(node1).spcNode == (node2).spcNode)
#define RelFileNodeBackendEquals(node1, node2) \
((node1).node.relNode == (node2).node.relNode && \
(node1).node.dbNode == (node2).node.dbNode && \
(node1).backend == (node2).backend && \
(node1).node.spcNode == (node2).node.spcNode)
#endif /* RELFILENODE_H */

1026
pg_include/storage/s_lock.h Executable file

File diff suppressed because it is too large Load Diff

78
pg_include/storage/shmem.h Executable file
View File

@@ -0,0 +1,78 @@
/*-------------------------------------------------------------------------
*
* shmem.h
* shared memory management structures
*
* Historical note:
* A long time ago, Postgres' shared memory region was allowed to be mapped
* at a different address in each process, and shared memory "pointers" were
* passed around as offsets relative to the start of the shared memory region.
* That is no longer the case: each process must map the shared memory region
* at the same address. This means shared memory pointers can be passed
* around directly between different processes.
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/shmem.h
*
*-------------------------------------------------------------------------
*/
#ifndef SHMEM_H
#define SHMEM_H
#include "utils/hsearch.h"
/* shmqueue.c */
typedef struct SHM_QUEUE
{
struct SHM_QUEUE *prev;
struct SHM_QUEUE *next;
} SHM_QUEUE;
/* shmem.c */
extern void InitShmemAccess(void *seghdr);
extern void InitShmemAllocation(void);
extern void *ShmemAlloc(Size size);
extern bool ShmemAddrIsValid(const void *addr);
extern void InitShmemIndex(void);
extern HTAB *ShmemInitHash(const char *name, long init_size, long max_size,
HASHCTL *infoP, int hash_flags);
extern void *ShmemInitStruct(const char *name, Size size, bool *foundPtr);
extern Size add_size(Size s1, Size s2);
extern Size mul_size(Size s1, Size s2);
/* ipci.c */
extern void RequestAddinShmemSpace(Size size);
/* size constants for the shmem index table */
/* max size of data structure string name */
#define SHMEM_INDEX_KEYSIZE (48)
/* estimated size of the shmem index table (not a hard limit) */
#define SHMEM_INDEX_SIZE (64)
/* this is a hash bucket in the shmem index table */
typedef struct
{
char key[SHMEM_INDEX_KEYSIZE]; /* string name */
void *location; /* location in shared mem */
Size size; /* # bytes allocated for the structure */
} ShmemIndexEnt;
/*
* prototypes for functions in shmqueue.c
*/
extern void SHMQueueInit(SHM_QUEUE *queue);
extern void SHMQueueElemInit(SHM_QUEUE *queue);
extern void SHMQueueDelete(SHM_QUEUE *queue);
extern void SHMQueueInsertBefore(SHM_QUEUE *queue, SHM_QUEUE *elem);
extern void SHMQueueInsertAfter(SHM_QUEUE *queue, SHM_QUEUE *elem);
extern Pointer SHMQueueNext(const SHM_QUEUE *queue, const SHM_QUEUE *curElem,
Size linkOffset);
extern Pointer SHMQueuePrev(const SHM_QUEUE *queue, const SHM_QUEUE *curElem,
Size linkOffset);
extern bool SHMQueueEmpty(const SHM_QUEUE *queue);
extern bool SHMQueueIsDetached(const SHM_QUEUE *queue);
#endif /* SHMEM_H */

139
pg_include/storage/sinval.h Executable file
View File

@@ -0,0 +1,139 @@
/*-------------------------------------------------------------------------
*
* sinval.h
* POSTGRES shared cache invalidation communication definitions.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/sinval.h
*
*-------------------------------------------------------------------------
*/
#ifndef SINVAL_H
#define SINVAL_H
#include "storage/relfilenode.h"
/*
* We support several types of shared-invalidation messages:
* * invalidate a specific tuple in a specific catcache
* * invalidate all catcache entries from a given system catalog
* * invalidate a relcache entry for a specific logical relation
* * invalidate an smgr cache entry for a specific physical relation
* * invalidate the mapped-relation mapping for a given database
* More types could be added if needed. The message type is identified by
* the first "int8" field of the message struct. Zero or positive means a
* specific-catcache inval message (and also serves as the catcache ID field).
* Negative values identify the other message types, as per codes below.
*
* Catcache inval events are initially driven by detecting tuple inserts,
* updates and deletions in system catalogs (see CacheInvalidateHeapTuple).
* An update can generate two inval events, one for the old tuple and one for
* the new, but this is reduced to one event if the tuple's hash key doesn't
* change. Note that the inval events themselves don't actually say whether
* the tuple is being inserted or deleted. Also, since we transmit only a
* hash key, there is a small risk of unnecessary invalidations due to chance
* matches of hash keys.
*
* Note that some system catalogs have multiple caches on them (with different
* indexes). On detecting a tuple invalidation in such a catalog, separate
* catcache inval messages must be generated for each of its caches, since
* the hash keys will generally be different.
*
* Catcache and relcache invalidations are transactional, and so are sent
* to other backends upon commit. Internally to the generating backend,
* they are also processed at CommandCounterIncrement so that later commands
* in the same transaction see the new state. The generating backend also
* has to process them at abort, to flush out any cache state it's loaded
* from no-longer-valid entries.
*
* smgr and relation mapping invalidations are non-transactional: they are
* sent immediately when the underlying file change is made.
*/
typedef struct
{
int8 id; /* cache ID --- must be first */
Oid dbId; /* database ID, or 0 if a shared relation */
uint32 hashValue; /* hash value of key for this catcache */
} SharedInvalCatcacheMsg;
#define SHAREDINVALCATALOG_ID (-1)
typedef struct
{
int8 id; /* type field --- must be first */
Oid dbId; /* database ID, or 0 if a shared catalog */
Oid catId; /* ID of catalog whose contents are invalid */
} SharedInvalCatalogMsg;
#define SHAREDINVALRELCACHE_ID (-2)
typedef struct
{
int8 id; /* type field --- must be first */
Oid dbId; /* database ID, or 0 if a shared relation */
Oid relId; /* relation ID */
} SharedInvalRelcacheMsg;
#define SHAREDINVALSMGR_ID (-3)
typedef struct
{
/* note: field layout chosen to pack into 16 bytes */
int8 id; /* type field --- must be first */
int8 backend_hi; /* high bits of backend ID, if temprel */
uint16 backend_lo; /* low bits of backend ID, if temprel */
RelFileNode rnode; /* spcNode, dbNode, relNode */
} SharedInvalSmgrMsg;
#define SHAREDINVALRELMAP_ID (-4)
typedef struct
{
int8 id; /* type field --- must be first */
Oid dbId; /* database ID, or 0 for shared catalogs */
} SharedInvalRelmapMsg;
typedef union
{
int8 id; /* type field --- must be first */
SharedInvalCatcacheMsg cc;
SharedInvalCatalogMsg cat;
SharedInvalRelcacheMsg rc;
SharedInvalSmgrMsg sm;
SharedInvalRelmapMsg rm;
} SharedInvalidationMessage;
/* Counter of messages processed; don't worry about overflow. */
extern uint64 SharedInvalidMessageCounter;
extern void SendSharedInvalidMessages(const SharedInvalidationMessage *msgs,
int n);
extern void ReceiveSharedInvalidMessages(
void (*invalFunction) (SharedInvalidationMessage *msg),
void (*resetFunction) (void));
/* signal handler for catchup events (PROCSIG_CATCHUP_INTERRUPT) */
extern void HandleCatchupInterrupt(void);
/*
* enable/disable processing of catchup events directly from signal handler.
* The enable routine first performs processing of any catchup events that
* have occurred since the last disable.
*/
extern void EnableCatchupInterrupt(void);
extern bool DisableCatchupInterrupt(void);
extern int xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
bool *RelcacheInitFileInval);
extern void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
int nmsgs, bool RelcacheInitFileInval,
Oid dbid, Oid tsid);
#endif /* SINVAL_H */

42
pg_include/storage/sinvaladt.h Executable file
View File

@@ -0,0 +1,42 @@
/*-------------------------------------------------------------------------
*
* sinvaladt.h
* POSTGRES shared cache invalidation data manager.
*
* The shared cache invalidation manager is responsible for transmitting
* invalidation messages between backends. Any message sent by any backend
* must be delivered to all already-running backends before it can be
* forgotten. (If we run out of space, we instead deliver a "RESET"
* message to backends that have fallen too far behind.)
*
* The struct type SharedInvalidationMessage, defining the contents of
* a single message, is defined in sinval.h.
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/sinvaladt.h
*
*-------------------------------------------------------------------------
*/
#ifndef SINVALADT_H
#define SINVALADT_H
#include "storage/proc.h"
#include "storage/sinval.h"
/*
* prototypes for functions in sinvaladt.c
*/
extern Size SInvalShmemSize(void);
extern void CreateSharedInvalidationState(void);
extern void SharedInvalBackendInit(bool sendOnly);
extern PGPROC *BackendIdGetProc(int backendID);
extern void SIInsertDataEntries(const SharedInvalidationMessage *data, int n);
extern int SIGetDataEntries(SharedInvalidationMessage *data, int datasize);
extern void SICleanupQueue(bool callerHasWriteLock, int minFree);
extern LocalTransactionId GetNextLocalTransactionId(void);
#endif /* SINVALADT_H */

143
pg_include/storage/smgr.h Executable file
View File

@@ -0,0 +1,143 @@
/*-------------------------------------------------------------------------
*
* smgr.h
* storage manager switch public interface declarations.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/smgr.h
*
*-------------------------------------------------------------------------
*/
#ifndef SMGR_H
#define SMGR_H
#include "fmgr.h"
#include "storage/block.h"
#include "storage/relfilenode.h"
/*
* smgr.c maintains a table of SMgrRelation objects, which are essentially
* cached file handles. An SMgrRelation is created (if not already present)
* by smgropen(), and destroyed by smgrclose(). Note that neither of these
* operations imply I/O, they just create or destroy a hashtable entry.
* (But smgrclose() may release associated resources, such as OS-level file
* descriptors.)
*
* An SMgrRelation may have an "owner", which is just a pointer to it from
* somewhere else; smgr.c will clear this pointer if the SMgrRelation is
* closed. We use this to avoid dangling pointers from relcache to smgr
* without having to make the smgr explicitly aware of relcache. There
* can't be more than one "owner" pointer per SMgrRelation, but that's
* all we need.
*
* SMgrRelations that do not have an "owner" are considered to be transient,
* and are deleted at end of transaction.
*/
typedef struct SMgrRelationData
{
/* rnode is the hashtable lookup key, so it must be first! */
RelFileNodeBackend smgr_rnode; /* relation physical identifier */
/* pointer to owning pointer, or NULL if none */
struct SMgrRelationData **smgr_owner;
/*
* These next three fields are not actually used or manipulated by smgr,
* except that they are reset to InvalidBlockNumber upon a cache flush
* event (in particular, upon truncation of the relation). Higher levels
* store cached state here so that it will be reset when truncation
* happens. In all three cases, InvalidBlockNumber means "unknown".
*/
BlockNumber smgr_targblock; /* current insertion target block */
BlockNumber smgr_fsm_nblocks; /* last known size of fsm fork */
BlockNumber smgr_vm_nblocks; /* last known size of vm fork */
/* additional public fields may someday exist here */
/*
* Fields below here are intended to be private to smgr.c and its
* submodules. Do not touch them from elsewhere.
*/
int smgr_which; /* storage manager selector */
/* for md.c; NULL for forks that are not open */
struct _MdfdVec *md_fd[MAX_FORKNUM + 1];
/* if unowned, list link in list of all unowned SMgrRelations */
struct SMgrRelationData *next_unowned_reln;
} SMgrRelationData;
typedef SMgrRelationData *SMgrRelation;
#define SmgrIsTemp(smgr) \
RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
extern void smgrinit(void);
extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend);
extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
extern void smgrclose(SMgrRelation reln);
extern void smgrcloseall(void);
extern void smgrclosenode(RelFileNodeBackend rnode);
extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern void smgrdounlink(SMgrRelation reln, bool isRedo);
extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void smgrread(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer);
extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
extern void smgrpreckpt(void);
extern void smgrsync(void);
extern void smgrpostckpt(void);
extern void AtEOXact_SMgr(void);
/* internals: move me elsewhere -- ay 7/94 */
/* in md.c */
extern void mdinit(void);
extern void mdclose(SMgrRelation reln, ForkNumber forknum);
extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
extern void mdextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer);
extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum);
extern void mdpreckpt(void);
extern void mdsync(void);
extern void mdpostckpt(void);
extern void SetForwardFsyncRequests(void);
extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum,
BlockNumber segno);
extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum);
extern void ForgetDatabaseFsyncRequests(Oid dbid);
/* smgrtype.c */
extern Datum smgrout(PG_FUNCTION_ARGS);
extern Datum smgrin(PG_FUNCTION_ARGS);
extern Datum smgreq(PG_FUNCTION_ARGS);
extern Datum smgrne(PG_FUNCTION_ARGS);
#endif /* SMGR_H */

73
pg_include/storage/spin.h Executable file
View File

@@ -0,0 +1,73 @@
/*-------------------------------------------------------------------------
*
* spin.h
* Hardware-independent implementation of spinlocks.
*
*
* The hardware-independent interface to spinlocks is defined by the
* typedef "slock_t" and these macros:
*
* void SpinLockInit(volatile slock_t *lock)
* Initialize a spinlock (to the unlocked state).
*
* void SpinLockAcquire(volatile slock_t *lock)
* Acquire a spinlock, waiting if necessary.
* Time out and abort() if unable to acquire the lock in a
* "reasonable" amount of time --- typically ~ 1 minute.
*
* void SpinLockRelease(volatile slock_t *lock)
* Unlock a previously acquired lock.
*
* bool SpinLockFree(slock_t *lock)
* Tests if the lock is free. Returns TRUE if free, FALSE if locked.
* This does *not* change the state of the lock.
*
* Callers must beware that the macro argument may be evaluated multiple
* times!
*
* CAUTION: Care must be taken to ensure that loads and stores of
* shared memory values are not rearranged around spinlock acquire
* and release. This is done using the "volatile" qualifier: the C
* standard states that loads and stores of volatile objects cannot
* be rearranged *with respect to other volatile objects*. The
* spinlock is always written through a volatile pointer by the
* spinlock macros, but this is not sufficient by itself: code that
* protects shared data with a spinlock MUST reference that shared
* data through a volatile pointer.
*
* Keep in mind the coding rule that spinlocks must not be held for more
* than a few instructions. In particular, we assume it is not possible
* for a CHECK_FOR_INTERRUPTS() to occur while holding a spinlock, and so
* it is not necessary to do HOLD/RESUME_INTERRUPTS() in these macros.
*
* These macros are implemented in terms of hardware-dependent macros
* supplied by s_lock.h. There is not currently any extra functionality
* added by this header, but there has been in the past and may someday
* be again.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/spin.h
*
*-------------------------------------------------------------------------
*/
#ifndef SPIN_H
#define SPIN_H
#include "storage/s_lock.h"
#define SpinLockInit(lock) S_INIT_LOCK(lock)
#define SpinLockAcquire(lock) S_LOCK(lock)
#define SpinLockRelease(lock) S_UNLOCK(lock)
#define SpinLockFree(lock) S_LOCK_FREE(lock)
extern int SpinlockSemas(void);
#endif /* SPIN_H */

115
pg_include/storage/standby.h Executable file
View File

@@ -0,0 +1,115 @@
/*-------------------------------------------------------------------------
*
* standby.h
* Definitions for hot standby mode.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/standby.h
*
*-------------------------------------------------------------------------
*/
#ifndef STANDBY_H
#define STANDBY_H
#include "access/xlog.h"
#include "storage/lock.h"
#include "storage/procsignal.h"
#include "storage/relfilenode.h"
/* User-settable GUC parameters */
extern int vacuum_defer_cleanup_age;
extern int max_standby_archive_delay;
extern int max_standby_streaming_delay;
extern void InitRecoveryTransactionEnvironment(void);
extern void ShutdownRecoveryTransactionEnvironment(void);
extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
RelFileNode node);
extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
extern void ResolveRecoveryConflictWithBufferPin(void);
extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
extern void CheckRecoveryConflictDeadlock(void);
/*
* Standby Rmgr (RM_STANDBY_ID)
*
* Standby recovery manager exists to perform actions that are required
* to make hot standby work. That includes logging AccessExclusiveLocks taken
* by transactions and running-xacts snapshots.
*/
extern void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid);
extern void StandbyReleaseLockTree(TransactionId xid,
int nsubxids, TransactionId *subxids);
extern void StandbyReleaseAllLocks(void);
extern void StandbyReleaseOldLocks(int nxids, TransactionId *xids);
/*
* XLOG message types
*/
#define XLOG_STANDBY_LOCK 0x00
#define XLOG_RUNNING_XACTS 0x10
typedef struct xl_standby_locks
{
int nlocks; /* number of entries in locks array */
xl_standby_lock locks[1]; /* VARIABLE LENGTH ARRAY */
} xl_standby_locks;
/*
* When we write running xact data to WAL, we use this structure.
*/
typedef struct xl_running_xacts
{
int xcnt; /* # of xact ids in xids[] */
bool subxid_overflow; /* snapshot overflowed, subxids missing */
TransactionId nextXid; /* copy of ShmemVariableCache->nextXid */
TransactionId oldestRunningXid; /* *not* oldestXmin */
TransactionId latestCompletedXid; /* so we can set xmax */
TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
} xl_running_xacts;
#define MinSizeOfXactRunningXacts offsetof(xl_running_xacts, xids)
/* Recovery handlers for the Standby Rmgr (RM_STANDBY_ID) */
extern void standby_redo(XLogRecPtr lsn, XLogRecord *record);
extern void standby_desc(StringInfo buf, uint8 xl_info, char *rec);
/*
* Declarations for GetRunningTransactionData(). Similar to Snapshots, but
* not quite. This has nothing at all to do with visibility on this server,
* so this is completely separate from snapmgr.c and snapmgr.h.
* This data is important for creating the initial snapshot state on a
* standby server. We need lots more information than a normal snapshot,
* hence we use a specific data structure for our needs. This data
* is written to WAL as a separate record immediately after each
* checkpoint. That means that wherever we start a standby from we will
* almost immediately see the data we need to begin executing queries.
*/
typedef struct RunningTransactionsData
{
int xcnt; /* # of xact ids in xids[] */
bool subxid_overflow; /* snapshot overflowed, subxids missing */
TransactionId nextXid; /* copy of ShmemVariableCache->nextXid */
TransactionId oldestRunningXid; /* *not* oldestXmin */
TransactionId latestCompletedXid; /* so we can set xmax */
TransactionId *xids; /* array of (sub)xids still running */
} RunningTransactionsData;
typedef RunningTransactionsData *RunningTransactions;
extern void LogAccessExclusiveLock(Oid dbOid, Oid relOid);
extern void LogAccessExclusiveLockPrepare(void);
extern void LogStandbySnapshot(void);
#endif /* STANDBY_H */