init
This commit is contained in:
27
pg_include/storage/backendid.h
Executable file
27
pg_include/storage/backendid.h
Executable file
@@ -0,0 +1,27 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* backendid.h
|
||||
* POSTGRES backend id communication definitions
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/backendid.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BACKENDID_H
|
||||
#define BACKENDID_H
|
||||
|
||||
/* ----------------
 *		BackendId is the integer identifier of a currently active backend;
 *		InvalidBackendId (-1) is the reserved "invalid" value.
 *
 *		-cim 8/17/90
 * ----------------
 */
typedef int BackendId;			/* unique currently active backend identifier */

#define InvalidBackendId		(-1)
|
||||
|
||||
/* Declaration only; the variable itself is defined outside this header. */
extern PGDLLIMPORT BackendId MyBackendId; /* backend id of this backend */
|
||||
|
||||
#endif /* BACKENDID_H */
|
||||
170
pg_include/storage/barrier.h
Executable file
170
pg_include/storage/barrier.h
Executable file
@@ -0,0 +1,170 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* barrier.h
|
||||
* Memory barrier operations.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/barrier.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BARRIER_H
|
||||
#define BARRIER_H
|
||||
|
||||
#include "storage/s_lock.h"
|
||||
|
||||
/*
 * Spinlock that the fallback pg_memory_barrier() in this header acquires and
 * immediately releases when no native barrier is available.
 */
extern slock_t dummy_spinlock;
|
||||
|
||||
/*
|
||||
* A compiler barrier need not (and preferably should not) emit any actual
|
||||
* machine code, but must act as an optimization fence: the compiler must not
|
||||
* reorder loads or stores to main memory around the barrier. However, the
|
||||
* CPU may still reorder loads or stores at runtime, if the architecture's
|
||||
* memory model permits this.
|
||||
*
|
||||
* A memory barrier must act as a compiler barrier, and in addition must
|
||||
* guarantee that all loads and stores issued prior to the barrier are
|
||||
* completed before any loads or stores issued after the barrier. Unless
|
||||
* loads and stores are totally ordered (which is not the case on most
|
||||
* architectures) this requires issuing some sort of memory fencing
|
||||
* instruction.
|
||||
*
|
||||
* A read barrier must act as a compiler barrier, and in addition must
|
||||
* guarantee that any loads issued prior to the barrier are completed before
|
||||
* any loads issued after the barrier. Similarly, a write barrier acts
|
||||
* as a compiler barrier, and also orders stores. Read and write barriers
|
||||
* are thus weaker than a full memory barrier, but stronger than a compiler
|
||||
* barrier. In practice, on machines with strong memory ordering, read and
|
||||
* write barriers may require nothing more than a compiler barrier.
|
||||
*
|
||||
* For an introduction to using memory barriers within the PostgreSQL backend,
|
||||
* see src/backend/storage/lmgr/README.barrier
|
||||
*/
|
||||
|
||||
/*
 * Select native barrier primitives for the current compiler/architecture.
 *
 * Each branch defines whichever of pg_compiler_barrier(),
 * pg_memory_barrier(), pg_read_barrier() and pg_write_barrier() it can
 * implement directly; anything left undefined here is expected to be
 * supplied by fallback definitions elsewhere in this header.
 */
#if defined(DISABLE_BARRIERS)

/*
 * Fall through to the spinlock-based implementation.
 */
#elif defined(__INTEL_COMPILER)

/*
 * icc defines __GNUC__, but doesn't support gcc's inline asm syntax
 */
#define pg_memory_barrier()		_mm_mfence()
#define pg_compiler_barrier()	__memory_barrier()
#elif defined(__GNUC__)

/* This works on any architecture, since it's only talking to GCC itself. */
#define pg_compiler_barrier()	__asm__ __volatile__("" : : : "memory")

#if defined(__i386__)

/*
 * i386 does not allow loads to be reordered with other loads, or stores to be
 * reordered with other stores, but a load can be performed before a subsequent
 * store.
 *
 * "lock; addl" has worked for longer than "mfence".
 */
#define pg_memory_barrier()		\
	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
#define pg_read_barrier()		pg_compiler_barrier()
#define pg_write_barrier()		pg_compiler_barrier()
#elif defined(__x86_64__)		/* 64 bit x86 */

/*
 * x86_64 has similar ordering characteristics to i386.
 *
 * Technically, some x86-ish chips support uncached memory access and/or
 * special instructions that are weakly ordered.  In those cases we'd need
 * the read and write barriers to be lfence and sfence.  But since we don't
 * do those things, a compiler barrier should be enough.
 */
#define pg_memory_barrier()		\
	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory")
#define pg_read_barrier()		pg_compiler_barrier()
#define pg_write_barrier()		pg_compiler_barrier()
#elif defined(__ia64__) || defined(__ia64)

/*
 * Itanium is weakly ordered, so read and write barriers require a full
 * fence.  (They are deliberately left undefined in this branch.)
 */
#define pg_memory_barrier()		__asm__ __volatile__ ("mf" : : : "memory")
#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier()		__asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier()		__asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier()		__asm__ __volatile__ ("lwsync" : : : "memory")
#elif defined(__alpha) || defined(__alpha__)	/* Alpha */

/*
 * Unlike all other known architectures, Alpha allows dependent reads to be
 * reordered, but we don't currently find it necessary to provide a conditional
 * read barrier to cover that case.  We might need to add that later.
 *
 * NOTE(review): confirm that the target assembler accepts "rmb" as an Alpha
 * mnemonic; this branch is kept exactly as it was.
 */
#define pg_memory_barrier()		__asm__ __volatile__ ("mb" : : : "memory")
#define pg_read_barrier()		__asm__ __volatile__ ("rmb" : : : "memory")
#define pg_write_barrier()		__asm__ __volatile__ ("wmb" : : : "memory")
#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

/*
 * If we're on GCC 4.1.0 or higher, we should be able to get a memory
 * barrier out of this compiler built-in.  But we prefer to rely on our
 * own definitions where possible, and use this only as a fallback.
 */
#define pg_memory_barrier()		__sync_synchronize()
#endif
#elif defined(__ia64__) || defined(__ia64)

/* Itanium with a non-GCC, non-icc compiler: use the compiler's intrinsics. */
#define pg_compiler_barrier()	_Asm_sched_fence()
#define pg_memory_barrier()		_Asm_mf()
#elif defined(WIN32_ONLY_COMPILER)

/* Should work on both MSVC and Borland. */
#include <intrin.h>
#pragma intrinsic(_ReadWriteBarrier)
#define pg_compiler_barrier()	_ReadWriteBarrier()
#define pg_memory_barrier()		MemoryBarrier()
#endif
|
||||
|
||||
/*
 * If we have no memory barrier implementation for this architecture, we
 * fall back to acquiring and releasing a spinlock.  This might, in turn,
 * fall back to the semaphore-based spinlock implementation, which will be
 * amazingly slow.
 *
 * It's not self-evident that every possible legal implementation of a
 * spinlock acquire-and-release would be equivalent to a full memory barrier.
 * For example, I'm not sure that Itanium's acq and rel add up to a full
 * fence.  But all of our actual implementations seem OK in this regard.
 *
 * The macro takes no parameters, matching every other definition of
 * pg_memory_barrier() so that callers can always write pg_memory_barrier().
 */
#if !defined(pg_memory_barrier)
#define pg_memory_barrier() \
	do { S_LOCK(&dummy_spinlock); S_UNLOCK(&dummy_spinlock); } while (0)
#endif
|
||||
|
||||
/*
 * If read or write barriers are undefined, we upgrade them to full memory
 * barriers.
 *
 * If a compiler barrier is unavailable, you probably don't want a full
 * memory barrier instead, so if you have a use case for a compiler barrier,
 * you'd better use #ifdef.
 */
#if !defined(pg_read_barrier)
#define pg_read_barrier()			pg_memory_barrier()
#endif
#if !defined(pg_write_barrier)
#define pg_write_barrier()			pg_memory_barrier()
#endif
|
||||
|
||||
#endif /* BARRIER_H */
|
||||
121
pg_include/storage/block.h
Executable file
121
pg_include/storage/block.h
Executable file
@@ -0,0 +1,121 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* block.h
|
||||
* POSTGRES disk block definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/block.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BLOCK_H
|
||||
#define BLOCK_H
|
||||
|
||||
/*
|
||||
* BlockNumber:
|
||||
*
|
||||
* each data file (heap or index) is divided into postgres disk blocks
|
||||
* (which may be thought of as the unit of i/o -- a postgres buffer
|
||||
* contains exactly one disk block). the blocks are numbered
|
||||
* sequentially, 0 to 0xFFFFFFFE.
|
||||
*
|
||||
* InvalidBlockNumber is the same thing as P_NEW in buf.h.
|
||||
*
|
||||
* the access methods, the buffer manager and the storage manager are
|
||||
* more or less the only pieces of code that should be accessing disk
|
||||
* blocks directly.
|
||||
*/
|
||||
typedef uint32 BlockNumber;
|
||||
|
||||
#define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF)
|
||||
|
||||
#define MaxBlockNumber ((BlockNumber) 0xFFFFFFFE)
|
||||
|
||||
/*
|
||||
* BlockId:
|
||||
*
|
||||
* this is a storage type for BlockNumber. in other words, this type
|
||||
* is used for on-disk structures (e.g., in HeapTupleData) whereas
|
||||
* BlockNumber is the type on which calculations are performed (e.g.,
|
||||
* in access method code).
|
||||
*
|
||||
* there doesn't appear to be any reason to have separate types except
|
||||
* for the fact that BlockIds can be SHORTALIGN'd (and therefore any
|
||||
* structures that contains them, such as ItemPointerData, can also be
|
||||
* SHORTALIGN'd). this is an important consideration for reducing the
|
||||
* space requirements of the line pointer (ItemIdData) array on each
|
||||
* page and the header of each heap or index tuple, so it doesn't seem
|
||||
* wise to change this without good reason.
|
||||
*/
|
||||
typedef struct BlockIdData
|
||||
{
|
||||
uint16 bi_hi;
|
||||
uint16 bi_lo;
|
||||
} BlockIdData;
|
||||
|
||||
typedef BlockIdData *BlockId; /* block identifier */
|
||||
|
||||
/* ----------------
|
||||
* support macros
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
 * BlockNumberIsValid
 *		True iff blockNumber is valid, i.e. differs from InvalidBlockNumber
 *		after conversion to BlockNumber.
 */
#define BlockNumberIsValid(blockNumber) \
	((bool) ((BlockNumber) (blockNumber) != InvalidBlockNumber))
|
||||
|
||||
/*
 * BlockIdIsValid
 *		True iff the block identifier is valid.  Only the pointer itself is
 *		checked (via PointerIsValid), not the block number it stores.
 */
#define BlockIdIsValid(blockId) \
	((bool) PointerIsValid(blockId))
|
||||
|
||||
/*
 * BlockIdSet
 *		Sets a block identifier to the specified value.
 *
 * The upper 16 bits of blockNumber go to bi_hi and the lower 16 bits to
 * bi_lo.  Both arguments are evaluated more than once.
 */
#define BlockIdSet(blockId, blockNumber) \
( \
	AssertMacro(PointerIsValid(blockId)), \
	(blockId)->bi_hi = (blockNumber) >> 16, \
	(blockId)->bi_lo = (blockNumber) & 0xffff \
)
|
||||
|
||||
/*
 * BlockIdCopy
 *		Copy a block identifier: both halves of *fromBlockId are assigned
 *		to *toBlockId.  Both arguments are evaluated more than once.
 */
#define BlockIdCopy(toBlockId, fromBlockId) \
( \
	AssertMacro(PointerIsValid(toBlockId)), \
	AssertMacro(PointerIsValid(fromBlockId)), \
	(toBlockId)->bi_hi = (fromBlockId)->bi_hi, \
	(toBlockId)->bi_lo = (fromBlockId)->bi_lo \
)
|
||||
|
||||
/*
 * BlockIdEquals
 *		Check for block number equality: true when both 16-bit halves of
 *		the two identifiers match.
 */
#define BlockIdEquals(blockId1, blockId2) \
	((blockId1)->bi_hi == (blockId2)->bi_hi && \
	 (blockId1)->bi_lo == (blockId2)->bi_lo)
|
||||
|
||||
/*
 * BlockIdGetBlockNumber
 *		Retrieve the block number from a block identifier.
 *
 * Each half is widened to BlockNumber before it is combined.  Shifting the
 * uint16 bi_hi directly would perform the shift in (signed) int after
 * integer promotion, which is undefined when bi_hi >= 0x8000.
 */
#define BlockIdGetBlockNumber(blockId) \
( \
	AssertMacro(BlockIdIsValid(blockId)), \
	(BlockNumber) ((((BlockNumber) (blockId)->bi_hi) << 16) | \
				   ((BlockNumber) (blockId)->bi_lo)) \
)
|
||||
|
||||
#endif /* BLOCK_H */
|
||||
46
pg_include/storage/buf.h
Executable file
46
pg_include/storage/buf.h
Executable file
@@ -0,0 +1,46 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* buf.h
|
||||
* Basic buffer manager data types.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/buf.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BUF_H
|
||||
#define BUF_H
|
||||
|
||||
/*
 * Buffer identifiers.
 *
 * Zero is invalid, positive is the index of a shared buffer (1..NBuffers),
 * negative is the index of a local buffer (-1 .. -NLocBuffer).
 */
typedef int Buffer;

#define InvalidBuffer	0
|
||||
|
||||
/*
 * BufferIsInvalid
 *		True iff the buffer is invalid, i.e. equals InvalidBuffer.
 */
#define BufferIsInvalid(buffer) ((buffer) == InvalidBuffer)
|
||||
|
||||
/*
 * BufferIsLocal
 *		True iff the buffer is local (not visible to other backends).
 *		Local buffers are the ones with negative identifiers.
 */
#define BufferIsLocal(buffer)	((buffer) < 0)
|
||||
|
||||
/*
 * Buffer access strategy objects.
 *
 * BufferAccessStrategyData is private to freelist.c, so code including this
 * header only ever handles the opaque pointer type.
 */
typedef struct BufferAccessStrategyData *BufferAccessStrategy;
|
||||
|
||||
#endif /* BUF_H */
|
||||
216
pg_include/storage/buf_internals.h
Executable file
216
pg_include/storage/buf_internals.h
Executable file
@@ -0,0 +1,216 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* buf_internals.h
|
||||
* Internal definitions for buffer manager and the buffer replacement
|
||||
* strategy.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/buf_internals.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BUFMGR_INTERNALS_H
|
||||
#define BUFMGR_INTERNALS_H
|
||||
|
||||
#include "storage/buf.h"
|
||||
#include "storage/latch.h"
|
||||
#include "storage/lwlock.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "storage/spin.h"
|
||||
#include "utils/relcache.h"
|
||||
|
||||
|
||||
/*
 * Flags for buffer descriptors
 *
 * Note: TAG_VALID essentially means that there is a buffer hashtable
 * entry associated with the buffer's tag.
 */
#define BM_DIRTY				(1 << 0)		/* data needs writing */
#define BM_VALID				(1 << 1)		/* data is valid */
#define BM_TAG_VALID			(1 << 2)		/* tag is assigned */
#define BM_IO_IN_PROGRESS		(1 << 3)		/* read or write in progress */
#define BM_IO_ERROR				(1 << 4)		/* previous I/O failed */
#define BM_JUST_DIRTIED			(1 << 5)		/* dirtied since write started */
#define BM_PIN_COUNT_WAITER		(1 << 6)		/* have waiter for sole pin */
#define BM_CHECKPOINT_NEEDED	(1 << 7)		/* must write for checkpoint */
#define BM_PERMANENT			(1 << 8)		/* permanent relation (not
												 * unlogged) */
|
||||
|
||||
/* Type of the buffer-descriptor flags field that holds the BM_* bits. */
typedef bits16 BufFlags;
|
||||
|
||||
/*
 * The maximum allowed value of usage_count represents a tradeoff between
 * accuracy and speed of the clock-sweep buffer management algorithm.  A
 * large value (comparable to NBuffers) would approximate LRU semantics.
 * But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of
 * clock sweeps to find a free buffer, so in practice we don't want the
 * value to be very large.
 */
#define BM_MAX_USAGE_COUNT	5
|
||||
|
||||
/*
|
||||
* Buffer tag identifies which disk block the buffer contains.
|
||||
*
|
||||
* Note: the BufferTag data must be sufficient to determine where to write the
|
||||
* block, without reference to pg_class or pg_tablespace entries. It's
|
||||
* possible that the backend flushing the buffer doesn't even believe the
|
||||
* relation is visible yet (its xact may have started before the xact that
|
||||
* created the rel). The storage manager must be able to cope anyway.
|
||||
*
|
||||
* Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
|
||||
* to be fixed to zero them, since this struct is used as a hash key.
|
||||
*/
|
||||
typedef struct buftag
|
||||
{
|
||||
RelFileNode rnode; /* physical relation identifier */
|
||||
ForkNumber forkNum;
|
||||
BlockNumber blockNum; /* blknum relative to begin of reln */
|
||||
} BufferTag;
|
||||
|
||||
/*
 * CLEAR_BUFFERTAG
 *		Reset every field of buffer tag "a" (an lvalue, not a pointer) to its
 *		Invalid* value.  "a" is evaluated once per field.
 */
#define CLEAR_BUFFERTAG(a) \
( \
	(a).rnode.spcNode = InvalidOid, \
	(a).rnode.dbNode = InvalidOid, \
	(a).rnode.relNode = InvalidOid, \
	(a).forkNum = InvalidForkNumber, \
	(a).blockNum = InvalidBlockNumber \
)
|
||||
|
||||
/*
 * INIT_BUFFERTAG
 *		Fill buffer tag "a" (an lvalue, not a pointer) from the given
 *		relation node, fork number and block number.  Only the three named
 *		fields are assigned; nothing else in the struct is touched.
 */
#define INIT_BUFFERTAG(a,xx_rnode,xx_forkNum,xx_blockNum) \
( \
	(a).rnode = (xx_rnode), \
	(a).forkNum = (xx_forkNum), \
	(a).blockNum = (xx_blockNum) \
)
|
||||
|
||||
/*
 * BUFFERTAGS_EQUAL
 *		True iff buffer tags "a" and "b" (lvalues, not pointers) have equal
 *		relation nodes, block numbers and fork numbers.
 */
#define BUFFERTAGS_EQUAL(a,b) \
( \
	RelFileNodeEquals((a).rnode, (b).rnode) && \
	(a).blockNum == (b).blockNum && \
	(a).forkNum == (b).forkNum \
)
|
||||
|
||||
/*
 * The shared buffer mapping table is partitioned to reduce contention.
 * To determine which partition lock a given tag requires, compute the tag's
 * hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
 * NB: NUM_BUFFER_PARTITIONS must be a power of 2!
 */
#define BufTableHashPartition(hashcode) \
	((hashcode) % NUM_BUFFER_PARTITIONS)
#define BufMappingPartitionLock(hashcode) \
	((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode)))
|
||||
|
||||
/*
|
||||
* BufferDesc -- shared descriptor/state data for a single shared buffer.
|
||||
*
|
||||
* Note: buf_hdr_lock must be held to examine or change the tag, flags,
|
||||
* usage_count, refcount, or wait_backend_pid fields. buf_id field never
|
||||
* changes after initialization, so does not need locking. freeNext is
|
||||
* protected by the BufFreelistLock not buf_hdr_lock. The LWLocks can take
|
||||
* care of themselves. The buf_hdr_lock is *not* used to control access to
|
||||
* the data in the buffer!
|
||||
*
|
||||
* An exception is that if we have the buffer pinned, its tag can't change
|
||||
* underneath us, so we can examine the tag without locking the spinlock.
|
||||
* Also, in places we do one-time reads of the flags without bothering to
|
||||
* lock the spinlock; this is generally for situations where we don't expect
|
||||
* the flag bit being tested to be changing.
|
||||
*
|
||||
* We can't physically remove items from a disk page if another backend has
|
||||
* the buffer pinned. Hence, a backend may need to wait for all other pins
|
||||
* to go away. This is signaled by storing its own PID into
|
||||
* wait_backend_pid and setting flag bit BM_PIN_COUNT_WAITER. At present,
|
||||
* there can be only one such waiter per buffer.
|
||||
*
|
||||
* We use this same struct for local buffer headers, but the lock fields
|
||||
* are not used and not all of the flag bits are useful either.
|
||||
*/
|
||||
typedef struct sbufdesc
|
||||
{
|
||||
BufferTag tag; /* ID of page contained in buffer */
|
||||
BufFlags flags; /* see bit definitions above */
|
||||
uint16 usage_count; /* usage counter for clock sweep code */
|
||||
unsigned refcount; /* # of backends holding pins on buffer */
|
||||
int wait_backend_pid; /* backend PID of pin-count waiter */
|
||||
|
||||
slock_t buf_hdr_lock; /* protects the above fields */
|
||||
|
||||
int buf_id; /* buffer's index number (from 0) */
|
||||
int freeNext; /* link in freelist chain */
|
||||
|
||||
LWLockId io_in_progress_lock; /* to wait for I/O to complete */
|
||||
LWLockId content_lock; /* to lock access to buffer contents */
|
||||
} BufferDesc;
|
||||
|
||||
/* Buffer number for a descriptor: its buf_id plus one. */
#define BufferDescriptorGetBuffer(bdesc) (1 + (bdesc)->buf_id)
|
||||
|
||||
/*
 * The freeNext field is either the index of the next freelist entry,
 * or one of these special values:
 */
#define FREENEXT_END_OF_LIST	(-1)
#define FREENEXT_NOT_IN_LIST	(-2)
|
||||
|
||||
/*
 * Macros for acquiring/releasing a shared buffer header's spinlock.
 * Do not apply these to local buffers!
 *
 * Note: as a general coding rule, if you are using these then you probably
 * need to be using a volatile-qualified pointer to the buffer header, to
 * ensure that the compiler doesn't rearrange accesses to the header to
 * occur before or after the spinlock is acquired/released.
 */
#define LockBufHdr(bufHdr)		SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
#define UnlockBufHdr(bufHdr)	SpinLockRelease(&(bufHdr)->buf_hdr_lock)
|
||||
|
||||
|
||||
/* in buf_init.c */
|
||||
extern PGDLLIMPORT BufferDesc *BufferDescriptors;
|
||||
|
||||
/* in localbuf.c */
|
||||
extern BufferDesc *LocalBufferDescriptors;
|
||||
|
||||
|
||||
/*
|
||||
* Internal routines: only called by bufmgr
|
||||
*/
|
||||
|
||||
/* freelist.c */
|
||||
extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
|
||||
bool *lock_held);
|
||||
extern void StrategyFreeBuffer(volatile BufferDesc *buf);
|
||||
extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
|
||||
volatile BufferDesc *buf);
|
||||
|
||||
extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
|
||||
extern void StrategyNotifyBgWriter(Latch *bgwriterLatch);
|
||||
|
||||
extern Size StrategyShmemSize(void);
|
||||
extern void StrategyInitialize(bool init);
|
||||
|
||||
/* buf_table.c */
|
||||
extern Size BufTableShmemSize(int size);
|
||||
extern void InitBufTable(int size);
|
||||
extern uint32 BufTableHashCode(BufferTag *tagPtr);
|
||||
extern int BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
|
||||
extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
|
||||
extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
|
||||
|
||||
/* localbuf.c */
|
||||
extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
|
||||
BlockNumber blockNum);
|
||||
extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
|
||||
BlockNumber blockNum, bool *foundPtr);
|
||||
extern void MarkLocalBufferDirty(Buffer buffer);
|
||||
extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
|
||||
BlockNumber firstDelBlock);
|
||||
extern void DropRelFileNodeAllLocalBuffers(RelFileNode rnode);
|
||||
extern void AtEOXact_LocalBuffers(bool isCommit);
|
||||
|
||||
#endif /* BUFMGR_INTERNALS_H */
|
||||
45
pg_include/storage/buffile.h
Executable file
45
pg_include/storage/buffile.h
Executable file
@@ -0,0 +1,45 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* buffile.h
|
||||
* Management of large buffered files, primarily temporary files.
|
||||
*
|
||||
* The BufFile routines provide a partial replacement for stdio atop
|
||||
* virtual file descriptors managed by fd.c. Currently they only support
|
||||
* buffered access to a virtual file, without any of stdio's formatting
|
||||
* features. That's enough for immediate needs, but the set of facilities
|
||||
* could be expanded if necessary.
|
||||
*
|
||||
* BufFile also supports working with temporary files that exceed the OS
|
||||
* file size limit and/or the largest offset representable in an int.
|
||||
* It might be better to split that out as a separately accessible module,
|
||||
* but currently we have no need for oversize temp files without buffered
|
||||
* access.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/buffile.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef BUFFILE_H
|
||||
#define BUFFILE_H
|
||||
|
||||
/* BufFile is an opaque type whose details are not known outside buffile.c. */

typedef struct BufFile BufFile;
|
||||
|
||||
/*
|
||||
* prototypes for functions in buffile.c
|
||||
*/
|
||||
|
||||
extern BufFile *BufFileCreateTemp(bool interXact);
|
||||
extern void BufFileClose(BufFile *file);
|
||||
extern size_t BufFileRead(BufFile *file, void *ptr, size_t size);
|
||||
extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size);
|
||||
extern int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence);
|
||||
extern void BufFileTell(BufFile *file, int *fileno, off_t *offset);
|
||||
extern int BufFileSeekBlock(BufFile *file, long blknum);
|
||||
|
||||
#endif /* BUFFILE_H */
|
||||
226
pg_include/storage/bufmgr.h
Executable file
226
pg_include/storage/bufmgr.h
Executable file
@@ -0,0 +1,226 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* bufmgr.h
|
||||
* POSTGRES buffer manager definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/bufmgr.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BUFMGR_H
|
||||
#define BUFMGR_H
|
||||
|
||||
#include "storage/block.h"
|
||||
#include "storage/buf.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "storage/relfilenode.h"
|
||||
#include "utils/relcache.h"
|
||||
|
||||
/* Untyped pointer to a block image; BufferGetBlock() below yields this type. */
typedef void *Block;
|
||||
|
||||
/* Possible arguments for GetAccessStrategy() */
typedef enum BufferAccessStrategyType
{
	BAS_NORMAL,					/* Normal random access */
	BAS_BULKREAD,				/* Large read-only scan (hint bit updates are
								 * ok) */
	BAS_BULKWRITE,				/* Large multi-block write (e.g. COPY IN) */
	BAS_VACUUM					/* VACUUM */
} BufferAccessStrategyType;
|
||||
|
||||
/* Possible modes for ReadBufferExtended() */
typedef enum
{
	RBM_NORMAL,					/* Normal read */
	RBM_ZERO,					/* Don't read from disk, caller will
								 * initialize */
	RBM_ZERO_ON_ERROR			/* Read, but return an all-zeros page on error */
} ReadBufferMode;
|
||||
|
||||
/* in globals.c ... this duplicates miscadmin.h */
|
||||
extern PGDLLIMPORT int NBuffers;
|
||||
|
||||
/* in bufmgr.c */
|
||||
extern bool zero_damaged_pages;
|
||||
extern int bgwriter_lru_maxpages;
|
||||
extern double bgwriter_lru_multiplier;
|
||||
extern bool track_io_timing;
|
||||
extern int target_prefetch_pages;
|
||||
|
||||
/* in buf_init.c */
|
||||
extern PGDLLIMPORT char *BufferBlocks;
|
||||
extern PGDLLIMPORT int32 *PrivateRefCount;
|
||||
|
||||
/* in localbuf.c */
|
||||
extern PGDLLIMPORT int NLocBuffer;
|
||||
extern PGDLLIMPORT Block *LocalBufferBlockPointers;
|
||||
extern PGDLLIMPORT int32 *LocalRefCount;
|
||||
|
||||
/* special block number for ReadBuffer() */
#define P_NEW	InvalidBlockNumber	/* grow the file to get a new page */
|
||||
|
||||
/*
 * Buffer content lock modes (mode argument for LockBuffer())
 */
#define BUFFER_LOCK_UNLOCK		0
#define BUFFER_LOCK_SHARE		1
#define BUFFER_LOCK_EXCLUSIVE	2
|
||||
|
||||
/*
|
||||
* These routines are beaten on quite heavily, hence the macroization.
|
||||
*/
|
||||
|
||||
/*
 * BufferIsValid
 *		True iff the given buffer number is valid (either as a shared
 *		or local buffer).
 *
 * Note: For a long time this was defined the same as BufferIsPinned,
 * that is it would say False if you didn't hold a pin on the buffer.
 * I believe this was bogus and served only to mask logic errors.
 * Code should always know whether it has a buffer reference,
 * independently of the pin state.
 *
 * Note: For a further long time this was not quite the inverse of the
 * BufferIsInvalid() macro, in that it also did sanity checks to verify
 * that the buffer number was in range.  Most likely, this macro was
 * originally intended only to be used in assertions, but its use has
 * since expanded quite a bit, and the overhead of making those checks
 * even in non-assert-enabled builds can be significant.  Thus, we've
 * now demoted the range checks to assertions within the macro itself.
 */
#define BufferIsValid(bufnum) \
( \
	AssertMacro((bufnum) <= NBuffers && (bufnum) >= -NLocBuffer), \
	(bufnum) != InvalidBuffer  \
)
|
||||
|
||||
/*
 * BufferIsPinned
 *		True iff the buffer is pinned (also checks for valid buffer number).
 *
 *		NOTE: what we check here is that *this* backend holds a pin on
 *		the buffer.  We do not care whether some other backend does.
 *
 * Local buffers are looked up in LocalRefCount at index -(bufnum) - 1,
 * shared buffers in PrivateRefCount at index (bufnum) - 1.
 */
#define BufferIsPinned(bufnum) \
( \
	!BufferIsValid(bufnum) ? \
		false \
	: \
		BufferIsLocal(bufnum) ? \
			(LocalRefCount[-(bufnum) - 1] > 0) \
		: \
			(PrivateRefCount[(bufnum) - 1] > 0) \
)
|
||||
|
||||
/*
 * BufferGetBlock
 *		Returns a reference to a disk page image associated with a buffer.
 *
 * Note:
 *		Assumes buffer is valid.
 *
 * Local buffers are resolved through LocalBufferBlockPointers; shared
 * buffers are located at offset (buffer - 1) * BLCKSZ within BufferBlocks.
 */
#define BufferGetBlock(buffer) \
( \
	AssertMacro(BufferIsValid(buffer)), \
	BufferIsLocal(buffer) ? \
		LocalBufferBlockPointers[-(buffer) - 1] \
	: \
		(Block) (BufferBlocks + ((Size) ((buffer) - 1)) * BLCKSZ) \
)
|
||||
|
||||
/*
 * BufferGetPageSize
 *		Returns the page size within a buffer.
 *
 * Notes:
 *		Assumes buffer is valid.
 *
 *		The buffer can be a raw disk block and need not contain a valid
 *		(formatted) disk page.
 *
 *		The result is always BLCKSZ; the buffer argument is only asserted on.
 */
/* XXX should dig out of buffer descriptor */
#define BufferGetPageSize(buffer) \
( \
	AssertMacro(BufferIsValid(buffer)), \
	(Size)BLCKSZ \
)
|
||||
|
||||
/*
 * BufferGetPage
 *		Returns the page associated with a buffer: the result of
 *		BufferGetBlock() cast to Page.
 */
#define BufferGetPage(buffer) ((Page)BufferGetBlock(buffer))
|
||||
|
||||
/*
|
||||
* prototypes for functions in bufmgr.c
|
||||
*/
|
||||
extern void PrefetchBuffer(Relation reln, ForkNumber forkNum,
|
||||
BlockNumber blockNum);
|
||||
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
|
||||
extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
|
||||
BlockNumber blockNum, ReadBufferMode mode,
|
||||
BufferAccessStrategy strategy);
|
||||
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
|
||||
ForkNumber forkNum, BlockNumber blockNum,
|
||||
ReadBufferMode mode, BufferAccessStrategy strategy);
|
||||
extern void ReleaseBuffer(Buffer buffer);
|
||||
extern void UnlockReleaseBuffer(Buffer buffer);
|
||||
extern void MarkBufferDirty(Buffer buffer);
|
||||
extern void IncrBufferRefCount(Buffer buffer);
|
||||
extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
|
||||
BlockNumber blockNum);
|
||||
|
||||
extern void InitBufferPool(void);
|
||||
extern void InitBufferPoolAccess(void);
|
||||
extern void InitBufferPoolBackend(void);
|
||||
extern void AtEOXact_Buffers(bool isCommit);
|
||||
extern void PrintBufferLeakWarning(Buffer buffer);
|
||||
extern void CheckPointBuffers(int flags);
|
||||
extern BlockNumber BufferGetBlockNumber(Buffer buffer);
|
||||
extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
|
||||
ForkNumber forkNum);
|
||||
extern void FlushRelationBuffers(Relation rel);
|
||||
extern void FlushDatabaseBuffers(Oid dbid);
|
||||
extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
|
||||
ForkNumber forkNum, BlockNumber firstDelBlock);
|
||||
extern void DropRelFileNodeAllBuffers(RelFileNodeBackend rnode);
|
||||
extern void DropDatabaseBuffers(Oid dbid);
|
||||
|
||||
/* Shorthand for RelationGetNumberOfBlocksInFork on the main fork. */
#define RelationGetNumberOfBlocks(reln) \
	RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)
|
||||
|
||||
extern bool BufferIsPermanent(Buffer buffer);
|
||||
|
||||
#ifdef NOT_USED
|
||||
extern void PrintPinnedBufs(void);
|
||||
#endif
|
||||
extern Size BufferShmemSize(void);
|
||||
extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
|
||||
ForkNumber *forknum, BlockNumber *blknum);
|
||||
|
||||
extern void SetBufferCommitInfoNeedsSave(Buffer buffer);
|
||||
|
||||
extern void UnlockBuffers(void);
|
||||
extern void LockBuffer(Buffer buffer, int mode);
|
||||
extern bool ConditionalLockBuffer(Buffer buffer);
|
||||
extern void LockBufferForCleanup(Buffer buffer);
|
||||
extern bool ConditionalLockBufferForCleanup(Buffer buffer);
|
||||
extern bool HoldingBufferPinThatDelaysRecovery(void);
|
||||
|
||||
extern void AbortBufferIO(void);
|
||||
|
||||
extern void BufmgrCommit(void);
|
||||
extern bool BgBufferSync(void);
|
||||
|
||||
extern void AtProcExit_LocalBuffers(void);
|
||||
|
||||
/* in freelist.c */
|
||||
extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
|
||||
extern void FreeAccessStrategy(BufferAccessStrategy strategy);
|
||||
|
||||
#endif
|
||||
385
pg_include/storage/bufpage.h
Executable file
385
pg_include/storage/bufpage.h
Executable file
@@ -0,0 +1,385 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* bufpage.h
|
||||
* Standard POSTGRES buffer page definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/bufpage.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BUFPAGE_H
|
||||
#define BUFPAGE_H
|
||||
|
||||
#include "access/xlogdefs.h"
|
||||
#include "storage/item.h"
|
||||
#include "storage/off.h"
|
||||
|
||||
/*
|
||||
* A postgres disk page is an abstraction layered on top of a postgres
|
||||
* disk block (which is simply a unit of i/o, see block.h).
|
||||
*
|
||||
* specifically, while a disk block can be unformatted, a postgres
|
||||
* disk page is always a slotted page of the form:
|
||||
*
|
||||
* +----------------+---------------------------------+
|
||||
* | PageHeaderData | linp1 linp2 linp3 ... |
|
||||
* +-----------+----+---------------------------------+
|
||||
* | ... linpN | |
|
||||
* +-----------+--------------------------------------+
|
||||
* | ^ pd_lower |
|
||||
* | |
|
||||
* | v pd_upper |
|
||||
* +-------------+------------------------------------+
|
||||
* | | tupleN ... |
|
||||
* +-------------+------------------+-----------------+
|
||||
* | ... tuple3 tuple2 tuple1 | "special space" |
|
||||
* +--------------------------------+-----------------+
|
||||
* ^ pd_special
|
||||
*
|
||||
* a page is full when nothing can be added between pd_lower and
|
||||
* pd_upper.
|
||||
*
|
||||
* all blocks written out by an access method must be disk pages.
|
||||
*
|
||||
* EXCEPTIONS:
|
||||
*
|
||||
* obviously, a page is not formatted before it is initialized by
|
||||
* a call to PageInit.
|
||||
*
|
||||
* NOTES:
|
||||
*
|
||||
* linp1..N form an ItemId array. ItemPointers point into this array
|
||||
* rather than pointing directly to a tuple. Note that OffsetNumbers
|
||||
* conventionally start at 1, not 0.
|
||||
*
|
||||
* tuple1..N are added "backwards" on the page. because a tuple's
|
||||
* ItemPointer points to its ItemId entry rather than its actual
|
||||
* byte-offset position, tuples can be physically shuffled on a page
|
||||
* whenever the need arises.
|
||||
*
|
||||
* AM-generic per-page information is kept in PageHeaderData.
|
||||
*
|
||||
* AM-specific per-page data (if any) is kept in the area marked "special
|
||||
* space"; each AM has an "opaque" structure defined somewhere that is
|
||||
* stored as the page trailer. an access method should always
|
||||
* initialize its pages with PageInit and then set its own opaque
|
||||
* fields.
|
||||
*/
|
||||
|
||||
/*
 * A page is passed around as a generic Pointer; the macros in this file cast
 * it to PageHeader or char * as needed.
 */
typedef Pointer Page;
|
||||
|
||||
|
||||
/*
|
||||
* location (byte offset) within a page.
|
||||
*
|
||||
* note that this is actually limited to 2^15 because we have limited
|
||||
* ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
|
||||
*/
|
||||
typedef uint16 LocationIndex;
|
||||
|
||||
|
||||
/*
|
||||
* disk page organization
|
||||
*
|
||||
* space management information generic to any page
|
||||
*
|
||||
* pd_lsn - identifies xlog record for last change to this page.
|
||||
* pd_tli - ditto.
|
||||
* pd_flags - flag bits.
|
||||
* pd_lower - offset to start of free space.
|
||||
* pd_upper - offset to end of free space.
|
||||
* pd_special - offset to start of special space.
|
||||
* pd_pagesize_version - size in bytes and page layout version number.
|
||||
* pd_prune_xid - oldest XID among potentially prunable tuples on page.
|
||||
*
|
||||
* The LSN is used by the buffer manager to enforce the basic rule of WAL:
|
||||
* "thou shalt write xlog before data". A dirty buffer cannot be dumped
|
||||
* to disk until xlog has been flushed at least as far as the page's LSN.
|
||||
* We also store the 16 least significant bits of the TLI for identification
|
||||
* purposes (it is not clear that this is actually necessary, but it seems
|
||||
* like a good idea).
|
||||
*
|
||||
* pd_prune_xid is a hint field that helps determine whether pruning will be
|
||||
* useful. It is currently unused in index pages.
|
||||
*
|
||||
* The page version number and page size are packed together into a single
|
||||
* uint16 field. This is for historical reasons: before PostgreSQL 7.3,
|
||||
* there was no concept of a page version number, and doing it this way
|
||||
* lets us pretend that pre-7.3 databases have page version number zero.
|
||||
* We constrain page sizes to be multiples of 256, leaving the low eight
|
||||
* bits available for a version number.
|
||||
*
|
||||
* Minimum possible page size is perhaps 64B to fit page header, opaque space
|
||||
* and a minimal tuple; of course, in reality you want it much bigger, so
|
||||
* the constraint on pagesize mod 256 is not an important restriction.
|
||||
* On the high end, we can only support pages up to 32KB because lp_off/lp_len
|
||||
* are 15 bits.
|
||||
*/
|
||||
typedef struct PageHeaderData
|
||||
{
|
||||
/* XXX LSN is member of *any* block, not only page-organized ones */
|
||||
XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
|
||||
* record for last change to this page */
|
||||
uint16 pd_tli; /* least significant bits of the TimeLineID
|
||||
* containing the LSN */
|
||||
uint16 pd_flags; /* flag bits, see below */
|
||||
LocationIndex pd_lower; /* offset to start of free space */
|
||||
LocationIndex pd_upper; /* offset to end of free space */
|
||||
LocationIndex pd_special; /* offset to start of special space */
|
||||
uint16 pd_pagesize_version;
|
||||
TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
|
||||
ItemIdData pd_linp[1]; /* beginning of line pointer array */
|
||||
} PageHeaderData;
|
||||
|
||||
typedef PageHeaderData *PageHeader;
|
||||
|
||||
/*
 * pd_flags contains the following flag bits.  Undefined bits are initialized
 * to zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
 * pd_lower.  This should be considered a hint rather than the truth, since
 * changes to it are not WAL-logged.
 *
 * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
 * page for its new tuple version; this suggests that a prune is needed.
 * Again, this is just a hint.
 */
#define PD_HAS_FREE_LINES	0x0001		/* are there any unused line pointers? */
#define PD_PAGE_FULL		0x0002		/* not enough free space for new
										 * tuple? */
#define PD_ALL_VISIBLE		0x0004		/* all tuples on page are visible to
										 * everyone */

#define PD_VALID_FLAG_BITS	0x0007		/* OR of all valid pd_flags bits */
|
||||
|
||||
/*
 * Page layout version history:
 *
 *	0 - pre-7.3 Postgres releases.
 *	1 - releases 7.3 and 7.4; denotes a new HeapTupleHeader layout.
 *	2 - release 8.0; it changed the HeapTupleHeader layout again.
 *	3 - release 8.1; it redefined HeapTupleHeader infomask bits.
 *	4 - release 8.3; it changed the HeapTupleHeader layout again, added the
 *		pd_flags field (by stealing some bits from pd_tli), and added the
 *		pd_prune_xid field (which enlarges the header).
 */
#define PG_PAGE_LAYOUT_VERSION		4
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* page support macros
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
 * PageIsValid
 *		True iff page is valid, which here means only that the pointer
 *		passes PointerIsValid.
 */
#define PageIsValid(page) \
	PointerIsValid(page)
|
||||
|
||||
/*
 * Size of the fixed page header: the offset of pd_linp, so that the line
 * pointer(s) do not count as part of the header.
 */
#define SizeOfPageHeaderData \
	(offsetof(PageHeaderData, pd_linp))
|
||||
|
||||
/*
 * PageIsEmpty
 *		returns true iff no itemid has been allocated on the page, i.e.
 *		pd_lower does not extend past the fixed page header
 */
#define PageIsEmpty(page) \
	(((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData)
|
||||
|
||||
/*
 * PageIsNew
 *		returns true iff page has not been initialized (by PageInit);
 *		the test used is pd_upper == 0
 */
#define PageIsNew(page) \
	(((PageHeader) (page))->pd_upper == 0)
|
||||
|
||||
/*
 * PageGetItemId
 *		Returns an item identifier of a page.
 *
 *		offsetNumber is 1-based: it is decremented before indexing pd_linp.
 *		No range check is made.
 */
#define PageGetItemId(page, offsetNumber) \
	((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1]))
|
||||
|
||||
/*
 * PageGetContents
 *		To be used in case the page does not contain item pointers.
 *
 * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
 * Now it is.  Beware of old code that might think the offset to the contents
 * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
 */
#define PageGetContents(page) \
	((char *) (page) + MAXALIGN(SizeOfPageHeaderData))
|
||||
|
||||
/* ----------------
|
||||
* macros to access page size info
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
 * PageSizeIsValid
 *		True iff the page size is valid; the only size accepted is BLCKSZ.
 */
#define PageSizeIsValid(pageSize) \
	((pageSize) == BLCKSZ)
|
||||
|
||||
/*
 * PageGetPageSize
 *		Returns the page size of a page.
 *
 * this can only be called on a formatted page (unlike
 * BufferGetPageSize, which can be called on an unformatted page).
 * however, it can be called on a page that is not stored in a buffer.
 *
 * The size is pd_pagesize_version with the low eight (version) bits
 * masked off.
 */
#define PageGetPageSize(page) \
	((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00))
|
||||
|
||||
/*
 * PageGetPageLayoutVersion
 *		Returns the page layout version of a page, i.e. the low eight bits
 *		of pd_pagesize_version.
 */
#define PageGetPageLayoutVersion(page) \
	(((PageHeader) (page))->pd_pagesize_version & 0x00FF)
|
||||
|
||||
/*
 * PageSetPageSizeAndVersion
 *		Sets the page size and page layout version number of a page.
 *
 * We could support setting these two values separately, but there's
 * no real need for it at the moment.
 *
 * The assertions require size to fit in the 0xFF00 bits and version in
 * the 0x00FF bits; the two are then OR'ed into pd_pagesize_version.
 */
#define PageSetPageSizeAndVersion(page, size, version) \
( \
	AssertMacro(((size) & 0xFF00) == (size)), \
	AssertMacro(((version) & 0x00FF) == (version)), \
	((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
)
|
||||
|
||||
/* ----------------
|
||||
* page special data macros
|
||||
* ----------------
|
||||
*/
|
||||
/*
 * PageGetSpecialSize
 *		Returns size of special space on a page, computed as the page size
 *		minus pd_special.
 */
#define PageGetSpecialSize(page) \
	((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special))
|
||||
|
||||
/*
 * PageGetSpecialPointer
 *		Returns pointer to special space on a page: the page address plus
 *		pd_special bytes.  The page pointer is checked by assertion only.
 */
#define PageGetSpecialPointer(page) \
( \
	AssertMacro(PageIsValid(page)), \
	(char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \
)
|
||||
|
||||
/*
 * PageGetItem
 *		Retrieves an item on the given page: the page address plus the
 *		offset recorded in itemId.
 *
 * Note:
 *		This does not change the status of any of the resources passed.
 *		The semantics may change in the future.
 *
 *		Both the page pointer and the item's having storage are checked
 *		by assertion only.
 */
#define PageGetItem(page, itemId) \
( \
	AssertMacro(PageIsValid(page)), \
	AssertMacro(ItemIdHasStorage(itemId)), \
	(Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \
)
|
||||
|
||||
/*
 * PageGetMaxOffsetNumber
 *		Returns the maximum offset number used by the given page.
 *		Since offset numbers are 1-based, this is also the number
 *		of items on the page.
 *
 *		NOTE: if the page is not initialized (pd_lower == 0), we must
 *		return zero to ensure sane behavior.  Accept double evaluation
 *		of the argument so that we can ensure this.
 */
#define PageGetMaxOffsetNumber(page) \
	(((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData ? 0 : \
	 ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
	  / sizeof(ItemIdData)))
|
||||
|
||||
/*
 * Additional macros for access to page headers
 *
 * PageGetLSN/PageSetLSN read and assign pd_lsn directly.
 */
#define PageGetLSN(page) \
	(((PageHeader) (page))->pd_lsn)
#define PageSetLSN(page, lsn) \
	(((PageHeader) (page))->pd_lsn = (lsn))

/* NOTE: only the 16 least significant bits are stored */
#define PageGetTLI(page) \
	(((PageHeader) (page))->pd_tli)
#define PageSetTLI(page, tli) \
	(((PageHeader) (page))->pd_tli = (uint16) (tli))
|
||||
|
||||
/*
 * Test/set/clear macros for the individual pd_flags bits.  The test forms
 * yield the masked bit (zero or non-zero), not a normalized boolean.
 */
#define PageHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES)
#define PageSetHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES)
#define PageClearHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)

#define PageIsFull(page) \
	(((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
#define PageSetFull(page) \
	(((PageHeader) (page))->pd_flags |= PD_PAGE_FULL)
#define PageClearFull(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)

#define PageIsAllVisible(page) \
	(((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE)
#define PageSetAllVisible(page) \
	(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
#define PageClearAllVisible(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
|
||||
|
||||
/*
 * pd_prune_xid maintenance.
 *
 * PageIsPrunable: true iff pd_prune_xid is valid and precedes oldestxmin.
 * PageSetPrunable: stores xid if pd_prune_xid is invalid or xid precedes it,
 *		so the field only ever moves to an older XID.  Note that xid and page
 *		are evaluated more than once.
 * PageClearPrunable: resets pd_prune_xid to InvalidTransactionId.
 */
#define PageIsPrunable(page, oldestxmin) \
( \
	AssertMacro(TransactionIdIsNormal(oldestxmin)), \
	TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \
	TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \
)
#define PageSetPrunable(page, xid) \
do { \
	Assert(TransactionIdIsNormal(xid)); \
	if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
		TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
		((PageHeader) (page))->pd_prune_xid = (xid); \
} while (0)
#define PageClearPrunable(page) \
	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* extern declarations
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
|
||||
extern void PageInit(Page page, Size pageSize, Size specialSize);
|
||||
extern bool PageHeaderIsValid(PageHeader page);
|
||||
extern OffsetNumber PageAddItem(Page page, Item item, Size size,
|
||||
OffsetNumber offsetNumber, bool overwrite, bool is_heap);
|
||||
extern Page PageGetTempPage(Page page);
|
||||
extern Page PageGetTempPageCopy(Page page);
|
||||
extern Page PageGetTempPageCopySpecial(Page page);
|
||||
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
|
||||
extern void PageRepairFragmentation(Page page);
|
||||
extern Size PageGetFreeSpace(Page page);
|
||||
extern Size PageGetExactFreeSpace(Page page);
|
||||
extern Size PageGetHeapFreeSpace(Page page);
|
||||
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
|
||||
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
|
||||
|
||||
#endif /* BUFPAGE_H */
|
||||
19
pg_include/storage/copydir.h
Executable file
19
pg_include/storage/copydir.h
Executable file
@@ -0,0 +1,19 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* copydir.h
|
||||
* Copy a directory.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/copydir.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef COPYDIR_H
|
||||
#define COPYDIR_H
|
||||
|
||||
/* Directory and single-file copy entry points (see header banner above). */
extern void copydir(char *fromdir, char *todir, bool recurse);
extern void copy_file(char *fromfile, char *tofile);
|
||||
|
||||
#endif /* COPYDIR_H */
|
||||
112
pg_include/storage/fd.h
Executable file
112
pg_include/storage/fd.h
Executable file
@@ -0,0 +1,112 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* fd.h
|
||||
* Virtual file descriptor definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/fd.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* calls:
|
||||
*
|
||||
* File {Close, Read, Write, Seek, Tell, Sync}
|
||||
* {File Name Open, Allocate, Free} File
|
||||
*
|
||||
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
|
||||
* Use them for all file activity...
|
||||
*
|
||||
* File fd;
|
||||
 *	fd = PathNameOpenFile("foo", O_RDONLY, 0600);
|
||||
*
|
||||
* AllocateFile();
|
||||
* FreeFile();
|
||||
*
|
||||
* Use AllocateFile, not fopen, if you need a stdio file (FILE*); then
|
||||
* use FreeFile, not fclose, to close it. AVOID using stdio for files
|
||||
* that you intend to hold open for any length of time, since there is
|
||||
* no way for them to share kernel file descriptors with other files.
|
||||
*
|
||||
* Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate
|
||||
* open directories (DIR*).
|
||||
*/
|
||||
#ifndef FD_H
|
||||
#define FD_H
|
||||
|
||||
#include <dirent.h>
|
||||
|
||||
|
||||
/*
 * FileSeek uses the standard UNIX lseek(2) flags.
 */

typedef char *FileName;

/* Virtual file descriptor handle used by the File* routines declared below. */
typedef int File;


/* GUC parameter */
extern int	max_files_per_process;

/*
 * This is private to fd.c, but exported for save/restore_backend_variables()
 */
extern int	max_safe_fds;
|
||||
|
||||
|
||||
/*
|
||||
* prototypes for functions in fd.c
|
||||
*/
|
||||
|
||||
/* Operations on virtual Files --- equivalent to Unix kernel file ops */
|
||||
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
|
||||
extern File OpenTemporaryFile(bool interXact);
|
||||
extern void FileClose(File file);
|
||||
extern int FilePrefetch(File file, off_t offset, int amount);
|
||||
extern int FileRead(File file, char *buffer, int amount);
|
||||
extern int FileWrite(File file, char *buffer, int amount);
|
||||
extern int FileSync(File file);
|
||||
extern off_t FileSeek(File file, off_t offset, int whence);
|
||||
extern int FileTruncate(File file, off_t offset);
|
||||
extern char *FilePathName(File file);
|
||||
|
||||
/* Operations that allow use of regular stdio --- USE WITH CAUTION */
|
||||
extern FILE *AllocateFile(const char *name, const char *mode);
|
||||
extern int FreeFile(FILE *file);
|
||||
|
||||
/* Operations to allow use of the <dirent.h> library routines */
|
||||
extern DIR *AllocateDir(const char *dirname);
|
||||
extern struct dirent *ReadDir(DIR *dir, const char *dirname);
|
||||
extern int FreeDir(DIR *dir);
|
||||
|
||||
/* If you've really really gotta have a plain kernel FD, use this */
|
||||
extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
|
||||
|
||||
/* Miscellaneous support routines */
|
||||
extern void InitFileAccess(void);
|
||||
extern void set_max_safe_fds(void);
|
||||
extern void closeAllVfds(void);
|
||||
extern void SetTempTablespaces(Oid *tableSpaces, int numSpaces);
|
||||
extern bool TempTablespacesAreSet(void);
|
||||
extern Oid GetNextTempTableSpace(void);
|
||||
extern void AtEOXact_Files(void);
|
||||
extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
|
||||
SubTransactionId parentSubid);
|
||||
extern void RemovePgTempFiles(void);
|
||||
|
||||
extern int pg_fsync(int fd);
|
||||
extern int pg_fsync_no_writethrough(int fd);
|
||||
extern int pg_fsync_writethrough(int fd);
|
||||
extern int pg_fdatasync(int fd);
|
||||
extern int pg_flush_data(int fd, off_t offset, off_t amount);
|
||||
|
||||
/*
 * Filename components for OpenTemporaryFile.  The directory name and the
 * file-name prefix are the same string.
 */
#define PG_TEMP_FILES_DIR "pgsql_tmp"
#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
|
||||
|
||||
#endif /* FD_H */
|
||||
36
pg_include/storage/freespace.h
Executable file
36
pg_include/storage/freespace.h
Executable file
@@ -0,0 +1,36 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* freespace.h
|
||||
* POSTGRES free space map for quickly finding free space in relations
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/freespace.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef FREESPACE_H_
|
||||
#define FREESPACE_H_
|
||||
|
||||
#include "storage/block.h"
|
||||
#include "storage/relfilenode.h"
|
||||
#include "utils/relcache.h"
|
||||
|
||||
/* prototypes for public functions in freespace.c */
|
||||
extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
|
||||
extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
|
||||
extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
|
||||
BlockNumber oldPage,
|
||||
Size oldSpaceAvail,
|
||||
Size spaceNeeded);
|
||||
extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
|
||||
Size spaceAvail);
|
||||
extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
|
||||
Size spaceAvail);
|
||||
|
||||
extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
|
||||
extern void FreeSpaceMapVacuum(Relation rel);
|
||||
|
||||
#endif /* FREESPACE_H_ */
|
||||
72
pg_include/storage/fsm_internals.h
Executable file
72
pg_include/storage/fsm_internals.h
Executable file
@@ -0,0 +1,72 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
 * fsm_internals.h
|
||||
* internal functions for free space map
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/fsm_internals.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef FSM_INTERNALS_H
|
||||
#define FSM_INTERNALS_H
|
||||
|
||||
#include "storage/buf.h"
|
||||
#include "storage/bufpage.h"
|
||||
|
||||
/*
|
||||
* Structure of a FSM page. See src/backend/storage/freespace/README for
|
||||
* details.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
/*
|
||||
* fsm_search_avail() tries to spread the load of multiple backends by
|
||||
* returning different pages to different backends in a round-robin
|
||||
* fashion. fp_next_slot points to the next slot to be returned (assuming
|
||||
* there's enough space on it for the request). It's defined as an int,
|
||||
* because it's updated without an exclusive lock. uint16 would be more
|
||||
* appropriate, but int is more likely to be atomically
|
||||
* fetchable/storable.
|
||||
*/
|
||||
int fp_next_slot;
|
||||
|
||||
/*
|
||||
* fp_nodes contains the binary tree, stored in array. The first
|
||||
* NonLeafNodesPerPage elements are upper nodes, and the following
|
||||
* LeafNodesPerPage elements are leaf nodes. Unused nodes are zero.
|
||||
*/
|
||||
uint8 fp_nodes[1];
|
||||
} FSMPageData;
|
||||
|
||||
typedef FSMPageData *FSMPage;
|
||||
|
||||
/*
 * Number of non-leaf and leaf nodes, and nodes in total, on an FSM page.
 * These definitions are internal to fsmpage.c.
 *
 * NodesPerPage is what remains of a block after the MAXALIGN'd page header
 * and the FSMPageData fields that precede fp_nodes.
 */
#define NodesPerPage (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
					  offsetof(FSMPageData, fp_nodes))

#define NonLeafNodesPerPage (BLCKSZ / 2 - 1)
#define LeafNodesPerPage (NodesPerPage - NonLeafNodesPerPage)

/*
 * Number of FSM "slots" on a FSM page. This is what should be used
 * outside fsmpage.c.
 */
#define SlotsPerFSMPage LeafNodesPerPage
|
||||
|
||||
/* Prototypes for functions in fsmpage.c */
|
||||
extern int fsm_search_avail(Buffer buf, uint8 min_cat, bool advancenext,
|
||||
bool exclusive_lock_held);
|
||||
extern uint8 fsm_get_avail(Page page, int slot);
|
||||
extern uint8 fsm_get_max_avail(Page page);
|
||||
extern bool fsm_set_avail(Page page, int slot, uint8 value);
|
||||
extern bool fsm_truncate_avail(Page page, int nslots);
|
||||
extern bool fsm_rebuild_page(Page page);
|
||||
|
||||
#endif /* FSM_INTERNALS_H */
|
||||
26
pg_include/storage/indexfsm.h
Executable file
26
pg_include/storage/indexfsm.h
Executable file
@@ -0,0 +1,26 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* indexfsm.h
|
||||
* POSTGRES free space map for quickly finding an unused page in index
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/indexfsm.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef INDEXFSM_H_
|
||||
#define INDEXFSM_H_
|
||||
|
||||
#include "storage/block.h"
|
||||
#include "utils/relcache.h"
|
||||
|
||||
/* Index free-page tracking entry points (see header banner above). */
extern BlockNumber GetFreeIndexPage(Relation rel);
extern void RecordFreeIndexPage(Relation rel, BlockNumber page);
extern void RecordUsedIndexPage(Relation rel, BlockNumber page);

extern void IndexFreeSpaceMapVacuum(Relation rel);
|
||||
|
||||
#endif /* INDEXFSM_H_ */
|
||||
79
pg_include/storage/ipc.h
Executable file
79
pg_include/storage/ipc.h
Executable file
@@ -0,0 +1,79 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ipc.h
|
||||
* POSTGRES inter-process communication definitions.
|
||||
*
|
||||
* This file is misnamed, as it no longer has much of anything directly
|
||||
* to do with IPC. The functionality here is concerned with managing
|
||||
* exit-time cleanup for either a postmaster or a backend.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/ipc.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef IPC_H
|
||||
#define IPC_H
|
||||
|
||||
/*
 * pg_on_exit_callback is the callback type taken by on_proc_exit(),
 * on_shmem_exit() and cancel_shmem_exit(); shmem_startup_hook_type is the
 * type of the shmem_startup_hook variable.  Both are declared further down
 * in this file.
 */
typedef void (*pg_on_exit_callback) (int code, Datum arg);
typedef void (*shmem_startup_hook_type) (void);
|
||||
|
||||
/*----------
|
||||
* API for handling cleanup that must occur during either ereport(ERROR)
|
||||
* or ereport(FATAL) exits from a block of code. (Typical examples are
|
||||
* undoing transient changes to shared-memory state.)
|
||||
*
|
||||
* PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg);
|
||||
* {
|
||||
* ... code that might throw ereport(ERROR) or ereport(FATAL) ...
|
||||
* }
|
||||
* PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg);
|
||||
*
|
||||
* where the cleanup code is in a function declared per pg_on_exit_callback.
|
||||
* The Datum value "arg" can carry any information the cleanup function
|
||||
* needs.
|
||||
*
|
||||
* This construct ensures that cleanup_function() will be called during
|
||||
* either ERROR or FATAL exits. It will not be called on successful
|
||||
* exit from the controlled code. (If you want it to happen then too,
|
||||
* call the function yourself from just after the construct.)
|
||||
*
|
||||
* Note: the macro arguments are multiply evaluated, so avoid side-effects.
|
||||
*----------
|
||||
*/
|
||||
/*
 * The two macros below are the halves of one statement: the first opens a
 * "do {" and a PG_TRY() block, the second closes them, so they must always be
 * used as a pair with the same arguments.  On the normal path the callback is
 * only unregistered; in the PG_CATCH() branch it is unregistered, invoked
 * with code 0, and the error is re-thrown.
 */
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)	\
	do { \
		on_shmem_exit(cleanup_function, arg); \
		PG_TRY()

#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)	\
		cancel_shmem_exit(cleanup_function, arg); \
		PG_CATCH(); \
		{ \
			cancel_shmem_exit(cleanup_function, arg); \
			cleanup_function (0, arg); \
			PG_RE_THROW(); \
		} \
		PG_END_TRY(); \
	} while (0)
|
||||
|
||||
|
||||
/* ipc.c */
|
||||
extern bool proc_exit_inprogress;
|
||||
|
||||
extern void proc_exit(int code);
|
||||
extern void shmem_exit(int code);
|
||||
extern void on_proc_exit(pg_on_exit_callback function, Datum arg);
|
||||
extern void on_shmem_exit(pg_on_exit_callback function, Datum arg);
|
||||
extern void cancel_shmem_exit(pg_on_exit_callback function, Datum arg);
|
||||
extern void on_exit_reset(void);
|
||||
|
||||
/* ipci.c */
|
||||
extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook;
|
||||
|
||||
extern void CreateSharedMemoryAndSemaphores(bool makePrivate, int port);
|
||||
|
||||
#endif /* IPC_H */
|
||||
19
pg_include/storage/item.h
Executable file
19
pg_include/storage/item.h
Executable file
@@ -0,0 +1,19 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* item.h
|
||||
* POSTGRES disk item definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/item.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef ITEM_H
|
||||
#define ITEM_H
|
||||
|
||||
/* Item is an alias for the generic Pointer type; no structure is attached to it here. */
typedef Pointer Item;
|
||||
|
||||
#endif /* ITEM_H */
|
||||
183
pg_include/storage/itemid.h
Executable file
183
pg_include/storage/itemid.h
Executable file
@@ -0,0 +1,183 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* itemid.h
|
||||
* Standard POSTGRES buffer page item identifier definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/itemid.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef ITEMID_H
|
||||
#define ITEMID_H
|
||||
|
||||
/*
 * ItemIdData: an item pointer (also called line pointer) on a buffer page.
 *
 * The three bit-fields add up to 32 bits.  An item pointer can be "in use"
 * without owning any storage on the page; by convention lp_len == 0 in every
 * item pointer that has no storage, whatever its lp_flags state is.
 */
typedef struct ItemIdData
{
	/* offset to tuple (from start of page) */
	unsigned	lp_off:15;

	/* state of item pointer: one of the LP_xxx values */
	unsigned	lp_flags:2;

	/* byte length of tuple */
	unsigned	lp_len:15;
} ItemIdData;

/* ItemId: pointer to an ItemIdData */
typedef ItemIdData *ItemId;
|
||||
|
||||
/*
 * Possible values of lp_flags.  Only an UNUSED line pointer is available
 * for immediate re-use; pointers in the other three states are not.
 */
#define LP_UNUSED		0		/* unused (should always have lp_len=0) */
#define LP_NORMAL		1		/* used (should always have lp_len>0) */
#define LP_REDIRECT		2		/* HOT redirect (should have lp_len=0) */
#define LP_DEAD			3		/* dead, may or may not have storage */
|
||||
|
||||
/*
|
||||
* Item offsets and lengths are represented by these types when
|
||||
* they're not actually stored in an ItemIdData.
|
||||
*/
|
||||
typedef uint16 ItemOffset;
|
||||
typedef uint16 ItemLength;
|
||||
|
||||
|
||||
/* ----------------
|
||||
* support macros
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
 * ItemIdGetLength
 *		Yields the lp_len field of the item identifier itemId points to.
 */
#define ItemIdGetLength(itemId) ((itemId)->lp_len)
|
||||
|
||||
/*
 * ItemIdGetOffset
 *		Yields the lp_off field of the item identifier itemId points to.
 */
#define ItemIdGetOffset(itemId) ((itemId)->lp_off)
|
||||
|
||||
/*
 * ItemIdGetFlags
 *		Yields the lp_flags field (an LP_xxx state) of the item identifier
 *		itemId points to.
 */
#define ItemIdGetFlags(itemId) ((itemId)->lp_flags)
|
||||
|
||||
/*
 * ItemIdGetRedirect
 *		Yields lp_off, which in a REDIRECT pointer holds the link to the
 *		next item pointer rather than a byte offset.
 */
#define ItemIdGetRedirect(itemId) ((itemId)->lp_off)
|
||||
|
||||
/*
 * ItemIdIsValid
 *		True iff the item identifier pointer passes PointerIsValid().
 *
 * Nothing about the pointed-to contents is examined, so this is a weak
 * test that is probably useful only in Asserts.
 */
#define ItemIdIsValid(itemId) \
	PointerIsValid(itemId)
|
||||
|
||||
/*
 * ItemIdIsUsed
 *		True iff the item identifier is in any state other than LP_UNUSED.
 */
#define ItemIdIsUsed(itemId) ((itemId)->lp_flags != LP_UNUSED)
|
||||
|
||||
/*
 * ItemIdIsNormal
 *		True iff the item identifier's lp_flags equals LP_NORMAL.
 */
#define ItemIdIsNormal(itemId) ((itemId)->lp_flags == LP_NORMAL)
|
||||
|
||||
/*
 * ItemIdIsRedirected
 *		True iff the item identifier's lp_flags equals LP_REDIRECT.
 */
#define ItemIdIsRedirected(itemId) ((itemId)->lp_flags == LP_REDIRECT)
|
||||
|
||||
/*
 * ItemIdIsDead
 *		True iff the item identifier's lp_flags equals LP_DEAD.
 */
#define ItemIdIsDead(itemId) ((itemId)->lp_flags == LP_DEAD)
|
||||
|
||||
/*
 * ItemIdHasStorage
 *		True iff the item identifier has associated storage, which is
 *		signalled by a nonzero lp_len (lp_flags is not consulted).
 */
#define ItemIdHasStorage(itemId) ((itemId)->lp_len != 0)
|
||||
|
||||
/*
 * ItemIdSetUnused
 *		Put the item identifier into the UNUSED state with no storage:
 *		lp_flags becomes LP_UNUSED, lp_off and lp_len become zero.
 *
 * itemId is expanded three times, so it must not have side effects.
 */
#define ItemIdSetUnused(itemId) \
	((itemId)->lp_flags = LP_UNUSED, \
	 (itemId)->lp_off = 0, \
	 (itemId)->lp_len = 0)
|
||||
|
||||
/*
 * ItemIdSetNormal
 *		Put the item identifier into the NORMAL state, recording the given
 *		storage: lp_off is set to off and lp_len to len.
 *
 * itemId is expanded three times, so it must not have side effects.
 */
#define ItemIdSetNormal(itemId, off, len) \
	((itemId)->lp_flags = LP_NORMAL, \
	 (itemId)->lp_off = (off), \
	 (itemId)->lp_len = (len))
|
||||
|
||||
/*
 * ItemIdSetRedirect
 *		Put the item identifier into the REDIRECT state: the link is
 *		stored in lp_off and lp_len is zeroed.
 *
 * itemId is expanded three times, so it must not have side effects.
 */
#define ItemIdSetRedirect(itemId, link) \
	((itemId)->lp_flags = LP_REDIRECT, \
	 (itemId)->lp_off = (link), \
	 (itemId)->lp_len = 0)
|
||||
|
||||
/*
 * ItemIdSetDead
 *		Put the item identifier into the DEAD state with no storage:
 *		lp_flags becomes LP_DEAD, lp_off and lp_len become zero.
 *
 * itemId is expanded three times, so it must not have side effects.
 */
#define ItemIdSetDead(itemId) \
	((itemId)->lp_flags = LP_DEAD, \
	 (itemId)->lp_off = 0, \
	 (itemId)->lp_len = 0)
|
||||
|
||||
/*
 * ItemIdMarkDead
 *		Flag the item identifier as DEAD while leaving lp_off and lp_len
 *		(its existing storage) untouched.
 *
 * Note: in indexes, this is used as if it were a hint-bit mechanism;
 * we trust that multiple processors can do this in parallel and get
 * the same result.
 */
#define ItemIdMarkDead(itemId) \
	((itemId)->lp_flags = LP_DEAD)
|
||||
|
||||
#endif /* ITEMID_H */
|
||||
146
pg_include/storage/itemptr.h
Executable file
146
pg_include/storage/itemptr.h
Executable file
@@ -0,0 +1,146 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* itemptr.h
|
||||
* POSTGRES disk item pointer definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/itemptr.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef ITEMPTR_H
|
||||
#define ITEMPTR_H
|
||||
|
||||
#include "storage/block.h"
|
||||
#include "storage/off.h"
|
||||
|
||||
/*
|
||||
* ItemPointer:
|
||||
*
|
||||
* This is a pointer to an item within a disk page of a known file
|
||||
* (for example, a cross-link from an index to its parent table).
|
||||
* blkid tells us which block, posid tells us which entry in the linp
|
||||
* (ItemIdData) array we want.
|
||||
*
|
||||
* Note: because there is an item pointer in each tuple header and index
|
||||
* tuple header on disk, it's very important not to waste space with
|
||||
* structure padding bytes. The struct is designed to be six bytes long
|
||||
* (it contains three int16 fields) but a few compilers will pad it to
|
||||
* eight bytes unless coerced. We apply appropriate persuasion where
|
||||
* possible, and to cope with unpersuadable compilers, we try to use
|
||||
* "SizeOfIptrData" rather than "sizeof(ItemPointerData)" when computing
|
||||
* on-disk sizes.
|
||||
*/
|
||||
typedef struct ItemPointerData
|
||||
{
|
||||
BlockIdData ip_blkid;
|
||||
OffsetNumber ip_posid;
|
||||
}
|
||||
|
||||
#ifdef __arm__
|
||||
__attribute__((packed)) /* Appropriate whack upside the head for ARM */
|
||||
#endif
|
||||
ItemPointerData;
|
||||
|
||||
/*
 * Size of an ItemPointerData computed as the offset of its ip_posid field
 * plus sizeof(OffsetNumber), so that any padding a compiler appends after
 * ip_posid is not counted.
 */
#define SizeOfIptrData	\
	(offsetof(ItemPointerData, ip_posid) + sizeof(OffsetNumber))
|
||||
|
||||
/* ItemPointer: pointer to an ItemPointerData */
typedef ItemPointerData *ItemPointer;
|
||||
|
||||
/* ----------------
|
||||
* support macros
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
 * ItemPointerIsValid
 *		True iff the pointer itself passes PointerIsValid() and the item
 *		pointer it addresses has a nonzero ip_posid.  The result is cast
 *		to bool.
 */
#define ItemPointerIsValid(pointer) \
	((bool) (PointerIsValid(pointer) && \
			 ((pointer)->ip_posid != 0)))
|
||||
|
||||
/*
 * ItemPointerGetBlockNumber
 *		Yields the block number held in the item pointer's ip_blkid field,
 *		as decoded by BlockIdGetBlockNumber().  The pointer is first
 *		checked with AssertMacro(ItemPointerIsValid()).
 */
#define ItemPointerGetBlockNumber(pointer) \
	(AssertMacro(ItemPointerIsValid(pointer)), \
	 BlockIdGetBlockNumber(&(pointer)->ip_blkid))
|
||||
|
||||
/*
 * ItemPointerGetOffsetNumber
 *		Yields the item pointer's ip_posid field.  The pointer is first
 *		checked with AssertMacro(ItemPointerIsValid()).
 */
#define ItemPointerGetOffsetNumber(pointer) \
	(AssertMacro(ItemPointerIsValid(pointer)), \
	 (pointer)->ip_posid)
|
||||
|
||||
/*
 * ItemPointerSet
 *		Sets a disk item pointer to the specified block and offset:
 *		ip_blkid is filled in through BlockIdSet() and ip_posid is
 *		assigned offNum.  The pointer is first checked with
 *		AssertMacro(PointerIsValid()).
 *
 * blockNumber and offNum are parenthesized where they are used, so an
 * argument that is a compound expression is handled as a unit both here
 * and when forwarded to BlockIdSet().  pointer is expanded more than once,
 * so it must not have side effects.
 */
#define ItemPointerSet(pointer, blockNumber, offNum) \
( \
	AssertMacro(PointerIsValid(pointer)), \
	BlockIdSet(&((pointer)->ip_blkid), (blockNumber)), \
	(pointer)->ip_posid = (offNum) \
)
|
||||
|
||||
/*
 * ItemPointerSetBlockNumber
 *		Sets a disk item pointer to the specified block by filling in
 *		ip_blkid through BlockIdSet(); ip_posid is left untouched.  The
 *		pointer is first checked with AssertMacro(PointerIsValid()).
 *
 * blockNumber is parenthesized before being forwarded to BlockIdSet(), so
 * an argument that is a compound expression is handled as a unit.  pointer
 * is expanded more than once, so it must not have side effects.
 */
#define ItemPointerSetBlockNumber(pointer, blockNumber) \
( \
	AssertMacro(PointerIsValid(pointer)), \
	BlockIdSet(&((pointer)->ip_blkid), (blockNumber)) \
)
|
||||
|
||||
/*
 * ItemPointerSetOffsetNumber
 *		Assigns offsetNumber to the item pointer's ip_posid field; ip_blkid
 *		is left untouched.  The pointer is first checked with
 *		AssertMacro(PointerIsValid()).
 */
#define ItemPointerSetOffsetNumber(pointer, offsetNumber) \
	(AssertMacro(PointerIsValid(pointer)), \
	 (pointer)->ip_posid = (offsetNumber))
|
||||
|
||||
/*
 * ItemPointerCopy
 *		Copies one disk item pointer to another by whole-struct assignment
 *		(*toPointer = *fromPointer).  Note the argument order: source
 *		first, destination second.  Both pointers are checked with
 *		AssertMacro(PointerIsValid()) beforehand.
 */
#define ItemPointerCopy(fromPointer, toPointer) \
	(AssertMacro(PointerIsValid(toPointer)), \
	 AssertMacro(PointerIsValid(fromPointer)), \
	 *(toPointer) = *(fromPointer))
|
||||
|
||||
/*
 * ItemPointerSetInvalid
 *		Marks a disk item pointer invalid: ip_blkid is set to
 *		InvalidBlockNumber through BlockIdSet() and ip_posid is set to
 *		InvalidOffsetNumber.  The pointer is first checked with
 *		AssertMacro(PointerIsValid()).
 */
#define ItemPointerSetInvalid(pointer) \
	(AssertMacro(PointerIsValid(pointer)), \
	 BlockIdSet(&((pointer)->ip_blkid), InvalidBlockNumber), \
	 (pointer)->ip_posid = InvalidOffsetNumber)
|
||||
|
||||
/* ----------------
|
||||
* externs
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2);
|
||||
extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2);
|
||||
|
||||
#endif /* ITEMPTR_H */
|
||||
83
pg_include/storage/large_object.h
Executable file
83
pg_include/storage/large_object.h
Executable file
@@ -0,0 +1,83 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* large_object.h
|
||||
* Declarations for PostgreSQL large objects. POSTGRES 4.2 supported
|
||||
* zillions of large objects (internal, external, jaquith, inversion).
|
||||
* Now we only support inversion.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/large_object.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef LARGE_OBJECT_H
|
||||
#define LARGE_OBJECT_H
|
||||
|
||||
#include "utils/snapshot.h"
|
||||
|
||||
|
||||
/*----------
|
||||
* Data about a currently-open large object.
|
||||
*
|
||||
* id is the logical OID of the large object
|
||||
* snapshot is the snapshot to use for read/write operations
|
||||
* subid is the subtransaction that opened the desc (or currently owns it)
|
||||
* offset is the current seek offset within the LO
|
||||
* flags contains some flag bits
|
||||
*
|
||||
* NOTE: before 7.1, we also had to store references to the separate table
|
||||
* and index of a specific large object. Now they all live in pg_largeobject
|
||||
* and are accessed via a common relation descriptor.
|
||||
*----------
|
||||
*/
|
||||
typedef struct LargeObjectDesc
|
||||
{
|
||||
Oid id; /* LO's identifier */
|
||||
Snapshot snapshot; /* snapshot to use */
|
||||
SubTransactionId subid; /* owning subtransaction ID */
|
||||
uint32 offset; /* current seek pointer */
|
||||
int flags; /* locking info, etc */
|
||||
|
||||
/* flag bits: */
|
||||
#define IFS_RDLOCK (1 << 0)
|
||||
#define IFS_WRLOCK (1 << 1)
|
||||
|
||||
} LargeObjectDesc;
|
||||
|
||||
|
||||
/*
|
||||
* Each "page" (tuple) of a large object can hold this much data
|
||||
*
|
||||
* We could set this as high as BLCKSZ less some overhead, but it seems
|
||||
* better to make it a smaller value, so that not as much space is used
|
||||
* up when a page-tuple is updated. Note that the value is deliberately
|
||||
* chosen large enough to trigger the tuple toaster, so that we will
|
||||
* attempt to compress page tuples in-line. (But they won't be moved off
|
||||
* unless the user creates a toast-table for pg_largeobject...)
|
||||
*
|
||||
* Also, it seems to be a smart move to make the page size be a power of 2,
|
||||
* since clients will often be written to send data in power-of-2 blocks.
|
||||
* This avoids unnecessary tuple updates caused by partial-page writes.
|
||||
*/
|
||||
#define LOBLKSIZE (BLCKSZ / 4)	/* one quarter of BLCKSZ */
|
||||
|
||||
|
||||
/*
|
||||
* Function definitions...
|
||||
*/
|
||||
|
||||
/* inversion stuff in inv_api.c */
|
||||
extern void close_lo_relation(bool isCommit);
|
||||
extern Oid inv_create(Oid lobjId);
|
||||
extern LargeObjectDesc *inv_open(Oid lobjId, int flags, MemoryContext mcxt);
|
||||
extern void inv_close(LargeObjectDesc *obj_desc);
|
||||
extern int inv_drop(Oid lobjId);
|
||||
extern int inv_seek(LargeObjectDesc *obj_desc, int offset, int whence);
|
||||
extern int inv_tell(LargeObjectDesc *obj_desc);
|
||||
extern int inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes);
|
||||
extern int inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes);
|
||||
extern void inv_truncate(LargeObjectDesc *obj_desc, int len);
|
||||
|
||||
#endif /* LARGE_OBJECT_H */
|
||||
139
pg_include/storage/latch.h
Executable file
139
pg_include/storage/latch.h
Executable file
@@ -0,0 +1,139 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* latch.h
|
||||
* Routines for interprocess latches
|
||||
*
|
||||
* A latch is a boolean variable, with operations that let processes sleep
|
||||
* until it is set. A latch can be set from another process, or a signal
|
||||
* handler within the same process.
|
||||
*
|
||||
* The latch interface is a reliable replacement for the common pattern of
|
||||
* using pg_usleep() or select() to wait until a signal arrives, where the
|
||||
* signal handler sets a flag variable. Because on some platforms an
|
||||
* incoming signal doesn't interrupt sleep, and even on platforms where it
|
||||
* does there is a race condition if the signal arrives just before
|
||||
* entering the sleep, the common pattern must periodically wake up and
|
||||
* poll the flag variable. The pselect() system call was invented to solve
|
||||
* this problem, but it is not portable enough. Latches are designed to
|
||||
* overcome these limitations, allowing you to sleep without polling and
|
||||
* ensuring quick response to signals from other processes.
|
||||
*
|
||||
* There are two kinds of latches: local and shared. A local latch is
|
||||
* initialized by InitLatch, and can only be set from the same process.
|
||||
* A local latch can be used to wait for a signal to arrive, by calling
|
||||
* SetLatch in the signal handler. A shared latch resides in shared memory,
|
||||
* and must be initialized at postmaster startup by InitSharedLatch. Before
|
||||
* a shared latch can be waited on, it must be associated with a process
|
||||
* with OwnLatch. Only the process owning the latch can wait on it, but any
|
||||
* process can set it.
|
||||
*
|
||||
* There are three basic operations on a latch:
|
||||
*
|
||||
* SetLatch - Sets the latch
|
||||
* ResetLatch - Clears the latch, allowing it to be set again
|
||||
* WaitLatch - Waits for the latch to become set
|
||||
*
|
||||
* WaitLatch includes a provision for timeouts (which should be avoided
|
||||
* when possible, as they incur extra overhead) and a provision for
|
||||
* postmaster child processes to wake up immediately on postmaster death.
|
||||
* See unix_latch.c for detailed specifications for the exported functions.
|
||||
*
|
||||
* The correct pattern to wait for event(s) is:
|
||||
*
|
||||
* for (;;)
|
||||
* {
|
||||
* ResetLatch();
|
||||
* if (work to do)
|
||||
* Do Stuff();
|
||||
* WaitLatch();
|
||||
* }
|
||||
*
|
||||
* It's important to reset the latch *before* checking if there's work to
|
||||
* do. Otherwise, if someone sets the latch between the check and the
|
||||
* ResetLatch call, you will miss it and Wait will incorrectly block.
|
||||
*
|
||||
* To wake up the waiter, you must first set a global flag or something
|
||||
* else that the wait loop tests in the "if (work to do)" part, and call
|
||||
* SetLatch *after* that. SetLatch is designed to return quickly if the
|
||||
* latch is already set.
|
||||
*
|
||||
* Presently, when using a shared latch for interprocess signalling, the
|
||||
* flag variable(s) set by senders and inspected by the wait loop must
|
||||
* be protected by spinlocks or LWLocks, else it is possible to miss events
|
||||
* on machines with weak memory ordering (such as PPC). This restriction
|
||||
* will be lifted in future by inserting suitable memory barriers into
|
||||
* SetLatch and ResetLatch.
|
||||
*
|
||||
* On some platforms, signals will not interrupt the latch wait primitive
|
||||
* by themselves. Therefore, it is critical that any signal handler that
|
||||
* is meant to terminate a WaitLatch wait calls SetLatch.
|
||||
*
|
||||
* Note that use of the process latch (PGPROC.procLatch) is generally better
|
||||
* than an ad-hoc shared latch for signaling auxiliary processes. This is
|
||||
* because generic signal handlers will call SetLatch on the process latch
|
||||
* only, so using any latch other than the process latch effectively precludes
|
||||
* use of any generic handler.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/latch.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef LATCH_H
|
||||
#define LATCH_H
|
||||
|
||||
#include <signal.h>
|
||||
|
||||
/*
 * The Latch structure is to be treated as opaque: use only the public
 * functions to access it.  Its layout is exposed here solely so that a
 * Latch can be embedded inside a larger struct.
 */
typedef struct
{
	/* the latch's state; this is the field TestLatch() reads */
	sig_atomic_t is_set;

	/* presumably tells a shared latch from a local one (confirm in the implementation) */
	bool		is_shared;

	/* presumably the PID of the process owning the latch (confirm in the implementation) */
	int			owner_pid;

#ifdef WIN32
	/* Windows-only member */
	HANDLE		event;
#endif
} Latch;
|
||||
|
||||
/*
 * Event bitmasks for WaitLatch() clients.  Each event owns one distinct
 * bit, so several events can be OR'ed together into a single int.
 */
#define WL_LATCH_SET			(1 << 0)
#define WL_SOCKET_READABLE		(1 << 1)
#define WL_SOCKET_WRITEABLE		(1 << 2)
#define WL_TIMEOUT				(1 << 3)
#define WL_POSTMASTER_DEATH		(1 << 4)
|
||||
|
||||
/*
|
||||
* prototypes for functions in latch.c
|
||||
*/
|
||||
extern void InitializeLatchSupport(void);
|
||||
extern void InitLatch(volatile Latch *latch);
|
||||
extern void InitSharedLatch(volatile Latch *latch);
|
||||
extern void OwnLatch(volatile Latch *latch);
|
||||
extern void DisownLatch(volatile Latch *latch);
|
||||
extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout);
|
||||
extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents,
|
||||
pgsocket sock, long timeout);
|
||||
extern void SetLatch(volatile Latch *latch);
|
||||
extern void ResetLatch(volatile Latch *latch);
|
||||
|
||||
/*
 * TestLatch
 *		Reads a latch's is_set field directly, through a pointer cast to
 *		volatile Latch *.  Beware of memory ordering issues if you use
 *		this macro!
 */
#define TestLatch(latch) \
	(((volatile Latch *) (latch))->is_set)
|
||||
|
||||
/*
 * The Unix implementation uses SIGUSR1 for inter-process signaling, so a
 * handler entry point is declared for it.  Win32 doesn't need this; there
 * the name is a macro expanding to a no-op expression.
 */
#ifdef WIN32
#define latch_sigusr1_handler()  ((void) 0)
#else
extern void latch_sigusr1_handler(void);
#endif
|
||||
|
||||
#endif /* LATCH_H */
|
||||
80
pg_include/storage/lmgr.h
Executable file
80
pg_include/storage/lmgr.h
Executable file
@@ -0,0 +1,80 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* lmgr.h
|
||||
* POSTGRES lock manager definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/lmgr.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef LMGR_H
|
||||
#define LMGR_H
|
||||
|
||||
#include "lib/stringinfo.h"
|
||||
#include "storage/itemptr.h"
|
||||
#include "storage/lock.h"
|
||||
#include "utils/rel.h"
|
||||
|
||||
|
||||
extern void RelationInitLockInfo(Relation relation);
|
||||
|
||||
/* Lock a relation */
|
||||
extern void LockRelationOid(Oid relid, LOCKMODE lockmode);
|
||||
extern bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode);
|
||||
extern void UnlockRelationId(LockRelId *relid, LOCKMODE lockmode);
|
||||
extern void UnlockRelationOid(Oid relid, LOCKMODE lockmode);
|
||||
|
||||
extern void LockRelation(Relation relation, LOCKMODE lockmode);
|
||||
extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode);
|
||||
extern void UnlockRelation(Relation relation, LOCKMODE lockmode);
|
||||
extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode);
|
||||
|
||||
extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
|
||||
extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
|
||||
|
||||
/* Lock a relation for extension */
|
||||
extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode);
|
||||
extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode);
|
||||
|
||||
/* Lock a page (currently only used within indexes) */
|
||||
extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
|
||||
extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
|
||||
extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
|
||||
|
||||
/* Lock a tuple (see heap_lock_tuple before assuming you understand this) */
|
||||
extern void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode);
|
||||
extern bool ConditionalLockTuple(Relation relation, ItemPointer tid,
|
||||
LOCKMODE lockmode);
|
||||
extern void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode);
|
||||
|
||||
/* Lock an XID (used to wait for a transaction to finish) */
|
||||
extern void XactLockTableInsert(TransactionId xid);
|
||||
extern void XactLockTableDelete(TransactionId xid);
|
||||
extern void XactLockTableWait(TransactionId xid);
|
||||
extern bool ConditionalXactLockTableWait(TransactionId xid);
|
||||
|
||||
/* Lock a general object (other than a relation) of the current database */
|
||||
extern void LockDatabaseObject(Oid classid, Oid objid, uint16 objsubid,
|
||||
LOCKMODE lockmode);
|
||||
extern void UnlockDatabaseObject(Oid classid, Oid objid, uint16 objsubid,
|
||||
LOCKMODE lockmode);
|
||||
|
||||
/* Lock a shared-across-databases object (other than a relation) */
|
||||
extern void LockSharedObject(Oid classid, Oid objid, uint16 objsubid,
|
||||
LOCKMODE lockmode);
|
||||
extern void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid,
|
||||
LOCKMODE lockmode);
|
||||
|
||||
extern void LockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid,
|
||||
LOCKMODE lockmode);
|
||||
extern void UnlockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid,
|
||||
LOCKMODE lockmode);
|
||||
|
||||
/* Describe a locktag for error messages */
|
||||
extern void DescribeLockTag(StringInfo buf, const LOCKTAG *tag);
|
||||
|
||||
#endif /* LMGR_H */
|
||||
552
pg_include/storage/lock.h
Executable file
552
pg_include/storage/lock.h
Executable file
@@ -0,0 +1,552 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* lock.h
|
||||
* POSTGRES low-level lock mechanism
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/lock.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef LOCK_H_
|
||||
#define LOCK_H_
|
||||
|
||||
#include "storage/backendid.h"
|
||||
#include "storage/lwlock.h"
|
||||
#include "storage/shmem.h"
|
||||
|
||||
|
||||
/*
 * Forward reference: struct PGPROC itself is declared in proc.h, but the
 * type name is needed by declarations in this header.
 */
typedef struct PGPROC PGPROC;
|
||||
|
||||
typedef struct PROC_QUEUE
|
||||
{
|
||||
SHM_QUEUE links; /* head of list of PGPROC objects */
|
||||
int size; /* number of entries in list */
|
||||
} PROC_QUEUE;
|
||||
|
||||
/*
 * GUC variables.  The tracing and debugging ones are declared only when
 * LOCK_DEBUG is defined.
 */
extern int	max_locks_per_xact;

#ifdef LOCK_DEBUG
extern int	Trace_lock_oidmin;
extern bool Trace_locks;
extern bool Trace_userlocks;
extern int	Trace_lock_table;
extern bool Debug_deadlocks;
#endif   /* LOCK_DEBUG */
|
||||
|
||||
|
||||
/*
|
||||
* Top-level transactions are identified by VirtualTransactionIDs comprising
|
||||
* the BackendId of the backend running the xact, plus a locally-assigned
|
||||
* LocalTransactionId. These are guaranteed unique over the short term,
|
||||
* but will be reused after a database restart; hence they should never
|
||||
* be stored on disk.
|
||||
*
|
||||
* Note that struct VirtualTransactionId can not be assumed to be atomically
|
||||
* assignable as a whole. However, type LocalTransactionId is assumed to
|
||||
* be atomically assignable, and the backend ID doesn't change often enough
|
||||
* to be a problem, so we can fetch or assign the two fields separately.
|
||||
* We deliberately refrain from using the struct within PGPROC, to prevent
|
||||
* coding errors from trying to use struct assignment with it; instead use
|
||||
* GET_VXID_FROM_PGPROC().
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
BackendId backendId; /* determined at backend startup */
|
||||
LocalTransactionId localTransactionId; /* backend-local transaction
|
||||
* id */
|
||||
} VirtualTransactionId;
|
||||
|
||||
/* The LocalTransactionId value that means "none". */
#define InvalidLocalTransactionId		0

/* True iff lxid differs from InvalidLocalTransactionId. */
#define LocalTransactionIdIsValid(lxid) \
	((lxid) != InvalidLocalTransactionId)

/*
 * The macros below take VirtualTransactionId structs themselves (not
 * pointers) and work on the two fields separately; their arguments are
 * expanded twice.
 */

/* True iff both the backendId and the localTransactionId are valid. */
#define VirtualTransactionIdIsValid(vxid) \
	(((vxid).backendId != InvalidBackendId) && \
	 LocalTransactionIdIsValid((vxid).localTransactionId))

/* True iff the two vxids agree in both fields. */
#define VirtualTransactionIdEquals(vxid1, vxid2) \
	((vxid1).backendId == (vxid2).backendId && \
	 (vxid1).localTransactionId == (vxid2).localTransactionId)

/* Reset both fields of vxid to their invalid values. */
#define SetInvalidVirtualTransactionId(vxid) \
	((vxid).backendId = InvalidBackendId, \
	 (vxid).localTransactionId = InvalidLocalTransactionId)

/* Fill vxid from proc's backendId and lxid fields, one field at a time. */
#define GET_VXID_FROM_PGPROC(vxid, proc) \
	((vxid).backendId = (proc).backendId, \
	 (vxid).localTransactionId = (proc).lxid)
|
||||
|
||||
|
||||
/*
 * LOCKMODE: an integer (1..N) naming a lock type.
 * LOCKMASK: a bit mask describing a set of held or requested lock types,
 * in which bit (1 << mode) stands for the lock mode "mode".
 */
typedef int LOCKMASK;
typedef int LOCKMODE;
|
||||
|
||||
/* MAX_LOCKMODES cannot be larger than the # of bits in LOCKMASK */
#define MAX_LOCKMODES		10

/* mask with only the bit for lockmode set */
#define LOCKBIT_ON(lockmode) \
	(1 << (lockmode))

/* mask with every bit set except the one for lockmode */
#define LOCKBIT_OFF(lockmode) \
	(~(1 << (lockmode)))
|
||||
|
||||
|
||||
/*
|
||||
* This data structure defines the locking semantics associated with a
|
||||
* "lock method". The semantics specify the meaning of each lock mode
|
||||
* (by defining which lock modes it conflicts with).
|
||||
* All of this data is constant and is kept in const tables.
|
||||
*
|
||||
* numLockModes -- number of lock modes (READ,WRITE,etc) that
|
||||
* are defined in this lock method. Must be less than MAX_LOCKMODES.
|
||||
*
|
||||
* conflictTab -- this is an array of bitmasks showing lock
|
||||
* mode conflicts. conflictTab[i] is a mask with the j-th bit
|
||||
* turned on if lock modes i and j conflict. Lock modes are
|
||||
* numbered 1..numLockModes; conflictTab[0] is unused.
|
||||
*
|
||||
* lockModeNames -- ID strings for debug printouts.
|
||||
*
|
||||
* trace_flag -- pointer to GUC trace flag for this lock method. (The
|
||||
* GUC variable is not constant, but we use "const" here to denote that
|
||||
* it can't be changed through this reference.)
|
||||
*/
|
||||
typedef struct LockMethodData
|
||||
{
|
||||
int numLockModes;
|
||||
const LOCKMASK *conflictTab;
|
||||
const char *const * lockModeNames;
|
||||
const bool *trace_flag;
|
||||
} LockMethodData;
|
||||
|
||||
typedef const LockMethodData *LockMethod;
|
||||
|
||||
/*
|
||||
* Lock methods are identified by LOCKMETHODID. (Despite the declaration as
|
||||
* uint16, we are constrained to 256 lockmethods by the layout of LOCKTAG.)
|
||||
*/
|
||||
typedef uint16 LOCKMETHODID;
|
||||
|
||||
/* LOCKMETHODID values of the known lock methods */
#define DEFAULT_LOCKMETHOD		1
#define USER_LOCKMETHOD			2
|
||||
|
||||
/*
 * The valid LOCKMODE values.  They are the same for all the standard lock
 * methods (both DEFAULT and USER).
 */

/* NoLock is not a lock mode, but a flag value meaning "don't get a lock" */
#define NoLock						0

/* SELECT */
#define AccessShareLock				1
/* SELECT FOR UPDATE/FOR SHARE */
#define RowShareLock				2
/* INSERT, UPDATE, DELETE */
#define RowExclusiveLock			3
/* VACUUM (non-FULL), ANALYZE, CREATE INDEX CONCURRENTLY */
#define ShareUpdateExclusiveLock	4
/* CREATE INDEX (WITHOUT CONCURRENTLY) */
#define ShareLock					5
/* like EXCLUSIVE MODE, but allows ROW SHARE */
#define ShareRowExclusiveLock		6
/* blocks ROW SHARE/SELECT...FOR UPDATE */
#define ExclusiveLock				7
/* ALTER TABLE, DROP TABLE, VACUUM FULL, and unqualified LOCK TABLE */
#define AccessExclusiveLock			8
|
||||
|
||||
|
||||
/*
|
||||
* LOCKTAG is the key information needed to look up a LOCK item in the
|
||||
* lock hashtable. A LOCKTAG value uniquely identifies a lockable object.
|
||||
*
|
||||
* The LockTagType enum defines the different kinds of objects we can lock.
|
||||
* We can handle up to 256 different LockTagTypes.
|
||||
*/
|
||||
typedef enum LockTagType
|
||||
{
|
||||
LOCKTAG_RELATION, /* whole relation */
|
||||
/* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */
|
||||
LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */
|
||||
/* same ID info as RELATION */
|
||||
LOCKTAG_PAGE, /* one page of a relation */
|
||||
/* ID info for a page is RELATION info + BlockNumber */
|
||||
LOCKTAG_TUPLE, /* one physical tuple */
|
||||
/* ID info for a tuple is PAGE info + OffsetNumber */
|
||||
LOCKTAG_TRANSACTION, /* transaction (for waiting for xact done) */
|
||||
/* ID info for a transaction is its TransactionId */
|
||||
LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */
|
||||
/* ID info for a virtual transaction is its VirtualTransactionId */
|
||||
LOCKTAG_OBJECT, /* non-relation database object */
|
||||
/* ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID */
|
||||
|
||||
/*
|
||||
* Note: object ID has same representation as in pg_depend and
|
||||
* pg_description, but notice that we are constraining SUBID to 16 bits.
|
||||
* Also, we use DB OID = 0 for shared objects such as tablespaces.
|
||||
*/
|
||||
LOCKTAG_USERLOCK, /* reserved for old contrib/userlock code */
|
||||
LOCKTAG_ADVISORY /* advisory user locks */
|
||||
} LockTagType;
|
||||
|
||||
#define LOCKTAG_LAST_TYPE LOCKTAG_ADVISORY
|
||||
|
||||
/*
|
||||
* The LOCKTAG struct is defined with malice aforethought to fit into 16
|
||||
* bytes with no padding. Note that this would need adjustment if we were
|
||||
* to widen Oid, BlockNumber, or TransactionId to more than 32 bits.
|
||||
*
|
||||
* We include lockmethodid in the locktag so that a single hash table in
|
||||
* shared memory can store locks of different lockmethods.
|
||||
*/
|
||||
typedef struct LOCKTAG
|
||||
{
|
||||
uint32 locktag_field1; /* a 32-bit ID field */
|
||||
uint32 locktag_field2; /* a 32-bit ID field */
|
||||
uint32 locktag_field3; /* a 32-bit ID field */
|
||||
uint16 locktag_field4; /* a 16-bit ID field */
|
||||
uint8 locktag_type; /* see enum LockTagType */
|
||||
uint8 locktag_lockmethodid; /* lockmethod indicator */
|
||||
} LOCKTAG;
|
||||
|
||||
/*
|
||||
* These macros define how we map logical IDs of lockable objects into
|
||||
* the physical fields of LOCKTAG. Use these to set up LOCKTAG values,
|
||||
* rather than accessing the fields directly. Note multiple eval of target!
|
||||
*/
|
||||
#define SET_LOCKTAG_RELATION(locktag,dboid,reloid) \
|
||||
((locktag).locktag_field1 = (dboid), \
|
||||
(locktag).locktag_field2 = (reloid), \
|
||||
(locktag).locktag_field3 = 0, \
|
||||
(locktag).locktag_field4 = 0, \
|
||||
(locktag).locktag_type = LOCKTAG_RELATION, \
|
||||
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||
|
||||
#define SET_LOCKTAG_RELATION_EXTEND(locktag,dboid,reloid) \
|
||||
((locktag).locktag_field1 = (dboid), \
|
||||
(locktag).locktag_field2 = (reloid), \
|
||||
(locktag).locktag_field3 = 0, \
|
||||
(locktag).locktag_field4 = 0, \
|
||||
(locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
|
||||
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||
|
||||
#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
|
||||
((locktag).locktag_field1 = (dboid), \
|
||||
(locktag).locktag_field2 = (reloid), \
|
||||
(locktag).locktag_field3 = (blocknum), \
|
||||
(locktag).locktag_field4 = 0, \
|
||||
(locktag).locktag_type = LOCKTAG_PAGE, \
|
||||
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||
|
||||
#define SET_LOCKTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
|
||||
((locktag).locktag_field1 = (dboid), \
|
||||
(locktag).locktag_field2 = (reloid), \
|
||||
(locktag).locktag_field3 = (blocknum), \
|
||||
(locktag).locktag_field4 = (offnum), \
|
||||
(locktag).locktag_type = LOCKTAG_TUPLE, \
|
||||
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||
|
||||
#define SET_LOCKTAG_TRANSACTION(locktag,xid) \
|
||||
((locktag).locktag_field1 = (xid), \
|
||||
(locktag).locktag_field2 = 0, \
|
||||
(locktag).locktag_field3 = 0, \
|
||||
(locktag).locktag_field4 = 0, \
|
||||
(locktag).locktag_type = LOCKTAG_TRANSACTION, \
|
||||
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||
|
||||
#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \
|
||||
((locktag).locktag_field1 = (vxid).backendId, \
|
||||
(locktag).locktag_field2 = (vxid).localTransactionId, \
|
||||
(locktag).locktag_field3 = 0, \
|
||||
(locktag).locktag_field4 = 0, \
|
||||
(locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
|
||||
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||
|
||||
#define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \
|
||||
((locktag).locktag_field1 = (dboid), \
|
||||
(locktag).locktag_field2 = (classoid), \
|
||||
(locktag).locktag_field3 = (objoid), \
|
||||
(locktag).locktag_field4 = (objsubid), \
|
||||
(locktag).locktag_type = LOCKTAG_OBJECT, \
|
||||
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||
|
||||
#define SET_LOCKTAG_ADVISORY(locktag,id1,id2,id3,id4) \
|
||||
((locktag).locktag_field1 = (id1), \
|
||||
(locktag).locktag_field2 = (id2), \
|
||||
(locktag).locktag_field3 = (id3), \
|
||||
(locktag).locktag_field4 = (id4), \
|
||||
(locktag).locktag_type = LOCKTAG_ADVISORY, \
|
||||
(locktag).locktag_lockmethodid = USER_LOCKMETHOD)
|
||||
|
||||
|
||||
/*
|
||||
* Per-locked-object lock information:
|
||||
*
|
||||
* tag -- uniquely identifies the object being locked
|
||||
* grantMask -- bitmask for all lock types currently granted on this object.
|
||||
* waitMask -- bitmask for all lock types currently awaited on this object.
|
||||
* procLocks -- list of PROCLOCK objects for this lock.
|
||||
* waitProcs -- queue of processes waiting for this lock.
|
||||
* requested -- count of each lock type currently requested on the lock
|
||||
* (includes requests already granted!!).
|
||||
* nRequested -- total requested locks of all types.
|
||||
* granted -- count of each lock type currently granted on the lock.
|
||||
* nGranted -- total granted locks of all types.
|
||||
*
|
||||
* Note: these counts count 1 for each backend. Internally to a backend,
|
||||
* there may be multiple grabs on a particular lock, but this is not reflected
|
||||
* into shared memory.
|
||||
*/
|
||||
typedef struct LOCK
|
||||
{
|
||||
/* hash key */
|
||||
LOCKTAG tag; /* unique identifier of lockable object */
|
||||
|
||||
/* data */
|
||||
LOCKMASK grantMask; /* bitmask for lock types already granted */
|
||||
LOCKMASK waitMask; /* bitmask for lock types awaited */
|
||||
SHM_QUEUE procLocks; /* list of PROCLOCK objects assoc. with lock */
|
||||
PROC_QUEUE waitProcs; /* list of PGPROC objects waiting on lock */
|
||||
int requested[MAX_LOCKMODES]; /* counts of requested locks */
|
||||
int nRequested; /* total of requested[] array */
|
||||
int granted[MAX_LOCKMODES]; /* counts of granted locks */
|
||||
int nGranted; /* total of granted[] array */
|
||||
} LOCK;
|
||||
|
||||
#define LOCK_LOCKMETHOD(lock) ((LOCKMETHODID) (lock).tag.locktag_lockmethodid)
|
||||
|
||||
|
||||
/*
|
||||
* We may have several different backends holding or awaiting locks
|
||||
* on the same lockable object. We need to store some per-holder/waiter
|
||||
* information for each such holder (or would-be holder). This is kept in
|
||||
* a PROCLOCK struct.
|
||||
*
|
||||
* PROCLOCKTAG is the key information needed to look up a PROCLOCK item in the
|
||||
* proclock hashtable. A PROCLOCKTAG value uniquely identifies the combination
|
||||
* of a lockable object and a holder/waiter for that object. (We can use
|
||||
* pointers here because the PROCLOCKTAG need only be unique for the lifespan
|
||||
* of the PROCLOCK, and it will never outlive the lock or the proc.)
|
||||
*
|
||||
* Internally to a backend, it is possible for the same lock to be held
|
||||
* for different purposes: the backend tracks transaction locks separately
|
||||
* from session locks. However, this is not reflected in the shared-memory
|
||||
* state: we only track which backend(s) hold the lock. This is OK since a
|
||||
* backend can never block itself.
|
||||
*
|
||||
* The holdMask field shows the already-granted locks represented by this
|
||||
* proclock. Note that there will be a proclock object, possibly with
|
||||
* zero holdMask, for any lock that the process is currently waiting on.
|
||||
* Otherwise, proclock objects whose holdMasks are zero are recycled
|
||||
* as soon as convenient.
|
||||
*
|
||||
* releaseMask is workspace for LockReleaseAll(): it shows the locks due
|
||||
* to be released during the current call. This must only be examined or
|
||||
* set by the backend owning the PROCLOCK.
|
||||
*
|
||||
* Each PROCLOCK object is linked into lists for both the associated LOCK
|
||||
* object and the owning PGPROC object. Note that the PROCLOCK is entered
|
||||
* into these lists as soon as it is created, even if no lock has yet been
|
||||
* granted. A PGPROC that is waiting for a lock to be granted will also be
|
||||
* linked into the lock's waitProcs queue.
|
||||
*/
|
||||
typedef struct PROCLOCKTAG
|
||||
{
|
||||
/* NB: we assume this struct contains no padding! */
|
||||
LOCK *myLock; /* link to per-lockable-object information */
|
||||
PGPROC *myProc; /* link to PGPROC of owning backend */
|
||||
} PROCLOCKTAG;
|
||||
|
||||
typedef struct PROCLOCK
|
||||
{
|
||||
/* tag */
|
||||
PROCLOCKTAG tag; /* unique identifier of proclock object */
|
||||
|
||||
/* data */
|
||||
LOCKMASK holdMask; /* bitmask for lock types currently held */
|
||||
LOCKMASK releaseMask; /* bitmask for lock types to be released */
|
||||
SHM_QUEUE lockLink; /* list link in LOCK's list of proclocks */
|
||||
SHM_QUEUE procLink; /* list link in PGPROC's list of proclocks */
|
||||
} PROCLOCK;
|
||||
|
||||
#define PROCLOCK_LOCKMETHOD(proclock) \
|
||||
LOCK_LOCKMETHOD(*((proclock).tag.myLock))
|
||||
|
||||
/*
|
||||
* Each backend also maintains a local hash table with information about each
|
||||
* lock it is currently interested in. In particular the local table counts
|
||||
* the number of times that lock has been acquired. This allows multiple
|
||||
* requests for the same lock to be executed without additional accesses to
|
||||
* shared memory. We also track the number of lock acquisitions per
|
||||
* ResourceOwner, so that we can release just those locks belonging to a
|
||||
* particular ResourceOwner.
|
||||
*/
|
||||
typedef struct LOCALLOCKTAG
|
||||
{
|
||||
LOCKTAG lock; /* identifies the lockable object */
|
||||
LOCKMODE mode; /* lock mode for this table entry */
|
||||
} LOCALLOCKTAG;
|
||||
|
||||
typedef struct LOCALLOCKOWNER
|
||||
{
|
||||
/*
|
||||
* Note: if owner is NULL then the lock is held on behalf of the session;
|
||||
* otherwise it is held on behalf of my current transaction.
|
||||
*
|
||||
* Must use a forward struct reference to avoid circularity.
|
||||
*/
|
||||
struct ResourceOwnerData *owner;
|
||||
int64 nLocks; /* # of times held by this owner */
|
||||
} LOCALLOCKOWNER;
|
||||
|
||||
/*
 * Backend-local entry describing one (lockable object, lock mode) pair,
 * identified by its LOCALLOCKTAG.  It carries the acquisition counts kept
 * locally plus pointers to the shared-memory LOCK and PROCLOCK objects.
 */
typedef struct LOCALLOCK
|
||||
{
|
||||
/* tag */
|
||||
LOCALLOCKTAG tag; /* unique identifier of locallock entry */
|
||||
|
||||
/* data */
|
||||
LOCK *lock; /* associated LOCK object in shared mem */
|
||||
PROCLOCK *proclock; /* associated PROCLOCK object in shmem */
|
||||
uint32 hashcode; /* copy of LOCKTAG's hash value */
|
||||
int64 nLocks; /* total number of times lock is held */
|
||||
int numLockOwners; /* # of relevant ResourceOwners */
|
||||
int maxLockOwners; /* allocated size of lockOwners array */
|
||||
bool holdsStrongLockCount; /* bumped FastPathStrongRelationLocks? */
|
||||
LOCALLOCKOWNER *lockOwners; /* dynamically resizable array of per-owner
 * hold counts */
|
||||
} LOCALLOCK;
|
||||
|
||||
#define LOCALLOCK_LOCKMETHOD(llock) ((llock).tag.lock.locktag_lockmethodid)
|
||||
|
||||
|
||||
/*
|
||||
* These structures hold information passed from lmgr internals to the lock
|
||||
* listing user-level functions (in lockfuncs.c).
|
||||
*/
|
||||
|
||||
typedef struct LockInstanceData
|
||||
{
|
||||
LOCKTAG locktag; /* locked object */
|
||||
LOCKMASK holdMask; /* locks held by this PGPROC */
|
||||
LOCKMODE waitLockMode; /* lock awaited by this PGPROC, if any */
|
||||
BackendId backend; /* backend ID of this PGPROC */
|
||||
LocalTransactionId lxid; /* local transaction ID of this PGPROC */
|
||||
int pid; /* pid of this PGPROC */
|
||||
bool fastpath; /* taken via fastpath? */
|
||||
} LockInstanceData;
|
||||
|
||||
/* Container for an array of LockInstanceData entries and its length. */
typedef struct LockData
|
||||
{
|
||||
int nelements; /* The length of the array */
|
||||
LockInstanceData *locks; /* the array itself (nelements entries) */
|
||||
} LockData;
|
||||
|
||||
|
||||
/* Result codes for LockAcquire() */
|
||||
typedef enum
|
||||
{
|
||||
LOCKACQUIRE_NOT_AVAIL, /* lock not available, and dontWait=true */
|
||||
LOCKACQUIRE_OK, /* lock successfully acquired */
|
||||
LOCKACQUIRE_ALREADY_HELD /* incremented count for lock already held */
|
||||
} LockAcquireResult;
|
||||
|
||||
/* Deadlock states identified by DeadLockCheck() */
|
||||
typedef enum
|
||||
{
|
||||
DS_NOT_YET_CHECKED, /* no deadlock check has run yet */
|
||||
DS_NO_DEADLOCK, /* no deadlock detected */
|
||||
DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */
|
||||
DS_HARD_DEADLOCK, /* deadlock, no way out but ERROR */
|
||||
DS_BLOCKED_BY_AUTOVACUUM /* no deadlock; queue blocked by autovacuum
|
||||
* worker */
|
||||
} DeadLockState;
|
||||
|
||||
|
||||
/*
|
||||
* The lockmgr's shared hash tables are partitioned to reduce contention.
|
||||
* To determine which partition a given locktag belongs to, compute the tag's
|
||||
* hash code with LockTagHashCode(), then apply one of these macros.
|
||||
* NB: NUM_LOCK_PARTITIONS must be a power of 2!
|
||||
*/
|
||||
#define LockHashPartition(hashcode) \
|
||||
((hashcode) % NUM_LOCK_PARTITIONS)
|
||||
#define LockHashPartitionLock(hashcode) \
|
||||
((LWLockId) (FirstLockMgrLock + LockHashPartition(hashcode)))
|
||||
|
||||
|
||||
/*
|
||||
* function prototypes
|
||||
*/
|
||||
extern void InitLocks(void);
|
||||
extern LockMethod GetLocksMethodTable(const LOCK *lock);
|
||||
extern uint32 LockTagHashCode(const LOCKTAG *locktag);
|
||||
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
|
||||
LOCKMODE lockmode,
|
||||
bool sessionLock,
|
||||
bool dontWait);
|
||||
extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag,
|
||||
LOCKMODE lockmode,
|
||||
bool sessionLock,
|
||||
bool dontWait,
|
||||
bool report_memory_error);
|
||||
extern void AbortStrongLockAcquire(void);
|
||||
extern bool LockRelease(const LOCKTAG *locktag,
|
||||
LOCKMODE lockmode, bool sessionLock);
|
||||
extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
|
||||
extern void LockReleaseSession(LOCKMETHODID lockmethodid);
|
||||
extern void LockReleaseCurrentOwner(void);
|
||||
extern void LockReassignCurrentOwner(void);
|
||||
extern bool LockHasWaiters(const LOCKTAG *locktag,
|
||||
LOCKMODE lockmode, bool sessionLock);
|
||||
extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
|
||||
LOCKMODE lockmode);
|
||||
extern void AtPrepare_Locks(void);
|
||||
extern void PostPrepare_Locks(TransactionId xid);
|
||||
extern int LockCheckConflicts(LockMethod lockMethodTable,
|
||||
LOCKMODE lockmode,
|
||||
LOCK *lock, PROCLOCK *proclock, PGPROC *proc);
|
||||
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
|
||||
extern void GrantAwaitedLock(void);
|
||||
extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode);
|
||||
extern Size LockShmemSize(void);
|
||||
extern LockData *GetLockStatusData(void);
|
||||
|
||||
extern void ReportLockTableError(bool report);
|
||||
|
||||
/*
 * Description of one lock as returned by GetRunningTransactionLocks().
 * NOTE(review): the field comments on dbOid/relOid below are inferred from
 * the field names and the DB OID + REL OID relation lock tag layout; confirm
 * against the code that fills this struct.
 */
typedef struct xl_standby_lock
|
||||
{
|
||||
TransactionId xid; /* xid of holder of AccessExclusiveLock */
|
||||
Oid dbOid; /* presumably the database OID of the locked relation */
|
||||
Oid relOid; /* presumably the OID of the locked relation */
|
||||
} xl_standby_lock;
|
||||
|
||||
extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks);
|
||||
extern const char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode);
|
||||
|
||||
extern void lock_twophase_recover(TransactionId xid, uint16 info,
|
||||
void *recdata, uint32 len);
|
||||
extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
|
||||
void *recdata, uint32 len);
|
||||
extern void lock_twophase_postabort(TransactionId xid, uint16 info,
|
||||
void *recdata, uint32 len);
|
||||
extern void lock_twophase_standby_recover(TransactionId xid, uint16 info,
|
||||
void *recdata, uint32 len);
|
||||
|
||||
extern DeadLockState DeadLockCheck(PGPROC *proc);
|
||||
extern PGPROC *GetBlockingAutoVacuumPgproc(void);
|
||||
extern void DeadLockReport(void);
|
||||
extern void RememberSimpleDeadLock(PGPROC *proc1,
|
||||
LOCKMODE lockmode,
|
||||
LOCK *lock,
|
||||
PGPROC *proc2);
|
||||
extern void InitDeadLockChecking(void);
|
||||
|
||||
#ifdef LOCK_DEBUG
|
||||
extern void DumpLocks(PGPROC *proc);
|
||||
extern void DumpAllLocks(void);
|
||||
#endif
|
||||
|
||||
/* Lock a VXID (used to wait for a transaction to finish) */
|
||||
extern void VirtualXactLockTableInsert(VirtualTransactionId vxid);
|
||||
extern void VirtualXactLockTableCleanup(void);
|
||||
extern bool VirtualXactLock(VirtualTransactionId vxid, bool wait);
|
||||
|
||||
#endif /* LOCK_H */
|
||||
122
pg_include/storage/lwlock.h
Executable file
122
pg_include/storage/lwlock.h
Executable file
@@ -0,0 +1,122 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* lwlock.h
|
||||
* Lightweight lock manager
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/lwlock.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef LWLOCK_H
|
||||
#define LWLOCK_H
|
||||
|
||||
/*
|
||||
* It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS
|
||||
* here, but we need them to set up enum LWLockId correctly, and having
|
||||
* this file include lock.h or bufmgr.h would be backwards.
|
||||
*/
|
||||
|
||||
/* Number of partitions of the shared buffer mapping hashtable */
|
||||
#define NUM_BUFFER_PARTITIONS 16
|
||||
|
||||
/* Number of partitions the shared lock tables are divided into */
|
||||
#define LOG2_NUM_LOCK_PARTITIONS 4
|
||||
#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS)
|
||||
|
||||
/* Number of partitions the shared predicate lock tables are divided into */
|
||||
#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4
|
||||
#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
|
||||
|
||||
/*
|
||||
* We have a number of predefined LWLocks, plus a bunch of LWLocks that are
|
||||
* dynamically assigned (e.g., for shared buffers). The LWLock structures
|
||||
* live in shared memory (since they contain shared data) and are identified
|
||||
* by values of this enumerated type. We abuse the notion of an enum somewhat
|
||||
* by allowing values not listed in the enum declaration to be assigned.
|
||||
* The extra value MaxDynamicLWLock is there to keep the compiler from
|
||||
* deciding that the enum can be represented as char or short ...
|
||||
*
|
||||
* If you remove a lock, please replace it with a placeholder. This retains
|
||||
* the lock numbering, which is helpful for DTrace and other external
|
||||
* debugging scripts.
|
||||
*/
|
||||
typedef enum LWLockId
|
||||
{
|
||||
BufFreelistLock,
|
||||
ShmemIndexLock,
|
||||
OidGenLock,
|
||||
XidGenLock,
|
||||
ProcArrayLock,
|
||||
SInvalReadLock,
|
||||
SInvalWriteLock,
|
||||
WALInsertLock,
|
||||
WALWriteLock,
|
||||
ControlFileLock,
|
||||
CheckpointLock,
|
||||
CLogControlLock,
|
||||
SubtransControlLock,
|
||||
MultiXactGenLock,
|
||||
MultiXactOffsetControlLock,
|
||||
MultiXactMemberControlLock,
|
||||
RelCacheInitLock,
|
||||
CheckpointerCommLock,
|
||||
TwoPhaseStateLock,
|
||||
TablespaceCreateLock,
|
||||
BtreeVacuumLock,
|
||||
AddinShmemInitLock,
|
||||
AutovacuumLock,
|
||||
AutovacuumScheduleLock,
|
||||
SyncScanLock,
|
||||
RelationMappingLock,
|
||||
AsyncCtlLock,
|
||||
AsyncQueueLock,
|
||||
SerializableXactHashLock,
|
||||
SerializableFinishedListLock,
|
||||
SerializablePredicateLockListLock,
|
||||
OldSerXidLock,
|
||||
SyncRepLock,
|
||||
/* Individual lock IDs end here */
|
||||
FirstBufMappingLock,
|
||||
FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
|
||||
FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS,
|
||||
|
||||
/* must be last except for MaxDynamicLWLock: */
|
||||
NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS,
|
||||
|
||||
MaxDynamicLWLock = 1000000000
|
||||
} LWLockId;
|
||||
|
||||
|
||||
typedef enum LWLockMode
|
||||
{
|
||||
LW_EXCLUSIVE,
|
||||
LW_SHARED,
|
||||
LW_WAIT_UNTIL_FREE /* A special mode used in PGPROC->lwlockMode,
|
||||
* when waiting for lock to become free. Not
|
||||
* to be used as LWLockAcquire argument */
|
||||
} LWLockMode;
|
||||
|
||||
|
||||
#ifdef LOCK_DEBUG
|
||||
extern bool Trace_lwlocks;
|
||||
#endif
|
||||
|
||||
extern LWLockId LWLockAssign(void);
|
||||
extern void LWLockAcquire(LWLockId lockid, LWLockMode mode);
|
||||
extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode);
|
||||
extern bool LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode);
|
||||
extern void LWLockRelease(LWLockId lockid);
|
||||
extern void LWLockReleaseAll(void);
|
||||
extern bool LWLockHeldByMe(LWLockId lockid);
|
||||
|
||||
extern int NumLWLocks(void);
|
||||
extern Size LWLockShmemSize(void);
|
||||
extern void CreateLWLocks(void);
|
||||
|
||||
extern void RequestAddinLWLocks(int n);
|
||||
|
||||
#endif /* LWLOCK_H */
|
||||
58
pg_include/storage/off.h
Executable file
58
pg_include/storage/off.h
Executable file
@@ -0,0 +1,58 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* off.h
|
||||
* POSTGRES disk "offset" definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/off.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef OFF_H
|
||||
#define OFF_H
|
||||
|
||||
#include "storage/itemid.h"
|
||||
/*
|
||||
* OffsetNumber:
|
||||
*
|
||||
* this is a 1-based index into the linp (ItemIdData) array in the
|
||||
* header of each disk page.
|
||||
*/
|
||||
typedef uint16 OffsetNumber;
|
||||
|
||||
#define InvalidOffsetNumber ((OffsetNumber) 0)
|
||||
#define FirstOffsetNumber ((OffsetNumber) 1)
|
||||
#define MaxOffsetNumber ((OffsetNumber) (BLCKSZ / sizeof(ItemIdData)))
|
||||
#define OffsetNumberMask (0xffff) /* valid uint16 bits */
|
||||
|
||||
/* ----------------
|
||||
* support macros
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* OffsetNumberIsValid
|
||||
* True iff the offset number is valid.
|
||||
*/
|
||||
/*
 * The argument is parenthesized at each use so that a compound expression
 * (e.g. "a & b" or "c ? x : y") is compared as a whole rather than being
 * split by operator precedence.  Note that the argument is evaluated twice,
 * so it must not have side effects.
 */
#define OffsetNumberIsValid(offsetNumber) \
	((bool) (((offsetNumber) != InvalidOffsetNumber) && \
			 ((offsetNumber) <= MaxOffsetNumber)))
|
||||
|
||||
/*
|
||||
* OffsetNumberNext
|
||||
* OffsetNumberPrev
|
||||
* Increments/decrements the argument. These macros look pointless
|
||||
* but they help us disambiguate the different manipulations on
|
||||
* OffsetNumbers (e.g., sometimes we subtract one from an
|
||||
* OffsetNumber to move back, and sometimes we do so to form a
|
||||
* real C array index).
|
||||
*/
|
||||
#define OffsetNumberNext(offsetNumber) \
|
||||
((OffsetNumber) (1 + (offsetNumber)))
|
||||
#define OffsetNumberPrev(offsetNumber) \
|
||||
((OffsetNumber) (-1 + (offsetNumber)))
|
||||
|
||||
#endif /* OFF_H */
|
||||
83
pg_include/storage/pg_sema.h
Executable file
83
pg_include/storage/pg_sema.h
Executable file
@@ -0,0 +1,83 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_sema.h
|
||||
* Platform-independent API for semaphores.
|
||||
*
|
||||
* PostgreSQL requires counting semaphores (the kind that keep track of
|
||||
* multiple unlock operations, and will allow an equal number of subsequent
|
||||
* lock operations before blocking). The underlying implementation is
|
||||
* not the same on every platform. This file defines the API that must
|
||||
* be provided by each port.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/pg_sema.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PG_SEMA_H
|
||||
#define PG_SEMA_H
|
||||
|
||||
/*
|
||||
* PGSemaphoreData and pointer type PGSemaphore are the data structure
|
||||
* representing an individual semaphore. The contents of PGSemaphoreData
|
||||
* vary across implementations and must never be touched by platform-
|
||||
* independent code. PGSemaphoreData structures are always allocated
|
||||
* in shared memory (to support implementations where the data changes during
|
||||
* lock/unlock).
|
||||
*
|
||||
* pg_config.h must define exactly one of the USE_xxx_SEMAPHORES symbols.
|
||||
*/
|
||||
|
||||
#ifdef USE_NAMED_POSIX_SEMAPHORES
|
||||
|
||||
#include <semaphore.h>
|
||||
|
||||
typedef sem_t *PGSemaphoreData;
|
||||
#endif
|
||||
|
||||
#ifdef USE_UNNAMED_POSIX_SEMAPHORES
|
||||
|
||||
#include <semaphore.h>
|
||||
|
||||
typedef sem_t PGSemaphoreData;
|
||||
#endif
|
||||
|
||||
#ifdef USE_SYSV_SEMAPHORES
|
||||
|
||||
typedef struct PGSemaphoreData
|
||||
{
|
||||
int semId; /* semaphore set identifier */
|
||||
int semNum; /* semaphore number within set */
|
||||
} PGSemaphoreData;
|
||||
#endif
|
||||
|
||||
#ifdef USE_WIN32_SEMAPHORES
|
||||
|
||||
typedef HANDLE PGSemaphoreData;
|
||||
#endif
|
||||
|
||||
typedef PGSemaphoreData *PGSemaphore;
|
||||
|
||||
|
||||
/* Module initialization (called during postmaster start or shmem reinit) */
|
||||
extern void PGReserveSemaphores(int maxSemas, int port);
|
||||
|
||||
/* Initialize a PGSemaphore structure to represent a sema with count 1 */
|
||||
extern void PGSemaphoreCreate(PGSemaphore sema);
|
||||
|
||||
/* Reset a previously-initialized PGSemaphore to have count 0 */
|
||||
extern void PGSemaphoreReset(PGSemaphore sema);
|
||||
|
||||
/* Lock a semaphore (decrement count), blocking if count would be < 0 */
|
||||
extern void PGSemaphoreLock(PGSemaphore sema, bool interruptOK);
|
||||
|
||||
/* Unlock a semaphore (increment count) */
|
||||
extern void PGSemaphoreUnlock(PGSemaphore sema);
|
||||
|
||||
/* Lock a semaphore only if able to do so without blocking */
|
||||
extern bool PGSemaphoreTryLock(PGSemaphore sema);
|
||||
|
||||
#endif /* PG_SEMA_H */
|
||||
58
pg_include/storage/pg_shmem.h
Executable file
58
pg_include/storage/pg_shmem.h
Executable file
@@ -0,0 +1,58 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_shmem.h
|
||||
* Platform-independent API for shared memory support.
|
||||
*
|
||||
* Every port is expected to support shared memory with approximately
|
||||
* SysV-ish semantics; in particular, a memory block is not anonymous
|
||||
* but has an ID, and we must be able to tell whether there are any
|
||||
* remaining processes attached to a block of a specified ID.
|
||||
*
|
||||
* To simplify life for the SysV implementation, the ID is assumed to
|
||||
* consist of two unsigned long values (these are key and ID in SysV
|
||||
* terms). Other platforms may ignore the second value if they need
|
||||
* only one ID number.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/pg_shmem.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PG_SHMEM_H
|
||||
#define PG_SHMEM_H
|
||||
|
||||
typedef struct PGShmemHeader /* standard header for all Postgres shmem */
|
||||
{
|
||||
int32 magic; /* magic # to identify Postgres segments */
|
||||
#define PGShmemMagic 679834894
|
||||
pid_t creatorPID; /* PID of creating process */
|
||||
Size totalsize; /* total size of segment */
|
||||
Size freeoffset; /* offset to first free space */
|
||||
void *index; /* pointer to ShmemIndex table */
|
||||
#ifndef WIN32 /* Windows doesn't have useful inode#s */
|
||||
dev_t device; /* device data directory is on */
|
||||
ino_t inode; /* inode number of data directory */
|
||||
#endif
|
||||
} PGShmemHeader;
|
||||
|
||||
|
||||
#ifdef EXEC_BACKEND
|
||||
#ifndef WIN32
|
||||
extern unsigned long UsedShmemSegID;
|
||||
#else
|
||||
extern HANDLE UsedShmemSegID;
|
||||
#endif
|
||||
extern void *UsedShmemSegAddr;
|
||||
|
||||
extern void PGSharedMemoryReAttach(void);
|
||||
#endif
|
||||
|
||||
extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate,
|
||||
int port);
|
||||
extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2);
|
||||
extern void PGSharedMemoryDetach(void);
|
||||
|
||||
#endif /* PG_SHMEM_H */
|
||||
55
pg_include/storage/pmsignal.h
Executable file
55
pg_include/storage/pmsignal.h
Executable file
@@ -0,0 +1,55 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pmsignal.h
|
||||
* routines for signaling the postmaster from its child processes
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/pmsignal.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PMSIGNAL_H
|
||||
#define PMSIGNAL_H
|
||||
|
||||
/*
|
||||
* Reasons for signaling the postmaster. We can cope with simultaneous
|
||||
* signals for different reasons. If the same reason is signaled multiple
|
||||
* times in quick succession, however, the postmaster is likely to observe
|
||||
* only one notification of it. This is okay for the present uses.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
PMSIGNAL_RECOVERY_STARTED, /* recovery has started */
|
||||
PMSIGNAL_BEGIN_HOT_STANDBY, /* begin Hot Standby */
|
||||
PMSIGNAL_WAKEN_ARCHIVER, /* send a NOTIFY signal to xlog archiver */
|
||||
PMSIGNAL_ROTATE_LOGFILE, /* send SIGUSR1 to syslogger to rotate logfile */
|
||||
PMSIGNAL_START_AUTOVAC_LAUNCHER, /* start an autovacuum launcher */
|
||||
PMSIGNAL_START_AUTOVAC_WORKER, /* start an autovacuum worker */
|
||||
PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */
|
||||
PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */
|
||||
|
||||
NUM_PMSIGNALS /* Must be last value of enum! */
|
||||
} PMSignalReason;
|
||||
|
||||
/* PMSignalData is an opaque struct, details known only within pmsignal.c */
|
||||
typedef struct PMSignalData PMSignalData;
|
||||
|
||||
/*
|
||||
* prototypes for functions in pmsignal.c
|
||||
*/
|
||||
extern Size PMSignalShmemSize(void);
|
||||
extern void PMSignalShmemInit(void);
|
||||
extern void SendPostmasterSignal(PMSignalReason reason);
|
||||
extern bool CheckPostmasterSignal(PMSignalReason reason);
|
||||
extern int AssignPostmasterChildSlot(void);
|
||||
extern bool ReleasePostmasterChildSlot(int slot);
|
||||
extern bool IsPostmasterChildWalSender(int slot);
|
||||
extern void MarkPostmasterChildActive(void);
|
||||
extern void MarkPostmasterChildInactive(void);
|
||||
extern void MarkPostmasterChildWalSender(void);
|
||||
extern bool PostmasterIsAlive(void);
|
||||
|
||||
#endif /* PMSIGNAL_H */
|
||||
64
pg_include/storage/pos.h
Executable file
64
pg_include/storage/pos.h
Executable file
@@ -0,0 +1,64 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pos.h
|
||||
* POSTGRES "position" definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/pos.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef POS_H
|
||||
#define POS_H
|
||||
|
||||
|
||||
/*
|
||||
* a 'position' used to be <pagenumber, offset> in postgres. this has
|
||||
* been changed to just <offset> as the notion of having multiple pages
|
||||
* within a block has been removed.
|
||||
*
|
||||
* the 'offset' abstraction is somewhat confusing. it is NOT a byte
|
||||
* offset within the page; instead, it is an offset into the line
|
||||
* pointer array contained on every page that stores (heap or index)
|
||||
* tuples.
|
||||
*/
|
||||
/* Storage for one position: the line-pointer-array offset described above. */
typedef bits16 PositionIdData;
|
||||
/* Pointer to a PositionIdData; the support macros below dereference it. */
typedef PositionIdData *PositionId;
|
||||
|
||||
/* ----------------
|
||||
* support macros
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* PositionIdIsValid
|
||||
* True iff the position identifier is valid.
|
||||
*/
|
||||
/* Validity is judged solely by PointerIsValid() on the identifier itself. */
#define PositionIdIsValid(positionId) PointerIsValid(positionId)
|
||||
|
||||
/*
|
||||
* PositionIdSetInvalid
|
||||
* Make an invalid position.
|
||||
*/
|
||||
/*
 * Stores 0 through the given position pointer.  The whole expansion is
 * parenthesized so that it behaves as one expression wherever it is used
 * (as an operand, under a unary operator, inside a larger macro), not just
 * as a stand-alone statement.  The argument is evaluated exactly once.
 */
#define PositionIdSetInvalid(positionId) \
	(*(positionId) = (bits16) 0)
|
||||
|
||||
/*
|
||||
* PositionIdSet
|
||||
* Sets a position identifier to the specified value.
|
||||
*/
|
||||
/*
 * Stores offsetNumber through the given position pointer.  The whole
 * expansion is parenthesized: without that, an expression such as
 * "PositionIdSet(p, n) + 1" would parse as "*(p) = ((n) + 1)" and store the
 * wrong value.  Each argument is evaluated exactly once.
 */
#define PositionIdSet(positionId, offsetNumber) \
	(*(positionId) = (offsetNumber))
|
||||
|
||||
/*
|
||||
* PositionIdGetOffsetNumber
|
||||
* Retrieve the offset number from a position identifier.
|
||||
*/
|
||||
/* Reads the value behind the position pointer and casts it to OffsetNumber. */
#define PositionIdGetOffsetNumber(positionId) ((OffsetNumber) *(positionId))
|
||||
|
||||
#endif /* POS_H */
|
||||
73
pg_include/storage/predicate.h
Executable file
73
pg_include/storage/predicate.h
Executable file
@@ -0,0 +1,73 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* predicate.h
|
||||
* POSTGRES public predicate locking definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/predicate.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PREDICATE_H
|
||||
#define PREDICATE_H
|
||||
|
||||
#include "utils/relcache.h"
|
||||
#include "utils/snapshot.h"
|
||||
|
||||
|
||||
/*
|
||||
* GUC variables
|
||||
*/
|
||||
extern int max_predicate_locks_per_xact;
|
||||
|
||||
|
||||
/* Number of SLRU buffers to use for predicate locking */
|
||||
#define NUM_OLDSERXID_BUFFERS 16
|
||||
|
||||
|
||||
/*
|
||||
* function prototypes
|
||||
*/
|
||||
|
||||
/* housekeeping for shared memory predicate lock structures */
|
||||
extern void InitPredicateLocks(void);
|
||||
extern Size PredicateLockShmemSize(void);
|
||||
|
||||
extern void CheckPointPredicate(void);
|
||||
|
||||
/* predicate lock reporting */
|
||||
extern bool PageIsPredicateLocked(Relation relation, BlockNumber blkno);
|
||||
|
||||
/* predicate lock maintenance */
|
||||
extern Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot);
|
||||
extern void SetSerializableTransactionSnapshot(Snapshot snapshot,
|
||||
TransactionId sourcexid);
|
||||
extern void RegisterPredicateLockingXid(TransactionId xid);
|
||||
extern void PredicateLockRelation(Relation relation, Snapshot snapshot);
|
||||
extern void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot);
|
||||
extern void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot);
|
||||
extern void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
|
||||
extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
|
||||
extern void TransferPredicateLocksToHeapRelation(Relation relation);
|
||||
extern void ReleasePredicateLocks(bool isCommit);
|
||||
|
||||
/* conflict detection (may also trigger rollback) */
|
||||
extern void CheckForSerializableConflictOut(bool valid, Relation relation, HeapTuple tuple,
|
||||
Buffer buffer, Snapshot snapshot);
|
||||
extern void CheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer);
|
||||
extern void CheckTableForSerializableConflictIn(Relation relation);
|
||||
|
||||
/* final rollback checking */
|
||||
extern void PreCommit_CheckForSerializationFailure(void);
|
||||
|
||||
/* two-phase commit support */
|
||||
extern void AtPrepare_PredicateLocks(void);
|
||||
extern void PostPrepare_PredicateLocks(TransactionId xid);
|
||||
extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit);
|
||||
extern void predicatelock_twophase_recover(TransactionId xid, uint16 info,
|
||||
void *recdata, uint32 len);
|
||||
|
||||
#endif /* PREDICATE_H */
|
||||
490
pg_include/storage/predicate_internals.h
Executable file
490
pg_include/storage/predicate_internals.h
Executable file
@@ -0,0 +1,490 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* predicate_internals.h
|
||||
* POSTGRES internal predicate locking definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/predicate_internals.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PREDICATE_INTERNALS_H
|
||||
#define PREDICATE_INTERNALS_H
|
||||
|
||||
#include "storage/lock.h"
|
||||
|
||||
/*
|
||||
* Commit number.
|
||||
*/
|
||||
typedef uint64 SerCommitSeqNo;
|
||||
|
||||
/*
|
||||
* Reserved commit sequence numbers:
|
||||
* - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
|
||||
* used as a SerCommitSeqNo, even an invalid one
|
||||
* - InvalidSerCommitSeqNo is used to indicate a transaction that
|
||||
* hasn't committed yet, so use a number greater than all valid
|
||||
* ones to make comparison do the expected thing
|
||||
* - RecoverySerCommitSeqNo is used to refer to transactions that
|
||||
* happened before a crash/recovery, since we restart the sequence
|
||||
* at that point. It's earlier than all normal sequence numbers,
|
||||
* and is only used by recovered prepared transactions
|
||||
*/
|
||||
#define InvalidSerCommitSeqNo ((SerCommitSeqNo) UINT64CONST(0xFFFFFFFFFFFFFFFF))
|
||||
#define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
|
||||
#define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
|
||||
|
||||
/*
|
||||
* The SERIALIZABLEXACT struct contains information needed for each
|
||||
* serializable database transaction to support SSI techniques.
|
||||
*
|
||||
* A home-grown list is maintained in shared memory to manage these.
|
||||
* An entry is used when the serializable transaction acquires a snapshot.
|
||||
* Unless the transaction is rolled back, this entry must generally remain
|
||||
* until all concurrent transactions have completed. (There are special
|
||||
* optimizations for READ ONLY transactions which often allow them to be
|
||||
* cleaned up earlier.) A transaction which is rolled back is cleaned up
|
||||
* as soon as possible.
|
||||
*
|
||||
* Eligibility for cleanup of committed transactions is generally determined
|
||||
* by comparing the transaction's finishedBefore field to
|
||||
* SerializableGlobalXmin.
|
||||
*/
|
||||
typedef struct SERIALIZABLEXACT
|
||||
{
|
||||
VirtualTransactionId vxid; /* The executing process always has one of
|
||||
* these. */
|
||||
|
||||
/*
|
||||
* We use two numbers to track the order that transactions commit. Before
|
||||
* commit, a transaction is marked as prepared, and prepareSeqNo is set.
|
||||
* Shortly after commit, it's marked as committed, and commitSeqNo is set.
|
||||
* This doesn't give a strict commit order, but these two values together
|
||||
* are good enough for us, as we can always err on the safe side and
|
||||
* assume that there's a conflict, if we can't be sure of the exact
|
||||
* ordering of two commits.
|
||||
*
|
||||
* Note that a transaction is marked as prepared for a short period during
|
||||
* commit processing, even if two-phase commit is not used. But with
|
||||
* two-phase commit, a transaction can stay in prepared state for some
|
||||
* time.
|
||||
*/
|
||||
SerCommitSeqNo prepareSeqNo;
|
||||
SerCommitSeqNo commitSeqNo;
|
||||
|
||||
/* these values are not both interesting at the same time */
|
||||
union
|
||||
{
|
||||
SerCommitSeqNo earliestOutConflictCommit; /* when committed with
|
||||
* conflict out */
|
||||
SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
|
||||
* no conflict out */
|
||||
} SeqNo;
|
||||
SHM_QUEUE outConflicts; /* list of write transactions whose data we
|
||||
* couldn't read. */
|
||||
SHM_QUEUE inConflicts; /* list of read transactions which couldn't
|
||||
* see our write. */
|
||||
SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
|
||||
SHM_QUEUE finishedLink; /* list link in
|
||||
* FinishedSerializableTransactions */
|
||||
|
||||
/*
|
||||
* for r/o transactions: list of concurrent r/w transactions that we could
|
||||
* potentially have conflicts with, and vice versa for r/w transactions
|
||||
*/
|
||||
SHM_QUEUE possibleUnsafeConflicts;
|
||||
|
||||
TransactionId topXid; /* top level xid for the transaction, if one
|
||||
* exists; else invalid */
|
||||
TransactionId finishedBefore; /* invalid means still running; else
|
||||
* the struct expires when no
|
||||
* serializable xids are before this. */
|
||||
TransactionId xmin; /* the transaction's snapshot xmin */
|
||||
uint32 flags; /* OR'd combination of values defined below */
|
||||
int pid; /* pid of associated process */
|
||||
} SERIALIZABLEXACT;
|
||||
|
||||
#define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
|
||||
#define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
|
||||
#define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
|
||||
#define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
|
||||
/*
|
||||
* The following flag actually means that the flagged transaction has a
|
||||
* conflict out *to a transaction which committed ahead of it*. It's hard
|
||||
* to get that into a name of a reasonable length.
|
||||
*/
|
||||
#define SXACT_FLAG_CONFLICT_OUT 0x00000010
|
||||
#define SXACT_FLAG_READ_ONLY 0x00000020
|
||||
#define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
|
||||
#define SXACT_FLAG_RO_SAFE 0x00000080
|
||||
#define SXACT_FLAG_RO_UNSAFE 0x00000100
|
||||
#define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
|
||||
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
|
||||
|
||||
/*
|
||||
* The following types are used to provide an ad hoc list for holding
|
||||
* SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
|
||||
* access these by key -- there are direct pointers to these objects where
|
||||
* needed. If a shared memory list is created, these types can probably be
|
||||
* eliminated in favor of using the general solution.
|
||||
*/
|
||||
typedef struct PredXactListElementData
|
||||
{
|
||||
SHM_QUEUE link;
|
||||
SERIALIZABLEXACT sxact;
|
||||
} PredXactListElementData;
|
||||
|
||||
typedef struct PredXactListElementData *PredXactListElement;
|
||||
|
||||
/* sizeof(PredXactListElementData) rounded up by MAXALIGN, as a Size. */
#define PredXactListElementDataSize ((Size) MAXALIGN(sizeof(PredXactListElementData)))
|
||||
|
||||
typedef struct PredXactListData
|
||||
{
|
||||
SHM_QUEUE availableList;
|
||||
SHM_QUEUE activeList;
|
||||
|
||||
/*
|
||||
* These global variables are maintained when registering and cleaning up
|
||||
* serializable transactions. They must be global across all backends,
|
||||
* but are not needed outside the predicate.c source file. Protected by
|
||||
* SerializableXactHashLock.
|
||||
*/
|
||||
TransactionId SxactGlobalXmin; /* global xmin for active serializable
|
||||
* transactions */
|
||||
int SxactGlobalXminCount; /* how many active serializable
|
||||
* transactions have this xmin */
|
||||
int WritableSxactCount; /* how many non-read-only serializable
|
||||
* transactions are active */
|
||||
SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
|
||||
* increasing number for
|
||||
* commits of serializable
|
||||
* transactions */
|
||||
/* Protected by SerializableXactHashLock. */
|
||||
SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks
|
||||
* and inConflicts for
|
||||
* committed transactions
|
||||
* through this seq no */
|
||||
/* Protected by SerializableFinishedListLock. */
|
||||
SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
|
||||
* seq no */
|
||||
SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
|
||||
|
||||
PredXactListElement element;
|
||||
} PredXactListData;
|
||||
|
||||
typedef struct PredXactListData *PredXactList;
|
||||
|
||||
/* sizeof(PredXactListData) rounded up by MAXALIGN, as a Size. */
#define PredXactListDataSize ((Size) MAXALIGN(sizeof(PredXactListData)))
|
||||
|
||||
|
||||
/*
|
||||
* The following types are used to provide lists of rw-conflicts between
|
||||
* pairs of transactions. Since exactly the same information is needed,
|
||||
* they are also used to record possible unsafe transaction relationships
|
||||
* for purposes of identifying safe snapshots for read-only transactions.
|
||||
*
|
||||
* When a RWConflictData is not in use to record either type of relationship
|
||||
* between a pair of transactions, it is kept on an "available" list. The
|
||||
* outLink field is used for maintaining that list.
|
||||
*/
|
||||
/*
 * One record relating a pair of SERIALIZABLEXACTs.  It carries two list
 * links plus a pointer to each of the two transactions involved.
 */
typedef struct RWConflictData
|
||||
{
|
||||
SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
|
||||
SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
|
||||
/* NOTE(review): presumably the sxact whose "out" list holds outLink -- confirm in predicate.c */
SERIALIZABLEXACT *sxactOut;
|
||||
/* NOTE(review): presumably the sxact whose "in" list holds inLink -- confirm in predicate.c */
SERIALIZABLEXACT *sxactIn;
|
||||
} RWConflictData;
|
||||
|
||||
typedef struct RWConflictData *RWConflict;
|
||||
|
||||
/* sizeof(RWConflictData) rounded up by MAXALIGN, as a Size. */
#define RWConflictDataSize ((Size) MAXALIGN(sizeof(RWConflictData)))
|
||||
|
||||
typedef struct RWConflictPoolHeaderData
|
||||
{
|
||||
SHM_QUEUE availableList;
|
||||
RWConflict element;
|
||||
} RWConflictPoolHeaderData;
|
||||
|
||||
typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;
|
||||
|
||||
/* sizeof(RWConflictPoolHeaderData) rounded up by MAXALIGN, as a Size. */
#define RWConflictPoolHeaderDataSize ((Size) MAXALIGN(sizeof(RWConflictPoolHeaderData)))
|
||||
|
||||
|
||||
/*
|
||||
* The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
|
||||
* transaction or any of its subtransactions.
|
||||
*/
|
||||
typedef struct SERIALIZABLEXIDTAG
|
||||
{
|
||||
TransactionId xid;
|
||||
} SERIALIZABLEXIDTAG;
|
||||
|
||||
/*
|
||||
* The SERIALIZABLEXID struct provides a link from a TransactionId for a
|
||||
* serializable transaction to the related SERIALIZABLEXACT record, even if
|
||||
* the transaction has completed and its connection has been closed.
|
||||
*
|
||||
* These are created as new top level transaction IDs are first assigned to
|
||||
* transactions which are participating in predicate locking. This may
|
||||
* never happen for a particular transaction if it doesn't write anything.
|
||||
* They are removed with their related serializable transaction objects.
|
||||
*
|
||||
* The SubTransGetTopmostTransaction method is used where necessary to get
|
||||
* from an XID which might be from a subtransaction to the top level XID.
|
||||
*/
|
||||
typedef struct SERIALIZABLEXID
|
||||
{
|
||||
/* hash key */
|
||||
SERIALIZABLEXIDTAG tag;
|
||||
|
||||
/* data */
|
||||
SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
|
||||
} SERIALIZABLEXID;
|
||||
|
||||
|
||||
/*
|
||||
* The PREDICATELOCKTARGETTAG struct identifies a database object which can
|
||||
* be the target of predicate locks.
|
||||
*
|
||||
* Note that the hash function being used doesn't properly respect tag
|
||||
* length -- it will go to a four byte boundary past the end of the tag.
|
||||
* If you change this struct, make sure any slack space is initialized,
|
||||
* so that any random bytes in the middle or at the end are not included
|
||||
* in the hash.
|
||||
*
|
||||
* TODO SSI: If we always use the same fields for the same type of value, we
|
||||
* should rename these. Holding off until it's clear there are no exceptions.
|
||||
* Since indexes are relations with blocks and tuples, it's looking likely that
|
||||
* the rename will be possible. If not, we may need to divide the last field
|
||||
* and use part of it for a target type, so that we know how to interpret the
|
||||
* data.
|
||||
*/
|
||||
typedef struct PREDICATELOCKTARGETTAG
|
||||
{
|
||||
uint32 locktag_field1; /* a 32-bit ID field */
|
||||
uint32 locktag_field2; /* a 32-bit ID field */
|
||||
uint32 locktag_field3; /* a 32-bit ID field */
|
||||
uint32 locktag_field4; /* a 32-bit ID field */
|
||||
uint32 locktag_field5; /* a 32-bit ID field */
|
||||
} PREDICATELOCKTARGETTAG;
|
||||
|
||||
/*
|
||||
* The PREDICATELOCKTARGET struct represents a database object on which there
|
||||
* are predicate locks.
|
||||
*
|
||||
* A hash list of these objects is maintained in shared memory. An entry is
|
||||
* added when a predicate lock is requested on an object which doesn't
|
||||
* already have one. An entry is removed when the last lock is removed from
|
||||
* its list.
|
||||
*
|
||||
* Because a particular target might become obsolete, due to update to a new
|
||||
* version, before the reading transaction is obsolete, we need some way to
|
||||
* prevent errors from reuse of a tuple ID. Rather than attempting to clean
|
||||
* up the targets as the related tuples are pruned or vacuumed, we check the
|
||||
* xmin on access. This should be far less costly.
|
||||
*/
|
||||
typedef struct PREDICATELOCKTARGET
|
||||
{
|
||||
/* hash key */
|
||||
PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
|
||||
|
||||
/* data */
|
||||
SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
|
||||
* predicate lock target */
|
||||
} PREDICATELOCKTARGET;
|
||||
|
||||
|
||||
/*
|
||||
* The PREDICATELOCKTAG struct identifies an individual predicate lock.
|
||||
*
|
||||
* It is the combination of predicate lock target (which is a lockable
|
||||
* object) and a serializable transaction which has acquired a lock on that
|
||||
* target.
|
||||
*/
|
||||
typedef struct PREDICATELOCKTAG
|
||||
{
|
||||
PREDICATELOCKTARGET *myTarget;
|
||||
SERIALIZABLEXACT *myXact;
|
||||
} PREDICATELOCKTAG;
|
||||
|
||||
/*
|
||||
* The PREDICATELOCK struct represents an individual lock.
|
||||
*
|
||||
* An entry can be created here when the related database object is read, or
|
||||
* by promotion of multiple finer-grained targets. All entries related to a
|
||||
* serializable transaction are removed when that serializable transaction is
|
||||
* cleaned up. Entries can also be removed when they are combined into a
|
||||
* single coarser-grained lock entry.
|
||||
*/
|
||||
typedef struct PREDICATELOCK
|
||||
{
|
||||
/* hash key */
|
||||
PREDICATELOCKTAG tag; /* unique identifier of lock */
|
||||
|
||||
/* data */
|
||||
SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
|
||||
* predicate locks */
|
||||
SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
|
||||
* predicate locks */
|
||||
SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
|
||||
} PREDICATELOCK;
|
||||
|
||||
|
||||
/*
|
||||
* The LOCALPREDICATELOCK struct represents a local copy of data which is
|
||||
* also present in the PREDICATELOCK table, organized for fast access without
|
||||
* needing to acquire a LWLock. It is strictly for optimization.
|
||||
*
|
||||
* Each serializable transaction creates its own local hash table to hold a
|
||||
* collection of these. This information is used to determine when a number
|
||||
* of fine-grained locks should be promoted to a single coarser-grained lock.
|
||||
* The information is maintained more-or-less in parallel to the
|
||||
* PREDICATELOCK data, but because this data is not protected by locks and is
|
||||
* only used in an optimization heuristic, it is allowed to drift in a few
|
||||
* corner cases where maintaining exact data would be expensive.
|
||||
*
|
||||
* The hash table is created when the serializable transaction acquires its
|
||||
* snapshot, and its memory is released upon completion of the transaction.
|
||||
*/
|
||||
typedef struct LOCALPREDICATELOCK
|
||||
{
|
||||
/* hash key */
|
||||
PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
|
||||
|
||||
/* data */
|
||||
bool held; /* is lock held, or just its children? */
|
||||
int childLocks; /* number of child locks currently held */
|
||||
} LOCALPREDICATELOCK;
|
||||
|
||||
|
||||
/*
|
||||
* The types of predicate locks which can be acquired.
|
||||
*/
|
||||
typedef enum PredicateLockTargetType
{
	/* target is a whole relation */
	PREDLOCKTAG_RELATION = 0,
	/* target is one page (block) of a relation */
	PREDLOCKTAG_PAGE,
	/* target is one tuple on a page */
	PREDLOCKTAG_TUPLE
	/* TODO SSI: Other types may be needed for index locking */
} PredicateLockTargetType;
|
||||
|
||||
|
||||
/*
|
||||
* This structure is used to quickly capture a copy of all predicate
|
||||
* locks. This is currently used only by the pg_lock_status function,
|
||||
* which in turn is used by the pg_locks view.
|
||||
*/
|
||||
/* Holds a count together with two pointers, one per kind of captured data. */
typedef struct PredicateLockData
|
||||
{
|
||||
/* NOTE(review): presumably the number of entries in each array below -- confirm in GetPredicateLockStatusData() */
int nelements;
|
||||
/* NOTE(review): presumably an array of lock target tags, one per captured lock -- confirm */
PREDICATELOCKTARGETTAG *locktags;
|
||||
/* NOTE(review): presumably an array of SERIALIZABLEXACT copies parallel to locktags -- confirm */
SERIALIZABLEXACT *xacts;
|
||||
} PredicateLockData;
|
||||
|
||||
|
||||
/*
|
||||
* These macros define how we map logical IDs of lockable objects into the
|
||||
* physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
|
||||
* rather than accessing the fields directly. Note multiple eval of target!
|
||||
*/
|
||||
#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
|
||||
((locktag).locktag_field1 = (dboid), \
|
||||
(locktag).locktag_field2 = (reloid), \
|
||||
(locktag).locktag_field3 = InvalidBlockNumber, \
|
||||
(locktag).locktag_field4 = InvalidOffsetNumber, \
|
||||
(locktag).locktag_field5 = InvalidTransactionId)
|
||||
|
||||
#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
|
||||
((locktag).locktag_field1 = (dboid), \
|
||||
(locktag).locktag_field2 = (reloid), \
|
||||
(locktag).locktag_field3 = (blocknum), \
|
||||
(locktag).locktag_field4 = InvalidOffsetNumber, \
|
||||
(locktag).locktag_field5 = InvalidTransactionId)
|
||||
|
||||
#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum,xmin) \
|
||||
((locktag).locktag_field1 = (dboid), \
|
||||
(locktag).locktag_field2 = (reloid), \
|
||||
(locktag).locktag_field3 = (blocknum), \
|
||||
(locktag).locktag_field4 = (offnum), \
|
||||
(locktag).locktag_field5 = (xmin))
|
||||
|
||||
#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
|
||||
((Oid) (locktag).locktag_field1)
|
||||
#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
|
||||
((Oid) (locktag).locktag_field2)
|
||||
#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
|
||||
((BlockNumber) (locktag).locktag_field3)
|
||||
#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
|
||||
((OffsetNumber) (locktag).locktag_field4)
|
||||
#define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) \
|
||||
((TransactionId) (locktag).locktag_field5)
|
||||
/*
 * Classify a tag by which of its fields carry real values: a valid offset
 * (field4) means TUPLE; otherwise a valid block number (field3) means PAGE;
 * otherwise RELATION.  Note that locktag is evaluated up to twice.
 */
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
	(((locktag).locktag_field4 == InvalidOffsetNumber) ? \
	 (((locktag).locktag_field3 == InvalidBlockNumber) ? \
	  PREDLOCKTAG_RELATION : PREDLOCKTAG_PAGE) : \
	 PREDLOCKTAG_TUPLE)
|
||||
|
||||
/*
|
||||
* Two-phase commit statefile records. There are two types: for each
|
||||
* transaction, we generate one per-transaction record and a variable
|
||||
* number of per-predicate-lock records.
|
||||
*/
|
||||
typedef enum TwoPhasePredicateRecordType
{
	/* the per-transaction record */
	TWOPHASEPREDICATERECORD_XACT = 0,
	/* a per-predicate-lock record */
	TWOPHASEPREDICATERECORD_LOCK = 1
} TwoPhasePredicateRecordType;
|
||||
|
||||
/*
|
||||
* Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
|
||||
* much is needed because most of it is not meaningful for a recovered
|
||||
* prepared transaction.
|
||||
*
|
||||
* In particular, we do not record the in and out conflict lists for a
|
||||
* prepared transaction because the associated SERIALIZABLEXACTs will
|
||||
* not be available after recovery. Instead, we simply record the
|
||||
* existence of each type of conflict by setting the transaction's
|
||||
* summary conflict in/out flag.
|
||||
*/
|
||||
typedef struct TwoPhasePredicateXactRecord
|
||||
{
|
||||
TransactionId xmin;
|
||||
uint32 flags;
|
||||
} TwoPhasePredicateXactRecord;
|
||||
|
||||
/* Per-lock state */
|
||||
typedef struct TwoPhasePredicateLockRecord
|
||||
{
|
||||
PREDICATELOCKTARGETTAG target;
|
||||
} TwoPhasePredicateLockRecord;
|
||||
|
||||
typedef struct TwoPhasePredicateRecord
|
||||
{
|
||||
TwoPhasePredicateRecordType type;
|
||||
union
|
||||
{
|
||||
TwoPhasePredicateXactRecord xactRecord;
|
||||
TwoPhasePredicateLockRecord lockRecord;
|
||||
} data;
|
||||
} TwoPhasePredicateRecord;
|
||||
|
||||
/*
|
||||
* Define a macro to use for an "empty" SERIALIZABLEXACT reference.
|
||||
*/
|
||||
#define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
|
||||
|
||||
|
||||
/*
|
||||
* Function definitions for functions needing awareness of predicate
|
||||
* locking internals.
|
||||
*/
|
||||
extern PredicateLockData *GetPredicateLockStatusData(void);
|
||||
|
||||
|
||||
#endif /* PREDICATE_INTERNALS_H */
|
||||
264
pg_include/storage/proc.h
Executable file
264
pg_include/storage/proc.h
Executable file
@@ -0,0 +1,264 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* proc.h
|
||||
* per-process shared memory data structures
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/proc.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef _PROC_H_
|
||||
#define _PROC_H_
|
||||
|
||||
#include "access/xlog.h"
|
||||
#include "datatype/timestamp.h"
|
||||
#include "storage/latch.h"
|
||||
#include "storage/lock.h"
|
||||
#include "storage/pg_sema.h"
|
||||
|
||||
/*
|
||||
* Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
|
||||
* for non-aborted subtransactions of its current top transaction. These
|
||||
* have to be treated as running XIDs by other backends.
|
||||
*
|
||||
* We also keep track of whether the cache overflowed (ie, the transaction has
|
||||
* generated at least one subtransaction that didn't fit in the cache).
|
||||
* If none of the caches have overflowed, we can assume that an XID that's not
|
||||
* listed anywhere in the PGPROC array is not a running transaction. Else we
|
||||
* have to look at pg_subtrans.
|
||||
*/
|
||||
#define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */
|
||||
|
||||
struct XidCache
|
||||
{
|
||||
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS];
|
||||
};
|
||||
|
||||
/* Flags for PGXACT->vacuumFlags */
|
||||
#define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */
|
||||
#define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */
|
||||
#define PROC_IN_ANALYZE 0x04 /* currently running analyze */
|
||||
#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
|
||||
|
||||
/* flags reset at EOXact */
|
||||
/*
 * Built from the named flags rather than a literal so the mask cannot drift
 * from the flag definitions.  PROC_IS_AUTOVACUUM is deliberately not part of
 * the mask (the value is the same 0x0E as before).
 */
#define PROC_VACUUM_STATE_MASK \
	(PROC_IN_VACUUM | PROC_IN_ANALYZE | PROC_VACUUM_FOR_WRAPAROUND)
|
||||
|
||||
/*
|
||||
* We allow a small number of "weak" relation locks (AccessShareLock,
|
||||
* RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
|
||||
* rather than the main lock table. This eases contention on the lock
|
||||
* manager LWLocks. See storage/lmgr/README for additional details.
|
||||
*/
|
||||
#define FP_LOCK_SLOTS_PER_BACKEND 16
|
||||
|
||||
/*
|
||||
* Each backend has a PGPROC struct in shared memory. There is also a list of
|
||||
* currently-unused PGPROC structs that will be reallocated to new backends.
|
||||
*
|
||||
* links: list link for any list the PGPROC is in. When waiting for a lock,
|
||||
* the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC
|
||||
* is linked into ProcGlobal's freeProcs list.
|
||||
*
|
||||
* Note: twophase.c also sets up a dummy PGPROC struct for each currently
|
||||
* prepared transaction. These PGPROCs appear in the ProcArray data structure
|
||||
* so that the prepared transactions appear to be still running and are
|
||||
* correctly shown as holding locks. A prepared transaction PGPROC can be
|
||||
* distinguished from a real one at need by the fact that it has pid == 0.
|
||||
* The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
|
||||
* but its myProcLocks[] lists are valid.
|
||||
*/
|
||||
struct PGPROC
|
||||
{
|
||||
/* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */
|
||||
SHM_QUEUE links; /* list link if process is in a list */
|
||||
|
||||
PGSemaphoreData sem; /* ONE semaphore to sleep on */
|
||||
int waitStatus; /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */
|
||||
|
||||
Latch procLatch; /* generic latch for process */
|
||||
|
||||
LocalTransactionId lxid; /* local id of top-level transaction currently
|
||||
* being executed by this proc, if running;
|
||||
* else InvalidLocalTransactionId */
|
||||
int pid; /* Backend's process ID; 0 if prepared xact */
|
||||
int pgprocno;
|
||||
|
||||
/* These fields are zero while a backend is still starting up: */
|
||||
BackendId backendId; /* This backend's backend ID (if assigned) */
|
||||
Oid databaseId; /* OID of database this backend is using */
|
||||
Oid roleId; /* OID of role using this backend */
|
||||
|
||||
/*
|
||||
* While in hot standby mode, shows that a conflict signal has been sent
|
||||
* for the current transaction. Set/cleared while holding ProcArrayLock,
|
||||
* though not required. Accessed without lock, if needed.
|
||||
*/
|
||||
bool recoveryConflictPending;
|
||||
|
||||
/* Info about LWLock the process is currently waiting for, if any. */
|
||||
bool lwWaiting; /* true if waiting for an LW lock */
|
||||
uint8 lwWaitMode; /* lwlock mode being waited for */
|
||||
struct PGPROC *lwWaitLink; /* next waiter for same LW lock */
|
||||
|
||||
/* Info about lock the process is currently waiting for, if any. */
|
||||
/* waitLock and waitProcLock are NULL if not currently waiting. */
|
||||
LOCK *waitLock; /* Lock object we're sleeping on ... */
|
||||
PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */
|
||||
LOCKMODE waitLockMode; /* type of lock we're waiting for */
|
||||
LOCKMASK heldLocks; /* bitmask for lock types already held on this
|
||||
* lock object by this backend */
|
||||
|
||||
/*
|
||||
* Info to allow us to wait for synchronous replication, if needed.
|
||||
* waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
|
||||
* syncRepState must not be touched except by owning process or WALSender.
|
||||
* syncRepLinks used only while holding SyncRepLock.
|
||||
*/
|
||||
XLogRecPtr waitLSN; /* waiting for this LSN or higher */
|
||||
int syncRepState; /* wait state for sync rep */
|
||||
SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */
|
||||
|
||||
/*
|
||||
* All PROCLOCK objects for locks held or awaited by this backend are
|
||||
* linked into one of these lists, according to the partition number of
|
||||
* their lock.
|
||||
*/
|
||||
SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
|
||||
|
||||
struct XidCache subxids; /* cache for subtransaction XIDs */
|
||||
|
||||
/* Per-backend LWLock. Protects fields below. */
|
||||
LWLockId backendLock; /* protects the fields below */
|
||||
|
||||
/* Lock manager data, recording fast-path locks taken by this backend. */
|
||||
uint64 fpLockBits; /* lock modes held for each fast-path slot */
|
||||
Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */
|
||||
bool fpVXIDLock; /* are we holding a fast-path VXID lock? */
|
||||
LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID
|
||||
* lock */
|
||||
};
|
||||
|
||||
/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
|
||||
|
||||
|
||||
extern PGDLLIMPORT PGPROC *MyProc;
|
||||
extern PGDLLIMPORT struct PGXACT *MyPgXact;
|
||||
|
||||
/*
|
||||
* Prior to PostgreSQL 9.2, the fields below were stored as part of the
|
||||
* PGPROC. However, benchmarking revealed that packing these particular
|
||||
* members into a separate array as tightly as possible sped up GetSnapshotData
|
||||
* considerably on systems with many CPU cores, by reducing the number of
|
||||
* cache lines needing to be fetched. Thus, think very carefully before adding
|
||||
* anything else here.
|
||||
*/
|
||||
typedef struct PGXACT
|
||||
{
|
||||
TransactionId xid; /* id of top-level transaction currently being
|
||||
* executed by this proc, if running and XID
|
||||
* is assigned; else InvalidTransactionId */
|
||||
|
||||
TransactionId xmin; /* minimal running XID as it was when we were
|
||||
* starting our xact, excluding LAZY VACUUM:
|
||||
* vacuum must not remove tuples deleted by
|
||||
* xid >= xmin ! */
|
||||
|
||||
uint8 vacuumFlags; /* vacuum-related flags, see above */
|
||||
bool overflowed; /* NOTE(review): presumably set when this backend's subtransaction XID cache (PGPROC.subxids) has overflowed -- confirm in procarray.c */
|
||||
bool inCommit; /* true if within commit critical section */
|
||||
|
||||
uint8 nxids; /* NOTE(review): presumably the number of entries in use in PGPROC.subxids -- confirm in procarray.c */
|
||||
} PGXACT;
|
||||
|
||||
/*
|
||||
* There is one ProcGlobal struct for the whole database cluster.
|
||||
*/
|
||||
typedef struct PROC_HDR
|
||||
{
|
||||
/* Array of PGPROC structures (not including dummies for prepared txns) */
|
||||
PGPROC *allProcs;
|
||||
/* Array of PGXACT structures (not including dummies for prepared txns) */
|
||||
PGXACT *allPgXact;
|
||||
/* Length of allProcs array */
|
||||
uint32 allProcCount;
|
||||
/* Head of list of free PGPROC structures */
|
||||
PGPROC *freeProcs;
|
||||
/* Head of list of autovacuum's free PGPROC structures */
|
||||
PGPROC *autovacFreeProcs;
|
||||
/* WALWriter process's latch */
|
||||
Latch *walwriterLatch;
|
||||
/* Checkpointer process's latch */
|
||||
Latch *checkpointerLatch;
|
||||
/* Current shared estimate of appropriate spins_per_delay value */
|
||||
int spins_per_delay;
|
||||
/* The proc of the Startup process, since not in ProcArray */
|
||||
PGPROC *startupProc;
|
||||
int startupProcPid; /* NOTE(review): presumably the PID of the Startup process that startupProc describes -- confirm in proc.c */
|
||||
/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
|
||||
int startupBufferPinWaitBufId;
|
||||
} PROC_HDR;
|
||||
|
||||
extern PROC_HDR *ProcGlobal;
|
||||
|
||||
extern PGPROC *PreparedXactProcs;
|
||||
|
||||
/*
|
||||
* We set aside some extra PGPROC structures for auxiliary processes,
|
||||
* ie things that aren't full-fledged backends but need shmem access.
|
||||
*
|
||||
* Background writer, checkpointer and WAL writer run during normal operation.
|
||||
* Startup process and WAL receiver also consume 2 slots, but WAL writer is
|
||||
* launched only after startup has exited, so we only need 4 slots.
|
||||
*/
|
||||
#define NUM_AUXILIARY_PROCS 4
|
||||
|
||||
|
||||
/* configurable options */
|
||||
extern int DeadlockTimeout;
|
||||
extern int StatementTimeout;
|
||||
extern bool log_lock_waits;
|
||||
|
||||
extern volatile bool cancel_from_timeout;
|
||||
|
||||
|
||||
/*
|
||||
* Function Prototypes
|
||||
*/
|
||||
extern int ProcGlobalSemas(void);
|
||||
extern Size ProcGlobalShmemSize(void);
|
||||
extern void InitProcGlobal(void);
|
||||
extern void InitProcess(void);
|
||||
extern void InitProcessPhase2(void);
|
||||
extern void InitAuxiliaryProcess(void);
|
||||
|
||||
extern void PublishStartupProcessInformation(void);
|
||||
extern void SetStartupBufferPinWaitBufId(int bufid);
|
||||
extern int GetStartupBufferPinWaitBufId(void);
|
||||
|
||||
extern bool HaveNFreeProcs(int n);
|
||||
extern void ProcReleaseLocks(bool isCommit);
|
||||
|
||||
extern void ProcQueueInit(PROC_QUEUE *queue);
|
||||
extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
|
||||
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
|
||||
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
|
||||
extern bool IsWaitingForLock(void);
|
||||
extern void LockErrorCleanup(void);
|
||||
|
||||
extern void ProcWaitForSignal(void);
|
||||
extern void ProcSendSignal(int pid);
|
||||
|
||||
extern bool enable_sig_alarm(int delayms, bool is_statement_timeout);
|
||||
extern bool disable_sig_alarm(bool is_statement_timeout);
|
||||
extern void handle_sig_alarm(SIGNAL_ARGS);
|
||||
|
||||
extern bool enable_standby_sig_alarm(TimestampTz now,
|
||||
TimestampTz fin_time, bool deadlock_only);
|
||||
extern bool disable_standby_sig_alarm(void);
|
||||
extern void handle_standby_sig_alarm(SIGNAL_ARGS);
|
||||
|
||||
#endif /* PROC_H */
|
||||
79
pg_include/storage/procarray.h
Executable file
79
pg_include/storage/procarray.h
Executable file
@@ -0,0 +1,79 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* procarray.h
|
||||
* POSTGRES process array definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/procarray.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PROCARRAY_H
|
||||
#define PROCARRAY_H
|
||||
|
||||
#include "storage/standby.h"
|
||||
#include "utils/snapshot.h"
|
||||
|
||||
|
||||
extern Size ProcArrayShmemSize(void);
|
||||
extern void CreateSharedProcArray(void);
|
||||
extern void ProcArrayAdd(PGPROC *proc);
|
||||
extern void ProcArrayRemove(PGPROC *proc, TransactionId latestXid);
|
||||
|
||||
extern void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid);
|
||||
extern void ProcArrayClearTransaction(PGPROC *proc);
|
||||
|
||||
extern void ProcArrayApplyRecoveryInfo(RunningTransactions running);
|
||||
extern void ProcArrayApplyXidAssignment(TransactionId topxid,
|
||||
int nsubxids, TransactionId *subxids);
|
||||
|
||||
extern void RecordKnownAssignedTransactionIds(TransactionId xid);
|
||||
extern void ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
|
||||
int nsubxids, TransactionId *subxids,
|
||||
TransactionId max_xid);
|
||||
extern void ExpireAllKnownAssignedTransactionIds(void);
|
||||
extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid);
|
||||
|
||||
extern int GetMaxSnapshotXidCount(void);
|
||||
extern int GetMaxSnapshotSubxidCount(void);
|
||||
|
||||
extern Snapshot GetSnapshotData(Snapshot snapshot);
|
||||
|
||||
extern bool ProcArrayInstallImportedXmin(TransactionId xmin,
|
||||
TransactionId sourcexid);
|
||||
|
||||
extern RunningTransactions GetRunningTransactionData(void);
|
||||
|
||||
extern bool TransactionIdIsInProgress(TransactionId xid);
|
||||
extern bool TransactionIdIsActive(TransactionId xid);
|
||||
extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum);
|
||||
extern TransactionId GetOldestActiveTransactionId(void);
|
||||
|
||||
extern int GetTransactionsInCommit(TransactionId **xids_p);
|
||||
extern bool HaveTransactionsInCommit(TransactionId *xids, int nxids);
|
||||
|
||||
extern PGPROC *BackendPidGetProc(int pid);
|
||||
extern int BackendXidGetPid(TransactionId xid);
|
||||
extern bool IsBackendPid(int pid);
|
||||
|
||||
extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
|
||||
bool excludeXmin0, bool allDbs, int excludeVacuum,
|
||||
int *nvxids);
|
||||
extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid);
|
||||
extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode);
|
||||
|
||||
extern bool MinimumActiveBackends(int min);
|
||||
extern int CountDBBackends(Oid databaseid);
|
||||
extern void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending);
|
||||
extern int CountUserBackends(Oid roleid);
|
||||
extern bool CountOtherDBBackends(Oid databaseId,
|
||||
int *nbackends, int *nprepared);
|
||||
|
||||
extern void XidCacheRemoveRunningXids(TransactionId xid,
|
||||
int nxids, const TransactionId *xids,
|
||||
TransactionId latestXid);
|
||||
|
||||
#endif /* PROCARRAY_H */
|
||||
58
pg_include/storage/procsignal.h
Executable file
58
pg_include/storage/procsignal.h
Executable file
@@ -0,0 +1,58 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* procsignal.h
|
||||
* Routines for interprocess signalling
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/procsignal.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PROCSIGNAL_H
|
||||
#define PROCSIGNAL_H
|
||||
|
||||
#include "storage/backendid.h"
|
||||
|
||||
|
||||
/*
|
||||
* Reasons for signalling a Postgres child process (a backend or an auxiliary
|
||||
* process, like checkpointer). We can cope with concurrent signals for different
|
||||
* reasons. However, if the same reason is signaled multiple times in quick
|
||||
* succession, the process is likely to observe only one notification of it.
|
||||
* This is okay for the present uses.
|
||||
*
|
||||
* Also, because of race conditions, it's important that all the signals be
|
||||
* defined so that no harm is done if a process mistakenly receives one.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */
|
||||
PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */
|
||||
|
||||
/* Recovery conflict reasons */
|
||||
PROCSIG_RECOVERY_CONFLICT_DATABASE,
|
||||
PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
|
||||
PROCSIG_RECOVERY_CONFLICT_LOCK,
|
||||
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
|
||||
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
|
||||
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
|
||||
|
||||
NUM_PROCSIGNALS /* Must be last! Enumerators start at 0, so this equals the number of reasons listed above. */
|
||||
} ProcSignalReason;
|
||||
|
||||
/*
|
||||
* prototypes for functions in procsignal.c
|
||||
*/
|
||||
extern Size ProcSignalShmemSize(void);
|
||||
extern void ProcSignalShmemInit(void);
|
||||
|
||||
extern void ProcSignalInit(int pss_idx);
|
||||
extern int SendProcSignal(pid_t pid, ProcSignalReason reason,
|
||||
BackendId backendId);
|
||||
|
||||
extern void procsignal_sigusr1_handler(SIGNAL_ARGS);
|
||||
|
||||
#endif /* PROCSIGNAL_H */
|
||||
23
pg_include/storage/reinit.h
Executable file
23
pg_include/storage/reinit.h
Executable file
@@ -0,0 +1,23 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* reinit.h
|
||||
* Reinitialization of unlogged relations
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/reinit.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef REINIT_H
|
||||
#define REINIT_H
|
||||
|
||||
extern void ResetUnloggedRelations(int op);
|
||||
|
||||
#define UNLOGGED_RELATION_CLEANUP 0x0001
|
||||
#define UNLOGGED_RELATION_INIT 0x0002
|
||||
|
||||
#endif /* REINIT_H */
|
||||
119
pg_include/storage/relfilenode.h
Executable file
119
pg_include/storage/relfilenode.h
Executable file
@@ -0,0 +1,119 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* relfilenode.h
|
||||
* Physical access information for relations.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/relfilenode.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef RELFILENODE_H
|
||||
#define RELFILENODE_H
|
||||
|
||||
#include "storage/backendid.h"
|
||||
|
||||
/*
|
||||
* The physical storage of a relation consists of one or more forks. The
|
||||
* main fork is always created, but in addition to that there can be
|
||||
* additional forks for storing various metadata. ForkNumber is used when
|
||||
* we need to refer to a specific fork in a relation.
|
||||
*/
|
||||
typedef enum ForkNumber
|
||||
{
|
||||
InvalidForkNumber = -1,
|
||||
MAIN_FORKNUM = 0,
|
||||
FSM_FORKNUM, /* implicitly 1 */
|
||||
VISIBILITYMAP_FORKNUM, /* implicitly 2 */
|
||||
INIT_FORKNUM /* implicitly 3; currently the highest fork number (MAX_FORKNUM) */
|
||||
|
||||
/*
|
||||
* NOTE: if you add a new fork, change MAX_FORKNUM below and update the
|
||||
* forkNames array in catalog.c
|
||||
*/
|
||||
} ForkNumber;
|
||||
|
||||
#define MAX_FORKNUM INIT_FORKNUM
|
||||
|
||||
/*
|
||||
* RelFileNode must provide all that we need to know to physically access
|
||||
* a relation, with the exception of the backend ID, which can be provided
|
||||
* separately. Note, however, that a "physical" relation is comprised of
|
||||
* multiple files on the filesystem, as each fork is stored as a separate
|
||||
* file, and each fork can be divided into multiple segments. See md.c.
|
||||
*
|
||||
* spcNode identifies the tablespace of the relation. It corresponds to
|
||||
* pg_tablespace.oid.
|
||||
*
|
||||
* dbNode identifies the database of the relation. It is zero for
|
||||
* "shared" relations (those common to all databases of a cluster).
|
||||
* Nonzero dbNode values correspond to pg_database.oid.
|
||||
*
|
||||
* relNode identifies the specific relation. relNode corresponds to
|
||||
* pg_class.relfilenode (NOT pg_class.oid, because we need to be able
|
||||
* to assign new physical files to relations in some situations).
|
||||
* Notice that relNode is only unique within a particular database.
|
||||
*
|
||||
* Note: spcNode must be GLOBALTABLESPACE_OID if and only if dbNode is
|
||||
* zero. We support shared relations only in the "global" tablespace.
|
||||
*
|
||||
* Note: in pg_class we allow reltablespace == 0 to denote that the
|
||||
* relation is stored in its database's "default" tablespace (as
|
||||
* identified by pg_database.dattablespace). However this shorthand
|
||||
* is NOT allowed in RelFileNode structs --- the real tablespace ID
|
||||
* must be supplied when setting spcNode.
|
||||
*
|
||||
* Note: in pg_class, relfilenode can be zero to denote that the relation
|
||||
* is a "mapped" relation, whose current true filenode number is available
|
||||
* from relmapper.c. Again, this case is NOT allowed in RelFileNodes.
|
||||
*
|
||||
* Note: various places use RelFileNode in hashtable keys. Therefore,
|
||||
* there *must not* be any unused padding bytes in this struct. That
|
||||
* should be safe as long as all the fields are of type Oid.
|
||||
*/
|
||||
typedef struct RelFileNode
|
||||
{
|
||||
Oid spcNode; /* tablespace (pg_tablespace.oid; never the 0 shorthand) */
|
||||
Oid dbNode; /* database (pg_database.oid), or 0 for shared relations */
|
||||
Oid relNode; /* relation (pg_class.relfilenode, NOT pg_class.oid) */
|
||||
} RelFileNode;
|
||||
|
||||
/*
|
||||
* Augmenting a relfilenode with the backend ID provides all the information
|
||||
* we need to locate the physical storage. The backend ID is InvalidBackendId
|
||||
* for regular relations (those accessible to more than one backend), or the
|
||||
* owning backend's ID for backend-local relations. Backend-local relations
|
||||
* are always transient and removed in case of a database crash; they are
|
||||
* never WAL-logged or fsync'd.
|
||||
*/
|
||||
typedef struct RelFileNodeBackend
|
||||
{
|
||||
RelFileNode node; /* tablespace/database/relation identifier */
|
||||
BackendId backend; /* InvalidBackendId for regular relations, else the owning backend's ID */
|
||||
} RelFileNodeBackend;
|
||||
|
||||
#define RelFileNodeBackendIsTemp(rnode) \
|
||||
((rnode).backend != InvalidBackendId) /* nonzero when a backend ID is set; rnode is a RelFileNodeBackend value, not a pointer */
|
||||
|
||||
/*
|
||||
* Note: RelFileNodeEquals and RelFileNodeBackendEquals compare relNode first
|
||||
* since that is most likely to be different in two unequal RelFileNodes. It
|
||||
* is probably redundant to compare spcNode if the other fields are found equal,
|
||||
* but do it anyway to be sure. Likewise for checking the backend ID in
|
||||
* RelFileNodeBackendEquals.
|
||||
*/
|
||||
#define RelFileNodeEquals(node1, node2) \
|
||||
((node1).relNode == (node2).relNode && \
|
||||
(node1).dbNode == (node2).dbNode && \
|
||||
(node1).spcNode == (node2).spcNode) /* each argument may be evaluated up to three times: pass side-effect-free expressions */
|
||||
|
||||
#define RelFileNodeBackendEquals(node1, node2) \
|
||||
((node1).node.relNode == (node2).node.relNode && \
|
||||
(node1).node.dbNode == (node2).node.dbNode && \
|
||||
(node1).backend == (node2).backend && \
|
||||
(node1).node.spcNode == (node2).node.spcNode) /* each argument may be evaluated up to four times: pass side-effect-free expressions */
|
||||
|
||||
#endif /* RELFILENODE_H */
|
||||
1026
pg_include/storage/s_lock.h
Executable file
1026
pg_include/storage/s_lock.h
Executable file
File diff suppressed because it is too large
Load Diff
78
pg_include/storage/shmem.h
Executable file
78
pg_include/storage/shmem.h
Executable file
@@ -0,0 +1,78 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* shmem.h
|
||||
* shared memory management structures
|
||||
*
|
||||
* Historical note:
|
||||
* A long time ago, Postgres' shared memory region was allowed to be mapped
|
||||
* at a different address in each process, and shared memory "pointers" were
|
||||
* passed around as offsets relative to the start of the shared memory region.
|
||||
* That is no longer the case: each process must map the shared memory region
|
||||
* at the same address. This means shared memory pointers can be passed
|
||||
* around directly between different processes.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/shmem.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SHMEM_H
|
||||
#define SHMEM_H
|
||||
|
||||
#include "utils/hsearch.h"
|
||||
|
||||
|
||||
/* shmqueue.c */
|
||||
typedef struct SHM_QUEUE
|
||||
{
|
||||
struct SHM_QUEUE *prev; /* link to the previous SHM_QUEUE node */
|
||||
struct SHM_QUEUE *next; /* link to the next SHM_QUEUE node */
|
||||
} SHM_QUEUE;
|
||||
|
||||
/* shmem.c */
|
||||
extern void InitShmemAccess(void *seghdr);
|
||||
extern void InitShmemAllocation(void);
|
||||
extern void *ShmemAlloc(Size size);
|
||||
extern bool ShmemAddrIsValid(const void *addr);
|
||||
extern void InitShmemIndex(void);
|
||||
extern HTAB *ShmemInitHash(const char *name, long init_size, long max_size,
|
||||
HASHCTL *infoP, int hash_flags);
|
||||
extern void *ShmemInitStruct(const char *name, Size size, bool *foundPtr);
|
||||
extern Size add_size(Size s1, Size s2);
|
||||
extern Size mul_size(Size s1, Size s2);
|
||||
|
||||
/* ipci.c */
|
||||
extern void RequestAddinShmemSpace(Size size);
|
||||
|
||||
/* size constants for the shmem index table */
|
||||
/* max size of data structure string name */
|
||||
#define SHMEM_INDEX_KEYSIZE (48)
|
||||
/* estimated size of the shmem index table (not a hard limit) */
|
||||
#define SHMEM_INDEX_SIZE (64)
|
||||
|
||||
/* this is a hash bucket in the shmem index table */
|
||||
typedef struct
|
||||
{
|
||||
char key[SHMEM_INDEX_KEYSIZE]; /* string name (buffer of SHMEM_INDEX_KEYSIZE bytes) */
|
||||
void *location; /* location in shared mem */
|
||||
Size size; /* # bytes allocated for the structure */
|
||||
} ShmemIndexEnt;
|
||||
|
||||
/*
|
||||
* prototypes for functions in shmqueue.c
|
||||
*/
|
||||
extern void SHMQueueInit(SHM_QUEUE *queue);
|
||||
extern void SHMQueueElemInit(SHM_QUEUE *queue);
|
||||
extern void SHMQueueDelete(SHM_QUEUE *queue);
|
||||
extern void SHMQueueInsertBefore(SHM_QUEUE *queue, SHM_QUEUE *elem);
|
||||
extern void SHMQueueInsertAfter(SHM_QUEUE *queue, SHM_QUEUE *elem);
|
||||
extern Pointer SHMQueueNext(const SHM_QUEUE *queue, const SHM_QUEUE *curElem,
|
||||
Size linkOffset);
|
||||
extern Pointer SHMQueuePrev(const SHM_QUEUE *queue, const SHM_QUEUE *curElem,
|
||||
Size linkOffset);
|
||||
extern bool SHMQueueEmpty(const SHM_QUEUE *queue);
|
||||
extern bool SHMQueueIsDetached(const SHM_QUEUE *queue);
|
||||
|
||||
#endif /* SHMEM_H */
|
||||
139
pg_include/storage/sinval.h
Executable file
139
pg_include/storage/sinval.h
Executable file
@@ -0,0 +1,139 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* sinval.h
|
||||
* POSTGRES shared cache invalidation communication definitions.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/sinval.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SINVAL_H
|
||||
#define SINVAL_H
|
||||
|
||||
#include "storage/relfilenode.h"
|
||||
|
||||
|
||||
/*
|
||||
* We support several types of shared-invalidation messages:
|
||||
* * invalidate a specific tuple in a specific catcache
|
||||
* * invalidate all catcache entries from a given system catalog
|
||||
* * invalidate a relcache entry for a specific logical relation
|
||||
* * invalidate an smgr cache entry for a specific physical relation
|
||||
* * invalidate the mapped-relation mapping for a given database
|
||||
* More types could be added if needed. The message type is identified by
|
||||
* the first "int8" field of the message struct. Zero or positive means a
|
||||
* specific-catcache inval message (and also serves as the catcache ID field).
|
||||
* Negative values identify the other message types, as per codes below.
|
||||
*
|
||||
* Catcache inval events are initially driven by detecting tuple inserts,
|
||||
* updates and deletions in system catalogs (see CacheInvalidateHeapTuple).
|
||||
* An update can generate two inval events, one for the old tuple and one for
|
||||
* the new, but this is reduced to one event if the tuple's hash key doesn't
|
||||
* change. Note that the inval events themselves don't actually say whether
|
||||
* the tuple is being inserted or deleted. Also, since we transmit only a
|
||||
* hash key, there is a small risk of unnecessary invalidations due to chance
|
||||
* matches of hash keys.
|
||||
*
|
||||
* Note that some system catalogs have multiple caches on them (with different
|
||||
* indexes). On detecting a tuple invalidation in such a catalog, separate
|
||||
* catcache inval messages must be generated for each of its caches, since
|
||||
* the hash keys will generally be different.
|
||||
*
|
||||
* Catcache and relcache invalidations are transactional, and so are sent
|
||||
* to other backends upon commit. Internally to the generating backend,
|
||||
* they are also processed at CommandCounterIncrement so that later commands
|
||||
* in the same transaction see the new state. The generating backend also
|
||||
* has to process them at abort, to flush out any cache state it's loaded
|
||||
* from no-longer-valid entries.
|
||||
*
|
||||
* smgr and relation mapping invalidations are non-transactional: they are
|
||||
* sent immediately when the underlying file change is made.
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int8 id; /* cache ID (zero or positive) --- must be first */
|
||||
Oid dbId; /* database ID, or 0 if a shared relation */
|
||||
uint32 hashValue; /* hash value of key for this catcache */
|
||||
} SharedInvalCatcacheMsg;
|
||||
|
||||
#define SHAREDINVALCATALOG_ID (-1)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int8 id; /* type field --- must be first; NOTE(review): presumably SHAREDINVALCATALOG_ID for this message type -- confirm in inval.c */
|
||||
Oid dbId; /* database ID, or 0 if a shared catalog */
|
||||
Oid catId; /* ID of catalog whose contents are invalid */
|
||||
} SharedInvalCatalogMsg;
|
||||
|
||||
#define SHAREDINVALRELCACHE_ID (-2)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int8 id; /* type field --- must be first; NOTE(review): presumably SHAREDINVALRELCACHE_ID for this message type -- confirm in inval.c */
|
||||
Oid dbId; /* database ID, or 0 if a shared relation */
|
||||
Oid relId; /* relation ID */
|
||||
} SharedInvalRelcacheMsg;
|
||||
|
||||
#define SHAREDINVALSMGR_ID (-3)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* note: field layout chosen to pack into 16 bytes: the int8 + int8 + uint16 header fields fill 4 bytes ahead of the RelFileNode */
|
||||
int8 id; /* type field --- must be first */
|
||||
int8 backend_hi; /* high bits of backend ID, if temprel */
|
||||
uint16 backend_lo; /* low bits of backend ID, if temprel */
|
||||
RelFileNode rnode; /* spcNode, dbNode, relNode */
|
||||
} SharedInvalSmgrMsg;
|
||||
|
||||
#define SHAREDINVALRELMAP_ID (-4)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int8 id; /* type field --- must be first; NOTE(review): presumably SHAREDINVALRELMAP_ID for this message type -- confirm in inval.c */
|
||||
Oid dbId; /* database ID, or 0 for shared catalogs */
|
||||
} SharedInvalRelmapMsg;
|
||||
|
||||
typedef union
|
||||
{
|
||||
int8 id; /* type field --- must be first; overlays the leading id member of every variant below */
|
||||
SharedInvalCatcacheMsg cc; /* specific-catcache invalidation (id >= 0) */
|
||||
SharedInvalCatalogMsg cat; /* whole-catalog invalidation */
|
||||
SharedInvalRelcacheMsg rc; /* relcache entry invalidation */
|
||||
SharedInvalSmgrMsg sm; /* smgr cache entry invalidation */
|
||||
SharedInvalRelmapMsg rm; /* relation-mapping invalidation */
|
||||
} SharedInvalidationMessage;
|
||||
|
||||
|
||||
/* Counter of messages processed; don't worry about overflow. */
|
||||
extern uint64 SharedInvalidMessageCounter;
|
||||
|
||||
|
||||
extern void SendSharedInvalidMessages(const SharedInvalidationMessage *msgs,
|
||||
int n);
|
||||
extern void ReceiveSharedInvalidMessages(
|
||||
void (*invalFunction) (SharedInvalidationMessage *msg),
|
||||
void (*resetFunction) (void));
|
||||
|
||||
/* signal handler for catchup events (PROCSIG_CATCHUP_INTERRUPT) */
|
||||
extern void HandleCatchupInterrupt(void);
|
||||
|
||||
/*
|
||||
* enable/disable processing of catchup events directly from signal handler.
|
||||
* The enable routine first performs processing of any catchup events that
|
||||
* have occurred since the last disable.
|
||||
*/
|
||||
extern void EnableCatchupInterrupt(void);
|
||||
extern bool DisableCatchupInterrupt(void);
|
||||
|
||||
extern int xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
|
||||
bool *RelcacheInitFileInval);
|
||||
extern void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
|
||||
int nmsgs, bool RelcacheInitFileInval,
|
||||
Oid dbid, Oid tsid);
|
||||
|
||||
#endif /* SINVAL_H */
|
||||
42
pg_include/storage/sinvaladt.h
Executable file
42
pg_include/storage/sinvaladt.h
Executable file
@@ -0,0 +1,42 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* sinvaladt.h
|
||||
* POSTGRES shared cache invalidation data manager.
|
||||
*
|
||||
* The shared cache invalidation manager is responsible for transmitting
|
||||
* invalidation messages between backends. Any message sent by any backend
|
||||
* must be delivered to all already-running backends before it can be
|
||||
* forgotten. (If we run out of space, we instead deliver a "RESET"
|
||||
* message to backends that have fallen too far behind.)
|
||||
*
|
||||
* The struct type SharedInvalidationMessage, defining the contents of
|
||||
* a single message, is defined in sinval.h.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/sinvaladt.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SINVALADT_H
|
||||
#define SINVALADT_H
|
||||
|
||||
#include "storage/proc.h"
|
||||
#include "storage/sinval.h"
|
||||
|
||||
/*
|
||||
* prototypes for functions in sinvaladt.c
|
||||
*/
|
||||
extern Size SInvalShmemSize(void);
|
||||
extern void CreateSharedInvalidationState(void);
|
||||
extern void SharedInvalBackendInit(bool sendOnly);
|
||||
extern PGPROC *BackendIdGetProc(int backendID);
|
||||
|
||||
extern void SIInsertDataEntries(const SharedInvalidationMessage *data, int n);
|
||||
extern int SIGetDataEntries(SharedInvalidationMessage *data, int datasize);
|
||||
extern void SICleanupQueue(bool callerHasWriteLock, int minFree);
|
||||
|
||||
extern LocalTransactionId GetNextLocalTransactionId(void);
|
||||
|
||||
#endif /* SINVALADT_H */
|
||||
143
pg_include/storage/smgr.h
Executable file
143
pg_include/storage/smgr.h
Executable file
@@ -0,0 +1,143 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* smgr.h
|
||||
* storage manager switch public interface declarations.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/smgr.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SMGR_H
|
||||
#define SMGR_H
|
||||
|
||||
#include "fmgr.h"
|
||||
#include "storage/block.h"
|
||||
#include "storage/relfilenode.h"
|
||||
|
||||
|
||||
/*
|
||||
* smgr.c maintains a table of SMgrRelation objects, which are essentially
|
||||
* cached file handles. An SMgrRelation is created (if not already present)
|
||||
* by smgropen(), and destroyed by smgrclose(). Note that neither of these
|
||||
* operations implies I/O; they just create or destroy a hashtable entry.
|
||||
* (But smgrclose() may release associated resources, such as OS-level file
|
||||
* descriptors.)
|
||||
*
|
||||
* An SMgrRelation may have an "owner", which is just a pointer to it from
|
||||
* somewhere else; smgr.c will clear this pointer if the SMgrRelation is
|
||||
* closed. We use this to avoid dangling pointers from relcache to smgr
|
||||
* without having to make the smgr explicitly aware of relcache. There
|
||||
* can't be more than one "owner" pointer per SMgrRelation, but that's
|
||||
* all we need.
|
||||
*
|
||||
* SMgrRelations that do not have an "owner" are considered to be transient,
|
||||
* and are deleted at end of transaction.
|
||||
*/
|
||||
typedef struct SMgrRelationData
|
||||
{
|
||||
/* smgr_rnode is the hashtable lookup key, so it must be first! */
|
||||
RelFileNodeBackend smgr_rnode; /* relation physical identifier */
|
||||
|
||||
/* pointer to owning pointer, or NULL if none */
|
||||
struct SMgrRelationData **smgr_owner;
|
||||
|
||||
/*
|
||||
* These next three fields are not actually used or manipulated by smgr,
|
||||
* except that they are reset to InvalidBlockNumber upon a cache flush
|
||||
* event (in particular, upon truncation of the relation). Higher levels
|
||||
* store cached state here so that it will be reset when truncation
|
||||
* happens. In all three cases, InvalidBlockNumber means "unknown".
|
||||
*/
|
||||
BlockNumber smgr_targblock; /* current insertion target block */
|
||||
BlockNumber smgr_fsm_nblocks; /* last known size of fsm fork */
|
||||
BlockNumber smgr_vm_nblocks; /* last known size of vm fork */
|
||||
|
||||
/* additional public fields may someday exist here */
|
||||
|
||||
/*
|
||||
* Fields below here are intended to be private to smgr.c and its
|
||||
* submodules. Do not touch them from elsewhere.
|
||||
*/
|
||||
int smgr_which; /* storage manager selector */
|
||||
|
||||
/* for md.c; MAX_FORKNUM + 1 entries (presumably indexed by ForkNumber -- confirm in md.c); NULL for forks that are not open */
|
||||
struct _MdfdVec *md_fd[MAX_FORKNUM + 1];
|
||||
|
||||
/* if unowned, list link in list of all unowned SMgrRelations */
|
||||
struct SMgrRelationData *next_unowned_reln;
|
||||
} SMgrRelationData;
|
||||
|
||||
typedef SMgrRelationData *SMgrRelation;
|
||||
|
||||
/*
 * SmgrIsTemp
 *		Applies RelFileNodeBackendIsTemp() to the smgr_rnode field of the
 *		SMgrRelation "smgr".  The argument is parenthesized and dereferenced
 *		with "->", so it must be a pointer expression.
 *
 * The continuation line must directly follow the backslash; nothing may sit
 * between the two lines or the macro body is lost.
 */
#define SmgrIsTemp(smgr) \
	RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
|
||||
|
||||
extern void smgrinit(void);
|
||||
extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend);
|
||||
extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
|
||||
extern void smgrclose(SMgrRelation reln);
|
||||
extern void smgrcloseall(void);
|
||||
extern void smgrclosenode(RelFileNodeBackend rnode);
|
||||
extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
|
||||
extern void smgrdounlink(SMgrRelation reln, bool isRedo);
|
||||
extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo);
|
||||
extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum);
|
||||
extern void smgrread(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer);
|
||||
extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber nblocks);
|
||||
extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void smgrpreckpt(void);
|
||||
extern void smgrsync(void);
|
||||
extern void smgrpostckpt(void);
|
||||
extern void AtEOXact_SMgr(void);
|
||||
|
||||
|
||||
/* internals: move me elsewhere -- ay 7/94 */
|
||||
|
||||
/* in md.c */
|
||||
extern void mdinit(void);
|
||||
extern void mdclose(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
|
||||
extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
|
||||
extern void mdextend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum);
|
||||
extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber nblocks);
|
||||
extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void mdpreckpt(void);
|
||||
extern void mdsync(void);
|
||||
extern void mdpostckpt(void);
|
||||
|
||||
extern void SetForwardFsyncRequests(void);
|
||||
extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum,
|
||||
BlockNumber segno);
|
||||
extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum);
|
||||
extern void ForgetDatabaseFsyncRequests(Oid dbid);
|
||||
|
||||
/* smgrtype.c */
|
||||
extern Datum smgrout(PG_FUNCTION_ARGS);
|
||||
extern Datum smgrin(PG_FUNCTION_ARGS);
|
||||
extern Datum smgreq(PG_FUNCTION_ARGS);
|
||||
extern Datum smgrne(PG_FUNCTION_ARGS);
|
||||
|
||||
#endif /* SMGR_H */
|
||||
73
pg_include/storage/spin.h
Executable file
73
pg_include/storage/spin.h
Executable file
@@ -0,0 +1,73 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* spin.h
|
||||
* Hardware-independent implementation of spinlocks.
|
||||
*
|
||||
*
|
||||
* The hardware-independent interface to spinlocks is defined by the
|
||||
* typedef "slock_t" and these macros:
|
||||
*
|
||||
* void SpinLockInit(volatile slock_t *lock)
|
||||
* Initialize a spinlock (to the unlocked state).
|
||||
*
|
||||
* void SpinLockAcquire(volatile slock_t *lock)
|
||||
* Acquire a spinlock, waiting if necessary.
|
||||
* Time out and abort() if unable to acquire the lock in a
|
||||
* "reasonable" amount of time --- typically ~ 1 minute.
|
||||
*
|
||||
* void SpinLockRelease(volatile slock_t *lock)
|
||||
* Unlock a previously acquired lock.
|
||||
*
|
||||
* bool SpinLockFree(slock_t *lock)
|
||||
* Tests if the lock is free. Returns TRUE if free, FALSE if locked.
|
||||
* This does *not* change the state of the lock.
|
||||
*
|
||||
* Callers must beware that the macro argument may be evaluated multiple
|
||||
* times!
|
||||
*
|
||||
* CAUTION: Care must be taken to ensure that loads and stores of
|
||||
* shared memory values are not rearranged around spinlock acquire
|
||||
* and release. This is done using the "volatile" qualifier: the C
|
||||
* standard states that loads and stores of volatile objects cannot
|
||||
* be rearranged *with respect to other volatile objects*. The
|
||||
* spinlock is always written through a volatile pointer by the
|
||||
* spinlock macros, but this is not sufficient by itself: code that
|
||||
* protects shared data with a spinlock MUST reference that shared
|
||||
* data through a volatile pointer.
|
||||
*
|
||||
* Keep in mind the coding rule that spinlocks must not be held for more
|
||||
* than a few instructions. In particular, we assume it is not possible
|
||||
* for a CHECK_FOR_INTERRUPTS() to occur while holding a spinlock, and so
|
||||
* it is not necessary to do HOLD/RESUME_INTERRUPTS() in these macros.
|
||||
*
|
||||
* These macros are implemented in terms of hardware-dependent macros
|
||||
* supplied by s_lock.h. There is not currently any extra functionality
|
||||
* added by this header, but there has been in the past and may someday
|
||||
* be again.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/spin.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SPIN_H
|
||||
#define SPIN_H
|
||||
|
||||
#include "storage/s_lock.h"
|
||||
|
||||
|
||||
/*
 * Hardware-independent spinlock interface.  Each macro forwards its argument
 * unchanged to the corresponding hardware-dependent macro from s_lock.h, so
 * any multiple-evaluation behavior of the underlying macro applies here too.
 */
#define SpinLockInit(lock)	S_INIT_LOCK(lock)

#define SpinLockAcquire(lock) S_LOCK(lock)

#define SpinLockRelease(lock) S_UNLOCK(lock)

#define SpinLockFree(lock)	S_LOCK_FREE(lock)
|
||||
|
||||
|
||||
/* NOTE(review): presumably reports the number of semaphores needed for spinlock emulation -- confirm against spin.c */
extern int	SpinlockSemas(void);
|
||||
|
||||
#endif /* SPIN_H */
|
||||
115
pg_include/storage/standby.h
Executable file
115
pg_include/storage/standby.h
Executable file
@@ -0,0 +1,115 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* standby.h
|
||||
* Definitions for hot standby mode.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/storage/standby.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef STANDBY_H
|
||||
#define STANDBY_H
|
||||
|
||||
#include "access/xlog.h"
|
||||
#include "storage/lock.h"
|
||||
#include "storage/procsignal.h"
|
||||
#include "storage/relfilenode.h"
|
||||
|
||||
/*
 * User-settable GUC parameters.  Declared here only; the definitions live in
 * a .c file.
 */
extern int	vacuum_defer_cleanup_age;
extern int	max_standby_archive_delay;
extern int	max_standby_streaming_delay;
|
||||
|
||||
extern void InitRecoveryTransactionEnvironment(void);
|
||||
extern void ShutdownRecoveryTransactionEnvironment(void);
|
||||
|
||||
extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
|
||||
RelFileNode node);
|
||||
extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
|
||||
extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
|
||||
|
||||
extern void ResolveRecoveryConflictWithBufferPin(void);
|
||||
extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
|
||||
extern void CheckRecoveryConflictDeadlock(void);
|
||||
|
||||
/*
|
||||
* Standby Rmgr (RM_STANDBY_ID)
|
||||
*
|
||||
* Standby recovery manager exists to perform actions that are required
|
||||
* to make hot standby work. That includes logging AccessExclusiveLocks taken
|
||||
* by transactions and running-xacts snapshots.
|
||||
*/
|
||||
extern void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid);
|
||||
extern void StandbyReleaseLockTree(TransactionId xid,
|
||||
int nsubxids, TransactionId *subxids);
|
||||
extern void StandbyReleaseAllLocks(void);
|
||||
extern void StandbyReleaseOldLocks(int nxids, TransactionId *xids);
|
||||
|
||||
/*
 * XLOG message types
 *
 * The names suggest one code per standby record kind: access-exclusive lock
 * records and running-xacts records.
 */
#define XLOG_STANDBY_LOCK			0x00
#define XLOG_RUNNING_XACTS			0x10
|
||||
|
||||
typedef struct xl_standby_locks
|
||||
{
|
||||
int nlocks; /* number of entries in locks array */
|
||||
xl_standby_lock locks[1]; /* VARIABLE LENGTH ARRAY */
|
||||
} xl_standby_locks;
|
||||
|
||||
/*
|
||||
* When we write running xact data to WAL, we use this structure.
|
||||
*/
|
||||
typedef struct xl_running_xacts
|
||||
{
|
||||
int xcnt; /* # of xact ids in xids[] */
|
||||
bool subxid_overflow; /* snapshot overflowed, subxids missing */
|
||||
TransactionId nextXid; /* copy of ShmemVariableCache->nextXid */
|
||||
TransactionId oldestRunningXid; /* *not* oldestXmin */
|
||||
TransactionId latestCompletedXid; /* so we can set xmax */
|
||||
|
||||
TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
|
||||
} xl_running_xacts;
|
||||
|
||||
/* Size of the fixed-length part of xl_running_xacts: the byte offset at which xids[] begins. */
#define MinSizeOfXactRunningXacts offsetof(xl_running_xacts, xids)
|
||||
|
||||
|
||||
/* Recovery handlers for the Standby Rmgr (RM_STANDBY_ID) */
|
||||
extern void standby_redo(XLogRecPtr lsn, XLogRecord *record);
|
||||
extern void standby_desc(StringInfo buf, uint8 xl_info, char *rec);
|
||||
|
||||
/*
|
||||
* Declarations for GetRunningTransactionData(). Similar to Snapshots, but
|
||||
* not quite. This has nothing at all to do with visibility on this server,
|
||||
* so this is completely separate from snapmgr.c and snapmgr.h.
|
||||
* This data is important for creating the initial snapshot state on a
|
||||
* standby server. We need lots more information than a normal snapshot,
|
||||
* hence we use a specific data structure for our needs. This data
|
||||
* is written to WAL as a separate record immediately after each
|
||||
* checkpoint. That means that wherever we start a standby from we will
|
||||
* almost immediately see the data we need to begin executing queries.
|
||||
*/
|
||||
|
||||
typedef struct RunningTransactionsData
|
||||
{
|
||||
int xcnt; /* # of xact ids in xids[] */
|
||||
bool subxid_overflow; /* snapshot overflowed, subxids missing */
|
||||
TransactionId nextXid; /* copy of ShmemVariableCache->nextXid */
|
||||
TransactionId oldestRunningXid; /* *not* oldestXmin */
|
||||
TransactionId latestCompletedXid; /* so we can set xmax */
|
||||
|
||||
TransactionId *xids; /* array of (sub)xids still running */
|
||||
} RunningTransactionsData;
|
||||
|
||||
/* RunningTransactions is a pointer to a RunningTransactionsData. */
typedef RunningTransactionsData *RunningTransactions;
|
||||
|
||||
/*
 * WAL-logging entry points for the standby data described above:
 * access-exclusive locks (identified by database and relation OID) and
 * standby snapshots.
 */
extern void LogAccessExclusiveLock(Oid dbOid, Oid relOid);
extern void LogAccessExclusiveLockPrepare(void);

extern void LogStandbySnapshot(void);
|
||||
|
||||
#endif /* STANDBY_H */
|
||||
Reference in New Issue
Block a user