init
This commit is contained in:
493
db_include/storage/predicate_internals.h
Executable file
493
db_include/storage/predicate_internals.h
Executable file
@@ -0,0 +1,493 @@
|
||||
/*-------------------------------------------------------------------------
 *
 * predicate_internals.h
 *	  POSTGRES internal predicate locking definitions.
 *
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/predicate_internals.h
 *
 *-------------------------------------------------------------------------
 */
|
||||
#ifndef PREDICATE_INTERNALS_H
|
||||
#define PREDICATE_INTERNALS_H
|
||||
|
||||
#include "storage/lock.h"
|
||||
#include "storage/lwlock.h"
|
||||
|
||||
/*
|
||||
* Commit number.
|
||||
*/
|
||||
typedef uint64 SerCommitSeqNo;
|
||||
|
||||
/*
 * Reserved commit sequence numbers:
 *	- 0 is reserved to indicate a non-existent SLRU entry; it cannot be
 *	  used as a SerCommitSeqNo, even an invalid one
 *	- InvalidSerCommitSeqNo is used to indicate a transaction that
 *	  hasn't committed yet, so use a number greater than all valid
 *	  ones to make comparison do the expected thing
 *	- RecoverySerCommitSeqNo is used to refer to transactions that
 *	  happened before a crash/recovery, since we restart the sequence
 *	  at that point.  It's earlier than all normal sequence numbers,
 *	  and is only used by recovered prepared transactions
 *	- FirstNormalSerCommitSeqNo is the smallest value that is not one of
 *	  the reserved numbers above
 */
#define InvalidSerCommitSeqNo		((SerCommitSeqNo) PG_UINT64_MAX)
#define RecoverySerCommitSeqNo		((SerCommitSeqNo) 1)
#define FirstNormalSerCommitSeqNo	((SerCommitSeqNo) 2)
|
||||
|
||||
/*
|
||||
* The SERIALIZABLEXACT struct contains information needed for each
|
||||
* serializable database transaction to support SSI techniques.
|
||||
*
|
||||
* A home-grown list is maintained in shared memory to manage these.
|
||||
* An entry is used when the serializable transaction acquires a snapshot.
|
||||
* Unless the transaction is rolled back, this entry must generally remain
|
||||
* until all concurrent transactions have completed. (There are special
|
||||
* optimizations for READ ONLY transactions which often allow them to be
|
||||
* cleaned up earlier.) A transaction which is rolled back is cleaned up
|
||||
* as soon as possible.
|
||||
*
|
||||
* Eligibility for cleanup of committed transactions is generally determined
|
||||
* by comparing the transaction's finishedBefore field to
|
||||
* SxactGlobalXmin.
|
||||
*/
|
||||
typedef struct SERIALIZABLEXACT
|
||||
{
|
||||
VirtualTransactionId vxid; /* The executing process always has one of
|
||||
* these. */
|
||||
|
||||
/*
|
||||
* We use two numbers to track the order that transactions commit. Before
|
||||
* commit, a transaction is marked as prepared, and prepareSeqNo is set.
|
||||
* Shortly after commit, it's marked as committed, and commitSeqNo is set.
|
||||
* This doesn't give a strict commit order, but these two values together
|
||||
* are good enough for us, as we can always err on the safe side and
|
||||
* assume that there's a conflict, if we can't be sure of the exact
|
||||
* ordering of two commits.
|
||||
*
|
||||
* Note that a transaction is marked as prepared for a short period during
|
||||
* commit processing, even if two-phase commit is not used. But with
|
||||
* two-phase commit, a transaction can stay in prepared state for some
|
||||
* time.
|
||||
*/
|
||||
SerCommitSeqNo prepareSeqNo;
|
||||
SerCommitSeqNo commitSeqNo;
|
||||
|
||||
/* these values are not both interesting at the same time */
|
||||
union
|
||||
{
|
||||
SerCommitSeqNo earliestOutConflictCommit; /* when committed with
|
||||
* conflict out */
|
||||
SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
|
||||
* no conflict out */
|
||||
} SeqNo;
|
||||
SHM_QUEUE outConflicts; /* list of write transactions whose data we
|
||||
* couldn't read. */
|
||||
SHM_QUEUE inConflicts; /* list of read transactions which couldn't
|
||||
* see our write. */
|
||||
SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
|
||||
SHM_QUEUE finishedLink; /* list link in
|
||||
* FinishedSerializableTransactions */
|
||||
|
||||
/*
|
||||
* perXactPredicateListLock is only used in parallel queries: it protects
|
||||
* this SERIALIZABLEXACT's predicate lock list against other workers of
|
||||
* the same session.
|
||||
*/
|
||||
LWLock perXactPredicateListLock;
|
||||
|
||||
/*
|
||||
* for r/o transactions: list of concurrent r/w transactions that we could
|
||||
* potentially have conflicts with, and vice versa for r/w transactions
|
||||
*/
|
||||
SHM_QUEUE possibleUnsafeConflicts;
|
||||
|
||||
TransactionId topXid; /* top level xid for the transaction, if one
|
||||
* exists; else invalid */
|
||||
TransactionId finishedBefore; /* invalid means still running; else the
|
||||
* struct expires when no serializable
|
||||
* xids are before this. */
|
||||
TransactionId xmin; /* the transaction's snapshot xmin */
|
||||
uint32 flags; /* OR'd combination of values defined below */
|
||||
int pid; /* pid of associated process */
|
||||
} SERIALIZABLEXACT;
|
||||
|
||||
/*
 * Bit values for the "flags" field of SERIALIZABLEXACT.  Each flag occupies
 * its own bit, so any combination can be OR'd together.
 */
#define SXACT_FLAG_COMMITTED			0x00000001	/* already committed */
#define SXACT_FLAG_PREPARED				0x00000002	/* about to commit */
#define SXACT_FLAG_ROLLED_BACK			0x00000004	/* already rolled back */
#define SXACT_FLAG_DOOMED				0x00000008	/* will roll back */
/*
 * The following flag actually means that the flagged transaction has a
 * conflict out *to a transaction which committed ahead of it*.  It's hard
 * to get that into a name of a reasonable length.
 */
#define SXACT_FLAG_CONFLICT_OUT			0x00000010
#define SXACT_FLAG_READ_ONLY			0x00000020
#define SXACT_FLAG_DEFERRABLE_WAITING	0x00000040
#define SXACT_FLAG_RO_SAFE				0x00000080
#define SXACT_FLAG_RO_UNSAFE			0x00000100
#define SXACT_FLAG_SUMMARY_CONFLICT_IN	0x00000200
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
/*
 * The following flag means the transaction has been partially released
 * already, but is being preserved because parallel workers might have a
 * reference to it.  It'll be recycled by the leader at end-of-transaction.
 */
#define SXACT_FLAG_PARTIALLY_RELEASED	0x00000800
|
||||
|
||||
/*
|
||||
* The following types are used to provide an ad hoc list for holding
|
||||
* SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
|
||||
* access these by key -- there are direct pointers to these objects where
|
||||
* needed. If a shared memory list is created, these types can probably be
|
||||
* eliminated in favor of using the general solution.
|
||||
*/
|
||||
typedef struct PredXactListElementData
|
||||
{
|
||||
SHM_QUEUE link;
|
||||
SERIALIZABLEXACT sxact;
|
||||
} PredXactListElementData;
|
||||
|
||||
/* Pointer to one element of the SERIALIZABLEXACT list. */
typedef struct PredXactListElementData *PredXactListElement;

/* Size of one list element, rounded up with MAXALIGN. */
#define PredXactListElementDataSize \
		((Size) MAXALIGN(sizeof(PredXactListElementData)))
|
||||
|
||||
typedef struct PredXactListData
|
||||
{
|
||||
SHM_QUEUE availableList;
|
||||
SHM_QUEUE activeList;
|
||||
|
||||
/*
|
||||
* These global variables are maintained when registering and cleaning up
|
||||
* serializable transactions. They must be global across all backends,
|
||||
* but are not needed outside the predicate.c source file. Protected by
|
||||
* SerializableXactHashLock.
|
||||
*/
|
||||
TransactionId SxactGlobalXmin; /* global xmin for active serializable
|
||||
* transactions */
|
||||
int SxactGlobalXminCount; /* how many active serializable
|
||||
* transactions have this xmin */
|
||||
int WritableSxactCount; /* how many non-read-only serializable
|
||||
* transactions are active */
|
||||
SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
|
||||
* increasing number for commits
|
||||
* of serializable transactions */
|
||||
/* Protected by SerializableXactHashLock. */
|
||||
SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and
|
||||
* inConflicts for committed
|
||||
* transactions through this seq
|
||||
* no */
|
||||
/* Protected by SerializableFinishedListLock. */
|
||||
SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
|
||||
* seq no */
|
||||
SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
|
||||
|
||||
PredXactListElement element;
|
||||
} PredXactListData;
|
||||
|
||||
/* Pointer to the SERIALIZABLEXACT list header. */
typedef struct PredXactListData *PredXactList;

/* Size of the list header, rounded up with MAXALIGN. */
#define PredXactListDataSize \
		((Size) MAXALIGN(sizeof(PredXactListData)))
|
||||
|
||||
|
||||
/*
|
||||
* The following types are used to provide lists of rw-conflicts between
|
||||
* pairs of transactions. Since exactly the same information is needed,
|
||||
* they are also used to record possible unsafe transaction relationships
|
||||
* for purposes of identifying safe snapshots for read-only transactions.
|
||||
*
|
||||
* When a RWConflictData is not in use to record either type of relationship
|
||||
* between a pair of transactions, it is kept on an "available" list. The
|
||||
* outLink field is used for maintaining that list.
|
||||
*/
|
||||
typedef struct RWConflictData
|
||||
{
|
||||
SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
|
||||
SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
|
||||
SERIALIZABLEXACT *sxactOut;
|
||||
SERIALIZABLEXACT *sxactIn;
|
||||
} RWConflictData;
|
||||
|
||||
/* Pointer to one rw-conflict record. */
typedef struct RWConflictData *RWConflict;

/* Size of one rw-conflict record, rounded up with MAXALIGN. */
#define RWConflictDataSize \
		((Size) MAXALIGN(sizeof(RWConflictData)))
|
||||
|
||||
typedef struct RWConflictPoolHeaderData
|
||||
{
|
||||
SHM_QUEUE availableList;
|
||||
RWConflict element;
|
||||
} RWConflictPoolHeaderData;
|
||||
|
||||
/* Pointer to the rw-conflict pool header. */
typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;

/* Size of the pool header, rounded up with MAXALIGN. */
#define RWConflictPoolHeaderDataSize \
		((Size) MAXALIGN(sizeof(RWConflictPoolHeaderData)))
|
||||
|
||||
|
||||
/*
|
||||
* The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
|
||||
* transaction or any of its subtransactions.
|
||||
*/
|
||||
typedef struct SERIALIZABLEXIDTAG
|
||||
{
|
||||
TransactionId xid;
|
||||
} SERIALIZABLEXIDTAG;
|
||||
|
||||
/*
|
||||
* The SERIALIZABLEXID struct provides a link from a TransactionId for a
|
||||
* serializable transaction to the related SERIALIZABLEXACT record, even if
|
||||
* the transaction has completed and its connection has been closed.
|
||||
*
|
||||
* These are created as new top level transaction IDs are first assigned to
|
||||
* transactions which are participating in predicate locking. This may
|
||||
* never happen for a particular transaction if it doesn't write anything.
|
||||
* They are removed with their related serializable transaction objects.
|
||||
*
|
||||
* The SubTransGetTopmostTransaction method is used where necessary to get
|
||||
* from an XID which might be from a subtransaction to the top level XID.
|
||||
*/
|
||||
typedef struct SERIALIZABLEXID
|
||||
{
|
||||
/* hash key */
|
||||
SERIALIZABLEXIDTAG tag;
|
||||
|
||||
/* data */
|
||||
SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
|
||||
} SERIALIZABLEXID;
|
||||
|
||||
|
||||
/*
|
||||
* The PREDICATELOCKTARGETTAG struct identifies a database object which can
|
||||
* be the target of predicate locks.
|
||||
*
|
||||
* Note that the hash function being used doesn't properly respect tag
|
||||
* length -- if the length of the structure isn't a multiple of four bytes it
|
||||
* will go to a four byte boundary past the end of the tag. If you change
|
||||
* this struct, make sure any slack space is initialized, so that any random
|
||||
* bytes in the middle or at the end are not included in the hash.
|
||||
*
|
||||
* TODO SSI: If we always use the same fields for the same type of value, we
|
||||
* should rename these. Holding off until it's clear there are no exceptions.
|
||||
* Since indexes are relations with blocks and tuples, it's looking likely that
|
||||
* the rename will be possible. If not, we may need to divide the last field
|
||||
* and use part of it for a target type, so that we know how to interpret the
|
||||
* data..
|
||||
*/
|
||||
typedef struct PREDICATELOCKTARGETTAG
|
||||
{
|
||||
uint32 locktag_field1; /* a 32-bit ID field */
|
||||
uint32 locktag_field2; /* a 32-bit ID field */
|
||||
uint32 locktag_field3; /* a 32-bit ID field */
|
||||
uint32 locktag_field4; /* a 32-bit ID field */
|
||||
} PREDICATELOCKTARGETTAG;
|
||||
|
||||
/*
|
||||
* The PREDICATELOCKTARGET struct represents a database object on which there
|
||||
* are predicate locks.
|
||||
*
|
||||
* A hash list of these objects is maintained in shared memory. An entry is
|
||||
* added when a predicate lock is requested on an object which doesn't
|
||||
* already have one. An entry is removed when the last lock is removed from
|
||||
* its list.
|
||||
*/
|
||||
typedef struct PREDICATELOCKTARGET
|
||||
{
|
||||
/* hash key */
|
||||
PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
|
||||
|
||||
/* data */
|
||||
SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
|
||||
* predicate lock target */
|
||||
} PREDICATELOCKTARGET;
|
||||
|
||||
|
||||
/*
|
||||
* The PREDICATELOCKTAG struct identifies an individual predicate lock.
|
||||
*
|
||||
* It is the combination of predicate lock target (which is a lockable
|
||||
* object) and a serializable transaction which has acquired a lock on that
|
||||
* target.
|
||||
*/
|
||||
typedef struct PREDICATELOCKTAG
|
||||
{
|
||||
PREDICATELOCKTARGET *myTarget;
|
||||
SERIALIZABLEXACT *myXact;
|
||||
} PREDICATELOCKTAG;
|
||||
|
||||
/*
|
||||
* The PREDICATELOCK struct represents an individual lock.
|
||||
*
|
||||
* An entry can be created here when the related database object is read, or
|
||||
* by promotion of multiple finer-grained targets. All entries related to a
|
||||
* serializable transaction are removed when that serializable transaction is
|
||||
* cleaned up. Entries can also be removed when they are combined into a
|
||||
* single coarser-grained lock entry.
|
||||
*/
|
||||
typedef struct PREDICATELOCK
|
||||
{
|
||||
/* hash key */
|
||||
PREDICATELOCKTAG tag; /* unique identifier of lock */
|
||||
|
||||
/* data */
|
||||
SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
|
||||
* predicate locks */
|
||||
SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
|
||||
* predicate locks */
|
||||
SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
|
||||
} PREDICATELOCK;
|
||||
|
||||
|
||||
/*
|
||||
* The LOCALPREDICATELOCK struct represents a local copy of data which is
|
||||
* also present in the PREDICATELOCK table, organized for fast access without
|
||||
* needing to acquire a LWLock. It is strictly for optimization.
|
||||
*
|
||||
* Each serializable transaction creates its own local hash table to hold a
|
||||
* collection of these. This information is used to determine when a number
|
||||
* of fine-grained locks should be promoted to a single coarser-grained lock.
|
||||
* The information is maintained more-or-less in parallel to the
|
||||
* PREDICATELOCK data, but because this data is not protected by locks and is
|
||||
* only used in an optimization heuristic, it is allowed to drift in a few
|
||||
* corner cases where maintaining exact data would be expensive.
|
||||
*
|
||||
* The hash table is created when the serializable transaction acquires its
|
||||
* snapshot, and its memory is released upon completion of the transaction.
|
||||
*/
|
||||
typedef struct LOCALPREDICATELOCK
|
||||
{
|
||||
/* hash key */
|
||||
PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
|
||||
|
||||
/* data */
|
||||
bool held; /* is lock held, or just its children? */
|
||||
int childLocks; /* number of child locks currently held */
|
||||
} LOCALPREDICATELOCK;
|
||||
|
||||
|
||||
/*
 * The types of predicate locks which can be acquired.
 */
typedef enum PredicateLockTargetType
{
	PREDLOCKTAG_RELATION,
	PREDLOCKTAG_PAGE,
	PREDLOCKTAG_TUPLE
	/* TODO SSI: Other types may be needed for index locking */
} PredicateLockTargetType;
|
||||
|
||||
|
||||
/*
|
||||
* This structure is used to quickly capture a copy of all predicate
|
||||
* locks. This is currently used only by the pg_lock_status function,
|
||||
* which in turn is used by the pg_locks view.
|
||||
*/
|
||||
typedef struct PredicateLockData
|
||||
{
|
||||
int nelements;
|
||||
PREDICATELOCKTARGETTAG *locktags;
|
||||
SERIALIZABLEXACT *xacts;
|
||||
} PredicateLockData;
|
||||
|
||||
|
||||
/*
 * These macros define how we map logical IDs of lockable objects into the
 * physical fields of PREDICATELOCKTARGETTAG.  Use these to set up values,
 * rather than accessing the fields directly.  Note multiple eval of target!
 *
 * "locktag" is the tag structure itself (not a pointer); it is evaluated
 * once per field assigned.  Fields that do not apply to the kind of target
 * being set are filled with InvalidBlockNumber / InvalidOffsetNumber.
 * Each macro is a single comma expression.
 */
#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = InvalidBlockNumber, \
	 (locktag).locktag_field4 = InvalidOffsetNumber)

#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = (blocknum), \
	 (locktag).locktag_field4 = InvalidOffsetNumber)

#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = (blocknum), \
	 (locktag).locktag_field4 = (offnum))
|
||||
|
||||
/*
 * Accessors that read the logical IDs back out of a PREDICATELOCKTARGETTAG.
 * "locktag" is the tag structure itself (not a pointer).
 *
 * GET_PREDICATELOCKTARGETTAG_TYPE derives the target type from which fields
 * are set: a valid offset means a tuple target, otherwise a valid block
 * number means a page target, otherwise it is a relation target.
 */
#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
	((Oid) (locktag).locktag_field1)
#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
	((Oid) (locktag).locktag_field2)
#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
	((BlockNumber) (locktag).locktag_field3)
#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
	((OffsetNumber) (locktag).locktag_field4)
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag)							 \
	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
	 (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE :   \
	  PREDLOCKTAG_RELATION))
|
||||
|
||||
/*
 * Two-phase commit statefile records. There are two types: for each
 * transaction, we generate one per-transaction record and a variable
 * number of per-predicate-lock records.
 */
typedef enum TwoPhasePredicateRecordType
{
	TWOPHASEPREDICATERECORD_XACT,
	TWOPHASEPREDICATERECORD_LOCK
} TwoPhasePredicateRecordType;
|
||||
|
||||
/*
|
||||
* Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
|
||||
* much is needed because most of it not meaningful for a recovered
|
||||
* prepared transaction.
|
||||
*
|
||||
* In particular, we do not record the in and out conflict lists for a
|
||||
* prepared transaction because the associated SERIALIZABLEXACTs will
|
||||
* not be available after recovery. Instead, we simply record the
|
||||
* existence of each type of conflict by setting the transaction's
|
||||
* summary conflict in/out flag.
|
||||
*/
|
||||
typedef struct TwoPhasePredicateXactRecord
|
||||
{
|
||||
TransactionId xmin;
|
||||
uint32 flags;
|
||||
} TwoPhasePredicateXactRecord;
|
||||
|
||||
/* Per-lock state */
|
||||
typedef struct TwoPhasePredicateLockRecord
|
||||
{
|
||||
PREDICATELOCKTARGETTAG target;
|
||||
uint32 filler; /* to avoid length change in back-patched fix */
|
||||
} TwoPhasePredicateLockRecord;
|
||||
|
||||
typedef struct TwoPhasePredicateRecord
|
||||
{
|
||||
TwoPhasePredicateRecordType type;
|
||||
union
|
||||
{
|
||||
TwoPhasePredicateXactRecord xactRecord;
|
||||
TwoPhasePredicateLockRecord lockRecord;
|
||||
} data;
|
||||
} TwoPhasePredicateRecord;
|
||||
|
||||
/*
 * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
 */
#define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
|
||||
|
||||
|
||||
/*
|
||||
* Function definitions for functions needing awareness of predicate
|
||||
* locking internals.
|
||||
*/
|
||||
extern PredicateLockData *GetPredicateLockStatusData(void);
|
||||
extern int GetSafeSnapshotBlockingPids(int blocked_pid,
|
||||
int *output, int output_size);
|
||||
|
||||
#endif /* PREDICATE_INTERNALS_H */
|
||||
Reference in New Issue
Block a user