1 /*-------------------------------------------------------------------------
4 * Internal definitions for buffer manager and the buffer replacement
8 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
9 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/include/storage/buf_internals.h
13 *-------------------------------------------------------------------------
15 #ifndef BUFMGR_INTERNALS_H
16 #define BUFMGR_INTERNALS_H
18 #include "storage/buf.h"
19 #include "storage/latch.h"
20 #include "storage/lwlock.h"
21 #include "storage/shmem.h"
22 #include "storage/smgr.h"
23 #include "storage/spin.h"
24 #include "utils/relcache.h"
28 * Flags for buffer descriptors
30 * Note: BM_TAG_VALID essentially means that there is a buffer hashtable
31 * entry associated with the buffer's tag.
/*
 * Each BM_* flag below is a distinct single bit, so any combination of them
 * can be OR'ed together into one flags word.
 */
33 #define BM_DIRTY (1 << 0) /* data needs writing */
34 #define BM_VALID (1 << 1) /* data is valid */
35 #define BM_TAG_VALID (1 << 2) /* tag is assigned */
36 #define BM_IO_IN_PROGRESS (1 << 3) /* read or write in progress */
37 #define BM_IO_ERROR (1 << 4) /* previous I/O failed */
38 #define BM_JUST_DIRTIED (1 << 5) /* dirtied since write started */
39 #define BM_PIN_COUNT_WAITER (1 << 6) /* have waiter for sole pin */
40 #define BM_CHECKPOINT_NEEDED (1 << 7) /* must write for checkpoint */
41 #define BM_PERMANENT (1 << 8) /* permanent relation (not
44 typedef bits16 BufFlags; /* holds an OR of the BM_* flag bits */
47 * The maximum allowed value of usage_count represents a tradeoff between
48 * accuracy and speed of the clock-sweep buffer management algorithm. A
49 * large value (comparable to NBuffers) would approximate LRU semantics.
50 * But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of
51 * clock sweeps to find a free buffer, so in practice we don't want the
52 * value to be very large.
54 #define BM_MAX_USAGE_COUNT 5 /* upper limit for a buffer's usage_count */
57 * Buffer tag identifies which disk block the buffer contains.
59 * Note: the BufferTag data must be sufficient to determine where to write the
60 * block, without reference to pg_class or pg_tablespace entries. It's
61 * possible that the backend flushing the buffer doesn't even believe the
62 * relation is visible yet (its xact may have started before the xact that
63 * created the rel). The storage manager must be able to cope anyway.
65 * Note: if there are any pad bytes in the struct, INIT_BUFFERTAG will have
66 * to be fixed to zero them, since this struct is used as a hash key.
70 RelFileNode rnode; /* physical relation identifier */
72 BlockNumber blockNum; /* blknum relative to begin of reln */
75 #define CLEAR_BUFFERTAG(a) \
77 (a).rnode.spcNode = InvalidOid, \
78 (a).rnode.dbNode = InvalidOid, \
79 (a).rnode.relNode = InvalidOid, \
80 (a).forkNum = InvalidForkNumber, \
81 (a).blockNum = InvalidBlockNumber \
84 #define INIT_BUFFERTAG(a,xx_rnode,xx_forkNum,xx_blockNum) \
86 (a).rnode = (xx_rnode), \
87 (a).forkNum = (xx_forkNum), \
88 (a).blockNum = (xx_blockNum) \
91 #define BUFFERTAGS_EQUAL(a,b) \
93 RelFileNodeEquals((a).rnode, (b).rnode) && \
94 (a).blockNum == (b).blockNum && \
95 (a).forkNum == (b).forkNum \
99 * The shared buffer mapping table is partitioned to reduce contention.
100 * To determine which partition lock a given tag requires, compute the tag's
101 * hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
102 * NB: NUM_BUFFER_PARTITIONS must be a power of 2!
/*
 * Reduce a buffer-tag hash code to a partition number by taking it modulo
 * NUM_BUFFER_PARTITIONS.  The argument is evaluated exactly once.
 */
104 #define BufTableHashPartition(hashcode) \
105 ((hashcode) % NUM_BUFFER_PARTITIONS)
/*
 * Yield the address of the "lock" member of the MainLWLockArray entry that
 * guards the partition selected by hashcode.  The buffer-mapping entries
 * start at index BUFFER_MAPPING_LWLOCK_OFFSET in that array, and the
 * partition number (from BufTableHashPartition) is added to that base.
 */
106 #define BufMappingPartitionLock(hashcode) \
107 (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
108 BufTableHashPartition(hashcode)].lock)
/*
 * Same as BufMappingPartitionLock, but takes the partition number i directly
 * instead of deriving it from a hash code.  No range check is applied to i
 * here.
 */
109 #define BufMappingPartitionLockByIndex(i) \
110 (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
113 * BufferDesc -- shared descriptor/state data for a single shared buffer.
115 * Note: buf_hdr_lock must be held to examine or change the tag, flags,
116 * usage_count, refcount, or wait_backend_pid fields. buf_id field never
117 * changes after initialization, so does not need locking. freeNext is
118 * protected by the BufFreelistLock, not buf_hdr_lock. The LWLocks can take
119 * care of themselves. The buf_hdr_lock is *not* used to control access to
120 * the data in the buffer!
122 * An exception is that if we have the buffer pinned, its tag can't change
123 * underneath us, so we can examine the tag without locking the spinlock.
124 * Also, in places we do one-time reads of the flags without bothering to
125 * lock the spinlock; this is generally for situations where we don't expect
126 * the flag bit being tested to be changing.
128 * We can't physically remove items from a disk page if another backend has
129 * the buffer pinned. Hence, a backend may need to wait for all other pins
130 * to go away. This is signaled by storing its own PID into
131 * wait_backend_pid and setting flag bit BM_PIN_COUNT_WAITER. At present,
132 * there can be only one such waiter per buffer.
134 * We use this same struct for local buffer headers, but the lock fields
135 * are not used and not all of the flag bits are useful either.
137 typedef struct sbufdesc
139 BufferTag tag; /* ID of page contained in buffer */
140 BufFlags flags; /* see bit definitions above */
141 uint16 usage_count; /* usage counter for clock sweep code */
142 unsigned refcount; /* # of backends holding pins on buffer */
143 int wait_backend_pid; /* backend PID of pin-count waiter */
145 slock_t buf_hdr_lock; /* protects the above fields */
147 int buf_id; /* buffer's index number (from 0) */
148 int freeNext; /* link in freelist chain */
150 LWLock *io_in_progress_lock; /* to wait for I/O to complete */
151 LWLock *content_lock; /* to lock access to buffer contents */
/*
 * Convert a buffer descriptor pointer to its buffer number.  buf_id counts
 * from 0, so the resulting number is buf_id + 1 (i.e. it starts at 1).
 */
154 #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
157 * The freeNext field is either the index of the next freelist entry,
158 * or one of these special values:
160 #define FREENEXT_END_OF_LIST (-1) /* negative, so never a real entry index */
161 #define FREENEXT_NOT_IN_LIST (-2) /* negative, so never a real entry index */
164 * Macros for acquiring/releasing a shared buffer header's spinlock.
165 * Do not apply these to local buffers!
167 * Note: as a general coding rule, if you are using these then you probably
168 * need to be using a volatile-qualified pointer to the buffer header, to
169 * ensure that the compiler doesn't rearrange accesses to the header to
170 * occur before or after the spinlock is acquired/released.
/*
 * bufHdr must be a pointer to a buffer header; each macro passes the address
 * of that header's buf_hdr_lock field to SpinLockAcquire / SpinLockRelease.
 */
172 #define LockBufHdr(bufHdr) SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
173 #define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock)
/*
 * Declarations only; the variables are defined elsewhere.
 * NOTE(review): presumably these point at the arrays of descriptors for
 * shared buffers and for backend-local buffers respectively -- confirm
 * against the defining files.
 */
177 extern PGDLLIMPORT BufferDesc *BufferDescriptors;
180 extern BufferDesc *LocalBufferDescriptors;
184 * Internal routines: only called by bufmgr
/*
 * Strategy* entry points (buffer replacement strategy).  Only prototypes are
 * visible in this header.  Buffer descriptors are passed and returned as
 * volatile-qualified pointers.
 * NOTE(review): the exact semantics of each routine should be confirmed
 * against the implementation file.
 */
188 extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
190 extern void StrategyFreeBuffer(volatile BufferDesc *buf);
191 extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
192 volatile BufferDesc *buf);
/* NOTE(review): the two uint32 pointers look like output parameters -- confirm */
194 extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
195 extern void StrategyNotifyBgWriter(Latch *bgwriterLatch);
197 extern Size StrategyShmemSize(void);
198 extern void StrategyInitialize(bool init);
/*
 * Shared buffer mapping table interface.  A tag's hash code is obtained from
 * BufTableHashCode() and then passed explicitly to the lookup, insert and
 * delete routines; the same hash code selects the partition lock via
 * BufMappingPartitionLock().
 * NOTE(review): the int results of BufTableLookup/BufTableInsert are
 * presumably buffer ids, with a negative value meaning "none" -- confirm
 * against the implementation.
 */
201 extern Size BufTableShmemSize(int size);
202 extern void InitBufTable(int size);
203 extern uint32 BufTableHashCode(BufferTag *tagPtr);
204 extern int BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
205 extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
206 extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
/*
 * Routines operating on local buffers.  Blocks are addressed by
 * (smgr relation, fork number, block number); the Drop* routines take a
 * RelFileNode instead.
 * NOTE(review): LocalBufferAlloc reports through *foundPtr, presumably
 * whether the requested block was already present in a local buffer --
 * confirm against the implementation.
 */
209 extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
210 BlockNumber blockNum);
211 extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
212 BlockNumber blockNum, bool *foundPtr);
213 extern void MarkLocalBufferDirty(Buffer buffer);
214 extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
215 BlockNumber firstDelBlock);
216 extern void DropRelFileNodeAllLocalBuffers(RelFileNode rnode);
217 extern void AtEOXact_LocalBuffers(bool isCommit);
219 #endif /* BUFMGR_INTERNALS_H */