这篇文章主要介绍“PostgreSQL中BufferAlloc函数有什么作用”。在日常操作中,相信很多人对“PostgreSQL中BufferAlloc函数有什么作用”这一问题存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答“PostgreSQL中BufferAlloc函数有什么作用”的疑惑有所帮助!接下来,请跟着小编一起来学习吧!
一、数据结构
BufferDesc
共享缓冲区的共享描述符(状态)数据
//Flag bits of a buffer's state word (bits 22..31).
//BufferAlloc extracts them with "buf_state & BUF_FLAG_MASK".
//buffer header is locked
#define BM_LOCKED (1U << 22)
//data needs writing (buffer is marked dirty)
#define BM_DIRTY (1U << 23)
//data is valid
#define BM_VALID (1U << 24)
//a buffer tag has been assigned
#define BM_TAG_VALID (1U << 25)
//a read or write is in progress
#define BM_IO_IN_PROGRESS (1U << 26)
//the previous I/O failed
#define BM_IO_ERROR (1U << 27)
//NOTE(review): presumably "dirtied again after a write started" - confirm
//against the upstream header; the original note was ambiguous
#define BM_JUST_DIRTIED (1U << 28)
//another process is waiting to become the sole pin holder
#define BM_PIN_COUNT_WAITER (1U << 29)
//must be written to disk for a checkpoint
#define BM_CHECKPOINT_NEEDED (1U << 30)
//permanent buffer: BufferAlloc sets this when the relation is
//RELPERSISTENCE_PERMANENT or the fork is INIT_FORKNUM
#define BM_PERMANENT (1U << 31)
//BufferDesc -- shared descriptor (state) data for one shared buffer.
typedef struct BufferDesc
{
//tag identifying which disk block the buffer holds
BufferTag tag;
//index number of this buffer (counting from 0)
int buf_id;
//state word: holds the flags, the refcount and the usagecount
pg_atomic_uint32 state;
//process ID of the backend waiting on the pin count
int wait_backend_pid;
//link to the next free buffer in the freelist chain
int freeNext;
//lock protecting access to the buffer contents
LWLock content_lock;
} BufferDesc;
BufferTag
Buffer tag标记了buffer存储的是磁盘中哪个block
//BufferTag -- identifies which disk block a buffer contains.
typedef struct buftag
{
//physical relation identifier
RelFileNode rnode;
//fork of the relation the block belongs to
ForkNumber forkNum;
//block number relative to the start of the relation
BlockNumber blockNum;
} BufferTag;
SMgrRelation
smgr.c维护一个包含SMgrRelation对象的hash表,SMgrRelation对象本质上是缓存的文件句柄.
//SMgrRelationData -- entry of the hash table kept by smgr.c; each object is
//essentially a cached file handle.
typedef struct SMgrRelationData
{
//-------- rnode is the hash table lookup key, so it is the first member
//physical identifier of the relation
RelFileNodeBackend smgr_rnode;
//--------- pointer to the owning pointer, or NULL if there is none
struct SMgrRelationData **smgr_owner;
//current insertion target block
BlockNumber smgr_targblock;
//last known size of the fsm fork
BlockNumber smgr_fsm_nblocks;
//last known size of the vm fork
BlockNumber smgr_vm_nblocks;
//------- additional public fields may be added here in the future
//storage manager selector
int smgr_which;
//per-fork arrays with one slot for each fork number up to MAX_FORKNUM
//NOTE(review): names suggest the open-segment count and segment descriptors
//used by the md storage manager - confirm against md.c
int md_num_open_segs[MAX_FORKNUM + 1];
struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
//if unowned, link in the list of all unowned SMgrRelations
struct SMgrRelationData *next_unowned_reln;
} SMgrRelationData;
//pointer type through which callers such as BufferAlloc receive the object
typedef SMgrRelationData *SMgrRelation;
RelFileNodeBackend
组合relfilenode和后台进程ID,用于提供需要定位物理存储的所有信息.
//RelFileNodeBackend -- combines a relfilenode with a backend ID, giving all
//the information needed to locate the physical storage.
typedef struct RelFileNodeBackend
{
RelFileNode node;//the relfilenode
BackendId backend;//the backend ID
} RelFileNodeBackend;
二、源码解读
BufferAlloc是ReadBuffer的子过程,负责在共享缓冲区中查找目标block.如果该block尚未在缓冲池中,则选择一个可替换(待淘汰)的buffer并淘汰其中的旧页面,但本函数并不负责读入新页面.
该函数的主要处理逻辑如下:
1.初始化,根据Tag确定hash值和分区锁定ID
2.检查block是否已在buffer pool中
3.在缓冲区中找到该buffer(buf_id >= 0)
3.1获取buffer描述符并Pin buffer
3.2如PinBuffer返回F(buffer无效),则执行StartBufferIO,如该函数返回T,则设置标记*foundPtr为F
3.3返回buf
4.在缓冲区中找不到该buffer(buf_id < 0)
4.1释放newPartitionLock
4.2执行循环,寻找合适的buffer
4.2.1确保在自旋锁尚未持有时,有一个空闲的refcount入口(条目)
4.2.2选择一个待淘汰的buffer
4.2.3拷贝buffer flags到oldFlags中
4.2.4Pin buffer,然后释放buffer自旋锁
4.2.5如buffer标记为BM_DIRTY,则尝试获取buffer内容共享锁并执行FlushBuffer;若获取不到锁,则Unpin该buffer并重新选择
4.2.6如buffer标记为BM_TAG_VALID,计算原tag的hashcode和partition lock ID,并锁定新旧分区锁
否则需要新的分区,锁定新分区锁,重置原分区锁和原hash值
4.2.7尝试使用buffer新的tag构造hash表入口
4.2.8存在冲突(buf_id >= 0),在这里只需要像一开始处理的那样,视为已在缓冲池发现该buffer
4.2.9不存在冲突(buf_id < 0),锁定buffer header,如缓冲区没有变脏或者被pinned,则已找到buf,跳出循环
否则,解锁buffer header,删除hash表入口,释放锁,重新寻找buffer
4.3可以重新设置buffer tag,完成后解锁buffer header,删除原有的hash表入口,释放分区锁
4.4执行StartBufferIO,设置*foundPtr标记
4.5返回buf
//BufferAlloc -- subroutine of ReadBuffer that handles the shared-buffer lookup.
//
//Builds a tag from smgr's relation, forkNum and blockNum and looks it up in
//the buffer mapping table. If the block already has a buffer, that buffer is
//pinned and returned. Otherwise a victim buffer is selected, flushed when it
//is dirty, re-tagged for the requested block and returned. This function does
//not read the new page itself.
//
//Parameters:
//  smgr           - relation handle; smgr->smgr_rnode.node goes into the tag
//  relpersistence - persistence of the relation; decides whether BM_PERMANENT is set
//  forkNum        - fork of the relation
//  blockNum       - block number within the fork
//  strategy       - buffer access strategy handed to PinBuffer and
//                   StrategyGetBuffer; may be NULL
//  foundPtr       - output flag: set to false exactly when
//                   StartBufferIO(buf, true) returns true, otherwise set to true
//
//Returns the descriptor of the buffer, which is pinned by this call.
static BufferDesc *
BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
bool *foundPtr)
{
//identity of the requested block
BufferTag newTag;
//hash value of newTag
uint32 newHash;
//buffer mapping partition lock for newTag
LWLock *newPartitionLock;
//previous identity of the selected buffer
BufferTag oldTag;
//hash value of oldTag
uint32 oldHash;
//buffer mapping partition lock for oldTag
LWLock *oldPartitionLock;
//flag bits copied from the selected buffer's state
uint32 oldFlags;
//buffer ID number
int buf_id;
//buffer descriptor
BufferDesc *buf;
//result of PinBuffer; false leads to the StartBufferIO call
bool valid;
//buffer state word
uint32 buf_state;
//create a tag so the buffer can be looked up
INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
//determine the tag's hash code and its partition lock
newHash = BufTableHashCode(&newTag);
newPartitionLock = BufMappingPartitionLock(newHash);
//check whether the block is already in the buffer pool
LWLockAcquire(newPartitionLock, LW_SHARED);
buf_id = BufTableLookup(&newTag, newHash);
if (buf_id >= 0)
{
//---- the block was found in the buffer pool
buf = GetBufferDescriptor(buf_id);
//pin the buffer
valid = PinBuffer(buf, strategy);
//once pinned, the partition lock can be released immediately
LWLockRelease(newPartitionLock);
//set the output flag, assuming the buffer is usable as is
*foundPtr = true;
if (!valid)
{
//PinBuffer reported the buffer as not valid
if (StartBufferIO(buf, true))
{
//a previous read attempt failed, so the read has to be tried again
*foundPtr = false;//report "not found"
}
}
//return the existing buffer
return buf;
}
//not found: drop the shared partition lock before searching for a victim
LWLockRelease(newPartitionLock);
//loop until a suitable replacement buffer has been obtained
for (;;)
{
//make sure a free refcount entry exists while no spinlock is held
ReservePrivateRefCountEntry();
//select a victim buffer; its state word is returned in buf_state
buf = StrategyGetBuffer(strategy, &buf_state);
Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
//the buffer flags must be copied while the spinlock is still held
oldFlags = buf_state & BUF_FLAG_MASK;
//pin the buffer, then release the buffer spinlock
PinBuffer_Locked(buf);
//a dirty victim has to be written out before it can be reused
if (oldFlags & BM_DIRTY)
{
if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
LW_SHARED))
{
//---- the conditional request for the shared content lock succeeded
if (strategy != NULL)
{
//a non-default strategy is in use
XLogRecPtr lsn;
//read the LSN while holding the buffer header lock
buf_state = LockBufHdr(buf);
lsn = BufferGetLSN(buf);
UnlockBufHdr(buf, buf_state);
if (XLogNeedsFlush(lsn) &&
StrategyRejectBuffer(strategy, buf))
{
//WAL would need flushing and the strategy rejected the buffer:
//drop the lock and pin, then loop around for another buffer
LWLockRelease(BufferDescriptorGetContentLock(buf));
UnpinBuffer(buf, true);
continue;
}
}
//the I/O can be performed now
TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
smgr->smgr_rnode.node.spcNode,
smgr->smgr_rnode.node.dbNode,
smgr->smgr_rnode.node.relNode);
FlushBuffer(buf, NULL);
LWLockRelease(BufferDescriptorGetContentLock(buf));
//hand the buffer's tag to the backend writeback context
ScheduleBufferTagForWriteback(&BackendWritebackContext,
&buf->tag);
TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
smgr->smgr_rnode.node.spcNode,
smgr->smgr_rnode.node.dbNode,
smgr->smgr_rnode.node.relNode);
}
else
{
//the content lock was not available: give up on this buffer and
//loop around for another one
UnpinBuffer(buf, true);
continue;
}
}
if (oldFlags & BM_TAG_VALID)
{
//----------- the buffer has BM_TAG_VALID set
//compute the old tag's hash code and partition lock, then take both
//partition locks in a fixed order given by comparing the lock pointers
//NOTE(review): presumably the fixed order avoids deadlocks - confirm
oldTag = buf->tag;
oldHash = BufTableHashCode(&oldTag);
oldPartitionLock = BufMappingPartitionLock(oldHash);
if (oldPartitionLock < newPartitionLock)
{
//lock in order: old partition first
LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
}
else if (oldPartitionLock > newPartitionLock)
{
//lock in order: new partition first
LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
}
else
{
//both tags map to the same partition, so one lock is enough
LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
}
}
else
{
//----------- the buffer does not have BM_TAG_VALID set
//the buffer has no valid old tag, so only the new partition is locked
LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
//the old partition lock and old tag are not needed
oldPartitionLock = NULL;
//this assignment only keeps the compiler quiet
oldHash = 0;
}
//try to create a hash table entry for the buffer under its new tag
buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
if (buf_id >= 0)
{
//collision: an entry for newTag already exists, so give up the victim
//and treat the existing buffer as found in the pool
UnpinBuffer(buf, true);
//give up the old partition lock
if (oldPartitionLock != NULL &&
oldPartitionLock != newPartitionLock)
LWLockRelease(oldPartitionLock);
//the remaining code mirrors the "found" path near the top of the function;
//see the comments there for details
buf = GetBufferDescriptor(buf_id);
valid = PinBuffer(buf, strategy);
//release the new partition lock
LWLockRelease(newPartitionLock);
//set the output flag
*foundPtr = true;
if (!valid)
{
if (StartBufferIO(buf, true))
{
*foundPtr = false;
}
}
return buf;
}
//no collision: lock the buffer header and re-check the buffer's state
buf_state = LockBufHdr(buf);
oldFlags = buf_state & BUF_FLAG_MASK;
if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
//refcount is 1 and the buffer is not dirty: the buffer is ours, so
//leave the loop with the buffer header still locked
break;
//otherwise unlock the buffer header
UnlockBufHdr(buf, buf_state);
//delete the hash table entry that was just inserted
BufTableDelete(&newTag, newHash);
//release the partition locks
if (oldPartitionLock != NULL &&
oldPartitionLock != newPartitionLock)
LWLockRelease(oldPartitionLock);
LWLockRelease(newPartitionLock);
UnpinBuffer(buf, true);
//go around the loop and look for another buffer
}
//the buffer header is still locked here, so the tag can be replaced
buf->tag = newTag;
//clear the flags and the usage count left over from the old page
buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT |
BUF_USAGECOUNT_MASK);
//BM_PERMANENT is set only for permanent relations and for the init fork
if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
else
buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
//store the new state and unlock the buffer header
UnlockBufHdr(buf, buf_state);
//when the buffer had a valid old tag, delete its old hash table entry and
//release the old partition lock unless it is the same lock as the new one
if (oldPartitionLock != NULL)
{
BufTableDelete(&oldTag, oldHash);
if (oldPartitionLock != newPartitionLock)
LWLockRelease(oldPartitionLock);
}
LWLockRelease(newPartitionLock);
//report "not found" when StartBufferIO returns true, otherwise "found"
if (StartBufferIO(buf, true))
*foundPtr = false;
else
*foundPtr = true;
return buf;
}
三、跟踪分析
测试脚本,查询数据表:
10:01:54 (xdb@[local]:5432)testdb=# select * from t1 limit 10;
启动gdb,设置断点
(gdb) b BufferAlloc
Breakpoint 1 at 0x8778ad: file bufmgr.c, line 1005.
(gdb) c
Continuing.
Breakpoint 1, BufferAlloc (smgr=0x2267430, relpersistence=112 'p', forkNum=MAIN_FORKNUM, blockNum=0, strategy=0x0,
foundPtr=0x7ffcc97fb4f3) at bufmgr.c:1005
1005 INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
(gdb)
输入参数
smgr-SMgrRelationData结构体指针
relpersistence-关系是否持久化
forkNum-fork类型,MAIN_FORKNUM对应数据文件,还有fsm/vm文件
blockNum-块号
strategy-buffer访问策略,为NULL
*foundPtr-输出参数
(gdb) p *smgr
$1 = {smgr_rnode = {node = {spcNode = 1663, dbNode = 16402, relNode = 51439}, backend = -1}, smgr_owner = 0x7f86133f3778,
smgr_targblock = 4294967295, smgr_fsm_nblocks = 4294967295, smgr_vm_nblocks = 4294967295, smgr_which = 0,
md_num_open_segs = {0, 0, 0, 0}, md_seg_fds = {0x0, 0x0, 0x0, 0x0}, next_unowned_reln = 0x0}
(gdb) p *smgr->smgr_owner
$2 = (struct SMgrRelationData *) 0x2267430
(gdb) p **smgr->smgr_owner
$3 = {smgr_rnode = {node = {spcNode = 1663, dbNode = 16402, relNode = 51439}, backend = -1}, smgr_owner = 0x7f86133f3778,
smgr_targblock = 4294967295, smgr_fsm_nblocks = 4294967295, smgr_vm_nblocks = 4294967295, smgr_which = 0,
md_num_open_segs = {0, 0, 0, 0}, md_seg_fds = {0x0, 0x0, 0x0, 0x0}, next_unowned_reln = 0x0}
(gdb)
1.初始化,根据Tag确定hash值和分区锁定ID
(gdb) n
1008 newHash = BufTableHashCode(&newTag);
(gdb) p newTag
$4 = {rnode = {spcNode = 1663, dbNode = 16402, relNode = 51439}, forkNum = MAIN_FORKNUM, blockNum = 0}
(gdb) n
1009 newPartitionLock = BufMappingPartitionLock(newHash);
(gdb)
1012 LWLockAcquire(newPartitionLock, LW_SHARED);
(gdb)
1013 buf_id = BufTableLookup(&newTag, newHash);
(gdb) p newHash
$5 = 1398580903
(gdb) p newPartitionLock
$6 = (LWLock *) 0x7f85e5db9600
(gdb) p *newPartitionLock
$7 = {tranche = 59, state = {value = 536870913}, waiters = {head = 2147483647, tail = 2147483647}}
(gdb)
2.检查block是否已在buffer pool中
(gdb) n
1014 if (buf_id >= 0)
(gdb) p buf_id
$8 = -1
4.在缓冲区中找不到该buffer(buf_id < 0)
4.1释放newPartitionLock
4.2执行循环,寻找合适的buffer
4.2.1确保在自旋锁尚未持有时,有一个空闲的refcount入口(条目) —-> ReservePrivateRefCountEntry
(gdb) n
1056 LWLockRelease(newPartitionLock);
(gdb)
1065 ReservePrivateRefCountEntry();
(gdb)
4.2.2选择一个待淘汰的buffer
(gdb) n
1071 buf = StrategyGetBuffer(strategy, &buf_state);
(gdb) n
1073 Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
(gdb) p buf
$9 = (BufferDesc *) 0x7f85e705fd80
(gdb) p *buf
$10 = {tag = {rnode = {spcNode = 0, dbNode = 0, relNode = 0}, forkNum = InvalidForkNumber, blockNum = 4294967295},
buf_id = 104, state = {value = 4194304}, wait_backend_pid = 0, freeNext = -2, content_lock = {tranche = 54, state = {
value = 536870912}, waiters = {head = 2147483647, tail = 2147483647}}}
(gdb)
4.2.3拷贝buffer flags到oldFlags中
(gdb) n
1076 oldFlags = buf_state & BUF_FLAG_MASK;
(gdb)
4.2.4Pin buffer,然后释放buffer自旋锁
(gdb)
1079 PinBuffer_Locked(buf);
(gdb)
4.2.5如buffer标记位BM_DIRTY,FlushBuffer
1088 if (oldFlags & BM_DIRTY)
(gdb)
4.2.6如buffer标记为BM_TAG_VALID,计算原tag的hashcode和partition lock ID,并锁定新旧分区锁
否则需要新的分区,锁定新分区锁,重置原分区锁和原hash值
(gdb)
1166 if (oldFlags & BM_TAG_VALID)
(gdb)
1200 LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
(gdb)
1202 oldPartitionLock = NULL;
(gdb)
1204 oldHash = 0;
(gdb) p oldFlags
$11 = 4194304
(gdb)
4.2.7尝试使用buffer新的tag构造hash表入口
(gdb)
1214 buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
(gdb) n
1216 if (buf_id >= 0)
(gdb) p buf_id
$12 = -1
(gdb)
4.2.9不存在冲突(buf_id < 0),锁定buffer header,如缓冲区没有变脏或者被pinned,则已找到buf,跳出循环
否则,解锁buffer header,删除hash表入口,释放锁,重新寻找buffer
(gdb) n
1267 buf_state = LockBufHdr(buf);
(gdb)
1275 oldFlags = buf_state & BUF_FLAG_MASK;
(gdb)
1276 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
(gdb)
1277 break;
(gdb)
4.3可以重新设置buffer tag,完成后解锁buffer header,删除原有的hash表入口,释放分区锁
1301 buf->tag = newTag;
(gdb)
1302 buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
(gdb)
1305 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
(gdb)
1306 buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
(gdb)
1310 UnlockBufHdr(buf, buf_state);
(gdb)
1312 if (oldPartitionLock != NULL)
(gdb)
1319 LWLockRelease(newPartitionLock);
(gdb) p *buf
$13 = {tag = {rnode = {spcNode = 1663, dbNode = 16402, relNode = 51439}, forkNum = MAIN_FORKNUM, blockNum = 0},
buf_id = 104, state = {value = 2181300225}, wait_backend_pid = 0, freeNext = -2, content_lock = {tranche = 54, state = {
value = 536870912}, waiters = {head = 2147483647, tail = 2147483647}}}
(gdb)
4.4执行StartBufferIO,设置*foundPtr标记
(gdb)
1326 if (StartBufferIO(buf, true))
(gdb) n
1327 *foundPtr = false;
(gdb)
4.5返回buf
(gdb)
1331 return buf;
(gdb)
1332 }
(gdb)
执行完成
(gdb)
ReadBuffer_common (smgr=0x2267430, relpersistence=112 'p', forkNum=MAIN_FORKNUM, blockNum=0, mode=RBM_NORMAL, strategy=0x0,
hit=0x7ffcc97fb5eb) at bufmgr.c:747
747 if (found)
(gdb)
750 pgBufferUsage.shared_blks_read++;
(gdb)
到此,关于“PostgreSQL中BufferAlloc函数有什么作用”的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注亿速云网站,小编会继续努力为大家带来更多实用的文章!