文章详情

短信预约-IT技能 免费直播动态提醒

请输入下面的图形验证码

提交验证

短信预约提醒成功

PostgreSQL 源码解读(146)- Storage Manager#2(fsm_search_avail函数)

2024-04-02 19:55

关注

本节简单介绍了PostgreSQL在执行插入过程中与存储相关的函数RecordAndGetPageWithFreeSpace->fsm_set_and_search,该函数设置给定的FSM page的相应值,并返回合适的slot。

一、数据结构

FSMAddress
内部的FSM处理过程以逻辑地址scheme的方式工作,树的每一个层次都可以认为是一个独立的地址文件.




typedef struct
{
    //层次
    int         level;          
    //该层次内的页编号
    int         logpageno;      
} FSMAddress;

//根页地址
static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};

FSMPage
FSM page数据结构.详细可参看src/backend/storage/freespace/README.




typedef struct
{
    
    int         fp_next_slot;
    
    uint8       fp_nodes[FLEXIBLE_ARRAY_MEMBER];
} FSMPageData;
typedef FSMPageData *FSMPage;

FSMLocalMap
对于小表,不需要创建FSM来存储空间信息,使用本地的内存映射信息.




//已尝试或者已在表的末尾之后
#define FSM_LOCAL_NOT_AVAIL 0x00

//可用于尝试
#define FSM_LOCAL_AVAIL     0x01

typedef struct
{
    BlockNumber nblocks;//块数
    uint8       map[HEAP_FSM_CREATION_THRESHOLD];//数组
}           FSMLocalMap;
static FSMLocalMap fsm_local_map =
{
    0,
    {
        FSM_LOCAL_NOT_AVAIL
    }
};
#define FSM_LOCAL_MAP_EXISTS (fsm_local_map.nblocks > 0)

二、源码解读

fsm_set_and_search设置给定的FSM page的相应值,并返回合适的slot.
主要处理逻辑如下:
1.初始化相关变量
2.设置相应页面的FSM可用标记
3.如search_cat/minvalue(请求空间)不为0,则调用fsm_search_avail搜索slot
4.解锁,返回

搜索树的算法是逐渐扩展”search triangle”(暂且称为搜索三角),也就是说,被当前节点所覆盖的所有节点,确保我们从开始点向右搜索.在第一步,只有目标slot会被检查.当我们从左边子节点往上移动到父节点时,我们同时添加了父节点的右子树到搜索三角中.当我们从右边子节点往上移动到父节点时,我们同时删除了当前搜索三角(这时候已经知道了没有合适的page), 同时向右检索下一个更大尺寸的三角.因此,我们不会从起点向左搜索,在每一步搜索三角的尺寸都会翻倍,确保只需要log2(N)步就可以搜索N个页面.
例如,考虑下面这棵树:



       7
   7       6
 5   7   6   5
4 5 5 7 2 6 5 2
            T

假定目标节点是字母T指示的节点,同时我们正在使用6或更大的数字搜索节点.检索从T开始.在第一轮迭代,移到右边,然后到父节点,到达最右边的节点 5.在第二轮迭代,移到右边,回卷,然后上溯,在第三个层次上到达节点 7这时候7满足我们的搜索要求,因此我们沿着7这条路径下降到底部.实际上,这是(考虑到回卷)起点右侧第一个满足条件的页面.




//search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
static int
fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
                   uint8 newValue, uint8 minValue)
{
    Buffer      buf;
    Page        page;
    int         newslot = -1;
    //获取FSM的buffer
    buf = fsm_readbuf(rel, addr, true);
    //锁定
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    //获取FSM的page
    page = BufferGetPage(buf);
    if (fsm_set_avail(page, slot, newValue))
        MarkBufferDirtyHint(buf, false);
    if (minValue != 0)
    {
        
        //搜索slot
        newslot = fsm_search_avail(buf, minValue,
                                   addr.level == FSM_BOTTOM_LEVEL,
                                   true);
    }
    UnlockReleaseBuffer(buf);
    return newslot;
}

//newslot = fsm_search_avail(buf, minValue, \
                                   addr.level == FSM_BOTTOM_LEVEL, \
                                   true);
int
fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
                 bool exclusive_lock_held)
{
    Page        page = BufferGetPage(buf);//获取page
    FSMPage     fsmpage = (FSMPage) PageGetContents(page);//获取FSMPage
    int         nodeno;
    int         target;
    uint16      slot;
restart:
    
    if (fsmpage->fp_nodes[0] < minvalue)
        return -1;
    
    //#define SlotsPerFSMPage   LeafNodesPerPage
    //#define LeafNodesPerPage   (NodesPerPage - NonLeafNodesPerPage)
    //#define NodesPerPage (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
                       offsetof(FSMPageData, fp_nodes)) 
    //#define NonLeafNodesPerPage (BLCKSZ / 2 - 1) = 4095 
    target = fsmpage->fp_next_slot;
    if (target < 0 || target >= LeafNodesPerPage)
        target = 0;
    target += NonLeafNodesPerPage;
    
    nodeno = target;
    while (nodeno > 0)
    {
        if (fsmpage->fp_nodes[nodeno] >= minvalue)
            break;
        
        nodeno = parentof(rightneighbor(nodeno));
    }
    
    while (nodeno < NonLeafNodesPerPage)
    {
        //左树节点
        int         childnodeno = leftchild(nodeno);
        if (childnodeno < NodesPerPage &&
            fsmpage->fp_nodes[childnodeno] >= minvalue)
        {
            //如有机会,往左移动
            nodeno = childnodeno;
            continue;
        }
        //指向右树节点
        childnodeno++;          
        if (childnodeno < NodesPerPage &&
            fsmpage->fp_nodes[childnodeno] >= minvalue)
        {
            //相当于下降了一层
            nodeno = childnodeno;
        }
        else
        {
            
            RelFileNode rnode;
            ForkNumber  forknum;
            BlockNumber blknum;
            //获取tag
            BufferGetTag(buf, &rnode, &forknum, &blknum);
            elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
                 blknum, rnode.spcNode, rnode.dbNode, rnode.relNode);
            
            //确保持有独占锁
            if (!exclusive_lock_held)
            {
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                exclusive_lock_held = true;
            }
            //重建FSM
            fsm_rebuild_page(page);
            MarkBufferDirtyHint(buf, false);
            //重新搜索
            goto restart;
        }
    }
    
    //现在已到底层,在有足够空间的节点上.
    slot = nodeno - NonLeafNodesPerPage;
    
    fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
    return slot;
}

三、跟踪分析

测试脚本



15:54:13 (xdb@[local]:5432)testdb=# insert into t1 values (1,'1','1');

启动gdb,设置断点



(gdb) b fsm_set_and_search
Breakpoint 1 at 0x888850: file freespace.c, line 676.
(gdb) c
Continuing.
Breakpoint 1, fsm_set_and_search (rel=0x7fd0d2f10788, addr=..., slot=5, newValue=0 '\000', minValue=1 '\001')
    at freespace.c:676
676     int         newslot = -1;
(gdb)

输入参数



(gdb) p *rel
$1 = {rd_node = {spcNode = 1663, dbNode = 16402, relNode = 50820}, rd_smgr = 0x1233b00, rd_refcnt = 1, rd_backend = -1, 
  rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true, rd_indexvalid = 1 '\001', rd_statvalid = false, 
  rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_rel = 0x7fd0d2f109a0, rd_att = 0x7fd0d2f10ab8, rd_id = 50820, 
  rd_lockInfo = {lockRelId = {relId = 50820, dbId = 16402}}, rd_rules = 0x0, rd_rulescxt = 0x0, trigdesc = 0x0, 
  rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkeycxt = 0x0, rd_partkey = 0x0, rd_pdcxt = 0x0, 
  rd_partdesc = 0x0, rd_partcheck = 0x0, rd_indexlist = 0x7fd0d2f0f820, rd_oidindex = 0, rd_pkindex = 0, 
  rd_replidindex = 0, rd_statlist = 0x0, rd_indexattr = 0x0, rd_projindexattr = 0x0, rd_keyattr = 0x0, rd_pkattr = 0x0, 
  rd_idattr = 0x0, rd_projidx = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_index = 0x0, rd_indextuple = 0x0, 
  rd_amhandler = 0, rd_indexcxt = 0x0, rd_amroutine = 0x0, rd_opfamily = 0x0, rd_opcintype = 0x0, rd_support = 0x0, 
  rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0, rd_exclprocs = 0x0, 
  rd_exclstrats = 0x0, rd_amcache = 0x0, rd_indcollation = 0x0, rd_fdwroutine = 0x0, rd_toastoid = 0, 
  pgstat_info = 0x12275f0}
(gdb) 
(gdb) p addr
$2 = {level = 0, logpageno = 0}

获取buffere,并锁定



(gdb) n
678     buf = fsm_readbuf(rel, addr, true);
(gdb) 
679     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
(gdb) p buf
$3 = 105
(gdb)

获取page



(gdb) p page
$4 = (Page) 0x7fd0bf41dc00 ""
(gdb) p *page
$5 = 0 '\000'

调用fsm_search_avail函数,进入fsm_search_avail



(gdb) n
684         MarkBufferDirtyHint(buf, false);
(gdb) 
686     if (minValue != 0)
(gdb) 
690                                    addr.level == FSM_BOTTOM_LEVEL,
(gdb) step
689         newslot = fsm_search_avail(buf, minValue,
(gdb) 
fsm_search_avail (buf=105, minvalue=1 '\001', advancenext=true, exclusive_lock_held=true) at fsmpage.c:161
161     Page        page = BufferGetPage(buf);
(gdb)

获取FSMPage,fp_nodes数组,0表示未使用



(gdb) n
162     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
(gdb) p page
$6 = (Page) 0x7fd0bf41dc00 ""
(gdb) n
173     if (fsmpage->fp_nodes[0] < minvalue)
(gdb) p *fsmpage
$7 = {fp_next_slot = 6, fp_nodes = 0x7fd0bf41dc1c "{{"}
(gdb) p *fsmpage->fp_nodes
$8 = 123 '{'
(gdb) p fsmpage->fp_nodes[0]
$9 = 123 '{'
(gdb) p fsmpage->fp_nodes[1]
$10 = 123 '{'
(gdb) p fsmpage->fp_nodes[2]
$11 = 0 '\000'
(gdb) p fsmpage->fp_nodes[3]
$12 = 123 '{'
(gdb) p fsmpage->fp_nodes[4]
$13 = 0 '\000'
(gdb) p fsmpage->fp_nodes[5]
$14 = 0 '\000'
(gdb) p fsmpage->fp_nodes[6]
$15 = 0 '\000'

使用fp_next_slot开始搜索.
从目标slot开始检索.每一步,把节点移到右边,然后上溯父节点.
在到达一个有足够空闲空间的节点时停止(因为根节点有足够的空间).



(gdb) n
181     target = fsmpage->fp_next_slot;
(gdb) 
182     if (target < 0 || target >= LeafNodesPerPage)
(gdb) p target
$16 = 6
(gdb) p LeafNodesPerPage
No symbol "__builtin_offsetof" in current context.
(gdb) n
184     target += NonLeafNodesPerPage;
(gdb) 
227     nodeno = target;
(gdb) p target
$17 = 4101
(gdb)

循环,直至找到满足条件的节点.
方法是移动到右边,如需要在同一层次上回卷,然后上溯.



(gdb) p fsmpage->fp_nodes[nodeno]
$19 = 0 '\000'
(gdb) p minvalue
$20 = 1 '\001'
(gdb) n
237         nodeno = parentof(rightneighbor(nodeno));
(gdb) n
228     while (nodeno > 0)
(gdb) p nodeno
$21 = 2050
(gdb) n
230         if (fsmpage->fp_nodes[nodeno] >= minvalue)
(gdb) fsmpage->fp_nodes[nodeno]
Undefined command: "fsmpage->fp_nodes".  Try "help".
(gdb) p fsmpage->fp_nodes[nodeno]
$22 = 0 '\000'
(gdb) n
237         nodeno = parentof(rightneighbor(nodeno));
(gdb) 
228     while (nodeno > 0)
(gdb) 
230         if (fsmpage->fp_nodes[nodeno] >= minvalue)
(gdb) p nodeno
$23 = 1025
(gdb) n
231             break;
(gdb) p fsmpage->fp_nodes[nodeno]
$24 = 1 '\001'
(gdb)

现在已到达了有足够空闲空间的节点,树中间的某个位置上面.
沿着有足够空闲空间的路径下降到底部.
如有选择,往左移动.本例,往左移动了.



(gdb) n
245     while (nodeno < NonLeafNodesPerPage)
(gdb) p nodeno
$25 = 1025
(gdb) n
247         int         childnodeno = leftchild(nodeno);
(gdb) 
249         if (childnodeno < NodesPerPage &&
(gdb) p childnodeno
$26 = 2051
(gdb) n
250             fsmpage->fp_nodes[childnodeno] >= minvalue)
(gdb) p fsmpage->fp_nodes[childnodeno]
$27 = 1 '\001'
(gdb) n
249         if (childnodeno < NodesPerPage &&
(gdb) 
252             nodeno = childnodeno;
(gdb) 
253             continue;
(gdb)

找到了相应的叶子节点



(gdb) 
245     while (nodeno < NonLeafNodesPerPage)
(gdb) n
247         int         childnodeno = leftchild(nodeno);
(gdb) 
249         if (childnodeno < NodesPerPage &&
(gdb) 
250             fsmpage->fp_nodes[childnodeno] >= minvalue)
(gdb) 
249         if (childnodeno < NodesPerPage &&
(gdb) 
255         childnodeno++;          
(gdb) 
256         if (childnodeno < NodesPerPage &&
(gdb) p childnodeno
$28 = 4104
(gdb) n
257             fsmpage->fp_nodes[childnodeno] >= minvalue)
(gdb) 
256         if (childnodeno < NodesPerPage &&
(gdb) 
259             nodeno = childnodeno;
(gdb) p childnodeno
$29 = 4104
(gdb) n
245     while (nodeno < NonLeafNodesPerPage)
(gdb)

现在已到底层,在有足够空间的节点上.
同时,更新下一个目标块指针.



(gdb) 
293     slot = nodeno - NonLeafNodesPerPage;
(gdb) n
303     fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
(gdb) p slot
$30 = 9
(gdb) p nodeno
$31 = 4104
(gdb) n
305     return slot;
(gdb) 
306 }
(gdb)

回到fsm_set_and_search,返回slot



(gdb) 
fsm_set_and_search (rel=0x7fd0d2f10788, addr=..., slot=5, newValue=0 '\000', minValue=1 '\001') at freespace.c:694
694     UnlockReleaseBuffer(buf);
(gdb) 
696     return newslot;
(gdb) 
(gdb) p newslot
$32 = 9
(gdb)

DONE!

四、参考资料

PG Source Code
Database Cluster, Databases, and Tables

免责声明:

① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。

② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341

软考中级精品资料免费领

  • 历年真题答案解析
  • 备考技巧名师总结
  • 高频考点精准押题
  • 资料下载
  • 历年真题
  • 2024年上半年信息系统项目管理师第二批次真题及答案解析(完整版)

    难度     813人已做
    查看
  • 【考后总结】2024年5月26日信息系统项目管理师第2批次考情分析

    难度     354人已做
    查看
  • 【考后总结】2024年5月25日信息系统项目管理师第1批次考情分析

    难度     318人已做
    查看
  • 2024年上半年软考高项第一、二批次真题考点汇总(完整版)

    难度     435人已做
    查看
  • 2024年上半年系统架构设计师考试综合知识真题

    难度     224人已做
    查看

相关文章

发现更多好内容

猜你喜欢

AI推送时光机
咦!没有更多了?去看看其它编程学习网 内容吧
首页课程
资料下载
问答资讯