文章详情

短信预约-IT技能 免费直播动态提醒

请输入下面的图形验证码

提交验证

短信预约提醒成功

PostgreSQL 源码解读(236)- 后台进程#14(autovacuum进程#2)

2024-04-02 19:55

关注

本节简单介绍了PostgreSQL的后台进程:autovacuum,主要分析了launch_worker函数的实现逻辑。

一、数据结构

AutoVacuumShmem
主要的autovacuum共享内存结构体,存储在shared memory中,同时WorkerInfo也会存储在其中.




/*
 * Main autovacuum shared-memory structure.  A single instance is reached
 * through the file-level AutoVacuumShmem pointer declared right below.
 */
typedef struct
{
    /* One flag per signal kind; NOTE(review): exact meaning of each slot is not shown here */
    sig_atomic_t av_signal[AutoVacNumSignals];
    /* presumably the PID of the autovacuum launcher process -- confirm */
    pid_t       av_launcherpid;
    /* List of idle workers; do_start_worker() checks it for emptiness and pops its head */
    dlist_head  av_freeWorkers;
    /* presumably the list of workers that are currently running -- confirm */
    dlist_head  av_runningWorkers;
    /* Set by do_start_worker() to the worker it just removed from av_freeWorkers */
    WorkerInfo  av_startingWorker;
    /* Fixed-size array of NUM_WORKITEMS work-item slots */
    AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
} AutoVacuumShmemStruct;
/* File-local pointer to the structure above */
static AutoVacuumShmemStruct *AutoVacuumShmem;

FullTransactionId
64 bit的事务ID



/*
 * A transaction ID carried as a single 64-bit value, wrapped in a struct.
 */
typedef struct FullTransactionId
{
    uint64      value;          /* the 64-bit transaction ID */
} FullTransactionId;

avw_dbase
用于跟踪worker中的数据库的结构体




// Structure used to keep track of a database while choosing one for a worker
typedef struct avw_dbase
{
    /* Database OID; matched against adl_datid and used to look up the stats entry */
    Oid         adw_datid;
    /* Database name string */
    char       *adw_name;
    /* XID that do_start_worker() compares against xidForceLimit */
    TransactionId adw_frozenxid;
    /* MultiXactId that do_start_worker() compares against multiForceLimit */
    MultiXactId adw_minmulti;
    /* Stats entry filled in from pgstat_fetch_stat_dbentry(); may be NULL */
    PgStat_StatDBEntry *adw_entry;
} avw_dbase;

WorkerInfo



typedef struct
{
    ArchiveHandle *AH;          
    ParallelSlot *slot;         
} WorkerInfo;

二、源码解读

主要的实现逻辑在do_start_worker中



/*
 * launch_worker
 *
 * Call do_start_worker() and, when it returns a valid database OID, update
 * the schedule in DatabaseList for that database: its adl_next_worker is
 * set to "now" plus autovacuum_naptime * 1000 milliseconds and its node is
 * moved to the head of the list.  When the database has no entry in
 * DatabaseList, the list is rebuilt for that OID instead.
 */
static void
launch_worker(TimestampTz now)
{
    dlist_iter  cursor;
    Oid         started_db;

    started_db = do_start_worker();

    /* No worker was requested, so there is nothing to reschedule. */
    if (!OidIsValid(started_db))
        return;

    dlist_foreach(cursor, &DatabaseList)
    {
        avl_dbase  *entry = dlist_container(avl_dbase, adl_node, cursor.cur);

        if (entry->adl_datid != started_db)
            continue;

        /* Found the entry: push its next start time out and move it to the head. */
        entry->adl_next_worker =
            TimestampTzPlusMilliseconds(now, autovacuum_naptime * 1000);
        dlist_move_head(&DatabaseList, cursor.cur);
        return;
    }

    /* The database is not in the list yet: rebuild the list around it. */
    rebuild_database_list(started_db);
}

do_start_worker
选择一个DB,算法如下:
选择最久未被autovacuum清理(即最近最少被清理)的DB,或者需要清理以防止XID回卷导致数据丢失的DB.
如果存在XID回卷风险的DB,那么选择datfrozenxid最老的DB,而不管该DB做了多少次autovacuum.
自动忽略没有连接过(统计信息为空)的DB.



/*
 * do_start_worker
 *
 * Choose a database to autovacuum and ask the postmaster to start a worker
 * for it.
 *
 * Selection, in order of priority:
 *   1. databases whose adw_frozenxid precedes xidForceLimit; among them the
 *      one with the oldest adw_frozenxid wins;
 *   2. databases whose adw_minmulti precedes multiForceLimit; among them the
 *      one with the oldest adw_minmulti wins;
 *   3. otherwise, among databases that have a stats entry and are not
 *      skipped by the DatabaseList schedule check, the one with the smallest
 *      last_autovac_time.
 *
 * Returns the OID of the chosen database, or InvalidOid when there is no
 * free worker or no database was chosen.
 */
static Oid
do_start_worker(void)
{
    List       *dblist;// list of databases
    ListCell   *cell;// iteration cursor over dblist
    //typedef uint32 TransactionId;
    TransactionId xidForceLimit;// transaction id, unsigned 32-bit integer
    MultiXactId multiForceLimit;// multixact counterpart of xidForceLimit
    bool        for_xid_wrap;
    bool        for_multi_wrap;
    avw_dbase  *avdb;
    TimestampTz current_time;// current time
    bool        skipit = false;// skip this database?
    Oid         retval = InvalidOid;// OID of the database to return
    MemoryContext tmpcxt,
                oldcxt;// memory contexts
    
    // return immediately if there is no free worker (tracked in AutoVacuumShmem)
    LWLockAcquire(AutovacuumLock, LW_SHARED);
    if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
    {
        LWLockRelease(AutovacuumLock);
        return InvalidOid;
    }
    LWLockRelease(AutovacuumLock);
    
    // work in a temporary memory context that is deleted before returning
    tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
                                   "Start worker tmp cxt",
                                   ALLOCSET_DEFAULT_SIZES);
    oldcxt = MemoryContextSwitchTo(tmpcxt);
    
    // refresh the statistics
    autovac_refresh_stats();
    
    // fetch the list of databases
    dblist = get_database_list();
    
    // NOTE(review): recentXid is not declared in this function; presumably a
    // file-level variable -- confirm
    recentXid = ReadNewTransactionId();
    xidForceLimit = recentXid - autovacuum_freeze_max_age;
    
    
    //#define FirstNormalTransactionId  ((TransactionId) 3)
    // if the limit is below 3 (the first normal XID), subtract 3; the value is
    // an unsigned 32-bit integer (see the typedef note above), so this wraps
    if (xidForceLimit < FirstNormalTransactionId)
        xidForceLimit -= FirstNormalTransactionId;
    
    // determine the oldest datminmxid we need to consider
    // the MultiXactId is read from MultiXactState->nextMXact
    // NOTE(review): recentMulti is not declared here either -- confirm
    recentMulti = ReadNextMultiXactId();
    multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
    if (multiForceLimit < FirstMultiXactId)
        multiForceLimit -= FirstMultiXactId;
    
    avdb = NULL;// database chosen for vacuuming so far
    for_xid_wrap = false;// a database at risk of XID wraparound was seen
    for_multi_wrap = false;// a database at risk of multixact wraparound was seen
    current_time = GetCurrentTimestamp();// current time
    foreach(cell, dblist)// loop over the database list
    {
        avw_dbase  *tmp = lfirst(cell);
        dlist_iter  iter;
        
        // is this database at risk of wraparound?
        //TransactionIdPrecedes --- is id1 logically < id2?
        if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
        {
            if (avdb == NULL ||
                TransactionIdPrecedes(tmp->adw_frozenxid,
                                      avdb->adw_frozenxid))
                avdb = tmp;// keep the older one
            for_xid_wrap = true;
            continue;
        }
        else if (for_xid_wrap)
            continue;           // an XID-wraparound candidate exists; ignore the rest
        else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
        {
            if (avdb == NULL ||
                MultiXactIdPrecedes(tmp->adw_minmulti, avdb->adw_minmulti))
                avdb = tmp;
            for_multi_wrap = true;
            continue;
        }
        else if (for_multi_wrap)
            continue;           // a multixact-wraparound candidate exists; ignore the rest
        
        // look up the stats entry for this database
        tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
        
        // databases without a stats entry are ignored
        if (!tmp->adw_entry)
            continue;
        
        // skipit is reset for every database that reaches this point
        skipit = false;
        dlist_reverse_foreach(iter, &DatabaseList)
        {
            avl_dbase  *dbp = dlist_container(avl_dbase, adl_node, iter.cur);
            if (dbp->adl_datid == tmp->adw_datid)
            {
                // NOTE(review): assuming TimestampDifferenceExceeds(a, b, ms)
                // is true when b - a >= ms, this skips the database when its
                // adl_next_worker lies between current_time and current_time
                // plus autovacuum_naptime * 1000 ms -- confirm
                if (!TimestampDifferenceExceeds(dbp->adl_next_worker,
                                                current_time, 0) &&
                    !TimestampDifferenceExceeds(current_time,
                                                dbp->adl_next_worker,
                                                autovacuum_naptime * 1000))
                    skipit = true;
                break;
            }
        }
        if (skipit)
            continue;
        
        // prefer the database with the smallest last_autovac_time
        if (avdb == NULL ||
            tmp->adw_entry->last_autovac_time < avdb->adw_entry->last_autovac_time)
            avdb = tmp;
    }
    
    if (avdb != NULL)
    {
        WorkerInfo  worker;
        dlist_node *wptr;
        LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
        
        // take a worker off the free list and record it as the starting worker
        wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
        worker = dlist_container(WorkerInfoData, wi_links, wptr);
        worker->wi_dboid = avdb->adw_datid;
        worker->wi_proc = NULL;
        worker->wi_launchtime = GetCurrentTimestamp();
        AutoVacuumShmem->av_startingWorker = worker;
        LWLockRelease(AutovacuumLock);
        // ask the postmaster to start an autovacuum worker
        SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
        retval = avdb->adw_datid;
    }
    else if (skipit)
    {
        // nothing was chosen and the last database that reached the schedule
        // check was skipped by it: rebuild the database list
        rebuild_database_list(InvalidOid);
    }
    // switch back to the caller's context and drop the temporary one
    MemoryContextSwitchTo(oldcxt);
    MemoryContextDelete(tmpcxt);
    return retval;
}

/*
 * ReadNewTransactionId
 *
 * Return the value obtained from ReadNextFullTransactionId(), converted to
 * a TransactionId by XidFromFullTransactionId().
 */
static inline TransactionId
ReadNewTransactionId(void)
{
    return XidFromFullTransactionId(ReadNextFullTransactionId());
}
/* Cast the .value member of a FullTransactionId to uint32. */
#define XidFromFullTransactionId(x)     ((uint32) (x).value)

/*
 * ReadNextFullTransactionId
 *
 * Return a copy of ShmemVariableCache->nextFullXid.  The value is read
 * while XidGenLock is held in shared mode; the lock is released before
 * returning.
 */
FullTransactionId
ReadNextFullTransactionId(void)
{
    FullTransactionId nextXidCopy;

    LWLockAcquire(XidGenLock, LW_SHARED);
    nextXidCopy = ShmemVariableCache->nextFullXid;
    LWLockRelease(XidGenLock);

    return nextXidCopy;
}

三、跟踪分析

启动gdb,设置信号处理,设置断点


(gdb) handle SIGINT print nostop pass
SIGINT is used by the debugger.
Are you sure you want to change it? (y or n) 
Please answer y or n.
SIGINT is used by the debugger.
Are you sure you want to change it? (y or n) y
Signal        Stop  Print   Pass to program Description
SIGINT        No    Yes Yes     Interrupt
(gdb) b autovacuum.c:launch_worker
Breakpoint 1 at 0x82f3e7: file autovacuum.c, line 1338.
(gdb) b autovacuum.c:783
Breakpoint 2 at 0x82e8f0: file autovacuum.c, line 783.
(gdb) c
Continuing.

在其他session执行更新等操作


[pg12@localhost test]$ psql -c "update tbl set id = 1;"
Expanded display is used automatically.
UPDATE 2000000
[pg12@localhost test]$ psql -c "update t1 set id = 1;"
Expanded display is used automatically.
UPDATE 20000
[pg12@localhost test]$ psql -c "update t2 set id = 1;"
Expanded display is used automatically.
UPDATE 10000
[pg12@localhost test]$ psql -c "select txid_current();"
Expanded display is used automatically.
 txid_current 
--------------
         2917
(1 row)

60s后在gdb console中continue


Breakpoint 2, AutoVacLauncherMain (argc=0, argv=0x0) at autovacuum.c:783
783         if (dlist_is_empty(&DatabaseList))
(gdb) n
804             avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
(gdb) n
810             if (TimestampDifferenceExceeds(avdb->adl_next_worker,
(gdb) 
812                 launch_worker(current_time);
(gdb) p *avdb
$1 = {adl_datid = 16384, adl_next_worker = 628852948486950, adl_score = 0, adl_node = {
    prev = 0xfd9880 <DatabaseList>, next = 0xfd9880 <DatabaseList>}}
(gdb) step
Breakpoint 1, launch_worker (now=628853296722794) at autovacuum.c:1338
1338        dbid = do_start_worker();

进入do_start_worker


(gdb) step
do_start_worker () at autovacuum.c:1128
1128        bool        skipit = false;
(gdb) n
1129        Oid         retval = InvalidOid;
(gdb) 
1134        LWLockAcquire(AutovacuumLock, LW_SHARED);
(gdb) 
1135        if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))

查看AutoVacuumShmem结构体


(gdb) p *AutoVacuumShmem
$2 = {av_signal = {0, 0}, av_launcherpid = 5476, av_freeWorkers = {head = {prev = 0x7f8ccf1a4938, 
      next = 0x7f8ccf1a49b8}}, av_runningWorkers = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}}, 
  av_startingWorker = 0x0, av_workItems = {{avw_type = AVW_BRINSummarizeRange, avw_used = false, 
      avw_active = false, avw_database = 0, avw_relation = 0, avw_blockNumber = 0} <repeats 256 times>}}
(gdb) n
1140        LWLockRelease(AutovacuumLock);
(gdb) p AutoVacuumShmem->av_runningWorkers
$3 = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}}
(gdb) n

找到需要vacuum的database


1146        tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
(gdb) 
1149        oldcxt = MemoryContextSwitchTo(tmpcxt);
(gdb) 
1152        autovac_refresh_stats();
(gdb) n
1155        dblist = get_database_list();
(gdb) 
1162        recentXid = ReadNewTransactionId();
(gdb) p *dblist
$8 = {type = T_List, length = 5, head = 0x2382d48, tail = 0x2382f90}
(gdb) n
1163        xidForceLimit = recentXid - autovacuum_freeze_max_age;
(gdb) p recentXid
$9 = 2917
(gdb) p autovacuum_freeze_max_age
$10 = 200000000
(gdb) n
1166        if (xidForceLimit < FirstNormalTransactionId)
(gdb) p xidForceLimit
$11 = 4094970213
(gdb) p FirstNormalTransactionId
$12 = 3
(gdb) n
1170        recentMulti = ReadNextMultiXactId();
(gdb) 
1171        multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
(gdb) 
1172        if (multiForceLimit < FirstMultiXactId)
(gdb) p recentMulti
$13 = 1
(gdb) p MultiXactMemberFreezeThreshold()
$14 = 400000000
(gdb) n
1196        avdb = NULL;
(gdb) 
1197        for_xid_wrap = false;
(gdb) 
1198        for_multi_wrap = false;
(gdb) 
1199        current_time = GetCurrentTimestamp();
(gdb) 
1200        foreach(cell, dblist)
(gdb) 
1202            avw_dbase  *tmp = lfirst(cell);
(gdb) 
1206            if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) p *tmp --> 这是postgres数据库
$15 = {adw_datid = 13591, adw_name = 0x2382d20 "postgres", adw_frozenxid = 479, adw_minmulti = 1, 
  adw_entry = 0x0}
(gdb) n
1215            else if (for_xid_wrap)
(gdb) 
1217            else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb) 
1225            else if (for_multi_wrap)
(gdb) 
1229            tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb) 
1235            if (!tmp->adw_entry)
(gdb) 
1236                continue;
(gdb) 
1200        foreach(cell, dblist)
(gdb) 
1202            avw_dbase  *tmp = lfirst(cell);
(gdb) 
1206            if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) p *tmp --> 这是testdb数据库
$16 = {adw_datid = 16384, adw_name = 0x2382de0 "testdb", adw_frozenxid = 531, adw_minmulti = 1, adw_entry = 0x0}
(gdb) p tmp->adw_frozenxid
$17 = 531
(gdb) p xidForceLimit
$18 = 4094970213
(gdb) n
1215            else if (for_xid_wrap)
(gdb) 
1217            else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb) 
1225            else if (for_multi_wrap)
(gdb) 
1229            tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb) 
1235            if (!tmp->adw_entry)
(gdb) 
1245            skipit = false;
(gdb) 
1247            dlist_reverse_foreach(iter, &DatabaseList)
(gdb) 
1249                avl_dbase  *dbp = dlist_container(avl_dbase, adl_node, iter.cur);
(gdb) 
1251                if (dbp->adl_datid == tmp->adw_datid)
(gdb) 
1257                    if (!TimestampDifferenceExceeds(dbp->adl_next_worker,
(gdb) 
1267            if (skipit)
(gdb) 
1274            if (avdb == NULL ||
(gdb) 
1276                avdb = tmp;
(gdb) n
1200        foreach(cell, dblist)
(gdb) 
1202            avw_dbase  *tmp = lfirst(cell);
(gdb) 
1206            if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) 
1215            else if (for_xid_wrap)
(gdb) p *tmp
$19 = {adw_datid = 1, adw_name = 0x2382e60 "template1", adw_frozenxid = 479, adw_minmulti = 1, adw_entry = 0x0}
(gdb) n
1217            else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb) 
1225            else if (for_multi_wrap)
(gdb) 
1229            tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb) 
1235            if (!tmp->adw_entry)
(gdb) 
1236                continue; --> 没有统计信息的,忽略
(gdb) 
1200        foreach(cell, dblist)
(gdb) 
1202            avw_dbase  *tmp = lfirst(cell);
(gdb) 
1206            if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) 
1215            else if (for_xid_wrap)
(gdb) 
1217            else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb) 
1225            else if (for_multi_wrap)
(gdb) 
1229            tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb) 
1235            if (!tmp->adw_entry)
(gdb) 
1236                continue;
(gdb) 
1200        foreach(cell, dblist)
(gdb) 
1202            avw_dbase  *tmp = lfirst(cell);
(gdb) 
1206            if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) 
1215            else if (for_xid_wrap)
(gdb) 
1217            else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb) 
1225            else if (for_multi_wrap)
(gdb) 
1229            tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb) 
1235            if (!tmp->adw_entry)
(gdb) 
1236                continue;
(gdb) 
1200        foreach(cell, dblist)
(gdb)

完成db遍历,找到了需要处理的数据库->testdb,接下来就是找空闲worker并启动此worker执行vacuum


1280        if (avdb != NULL)
(gdb) 
1285            LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
(gdb) 
1291            wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
(gdb) 
1293            worker = dlist_container(WorkerInfoData, wi_links, wptr);
(gdb) p *wptr
$20 = {prev = 0x7f8ccf1a3510, next = 0x7f8ccf1a4978}
(gdb) n
1294            worker->wi_dboid = avdb->adw_datid;
(gdb) p *worker
$21 = {wi_links = {prev = 0x7f8ccf1a3510, next = 0x7f8ccf1a4978}, wi_dboid = 0, wi_tableoid = 0, wi_proc = 0x0, 
  wi_launchtime = 0, wi_dobalance = false, wi_sharedrel = false, wi_cost_delay = 0, wi_cost_limit = 0, 
  wi_cost_limit_base = 0}
(gdb) n
1295            worker->wi_proc = NULL;
(gdb) 
1296            worker->wi_launchtime = GetCurrentTimestamp();
(gdb) 
1298            AutoVacuumShmem->av_startingWorker = worker;
(gdb) 
1300            LWLockRelease(AutovacuumLock);
(gdb) 
1302            SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
(gdb) p *AutoVacuumShmem
$22 = {av_signal = {0, 0}, av_launcherpid = 5476, av_freeWorkers = {head = {prev = 0x7f8ccf1a4938, 
      next = 0x7f8ccf1a4978}}, av_runningWorkers = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}}, 
  av_startingWorker = 0x7f8ccf1a49b8, av_workItems = {{avw_type = AVW_BRINSummarizeRange, avw_used = false, 
      avw_active = false, avw_database = 0, avw_relation = 0, avw_blockNumber = 0} <repeats 256 times>}}
(gdb) n
1304            retval = avdb->adw_datid;
(gdb) 
Program received signal SIGUSR2, User defined signal 2.
do_start_worker () at autovacuum.c:1304
1304            retval = avdb->adw_datid;
(gdb) 
avl_sigusr2_handler (postgres_signal_arg=32764) at autovacuum.c:1405
1405    {
(gdb)

DONE!

四、参考资料

PG Source Code

免责声明:

① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。

② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341

软考中级精品资料免费领

  • 历年真题答案解析
  • 备考技巧名师总结
  • 高频考点精准押题
  • 资料下载
  • 历年真题
  • 2024年上半年信息系统项目管理师第二批次真题及答案解析(完整版)

    难度     813人已做
    查看
  • 【考后总结】2024年5月26日信息系统项目管理师第2批次考情分析

    难度     354人已做
    查看
  • 【考后总结】2024年5月25日信息系统项目管理师第1批次考情分析

    难度     318人已做
    查看
  • 2024年上半年软考高项第一、二批次真题考点汇总(完整版)

    难度     435人已做
    查看
  • 2024年上半年系统架构设计师考试综合知识真题

    难度     224人已做
    查看

相关文章

发现更多好内容
咦!没有更多了?去看看其它编程学习网 内容吧
首页课程
资料下载
问答资讯