这篇文章主要介绍“PostgreSQL的vacuum过程中heap_vacuum_rel函数分析”,在日常操作中,相信很多人在PostgreSQL的vacuum过程中heap_vacuum_rel函数分析问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答“PostgreSQL的vacuum过程中heap_vacuum_rel函数分析”的疑惑有所帮助!接下来,请跟着小编一起来学习吧!
本节简单介绍了PostgreSQL手工执行vacuum的实现逻辑,主要分析了ExecVacuum->vacuum->vacuum_rel->heap_vacuum_rel函数的实现逻辑。
一、数据结构
宏定义
Vacuum和Analyze命令选项
// Option flags for the VACUUM and ANALYZE commands.
// Every constant occupies its own bit, so several options can be OR-ed
// together into one int (see VacuumStmt.options) and tested with `&`.
typedef enum VacuumOption
{
VACOPT_VACUUM = 1 << 0,
VACOPT_ANALYZE = 1 << 1,
// when set, heap_vacuum_rel reports at INFO level instead of DEBUG2
VACOPT_VERBOSE = 1 << 2,
VACOPT_FREEZE = 1 << 3,
VACOPT_FULL = 1 << 4,
VACOPT_SKIP_LOCKED = 1 << 5,
VACOPT_SKIPTOAST = 1 << 6,
// when set, heap_vacuum_rel forces aggressive = true
VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7
} VacuumOption;
VacuumStmt
存储vacuum命令的option&Relation链表
// Holds the options and the relation list of a VACUUM command.
typedef struct VacuumStmt
{
NodeTag type;// node tag
// bitmask of VacuumOption flags
int options;
// list of VacuumRelation entries; NIL means all relations
List *rels;
} VacuumStmt;
VacuumParams
vacuum命令参数
// Parameters of a VACUUM command, as consumed by heap_vacuum_rel.
typedef struct VacuumParams
{
// minimum freeze age; -1 means use the default
int freeze_min_age;
// freeze age at which the whole table is scanned
int freeze_table_age;
// minimum multixact freeze age; -1 means use the default
int multixact_freeze_min_age;
// multixact freeze age at which the whole table is scanned; -1 means use the default
int multixact_freeze_table_age;
// whether this is a forced wraparound vacuum; heap_vacuum_rel uses it only to
// choose the "to prevent wraparound" wording of its log message
bool is_wraparound;
// minimum execution threshold in milliseconds; heap_vacuum_rel only considers
// logging when this is >= 0 (in an autovacuum worker), and 0 logs unconditionally
int log_min_duration;
} VacuumParams;
VacuumRelation
VACUUM/ANALYZE命令的目标表信息
// Information about one target table of a VACUUM/ANALYZE command.
typedef struct VacuumRelation
{
NodeTag type;
// NOTE(review): presumably the table name as written in the command -- confirm
RangeVar *relation;
// NOTE(review): presumably the OID of the target table -- confirm
Oid oid;
// NOTE(review): presumably the list of column names given to ANALYZE -- confirm
List *va_cols;
} VacuumRelation;
BufferAccessStrategy
Buffer访问策略对象
// Buffer identifiers are plain ints.
typedef int Buffer;
// The value 0 is named InvalidBuffer, i.e. it does not identify a buffer.
#define InvalidBuffer 0
// BufferAccessStrategy is a pointer to the strategy object defined just below.
typedef struct BufferAccessStrategyData *BufferAccessStrategy;
// Buffer access strategy object.
typedef struct BufferAccessStrategyData
{
// overall strategy type
BufferAccessStrategyType btype;
// number of elements in buffers[]
int ring_size;
// NOTE(review): presumably the index of the current slot in buffers[] -- confirm
int current;
// NOTE(review): presumably whether the current buffer was taken from the ring -- confirm
bool current_was_in_ring;
// trailing flexible array of buffer identifiers; ring_size gives its length
Buffer buffers[FLEXIBLE_ARRAY_MEMBER];
} BufferAccessStrategyData;
// Block is an untyped pointer (void *), not a pointer to a named struct.
// NOTE(review): presumably it addresses the in-memory contents of a block -- confirm
typedef void *Block;
// Possible argument values for GetAccessStrategy()
typedef enum BufferAccessStrategyType
{
// normal random access
BAS_NORMAL,
// large read-only scan
BAS_BULKREAD,
// large multi-block write (e.g. COPY IN)
BAS_BULKWRITE,
// VACUUM
BAS_VACUUM
} BufferAccessStrategyType;
LVRelStats
// Statistics collected by one vacuum run over a heap relation.
// heap_vacuum_rel allocates it zero-filled, passes it to lazy_scan_heap,
// and afterwards reads the counters to update pg_class and to build the log message.
typedef struct LVRelStats
{
// true means two-pass strategy, false means one-pass strategy;
// heap_vacuum_rel sets it to (nindexes > 0)
bool hasindex;
// overall statistics about the relation
// previous value of pg_class.relpages
BlockNumber old_rel_pages;
// total number of pages
BlockNumber rel_pages;
// number of pages scanned
BlockNumber scanned_pages;
// number of pages skipped because of a pin
BlockNumber pinskipped_pages;
// number of frozen pages skipped
BlockNumber frozenskipped_pages;
// number of pages whose tuples were counted
BlockNumber tupcount_pages;
// previous value of pg_class.reltuples
double old_live_tuples;
// new estimate of the total number of tuples
double new_rel_tuples;
// new estimate of the number of live tuples
double new_live_tuples;
// new estimate of the number of dead tuples
double new_dead_tuples;
// number of pages removed
BlockNumber pages_removed;
// number of tuples deleted
double tuples_deleted;
// actually the last nonempty page + 1 (the trace shows 75 for a 75-page table)
BlockNumber nonempty_pages;
// list of TIDs of the tuples that are going to be deleted
// note: this list is kept sorted by TID address
// current number of entries
int num_dead_tuples;
// number of slots allocated in the array (maximum number of dead tuples)
int max_dead_tuples;
// array of ItemPointerData
ItemPointer dead_tuples;
// number of index scans (reported as "index scans: %d" in the log message)
int num_index_scans;
// latest transaction ID that was removed
TransactionId latestRemovedXid;
// whether a lock waiter was detected
bool lock_waiter_detected;
} LVRelStats;
PGRUsage
pg_rusage_init/pg_rusage_show的状态结构体
// State structure for pg_rusage_init/pg_rusage_show:
// heap_vacuum_rel fills it with pg_rusage_init(&ru0) before the work and
// passes it to pg_rusage_show(&ru0) when building the "system usage" log line.
typedef struct PGRUsage
{
struct timeval tv;
struct rusage ru;
} PGRUsage;
// Abridged view of the system's struct rusage: only the two time members are
// listed here. In POSIX getrusage(), ru_utime is the user CPU time used and
// ru_stime is the system CPU time used.
struct rusage
{
struct timeval ru_utime;
struct timeval ru_stime;
};
// System time value: whole seconds plus a microseconds part.
struct timeval
{
__time_t tv_sec;
__suseconds_t tv_usec;
};
二、源码解读
heap_vacuum_rel() — 为heap relation执行VACUUM
大体逻辑如下:
1.初始化相关变量,如本地变量/日志记录级别/访问策略等
2.调用vacuum_set_xid_limits计算最老的xmin和冻结截止点
3.判断是否执行全表(不跳过pages)扫描,标记变量为aggressive
4.初始化统计信息结构体vacrelstats
5.打开索引,执行函数lazy_scan_heap进行vacuuming,关闭索引
6.更新pg_class中的统计信息
7.收尾工作
// heap_vacuum_rel() -- perform VACUUM for one heap relation.
//
// onerel:    the relation to vacuum.
// options:   bitmask of VacuumOption flags; VACOPT_VERBOSE and
//            VACOPT_DISABLE_PAGE_SKIPPING are tested here and the whole mask
//            is handed on to lazy_scan_heap.
// params:    freeze ages and logging threshold; must not be NULL (asserted).
// bstrategy: buffer access strategy, stored in vac_strategy.
//
// Steps: set up logging level and progress reporting, compute the xid/multixact
// cutoffs, decide whether the scan is aggressive, run lazy_scan_heap with the
// indexes open, optionally truncate, update pg_class and the statistics
// collector, and finally (autovacuum only) write a log entry.
//
// NOTE(review): elevel, vac_strategy, OldestXmin, FreezeLimit and MultiXactCutoff
// are not declared in this excerpt; presumably file-level variables -- confirm.
void
heap_vacuum_rel(Relation onerel, int options, VacuumParams *params,
BufferAccessStrategy bstrategy)
{
LVRelStats *vacrelstats;// statistics for this run
Relation *Irel;// the relation's indexes, filled in by vac_open_indexes
// number of entries in Irel
int nindexes;
PGRUsage ru0;// resource-usage state for the log message
TimestampTz starttime = 0;// start timestamp (only set when logging may be needed)
long secs;// elapsed seconds
int usecs;// elapsed microseconds
double read_rate,// read rate in MB/s
write_rate;// write rate in MB/s
// should all unfrozen pages be scanned?
bool aggressive;
// were all such pages actually scanned?
bool scanned_all_unfrozen;
TransactionId xidFullScanLimit;
MultiXactId mxactFullScanLimit;
BlockNumber new_rel_pages;
BlockNumber new_rel_allvisible;
double new_live_tuples;
TransactionId new_frozen_xid;
MultiXactId new_min_multi;
Assert(params != NULL);
// measure elapsed time if autovacuum logging requires it
if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
{
pg_rusage_init(&ru0);
starttime = GetCurrentTimestamp();
}
if (options & VACOPT_VERBOSE)
// VERBOSE requested
elevel = INFO;
else
elevel = DEBUG2;
pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
RelationGetRelid(onerel));
vac_strategy = bstrategy;
// compute the oldest xmin and the freeze cutoff points
// outputs: OldestXmin / FreezeLimit / xidFullScanLimit / MultiXactCutoff / mxactFullScanLimit
vacuum_set_xid_limits(onerel,
params->freeze_min_age,
params->freeze_table_age,
params->multixact_freeze_min_age,
params->multixact_freeze_table_age,
&OldestXmin, &FreezeLimit, &xidFullScanLimit,
&MultiXactCutoff, &mxactFullScanLimit);
// compare onerel->rd_rel->relfrozenxid with xidFullScanLimit:
// aggressive is true if it precedes or equals the limit, false otherwise
aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
xidFullScanLimit);
// compare onerel->rd_rel->relminmxid with mxactFullScanLimit:
// if it precedes or equals the limit, aggressive becomes true;
// otherwise aggressive keeps the value computed above
aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
mxactFullScanLimit);
if (options & VACOPT_DISABLE_PAGE_SKIPPING)
// page skipping disabled: force aggressive to true
aggressive = true;
// allocate the (zero-filled) statistics structure
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
// record the starting statistics
vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
vacrelstats->num_index_scans = 0;
vacrelstats->pages_removed = 0;
vacrelstats->lock_waiter_detected = false;
// open all indexes of the relation
vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
vacrelstats->hasindex = (nindexes > 0);
// do the vacuuming
lazy_scan_heap(onerel, options, vacrelstats, Irel, nindexes, aggressive);
// done with the indexes
vac_close_indexes(nindexes, Irel, NoLock);
// scanned pages + skipped frozen pages < total pages means some unfrozen
// pages were skipped, which must not happen in an aggressive scan
if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
< vacrelstats->rel_pages)
{
Assert(!aggressive);
scanned_all_unfrozen = false;
}
else
// scanned pages + skipped frozen pages >= total pages: true
scanned_all_unfrozen = true;
if (should_attempt_truncation(vacrelstats))
lazy_truncate_heap(onerel, vacrelstats);
// tell other processes that the final cleanup is in progress
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
new_rel_pages = vacrelstats->rel_pages;
new_live_tuples = vacrelstats->new_live_tuples;
if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
{
// no page had its tuples counted although the relation is not empty:
// fall back to the previous page and live-tuple figures
new_rel_pages = vacrelstats->old_rel_pages;
new_live_tuples = vacrelstats->old_live_tuples;
}
visibilitymap_count(onerel, &new_rel_allvisible, NULL);
// clamp the all-visible count to the page count being reported
if (new_rel_allvisible > new_rel_pages)
new_rel_allvisible = new_rel_pages;
// the cutoffs are only passed on when every unfrozen page was scanned
new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
// update the statistics in pg_class
vac_update_relstats(onerel,
new_rel_pages,
new_live_tuples,
new_rel_allvisible,
vacrelstats->hasindex,
new_frozen_xid,
new_min_multi,
false);
// also send the results to the statistics collector
pgstat_report_vacuum(RelationGetRelid(onerel),
onerel->rd_rel->relisshared,
new_live_tuples,
vacrelstats->new_dead_tuples);
pgstat_progress_end_command();
// and log the action if appropriate
if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
{
// autovacuum worker and log_min_duration >= 0
TimestampTz endtime = GetCurrentTimestamp();
// log_min_duration == 0 logs always; otherwise only when the run
// took longer than log_min_duration
if (params->log_min_duration == 0 ||
TimestampDifferenceExceeds(starttime, endtime,
params->log_min_duration))
{
StringInfoData buf;
char *msgfmt;
TimestampDifference(starttime, endtime, &secs, &usecs);
read_rate = 0;
write_rate = 0;
// rates stay 0 when no time elapsed, which avoids dividing by zero
if ((secs > 0) || (usecs > 0))
{
read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
(secs + usecs / 1000000.0);
write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
(secs + usecs / 1000000.0);
}
initStringInfo(&buf);
// pick the headline according to is_wraparound and aggressive
if (params->is_wraparound)
{
if (aggressive)
msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
else
msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
}
else
{
if (aggressive)
msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
else
msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
}
appendStringInfo(&buf, msgfmt,
get_database_name(MyDatabaseId),
get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel),
vacrelstats->num_index_scans);
appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
vacrelstats->pages_removed,
vacrelstats->rel_pages,
vacrelstats->pinskipped_pages,
vacrelstats->frozenskipped_pages);
appendStringInfo(&buf,
_("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"),
vacrelstats->tuples_deleted,
vacrelstats->new_rel_tuples,
vacrelstats->new_dead_tuples,
OldestXmin);
appendStringInfo(&buf,
_("buffer usage: %d hits, %d misses, %d dirtied\n"),
VacuumPageHit,
VacuumPageMiss,
VacuumPageDirty);
appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
read_rate, write_rate);
appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
ereport(LOG,
(errmsg_internal("%s", buf.data)));
pfree(buf.data);
}
}
}
三、跟踪分析
测试脚本
11:45:37 (xdb@[local]:5432)testdb=# vacuum t1;
启动gdb,设置断点
注:PG主线函数名称已改为heap_vacuum_rel,PG 11.1仍为lazy_vacuum_rel
(gdb) c
Continuing.
Breakpoint 1, lazy_vacuum_rel (onerel=0x7f226cd9e9a0, options=1, params=0x7ffe010d5b70, bstrategy=0x1da9708)
at vacuumlazy.c:197
197 TimestampTz starttime = 0;
(gdb)
输入参数
relation
(gdb) p *onerel
$1 = {rd_node = {spcNode = 1663, dbNode = 16402, relNode = 50820}, rd_smgr = 0x0, rd_refcnt = 1, rd_backend = -1,
rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true, rd_indexvalid = 0 '\000', rd_statvalid = false,
rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_rel = 0x7f226cd9ebb8, rd_att = 0x7f226cd9ecd0, rd_id = 50820,
rd_lockInfo = {lockRelId = {relId = 50820, dbId = 16402}}, rd_rules = 0x0, rd_rulescxt = 0x0, trigdesc = 0x0,
rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkeycxt = 0x0, rd_partkey = 0x0, rd_pdcxt = 0x0,
rd_partdesc = 0x0, rd_partcheck = 0x0, rd_indexlist = 0x0, rd_oidindex = 0, rd_pkindex = 0, rd_replidindex = 0,
rd_statlist = 0x0, rd_indexattr = 0x0, rd_projindexattr = 0x0, rd_keyattr = 0x0, rd_pkattr = 0x0, rd_idattr = 0x0,
rd_projidx = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_index = 0x0, rd_indextuple = 0x0, rd_amhandler = 0,
rd_indexcxt = 0x0, rd_amroutine = 0x0, rd_opfamily = 0x0, rd_opcintype = 0x0, rd_support = 0x0, rd_supportinfo = 0x0,
rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0, rd_exclprocs = 0x0, rd_exclstrats = 0x0,
rd_amcache = 0x0, rd_indcollation = 0x0, rd_fdwroutine = 0x0, rd_toastoid = 0, pgstat_info = 0x1d5a030}
(gdb)
vacuum参数
(gdb) p *params
$2 = {freeze_min_age = -1, freeze_table_age = -1, multixact_freeze_min_age = -1, multixact_freeze_table_age = -1,
is_wraparound = false, log_min_duration = -1}
(gdb)
buffer访问策略对象
(gdb) p *bstrategy
$3 = {btype = BAS_VACUUM, ring_size = 32, current = 0, current_was_in_ring = false, buffers = 0x1da9718}
(gdb)
(gdb) p *bstrategy->buffers
$4 = 0
(gdb)
1.初始化相关变量,如本地变量/日志记录级别/访问策略等
$4 = 0
(gdb) n
215 if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
(gdb)
221 if (options & VACOPT_VERBOSE)
(gdb)
224 elevel = DEBUG2;
(gdb)
226 pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
(gdb)
229 vac_strategy = bstrategy;
(gdb)
2.调用vacuum_set_xid_limits计算最老的xmin和冻结截止点
由于传入的参数均为-1,各截止点均按默认配置计算得出;其中OldestXmin是当前最小的活动事务ID
231 vacuum_set_xid_limits(onerel,
(gdb)
245 aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
(gdb) p OldestXmin
$5 = 307590
(gdb) p FreezeLimit
$6 = 4245274886
(gdb) p xidFullScanLimit
$10 = 4145274886
(gdb) p MultiXactCutoff
$8 = 4289967297
(gdb) p mxactFullScanLimit
$9 = 4144967297
(gdb)
3.判断是否执行全表(不跳过pages)扫描,标记变量为aggressive,值为F
(gdb) n
247 aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
(gdb) p aggressive
$11 = false
(gdb) n
249 if (options & VACOPT_DISABLE_PAGE_SKIPPING)
(gdb) p onerel->rd_rel->relfrozenxid
$12 = 144983
(gdb) p xidFullScanLimit
$13 = 4145274886
(gdb)
4.初始化统计信息结构体vacrelstats
(gdb) n
252 vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
(gdb)
254 vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
(gdb)
255 vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
(gdb)
256 vacrelstats->num_index_scans = 0;
(gdb)
257 vacrelstats->pages_removed = 0;
(gdb)
258 vacrelstats->lock_waiter_detected = false;
(gdb)
261 vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
(gdb) p *vacrelstats
$14 = {hasindex = false, old_rel_pages = 75, rel_pages = 0, scanned_pages = 0, pinskipped_pages = 0,
frozenskipped_pages = 0, tupcount_pages = 0, old_live_tuples = 10000, new_rel_tuples = 0, new_live_tuples = 0,
new_dead_tuples = 0, pages_removed = 0, tuples_deleted = 0, nonempty_pages = 0, num_dead_tuples = 0, max_dead_tuples = 0,
dead_tuples = 0x0, num_index_scans = 0, latestRemovedXid = 0, lock_waiter_detected = false}
(gdb)
5.打开索引,执行函数lazy_scan_heap进行vacuuming,关闭索引
(gdb) n
262 vacrelstats->hasindex = (nindexes > 0);
(gdb)
265 lazy_scan_heap(onerel, options, vacrelstats, Irel, nindexes, aggressive);
(gdb)
268 vac_close_indexes(nindexes, Irel, NoLock);
(gdb)
(gdb)
277 if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
(gdb)
278 < vacrelstats->rel_pages)
(gdb)
277 if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
(gdb)
284 scanned_all_unfrozen = true;
(gdb) p *vacrelstats
$15 = {hasindex = true, old_rel_pages = 75, rel_pages = 75, scanned_pages = 75, pinskipped_pages = 0,
frozenskipped_pages = 0, tupcount_pages = 75, old_live_tuples = 10000, new_rel_tuples = 10154, new_live_tuples = 10000,
new_dead_tuples = 154, pages_removed = 0, tuples_deleted = 0, nonempty_pages = 75, num_dead_tuples = 0,
max_dead_tuples = 21825, dead_tuples = 0x1db5030, num_index_scans = 0, latestRemovedXid = 0, lock_waiter_detected = false}
(gdb) p vacrelstats->scanned_pages
$16 = 75
(gdb) p vacrelstats->frozenskipped_pages
$17 = 0
(gdb) p vacrelstats->rel_pages
$18 = 75
(gdb)
6.更新pg_class中的统计信息
(gdb) n
289 if (should_attempt_truncation(vacrelstats))
(gdb)
293 pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
(gdb)
317 new_rel_pages = vacrelstats->rel_pages;
(gdb)
318 new_live_tuples = vacrelstats->new_live_tuples;
(gdb)
319 if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
(gdb)
325 visibilitymap_count(onerel, &new_rel_allvisible, NULL);
(gdb)
326 if (new_rel_allvisible > new_rel_pages)
(gdb) p new_rel_allvisible
$19 = 0
(gdb) p new_rel_pages
$20 = 75
(gdb) n
329 new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
(gdb)
330 new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
(gdb)
336 vacrelstats->hasindex,
(gdb)
332 vac_update_relstats(onerel,
(gdb) p new_frozen_xid
$21 = 4245274886
(gdb) p new_min_multi
$22 = 4289967297
(gdb)
7.收尾工作
(gdb) n
345 vacrelstats->new_dead_tuples);
(gdb)
342 pgstat_report_vacuum(RelationGetRelid(onerel),
(gdb)
343 onerel->rd_rel->relisshared,
(gdb)
342 pgstat_report_vacuum(RelationGetRelid(onerel),
(gdb)
346 pgstat_progress_end_command();
(gdb)
349 if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
(gdb)
完成调用
411 }
(gdb)
vacuum_rel (relid=50820, relation=0x1cdb8d0, options=1, params=0x7ffe010d5b70) at vacuum.c:1560
1560 AtEOXact_GUC(false, save_nestlevel);
(gdb)
到此,关于“PostgreSQL的vacuum过程中heap_vacuum_rel函数分析”的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注亿速云网站,小编会继续努力为大家带来更多实用的文章!