本文以 SQL 语句 delete from test where a = 123; 为例,跟踪 PostgreSQL 删除数据的代码逻辑(PG 版本为 12.2)。
删除数据的主要函数是 heap_delete,其调用栈如下:
#0 heap_delete (relation=0x7f67ac24ca28, tid=0x7fff92b2c8ba, cid=0,
crosscheck=0x0, wait=true, tmfd=0x7fff92b2c7d0, changingPart=false)
at heapam.c:2447
#1 0x00000000004d3bb7 in heapam_tuple_delete (relation=0x7f67ac24ca28,
tid=0x7fff92b2c8ba, cid=0, snapshot=0x2e37060, crosscheck=0x0, wait=true,
tmfd=0x7fff92b2c7d0, changingPart=false) at heapam_handler.c:314
#2 0x00000000006dac91 in table_tuple_delete (rel=0x7f67ac24ca28,
tid=0x7fff92b2c8ba, cid=0, snapshot=0x2e37060, crosscheck=0x0, wait=true,
tmfd=0x7fff92b2c7d0, changingPart=false)
at ../../../src/include/access/tableam.h:1230
#3 0x00000000006dbfcb in ExecDelete (mtstate=0x2dcd1f0,
tupleid=0x7fff92b2c8ba, oldtuple=0x0, planSlot=0x2dce5c8,
epqstate=0x2dcd2e8, estate=0x2dcce70, processReturning=true,
canSetTag=true, changingPart=false, tupleDeleted=0x0, epqreturnslot=0x0)
at nodeModifyTable.c:768
#4 0x00000000006de016 in ExecModifyTable (pstate=0x2dcd1f0)
at nodeModifyTable.c:2226
// src/include/access/heapam.h
//
// heap_delete: delete one heap tuple, identified by tid, from relation.
//
//   relation     - the table that contains the tuple
//   tid          - item pointer of the tuple; its block number and offset
//                  number locate the tuple on its page
//   cid          - command id of the deleting command; used for the update
//                  visibility check and stored as the tuple's cmax
//   crosscheck   - when not InvalidSnapshot, the tuple must also be visible
//                  to this snapshot, otherwise TM_Updated is returned
//   wait         - when true and the tuple is being modified by someone
//                  else, wait for that transaction/multixact to finish
//   tmfd         - output: filled with ctid, xmax and cmax whenever the
//                  result is not TM_Ok
//   changingPart - when true, the tuple is marked as moved to another
//                  partition and the WAL record is flagged accordingly
//
// Returns TM_Ok on success; otherwise TM_SelfModified, TM_Updated,
// TM_Deleted or TM_BeingModified. An invisible tuple raises an ERROR.
extern TM_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
struct TM_FailureData *tmfd, bool changingPart);
/*
 * Body of heap_delete(): mark the tuple identified by tid as deleted by the
 * current transaction.  This is an excerpt; the function header and the
 * declarations of most local variables are not shown.
 */

/* Our transaction id; used for the new xmax and for PageSetPrunable(). */
TransactionId xid = GetCurrentTransactionId();

Assert(ItemPointerIsValid(tid));

/* Deleting tuples is rejected while in parallel mode. */
if (IsInParallelMode())
    ereport(ERROR,
            (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
             errmsg("cannot delete tuples during a parallel operation")));

/* Read the page that holds the target tuple. */
block = ItemPointerGetBlockNumber(tid);
buffer = ReadBuffer(relation, block);
page = BufferGetPage(buffer);

/*
 * If the page is flagged all-visible, pin its visibility map page before
 * taking the buffer lock.
 */
if (PageIsAllVisible(page))
    visibilitymap_pin(relation, block, &vmbuffer);

LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

/*
 * The page is all-visible now but no visibility map page was pinned above
 * (assumes vmbuffer starts out as InvalidBuffer -- declaration not shown).
 * Drop the buffer lock, pin the map page, and re-lock, so that
 * visibilitymap_pin() is not called while the buffer lock is held.
 */
if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    visibilitymap_pin(relation, block, &vmbuffer);
    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
}

/* Locate the line pointer and build a HeapTupleData describing the tuple. */
lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
Assert(ItemIdIsNormal(lp));

tp.t_tableOid = RelationGetRelid(relation);
tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_self = *tid;

/* Re-entered whenever the tuple's xmax changed while we were waiting. */
l1:
result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);

if (result == TM_Invisible)
{
    UnlockReleaseBuffer(buffer);
    ereport(ERROR,
            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
             errmsg("attempted to delete invisible tuple")));
}
else if (result == TM_BeingModified && wait)
{
    /*
     * Copy xmax and infomask before the buffer lock is released; they are
     * compared against the tuple again after re-locking.
     */
    xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
    infomask = tp.t_data->t_infomask;

    if (infomask & HEAP_XMAX_IS_MULTI)
    {
        /* xmax is a MultiXactId: wait only if it conflicts with our lock. */
        if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
                                    LockTupleExclusive, &current_is_member))
        {
            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

            /* Acquire the tuple lock unless current_is_member was set. */
            if (!current_is_member)
                heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,
                                     LockWaitBlock, &have_tuple_lock);

            /* Wait for the multixact. */
            MultiXactIdWait((MultiXactId) xwait, MultiXactStatusUpdate, infomask,
                            relation, &(tp.t_self), XLTW_Delete,
                            NULL);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

            /*
             * If the xmax or its infomask bits changed while the buffer was
             * unlocked, start over.
             */
            if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
                !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
                                     xwait))
                goto l1;
        }
    }
    else if (!TransactionIdIsCurrentTransactionId(xwait))
    {
        /* xmax is a plain xid of another transaction: wait for it to end. */
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
        heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,
                             LockWaitBlock, &have_tuple_lock);
        XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

        /* Same re-check as above: start over if xmax changed meanwhile. */
        if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
            !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
                                 xwait))
            goto l1;

        /* xmax is unchanged; update the tuple's xmax hint bits for xwait. */
        UpdateXmaxHintBits(tp.t_data, buffer, xwait);
    }

    /*
     * Decide the outcome after waiting: TM_Ok when xmax is invalid or was
     * only a lock; TM_Updated when t_ctid points to a different tuple or
     * the tuple was moved to another partition; TM_Deleted otherwise.
     */
    if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
        HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) ||
        HeapTupleHeaderIsOnlyLocked(tp.t_data))
        result = TM_Ok;
    else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid) ||
             HeapTupleHeaderIndicatesMovedPartitions(tp.t_data))
        result = TM_Updated;
    else
        result = TM_Deleted;
}

/*
 * With a crosscheck snapshot, a tuple that is not visible to that snapshot
 * is reported as TM_Updated.
 */
if (crosscheck != InvalidSnapshot && result == TM_Ok)
{
    if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
        result = TM_Updated;
}

/*
 * Failure path: report ctid/xmax/cmax to the caller through tmfd, release
 * the buffer, the tuple lock and the visibility map pin, and return.
 */
if (result != TM_Ok)
{
    Assert(result == TM_SelfModified ||
           result == TM_Updated ||
           result == TM_Deleted ||
           result == TM_BeingModified);
    Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
    Assert(result != TM_Updated ||
           !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
    tmfd->ctid = tp.t_data->t_ctid;
    tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
    /* cmax is only reported when the tuple was modified by ourselves. */
    if (result == TM_SelfModified)
        tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
    else
        tmfd->cmax = InvalidCommandId;
    UnlockReleaseBuffer(buffer);
    if (have_tuple_lock)
        UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
    if (vmbuffer != InvalidBuffer)
        ReleaseBuffer(vmbuffer);
    return result;
}

/* From here on the delete will be performed. */
CheckForSerializableConflictIn(relation, &tp, buffer);

/* May replace cid (and set iscombo) before it is stored as cmax below. */
HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);

/* Extract the replica identity tuple before entering the critical section. */
old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);

MultiXactIdSetOldestMember();

/* Compute the xmax and infomask bits to store in the tuple header. */
compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),
                          tp.t_data->t_infomask, tp.t_data->t_infomask2,
                          xid, LockTupleExclusive, true,
                          &new_xmax, &new_infomask, &new_infomask2);

START_CRIT_SECTION();

/* Flag the page as a pruning candidate, using our xid. */
PageSetPrunable(page, xid);

/*
 * Clear the all-visible flag both on the page and in the visibility map,
 * and remember that so the WAL record can carry the matching flag.
 */
if (PageIsAllVisible(page))
{
    all_visible_cleared = true;
    PageClearAllVisible(page);
    visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
                        vmbuffer, VISIBILITYMAP_VALID_BITS);
}

/* Store the deleting transaction's information in the tuple header. */
tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
tp.t_data->t_infomask |= new_infomask;
tp.t_data->t_infomask2 |= new_infomask2;
HeapTupleHeaderClearHotUpdated(tp.t_data);
HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
/* Make t_ctid point back at the tuple itself. */
tp.t_data->t_ctid = tp.t_self;

/* Mark the tuple as moved to another partition when requested. */
if (changingPart)
    HeapTupleHeaderSetMovedPartitions(tp.t_data);

MarkBufferDirty(buffer);

/* Write an XLOG_HEAP_DELETE WAL record if the relation needs WAL. */
if (RelationNeedsWAL(relation))
{
    if (RelationIsAccessibleInLogicalDecoding(relation))
        log_heap_new_cid(relation, &tp);

    /* Fixed part of the record: flags, infobits, offset and new xmax. */
    xlrec.flags = 0;
    if (all_visible_cleared)
        xlrec.flags |= XLH_DELETE_ALL_VISIBLE_CLEARED;
    if (changingPart)
        xlrec.flags |= XLH_DELETE_IS_PARTITION_MOVE;
    xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
                                          tp.t_data->t_infomask2);
    xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
    xlrec.xmax = new_xmax;

    /* Flag whether the old whole tuple or only the old key is attached. */
    if (old_key_tuple != NULL)
    {
        if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
            xlrec.flags |= XLH_DELETE_CONTAINS_OLD_TUPLE;
        else
            xlrec.flags |= XLH_DELETE_CONTAINS_OLD_KEY;
    }

    XLogBeginInsert();
    XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
    XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

    /*
     * Attach the replica identity tuple: a small header followed by the
     * tuple data that comes after the fixed-size tuple header.
     */
    if (old_key_tuple != NULL)
    {
        xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
        xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
        xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
        XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
        XLogRegisterData((char *) old_key_tuple->t_data
                         + SizeofHeapTupleHeader,
                         old_key_tuple->t_len
                         - SizeofHeapTupleHeader);
    }

    XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
    recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
    /* Stamp the page with the LSN of the record just inserted. */
    PageSetLSN(page, recptr);
}

END_CRIT_SECTION();

/* Drop the buffer content lock; the buffer itself is released further down. */
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

if (vmbuffer != InvalidBuffer)
    ReleaseBuffer(vmbuffer);

/*
 * Relations other than plain tables and materialized views are asserted to
 * have no external attributes; for tables and matviews, delete any external
 * values through toast_delete().
 */
if (relation->rd_rel->relkind != RELKIND_RELATION &&
    relation->rd_rel->relkind != RELKIND_MATVIEW)
{
    Assert(!HeapTupleHasExternal(&tp));
}
else if (HeapTupleHasExternal(&tp))
    toast_delete(relation, &tp, false);

/* Called before ReleaseBuffer() because tp still points into the page. */
CacheInvalidateHeapTuple(relation, &tp, NULL);

ReleaseBuffer(buffer);

/* Release the tuple lock if one was taken while waiting. */
if (have_tuple_lock)
    UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);

pgstat_count_heap_delete(relation);

/* Free the replica identity tuple only if it was a separate copy. */
if (old_key_tuple != NULL && old_key_copied)
    heap_freetuple(old_key_tuple);

return TM_Ok;