当开启了log_lock_waits参数,且SQL请求锁的等待时间超过deadlock_timeout指定的时间时,会输出类似如下的日志:
LOG: process xxx1 acquired RowExclusiveLock on relation xxx2 of database xxx3 after xxx4 ms at xxx
STATEMENT: INSERT ...........
解释: xxx1进程请求数据库xxx3中xxx2对象上的RowExclusiveLock锁,等待了xxx4毫秒后获得该锁(如果仍在等待,日志中显示的是still waiting for)。
如何检查或监控锁等待呢?
PostgreSQL提供了两个视图
-
- pg_locks展示锁信息,每一个被锁或者等待锁的对象一条记录。
-
- pg_stat_activity,每个会话一条记录,显示会话状态信息。 我们通过这两个视图可以查看锁,锁等待情况。同时可以了解发生锁冲突的情况。
- 注意:pg_stat_activity.query反映的是当前正在执行或请求的SQL,而同一个事务中以前已经执行的SQL不能在pg_stat_activity中显示出来。所以如果你发现两个会话发生了冲突,但是他们的pg_stat_activity.query没有冲突的话,那就有可能是他们之间的某个事务之前的SQL获取的锁与另一个事务当前请求的QUERY发生了锁冲突。
通常锁的排查方法如下
-
- 开启审计日志log_statement = 'all'
-
- psql 挂一个打印锁等待的窗口(sql语句参考如下)
-
- tail 挂一个日志观测窗口
查看锁等待sql
-- Lock-wait report: one row per locked object that has at least one ungranted
-- lock request. lock_conflict lists every session that holds or waits for a
-- lock on that object, strongest lock mode first and, within the same mode,
-- granted locks before waiting ones.
with
-- Lock requests that have not been granted, with the details of their session.
t_wait as
(
    select a.mode,a.locktype,a.database,a.relation,a.page,a.tuple,a.classid,a.granted,
           a.objid,a.objsubid,a.pid,a.virtualtransaction,a.virtualxid,a.transactionid,a.fastpath,
           b.state,b.query,b.xact_start,b.query_start,b.usename,b.datname,b.client_addr,b.client_port,b.application_name
    from pg_locks a
    inner join pg_stat_activity b on a.pid = b.pid
    where not a.granted
),
-- Granted locks; same column list and order as t_wait, as required by the
-- union all in t_unionall.
t_run as
(
    select a.mode,a.locktype,a.database,a.relation,a.page,a.tuple,a.classid,a.granted,
           a.objid,a.objsubid,a.pid,a.virtualtransaction,a.virtualxid,a.transactionid,a.fastpath,
           b.state,b.query,b.xact_start,b.query_start,b.usename,b.datname,b.client_addr,b.client_port,b.application_name
    from pg_locks a
    inner join pg_stat_activity b on a.pid = b.pid
    where a.granted
),
-- Granted locks held by a different session on an object some request waits for.
-- exists (instead of a join) returns each holder once even when several
-- sessions wait on the same object; a join would repeat it once per waiter.
-- "is not distinct from" is used because most pg_locks target columns are null
-- for any given lock type.
t_overlap as
(
    select r.*
    from t_run r
    where exists
    (
        select 1
        from t_wait w
        where r.locktype is not distinct from w.locktype
          and r.database is not distinct from w.database
          and r.relation is not distinct from w.relation
          and r.page is not distinct from w.page
          and r.tuple is not distinct from w.tuple
          and r.virtualxid is not distinct from w.virtualxid
          and r.transactionid is not distinct from w.transactionid
          and r.classid is not distinct from w.classid
          and r.objid is not distinct from w.objid
          and r.objsubid is not distinct from w.objsubid
          and r.pid <> w.pid
    )
),
-- Holders and waiters together.
t_unionall as
(
    select r.* from t_overlap r
    union all
    select w.* from t_wait w
)
select
    locktype,
    datname,
    relation::regclass,
    page,
    tuple,
    virtualxid,
    transactionid::text,
    classid::regclass,
    objid,
    objsubid,
    -- String literals must be single-quoted: double quotes denote identifiers.
    string_agg(
        'Pid: '||coalesce(pid::text,'NULL')||chr(10)||
        'Lock_Granted: '||coalesce(granted::text,'NULL')||
        ' , Mode: '||coalesce(mode::text,'NULL')||
        ' , FastPath: '||coalesce(fastpath::text,'NULL')||
        ' , VirtualTransaction: '||coalesce(virtualtransaction::text,'NULL')||
        ' , Session_State: '||coalesce(state::text,'NULL')||chr(10)||
        'Username: '||coalesce(usename::text,'NULL')||
        ' , Database: '||coalesce(datname::text,'NULL')||
        ' , Client_Addr: '||coalesce(client_addr::text,'NULL')||
        ' , Client_Port: '||coalesce(client_port::text,'NULL')||
        ' , Application_Name: '||coalesce(application_name::text,'NULL')||chr(10)||
        'Xact_Start: '||coalesce(xact_start::text,'NULL')||
        ' , Query_Start: '||coalesce(query_start::text,'NULL')||
        ' , Xact_Elapse: '||coalesce((now()-xact_start)::text,'NULL')||
        ' , Query_Elapse: '||coalesce((now()-query_start)::text,'NULL')||chr(10)||
        'SQL (Current SQL in Transaction): '||chr(10)||
        coalesce(query::text,'NULL'),
        chr(10)||'--------'||chr(10)
        order by
            -- Rank lock modes so the strongest mode is listed first.
            ( case mode
                when 'INVALID' then 0
                when 'AccessShareLock' then 1
                when 'RowShareLock' then 2
                when 'RowExclusiveLock' then 3
                when 'ShareUpdateExclusiveLock' then 4
                when 'ShareLock' then 5
                when 'ShareRowExclusiveLock' then 6
                when 'ExclusiveLock' then 7
                when 'AccessExclusiveLock' then 8
                else 0
              end ) desc,
            (case when granted then 0 else 1 end)
    ) as lock_conflict
from t_unionall
group by
    locktype,datname,relation,page,tuple,virtualxid,transactionid::text,classid,objid,objsubid;
#如果觉得写SQL麻烦,可以将它创建为视图
-- View form of the lock-wait report: one row per locked object that has at
-- least one ungranted lock request. lock_conflict lists every session that
-- holds or waits for a lock on that object, strongest lock mode first and,
-- within the same mode, granted locks before waiting ones.
create view v_locks_monitor as
with
-- Lock requests that have not been granted, with the details of their session.
t_wait as
(
    select a.mode,a.locktype,a.database,a.relation,a.page,a.tuple,a.classid,a.granted,
           a.objid,a.objsubid,a.pid,a.virtualtransaction,a.virtualxid,a.transactionid,a.fastpath,
           b.state,b.query,b.xact_start,b.query_start,b.usename,b.datname,b.client_addr,b.client_port,b.application_name
    from pg_locks a
    inner join pg_stat_activity b on a.pid = b.pid
    where not a.granted
),
-- Granted locks; same column list and order as t_wait, as required by the
-- union all in t_unionall.
t_run as
(
    select a.mode,a.locktype,a.database,a.relation,a.page,a.tuple,a.classid,a.granted,
           a.objid,a.objsubid,a.pid,a.virtualtransaction,a.virtualxid,a.transactionid,a.fastpath,
           b.state,b.query,b.xact_start,b.query_start,b.usename,b.datname,b.client_addr,b.client_port,b.application_name
    from pg_locks a
    inner join pg_stat_activity b on a.pid = b.pid
    where a.granted
),
-- Granted locks held by a different session on an object some request waits for.
-- exists (instead of a join) returns each holder once even when several
-- sessions wait on the same object; a join would repeat it once per waiter.
-- "is not distinct from" is used because most pg_locks target columns are null
-- for any given lock type.
t_overlap as
(
    select r.*
    from t_run r
    where exists
    (
        select 1
        from t_wait w
        where r.locktype is not distinct from w.locktype
          and r.database is not distinct from w.database
          and r.relation is not distinct from w.relation
          and r.page is not distinct from w.page
          and r.tuple is not distinct from w.tuple
          and r.virtualxid is not distinct from w.virtualxid
          and r.transactionid is not distinct from w.transactionid
          and r.classid is not distinct from w.classid
          and r.objid is not distinct from w.objid
          and r.objsubid is not distinct from w.objsubid
          and r.pid <> w.pid
    )
),
-- Holders and waiters together.
t_unionall as
(
    select r.* from t_overlap r
    union all
    select w.* from t_wait w
)
select
    locktype,
    datname,
    relation::regclass,
    page,
    tuple,
    virtualxid,
    transactionid::text,
    classid::regclass,
    objid,
    objsubid,
    -- String literals must be single-quoted: double quotes denote identifiers.
    string_agg(
        'Pid: '||coalesce(pid::text,'NULL')||chr(10)||
        'Lock_Granted: '||coalesce(granted::text,'NULL')||
        ' , Mode: '||coalesce(mode::text,'NULL')||
        ' , FastPath: '||coalesce(fastpath::text,'NULL')||
        ' , VirtualTransaction: '||coalesce(virtualtransaction::text,'NULL')||
        ' , Session_State: '||coalesce(state::text,'NULL')||chr(10)||
        'Username: '||coalesce(usename::text,'NULL')||
        ' , Database: '||coalesce(datname::text,'NULL')||
        ' , Client_Addr: '||coalesce(client_addr::text,'NULL')||
        ' , Client_Port: '||coalesce(client_port::text,'NULL')||
        ' , Application_Name: '||coalesce(application_name::text,'NULL')||chr(10)||
        'Xact_Start: '||coalesce(xact_start::text,'NULL')||
        ' , Query_Start: '||coalesce(query_start::text,'NULL')||
        ' , Xact_Elapse: '||coalesce((now()-xact_start)::text,'NULL')||
        ' , Query_Elapse: '||coalesce((now()-query_start)::text,'NULL')||chr(10)||
        'SQL (Current SQL in Transaction): '||chr(10)||
        coalesce(query::text,'NULL'),
        chr(10)||'--------'||chr(10)
        order by
            -- Rank lock modes so the strongest mode is listed first.
            ( case mode
                when 'INVALID' then 0
                when 'AccessShareLock' then 1
                when 'RowShareLock' then 2
                when 'RowExclusiveLock' then 3
                when 'ShareUpdateExclusiveLock' then 4
                when 'ShareLock' then 5
                when 'ShareRowExclusiveLock' then 6
                when 'ExclusiveLock' then 7
                when 'AccessExclusiveLock' then 8
                else 0
              end ) desc,
            (case when granted then 0 else 1 end)
    ) as lock_conflict
from t_unionall
group by
    locktype,datname,relation,page,tuple,virtualxid,transactionid::text,classid,objid,objsubid ;
#eg:
-- Demo table for the lock-conflict walkthrough, seeded with one row.
create table table_lock(id int primary key, info text);
-- The text value must be single-quoted; a double-quoted token is parsed as an identifier.
insert into table_lock (id, info) values (1, 'a');
#session A
-- Update one row and read the table inside a transaction that is left open
-- (no commit or rollback follows), so the locks it acquired remain held.
begin;
update table_lock set info = 'aa' where id = 1;
select * from table_lock;
#session B
-- Read the demo table inside a transaction that is left open.
start transaction;
select id, info from table_lock;
#session C
-- Insert a second row inside a transaction that is left open.
begin;
insert into table_lock (id, info) values (2, 'b');
#session D
-- Attempt to truncate the demo table from inside a transaction.
start transaction;
truncate table table_lock;
waiting......
#or
#ALTER TABLE XXX RENAME TO XXXXX;
#session E
-- Plain read of the demo table, issued outside an explicit transaction block.
select id, info from table_lock;
waiting......
#eg:
Pid: 1980
Lock_Granted: false , Mode: AccessExclusiveLock , FastPath: false , VirtualTransaction: 9/4 , Session_State: active
Username: test , Database: postgres , Client_Addr: NULL , Client_Port: -1 , Application_Name: psql
Xact_Start: 2019-02-11 15:35:33.054468+08 , Query_Start: 2019-02-11 15:35:34.283192+08 , Xact_Elapse: 00:01:18.422846 , Query_Elapse: 00:01:17.194122
SQL (Current SQL in Transaction):
truncate table_lock;
--------
Pid: 1894
Lock_Granted: true , Mode: RowExclusiveLock , FastPath: false , VirtualTransaction: 5/128 , Session_State: idle in transaction
Username: test , Database: postgres , Client_Addr: NULL , Client_Port: -1 , Application_Name: psql
Xact_Start: 2019-02-11 15:17:48.342793+08 , Query_Start: 2019-02-11 15:17:48.344543+08 , Xact_Elapse: 00:19:03.134521 , Query_Elapse: 00:19:03.132771
SQL (Current SQL in Transaction):
insert into table_lock values (2,"b");
--------
-
- 前面的锁查询SQL,已经清晰的显示了每一个发生了锁等待的对象,Lock_Granted: true阻塞了Lock_Granted: false
-
- 同时按锁的大小排序,第一行的锁最大(Mode: AccessExclusiveLock级别最高)
处理方法
- 确认会话状态 查看状态可用以下语句
-- List the PID and current state of every row in pg_stat_activity.
SELECT
    pid,
    state
FROM pg_stat_activity;
# 查看当前会话的pid
highgo=# select pg_backend_pid();
#idle in transaction状态
or
如果想手动或定时杀掉idle状态的会话,可以使用下面的语句
-- Terminate every other session whose state is one of the listed idle/disabled
-- states and whose state last changed more than 15 minutes ago.
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE
    -- Never terminate the session that runs this statement.
    pid <> pg_backend_pid()
    -- State names are string literals and therefore single-quoted.
    AND state IN ('idle', 'idle in transaction', 'idle in transaction (aborted)', 'disabled')
    AND state_change < current_timestamp - INTERVAL '15' MINUTE;
- 注意会话的类型state in ('idle', 'idle in transaction', 'idle in transaction (aborted)', 'disabled')
state text Current overall state of this backend. Possible values are: active: The backend is executing a query. idle: The backend is waiting for a new client command. idle in transaction: The backend is in a transaction, but is not currently executing a query. idle in transaction (aborted): This state is similar to idle in transaction, except one of the statements in the transaction caused an error. fastpath function call: The backend is executing a fast-path function. disabled: This state is reported if track_activities is disabled in this backend.
要快速解除这种状态,terminate最大的锁对应的PID即可。
-- 2066 is an example PID only; substitute the PID of the session to terminate.
select pg_terminate_backend(2066);
postgres=# select pg_terminate_backend(1980);
-[ RECORD 1 ]--------+--
pg_terminate_backend | t
- 注意:此处不要使用操作系统命令kill -9,其会造成所有活动进程被终止,数据库重启。