目录
在学习 MySQL 正则表达式时可能会发现,有些常用功能并未提供。最典型的两个是:返回匹配项数目;一次返回所有匹配项。但我们可以创建自定义函数,在其中使用递归查询来实现。
一、regexp_count
drop function if exists regexp_count;delimiter //create function regexp_count(x text,r text, match_type varchar(5)) returns bigint reads sql data deterministicbegin set @ret=0; with recursive cte as (select n, regexp_substr(x,r,1,t.n,match_type) b from (select 1 n) t union all select n+1, regexp_substr(x,r,1, n + 1,match_type) from cte where b is not null) select count(*) into @ret from cte where b is not null; return @ret;end;//delimiter ;
二、regexp_extract
drop function if exists regexp_extract;delimiter //create function regexp_extract(x text,r text, match_type varchar(5)) returns text charset utf8mb4 reads sql data deterministicbegin set @ret=''; with recursive cte as (select n, regexp_substr(x,r,1,t.n,match_type) b from (select 1 n) t union all select n+1, regexp_substr(x,r,1, n + 1,match_type) from cte where b is not null) select * into @ret from (select convert(group_concat(b) using utf8mb4) a from cte) t where a is not null; return @ret;end;//delimiter ;
三、regexp_extract_index
drop function if exists regexp_extract_index;delimiter //create function regexp_extract_index(x text,r text, return_option int, match_type varchar(5)) returns text charset utf8mb4 reads sql data deterministicbegin set @ret=''; with recursive cte as (select n, regexp_instr(x,r,1,t.n,return_option,match_type) b from (select 1 n) t union all select n+1, regexp_instr(x,r,1, n + 1,return_option,match_type) from cte where b > 0) select * into @ret from (select convert(group_concat(b) using utf8mb4) a from cte where b >0) t; return @ret;end;//delimiter ;
四、测试
1. 测试数据
drop table if exists t_regexp;create table t_regexp(a text);insert into t_regexp values ('THE RIME OF THE ANCYENT MARINERE, IN SEVEN PARTS.ARGUMENT.How a Ship having passed the Line was driven by Storms to the cold Countrytowards the South Pole; and how from thence she made her course to the tropicalLatitude of the Great Pacific Ocean; and of the strange things that befell;and in what manner the Ancyent Marinere came back to his own Country.I.1 It is an ancyent Marinere,2 And he stoppeth one of three:3 "By thy long grey beard and thy glittering eye4 "Now wherefore stoppest me?'); insert into t_regexp values ('THE RIME OF THE ANCYENT MARINERE, IN SEVEN PARTS.'),('ARGUMENT.'),('How a Ship having passed the Line was driven by Storms to the cold Country'),('towards the South Pole; and how from thence she made her course to the tropical'),('Latitude of the Great Pacific Ocean; and of the strange things that befell;'),('and in what manner the Ancyent Marinere came back to his own Country.'),('I.'),('1 It is an ancyent Marinere,'),('2 And he stoppeth one of three:'),('3 "By thy long grey beard and thy glittering eye'),('4 "Now wherefore stoppest me?');
2. 测试结果
mysql> -- 匹配单词the,统计出现的次数mysql> select regexp_extract(a,'\\bthe\\b','') a,regexp_count(a,'\\bthe\\b','') c from t_regexp;+-------------------------------------+------+| a | c |+-------------------------------------+------+| THE,THE,the,the,the,the,the,the,the | 9 || THE,THE | 2 || | 0 || the,the | 2 || the,the | 2 || the,the | 2 || the | 1 || | 0 || | 0 || | 0 || | 0 || | 0 |+-------------------------------------+------+12 rows in set (0.00 sec)mysql> -- 匹配小写单词the,统计出现的次数mysql> select regexp_extract(a,'\\bthe\\b','c') a,regexp_count(a,'\\bthe\\b','c') c from t_regexp;+-----------------------------+------+| a | c |+-----------------------------+------+| the,the,the,the,the,the,the | 7 || | 0 || | 0 || the,the | 2 || the,the | 2 || the,the | 2 || the | 1 || | 0 || | 0 || | 0 || | 0 || | 0 |+-----------------------------+------+12 rows in set (0.00 sec)mysql> -- 多行模式匹配的所有单词,统计单词个数mysql> select regexp_extract(a,'\\w+','') a,regexp_count(a,'\\w+','m') c from t_regexp;+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+| a | c |+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+| THE,RIME,OF,THE,ANCYENT,MARINERE,IN,SEVEN,PARTS,ARGUMENT,How,a,Ship,having,passed,the,Line,was,driven,by,Storms,to,the,cold,Country,towards,the,South,Pole,and,how,from,thence,she,made,her,course,to,the,tropical,Latitude,of,the,Great,Pacific,Ocean,and,of,the,strange,things,that,befell,and,in,what,manner,the,Ancyent,Marinere,came,back,to,his,own,Country,I,1,It,is,an,ancyent,Marinere,2,And,he,stoppeth,one,of,three,3,By,thy,long,grey,beard,and,thy,glittering,eye,4,Now,wherefore,stoppest,me | 95 || THE,RIME,OF,THE,ANCYENT,MARINERE,IN,SEVEN,PARTS | 9 || ARGUMENT | 1 || How,a,Ship,having,passed,the,Line,was,driven,by,Storms,to,the,cold,Country | 15 || towards,the,South,Pole,and,how,from,thence,she,made,her,course,to,the,tropical | 15 || Latitude,of,the,Great,Pacific,Ocean,and,of,the,strange,things,that,befell | 13 || and,in,what,manner,the,Ancyent,Marinere,came,back,to,his,own,Country | 13 || I | 1 || 1,It,is,an,ancyent,Marinere | 6 || 2,And,he,stoppeth,one,of,three | 7 || 3,By,thy,long,grey,beard,and,thy,glittering,eye | 10 || 4,Now,wherefore,stoppest,me | 5 |+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+12 rows in set (0.01 sec)mysql> -- 区分大小写、多行、dotall模式匹配999,统计出现的次数mysql> select regexp_extract(a,'999','') a,regexp_count(a,'999','cmn') c from t_regexp;+------+------+| a | c |+------+------+| | 0 || | 0 || | 0 || | 0 || | 0 || | 0 || | 0 || | 0 || | 0 || | 0 || | 0 || | 0 |+------+------+12 rows in set (0.00 sec)mysql> -- 返回单词the出现的位置mysql> select regexp_extract_index(a,'\\bthe\\b',0,'') a from t_regexp;+---------------------------------+| a |+---------------------------------+| 1,13,86,119,144,203,228,260,311 || 1,13|| NULL|| 26,59 || 9,68|| 13,45 || 20 || NULL|| NULL|| NULL|| NULL|| NULL|+---------------------------------+12 rows in set (0.00 sec)mysql> -- 返回小写单词the出现的位置(后)mysql> select regexp_extract_index(a,'\\bthe\\b',1,'c') a from t_regexp;+----------------------------+| a |+----------------------------+| 89,122,147,206,231,263,314 || NULL || NULL || 29,62 || 12,71 || 16,48 || 23 || NULL || NULL || NULL || NULL || NULL |+----------------------------+12 rows in set (0.00 sec)mysql> -- 多行模式匹配,返回所有单词出现的位置mysql> select regexp_extract_index(a,'\\w+',0,'m') a from t_regexp;+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+| a |+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+| 1,5,10,13,17,25,35,38,44,51,61,65,67,72,79,86,90,95,99,106,109,116,119,123,128,136,144,148,154,160,164,168,173,180,184,189,193,200,203,207,216,225,228,232,238,246,253,257,260,264,272,279,284,292,296,299,304,311,315,323,332,337,342,345,349,353,362,365,373,376,379,382,390,400,411,415,418,427,431,434,441,450,453,457,462,467,473,477,481,492,496,508,512,522,531 || 1,5,10,13,17,25,35,38,44 || 1 || 1,5,7,12,19,26,30,35,39,46,49,56,59,63,68 || 1,9,13,19,25,29,33,38,45,49,54,58,65,68,72 || 1,10,13,17,23,31,38,42,45,49,57,64,69 || 1,5,8,13,20,24,32,41,46,51,54,58,62 || 1 || 1,9,12,15,18,26 || 1,12,16,19,28,32,35 || 1,10,13,17,22,27,33,37,41,52 || 1,13,17,27,36 |+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+12 rows in set (0.00 sec)mysql> -- 区分大小写、多行、dotall模式匹配,返回999出现的位置mysql> select regexp_extract_index(a,'999',1,'cmn') a from t_regexp;+------+| a |+------+| NULL || NULL || NULL || NULL || NULL || NULL || NULL || NULL || NULL || NULL || NULL || NULL |+------+12 rows in set (0.00 sec)
来源地址:https://blog.csdn.net/wzy0623/article/details/130792837