[20250103]使用递归实现distinct功能.txt
[20250103]使用递归实现distinct功能.txt
--//生产系统遇到实际上许多条类似语句,顺便拿其中几个出来,真心不知道开发如何学计算机的。
1.问题提出:
SYS@127.0.0.1:9106/xtdb/xtdb2> @ sql_id c29undaquszs6
-- SQL_ID = c29undaquszs6 come from shared pool
select distinct ritem from routine2;
SYS@127.0.0.1:9106/xtdb/xtdb2> @ sql_id fhz2xwcnx2uyh
-- SQL_ID = fhz2xwcnx2uyh come from shared pool
select distinct rtype from routine2;
SYS@127.0.0.1:9106/xtdb/xtdb2> @ seg2 nis5.routine2 ''
SEG_MB OWNER SEGMENT_NAME SEGMENT_TYPE SEG_TABLESPACE_NAME BLOCKS HDRFIL HDRBLK
------ ----- ------------ ------------ ------------------- ------ ------ ------
1088 NIS5 ROUTINE2 TABLE NIS 139264 21 843528
SYS@127.0.0.1:9106/xtdb/xtdb2> @ desczz nis5.routine2 ritem,rtype
eXtended describe of nis5.routine2
DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
INPUT OWNER.TABLE_NAME <filters>
SAMPLE : @ desczz TABLE_NAME column_name1,column_name2
IF NOT INPUT <filters> ,USE "" .
Owner Table_Name SAMPLE_SIZE LAST_ANALYZED Col# Column Name Null? Type NUM_DISTINCT Density NUM_NULLS HISTOGRAM NUM_BUCKETS Low_value High_value
----- ---------- ----------- ------------------- ---- ----------- ----- ------------ ------------ -------------- ---------- --------- ----------- -------------------- ----------------------
NIS5 ROUTINE2 4861544 2025-01-01 22:01:25 7 RTYPE VARCHAR2(40) 36 .00000010285 0 FREQUENCY 36 13种呼吸道病原体检测 真菌三项病原体核酸检测
4861544 2025-01-01 22:01:25 8 RITEM VARCHAR2(40) 32 .00000010285 0 FREQUENCY 32 13种呼吸道病原体检测 中性粒细胞
--//表ROUTINE2 1088M,而RTYPE,RITEM的不同值分布为36,32。要优化上面的sql语句分布建立对应索引简直就是资源浪费。
--//而且类似的sql语句还有一大堆,不想贴出来了,这样的项目简直就是豆腐渣中的豆腐渣工程。
--//上该公司网站,我真心不知道这样的产品竟然到处在买,简直就是垃圾,站在优化的角度又是一个豆腐渣工程。
--//要优化它,只能建立索引,还好NUM_NULLS=0,要给字段加not null约束,然后建立相应索引。
--//执行计划可以猜测出来走快速全索引扫描(需要另外排序),或者走全索引扫描(不需要排序).
--//我在想实际上每个值仅仅返回1行,是否可以通过递归实现类似功能,这样可以减少逻辑读,规避快速全索引扫描或者全索引扫描带
--//来的逻辑读,这样表索引应该也不小,即使选择压缩模式。
--//通过例子验证我的想法。
2.环境:
SCOTT@book> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
create table t as select * from all_objects;
create index i_t_owner on t(owner) COMPRESS 1;
--//alter table t modify owner not null;这步不需要。
--//分析略。
3.测试:
SCOTT@book> @ sl all
alter session set statistics_level = all;
Session altered.
SCOTT@book> select distinct owner from t;
OWNER
------------------------------
OWBSYS_AUDIT
MDSYS
CTXSYS
FLOWS_FILES
HR
OLAPSYS
OUTLN
OWBSYS
PUBLIC
APEX_030200
EXFSYS
ORACLE_OCM
SCOTT
SYSTEM
DBSNMP
OE
ORDPLUGINS
ORDSYS
PM
SH
SYSMAN
APPQOSSYS
BI
IX
ORDDATA
XDB
SI_INFORMTN_SCHEMA
SYS
WMSYS
29 rows selected.
SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID g3ywa5u5raj7d, child number 0
-------------------------------------
select distinct owner from t
Plan hash value: 4043955095
--------------------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
--------------------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 40 (100)| | 29 |00:00:00.04 | 138 | | | |
| 1 | HASH UNIQUE | | 1 | 29 | 174 | 40 (8)| 00:00:01 | 29 |00:00:00.04 | 138 | 5686K| 1858K| 1246K (0)|
| 2 | INDEX FAST FULL SCAN| I_T_OWNER | 1 | 84801 | 496K| 37 (0)| 00:00:01 | 84801 |00:00:00.02 | 138 | | | |
--------------------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T@SEL$1
--//oracle选择快速全扫描,然后hash unique排序,逻辑读138.
4.使用递归方式:
WITH t_rec (owner) AS (
SELECT MIN (owner) FROM t
UNION ALL
SELECT (SELECT MIN (owner) FROM t WHERE owner > c.owner) FROM t_rec c WHERE c.owner IS NOT NULL)
SELECT * FROM t_rec;
OWNER
------------------------------
APEX_030200
APPQOSSYS
BI
CTXSYS
DBSNMP
EXFSYS
FLOWS_FILES
HR
IX
MDSYS
OE
OLAPSYS
ORACLE_OCM
ORDDATA
ORDPLUGINS
ORDSYS
OUTLN
OWBSYS
OWBSYS_AUDIT
PM
PUBLIC
SCOTT
SH
SI_INFORMTN_SCHEMA
SYS
SYSMAN
SYSTEM
WMSYS
XDB
30 rows selected.
--//注意这样查询返回一个空行.
SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 46c0waacwrkh6, child number 0
-------------------------------------
WITH t_rec (owner) AS ( SELECT MIN (owner) FROM t UNION ALL SELECT
(SELECT MIN (owner) FROM t WHERE owner > c.owner) FROM t_rec c WHERE
c.owner IS NOT NULL) SELECT * FROM t_rec
Plan hash value: 3372523748
-------------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
-------------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 4 (100)| | 30 |00:00:00.01 | 35 |
| 1 | VIEW | | 1 | 2 | 34 | 4 (0)| 00:00:01 | 30 |00:00:00.01 | 35 |
| 2 | UNION ALL (RECURSIVE WITH) BREADTH FIRST| | 1 | | | | | 30 |00:00:00.01 | 35 |
| 3 | SORT AGGREGATE | | 1 | 1 | 6 | | | 1 |00:00:00.01 | 2 |
| 4 | INDEX FULL SCAN (MIN/MAX) | I_T_OWNER | 1 | 1 | 6 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 2 |
| 5 | SORT AGGREGATE | | 29 | 1 | 6 | | | 29 |00:00:00.01 | 33 |
| 6 | FIRST ROW
--//生产系统遇到实际上许多条类似语句,顺便拿其中几个出来,真心不知道开发如何学计算机的。
1.问题提出:
SYS@127.0.0.1:9106/xtdb/xtdb2> @ sql_id c29undaquszs6
-- SQL_ID = c29undaquszs6 come from shared pool
select distinct ritem from routine2;
SYS@127.0.0.1:9106/xtdb/xtdb2> @ sql_id fhz2xwcnx2uyh
-- SQL_ID = fhz2xwcnx2uyh come from shared pool
select distinct rtype from routine2;
SYS@127.0.0.1:9106/xtdb/xtdb2> @ seg2 nis5.routine2 ''
SEG_MB OWNER SEGMENT_NAME SEGMENT_TYPE SEG_TABLESPACE_NAME BLOCKS HDRFIL HDRBLK
------ ----- ------------ ------------ ------------------- ------ ------ ------
1088 NIS5 ROUTINE2 TABLE NIS 139264 21 843528
SYS@127.0.0.1:9106/xtdb/xtdb2> @ desczz nis5.routine2 ritem,rtype
eXtended describe of nis5.routine2
DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
INPUT OWNER.TABLE_NAME <filters>
SAMPLE : @ desczz TABLE_NAME column_name1,column_name2
IF NOT INPUT <filters> ,USE "" .
Owner Table_Name SAMPLE_SIZE LAST_ANALYZED Col# Column Name Null? Type NUM_DISTINCT Density NUM_NULLS HISTOGRAM NUM_BUCKETS Low_value High_value
----- ---------- ----------- ------------------- ---- ----------- ----- ------------ ------------ -------------- ---------- --------- ----------- -------------------- ----------------------
NIS5 ROUTINE2 4861544 2025-01-01 22:01:25 7 RTYPE VARCHAR2(40) 36 .00000010285 0 FREQUENCY 36 13种呼吸道病原体检测 真菌三项病原体核酸检测
4861544 2025-01-01 22:01:25 8 RITEM VARCHAR2(40) 32 .00000010285 0 FREQUENCY 32 13种呼吸道病原体检测 中性粒细胞
--//表ROUTINE2 1088M,而RTYPE,RITEM的不同值分布为36,32。要优化上面的sql语句分布建立对应索引简直就是资源浪费。
--//而且类似的sql语句还有一大堆,不想贴出来了,这样的项目简直就是豆腐渣中的豆腐渣工程。
--//上该公司网站,我真心不知道这样的产品竟然到处在买,简直就是垃圾,站在优化的角度又是一个豆腐渣工程。
--//要优化它,只能建立索引,还好NUM_NULLS=0,要给字段加not null约束,然后建立相应索引。
--//执行计划可以猜测出来走快速全索引扫描(需要另外排序),或者走全索引扫描(不需要排序).
--//我在想实际上每个值仅仅返回1行,是否可以通过递归实现类似功能,这样可以减少逻辑读,规避快速全索引扫描或者全索引扫描带
--//来的逻辑读,这样表索引应该也不小,即使选择压缩模式。
--//通过例子验证我的想法。
2.环境:
SCOTT@book> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
create table t as select * from all_objects;
create index i_t_owner on t(owner) COMPRESS 1;
--//alter table t modify owner not null;这步不需要。
--//分析略。
3.测试:
SCOTT@book> @ sl all
alter session set statistics_level = all;
Session altered.
SCOTT@book> select distinct owner from t;
OWNER
------------------------------
OWBSYS_AUDIT
MDSYS
CTXSYS
FLOWS_FILES
HR
OLAPSYS
OUTLN
OWBSYS
PUBLIC
APEX_030200
EXFSYS
ORACLE_OCM
SCOTT
SYSTEM
DBSNMP
OE
ORDPLUGINS
ORDSYS
PM
SH
SYSMAN
APPQOSSYS
BI
IX
ORDDATA
XDB
SI_INFORMTN_SCHEMA
SYS
WMSYS
29 rows selected.
SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID g3ywa5u5raj7d, child number 0
-------------------------------------
select distinct owner from t
Plan hash value: 4043955095
--------------------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
--------------------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 40 (100)| | 29 |00:00:00.04 | 138 | | | |
| 1 | HASH UNIQUE | | 1 | 29 | 174 | 40 (8)| 00:00:01 | 29 |00:00:00.04 | 138 | 5686K| 1858K| 1246K (0)|
| 2 | INDEX FAST FULL SCAN| I_T_OWNER | 1 | 84801 | 496K| 37 (0)| 00:00:01 | 84801 |00:00:00.02 | 138 | | | |
--------------------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T@SEL$1
--//oracle选择快速全扫描,然后hash unique排序,逻辑读138.
4.使用递归方式:
WITH t_rec (owner) AS (
SELECT MIN (owner) FROM t
UNION ALL
SELECT (SELECT MIN (owner) FROM t WHERE owner > c.owner) FROM t_rec c WHERE c.owner IS NOT NULL)
SELECT * FROM t_rec;
OWNER
------------------------------
APEX_030200
APPQOSSYS
BI
CTXSYS
DBSNMP
EXFSYS
FLOWS_FILES
HR
IX
MDSYS
OE
OLAPSYS
ORACLE_OCM
ORDDATA
ORDPLUGINS
ORDSYS
OUTLN
OWBSYS
OWBSYS_AUDIT
PM
PUBLIC
SCOTT
SH
SI_INFORMTN_SCHEMA
SYS
SYSMAN
SYSTEM
WMSYS
XDB
30 rows selected.
--//注意这样查询返回一个空行.
SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 46c0waacwrkh6, child number 0
-------------------------------------
WITH t_rec (owner) AS ( SELECT MIN (owner) FROM t UNION ALL SELECT
(SELECT MIN (owner) FROM t WHERE owner > c.owner) FROM t_rec c WHERE
c.owner IS NOT NULL) SELECT * FROM t_rec
Plan hash value: 3372523748
-------------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
-------------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 4 (100)| | 30 |00:00:00.01 | 35 |
| 1 | VIEW | | 1 | 2 | 34 | 4 (0)| 00:00:01 | 30 |00:00:00.01 | 35 |
| 2 | UNION ALL (RECURSIVE WITH) BREADTH FIRST| | 1 | | | | | 30 |00:00:00.01 | 35 |
| 3 | SORT AGGREGATE | | 1 | 1 | 6 | | | 1 |00:00:00.01 | 2 |
| 4 | INDEX FULL SCAN (MIN/MAX) | I_T_OWNER | 1 | 1 | 6 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 2 |
| 5 | SORT AGGREGATE | | 29 | 1 | 6 | | | 29 |00:00:00.01 | 33 |
| 6 | FIRST ROW