[20250103]使用递归实现distinct功能.txt

[20250103]使用递归实现distinct功能.txt

--//生产系统遇到实际上许多条类似语句,顺便拿其中几个出来,真心不知道开发如何学计算机的。

1.问题提出:
SYS@127.0.0.1:9106/xtdb/xtdb2> @ sql_id c29undaquszs6
-- SQL_ID = c29undaquszs6 come from shared pool
select distinct ritem from routine2;

SYS@127.0.0.1:9106/xtdb/xtdb2> @ sql_id fhz2xwcnx2uyh
-- SQL_ID = fhz2xwcnx2uyh come from shared pool
select distinct rtype from routine2;

SYS@127.0.0.1:9106/xtdb/xtdb2> @ seg2 nis5.routine2 ''
SEG_MB OWNER SEGMENT_NAME SEGMENT_TYPE SEG_TABLESPACE_NAME BLOCKS HDRFIL HDRBLK
------ ----- ------------ ------------ ------------------- ------ ------ ------
  1088 NIS5  ROUTINE2     TABLE        NIS                 139264     21 843528

SYS@127.0.0.1:9106/xtdb/xtdb2> @ desczz nis5.routine2 ritem,rtype
eXtended describe of nis5.routine2

DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
INPUT   OWNER.TABLE_NAME  <filters>
SAMPLE  : @ desczz TABLE_NAME column_name1,column_name2
IF NOT INPUT <filters> ,USE "" .
Owner Table_Name SAMPLE_SIZE LAST_ANALYZED       Col# Column Name Null? Type         NUM_DISTINCT        Density  NUM_NULLS HISTOGRAM NUM_BUCKETS Low_value            High_value
----- ---------- ----------- ------------------- ---- ----------- ----- ------------ ------------ -------------- ---------- --------- ----------- -------------------- ----------------------
NIS5  ROUTINE2       4861544 2025-01-01 22:01:25    7 RTYPE             VARCHAR2(40)           36   .00000010285          0 FREQUENCY          36 13种呼吸道病原体检测 真菌三项病原体核酸检测
                     4861544 2025-01-01 22:01:25    8 RITEM             VARCHAR2(40)           32   .00000010285          0 FREQUENCY          32 13种呼吸道病原体检测 中性粒细胞

--//表ROUTINE2 1088M,而RTYPE,RITEM的不同值分布为36,32。要优化上面的sql语句分布建立对应索引简直就是资源浪费。
--//而且类似的sql语句还有一大堆,不想贴出来了,这样的项目简直就是豆腐渣中的豆腐渣工程。
--//上该公司网站,我真心不知道这样的产品竟然到处在买,简直就是垃圾,站在优化的角度又是一个豆腐渣工程。

--//要优化它,只能建立索引,还好NUM_NULLS=0,要给字段加not null约束,然后建立相应索引。
--//执行计划可以猜测出来走快速全索引扫描(需要另外排序),或者走全索引扫描(不需要排序).
--//我在想实际上每个值仅仅返回1行,是否可以通过递归实现类似功能,这样可以减少逻辑读,规避快速全索引扫描或者全索引扫描带
--//来的逻辑读,这样表索引应该也不小,即使选择压缩模式。
--//通过例子验证我的想法。

2.环境:
SCOTT@book> @ ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

create table t as select * from all_objects;
create index i_t_owner on t(owner) COMPRESS 1;
--//alter table t modify owner not null;这步不需要。
--//分析略。

3.测试:

SCOTT@book> @ sl all
alter session set statistics_level = all;
Session altered.

SCOTT@book> select distinct owner from t;
OWNER
------------------------------
OWBSYS_AUDIT
MDSYS
CTXSYS
FLOWS_FILES
HR
OLAPSYS
OUTLN
OWBSYS
PUBLIC
APEX_030200
EXFSYS
ORACLE_OCM
SCOTT
SYSTEM
DBSNMP
OE
ORDPLUGINS
ORDSYS
PM
SH
SYSMAN
APPQOSSYS
BI
IX
ORDDATA
XDB
SI_INFORMTN_SCHEMA
SYS
WMSYS
29 rows selected.

SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  g3ywa5u5raj7d, child number 0
-------------------------------------
select distinct owner from t
Plan hash value: 4043955095
--------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation             | Name      | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |  OMem |  1Mem | Used-Mem |
--------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT      |           |      1 |        |       |    40 (100)|          |     29 |00:00:00.04 |     138 |       |       |          |
|   1 |  HASH UNIQUE          |           |      1 |     29 |   174 |    40   (8)| 00:00:01 |     29 |00:00:00.04 |     138 |  5686K|  1858K| 1246K (0)|
|   2 |   INDEX FAST FULL SCAN| I_T_OWNER |      1 |  84801 |   496K|    37   (0)| 00:00:01 |  84801 |00:00:00.02 |     138 |       |       |          |
--------------------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1
   2 - SEL$1 / T@SEL$1
--//oracle选择快速全扫描,然后hash unique排序,逻辑读138.

4.使用递归方式:
WITH t_rec (owner) AS (
SELECT MIN (owner) FROM t
UNION ALL
SELECT (SELECT MIN (owner) FROM t WHERE owner > c.owner) FROM t_rec c WHERE c.owner IS NOT NULL)
SELECT * FROM t_rec;

OWNER
------------------------------
APEX_030200
APPQOSSYS
BI
CTXSYS
DBSNMP
EXFSYS
FLOWS_FILES
HR
IX
MDSYS
OE
OLAPSYS
ORACLE_OCM
ORDDATA
ORDPLUGINS
ORDSYS
OUTLN
OWBSYS
OWBSYS_AUDIT
PM
PUBLIC
SCOTT
SH
SI_INFORMTN_SCHEMA
SYS
SYSMAN
SYSTEM
WMSYS
XDB


30 rows selected.
--//注意这样查询返回一个空行.

SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  46c0waacwrkh6, child number 0
-------------------------------------
WITH t_rec (owner) AS ( SELECT MIN (owner) FROM t UNION ALL SELECT
(SELECT MIN (owner) FROM t WHERE owner > c.owner) FROM t_rec c WHERE
c.owner IS NOT NULL) SELECT * FROM t_rec
Plan hash value: 3372523748
-------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                                 | Name      | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |
-------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                          |           |      1 |        |       |     4 (100)|          |     30 |00:00:00.01 |      35 |
|   1 |  VIEW                                     |           |      1 |      2 |    34 |     4   (0)| 00:00:01 |     30 |00:00:00.01 |      35 |
|   2 |   UNION ALL (RECURSIVE WITH) BREADTH FIRST|           |      1 |        |       |            |          |     30 |00:00:00.01 |      35 |
|   3 |    SORT AGGREGATE                         |           |      1 |      1 |     6 |            |          |      1 |00:00:00.01 |       2 |
|   4 |     INDEX FULL SCAN (MIN/MAX)             | I_T_OWNER |      1 |      1 |     6 |     2   (0)| 00:00:01 |      1 |00:00:00.01 |       2 |
|   5 |    SORT AGGREGATE                         |           |     29 |      1 |     6 |            |          |     29 |00:00:00.01 |      33 |
|   6 |     FIRST ROW