相同SQL在不同實例結果竟然不同,你知道嗎?
1 問題背景
這是一個遺留的老庫,4節點12.2的RAC。我們每天都會通過EM對數據庫所有PDB的表空間使用量進行巡檢,針對使用率較高的表空間將和業務方進行溝通并擴容。但是最近發現一個非常奇怪的現象,即根據表空間當前使用數據量和數據文件自動增長的最大值比值得出的已用空間使用率(Available Space Used(%))沒有產生變化了,但是其余的值比如已用的分配空間占用率(Allocated Space Used (%))、分配大小(Allocated Size (GB))、已用空間(Space Used(GB))、數據文件數量(Datafiles)等其他數據卻又是變化的,且可以通過這些數值又可以人工算出正確的已用空間使用率結果:而奇怪的是,進入PDB中又會發現已用用空間使用率結果是正確的。
2 前期排查
其實這個頁面的后臺語句。
select * from (
WITH df AS (
SELECT
con_id,
tablespace_name,
SUM(bytes) bytes,
COUNT(*) cnt,
DECODE(SUM(DECODE(autoextensible, 'NO', 0, 1)), 0, 'NO', 'YES') autoext
FROM
cdb_data_files
GROUPBY
con_id,
tablespace_name),
um AS (SELECT con_id, tablespace_name, used_space ub, used_percent FROM cdb_tablespace_usage_metrics),
pdb AS (SELECT con_id, NAMEFROM v$containers) SELECT
p.NAME,
d.tablespace_name,
TO_CHAR (u.used_percent, '99999990.00'),
NVL ((a.bytes - NVL (f.bytes, 0)) / a.bytes * 100, 0),
a.autoext,
NVL (a.bytes, 0) / 1024 / 1024 / 1024,
NVL (a.bytes - NVL (f.bytes, 0), 0) / 1024 / 1024 / 1024,
NVL (f.bytes, 0) / 1024 / 1024 / 1024,
d.STATUS,
a.cnt,
d.contents,
d.extent_management,
d.segment_space_management
FROM
cdb_tablespaces d,
df a,
um u,
pdb p,
(SELECT CON_ID, tablespace_name, SUM(bytes) bytesFROM cdb_free_space GROUPBY CON_ID, tablespace_name) f
WHERE
d.tablespace_name = a.tablespace_name (+)
AND d.tablespace_name = f.tablespace_name (+)
AND d.tablespace_name = u.tablespace_name (+)
ANDNOT d.contents = 'UNDO'
ANDNOT (d.extent_management = 'LOCAL'AND d.contents = 'TEMPORARY')
AND p.con_id = d.con_id
AND p.con_id = u.con_id (+)
AND p.con_id = a.con_id (+)
AND p.con_id = f.con_id (+) UNIONALL
SELECT p.NAME,
d.tablespace_name,
TO_CHAR (u.used_percent, '99999990.00'),
NVL ((u.ub * d.block_size) / tf.bytes * 100, 0),
tf.autoext,
NVL (tf.bytes, 0) / 1024 / 1024 / 1024,
NVL (u.ub * d.block_size, 0) / 1024 / 1024 / 1024,
(NVL (tf.bytes, 0) - NVL (u.ub * d.block_size, 0)) / 1024 / 1024 / 1024,
d.STATUS,
tf.cnt,
d.contents,
d.extent_management,
d.segment_space_management
FROM
cdb_tablespaces d,
um u,
pdb p,
(
SELECT
con_id,
tablespace_name,
SUM(bytes) bytes,
COUNT(*) cnt,
DECODE(SUM(DECODE(autoextensible, 'NO', 0, 1)), 0, 'NO', 'YES') autoext
FROM
cdb_temp_files
GROUPBY
con_id,
tablespace_name) tf
WHERE
d.tablespace_name = tf.tablespace_name (+)
AND d.tablespace_name = u.tablespace_name (+)
AND d.extent_management = 'LOCAL'
AND d.contents = 'TEMPORARY'
AND p.con_id = d.con_id
AND p.con_id = u.con_id (+)
AND p.con_id = tf.con_id (+) UNIONALL
SELECT
p.NAME,
d.tablespace_name,
TO_CHAR (u.used_percent, '99999990.00'),
NVL ((u.ub * d.block_size) / a.bytes * 100, 0),
a.autoext,
NVL (a.bytes, 0) / 1024 / 1024 / 1024,
NVL (u.ub * d.block_size, 0) / 1024 / 1024 / 1024,
(NVL (a.bytes, 0) - NVL (u.ub * d.block_size, 0)) / 1024 / 1024 / 1024,
d.STATUS,
a.cnt,
d.contents,
d.extent_management,
d.segment_space_management
FROM
cdb_tablespaces d,
df a,
um u,
pdb p
WHERE
d.tablespace_name = a.tablespace_name (+)
AND d.tablespace_name = u.tablespace_name (+)
AND d.contents = 'UNDO'
AND p.con_id = d.con_id
AND p.con_id = u.con_id (+)
AND p.con_id = a.con_id (+)
) orderby3;
這里在外面嵌套了一層用于排序。因為計算結果沒有更新,一開始的排查方向是EM的緩存沒有清理,但是對EM的各項設置進行檢查后,并沒有發現相關問題。隨即在各個節點上執行該SQL,發現在節點1上執行結果有問題,在EM上將表空間查詢操作指定到其他實例結果也是正確的。隨即又開了個和數據庫相關的SR。
3 深入排查
在數據庫SR的指引下,收集了SQLHC的相關診斷信息,然后給了一大堆hint:
select
/*+
BEGIN_OUTLINE_DATA
IGNORE_OPTIM_EMBEDDED_HINTS
OPTIMIZER_FEATURES_ENABLE('12.2.0.1')
DB_VERSION('12.2.0.1')
ALL_ROWS
OUTLINE_LEAF(@"SEL$07BDC5B4")
MERGE(@"SEL$4" >"SEL$3")
OUTLINE_LEAF(@"SEL$ABDE6DFF")
MERGE(@"SEL$6" >"SEL$5")
OUTLINE_LEAF(@"SEL$DFD66ADD")
MERGE(@"SEL$CF5359D5" >"SEL$7")
OUTLINE_LEAF(@"SEL$22C746FF")
OUTLINE_LEAF(@"SEL$513E9771")
OUTLINE_LEAF(@"SEL$522E92D8")
OUTLINE_LEAF(@"SEL$42DFC41A")
MERGE(@"SEL$12" >"SEL$11")
OUTLINE_LEAF(@"SEL$1F78930A")
MERGE(@"SEL$10" >"SEL$2")
OUTLINE_LEAF(@"SEL$513E9770")
OUTLINE_LEAF(@"SEL$522E92D7")
OUTLINE_LEAF(@"SEL$29F99543")
MERGE(@"SEL$16" >"SEL$15")
OUTLINE_LEAF(@"SEL$1CF66C63")
MERGE(@"SEL$14" >"SEL$13")
OUTLINE_LEAF(@"SEL$22C746FE")
MATERIALIZE(@"SEL$07BDC5B4")
OUTLINE_LEAF(@"SEL$513E976F")
MATERIALIZE(@"SEL$ABDE6DFF")
OUTLINE_LEAF(@"SEL$522E92D6")
MATERIALIZE(@"SEL$DFD66ADD")
OUTLINE_LEAF(@"SEL$DC4B4145")
MERGE(@"SEL$18" >"SEL$17")
OUTLINE_LEAF(@"SET$1")
OUTLINE_LEAF(@"SEL$1")
OUTLINE(@"SEL$3")
OUTLINE(@"SEL$4")
OUTLINE(@"SEL$5")
OUTLINE(@"SEL$6")
OUTLINE(@"SEL$7")
OUTLINE(@"SEL$CF5359D5")
MERGE(@"SEL$9" >"SEL$8")
OUTLINE(@"SEL$07BDC5B4")
MERGE(@"SEL$4" >"SEL$3")
OUTLINE(@"SEL$ABDE6DFF")
MERGE(@"SEL$6" >"SEL$5")
OUTLINE(@"SEL$DFD66ADD")
MERGE(@"SEL$CF5359D5" >"SEL$7")
OUTLINE(@"SEL$11")
OUTLINE(@"SEL$12")
OUTLINE(@"SEL$2")
OUTLINE(@"SEL$10")
OUTLINE(@"SEL$15")
OUTLINE(@"SEL$16")
OUTLINE(@"SEL$13")
OUTLINE(@"SEL$14")
OUTLINE(@"SEL$17")
OUTLINE(@"SEL$18")
OUTLINE(@"SEL$8")
OUTLINE(@"SEL$9")
NO_ACCESS(@"SEL$1" "from$_subquery$_001"@"SEL$1")
PQ_CONCURRENT_UNION(@"SET$1")
NO_ACCESS(@"SEL$DC4B4145" "P"@"SEL$17")
FULL(@"SEL$DC4B4145" "K"@"SEL$18")
NO_ACCESS(@"SEL$DC4B4145" "A"@"SEL$17")
NO_ACCESS(@"SEL$DC4B4145" "U"@"SEL$17")
LEADING(@"SEL$DC4B4145" "P"@"SEL$17" "K"@"SEL$18" "A"@"SEL$17" "U"@"SEL$17")
USE_HASH(@"SEL$DC4B4145" "K"@"SEL$18")
USE_HASH(@"SEL$DC4B4145" "A"@"SEL$17")
USE_HASH(@"SEL$DC4B4145" "U"@"SEL$17")
PQ_DISTRIBUTE(@"SEL$DC4B4145" "K"@"SEL$18" HASH HASH)
PQ_DISTRIBUTE(@"SEL$DC4B4145" "A"@"SEL$17" HASH HASH)
PQ_DISTRIBUTE(@"SEL$DC4B4145" "U"@"SEL$17" HASH HASH)
NO_ACCESS(@"SEL$1CF66C63" "P"@"SEL$13")
FULL(@"SEL$1CF66C63" "K"@"SEL$14")
NO_ACCESS(@"SEL$1CF66C63" "U"@"SEL$13")
NO_ACCESS(@"SEL$1CF66C63" "TF"@"SEL$13")
LEADING(@"SEL$1CF66C63" "P"@"SEL$13" "K"@"SEL$14" "U"@"SEL$13" "TF"@"SEL$13")
USE_HASH(@"SEL$1CF66C63" "K"@"SEL$14")
USE_HASH(@"SEL$1CF66C63" "U"@"SEL$13")
USE_HASH(@"SEL$1CF66C63" "TF"@"SEL$13")
PQ_DISTRIBUTE(@"SEL$1CF66C63" "K"@"SEL$14" HASH HASH)
PQ_DISTRIBUTE(@"SEL$1CF66C63" "U"@"SEL$13" HASH HASH)
PQ_DISTRIBUTE(@"SEL$1CF66C63" "TF"@"SEL$13" HASH HASH)
PX_JOIN_FILTER(@"SEL$1CF66C63" "TF"@"SEL$13")
NO_ACCESS(@"SEL$1F78930A" "P"@"SEL$2")
FULL(@"SEL$1F78930A" "K"@"SEL$10")
NO_ACCESS(@"SEL$1F78930A" "A"@"SEL$2")
NO_ACCESS(@"SEL$1F78930A" "U"@"SEL$2")
NO_ACCESS(@"SEL$1F78930A" "F"@"SEL$2")
LEADING(@"SEL$1F78930A" "P"@"SEL$2" "K"@"SEL$10" "A"@"SEL$2" "U"@"SEL$2" "F"@"SEL$2")
USE_HASH(@"SEL$1F78930A" "K"@"SEL$10")
USE_HASH(@"SEL$1F78930A" "A"@"SEL$2")
USE_HASH(@"SEL$1F78930A" "U"@"SEL$2")
USE_HASH(@"SEL$1F78930A" "F"@"SEL$2")
PQ_DISTRIBUTE(@"SEL$1F78930A" "K"@"SEL$10" HASH HASH)
PQ_DISTRIBUTE(@"SEL$1F78930A" "A"@"SEL$2" HASH HASH)
PQ_DISTRIBUTE(@"SEL$1F78930A" "U"@"SEL$2" HASH HASH)
PQ_DISTRIBUTE(@"SEL$1F78930A" "F"@"SEL$2" HASH HASH)
PX_JOIN_FILTER(@"SEL$1F78930A" "F"@"SEL$2")
FULL(@"SEL$522E92D8" "T1"@"SEL$522E92D8")
FULL(@"SEL$513E9771" "T1"@"SEL$513E9771")
FULL(@"SEL$22C746FF" "T1"@"SEL$22C746FF")
FULL(@"SEL$42DFC41A" "K"@"SEL$12")
GBY_PUSHDOWN(@"SEL$42DFC41A")
USE_HASH_AGGREGATION(@"SEL$42DFC41A")
FULL(@"SEL$522E92D7" "T1"@"SEL$522E92D7")
FULL(@"SEL$513E9770" "T1"@"SEL$513E9770")
FULL(@"SEL$29F99543" "K"@"SEL$16")
GBY_PUSHDOWN(@"SEL$29F99543")
USE_HASH_AGGREGATION(@"SEL$29F99543")
FULL(@"SEL$522E92D6" "T1"@"SEL$522E92D6")
FULL(@"SEL$513E976F" "T1"@"SEL$513E976F")
FULL(@"SEL$22C746FE" "T1"@"SEL$22C746FE")
FULL(@"SEL$DFD66ADD" "X$CON"@"SEL$9")
FULL(@"SEL$ABDE6DFF" "K"@"SEL$6")
FULL(@"SEL$07BDC5B4" "K"@"SEL$4")
USE_HASH_AGGREGATION(@"SEL$07BDC5B4")
USE_PARTITION_WISE_GBY(@"SEL$07BDC5B4")
END_OUTLINE_DATA
*/
* from (
WITH df AS (
SELECT
...
對比不同節點的SQL的實際執行計劃的Outline信息(這里不做展示),這里是通過hint的方式SQL執行將指向到正確的執行計劃,輸出結果也回歸正常。SR的回復則是:
實例 1 hard parse 出來的執行計劃產生了錯誤結果,這個執行計劃可能是一個不正確的執行計劃。一個錯誤的執行計劃有可能產生錯誤的記錄條數,也可能產生正確的結果條數,但是每個記錄中的 sum / count 數據項卻不正確。這都是錯誤執行計劃可能導致的結果。
4 嘗試解決
既然執行計劃有誤,SQL PLAN會緩存在Shared Pool中,那么是不是可以通過清理Shared Pool的執行計劃緩存來解決這一問題呢:
-- 查詢語句的相關信息
SELECT sql_text, plan_hash_value, address, hash_value
FROM v$sqlarea
WHERE sql_id = '1fr0p0hnav1bq';
-- 清理執行計劃緩存
-- EXEC DBMS_SHARED_POOL.PURGE('ADDRESS,HASH_VALUE', 'C');
EXEC DBMS_SHARED_POOL.PURGE('0000000A32100428,682460534', 'C');
再次查詢,結果恢復正常:目前得到的消息,這一現象僅會出現在對系統視圖、元數據的復雜查詢中。將對應幾條語句的執行計劃緩存都清理過后,直接執行語句沒問題了,但EM顯示還是有點問題,相關問題還得繼續處理。
總結
這是一個比較奇怪的從EM中發現的現象,目前已解決了數據庫層面的問題。