-- Demo table: one row per brick, carrying its colour (as an RGB hex string),
-- its shape and its weight. brick_id is the primary key.
create table bricks (
  brick_id         integer      not null,
  colour_rgb_value varchar2(10) not null,
  shape            varchar2(10) not null,
  weight           integer      not null,
  primary key ( brick_id )
);
-- Lookup table mapping an RGB hex string to a human-readable colour name.
-- It is joined to bricks on colour_rgb_value later in the script.
create table colours (
  colour_rgb_value varchar2(10) not null,
  colour_name      varchar2(10) not null
);

-- Seed one row for each of the three RGB values that the brick inserts
-- generate, so the bricks/colours join later in the script returns rows.
-- NOTE(review): the colour names are the conventional ones for these RGB
-- codes; confirm against the original data set if it exists elsewhere.
insert into colours ( colour_rgb_value, colour_name ) values ( 'FF0000', 'red' );
insert into colours ( colour_rgb_value, colour_name ) values ( '00FF00', 'green' );
insert into colours ( colour_rgb_value, colour_name ) values ( '0000FF', 'blue' );
-- Load 100 bricks with ids 1..100.
-- Colour and shape cycle through three values based on mod(level, 3).
-- weight = floor(100 / rownum), so low weights repeat many times while
-- high weights are rare (a deliberately skewed distribution).
-- The target columns are listed explicitly so the insert does not depend on
-- the physical column order of the table.
insert into bricks ( brick_id, colour_rgb_value, shape, weight )
select
  rownum,
  case mod ( level, 3 )
    when 0 then 'FF0000'
    when 1 then '00FF00'
    when 2 then '0000FF'
  end,
  case mod ( level, 3 )
    when 0 then 'cylinder'
    when 1 then 'cube'
    when 2 then 'pyramid'
  end,
  floor ( 100 / rownum )
from dual
connect by level <= 100;
-- Load 200 more bricks with ids 1001..1200 (rownum + 1000), which leaves a
-- gap in brick_id below 1001.
-- Colour and shape cycle through three values based on mod(level, 3).
-- weight = floor(200 / rownum), again producing a skewed distribution.
-- The target columns are listed explicitly so the insert does not depend on
-- the physical column order of the table.
insert into bricks ( brick_id, colour_rgb_value, shape, weight )
select
  rownum + 1000,
  case mod ( level, 3 )
    when 0 then 'FF0000'
    when 1 then '00FF00'
    when 2 then '0000FF'
  end,
  case mod ( level, 3 )
    when 0 then 'cylinder'
    when 1 then 'cube'
    when 2 then 'pyramid'
  end,
  floor ( 200 / rownum )
from dual
connect by level <= 200;
两次插入后,BRICKS 表中总共有 300 行记录。
-- Gathers statistics for both tables, then overwrites them with hand-set
-- values (row counts, distinct counts, min/max of colour_rgb_value).
declare
  -- Receivers for the OUT parameters of dbms_stats.get_column_stats.
  l_col_stats  dbms_stats.statrec;
  l_distinct   number;
  l_density    number;
  l_null_count number;
  l_avg_length number;
begin
  -- Start from freshly gathered statistics on both tables.
  dbms_stats.gather_table_stats ( ownname => null, tabname => 'colours' );
  dbms_stats.gather_table_stats ( ownname => null, tabname => 'bricks' );

  -- Overwrite the recorded row counts: 30 for bricks, 3000 for colours.
  dbms_stats.set_table_stats ( ownname => null, tabname => 'bricks', numrows => 30 );
  dbms_stats.set_table_stats ( ownname => null, tabname => 'colours', numrows => 3000 );

  -- Fetch the current column statistics of colours.colour_rgb_value so the
  -- stats record can be modified and written back.
  dbms_stats.get_column_stats (
    ownname => null,
    tabname => 'colours',
    colname => 'colour_rgb_value',
    distcnt => l_distinct,
    density => l_density,
    nullcnt => l_null_count,
    avgclen => l_avg_length,
    srec    => l_col_stats
  );

  -- Set the recorded low/high values to the RAW form of these two strings.
  l_col_stats.minval := utl_raw.cast_to_raw ( '0000FF' );
  l_col_stats.maxval := utl_raw.cast_to_raw ( 'FF0000' );

  -- Write the same stats record, with a distinct count of 10, to the
  -- colour_rgb_value column of both tables.
  dbms_stats.set_column_stats (
    ownname => null,
    tabname => 'colours',
    colname => 'colour_rgb_value',
    distcnt => 10,
    srec    => l_col_stats
  );
  dbms_stats.set_column_stats (
    ownname => null,
    tabname => 'bricks',
    colname => 'colour_rgb_value',
    distcnt => 10,
    srec    => l_col_stats
  );
end;
/
查看表统计信息
表中的总行数
每列中不同(distinct)值的数量
每列的最高值和最低值
-- Lists, for every table in the current schema that has column statistics:
-- the recorded row count, and per column the recorded number of distinct
-- values plus the low and high values.
-- low_value / high_value are decoded with utl_raw according to the column's
-- data type; for data types other than VARCHAR2 and NUMBER the case
-- expressions return null.
select
  ut.table_name,
  ut.num_rows,
  utcs.column_name,
  utcs.num_distinct,
  case
    when utc.data_type = 'VARCHAR2' then
      utl_raw.cast_to_varchar2 ( utcs.low_value )
    when utc.data_type = 'NUMBER' then
      to_char ( utl_raw.cast_to_number ( utcs.low_value ) )
  end as low_val,
  case
    when utc.data_type = 'VARCHAR2' then
      utl_raw.cast_to_varchar2 ( utcs.high_value )
    when utc.data_type = 'NUMBER' then
      to_char ( utl_raw.cast_to_number ( utcs.high_value ) )
  end as high_val
from user_tables ut
inner join user_tab_cols utc
  on utc.table_name = ut.table_name
inner join user_tab_col_statistics utcs
  on utcs.table_name = ut.table_name
  and utcs.column_name = utc.column_name
order by
  ut.table_name,
  utcs.column_name;
注意:高低值列是二进制格式。您需要转换它们以查看值。
-- Actual number of distinct colour values and total rows in bricks, for
-- comparison with the recorded statistics.
select
  count ( distinct b.colour_rgb_value ),
  count (*)
from bricks b;

-- The same figures for colours.
-- The terminating semicolon was missing here, which made this statement run
-- into the one that follows it.
select
  count ( distinct c.colour_rgb_value ),
  count (*)
from colours c;
-- Number of bricks per colour name. The gather_plan_statistics hint makes
-- the database record row counts for each plan step of this execution.
select /*+ gather_plan_statistics */
  c.colour_name,
  count (*)
from bricks b
inner join colours c
  on b.colour_rgb_value = c.colour_rgb_value
group by c.colour_name;

-- Display the execution plan of the statement just run, using the
-- 'ROWSTATS LAST' format.
select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );
查询首先读取 BRICKS,它返回 300 行;而第二个表 COLOURS 只返回三行。连接中这两个表的顺序应该反过来!
收集统计数据
要更新表的统计信息,请调用 DBMS_stats 中的 GATHER_TABLE_STATS 例程:
-- Re-gather statistics for the colours table in the current schema
-- (ownname is null).
begin
  dbms_stats.gather_table_stats ( ownname => null, tabname => 'colours' );
end;
/
如果您插入、更新或删除表中超过 10% 的行,则统计数据与真实数据之间可能存在很大的不匹配。您知道该过程对数据进行了重大更改。因此,值得立即收集统计信息,而不是等待后台作业。
默认情况下,只有当表中超过 10% 的行发生更改后,数据库才会重新收集该表的统计信息。在具有数十亿行的表上,这意味着需要更改数亿行,才会重新收集统计信息!
更改数据收集
您可以通过降低收集阈值,让数据库更早地开始收集统计信息。方法是设置表首选项(table preferences)。您可以使用 DBMS_STATS 查看和设置这些首选项。
这将颜色表的更改阈值(陈旧百分比)设置为 1%:
-- Show the STALE_PERCENT preference currently in effect for colours.
select
  dbms_stats.get_prefs ( 'STALE_PERCENT', null, 'colours' )
from dual;

-- Set the STALE_PERCENT preference of colours to 1.
begin
  dbms_stats.set_table_prefs (
    ownname => null,
    tabname => 'colours',
    pname   => 'STALE_PERCENT',
    pvalue  => 1
  );
end;
/

-- Show the preference again to confirm the change.
select
  dbms_stats.get_prefs ( 'STALE_PERCENT', null, 'colours' )
from dual;
要强制优化器立即为查询重新生成执行计划,请在调用收集统计信息的例程时将 NO_INVALIDATE 设置为 FALSE:
-- Re-gather statistics for colours, passing no_invalidate => false.
begin
  dbms_stats.gather_table_stats (
    ownname       => null,
    tabname       => 'colours',
    no_invalidate => false
  );
end;
/
数据倾斜
-- Count the bricks whose weight is 1, recording plan row statistics.
select /*+ gather_plan_statistics */
  count (*)
from bricks
where weight = 1;

-- Display the execution plan of the statement just run, using the
-- 'ROWSTATS LAST' format.
select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );
值偏斜
统计 WEIGHT 列中每个值各出现多少次:
-- Number of bricks for each distinct weight, lightest first.
-- The terminating semicolon was missing here, which left the statement
-- unterminated.
select
  weight,
  count (*)
from bricks
group by weight
order by weight;
BRICKS 表中有 300 行,WEIGHT 列有 27 个不同的值。所以优化器估计下面这两个查询各返回 (300 / 27) ≈ 11 行。但是第一个查询返回 150 行,第二个只返回一行!
-- Count the bricks with weight 1, then show the plan row statistics.
select /*+ gather_plan_statistics */
  count (*)
from bricks
where weight = 1;

select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );

-- Count the bricks with weight 200, then show the plan row statistics.
select /*+ gather_plan_statistics */
  count (*)
from bricks
where weight = 200;

select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );
范围偏斜
值的分布也可能出现偏斜。例如,BRICK_ID 在 101 到 1000 之间存在间隙:
-- Counts bricks per block of 100 ids. The CTE generates bucket numbers
-- 1..15; a brick belongs to bucket ceil(brick_id / 100). The left join keeps
-- buckets that contain no bricks, which count(brick_id) reports as 0.
with buckets as (
  select level r
  from dual
  connect by level <= 15
)
select
  r,
  count ( brick_id )
from buckets
left join bricks
  on r = ceil ( brick_id / 100 )
group by r
order by r;
同样,数据库最初假定值在 BRICK_ID 的最小值和最大值之间均匀分布。所以它对下面这些查询估计的行数相同,尽管第一个和最后一个查询各返回 100 行,而中间的查询返回零行:
-- Three range counts over brick_id. Each query is followed by a plan display
-- so the row statistics recorded via the gather_plan_statistics hint can
-- actually be inspected; without it the hint had no visible effect.

-- Range 0..100.
select /*+ gather_plan_statistics */
  count (*)
from bricks
where brick_id between 0 and 100;

select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );

-- Range 400..500.
select /*+ gather_plan_statistics */
  count (*)
from bricks
where brick_id between 400 and 500;

select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );

-- Range 1000..1100.
select /*+ gather_plan_statistics */
  count (*)
from bricks
where brick_id between 1000 and 1100;

select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );
收集直方图
-- Show the histogram type and bucket count recorded for the BRICK_ID and
-- WEIGHT columns of BRICKS.
select
  utcs.column_name,
  utcs.histogram,
  utcs.num_buckets
from user_tables ut
inner join user_tab_col_statistics utcs
  on utcs.table_name = ut.table_name
where ut.table_name = 'BRICKS'
  and utcs.column_name in ( 'BRICK_ID', 'WEIGHT' );
BRICKS 中没有关于 BRICK_ID 和 WEIGHT 的直方图。您可以通过检查 *_TAB_COL_STATISTICS 中的 HISTOGRAM 和 NUM_BUCKETS 列来验证这一点
收集统计数据将在 WEIGHT 和 BRICK_ID 列上创建直方图
-- Re-gather statistics for bricks, passing no_invalidate => false.
begin
  dbms_stats.gather_table_stats (
    ownname       => null,
    tabname       => 'bricks',
    no_invalidate => false
  );
end;
/

-- Show the histogram type and bucket count now recorded for the BRICK_ID and
-- WEIGHT columns of BRICKS.
select
  utcs.column_name,
  utcs.histogram,
  utcs.num_buckets
from user_tables ut
inner join user_tab_col_statistics utcs
  on utcs.table_name = ut.table_name
where ut.table_name = 'BRICKS'
  and utcs.column_name in ( 'BRICK_ID', 'WEIGHT' );
-- Re-run the skew queries. Each query is followed by a plan display so the
-- row statistics recorded via the gather_plan_statistics hint can be
-- inspected. The first statement was also missing its terminating
-- semicolon, which made it run into the next one.

-- Bricks with weight 1.
select /*+ gather_plan_statistics */
  count (*) c
from bricks
where weight = 1;

select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );

-- Bricks with weight 200.
select /*+ gather_plan_statistics */
  count (*) c
from bricks
where weight = 200;

select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );

-- Bricks with brick_id in 400..500.
select /*+ gather_plan_statistics */
  count (*) c
from bricks
where brick_id between 400 and 500;

select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );
禁用直方图
-- Re-gather statistics for bricks with method_opt 'for all columns size 1'.
exec dbms_stats.gather_table_stats ( null, 'bricks', method_opt => 'for all columns size 1' ) ;

-- Show the histogram type and bucket count recorded for every column of
-- BRICKS. The terminating semicolon was missing here, which left the
-- statement unterminated.
select
  utcs.column_name,
  utcs.histogram,
  utcs.num_buckets
from user_tables ut
inner join user_tab_col_statistics utcs
  on ut.table_name = utcs.table_name
where ut.table_name = 'BRICKS';
如果要确保直方图保持禁用状态,最好通过设置表首选项来做到这一点:
-- Store 'for all columns size 1' as the method_opt preference of bricks,
-- then re-gather the table's statistics.
begin
  dbms_stats.set_table_prefs (
    ownname => null,
    tabname => 'bricks',
    pname   => 'method_opt',
    pvalue  => 'for all columns size 1'
  );

  dbms_stats.gather_table_stats ( ownname => null, tabname => 'bricks' );
end;
/
-- Show the histogram type recorded for every column of BRICKS.
select
  utcs.column_name,
  utcs.histogram
from user_tables ut
inner join user_tab_col_statistics utcs
  on utcs.table_name = ut.table_name
where ut.table_name = 'BRICKS';
创建扩展统计
-- Create an extended-statistics extension on bricks for the column group
-- (colour_rgb_value, shape); the function's return value is selected so it
-- is shown in the output.
select
  dbms_stats.create_extended_stats (
    null,
    'bricks',
    '(colour_rgb_value, shape)'
  )
from dual;
参数 (colour_rgb_value, shape) 告诉优化器收集这些列组合的统计信息。
-- Gather statistics on bricks for the (colour_rgb_value, shape) column
-- group, passing no_invalidate => false.
exec dbms_stats.gather_table_stats ( null, 'bricks', method_opt => 'for columns (colour_rgb_value, shape)', no_invalidate => false ) ;

-- Count the red cylinders, recording plan row statistics.
-- The terminating semicolon was missing here, which made this statement run
-- into the plan display below.
select /*+ gather_plan_statistics */
  count (*) c
from bricks
where colour_rgb_value = 'FF0000'
  and shape = 'cylinder';

-- Display the execution plan of the statement just run, using the
-- 'ROWSTATS LAST' format.
select *
from table ( dbms_xplan.display_cursor ( format => 'ROWSTATS LAST' ) );