Vertica system management statements + Vertica real-time Kafka consumption

-- View locked objects and lock types
select object_name,lock_mode,transaction_id,request_timestamp,transaction_description from locks;
select transaction_id from locks where object_name like '%servefc%';
-- Query the history of executed queries
select schema_name,table_name,user_name,query_type,is_executing,query_start from query_profiles;
select * from query_profiles;
select schema_name,table_name,user_name,query_type,is_executing,query_start from query_profiles where is_executing='t';
-- Top 10 most frequently executed SQL statements
SELECT request,COUNT(*) FROM query_requests GROUP BY request ORDER BY COUNT(*) DESC LIMIT 10; 
-- Top 10 longest-running SQL statements
SELECT request,request_duration_ms FROM query_requests ORDER BY request_duration_ms DESC LIMIT 10; 
-- Top 10 SQL statements by memory consumption
SELECT request,memory_acquired_mb FROM query_requests WHERE memory_acquired_mb IS NOT NULL ORDER BY memory_acquired_mb DESC LIMIT 10; 
-- Session management
select * from locks;                                              -- get the transaction_id column
select * from sessions where transaction_id in();                 -- plug in the transaction_id from above; transaction_start shows whether the lock is an old one
select CLOSE_SESSION('sessionid');                                -- pass in the session_id found above
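Putting the locks/sessions/CLOSE_SESSION steps together, a minimal sketch (the table-name pattern and the session_id literal are illustrative placeholders, not from the original post):
-- Hedged example: find the session holding a lock on a given table, then close it
select b.session_id, b.user_name, b.transaction_start
from locks a
join sessions b on a.transaction_id = b.transaction_id
where a.object_name like '%servefc%';
select CLOSE_SESSION('v_db_node0001-12345:0xabc');   -- illustrative session_id; substitute the one returned above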
-- View the historical operations of the session that caused the deadlock
select c.query,c.query_start 
from locks a 
left join sessions b 
on a.transaction_id=b.transaction_id 
left join query_profiles c 
on b.session_id=c.session_id 
where a.object_name like '%tb_dw_ct_cti_agent_call_list_min%';
-- Resource pools
select * from RESOURCE_POOLS;
-- Check license compliance status
SELECT GET_COMPLIANCE_STATUS();
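GET_COMPLIANCE_STATUS reports against the most recent audit; the audit itself can be refreshed on demand (this companion call is an addition, not in the original post):
-- Hedged example: re-audit the whole database so license_audits (queried further below) is up to date
SELECT AUDIT('');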
-- Create the ODM user resource pool, mainly for external-table queries
create resource pool pool_noas_odm MAXMEMORYSIZE '50%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY 12;
-- Create the DW user resource pool, mainly for large-table loads, joins, and aggregation
create resource pool pool_noas_dw MAXMEMORYSIZE '90%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY AUTO;
-- Create the APP user resource pool, mainly for queries, table joins, and metric calculation
create resource pool pool_noas_app MAXMEMORYSIZE '80%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY 24;
CREATE USER "user_dw"     WITH PASSWORD 'cmcc';
-- Resource pool example: low-concurrency large queries
CREATE RESOURCE POOL l_pool QUEUETIMEOUT NONE PLANNEDCONCURRENCY 6 MAXCONCURRENCY 4;
-- Resource pool example: high-concurrency small queries
CREATE RESOURCE POOL s_pool MEMORYSIZE '1G' EXECUTIONPARALLELISM 4 PRIORITY 10 QUEUETIMEOUT NONE PLANNEDCONCURRENCY 36 MAXCONCURRENCY 50;
GRANT ALL  ON RESOURCE POOL l_pool TO user_dw;
ALTER USER user_dw RESOURCE POOL l_pool;
GRANT ALL  ON SCHEMA DW    TO user_dw;
-- Create users
create user "dev_noas_odm"identified by 'noas_odm' resource pool pool_noas_odm;
create user "dev_noas_dw"identified by 'noas_dw' resource pool pool_noas_dw;
create user "dev_noas_app"identified by 'noas_app' resource pool pool_noas_app;
-- Create schemas
create schema if not exists noas.noas_odm authorization dev_noas_odm;
create schema if not exists noas.noas_dw authorization dev_noas_dw;
create schema if not exists noas.noas_app authorization dev_noas_app;
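A quick check, added here as a sketch, that each new user picked up its resource pool:
-- Hedged example: verify user-to-pool assignments (users is a system table)
select user_name, resource_pool from users
where user_name in ('dev_noas_odm', 'dev_noas_dw', 'dev_noas_app');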

select start_timestamp,
    request_id,
    statement_id,
    request_type,
    substr(request,1,85),
    request_duration_ms 
from query_requests 
    where request like '%prov_code as 220%' 
    and request_type ='LOAD' 
order by start_timestamp desc limit 10;

SELECT
    stream_name,
    schema_name,
    table_name,
    is_executing,
    accepted_row_count,
    rejected_row_count,
    DATEDIFF('ss', load_start::TIMESTAMP, GETDATE()::TIMESTAMP) AS DurationSec,
    ROUND((accepted_row_count+rejected_row_count)/DATEDIFF('ss', load_start::TIMESTAMP, GETDATE()::TIMESTAMP), 3.0) AS RowsPerSec
FROM
    load_streams
WHERE
    is_executing='true';

	
-- View a table's DDL
select export_objects('','tb_dw_ct_tape_new_onest_day');
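export_objects can also dump a whole schema, or write the DDL to a file on the initiator node instead of returning it inline (the schema name and file path below are illustrative):
-- Hedged examples: schema-wide DDL export
select export_objects('', 'noas_dw');
select export_objects('/tmp/noas_dw_ddl.sql', 'noas_dw');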

select node_name,storage_path,disk_space_free_percent from disk_storage  where storage_path not ilike '%catalog%' order by disk_space_free_percent;

select table_schema,count(1) as cnt from tables where table_schema like '%csap%' or table_schema like '%huangzhan%' group by table_schema order by cnt desc;

select count(1) from tables where table_schema like '%csap%';
select substr(table_name, 1, instr(table_name, '_', 1, 3)) as name_prefix, count(1) as cnt
from tables
where table_schema like '%csap%' or table_schema like '%huangzhan%'
group by 1 order by cnt desc;

select substr(table_name, regexp_instr(table_name,'_',1,3), regexp_instr(table_name,'_',1,4)-regexp_instr(table_name,'_',1,3)),
count(1) cnt
from tables
where (table_schema like '%csap%'
or table_schema like '%huangzhan%')
and table_name like 'tb_%'
group by substr(table_name, regexp_instr(table_name,'_',1,3), regexp_instr(table_name,'_',1,4)-regexp_instr(table_name,'_',1,3)) order by cnt desc;

select split_part('tb_dw_ct_example_table_day', '_', 4);   -- split_part() is another way to pull out a name component; the example string is illustrative

select count(1) from tables where table_name like '%rena%';

select sum(TABLE_SIZE_GB) from public.tb_wh_tableinfo_20180510 where  table_name like '%inre%' or table_name like '%sqm%' or table_name like '%vona%'  or table_name like '%qymn%' ;

#!/bin/bash
CurrentDir=`pwd`
VSQL='/opt/vertica/bin/vsql -Udbadmin -wvertica11'
DB_Name=CSAP_20_132
Logfile=${CurrentDir}/${DB_Name}_DB_`date "+%Y%m%d_%H%M%S"`.log
$VSQL <<EOF |tee ${Logfile}
-----------------------------echo
-----------------------------echo >>> License status
select get_compliance_status() ;
-----------------------------echo >>> Disk space check - usage of the data and catalog directories
select node_name, storage_path, storage_usage, rank, disk_space_free_percent from disk_storage order by node_name;
-----------------------------echo >>> Client connection versions
select case when instr(client_label,'-')>0 then substr(client_label,1,instr(client_label,'-',1,2)) else client_label end client_label, count(*) from dc_session_starts 
group by 1 order by 2 desc;
-----------------------------echo >>> Node status
select node_name,last_msg_from_node_at ts, node_type, node_state, node_address,catalog_path from nodes order by node_name;
-----------------------------echo >>> Catalog and data directories
select node_name, storage_path, storage_usage, rank, disk_space_free_percent from disk_storage order by node_name;
-----------------------------echo >>> Resource pool settings
select name,memorysize,maxmemorysize,plannedconcurrency ,maxconcurrency ,priority ,runtimepriority ,queuetimeout,runtimecap ,cascadeto  from resource_pools;
-----------------------------echo >>> Cluster catalog size
select node_name, max(ts) as ts, max(catalog_size_in_MB) as catalog_size_in_MB
from (select node_name, trunc((dc_allocation_pool_statistics_by_second."time")::TIMESTAMP, 'SS'::VARCHAR(2)) AS ts,
             sum((dc_allocation_pool_statistics_by_second.total_memory_max_value - dc_allocation_pool_statistics_by_second.free_memory_min_value))/1024//1024 AS catalog_size_in_MB
      from dc_allocation_pool_statistics_by_second
      group by 1, trunc((dc_allocation_pool_statistics_by_second."time")::TIMESTAMP, 'SS'::VARCHAR(2))) foo
group by 1 order by 1;
-----------------------------echo >>> Raw database size
select audit_start_timestamp ts,database_size_bytes/1024/1024//1024 as dbsize_gb,trunc(usage_percent,2) "use_per(%)" from license_audits where audited_data= 'Total' order by audit_start_timestamp desc limit 1;
-----------------------------echo >>> Compressed database size
select sysdate ds, trunc(SUM(ps.wos_used_bytes + ps.ros_used_bytes)/1024/1024/1024::float) AS total_size_gb
from projection_storage ps
WHERE (ps.wos_used_bytes + ps.ros_used_bytes) > 0 group by 1;
-----------------------------echo >>> Partition counts per table
select sysdate ds, table_schema, projection_name, count(distinct partition_key) partition_cnt, avg(ROS_ROW_COUNT) avg_rows
from partitions group by 1,2,3
having count(distinct partition_key) > 900 order by 4 desc, 2 limit 10;
-----------------------------echo >>> Dimension or small tables with unreasonable partitioning
select distinct a.table_schema||'.'||t.table_name as table_name, a.is_segmented,
       substr(t.partition_expression, instr(t.partition_expression,'.')+1) partition_exp,
       a.partition_cnt, a.rows_cnt, a.avg_partition_rows_cnt
from (select pt.table_schema, pt.projection_name, pj.anchor_table_id, pj.is_segmented,
             count(distinct pt.partition_key) partition_cnt,
             case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end rows_cnt,
             (case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end)//count(distinct pt.partition_key) avg_partition_rows_cnt
      from partitions pt join projections pj using(projection_id) group by 1,2,3,4) a
join tables t on t.table_id = a.anchor_table_id
where a.rows_cnt < 10000000
order by a.avg_partition_rows_cnt, a.rows_cnt desc, partition_cnt desc limit 10;
-----------------------------echo >>> Fact tables with unreasonable (missing) partitioning
select distinct a.table_schema||'.'||a.table_name as table_name, a.is_segmented, a.rows_cnt
from (select t.table_schema, t.table_name, pj.projection_name, pj.is_segmented,
             case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end rows_cnt
      from projections pj join tables t on pj.anchor_table_id = t.table_id join projection_storage pt using (projection_id)
      group by 1,2,3,4) a
where a.rows_cnt >= 1000000
  and not exists(select 'x' from partitions p where p.projection_name = a.projection_name)
order by a.rows_cnt desc limit 10;
-----------------------------echo >>> Dimension or small tables that should not be segmented
select p.projection_Schema || '.' || p.anchor_Table_name, sum(ps.ros_row_count) from projections p, projection_storage ps where p.projection_name = ps.projection_name 
and ps.ros_row_count < 1000000 and p.is_segmented group by 1 order by 2 asc limit 10;
-----------------------------echo >>> Fact tables that should be segmented
select proj, row_count/(proj_count) as table_row_count
from (select p.projection_schema || '.' || p.anchor_table_name as proj, sum(ps.ros_row_count) as row_count,
             count(distinct ps.projection_name) as proj_count
      from projections p, projection_storage ps
      where p.projection_name = ps.projection_name and p.projection_schema = ps.projection_schema and not p.is_segmented
      group by 1) pps
where row_count/(proj_count) > 1000000 order by table_row_count desc limit 10;
-----------------------------echo >>> Data distribution skew
select projection, min_used_bytes//1024^3 min_used_GB, max_used_bytes//1024^3 max_used_GB, round(skew_pct::float,2) skew_pct
from (select distinct trim(ps.projection) projection,
             first_value(used_bytes) over (w order by used_bytes asc) as min_used_bytes,
             first_value(used_bytes) over (w order by used_bytes desc) as max_used_bytes,
             first_value(used_bytes) over (w order by used_bytes asc) / first_value(used_bytes) over (w order by used_bytes desc) as skew_pct
      from (select node_name, projection_id, projection_schema || '.' || projection_name as projection, sum(used_bytes) as used_bytes
            from projection_storage group by 1,2,3) as ps
      join projections p using (projection_id)
      where p.is_segmented and ps.used_bytes > 0
      window w as (partition by ps.projection)) t
where skew_pct < 0.8 order by 4 limit 10;
-----------------------------echo >>> Projection count per table
select t.table_schema,t.table_name, count(distinct p.projection_name) projection_cnt from tables t join projections p on t.table_id = p.anchor_table_id group by 1,2 having count(distinct p.projection_id)>10 order by 3 desc limit 10;
-----------------------------echo >>> ROS container count per projection per node
select projection_name, node_name, sum(ros_count) as ros_cnt from projection_storage group by projection_name, node_name having sum(ros_count)>900 order by ros_cnt desc; 
-----------------------------echo >>> Unused projections
select anchor_table_name from projections where projection_name not in (select projection_name from projection_usage);
-----------------------------echo >>> SQL execution time distribution by query type
select query_type,case when query_duration_us < 1000000 then 'A. sub-second'
when query_duration_us between 1000000 and 3000000 then 'B. 1-3 seconds'
when query_duration_us between 3000000 and 7000000 then 'C. 3-7 seconds'
when query_duration_us between 7000000 and 15000000 then 'D. 7-15 seconds'
when query_duration_us between 15000000 and 30000000 then 'E. 15-30 seconds'
when query_duration_us between 30000000 and 60000000 then 'F. 30-60 seconds'
when query_duration_us between 60000000 and 180000000 then 'G. 1-3 minutes'
when query_duration_us between 180000000 and 600000000 then 'H. 3-10 minutes'
when query_duration_us between 600000000 and 1800000000 then 'I. 10-30 minutes'
when query_duration_us > 1800000000 then 'J. more than 30 minutes' end, count(*)
from query_profiles group by 1,2 order by 1,2 asc ;

-----------------------------echo >>> Largest tables
select projection_schema, anchor_table_name, to_char(sum(used_bytes)/1024/1024/1024,'999,999.99') as disk_space_used_gb from projection_storage group by projection_schema, anchor_table_name order by disk_space_used_gb desc limit 10;
-----------------------------echo >>> Top SQL 
select query_duration_us, table_name, user_name, processed_row_count as rows_processed, substr(query, 0,70) from query_profiles order by query_duration_us desc limit 10;
EOF


--------------- Average execution time per query type over the last hour (now() - 1/24) ------------------
select 
(now() - 1/24) from_date, 
now(),  
query_type,  
count(1) total_exec_sql,   
min(query_duration_us//1000) min_ms,   
max(query_duration_us//1000) max_ms,   
avg(query_duration_us//1000) avg_ms
from query_profiles 
where query_start::timestamp > now() - 1/24 
group by 1,2,3 order by 7 desc ;

------------------------------ Loading JSON into Vertica -------------------------------
dbadmin@LY1F-R021706-VM09:[/home/dbadmin]cat json.dat 
{ "name": "Everest", "type":"mountain", "height": 29029, "hike_safety": 34.1  }
{ "name": "Mt St Helens", "type": "volcano", "hike_safety": 15.4 }

CREATE TABLE mountains(name varchar(64), type varchar(32), height integer);
COPY mountains FROM local '/home/dbadmin/json.dat' WITH PARSER fjsonParser();
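A minimal verification query, added here as a sketch: Mt St Helens has no height key in the source file, so its height column comes back NULL.
-- Hedged example: confirm both JSON records landed
SELECT name, type, height FROM mountains ORDER BY name;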

-- Compute New Values for the Target Table

https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/AdministratorsGuide/BulkLoadCOPY/IgnoringColumnsAndFieldsInTheLoadFile.htm


-------------------------- Transforming data during the load -------------------------------------------

CREATE TABLE names(first_name VARCHAR(20), last_name VARCHAR(20), full_name VARCHAR(60));
CREATE TABLE
=> COPY names(first_name,middle_name FILLER VARCHAR(20),last_name,full_name AS first_name||' '||middle_name||' '||last_name) FROM STDIN;
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself.
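A hedged usage sketch for the STDIN load above (the names are made up): the input rows use Vertica's default '|' delimiter, the middle name is consumed by the FILLER column, and full_name is computed as e.g. 'John Quincy Adams'.
John|Quincy|Adams
\.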

-------------------- Example ----------------------------
copy tb_dw_ct_knba_klg_webpage_day_yzg 
(statis_date as to_char(to_timestamp(op_time/1000)::date,'yyyymmdd'),data_time,data_ip,data_type,sis_id ,prov_code,serial_num ,staff_id,call_num,op_time,title,url,refer,
page_loadtime,dom_loadtime ,white_time ,konwledge_channel,knowledge_id ,event_value,call_bgntime ,call_endtime)
from local '/data/interface/servefc/zhishikubak/test/page_zhishiku_2018-06-13_13079.txt';
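After the load, the query_requests pattern shown near the top of this post can confirm the COPY ran (a sketch reusing the same system table):
-- Hedged example: check the most recent LOAD requests against this table
select start_timestamp, request_duration_ms, substr(request, 1, 85)
from query_requests
where request_type = 'LOAD'
  and request like '%tb_dw_ct_knba_klg_webpage_day_yzg%'
order by start_timestamp desc limit 5;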



select distinct(substr(request,1,100)), request_duration_ms, start_timestamp from query_requests where request like '%select%' order by start_timestamp desc limit 50;

Shell script configuration for real-time Kafka consumption into Vertica:

# Vertica 8.1.0
kafka_config=" --config-schema kafka_date_dimension4 --dbhost 192.168.1.1 --username dbadmin --password xxxx"

# shutdown instance
/opt/vertica/packages/kafka/bin/vkconfig shutdown --instance-name kafka_date_dimension4 ${kafka_config}
echo "Shutdown Instance Complete!"
# Drop the old config schema and truncate the target table
$VSQL <<- EOF
drop schema kafka_date_dimension4 cascade;
truncate table csapsmpl.tb_svr_u_cntmng;
EOF

# Create and Configure Scheduler
/opt/vertica/packages/kafka/bin/vkconfig scheduler --create --add ${kafka_config} --frame-duration '00:00:10' --eof-timeout-ms 3000 --operator dbadmin
echo "Create and Configure Scheduler Complete!"

# Create a Cluster
/opt/vertica/packages/kafka/bin/vkconfig cluster --create --cluster kafka_cluster --hosts 192.168.125.199:6667,192.168.125.136:6667,192.168.125.110:6667 ${kafka_config}
echo "Create Cluster Complete!"

# Create a Data Table
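# Hedged sketch only: the original post leaves this step blank and does not show the real table definition.
# The columns below are purely illustrative placeholders for csapsmpl.tb_svr_u_cntmng.
$VSQL <<- EOF
create table if not exists csapsmpl.tb_svr_u_cntmng (
    serial_num  varchar(64),
    prov_code   varchar(8),
    op_time     int,
    content     varchar(4000)
);
EOF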


# Create a Source
/opt/vertica/packages/kafka/bin/vkconfig source --create --source KAFKA_CSAP_SERVERCORE_USMPL_CNTMNGT_TOPIC  --cluster kafka_cluster --partitions 1 ${kafka_config}
echo "Create Kafka Source Complete!"

# Create a Target
/opt/vertica/packages/kafka/bin/vkconfig target --create --target-schema csapsmpl --target-table tb_svr_u_cntmng ${kafka_config}
echo "Create Target Complete!"

# Create a Load-Spec
/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_date_dimension_spec2 --parser KafkaJSONParser --parser-parameters flatten_arrays=False,flatten_maps=False ${kafka_config}
#/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_date_dimension_spec --parser KafkaJSONParser --filters "FILTER KafkaInsertDelimiters(delimiter=E'\n')" ${kafka_config}

echo "Create Load-Spec Complete!"

# Create a Microbatch
/opt/vertica/packages/kafka/bin/vkconfig microbatch --create --microbatch tb_svr_u_cntmng --target-schema csapsmpl --target-table tb_svr_u_cntmng --rejection-schema csapsmpl --rejection-table tb_svr_u_cntmng_rej --load-spec load_date_dimension_spec2 --add-source KAFKA_CSAP_SERVERCORE_USMPL_CNTMNGT_TOPIC --add-source-cluster kafka_cluster ${kafka_config}
echo "Create Microbatch Complete!"

# Launch the Scheduler
/opt/vertica/packages/kafka/bin/vkconfig launch --instance-name load_date_dimension_spec2 ${kafka_config} &
echo "Launch the Scheduler Complete!"
echo "Done!"



Reposted from blog.csdn.net/yezonggang/article/details/83339434