Hive 和 Presto 常用函数的区别

hive和presto的pt分区函数

-- Hive: partition value for yesterday, formatted as yyyyMMdd.
-- Built from current_date rather than the zero-argument unix_timestamp(),
-- which Hive has deprecated since 2.0: it is non-deterministic and gets in
-- the way of query optimization.
pt = date_format(date_sub(current_date, 1), 'yyyyMMdd')

-- Presto: partition value for yesterday, formatted as yyyyMMdd.
-- now() is an alias for current_timestamp; '%Y%m%d' is the MySQL-style
-- pattern that date_format expects.
pt = date_format(date_add('day', -1, now()), '%Y%m%d')

hive将一列数据拆成多行数据(lateral view)

your_table

col1 col2
a 1,5,6
b 2,6
-- Hive
-- Method 1
-- Gathers the col2 values of each col1 group into one de-duplicated array.
-- NOTE(review): this merges rows instead of splitting a column into rows —
-- confirm it belongs in this section.
SELECT
    col1,
    collect_set(col2)
FROM your_table
GROUP BY col1;

-- Method 2
-- Step 1: explode the comma-separated col2 into one value per row.
SELECT explode(split(t.col2, ',')) AS col2
FROM your_table t;
-- Step 2: join the exploded values back to the source table.
-- The match condition is required: without it the join is a Cartesian product
-- that pairs every value with every col1. The exploded values are
-- de-duplicated first so a value shared by several rows is matched only once
-- per source row.
SELECT
    b.col1,
    a.col2
FROM (
    SELECT DISTINCT s.col2
    FROM (
        SELECT explode(split(t.col2, ',')) AS col2
        FROM your_table t
    ) s
) a
CROSS JOIN your_table b
WHERE find_in_set(a.col2, b.col2) > 0;

-- Method 3
-- LATERAL VIEW pairs each source row with the values exploded from its own col2.
SELECT
    src.col1,
    parts.bieming
FROM your_table src
LATERAL VIEW explode(split(src.col2, ',')) parts AS bieming

-- Aggregating over the exploded values: count how often each value occurs.
-- col2 is a comma-separated string, so it must be split into an array before
-- explode() can be applied to it.
-- Grouping by the column name rather than by position keeps the query
-- independent of Hive's position-alias setting.
SELECT
    t.bieming,
    count(1)
FROM your_table
LATERAL VIEW explode(split(col2, ',')) t AS bieming
GROUP BY t.bieming;

-- Multiple LATERAL VIEW clauses; they are applied in the order they appear.
SELECT
    myCol1,
    myCol2
FROM baseTable
    LATERAL VIEW explode(col1) lv1 AS myCol1
    LATERAL VIEW explode(col2) lv2 AS myCol2;

presto将一列数据拆成多行数据(cross join unnest)

-- Presto: turning one column into multiple rows
---- String column: split on commas, then unnest the resulting array
SELECT
    col1,
    split_col2
FROM your_table
CROSS JOIN UNNEST(split(col2, ',')) AS parts (split_col2)
遇到特殊情形(例如 col2 中带有多余的 '()' 字符)时,可以先用 replace(col2,'()','') 去掉这些字符,再进行 split。
---- Array column, e.g. [summer,winter]: unnest it directly
SELECT
    col1,
    split_col2
FROM your_table
CROSS JOIN UNNEST(col2) AS items (split_col2)

行表转列表

![在这里插入图片描述](https://img-blog.csdnimg.cn/760e09f1807a4c9d833c258c064dff13.png =300x)

-- Rows to columns with PIVOT: one output column per subject, holding SUM(score).
-- NOTE(review): neither the Hive nor the Presto manual documents a PIVOT
-- clause — confirm which engine this snippet is written for.
SELECT *
FROM student
PIVOT ( SUM(score) FOR subject IN (语文, 数学, 英语)
)
-- Portable rows-to-columns: one MAX(CASE ...) per subject, grouped by student.
-- No column has an ELSE branch, so a student without a score for a subject
-- gets NULL in every column alike (previously only "语文" fell back to 0).
SELECT
    name,
    MAX(CASE WHEN subject = '语文' THEN score END) AS "语文",
    MAX(CASE WHEN subject = '数学' THEN score END) AS "数学",
    MAX(CASE WHEN subject = '英语' THEN score END) AS "英语"
FROM student
GROUP BY name;

列表转行表

-- Columns to rows with UNPIVOT: each subject column becomes a (subject, score) row.
-- NOTE(review): neither the Hive nor the Presto manual documents an UNPIVOT
-- clause — confirm which engine this snippet is written for.
SELECT *
FROM student1
UNPIVOT (
  score FOR subject IN ("语文","数学","英语")
)
-- Portable columns-to-rows: one SELECT per subject column, stacked together.
-- UNION ALL instead of UNION: every branch carries a different subject literal
-- and is grouped by NAME, so no duplicate rows can occur and the
-- de-duplication pass of UNION is pure overhead.
SELECT NAME, '语文' AS subject, MAX("语文") AS score
FROM student1
GROUP BY NAME
UNION ALL
SELECT NAME, '数学' AS subject, MAX("数学") AS score
FROM student1
GROUP BY NAME
UNION ALL
SELECT NAME, '英语' AS subject, MAX("英语") AS score
FROM student1
GROUP BY NAME

猜你喜欢

转载自blog.csdn.net/F13122298/article/details/128064368