在hive中创建create database db_baidu;use db_baidu;-- 创建表create table tb_shop(id int ,staytime int ,day string)hive中插入数据贼慢INSERT INTO tb_shop VALUES (1, 300, '20210101'); INSERT INTO tb_shop VALUES (2, 250, '20210101'); INSERT INTO tb_shop VALUES (3, 400, '20210101'); INSERT INTO tb_shop VALUES (4, 100, '20210101'); INSERT INTO tb_shop VALUES (1, 200, '20210102'); INSERT INTO tb_shop VALUES (3, 120, '20210102'); INSERT INTO tb_shop VALUES (4, 230, '20210102'); INSERT INTO tb_shop VALUES (1, 90, '20210103'); INSERT INTO tb_shop VALUES (2, 480, '20210103'); INSERT INTO tb_shop VALUES (3, 240, '20210103'); INSERT INTO tb_shop VALUES (1, 120, '20210104'); INSERT INTO tb_shop VALUES (2, 450, '20210104'); INSERT INTO tb_shop VALUES (4, 180, '20210104');
法一: ①先拟出四张表 当day分别为20210101 -- 20210104with t1 as (select id, staytime r1 from tb_shop where day = '20210101')②然后对这四张表进行左连接 求出每个id 每天的staytime ③通过concat_ws进行连接select id,concat_ws('_',rr1,rr2,rr3,rr4)stay_times from t5 order by id ;法二: ①先对id去重 再对day去重 然后对两者进行笛卡尔积连接的这张表 再和tb_shop进行左连接 得出staytime这列 同时用nvl进行判空 select v1.id, v1.day,nvl(staytime, 0) r1 from v1 left join tb_shop on v1.id = tb_shop.id and v1.day = tb_shop.daycollect_list转化前
②使用collect_list函数进行行转列操作 select t2.id, -- concat_ws('_', r1)rr2 collect_list(r1) from (select v1.id, nvl(staytime, 0) r1 from v1 left join tb_shop on v1.id = tb_shop.id and v1.day = tb_shop.day order by v1.id asc, v1.day asc)t2 group by t2.id;列转行效果
最终结果
# 法一
with t1 as (select id, staytime r1
from tb_shop
where day = '20210101')
, t2 as (select id, staytime r2
from tb_shop
where day = '20210102')
, t3 as (select id, staytime r3 from tb_shop where day = '20210103')
, t4 as (select id, staytime r4
from tb_shop
where day = '20210104')
, t5 as (select t1.id,
nvl(r1, '0') as rr1,
nvl(r2, '0') as rr2,
nvl(r3, '0') as rr3,
nvl(r4, '0') as rr4
from t1
left join t2 on t1.id = t2.id
left join t3 on t1.id = t3.id
left join t4 on t1.id = t4.id)
select id, concat_ws('_', rr1, rr2, rr3, rr4) staytimes
from t5
order by id ;
-- ---------------------------------------------------------------------
# 法二
create view v1 as
select id, day
from (select distinct id
from tb_shop) t1
join (select distinct day from tb_shop) t2;
select v1.id,v1.day,nvl(staytime,0)r1 from v1 left join tb_shop on v1.id = tb_shop.id and v1.day = tb_shop.day;
select t2.id,
concat_ws('_', collect_list(cast(r1 as string)))rr2
-- collect_list(r1)
from (select v1.id, v1.day,nvl(staytime, 0) r1
from v1
left join tb_shop on v1.id = tb_shop.id and v1.day = tb_shop.day
order by v1.id , v1.day )t2
group by t2.id;
①在hql中 没有 ifnull函数 可以用nvl 函数代替 但是类型得是字符串 (mysql中无要求)
②collect_list()的妙用 只有hivesql有 是一个列转行的区别
collect_set()也可以实现 后者会自动去重