Table of Contents
1. Requirement: the UV (number of unique visitors) of each store and the top 3 visitors of each store by number of visits
2. Give the number of orders, number of users, and total transaction amount for each month in 2017
3. Find the total number and average age of all users and active users
1. Requirement: the UV (number of unique visitors) of each store and the top 3 visitors of each store by number of visits
There are 50W (500,000) JD stores. Every time a customer views any product in any store, an access-log record is generated. The access log is stored in the table visit; the visitor's user id is user_id and the visited store is shop. Compute the following statistics:
1) UV per store (number of visitors)
2) The visitor information of the top 3 visits per store. Output store name, visitor id, number of visits
data:
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c
u5 b
u4 b
u6 c
u2 c
u1 b
u2 a
u2 a
u3 a
u5 a
u5 a
u5 a
Create table first
-- Access log table: one row per visit of a user to a shop.
-- Rows are stored as tab-delimited text.
create table visit (
    user_id string,  -- id of the visiting user
    shop    string   -- name of the visited shop
)
row format delimited
    fields terminated by '\t';
1.1 UV per store (number of visitors)
-- UV per shop: the number of distinct user_id values seen for each shop.
select shop, count(distinct user_id)
from visit
group by shop;
1.2 Visitor information of top3 visits to each store. Output store name, visitor id, number of visits
1.2.1 Query the number of times each store is visited by each user
-- Step result t1: number of visits (ct) for every (shop, user_id) pair.
-- "t1" is only the label the later steps use for this result; it is kept as a
-- comment because a bare identifier after the ';' is not valid SQL.
select
    shop,
    user_id,
    count(*) as ct
from visit
group by
    shop,
    user_id; -- t1
1.2.2 Calculate the ranking of each store by the number of user visits
-- Step result t2: rank the users of each shop by their visit count.
-- t1 is the per-(shop, user_id) visit count produced by step 1.2.1.
-- The ordering must be descending so that rk = 1 is the most frequent
-- visitor; an ascending order would rank the least frequent visitors first.
select
    shop,
    user_id,
    ct,
    rank() over (partition by shop order by ct desc) as rk
from t1; -- t2
1.2.3 Take the top three visitors of each store
-- Keep only the rows ranked in the top three of their shop.
-- t2 is the ranked result of the previous step (columns shop, user_id, ct, rk).
-- Because rk comes from rank(), users tied on ct share a rank, so a shop can
-- return more than three rows.
select
    shop,
    user_id,
    ct
from t2
where rk <= 3;
1.2.4 Final SQL
-- Top-3 visitors of every shop: shop, visitor id and number of visits.
--   t1: visits per (shop, user_id)
--   t2: t1 plus the rank of each user inside its shop, most visits first
-- The window is ordered by ct DESC so that rk = 1 is the most frequent
-- visitor (an ascending order would return the three least frequent ones).
-- rank() gives tied users the same rank, so ties can yield more than three
-- rows per shop.
select
    shop,
    user_id,
    ct
from (
    select
        shop,
        user_id,
        ct,
        rank() over (partition by shop order by ct desc) as rk
    from (
        select
            shop,
            user_id,
            count(*) as ct
        from visit
        group by
            shop,
            user_id
    ) t1
) t2
where rk <= 3;
2. Give the number of orders, number of users, and total transaction amount for each month in 2017
Given a table STG.ORDER with the following fields: Date, Order_id, User_id, amount. Sample row: 2017-01-01, 10029028, 1000003251, 33.57. Please write the SQL for the following statistics:
1) Give the number of orders, number of users, and total transaction amount for each month in 2017.
2) Give the number of new customers in November 2017 (customers whose first order was placed in November 2017)
Create table first
-- Order table used by the queries of section 2.
-- Rows are stored as tab-delimited text.
create table order_tab (
    dt       string,         -- order date, e.g. 2017-01-01
    order_id string,         -- order identifier
    user_id  string,         -- identifier of the ordering user
    amount   decimal(10,2)   -- order amount
)
row format delimited
    fields terminated by '\t';
2.1 Give the number of orders, number of users, and total transaction amount for each month in 2017
-- Monthly statistics for 2017: number of orders, number of distinct users and
-- total transaction amount per month.
-- The group by expression must match the selected month expression; the
-- original misspelled the function name as data_format.
select
    date_format(dt, 'yyyy-MM') as order_month,
    count(order_id)            as order_count,
    count(distinct user_id)    as user_count,
    sum(amount)                as total_amount
from order_tab
where
    date_format(dt, 'yyyy') = '2017'
group by
    date_format(dt, 'yyyy-MM');
2.2 Give the number of new customers in November 2017 (customers whose first order was placed in November 2017)
-- Number of new customers in November 2017.
-- The inner query keeps the users whose earliest order date falls in 2017-11.
-- The outer query counts those users: counting directly in the grouped query
-- would return one row per user (that user's order count) instead of a single
-- total.
select
    count(*) as new_customer_count
from (
    select
        user_id
    from order_tab
    group by
        user_id
    having
        date_format(min(dt), 'yyyy-MM') = '2017-11'
) first_orders;
3. Find the total number and average age of all users and active users
There are logs as follows, please write the code to get the total number and average age of all users and active users. (Active users refer to users who have access records for two consecutive days)
data
| Date (dt) | User (user_id) | Age |
| 2019-02-11 | test_1 | 23 |
| 2019-02-11 | test_2 | 19 |
| 2019-02-11 | test_3 | 39 |
| 2019-02-11 | test_1 | 23 |
| 2019-02-11 | test_3 | 39 |
| 2019-02-11 | test_1 | 23 |
| 2019-02-12 | test_2 | 19 |
| 2019-02-13 | test_1 | 23 |
| 2019-02-15 | test_2 | 19 |
| 2019-02-16 | test_2 | 19 |
Create table first
-- Access log with the user's age, used by the queries of section 3.
-- Rows are stored as comma-delimited text.
create table user_age (
    dt      string,  -- access date, e.g. 2019-02-11
    user_id string,  -- user identifier
    age     int      -- age of the user
)
row format delimited
    fields terminated by ',';
3.1 Group by date and user, sort by date and rank
-- Step result t1: one row per (dt, user_id), with the user's rank by date.
-- Grouping by (dt, user_id) collapses repeated accesses on the same day, so
-- rk numbers each user's distinct access dates 1, 2, 3, ... in date order.
-- "t1" is only the label used by the next step; it is kept as a comment
-- because a bare identifier after the ';' is not valid SQL.
select
    dt,
    user_id,
    min(age) as age,
    rank() over (partition by user_id order by dt) as rk
from user_age
group by
    dt,
    user_id; -- t1
3.2 Calculate the difference between date and ranking
-- Step result t2: subtract the rank (in days) from the access date.
-- Dates that are consecutive for a user increase by one day while rk also
-- increases by one, so every row of the same run of consecutive days gets the
-- same flag value.
-- t1 is the result of step 3.1; "t2" is only a label, kept as a comment
-- because a bare identifier after the ';' is not valid SQL.
select
    user_id,
    age,
    date_sub(dt, rk) as flag
from t1; -- t2
3.3 Keep the users that have at least 2 rows with the same flag value, i.e. users who were active on two consecutive days
-- Step result t3: one row per (user_id, flag) group that contains at least
-- two rows, i.e. per run of two or more consecutive access days.
-- t2 is the result of step 3.2. The original misspelled count as coount.
-- "t3" is only a label, kept as a comment because a bare identifier after
-- the ';' is not valid SQL.
select
    user_id,
    min(age) as age
from t2
group by
    user_id,
    flag
having
    count(*) >= 2; -- t3
3.4 De-duplicate the data (a user can have several separate runs of consecutive days, and therefore several rows in t3)
-- Step result t4: one row per active user.
-- t3 (step 3.3) can hold several rows for a user with more than one run of
-- consecutive days, so it is collapsed to one row per user_id here.
-- The original was missing the comma between the two selected columns.
-- "t4" is only a label, kept as a comment because a bare identifier after
-- the ';' is not valid SQL.
select
    user_id,
    min(age) as age
from t3
group by
    user_id; -- t4
3.5 Calculate the number and average age of active users (with two consecutive visits)
-- Number of active users (ct) and their average age, rounded to two decimals.
-- t4 is the one-row-per-active-user result of step 3.4.
select count(*) as ct,
       cast(sum(age) / count(*) as decimal(10,2))
from t4;
3.6 Deduplicate the global data set according to the user
-- Step result t5: one row per user in the whole log, with the user's age.
-- "t5" is only the label used by the next step; it is kept as a comment
-- because a bare identifier after the ';' is not valid SQL.
select
    user_id,
    min(age) as age
from user_age
group by
    user_id; -- t5
3.7 Calculate the number and average age of all users
-- Number of all users (user_count) and their average age, one decimal place.
-- t5 is the one-row-per-user result of step 3.6.
select count(*) as user_count,
       cast((sum(age) / count(*)) as decimal(10,1))
from t5;
3.8 Perform the union all operation on the two result sets of step 3.5 and step 3.7
-- Combined result t6: two rows with the same four columns.
--   row 1: active-user figures (twice_count, twice_count_avg_age); the
--          all-user columns are padded with 0
--   row 2: all-user figures (user_total_count, user_total_avg_age); the
--          active-user columns are padded with 0
-- Inner steps of the first branch:
--   t1: one row per (dt, user_id) with the user's rank by date
--   t2: flag = dt minus rk days; equal within a run of consecutive days
--   t3: (user_id, flag) groups with at least two days
--   t4: one row per active user
-- Second branch:
--   t5: one row per user in the whole log
-- "t6" is only a label for the combined result; it is kept as a comment
-- because a bare identifier after the ';' is not valid SQL.
select
    0 user_total_count,
    0 user_total_avg_age,
    count(*) twice_count,
    cast(sum(age) / count(*) as decimal(10,2)) twice_count_avg_age
from (
    select
        user_id,
        min(age) age
    from (
        select
            user_id,
            min(age) age
        from (
            select
                user_id,
                age,
                date_sub(dt, rk) flag
            from (
                select
                    dt,
                    user_id,
                    min(age) age,
                    rank() over (partition by user_id order by dt) rk
                from
                    user_age
                group by
                    dt,
                    user_id
            ) t1
        ) t2
        group by
            user_id,
            flag
        having
            count(*) >= 2
    ) t3
    group by
        user_id
) t4
union all
select
    count(*) user_total_count,
    cast((sum(age) / count(*)) as decimal(10,1)),
    0 twice_count,
    0 twice_count_avg_age
from (
    select
        user_id,
        min(age) age
    from
        user_age
    group by
        user_id
) t5; -- t6