SQL interview questions: handwritten SQL cases (part two)

Table of Contents

 

1. Requirement: UV (number of visitors) of each store and the number of visits to each store top3

2. Give the number of orders, number of users, and total transaction amount for each month in 2017

3. Find the total number and average age of all users and active users


1. Requirement: UV (number of visitors) of each store and the number of visits to each store top3

There are 500,000 JD stores. Whenever a customer views any product in any store, an access log record is generated. The access log is stored in the table visit, where the visitor's user id is user_id and the visited store is shop. Compute the following statistics:

1) UV per store (number of visitors)

2) The visitor information of the top 3 visits per store. Output store name, visitor id, number of visits

data:

u1	a
u2	b
u1	b
u1	a
u3	c
u4	b
u1	a
u2	c
u5	b
u4	b
u6	c
u2	c
u1	b
u2	a
u2	a
u3	a
u5	a
u5	a
u5	a

 Create table first

-- Access log table: tab-separated text, one row per logged visit.
create table visit (
    user_id string,  -- id of the visiting user
    shop    string   -- name of the visited shop
)
row format delimited
    fields terminated by '\t';

1.1 UV per store (number of visitors)

-- UV per shop: each visitor is counted once per shop, however often they came.
select shop, count(distinct user_id)
from visit
group by shop;

1.2 Visitor information of top3 visits to each store. Output store name, visitor id, number of visits

1.2.1 Query the number of times each store is visited by each user

-- Number of visits for every (shop, user) pair.
-- This result set is referred to as t1 in the next step.
select
    shop,
    user_id,
    count(*) as ct
from visit
group by
    shop,
    user_id;

1.2.2 Calculate the ranking of each store by the number of user visits

-- Rank the visitors of each shop by visit count, most frequent first.
-- The ordering must be descending: with an ascending order, rk <= 3 would
-- select the least frequent visitors instead of the top ones.
-- rank() gives tied visitors the same rank, so ties are kept together.
-- This result set is referred to as t2 in the next step.
select
    shop,
    user_id,
    ct,
    rank() over (partition by shop order by ct desc) as rk
from t1;

1.3 Take the top three of each store

-- Keep only the visitors ranked in the top three of their shop.
select
    shop,
    user_id,
    ct
from t2
where rk <= 3;

1.4 Final SQL

-- Top-3 visitors of every shop by number of visits.
-- Output: shop name, visitor id, number of visits.
select
    shop,
    user_id,
    ct
from (
    -- t2: rank visitors within each shop, most visits first.
    -- Descending order is required so that rk = 1 is the most frequent visitor.
    select
        shop,
        user_id,
        ct,
        rank() over (partition by shop order by ct desc) as rk
    from (
        -- t1: number of visits per (shop, user) pair.
        select
            shop,
            user_id,
            count(*) as ct
        from visit
        group by
            shop,
            user_id
    ) t1
) t2
where rk <= 3;

2. Give the number of orders, number of users, and total transaction amount for each month in 2017

Given a table STG.ORDER with the following fields: Date, Order_id, User_id, amount. Data sample: 2017-01-01, 10029028, 1000003251, 33.57. Please write SQL for the following statistics:

1) Give the number of orders, number of users, and total transaction amount for each month in 2017.

2) Give the number of new customers in November 2017 (that is, customers whose first order was placed in November 2017)

Create table first

-- Order table: tab-separated text.
create table order_tab (
    dt       string,         -- order date, e.g. 2017-01-01
    order_id string,
    user_id  string,
    amount   decimal(10,2)   -- transaction amount
)
row format delimited
    fields terminated by '\t';

2.1 Give the number of orders, number of users, and total transaction amount for each month in 2017

-- Per-month order count, distinct user count and total amount for 2017.
-- The group by expression must repeat the select expression exactly
-- (date_format, not data_format), otherwise the query does not compile.
select
    date_format(dt, 'yyyy-MM') as order_month,
    count(order_id)            as order_count,
    count(distinct user_id)    as user_count,
    sum(amount)                as total_amount
from order_tab
where
    date_format(dt, 'yyyy') = '2017'
group by
    date_format(dt, 'yyyy-MM');

2.2 Give the number of new customers in November 2017 (that is, customers whose first order was placed in November 2017)

-- Number of new customers in November 2017.
-- The inner query keeps the users whose earliest order date falls in 2017-11;
-- the outer query counts them. Counting inside the grouped query would return
-- one row per user (that user's order count) rather than a single total.
select
    count(*) as new_customer_count
from (
    select
        user_id
    from order_tab
    group by
        user_id
    having
        date_format(min(dt), 'yyyy-MM') = '2017-11'
) first_orders;

3. Find the total number and average age of all users and active users

There are logs as follows, please write the code to get the total number and average age of all users and active users. (Active users refer to users who have access records for two consecutive days)

data

Date (dt), User (user_id), Age

2019-02-11,test_1,23
2019-02-11,test_2,19
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-12,test_2,19
2019-02-13,test_1,23
2019-02-15,test_2,19
2019-02-16,test_2,19

Create table first

-- Access log carrying the user's age on every row: comma-separated text.
create table user_age (
    dt      string,  -- access date, e.g. 2019-02-11
    user_id string,
    age     int
)
row format delimited
    fields terminated by ',';

3.1 Group by date and user, sort by date and rank

-- Collapse the log to one row per (date, user) and number each user's
-- days in date order. min(age) only carries the age through the group by.
-- This result set is referred to as t1 in the next step.
select
    dt,
    user_id,
    min(age) as age,
    rank() over (partition by user_id order by dt) as rk
from user_age
group by
    dt,
    user_id;

3.2 Calculate the difference between date and ranking

-- Subtract the rank from the date. Within a run of consecutive days both
-- dt and rk advance by one per row, so every day of the run gets the same flag.
-- This result set is referred to as t2 in the next step.
select
    user_id,
    age,
    date_sub(dt, rk) as flag
from t1;

3.3 Filter the groups whose row count is greater than or equal to 2, that is, users who are active for at least two consecutive days

-- Keep the (user, flag) groups that contain at least two days, i.e. runs of
-- two or more consecutive days. A user with several runs appears once per run.
-- This result set is referred to as t3 in the next step.
select
    user_id,
    min(age) as age
from t2
group by
    user_id,
    flag
having
    count(*) >= 2;

3.4 De-duplicate data (a user can log in continuously at multiple different points in time)

-- Reduce to one row per active user; a user with several consecutive-day
-- runs would otherwise be counted more than once.
-- This result set is referred to as t4 in the next step.
select
    user_id,
    min(age) as age
from t3
group by
    user_id;

3.5 Calculate the number and average age of active users (with two consecutive visits)

-- Active users: head count and average age, rounded to two decimal places.
select count(*) as ct, cast(sum(age) / count(*) as decimal(10,2))
from t4;

3.6 Deduplicate the global data set according to the user

-- One row per user over the whole log, so each user is counted once.
-- This result set is referred to as t5 in the next step.
select
    user_id,
    min(age) as age
from user_age
group by
    user_id;

3.7 Calculate the number and average age of all users

-- All users: head count and average age, rounded to one decimal place.
select count(*) as user_count, cast(sum(age) / count(*) as decimal(10,1))
from t5;

3.8 Perform the union all operation on the two result sets from steps 3.5 and 3.7

-- Combine both statistics into one four-column result set (referred to as t6):
--   row 1: active-user count and average age, all-user columns filled with 0
--   row 2: all-user count and average age, active-user columns filled with 0
select
    0 as user_total_count,
    0 as user_total_avg_age,
    count(*) as twice_count,
    cast(sum(age) / count(*) as decimal(10,2)) as twice_count_avg_age
from (
    -- t4: one row per active user
    select
        user_id,
        min(age) as age
    from (
        -- t3: (user, flag) runs of at least two consecutive days
        select
            user_id,
            min(age) as age
        from (
            -- t2: consecutive days of a user share the same flag
            select
                user_id,
                age,
                date_sub(dt, rk) as flag
            from (
                -- t1: one row per (date, user), numbered by date per user
                select
                    dt,
                    user_id,
                    min(age) as age,
                    rank() over (partition by user_id order by dt) as rk
                from user_age
                group by
                    dt,
                    user_id
            ) t1
        ) t2
        group by
            user_id,
            flag
        having
            count(*) >= 2
    ) t3
    group by
        user_id
) t4

union all

select
    count(*) as user_total_count,
    cast(sum(age) / count(*) as decimal(10,1)) as user_total_avg_age,
    0 as twice_count,
    0 as twice_count_avg_age
from (
    -- t5: one row per user over the whole log
    select
        user_id,
        min(age) as age
    from user_age
    group by
        user_id
) t5;

 

Guess you like

Origin blog.csdn.net/Poolweet_/article/details/109614982