SQL Whiteboard

Amazon

BIE People Analytics

Sample Data

DROP TABLE IF EXISTS CURRENT_AVAILABLE_SUPPLY;
DROP TABLE IF EXISTS FORECAST_WEEKLY;
CREATE TABLE IF NOT EXISTS CURRENT_AVAILABLE_SUPPLY (AZ CHAR(6), CURRENT_AVAILABLE_SUPPLY INT);
CREATE TABLE IF NOT EXISTS FORECAST_WEEKLY (AZ CHAR(6), FORECAST_WEEK DATE, NEW_DEMAND INT, NEW_SUPPLY INT);
TRUNCATE TABLE CURRENT_AVAILABLE_SUPPLY;
INSERT INTO CURRENT_AVAILABLE_SUPPLY (AZ, CURRENT_AVAILABLE_SUPPLY) VALUES ('IAD001', 40);
INSERT INTO CURRENT_AVAILABLE_SUPPLY (AZ, CURRENT_AVAILABLE_SUPPLY) VALUES ('IAD002', 30);
TRUNCATE TABLE FORECAST_WEEKLY;
INSERT INTO FORECAST_WEEKLY (AZ, FORECAST_WEEK, NEW_DEMAND, NEW_SUPPLY) VALUES ('IAD001', '2021-04-15', 10, 0);
INSERT INTO FORECAST_WEEKLY (AZ, FORECAST_WEEK, NEW_DEMAND, NEW_SUPPLY) VALUES ('IAD001', '2021-04-22', 15, 4);
INSERT INTO FORECAST_WEEKLY (AZ, FORECAST_WEEK, NEW_DEMAND, NEW_SUPPLY) VALUES ('IAD001', '2021-04-29', 10, 0);
INSERT INTO FORECAST_WEEKLY (AZ, FORECAST_WEEK, NEW_DEMAND, NEW_SUPPLY) VALUES ('IAD001', '2021-05-05', 15, 200);
  1. What is the available supply for each AZ, for each week in the next year?
  • Step 1. UNION ALL the two tables, treating CURRENT_AVAILABLE_SUPPLY as the initial NEW_SUPPLY.
  • Step 2. Take the running (cumulative) sum of NEW_SUPPLY and subtract the running sum of NEW_DEMAND.
  • Step 3. Remove the seed rows that came from CURRENT_AVAILABLE_SUPPLY.
/*
Table: CURRENT_AVAILABLE_SUPPLY
COLUMN NAMES/Sample Data below:
AZ CURRENT_AVAILABLE_SUPPLY
IAD001 40
IAD002 30

Table: FORECAST_WEEKLY
COLUMN NAMES/Sample Data below:
AZ FORECAST_WEEK NEW_DEMAND NEW_SUPPLY
IAD001 2021-04-15 10 0
IAD001 2021-04-22 15 4
IAD001 2021-04-29 10 0
IAD001 2021-05-05 15 200
*/

# Step 1. UNION ALL two tables. Treat CURRENT_AVAILABLE_SUPPLY as initial NEW_SUPPLY
WITH CTE1 AS
(
SELECT
AZ,
NULL AS FORECAST_WEEK,
NULL AS NEW_DEMAND,
CURRENT_AVAILABLE_SUPPLY AS NEW_SUPPLY
FROM CURRENT_AVAILABLE_SUPPLY
UNION ALL
SELECT
AZ,
WEEK(FORECAST_WEEK), # week number used as the week key; the NULL seed row sorts before all weeks
SUM(NEW_DEMAND) AS NEW_DEMAND, # sum if there are multiple records in same week
SUM(NEW_SUPPLY) AS NEW_SUPPLY # sum if there are multiple records in same week
FROM FORECAST_WEEKLY
GROUP BY 1, 2
),
# Step 2. Cumulative NEW_SUPPLY minus cumulative NEW_DEMAND
CTE2 AS
(
SELECT
AZ, FORECAST_WEEK,
SUM(NEW_SUPPLY) OVER(PARTITION BY AZ ORDER BY FORECAST_WEEK) -
SUM(NEW_DEMAND) OVER(PARTITION BY AZ ORDER BY FORECAST_WEEK) AS CURRENT_AVAILABLE_SUPPLY
FROM CTE1
)
SELECT * FROM CTE2
WHERE FORECAST_WEEK IS NOT NULL # Step 3. Drop the seed rows that came from CURRENT_AVAILABLE_SUPPLY
;
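As a sanity check against the sample data for IAD001: starting from 40, the running balance is 40 + 0 - 10 = 30 (week of 2021-04-15), then 30 + 4 - 15 = 19, then 19 + 0 - 10 = 9, then 9 + 200 - 15 = 194. IAD002 has no forecast rows, so only its seed row exists and it is filtered out in the final step.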

BIE Amazon Prime Video

buyer_tb: buyer_id, signup_dt
order_tb: order_dt, country, buyer_id, order_id
  • Some customers placed an order after 30 days of registration

  • Some customers placed an order within 30 days of registration

  • Some customers never placed any orders

  • Decision tree for the three segments (a classification sketch in SQL follows below):
    • Placed an order?
      • Yes -> within 30 days of signup?
        • Yes
        • No
      • No (never ordered)
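A minimal MySQL sketch of that classification, assuming the buyer_tb and order_tb definitions below (the segment labels are made up for illustration):

# Segment each buyer: never ordered / first order within 30 days / first order later.
SELECT
b.buyer_id,
CASE
WHEN MIN(o.order_dt) IS NULL THEN 'never ordered'
WHEN MIN(DATEDIFF(o.order_dt, b.signup_dt)) <= 30 THEN 'ordered within 30 days'
ELSE 'ordered after 30 days'
END AS segment
FROM buyer_tb b
LEFT JOIN order_tb o # LEFT JOIN keeps buyers with no orders
ON b.buyer_id = o.buyer_id
GROUP BY b.buyer_id;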
DROP TABLE IF EXISTS buyer_tb;
CREATE TABLE buyer_tb (
buyer_id BIGINT NOT NULL,
signup_dt TIMESTAMP DEFAULT NULL,
PRIMARY KEY (buyer_id)
);
TRUNCATE TABLE buyer_tb;

INSERT INTO buyer_tb (buyer_id, signup_dt) VALUES
(1,'2020-01-30 00:13:52'),
(2,'2020-02-28 00:13:52'),
(3,'2020-03-30 00:13:52'),
(4,'2020-04-30 00:13:52'),
(5,'2020-07-30 00:13:52'),
(6,'2020-08-31 13:12:32'),
(7,'2020-08-31 13:12:32'),
(8,'2020-08-31 13:12:32'),
(9,'2020-08-31 13:12:32'),
(10,'2020-09-01 14:20:35'),
(11,'2020-09-01 14:20:35'),
(12,'2020-09-01 14:20:35'),
(13,'2020-09-01 14:20:35'),
(14,'2020-09-01 14:20:35'),
(15,'2020-09-01 14:20:35');
DROP TABLE IF EXISTS order_tb;
CREATE TABLE order_tb (
order_id BIGINT NOT NULL,
buyer_id BIGINT NOT NULL,
order_dt TIMESTAMP DEFAULT NULL,
country VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (order_id),
FOREIGN KEY (buyer_id) REFERENCES buyer_tb(buyer_id)
);
TRUNCATE TABLE order_tb;

INSERT INTO order_tb (order_id, buyer_id, order_dt, country) VALUES
(1,1,'2020-09-21 21:15:52','CHINA'),
(2,2,'2020-09-21 21:14:33','CHINA'),
(3,3,'2020-09-21 20:49:54','UNITED STATES'),
(4,4,'2020-09-21 20:06:53','CANADA'),
(5,5,'2020-09-21 20:02:11','MEXICO'),
(6,6,'2020-09-21 20:00:32','CHINA'),
(7,7,'2020-09-21 16:01:29','BRAZIL'),
(8,8,'2020-09-21 15:34:06','CHINA'),
(9,9,'2020-09-21 12:04:54','ENGLAND'),
(10,10,'2020-09-22 11:58:57','CHINA'),
(11,10,'2020-09-22 12:02:11','CHINA'),
(12,10,'2020-09-22 12:03:25','CHINA'),
(13,11,'2020-09-23 11:55:40','CHINA'),
(14,12,'2020-09-23 11:53:08','FRANCE'),
(15,13,'2020-09-23 11:51:28','CHINA'),
(16,7,'2021-06-21 16:01:29','BRAZIL'),
(17,8,'2021-06-21 15:34:06','CHINA'),
(18,9,'2021-06-21 12:04:54','ENGLAND'),
(19,10,'2021-07-22 11:58:57','CHINA'),
(20,10,'2021-08-22 12:02:11','CHINA'),
(21,10,'2021-08-22 12:03:25','CHINA'),
(22,11,'2021-08-23 11:55:40','CHINA'),
(23,12,'2021-08-23 11:53:08','FRANCE'),
(24,13,'2021-08-23 11:51:28','CHINA'),
(25,7,'2021-09-21 16:01:29','BRAZIL'),
(26,8,'2021-09-21 15:34:06','CHINA'),
(27,9,'2021-09-21 12:04:54','ENGLAND'),
(28,10,'2021-09-22 11:58:57','CHINA'),
(29,10,'2021-09-22 12:02:11','CHINA'),
(30,10,'2021-09-22 12:03:25','CHINA'),
(31,11,'2021-09-23 11:55:40','CHINA'),
(32,12,'2021-09-23 11:53:08','FRANCE'),
(33,13,'2021-09-23 11:51:28','CHINA');

Q1: For each buyer, count the number of orders placed in each year.

Solution

SELECT buyer_id, YEAR(order_dt) AS order_year, COUNT(order_id) AS order_count
FROM order_tb
GROUP BY 1, 2
ORDER BY buyer_id;

Q2: List the top 10 buyers by order count for each country.

Solution

WITH CTE AS
(
SELECT
country, buyer_id,
COUNT(order_id) AS order_count,
DENSE_RANK() OVER(PARTITION BY country ORDER BY COUNT(order_id) DESC) AS buyer_DR
FROM order_tb
GROUP BY country, buyer_id
)
SELECT *
FROM CTE
WHERE buyer_DR <= 10
ORDER BY country, buyer_DR;

Q3: What is the average number of orders per buyer within 30 days after signup?

Get all records with datediff

# Get all records with datediff 
SELECT
b.buyer_id,
o.order_id,
DATE(b.signup_dt) AS signup_dt,
DATE(o.order_dt) AS order_dt,
DATEDIFF(order_dt, signup_dt) AS days_since_signup
FROM buyer_tb b
LEFT JOIN order_tb o # LEFT JOIN keeps buyers who never placed any orders
ON b.buyer_id = o.buyer_id;

Keep only the records where the order came within 30 days of signup

# Keep only the records where the order came within 30 days of signup
SELECT
b.buyer_id,
o.order_id,
DATE(b.signup_dt) AS signup_dt,
DATE(o.order_dt) AS order_dt,
DATEDIFF(order_dt, signup_dt) AS days_since_signup
FROM buyer_tb b
LEFT JOIN order_tb o # the WHERE filter below drops the NULL rows, so buyers with no orders fall out here
ON b.buyer_id = o.buyer_id
WHERE DATEDIFF(order_dt, signup_dt) <= 30;

Solution: the average number of orders for buyers who placed orders within 30 days after signup.

# Solution: the average number of orders for buyers who placed orders within 30 days after signup.
# Expected with the sample data: 10 orders / 8 buyers = 1.25
SELECT
COUNT(o.order_id) / COUNT(DISTINCT b.buyer_id) AS avg_orders_per_buyer
FROM buyer_tb b
LEFT JOIN order_tb o
ON b.buyer_id = o.buyer_id
WHERE DATEDIFF(order_dt, signup_dt) <= 30;

Solution: the average number of orders for all buyers.

  • Only count buyers who registered more than 30 days ago; newer buyers have not had a full 30-day window yet.
# Solution: the average number of orders for all buyers.
# Only count buyers who registered more than 30 days ago.
# Expected with the sample data: 10 orders / 15 buyers
WITH CTE AS
(
SELECT COUNT(DISTINCT buyer_id) AS n_buyers
FROM buyer_tb
WHERE DATEDIFF(CURDATE(), signup_dt) > 30
)
SELECT
COUNT(o.order_id) / (SELECT n_buyers FROM CTE) AS avg_orders_per_buyer
FROM buyer_tb b
LEFT JOIN order_tb o
ON b.buyer_id = o.buyer_id
WHERE DATEDIFF(order_dt, signup_dt) <= 30;

DE

Q1. Given a table like the one below, how can we return the following result?

Table Employee

  • No PK
employee_id  salary_component_id  salary_component_desc  Value  month
1            1                    salary                 2000   "2015-09"
1            2                    bonus                  100    "2015-09"
1            3                    indemnity              250    "2015-09"
1            1                    salary                 2000   "2015-10"
1            2                    bonus                  0      "2015-10"
1            3                    indemnity              150    "2015-10"
2            1                    salary                 1500   "2015-09"
2            2                    bonus                  0      "2015-09"
2            3                    indemnity              50     "2015-09"

Sample Result

employee_id  month      salary  bonus  indemnity
1            "2015-09"  2000    100    250
1            "2015-10"  2000    0      150
2            "2015-09"  1500    0      50

Sample Data

DROP TABLE IF EXISTS Employee;
CREATE TABLE Employee (
employee_id INT NOT NULL,
salary_component_id INT NOT NULL,
salary_component_desc VARCHAR(255) NOT NULL,
Value INT NOT NULL,
month DATE
);
TRUNCATE TABLE Employee;
INSERT INTO Employee (employee_id, salary_component_id, salary_component_desc, Value, month) VALUES
(1, 1, 'salary', 2000, '2015-09-01'),
(1, 2, 'bonus', 100, '2015-09-01'),
(1, 3, 'indemnity', 250, '2015-09-01'),
(1, 1, 'salary', 2000, '2015-10-01'),
(1, 2, 'bonus', 0, '2015-10-01'),
(1, 3, 'indemnity', 150, '2015-10-01'),
(2, 1, 'salary', 1500, '2015-09-01'),
(2, 2, 'bonus', 0, '2015-09-01'),
(2, 3, 'indemnity', 50, '2015-09-01');

Solution

WITH CTE AS
(
SELECT
employee_id, month,
IF(salary_component_desc = 'salary', Value, 0) AS salary,
IF(salary_component_desc = 'bonus', Value, 0) AS bonus,
IF(salary_component_desc = 'indemnity', Value, 0) AS indemnity
FROM Employee
)
SELECT
employee_id, month,
SUM(salary) AS salary,
SUM(bonus) AS bonus,
SUM(indemnity) AS indemnity
FROM CTE
GROUP BY employee_id, month;
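The pivot can also be folded into a single pass by moving the IF() inside the aggregate; a minimal equivalent sketch:

SELECT
employee_id, month,
SUM(IF(salary_component_desc = 'salary', Value, 0)) AS salary,
SUM(IF(salary_component_desc = 'bonus', Value, 0)) AS bonus,
SUM(IF(salary_component_desc = 'indemnity', Value, 0)) AS indemnity
FROM Employee
GROUP BY employee_id, month;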

Q2. Can you compute the average number of subscribers by customer age range, using the age buckets listed below?

Assume we have the following table in a relational DB:

Table SUBSCRIBERS

  • USER_ID
  • DATE_OF_BIRTH
  • NUMBER_OF_SUBSCRIBERS
user_id  DOB         Subs
1        2000-01-01  3
2        1990-01-01  5

Table USERS

  • USER_ID
  • NAME
  • SURNAME
  • COUNTRY

Age Buckets

* 0-18 y.o.
* 19-30 y.o.
* 31-45 y.o.
* 46-65 y.o.
* greater than 65 y.o.

Sample Data

import datetime
import random

# user_ids
user_ids = range(1, 101)

# Random dates
def random_date():
    start_date = datetime.date(1940, 1, 1)
    end_date = datetime.date(2020, 12, 31)
    days_between_dates = end_date - start_date
    random_number_of_days = random.randrange(days_between_dates.days)
    return start_date + datetime.timedelta(days=random_number_of_days)

for user_id in user_ids:
    print(f"({user_id}, '{random_date()}', {random.randrange(6)}),")
DROP TABLE IF EXISTS SUBSCRIBERS;
CREATE TABLE SUBSCRIBERS (
USER_ID INT NOT NULL,
DATE_OF_BIRTH DATE NOT NULL,
NUMBER_OF_SUBSCRIBERS TINYINT NOT NULL,
PRIMARY KEY(USER_ID)
);
TRUNCATE TABLE SUBSCRIBERS;
INSERT INTO SUBSCRIBERS (USER_ID, DATE_OF_BIRTH, NUMBER_OF_SUBSCRIBERS) VALUES
(1, '1944-12-02', 1),
(2, '1976-06-22', 2),
(3, '1972-09-11', 5),
(4, '1970-08-30', 0),
(5, '2014-12-06', 1),
(6, '1953-03-16', 0),
(7, '1947-01-15', 3),
(8, '1974-10-05', 0),
(9, '1969-08-02', 1),
(10, '1973-12-30', 0),
(11, '1967-10-07', 2),
(12, '1987-06-30', 5),
(13, '1962-12-02', 1),
(14, '1946-08-12', 0),
(15, '1969-10-20', 4),
(16, '1941-12-02', 3),
(17, '1998-03-19', 4),
(18, '1967-05-12', 0),
(19, '2004-05-03', 3),
(20, '1971-05-05', 0),
(21, '1946-11-03', 2),
(22, '2010-02-06', 0),
(23, '1946-10-17', 1),
(24, '2014-05-09', 5),
(25, '1984-12-15', 2),
(26, '1984-08-23', 5),
(27, '1998-07-17', 3),
(28, '1965-09-11', 5),
(29, '1976-01-22', 2),
(30, '1945-02-02', 4),
(31, '1972-10-19', 1),
(32, '1973-09-07', 4),
(33, '2017-12-11', 0),
(34, '1956-03-23', 3),
(35, '1947-01-01', 1),
(36, '1971-12-16', 3),
(37, '1957-12-14', 3),
(38, '1967-10-17', 3),
(39, '1959-03-10', 4),
(40, '1980-12-02', 2),
(41, '1974-05-27', 3),
(42, '1967-09-14', 2),
(43, '1984-09-18', 1),
(44, '1970-10-23', 3),
(45, '1991-05-29', 0),
(46, '1952-01-10', 5),
(47, '1940-06-12', 1),
(48, '2007-01-08', 3),
(49, '1981-02-11', 3),
(50, '1964-10-21', 3),
(51, '2011-08-17', 2),
(52, '1954-08-01', 4),
(53, '2008-06-04', 1),
(54, '1958-10-05', 1),
(55, '1993-11-10', 3),
(56, '1964-06-20', 1),
(57, '1989-02-28', 5),
(58, '1982-10-05', 5),
(59, '1992-01-09', 0),
(60, '1999-06-21', 1),
(61, '1944-10-05', 1),
(62, '2019-03-12', 5),
(63, '1980-03-17', 4),
(64, '2000-06-30', 0),
(65, '2005-08-24', 4),
(66, '2009-11-28', 1),
(67, '1995-04-08', 3),
(68, '2006-04-01', 2),
(69, '1973-06-15', 2),
(70, '1967-04-10', 4),
(71, '1967-12-04', 0),
(72, '1987-03-16', 1),
(73, '1973-01-29', 2),
(74, '2002-02-15', 1),
(75, '1979-12-12', 4),
(76, '2010-11-29', 5),
(77, '1978-02-23', 4),
(78, '1997-09-18', 4),
(79, '2014-07-27', 2),
(80, '1954-05-24', 3),
(81, '1988-02-12', 3),
(82, '1994-05-07', 4),
(83, '1958-05-31', 1),
(84, '1979-10-06', 5),
(85, '1979-04-23', 0),
(86, '1999-02-05', 5),
(87, '1958-10-20', 5),
(88, '2013-12-23', 5),
(89, '1955-12-09', 4),
(90, '1989-05-25', 3),
(91, '2000-11-30', 2),
(92, '1967-09-23', 2),
(93, '1946-02-20', 1),
(94, '1957-09-22', 2),
(95, '1947-01-12', 5),
(96, '1996-09-22', 3),
(97, '1960-01-24', 4),
(98, '1969-02-07', 3),
(99, '2015-06-12', 5),
(100, '1947-01-04', 3);

Solution

WITH CTE1 AS
(
SELECT
NUMBER_OF_SUBSCRIBERS,
# YEAR(CURDATE()) - YEAR(DATE_OF_BIRTH) overstates age when the birthday has not happened yet this year
TIMESTAMPDIFF(YEAR, DATE_OF_BIRTH, CURDATE()) AS AGE # full years, birthday-aware
FROM SUBSCRIBERS
),
CTE2 AS
(
SELECT
NUMBER_OF_SUBSCRIBERS,
CASE
WHEN AGE BETWEEN 0 AND 18 THEN '0-18'
WHEN AGE BETWEEN 19 AND 30 THEN '19-30'
WHEN AGE BETWEEN 31 AND 45 THEN '31-45'
WHEN AGE BETWEEN 46 AND 65 THEN '46-65'
WHEN AGE > 65 THEN '> 65'
END AS AGE_RANGE
FROM CTE1
)
SELECT
AGE_RANGE,
AVG(NUMBER_OF_SUBSCRIBERS) AS AVG_SUBSCRIBERS
FROM CTE2
GROUP BY AGE_RANGE
ORDER BY AGE_RANGE;

Q3. What's the order number and the order amount for the order with the highest amount sold?

Assume we have the following three tables:

  • ORDERS (order_id PK, customer_id, order_date)
  • ORDER_DETAILS (order_id, product_id, price_each, qty)
  • CUSTOMERS (customer_id, name, surname, address, city, country)

ORDER_DETAILS

order_id  product_id  price_each  qty
001       item1       10          2
001       item2       50          100

Sample data

import random

# order_ids
order_ids = range(1, 21)

# product_ids, price_each
#product_ids = {'item1': 10, 'item2': 50, 'item3': 20, 'item4': 30, 'item5': 40}
product_ids = ['item1', 'item2', 'item3', 'item4', 'item5']
price_each = [10, 50, 20, 30, 40]

# n_items
n_items = len(product_ids)

# qty
qty = range(1, 101)

for order_id in order_ids:  # each order may have multiple items, but the order_id is the same
    items = list(range(0, n_items))
    for i in range(random.randrange(1, n_items + 1)):  # for each order, how many different items
        item = random.choice(items)  # the item must be different within the same order
        items.remove(item)  # remove the item that has been selected
        print(f"({order_id}, '{product_ids[item]}', {price_each[item]}, {random.choice(qty)}),")
DROP TABLE IF EXISTS ORDER_DETAILS;
CREATE TABLE ORDER_DETAILS (
order_id INT NOT NULL,
product_id VARCHAR(255) NOT NULL,
price_each FLOAT NOT NULL,
qty TINYINT NOT NULL
);
TRUNCATE TABLE ORDER_DETAILS;
INSERT INTO ORDER_DETAILS (order_id, product_id, price_each, qty) VALUES
(1, 'item5', 40, 1),
(1, 'item1', 10, 75),
(2, 'item2', 50, 32),
(2, 'item4', 30, 96),
(3, 'item2', 50, 19),
(3, 'item4', 30, 63),
(4, 'item5', 40, 36),
(4, 'item4', 30, 54),
(4, 'item2', 50, 98),
(4, 'item1', 10, 53),
(5, 'item4', 30, 44),
(5, 'item2', 50, 30),
(5, 'item1', 10, 2),
(5, 'item3', 20, 30),
(6, 'item3', 20, 28),
(6, 'item1', 10, 40),
(6, 'item2', 50, 83),
(6, 'item5', 40, 83),
(6, 'item4', 30, 77),
(7, 'item1', 10, 72),
(7, 'item4', 30, 85),
(7, 'item2', 50, 50),
(7, 'item5', 40, 58),
(7, 'item3', 20, 22),
(8, 'item1', 10, 93),
(9, 'item3', 20, 11),
(9, 'item5', 40, 53),
(9, 'item1', 10, 71),
(9, 'item2', 50, 82),
(9, 'item4', 30, 30),
(10, 'item1', 10, 72),
(11, 'item2', 50, 89),
(11, 'item4', 30, 79),
(11, 'item3', 20, 18),
(11, 'item5', 40, 21),
(11, 'item1', 10, 41),
(12, 'item2', 50, 41),
(13, 'item1', 10, 45),
(13, 'item3', 20, 38),
(14, 'item4', 30, 74),
(14, 'item5', 40, 4),
(15, 'item3', 20, 90),
(15, 'item4', 30, 100),
(16, 'item2', 50, 27),
(16, 'item3', 20, 25),
(16, 'item5', 40, 70),
(17, 'item1', 10, 35),
(17, 'item2', 50, 91),
(17, 'item5', 40, 87),
(17, 'item4', 30, 30),
(18, 'item5', 40, 9),
(18, 'item4', 30, 99),
(19, 'item4', 30, 82),
(20, 'item5', 40, 20);

Solution

SELECT
order_id,
SUM(price_each * qty) AS highest_amount # total amount for each order_id
FROM ORDER_DETAILS
GROUP BY order_id
ORDER BY highest_amount DESC
LIMIT 1;
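LIMIT 1 returns only one row even if two orders tie at the maximum; a sketch that keeps every tied order instead:

WITH totals AS
(
SELECT order_id, SUM(price_each * qty) AS amount
FROM ORDER_DETAILS
GROUP BY order_id
)
SELECT order_id, amount
FROM totals
WHERE amount = (SELECT MAX(amount) FROM totals);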

Q4. Same question as Q3, but return the orders ranked 10th through 20th by order amount.

Solution

WITH CTE1 AS
(
SELECT
order_id,
SUM(price_each * qty) AS amount # total amount for each order_id
FROM ORDER_DETAILS
GROUP BY order_id
),
CTE2 AS
(
SELECT
order_id, amount,
DENSE_RANK() OVER(ORDER BY amount DESC) AS amount_DR
FROM CTE1
)
SELECT order_id, amount, amount_DR
FROM CTE2
WHERE amount_DR BETWEEN 10 AND 20;

Q5. Who are the customers who have not placed any order in February 2018?

Sample Data

import datetime
import random

# order_ids
order_ids = range(1, 101)

# customer_ids
customer_ids = range(1, 21)

# Random dates
def random_date():
    start_date = datetime.date(2018, 1, 1)
    end_date = datetime.date(2018, 3, 31)
    days_between_dates = end_date - start_date
    random_number_of_days = random.randrange(days_between_dates.days)
    return start_date + datetime.timedelta(days=random_number_of_days)

for order_id in order_ids:
    print(f"({order_id}, {random.choice(customer_ids)}, '{random_date()}'),")
DROP TABLE IF EXISTS ORDERS;
CREATE TABLE ORDERS (
order_id INT NOT NULL,
customer_id INT NOT NULL,
order_date DATE NOT NULL,
PRIMARY KEY (order_id)
);
TRUNCATE TABLE ORDERS;
INSERT INTO ORDERS (order_id, customer_id, order_date) VALUES
(1, 20, '2018-03-07'),
(2, 9, '2018-02-05'),
(3, 6, '2018-02-18'),
(4, 9, '2018-03-11'),
(5, 14, '2018-03-28'),
(6, 8, '2018-01-02'),
(7, 6, '2018-03-16'),
(8, 10, '2018-02-25'),
(9, 14, '2018-02-01'),
(10, 19, '2018-02-28'),
(11, 16, '2018-03-02'),
(12, 7, '2018-02-27'),
(13, 8, '2018-01-16'),
(14, 15, '2018-01-21'),
(15, 7, '2018-01-15'),
(16, 19, '2018-02-26'),
(17, 20, '2018-03-26'),
(18, 2, '2018-03-29'),
(19, 16, '2018-03-10'),
(20, 11, '2018-03-25'),
(21, 20, '2018-03-20'),
(22, 15, '2018-02-12'),
(23, 16, '2018-03-17'),
(24, 5, '2018-03-19'),
(25, 18, '2018-02-03'),
(26, 4, '2018-03-30'),
(27, 20, '2018-02-23'),
(28, 5, '2018-03-25'),
(29, 19, '2018-01-11'),
(30, 3, '2018-01-06'),
(31, 18, '2018-03-17'),
(32, 8, '2018-03-21'),
(33, 13, '2018-01-28'),
(34, 5, '2018-01-25'),
(35, 13, '2018-03-26'),
(36, 4, '2018-02-26'),
(37, 11, '2018-02-06'),
(38, 7, '2018-01-13'),
(39, 20, '2018-03-29'),
(40, 2, '2018-03-01'),
(41, 11, '2018-02-21'),
(42, 17, '2018-02-18'),
(43, 4, '2018-02-12'),
(44, 3, '2018-01-01'),
(45, 10, '2018-03-08'),
(46, 6, '2018-03-17'),
(47, 8, '2018-03-12'),
(48, 18, '2018-03-02'),
(49, 16, '2018-01-09'),
(50, 19, '2018-02-03'),
(51, 18, '2018-03-14'),
(52, 8, '2018-03-28'),
(53, 10, '2018-01-16'),
(54, 1, '2018-01-27'),
(55, 8, '2018-02-26'),
(56, 15, '2018-02-21'),
(57, 18, '2018-01-05'),
(58, 20, '2018-01-07'),
(59, 12, '2018-02-25'),
(60, 11, '2018-03-18'),
(61, 6, '2018-03-16'),
(62, 9, '2018-01-07'),
(63, 11, '2018-01-20'),
(64, 7, '2018-01-03'),
(65, 10, '2018-02-17'),
(66, 4, '2018-01-30'),
(67, 1, '2018-01-24'),
(68, 20, '2018-02-01'),
(69, 4, '2018-02-21'),
(70, 4, '2018-01-31'),
(71, 12, '2018-01-25'),
(72, 9, '2018-03-13'),
(73, 2, '2018-02-05'),
(74, 3, '2018-01-02'),
(75, 4, '2018-02-11'),
(76, 8, '2018-03-15'),
(77, 17, '2018-01-01'),
(78, 7, '2018-02-01'),
(79, 16, '2018-02-07'),
(80, 14, '2018-03-18'),
(81, 4, '2018-03-11'),
(82, 9, '2018-03-04'),
(83, 4, '2018-02-01'),
(84, 8, '2018-01-21'),
(85, 15, '2018-03-18'),
(86, 9, '2018-03-23'),
(87, 18, '2018-03-13'),
(88, 14, '2018-03-15'),
(89, 5, '2018-03-18'),
(90, 14, '2018-03-16'),
(91, 9, '2018-02-01'),
(92, 11, '2018-03-04'),
(93, 17, '2018-02-12'),
(94, 6, '2018-02-27'),
(95, 15, '2018-03-01'),
(96, 19, '2018-01-25'),
(97, 20, '2018-03-18'),
(98, 14, '2018-02-26'),
(99, 2, '2018-01-16'),
(100, 17, '2018-03-25');

Solution

WITH CTE AS
(
SELECT customer_id
FROM ORDERS
WHERE DATE_FORMAT(order_date, '%Y-%m') = '2018-02'
)
SELECT DISTINCT customer_id
FROM ORDERS # note: only customers who have placed at least one order ever appear here
WHERE customer_id NOT IN (SELECT customer_id FROM CTE);
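If the CUSTOMERS table listed in Q3 is populated, an anti-join variant also catches customers who never placed any order at all; a sketch, assuming CUSTOMERS has a customer_id column (no sample data for it is created above):

SELECT c.customer_id
FROM CUSTOMERS c
LEFT JOIN ORDERS o
ON c.customer_id = o.customer_id
AND o.order_date >= '2018-02-01'
AND o.order_date < '2018-03-01'
WHERE o.order_id IS NULL; # no matching February 2018 order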