SQL Whiteboard

Amazon

BIE People Analytics

Sample Data

DROP TABLE IF EXISTS CURRENT_AVAILABLE_SUPPLY;
DROP TABLE IF EXISTS FORECAST_WEEKLY;
CREATE TABLE IF NOT EXISTS CURRENT_AVAILABLE_SUPPLY (AZ CHAR(6), CURRENT_AVAILABLE_SUPPLY INT);
CREATE TABLE IF NOT EXISTS FORECAST_WEEKLY (AZ CHAR(6), FORECAST_WEEK DATE, NEW_DEMAND INT, NEW_SUPPLY INT);
TRUNCATE TABLE CURRENT_AVAILABLE_SUPPLY;
INSERT INTO CURRENT_AVAILABLE_SUPPLY (AZ, CURRENT_AVAILABLE_SUPPLY) VALUES ('IAD001', 40);
INSERT INTO CURRENT_AVAILABLE_SUPPLY (AZ, CURRENT_AVAILABLE_SUPPLY) VALUES ('IAD002', 30);
TRUNCATE TABLE FORECAST_WEEKLY;
INSERT INTO FORECAST_WEEKLY (AZ, FORECAST_WEEK, NEW_DEMAND, NEW_SUPPLY) VALUES ('IAD001', '2021-04-15', 10, 0);
INSERT INTO FORECAST_WEEKLY (AZ, FORECAST_WEEK, NEW_DEMAND, NEW_SUPPLY) VALUES ('IAD001', '2021-04-22', 15, 4);
INSERT INTO FORECAST_WEEKLY (AZ, FORECAST_WEEK, NEW_DEMAND, NEW_SUPPLY) VALUES ('IAD001', '2021-04-29', 10, 0);
INSERT INTO FORECAST_WEEKLY (AZ, FORECAST_WEEK, NEW_DEMAND, NEW_SUPPLY) VALUES ('IAD001', '2021-05-05', 15, 200);
  1. What is the available supply for each AZ, for each week in the next year?
  • Step 1. UNION ALL the two tables, treating CURRENT_AVAILABLE_SUPPLY as the initial NEW_SUPPLY.
  • Step 2. Take the running (cumulative) sum of NEW_SUPPLY and subtract the running sum of NEW_DEMAND.
  • Step 3. Remove the seed rows that came from CURRENT_AVAILABLE_SUPPLY.
/*
Table: CURRENT_AVAILABLE_SUPPLY
COLUMN NAMES/Sample Data below:
AZ CURRENT_AVAILABLE_SUPPLY
IAD001 40
IAD002 30

Table: FORECAST_WEEKLY
COLUMN NAMES/Sample Data below:
AZ FORECAST_WEEK NEW_DEMAND NEW_SUPPLY
IAD001 2021-04-15 10 0
IAD001 2021-04-22 15 4
IAD001 2021-04-29 10 0
IAD001 2021-05-05 15 200
*/

# Step 1. UNION ALL two tables. Treat CURRENT_AVAILABLE_SUPPLY as initial NEW_SUPPLY
WITH CTE1 AS
(
SELECT
AZ,
NULL AS FORECAST_WEEK,
NULL AS NEW_DEMAND,
CURRENT_AVAILABLE_SUPPLY AS NEW_SUPPLY
FROM CURRENT_AVAILABLE_SUPPLY
UNION ALL
SELECT
AZ,
WEEK(FORECAST_WEEK), # week number used as the week key; the NULL seed row sorts before all weeks
SUM(NEW_DEMAND) AS NEW_DEMAND, # sum if there are multiple records in same week
SUM(NEW_SUPPLY) AS NEW_SUPPLY # sum if there are multiple records in same week
FROM FORECAST_WEEKLY
GROUP BY 1, 2
),
# Step 2. Cumulative NEW_SUPPLY minus cumulative NEW_DEMAND
CTE2 AS
(
SELECT
AZ, FORECAST_WEEK,
SUM(NEW_SUPPLY) OVER(PARTITION BY AZ ORDER BY FORECAST_WEEK) -
SUM(NEW_DEMAND) OVER(PARTITION BY AZ ORDER BY FORECAST_WEEK) AS CURRENT_AVAILABLE_SUPPLY
FROM CTE1
)
SELECT * FROM CTE2
WHERE FORECAST_WEEK IS NOT NULL # Step 3. Drop the seed rows that came from CURRENT_AVAILABLE_SUPPLY
;
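As a sanity check against the sample data for IAD001: starting from 40, the running balance is 40 + 0 - 10 = 30 (week of 2021-04-15), then 30 + 4 - 15 = 19, then 19 + 0 - 10 = 9, then 9 + 200 - 15 = 194. IAD002 has no forecast rows, so only its seed row exists and it is filtered out in the final step.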

BIE Amazon Prime Video

buyer_tb: buyer_id, signup_dt
order_tb: order_dt, country, buyer_id, order_id
  • Some customers placed an order after 30 days of registration

  • Some customers placed an order within 30 days of registration

  • Some customers never placed any orders

  • Decision tree for the three segments (a classification sketch in SQL follows below):
    • Placed an order?
      • Yes -> within 30 days of signup?
        • Yes
        • No
      • No (never ordered)
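A minimal MySQL sketch of that classification, assuming the buyer_tb and order_tb definitions below (the segment labels are made up for illustration):

# Segment each buyer: never ordered / first order within 30 days / first order later.
SELECT
b.buyer_id,
CASE
WHEN MIN(o.order_dt) IS NULL THEN 'never ordered'
WHEN MIN(DATEDIFF(o.order_dt, b.signup_dt)) <= 30 THEN 'ordered within 30 days'
ELSE 'ordered after 30 days'
END AS segment
FROM buyer_tb b
LEFT JOIN order_tb o # LEFT JOIN keeps buyers with no orders
ON b.buyer_id = o.buyer_id
GROUP BY b.buyer_id;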
DROP TABLE IF EXISTS buyer_tb;
CREATE TABLE buyer_tb (
buyer_id BIGINT NOT NULL,
signup_dt TIMESTAMP DEFAULT NULL,
PRIMARY KEY (buyer_id)
);
TRUNCATE TABLE buyer_tb;

INSERT INTO buyer_tb (buyer_id, signup_dt) VALUES
(1,'2020-01-30 00:13:52'),
(2,'2020-02-28 00:13:52'),
(3,'2020-03-30 00:13:52'),
(4,'2020-04-30 00:13:52'),
(5,'2020-07-30 00:13:52'),
(6,'2020-08-31 13:12:32'),
(7,'2020-08-31 13:12:32'),
(8,'2020-08-31 13:12:32'),
(9,'2020-08-31 13:12:32'),
(10,'2020-09-01 14:20:35'),
(11,'2020-09-01 14:20:35'),
(12,'2020-09-01 14:20:35'),
(13,'2020-09-01 14:20:35'),
(14,'2020-09-01 14:20:35'),
(15,'2020-09-01 14:20:35');
DROP TABLE IF EXISTS order_tb;
CREATE TABLE order_tb (
order_id BIGINT NOT NULL,
buyer_id BIGINT NOT NULL,
order_dt TIMESTAMP DEFAULT NULL,
country VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (order_id),
FOREIGN KEY (buyer_id) REFERENCES buyer_tb(buyer_id)
);
TRUNCATE TABLE order_tb;

INSERT INTO order_tb (order_id, buyer_id, order_dt, country) VALUES
(1,1,'2020-09-21 21:15:52','CHINA'),
(2,2,'2020-09-21 21:14:33','CHINA'),
(3,3,'2020-09-21 20:49:54','UNITED STATES'),
(4,4,'2020-09-21 20:06:53','CANADA'),
(5,5,'2020-09-21 20:02:11','MEXICO'),
(6,6,'2020-09-21 20:00:32','CHINA'),
(7,7,'2020-09-21 16:01:29','BRAZIL'),
(8,8,'2020-09-21 15:34:06','CHINA'),
(9,9,'2020-09-21 12:04:54','ENGLAND'),
(10,10,'2020-09-22 11:58:57','CHINA'),
(11,10,'2020-09-22 12:02:11','CHINA'),
(12,10,'2020-09-22 12:03:25','CHINA'),
(13,11,'2020-09-23 11:55:40','CHINA'),
(14,12,'2020-09-23 11:53:08','FRANCE'),
(15,13,'2020-09-23 11:51:28','CHINA'),
(16,7,'2021-06-21 16:01:29','BRAZIL'),
(17,8,'2021-06-21 15:34:06','CHINA'),
(18,9,'2021-06-21 12:04:54','ENGLAND'),
(19,10,'2021-07-22 11:58:57','CHINA'),
(20,10,'2021-08-22 12:02:11','CHINA'),
(21,10,'2021-08-22 12:03:25','CHINA'),
(22,11,'2021-08-23 11:55:40','CHINA'),
(23,12,'2021-08-23 11:53:08','FRANCE'),
(24,13,'2021-08-23 11:51:28','CHINA'),
(25,7,'2021-09-21 16:01:29','BRAZIL'),
(26,8,'2021-09-21 15:34:06','CHINA'),
(27,9,'2021-09-21 12:04:54','ENGLAND'),
(28,10,'2021-09-22 11:58:57','CHINA'),
(29,10,'2021-09-22 12:02:11','CHINA'),
(30,10,'2021-09-22 12:03:25','CHINA'),
(31,11,'2021-09-23 11:55:40','CHINA'),
(32,12,'2021-09-23 11:53:08','FRANCE'),
(33,13,'2021-09-23 11:51:28','CHINA');

Q1: For each buyer, count the number of orders placed in each year.

Solution

SELECT buyer_id, YEAR(order_dt) AS order_year, COUNT(order_id) AS order_count
FROM order_tb
GROUP BY 1, 2
ORDER BY buyer_id;

Q2: List the top 10 buyers by order count for each country.

Solution

WITH CTE AS
(
SELECT
country, buyer_id,
COUNT(order_id) AS order_count,
DENSE_RANK() OVER(PARTITION BY country ORDER BY COUNT(order_id) DESC) AS buyer_DR
FROM order_tb
GROUP BY country, buyer_id
)
SELECT *
FROM CTE
WHERE buyer_DR <= 10
ORDER BY country, buyer_DR;

Q3: What is the average number of orders per buyer within 30 days after signup?

Get all records with datediff

# Get all records with datediff 
SELECT
b.buyer_id,
o.order_id,
DATE(b.signup_dt) AS signup_dt,
DATE(o.order_dt) AS order_dt,
DATEDIFF(order_dt, signup_dt) AS days_since_signup
FROM buyer_tb b
LEFT JOIN order_tb o # LEFT JOIN keeps buyers who never placed any orders
ON b.buyer_id = o.buyer_id;

Keep only the records where the order came within 30 days of signup

# Keep only the records where the order came within 30 days of signup
SELECT
b.buyer_id,
o.order_id,
DATE(b.signup_dt) AS signup_dt,
DATE(o.order_dt) AS order_dt,
DATEDIFF(order_dt, signup_dt) AS days_since_signup
FROM buyer_tb b
LEFT JOIN order_tb o # the WHERE filter below drops the NULL rows, so buyers with no orders fall out here
ON b.buyer_id = o.buyer_id
WHERE DATEDIFF(order_dt, signup_dt) <= 30;

Solution: the average number of orders for buyers who placed orders within 30 days after signup.

# Solution: the average number of orders for buyers who placed orders within 30 days after signup.
# Expected with the sample data: 10 orders / 8 buyers = 1.25
SELECT
COUNT(o.order_id) / COUNT(DISTINCT b.buyer_id) AS avg_orders_per_buyer
FROM buyer_tb b
LEFT JOIN order_tb o
ON b.buyer_id = o.buyer_id
WHERE DATEDIFF(order_dt, signup_dt) <= 30;

Solution: the average number of orders for all buyers.

  • Only count buyers who registered more than 30 days ago; newer buyers have not had a full 30-day window yet.
# Solution: the average number of orders for all buyers.
# Only count buyers who registered more than 30 days ago.
# Expected with the sample data: 10 orders / 15 buyers
WITH CTE AS
(
SELECT COUNT(DISTINCT buyer_id) AS n_buyers
FROM buyer_tb
WHERE DATEDIFF(CURDATE(), signup_dt) > 30
)
SELECT
COUNT(o.order_id) / (SELECT n_buyers FROM CTE) AS avg_orders_per_buyer
FROM buyer_tb b
LEFT JOIN order_tb o
ON b.buyer_id = o.buyer_id
WHERE DATEDIFF(order_dt, signup_dt) <= 30;

DE

Q1. Given a table like the one below, how can we return the following result?

Table Employee

  • No PK
employee_id  salary_component_id  salary_component_desc  Value  month
1            1                    salary                 2000   "2015-09"
1            2                    bonus                  100    "2015-09"
1            3                    indemnity              250    "2015-09"
1            1                    salary                 2000   "2015-10"
1            2                    bonus                  0      "2015-10"
1            3                    indemnity              150    "2015-10"
2            1                    salary                 1500   "2015-09"
2            2                    bonus                  0      "2015-09"
2            3                    indemnity              50     "2015-09"

Sample Result

employee_id  month      salary  bonus  indemnity
1            "2015-09"  2000    100    250
1            "2015-10"  2000    0      150
2            "2015-09"  1500    0      50

Sample Data

DROP TABLE IF EXISTS Employee;
CREATE TABLE Employee (
employee_id INT NOT NULL,
salary_component_id INT NOT NULL,
salary_component_desc VARCHAR(255) NOT NULL,
Value INT NOT NULL,
month DATE
);
TRUNCATE TABLE Employee;
INSERT INTO Employee (employee_id, salary_component_id, salary_component_desc, Value, month) VALUES
(1, 1, 'salary', 2000, '2015-09-01'),
(1, 2, 'bonus', 100, '2015-09-01'),
(1, 3, 'indemnity', 250, '2015-09-01'),
(1, 1, 'salary', 2000, '2015-10-01'),
(1, 2, 'bonus', 0, '2015-10-01'),
(1, 3, 'indemnity', 150, '2015-10-01'),
(2, 1, 'salary', 1500, '2015-09-01'),
(2, 2, 'bonus', 0, '2015-09-01'),
(2, 3, 'indemnity', 50, '2015-09-01');

Solution

WITH CTE AS
(
SELECT
employee_id, month,
IF(salary_component_desc = 'salary', Value, 0) AS salary,
IF(salary_component_desc = 'bonus', Value, 0) AS bonus,
IF(salary_component_desc = 'indemnity', Value, 0) AS indemnity
FROM Employee
)
SELECT
employee_id, month,
SUM(salary) AS salary,
SUM(bonus) AS bonus,
SUM(indemnity) AS indemnity
FROM CTE
GROUP BY employee_id, month;
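The pivot can also be folded into a single pass by moving the IF() inside the aggregate; a minimal equivalent sketch:

SELECT
employee_id, month,
SUM(IF(salary_component_desc = 'salary', Value, 0)) AS salary,
SUM(IF(salary_component_desc = 'bonus', Value, 0)) AS bonus,
SUM(IF(salary_component_desc = 'indemnity', Value, 0)) AS indemnity
FROM Employee
GROUP BY employee_id, month;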

Q2. Can you compute the average number of subscribers by customer age range, using the age buckets listed below?

Assume we have the following table in a relational DB:

Table SUBSCRIBERS

  • USER_ID
  • DATE_OF_BIRTH
  • NUMBER_OF_SUBSCRIBERS
user_id  DOB         Subs
1        2000-01-01  3
2        1990-01-01  5

Table USERS

  • USER_ID
  • NAME
  • SURNAME
  • COUNTRY

Age Buckets

* 0-18 y.o.
* 19-30 y.o.
* 31-45 y.o.
* 46-65 y.o.
* greater than 65 y.o.

Sample Data

import datetime
import random

# user_ids
user_ids = range(1, 101)

# Random dates
def random_date():
    start_date = datetime.date(1940, 1, 1)
    end_date = datetime.date(2020, 12, 31)
    days_between_dates = end_date - start_date
    random_number_of_days = random.randrange(days_between_dates.days)
    return start_date + datetime.timedelta(days=random_number_of_days)

for user_id in user_ids:
    print(f"({user_id}, '{random_date()}', {random.randrange(6)}),")
DROP TABLE IF EXISTS SUBSCRIBERS;
CREATE TABLE SUBSCRIBERS (
USER_ID INT NOT NULL,
DATE_OF_BIRTH DATE NOT NULL,
NUMBER_OF_SUBSCRIBERS TINYINT NOT NULL,
PRIMARY KEY(USER_ID)
);
TRUNCATE TABLE SUBSCRIBERS;
INSERT INTO SUBSCRIBERS (USER_ID, DATE_OF_BIRTH, NUMBER_OF_SUBSCRIBERS) VALUES
(1, '1944-12-02', 1),
(2, '1976-06-22', 2),
(3, '1972-09-11', 5),
(4, '1970-08-30', 0),
(5, '2014-12-06', 1),
(6, '1953-03-16', 0),
(7, '1947-01-15', 3),
(8, '1974-10-05', 0),
(9, '1969-08-02', 1),
(10, '1973-12-30', 0),
(11, '1967-10-07', 2),
(12, '1987-06-30', 5),
(13, '1962-12-02', 1),
(14, '1946-08-12', 0),
(15, '1969-10-20', 4),
(16, '1941-12-02', 3),
(17, '1998-03-19', 4),
(18, '1967-05-12', 0),
(19, '2004-05-03', 3),
(20, '1971-05-05', 0),
(21, '1946-11-03', 2),
(22, '2010-02-06', 0),
(23, '1946-10-17', 1),
(24, '2014-05-09', 5),
(25, '1984-12-15', 2),
(26, '1984-08-23', 5),
(27, '1998-07-17', 3),
(28, '1965-09-11', 5),
(29, '1976-01-22', 2),
(30, '1945-02-02', 4),
(31, '1972-10-19', 1),
(32, '1973-09-07', 4),
(33, '2017-12-11', 0),
(34, '1956-03-23', 3),
(35, '1947-01-01', 1),
(36, '1971-12-16', 3),
(37, '1957-12-14', 3),
(38, '1967-10-17', 3),
(39, '1959-03-10', 4),
(40, '1980-12-02', 2),
(41, '1974-05-27', 3),
(42, '1967-09-14', 2),
(43, '1984-09-18', 1),
(44, '1970-10-23', 3),
(45, '1991-05-29', 0),
(46, '1952-01-10', 5),
(47, '1940-06-12', 1),
(48, '2007-01-08', 3),
(49, '1981-02-11', 3),
(50, '1964-10-21', 3),
(51, '2011-08-17', 2),
(52, '1954-08-01', 4),
(53, '2008-06-04', 1),
(54, '1958-10-05', 1),
(55, '1993-11-10', 3),
(56, '1964-06-20', 1),
(57, '1989-02-28', 5),
(58, '1982-10-05', 5),
(59, '1992-01-09', 0),
(60, '1999-06-21', 1),
(61, '1944-10-05', 1),
(62, '2019-03-12', 5),
(63, '1980-03-17', 4),
(64, '2000-06-30', 0),
(65, '2005-08-24', 4),
(66, '2009-11-28', 1),
(67, '1995-04-08', 3),
(68, '2006-04-01', 2),
(69, '1973-06-15', 2),
(70, '1967-04-10', 4),
(71, '1967-12-04', 0),
(72, '1987-03-16', 1),
(73, '1973-01-29', 2),
(74, '2002-02-15', 1),
(75, '1979-12-12', 4),
(76, '2010-11-29', 5),
(77, '1978-02-23', 4),
(78, '1997-09-18', 4),
(79, '2014-07-27', 2),
(80, '1954-05-24', 3),
(81, '1988-02-12', 3),
(82, '1994-05-07', 4),
(83, '1958-05-31', 1),
(84, '1979-10-06', 5),
(85, '1979-04-23', 0),
(86, '1999-02-05', 5),
(87, '1958-10-20', 5),
(88, '2013-12-23', 5),
(89, '1955-12-09', 4),
(90, '1989-05-25', 3),
(91, '2000-11-30', 2),
(92, '1967-09-23', 2),
(93, '1946-02-20', 1),
(94, '1957-09-22', 2),
(95, '1947-01-12', 5),
(96, '1996-09-22', 3),
(97, '1960-01-24', 4),
(98, '1969-02-07', 3),
(99, '2015-06-12', 5),
(100, '1947-01-04', 3);

Solution

WITH CTE1 AS
(
SELECT
NUMBER_OF_SUBSCRIBERS,
# YEAR(CURDATE()) - YEAR(DATE_OF_BIRTH) overstates age when the birthday has not happened yet this year
TIMESTAMPDIFF(YEAR, DATE_OF_BIRTH, CURDATE()) AS AGE # full years, birthday-aware
FROM SUBSCRIBERS
),
CTE2 AS
(
SELECT
NUMBER_OF_SUBSCRIBERS,
CASE
WHEN AGE BETWEEN 0 AND 18 THEN '0-18'
WHEN AGE BETWEEN 19 AND 30 THEN '19-30'
WHEN AGE BETWEEN 31 AND 45 THEN '31-45'
WHEN AGE BETWEEN 46 AND 65 THEN '46-65'
WHEN AGE > 65 THEN '> 65'
END AS AGE_RANGE
FROM CTE1
)
SELECT
AGE_RANGE,
AVG(NUMBER_OF_SUBSCRIBERS) AS AVG_SUBSCRIBERS
FROM CTE2
GROUP BY AGE_RANGE
ORDER BY AGE_RANGE;

Q3. What's the order number and the order amount for the order with the highest amount sold?

Assume we have the following three tables:

  • ORDERS (order_id PK, customer_id, order_date)
  • ORDER_DETAILS (order_id, product_id, price_each, qty)
  • CUSTOMERS (customer_id, name, surname, address, city, country)

ORDER_DETAILS

order_id  product_id  price_each  qty
001       item1       10          2
001       item2       50          100

Sample data

import random

# order_ids
order_ids = range(1, 21)

# product_ids, price_each
#product_ids = {'item1': 10, 'item2': 50, 'item3': 20, 'item4': 30, 'item5': 40}
product_ids = ['item1', 'item2', 'item3', 'item4', 'item5']
price_each = [10, 50, 20, 30, 40]

# n_items
n_items = len(product_ids)

# qty
qty = range(1, 101)

for order_id in order_ids:  # each order may have multiple items, but the order_id is the same
    items = list(range(0, n_items))
    for i in range(random.randrange(1, n_items + 1)):  # for each order, how many different items
        item = random.choice(items)  # the item must be different within the same order
        items.remove(item)  # remove the item that has been selected
        print(f"({order_id}, '{product_ids[item]}', {price_each[item]}, {random.choice(qty)}),")
DROP TABLE IF EXISTS ORDER_DETAILS;
CREATE TABLE ORDER_DETAILS (
order_id INT NOT NULL,
product_id VARCHAR(255) NOT NULL,
price_each FLOAT NOT NULL,
qty TINYINT NOT NULL
);
TRUNCATE TABLE ORDER_DETAILS;
INSERT INTO ORDER_DETAILS (order_id, product_id, price_each, qty) VALUES
(1, 'item5', 40, 1),
(1, 'item1', 10, 75),
(2, 'item2', 50, 32),
(2, 'item4', 30, 96),
(3, 'item2', 50, 19),
(3, 'item4', 30, 63),
(4, 'item5', 40, 36),
(4, 'item4', 30, 54),
(4, 'item2', 50, 98),
(4, 'item1', 10, 53),
(5, 'item4', 30, 44),
(5, 'item2', 50, 30),
(5, 'item1', 10, 2),
(5, 'item3', 20, 30),
(6, 'item3', 20, 28),
(6, 'item1', 10, 40),
(6, 'item2', 50, 83),
(6, 'item5', 40, 83),
(6, 'item4', 30, 77),
(7, 'item1', 10, 72),
(7, 'item4', 30, 85),
(7, 'item2', 50, 50),
(7, 'item5', 40, 58),
(7, 'item3', 20, 22),
(8, 'item1', 10, 93),
(9, 'item3', 20, 11),
(9, 'item5', 40, 53),
(9, 'item1', 10, 71),
(9, 'item2', 50, 82),
(9, 'item4', 30, 30),
(10, 'item1', 10, 72),
(11, 'item2', 50, 89),
(11, 'item4', 30, 79),
(11, 'item3', 20, 18),
(11, 'item5', 40, 21),
(11, 'item1', 10, 41),
(12, 'item2', 50, 41),
(13, 'item1', 10, 45),
(13, 'item3', 20, 38),
(14, 'item4', 30, 74),
(14, 'item5', 40, 4),
(15, 'item3', 20, 90),
(15, 'item4', 30, 100),
(16, 'item2', 50, 27),
(16, 'item3', 20, 25),
(16, 'item5', 40, 70),
(17, 'item1', 10, 35),
(17, 'item2', 50, 91),
(17, 'item5', 40, 87),
(17, 'item4', 30, 30),
(18, 'item5', 40, 9),
(18, 'item4', 30, 99),
(19, 'item4', 30, 82),
(20, 'item5', 40, 20);

Solution

SELECT
order_id,
SUM(price_each * qty) AS highest_amount # total amount for each order_id
FROM ORDER_DETAILS
GROUP BY order_id
ORDER BY highest_amount DESC
LIMIT 1;
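LIMIT 1 returns only one row even if two orders tie at the maximum; a sketch that keeps every tied order instead:

WITH totals AS
(
SELECT order_id, SUM(price_each * qty) AS amount
FROM ORDER_DETAILS
GROUP BY order_id
)
SELECT order_id, amount
FROM totals
WHERE amount = (SELECT MAX(amount) FROM totals);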

Q4. Same question as Q3, but return the orders ranked 10th through 20th by order amount.

Solution

WITH CTE1 AS
(
SELECT
order_id,
SUM(price_each * qty) AS amount # total amount for each order_id
FROM ORDER_DETAILS
GROUP BY order_id
),
CTE2 AS
(
SELECT
order_id, amount,
DENSE_RANK() OVER(ORDER BY amount DESC) AS amount_DR
FROM CTE1
)
SELECT order_id, amount, amount_DR
FROM CTE2
WHERE amount_DR BETWEEN 10 AND 20;

Q5. Who are the customers who have not placed any order in February 2018?

Sample Data

import datetime
import random

# order_ids
order_ids = range(1, 101)

# customer_ids
customer_ids = range(1, 21)

# Random dates
def random_date():
    start_date = datetime.date(2018, 1, 1)
    end_date = datetime.date(2018, 3, 31)
    days_between_dates = end_date - start_date
    random_number_of_days = random.randrange(days_between_dates.days)
    return start_date + datetime.timedelta(days=random_number_of_days)

for order_id in order_ids:
    print(f"({order_id}, {random.choice(customer_ids)}, '{random_date()}'),")
DROP TABLE IF EXISTS ORDERS;
CREATE TABLE ORDERS (
order_id INT NOT NULL,
customer_id INT NOT NULL,
order_date DATE NOT NULL,
PRIMARY KEY (order_id)
);
TRUNCATE TABLE ORDERS;
INSERT INTO ORDERS (order_id, customer_id, order_date) VALUES
(1, 20, '2018-03-07'),
(2, 9, '2018-02-05'),
(3, 6, '2018-02-18'),
(4, 9, '2018-03-11'),
(5, 14, '2018-03-28'),
(6, 8, '2018-01-02'),
(7, 6, '2018-03-16'),
(8, 10, '2018-02-25'),
(9, 14, '2018-02-01'),
(10, 19, '2018-02-28'),
(11, 16, '2018-03-02'),
(12, 7, '2018-02-27'),
(13, 8, '2018-01-16'),
(14, 15, '2018-01-21'),
(15, 7, '2018-01-15'),
(16, 19, '2018-02-26'),
(17, 20, '2018-03-26'),
(18, 2, '2018-03-29'),
(19, 16, '2018-03-10'),
(20, 11, '2018-03-25'),
(21, 20, '2018-03-20'),
(22, 15, '2018-02-12'),
(23, 16, '2018-03-17'),
(24, 5, '2018-03-19'),
(25, 18, '2018-02-03'),
(26, 4, '2018-03-30'),
(27, 20, '2018-02-23'),
(28, 5, '2018-03-25'),
(29, 19, '2018-01-11'),
(30, 3, '2018-01-06'),
(31, 18, '2018-03-17'),
(32, 8, '2018-03-21'),
(33, 13, '2018-01-28'),
(34, 5, '2018-01-25'),
(35, 13, '2018-03-26'),
(36, 4, '2018-02-26'),
(37, 11, '2018-02-06'),
(38, 7, '2018-01-13'),
(39, 20, '2018-03-29'),
(40, 2, '2018-03-01'),
(41, 11, '2018-02-21'),
(42, 17, '2018-02-18'),
(43, 4, '2018-02-12'),
(44, 3, '2018-01-01'),
(45, 10, '2018-03-08'),
(46, 6, '2018-03-17'),
(47, 8, '2018-03-12'),
(48, 18, '2018-03-02'),
(49, 16, '2018-01-09'),
(50, 19, '2018-02-03'),
(51, 18, '2018-03-14'),
(52, 8, '2018-03-28'),
(53, 10, '2018-01-16'),
(54, 1, '2018-01-27'),
(55, 8, '2018-02-26'),
(56, 15, '2018-02-21'),
(57, 18, '2018-01-05'),
(58, 20, '2018-01-07'),
(59, 12, '2018-02-25'),
(60, 11, '2018-03-18'),
(61, 6, '2018-03-16'),
(62, 9, '2018-01-07'),
(63, 11, '2018-01-20'),
(64, 7, '2018-01-03'),
(65, 10, '2018-02-17'),
(66, 4, '2018-01-30'),
(67, 1, '2018-01-24'),
(68, 20, '2018-02-01'),
(69, 4, '2018-02-21'),
(70, 4, '2018-01-31'),
(71, 12, '2018-01-25'),
(72, 9, '2018-03-13'),
(73, 2, '2018-02-05'),
(74, 3, '2018-01-02'),
(75, 4, '2018-02-11'),
(76, 8, '2018-03-15'),
(77, 17, '2018-01-01'),
(78, 7, '2018-02-01'),
(79, 16, '2018-02-07'),
(80, 14, '2018-03-18'),
(81, 4, '2018-03-11'),
(82, 9, '2018-03-04'),
(83, 4, '2018-02-01'),
(84, 8, '2018-01-21'),
(85, 15, '2018-03-18'),
(86, 9, '2018-03-23'),
(87, 18, '2018-03-13'),
(88, 14, '2018-03-15'),
(89, 5, '2018-03-18'),
(90, 14, '2018-03-16'),
(91, 9, '2018-02-01'),
(92, 11, '2018-03-04'),
(93, 17, '2018-02-12'),
(94, 6, '2018-02-27'),
(95, 15, '2018-03-01'),
(96, 19, '2018-01-25'),
(97, 20, '2018-03-18'),
(98, 14, '2018-02-26'),
(99, 2, '2018-01-16'),
(100, 17, '2018-03-25');

Solution

WITH CTE AS
(
SELECT customer_id
FROM ORDERS
WHERE DATE_FORMAT(order_date, '%Y-%m') = '2018-02'
)
SELECT DISTINCT customer_id
FROM ORDERS # note: only customers who have placed at least one order ever appear here
WHERE customer_id NOT IN (SELECT customer_id FROM CTE);
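If the CUSTOMERS table listed in Q3 is populated, an anti-join variant also catches customers who never placed any order at all; a sketch, assuming CUSTOMERS has a customer_id column (no sample data for it is created above):

SELECT c.customer_id
FROM CUSTOMERS c
LEFT JOIN ORDERS o
ON c.customer_id = o.customer_id
AND o.order_date >= '2018-02-01'
AND o.order_date < '2018-03-01'
WHERE o.order_id IS NULL; # no matching February 2018 order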