LeetCode SQL - Math

Median

569. Median Employee Salary((returns one or two values))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
DROP TABLE IF EXISTS Employee;
CREATE TABLE
If NOT EXISTS Employee (Id INT, Company VARCHAR(255), Salary INT);
TRUNCATE TABLE Employee;
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('1', 'A', '2341');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('2', 'A', '341');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('3', 'A', '15');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('4', 'A', '15314');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('5', 'A', '451');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('6', 'A', '513');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('7', 'B', '15');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('8', 'B', '13');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('9', 'B', '1154');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('10', 'B', '1345');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('11', 'B', '1221');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('12', 'B', '234');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('13', 'C', '2345');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('14', 'C', '2645');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('15', 'C', '2645');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('16', 'C', '2652');
INSERT INTO
Employee (Id, Company, Salary)
VALUES
('17', 'C', '65');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/*Select the median records
- Employee PK: Id

- SOLUTION: use ROW_NUMBER to rank ascending and descending the records, then take the middle one.
*/
WITH
cte AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
company
ORDER BY
salary asc
) AS salary_r,
ROW_NUMBER() OVER (
PARTITION BY
company
ORDER BY
salary desc
) AS salary_dr
FROM
Employee
)
-- select * from cte
SELECT
MIN(id) AS id,
company,
salary
FROM
cte
WHERE
salary_r = salary_dr
OR salary_r = salary_dr + 1
OR salary_r = salary_dr - 1
GROUP BY
2,
3;

571. Find Median Given Frequency of Numbers

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
DROP TABLE if EXISTS Numbers;
CREATE TABLE
If NOT EXISTS Numbers (num INT, frequency INT);
TRUNCATE TABLE Numbers;
INSERT INTO
Numbers (num, frequency)
VALUES
('0', '7');
INSERT INTO
Numbers (num, frequency)
VALUES
('1', '1');
INSERT INTO
Numbers (num, frequency)
VALUES
('2', '3');
INSERT INTO
Numbers (num, frequency)
VALUES
('3', '1');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
SELECT
AVG(n.num) AS median
FROM
Numbers n
WHERE
n.Frequency >= ABS(
(
SELECT
SUM(Frequency)
FROM
Numbers
WHERE
num <= n.num
) - (
SELECT
SUM(Frequency)
FROM
Numbers
WHERE
num >= n.num
)
);

Euclidean distance

612. Shortest Distance in a Plane

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
DROP TABLE if EXISTS Point2D;
CREATE TABLE
If NOT EXISTS Point2D (x INT NOT NULL, y INT NOT NULL);
TRUNCATE TABLE Point2D;
INSERT INTO
Point2D (x, y)
VALUES
('-1', '-1');
INSERT INTO
Point2D (x, y)
VALUES
('0', '0');
INSERT INTO
Point2D (x, y)
VALUES
('-1', '-2');
1
2
3
4
5
6
SELECT
MIN(ROUND(SQRT(pow (p1.x - p2.x, 2) + pow (p1.y - p2.y, 2)), 2)) AS shortest
FROM
Point2D p1
INNER JOIN Point2D p2 ON p1.x != p2.x
OR p1.y != p2.y;

Game Scores

1841. League Statistics

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
DROP TABLE if EXISTS Teams;
DROP TABLE if EXISTS Matches;
CREATE TABLE
If NOT EXISTS Teams (team_id INT, team_name VARCHAR(20));
CREATE TABLE
If NOT EXISTS Matches (home_team_id INT, away_team_id INT, home_team_goals INT, away_team_goals INT);
TRUNCATE TABLE Teams;
INSERT INTO
Teams (team_id, team_name)
VALUES
('1', 'Ajax');
INSERT INTO
Teams (team_id, team_name)
VALUES
('4', 'Dortmund');
INSERT INTO
Teams (team_id, team_name)
VALUES
('6', 'Arsenal');
TRUNCATE TABLE Matches;
INSERT INTO
Matches (home_team_id, away_team_id, home_team_goals, away_team_goals)
VALUES
('1', '4', '0', '1');
INSERT INTO
Matches (home_team_id, away_team_id, home_team_goals, away_team_goals)
VALUES
('1', '6', '3', '3');
INSERT INTO
Matches (home_team_id, away_team_id, home_team_goals, away_team_goals)
VALUES
('4', '1', '5', '2');
INSERT INTO
Matches (home_team_id, away_team_id, home_team_goals, away_team_goals)
VALUES
('6', '1', '0', '0');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
WITH
cte AS (
SELECT
home_team_id AS team_id,
home_team_goals AS goal_for,
away_team_goals AS goal_against,
home_team_goals - away_team_goals AS goal_diff
FROM
Matches
UNION ALL
SELECT
away_team_id AS team_id,
away_team_goals AS goal_for,
home_team_goals AS goal_against,
away_team_goals - home_team_goals AS goal_diff
FROM
Matches
)
SELECT
team_name,
COUNT(*) AS matches_played,
SUM(
CASE
WHEN goal_diff = 0 THEN 1
WHEN goal_diff > 0 THEN 3
ELSE 0
END
) AS points,
SUM(goal_for) AS goal_for,
SUM(goal_against) AS goal_against,
SUM(goal_diff) AS goal_diff
FROM
cte
INNER JOIN Teams USING (team_id)
GROUP BY
team_name
ORDER BY
points desc,
goal_diff desc,
team_name asc;

1212. Team Scores in Football Tournament

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
DROP TABLE if EXISTS Teams;
DROP TABLE if EXISTS Matches;
CREATE TABLE
If NOT EXISTS Teams (team_id INT, team_name VARCHAR(30));
CREATE TABLE
If NOT EXISTS Matches (match_id INT, host_team INT, guest_team INT, host_goals INT, guest_goals INT);
TRUNCATE TABLE Teams;
INSERT INTO
Teams (team_id, team_name)
VALUES
('10', 'Leetcode FC');
INSERT INTO
Teams (team_id, team_name)
VALUES
('20', 'NewYork FC');
INSERT INTO
Teams (team_id, team_name)
VALUES
('30', 'Atlanta FC');
INSERT INTO
Teams (team_id, team_name)
VALUES
('40', 'Chicago FC');
INSERT INTO
Teams (team_id, team_name)
VALUES
('50', 'Toronto FC');
TRUNCATE TABLE Matches;
INSERT INTO
Matches (match_id, host_team, guest_team, host_goals, guest_goals)
VALUES
('1', '10', '20', '3', '0');
INSERT INTO
Matches (match_id, host_team, guest_team, host_goals, guest_goals)
VALUES
('2', '30', '10', '2', '2');
INSERT INTO
Matches (match_id, host_team, guest_team, host_goals, guest_goals)
VALUES
('3', '10', '50', '5', '1');
INSERT INTO
Matches (match_id, host_team, guest_team, host_goals, guest_goals)
VALUES
('4', '20', '30', '1', '0');
INSERT INTO
Matches (match_id, host_team, guest_team, host_goals, guest_goals)
VALUES
('5', '50', '30', '1', '0');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
-- A team receives three points if they win a match (i.e., Scored more goals than the opponent team).
-- A team receives one point if they draw a match (i.e., Scored the same number of goals as the opponent team).
-- A team receives no points if they lose a match (i.e., Scored fewer goals than the opponent team).
WITH
cte AS (
SELECT
host_team AS team_id,
CASE
WHEN host_goals > guest_goals THEN 3
WHEN host_goals < guest_goals THEN 0
ELSE 1
END AS num_points
FROM
Matches
UNION ALL
SELECT
guest_team AS team_id,
CASE
WHEN host_goals > guest_goals THEN 0
WHEN host_goals < guest_goals THEN 3
ELSE 1
END AS num_points
FROM
Matches
)
SELECT
team_id,
team_name,
ifnull (SUM(num_points), 0) AS num_points
FROM
Teams
LEFT JOIN cte USING (team_id)
GROUP BY
1,
2
ORDER BY
3 desc,
1 asc;

Rate

1934. Confirmation Rate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
DROP TABLE if EXISTS Signups;
DROP TABLE if EXISTS Confirmations;
CREATE TABLE
If NOT EXISTS Signups (user_id INT, time_stamp datetime);
CREATE TABLE
If NOT EXISTS Confirmations (user_id INT, time_stamp datetime, action ENUM ('confirmed', 'timeout'));
TRUNCATE TABLE Signups;
INSERT INTO
Signups (user_id, time_stamp)
VALUES
('3', '2020-03-21 10:16:13');
INSERT INTO
Signups (user_id, time_stamp)
VALUES
('7', '2020-01-04 13:57:59');
INSERT INTO
Signups (user_id, time_stamp)
VALUES
('2', '2020-07-29 23:09:44');
INSERT INTO
Signups (user_id, time_stamp)
VALUES
('6', '2020-12-09 10:39:37');
TRUNCATE TABLE Confirmations;
INSERT INTO
Confirmations (user_id, time_stamp, action)
VALUES
('3', '2021-01-06 03:30:46', 'timeout');
INSERT INTO
Confirmations (user_id, time_stamp, action)
VALUES
('3', '2021-07-14 14:00:00', 'timeout');
INSERT INTO
Confirmations (user_id, time_stamp, action)
VALUES
('7', '2021-06-12 11:57:29', 'confirmed');
INSERT INTO
Confirmations (user_id, time_stamp, action)
VALUES
('7', '2021-06-13 12:58:28', 'confirmed');
INSERT INTO
Confirmations (user_id, time_stamp, action)
VALUES
('7', '2021-06-14 13:59:27', 'confirmed');
INSERT INTO
Confirmations (user_id, time_stamp, action)
VALUES
('2', '2021-01-22 00:00:00', 'confirmed');
INSERT INTO
Confirmations (user_id, time_stamp, action)
VALUES
('2', '2021-02-28 23:59:59', 'timeout');
1
2
3
4
5
6
7
8
SELECT
s.user_id,
ifnull (ROUND(AVG(if (action = 'confirmed', 1, 0)), 2), 0) AS confirmation_rate
FROM
Signups s
LEFT JOIN Confirmations c USING (user_id)
GROUP BY
s.user_id;

First and Last

1972. First and Last Call On the Same Day

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
DROP TABLE if EXISTS Calls;
CREATE TABLE
If NOT EXISTS Calls (caller_id INT, recipient_id INT, call_time datetime);
TRUNCATE TABLE Calls;
INSERT INTO
Calls (caller_id, recipient_id, call_time)
VALUES
("8", "4", "2021-08-24 17:46:07");
INSERT INTO
Calls (caller_id, recipient_id, call_time)
VALUES
("4", "8", "2021-08-24 19:57:13");
INSERT INTO
Calls (caller_id, recipient_id, call_time)
VALUES
("5", "1", "2021-08-11 05:28:44");
INSERT INTO
Calls (caller_id, recipient_id, call_time)
VALUES
("8", "3", "2021-08-17 04:04:15");
INSERT INTO
Calls (caller_id, recipient_id, call_time)
VALUES
("11", "3", "2021-08-17 13:07:00");
INSERT INTO
Calls (caller_id, recipient_id, call_time)
VALUES
("8", "11", "2021-08-17 22:22:22");
INSERT INTO
Calls (caller_id, recipient_id, call_time)
VALUES
("8", "3", "2021-08-17 23:22:22");
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
WITH
cte AS (
SELECT
caller_id AS a,
recipient_id AS b,
call_time
FROM
Calls
UNION ALL
SELECT
recipient_id AS a,
caller_id AS b,
call_time
FROM
Calls
),
cte1 AS (
SELECT
a,
b,
DATE (call_time) AS call_date,
DENSE_RANK() OVER (
PARTITION BY
a,
DATE (call_time)
ORDER BY
call_time
) AS call_ar,
DENSE_RANK() OVER (
PARTITION BY
a,
DATE (call_time)
ORDER BY
call_time desc
) AS call_dr
FROM
cte
)
SELECT DISTINCT
a AS user_id
FROM
cte1
WHERE
call_ar = 1
OR call_dr = 1
GROUP BY
1,
call_date
HAVING
COUNT(DISTINCT (b)) = 1;

Max

1867. Orders With Maximum Quantity Above Average

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
 TABLE if EXISTS OrdersDetails;
CREATE TABLE
If NOT EXISTS OrdersDetails (order_id INT, product_id INT, quantity INT);
TRUNCATE TABLE OrdersDetails;
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('1', '1', '12');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('1', '2', '10');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('1', '3', '15');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('2', '1', '8');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('2', '4', '4');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('2', '5', '6');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('3', '3', '5');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('3', '4', '18');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('4', '5', '2');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('4', '6', '8');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('5', '7', '9');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('5', '8', '9');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('3', '9', '20');
INSERT INTO
OrdersDetails (order_id, product_id, quantity)
VALUES
('2', '9', '4');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
DROPwith cte AS (
SELECT
order_id,
AVG(quantity) AS avg_quantity
FROM
OrdersDetails
GROUP BY
order_id
)
SELECT DISTINCT
order_id
FROM
OrdersDetails
WHERE
quantity > (
SELECT
MAX(avg_quantity)
FROM
cte
);

Avg

1132. Reported Posts II

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
DROP TABLE if EXISTS Actions;
DROP TABLE if EXISTS Removals;
CREATE TABLE
If NOT EXISTS Actions (
user_id INT,
post_id INT,
action_date DATE,
action ENUM ('view', 'like', 'reaction', 'comment', 'report', 'share'),
extra VARCHAR(10)
);
CREATE TABLE
if NOT EXISTS Removals (post_id INT, remove_date DATE);
TRUNCATE TABLE Actions;
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('1', '1', '2019-07-01', 'view', 'None');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('1', '1', '2019-07-01', 'like', 'None');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('1', '1', '2019-07-01', 'share', 'None');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('2', '2', '2019-07-04', 'view', 'None');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('2', '2', '2019-07-04', 'report', 'spam');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('3', '4', '2019-07-04', 'view', 'None');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('3', '4', '2019-07-04', 'report', 'spam');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('4', '3', '2019-07-02', 'view', 'None');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('4', '3', '2019-07-02', 'report', 'spam');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('5', '2', '2019-07-03', 'view', 'None');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('5', '2', '2019-07-03', 'report', 'racism');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('5', '5', '2019-07-03', 'view', 'None');
INSERT INTO
Actions (user_id, post_id, action_date, action, extra)
VALUES
('5', '5', '2019-07-03', 'report', 'racism');
TRUNCATE TABLE Removals;
INSERT INTO
Removals (post_id, remove_date)
VALUES
('2', '2019-07-20');
INSERT INTO
Removals (post_id, remove_date)
VALUES
('3', '2019-07-18');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
WITH
cte AS (
SELECT
action_date,
COUNT(DISTINCT r.post_id) / COUNT(DISTINCT a.post_id) * 100 AS daily_percent
FROM
Actions a
LEFT JOIN Removals r ON a.post_id = r.post_id
WHERE
extra = 'spam'
GROUP BY
action_date
)
SELECT
ROUND(AVG(daily_percent), 2) AS average_daily_percent
FROM
cte;

1321. Restaurant Growth

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
DROP TABLE if EXISTS Customer;
CREATE TABLE
If NOT EXISTS Customer (customer_id INT, `name` VARCHAR(20), visited_on DATE, amount INT);
TRUNCATE TABLE Customer;
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('1', 'Jhon', '2019-01-01', '100');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('2', 'Daniel', '2019-01-02', '110');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('3', 'Jade', '2019-01-03', '120');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('4', 'Khaled', '2019-01-04', '130');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('5', 'Winston', '2019-01-05', '110');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('6', 'Elvis', '2019-01-06', '140');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('7', 'Anna', '2019-01-07', '150');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('8', 'Maria', '2019-01-08', '80');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('9', 'Jaze', '2019-01-09', '110');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('1', 'Jhon', '2019-01-10', '130');
INSERT INTO
Customer (customer_id, name, visited_on, amount)
VALUES
('3', 'Jade', '2019-01-10', '150');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
WITH
daily_amount AS (
SELECT
visited_on,
SUM(amount) AS amount
FROM
Customer
GROUP BY
1
)
SELECT
d1.visited_on,
SUM(d2.amount) AS amount,
ROUND(SUM(d2.amount / 7), 2) AS average_amount
FROM
daily_amount d1
LEFT JOIN daily_amount d2 ON datediff (d1.visited_on, d2.visited_on) BETWEEN 0 AND 6
GROUP BY
1
HAVING
COUNT(d2.visited_on) = 7
ORDER BY
1;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
WITH
daily_amount AS (
SELECT
visited_on,
SUM(amount) AS amount
FROM
Customer
GROUP BY
1
)
SELECT
d1.visited_on,
SUM(d2.amount) AS amount,
ROUND(SUM(d2.amount / 7), 2) AS average_amount
FROM
daily_amount d1,
daily_amount d2
WHERE
datediff (d1.visited_on, d2.visited_on) BETWEEN 0 AND 6
GROUP BY
1
HAVING
COUNT(d2.visited_on) = 7
ORDER BY
1;

Cumulative

579. Find Cumulative Salary of an Employee

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
DROP TABLE if EXISTS Employee;
CREATE TABLE
If NOT EXISTS Employee (id INT, MONTH INT, salary INT);
TRUNCATE TABLE Employee;
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('1', '1', '20');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('2', '1', '20');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('1', '2', '30');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('2', '2', '30');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('3', '2', '40');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('1', '3', '40');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('3', '3', '60');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('1', '4', '60');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('3', '4', '70');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('1', '7', '90');
INSERT INTO
Employee (id, MONTH, salary)
VALUES
('1', '8', '90');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
WITH
cte AS (
SELECT
id,
MONTH,
SUM(salary) OVER (
PARTITION BY
id
ORDER BY
MONTH ASC RANGE BETWEEN 2 PRECEDING
AND CURRENT ROW
) AS salary,
ROW_NUMBER() OVER (
PARTITION BY
id
ORDER BY
MONTH desc
) AS rn
FROM
Employee
)
SELECT
id,
MONTH,
salary
FROM
cte
WHERE
rn > 1;

Rolling

1321. Restaurant Growth

Rolling with rows

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
-- O(nlogn), O(n)
WITH
cte AS (
SELECT
visited_on,
SUM(amount) AS amount
FROM
Customer
GROUP BY
visited_on
),
CustomerWithAmount AS (
SELECT
visited_on,
SUM(amount) OVER (
ORDER BY
visited_on ROWS BETWEEN 6 PRECEDING
AND CURRENT ROW
) AS total_amount,
AVG(amount) OVER (
ORDER BY
visited_on ROWS BETWEEN 6 PRECEDING
AND CURRENT ROW
) AS average_amount,
COUNT(*) OVER (
ORDER BY
visited_on ROWS BETWEEN 6 PRECEDING
AND CURRENT ROW
) AS count_days
FROM
cte
)
SELECT
visited_on,
total_amount AS amount,
ROUND(average_amount, 2) AS average_amount
FROM
CustomerWithAmount
WHERE
count_days = 7
ORDER BY
visited_on;

Rolling with datetime

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
-- O(nlogn), O(n)
WITH
cte AS (
SELECT
visited_on,
SUM(amount) AS amount
FROM
Customer
GROUP BY
visited_on
),
CustomerWithAmount AS (
SELECT
visited_on,
SUM(amount) OVER (
ORDER BY
visited_on RANGE BETWEEN INTERVAL 6 DAY PRECEDING
AND CURRENT ROW
) AS total_amount,
AVG(amount) OVER (
ORDER BY
visited_on RANGE BETWEEN INTERVAL 6 DAY PRECEDING
AND CURRENT ROW
) AS average_amount,
COUNT(*) OVER (
ORDER BY
visited_on RANGE BETWEEN INTERVAL 6 DAY PRECEDING
AND CURRENT ROW
) AS count_days
FROM
cte
)
SELECT
visited_on,
total_amount AS amount,
ROUND(average_amount, 2) AS average_amount
FROM
CustomerWithAmount
WHERE
count_days = 7
ORDER BY
visited_on;

Range JOIN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
WITH
daily_amount AS (
SELECT
visited_on,
SUM(amount) AS amount
FROM
Customer
GROUP BY
1
)
SELECT
d1.visited_on,
SUM(d2.amount) AS amount,
ROUND(SUM(d2.amount / 7), 2) AS average_amount
FROM
daily_amount d1
LEFT JOIN daily_amount d2 ON datediff (d1.visited_on, d2.visited_on) BETWEEN 0 AND 6
GROUP BY
1
HAVING
COUNT(d2.visited_on) = 7
ORDER BY
1

pandas

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import pandas as pd

# O(nlogn), O(n)
def restaurant_growth(customer: pd.DataFrame) -> pd.DataFrame:
# Sort by 'visited_on'
df = customer.sort_values('visited_on')

# Sum amount
df = df.groupby(['visited_on'])['amount'].sum().reset_index()

# Calculate the 7-day moving sum and moving average
df['amount_sum'] = df['amount'].rolling(window=7).sum()
df['average_amount'] = df['amount'].rolling(window=7).mean()

# Filter rows where the 7-day window is complete
result = df[df['amount_sum'].notna()]

# Round the average amount to 2 decimal places
result['average_amount'] = result['average_amount'].round(2)

# Select and rename columns
result = result[['visited_on', 'amount_sum', 'average_amount']]
result.columns = ['visited_on', 'amount', 'average_amount']

return result

Rolling Average

Question: Assume you have a DataFrame df_engagement with columns user_id, login_date, and activity_count. Write a Python script to calculate the 7-day rolling average of activity counts for each user.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import pandas as pd

# Sample data for df_engagement
data = {
'user_id': [1, 1, 1, 2, 2, 2, 2, 3, 3, 3],
'login_date': ['2024-06-01', '2024-06-02', '2024-06-03', '2024-06-01', '2024-06-02', '2024-06-04', '2024-06-05', '2024-06-01', '2024-06-03', '2024-06-04'],
'activity_count': [5, 6, 7, 1, 3, 2, 0, 3, 4, 5]
}

# Create DataFrame
df_engagement = pd.DataFrame(data)

# Convert login_date to datetime type
df_engagement['login_date'] = pd.to_datetime(df_engagement['login_date'])

# Ensure the data is sorted by user and date
df_engagement.sort_values(by=['user_id', 'login_date'], inplace=True)

# Display the DataFrame to understand what it looks like
print(df_engagement)
1
2
3
4
5
6
7
8
9
10
# Ensure the DataFrame is sorted
df_engagement.sort_values(by=['user_id', 'login_date'], inplace=True)

# Set index as login_date for rolling calculation
df_engagement.set_index('login_date', inplace=True)

# Calculate the 7-day rolling average of activity counts
df_rolling_avg = df_engagement.groupby('user_id')['activity_count'].rolling(window='7D').mean().reset_index(name='7-day rolling average')

print(df_rolling_avg)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
-- Create a table named 'engagement'
CREATE TABLE engagement (
user_id INT,
login_date DATE,
activity_count INT
);

-- Insert sample data into the 'engagement' table
INSERT INTO engagement (user_id, login_date, activity_count) VALUES
(1, '2024-06-01', 5),
(1, '2024-06-02', 6),
(1, '2024-06-03', 7),
(2, '2024-06-01', 1),
(2, '2024-06-02', 3),
(2, '2024-06-04', 2),
(2, '2024-06-05', 0),
(3, '2024-06-01', 3),
(3, '2024-06-03', 4),
(3, '2024-06-04', 5);

-- Ensure the data is indexed properly for performance
CREATE INDEX idx_user_login ON engagement(user_id, login_date);

-- Display the data to verify insertion
SELECT * FROM engagement;
1
2
3
4
5
6
SELECT
user_id,
login_date,
activity_count,
AVG(activity_count) OVER (PARTITION BY user_id ORDER BY login_date ASC RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW) AS rolling_avg
FROM engagement;