-- reported-posts-ii.sql
-- 1132.Reported Posts II
-- Table: Actions
-- +---------------+---------+
-- | Column Name | Type |
-- +---------------+---------+
-- | user_id | int |
-- | post_id | int |
-- | action_date | date |
-- | action | enum |
-- | extra | varchar |
-- +---------------+---------+
-- There is no primary key for this table, it may have duplicate rows.
-- The action column is an ENUM type of ('view', 'like', 'reaction', 'comment', 'report', 'share').
-- The extra column has optional information about the action such as a reason for report or a type of reaction.
-- Table: Removals
-- +---------------+---------+
-- | Column Name | Type |
-- +---------------+---------+
-- | post_id | int |
-- | remove_date | date |
-- +---------------+---------+
-- post_id is the primary key of this table.
-- Each row in this table indicates that some post was removed as a result of being reported or as a result of an admin review.
-- Write an SQL query to find the average of the daily percentage of posts that got removed after being reported as spam, rounded to 2 decimal places.
-- The query result format is in the following example:
-- Actions table:
-- +---------+---------+-------------+--------+--------+
-- | user_id | post_id | action_date | action | extra |
-- +---------+---------+-------------+--------+--------+
-- | 1 | 1 | 2019-07-01 | view | null |
-- | 1 | 1 | 2019-07-01 | like | null |
-- | 1 | 1 | 2019-07-01 | share | null |
-- | 2 | 2 | 2019-07-04 | view | null |
-- | 2 | 2 | 2019-07-04 | report | spam |
-- | 3 | 4 | 2019-07-04 | view | null |
-- | 3 | 4 | 2019-07-04 | report | spam |
-- | 4 | 3 | 2019-07-02 | view | null |
-- | 4 | 3 | 2019-07-02 | report | spam |
-- | 5 | 2 | 2019-07-03 | view | null |
-- | 5 | 2 | 2019-07-03 | report | racism |
-- | 5 | 5 | 2019-07-03 | view | null |
-- | 5 | 5 | 2019-07-03 | report | racism |
-- +---------+---------+-------------+--------+--------+
-- Removals table:
-- +---------+-------------+
-- | post_id | remove_date |
-- +---------+-------------+
-- | 2 | 2019-07-20 |
-- | 3 | 2019-07-18 |
-- +---------+-------------+
-- Result table:
-- +-----------------------+
-- | average_daily_percent |
-- +-----------------------+
-- | 75.00 |
-- +-----------------------+
-- The percentage for 2019-07-04 is 50% because only one post of two spam reported posts was removed.
-- The percentage for 2019-07-02 is 100% because one post was reported as spam and it was removed.
-- The other days had no spam reports so the average is (50 + 100) / 2 = 75%
-- Note that the output is only one number and that we do not care about the remove dates.
# V0
-- Average, over the days that had at least one spam report, of the percentage
-- of spam-reported posts that also appear in removals.
select round(avg(daily_percent), 2) as average_daily_percent
from (
    select
        a.action_date,
        -- count(distinct ...) is needed because actions may hold duplicate rows;
        -- r.post_id is NULL for posts that were never removed, so they are
        -- counted only in the denominator.
        -- Multiply by 100 *before* dividing: with MySQL's default
        -- div_precision_increment of 4 the quotient is rounded to 4 decimals,
        -- so 1/32 would become 0.0313 (-> 3.13) instead of 100/32 = 3.1250.
        100 * count(distinct r.post_id) / count(distinct a.post_id) as daily_percent
    from actions a
    -- left join keeps reported posts that have no removal row
    left join removals r
        on a.post_id = r.post_id
    -- extra also carries reaction types, so restrict to report rows explicitly
    where a.action = 'report'
      and a.extra = 'spam'
    group by a.action_date
) daily;
# V1
# Adapted from https://circlecoder.com/reported-posts-II/
-- Step 1 (derived table): per action_date, the percentage of distinct
-- spam-reported posts that have a row in removals.
-- Step 2 (outer query): average those daily percentages and round to 2 places.
select
    round(avg(per_day.daily_percent), 2) as average_daily_percent
from (
    select
        a.action_date,
        -- Scale to a percentage before dividing so the quotient keeps two more
        -- significant decimals (MySQL rounds "/" results to 4 extra digits by
        -- default); distinct guards against duplicate rows in actions, and
        -- NULL r.post_id values (post never removed) are ignored by count().
        100 * count(distinct r.post_id) / count(distinct a.post_id) as daily_percent
    from actions a
    left join removals r
        on r.post_id = a.post_id
    where
        -- only genuine spam reports: extra may also describe a reaction type
        a.action = 'report'
        and a.extra = 'spam'
    group by
        a.action_date
) per_day;
# V2
# Time: O(m + n)
# Space: O(n)
-- Average of the per-day removal percentage among posts reported as spam.
SELECT ROUND(AVG(tmp.removal_percent), 2) AS average_daily_percent
FROM
    (SELECT a.action_date,
            -- 100 * removed / reported: multiplying first avoids rounding the
            -- ratio to 4 decimals before it is scaled to a percentage.
            -- DISTINCT handles duplicate Actions rows; posts without a
            -- Removals match contribute NULL and are skipped by COUNT.
            100 * COUNT(DISTINCT r.post_id) / COUNT(DISTINCT a.post_id) AS removal_percent
     FROM Actions AS a
     LEFT JOIN Removals AS r ON a.post_id = r.post_id
     -- Require an actual report: extra is also used for reaction types.
     WHERE a.action = 'report'
       AND a.extra = 'spam'
     GROUP BY a.action_date
     -- ORDER BY NULL is the older MySQL idiom for skipping the implicit sort
     -- that GROUP BY used to perform; it does not affect the result.
     ORDER BY NULL) AS tmp;