-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquery_part1_analysis.sql
More file actions
122 lines (111 loc) · 2.56 KB
/
query_part1_analysis.sql
File metadata and controls
122 lines (111 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/* Data preparation
Isolate (or group ) the transaction by each cardholder
- join transaction and credit card tables where the card number is matching.
- flag transaction that is less than $2.00 */
CREATE VIEW v_data_joined AS
SELECT t1.id AS txn_id,
t1.date,
EXTRACT(DAY FROM date) AS txn_day,
EXTRACT(MONTH FROM date) AS txn_month,
EXTRACT(HOUR FROM date) AS txn_hour,
EXTRACT(MIN FROM date) AS txn_min,
t2.cardholder_id,
t1.amount,
t3.name AS merchant_name,
t4.name AS merchant_category,
CASE
WHEN (amount < 2) THEN 1
WHEN (amount >= 2) THEN 0
END AS twoless_ind,
CASE
WHEN(amount > 500) THEN 1
WHEN(amount <=500) THEN 0
END AS large_ind
FROM transaction t1,
credit_card t2,
merchant t3,
merchant_category t4
WHERE t1.card = t2.card
AND t1.id_merchant = t3.id
AND t3.id_merchant_category = t4.id
;
--count the transactions that are less than $2.00 per cardholder
CREATE VIEW v_txn_twoless AS
SELECT
cardholder_id,
sum(twoless_ind) AS num_txn_twoless
FROM v_data_joined
GROUP BY cardholder_id
ORDER BY cardholder_id
;
--top 100 highest transactions made between 7am and 9am
CREATE VIEW v_top_100_txn_7to9am AS
SELECT txn_id,
date,
txn_hour,
txn_min,
amount,
cardholder_id,
merchant_name,
merchant_category
FROM (
( --pick up transactions made from 7am to 8am
SELECT txn_id,
date,
txn_min,
txn_hour,
amount,
cardholder_id,
merchant_name,
merchant_category
FROM V_data_joined
WHERE txn_hour IN (7,8)
)
UNION ALL
( --pick up transaction made at 9am
SELECT txn_id,
date,
txn_min,
txn_hour,
amount,
cardholder_id,
merchant_name,
merchant_category
FROM V_data_joined
WHERE txn_hour = 9
AND txn_min = 0
)
) f
ORDER BY amount DESC
LIMIT 100
;
--additional analysis
--all transaction between 7am and 9am
select *
from v_top_100_txn_7to9am
ORDER BY amount DESC
;
--transaction by hour
SELECT txn_hour,
sum(twoless_ind) small_txn,
sum(large_ind) fraud_txn_suspect
FROM v_data_joined
GROUP BY txn_hour
ORDER BY txn_hour ASC
;
--top 5 merchants prone to being hacked
CREATE VIEW v_top5_merchant AS
SELECT merchant_name,
merchant_category,
small_txn_cnt,
large_txn_cnt
FROM (SELECT merchant_name,
merchant_category,
sum(twoless_ind) AS small_txn_cnt,
sum(large_ind) AS large_txn_cnt
FROM v_data_joined
GROUP BY merchant_name, merchant_category
)f
ORDER BY small_txn_cnt DESC, large_txn_cnt DESC
LIMIT 5
;