-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path11.time-series-partitioning.sql
More file actions
240 lines (197 loc) · 7.64 KB
/
11.time-series-partitioning.sql
File metadata and controls
240 lines (197 loc) · 7.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
-- =====================================================
-- SENSOR METRICS Reset and introduce partitioning
-- =====================================================
DROP TABLE IF EXISTS sensor_metrics;
-- =====================================================
-- SENSOR METRICS TABLE (RAW TIME-SERIES HEAP TABLE)
-- Demonstrates: Unoptimized append-only time-series storage
-- NO indexes, NO partitioning
-- =====================================================
CREATE TABLE sensor_metrics
(
sensor_id INT REFERENCES sensors (sensor_id),
ts TIMESTAMP,
temperature DOUBLE PRECISION,
cpu_usage DOUBLE PRECISION,
status TEXT
) PARTITION BY RANGE (ts);
-- =====================================================
-- WEEKLY PARTITIONS (2026-01-01 → 2026-03-31)
-- Demonstrates: Fixed weekly partitioning strategy
-- =====================================================
DO
$$
DECLARE
start_date DATE := DATE '2026-01-01';
end_date DATE := DATE '2026-04-01'; -- end boundary (exclusive)
week_start DATE;
week_end DATE;
partition_name TEXT;
BEGIN
week_start := start_date;
WHILE week_start < end_date
LOOP
week_end := week_start + INTERVAL '7 days';
partition_name := format(
'sensor_metrics_%s_%s',
to_char(week_start, 'YYYYMMDD'),
to_char(week_end, 'YYYYMMDD')
);
EXECUTE format(
'CREATE TABLE %I PARTITION OF sensor_metrics FOR VALUES FROM (%L) TO (%L);',
partition_name,
week_start,
week_end
);
week_start := week_end;
END LOOP;
END
$$;
-- =====================================================
-- DATA GENERATION (VARIABLE EVENT STREAM)
-- Demonstrates: Irregular time intervals (1ms - 1000ms)
-- Adjustable dataset size via parameter n
-- =====================================================
WITH RECURSIVE params AS (SELECT 10000000 AS n),
base AS (SELECT i,
(random() * 1000)::int AS ms,
(1 + floor(random() * 200))::int AS sensor_id
FROM generate_series(1, (SELECT n FROM params)) i),
timeline AS (SELECT i,
sensor_id,
ms,
SUM(ms) OVER (ORDER BY i) AS cum_ms
FROM base),
final AS (SELECT sensor_id,
(TIMESTAMP '2026-01-01 00:00:00'
+ (cum_ms * INTERVAL '1 millisecond')) AS ts,
20 + random() * 15 AS temperature,
random() * 100 AS cpu_usage,
CASE
WHEN random() > 0.97 THEN 'FAIL'
ELSE 'OK'
END AS status
FROM timeline)
INSERT
INTO sensor_metrics (sensor_id, ts, temperature, cpu_usage, status)
SELECT sensor_id, ts, temperature, cpu_usage, status
FROM final;
-- =====================================================
-- BRIN INDEX ON TIME COLUMN
-- Demonstrates: Efficient storage-aware index for large
-- sequential time-series data
-- =====================================================
CREATE INDEX idx_sensor_metrics_ts_brin
ON sensor_metrics
USING BRIN (ts);
-- =====================================================
-- B-TREE INDEX FOR SENSOR + TIME LOOKUPS
-- Demonstrates: Fast point queries and range filtering
-- on high-selectivity dimensions
-- =====================================================
CREATE INDEX idx_sensor_metrics_sensor_ts
ON sensor_metrics (sensor_id, ts DESC);
-- =====================================================
-- QUERY: CROSS-WEEK RANGE SCAN (MARCH)
-- Demonstrates: Multi-partition pruning
-- =====================================================
SELECT *
FROM sensor_metrics
WHERE ts BETWEEN '2026-02-10' AND '2026-02-18';
-- =====================================================
-- QUERY: FULL MARCH DATA
-- Demonstrates: Multiple partitions scanned efficiently
-- =====================================================
SELECT *
FROM sensor_metrics
WHERE ts >= '2026-02-01'
AND ts < '2026-03-01';
-- =====================================================
-- QUERY: SENSOR ACTIVITY IN MARCH
-- Demonstrates: Composite filtering with partition pruning
-- =====================================================
SELECT *
FROM sensor_metrics
WHERE sensor_id = 42
AND ts >= '2026-03-01'
AND ts < '2026-04-01';
-- =====================================================
-- QUERY: LATEST EVENTS
-- Demonstrates: Access mostly last partitions only
-- =====================================================
SELECT *
FROM sensor_metrics
ORDER BY ts DESC
LIMIT 1000;
-- =====================================================
-- QUERY: WEEKLY AGGREGATION
-- Demonstrates: Cross-partition aggregation cost
-- =====================================================
SELECT date_trunc('week', ts) AS week_bucket,
avg(cpu_usage)
FROM sensor_metrics
GROUP BY week_bucket
ORDER BY week_bucket;
-- =====================================================
-- INDEXED QUERY: SENSOR + TIME RANGE LOOKUP
-- Demonstrates: Uses composite B-tree index
-- (sensor_id, ts) for fast pruning + lookup
-- =====================================================
SELECT *
FROM sensor_metrics
WHERE sensor_id = 42
AND ts >= '2026-02-01'
AND ts < '2026-02-08';
-- =====================================================
-- INDEXED QUERY: LATEST SENSOR READINGS
-- Demonstrates: Index-backed ORDER BY + LIMIT (top-N)
-- Avoids full sort using (sensor_id, ts DESC) index
-- =====================================================
SELECT *
FROM sensor_metrics
WHERE sensor_id = 42
ORDER BY ts DESC
LIMIT 50;
-- =====================================================
-- INDEXED QUERY: HOT SENSOR DETECTION
-- Demonstrates: Selective filter benefits from B-tree
-- + partition pruning on ts
-- =====================================================
SELECT *
FROM sensor_metrics
WHERE sensor_id = 17
AND status = 'FAIL'
AND ts >= now() - interval '2 months 20 days';
-- =====================================================
-- INDEXED QUERY: RECENT SYSTEM FAILURES (ALL SENSORS)
-- Demonstrates: BRIN index on ts enables efficient
-- scanning of large time-range without full table scan
-- =====================================================
SELECT *
FROM sensor_metrics
WHERE status = 'FAIL'
AND ts >= now() - interval '2 months 20 days';
-- =====================================================
-- INDEXED QUERY: SENSOR ACTIVITY DISTRIBUTION
-- Demonstrates: Index-assisted aggregation pre-filtering
-- (reduces scanned rows via sensor_id index)
-- =====================================================
SELECT sensor_id,
count(*) AS events,
avg(cpu_usage) AS avg_cpu
FROM sensor_metrics
WHERE sensor_id BETWEEN 1 AND 50
AND ts >= '2026-02-01'
AND ts < '2026-03-01'
GROUP BY sensor_id;
-- =====================================================
-- INDEXED QUERY: STATUS BREAKDOWN FOR A SENSOR
-- Demonstrates: Filtered scan using composite index
-- =====================================================
SELECT status,
count(*)
FROM sensor_metrics
WHERE sensor_id = 10
AND ts >= '2026-02-01'
AND ts < '2026-03-01'
GROUP BY status;