-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathupdate_partitions_no_unknown_table.sql
203 lines (170 loc) · 9.8 KB
/
update_partitions_no_unknown_table.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
--
-- Update_partitions - Takes a begin time, schema name, primary (parent) table name,
-- table owner, an expression which returns a date,
-- and if we want 'week'ly or 'month'ly partitions.
-- The number of created tables is returned.
-- ex: SELECT public.create_date_partitions_for_table('2010-02-01','my_schema','my_data','postgres','create_date','week',1,true,true)
--
-- Function: public.create_date_partitions_for_table(timestamp without time zone, regclass, text, interval, boolean, boolean)
-- DROP FUNCTION public.create_date_partitions_for_table(timestamp without time zone, regclass, text, interval, boolean, boolean);
CREATE OR REPLACE FUNCTION public.create_date_partitions_for_table(begin_time timestamp without time zone,
primary_table regclass,
date_expression text,
spacing interval,
fill_child_tables boolean,
truncate_parent_table boolean)
RETURNS integer AS
$BODY$
DECLARE startTime timestamp;
DECLARE endTime timestamp;
DECLARE intervalTime timestamp;
DECLARE createStmts text;
DECLARE insertStmts text;
DECLARE createTrigger text;
DECLARE fullTablename text;
DECLARE triggerName text;
DECLARE createdTables integer;
DECLARE intervalEpoch integer;
DECLARE dateFormat text;
DECLARE dateColumnName text;
DECLARE tableOwner text;
DECLARE primary_table_name text;
DECLARE schema_name text;
BEGIN
-- determine if the date_expression is a valid identifier
dateColumnName := CASE WHEN date_expression ~* '^[a-z0-9_$]+$' THEN date_expression ELSE 'date' END;
-- determine the date format for the given interval
intervalEpoch := EXTRACT(EPOCH FROM spacing);
dateFormat := CASE WHEN intervalEpoch < EXTRACT(EPOCH FROM interval '1 day') THEN 'error'
WHEN intervalEpoch < EXTRACT(EPOCH FROM interval '1 week') THEN 'YYYYDDD'
WHEN intervalEpoch < EXTRACT(EPOCH FROM interval '1 month') THEN 'IYYYIW'
WHEN intervalEpoch < EXTRACT(EPOCH FROM interval '1 year') THEN 'YYYYMM'
ELSE 'YYYY'
END;
IF dateFormat = 'error' THEN
RAISE EXCEPTION 'Interval must be greater than 1 day';
END IF;
-- get the table name, schema and owner
SELECT c.relname, n.nspname, a.rolname INTO primary_table_name, schema_name, tableOwner
FROM pg_catalog.pg_class AS c
JOIN pg_catalog.pg_namespace AS n ON c.relnamespace = n.oid
JOIN pg_catalog.pg_authid AS a ON c.relowner = a.oid
WHERE c.oid = primary_table::oid;
-- Store the incoming begin_time, and set the endTime to one month/week/day in the future
-- (this allows use of a cronjob at any time during the month/week/day to generate next month/week/day's table)
startTime := to_timestamp(to_char(begin_time, dateFormat), dateFormat);
endTime := to_timestamp(to_char(now() + spacing, dateFormat), dateFormat);
createdTables := 0;
WHILE (startTime <= endTime) LOOP
fullTablename := primary_table_name||'_'||to_char(startTime, dateFormat);
intervalTime := startTime + spacing;
-- The table creation sql statement
IF NOT EXISTS(SELECT * FROM information_schema.tables WHERE table_schema = schema_name AND table_name = fullTablename) THEN
createStmts := 'CREATE TABLE '||schema_name||'.'||fullTablename||' (
CHECK ('||date_expression||' >= '''||startTime||''' AND '||date_expression||' < '''||intervalTime||''')
) INHERITS ('||schema_name||'.'||primary_table_name||')';
-- Run the table creation
EXECUTE createStmts;
-- Set the table owner
createStmts := 'ALTER TABLE '||schema_name||'.'||fullTablename||' OWNER TO "'||tableOwner||'";';
EXECUTE createStmts;
-- Create an index on the timestamp
createStmts := 'CREATE INDEX idx_'||fullTablename||'_'||dateColumnName||' ON '||schema_name||'.'||fullTablename||' ('||date_expression||');';
EXECUTE createStmts;
RAISE NOTICE 'Child table %.% created',schema_name,fullTablename;
--if fill_child_tables is true then we fill the child table with the parent's table data that satisfies the child's table check constraint
IF (fill_child_tables) THEN
RAISE NOTICE 'Filling child table %.%',schema_name,fullTablename;
insertStmts := 'INSERT INTO '||schema_name||'.'||fullTablename||' (
SELECT * FROM '||schema_name||'.'||primary_table_name||'
WHERE '||date_expression||' >= '''||startTime||''' AND
'||date_expression||' < '''||intervalTime||'''
);';
EXECUTE insertStmts;
END IF;
-- Track how many tables we are creating (should likely be 1, except for initial run and backfilling efforts).
createdTables := createdTables+1;
END IF;
startTime := intervalTime;
END LOOP;
-- The UNDEFINED_TABLE exception is captured on child table is created 'on-the-fly' when new data arrives and
-- no partition is created to match this data criteria
createTrigger := 'CREATE OR REPLACE FUNCTION '||schema_name||'.trf_'||primary_table_name||'_insert_trigger_function()
RETURNS TRIGGER AS $$
DECLARE startTime timestamp;
DECLARE intervalTime timestamp;
DECLARE fullTablename text;
DECLARE insertStatment text;
DECLARE createTableStatment text;
DECLARE formatDate text;
BEGIN
SELECT to_char('||date_expression||','''||dateFormat||''') INTO formatDate FROM (SELECT NEW.*) AS t;
fullTablename := '''||primary_table_name||'_''||'||'formatDate;
insertStatment := ''INSERT INTO '||schema_name||'.'''||'||fullTablename||'' VALUES ($1.*)'';
BEGIN
--Try insert on appropiatte child table if exists
EXECUTE insertStatment using NEW;
--When child tables not exists, generate it on the fly
EXCEPTION WHEN UNDEFINED_TABLE THEN
startTime := to_timestamp(formatDate, '''||dateFormat||''');
intervalTime := startTime + '''||spacing||'''::interval;
createTableStatment := ''CREATE TABLE IF NOT EXISTS '||schema_name||'.''||fullTablename||'' (
CHECK ('||replace(date_expression, '''', '''''')||' >= ''''''||startTime||'''''' AND '||replace(date_expression, '''', '''''')||' < ''''''||intervalTime||'''''')
) INHERITS ('||schema_name||'.'||primary_table_name||')'';
EXECUTE createTableStatment;
createTableStatment := ''ALTER TABLE '||schema_name||'.''||fullTablename||'' OWNER TO "'||tableOwner||'";'';
EXECUTE createTableStatment;
createTableStatment := ''CREATE INDEX idx_''||fullTablename||''_'||dateColumnName||' ON '||schema_name||'.''||fullTablename||'' ('||replace(date_expression, '''', '''''')||');'';
EXECUTE createTableStatment;
--Try the insert again, now the table exists
EXECUTE insertStatment using NEW;
WHEN OTHERS THEN
RAISE EXCEPTION ''Error in trigger'';
RETURN NULL;
END;
RETURN NULL;
END;
$$
LANGUAGE plpgsql;';
EXECUTE createTrigger;
-- Create the trigger that uses the trigger function, if it isn't already created
triggerName := 'tr_'||primary_table_name||'_insert_trigger';
IF NOT EXISTS(SELECT * FROM information_schema.triggers WHERE trigger_name = triggerName) THEN
createTrigger:='CREATE TRIGGER tr_'||primary_table_name||'_insert_trigger
BEFORE INSERT ON '||schema_name||'.'||primary_table_name||'
FOR EACH ROW EXECUTE PROCEDURE '||schema_name||'.trf_'||primary_table_name||'_insert_trigger_function();';
EXECUTE createTrigger;
END IF;
-- If truncate_parent_table parameter is true, we truncate only the parent table data AS this data is in child tables
IF (truncate_parent_table) THEN
RAISE NOTICE 'Truncate ONLY parent table %.%',schema_name,primary_table_name;
insertStmts := 'TRUNCATE TABLE ONLY '||schema_name||'.'||primary_table_name||';';
EXECUTE insertStmts;
END IF;
RETURN createdTables;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
ALTER FUNCTION public.create_date_partitions_for_table(timestamp without time zone, regclass, text, interval, boolean, boolean)
OWNER TO postgres;
COMMENT ON FUNCTION public.create_date_partitions_for_table(timestamp without time zone, regclass, text, interval, boolean, boolean) IS 'The function is created in the public schema and is owned by user postgres.
The function takes params:
begin_time - Type: timestamp - Desc: time of your earliest data. This allows for backfilling and for reducing trigger function overhead by avoiding legacy date logic.
primary_table - Type: regclass - Desc: name of the parent table. This is used to generate monthly tables ([primary_table_name]_YYYYMM) and an unknown table ([primary_table_name]_unknowns). It is also used in the trigger and trigger function names.
date_expression - Type: text - Desc: an expression that returns a date is used for check constraints and insert trigger function.
spacing - Type: interval - Desc: an interval which determines the timespan for child tables
fill_child_tables - Type: boolean - Desc: if you want to load data from parent table to each child tables.
truncate_parent_table - Type: boolean - Desc: if you want to delete table of the parent table
Considerations:
- The insert trigger function is recreated everytime you run this function.
- If child tables already exist, the function simply updates the trigger
function and moves to the next table in the series.
- This function does not raise exceptions when errant data is encountered.
The trigger captures the UNDEFINED_TABLE exception when any data that does
not have a matching child table and it automatically generates
the appropiate child table and insert the row that generated the exeception.
- The function returns the number of tables that it created.
- The fill_child_tables and truncate_parent_table must be used carefully you may
respald your parent table data before
';