-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathEPC_clean.sql
55 lines (44 loc) · 2.47 KB
/
EPC_clean.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# ------------------------------------------------
#EPC_clean.sql
# ------------------------------------------------
# Code provided as is and can be used or modified freely.
# ------------------------------------------------
# Author: BIN CHI
# Urban Big Data Centre at University of Glasgow
# Date: 1/4/2022
########clean os addressplus
#create geom field
SELECT AddGeometryColumn ('public', 'osadd', 'geom', 27700, 'POINT', 2);
UPDATE public.osadd SET geom = ST_GeomFromText('POINT(' || xcoordinate || ' ' || ycoordinate || ') ', 27700);
#create a new table only records the address fields for the afterwards address matching
SELECT uprn,parentuprn,postcodelocator,class,postcode,buildingname,buildingnumber,subbuildingname,paostartnumber,paostartsuffix,paoendnumber,paoendsuffix,paotext,saostartnumber,saostartsuffix,saoendnumber,saoendsuffix,saotext,streetdescription,ostopotoid,dependentlocality,locality,townname,administrativearea,posttown,geom INTO osaddc FROM osadd;
#create index on two fields(uprn and parentuprn)
CREATE UNIQUE INDEX uprn_idx ON osaddc(uprn);
CREATE INDEX puprn_idx ON osaddc(parentuprn);
#create a new table as pare recording the pareuprn which exit in uprn field
SELECT DISTINCT parentuprn INTO pare FROM osaddc WHERE parentuprn in (select uprn from osaddc);
#add a new field in pare and set the value to 1
ALTER TABLE pare ADD exit int;
update pare set exit=1;
#create unique index on parentuprn in table pare
CREATE UNIQUE INDEX puprn_idx1 ON pare(parentuprn);
#change the parentuprn field name to parentuprn1 in pare table
alter table pare rename column parentuprn to parentuprn1;
#identify uprn which is pareuprn in OS AddressBase
select * into addressgb from osaddc left join pare on osaddc.uprn=pare.parentuprn1;
#SELECT 37385807 Query returned successfully in 2 min 43 secs.
#drop the parentuprn1 field
alter table addressgb drop column parentuprn1;
#remove the uprn which is the pareuprn in OS AddressBase Plus dataset
DELETE FROM addressgb WHERE exit=1;
#delete the unuseful field(exit)
alter table addressgb drop column exit;
#create three new address fields for the afterwards address matching
ALTER TABLE addressgb ADD bb text;
update addressgb set bb=concat(buildingname,', ',buildingnumber);
ALTER TABLE addressgb ADD ss text;
update addressgb set ss=concat(saostartnumber,saostartsuffix);
ALTER TABLE addressgb ADD pp text;
update addressgb set pp=concat(paostartnumber,paostartsuffix);
#addressgb is the data used for address matching