Skip to content

Commit

Permalink
cleaned up functions, and it now processes all tables in the db
Browse files Browse the repository at this point in the history
  • Loading branch information
tannerblair committed Jun 2, 2021
1 parent 80b1e6e commit e884516
Show file tree
Hide file tree
Showing 19 changed files with 77 additions and 69 deletions.
3 changes: 1 addition & 2 deletions layout-files/2020/ABS_SUBD.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
Field Name,Datatype,Start,End,Length,Description
abs_subdv_cd,char(10),1,10,10,Abstract/Subdivision code
abs_subdv_cd,char(10),1,10,10,Abstract/Subdivision code
abs_subdv_desc,char(40),11,50,40,Abstract/Subdivision code description
3 changes: 1 addition & 2 deletions layout-files/2020/AGENT.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
agent_id,int(12),1,12,12,Agent ID
agent_id,int(12),1,12,12,Agent ID
agent_name,char(70),13,82,70,Agent Name
agent_addr_line1,char(60),83,142,60,Agent Line 1
agent_addr_line2,char(60),143,202,60,Agent Line 2
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/APPR_HDR.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
Run Date / Time,char(16),1,16,16,Run Date and Time
Run Date / Time,char(16),1,16,16,Run Date and Time
File Description,char(40),17,56,40,Description of this file
Appraisal Year,numeric(4),57,60,4,Appraisal or Tax Year
Supplement Number,numeric(4),61,64,4,Supplement Number
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/ARB.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,Property ID
prop_id,int(12),1,12,12,Property ID
prop_val_yr,numeric(5),13,17,5,Year
geo_id,char(50),18,67,50,Geographic ID
ref_id1,char(25),68,92,25,Ref ID 1
Expand Down
11 changes: 5 additions & 6 deletions layout-files/2020/ARBITRATION.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),#REF!,#REF!,12,Property ID
prop_val_yr,numeric(5),#REF!,#REF!,5,Year
geo_id,char(50),#REF!,#REF!,50,Geographic ID
ref_id1,char(25),#REF!,#REF!,25,Ref ID 1
ref_id2,char(25),#REF!,#REF!,25,Ref ID 2
prop_id,int(12),1,12,12,Property ID
prop_val_yr,numeric(5),13,17,5,Year
geo_id,char(50),18,67,50,Geographic ID
ref_id1,char(25),68,92,25,Ref ID 1
ref_id2,char(25),93,117,25,Ref ID 2
3 changes: 1 addition & 2 deletions layout-files/2020/COUNTRY.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
Field Name,Datatype,Start,End,Length,Description
country_cd,char(5),1,5,5,Country Code
country_cd,char(5),1,5,5,Country Code
country_name,char(50),6,55,50,Country Name (or Description)
3 changes: 1 addition & 2 deletions layout-files/2020/ENTITY.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
Field Name,Datatype,Start,End,Length,Description
entity_id,int(12),1,12,12,Entity id
entity_id,int(12),1,12,12,Entity id
entity_cd,char(5),13,17,5,Entity Code
3 changes: 1 addition & 2 deletions layout-files/2020/IMP_ATR.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,Property ID
prop_id,int(12),1,12,12,Property ID
prop_val_yr,numeric(4),13,16,4,Year
imprv_id,int(12),17,28,12,Improvement ID
imprv_det_id,int(12),29,40,12,Improvement Detail ID
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/IMP_DET.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,Property ID
prop_id,int(12),1,12,12,Property ID
prop_val_yr,numeric(4),13,16,4,Year
imprv_id,int(12),17,28,12,Improvement ID
imprv_det_id,int(12),29,40,12,Improvement Detail ID
Expand Down
5 changes: 2 additions & 3 deletions layout-files/2020/IMP_INFO.csv
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,Property ID
prop_id,int(12),1,12,12,Property ID
prop_val_yr,numeric(4),13,16,4,Year
imprv_id,int(12),17,28,12,Improvement ID
imprv_type_cd,varchar(10)`,29,38,10,Improvement Type
imprv_type_cd,varchar(10),29,38,10,Improvement Type
imprv_type_desc,varchar(25),39,63,25,Improvement Description
imprv_state_cd,varchar(5),64,68,5,State Code
imprv_homesite,varchar(1),69,69,1,"""Y"" indicates Homesite improvement, ""N"" non Homesite"
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/LAND_DET.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,Property ID
prop_id,int(12),1,12,12,Property ID
prop_val_yr,numeric(4),13,16,4,Year
land_seg_id,int(12),17,28,12,Unique Identifier
land_type_cd,varchar(10),29,38,10,Land Type Code
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/LAWSUIT.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,Property ID
prop_id,int(12),1,12,12,Property ID
prop_val_yr,numeric(5),13,17,5,Year
geo_id,char(50),18,67,50,Geographic ID
ref_id1,char(25),68,92,25,Ref ID 1
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/MOBILE_HOME_INFO.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,Property ID
prop_id,int(12),1,12,12,Property ID
prop_val_yr,numeric(4),13,16,4,Year
imprv_id,int(12),17,28,12,Improvement ID
mbl_hm_make,varchar(100),29,128,100,Make
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/PROP_ENT.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,Property Id
prop_id,int(12),1,12,12,Property Id
prop_val_yr,numeric(5),13,17,5,Appraisal or Tax Year
sup_num,int(12),18,29,12,"Supplement Version Number - 0 = represents certified data, supplemental data otherwise"
owner_id,int(12),30,41,12,Owner Id
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/STATE_CD.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
state_cd,char(10),1,10,10,CAD State Property Tax Code
state_cd,char(10),1,10,10,CAD State Property Tax Code
state_cd_description,char(50),11,60,50,CAD State Property Tax Code Description
ptd_state_cd,char(10),61,70,10,PTD State Property Tax Code
ptd_state_cd_description,char(50),71,120,50,PTD State Property Tax Code Description
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/TAX_DEFERRAL_INFO.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
prop_id,int(12),1,12,12,property ID
prop_id,int(12),1,12,12,property ID
owner_id,int(12),13,24,12,Owner ID - PACS Internal ID
exmpt_type_cd,char(5),25,29,5,Exempt type code
deferral_start_date,char(25),30,54,25,Tax deferral start date
Expand Down
3 changes: 1 addition & 2 deletions layout-files/2020/TOTALS.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Field Name,Datatype,Start,End,Length,Description
entity_id,int(12),1,12,12,Entity Id
entity_id,int(12),1,12,12,Entity Id
entity_cd,char(5),13,17,5,Entity Code
entity_name,char(70),18,87,70,Entity Descriptive Name
land_hstd_val,numeric(15),88,102,15,Total Land Homestead Value
Expand Down
6 changes: 0 additions & 6 deletions layout-files/2020/UDI_7_8.csv

This file was deleted.

79 changes: 55 additions & 24 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,74 @@
from pathlib import Path


def create_table_from_layout_file(conn: db.Connection, prop_layout_file: Path):
with prop_layout_file.open(encoding="utf-8-sig") as file:

def create_db_table(conn: db.Connection, layout_file: Path):
    """Create a table whose columns are described by a layout CSV.

    The table is named after the layout file's stem, lower-cased. Each CSV
    row contributes one column: the first cell is the column name (with
    ``'/ '`` and spaces replaced by underscores) and the second cell is used
    verbatim as the declared column type. Rows whose first cell is
    ``'filler'`` and completely empty rows are ignored.

    Args:
        conn: Open database connection the statement is executed on.
        layout_file: Path to the layout CSV (read as UTF-8, BOM tolerated).

    Raises:
        db.OperationalError: If the database rejects the generated
            ``CREATE TABLE`` statement. The statement is printed first.
    """
    table_name = layout_file.stem.lower()
    print(f"Creating table for {table_name}")

    cols = []
    # newline="" lets the csv module handle line endings itself.
    with layout_file.open(encoding="utf-8-sig", newline="") as file:
        for row in csv.reader(file):
            # csv.reader yields [] for blank lines; indexing it would crash.
            if not row or row[0] == 'filler':
                continue
            key = row[0].replace('/ ', '_').replace(' ', '_')
            cols.append(f"{key} {row[1]}")

    exec_string = f"""CREATE TABLE IF NOT EXISTS {table_name} ({', '.join(cols)});"""
    try:
        conn.execute(exec_string)
    except db.OperationalError:
        # Show the offending SQL, then re-raise with the original traceback.
        print(exec_string)
        raise


if __name__ == '__main__':
conn = db.connect('tcad2021.db')
prop_layout_file = Path("layout-files/2020/PROP.csv")
prop_file = Path("source-files/2020/PROP.TXT")
create_table_from_layout_file(conn, prop_layout_file)
def get_field_info(layout_file: Path):
    """Read the fixed-width slice bounds of every field in a layout CSV.

    Each CSV row is expected to carry the field name in its first cell and
    the 1-based inclusive Start/End positions in its third and fourth cells.
    Rows whose first cell is ``'filler'`` and completely empty rows are
    ignored.

    Args:
        layout_file: Path to the layout CSV (read as UTF-8, BOM tolerated).

    Returns:
        A dict mapping each field name (with ``'/ '`` and spaces replaced by
        underscores) to ``{'start': int, 'end': int}``, where ``start`` is
        Start minus one so that ``line[start:end]`` slices the field.

    Raises:
        ValueError: If a Start or End cell is not an integer
            (for example a spreadsheet ``#REF!`` error).
    """
    fields = {}
    # newline="" lets the csv module handle line endings itself.
    with layout_file.open(encoding="utf-8-sig", newline="") as file:
        for row in csv.reader(file):
            # csv.reader yields [] for blank lines; indexing it would crash.
            if not row or row[0] == 'filler':
                continue
            key = row[0].replace('/ ', '_').replace(' ', '_')
            try:
                # Convert the 1-based inclusive range to a Python slice.
                start = int(row[2]) - 1
                end = int(row[3])
            except ValueError as e:
                raise ValueError(
                    f"Non-numeric Start/End for field {key!r} in {layout_file}: "
                    f"{row[2]!r}, {row[3]!r}"
                ) from e
            fields[key] = {'start': start, 'end': end}

    return fields


with prop_file.open() as file:
def add_rows_to_db(conn: db.Connection, layout_file: Path, data_file: Path):
    """Insert every record of a fixed-width data file into its table.

    The field slice bounds come from ``get_field_info(layout_file)``; the
    target table is named after the layout file's stem, lower-cased. Each
    non-blank line of *data_file* becomes one row, with every field sliced
    out of the line and stripped of surrounding whitespace. Nothing is
    committed here; the caller owns the transaction.

    Args:
        conn: Open database connection the rows are inserted through.
        layout_file: Layout CSV describing the fields of *data_file*.
        data_file: Fixed-width text file holding one record per line.

    Raises:
        db.OperationalError: If the database rejects the ``INSERT``
            statement. The statement is printed first.
    """
    print(f"Adding rows for {layout_file}")
    fields = get_field_info(layout_file)

    table_name = layout_file.stem.lower()
    # NOTE(review): assumes `db` is sqlite3 (qmark paramstyle) -- confirm.
    # Binding values as parameters stores them unmodified and prevents a
    # double-quoted value from being parsed as an identifier.
    placeholders = ', '.join('?' for _ in fields)
    exec_string = f"INSERT INTO {table_name} ({', '.join(fields)}) VALUES ({placeholders})"
    spans = [(bounds['start'], bounds['end']) for bounds in fields.values()]

    with data_file.open() as file:
        # Stream the file lazily; blank lines carry no record.
        rows = (
            [line[start:end].strip() for start, end in spans]
            for line in file
            if line.strip()
        )
        try:
            conn.executemany(exec_string, rows)
        except db.OperationalError:
            # Show the offending SQL, then re-raise with the original traceback.
            print(exec_string)
            raise


def add_table(conn: db.Connection, layout_file: Path, data_file: Path):
    """Create the table for *layout_file* and load *data_file* into it.

    Calls ``create_db_table`` and then ``add_rows_to_db`` with the same
    connection and layout file.

    Args:
        conn: Open database connection passed through to both steps.
        layout_file: Layout CSV describing the table's fields.
        data_file: Fixed-width text file holding the table's records.
    """
    # Distinct from the "Adding rows for ..." line that add_rows_to_db prints,
    # so the log shows the table-level step only once.
    print(f"Adding table for {layout_file}")
    create_db_table(conn, layout_file)
    add_rows_to_db(conn, layout_file, data_file)


if __name__ == '__main__':
    # Load every data file under source-files/2020 into tcad-2020.db, using
    # the layout CSV of the same stem under layout-files/2020.
    print("Starting...")
    src_file_dir = Path("source-files/2020")
    layout_file_dir = Path("layout-files/2020")
    db2020 = db.connect('tcad-2020.db')
    try:
        # Sorted so tables are processed in a reproducible order.
        for data_file in sorted(src_file_dir.iterdir()):
            if not data_file.is_file():
                continue
            print(f"Processing {data_file}")
            # Layout files use a lowercase ".csv" suffix; ".CSV" only
            # resolves on case-insensitive filesystems.
            layout = layout_file_dir / f"{data_file.stem}.csv"
            if not layout.is_file():
                print(f"Skipping {data_file}: no layout file at {layout}")
                continue
            add_table(db2020, layout, data_file)
            # Commit per table so earlier tables survive a later failure.
            db2020.commit()
    finally:
        db2020.close()

0 comments on commit e884516

Please sign in to comment.