Skip to content

Commit

Permalink
Fixed Postgis upload bug
Browse files Browse the repository at this point in the history
  • Loading branch information
CHRISCARLON committed Oct 4, 2024
1 parent f80fe58 commit f3d3885
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 22 deletions.
Binary file added .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# Rust lib for GridWalk Backend

What does this lib need to do...
67 changes: 49 additions & 18 deletions src/duckdb_load/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn determine_file_type(file_content: &[u8]) -> io::Result<FileType> {

// Get data schema
fn query_and_print_schema(conn: &Connection) -> Result<()> {
let query = "SELECT * FROM data LIMIT 50";
let query = "SELECT * FROM data LIMIT 10";
let mut stmt = conn.prepare(query)?;
let arrow_result = stmt.query_arrow([])?;
// Get the schema
Expand All @@ -67,31 +67,59 @@ fn query_and_print_schema(conn: &Connection) -> Result<()> {
}

// Load to postgis
fn load_data_postgis(conn: &Connection) -> Result<(), Box<dyn Error>> {
fn load_data_postgis(conn: &Connection, table_name: &str) -> Result<(), Box<dyn Error>> {
// Attach PostGIS database
conn.execute(
"ATTACH 'dbname=gridwalk user=admin password=password host=localhost port=5432' AS gridwalk_db (TYPE POSTGRES)",
[],
)?;

// Drop the existing table if it exists
conn.execute("DROP TABLE IF EXISTS gridwalk_db.data_1", [])?;
// Let table name
let my_table_name = table_name;

// Create the new table structure
let create_table_query = "
CREATE TABLE gridwalk_db.data_1 AS
SELECT *,
geom::geometry AS geometry
// Drop Table
let delete_if_table_exists_query = &format!(
"
DROP TABLE IF EXISTS gridwalk_db.{};
",
my_table_name
);

conn.execute(delete_if_table_exists_query, [])?;

// Create Table
let create_table_query = &format!(
"
CREATE TABLE gridwalk_db.{} AS
SELECT *
FROM data;
";
",
my_table_name
);

conn.execute(create_table_query, [])?;

println!("Table 'data_1' created and data inserted successfully");
// Postgis Update Table
let postgis_query = &format!(
"CALL postgres_execute('gridwalk_db', '
ALTER TABLE {} ADD COLUMN geom geometry;
UPDATE {} SET geom = ST_GeomFromText(geom_wkt, 4326);
ALTER TABLE {} DROP COLUMN geom_wkt;
');",
table_name, table_name, table_name
);

conn.execute(&postgis_query, [])?;

println!(
"Table {} created and data inserted successfully",
my_table_name
);
Ok(())
}

// DuckDB file loader
fn load_file_duckdb(file_path: &str, file_type: &FileType) -> Result<()> {
fn process_file(file_path: &str, file_type: &FileType) -> Result<()> {
let conn = Connection::open_in_memory()?;
conn.execute("INSTALL spatial;", [])?;
conn.execute("LOAD spatial;", [])?;
Expand All @@ -101,19 +129,22 @@ fn load_file_duckdb(file_path: &str, file_type: &FileType) -> Result<()> {
let create_table_query = match file_type {
FileType::Geopackage | FileType::Shapefile | FileType::Geojson => {
format!(
"CREATE TABLE data AS SELECT * FROM ST_Read('{}');",
"CREATE TABLE data AS
SELECT * EXCLUDE (geom),
ST_AsText(geom) as geom_wkt
FROM ST_Read('{}');",
file_path
)
}
FileType::Excel => {
format!(
"CREATE TABLE data AS SELECT * FROM read_excel('{}');",
"CREATE TABLE data AS SELECT * FROM st_read('{}');",
file_path
)
}
FileType::Csv => {
format!(
"CREATE TABLE data AS SELECT * FROM read_csv_auto('{}');",
"CREATE TABLE data AS SELECT * FROM read_csv('{}');",
file_path
)
}
Expand All @@ -132,7 +163,7 @@ fn load_file_duckdb(file_path: &str, file_type: &FileType) -> Result<()> {
query_and_print_schema(&conn)?;

// Call to load data into postgres and handle the result
match load_data_postgis(&conn) {
match load_data_postgis(&conn, "pop_tart") {
Ok(_) => println!("Data successfully loaded into PostgreSQL"),
Err(e) => eprintln!("Error loading data into PostgreSQL: {}", e),
}
Expand All @@ -141,15 +172,15 @@ fn load_file_duckdb(file_path: &str, file_type: &FileType) -> Result<()> {
}

// Process file
pub fn process_file(file_path: &str) -> io::Result<()> {
pub fn launch_process_file(file_path: &str) -> io::Result<()> {
let mut file = File::open(file_path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;

let file_type = determine_file_type(&buffer)?;
println!("Detected file type: {:?}", file_type);

load_file_duckdb(file_path, &file_type).map_err(|e| {
process_file(file_path, &file_type).map_err(|e| {
io::Error::new(
io::ErrorKind::Other,
format!("Error loading {:?} into DuckDB: {}", file_type, e),
Expand Down
4 changes: 2 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
mod duckdb_load;

fn main() -> Result<(), Box<dyn std::error::Error>> {
let file_path = "test_files/GLA_High_Street_boundaries.gpkg";
let file_path = "test_files/hotosm_twn_populated_places_points_geojson.geojson";
println!("Processing file: {}", file_path);

match duckdb_load::process_file(file_path) {
match duckdb_load::launch_process_file(file_path) {
Ok(_) => {
println!("File processed successfully.");
Ok(())
Expand Down
Binary file added test_files/london-reservoir-levels.xlsx
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/duckdb_tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use duckdb_transformer::duckdb_load::process_file;
use duckdb_transformer::duckdb_load::launch_process_file;

#[cfg(test)]
mod tests {
Expand All @@ -13,7 +13,7 @@ mod tests {
assert!(Path::new(file_path).exists(), "Test file does not exist");

// Process the file
let result = process_file(file_path);
let result = launch_process_file(file_path);

// Check if the processing was successful
assert!(
Expand Down

0 comments on commit f3d3885

Please sign in to comment.