diff --git a/.gitignore b/.gitignore index 2ca8c583..fbd40bf4 100644 --- a/.gitignore +++ b/.gitignore @@ -88,6 +88,6 @@ docker/benchmark/database/data dataset/TPC-H V3.0.1 dataset/SynData dataset/databases +dataset/data-importer/all-data !dataset/newyork-taxi.zip !dataset/newyork-taxi-sample.txt -`` \ No newline at end of file diff --git a/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialCSVTest.java b/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialCSVTest.java index 6b3f96c1..130faf27 100644 --- a/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialCSVTest.java +++ b/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialCSVTest.java @@ -63,7 +63,7 @@ public void printLine(ResultSet it) throws SQLException { } @Test - public void testSqlSelect() throws SQLException { + public void testSelect() throws SQLException { String sql = "select * from spatial"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); @@ -78,7 +78,7 @@ public void testSqlSelect() throws SQLException { } @Test - public void testSqlSpatialDistance() throws SQLException { + public void testSpatialDistance() throws SQLException { String sql = "select Distance(S_POINT, POINT(1404050, -4762163)) from spatial"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); @@ -93,7 +93,7 @@ public void testSqlSpatialDistance() throws SQLException { } @Test - public void testSqlRangeQuery() throws SQLException { + public void testRangeQuery() throws SQLException { String sql = "select * from spatial where DWithin(POINT(1404050, -4762163), S_POINT, 5)"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); @@ -108,7 +108,7 @@ public void testSqlRangeQuery() throws SQLException { } @Test - public void testSqlRangeCount() throws SQLException { + public void testRangeCount() throws SQLException { String sql = "select count(*) 
from spatial where DWithin(POINT(1404050, -4762163), S_POINT, 5)"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); @@ -123,8 +123,8 @@ public void testSqlRangeCount() throws SQLException { } @Test - public void testSqlRangeJoin() throws SQLException { - String sql = "select * from join_left s1 join spatial s2 on DWithin(s1.JL_POINT, s2.S_POINT, 500000)"; + public void testKNNQuery1() throws SQLException { + String sql = "select S_ID from spatial order by Distance(POINT(1404050, -4762163), S_POINT) asc limit 10"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); long count = 0; @@ -132,14 +132,14 @@ public void testSqlRangeJoin() throws SQLException { printLine(dataset); ++count; } - assertEquals(78, count); + assertEquals(10, count); dataset.close(); } } @Test - public void testSqlKNNQuery1() throws SQLException { - String sql = "select S_ID from spatial order by Distance(POINT(1404050, -4762163), S_POINT) asc limit 10"; + public void testKNNQuery2() throws SQLException { + String sql = "select S_ID from spatial where KNN(POINT(1404050, -4762163), S_POINT, 10)"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); long count = 0; @@ -153,8 +153,8 @@ public void testSqlKNNQuery1() throws SQLException { } @Test - public void testSqlKNNQuery2() throws SQLException { - String sql = "select S_ID from spatial where KNN(POINT(1404050, -4762163), S_POINT, 10)"; + public void testRangeJoin() throws SQLException { + String sql = "select * from join_left s1 join spatial s2 on DWithin(s1.JL_POINT, s2.S_POINT, 500000)"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); long count = 0; @@ -162,13 +162,13 @@ public void testSqlKNNQuery2() throws SQLException { printLine(dataset); ++count; } - assertEquals(10, count); + assertEquals(78, count); dataset.close(); } } @Test - public void testSqlKNNJOIN() throws 
SQLException { + public void testKNNJOIN() throws SQLException { String sql = "select s1.JL_ID, s2.S_ID from join_left s1 join spatial s2 on KNN(s1.JL_POINT, s2.S_POINT, 5)"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); @@ -181,76 +181,4 @@ public void testSqlKNNJOIN() throws SQLException { dataset.close(); } } - - @Test - public void testSelect() { - String tableName = SpatialTableName.SPATIAL.getName(); - LeafPlan plan = new LeafPlan(); - plan.setTableName(tableName); - plan.setSelectExps(ExpressionFactory - .createInputRef(user.getOpenHuFuTableSchema(tableName).getSchema())); - DataSet dataset = user.executeQuery(plan); - DataSetIterator it = dataset.getIterator(); - long count = 0; - while (it.next()) { - for (int i = 0; i < it.size(); i++) { - System.out.print(it.get(i) + "|"); - } - System.out.println(); - ++count; - } - assertEquals(3000, count); - dataset.close(); - } - - @Test - public void testSpatialDistance() { - String tableName = SpatialTableName.SPATIAL.getName(); - LeafPlan plan = new LeafPlan(); - plan.setTableName(tableName); - - // select Distance(S_POINT, POINT((1404050.076199729, -4762163.267865509)) from spatial; - Expression pointFunc = - ExpressionFactory.createLiteral(ColumnType.GEOMETRY, GeometryUtils.fromString("POINT(1404050.076199729 -4762163.267865509)")); - Expression distanceFunc = - ExpressionFactory.createScalarFunc(ColumnType.DOUBLE, "Distance", - ImmutableList.of(pointFunc, pointFunc)); - plan.setSelectExps(ImmutableList.of(distanceFunc)); - DataSet dataset = user.executeQuery(plan); - DataSetIterator it = dataset.getIterator(); - int count = 0; - assertEquals(1, it.size()); - while (it.next()) { - assertEquals(0.0, it.get(0)); - count++; - } - assertEquals(3000, count); - } - - @Test - public void testSpatialDWithin() { - String tableName = SpatialTableName.SPATIAL.getName(); - LeafPlan plan = new LeafPlan(); - plan.setTableName(tableName); - plan.setSelectExps( - 
ExpressionFactory.createInputRef(user.getOpenHuFuTableSchema(tableName).getSchema())); - // select * from spatial where DWithin(S_POINT, POINT((1404050.076199729, -4762163.267865509), 0.1); - Expression pointFunc = - ExpressionFactory.createLiteral(ColumnType.GEOMETRY, GeometryUtils.fromString("POINT(1404050.076199729 -4762163.267865509)")); - Expression dwithinFunc = - ExpressionFactory.createScalarFunc(ColumnType.BOOLEAN, "DWithin", - ImmutableList.of( - ExpressionFactory.createInputRef(1, ColumnType.GEOMETRY, Modifier.PUBLIC), - pointFunc, ExpressionFactory.createLiteral(ColumnType.DOUBLE, 0.1))); - plan.setWhereExps(ImmutableList.of(dwithinFunc)); - DataSet dataset = user.executeQuery(plan); - DataSetIterator it = dataset.getIterator(); - int count = 0; - assertEquals(2, it.size()); - while (it.next()) { - assertEquals(0L, it.get(0)); - count++; - } - assertEquals(1, count); - } } diff --git a/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialPostgisTest.java b/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialPostgisTest.java index 81a4e0a8..24e35692 100644 --- a/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialPostgisTest.java +++ b/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialPostgisTest.java @@ -31,18 +31,21 @@ import org.slf4j.LoggerFactory; public class OpenHuFuSpatialPostgisTest { + private static final Logger LOG = LoggerFactory.getLogger(OpenHuFuBenchmark.class); private static final OpenHuFuUser user = new OpenHuFuUser(); @BeforeClass public static void setUp() throws IOException { LinkedTreeMap userConfigs = new Gson().fromJson(Files.newBufferedReader( - Path.of(OpenHuFuBenchmark.class.getClassLoader().getResource("spatial-postgis-configs.json") - .getPath())), - LinkedTreeMap.class); + Path.of(OpenHuFuBenchmark.class.getClassLoader().getResource("spatial-postgis-configs.json") + .getPath())), + LinkedTreeMap.class); List endpoints = (List) userConfigs.get("owners"); - 
List globalTableConfigs = new Gson().fromJson(new Gson().toJson(userConfigs.get("tables")), - new TypeToken>() {}.getType()); + List globalTableConfigs = + new Gson().fromJson(new Gson().toJson(userConfigs.get("tables")), + new TypeToken>() { + }.getType()); LOG.info("Init benchmark of OpenHuFuSpatialPOSTGIS..."); for (String endpoint : endpoints) { user.addOwner(endpoint, null); @@ -62,7 +65,7 @@ public void printLine(ResultSet it) throws SQLException { } @Test - public void testSqlSelect() throws SQLException { + public void testSelect() throws SQLException { String sql = "select * from osm_a"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); @@ -75,4 +78,141 @@ public void testSqlSelect() throws SQLException { dataset.close(); } } + + @Test + public void testSpatialDistance() throws SQLException { + String sql = "select id, Distance(location, POINT(0, 0)) from osm_a"; + try (Statement stmt = user.createStatement()) { + ResultSet dataset = stmt.executeQuery(sql); + long count = 0; + while (dataset.next()) { + printLine(dataset); + ++count; + } + assertEquals(400, count); + dataset.close(); + } + } + + @Test + public void testRangeQuery() throws SQLException { + String sql = "select * from osm_a where DWithin(POINT(0, 0), location, 50)"; + try (Statement stmt = user.createStatement()) { + ResultSet dataset = stmt.executeQuery(sql); + long count = 0; + while (dataset.next()) { + printLine(dataset); + ++count; + } + dataset.close(); + assertEquals(30, count); + } + } + + /* + Result: osm_a_1: 14, osm_a_2: 16, osm_a_3: 0, osm_a_4: 0 + Validation SQL: + SELECT COUNT(*) from osm_a_1 where ST_DWithin('SRID=4326;POINT (0 0)', location, 50.0) + SELECT COUNT(*) from osm_a_2 where ST_DWithin('SRID=4326;POINT (0 0)', location, 50.0) + SELECT COUNT(*) from osm_a_3 where ST_DWithin('SRID=4326;POINT (0 0)', location, 50.0) + SELECT COUNT(*) from osm_a_4 where ST_DWithin('SRID=4326;POINT (0 0)', location, 50.0) + */ + @Test + public 
void testRangeCount() throws SQLException { + String sql = "select count(*) from osm_a where DWithin(POINT(0, 0), location, 50)"; + try (Statement stmt = user.createStatement()) { + ResultSet dataset = stmt.executeQuery(sql); + dataset.next(); + assertEquals(30, dataset.getInt(1)); + dataset.close(); + } + } + + /* + Validation SQL: + SELECT id, location, distance + FROM ((SELECT id as id, + st_astext(location) as location, + 'SRID=4326;POINT (0 0)' <-> location as distance + FROM osm_a_1) + union + (SELECT id as id, + st_astext(location) as location, + 'SRID=4326;POINT (0 0)' <-> location as distance + FROM osm_a_2) + union + (SELECT id as id, + st_astext(location) as location, + 'SRID=4326;POINT (0 0)' <-> location as distance + FROM osm_a_3) + union + (SELECT id as id, + st_astext(location) as location, + 'SRID=4326;POINT (0 0)' <-> location as distance + FROM osm_a_4)) AS new_osm_a + ORDER BY distance + ASC + LIMIT 10 + */ + @Test + public void testKNNQuery1() throws SQLException { + String sql = + "select id, location from osm_a order by Distance(POINT(0, 0), location) asc limit 10"; + try (Statement stmt = user.createStatement()) { + ResultSet dataset = stmt.executeQuery(sql); + long count = 0; + while (dataset.next()) { + printLine(dataset); + ++count; + } + assertEquals(10, count); + dataset.close(); + } + } + + @Test + public void testKNNQuery2() throws SQLException { + String sql = "select id, location from osm_a where KNN(POINT(0, 0), location, 10)"; + try (Statement stmt = user.createStatement()) { + ResultSet dataset = stmt.executeQuery(sql); + long count = 0; + while (dataset.next()) { + printLine(dataset); + ++count; + } + assertEquals(10, count); + dataset.close(); + } + } + + @Test + public void testRangeJoin() throws SQLException { + String sql = + "select * from osm_b join osm_a on DWithin(osm_b.location, osm_a.location, 5)"; + try (Statement stmt = user.createStatement()) { + ResultSet dataset = stmt.executeQuery(sql); + long count = 0; + while 
(dataset.next()) { + printLine(dataset); + ++count; + } + assertEquals(220, count); + dataset.close(); + } + } + + @Test + public void testKNNJOIN() throws SQLException { + String sql = "select * from osm_b join osm_a on KNN(osm_b.location, osm_a.location, 5)"; + try (Statement stmt = user.createStatement()) { + ResultSet dataset = stmt.executeQuery(sql); + long count = 0; + while (dataset.next()) { + printLine(dataset); + ++count; + } + assertEquals(200, count); + dataset.close(); + } + } } diff --git a/core/src/main/java/com/hufudb/openhufu/core/implementor/UserSideImplementor.java b/core/src/main/java/com/hufudb/openhufu/core/implementor/UserSideImplementor.java index ae001091..0cd33713 100644 --- a/core/src/main/java/com/hufudb/openhufu/core/implementor/UserSideImplementor.java +++ b/core/src/main/java/com/hufudb/openhufu/core/implementor/UserSideImplementor.java @@ -237,7 +237,7 @@ private DataSet privacyKNN(UnaryPlan plan, boolean isUsingKNNFunc) { // if (USE_DP) { right = kNNRadiusQuery(plan) * 2; // } - double deviation = 1e-6; + double deviation = 1e-10; int loop = 0; long count = 0L; if (USE_DP) { @@ -268,11 +268,13 @@ private DataSet privacyKNN(UnaryPlan plan, boolean isUsingKNNFunc) { } else if (sign > 0) { right = mid; } else { + LOG.info("kNN radius is {}", mid); DataSet dataSet = ArrayDataSet.materialize(kNNCircleRangeQuery(plan, mid, isUsingKNNFunc)); return dataSet; } loop++; } + LOG.info("kNN radius is {}", right); return kNNCircleRangeQuery(plan, right, isUsingKNNFunc); } diff --git a/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/BinarySearchKNN.java b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/BinarySearchKNN.java index 73b390d2..72fcdad3 100644 --- a/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/BinarySearchKNN.java +++ b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/BinarySearchKNN.java @@ -25,8 +25,8 @@ public static Plan 
generateKNNRadiusQueryPlan(UnaryPlan originalPlan) { OpenHuFuPlan.Expression distance = originalLeaf.getSelectExps() .get(originalLeaf.getOrders().get(0).getRef()); leafPlan.setSelectExps(ImmutableList.of(distance)); - leafPlan.setOrders(ImmutableList.of(OpenHuFuPlan.Collation.newBuilder().setRef(0) - .setDirection(OpenHuFuPlan.Direction.ASC).build())); +// leafPlan.setOrders(ImmutableList.of(OpenHuFuPlan.Collation.newBuilder().setRef(0) +// .setDirection(OpenHuFuPlan.Direction.ASC).build())); leafPlan.setOffset(originalLeaf.getFetch() - 1); leafPlan.setFetch(1); LOG.info(leafPlan.toString()); @@ -53,7 +53,7 @@ public static Plan generatePrivacyComparePlan(UnaryPlan originalPlan, double ran leafPlan.setWhereExps(whereExps); leafPlan.setAggExps(ImmutableList.of(ExpressionFactory.createAggFunc(OpenHuFuData.ColumnType.LONG, OpenHuFuData.Modifier.PROTECTED, AggFuncType.COUNT.getId(), ImmutableList.of()))); - leafPlan.setOrders(originalLeaf.getOrders()); +// leafPlan.setOrders(originalLeaf.getOrders()); UnaryPlan unaryPlan = new UnaryPlan(leafPlan); unaryPlan.setSelectExps(ImmutableList.of(ExpressionFactory diff --git a/plan/src/main/java/com/hufudb/openhufu/expression/BasicTranslator.java b/plan/src/main/java/com/hufudb/openhufu/expression/BasicTranslator.java index 443e9cd5..ddd7fefd 100644 --- a/plan/src/main/java/com/hufudb/openhufu/expression/BasicTranslator.java +++ b/plan/src/main/java/com/hufudb/openhufu/expression/BasicTranslator.java @@ -94,6 +94,8 @@ protected String literal(Expression literal) { return String.valueOf(literal.getF64()); case STRING: return String.format("'%s'", literal.getStr()); + case GEOMETRY: + return String.format("'SRID=4326;%s'", literal.getStr()); default: throw new RuntimeException("can't translate literal " + literal); } diff --git a/release/config/spatial-postgis/spatial-postgis-owner1.json b/release/config/spatial-postgis/spatial-postgis-owner1.json index bbc3f2c8..64e9fff9 100644 --- 
a/release/config/spatial-postgis/spatial-postgis-owner1.json +++ b/release/config/spatial-postgis/spatial-postgis-owner1.json @@ -42,7 +42,7 @@ { "name": "location", "type": "GEOMETRY", - "modifier": "protected", + "modifier": "public", "columnId": 1 } ] diff --git a/release/config/spatial-postgis/spatial-postgis-owner2.json b/release/config/spatial-postgis/spatial-postgis-owner2.json index 672e84ae..ba8a6d19 100644 --- a/release/config/spatial-postgis/spatial-postgis-owner2.json +++ b/release/config/spatial-postgis/spatial-postgis-owner2.json @@ -42,7 +42,7 @@ { "name": "location", "type": "GEOMETRY", - "modifier": "protected", + "modifier": "public", "columnId": 1 } ] diff --git a/release/config/spatial-postgis/spatial-postgis-owner3.json b/release/config/spatial-postgis/spatial-postgis-owner3.json index 7aa5a419..18fe5789 100644 --- a/release/config/spatial-postgis/spatial-postgis-owner3.json +++ b/release/config/spatial-postgis/spatial-postgis-owner3.json @@ -1,5 +1,5 @@ { - "id": 1, + "id": 3, "port": 12347, "hostname": "localhost", "implementorconfigpath": "./config/owner.yml", @@ -42,7 +42,7 @@ { "name": "location", "type": "GEOMETRY", - "modifier": "protected", + "modifier": "public", "columnId": 1 } ] diff --git a/release/config/spatial-postgis/spatial-postgis-owner4.json b/release/config/spatial-postgis/spatial-postgis-owner4.json index 6c1d0c24..de5eeb8f 100644 --- a/release/config/spatial-postgis/spatial-postgis-owner4.json +++ b/release/config/spatial-postgis/spatial-postgis-owner4.json @@ -42,7 +42,7 @@ { "name": "location", "type": "GEOMETRY", - "modifier": "protected", + "modifier": "public", "columnId": 1 } ] diff --git a/scripts/build/package.sh b/scripts/build/package.sh index e9788c0c..c7c81c6b 100755 --- a/scripts/build/package.sh +++ b/scripts/build/package.sh @@ -20,6 +20,9 @@ elif [ $1 == "adapter" ]; then mvn install -T ${thread} -DskipTests -amd -pl $1 cp adapter/adapter-csv/target/*-with-dependencies.jar ./release/adapter/adapter_csv.jar 
cp adapter/adapter-postgis/target/*-with-dependencies.jar ./release/adapter/adapter_postgis.jar +elif [ $1 == "udf" ]; then + mvn install -T ${thread} -DskipTests -amd -pl $1 + cp udf/spatial-udf/target/*-with-dependencies.jar ./release/udf/scalar/spatial_udf.jar elif [ $1 == "benchmark" ]; then mvn install -T ${thread} -DskipTests -pl $1 cp benchmark/target/benchmark.jar ./release/bin/benchmark.jar diff --git a/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/DWithin.java b/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/DWithin.java index b976eab0..a78fb1dd 100644 --- a/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/DWithin.java +++ b/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/DWithin.java @@ -48,7 +48,7 @@ public Object implement(List inputs) { @Override public String translate(String dataSource, List inputs) { switch(dataSource) { - case "postgis": + case "POSTGIS": return String.format("ST_DWithin(%s, %s, %s)", inputs.get(0), inputs.get(1), inputs.get(2)); default: throw new RuntimeException("Unsupported datasource for Distance UDF"); diff --git a/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/KNN.java b/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/KNN.java index a035266e..9baca833 100644 --- a/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/KNN.java +++ b/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/KNN.java @@ -29,11 +29,6 @@ public Object implement(List inputs) { @Override public String translate(String dataSource, List inputs) { - switch (dataSource) { - case "POSTGIS": - return String.format("ORDER BY %s<->'SRID=4326;%s' limit %s", inputs.get(0), inputs.get(1), inputs.get(2)); - default: - throw new RuntimeException("Unsupported datasource for Point UDF"); - } + return ""; } } diff --git a/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/Point.java b/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/Point.java index 9530a6c1..4335a378 100644 --- 
a/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/Point.java +++ b/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/Point.java @@ -44,7 +44,7 @@ public Object implement(List inputs) { @Override public String translate(String dataSource, List inputs) { switch (dataSource) { - case "postgis": + case "POSTGIS": return String.format("'SRID=4326;POINT(%s %s)'", inputs.get(0), inputs.get(1)); default: throw new RuntimeException("Unsupported datasource for Point UDF");