Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to netcdf read process #460

Merged
merged 28 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
c0bc079
negated
sllynn Nov 20, 2023
741d656
updated python raster test for new isEmpty definition
sllynn Nov 21, 2023
2a1735b
added expressions for separating bands into rows and updating spatial…
sllynn Nov 24, 2023
879e44c
Merge remote-tracking branch 'origin/feature/remove_vsimem' into scal…
sllynn Nov 24, 2023
f278f0f
merged in vsimem changes
sllynn Nov 24, 2023
7070136
changes to make tessellate work for netCDF
sllynn Nov 24, 2023
64dcac3
Merge remote-tracking branch 'origin/feature/remove_vsimem' into scal…
sllynn Nov 24, 2023
015965b
small fixes
sllynn Nov 24, 2023
2e54548
update python module and tests
sllynn Nov 27, 2023
96044c7
alternative isEmpty method across bands
sllynn Dec 18, 2023
05c4ca6
removed `dstalpha` arg from warp parameters
sllynn Dec 22, 2023
41eea37
updated polyfill logic and removed sequence ID from MosaicRasterTile …
sllynn Jan 3, 2024
d2307c8
updated logic for computing radius for buffering in H3 polyfill
sllynn Jan 3, 2024
28cbd79
Merge branch 'main' into scala/fix/netcdf
sllynn Jan 3, 2024
4b94994
removed reference to seqNo in MosaicRasterTile
sllynn Jan 3, 2024
3cc49fc
minor changes, formatting, fixed up netcdf python test
sllynn Jan 30, 2024
7cfcfce
Merge remote-tracking branch 'origin/main' into scala/fix/netcdf
sllynn Jan 30, 2024
deab4ae
merged in main
sllynn Jan 30, 2024
2f177fd
fix r tests
sllynn Jan 30, 2024
a99fb37
added GDAL extension to R package
sllynn Jan 31, 2024
a5998b7
added indexing tests back
sllynn Jan 31, 2024
0d49847
added raster tests to sparklyr bindings
sllynn Feb 2, 2024
9596791
added raster expression tests for SparkR bindings
sllynn Feb 2, 2024
279d88e
small fixes to DESCRIPTION files
sllynn Feb 2, 2024
88a1a46
bring R workflow action in line with python equivalent
sllynn Feb 2, 2024
9043e03
Merge pull request #528 from databrickslabs/r/fix/gdal
Feb 13, 2024
6a79482
updated logic for checking empty band to exclude masked areas
sllynn Feb 13, 2024
32d6f76
Fix raster tests in R.
Feb 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ source("data.R")
test_that("scalar vector functions behave as intended", {
sdf <- SparkR::createDataFrame(
data.frame(
wkt = "POLYGON ((0 0, 0 2, 1 2, 1 0, 0 0))",
wkt = "POLYGON ((2 1, 1 2, 2 3, 2 1))",
point_wkt = "POINT (1 1)"
)
)
Expand Down
3 changes: 2 additions & 1 deletion R/sparkR-mosaic/tests.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
library(testthat)

spark_location <- Sys.getenv("SPARK_HOME")
lib_location <- Sys.getenv("MOSAIC_LIB_PATH")
library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))
.libPaths(c(file.path(spark_location, "R", "lib"), .libPaths()))

Expand All @@ -12,7 +13,7 @@ install.packages(package_file, repos=NULL)
library(sparkrMosaic)

# find the mosaic jar in staging
staging_dir <- "/home/runner/work/mosaic/mosaic/staging/"
staging_dir <- if (lib_location == "") {"/home/runner/work/mosaic/mosaic/staging/"} else {lib_location}
mosaic_jar <- list.files(staging_dir)
mosaic_jar <- mosaic_jar[grep("jar-with-dependencies.jar", mosaic_jar, fixed=T)]
print("Looking for mosaic jar in")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_that("scalar vector functions behave as intended", {
sdf <- sdf_copy_to(
sc,
data.frame(
wkt = "POLYGON ((0 0, 0 2, 1 2, 1 0, 0 0))",
wkt = "POLYGON ((2 1, 1 2, 2 3, 2 1))",
point_wkt = "POINT (1 1)"
)
)
Expand Down
32 changes: 30 additions & 2 deletions python/mosaic/api/aggregators.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,29 @@ def st_intersects_agg(leftIndex: ColumnOrName, rightIndex: ColumnOrName) -> Colu
)


def st_intersects_agg(leftIndex: ColumnOrName, rightIndex: ColumnOrName) -> Column:
"""
Tests if any `leftIndex` : `rightIndex` pairs intersect.

Parameters
----------
leftIndex : Column
The index field of the left-hand geometry
rightIndex : Column
The index field of the right-hand geometry

Returns
-------
Column (BooleanType)

"""
return config.mosaic_context.invoke_function(
"st_intersects_agg",
pyspark_to_java_column(leftIndex),
pyspark_to_java_column(rightIndex),
)


def st_union_agg(geom: ColumnOrName) -> Column:
"""
Returns the point set union of the aggregated geometries.
Expand Down Expand Up @@ -161,7 +184,9 @@ def rst_combineavg_agg(raster_tile: ColumnOrName) -> Column:
)


def rst_derivedband_agg(raster_tile: ColumnOrName, python_func: ColumnOrName, func_name: ColumnOrName) -> Column:
def rst_derivedband_agg(
raster_tile: ColumnOrName, python_func: ColumnOrName, func_name: ColumnOrName
) -> Column:
"""
Returns the raster tile representing the aggregation of rasters using provided python function.

Expand All @@ -182,5 +207,8 @@ def rst_derivedband_agg(raster_tile: ColumnOrName, python_func: ColumnOrName, fu

"""
return config.mosaic_context.invoke_function(
"rst_derivedband_agg", pyspark_to_java_column(raster_tile), pyspark_to_java_column(python_func), pyspark_to_java_column(func_name)
"rst_derivedband_agg",
pyspark_to_java_column(raster_tile),
pyspark_to_java_column(python_func),
pyspark_to_java_column(func_name),
)
23 changes: 13 additions & 10 deletions python/mosaic/api/enable.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@


def enable_mosaic(
spark: SparkSession, dbutils = None, log_info: bool = False,
jar_path: str = None, jar_autoattach: bool = True
spark: SparkSession,
dbutils=None,
log_info: bool = False,
jar_path: str = None,
jar_autoattach: bool = True,
) -> None:
"""
Enable Mosaic functions.
Expand All @@ -30,20 +33,20 @@ def enable_mosaic(
Logging cannot be adjusted with Unity Catalog Shared Access clusters;
if you try to do so, will throw a Py4JSecurityException.
- True will try to setLogLevel to 'info'
- False will not; Default is False
- False will not; Default is False
jar_path : str
Convenience when you need to change the JAR path for Unity Catalog
Volumes with Shared Access clusters
- Default is None; if provided, sets
"spark.databricks.labs.mosaic.jar.path"
- Default is None; if provided, sets
"spark.databricks.labs.mosaic.jar.path"
jar_autoattach : bool
Convenience when you need to turn off JAR auto-attach for Unity
Catalog Volumes with Shared Access clusters.
Catalog Volumes with Shared Access clusters.
- False will not registers the JAR; sets
"spark.databricks.labs.mosaic.jar.autoattach" to "false"
- True will register the JAR; Default is True


Returns
-------

Expand All @@ -62,7 +65,7 @@ def enable_mosaic(
Explicitly specify the index system to use for optimized spatial joins. (Optional)

"""
# Set spark session, conditionally:
# Set spark session, conditionally:
# - set conf for jar autoattach
# - set conf for jar path
# - set log level to 'info'
Expand All @@ -73,9 +76,9 @@ def enable_mosaic(
spark.conf.set("spark.databricks.labs.mosaic.jar.path", jar_path)
print(f"...set 'spark.databricks.labs.mosaic.jar.path' to '{jar_path}'")
if log_info:
spark.sparkContext.setLogLevel('info')
spark.sparkContext.setLogLevel("info")
config.mosaic_spark = spark
_ = MosaicLibraryHandler(config.mosaic_spark, log_info = log_info)
_ = MosaicLibraryHandler(config.mosaic_spark, log_info=log_info)
config.mosaic_context = MosaicContext(config.mosaic_spark)

# Register SQL functions
Expand Down
34 changes: 18 additions & 16 deletions python/mosaic/api/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ def st_convexhull(geom: ColumnOrName) -> Column:
)


def st_concavehull(geom: ColumnOrName, concavity: ColumnOrName, has_holes: Any = False) -> Column:
def st_concavehull(
geom: ColumnOrName, concavity: ColumnOrName, has_holes: Any = False
) -> Column:
"""
Compute the concave hull of a geometry or multi-geometry object.
It uses lengthRatio and
Expand Down Expand Up @@ -190,7 +192,7 @@ def st_concavehull(geom: ColumnOrName, concavity: ColumnOrName, has_holes: Any =
"st_concavehull",
pyspark_to_java_column(geom),
pyspark_to_java_column(concavity),
pyspark_to_java_column(has_holes)
pyspark_to_java_column(has_holes),
)


Expand All @@ -217,7 +219,7 @@ def st_buffer(geom: ColumnOrName, radius: ColumnOrName) -> Column:


def st_bufferloop(
geom: ColumnOrName, inner_radius: ColumnOrName, outer_radius: ColumnOrName
geom: ColumnOrName, inner_radius: ColumnOrName, outer_radius: ColumnOrName
) -> Column:
"""
Compute the buffered geometry loop (hollow ring) based on geom and provided radius-es.
Expand Down Expand Up @@ -363,7 +365,7 @@ def st_transform(geom: ColumnOrName, srid: ColumnOrName) -> Column:


def st_hasvalidcoordinates(
geom: ColumnOrName, crs: ColumnOrName, which: ColumnOrName
geom: ColumnOrName, crs: ColumnOrName, which: ColumnOrName
) -> Column:
"""
Checks if all points in geometry are valid with respect to crs bounds.
Expand Down Expand Up @@ -570,7 +572,7 @@ def st_distance(geom1: ColumnOrName, geom2: ColumnOrName) -> Column:


def st_haversine(
lat1: ColumnOrName, lng1: ColumnOrName, lat2: ColumnOrName, lng2: ColumnOrName
lat1: ColumnOrName, lng1: ColumnOrName, lat2: ColumnOrName, lng2: ColumnOrName
) -> Column:
"""
Compute the haversine distance in kilometers between two latitude/longitude pairs.
Expand Down Expand Up @@ -722,7 +724,7 @@ def st_unaryunion(geom: ColumnOrName) -> Column:


def st_updatesrid(
geom: ColumnOrName, srcSRID: ColumnOrName, destSRID: ColumnOrName
geom: ColumnOrName, srcSRID: ColumnOrName, destSRID: ColumnOrName
) -> Column:
"""
Updates the SRID of the input geometry `geom` from `srcSRID` to `destSRID`.
Expand Down Expand Up @@ -991,7 +993,7 @@ def grid_boundary(index_id: ColumnOrName, format_name: ColumnOrName) -> Column:


def grid_longlatascellid(
lon: ColumnOrName, lat: ColumnOrName, resolution: ColumnOrName
lon: ColumnOrName, lat: ColumnOrName, resolution: ColumnOrName
) -> Column:
"""
Returns the grid's cell ID associated with the input `lng` and `lat` coordinates at a given grid `resolution`.
Expand Down Expand Up @@ -1059,7 +1061,7 @@ def grid_polyfill(geom: ColumnOrName, resolution: ColumnOrName) -> Column:


def grid_tessellate(
geom: ColumnOrName, resolution: ColumnOrName, keep_core_geometries: Any = True
geom: ColumnOrName, resolution: ColumnOrName, keep_core_geometries: Any = True
) -> Column:
"""
Generates:
Expand Down Expand Up @@ -1094,7 +1096,7 @@ def grid_tessellate(


def grid_tessellateexplode(
geom: ColumnOrName, resolution: ColumnOrName, keep_core_geometries: Any = True
geom: ColumnOrName, resolution: ColumnOrName, keep_core_geometries: Any = True
) -> Column:
"""
Generates:
Expand Down Expand Up @@ -1254,7 +1256,7 @@ def grid_cellkloopexplode(cellid: ColumnOrName, k: ColumnOrName) -> Column:


def grid_geometrykring(
geom: ColumnOrName, resolution: ColumnOrName, k: ColumnOrName
geom: ColumnOrName, resolution: ColumnOrName, k: ColumnOrName
) -> Column:
"""
Returns the k-ring of cells around the input geometry.
Expand All @@ -1279,7 +1281,7 @@ def grid_geometrykring(


def grid_geometrykloop(
geom: ColumnOrName, resolution: ColumnOrName, k: ColumnOrName
geom: ColumnOrName, resolution: ColumnOrName, k: ColumnOrName
) -> Column:
"""
Returns the k loop (hollow ring) of cells around the input geometry.
Expand All @@ -1304,7 +1306,7 @@ def grid_geometrykloop(


def grid_geometrykringexplode(
geom: ColumnOrName, resolution: ColumnOrName, k: ColumnOrName
geom: ColumnOrName, resolution: ColumnOrName, k: ColumnOrName
) -> Column:
"""
Returns the exploded k-ring of cells around the input geometry.
Expand All @@ -1329,7 +1331,7 @@ def grid_geometrykringexplode(


def grid_geometrykloopexplode(
geom: ColumnOrName, resolution: ColumnOrName, k: ColumnOrName
geom: ColumnOrName, resolution: ColumnOrName, k: ColumnOrName
) -> Column:
"""
Returns the exploded k loop (hollow ring) of cells around the input geometry.
Expand Down Expand Up @@ -1376,7 +1378,7 @@ def point_index_geom(geom: ColumnOrName, resolution: ColumnOrName) -> Column:


def point_index_lonlat(
lon: ColumnOrName, lat: ColumnOrName, resolution: ColumnOrName
lon: ColumnOrName, lat: ColumnOrName, resolution: ColumnOrName
) -> Column:
"""
[Deprecated] alias for `grid_longlatascellid`
Expand Down Expand Up @@ -1433,7 +1435,7 @@ def polyfill(geom: ColumnOrName, resolution: ColumnOrName) -> Column:


def mosaic_explode(
geom: ColumnOrName, resolution: ColumnOrName, keep_core_geometries: Any = True
geom: ColumnOrName, resolution: ColumnOrName, keep_core_geometries: Any = True
) -> Column:
"""
[Deprecated] alias for `grid_tessellateexplode`
Expand Down Expand Up @@ -1468,7 +1470,7 @@ def mosaic_explode(


def mosaicfill(
geom: ColumnOrName, resolution: ColumnOrName, keep_core_geometries: Any = True
geom: ColumnOrName, resolution: ColumnOrName, keep_core_geometries: Any = True
) -> Column:
"""
[Deprecated] alias for `grid_tessellate`
Expand Down
Loading
Loading