From fe6cdf810dc51f5e2b48e287d1a33001074598fa Mon Sep 17 00:00:00 2001 From: raychew Date: Thu, 23 May 2024 18:51:45 +0200 Subject: [PATCH] this is an interim commit with parallelisation switched off so the code runs on most cases now, except grid cell indices 0 and 1 on the ICON R2B4 grid. Related to these issues, I anticipate I/O errors across the W180-E180 boundary. --- inputs/icon_regional_run.py | 2 +- runs/icon_merit_global.py | 26 ++++++------ src/io.py | 83 +++++++++++++++++++++++++++++-------- src/utils.py | 4 +- 4 files changed, 81 insertions(+), 34 deletions(-) diff --git a/inputs/icon_regional_run.py b/inputs/icon_regional_run.py index 5589946..132f14e 100644 --- a/inputs/icon_regional_run.py +++ b/inputs/icon_regional_run.py @@ -23,7 +23,7 @@ params.tri_set = [13, 104, 105, 106] -params.merit_cg = 20 +params.merit_cg = 50 # Setup the Fourier parameters and object. params.nhi = 24 diff --git a/runs/icon_merit_global.py b/runs/icon_merit_global.py index be520b7..412374b 100644 --- a/runs/icon_merit_global.py +++ b/runs/icon_merit_global.py @@ -38,8 +38,8 @@ def do_cell(c_idx, lat_verts = grid.clat_vertices[c_idx] lon_verts = grid.clon_vertices[c_idx] - lat_extent = [lat_verts.min() - 1.0,lat_verts.min() - 1.0,lat_verts.max() + 1.0] - lon_extent = [lon_verts.min() - 1.0,lon_verts.min() - 1.0,lon_verts.max() + 1.0] + lat_extent = [lat_verts.min() - 0.0,lat_verts.min() - 0.0,lat_verts.max() + 0.0] + lon_extent = [lon_verts.min() - 0.0,lon_verts.min() - 0.0,lon_verts.max() + 0.0] # we only keep the topography that is inside this lat-lon extent. lat_verts = np.array(lat_extent) lon_verts = np.array(lon_extent) @@ -99,9 +99,9 @@ def do_cell(c_idx, simplex_lat = tri.tri_lat_verts[tri_idx] simplex_lon = tri.tri_lon_verts[tri_idx] - if utils.is_land(cell, simplex_lat, simplex_lon, topo): + if not utils.is_land(cell, simplex_lat, simplex_lon, topo): # writer.output(c_idx, clat_rad[c_idx], clon_rad[c_idx], 0) - print("--> skipping land cell") + print("--> skipping ocean cell") return writer.grp_struct(c_idx, clat_rad[c_idx], clon_rad[c_idx], 0) else: is_land = 1 @@ -177,7 +177,7 @@ def parallel_wrapper(grid, params, reader, writer): # autoreload() from pycsam.inputs.icon_regional_run import params -# from dask.distributed import Client, progress +# from dask.distributed import Client # import dask # dask.config.set(scheduler='synchronous') @@ -186,18 +186,16 @@ def parallel_wrapper(grid, params, reader, writer): if params.self_test(): params.print() - print(params.path_compact_topo) - grid = var.grid() # read grid reader = io.ncdata(padding=params.padding, padding_tol=(60 - params.padding)) + # reader.read_dat(params.path_compact_grid, grid) + reader.read_dat(params.path_icon_grid, grid) # writer object writer = io.nc_writer(params) - reader.read_dat(params.path_compact_grid, grid) - clat_rad = np.copy(grid.clat) clon_rad = np.copy(grid.clon) @@ -209,18 +207,18 @@ def parallel_wrapper(grid, params, reader, writer): pw_run = parallel_wrapper(grid, params, reader, writer) - # client = Client(threads_per_worker=1, n_workers=1) + # NetCDF-4 reader does not work well with multithreading + # Use only 1 thread per worker! (At least on my laptop) + # client = Client(threads_per_worker=1, n_workers=8) - lazy_results = [] + # lazy_results = [] - for c_idx in range(n_cells)[47:48]: + for c_idx in range(n_cells): pw_run(c_idx) # lazy_result = dask.delayed(pw_run)(c_idx) # lazy_results.append(lazy_result) # results = dask.compute(*lazy_results) - # merit_reader.close_all() - # for item in results: # writer.duplicate(item.c_idx, item) diff --git a/src/io.py b/src/io.py index 3cf0448..64d9457 100644 --- a/src/io.py +++ b/src/io.py @@ -285,6 +285,8 @@ def __load_topo(self, cell, fns, dirs, lon_cnt, lat_cnt, init=True, populate=Tru self.__load_topo(cell, fns, dirs, lon_cnt, lat_cnt, init=False, populate=False) if not populate: + n_col = 0 + n_row = 0 nc_lon = 0 nc_lat = 0 else: @@ -295,32 +297,64 @@ def __load_topo(self, cell, fns, dirs, lon_cnt, lat_cnt, init=True, populate=Tru cell.lat = [] cell.lon = [] + ### Handles the case where a cell spans four topographic datasets + cnt_lat = 0 + cnt_lon = 0 + lat_low_old = np.ones((len(fns))) * np.inf + lat_high_old = np.ones((len(fns))) * np.inf + lon_low_old = np.ones((len(fns))) * np.inf + lon_high_old = np.ones((len(fns))) * np.inf + lat_nc_change, lon_nc_change = False, False + for cnt, fn in enumerate(fns): - try: - test.isopen() - except: - test = nc.Dataset(dirs[cnt] + fn, "r") - self.opened_dfs.append(test) + # try: + # test.isopen() + # except: + test = nc.Dataset(dirs[cnt] + fn, "r") + self.opened_dfs.append(test) lat = test["lat"] - lat_min_idx = np.argmin(np.abs(lat - self.lat_verts.min())) - lat_max_idx = np.argmin(np.abs(lat - self.lat_verts.max())) + lat_min_idx = np.argmin(np.abs((lat - np.sign(lat) * 1e-4) - self.lat_verts.min())) + lat_max_idx = np.argmin(np.abs((lat + np.sign(lat) * 1e-4) - self.lat_verts.max())) lat_high = np.max((lat_min_idx, lat_max_idx)) lat_low = np.min((lat_min_idx, lat_max_idx)) lon = test["lon"] - lon_min_idx = np.argmin(np.abs(lon - (self.lon_verts.min()))) - lon_max_idx = np.argmin(np.abs(lon - (self.lon_verts.max()))) + lon_min_idx = np.argmin(np.abs((lon - np.sign(lon) * 1e-4) - (self.lon_verts.min()))) + lon_max_idx = np.argmin(np.abs((lon + np.sign(lon) * 1e-4) - (self.lon_verts.max()))) lon_high = np.max((lon_min_idx, lon_max_idx)) lon_low = np.min((lon_min_idx, lon_max_idx)) + ### Only add lat and lon elements if there are changes to the low and high indices identified: + if (lon_low not in lon_low_old) and (lon_high not in lon_high_old): + lon_nc_change = True + + if (lat_low not in lat_low_old) and (lat_high not in lat_high_old): + lat_nc_change = True + + lon_low_old[cnt] = lon_low + lon_high_old[cnt] = lon_high + lat_low_old[cnt] = lat_low + lat_high_old[cnt] = lat_high + if not populate: - if cnt < (lon_cnt + 1): + if n_row == 0: + + # if (cnt_lon < (lon_cnt + 1)) and lon_nc_change: nc_lon += lon_high - lon_low - if cnt < (lat_cnt + 1): + cnt_lon += 1 + + if n_col == 0: + # if (cnt_lat < (lat_cnt + 1)) and lat_nc_change: nc_lat += lat_high - lat_low + cnt_lat += 1 + + n_col += 1 + if n_col == (lon_cnt+1): + n_col = 0 + n_row += 1 else: topo = test["Elevation"][lat_low:lat_high, lon_low:lon_high] @@ -332,20 +366,35 @@ def __load_topo(self, cell, fns, dirs, lon_cnt, lat_cnt, init=True, populate=Tru lon_sz = lon_high - lon_low lat_sz = lat_high - lat_low + + # if lon_nc_change and cnt > 0: + # n_col += 1 + + # # if n_col == (lon_cnt + 1): + # # n_col = 0 + # if lat_nc_change and cnt > 0: + # n_row += 1 + # lat_sz_old = np.copy(lat_sz) + cell.topo[ - n_row * lat_sz_old : n_row * lat_sz_old + lat_sz, - n_col * lon_sz_old : n_col * lon_sz_old + lon_sz, + lat_sz_old : lat_sz_old + lat_sz, + lon_sz_old : lon_sz_old + lon_sz, ] = topo n_col += 1 - if n_col == (lon_cnt + 1): + lon_sz_old = np.copy(lon_sz) + + if n_col == (lon_cnt+1): n_col = 0 + lon_sz_old = 0 + n_row += 1 - lat_sz_old = np.copy(lat_sz) + lat_sz_old = np.copy(lat_sz) - lon_sz_old = np.copy(lon_sz) + lon_nc_change = False + lat_nc_change = False - # test.close() + test.close() if not populate: cell.topo = np.zeros((nc_lat, nc_lon)) diff --git a/src/utils.py b/src/utils.py index 62e275f..bd8fb5a 100644 --- a/src/utils.py +++ b/src/utils.py @@ -828,6 +828,6 @@ def is_land(cell, simplex_lat, simplex_lon, topo, height_tol=0.5, percent_tol=0. ) if not (((cell.topo <= height_tol).sum() / cell.topo.size) > percent_tol): - return False + return True else: - return True \ No newline at end of file + return False \ No newline at end of file