From 5ad959c9742c32c7d90b8bd83f6613a5716e26b5 Mon Sep 17 00:00:00 2001 From: tillwenke Date: Thu, 9 Jan 2025 22:52:34 +0100 Subject: [PATCH] fix: download raster from hf --- environment.yml | 1 + heatchmap/gpmap.py | 27 ++++++++++++--------------- heatchmap/map_based_model.py | 11 +++++++---- setup.py | 3 ++- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/environment.yml b/environment.yml index 93f7b4a..13891ed 100644 --- a/environment.yml +++ b/environment.yml @@ -3,6 +3,7 @@ dependencies: - pip - python=3.10 - pip: + - datasets==3.2.0 - huggingface-hub==0.27.1 - geopandas==1.0.1 - matplotlib==3.10.0 diff --git a/heatchmap/gpmap.py b/heatchmap/gpmap.py index 27d3535..5adf79c 100644 --- a/heatchmap/gpmap.py +++ b/heatchmap/gpmap.py @@ -18,6 +18,7 @@ from rasterio.transform import from_gcps from shapely.validation import make_valid from tqdm import tqdm +from datasets import load_dataset from .map_based_model import MapBasedModel from .utils.utils_data import get_points @@ -48,22 +49,18 @@ def __init__(self, region="world", resolution=10, version="prod"): super().__init__(method=type(self.gpr).__name__, region=region, resolution=resolution, version=version, verbose=False) - files = glob.glob(f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}*.txt") - if len(files) == 0: - raise FileNotFoundError("No base map calculated so far.") - else: - latest_date = pd.Timestamp.min - for file in files: - date = pd.Timestamp(file.split("_")[-1].split(".")[0]) - if date > latest_date: - latest_date = date - self.old_map_path = file + ds = load_dataset("tillwenke/heatchmap-map", cache_dir=f"{HERE}/cache/huggingface") + ds = ds.with_format("np") + self.raw_raster = ds["train"]["numpy"] + + # files = glob.glob(f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}*.txt") - self.begin = latest_date + self.begin = pd.Timestamp("2024-12-21") # TODO: read latest date from somewhere self.batch_size = 10000 self.today = pd.Timestamp.now() - self.map_path = f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}_{self.today.date()}.txt" + # self.map_path = f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}_{self.today.date()}.txt" + self.recalc_radius = 800000 # TODO: determine from model largest influence radius @@ -114,7 +111,7 @@ def recalc_map(self): # recalc the old map - self.raw_raster = np.loadtxt(self.old_map_path) + # self.raw_raster = np.loadtxt(self.old_map_path) self.get_map_grid() self.get_recalc_raster() @@ -153,8 +150,8 @@ def recalc_map(self): print(f"Only {self.recalc_raster.sum()} pixels were recalculated. That is {self.recalc_raster.sum() / (self.raw_raster.shape[0] * self.raw_raster.shape[1]) * 100}% of the map.") print(f"And time per recalculated pixel was {(time.time() - start) / self.recalc_raster.sum()} seconds") - np.savetxt(self.map_path, self.raw_raster) - self.save_as_rasterio() + # np.savetxt(self.map_path, self.raw_raster) + # self.save_as_rasterio() def show_raster(self, raster: np.array): """Show the raster in a plot. diff --git a/heatchmap/map_based_model.py b/heatchmap/map_based_model.py index df9661f..a2738e8 100644 --- a/heatchmap/map_based_model.py +++ b/heatchmap/map_based_model.py @@ -40,6 +40,7 @@ # values higher than the upper boundary are colored in the upmost color BOUNDARIES = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] +HERE = os.path.dirname(os.path.abspath(__file__)) class MapBasedModel(BaseEstimator, RegressorMixin): def __init__( @@ -55,13 +56,15 @@ def __init__( self.resolution = resolution # pixel per degree self.version = version self.verbose = verbose + + landmass_dir = f"{HERE}/cache/landmass" - os.makedirs("temp", exist_ok=True) + os.makedirs(landmass_dir, exist_ok=True) self.map_boundary = self.get_map_boundary() - self.rasterio_path = f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}.tif" - self.map_path = f"intermediate/map_{method}_{region}_{resolution}_{version}.txt" - self.landmass_path = "temp/landmass.tif" + self.rasterio_path = f"{HERE}/cache/intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}.tif" + self.map_path = f"{HERE}/cache/intermediate/map_{method}_{region}_{resolution}_{version}.txt" + self.landmass_path = f"{landmass_dir}/landmass.tif" def get_map_boundary(self): diff --git a/setup.py b/setup.py index 3d752dd..1814ac0 100644 --- a/setup.py +++ b/setup.py @@ -2,11 +2,12 @@ import setuptools -VERSION = "0.1.10" +VERSION = "0.1.11" NAME = "heatchmap" INSTALL_REQUIRES = [ + "datasets==3.2.0", "huggingface-hub==0.27.1", "geopandas==1.0.1", "matplotlib==3.10.0",