Skip to content

Commit

Permalink
fix: download raster from Hugging Face (hf)
Browse files Browse the repository at this point in the history
  • Loading branch information
tillwenke committed Jan 9, 2025
1 parent 3f96d04 commit 5ad959c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 20 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ dependencies:
- pip
- python=3.10
- pip:
- datasets==3.2.0
- huggingface-hub==0.27.1
- geopandas==1.0.1
- matplotlib==3.10.0
Expand Down
27 changes: 12 additions & 15 deletions heatchmap/gpmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from rasterio.transform import from_gcps
from shapely.validation import make_valid
from tqdm import tqdm
from datasets import load_dataset

from .map_based_model import MapBasedModel
from .utils.utils_data import get_points
Expand Down Expand Up @@ -48,22 +49,18 @@ def __init__(self, region="world", resolution=10, version="prod"):

super().__init__(method=type(self.gpr).__name__, region=region, resolution=resolution, version=version, verbose=False)

files = glob.glob(f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}*.txt")
if len(files) == 0:
raise FileNotFoundError("No base map calculated so far.")
else:
latest_date = pd.Timestamp.min
for file in files:
date = pd.Timestamp(file.split("_")[-1].split(".")[0])
if date > latest_date:
latest_date = date
self.old_map_path = file
ds = load_dataset("tillwenke/heatchmap-map", cache_dir=f"{HERE}/cache/huggingface")
ds = ds.with_format("np")
self.raw_raster = ds["train"]["numpy"]

# files = glob.glob(f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}*.txt")

self.begin = latest_date
self.begin = pd.Timestamp("2024-12-21") # TODO: read the latest map date from a persisted source instead of hard-coding it

self.batch_size = 10000
self.today = pd.Timestamp.now()
self.map_path = f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}_{self.today.date()}.txt"
# self.map_path = f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}_{self.today.date()}.txt"


self.recalc_radius = 800000 # TODO: determine from the model's largest influence radius

Expand Down Expand Up @@ -114,7 +111,7 @@ def recalc_map(self):

# recalc the old map

self.raw_raster = np.loadtxt(self.old_map_path)
# self.raw_raster = np.loadtxt(self.old_map_path)

self.get_map_grid()
self.get_recalc_raster()
Expand Down Expand Up @@ -153,8 +150,8 @@ def recalc_map(self):
print(f"Only {self.recalc_raster.sum()} pixels were recalculated. That is {self.recalc_raster.sum() / (self.raw_raster.shape[0] * self.raw_raster.shape[1]) * 100}% of the map.")

Check failure on line 150 in heatchmap/gpmap.py

View workflow job for this annotation

GitHub Actions / build

Ruff (E501)

heatchmap/gpmap.py:150:131: E501 Line too long (185 > 130)

Check failure on line 150 in heatchmap/gpmap.py

View workflow job for this annotation

GitHub Actions / build

Ruff (E501)

heatchmap/gpmap.py:150:131: E501 Line too long (185 > 130)
print(f"And time per recalculated pixel was {(time.time() - start) / self.recalc_raster.sum()} seconds")

np.savetxt(self.map_path, self.raw_raster)
self.save_as_rasterio()
# np.savetxt(self.map_path, self.raw_raster)
# self.save_as_rasterio()

def show_raster(self, raster: np.array):
"""Show the raster in a plot.
Expand Down
11 changes: 7 additions & 4 deletions heatchmap/map_based_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
# values higher than the upper boundary are colored in the upmost color
BOUNDARIES = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

HERE = os.path.dirname(os.path.abspath(__file__))

class MapBasedModel(BaseEstimator, RegressorMixin):
def __init__(
Expand All @@ -55,13 +56,15 @@ def __init__(
self.resolution = resolution # pixel per degree
self.version = version
self.verbose = verbose

landmass_dir = f"{HERE}/cache/landmass"

os.makedirs("temp", exist_ok=True)
os.makedirs(landmass_dir, exist_ok=True)

self.map_boundary = self.get_map_boundary()
self.rasterio_path = f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}.tif"
self.map_path = f"intermediate/map_{method}_{region}_{resolution}_{version}.txt"
self.landmass_path = "temp/landmass.tif"
self.rasterio_path = f"{HERE}/cache/intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}.tif"
self.map_path = f"{HERE}/cache/intermediate/map_{method}_{region}_{resolution}_{version}.txt"
self.landmass_path = f"{landmass_dir}/landmass.tif"


def get_map_boundary(self):
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

import setuptools

VERSION = "0.1.10"
VERSION = "0.1.11"

NAME = "heatchmap"

INSTALL_REQUIRES = [
"datasets==3.2.0",
"huggingface-hub==0.27.1",
"geopandas==1.0.1",
"matplotlib==3.10.0",
Expand Down

0 comments on commit 5ad959c

Please sign in to comment.