From 0b3b88fb9df34d962bb094dad86adb927faadee5 Mon Sep 17 00:00:00 2001 From: "raphael.wcosta@gmail.com" Date: Mon, 30 Jan 2023 10:34:25 -0300 Subject: [PATCH 1/4] :sparkles: Cherry pick to add support STAC legacy and 1.0 --- Dockerfile | 2 +- INSTALL.rst | 13 ++- cube_builder/_adapter.py | 201 +++++++++++++++++++++++++++++++++++++++ cube_builder/maestro.py | 20 ++-- setup.py | 2 +- 5 files changed, 223 insertions(+), 15 deletions(-) create mode 100644 cube_builder/_adapter.py diff --git a/Dockerfile b/Dockerfile index 4eff69c..ffa04de 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,7 +34,7 @@ ADD . ${CUBE_BUILDER_INSTALL_PATH} WORKDIR ${CUBE_BUILDER_INSTALL_PATH} -RUN python3 -m pip install pip --upgrade setuptools wheel && \ +RUN python3 -m pip install pip --upgrade "setuptools<67" wheel && \ python3 -m pip install -e .[rabbitmq] && \ python3 -m pip install gunicorn diff --git a/INSTALL.rst b/INSTALL.rst index ca83585..9246cbb 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -86,6 +86,13 @@ Install in development mode: If you have problems with the ``librabbitmq`` installation, please, see [#f1]_. +.. note:: + + The `setuptools v67+ `_ has breaking changes related + Pip versions requirements. For now, you should install ``setuptools<67`` for compatibility. + The packages in ``Cube-Builder`` will be upgraded to support latest version. + + Running in Development Mode --------------------------- @@ -197,12 +204,14 @@ You may need to replace the definition of some parameters: The command line ``cube-builder worker`` is an auxiliary tool that wraps celery command line using ``cube_builder`` as context. In this way, all ``celery worker`` parameters are currently supported. See more in `Celery Workers Guide `_. + If you keep parameters ``WORK_DIR`` and ``DATA_DIR``, just make sure its writable in order to works, otherwise, + you may see issues related ``Permission Denied``. .. warning:: The ``Cube Builder`` can use a lot of memory for each concurrent process, since it opens multiple images in memory. - You can limit the concurrent processes in order to prevent it. + You can limit the concurrent processes with ``--concurrency NUMBER`` in order to prevent it. .. rubric:: Footnotes @@ -249,4 +258,4 @@ You may need to replace the definition of some parameters: .. code-block:: shell - $ sudo apt install autoconf \ No newline at end of file + $ sudo apt install autoconf diff --git a/cube_builder/_adapter.py b/cube_builder/_adapter.py new file mode 100644 index 0000000..72cef2c --- /dev/null +++ b/cube_builder/_adapter.py @@ -0,0 +1,201 @@ +# +# This file is part of Cube Builder. +# Copyright (C) 2022 INPE. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +"""Define basic module to adapt Python libraries like STAC v1 and legacy versions.""" + +from abc import ABC, abstractmethod +from copy import deepcopy +from typing import List +from urllib.parse import urljoin + +import requests +import shapely.geometry +from pystac_client import Client +from werkzeug.exceptions import abort + + +class BaseSTAC(ABC): + """Define base class to represent a STAC interface to communicate with Server.""" + + uri: str + """Represent URI for server.""" + headers: dict + """Represent HTTP headers to be attached in requests.""" + params: dict + """Represent HTTP parameters for requests.""" + + def __init__(self, uri: str, params=None, headers=None, **kwargs): + """Build STAC signature.""" + self.uri = uri + self.params = params + self.headers = headers + self._options = kwargs + + @abstractmethod + def search(self, **parameters) -> dict: + """Search for collection items on STAC server.""" + + @abstractmethod + def items(self, collection_id: str, **kwargs) -> dict: + """Access STAC Collection Items.""" + + @abstractmethod + def collections(self) -> List[dict]: + """Retrieve the collections from STAC.""" + + @abstractmethod + def collection(self, collection_id: str) -> dict: + """Access STAC Collection.""" + + @staticmethod + def _items_result(features: List[dict], matched: int): + return { + "context": { + "returned": len(features), + "matched": matched + }, + "features": features + } + + +class STACV1(BaseSTAC): + """Define structure to add support for STAC v1.0+. + + This implementation uses `pystac-client `_ + to communicate with STAC v1.0. + """ + + def __init__(self, uri: str, params=None, headers=None, **kwargs): + """Build STAC instance.""" + super(STACV1, self).__init__(uri, params, headers, **kwargs) + + self._instance = Client.open(uri, headers=headers, parameters=params, **kwargs) + + def search(self, limit=10, max_items=10, **parameters) -> dict: + """Search for collection items on STAC server.""" + max_items = limit + item_search = self._instance.search(limit=limit, max_items=max_items, **parameters) + + items = item_search.items() + items = [i.to_dict() for i in items] + + return self._items_result(items, matched=item_search.matched()) + + def collections(self) -> List[dict]: + """Retrieve the collections from STAC.""" + return [c.to_dict() for c in self._instance.get_collections()] + + def collection(self, collection_id: str) -> dict: + """Access STAC Collection.""" + collection = self._instance.get_collection(collection_id) + return collection.to_dict() + + def items(self, collection_id: str, **kwargs) -> dict: + """Access STAC Collection Items.""" + collection = self._instance.get_collection(collection_id) + + items = collection.get_items() + items = [i.to_dict() for i in items] + + result = self.search(collections=[collection_id], limit=1, max_items=1) + + return self._items_result(items, matched=result['context']['matched']) + + +class STACLegacy(BaseSTAC): + """Define structure to add support for legacy versions of STAC server.. + + This implementation uses `requests.Session `_ + to communicate with STAC legacy versions 0.8x, 0.9x directly. + + By default, the ssl entries are ignored. You may override this setting using ``verify=False``. + """ + + def __init__(self, uri: str, params=None, headers=None, verify=False, **kwargs): + """Build STAC instance.""" + super(STACLegacy, self).__init__(uri, params, headers, **kwargs) + + params = params or {} + headers = headers or {} + + self._params = params + self._headers = headers + self._session = requests.session() + self._session.verify = verify + + def search(self, **parameters) -> dict: + """Search for collection items on STAC server.""" + options = deepcopy(parameters) + # Remove unsupported values + options.pop('query', None) + url = self._url_resource('search') + + try: + response = self._request(url, method='POST', data=options, headers=self._headers, params=self._params) + except: + # Use bbox instead + geom = options.pop('intersects', None) + if geom is None: + raise + + options['bbox'] = shapely.geometry.shape(geom).bounds + + response = self._request(url, method='POST', data=options, headers=self._headers, params=self._params) + + return response + + def _request(self, uri: str, method: str = 'GET', data=None, headers=None, params=None): + response = self._session.request(method, uri, headers=headers, params=params, json=data) + if response.status_code != 200: + abort(response.status_code, response.content) + return response.json() + + def collections(self) -> List[dict]: + """Retrieve the collections from STAC.""" + uri = self._url_resource('collections') + collections = self._request(uri, params=self._params, headers=self._headers) + return collections + + def collection(self, collection_id: str) -> dict: + """Access STAC Collection.""" + uri = self._url_resource(f'collections/{collection_id}') + collection = self._request(uri, params=self._params, headers=self._headers) + return collection + + def items(self, collection_id: str, **kwargs) -> dict: + """Access STAC Collection Items.""" + return self.search(collections=[collection_id], limit=1) + + def _url_resource(self, resource: str) -> str: + return urljoin(self.uri + '/', resource) + + +def build_stac(uri, headers=None, **parameters) -> BaseSTAC: + """Build a STAC instance according versions.""" + response = requests.get(uri, timeout=15, headers=headers, params=parameters) + + response.raise_for_status() + + catalog = response.json() + if not catalog.get('stac_version'): + raise RuntimeError(f'Invalid STAC "{uri}", missing "stac_version"') + + stac_version = catalog['stac_version'] + if stac_version.startswith('0.'): + return STACLegacy(uri, params=parameters, headers=headers) + return STACV1(uri, params=parameters, headers=headers) diff --git a/cube_builder/maestro.py b/cube_builder/maestro.py index 881f55b..01fa1b2 100644 --- a/cube_builder/maestro.py +++ b/cube_builder/maestro.py @@ -37,9 +37,9 @@ from celery import chain, group from geoalchemy2 import func from geoalchemy2.shape import to_shape -from stac import STAC # Cube Builder +from ._adapter import BaseSTAC, build_stac from .celery.tasks import prepare_blend, warp_merge from .config import Config from .constants import CLEAR_OBSERVATION_NAME, DATASOURCE_NAME, PROVENANCE_NAME, TOTAL_OBSERVATION_NAME @@ -131,7 +131,7 @@ def __init__(self, datacube: str, collections: List[str], tiles: List[str], star self.tiles = [] self.export_files = self.properties.pop('export_files', None) - def get_stac(self, collection: str) -> STAC: + def get_stac(self, collection: str) -> BaseSTAC: """Retrieve STAC client which provides the given collection. By default, it searches for given collection on Brazil Data Cube STAC. @@ -152,7 +152,7 @@ def get_stac(self, collection: str) -> STAC: # Search in INPE STAC return self._stac(collection, 'http://cdsr.dpi.inpe.br/inpe-stac/stac') - def _stac(self, collection: str, url: str, **kwargs) -> STAC: + def _stac(self, collection: str, url: str, **kwargs) -> BaseSTAC: """Check if collection is provided by given STAC url. The provided STAC must follow the `SpatioTemporal Asset Catalogs spec `_. @@ -172,9 +172,7 @@ def _stac(self, collection: str, url: str, **kwargs) -> STAC: if kwargs.get('token'): options['access_token'] = kwargs.get('token') - stac = self.cached_stacs.get(url) or STAC(url, **options) - - _ = stac.catalog + stac = self.cached_stacs.get(url) or build_stac(url, **options) _ = stac.collection(collection) @@ -614,16 +612,16 @@ def search_images(self, feature: dict, start: str, end: str, tile_id: str, **kwa stac_collection = stac.collection(dataset) if stac_collection.get('summaries') and stac_collection['summaries'].get('platform'): platforms = platforms.union(set(stac_collection['summaries'].get('platform'))) - elif stac_collection.properties.get('platform'): - platforms = platforms.union(set(stac_collection.properties.get('platform'))) + elif stac_collection.get('properties').get('platform'): + platforms = platforms.union(set(stac_collection.get('properties').get('platform'))) token = '' print('Searching for {} - {} ({}, {}) using {}...'.format(dataset, tile_id, start, - end, stac.url), end='', flush=True) + end, stac.uri), end='', flush=True) with timing(' total'): - items = stac.search(filter=options) + items = stac.search(**options) for feature in items['features']: if feature['type'] == 'Feature': @@ -632,7 +630,7 @@ def search_images(self, feature: dict, start: str, end: str, tile_id: str, **kwa identifier = feature['id'] # TODO: Add handler to deal with parse result serializer. platform = feature['properties'].get('platform') - if stac.url.startswith('https://landsatlook.usgs.gov'): + if stac.uri.startswith('https://landsatlook.usgs.gov'): # Remove last SR sentence. identifier = f'{identifier[:-3]}{identifier[-3:].replace("_SR", "")}' # Special treatment for missing/invalid platform values diff --git a/setup.py b/setup.py index b9c115c..b5f60da 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,7 @@ 'rio_cogeo==3.0.2', 'shapely>=1.7,<2', 'SQLAlchemy-Utils>=0.34.2,<1', - 'stac.py==0.9.0.post12', + 'pystac-client>=0.5', 'MarkupSafe==2.0.1', 'bdc-auth-client @ git+https://github.com/brazil-data-cube/bdc-auth-client.git@v0.2.1#egg=bdc-auth-client' ] From ed2ce0490075b8666895b9a858ab6ffd00fac4c8 Mon Sep 17 00:00:00 2001 From: "raphael.wcosta@gmail.com" Date: Tue, 24 Jan 2023 14:53:06 -0300 Subject: [PATCH 2/4] :hammer: remove usage of distutils for future releases deprecation --- cube_builder/celery/__init__.py | 3 +++ cube_builder/config.py | 4 ++-- cube_builder/constants.py | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/cube_builder/celery/__init__.py b/cube_builder/celery/__init__.py index 515ab3f..24a514f 100644 --- a/cube_builder/celery/__init__.py +++ b/cube_builder/celery/__init__.py @@ -19,6 +19,7 @@ """Define Cube Builder celery module initialization.""" import logging +import os import flask from bdc_catalog.models import db @@ -27,6 +28,7 @@ from flask import Flask from cube_builder.config import Config +from ..constants import to_bool CELERY_TASKS = [ 'cube_builder.celery.tasks', @@ -59,6 +61,7 @@ def create_celery_app(flask_app: Flask) -> Celery: always_eager = flask_app.config.get('TESTING', False) celery.conf.update(dict( + CELERY_ACKS_LATE=to_bool(os.getenv('CELERY_ACKS_LATE', '1')), CELERY_TASK_ALWAYS_EAGER=always_eager, CELERYD_PREFETCH_MULTIPLIER=Config.CELERYD_PREFETCH_MULTIPLIER, CELERY_RESULT_BACKEND='db+{}'.format(flask_app.config.get('SQLALCHEMY_DATABASE_URI')), diff --git a/cube_builder/config.py b/cube_builder/config.py index 0b2e70d..ec8b6a8 100644 --- a/cube_builder/config.py +++ b/cube_builder/config.py @@ -18,9 +18,9 @@ """Brazil Data Cube Configuration.""" import os -from distutils.util import strtobool from .version import __version__ +from .constants import to_bool BASE_DIR = os.path.abspath(os.path.dirname(__file__)) @@ -99,7 +99,7 @@ class Config: BDC_AUTH_ACCESS_TOKEN_URL = os.getenv('BDC_AUTH_ACCESS_TOKEN_URL', None) """Access token url used for retrieving user info in BDC-Auth Defaults to ``None``. Used when ``BDC_AUTH_REQUIRED`` is set.""" - BDC_AUTH_REQUIRED = strtobool(os.getenv('BDC_AUTH_REQUIRED', '0')) + BDC_AUTH_REQUIRED = to_bool(os.getenv('BDC_AUTH_REQUIRED', '0')) """Flag to manage when a Auth is required. Defaults to ``0``, that means that there is not authorization request to access ``Cube Builder`` API.""" diff --git a/cube_builder/constants.py b/cube_builder/constants.py index 99af02f..731035d 100644 --- a/cube_builder/constants.py +++ b/cube_builder/constants.py @@ -77,3 +77,25 @@ PNG_MIME_TYPE = 'image/png' SRID_ALBERS_EQUAL_AREA = 100001 + + +def to_bool(val: str): + """Convert a string representation to true or false. + + This method was adapted from `pypa/distutils `_ + to avoid import deprecated module. + + The following values are supported: + - ``True``: 'y', 'yes', 't', 'true', 'on', and '1' + - ``False``: 'n', 'no', 'f', 'false', 'off', and '0' + + Raises: + ValueError: When the given string value could not be converted to boolean. + """ + val = val.lower() + if val in ('y', 'yes', 't', 'true', 'on', '1',): + return 1 + elif val in ('n', 'no', 'f', 'false', 'off', '0',): + return 0 + + raise ValueError(f"invalid boolean value for {val}") From 12b7903d578873841ed373a5a0b6366c80f10273 Mon Sep 17 00:00:00 2001 From: raphaelrpl Date: Wed, 8 Mar 2023 10:13:39 -0300 Subject: [PATCH 3/4] :books: Review dependency setup for legacy version --- INSTALL.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/INSTALL.rst b/INSTALL.rst index 9246cbb..b96748e 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -21,8 +21,6 @@ Installation The ``Cube Builder`` depends essentially on: -- `Python Client Library for STAC (stac.py) `_ - - `Flask `_ - `Celery `_ @@ -77,7 +75,7 @@ Install in development mode: .. code-block:: shell - $ pip3 install -U pip setuptools wheel + $ pip3 install -U pip "setuptools<67" wheel $ pip3 install -e .[all] From f3c390b904aed9a5cd6b0c71e84f3565c586bc56 Mon Sep 17 00:00:00 2001 From: raphaelrpl Date: Wed, 8 Mar 2023 10:14:30 -0300 Subject: [PATCH 4/4] :bookmark: prepare to release 0.8.5 --- CHANGES.rst | 7 +++++++ Dockerfile | 2 +- cube_builder/version.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index fdb6858..3a214be 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -21,6 +21,13 @@ Changes ======= +Version 0.8.5 (2023-03-08) +-------------------------- + +- Fix integration with STAC v1 and STAC Legacy versions +- Add notice in INSTALL for compatibility with package and "setuptools<67" + + Version 0.8.4 (2023-01-23) -------------------------- diff --git a/Dockerfile b/Dockerfile index ffa04de..0e2f83e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,7 @@ LABEL "org.brazildatacube.description"="Docker image for Data Cube Builder appli LABEL "org.brazildatacube.git_commit"="${GIT_COMMIT}" # Build arguments -ARG CUBE_BUILDER_VERSION="0.8.4" +ARG CUBE_BUILDER_VERSION="0.8.5" ARG CUBE_BUILDER_INSTALL_PATH="/opt/cube-builder/${CUBE_BUILDER_VERSION}" ADD . ${CUBE_BUILDER_INSTALL_PATH} diff --git a/cube_builder/version.py b/cube_builder/version.py index af26f47..c77e24d 100644 --- a/cube_builder/version.py +++ b/cube_builder/version.py @@ -23,4 +23,4 @@ """ -__version__ = '0.8.4' +__version__ = '0.8.5'