Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add command line programs #75

Merged
merged 5 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Changelog
Unreleased changes in master branch
===================================

- Added `ismn collect_metadata` and `ismn export_geojson` CLI programs.
- Added method to NetworkCollection to export metadata as (geo)json.
- Added more options when plotting the station overview map.
- Network citation list updated.
Expand Down
10 changes: 5 additions & 5 deletions docs/examples/interface.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
"\n",
"# Either a .zip file or one folder that contains all networks, here we read from .zip\n",
"data_path = \"/tmp/Data_separate_files_header_20090101_20201231_9289_Cwpc_20221201.zip\"\n",
"ismn_data = ISMN_Interface(data_path)"
"ismn_data = ISMN_Interface(data_path, parallel=False)"
]
},
{
Expand Down Expand Up @@ -1510,7 +1510,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1mMetadata for sensor 5TE_soil_moisture_0.050000_0.050000:\u001b[0m\n"
"\u001B[1mMetadata for sensor 5TE_soil_moisture_0.050000_0.050000:\u001B[0m\n"
]
},
{
Expand Down Expand Up @@ -1672,7 +1672,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1mMetadata for sensor QMR102_precipitation_-1.400000_-1.400000:\u001b[0m\n"
"\u001B[1mMetadata for sensor QMR102_precipitation_-1.400000_-1.400000:\u001B[0m\n"
]
},
{
Expand Down Expand Up @@ -1828,7 +1828,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1mMetadata for sensor 5TM_soil_moisture_0.000000_0.050000:\u001b[0m\n"
"\u001B[1mMetadata for sensor 5TM_soil_moisture_0.000000_0.050000:\u001B[0m\n"
]
},
{
Expand Down Expand Up @@ -2009,4 +2009,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
- defaults
dependencies:
- python>3.7,<3.12
# - python>3.7,<3.12
- numpy
- pandas
- matplotlib
Expand All @@ -14,6 +14,7 @@ dependencies:
- pygeogrids>=0.3.2
- configparser
- tqdm
- click
- more_itertools
- sphinx
- nbsphinx
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ install_requires =
pygeogrids>=0.3.2
numpy
pandas
click
configparser
more_itertools
tqdm
Expand Down Expand Up @@ -67,6 +68,8 @@ testing =
# And any other entry points, for example:
# pyscaffold.cli =
# awesome = pyscaffoldext.awesome.extension:AwesomeExtension
console_scripts =
ismn = ismn.cli:ismn

[test]
# py.test options when running `python setup.py test`
Expand Down
83 changes: 83 additions & 0 deletions src/ismn/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os
import click
from ismn.interface import ISMN_Interface

@click.command("collect_metadata", short_help="Collect all ISMN metadata.")
@click.argument('data_path', type=click.STRING)
@click.option('--meta_path', type=click.Path(writable=True), default=None,
              help="Directory where the metadata should be stored. The file "
                   "will be created automatically. Existing metadata in this "
                   "directory will be replaced! If not specified, "
                   "we use DATA_PATH.")
@click.option('--parallel', '-p', is_flag=True, show_default=True,
              default=False,
              help="Pass this flag to activate parallel metadata collection "
                   "(recommended for large archives). Deactivated by default."
              )
def collect_metadata(data_path, meta_path, parallel):
    """
    Command line program to initialise ISMN metadata collection.
    THIS WILL OVERWRITE ANY EXISTING METADATA!

    \b
    DATA_PATH: string
        Path where the downloaded ISMN archive is stored. This is either
        - The downloaded ISMN ZIP archive or
        - A directory with network folders extracted from the ZIP archive.
    ISMN data can be downloaded from https://ismn.earth after registration.
    """
    # The docstring above is formatted for click's --help output rather than
    # as a normal python docstring (the \b line keeps click from re-wrapping
    # the list that follows it).
    if not os.path.exists(data_path):
        raise ValueError("The passed DATA_PATH does not exist.")
    if meta_path is not None:
        os.makedirs(meta_path, exist_ok=True)
    # Instantiating the interface with force_metadata_collection=True runs
    # the (potentially slow) metadata scan and replaces any existing csv.
    _ = ISMN_Interface(data_path, force_metadata_collection=True,
                       meta_path=meta_path, parallel=parallel)

@click.command("export_geojson", short_help="Export ISMN sensors to geojson.")
@click.argument('data_path', type=click.STRING)
@click.option('--file_out',
              type=click.STRING, default=None,
              help="Path to the json file that should be created. "
                   "If the file already exists it will be overwritten. "
                   "If not specified this is a file called "
                   "`ismn_sensors.json` and stored in the DATA_PATH.")
@click.option('--markercolor', '-m',
              type=click.STRING, default='"#00aa00"', show_default=True,
              help='Hex color (USE QUOTES!, e.g. "#00aa00") to assign to '
                   'markers in json file. The default color is green.')
def export_geojson(data_path, file_out, markercolor):
    """
    Command line program to export the location and metadata of all sensors
    in an ISMN archive to a geojson file (e.g. for display on a web map).

    \b
    DATA_PATH: string
        Path where the downloaded ISMN archive is stored. This is either
        - The downloaded ISMN ZIP archive or
        - A directory with network folders extracted from the ZIP archive.
    ISMN data can be downloaded from https://ismn.earth after registration.
    """
    # The docstring above is formatted for click's --help output rather than
    # as a normal python docstring.
    # Drop the quotes users must pass so the shell does not treat the
    # leading '#' of the hex color as a comment.
    markercolor = str(markercolor.replace('"', '').replace("'", ""))
    if not os.path.exists(data_path):
        raise ValueError("The passed DATA_PATH does not exist.")
    ds = ISMN_Interface(data_path)
    if file_out is None:
        file_out = os.path.join(ds.root.root_dir, 'ismn_sensors.json')
    out_dir = os.path.dirname(file_out)
    if out_dir:
        # dirname is '' for a bare filename; os.makedirs('') would raise.
        os.makedirs(out_dir, exist_ok=True)
    print(f"Exporting geojson to: {file_out}")
    ds.collection.export_geojson(file_out, markercolor=markercolor)


# Top-level `ismn` command group; this is the entry point wired up as a
# console script in setup.cfg (`ismn = ismn.cli:ismn`). No docstring on
# purpose: click would print it as the group's long help text.
@click.group(short_help="ISMN Command Line Programs.")
def ismn():
    pass

# Register the subcommands: `ismn collect_metadata` and `ismn export_geojson`.
ismn.add_command(collect_metadata)
ismn.add_command(export_geojson)
6 changes: 4 additions & 2 deletions src/ismn/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import os.path

from pygeogrids import BasicGrid
from typing import Union
Expand Down Expand Up @@ -823,7 +824,8 @@ def export_citations(self, out_file=None):

return refs

def export_geojson(self, path, network=True, station=True, sensor=False,
def export_geojson(self, path, markercolor="#00aa00",
network=True, station=True, sensor=False,
depth=True, extra_props=None, **filter_kwargs):
"""
Filter sensors in collection and create geojson file containing all
Expand Down Expand Up @@ -867,7 +869,7 @@ def export_geojson(self, path, network=True, station=True, sensor=False,
],
},
"properties": {
"markerColor": "#00aa00",
"markerColor": markercolor,
"datasetProperties": []
}
}
Expand Down
12 changes: 9 additions & 3 deletions src/ismn/filecollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,16 @@ def build_from_scratch(

logging.info(f"Collecting metadata with {n_proc} processes.")

if not parallel:
hint = 'Hint: Use `parallel=True` to speed up metadata ' \
'generation for large datasets'
else:
hint = ''

print(
f"Processing metadata for all ismn stations into folder {root.path}.\n"
f"This may take a few minutes, but is only done once..."
f"\n{'Hint: Use `parallel=True` to speed up metadata generation for large datasets' if not parallel else ''}"
f"Processing metadata for all ismn stations into folder "
f"{root.path}.\n"
f"This may take a few minutes, but is only done once...\n{hint}"
)

process_stat_dirs = []
Expand Down
5 changes: 2 additions & 3 deletions src/ismn/filehandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,9 +623,8 @@ def __read_format_header_values(self) -> pd.DataFrame:
names=names,
usecols=[0, 1, 2, 3, 4],
skiprows=1,
#sep=" ",
sep=r'\s+',
low_memory=False,
delim_whitespace=True,
)

def __read_csv(self, names=None, usecols=None, skiprows=0, **kwargs):
Expand Down Expand Up @@ -672,7 +671,7 @@ def readf(
skiprows=skiprows,
usecols=usecols,
names=names,
delim_whitespace=True,
sep=r'\s+',
parse_dates=parse_dates,
engine="c",
**kwargs
Expand Down
10 changes: 8 additions & 2 deletions src/ismn/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ class ISMN_Interface:
Additional readers to collect station/sensor metadata
from external sources e.g. csv files.
See :class:`ismn.custom.CustomMetaReader`.
force_metadata_collection: bool, optional (default: False)
If true, will run metadata collection and replace any existing metadata
that would otherwise be re-used.

Raises
------
Expand Down Expand Up @@ -141,6 +144,7 @@ def __init__(
keep_loaded_data=False,
temp_root=gettempdir(),
custom_meta_reader=None,
force_metadata_collection=False,
):
self.climate, self.landcover = KOEPPENGEIGER, LANDCOVER
self.parallel = parallel
Expand All @@ -150,6 +154,7 @@ def __init__(
self.keep_loaded_data = keep_loaded_data

self.custom_meta_reader = custom_meta_reader
self.force_metadata_collection = force_metadata_collection

self.meta_path = meta_path
self.temp_root = temp_root
Expand Down Expand Up @@ -178,7 +183,7 @@ def activate_network(

meta_csv_file = meta_path / meta_csv_filename

if not os.path.isfile(meta_csv_file):
if not os.path.isfile(meta_csv_file) or self.force_metadata_collection:
self.__file_collection = IsmnFileCollection.build_from_scratch(
self.root,
parallel=self.parallel,
Expand Down Expand Up @@ -570,7 +575,8 @@ def read_ts(self, idx, return_meta=False):
m = pd.DataFrame(data={i: m})
metadata.append(m)

data = pd.concat(data, axis=1)
# would it make more sense to concat along time dimension?
data = pd.concat(data, axis=1).sort_index()

if return_meta:
meta = pd.concat(metadata, axis=1)
Expand Down
34 changes: 34 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
from click.testing import CliRunner
from ismn.cli import collect_metadata, export_geojson
from tempfile import TemporaryDirectory

testdata_root = os.path.join(os.path.dirname(__file__), "test_data")

def test_cli_meta_collect():
    """`ismn collect_metadata` on a zip archive creates the metadata csv."""
    archive = os.path.join(testdata_root, "zip_archives", "ceop",
                           "Data_seperate_files_20170810_20180809.zip")
    with TemporaryDirectory() as meta_dir:
        outcome = CliRunner().invoke(
            collect_metadata, [archive, "--meta_path", meta_dir, "-p"])
        assert outcome.exit_code == 0
        expected_csv = os.path.join(
            meta_dir, "Data_seperate_files_20170810_20180809.csv")
        assert os.path.isfile(expected_csv)

def test_cli_export_geojson():
    """`ismn export_geojson` creates a geojson file with the custom color."""
    with TemporaryDirectory() as tempdir:
        data_path = os.path.join(
            testdata_root, "zip_archives", "ceop",
            "Data_seperate_files_20170810_20180809.zip")
        file_out = os.path.join(tempdir, "test.geojson")
        runner = CliRunner()
        result = runner.invoke(export_geojson,
                               [data_path, "--file_out", file_out,
                                "-m", "testcolor"])
        assert result.exit_code == 0
        assert os.path.isfile(file_out)
        # Search the whole file, not just the first line, so the test does
        # not depend on how the exported json happens to be formatted.
        with open(file_out, "r") as f:
            assert "testcolor" in f.read()
30 changes: 20 additions & 10 deletions tests/test_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime

import numpy as np
import pandas as pd
import pytest
import logging
from collections import OrderedDict
Expand All @@ -19,7 +20,8 @@ def test_metadata_dataframe():
# make sure that metadata.index represents same values as get_dataset_ids
with TemporaryDirectory() as metadata_path:
testdata = os.path.join(testdata_root, "Data_seperate_files_20170810_20180809")
ds_one = ISMN_Interface(testdata, meta_path=metadata_path, network='FR_Aqui')
ds_one = ISMN_Interface(testdata, meta_path=metadata_path, network='FR_Aqui',
force_metadata_collection=True)

assert np.all(ds_one.metadata.index.values == ds_one.get_dataset_ids(None, -np.inf, np.inf))
ids = ds_one.get_dataset_ids('soil_moisture')
Expand All @@ -37,7 +39,8 @@ def setUpClass(cls):
metadata_path = os.path.join(testdata, "python_metadata")

cleanup(metadata_path)
ds = ISMN_Interface(testdata, network=[], parallel=True)
ds = ISMN_Interface(testdata, network=[], parallel=True,
force_metadata_collection=False)
assert ds.networks == OrderedDict()
cls.testdata = testdata

Expand All @@ -56,8 +59,9 @@ def test_list(self):
assert len(self.ds.list_sensors(station="Barrow-ARM")) == 1

def test_network_for_station(self):
assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
assert self.ds.network_for_station("ARM-1") == "COSMOS"
with pytest.warns(DeprecationWarning):
assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
assert self.ds.network_for_station("ARM-1") == "COSMOS"

def test_stations_that_measure(self):
for s in self.ds.stations_that_measure("soil_moisture"):
Expand Down Expand Up @@ -120,9 +124,13 @@ def test_read_metadata(self):
data2, meta = self.ds.read_ts(1, return_meta=True)
assert all(meta == self.ds.read_metadata(1, format="pandas"))
d2, m2 = self.ds.read([0, 1], return_meta=True)
assert np.all(d2[1]['soil_moisture'].dropna() ==
data2['soil_moisture'].dropna())
assert np.all(m2[1].dropna() == meta.dropna())
pd.testing.assert_series_equal(
d2[1]['soil_moisture'].dropna(),
data2['soil_moisture'].dropna()
)
pd.testing.assert_series_equal(
m2[1].dropna(), meta.dropna(), check_names=False
)
assert self.ds.read_metadata(1, format="dict") is not None
assert self.ds.read_metadata([1], format="obj") is not None

Expand Down Expand Up @@ -231,9 +239,11 @@ def test_get_nearest_station(self):
assert net.stations[station.name].lon == should_lon
assert net.stations[station.name].lat == should_lat

station, dist = self.ds.find_nearest_station(
0, 0, return_distance=True, max_dist=100
)
with pytest.warns(UserWarning):
# expect a warning as no points are within the dist
station, dist = self.ds.find_nearest_station(
0, 0, return_distance=True, max_dist=100
)
assert station == dist == None

def test_citation(self):
Expand Down
Loading