diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3391bdd --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +*.pyc +dist +build +*.egg-info +*~ +.coverage +cover +.*.swp diff --git a/.pbuilderrc b/.pbuilderrc new file mode 100644 index 0000000..393cdd9 --- /dev/null +++ b/.pbuilderrc @@ -0,0 +1,95 @@ +# Based on https://wiki.ubuntu.com/PbuilderHowto (2015-04-21, last edited by osamu) +# License: http://creativecommons.org/licenses/by-sa/3.0/ +# Support for ccache based on http://failshell.io/devel/pbuilder-and-ccache-howto/ + +# Codenames for Debian suites according to their alias. Update these when +# needed. +UNSTABLE_CODENAME="sid" +TESTING_CODENAME="jessie" +STABLE_CODENAME="wheezy" +STABLE_BACKPORTS_SUITE="$STABLE_CODENAME-backports" + +# List of Debian suites. +DEBIAN_SUITES=($UNSTABLE_CODENAME $TESTING_CODENAME $STABLE_CODENAME + "unstable" "testing" "stable") + +# List of Ubuntu suites. Update these when needed. +UBUNTU_SUITES=("trusty" "precise" "lucid") + +# Mirrors to use. Update these to your preferred mirror. +DEBIAN_MIRROR="ftp.us.debian.org" +UBUNTU_MIRROR="mirrors.kernel.org" + +# Optionally use the changelog of a package to determine the suite to use if +# none set. +if [ -z "${DIST}" ] && [ -r "debian/changelog" ]; then + DIST=$(dpkg-parsechangelog | awk '/^Distribution: / {print $2}') + DIST="${DIST%%-*}" + # Use the unstable suite for certain suite values. + if $(echo "experimental UNRELEASED" | grep -q $DIST); then + DIST="$UNSTABLE_CODENAME" + fi + # Use the stable suite for stable-backports. + if $(echo "$STABLE_BACKPORTS_SUITE" | grep -q $DIST); then + DIST="$STABLE_CODENAME" + fi +fi + +# Optionally set a default distribution if none is used. Note that you can set +# your own default (i.e. ${DIST:="unstable"}). +: ${DIST:="$(lsb_release --short --codename)"} + +# Optionally change Debian release states in $DIST to their names. +case "$DIST" in + unstable) + DIST="$UNSTABLE_CODENAME" + ;; + testing) + DIST="$TESTING_CODENAME" + ;; + stable) + DIST="$STABLE_CODENAME" + ;; +esac + +# Optionally set the architecture to the host architecture if none set. Note +# that you can set your own default (i.e. ${ARCH:="i386"}). 
+: ${ARCH:="$(dpkg --print-architecture)"} + +NAME="$DIST" +if [ -n "${ARCH}" ]; then + NAME="$NAME-$ARCH" + DEBOOTSTRAPOPTS=("--arch" "$ARCH" "${DEBOOTSTRAPOPTS[@]}") +fi +BASETGZ="/var/cache/pbuilder/$NAME-base.tgz" +# Optionally, set BASEPATH (and not BASETGZ) if using cowbuilder +# BASEPATH="/var/cache/pbuilder/$NAME/base.cow/" +DISTRIBUTION="$DIST" +BUILDRESULT="/var/cache/pbuilder/$NAME/result/" +APTCACHE="/var/cache/pbuilder/$NAME/aptcache/" +BUILDPLACE="/var/cache/pbuilder/build/" + +if $(echo ${DEBIAN_SUITES[@]} | grep -q $DIST); then + # Debian configuration + MIRRORSITE="http://$DEBIAN_MIRROR/debian/" + COMPONENTS="main contrib non-free" + DEBOOTSTRAPOPTS=("${DEBOOTSTRAPOPTS[@]}" "--keyring=/usr/share/keyrings/debian-archive-keyring.gpg") + +elif $(echo ${UBUNTU_SUITES[@]} | grep -q $DIST); then + # Ubuntu configuration + MIRRORSITE="http://$UBUNTU_MIRROR/ubuntu/" + COMPONENTS="main restricted universe multiverse" + DEBOOTSTRAPOPTS=("${DEBOOTSTRAPOPTS[@]}" "--keyring=/usr/share/keyrings/ubuntu-archive-keyring.gpg") +else + echo "Unknown distribution: $DIST" + exit 1 +fi + +# ccache +export CCACHE_DIR="/var/cache/pbuilder/ccache/$DIST" +export PATH="/usr/lib/ccache:${PATH}" +mkdir -p "${CCACHE_DIR}" +chown -R 1234:root "${CCACHE_DIR}" +chmod 755 "${CCACHE_DIR}" +EXTRAPACKAGES=ccache +BINDMOUNTS="${CCACHE_DIR}" diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..cc7512a --- /dev/null +++ b/.travis.yml @@ -0,0 +1,23 @@ +language: python +python: + - "2.7_with_system_site_packages" + +install: + - sudo apt-get install libblas-dev + - sudo apt-get install liblapack-dev + - sudo apt-get install gfortran + - sudo apt-get install -qq python-numpy python-scipy python-matplotlib + - pip install coveralls + - sudo python setup.py build + - sudo python setup.py install + +script: + - nosetests --with-coverage --cover-erase --cover-package=verif + +after_success: + - coveralls + +env: + - DIST=lucid + +os: linux diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..11fb23a --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2015 Weather Forecast Research Team +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the Weather Forecast Research Team nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE WEATHER FORECAST RESEARCH TEAM BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..be6655f --- /dev/null +++ b/README.rst @@ -0,0 +1,129 @@ +Forecast verification software +============================== + +.. image:: https://travis-ci.org/tnipen/verif.svg?branch=master + :target: https://travis-ci.org/tnipen/verif +.. image:: https://coveralls.io/repos/tnipen/verif/badge.svg?branch=master&service=github + :target: https://coveralls.io/github/tnipen/verif?branch=master + +This software computes verification statistics for weather forecasts at point locations. It can be used to +document the quality of one forecasting system but can also be used to compare different weather models and/or +different post-processing methods. + +The program works by parsing NetCDF files with observations and forecasts in a specific format (see "Input +files" below). + +verif is a command-line tool and can therefore be used to automatically create verification figures. + +Developed by Thomas Nipen, David Siuta, and Tim Chui. + +.. image:: image.jpg + :alt: Example plots + :width: 400 + :align: center + +Features +-------- + +* Deterministic metrics such as MAE, bias, RMSE +* Threshold-based metrics such as the equitable threat score (ETS) +* Probabilistic metrics such as Brier score, PIT histogram, reliability diagrams +* Plot statistics as a function of date, forecast horizon, station elevation, latitude, or longitude +* Show statistics on maps +* Export to text +* Options to adjust font sizes, label positions, tick marks, legends, etc. +* Anomaly statistics (relative to a baseline like climatology) +* Output to png, jpeg, eps, etc., and specify image dimensions and DPI. + +For a full list, run verif without arguments. + +Requirements +------------ + +* Python +* matplotlib +* numpy +* scipy + +Installation Instructions +------------------------- +To install, just execute: + +.. code-block:: bash + + python setup.py install + +verif will then be installed to /usr/local/share/python/ or wherever your Python modules are +installed (look for "Installing verif script to " when installing). Be sure to add this directory +to your $PATH environment variable. + +Example +------- + +Fake data for testing the program is found in ./examples/. Use the following command to test: + +.. code-block:: bash + + verif examples/T_raw_0.nc examples/T_kf_0.nc -m mae + +Text-based input +---------------- +Two data formats are supported. A simple text format for deterministic forecasts looks like this: + +.. code-block:: bash + + date offset lat lon elev obs fcst + 20150101 0 49.2 -122.1 92 3.4 2.1 + 20150101 1 49.2 -122.1 92 4.7 4.2 + 20150101 0 50.3 -120.3 150 0.2 -1.2 + +The first line must describe the columns. The following attributes are recognized: date (in YYYYMMDD), offset (in hours), lat +(in degrees), lon (in degrees), elev (in meters), obs (observations), fcst (deterministic forecast). obs and fcst are required and a value of 0 +is used for any missing column. The columns can be in any order.
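+As an illustration, the sketch below (hypothetical helper, not part of verif;
+verif's own Text reader in verif/Input.py does the same job but also fills in
+defaults for missing columns and builds station objects) reads such a file
+into per-column numpy arrays using only the standard library:
+
+.. code-block:: python
+
+   import csv
+   import numpy as np
+
+   def read_text_file(filename):
+      # Parse a space-separated verification file into {column name: array}
+      with open(filename, 'r') as f:
+         reader = csv.reader(f, delimiter=' ')
+         header = next(reader)  # e.g. date offset lat lon elev obs fcst
+         rows = [[float(value) for value in row] for row in reader if row]
+      data = np.array(rows)
+      return dict(zip(header, data.T))
+
+   columns = read_text_file('examples/example.txt')
+   print(np.mean(np.abs(columns['obs'] - columns['fcst'])))  # MAE
+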
+ +NetCDF input +------------ +For more advanced usage, the files must be in NetCDF and have dimensions and attributes as shown in the +example below. The format is still being decided but will be based on the NetCDF/CF standard. + +.. code-block:: bash + + netcdf format { + dimensions : + date = UNLIMITED; + offset = 48; + station = 10; + ensemble = 21; + threshold = 11; + quantile = 11; + variables: + int id(station); + int offset(offset); + int date(date); + float threshold(threshold); + float quantile(quantile); + float lat(station); + float lon(station); + float elev(station); + float obs(date, offset, station); // Observations + float ens(date, offset, ensemble, station); // Ensemble forecast + float fcst(date, offset, station); // Deterministic forecast + float cdf(date, offset, threshold, station); // Accumulated prob at threshold + float pdf(date, offset, threshold, station); // Pdf at threshold + float x(date, offset, quantile, station); // Threshold corresponding to quantile + float pit(date, offset, station); // CDF for threshold=observation + + global attributes: + : name = "raw"; // Used as configuration name + : long_name = "Temperature"; // Used to label plots + : standard_name = "air_temperature_2m"; + : Units = "^oC"; + : Conventions = "verif_1.0.0"; + } + +Copyright and license +--------------------- + +Copyright © 2015 UBC Weather Forecast Research Team. verif is licensed under the 3-clause BSD license. See the LICENSE +file. diff --git a/examples/T_kf_0.nc b/examples/T_kf_0.nc new file mode 100644 index 0000000..0f56b87 Binary files /dev/null and b/examples/T_kf_0.nc differ diff --git a/examples/T_raw_0.nc b/examples/T_raw_0.nc new file mode 100644 index 0000000..431f854 Binary files /dev/null and b/examples/T_raw_0.nc differ diff --git a/examples/example.txt b/examples/example.txt new file mode 100644 index 0000000..4760693 --- /dev/null +++ b/examples/example.txt @@ -0,0 +1,6 @@ +date offset lat lon elev obs fcst +20120101 0 1 1 1 12 13 +20120101 1 0 0 1 13 14 +20120101 22 0 0 2 5 4 +20120101 0 1 1 0 10 11 +20120101 3 2 2 1 2 3 diff --git a/image.jpg b/image.jpg new file mode 100644 index 0000000..769ee86 Binary files /dev/null and b/image.jpg differ diff --git a/makefile b/makefile new file mode 100644 index 0000000..c98b46b --- /dev/null +++ b/makefile @@ -0,0 +1,6 @@ +coverage: + #nosetests --with-coverage --cover-erase --cover-package=verif --cover-html --cover-branches + nosetests --with-coverage --cover-erase --cover-package=verif --cover-html + +test: + nosetests diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7c35da2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +scipy +numpy +matplotlib +coveralls diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b82b1d3 --- /dev/null +++ b/setup.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from setuptools import setup, find_packages +# To use a consistent encoding +from codecs import open +from os import path + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the relevant file +with open(path.join(here, 'README.rst'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name='verif', + + # Versions should comply with PEP440.
For a discussion on single-sourcing + # the version across setup.py and the project code, see + # https://packaging.python.org/en/latest/single_source_version.html + version='0.1.0', + + description='A verification program for meteorological forecasts and observations', + long_description=long_description, + + # The project's main homepage. + url='https://github.com/tnipen/verif', + + # Author details + author='Thomas Nipen', + author_email='tnipen@gmail.com', + + # Choose your license + license='BSD', + + # See https://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + 'Development Status :: 3 - Alpha', + + # Indicate who your project is intended for + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Build Tools', + + # Pick your license as you wish (should match "license" above) + 'License :: OSI Approved :: BSD License', + + # Specify the Python versions you support here. In particular, ensure + # that you indicate whether you support Python 2, Python 3 or both. + # (The code currently uses Python 2 print statements, so Python 3 is + # not supported yet.) + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + ], + + # What does your project relate to? + keywords='meteorology verification weather', + + # You can just specify the packages manually here if your project is + # simple. Or you can use find_packages(). + packages=find_packages(exclude=['contrib', 'docs', 'tests*']), + + # List run-time dependencies here. These will be installed by pip when + # your project is installed. For an analysis of "install_requires" vs pip's + # requirements files see: + # https://packaging.python.org/en/latest/requirements.html + install_requires=['numpy', 'matplotlib', 'scipy', 'coveralls'], + + # List additional groups of dependencies here (e.g. development + # dependencies). You can install these using the following syntax, + # for example: + # $ pip install -e .[dev,test] + extras_require={ + # 'dev': ['check-manifest'], + 'test': ['coverage'], + # 'test': ['pytest'], + }, + + test_suite="tests", + + # If there are data files included in your packages that need to be + # installed, specify them here. If using Python 2.6 or less, then these + # have to be included in MANIFEST.in as well. + #package_data={ + # 'sample': ['package_data.dat'], + #}, + + # Although 'package_data' is the preferred approach, in some cases you may + # need to place data files outside of your packages. See: + # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa + # In this case, 'data_file' will be installed into '/my_data' + #data_files=[('my_data', ['data/data_file'])], + + # To provide executable scripts, use entry points in preference to the + # "scripts" keyword. Entry points provide cross-platform support and allow + # pip to create the appropriate form of executable for the target platform.
entry_points={ + 'console_scripts': [ + 'verif=verif:main', + ], + }, +) diff --git a/tests/Metric_Deterministic_test.py b/tests/Metric_Deterministic_test.py new file mode 100644 index 0000000..d63719b --- /dev/null +++ b/tests/Metric_Deterministic_test.py @@ -0,0 +1,29 @@ +import unittest +import verif.Metric as Metric +import numpy as np + +class MyTest(unittest.TestCase): + def test_func(self): + obsSet = [[1,2,3], + [1,2], + [1]] + fcstSet = [[0,2,8], + [3,2], + [2]] + metrics = [Metric.Mae(), Metric.Medae(), Metric.Bias(), Metric.Ef()] + # Metrics in the inner lists, datasets in the outer + expSet = [[2, 1, -4.0/3, 100.0/3], + [1, 1, -1, 50], + [1, 1, -1, 100]] + for s in range(0, len(obsSet)): + obs = obsSet[s] + fcst = fcstSet[s] + for i in range(0, len(metrics)): + metric = metrics[i] + expected = expSet[s][i] + value = metric.computeObsFcst(np.array(obs), np.array(fcst)) + message = metric.name() + " gives " + str(value) + " value for set " + str(s) + " (expected " + str(expected) + ")" + self.assertAlmostEqual(value, expected, msg=message) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/Metric_test.py b/tests/Metric_test.py new file mode 100644 index 0000000..05d1459 --- /dev/null +++ b/tests/Metric_test.py @@ -0,0 +1,18 @@ +import unittest +import verif.Metric as Metric +import numpy as np + +class MyTest(unittest.TestCase): + def test_func(self): + metric = Metric.Mae() + value = metric.computeObsFcst(np.array([2,1,2]),np.array([2,3,1])) + self.assertEqual(value, 1) + value = metric.computeObsFcst(np.array([-2]),np.array([2])) + self.assertEqual(value, 4) + value = metric.computeObsFcst(np.array([]),np.array([])) + self.assertTrue(np.isnan(value)) + # value = metric.computeObsFcst(np.array([2,np.nan,2]),np.array([2,3,1])) + # self.assertEqual(value, 0.5) + +if __name__ == '__main__': + unittest.main() diff --git a/verif/.gitignore b/verif/.gitignore new file mode 100644 index 0000000..a6d7ecd --- /dev/null +++ b/verif/.gitignore @@ -0,0 +1 @@ +temp/ diff --git a/verif/Common.py b/verif/Common.py new file mode 100644 index 0000000..52df16c --- /dev/null +++ b/verif/Common.py @@ -0,0 +1,162 @@ +import datetime +import numpy as np +import sys +from matplotlib.dates import * +from copy import deepcopy +import matplotlib.pyplot as mpl +def convertDates(dates): + numDates = len(dates) + dates2 = np.zeros([numDates], 'float') + for i in range(0, numDates): + year = int(dates[i] / 10000) + month = int(dates[i] / 100 % 100) + day = int(dates[i] % 100) + dates2[i] = date2num(datetime.datetime(year, month, day, 0)) + return dates2 + +def red(text): + return "\033[31m"+text+"\033[0m" + +def removeMargin(): + mpl.subplots_adjust(left=0, right=1, bottom=0, top=1, wspace=0,hspace=0) + +def green(text): + return "\033[32m"+text+"\033[0m" + +def yellow(text): + return "\033[33m"+text+"\033[0m" + +def experimental(): + return yellow("(experimental)") + +def error(message): + print "\033[1;31mError: " + message + "\033[0m" + sys.exit(1) + +def warning(message): + print "\033[1;33mWarning: " + message + "\033[0m" + +# allowable formats: +# num +# num1,num2,num3 +# start:end +# start:step:end +def parseNumbers(numbers): + # Check if valid string + if(any(char not in set('-01234567890.:,') for char in numbers)): + error("Could not translate '" + numbers + "' into numbers") + + values = list() + commaLists = numbers.split(',') + for commaList in commaLists: + colonList = commaList.split(':') + if(len(colonList) == 1): + values.append(float(colonList[0])) + elif(len(colonList) <=
3): + start = float(colonList[0]) + step = 1 + if(len(colonList) == 3): + step = float(colonList[1]) + end = float(colonList[-1]) + 0.0001 # arange does not include the end point + values = values + list(np.arange(start, end, step)) + else: + error("Could not translate '" + numbers + "' into numbers") + return values + +def testParseNumbers(): + print parseNumbers("1,2,3:5,6,7:2:20") + print parseNumbers("1") + +# Sets up subplot for index i (starts at 0) out of N +def subplot(i, N): + [X,Y] = getSubplotSize(N) + mpl.subplot(Y,X,i+1) + +def getSubplotSize(N): + Y = 1 + if(N > 4): + Y = np.ceil(np.sqrt(N)/1.5) + X = np.ceil(N / Y) + return [int(X),int(Y)] +def getMapResolution(lats, lons): + dlat = (max(lats) - min(lats)) + dlon = (max(lons) - min(lons)) + scale = max(dlat, dlon) + if(np.isnan(scale)): + res = "c" + elif(scale > 60): + res = "c" + elif(scale > 1): + res = "i" + elif(scale > 0.1): + res = "h" + elif(scale > 0.01): + res = "f" + else: + res = "c" + return res + +# Fill an area along x, between yLower and yUpper +# Both yLower and yUpper must correspond to points in x (i.e. be in the same order) +def fill(x, yLower, yUpper, col, alpha=1, zorder=0, hatch=''): + # This approach doesn't work, because it doesn't remove points with missing x or y + #X = np.hstack((x, x[::-1])) + #Y = np.hstack((yLower, yUpper[::-1])) + + # Populate a list of non-missing points + X = list() + Y = list() + for i in range(0,len(x)): + if(not( np.isnan(x[i]) or np.isnan(yLower[i]))): + X.append(x[i]) + Y.append(yLower[i]) + for i in range(len(x)-1, -1, -1): + if(not (np.isnan(x[i]) or np.isnan(yUpper[i]))): + X.append(x[i]) + Y.append(yUpper[i]) + if(len(X) > 0): + mpl.fill(X, Y, facecolor=col, alpha=alpha,linewidth=0, zorder=zorder, hatch=hatch) + +def clean(data): + data = data[:].astype(float) + q = deepcopy(data) + mask = np.where(q == -999) + q[mask] = np.nan + mask = np.where(q < -100000) + q[mask] = np.nan + mask = np.where(q > 1e30) + q[mask] = np.nan + return q + +# Date: YYYYMMDD diff: Add this many days +def getDate(date, diff): + year = int(date / 10000) + month = int(date / 100 % 100) + day = int(date % 100) + date2 = datetime.datetime(year, month, day, 0) + datetime.timedelta(diff) + return int(date2.strftime('%Y%m%d')) + +def nanmean(data, **args): + return np.ma.filled(np.ma.masked_array(data,np.isnan(data)).mean(**args), + fill_value=np.nan) +def nanmedian(data, **args): + I = np.where(np.isnan(data.flatten()) == 0)[0] + return np.median(data.flatten()[I]) +def nanmin(data, **args): + return np.ma.filled(np.ma.masked_array(data,np.isnan(data)).min(**args), + fill_value=np.nan) +def nanmax(data, **args): + return np.ma.filled(np.ma.masked_array(data,np.isnan(data)).max(**args), + fill_value=np.nan) +def nanstd(data, **args): + return np.ma.filled(np.ma.masked_array(data,np.isnan(data)).std(**args), + fill_value=np.nan) +def nanpercentile(data, pers): + I = np.where(np.isnan(data.flatten()) == 0)[0] + p = np.percentile(data.flatten()[I], pers) + return p + #return np.ma.filled(np.ma.masked_array(data,np.isnan(data)).percentile(pers), + # fill_value=np.nan) + +def intersect(list1, list2): + return list(set(list1) & set(list2)) diff --git a/verif/Data.py b/verif/Data.py new file mode 100644 index 0000000..e5fb831 --- /dev/null +++ b/verif/Data.py @@ -0,0 +1,536 @@ +from scipy import io +import numpy as np +import verif.Common as Common +import re +import sys +import os +import verif.Input as Input +from matplotlib.dates import * +from matplotlib.ticker import ScalarFormatter + +# Access
verification data from a set of COMPS NetCDF files +# Only returns data that is available for all files, for fair comparisons; +# i.e. if some dates/offsets/locations are missing in one file, they are excluded from all files +# +# filenames: COMPS NetCDF verification files +# dates: Only allow these dates +# offsets: Only allow these offsets +# locations: Only allow these locationIds +# clim: Use this NetCDF file to compute anomaly. Should therefore be a climatological +# forecast. Subtract/divide the forecasts from this file from all forecasts and +# observations from the other files. +# climType: 'subtract' or 'divide' the climatology +# training: Remove the first 'training' days of data (to allow a forecast system to train its +# adaptive parameters) +class Data: + def __init__(self, filenames, dates=None, offsets=None, locations=None, latlonRange=None, elevRange=None, clim=None, + climType="subtract", training=None): + if(not isinstance(filenames, list)): + filenames = [filenames] + self._axis = "date" + self._index = 0 + + # Organize files + self._files = list() + self._cache = list() + self._clim = None + for filename in filenames: + if(not os.path.exists(filename)): + Common.error("File '" + filename + "' is not a valid input file") + # file = io.netcdf.netcdf_file(filename, 'r') + try: + file = Input.Comps(filename) + except: + file = Input.Text(filename) + self._files.append(file) + self._cache.append(dict()) + if(clim != None): + self._clim = io.netcdf.netcdf_file(clim, 'r') + self._cache.append(dict()) + if(not (climType == "subtract" or climType == "divide")): + Common.error("Data: climType must be 'subtract' or 'divide'") + self._climType = climType + + # Climatology file + self._files = self._files + [self._clim] + + # Latitude-Longitude range + if(latlonRange != None): + lat = self._files[0].getLats() + lon = self._files[0].getLons() + locId = self._files[0].getStationIds() + latlonLocations = list() + minLon = latlonRange[0] + maxLon = latlonRange[1] + minLat = latlonRange[2] + maxLat = latlonRange[3] + for i in range(0,len(lat)): + currLat = float(lat[i]) + currLon = float(lon[i]) + if(currLat >= minLat and currLat <= maxLat and currLon >= minLon and currLon <= maxLon): + latlonLocations.append(locId[i]) + useLocations = list() + if(locations != None): + for i in range(0, len(locations)): + currLocation = locations[i] + if(currLocation in latlonLocations): + useLocations.append(currLocation) + else: + useLocations = latlonLocations + if(len(useLocations) == 0): + Common.error("No available locations within lat/lon range") + else: + useLocations = locations + + # Elevation range + if(elevRange != None): + elev = self._files[0].getElevs() + locId = self._files[0].getStationIds() + elevLocations = list() + minElev = elevRange[0] + maxElev = elevRange[1] + for i in range(0,len(elev)): + currElev = float(elev[i]) + if(currElev >= minElev and currElev <= maxElev): + elevLocations.append(locId[i]) + if(useLocations is None): + useLocations = elevLocations + else: + useLocations = Common.intersect(useLocations, elevLocations) + if(len(useLocations) == 0): + Common.error("No available locations within elevation range") + + # Find common indices + self._datesI = Data._getCommonIndices(self._files, "Date", dates) + self._offsetsI = Data._getCommonIndices(self._files, "Offset", offsets) + self._locationsI = Data._getCommonIndices(self._files, "Location", useLocations) + if(len(self._datesI[0]) == 0): + Common.error("No valid dates selected") + if(len(self._offsetsI[0]) == 0): + Common.error("No valid offsets selected") + if(len(self._locationsI[0]) == 0): + Common.error("No valid locations selected")
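+ # self._datesI, self._offsetsI and self._locationsI each hold one index
+ # array per file (including the climatology file, if any). Entry j of
+ # every array refers to the same date/offset/location, so scores from
+ # different files can be compared element by element.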
+ + # Training + if(training != None): + for f in range(0, len(self._datesI)): + if(len(self._datesI[f]) <= training): + Common.error("Training period too long for " + self.getFilenames()[f] + \ + ". Max training period is " + str(len(self._datesI[f])-1) + ".") + self._datesI[f] = self._datesI[f][training:] + + self._findex = 0 + + # Returns flattened arrays along the set axis/index + def getScores(self, metrics): + if(not isinstance(metrics, list)): + metrics = [metrics] + data = dict() + valid = None + axis = self._getAxisIndex(self._axis) + + # Compute climatology, if needed + doClim = self._clim != None and ("obs" in metrics or "fcst" in metrics) + if(doClim): + temp = self._getScore("fcst", len(self._files)-1) + if(self._axis == "date"): + clim = temp[self._index,:,:].flatten() + elif(self._axis == "offset"): + clim = temp[:,self._index,:].flatten() + elif(self.isLocationAxis(self._axis)): + clim = temp[:,:,self._index].flatten() + elif(self._axis == "none" or self._axis == "threshold"): + clim = temp.flatten() + elif(self._axis == "all"): + clim = temp + else: + clim = 0 + + for i in range(0, len(metrics)): + metric = metrics[i] + temp = self._getScore(metric) + + if(self._axis == "date"): + data[metric] = temp[self._index,:,:].flatten() + elif(self._axis == "offset"): + data[metric] = temp[:,self._index,:].flatten() + elif(self.isLocationAxis(self._axis)): + data[metric] = temp[:,:,self._index].flatten() + elif(self._axis == "none" or self._axis == "threshold"): + data[metric] = temp.flatten() + elif(self._axis == "all"): + data[metric] = temp + else: + Common.error("Data.py: unrecognized value of self._axis: " + self._axis) + + # Subtract climatology + if(doClim and (metric == "fcst" or metric == "obs")): + if(self._climType == "subtract"): + data[metric] = data[metric] - clim + else: + data[metric] = data[metric] / clim + + # Remove missing values + if(self._axis != "all"): + currValid = (np.isnan(data[metric]) == 0) & (np.isinf(data[metric]) == 0) + if(valid is None): + valid = currValid + else: + valid = (valid & currValid) + if(self._axis != "all"): + I = np.where(valid) + + q = list() + for i in range(0, len(metrics)): + if(self._axis != "all"): + q.append(data[metrics[i]][I]) + else: + q.append(data[metrics[i]]) + + # No valid data + if(q[0].shape[0] == 0): + for i in range(0, len(metrics)): + q[i] = np.nan*np.zeros([1], 'float') + + return q + + # Find indices of elements that are present in all files + # Merge in values in 'aux' as well + @staticmethod + def _getCommonIndices(files, name, aux=None): + # Find common values among all files + values = aux + for file in files: + if(name == "Date"): + temp = file.getDates() + elif(name == "Offset"): + temp = file.getOffsets() + elif(name == "Location"): + stations = file.getStations() + temp = np.zeros(len(stations)) + for i in range(0,len(stations)): + temp[i] = stations[i].id() + if(values is None): + values = temp + else: + values = np.intersect1d(values, temp) + # Sort values since, for example, dates may not be in ascending order + values = np.sort(values) + + # Determine which index each value is at + indices = list() + for file in files: + if(name == "Date"): + temp = file.getDates() + elif(name == "Offset"): + temp = file.getOffsets() + elif(name == "Location"): + stations = file.getStations() + temp = np.zeros(len(stations)) + for i in range(0,len(stations)): + temp[i] = stations[i].id() + I = np.where(np.in1d(temp, values))[0] + II = np.zeros(len(I), 'int') + for i in range(0,len(I)): + II[i] =
np.where(values[i] == temp)[0] + + indices.append(II) + return indices + + def _getFiles(self): + if(self._clim == None): + return self._files + else: + return self._files[0:-1] + + def getMetrics(self): + metrics = None + for file in self._files: + currMetrics = file.getMetrics() + if(metrics == None): + metrics = currMetrics + else: + metrics = set(metrics) & set(currMetrics) + + return metrics + + def _getIndices(self, axis, findex=None): + if(axis == "date"): + I = self._getDateIndices(findex) + elif(axis == "offset"): + I = self._getOffsetIndices(findex) + elif(axis == "location"): + I = self._getLocationIndices(findex) + else: + Common.error("Unknown axis: " + axis) + return I + def _getDateIndices(self, findex=None): + if(findex == None): + findex = self._findex + return self._datesI[findex] + + def _getOffsetIndices(self, findex=None): + if(findex == None): + findex = self._findex + return self._offsetsI[findex] + + def _getLocationIndices(self, findex=None): + if(findex == None): + findex = self._findex + return self._locationsI[findex] + + def _getScore(self, metric, findex=None): + if(findex == None): + findex = self._findex + + if(metric in self._cache[findex]): + return self._cache[findex][metric] + + # Load all files + for f in range(0, self.getNumFilesWithClim()): + if(not metric in self._cache[f]): + file = self._files[f] + if(not metric in file.getVariables()): + Common.error("Variable '" + metric + "' does not exist in " + + self.getFilenames()[f]) + temp = file.getScores(metric) + dims = file.getDims(metric) + temp = Common.clean(temp) + for i in range(0, len(dims)): + I = self._getIndices(dims[i].lower(), f) + if(i == 0): + temp = temp[I,Ellipsis] + if(i == 1): + temp = temp[:,I,Ellipsis] + if(i == 2): + temp = temp[:,:,I,Ellipsis] + self._cache[f][metric] = temp + + # Remove missing + # If one configuration has a missing value, set all configurations to missing + # This can happen when the dates are available, but have missing values + isMissing = np.isnan(self._cache[0][metric]) + for f in range(1, self.getNumFilesWithClim()): + isMissing = isMissing | (np.isnan(self._cache[f][metric])) + for f in range(0, self.getNumFilesWithClim()): + self._cache[f][metric][isMissing] = np.nan + + return self._cache[findex][metric] + + # Checks that all files have the variable + def hasMetric(self, metric): + for f in range(0, self.getNumFilesWithClim()): + if(not metric in self._files[f].getVariables()): + return False + return True + + def setAxis(self, axis): + self._index = 0 # Reset index + self._axis = axis + def setIndex(self, index): + self._index = index + def setFileIndex(self, index): + self._findex = index + def getNumFiles(self): + return len(self._files) - (self._clim != None) + def getNumFilesWithClim(self): + return len(self._files) + + def getUnits(self): + # TODO: Only check first file? + return self._files[0].getUnits() + + def isLocationAxis(self, axis): + if(axis == None): + return False + prog = re.compile("location.*") + return prog.match(axis) + + def getAxisSize(self, axis=None): + if(axis == None): + axis = self._axis + return len(self.getAxisValues(axis)) + + # What values represent this axis?
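+ # Recognized axis names: date, offset, none, threshold, all, and the
+ # location axes (location, locationId, locationElev, locationLat,
+ # locationLon)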
+ def getAxisValues(self, axis=None): + if(axis == None): + axis = self._axis + if(axis == "date"): + return Common.convertDates(self._getScore("Date").astype(int)) + elif(axis == "offset"): + return self._getScore("Offset").astype(int) + elif(axis == "none"): + return [0] + elif(self.isLocationAxis(axis)): + if(axis == "location"): + data = range(0, len(self._getScore("Location"))) + elif(axis == "locationId"): + data = self._getScore("Location").astype(int) + elif(axis == "locationElev"): + data = self._getScore("Elev") + elif(axis == "locationLat"): + data = self._getScore("Lat") + elif(axis == "locationLon"): + data = self._getScore("Lon") + else: + Common.error("Data.getAxisValues has a bad axis name: " + axis) + return data + else: + return [0] + def isAxisContinuous(self, axis=None): + if(axis == None): + axis = self._axis + return axis in ["date", "offset", "threshold"] + + def getAxisFormatter(self, axis=None): + if(axis == None): + axis = self._axis + if(axis == "date"): + return DateFormatter('\n%Y-%m-%d') + else: + return ScalarFormatter() + + + # filename including path + def getFullFilenames(self): + names = list() + files = self._getFiles() + for i in range(0, len(files)): + names.append(files[i].getFilename()) + return names + def getFilename(self, findex=None): + if(findex == None): + findex = self._findex + return self.getFilenames()[findex] + + # Do not include the path + def getFilenames(self): + names = self.getFullFilenames() + for i in range(0, len(names)): + I = names[i].rfind('/') + names[i] = names[i][I+1:] + return names + def getShortNames(self): + names = self.getFilenames() + for i in range(0, len(names)): + I = names[i].rfind('.') + names[i] = names[i][:I] + return names + + def getAxis(self, axis=None): + if(axis == None): + axis = self._axis + return axis + + def getVariable(self): + return self._files[0].getVariable() + + def getVariableAndUnits(self): + return self.getVariable() + " (" + self.getUnits() + ")" + + def getX0(self): + x0 = None + prog = re.compile("Precip.*") + if(prog.match(self.getVariable())): + x0 = 0 + return x0 + + def getX1(self): + x1 = None + prog = re.compile("RH") + if(prog.match(self.getVariable())): + x1 = 100 + return x1 + + def getAxisLabel(self, axis=None): + if(axis == None): + axis = self._axis + if(axis == "date"): + return "Date" + elif(axis == "offset"): + return "Offset (h)" + elif(axis == "locationElev"): + return "Elevation (m)" + elif(axis == "locationLat"): + return "Latitude ($^o$)" + elif(axis == "locationLon"): + return "Longitude ($^o$)" + elif(axis == "threshold"): + return self.getVariableAndUnits() + + def getLats(self): + return self._getScore("Lat") + + def getLons(self): + return self._getScore("Lon") + + def getElevs(self): + return self._getScore("Elev") + + def getLocationIds(self): + return self._getScore("Location") + + def getAxisDescriptions(self, axis=None): + if(axis == None): + axis = self._axis + prog = re.compile("location.*") + if(prog.match(axis)): + descs = list() + ids = self._getScore("Location") + lats = self._getScore("Lat") + lons = self._getScore("Lon") + elevs = self._getScore("Elev") + for i in range(0, len(ids)): + string = "%6d %5.2f %5.2f %5.0f" % (ids[i],lats[i], lons[i], elevs[i]) + descs.append(string) + return descs + if(axis == "date"): + values = self.getAxisValues(axis) + values = num2date(values) + dates = list() + for i in range(0, len(values)): + dates = dates + [values[i].strftime("%Y/%m/%d")] + return dates + else: + return self.getAxisValues(axis) + + def 
getAxisDescriptionHeader(self, axis=None): + if(axis == None): + axis = self._axis + prog = re.compile("location.*") + if(prog.match(axis)): + return "%6s %5s %5s %5s" % ("id", "lat", "lon", "elev") + else: + return axis + + def _getAxisIndex(self, axis): + if(axis == "date"): + return 0 + elif(axis == "offset"): + return 1 + elif(axis == "location" or axis == "locationId" or axis == "locationElev" or axis == "locationLat" or axis == "locationLon"): + return 2 + else: + return None + + def getPvar(self, threshold): + minus = "" + if(threshold < 0): + # Negative thresholds + minus = "m" + if(abs(threshold - int(threshold)) > 0.01): + var = "p" + minus + str(abs(threshold)).replace(".", "") + else: + var = "p" + minus + str(int(abs(threshold))) + return var + + def getQvar(self, quantile): + quantile = quantile * 100 + minus = "" + if(abs(quantile - int(quantile)) > 0.01): + var = "q" + minus + str(abs(quantile)).replace(".", "") + else: + var = "q" + minus + str(int(abs(quantile))) + + if(not self.hasMetric(var) and quantile == 50): + Common.warning("Could not find q50, using fcst instead") + return "fcst" + return var diff --git a/verif/Input.py b/verif/Input.py new file mode 100644 index 0000000..ca69af1 --- /dev/null +++ b/verif/Input.py @@ -0,0 +1,252 @@ +from scipy import io +import numpy as np +import verif.Station as Station +import verif.Common as Common + +# Abstract base class representing verification data +class Input: + def __init__(self, filename): + self._filename = filename + def getName(self): + pass + def getDates(self): + pass + def getStations(self): + pass + def getOffsets(self): + pass + def getThresholds(self): + pass + def getQuantiles(self): + pass + def getObs(self): + pass + def getDeterministic(self): + pass + def getOther(self, name): + pass + def getMetrics(self): + pass + def getFilename(self): + return self._filename + def getVariables(self): + pass + def getUnits(self): + pass + def getLats(self): + stations = self.getStations() + lats = np.zeros(len(stations)) + for i in range(0, len(stations)): + lats[i] = stations[i].lat() + return lats + def getLons(self): + stations = self.getStations() + lons = np.zeros(len(stations)) + for i in range(0, len(stations)): + lons[i] = stations[i].lon() + return lons + def getStationIds(self): + stations = self.getStations() + ids = np.zeros(len(stations)) + for i in range(0, len(stations)): + ids[i] = stations[i].id() + return ids + +# Original file format used by OutputVerif in COMPS +class Comps(Input): + def __init__(self, filename): + Input.__init__(self, filename) + self._file = io.netcdf.netcdf_file(filename, 'r') + def getName(self): + return self._file.variables + def getStations(self): + lat = Common.clean(self._file.variables["Lat"]) + lon = Common.clean(self._file.variables["Lon"]) + id = Common.clean(self._file.variables["Location"]) + elev = Common.clean(self._file.variables["Elev"]) + stations = list() + for i in range(0, lat.shape[0]): + station = Station.Station(id[i], lat[i], lon[i], elev[i]) + stations.append(station) + return stations + def getScores(self, metric): + temp = Common.clean(self._file.variables[metric]) + return temp + def getDims(self, metric): + return self._file.variables[metric].dimensions + def getDates(self): + return Common.clean(self._file.variables["Date"]) + def getOffsets(self): + return Common.clean(self._file.variables["Offset"]) + def getMetrics(self): + metrics = list() + for (metric, v) in self._file.variables.iteritems(): +
if(not metric in ["Date", "Offset", "Location", "Lat", "Lon", "Elev"]): + metrics.append(metric) + return metrics + def getVariables(self): + metrics = list() + for (metric, v) in self._file.variables.iteritems(): + metrics.append(metric) + return metrics + def getUnits(self): + if(hasattr(self._file, "Units")): + if(self._file.Units == ""): + return "No units" + elif(self._file.Units == "%"): + return "%" + else: + return "$" + self._file.Units + "$" + else: + return "No units" + def getVariable(self): + return self._file.Variable + +# New standard format, based on NetCDF/CF +class NetcdfCf(Input): + def __init__(self, filename): + pass + +# Flat text file format +class Text(Input): + def __init__(self, filename): + import csv + Input.__init__(self, filename) + file = open(filename, 'r') + reader = csv.reader(file, delimiter=' ') + + self._dates = set() + self._offsets = set() + self._stations = set() + obs = dict() + fcst = dict() + indices = dict() + header = None + + # Default values if columns not available + offset = 0 + date = 0 + lat = 0 + lon = 0 + elev = 0 + + # Read the data into dictionary with (date,offset,lat,lon,elev) as key and obs/fcst as values + for row in reader: + if(header == None): + # Parse the header so we know what each column represents + header = row + for i in range(0, len(header)): + att = header[i] + if(att == "date"): + indices["date"] = i + elif(att == "offset"): + indices["offset"] = i + elif(att == "lat"): + indices["lat"] = i + elif(att == "lon"): + indices["lon"] = i + elif(att == "elev"): + indices["elev"] = i + elif(att == "obs"): + indices["obs"] = i + elif(att == "fcst"): + indices["fcst"] = i + + # Ensure we have required columns + requiredColumns = ["obs", "fcst"] + for col in requiredColumns: + if(not indices.has_key(col)): + msg = "Could not parse %s: Missing column '%s'" % (filename, col) + Common.error(msg) + else: + if(indices.has_key("date")): + date = float(row[indices["date"]]) + self._dates.add(date) + if(indices.has_key("offset")): + offset = float(row[indices["offset"]]) + self._offsets.add(offset) + if(indices.has_key("lat")): + lat = float(row[indices["lat"]]) + if(indices.has_key("lon")): + lon = float(row[indices["lon"]]) + if(indices.has_key("elev")): + elev = float(row[indices["elev"]]) + station = Station.Station(0, lat, lon, elev) + self._stations.add(station) + obs[(date,offset,lat,lon,elev)] = float(row[indices["obs"]]) + fcst[(date,offset,lat,lon,elev)] = float(row[indices["fcst"]]) + file.close() + self._dates = list(self._dates) + self._offsets = list(self._offsets) + self._stations = list(self._stations) + Ndates = len(self._dates) + Noffsets = len(self._offsets) + Nlocations = len(self._stations) + + # Put the dictionary data into a regular 3D array + self._obs = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan + self._fcst = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan + for d in range(0,len(self._dates)): + date = self._dates[d] + for o in range(0, len(self._offsets)): + offset = self._offsets[o] + for s in range(0, len(self._stations)): + station = self._stations[s] + lat = station.lat() + lon = station.lon() + elev = station.elev() + key = (date,offset,lat,lon,elev) + if(obs.has_key(key)): + self._obs[d][o][s] = obs[key] + if(fcst.has_key(key)): + self._fcst[d][o][s] = fcst[key] + + counter = 0 + for station in self._stations: + station.id(counter) + counter = counter + 1 + self._dates = np.array(self._dates) + self._offsets = np.array(self._offsets) + def getName(self): + return "Unknown" + 
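+ # obs and fcst are stored internally as 3D arrays with dimensions
+ # (Date, Offset, Location), matching the layout of the COMPS format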
def getStations(self): + return self._stations + def getScores(self, metric): + if(metric == "obs"): + return self._obs + elif(metric == "fcst"): + return self._fcst + elif(metric == "Offset"): + return self._offsets + else: + Common.error("Cannot find " + metric) + def getDims(self, metric): + if(metric == "obs" or metric == "fcst"): + return ["Date", "Offset", "Location"] + else: + return [metric] + def getDates(self): + return self._dates + def getOffsets(self): + return self._offsets + def getMetrics(self): + metrics = ["obs", "fcst"] + return metrics + def getVariables(self): + metrics = ["obs", "fcst", "Date", "Offset", "Location", "Lat", "Lon", "Elev"] + return metrics + def getUnits(self): + return "Unknown units" + def getVariable(self): + return "Unknown" + +class Fake(Input): + def __init__(self, obs, fcst): + self._obs = obs + self._fcst = fcst + def getObs(self): + return self._obs + def getMean(self): + return self._fcst diff --git a/verif/Metric.py b/verif/Metric.py new file mode 100644 index 0000000..65c27e7 --- /dev/null +++ b/verif/Metric.py @@ -0,0 +1,923 @@ +import numpy as np +import verif.Common as Common +import sys +import inspect +def getAllMetrics(): + temp = inspect.getmembers(sys.modules[__name__], inspect.isclass) + return temp + +# Computes scores for each x-axis value +class Metric: + # Overload these variables + _description = "" # Overwrite this to let the metric show up in the documentation of ./verif + _min = None # Minimum value this metric can produce + _max = None # Maximum value this metric can produce + _defaultAxis = "offset" # If no axis is specified, use this axis as default + _defaultBinType = None + _reqThreshold = False # Does this metric require thresholds? + _supThreshold = False # Does this metric support thresholds? + _experimental = False # Is this metric not fully tested yet? + _perfectScore = None + + # Compute the score + # data: use getScores([metric1, metric2...]) to get data + # data has already been configured to only retrieve data along a certain dimension + # tRange: [lowerThreshold, upperThreshold] + def compute(self, data, tRange): + #assert(isinstance(tRange, list)) + #assert(len(tRange) == 2) + size = data.getAxisSize() + scores = np.zeros(size, 'float') + # Loop over x-axis + for i in range(0,size): + data.setIndex(i) + x = self.computeCore(data, tRange) + scores[i] = x + return scores + + # Implement this + def computeCore(self, data, tRange): + Common.error("Metric '" + self.getClassName() + "' has not been implemented yet") + + @classmethod + def description(cls): + return cls._description + + + @classmethod + def summary(cls): + desc = cls.description() + if(desc == ""): + return "" + extra = "" + if(cls._experimental): + extra = " " + Common.experimental() + "." + if(cls._perfectScore != None): + extra = extra + " " + "Perfect score " + str(cls._perfectScore) + "." + return desc + "." + extra + + # Does this metric require thresholds in order to be computable? + @classmethod + def requiresThresholds(cls): + return cls._reqThreshold + + # If this metric is to be plotted, along which axis should it be plotted by default? + @classmethod + def defaultAxis(cls): + return cls._defaultAxis + + @classmethod + def defaultBinType(cls): + return cls._defaultBinType + + @classmethod + def perfectScore(cls): + return cls._perfectScore + + # Does it make sense to use '-x threshold' with this metric?
+ @classmethod + def supportsThreshold(cls): + return cls._supThreshold + + # Minimum value the metric can take on + @classmethod + def min(cls): + return cls._min + + # Maximum value the metric can take on + @classmethod + def max(cls): + return cls._max + + def getClassName(self): + name = self.__class__.__name__ + return name + def label(self, data): + return self.name() + " (" + data.getUnits() + ")" + def name(self): + return self.getClassName() + +class Default(Metric): + def __init__(self, name): + self._name = name + def compute(self, data, tRange): + return data.getScores(self._name) + def name(self): + return self._name + +class Mean(Metric): + def __init__(self, metric): + self._metric = metric + def computeCore(self, data, tRange): + return np.mean(self._metric.compute(data, tRange)) + def name(self): + return "Mean of " + self._metric.name() + +class Median(Metric): + def __init__(self, metric): + self._metric = metric + def computeCore(self, data, tRange): + return np.median(self._metric.compute(data, tRange)) + def name(self): + return "Median of " + self._metric.name() + +class Max(Metric): + def __init__(self, metric): + self._metric = metric + def computeCore(self, data, tRange): + return np.max(self._metric.compute(data, tRange)) + def name(self): + return "Max of " + self._metric.name() + +class Min(Metric): + def __init__(self, metric): + self._metric = metric + def computeCore(self, data, tRange): + return np.min(self._metric.compute(data, tRange)) + def name(self): + return "Min of " + self._metric.name() + +class Deterministic(Metric): + def computeCore(self, data, tRange): + [obs, fcst] = data.getScores(["obs", "fcst"]) + return self.computeObsFcst(obs,fcst) + +class Mae(Deterministic): + _min = 0 + _description = "Mean absolute error" + _perfectScore = 0 + def computeObsFcst(self, obs, fcst): + return np.mean(abs(obs - fcst)) + def name(self): + return "MAE" + +class Medae(Deterministic): + _min = 0 + _description = "Median absolute error" + _perfectScore = 0 + def computeObsFcst(self, obs, fcst): + return np.median(abs(obs - fcst)) + def name(self): + return "MedianAE" + +class Bias(Deterministic): + _description = "Bias" + _perfectScore = 0 + def computeObsFcst(self, obs, fcst): + return np.mean(obs - fcst) + +class Ef(Deterministic): + _description = "Exceedance fraction: percentage of times that forecasts > observations" + _min = 0 + _max = 100 + _perfectScore = 50 + def computeObsFcst(self, obs, fcst): + Nfcst = np.sum(obs < fcst) + return Nfcst / 1.0 / len(fcst) * 100 + def label(self, data): + return "% times fcst > obs" + +class Extreme(Metric): + def calc(self, data, func, variable): + [value] = data.getScores([variable]) + if(len(value) == 0): + return np.nan + return func(value) + +class MaxObs(Extreme): + _description = "Maximum observed value" + def computeCore(self, data, tRange): + return self.calc(data, np.max, "obs") + +class MinObs(Extreme): + _description = "Minimum observed value" + def computeCore(self, data, tRange): + return self.calc(data, np.min, "obs") + +class MaxFcst(Extreme): + _description = "Maximum forecasted value" + def computeCore(self, data, tRange): + return self.calc(data, np.max, "fcst") + +class MinFcst(Extreme): + _description = "Minimum forecasted value" + def computeCore(self, data, tRange): + return self.calc(data, np.min, "fcst") + +class StdError(Deterministic): + _min = 0 + _description = "Standard error (i.e. 
RMSE if forecast had no bias)" + _perfectScore = 0 + def computeObsFcst(self, obs, fcst): + bias = np.mean(obs - fcst) + return np.mean((obs - fcst - bias)**2)**0.5 + def name(self): + return "Standard error" + +class Std(Deterministic): + _min = 0 + _description = "Standard deviation of forecast" + def computeObsFcst(self, obs, fcst): + return np.std(fcst) + def label(self, data): + return "STD of forecasts (" + data.getUnits() + ")" + +# Returns all PIT values +class Pit(Metric): + _min = 0 + _max = 1 + def __init__(self, name="pit"): + self._name = name + def label(self, data): + return "PIT" + def compute(self, data, tRange): + x0 = data.getX0() + x1 = data.getX1() + if(x0 == None and x1 == None): + [pit] = data.getScores([self._name]) + else: + [obs,pit] = data.getScores(["obs", self._name]) + if(x0 != None): + I = np.where(obs == x0)[0] + pit[I] = np.random.rand(len(I))*pit[I] + if(x1 != None): + I = np.where(obs == x1)[0] + pit[I] = 1 - np.random.rand(len(I))*(1-pit[I]) + #I = np.where((fcst > 2) & (fcst < 2000))[0] + #I = np.where((fcst > 20))[0] + #pit = pit[I] + return pit + def name(self): + return "PIT" + +# Returns all PIT values +class PitDev(Metric): + _min = 0 + #_max = 1 + _perfectScore = 1 + _description = "Deviation of the PIT histogram" + def __init__(self, numBins=11): + self._metric = Pit() + self._bins = np.linspace(0,1,numBins) + def label(self, data): + return "PIT histogram deviation" + def computeCore(self, data, tRange): + pit = self._metric.compute(data, tRange) + I = np.where(np.isnan(pit) == 0)[0] + pit = pit[np.isnan(pit) == 0] + + nb = len(self._bins)-1 + D = self.deviation(pit, nb) + D0 = self.expectedDeviation(pit, nb) + dev = D/D0 + return dev + + def name(self): + return "PIT deviation factor" + @staticmethod + def expectedDeviation(values, numBins): + if(len(values) == 0 or numBins == 0): + return np.nan + return np.sqrt((1.0 - 1.0 / numBins) / (len(values) * numBins)) + @staticmethod + def deviation(values, numBins): + if(len(values) == 0 or numBins == 0): + return np.nan + x = np.linspace(0,1,numBins+1) + n = np.histogram(values, x)[0] + n = n * 1.0 / sum(n) + return np.sqrt(1.0 / numBins * np.sum((n - 1.0 / numBins)**2)) + @staticmethod + def deviationStd(values, numBins): + if(len(values) == 0 or numBins == 0): + return np.nan + n = len(values) + p = 1.0 / numBins + numPerBinStd = np.sqrt(n * p*(1-p)) + std = numPerBinStd/n + return std + # What reduction in ignorance is possible by calibrating the PIT-histogram? + @staticmethod + def ignorancePotential(values, numBins): + if(len(values) == 0 or numBins == 0): + return np.nan + x = np.linspace(0,1,numBins+1) + n = np.histogram(values, x)[0] + n = n * 1.0 / sum(n) + expected = 1.0 / numBins + ign = np.sum(n*np.log2(n/expected))/sum(n) + return ign + +class MarginalRatio(Metric): + _min = 0 + _description = "Ratio of marginal probability of obs to marginal probability of fcst. 
Use -r" + _perfectScore = 1 + _reqThreshold = True + _supThreshold = True + _defaultAxis = "threshold" + _experimental = True + def computeCore(self, data, tRange): + if(np.isinf(tRange[0])): + pvar = data.getPvar(tRange[1]) + [obs,p1] = data.getScores(["obs",pvar]) + p0 = 0*p1 + elif(np.isinf(tRange[1])): + pvar = data.getPvar(tRange[0]) + [obs,p0] = data.getScores(["obs",pvar]) + p1 = 0*p0+1 + else: + pvar0 = data.getPvar(tRange[0]) + pvar1 = data.getPvar(tRange[1]) + [obs,p0,p1] = data.getScores(["obs",pvar0,pvar1]) + obs = Threshold.within(obs, tRange) + p = p1-p0 + if(np.mean(p) == 0): + return np.nan + return np.mean(obs)/np.mean(p) + def label(self, data): + return "Ratio of marginal probs: Pobs/Pfcst" + +class SpreadSkillDiff(Metric): + _description = "Difference between spread and skill in %" + _perfectScore = 1 + def computeCore(self, data, tRange): + import scipy.stats + [obs,fcst,spread] = data.getScores(["obs", "fcst", "spread"]) + if(len(obs) <= 1): + return np.nan + rmse = np.sqrt(np.mean((obs-fcst)**2)) + spread = np.mean(spread)/2.563103 + return 100 * (spread / rmse - 1) + def name(self): + return "Spread-skill difference" + def label(self, data): + return "Spread-skill difference (%)" + + +class Rmse(Deterministic): + _min = 0 + _description = "Root mean squared error" + _perfectScore = 0 + def computeObsFcst(self, obs, fcst): + return np.mean((obs - fcst)**2)**0.5 + def name(self): + return "RMSE" + +class Rmsf(Deterministic): + _min = 0 + _description = "Root mean squared factor" + _perfectScore = 1 + def computeObsFcst(self, obs, fcst): + return np.exp(np.mean((np.log(fcst/obs))**2)**0.5) + def name(self): + return "RMSE" + +class Crmse(Deterministic): + _min = 0 + _description = "Centered root mean squared error (RMSE without bias)" + _perfectScore = 0 + def computeObsFcst(self, obs, fcst): + bias = np.mean(obs)-np.mean(fcst) + return np.mean((obs - fcst - bias)**2)**0.5 + def name(self): + return "CRMSE" + + +class Cmae(Deterministic): + _min = 0 + _description = "Cube-root mean absolute cubic error" + _perfectScore = 0 + def computeObsFcst(self, obs, fcst): + return (np.mean(abs(obs**3 - fcst**3)))**(1.0/3) + def name(self): + return "CMAE" + +class Dmb(Deterministic): + _description = "Degree of mass balance (obs/fcst)" + _perfectScore = 1 + def computeObsFcst(self, obs, fcst): + return np.mean(obs)/np.mean(fcst) + def name(self): + return "Degree of mass balance (obs/fcst)" + +class Num(Deterministic): + _description = "Number of valid forecasts" + def computeObsFcst(self, obs, fcst): + [fcst] = data.getScores(["fcst"]) + return len(fcst) + def name(self): + return "Number of valid forecasts" + +class Corr(Deterministic): + _min = 0 # Technically -1, but values below 0 are not as interesting + _max = 1 + _description = "Correlation between obesrvations and forecasts" + _perfectScore = 1 + def computeObsFcst(self, obs, fcst): + if(len(obs) <= 1): + return np.nan + return np.corrcoef(obs,fcst)[1,0] + def name(self): + return "Correlation" + def label(self, data): + return "Correlation" + +class RankCorr(Deterministic): + _min = 0 # Technically -1, but values below 0 are not as interesting + _max = 1 + _description = "Rank correlation between obesrvations and forecasts" + _perfectScore = 1 + def computeObsFcst(self, obs, fcst): + import scipy.stats + if(len(obs) <= 1): + return np.nan + return scipy.stats.spearmanr(obs,fcst)[0] + def name(self): + return "Rank correlation" + def label(self, data): + return "Rank correlation" + +class KendallCorr(Deterministic): + 
_min = 0 # Technically -1, but values below 0 are not as interesting + _max = 1 + _description = "Kendall correlation between observations and forecasts" + _perfectScore = 1 + def computeObsFcst(self, obs, fcst): + import scipy.stats + if(len(obs) <= 1): + return np.nan + return scipy.stats.kendalltau(obs,fcst)[0] + def name(self): + return "Kendall correlation" + def label(self, data): + return "Kendall correlation" + +# Metrics based on 2x2 contingency table for a given threshold +class Threshold(Deterministic): + _reqThreshold = True + _supThreshold = True + # TODO: Which is correct? + # The second is best for precip, when doing Brier score -r 0 + @staticmethod + def within(x, range): + #return (x >= range[0]) & (x < range[1]) + return (x > range[0]) & (x <= range[1]) + +class Within(Threshold): + _min = 0 + _max = 100 + _description = "The percentage of forecasts within some error bound (use -r)" + _defaultBinType = "below" + _perfectScore = 100 + def computeCore(self, data, tRange): + [obs,fcst] = data.getScores(["obs", "fcst"]) + diff = abs(obs - fcst) + return np.mean(self.within(diff, tRange))*100 + def name(self): + return "Within" + def label(self, data): + return "% of forecasts" + +# Mean y conditioned on x +# For a given range of x-values, what is the average y-value? +class Conditional(Threshold): + def __init__(self, x="obs", y="fcst", func=np.mean): + self._x = x + self._y = y + self._func = func + def computeCore(self, data, tRange): + [obs,fcst] = data.getScores([self._x, self._y]) + I = np.where(self.within(obs, tRange))[0] + return self._func(fcst[I]) + +# Mean x when conditioned on x +# Average x-value that is within a given range. The reason the y-variable is added +# is to ensure that the same data is used for this metric as for the Conditional metric. +class XConditional(Threshold): + def __init__(self, x="obs", y="fcst"): + self._x = x + self._y = y + def computeCore(self, data, tRange): + [obs,fcst] = data.getScores([self._x, self._y]) + I = np.where(self.within(obs, tRange))[0] + return np.median(obs[I]) + +class Count(Threshold): + def __init__(self, x): + self._x = x + def computeCore(self, data, tRange): + values = data.getScores(self._x) + I = np.where(self.within(values, tRange))[0] + return len(I) + +class Bs(Threshold): + _min = 0 + _max = 1 + _description = "Brier score" + _perfectScore = 0 + def __init__(self, numBins=10): + self._edges = np.linspace(0,1.0001,numBins) + def computeCore(self, data, tRange): + # Compute probabilities based on thresholds + p0 = 0 + p1 = 1 + if(tRange[0] != -np.inf and tRange[1] != np.inf): + var0 = data.getPvar(tRange[0]) + var1 = data.getPvar(tRange[1]) + [obs, p0, p1] = data.getScores(["obs", var0, var1]) + elif(tRange[0] != -np.inf): + var0 = data.getPvar(tRange[0]) + [obs, p0] = data.getScores(["obs", var0]) + elif(tRange[1] != np.inf): + var1 = data.getPvar(tRange[1]) + [obs, p1] = data.getScores(["obs", var1]) + obsP = self.within(obs, tRange) + p = p1 - p0 # Prob of obs within range + bs = np.nan*np.zeros(len(p), 'float') + + # Split into bins and compute Brier score on each bin + for i in range(0, len(self._edges)-1): + I = np.where((p >= self._edges[i]) & (p < self._edges[i+1]))[0] + bs[I] = (np.mean(p[I]) - obsP[I])**2 + return Common.nanmean(bs) + + @staticmethod + def getP(data, tRange): + p0 = 0 + p1 = 1 + if(tRange[0] != -np.inf and tRange[1] != np.inf): + var0 = data.getPvar(tRange[0]) + var1 = data.getPvar(tRange[1]) + [obs, p0, p1] = data.getScores(["obs", var0, var1]) + elif(tRange[0] != -np.inf): + var0 = data.getPvar(tRange[0]) + [obs, p0] =
data.getScores(["obs", var0]) + elif(tRange[1] != np.inf): + var1 = data.getPvar(tRange[1]) + [obs, p1] = data.getScores(["obs", var1]) + + obsP = Threshold.within(obs, tRange) + p = p1 - p0 # Prob of obs within range + return [obsP, p] + + @staticmethod + def getQ(data, tRange): + p0 = 0 + p1 = 1 + var = data.getQvar(tRange[0]) + [obs, q] = data.getScores(["obs", var]) + + return [obs, q] + + def label(self, data): + return "Brier score" + +class Bss(Threshold): + _min = 0 + _max = 1 + _description = "Brier skill score" + _perfectScore = 1 + def __init__(self, numBins=10): + self._edges = np.linspace(0,1.0001,numBins) + def computeCore(self, data, tRange): + [obsP,p] = Bs.getP(data, tRange) + bs = np.nan*np.zeros(len(p), 'float') + for i in range(0, len(self._edges)-1): + I = np.where((p >= self._edges[i]) & (p < self._edges[i+1]))[0] + if(len(I) > 0): + bs[I] = (np.mean(p[I]) - obsP[I])**2 + bs = Common.nanmean(bs) + bsunc = np.mean(obsP)*(1-np.mean(obsP)) + if(bsunc == 0): + bss = np.nan + else: + bss = (bsunc - bs)/bsunc + + return bss + def label(self, data): + return "Brier skill score" + +class BsRel(Threshold): + _min = 0 + _max = 1 + _description = "Brier score, reliability term" + _perfectScore = 0 + def __init__(self, numBins=11): + self._edges = np.linspace(0,1.0001,numBins) + def computeCore(self, data, tRange): + [obsP,p] = Bs.getP(data, tRange) + + # Break p into bins, and comute reliability + bs = np.nan*np.zeros(len(p), 'float') + for i in range(0, len(self._edges)-1): + I = np.where((p >= self._edges[i]) & (p < self._edges[i+1]))[0] + if(len(I) > 0): + meanObsI = np.mean(obsP[I]) + bs[I] = (np.mean(p[I]) - meanObsI)**2 + return Common.nanmean(bs) + def label(self, data): + return "Brier score, reliability term" + +class BsUnc(Threshold): + _min = 0 + _max = 1 + _description = "Brier score, uncertainty term" + _perfectScore = None + def computeCore(self, data, tRange): + [obsP, p] = Bs.getP(data, tRange) + meanObs = np.mean(obsP) + bs = meanObs*(1 - meanObs) + return bs + def label(self, data): + return "Brier score, uncertainty term" + +class BsRes(Threshold): + _min = 0 + _max = 1 + _description = "Brier score, resolution term" + _perfectScore = 1 + def __init__(self, numBins=10): + self._edges = np.linspace(0,1.0001,numBins) + def computeCore(self, data, tRange): + [obsP, p] = Bs.getP(data, tRange) + bs = np.nan*np.zeros(len(p), 'float') + meanObs = np.mean(obsP) + for i in range(0, len(self._edges)-1): + I = np.where((p >= self._edges[i]) & (p < self._edges[i+1]))[0] + if(len(I) > 0): + meanObsI = np.mean(obsP[I]) + bs[I] = (meanObsI - meanObs)**2 + return Common.nanmean(bs) + def label(self, data): + return "Brier score, resolution term" + +class QuantileScore(Threshold): + _min = 0 + _description = "Quantile score. Requires quantiles to be stored"\ + "(e.g q10, q90...). Use -x to set which quantiles to use." 
+class QuantileScore(Threshold):
+   _min = 0
+   _description = "Quantile score. Requires quantiles to be stored "\
+                  "(e.g. q10, q90...). Use -x to set which quantiles to use."
+   _perfectScore = 0
+   def computeCore(self, data, tRange):
+      [obs,q] = Bs.getQ(data, tRange)
+      v = q - obs
+      qs = v * (tRange[0] - (v < 0))
+      return np.mean(qs)
+
+class Ign0(Threshold):
+   _description = "Ignorance of the binary probability based on threshold"
+   def computeCore(self, data, tRange):
+      [obsP,p] = Bs.getP(data, tRange)
+
+      I0 = np.where(obsP == 0)[0]
+      ign = -np.log2(p)
+      ign[I0] = -np.log2(1-p[I0])
+
+      return np.mean(ign)
+
+   def label(self, data):
+      return "Binary Ignorance"
+
+class Spherical(Threshold):
+   _description = "Spherical probabilistic scoring rule for binary events"
+   _max = 1
+   _min = 0
+   def computeCore(self, data, tRange):
+      [obsP,p] = Bs.getP(data, tRange)
+
+      I0 = np.where(obsP == 0)[0]
+      sp = p / np.sqrt(p**2+(1-p)**2)
+      sp[I0] = (1-p[I0]) / np.sqrt((p[I0])**2+(1-p[I0])**2)
+
+      return np.mean(sp)
+
+   def label(self, data):
+      return "Spherical score"
+
+class Contingency(Threshold):
+   _min = 0
+   _max = 1
+   _defaultAxis = "threshold"
+   _reqThreshold = True
+   def getAxisFormatter(self, data):
+      from matplotlib.ticker import ScalarFormatter
+      return ScalarFormatter()
+   def label(self, data):
+      return self.name()
+   def computeCore(self, data, tRange):
+      if(tRange == None):
+         Common.error("Metric " + self.getClassName() + " requires '-r '")
+      [obs,fcst] = data.getScores(["obs", "fcst"])
+      value = np.nan
+      if(len(fcst) > 0):
+         # Compute frequencies
+         a = np.ma.sum((self.within(fcst,tRange)) & (self.within(obs, tRange)))      # Hit
+         b = np.ma.sum((self.within(fcst,tRange)) & (self.within(obs, tRange)==0))   # FA
+         c = np.ma.sum((self.within(fcst,tRange)==0) & (self.within(obs, tRange)))   # Miss
+         d = np.ma.sum((self.within(fcst,tRange)==0) & (self.within(obs, tRange)==0))  # CR
+         value = self.calc(a, b, c, d)
+         if(np.isinf(value)):
+            value = np.nan
+
+      return value
+   def name(self):
+      return self.description()
+
+class Ets(Contingency):
+   _description = "Equitable threat score"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      N = a + b + c + d
+      ar = (a + b) / 1.0 / N * (a + c)
+      if(a + b + c - ar == 0):
+         return np.nan
+      return (a - ar) / 1.0 / (a + b + c - ar)
+   def name(self):
+      return "ETS"
+
+class Threat(Contingency):
+   _description = "Threat score"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      if(a + b + c == 0):
+         return np.nan
+      return a / 1.0 / (a + b + c)
+
+class Pc(Contingency):
+   _description = "Proportion correct"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      return (a + d) / 1.0 / (a + b + c + d)
+
+class Diff(Contingency):
+   _description = "Difference between false alarms and misses"
+   _min = -1
+   _max = 1
+   _perfectScore = 0
+   def calc(self, a, b, c, d):
+      if(b + c == 0):
+         return np.nan
+      return (b - c) / 1.0 / (b + c)
+
+class Edi(Contingency):
+   _description = "Extremal dependency index"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      N = a + b + c + d
+      F = b / 1.0 / (b + d)
+      H = a / 1.0 / (a + c)
+      if(H == 0 or F == 0):
+         return np.nan
+      denom = (np.log(H) + np.log(F))
+      if(denom == 0):
+         return np.nan
+      return (np.log(F) - np.log(H)) / denom
+   def name(self):
+      return "EDI"
+
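+# The calc() methods in the Contingency subclasses all operate on the same
+# 2x2 table: a = hits, b = false alarms, c = misses, d = correct rejections.
+# A worked example with hypothetical counts (not from any dataset):
+#   Ets().calc(50, 20, 10, 920): ar = 70*60/1000 = 4.2, so
+#   ETS = (50 - 4.2) / (50 + 20 + 10 - 4.2) = 0.604
+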
+class Sedi(Contingency):
+   _description = "Symmetric extremal dependency index"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      N = a + b + c + d
+      F = b / 1.0 / (b + d)
+      H = a / 1.0 / (a + c)
+      if(F == 0 or F == 1 or H == 0 or H == 1):
+         return np.nan
+      denom = np.log(F) + np.log(H) + np.log(1-F) + np.log(1-H)
+      if(denom == 0):
+         return np.nan
+      num = np.log(F) - np.log(H) - np.log(1-F) + np.log(1-H)
+      return num / denom
+   def name(self):
+      return "SEDI"
+
+class Eds(Contingency):
+   _description = "Extreme dependency score"
+   _min = None
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      N = a + b + c + d
+      H = a / 1.0 / (a + c)
+      p = (a+c)/1.0/N
+      if(H == 0 or p == 0):
+         return np.nan
+      denom = (np.log(p) + np.log(H))
+      if(denom == 0):
+         return np.nan
+
+      return (np.log(p) - np.log(H))/ denom
+   def name(self):
+      return "EDS"
+
+class Seds(Contingency):
+   _description = "Symmetric extreme dependency score"
+   _min = None
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      N = a + b + c + d
+      H = a / 1.0 / (a + c)
+      p = (a+c)/1.0/N
+      q = (a+b)/1.0/N
+      if(q == 0 or H == 0):
+         return np.nan
+      denom = np.log(p) + np.log(H)
+      if(denom == 0):
+         return np.nan
+      return (np.log(q) - np.log(H))/denom
+   def name(self):
+      return "SEDS"
+
+class BiasFreq(Contingency):
+   _max = None
+   _description = "Bias frequency (number of fcsts / number of obs)"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      if(a + c == 0):
+         return np.nan
+      return 1.0 * (a + b) / (a + c)
+
+class Hss(Contingency):
+   _max = None
+   _description = "Heidke skill score"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      denom = ((a+c)*(c+d) + (a+b)*(b+d))
+      if(denom == 0):
+         return np.nan
+      return 2.0*(a*d-b*c)/denom
+
+class BaseRate(Contingency):
+   _description = "Base rate"
+   _perfectScore = None
+   def calc(self, a, b, c, d):
+      if(a + b + c + d == 0):
+         return np.nan
+      return (a + c) / 1.0 / (a + b + c + d)
+
+class Or(Contingency):
+   _description = "Odds ratio"
+   _max = None
+   _perfectScore = None  # Should be infinity
+   def calc(self, a, b, c, d):
+      if(b * c == 0):
+         return np.nan
+      return (a * d) / 1.0 / (b * c)
+
+class Lor(Contingency):
+   _description = "Log odds ratio"
+   _max = None
+   _perfectScore = None  # Should be infinity
+   def calc(self, a, b, c, d):
+      if(a * d == 0 or b * c == 0):
+         return np.nan
+      return np.log((a * d) / 1.0 / (b * c))
+
+class YulesQ(Contingency):
+   _description = "Yule's Q (Odds ratio skill score)"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      if(a * d + b * c == 0):
+         return np.nan
+      return (a * d - b * c) / 1.0 / (a * d + b * c)
+
+class Kss(Contingency):
+   _description = "Hanssen-Kuiper skill score"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      if((a + c)*(b + d) == 0):
+         return np.nan
+      return (a*d-b*c)* 1.0 / ((a + c)*(b + d))
+
+class Hit(Contingency):
+   _description = "Hit rate"
+   _perfectScore = 1
+   def calc(self, a, b, c, d):
+      if(a + c == 0):
+         return np.nan
+      return a / 1.0 / (a + c)
+
+class Miss(Contingency):
+   _description = "Miss rate"
+   _perfectScore = 0
+   def calc(self, a, b, c, d):
+      if(a + c == 0):
+         return np.nan
+      return c / 1.0 / (a + c)
+
+# Fraction of non-events that are forecasted as events
+class Fa(Contingency):
+   _description = "False alarm rate"
+   _perfectScore = 0
+   def calc(self, a, b, c, d):
+      if(b+d == 0):
+         return np.nan
+      return b / 1.0 / (b + d)
+
+# Fraction of forecasted events that are false alarms
+class Far(Contingency):
+   _description = "False alarm ratio"
+   _perfectScore = 0
+   def calc(self, a, b, c, d):
+      if(a + b == 0):
+         return np.nan
+      return b / 1.0 / (a + b)
diff --git a/verif/Output.py b/verif/Output.py
new file mode 100644
index 0000000..e731e75
--- /dev/null
+++ b/verif/Output.py
@@ -0,0 +1,2035 @@
+# -*- coding: ISO-8859-1 -*-
+import matplotlib.pyplot as mpl
+import re
+import datetime
+import verif.Common as Common
+import verif.Metric as Metric
+import numpy as np
+import sys
+reload(sys)
+sys.setdefaultencoding('ISO-8859-1')
+import os
+import inspect
+
+def getAllOutputs():
+   temp = inspect.getmembers(sys.modules[__name__], inspect.isclass)
+   return temp
+
+def isNumber(s):  # tchui (25/05/15)
+   try:
+      float(s)
+      return True
+   except ValueError:
+      return False
+
+class Output:
+   _description = ""
+   _defaultAxis = "offset"
+   _defaultBinType = "above"
+   _reqThreshold = False
+   _supThreshold = True
+   _supX = True
+   _experimental = False
+   _legLoc = "best"  # Where should the legend go?
+
+   def __init__(self):
+      self._filename = None
+      self._thresholds = [None]
+      self._legNames = None
+      self.default_lines = ['-','-','-','--']
+      self.default_markers = ['o', '', '.', '']
+      self.default_colors = ['r', 'b', 'g', [1,0.73,0.2], 'k']
+      self._lc = None
+      self._ls = None
+      self.colors = None
+      self.styles = None
+      self._ms = 8
+      self._lw = 2
+      self._labfs = 16
+      self._tickfs = 16
+      self._legfs = 16
+      self._figsize = [5,8]
+      self._showMargin = True
+      self._xrot = 0
+      self._minlth = None
+      self._majlth = None
+      self._majwid = None
+      self._bot = None
+      self._top = None
+      self._left = None
+      self._right = None
+      self._xaxis = self.defaultAxis()
+      self._binType = self.defaultBinType()
+      self._showPerfect = False
+      self._dpi = 100
+      self._xlim = None
+      self._ylim = None
+      self._clim = None
+      self._title = None
+      self._xlabel = None
+      self._ylabel = None
+      self._xticks = None
+      self._yticks = None
+      self._tight = False
+
+   @classmethod
+   def defaultAxis(cls):
+      return cls._defaultAxis
+
+   @classmethod
+   def defaultBinType(cls):
+      return cls._defaultBinType
+
+   @classmethod
+   def requiresThresholds(cls):
+      return cls._reqThreshold
+
+   @classmethod
+   def supportsX(cls):
+      return cls._supX
+
+   @classmethod
+   def supportsThreshold(cls):
+      return cls._supThreshold
+
+   @classmethod
+   def description(cls):
+      extra = ""
+      if(cls._experimental):
+         extra = " " + Common.experimental()
+      return cls._description + extra
+
+   @classmethod
+   def summary(cls):
+      return cls.description()
+
+   # Produce output independently for each value along this axis
+   def setAxis(self, axis):
+      if(axis != None):
+         self._xaxis = axis
+
+   def setBinType(self, binType):
+      if(binType != None):
+         self._binType = binType
+
+   def setThresholds(self, thresholds):
+      if(thresholds == None):
+         thresholds = [None]
+      thresholds = np.array(thresholds)
+      self._thresholds = thresholds
+   def setFigsize(self, size):
+      self._figsize = size
+   def setFilename(self, filename):
+      self._filename = filename
+   def setLegend(self, legend):
+      self._legNames = legend
+   def setLegLoc(self, legLoc):  # tchui (25/05/15)
+      self._legLoc = legLoc
+   def setShowMargin(self, showMargin):
+      self._showMargin = showMargin
+   def setDpi(self, dpi):
+      self._dpi = dpi
+   def setXLim(self, lim):
+      if(len(lim) != 2):
+         Common.error("xlim must be a vector of length 2")
+      self._xlim = lim
+   def setYLim(self, lim):
+      if(len(lim) != 2):
+         Common.error("ylim must be a vector of length 2")
+      self._ylim = lim
+   def setCLim(self, lim):
+      if(len(lim) != 2):
+         Common.error("clim must be a vector of length 2")
+      self._clim = lim
+   def setMarkerSize(self, ms):
+      self._ms = ms
+   def setLineWidth(self, lw):
+      self._lw = lw
+   def setLineColor(self, lc):
+      self._lc = lc
+   def setLineStyle(self, ls):
+      self._ls = ls
+   def setTickFontSize(self, fs):
+      self._tickfs = fs
+   def setLabFontSize(self, fs):
+      self._labfs = fs
+   def setLegFontSize(self, fs):
+      self._legfs = fs
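+   # A minimal usage sketch (hypothetical driver code, not part of this class):
+   #   output = Default(Metric.Rmse())
+   #   output.setThresholds([0, 5, 10])
+   #   output.setFilename("rmse.png")  # omit to show the figure interactively
+   #   output.plot(data)               # 'data' is the data object passed throughout this module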
+   def setXRotation(self, xrot):
+      self._xrot = xrot
+   def setMinorLength(self, minlth):
+      self._minlth = minlth
+   def setMajorLength(self, majlth):
+      self._majlth = majlth
+   def setMajorWidth(self, majwid):
+      self._majwid = majwid
+   def setBottom(self, bot):
+      self._bot = bot
+   def setTop(self, top):
+      self._top = top
+   def setLeft(self, left):
+      self._left = left
+   def setRight(self, right):
+      self._right = right
+   def setShowPerfect(self, showPerfect):
+      self._showPerfect = showPerfect
+   def setYlabel(self, ylabel):
+      self._ylabel = ylabel
+   def setXlabel(self, xlabel):
+      self._xlabel = xlabel
+   def setTitle(self, title):
+      self._title = title
+   def setXticks(self, xticks):
+      self._xticks = xticks
+   def setYticks(self, yticks):
+      self._yticks = yticks
+   def setTight(self, tight):
+      self._tight = tight
+
+   # Public
+   # Call this to create a plot, saves to file
+   def plot(self, data):
+      self._plotCore(data)
+      self._adjustAxes()
+      self._legend(data, self._legNames)
+      self._savePlot(data)
+   # Call this to write text output
+   def text(self, data):
+      self._textCore(data)
+   # Draws a map of the data
+   def map(self, data):
+      self._mapCore(data)
+      #self._legend(data, self._legNames)
+      self._savePlot(data)
+
+   def _getLegendNames(self, data):
+      if(self._legNames != None):
+         names = self._legNames
+      else:
+         names = data.getShortNames()
+      return(names)
+
+   def _plotPerfectScore(self, x, perfect, color="gray", zorder=-1000):
+      if(perfect == None):
+         return
+      if(self._showPerfect):
+         # Make 'perfect' same length as 'x'
+         if(not hasattr(perfect, "__len__")):
+            perfect = perfect*np.ones(len(x), 'float')
+         mpl.plot(x, perfect, '-', lw=7, color=color, label="ideal", zorder=zorder)
+
+   # Implement these methods
+   def _plotCore(self, data):
+      Common.error("This type does not plot")
+   def _textCore(self, data):
+      Common.error("This type does not output text")
+   def _mapCore(self, data):
+      Common.error("This type does not produce maps")
+
+   # Helper functions
+   def _getColor(self, i, total):
+      if(self._lc != None):
+         firstList = self._lc.split(",")
+         numList = []
+         finalList = []
+
+         for string in firstList:
+            if("[" in string):  # for rgba args
+               if(not numList):
+                  string = string.replace("[","")
+                  numList.append(float(string))
+               else:
+                  Common.error("Invalid rgba arg \"{}\"".format(string))
+
+            elif("]" in string):
+               if(numList):
+                  string = string.replace("]","")
+                  numList.append(float(string))
+                  finalList.append(numList)
+                  numList = []
+               else:
+                  Common.error("Invalid rgba arg \"{}\"".format(string))
+
+            elif(isNumber(string)):  # append to rgba lists if present, otherwise grayscale intensity
+               if(numList):
+                  numList.append(float(string))
+               else:
+                  finalList.append(string)
+
+            else:
+               if(not numList):  # string args and hexcodes
+                  finalList.append(string)
+               else:
+                  Common.error("Cannot read color args.")
+         self.colors = finalList
+         return self.colors[i % len(self.colors)]
+
+      else:  # use default colours if no colour input given
+         self.colors = self.default_colors
+         return self.colors[i % len(self.default_colors)]
+
+
+   def _getStyle(self, i, total, connectingLine=True, lineOnly=False):  # edited by tchui (25/05/15)
+      if(self._ls != None):
+         listStyles = self._ls.split(",")
+         I = i % len(listStyles)  # loop through input linestyles (independent of colors)
+         return listStyles[I]
+
+      else:  # default linestyles
+         I = (i / len(self.colors)) % len(self.default_lines)
+         line = self.default_lines[I]
+         marker = self.default_markers[I]
+         if(lineOnly):
+            return line
+
if(connectingLine): + return line + marker + return marker + + + # Saves to file, set figure size + def _savePlot(self, data): + if(self._figsize != None): + mpl.gcf().set_size_inches(int(self._figsize[0]), int(self._figsize[1])) + if(not self._showMargin): + Common.removeMargin() + if(self._filename != None): + mpl.savefig(self._filename, bbox_inches='tight', dpi=self._dpi) + else: + fig = mpl.gcf() + fig.canvas.set_window_title(data.getFilenames()[0]) + mpl.show() + def _legend(self, data, names=None): + if(names == None): + mpl.legend(loc=self._legLoc,prop={'size':self._legfs}) + else: + mpl.legend(names, loc=self._legLoc,prop={'size':self._legfs}) + def _getThresholdLimits(self, thresholds): + x = thresholds + if(self._binType == "below"): + lowerT = [-np.inf for i in range(0, len(thresholds))] + upperT = thresholds + elif(self._binType == "above"): + lowerT = thresholds + upperT = [np.inf for i in range(0, len(thresholds))] + elif(self._binType == "within"): + lowerT = thresholds[0:-1] + upperT = thresholds[1:] + x = [(lowerT[i] + upperT[i])/2 for i in range(0, len(lowerT))] + else: + Common.error("Unrecognized bintype") + return [lowerT,upperT,x] + + def _setYAxisLimits(self, metric): + currYlim = mpl.ylim() + ylim = [metric.min(), metric.max()] + if(ylim[0] == None): + ylim[0] = currYlim[0] + if(ylim[1] == None): + ylim[1] = currYlim[1] + mpl.ylim(ylim) + + def _adjustAxes(self): + # Apply adjustements to all subplots + for ax in mpl.gcf().get_axes(): + # Tick font sizes + for tick in ax.xaxis.get_major_ticks(): + tick.label.set_fontsize(self._tickfs) + for tick in ax.yaxis.get_major_ticks(): + tick.label.set_fontsize(self._tickfs) + ax.set_xlabel(ax.get_xlabel(), fontsize=self._labfs) + ax.set_ylabel(ax.get_ylabel(), fontsize=self._labfs) + #mpl.rcParams['axes.labelsize'] = self._labfs + + # Tick lines + if(len(mpl.yticks()[0]) >= 2 and len(mpl.xticks()[0]) >= 2): + # matplotlib crashes if there are fewer than 2 tick lines + # when determining where to put minor ticks + mpl.minorticks_on() + if(not self._minlth == None): + mpl.tick_params('both', length=self._minlth, which='minor') + if(not self._majlth == None): + mpl.tick_params('both', length=self._majlth, width=self._majwid, which='major') + for label in ax.get_xticklabels(): + label.set_rotation(self._xrot) + + for ax in mpl.gcf().get_axes(): + if(self._xlim != None): + mpl.xlim(self._xlim) + if(self._ylim != None): + mpl.ylim(self._ylim) + if(self._clim != None): + mpl.clim(self._clim) + + # Labels + if(self._xlabel != None): + mpl.xlabel(self._xlabel) + if(self._ylabel != None): + mpl.ylabel(self._ylabel) + if(self._title != None): + mpl.title(self._title) + + # Ticks + if(self._xticks != None): + if(len(self._xticks) <= 1): + Common.error("Xticks must have at least 2 values") + mpl.xticks(self._xticks) + if(self._yticks != None): + if(len(self._yticks) <= 1): + Common.error("Yticks must have at least 2 values") + mpl.yticks(self._yticks) + + # Margins + mpl.gcf().subplots_adjust(bottom=self._bot, top=self._top, left=self._left, right=self._right) + + def _plotObs(self, x, y, isCont=True, zorder=0): + if(isCont): + mpl.plot(x, y, ".-", color="gray", lw=5, label="obs", zorder=zorder) + else: + mpl.plot(x, y, "o", color="gray", ms=self._ms, label="obs", zorder=zorder) + + # maxradius: Don't let the circle go outside an envelope circle with this radius (centered on the origin) + def _drawCircle(self, radius, xcenter=0, ycenter=0, maxradius=np.inf, style="--", color="k", lw=1, label=""): + angles = np.linspace(-np.pi/2, np.pi/2, 
360) + x = np.sin(angles)*radius + xcenter + y = np.cos(angles)*radius + ycenter + + # Only keep points within the circle + I = np.where(x**2+y**2 < maxradius**2)[0] + if(len(I) == 0): + return + x = x[I] + y = y[I] + mpl.plot(x, y,style,color=color,lw=lw, zorder=-100, label=label) + mpl.plot(x, -y,style,color=color,lw=lw, zorder=-100) + + def _plotConfidence(self, x, y, variance, n, color): + #variance = y*(1-y) # For bins + + # Remove missing points + I = np.where(n != 0)[0] + if(len(I) == 0): + return + x = x[I] + y = y[I] + variance = variance[I] + n = n[I] + + z = 1.96 # 95% confidence interval + type = "wilson" + style = "--" + if type == "normal": + mean = y + lower = mean - z*np.sqrt(variance/n) + upper = mean + z*np.sqrt(variance/n) + elif type == "wilson": + mean = 1/(1+1.0/n*z**2) * ( y + 0.5*z**2/n) + upper = mean + 1/(1+1.0/n*z**2)*z*np.sqrt(variance/n + 0.25*z**2/n**2) + lower = mean - 1/(1+1.0/n*z**2)*z*np.sqrt(variance/n + 0.25*z**2/n**2) + mpl.plot(x, upper, style, color=color, lw=self._lw, ms=self._ms,label="") + mpl.plot(x, lower, style, color=color, lw=self._lw, ms=self._ms,label="") + Common.fill(x, lower, upper, color, alpha=0.3) + +class Default(Output): + _legLoc = "upper left" + def __init__(self, metric): + Output.__init__(self) + # offsets, dates, location, locationElev, threshold + self._metric = metric + if(metric.defaultAxis() != None): + self._xaxis = metric.defaultAxis() + if(metric.defaultBinType() != None): + self._binType = metric.defaultBinType() + self._showRank = False + self._showAcc = False + self._setLegSort = False + + # Settings + self._mapLowerPerc = 0 # Lower percentile (%) to show in colourmap + self._mapUpperPerc = 100 # Upper percentile (%) to show in colourmap + self._mapLabelLocations = False # Show locationIds in map? 
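+      # The two percentile settings above clip the colourbar range computed in
+      # _mapCore, so that a few extreme stations do not stretch the colour
+      # scale (0 and 100 keep the full range).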
+
+   def setShowRank(self, showRank):
+      self._showRank = showRank
+
+   def setLegSort(self, dls):
+      self._setLegSort = dls
+
+   def setShowAcc(self, showAcc):
+      self._showAcc = showAcc
+
+   def getXY(self, data):
+      thresholds = self._thresholds
+      axis = data.getAxis()
+
+      [lowerT,upperT,xx] = self._getThresholdLimits(thresholds)
+      if(axis != "threshold"):
+         xx = data.getAxisValues()
+
+      filenames = data.getFilenames()
+      F = data.getNumFiles()
+      y = None
+      x = None
+      for f in range(0, F):
+         data.setFileIndex(f)
+         yy = np.zeros(len(xx), 'float')
+         if(axis == "threshold"):
+            for i in range(0, len(lowerT)):
+               yy[i] = self._metric.compute(data, [lowerT[i], upperT[i]])
+         else:
+            for i in range(0, len(lowerT)):
+               yy = yy + self._metric.compute(data, [lowerT[i], upperT[i]])
+            yy = yy / len(thresholds)
+
+         if(sum(np.isnan(yy)) == len(yy)):
+            Common.warning("No valid scores for " + filenames[f])
+         if(y == None):
+            y = np.zeros([F, len(yy)],'float')
+            x = np.zeros([F, len(xx)],'float')
+         y[f,:] = yy
+         x[f,:] = xx
+         if(self._showAcc):
+            y[f,:] = np.nan_to_num(y[f,:])
+            y[f,:] = np.cumsum(y[f,:])
+      return [x,y]
+
+   def _legend(self, data, names=None):
+      mpl.legend(loc=self._legLoc,prop={'size':self._legfs})
+
+   def _plotCore(self, data):
+
+      data.setAxis(self._xaxis)
+
+      # We have to derive the legend list here, because we might want to specify
+      # the order
+      labels = np.array(data.getFilenames())
+      if(self._legNames):  # append legend names to file list
+         try:
+            labels[0:len(self._legNames)] = self._legNames
+         except ValueError:
+            Common.error("Too many legend names")
+
+      self._legNames = labels
+
+      F = data.getNumFiles()
+      [x,y] = self.getXY(data)
+
+      # Sort legend entries such that they appear in the same order as the
+      # y-values of the lines
+      if(self._setLegSort):
+         if(not self._showAcc):
+            averages = (Common.nanmean(y,axis=1))  # averaging for non-acc plots
+            ids = averages.argsort()[::-1]
+
+         else:
+            ends = y[:,-1]  # take last points for acc plots
+            ids = ends.argsort()[::-1]
+
+         self._legNames = [self._legNames[i] for i in ids]
+
+      else:
+         ids = range(0,F)
+
+      if(self._xaxis == "none"):
+         w = 0.8
+         x = np.linspace(1-w/2,len(y)-w/2,len(y))
+         mpl.bar(x,y, color='w', lw=self._lw)
+         mpl.xticks(range(1,len(y)+1), labels)
+      else:
+         for f in range(0, F):
+            color = self._getColor(ids[f], F)  # colors and styles to follow labels
+            style = self._getStyle(ids[f], F, data.isAxisContinuous())
+            alpha = (1 if(data.isAxisContinuous()) else 0.55)
+            mpl.plot(x[ids[f]], y[ids[f]], style, color=color, label=self._legNames[f], lw=self._lw, ms=self._ms, alpha=alpha)
+
+      mpl.xlabel(data.getAxisLabel())
+      mpl.ylabel(self._metric.label(data))
+
+      mpl.gca().xaxis.set_major_formatter(data.getAxisFormatter())
+      perfectScore = self._metric.perfectScore()
+      self._plotPerfectScore(x[0], perfectScore)
+
+      mpl.grid()
+      if(not self._showAcc):
+         self._setYAxisLimits(self._metric)
+
+      if(self._tight):
+         oldTicks = mpl.gca().get_xticks()
+         diff = oldTicks[1] - oldTicks[0]  # keep auto tick interval
+         tickRange = np.arange(round(np.min(x)),round(np.max(x))+diff,diff)
+         mpl.gca().set_xticks(tickRange)  # make new ticks, to start from the first day of the desired interval
+         mpl.autoscale(enable=True,axis=u'x',tight=True)  # make xaxis tight
+
+   def _textCore(self, data):
+      thresholds = self._thresholds
+
+      data.setAxis(self._xaxis)
+
+      # Set configuration names
+      names = self._getLegendNames(data)
+
+      F = data.getNumFiles()
+      [x,y] = self.getXY(data)
+
+      if(self._filename != None):
+         sys.stdout = open(self._filename, 'w')
+
+      maxlength = 0
+      for
name in names: + maxlength = max(maxlength, len(name)) + maxlength = str(maxlength) + + # Header line + fmt = "%-"+maxlength+"s" + lineDesc = data.getAxisDescriptionHeader() + lineDescN = len(lineDesc) + 2 + lineDescFmt = "%-" + str(lineDescN) + "s |" + print lineDescFmt % lineDesc, + if(data.getAxis() == "threshold"): + descs = self._thresholds + else: + descs = data.getAxisDescriptions() + for name in names: + print fmt % name, + print "" + + # Loop over rows + for i in range(0, len(x[0])): + print lineDescFmt % descs[i], + self._printLine(y[:,i], maxlength, "float") + + # Print stats + for func in [Common.nanmin, Common.nanmean, Common.nanmax, Common.nanstd]: + name = func.__name__[3:] + print lineDescFmt % name, + values = np.zeros(F, 'float') + for f in range(0,F): + values[f] = func(y[f,:]) + self._printLine(values, maxlength, "float") + + # Print count stats + for func in [Common.nanmin, Common.nanmax]: + name = func.__name__[3:] + print lineDescFmt % ("num " + name), + values = np.zeros(F, 'float') + for f in range(0,F): + values[f] = np.sum(y[f,:] == func(y,axis=0)) + self._printLine(values, maxlength, "int") + + def _printLine(self, values , colWidth, type="float"): + if(type == "int"): + fmt = "%-"+colWidth+"i" + else: + fmt = "%-"+colWidth+".2f" + missfmt = "%-"+colWidth+"s" + minI = np.argmin(values) + maxI = np.argmax(values) + for f in range(0, len(values)): + value = values[f] + if(np.isnan(value)): + txt = missfmt % "--" + else: + txt = fmt % value + if(minI == f): + print Common.green(txt), + elif(maxI == f): + print Common.red(txt), + else: + print txt, + print "" + + def _mapCore(self, data): + # Use the Basemap package if it is available + # Note that the word 'map' is an object if Basemap is loaded + # otherwise it is a shorthand name for matplotlib. 
This is possible + # because Basemap shares the plotting command names with matplotlib + hasBasemap = True + try: + from mpl_toolkits.basemap import Basemap + except ImportError: + Common.warning("Cannot load Basemap package") + import matplotlib.pylab as map + hasBasemap = False + + data.setAxis("location") + labels = self._getLegendNames(data) + F = data.getNumFiles() + lats = data.getLats() + lons = data.getLons() + ids = data.getLocationIds() + dlat = (max(lats) - min(lats)) + dlon = (max(lons) - min(lons)) + llcrnrlat= max(-90, min(lats) - dlat/10) + urcrnrlat= min(90, max(lats) + dlat/10) + llcrnrlon= min(lons) - dlon/10 + urcrnrlon= max(lons) + dlon/10 + res = Common.getMapResolution(lats, lons) + dx = pow(10,np.ceil(np.log10(max(lons) - min(lons))))/10 + dy = pow(10,np.ceil(np.log10(max(lats) - min(lats))))/10 + [x,y] = self.getXY(data) + + # Colorbar limits should be the same for all subplots + clim = [Common.nanpercentile(y.flatten(), self._mapLowerPerc), + Common.nanpercentile(y.flatten(), self._mapUpperPerc)] + + symmetricScore = False + cmap=mpl.cm.jet + if(clim[0] < 0 and clim[1] > 0): + symmetricScore = True + clim[0] = -max(-clim[0],clim[1]) + clim[1] = -clim[0] + cmap=mpl.cm.RdBu + + # Forced limits + if(self._clim != None): + clim = self._clim + + std = Common.nanstd(y) + minDiff = std/50 + + for f in range(0, F): + Common.subplot(f,F) + if(hasBasemap): + map = Basemap(llcrnrlon=llcrnrlon,llcrnrlat=llcrnrlat,urcrnrlon=urcrnrlon,urcrnrlat=urcrnrlat,projection='mill', resolution=res) + map.drawcoastlines(linewidth=0.25) + map.drawcountries(linewidth=0.25) + map.drawmapboundary() + # map.drawparallels(np.arange(-90.,120.,dy),labels=[1,0,0,0]) + # map.drawmeridians(np.arange(0.,420.,dx),labels=[0,0,0,1]) + map.fillcontinents(color='coral',lake_color='aqua', zorder=-1) + x0, y0 = map(lons, lats) + else: + x0 = lons + y0 = lats + I = np.where(np.isnan(y[f,:]))[0] + map.plot(x0[I], y0[I], 'kx') + + isMax = (y[f,:] == np.amax(y,0)) & (y[f,:] > np.mean(y,0)+minDiff) + isMin = (y[f,:] == np.amin(y,0)) & (y[f,:] < np.mean(y,0)-minDiff) + isValid = (np.isnan(y[f,:])==0) + if(self._showRank): + lmissing = map.scatter(x0[I], y0[I], s=40, c="k", marker="x") + lsimilar = map.scatter(x0[isValid], y0[isValid], s=40, c="w") + lmax = map.scatter(x0[isMax], y0[isMax], s=40, c="r") + lmin = map.scatter(x0[isMin], y0[isMin], s=40, c="b") + else: + s = 40 + map.scatter(x0, y0, c=y[f,:], s=s, cmap=cmap)#, linewidths = 1 + 2*isMax) + cb = map.colorbar() + cb.set_label(self._metric.label(data)) + cb.set_clim(clim) + mpl.clim(clim) + if(self._mapLabelLocations): + for i in range(0,len(x0)): + #mpl.text(x0[i], y0[i], "(%d,%d)" % (i,locs[i])) + value = y[f,i] + #if(value == max(y[:,i])): + # mpl.plot(x0[i], y0[i], 'ko', mfc=None, mec="k", ms=10) + + if(not np.isnan(value)): + #if(isMax[i]): + # mpl.plot(x0[i], y0[i], 'w.', ms=30, alpha=0.2) + mpl.text(x0[i], y0[i], "%d %3.2f" % (ids[i],value)) + if(self._legNames != None): + names = self._legNames + else: + names = data.getFilenames() + mpl.title(names[f]) + + # Legend + if(self._showRank): + lines = [lmin,lsimilar,lmax,lmissing] + names = ["min", "similar", "max", "missing"] + mpl.figlegend(lines, names, "lower center", ncol=4) + +class Hist(Output): + _reqThreshold = True + _supThreshold = False + def __init__(self, name): + Output.__init__(self) + self._name = name + + # Settings + self._showPercent = True + def getXY(self, data): + F = data.getNumFiles() + allValues = [0]*F + edges = self._thresholds + for f in range(0, F): + data.setFileIndex(f) 
+ allValues[f] = data.getScores(self._name) + + xx = (edges[0:-1]+edges[1:])/2 + y = np.zeros([F, len(xx)],'float') + x = np.zeros([F, len(xx)],'float') + for f in range(0, F): + data.setFileIndex(f) + N = len(allValues[f][0]) + + for i in range(0, len(xx)): + if(i == len(xx)-1): + I = np.where((allValues[f][0] >= edges[i]) & (allValues[f][0] <= edges[i+1]))[0] + else: + I = np.where((allValues[f][0] >= edges[i]) & (allValues[f][0] < edges[i+1]))[0] + y[f,i] = len(I)*1.0#/N + x[f,:] = xx + return [x,y] + + def _plotCore(self, data): + data.setAxis("none") + labels = self._getLegendNames(data) + F = data.getNumFiles() + [x,y] = self.getXY(data) + for f in range(0, F): + color = self._getColor(f, F) + style = self._getStyle(f, F) + if(self._showPercent): + y[f]= y[f]* 1.0 / sum(y[f]) * 100 + mpl.plot(x[f], y[f], style, color=color, label=labels[f], lw=self._lw, ms=self._ms) + mpl.xlabel(data.getAxisLabel("threshold")) + if(self._showPercent): + mpl.ylabel("Frequency (%)") + else: + mpl.ylabel("Frequency") + mpl.grid() + + def _textCore(self, data): + data.setAxis("none") + labels = self._getLegendNames(data) + + F = data.getNumFiles() + [x,y] = self.getXY(data) + + if(self._filename != None): + sys.stdout = open(self._filename, 'w') + + maxlength = 0 + for label in labels: + maxlength = max(maxlength, len(label)) + maxlength = str(maxlength) + + # Header line + fmt = "%-"+maxlength+"s" + lineDesc = data.getAxisDescriptionHeader() + lineDescN = len(lineDesc) + 2 + lineDescFmt = "%-" + str(lineDescN) + "s |" + print lineDescFmt % lineDesc, + descs = self._thresholds + for label in labels: + print fmt % label, + print "" + + # Loop over rows + for i in range(0, len(x[0])): + print lineDescFmt % descs[i], + self._printLine(y[:,i], maxlength, "int") + + # Print count stats + for func in [Common.nanmin, Common.nanmax]: + name = func.__name__[3:] + print lineDescFmt % ("num " + name), + values = np.zeros(F, 'float') + for f in range(0,F): + values[f] = np.sum(y[f,:] == func(y,axis=0)) + self._printLine(values, maxlength, "int") + + def _printLine(self, values , colWidth, type="float"): + if(type == "int"): + fmt = "%-"+colWidth+"i" + else: + fmt = "%-"+colWidth+".2f" + missfmt = "%-"+colWidth+"s" + minI = np.argmin(values) + maxI = np.argmax(values) + for f in range(0, len(values)): + value = values[f] + if(np.isnan(value)): + txt = missfmt % "--" + else: + txt = fmt % value + if(minI == f): + print Common.green(txt), + elif(maxI == f): + print Common.red(txt), + else: + print txt, + print "" + +class Sort(Output): + _reqThreshold = False + _supThreshold = False + def __init__(self, name): + Output.__init__(self) + self._name = name + + def _plotCore(self, data): + data.setAxis("none") + labels = self._getLegendNames(data) + F = data.getNumFiles() + for f in range(0, F): + data.setFileIndex(f) + [x] = data.getScores(self._name) + x = np.sort(x) + color = self._getColor(f, F) + style = self._getStyle(f, F) + y = np.linspace(0, 1, x.shape[0]) + mpl.plot(x, y, style, color=color, label=labels[f], lw=self._lw, ms=self._ms) + mpl.xlabel("Sorted " + data.getAxisLabel("threshold")) + mpl.grid() + + +class ObsFcst(Output): + _supThreshold = False + _description = "Plot observations and forecasts" + def __init__(self): + Output.__init__(self) + def _plotCore(self, data): + F = data.getNumFiles() + data.setAxis(self._xaxis) + x = data.getAxisValues() + + isCont = data.isAxisContinuous() + + # Obs line + mObs = Metric.Mean(Metric.Default("obs")) + y = mObs.compute(data, None) + self._plotObs(x, y, isCont) + + 
mFcst = Metric.Mean(Metric.Default("fcst"))
+      labels = data.getFilenames()
+      for f in range(0, F):
+         data.setFileIndex(f)
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F, isCont)
+
+         y = mFcst.compute(data, None)
+         mpl.plot(x, y, style, color=color, label=labels[f], lw=self._lw,
+               ms=self._ms)
+      mpl.ylabel(data.getVariableAndUnits())
+      mpl.xlabel(data.getAxisLabel())
+      mpl.grid()
+      mpl.gca().xaxis.set_major_formatter(data.getAxisFormatter())
+
+class QQ(Output):
+   _supThreshold = False
+   _supX = False
+   _description = "Quantile-quantile plot of obs vs forecasts"
+   def __init__(self):
+      Output.__init__(self)
+   def getXY(self, data):
+      x = list()
+      y = list()
+      F = len(data.getFilenames())
+      for f in range(0, F):
+         data.setFileIndex(f)
+         [xx,yy] = data.getScores(["obs", "fcst"])
+         x.append(np.sort(xx))
+         y.append(np.sort(yy))
+      return [x,y]
+
+   def _plotCore(self, data):
+      data.setAxis("none")
+      data.setIndex(0)
+      labels = data.getFilenames()
+      F = data.getNumFiles()
+      [x,y] = self.getXY(data)
+      for f in range(0, F):
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F)
+
+         mpl.plot(x[f], y[f], style, color=color, label=labels[f], lw=self._lw,
+               ms=self._ms)
+      mpl.ylabel("Sorted forecasts (" + data.getUnits() + ")")
+      mpl.xlabel("Sorted observations (" + data.getUnits() + ")")
+      ylim = list(mpl.ylim())
+      xlim = list(mpl.xlim())
+      axismin = min(min(ylim),min(xlim))
+      axismax = max(max(ylim),max(xlim))
+      self._plotPerfectScore([axismin,axismax], [axismin,axismax])
+      mpl.grid()
+   def _textCore(self, data):
+      data.setAxis("none")
+      data.setIndex(0)
+      labels = data.getFilenames()
+      F = data.getNumFiles()
+
+      # Header
+      maxlength = 0
+      for name in data.getFilenames():
+         maxlength = max(maxlength, len(name))
+      maxlength = int(np.ceil(maxlength/2)*2)
+      fmt = "%"+str(maxlength)+"s"
+      for filename in data.getFilenames():
+         print fmt % filename,
+      print ""
+      fmt = "%" + str(int(np.ceil(maxlength/2))) + ".1f"
+      fmt = fmt + fmt
+      fmtS = "%" + str(int(np.ceil(maxlength/2))) + "s"
+      fmtS = fmtS + fmtS
+      for f in range(0, F):
+         print fmtS % ("obs", "fcst"),
+      print ""
+
+      [x,y] = self.getXY(data)
+      maxPairs = len(x[0])
+      for f in range(1, F):
+         maxPairs = max(maxPairs, len(x[f]))
+      for i in range(0, maxPairs):
+         for f in range(0, F):
+            if(i >= len(x[f])):
+               print fmtS % ("--", "--"),
+            else:
+               print fmt % (x[f][i], y[f][i]),
+         print "\n",
+
+class Scatter(Output):
+   _description = "Scatter plot of forecasts vs obs"
+   _supThreshold = False
+   _supX = False
+   def __init__(self):
+      Output.__init__(self)
+   def _plotCore(self, data):
+      data.setAxis("none")
+      data.setIndex(0)
+      labels = data.getFilenames()
+      F = data.getNumFiles()
+      for f in range(0, F):
+         data.setFileIndex(f)
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F, connectingLine=False)
+
+         [x,y] = data.getScores(["obs","fcst"])
+         mpl.plot(x,y, ".", color=color, label=labels[f], lw=self._lw,
+               ms=self._ms, alpha=0.2)
+      mpl.ylabel("Forecasts (" + data.getUnits() + ")")
+      mpl.xlabel("Observations (" + data.getUnits() + ")")
+      ylim = mpl.ylim()
+      xlim = mpl.xlim()
+      axismax = max(max(ylim),max(xlim))
+      mpl.plot([0,axismax], [0,axismax], "--", color=[0.3,0.3,0.3], lw=3, zorder=-100)
+      mpl.grid()
+
+class Change(Output):
+   _supThreshold = False
+   _supX = False
+   _description = "Forecast skill (MAE) as a function of change in obs from previous day"
+   def __init__(self):
+      Output.__init__(self)
+
+   def _plotCore(self, data):
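+      # Bins the forecast MAE by the day-to-day change in the observations;
+      # a flat curve means skill holds up even when conditions change quickly.
+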
data.setAxis("all") + data.setIndex(0) + labels = data.getFilenames() + # Find range + data.setFileIndex(0) + [obs,fcst] = data.getScores(["obs", "fcst"]) + change = obs[1:,Ellipsis]-obs[0:-1,Ellipsis] + maxChange = np.nanmax(abs(change.flatten())) + edges = np.linspace(-maxChange,maxChange,20) + bins = (edges[1:] + edges[0:-1])/2 + F = data.getNumFiles() + + for f in range(0, F): + color = self._getColor(f, F) + style = self._getStyle(f, F) + data.setFileIndex(f) + [obs,fcst] = data.getScores(["obs", "fcst"]) + change = obs[1:,Ellipsis]-obs[0:-1,Ellipsis] + err = abs(obs-fcst) + err = err[1:,Ellipsis] + x = np.nan * np.zeros(len(bins), 'float') + y = np.nan * np.zeros(len(bins), 'float') + + for i in range(0, len(bins)): + I = (change > edges[i] ) & (change <= edges[i+1]) + y[i] = Common.nanmean(err[I]) + x[i] = Common.nanmean(change[I]) + mpl.plot(x, y, style, color=color, lw=self._lw, ms=self._ms, label=labels[f]) + self._plotPerfectScore(x, 0) + mpl.xlabel("Daily obs change (" + data.getUnits() + ")") + mpl.ylabel("MAE (" + data.getUnits() + ")") + mpl.grid() + +class Cond(Output): + _description = "Plots forecasts as a function of obs (use -r to specify bin-edges)" + _defaultAxis = "threshold" + _defaultBinType = "within" + _reqThreshold = True + _supThreshold = True + _supX = False + def supportsThreshold(self): + return True + def _plotCore(self, data): + data.setAxis("none") + data.setIndex(0) + [lowerT,upperT,x] = self._getThresholdLimits(self._thresholds) + + labels = data.getFilenames() + F = data.getNumFiles() + for f in range(0, F): + color = self._getColor(f, F) + style = self._getStyle(f, F) + data.setFileIndex(f) + + of = np.zeros(len(x), 'float') + fo = np.zeros(len(x), 'float') + xof = np.zeros(len(x), 'float') + xfo = np.zeros(len(x), 'float') + mof = Metric.Conditional("obs", "fcst", np.mean) # F | O + mfo = Metric.Conditional("fcst", "obs", np.mean) # O | F + xmof = Metric.XConditional("obs", "fcst") # F | O + xmfo = Metric.XConditional("fcst", "obs") # O | F + mof0 = Metric.Conditional("obs", "fcst", np.mean) # F | O + for i in range(0, len(lowerT)): + fo[i] = mfo.compute(data, [lowerT[i], upperT[i]]) + of[i] = mof.compute(data, [lowerT[i], upperT[i]]) + xfo[i] = xmfo.compute(data, [lowerT[i], upperT[i]]) + xof[i] = xmof.compute(data, [lowerT[i], upperT[i]]) + mpl.plot(xof,of, style, color=color, label=labels[f] + " (F|O)", lw=self._lw, ms=self._ms) + mpl.plot(fo, xfo, style, color=color, label=labels[f] + " (O|F)", lw=self._lw, ms=self._ms, alpha=0.5) + mpl.ylabel("Forecasts (" + data.getUnits() + ")") + mpl.xlabel("Observations (" + data.getUnits() + ")") + ylim = mpl.ylim() + xlim = mpl.xlim() + axismin = min(min(ylim),min(xlim)) + axismax = max(max(ylim),max(xlim)) + #mpl.plot([axismin,axismax], [axismin,axismax], "-", color="k", lw=3, zorder=-100) + self._plotPerfectScore([axismin,axismax], [axismin,axismax]) + mpl.grid() + +class SpreadSkill(Output): + _supThreshold = False + _supX = False + _description = "Spread skill" + def __init__(self): + Output.__init__(self) + + def _plotCore(self, data): + data.setAxis("all") + data.setIndex(0) + labels = data.getFilenames() + F = data.getNumFiles() + for f in range(0, F): + color = self._getColor(f, F) + style = self._getStyle(f, F, connectingLine=False) + data.setFileIndex(f) + + data.setFileIndex(f) + [obs,fcst,spread] = data.getScores(["obs", "fcst", "ensSpread"]) + spread = np.sqrt(spread.flatten()) + skill = abs(obs.flatten()- fcst.flatten()) + #mpl.plot(spread, skill, style, color=color, lw=self._lw, ms=self._ms, 
label=labels[f])
+         x = np.zeros(len(self._thresholds), 'float')
+         y = np.zeros(len(x), 'float')
+         for i in range(1, len(self._thresholds)):
+            I = np.where((np.isnan(spread) == 0) &
+                         (np.isnan(skill) == 0) &
+                         (spread > self._thresholds[i-1]) &
+                         (spread <= self._thresholds[i]))[0]
+            if(len(I) > 0):
+               x[i] = np.mean(spread[I])
+               y[i] = np.mean(skill[I])
+
+         style = self._getStyle(f, F)
+         mpl.plot(x, y, style, color=color, label=labels[f])
+      mpl.xlabel("Spread (" + data.getUnits() + ")")
+      mpl.ylabel("MAE (" + data.getUnits() + ")")
+      mpl.grid()
+
+class Count(Output):
+   _description = "Counts number of forecasts above or within thresholds (use -r to specify bin-edges). Use -binned to count number in bins, instead of number above each threshold."
+   _defaultAxis = "threshold"
+   _defaultBinType = "within"
+   _reqThreshold = True
+   _supThreshold = True
+   _supX = False
+   def _plotCore(self, data):
+      data.setAxis("none")
+      data.setIndex(0)
+      [lowerT,upperT,x] = self._getThresholdLimits(self._thresholds)
+
+      labels = data.getFilenames()
+      F = data.getNumFiles()
+      for f in range(0, F):
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F)
+         data.setFileIndex(f)
+
+         Nobs = np.zeros(len(x), 'float')
+         Nfcst = np.zeros(len(x), 'float')
+         obs = Metric.Count("obs")
+         fcst = Metric.Count("fcst")
+         for i in range(0, len(lowerT)):
+            Nobs[i] = obs.compute(data, [lowerT[i], upperT[i]])
+            Nfcst[i] = fcst.compute(data, [lowerT[i], upperT[i]])
+         mpl.plot(x,Nfcst, style, color=color, label=labels[f], lw=self._lw, ms=self._ms)
+      self._plotObs(x, Nobs)
+      mpl.ylabel("Number")
+      mpl.xlabel(data.getAxisLabel())
+      mpl.grid()
+
+class TimeSeries(Output):
+   _description = "Plot observations and forecasts as a time series (i.e. by concatenating all offsets). '-x ' has no effect, as it is always shown by date."
+   _supThreshold = False
+   _supX = False
+   def _plotCore(self, data):
+      F = data.getNumFiles()
+      data.setAxis("all")
+      dates = data.getAxisValues("date")
+      offsets = data.getAxisValues("offset")
+
+      # Connect the last offset of a day with the first offset on the next day
+      # This only makes sense if the obs/fcst don't span more than a day
+      connect = min(offsets) + 24 > max(offsets)
+      minOffset = min(offsets)
+
+      # Obs line
+      obs = data.getScores("obs")[0]
+      for d in range(0,obs.shape[0]):
+         x = dates[d] + offsets/24.0
+         y = Common.nanmean(obs[d,:,:], axis=1)
+         if(connect and d < obs.shape[0]-1):
+            x = np.insert(x,x.shape[0],dates[d+1]+minOffset/24.0)
+            y = np.insert(y,y.shape[0],Common.nanmean(obs[d+1,0,:], axis=0))
+
+         if(d == 0):  # tchui (10/06/15)
+            xmin = np.min(x)
+         elif(d == obs.shape[0]-1):
+            xmax = np.max(x)
+
+         lab = "obs" if d == 0 else ""
+         # Widen the buffer zone between tick labels and the axis (dsiuta)
+         mpl.rcParams['ytick.major.pad'] = '20'
+         mpl.rcParams['xtick.major.pad'] = '20'
+         mpl.plot(x, y, ".-", color=[0.3,0.3,0.3], lw=5, label=lab)
+
+      # Forecast lines
+      labels = data.getFilenames()
+      for f in range(0, F):
+         data.setFileIndex(f)
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F)
+
+         fcst = data.getScores("fcst")[0]
+         # Loop over the dates, as for the obs line above (without this loop,
+         # only the last date would be drawn)
+         for d in range(0, fcst.shape[0]):
+            x = dates[d] + offsets/24.0
+            y = Common.nanmean(fcst[d,:,:], axis=1)
+            if(connect and d < obs.shape[0]-1):
+               x = np.insert(x,x.shape[0],dates[d+1]+minOffset/24.0)
+               y = np.insert(y,y.shape[0],Common.nanmean(fcst[d+1,0,:]))
+            lab = labels[f] if d == 0 else ""
+            # Widen the buffer zone between tick labels and the axis (dsiuta)
+            mpl.rcParams['ytick.major.pad'] = '20'
+            mpl.rcParams['xtick.major.pad'] = '20'
+            mpl.plot(x, y, style, color=color, lw=self._lw, ms=self._ms, label=lab)
+
+      mpl.xlabel(data.getAxisLabel("date"))
+      if(self._ylabel == None):
+         mpl.ylabel(data.getVariableAndUnits())
+      else:
+         mpl.ylabel(self._ylabel)
+      mpl.grid()
+      mpl.gca().xaxis.set_major_formatter(data.getAxisFormatter("date"))
+
+      if(self._tight):
+         oldTicks = mpl.gca().get_xticks()
+         diff = oldTicks[1] - oldTicks[0]  # keep auto tick interval
+         tickRange = np.arange(round(xmin),round(xmax)+diff,diff)
+         mpl.gca().set_xticks(tickRange)  # make new ticks, to start from the first day of the desired interval
+         mpl.autoscale(enable=True,axis=u'x',tight=True)  # make xaxis tight
+
+
+
+class PitHist(Output):
+   _description = "Histogram of PIT values"
+   _supThreshold = False
+   _supX = False
+   def __init__(self, metric):
+      Output.__init__(self)
+      self._numBins = 10
+      self._metric = metric
+   def _legend(self, data, names=None):
+      pass
+   def _plotCore(self, data):
+      F = data.getNumFiles()
+      labels = self._getLegendNames(data)
+      for f in range(0, F):
+         Common.subplot(f,F)
+         color = self._getColor(f, F)
+         data.setAxis("none")
+         data.setIndex(0)
+         data.setFileIndex(f)
+         pit = self._metric.compute(data,None)
+
+         width = 1.0 / self._numBins
+         x = np.linspace(0,1,self._numBins+1)
+         N = np.histogram(pit, x)[0]
+         n = N * 1.0 / sum(N)
+         color = "gray"
+         xx = x[range(0,len(x)-1)]
+         mpl.bar(xx, n*100.0, width=width, color=color)
+         mpl.plot([0,1],[100.0/self._numBins, 100.0/self._numBins], 'k--')
+         mpl.title(labels[f])
+         ytop = 200.0/self._numBins
+         mpl.gca().set_ylim([0,ytop])
+         if(f == 0):
+            mpl.ylabel("Frequency (%)")
+         else:
+            mpl.gca().set_yticks([])
+
+         # Multiply by 100 to get to percent
+         std = Metric.PitDev.deviationStd(pit, self._numBins)*100
+
+         mpl.plot([0,1], [100.0/self._numBins - 2*std, 100.0/self._numBins - 2*std], "r-")
+         mpl.plot([0,1], [100.0/self._numBins + 2*std, 100.0/self._numBins + 2*std], "r-")
+         Common.fill([0,1], [100.0/self._numBins - 2*std, 100.0/self._numBins - 2*std],
+               [100.0/self._numBins + 2*std, 100.0/self._numBins + 2*std], "r", zorder=100, alpha=0.5)
+
+         # Compute calibration deviation
+         D = Metric.PitDev.deviation(pit, self._numBins)
+         D0 = Metric.PitDev.expectedDeviation(pit, self._numBins)
+         ign = Metric.PitDev.ignorancePotential(pit, self._numBins)
+         mpl.text(0, mpl.ylim()[1], "Dev: %2.4f\nExp: %2.4f\nIgn: %2.4f" % (D,D0,ign), verticalalignment="top")
+
+         mpl.xlabel("Cumulative probability")
+
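+# Reading the PIT histogram: bars near the dashed 1/numBins line indicate a
+# calibrated ensemble, a U-shape indicates too little spread, and a hump in
+# the middle too much spread. The red band shows the expected sampling
+# variability (+/- 2 standard deviations) for a calibrated forecast.
+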
probability") + +class Reliability(Output): + _description = "Reliability diagram for a certain threshold (-r)" + _reqThreshold = True + _supX = False + _legLoc = "lower right" + def __init__(self): + Output.__init__(self) + self._shadeNoSkill = True + def _plotCore(self, data): + labels = data.getFilenames() + + F = data.getNumFiles() + ax = mpl.gca() + axi = mpl.axes([0.2,0.65,0.2,0.2]) + mpl.sca(ax) + + data.setAxis("none") + data.setIndex(0) + data.setFileIndex(0) + for t in range(0,len(self._thresholds)): + threshold = self._thresholds[t] + var = data.getPvar(threshold) + [obs, p] = data.getScores(["obs", var]) + + # Determine the number of bins to use # (at least 11, at most 25) + N = min(25, max(11, int(len(obs)/1000))) + N = 11 + edges = np.linspace(0,1,N+1) + edges = np.array([0,0.05,0.15,0.25,0.35,0.45,0.55,0.65,0.75,0.85,0.95,1]) + #edges = np.linspace(0,1,101) + x = np.zeros([len(edges)-1,F], 'float') + + y = np.nan*np.zeros([F,len(edges)-1],'float') + n = np.zeros([F,len(edges)-1],'float') + v = np.zeros([F,len(edges)-1],'float') # Variance + # Draw reliability lines + for f in range(0, F): + color = self._getColor(f, F) + style = self._getStyle(f, F) + data.setFileIndex(f) + data.setAxis("none") + data.setIndex(0) + var = data.getPvar(threshold) + [obs, p] = data.getScores(["obs", var]) + + if(self._binType == "below"): + p = p + obs = obs < threshold + elif(self._binType == "above"): + p = 1 - p + obs = obs > threshold + else: + Common.error("Bin type must be one of 'below' or 'above' for reliability plot") + + clim = np.mean(obs) + # Compute frequencies + for i in range(0,len(edges)-1): + q = (p >= edges[i])& (p < edges[i+1]) + I = np.where(q)[0] + if(len(I) > 0): + n[f,i] = len(obs[I]) + # Need at least 10 data points to be valid + if(n[f,i] >= 1): + y[f,i] = np.mean(obs[I]) + v[f,i] = np.var(obs[I]) + x[i,f] = np.mean(p[I]) + + label = labels[f] + if(not t == 0): + label = "" + mpl.plot(x[:,f], y[f], style, color=color, lw=self._lw, ms=self._ms, label=label) + + # Draw confidence bands (do this separately so that these lines don't sneak into the legend) + for f in range(0, F): + color = self._getColor(f, F) + self._plotConfidence(x[:,f], y[f], v[f], n[f], color=color) + + # Draw lines in inset diagram + if(np.max(n) > 1): + for f in range(0, F): + color = self._getColor(f, F) + axi.plot(x[:,f], n[f], style, color=color, lw=self._lw, ms=self._ms*0.75) + axi.xaxis.set_major_locator(mpl.NullLocator()) + axi.set_yscale('log') + axi.set_title("Number") + axi.grid('on') + mpl.sca(ax) + self._plotObs([0,1], [0,1]) + mpl.axis([0,1,0,1]) + color = "gray" + mpl.plot([0,1], [clim,clim], "--", color=color,label="") # Climatology line + mpl.plot([clim,clim], [0,1], "--", color=color) # Climatology line + mpl.plot([0,1], [clim/2,1-(1-clim)/2], "--", color=color) # No-skill line + if(self._shadeNoSkill): + Common.fill([clim,1], [0,0], [clim,1-(1-clim)/2], col=[1,1,1], zorder=-100, + hatch="\\") + Common.fill([0,clim], [clim/2,clim,0], [1,1], col=[1,1,1], zorder=-100, + hatch="\\") + mpl.xlabel("Forecasted probability") + mpl.ylabel("Observed frequency") + units = " " + data.getUnits() + mpl.title("Reliability diagram for obs > " + str(threshold) + units) + +class IgnContrib(Output): + _description = "Binary Ignorance contribution diagram for a single threshold (-r). "\ + + "Shows how much each probability issued contributes to the total ignorance." 
+   _reqThreshold = True
+   _supX = False
+   _legLoc = "upper center"
+   _experimental = True
+   def __init__(self):
+      Output.__init__(self)
+   def _plotCore(self, data):
+      labels = data.getFilenames()
+
+      if(len(self._thresholds) != 1):
+         Common.error("IgnContrib diagram requires exactly one threshold")
+      threshold = self._thresholds[0]
+
+      F = data.getNumFiles()
+
+      mpl.subplot(2,1,1)
+      units = " " + data.getUnits()
+      titlestr = "Ignorance contribution diagram for obs > " + str(self._thresholds[0]) + units
+      mpl.title(titlestr)
+
+      data.setAxis("none")
+      data.setIndex(0)
+      data.setFileIndex(0)
+      mpl.subplot(2,1,1)
+      var = data.getPvar(threshold)
+      [obs, p] = data.getScores(["obs", var])
+
+      # Determine the number of bins to use (at least 11, at most 25)
+      N = min(25, max(11, int(len(obs)/1000)))
+      edges = np.linspace(0,1,N+1)
+
+      x = np.zeros([F, len(edges)-1], 'float')
+      y = np.nan*np.zeros([F,len(edges)-1],'float')
+      n = np.zeros([F,len(edges)-1],'float')
+
+      # Draw reliability lines
+      for f in range(0, F):
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F)
+         data.setFileIndex(f)
+         data.setAxis("none")
+         data.setIndex(0)
+         var = data.getPvar(threshold)
+         [obs, p] = data.getScores(["obs", var])
+
+         if(self._binType == "below"):
+            p = p
+            obs = obs < threshold
+         elif(self._binType == "above"):
+            p = 1 - p
+            obs = obs > threshold
+         else:
+            Common.error("Bin type must be one of 'below' or 'above' for reliability plot")
+
+         clim = np.mean(obs)
+         # Compute frequencies
+         for i in range(0,len(edges)-1):
+            q = (p >= edges[i]) & (p < edges[i+1])
+            I = np.where(q)[0]
+            if(len(I) > 0):
+               n[f,i] = len(obs[I])
+               x[f,i] = np.mean(p[I])
+               # Need at least 1 data point to be valid
+               if(n[f,i] >= 1):
+                  I0 = np.where(obs[I] == 0)
+                  I1 = np.where(obs[I] == 1)
+                  y[f,i] = -np.sum(np.log2(p[I[I1]])) - np.sum(np.log2(1-p[I[I0]]))
+
+         label = labels[f]
+         mpl.plot(x[f], y[f]/np.sum(n[f])*len(n[f]), style, color=color, lw=self._lw, ms=self._ms, label=label)
+      mpl.ylabel("Ignorance contribution")
+
+      # Draw expected sharpness
+      xx = np.linspace(0,1,100)
+      yy = -(xx*np.log2(xx) + (1-xx)*np.log2(1-xx))
+      mpl.plot(xx, yy, "--", color="gray")
+      yy = -np.log2(clim)*np.ones(len(xx))  # climatological ignorance (currently not drawn)
+
+      # Show number in each bin
+      mpl.subplot(2,1,2)
+      for f in range(0, F):
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F)
+         mpl.plot(x[f], n[f], style, color=color, lw=self._lw, ms=self._ms)
+      mpl.xlabel("Forecasted probability")
+      mpl.ylabel("N")
+
+      # Switch back to top subplot, so the legend works
+      mpl.subplot(2,1,1)
+
+# doClassic: Use the classic definition, by not varying the forecast threshold
+# i.e. using the same threshold for observation and forecast.
+class DRoc(Output):
+   _description = "Plots the receiver operating characteristics curve for the deterministic " \
+                  "forecast for a single threshold. Uses different forecast thresholds to create points."
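+   # Each point on the curve comes from one forecast threshold: hit rate
+   # a/(a+c) is plotted on the y-axis against false alarm rate b/(b+d) on the
+   # x-axis, with (0,0) and (1,1) appended as end points.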
+ _supX = False + _reqThreshold = True + def __init__(self, fthresholds=None, doNorm=False, doClassic=False): + Output.__init__(self) + self._doNorm = doNorm + self._fthresholds = fthresholds + self._doClassic = doClassic + self._showThresholds = False + def _plotCore(self, data): + threshold = self._thresholds[0] # Observation threshold + if(threshold == None): + Common.error("DRoc plot needs a threshold (use -r)") + + if(self._doClassic): + fthresholds = [threshold] + else: + if(self._fthresholds != None): + fthresholds = self._fthresholds + else: + if(data.getVariable() == "Precip"): + fthresholds = [0,1e-7,1e-6,1e-5,1e-4,0.001,0.005,0.01,0.05,0.1,0.2,0.3,0.5,1,2,3,5,10,20,100] + else: + N = 31 + fthresholds = np.linspace(threshold-10, threshold+10, N) + + F = data.getNumFiles() + labels = data.getFilenames() + for f in range(0, F): + color = self._getColor(f, F) + style = self._getStyle(f, F) + data.setAxis("none") + data.setIndex(0) + data.setFileIndex(f) + [obs, fcst] = data.getScores(["obs", "fcst"]) + + y = np.nan*np.zeros([len(fthresholds),1],'float') + x = np.nan*np.zeros([len(fthresholds),1],'float') + for i in range(0,len(fthresholds)): + fthreshold = fthresholds[i] + a = np.ma.sum((fcst >= fthreshold) & (obs >= threshold)) # Hit + b = np.ma.sum((fcst >= fthreshold) & (obs < threshold)) # FA + c = np.ma.sum((fcst < fthreshold) & (obs >= threshold)) # Miss + d = np.ma.sum((fcst < fthreshold) & (obs < threshold)) # Correct rejection + if(a + c > 0 and b + d > 0): + y[i] = a / 1.0 / (a + c) + x[i] = b / 1.0 / (b + d) + if(self._doNorm): + from scipy.stats import norm + y[i] = norm.ppf(a / 1.0 / (a + c)) + x[i] = norm.ppf(b / 1.0 / (b + d)) + if(np.isinf(y[i])): + y[i] = np.nan + if(np.isinf(x[i])): + x[i] = np.nan + if(self._showThresholds and (not np.isnan(x[i]) and not np.isnan(y[i]) and f == 0)): + mpl.text(x[i], y[i], "%2.1f" % fthreshold, color=color) + if(not self._doNorm): + # Add end points at 0,0 and 1,1: + xx = x + yy = y + x = np.zeros([len(fthresholds)+2,1], 'float') + y = np.zeros([len(fthresholds)+2,1], 'float') + x[1:-1] = xx + y[1:-1] = yy + x[0] = 1 + y[0] = 1 + x[len(x)-1] = 0 + y[len(y)-1] = 0 + #I = np.where(np.isnan(x)+np.isnan(y)==0) + mpl.plot(x, y, style, color=color, label=labels[f], lw=self._lw, ms=self._ms) + if(self._doNorm): + xlim = mpl.xlim() + ylim = mpl.ylim() + q0 = max(abs(xlim[0]), abs(ylim[0])) + q1 = max(abs(xlim[1]), abs(ylim[1])) + mpl.plot([-q0,q1], [-q0,q1], 'k--') + mpl.xlabel("Normalized false alarm rate") + mpl.ylabel("Normalized hit rate") + else: + mpl.plot([0,1], [0,1], color="k") + mpl.axis([0,1,0,1]) + mpl.xlabel("False alarm rate") + mpl.ylabel("Hit rate") + self._plotPerfectScore([0,0,1], [0,1,1]) + units = " " + data.getUnits() + mpl.title("Threshold: " + str(threshold) + units) + mpl.grid() + +class DRocNorm(DRoc): + _description = "Same as DRoc, except the hit and false alarm rates are transformed using the " \ + "inverse of the standard normal distribution in order to highlight the extreme " \ + "values." + def __init__(self): + DRoc.__init__(self, doNorm=True) + +class DRoc0(DRoc): + _description = "Same as DRoc, except don't use different forecast thresholds: Use the "\ + "same\n threshold for forecast and obs." + def __init__(self): + DRoc.__init__(self, doNorm=False, doClassic=True) + +class Against(Output): + _description = "Plots the forecasts for each pair of configurations against each other. 
"\ + "Colours indicate which configuration had the best forecast (but only if the difference is "\ + "more than 10% of the standard deviation of the observation)." + _defaultAxis = "none" + _supThreshold = False + _supX = False + _minStdDiff = 0.1 # How big difference should colour kick in (in number of STDs)? + def _plotCore(self, data): + F = data.getNumFiles() + if(F < 2): + Common.error("Cannot use Against plot with less than 2 configurations") + + data.setAxis("none") + data.setIndex(0) + labels = data.getFilenames() + for f0 in range(0,F): + for f1 in range(0,F): + if(f0 != f1 and (F != 2 or f0 == 0)): + if(F > 2): + mpl.subplot(F,F,f0+f1*F+1) + data.setFileIndex(f0) + x = data.getScores("fcst")[0].flatten() + data.setFileIndex(f1) + y = data.getScores("fcst")[0].flatten() + lower = min(min(x),min(y)) + upper = max(max(x),max(y)) + + mpl.plot(x, y, "x", mec="k", ms=self._ms/2, mfc="k", zorder=-1000) + + # Show which forecast is better + data.setFileIndex(f0) + [obsx,x] = data.getScores(["obs","fcst"]) + data.setFileIndex(f1) + [obsy,y] = data.getScores(["obs","fcst"]) + x = x.flatten() + y = y.flatten() + obs = obsx.flatten() + + mpl.plot(x, y, "s", mec="k", ms=self._ms/2, mfc="w", zorder=-500) + + std = np.std(obs)/2 + minDiff = self._minStdDiff*std + if(len(x) == len(y)): + N = 5 + for k in range(0,N): + Ix = abs(obs - y) > abs(obs - x) + std*k/N + Iy = abs(obs - y) + std*k/N < abs(obs - x) + mpl.plot(x[Ix], y[Ix], "r.", ms=self._ms, alpha=k/1.0/N) + mpl.plot(x[Iy], y[Iy], "b.", ms=self._ms, alpha=k/1.0/N) + + # Contour of the frequency + #q = np.histogram2d(x[1,:], x[0,:], [np.linspace(lower,upper,100), np.linspace(lower,upper,100)]) + #[X,Y] = np.meshgrid(q[1],q[2]) + #mpl.contour(X[1:,1:],Y[1:,1:],q[0],[1,100],zorder=90) + + mpl.xlabel(labels[f0], color="r") + mpl.ylabel(labels[f1], color="b") + mpl.grid() + xlim = mpl.xlim() + ylim = mpl.ylim() + lower = min(xlim[0],ylim[0]) + upper = max(xlim[1],ylim[1]) + mpl.xlim([lower, upper]) + mpl.ylim([lower, upper]) + mpl.plot([lower,upper], [lower, upper], '--', color=[0.3,0.3,0.3], lw=3, zorder=100) + if(F == 2): + break + def _legend(self, data, names=None): + pass + +class Taylor(Output): + _description = "Taylor diagram showing correlation and forecast standard deviation" + _supThreshold = False + _supX = False + _defaultAxis = "none" + _legLoc = "upper left" + + def _plotCore(self, data): + data.setAxis(self._xaxis) + data.setIndex(0) + labels = data.getFilenames() + F = data.getNumFiles() + + # Plot points + maxstd = 0 + for f in range(0, F): + data.setFileIndex(f) + color = self._getColor(f, F) + style = self._getStyle(f, F) + + size = data.getAxisSize() + corr = np.zeros(size, 'float') + std = np.zeros(size, 'float') + stdobs = np.zeros(size, 'float') + for i in range(0,size): + data.setIndex(i) + [obs,fcst] = data.getScores(["obs", "fcst"]) + if(len(obs)>0 and len(fcst)>0): + corr[i] = np.corrcoef(obs,fcst)[1,0] + std[i] = np.sqrt(np.var(fcst)) + stdobs[i] = np.sqrt(np.var(obs)) + maxstd = max(maxstd, max(std)) + ang = np.arccos(corr) + x = std * np.cos(ang) + y = std * np.sin(ang) + mpl.plot(x,y, style, color=color, label=labels[f], lw=self._lw, ms=self._ms) + stdobs = np.mean(stdobs) + + # Set axis limits + if(maxstd < 1.25*stdobs): # Enforce a minimum radius beyond the obs-radius + maxstd = 1.25*stdobs + maxstd = int(np.ceil(maxstd)) + mpl.xlim([-maxstd*1.05,maxstd*1.05]) # Allow for some padding outside the outer ring + mpl.ylim([0,maxstd*1.05]) + mpl.xlabel("Standard deviation (" + data.getUnits() + ")") + xticks = 
+      xticks = mpl.xticks()[0]
+      mpl.xticks(xticks[xticks >= 0])
+      mpl.xlim([-maxstd*1.05, maxstd*1.05])
+      mpl.ylim([0, maxstd*1.05])
+      mpl.text(np.sin(np.pi/4)*maxstd, np.cos(np.pi/4)*maxstd, "Correlation", rotation=-45,
+            fontsize=self._labfs, horizontalalignment="center", verticalalignment="bottom")
+      mpl.gca().yaxis.set_visible(False)
+      mpl.gca().xaxis.set_ticks_position('bottom')
+
+      # Draw obs point/lines
+      orange = [1, 0.8, 0.4]
+      self._drawCircle(stdobs, style='-', lw=5, color=orange)
+      mpl.plot(stdobs, 0, 's-', color=orange, label="Observation", mew=2, ms=self._ms, clip_on=False)
+
+      # Draw diagonals
+      corrs = [-1, -0.99, -0.95, -0.9, -0.8, -0.5, 0, 0.5, 0.8, 0.9, 0.95, 0.99]
+      for i in range(0, len(corrs)):
+         ang = np.arccos(corrs[i])   # Mathematical angle
+         x = np.cos(ang)*maxstd
+         y = np.sin(ang)*maxstd
+         mpl.plot([0, x], [0, y], 'k--')
+         mpl.text(x, y, str(corrs[i]), verticalalignment="bottom", fontsize=self._labfs)
+
+      # Draw CRMSE rings
+      xticks = mpl.xticks()[0]
+      self._drawCircle(0, style="-", color="gray", lw=3, label="CRMSE")
+      for R in np.linspace(0, 2*max(xticks), 2*2*max(xticks)/(xticks[1]-xticks[0])+1):
+         if(R > 0):
+            self._drawCircle(R, xcenter=stdobs, ycenter=0, maxradius=maxstd, style="-", color="gray", lw=3)
+            x = np.sin(-np.pi/4)*R + stdobs
+            y = np.cos(np.pi/4)*R
+            if(x**2 + y**2 < maxstd**2):
+               mpl.text(x, y, str(R), horizontalalignment="right", verticalalignment="bottom",
+                     fontsize=self._labfs, color="gray")
+
+      # Draw std rings
+      for X in mpl.xticks()[0]:
+         if(X <= maxstd):
+            self._drawCircle(X, style=":")
+      self._drawCircle(maxstd, style="-", lw=3)
+
+      mpl.gca().set_aspect(1)
+
+class Error(Output):
+   _description = "Decomposition of RMSE into systematic and unsystematic components"
+   _supThreshold = False
+   _supX = True
+   _defaultAxis = "none"
+
+   def _plotCore(self, data):
+      data.setAxis(self._xaxis)
+      data.setIndex(0)
+      labels = data.getFilenames()
+      F = data.getNumFiles()
+
+      mpl.gca().set_aspect(1)
+      mpl.xlabel("Unsystematic error (CRMSE, " + data.getUnits() + ")")
+      mpl.ylabel("Systematic error (Bias, " + data.getUnits() + ")")
+
+      # Plot points
+      size = data.getAxisSize()
+      serr = np.nan*np.zeros([size, F], 'float')
+      uerr = np.nan*np.zeros([size, F], 'float')
+      rmse = np.nan*np.zeros([size, F], 'float')
+      for f in range(0, F):
+         data.setFileIndex(f)
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F, connectingLine=False)
+
+         for i in range(0, size):
+            data.setIndex(i)
+            [obs, fcst] = data.getScores(["obs", "fcst"])
+            if(len(obs) > 0 and len(fcst) > 0):
+               serr[i,f] = np.mean(obs - fcst)
+               rmse[i,f] = np.sqrt(np.mean((obs - fcst)**2))
+               uerr[i,f] = np.sqrt(rmse[i,f]**2 - serr[i,f]**2)
+         mpl.plot(uerr[:,f], serr[:,f], style, color=color, label=labels[f], lw=self._lw, ms=self._ms)
+      xlim = mpl.xlim()
+      ylim = mpl.ylim()
+
+      # Draw rings showing the mean total RMSE of each configuration
+      for f in range(0, F):
+         color = self._getColor(f, F)
+         style = self._getStyle(f, F, lineOnly=True)
+         self._drawCircle(Common.nanmean(rmse[:,f]), style=style, color=color)
+
+      # Set axis limits
+      maxx = xlim[1]
+      maxy = ylim[1]
+      miny = min(0, ylim[0])
+      # Try to enforce the x-axis and y-axis to be roughly the same size
+      if(maxy - miny < maxx/2):
+         maxy = maxx
+      elif(maxy - miny > maxx*2):
+         maxx = maxy - miny
+      mpl.xlim([0, maxx])   # Not possible to have negative CRMSE
+      mpl.ylim([miny, maxy])
+
+      # Draw standard RMSE rings
+      for X in mpl.xticks()[0]:
+         self._drawCircle(X, style=":")
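+      # These rings rely on the decomposition RMSE^2 = bias^2 + CRMSE^2, so
+      # points with equal total RMSE lie on circles around the origin; e.g.
+      # a bias of 3 and a CRMSE of 4 correspond to a total RMSE of 5.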
+      mpl.plot([0, maxx], [0, 0], 'k-', lw=2)   # Draw x-axis line
+      mpl.grid()
+
+class Marginal(Output):
+   _description = "Show marginal distribution for different thresholds"
+   _reqThreshold = True
+   _supX = False
+   _experimental = True
+   def __init__(self):
+      Output.__init__(self)
+   def _plotCore(self, data):
+      labels = data.getFilenames()
+
+      F = data.getNumFiles()
+
+      data.setAxis("none")
+      data.setIndex(0)
+      data.setFileIndex(0)
+      clim = np.zeros(len(self._thresholds), 'float')
+      for f in range(0, F):
+         x = self._thresholds
+         y = np.zeros([len(self._thresholds)], 'float')
+         for t in range(0, len(self._thresholds)):
+            threshold = self._thresholds[t]
+            data.setFileIndex(f)
+            data.setAxis("none")
+            data.setIndex(0)
+            var = data.getPvar(threshold)
+            [obs, p] = data.getScores(["obs", var])
+
+            color = self._getColor(f, F)
+            style = self._getStyle(f, F)
+
+            if(self._binType == "below"):
+               obs = obs < threshold
+            elif(self._binType == "above"):
+               p = 1 - p
+               obs = obs > threshold
+            else:
+               Common.error("Bin type must be one of 'below' or 'above' for the marginal plot")
+
+            clim[t] = np.mean(obs)
+            y[t] = np.mean(p)
+
+         label = labels[f]
+         mpl.plot(x, y, style, color=color, lw=self._lw, ms=self._ms, label=label)
+      self._plotObs(x, clim)
+
+      mpl.ylim([0, 1])
+      mpl.xlabel(data.getAxisLabel("threshold"))
+      mpl.ylabel("Marginal probability")
+      mpl.grid()
+
+class Freq(Output):
+   _description = "Show frequency of obs and forecasts below, above, or within thresholds"
+   _reqThreshold = True
+   _supX = False
+   _experimental = True
+   def __init__(self):
+      Output.__init__(self)
+   def _plotCore(self, data):
+      labels = data.getFilenames()
+
+      F = data.getNumFiles()
+
+      data.setAxis("none")
+      data.setIndex(0)
+      data.setFileIndex(0)
+
+      for f in range(0, F):
+         # Set up x and y: with '-b within' we need one less value in the array
+         N = len(self._thresholds)
+         x = self._thresholds
+         if(self._binType == "within"):
+            N = len(self._thresholds) - 1
+            x = (self._thresholds[1:] + self._thresholds[:-1])/2
+         y = np.zeros(N, 'float')
+         clim = np.zeros(N, 'float')
+         for t in range(0, N):
+            threshold = self._thresholds[t]
+            data.setFileIndex(f)
+            data.setAxis("none")
+            data.setIndex(0)
+            [obs, fcst] = data.getScores(["obs", "fcst"])
+
+            color = self._getColor(f, F)
+            style = self._getStyle(f, F)
+
+            if(self._binType == "below"):
+               fcst = fcst < threshold
+               obs = obs < threshold
+            elif(self._binType == "above"):
+               fcst = fcst > threshold
+               obs = obs > threshold
+            elif(self._binType == "within"):
+               fcst = (fcst >= threshold) & (fcst < self._thresholds[t+1])
+               obs = (obs >= threshold) & (obs < self._thresholds[t+1])
+
+            clim[t] = np.mean(obs)
+            y[t] = np.mean(fcst)
+
+         label = labels[f]
+         mpl.plot(x, y, style, color=color, lw=self._lw, ms=self._ms, label=label)
+      self._plotObs(x, clim)
+
+      mpl.ylim([0, 1])
+      mpl.xlabel(data.getAxisLabel("threshold"))
+      mpl.ylabel("Frequency " + self._binType)
+      mpl.grid()
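+# Note on the Freq plot above: with '-b within' and thresholds 0,1,2 the
+# frequencies are computed for the bins [0,1) and [1,2) and plotted at the
+# midpoints 0.5 and 1.5, whereas 'below'/'above' use the thresholds themselves.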
+class InvReliability(Output):
+   _description = "Reliability diagram for a certain quantile (-r)"
+   _reqThreshold = True
+   _supX = False
+   _experimental = True
+   def __init__(self):
+      Output.__init__(self)
+   def _plotCore(self, data):
+      labels = data.getFilenames()
+
+      F = data.getNumFiles()
+      ax = mpl.gca()
+      quantiles = self._thresholds
+      # Place the inset axes for the bin counts away from the main curves
+      if(quantiles[0] < 0.5):
+         axi = mpl.axes([0.66, 0.65, 0.2, 0.2])
+      else:
+         axi = mpl.axes([0.66, 0.15, 0.2, 0.2])
+      mpl.sca(ax)
+
+      data.setAxis("none")
+      data.setIndex(0)
+      data.setFileIndex(0)
+      for t in range(0, len(quantiles)):
+         quantile = quantiles[t]
+         var = data.getQvar(quantile)
+         [obs, p] = data.getScores(["obs", var])
+
+         # Determine the bin edges (N bins)
+         N = 21
+         if(data.getVariable() == "Precip"):
+            edges = np.linspace(0, np.sqrt(Common.nanmax(obs)), N+1)**2
+         else:
+            edges = np.linspace(Common.nanmin(obs), Common.nanmax(obs), N+1)
+
+         x = np.zeros([len(edges)-1, F], 'float')
+         y = np.nan*np.zeros([F, len(edges)-1], 'float')
+         n = np.zeros([F, len(edges)-1], 'float')
+         v = np.zeros([F, len(edges)-1], 'float')
+         # Draw reliability lines
+         for f in range(0, F):
+            color = self._getColor(f, F)
+            style = self._getStyle(f, F)
+            data.setFileIndex(f)
+            data.setAxis("none")
+            data.setIndex(0)
+            var = data.getQvar(quantile)
+            [obs, p] = data.getScores(["obs", var])
+
+            obs = obs <= p
+
+            # Compute frequencies
+            for i in range(0, len(edges)-1):
+               q = (p >= edges[i]) & (p < edges[i+1])
+               I = np.where(q)[0]
+               if(len(I) > 0):
+                  n[f,i] = len(obs[I])
+                  # Need at least 2 data points to be valid
+                  if(n[f,i] >= 2):
+                     y[f,i] = np.mean(obs[I])
+                     v[f,i] = np.var(obs[I])
+                  x[i,f] = np.mean(p[I])
+
+            label = labels[f]
+            if(t != 0):
+               label = ""
+            mpl.plot(x[:,f], y[f], style, color=color, lw=self._lw, ms=self._ms, label=label)
+         self._plotObs(edges, 0*edges + quantile)
+
+         # Draw confidence bands (do this separately so that these lines don't sneak into the legend)
+         for f in range(0, F):
+            color = self._getColor(f, F)
+            style = self._getStyle(f, F)
+            self._plotConfidence(x[:,f], y[f], v[f], n[f], color=color)
+            axi.plot(x[:,f], n[f], style, color=color, lw=self._lw, ms=self._ms)
+         axi.xaxis.set_major_locator(mpl.NullLocator())
+         axi.set_yscale('log')
+         axi.set_title("Number")
+      mpl.sca(ax)
+      mpl.ylim([0, 1])
+      mpl.xlabel(data.getVariableAndUnits())
+      mpl.ylabel("Observed frequency")
+      units = " " + data.getUnits()
+      mpl.title("Quantile: " + str(quantile*100) + "%")
diff --git a/verif/Station.py b/verif/Station.py
new file mode 100644
index 0000000..547959f
--- /dev/null
+++ b/verif/Station.py
@@ -0,0 +1,26 @@
+class Station:
+   def __init__(self, id, lat, lon, elev):
+      self._id = id
+      self._lat = lat
+      self._lon = lon
+      self._elev = elev
+   def id(self, value=None):
+      if value == None:
+         return self._id
+      else:
+         self._id = value
+   def lat(self, value=None):
+      if value == None:
+         return self._lat
+      else:
+         self._lat = value
+   def lon(self, value=None):
+      if value == None:
+         return self._lon
+      else:
+         self._lon = value
+   def elev(self, value=None):
+      if value == None:
+         return self._elev
+      else:
+         self._elev = value
diff --git a/verif/__init__.py b/verif/__init__.py
new file mode 100644
index 0000000..3f6fe59
--- /dev/null
+++ b/verif/__init__.py
@@ -0,0 +1,559 @@
+import sys
+import os
+import numpy as np
+import verif.Data as Data
+import verif.Output as Output
+import verif.Metric as Metric
+import verif.Common as Common
+import matplotlib.pyplot as mpl
+import textwrap
+def main():
+   ############
+   # Defaults #
+   ############
+   ifiles = list()
+   ofile = None
+   metric = None
+   locations = None
+   latlonRange = None
+   training = 0
+   thresholds = None
+   startDate = None
+   endDate = None
+   climFile = None
+   climType = "subtract"
+   leg = None
+   ylabel = None
+   xlabel = None
+   title = None
+   offsets = None
+   xdim = None
+   sdim = None
+   figSize = None
+   dpi = 100
+   debug = False
+   showText = False
+   showMap = False
+   noMargin = False
+   binType = None
+   markerSize = None
+   lineWidth = None
+   
tickFontSize = None + labFontSize = None + legFontSize = None + type = "plot" + XRotation = None + MajorLength = None + MinorLength = None + MajorWidth = None + Bottom = None + Top = None + Right = None + Left = None + Pad = None + showPerfect = None + cType = "mean" + doHist = False + doSort = False + doAcc = False + xlim = None + ylim = None + clim = None + + # Read command line arguments + i = 1 + while(i < len(sys.argv)): + arg = sys.argv[i] + if(arg[0] == '-'): + # Process option + if(arg == "-debug"): + debug = True + elif(arg == "-nomargin"): + noMargin = True + elif(arg == "-sp"): + showPerfect = True + elif(arg == "-hist"): + doHist = True + elif(arg == "-acc"): + doAcc = True + elif(arg == "-sort"): + doSort = True + else: + if(arg == "-f"): + ofile = sys.argv[i+1] + elif(arg == "-l"): + locations = Common.parseNumbers(sys.argv[i+1]) + elif(arg == "-llrange"): + latlonRange = Common.parseNumbers(sys.argv[i+1]) + elif(arg == "-t"): + training = int(sys.argv[i+1]) + elif(arg == "-x"): + xdim = sys.argv[i+1] + elif(arg == "-o"): + offsets = Common.parseNumbers(sys.argv[i+1]) + elif(arg == "-leg"): + leg = unicode(sys.argv[i+1], 'utf8') + elif(arg == "-ylabel"): + ylabel = unicode(sys.argv[i+1], 'utf8') + elif(arg == "-xlabel"): + xlabel = unicode(sys.argv[i+1], 'utf8') + elif(arg == "-title"): + title = unicode(sys.argv[i+1], 'utf8') + elif(arg == "-b"): + binType = sys.argv[i+1] + elif(arg == "-type"): + type = sys.argv[i+1] + elif(arg == "-fs"): + figSize = sys.argv[i+1] + elif(arg == "-dpi"): + dpi = int(sys.argv[i+1]) + elif(arg == "-d"): + startDate = int(sys.argv[i+1]) + endDate = int(sys.argv[i+2]) + i = i + 1 + elif(arg == "-c"): + climFile = sys.argv[i+1] + climType = "subtract" + elif(arg == "-C"): + climFile = sys.argv[i+1] + climType = "divide" + elif(arg == "-xlim"): + xlim = Common.parseNumbers(sys.argv[i+1]) + elif(arg == "-ylim"): + ylim = Common.parseNumbers(sys.argv[i+1]) + elif(arg == "-clim"): + clim = Common.parseNumbers(sys.argv[i+1]) + elif(arg == "-s"): + sdim = sys.argv[i+1] + elif(arg == "-ct"): + cType = sys.argv[i+1] + elif(arg == "-r"): + thresholds = Common.parseNumbers(sys.argv[i+1]) + elif(arg == "-ms"): + markerSize = float(sys.argv[i+1]) + elif(arg == "-lw"): + lineWidth = float(sys.argv[i+1]) + elif(arg == "-tickfs"): + tickFontSize = float(sys.argv[i+1]) + elif(arg == "-labfs"): + labFontSize = float(sys.argv[i+1]) + elif(arg == "-legfs"): + legFontSize = float(sys.argv[i+1]) + elif(arg == "-xrot"): + XRotation = float(sys.argv[i+1]) + elif(arg == "-majlth"): + MajorLength = float(sys.argv[i+1]) + elif(arg == "-minlth"): + MinorLength = float(sys.argv[i+1]) + elif(arg == "-majwid"): + MajorWidth = float(sys.argv[i+1]) + elif(arg == "-bot"): + Bottom = float(sys.argv[i+1]) + elif(arg == "-top"): + Top = float(sys.argv[i+1]) + elif(arg == "-right"): + Right = float(sys.argv[i+1]) + elif(arg == "-left"): + Left = float(sys.argv[i+1]) + elif(arg == "-pad"): + Pad = sys.argv[i+1] + elif(arg == "-m"): + metric = sys.argv[i+1] + else: + Common.error("Flag '" + sys.argv[i] + "' not recognized") + i = i + 1 + else: + ifiles.append(sys.argv[i]) + i = i + 1 + + # Deal with legend entries + if(leg != None): + leg = leg.split(',') + for i in range(0,len(leg)): + leg[i] = leg[i].replace('_', ' ') + + # Limit dates + dates = None + if(startDate != None and endDate != None): + dates = list() + date = startDate + while(date <= endDate): + dates.append(date) + date = Common.getDate(date, 1) + + if(cType != "mean" and cType != "min" and cType != "max" and cType != 
"median"): + Common.error("'-ct cType' must be one of min, mean, median, or max") + + if(latlonRange != None and len(latlonRange) != 4): + Common.error("-llRange must have exactly 4 values") + + if(len(ifiles) > 0): + data = Data.Data(ifiles, clim=climFile, climType=climType, dates=dates, offsets=offsets, + locations=locations, latlonRange=latlonRange, training=training) + else: + data = None + if(len(sys.argv) == 1 or len(ifiles) == 0 or metric == None): + showDescription(data) + sys.exit() + + if(figSize != None): + figSize = figSize.split(',') + if(len(figSize) != 2): + print "-fs figSize must be in the form: width,height" + sys.exit(1) + + m = None + + # Handle special plots + if(doHist): + pl = Output.Hist(metric) + elif(doSort): + pl = Output.Sort(metric) + elif(metric == "pithist"): + m = Metric.Pit("pit") + pl = Output.PitHist(m) + elif(metric == "obsfcst"): + pl = Output.ObsFcst() + elif(metric == "timeseries"): + pl = Output.TimeSeries() + elif(metric == "qq"): + pl = Output.QQ() + elif(metric == "cond"): + pl = Output.Cond() + elif(metric == "against"): + pl = Output.Against() + elif(metric == "count"): + pl = Output.Count() + elif(metric == "scatter"): + pl = Output.Scatter() + elif(metric == "change"): + pl = Output.Change() + elif(metric == "spreadskill"): + pl = Output.SpreadSkill() + elif(metric == "taylor"): + pl = Output.Taylor() + elif(metric == "error"): + pl = Output.Error() + elif(metric == "droc"): + pl = Output.DRoc() + elif(metric == "droc0"): + pl = Output.DRoc0() + elif(metric == "drocnorm"): + pl = Output.DRocNorm() + elif(metric == "reliability"): + pl = Output.Reliability() + elif(metric == "invreliability"): + pl = Output.InvReliability() + elif(metric == "igncontrib"): + pl = Output.IgnContrib() + elif(metric == "marginal"): + pl = Output.Marginal() + else: + # Standard plots + ''' + # Attempt at automating + metrics = Metric.getAllMetrics() + m = None + for mm in metrics: + if(metric == mm[0].lower() and mm[1].isStandard()): + m = mm[1]() + break + if(m == None): + m = Metric.Default(metric) + ''' + + # Determine metric + if(metric == "rmse"): + m = Metric.Rmse() + elif(metric == "rmsf"): + m = Metric.Rmsf() + elif(metric == "crmse"): + m = Metric.Crmse() + elif(metric == "cmae"): + m = Metric.Cmae() + elif(metric == "dmb"): + m = Metric.Dmb() + elif(metric == "std"): + m = Metric.Std() + elif(metric == "num"): + m = Metric.Num() + elif(metric == "corr"): + m = Metric.Corr() + elif(metric == "rankcorr"): + m = Metric.RankCorr() + elif(metric == "kendallcorr"): + m = Metric.KendallCorr() + elif(metric == "bias"): + m = Metric.Bias() + elif(metric == "ef"): + m = Metric.Ef() + elif(metric == "maxobs"): + m = Metric.MaxObs() + elif(metric == "minobs"): + m = Metric.MinObs() + elif(metric == "maxfcst"): + m = Metric.MaxFcst() + elif(metric == "minfcst"): + m = Metric.MinFcst() + elif(metric == "stderror"): + m = Metric.StdError() + elif(metric == "mae"): + m = Metric.Mae() + elif(metric == "medae"): + m = Metric.Medae() + # Contingency metrics + elif(metric == "ets"): + m = Metric.Ets() + elif(metric == "threat"): + m = Metric.Threat() + elif(metric == "pc"): + m = Metric.Pc() + elif(metric == "diff"): + m = Metric.Diff() + elif(metric == "edi"): + m = Metric.Edi() + elif(metric == "sedi"): + m = Metric.Sedi() + elif(metric == "eds"): + m = Metric.Eds() + elif(metric == "seds"): + m = Metric.Seds() + elif(metric == "biasfreq"): + m = Metric.BiasFreq() + elif(metric == "hss"): + m = Metric.Hss() + elif(metric == "baserate"): + m = Metric.BaseRate() + elif(metric 
== "yulesq"): + m = Metric.YulesQ() + elif(metric == "or"): + m = Metric.Or() + elif(metric == "lor"): + m = Metric.Lor() + elif(metric == "yulesq"): + m = Metric.YulesQ() + elif(metric == "kss"): + m = Metric.Kss() + elif(metric == "hit"): + m = Metric.Hit() + elif(metric == "miss"): + m = Metric.Miss() + elif(metric == "fa"): + m = Metric.Fa() + elif(metric == "far"): + m = Metric.Far() + # Other threshold + elif(metric == "bs"): + m = Metric.Bs() + elif(metric == "bss"): + m = Metric.Bss() + elif(metric == "bsrel"): + m = Metric.BsRel() + elif(metric == "bsunc"): + m = Metric.BsUnc() + elif(metric == "bsres"): + m = Metric.BsRes() + elif(metric == "ign0"): + m = Metric.Ign0() + elif(metric == "spherical"): + m = Metric.Spherical() + elif(metric == "within"): + m = Metric.Within() + # Probabilistic + elif(metric == "pit"): + m = Metric.Mean(Metric.Pit()) + elif(metric == "pitdev"): + m = Metric.PitDev() + elif(metric == "marginalratio"): + m = Metric.MarginalRatio() + # Default + else: + if(cType == "min"): + m = Metric.Min(Metric.Default(metric)) + elif(cType == "max"): + m = Metric.Max(Metric.Default(metric)) + elif(cType == "median"): + m = Metric.Median(Metric.Default(metric)) + elif(cType == "mean"): + m = Metric.Mean(Metric.Default(metric)) + else: + Common.error("-ct " + cType + " not understood") + + # Output type + if(type == "plot" or type == "text" or type == "map" or type == "maprank"): + pl = Output.Default(m) + pl.setShowAcc(doAcc) + else: + Common.error("Type not understood") + + # Rest dimension of '-x' is not allowed + if(xdim != None and not pl.supportsX()): + Common.warning(metric + " does not support -x. Ignoring it.") + xdim = None + + # Reset dimension if 'threshold' is not allowed + if(xdim == "threshold" and ((not pl.supportsThreshold()) or (not m.supportsThreshold()))): + Common.warning(metric + " does not support '-x threshold'. Ignoring it.") + thresholds = None + xdim = None + + # Create thresholds if needed + if((thresholds == None) and (pl.requiresThresholds() or (m != None and m.requiresThresholds()))): + data.setAxis("none") + obs = data.getScores("obs")[0] + fcst = data.getScores("fcst")[0] + smin = min(min(obs), min(fcst)) + smax = max(max(obs), max(fcst)) + thresholds = np.linspace(smin,smax,10) + Common.warning("Missing '-r '. 
+   # Set plot parameters
+   if(markerSize != None):
+      pl.setMarkerSize(markerSize)
+   if(lineWidth != None):
+      pl.setLineWidth(lineWidth)
+   if(labFontSize != None):
+      pl.setLabFontSize(labFontSize)
+   if(legFontSize != None):
+      pl.setLegFontSize(legFontSize)
+   if(tickFontSize != None):
+      pl.setTickFontSize(tickFontSize)
+   if(XRotation != None):
+      pl.setXRotation(XRotation)
+   if(MajorLength != None):
+      pl.setMajorLength(MajorLength)
+   if(MinorLength != None):
+      pl.setMinorLength(MinorLength)
+   if(MajorWidth != None):
+      pl.setMajorWidth(MajorWidth)
+   if(Bottom != None):
+      pl.setBottom(Bottom)
+   if(Top != None):
+      pl.setTop(Top)
+   if(Right != None):
+      pl.setRight(Right)
+   if(Left != None):
+      pl.setLeft(Left)
+   if(Pad != None):
+      pl.setPad(Pad)
+   if(binType != None):
+      pl.setBinType(binType)
+   if(showPerfect != None):
+      pl.setShowPerfect(showPerfect)
+   if(xlim != None):
+      pl.setXLim(xlim)
+   if(ylim != None):
+      pl.setYLim(ylim)
+   if(clim != None):
+      pl.setCLim(clim)
+   pl.setFilename(ofile)
+   pl.setThresholds(thresholds)
+   pl.setLegend(leg)
+   pl.setFigsize(figSize)
+   pl.setDpi(dpi)
+   pl.setAxis(xdim)
+   pl.setShowMargin(not noMargin)
+   pl.setYlabel(ylabel)
+   pl.setXlabel(xlabel)
+   pl.setTitle(title)
+
+   if(type == "text"):
+      pl.text(data)
+   elif(type == "map"):
+      pl.map(data)
+   elif(type == "maprank"):
+      pl.setShowRank(True)
+      pl.map(data)
+   else:
+      pl.plot(data)
+
+def showDescription(data=None):
+   print "Compute verification scores for COMPS verification files\n"
+   print "usage: verif files -m metric [-x x-dim] [-r thresholds]"
+   print "             [-l locationIds] [-llrange latLonRange]"
+   print "             [-o offsets] [-d start-date end-date]"
+   print "             [-t training] [-c climFile] [-C ClimFile]"
+   print "             [-xlim xlim] [-ylim ylim] [-clim clim]"
+   print "             [-type type] [-leg legend] [-hist] [-sort] [-acc]"
+   print "             [-f imageFile] [-fs figSize] [-dpi dpi]"
+   print "             [-b binType] [-nomargin] [-debug] [-ct ctype]"
+   print "             [-ms markerSize] [-lw lineWidth] [-xrot XRotation]"
+   print "             [-tickfs tickFontSize] [-labfs labFontSize] [-legfs legFontSize]"
+   print "             [-majlth MajorTickLength] [-minlth MinorTickLength] [-majwid MajorTickWidth]"
+   print "             [-bot Bottom] [-top Top] [-left Left] [-right Right]"
+   print "             [-sp] [-ylabel Y Axis Label] [-xlabel X Axis Label]"
+   print "             [-title Title]"
+   #print "             [-pad Pad]"
+   print ""
+   print Common.green("Arguments:")
+   print "   files         One or more COMPS verification files in NetCDF format."
+   print "   metric        Verification score to use. See available metrics below."
+   print "   x-dim         Plot this dimension on the x-axis: date, offset, location, locationId,"
+   print "                 locationElev, locationLat, locationLon, threshold, or none. Not supported by"
+   print "                 all metrics. If not specified, then a default is used based on the metric."
+   print "                 'none' collapses all dimensions and computes one value."
+   print "   thresholds    Compute scores for these thresholds (only used by some metrics)."
+   print "   locationIds   Limit the verification to these location IDs."
+   print "   latLonRange   Limit the verification to locations within minlon,maxlon,minlat,maxlat."
+   print "   offsets       Limit the verification to these offsets (in hours)."
+   print "   start-date    YYYYMMDD. Only use dates from this day onward."
+   print "   end-date      YYYYMMDD. Only use dates up to and including this day."
+   print "   training      Remove this many days from the beginning of the verification."
+   print "   climFile      NetCDF containing climatology data. Subtract climatology values from"
+   print "                 all forecasts and obs."
+ print " ClimFile NetCDF containing climatology data. Divide all forecasts and" + print " obs by climatology values." + print " xlim Force x-axis limits to the two values lower,upper" + print " ylim Force y-axis limits to the two values lower,upper" + print " clim Force colorbar limits to the two values lower,upper" + print " type One of 'plot' (default),'text', 'map', or 'maprank'." + print " -hist Plot values as histogram. Only works for non-derived metrics" + print " -sort Plot values sorted. Only works for non-derived metrics" + print " -acc Plot accumulated values. Only works for non-derived metrics" + print " legend Comma-separated list of legend titles. Use '_' to represent space." + print " imageFile Save image to this filename" + print " figSize Set figure size width,height (in inches). Default 8x6." + print " dpi Resolution of image in dots per inch (default 100)" + print " binType One of 'below', 'within', or 'above'. For threshold plots (ets, hit, within, etc)" + print " 'below/above' computes frequency below/above the threshold, and 'within' computes" + print " the frequency between consecutive thresholds." + print " -nomargin Remove margins (whitespace) in the plot" + print " not x[i] <= T." + print " -debug Show statistics about files" + print " cType Collapsing type: 'min', 'mean', 'median', or 'max. When a score from the file is plotted" + print " (such as -m 'fcst'), the min/mean/meadian/max will be shown for each value on the x-axis" + print " markerSize How big should markers be?" + print " lineWidth How wide should lines be?" + print " XRotation Rotation angle for x-axis labels" + print " tickFontSize Font size for axis ticks" + print " labFontSize Font size for axis labels" + print " legFontSize Font size for legend" + print " MajorTickLength Length of major tick marks" + print " MinorTickLength Length of minor tick marks" + print " MajorTickWidth Adjust the thickness of the major tick marks" + print " Bottom Bottom boundary location for saved figure [range 0-1]" + print " Top Top boundary location for saved figure [range 0-1]" + print " Left Left boundary location for saved figure [range 0-1]" + print " Right Right boundary location for saved figure [range 0-1]" + print " -sp Show a line indicating the perfect score" + print " -ylabel Custom Y-axis Label" + print " -xlabel Custom X-axis Label" + print " -title Custom Title to Chart Top" + print "" + metrics = Metric.getAllMetrics() + outputs = Output.getAllOutputs() + print Common.green("Metrics (-m):") + for m in metrics+outputs: + name = m[0].lower() + desc = m[1].summary() + if(desc != ""): + print " %-14s%s" % (name, textwrap.fill(desc, 80).replace('\n', '\n ')), + print "" + if(data != None): + print "" + print " Or one of the following, which plots the raw score from the file:" + metrics = data.getMetrics() + for metric in metrics: + print " " + metric + +if __name__ == '__main__': + main()