Skip to content

Commit

Permalink
Updated documentation to match ShakeMap coding standard; added function…
Browse files Browse the repository at this point in the history
…ality to retrieve PAGER results; tests for same.
  • Loading branch information
mhearne-usgs committed Nov 28, 2018
1 parent cf964c7 commit 0e3d6cb
Show file tree
Hide file tree
Showing 9 changed files with 1,332 additions and 932 deletions.
18 changes: 10 additions & 8 deletions bin/findid
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ import numpy as np

# local imports
from libcomcat.search import search
from libcomcat.utils import get_summary_data_frame, maketime
from libcomcat.utils import maketime
from libcomcat.dataframes import get_summary_data_frame

# constants
TIMEFMT = '%Y-%m-%dT%H:%M:%S'
Expand All @@ -25,15 +26,15 @@ pd.set_option('display.max_colwidth', 100)


def get_parser():
desc = '''Find the id(s) of the closest earthquake to input parameters.
desc = '''Find the id(s) of the closest earthquake to input parameters.
To print the authoritative id of the event closest in time and space
To print the authoritative id of the event closest in time and space
inside a 100 km, 16 second window to "2017-08-30 03:00:33 UTC 37.571 118.888":
%(prog)s 2017-08-30T03:00:33 37.571 -118.888
To make a similar query but with the time shifted by 2 minutes, and a
To make a similar query but with the time shifted by 2 minutes, and a
custom time window of 3 minutes:
%(prog)s -w 180 2017-08-30T03:00:33 37.571 -118.888
Expand All @@ -52,7 +53,7 @@ def get_parser():
Notes:
- The time format at the command line must be of the form "YYYY-MM-DDTHH:MM:SS". The time format in an input csv file
can be either :YYYY-MM-DDTHH:MM:SS" OR "YYYY-MM-DD HH:MM:SS". This is because on the command line the argument parser
can be either :YYYY-MM-DDTHH:MM:SS" OR "YYYY-MM-DD HH:MM:SS". This is because on the command line the argument parser
would be confused by the space between the date and the time, whereas in the csv file the input files are being split
by commas.
- Supplying the -a option with the -f option has no effect.
Expand Down Expand Up @@ -100,7 +101,8 @@ def get_event_info(time, lat, lon, twindow, radius):
for idx, row in df.iterrows():
distance, az, azb = gps2dist_azimuth(
lat, lon, row['latitude'], row['longitude'])
dtime = row['time'] - time
row_time = row['time'].to_pydatetime()
dtime = row_time - time
dt = np.abs(dtime.days * 86400 + dtime.seconds)
df.loc[idx, 'distance'] = distance
df.loc[idx, 'timedelta'] = dt
Expand Down
6 changes: 3 additions & 3 deletions bin/getcsv
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import argparse
import sys

from libcomcat.search import search, count
from libcomcat.utils import (get_detail_data_frame,
get_summary_data_frame,
maketime)
from libcomcat.utils import maketime
from libcomcat.dataframes import (get_detail_data_frame,
get_summary_data_frame)


def get_parser():
Expand Down
190 changes: 125 additions & 65 deletions bin/getpager
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,89 @@
import argparse
import sys

from libcomcat.search import search, count
from libcomcat.utils import (get_detail_data_frame,
get_summary_data_frame,
maketime)
# third party imports
import pandas as pd
import openpyxl
from openpyxl.styles import Font, Color, colors

# local imports
from libcomcat.search import search, count, get_event_by_id
from libcomcat.classes import SummaryEvent
from libcomcat.utils import maketime
from libcomcat.dataframes import get_pager_data_frame


def add_headers(filename, file_format):
    """Prepend explanatory comment headers to a saved PAGER results file.

    The file at *filename* must already exist (written by the caller).
    For 'csv' output, header lines (each beginning with '#') are prepended
    as plain text. For any other format the file is assumed to be an Excel
    workbook: header rows are inserted at the top of the active sheet and
    styled red/bold, with the leading '#' stripped.

    Args:
        filename (str): Path to an existing results file to modify in place.
        file_format (str): 'csv' for text output; anything else is treated
            as Excel (.xlsx).
    """
    headers = ['#This data represents the results of running the PAGER exposure',
               '#and loss algorithms on the output from ShakeMap.',
               '#Notes: "Total" in the country column indicates that the',
               '#results in that row are the sum of exposures/losses for',
               '#all affected countries.',
               '#"predicted_fatalities" and "predicted_dollars" are the',
               '#results of applying loss models to the exposure data -',
               '#note that these values are not guaranteed to match the',
               '#actual losses from the earthquake.']
    if file_format == 'csv':
        # Read existing contents, then rewrite the file with the header
        # block prepended.  Use context managers so handles are closed
        # (the read handle previously leaked).
        with open(filename, 'rt') as fin:
            data = fin.read()
        headertext = '\n'.join(headers) + '\n'
        with open(filename, 'wt') as fout:
            fout.write(headertext + data)
    else:
        # Excel: insert one row per header line at the top of the active
        # worksheet and mark them visually as comments (red, bold).
        font = Font(color=colors.RED, bold=True)
        wb = openpyxl.load_workbook(filename)
        ws = wb.active
        ws.insert_rows(1, amount=len(headers))
        for cellidx in range(len(headers)):
            coordinate = 'A%i' % (cellidx + 1)
            ws[coordinate] = headers[cellidx].strip('#')
            cell = ws[coordinate]
            cell.font = font
        wb.save(filename)
        wb.close()


def get_parser():
desc = '''Download PAGER exposure/loss results in line format (csv, tab, etc.).
To download basic PAGER information (total exposure) for a box around New Zealand from 2013
To download basic PAGER information (total exposure) for events around New Zealand from 2010
to the present in CSV format:
%(prog)s nz.csv -b 163.213 -178.945 -48.980 -32.324 -s 2013-01-01 -f csv
%(prog)s nz_exposures.csv -f csv -s 2010-01-01 -m 5.5 9.9 -b 163.213 -178.945 -48.980 -32.324
To download the same information in Excel format:
%(prog)s nz.csv -b 163.213 -178.945 -48.980 -32.324 -s 2013-01-01 -f excel
%(prog)s nz_exposures.xlsx -f excel -s 2010-01-01 -m 5.5 9.9 -b 163.213 -178.945 -48.980 -32.324
To add loss information (see notes below), you can use the -l flag:
%(prog)s nz_exposures.xlsx -f excel -s 2010-01-01 -m 5.5 9.9 -b 163.213 -178.945 -48.980 -32.324 -l
To add exposures on a per-country basis (see notes below), you can use the -c flag:
%(prog)s nz_exposures.xlsx -f excel -s 2010-01-01 -m 5.5 9.9 -b 163.213 -178.945 -48.980 -32.324 -c
NOTES:
Any start or end time where only date is specified (YYYY-mm-dd) will
1) Any start or end time where only date is specified (YYYY-mm-dd) will
be translated to the beginning of that day. Thus, a start time of
"2015-01-01" becomes "2015-01-01T:00:00:00" and an end time of "2015-01-02"
becomes ""2015-01-02T:00:00:00".
Older events may not have the predicted loss information in ComCat - in those
2) Older events may not have the predicted loss information in ComCat - in those
cases, predicted losses and uncertainties will be filled in with NaN values.
Note that when specifying a search box that crosses the -180/180 meridian,
3) Older events may not have the per-country exposure information available in
ComCat.
4) Note that when specifying a search box that crosses the -180/180 meridian,
you simply specify longitudes as you would if you were not crossing that
meridian (i.e., lonmin=179, lonmax=-179). The program will resolve the
discrepancy.
The ComCat API has a returned event limit of 20,000. Queries that
5) The ComCat API has a returned event limit of 20,000. Queries that
exceed this ComCat limit ARE supported by this software, by
breaking up one large request into a number of smaller ones.
However, large queries, when also configured to retrieve moment
tensor parameters, nodal plane angles, or moment tensor type can
take a very long time to download. This delay is caused by the
fact that when this program has to retrieve moment tensor
parameters, nodal plane angles, or moment tensor type, it must
open a URL for EACH event and parse the data it finds. If these
parameters are not requested, then the same request will return in
much less time (~10 minutes or less for a 20,000 event query).
Queries for all magnitude solutions will take even more time, as
this requires parsing an XML file for each event and extracting
the magnitude values and associated source and type. '''
breaking up one large request into a number of smaller ones.'''

parser = argparse.ArgumentParser(
description=desc, formatter_class=argparse.RawDescriptionHelpFormatter)
Expand Down Expand Up @@ -83,6 +119,25 @@ def get_parser():
parser.add_argument('-m', '--mag-range', metavar=('minmag', 'maxmag'),
dest='magRange', type=float, nargs=2,
help=helpstr)
parser.add_argument('-f', '--format', dest='format',
choices=['csv', 'tab', 'excel'], default='csv',
metavar='FORMAT', help='Output format.')

losshelp = 'Retrieve fatalities and economic losses'
parser.add_argument('-l', '--get-losses', help=losshelp, action='store_true',
default=False)

countryhelp = 'Retrieve information from all countries affected by earthquake'
parser.add_argument('-c', '--get-countries', help=countryhelp, action='store_true',
default=False)

versionhelp = 'Retrieve information from all versions of PAGER'
parser.add_argument('-a', '--all-versions', help=versionhelp, action='store_true',
default=False)

versionhelp = 'Retrieve information from a single PAGER event'
parser.add_argument('-i', '--eventid', help=versionhelp,
metavar='EVENTID')

parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
help='Print progress')
Expand Down Expand Up @@ -124,59 +179,64 @@ def main():
print('Please specify either a bounding box OR radius search.')
sys.exit(1)

events = search(starttime=args.startTime,
endtime=args.endTime,
updatedafter=args.after,
minlatitude=latmin,
maxlatitude=latmax,
minlongitude=lonmin,
maxlongitude=lonmax,
latitude=latitude,
longitude=longitude,
maxradiuskm=radiuskm,
maxmagnitude=maxmag,
minmagnitude=minmag,
producttype='losspager',
host=args.host,
verbose=args.verbose)
if args.eventid:
event = get_event_by_id(args.eventid,
includesuperseded=args.all_versions)
events = [event]
else:
events = search(starttime=args.startTime,
endtime=args.endTime,
updatedafter=args.after,
minlatitude=latmin,
maxlatitude=latmax,
minlongitude=lonmin,
maxlongitude=lonmax,
latitude=latitude,
longitude=longitude,
maxradiuskm=radiuskm,
maxmagnitude=maxmag,
minmagnitude=minmag,
producttype='losspager',
host=args.host,
verbose=args.verbose)

if not len(events):
print('No events found matching your search criteria. Exiting.')
sys.exit(0)

if args.getAngles != 'none' or args.getAllMags or args.getComponents != 'none':
if args.verbose:
sys.stderr.write(
'Fetched %i events...creating table.\n' % (len(events)))

df = get_detail_data_frame(events, get_all_magnitudes=args.getAllMags,
get_tensors=args.getComponents,
get_focals=args.getAngles,
get_moment_supplement=args.getMomentSupplement,
verbose=args.verbose)
else:
if args.verbose:
sys.stderr.write(
'Fetched %i events...creating summary table.\n' % (len(events)))
df = get_summary_data_frame(events)

# order the columns so that at least the initial parameters come the way we want them...
first_columns = list(events[0].toDict().keys())
col_list = list(df.columns)
for column in first_columns:
col_list.remove(column)
df = df[first_columns + col_list]
dataframe = None
nevents = len(events)
i = 1
for event in events:
if args.verbose and (i == 1 or (not i % (nevents//10))):
sys.stderr.write('Processing event %s (%i of %i).\n' %
(event.id, i, nevents))
i += 1
if isinstance(event, SummaryEvent):
detail = event.getDetailEvent(includesuperseded=args.all_versions)
else:
detail = event
df = get_pager_data_frame(detail, get_losses=args.get_losses,
get_country_exposures=args.get_countries,
get_all_versions=args.all_versions)
if dataframe is None:
dataframe = df
else:
dataframe = pd.concat([dataframe, df])

if args.verbose:
sys.stderr.write('Created table...saving %i records to %s.\n' %
(len(df), args.filename))
(len(dataframe), args.filename))
if args.format == 'csv':
df.to_csv(args.filename, index=False, chunksize=1000)
dataframe.to_csv(args.filename, index=False, chunksize=1000)
elif args.format == 'tab':
df.to_csv(args.filename, sep='\t', index=False)
dataframe.to_csv(args.filename, sep='\t', index=False)
else:
df.to_excel(args.filename, index=False)
print('%i records saved to %s.' % (len(df), args.filename))
dataframe.to_excel(args.filename, index=False)

add_headers(args.filename, args.format)

print('%i records saved to %s.' % (len(dataframe), args.filename))
sys.exit(0)


Expand Down
Loading

0 comments on commit 0e3d6cb

Please sign in to comment.