This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Merge pull request #722 from emthompson-usgs/clipnn
Add ANN clipping code and test
emthompson-usgs authored Sep 21, 2021
2 parents d3c99f3 + bc33cde commit 57a9994
Showing 16 changed files with 453 additions and 145 deletions.
16 changes: 16 additions & 0 deletions gmprocess/data/nn_clipping/bias_1.csv
@@ -0,0 +1,16 @@
4.761942028999328613e-01
0.000000000000000000e+00
-1.225080061703920364e-02
-1.355059593915939331e-01
-1.208401098847389221e-01
-7.738453336060047150e-03
4.859382510185241699e-01
4.901650547981262207e-01
-2.172667719423770905e-02
4.954338967800140381e-01
-1.108687669038772583e-01
2.171477675437927246e-01
0.000000000000000000e+00
-2.487021684646606445e-02
-1.521449834108352661e-01
-1.403292119503021240e-01
1 change: 1 addition & 0 deletions gmprocess/data/nn_clipping/bias_output.csv
@@ -0,0 +1 @@
-2.244666516780853271e-01
1 change: 1 addition & 0 deletions gmprocess/data/nn_clipping/masterF.txt
@@ -0,0 +1 @@
5, 16,'relu',1, 'sigmoid'
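
(Editor's aside, not part of this commit: the single line above appears to encode the network layout that the new clipping_ann.py below reads from masterF.txt, i.e. number of inputs, hidden-layer size, hidden activation, number of outputs, and output activation. A minimal sketch of that interpretation, assuming the file is read with csv.reader as in the PR code; the quote/whitespace stripping shown here is illustrative, not code from the commit.)

import csv

with open('masterF.txt') as f:  # assumed copy of gmprocess/data/nn_clipping/masterF.txt
    row = next(csv.reader(f))   # ['5', ' 16', "'relu'", '1', " 'sigmoid'"]

n_input = int(row[0])                           # 5 input features
n_neuron_H1 = int(row[1])                       # 16 neurons in the hidden layer
activation_H1 = row[2].strip().strip("'")       # relu
n_output = int(row[3])                          # 1 output
activation_output = row[4].strip().strip("'")   # sigmoid
print(n_input, n_neuron_H1, activation_H1, n_output, activation_output)
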
5 changes: 5 additions & 0 deletions gmprocess/data/nn_clipping/weight_1.csv
@@ -0,0 +1,5 @@
3.450458645820617676e-01,-7.405894994735717773e-02,-1.254874318838119507e-01,6.901898235082626343e-02,6.200880929827690125e-02,-5.206618085503578186e-02,3.653981387615203857e-01,3.055597841739654541e-01,-1.107454746961593628e-01,3.208941221237182617e-01,2.763029001653194427e-02,1.331859640777111053e-02,-6.405838578939437866e-02,-6.867430359125137329e-02,3.231020644307136536e-02,2.827584557235240936e-02
3.734845221042633057e-01,2.110916376113891602e-02,-8.760424703359603882e-02,2.904709428548812866e-02,2.542885206639766693e-02,1.748742721974849701e-02,3.287482857704162598e-01,3.899096846580505371e-01,1.059170365333557129e-01,3.266265094280242920e-01,-2.623116225004196167e-02,-7.134835422039031982e-02,-3.654503822326660156e-02,5.247752740979194641e-02,-1.276246458292007446e-01,-8.618847280740737915e-02
-2.637018561363220215e-01,9.931030683219432831e-03,7.612888514995574951e-02,4.233799874782562256e-01,3.749962747097015381e-01,-3.536137193441390991e-02,-2.761034965515136719e-01,-2.436323314905166626e-01,-4.983592405915260315e-02,-2.751033008098602295e-01,3.659239709377288818e-01,-4.732813537120819092e-01,-3.619530797004699707e-02,2.397446148097515106e-02,4.228172004222869873e-01,3.860528171062469482e-01
-6.323192119598388672e-01,4.159574955701828003e-02,-5.320734810084104538e-03,6.299901008605957031e-01,6.613637804985046387e-01,-6.171951442956924438e-02,-6.351884603500366211e-01,-6.061330437660217285e-01,1.258744485676288605e-02,-5.780773162841796875e-01,6.167728900909423828e-01,-4.708586633205413818e-01,2.903726417571306229e-03,3.173625096678733826e-02,6.945689320564270020e-01,6.336471438407897949e-01
-7.286261767148971558e-02,-1.223115250468254089e-02,5.675805732607841492e-02,3.030084669589996338e-01,2.992148697376251221e-01,3.216066956520080566e-02,-1.026946008205413818e-01,-3.335395455360412598e-02,2.125462144613265991e-02,-9.568803012371063232e-02,2.686659991741180420e-01,3.815599381923675537e-01,3.298680111765861511e-02,1.124517992138862610e-02,2.315190583467483521e-01,2.518956959247589111e-01
16 changes: 16 additions & 0 deletions gmprocess/data/nn_clipping/weight_output.csv
@@ -0,0 +1,16 @@
-5.802513360977172852e-01
-4.969757422804832458e-02
-1.531651243567466736e-02
5.004003047943115234e-01
5.555523037910461426e-01
1.285628415644168854e-02
-4.915536046028137207e-01
-5.825893878936767578e-01
5.253348499536514282e-02
-5.880233645439147949e-01
5.758425593376159668e-01
-6.398365497589111328e-01
8.936516940593719482e-03
-5.767295602709054947e-03
5.209476947784423828e-01
5.556588172912597656e-01
2 changes: 1 addition & 1 deletion gmprocess/version.py
@@ -1 +1 @@
__version__ = "1.1.9.dev7+g4d21e34.d20210915"
__version__ = "1.1.9.dev20+gd3c99f3.d20210920"
8 changes: 6 additions & 2 deletions gmprocess/waveform_processing/clipping/clip_detection.py
@@ -1,3 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

class ClipDetection():
    '''
    Parent class for clipping detection algorithms.
@@ -18,6 +21,7 @@ class ClipDetection():
        _get_results():
            Iterates through and runs _detect() on each trace in the stream.
    '''

    def __init__(self, st, test_all=False):
        '''
        Constructs all necessary attributes for the ClipDetection method
@@ -32,7 +36,7 @@ def __init__(self, st, test_all=False):
        self.st = st.copy()
        self.is_clipped = False
        self.test_all = test_all

    def _clean_trace(self, tr):
        '''
        Helper function to clean the trace
@@ -86,4 +90,4 @@ def _get_results(self):
                if self.test_all:
                    continue
                else:
                    break
                    break
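
(Editor's aside, not part of this commit: ClipDetection above is the parent class that the individual checks subclass; _get_results() iterates over the stream and calls the subclass's _detect() on each trace. A rough sketch of that pattern under those assumptions; the class name and threshold below are hypothetical, and real detectors should follow histogram.py and jerk.py further down in this diff.)

import numpy as np
from gmprocess.waveform_processing.clipping.clip_detection import ClipDetection


class MaxAmpCheck(ClipDetection):
    '''Hypothetical detector: flag traces whose absolute amplitude exceeds a limit.'''

    def __init__(self, st, max_amp=6e6, test_all=False):
        self.max_amp = max_amp
        super().__init__(st, test_all=test_all)
        self._get_results()  # runs _detect() on each trace, sets self.is_clipped

    def _detect(self, tr):
        # Treat a trace as clipped if any sample exceeds max_amp (in counts).
        return bool(np.max(np.abs(tr.data)) > self.max_amp)
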
254 changes: 254 additions & 0 deletions gmprocess/waveform_processing/clipping/clipping_ann.py
@@ -0,0 +1,254 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Module for implementing the Artificial Neural Net model for clipping, as
developed by Kleckner et al. This code is based on Xavier Bellagamba's python
NN implementation of "A neural network for automated quality screening of
ground motion records from small magnitude earthquakes"
DOI: 10.1193/122118EQS292M
"""

import csv
import numpy as np
import pkg_resources
import os

# Path to model data
NN_PATH = os.path.join('data', 'nn_clipping')
NN_PATH = pkg_resources.resource_filename('gmprocess', NN_PATH)


class clipNet():
    '''
    Class allowing the instantiation and use of simple (1- or 2-layer)
    neural networks.
    '''

    def __init__(self):
        '''
        Instantiate an empty neural network (no weights, functions, or
        biases loaded).
        '''
        self.n_input = 0
        self.n_neuron_H1 = 0
        self.n_neuron_H2 = -1
        self.n_output = 0
        self.activation_H1 = 'NA'
        self.activation_H2 = 'NA'
        self.activation_output = 'NA'
        self.w_H1 = []
        self.w_H2 = []
        self.b_H1 = []
        self.b_H2 = []
        self.w_output = []
        self.b_output = []

        data_path = os.path.join(NN_PATH, 'masterF.txt')
        with open(data_path) as masterF:
            readCSV = csv.reader(masterF)
            for row in readCSV:
                if len(row) == 7:
                    self.n_input = int(row[0])
                    self.n_neuron_H1 = int(row[1])
                    # self.n_neuron_H2 = int(row[3])
                    self.n_output = int(row[5])
                    self.activation_H1 = row[2]
                    # self.activation_H2 = row[4]
                    self.activation_output = row[6]
                elif len(row) == 5:
                    self.n_input = int(row[0])
                    self.n_neuron_H1 = int(row[1])
                    self.n_output = int(row[3])
                    self.activation_H1 = row[2]
                    self.activation_output = row[4]

        masterF.close()

        # Load weights and biases
        # Weights first hidden layer
        data_path = os.path.join(NN_PATH, 'weight_1.csv')
        self.w_H1 = np.asarray(loadCSV(data_path))

        # Biases first hidden layer
        data_path = os.path.join(NN_PATH, 'bias_1.csv')
        self.b_H1 = np.asarray(loadCSV(data_path))

        # Weights output layer
        data_path = os.path.join(NN_PATH, 'weight_output.csv')
        self.w_output = np.asarray(loadCSV(data_path))

        # Biases output layer
        data_path = os.path.join(NN_PATH, 'bias_output.csv')
        self.b_output = np.asarray(loadCSV(data_path))

        # Second hidden layer
        if self.n_neuron_H2 != -1:
            # Weights second hidden layer
            data_path = os.path.join(NN_PATH, 'weight_2.csv')
            self.w_H2 = np.asarray(loadCSV(data_path))

            # Biases second hidden layer
            data_path = os.path.join(NN_PATH, 'bias_2.csv')
            self.b_H2 = np.asarray(loadCSV(data_path))

    def evaluate(self, v_input):
        '''
        Use a populated neural network (i.e., from the input, return the
        classification score or the regression result).
        Args:
            v_input (list or np.array):
                Values corresponding to the following parameters: mag, dist, 6M
                amplitude check, histogram check, ping check.
        Returns:
            np.array: numpy array containing the results.
        '''
        # Transform input if required
        if isinstance(v_input, list):
            v_input = np.asarray(v_input)

        t1 = np.array([8.8, 445.8965938, 1., 1., 1.])
        t2 = np.array([4, 0.68681514, 0., 0., 0.])
        t3 = np.array([0., 0., 0., 0., 0.])
        v_input = 2.0 / (t1 - t2) * (v_input - t3)

        v_inter = np.array([])

        # First layer
        if self.activation_H1 == 'sigmoid':
            v_inter = sigmoid(np.dot(v_input.T, self.w_H1) + self.b_H1)
        elif self.activation_H1 == 'tanh':
            v_inter = tanh(np.dot(v_input.T, self.w_H1) + self.b_H1)
        elif self.activation_H1 == 'relu':
            v_inter = relu(np.dot(v_input.T, self.w_H1) + self.b_H1)
        else:
            v_inter = relu(np.dot(v_input.T, self.w_H1) + self.b_H1.T)

        # If second layer exists
        if self.n_neuron_H2 != -1:
            if self.activation_H2 == 'sigmoid':
                v_inter = sigmoid(np.dot(v_inter, self.w_H2) + self.b_H2)
            elif self.activation_H2 == 'tanh':
                v_inter = tanh(np.dot(v_inter, self.w_H2) + self.b_H2)
            else:
                v_inter = np.dot(v_inter, self.w_H2) + self.b_H2

        # Final layer
        if self.activation_output == 'sigmoid':
            v_inter = sigmoid(np.dot(v_inter, self.w_output) + self.b_output)
        elif self.activation_output == 'tanh':
            v_inter = tanh(np.dot(v_inter, self.w_output) + self.b_output)
        else:
            v_inter = sigmoid(np.dot(v_inter, self.w_output) + self.b_output)

        return v_inter


def loadCSV(data_path, row_ignore=0, col_ignore=0):
    '''
    Load a csv file from the given path and return a list of lists.
    Each imported value is checked: if it is a number, it is stored as
    a float; otherwise it is kept as a string.
    Args:
        data_path (string):
            path to the csv to load.
        row_ignore (int):
            number of rows to ignore.
        col_ignore (int):
            number of columns to ignore.
    Returns:
        list of lists: containing the data from the csv.
    '''

    M = []
    with open(data_path) as csvfile:
        readCSV = csv.reader(csvfile)

        # Skip header
        for i in range(row_ignore):
            next(csvfile)

        for row in readCSV:
            # Input vector
            single_line = []
            for i in range(col_ignore, len(row)):
                if isNumber(row[i]):
                    single_line.append(float(row[i]))
                else:
                    single_line.append(row[i])
            M.append(single_line)

    return M


def sigmoid(v_input):
    '''
    Performs an element-wise sigmoid operation on the input (1 / (exp(-x) + 1)).
    Args:
        v_input (array-like):
            values defined on R (real).
    Returns:
        list: sigmoid results (numbers between 0 and 1).
    '''
    v_act = []
    for x in v_input:
        v_act.append(1. / (1 + np.exp(-x)))
    return v_act


def tanh(v_input):
    '''
    Performs an element-wise hyperbolic tangent operation on the input
    ((exp(2x) - 1) / (exp(2x) + 1)).
    Args:
        v_input (array-like):
            values defined on R (real).
    Returns:
        list: tanh results (numbers between -1 and 1).
    '''
    v_act = []
    for x in v_input:
        v_act.append(np.tanh(x))
    return v_act


def relu(v_input):
    '''
    Performs an element-wise rectified linear unit (ReLU) operation on the
    input (max(0, x)).
    Args:
        v_input (array-like):
            values defined on R (real).
    Returns:
        list: ReLU results (numbers greater than or equal to 0).
    '''
    v_act = []
    for x in v_input:
        v_act.append(np.maximum(0.0, x))
    return v_act


def isNumber(s):
    '''
    Check if the given input is a number.
    Args:
        s (any type):
            Data to test.
    Returns:
        bool: True if it is a number, False if it isn't.
    '''
    try:
        float(s)
        return True

    except ValueError:
        return False
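
(Editor's aside, not part of this commit: a minimal usage sketch of the clipNet class added above. The input order follows evaluate()'s docstring (magnitude, distance, and the three individual clipping checks); the values and the 0.5 cutoff below are illustrative assumptions, not anything fixed by this PR.)

import numpy as np
from gmprocess.waveform_processing.clipping.clipping_ann import clipNet

clip_nn = clipNet()  # loads the weights/biases shipped in gmprocess/data/nn_clipping

# mag, dist, amplitude check, histogram check, ping check (illustrative values)
inputs = [6.0, 50.0, 0.0, 1.0, 0.0]
score = float(np.asarray(clip_nn.evaluate(inputs)).squeeze())  # sigmoid output in (0, 1)

if score > 0.5:  # assumed decision threshold
    print(f"Record flagged as likely clipped (score={score:.3f})")
else:
    print(f"Record not flagged (score={score:.3f})")
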
11 changes: 3 additions & 8 deletions gmprocess/waveform_processing/clipping/histogram.py
@@ -1,3 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from gmprocess.waveform_processing.clipping.clip_detection import ClipDetection
@@ -57,14 +60,6 @@ def __init__(self, st, num_bins=6200, min_width=7,
        self.num_clip_intervals = None
        self._get_results()

    def _clean_trace(self, tr):
        '''
        Helper function to clean a trace.
        See parent class.
        '''
        return ClipDetection._clean_trace(self, tr)

    def _signal_scale(self, signal, alpha):
        '''
        Helper function to scale signal data
11 changes: 3 additions & 8 deletions gmprocess/waveform_processing/clipping/jerk.py
@@ -1,3 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
from gmprocess.waveform_processing.clipping.clip_detection import ClipDetection

@@ -45,14 +48,6 @@ def __init__(self, st, point_thresh=25, test_all=False):
        self.num_outliers = None
        self._get_results()

    def _clean_trace(self, tr):
        '''
        Helper function to clean a trace.
        See parent class.
        '''
        return ClipDetection._clean_trace(self, tr)

    def _detect(self, tr):
        '''
        Check for jerk outliers. Based on method described by: