Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Subspace Freq domain statistic #92

Merged
merged 16 commits into from
Jun 15, 2017
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ install:
- "conda create -q --yes -n test python=%PYTHON_VERSION%"
- "activate test"
# Install default dependencies
- "conda install -q --yes pip numpy scipy opencv3 matplotlib obspy mock flake8 pyflakes=0.9.0 cython h5py bottleneck"
- "conda install -q --yes pip numpy scipy opencv3 matplotlib==1.5.3 obspy==1.0.3 mock flake8 pyflakes=0.9.0 cython h5py bottleneck"
# additional dependecies
# - "choco install opencv"
# - "powershell copy-item C:\\OpenCV\\opencv\\build\\python\\2.7\\x64\\cv2.pyd C:\\conda\\envs\\test\\lib\\site-packages\\."
Expand Down
13 changes: 8 additions & 5 deletions eqcorrscan/core/match_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1349,8 +1349,7 @@ def __init__(self, name=None, st=None, lowcut=None, highcut=None,
self.process_length = process_length
self.prepick = prepick
if event is not None:
if len(event.comments) > 0 and \
"eqcorrscan_template_" + temp_name not in \
if "eqcorrscan_template_" + temp_name not in \
[c.text for c in event.comments]:
event.comments.append(Comment(
text="eqcorrscan_template_" + temp_name,
Expand Down Expand Up @@ -2888,7 +2887,10 @@ def _group_detect(templates, stream, threshold, threshold_type, trig_int,
:return:
:class:`eqcorrscan.core.match_filter.Party` of families of detections.
"""
ncores = cpu_count()
if parallel_process:
ncores = cpu_count()
else:
ncores = 1
st = [Stream()]
master = templates[0]
# Check that they are all processed the same.
Expand Down Expand Up @@ -3306,8 +3308,9 @@ def extract_from_stream(stream, detections, pad=5.0, length=30.0):
print('No data in stream for pick:')
print(pick)
continue
cut_stream += tr.copy().trim(starttime=pick.time - pad,
endtime=pick.time - pad + length)
cut_stream += tr.slice(
starttime=pick.time - pad,
endtime=pick.time - pad + length).copy()
streams.append(cut_stream)
return streams

Expand Down
66 changes: 47 additions & 19 deletions eqcorrscan/core/subspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from __future__ import unicode_literals

import numpy as np
import scipy
import warnings
import time
import h5py
Expand Down Expand Up @@ -473,7 +472,6 @@ def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0,
:return: list of detections
:rtype: list of eqcorrscan.core.match_filter.Detection
"""
from eqcorrscan.core import subspace_statistic
detections = []
# First process the stream
if process:
Expand All @@ -493,27 +491,25 @@ def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0,
stream = [st]
stachans = detector.stachans
outtic = time.clock()
if debug > 0:
print('Computing detection statistics')
# If multiplexed, how many samples do we increment by?
if detector.multiplex:
inc = np.uint32(len(detector.stachans))
Nc = len(detector.stachans)
else:
inc = np.uint32(1)
# Stats must be same size for multiplexed or non multiplexed!
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maintain indent for comment here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment removed in some recent commit.

Nc = 1
# Here do all ffts
fft_vars = do_ffts(detector, stream, Nc)
if debug > 0:
print('Computing detection statistics')
if debug > 0:
print('Preallocating stats matrix')
stats = np.zeros((len(stream[0]),
(len(stream[0][0]) // inc) -
(len(detector.data[0].T[0]) // inc) + 1),
dtype=np.float32)
# Hard typing in Cython loop requires float32 type.
for det_channel, in_channel, i in zip(detector.data, stream[0],
np.arange(len(stream[0]))):
stats[i] = subspace_statistic.\
det_statistic(detector=det_channel.astype(np.float32),
data=in_channel.data.astype(np.float32),
inc=inc)
(len(stream[0][0]) // Nc) - (fft_vars[4] // Nc) + 1))
for det_freq, data_freq_sq, data_freq, i in zip(fft_vars[0], fft_vars[1],
fft_vars[2],
np.arange(len(stream[0]))):
# Calculate det_statistic in frequency domain
stats[i] = det_stat_freq(det_freq, data_freq_sq, data_freq,
fft_vars[3], Nc, fft_vars[4], fft_vars[5])
if debug >= 1:
print('Stats matrix is shape %s' % str(stats[i].shape))
if debug >= 3:
Expand All @@ -525,8 +521,7 @@ def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0,
ax.plot([min(t), max(t)], [threshold, threshold], color='r', lw=1,
label='Threshold')
ax.legend()
plt.title('%s.%s' % (in_channel.stats.station,
in_channel.stats.channel))
plt.title('%s' % str(stream[0][i].stats.station))
plt.show()
trig_int_samples = detector.sampling_rate * trig_int
if debug > 0:
Expand Down Expand Up @@ -586,6 +581,39 @@ def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0,
return detections


def do_ffts(detector, stream, Nc):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be a private method? If so, it should start with an underscore; if not, it probably needs a docstring. I think a good rule is to add a docstring to every function. For private functions/methods, a simple sentence or two explaining what it does is fine, but all public methods should have fully formatted docstrings that Sphinx can use.

min_fftlen = int(stream[0][0].data.shape[0] +
detector.data[0].shape[0] - Nc)
fftlen = 1 << min_fftlen.bit_length()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure this is optimal anymore... I know scipy doesn't require a power-of-2 length (although the docs do say it is optimal, I suspect that is no longer the case). Consider this (note that I am using Anaconda, which uses the MKL):

import numpy as np
import scipy.fftpack

ar_len = 3600 * 400 + 12  # a reasonable size for a seismic trace
nearest_2 = 1 << (ar_len - 1).bit_length()
nearest_fast = scipy.fftpack.next_fast_len(ar_len)

%time np.fft.fft(ar, n=nearest_2)  # takes 386 ms
%time np.fft.fft(ar, n=nearest_fast)  # takes 228 ms

Moreover, if you are only expecting real inputs (A fair assumption in seismology no?) consider using rfft:

%time np.fft.rfft(ar, n=nearest_2)  # takes 200 ms
%time np.fft.rfft(ar, n=nearest_fast)  # takes 123 ms

Using rfft with next_fast_len would give nearly a 4x speed up (similar on ifft)

mplen = stream[0][0].data.shape[0]
ulen = detector.data[0].shape[0]
num_st_fd = [np.fft.fft(tr.data, n=fftlen)
for tr in stream[0]]
denom_st_fd = [np.fft.fft(np.square(tr.data), n=fftlen)
for tr in stream[0]]
# Frequency domain of boxcar
w = np.fft.fft(np.ones(detector.data[0].shape[0]),
n=fftlen)
# This should go into the detector object as in Detex
detector_fd = []
for dat_mat in detector.data:
detector_fd.append(np.array([np.fft.fft(col[::-1], n=fftlen)
for col in dat_mat.T]))
return detector_fd, denom_st_fd, num_st_fd, w, ulen, mplen


def det_stat_freq(det_freq, data_freq_sq, data_freq, w, Nc, ulen, mplen):
    """
    Compute the subspace detection statistic from frequency-domain inputs.

    The inputs are the per-channel arrays built by ``do_ffts``. Both
    products are taken in the frequency domain, transformed back with an
    inverse FFT, and clipped to the samples ``ulen - 1`` up to (but not
    including) ``mplen``, keeping every ``Nc``-th sample.

    :type det_freq: numpy.ndarray
    :param det_freq:
        2D array with one row per detector vector; in ``do_ffts`` each row
        is the FFT of a time-reversed column of the detector data.
    :type data_freq_sq: numpy.ndarray
    :param data_freq_sq: FFT of the squared data for this channel.
    :type data_freq: numpy.ndarray
    :param data_freq: FFT of the data for this channel.
    :type w: numpy.ndarray
    :param w: FFT of a boxcar (array of ones) as long as the detector.
    :type Nc: int
    :param Nc:
        Step between retained samples; ``_detect`` passes the number of
        station-channels when multiplexed, otherwise 1.
    :type ulen: int
    :param ulen: Number of samples in the detector (first clip index + 1).
    :type mplen: int
    :param mplen: Number of samples in the data (end of the clip range).

    :return: Detection statistic, one value per retained sample.
    :rtype: numpy.ndarray
    """
    # The first ulen - 1 samples, and everything from mplen onwards, are
    # not valid lags, so only this window of the inverse FFT is kept.
    valid = slice(ulen - 1, mplen, Nc)
    # Numerator: data projected onto each detector vector.
    projected = np.fft.ifft(np.multiply(det_freq, data_freq)).real[:, valid]
    # Denominator: energy of the data within the boxcar window.
    window_energy = np.fft.ifft(np.multiply(w, data_freq_sq)).real[valid]
    # Ratio of projected energy (summed over detector vectors) to window
    # energy.
    return np.square(projected).sum(axis=0) / window_energy


def _subspace_process(streams, lowcut, highcut, filt_order, sampling_rate,
multiplex, align, shift_len, reject, no_missed=True,
stachans=None, parallel=False, plot=False):
Expand Down
67 changes: 0 additions & 67 deletions eqcorrscan/core/subspace_statistic.pyx

This file was deleted.

8 changes: 4 additions & 4 deletions eqcorrscan/tests/clustering_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,10 @@ def test_svd(self):
tr.detrend('simple')
tr.filter('bandpass', freqmin=5.0, freqmax=15.0)
tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45)
SVectors, SValues, Uvectors, stachans = svd(stream_list=stream_list)
UVectors, SValues, SVectors, stachans = svd(stream_list=stream_list)
self.assertEqual(len(SVectors), len(stachans))
self.assertEqual(len(SValues), len(stachans))
self.assertEqual(len(Uvectors), len(stachans))
self.assertEqual(len(UVectors), len(stachans))
for SVec in SVectors:
self.assertEqual(len(SVec), len(stream_list))
with warnings.catch_warnings(record=True) as w:
Expand Down Expand Up @@ -227,11 +227,11 @@ def test_svd_to_stream(self):
tr.resample(sampling_rate=samp_rate)
tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45)
SVectors, SValues, Uvectors, stachans = svd(stream_list=stream_list)
svstreams = svd_to_stream(svectors=SVectors, stachans=stachans, k=4,
svstreams = svd_to_stream(uvectors=SVectors, stachans=stachans, k=4,
sampling_rate=samp_rate)
self.assertEqual(len(svstreams), 4)
with warnings.catch_warnings(record=True) as w:
SVD_2_stream(SVectors=SVectors, stachans=stachans, k=4,
SVD_2_stream(uvectors=SVectors, stachans=stachans, k=4,
sampling_rate=samp_rate)
self.assertEqual(len(w), 1)
self.assertTrue('Depreciated' in str(w[0].message))
Expand Down
4 changes: 4 additions & 0 deletions eqcorrscan/tests/lag_calc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from eqcorrscan.core.match_filter import normxcorr2, Detection
from eqcorrscan.utils.sfile_util import read_event

warnings.simplefilter("always")


class TestMethods(unittest.TestCase):
@classmethod
Expand Down Expand Up @@ -233,6 +235,8 @@ def test_bad_interp(self):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
_xcorr_interp(ccc, 0.1)
for _w in w:
print(_w.message)
self.assertEqual(len(w), 2)
self.assertTrue('Less than 5 samples' in str(w[0].message))
self.assertTrue('Residual in quadratic fit' in str(w[1].message))
Expand Down
19 changes: 11 additions & 8 deletions eqcorrscan/tests/subspace_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from obspy import Stream, read

from eqcorrscan.core import subspace, subspace_statistic
from eqcorrscan.core import subspace


class SimpleSubspaceMethods(unittest.TestCase):
Expand Down Expand Up @@ -87,10 +87,13 @@ def test_stat(self):
detector.partition(2)
stream = read(os.path.join(os.path.abspath(os.path.dirname(__file__)),
'test_data', 'subspace', 'test_trace.ms'))
tr_data = stream[0].data[:,None].astype(np.float32)
stat = subspace_statistic.det_statistic(
detector.data[0].astype(np.float32), tr_data, np.uint32(1))
self.assertEqual((stat.max().round(6) - 0.306929).round(6), 0)
st = [stream]
fft_vars = subspace.do_ffts(detector, st, len(detector.stachans))
stat = subspace.det_stat_freq(fft_vars[0][0], fft_vars[1][0],
fft_vars[2][0], fft_vars[3],
len(detector.stachans), fft_vars[4],
fft_vars[5])
self.assertEqual((stat.max().round(6) - 0.229755).round(6), 0)


class SubspaceTestingMethods(unittest.TestCase):
Expand Down Expand Up @@ -327,7 +330,7 @@ def test_not_multiplexed(self):
st = self.st
detections = detector.detect(st=st, threshold=0.5, trig_int=4,
debug=1, moveout=2, min_trig=5)
self.assertEqual(len(detections), 17)
self.assertEqual(len(detections), 16)

def test_multi_detectors(self):
"""Test the efficient looping in subspace."""
Expand All @@ -349,14 +352,14 @@ def test_multi_detectors(self):
trig_int=10, moveout=5,
min_trig=5,
parallel=False, num_cores=2)
self.assertEqual(len(detections), 14)
self.assertEqual(len(detections), 6)
detections = subspace.subspace_detect(detectors=[detector1, detector2],
stream=self.st.copy(),
threshold=0.7,
trig_int=10, moveout=5,
min_trig=5,
parallel=True, num_cores=2)
self.assertEqual(len(detections), 14)
self.assertEqual(len(detections), 6)

def partition_fail(self):
templates = copy.deepcopy(self.templates)
Expand Down
2 changes: 1 addition & 1 deletion eqcorrscan/tests/synth_seis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_phaseout(self):

class TestSVDSim(unittest.TestCase):
def test_svd_sim(self):
V, s, U, stachans = SVD_sim(sp=15, lowcut=2, highcut=8, samp_rate=20)
U, s, V, stachans = SVD_sim(sp=15, lowcut=2, highcut=8, samp_rate=20)
self.assertEqual(V[0].shape[0], s[0].shape[0])
self.assertEqual(V[0].shape[0], U[0].shape[1])
self.assertEqual(len(stachans), 1)
Expand Down
2 changes: 1 addition & 1 deletion eqcorrscan/tests/tutorials_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_lag_calc(self):
def test_subspace(self):
"""Test the subspace tutorial."""
detections = subspace.run_tutorial(plot=False)
self.assertEqual(len(detections), 2)
self.assertEqual(len(detections), 11)

if __name__ == '__main__':
"""
Expand Down
2 changes: 1 addition & 1 deletion eqcorrscan/tutorials/subspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def run_tutorial(plot=False, multiplex=True, return_streams=False):
# We set a very low threshold because the detector is not that great, we
# haven't aligned it particularly well - however, at this threshold we make
# two real detections.
detections, det_streams = detector.detect(st=st, threshold=0.005,
detections, det_streams = detector.detect(st=st, threshold=0.3,
trig_int=2,
extract_detections=True)
if return_streams:
Expand Down
Loading