Skip to content

Commit abc4395

Browse files
cmichelenstrofer, ssolson, and akeeste
authored
Automatic threshold for peaks-over-threshold (#268)
* automatic Hs threshold * Revert "automatic Hs threshold" This reverts commit 4477580. * automatic Hs threshold * simplify & include MATLAB example for debugging * fix independent storms * Update mhkit/loads/extreme.py Co-authored-by: Adam Keester <[email protected]> * Update mhkit/loads/extreme.py Co-authored-by: Adam Keester <[email protected]> * Update mhkit/loads/extreme.py Co-authored-by: Adam Keester <[email protected]> * Update mhkit/loads/extreme.py Co-authored-by: Adam Keester <[email protected]> * cleanup * Update mhkit/tests/loads/test_loads.py * Update mhkit/tests/loads/test_loads.py Update threshold test value one more time * Update extreme.py * break out nested function, consolidate scipy imports --------- Co-authored-by: ssolson <[email protected]> Co-authored-by: Adam Keester <[email protected]> Co-authored-by: akeeste <[email protected]>
1 parent e89deab commit abc4395

File tree

3 files changed

+136
-2
lines changed

3 files changed

+136
-2
lines changed

examples/data/loads/data_loads_hs.csv

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

mhkit/loads/extreme.py

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,41 @@
11
import numpy as np
22
import pandas as pd
3-
from scipy import stats
4-
from scipy import optimize
3+
from scipy import stats, optimize, signal
54
from mhkit.wave.resource import frequency_moment
65

6+
def _peaks_over_threshold(peaks, threshold, sampling_rate):
    """
    Return the independent storm peak exceedances over a threshold.

    The threshold is given as a quantile of the peaks (e.g. 0.995) and
    converted to the peaks' units via the Hazen percentile. Storm peaks
    closer together than the decorrelation window — the first lag at
    which the autocorrelation of the peaks drops below 0.5, searched
    over the first 14 days — are merged, keeping the larger peak, to
    ensure independence between storms.

    Parameters
    ----------
    peaks: np.array
        Peak values of the response time-series.
    threshold: float
        Threshold expressed as a quantile of the peaks, in [0, 1).
    sampling_rate: float
        Sampling rate in hours.

    Returns
    -------
    list
        Exceedances (peak minus threshold) of the independent storms.

    Raises
    ------
    ValueError
        If the peaks remain correlated (ACF >= 0.5) over the entire
        14-day search window, so no independence window exists.
    """
    threshold_unit = np.percentile(peaks, 100 * threshold, method='hazen')
    idx_peaks = np.arange(len(peaks))
    idx_storm_peaks, storm_peaks = global_peaks(
        idx_peaks, peaks - threshold_unit)
    idx_storm_peaks = idx_storm_peaks.astype(int)

    # Two storms that are close enough (within specified window) are
    # considered the same storm, to ensure independence.
    independent_storm_peaks = [storm_peaks[0], ]
    idx_independent_storm_peaks = [idx_storm_peaks[0], ]
    # check first 14 days to determine window size
    nlags = int(14 * 24 / sampling_rate)
    x = peaks - np.mean(peaks)
    acf = signal.correlate(x, x, mode="full")
    lag = signal.correlation_lags(len(x), len(x), mode="full")
    idx_zero = np.argmax(lag == 0)
    positive_lag = lag[idx_zero:idx_zero + nlags + 1]
    acf_positive = acf[idx_zero:idx_zero + nlags + 1] / acf[idx_zero]

    decorrelated_lags = positive_lag[acf_positive < 0.5]
    if not len(decorrelated_lags):
        # Guard: the original indexing raised a bare IndexError here when
        # the ACF never fell below 0.5 within the first 14 days.
        raise ValueError(
            "Peaks remain correlated over the first 14 days; cannot "
            "determine an independence window.")
    # Window in "observations" (index distance) between peaks: the first
    # lag at which the normalized ACF falls below 0.5. (The original
    # multiplied and then divided by sampling_rate, a float-noise no-op.)
    window = decorrelated_lags[0]
    # keep only independent storm peaks
    for idx in idx_storm_peaks[1:]:
        if (idx - idx_independent_storm_peaks[-1]) > window:
            # far enough from the previous storm: a new independent storm
            idx_independent_storm_peaks.append(idx)
            independent_storm_peaks.append(peaks[idx] - threshold_unit)
        elif peaks[idx] - threshold_unit > independent_storm_peaks[-1]:
            # Same storm: keep the larger peak. (Fix: compare exceedance
            # to exceedance; the original compared the raw peak against a
            # stored exceedance, biasing the choice by threshold_unit.)
            idx_independent_storm_peaks[-1] = idx
            independent_storm_peaks[-1] = peaks[idx] - threshold_unit

    return independent_storm_peaks
739

840
def global_peaks(t, data):
941
"""
@@ -157,6 +189,100 @@ def peaks_distribution_weibull_tail_fit(x):
157189
return peaks
158190

159191

192+
def automatic_hs_threshold(
    peaks,
    sampling_rate,
    initial_threshold_range=(0.990, 0.995, 0.001),
    max_refinement=5
):
    """
    Find the best significant wave height threshold for the
    peaks-over-threshold method.

    This method was developed by:

    > Neary, V. S., S. Ahn, B. E. Seng, M. N. Allahdadi, T. Wang, Z. Yang
    > and R. He (2020). "Characterization of Extreme Wave Conditions for
    > Wave Energy Converter Design and Project Risk Assessment."
    > J. Mar. Sci. Eng. 2020, 8(4), 289; https://doi.org/10.3390/jmse8040289.

    please cite this paper if using this method.

    After all thresholds in the initial range are evaluated, the search
    range is refined around the optimal point until either (i) there
    is minimal change from the previous refinement results, (ii) the
    number of data points become smaller than about 1 per year, or (iii)
    the maximum number of iterations is reached.

    Parameters
    ----------
    peaks: np.array
        Peak values of the response time-series
    sampling_rate: float
        Sampling rate in hours.
    initial_threshold_range: tuple
        Initial range of thresholds to search. Described as
        (min, max, step).
    max_refinement: int
        Maximum number of times to refine the search range.

    Returns
    -------
    best_threshold: float
        Threshold (as a quantile of the peaks) that results in the best
        correlation.
    best_threshold_unit: float
        The best threshold converted to the units of the peaks (Hazen
        percentile of the peaks at `best_threshold`).

    Raises
    ------
    ValueError
        If every candidate threshold leaves fewer than about 2 storms
        per year, so no fit can be evaluated.
    """
    if not isinstance(sampling_rate, (float, int)):
        raise TypeError('sampling_rate must be of type float or int')

    if not isinstance(peaks, np.ndarray):
        raise TypeError('peaks must be of type np.ndarray')

    if not len(initial_threshold_range) == 3:
        raise ValueError('initial_threshold_range must be length 3')

    if not isinstance(max_refinement, int):
        raise TypeError('max_refinement must be of type int')

    range_min, range_max, range_step = initial_threshold_range
    best_threshold = -1
    # record length in years, given one observation every sampling_rate hours
    years = len(peaks) / (365.25 * 24 / sampling_rate)

    # GPD is the asymptotic distribution of threshold exceedances;
    # loop-invariant, so bound once outside the search loops.
    distribution = stats.genpareto

    for i in range(max_refinement):
        thresholds = np.arange(range_min, range_max, range_step)
        correlations = []

        for threshold in thresholds:
            over_threshold = _peaks_over_threshold(
                peaks, threshold, sampling_rate)
            rate_per_year = len(over_threshold) / years
            # stop raising the threshold once fewer than ~2 storms/year
            # remain (too few data points for a meaningful fit)
            if rate_per_year < 2:
                break
            distributions_parameters = distribution.fit(
                over_threshold, floc=0.)
            # Goodness of fit: probability-plot correlation of the
            # exceedances the GPD was fit to. (Fix: the original passed
            # the full `peaks` array instead of `over_threshold`.)
            _, (_, _, correlation) = stats.probplot(
                over_threshold, distributions_parameters, distribution,
                fit=True)
            correlations.append(correlation)

        if not len(correlations):
            # Guard: np.argmax([]) would raise an opaque error when even
            # the lowest candidate threshold leaves <2 storms/year.
            raise ValueError(
                'Fewer than 2 storms per year at every candidate '
                'threshold; provide a longer record or lower the '
                'initial_threshold_range.')

        max_i = np.argmax(correlations)
        minimal_change = np.abs(best_threshold - thresholds[max_i]) < 0.0005
        best_threshold = thresholds[max_i]
        # stop early once refinement no longer moves the optimum
        if minimal_change and i < max_refinement - 1:
            break
        # refine the search grid around the current best threshold
        range_step /= 10
        if max_i == len(thresholds) - 1:
            # optimum at the top edge: extend the search upward
            range_min = thresholds[max_i - 1]
            range_max = thresholds[max_i] + 5 * range_step
        elif max_i == 0:
            # optimum at the bottom edge: extend the search downward
            range_min = thresholds[max_i] - 9 * range_step
            range_max = thresholds[max_i + 1]
        else:
            range_min = thresholds[max_i - 1]
            range_max = thresholds[max_i + 1]

    # convert the best quantile back to the units of the peaks
    best_threshold_unit = np.percentile(
        peaks, 100 * best_threshold, method='hazen')
    return best_threshold, best_threshold_unit
284+
285+
160286
def peaks_distribution_peaks_over_threshold(x, threshold=None):
161287
"""
162288
Estimate the peaks distribution using the peaks over threshold

mhkit/tests/loads/test_loads.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,13 @@ def test_shortterm_extreme(self):
218218
ste = loads.extreme.ste(t, data, t_st, method)
219219
assert_allclose(ste.cdf(x), cdf_1)
220220

221+
    def test_automatic_threshold(self):
        # Regression test for extreme.automatic_hs_threshold on the
        # bundled significant-wave-height dataset.
        filename = "data_loads_hs.csv"
        data = np.loadtxt(os.path.join(datadir, filename), delimiter=",")
        # NOTE(review): this value is passed as the second positional
        # argument of automatic_hs_threshold, which is `sampling_rate`
        # (in hours), yet the local name suggests a duration in years —
        # confirm which is intended.
        years = 2.97
        pct, threshold = loads.extreme.automatic_hs_threshold(data, years)
        # expected quantile and threshold (in Hs units) pinned by this commit
        assert np.isclose(pct, 0.9913)
        assert np.isclose(threshold, 1.032092)
221228

222229
if __name__ == '__main__':
223230
unittest.main()

0 commit comments

Comments
 (0)