|
1 | 1 | import numpy as np |
2 | 2 | import pandas as pd |
3 | | -from scipy import stats |
4 | | -from scipy import optimize |
| 3 | +from scipy import stats, optimize, signal |
5 | 4 | from mhkit.wave.resource import frequency_moment |
6 | 5 |
|
def _peaks_over_threshold(peaks, threshold, sampling_rate):
    """
    Identify independent storm peak exceedances above a quantile threshold.

    Parameters
    ----------
    peaks: np.array
        Peak values of the response time-series.
    threshold: float
        Quantile in [0, 1] used to set the threshold value (e.g. 0.995).
    sampling_rate: float
        Sampling rate in hours between successive peaks.

    Returns
    -------
    independent_storm_peaks: list of float
        Exceedances (peak minus threshold value) of independent storms.
    """
    # Convert the quantile threshold into the units of the peaks.
    threshold_unit = np.percentile(peaks, 100*threshold, method='hazen')
    idx_peaks = np.arange(len(peaks))
    # Local maxima of the exceedance series; the indices serve as a
    # time axis (in units of observations).
    idx_storm_peaks, storm_peaks = global_peaks(
        idx_peaks, peaks-threshold_unit)
    idx_storm_peaks = idx_storm_peaks.astype(int)

    # Two storms that are close enough (within specified window) are
    # considered the same storm, to ensure independence.
    independent_storm_peaks = [storm_peaks[0],]
    idx_independent_storm_peaks = [idx_storm_peaks[0],]
    # check first 14 days to determine window size
    nlags = int(14 * 24 / sampling_rate)
    x = peaks - np.mean(peaks)
    acf = signal.correlate(x, x, mode="full")
    lag = signal.correlation_lags(len(x), len(x), mode="full")
    idx_zero = np.argmax(lag==0)
    positive_lag = lag[(idx_zero):(idx_zero+nlags+1)]
    # Normalize by the zero-lag value so acf_positive[0] == 1.
    acf_positive = acf[(idx_zero):(idx_zero+nlags+1)] / acf[idx_zero]

    # First lag at which the autocorrelation drops below 0.5 defines the
    # decorrelation window (in hours).
    window_size = sampling_rate * positive_lag[acf_positive<0.5][0]
    # window size in "observations" instead of "hours" between peaks.
    window = window_size / sampling_rate
    # keep only independent storm peaks
    for idx in idx_storm_peaks[1:]:
        if (idx - idx_independent_storm_peaks[-1]) > window:
            # Far enough from the previous storm: new independent storm.
            idx_independent_storm_peaks.append(idx)
            independent_storm_peaks.append(peaks[idx]-threshold_unit)
        # NOTE(review): this compares the raw peak value against the stored
        # exceedance (peak - threshold_unit); presumably both sides were
        # meant to be exceedances — confirm against the reference method.
        elif peaks[idx] > independent_storm_peaks[-1]:
            # Same storm but larger peak: replace the previous record.
            idx_independent_storm_peaks[-1] = idx
            independent_storm_peaks[-1] = peaks[idx]-threshold_unit

    return independent_storm_peaks
7 | 39 |
|
8 | 40 | def global_peaks(t, data): |
9 | 41 | """ |
@@ -157,6 +189,100 @@ def peaks_distribution_weibull_tail_fit(x): |
157 | 189 | return peaks |
158 | 190 |
|
159 | 191 |
|
def automatic_hs_threshold(
    peaks,
    sampling_rate,
    initial_threshold_range = (0.990, 0.995, 0.001),
    max_refinement=5
):
    """
    Find the best significant wave height threshold for the
    peaks-over-threshold method.

    This method was developed by:

    > Neary, V. S., S. Ahn, B. E. Seng, M. N. Allahdadi, T. Wang, Z. Yang and R. He (2020).
    > "Characterization of Extreme Wave Conditions for Wave Energy Converter Design and Project Risk Assessment."
    > J. Mar. Sci. Eng. 2020, 8(4), 289; https://doi.org/10.3390/jmse8040289.

    please cite this paper if using this method.

    After all thresholds in the initial range are evaluated, the search
    range is refined around the optimal point until either (i) there
    is minimal change from the previous refinement results, (ii) the
    number of data points become smaller than about 1 per year, or (iii)
    the maximum number of iterations is reached.

    Parameters
    ----------
    peaks: np.array
        Peak values of the response time-series
    sampling_rate: float
        Sampling rate in hours.
    initial_threshold_range: tuple
        Initial range of thresholds to search. Described as
        (min, max, step).
    max_refinement: int
        Maximum number of times to refine the search range.

    Returns
    -------
    best_threshold: float
        Threshold (as a quantile of the peaks) that results in the best
        correlation.
    best_threshold_unit: float
        Threshold in the units of the peaks (e.g. meters for Hs).

    Raises
    ------
    TypeError
        If ``peaks``, ``sampling_rate``, or ``max_refinement`` have the
        wrong type.
    ValueError
        If ``initial_threshold_range`` does not have three elements, or
        if no threshold retains at least ~2 storms per year.
    """
    if not isinstance(sampling_rate, (float, int)):
        raise TypeError('sampling_rate must be of type float or int')

    if not isinstance(peaks, np.ndarray):
        raise TypeError('peaks must be of type np.ndarray')

    if not len(initial_threshold_range) == 3:
        raise ValueError('initial_threshold_range must be length 3')

    if not isinstance(max_refinement, int):
        raise TypeError('max_refinement must be of type int')

    range_min, range_max, range_step = initial_threshold_range
    best_threshold = -1
    # Record length in years, used to enforce a minimum storm rate.
    years = len(peaks)/(365.25*24/sampling_rate)

    for i in range(max_refinement):
        thresholds = np.arange(range_min, range_max, range_step)
        correlations = []

        for threshold in thresholds:
            distribution = stats.genpareto
            over_threshold = _peaks_over_threshold(
                peaks, threshold, sampling_rate)
            rate_per_year = len(over_threshold) / years
            # Stop raising the threshold once fewer than ~2 storms/year
            # remain; higher thresholds only retain fewer.
            if rate_per_year < 2:
                break
            distributions_parameters = distribution.fit(
                over_threshold, floc=0.)
            # Goodness-of-fit must be scored on the exceedances that were
            # actually fit (over_threshold), not on the full peaks series;
            # otherwise the correlation is independent of the threshold.
            _, (_, _, correlation) = stats.probplot(
                over_threshold, distributions_parameters, distribution,
                fit=True)
            correlations.append(correlation)

        if not correlations:
            # Not even the lowest threshold retained ~2 storms/year; fail
            # with a clear message instead of np.argmax([]) erroring.
            raise ValueError(
                'Unable to find a threshold that retains 2 or more '
                'peaks per year. Consider a lower '
                'initial_threshold_range.')

        max_i = np.argmax(correlations)
        minimal_change = np.abs(best_threshold - thresholds[max_i]) < 0.0005
        best_threshold = thresholds[max_i]
        # Stop early once refinement no longer moves the optimum (but
        # always allow the final iteration to record its result).
        if minimal_change and i<max_refinement-1:
            break
        # Narrow the search range around the current optimum.
        range_step /= 10
        if max_i == len(thresholds)-1:
            # Optimum at the upper edge: extend the range upward.
            range_min = thresholds[max_i - 1]
            range_max = thresholds[max_i] + 5*range_step
        elif max_i == 0:
            # Optimum at the lower edge: extend the range downward.
            range_min = thresholds[max_i] - 9*range_step
            range_max = thresholds[max_i + 1]
        else:
            range_min = thresholds[max_i-1]
            range_max = thresholds[max_i+1]

    # Convert the quantile threshold into the units of the peaks.
    best_threshold_unit = np.percentile(peaks, 100*best_threshold, method='hazen')
    return best_threshold, best_threshold_unit
| 284 | + |
| 285 | + |
160 | 286 | def peaks_distribution_peaks_over_threshold(x, threshold=None): |
161 | 287 | """ |
162 | 288 | Estimate the peaks distribution using the peaks over threshold |
|
0 commit comments