Source code for mhkit.loads.extreme.extremes

"""
This module provides functionality for estimating the short-term and
long-term extreme distributions of responses in a time series. It 
includes methods for analyzing peaks, block maxima, and applying 
statistical distributions to model extreme events. The module supports 
various methods for short-term extreme estimation, including peaks 
fitting with Weibull, tail fitting, peaks over threshold, and block 
maxima methods with GEV (Generalized Extreme Value) and Gumbel 
distributions. Additionally, it offers functionality to approximate 
the long-term extreme distribution by weighting short-term extremes 
across different sea states.

Functions:
- ste_peaks: Estimates the short-term extreme distribution from peaks 
    distribution using specified statistical methods.
- block_maxima: Finds the block maxima in a time-series data to be used
    in block maxima methods.
- ste_block_maxima_gev: Approximates the short-term extreme distribution 
    using the block maxima method with the GEV distribution.
- ste_block_maxima_gumbel: Approximates the short-term extreme 
    distribution using the block maxima method with the Gumbel distribution.
- ste: Alias for `short_term_extreme`, facilitating easier access to the 
    primary functionality of estimating short-term extremes.
- short_term_extreme: Core function to approximate the short-term extreme 
    distribution from a time series using chosen methods.
- full_seastate_long_term_extreme: Combines short-term extreme 
    distributions using weights to estimate the long-term extreme distribution.
"""

from typing import Union

import numpy as np
from scipy import stats
from scipy.stats import rv_continuous

import mhkit.loads.extreme.peaks as peaks_distributions



[docs]
def ste_peaks(peaks_distribution: rv_continuous, npeaks: float) -> rv_continuous:
    """
    Estimate the short-term extreme distribution from the peaks
    distribution.

    Parameters
    ----------
    peaks_distribution: scipy.stats.rv_frozen
        Probability distribution of the peaks.
    npeaks : float
        Number of peaks in short term period.

    Returns
    -------
    short_term_extreme: scipy.stats.rv_frozen
            Short-term extreme distribution.
    """
    if not callable(peaks_distribution.cdf):
        raise TypeError("peaks_distribution must be a scipy.stat distribution.")
    if not isinstance(npeaks, float):
        raise TypeError(f"npeaks must be of type float. Got: {type(npeaks)}")

    class _ShortTermExtreme(stats.rv_continuous):
        def __init__(self, *args, **kwargs):
            self.peaks = kwargs.pop("peaks_distribution")
            self.npeaks = kwargs.pop("npeaks")
            super().__init__(*args, **kwargs)

        def _cdf(self, x, *args, **kwargs):
            peaks_cdf = np.array(self.peaks.cdf(x, *args, **kwargs))
            peaks_cdf[np.isnan(peaks_cdf)] = 0.0
            if len(peaks_cdf) == 1:
                peaks_cdf = peaks_cdf[0]
            return peaks_cdf**self.npeaks

    short_term_extreme_peaks = _ShortTermExtreme(
        name="short_term_extreme", peaks_distribution=peaks_distribution, npeaks=npeaks
    )
    return short_term_extreme_peaks




[docs]
def block_maxima(
    time: np.ndarray, global_peaks_data: np.ndarray, time_st: float
) -> np.ndarray:
    """
    Find the block maxima of a time-series.

    The timeseries (time, global_peaks) is divided into blocks of length t_st, and the
    maxima of each bloock is returned.

    Parameters
    ----------
    time : np.array
        Time array.
    global_peaks_data : np.array
        global peaks timeseries.
    time_st : float
        Short-term period.

    Returns
    -------
    block_max: np.array
        Block maxima (i.e. largest peak in each block).
    """
    if not isinstance(time, np.ndarray):
        raise TypeError(f"time must be of type np.ndarray. Got: {type(time)}")
    if not isinstance(global_peaks_data, np.ndarray):
        raise TypeError(
            f"global_peaks_data must be of type np.ndarray. Got: {type(global_peaks_data)}"
        )
    if not isinstance(time_st, float):
        raise TypeError(f"time_st must be of type float. Got: {type(time_st)}")

    nblock = int(time[-1] / time_st)
    block_max = np.zeros(int(nblock))
    for iblock in range(nblock):
        i_x = global_peaks_data[
            (time >= iblock * time_st) & (time < (iblock + 1) * time_st)
        ]
        block_max[iblock] = np.max(i_x)
    return block_max




[docs]
def ste_block_maxima_gev(block_max):
    """
    Approximate the short-term extreme distribution using the block
    maxima method and the Generalized Extreme Value distribution.

    Parameters
    ----------
    block_max: np.array
        Block maxima (i.e. largest peak in each block).

    Returns
    -------
    short_term_extreme_rv: scipy.stats.rv_frozen
            Short-term extreme distribution.
    """
    if not isinstance(block_max, np.ndarray):
        raise TypeError(f"block_max must be of type np.ndarray. Got: {type(block_max)}")

    ste_params = stats.genextreme.fit(block_max)
    param_names = ["c", "loc", "scale"]
    ste_params = dict(zip(param_names, ste_params))
    short_term_extreme_rv = stats.genextreme(**ste_params)
    short_term_extreme_rv.params = ste_params
    return short_term_extreme_rv




[docs]
def ste_block_maxima_gumbel(block_max):
    """
    Approximate the short-term extreme distribution using the block
    maxima method and the Gumbel (right) distribution.

    Parameters
    ----------
    block_max: np.array
        Block maxima (i.e. largest peak in each block).

    Returns
    -------
    ste: scipy.stats.rv_frozen
            Short-term extreme distribution.
    """
    if not isinstance(block_max, np.ndarray):
        raise TypeError(f"block_max must be of type np.ndarray. Got: {type(block_max)}")

    ste_params = stats.gumbel_r.fit(block_max)
    param_names = ["loc", "scale"]
    ste_params = dict(zip(param_names, ste_params))
    short_term_extreme_rv = stats.gumbel_r(**ste_params)
    short_term_extreme_rv.params = ste_params
    return short_term_extreme_rv




[docs]
def ste(time: np.ndarray, data: np.ndarray, t_st: float, method: str) -> rv_continuous:
    """
    Alias for `short_term_extreme`.
    """
    ste_dist = short_term_extreme(time, data, t_st, method)
    return ste_dist




[docs]
def short_term_extreme(
    time: np.ndarray, data: np.ndarray, t_st: float, method: str
) -> Union[rv_continuous, None]:
    """
    Approximate the short-term  extreme distribution from a
    timeseries of the response using chosen method.

    The availabe methods are: 'peaks_weibull', 'peaks_weibull_tail_fit',
    'peaks_over_threshold', 'block_maxima_gev', and 'block_maxima_gumbel'.
    For the block maxima methods the timeseries needs to be many times
    longer than the short-term period. For the peak-fitting methods the
    timeseries can be of arbitrary length.

    Parameters
    ----------
    time: np.array
        Time array.
    data: np.array
        Response timeseries.
    t_st: float
        Short-term period.
    method : string
        Method for estimating the short-term extreme distribution.

    Returns
    -------
    short_term_extreme_dist: scipy.stats.rv_frozen
            Short-term extreme distribution.
    """
    if not isinstance(time, np.ndarray):
        raise TypeError(f"time must be of type np.ndarray. Got: {type(time)}")
    if not isinstance(data, np.ndarray):
        raise TypeError(f"data must be of type np.ndarray. Got: {type(data)}")
    if not isinstance(t_st, float):
        raise TypeError(f"t_st must be of type float. Got: {type(t_st)}")
    if not isinstance(method, str):
        raise TypeError(f"method must be of type string. Got: {type(method)}")

    peaks_methods = {
        "peaks_weibull": peaks_distributions.peaks_distribution_weibull,
        "peaks_weibull_tail_fit": peaks_distributions.peaks_distribution_weibull_tail_fit,
        "peaks_over_threshold": peaks_distributions.peaks_distribution_peaks_over_threshold,
    }
    blockmaxima_methods = {
        "block_maxima_gev": ste_block_maxima_gev,
        "block_maxima_gumbel": ste_block_maxima_gumbel,
    }

    if method in peaks_methods:
        fit_peaks = peaks_methods[method]
        _, peaks = peaks_distributions.global_peaks(time, data)
        npeaks = len(peaks)
        time = time[-1] - time[0]
        nst = peaks_distributions.number_of_short_term_peaks(npeaks, time, t_st)
        peaks_dist = fit_peaks(peaks)
        short_term_extreme_dist = ste_peaks(peaks_dist, nst)
    elif method in blockmaxima_methods:
        fit_maxima = blockmaxima_methods[method]
        maxima = block_maxima(time, data, t_st)
        short_term_extreme_dist = fit_maxima(maxima)
    else:
        print("Passed `method` not found.")
        short_term_extreme_dist = None
    return short_term_extreme_dist




[docs]
def full_seastate_long_term_extreme(short_term_extreme_dist, weights):
    """
    Return the long-term extreme distribution of a response of
    interest using the full sea state approach.

    Parameters
    ----------
    ste: list[scipy.stats.rv_frozen]
        Short-term extreme distribution of the quantity of interest for
        each sample sea state.
    weights: list, np.ndarray
        The weights from the full sea state sampling

    Returns
    -------
    ste: scipy.stats.rv_frozen
        Short-term extreme distribution.
    """
    if not isinstance(short_term_extreme_dist, list):
        raise TypeError(
            "short_term_extreme_dist must be of type list[scipy.stats.rv_frozen]."
            + f"Got: {type(short_term_extreme_dist)}"
        )
    if not isinstance(weights, (list, np.ndarray)):
        raise TypeError(
            f"weights must be of type list or np.ndarray. Got: {type(weights)}"
        )

    class _LongTermExtreme(stats.rv_continuous):
        def __init__(self, *args, **kwargs):
            weights = kwargs.pop("weights")
            # make sure weights add to 1.0
            self.weights = weights / np.sum(weights)
            self.ste = kwargs.pop("ste")
            # Disabled bc not sure where/ how n is applied
            self.n = len(self.weights)  # pylint: disable=invalid-name
            super().__init__(*args, **kwargs)

        def _cdf(self, x, *args, **kwargs):
            weighted_cdf = 0.0
            for w_i, ste_i in zip(self.weights, self.ste):
                weighted_cdf += w_i * ste_i.cdf(x, *args, **kwargs)
            return weighted_cdf

    return _LongTermExtreme(
        name="long_term_extreme", weights=weights, ste=short_term_extreme_dist
    )