Source code for epistoch.utils.stats

# -*- coding: utf-8 -*-
"""
Created on Wed Apr 15 15:14:55 2020

@author: Germán Riaño
"""

import math
import warnings

import numpy as np
from scipy import integrate, stats


[docs]def get_lognorm(mu, sigma): """ Builds a lognormal distribution from mean and standard deviation for this variable. (Not the mean and sd of the corresponding normal) Parameters ---------- mu : double The expected value sigma : double standard deviation Returns ------- A frozen ditribution object """ sigma2 = sigma * sigma mu2 = mu * mu ln_mu = np.log(mu2 / np.sqrt(mu2 + sigma2)) ln_sigma = np.sqrt(np.log(1 + sigma2 / mu2)) return stats.lognorm(s=ln_sigma, scale=math.exp(ln_mu))
[docs]def get_gamma(mu, sigma): """ Builds a gamma distribution from mean and standard deviation for this variable. Parameters ---------- mu : double The expected value sigma : double standard deviation Returns ------- A frozen ditribution object """ alpha = (mu / sigma) ** 2 beta = mu / alpha return stats.gamma(a=alpha, scale=beta)
[docs]class ConstantDist(stats.rv_continuous): def _cdf(self, x): return np.where(x >= 0, 1.0, 0.0) def _ppf(self, p): return 0.0 def _loss1(self, x): return np.where(x > 0, 0.0, -x) # [E(0-x)^+]
constant = ConstantDist(a=0.0, name="constant")
[docs]def loss_function(dist, force=False): """ Creates a loss function of order 1 for a distribution from scipy Parameters ---------- dist : scipy.stats._distn_infrastructure.rv_froze a distribution object form scipy.stats force : boolean whether force an integral computation instead of known formula Returns ------- Callable that represent this loss function """ lo, hi = dist.support() loc, scale = None, None if not force: if "loss1" in dir(dist): return dist.loss1 name = None return_fun = None if "name" in dir(dist): name = dist.name elif "dist" in dir(dist): if "name" in dir(dist.dist): name = dist.dist.name if name == "expon": return_fun = lambda x: np.exp(-x) if name == "gamma": a = dist.kwds["a"] return_fun = lambda x: a * stats.gamma.sf(x, a=a + 1) - x * stats.gamma.sf(x, a) # Standard normal loss function used below if name == "norm": # loc and scale not set for the normal loc = dist.args[0] if len(dist.args) > 1 else None scale = dist.args[1] if len(dist.args) > 2 else None return_fun = lambda z: stats.norm.pdf(z) - z * stats.norm.sf(z) if name == "lognorm": def loss1(x): s = dist.kwds["s"] with warnings.catch_warnings(): warnings.filterwarnings("ignore") result = np.exp(0.5 * s * s) * stats.norm.sf(np.log(x) / s - s) - x * stats.norm.sf(np.log(x) / s) return result return_fun = loss1 if name == "constant": loc = dist.args[0] if len(dist.args) > 0 else None return_fun = dist.dist._loss1 if return_fun is not None: loc = dist.kwds.get("loc", 0.0) if loc is None else loc scale = dist.kwds.get("scale", 1.0) if scale is None else scale loss1 = lambda x: np.where(x > lo, scale * return_fun((x - loc) / scale), dist.mean() - x) return loss1 loss1 = np.vectorize(lambda x: integrate.quad(dist.sf, x, hi)[0]) return loss1
[docs]def avg_recovery_rate(dist): loss = loss_function(dist) def integrand(t): sf = dist.sf(t) lss = loss(t) result = np.zeros_like(t) valid = lss > 0 result[valid] = sf[valid] * sf[valid] / lss[valid] return result gam = 1.0 / dist.mean() a, b = dist.support() result = gam * integrate.quad(integrand, a, b)[0] return result