from joblib import Parallel, delayed
from . import imgs
from . import hlpr as _dh
import numpy as np
from sklearn.utils.validation import check_is_fitted
import matplotlib.pyplot as plt
import scipy.stats as stat
[docs]class VacuumSeries(imgs.ImageSeries):
"""
Functionality to generate vacuum region videos for multiple hypothesis testing.
"""
def __init__(self, vacuum_video, observed_ImageSeries,
parametric=True, div=1, n_jobs=None):
super().__init__(vacuum_video, None, div=div, n_jobs=n_jobs)
if not isinstance(observed_ImageSeries, imgs.ImageSeries):
raise TypeError("Argument must be of class Image Series")
self.observed_ImageSeries = observed_ImageSeries
#last two elements of array corresponding to the shape
self.size = self.observed_ImageSeries.video.shape[-2:]
self.parametric = parametric
[docs] def fit(self, convert_to_int=False):
"""
Fits the Poisson mle for the vacuum region if parametric==True
Else, it fits the empirical probability mass function.
"""
if convert_to_int:
emp_vals = np.ndarray.astype(self.video.flatten(), "int64")
else:
if self.video.dtype.kind != "i":
raise TypeError("Vacuum video must be of signed integer data type. If you would like to convert \
to signed integer data type, rerun self.fit with convert_to_int=True")
else:
emp_vals = self.video.flatten()
bin_vals = np.bincount(emp_vals)
self.n_ = len(emp_vals)
self.probs_ = bin_vals/np.sum(bin_vals)
self.vals_ = np.arange(len(bin_vals))
self.mle_ = np.sum(self.probs_ * self.vals_)
[docs] def kolm_dist(self):
"""
Check how far the empirical distribution of vacuum values is from Poisson
with parameter equal to mle, in terms of the Kolmogorov distance
Uses the DKW inequality with the tight constant = 2 for Poisson testing. """
check_is_fitted(self)
emp_dist = stat.rv_discrete(values=(self.vals_, self.probs_))
ks_dist = np.max([np.abs(emp_dist.cdf(k)-stat.poisson.cdf(k, self.mle_))
for k in range(np.min(self.vals_), np.max(self.vals_)+1)])
p_val = 2*np.exp(-2*self.n_*ks_dist**2)
self.ks_test = {'p_val': p_val, 'ks_dist': ks_dist}
print(self.ks_test)
[docs] def gen_images(self, n):
"Generate and return a random image according to estimated null distribution"
check_is_fitted(self)
if self.parametric:
return np.random.poisson(self.mle_, size=(n, *self.size))
else:
return np.random.choice(self.vals_, size=(n, *self.size), p=self.probs_)
[docs] def adjust_alpha(self, alpha, conservative=False):
"""
Adjust p-values based on a different alpha value.
Parameters
----------
alpha : float
Statistical significance level.
conservative : bool, optional
Whether to use Benjamini-Yekutieli. The default is False.
Returns
-------
None.
"""
self.reject_dict = _dh.calc_reject(self.__pvals, self.obs_vals, alpha=alpha, conservative=conservative)
self.alpha=alpha
[docs] def plot_hypo(self):
"""
Plots hypothesis testing sequence.
"""
begins, ends = _dh.get_be(self.reject_dict["reject_bool"])
xv = np.arange(1, len(self.obs_vals)+1)
if self.func == "pers_entr":
plt.plot(xv, -self.obs_vals, lw=0.7, color="black")
####Note this plot becomes really bad when there are ties...
if self.reject_dict["reject_thr_ind"] is None:
print("No hypotheses were rejected.")
else:
plt.axhline(y=-self.obs_vals[self.reject_dict["reject_thr_ind"]], color="black", linestyle="dashed", linewidth=0.75)
plt.scatter(x=self.reject_dict["reject_ind"]+1, y=-self.obs_vals[self.reject_dict["reject_ind"]],
color="red", s=2)
plt.hlines(y=-np.repeat(np.max(self.obs_vals)+0.1, len(begins)), xmin=begins+1, xmax=ends+1, color="black")
#should adjust this 0.1 to be different based on scale...
plt.xlabel("Frame")
plt.ylabel("Persistent entropy")
plt.title("Persistent entropy across frames")
else:
plt.plot(xv, self.obs_vals, lw=0.7, color="black")
####Note this plot becomes really bad when there are ties...
if self.reject_dict["reject_thr_ind"] is None:
print("No hypotheses were rejected.")
else:
plt.axhline(y=self.obs_vals[self.reject_dict["reject_thr_ind"]], color="black", linestyle="dashed", linewidth=0.75)
plt.scatter(x=self.reject_dict["reject_ind"]+1, y=self.obs_vals[self.reject_dict["reject_ind"]],
color="red", s=2)
plt.hlines(y=np.repeat(np.max(self.obs_vals)+0.1, len(begins)), xmin=begins+1, xmax=ends+1, color="black")
#should adjust this 0.1 to be different based on scale...
plt.xlabel("Frame")
if self.func == "alps":
# plt.ylabel(r'$\Delta(A(I_{k}))$')
plt.ylabel("ALPS statistic")
plt.title("ALPS statistic across frames")
elif self.func == "degp_totp":
plt.ylabel("Lifetime sum")
plt.title("Lifetime sum across frames")