import numpy as np
from gudhi import CubicalComplex
from shapely.geometry import Point
from skimage import filters
[docs]def pg0(arr):
"""
Calculate the proportion of the entire array which is at least as much as the first element.
Parameters
----------
arr : array_like
Input array consists of numerical values.
Returns
-------
prop : double
The aforementioned proportion.
"""
try:
prop = np.mean(arr >= arr[0])
except TypeError:
prop = np.mean(np.array(arr) >= arr[0])
return prop
[docs]def getxy_col(arr, nrows):
"""
Returns (x,y) image coordinates from column-major representation
Parameters
----------
arr : ndarray of shape (dim,)
Should be a 1d numpy array
nrows : int
The number of rows in the 2d array. Should be factor of len(arr)
Returns
-------
x : int
The x-coordinate from column-major representation. Note that x-coordinate (for an image)
corresponds to the column of the matrix
y : int
The y-coordinate from column-major representation. Note that y-coordinate (for an image)
corresponds to the row of the matrix
"""
assert isinstance(arr, np.ndarray), "Array must be numpy ndarray"
y = (arr % nrows).astype(int)
x = ((arr-y)/nrows).astype(int)
return x,y
[docs]def std_video(video, flip=False):
"""
Returns a video where each frame is standardized to have mean 0 pixel intensity with
standard deviation 1.
Parameters
----------
video : ndarray
An array of shape (frames, rows, columns).
flip : bool.
Whether or not to invert the pixels of the video. The default is False.
Returns
-------
ndarray
The frame-standardized video of shape (frames, rows, columns).
"""
v_mean = np.mean(video, axis=(1,2))
v_std = np.std(video, axis=(1,2))
v_means=np.transpose(np.tile(v_mean, (video.shape[1], video.shape[2],1)), (2,0,1))
v_stds=np.transpose(np.tile(v_std, (video.shape[1], video.shape[2],1)), (2,0,1))
return (-1)**(flip)*(video-v_means)/(v_stds)
[docs]def degp_totp(arr, p=1, inf=False):
r"""
Calculate degree-p total persistence from array of lifetimes.
Parameters
----------
arr: array_like
Should be an array of persistence lifetimes, with nonnegative entries
p : double, optional
Exponent for degree-p total lifetime. The default is 1.
inf : bool, optional
Whether or not to calculate infinity norm. The default is False.
Raises
------
ValueError
Raises error if p less than 1
Returns
-------
double
The degree-p total persistence.
Notes
------
The degree-p total (0-dimensional) persistence is specified as:
.. math:: L_p(A(I)) := \sum_{(b,d) \in PD_0} (d-b)^p.
If inf==True, this overwrites the chosen value of p.
"""
if p < 1:
raise ValueError("p must be >= 1")
if inf:
return np.max(arr)
else:
return np.sum(arr**p)
[docs]def weight_func(arr):
"""
Weight function for persistence image using defaults from Obayashi, Hiraoka, and Kimura (2018).
Parameters
----------
arr : array_like
Should be an array of persistence lifetimes, with nonnegative entries
Returns
-------
ndarray
Array of weights corresponding to the given persistence lifetimes.
"""
try:
return np.arctan(0.5*(arr[:,1]))
except IndexError:
return np.arctan(0.5*(arr[1]))
[docs]def calc_close(pd, diffs, prox_arr, val=1):
"""
Calculates the nearest point in the persistence diagram
Parameters
----------
pd : ndarray of shape (n_barcodes, 2)
A persistence diagram in (birth, death) coordinates.
Note that n_barcodes is the number of barcodes
diffs : list/tuple of two elements
Corresponds to the distance between elements of grid discretization, such as in
birtt_bd and lifet_bd in the gen_pers_im method of the ImageSeriesPlus class.
prox_arr : ndarray of shape (m, 2)
M observations in (birth, lifetime) coordinates corresponding to elements
of the grid discretization used in calculation of the persistence image.
val : float, optional
The default is 1.
Returns
-------
proxs : list
List of all generators in the persistence diagram pd that lie in the Voronoi cell of
one of the elements of prox_arr. This is indicated with a non-zero entry in proxs.
"""
if len(prox_arr) == 0:
proxs = list(np.zeros(len(pd), dtype=np.int16))
else:
proxs = []
for gen in pd:
prox = 0
for loc in prox_arr:
#calculate whether or not a given point belongs to the voronoi cell
nv = max(abs(gen[3]-gen[2]-loc[1])/diffs[0], abs(gen[2]-loc[0])/diffs[1])
#print(nv)
if nv <= 1/2:
prox = val
proxs += [prox]
return proxs
[docs]def get_cc(point, bin_im):
"""
Gets the connected component in the image bin_im containing the element "point".
Parameters
----------
point : list/tuple of 2 elements
(x,y) coordinate point for image matrix
bin_im : array_like
Boolean array.
Returns
-------
cc : list of lists
list of elements in connected component containing point.
"""
af = 1
cc = [point]
check_cc = [point]
while af > 0:
a = 0
check_cc2 = []
for pt in check_cc:
for i in [-1, 1]:
pt1 = [pt[0]+i, pt[1]]
pt2 = [pt[0], pt[1]+i]
if bin_im[pt[1], pt[0]+i] and pt1 not in cc:
cc = cc+[pt1]
check_cc2 = check_cc2+[pt1]
a += 1
if bin_im[pt[1]+i, pt[0]] and pt2 not in cc:
cc = cc+[pt2]
check_cc2 = check_cc2+[pt2]
a += 1
af = a
check_cc = check_cc2
return cc
[docs]def get_be(arr):
"""
Gets beginnings and ends of `True` sequences in Boolean arrays.
Parameters
----------
arr : array_like
Boolean array
Returns
-------
out: 2-tuple
Tuple consisting of two arrays. Index-0 array consists of beginning of "True" sequences,
and Index-1 array consists of ends of "True" sequences.
"""
begins = []
ends = []
before = False
for x in range(len(arr)):
if before==False and arr[x]==True:
begins.append(x)
before=True
if x==(len(arr)-1):
ends.append(x)
elif before==True and arr[x]==False:
ends.append(x-1)
before=False
elif before==True and x==(len(arr)-1):
ends.append(x)
return (np.array(begins), np.array(ends))
[docs]def calc_reject(arr, val_arr, alpha=0.05, conservative=False):
"""
Returns dictionary of index array, boolean array, rejection threshold index
array of indices of hypotheses that are rejected via BH procedure, and
boolean array of whether or not hypothesis is rejected.
Parameters
----------
arr : array_like
Array of probabilities (p-values) between 0 and 1.
val_arr: array_like
Array of values to break ties in p-values.
alpha : value between 0 and 1, optional
Signficance level for BH procedure. The default is 0.05.
conservative: default = False
Dictates whether or not conservative BH procedure is used.
Returns
-------
out: dict
Dictionary containing information listed above.
"""
if alpha <= 0:
raise ValueError("alpha must be > 0")
N = len(arr)
k = np.arange(1, N+1)
if conservative:
CN = np.sum(1/k)
else:
CN = 1
li = k*alpha/(CN*N)
arr_argsort = np.argsort(-val_arr)
arr_sort = arr[arr_argsort]
try:
rej_max = np.where(arr_sort <= li)[0][-1] #largest element to reject
return {"reject_ind": np.where(arr <= arr_sort[rej_max])[0],
"reject_bool": (arr <= arr_sort[rej_max]),
"reject_thr_ind": arr_argsort[rej_max]}
except IndexError:
return {"reject_ind": [],
"reject_bool": np.repeat(False, len(arr)),
"reject_thr_ind": None}
[docs]def block_sum(arr, m, div=1):
"""
Sums adjacent blocks of m frames.
"""
ran = int(np.floor(len(arr)/m-1)+1)
block_arr = np.stack([np.sum(np.rint(arr[i*m:(i*m+m)]/div), axis=0) for i in range(ran)])
return block_arr
[docs]def alps(arr):
"""
Get the ALPS statistic of an array of values, not all zero.
"""
arr = np.sort(arr)
sums = np.array([np.sum(arr <= y) for y in np.unique(arr)][::-1])
arr = np.append([0], arr)
integral = 0
for i, s in enumerate(sums):
integral += (arr[i+1]-arr[i])*np.log(s)
return integral
[docs]def pers_entr(arr, neg=True):
"""
Gets persistence (shannon) entropy of an array of values, not all zero.
"""
L_sum = np.sum(arr)
Lmod = arr/L_sum
if neg:
a = 1
else:
a = -1
return a*np.sum(Lmod*np.log(Lmod))
[docs]def pd_thresh_calc(diag, pixels, minv, maxv, dim="both", num=50):
"""
Calculates best persistence preserving threshold as described in Chung and Day (2018).
Parameters
----------
diag : ndarray of shape (n_samples, 6)
Persistence diagram object a la the output of the persmoo function.
pixels: array-like
Vector containing the pixels of the image on which to perform PD thresholding.
minv : float
Minimum threshold to consider.
maxv : float
Maximum threshold to consider.
dim : str or int, optional
Integer 0 or 1 corresponds to thresholding only based on dimension 0 and 1 persistence features.
The default is "both", corresponding to both dimensions 0 and 1.
num : int, optional
Positive integer indicating how many evenly spaced pixel distribution quantiles should be
considered in PD thresholding (prior to restricting to the range [minv, maxv]).
Returns
-------
float
The best topology- and geometry-preserving threshold
"""
if dim=="both":
sub_diag = diag
elif isinstance(dim, int):
select = (diag[:, 2]==dim)
sub_diag = diag[select,:]
#pixels gets the empirical pixel distribution and chooses quantiles from this for thresholding.
thrs_ = np.quantile(pixels, q=np.linspace(0,1, num))
thrs = thrs_[np.logical_and(thrs_ > minv, thrs_ < maxv)]
Phi_t = []
for thr in thrs:
orig = np.logical_and(sub_diag[:, 3] <= thr, sub_diag[:, 4] > thr)
minus = (sub_diag[:, 4] <= thr)
plus = (sub_diag[:, 3] > thr)
#Augmenting the persistence diagram in the case of emptiness
if sum(orig) >= 1:
Phi_orig = (1/(np.sum(orig)+1))*np.sum((sub_diag[orig, 4]-thr)*(thr-sub_diag[orig, 3]))
else:
Phi_orig = 0 #Why would we have (maxv-minv)? 1 does not decrease the last threshold enough...
if sum(minus) >= 1:
Phi_minus = np.sum((thr-sub_diag[minus, 4])/(sub_diag[minus, 4]-sub_diag[minus, 3]))
else:
Phi_minus = 1
if sum(plus) >= 1:
Phi_plus = np.sum((sub_diag[plus, 3]-thr)/(sub_diag[plus, 4]-sub_diag[plus, 3]))
else:
Phi_plus = 1
Phi_t.append(Phi_orig*Phi_minus*Phi_plus)
return thrs[np.argmax(Phi_t)]
[docs]def persmoo(im, polygon=None, sigma=None):
"""
Smooths, then fits 0 and 1-dimensional cubical persistence on an image. Returns other important information as well.
Parameters
----------
im : individual image, i.e. two-dimensional array
Greyscale image on which sublevelset homology will be calculated.
polygon : Shapely.Polygon object, optional
Not necessary, only if one wants to restrict region to focus on. The default is None.
sigma : smoothing parameter sigma, optional
Smoothing parameter sigma for Gaussian filter. The default is None.
Returns
-------
cu_tot : ndarray
Array with positional and homology information, as follows:
cu_pos: #(x,y) coordinates of positive/negative cells (which create components/destroy loops)
cu_pers: #(dimension, birth, death)...
cu_ex_inpoly: #row indices of positive/negative cells located within specified polygon
"""
#throughout this, infinite death becomes max pixel value...
if sigma==None:
pass
else:
im = filters.gaussian(im, sigma=sigma, preserve_range=True)
cu_comp = CubicalComplex(top_dimensional_cells=im)
cu_comp.compute_persistence(homology_coeff_field=2)
cu_comp.persistence()
#this step is necessary to get the ordering of the negative/positive cells correct
cu_pers0 = cu_comp.persistence_intervals_in_dimension(0)
cu_pers1 = cu_comp.persistence_intervals_in_dimension(1)
dims = np.concatenate((np.repeat(1, cu_pers1.shape[0]), np.repeat(0, cu_pers0.shape[0])))
cu_pers_ = np.r_[cu_pers1, cu_pers0]
cu_pers = np.c_[dims, cu_pers_]
nr, nc = im.shape
#reassign infinite death pixels
cu_pers[np.logical_and(cu_pers[:, 0]==0, cu_pers[:,2]==np.inf), 2] = np.max(im[im < np.inf])
#get locations of cells...
cu_pers_pairs_ = cu_comp.cofaces_of_persistence_pairs()
#retrieves 'essential feature of dim-0', i.e. component of inf persistence
ess_feat0 = cu_pers_pairs_[1][0]
ess_featx, ess_featy = getxy_col(ess_feat0, nr)
#this is to account for the specific cases when their is no 1st homology or no non-infinite generators of H_0
if len(cu_pers_pairs_[0]) == 0:
cu_pos = np.array([ess_featx, ess_featy]).T
else:
cu_pers_pair0 = cu_pers_pairs_[0][0] #regular persistence pairs of dim-0
x_coords0, y_coords0 = getxy_col(cu_pers_pair0, nr)
x_pos0 = np.append(x_coords0[:,0], ess_featx) #x coords of positive cells for dim-0, local minima
y_pos0 = np.append(y_coords0[:,0], ess_featy) #y coords of positive cells for dim-0, local minima
cu_pos0 = np.stack([x_pos0, y_pos0], axis=1)
if len(cu_pers_pairs_[0]) == 2:
cu_pers_pair1 = cu_pers_pairs_[0][1] ##regular persistence pairs of dim-1
x_coords1, y_coords1 = getxy_col(cu_pers_pair1, nr)
#concatenate x,y coords of negative cells for dim-1, local maxima
cu_pos1 = np.stack([x_coords1[:,1], y_coords1[:,1]], axis=1)
cu_pos = np.r_[cu_pos1, cu_pos0]
else:
cu_pos = cu_pos0
if polygon==None:
cu_ex_inpoly=np.repeat(True, len(cu_pers))
else:
pers_pts = (Point(x,y) for x,y in zip(cu_pos[:,0], cu_pos[:,1]))
cu_ex_inpoly = [polygon.contains(pt) for pt in pers_pts]
cu_tot = np.c_[cu_pos, cu_pers, cu_ex_inpoly]
return cu_tot
[docs]def fitsmoo(im, polygon=None, sigma=None, max_death_pixel_int=True):
"""
Smooths, then fits 0-dimensional cubical persistence on an image.
Returns information on:
1) Location of positive cells, i.e. local minima (cu_pos, or index-0 and index-1 columns)
2) Lifetime information (cu_totpers, or index-2 column)
3) Whether or not a positive cell lies within the polygon (cu_ex_inpoly, or index-3 column)
"""
if sigma==None:
pass
else:
im = filters.gaussian(im, sigma=sigma, preserve_range=True)
cu_comp = CubicalComplex(top_dimensional_cells=im)
cu_comp.compute_persistence(homology_coeff_field=2)
cu_pers = cu_comp.persistence_intervals_in_dimension(0)
nr, nc = im.shape
cu_pers_pairs_ = cu_comp.cofaces_of_persistence_pairs()
cu_pers_pair0 = cu_pers_pairs_[0][0] #regular persistence pairs of dim-0
ess_feat0 = cu_pers_pairs_[1][0] #retrieves 'essential feature of dim-0', i.e. component of inf persistence
ess_featx, ess_featy = getxy_col(ess_feat0, nr)
x_coords, y_coords = getxy_col(cu_pers_pair0, nr)
x_pos = np.append(x_coords[:,0], ess_featx) #x coords of positive cells
y_pos = np.append(y_coords[:,0], ess_featy) #y coords of positive cells
cu_pos = np.stack([x_pos, y_pos], axis=1)
#Now calculate whether each point is within our specified polygon
if polygon==None:
cu_ex_inpoly=np.repeat(True, len(cu_pers))
else:
pers_pts = (Point(x,y) for x,y in zip(x_pos, y_pos))
cu_ex_inpoly = [polygon.contains(pt) for pt in pers_pts]
if max_death_pixel_int==True:
cu_pers[-1, 1] = np.max(im)
else:
cu_pers[-1, 1] = np.max(cu_pers[:-1, 1])
cu_totpers = cu_pers[:,1]-cu_pers[:,0]
return np.c_[cu_pos, cu_totpers, cu_ex_inpoly]