Source code for detectda.hlpr

import numpy as np
from gudhi import CubicalComplex
from shapely.geometry import Point
from skimage import filters

[docs]def pg0(arr): """ Calculate the proportion of the entire array which is at least as much as the first element. Parameters ---------- arr : array_like Input array consists of numerical values. Returns ------- prop : double The aforementioned proportion. """ try: prop = np.mean(arr >= arr[0]) except TypeError: prop = np.mean(np.array(arr) >= arr[0]) return prop
[docs]def getxy_col(arr, nrows): """ Returns (x,y) image coordinates from column-major representation Parameters ---------- arr : ndarray of shape (dim,) Should be a 1d numpy array nrows : int The number of rows in the 2d array. Should be factor of len(arr) Returns ------- x : int The x-coordinate from column-major representation. Note that x-coordinate (for an image) corresponds to the column of the matrix y : int The y-coordinate from column-major representation. Note that y-coordinate (for an image) corresponds to the row of the matrix """ assert isinstance(arr, np.ndarray), "Array must be numpy ndarray" y = (arr % nrows).astype(int) x = ((arr-y)/nrows).astype(int) return x,y
[docs]def std_video(video, flip=False): """ Returns a video where each frame is standardized to have mean 0 pixel intensity with standard deviation 1. Parameters ---------- video : ndarray An array of shape (frames, rows, columns). flip : bool. Whether or not to invert the pixels of the video. The default is False. Returns ------- ndarray The frame-standardized video of shape (frames, rows, columns). """ v_mean = np.mean(video, axis=(1,2)) v_std = np.std(video, axis=(1,2)) v_means=np.transpose(np.tile(v_mean, (video.shape[1], video.shape[2],1)), (2,0,1)) v_stds=np.transpose(np.tile(v_std, (video.shape[1], video.shape[2],1)), (2,0,1)) return (-1)**(flip)*(video-v_means)/(v_stds)
[docs]def degp_totp(arr, p=1, inf=False): r""" Calculate degree-p total persistence from array of lifetimes. Parameters ---------- arr: array_like Should be an array of persistence lifetimes, with nonnegative entries p : double, optional Exponent for degree-p total lifetime. The default is 1. inf : bool, optional Whether or not to calculate infinity norm. The default is False. Raises ------ ValueError Raises error if p less than 1 Returns ------- double The degree-p total persistence. Notes ------ The degree-p total (0-dimensional) persistence is specified as: .. math:: L_p(A(I)) := \sum_{(b,d) \in PD_0} (d-b)^p. If inf==True, this overwrites the chosen value of p. """ if p < 1: raise ValueError("p must be >= 1") if inf: return np.max(arr) else: return np.sum(arr**p)
[docs]def weight_func(arr): """ Weight function for persistence image using defaults from Obayashi, Hiraoka, and Kimura (2018). Parameters ---------- arr : array_like Should be an array of persistence lifetimes, with nonnegative entries Returns ------- ndarray Array of weights corresponding to the given persistence lifetimes. """ try: return np.arctan(0.5*(arr[:,1])) except IndexError: return np.arctan(0.5*(arr[1]))
[docs]def calc_close(pd, diffs, prox_arr, val=1): """ Calculates the nearest point in the persistence diagram Parameters ---------- pd : ndarray of shape (n_barcodes, 2) A persistence diagram in (birth, death) coordinates. Note that n_barcodes is the number of barcodes diffs : list/tuple of two elements Corresponds to the distance between elements of grid discretization, such as in birtt_bd and lifet_bd in the gen_pers_im method of the ImageSeriesPlus class. prox_arr : ndarray of shape (m, 2) M observations in (birth, lifetime) coordinates corresponding to elements of the grid discretization used in calculation of the persistence image. val : float, optional The default is 1. Returns ------- proxs : list List of all generators in the persistence diagram pd that lie in the Voronoi cell of one of the elements of prox_arr. This is indicated with a non-zero entry in proxs. """ if len(prox_arr) == 0: proxs = list(np.zeros(len(pd), dtype=np.int16)) else: proxs = [] for gen in pd: prox = 0 for loc in prox_arr: #calculate whether or not a given point belongs to the voronoi cell nv = max(abs(gen[3]-gen[2]-loc[1])/diffs[0], abs(gen[2]-loc[0])/diffs[1]) #print(nv) if nv <= 1/2: prox = val proxs += [prox] return proxs
[docs]def get_cc(point, bin_im): """ Gets the connected component in the image bin_im containing the element "point". Parameters ---------- point : list/tuple of 2 elements (x,y) coordinate point for image matrix bin_im : array_like Boolean array. Returns ------- cc : list of lists list of elements in connected component containing point. """ af = 1 cc = [point] check_cc = [point] while af > 0: a = 0 check_cc2 = [] for pt in check_cc: for i in [-1, 1]: pt1 = [pt[0]+i, pt[1]] pt2 = [pt[0], pt[1]+i] if bin_im[pt[1], pt[0]+i] and pt1 not in cc: cc = cc+[pt1] check_cc2 = check_cc2+[pt1] a += 1 if bin_im[pt[1]+i, pt[0]] and pt2 not in cc: cc = cc+[pt2] check_cc2 = check_cc2+[pt2] a += 1 af = a check_cc = check_cc2 return cc
[docs]def get_be(arr): """ Gets beginnings and ends of `True` sequences in Boolean arrays. Parameters ---------- arr : array_like Boolean array Returns ------- out: 2-tuple Tuple consisting of two arrays. Index-0 array consists of beginning of "True" sequences, and Index-1 array consists of ends of "True" sequences. """ begins = [] ends = [] before = False for x in range(len(arr)): if before==False and arr[x]==True: begins.append(x) before=True if x==(len(arr)-1): ends.append(x) elif before==True and arr[x]==False: ends.append(x-1) before=False elif before==True and x==(len(arr)-1): ends.append(x) return (np.array(begins), np.array(ends))
[docs]def calc_reject(arr, val_arr, alpha=0.05, conservative=False): """ Returns dictionary of index array, boolean array, rejection threshold index array of indices of hypotheses that are rejected via BH procedure, and boolean array of whether or not hypothesis is rejected. Parameters ---------- arr : array_like Array of probabilities (p-values) between 0 and 1. val_arr: array_like Array of values to break ties in p-values. alpha : value between 0 and 1, optional Signficance level for BH procedure. The default is 0.05. conservative: default = False Dictates whether or not conservative BH procedure is used. Returns ------- out: dict Dictionary containing information listed above. """ if alpha <= 0: raise ValueError("alpha must be > 0") N = len(arr) k = np.arange(1, N+1) if conservative: CN = np.sum(1/k) else: CN = 1 li = k*alpha/(CN*N) arr_argsort = np.argsort(-val_arr) arr_sort = arr[arr_argsort] try: rej_max = np.where(arr_sort <= li)[0][-1] #largest element to reject return {"reject_ind": np.where(arr <= arr_sort[rej_max])[0], "reject_bool": (arr <= arr_sort[rej_max]), "reject_thr_ind": arr_argsort[rej_max]} except IndexError: return {"reject_ind": [], "reject_bool": np.repeat(False, len(arr)), "reject_thr_ind": None}
[docs]def block_sum(arr, m, div=1): """ Sums adjacent blocks of m frames. """ ran = int(np.floor(len(arr)/m-1)+1) block_arr = np.stack([np.sum(np.rint(arr[i*m:(i*m+m)]/div), axis=0) for i in range(ran)]) return block_arr
[docs]def alps(arr): """ Get the ALPS statistic of an array of values, not all zero. """ arr = np.sort(arr) sums = np.array([np.sum(arr <= y) for y in np.unique(arr)][::-1]) arr = np.append([0], arr) integral = 0 for i, s in enumerate(sums): integral += (arr[i+1]-arr[i])*np.log(s) return integral
[docs]def pers_entr(arr, neg=True): """ Gets persistence (shannon) entropy of an array of values, not all zero. """ L_sum = np.sum(arr) Lmod = arr/L_sum if neg: a = 1 else: a = -1 return a*np.sum(Lmod*np.log(Lmod))
[docs]def pd_thresh_calc(diag, pixels, minv, maxv, dim="both", num=50): """ Calculates best persistence preserving threshold as described in Chung and Day (2018). Parameters ---------- diag : ndarray of shape (n_samples, 6) Persistence diagram object a la the output of the persmoo function. pixels: array-like Vector containing the pixels of the image on which to perform PD thresholding. minv : float Minimum threshold to consider. maxv : float Maximum threshold to consider. dim : str or int, optional Integer 0 or 1 corresponds to thresholding only based on dimension 0 and 1 persistence features. The default is "both", corresponding to both dimensions 0 and 1. num : int, optional Positive integer indicating how many evenly spaced pixel distribution quantiles should be considered in PD thresholding (prior to restricting to the range [minv, maxv]). Returns ------- float The best topology- and geometry-preserving threshold """ if dim=="both": sub_diag = diag elif isinstance(dim, int): select = (diag[:, 2]==dim) sub_diag = diag[select,:] #pixels gets the empirical pixel distribution and chooses quantiles from this for thresholding. thrs_ = np.quantile(pixels, q=np.linspace(0,1, num)) thrs = thrs_[np.logical_and(thrs_ > minv, thrs_ < maxv)] Phi_t = [] for thr in thrs: orig = np.logical_and(sub_diag[:, 3] <= thr, sub_diag[:, 4] > thr) minus = (sub_diag[:, 4] <= thr) plus = (sub_diag[:, 3] > thr) #Augmenting the persistence diagram in the case of emptiness if sum(orig) >= 1: Phi_orig = (1/(np.sum(orig)+1))*np.sum((sub_diag[orig, 4]-thr)*(thr-sub_diag[orig, 3])) else: Phi_orig = 0 #Why would we have (maxv-minv)? 1 does not decrease the last threshold enough... if sum(minus) >= 1: Phi_minus = np.sum((thr-sub_diag[minus, 4])/(sub_diag[minus, 4]-sub_diag[minus, 3])) else: Phi_minus = 1 if sum(plus) >= 1: Phi_plus = np.sum((sub_diag[plus, 3]-thr)/(sub_diag[plus, 4]-sub_diag[plus, 3])) else: Phi_plus = 1 Phi_t.append(Phi_orig*Phi_minus*Phi_plus) return thrs[np.argmax(Phi_t)]
[docs]def persmoo(im, polygon=None, sigma=None): """ Smooths, then fits 0 and 1-dimensional cubical persistence on an image. Returns other important information as well. Parameters ---------- im : individual image, i.e. two-dimensional array Greyscale image on which sublevelset homology will be calculated. polygon : Shapely.Polygon object, optional Not necessary, only if one wants to restrict region to focus on. The default is None. sigma : smoothing parameter sigma, optional Smoothing parameter sigma for Gaussian filter. The default is None. Returns ------- cu_tot : ndarray Array with positional and homology information, as follows: cu_pos: #(x,y) coordinates of positive/negative cells (which create components/destroy loops) cu_pers: #(dimension, birth, death)... cu_ex_inpoly: #row indices of positive/negative cells located within specified polygon """ #throughout this, infinite death becomes max pixel value... if sigma==None: pass else: im = filters.gaussian(im, sigma=sigma, preserve_range=True) cu_comp = CubicalComplex(top_dimensional_cells=im) cu_comp.compute_persistence(homology_coeff_field=2) cu_comp.persistence() #this step is necessary to get the ordering of the negative/positive cells correct cu_pers0 = cu_comp.persistence_intervals_in_dimension(0) cu_pers1 = cu_comp.persistence_intervals_in_dimension(1) dims = np.concatenate((np.repeat(1, cu_pers1.shape[0]), np.repeat(0, cu_pers0.shape[0]))) cu_pers_ = np.r_[cu_pers1, cu_pers0] cu_pers = np.c_[dims, cu_pers_] nr, nc = im.shape #reassign infinite death pixels cu_pers[np.logical_and(cu_pers[:, 0]==0, cu_pers[:,2]==np.inf), 2] = np.max(im[im < np.inf]) #get locations of cells... cu_pers_pairs_ = cu_comp.cofaces_of_persistence_pairs() #retrieves 'essential feature of dim-0', i.e. component of inf persistence ess_feat0 = cu_pers_pairs_[1][0] ess_featx, ess_featy = getxy_col(ess_feat0, nr) #this is to account for the specific cases when their is no 1st homology or no non-infinite generators of H_0 if len(cu_pers_pairs_[0]) == 0: cu_pos = np.array([ess_featx, ess_featy]).T else: cu_pers_pair0 = cu_pers_pairs_[0][0] #regular persistence pairs of dim-0 x_coords0, y_coords0 = getxy_col(cu_pers_pair0, nr) x_pos0 = np.append(x_coords0[:,0], ess_featx) #x coords of positive cells for dim-0, local minima y_pos0 = np.append(y_coords0[:,0], ess_featy) #y coords of positive cells for dim-0, local minima cu_pos0 = np.stack([x_pos0, y_pos0], axis=1) if len(cu_pers_pairs_[0]) == 2: cu_pers_pair1 = cu_pers_pairs_[0][1] ##regular persistence pairs of dim-1 x_coords1, y_coords1 = getxy_col(cu_pers_pair1, nr) #concatenate x,y coords of negative cells for dim-1, local maxima cu_pos1 = np.stack([x_coords1[:,1], y_coords1[:,1]], axis=1) cu_pos = np.r_[cu_pos1, cu_pos0] else: cu_pos = cu_pos0 if polygon==None: cu_ex_inpoly=np.repeat(True, len(cu_pers)) else: pers_pts = (Point(x,y) for x,y in zip(cu_pos[:,0], cu_pos[:,1])) cu_ex_inpoly = [polygon.contains(pt) for pt in pers_pts] cu_tot = np.c_[cu_pos, cu_pers, cu_ex_inpoly] return cu_tot
[docs]def fitsmoo(im, polygon=None, sigma=None, max_death_pixel_int=True): """ Smooths, then fits 0-dimensional cubical persistence on an image. Returns information on: 1) Location of positive cells, i.e. local minima (cu_pos, or index-0 and index-1 columns) 2) Lifetime information (cu_totpers, or index-2 column) 3) Whether or not a positive cell lies within the polygon (cu_ex_inpoly, or index-3 column) """ if sigma==None: pass else: im = filters.gaussian(im, sigma=sigma, preserve_range=True) cu_comp = CubicalComplex(top_dimensional_cells=im) cu_comp.compute_persistence(homology_coeff_field=2) cu_pers = cu_comp.persistence_intervals_in_dimension(0) nr, nc = im.shape cu_pers_pairs_ = cu_comp.cofaces_of_persistence_pairs() cu_pers_pair0 = cu_pers_pairs_[0][0] #regular persistence pairs of dim-0 ess_feat0 = cu_pers_pairs_[1][0] #retrieves 'essential feature of dim-0', i.e. component of inf persistence ess_featx, ess_featy = getxy_col(ess_feat0, nr) x_coords, y_coords = getxy_col(cu_pers_pair0, nr) x_pos = np.append(x_coords[:,0], ess_featx) #x coords of positive cells y_pos = np.append(y_coords[:,0], ess_featy) #y coords of positive cells cu_pos = np.stack([x_pos, y_pos], axis=1) #Now calculate whether each point is within our specified polygon if polygon==None: cu_ex_inpoly=np.repeat(True, len(cu_pers)) else: pers_pts = (Point(x,y) for x,y in zip(x_pos, y_pos)) cu_ex_inpoly = [polygon.contains(pt) for pt in pers_pts] if max_death_pixel_int==True: cu_pers[-1, 1] = np.max(im) else: cu_pers[-1, 1] = np.max(cu_pers[:-1, 1]) cu_totpers = cu_pers[:,1]-cu_pers[:,0] return np.c_[cu_pos, cu_totpers, cu_ex_inpoly]