Shortcuts

Source code for slideflow.stats.stats_utils

from typing import Dict, Tuple

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min


def calculate_centroid(
    act: Dict[str, np.ndarray]
) -> Tuple[Dict[str, int], Dict[str, np.ndarray]]:
    """Calculate slide-level centroid indices for a provided activations dict.

    For each slide, a single-cluster KMeans model is fit to that slide's tile
    activations, and the tile closest to the fitted cluster center is chosen.
    Slides with no tiles are skipped and appear in neither returned dict.

    Args:
        act (dict): Dict mapping slide names to ndarray of activations
            across tiles, of shape (n_tiles, n_features).

    Returns:
        A tuple containing

            dict: Dict mapping slides to index of tile nearest to centroid

            dict: Dict mapping slides to activations of tile nearest
            to centroid
    """
    optimal_indices: Dict[str, int] = {}
    centroid_activations: Dict[str, np.ndarray] = {}
    for slide, tile_acts in act.items():
        # A slide without tiles has no centroid; leave it out of the results.
        if len(tile_acts) == 0:
            continue
        km = KMeans(n_clusters=1, n_init=10).fit(tile_acts)
        closest, _ = pairwise_distances_argmin_min(
            km.cluster_centers_, tile_acts
        )
        # Only one cluster center was fit, so `closest` holds one entry.
        # Cast to a built-in int so the value matches the annotated type.
        closest_index = int(closest[0])
        optimal_indices[slide] = closest_index
        centroid_activations[slide] = tile_acts[closest_index]
    return optimal_indices, centroid_activations


def get_centroid_index(arr: np.ndarray) -> int:
    """Calculate index nearest to centroid from a given 2D input array.

    A single-cluster KMeans model is fit to the rows of ``arr``; the index of
    the row closest to the fitted cluster center is returned.

    Args:
        arr (np.ndarray): 2D input array; each row is treated as one sample.

    Returns:
        int: Index of the row of ``arr`` nearest to the centroid.
    """
    km = KMeans(n_clusters=1, n_init=10).fit(arr)
    closest, _ = pairwise_distances_argmin_min(km.cluster_centers_, arr)
    # Only one cluster center was fit, so `closest` holds one entry.
    # Cast to a built-in int so the value matches the annotated return type.
    return int(closest[0])


def _nonzero_span(low, high):
    """Return ``high - low`` with zero entries replaced by 1 (avoids 0/0)."""
    span = np.asarray(high) - np.asarray(low)
    return np.where(span == 0, 1.0, span)


def normalize_layout(
    layout: np.ndarray,
    min_percentile: int = 1,
    max_percentile: int = 99,
    relative_margin: float = 0.1
) -> Tuple[
    np.ndarray,
    Tuple[np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray]
]:
    """Removes outliers and scales layout to between [0,1].

    Args:
        layout (np.ndarray): 2D array containing data to be scaled.
        min_percentile (int, optional): Percentile for scaling. Defaults to 1.
        max_percentile (int, optional): Percentile for scaling. Defaults to 99.
        relative_margin (float, optional): Add an additional margin (fraction
            of total plot width). Defaults to 0.1.

    Returns:
        np.ndarray: layout array, re-scaled and clipped. The input array is
        not modified.

        tuple: Range (min, max) in original space covered by this layout,
        computed along axis 0 of the clipped data.

        tuple: Clipping values (min, max) used for this layout, computed
        along axis 0.
    """
    # Compute percentiles
    mins = np.percentile(layout, min_percentile, axis=0)
    maxs = np.percentile(layout, max_percentile, axis=0)

    # Add margins. The margin is computed once, from the un-padded range, so
    # that the same amount is added below the minimum and above the maximum.
    margin = relative_margin * (maxs - mins)
    mins = mins - margin
    maxs = maxs + margin

    # `clip` broadcasts the per-column bounds across all rows.
    clipped = np.clip(layout, mins, maxs)

    # Embed within [0,1] along each column. A column with zero extent
    # (constant values) maps to 0 rather than dividing by zero.
    _min = clipped.min(axis=0)
    _max = clipped.max(axis=0)
    scaled = (clipped - _min) / _nonzero_span(_min, _max)
    return scaled, (_min, _max), (mins, maxs)


def normalize(
    array: np.ndarray,
    norm_range: Tuple[np.ndarray, np.ndarray],
    norm_clip: Tuple[np.ndarray, np.ndarray],
) -> np.ndarray:
    """Normalize and clip an array.

    Args:
        array (np.ndarray): Data to normalize.
        norm_range (tuple): Range (min, max) mapped onto [0, 1], such as the
            second value returned by ``normalize_layout``.
        norm_clip (tuple): Clipping values (min, max) applied before scaling,
            such as the third value returned by ``normalize_layout``.

    Returns:
        np.ndarray: Clipped and re-scaled copy of ``array``.
    """
    _min, _max = norm_range
    mins, maxs = norm_clip
    clipped = np.clip(array, mins, maxs)
    # Out-of-place arithmetic so integer inputs are promoted to float instead
    # of raising on in-place true division.
    return (clipped - _min) / _nonzero_span(_min, _max)


def denormalize(
    array: np.ndarray,
    norm_range: Tuple[np.ndarray, np.ndarray],
) -> np.ndarray:
    """De-normalize an array.

    Args:
        array (np.ndarray): Normalized data.
        norm_range (tuple): Range (min, max) that was mapped onto [0, 1]
            during normalization.

    Returns:
        np.ndarray: ``array`` mapped back to the original space.
    """
    _min, _max = norm_range
    return array * (_max - _min) + _min