
Source code for slideflow.slide.wsi

'''This module includes tools to convolutionally section whole-slide images
into tiles. These tessellated tiles can be exported as raw PNG or JPG images,
or stored in the binary TFRecord format, with or without augmentation.'''

from __future__ import absolute_import, division, print_function


import time
import os
import csv
import json
import multiprocessing as mp
import random
import warnings
import cv2
import numpy as np
import pandas as pd
import rasterio.features
import shapely.affinity as sa
import skimage
import skimage.filters
from shapely import __version__ as shapely_version
from shapely.errors import ShapelyDeprecationWarning
from packaging import version
from PIL import Image, ImageDraw
from rich.progress import Progress
from skimage import img_as_ubyte
from slideflow import errors
from functools import partial
from os.path import exists, join, abspath
from types import SimpleNamespace
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, Sequence

import slideflow as sf
import slideflow.slide.qc
from slideflow.util import log, path_to_name  # noqa F401
from .report import SlideReport
from .utils import *
from .backends import tile_worker, backend_formats, wsi_reader


warnings.simplefilter('ignore', Image.DecompressionBombWarning)
warnings.simplefilter("ignore", ShapelyDeprecationWarning)
Image.MAX_IMAGE_PIXELS = 100000000000

# -----------------------------------------------------------------------
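# Example (sketch): typical use of the WSI class defined below. The slide path
# and tile sizes are illustrative; ``sf.WSI`` is assumed to re-export this
# class (see the :class:`slideflow.WSI` references in the docstrings below).
#
#   import slideflow as sf
#
#   wsi = sf.WSI('slide.svs', tile_px=299, tile_um=302)
#   tile = wsi[10, 10]        # numpy tile at grid index (10, 10), or None if filtered
#   generator = wsi.build_generator(shuffle=False, img_format='numpy')
#   for tile_dict in generator():
#       image = tile_dict['image']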

[docs]class WSI: '''Loads a slide and its annotated region of interest (ROI).''' def __init__( self, path: str, tile_px: int, tile_um: Union[int, str], stride_div: int = 1, *, enable_downsample: bool = True, roi_dir: Optional[str] = None, rois: Optional[List[str]] = None, roi_method: str = 'auto', roi_filter_method: Union[str, float] = 'center', origin: Union[str, Tuple[int, int]] = (0, 0), pb: Optional[Progress] = None, verbose: bool = True, use_edge_tiles: bool = False, mpp: Optional[float] = None, simplify_roi_tolerance: Optional[float] = None, artifact_labels: Optional[List[str]] = None, **reader_kwargs: Any ) -> None: """Loads slide and ROI(s). Args: path (str): Path to slide. tile_px (int): Size of tiles to extract, in pixels. tile_um (int or str): Size of tiles to extract, in microns (int) or magnification (str, e.g. "20x"). stride_div (int, optional): Stride divisor for tile extraction (1 = no tile overlap; 2 = 50% overlap, etc). Defaults to 1. enable_downsample (bool, optional): Allow use of downsampled intermediate layers in the slide image pyramid, which greatly improves tile extraction speed. May result in artifacts for slides with incompletely generated intermediates pyramids. Defaults to True. roi_dir (str, optional): Directory in which to search for ROI CSV files. Defaults to None. rois (list(str)): Alternatively, a list of ROI paths can be explicitly provided. Defaults to None. roi_method (str): Either 'inside', 'outside', 'auto', or 'ignore'. Determines how ROIs are used to extract tiles. If 'inside' or 'outside', will extract tiles in/out of an ROI, and raise errors.MissingROIError if an ROI is not available. If 'auto', will extract tiles inside an ROI if available, and across the whole-slide if no ROI is found. If 'ignore', will extract tiles across the whole-slide regardless of whether an ROI is available. Defaults to 'auto'. roi_filter_method (str or float): Method of filtering tiles with ROIs. Either 'center' or float (0-1). If 'center', tiles are filtered with ROIs based on the center of the tile. If float, tiles are filtered based on the proportion of the tile inside the ROI, and ``roi_filter_method`` is interpreted as a threshold. If the proportion of a tile inside the ROI is greater than this number, the tile is included. For example, if ``roi_filter_method=0.7``, a tile that is 80% inside of an ROI will be included, and a tile that is 50% inside of an ROI will be excluded. Defaults to 'center'. origin (str or tuple(int, int)): Offset the starting grid (x, y). Either a tuple of ints or 'random'. Defaults to (0, 0). pb (:class:`Progress`, optional): Multiprocessing capable Progress instance; will update progress bar during tile extraction if provided. verbose (bool, optional): Controls verbosity of output. If False, suppresses warnings about slide skipping when ROIs are missing. Defaults to True. mpp (float, optional): Override the microns-per-pixel value for the slide. Defaults to None (auto-detects). ignore_missing_mpp (bool, optional): If a slide does not have microns-per-pixel (MPP) information stored in EXIF data (key 65326), set the MPP to a default value (``sf.slide.DEFAULG_JPG_MPP``). If False and MPP data is missing, raises ``sf.errors.SlideMissingMPPError``. use_bounds (bool): If True, use the slide bounds to determine the slide dimensions. This will crop out unscanned white space. If a tuple of int, interprets the bounds as ``(top_left_x, top_left_y, width, height)``. If False, use the full slide dimensions. 
**Only available when using Libvips** (``SF_SLIDE_BACKEND=libvips``). Defaults to False. transforms (list(int), optional): List of transforms to apply to the slide before establishing coordinate grid. Options include any combination of ``ROTATE_90_CLOCKWISE``, ``ROTATE_180_CLOCKWISE``, ``ROTATE_270_CLOCKWISE``, ``FLIP_HORIZONTAL``, and ``FLIP_VERTICAL``. **Only available when using Libvips** (``SF_SLIDE_BACKEND=libvips``). Defaults to None. artifact_labels (list(str), optional): List of ROI issue labels to treat as artifacts. Whenever this is not None, all the ROIs with referred label will be inverted with ROI.invert(). Defaults to an empty list. """ # Initialize calculated variables self.pb = pb self.name = path_to_name(path) self.shortname = sf.util._shortname(self.name) self.tile_px = tile_px self.enable_downsample = enable_downsample self.thumb_image = None # type: Optional[Image.Image] self.stride_div = stride_div self.path = path self.filetype = sf.util.path_to_ext(path) self.blur_burden = None # type: Optional[float] self.roi_method = None # type: Optional[str] self.extracted_x_size = 0 # type: int self.extracted_y_size = 0 # type: int self.estimated_num_tiles = 0 # type: int self.rois = [] # type: List[ROI] # List of individual ROI annotations self.roi_method = roi_method self.roi_grid = None # type: Optional[np.ndarray] self.roi_filter_method = roi_filter_method self.qc_masks = [] # type: List[QCMask] self.alignment = None # type: Optional[Alignment] self.verbose = verbose self.segmentation = None self.use_edge_tiles = use_edge_tiles self.__slide = None self._mpp_override = mpp self._reader_kwargs = reader_kwargs self.grid: np.ndarray self.artifact_labels = artifact_labels # type: Optional[List[str]] if self.artifact_labels is None: self.artifact_labels = [] if isinstance(origin, str) and origin != 'random': raise ValueError( "Unrecognized value for argument 'origin': {} ." "Expected either 'random' or a tuple of ints.".format(origin) ) if isinstance(origin, tuple) and len(origin) != 2: raise ValueError( "If 'origin' is a tuple, it must be of length 2." ) self.origin = origin if (not isinstance(roi_filter_method, (int, float)) and roi_filter_method != 'center'): raise ValueError( "Unrecognized value for argument 'roi_filter_method': {} ." "Expected either float or 'center'.".format(roi_filter_method) ) if (isinstance(roi_filter_method, (int, float)) and (roi_filter_method < 0 or roi_filter_method > 1)): raise ValueError( "If 'roi_filter_method' is a float, it must be between 0-1." ) if rois is not None and not isinstance(rois, (list, tuple)): rois = [rois] # Initiate supported slide reader if not os.path.exists(path): raise errors.SlideNotFoundError(f"Could not find slide {path}.") if self.filetype.lower() not in sf.util.SUPPORTED_FORMATS: raise errors.SlideLoadError( f"{self.name}: unsupported filetype '{self.filetype}'" ) if self.filetype.lower() not in backend_formats(): raise errors.IncompatibleBackendError( f"{self.name}: filetype '{self.filetype}' is not supported " f"by the current backend, {sf.slide_backend()}" ) # Collect basic slide information if not self.slide.has_mpp: raise errors.SlideMissingMPPError( f"Slide {self.path} missing MPP ({OPS_MPP_X})" ) try: self.mpp = float(self.slide.mpp) except Exception as e: raise errors.SlideMissingMPPError( f"Unable to parse MPP for slide {self.path} ({OPS_MPP_X}). 
" f"Error raised: {e}" ) # Configure downsample information self._configure_downsample(tile_um) # Look in ROI directory if available if roi_dir and exists(join(roi_dir, self.name + ".csv")): self.load_csv_roi( join(roi_dir, self.name + ".csv"), process=False, simplify_tolerance=simplify_roi_tolerance ) elif rois and self.name in [path_to_name(r) for r in rois]: matching_rois = [] for rp in rois: rn = path_to_name(rp) if rn == self.name: matching_rois += [rp] matching = matching_rois[0] if len(matching_rois) > 1: log.warning( f"Multiple ROIs found for {self.name}; using {matching}" ) self.load_csv_roi( matching, process=False, simplify_tolerance=simplify_roi_tolerance ) # Handle missing ROIs if (not len(self.rois) and roi_method != 'ignore' and not (rois or roi_dir)): # No ROIs found because the user did not provide rois or roi_dir, # but the roi_method is not set to 'ignore', # indicating that this may be user error. warn_msg = f"No ROIs provided for {self.name}" if verbose and not (rois is None and roi_dir is None): log.warning(warn_msg) else: log.debug(warn_msg) if not len(self.rois) and roi_method in ('inside', 'outside'): raise errors.MissingROIError( f"Slide [green]{self.name}[/] missing ROI." ) elif not len(self.rois): info_msg = f"No ROI for {self.name}, using whole slide." if verbose and roi_method == 'auto': log.info(info_msg) else: log.debug(info_msg) elif len(self.rois) and roi_method == 'auto': log.debug(f"Slide {self.name}: extracting tiles from inside ROI.") self.roi_method = 'inside' # Build coordinate grid self.process_rois() # Summarize slide information self._log_slide_summary() def __repr__(self) -> str: base = "WSI(\n" base += " path = {!r},\n".format(self.path) base += " tile_px = {!r},\n".format(self.tile_px) base += " tile_um = {!r},\n".format(self.tile_um) base += " stride_div = {!r},\n".format(self.stride_div) base += " enable_downsample = {!r},\n".format(self.enable_downsample) base += " roi_method = {!r},\n".format(self.roi_method) base += ")" return base def __getitem__(self, index) -> Optional[np.ndarray]: """Returns a tile at the given index. Args: index (tuple): (x, y) grid coordinates of tile to extract. Returns: Optional[numpy.ndarray]: Image tile, or None if tile is filtered. """ # Verify indices are valid if (not isinstance(index, (tuple, list, np.ndarray)) or not len(index) == 2): raise IndexError("Must supply exactly two indices: (x, y)") if not (index[0] < self.shape[0]): raise IndexError( "index {} is out of bounds for axis 0 with size {}".format( index[0], self.shape[0] ) ) if not (index[1] < self.shape[1]): raise IndexError( "index {} is out of bounds for axis 0 with size {}".format( index[1], self.shape[1] ) ) # Find the corresponding coordinate given the provided indices. coord_idx, = np.where(( (self.coord[:, 2] == index[0]) & (self.coord[:, 3] == index[1]) )) if not len(coord_idx): return None assert len(coord_idx) == 1 x, y, grid_x, grid_y = self.coord[coord_idx[0]] # Check if indices correspond to a tile that is filtered out, # either by ROI or QC. If so, return None. if not self.grid[grid_x, grid_y]: return None # Extract the numpy image at this grid location. 
image_dict = tile_worker( (x, y, grid_x, grid_y), SimpleNamespace( full_extract_px=self.full_extract_px, mpp_override=self._mpp_override, reader_kwargs=self._reader_kwargs, grid=self.grid, downsample_level=self.downsample_level, path=self.path, extract_px=self.extract_px, tile_px=self.tile_px, full_stride=self.full_stride, normalizer=None, whitespace_fraction=1, whitespace_threshold=1, grayspace_fraction=1, grayspace_threshold=1, img_format='numpy', yolo=False, draw_roi=False, dry_run=False, has_segmentation=False, ) ) return image_dict['image'] def __getstate__(self): state = self.__dict__.copy() # Remove the unpicklable entries. if '__slide' in state: state['__slide'] = None if '_WSI__slide' in state: state['_WSI__slide'] = None if 'pb' in state: state['pb'] = None return state def __setstate__(self, state): self.__dict__.update(state) def _rasterize_rois_to_grid( self, rois: List["ROI"], x_offset: float = 0, y_offset: float = 0, xfact: float = 1., yfact: float = 1., *, grid_scale: int = 1, invert: bool = False ) -> np.ndarray: """Rasterize ROIs to the size of the tile extraction grid. Args: rois (List[ROI]): ROIs to rasterize. x_offset (float): Offset to align the ROI polygons with the image tile grid. y_offset (float): Offset to align the ROI polygons with the image tile grid. xfact (float): Scaling factor along x dimension. yfact (float): Scaling factor along y dimension. Keyword Args: grid_scale (int): Scaling factor for the grid. Defaults to 1. invert (bool): Whether to invert the ROI. Defaults to False. Returns: Optional[np.ndarray]: Rasterized ROIs. """ def _get_poly(_roi): if invert: return _roi.invert(*self.dimensions).poly else: return _roi.poly # Convert ROIs to polygons. polys = list(map(_get_poly, rois)) # Translate and scale. if x_offset or y_offset: polys = [sa.translate(poly, x_offset, y_offset) for poly in polys] if xfact != 1 or yfact != 1: polys = self._scale_polys(polys, xfact * grid_scale, yfact * grid_scale) # Rasterize polygons to the size of the tile extraction grid. return self._rasterize_polys( polys, grid_scale=grid_scale, intersection=('min' if invert else 'max') ) def _rasterize_polys( self, polys: List["sg.Polygon"], *, grid_scale: float = 1, intersection: str = 'max' ) -> np.ndarray: """Rasterize polygons to the size of the tile extraction grid. Args: polys (List[sg.Polygon]): Polygons to rasterize. Keyword args: scale (float): Scaling factor for the grid. Defaults to 1. intersection (str): Method for combining multiple polygons. Either 'max' or 'min'. 'max' yields the union of the polygons, 'min' yields the intersection. Defaults to 'max'. Returns: np.ndarray: Rasterized polygons. """ # Rasterize polygons for ROIs individually, to keep track of # which ROI each tile belongs to, then merge. roi_grid = np.stack([ rasterio.features.rasterize( [poly], out_shape=(self.grid.shape[1] * grid_scale, self.grid.shape[0] * grid_scale), all_touched=False).astype(bool).astype(int) * (i + 1) for i, poly in enumerate(polys) ], axis=0) if intersection == 'max': return roi_grid.max(axis=0).T elif intersection == 'min': return roi_grid.min(axis=0).T else: raise ValueError( f"Unrecognized value for 'intersection': {intersection}" ) def _scale_polys( self, polys: List["sg.Polyon"], xfact: float, yfact: float, ): """Scale polygons. Args: polys (List[sg.Polygon]): Polygons to scale. xfact (float): Scaling factor along x dimension. yfact (float): Scaling factor along y dimension. Returns: List[sg.Polygon]: Scaled polygons. 
""" return [ sa.scale(poly, xfact=xfact, yfact=yfact, origin=(0, 0)) for poly in polys ] def _build_coord(self) -> None: """Set up coordinate grid for image tiles. The coordinate grid, stored in ``self.coord``, is a list of lists, where each sublist contains the following information: - 0: **x**: x-coordinate of the top-left corner of the tile. - 1: **y**: y-coordinate of the top-left corner of the tile. - 2: **grid_x**: x-coordinate of the tile in self.grid. - 3: **grid_y**: y-coordinate of the tile in self.grid. """ # First, remove any existing ROI QC Masks, as these will be recalculated # when the coordinate grid is rebuilt. self.remove_roi_qc() # Calculate window sizes, strides, and coordinates for windows self.extracted_x_size = self.dimensions[0] - self.full_extract_px self.extracted_y_size = self.dimensions[1] - self.full_extract_px # Randomize origin, if desired if self.origin == 'random': start_x = random.randint(0, self.full_stride-1) start_y = random.randint(0, self.full_stride-1) else: assert isinstance(self.origin, tuple) start_x, start_y = self.origin log.debug("Slide origin: ({}, {})".format(start_x, start_y)) # Coordinates must be in level 0 (full) format # for the read_region function. # Coordinates correspond to top-left corner of the tile. self.coord = [] # type: Union[List, np.ndarray] edge_buffer = 0 if self.use_edge_tiles else self.full_extract_px y_range = np.arange( start_y, (self.dimensions[1]+1) - edge_buffer, self.full_stride ) x_range = np.arange( start_x, (self.dimensions[0]+1) - edge_buffer, self.full_stride ) self.grid = np.ones((len(x_range), len(y_range)), dtype=bool) # For any indexes in y_range or x_range corresponding to a negative value, # set the corresponding index in self.grid to False. # This may occur after slide alignment. self.grid[np.argwhere(x_range < 0), :] = False self.grid[:, np.argwhere(y_range < 0)] = False # ROI filtering roi_by_center = (self.roi_filter_method == 'center') if self.has_rois(): # Full extraction size and stride full_extract = self.tile_um / self.mpp stride = full_extract / self.stride_div # Coverage size of the extracted image tiles xtrim = int(stride * (self.grid.shape[0])) # type: ignore ytrim = int(stride * (self.grid.shape[1])) # type: ignore # Degree to which the ROIs will need to be scaled # to match the extracted image tile grid xfact = self.grid.shape[0] / xtrim yfact = self.grid.shape[1] / ytrim # Offset to align the ROI polygons with the image tile grid x_offset = - (full_extract/2 - stride/2) y_offset = - (full_extract/2 - stride/2) # Separate ROIs by whether they are artifact or not rois = self.get_rois(ignore_artifact=True) artifacts = self.get_artifacts() # Prepare ROI rasterization arguments rasterize_kw = dict( x_offset=x_offset, y_offset=y_offset, xfact=xfact, yfact=yfact, grid_scale=(1 if roi_by_center else 50), ) # Rasterize ROIs to the grid if len(rois): self.roi_grid = self._rasterize_rois_to_grid(rois, **rasterize_kw) else: self.roi_grid = None # If there are artifact ROIs, rasterize these to the grid # and subtract them from the main ROI grid. if len(artifacts): roi_grid_issues = self._rasterize_rois_to_grid(artifacts, invert=True, **rasterize_kw) if self.roi_grid is None: self.roi_grid = roi_grid_issues else: self.roi_grid = np.minimum(roi_grid_issues, self.roi_grid) # Create a merged boolean mask. 
self.roi_mask = self.roi_grid.T.astype(bool) # type: ignore else: self.roi_mask = None for yi, y in enumerate(y_range): for xi, x in enumerate(x_range): y = int(y) x = int(x) # Skip the slide if the coordinate has a negative value. # This may happen after slide alignment. if x < 0 or y < 0: continue self.coord.append([x, y, xi, yi]) # ROI filtering if self.has_rois() and roi_by_center: point_in_roi = self.roi_mask[yi, xi] # If the extraction method is 'inside', # skip the tile if it's not in an ROI if (((self.roi_method in ('inside', 'auto')) and not point_in_roi) or ((self.roi_method == 'outside') and point_in_roi)): self.grid[xi, yi] = 0 # If roi_filter_method is a float, then perform tile selection # based on what proportion of the tile is in an ROI, # rather than choosing a tile by centroid (roi_filter_method='center') if self.roi_method != 'ignore' and self.has_rois() and not roi_by_center: self.apply_qc_mask( (~self.roi_mask if self.roi_method == 'inside' else self.roi_mask), filter_threshold=(1-self.roi_filter_method), # type: ignore is_roi=True ) self.coord = np.array(self.coord) # Handle the case where there is only one tile if self.coord.ndim == 1 and self.coord.shape[0] > 0: self.coord = self.coord[np.newaxis, :] self.estimated_num_tiles = int(self.grid.sum()) log.debug(f"Set up coordinate grid, shape={self.grid.shape}") def _configure_downsample( self, tile_um: Union[str, int], enable_downsample: bool = True ) -> None: """Configure downsample level for tile extraction. Args: tile_um (int or str): Size of tiles to extract, in microns (int) or magnification (str, e.g. "20x"). enable_downsample (bool, optional): Allow use of downsampled intermediate layers in the slide image pyramid, which greatly improves tile extraction speed. May result in artifacts for slides with incompletely generated intermediates pyramids. Defaults to True. 
""" # Calculate downsample by magnification if isinstance(tile_um, str): sf.util.assert_is_mag(tile_um) _mag_lvl = 10 / (np.array(self.slide.level_downsamples) * self.mpp) mag_levels = _mag_lvl.tolist() closest_mag = min( mag_levels, key=lambda x: abs(x - sf.util.to_mag(tile_um)) # type: ignore ) if abs(closest_mag - sf.util.to_mag(tile_um)) > 2: raise errors.SlideLoadError( f"{self.name}: Could not find magnification level " f"matching {tile_um} (closest: {closest_mag:.1f})" ) ds_level = mag_levels.index(closest_mag) if not enable_downsample and ds_level != 0: raise ValueError(f"Unable to use magnification {tile_um} with " "enable_downsample=False") self.downsample_factor = self.slide.level_downsamples[ds_level] self.extract_px = self.tile_px self.full_extract_px = int(self.downsample_factor * self.tile_px) self.tile_um = int(self.downsample_factor * self.mpp * self.tile_px) log.debug(f"Using magnification {closest_mag:.1f}x (level=" f"{ds_level}, tile_um={self.tile_um})") # Calculate downsample level by tile micron size else: assert isinstance(tile_um, int) self.tile_um = tile_um self.full_extract_px = int(tile_um / self.mpp) ds = self.full_extract_px / self.tile_px if enable_downsample: ds_level = self.slide.best_level_for_downsample(ds) else: ds_level = 0 self.downsample_factor = self.slide.level_downsamples[ds_level] self.extract_px = self.full_extract_px // self.downsample_factor # Calculate filter dimensions (low magnification for filtering out # white background and performing edge detection) self.filter_dimensions = self.slide.level_dimensions[-1] self.filter_magnification = (self.filter_dimensions[0] / self.dimensions[0]) self.filter_px = int(self.full_extract_px * self.filter_magnification) # Calculate shape and stride self.downsample_level = ds_level self.downsample_dimensions = self.slide.level_dimensions[ds_level] self.stride = int(np.round(self.extract_px / self.stride_div)) self.full_stride = int(np.round(self.full_extract_px / self.stride_div)) def _log_slide_summary(self) -> None: """Log slide information (MPP, ROIs, grid shape, number of tiles).""" mpp_roi_msg = f'{self.mpp} um/px | {len(self.rois)} ROI(s)' size_msg = f'Size: {self.dimensions[0]} x {self.dimensions[1]}' log.debug(f"{self.shortname}: Slide info: {mpp_roi_msg} | {size_msg}") grid_msg = f"{self.shortname}: Grid shape: {self.grid.shape} " grid_msg += f"| Tiles to extract: {self.estimated_num_tiles}" log.debug(grid_msg) def _log_tile_extraction(self) -> None: """Log tile extraction parameters.""" lead_msg = f'Extracting {self.tile_um}um tiles' if self.extract_px != self.tile_px: resize_msg = f'(resizing {self.extract_px}px -> {self.tile_px}px)' else: resize_msg = f'({self.extract_px}px, not resizing)' stride_msg = f'stride: {int(self.stride)}px' log.debug(f"{self.shortname}: {lead_msg} {resize_msg}; {stride_msg}") if self.tile_px > self.extract_px: ups_msg = 'Tiles will be up-scaled with bilinear interpolation' ups_amnt = f'({self.extract_px}px -> {self.tile_px}px)' warn = f"[red]'!WARN!'[/]" log.warn(f"{self.shortname}: {warn} {ups_msg} {ups_amnt}") @property def dimensions(self) -> Tuple[int, int]: """Dimensions of highest-magnification level (width, height)""" return self.slide.dimensions @property def levels(self) -> Dict: """List of dict, with metadata for each level. Each dict has the keys 'dimensions', 'downsample', 'height', and 'weight'. - **'dimensions'**: (height, width) of the level. 
- **'downsample'**: Downsample level, where higher numbers indicate lower magnification and the highest magnification is 1. - **`height'**: Height of the level. - **`height'**: Width of the level. """ return self.slide.levels @property def level_dimensions(self) -> List[List[int]]: """List of list, with dimensions for each slide level.""" return self.slide.level_dimensions @property def level_downsamples(self) -> List[float]: """Downsample of each level (starts at 1, increases with lower mag).""" return self.slide.level_downsamples @property def level_mpp(self) -> List[float]: """Microns-per-pixel (MPP) for each level.""" return [d * self.mpp for d in self.level_downsamples] @property def properties(self) -> Dict: """Dictionary of metadata loaded from the slide.""" return self.slide.properties @property def vendor(self) -> Optional[str]: """Slide scanner vendor, if available.""" if OPS_VENDOR in self.slide.properties: return self.slide.properties[OPS_VENDOR] else: return None @property def shape(self): """Returns the shape of the tile grid.""" return self.grid.shape @property def slide(self) -> Any: """Backend-specific slide object.""" if self.__slide is not None: return self.__slide try: self.__slide = wsi_reader( self.path, self._mpp_override, **self._reader_kwargs) return self.__slide # type: ignore except errors.SlideMissingMPPError: raise except Exception as e: raise errors.SlideLoadError( f"Error loading slide {self.shortname}: {e}" ) @property def qc_mask(self) -> Optional[np.ndarray]: """Returns union of all QC masks.""" return self.get_qc_mask() # --- Alignment -------------------------------------------------------- def align_to( self, slide: "WSI", apply: bool = True, *, finetune_depth: Optional[Sequence[float]] = None, normalizer: Optional[str] = 'reinhard_mask', allow_errors: bool = False ) -> Tuple[Tuple[int, int], float]: """Align this slide to another slide. Alignment is performed by first aligning thumbnails at low magnification (mpp = 8), then progressively fine-tuning alignment at increasing magnification (mpp = 1, 0.5, 0.25), focused on a dense tissue region. The densest tissue region is identified using the QC mask, if available, otherwise via Otsu thresholding. Args: slide (:class:`slideflow.WSI`): Slide to align to. apply (bool): Whether to apply the alignment to the slide. Keyword Args: finetune_depth (Optional[List[int]]): List of magnifications at which to fine-tune alignment. Defaults to [1, 0.5, 0.25]. normalizer (str, optional): Stain normalization method to use. Defaults to 'reinhard_mask'. allow_errors (bool): Whether to allow and ignore alignment errors when finetuning at higher magnification. Defaults to False. Returns: Tuple of (x, y) offset and MSE of initial alignment. Raises: TypeError: If ``slide`` is not a :class:`slideflow.WSI` object. AlignmentError: If initial, thumbnail-based alignment fails, or if finetuning alignment fails at any magnification and ``allow_errors`` is False. """ from scipy import ndimage if not isinstance(slide, WSI): raise TypeError("Can only align to another slide.") if finetune_depth is None: finetune_depth = [1, 0.5, 0.25] # Steps: # 1. Identify tissue region as target for alignment. # 2. Rough align with low-mag thumbnails (mpp = 8). # 3. Fine-tune alignment at a dense tissue region (mpp = 1, 0.5, 0.25). # --- 1. Identify tissue regions as targets for alignment. ------------ # Use QC mask (.qc_mask) if available, otherwise calculate one. 
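        # QC masks use the convention True = background/artifact to discard,
        # so ~mask selects tissue; the distance transform below then locates
        # the pixel deepest within a tissue region.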
# Target should be the centroid of unmasked tissue regions, but # there may be multiple distinct tissue regions. # First, grab the QC mask, or make one if it is not available. if self.qc_mask is not None: mask = self.qc_mask else: log.debug("Applying Otsu thresholding to identify tissue regions.") mask = sf.slide.qc.Otsu()(self) # Next, fill holes and remove small peaks through gaussian blur, # thresholding, and morphological closing. log.debug("Filling holes and removing small peaks in tissue mask.") mask = skimage.morphology.binary_closing( skimage.filters.gaussian(mask, sigma=5) > 0.5, skimage.morphology.disk(5) ) # For each pixel in the mask, calculate the nearest distance to an # unmasked pixel. This will assist us with finding the densest areas # of tissue. log.debug("Calculating distance transform of tissue mask.") distances = ndimage.distance_transform_edt(~mask) # Find the coordinates of the pixel with the highest average distance. # This is the center of the densest tissue region. log.debug("Identifying target for alignment.") target = np.unravel_index(np.argmax(distances), distances.shape) # Convert from mask coordinates to slide coordinates. target = ( int(target[1] * (self.dimensions[0] / mask.shape[1])), int(target[0] * (self.dimensions[1] / mask.shape[0])) ) target_them = ( int(np.round(target[0] * (self.mpp / slide.mpp))), int(np.round(target[1] * (self.mpp / slide.mpp))) ) log.debug("Low-mag alignment complete.") log.debug("Target for alignment (us): {}".format(target)) log.debug("Target for alignment (them, pre-alignment): {}".format(target_them)) # --- 2. Align low-mag thumbnails. ------------------------------------ # Calculate thumbnails for alignment. log.debug("Calculating low-mag thumbnails for alignment.") our_thumb = np.array(self.thumb(mpp=8)) their_thumb = np.array(slide.thumb(mpp=8)) # Stain normalization if normalizer is not None: log.debug("Aligning with stain normalization: {}".format(normalizer)) if isinstance(normalizer, str): norm = sf.norm.autoselect(normalizer, backend='opencv') elif isinstance(normalizer, sf.norm.StainNormalizer): norm = normalizer else: raise ValueError("normalizer must be a str or instance of StainNormalizer") our_thumb = norm.transform(our_thumb[:, :, 0:3]) their_thumb = norm.transform(their_thumb[:, :, 0:3]) # Align thumbnails and adjust for scale. try: log.debug("Aligning low-mag thumbnails (mpp=8)...") alignment_raw, mse = align_by_translation( their_thumb, our_thumb, round=True, calculate_mse=True ) except errors.AlignmentError: raise errors.AlignmentError("Alignment failed at thumbnail (mpp=8)") alignment = (int(np.round(alignment_raw[0] * (8 / self.mpp))), int(np.round(alignment_raw[1] * (8 / self.mpp)))) alignment_them = (-int(np.round(alignment_raw[0] * (8 / slide.mpp))), -int(np.round(alignment_raw[1] * (8 / slide.mpp)))) log.debug("Low-mag alignment (us): {}".format(alignment)) log.debug("Low-mag alignment (them): {}".format(alignment_them)) # --- 3. Fine-tune alignment at tissue regions. ----------------------- # Get the coordinates of the tissue region in both slides. 
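        # Each finetune pass reads a 512 x 512 window (at the given mpp)
        # centered on the target region from both slides, optionally
        # stain-normalizes them, and estimates a translation; the offset is
        # scaled back to base pixels and accumulated onto the thumbnail
        # alignment. Magnifications finer than either slide's base mpp are
        # skipped.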
for finetune_mpp in finetune_depth: if (finetune_mpp < self.mpp) or (finetune_mpp < slide.mpp): log.debug("Skipping finetune at mpp={}".format(finetune_mpp)) continue # Us our_window_size = ( int(np.round(512 * (finetune_mpp/self.mpp))), int(np.round(512 * (finetune_mpp/self.mpp))) ) our_top_left = ( int(np.round(target[0] - (our_window_size[0]/2))), int(np.round(target[1] - (our_window_size[1]/2))) ) log.debug("Extracting mpp={} alignment window (ours) at window_size={}, top_left={}".format( finetune_mpp, our_window_size, our_top_left) ) our_region = self.slide.read_from_pyramid( top_left=our_top_left, window_size=our_window_size, target_size=(512, 512), convert='numpy', flatten=True, pad_missing=True ) # Them their_window_size = ( int(np.round(512 * (finetune_mpp/slide.mpp))), int(np.round(512 * (finetune_mpp/slide.mpp))) ) their_top_left = ( int(np.round(target_them[0] - (their_window_size[0]/2))) + alignment_them[0], int(np.round(target_them[1] - (their_window_size[1]/2))) + alignment_them[1] ) log.debug("Extracting mpp={} alignment window (theirs) at window_size={}, top_left={}".format( finetune_mpp, their_window_size, their_top_left) ) their_region = slide.slide.read_from_pyramid( top_left=their_top_left, window_size=their_window_size, target_size=(512, 512), convert='numpy', flatten=True, pad_missing=True ) if normalizer is not None: our_region = norm.transform(our_region[:, :, 0:3]) their_region = norm.transform(their_region[:, :, 0:3]) try: rough_alignment = sf.slide.utils._find_translation_matrix(their_region, our_region, h=50, search_window=53) except cv2.error: rough_alignment = None log.debug("Initial rough alignment failed at mpp={}".format(finetune_mpp)) else: log.debug("Initial rough alignment complete at mpp={}".format(finetune_mpp)) # Finetune alignment on this region. try: alignment_fine = align_by_translation(their_region, our_region, round=True, warp_matrix=rough_alignment) except errors.AlignmentError: msg = "Alignment failed at finetuning (mpp={})".format(finetune_mpp) if allow_errors: log.error(msg) else: raise errors.AlignmentError(msg) else: alignment = ( alignment[0] + int(np.round(alignment_fine[0] * (finetune_mpp/self.mpp))), alignment[1] + int(np.round(alignment_fine[1] * (finetune_mpp/self.mpp))) ) alignment_them = ( alignment_them[0] - int(np.round(alignment_fine[0] * (finetune_mpp/slide.mpp))), alignment_them[1] - int(np.round(alignment_fine[1] * (finetune_mpp/slide.mpp))) ) log.debug("Finetune alignment complete at mpp={}.".format(finetune_mpp)) log.debug("Finetuned alignment (us) at mpp={}: {}".format(finetune_mpp, alignment)) log.debug("Finetuned alignment (them) at mpp={}: {}".format(finetune_mpp, alignment_them)) # If not applying alignment, return the base alignment and MSE. if not apply: log.info("Slide aligned with MSE {:.2f}".format(mse)) return alignment, mse # type: ignore # Apply alignment. self.origin = alignment self.alignment = Alignment.from_translation( origin=self.slide.coord_to_raw(*alignment), scale=(slide.mpp / self.mpp), ) log.info("Slide aligned with MSE {:.2f}. Origin set to {}".format( mse, self.origin )) # Rebuild coordinates and reapply QC, if present. 
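        # _build_coord() regenerates the tile coordinate grid from the new
        # origin; calling apply_qc_mask() with no arguments re-applies any
        # previously stored (non-ROI) QC masks to the rebuilt grid.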
self._build_coord() if self.has_non_roi_qc(): self.apply_qc_mask() return alignment, mse # type: ignore def align_tiles_to( self, slide: "WSI", normalizer: Optional[str] = 'reinhard_mask', *, allow_errors: bool = True, mask_on_fail: bool = True, align_by: str = 'fit', ignore_outliers = True, num_workers: Optional[int] = None, **kwargs ) -> np.ndarray: """Align tiles to another slide. Differs from :meth:`slideflow.WSI.align_to` in that it aligns each tile individually, rather than the slide as a whole. This is useful when aligning slides with distortion, whose alignment may drift across the slide. Args: slide (:class:`slideflow.WSI`): Slide to align to. normalizer (str, optional): Stain normalization method to use. Keyword Args: allow_errors (bool): Whether to allow and ignore alignment errors when finetuning alignment fails at any magnification and ``allow_errors`` is False. Defaults to True. mask_on_fail (bool): Whether to mask tiles that fail alignment. Defaults to True. align_by (str): Either 'tile' or 'fit'. If 'tile', tiles are aligned individually. If 'fit', tiles are aligned by fitting a plane to the alignment of all tiles. Defaults to 'tile'. ignore_outliers (bool): Whether to ignore outliers when fitting a plane to tile alignment. Defaults to True. **kwargs: Keyword arguments passed to :meth:`slideflow.WSI.align_to`. Raises: ValueError: If ``align_by`` is not 'tile' or 'fit'. Returns: np.ndarray: Alignment grid, with shape = (grid_x, grid_y, 2). """ if align_by not in ('tile', 'fit'): raise ValueError("align_by must be 'tile' or 'median'") # Stain normalizer. if normalizer is not None: if isinstance(normalizer, str): normalizer = sf.norm.autoselect(normalizer, backend='opencv') elif not isinstance(normalizer, sf.norm.StainNormalizer): raise ValueError("normalizer must be a str or instance of StainNormalizer") # Perform coarse alignment. self.align_to( slide, apply=True, normalizer=normalizer, allow_errors=allow_errors, **kwargs ) # Finetune alignment at each tile location. 
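        # A worker pool maps calc_alignment over every tile coordinate
        # (against the reference slide); per-tile offsets are converted to
        # base-layer adjustments, and tiles that fail to align are masked out
        # or dropped, depending on mask_on_fail and align_by.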
from tqdm import tqdm ctx = mp.get_context('spawn') if sf.slide_backend() == 'libvips' else mp.get_context('fork') pool = ctx.Pool(num_workers or sf.util.num_cpu()) alignment_coords = np.zeros((self.coord.shape[0], 2)) half_extract_px = int(np.round(self.full_extract_px/2)) idx_to_remove = [] for tile_alignment, c in tqdm(pool.imap_unordered( partial(calc_alignment, us=self, them=slide, n=normalizer), enumerate(self.coord)), desc="Aligning tiles...", total=len(self.coord)): idx, (x, y, xi, yi) = c if tile_alignment == 'error': msg = "Tile alignment failed at x={}, y={} (grid {}, {})".format( x, y, xi, yi ) if allow_errors: log.debug(msg) tile_alignment = None else: raise errors.AlignmentError(msg) if tile_alignment is None and mask_on_fail and align_by == 'tile': self.grid[xi, yi] = False idx_to_remove += [idx] elif tile_alignment is None: idx_to_remove += [idx] if tile_alignment is not None: pixel_ratio = (self.full_extract_px / self.tile_px) x_adjust = int(np.round(tile_alignment[0] * pixel_ratio)) y_adjust = int(np.round(tile_alignment[1] * pixel_ratio)) x_base, y_base = self.slide.coord_to_raw( x + half_extract_px, y + half_extract_px ) x_base_adjusted, y_base_adjusted = self.slide.coord_to_raw( x + half_extract_px + x_adjust, y + half_extract_px + y_adjust ) x_base_adjustment = x_base_adjusted - x_base y_base_adjustment = y_base_adjusted - y_base alignment_coords[idx] = np.array([x_base_adjustment, y_base_adjustment]) log.debug("Tile alignment complete at x={}, y={} (grid {}, {}): adjust by {}, {}".format( x, y, xi, yi, x_adjust, y_adjust )) pool.close() coord_mask = np.any(self.get_masked_coord().mask, 1) coord_mask[np.array(idx_to_remove).astype(int)] = True mask = np.repeat(coord_mask[:, None], 2, axis=1) all_alignment_coords = np.ma.masked_array(alignment_coords, mask=mask) # type: ignore coord_raw = self.slide.coord_to_raw( self.coord[~coord_mask][:, 0] + half_extract_px, self.coord[~coord_mask][:, 1] + half_extract_px ) log.debug("Removing {} indices with failed alignment. 
Max coord size: {}".format(len(idx_to_remove), len(self.coord))) if align_by == 'fit': log.debug("Fitting to {} coordinates.".format((~coord_mask).sum())) x_adjustment_coordinates = np.column_stack(( coord_raw[0], coord_raw[1], all_alignment_coords[~coord_mask][:, 0], )) y_adjustment_coordinates = np.column_stack(( coord_raw[0], coord_raw[1], all_alignment_coords[~coord_mask][:, 1], )) def build_aligned_coords(x_centroid, x_normal, y_centroid, y_normal): coord_on_plane = np.zeros((len(self.coord), 2), dtype=int) coord_on_plane = np.ma.masked_array(coord_on_plane, mask=mask) for idx, (x, y, xi, yi) in enumerate(self.coord): # Convert coordinates to raw base layer coordinates bx, by = self.slide.coord_to_raw( x + half_extract_px, y + half_extract_px ) # Align to raw base layer coordinates coord_on_plane[idx] = ( int(np.round(z_on_plane(bx, by, x_centroid, x_normal))), int(np.round(z_on_plane(bx, by, y_centroid, y_normal))) ) return coord_on_plane x_centroid, x_normal = best_fit_plane(x_adjustment_coordinates) y_centroid, y_normal = best_fit_plane(y_adjustment_coordinates) fit_alignment = build_aligned_coords(x_centroid, x_normal, y_centroid, y_normal) if ignore_outliers: # Calculate outlier threshold (90th percentile) diff = np.abs(all_alignment_coords - fit_alignment) diff = np.max(diff, axis=-1) threshold = np.percentile(diff[~diff.mask].data, 90) all_alignment_coords.mask[diff > threshold] = True coord_mask[diff > threshold] = True fit_alignment.mask = all_alignment_coords.mask log.debug("Re-fitting to {} coordinates, ignoring outliers.".format((~coord_mask).sum())) coord_raw = self.slide.coord_to_raw( self.coord[~coord_mask][:, 0] + half_extract_px, self.coord[~coord_mask][:, 1] + half_extract_px ) # Recalculate fit without outliers x_adjustment_coordinates = np.column_stack(( coord_raw[0], coord_raw[1], all_alignment_coords[~coord_mask][:, 0], )) y_adjustment_coordinates = np.column_stack(( coord_raw[0], coord_raw[1], all_alignment_coords[~coord_mask][:, 1], )) x_centroid, x_normal = best_fit_plane(x_adjustment_coordinates) y_centroid, y_normal = best_fit_plane(y_adjustment_coordinates) all_alignment_coords = build_aligned_coords(x_centroid, x_normal, y_centroid, y_normal) else: all_alignment_coords = fit_alignment self.alignment = Alignment.from_fit( origin=self.slide.coord_to_raw(*self.origin), scale=(slide.mpp / self.mpp), centroid=(x_centroid, y_centroid), normal=(x_normal, y_normal) ) for idx, (x, y, xi, yi) in enumerate(self.coord): if np.ma.is_masked(all_alignment_coords[idx][0]): continue bx, by = self.slide.coord_to_raw( x + half_extract_px, y + half_extract_px ) x, y = self.slide.raw_to_coord( bx + all_alignment_coords[idx][0], by + all_alignment_coords[idx][1] ) self.coord[idx, 0] = x - half_extract_px self.coord[idx, 1] = y - half_extract_px # Delete tiles that failed to align. if idx_to_remove and align_by == 'tile': log.warning("Removing {} tiles that failed to align.".format(len(idx_to_remove))) self.coord = np.delete(self.coord, idx_to_remove, axis=0) if align_by != 'fit': self.alignment = Alignment.from_coord( origin=self.slide.coord_to_raw(*self.origin), scale=(slide.mpp / self.mpp), coord=self.coord ) log.info("Slide alignment complete and finetuned at each unmasked tile location.") return all_alignment_coords def apply_alignment(self, alignment: Alignment) -> None: """Apply alignment to the slide. Args: alignment (slideflow.slide.Alignment): Alignment object. 
""" self.alignment = alignment self.origin = self.slide.raw_to_coord(*alignment.origin) if alignment.coord is not None: self.coord = alignment.coord elif alignment.centroid is None: self._build_coord() if self.qc_mask is not None: self.apply_qc_mask() else: self._build_coord() if self.qc_mask is not None: self.apply_qc_mask() if alignment.centroid is not None: x_centroid, y_centroid = alignment.centroid x_normal, y_normal = alignment.normal half_extract_px = int(np.round(self.full_extract_px/2)) for idx, (x, y, xi, yi) in enumerate(self.coord): x = (xi * int(np.round(self.full_stride/alignment.scale))) * alignment.scale y = (yi * int(np.round(self.full_stride/alignment.scale))) * alignment.scale x += self.origin[0] y += self.origin[1] bx, by = self.slide.coord_to_raw( x + half_extract_px, y + half_extract_px ) adjust_x = int(np.round(z_on_plane(bx, by, x_centroid, x_normal))) adjust_y = int(np.round(z_on_plane(bx, by, y_centroid, y_normal))) x, y = self.slide.raw_to_coord(bx + adjust_x, by + adjust_y) self.coord[idx, 0] = x - half_extract_px self.coord[idx, 1] = y - half_extract_px def load_alignment(self, path: str) -> None: """Load alignment from a file. Args: path (str): Path to alignment file. """ self.apply_alignment(Alignment.load(path)) # --- All other functions ----------------------------------------------- def apply_qc_mask( self, mask: Optional[Union[np.ndarray, QCMask]] = None, filter_threshold: Optional[float] = None, *, is_roi: bool = False ) -> "Image": """Apply custom slide-level QC by filtering grid coordinates. The mask should have a shape (height, width) proportional to the slide's dimensions. If the mask is numerical, the mask is thresholded at filter_threshold, with values above the threshold indicating a region to discard. If the mask is a boolean array, True indicates a region to discard and False indicates a region to keep. If the mask is a QCMask, the filter_threshold is ignored. Args: mask (np.ndarray or :class:`slideflow.slide.QCMask`, optional): Boolean QC mask array or ``QCMask`` object. If None, will re-apply the current masks. Defaults to None. filter_threshold (float): Percent of a tile detected as background that will trigger a tile to be discarded. Only used if ``mask`` is an np.ndarray. Defaults to 0.6. Keyword Args: is_roi (bool): Whether the mask is an ROI mask. Only used if ``mask`` is an ``np.ndarray``. Defaults to False. Returns: Image: Image of applied QC mask. """ # If no mask is provided and none has been previously applied, # raise an error. if mask is None and not len(self.qc_masks): raise errors.QCError("No QC mask available") # If no mask provided, re-apply the current masks. if mask is None: for qc_mask in self.qc_masks: self.apply_qc_mask(qc_mask) return Image.fromarray(img_as_ubyte(self.qc_mask)) # Verify that the mask is a np.ndarray or QCMask. if not isinstance(mask, (np.ndarray, QCMask)): raise TypeError("mask must be a np.ndarray or QCMask") # Set the filter threshold if not provided. # If mask is a QCMask, use its filter_threshold. # Otherwise, default to 0.6. if not isinstance(mask, QCMask) and filter_threshold is None: filter_threshold = 0.6 elif filter_threshold is not None and isinstance(mask, QCMask): raise ValueError( "filter_threshold cannot be provided if mask is a QCMask" ) elif filter_threshold is None: filter_threshold = mask.filter_threshold # type: ignore # If the provided mask is an np.ndarray, convert it to a QCMask. 
if not isinstance(mask, QCMask): mask = QCMask(mask, filter_threshold=filter_threshold, is_roi=is_roi) # type: ignore self.qc_masks.append(mask) # Apply the mask to the grid. downsample = self.dimensions[0] / mask.shape[1] qc_ratio = 1 / downsample qc_width = int(np.round(self.full_extract_px * qc_ratio)) for x, y, xi, yi in self.coord: # type: ignore # x and y are top-left coordinates for the tile. qc_x = int(np.round(x * qc_ratio)) qc_y = int(np.round(y * qc_ratio)) submask = mask.mask[qc_y:(qc_y+qc_width), qc_x:(qc_x+qc_width)] if (submask.size > 0) and (np.mean(submask) > filter_threshold): self.grid[xi, yi] = 0 # Update the estimated number of tiles. self.estimated_num_tiles = int(self.grid.sum()) # Return an image of the applied mask. return Image.fromarray(img_as_ubyte(self.qc_mask)) def apply_segmentation(self, segmentation: "sf.cellseg.Segmentation") -> None: """Apply cell segmentation to the slide. This sets the coordinates to the centroids of the segmentation. Args: segmentation (slideflow.cellseg.Segmentation): Segmentation object to apply. """ # Filter out masks outside of ROIs, if present. if self.has_rois(): log.debug(f"Applying {len(self.rois)} ROIs to segmentation.") rois = self.get_rois(ignore_artifact=True) segmentation.apply_rois(1, [r.poly for r in rois]) if segmentation.slide is None: segmentation.slide = self self.segmentation = segmentation centroids = segmentation.centroids(wsi_dim=True) self.seg_coord = np.concatenate( (centroids, np.expand_dims(np.arange(centroids.shape[0]), axis=-1)), axis=-1) nonzero = self.seg_coord[:, 0] > 0 self.seg_coord[:, 0:2][nonzero] -= int(self.full_extract_px/2) self.estimated_num_tiles = centroids.shape[0] def area(self) -> float: """Calculate area (mm^2) of slide that passes QC masking.""" dim_x, dim_y = self.dimensions[0], self.dimensions[1] total_area_in_sq_microns = (dim_x * self.mpp) * (dim_y * self.mpp) if self.qc_mask is not None: s = self.qc_mask.shape p = 1 - (self.qc_mask.sum() / (s[0] * s[1])) area_in_sq_microns = p * total_area_in_sq_microns else: area_in_sq_microns = total_area_in_sq_microns area_in_sq_mm = area_in_sq_microns * 1e-6 return area_in_sq_mm def build_generator( self, *, shuffle: bool = True, whitespace_fraction: float = None, whitespace_threshold: float = None, grayspace_fraction: float = None, grayspace_threshold: float = None, normalizer: Optional[Union[str, "slideflow.norm.StainNormalizer"]] = None, normalizer_source: str = None, context_normalize: bool = False, num_threads: Optional[int] = None, num_processes: Optional[int] = None, show_progress: bool = False, img_format: str = 'numpy', full_core: bool = False, yolo: bool = False, draw_roi: bool = False, pool: Optional["mp.pool.Pool"] = None, dry_run: bool = False, lazy_iter: bool = False, shard: Optional[Tuple[int, int]] = None, max_tiles: Optional[int] = None, from_centroids: bool = False, apply_masks: bool = True, deterministic: bool = True ) -> Optional[Callable]: """Builds a tile generator to extract tiles from this slide. Keyword args: shuffle (bool): Shuffle images during extraction. whitespace_fraction (float, optional): Range 0-1. Defaults to 1. Discard tiles with this fraction of whitespace. If 1, will not perform whitespace filtering. whitespace_threshold (int, optional): Range 0-255. Defaults to 230. Threshold above which a pixel (RGB average) is whitespace. grayspace_fraction (float, optional): Range 0-1. Defaults to 0.6. Discard tiles with this fraction of grayspace. If 1, will not perform grayspace filtering. 
grayspace_threshold (float, optional): Range 0-1. Defaults to 0.05. Pixels in HSV format with saturation below this threshold are considered grayspace. normalizer (str, optional): Normalization strategy to use on image tiles. Defaults to None. normalizer_source (str, optional): Stain normalization preset or path to a source image. Valid presets include 'v1', 'v2', and 'v3'. If None, will use the default present ('v3'). Defaults to None. context_normalize (bool): If normalizing, use context from the rest of the slide when calculating stain matrix concentrations. Defaults to False (normalize each image tile as separate images). num_threads (int): If specified, will extract tiles with a ThreadPool using the specified number of threads. Cannot supply both `num_threads` and `num_processes`. Libvips is particularly slow with ThreadPools. Defaults to None in the Libvips backend, and the number of CPU cores when using cuCIM. num_processes (int): If specified, will extract tiles with a multiprocessing pool using the specified number of processes. Cannot supply both `num_threads` and `num_processes`. With the libvips backend, this defaults to half the number of CPU cores, and with cuCIM, this defaults to None. show_progress (bool, optional): Show a progress bar. img_format (str, optional): Image format. Either 'numpy', 'jpg', or 'png'. Defaults to 'numpy'. yolo (bool, optional): Include yolo-formatted tile-level ROI annotations in the return dictionary, under the key 'yolo'. Defaults to False. draw_roi (bool, optional): Draws ROIs onto extracted tiles. Defaults to False. dry_run (bool, optional): Determine tiles that would be extracted, but do not export any images. Defaults to None. max_tiles (int, optional): Only extract this many tiles per slide. Defaults to None. from_centroids (bool): Extract tiles from cell segmentation centroids, rather than in a grid-wise pattern. Requires that cell segmentation has already been applied with `WSI.apply_segmentation()`. Defaults to False. apply_masks (bool): Apply cell segmentation masks to tiles. Ignored if cell segmentation has been applied to the slide. Defaults to True. deterministic (bool): Return tile images in reproducible, deterministic order. May slightly decrease iteration time. Defaults to True. shard (tuple(int, int), optional): If provided, will only extract tiles from the shard with index `shard[0]` out of `shard[1]` shards. Defaults to None. Returns: A generator that yields a dictionary with the keys: - ``"image"``: image data. - ``"yolo"``: yolo-formatted annotations, (x_center, y_center, width, height), optional. - ``"grid"``: (x, y) grid coordinates of the tile. - ``"loc"``: (x, y) coordinates of tile center, in base (level=0) dimension. """ if (isinstance(num_threads, int) and isinstance(num_processes, int) and num_threads > 1 and num_processes > 1): raise ValueError("num_threads and num_processes cannot both be " "non-zero.") if (shard is not None and (not isinstance(shard, (tuple, list)) or len(shard) != 2 or any(not isinstance(s, int) for s in shard))): raise ValueError("If shard is provided, it must be a tuple of " "two int (shard_idx, shard_count)") if from_centroids and self.segmentation is None: raise ValueError( "Cannot build generator from segmentation centroids; " "segmentation not yet applied. Use WSI.apply_segmentation()." 
) self._log_tile_extraction() if self.estimated_num_tiles == 0: log.warning(f"No tiles extracted for slide [green]{self.name}") return None # Set whitespace / grayspace fraction to defaults if not provided if whitespace_fraction is None: whitespace_fraction = DEFAULT_WHITESPACE_FRACTION if whitespace_threshold is None: whitespace_threshold = DEFAULT_WHITESPACE_THRESHOLD if grayspace_fraction is None: grayspace_fraction = DEFAULT_GRAYSPACE_FRACTION if grayspace_threshold is None: grayspace_threshold = DEFAULT_GRAYSPACE_THRESHOLD # Get information about highest level downsample, as we will filter # on that layer if downsampling is enabled if self.enable_downsample: downsamples = np.array(self.slide.level_downsamples) filter_lev = np.max(np.argwhere(downsamples < self.extract_px)) filter_downsample_factor = self.slide.level_downsamples[filter_lev] lev_ds = self.slide.level_downsamples[self.downsample_level] filter_downsample_ratio = filter_downsample_factor // lev_ds else: filter_lev = self.downsample_level filter_downsample_ratio = 1 # Prepare stain normalization if normalizer and not isinstance(normalizer, sf.norm.StainNormalizer): if sf.slide_backend() == 'cucim': normalizer = sf.norm.autoselect( # type: ignore method=normalizer, source=normalizer_source ) else: # Libvips with spawn multiprocessing # is not compatible with Tensorflow-native stain normalization # due to GPU memory issues normalizer = sf.norm.StainNormalizer(normalizer) # type: ignore if normalizer_source is not None: normalizer.fit(normalizer_source) # type: ignore if normalizer and context_normalize: assert isinstance(normalizer, sf.norm.StainNormalizer) log.debug("Preparing whole-slide context for normalizer") normalizer.set_context(self) w_args = SimpleNamespace(**{ 'full_extract_px': self.full_extract_px, 'mpp_override': self._mpp_override, 'reader_kwargs': self._reader_kwargs, 'grid': self.grid, 'downsample_level': self.downsample_level, 'filter_downsample_level': filter_lev, 'filter_downsample_ratio': filter_downsample_ratio, 'path': self.path, 'extract_px': self.extract_px, 'tile_px': self.tile_px, 'full_stride': self.full_stride, 'normalizer': normalizer, 'whitespace_fraction': whitespace_fraction, 'whitespace_threshold': whitespace_threshold, 'grayspace_fraction': grayspace_fraction, 'grayspace_threshold': grayspace_threshold, 'img_format': img_format, 'yolo': yolo, 'draw_roi': draw_roi, 'dry_run': dry_run, 'has_segmentation': from_centroids }) def generator(): nonlocal pool, num_threads, num_processes should_close = False n_extracted = 0 # Skip tiles filtered out with QC or ROI if not from_centroids: non_roi_coord = self.coord[ self.grid[tuple(self.coord[:, 2:4].T)].astype(bool) ] # Shuffle coordinates to randomize extraction order if shuffle: np.random.shuffle(non_roi_coord) num_possible_tiles = len(non_roi_coord) else: from slideflow.cellseg import seg_utils log.info("Building generator from segmentation centroids.") nonzero = self.seg_coord[:, 0] > 0 num_possible_tiles = nonzero.sum() if apply_masks: sparse = seg_utils.sparse_mask(self.segmentation.masks) def _sparse_generator(): def proc(c): mask = None if not apply_masks else self.get_tile_mask(c[2], sparse) return c, mask if shuffle: for idx in np.random.permutation(self.seg_coord.shape[0]): if nonzero[idx]: yield proc(self.seg_coord[idx]) else: for c in self.seg_coord[nonzero]: yield proc(c) non_roi_coord = _sparse_generator() if shard is not None: shard_idx, shard_count = shard sharded_coords = np.array_split(non_roi_coord, shard_count) non_roi_coord = 
sharded_coords[shard_idx] # Set up worker pool if pool is None: if num_threads is None and num_processes is None: # Libvips is extremely slow with ThreadPools. # In the cuCIM backend, ThreadPools are used by default # to reduce memory utilization. # In the Libvips backend, a multiprocessing pool is default # to significantly improve performance. n_cores = sf.util.num_cpu(default=8) if sf.slide_backend() == 'libvips': num_processes = max(int(n_cores/2), 1) else: num_threads = n_cores if num_threads is not None and num_threads > 1: log.debug(f"Building generator ThreadPool({num_threads})") pool = mp.dummy.Pool(processes=num_threads) should_close = True elif num_processes is not None and num_processes > 1: ptype = 'spawn' if sf.slide_backend() == 'libvips' else 'fork' log.debug(f"Building generator with Pool({num_processes}), " f"type={ptype}") ctx = mp.get_context(ptype) pool = ctx.Pool( processes=num_processes, initializer=sf.util.set_ignore_sigint, ) should_close = True else: log.debug(f"Building generator without multithreading") def _generator(): for c in non_roi_coord: yield tile_worker(c, args=w_args) i_mapped = _generator() else: log.debug("Building generator with a shared pool") if show_progress: pbar = Progress(transient=sf.getLoggingLevel() > 20) task = pbar.add_task('Extracting...', total=self.estimated_num_tiles) pbar.start() else: pbar = None if pool is not None: map_fn = pool.imap if deterministic else pool.imap_unordered if lazy_iter: if max_tiles: batch_size = min(pool._processes, max_tiles) else: batch_size = pool._processes batched_coord = sf.util.batch(non_roi_coord, batch_size) def _generator(): for batch in batched_coord: yield from map_fn( partial(tile_worker, args=w_args), batch ) i_mapped = _generator() else: csize = max(min(int(self.estimated_num_tiles/pool._processes), 64), 1) log.debug(f"Using imap chunksize={csize}") i_mapped = map_fn( partial(tile_worker, args=w_args), non_roi_coord, chunksize=csize ) with sf.util.cleanup_progress(pbar): for e, result in enumerate(i_mapped): if show_progress: pbar.advance(task, 1) elif self.pb is not None: self.pb.advance(0) if result is None: continue else: yield result n_extracted += 1 if max_tiles and n_extracted >= max_tiles: break if should_close: pool.close() # Reset stain normalizer context if normalizer and context_normalize: assert isinstance(normalizer, sf.norm.StainNormalizer) normalizer.clear_context() name_msg = f'[green]{self.shortname}[/]' num_msg = f'({n_extracted} tiles of {num_possible_tiles} possible)' log_fn = log.info if self.verbose else log.debug log_fn(f"Finished tile extraction for {name_msg} {num_msg}") return generator def coord_to_grid( self, x: int, y: int, *, anchor: str = 'center' ) -> Tuple[int, int]: """Find the grid index of a tile by its base-level coordinates. Args: x (int): x-coordinate of the tile, in base (level=0) dimension. y (int): y-coordinate of the tile, in base (level=0) dimension. Keyword args: anchor (str): Anchor point for the coordinates. Either 'topleft' or 'center'. Defaults to 'center'. Returns: Tuple[int, int]: Grid index of the tile. Raises: ValueError: If anchor is not 'topleft' or 'center'. IndexError: If tile is not found at the given coordinates. 
""" if anchor not in ('topleft', 'center'): raise ValueError("anchor must be 'topleft' or 'center'") if anchor == 'center': x -= int(self.full_extract_px/2) y -= int(self.full_extract_px/2) coord_idx, = np.where(( (self.coord[:, 0] == x) & (self.coord[:, 1] == y) )) if not len(coord_idx): raise IndexError(f"Tile at coord=({x}, {y}) not found") assert len(coord_idx) == 1 x, y, grid_x, grid_y = self.coord[coord_idx[0]] return grid_x, grid_y def dim_to_mpp(self, dimensions: Tuple[float, float]) -> float: return (self.dimensions[0] * self.mpp) / dimensions[0] def export_rois(self, dest: Optional[str] = None) -> str: """Export loaded ROIs to a given destination, in CSV format. ROIs are exported with the columns 'roi_name', 'x_base', and 'y_base'. Coordinates are in base dimension (level 0) of the slide. Args: dest (str): Path to destination folder. If not provided, will export ROIs in the current folder. Defaults to None. Returns: None """ names, labels, x, y = [], [], [], [] def append_roi(roi): nonlocal names, labels, x, y c = np.array(roi.coordinates) assert len(c.shape) == 2 names += [roi.name] * c.shape[0] labels += [roi.label] * c.shape[0] x += list(c[:, 0]) y += list(c[:, 1]) for roi in self.rois: append_roi(roi) for hole in roi.holes.values(): append_roi(hole) df = pd.DataFrame({ 'roi_name': names, 'label': labels, 'x_base': x, 'y_base': y }) if dest is None: dest = f'{self.name}.csv' df.to_csv(dest, index=False) log.info(f"{len(self.rois)} ROIs exported to {abspath(dest)}") return abspath(dest) def get_qc_mask(self, roi: bool = True) -> Optional[np.ndarray]: """Return the combined QC mask for the slide. Args: roi (bool): Whether to include ROI masks. Defaults to True. """ _all_masks = [m for m in self.qc_masks if (roi or (not m.is_roi))] if not _all_masks: return None elif len(_all_masks) == 1: return _all_masks[0].mask else: _, smallest = min((m.shape[0], idx) for (idx, m) in enumerate(_all_masks)) shape = _all_masks[smallest].shape mask = skimage.transform.resize(_all_masks[0].mask, shape).astype(bool) for _next in _all_masks[1:]: _next_m = skimage.transform.resize(_next.mask, shape).astype(bool) mask = np.logical_or(mask, _next_m) return mask def get_masked_coord(self) -> np.ma.core.MaskedArray: """Get a masked array of the coordinate grid, masked by QC. The returned masked array is of shape (n, 4), where n is the number of tiles. The columns are (x, y, grid_x, grid_y), where x and y are the top-left coordinates of the tile, and grid_x and grid_y are the grid indices of the tile. """ true_grid_indices = np.flatnonzero(self.grid) linear_indices_of_coord = np.ravel_multi_index( self.coord[:, 2:4].T, dims=self.grid.shape ) unmasked_coord_indices = np.in1d( linear_indices_of_coord, true_grid_indices ) return np.ma.masked_array( self.coord, mask=~np.repeat(unmasked_coord_indices[:, None], 4, axis=1) ) def get_rois(self, ignore_artifact: bool = False) -> List[ROI]: """Get a list of ROIs. Args: ignore_artifact (bool): Ignore artifact ROIs. Defaults to False. Returns: List[ROI]: List of ROI objects. """ if ignore_artifact: return [roi for roi in self.rois if roi.label not in self.artifact_labels] return self.rois def get_artifacts(self) -> List[ROI]: """Get a list of artifact ROIs. Returns: List[ROI]: List of artifact ROI objects. """ return [roi for roi in self.rois if roi.label in self.artifact_labels] def get_roi_by_name(self, name: str) -> Optional[ROI]: """Get an ROI by its name. Args: name (str): Name of the ROI. Returns: ROI: ROI object. 
""" for roi in self.rois: if roi.name == name: return roi return None def get_tile_coord(self, anchor='topleft') -> np.ndarray: """Get a coordinate grid of all tiles, restricted to those that pass QC and any ROI filtering. The returned array is of shape (n, 4), where n is the number of tiles. The columns are (x, y, grid_x, grid_y), where x and y are the top-left coordinates of the tile, and grid_x and grid_y are the grid indices of the tile. """ if anchor not in ('center', 'topleft'): raise ValueError("Expected `anchor` to be 'center' or 'topleft'") c = self.coord[ self.grid[tuple(self.coord[:, 2:4].T)].astype(bool) ].copy() if anchor == 'center': c[:, 0] += int(self.full_extract_px/2) c[:, 1] += int(self.full_extract_px/2) return c def get_tile_dataframe(self) -> pd.DataFrame: """Build a dataframe of tiles and associated ROI labels. Returns: Pandas dataframe of all tiles, with the following columns: - ``loc_x``: X-coordinate of tile center - ``loc_y``: Y-coordinate of tile center - ``grid_x``: X grid index of the tile - ``grid_y``: Y grid index of the tile - ``roi_name``: Name of the ROI if tile is in an ROI, else None - ``roi_desc``: Description of the ROI if tile is in ROI, else None - ``label``: ROI label, if present. """ roi_names = [] roi_desc = [] labels = [] index = [] loc = [] grid = [] for x, y, xi, yi in self.coord: if not self.grid[xi, yi]: continue _, roi = self.get_tile_roi(grid=(xi, yi)) # Convert from top-left to center coordinates x += int(self.full_extract_px/2) y += int(self.full_extract_px/2) loc.append([x, y]) grid.append([xi, yi]) roi_names.append(None if not roi else roi.name) roi_desc.append(None if not roi else roi.description) labels.append(None if not roi else roi.label) index.append(f'{self.name}-{x}-{y}') loc = np.array(loc) grid = np.array(grid) df = pd.DataFrame({ 'loc_x': loc[:, 0], 'loc_y': loc[:, 1], 'grid_x': grid[:, 0], 'grid_y': grid[:, 1], 'roi_name': roi_names, 'roi_desc': roi_desc, 'label': labels }, index=index) return df def get_tile_roi_mask( self, *, grid: Optional[Tuple[int, int]] = None, loc: Optional[Tuple[int, int]] = None, mode: str = 'binary', roi_labels: Optional[List[str]] = None ) -> np.ndarray: """Get the ROI mask for a tile at the given location. Keyword Args: grid (tuple[int, int], optional): Grid indices of the tile. Must supply either ``grid`` or ``loc``. Defaults to None. loc (tuple[int, int], optional): Location of the tile center. Must supply either ``grid`` or ``loc``. Defaults to None. mode (str, optional): 'binary', 'multiclass', or 'multilabel'. Defaults to 'binary'. roi_labels (list[str], optional): List of ROI labels to include. Defaults to None. Returns: np.ndarray: ROI mask for the tile, with dtype int and shape (n, tile_px, tile_px), where n is the number of ROI labels. """ if grid is None and loc is None: raise ValueError("Either grid or loc must be provided.") # Definitions. fe = self.full_extract_px fs = self.full_stride scale = self.tile_px / fe # Get the polygon vertices for the tile. if grid is not None: # Convert from grid to top-left coordinates gx, gy = grid topleft = (gx * fs, gy * fs) bottomleft = (gx * fs, (gy * fs) + fe) bottomright = ((gx * fs) + fe, (gy * fs) + fe) topright = ((gx * fs) + fe, gy * fs) else: # Convert from center to top-left coordinates cx, cy = loc cx -= int(fe / 2) cy -= int(fe / 2) topleft = (cx, cy) bottomleft = (cx, cy + fe) bottomright = (cx + fe, cy + fe) topright = (cx + fe, cy) # Get a polygon for the tile, used for determining overlapping ROIs. 
tile = sg.Polygon([topleft, bottomleft, bottomright, topright]) # Compute the mask from ROIs. if len(self.rois) == 0: if roi_labels: mask = np.zeros((len(roi_labels), self.tile_px, self.tile_px), dtype=int) else: mask = np.zeros((1, self.tile_px, self.tile_px), dtype=int) # Handle ROIs with labels (multilabel or multiclass) elif roi_labels: labeled_masks = [] for label in roi_labels: wsi_polys = [p.poly for p in self.rois if p.label == label] if len(wsi_polys) == 0: mask = np.zeros((self.tile_px, self.tile_px), dtype=int) labeled_masks.append(mask) else: all_polys = unary_union(wsi_polys) polys = get_scaled_and_intersecting_polys( all_polys, tile, scale, topleft ) if isinstance(polys, sg.Polygon) and polys.is_empty: mask = np.zeros((self.tile_px, self.tile_px), dtype=int) else: # Rasterize to an int mask. mask = rasterio.features.rasterize( [polys], out_shape=[self.tile_px, self.tile_px] ) mask = mask.astype(int) labeled_masks.append(mask) mask = np.stack(labeled_masks, axis=0) # Handle ROIs without labels (binary) else: # Determine the intersection at the given tile location. all_polys = unary_union([p.poly for p in self.rois]) polys = get_scaled_and_intersecting_polys( all_polys, tile, scale, topleft ) if isinstance(polys, sg.Polygon) and polys.is_empty: mask = np.zeros((self.tile_px, self.tile_px), dtype=int) else: # Rasterize to an int mask. try: mask = rasterio.features.rasterize( [polys], out_shape=[self.tile_px, self.tile_px] ) mask = mask.astype(bool).astype(np.int32) except ValueError: mask = np.zeros((self.tile_px, self.tile_px), dtype=int) # Add a dummy channel dimension. mask = mask[None, :, :] # Process according to the mode. if mode == 'multiclass': mask = mask * np.arange(1, mask.shape[0]+1)[:, None, None] mask = mask.max(axis=0) elif mode == 'binary' and mask.ndim == 3: mask = np.any(mask, axis=0)[None, :, :].astype(int) return mask def has_non_roi_qc(self) -> bool: """Check if the slide has any non-ROI QC masks.""" return any(not m.is_roi for m in self.qc_masks) def extract_tiles( self, tfrecord_dir: Optional[str] = None, tiles_dir: Optional[str] = None, img_format: str = 'jpg', report: bool = True, **kwargs ) -> Optional[SlideReport]: """Extracts tiles from slide using the build_generator() method, saving tiles into a TFRecord file or as loose JPG tiles in a directory. Args: tfrecord_dir (str): If provided, saves tiles into a TFRecord file (named according to slide name) here. tiles_dir (str): If provided, saves loose images in a subdirectory (per slide name) here. img_format (str): 'png' or 'jpg'. Format of images for internal storage in tfrecords. PNG (lossless) format recommended for fidelity, JPG (lossy) for efficiency. Defaults to 'jpg'. Keyword Args: whitespace_fraction (float, optional): Range 0-1. Defaults to 1. Discard tiles with this fraction of whitespace. If 1, will not perform whitespace filtering. whitespace_threshold (int, optional): Range 0-255. Defaults to 230. Threshold above which a pixel (RGB average) is whitespace. grayspace_fraction (float, optional): Range 0-1. Defaults to 0.6. Discard tiles with this fraction of grayspace. If 1, will not perform grayspace filtering. grayspace_threshold (float, optional): Range 0-1. Defaults to 0.05. Pixels in HSV format with saturation below this threshold are considered grayspace. normalizer (str, optional): Normalization to use on image tiles. Defaults to None. normalizer_source (str, optional): Stain normalization preset or path to a source image. Valid presets include 'v1', 'v2', and 'v3'. 
If None, will use the default preset ('v3'). Defaults to None. full_core (bool, optional): Extract an entire detected core, rather than subdividing into image tiles. Defaults to False. shuffle (bool): Shuffle images during extraction. yolo (bool, optional): Export yolo-formatted tile-level ROI annotations (.txt) in the tile directory. Requires that tiles_dir is set. Defaults to False. draw_roi (bool, optional): Draws ROIs onto extracted tiles. Defaults to False. dry_run (bool, optional): Determine tiles that would be extracted, but do not export any images. Defaults to False. num_threads (int): If specified, will extract tiles with a ThreadPool using the specified number of threads. Cannot supply both `num_threads` and `num_processes`. Libvips is particularly slow with ThreadPools. Defaults to None in the Libvips backend, and the number of CPU cores when using cuCIM. num_processes (int): If specified, will extract tiles with a multiprocessing pool using the specified number of processes. Cannot supply both `num_threads` and `num_processes`. With the libvips backend, this defaults to half the number of CPU cores, and with cuCIM, this defaults to None. """ if img_format not in ('png', 'jpg', 'jpeg'): raise ValueError(f"Invalid image format {img_format}") dry_run = kwargs['dry_run'] if 'dry_run' in kwargs else False # Make base directories if tfrecord_dir and not dry_run: if not exists(tfrecord_dir): os.makedirs(tfrecord_dir) if tiles_dir and not dry_run: tiles_dir = os.path.join(tiles_dir, self.name) if not os.path.exists(tiles_dir): os.makedirs(tiles_dir) # Log to keep track of when tiles have finished extracting # To be used in case tile extraction is interrupted, so the slide # can be flagged for re-extraction if (tfrecord_dir or tiles_dir) and not dry_run: unfinished_marker = join( (tfrecord_dir if tfrecord_dir else tiles_dir), # type: ignore f'{self.name}.unfinished' ) with open(unfinished_marker, 'w') as marker_file: marker_file.write(' ') if tfrecord_dir and not dry_run: writer = sf.io.TFRecordWriter(join( tfrecord_dir, self.name+".tfrecords" )) generator = self.build_generator( img_format=img_format, **kwargs ) if not generator: if tfrecord_dir: os.remove(join(tfrecord_dir, self.name+".tfrecords")) return None sample_tiles = [] # type: List generator_iterator = generator() locations = [] grid_locations = [] ws_fractions = [] gs_fractions = [] num_wrote_to_tfr = 0 slide_bytes = bytes(self.name, 'utf-8') for index, tile_dict in enumerate(generator_iterator): x, y = location = tile_dict['loc'] locations += [location] grid_locations += [tile_dict['grid']] if 'ws_fraction' in tile_dict: ws_fractions += [tile_dict['ws_fraction']] if 'gs_fraction' in tile_dict: gs_fractions += [tile_dict['gs_fraction']] if dry_run: continue img_str = tile_dict['image'] if len(sample_tiles) < 10: sample_tiles += [img_str] elif (not tiles_dir and not tfrecord_dir) and not dry_run: break if tiles_dir: img_f = join( tiles_dir, f'{self.shortname}-{x}-{y}.{img_format}' ) with open(img_f, 'wb') as outfile: outfile.write(img_str) if 'yolo' in tile_dict and len(tile_dict['yolo']): yolo_f = join(tiles_dir, f'{self.shortname}-{x}-{y}.txt') with open(yolo_f, 'w') as outfile: for ann in tile_dict['yolo']: yolo_str_fmt = "0 {:.3f} {:.3f} {:.3f} {:.3f}\n" outfile.write(yolo_str_fmt.format( ann[0], ann[1], ann[2], ann[3] )) if tfrecord_dir: record = sf.io.serialized_record(slide_bytes, img_str, x, y) writer.write(record) num_wrote_to_tfr += 1 if tfrecord_dir and
not dry_run: writer.close() if not num_wrote_to_tfr: os.remove(join(tfrecord_dir, self.name+".tfrecords")) log.info(f'No tiles extracted for [green]{self.name}') if self.pb is None: generator_iterator.close() if (tfrecord_dir or tiles_dir) and not dry_run: try: os.remove(unfinished_marker) except OSError: log.error(f"Unable to mark slide {self.name} as complete") # Generate extraction report if report: log.debug("Generating slide report") loc_np = np.array(locations, dtype=np.int64) grid_np = np.array(grid_locations, dtype=np.int64) df_dict = { 'loc_x': [] if not len(loc_np) else pd.Series(loc_np[:, 0], dtype=int), 'loc_y': [] if not len(loc_np) else pd.Series(loc_np[:, 1], dtype=int), 'grid_x': [] if not len(grid_np) else pd.Series(grid_np[:, 0], dtype=int), 'grid_y': [] if not len(grid_np) else pd.Series(grid_np[:, 1], dtype=int) } if ws_fractions: df_dict.update({'ws_fraction': pd.Series(ws_fractions, dtype=float)}) if gs_fractions: df_dict.update({'gs_fraction': pd.Series(gs_fractions, dtype=float)}) report_data = dict( blur_burden=self.blur_burden, num_tiles=len(locations), qc_mask=self.qc_mask, locations=pd.DataFrame(df_dict), num_rois=(0 if self.roi_method == 'ignore' else len(self.rois)), tile_px=self.tile_px, tile_um=self.tile_um, ) slide_report = SlideReport( sample_tiles, self.slide.path, data=report_data, thumb_coords=locations, tile_px=self.tile_px, tile_um=self.tile_um, ) return slide_report else: log.debug("Skipping slide report") return None def extract_cells( self, tfrecord_dir: Optional[str] = None, tiles_dir: Optional[str] = None, img_format: str = 'jpg', report: bool = True, apply_masks: bool = True, **kwargs ) -> Optional[SlideReport]: """Extract tiles from cell segmentation centroids. Args: tfrecord_dir (str): If provided, saves tiles into a TFRecord file (named according to slide name) here. tiles_dir (str): If provided, saves loose images into a subdirectory (per slide name) here. img_format (str): 'png' or 'jpg'. Format of images for internal storage in tfrecords. PNG (lossless) format recommended for fidelity, JPG (lossy) for efficiency. Defaults to 'jpg'. report (bool): Generate and return PDF report of tile extraction. apply_masks (bool): Apply cell segmentation masks to the extracted tiles. Defaults to True. Keyword Args: **kwargs: All keyword arguments are passed to :meth:`WSI.extract_tiles()`. """ if self.segmentation is None: raise ValueError( "Cannot build generator from segmentation centroids; " "segmentation not yet applied. Use WSI.apply_segmentation()." ) return self.extract_tiles( tfrecord_dir, tiles_dir, img_format, report, apply_masks=apply_masks, from_centroids=True, **kwargs ) def get_tile_roi( self, coord: Optional[Tuple[int, int]] = None, grid: Optional[Tuple[int, int]] = None, ) -> Tuple[Optional[int], Optional[str]]: """Find the ROI that contains a given tile. Args: coord (Tuple[int, int], optional): Base-level coordinates of the tile. Cannot supply both ``coord`` and ``grid``. Defaults to None. grid (Tuple[int, int], optional): Grid index of the tile. Cannot supply both ``coord`` and ``grid``. Defaults to None. Returns: Tuple[int, ROI]: ROI index (index of WSI.rois) and the :class:`slideflow.slide.ROI` that contains the tile. If no ROI contains the tile, returns (None, None). 
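        Example:
            A minimal sketch; the grid index is illustrative:

                >>> idx, roi = wsi.get_tile_roi(grid=(5, 10))
                >>> if roi is not None:
                ...     print(idx, roi.name)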
""" if coord is not None and grid is not None: raise ValueError("Cannot specify both coord and grid") if coord is not None: grid = self.coord_to_grid(*coord) elif grid is None: raise ValueError("Must specify either coord or grid") if self.roi_grid is None: return None, None grid_x, grid_y = grid roi_idx = self.roi_grid[grid_x, grid_y] - 1 if roi_idx == -1: return None, None else: return roi_idx, self.rois[roi_idx] def grid_to_coord( self, grid_x: int, grid_y: int, *, anchor: str = 'center' ) -> Tuple[int, int]: """Find the base-level coordinates of a tile by its grid index. Args: grid_x (int): x-index of the tile in the grid. grid_y (int): y-index of the tile in the grid. Keyword args: anchor (str): Anchor point for the coordinates. Either 'topleft' or 'center'. Defaults to 'center'. Returns: Tuple[int, int]: Base-level coordinates of the tile. Raises: ValueError: If anchor is not 'topleft' or 'center'. IndexError: If tile is not found at the given coordinates. """ if anchor not in ('topleft', 'center'): raise ValueError("anchor must be 'topleft' or 'center'") grid_idx, = np.where(( (self.coord[:, 2] == grid_x) & (self.coord[:, 3] == grid_y) )) if not len(grid_idx): raise IndexError(f"Tile at grid=({grid_x}, {grid_y}) not found") assert len(grid_idx) == 1 x, y, grid_x, grid_y = self.coord[grid_idx[0]] if anchor == 'center': x += int(self.full_extract_px/2) y += int(self.full_extract_px/2) return x, y def get_tile_mask(self, index, sparse_mask) -> np.ndarray: """Get a mask for a tile, given a sparse mask. Examples Get a mask for a tile, given a sparse mask. >>> from slideflow.cellseg import seg_utils, Segmentation >>> segmentation = Segmentation(...) >>> wsi = sf.WSI(...) >>> wsi.apply_segmentation(segmentation) >>> sparse_mask = seg_utils.sparse_mask(segmentation.masks) >>> wsi.get_tile_mask(0, sparse_mask) <numpy.ndarray> Args: index (int): Index of tile. sparse_mask (scipy.sparse.csr_matrix): Sparse mask. Returns: numpy.ndarray: Mask for tile. """ # Get the corresponding segmentation mask, reading from the sparse matrix seg = self.segmentation if seg is None: raise ValueError("Segmentation not yet applied to slide.") mask_idx = self.seg_coord[index][2] + 1 # sparse mask index starts at 1 mask_y, mask_x = np.unravel_index(sparse_mask[mask_idx].data, seg.masks.shape) # This is the top-left coordinate, in WSI base dimension, # of the tile extraction window. wsi_tile_top_left = self.seg_coord[index][0:2] # Determine the mask array offset (top-left), in mask coordinate space. wsi_mask_x_offset = np.round(seg.wsi_offset[0] / seg.wsi_ratio).astype(np.int32) wsi_mask_y_offset = np.round(seg.wsi_offset[1] / seg.wsi_ratio).astype(np.int32) # Offset the mask to reflect WSI space (but still in mask coordinates). wsi_mask_x = mask_x + wsi_mask_x_offset wsi_mask_y = mask_y + wsi_mask_y_offset # Determine the tile window offset (top-left), in mask coordinate space. tile_offset_x_in_mask_space = np.round(wsi_tile_top_left[0] / seg.wsi_ratio).astype(np.int32) tile_offset_y_in_mask_space = np.round(wsi_tile_top_left[1] / seg.wsi_ratio).astype(np.int32) # Adjust the mask coordinate space, using the tile window offset as origin. tile_mask_x = (wsi_mask_x - tile_offset_x_in_mask_space) tile_mask_y = (wsi_mask_y - tile_offset_y_in_mask_space) # Calculate the size of the tile window, in mask coordinate space. mask_tile_size = int(self.full_extract_px / seg.wsi_ratio) # Clip the mask to the tile window view. 
tile_mask_x = tile_mask_x.clip(0, mask_tile_size-1) tile_mask_y = tile_mask_y.clip(0, mask_tile_size-1) # Convert mask coordinates (in sparse format) to 2D array. unsized = np.zeros((mask_tile_size, mask_tile_size), dtype=np.int32) unsized[tile_mask_y, tile_mask_x] = 1 # Resize mask from mask coordinates to tile extraction WSI coordinates. return unsized def has_rois(self) -> bool: """Checks if the slide has loaded ROIs and they are not being ignored.""" return (self.roi_method != 'ignore' and len(self.rois)) def get_next_roi_name(self) -> str: """Get the next available name for an ROI.""" existing = [ int(r.name[4:]) for r in self.rois if r.name.startswith('ROI_') and r.name[4:].isnumeric() ] hole_ids = [ int(hole.name[4:]) for r in self.rois for hole in r.holes.values() if hole.name.startswith('ROI_') and hole.name[4:].isnumeric() ] existing += hole_ids roi_id = max(existing) + 1 if existing else 0 name = f'ROI_{roi_id}' return name def load_roi_array( self, array: np.ndarray, *, process: bool = True, label: Optional[str] = None, name: Optional[str] = None, allow_errors: bool = False, simplify_tolerance: Optional[float] = None ) -> int: """Load an ROI from a numpy array. Args: array (np.ndarray): Array of shape (n_points, 2) containing the coordinates of the ROI shape, in base (level=0) dimension. Keyword Args: process (bool): Process ROIs after loading. Defaults to True. """ name = name or self.get_next_roi_name() try: roi = ROI(name, array, label=label) except errors.InvalidROIError as e: if allow_errors: log.warn("Unable to load ROI: {}".format(e)) return else: raise if simplify_tolerance is not None: roi.simplify(simplify_tolerance) self.rois.append(roi) if self.roi_method == 'auto': self.roi_method = 'inside' if process: self.process_rois() for i, _roi in enumerate(self.rois): if _roi == roi: return i for hole in _roi.holes.values(): if hole == roi: return i return None def load_csv_roi( self, path: str, *, process: bool = True, scale: int = 1, skip_invalid: bool = True, simplify_tolerance: Optional[float] = None ) -> int: """Load ROIs from a CSV file. CSV file must contain headers 'ROI_name', 'X_base', and 'Y_base'. Any previously loaded ROIs are cleared prior to loading. Args: path (str): Path to CSV file. Keyword Args: process (bool): Process ROIs after loading. Defaults to True. scale (int): Scale factor to apply to ROI coordinates. Defaults to 1. """ # Clear any previously loaded ROIs. self.rois = [] roi_dict = {} with open(path, "r") as csvfile: reader = csv.reader(csvfile, delimiter=',') try: headers = next(reader, None) if headers is None: raise Exception headers = [h.lower() for h in headers] index_name = headers.index("roi_name") index_x = headers.index("x_base") index_y = headers.index("y_base") except Exception: raise errors.ROIError( f'Unable to read CSV ROI [green]{path}[/]. Please ensure ' 'headers contain "ROI_name", "X_base and "Y_base".' 
) index_label = None if not "label" in headers else headers.index("label") for row in reader: roi_name = row[index_name] x_coord = int(float(row[index_x]) * scale) y_coord = int(float(row[index_y]) * scale) label = None if index_label is None else row[index_label] if roi_name not in roi_dict: roi_dict[roi_name] = { 'coords': [], 'label': label } roi_dict[roi_name]['coords'].append((x_coord, y_coord)) for roi_name in roi_dict: try: roi = ROI( roi_name, np.array(roi_dict[roi_name]['coords']), label=roi_dict[roi_name]['label'] ) except errors.InvalidROIError as e: if skip_invalid: log.warn("Skipping invalid ROI ({}): {}".format(roi_name, e)) continue else: raise else: if simplify_tolerance is not None: roi.simplify(simplify_tolerance) self.rois.append(roi) if process: self.process_rois() log.debug(f"Loaded ROIs from {path}") return len(self.rois) def load_json_roi( self, path: str, *, scale: int = 1, process: bool = True, skip_invalid: bool = True ) -> int: """Load ROIs from a JSON file. JSON file must contain a 'shapes' key, with a list of dictionaries containing a 'points' key, whose value is a list of (x, y) coordinates. Args: path (str): Path to JSON file. scale (int): Scale factor to apply to ROI coordinates. Defaults to 1. process (bool): Process ROIs after loading. Defaults to True. """ # Clear any previously loaded ROIs. self.rois = [] with open(path, "r") as json_file: json_data = json.load(json_file)['shapes'] for shape in json_data: area_reduced = np.multiply(shape['points'], scale).astype(np.int64) roi_name = self.get_next_roi_name() try: self.rois.append(ROI(roi_name, area_reduced)) except errors.InvalidROIError as e: if skip_invalid: log.warn("Skipping invalid ROI ({}): {}".format(roi_name, e)) if process: self.process_rois() if self.roi_method == 'auto': self.roi_method = 'inside' return len(self.rois) def masked_thumb(self, background: str = 'white', **kwargs) -> np.ndarray: """Return a masked thumbnail of a slide, using QC and/or ROI masks. Args: background (str, optional): Background color. Defaults to 'white'. Keyword args: **kwargs: Keyword arguments passed to :meth:`WSI.thumb()`. Returns: np.ndarray: Masked thumbnail image. """ if background not in ('white', 'black'): raise ValueError( f"Unexpected background option: '{background}'. Expected " "'black' or 'white'." ) qc_mask = self.qc_mask roi_mask = self.roi_mask image = np.asarray(self.thumb(**kwargs)) if qc_mask is None and roi_mask is None: # Apply Otsu's threshold to background area # to prevent whitespace from interfering with normalization from slideflow.slide.qc import Otsu, GaussianV2 sf.log.debug( "Applying Otsu's thresholding & Gaussian blur filter " "to stain norm context" ) _blur_mask = GaussianV2()(image) qc_mask = Otsu()(image, mask=_blur_mask) # Mask by ROI and QC, if applied. # Use white as background for masked areas. 
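        # Both masks are resized to the thumbnail resolution before combining.
        # QC masks flag regions to discard, so the resized QC mask is inverted;
        # if an ROI mask is also present, the two are combined with a bitwise AND.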
if qc_mask is not None: qc_img = img_as_ubyte(qc_mask) mask = ~cv2.resize(qc_img, (image.shape[1], image.shape[0])) if roi_mask is not None: roi_img = img_as_ubyte(roi_mask) roi_mask = cv2.resize(roi_img, (image.shape[1], image.shape[0])) if qc_mask is not None: mask = mask & roi_mask else: mask = roi_mask if background == 'white': white_bg = np.full(image.shape, 255, dtype=np.uint8) white_mask = cv2.bitwise_or(white_bg, white_bg, mask=~mask) return cv2.bitwise_or(image, white_mask) else: return cv2.bitwise_or(image, image, mask=mask) def mpp_to_dim(self, mpp: float) -> Tuple[int, int]: width = int((self.mpp * self.dimensions[0]) / mpp) height = int((self.mpp * self.dimensions[1]) / mpp) return (width, height) def predict( self, model: str, **kwargs ) -> Tuple[np.ndarray, Optional[np.ndarray]]: """Generate a whole-slide prediction from a saved model. Args: model (str): Path to saved model trained in Slideflow. Keyword args: batch_size (int, optional): Batch size for calculating predictions. Defaults to 32. num_threads (int, optional): Number of tile worker threads. Cannot supply both ``num_threads`` (uses thread pool) and ``num_processes`` (uses multiprocessing pool). Defaults to CPU core count. num_processes (int, optional): Number of child processes to spawn for multiprocessing pool. Defaults to None (does not use multiprocessing). img_format (str, optional): Image format (png, jpg) to use when extracting tiles from slide. Must match the image format the model was trained on. If 'auto', will use the format logged in the model params.json. Defaults to 'auto'. device (torch.device, optional): PyTorch device. Defaults to initializing a new CUDA device. generator_kwargs (dict, optional): Keyword arguments passed to the :meth:`slideflow.WSI.build_generator()`. Returns: np.ndarray: Predictions for each outcome, with shape = (num_classes, ) np.ndarray, optional: Uncertainty for each outcome, if the model was trained with uncertainty, with shape = (num_classes,) """ from slideflow import Heatmap config = sf.util.get_model_config(model) _compatible = sf.util.is_tile_size_compatible( config['tile_px'], config['tile_um'], self.tile_px, self.tile_um ) if not _compatible: raise errors.IncompatibleTileSizeError( "Slide tile size (tile_px={}, tile_um={}) does not match the " "model (tile_px={}, tile_um={}).".format( self.tile_px, self.tile_um, config['tile_px'], config['tile_um'] )) log.info("Calculating whole-slide prediction...") heatmap = Heatmap(self, model, generate=True, **kwargs) preds = heatmap.predictions.reshape(-1, heatmap.predictions.shape[-1]) preds = np.nanmean(preds, axis=0).filled() if heatmap.uncertainty is not None: unc = heatmap.uncertainty.reshape(-1, heatmap.uncertainty.shape[-1]) unc = np.nanmean(unc, axis=0).filled() return preds, unc else: return preds def preview( self, rois: bool = True, thumb_kwargs: Optional[Dict] = None, low_res: bool = True, **kwargs ) -> Optional[Image.Image]: """Performs a dry run of tile extraction without saving any images, returning a PIL image of the slide thumbnail annotated with a grid of tiles that were marked for extraction. Args: rois (bool, optional): Draw ROI annotation(s) onto the image. Defaults to True. Keyword Args: whitespace_fraction (float, optional): Range 0-1. Defaults to 1. Discard tiles with this fraction of whitespace. If 1, will not perform whitespace filtering. whitespace_threshold (int, optional): Range 0-255. Defaults to 230. Threshold above which a pixel (RGB average) is considered whitespace. 
grayspace_fraction (float, optional): Range 0-1. Defaults to 0.6. Discard tiles with this fraction of grayspace. If 1, will not perform grayspace filtering. grayspace_threshold (float, optional): Range 0-1. Defaults to 0.05. Pixels in HSV format with saturation below this threshold are considered grayspace. full_core (bool, optional): Extract an entire detected core, rather than subdividing into image tiles. Defaults to False. num_threads (int): Number of threads to allocate to workers. yolo (bool, optional): Export yolo-formatted tile-level ROI annotations (.txt) in the tile directory. Requires that tiles_dir is set. Defaults to False. thumb_kwargs (Optional[Dict], optional): Keyword arguments to pass to the thumb method. Defaults to None. low_res (bool, optional): Use low resolution thumbnail. Defaults to True. """ if 'show_progress' not in kwargs: kwargs['show_progress'] = (self.pb is None) generator = self.build_generator( dry_run=True, deterministic=False, **kwargs ) if thumb_kwargs is None: thumb_kwargs = dict(low_res=low_res) if generator is None: return self.thumb(rois=rois, **thumb_kwargs) locations = [] for tile_dict in generator(): locations += [tile_dict['loc']] log.debug(f"Previewing with {len(locations)} extracted tile locations.") return self.thumb( coords=locations, rois=rois, **thumb_kwargs ) def process_rois(self): """Process loaded ROIs and apply to the slide grid. Returns: int: Number of ROIs processed. """ # Load annotations as shapely.geometry objects. if self.roi_method != 'ignore': self._find_and_process_holes() # Regenerate the grid to reflect the newly-loaded ROIs. self._build_coord() # Re-apply any existing QC mask, now that the coordinates have changed. if self.has_non_roi_qc(): self.apply_qc_mask() return len(self.rois) def _find_and_process_holes(self): """Find and process holes in ROIs.""" from shapely.strtree import STRtree self.rois.sort(key=lambda x: x.poly.area, reverse=True) outer_rois = [] labels = list(set([roi.label for roi in self.rois])) for label in labels: rois = [roi for roi in self.rois if roi.label == label] polygons = [roi.poly for roi in self.rois if roi.label == label] strtree = STRtree(polygons) for roi, poly in zip(rois, polygons): if version.parse(shapely_version) < version.parse('2.0.0'): possible_containers = strtree.query(poly) else: possible_containers_idx = strtree.query(poly) possible_containers = [polygons[i] for i in possible_containers_idx] # Filter out the polygon itself possible_containers = [p for p in possible_containers if p != poly] # Check if the polygon is contained by another contained_by = [p for p in possible_containers if p.contains(poly)] if not contained_by: # Polygon is an outer polygon outer_rois.append(roi) else: # Polygon is a hole, find its immediate outer polygon # Sort by area (smallest to largest) to find the closets outer. contained_by.sort(key=lambda x: x.area) immediate_outer_poly = contained_by[0] immediate_outer_roi = rois[polygons.index(immediate_outer_poly)] # If the immediate outer is not already listed as an outer, # then the immediate outer is a hole and this polygon is a nested # polygon within a hole and should be treated as an outer. if immediate_outer_roi not in outer_rois: outer_rois.append(roi) else: # Otherwise, add the polygon to the immediate outer as a hole immediate_outer_roi.add_hole(roi) # Restrict the ROIs to only outer polygons, which have now had the holes applied. 
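        # Holes are not discarded: each remains attached to its immediate outer
        # ROI through that ROI's ``holes`` dictionary, and any polygon nested
        # inside a hole has been promoted back to an outer ROI above.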
self.rois = outer_rois def qc( self, method: Union[str, Callable, List[Callable]], *, blur_radius: int = 3, blur_threshold: float = 0.02, filter_threshold: float = 0.6, blur_mpp: Optional[float] = None, pool: Optional["mp.pool.Pool"] = None ) -> Optional[Image.Image]: """Applies quality control to a slide, performing filtering based on a whole-slide image thumbnail. 'blur' method filters out blurry or out-of-focus slide sections. 'otsu' method filters out background based on automatic saturation thresholding in the HSV colorspace. 'both' applies both methods of filtering. Args: method (str, Callable, list(Callable)): Quality control method(s). If a string, may be 'blur', 'otsu', or 'both'. If a callable (or list of callables), each must accept a sf.WSI object and return a np.ndarray (dtype=np.bool). blur_radius (int, optional): Blur radius. Only used if method is 'blur' or 'both'. blur_threshold (float, optional): Blur threshold. Only used if method is 'blur' or 'both'. filter_threshold (float): Percent of a tile detected as background that will trigger a tile to be discarded. Defaults to 0.6. blur_mpp (float, optional): Size of WSI thumbnail on which to perform blur QC, in microns-per-pixel. Defaults to 4 times the tile extraction MPP (e.g. for a tile_px/tile_um combination at 10X effective magnification, where tile_px=tile_um, the default blur_mpp would be 4, or effective magnification 2.5x). Only used if method is 'blur' or 'both'. Returns: Image: Image of applied QC mask. """ # Prepare known QC methods - 'blur', 'otsu', and 'both'. if not isinstance(method, list): method = [method] # type: ignore if 'both' in method: idx = method.index('both') # type: ignore method.remove('both') # type: ignore method.insert(idx, 'otsu') # type: ignore # Blur should be performed before Otsu's thresholding method.insert(idx, 'blur') # type: ignore if 'blur' in method: idx = method.index('blur') # type: ignore method.remove('blur') # type: ignore method.insert(idx, sf.slide.qc.GaussianV2(mpp=blur_mpp, sigma=blur_radius, threshold=blur_threshold)) if 'otsu' in method: idx = method.index('otsu') # type: ignore method.remove('otsu') # type: ignore method.insert(idx, sf.slide.qc.Otsu()) starttime = time.time() img = None log.debug(f"Applying QC: {method}") for qc in method: if isinstance(qc, str): raise errors.QCError(f"Unknown QC method {qc}") if pool is not None: try: qc.pool = pool # type: ignore except Exception as e: log.debug(f"Unable to set pool for QC method {qc}") mask = qc(self) if mask is not None: img = self.apply_qc_mask(mask, filter_threshold=filter_threshold) dur = f'(time: {time.time()-starttime:.2f}s)' log.debug(f'QC ({method}) complete for slide {self.shortname} {dur}') return img def remove_qc(self) -> None: self.qc_masks = [m for m in self.qc_masks if m.is_roi] self._build_coord() log.debug(f'QC removed from slide {self.shortname}') def remove_roi_qc(self) -> None: """Remove ROI-based QC from the slide.""" self.qc_masks = [m for m in self.qc_masks if not m.is_roi] if len(self.qc_masks): self.apply_qc_mask() def remove_roi( self, idx: Union[int, List[int]], *, process: bool = True ) -> None: """Remove an ROI from the slide. Args: idx (int, list(int)): Index or indices of the ROI(s) to remove. Keyword Args: process (bool): Process ROIs after removing. Defaults to True.
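        Example:
            A minimal sketch; indices are illustrative. Indices are deleted in
            descending order, so a list always refers to the ROI list as it was
            before the call:

                >>> wsi.remove_roi([0, 2], process=True)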
""" if isinstance(idx, int): idx = [idx] for i in sorted(idx, reverse=True): del self.rois[i] if process: self.process_rois() def set_artifacts( self, artifact_labels: Optional[Union[str, List[str]]] ) -> None: """Set artifact labels for all ROIs in the slide. Rebuilds the ROI grid after setting the artifacts. Args: artifact_labels (str, list(str)): Artifact label(s) to set. ROIs with these labels will be marked as artifacts. """ if isinstance(artifact_labels, str): artifact_labels = [artifact_labels] if artifact_labels is not None and not all(isinstance(label, str) for label in artifact_labels): raise TypeError("Artifact labels must be strings.") self.artifact_labels = artifact_labels if artifact_labels is not None else [] self.process_rois() def show_alignment( self, slide: "WSI", mpp: float = 4 ) -> Image.Image: """Show aligned thumbnail of another slide.""" if not isinstance(slide, WSI): raise TypeError("Can only align to another slide.") # Calculate thumbnails for alignment. our_thumb = np.array(self.thumb(mpp=mpp)) their_thumb = np.array(slide.thumb(mpp=mpp)) # Return an image of a thumbnail of the given slide, # aligned to this slide. return Image.fromarray(align_image(their_thumb, our_thumb)) def square_thumb( self, width: int = 512, use_associated_image: bool = True, **kwargs ) -> Image.Image: '''Returns a square thumbnail of the slide, with black bar borders. Args: width (int): Width/height of thumbnail in pixels. Returns: PIL image ''' thumb = self.thumb( width=width, use_associated_image=use_associated_image, **kwargs) height = int(width / (thumb.width / thumb.height)) thumb = thumb.resize((width, height)) square_thumb = Image.new("RGB", (width, width)) square_thumb.paste(thumb, (0, int((width-height)/2))) return square_thumb def thumb( self, mpp: Optional[float] = None, width: Optional[int] = None, *, coords: Optional[List[int]] = None, rect_linewidth: int = 2, rect_color: str = 'black', rois: bool = False, linewidth: int = 2, color: str = 'black', use_associated_image: bool = False, low_res: bool = False, ) -> Image.Image: """Generate a PIL Image of the slide thumbnail, with ROI overlay. Args: mpp (float, optional): Microns-per-pixel, used to determine thumbnail size. width (int, optional): Goal thumbnail width (alternative to mpp). coords (list(int), optional): List of tile extraction coordinates to show as rectangles on the thumbnail, in [(x_center, y_center), ...] format. Defaults to None. rois (bool, optional): Draw ROIs onto thumbnail. Defaults to False. linewidth (int, optional): Width of ROI line. Defaults to 2. color (str, optional): Color of ROI. Defaults to black. use_associated_image (bool): Use the associated thumbnail image in the slide, rather than reading from a pyramid layer. low_res (bool): Create thumbnail from the lowest-mangnification pyramid layer. Defaults to False. 
Returns: PIL image """ if rois and len(self.rois): if (mpp is not None and width is not None): raise ValueError( "Either mpp or width must be given, but not both" f" (got mpp={mpp}, width={width})" ) # If no values provided, create thumbnail of width 1024 if mpp is None and width is None: width = 1024 if mpp is not None: roi_scale = (self.dimensions[0] / (int((self.mpp * self.dimensions[0]) / mpp))) else: roi_scale = self.dimensions[0] / width # type: ignore # If no values provided, create thumbnail of width 1024 if mpp is None and width is None: width = 1024 if (mpp is not None and width is not None): raise ValueError( "Either mpp or width must be given, but not both" f" (got mpp={mpp}, width={width})" ) # Calculate goal width/height according to specified microns-per-pixel if mpp: width = int((self.mpp * self.dimensions[0]) / mpp) # Otherwise, calculate approximate mpp based on provided width # (to generate proportional height) else: assert width is not None mpp = (self.mpp * self.dimensions[0]) / width # Calculate appropriate height height = int((self.mpp * self.dimensions[1]) / mpp) if use_associated_image: log.debug("Requesting thumbnail using associated image") thumb_kw = dict(associated='thumbnail') elif low_res: log.debug("Requesting thumbnail at level={}, width={}".format( self.slide.level_count-1, width )) thumb_kw = dict(level=self.slide.level_count-1, width=width) else: ds = self.dimensions[0] / width level = self.slide.best_level_for_downsample(ds) log.debug("Requesting thumbnail at level={}, width={}".format( level, width )) thumb_kw = dict(level=level, width=width) np_thumb = self.slide.thumbnail(**thumb_kw) thumb = Image.fromarray(np_thumb).resize((width, height)) if coords: draw = ImageDraw.Draw(thumb) ratio = width / self.dimensions[0] wh = (self.full_extract_px * ratio) / 2 for (x, y) in coords: # type: ignore x, y = x * ratio, y * ratio # type: ignore coords = (x-wh, y-wh, x+wh, y+wh) # type: ignore draw.rectangle(coords, outline=rect_color, width=rect_linewidth) if rois and len(self.rois): draw = ImageDraw.Draw(thumb) roi_polys = [r.scaled_poly(roi_scale) for r in self.rois] for roi in self.rois: for hole in roi.holes.values(): roi_polys.append(hole.scaled_poly(roi_scale)) for i, poly in enumerate(roi_polys): if poly.geom_type == 'Polygon': x, y = poly.exterior.coords.xy zipped = list(zip(x.tolist(), y.tolist())) draw.line(zipped, joint='curve', fill=color, width=linewidth) elif poly.geom_type in ('MultiPolygon', 'GeometryCollection'): for part in poly.geoms: if part.is_empty or part.geom_type != 'Polygon': continue x, y = part.exterior.coords.xy zipped = list(zip(x.tolist(), y.tolist())) draw.line(zipped, joint='curve', fill=color, width=linewidth) else: sf.log.error(f"Unable to plot ROI {i}, unknown geometry type: {poly.geom_type}") return thumb else: return thumb def tensorflow( self, img_format: str = 'numpy', incl_slidenames: bool = False, incl_loc: Optional[str] = None, shuffle: bool = True, **kwargs ) -> Any: """Create a Tensorflow Dataset which extractes tiles from this slide. Args: img_format (str, optional): Image format for returned image tiles. Options include 'png', 'jpg', and 'numpy'. Defaults to 'numpy'. incl_slidenames (bool, optional): Yield slide names for each image tile. Defaults to False. incl_loc (Optional[str], optional): Yield image tile location with each image tile. Options include True, 'coord', or 'grid'. If True or 'coord', will return X/Y coordinates of the tile center in the slide's highest magnification layer. 
If 'grid', returns the grid indices for the tile. Defaults to None. shuffle (bool, optional): Shuffle image tiles. Defaults to True. Returns: tf.data.Dataset Yields: Iterator[Any]: Items yielded by the Dataset are in dictionary format, with the keys: 'image_raw': Contains the image (jpg, png, or numpy) 'slide': Slide name (if ``incl_slidenames=True``) 'loc_x' Image tile center x location (if ``incl_loc`` provided) 'loc_y' Image tile center y location (if ``incl_loc`` provided) """ import tensorflow as tf def tile_generator(): for image_dict in self.build_generator( shuffle=shuffle, show_progress=False, img_format=img_format, **kwargs )(): if not (incl_slidenames or incl_loc): yield image_dict['image'] else: to_return = { 'image_raw': image_dict['image'] } if incl_slidenames: to_return['slide'] = self.name if incl_loc == 'coord' or incl_loc == True: to_return['loc_x'] = image_dict['loc'][0] to_return['loc_y'] = image_dict['loc'][1] if incl_loc == 'grid': to_return['loc_x'] = image_dict['grid'][0] to_return['loc_y'] = image_dict['grid'][1] yield to_return # Generate dataset from the generator with tf.name_scope('dataset_input'): # Signatures for imaging data if img_format == 'numpy': image_sig = tf.TensorSpec( shape=(self.tile_px, self.tile_px, 3), dtype=tf.uint8 ) else: image_sig = tf.TensorSpec(shape=(), dtype=tf.string) # Rest of the signatures if incl_slidenames or incl_loc: sig = {'image_raw': image_sig} if incl_slidenames: sig['slide'] = tf.TensorSpec(shape=(), dtype=tf.string) if incl_loc: sig['loc_x'] = tf.TensorSpec(shape=(), dtype=tf.int32) sig['loc_y'] = tf.TensorSpec(shape=(), dtype=tf.int32) else: sig = image_sig # Assemble dataset dataset = tf.data.Dataset.from_generator( tile_generator, output_signature=sig ) return dataset def torch( self, img_format: str = 'numpy', incl_slidenames: bool = False, incl_loc: Optional[str] = None, shuffle: bool = True, infinite: bool = False, to_tensor: bool = True, **kwargs ) -> Any: """Create a PyTorch iterator which extractes tiles from this slide. Args: img_format (str, optional): Image format for returned image tiles. Options include 'png', 'jpg', and 'numpy'. Defaults to 'numpy'. incl_slidenames (bool, optional): Yield slide names for each image tile. Defaults to False. incl_loc (Optional[str], optional): Yield image tile location with each image tile. Options include True, 'coord', or 'grid'. If True or 'coord', will return X/Y coordinates of the tile center in the slide's highest magnification layer. If 'grid', returns the grid indices for the tile. Defaults to None. shuffle (bool, optional): Shuffle image tiles. Defaults to True. Returns: An iterator which yields image tiles as Torch tensors. 
Yields: Iterator[Any]: Items yielded by the Dataset are in dictionary format, with the keys: 'image_raw': Contains the image as a Tensor (jpg, png, or numpy) 'slide': Slide name (if ``incl_slidenames=True``) 'loc_x' Image tile center x location (if ``incl_loc`` provided) 'loc_y' Image tile center y location (if ``incl_loc`` provided) """ import torch def tile_generator(): while True: for image_dict in self.build_generator( shuffle=shuffle, show_progress=False, img_format=img_format, **kwargs )(): if not (incl_slidenames or incl_loc): if to_tensor: yield torch.from_numpy(image_dict['image']) else: yield image_dict['image'] else: if to_tensor: to_return = {'image_raw': torch.from_numpy(image_dict['image'])} else: to_return = {'image_raw': image_dict['image']} if incl_slidenames: to_return['slide'] = self.name if incl_loc == 'coord' or incl_loc == True: to_return['loc_x'] = image_dict['loc'][0] to_return['loc_y'] = image_dict['loc'][1] if incl_loc == 'grid': to_return['loc_x'] = image_dict['grid'][0] to_return['loc_y'] = image_dict['grid'][1] yield to_return if not infinite: break return tile_generator() def verify_alignment( self, slide: "WSI", mpp: float = 4 ) -> float: """Verify alignment to another slide by calculating MSE.""" if not isinstance(slide, WSI): raise TypeError("Can only align to another slide.") # Calculate thumbnails for alignment. our_thumb = np.array(self.thumb(mpp=mpp)) their_thumb = np.array(slide.thumb(mpp=mpp)) aligned_theirs = align_image(their_thumb, our_thumb) theirs_gray = cv2.cvtColor(aligned_theirs, cv2.COLOR_BGR2GRAY) ours_gray = cv2.cvtColor(our_thumb, cv2.COLOR_BGR2GRAY) return compute_alignment_mse(theirs_gray, ours_gray) def view(self): """Open the slide in Slideflow Studio for interactive display. See :ref:`studio` for more information. """ from slideflow.studio import Studio studio = Studio() studio.load_slide(self) studio.run()
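# -----------------------------------------------------------------------

# Example usage (a minimal sketch; the slide path, tile size, and output
# directory below are hypothetical):
#
#     import slideflow as sf
#
#     wsi = sf.WSI('slide.svs', tile_px=299, tile_um=302)
#     wsi.qc('both')  # Otsu background filtering + Gaussian blur filtering
#     wsi.extract_tiles(tfrecord_dir='tfrecords/', img_format='jpg')
#
#     # Alternatively, stream tiles without writing anything to disk:
#     for item in wsi.torch(img_format='numpy', incl_loc='coord'):
#         image, x, y = item['image_raw'], item['loc_x'], item['loc_y']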