Source code for topostats.utils

"""Utilities"""
from argparse import Namespace
import logging
from pathlib import Path
from typing import Union, Dict
from collections import defaultdict

import numpy as np
import pandas as pd

from topostats.thresholds import threshold
from topostats.logs.logs import LOGGER_NAME

# Module-level logger shared by the functions in this file; its name comes from topostats.logs.logs.LOGGER_NAME.
LOGGER = logging.getLogger(LOGGER_NAME)


# Column names used as the default ``columns`` argument of create_empty_dataframe(). "molecule_number" must be
# present because it is that function's default index column.
ALL_STATISTICS_COLUMNS = (
    "image",
    "basename",
    "molecule_number",
    "area",
    "area_cartesian_bbox",
    "aspect_ratio",
    "bending_angle",
    "centre_x",
    "centre_y",
    "circular",
    "contour_length",
    "end_to_end_distance",
    "height_max",
    "height_mean",
    "height_median",
    "height_min",
    "max_feret",
    "min_feret",
    "radius_max",
    "radius_mean",
    "radius_median",
    "radius_min",
    "smallest_bounding_area",
    "smallest_bounding_length",
    "smallest_bounding_width",
    "threshold",
    "volume",
)


def convert_path(path: Union[str, Path]) -> Path:
    """Return ``path`` as a pathlib Path object.

    Parameters
    ----------
    path: Union[str, Path]
        Location to convert. The exact string ``"./"`` is mapped to the current working directory; any other value
        is wrapped in ``Path`` and has a leading ``~`` expanded to the user's home directory.

    Returns
    -------
    Path
        The converted pathlib Path.
    """
    if path == "./":
        return Path.cwd()
    return Path(path).expanduser()
def update_config(config: dict, args: Union[dict, Namespace]) -> Dict:
    """Overwrite configuration values with any matching, non-None arguments.

    Parameters
    ----------
    config: dict
        Dictionary of configuration (typically read from YAML file specified with '-c/--config <filename>'). It is
        modified in place.
    args: Union[dict, Namespace]
        Command line arguments. A Namespace is converted to a dictionary with ``vars()``.

    Returns
    -------
    Dict
        The same ``config`` dictionary, updated with command arguments and with 'base_dir' and 'output_dir' (when
        present) converted to Path objects.
    """
    if isinstance(args, Namespace):
        args = vars(args)

    for arg_key, arg_value in args.items():
        if isinstance(arg_value, dict):
            # Nested argument dictionaries are matched against the same top-level config keys.
            update_config(config, arg_value)
        elif arg_value is not None and arg_key in config:
            original_value = config[arg_key]
            config[arg_key] = arg_value
            LOGGER.info(f"Updated config config[{arg_key}] : {original_value} > {arg_value} ")

    # Directory entries may arrive as strings, downstream code receives Path objects.
    for path_key in ("base_dir", "output_dir"):
        if path_key in config:
            config[path_key] = convert_path(config[path_key])
    return config
def update_plotting_config(plotting_config: dict) -> dict:
    """Copy the top-level plotting options into every entry of ``plotting_config["plot_dict"]``.

    Parameters
    ----------
    plotting_config: dict
        Plotting configuration. Must contain the keys 'plot_dict' (a dictionary of per-image options) and 'run';
        every other key is treated as an option shared by all plots. Each per-image entry must end up with an
        'image_type' key.

    Returns
    -------
    dict
        The same ``plotting_config`` object, with each 'plot_dict' entry replaced by its own options overlaid with
        the shared options (shared options win on clashes).
    """
    shared_options = plotting_config.copy()
    del shared_options["plot_dict"]
    del shared_options["run"]

    plot_dict = plotting_config["plot_dict"]
    for image, options in plot_dict.items():
        merged = {**options, **shared_options}
        plot_dict[image] = merged
        # Binary images must not have the user-defined z-scale applied, non-binary images keep it.
        if merged["image_type"] == "binary":
            merged["zrange"] = [None, None]
    return plotting_config
def _get_mask(image: np.ndarray, thresh: float, threshold_direction: str, img_name: str = None) -> np.ndarray:
    """Compare an image against a single threshold.

    Parameters
    ----------
    image: np.array
        Numpy array representing image.
    thresh: float
        The threshold the image is compared against.
    threshold_direction: str
        Direction to threshold in. "above" selects values greater than ``thresh``; any other value is handled as
        "below" and selects values less than ``thresh``.
    img_name: str
        Name of image being processed, used only in the log message.

    Returns
    -------
    np.array
        Result of the element-wise comparison ``image > thresh`` or ``image < thresh``.
    """
    if threshold_direction != "above":
        # NOTE: the direction is not validated, so anything other than "above" falls through to "below".
        LOGGER.info(f"[{img_name}] : Masking (below) Threshold: {thresh}")
        return image < thresh
    LOGGER.info(f"[{img_name}] : Masking (above) Threshold: {thresh}")
    return image > thresh
def get_mask(image: np.ndarray, thresholds: dict, img_name: str = None) -> np.ndarray:
    """Mask data that should not be included in flattening.

    Parameters
    ----------
    image: np.ndarray
        2D Numpy array of the image to have a mask derived for.
    thresholds: dict
        Dictionary of thresholds holding a 'below' key, an 'above' key, or both. With neither key present the
        lookup of 'above' raises a KeyError.
    img_name: str
        Image name that is being masked.

    Returns
    -------
    np.ndarray
        Mask of points to exclude. When both thresholds are given the two masks are added together, which marks
        both the extreme high and the extreme low data points.
    """
    has_below = "below" in thresholds
    has_above = "above" in thresholds

    if not has_below:
        # Only the above threshold is applicable.
        return _get_mask(image, thresh=thresholds["above"], threshold_direction="above", img_name=img_name)
    if not has_above:
        # Only the below threshold is applicable.
        return _get_mask(image, thresh=thresholds["below"], threshold_direction="below", img_name=img_name)

    # Both thresholds are applicable, the "above" mask is computed (and logged) first.
    high_mask = _get_mask(image, thresh=thresholds["above"], threshold_direction="above", img_name=img_name)
    low_mask = _get_mask(image, thresh=thresholds["below"], threshold_direction="below", img_name=img_name)
    return high_mask + low_mask
# pylint: disable=unused-argument
def get_thresholds(
    image: np.ndarray,
    threshold_method: str,
    otsu_threshold_multiplier: float = None,
    threshold_std_dev: dict = None,
    absolute: dict = None,
    **kwargs,
) -> Dict:
    """Obtain thresholds for masking data points.

    Parameters
    ----------
    image : np.ndarray
        2D Numpy array of image to be masked
    threshold_method : str
        Method for thresholding, 'otsu', 'std_dev' or 'absolute' are valid options.
    otsu_threshold_multiplier : float
        Passed through to ``threshold()`` when the method is 'otsu'.
    threshold_std_dev : dict
        Dict with keys 'below' and 'above' holding standard deviation multipliers (either may be None to skip that
        direction). Required when the method is 'std_dev'.
    absolute : dict
        Dict with keys 'below' and 'above' holding absolute thresholds (either may be None to skip that
        direction). Required when the method is 'absolute'.
    **kwargs:
        Ignored; accepted so a wider configuration dictionary can be unpacked into this function.

    Returns
    -------
    Dict
        Dictionary of thresholds with keys 'below' and/or 'above'. 'otsu' only ever sets 'above'.

    Raises
    ------
    TypeError
        If ``threshold_method`` is not a string, or the dictionary required by the chosen method is None.
    ValueError
        If ``threshold_method`` is a string other than 'otsu', 'std_dev' or 'absolute'.
    """
    # A factory-less defaultdict behaves like a plain dict; it is kept so the returned type is unchanged for callers.
    thresholds = defaultdict()
    if threshold_method == "otsu":
        thresholds["above"] = threshold(image, method="otsu", otsu_threshold_multiplier=otsu_threshold_multiplier)
    elif threshold_method == "std_dev":
        if threshold_std_dev is None:
            raise TypeError(
                "threshold_std_dev must be a dict with keys 'below' and 'above' when threshold_method is 'std_dev'"
            )
        below_multiplier = threshold_std_dev["below"]
        above_multiplier = threshold_std_dev["above"]
        if below_multiplier is not None or above_multiplier is not None:
            # The mean and standard deviation are shared by both directions, so compute them once.
            mean = threshold(image, method="mean")
            std_dev = np.nanstd(image)
            if below_multiplier is not None:
                thresholds["below"] = mean - below_multiplier * std_dev
            if above_multiplier is not None:
                thresholds["above"] = mean + above_multiplier * std_dev
    elif threshold_method == "absolute":
        if absolute is None:
            raise TypeError("absolute must be a dict with keys 'below' and 'above' when threshold_method is 'absolute'")
        for direction in ("below", "above"):
            if absolute[direction] is not None:
                thresholds[direction] = absolute[direction]
    elif not isinstance(threshold_method, str):
        raise TypeError(
            f"threshold_method ({threshold_method}) should be a string. Valid values : 'otsu' 'std_dev' 'absolute'"
        )
    else:
        # Any string reaching this branch is by construction not one of the valid methods.
        raise ValueError(
            f"threshold_method ({threshold_method}) is invalid. Valid values : 'otsu' 'std_dev' 'absolute'"
        )
    return thresholds
def create_empty_dataframe(
    columns: Union[list, tuple] = ALL_STATISTICS_COLUMNS, index: Union[str, list] = "molecule_number"
) -> pd.DataFrame:
    """Create an empty data frame for returning when no results are found.

    Parameters
    ----------
    columns: Union[list, tuple]
        Columns of the empty dataframe. Defaults to ALL_STATISTICS_COLUMNS.
    index: Union[str, list]
        Column (or columns) to use as the index; must be present in ``columns``. Defaults to 'molecule_number'.

    Returns
    -------
    pd.DataFrame
        Empty Pandas DataFrame with ``index`` set as its index.
    """
    return pd.DataFrame(columns=columns).set_index(index)