Skip to content

NumPy Utilities

Low-level array helpers used throughout TPTBox for label extraction, morphological operations, connected components, centre-of-mass computation, and more.

TPTBox.core.np_utils

np_extract_label

np_extract_label(arr: ndarray, label: int | list[int], to_label: int = 1, inplace: bool = True) -> np.ndarray

Extracts a label from a given array (works with zero as well!).

Parameters:

Name Type Description Default
arr ndarray

input arr

required
label int

label to be extracted (all other values are set to zero, label will be set to one, even if label==0!)

required
to_label int

the value assigned to the entries that had the `label` value. Defaults to 1.

1
inplace bool

If False, will make a copy of the arr. Defaults to True.

True

Returns:

Type Description
ndarray

np.ndarray: Binary array where the selected label is set to to_label and all other entries are zero.

Source code in TPTBox/core/np_utils.py
def np_extract_label(
    arr: np.ndarray,
    label: int | list[int],
    to_label: int = 1,
    inplace: bool = True,
) -> np.ndarray:
    """Extracts a label from an given arr (works with zero as well!).

    Args:
        arr (np.ndarray): input arr
        label (int): label to be extracted (all other values are set to zero,
            label will be set to one, even if label==0!)
        to_label (int): the value of the entries that had the <label> value. Defaults to 1.
        inplace (bool, optional): If False, will make a copy of the arr. Defaults to True.

    Returns:
        np.ndarray: Binary array where the selected label is set to ``to_label``
            and all other entries are zero.
    """
    if isinstance(label, int) and to_label == 1:
        return arr == label

    if to_label == 0:
        warnings.warn(
            "np_extract_label: to_label is zero, this can have unforeseen consequences!",
            UserWarning,
            stacklevel=4,
        )
    if not inplace:
        arr = arr.copy()

    if isinstance(label, list):
        assert 0 not in label, "label 0 is not supported in list mode"
        arr_msk = np.isin(arr, label)
        arr[arr_msk] = to_label
        arr[~arr_msk] = 0
        return arr

    if label != 0:
        arr[arr != label] = 0
        arr[arr == label] = to_label
        return arr
    # label == 0
    arr[arr != 0] = to_label + 1
    arr[arr == 0] = to_label
    arr[arr != to_label] = 0
    return arr

cc3dstatistics

cc3dstatistics(arr: UINTARRAY, use_crop: bool = True) -> dict

Computes connected component statistics for a labeled array using connected components 3D (cc3d).

Parameters:

Name Type Description Default
arr UINTARRAY

A 3D array of unsigned integers or booleans where each connected component is labeled with a unique integer. Typically output from a labeling function.

required
use_crop bool

If True, the function attempts to crop the input array around non-zero regions to improve performance and focus statistics on the area of interest. Defaults to True.

True

Returns:

Name Type Description
dict dict

A dictionary containing statistics of the connected components, such as their sizes, bounding boxes, and possibly centroids, depending on implementation of _cc3dstats.

Raises:

Type Description
AssertionError

If the input array is not of an unsigned integer or boolean dtype.

Source code in TPTBox/core/np_utils.py
def cc3dstatistics(arr: UINTARRAY, use_crop: bool = True) -> dict:
    """Runs ``_cc3dstats`` on a label array, optionally on a cropped sub-array.

    Args:
        arr (UINTARRAY): Label array. The dtype check below accepts unsigned
            integer, int32 and boolean arrays.
        use_crop (bool): If True, the array is first cropped with
            ``np_bbox_binary`` (margin of 2) and the statistics are computed on
            that sub-array only. Defaults to True.

    Returns:
        dict: Whatever ``_cc3dstats`` returns for the (cropped) array.
            NOTE(review): with ``use_crop=True`` any positional entries are
            presumably relative to the crop, not to ``arr`` - confirm against
            ``_cc3dstats``.

    Raises:
        AssertionError: If the dtype is none of the accepted kinds.
    """
    accepted_kinds = (np.unsignedinteger, np.int32, np.bool_)
    dtype_ok = any(np.issubdtype(arr.dtype, kind) for kind in accepted_kinds)
    assert dtype_ok, f"cc3dstatistics expects uint type, got {arr.dtype}"

    if use_crop:
        try:
            bbox = np_bbox_binary(arr, raise_error=False, px_dist=2)
            return _cc3dstats(arr[bbox])
        except ValueError as e:
            # Best effort: report the problem and fall through to the uncropped path.
            print(e)
    return _cc3dstats(arr)

np_volume

np_volume(arr: UINTARRAY, include_zero: bool = False) -> dict[int, int]

Returns a dictionary mapping each label in the array to its voxel count.

Parameters:

Name Type Description Default
arr UINTARRAY

Input unsigned-integer label array.

required
include_zero bool

If True, also counts voxels with label 0 (background). Defaults to False.

False

Returns:

Type Description
dict[int, int]

dict[int, int]: Mapping from label value to number of voxels with that label.

Source code in TPTBox/core/np_utils.py
def np_volume(arr: UINTARRAY, include_zero: bool = False) -> dict[int, int]:
    """Counts the voxels of every label that occurs in the array.

    Args:
        arr (UINTARRAY): Input label array.
        include_zero (bool, optional): If True, label 0 (background) is reported
            as well; in that case the statistics are taken without cropping.
            Defaults to False.

    Returns:
        dict[int, int]: Label -> voxel count, restricted to labels with a
            count greater than zero.
    """
    if include_zero:
        counts = cc3dstatistics(arr, use_crop=False)["voxel_counts"]
        return {label: n_voxels for label, n_voxels in enumerate(counts) if n_voxels > 0}

    counts = cc3dstatistics(arr)["voxel_counts"]
    return {label: n_voxels for label, n_voxels in enumerate(counts) if n_voxels > 0 and label != 0}

np_is_empty

np_is_empty(arr: UINTARRAY | INTARRAY) -> bool

Returns true if the array is empty (only zeros).

Parameters:

Name Type Description Default
arr UINTARRAY

input uint array

required

Returns:

Name Type Description
bool bool

True if array is empty

ON UINT and INT:
is faster than np_count_nonzero(arr) > 0
is faster than arr.nonzero()[0].size == 0
is faster than arr.sum() > 0
Source code in TPTBox/core/np_utils.py
def np_is_empty(arr: UINTARRAY | INTARRAY) -> bool:
    """Returns True if the array is empty (contains only zeros).

    An array without any elements is also reported as empty.

    Args:
        arr (UINTARRAY | INTARRAY): Input integer (or boolean) array.

    Returns:
        bool: True if no element differs from zero.

    #### ON UINT and INT the max-based check was chosen because it
    #### is faster than np_count_nonzero(arr) == 0
    #### is faster than arr.nonzero()[0].size == 0
    #### is faster than arr.sum() == 0
    """
    if arr.size == 0:
        # max() raises on zero-size arrays; nothing is non-zero here.
        return True
    if arr.max() != 0:
        return False
    if arr.dtype.kind in "ub":
        # Unsigned / boolean: a maximum of zero already implies all zeros.
        return True
    # Signed data may still hold negative values although the maximum is zero.
    return bool(arr.min() == 0)

np_count_nonzero

np_count_nonzero(arr: ndarray) -> int

Returns number of nonzero entries in the array.

Parameters:

Name Type Description Default
arr ndarray

Input array.

required

Returns:

Name Type Description
int int

Number of elements in arr that are not equal to zero.

Source code in TPTBox/core/np_utils.py
def np_count_nonzero(arr: np.ndarray) -> int:
    """Counts the entries of ``arr`` that differ from zero.

    Args:
        arr (np.ndarray): Array to inspect.

    Returns:
        int: Result of ``np.count_nonzero`` evaluated over the whole array.
    """
    n_nonzero = np.count_nonzero(arr)
    return n_nonzero

np_unique

np_unique(arr: ndarray) -> list[int]

Returns each existing label in the array (including zero!).

Uses cc3d statistics for unsigned-integer arrays for speed, and falls back to numpy.unique for other dtypes.

Parameters:

Name Type Description Default
arr ndarray

Input label array.

required

Returns:

Type Description
list[int]

list[int]: Sorted list of every distinct label value present in arr, including 0 (background).

Source code in TPTBox/core/np_utils.py
def np_unique(arr: np.ndarray) -> list[int]:
    """Lists every label value that occurs in the array (zero included).

    Unsigned-integer arrays are first tried via the ``voxel_counts`` entry of
    ``cc3dstatistics``; if that attempt raises, or for any other dtype,
    ``numpy.unique`` is used instead.

    Args:
        arr (np.ndarray): Input label array.

    Returns:
        list[int]: Distinct values present in ``arr`` in ascending order.
            The ``numpy.unique`` fallback yields NumPy scalars.
    """
    is_unsigned = np.issubdtype(arr.dtype, np.unsignedinteger)
    if is_unsigned:
        try:
            counts = cc3dstatistics(arr)["voxel_counts"]
            return [value for value, n_voxels in enumerate(counts) if n_voxels > 0]
        except Exception:
            # Deliberate best effort: any failure falls back to numpy below.
            pass
    distinct = np.unique(arr)
    return list(distinct)

np_unique_withoutzero

np_unique_withoutzero(arr: UINTARRAY) -> list[int]

Returns each existing non-zero label in the array (excluding background zero).

Parameters:

Name Type Description Default
arr UINTARRAY

Input unsigned-integer label array.

required

Returns:

Type Description
list[int]

list[int]: Sorted list of every distinct label value present in arr, excluding 0 (background).

Source code in TPTBox/core/np_utils.py
def np_unique_withoutzero(arr: UINTARRAY) -> list[int]:
    """Lists every non-zero label value that occurs in the array.

    The ``voxel_counts`` entry of ``cc3dstatistics`` is tried first; if that
    attempt raises, the result of ``np_unique`` is filtered instead.

    Args:
        arr (UINTARRAY): Input label array.

    Returns:
        list[int]: Distinct values present in ``arr`` in ascending order,
            with 0 (background) left out.
    """
    try:
        counts = cc3dstatistics(arr)["voxel_counts"]
        return [value for value, n_voxels in enumerate(counts) if n_voxels > 0 and value != 0]
    except Exception:
        # Deliberate best effort: any failure falls back to np_unique below.
        pass
    return [value for value in np_unique(arr) if value != 0]

np_center_of_mass

np_center_of_mass(arr: UINTARRAY) -> dict[int, COORDINATE]

Calculates center of mass for each non-zero label in the array.

Parameters:

Name Type Description Default
arr UINTARRAY

Input unsigned-integer label array.

required

Returns:

Type Description
dict[int, COORDINATE]

dict[int, COORDINATE]: Mapping from each non-zero label to its (x, y, z) center-of-mass coordinate as floats.

Source code in TPTBox/core/np_utils.py
def np_center_of_mass(arr: UINTARRAY) -> dict[int, COORDINATE]:
    """Calculates the center of mass for each non-zero label in the array.

    The statistics are computed on the uncropped array (``use_crop=False``),
    so the coordinates refer to ``arr`` itself.

    Args:
        arr (UINTARRAY): Input unsigned-integer label array.

    Returns:
        dict[int, COORDINATE]: Mapping from each non-zero label that has at
            least one voxel to its ``centroids`` entry as reported by
            ``cc3dstatistics``.
    """
    stats = cc3dstatistics(arr, use_crop=False)
    # Does not use the other calls for speed reasons. Counts and centroids are
    # walked together in one pass instead of testing membership in a list of
    # labels, which was quadratic in the number of labels.
    return {
        label: centroid
        for label, (n_voxels, centroid) in enumerate(zip(stats["voxel_counts"], stats["centroids"]))
        if n_voxels > 0 and label != 0
    }

np_bounding_boxes

np_bounding_boxes(arr: UINTARRAY) -> dict[int, tuple[slice, slice, slice]]

Calculates tight axis-aligned bounding boxes for each non-zero label in the array.

Parameters:

Name Type Description Default
arr UINTARRAY

Input unsigned-integer label array.

required

Returns:

Type Description
dict[int, tuple[slice, slice, slice]]

dict[int, tuple[slice, slice, slice]]: Mapping from each non-zero label to a 3-tuple of slices representing the bounding box of that label in each spatial dimension.

Source code in TPTBox/core/np_utils.py
def np_bounding_boxes(arr: UINTARRAY) -> dict[int, tuple[slice, slice, slice]]:
    """Calculates tight axis-aligned bounding boxes for each non-zero label in the array.

    Args:
        arr (UINTARRAY): Input unsigned-integer label array.

    Returns:
        dict[int, tuple[slice, slice, slice]]: Mapping from each non-zero label
            that has at least one voxel to its ``bounding_boxes`` entry as
            reported by ``cc3dstatistics``, expressed in the coordinates of ``arr``.
    """
    # use_crop=False (as in np_center_of_mass): with cropping the statistics are
    # computed on a sub-array, so the reported slices would be relative to that
    # crop and could not be used to index ``arr`` directly.
    stats = cc3dstatistics(arr, use_crop=False)
    # Does not use the other calls for speed reasons. Counts and boxes are
    # walked together in one pass instead of testing membership in a list of
    # labels, which was quadratic in the number of labels.
    return {
        label: bbox
        for label, (n_voxels, bbox) in enumerate(zip(stats["voxel_counts"], stats["bounding_boxes"]))
        if n_voxels > 0 and label != 0
    }

np_contacts

np_contacts(arr: UINTARRAY, connectivity: int) -> dict[tuple[int, int], int]

Calculates the contacting labels and the amount of touching voxels based on connectivity.

Parameters:

Name Type Description Default
arr UINTARRAY

Input 2D or 3D label array.

required
connectivity int

Connectivity level in range [1, 3]. 1 = face-only, 2 = face+edge, 3 = face+edge+corner adjacency.

required

Returns:

Type Description
dict[tuple[int, int], int]

dict[tuple[int, int], int]: Mapping from a pair of touching labels to the number of voxels where they touch.

Source code in TPTBox/core/np_utils.py
def np_contacts(arr: UINTARRAY, connectivity: int) -> dict[tuple[int, int], int]:
    """Reports which labels touch each other and how large each contact is.

    The connectivity level is translated into a neighbour count
    (2D: 4 or 8, 3D: 6, 18 or 26) and forwarded to ``_contacts``.

    Args:
        arr (UINTARRAY): Input 2D or 3D label array.
        connectivity (int): Level in [1, 3]. 1 = face-only, 2 = face+edge,
            3 = face+edge+corner adjacency.

    Returns:
        dict[tuple[int, int], int]: Result of ``_contacts``: pairs of touching
            labels mapped to the size of their contact.

    Raises:
        AssertionError: If ``arr`` is not 2D/3D or ``connectivity`` is outside [1, 3].
    """
    assert 2 <= arr.ndim <= 3, f"expected 2D or 3D, but got {arr.ndim}"
    assert 1 <= connectivity <= 3, f"expected connectivity in [1,3], but got {connectivity}"

    if arr.ndim == 2:
        n_neighbors = min(connectivity * 4, 8)
    elif connectivity == 1:
        n_neighbors = 6
    elif connectivity == 2:
        n_neighbors = 18
    else:
        n_neighbors = 26
    return _contacts(arr, connectivity=n_neighbors)

np_region_graph

np_region_graph(arr: UINTARRAY, connectivity: int) -> set[tuple[int, int]]

Returns the unique pairs of different labels that are adjacent in the array.

Parameters:

Name Type Description Default
arr UINTARRAY

Input 2D or 3D label array.

required
connectivity int

Connectivity level in range [1, 3]. 1 = face-only, 2 = face+edge, 3 = face+edge+corner adjacency.

required

Returns:

Type Description
set[tuple[int, int]]

set[tuple[int, int]]: Set of (label_a, label_b) pairs where each pair indicates two labels that share at least one adjacent voxel.

Source code in TPTBox/core/np_utils.py
def np_region_graph(arr: UINTARRAY, connectivity: int) -> set[tuple[int, int]]:
    """Collects the pairs of distinct labels that are adjacent in the array.

    The connectivity level is translated into a neighbour count
    (2D: 4 or 8, 3D: 6, 18 or 26) and forwarded to ``_region_graph``.

    Args:
        arr (UINTARRAY): Input 2D or 3D label array.
        connectivity (int): Level in [1, 3]. 1 = face-only, 2 = face+edge,
            3 = face+edge+corner adjacency.

    Returns:
        set[tuple[int, int]]: Result of ``_region_graph``: label pairs that
            share at least one adjacency.

    Raises:
        AssertionError: If ``arr`` is not 2D/3D or ``connectivity`` is outside [1, 3].
    """
    assert 2 <= arr.ndim <= 3, f"expected 2D or 3D, but got {arr.ndim}"
    assert 1 <= connectivity <= 3, f"expected connectivity in [1,3], but got {connectivity}"

    if arr.ndim != 2:
        # 3D neighbourhoods: faces, faces+edges, faces+edges+corners
        neighborhood = 26
        if connectivity == 1:
            neighborhood = 6
        elif connectivity == 2:
            neighborhood = 18
    else:
        neighborhood = min(connectivity * 4, 8)
    return _region_graph(arr, connectivity=neighborhood)

np_voxel_connectivity_graph

np_voxel_connectivity_graph(arr: UINTARRAY, connectivity: int) -> np.ndarray

Returns a voxel connectivity graph of the input array.

For 2D connectivity, the output is an 8-bit unsigned integer.

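Bits 1-4: edges (4,8 way)

Bits 5-8: corners (8 way only, zeroed in 4 way)

| Bit       | 8    | 7   | 6   | 5  | 4  | 3  | 2  | 1  |
|-----------|------|-----|-----|----|----|----|----|----|
| Direction | -x-y | x-y | -xy | xy | -x | +y | -x | +x |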

For a 3D 26 and 18 connectivity, the output requires 32-bit unsigned integers, for 6-way the output are 8-bit unsigned integers.

Bits 1-6: faces (6,18,26 way)

Bits 7-19: edges (18,26 way); bits 18-26: corners (26 way); bits 26-32: unused (zeroed)

Parameters:

Name Type Description Default
arr UINTARRAY

Input 2D or 3D label array.

required
connectivity int

Connectivity level in range [1, 3]. 1 = face-only, 2 = face+edge, 3 = face+edge+corner adjacency.

required

Returns:

Type Description
ndarray

np.ndarray: uint8 or uint32 array the same shape as the input, where each value encodes which neighbors share the same label as that voxel.

Source code in TPTBox/core/np_utils.py
def np_voxel_connectivity_graph(arr: UINTARRAY, connectivity: int) -> np.ndarray:
    """Builds the voxel connectivity graph of the input array.

    The connectivity level is translated into a neighbour count
    (2D: 4 or 8, 3D: 6, 18 or 26) and forwarded to ``_voxel_connectivity_graph``.

    Bit layout for 2D input (8-bit unsigned output):

    Bits 1-4: edges     (4,8 way)
        5-8: corners   (8 way only, zeroed in 4 way)

        8      7      6      5      4      3      2      1
        ------ ------ ------ ------ ------ ------ ------ ------
        -x-y    x-y    -xy     xy     -x     +y     -x     +x

    Bit layout for 3D input (32-bit unsigned output for 18/26 connectivity,
    8-bit unsigned output for 6 connectivity):

    Bits 1-6: faces     (6,18,26 way)
        7-19: edges     (18,26 way)
        18-26: corners   (26 way)
        26-32: unused (zeroed)

    NOTE(review): the table lists ``-x`` for both bit 4 and bit 2 (bit 4 is
    presumably ``-y``) and the 3D bit ranges overlap - confirm against the
    backing implementation.

    Args:
        arr (UINTARRAY): Input 2D or 3D label array.
        connectivity (int): Level in [1, 3]. 1 = face-only, 2 = face+edge,
            3 = face+edge+corner adjacency.

    Returns:
        np.ndarray: Result of ``_voxel_connectivity_graph``: per the layout
            above, a bit field per voxel describing its neighbour links.

    Raises:
        AssertionError: If ``arr`` is not 2D/3D or ``connectivity`` is outside [1, 3].
    """
    assert 2 <= arr.ndim <= 3, f"expected 2D or 3D, but got {arr.ndim}"
    assert 1 <= connectivity <= 3, f"expected connectivity in [1,3], but got {connectivity}"

    is_planar = arr.ndim == 2
    if is_planar:
        n_links = min(connectivity * 4, 8)
    else:
        n_links = 6 if connectivity == 1 else (18 if connectivity == 2 else 26)
    return _voxel_connectivity_graph(arr, connectivity=n_links)

np_dice

np_dice(seg: ndarray, gt: ndarray, binary_compare: bool = False, label: int = 1) -> float

Calculates the dice similarity between two numpy arrays.

Parameters:

Name Type Description Default
seg ndarray

segmentation array

required
gt ndarray

other segmentation array

required
binary_compare bool

if True, both arrays are binarized (0/1) before the comparison

False
label int

if not binary_compare, use this label for dice score

1

Returns:

Name Type Description
float float

dice value

Source code in TPTBox/core/np_utils.py
def np_dice(seg: np.ndarray, gt: np.ndarray, binary_compare: bool = False, label: int = 1) -> float:
    """Calculates the dice similarity between two numpy arrays.

    Args:
        seg: segmentation array
        gt: other segmentation array (same shape as ``seg``)
        binary_compare: if True, both arrays are binarized (non-zero vs. zero)
            before the comparison
        label: if not binary_compare, use this label for the dice score

    Returns:
        float: dice value in [0, 1]; 1.0 if the compared masks are both empty.

    Raises:
        AssertionError: If the two arrays differ in shape.
    """
    assert seg.shape == gt.shape, f"shape mismatch, got {seg.shape}, and {gt.shape}"

    if binary_compare:
        seg_l = seg != 0
        gt_l = gt != 0
    else:
        seg_l = seg == label  # predicted mask for this label
        gt_l = gt == label  # ground-truth mask for this label

    denom = int(seg_l.sum()) + int(gt_l.sum())
    if denom == 0:
        # Both masks are empty: defined as perfect agreement. Checking this
        # explicitly avoids the 0/0 division, whose RuntimeWarning text differs
        # between NumPy versions and was therefore not reliably silenced.
        return 1.0
    intersect = int(np.logical_and(seg_l, gt_l).sum())
    return (2.0 * intersect) / denom

np_erode_msk_euclid

np_erode_msk_euclid(arr: ndarray, n_pixel: int = 3, use_crop=True, labels=None, mask=None) -> np.ndarray

Euclidean erosion: shrink each foreground label by n_pixel voxels via distance transform.

Removes voxels whose Euclidean distance to background is ≤ n_pixel.

Source code in TPTBox/core/np_utils.py
def np_erode_msk_euclid(arr: np.ndarray, n_pixel: int = 3, use_crop=True, labels=None, mask=None) -> np.ndarray:
    """Euclidean erosion: removes foreground voxels lying within ``n_pixel`` of the background.

    The foreground (values > 0) is distance-transformed and every foreground
    voxel whose distance to the nearest background voxel is <= ``n_pixel`` is
    set to zero. ``arr`` is modified in place and returned.

    Args:
        arr (np.ndarray): Label array to erode (modified in place).
        n_pixel (int): Erosion distance. Defaults to 3.
        use_crop (bool): If True, work on a bounding-box crop obtained from
            ``np_bbox_binary``. Defaults to True.
        labels: Optional labels to restrict to. With ``use_crop=True`` they only
            determine the crop region; with ``use_crop=False`` all other labels
            are left untouched.
        mask: Optional array; where it is zero the result is zeroed as well.
            A copy is used, the caller's array is not modified.

    Returns:
        np.ndarray: ``arr`` after erosion.
    """
    if use_crop:
        selected = arr.copy()
        if labels is not None:
            selected[np.isin(selected, labels, invert=True)] = 0
        crop = np_bbox_binary(selected, px_dist=1 + n_pixel, raise_error=False)
        region = arr[crop]
    elif labels is None:
        region = arr
    else:
        region = arr.copy()
        region[np.isin(region, labels, invert=True)] = 0

    if mask is not None:
        mask = mask.copy()
        mask[mask != 0] = 1
        if use_crop:
            mask = mask[crop]

    is_foreground = region > 0
    # distance of every foreground voxel to the nearest background voxel
    depth = distance_transform_edt(is_foreground)

    eroded = region.copy()
    eroded[is_foreground & (depth <= n_pixel)] = 0
    if mask is not None:
        eroded[mask == 0] = 0

    # Only positions that were non-zero in the working region are written back.
    occupied = region != 0
    target = arr[crop] if use_crop else arr
    target[occupied] = eroded[occupied]
    return arr

np_dilate_msk_euclid

np_dilate_msk_euclid(arr: ndarray, n_pixel: int = 3, use_crop=True, labels=None, mask=None) -> np.ndarray

Euclidean dilation: expand each foreground label by n_pixel voxels via distance transform.

Assigns each newly covered voxel to the nearest existing label.

Source code in TPTBox/core/np_utils.py
def np_dilate_msk_euclid(arr: np.ndarray, n_pixel: int = 3, use_crop=True, labels=None, mask=None) -> np.ndarray:
    """Euclidean dilation: expand each foreground label by ``n_pixel`` voxels via distance transform.

    Every background voxel whose Euclidean distance to the foreground
    (values > 0) is <= ``n_pixel`` receives the label of its nearest foreground
    voxel. ``arr`` is modified in place and returned.

    Args:
        arr (np.ndarray): Label array to dilate (modified in place).
        n_pixel (int): Dilation distance. Defaults to 3.
        use_crop (bool): If True, work on a bounding-box crop obtained from
            ``np_bbox_binary``. Defaults to True.
        labels: Optional labels to restrict to. With ``use_crop=True`` they only
            determine the crop region; with ``use_crop=False`` only these labels
            are dilated.
            NOTE(review): the two modes treat ``labels`` differently - confirm
            which behaviour is intended.
        mask: Optional array; the dilation does not grow into voxels where it is
            zero. The caller's array is not modified.

    Returns:
        np.ndarray: ``arr`` after dilation.
    """
    if use_crop:
        arr_bin = arr.copy()
        if labels is not None:
            arr_bin[np.isin(arr_bin, labels, invert=True)] = 0
        crop = np_bbox_binary(arr_bin, px_dist=1 + n_pixel, raise_error=False)
        arrc = arr[crop]
    else:
        arrc = arr
        if labels is not None:
            arrc = arrc.copy()
            # Filter the working copy itself; ``arr_bin`` only exists in the
            # cropping branch and raised UnboundLocalError here.
            arrc[np.isin(arrc, labels, invert=True)] = 0

    # Derive the "outside of mask" selection without writing into the caller's
    # mask (it used to be normalised to 0/1 in place).
    outside_mask = None
    if mask is not None:
        outside_mask = (mask[crop] if use_crop else mask) == 0

    foreground = arrc > 0

    # distance to the nearest foreground voxel + index of that voxel
    dist, indices = distance_transform_edt(~foreground, return_indices=True)

    out = arrc.copy()

    # background voxels within dilation range
    dist_mask = (dist <= n_pixel) & (~foreground)

    # assign nearest label
    nearest_labels = arrc[tuple(indices)]
    out[dist_mask] = nearest_labels[dist_mask]
    if outside_mask is not None:
        out[outside_mask] = 0

    # Only non-zero results are written back, so existing labels are kept.
    filled = out != 0
    if use_crop:
        arr[crop][filled] = out[filled]
        return arr
    arr[filled] = out[filled]
    return arr

np_dilate_msk

np_dilate_msk(arr: ndarray, label_ref: LABEL_REFERENCE = None, n_pixel: int = 5, connectivity: int = 3, use_crop: bool = True, mask: ndarray | None = None, ignore_axis: None | int = None) -> np.ndarray

Dilates the given array by the specified number of voxels (not including the zero label).

Parameters:

Name Type Description Default
arr ndarray

Input label array.

required
label_ref LABEL_REFERENCE

Label or list of labels to dilate. If None, all non-zero labels are dilated. Defaults to None.

None
n_pixel int

Number of voxels to dilate by. Defaults to 5.

5
connectivity int

Elements up to a squared distance of connectivity from the center are considered neighbors. Ranges from 1 (no diagonal neighbors) to 3 (all neighbors). Defaults to 3.

3
use_crop bool

If True, crops to a bounding box before dilating for speed. Defaults to True.

True
mask ndarray | None

If set, after each iteration all voxels outside this mask are zeroed out. Defaults to None.

None
ignore_axis int | None

If set, dilation is performed in 2D along all slices of this axis (e.g., 0 for slice-wise axial dilation). Defaults to None.

None

Returns:

Type Description
ndarray

np.ndarray: The dilated label array.

Source code in TPTBox/core/np_utils.py
def np_dilate_msk(
    arr: np.ndarray,
    label_ref: LABEL_REFERENCE = None,
    n_pixel: int = 5,
    connectivity: int = 3,
    use_crop: bool = True,
    mask: np.ndarray | None = None,
    ignore_axis: None | int = None,
) -> np.ndarray:
    """Dilates the given array by the specified number of voxels (not including the zero label).

    Labels are grown one voxel per iteration and only into voxels that are
    currently zero. ``arr`` is modified in place.

    Args:
        arr (np.ndarray): Input label array (modified in place).
        label_ref (LABEL_REFERENCE, optional): Label or list of labels to dilate,
            resolved through ``_to_labels``. Defaults to None.
        n_pixel (int, optional): Number of dilation iterations. Defaults to 5.
        connectivity (int, optional): Connectivity passed to
            ``generate_binary_structure``; ranges from 1 (no diagonal neighbors)
            to 3 (all neighbors). Defaults to 3.
        use_crop (bool, optional): If True, crops to a bounding box before dilating
            for speed. Defaults to True.
        mask (np.ndarray | None, optional): If set, after each label step all
            voxels where the mask is zero are zeroed out. The caller's array is
            not modified. Defaults to None.
        ignore_axis (int | None, optional): If set, the structuring element is
            built one dimension lower and expanded along this axis, so no
            dilation happens across it. Defaults to None.

    Returns:
        np.ndarray: The dilated label array.
    """
    labels: list[int] = _to_labels(arr, label_ref)

    if use_crop:
        arr_bin = arr.copy()
        arr_bin[np.isin(arr_bin, labels, invert=True)] = 0
        crop = np_bbox_binary(arr_bin, px_dist=1 + n_pixel, raise_error=False)
        arrc = arr[crop]
    else:
        arrc = arr

    # Boolean "outside of mask" selection in the frame of ``arrc``. It is derived
    # without writing into the caller's mask (which used to be normalised to
    # 0/1 in place).
    outside_mask = None
    if mask is not None:
        outside_mask = (mask[crop] if use_crop else mask) == 0

    if ignore_axis is None:
        struct = generate_binary_structure(arr.ndim, connectivity)
    else:
        struct = generate_binary_structure(arr.ndim - 1, connectivity)
        struct = np.expand_dims(struct, ignore_axis)

    labels = [lbl for lbl in labels if lbl != 0]

    out = arrc
    for _ in range(n_pixel):
        for i in labels:
            data = out.copy()
            data[i != data] = 0
            if use_crop:
                # per-label sub-crop inside the already cropped working array
                lcrop = np_bbox_binary(data, px_dist=2 + n_pixel, raise_error=False)
                data = data[lcrop]
            msk_ibe_data = _binary_dilation(data, struct=struct)

            if use_crop:
                oc = out[lcrop] == 0
                out[lcrop][oc] = msk_ibe_data[oc] * i
                if outside_mask is not None:
                    # The mask must be cut to the same sub-crop; indexing
                    # out[lcrop] with the full-size mask is a shape mismatch
                    # whenever lcrop is smaller than the working array.
                    out[lcrop][outside_mask[lcrop]] = 0
            else:
                out[out == 0] = msk_ibe_data[out == 0] * i
                if outside_mask is not None:
                    out[outside_mask] = 0
    if use_crop:
        arr[crop] = out
        return arr
    return out

np_erode_msk

np_erode_msk(arr: ndarray, label_ref: LABEL_REFERENCE = None, n_pixel: int = 5, use_crop: bool = True, connectivity: int = 3, border_value=0, ignore_axis: None | int = None) -> np.ndarray

Erodes the given array by the specified number of voxels.

Parameters:

Name Type Description Default
arr ndarray

Input label array.

required
label_ref LABEL_REFERENCE

Label or list of labels to erode. If None, all non-zero labels are eroded. Defaults to None.

None
n_pixel int

Number of voxels to erode by. Defaults to 5.

5
use_crop bool

If True, crops to a bounding box before eroding for speed. Defaults to True.

True
connectivity int

Elements up to a squared distance of connectivity from the center are considered neighbors. Ranges from 1 (no diagonal neighbors) to 3 (all neighbors). Defaults to 3.

3
border_value int

Value to pad the border with during erosion. Defaults to 0.

0
ignore_axis int | None

If set, erosion is performed in 2D along all slices of this axis. Defaults to None.

None

Returns:

Type Description
ndarray

np.ndarray: The eroded label array.

Source code in TPTBox/core/np_utils.py
def np_erode_msk(
    arr: np.ndarray,
    label_ref: LABEL_REFERENCE = None,
    n_pixel: int = 5,
    use_crop: bool = True,
    connectivity: int = 3,
    border_value=0,
    ignore_axis: None | int = None,
) -> np.ndarray:
    """Erodes the given array by the specified number of voxels.

    Each selected non-zero label is eroded separately with ``binary_erosion``.
    ``arr`` is modified in place.

    Args:
        arr (np.ndarray): Input label array (modified in place).
        label_ref (LABEL_REFERENCE, optional): Label or list of labels to erode,
            resolved through ``_to_labels``. Defaults to None.
        n_pixel (int, optional): Number of erosion iterations. Values <= 0 leave
            the array unchanged. Defaults to 5.
        use_crop (bool, optional): If True, crops to a bounding box before eroding
            for speed. Defaults to True.
        connectivity (int, optional): Connectivity passed to
            ``generate_binary_structure``; ranges from 1 (no diagonal neighbors)
            to 3 (all neighbors). Defaults to 3.
        border_value (int, optional): ``border_value`` forwarded to
            ``binary_erosion``. Defaults to 0.
        ignore_axis (int | None, optional): If set, the structuring element is
            built one dimension lower and expanded along this axis, so no
            erosion happens across it. Defaults to None.

    Returns:
        np.ndarray: The eroded label array.
    """
    if n_pixel <= 0:
        # binary_erosion treats iterations < 1 as "repeat until nothing changes",
        # which would wipe every label. Zero steps must be a no-op, matching
        # np_dilate_msk, whose loop simply does not run.
        return arr

    labels: list[int] = _to_labels(arr, label_ref)

    if use_crop:
        crop = np_bbox_binary(np.isin(arr, labels, invert=False), px_dist=1 + n_pixel, raise_error=False)
        arrc = arr[crop]
    else:
        arrc = arr

    if ignore_axis is None:
        struct = generate_binary_structure(arr.ndim, connectivity)
    else:
        struct = generate_binary_structure(arr.ndim - 1, connectivity)
        struct = np.expand_dims(struct, ignore_axis)

    # Snapshot of the labels before erosion; ``out`` aliases the working region
    # and is edited while the snapshot stays untouched.
    msk_i_data = arrc.copy()
    out = arrc
    for i in labels:
        if i == 0:
            continue
        data = msk_i_data.copy()
        data[i != data] = 0
        if use_crop:
            # per-label sub-crop inside the already cropped working array
            lcrop = np_bbox_binary(data, px_dist=1, raise_error=False)
            data = data[lcrop]
        msk_ibe_data = binary_erosion(data, structure=struct, iterations=n_pixel, border_value=border_value)
        data[~msk_ibe_data] = 0  # type: ignore
        # Clear exactly those voxels of label i that did not survive the erosion.
        if use_crop:
            out[lcrop][(msk_i_data[lcrop] == i) & (data == 0)] = 0
        else:
            out[(msk_i_data == i) & (data == 0)] = 0
    if use_crop:
        arr[crop] = out
        return arr
    return out

np_map_labels

np_map_labels(arr: UINTARRAY, label_map: LABEL_MAP) -> np.ndarray

Maps labels in the given array according to a label-map dictionary.

Parameters:

Name Type Description Default
arr UINTARRAY

Input unsigned-integer label array to remap.

required
label_map LABEL_MAP

Dictionary mapping original label values (int or str) to new label values (int or str). Labels not present in the map are left unchanged.

required

Returns:

Type Description
ndarray

np.ndarray: A new array with labels remapped according to label_map.

Source code in TPTBox/core/np_utils.py
def np_map_labels(arr: UINTARRAY, label_map: LABEL_MAP) -> np.ndarray:
    """Maps labels in the given array according to a label-map dictionary.

    The mapping is applied through a lookup table, so all remaps happen
    simultaneously (a swap such as ``{1: 2, 2: 1}`` works as expected).

    Args:
        arr (UINTARRAY): Input unsigned-integer label array to remap.
        label_map (LABEL_MAP): Dictionary mapping original label values to new
            label values. Keys and values are converted with ``int()``. Labels
            not present in the map are left unchanged.

    Returns:
        np.ndarray: A new array with labels remapped according to ``label_map``.
            If a target label does not fit into ``arr.dtype`` the result uses a
            wider integer dtype. If ``label_map`` or ``arr`` is empty, ``arr``
            itself is returned.
    """
    if len(label_map) == 0 or arr.size == 0:
        return arr

    keys = np.array([int(k) for k in label_map.keys()], dtype=np.int64)
    values = np.array([int(v) for v in label_map.values()], dtype=np.int64)

    # Plain Python ints: adding 1 to e.g. np.uint8(255) would overflow in the
    # array's fixed-width dtype.
    table_size = max(int(arr.max()), int(keys.max()), int(values.max())) + 1

    # Keep the input dtype whenever it can represent every table entry and
    # widen it only when a target label would otherwise be truncated.
    table_dtype = arr.dtype
    if np.issubdtype(table_dtype, np.integer) and table_size - 1 > np.iinfo(table_dtype).max:
        table_dtype = np.promote_types(table_dtype, np.min_scalar_type(table_size - 1))

    lookup = np.arange(table_size, dtype=table_dtype)
    lookup[keys] = values
    return lookup[arr]

np_calc_crop_around_centerpoint

np_calc_crop_around_centerpoint(poi: tuple[int, ...] | tuple[float, ...], arr: ndarray, cutout_size: tuple[int, ...], pad_to_size: Sequence[int] | ndarray | int = 0) -> tuple[np.ndarray, tuple[slice, slice, slice], tuple]

Crops a fixed-size region centred on a given point, optionally padding near-edge regions.

Parameters:

Name Type Description Default
poi tuple[int, ...] | tuple[float, ...]

Center point of the cutout, one coordinate per dimension.

required
arr ndarray

Input array to crop.

required
cutout_size tuple[int, ...]

Desired size of the cutout in each dimension.

required
pad_to_size Sequence[int] | ndarray | int

Additional symmetric padding to add around the cutout. Can be a single int (same for all dims) or a per-dim sequence. Defaults to 0.

0

Returns:

Name Type Description
tuple tuple[ndarray, tuple[slice, slice, slice], tuple]

A 3-element tuple containing: - np.ndarray: The cropped (and padded) sub-array. - tuple[slice, ...]: Slices used to extract the cutout from arr. - tuple: Per-dimension padding amounts applied as (pad_before, pad_after).

Source code in TPTBox/core/np_utils.py
def np_calc_crop_around_centerpoint(
    poi: tuple[int, ...] | tuple[float, ...],
    arr: np.ndarray,
    cutout_size: tuple[int, ...],
    pad_to_size: Sequence[int] | np.ndarray | int = 0,
) -> tuple[np.ndarray, tuple[slice, slice, slice], tuple]:
    """Cuts a region around a centre point out of ``arr`` and pads the result.

    For every dimension the crop range and the padding are obtained from
    ``_np_get_min_max_pad``, which receives the (integer-truncated) centre
    coordinate, the array extent, half the cutout size and half the pad size.

    Args:
        poi: Centre point of the cutout, one coordinate per dimension. Each
            coordinate is converted with ``int()``.
        arr: Input array to crop.
        cutout_size: Desired size of the cutout in each dimension.
        pad_to_size: Pad size handed (halved) to ``_np_get_min_max_pad``. Either a
            single int, used for every dimension, or one value per dimension.
            Defaults to 0.

    Returns:
        tuple: A 3-element tuple containing:
            - np.ndarray: The cropped sub-array after ``np.pad``.
            - tuple[slice, ...]: Slices used to extract the cutout from ``arr``.
            - tuple: Per-dimension ``(pad_before, pad_after)`` amounts passed to ``np.pad``.
    """
    n_dim = len(poi)
    if isinstance(pad_to_size, int):
        # broadcast the scalar to one entry per dimension
        pad_to_size = np.ones(n_dim) * pad_to_size
    assert n_dim == len(arr.shape) == len(cutout_size) == len(pad_to_size), (
        f"dimension mismatch, got dim {n_dim}, poi {poi}, arr shape {arr.shape}, cutout {cutout_size}, pad_to_size {pad_to_size}"
    )

    poi = tuple(int(i) for i in poi)

    crop_slices = []
    pad_widths = []
    for center, extent, size, pad in zip(poi, arr.shape, cutout_size, pad_to_size):
        lower, upper, pad_before, pad_after = _np_get_min_max_pad(center, extent, size // 2, pad // 2)
        crop_slices.append(slice(lower, upper))
        pad_widths.append((int(pad_before), int(pad_after)))

    cutout_coords_slices = tuple(crop_slices)
    padding = tuple(pad_widths)
    arr_cut = np.pad(arr[cutout_coords_slices], padding)
    return (arr_cut, cutout_coords_slices, padding)

np_bbox_binary

np_bbox_binary(img: ndarray, px_dist: int | Sequence[int] | ndarray = 0, raise_error=True) -> tuple[slice, ...]

Calculates a bounding box in n dimensions given an image (factor ~2 times faster than compute_crop).

Parameters:

Name Type Description Default
img ndarray

input array

required
px_dist int | Sequence[int] | ndarray

The amount of padding (in pixels) added around the bounding box. If an int, the same padding is applied to each dimension; otherwise one value per dimension. Default value is 0.

0

Returns:

Type Description
tuple[slice, ...]

list of boundary coordinates as slices tuple

Source code in TPTBox/core/np_utils.py
def np_bbox_binary(img: np.ndarray, px_dist: int | Sequence[int] | np.ndarray = 0, raise_error=True) -> tuple[slice, ...]:
    """Calculates a bounding box in n dimensions given an image (factor ~2 times faster than compute_crop).

    Args:
        img: input array; every non-zero entry counts as foreground.
        px_dist: Padding added on both sides of the bounding box. If an int, the
            same padding is applied to every dimension; otherwise one value per
            dimension is expected. Default value is 0.
        raise_error: If True, an empty image raises a ValueError. If False, an
            empty image yields ``slice(None)`` for every dimension instead.

    Returns:
        Tuple with one slice per dimension. ``start`` is clamped to 0 and ``stop``
        is clamped to the length of the axis, so the slices never exceed the image.

    Raises:
        ValueError: If ``img`` is empty and ``raise_error`` is True.
    """
    assert img is not None, "bbox_nd: received None as image"
    if np_is_empty(img):
        if raise_error:
            raise ValueError("bbox_nd: img is empty, cannot calculate a bbox")
        return tuple([slice(None)] * img.ndim)

    n = img.ndim
    shp = img.shape
    if isinstance(px_dist, (int, np.integer)):
        # plain Python ints avoid the overflow a fixed-width padding vector would have
        px_dist = [int(px_dist)] * n
    assert len(px_dist) == n, f"dimension mismatch, got img shape {shp} and px_dist {px_dist}"

    extents: list[slice] = []
    for axis in range(n):
        # collapse all other axes to find which indices along `axis` are occupied
        other_axes = tuple(a for a in range(n) if a != axis)
        occupied = np.where(np.any(img, axis=other_axes))[0]
        margin = int(px_dist[axis])
        start = max(int(occupied[0]) - margin, 0)
        # +1 turns the inclusive last index into an exclusive stop *before* clamping,
        # so the stop can never be larger than the axis length
        stop = min(int(occupied[-1]) + margin + 1, shp[axis])
        extents.append(slice(start, stop))
    return tuple(extents)

np_center_of_bbox_binary

np_center_of_bbox_binary(img: ndarray, px_dist: int | Sequence[int] | ndarray = 0) -> list[int]

Calculates the center coordinates of the bounding box around non-zero regions in a binary image.

This function determines the bounding box of non-zero regions in a binary image, optionally expanding it by a specified pixel distance. It then computes and returns the center coordinates of each dimension of the bounding box.

Parameters:

Name Type Description Default
img ndarray

A binary image represented as a NumPy array, where non-zero values indicate points of interest.

required
px_dist int | Sequence[int] | ndarray

The pixel distance by which to expand the bounding box in each dimension. Can be a single integer or a sequence of integers corresponding to each dimension. Default is 0, meaning no expansion.

0

Returns:

Type Description
list[int]

list[int]: A list of center coordinates for each dimension of the bounding box.

Raises:

Type Description
ValueError

If the input image is empty or not a valid binary array.

Source code in TPTBox/core/np_utils.py
def np_center_of_bbox_binary(img: np.ndarray, px_dist: int | Sequence[int] | np.ndarray = 0) -> list[int]:
    """Returns the centre of the bounding box that encloses the non-zero entries of an image.

    The bounding box is obtained from ``np_bbox_binary`` (with ``px_dist`` forwarded
    to it). For every dimension the centre is ``start + (stop - start) // 2`` of the
    corresponding slice.

    Args:
        img (np.ndarray): Image whose non-zero entries define the bounding box.
        px_dist (int | Sequence[int] | np.ndarray, optional): Padding handed to
            ``np_bbox_binary``; a single value or one value per dimension.
            Defaults to 0.

    Returns:
        list[int]: One centre coordinate per dimension of the bounding box.

    Raises:
        ValueError: Propagated from ``np_bbox_binary`` when the image is empty.
    """
    centers = []
    for axis_slice in np_bbox_binary(img, px_dist=px_dist):
        extent = axis_slice.stop - axis_slice.start
        centers.append(axis_slice.start + (extent // 2))
    return centers

np_find_index_of_k_max_values

np_find_index_of_k_max_values(arr: ndarray, k: int = 2) -> list[int]

Calculates the indices of the k-highest values in the given arr.

Parameters:

Name Type Description Default
arr ndarray

input array

required
k int

number of highest values to calculate the index for

2

Returns:

Type Description
list[int]

list[int]: list of indices sorted. First entry corresponds to the index of the highest value in arr, ...

Source code in TPTBox/core/np_utils.py
def np_find_index_of_k_max_values(arr: np.ndarray, k: int = 2) -> list[int]:
    """Calculates the indices of the k-highest values in the given arr.

    Args:
        arr: input array (one-dimensional).
        k: number of highest values to calculate the index for. Values of
            ``k <= 0`` yield an empty list.

    Returns:
        list[int]: list of indices sorted by descending value. First entry corresponds
            to the index of the highest value in arr, ...
    """
    if k <= 0:
        return []
    # argpartition moves the k largest entries to the end without a full sort
    top = np.argpartition(arr, -k)[-k:]
    # Sort ascending and reverse instead of sorting the negated values: negation
    # wraps around for unsigned dtypes and is not defined for boolean arrays.
    order = np.argsort(arr[top])[::-1]
    return list(top[order])

np_compute_surface

np_compute_surface(arr: UINTARRAY, connectivity: int = 3, dilated_surface: bool = False) -> UINTARRAY

Computes the surface of a binary array based on connectivity and dilation options.

This function identifies the surface voxels of a binary array. If dilated_surface is True, it computes a dilated surface by expanding the array and subtracting the original. Otherwise, it computes a contracted surface by eroding the array and subtracting the result from the original.

Parameters:

Name Type Description Default
arr UINTARRAY

A binary array representing the segmentation or mask.

required
connectivity int

The connectivity used to define neighbors for surface computation, where 1 represents face-connectivity, and 3 represents full connectivity. Default is 3.

3
dilated_surface bool

Whether to compute a dilated surface. If True, expands the surface; if False, contracts the surface. Default is False.

False

Returns:

Name Type Description
UINTARRAY UINTARRAY

An array representing the computed surface voxels.

Source code in TPTBox/core/np_utils.py
def np_compute_surface(arr: UINTARRAY, connectivity: int = 3, dilated_surface: bool = False) -> UINTARRAY:
    """Extracts the surface voxels of a mask, either just outside or just inside of it.

    With ``dilated_surface=True`` a copy of the array is dilated by one pixel and all
    positions that are non-zero in the input are cleared, leaving the outer shell.
    Otherwise a copy of the array is eroded by one pixel and all positions that
    survive the erosion are cleared from a copy of the input, leaving the inner shell.
    The input array itself is not modified.

    Args:
        arr (UINTARRAY): Array representing the segmentation or mask.
        connectivity (int, optional): Connectivity in [1, 3] passed to the
            dilation/erosion helper. Default is 3.
        dilated_surface (bool, optional): True for the outer (dilated) surface,
            False for the inner (eroded) surface. Default is False.

    Returns:
        UINTARRAY: A new array holding only the surface voxels.
    """
    assert 1 <= connectivity <= 3, f"expected connectivity in [1,3], but got {connectivity}"
    if not dilated_surface:
        eroded = np_erode_msk(arr.copy(), n_pixel=1, connectivity=connectivity)
        inner_shell = arr.copy()
        inner_shell[eroded != 0] = 0  # drop everything that survived the erosion
        return inner_shell

    outer_shell = np_dilate_msk(arr.copy(), n_pixel=1, connectivity=connectivity)
    outer_shell[arr != 0] = 0  # drop everything that was already part of the mask
    return outer_shell

np_point_coordinates

np_point_coordinates(arr: UINTARRAY) -> list[tuple[int, int, int]]

Extracts the coordinates of non-zero points from a 3D binary array.

This function locates all non-zero voxels within a 3D binary array and returns their coordinates as a list of tuples.

Parameters:

Name Type Description Default
arr UINTARRAY

A 3-dimensional binary array representing the segmentation or mask.

required

Returns:

Type Description
list[tuple[int, int, int]]

list[tuple[int, int, int]]: A list of (X, Y, Z) coordinate tuples for each non-zero

list[tuple[int, int, int]]

point in the array.

Raises:

Type Description
AssertionError

If the input array does not have three dimensions.

Source code in TPTBox/core/np_utils.py
def np_point_coordinates(
    arr: UINTARRAY,
) -> list[tuple[int, int, int]]:
    """Lists the coordinates of every non-zero entry of a 3D array.

    Args:
        arr (UINTARRAY): A 3-dimensional array representing the segmentation or mask.

    Returns:
        list[tuple[int, int, int]]: One ``(x, y, z)`` index tuple per non-zero entry,
        in the order reported by ``np.where``.

    Raises:
        AssertionError: If the input array does not have three dimensions.
    """
    assert arr.ndim == 3, arr.ndim
    xs, ys, zs = np.where(arr)
    return list(zip(xs, ys, zs))

np_connected_components

np_connected_components(arr: UINTARRAY, label_ref: LABEL_REFERENCE | None = None, connectivity: int = 3, include_zero: bool = False) -> tuple[UINTARRAY, int]

Calculates the connected components of a given array (works with zeros as well!).

Parameters:

Name Type Description Default
arr UINTARRAY

input arr

required
connectivity int

in range [1,3]. For 2D images, 2 and 3 is the same.

3
include_zero bool

If true, will treat the background (0) as another label to calculate connected components from. Significantly slower! Defaults to False.

False
verbose

If true, will print out if the array does not have any CC

required

Returns:

Name Type Description
arr_cc tuple[UINTARRAY, int]

UINTARRAY, N: number of cc

Source code in TPTBox/core/np_utils.py
def np_connected_components(
    arr: UINTARRAY,
    label_ref: LABEL_REFERENCE | None = None,
    connectivity: int = 3,
    include_zero: bool = False,
) -> tuple[UINTARRAY, int]:
    """Computes a connected-component map for the selected labels of an array.

    Note:
        ``arr`` is modified in place: entries whose value is not among the selected
        labels are set to zero (and, with ``include_zero``, zeros are relabelled first).
        Pass a copy if the input must stay untouched.

    Args:
        arr: input arr; its minimum must be zero and it must be 2D or 3D.
        label_ref: Label reference resolved by ``_to_labels`` into the labels that are kept.
        connectivity: in range [1,3]. Translated to 4/6/8 for 2D and 6/18/26 for 3D
            before it is handed to the connected-components backend.
        include_zero (bool): If true, zeros are replaced by ``arr.max() + 1`` before
            the label filtering is applied. Defaults to False.

    Returns:
        tuple[UINTARRAY, int]: The connected-component map and the number of components.
    """
    assert np.min(arr) == 0, f"min value of mask not zero, got {np.min(arr)}"
    assert np.max(arr) >= 0, f"wrong normalization, max value is not >= 0, got {np_unique(arr)}"
    assert 2 <= arr.ndim <= 3, f"expected 2D or 3D, but got {arr.ndim}"
    assert 1 <= connectivity <= 3, f"expected connectivity in [1,3], but got {connectivity}"

    # translate the [1,3] level into the neighbour count used by the backend
    if arr.ndim == 2:
        connectivity = min((connectivity + 1) * 2, 8)
    elif connectivity == 1:
        connectivity = 6
    elif connectivity == 2:
        connectivity = 18
    else:
        connectivity = 26

    labels: Sequence[int] = _to_labels(arr, label_ref)
    if include_zero:
        # NOTE(review): `labels` was resolved before this relabelling, so the new
        # background value only survives the filter below if it is already part of
        # `labels` - confirm this is intended.
        arr[arr == 0] = arr.max() + 1
    not_selected = np.isin(arr, labels, invert=True)
    arr[not_selected] = 0
    cc_map, n = _connected_components(arr, connectivity=connectivity, return_N=True)
    return cc_map, n

np_connected_components_per_label

np_connected_components_per_label(arr: UINTARRAY, connectivity: int = 3, label_ref: LABEL_REFERENCE = None, include_zero: bool = False) -> dict[int, UINTARRAY]

Calculates the connected components for each label in label_ref.

Returns a dictionary mapping each label to its connected-component mask. Supports zero labels when include_zero=True.

Parameters:

Name Type Description Default
arr UINTARRAY

input arr

required
connectivity int

in range [1,3]. For 2D images, 2 and 3 is the same.

3
labels int | list[int] | None

Labels that the connected components algorithm should be applied to. If none, applies on all labels found in arr. Defaults to None.

required
include_zero bool

If true, will treat the background (0) as another label to calculate connected components from. Significantly slower! Defaults to False.

False

Returns:

Name Type Description
subreg_cc dict[int, UINTARRAY]

dict[label, arr]: for each label, an array of the same shape as arr that holds the connected-component index at the positions of that label and zero everywhere else.

Source code in TPTBox/core/np_utils.py
def np_connected_components_per_label(
    arr: UINTARRAY,
    connectivity: int = 3,
    label_ref: LABEL_REFERENCE = None,
    include_zero: bool = False,
) -> dict[int, UINTARRAY]:
    """Computes connected components once and splits the result up per label.

    The connected-components backend is run on the whole array; afterwards the
    component map is masked with each requested label.

    Note:
        With ``include_zero=True`` the zeros of ``arr`` are temporarily replaced in
        place by ``arr.max() + 1`` and restored after the components were computed.

    Args:
        arr: input arr; its minimum must be zero and it must be 2D or 3D.
        connectivity: in range [1,3]. Translated to 4/6/8 for 2D and 6/18/26 for 3D
            before it is handed to the connected-components backend.
        label_ref: Labels to report. If None, all labels returned by ``np_unique(arr)``
            are used; otherwise only those referenced labels that are present in arr.
        include_zero (bool): If true, the background (0) takes part in the
            connected-component computation and is reported under key 0. Defaults to False.

    Returns:
        dict[int, UINTARRAY]: For every label an array that holds the component index
            at the positions of that label and zero everywhere else.
    """
    assert np.min(arr) == 0, f"min value of mask not zero, got {np.min(arr)}"
    assert np.max(arr) >= 0, f"wrong normalization, max value is not >= 0, got {np_unique(arr)}"
    assert 2 <= arr.ndim <= 3, f"expected 2D or 3D, but got {arr.ndim}"
    assert 1 <= connectivity <= 3, f"expected connectivity in [1,3], but got {connectivity}"

    # translate the [1,3] level into the neighbour count used by the backend
    if arr.ndim == 2:
        connectivity = min((connectivity + 1) * 2, 8)
    elif connectivity == 1:
        connectivity = 6
    elif connectivity == 2:
        connectivity = 18
    else:
        connectivity = 26

    present_labels = np_unique(arr)
    if label_ref is None:
        labels: Sequence[int] = present_labels
    else:
        labels = [i for i in _to_labels(arr, label_ref) if i in present_labels]

    if include_zero:
        # move the background onto an unused label so it is treated like any other label
        zero_label = arr.max() + 1
        arr[arr == 0] = zero_label
        labels = list(labels)
        labels.append(0)

    labels_out = _connected_components(arr, connectivity=connectivity, return_N=False)

    if include_zero:
        # restore the background before the per-label masks are built
        arr[arr == zero_label] = 0

    return {subreg: labels_out * (arr == subreg) for subreg in labels}  # type:ignore

np_filter_connected_components

np_filter_connected_components(arr: UINTARRAY, largest_k_components: int | None = None, label_ref: LABEL_REFERENCE = None, connectivity: int = 3, return_original_labels: bool = True, min_volume: float = 0, max_volume: float | None = None, removed_to_label=0, k_larges_global=False) -> UINTARRAY

Finds the largest k connected components in a given array (does NOT work with zero as label!).

Parameters:

Name Type Description Default
arr ndarray

input array

required
k int | None

finds the k-largest components. If k is None, will find all connected components and still sort them by size

required
labels int | list[int] | None

Labels that the algorithm should be applied to. If none, applies on all labels found in arr. Defaults to None.

required
connectivity int

in range [1,3]. For 2D images, 2 and 3 is the same.

3
return_original_labels bool

If set to False, will label the components from 1 to k. Defaults to True

True
k_larges_global bool

If true, largest_k_components is filtered over all labels instead of each label individually

False

Returns: np.ndarray: array with the largest k connected components

Source code in TPTBox/core/np_utils.py
def np_filter_connected_components(
    arr: UINTARRAY,
    largest_k_components: int | None = None,
    label_ref: LABEL_REFERENCE = None,
    connectivity: int = 3,
    return_original_labels: bool = True,
    min_volume: float = 0,
    max_volume: float | None = None,
    removed_to_label=0,
    k_larges_global=False,
) -> UINTARRAY:
    """Finds the largest k connected components in a given array (does NOT work with zero as label!).

    Note:
        With ``return_original_labels=True`` the input ``arr`` is modified in place
        and returned.

    Args:
        arr (np.ndarray): input array (2D or 3D)
        largest_k_components (int | None): keeps the k largest components. If None, all connected
            components that pass the volume filter are kept (still sorted by size).
        label_ref: Labels that the algorithm should be applied to, resolved by ``_to_labels``.
            Entries with other values are ignored when computing the components.
        connectivity: in range [1,3]. For 2D images, 2 and 3 is the same (1 -> 4, 2 -> 8, 3 -> 8);
            for 3D images 1 -> 6, 2 -> 18, 3 -> 26.
        return_original_labels (bool): If True, the kept voxels carry their original label.
            If set to False, the kept components are labelled from 1 to k by descending
            volume. Defaults to True
        min_volume (float): Components with a smaller volume are discarded. Defaults to 0.
        max_volume (float | None): Components with a larger volume are discarded. None disables
            the upper bound. Defaults to None.
        removed_to_label: If non-zero, voxels of discarded components are set to this value
            instead of zero. Defaults to 0.
        k_larges_global(bool): If true, largest_k_components is filtered over all labels instead
            of each label individually
    Returns:
        np.ndarray: array with the largest k connected components
    """
    assert largest_k_components is None or largest_k_components > 0
    assert 2 <= arr.ndim <= 3, f"expected 2D or 3D, but got {arr.ndim}"
    assert 1 <= connectivity <= 3, f"expected connectivity in [1,3], but got {connectivity}"
    if arr.ndim == 2:  # noqa: SIM108
        connectivity = min(connectivity * 4, 8)  # 1:4, 2:8, 3:8
    else:
        connectivity = 6 if connectivity == 1 else 18 if connectivity == 2 else 26

    # work on a copy in which everything outside the requested labels is background
    arr2 = arr.copy()
    labels: Sequence[int] = _to_labels(arr, label_ref)
    arr2[np.isin(arr2, labels, invert=True)] = 0  # type:ignore

    labels_out, n = _connected_components(arr2, connectivity=connectivity, return_N=True)
    largest_k_components_org = largest_k_components
    if largest_k_components is None:
        largest_k_components = n
    assert largest_k_components is not None
    largest_k_components = min(largest_k_components, n)  # if k > N, will return all N but still sorted
    label_volume_pairs = [
        (i, vol) for i, vol in np_volume(labels_out).items() if vol >= min_volume and (max_volume is None or vol <= max_volume)
    ]
    largest_k_components = min(largest_k_components, len(label_volume_pairs))
    label_volume_pairs.sort(key=lambda x: x[1], reverse=True)

    if (
        len(labels) == 1
        or len(label_volume_pairs) == largest_k_components  # nothing has to be dropped
        or largest_k_components_org is None
        or k_larges_global
    ):
        preserve: list[int] = [x[0] for x in label_volume_pairs[:largest_k_components]]
    else:
        # keep up to k components per original label; a label is removed from
        # `counter` once its quota is reached
        counter = dict.fromkeys(labels, 0)
        preserve = []
        for preserve_label, _ in label_volume_pairs:
            idx = arr[labels_out == preserve_label].max()
            if counter.get(idx, largest_k_components + 1) <= largest_k_components_org:
                preserve.append(preserve_label)
                counter[idx] += 1
            if counter.get(idx, largest_k_components + 1) == largest_k_components_org:
                del counter[idx]
            if len(counter) == 0:
                break

    # number the kept components 1..k in order of descending volume
    cc_out = np.zeros(arr.shape, dtype=arr.dtype)
    for new_label, preserve_label in enumerate(preserve, start=1):
        cc_out[labels_out == preserve_label] = new_label

    if return_original_labels:
        arr *= cc_out > 0  # to get original labels
        if removed_to_label != 0:
            arr[np.logical_and(labels_out != 0, arr == 0)] = removed_to_label
        return arr
    if removed_to_label != 0:
        # mark the discarded components in the array that is actually returned
        cc_out[np.logical_and(labels_out != 0, cc_out == 0)] = removed_to_label

    return cc_out

np_get_connected_components_center_of_mass

np_get_connected_components_center_of_mass(arr: UINTARRAY, label: int, connectivity: int = 3, sort_by_axis: int | None = None) -> list[COORDINATE]

Calculates the center of mass of each connected component of a given label.

Parameters:

Name Type Description Default
arr UINTARRAY

Input label array.

required
label int

The label whose connected components are analysed.

required
connectivity int

Connectivity for connected components in range [1, 3]. Defaults to 3.

3
sort_by_axis int | None

If not None, the returned list is sorted in ascending order of the coordinate along this axis. Defaults to None.

None

Returns:

Type Description
list[COORDINATE]

list[COORDINATE]: List of (x, y, z) center-of-mass coordinates, one per connected component of label.

Source code in TPTBox/core/np_utils.py
def np_get_connected_components_center_of_mass(
    arr: UINTARRAY, label: int, connectivity: int = 3, sort_by_axis: int | None = None
) -> list[COORDINATE]:
    """Returns the centre of mass of every connected component of one label.

    The components are computed on a copy of ``arr`` via
    ``np_connected_components_per_label``; the centres are then taken from
    ``np_center_of_mass`` applied to the component map of ``label``.

    Args:
        arr (UINTARRAY): Input label array.
        label (int): The label whose connected components are analysed.
        connectivity (int, optional): Connectivity for connected components in range
            [1, 3]. Defaults to 3.
        sort_by_axis (int | None, optional): If not None, the returned list is sorted
            in ascending order of the coordinate along this axis. Defaults to None.

    Returns:
        list[COORDINATE]: One centre-of-mass coordinate per connected component of
            ``label``; an empty list if the label yields no component map.
    """
    sort_requested = sort_by_axis is not None
    if sort_requested:
        assert 0 <= sort_by_axis <= len(arr.shape) - 1, f"sort_by_axis {sort_by_axis} invalid with an array of shape {arr.shape}"  # type:ignore

    components_by_label = np_connected_components_per_label(
        arr.copy(),
        connectivity=connectivity,
        label_ref=label,
    )
    if label not in components_by_label:
        return []

    centers = list(np_center_of_mass(components_by_label[label]).values())
    if sort_requested:
        centers.sort(key=lambda a: a[sort_by_axis])
    return centers

np_translate_to_center_of_array

np_translate_to_center_of_array(image: ndarray) -> np.ndarray

Moves the nonzero values of an array so its center of mass is in the center of the array shape.

Parameters:

Name Type Description Default
image ndarray

input array

required

Returns:

Type Description
ndarray

np.ndarray: array of the same shape translated to the center

Source code in TPTBox/core/np_utils.py
def np_translate_to_center_of_array(image: np.ndarray) -> np.ndarray:
    """Shifts the content of an array so that its center of mass lies at the array center.

    The shift is the difference between the integer center of the shape
    (``size // 2`` per axis) and the center of mass, converted to ``np.int32``.

    Args:
        image: input array

    Returns:
        np.ndarray: array of the same shape, translated via ``np_translate_arr``
    """
    center_of_shape = np.asarray(tuple(i // 2 for i in image.shape))
    mass_center = np.asarray(center_of_mass(image))
    offset = np.int32(center_of_shape - mass_center)
    translation_vector: tuple[int, int] | tuple[int, int, int] = tuple(offset)
    return np_translate_arr(image, translation_vector)

np_translate_arr

np_translate_arr(arr: ndarray, translation_vector: tuple[int, int] | tuple[int, int, int]) -> np.ndarray

Translates values of an input array according to a 2D or 3D translation vector. Values that would be shifted beyond the boundary are removed!

Parameters:

Name Type Description Default
arr ndarray

input array

required
translation_vector tuple[int, int] | tuple[int, int, int]

vector to translate the array by (2D or 3D)

required

Returns:

Type Description
ndarray

np.ndarray: the translated array

Examples:

>>> a = np.array([[0, 1, 0], [0, 2, 1], [1, 0, 0]])
>>> b = np_translate_arr(a, translation_vector=(1, 0))
>>> print(b)
[[0 0 0],[0 1 0],[0 2 1]]
Source code in TPTBox/core/np_utils.py
def np_translate_arr(arr: np.ndarray, translation_vector: tuple[int, int] | tuple[int, int, int]) -> np.ndarray:
    """Translates values of an input array according to a 2D or 3D translation vector. Values that would be shifted beyond the boundary are removed!

    Positions that become free are filled with zeros. A shift whose magnitude is at
    least the size of its axis moves everything out of the array, so an all-zero
    array is returned in that case.

    Args:
        arr: input array (not modified)
        translation_vector: vector to translate the array by (2D or 3D), one integer
            shift per axis; positive values shift towards higher indices.

    Returns:
        np.ndarray: the translated array (same shape and dtype as ``arr``)

    Examples:
        >>> a = np.array([[0, 1, 0], [0, 2, 1], [1, 0, 0]])
        >>> b = np_translate_arr(a, translation_vector=(1, 0))
        >>> print(b.tolist())
        [[0, 0, 0], [0, 1, 0], [0, 2, 1]]
    """
    assert 2 <= len(translation_vector) <= 3, f"expected translation vector to be 2D or 3D, but got {translation_vector}"
    assert len(arr.shape) == len(translation_vector), f"mismatch dimensions, got arr shape {arr.shape} and vector {translation_vector}"
    arr_translated = np.zeros_like(arr)

    destination = []
    source = []
    for size, shift in zip(arr.shape, translation_vector):
        shift = int(shift)
        if abs(shift) >= size:
            # everything is pushed past the border along this axis
            return arr_translated
        destination.append(slice(max(shift, 0), size + min(shift, 0)))
        source.append(slice(max(-shift, 0), size - max(shift, 0)))

    arr_translated[tuple(destination)] = arr[tuple(source)]
    return arr_translated

np_fill_holes

np_fill_holes(arr: ndarray, label_ref: LABEL_REFERENCE = None, slice_wise_dim: int | None = None, use_crop: bool = True, pbar: bool = False) -> np.ndarray

Fills holes in segmentations.

Parameters:

Name Type Description Default
arr ndarray

Input segmentation array

required
labels int | list[int] | None

Labels that the hole-filling should be applied to. If none, applies on all labels found in arr. Defaults to None.

required
slice_wise_dim int | None

If the input is 3D, the specified dimension here can be used for 2D slice-wise filling. Defaults to None.

None

Returns:

Type Description
ndarray

np.ndarray: The array with holes filled

Source code in TPTBox/core/np_utils.py
def np_fill_holes(
    arr: np.ndarray,
    label_ref: LABEL_REFERENCE = None,
    slice_wise_dim: int | None = None,
    use_crop: bool = True,
    pbar: bool = False,
) -> np.ndarray:
    """Fills holes in segmentations.

    Every selected label is filled on its own binary mask; the filled voxels are
    written back only where the segmentation is still background (0), so existing
    labels are never overwritten.

    Note:
        ``arr`` is modified in place and returned.

    Args:
        arr (np.ndarray): Input segmentation array (2D or 3D)
        label_ref: Labels that the hole-filling should be applied to, resolved by ``_to_labels``.
            Defaults to None.
        slice_wise_dim (int | None, optional): If the input is 3D, the specified dimension here can be used for 2D slice-wise filling. Defaults to None.
        use_crop (bool): If True, the work is restricted to the bounding box of the segmentation
            and, per label, to the bounding box of that label (each padded by one pixel).
            Defaults to True.
        pbar (bool): If True, iterates over the labels with a tqdm progress bar. Defaults to False.

    Returns:
        np.ndarray: The array with holes filled
    """
    assert 2 <= arr.ndim <= 3
    assert arr.ndim == 3 or slice_wise_dim is None, "slice_wise_dim set but array is not 3D"
    labels: Sequence[int] = _to_labels(arr, label_ref)

    if use_crop:
        gcrop = np_bbox_binary(arr, px_dist=1, raise_error=False)
        arrc = arr[gcrop]  # view into arr
    else:
        arrc = arr
    if pbar:
        from tqdm import tqdm

        labels = tqdm(labels, desc="fill_holes")  # type: ignore
    for label in labels:  # type:ignore
        label_mask = arrc == label
        if use_crop:
            crop = np_bbox_binary(label_mask, px_dist=1, raise_error=False)
            label_mask = label_mask[crop]
            target = arrc[crop]  # view, so writes below end up in arrc
        else:
            target = arrc
        if slice_wise_dim is None:
            filled = _fill(label_mask).astype(arr.dtype)
        else:
            assert 0 <= slice_wise_dim <= arr.ndim - 1, f"slice_wise_dim needs to be in range [0, {arr.ndim - 1}]"
            # move the slicing axis to the front, fill every 2D slice, move it back
            filled = np.swapaxes(label_mask.copy(), 0, slice_wise_dim)
            filled = np.stack([_fill(x).astype(arr.dtype) for x in filled])
            filled = np.swapaxes(filled, 0, slice_wise_dim)
        filled[filled != 0] = label
        # only background voxels may receive the filled label
        background = target == 0
        target[background] = filled[background]

    if use_crop:
        arr[gcrop] = arrc
    return arr

np_smooth_gaussian_labelwise

np_smooth_gaussian_labelwise(arr: UINTARRAY, label_to_smooth: list[int] | int, label_weights: dict[int, float] | None = None, sigma: float = 3.0, radius: int = 6, truncate: int = 4, boundary_mode: str = 'nearest', dilate_prior: int = 0, dilate_connectivity: int = 3, dilate_channelwise: bool = False, smooth_background: bool = True, background_threshold: float | None = None) -> UINTARRAY

Smooth selected labels in a segmentation mask using Gaussian filtering, leaving other labels unaffected.

Internal Description
  1. Ensures label(s) to be smoothed are present in the segmentation.
  2. Optionally dilates specified labels prior to smoothing (if dilate_prior > 0).
  3. Iterates over each label:
    • Creates a binary mask for that label.
    • Applies Gaussian smoothing only if the label is in label_to_smooth.
    • Optionally applies a weight from label_weights.
  4. Adds background as a separate smoothed or fixed mask depending on smooth_background.
  5. Stacks all label probability-like maps and computes a new segmentation by taking the argmax over the stacked array, i.e., the label with the highest value wins per voxel.
  6. Replaces the indices in the argmax map with the original label values to preserve semantics.

Parameters:

Name Type Description Default
arr UINTARRAY

Input Segmentation Mask Array

required
label_to_smooth list[int] | int

Which labels to smooth in the mask. Every other label will be untouched

required
sigma float

Sigma of the gaussian blur. Defaults to 3.0.

3.0
radius int

Radius of the gaussian blur. Defaults to 6.

6
truncate int

Truncate of the gaussian blur. Defaults to 4.

4
boundary_mode str

Boundary Mode of the gaussian blur. Defaults to "nearest".

'nearest'
dilate_prior int

Dilate this many voxels before starting the gaussian blur algorithm. Defaults to 0.

0
dilate_connectivity int

Connectivity of the dilation process, if applied. Defaults to 3.

3
smooth_background bool

If true, will also smooth the background. If False, the background voxels stay the same and the segmentation cannot add voxels. Defaults to True.

True

Returns:

Name Type Description
UINTARRAY UINTARRAY

The resulting smoothed array of the segmentation (with the same labels as the input)

Source code in TPTBox/core/np_utils.py
def np_smooth_gaussian_labelwise(
    arr: UINTARRAY,
    label_to_smooth: list[int] | int,
    label_weights: dict[int, float] | None = None,
    sigma: float = 3.0,
    radius: int = 6,
    truncate: int = 4,
    boundary_mode: str = "nearest",
    dilate_prior: int = 0,
    dilate_connectivity: int = 3,
    dilate_channelwise: bool = False,
    smooth_background: bool = True,
    background_threshold: float | None = None,
) -> UINTARRAY:
    """Smooth selected labels in a segmentation mask using Gaussian filtering, leaving other labels unaffected.

    Internal Description:
        1. Collects the non-zero labels present in the segmentation.
        2. Optionally dilates the labels in `label_to_smooth` prior to smoothing (if `dilate_prior > 0`),
           either on the whole mask or per label channel (`dilate_channelwise`).
        3. Iterates over each label:
            - Creates a binary mask for that label.
            - Applies Gaussian smoothing only if the label is in `label_to_smooth`.
            - Optionally applies a weight from `label_weights`.
        4. Adds background as a separate smoothed or fixed mask depending on `smooth_background`.
        5. Stacks all label score maps and computes a new segmentation by taking the
           `argmax` over the stacked array, i.e., the label with the highest value wins per voxel.
        6. Optionally sends voxels whose winning score is below `background_threshold` to background.
        7. Replaces the indices in the argmax map with the original label values to preserve semantics.

    Args:
        arr (UINTARRAY): Input Segmentation Mask Array
        label_to_smooth (list[int] | int): Which labels to smooth in the mask. Every other label will be untouched
        label_weights (dict[int, float] | None, optional): Multiplicative weight per label (key 0 addresses the
            background) applied to that label's score map before the argmax. Defaults to None (no weighting).
        sigma (float, optional): Sigma of the gaussian blur. Defaults to 3.0.
        radius (int, optional): Radius of the gaussian blur. Defaults to 6.
        truncate (int, optional): Truncate of the gaussian blur. Defaults to 4.
        boundary_mode (str, optional): Boundary Mode of the gaussian blur. Defaults to "nearest".
        dilate_prior (int, optional): Dilate this many voxels before starting the gaussian blur algorithm. Defaults to 0.
        dilate_connectivity (int, optional): Connectivity of the dilation process, if applied. Defaults to 3.
        dilate_channelwise (bool, optional): If True, the dilation is applied to each label's binary mask
            separately instead of once to the whole segmentation. Defaults to False.
        smooth_background (bool, optional): If true, will also smooth the background. If False, the background voxels stay the same and the segmentation cannot add voxels. Defaults to True.
        background_threshold (float | None, optional): If given, every voxel whose winning (maximum) score is
            below this value is assigned to the background. Defaults to None (disabled).

    Returns:
        UINTARRAY: The resulting smoothed array of the segmentation (with the same labels as the input)
    """
    if label_weights is None:
        label_weights = {}
    sem_labels = np_unique_withoutzero(arr)

    if isinstance(label_to_smooth, int):
        label_to_smooth = [label_to_smooth]

    if dilate_prior > 0 and not dilate_channelwise:
        arr = np_dilate_msk(
            arr,
            n_pixel=dilate_prior,
            label_ref=label_to_smooth,
            connectivity=dilate_connectivity,
        )

    def _blur(channel: np.ndarray) -> np.ndarray:
        # Single place for the blur settings so labels and background are treated identically.
        return gaussian_filter(
            channel,
            sigma=sigma,
            mode=boundary_mode,
            truncate=truncate,
            radius=radius,
        )

    # The background channel is always stacked last, so its stack index is len(sem_labels).
    sem_labels_plus_background = [*sem_labels, 0]
    background_idx = len(sem_labels_plus_background) - 1

    smoothed_arrs = []
    for label in sem_labels:
        arr_l = (arr == label).astype(float)
        if dilate_prior > 0 and dilate_channelwise:
            arr_l = np_dilate_msk(
                arr_l,
                n_pixel=dilate_prior,
                label_ref=1,
                connectivity=dilate_connectivity,
            )
        if label in label_to_smooth:
            arr_l = _blur(arr_l)
        if label in label_weights:
            arr_l = np.multiply(arr_l, label_weights[label])
        smoothed_arrs.append(arr_l)

    # background
    arr_bg = np_extract_label(arr, label=0, inplace=False).astype(float)
    if smooth_background:
        arr_bg = _blur(arr_bg)
    if 0 in label_weights:
        arr_bg = np.multiply(arr_bg, label_weights[0])
    smoothed_arrs.append(arr_bg)

    arr_stack = np.stack(smoothed_arrs)
    seg_arr_smoothed = np.argmax(arr_stack, axis=0)
    seg_arr_s = seg_arr_smoothed.copy()

    if background_threshold is not None:
        # The threshold applies to the winning score, not to the argmax index
        # (comparing indices would merely erase the first label channels).
        winning_score = arr_stack.max(axis=0)
        seg_arr_smoothed[winning_score < background_threshold] = background_idx

    # Translate stack indices back to the original label values.
    for idx, label in enumerate(sem_labels_plus_background):
        seg_arr_s[seg_arr_smoothed == idx] = label

    return seg_arr_s

np_calc_convex_hull

np_calc_convex_hull(arr: INTARRAY, axis: int | None = None, verbose: bool = False) -> INTARRAY

Calculates the convex hull of a given array and returns a filled binary mask.

Parameters:

Name Type Description Default
arr INTARRAY

Input integer array (non-zero voxels define the point set).

required
axis int | None

If given, computes the convex hull slice-by-slice along this axis (remaining dimensions must be at least 2D). If None, computes the hull over the full 2D or 3D volume. Defaults to None.

None
verbose bool

If True, prints warnings for degenerate cases. Defaults to False.

False

Returns:

Name Type Description
INTARRAY INTARRAY

Binary array of the same shape as arr with 1 inside the convex hull and 0 outside.

Source code in TPTBox/core/np_utils.py
def np_calc_convex_hull(
    arr: INTARRAY,
    axis: int | None = None,
    verbose: bool = False,
) -> INTARRAY:
    """Calculates the convex hull of a given array and returns a filled binary mask.

    Args:
        arr (INTARRAY): Input integer array (non-zero voxels define the point set).
        axis (int | None, optional): If given, computes the convex hull slice-by-slice
            along this axis (the array must then be 3- or 4-dimensional and
            ``0 <= axis < arr.ndim``). If None, computes the hull over the full
            2D or 3D volume. Defaults to None.
        verbose (bool, optional): Forwarded to the hull computation; additionally,
            slices whose hull computation fails are reported instead of being
            skipped silently. Defaults to False.

    Returns:
        INTARRAY: Array of the same shape as ``arr`` holding the convex hull mask.
            In slice-wise mode, empty slices and slices whose hull could not be
            computed stay zero.
    """
    n_dims = arr.ndim
    if axis is None:
        assert 2 <= n_dims <= 3, f"If axis is none, array must be 2- or 3-dimensional, but got {n_dims} with shape {arr.shape}"
        return _convex_hull(arr, verbose=verbose)[0]

    assert 3 <= n_dims <= 4, f"If axis is given, the array must be 3- or 4-dimensional, but got {n_dims} with shape {arr.shape}"
    # Valid axes are 0 .. n_dims - 1; axis == n_dims would only fail later with an IndexError.
    assert 0 <= axis < n_dims, f"Specified axis must be in range of dimension, but got axis={axis} and n_dims={n_dims}"
    h = arr * 0
    for i in range(arr.shape[axis]):
        slices = _select_axis_dynamically(axis=axis, index=i, n_dims=n_dims)
        if np_is_empty(arr[slices]):
            continue
        try:
            convex_hull_slice = _convex_hull(arr[slices], verbose=verbose)[0].astype(arr.dtype)
            h[slices] += convex_hull_slice
        except Exception as e:
            # Best effort: a slice that cannot be hulled stays empty, but the failure
            # is made visible on request.
            if verbose:
                print(f"np_calc_convex_hull: skipped slice {i} along axis {axis}: {e!r}")
    return h

np_calc_boundary_mask

np_calc_boundary_mask(img: ndarray, threshold: float = 0, adjust_intensity_for_ct=False) -> np.ndarray

Calculate a boundary mask based on the input image.

Parameters: - img (np.ndarray): The 3D image array used to create the boundary mask. - threshold (float): Intensities above this value are treated as foreground. - adjust_intensity_for_ct (bool): If True, adjust the image intensity by adding 1000.

Returns: np.ndarray: A segmentation of the boundary.

This function takes a 3D array and generates a boundary mask by marking specific regions. The intensity of the image can be adjusted for CT scans by adding 1000. The image is first thresholded: voxels above the threshold are set to 2 and all others to 0. An "infect" process then starts at the eight corner points of the volume and iteratively marks every zero voxel it reaches, together with that voxel's face neighbors, as 1. The infection process continues until the infect_list is empty. Afterwards, the zeros that were never reached (regions enclosed by foreground) are set to 2 and 1 is subtracted from every voxel, so foreground and enclosed regions end up as 1 and the background connected to the corners ends up as 0. The sum of the boundary mask values is printed before the array is returned.

Source code in TPTBox/core/np_utils.py
def np_calc_boundary_mask(
    img: np.ndarray,
    threshold: float = 0,
    adjust_intensity_for_ct=False,
) -> np.ndarray:
    """Calculate a boundary mask based on the input image.

    The image is thresholded, then the background is flood-filled from the eight
    corners of the volume through face-connected (6-neighbourhood) voxels. Every
    voxel that is foreground, or is background not reachable from a corner
    (i.e. enclosed by foreground), becomes 1; the reachable background becomes 0.

    Args:
        img (np.ndarray): 3D image array used to create the boundary mask. It is not modified.
        threshold (float): Intensities strictly above this value are foreground. Defaults to 0.
        adjust_intensity_for_ct (bool): If True, 1000 is added to the intensities
            before thresholding. Defaults to False.

    Returns:
        np.ndarray: Array with the shape and dtype of ``img`` (after the optional
            intensity shift) holding 1 for foreground and enclosed regions and 0
            for the background connected to the corners.

    Note:
        The sum of the resulting mask is printed before returning.
    """
    if adjust_intensity_for_ct:
        img = img + 1000

    # Evaluate both comparisons on the untouched intensities. Thresholding the
    # working copy in two sequential steps would reset the freshly written 2s
    # to 0 again whenever threshold >= 2.
    is_foreground = img > threshold
    is_background = img <= threshold
    boundary = img.copy()
    boundary[is_foreground] = 2
    boundary[is_background] = 0

    size_x, size_y, size_z = boundary.shape[0], boundary.shape[1], boundary.shape[2]
    neighbour_offsets = (
        (1, 0, 0),
        (-1, 0, 0),
        (0, 1, 0),
        (0, -1, 0),
        (0, 0, 1),
        (0, 0, -1),
    )

    # Seed the flood fill with the eight corners of the volume.
    pending = [(x, y, z) for x in (0, size_x - 1) for y in (0, size_y - 1) for z in (0, size_z - 1)]
    while pending:
        x, y, z = pending.pop()
        if x < 0 or y < 0 or z < 0 or x >= size_x or y >= size_y or z >= size_z:
            continue
        if boundary[x, y, z] != 0:
            # foreground, or background that was already visited
            continue
        boundary[x, y, z] = 1
        pending.extend((x + dx, y + dy, z + dz) for dx, dy, dz in neighbour_offsets)

    # Remaining zeros were never reached from a corner, so they are enclosed: treat them like foreground.
    boundary[boundary == 0] = 2
    # 2 (foreground / enclosed) -> 1, 1 (outside background) -> 0
    boundary -= 1
    # NOTE(review): diagnostic output kept for backward compatibility.
    print(boundary.sum())
    return boundary

np_betti_numbers

np_betti_numbers(img: ndarray, verbose=False) -> tuple[int, int, int]

Calculates the Betti numbers B0, B1, and B2 for a 3D binary image.

Uses the Euler characteristic to derive the counts from connected-component analysis of both the foreground (26-connected) and background (6-connected).

B0: Number of connected components. B1: Number of loops / handles (tunnels). B2: Number of fully enclosed voids.

Code prototyped by Martin Menten (Imperial College), Suprosanna Shit (TU Munich), and Johannes C. Paetzold (Imperial College). Source: https://github.com/CoWBenchmark/TopCoW_Eval_Metrics/blob/master/metric_functions.py

Parameters:

Name Type Description Default
img ndarray

3D binary array (values must be 0 or 1).

required
verbose bool

If True, prints the Betti numbers. Defaults to False.

False

Returns:

Type Description
tuple[int, int, int]

tuple[int, int, int]: (B0, B1, B2) — connected components, holes, and voids.

Source code in TPTBox/core/np_utils.py
def np_betti_numbers(img: np.ndarray, verbose=False) -> tuple[int, int, int]:
    """Compute the Betti numbers (B0, B1, B2) of a 3D binary image.

    B0 and B2 are obtained by counting connected components of the foreground
    and of the background, respectively; B1 then follows from the Euler
    characteristic (``Euler = B0 - B1 + B2``).

    B0: Number of connected components.
    B1: Number of loops / handles (tunnels).
    B2: Number of fully enclosed voids.

    Code prototyped by Martin Menten (Imperial College), Suprosanna Shit (TU Munich),
    and Johannes C. Paetzold (Imperial College).
    Source: https://github.com/CoWBenchmark/TopCoW_Eval_Metrics/blob/master/metric_functions.py

    Args:
        img (np.ndarray): 3D binary array (values must be 0 or 1).
        verbose (bool, optional): If True, prints the Betti numbers. Defaults to False.

    Returns:
        tuple[int, int, int]: ``(B0, B1, B2)`` — connected components, holes, and voids.
    """
    # The connectivity settings below are only meaningful for 3D input.
    assert len(img.shape) == 3

    # Foreground and background must use complementary neighbourhoods:
    # connectivity 3 is the 26-neighbourhood, connectivity 1 the 6-neighbourhood.
    foreground_connectivity = 3
    background_connectivity = 1

    # Surround the image with a one-voxel background (0) border before counting.
    padded = np.pad(img, pad_width=1)
    assert set(np_unique(padded)).issubset({0, 1})

    # B0: foreground components; the Euler number uses the same neighbourhood.
    _, b0 = _label(padded, return_num=True, connectivity=foreground_connectivity)
    euler_char_num = _euler_number(padded, connectivity=foreground_connectivity)

    # B2: background components, minus one for the component that is not an enclosed void.
    _, background_components = _label(1 - padded, return_num=True, connectivity=background_connectivity)
    b2 = background_components - 1

    # Euler number = Betti:0 - Betti:1 + Betti:2
    b1 = b0 + b2 - euler_char_num

    if verbose:
        print(f"Betti number: b0 = {b0}, b1 = {b1}, b2 = {b2}")
    return b0, b1, b2

np_calc_overlapping_labels

np_calc_overlapping_labels(reference_arr: ndarray, prediction_arr: ndarray) -> list[tuple[int, int]]

Calculates the pairs of labels that are overlapping in at least one voxel (fast).

Parameters:

Name Type Description Default
prediction_arr ndarray

Numpy array containing the prediction labels.

required
reference_arr ndarray

Numpy array containing the reference labels.

required
ref_labels list[int]

List of unique reference labels.

required

Returns:

Type Description
list[tuple[int, int]]

list[tuple[int, int]]: List of tuples of labels that overlap in at least one voxel

Source code in TPTBox/core/np_utils.py
def np_calc_overlapping_labels(
    reference_arr: np.ndarray,
    prediction_arr: np.ndarray,
) -> list[tuple[int, int]]:
    """Calculates the pairs of labels that are overlapping in at least one voxel (fast).

    Each voxel is encoded as ``prediction * (max_reference_label + 1) + reference``
    so that a single unique pass over the array yields every (reference, prediction)
    combination that occurs.

    Args:
        reference_arr (np.ndarray): Numpy array containing the reference labels.
        prediction_arr (np.ndarray): Numpy array containing the prediction labels.

    Returns:
        list[tuple[int, int]]: List of ``(reference_label, prediction_label)`` tuples of
            non-zero labels that overlap in at least one voxel. Empty if the reference
            contains no non-zero label.
    """
    ref_labels = np_unique_withoutzero(reference_arr)
    if len(ref_labels) == 0:
        # Nothing in the reference to overlap with (max() of an empty sequence would raise).
        return []

    max_ref = max(ref_labels) + 1
    overlap_arr = (prediction_arr.astype(np.uint32) * max_ref) + reference_arr
    # Voxels without a reference label can never form a pair.
    overlap_arr[reference_arr == 0] = 0

    # Codes up to max_ref belong to voxels without a prediction label (or cleared above).
    # (ref, pred)
    return [(int(code % max_ref), int(code // max_ref)) for code in np_unique(overlap_arr) if code > max_ref]

np_normalize_to_range

np_normalize_to_range(arr: ndarray, min_value: float = 0, max_value: float = 1500) -> np.ndarray

Normalize array values so the minimum maps to min_value and the maximum is capped at max_value.

Parameters:

Name Type Description Default
arr ndarray

Input array to normalize. Modified in-place.

required
min_value float

Target minimum value after shift. Defaults to 0.

0
max_value float

Upper bound; if the original maximum exceeds this, values are scaled down proportionally. Defaults to 1500.

1500

Returns:

Type Description
ndarray

np.ndarray: The normalized array (same object as input, modified in-place).

Source code in TPTBox/core/np_utils.py
def np_normalize_to_range(arr: np.ndarray, min_value: float = 0, max_value: float = 1500) -> np.ndarray:
    """Normalize array values so the minimum maps to ``min_value`` and the maximum is capped at ``max_value``.

    The array is shifted so that its minimum equals ``min_value``. If the shifted
    maximum would exceed ``max_value``, the values are additionally compressed
    linearly so that the maximum lands exactly on ``max_value`` while the minimum
    stays at ``min_value``. Arrays that already fit are only shifted.

    Args:
        arr (np.ndarray): Input array to normalize. Modified in-place; for integer
            arrays the result is truncated when written back.
        min_value (float, optional): Target minimum value after shift. Defaults to 0.
        max_value (float, optional): Upper bound of the result; expected to be
            ``>= min_value``. Defaults to 1500.

    Returns:
        np.ndarray: The normalized array (same object as input, modified in-place).
    """
    if arr.size == 0:
        return arr

    lowest = float(arr.min())
    value_span = float(arr.max()) - lowest
    target_span = max_value - min_value

    # Work on a float copy so the scaling also works for integer inputs
    # (an in-place ``*=`` with a float factor raises for integer dtypes).
    normalized = arr.astype(np.float64) - lowest  # min = 0
    if value_span > target_span:
        # The decision is based on the range after the shift, so the cap really holds.
        normalized *= target_span / value_span
    normalized += min_value

    # Write back into the caller's array, keeping its dtype and identity.
    arr[...] = normalized
    return arr

np_fill_holes_global_with_majority_voting

np_fill_holes_global_with_majority_voting(arr: UINTARRAY, connectivity: int = 3, inplace: bool = False, verbose=False) -> UINTARRAY

Fill holes globally (across labels) and resolve inter-label conflicts by majority voting of the neighboring labels.

Parameters:

Name Type Description Default
arr UINTARRAY

input array

required
connectivity int

connectivity of connected components of the holes. Defaults to 3.

3
inplace bool

Defaults to False.

False

Returns:

Name Type Description
arr UINTARRAY

Array with all global holes filled

Source code in TPTBox/core/np_utils.py
def np_fill_holes_global_with_majority_voting(arr: UINTARRAY, connectivity: int = 3, inplace: bool = False, verbose=False) -> UINTARRAY:  # noqa: ARG001
    """Fill holes globally (across labels), resolving which label a hole gets by majority voting.

    After a first ``np_fill_holes`` pass on the labelled array, the segmentation is
    binarized and filled again. Voxels that are only filled in the binary version are
    split into connected components, and each component receives the majority label
    found in ``arr`` within a one-voxel dilation of that component.

    Args:
        arr (UINTARRAY): input array
        connectivity (int, optional): connectivity of connected components of the holes. Defaults to 3.
        inplace (bool, optional): If False, the work starts from a copy of ``arr``. Defaults to False.
        verbose: Unused.

    Returns:
        arr: Array with all global holes filled
    """
    # Fill simple holes
    result = np_fill_holes(arr if inplace else arr.copy())

    # Binary occupancy of everything that is labelled, and its hole-filled version
    occupied = result.copy()
    occupied[occupied != 0] = 1
    occupied_filled = np_fill_holes(occupied.copy())

    # Nothing was added by the binary fill -> no global holes to resolve
    if not (np_volume(occupied_filled)[1] > np_volume(occupied)[1]):
        return result

    # Keep only the voxels that the binary fill added, then split them into components
    occupied_filled[occupied == 1] = 0
    hole_components, _ = np_connected_components(occupied_filled, connectivity=connectivity)
    # delete voxels that are already labeled
    hole_components[occupied != 0] = 0

    voted = np_map_labels_based_on_majority_label_mask_overlap(
        hole_components,
        label_mask=result,
        dilate_pixel=1,
        inplace=False,
    )
    newly_labelled = voted != 0
    result[newly_labelled] = voted[newly_labelled]
    return result

np_map_labels_based_on_majority_label_mask_overlap

np_map_labels_based_on_majority_label_mask_overlap(arr: UINTARRAY, label_mask: ndarray, label_ref: LABEL_REFERENCE = None, dilate_pixel: int = 1, inplace: bool = False, no_match_label=0) -> UINTARRAY

Relabels all individual labels from input array to the majority labels of a given label_mask.

Parameters:

Name Type Description Default
arr UINTARRAY

input array to be relabeled

required
label_mask ndarray

the mask from which to pull the target labels.

required
label_ref LABEL_REFERENCE

Which labels in the input to process. Defaults to None.

None
dilate_pixel int

Number of voxels by which each label is dilated before the overlap is calculated; values of 0 or less disable the dilation. Defaults to 1.

1
inplace bool

Defaults to False.

False

Returns:

Name Type Description
arr UINTARRAY

input array with all labels in labels relabeled

Source code in TPTBox/core/np_utils.py
def np_map_labels_based_on_majority_label_mask_overlap(
    arr: UINTARRAY,
    label_mask: np.ndarray,
    label_ref: LABEL_REFERENCE = None,
    dilate_pixel: int = 1,
    inplace: bool = False,
    no_match_label=0,
) -> UINTARRAY:
    """Relabels all individual labels from input array to the majority labels of a given label_mask.

    For every selected label, its (optionally dilated) region is overlaid on
    ``label_mask`` and the most frequent non-zero value found there becomes the
    new label of the undilated region.

    Args:
        arr (UINTARRAY): input array to be relabeled
        label_mask (np.ndarray): the mask from which to pull the target labels.
        label_ref (LABEL_REFERENCE, optional): Which labels in the input to process. Defaults to None.
        dilate_pixel (int, optional): Number of voxels each label region is dilated by before
            the overlap is calculated; values <= 0 disable the dilation. Defaults to 1.
        inplace (bool, optional): If True, the result is written into ``arr``. Defaults to False.
        no_match_label (optional): Label assigned to a region that overlaps no non-zero
            value of ``label_mask``. Defaults to 0.

    Returns:
        arr: input array with all selected labels relabeled
    """
    arr_cc = arr if inplace else arr.copy()
    # Always read the regions from the unmodified labels. When working in place, voxels that
    # were just relabeled to a value that is itself still to be processed would otherwise be
    # picked up (and relabeled) a second time.
    source = arr.copy() if inplace else arr

    labels = _to_labels(source, label_ref)

    label_list: list[int] = [label for label in np_unique(source) if label in labels]

    for label in label_list:
        arr_l = np_extract_label(source, label, inplace=False)
        arr_ld = np_dilate_msk(arr_l.copy(), n_pixel=dilate_pixel, label_ref=1, connectivity=3) if dilate_pixel > 0 else arr_l
        # crop speed up by factor 6
        crop = np_bbox_binary(arr_ld, px_dist=0, raise_error=False)

        overlap = label_mask[crop] * arr_ld[crop]
        candidates, counts = np.unique(overlap, return_counts=True)
        # Zero means "no label under the region" and must not take part in the vote.
        voting = candidates != 0
        candidates = candidates[voting]
        counts = counts[voting]
        if candidates.size > 0:
            new_label = candidates[np.argmax(counts)]
        else:  # should never happen if called from np_fill_holes_global_with_majority_voting
            new_label = no_match_label
        arr_cc[arr_l != 0] = new_label
    return arr_cc