Source code for torchaug.ta_tensors._batch_labels

# @Copyright: CEA-LIST/DIASI/SIALV/ (2023-    )
# @Author: CEA-LIST/DIASI/SIALV/ <julien.denize@cea.fr>
# @License: CECILL-C

from __future__ import annotations

from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union

import torch
from torch.utils._pytree import tree_flatten

from ._batch_concatenated_ta_tensor import _BatchConcatenatedTATensor
from ._labels import Labels


_CHECK_ATTRS = [
    "requires_grad",
    "device",
    "dtype",
]



[docs]
def convert_labels_to_batch_labels(
    labels: Sequence[Labels],
) -> BatchLabels:
    """Convert a sequence of :class:`~torchaug.ta_tensors.Labels` to a
    :class:`~torchaug.ta_tensors.BatchLabels` object.
    """
    for attr in _CHECK_ATTRS:
        for label in labels:
            if getattr(label, attr) != getattr(labels[0], attr):
                raise ValueError(f"All labels must have the same {attr} attribute.")

    labels_data = torch.cat(labels)

    samples_ranges = []
    sum_labels = 0
    for label in labels:
        samples_ranges.append((sum_labels, sum_labels + label.shape[0]))
        sum_labels += label.shape[0]

    batch_labels = BatchLabels(
        labels_data,
        samples_ranges=samples_ranges,
    )

    return batch_labels




[docs]
def convert_batch_labels_to_labels(
    batch_labels: BatchLabels,
) -> List[Labels]:
    """Convert :class:`~torchaug.ta_tensors.BatchLabels` object to a list of
    :class:`~torchaug.ta_tensors.Labels`.
    """
    samples_ranges = batch_labels.samples_ranges

    list_labels = [Labels(batch_labels[idx_start:idx_stop]) for idx_start, idx_stop in samples_ranges]
    return list_labels




[docs]
class BatchLabels(_BatchConcatenatedTATensor):
    """:class:`BatchLabels` subclass for concatenated labels.

    Useful for labels of bounding boxes or masks, where each sample can have a different number of labels.

    Args:
        data: Any data that can be turned into a tensor with :func:`torch.as_tensor`.
        dtype: Desired data type. If omitted, will be inferred from
            ``data``.
        samples_ranges: Each element is the range of the indices of the labels for each sample.
        device: Desired device. If omitted and ``data`` is a :class:`torch.Tensor`, the device is taken from
            it. Otherwise, the batch of tensor is constructed on the CPU.
        requires_grad: Whether autograd should record operations. If omitted and
            ``data`` is a :class:`Labels`, the value is taken from it. Otherwise, defaults to ``False``.

    """


[docs]
    @classmethod
    def cat(cls, labels_batches: Sequence[BatchLabels]):
        """Concatenates a sequence of :class:`~torchaug.ta_tensors.BatchLabels` along the first dimension.

        Args:
            labels_batches: A sequence of :class:`~torchaug.ta_tensors.BatchLabels` to concatenate.

        Returns:
            The concatenated :class:`~torchaug.ta_tensors.BatchLabels`.
        """
        for batch_label in labels_batches:
            if not isinstance(batch_label, BatchLabels):
                raise ValueError("All batches must be of type BatchLabels.")
            if batch_label.ndim > 1 and not batch_label.shape[1:] == labels_batches[0].shape[1:]:
                raise ValueError("All batches of tensors must have the same size.")
            for attr in _CHECK_ATTRS:
                if getattr(batch_label, attr) != getattr(labels_batches[0], attr):
                    raise ValueError(f"All batches of tensors must have the same {attr} attribute.")

        samples_ranges = []
        sum_labels = 0
        for batch_labels in labels_batches:
            for idx_start, idx_stop in batch_labels.samples_ranges:
                samples_ranges.append((idx_start + sum_labels, idx_stop + sum_labels))
            sum_labels += batch_labels.num_data

        data = torch.cat([batch_label.data for batch_label in labels_batches], 0)

        return cls(
            data,
            samples_ranges=samples_ranges,
        )


    @classmethod
    def _wrap(  # type: ignore[override]
        cls,
        tensor: torch.Tensor,
        *,
        samples_ranges: List[Tuple[int, int]],
    ) -> BatchLabels:
        batch_labels = tensor.as_subclass(cls)
        batch_labels.samples_ranges = samples_ranges
        return batch_labels

    def __new__(
        cls,
        data: Any,
        *,
        samples_ranges: List[Tuple[int, int]],
        dtype: Optional[torch.dtype] = None,
        device: Optional[Union[torch.device, str, int]] = None,
        requires_grad: Optional[bool] = None,
    ) -> BatchLabels:
        tensor = cls._to_tensor(data, dtype=dtype, device=device, requires_grad=requires_grad)

        cls._check_samples_ranges(samples_ranges, tensor)

        return cls._wrap(tensor, samples_ranges=samples_ranges)

    @classmethod
    def _wrap_output(
        cls,
        output: torch.Tensor,
        args: Sequence[Any] = (),
        kwargs: Optional[Mapping[str, Any]] = None,
    ) -> BatchLabels:
        # If there are BatchLabels instances in the output, their metadata got lost when we called
        # super().__torch_function__. We need to restore the metadata somehow, so we choose to take
        # the metadata from the first batch of tensors in the parameters.
        # This should be what we want in most cases. When it's not, it's probably a mis-use anyway, e.g.
        # something like batch_cat_tensor_dims1 + batch_cat_tensor_dims1; we don't guard against those cases.
        flat_params, _ = tree_flatten(args + (tuple(kwargs.values()) if kwargs else ()))  # type: ignore[operator]
        first_batch_labels_from_args = next(x for x in flat_params if isinstance(x, BatchLabels))

        samples_ranges = first_batch_labels_from_args.samples_ranges.copy()  # clone the list.

        if isinstance(output, torch.Tensor) and not isinstance(output, BatchLabels):
            output = BatchLabels._wrap(
                output,
                samples_ranges=samples_ranges,
            )
        elif isinstance(output, (tuple, list)):
            output = type(output)(
                BatchLabels._wrap(
                    part,
                    samples_ranges=samples_ranges,
                )
                for part in output
            )
        return output


[docs]
    def get_sample(self, idx: int) -> Labels:
        """Get the tensors for a sample in the batch.

        Args:
            idx: The index of the sample to get.

        Returns:
            The tensors for the sample.
        """
        labels = self[self.samples_ranges[idx][0] : self.samples_ranges[idx][1]]
        return Labels(labels)



[docs]
    def get_chunk(self, chunk_indices: torch.Tensor) -> BatchLabels:
        """Get a chunk of the batch of tensors.

        Args:
            chunk_indices: The indices of the chunk to get.

        Returns:
            The chunk of the batch of tensors.
        """
        chunk_samples_ranges = self._get_chunk_samples_ranges_from_chunk_indices(chunk_indices)
        data_indices = self._get_data_indices_from_chunk_indices(chunk_indices)
        return BatchLabels(
            self[data_indices],
            samples_ranges=chunk_samples_ranges,
            device=self.device,
            requires_grad=self.requires_grad,
        )



[docs]
    def update_chunk_(self, chunk: BatchLabels, chunk_indices: torch.Tensor) -> BatchLabels:
        """Update a chunk of the batch of labels.

        Args:
            chunk: The chunk update.
            chunk_indices: The indices of the chunk to update.

        Returns:
            The updated batch of labels.
        """
        return super().update_chunk_(chunk, chunk_indices)



[docs]
    def to_samples(self) -> list[Labels]:
        """Get the tensors."""
        return [self.get_sample(i).clone() for i in range(self.batch_size)]



[docs]
    @classmethod
    def masked_select(cls, labels: BatchLabels, mask: torch.Tensor) -> BatchLabels:
        """Remove labels from the batch of labels.

        Args:
            labels: The batch of labels to remove labels from.
            mask: A boolean mask to keep labels.

        Returns:
            The updated batch of labels.
        """
        old_samples_ranges = labels.samples_ranges
        data = labels.data[mask]

        neg_mask = (~mask).cpu()

        num_delete_per_sample = [
            neg_mask[idx_start:idx_stop].sum().item() for idx_start, idx_stop in old_samples_ranges
        ]

        new_samples_ranges = [
            (
                old_samples_ranges[i][0] - sum(num_delete_per_sample[:i]),
                old_samples_ranges[i][1] - sum(num_delete_per_sample[: i + 1]),
            )
            for i in range(len(old_samples_ranges))
        ]

        return cls._wrap(
            data,
            samples_ranges=new_samples_ranges,
        )