sunlabuiuc · Logiquo · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026
diff --git a/pyhealth/processors/audio_processor.py b/pyhealth/processors/audio_processor.py
@@ -134,6 +134,51 @@ def process(self, value: Union[str, Path]) -> Any:
 
         return waveform
 
+    def is_token(self) -> bool:
+        """Report that the processed audio is continuous rather than token indices.
+
+        Returns:
+            Always False: waveforms and mel spectrograms are float-valued signals.
+        """
+        return False
+
+    def schema(self) -> tuple[str, ...]:
+        """Return the semantic names of the processed audio outputs.
+
+        Only one tensor is emitted (waveform or mel spectrogram), hence one name.
+
+        Returns:
+            ("value",)
+        """
+        return ("value",)
+
+    def dim(self) -> tuple[int, ...]:
+        """Return the rank (`Tensor.dim()`) of the single output tensor.
+
+        Returns:
+            (3,) when `self.n_mels` is set: mel spectrogram (channels, n_mels, time);
+            (2,) otherwise: raw waveform (channels, samples).
+        """
+        if self.n_mels is not None:
+            return (3,)
+        return (2,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        """Flag which axes of the output tensor are spatial.
+
+        The channel axis is never spatial. For a waveform (channels, samples)
+        the samples axis is spatial; for a mel spectrogram
+        (channels, n_mels, time) both n_mels and time are.
+
+        Returns:
+            One boolean per axis; length 3 when `self.n_mels` is set, else 2.
+        """
+        if self.n_mels is not None:
+            # mel spectrogram layout: (channels, n_mels, time)
+            return (False, True, True)
+        # waveform layout: (channels, samples)
+        return (False, True)
+
     def __repr__(self) -> str:
         return (
             f"AudioProcessor(sample_rate={self.sample_rate}, "

diff --git a/pyhealth/processors/base_processor.py b/pyhealth/processors/base_processor.py
@@ -52,6 +52,57 @@ def process(self, value: Any) -> Any:
             Processed value.
         """
         pass
+
+    def is_token(self) -> bool:
+        """Return whether the output (in particular, the value tensor) of the processor
+        represents discrete token indices (True) or continuous values (False). This is used to
+        determine whether to apply token-based transformations (e.g. `nn.Embedding`) or
+        value-based transformations (e.g. `nn.Linear`).
+
+        Returns:
+            True if the output of the processor represents discrete token indices, False otherwise.
+        """
+        raise NotImplementedError("is_token method is not implemented for this processor.")
+
+    def schema(self) -> tuple[str, ...]:
+        """Return the schema of the processed feature. A processor that emits a single tensor
+        should return `("value",)`. A processor that emits a tuple of tensors should return a
+        tuple of the same length as that output, holding the semantic name of each tensor,
+        such as `("time", "value")`, `("value", "mask")`, etc.
+
+        Typical semantic names include:
+            - "value": the main processed tensor output of the processor
+            - "time": the time tensor output of the processor (mostly for StageNet)
+            - "mask": the mask tensor output of the processor (if applicable)
+
+        Returns:
+            Tuple of semantic names corresponding to the output of the processor.
+        """
+        raise NotImplementedError("Schema method is not implemented for this processor.")
+
+    def dim(self) -> tuple[int, ...]:
+        """Return the number of dimensions (`Tensor.dim()`) of each output
+        tensor, in the same order as the output tuple.
+
+        Returns:
+            Tuple of integers, one per output tensor, giving that tensor's number of dimensions.
+        """
+        raise NotImplementedError("dim method is not implemented for this processor.")
+
+    def spatial(self) -> tuple[bool, ...]:
+        """Return whether each dimension (axis) of the value tensor is spatial (i.e. corresponds
+        to a spatial axis like time, height, width, etc.) or not. This is used to determine how to
+        apply augmentations and other transformations that should only touch spatial dimensions.
+
+        E.g. for CNN or RNN features, this helps determine which dimensions receive spatial
+        augmentations and which dimensions are treated as channels or features.
+
+        Returns:
+            Tuple of booleans, one per axis of the value tensor; True marks a spatial axis.
+        """
+        raise NotImplementedError("spatial method is not implemented for this processor.")
+
+
 
 
 class SampleProcessor(Processor):

diff --git a/pyhealth/processors/deep_nested_sequence_processor.py b/pyhealth/processors/deep_nested_sequence_processor.py
@@ -185,6 +185,21 @@ def __repr__(self):
             f"max_inner_len={self._max_inner_len})"
         )
 
+    def is_token(self) -> bool:
+        """Return True: deep nested sequence codes are discrete token indices."""
+        return True
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (3,): the output is one 3D tensor (groups, visits, codes)."""
+        return (3,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        # Axes (groups, visits, codes): only visits is temporal/spatial; groups are not sequential, codes-per-visit is an unordered set
+        return (False, True, False)
+
 
 @register_processor("deep_nested_sequence_floats")
 class DeepNestedFloatsProcessor(FeatureProcessor):
@@ -379,4 +394,19 @@ def __repr__(self):
             f"max_middle_len={self._max_middle_len}, "
             f"max_inner_len={self._max_inner_len}, "
             f"forward_fill={self.forward_fill})"
-        )
+        )
+
+    def is_token(self) -> bool:
+        """Return False: deep nested float values are continuous, not discrete tokens."""
+        return False
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (3,): the output is one 3D tensor (groups, visits, features)."""
+        return (3,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        # Axes (groups, visits, features): only visits is temporal/spatial; groups are not sequential, features is not spatial
+        return (False, True, False)
diff --git a/pyhealth/processors/image_processor.py b/pyhealth/processors/image_processor.py
@@ -95,6 +95,40 @@ def process(self, value: Union[str, Path]) -> Any:
             img.load()  # Avoid "too many open files" errors
             return self.transform(img)
 
+    def is_token(self) -> bool:
+        """Report that image output is continuous pixel intensities, not token indices.
+
+        Returns:
+            Always False.
+        """
+        return False
+
+    def schema(self) -> tuple[str, ...]:
+        """Return the semantic names of the outputs: one tensor, named "value".
+
+        Returns:
+            ("value",)
+        """
+        return ("value",)
+
+    def dim(self) -> tuple[int, ...]:
+        """Return the rank of the output tensor, which is laid out as (C, H, W).
+
+        Returns:
+            (3,)
+        """
+        return (3,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        """Flag which axes of the (C, H, W) output tensor are spatial.
+
+        The channel axis is not spatial; height and width are.
+
+        Returns:
+            (False, True, True)
+        """
+        return (False, True, True)
+
     def __repr__(self) -> str:
         return (
             f"ImageLoadingProcessor(image_size={self.image_size}, "

diff --git a/pyhealth/processors/label_processor.py b/pyhealth/processors/label_processor.py
@@ -40,6 +40,20 @@ def process(self, value: Any) -> torch.Tensor:
     def size(self):
         return 1
 
+    def is_token(self) -> bool:
+        """Return False: binary labels are continuous float targets for BCE loss."""
+        return False
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (1,): the output has shape (1,), i.e. one dimension."""
+        return (1,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        return (False,)  # the single label axis is not spatial
+
     def __repr__(self):
         return f"BinaryLabelProcessor(label_vocab_size={len(self.label_vocab)})"
 
@@ -72,6 +86,20 @@ def process(self, value: Any) -> torch.Tensor:
     def size(self):
         return len(self.label_vocab)
 
+    def is_token(self) -> bool:
+        """Return True: multi-class labels are discrete token (class) indices."""
+        return True
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (0,): the output is a scalar tensor with zero dimensions."""
+        return (0,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        return ()  # empty: a 0-dim (scalar) output has no axes to flag
+
     def __repr__(self):
         return f"MultiClassLabelProcessor(label_vocab_size={len(self.label_vocab)})"
 
@@ -115,6 +143,20 @@ def process(self, value: Any) -> torch.Tensor:
     def size(self):
         return len(self.label_vocab)
 
+    def is_token(self) -> bool:
+        """Return False: multi-label indicators are continuous float targets for BCE loss."""
+        return False
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (1,): the output has shape (num_classes,), i.e. one dimension."""
+        return (1,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        return (False,)  # the single class-indicator axis is not spatial
+
     def __repr__(self):
         return f"MultiLabelProcessor(label_vocab_size={len(self.label_vocab)})"
 
@@ -131,5 +173,19 @@ def process(self, value: Any) -> torch.Tensor:
     def size(self):
         return 1
 
+    def is_token(self) -> bool:
+        """Return False: regression labels are continuous, not discrete tokens."""
+        return False
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (1,): the output has shape (1,), i.e. one dimension."""
+        return (1,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        return (False,)  # the single label axis is not spatial
+
     def __repr__(self):
         return "RegressionLabelProcessor()"
diff --git a/pyhealth/processors/nested_sequence_processor.py b/pyhealth/processors/nested_sequence_processor.py
@@ -162,6 +162,21 @@ def __repr__(self):
             f"padding={self._padding})"
         )
 
+    def is_token(self) -> bool:
+        """Return True: nested sequence codes are discrete token indices."""
+        return True
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (2,): the output is one 2D tensor (visits, codes_per_visit)."""
+        return (2,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        # Axes (visits, codes_per_visit): visits (time) is spatial; codes-per-visit is an unordered set, so not spatial
+        return (True, False)
+
 
 @register_processor("nested_sequence_floats")
 class NestedFloatsProcessor(FeatureProcessor):
@@ -341,3 +356,18 @@ def __repr__(self):
             f"forward_fill={self.forward_fill}, "
             f"padding={self._padding})"
         )
+
+    def is_token(self) -> bool:
+        """Return False: nested float values are continuous, not discrete tokens."""
+        return False
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (2,): the output is one 2D tensor (visits, features)."""
+        return (2,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        # Axes (visits, features): visits (time) is spatial; the features axis is not
+        return (True, False)
diff --git a/pyhealth/processors/sequence_processor.py b/pyhealth/processors/sequence_processor.py
@@ -71,5 +71,19 @@ def add(self, tokens: set[str]):
     def size(self):
         return len(self.code_vocab)
 
+    def is_token(self) -> bool:
+        """Return True: sequence codes are discrete token indices."""
+        return True
+
+    def schema(self) -> tuple[str, ...]:
+        return ("value",)  # one output tensor, named "value"
+
+    def dim(self) -> tuple[int, ...]:
+        """Return (1,): the output is one 1D tensor of code indices."""
+        return (1,)
+
+    def spatial(self) -> tuple[bool, ...]:
+        return (True,)  # the single sequence axis is flagged as spatial
+
     def __repr__(self):
         return f"SequenceProcessor(code_vocab_size={len(self.code_vocab)})"