Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions pyhealth/processors/audio_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,51 @@ def process(self, value: Union[str, Path]) -> Any:

return waveform

def is_token(self) -> bool:
    """Report that audio output is continuous rather than tokenized.

    Waveforms and spectrograms are float-valued signals, not discrete
    token indices.

    Returns:
        Always False.
    """
    return False

def schema(self) -> tuple[str, ...]:
    """Name the tensors produced for an audio feature.

    Only one tensor is emitted (a waveform or a mel spectrogram), so the
    schema has a single entry.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the emitted tensor.

    Returns:
        (3,) when ``n_mels`` is set, i.e. mel spectrogram output laid out
        as (channels, n_mels, time); otherwise (2,) for waveform output
        laid out as (channels, samples).
    """
    emits_mel = self.n_mels is not None
    return (3,) if emits_mel else (2,)

def spatial(self) -> tuple[bool, ...]:
    """Flag which axes of the output tensor are spatial.

    The channel axis is never spatial. For a waveform laid out as
    (channels, samples) the sample axis is spatial; for a mel spectrogram
    laid out as (channels, n_mels, time) both the mel and time axes are.

    Returns:
        One boolean per axis: (False, True, True) when ``n_mels`` is set,
        otherwise (False, True).
    """
    waveform_axes = (False, True)  # (channels, samples)
    mel_axes = (False, True, True)  # (channels, n_mels, time)
    return waveform_axes if self.n_mels is None else mel_axes

def __repr__(self) -> str:
return (
f"AudioProcessor(sample_rate={self.sample_rate}, "
Expand Down
51 changes: 51 additions & 0 deletions pyhealth/processors/base_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,57 @@ def process(self, value: Any) -> Any:
Processed value.
"""
pass

def is_token(self) -> bool:
    """Tell whether the processor's value tensor holds discrete token indices.

    Consumers use this flag to choose between token-based handling
    (e.g. `nn.Embedding`) and value-based handling (e.g. `nn.Linear`).

    Returns:
        True if the output represents discrete token indices, False if it
        represents continuous values.

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    message = "is_token method is not implemented for this processor."
    raise NotImplementedError(message)

def schema(self) -> tuple[str, ...]:
    """Return the semantic name of each tensor emitted by the processor.

    A processor that emits a single tensor should return ``("value",)``.
    A processor that emits a tuple of tensors should return a tuple of the
    same length, naming each tensor in output order, such as
    ``("time", "value")`` or ``("value", "mask")``.

    Typical semantic names include:
        - "value": the main processed tensor output of the processor
        - "time": the time tensor output of the processor (mostly for StageNet)
        - "mask": the mask tensor output of the processor (if applicable)

    Returns:
        Tuple of semantic names corresponding to the output of the processor.

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    raise NotImplementedError("Schema method is not implemented for this processor.")

def dim(self) -> tuple[int, ...]:
    """Report the rank (`Tensor.dim()`) of every output tensor.

    Entries follow the same order as the processor's output tuple.

    Returns:
        Tuple of integers, one per output tensor, giving its number of
        dimensions.

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    message = "dim method is not implemented for this processor."
    raise NotImplementedError(message)

def spatial(self) -> tuple[bool, ...]:
    """Mark which axes of the value tensor are spatial.

    A spatial axis is one such as time, height or width. The flags let
    callers apply augmentations and other transformations only along
    spatial axes, and treat the remaining axes as channels or features
    (e.g. for CNN or RNN inputs).

    Returns:
        Tuple of booleans, one per axis of the value tensor, True where the
        axis is spatial.

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    message = "spatial method is not implemented for this processor."
    raise NotImplementedError(message)




class SampleProcessor(Processor):
Expand Down
32 changes: 31 additions & 1 deletion pyhealth/processors/deep_nested_sequence_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,21 @@ def __repr__(self):
f"max_inner_len={self._max_inner_len})"
)

def is_token(self) -> bool:
    """Report that deep nested sequence codes are discrete token indices.

    Returns:
        Always True.
    """
    return True

def schema(self) -> tuple[str, ...]:
    """Name the single tensor this processor emits.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the output tensor.

    Returns:
        (3,), for a 3D tensor laid out as (groups, visits, codes).
    """
    return (3,)

def spatial(self) -> tuple[bool, ...]:
    """Flag which of the three output axes are spatial.

    Returns:
        (False, True, False): only the visits axis is spatial.
    """
    # Axis 0 (groups) is not sequential, axis 1 (visits) is temporal/spatial,
    # axis 2 (codes per visit) is an unordered set.
    return (False, True, False)


@register_processor("deep_nested_sequence_floats")
class DeepNestedFloatsProcessor(FeatureProcessor):
Expand Down Expand Up @@ -379,4 +394,19 @@ def __repr__(self):
f"max_middle_len={self._max_middle_len}, "
f"max_inner_len={self._max_inner_len}, "
f"forward_fill={self.forward_fill})"
)
)

def is_token(self) -> bool:
    """Report that deep nested float values are continuous, not tokens.

    Returns:
        Always False.
    """
    return False

def schema(self) -> tuple[str, ...]:
    """Name the single tensor this processor emits.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the output tensor.

    Returns:
        (3,), for a 3D tensor laid out as (groups, visits, features).
    """
    return (3,)

def spatial(self) -> tuple[bool, ...]:
    """Flag which of the three output axes are spatial.

    Returns:
        (False, True, False): only the visits axis is spatial.
    """
    # Axis 0 (groups) is not sequential, axis 1 (visits) is temporal/spatial,
    # axis 2 (features) is not spatial.
    return (False, True, False)
34 changes: 34 additions & 0 deletions pyhealth/processors/image_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,40 @@ def process(self, value: Union[str, Path]) -> Any:
img.load() # Avoid "too many open files" errors
return self.transform(img)

def is_token(self) -> bool:
    """Report that image output is continuous rather than tokenized.

    Pixel intensities are float-valued, not discrete token indices.

    Returns:
        Always False.
    """
    return False

def schema(self) -> tuple[str, ...]:
    """Name the single tensor emitted for an image.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the image tensor, laid out as (C, H, W).

    Returns:
        (3,)
    """
    return (3,)

def spatial(self) -> tuple[bool, ...]:
    """Flag which axes of the (C, H, W) image tensor are spatial.

    Height and width are spatial; the channel axis is not.

    Returns:
        (False, True, True)
    """
    return (False, True, True)

def __repr__(self) -> str:
return (
f"ImageLoadingProcessor(image_size={self.image_size}, "
Expand Down
56 changes: 56 additions & 0 deletions pyhealth/processors/label_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ def process(self, value: Any) -> torch.Tensor:
def size(self):
    """Return the output size of the label, which is always 1 here."""
    return 1

def is_token(self) -> bool:
    """Report that binary labels are not token indices.

    They are continuous float targets intended for BCE loss.

    Returns:
        Always False.
    """
    return False

def schema(self) -> tuple[str, ...]:
    """Name the single tensor emitted for the label.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the label tensor.

    Returns:
        (1,), because the output shape is (1,).
    """
    return (1,)

def spatial(self) -> tuple[bool, ...]:
    """Flag the label tensor's single axis as non-spatial.

    Returns:
        (False,)
    """
    return (False,)

def __repr__(self):
    """Return a debug string showing the label vocabulary size."""
    vocab_size = len(self.label_vocab)
    return f"BinaryLabelProcessor(label_vocab_size={vocab_size})"

Expand Down Expand Up @@ -72,6 +86,20 @@ def process(self, value: Any) -> torch.Tensor:
def size(self):
    """Return the number of entries in the label vocabulary."""
    vocab = self.label_vocab
    return len(vocab)

def is_token(self) -> bool:
    """Report that multi-class labels are discrete token indices.

    Returns:
        Always True.
    """
    return True

def schema(self) -> tuple[str, ...]:
    """Name the single tensor emitted for the label.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the label tensor.

    Returns:
        (0,), because the output is a scalar tensor.
    """
    return (0,)

def spatial(self) -> tuple[bool, ...]:
    """Return per-axis spatial flags for the label tensor.

    Returns:
        An empty tuple.
    """
    return ()

def __repr__(self):
    """Return a debug string showing the label vocabulary size."""
    vocab_size = len(self.label_vocab)
    return f"MultiClassLabelProcessor(label_vocab_size={vocab_size})"

Expand Down Expand Up @@ -115,6 +143,20 @@ def process(self, value: Any) -> torch.Tensor:
def size(self):
    """Return the number of entries in the label vocabulary."""
    vocab = self.label_vocab
    return len(vocab)

def is_token(self) -> bool:
    """Report that multi-label indicators are not token indices.

    They are continuous float targets intended for BCE loss.

    Returns:
        Always False.
    """
    return False

def schema(self) -> tuple[str, ...]:
    """Name the single tensor emitted for the label.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the label tensor.

    Returns:
        (1,), because the output shape is (num_classes,).
    """
    return (1,)

def spatial(self) -> tuple[bool, ...]:
    """Flag the label tensor's single axis as non-spatial.

    Returns:
        (False,)
    """
    return (False,)

def __repr__(self):
    """Return a debug string showing the label vocabulary size."""
    vocab_size = len(self.label_vocab)
    return f"MultiLabelProcessor(label_vocab_size={vocab_size})"

Expand All @@ -131,5 +173,19 @@ def process(self, value: Any) -> torch.Tensor:
def size(self):
    """Return the output size of the label, which is always 1 here."""
    return 1

def is_token(self) -> bool:
    """Report that regression labels are continuous, not token indices.

    Returns:
        Always False.
    """
    return False

def schema(self) -> tuple[str, ...]:
    """Name the single tensor emitted for the label.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the label tensor.

    Returns:
        (1,), because the output shape is (1,).
    """
    return (1,)

def spatial(self) -> tuple[bool, ...]:
    """Flag the label tensor's single axis as non-spatial.

    Returns:
        (False,)
    """
    return (False,)

def __repr__(self):
    """Return the fixed debug string for this processor."""
    text = "RegressionLabelProcessor()"
    return text
30 changes: 30 additions & 0 deletions pyhealth/processors/nested_sequence_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,21 @@ def __repr__(self):
f"padding={self._padding})"
)

def is_token(self) -> bool:
    """Report that nested sequence codes are discrete token indices.

    Returns:
        Always True.
    """
    return True

def schema(self) -> tuple[str, ...]:
    """Name the single tensor this processor emits.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the output tensor.

    Returns:
        (2,), for a 2D tensor laid out as (visits, codes_per_visit).
    """
    return (2,)

def spatial(self) -> tuple[bool, ...]:
    """Flag which of the two output axes are spatial.

    Returns:
        (True, False): only the visits axis is spatial.
    """
    # Axis 0 (visits, i.e. time) is spatial; axis 1 (codes per visit) is an
    # unordered set and therefore not spatial.
    return (True, False)


@register_processor("nested_sequence_floats")
class NestedFloatsProcessor(FeatureProcessor):
Expand Down Expand Up @@ -341,3 +356,18 @@ def __repr__(self):
f"forward_fill={self.forward_fill}, "
f"padding={self._padding})"
)

def is_token(self) -> bool:
    """Report that nested float values are continuous, not tokens.

    Returns:
        Always False.
    """
    return False

def schema(self) -> tuple[str, ...]:
    """Name the single tensor this processor emits.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the output tensor.

    Returns:
        (2,), for a 2D tensor laid out as (visits, features).
    """
    return (2,)

def spatial(self) -> tuple[bool, ...]:
    """Flag which of the two output axes are spatial.

    Returns:
        (True, False): only the visits axis is spatial.
    """
    # Axis 0 (visits, i.e. time) is spatial; axis 1 (features) is not.
    return (True, False)
14 changes: 14 additions & 0 deletions pyhealth/processors/sequence_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,19 @@ def add(self, tokens: set[str]):
def size(self):
    """Return the number of entries in the code vocabulary."""
    vocab = self.code_vocab
    return len(vocab)

def is_token(self) -> bool:
    """Report that sequence codes are discrete token indices.

    Returns:
        Always True.
    """
    return True

def schema(self) -> tuple[str, ...]:
    """Name the single tensor this processor emits.

    Returns:
        ("value",)
    """
    return ("value",)

def dim(self) -> tuple[int, ...]:
    """Give the rank of the output tensor.

    Returns:
        (1,), for a 1D tensor of code indices.
    """
    return (1,)

def spatial(self) -> tuple[bool, ...]:
    """Flag the output tensor's single axis as spatial.

    Returns:
        (True,)
    """
    return (True,)

def __repr__(self):
    """Return a debug string showing the code vocabulary size."""
    vocab_size = len(self.code_vocab)
    return f"SequenceProcessor(code_vocab_size={vocab_size})"
Loading