Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ jobs:
export PATH=$SYSTEMDS_ROOT/bin:$PATH
cd src/main/python
./tests/federated/runFedTest.sh

- name: Cache Torch Hub
if: ${{ matrix.test_mode == 'scuro' }}
id: torch-cache
Expand All @@ -158,6 +158,8 @@ jobs:
env:
TORCH_HOME: ${{ github.workspace }}/.torch
run: |
df -h
exit
( while true; do echo "."; sleep 25; done ) &
KA=$!
pip install --upgrade pip wheel setuptools
Expand Down
11 changes: 10 additions & 1 deletion src/main/python/systemds/scuro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,13 @@
OverlappingSplitIndices,
)
from systemds.scuro.representations.elmo import ELMoRepresentation

from systemds.scuro.representations.dimensionality_reduction import (
DimensionalityReduction,
)
from systemds.scuro.representations.mlp_averaging import MLPAveraging
from systemds.scuro.representations.mlp_learned_dim_reduction import (
MLPLearnedDimReduction,
)

__all__ = [
"BaseLoader",
Expand Down Expand Up @@ -202,4 +208,7 @@
"ELMoRepresentation",
"SentenceBoundarySplitIndices",
"OverlappingSplitIndices",
"MLPAveraging",
"MLPLearnedDimReduction",
"DimensionalityReduction",
]
28 changes: 28 additions & 0 deletions src/main/python/systemds/scuro/drsearch/operator_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Registry:
_fusion_operators = []
_text_context_operators = []
_video_context_operators = []
_dimensionality_reduction_operators = {}

def __new__(cls):
if not cls._instance:
Expand Down Expand Up @@ -73,6 +74,18 @@ def add_context_operator(self, context_operator, modality_type):
def add_fusion_operator(self, fusion_operator):
self._fusion_operators.append(fusion_operator)

def add_dimensionality_reduction_operator(
self, dimensionality_reduction_operator, modality_type
):
if not isinstance(modality_type, list):
modality_type = [modality_type]
for m_type in modality_type:
if not m_type in self._dimensionality_reduction_operators.keys():
self._dimensionality_reduction_operators[m_type] = []
self._dimensionality_reduction_operators[m_type].append(
dimensionality_reduction_operator
)

def get_representations(self, modality: ModalityType):
return self._representations[modality]

Expand All @@ -86,6 +99,9 @@ def get_not_self_contained_representations(self, modality: ModalityType):
def get_context_operators(self, modality_type):
return self._context_operators[modality_type]

def get_dimensionality_reduction_operators(self, modality_type):
return self._dimensionality_reduction_operators[modality_type]

def get_fusion_operators(self):
return self._fusion_operators

Expand Down Expand Up @@ -127,6 +143,18 @@ def decorator(cls):
return decorator


def register_dimensionality_reduction_operator(modality_type):
"""
Decorator to register a dimensionality reduction operator.
"""

def decorator(cls):
Registry().add_dimensionality_reduction_operator(cls, modality_type)
return cls

return decorator


def register_context_operator(modality_type):
"""
Decorator to register a context operator.
Expand Down
5 changes: 5 additions & 0 deletions src/main/python/systemds/scuro/drsearch/representation_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
AggregatedRepresentation,
)
from systemds.scuro.representations.context import Context
from systemds.scuro.representations.dimensionality_reduction import (
DimensionalityReduction,
)
from systemds.scuro.utils.identifier import get_op_id, get_node_id

from collections import OrderedDict
Expand Down Expand Up @@ -195,6 +198,8 @@ def execute_node(node_id: str, task) -> TransformedModality:
# It's a unimodal operation
if isinstance(node_operation, Context):
result = input_mods[0].context(node_operation)
elif isinstance(node_operation, DimensionalityReduction):
result = input_mods[0].dimensionality_reduction(node_operation)
elif isinstance(node_operation, AggregatedRepresentation):
result = node_operation.transform(input_mods[0])
elif isinstance(node_operation, UnimodalRepresentation):
Expand Down
43 changes: 39 additions & 4 deletions src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import multiprocessing as mp
from typing import List, Any
from functools import lru_cache
from systemds.scuro.drsearch.task import Task
from systemds.scuro import ModalityType
from systemds.scuro.drsearch.ranking import rank_by_tradeoff
from systemds.scuro.drsearch.task import PerformanceMeasure
Expand Down Expand Up @@ -92,6 +91,12 @@ def _get_not_self_contained_reps(self, modality_type):
def _get_context_operators(self, modality_type):
return self.operator_registry.get_context_operators(modality_type)

@lru_cache(maxsize=32)
def _get_dimensionality_reduction_operators(self, modality_type):
return self.operator_registry.get_dimensionality_reduction_operators(
modality_type
)

def store_results(self, file_name=None):
if file_name is None:
import time
Expand Down Expand Up @@ -185,9 +190,7 @@ def _process_modality(self, modality, parallel):

external_cache = LRUCache(max_size=32)
for dag in dags:
representations = dag.execute(
[modality], task=self.tasks[0], external_cache=external_cache
) # TODO: dynamic task selection
representations = dag.execute([modality], external_cache=external_cache)
node_id = list(representations.keys())[-1]
node = dag.get_node_by_id(node_id)
if node.operation is None:
Expand Down Expand Up @@ -303,6 +306,27 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
scores, modality, task.model.name, end - start, combination, dag
)

def add_dimensionality_reduction_operators(self, builder, current_node_id):
dags = []
modality_type = (
builder.get_node(current_node_id).operation().output_modality_type
)

if modality_type is not ModalityType.EMBEDDING:
return None

dimensionality_reduction_operators = (
self._get_dimensionality_reduction_operators(modality_type)
)
for dimensionality_reduction_op in dimensionality_reduction_operators:
dimensionality_reduction_node_id = builder.create_operation_node(
dimensionality_reduction_op,
[current_node_id],
dimensionality_reduction_op().get_current_parameters(),
)
dags.append(builder.build(dimensionality_reduction_node_id))
return dags

def _build_modality_dag(
self, modality: Modality, operator: Any
) -> List[RepresentationDag]:
Expand All @@ -316,6 +340,12 @@ def _build_modality_dag(
current_node_id = rep_node_id
dags.append(builder.build(current_node_id))

dimensionality_reduction_dags = self.add_dimensionality_reduction_operators(
builder, current_node_id
)
if dimensionality_reduction_dags is not None:
dags.extend(dimensionality_reduction_dags)

if operator.needs_context:
context_operators = self._get_context_operators(modality.modality_type)
for context_op in context_operators:
Expand All @@ -339,6 +369,11 @@ def _build_modality_dag(
[context_node_id],
operator.get_current_parameters(),
)
dimensionality_reduction_dags = self.add_dimensionality_reduction_operators(
builder, context_rep_node_id
) # TODO: check if this is correctly using the 3d approach of the dimensionality reduction operator
if dimensionality_reduction_dags is not None:
dags.extend(dimensionality_reduction_dags)

agg_operator = AggregatedRepresentation()
context_agg_node_id = builder.create_operation_node(
Expand Down
9 changes: 9 additions & 0 deletions src/main/python/systemds/scuro/modality/transformed.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ def context(self, context_operator):
transformed_modality.transform_time += time.time() - start
return transformed_modality

def dimensionality_reduction(self, dimensionality_reduction_operator):
transformed_modality = TransformedModality(
self, dimensionality_reduction_operator, self_contained=self.self_contained
)
start = time.time()
transformed_modality.data = dimensionality_reduction_operator.execute(self.data)
transformed_modality.transform_time += time.time() - start
return transformed_modality

def apply_representation(self, representation):
start = time.time()
new_modality = representation.transform(self)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
import abc

import numpy as np

from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.representation import Representation


class DimensionalityReduction(Representation):
def __init__(self, name, parameters=None):
"""
Parent class for different dimensionality reduction operations
:param name: Name of the dimensionality reduction operator
"""
super().__init__(name, parameters)
self.needs_training = False

@abc.abstractmethod
def execute(self, data, labels=None):
"""
Implemented for every child class and creates a sampled representation for a given modality
:param data: data to apply the dimensionality reduction on
:param labels: labels for learned dimensionality reduction
:return: dimensionality reduced data
"""
if labels is not None:
self.execute_with_training(data, labels)
else:
self.execute(data)

def apply_representation(self, data):
"""
Implemented for every child class and creates a dimensionality reduced representation for a given modality
:param data: data to apply the representation on
:return: dimensionality reduced data
"""
raise f"Not implemented for Dimensionality Reduction Operator: {self.name}"

def execute_with_training(self, modality, task):
fusion_train_indices = task.fusion_train_indices
# Handle 3d data
data = modality.data
if (
len(np.array(modality.data).shape) == 3
and np.array(modality.data).shape[1] == 1
):
data = np.array([x.reshape(-1) for x in modality.data])
transformed_train = self.execute(
np.array(data)[fusion_train_indices], task.labels[fusion_train_indices]
)

all_other_indices = [
i for i in range(len(modality.data)) if i not in fusion_train_indices
]
transformed_other = self.apply_representation(np.array(data)[all_other_indices])

transformed_data = np.zeros((len(data), transformed_train.shape[1]))
transformed_data[fusion_train_indices] = transformed_train
transformed_data[all_other_indices] = transformed_other

return transformed_data
22 changes: 12 additions & 10 deletions src/main/python/systemds/scuro/representations/glove.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,20 @@ def transform(self, modality):
glove_embeddings = load_glove_embeddings(self.glove_path)

embeddings = []
embedding_dim = (
len(next(iter(glove_embeddings.values()))) if glove_embeddings else 100
)

for sentences in modality.data:
tokens = list(tokenize(sentences.lower()))
embeddings.append(
np.mean(
[
glove_embeddings[token]
for token in tokens
if token in glove_embeddings
],
axis=0,
)
)
token_embeddings = [
glove_embeddings[token] for token in tokens if token in glove_embeddings
]

if len(token_embeddings) > 0:
embeddings.append(np.mean(token_embeddings, axis=0))
else:
embeddings.append(np.zeros(embedding_dim, dtype=np.float32))

if self.output_file is not None:
save_embeddings(np.array(embeddings), self.output_file)
Expand Down
Loading
Loading