Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions egomimic/eval/eval_latent.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def __init__(
compute_pca_umap: bool = True,
pca_n_components: int = 50,
pca_for_downstream: bool = False,
compute_knn: bool = False,
knn_k: int = 5,
emit_combined: bool = True,
color_by: str = "embodiment", # "embodiment" or "hash"
):
Expand Down Expand Up @@ -95,6 +97,8 @@ def __init__(
f"color_by must be 'embodiment' or 'hash', got {color_by!r}"
)
self.color_by = color_by
self.compute_knn = compute_knn
self.knn_k = knn_k
self._layer_keys = {} # layer_name -> list[np.ndarray (B, S, D)]
self._row_hashes = [] # one entry per sample (replicated by S at write time)
self._row_embodiments = []
Expand Down Expand Up @@ -414,6 +418,19 @@ def on_validation_end(self):
):
tsne_3d = self._tsne_3d(features_for_reduction)

knn_result = None
if self.compute_knn:
with _timed(f"{layer_name} | KNN-{self.knn_k} ({keys.shape[0]} rows)"):
knn_result = self._knn_accuracy(
features_for_reduction, embs, k=self.knn_k
)
logger.info(
"[KNN] %s: accuracy=%.4f (+/- %.4f)",
layer_name,
knn_result["accuracy"],
knn_result.get("std", 0.0),
)

csv_path = os.path.join(out_dir, f"{layer_name}.csv")
keys_pt_path = os.path.join(out_dir, f"{layer_name}_keys.pt")
with _timed(f"{layer_name} | write_csv ({keys.shape[0]} rows)"):
Expand Down Expand Up @@ -599,6 +616,38 @@ def _pca(features: np.ndarray, n_components: int):
np.float32
), reducer.explained_variance_ratio_.astype(np.float32)

@staticmethod
def _knn_accuracy(features: np.ndarray, labels: list, k: int = 5) -> dict:
    """Estimate how well a KNN classifier predicts ``labels`` from ``features``.

    The classifier is scored with ``cross_val_score(..., scoring="accuracy")``
    using at most 5 folds. cuML's ``KNeighborsClassifier`` is used when
    ``cuml`` can be imported; scikit-learn's is used otherwise.

    Args:
        features: Array whose first axis indexes samples.
        labels: One label per row of ``features``; encoded to integers with
            ``LabelEncoder`` before fitting.
        k: Number of neighbours used by the classifier.

    Returns:
        Dict with keys ``"accuracy"`` (mean of the fold scores), ``"std"``
        (standard deviation of the fold scores) and ``"per_fold"`` (list of
        fold scores). When there is too little data to cross-validate, the
        result is accuracy 0.0 with an empty ``"per_fold"``. When only one
        distinct label is present, the result is accuracy 1.0.
    """
    n = features.shape[0]
    insufficient = {"accuracy": 0.0, "std": 0.0, "per_fold": []}
    if n < k + 1:
        return insufficient
    from sklearn.preprocessing import LabelEncoder

    y = LabelEncoder().fit_transform(labels)
    _, class_counts = np.unique(y, return_counts=True)
    if len(class_counts) < 2:
        return {"accuracy": 1.0, "std": 0.0, "per_fold": [1.0]}

    # Stratified splitting raises when the fold count exceeds the size of
    # every class, so never ask for more folds than the largest class has.
    n_splits = min(5, int(class_counts.max()))
    if n_splits < 2:
        return insufficient

    # Each fold trains on the rows it does not hold out. The largest held-out
    # fold has ceil(n / n_splits) rows; if the remaining rows are fewer than
    # k, the neighbour query fails and that fold's score is not a number.
    largest_test_fold = -(-n // n_splits)
    if n - largest_test_fold < k:
        return insufficient

    X = features.astype(np.float32)
    try:
        from cuml.neighbors import KNeighborsClassifier

        knn = KNeighborsClassifier(n_neighbors=k, output_type="numpy")
    except ImportError:
        from sklearn.neighbors import KNeighborsClassifier

        knn = KNeighborsClassifier(n_neighbors=k)
    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(knn, X, y, cv=n_splits, scoring="accuracy")
    return {
        "accuracy": float(scores.mean()),
        "std": float(scores.std()),
        "per_fold": scores.tolist(),
    }

@staticmethod
def _write_csv(
path,
Expand Down
6 changes: 6 additions & 0 deletions egomimic/hydra_configs/evaluator/eval_latent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,9 @@ emit_combined: true
# good for cotrain_pi_latent_random. Override to "hash" for the pairs config
# so each of the 4 specific episodes gets its own color.
color_by: embodiment

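# KNN accuracy: k-nearest-neighbour classification of embodiment from latent features.
# Uses cuML GPU KNN when available, sklearn CPU otherwise.
# Reports mean cross-validation accuracy (up to 5 folds) and its std per layer slice.
# knn_k is the number of neighbours used by the classifier.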
compute_knn: false
knn_k: 5
Loading