diff --git a/__pycache__/face_recog.cpython-312.pyc b/__pycache__/face_recog.cpython-312.pyc new file mode 100644 index 0000000..8e8370f Binary files /dev/null and b/__pycache__/face_recog.cpython-312.pyc differ diff --git a/__pycache__/is_important.cpython-312.pyc b/__pycache__/is_important.cpython-312.pyc new file mode 100644 index 0000000..bb255f8 Binary files /dev/null and b/__pycache__/is_important.cpython-312.pyc differ diff --git a/enroll_face.py b/enroll_face.py new file mode 100644 index 0000000..ae6d4c0 --- /dev/null +++ b/enroll_face.py @@ -0,0 +1,178 @@ +""" +enroll_face.py — capture reference images for face recognition. + +Usage: + python enroll_face.py --name Krish # webcam capture + python enroll_face.py --name Krish --source path/to/photos/ # import folder + +Webcam controls: + SPACE — capture current frame + Q — quit (or auto-quits after --count frames are saved) +""" + +import argparse +import os +import shutil +import sys +import threading +import cv2 +from facenet_pytorch import MTCNN +from PIL import Image +import numpy as np + +FACES_DIR = "faces" +DEFAULT_COUNT = 20 + + +def save_from_folder(name: str, source_dir: str): + out_dir = os.path.join(FACES_DIR, name) + os.makedirs(out_dir, exist_ok=True) + exts = (".jpg", ".jpeg", ".png", ".bmp", ".webp") + files = [f for f in os.listdir(source_dir) if f.lower().endswith(exts)] + if not files: + print(f"No image files found in {source_dir}") + sys.exit(1) + + mtcnn = MTCNN(keep_all=False, min_face_size=40) + saved = 0 + for fname in files: + src = os.path.join(source_dir, fname) + img = Image.open(src).convert("RGB") + face = mtcnn(img) + if face is None: + print(f" skip (no face): {fname}") + continue + dst = os.path.join(out_dir, f"{name}_{saved:04d}.jpg") + shutil.copy2(src, dst) + saved += 1 + print(f" saved: {dst}") + + print(f"\nDone — {saved}/{len(files)} images saved to {out_dir}/") + + +def capture_from_webcam(name: str, count: int): + out_dir = os.path.join(FACES_DIR, name) + os.makedirs(out_dir, exist_ok=True) + + print("Loading face detector...") + mtcnn = MTCNN(keep_all=False, min_face_size=40) + print("Done. Opening webcam...") + + cap = cv2.VideoCapture(0) + if not cap.isOpened(): + print("Cannot open camera.") + sys.exit(1) + + print(f"\nWebcam open — saving to: {out_dir}/") + print(f"Target: {count} images") + print("Controls: SPACE = capture | Q = quit\n") + + # Detection state shared between threads + det_lock = threading.Lock() + det_boxes = [] # list of (x1,y1,x2,y2) + det_face_found = False + detecting = threading.Event() + + latest_frame = [None] + frame_lock = threading.Lock() + + def detect_loop(): + nonlocal det_boxes, det_face_found + while not stop.is_set(): + detecting.wait() + detecting.clear() + with frame_lock: + f = latest_frame[0] + if f is None: + continue + pil = Image.fromarray(cv2.cvtColor(f, cv2.COLOR_BGR2RGB)) + boxes, probs = mtcnn.detect(pil) + found_boxes = [] + if boxes is not None: + for box, prob in zip(boxes, probs): + if prob is not None and prob >= 0.85: + found_boxes.append((int(box[0]), int(box[1]), int(box[2]), int(box[3]))) + with det_lock: + det_boxes = found_boxes + det_face_found = len(found_boxes) > 0 + + stop = threading.Event() + det_thread = threading.Thread(target=detect_loop, daemon=True) + det_thread.start() + + saved = 0 + frame_idx = 0 + + while saved < count: + ret, frame = cap.read() + if not ret: + continue + + frame_idx += 1 + + # Feed a new frame to the detector every 3 frames (keeps it busy without flooding) + if frame_idx % 3 == 0 and not detecting.is_set(): + with frame_lock: + latest_frame[0] = frame.copy() + detecting.set() + + # Draw last known detection results (never blocks display) + display = frame.copy() + with det_lock: + boxes_now = list(det_boxes) + face_ok = det_face_found + + for (x1, y1, x2, y2) in boxes_now: + cv2.rectangle(display, (x1, y1), (x2, y2), (0, 255, 0), 2) + cv2.putText(display, "face", (x1, y1 - 6), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) + + bar = "#" * saved + "-" * (count - saved) + cv2.putText(display, f"[{bar}] {saved}/{count}", (10, 24), + cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 0), 2) + hint = "FACE DETECTED — press SPACE" if face_ok else "No face — move closer or adjust lighting" + hint_color = (0, 255, 0) if face_ok else (0, 100, 255) + cv2.putText(display, hint, (10, 52), + cv2.FONT_HERSHEY_SIMPLEX, 0.55, hint_color, 2) + + cv2.imshow(f"Enroll: {name}", display) + + key = cv2.waitKey(16) & 0xFF # ~60 fps display + if key == ord("q"): + print("Quit.") + break + if key == ord(" "): + with det_lock: + ok = det_face_found + if not ok: + print(" No face detected — skipping, reposition and try again.") + continue + dst = os.path.join(out_dir, f"{name}_{saved:04d}.jpg") + cv2.imwrite(dst, frame) + saved += 1 + print(f" [{saved}/{count}] saved: {dst}") + + stop.set() + cap.release() + cv2.destroyAllWindows() + print(f"\nDone — {saved} image(s) saved to {out_dir}/") + if saved < 5: + print("Tip: fewer than 5 images may reduce accuracy. Run again to add more.") + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--name", required=True, help="Person's name (e.g. Krish)") + ap.add_argument("--source", default=None, help="Import from folder instead of webcam") + ap.add_argument("--count", type=int, default=DEFAULT_COUNT, + help=f"Number of webcam captures (default {DEFAULT_COUNT})") + args = ap.parse_args() + + if args.source: + save_from_folder(args.name, args.source) + else: + capture_from_webcam(args.name, args.count) + + +if __name__ == "__main__": + main() diff --git a/face_recog.py b/face_recog.py new file mode 100644 index 0000000..1ec4cd3 --- /dev/null +++ b/face_recog.py @@ -0,0 +1,303 @@ +import os +import cv2 +import torch +import numpy as np +from PIL import Image +from collections import deque, defaultdict +from facenet_pytorch import MTCNN, InceptionResnetV1 + + +# ── Simple per-face tracker for temporal voting ─────────────────────────────── +class _Track: + """Remembers the last `window` recognition results for one face position.""" + MATCH_PX = 80 # max center-distance to consider same track + + def __init__(self, center, window=10): + self.center = center # (x, y) + self.history = deque(maxlen=window) + + def update(self, center, name): + self.center = center + self.history.append(name) + + def voted_name(self, min_ratio=0.60): + """Return name only if it won >= min_ratio of recent frames.""" + named = [n for n in self.history if n is not None] + if not named: + return None + counts = defaultdict(int) + for n in named: + counts[n] += 1 + best, best_n = max(counts.items(), key=lambda x: x[1]) + if best_n / len(self.history) >= min_ratio: + return best + return None + + def dist(self, center): + return ((self.center[0] - center[0]) ** 2 + + (self.center[1] - center[1]) ** 2) ** 0.5 + + +class FaceRecognizer: + """ + Identifies people in YOLO 'person' detections by matching faces + against a library of reference images in faces/{Name}/*.jpg. + + Guardrails applied: + - High similarity threshold (default 0.68) + - Minimum margin above threshold (default 0.08) — match must clearly win + - Temporal voting across last 10 frames — name only sticks if seen in + >= 60% of recent frames, preventing single-frame false positives + + Usage: + rec = FaceRecognizer(faces_dir="faces", device="cuda") + updated_dets = rec.recognize_in_frame(frame_bgr, yolo_dets) + """ + + def __init__(self, faces_dir="faces", device="cpu", + sim_threshold=0.68, margin=0.08, + vote_window=10, vote_ratio=0.60): + self.device = device + self.sim_threshold = sim_threshold + self.margin = margin # must beat threshold by at least this + self.vote_window = vote_window + self.vote_ratio = vote_ratio + self.faces_dir = faces_dir + + self.mtcnn = MTCNN( + image_size=160, margin=20, keep_all=True, + min_face_size=40, device=device, post_process=True, + ) + self.resnet = InceptionResnetV1(pretrained="vggface2").eval().to(device) + + self.known_names: list[str] = [] + self.known_embeddings: torch.Tensor | None = None + self._tracks: list[_Track] = [] + + if os.path.isdir(faces_dir): + self._load_known_faces() + else: + print(f"[FaceRecog] '{faces_dir}' not found — recognition disabled.") + + # ------------------------------------------------------------------ + def _load_known_faces(self): + names, embeddings = [], [] + + for name in sorted(os.listdir(self.faces_dir)): + person_dir = os.path.join(self.faces_dir, name) + if not os.path.isdir(person_dir): + continue + count = 0 + for fname in os.listdir(person_dir): + if not fname.lower().endswith((".jpg", ".jpeg", ".png")): + continue + fpath = os.path.join(person_dir, fname) + try: + img = Image.open(fpath).convert("RGB") + face = self.mtcnn(img) + if face is None: + continue + if face.ndim == 3: + face = face.unsqueeze(0) + with torch.no_grad(): + emb = self.resnet(face[:1].to(self.device)).cpu() + embeddings.append(emb) + names.append(name) + count += 1 + except Exception as exc: + print(f"[FaceRecog] skip {fname}: {exc}") + if count: + print(f"[FaceRecog] {name}: {count} embedding(s)") + + if embeddings: + self.known_names = names + self.known_embeddings = torch.cat(embeddings, dim=0) + self._build_centroids() + print(f"[FaceRecog] Ready — {len(names)} embedding(s) for: {sorted(set(names))}") + if len(set(names)) < 2: + print("[FaceRecog] WARNING: only one person enrolled. " + "False positives are likely — run enroll_face.py for others too.") + else: + print("[FaceRecog] No usable face images found.") + + # ------------------------------------------------------------------ + def _build_centroids(self): + """ + Average all embeddings per person into one centroid vector, then + compute a per-person acceptance radius from how tightly the enrolled + images cluster around that centroid. + + Rejection rule: incoming similarity < (mean - radius_k * std) + → classified as 'unknown' even if it's the best match. + """ + per_person: dict[str, list[torch.Tensor]] = defaultdict(list) + for name, emb in zip(self.known_names, self.known_embeddings): + per_person[name].append(emb) + + self._centroid_names: list[str] = [] + centroid_list: list[torch.Tensor] = [] + self._reject_below: dict[str, float] = {} # name -> min acceptable similarity + + for name, embs in sorted(per_person.items()): + stack = torch.stack(embs) + centroid = stack.mean(0) + centroid = centroid / centroid.norm().clamp(min=1e-8) + centroid_list.append(centroid) + self._centroid_names.append(name) + + # Cosine similarity of each enrolled image to its centroid + stack_n = stack / stack.norm(dim=1, keepdim=True).clamp(min=1e-8) + sims = (stack_n @ centroid).tolist() + mean_s = sum(sims) / len(sims) + var_s = sum((s - mean_s) ** 2 for s in sims) / max(len(sims) - 1, 1) + std_s = var_s ** 0.5 + + # Reject anything below mean - 2*std (covers ~97% of genuine matches) + reject_below = mean_s - 2.0 * std_s + self._reject_below[name] = reject_below + + print(f"[FaceRecog] '{name}': {len(embs)} image(s), " + f"cluster sim {mean_s:.3f}±{std_s:.3f}, " + f"reject below {reject_below:.3f}") + + self._centroids = torch.stack(centroid_list) # (num_people, 512) + + # ------------------------------------------------------------------ + def _get_or_create_track(self, center) -> _Track: + best, best_d = None, float("inf") + for t in self._tracks: + d = t.dist(center) + if d < best_d: + best_d, best = d, t + if best is not None and best_d <= _Track.MATCH_PX: + return best + t = _Track(center, window=self.vote_window) + self._tracks.append(t) + return t + + def _prune_tracks(self, active_centers): + """Drop tracks whose last known position is far from any current face.""" + if not active_centers: + self._tracks.clear() + return + self._tracks = [ + t for t in self._tracks + if any(t.dist(c) <= _Track.MATCH_PX for c in active_centers) + ] + + # ------------------------------------------------------------------ + def recognize_in_frame(self, frame_bgr: np.ndarray, yolo_dets: list) -> list: + """ + Returns yolo_dets with 'person' labels replaced by the recognised + name, or kept as 'person' if unknown / not detected / vote not yet + confident enough. + """ + if not self.known_names: + return yolo_dets + + pil_img = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)) + + # ── 1. Detect faces ─────────────────────────────────────────── + boxes, probs = self.mtcnn.detect(pil_img) + if boxes is None: + self._prune_tracks([]) + return yolo_dets + + # ── 2. Build face crops + embeddings ───────────────────────── + h, w = frame_bgr.shape[:2] + face_tensors, face_boxes, face_centers = [], [], [] + + for box, prob in zip(boxes, probs): + if prob is None or prob < 0.90: + continue + x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3]) + x1, y1 = max(0, x1), max(0, y1) + x2, y2 = min(w, x2), min(h, y2) + if x2 - x1 < 20 or y2 - y1 < 20: + continue + crop = pil_img.crop((x1, y1, x2, y2)).resize((160, 160)) + t = torch.from_numpy(np.array(crop)).permute(2, 0, 1).float() / 255.0 + t = (t - 0.5) / 0.5 + face_tensors.append(t) + face_boxes.append((x1, y1, x2, y2)) + face_centers.append(((x1 + x2) / 2.0, (y1 + y2) / 2.0)) + + self._prune_tracks(face_centers) + + if not face_tensors: + return yolo_dets + + batch = torch.stack(face_tensors).to(self.device) + with torch.no_grad(): + embeddings = self.resnet(batch).cpu() + + # ── 3. Centroid similarity + guardrails ─────────────────────── + # Compare each face against per-person centroids (not individual images). + # This means the margin gap is always between two *different people*, + # making it a real discriminative comparison. + emb_n = embeddings / embeddings.norm(dim=1, keepdim=True).clamp(min=1e-8) + # _centroids are already L2-normalised + sims = emb_n @ self._centroids.T # (M faces, num_people) + + face_voted: list[str | None] = [] + num_people = len(self._centroid_names) + + for mi, center in enumerate(face_centers): + row = sims[mi] # (num_people,) + sorted_sims, sorted_idx = row.sort(descending=True) + best_sim = sorted_sims[0].item() + best_name = self._centroid_names[sorted_idx[0].item()] + + # Guardrail A — flat threshold + if best_sim < self.sim_threshold: + raw_name = None + + # Guardrail B — must fall within this person's enrolled cluster radius + elif best_sim < self._reject_below.get(best_name, 0.0): + raw_name = None # outside cluster → unknown + + # Guardrail C — margin vs second-best *person* (only meaningful with 2+) + elif num_people >= 2: + second_sim = sorted_sims[1].item() + gap = best_sim - second_sim + raw_name = best_name if gap >= self.margin else None + + else: + raw_name = best_name + + # Guardrail C — temporal vote + track = self._get_or_create_track(center) + track.update(center, raw_name) + face_voted.append(track.voted_name(min_ratio=self.vote_ratio)) + + # ── 4. Associate faces with YOLO person boxes ───────────────── + updated = [] + for det in yolo_dets: + label, conf, px1, py1, px2, py2 = det + if label != "person": + updated.append(det) + continue + + best_name: str | None = None + best_area = 0.0 + for fi, (fx1, fy1, fx2, fy2) in enumerate(face_boxes): + fc_x = (fx1 + fx2) / 2.0 + fc_y = (fy1 + fy2) / 2.0 + if px1 <= fc_x <= px2 and py1 <= fc_y <= py2: + area = (fx2 - fx1) * (fy2 - fy1) + if area > best_area: + best_area = area + best_name = face_voted[fi] + + # best_name=None means face detected but rejected → label "unknown" + # no face found in person box → keep "person" + if best_name is not None: + new_label = best_name + elif any(px1 <= (fx1+fx2)/2 <= px2 and py1 <= (fy1+fy2)/2 <= py2 + for fx1, fy1, fx2, fy2 in face_boxes): + new_label = "unknown" # face seen but didn't pass guardrails + else: + new_label = "person" # no face detected in this box at all + updated.append((new_label, conf, px1, py1, px2, py2)) + + return updated diff --git a/faces/Dhruv/Dhruv_0000.jpg b/faces/Dhruv/Dhruv_0000.jpg new file mode 100644 index 0000000..0159dcd Binary files /dev/null and b/faces/Dhruv/Dhruv_0000.jpg differ diff --git a/faces/Dhruv/Dhruv_0001.jpg b/faces/Dhruv/Dhruv_0001.jpg new file mode 100644 index 0000000..3d76e03 Binary files /dev/null and b/faces/Dhruv/Dhruv_0001.jpg differ diff --git a/faces/Dhruv/Dhruv_0002.jpg b/faces/Dhruv/Dhruv_0002.jpg new file mode 100644 index 0000000..e410f2f Binary files /dev/null and b/faces/Dhruv/Dhruv_0002.jpg differ diff --git a/faces/Dhruv/Dhruv_0003.jpg b/faces/Dhruv/Dhruv_0003.jpg new file mode 100644 index 0000000..71f09ef Binary files /dev/null and b/faces/Dhruv/Dhruv_0003.jpg differ diff --git a/faces/Dhruv/Dhruv_0004.jpg b/faces/Dhruv/Dhruv_0004.jpg new file mode 100644 index 0000000..d5486b0 Binary files /dev/null and b/faces/Dhruv/Dhruv_0004.jpg differ diff --git a/faces/Dhruv/Dhruv_0005.jpg b/faces/Dhruv/Dhruv_0005.jpg new file mode 100644 index 0000000..eafbffc Binary files /dev/null and b/faces/Dhruv/Dhruv_0005.jpg differ diff --git a/faces/Dhruv/Dhruv_0006.jpg b/faces/Dhruv/Dhruv_0006.jpg new file mode 100644 index 0000000..758982e Binary files /dev/null and b/faces/Dhruv/Dhruv_0006.jpg differ diff --git a/faces/Dhruv/Dhruv_0007.jpg b/faces/Dhruv/Dhruv_0007.jpg new file mode 100644 index 0000000..696ccaa Binary files /dev/null and b/faces/Dhruv/Dhruv_0007.jpg differ diff --git a/faces/Dhruv/Dhruv_0008.jpg b/faces/Dhruv/Dhruv_0008.jpg new file mode 100644 index 0000000..ab25d16 Binary files /dev/null and b/faces/Dhruv/Dhruv_0008.jpg differ diff --git a/faces/Dhruv/Dhruv_0009.jpg b/faces/Dhruv/Dhruv_0009.jpg new file mode 100644 index 0000000..651368d Binary files /dev/null and b/faces/Dhruv/Dhruv_0009.jpg differ diff --git a/faces/Dhruv/Dhruv_0010.jpg b/faces/Dhruv/Dhruv_0010.jpg new file mode 100644 index 0000000..99dc878 Binary files /dev/null and b/faces/Dhruv/Dhruv_0010.jpg differ diff --git a/faces/Dhruv/Dhruv_0011.jpg b/faces/Dhruv/Dhruv_0011.jpg new file mode 100644 index 0000000..0cf37ba Binary files /dev/null and b/faces/Dhruv/Dhruv_0011.jpg differ diff --git a/faces/Dhruv/Dhruv_0012.jpg b/faces/Dhruv/Dhruv_0012.jpg new file mode 100644 index 0000000..df8cda9 Binary files /dev/null and b/faces/Dhruv/Dhruv_0012.jpg differ diff --git a/faces/Dhruv/Dhruv_0013.jpg b/faces/Dhruv/Dhruv_0013.jpg new file mode 100644 index 0000000..e3ae0ee Binary files /dev/null and b/faces/Dhruv/Dhruv_0013.jpg differ diff --git a/faces/Dhruv/Dhruv_0014.jpg b/faces/Dhruv/Dhruv_0014.jpg new file mode 100644 index 0000000..00c2e0a Binary files /dev/null and b/faces/Dhruv/Dhruv_0014.jpg differ diff --git a/faces/Dhruv/Dhruv_0015.jpg b/faces/Dhruv/Dhruv_0015.jpg new file mode 100644 index 0000000..fdd8734 Binary files /dev/null and b/faces/Dhruv/Dhruv_0015.jpg differ diff --git a/faces/Dhruv/Dhruv_0016.jpg b/faces/Dhruv/Dhruv_0016.jpg new file mode 100644 index 0000000..22e22f3 Binary files /dev/null and b/faces/Dhruv/Dhruv_0016.jpg differ diff --git a/faces/Dhruv/Dhruv_0017.jpg b/faces/Dhruv/Dhruv_0017.jpg new file mode 100644 index 0000000..381a25c Binary files /dev/null and b/faces/Dhruv/Dhruv_0017.jpg differ diff --git a/faces/Dhruv/Dhruv_0018.jpg b/faces/Dhruv/Dhruv_0018.jpg new file mode 100644 index 0000000..7a1fe1a Binary files /dev/null and b/faces/Dhruv/Dhruv_0018.jpg differ diff --git a/faces/Dhruv/Dhruv_0019.jpg b/faces/Dhruv/Dhruv_0019.jpg new file mode 100644 index 0000000..0f45c9d Binary files /dev/null and b/faces/Dhruv/Dhruv_0019.jpg differ diff --git a/faces/Haren/Haren_0000.jpg b/faces/Haren/Haren_0000.jpg new file mode 100644 index 0000000..1b98d24 Binary files /dev/null and b/faces/Haren/Haren_0000.jpg differ diff --git a/faces/Haren/Haren_0001.jpg b/faces/Haren/Haren_0001.jpg new file mode 100644 index 0000000..d87412f Binary files /dev/null and b/faces/Haren/Haren_0001.jpg differ diff --git a/faces/Haren/Haren_0002.jpg b/faces/Haren/Haren_0002.jpg new file mode 100644 index 0000000..a7e63b2 Binary files /dev/null and b/faces/Haren/Haren_0002.jpg differ diff --git a/faces/Haren/Haren_0003.jpg b/faces/Haren/Haren_0003.jpg new file mode 100644 index 0000000..c268aa4 Binary files /dev/null and b/faces/Haren/Haren_0003.jpg differ diff --git a/faces/Haren/Haren_0004.jpg b/faces/Haren/Haren_0004.jpg new file mode 100644 index 0000000..7f01eb9 Binary files /dev/null and b/faces/Haren/Haren_0004.jpg differ diff --git a/faces/Haren/Haren_0005.jpg b/faces/Haren/Haren_0005.jpg new file mode 100644 index 0000000..438f6a2 Binary files /dev/null and b/faces/Haren/Haren_0005.jpg differ diff --git a/faces/Haren/Haren_0006.jpg b/faces/Haren/Haren_0006.jpg new file mode 100644 index 0000000..1a6b3a3 Binary files /dev/null and b/faces/Haren/Haren_0006.jpg differ diff --git a/faces/Haren/Haren_0007.jpg b/faces/Haren/Haren_0007.jpg new file mode 100644 index 0000000..d4ea368 Binary files /dev/null and b/faces/Haren/Haren_0007.jpg differ diff --git a/faces/Haren/Haren_0008.jpg b/faces/Haren/Haren_0008.jpg new file mode 100644 index 0000000..06b3473 Binary files /dev/null and b/faces/Haren/Haren_0008.jpg differ diff --git a/faces/Haren/Haren_0009.jpg b/faces/Haren/Haren_0009.jpg new file mode 100644 index 0000000..845dca1 Binary files /dev/null and b/faces/Haren/Haren_0009.jpg differ diff --git a/faces/Haren/Haren_0010.jpg b/faces/Haren/Haren_0010.jpg new file mode 100644 index 0000000..629c6cc Binary files /dev/null and b/faces/Haren/Haren_0010.jpg differ diff --git a/faces/Haren/Haren_0011.jpg b/faces/Haren/Haren_0011.jpg new file mode 100644 index 0000000..f8bb348 Binary files /dev/null and b/faces/Haren/Haren_0011.jpg differ diff --git a/faces/Haren/Haren_0012.jpg b/faces/Haren/Haren_0012.jpg new file mode 100644 index 0000000..adb3298 Binary files /dev/null and b/faces/Haren/Haren_0012.jpg differ diff --git a/faces/Haren/Haren_0013.jpg b/faces/Haren/Haren_0013.jpg new file mode 100644 index 0000000..bc41684 Binary files /dev/null and b/faces/Haren/Haren_0013.jpg differ diff --git a/faces/Haren/Haren_0014.jpg b/faces/Haren/Haren_0014.jpg new file mode 100644 index 0000000..ea9a3d7 Binary files /dev/null and b/faces/Haren/Haren_0014.jpg differ diff --git a/faces/Haren/Haren_0015.jpg b/faces/Haren/Haren_0015.jpg new file mode 100644 index 0000000..edbd348 Binary files /dev/null and b/faces/Haren/Haren_0015.jpg differ diff --git a/faces/Haren/Haren_0016.jpg b/faces/Haren/Haren_0016.jpg new file mode 100644 index 0000000..968cac8 Binary files /dev/null and b/faces/Haren/Haren_0016.jpg differ diff --git a/faces/Haren/Haren_0017.jpg b/faces/Haren/Haren_0017.jpg new file mode 100644 index 0000000..111545e Binary files /dev/null and b/faces/Haren/Haren_0017.jpg differ diff --git a/faces/Haren/Haren_0018.jpg b/faces/Haren/Haren_0018.jpg new file mode 100644 index 0000000..16ca7d5 Binary files /dev/null and b/faces/Haren/Haren_0018.jpg differ diff --git a/faces/Haren/Haren_0019.jpg b/faces/Haren/Haren_0019.jpg new file mode 100644 index 0000000..6fd5343 Binary files /dev/null and b/faces/Haren/Haren_0019.jpg differ diff --git a/faces/Krish/Krish_0000.jpg b/faces/Krish/Krish_0000.jpg new file mode 100644 index 0000000..a3f2298 Binary files /dev/null and b/faces/Krish/Krish_0000.jpg differ diff --git a/faces/Krish/Krish_0001.jpg b/faces/Krish/Krish_0001.jpg new file mode 100644 index 0000000..82c69bd Binary files /dev/null and b/faces/Krish/Krish_0001.jpg differ diff --git a/faces/Krish/Krish_0002.jpg b/faces/Krish/Krish_0002.jpg new file mode 100644 index 0000000..5f35c9f Binary files /dev/null and b/faces/Krish/Krish_0002.jpg differ diff --git a/faces/Krish/Krish_0003.jpg b/faces/Krish/Krish_0003.jpg new file mode 100644 index 0000000..695343a Binary files /dev/null and b/faces/Krish/Krish_0003.jpg differ diff --git a/faces/Krish/Krish_0004.jpg b/faces/Krish/Krish_0004.jpg new file mode 100644 index 0000000..2f103a5 Binary files /dev/null and b/faces/Krish/Krish_0004.jpg differ diff --git a/faces/Krish/Krish_0005.jpg b/faces/Krish/Krish_0005.jpg new file mode 100644 index 0000000..11abd2b Binary files /dev/null and b/faces/Krish/Krish_0005.jpg differ diff --git a/faces/Krish/Krish_0006.jpg b/faces/Krish/Krish_0006.jpg new file mode 100644 index 0000000..db4316a Binary files /dev/null and b/faces/Krish/Krish_0006.jpg differ diff --git a/faces/Krish/Krish_0007.jpg b/faces/Krish/Krish_0007.jpg new file mode 100644 index 0000000..29100cb Binary files /dev/null and b/faces/Krish/Krish_0007.jpg differ diff --git a/faces/Krish/Krish_0008.jpg b/faces/Krish/Krish_0008.jpg new file mode 100644 index 0000000..7e854f6 Binary files /dev/null and b/faces/Krish/Krish_0008.jpg differ diff --git a/faces/Krish/Krish_0009.jpg b/faces/Krish/Krish_0009.jpg new file mode 100644 index 0000000..3fa4361 Binary files /dev/null and b/faces/Krish/Krish_0009.jpg differ diff --git a/faces/Krish/Krish_0010.jpg b/faces/Krish/Krish_0010.jpg new file mode 100644 index 0000000..503ee20 Binary files /dev/null and b/faces/Krish/Krish_0010.jpg differ diff --git a/faces/Krish/Krish_0011.jpg b/faces/Krish/Krish_0011.jpg new file mode 100644 index 0000000..27427c5 Binary files /dev/null and b/faces/Krish/Krish_0011.jpg differ diff --git a/faces/Krish/Krish_0012.jpg b/faces/Krish/Krish_0012.jpg new file mode 100644 index 0000000..df50d45 Binary files /dev/null and b/faces/Krish/Krish_0012.jpg differ diff --git a/faces/Krish/Krish_0013.jpg b/faces/Krish/Krish_0013.jpg new file mode 100644 index 0000000..5e5286e Binary files /dev/null and b/faces/Krish/Krish_0013.jpg differ diff --git a/faces/Krish/Krish_0014.jpg b/faces/Krish/Krish_0014.jpg new file mode 100644 index 0000000..a77fe85 Binary files /dev/null and b/faces/Krish/Krish_0014.jpg differ diff --git a/faces/Krish/Krish_0015.jpg b/faces/Krish/Krish_0015.jpg new file mode 100644 index 0000000..108b0f9 Binary files /dev/null and b/faces/Krish/Krish_0015.jpg differ diff --git a/faces/Krish/Krish_0016.jpg b/faces/Krish/Krish_0016.jpg new file mode 100644 index 0000000..0238df9 Binary files /dev/null and b/faces/Krish/Krish_0016.jpg differ diff --git a/faces/Krish/Krish_0017.jpg b/faces/Krish/Krish_0017.jpg new file mode 100644 index 0000000..bd1a754 Binary files /dev/null and b/faces/Krish/Krish_0017.jpg differ diff --git a/faces/Krish/Krish_0018.jpg b/faces/Krish/Krish_0018.jpg new file mode 100644 index 0000000..85f0399 Binary files /dev/null and b/faces/Krish/Krish_0018.jpg differ diff --git a/faces/Krish/Krish_0019.jpg b/faces/Krish/Krish_0019.jpg new file mode 100644 index 0000000..9f672d8 Binary files /dev/null and b/faces/Krish/Krish_0019.jpg differ diff --git a/requirements.txt b/requirements.txt index c3625a1..9449e9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,9 @@ +torch +torchvision + +facenet-pytorch>=2.5.3 transformers>=4.40.0 -accelerate>=0.27.0 opencv-python>=4.9.0 Pillow>=10.2.0 -Moondream2 numpy>=1.26.0 ultralytics>=8.1.0 ---extra-index-url https://download.pytorch.org/whl/cu130 -torch -torchvision -torchaudio diff --git a/run_input.py b/run_input.py index b5ac551..23fa9c2 100644 --- a/run_input.py +++ b/run_input.py @@ -2,39 +2,54 @@ import torch import threading import time -from collections import defaultdict, deque +from collections import defaultdict from queue import Queue, Empty -from PIL import Image -from transformers import AutoProcessor, AutoModelForImageTextToText, AutoModelForCausalLM, AutoTokenizer from ultralytics import YOLO -from PIL import Image from is_important import is_frame_important -import tempfile -import os -import moondream as md +from face_recog import FaceRecognizer -# ── Config ────────────────────────────────────────────────────────────────── -YOLO_PATH = "C:/Users/dayse/VSCODE files/Projects/Yolov11_PL/Models/YOLOv11obj_model.pt" -SEND_EVERY_S = 8.0 -DETECT_EVERY_S = 0.08 -CONF_THRESH = 0.35 -device = "cuda" if torch.cuda.is_available() else "cpu" +# ── Config ──────────────────────────────────────────────────────────────────── +YOLO_PATH = "yolo11n.pt" # auto-downloaded on first run (~5MB) +REPORT_EVERY_S = 8.0 +DETECT_EVERY_S = 0.08 +CONF_THRESH = 0.35 +FACES_DIR = "faces" # faces/{Name}/*.jpg — set to None to skip +FACE_SIM_THRESH = 0.68 +device = "cuda" if torch.cuda.is_available() else "cpu" -# ── Queues ─────────────────────────────────────────────────────────────────── -raw_q = Queue(maxsize=1) -vlm_q = Queue(maxsize=1) -result_q = Queue() +# ── Queues ──────────────────────────────────────────────────────────────────── +raw_q = Queue(maxsize=1) +result_q = Queue() -stop_event = threading.Event() +stop_event = threading.Event() -# ── Models ─────────────────────────────────────────────────────────────────── +# ── Models ──────────────────────────────────────────────────────────────────── print("Loading YOLO...") yolo = YOLO(YOLO_PATH).to(device) -# ── Helper ─────────────────────────────────────────────────────────────────── -def cv2_to_pil(frame_bgr): - return Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)) +print("Loading face recognizer...") +face_recognizer = ( + FaceRecognizer(faces_dir=FACES_DIR, device=device, sim_threshold=FACE_SIM_THRESH) + if FACES_DIR else None +) +# Names we can actually recognise — used to colour boxes differently +known_names = set(face_recognizer.known_names) if face_recognizer else set() + +# ── Shared display state (written by detection, read by main thread) ────────── +_display_lock = threading.Lock() +_display_state = {"frame": None, "dets": []} + +def _set_display(frame, dets): + with _display_lock: + _display_state["frame"] = frame + _display_state["dets"] = list(dets) + +def _get_display(): + with _display_lock: + return _display_state["frame"], list(_display_state["dets"]) + +# ── Helper ──────────────────────────────────────────────────────────────────── def run_yolo(frame): results = yolo.predict(frame, conf=CONF_THRESH, verbose=False) r = results[0] @@ -49,6 +64,37 @@ def run_yolo(frame): dets.append((label, conf, x1, y1, x2, y2)) return dets +def format_labels(dets): + counts = defaultdict(int) + for label, *_ in dets: + counts[label] += 1 + parts = [] + for label, n in sorted(counts.items()): + parts.append(label if n == 1 else f"{label} (x{n})") + return ", ".join(parts) if parts else "nothing" + +def box_color(label): + if label in known_names: + return (0, 255, 0) # green — recognised person + if label == "unknown": + return (0, 60, 255) # red — face seen but not recognised + if label == "person": + return (0, 200, 255) # yellow — person, no face detected + return (255, 180, 0) # blue — object + +def draw_dets(frame, dets): + out = frame.copy() + for label, conf, x1, y1, x2, y2 in dets: + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + color = box_color(label) + cv2.rectangle(out, (x1, y1), (x2, y2), color, 2) + text = f"{label} {conf:.2f}" + (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.55, 1) + cv2.rectangle(out, (x1, y1 - th - 6), (x1 + tw + 4, y1), color, -1) + cv2.putText(out, text, (x1 + 2, y1 - 4), + cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 0), 1) + return out + # ── Thread 1: Capture ───────────────────────────────────────────────────────── def capture_thread(): cap = cv2.VideoCapture(0) @@ -59,16 +105,20 @@ def capture_thread(): ret, frame = cap.read() if not ret: continue - + # Always update display with the latest raw frame so preview is smooth + frame_copy = frame.copy() + with _display_lock: + if _display_state["frame"] is None: + _display_state["frame"] = frame_copy if not raw_q.full(): - raw_q.put(frame) + raw_q.put(frame_copy) cap.release() -# ── Thread 2: YOLO + Importance ─────────────────────────────────────────────── +# ── Thread 2: YOLO + Face + Importance ─────────────────────────────────────── def detection_thread(): - prev_dets = [] - last_sent_time = 0.0 - vlm_count = 0 + prev_dets = [] + last_sent_time = 0.0 + report_count = 0 importance_state = { "last_fire": 0.0, "new_counts": defaultdict(int), @@ -88,90 +138,79 @@ def detection_thread(): next_det = now + DETECT_EVERY_S curr_dets = run_yolo(frame) + if face_recognizer is not None: + curr_dets = face_recognizer.recognize_in_frame(frame, curr_dets) + + # Push latest annotated frame to display + _set_display(frame, curr_dets) important, info = is_frame_important( prev_dets, curr_dets, - frame_shape = frame.shape, - state = importance_state, - conf_keep = 0.55, - min_area_px = 800, - iou_match_thresh = 0.25, - center_match_px = 40.0, - new_persist = 2, - move_persist = 2, - cooldown_s = 3.0, + frame_shape = frame.shape, + state = importance_state, + conf_keep = 0.55, + min_area_px = 800, + iou_match_thresh = 0.25, + center_match_px = 40.0, + new_persist = 2, + move_persist = 2, + cooldown_s = 3.0, ) - time_up = (now - last_sent_time) >= SEND_EVERY_S + time_up = (now - last_sent_time) >= REPORT_EVERY_S - if (important or time_up) and not vlm_q.full(): - vlm_count += 1 - vlm_q.put({ - "frame": frame.copy(), - "reason": "important" if important else "timer", - "info": info, - "vlm_id": vlm_count, - "ts": now, + if important or time_up: + report_count += 1 + result_q.put({ + "id": report_count, + "reason": "important" if important else "timer", + "info": info, + "dets": curr_dets, + "ts": now, }) last_sent_time = now prev_dets = curr_dets -# ── Thread 3: VLM Inference ─────────────────────────────────────────────────── -def vlm_thread(): - print("Loading Moondream...") - moon_model = AutoModelForCausalLM.from_pretrained( - "vikhyatk/moondream2", - trust_remote_code=True, - torch_dtype=torch.float16, - ).to("cuda").eval() - tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2", trust_remote_code=True) - print("Moondream ready.") - - while not stop_event.is_set(): - try: - job = vlm_q.get(timeout=0.5) - except Empty: - continue - - image = cv2_to_pil(job["frame"]) - enc_image = moon_model.encode_image(image) - text = moon_model.answer_question(enc_image, "Briefly describe what you see.", tokenizer) - - result_q.put({ - "text": text, - "reason": job["reason"], - "info": job["info"], - "vlm_id": job["vlm_id"], - }) - -# ── Thread 4: Print Results ─────────────────────────────────────────────────── +# ── Thread 3: Print Results ─────────────────────────────────────────────────── def printer_thread(): while not stop_event.is_set(): try: r = result_q.get(timeout=0.5) except Empty: continue - print(f"\n[VLM #{r['vlm_id']}] ({r['reason']}) {r['info']}") - print(f" → {r['text']}\n") + ts = time.strftime("%H:%M:%S", time.localtime(r["ts"])) + labels = format_labels(r["dets"]) + print(f"[#{r['id']:04d} {ts}] ({r['reason']}) {labels}") -# ── Main ────────────────────────────────────────────────────────────────────── +# ── Main — display loop runs here (OpenCV needs main thread on Windows) ─────── if __name__ == "__main__": threads = [ threading.Thread(target=capture_thread, daemon=True, name="Capture"), threading.Thread(target=detection_thread, daemon=True, name="Detection"), - threading.Thread(target=vlm_thread, daemon=True, name="VLM"), - threading.Thread(target=printer_thread, daemon=True, name="Printer"), + threading.Thread(target=printer_thread, daemon=True, name="Printer"), ] for t in threads: t.start() - print("Pipeline running. Press Ctrl+C to stop.") + print("Pipeline running. Press Q in the preview window or Ctrl+C to stop.") + try: - while True: - time.sleep(1) + while not stop_event.is_set(): + frame, dets = _get_display() + if frame is not None: + annotated = draw_dets(frame, dets) + cv2.imshow("EDEN — Input Layer", annotated) + + key = cv2.waitKey(16) & 0xFF # ~60 fps + if key == ord("q"): + break + except KeyboardInterrupt: - print("\nStopping...") - stop_event.set() + pass + + print("\nStopping...") + stop_event.set() + cv2.destroyAllWindows() for t in threads: t.join(timeout=3) print("Done.") diff --git a/yolo11n.pt b/yolo11n.pt new file mode 100644 index 0000000..45b273b Binary files /dev/null and b/yolo11n.pt differ