Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Runs the pytest suite on every supported Python version.
name: tests

on:
  # Pushes to main and to the feature/fix/chore branch families.
  push:
    branches: ["main", "feat/**", "fix/**", "chore/**"]
  # Pull requests that target main.
  pull_request:
    branches: ["main"]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One job per interpreter version listed here.
        python-version: ["3.10", "3.11", "3.12"]

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: install test dependencies
        run: pip install -r requirements-dev.txt

      - name: run tests
        run: python -m pytest tests/ -v --tb=short
Binary file added assets/caption/demo_output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/caption/pipeline_architecture.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/caption/rci_heatmap.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/caption/reading_speed_calibration.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pytest>=7.4.0
112 changes: 112 additions & 0 deletions scripts/demo_localize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""Demo: localize a set of CC events into any of the 9 supported languages.

Usage
-----
python scripts/demo_localize.py
python scripts/demo_localize.py --lang te
python scripts/demo_localize.py --lang ta --level standard
python scripts/demo_localize.py --list-languages

Supported language codes: en hi te ta kn bn mr gu pa
Literacy levels: low standard expert
"""

import argparse
import io
import sys
import os

# Ensure Unicode output works on all platforms (Windows cp1252 would fail otherwise).
# reconfigure() switches the existing stream in place, so its buffering settings are
# kept; the hasattr guard skips stdout replacements that do not offer the method
# instead of crashing on a missing attribute.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from src.caption.localizer import (
CaptionSignal,
LiteracyLevel,
SUPPORTED_LANGUAGES,
batch_localize_with_report,
)

# A realistic set of CC events from an Indian educational video.
# Each row is (label, start_s, end_s, combined_score).
SAMPLE_EVENTS = [
    CaptionSignal(label, start_s=start, end_s=end, combined_score=score)
    for label, start, end, score in (
        ("[temple bells]", 0.5, 2.1, 0.91),
        ("[crowd noise]", 3.0, 5.5, 0.78),
        ("[tabla]", 6.2, 9.0, 0.95),
        ("[baby crying]", 10.1, 11.8, 0.88),
        ("[firecrackers]", 12.5, 14.0, 0.83),
        ("[rain]", 15.3, 18.7, 0.72),
        ("[phone ringing]", 20.0, 21.2, 0.69),
        ("[dhol]", 22.5, 25.0, 0.93),
        ("[applause]", 26.0, 28.5, 0.85),
        ("[thunder]", 30.0, 31.0, 0.77),
    )
]

# Maps the --level command-line choices to LiteracyLevel members; the keys
# double as the accepted values for that option.
_LEVEL_MAP = {
    "low": LiteracyLevel.LOW_LITERACY,
    "standard": LiteracyLevel.STANDARD,
    "expert": LiteracyLevel.EXPERT,
}


def _fmt_time(seconds: float) -> str:
h = int(seconds // 3600)
m = int((seconds % 3600) // 60)
s = seconds % 60
return f"{h:02d}:{m:02d}:{s:06.3f}"


def run(lang: str, level: LiteracyLevel) -> None:
    """Localize the sample events and print the captions plus a summary report.

    Args:
        lang: Target language code; also used to look up the display name
            in ``SUPPORTED_LANGUAGES``.
        level: Literacy level handed to ``batch_localize_with_report``.
    """
    captions, report = batch_localize_with_report(SAMPLE_EVENTS, lang, level)

    # Codes missing from SUPPORTED_LANGUAGES are displayed as "English".
    language_name = SUPPORTED_LANGUAGES.get(lang, "English")
    literacy_name = level.value.upper()
    print(f"\n=== Localized Captions | {language_name} | {literacy_name} literacy ===\n")

    for caption in captions:
        shown_for = caption.end_s - caption.start_s
        columns = [
            f"[{_fmt_time(caption.start_s)} --> {_fmt_time(caption.end_s)}]",
            f"{caption.label_local:<32}",
            f"aksharas={caption.akshara_count:2d}",
            f"dur={shown_for:.2f}s",
            f"RCI={caption.rci:5.1f}",
            f"score={caption.combined_score:.2f}",
        ]
        # One leading space, then single-space separated columns.
        print(" " + " ".join(columns))

    report_lines = [
        "\n=== Accessibility Report ===",
        f" Total captions : {report.total_captions}",
        f" Duration extended : {report.extended_count} / {report.total_captions}",
        f" Translation coverage : {report.coverage_pct:.1f}%",
        f" Mean RCI : {report.mean_rci:.1f} / 100",
        f" Min RCI : {report.min_rci:.1f} / 100",
        f" Below RCI threshold : {report.under_threshold_count} caption(s)",
    ]
    for line in report_lines:
        print(line)

    verdict = (
        f" [!] {report.under_threshold_count} caption(s) may be too fast for low-literacy viewers."
        if report.under_threshold_count > 0
        else " [✓] All captions are accessible for low-literacy viewers."
    )
    print(verdict)
    print()


def main() -> None:
    """Parse the command-line options and run the requested demo action.

    With ``--list-languages`` the supported codes and names are printed;
    otherwise the sample events are localized via ``run``.
    """
    cli = argparse.ArgumentParser(description="Demo: CC localization for Indian languages")
    cli.add_argument(
        "--lang",
        default="te",
        choices=list(SUPPORTED_LANGUAGES),
        help="Target language BCP-47 code (default: te)",
    )
    cli.add_argument(
        "--level",
        default="low",
        choices=list(_LEVEL_MAP),
        help="Viewer literacy level (default: low)",
    )
    cli.add_argument(
        "--list-languages",
        action="store_true",
        help="Print all supported languages and exit",
    )
    options = cli.parse_args()

    if not options.list_languages:
        run(options.lang, _LEVEL_MAP[options.level])
        return

    print("\nSupported languages:")
    for code, name in SUPPORTED_LANGUAGES.items():
        print(f" {code:4s} {name}")
    print()


if __name__ == "__main__":
    main()
169 changes: 169 additions & 0 deletions scripts/generate_demo_screenshot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
"""Generate a demo comparison image for the PR.

Shows two views, drawn as three panels (two RCI heatmaps and one bar chart):
1. RCI scores across all 9 languages for LOW vs EXPERT literacy — the core
accessibility gap this module measures.
2. Duration extension: how many seconds each language/label needs beyond the
raw event duration for a low-literacy viewer.

Run: python scripts/generate_demo_screenshot.py
Output: assets/caption/demo_output.png
"""

import os, sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from src.caption.localizer import (
CaptionSignal, LiteracyLevel, SUPPORTED_LANGUAGES,
batch_localize_with_report,
)

# Output directory for the generated image, resolved relative to this script.
# It is created at import time so the later savefig call cannot fail on a
# missing folder.
OUT_DIR = os.path.join(os.path.dirname(__file__), "..", "assets", "caption")
os.makedirs(OUT_DIR, exist_ok=True)

# Colour palette for the figure (hex RGB). BG, PANEL, BD, FG and GREY are used
# by the plotting code below; GREEN, YELLOW, RED, CYAN and AMBER are not
# referenced in the visible part of this script.
BG = "#0D1117"
PANEL = "#161B22"
BD = "#30363D"
FG = "#E6EDF3"
GREEN = "#3FB950"
YELLOW = "#D29922"
RED = "#F85149"
CYAN = "#79C0FF"
GREY = "#8B949E"
AMBER = "#D4A72C"

# Short aliases for the two literacy levels compared in the heatmaps.
LOW = LiteracyLevel.LOW_LITERACY
EXP = LiteracyLevel.EXPERT

# Event labels; each one becomes a heatmap column and a bar series.
LABELS = [
    "[temple bells]", "[baby crying]", "[phone ringing]",
    "[firecrackers]", "[crowd noise]", "[tabla]",
    "[thunder]", "[applause]", "[dhol]",
]

# LANG_CODES and LANG_NAMES are parallel lists: position i of one describes
# position i of the other, so they must be edited together.
LANG_CODES = ["en", "hi", "te", "ta", "kn", "bn", "mr", "gu", "pa"]
LANG_NAMES = ["English", "Hindi", "Telugu", "Tamil", "Kannada",
              "Bengali", "Marathi", "Gujarati", "Punjabi"]

# Use 0.8s events so the RCI shows real gaps when calibrated for expert readers
SIGNALS_SHORT = [CaptionSignal(l, 0.0, 0.8) for l in LABELS]
# Use 2.5s events for duration extension panel
SIGNALS_MED = [CaptionSignal(l, 0.0, 2.5) for l in LABELS]

def get_rci_matrix(signals, level):
    """Return the RCI values as an array with one row per language code.

    Every code in ``LANG_CODES`` is localized with
    ``batch_localize_with_report``; each row holds the ``rci`` of the
    returned captions, in the order they were returned.
    """
    def rci_row(lang):
        captions, _report = batch_localize_with_report(signals, lang, level)
        return [caption.rci for caption in captions]

    return np.array([rci_row(lang) for lang in LANG_CODES])


def get_ext_matrix(signals, level):
    """Return, per language and caption, the seconds added beyond the source event.

    Each row corresponds to one code in ``LANG_CODES``. A caption is paired
    with the signal at the same position, and captions that are not longer
    than their source event contribute ``0.0``.
    """
    rows = []
    for lang in LANG_CODES:
        localized, _report = batch_localize_with_report(signals, lang, level)
        row = []
        for position, caption in enumerate(localized):
            source = signals[position]
            extra = caption.end_s - caption.start_s - (source.end_s - source.start_s)
            row.append(max(0.0, extra))
        rows.append(row)
    return np.array(rows)


# Data for the three panels: RCI at two literacy levels for short events, and
# the duration extension for medium-length events at the low-literacy level.
rci_low = get_rci_matrix(SIGNALS_SHORT, LOW)
rci_exp = get_rci_matrix(SIGNALS_SHORT, EXP)
ext_low = get_ext_matrix(SIGNALS_MED, LOW)

fig = plt.figure(figsize=(18, 12), facecolor=BG)
gs = gridspec.GridSpec(2, 2, figure=fig, hspace=0.42, wspace=0.32,
                       left=0.07, right=0.97, top=0.92, bottom=0.06)

# Tick labels without the surrounding square brackets.
label_short = [label.strip("[]") for label in LABELS]
cmap_rg = matplotlib.colormaps["RdYlGn"]
cmap_bl = matplotlib.colormaps["Blues"]


def _draw_rci_heatmap(fig, slot, matrix, title):
    """Draw one annotated RCI heatmap with its colorbar.

    Rows are labelled with ``LANG_NAMES`` and columns with the bracket-free
    event labels. Every cell is annotated with its rounded value.

    Args:
        fig: Figure that receives the new axes and the colorbar.
        slot: GridSpec cell the heatmap is placed in.
        matrix: 2-D array of RCI values, one row per language.
        title: Title shown above the panel.

    Returns:
        The axes the heatmap was drawn on.
    """
    ax = fig.add_subplot(slot)
    ax.set_facecolor(PANEL)
    image = ax.imshow(matrix, cmap=cmap_rg, vmin=0, vmax=100, aspect="auto")
    ax.set_xticks(range(len(LABELS)))
    ax.set_xticklabels(label_short, rotation=30, ha="right", fontsize=8, color=FG)
    ax.set_yticks(range(len(LANG_NAMES)))
    ax.set_yticklabels(LANG_NAMES, fontsize=9, color=FG)
    ax.tick_params(colors=FG)
    for spine in ax.spines.values():
        spine.set_edgecolor(BD)
    # Row-major walk over the cells; dark text on the light (high-RCI) cells
    # and white text on the rest keeps the numbers readable.
    for (row, col), value in np.ndenumerate(matrix):
        ax.text(col, row, f"{value:.0f}", ha="center", va="center",
                fontsize=7.5, color="black" if value > 55 else "white",
                fontweight="bold")
    colorbar = fig.colorbar(image, ax=ax, fraction=0.03, pad=0.02)
    colorbar.ax.tick_params(labelcolor=FG, labelsize=7)
    colorbar.set_label("RCI", color=FG, fontsize=8)
    ax.set_title(title, color=FG, fontsize=9, pad=7)
    return ax


# ── panel 1: RCI at LOW_LITERACY ─────────────────────────────────────────────
ax1 = _draw_rci_heatmap(
    fig, gs[0, 0], rci_low,
    "RCI — Low-Literacy viewers (0.8 s event)\nGreen = accessible · Red = too fast",
)

# ── panel 2: RCI at EXPERT ───────────────────────────────────────────────────
ax2 = _draw_rci_heatmap(
    fig, gs[0, 1], rci_exp,
    "RCI — Expert-calibrated captions viewed by Low-Literacy viewers\nRed cells = content inaccessible to learners",
)

# ── panel 3: Duration extension bar chart ───────────────────────────────────
ax3 = fig.add_subplot(gs[1, :])
ax3.set_facecolor(PANEL)
for spine in ax3.spines.values():
    spine.set_edgecolor(BD)
ax3.tick_params(colors=FG)

x = np.arange(len(LANG_CODES))
n_lbls = len(LABELS)
bar_w = 0.08
colors = plt.cm.tab10(np.linspace(0, 1, n_lbls))

# One bar series per label; the offset centres the group of bars on each
# language's tick position.
for j, (lbl, col) in enumerate(zip(label_short, colors)):
    offset = (j - n_lbls / 2) * bar_w + bar_w / 2
    ax3.bar(x + offset, ext_low[:, j], bar_w, label=lbl,
            color=col, alpha=0.85, zorder=3)

ax3.set_xticks(x)
ax3.set_xticklabels(LANG_NAMES, fontsize=9, color=FG)
ax3.set_ylabel("Extra seconds needed\nbeyond original event duration", color=FG, fontsize=9)
ax3.yaxis.grid(True, linestyle="--", alpha=0.35, color=GREY, zorder=0)
ax3.set_axisbelow(True)
ax3.legend(loc="upper right", fontsize=7.5, ncol=3,
           facecolor=PANEL, edgecolor=BD, labelcolor=FG)
ax3.set_title("Duration extension per label per language — Low-Literacy tier (2.5 s source event)\n"
              "Tamil / Telugu / Kannada require the most extension because of lower syllabic reading speed",
              color=FG, fontsize=9, pad=7)

fig.suptitle(
    "Caption Localizer — Accessibility Analysis across 9 Indian Languages\n"
    "src/caption/localizer.py · 136 tests · scripts/demo_localize.py",
    color=FG, fontsize=12, fontweight="bold", y=0.97,
)

# Write the image next to the other caption assets and release the figure.
path = os.path.join(OUT_DIR, "demo_output.png")
fig.savefig(path, dpi=160, bbox_inches="tight", facecolor=BG)
plt.close(fig)
print(f" saved: {path}")
Loading