From 568eba641959d3075c73efc71b9418492bbd0311 Mon Sep 17 00:00:00 2001 From: Jakob Date: Thu, 7 May 2026 15:57:42 +0200 Subject: [PATCH 1/5] Changed acceptance, to be more forgiving --- zeeguu/api/test/test_verbal_flashcards.py | 37 +++++++++++--------- zeeguu/core/verbal_flashcards/fuzzy_match.py | 18 +++++++--- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/zeeguu/api/test/test_verbal_flashcards.py b/zeeguu/api/test/test_verbal_flashcards.py index 17623c10..6ba2f189 100644 --- a/zeeguu/api/test/test_verbal_flashcards.py +++ b/zeeguu/api/test/test_verbal_flashcards.py @@ -304,16 +304,17 @@ def test_score_word_match_accepts_common_danish_asr_variants(): @pytest.mark.parametrize( - "user_word, expected_word", + "user_word, expected_word, expected_allowed_distance", [ - ("hat", "kat"), - ("hond", "hund"), - ("pange", "penge"), + ("hat", "kat", 1), + ("hond", "hund", 2), + ("pange", "penge", 2), ], ) -def test_score_word_match_accepts_one_optimal_string_alignment_edit( +def test_score_word_match_accepts_words_within_length_based_edit_budget( user_word, expected_word, + expected_allowed_distance, ): from zeeguu.core.verbal_flashcards.fuzzy_match import score_word_match @@ -322,21 +323,22 @@ def test_score_word_match_accepts_one_optimal_string_alignment_edit( assert result["isMatch"] is True assert result["matchType"] == "fuzzy" assert result["optimalStringAlignmentDistance"] == 1 - assert result["allowedOptimalStringAlignmentDistance"] == 1 + assert result["allowedOptimalStringAlignmentDistance"] == expected_allowed_distance assert result["jaroWinkler"] > 0 @pytest.mark.parametrize( - "user_word, expected_word", + "user_word, expected_word, expected_allowed_distance", [ - ("hot", "kat"), - ("hd", "hund"), - ("pen", "penge"), + ("hot", "kat", 1), + ("zzzz", "hund", 2), + ("xxxxx", "penge", 2), ], ) -def test_score_word_match_rejects_multiple_optimal_string_alignment_edits( +def test_score_word_match_rejects_words_outside_length_based_edit_budget( user_word, expected_word, + expected_allowed_distance, ): from zeeguu.core.verbal_flashcards.fuzzy_match import score_word_match @@ -344,18 +346,21 @@ def test_score_word_match_rejects_multiple_optimal_string_alignment_edits( assert result["isMatch"] is False assert result["matchType"] == "close" - assert result["optimalStringAlignmentDistance"] > 1 - assert result["allowedOptimalStringAlignmentDistance"] == 1 + assert ( + result["optimalStringAlignmentDistance"] + > result["allowedOptimalStringAlignmentDistance"] + ) + assert result["allowedOptimalStringAlignmentDistance"] == expected_allowed_distance -def test_score_word_match_requires_exact_match_for_two_letter_words(): +def test_score_word_match_allows_one_edit_for_two_letter_words(): from zeeguu.core.verbal_flashcards.fuzzy_match import score_word_match result = score_word_match("og", "ok", language_code="da") - assert result["isMatch"] is False + assert result["isMatch"] is True assert result["optimalStringAlignmentDistance"] == 1 - assert result["allowedOptimalStringAlignmentDistance"] == 0 + assert result["allowedOptimalStringAlignmentDistance"] == 1 def test_calculate_accuracy_ignores_word_order_and_matches_fuzzily(): diff --git a/zeeguu/core/verbal_flashcards/fuzzy_match.py b/zeeguu/core/verbal_flashcards/fuzzy_match.py index 75949cc3..b21704fc 100644 --- a/zeeguu/core/verbal_flashcards/fuzzy_match.py +++ b/zeeguu/core/verbal_flashcards/fuzzy_match.py @@ -152,14 +152,22 @@ def allowed_optimal_string_alignment_distance(expected_word, language_code=None) Return the maximum edit distance accepted for a spoken flashcard answer. Acceptance is based on edit distance, not a blended similarity score: - after language-specific normalization, words of length >= 3 may differ by - one optimal string alignment edit. Jaro-Winkler is still returned as a - diagnostic signal for debugging and future analysis, but it does not decide - correctness. + after language-specific normalization, longer words get a larger edit + budget because ASR approximations often drift more on longer Danish words. + Jaro-Winkler is still returned as a diagnostic signal for debugging and + future analysis, but it does not decide correctness. """ normalizer = normalizer_for(language_code) normalized_length = len(normalizer.canonical_form(expected_word)) - return 0 if normalized_length <= 2 else 1 + if normalized_length <= 1: + return 0 + if normalized_length <= 3: + return 1 + if normalized_length <= 5: + return 2 + if normalized_length <= 7: + return 3 + return 4 def fuzzy_match_threshold(expected_word, language_code=None): From 0f2f975b99531d4e260180fcb4773e7d8c9403df Mon Sep 17 00:00:00 2001 From: Jakob Date: Thu, 7 May 2026 16:07:09 +0200 Subject: [PATCH 2/5] Too much permission, tweaked a bit --- zeeguu/core/verbal_flashcards/fuzzy_match.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zeeguu/core/verbal_flashcards/fuzzy_match.py b/zeeguu/core/verbal_flashcards/fuzzy_match.py index b21704fc..1af332f1 100644 --- a/zeeguu/core/verbal_flashcards/fuzzy_match.py +++ b/zeeguu/core/verbal_flashcards/fuzzy_match.py @@ -161,11 +161,11 @@ def allowed_optimal_string_alignment_distance(expected_word, language_code=None) normalized_length = len(normalizer.canonical_form(expected_word)) if normalized_length <= 1: return 0 - if normalized_length <= 3: + if normalized_length <= 4: return 1 - if normalized_length <= 5: + if normalized_length <= 6: return 2 - if normalized_length <= 7: + if normalized_length <= 8: return 3 return 4 From f10855da275cb8d2059b6ee0ad96356f9bd4c4fb Mon Sep 17 00:00:00 2001 From: Jakob Date: Thu, 7 May 2026 16:10:50 +0200 Subject: [PATCH 3/5] small change to permission --- zeeguu/core/verbal_flashcards/fuzzy_match.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zeeguu/core/verbal_flashcards/fuzzy_match.py b/zeeguu/core/verbal_flashcards/fuzzy_match.py index 1af332f1..1d87b6a3 100644 --- a/zeeguu/core/verbal_flashcards/fuzzy_match.py +++ b/zeeguu/core/verbal_flashcards/fuzzy_match.py @@ -165,7 +165,7 @@ def allowed_optimal_string_alignment_distance(expected_word, language_code=None) return 1 if normalized_length <= 6: return 2 - if normalized_length <= 8: + if normalized_length <= 9: return 3 return 4 From d5367d9437f4e3f06a1a4827570435f7a1b896fa Mon Sep 17 00:00:00 2001 From: Jakob Date: Thu, 7 May 2026 16:28:27 +0200 Subject: [PATCH 4/5] Fixed tests --- zeeguu/api/test/test_verbal_flashcards.py | 6 +++--- zeeguu/core/verbal_flashcards/fuzzy_match.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/zeeguu/api/test/test_verbal_flashcards.py b/zeeguu/api/test/test_verbal_flashcards.py index 6ba2f189..bf05fc30 100644 --- a/zeeguu/api/test/test_verbal_flashcards.py +++ b/zeeguu/api/test/test_verbal_flashcards.py @@ -331,7 +331,7 @@ def test_score_word_match_accepts_words_within_length_based_edit_budget( "user_word, expected_word, expected_allowed_distance", [ ("hot", "kat", 1), - ("zzzz", "hund", 2), + ("zzzz", "hund", 1), ("xxxxx", "penge", 2), ], ) @@ -358,9 +358,9 @@ def test_score_word_match_allows_one_edit_for_two_letter_words(): result = score_word_match("og", "ok", language_code="da") - assert result["isMatch"] is True + assert result["isMatch"] is False assert result["optimalStringAlignmentDistance"] == 1 - assert result["allowedOptimalStringAlignmentDistance"] == 1 + assert result["allowedOptimalStringAlignmentDistance"] == 0 def test_calculate_accuracy_ignores_word_order_and_matches_fuzzily(): diff --git a/zeeguu/core/verbal_flashcards/fuzzy_match.py b/zeeguu/core/verbal_flashcards/fuzzy_match.py index 1d87b6a3..d4cc1630 100644 --- a/zeeguu/core/verbal_flashcards/fuzzy_match.py +++ b/zeeguu/core/verbal_flashcards/fuzzy_match.py @@ -159,7 +159,7 @@ def allowed_optimal_string_alignment_distance(expected_word, language_code=None) """ normalizer = normalizer_for(language_code) normalized_length = len(normalizer.canonical_form(expected_word)) - if normalized_length <= 1: + if normalized_length <= 2: return 0 if normalized_length <= 4: return 1 From b332caec49cb63a58634b194923fcc94408b303f Mon Sep 17 00:00:00 2001 From: Jakob Date: Thu, 7 May 2026 16:36:12 +0200 Subject: [PATCH 5/5] Fix test --- zeeguu/api/test/test_verbal_flashcards.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zeeguu/api/test/test_verbal_flashcards.py b/zeeguu/api/test/test_verbal_flashcards.py index bf05fc30..022577f6 100644 --- a/zeeguu/api/test/test_verbal_flashcards.py +++ b/zeeguu/api/test/test_verbal_flashcards.py @@ -307,7 +307,7 @@ def test_score_word_match_accepts_common_danish_asr_variants(): "user_word, expected_word, expected_allowed_distance", [ ("hat", "kat", 1), - ("hond", "hund", 2), + ("hond", "hund", 1), ("pange", "penge", 2), ], )