From 26bdf62dd1fcf5f5dd95f0c5090e235c99fcf2b3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Feb 2026 03:54:24 +0000 Subject: [PATCH 1/8] Initial plan From b29b410ed8b5532a99b290d170df539d6ed831be Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Feb 2026 03:57:24 +0000 Subject: [PATCH 2/8] feat: add python hash helpers Co-authored-by: timurbazhirov <721112+timurbazhirov@users.noreply.github.com> --- src/py/mat3ra/utils/__init__.py | 1 + src/py/mat3ra/utils/hash.py | 36 +++++++++++++++++++++++++++++++++ tests/py/unit/test_hash.py | 28 +++++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 src/py/mat3ra/utils/hash.py create mode 100644 tests/py/unit/test_hash.py diff --git a/src/py/mat3ra/utils/__init__.py b/src/py/mat3ra/utils/__init__.py index e6a4962..3de0935 100644 --- a/src/py/mat3ra/utils/__init__.py +++ b/src/py/mat3ra/utils/__init__.py @@ -1,2 +1,3 @@ +from .hash import calculate_hash_from_object, calculate_hash_from_string from .search import find_by_key_or_regex from .string import camel_to_snake, snake_to_camel diff --git a/src/py/mat3ra/utils/hash.py b/src/py/mat3ra/utils/hash.py new file mode 100644 index 0000000..a850516 --- /dev/null +++ b/src/py/mat3ra/utils/hash.py @@ -0,0 +1,36 @@ +import hashlib +import json +from typing import Any + + +def _sort_keys_deep(obj: Any) -> Any: + if isinstance(obj, dict): + return {key: _sort_keys_deep(obj[key]) for key in sorted(obj)} + if isinstance(obj, list): + return [_sort_keys_deep(item) for item in obj] + return obj + + +def _get_hasher(hash_function: str): + try: + return getattr(hashlib, (hash_function or "md5").lower())() + except (AttributeError, TypeError, ValueError): + return hashlib.md5() + + +def calculate_hash_from_string(message: str, hash_function: str = "MD5") -> str: + """ + Calculates hash of a given text. + """ + hasher = _get_hasher(hash_function) + hasher.update(message.encode()) + return hasher.hexdigest() + + +def calculate_hash_from_object(obj: Any, hash_function: str = "MD5") -> str: + """ + Calculates hash of a given object. It must be serializable. + """ + sorted_obj = _sort_keys_deep(obj) + message = json.dumps(sorted_obj, separators=(",", ":"), ensure_ascii=False) + return calculate_hash_from_string(message, hash_function) diff --git a/tests/py/unit/test_hash.py b/tests/py/unit/test_hash.py new file mode 100644 index 0000000..a5e6b1d --- /dev/null +++ b/tests/py/unit/test_hash.py @@ -0,0 +1,28 @@ +import hashlib +import json + +import mat3ra.utils.hash as hash_utils + + +def test_calculate_hash_from_string_default_md5(): + message = "hello world" + expected = hashlib.md5(message.encode()).hexdigest() + assert hash_utils.calculate_hash_from_string(message) == expected + + +def test_calculate_hash_from_string_with_algorithm(): + message = "hello world" + expected = hashlib.sha1(message.encode()).hexdigest() + assert hash_utils.calculate_hash_from_string(message, "sha1") == expected + + +def test_calculate_hash_from_object_sorts_keys(): + obj = { + "b": 1, + "a": {"d": 4, "c": 3}, + "list": [{"y": 2, "x": 1}, 3], + } + sorted_obj = {"a": {"c": 3, "d": 4}, "b": 1, "list": [{"x": 1, "y": 2}, 3]} + expected_message = json.dumps(sorted_obj, separators=(",", ":"), ensure_ascii=False) + expected = hashlib.md5(expected_message.encode()).hexdigest() + assert hash_utils.calculate_hash_from_object(obj) == expected From afdff0d9df2cbae9e81d4dff2b4c1ebc7c7adb0c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Feb 2026 03:58:23 +0000 Subject: [PATCH 3/8] chore: refine hash helper typing Co-authored-by: timurbazhirov <721112+timurbazhirov@users.noreply.github.com> --- src/py/mat3ra/utils/hash.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/mat3ra/utils/hash.py b/src/py/mat3ra/utils/hash.py index a850516..6e854d9 100644 --- a/src/py/mat3ra/utils/hash.py +++ b/src/py/mat3ra/utils/hash.py @@ -1,6 +1,6 @@ import hashlib import json -from typing import Any +from typing import Any, Optional def _sort_keys_deep(obj: Any) -> Any: @@ -11,7 +11,7 @@ def _sort_keys_deep(obj: Any) -> Any: return obj -def _get_hasher(hash_function: str): +def _get_hasher(hash_function: Optional[str]) -> Any: try: return getattr(hashlib, (hash_function or "md5").lower())() except (AttributeError, TypeError, ValueError): From 6ff1b19014b404abbb337fade5f8a1f2608c0911 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Feb 2026 03:59:17 +0000 Subject: [PATCH 4/8] test: expand hash coverage Co-authored-by: timurbazhirov <721112+timurbazhirov@users.noreply.github.com> --- src/py/mat3ra/utils/hash.py | 3 +++ tests/py/unit/test_hash.py | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/py/mat3ra/utils/hash.py b/src/py/mat3ra/utils/hash.py index 6e854d9..0762322 100644 --- a/src/py/mat3ra/utils/hash.py +++ b/src/py/mat3ra/utils/hash.py @@ -21,6 +21,9 @@ def _get_hasher(hash_function: Optional[str]) -> Any: def calculate_hash_from_string(message: str, hash_function: str = "MD5") -> str: """ Calculates hash of a given text. + + Defaults to MD5 for parity with the JS utilities and falls back to MD5 if the + requested algorithm is unavailable. Not intended for security-sensitive use. """ hasher = _get_hasher(hash_function) hasher.update(message.encode()) diff --git a/tests/py/unit/test_hash.py b/tests/py/unit/test_hash.py index a5e6b1d..5c498f6 100644 --- a/tests/py/unit/test_hash.py +++ b/tests/py/unit/test_hash.py @@ -26,3 +26,16 @@ def test_calculate_hash_from_object_sorts_keys(): expected_message = json.dumps(sorted_obj, separators=(",", ":"), ensure_ascii=False) expected = hashlib.md5(expected_message.encode()).hexdigest() assert hash_utils.calculate_hash_from_object(obj) == expected + + +def test_calculate_hash_from_object_with_algorithm(): + obj = {"b": 1, "a": 2} + expected_message = json.dumps({"a": 2, "b": 1}, separators=(",", ":"), ensure_ascii=False) + expected = hashlib.sha256(expected_message.encode()).hexdigest() + assert hash_utils.calculate_hash_from_object(obj, "sha256") == expected + + +def test_calculate_hash_from_string_invalid_algorithm_falls_back_to_md5(): + message = "fallback" + expected = hashlib.md5(message.encode()).hexdigest() + assert hash_utils.calculate_hash_from_string(message, "unknown") == expected From bc4777993a719d0213105d27205c766fb10def64 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Feb 2026 03:59:53 +0000 Subject: [PATCH 5/8] docs: clarify hash helper behavior Co-authored-by: timurbazhirov <721112+timurbazhirov@users.noreply.github.com> --- src/py/mat3ra/utils/hash.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/py/mat3ra/utils/hash.py b/src/py/mat3ra/utils/hash.py index 0762322..9683f93 100644 --- a/src/py/mat3ra/utils/hash.py +++ b/src/py/mat3ra/utils/hash.py @@ -22,7 +22,7 @@ def calculate_hash_from_string(message: str, hash_function: str = "MD5") -> str: """ Calculates hash of a given text. - Defaults to MD5 for parity with the JS utilities and falls back to MD5 if the + Defaults to MD5 for parity with the legacy JS utilities and falls back to MD5 if the requested algorithm is unavailable. Not intended for security-sensitive use. """ hasher = _get_hasher(hash_function) @@ -33,6 +33,8 @@ def calculate_hash_from_string(message: str, hash_function: str = "MD5") -> str: def calculate_hash_from_object(obj: Any, hash_function: str = "MD5") -> str: """ Calculates hash of a given object. It must be serializable. + + Keys are sorted recursively before hashing to ensure deterministic output. """ sorted_obj = _sort_keys_deep(obj) message = json.dumps(sorted_obj, separators=(",", ":"), ensure_ascii=False) From 65bf74aac07a4cf23b2db370f2ca14ddb3dc5e3c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Feb 2026 04:00:35 +0000 Subject: [PATCH 6/8] test: cover hash fallbacks Co-authored-by: timurbazhirov <721112+timurbazhirov@users.noreply.github.com> --- src/py/mat3ra/utils/hash.py | 3 ++- tests/py/unit/test_hash.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/py/mat3ra/utils/hash.py b/src/py/mat3ra/utils/hash.py index 9683f93..67361d5 100644 --- a/src/py/mat3ra/utils/hash.py +++ b/src/py/mat3ra/utils/hash.py @@ -23,7 +23,8 @@ def calculate_hash_from_string(message: str, hash_function: str = "MD5") -> str: Calculates hash of a given text. Defaults to MD5 for parity with the legacy JS utilities and falls back to MD5 if the - requested algorithm is unavailable. Not intended for security-sensitive use. + requested algorithm is unavailable. Not intended for security-sensitive use or any + scenario where collision resistance matters. """ hasher = _get_hasher(hash_function) hasher.update(message.encode()) diff --git a/tests/py/unit/test_hash.py b/tests/py/unit/test_hash.py index 5c498f6..bdf9284 100644 --- a/tests/py/unit/test_hash.py +++ b/tests/py/unit/test_hash.py @@ -39,3 +39,10 @@ def test_calculate_hash_from_string_invalid_algorithm_falls_back_to_md5(): message = "fallback" expected = hashlib.md5(message.encode()).hexdigest() assert hash_utils.calculate_hash_from_string(message, "unknown") == expected + + +def test_calculate_hash_from_object_invalid_algorithm_falls_back_to_md5(): + obj = {"b": 1, "a": 2} + expected_message = json.dumps({"a": 2, "b": 1}, separators=(",", ":"), ensure_ascii=False) + expected = hashlib.md5(expected_message.encode()).hexdigest() + assert hash_utils.calculate_hash_from_object(obj, "unknown") == expected From af640510f99c7eb090a8cb435c46f455fc7811c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Feb 2026 04:01:18 +0000 Subject: [PATCH 7/8] chore: document hash defaults Co-authored-by: timurbazhirov <721112+timurbazhirov@users.noreply.github.com> --- src/py/mat3ra/utils/hash.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/py/mat3ra/utils/hash.py b/src/py/mat3ra/utils/hash.py index 67361d5..5ee9665 100644 --- a/src/py/mat3ra/utils/hash.py +++ b/src/py/mat3ra/utils/hash.py @@ -18,24 +18,38 @@ def _get_hasher(hash_function: Optional[str]) -> Any: return hashlib.md5() -def calculate_hash_from_string(message: str, hash_function: str = "MD5") -> str: +def calculate_hash_from_string(message: str, hash_function: str = "md5") -> str: """ Calculates hash of a given text. Defaults to MD5 for parity with the legacy JS utilities and falls back to MD5 if the requested algorithm is unavailable. Not intended for security-sensitive use or any scenario where collision resistance matters. + + Args: + message: Input text to hash. + hash_function: Hash function name. Falls back to MD5 if unavailable. + + Returns: + str: Hex digest of the hashed message. """ hasher = _get_hasher(hash_function) hasher.update(message.encode()) return hasher.hexdigest() -def calculate_hash_from_object(obj: Any, hash_function: str = "MD5") -> str: +def calculate_hash_from_object(obj: Any, hash_function: str = "md5") -> str: """ Calculates hash of a given object. It must be serializable. Keys are sorted recursively before hashing to ensure deterministic output. + + Args: + obj: Serializable object to hash. + hash_function: Hash function name. Falls back to MD5 if unavailable. + + Returns: + str: Hex digest of the hashed object representation. """ sorted_obj = _sort_keys_deep(obj) message = json.dumps(sorted_obj, separators=(",", ":"), ensure_ascii=False) From ae0860dda72116829b82d96204dcc9f97fbc9134 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Feb 2026 04:02:02 +0000 Subject: [PATCH 8/8] chore: warn on invalid hash algorithms Co-authored-by: timurbazhirov <721112+timurbazhirov@users.noreply.github.com> --- src/py/mat3ra/utils/hash.py | 6 +++++- tests/py/unit/test_hash.py | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/py/mat3ra/utils/hash.py b/src/py/mat3ra/utils/hash.py index 5ee9665..d2e6429 100644 --- a/src/py/mat3ra/utils/hash.py +++ b/src/py/mat3ra/utils/hash.py @@ -1,5 +1,6 @@ import hashlib import json +import warnings from typing import Any, Optional @@ -12,9 +13,12 @@ def _sort_keys_deep(obj: Any) -> Any: def _get_hasher(hash_function: Optional[str]) -> Any: + algorithm = (hash_function or "md5").lower() try: - return getattr(hashlib, (hash_function or "md5").lower())() + return getattr(hashlib, algorithm)() except (AttributeError, TypeError, ValueError): + if algorithm != "md5": + warnings.warn(f"Hash function '{hash_function}' unavailable. Falling back to MD5.", RuntimeWarning) return hashlib.md5() diff --git a/tests/py/unit/test_hash.py b/tests/py/unit/test_hash.py index bdf9284..616d34d 100644 --- a/tests/py/unit/test_hash.py +++ b/tests/py/unit/test_hash.py @@ -1,6 +1,8 @@ import hashlib import json +import pytest + import mat3ra.utils.hash as hash_utils @@ -38,11 +40,13 @@ def test_calculate_hash_from_object_with_algorithm(): def test_calculate_hash_from_string_invalid_algorithm_falls_back_to_md5(): message = "fallback" expected = hashlib.md5(message.encode()).hexdigest() - assert hash_utils.calculate_hash_from_string(message, "unknown") == expected + with pytest.warns(RuntimeWarning): + assert hash_utils.calculate_hash_from_string(message, "unknown") == expected def test_calculate_hash_from_object_invalid_algorithm_falls_back_to_md5(): obj = {"b": 1, "a": 2} expected_message = json.dumps({"a": 2, "b": 1}, separators=(",", ":"), ensure_ascii=False) expected = hashlib.md5(expected_message.encode()).hexdigest() - assert hash_utils.calculate_hash_from_object(obj, "unknown") == expected + with pytest.warns(RuntimeWarning): + assert hash_utils.calculate_hash_from_object(obj, "unknown") == expected