From 4ef2c34e3910d24c0c1d518f0398526e5b6e9129 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 10:59:06 +0100 Subject: [PATCH 01/21] Get engine coverage up to 100% --- tests/unit/engine/test_engine.py | 121 +++++++++++++++++++++++ tests/unit/engine/test_engine_ext.py | 140 +++++++++++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 tests/unit/engine/test_engine_ext.py diff --git a/tests/unit/engine/test_engine.py b/tests/unit/engine/test_engine.py index fc51e3f..3195c21 100644 --- a/tests/unit/engine/test_engine.py +++ b/tests/unit/engine/test_engine.py @@ -248,6 +248,127 @@ def test_pipeline_unknown_no_fallback(monkeypatch, mock_detectors, tmp_path): assert result["iocs"] == {} +# ------------------------------------------------------------ +# Archive pipeline +# ------------------------------------------------------------ + +def test_extract_from_file_archive_branch(monkeypatch): + engine = Engine() + engine.config.enable_magic = True + + # Force detect_file_type() to return ZIP + monkeypatch.setitem( + engine.extract_from_file.__globals__, + "detect_file_type", + lambda path: FileType.ZIP, + ) + + # Stub _pipeline_archive so we can detect it was called + monkeypatch.setattr( + engine, + "_pipeline_archive", + lambda path: {"file": path, "type": "archive", "iocs": {}, "metadata": {}}, + ) + + result = engine.extract_from_file("dummy.zip") + + # Ensure the archive pipeline was used + assert result["type"] == "archive" + assert result["file"] == "dummy.zip" + + +def test_pipeline_archive_basic(): + + engine = Engine() + + # Stub out internal detector pipeline + engine._run_detectors = lambda path, _: { + "iocs": {"url": ["http://example.com"]} + } + + # Stub post-processing + engine._post_process = lambda raw: raw["iocs"] + + result = engine._pipeline_archive("archive.zip") + + # Structure checks + assert result["file"] == "archive.zip" + assert result["type"] == "archive" + assert result["metadata"] == {} + + # IOC propagation 
check + assert "url" in result["iocs"] + assert result["iocs"]["url"] == ["http://example.com"] + + +def test_pipeline_pe_full_analysis(monkeypatch): + # --- Patch module-level helpers used inside _pipeline_pe --- + from dataclasses import dataclass + + engine = Engine() + engine._analysis_level = "full" # force all branches + + # Fake section analysis + def fake_sections(pe): + return [{"name": ".text", "entropy": 6.5}] + monkeypatch.setitem(engine._pipeline_pe.__globals__, "analyse_pe_sections", fake_sections) + + # Fake obfuscation analysis (must return dataclass instances!) + @dataclass + class FakeObf: + kind: str = "xor" + score: float = 0.9 + + def fake_obf(sections, text): + return [FakeObf()] + monkeypatch.setitem(engine._pipeline_pe.__globals__, "analyse_obfuscation", fake_obf) + + # Fake extended analysis + def fake_extended(pe, meta, text): + return {"extended_key": 123} + monkeypatch.setitem(engine._pipeline_pe.__globals__, "analyse_extended", fake_extended) + + # --- Patch Engine internal methods --- + + engine._get_pe_metadata = lambda path: ( + {"pe": True}, + {"resource_strings": ["resA"], "meta": "x"}, + ) + + engine._get_strings = lambda path: ["s1", "s2"] + + engine._run_detectors = lambda path, text: { + "iocs": {"url": ["http://example.com"]} + } + + engine._post_process = lambda raw: raw["iocs"] + + # --- Execute --- + result = engine._pipeline_pe("dummy.exe") + + # --- Assertions --- + + assert result["file"] == "dummy.exe" + assert result["type"] == "PE" + assert result["metadata"]["meta"] == "x" + + # IOC propagation + assert result["iocs"]["url"] == ["http://example.com"] + + # Analysis block must exist + assert "analysis" in result + analysis = result["analysis"] + + # Sections + assert analysis["sections"][0]["name"] == ".text" + + # Obfuscation (now dataclass → asdict works) + assert analysis["obfuscation"][0]["kind"] == "xor" + + # Extended + assert analysis["extended"] == {"extended_key": 123} + + # 
------------------------------------------------------------ # Caching behaviour # ------------------------------------------------------------ diff --git a/tests/unit/engine/test_engine_ext.py b/tests/unit/engine/test_engine_ext.py new file mode 100644 index 0000000..bcb8e96 --- /dev/null +++ b/tests/unit/engine/test_engine_ext.py @@ -0,0 +1,140 @@ +import pytest +from iocx.engine import Engine +from iocx.models import Detection + + +@pytest.fixture +def engine(): + return Engine() + +@pytest.fixture +def engine_tuple(): + e = Engine() + # Replace plugin registry with our controlled test plugin + e._plugin_registry.detectors = [TupleDetector()] + return e + +@pytest.fixture +def engine_enricher(): + e = Engine() + # Replace enrichers with our failing plugin + e._plugin_registry.enrichers = [FailingEnricher()] + return e + +@pytest.fixture +def engine_malformed(): + e = Engine() + e._plugin_registry.detectors = [MalformedDetector()] + return e + + +class TupleDetector: + """Detector that returns 4‑tuple IOC items.""" + class metadata: + id = "tuple-detector" + + def detect(self, text, ctx): + # category → list of 4‑tuples + return { + "urls": [ + ("http://example.com", 0, 18, "urls") + ] + } + +class FailingEnricher: + """Enricher that always raises to hit the exception branch.""" + class metadata: + id = "failing-enricher" + + def enrich(self, text, ctx): + raise RuntimeError("boom") + +class BadDetector: + """Detector that returns malformed items to hit line 241.""" + def __call__(self, text): + return [ + 123, # malformed → triggers the continue branch (line 241) + ("ok", 0, 2, "test"), # valid → ensures loop continues + ] + +def test_analyze_file_creates_detections_and_manages_depth_stack(engine): + # Start with a known depth stack state + engine.depth_stack = [0] + + # Stub out internal methods to avoid real file parsing + engine._build_plugin_context = lambda path, _: {} + engine.extract_from_file = lambda path: { + "iocs": { + "url": ["http://example.com"], 
+ "ip": ["1.2.3.4"], + } + } + + detections = engine.analyze_file("dummy.bin") + + # depth_stack should return to original state after recursion + assert engine.depth_stack == [0] + + # Should produce two Detection objects + assert len(detections) == 2 + + # Validate categories and values + categories = {d.category for d in detections} + values = {d.value for d in detections} + + assert categories == {"url", "ip"} + assert values == {"http://example.com", "1.2.3.4"} + + # start/end should be zero + for d in detections: + assert d.start == 0 + assert d.end == 0 + + +def test_tuple_detector_branch(engine_tuple): + result = engine_tuple._run_detectors("dummy", "http://example.com") + + # Ensure category exists + assert "urls" in result + + items = result["urls"] + assert len(items) == 1 + + det = items[0] + assert isinstance(det, Detection) + + # Ensure the tuple was converted correctly + assert det.value == "http://example.com" + assert det.start == 0 + assert det.end == 18 + assert det.category == "urls" + + +def test_enricher_exception_branch(engine_enricher, caplog): + # Run any method that triggers the enricher pipeline + # _run_enrichers is internal, but extract() calls it + engine_enricher.extract("dummy text") + + # The exception branch logs a warning — assert it happened + messages = " ".join(record.message for record in caplog.records) + assert "failing-enricher" in messages + assert "failed" in messages + + +def test_all_detectors_malformed_item_hits_continue(engine, monkeypatch): + # Patch all_detectors() to return our bad detector + monkeypatch.setitem( + engine._run_detectors.__globals__, + "all_detectors", + lambda: {"bad": BadDetector()}, + ) + + result = engine._run_detectors("dummy", "text") + + # Only the valid tuple should survive + assert "bad" in result + items = result["bad"] + + assert len(items) == 1 + assert isinstance(items[0], Detection) + assert items[0].value == "ok" From 71a80d92f759b96a7a0231edb8765bb627e91635 Mon Sep 17 00:00:00 2001 
From: malx-labs Date: Thu, 9 Apr 2026 11:38:26 +0100 Subject: [PATCH 02/21] Pe_parser now at 100% coverage --- tests/unit/parsers/test_pe_parser.py | 122 ++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/tests/unit/parsers/test_pe_parser.py b/tests/unit/parsers/test_pe_parser.py index 9e2de89..0aa8095 100644 --- a/tests/unit/parsers/test_pe_parser.py +++ b/tests/unit/parsers/test_pe_parser.py @@ -1,7 +1,7 @@ import pytest from types import SimpleNamespace -from iocx.parsers.pe_parser import parse_pe, _walk_resources +from iocx.parsers.pe_parser import parse_pe, _walk_resources, analyse_pe_sections from iocx.parsers.string_extractor import extract_strings_from_bytes @@ -193,6 +193,18 @@ class FakeDir: assert metadata["resource_strings"] == [] +def test_parse_pe_handles_peformaterror(monkeypatch): + import pefile + # Override the autouse patch for this test only + def raise_peformaterror(path, fast_load=True): + raise pefile.PEFormatError("bad file") + + monkeypatch.setattr(pefile, "PE", raise_peformaterror) + + result = parse_pe("not_a_real_pe.exe") + + assert result == {} + # ------------------------------------------------------------ # Direct tests for _walk_resources() # ------------------------------------------------------------ @@ -216,3 +228,111 @@ def size(self): strings = [] _walk_resources(pe, a, strings) assert strings == [] + + +def test_walk_resources_directory_branch(monkeypatch): + # Fake directory structure: root → child (no cycle) + class ChildDir: + entries = [] # no further entries + + class EntryWithDirectory: + directory = ChildDir() + + class RootDir: + entries = [EntryWithDirectory()] + + # Fake __data__ with .size attribute + class FakeData(bytes): + @property + def size(self): + return len(self) + + pe = SimpleNamespace( + __data__=FakeData(b"\x00" * 1000), + get_data=lambda *a, **k: b"" # won't be used + ) + + strings = [] + _walk_resources(pe, RootDir(), strings) + + # No strings expected, but the 
directory branch was executed + assert strings == [] + + +def test_walk_resources_recursion_guard(): + # Create two directory objects + class Dir: + def __init__(self): + self.entries = [] + + A = Dir() + B = Dir() + + # Entry objects with .directory attributes + class Entry: + def __init__(self, directory): + self.directory = directory + + # Create a cycle: A → B → A + A.entries = [Entry(B)] + B.entries = [Entry(A)] + + # Fake __data__ with .size attribute + class FakeData(bytes): + @property + def size(self): + return len(self) + + pe = SimpleNamespace( + __data__=FakeData(b"\x00" * 1000), + get_data=lambda *a, **k: b"" + ) + + strings = [] + _walk_resources(pe, A, strings) + + # No strings expected, but recursion guard was hit + assert strings == [] + + +# ------------------------------------------------------------ +# Analyse PE sections +# ------------------------------------------------------------ + +class FakeSection: + def __init__(self): + self.Name = b".text\x00\x00\x00" + self.SizeOfRawData = 100 + self.Misc_VirtualSize = 80 + self.Characteristics = 0x60000020 + self._data = b"\x00" * 50 + + def get_data(self): + return self._data + + +class FakePE: + def __init__(self): + self.sections = [FakeSection()] + + +def test_analyse_pe_sections_basic(): + pe = FakePE() + + results = analyse_pe_sections(pe) + + assert len(results) == 1 + sec = results[0] + + # Name should be decoded and stripped of nulls + assert sec["name"] == ".text" + + # Raw + virtual sizes + assert sec["raw_size"] == 100 + assert sec["virtual_size"] == 80 + + # Characteristics preserved + assert sec["characteristics"] == 0x60000020 + + # Entropy should be a float + assert isinstance(sec["entropy"], float) From f27522093377bd85f8b1a879135bcbab28345dfc Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 11:50:55 +0100 Subject: [PATCH 03/21] Obfuscation code now at 100% coverage --- tests/unit/analysis/test_obfuscation_ext.py | 61 ++++++++++++++++++++- 1 file changed, 60 
insertions(+), 1 deletion(-) diff --git a/tests/unit/analysis/test_obfuscation_ext.py b/tests/unit/analysis/test_obfuscation_ext.py index 69a7e1a..110eab4 100644 --- a/tests/unit/analysis/test_obfuscation_ext.py +++ b/tests/unit/analysis/test_obfuscation_ext.py @@ -1,5 +1,5 @@ import pytest -from iocx.analysis.obfuscation import analyse_obfuscation +from iocx.analysis.obfuscation import analyse_obfuscation, _detect_high_entropy_sections, _looks_like_rot13, _non_printable_ratio, _detect_string_obfuscation def make_sections(): return [ @@ -84,3 +84,62 @@ def test_full_trigger_suite(): assert "abnormal_section_overlap" in values assert "suspicious_hex_blob_string" in values assert "rot_encoded_string" in values + + +def test_detect_high_entropy_sections_skips_none_data(): + sections = [ + {"name": ".text", "data": None}, + {"name": ".rdata", "data": None}, + ] + + detections = _detect_high_entropy_sections(sections) + + assert detections == [] + + +def test_detect_high_entropy_sections_low_entropy(): + sections = [ + {"name": ".text", "data": b"\x00" * 100}, + ] + + detections = _detect_high_entropy_sections(sections) + + assert detections == [] + + +def test_detect_high_entropy_sections_high_entropy(): + # High entropy: random bytes + data = bytes(range(256)) # 0..255 → very high entropy + + sections = [ + {"name": ".packed", "data": data}, + ] + + detections = _detect_high_entropy_sections(sections) + + assert len(detections) == 1 + det = detections[0] + + assert det.category == "obfuscation_hint" + assert det.value == "high_entropy_section" + assert det.metadata["section"] == ".packed" + assert det.metadata["entropy"] >= det.metadata["threshold"] + + +def test_looks_like_rot13_too_short(): + # MIN_STRING_LENGTH is > 1, so "a" is guaranteed to be too short + assert _looks_like_rot13("a") is False + + +def test_non_printable_ratio_empty_string(): + assert _non_printable_ratio("") == 0.0 + + +def test_detect_string_obfuscation_skips_short_strings(): + # 
MIN_STRING_LENGTH is > 1, so "a" is guaranteed too short + strings = ["a", "validstring"] + + detections = _detect_string_obfuscation(strings) + + # We don't care about the result here — only that the short string was skipped + assert isinstance(detections, list) From abafb4f6e3bb41efe67cbd6f39f74ddf5f5d7f01 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 11:54:02 +0100 Subject: [PATCH 04/21] Cover analyse extended --- tests/unit/analysis/test_obfuscation_ext.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/unit/analysis/test_obfuscation_ext.py b/tests/unit/analysis/test_obfuscation_ext.py index 110eab4..13179b6 100644 --- a/tests/unit/analysis/test_obfuscation_ext.py +++ b/tests/unit/analysis/test_obfuscation_ext.py @@ -1,5 +1,6 @@ import pytest from iocx.analysis.obfuscation import analyse_obfuscation, _detect_high_entropy_sections, _looks_like_rot13, _non_printable_ratio, _detect_string_obfuscation +from iocx.analysis.extended import analyse_extended def make_sections(): return [ @@ -143,3 +144,21 @@ def test_detect_string_obfuscation_skips_short_strings(): # We don't care about the result here — only that the short string was skipped assert isinstance(detections, list) + + +def test_analyse_extended_returns_expected_structure(): + result = analyse_extended(pe=None, metadata={}, strings=[]) + + assert isinstance(result, dict) + assert "note" in result + assert "planned_features" in result + + assert result["note"].startswith("Extended analysis is reserved") + assert result["planned_features"] == [ + "packer_detection", + "tls_callbacks", + "anti_debug_heuristics", + "import_anomaly_scoring", + "signature_anomalies", + "control_flow_hints", + ] From 4f86a5b198162edaa44bb623e088d1403b39137b Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 12:02:34 +0100 Subject: [PATCH 05/21] Utils lines covered entirely: addition of archive format tests and MZ magic bytes --- tests/unit/utils/test_utils.py | 33 
+++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/unit/utils/test_utils.py b/tests/unit/utils/test_utils.py index 7da04e0..29b28b1 100644 --- a/tests/unit/utils/test_utils.py +++ b/tests/unit/utils/test_utils.py @@ -24,6 +24,24 @@ def test_detect_file_type_exception_returns_unknown(patch_magic): assert detect_file_type("x") == FileType.UNKNOWN +def test_filetype_fallback_mz(tmp_path): + p = tmp_path / "mz.bin" + p.write_bytes(b"MZ" + b"\x00\xff\x10\x80") + + result = detect_file_type(str(p)) + + assert result == FileType.PE + + +def test_filetype_fallback_open_exception(tmp_path): + # Passing a directory triggers an exception on open() + result = detect_file_type(str(tmp_path)) + + # The fallback block swallows the exception and continues, + # so assert whatever the function returns after the fallback. + assert result != FileType.PE + + def test_detect_file_type_text_plain(patch_magic): patch_magic(return_value="text/plain") assert detect_file_type("x") == FileType.TEXT @@ -57,3 +75,18 @@ def test_detect_file_type_macho(patch_magic): def test_detect_file_type_unknown_mime(patch_magic): patch_magic(return_value="something/weird") assert detect_file_type("x") == FileType.UNKNOWN + + +def test_detect_file_type_zip(patch_magic): + patch_magic(return_value="application/x-zip-compressed") + assert detect_file_type("x") == FileType.ZIP + + +def test_detect_file_type_tar(patch_magic): + patch_magic(return_value="application/x-gtar") + assert detect_file_type("x") == FileType.TAR + + +def test_detect_file_type_7zip(patch_magic): + patch_magic(return_value="application/x-7z") + assert detect_file_type("x") == FileType.SEVEN_Z From bd91d25a5847e6e2a956e402d9cd8e53b2999a42 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 12:08:34 +0100 Subject: [PATCH 06/21] Increase plugin registry coverage to 100% --- tests/unit/plugins/test_plugin_loader.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git 
a/tests/unit/plugins/test_plugin_loader.py b/tests/unit/plugins/test_plugin_loader.py index 9678fc8..2bc7c4f 100644 --- a/tests/unit/plugins/test_plugin_loader.py +++ b/tests/unit/plugins/test_plugin_loader.py @@ -313,3 +313,21 @@ def test_load_local_plugins_returns_when_directory_missing(monkeypatch): assert loader.registry.detectors == [] assert loader.registry.enrichers == [] assert loader.registry.transformers == [] + + +def test_plugin_registry_register_no_metadata(): + from iocx.plugins.registry import PluginRegistry + registry = PluginRegistry() + + class PluginWithoutMetadata: + pass + + plugin = PluginWithoutMetadata() + + # Should hit: if caps is None: return + registry.register(plugin) + + # Nothing should be added + assert registry.detectors == [] + assert registry.enrichers == [] + assert registry.transformers == [] From 5ccb217357f25badc57b26661fefc2b7b26914a9 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 12:29:12 +0100 Subject: [PATCH 07/21] Get urls init coverage up to 100% --- tests/unit/extractors/urls/test_urls_init.py | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tests/unit/extractors/urls/test_urls_init.py diff --git a/tests/unit/extractors/urls/test_urls_init.py b/tests/unit/extractors/urls/test_urls_init.py new file mode 100644 index 0000000..d5a52cc --- /dev/null +++ b/tests/unit/extractors/urls/test_urls_init.py @@ -0,0 +1,30 @@ +import iocx.detectors.extractors.urls as urls_extract + + +def test_extract_strict_url_continue(monkeypatch): + # Patch the local imported names inside the extract module + monkeypatch.setattr(urls_extract, "extract_strict_urls", + lambda text: [("bad://url", 0, 10)]) + monkeypatch.setattr(urls_extract, "extract_bare_domains", + lambda text: []) + monkeypatch.setattr(urls_extract, "normalise_url", + lambda value: None) + + result = urls_extract.extract("anything") + + assert result["urls"] == [] + assert result["domains"] == [] + + +def 
test_extract_bare_domain_continue(monkeypatch): + monkeypatch.setattr(urls_extract, "extract_strict_urls", + lambda text: []) + monkeypatch.setattr(urls_extract, "extract_bare_domains", + lambda text: [("example.com", 5, 17)]) + monkeypatch.setattr(urls_extract, "normalise_url", + lambda value: None) + + result = urls_extract.extract("anything") + + assert result["urls"] == [] + assert result["domains"] == [] From 80e46ddf9e76bff0fcb3dc62022e51c69e22e984 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 12:32:51 +0100 Subject: [PATCH 08/21] Project coverage now at 100% --- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index ea4a831..dce21fe 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,7 @@ [run] omit = */.iocx/* + */iocx-archive/* */site-packages/* */distutils/* */tests/* From ea2950e653c74dc22f5754af3db3e07144994916 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 12:37:12 +0100 Subject: [PATCH 09/21] Update coverage and tests pass in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9e72127..ca49718 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ PyPI Version - Coverage - Tests + Coverage + Tests Python Version License From 1fafd2b36066f55b6a84db023778afca4c4c178c Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 15:45:12 +0100 Subject: [PATCH 10/21] Update CLI markdown to refer to the new analyse capability --- iocx/cli/CLI.md | 91 ++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/iocx/cli/CLI.md b/iocx/cli/CLI.md index d4711ce..180c319 100644 --- a/iocx/cli/CLI.md +++ b/iocx/cli/CLI.md @@ -1,42 +1,38 @@ # iocx Command‑Line Interface (CLI) -The IOCX CLI provides a simple, fast way to extract Indicators of Compromise (IOCs) from text, logs, and binaries. 
It wraps the same engine used by the Python API and exposes a clean set of options suitable for both interactive use and scripting. +The IOCX CLI provides a fast, script‑friendly interface for extracting Indicators of Compromise (IOCs) from text, logs, binaries, and PE files. It wraps the same engine used by the Python API and exposes a clean, minimal set of options suitable for both interactive use and automation. ## Installation -```bash +```bash pip install iocx - ``` After installation, the `iocx` command becomes available on your system. ## Basic Usage -Extract IOCs from a file or raw text: -```bash +Extract IOCs from raw text: +```bash iocx "Suspicious domain: evil.com" - ``` Extract from a file: -```bash - -iocx sample.bin +```bash +iocx sample.exe ``` Read from stdin: -```bash +```bash echo "visit http://malicious.com" | iocx - - ``` ## Command‑Line Options -The CLI supports a focused set of flags designed to keep the interface simple while still offering meaningful control. +The CLI is intentionally minimal: one command, one input, and a handful of focused flags. ### Input | Flag | Description | @@ -44,19 +40,20 @@ The CLI supports a focused set of flags designed to keep the interface simple wh | input | File path or raw text. Use `-` to read from stdin. | -### Output -| Flag | Description | -|--------------------|----------------------------------------------------| -| -o, --output FILE | Write JSON output to a file instead of stdout. | -| -c, --compact | Minify JSON output (indent=None). | -| -e, --enrich | Add detection enrichment data to the output | +### Output Options +| Flag | Description | +|------------------------------------------------|----------------------------------------------------| +| -o, --output FILE | Write JSON output to a file instead of stdout. | +| -c, --compact | Minify JSON output (no indentation). 
| +| -e, --enrich | Include enrichment metadata in the output | +| -a, --analyse/--analyze `[basic, deep, full]` | Enable PE analysis (default: deep) | ### Examples -```bash +```bash iocx sample.txt --compact iocx sample.txt -o results.json - +iocx sample.exe -a full ``` ## Engine Options @@ -65,31 +62,33 @@ iocx sample.txt -o results.json | --no-cache | Disable engine caching. Useful for debugging or repeated extraction tests. | -## Detector Options +## Detector, Transformer, and Enricher Options | Flag | Description | |---------------------|------------------------------------------------------------------------| -| --list-detectors | Print all available detectors and exit. | -| --list-transformers | Print all available transformers and exit. | -| --list-enrichers | Print all available enrichers and exit. | +| --list-detectors | List all available detectors and exit. | +| --list-transformers | List all available transformers and exit. | +| --list-enrichers | List all available enrichers and exit. | | -m, --min-length | Minimum printable string length for the string extractor (default: 4). | ### Example: -```bash +```bash iocx --list-detectors - ``` -## Misc +## Miscellaneous | Flag | Description | |------------------|------------------------------------------------------------------------------| | --version | Show the installed version of iocx. | -| -d, --dev | Intended for plugin developers. Loads plugins from the local environment | +| -d, --dev | Loads plugins from the local environment (for plugin developers) | ## Output Format -The CLI always emits JSON (Indent=2 by default). A typical output structure looks like: +IOCX always emits JSON. By default, output is pretty‑printed with indentation. + +### Example + ```json { "file": "example.txt", @@ -108,41 +107,36 @@ The CLI always emits JSON (Indent=2 by default). 
A typical output structure look ``` -Minified output is enabled with: -```bash +Minified output: +```bash iocx input.txt --compact - ``` -### Examples +### Additional Examples -Extract from a PE file -```bash +Extract from a PE file: +```bash iocx malware.exe - ``` -Extract from logs -```bash +Extract from logs: +```bash iocx logs.txt -o iocs.json - ``` -Pipe data in -```bash +Pipe data in: +```bash cat suspicious.log | iocx - --compact - ``` -List detectors -```bash +List detectors: +```bash iocx --list-detectors - ``` ## Exit Codes @@ -152,6 +146,7 @@ iocx --list-detectors | 1 | Invalid arguments or runtime error | ## iocx --help example + ```text usage: iocx [-h] [-o OUTPUT] [-c] [-e] [--no-cache] [--list-detectors] [--list-transformers] [--list-enrichers] [-m N] [--version] [-d] [input] @@ -168,6 +163,8 @@ Output: Write JSON output to a file instead of stdout. -c, --compact Output compact (minified) JSON. -e, --enrich Write enrichment data to the JSON output. + -a [{basic,deep,full}], --analyse [{basic,deep,full}], --analyze [{basic,deep,full}] + Enable PE analysis (basic, deep, full; default: deep). Engine Options: --no-cache Disable engine caching. @@ -189,6 +186,8 @@ The CLI is intentionally minimal: - One command (iocx) - One required argument (input) -- A handful of intuitive flags +- A small, intuitive set of flags - No subcommands - No unnecessary complexity + +The goal is to provide a fast, predictable, script‑friendly interface that mirrors the Python API without exposing internal complexity. 
From cb643adf252bb0479897ff50f6a3022f61d07109 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 16:00:52 +0100 Subject: [PATCH 11/21] Improve the SECURITY.md copy: added threat model section with explicit scope --- SECURITY.md | 60 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 34e7bdb..9860ec6 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -3,26 +3,26 @@ Thank you for your interest in the security of IOCX. We take security seriously and aim to provide a trustworthy, minimal‑dependency tool for static IOC extraction across binaries, text, and logs. -This document explains how we handle security, how to report vulnerabilities, and what you can expect from us. +This document describes our security posture, how we handle vulnerabilities, and how to report issues responsibly. ## Supported Versions -We currently support and maintain the latest release of this project. +We currently support and maintain only the latest released version of IOCX. -| Version | Supported | +| Version | Status | |----------------|------------------| -| Latest release | Active | -| Older versions | Not supported | +| Latest release | Supported | +| Older versions | Unsupported | -Security fixes are applied only to the most recent version. +Security fixes are applied exclusively to the most recent version. ## Security Posture -The project is designed with security and simplicity in mind. +IOCX is designed with security and simplicity in mind. The tool processes untrusted input by design, so the architecture prioritises isolation, defensive parsing, and minimal attack surface. 
### Minimal Runtime Dependencies -To reduce attack surface, the project intentionally uses only two runtime dependencies: +To reduce supply‑chain risk and minimise the attack surface, IOCX intentionally uses only two runtime dependencies: - pefile - PE parsing - python-magic - file‑type detection @@ -31,35 +31,52 @@ No additional libraries are required for core functionality. ### Automated Security Scanning -Every commit and pull request triggers automated checks: +All commits and pull requests undergo automated security checks: - pip‑audit — dependency vulnerability scanning - Bandit — static analysis of Python code - Pytest — full test suite execution -These checks run in CI to prevent regressions and catch issues early. +These checks run in CI to catch regressions early. ### Safe Handling of Untrusted Input -The tool is designed to process potentially malicious files. To reduce risk: +IOCX is designed to process potentially malicious files safely. To reduce risk: - No dynamic code execution - No deserialization of untrusted data - No network access - Strict parsing of binary formats - Defensive exception handling in extractors and parsers +- No mutation of input files ### No Elevated Privileges Required -The tool runs entirely in user space and does not require: +IOCX runs entirely in user space and does not require: - root/admin privileges - kernel extensions - system‑level hooks +This reduces the impact of potential vulnerabilities. + +## Threat Model (Scope & Limitations) + +IOCX is a static extraction tool, not a sandbox or malware analysis framework. + +The following are out of scope: + +- Detecting or preventing active exploitation +- Executing or emulating malware +- Analysing runtime behaviour +- Guaranteeing correctness of third‑party plugins +- Protecting against malicious Python environments or compromised dependencies + +Users should run IOCX in a controlled environment when analysing untrusted binaries. 
+ ## Reporting a Vulnerability -If you discover a security issue, we appreciate responsible disclosure. +We appreciate responsible disclosure and welcome reports from the community. ### How to report @@ -67,16 +84,16 @@ Please email: security@malx.io Include: -- Description of the issue +- A clear description of the issue - Steps to reproduce - Potential impact -- Any suggested fixes +- Any suggested fixes or patches We aim to acknowledge reports within 72 hours. -### Please do not open public GitHub issues for security problems +### Do Not Open Public GitHub Issues -This helps protect users while we investigate and patch the issue. +Please avoid filing public issues for security problems. This protects users while we investigate and patch the issue. ## Vulnerability Disclosure Process @@ -85,14 +102,15 @@ This helps protect users while we investigate and patch the issue. 3. We develop and test a fix. 4. We release a patched version. 5. We publish a security advisory (if applicable). -6. We credit the reporter. +6. We credit the reporter (unless anonymity is requested). ## Responsible Disclosure -We ask that you: +We ask that reporters: -- Give us reasonable time to fix the issue before public disclosure +- Allow reasonable time for us to develop a fix - Avoid exploiting the vulnerability beyond what is necessary for proof‑of‑concept - Avoid accessing or modifying user data +- Refrain from public disclosure until a fix is released -We appreciate your help in keeping the project secure. +We appreciate your help in keeping IOCX secure. 
From d34d26c8580ff4a85dad20429bed7ccf0e6c7976 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 16:06:50 +0100 Subject: [PATCH 12/21] Initial commit of the threat model and STRIDE diagrams --- docs/security/threat-model.md | 89 +++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 docs/security/threat-model.md diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md new file mode 100644 index 0000000..83b1fc2 --- /dev/null +++ b/docs/security/threat-model.md @@ -0,0 +1,89 @@ +```mermaid +flowchart TD + + subgraph User Environment + U[User / Analyst] + CLI[IOCX CLI] + end + + subgraph IOCX Engine + DET[Detectors
Transformers
Enrichers] + PE[PE Parser (pefile)] + MAGIC[File Type Detection (python-magic)] + CACHE[Local Cache] + end + + subgraph Untrusted Input + F[Untrusted File
(binary, text, logs)] + end + + U --> CLI + CLI -->|Input path or stdin| F + F -->|Read & Parse| MAGIC + F -->|Binary parsing| PE + MAGIC --> DET + PE --> DET + DET --> CACHE + DET -->|Extracted IOCs| CLI + CLI -->|JSON Output| U + + %% Threat Boundaries + classDef boundary fill:#f0f0f0,stroke:#555,stroke-width:2px; + class Untrusted Input boundary; + class IOCX Engine boundary; + class User Environment boundary; + + %% Threat Indicators + F -. Potentially malicious content .-> DET + F -. Potentially malformed binaries .-> PE + +``` + +```mermaid +flowchart TD + + subgraph External Actors + A[Attacker] + U[User] + end + + subgraph IOCX CLI + CLI[CLI Frontend] + end + + subgraph Engine + DET[Detectors / Transformers] + PE[PE Parser] + MAGIC[File Type Detection] + CACHE[Local Cache] + end + + subgraph Data + F[Untrusted Input File] + O[JSON Output] + end + + %% Data Flows + U --> CLI + CLI --> F + F --> MAGIC + F --> PE + MAGIC --> DET + PE --> DET + DET --> CACHE + DET --> O + CLI --> O + O --> U + + %% Threats + A -. Supplies malformed binaries .-> F + A -. Attempts parser abuse .-> PE + A -. Attempts type confusion .-> MAGIC + A -. Attempts detector bypass .-> DET + + %% Boundaries + classDef boundary fill:#f0f0f0,stroke:#333,stroke-width:2px; + class IOCX CLI boundary; + class Engine boundary; + class Data boundary; +``` From 71c29c2b8c19bea0dd0389487b36827e880e1081 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 16:10:24 +0100 Subject: [PATCH 13/21] Fix mermaid rendering issues --- docs/security/threat-model.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md index 83b1fc2..d41a4e2 100644 --- a/docs/security/threat-model.md +++ b/docs/security/threat-model.md @@ -2,19 +2,19 @@ flowchart TD subgraph User Environment - U[User / Analyst] + U[User or Analyst] CLI[IOCX CLI] end subgraph IOCX Engine - DET[Detectors
Transformers
Enrichers] - PE[PE Parser (pefile)] - MAGIC[File Type Detection (python-magic)] + DET[Detectors, Transformers, Enrichers] + PE[PE Parser] + MAGIC[File Type Detection] CACHE[Local Cache] end subgraph Untrusted Input - F[Untrusted File
(binary, text, logs)] + F[Untrusted File] end U --> CLI @@ -52,7 +52,7 @@ flowchart TD end subgraph Engine - DET[Detectors / Transformers] + DET[Detectors, Transformers] PE[PE Parser] MAGIC[File Type Detection] CACHE[Local Cache] From 6f0e62ac1edb99af47bcdf2df1672d774eb593da Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 16:14:04 +0100 Subject: [PATCH 14/21] Fix mermaid rendering issues #2 --- docs/security/threat-model.md | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md index d41a4e2..9cab096 100644 --- a/docs/security/threat-model.md +++ b/docs/security/threat-model.md @@ -27,12 +27,6 @@ flowchart TD DET -->|Extracted IOCs| CLI CLI -->|JSON Output| U - %% Threat Boundaries - classDef boundary fill:#f0f0f0,stroke:#555,stroke-width:2px; - class Untrusted Input boundary; - class IOCX Engine boundary; - class User Environment boundary; - %% Threat Indicators F -. Potentially malicious content .-> DET F -. Potentially malformed binaries .-> PE @@ -80,10 +74,4 @@ flowchart TD A -. Attempts parser abuse .-> PE A -. Attempts type confusion .-> MAGIC A -. 
Attempts detector bypass .-> DET - - %% Boundaries - classDef boundary fill:#f0f0f0,stroke:#333,stroke-width:2px; - class IOCX CLI boundary; - class Engine boundary; - class Data boundary; ``` From 56b4d4f3a03c564e0eb8b58c8f4459e74ff01646 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Thu, 9 Apr 2026 16:21:08 +0100 Subject: [PATCH 15/21] Add supporting copy to DFD and stride threat models --- docs/security/threat-model.md | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md index 9cab096..7be466b 100644 --- a/docs/security/threat-model.md +++ b/docs/security/threat-model.md @@ -1,3 +1,19 @@ +# Threat Model Overview + +The following diagrams illustrate the IOCX security model, focusing on how untrusted data flows through the system and where potential threats may arise. IOCX is designed to process hostile input safely, so understanding these boundaries helps clarify the project’s defensive posture. + +IOCX operates as a static extraction tool: it does not execute binaries, load external code, or perform dynamic analysis. The attack surface is intentionally small, with strict parsing, minimal dependencies, and no network access. + +## Data-Flow Diagram (DFD) + +This diagram shows the major components involved when IOCX processes untrusted input. It highlights the trust boundaries between the user environment, the IOCX engine, and the untrusted data being analysed. + +- **User Environment** represents the analyst invoking the CLI. +- **Untrusted Input** includes any file provided to IOCX—text, logs, binaries, or potentially malicious samples. +- **IOCX Engine** contains the detectors, parsers, and supporting components that operate on the input. + +Threat indicators show where malformed or malicious content may attempt to influence the system. 
+ ```mermaid flowchart TD @@ -33,6 +49,19 @@ flowchart TD ``` +## STRIDE‑Oriented Threat Interaction Diagram + +This diagram expands the model to include an explicit attacker. It illustrates how an adversary might attempt to exploit the system by supplying malformed binaries, abusing parsers, or attempting to bypass detectors. + +The diagram also shows the flow of data through the CLI, engine, and output stages, making it clear where IOCX must remain defensive. + +Key points: + +- Attackers interact **only** through untrusted input (files or stdin). +- IOCX performs **no dynamic execution**, reducing risk. +- All parsing is done in user space with strict error handling. +- Output is deterministic JSON with no side effects. + ```mermaid flowchart TD @@ -75,3 +104,15 @@ flowchart TD A -. Attempts type confusion .-> MAGIC A -. Attempts detector bypass .-> DET ``` + +## How These Diagrams Fit Into IOCX’s Security Posture + +These diagrams support the project’s security goals by: + +- Defining clear trust boundaries +- Identifying where untrusted data enters the system +- Highlighting components that must be hardened +- Demonstrating that IOCX avoids high‑risk behaviours (execution, deserialization, network access) +- Providing transparency for auditors, contributors, and users + +Together, they form the foundation of IOCX’s threat model and help guide secure development practices. 
From 238b99b43687560a88064ed3ad839a72e43e19cf Mon Sep 17 00:00:00 2001 From: malx-labs Date: Fri, 10 Apr 2026 10:29:26 +0100 Subject: [PATCH 16/21] Fix data flow direction error in STRIDE diagram --- docs/security/threat-model.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md index 7be466b..578e783 100644 --- a/docs/security/threat-model.md +++ b/docs/security/threat-model.md @@ -65,12 +65,12 @@ Key points: ```mermaid flowchart TD - subgraph External Actors + subgraph External_Actors A[Attacker] U[User] end - subgraph IOCX CLI + subgraph IOCX_CLI CLI[CLI Frontend] end @@ -88,7 +88,7 @@ flowchart TD %% Data Flows U --> CLI - CLI --> F + F --> CLI F --> MAGIC F --> PE MAGIC --> DET From bde424c23ec7681d80e96358d7f54a712963aac8 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Fri, 10 Apr 2026 10:33:43 +0100 Subject: [PATCH 17/21] Add missing enrichers from the engine STRIDE diagram --- docs/security/threat-model.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md index 578e783..4c28901 100644 --- a/docs/security/threat-model.md +++ b/docs/security/threat-model.md @@ -65,17 +65,17 @@ Key points: ```mermaid flowchart TD - subgraph External_Actors + subgraph External Actors A[Attacker] U[User] end - subgraph IOCX_CLI + subgraph IOCX CLI CLI[CLI Frontend] end subgraph Engine - DET[Detectors, Transformers] + DET[Detectors, Transformers, Enrichers] PE[PE Parser] MAGIC[File Type Detection] CACHE[Local Cache] From 35418ed3025d59f1c135103977024da092244f9b Mon Sep 17 00:00:00 2001 From: malx-labs Date: Fri, 10 Apr 2026 10:39:18 +0100 Subject: [PATCH 18/21] Add STRIDE category tables --- docs/security/threat-model.md | 77 +++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md index 4c28901..4f4755e 100644 --- 
a/docs/security/threat-model.md +++ b/docs/security/threat-model.md @@ -105,6 +105,83 @@ flowchart TD A -. Attempts detector bypass .-> DET ``` +### 1. IOCX CLI + +| STRIDE | Threat | Description | Mitigation | +|--------|-------------------------| -----------------------------------------------------------|------------------------------------------------------------------------| +| **S** | Spoofing | Attacker pretends to be a legitimate user invoking the CLI | IOCX relies on OS‑level authentication; no internal identity system | +| **T** | Tampering | Manipulation of CLI arguments or environment | Defensive parsing; no privileged operations; no dynamic code execution | +| **R** | Repudiation | User denies running IOCX | Local‑only tool; no sensitive logging | +| **I** | Information Disclosure | CLI prints sensitive data | Output is deterministic JSON; no internal state leakage | +| **D** | Denial of Service | Attacker supplies huge or malformed input | Timeouts; strict parsing; exception handling | +| **E** | Elevation of Privilege | CLI used to escalate privileges | Runs entirely in user space; no privileged operations | + +### 2. Untrusted Input (Files, Logs, Binaries) + +| STRIDE | Threat | Description | Mitigation | +| --- | --- | --- | --- | +| **S** | Spoofing | Fake file types | Signature‑based detection via python‑magic | +| **T** | Tampering | Malformed binaries crafted to break parsers | Defensive parsing; try/except wrappers | +| **R** | Repudiation | Attacker denies supplying malicious file | Out of scope; IOCX does not track provenance | +| **I** | Information Disclosure | Sensitive data inside files | IOCX does not transmit or store data | +| **D** | Denial of Service | Zip bombs, oversized binaries, pathological inputs | Bounded parsing; timeouts | +| **E** | Elevation of Privilege | Malicious file triggers code execution | No execution, no deserialization, no eval | + +### 3. 
File Type Detection (python‑magic) + +| STRIDE | Threat | Description | Mitigation | +| --- | --- | --- | --- | +| **S** | Spoofing | File claims incorrect MIME type | Signature‑based detection | +| **T** | Tampering | Malformed headers crash detection | Exception handling; safe fallback | +| **R** | Repudiation | Incorrect type classification | Non‑security‑critical; local‑only | +| **I** | Information Disclosure | Revealing internal detection logic | No sensitive data; local‑only | +| **D** | Denial of Service | Crafted files cause excessive scanning | Bounded reads; timeouts | +| **E** | Elevation of Privilege | Exploiting python‑magic | Minimal dependency; audited regularly | + +### 4. PE Parser (pefile) + +| STRIDE | Threat | Description | Mitigation | +| --- | --- | --- | --- | +| **S** | Spoofing | Fake PE headers | Structural validation by pefile | +| **T** | Tampering | Malformed PE triggers parser bugs | Defensive parsing; try/except wrappers | +| **R** | Repudiation | Incorrect parsing results | Local‑only; no persistence | +| **I** | Information Disclosure | Extracting sensitive metadata | Output is user‑controlled | +| **D** | Denial of Service | Malformed PE causing recursion or heavy parsing | Timeouts; bounded parsing | +| **E** | Elevation of Privilege | Exploiting pefile | No execution of PE content | + +### 5. 
Detectors / Transformers / Enrichers + +| STRIDE | Threat | Description | Mitigation | +| --- | --- | --- | --- | +| **S** | Spoofing | Fake IOC patterns | Deterministic regex‑based detection | +| **T** | Tampering | Malicious input breaks detectors | Strict parsing; no dynamic code | +| **R** | Repudiation | Incorrect detection results | Local‑only; no persistence | +| **I** | Information Disclosure | Extracted IOCs reveal sensitive data | User controls output; no network access | +| **D** | Denial of Service | Regex backtracking attacks | Pre‑compiled safe regex patterns | +| **E** | Elevation of Privilege | Plugin system abused | ``--dev`` mode opt‑in; no auto‑loading | + +### 6. Local Cache + +| STRIDE | Threat | Description | Mitigation | +| --- | --- | --- | --- | +| **S** | Spoofing | Cache poisoning | Cache is local and ephemeral | +| **T** | Tampering | Attacker modifies cache files | Cache disabled by default; no sensitive data | +| **R** | Repudiation | Cache state disputes | Cache is non‑persistent | +| **I** | Information Disclosure | Cache leaks sensitive data | Cache stores only extraction metadata | +| **D** | Denial of Service | Cache grows unbounded | Cache is small and optional | +| **E** | Elevation of Privilege | Cache used to load code | Cache stores no executable content | + +### 7. 
JSON Output + +| STRIDE | Threat | Description | Mitigation | +| --- | --- | --- | --- | +| **S** | Spoofing | Fake output injected | Output is deterministic; no external input | +| **T** | Tampering | Output modified in transit | Local‑only; user controls destination | +| **R** | Repudiation | User denies output | Out of scope; no logging | +| **I** | Information Disclosure | Sensitive IOCs exposed | User controls output file | +| **D** | Denial of Service | Huge output from pathological input | Bounded extraction | +| **E** | Elevation of Privilege | Output used to trigger downstream tools | JSON only; no executable content | + ## How These Diagrams Fit Into IOCX’s Security Posture These diagrams support the project’s security goals by: From a63d39be9c3792f44d9b573493490e7a5415482d Mon Sep 17 00:00:00 2001 From: malx-labs Date: Fri, 10 Apr 2026 10:53:34 +0100 Subject: [PATCH 19/21] Fix STRIDE table formatting --- docs/security/threat-model.md | 96 +++++++++++++++++------------------ 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md index 4f4755e..69b4e65 100644 --- a/docs/security/threat-model.md +++ b/docs/security/threat-model.md @@ -118,69 +118,69 @@ flowchart TD ### 2. 
Untrusted Input (Files, Logs, Binaries) -| STRIDE | Threat | Description | Mitigation | -| --- | --- | --- | --- | -| **S** | Spoofing | Fake file types | Signature‑based detection via python‑magic | -| **T** | Tampering | Malformed binaries crafted to break parsers | Defensive parsing; try/except wrappers | -| **R** | Repudiation | Attacker denies supplying malicious file | Out of scope; IOCX does not track provenance | -| **I** | Information Disclosure | Sensitive data inside files | IOCX does not transmit or store data | -| **D** | Denial of Service | Zip bombs, oversized binaries, pathological inputs | Bounded parsing; timeouts | -| **E** | Elevation of Privilege | Malicious file triggers code execution | No execution, no deserialization, no eval | +| STRIDE | Threat | Description | Mitigation | +|--------|------------------------|----------------------------------------------------|----------------------------------------------| +| **S** | Spoofing | Fake file types | Signature‑based detection via python‑magic | +| **T** | Tampering | Malformed binaries crafted to break parsers | Defensive parsing; try/except wrappers | +| **R** | Repudiation | Attacker denies supplying malicious file | Out of scope; IOCX does not track provenance | +| **I** | Information Disclosure | Sensitive data inside files | IOCX does not transmit or store data | +| **D** | Denial of Service | Zip bombs, oversized binaries, pathological inputs | Bounded parsing; timeouts | +| **E** | Elevation of Privilege | Malicious file triggers code execution | No execution, no deserialization, no eval | ### 3. 
File Type Detection (python‑magic) -| STRIDE | Threat | Description | Mitigation | -| --- | --- | --- | --- | -| **S** | Spoofing | File claims incorrect MIME type | Signature‑based detection | -| **T** | Tampering | Malformed headers crash detection | Exception handling; safe fallback | -| **R** | Repudiation | Incorrect type classification | Non‑security‑critical; local‑only | -| **I** | Information Disclosure | Revealing internal detection logic | No sensitive data; local‑only | -| **D** | Denial of Service | Crafted files cause excessive scanning | Bounded reads; timeouts | -| **E** | Elevation of Privilege | Exploiting python‑magic | Minimal dependency; audited regularly | +| STRIDE | Threat | Description | Mitigation | +|--------|------------------------|----------------------------------------|---------------------------------------| +| **S** | Spoofing | File claims incorrect MIME type | Signature‑based detection | +| **T** | Tampering | Malformed headers crash detection | Exception handling; safe fallback | +| **R** | Repudiation | Incorrect type classification | Non‑security‑critical; local‑only | +| **I** | Information Disclosure | Revealing internal detection logic | No sensitive data; local‑only | +| **D** | Denial of Service | Crafted files cause excessive scanning | Bounded reads; timeouts | +| **E** | Elevation of Privilege | Exploiting python‑magic | Minimal dependency; audited regularly | ### 4. 
PE Parser (pefile) -| STRIDE | Threat | Description | Mitigation | -| --- | --- | --- | --- | -| **S** | Spoofing | Fake PE headers | Structural validation by pefile | -| **T** | Tampering | Malformed PE triggers parser bugs | Defensive parsing; try/except wrappers | -| **R** | Repudiation | Incorrect parsing results | Local‑only; no persistence | -| **I** | Information Disclosure | Extracting sensitive metadata | Output is user‑controlled | -| **D** | Denial of Service | Malformed PE causing recursion or heavy parsing | Timeouts; bounded parsing | -| **E** | Elevation of Privilege | Exploiting pefile | No execution of PE content | +| STRIDE | Threat | Description | Mitigation | +|--------|------------------------|-------------------------------------------------|----------------------------------------| +| **S** | Spoofing | Fake PE headers | Structural validation by pefile | +| **T** | Tampering | Malformed PE triggers parser bugs | Defensive parsing; try/except wrappers | +| **R** | Repudiation | Incorrect parsing results | Local‑only; no persistence | +| **I** | Information Disclosure | Extracting sensitive metadata | Output is user‑controlled | +| **D** | Denial of Service | Malformed PE causing recursion or heavy parsing | Timeouts; bounded parsing | +| **E** | Elevation of Privilege | Exploiting pefile | No execution of PE content | ### 5. 
Detectors / Transformers / Enrichers -| STRIDE | Threat | Description | Mitigation | -| --- | --- | --- | --- | -| **S** | Spoofing | Fake IOC patterns | Deterministic regex‑based detection | -| **T** | Tampering | Malicious input breaks detectors | Strict parsing; no dynamic code | -| **R** | Repudiation | Incorrect detection results | Local‑only; no persistence | -| **I** | Information Disclosure | Extracted IOCs reveal sensitive data | User controls output; no network access | -| **D** | Denial of Service | Regex backtracking attacks | Pre‑compiled safe regex patterns | -| **E** | Elevation of Privilege | Plugin system abused | ``--dev`` mode opt‑in; no auto‑loading | +| STRIDE | Threat | Description | Mitigation | +|--------|------------------------|--------------------------------------|-----------------------------------------| +| **S** | Spoofing | Fake IOC patterns | Deterministic regex‑based detection | +| **T** | Tampering | Malicious input breaks detectors | Strict parsing; no dynamic code | +| **R** | Repudiation | Incorrect detection results | Local‑only; no persistence | +| **I** | Information Disclosure | Extracted IOCs reveal sensitive data | User controls output; no network access | +| **D** | Denial of Service | Regex backtracking attacks | Pre‑compiled safe regex patterns | +| **E** | Elevation of Privilege | Plugin system abused | ``--dev`` mode opt‑in; no auto‑loading | ### 6. 
Local Cache -| STRIDE | Threat | Description | Mitigation | -| --- | --- | --- | --- | -| **S** | Spoofing | Cache poisoning | Cache is local and ephemeral | -| **T** | Tampering | Attacker modifies cache files | Cache disabled by default; no sensitive data | -| **R** | Repudiation | Cache state disputes | Cache is non‑persistent | -| **I** | Information Disclosure | Cache leaks sensitive data | Cache stores only extraction metadata | -| **D** | Denial of Service | Cache grows unbounded | Cache is small and optional | -| **E** | Elevation of Privilege | Cache used to load code | Cache stores no executable content | +| STRIDE | Threat | Description | Mitigation | +|--------|------------------------|-------------------------------|----------------------------------------------| +| **S** | Spoofing | Cache poisoning | Cache is local and ephemeral | +| **T** | Tampering | Attacker modifies cache files | Cache disabled by default; no sensitive data | +| **R** | Repudiation | Cache state disputes | Cache is non‑persistent | +| **I** | Information Disclosure | Cache leaks sensitive data | Cache stores only extraction metadata | +| **D** | Denial of Service | Cache grows unbounded | Cache is small and optional | +| **E** | Elevation of Privilege | Cache used to load code | Cache stores no executable content | ### 7. 
JSON Output -| STRIDE | Threat | Description | Mitigation | -| --- | --- | --- | --- | -| **S** | Spoofing | Fake output injected | Output is deterministic; no external input | -| **T** | Tampering | Output modified in transit | Local‑only; user controls destination | -| **R** | Repudiation | User denies output | Out of scope; no logging | -| **I** | Information Disclosure | Sensitive IOCs exposed | User controls output file | -| **D** | Denial of Service | Huge output from pathological input | Bounded extraction | -| **E** | Elevation of Privilege | Output used to trigger downstream tools | JSON only; no executable content | +| STRIDE | Threat | Description | Mitigation | +|--------|------------------------|-----------------------------------------|--------------------------------------------| +| **S** | Spoofing | Fake output injected | Output is deterministic; no external input | +| **T** | Tampering | Output modified in transit | Local‑only; user controls destination | +| **R** | Repudiation | User denies output | Out of scope; no logging | +| **I** | Information Disclosure | Sensitive IOCs exposed | User controls output file | +| **D** | Denial of Service | Huge output from pathological input | Bounded extraction | +| **E** | Elevation of Privilege | Output used to trigger downstream tools | JSON only; no executable content | ## How These Diagrams Fit Into IOCX’s Security Posture From 61f7b247f19999a7c510a8f67f9a0f19c8b2b06e Mon Sep 17 00:00:00 2001 From: malx-labs Date: Fri, 10 Apr 2026 11:10:17 +0100 Subject: [PATCH 20/21] Tighten up CONTRIBUTING.md copy --- CONTRIBUTING.md | 71 +++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ec9bf11..d25e570 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,38 +1,41 @@ # Contributing to IOCX -Thank you for your interest in contributing to IOCX. 
This project is part of the MalX Labs ecosystem — a collection of modern, developer‑friendly security tools focused on safe, scalable analysis. +Thank you for your interest in contributing to IOCX. +IOCX is part of the MalX Labs ecosystem — a family of modern, deterministic, developer‑friendly security tools designed for safe analysis of untrusted data. -We welcome improvements of all kinds: bug fixes, new extractors, static‑analysis enhancements, documentation updates, and thoughtful discussions. This guide explains how to contribute effectively while keeping the project consistent, deterministic, and maintainable. +We welcome contributions of all kinds: bug fixes, static‑analysis improvements, new extractors, documentation updates, and thoughtful design discussions. +This guide explains how to contribute effectively while keeping IOCX predictable, secure, and maintainable. ## Project Philosophy IOCX is intentionally: -- Minimal — very small dependency footprint +- Minimal — extremely small dependency footprint - Secure — safe handling of untrusted input -- Predictable — deterministic behaviour, no network access -- Extensible — new extractors and static‑analysis modules can be added cleanly +- Deterministic — identical input always produces identical output; no network access +- Extensible — new static‑analysis modules can be added cleanly All contributions must align with these principles. ## Core vs Plugins -IOCX is built around a clear separation between core functionality and plugin‑based extensions. This boundary keeps the project focused and deterministic while allowing contributors to extend it safely. +IOCX has a strict boundary between core functionality and plugin‑based extensions. +This keeps the core predictable and universally safe while allowing users to extend IOCX for their own environments. 
### What Belongs in the Core -Core functionality is: +Core functionality must be: - derived entirely from the input file or text - deterministic and reproducible -- universally useful to all users +- universally useful - lightweight and dependency‑minimal -- fundamental to static IOC extraction and analysis +- fundamental to static IOC extraction Examples: - PE metadata extraction -- entropy scoring +- entropy calculations - section/structure heuristics - import/API heuristics - phishing/lure string heuristics @@ -45,45 +48,61 @@ If the information comes from the file itself, it belongs in the core. Plugins are for functionality that is: -- optional -- environment‑specific -- user‑provided or user‑maintained +- optional or environment‑specific - based on external data -- not universally applicable -- likely to evolve independently of the core +- organisation‑specific +- user-maintained +- likely to evolve independently Examples: - offline reputation matching (local hash/domain/IP lists) - organisation‑specific heuristics -- custom keyword or lure lists +- custom lure or keyword lists - internal threat‑intel integrations If the information comes from the user’s environment, it belongs in a plugin. -This separation ensures IOCX remains clean, predictable, and safe to run anywhere, while still enabling powerful extensions. +This separation keeps IOCX clean, predictable, and safe to run anywhere. ## How to Contribute ### Fix bugs -Open an issue or submit a PR with a clear description and reproduction steps. +Open an issue or submit a PR with: + +- a clear description +- reproduction steps +- expected vs actual behaviour ### Add new IOC extractors -Regex‑based extractors live under `detectors/extractors/`. 
+Regex‑based extractors live under: + +``` +detectors/extractors/ +``` Please include: - a clear, well-scoped regex - validation logic - test cases -- test cases - example inputs +Extractors must be: + +- deterministic +- side‑effect‑free +- safe for untrusted input + ### Improve PE parsing -Enhancements to metadata extraction, imports, sections, or resources are welcome — provided they remain deterministic and static. +Enhancements to metadata extraction, imports, sections, or resources are welcome — provided they remain: + +- static +- deterministic +- dependency-minimal ### Add synthetic test samples @@ -92,7 +111,7 @@ See the “Testing” section below. ### Improve documentation -Better examples, diagrams, or explanations are always appreciated. +Better examples, diagrams, and explanations are always appreciated. ### Contribution Process @@ -131,8 +150,6 @@ pip-audit --skip-editable 6. Open a Pull Request -When your changes are ready: - - Target the main branch - Describe what you changed and why - Link any related issues @@ -174,16 +191,18 @@ We use pytest. ## Adding New Extractors +Extractors live in: + ```plaintext iocx/detectors/extractors/ ``` -To add a new extractor: +To add one: - Create a new file in that directory - Follow existing patterns - Ensure it registers itself on import -- Add tests under tests/unit/extractors/ +- Add tests under `tests/unit/extractors/` Extractors must be: From 38d57ea3460e3c191052217355a7f92667a8f216 Mon Sep 17 00:00:00 2001 From: malx-labs Date: Fri, 10 Apr 2026 11:13:00 +0100 Subject: [PATCH 21/21] Link up the threat model diagrams from SECURITY --- SECURITY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SECURITY.md b/SECURITY.md index 9860ec6..d98af05 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -74,6 +74,8 @@ The following are out of scope: Users should run IOCX in a controlled environment when analysing untrusted binaries. 
+Refer to the [threat model overview](/docs/security/threat-model.md) for the data-flow diagram, the STRIDE‑oriented threat interaction diagram, and the per-component STRIDE tables. + ## Reporting a Vulnerability We appreciate responsible disclosure and welcome reports from the community.