ARPAHLS · rosspeili · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -26,6 +26,9 @@ jobs:
         python -m pip install --upgrade pip
         pip install -e ".[dev,all]"
 
+    - name: Check formatting with black
+      run: python -m black --check .
+
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,8 @@ Contributors add user-facing entries under `[Unreleased]` in the same PR. Mainta
 - **CI**: GitHub Actions installs dependencies from `pyproject.toml` only (`pip install -e ".[dev,all]"`); removed redundant manual pip pins. CI runs `pytest tests/` only; co-located `skills/**/test_skill.py` remains a local pre-PR step (#151).
 - **Documentation**: [TESTING.md](docs/TESTING.md) and [CONTRIBUTING.md](CONTRIBUTING.md) aligned with CI scope and local skill-test workflow (#151).
 - **Documentation**: Updated [COMPARISON.md](COMPARISON.md) and README for Agent Skills (SKILL.md) open standard and fairer MCP framing ([Docs]: Light refresh of COMPARISON.md #123).
+- **CI**: Applied a repo-wide Black formatting pass; GitHub Actions now runs `python -m black --check .` before flake8 and fails the build on unformatted files (#153).
+- **Documentation**: [TESTING.md](docs/TESTING.md) and [CONTRIBUTING.md](CONTRIBUTING.md) updated for CI Black check (#153).
 
 ## [0.3.3] - 2026-05-29
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -111,8 +111,8 @@ Follow the [Agent Code of Conduct](CODE_OF_CONDUCT.md): deterministic skill outp
 ### Tests and CI
 
 - Add or update tests when behavior changes.
-- **GitHub Actions** installs `pip install -e ".[dev,all]"`, runs `flake8 .`, then **`pytest tests/`** only. Do not add per-skill pip lines or test paths to `.github/workflows/ci.yml`.
-- Run `python -m flake8 .` and `pytest tests/` locally before opening a PR (same scope as CI).
+- **GitHub Actions** installs dependencies with `pip install -e ".[dev,all]"`, then runs, in order, `python -m black --check .`, `flake8 .`, and **`pytest tests/`** only. Do not add per-skill pip lines or test paths to `.github/workflows/ci.yml`.
+- Run `python -m black --check .`, `python -m flake8 .`, and `pytest tests/` locally before opening a PR (same scope as CI).
 - For skill work, also run `pytest skills/<category>/<skill_name>/test_skill.py` locally and install any packages from that skill's `manifest.yaml` `requirements`.
 - Wait for GitHub Actions CI to pass before requesting review.
 
@@ -134,7 +134,7 @@ Agents must follow [Agent Contribution Workflow](docs/contributing/ai_native_wor
 4. **Verify locally**:
 
    ```bash
-   python -m black .
+   python -m black --check .
    python -m flake8 .
    pytest tests/
    ```

diff --git a/docs/TESTING.md b/docs/TESTING.md
@@ -31,7 +31,7 @@ Run Black on the entire repository to automatically fix formatting issues:
 python -m black .
 ```
 
-Black is recommended locally before opening a PR. CI does not gate on Black yet; a future release may add `black --check` after the codebase is fully formatted.
+Run `python -m black --check .` to verify formatting without writing files; it exits non-zero if any file would be reformatted. GitHub Actions runs the same check before flake8 and pytest, so unformatted code fails CI.
 
 ## 2. Linting (Flake8)
 
@@ -89,7 +89,7 @@ Install any packages listed in the skill's `manifest.yaml` `requirements` before
 Before pushing your code, run the following commands to ensure your changes are ready for review:
 
 1. `skillware list` (Verify install and path resolution are working)
-2. `python -m black .` (Format code)
+2. `python -m black --check .` (Verify formatting; use `python -m black .` to fix)
 3. `python -m flake8 .` (Check quality)
 4. `python -m pytest tests/` (Verify framework functionality — same scope as CI)
 5. `python -m pytest skills/<category>/<skill_name>/test_skill.py` when your PR touches that skill (local only)
diff --git a/examples/build_dataset_demo.py b/examples/build_dataset_demo.py
@@ -9,10 +9,8 @@ def main():
     load_env_file()
 
     print("Loading Synthetic Data Generator Skill...")
-    skill_bundle = SkillLoader.load_skill(
-        "data_engineering/synthetic_generator"
-    )
-    SyntheticGeneratorSkill = skill_bundle['module'].SyntheticGeneratorSkill
+    skill_bundle = SkillLoader.load_skill("data_engineering/synthetic_generator")
+    SyntheticGeneratorSkill = skill_bundle["module"].SyntheticGeneratorSkill
 
     generator = SyntheticGeneratorSkill()
 
@@ -27,21 +25,23 @@ def main():
         "scenarios like obscure comorbidities fighting with dual-insurance."
     )
 
-    result = generator.execute({
-        "domain": "medical_coding_disputes",
-        "num_samples": 10,
-        "entropy_temperature": 0.9,
-        "diversity_prompt": prompt,
-        "model_provider": "gemini",
-        "model_name": "gemini-2.5-flash-lite"
-    })
+    result = generator.execute(
+        {
+            "domain": "medical_coding_disputes",
+            "num_samples": 10,
+            "entropy_temperature": 0.9,
+            "diversity_prompt": prompt,
+            "model_provider": "gemini",
+            "model_name": "gemini-2.5-flash-lite",
+        }
+    )
 
     elapsed = time.time() - start_time
     print(f"Time Taken: {elapsed:.2f} seconds")
 
     if result.get("status") == "success":
-        score = result.get('entropy_score')
-        samples = result.get('samples', [])
+        score = result.get("entropy_score")
+        samples = result.get("samples", [])
         print(f"✅ Success! Entropy Score: {score}")
         print(f"Extracted {len(samples)} samples out of requested 10.")
         dataset.extend(samples)

diff --git a/examples/mica_rag_flow.py b/examples/mica_rag_flow.py
@@ -33,9 +33,7 @@ def main():
 Wait for the response before making your final compliant determination.
 """
 
-    user_query = (
-        "How do I get an authorization to be a crypto-asset service provider (CASP) in the EU?"
-    )
+    user_query = "How do I get an authorization to be a crypto-asset service provider (CASP) in the EU?"
 
     print(f"\n[User]: {user_query}")
     print("-" * 50)

diff --git a/examples/ollama_skills_test.py b/examples/ollama_skills_test.py
@@ -14,7 +14,11 @@ def load_and_initialize_skill(path):
     skill_class = None
     for attr_name in dir(bundle["module"]):
         attr = getattr(bundle["module"], attr_name)
-        if isinstance(attr, type) and issubclass(attr, BaseSkill) and attr is not BaseSkill:
+        if (
+            isinstance(attr, type)
+            and issubclass(attr, BaseSkill)
+            and attr is not BaseSkill
+        ):
             skill_class = attr
             break
     if not skill_class:
@@ -26,7 +30,7 @@ def load_and_initialize_skill(path):
 SKILL_PATHS = [
     "finance/wallet_screening",
     "office/pdf_form_filler",
-    "optimization/prompt_rewriter"
+    "optimization/prompt_rewriter",
 ]
 
 skills_registry = {}
@@ -63,7 +67,9 @@ def load_and_initialize_skill(path):
 Once you have the results, provide your final answer to the user.
 
 Here are the available skills and their instructions:
-""" + "\n---\n".join(tool_descriptions)
+""" + "\n---\n".join(
+    tool_descriptions
+)
 
 # 3. Setup Ollama Chat
 model_name = "llama3"
@@ -76,17 +82,14 @@ def load_and_initialize_skill(path):
 
 messages = [
     {"role": "system", "content": combined_system_prompt},
-    {"role": "user", "content": user_query}
+    {"role": "user", "content": user_query},
 ]
 
 print(f"\n🤖 Calling Ollama model: {model_name}...")
 
 # 4. Handle Conversation & Tool Parsing Loop
 for _ in range(5):  # Max steps to prevent infinite loops
-    response = ollama.chat(
-        model=model_name,
-        messages=messages
-    )
+    response = ollama.chat(model=model_name, messages=messages)
 
     message_content = response.get("message", {}).get("content", "")
     print(f"\n[Model Output]:\n{message_content}")
@@ -115,26 +118,32 @@ def load_and_initialize_skill(path):
                 print(f"📤 Result generated ({len(result_str)} bytes)")
 
                 # Send the result back to the model masquerading as a system/user update
-                messages.append({
-                    "role": "user",
-                    "content": (
-                        f"SYSTEM RESPONSE (Result from {fn_name}):\n"
-                        f"```json\n{result_str}\n```\n"
-                        "Please continue based on this result."
-                    )
-                })
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": (
+                            f"SYSTEM RESPONSE (Result from {fn_name}):\n"
+                            f"```json\n{result_str}\n```\n"
+                            "Please continue based on this result."
+                        ),
+                    }
+                )
             else:
                 print(f"Unknown function requested: {fn_name}")
-                messages.append({
-                    "role": "user",
-                    "content": f"SYSTEM ERROR: Tool '{fn_name}' not found."
-                })
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": f"SYSTEM ERROR: Tool '{fn_name}' not found.",
+                    }
+                )
         except json.JSONDecodeError:
             print("Failed to decode JSON from tool call block.")
-            messages.append({
-                "role": "user",
-                "content": "SYSTEM ERROR: Invalid JSON format. Please output valid JSON."
-            })
+            messages.append(
+                {
+                    "role": "user",
+                    "content": "SYSTEM ERROR: Invalid JSON format. Please output valid JSON.",
+                }
+            )
     else:
         # If no tool block was found, assume the agent is done and providing final answer
         print("\n💬 Final Answer reached. End of execution.")

diff --git a/examples/pii_guardrail_flow.py b/examples/pii_guardrail_flow.py
@@ -22,17 +22,21 @@ def simulate_agentic_flow():
 
     # 2. Load the Privacy Firewall Skill
     print("[System] Loading compliance/pii_masker skill...")
-    pii_skill = SkillLoader.load_skill("compliance/pii_masker")["module"].PIIMaskerSkill()
+    pii_skill = SkillLoader.load_skill("compliance/pii_masker")[
+        "module"
+    ].PIIMaskerSkill()
 
     # 3. Intercept and Sanitize (Redact mode)
     print("[System] Intercepting prompt...")
     # NOTE: This requires Ollama running locally with the arpacorp/micro-f1-mask model.
     # If Ollama is not running, the skill falls back to returning the original string.
-    result = pii_skill.execute({
-        "text": raw_user_input,
-        "mode": "redact",  # Change to "mask" to see entity tags like [PERSON_1] instead of XXXX
-        "ollama_url": "http://localhost:11434"
-    })
+    result = pii_skill.execute(
+        {
+            "text": raw_user_input,
+            "mode": "redact",  # Change to "mask" to see entity tags like [PERSON_1] instead of XXXX
+            "ollama_url": "http://localhost:11434",
+        }
+    )
 
     scrubbed_input = result["sanitized_text"]
     metadata = result["metadata"]

diff --git a/examples/prompt_compression_demo.py b/examples/prompt_compression_demo.py
@@ -5,7 +5,7 @@ def run_demo():
     print("Loading Prompt Token Rewriter...")
     # Load the skill via the global loader just like an LLM agent would
     skill_bundle = SkillLoader.load_skill("optimization/prompt_rewriter")
-    skill_instance = skill_bundle['module'].PromptRewriter()
+    skill_instance = skill_bundle["module"].PromptRewriter()
 
     massive_prompt = (
         "Hello, could you please make sure to read this entirely? "
@@ -16,13 +16,14 @@ def run_demo():
     print(f"\n[RAW TEXT]: {massive_prompt}")
 
     # Execute the offline compression logic
-    result = skill_instance.execute({
-        "raw_text": massive_prompt,
-        "compression_aggression": "high"
-    })
+    result = skill_instance.execute(
+        {"raw_text": massive_prompt, "compression_aggression": "high"}
+    )
 
     print(f"\n[COMPRESSED TEXT]: {result['compressed_text']}")
-    print(f"[REDUCTION]: {result['original_tokens']} tokens -> {result['new_tokens']} tokens")
+    print(
+        f"[REDUCTION]: {result['original_tokens']} tokens -> {result['new_tokens']} tokens"
+    )
     print(f"[SAVED]: {result['tokens_saved']} tokens")
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -69,3 +69,7 @@ include = ["skillware*", "skills*"]
 # and this wildcard. No per-skill entries needed when adding registry skills.
 [tool.setuptools.package-data]
 skills = ["**/*"]
+
+[tool.black]
+line-length = 88
+target-version = ["py310"]
diff --git a/skills/compliance/pii_masker/skill.py b/skills/compliance/pii_masker/skill.py
@@ -11,10 +11,7 @@ class PIIMaskerSkill(BaseSkill):
 
     @property
     def manifest(self) -> Dict[str, Any]:
-        return {
-            "name": "compliance/pii_masker",
-            "version": "0.1.0"
-        }
+        return {"name": "compliance/pii_masker", "version": "0.1.0"}
 
     def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
         text = params.get("text", "")
@@ -25,25 +22,29 @@ def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
         sanitized_text = self._apply_mode(sanitized_text, mode)
 
         # Build unique entity types list
-        entities = list(set([re.sub(r'_[0-9]+$', '', e) for e in detected_entities]))
+        entities = list(set([re.sub(r"_[0-9]+$", "", e) for e in detected_entities]))
 
         return {
             "sanitized_text": sanitized_text,
             "metadata": {
                 "detected_entities": entities,
                 "entity_count": len(detected_entities),
                 "security_level": "local-only",
-                "model": "arpacorp/micro-f1-mask"
-            }
+                "model": "arpacorp/micro-f1-mask",
+            },
         }
 
     def _call_ollama(self, text: str, endpoint: str) -> Tuple[str, List[str]]:
         try:
-            response = requests.post(f"{endpoint}/api/generate", json={
-                "model": "arpacorp/micro-f1-mask",
-                "prompt": text,
-                "stream": False
-            }, timeout=30)
+            response = requests.post(
+                f"{endpoint}/api/generate",
+                json={
+                    "model": "arpacorp/micro-f1-mask",
+                    "prompt": text,
+                    "stream": False,
+                },
+                timeout=30,
+            )
             if response.status_code == 200:
                 result_text = response.json().get("response", text)
             else:
@@ -56,22 +57,22 @@ def _call_ollama(self, text: str, endpoint: str) -> Tuple[str, List[str]]:
             result_text = text
 
         # Detect entities in the response
-        detected = re.findall(r'\[([A-Z_]+(?:_[0-9]+)?)\]', result_text)
+        detected = re.findall(r"\[([A-Z_]+(?:_[0-9]+)?)\]", result_text)
         return result_text, detected
 
     def _apply_mode(self, text: str, mode: str) -> str:
         if mode == "mask":
             return text
 
         # Pattern to catch [DOCUMENT], [PERSON_1], etc.
-        pattern = r'\[[A-Z_]+(?:_[0-9]+)?\]'
+        pattern = r"\[[A-Z_]+(?:_[0-9]+)?\]"
         if mode == "redact":
             return re.sub(pattern, "XXXX", text)
         elif mode == "remove":
             # Replace token and any immediate preceding/following spaces safely
             # A simple sub is sufficient. Cleaning up double spaces.
             text = re.sub(pattern, "", text)
-            text = re.sub(r'\s+', ' ', text).strip()
+            text = re.sub(r"\s+", " ", text).strip()
             return text
 
         return text