From 65e4a1596904f6ebb6da55b60da8a85b1e06c0d2 Mon Sep 17 00:00:00 2001 From: PrasadhNanjundan Date: Mon, 4 May 2026 13:11:03 +0530 Subject: [PATCH] OPSLAB-366: move transformers to cloudclapp-wf SASE --- transformers/__init__.py | 5 - transformers/domains/__init__.py | 7 - transformers/domains/url/__init__.py | 7 - transformers/domains/url/models.py | 68 ----- transformers/domains/url/vendors/__init__.py | 5 - transformers/domains/url/vendors/fortinet.py | 178 ----------- transformers/domains/url/vendors/netskope.py | 280 ------------------ transformers/framework/__init__.py | 5 - transformers/framework/pipelines.py | 40 --- .../framework/udm_transformers/__init__.py | 5 - .../udm_transformers/action_mapper.py | 48 --- .../udm_transformers/base_transformer.py | 36 --- .../udm_transformers/category_mapper.py | 48 --- .../udm_transformers/metadata_enricher.py | 48 --- .../udm_transformers/pattern_normalizer.py | 33 --- .../framework/udm_transformers/type_mapper.py | 48 --- 16 files changed, 861 deletions(-) delete mode 100644 transformers/__init__.py delete mode 100644 transformers/domains/__init__.py delete mode 100644 transformers/domains/url/__init__.py delete mode 100644 transformers/domains/url/models.py delete mode 100644 transformers/domains/url/vendors/__init__.py delete mode 100644 transformers/domains/url/vendors/fortinet.py delete mode 100644 transformers/domains/url/vendors/netskope.py delete mode 100644 transformers/framework/__init__.py delete mode 100644 transformers/framework/pipelines.py delete mode 100644 transformers/framework/udm_transformers/__init__.py delete mode 100644 transformers/framework/udm_transformers/action_mapper.py delete mode 100644 transformers/framework/udm_transformers/base_transformer.py delete mode 100644 transformers/framework/udm_transformers/category_mapper.py delete mode 100644 transformers/framework/udm_transformers/metadata_enricher.py delete mode 100644 transformers/framework/udm_transformers/pattern_normalizer.py delete mode 100644 transformers/framework/udm_transformers/type_mapper.py diff --git a/transformers/__init__.py b/transformers/__init__.py deleted file mode 100644 index 93236b6..0000000 --- a/transformers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Transformers package initialization. - -This package provides core functionality and public APIs for the transformers library. -""" diff --git a/transformers/domains/__init__.py b/transformers/domains/__init__.py deleted file mode 100644 index 04ff6eb..0000000 --- a/transformers/domains/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Transformers package initialization. - -This package provides core functionality and public APIs for the transformers library. -""" - -# Sub-package for different logic domains (url, firewall, etc.) diff --git a/transformers/domains/url/__init__.py b/transformers/domains/url/__init__.py deleted file mode 100644 index 1cbf6e7..0000000 --- a/transformers/domains/url/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Transformers package initialization. - -This package provides core functionality and public APIs for the transformers library. -""" - -# url domain package diff --git a/transformers/domains/url/models.py b/transformers/domains/url/models.py deleted file mode 100644 index 66b4e01..0000000 --- a/transformers/domains/url/models.py +++ /dev/null @@ -1,68 +0,0 @@ -""" -URL Domain Models - Unified Data Model (UDM). - -This module defines the canonical schema for URLs, URL collections. - -Design Principles: - -- Domain-Level Logic: Operates purely on domain concepts. -- Vendor-Agnostic: No vendor-specific logic is contained here. -- Strong Typing: Enforces RFC-compliant formatting and normalization. -""" - -from datetime import datetime -from typing import List -from typing import Literal -from typing import Optional - -from pydantic import BaseModel -from pydantic import ConfigDict -from pydantic import Field - - -class Metadata(BaseModel): - """ - Extensible container for enrichment data. - - This includes timestamps, source information, and optional metadata fields. - """ - - processed_at: datetime = Field( - ..., description="Timestamp of when the record was processed" - ) - source: Optional[str] = Field( - None, description="The origin system of the data" - ) - additional_info: Optional[dict] = Field( - None, description="Placeholder for custom metadata expansion" - ) - - -class URL_UDM(BaseModel): - """ - Unified Data Model for URL entities. - - This model serves as the source of truth for processing, independent - of any external vendor system. - """ - - model_config = ConfigDict(populate_by_name=True) - - pattern: str = Field( - ..., description="The URL pattern (literal, wildcard, or regex)" - ) - type: Literal["literal", "wildcard", "regex"] = Field( - ..., description="The syntax type of the pattern" - ) - url_list_id: str = Field( - ..., description="Unique ID for the parent URL list" - ) - url_list_name: str = Field( - ..., description="Human-readable name of the URL list" - ) - vendor: Optional[str] = Field( - None, description="Original vendor for traceability purposes" - ) - metadata: Optional[Metadata] = Field( - None, description="Processing metadata and timestamps" - ) diff --git a/transformers/domains/url/vendors/__init__.py b/transformers/domains/url/vendors/__init__.py deleted file mode 100644 index 93236b6..0000000 --- a/transformers/domains/url/vendors/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Transformers package initialization. - -This package provides core functionality and public APIs for the transformers library. -""" diff --git a/transformers/domains/url/vendors/fortinet.py b/transformers/domains/url/vendors/fortinet.py deleted file mode 100644 index 720442d..0000000 --- a/transformers/domains/url/vendors/fortinet.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -Fortinet URL Domain Integration. - -This module implements the Transformer, Mapper, and Exporter for Fortinet, -converting between Fortinet-specific configurations and the Pydantic -Unified Data Model (UDM). -""" - -from datetime import datetime -from typing import Any -from typing import Dict -from typing import List -from typing import Optional - -import jmespath - -# Domain Model imports -from transformers.domains.url.models import URL_UDM -from transformers.domains.url.models import Category -from transformers.domains.url.models import Metadata -# Framework imports - Absolute paths -from transformers.framework.udm_transformers.action_mapper import ActionMapper -from transformers.framework.udm_transformers.category_mapper import \ - CategoryMapper -from transformers.framework.udm_transformers.metadata_enricher import \ - MetadataEnricher -from transformers.framework.udm_transformers.pattern_normalizer import \ - PatternNormalizer -from transformers.framework.udm_transformers.type_mapper import TypeMapper - -# ---------------- FORTINET MAPPINGS ---------------- - -FORTINET_TYPE_MAP = { - "simple": "literal", - "wildcard": "wildcard", - "regex": "regex", -} - -# ---------------- EXTRACTION LAYER ---------------- - - -JMESPATH_FLATTEN_URLS = """ -*.urls.*.{ - pattern: url, - type: type, - url_id: url_id -} -""" - -def flatten_fortinet_jmespath(raw_data): - """Flatten nested Fortinet dict into a list of record dictionaries.""" - flat = [] - - for _, url_list in raw_data.items(): - list_id = url_list["object_id"] - list_name = url_list["filter_name"] - - for _, item in url_list["urls"].items(): - flat.append({ - "pattern": item["url"], - "type": item["type"], - "url_id": item["url_id"], - "list_id": list_id, - "list_name": list_name, - }) - - return flat - -# ---------------- MAPPER & EXPORTER ---------------- - -class FortinetMapper: - """Handle semantic alignment and Pydantic UDM instantiation.""" - - def to_udm(self, item: Dict[str, Any]) -> URL_UDM: - """Convert a transformed dictionary into a validated URL_UDM instance.""" - # Construct the Metadata model - # MetadataEnricher provides the ISO timestamp string - meta = Metadata( - processed_at=datetime.fromisoformat(item["metadata"]["processed_at"]), - ) - - return URL_UDM( - pattern=item["pattern"], - type=item["type"], - url_list_id=str(item["list_id"]), - url_list_name=item["list_name"], - vendor=item["vendor"], - metadata=meta, - ) - -class FortinetExporter: - """Universal Model -> Fortinet Format.""" - - def transform(self, udm: URL_UDM) -> Dict[str, Any]: - """Reconstruct Fortinet-specific pattern and type syntax.""" - # Reverse mapping for Type - reverse_type_map = {v: k for k, v in FORTINET_TYPE_MAP.items()} - - return { - "url": udm.pattern, - "type": reverse_type_map.get(udm.type, "simple") - } - -def run_universal_to_fortinet_pipeline(records: List[URL_UDM]) -> List[dict]: - """Execute the pipeline to convert UDM records back to Fortinet dicts.""" - output = [] - - for r in records: - output.append({ - "pattern": r.pattern, - "type": r.type, - "list_id": r.url_list_id, - "list_name": r.url_list_name - }) - - return output - - -def export_fortinet_json(records: List[dict]) -> dict: - """Group flat records into the Fortinet-specific JSON structure.""" - grouped = {} - counters = {} - - for r in records: - key = r["list_id"] - - if key not in grouped: - grouped[key] = { - "object_id": r["list_id"], - "filter_name": r["list_name"], - "urls": {} - } - counters[key] = 0 - - idx = str(counters[key]) - counters[key] += 1 - - grouped[key]["urls"][idx] = { - "url_id": str(counters[key]), - "url": r["pattern"], - "type": r["type"], - } - - return grouped - -# ---------------- EXECUTION PIPELINE ---------------- - -def run_fortinet_to_universal_pipeline(raw_data: Dict[str, Any]) -> List[URL_UDM]: - """Orchestrate deterministic flow from raw Fortinet data to UDM objects.""" - # 1. Extraction - flat_data = flatten_fortinet_jmespath(raw_data) - - # 2. Transformation Pipeline - steps = [ - PatternNormalizer(), - TypeMapper(FORTINET_TYPE_MAP), - MetadataEnricher("fortinet") - ] - - mapper = FortinetMapper() - udm_records = [] - - for record in flat_data: - # Apply each modular transformation unit - for step in steps: - record = step.transform(record) - - # 3. Validation & Pydantic Conversion - udm_records.append(mapper.to_udm(record)) - - return udm_records - -# ---------------- REGISTRATION ---------------- - -# This is what debugPythonScript.py is looking for -VENDOR_TO_UNIVERSAL_PIPELINES = { - "fortinet": run_fortinet_to_universal_pipeline -} diff --git a/transformers/domains/url/vendors/netskope.py b/transformers/domains/url/vendors/netskope.py deleted file mode 100644 index a0ac54f..0000000 --- a/transformers/domains/url/vendors/netskope.py +++ /dev/null @@ -1,280 +0,0 @@ -""" -Netskope URL Domain Integration. - -This module implements the Transformer, Mapper, and Exporter for Netskope, -converting between Netskope-specific configurations and the Pydantic -Unified Data Model (UDM). -""" - -import re -from datetime import datetime -from typing import Any -from typing import Dict -from typing import List -from typing import Optional - -import jmespath - -# Domain Model imports -from transformers.domains.url.models import URL_UDM -from transformers.domains.url.models import Category -from transformers.domains.url.models import Metadata -# Framework imports - Absolute paths -from transformers.framework.udm_transformers.action_mapper import ActionMapper -from transformers.framework.udm_transformers.category_mapper import \ - CategoryMapper -from transformers.framework.udm_transformers.metadata_enricher import \ - MetadataEnricher -from transformers.framework.udm_transformers.pattern_normalizer import \ - PatternNormalizer -from transformers.framework.udm_transformers.type_mapper import TypeMapper - -# ---------------- NETSKOPE MAPPINGS ---------------- - - -NETSKOPE_TO_UNIVERSAL_TYPE_MAP = { - "exact": "literal", - "regex": "regex", -} - -UNIVERSAL_TO_NETSKOPE_TYPE_MAP = { - "literal": "exact", - "regex": "regex", - "wildcard": "regex", - "substring": "regex", -} - -# ---------------- EXTRACTION LAYER ---------------- - -JMESPATH_NETSKOPE = """ -values(@)[?modify_type!='Deleted'].{ - list_name: name, - list_id: object_id, - type: data_type, - urls: values(data_urls) -} -""" - -def flatten_netskope_jmespath(url_lists: Dict[str, Any]) -> List[Dict[str, Any]]: - """Flatten the structure using jmespath.""" - extracted = jmespath.search(JMESPATH_NETSKOPE, url_lists) or [] - flat = [] - - now_iso = datetime.utcnow().isoformat() - - for lst in extracted: - for entry in lst.get("urls", []): - url = entry.get("url") - if not url: - continue - flat.append({ - "pattern": url, - "list_name": lst["list_name"], - "list_id": str(lst["list_id"]), - "type": lst["type"], - "metadata": {"processed_at": now_iso} - }) - return flat - -# ---------------- TRANSFORMERS ---------------- - -class NetskopePatternNormalizer(BaseTransformer): - """Normalize Netskope patterns for the universal model.""" - - def wildcard_to_regex(self, pattern: str) -> str: - """Convert a wildcard pattern to a regex string.""" - if not pattern.startswith("*."): - return pattern - - domain = re.escape(pattern[2:]) - return rf"^([^.]+\.)*{domain}$" - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Transform Netskope patterns into normalized UDM formats.""" - item = item.copy() - - raw_pattern = item.get("pattern", "") - universal_type = item.get("type", "literal") - - if universal_type in ("literal", "exact"): - item["pattern"] = raw_pattern - item["type"] = "literal" - - elif universal_type in ("wildcard", "regex"): - item["pattern"] = self.wildcard_to_regex(raw_pattern) - item["type"] = "regex" - - else: - item["pattern"] = raw_pattern - item["type"] = "literal" - - return item - - -class NetskopePatternDenormalizer(BaseTransformer): - """Convert Netskope patterns back to universal model patterns.""" - - def regex_to_wildcard(self, pattern: str) -> Optional[str]: - """Attempt to convert a regex back to a wildcard string.""" - prefix = "^([^.]+\\.)*" - suffix = "$" - - if pattern.startswith(prefix) and pattern.endswith(suffix): - domain = pattern[len(prefix):-len(suffix)] - domain = domain.replace("\\.", ".") - return f"*.{domain}" - - return None - - def is_regex(self, pattern: str) -> bool: - """Check if a pattern contains regex special characters.""" - regex_markers = ("^", "$", "(", ")", "[", "]", "+", "?", "|", "{", "}") - return any(marker in pattern for marker in regex_markers) - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Denormalize patterns into standard UDM type and format.""" - pattern = item.get("pattern", "").replace("\\\\", "\\") - - # already wildcard - if pattern.startswith("*.") and pattern.count("*") == 1: - item["type"] = "wildcard" - - # regex patterns FIRST - elif self.is_regex(pattern): - wildcard = self.regex_to_wildcard(pattern) - - if wildcard: - item["type"] = "wildcard" - pattern = wildcard - else: - item["type"] = "regex" - - # non-regex wildcard syntax - elif "*" in pattern: - item["type"] = "wildcard" - - else: - item["type"] = "literal" - - item["pattern"] = pattern - item.pop("netskope_type", None) - - return item - -# ---------------- MAPPER & EXPORTER ---------------- - -class NetskopeMapper: - """Handle semantic alignment and Pydantic UDM instantiation.""" - - def to_udm(self, item: Dict[str, Any]) -> URL_UDM: - """Convert transformed dictionary into validated URL_UDM instance.""" - meta = Metadata( - processed_at=datetime.fromisoformat(item["metadata"]["processed_at"]), - ) - - return URL_UDM( - pattern=item["pattern"], - type=item["type"], - url_list_id=item["list_id"], - url_list_name=item["list_name"], - vendor="netskope", - ) - -class NetskopeExporter: - """Universal Model -> Netskope Format.""" - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Convert UDM fields into Netskope-specific schema.""" - return { - "object_id": item.get("url_list_id"), - "name": item.get("url_list_name"), - "data_type": item.get("type"), - "data_urls": item.get("urls", []) - } - -# ---------------- EXECUTION PIPELINE ---------------- - -def run_netskope_to_universal_pipeline(raw_data: Dict[str, Any]) -> List[URL_UDM]: - """Orchestrate the flow from raw Netskope data to UDM objects.""" - # 1. Extraction - flat_data = flatten_netskope_jmespath(raw_data) - - # 2. Transformation Pipeline - steps = [ - TypeMapper(NETSKOPE_TO_UNIVERSAL_TYPE_MAP), - NetskopePatternDenormalizer(), - ] - - mapper = NetskopeMapper() - udm_records = [] - - for record in flat_data: - for step in steps: - record = step.transform(record) - udm_records.append(mapper.to_udm(record)) - - return udm_records - -def run_universal_to_netskope_pipeline(udm_records: List[Any]) -> List[Dict[str, Any]]: - """Convert UDM records into the structured Netskope payload.""" - if not udm_records: - return [] - - steps = [ - TypeMapper(UNIVERSAL_TO_NETSKOPE_TYPE_MAP), - NetskopePatternNormalizer(), - MetadataEnricher("netskope") - ] - - grouped = defaultdict( - lambda: { - "name": "", - "data_type": "literal", - "data_urls": set() - } - ) - - for entry in udm_records: - - # Apply all transformers sequentially - transformed = entry - - for step in steps: - transformed = step.transform(transformed) - - # Use transformed record - obj_id = str(transformed.get("url_list_id", "0")) - name = transformed.get("url_list_name", "Default_List") - url_val = transformed.get("pattern", "") - d_type = transformed.get("type", "literal") - - if not url_val: - continue - - group = grouped[obj_id] - group["name"] = name - group["data_urls"].add(url_val) - - if d_type == "wildcard" or d_type == "regex" or "*" in url_val: - group["data_type"] = "regex" - - final_payload = [] - - for oid, data in grouped.items(): - final_payload.append({ - "object_id": int(oid) if oid.isdigit() else oid, - "name": data["name"], - "data_type": data["data_type"], - "data_urls": sorted(list(data["data_urls"])) - }) - - return final_payload - -# Pipeline definition -VENDOR_TO_UNIVERSAL_PIPELINES = { - "netskope": run_netskope_to_universal_pipeline -} - -UNIVERSAL_TO_VENDOR_PIPELINES = { - "netskope": run_universal_to_netskope_pipeline -} diff --git a/transformers/framework/__init__.py b/transformers/framework/__init__.py deleted file mode 100644 index 93236b6..0000000 --- a/transformers/framework/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Transformers package initialization. - -This package provides core functionality and public APIs for the transformers library. -""" diff --git a/transformers/framework/pipelines.py b/transformers/framework/pipelines.py deleted file mode 100644 index 46927ee..0000000 --- a/transformers/framework/pipelines.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Pipeline helper functions for URL transformations. - -This module provides utilities to apply a sequence of transformers -to vendor configuration items, producing universal model dictionaries. -""" - -from typing import Any -from typing import Dict -from typing import List - -from transformers.framework.udm_transformers.base_transformer import \ - BaseTransformer - - -def apply_transformers( - items: List[Dict[str, Any]], - transformers: List[BaseTransformer] -) -> List[Dict[str, Any]]: - """Apply a sequence of transformers to a list of items. - - Each item in the input list is processed sequentially by all - transformers in the given order. - - Args: - items: A list of dictionaries representing vendor configuration - entries. - transformers: An ordered list of transformer instances that - implement the `transform` method. - - Returns: - A list of transformed dictionaries. - """ - result: List[Dict[str, Any]] = [] - - for item in items: - for transformer in transformers: - item = transformer.transform(item) - result.append(item) - - return result diff --git a/transformers/framework/udm_transformers/__init__.py b/transformers/framework/udm_transformers/__init__.py deleted file mode 100644 index 93236b6..0000000 --- a/transformers/framework/udm_transformers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Transformers package initialization. - -This package provides core functionality and public APIs for the transformers library. -""" diff --git a/transformers/framework/udm_transformers/action_mapper.py b/transformers/framework/udm_transformers/action_mapper.py deleted file mode 100644 index 279629a..0000000 --- a/transformers/framework/udm_transformers/action_mapper.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Action mapping transformer. - -This module defines a transformer responsible for mapping action values -between vendor-specific representations and the universal data model. -""" - -from typing import Any -from typing import Dict - -# Change from: from .base_transformer import BaseTransformer -# To the absolute framework path: -from transformers.framework.udm_transformers.base_transformer import \ - BaseTransformer - - -class ActionMapper(BaseTransformer): - """Map action values between vendor and universal models. - - This transformer replaces the ``action`` field of an item using a - predefined mapping dictionary. If the action is not found in the - mapping, it is left unchanged. - """ - - def __init__(self, action_map: Dict[str, str]) -> None: - """Initialize the ActionMapper. - - Args: - action_map: A dictionary mapping source action values to - destination action values. - """ - self.action_map = action_map - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Transform an item's action field using the action mapping. - - Args: - item: A dictionary representing a single rule or configuration - entry containing an ``action`` field. - - Returns: - The transformed item with its ``action`` field mapped according - to the configured action map. - """ - action = item.get("action") - if action in self.action_map: - item["action"] = self.action_map[action] - - return item diff --git a/transformers/framework/udm_transformers/base_transformer.py b/transformers/framework/udm_transformers/base_transformer.py deleted file mode 100644 index 161d545..0000000 --- a/transformers/framework/udm_transformers/base_transformer.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Base Transformer Definition - Framework Layer. - -This module defines the abstract base class (ABC) used by all transformers -within the generic transformation engine. It ensures a consistent interface -for reusable transformation components. -""" - -from abc import ABC -from abc import abstractmethod -from typing import Any -from typing import Dict - - -class BaseTransformer(ABC): - """ - Define the interface for all transformers. - - All concrete transformers must implement the ``transform`` method, - which takes a single dictionary item and returns a transformed - dictionary. This maintains a domain-agnostic execution model. - """ - - @abstractmethod - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """ - Transform a single dictionary item. - - Args: - item: A dictionary representing a single configuration or - URL entry. - - Returns: - A transformed dictionary. - """ - raise NotImplementedError diff --git a/transformers/framework/udm_transformers/category_mapper.py b/transformers/framework/udm_transformers/category_mapper.py deleted file mode 100644 index a1d99c0..0000000 --- a/transformers/framework/udm_transformers/category_mapper.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Category mapping transformer. - -This module defines a transformer responsible for mapping category -identifiers between vendor-specific representations and the universal -data model. -""" - -from typing import Any -from typing import Dict - -# Restructured to use the absolute path within the Framework layer -from transformers.framework.udm_transformers.base_transformer import \ - BaseTransformer - - -class CategoryMapper(BaseTransformer): - """Map category identifiers between vendor and universal models. - - This transformer replaces the ``category_id`` field of an item using - a predefined mapping dictionary. If the category is not found in the - mapping, it is left unchanged. - """ - - def __init__(self, category_map: Dict[str, str]) -> None: - """Initialize the CategoryMapper. - - Args: - category_map: A dictionary mapping source category identifiers - to destination category identifiers. - """ - self.category_map = category_map - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Transform an item's category identifier using the category map. - - Args: - item: A dictionary representing a single rule or configuration - entry containing a ``category_id`` field. - - Returns: - The transformed item with its ``category_id`` field mapped - according to the configured category map. - """ - category_id = item.get("category_id") - if category_id in self.category_map: - item["category_id"] = self.category_map[category_id] - - return item diff --git a/transformers/framework/udm_transformers/metadata_enricher.py b/transformers/framework/udm_transformers/metadata_enricher.py deleted file mode 100644 index 39e4d10..0000000 --- a/transformers/framework/udm_transformers/metadata_enricher.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Metadata enrichment transformer. - -This module defines a transformer that adds vendor information and -metadata timestamps to each item in the transformation pipeline. -""" - -from datetime import datetime -from typing import Any -from typing import Dict - -# Restructured to use the absolute path within the Framework layer -from transformers.framework.udm_transformers.base_transformer import \ - BaseTransformer - - -class MetadataEnricher(BaseTransformer): - """Enrich items with vendor and metadata information. - - This transformer adds a ``vendor`` field and a ``metadata`` dictionary - containing a ``processed_at`` timestamp to each item. - """ - - def __init__(self, vendor: str) -> None: - """Initialize the MetadataEnricher. - - Args: - vendor: The vendor name to attach to each item. - """ - self.vendor = vendor - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Add vendor and metadata information to an item. - - Args: - item: A dictionary representing a single configuration or URL entry. - - Returns: - The transformed dictionary containing the ``vendor`` field and - a ``metadata.processed_at`` timestamp. - """ - item["vendor"] = self.vendor - if "metadata" not in item: - item["metadata"] = {} - - # Standardizing to the UDM requirement for processed_at timestamps - item["metadata"]["processed_at"] = datetime.utcnow().isoformat() - - return item diff --git a/transformers/framework/udm_transformers/pattern_normalizer.py b/transformers/framework/udm_transformers/pattern_normalizer.py deleted file mode 100644 index 7a85b25..0000000 --- a/transformers/framework/udm_transformers/pattern_normalizer.py +++ /dev/null @@ -1,33 +0,0 @@ -"""Pattern normalization transformer. - -This module defines a generic pattern normalizer that ensures each item -has a ``pattern`` field. Currently, this transformer acts as a pass-through -but serves as a hook for formal pattern representations. -""" - -from typing import Any -from typing import Dict - -# Restructured to use the absolute path within the Framework layer -from transformers.framework.udm_transformers.base_transformer import \ - BaseTransformer - - -class PatternNormalizer(BaseTransformer): - """Normalize or enforce the presence of a pattern field in items. - - This transformer guarantees that each dictionary item contains a - ``pattern`` key. If the key is missing, it is initialized to an empty string. - """ - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Ensure the item has a pattern field. - - Args: - item: A dictionary representing a single configuration or URL entry. - - Returns: - The same dictionary with a ``pattern`` key ensured. - """ - item["pattern"] = item.get("pattern", "") - return item diff --git a/transformers/framework/udm_transformers/type_mapper.py b/transformers/framework/udm_transformers/type_mapper.py deleted file mode 100644 index d058d59..0000000 --- a/transformers/framework/udm_transformers/type_mapper.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Type mapping transformer. - -This module defines a transformer that maps item types (e.g., literal, -wildcard, regex, substring) between vendor-specific representations -and the universal data model. -""" - -from typing import Any -from typing import Dict - -# Restructured to use the absolute path within the Framework layer -from transformers.framework.udm_transformers.base_transformer import \ - BaseTransformer - - -class TypeMapper(BaseTransformer): - """Map type values between vendor and universal models. - - This transformer replaces the ``type`` field of an item using a - predefined mapping dictionary. If the type is not found in the - mapping, it is left unchanged. - """ - - def __init__(self, type_map: Dict[str, str]) -> None: - """Initialize the TypeMapper. - - Args: - type_map: A dictionary mapping source type values to - destination type values. - """ - self.type_map = type_map - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Transform an item's type field using the type mapping. - - Args: - item: A dictionary representing a single rule or configuration - entry containing a ``type`` field. - - Returns: - The transformed item with its ``type`` field mapped according - to the configured type map. - """ - item_type = item.get("type") - if item_type in self.type_map: - item["type"] = self.type_map[item_type] - - return item